|
@@ -258,20 +258,20 @@ def _platform_tree_dir() -> Path:
|
|
|
return _BASE_INPUT / "xiaohongshu" / "tree"
|
|
return _BASE_INPUT / "xiaohongshu" / "tree"
|
|
|
|
|
|
|
|
|
|
|
|
|
-def get_platform_nodes_by_conditional_ratio(
|
|
|
|
|
|
|
+def _collect_platform_scored_tuples(
|
|
|
derived_list: list[tuple[str, str]],
|
|
derived_list: list[tuple[str, str]],
|
|
|
threshold: float,
|
|
threshold: float,
|
|
|
- top_n: int,
|
|
|
|
|
-) -> list[dict[str, Any]]:
|
|
|
|
|
|
|
+ max_nodes: int = 12000,
|
|
|
|
|
+) -> list[tuple[str, float, str, str]]:
|
|
|
"""
|
|
"""
|
|
|
- 平台库人设树节点条件概率筛选,计算方式与 get_nodes_by_conditional_ratio 一致
|
|
|
|
|
- (同一套 calc_node_conditional_ratio / _post_ids 规则,索引来自 xiaohongshu/tree)。
|
|
|
|
|
- derived_list 为空时用节点 _ratio。
|
|
|
|
|
|
|
+ 平台库人设树:条件概率 >= threshold 的节点全量收集,按条件概率降序。
|
|
|
|
|
+ max_nodes 防止极端大树占满内存;截断发生在全局排序之后(保留高分段)。
|
|
|
"""
|
|
"""
|
|
|
tree_dir = _platform_tree_dir()
|
|
tree_dir = _platform_tree_dir()
|
|
|
if not tree_dir.is_dir():
|
|
if not tree_dir.is_dir():
|
|
|
return []
|
|
return []
|
|
|
|
|
|
|
|
|
|
+ thr = float(threshold)
|
|
|
scored: list[tuple[str, float, str, str]] = []
|
|
scored: list[tuple[str, float, str, str]] = []
|
|
|
|
|
|
|
|
if not derived_list:
|
|
if not derived_list:
|
|
@@ -282,11 +282,8 @@ def get_platform_nodes_by_conditional_ratio(
|
|
|
if not isinstance(child, dict):
|
|
if not isinstance(child, dict):
|
|
|
continue
|
|
continue
|
|
|
ratio = child.get("_ratio")
|
|
ratio = child.get("_ratio")
|
|
|
- if ratio is None:
|
|
|
|
|
- r = 0.0
|
|
|
|
|
- else:
|
|
|
|
|
- r = float(ratio)
|
|
|
|
|
- if r >= threshold:
|
|
|
|
|
|
|
+ r = 0.0 if ratio is None else float(ratio)
|
|
|
|
|
+ if r >= thr:
|
|
|
scored.append((name, r, parent_name, dim_name))
|
|
scored.append((name, r, parent_name, dim_name))
|
|
|
walk(name, child)
|
|
walk(name, child)
|
|
|
|
|
|
|
@@ -312,13 +309,30 @@ def get_platform_nodes_by_conditional_ratio(
|
|
|
node_name,
|
|
node_name,
|
|
|
base_dir=_BASE_INPUT,
|
|
base_dir=_BASE_INPUT,
|
|
|
node_post_index=node_post_index,
|
|
node_post_index=node_post_index,
|
|
|
- target_ratio=threshold,
|
|
|
|
|
|
|
+ target_ratio=thr,
|
|
|
)
|
|
)
|
|
|
- if ratio >= threshold:
|
|
|
|
|
|
|
+ if ratio >= thr:
|
|
|
scored.append((node_name, ratio, parent_name, dim_name))
|
|
scored.append((node_name, ratio, parent_name, dim_name))
|
|
|
|
|
|
|
|
scored.sort(key=lambda x: x[1], reverse=True)
|
|
scored.sort(key=lambda x: x[1], reverse=True)
|
|
|
- top = scored[:top_n]
|
|
|
|
|
|
|
+ if max_nodes > 0 and len(scored) > max_nodes:
|
|
|
|
|
+ scored = scored[:max_nodes]
|
|
|
|
|
+ return scored
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
+def get_platform_nodes_by_conditional_ratio(
|
|
|
|
|
+ derived_list: list[tuple[str, str]],
|
|
|
|
|
+ threshold: float,
|
|
|
|
|
+ top_n: int,
|
|
|
|
|
+) -> list[dict[str, Any]]:
|
|
|
|
|
+ """
|
|
|
|
|
+ 平台库人设树节点条件概率筛选,计算方式与 get_nodes_by_conditional_ratio 一致
|
|
|
|
|
+ (同一套 calc_node_conditional_ratio / _post_ids 规则,索引来自 xiaohongshu/tree)。
|
|
|
|
|
+ derived_list 为空时用节点 _ratio。
|
|
|
|
|
+ """
|
|
|
|
|
+ n = max(0, int(top_n))
|
|
|
|
|
+ scored = _collect_platform_scored_tuples(derived_list, threshold)
|
|
|
|
|
+ top = scored[:n]
|
|
|
return [
|
|
return [
|
|
|
{
|
|
{
|
|
|
"节点名称": name,
|
|
"节点名称": name,
|
|
@@ -435,51 +449,103 @@ def _platform_node_belonging_dim_from_anchor_nodes(
|
|
|
return dim_map
|
|
return dim_map
|
|
|
|
|
|
|
|
|
|
|
|
|
-def _load_platform_match_nodes(
|
|
|
|
|
|
|
def _load_platform_nodes_split(
    post_id: str,
    derived_list: list[tuple[str, str]],
    conditional_ratio_threshold: float,
    match_score_threshold: float,
    top_n: int,
    node_belonging_dim_platform: Optional[dict[str, str]] = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """
    Split platform-tree nodes into "has post-topic match" and "no match" groups.

    Nodes whose conditional ratio reaches ``conditional_ratio_threshold`` are
    collected via ``_collect_platform_scored_tuples`` and consumed in the order
    that helper returns them. Each node is classified by whether the
    xiaohongshu match data (``_platform_match_topics_by_node``) holds at least
    one topic for it with a score >= ``match_score_threshold``. The two groups
    are truncated independently to a candidate pool, so one group can never
    crowd the other out of a shared global top-N.

    Args:
        post_id: Post whose topic matches are loaded. When empty, no match
            data is loaded and every node ends up in the unmatched group.
        derived_list: Already-derived (topic, source node) pairs, forwarded
            unchanged to ``_collect_platform_scored_tuples``.
        conditional_ratio_threshold: Minimum conditional ratio for a node.
        match_score_threshold: Minimum match score for a topic to count.
        top_n: Requested result size; only used to size the per-group pool.
        node_belonging_dim_platform: Optional node-name -> dimension map.
            When given, nodes without a usable dimension in it are dropped
            and the mapped value becomes the row's "所属维度". When None,
            no node is dropped and "所属维度" is the placeholder "—".

    Returns:
        ``(matched, unmatched)``. Matched rows carry "帖子选题点匹配" as a list
        of ``{"帖子选题点", "匹配分数"}`` dicts sorted by score descending;
        unmatched rows carry the literal string "无".
    """
    matched: list[dict[str, Any]] = []
    unmatched: list[dict[str, Any]] = []

    topic_map: dict[tuple[str, str], dict[str, float]] = {}
    if post_id:
        topic_map = _platform_match_topics_by_node(post_id, float(match_score_threshold))

    # Dimension labels in the match data may not agree exactly with the tree
    # side, so keep a fallback index keyed by node name only (best score per
    # topic) to avoid misclassifying such nodes as "no match".
    topic_map_by_name: dict[str, dict[str, float]] = {}
    for (_dim, node_name), node_topics in topic_map.items():
        bucket = topic_map_by_name.setdefault(str(node_name).strip(), {})
        for topic, score in (node_topics or {}).items():
            best = bucket.get(topic)
            if best is None or score > best:
                bucket[topic] = score

    all_scored = _collect_platform_scored_tuples(
        derived_list,
        float(conditional_ratio_threshold),
    )
    if not all_scored:
        return matched, unmatched

    # Classify once and keep the resolved topics next to each entry, so the
    # row-building stage cannot disagree with the classification stage.
    with_topics: list[tuple[tuple[str, float, str, str], dict[str, float]]] = []
    without_topics: list[tuple[str, float, str, str]] = []
    for entry in all_scored:
        name, _ratio, _parent, dim = entry
        name_key = str(name or "").strip()
        dim_key = str(dim or "").strip()
        topics = topic_map.get((dim_key, name_key)) or topic_map_by_name.get(name_key) or {}
        if topics:
            with_topics.append((entry, topics))
        else:
            without_topics.append(entry)

    # Per-group candidate pool: at least top_n, otherwise 5 * top_n clamped to
    # the range [500, 2000]; leaves headroom for later filtering by callers.
    pool = max(int(top_n), min(2000, max(500, int(top_n) * 5)))

    def _build_row(entry: tuple[str, float, str, str]) -> Optional[dict[str, Any]]:
        """Return the output row for *entry*, or None if it has no usable dimension."""
        name, ratio, parent, _tree_dim = entry
        out_dim = "—"
        if node_belonging_dim_platform is not None:
            out_dim = node_belonging_dim_platform.get(str(name or "").strip()) or "—"
            if out_dim == "—":
                return None
        return {
            "节点名称": name,
            "条件概率": ratio,
            "父节点名称": parent,
            "所属维度": out_dim,
        }

    for entry, topics in with_topics[:pool]:
        row = _build_row(entry)
        if row is None:
            continue
        ranked = sorted(topics.items(), key=lambda kv: kv[1], reverse=True)
        row["帖子选题点匹配"] = [{"帖子选题点": t, "匹配分数": sc} for t, sc in ranked]
        matched.append(row)

    for entry in without_topics[:pool]:
        row = _build_row(entry)
        if row is None:
            continue
        row["帖子选题点匹配"] = "无"
        unmatched.append(row)

    return matched, unmatched
|
|
|
|
|
|
|
|
|
|
|
|
|
-def build_platform_tree_section_items(
|
|
|
|
|
|
|
+def build_platform_tree_section_items_split(
|
|
|
post_id: str,
|
|
post_id: str,
|
|
|
derived_list: list[tuple[str, str]],
|
|
derived_list: list[tuple[str, str]],
|
|
|
conditional_ratio_threshold: float,
|
|
conditional_ratio_threshold: float,
|
|
@@ -487,14 +553,16 @@ def build_platform_tree_section_items(
|
|
|
top_n: int,
|
|
top_n: int,
|
|
|
exclude_node_names: set[str],
|
|
exclude_node_names: set[str],
|
|
|
node_belonging_dim_platform: Optional[dict[str, str]] = None,
|
|
node_belonging_dim_platform: Optional[dict[str, str]] = None,
|
|
|
-) -> list[dict[str, Any]]:
|
|
|
|
|
|
|
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
|
|
|
"""
|
|
"""
|
|
|
- 平台库人设树节点:条件概率 + xiaohongshu/match_data 匹配,并排除与账号段重复的节点名称。
|
|
|
|
|
|
|
+ 平台库人设树节点:条件概率 + xiaohongshu/match_data 匹配,排除与账号段重复的节点名称,
|
|
|
|
|
+ 返回 (有帖子选题点匹配的节点列表, 无帖子选题点匹配的节点列表)。
|
|
|
供 find_tree_nodes_by_conditional_ratio 聚合输出使用。
|
|
供 find_tree_nodes_by_conditional_ratio 聚合输出使用。
|
|
|
"""
|
|
"""
|
|
|
if not post_id:
|
|
if not post_id:
|
|
|
- return []
|
|
|
|
|
- plat = _load_platform_match_nodes(
|
|
|
|
|
|
|
+ return [], []
|
|
|
|
|
+ ex = {str(n).strip() for n in exclude_node_names}
|
|
|
|
|
+ matched, unmatched = _load_platform_nodes_split(
|
|
|
post_id=post_id,
|
|
post_id=post_id,
|
|
|
derived_list=derived_list,
|
|
derived_list=derived_list,
|
|
|
conditional_ratio_threshold=float(conditional_ratio_threshold),
|
|
conditional_ratio_threshold=float(conditional_ratio_threshold),
|
|
@@ -502,11 +570,9 @@ def build_platform_tree_section_items(
|
|
|
top_n=int(top_n),
|
|
top_n=int(top_n),
|
|
|
node_belonging_dim_platform=node_belonging_dim_platform,
|
|
node_belonging_dim_platform=node_belonging_dim_platform,
|
|
|
)
|
|
)
|
|
|
- ex = {str(n).strip() for n in exclude_node_names}
|
|
|
|
|
- return [
|
|
|
|
|
- p for p in plat
|
|
|
|
|
- if str(p.get("节点名称", "")).strip() not in ex
|
|
|
|
|
- ]
|
|
|
|
|
|
|
+ matched_filtered = [p for p in matched if str(p.get("节点名称", "")).strip() not in ex]
|
|
|
|
|
+ unmatched_filtered = [p for p in unmatched if str(p.get("节点名称", "")).strip() not in ex]
|
|
|
|
|
+ return matched_filtered, unmatched_filtered
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# ---------------------------------------------------------------------------
|
|
@@ -594,14 +660,17 @@ async def find_tree_nodes_by_conditional_ratio(
|
|
|
"""
|
|
"""
|
|
|
按条件概率阈值筛选节点:先账号人设树(优先使用),再平台库人设树;两段不合并。
|
|
按条件概率阈值筛选节点:先账号人设树(优先使用),再平台库人设树;两段不合并。
|
|
|
条件概率计算对两棵树使用同一套规则(calc_node_conditional_ratio / 节点 _post_ids)。
|
|
条件概率计算对两棵树使用同一套规则(calc_node_conditional_ratio / 节点 _post_ids)。
|
|
|
- 「帖子选题点匹配」仅保留匹配分 >= match_score_threshold 的选题点;无达标匹配的节点不返回。
|
|
|
|
|
|
|
+ 返回结果按以下配额分配(合计 top_n 条):
|
|
|
|
|
+ - 账号人设树节点占 60%,其中有帖子选题点匹配的记录和无帖子选题点匹配的记录各占一半;
|
|
|
|
|
+ - 平台库人设树节点占 40%,其中有帖子选题点匹配的记录和无帖子选题点匹配的记录各占一半。
|
|
|
|
|
+ 「帖子选题点匹配」仅收录匹配分 >= match_score_threshold 的选题点。
|
|
|
|
|
|
|
|
Args:
|
|
Args:
|
|
|
account_name : 账号名,用于定位该账号的人设树数据。
|
|
account_name : 账号名,用于定位该账号的人设树数据。
|
|
|
post_id : 帖子ID,用于加载帖子选题点并与各节点做匹配判断。
|
|
post_id : 帖子ID,用于加载帖子选题点并与各节点做匹配判断。
|
|
|
derived_items : 已推导选题点列表,可为空。非空时每项为字典,需含 topic(或「已推导的选题点」)与 source_node(或「推导来源人设树节点」)
|
|
derived_items : 已推导选题点列表,可为空。非空时每项为字典,需含 topic(或「已推导的选题点」)与 source_node(或「推导来源人设树节点」)
|
|
|
conditional_ratio_threshold : 条件概率阈值,仅返回条件概率 >= 该值的节点。
|
|
conditional_ratio_threshold : 条件概率阈值,仅返回条件概率 >= 该值的节点。
|
|
|
- top_n : 返回条数上限(账号段、平台段各自取前 top_n 条条件概率结果后再按匹配过滤)。
|
|
|
|
|
|
|
+ top_n : 最终返回总条数上限,按 账号60%/平台40%、有匹配/无匹配各半 分配。
|
|
|
round : 推导轮次。
|
|
round : 推导轮次。
|
|
|
log_id : 推导日志ID
|
|
log_id : 推导日志ID
|
|
|
match_score_threshold : 帖子选题点匹配分阈值,与 point_match 默认一致。
|
|
match_score_threshold : 帖子选题点匹配分阈值,与 point_match 默认一致。
|
|
@@ -678,22 +747,36 @@ async def find_tree_nodes_by_conditional_ratio(
|
|
|
matches = node_match_map.get(item["节点名称"], [])
|
|
matches = node_match_map.get(item["节点名称"], [])
|
|
|
item["帖子选题点匹配"] = matches if matches else "无"
|
|
item["帖子选题点匹配"] = matches if matches else "无"
|
|
|
|
|
|
|
|
- # [临时] 仅保留有帖子选题点匹配的记录(过滤掉「无」),方便后续删除
|
|
|
|
|
- items = [x for x in items if isinstance(x.get("帖子选题点匹配"), list)]
|
|
|
|
|
-
|
|
|
|
|
- # 2)平台库人设树(条件概率 + xiaohongshu 匹配文件;与账号节点同名则剔除)
|
|
|
|
|
- account_node_names = {str(x.get("节点名称", "")).strip() for x in items}
|
|
|
|
|
|
|
+ # 账号配额:占 top_n 的 60%,有/无匹配各一半
|
|
|
|
|
+ account_quota = int(top_n * 0.6 + 0.5)
|
|
|
|
|
+ account_with_n = account_quota // 2
|
|
|
|
|
+ account_without_n = account_quota - account_with_n
|
|
|
|
|
+ items_with_match = [x for x in items if isinstance(x.get("帖子选题点匹配"), list)]
|
|
|
|
|
+ items_without_match = [x for x in items if not isinstance(x.get("帖子选题点匹配"), list)]
|
|
|
|
|
+ items = items_with_match[:account_with_n] + items_without_match[:account_without_n]
|
|
|
|
|
+
|
|
|
|
|
+ # 2)平台库人设树(条件概率 + xiaohongshu 匹配文件)
|
|
|
|
|
+ # 平台配额:占 top_n 的 40%,有/无匹配各一半
|
|
|
|
|
+ platform_quota = top_n - account_quota
|
|
|
|
|
+ platform_with_n = platform_quota // 2
|
|
|
|
|
+ platform_without_n = platform_quota - platform_with_n
|
|
|
|
|
+ # 平台「有匹配」排除账号侧已有帖子选题点匹配的节点名(与账号段去重)。
|
|
|
|
|
+ # 平台「无匹配」排除已在账号段输出里出现过的节点名(避免重复罗列无新信息的同名节点)。
|
|
|
|
|
+ account_matched_names = {str(x.get("节点名称", "")).strip() for x in items if isinstance(x.get("帖子选题点匹配"), list)}
|
|
|
|
|
+ account_all_names = {str(x.get("节点名称", "")).strip() for x in items}
|
|
|
platform_items: list[dict[str, Any]] = []
|
|
platform_items: list[dict[str, Any]] = []
|
|
|
if post_id:
|
|
if post_id:
|
|
|
- platform_items = build_platform_tree_section_items(
|
|
|
|
|
|
|
+ p_matched_raw, p_unmatched_raw = _load_platform_nodes_split(
|
|
|
post_id=post_id,
|
|
post_id=post_id,
|
|
|
derived_list=derived_list,
|
|
derived_list=derived_list,
|
|
|
conditional_ratio_threshold=float(conditional_ratio_threshold),
|
|
conditional_ratio_threshold=float(conditional_ratio_threshold),
|
|
|
match_score_threshold=float(match_score_threshold),
|
|
match_score_threshold=float(match_score_threshold),
|
|
|
top_n=top_n,
|
|
top_n=top_n,
|
|
|
- exclude_node_names=account_node_names,
|
|
|
|
|
node_belonging_dim_platform=node_belonging_dim_platform,
|
|
node_belonging_dim_platform=node_belonging_dim_platform,
|
|
|
)
|
|
)
|
|
|
|
|
+ p_matched = [p for p in p_matched_raw if str(p.get("节点名称", "")).strip() not in account_matched_names]
|
|
|
|
|
+ p_unmatched = [p for p in p_unmatched_raw if str(p.get("节点名称", "")).strip() not in account_all_names]
|
|
|
|
|
+ platform_items = p_matched[:platform_with_n] + p_unmatched[:platform_without_n]
|
|
|
|
|
|
|
|
def _format_node_line(x: dict[str, Any]) -> str:
|
|
def _format_node_line(x: dict[str, Any]) -> str:
|
|
|
match_info = x.get("帖子选题点匹配", "无")
|
|
match_info = x.get("帖子选题点匹配", "无")
|
|
@@ -709,20 +792,20 @@ async def find_tree_nodes_by_conditional_ratio(
|
|
|
|
|
|
|
|
lines: list[str] = []
|
|
lines: list[str] = []
|
|
|
lines.append(
|
|
lines.append(
|
|
|
- "【优先使用】第一节为账号人设树中条件概率达标的节点;"
|
|
|
|
|
- "第二节为平台库人设树中条件概率达标的节点;"
|
|
|
|
|
|
|
+ "【优先使用】第一节为账号人设树中条件概率达标的节点(占60%配额,有/无帖子匹配各半);"
|
|
|
|
|
+ "第二节为平台库人设树中条件概率达标的节点(占40%配额,有/无帖子匹配各半);"
|
|
|
)
|
|
)
|
|
|
lines.append("")
|
|
lines.append("")
|
|
|
lines.append("—— 账号人设树节点 ——")
|
|
lines.append("—— 账号人设树节点 ——")
|
|
|
if not items:
|
|
if not items:
|
|
|
- lines.append(f"(无:未找到条件概率 >= {conditional_ratio_threshold} 且与帖子选题点有匹配的节点)")
|
|
|
|
|
|
|
+ lines.append(f"(无:未找到条件概率 >= {conditional_ratio_threshold} 的节点)")
|
|
|
else:
|
|
else:
|
|
|
lines.extend(_format_node_line(x) for x in items)
|
|
lines.extend(_format_node_line(x) for x in items)
|
|
|
lines.append("")
|
|
lines.append("")
|
|
|
lines.append("—— 平台库人设树节点 ——")
|
|
lines.append("—— 平台库人设树节点 ——")
|
|
|
if not platform_items:
|
|
if not platform_items:
|
|
|
lines.append(
|
|
lines.append(
|
|
|
- "(无:未找到条件概率达标且存在达标帖子选题点匹配的节点)"
|
|
|
|
|
|
|
+ "(无:未找到条件概率达标的节点)"
|
|
|
)
|
|
)
|
|
|
else:
|
|
else:
|
|
|
lines.extend(_format_node_line(x) for x in platform_items)
|
|
lines.extend(_format_node_line(x) for x in platform_items)
|
|
@@ -736,6 +819,14 @@ async def find_tree_nodes_by_conditional_ratio(
|
|
|
"threshold": conditional_ratio_threshold,
|
|
"threshold": conditional_ratio_threshold,
|
|
|
"match_score_threshold": float(match_score_threshold),
|
|
"match_score_threshold": float(match_score_threshold),
|
|
|
"top_n": top_n,
|
|
"top_n": top_n,
|
|
|
|
|
+ "quota": {
|
|
|
|
|
+ "account_quota": account_quota,
|
|
|
|
|
+ "account_with_match": len([x for x in items if isinstance(x.get("帖子选题点匹配"), list)]),
|
|
|
|
|
+ "account_without_match": len([x for x in items if not isinstance(x.get("帖子选题点匹配"), list)]),
|
|
|
|
|
+ "platform_quota": platform_quota,
|
|
|
|
|
+ "platform_with_match": len([x for x in platform_items if isinstance(x.get("帖子选题点匹配"), list)]),
|
|
|
|
|
+ "platform_without_match": len([x for x in platform_items if not isinstance(x.get("帖子选题点匹配"), list)]),
|
|
|
|
|
+ },
|
|
|
"account_tree_count": len(items),
|
|
"account_tree_count": len(items),
|
|
|
"platform_tree_count": len(platform_items),
|
|
"platform_tree_count": len(platform_items),
|
|
|
"count": len(items) + len(platform_items),
|
|
"count": len(items) + len(platform_items),
|
|
@@ -770,7 +861,7 @@ def main() -> None:
|
|
|
# ]
|
|
# ]
|
|
|
derived_items = [{"topic":"推广","source_node":"推广"},{"topic":"视觉调性","source_node":"视觉调性"}]
|
|
derived_items = [{"topic":"推广","source_node":"推广"},{"topic":"视觉调性","source_node":"视觉调性"}]
|
|
|
conditional_ratio_threshold = 0.2
|
|
conditional_ratio_threshold = 0.2
|
|
|
- top_n = 2000
|
|
|
|
|
|
|
+ top_n = 200
|
|
|
|
|
|
|
|
# # 1)常量节点(核心函数,无匹配)
|
|
# # 1)常量节点(核心函数,无匹配)
|
|
|
# constant_nodes = get_constant_nodes(account_name)
|
|
# constant_nodes = get_constant_nodes(account_name)
|