3 месяца назад · f3fc138127
--- a/examples_how/overall_derivation/derivation_main.md
+++ b/examples_how/overall_derivation/derivation_main.md
@@ -224,7 +224,7 @@ agent(agent_type="derivation_search", task="执行搜索任务，account_name=xx
 
				         "derived_nodes": []
			
 
				       },
			
 
				       "output": ["分享"],
			
 
				-      "reason": "意图维度中，'分享'节点是全局常量（c=true）且整体概率 r=0.913，极高，是账号最核心的创作意图起点。",
			
 
				+      "reason": "'分享'节点是全局常量（c=true）且整体概率 r=0.913，极高，是账号最核心的创作意图起点。",
			
 
				       "tools": []
			
 
				     },
			
 
				     {
			
@@ -351,9 +351,9 @@ agent(agent_type="derivation_search", task="执行搜索任务，account_name=xx
 
				 - **适用场景**：通过人设树条件概率关联推导相关节点；非首轮进行内部推导时可以使用。
			
 
				 - **操作方式**：调用工具 `find_tree_nodes_by_conditional_ratio(account_name, post_id, derived_items, conditional_ratio_threshold, top_n, round, log_id)`。`log_id` 为当前推导日志ID；`round` 为当前推导轮次。`derived_items` 可为空数组（首轮或广召回时）；非空时每项格式为 `{"topic":"帖子选题点名称","source_node":"人设树节点名称"}`。工具返回格式示例：
			
 
				   ```
			
 
				-  - 分享  条件概率=1.0  父节点=分享  所属维度=分享  帖子选题点匹配=分享(1.0)
			
 
				-  - 趣味道具  条件概率=0.125  父节点=家居用品  所属维度=物品  帖子选题点匹配=夸张道具(0.7831)
			
 
				-  - 第一人称视角  条件概率=1.0  父节点=体验式呈现  所属维度=故事编排  帖子选题点匹配=无
			
 
				+  - 分享  条件概率=1.0  所属维度=分享  帖子选题点匹配=分享(1.0)
			
 
				+  - 趣味道具  条件概率=0.125  所属维度=物品  帖子选题点匹配=夸张道具(0.7831)
			
 
				+  - 第一人称视角  条件概率=1.0  所属维度=故事编排  帖子选题点匹配=无
			
 
				   ```
			
 
				   - **推导路径的 `output`**：填写工具返回的**人设树节点名称**（如 `趣味道具`）。
			
 
				   - **匹配判断**：读取「帖子选题点匹配」字段——若有值（如 `夸张道具(0.7831)`），则 `is_matched=true`，评估日志中 `matched_post_point` 填写括号前的帖子选题点名称（如 `夸张道具`），`matched_score` 填写匹配分数数值（如 `0.7831`），`matched_reason` 填写匹配分数描述（如 `匹配分数=0.7831`）；若字段值为「无」，则 `is_matched=false`。
			
@@ -371,14 +371,12 @@ agent(agent_type="derivation_search", task="执行搜索任务，account_name=xx
 
				                 "趣味道具"
			
 
				             ],
			
 
				             "patterns": [],
			
 
				-            "derived_nodes": [
			
 
				-                "家居用品"
			
 
				-            ]
			
 
				+            "derived_nodes": []
			
 
				         },
			
 
				         "output": [
			
 
				             "趣味道具"
			
 
				         ],
			
 
				-        "reason": "根据已推导出的'家居用品'及维度'物品'，人设树中'趣味道具'节点（父节点=家居用品）的条件概率=0.125，工具返回该节点存在帖子选题点匹配，因此将其作为推导候选。",
			
 
				+        "reason": "在已推导出的维度'物品'下，'趣味道具'节点条件概率=0.125，工具返回该节点存在帖子选题点匹配，因此将其作为推导候选。",
			
 
				         "tools": []
			
 
				     },
			
 
				     {
			
@@ -393,7 +391,7 @@ agent(agent_type="derivation_search", task="执行搜索任务，account_name=xx
 
				         "output": [
			
 
				             "第一人称视角"
			
 
				         ],
			
 
				-        "reason": "根据已推导出维度'故事编排'，人设树中'第一人称视角'节点的条件概率=1.0且属于'故事编排'维度下的孩子节点，因此将其作为推导候选。",
			
 
				+        "reason": "在已推导出的维度'故事编排'下，'第一人称视角'节点的条件概率=1.0，因此将其作为推导候选。",
			
 
				         "tools": []
			
 
				     }
			
 
				 ]  
			
--- a/examples_how/overall_derivation/generate_visualize_data.py
+++ b/examples_how/overall_derivation/generate_visualize_data.py
@@ -146,7 +146,8 @@ def build_derivation_result(
 
				     result = []
			
 
				     derived_names_so_far: set[str] = set()
			
 
				     fully_derived_names_so_far: set[str] = set()  # 已出现过 is_fully_derived=true 的选题点
			
 
				-    best_score_by_name: dict[str, tuple[float, bool]] = {}  # name -> (matched_score, is_fully_derived)，遇 is_fully=true 时更新
			
 
				+    # name -> (matched_score, is_fully_derived)，一旦 is_fully_derived=True，后续轮次不再更新 matched_score
			
 
				+    best_score_by_name: dict[str, tuple[float, bool]] = {}
			
 
				 
			
 
				     for i, (derivation, eval_data) in enumerate(zip(derivations, evals)):
			
 
				         round_num = derivation.get("round", i + 1)
			
@@ -175,7 +176,10 @@ def build_derivation_result(
 
				             elif name not in fully_derived_names_so_far and is_fully:
			
 
				                 new_derived_names.add(name)
			
 
				 
			
 
				-        # 更新推导集合与 best：首次出现或本轮 is_fully=true 时更新 best
			
 
				+        # 更新推导集合与 best：
			
 
				+        # - 首次出现时写入
			
 
				+        # - 若尚未 fully 且本轮 fully，则更新为 fully，并锁定，不再被后续轮次覆盖
			
 
				+        # - 若尚未 fully 且本轮仍为部分推导，可用更高分数更新
			
 
				         derived_names_so_far |= matched_post_points
			
 
				         for name in matched_post_points:
			
 
				             val = this_round_scores.get(name)
			
@@ -184,8 +188,18 @@ def build_derivation_result(
 
				             score, is_fully = val
			
 
				             if name not in best_score_by_name:
			
 
				                 best_score_by_name[name] = (score, is_fully)
			
 
				-            elif is_fully:
			
 
				-                best_score_by_name[name] = (score, is_fully)
			
 
				+            else:
			
 
				+                prev_score, prev_fully = best_score_by_name[name]
			
 
				+                # 已经 fully 的节点，后续轮次不再更新 matched_score
			
 
				+                if prev_fully:
			
 
				+                    pass
			
 
				+                else:
			
 
				+                    if is_fully:
			
 
				+                        best_score_by_name[name] = (score, True)
			
 
				+                    else:
			
 
				+                        # 都是部分推导时，可以用更高分覆盖
			
 
				+                        if score > prev_score:
			
 
				+                            best_score_by_name[name] = (score, False)
			
 
				             if is_fully:
			
 
				                 fully_derived_names_so_far.add(name)
			
 
				 
			
@@ -320,9 +334,6 @@ def build_visualize_edges(
 
				                 if k not in match_by_round_output:
			
 
				                     match_by_round_output[k] = val
			
 
				 
			
 
				-    # 按 (round_num, mp) 收集节点候选，同轮同节点保留 matched_score 最高的一条
			
 
				-    node_candidates: dict[tuple[int, str], dict] = {}  # (round_num, mp) -> node_dict (含 score, is_fully_derived)
			
 
				-
			
 
				     def get_match(round_num: int, path_id: int | None, item_id: int | None, out_item: str) -> tuple[str, str, float, bool] | None:
			
 
				         if path_id is not None and item_id is not None:
			
 
				             v = match_by_path_item.get((round_num, path_id, item_id))
			
@@ -330,13 +341,41 @@ def build_visualize_edges(
 
				                 return v
			
 
				         return match_by_round_output.get((round_num, out_item))
			
 
				 
			
 
				+    # 第一遍：按 (round_num, mp) 聚合节点最佳信息（不考虑边是否最终保留）
			
 
				+    # (round_num, mp) -> (score, is_fully_derived, derivation_output_point, method)
			
 
				+    best_node_info_by_round_mp: dict[tuple[int, str], tuple[float, bool, str, str]] = {}
			
 
				+    for round_idx, derivation in enumerate(derivations):
			
 
				+        round_num = derivation.get("round", round_idx + 1)
			
 
				+        for dr in derivation.get("derivation_results") or []:
			
 
				+            output_list = dr.get("output") or []
			
 
				+            path_id = dr.get("id")
			
 
				+            for i, out_item in enumerate(output_list):
			
 
				+                item_id = i + 1
			
 
				+                v = get_match(round_num, path_id, item_id, out_item)
			
 
				+                if not v:
			
 
				+                    continue
			
 
				+                mp, _reason, score, is_fully = v
			
 
				+                key = (round_num, mp)
			
 
				+                prev = best_node_info_by_round_mp.get(key)
			
 
				+                if prev is None or score > prev[0]:
			
 
				+                    best_node_info_by_round_mp[key] = (score, bool(is_fully), out_item, dr.get("method", ""))
			
 
				+
			
 
				     edge_list = []
			
 
				     round_output_seen: set[tuple[int, str]] = set()  # (round_num, node_name) 本轮已作为某边的 output
			
 
				     best_score_by_node: dict[str, float] = {}  # node_name -> 已出现过的最高 matched_score
			
 
				     fully_derived_nodes: set[str] = set()
			
 
				+    current_round: int | None = None
			
 
				 
			
 
				     for round_idx, derivation in enumerate(derivations):
			
 
				         round_num = derivation.get("round", round_idx + 1)
			
 
				+        if current_round is None:
			
 
				+            current_round = round_num
			
 
				+        elif round_num != current_round:
			
 
				+            # 一轮结束后，将本轮 is_fully_derived=true 的节点加入 fully_derived_nodes，用于后续轮次过滤
			
 
				+            for (rn, name), (score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
			
 
				+                if rn == current_round and is_fully:
			
 
				+                    fully_derived_nodes.add(name)
			
 
				+            current_round = round_num
			
 
				         for dr in derivation.get("derivation_results") or []:
			
 
				             output_list = dr.get("output") or []
			
 
				             path_id = dr.get("id")
			
@@ -352,7 +391,8 @@ def build_visualize_edges(
 
				             if not matched:
			
 
				                 continue
			
 
				 
			
 
				-            # 同一轮内 output 节点不重复；若前面轮次该节点匹配分更高则本轮不保留
			
 
				+            # 同一轮内 output 节点不重复；若前面轮次该节点匹配分更高则本轮不保留；
			
 
				+            # 并且只保留与 node_list 中该轮该节点的最高分记录一致的边
			
 
				             output_names_this_edge = []
			
 
				             for mp, reason, score, is_fully, out_item in matched:
			
 
				                 if (round_num, mp) in round_output_seen:
			
@@ -361,6 +401,9 @@ def build_visualize_edges(
 
				                     continue
			
 
				                 if score <= best_score_by_node.get(mp, -1.0):
			
 
				                     continue
			
 
				+                best_info = best_node_info_by_round_mp.get((round_num, mp))
			
 
				+                if not best_info or score < best_info[0]:
			
 
				+                    continue
			
 
				                 output_names_this_edge.append((mp, reason, score, is_fully, out_item))
			
 
				 
			
 
				             if not output_names_this_edge:
			
@@ -370,20 +413,6 @@ def build_visualize_edges(
 
				                 round_output_seen.add((round_num, mp))
			
 
				                 best_score_by_node[mp] = max(best_score_by_node.get(mp, -1.0), score)
			
 
				 
			
 
				-            # 节点候选：同轮同节点保留匹配分更高的
			
 
				-            for mp, _reason, score, is_fully, out_item in output_names_this_edge:
			
 
				-                key = (round_num, mp)
			
 
				-                if key not in node_candidates or node_candidates[key].get("matched_score", 0) < score:
			
 
				-                    node = dict(topic_by_name.get(mp, {"name": mp, "point": "", "dimension": "", "root_source": "", "root_sources_desc": ""}))
			
 
				-                    node["level"] = round_num
			
 
				-                    node.setdefault("original_word", node.get("name", mp))
			
 
				-                    node["derivation_type"] = dr.get("method", "")
			
 
				-                    node["matched_score"] = score
			
 
				-                    node["is_fully_derived"] = is_fully
			
 
				-                    # 对应评估中的 derivation_output_point
			
 
				-                    node["derivation_output_point"] = out_item
			
 
				-                    node_candidates[key] = node
			
 
				-
			
 
				             input_data = dr.get("input") or {}
			
 
				             derived_nodes = input_data.get("derived_nodes") or []
			
 
				             tree_nodes = input_data.get("tree_nodes") or []
			
@@ -425,11 +454,38 @@ def build_visualize_edges(
 
				                 "detail": detail,
			
 
				             })
			
 
				 
			
 
				-        for (rn, name), nd in node_candidates.items():
			
 
				-            if rn == round_num and nd.get("is_fully_derived"):
			
 
				+    # 处理最后一轮的 fully_derived_nodes
			
 
				+    if current_round is not None:
			
 
				+        for (rn, name), (score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
			
 
				+            if rn == current_round and is_fully:
			
 
				                 fully_derived_nodes.add(name)
			
 
				 
			
 
				-    node_list = list(node_candidates.values())
			
 
				+    # 根据按 (round, mp) 聚合后的最佳信息生成 node_list
			
 
				+    # 规则：若某节点在某轮已经 is_fully_derived=True，则之后轮次即便分数更高也不再保留该节点
			
 
				+    first_full_round_by_name: dict[str, int] = {}
			
 
				+    for (round_num, mp), (_score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
			
 
				+        if not is_fully:
			
 
				+            continue
			
 
				+        prev = first_full_round_by_name.get(mp)
			
 
				+        if prev is None or round_num < prev:
			
 
				+            first_full_round_by_name[mp] = round_num
			
 
				+
			
 
				+    node_list: list[dict] = []
			
 
				+    for (round_num, mp), (score, is_fully, out_item, method) in best_node_info_by_round_mp.items():
			
 
				+        full_round = first_full_round_by_name.get(mp)
			
 
				+        # 若存在更早的 fully 轮次，且当前轮次在其之后，则不再保留
			
 
				+        if full_round is not None and round_num > full_round:
			
 
				+            continue
			
 
				+        base = dict(topic_by_name.get(mp, {"name": mp, "point": "", "dimension": "", "root_source": "", "root_sources_desc": ""}))
			
 
				+        base["level"] = round_num
			
 
				+        base.setdefault("original_word", base.get("name", mp))
			
 
				+        base["derivation_type"] = method
			
 
				+        base["matched_score"] = score
			
 
				+        base["is_fully_derived"] = is_fully
			
 
				+        base["derivation_output_point"] = out_item
			
 
				+        node_list.append(base)
			
 
				+
			
 
				+    node_list.sort(key=lambda n: (n.get("level", 0), str(n.get("name", ""))))
			
 
				     return node_list, edge_list
			
 
				 
			
 
				 
			
@@ -502,9 +558,9 @@ if __name__ == "__main__":
 
				     account_name="家有大志"
			
 
				 
			
 
				     items = [
			
 
				-        {"post_id":"68fb6a5c000000000302e5de","log_id":"20260317214307"},
			
 
				-        {"post_id":"69185d49000000000d00f94e","log_id":"20260317214841"},
			
 
				-        {"post_id":"6921937a000000001b0278d1","log_id":"20260317215616"}
			
 
				+        {"post_id":"68fb6a5c000000000302e5de","log_id":"20260318220540"},
			
 
				+        {"post_id":"69185d49000000000d00f94e","log_id":"20260318221136"},
			
 
				+        {"post_id":"6921937a000000001b0278d1","log_id":"20260318221538"}
			
 
				     ]
			
 
				     for item in items:
			
 
				         post_id = item["post_id"]
			
--- a/examples_how/overall_derivation/overall_derivation_agent_run.py
+++ b/examples_how/overall_derivation/overall_derivation_agent_run.py
@@ -525,7 +525,7 @@ async def main(account_name, post_id):
 
				         # 以脚本所在目录为基准，兼容从项目根或脚本目录启动
			
 
				         script_dir = Path(__file__).resolve().parent
			
 
				         output_dir = script_dir / "output"
			
 
				-        output_file = output_dir / account_name / "推导日志" / current_trace_id / log_id / "result.txt"
			
 
				+        output_file = output_dir / account_name / "推导日志" / post_id / log_id / "result.txt"
			
 
				         output_file.parent.mkdir(parents=True, exist_ok=True)
			
 
				         with open(output_file, 'w', encoding='utf-8') as f:
			
 
				             f.write(final_response)
			
@@ -550,4 +550,4 @@ async def main(account_name, post_id):
 
				 if __name__ == "__main__":
			
 
				     # anthropic/claude-sonnet-4.6
			
 
				     # google/gemini-3-flash-preview
			
 
				-    asyncio.run(main(account_name="家有大志", post_id="68fb6a5c000000000302e5de"))
			
 
				+    asyncio.run(main(account_name="家有大志", post_id="6921937a000000001b0278d1"))
			
--- a/examples_how/overall_derivation/tools/pattern_dimension_analyze.py
+++ b/examples_how/overall_derivation/tools/pattern_dimension_analyze.py
@@ -335,6 +335,14 @@ class TreeIndex:
 
				                 self.node_info.setdefault(child, {})
			
 
				                 if self.node_info[child].get("depth") is None:
			
 
				                     self.node_info[child]["depth"] = cur_depth + 1
			
 
				+                    # BFS 首次到达该节点时（即最短路径），同步修正 parent 指针，
			
 
				+                    # 确保 parent 与 depth 始终保持一致。
			
 
				+                    # 若同名节点在树中多处出现，walk() 会用最后一次遍历的父节点
			
 
				+                    # 覆盖 parent，导致 parent 指向更深处的节点，
			
 
				+                    # 进而使 find_ancestor_at_level 沿 parent 链爬升时出现深度
			
 
				+                    # 倒退（越走越深）甚至返回错误祖先/None 的问题。
			
 
				+                    # 在 BFS 阶段统一修正，可保证 parent 链单调递减至根节点。
			
 
				+                    self.node_info[child]["parent"] = cur
			
 
				                     q.append(child)
			
 
				 
			
 
				     def find_ancestor_at_level(self, node_name: str, level: int) -> Optional[str]:
			
@@ -343,6 +351,16 @@ class TreeIndex:
 
				         - 若 node_name 自身 depth == level，直接返回自身。
			
 
				         - 若 node_name depth < level（比目标层浅），返回自身。
			
 
				         - 否则沿 parent 链向上查找，返回第一个 depth == level 的祖先节点。
			
 
				+
			
 
				+        说明：
			
 
				+        早期实现中为了防止意外环路使用了 visited 集合，一旦检测到「重复节点」就直接
			
 
				+        返回 None，导致在树中存在同名节点、且 parent 指针被覆盖的情况下，会错误返回
			
 
				+        None。这里改为**只沿 parent 链向上行走**，不再依赖 visited 截断：
			
 
				+        - 每一步仅查看当前节点的 depth 与 parent；
			
 
				+        - 一旦到达 depth <= level，直接返回当前节点；
			
 
				+        - 若 parent 为空，则返回当前已到达的最高节点。
			
 
				+        在正常树结构下（parent 指针无环），该过程必然在有限步内结束；若底层数据意外
			
 
				+        形成环，需在构建 node_info 时修复，祖先查找本身不再额外承担防御职责。
			
 
				         """
			
 
				         info = self.node_info.get(node_name)
			
 
				         if not info:
			
@@ -352,21 +370,20 @@ class TreeIndex:
 
				             return None
			
 
				         if depth <= level:
			
 
				             return node_name
			
 
				+        # 只沿 parent 链向上查找，不再依赖 visited 截断；
			
 
				+        # 一旦到达 depth <= level 或 parent 为空即返回当前节点。
			
 
				         cur = node_name
			
 
				-        visited: Set[str] = set()
			
 
				-        while cur and cur not in visited:
			
 
				-            visited.add(cur)
			
 
				+        while True:
			
 
				             cur_info = self.node_info.get(cur) or {}
			
 
				-            cur_depth = cur_info.get("depth") or 0
			
 
				-            if cur_depth == level:
			
 
				+            cur_depth = cur_info.get("depth")
			
 
				+            if cur_depth is None:
			
 
				                 return cur
			
 
				-            if cur_depth < level:
			
 
				+            if cur_depth <= level:
			
 
				                 return cur
			
 
				             parent = cur_info.get("parent")
			
 
				             if parent is None:
			
 
				                 return cur
			
 
				             cur = parent
			
 
				-        return None
			
 
				 
			
 
				     # 聚类搜索（不再区分维度）
			
 
				     def find_clusters(
			
@@ -919,13 +936,22 @@ def main(account_name, post_id, log_id) -> None:
 
				     # 控制台打印前 4000 字符，便于快速查看
			
 
				     # print(json.dumps(result, ensure_ascii=False, indent=2)[:4000] + "...")
			
 
				 
			
 
				-    # 写入输出文件：../output/{account_name}/推导日志/{post_id}/{log_id}/pattern_dimension_analyze.json
			
 
				+    # 写入输出文件 1：../output/{account_name}/推导日志/{post_id}/{log_id}/{post_id}_pattern_dimension_analyze.json
			
 
				     out_dir = _round_eval_dir(account_name, post_id, log_id)
			
 
				     out_dir.mkdir(parents=True, exist_ok=True)
			
 
				     output_file_name = f"{post_id}_pattern_dimension_analyze.json"
			
 
				     out_path = out_dir / output_file_name
			
 
				     with open(out_path, "w", encoding="utf-8") as f:
			
 
				         json.dump(result, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    # 写入输出文件 2：../output/{account_name}/整体推导维度分析/{post_id}_pattern_dimension_analyze.json
			
 
				+    overall_dir = _BASE_OUTPUT / account_name / "整体推导维度分析"
			
 
				+    overall_dir.mkdir(parents=True, exist_ok=True)
			
 
				+    overall_output_file_name = f"{post_id}_pattern_dimension_analyze.json"
			
 
				+    overall_out_path = overall_dir / overall_output_file_name
			
 
				+    with open(overall_out_path, "w", encoding="utf-8") as f:
			
 
				+        json.dump(result, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				     print(f"\n分析结果已写入: {out_path}")
			
 
				 
			
 
				 
			
@@ -944,7 +970,6 @@ def main_round_pattern_dimension_analyze(
 
				             post_id=post_id,
			
 
				             log_id=log_id,
			
 
				             round=round_num,
			
 
				-            context=None,
			
 
				         )
			
 
				         if result is None:
			
 
				             print(
			
@@ -984,14 +1009,14 @@ if __name__ == "__main__":
 
				     run_full_pattern_analyze = True
			
 
				 
			
 
				     test_account_name = "家有大志"
			
 
				-    test_post_id = "68fb6a5c000000000302e5de"
			
 
				-    test_log_id = "20260317214307"
			
 
				+    test_post_id = "69185d49000000000d00f94e"
			
 
				+    test_log_id = "20260318221136"
			
 
				     test_round = 1
			
 
				 
			
 
				     items = [
			
 
				-        {"post_id": "68fb6a5c000000000302e5de", "log_id": "20260318172724"},
			
 
				-        # {"post_id": "69185d49000000000d00f94e", "log_id": "20260317214841"},
			
 
				-        # {"post_id": "6921937a000000001b0278d1", "log_id": "20260317215616"},
			
 
				+        {"post_id": "68fb6a5c000000000302e5de", "log_id": "20260318220540"},
			
 
				+        {"post_id": "69185d49000000000d00f94e", "log_id": "20260318221136"},
			
 
				+        {"post_id": "6921937a000000001b0278d1", "log_id": "20260318221538"}
			
 
				     ]
			
 
				 
			
 
				     if run_round_pattern_test: