2 месяцев назад · 1942aa829c
--- a/examples_how/overall_derivation/derivation_main.md
+++ b/examples_how/overall_derivation/derivation_main.md
@@ -93,7 +93,6 @@ agent(agent_type="derivation_search", task="执行搜索任务，account_name=xx
 
															 搜索子 agent 返回的结果格式包含：
														
 
															 - **搜索摘要**（`result`）：搜索结果概要
														
 
															-- **原始数据**（`raw_result`）：搜索工具返回的原始结果
														
 
															 - **推导候选点**（`candidate_points`）：从搜索结果中筛选出的与账号人设相关的推导选题点列表
														
 
															 - **匹配结果**（`match_result`）：`point_match` 工具对候选点的匹配判断结果，每项包含 `candidate_point`、`is_matched`、`matched_post_point`、`matched_score`
														
@@ -425,7 +424,6 @@ agent(agent_type="derivation_search", task="执行搜索任务，account_name=xx
 
															         "name": "agent(derivation_search)",
														
 
															         "query": "（搜索子 agent 返回的query）",
														
 
															         "result": "（搜索子 agent 返回的摘要或关键内容）",
														
 
															-        "raw_result": "（搜索子 agent 返回的原始搜索结果，完整保留或按需截断）",
														
 
															         "candidate_points": ["家居改造利用", "废旧物品利用"],
														
 
															         "match_result": [
														
 
															           {"candidate_point": "家居改造利用", "is_matched": true, "matched_post_point": "家居改造", "matched_score": 0.85},
														
@@ -579,7 +577,6 @@ agent(agent_type="derivation_search", task="执行搜索任务，account_name=xx
 
															           "name": "工具名称（如 agent(derivation_search)）",
														
 
															           "query": "若为搜索工具则记录 query 词",
														
 
															           "result": "若为搜索工具则记录搜索返回的数据摘要或关键内容",
														
 
															-          "raw_result": "若为搜索工具则记录搜索工具返回的原始数据（完整保留或按需截断）",
														
 
															           "candidate_points": ["若为搜索工具则记录评估子 agent 筛选出的候选点列表"],
														
 
															           "match_result": [
														
 
															             {
														
@@ -606,7 +603,7 @@ agent(agent_type="derivation_search", task="执行搜索任务，account_name=xx
 
															   - `input.derived_nodes`: 本路径用到的已推导成功选题点名称列表（**只能引用 `derived_success_set` 中完全推导成功的选题点名称**，不能引用 `partial_derived_set` 中部分推导成功的选题点名称）
														
 
															   - `output`: 本路径产出的待评估选题点名称列表（可多个）
														
 
															   - `reason`: 必须详细、可追溯，引用工具返回的具体数据；禁止牵强或凭空联想
														
 
															-  - `tools`: 本路径使用的工具列表；若使用搜索工具，必须包含 `query`、`result`（数据摘要或关键内容）、`raw_result`（原始数据）、`candidate_points`（评估子 agent 筛选的候选点）和 `match_result`（匹配结果）；若未使用工具则为空数组 `[]`
														
 
															+  - `tools`: 本路径使用的工具列表；若使用搜索工具，必须包含 `query`、`result`（数据摘要或关键内容）、`candidate_points`（搜索子 agent 筛选的候选点）和 `match_result`（匹配结果）；若未使用工具则为空数组 `[]`
														
 
															 > **原子化要求体现在日志中**：每条推导路径遵循最小输入输出原子化规则——即用最少输入数据推导出哪些必要的选题点；路径中所有输入对产出该路径每个输出点都是必要的；逻辑上可以分开的推导路径不要混在一起。
														
--- a/examples_how/overall_derivation/skills/derivation_search.md
+++ b/examples_how/overall_derivation/skills/derivation_search.md
@@ -1,18 +1,18 @@
 
															 ---
														
 
															 name: derivation_search
														
 
															-description: 选题点推导-信息搜索子 Agent，根据主 agent 传入的参数自主构造搜索 query、调用 search_and_eval 工具完成搜索与评估，整理后返回搜索结果和匹配结果
														
 
															+description: 选题点推导-信息搜索子 Agent，根据主 agent 传入的参数自主构造多个搜索 query、调用 search_and_eval 工具完成搜索与评估，整理后返回搜索结果和匹配结果
														
 
															 ---
														
 
															 # 选题点推导 - 信息搜索子任务
														
 
															 ## 角色
														
 
															-你是选题点推导流程中的**信息搜索执行者**，负责根据主 agent 传入的已推导集合等参数，自主构造搜索 query，然后调用 `search_and_eval` 工具一次性完成帖子搜索、人设匹配评估和选题点匹配，最终将结果整理后返回给主 agent。
														
 
															+你是选题点推导流程中的**信息搜索执行者**，负责根据主 agent 传入的已推导集合等参数，自主构造**多个**搜索 query，然后调用 `search_and_eval` 工具（传入 `query_list`）一次性完成帖子搜索、人设匹配评估和选题点匹配，最终将结果整理后返回给主 agent。
														
 
															 ## 任务描述
														
 
															 主 agent 在采用「信息搜索」推导方法时，会调用你（`agent_type="derivation_search"`），并在 `task` 中给出 `account_name`、`post_id`、已推导集合等参数。你的职责是：
														
 
															-1. **构造搜索 query**：根据传入的已推导集合参数，按照关键词约束规则自主构造搜索 query。
														
 
															-2. **调用 search_and_eval 工具**：传入 `account_name`、`post_id`、构造好的 `query`，由工具完成搜索、人设匹配评估和选题点匹配，一次性返回结果。
														
 
															-3. **整理返回**：从工具返回的帖子列表中提取候选点和匹配结果，整理成结构化文本返回给主 agent。
														
 
															+1. **构造多个搜索 query**：根据传入的已推导集合参数，按照关键词约束规则自主构造**多个**搜索 query，形成 `query_list`。
														
 
															+2. **调用 search_and_eval 工具**：传入 `account_name`、`post_id`、构造好的 `query_list`，由工具并发完成搜索、人设匹配评估和选题点匹配，一次性返回所有 query 的结果。
														
 
															+3. **整理返回**：从工具返回的多 query 结果中提取候选点和匹配结果，整理成结构化文本返回给主 agent。
														
 
															 ## 输入
														
 
															 主 agent 传入的 `task` 中**必须包含**以下参数：
														
@@ -27,7 +27,7 @@ description: 选题点推导-信息搜索子 Agent，根据主 agent 传入的
 
															 执行搜索任务，account_name=xxx
														
 
															 post_id=yyy
														
 
															 derived_success_set=[{"topic":"分享","source_node":"分享"},{"topic":"日常物品","source_node":"日常物品"}]
														
 
															-partial_derived_set=[{"source_node":"趣味道具"}]
														
 
															+partial_derived_set=[{"source_node":"趣味道具"},{"source_node":"创意玩具"}]
														
 
															 ```
														
 
															 ## 操作步骤
														
@@ -35,9 +35,9 @@ partial_derived_set=[{"source_node":"趣味道具"}]
 
															 ### 步骤一：提取参数
														
 
															 从 `task` 中提取 `account_name`、`post_id`、`derived_success_set`、`partial_derived_set`。
														
 
															-### 步骤二：构造搜索 query
														
 
															+### 步骤二：构造多个搜索 query
														
 
															-根据传入的已推导集合参数，自主构造本次搜索的 query。**必须严格遵守以下关键词约束规则**：
														
 
															+根据传入的已推导集合参数，自主构造**多个** query 组成 `query_list`。**必须严格遵守以下关键词约束规则**：
														
 
															 #### 关键词来源（仅限以下两类）
														
 
															 1. **完全推导成功选题点的 `topic` 名称**：即 `derived_success_set` 中每项的 `topic` 字段（对应帖子选题点名称 `matched_post_point`）
														
@@ -48,20 +48,33 @@ partial_derived_set=[{"source_node":"趣味道具"}]
 
															 - **禁止**使用大模型自行推测或联想出的关键词
														
 
															 - **禁止**使用账号名称（`account_name`）作为搜索关键词
														
 
															-#### query 构造策略
														
 
															-- 从上述合法来源中选取 2~4 个语义相关的关键词组合为 query
														
 
															-- 优先选取尚未被充分探索的方向——例如 `source_node` 中尚未与其他 `topic` 组合搜索过的节点
														
 
															-- 关键词之间用空格分隔
														
 
															+#### query 构造策略（核心规则）
														
 
															+
														
 
															+**以 `partial_derived_set` 中的 `source_node` 为核心构建每个 query**：
														
 
															+- 每个 query 应以一个 `partial_derived_set` 中的 `source_node` 作为**主关键词**
														
 
															+- 然后从 `derived_success_set` 中选取 1~2 个 `topic` 字段作为**辅助关键词**进行组合
														
 
															+- 这样构建的 query 才更有机会搜索出帖子中其他未推导出的选题点
														
 
															+
														
 
															+**query 数量**：
														
 
															+- 为 `partial_derived_set` 中的**每个 `source_node`** 至少构建 1 个 query
														
 
															+- 同一个 `source_node` 可搭配不同的 `topic` 组合构建多个 query，以覆盖更多搜索方向
														
 
															+- 总 query 数量建议在 1~5 个之间（根据 `partial_derived_set` 大小灵活调整）
														
 
															+
														
 
															+**单个 query 格式**：
														
 
															+- 每个 query 由 2~4 个关键词组成，用空格分隔
														
 
															 - query 应有明确的搜索意图，避免过于宽泛或过于具体
														
 
															 #### 构造示例
														
 
															 假设输入：
														
 
															 - `derived_success_set=[{"topic":"分享","source_node":"分享"},{"topic":"日常物品","source_node":"日常物品"}]`
														
 
															-- `partial_derived_set=[{"source_node":"趣味道具"}]`
														
 
															+- `partial_derived_set=[{"source_node":"趣味道具"},{"source_node":"创意玩具"}]`
														
 
															-合法 query 示例：
														
 
															-- `分享 日常物品 趣味道具`（使用 topic + source_node 组合）
														
 
															-- `日常物品 趣味道具`（聚焦未充分探索的方向）
														
 
															+合法 query_list 示例：
														
 
															+- `["趣味道具 分享", "趣味道具 日常物品", "创意玩具 分享", "创意玩具 日常物品"]`
														
 
															+  - 每个 query 以 partial_derived_set 的 source_node 为核心，搭配 derived_success_set 的 topic
														
 
															+
														
 
															+简化版（当 partial_derived_set 较少时）：
														
 
															+- `["趣味道具 分享 日常物品", "创意玩具 分享"]`
														
 
															 非法 query 示例：
														
 
															 - `xxx账号 分享 日常物品`（使用了账号名称）
														
@@ -71,40 +84,44 @@ partial_derived_set=[{"source_node":"趣味道具"}]
 
															 调用工具 **search_and_eval**，传入以下参数：
														
 
															 - `account_name`：账号名称
														
 
															 - `post_id`：帖子 ID
														
 
															-- `query`：步骤二构造的搜索词
														
 
															+- `query_list`：步骤二构造的搜索词列表（JSON 数组格式）
														
 
															-工具内部会自动完成：搜索（优先 xhs，失败或空则降级 zhihu）、人设匹配评估、关键词提取和选题点匹配，无需额外操作。
														
 
															+工具内部会对 `query_list` 中的每个 query 进行搜索、人设匹配评估、关键词提取和选题点匹配，无需额外操作。
														
 
															-工具返回的数据结构为帖子列表，每项包含：
														
 
															+工具返回的数据结构为按 query 分组的结果字典，每个 query 对应一个帖子列表：
														
 
															 ```json
														
 
															 {
														
 
															-  "channel_content_id": "帖子ID",
														
 
															-  "title": "标题",
														
 
															-  "body_text": "正文",
														
 
															-  "images": ["图片URL"],
														
 
															-  "persona_match_result": true,
														
 
															-  "post_keywords": ["关键词1", "关键词2"],
														
 
															-  "point_match_results": [
														
 
															-    {"推导选题点": "关键词1", "帖子选题点": "xxx", "匹配分数": 0.85}
														
 
															-  ]
														
 
															+  "趣味道具 分享": [
														
 
															+    {
														
 
															+      "channel_content_id": "帖子ID",
														
 
															+      "title": "标题",
														
 
															+      "body_text": "正文",
														
 
															+      "images": ["图片URL"],
														
 
															+      "persona_match_result": true,
														
 
															+      "post_keywords": ["关键词1", "关键词2"],
														
 
															+      "point_match_results": [
														
 
															+        {"推导选题点": "关键词1", "帖子选题点": "xxx", "匹配分数": 0.85}
														
 
															+      ]
														
 
															+    }
														
 
															+  ],
														
 
															+  "创意玩具 日常物品": [...]
														
 
															 }
														
 
															 ```
														
 
															 ### 步骤四：整理返回
														
 
															-从工具返回的帖子列表中提取数据，按以下固定格式返回给主 agent：
														
 
															+从工具返回的多 query 结果中提取数据，**汇总所有 query 的结果**，按以下固定格式返回给主 agent：
														
 
															 ```
														
 
															-【query】<本次实际使用的搜索关键词>
														
 
															-
														
 
															-【result】<摘要：概括搜索结果中与账号人设相关的内容、高频关键词、可能的推导方向，约 100~200 字>
														
 
															+【query_list】<本次实际使用的搜索关键词列表，JSON 数组格式>
														
 
															-【raw_result】<search_and_eval 工具返回的帖子列表>
														
 
															+【result】<摘要：概括所有 query 搜索结果中与账号人设相关的内容、高频关键词、可能的推导方向，约 500 字>
														
 
															-【candidate_points】<从所有 persona_match_result=true 的帖子中汇总去重后的 post_keywords，格式为 JSON 数组，如：["家居改造利用", "废旧物品利用"]>
														
 
															+【candidate_points】<从所有 query 的所有 persona_match_result=true 的帖子中汇总去重后的 post_keywords，格式为 JSON 数组，如：["家居改造利用", "废旧物品利用"]>
														
 
															-【match_result】<从所有帖子的 point_match_results 中汇总并转换为以下格式的 JSON 数组，每项包含 candidate_point、is_matched、matched_post_point、matched_score：
														
 
															+【match_result】<从所有 query 的所有帖子的 point_match_results 中汇总并转换为以下格式的 JSON 数组，每项包含 candidate_point、is_matched、matched_post_point、matched_score：
														
 
															 - 若某关键词在 point_match_results 中存在匹配记录，则 is_matched=true，填入最高分的 matched_post_point 和 matched_score
														
 
															 - 若某关键词在 candidate_points 中但 point_match_results 无对应记录，则 is_matched=false，matched_post_point 和 matched_score 为 null
														
 
															+- 注意：多个 query 可能返回相同的 candidate_point，需去重后取最高分
														
 
															 示例：
														
 
															 [
														
@@ -114,11 +131,11 @@ partial_derived_set=[{"source_node":"趣味道具"}]
 
															 ```
														
 
															 **异常处理**：
														
 
															-- 若 `search_and_eval` 返回空列表，`result` 填写"未找到相关内容"，`raw_result` 填写空数组，`candidate_points` 为 `[]`，`match_result` 为 `[]`，**不得捏造任何内容**。
														
 
															+- 若 `search_and_eval` 返回的所有 query 结果均为空列表，`result` 填写"未找到相关内容"，`candidate_points` 为 `[]`，`match_result` 为 `[]`，**不得捏造任何内容**。
														
 
															 - 若所有帖子的 `persona_match_result` 均为 `false`（搜索结果与账号人设无关联），`candidate_points` 为 `[]`，`match_result` 为 `[]`。
														
 
															 ## 约束
														
 
															-- **仅调用一次 search_and_eval**：每次被调用只调用一次 `search_and_eval` 工具，不要多轮搜索或合并历史结果。
														
 
															+- **仅调用一次 search_and_eval**：每次被调用只调用一次 `search_and_eval` 工具（传入 `query_list`），工具内部并发处理多个 query，不要多轮搜索或合并历史结果。
														
 
															 - **闭眼搜索**：query 中的关键词**只能来自** `derived_success_set` 的 `topic`/`source_node` 以及 `partial_derived_set` 的 `source_node`，不得自行编造或联想新关键词，不得使用账号名称。
														
 
															 - **不替主 agent 做推导**：你只负责构造 query、调用工具、整理返回结果。不判断"能推导出哪些选题点"或"该选题点是否应加入推导集合"；由主 agent 根据你的返回整理推导路径。
														
 
															 - **不直接调用 search_posts 或 point_match**：搜索、评估、匹配均由 `search_and_eval` 工具内部完成，你不得单独调用这些工具。
														
--- a/examples_how/overall_derivation/tools/find_pattern.py
+++ b/examples_how/overall_derivation/tools/find_pattern.py
@@ -191,7 +191,7 @@ def get_patterns_by_conditional_ratio(
 
															     description="按条件概率从 pattern 库中筛选 pattern，优先返回包含已推导选题点的 pattern，并检查每个 pattern 的元素是否与帖子选题点匹配。"
														
 
															     "功能：根据账号与已推导选题点（可选），筛选条件概率不低于阈值的 pattern；当 derived_items 非空时，优先返回 pattern 元素中包含已推导选题点的 pattern；同时对每个 pattern 的所有元素做帖子选题点匹配，匹配结果直接包含在返回数据中。"
														
 
															     "参数：account_name 为账号名；post_id 为帖子ID，用于加载帖子选题点并做匹配判断；derived_items 为已推导选题点列表，每项含 topic（或已推导的选题点）与 source_node（或推导来源人设树节点），可为空，为空时条件概率使用 pattern 自身的 support；conditional_ratio_threshold 为条件概率阈值；top_n 为返回条数上限，默认 100。"
														
 
															-    "返回：ToolResult，output 为可读的 pattern 列表文本，metadata.items 为列表，每项含「pattern名称」（nameA+nameB+nameC 形式）、「条件概率」、「帖子选题点匹配」=无/匹配结果（无匹配时为「无」，有匹配时为匹配列表，每项含 pattern元素、帖子选题点与匹配分数）。"
														
 
															+    "返回：ToolResult，output 为可读的 pattern 列表文本"
														
 
															 )
														
 
															 async def find_pattern(
														
 
															     account_name: str,
														
@@ -220,8 +220,6 @@ async def find_pattern(
 
															     ToolResult：
														
 
															         - title: 结果标题。
														
 
															         - output: 可读的 pattern 列表文本（每行：pattern名称、条件概率、帖子匹配情况）。
														
 
															-        - metadata: 含 account_name、conditional_ratio_threshold、top_n、count、items；
														
 
															-          items 为列表，每项为 {"pattern名称": str, "条件概率": float,
														
 
															           "帖子选题点匹配": 无匹配时为 "无"，有匹配时为 list[{"pattern元素", "帖子选题点", "匹配分数"}]}。
														
 
															         - 出错时 error 为错误信息。
														
 
															     """
														
--- a/examples_how/overall_derivation/tools/find_tree_node.py
+++ b/examples_how/overall_derivation/tools/find_tree_node.py
@@ -167,7 +167,7 @@ def _parse_derived_list(derived_items: list[dict[str, str]]) -> list[tuple[str,
 
															     description="获取指定账号人设树中的常量节点（全局常量、局部常量），并检查每个节点与帖子选题点的匹配情况。"
														
 
															     "功能：根据账号名查询该账号人设树中所有常量节点，同时对每个节点判断是否匹配帖子选题点，匹配结果直接包含在返回数据中。"
														
 
															     "参数：account_name 为账号名；post_id 为帖子ID，用于加载帖子选题点并做匹配判断。"
														
 
															-    "返回：ToolResult，output 为可读的节点列表文本，metadata.items 为列表，每项含「节点名称」「概率」「常量类型」「帖子选题点匹配」=无/匹配结果（无匹配时为「无」，有匹配时为匹配列表，每项含帖子选题点与匹配分数）。"
														
 
															+    "返回：ToolResult，output 为可读的节点列表文本"
														
 
															 )
														
 
															 async def find_tree_constant_nodes(
														
 
															     account_name: str,
														
@@ -188,9 +188,6 @@ async def find_tree_constant_nodes(
 
															     ToolResult：
														
 
															         - title: 结果标题。
														
 
															         - output: 可读的节点列表文本（每行：节点名称、概率、常量类型、帖子匹配情况）。
														
 
															-        - metadata: 含 account_name、count、items；items 为列表，每项为
														
 
															-          {"节点名称": str, "概率": 数值或 None, "常量类型": "全局常量"|"局部常量",
														
 
															-           "帖子选题点匹配": 无匹配时为 "无"，有匹配时为 list[{"帖子选题点": str, "匹配分数": float}]}。
														
 
															         - 出错时 error 为错误信息。
														
 
															     """
														
 
															     tree_dir = _tree_dir(account_name)
														
@@ -244,7 +241,7 @@ async def find_tree_constant_nodes(
 
															     description="按条件概率从人设树中筛选节点，返回达到阈值且按条件概率排序的前 topN 条，并检查每个节点与帖子选题点的匹配情况。"
														
 
															     "功能：根据账号与已推导选题点（可选），筛选人设树中条件概率不低于阈值的节点，同时对每个节点判断是否匹配帖子选题点，匹配结果直接包含在返回数据中。"
														
 
															     "参数：account_name 为账号名；post_id 为帖子ID，用于加载帖子选题点并做匹配判断；derived_items 为已推导选题点列表，每项含 topic（或已推导的选题点）与 source_node（或推导来源人设树节点），可为空，为空时条件概率使用节点自身的 _ratio；conditional_ratio_threshold 为条件概率阈值；top_n 为返回条数上限，默认 100。"
														
 
															-    "返回：ToolResult，output 为可读的节点列表文本，metadata.items 为列表，每项含「节点名称」「条件概率」「父节点名称」「帖子选题点匹配」=无/匹配结果（无匹配时为「无」，有匹配时为匹配列表，每项含帖子选题点与匹配分数）。"
														
 
															+    "返回：ToolResult，output 为可读的节点列表文本"
														
 
															 )
														
 
															 async def find_tree_nodes_by_conditional_ratio(
														
 
															     account_name: str,
														
@@ -271,9 +268,6 @@ async def find_tree_nodes_by_conditional_ratio(
 
															     ToolResult：
														
 
															         - title: 结果标题。
														
 
															         - output: 可读的节点列表文本（每行：节点名称、条件概率、父节点名称、帖子匹配情况）。
														
 
															-        - metadata: 含 account_name、threshold、top_n、count、items；
														
 
															-          items 为列表，每项为 {"节点名称": str, "条件概率": float, "父节点名称": str,
														
 
															-          "帖子选题点匹配": 无匹配时为 "无"，有匹配时为 list[{"帖子选题点": str, "匹配分数": float}]}。
														
 
															         - 出错时 error 为错误信息。
														
 
															     """
														
 
															     tree_dir = _tree_dir(account_name)
														
--- a/examples_how/overall_derivation/tools/point_match.py
+++ b/examples_how/overall_derivation/tools/point_match.py
@@ -153,7 +153,7 @@ async def match_derivation_to_post_points(
 
															     description="判断推导选题点（人设树节点）与帖子选题点是否匹配。"
														
 
															     "功能：根据账号与帖子ID，将传入的推导选题点列表与帖子选题点做匹配，返回达到阈值的匹配对。"
														
 
															     "参数：derivation_output_points 为推导选题点字符串列表；account_name 为账号名；post_id 为帖子ID；match_threshold 为匹配分数阈值，默认 0.8。"
														
 
															-    "返回：ToolResult，output 为可读匹配结果文本，metadata.items 为匹配列表，每项含「推导选题点」「帖子选题点」「匹配分数」。"
														
 
															+    "返回：ToolResult，output 为可读匹配结果文本"
														
 
															 )
														
 
															 async def point_match(
														
 
															     derivation_output_points: List[str],
														
@@ -178,8 +178,6 @@ async def point_match(
 
															     ToolResult：
														
 
															         - title: 结果标题。
														
 
															         - output: 可读的匹配结果文本（每行：推导选题点、帖子选题点、匹配分数）。
														
 
															-        - metadata: 含 account_name、post_id、match_threshold、count、items；
														
 
															-          items 为列表，每项为 {"推导选题点": str, "帖子选题点": str, "匹配分数": float}。
														
 
															         - 出错时 error 为错误信息。
														
 
															     """
														
 
															     topic_path = _post_topic_file(account_name, post_id)
														
@@ -216,7 +214,6 @@ async def point_match(
 
															                 "post_id": post_id,
														
 
															                 "match_threshold": match_threshold,
														
 
															                 "count": len(matched),
														
 
															-                "items": matched,
														
 
															             },
														
 
															         )
														
 
															     except Exception as e:
														
--- a/examples_how/overall_derivation/tools/search_and_eval.py
+++ b/examples_how/overall_derivation/tools/search_and_eval.py
@@ -2,19 +2,22 @@
 
															 搜索评估工具：搜索帖子并评估是否与账号人设匹配，提取关键词并匹配选题点。
														
 
															 处理流程：
														
 
															-1. 使用 xhs（失败或空则用 zhihu）搜索帖子
														
 
															-2. 并发对每篇帖子调用 LLM 判断人设匹配 & 提取关键词
														
 
															-3. 对匹配人设的帖子，调用 match_derivation_to_post_points 匹配选题点
														
 
															-4. 返回完整评估结果列表
														
 
															+1. 接收 query_list（多个搜索 query），并发处理
														
 
															+2. 每个 query：使用 xhs（失败或空则用 zhihu）搜索帖子
														
 
															+3. 并发对每篇帖子调用 LLM 判断人设匹配 & 提取关键词
														
 
															+4. 对匹配人设的帖子，调用 match_derivation_to_post_points 匹配选题点
														
 
															+5. 返回按 query 分组的评估结果字典
														
 
															+6. 支持本地文件缓存（.cache/search/{account_name}/{post_id}/）
														
 
															 """
														
 
															 import asyncio
														
 
															+import hashlib
														
 
															 import json
														
 
															 import logging
														
 
															 import re
														
 
															 import sys
														
 
															 from pathlib import Path
														
 
															-from typing import Any, Dict, List, Optional
														
 
															+from typing import Dict, List, Optional
														
 
															 logger = logging.getLogger(__name__)
														
@@ -39,6 +42,7 @@ except ImportError:
 
															 _BASE_INPUT = Path(__file__).resolve().parent.parent / "input"
														
 
															 _TOOLS_DIR = Path(__file__).resolve().parent
														
 
															+_CACHE_ROOT = Path(__file__).resolve().parent.parent / ".cache" / "search"
														
 
															 BASE_URL = "http://aigc-channel.aiddit.com/aigc/channel"
														
 
															 DEFAULT_TIMEOUT = 60.0
														
@@ -224,89 +228,155 @@ async def _eval_single_post(
 
															     return result
														
 
															+def _cache_key(query: str) -> str:
														
 
															+    """将 query 转为安全的文件名：使用 MD5 哈希避免特殊字符问题"""
														
 
															+    h = hashlib.md5(query.encode("utf-8")).hexdigest()[:12]
														
 
															+    safe = re.sub(r'[^\w\u4e00-\u9fff]+', '_', query)[:60].strip('_')
														
 
															+    return f"{safe}_{h}"
														
 
															+
														
 
															+
														
 
															+def _get_cache_path(account_name: str, post_id: str, query: str) -> Path:
														
 
															+    return _CACHE_ROOT / account_name / post_id / f"{_cache_key(query)}.json"
														
 
															+
														
 
															+
														
 
															+def _read_cache(account_name: str, post_id: str, query: str) -> Optional[List[dict]]:
														
 
															+    """读取缓存，存在且合法则返回帖子列表，否则返回 None"""
														
 
															+    path = _get_cache_path(account_name, post_id, query)
														
 
															+    if not path.is_file():
														
 
															+        return None
														
 
															+    try:
														
 
															+        with open(path, "r", encoding="utf-8") as f:
														
 
															+            data = json.load(f)
														
 
															+        if isinstance(data, list):
														
 
															+            logger.info("_read_cache: hit cache for query=%s, %d items", query, len(data))
														
 
															+            return data
														
 
															+    except Exception as e:
														
 
															+        logger.warning("_read_cache: failed to read cache for query=%s: %s", query, e)
														
 
															+    return None
														
 
															+
														
 
															+
														
 
															+def _write_cache(account_name: str, post_id: str, query: str, results: List[dict]) -> None:
														
 
															+    """写入缓存"""
														
 
															+    path = _get_cache_path(account_name, post_id, query)
														
 
															+    try:
														
 
															+        path.parent.mkdir(parents=True, exist_ok=True)
														
 
															+        with open(path, "w", encoding="utf-8") as f:
														
 
															+            json.dump(results, f, ensure_ascii=False, indent=2)
														
 
															+        logger.info("_write_cache: wrote cache for query=%s, %d items", query, len(results))
														
 
															+    except Exception as e:
														
 
															+        logger.warning("_write_cache: failed to write cache for query=%s: %s", query, e)
														
 
															+
														
 
															+
														
 
															+async def _search_and_eval_single_query(
														
 
															+    query: str,
														
 
															+    system_prompt: str,
														
 
															+    account_name: str,
														
 
															+    post_id: str,
														
 
															+) -> List[dict]:
														
 
															+    """处理单个 query 的搜索、评估、匹配流程，支持缓存"""
														
 
															+    cached = _read_cache(account_name, post_id, query)
														
 
															+    if cached is not None:
														
 
															+        return cached
														
 
															+
														
 
															+    posts = await _search_posts(query)
														
 
															+    if not posts:
														
 
															+        logger.warning("_search_and_eval_single_query: no posts for query=%s", query)
														
 
															+        _write_cache(account_name, post_id, query, [])
														
 
															+        return []
														
 
															+
														
 
															+    logger.info("_search_and_eval_single_query: got %d posts for query=%s", len(posts), query)
														
 
															+    tasks = [
														
 
															+        _eval_single_post(post, system_prompt, account_name, post_id)
														
 
															+        for post in posts
														
 
															+    ]
														
 
															+    results: List[dict] = await asyncio.gather(*tasks)
														
 
															+
														
 
															+    _write_cache(account_name, post_id, query, results)
														
 
															+    return results
														
 
															+
														
 
															+
														
 
															 @tool(
														
 
															     description=(
														
 
															         "搜索帖子并评估是否与账号人设匹配，提取帖子关键词并与帖子选题点进行匹配。"
														
 
															-        "参数：account_name 账号名称；post_id 帖子ID；query 搜索词。"
														
 
															+        "参数：account_name 账号名称；post_id 帖子ID；query_list 搜索词列表。"
														
 
															     )
														
 
															 )
														
 
															 async def search_and_eval(
														
 
															     account_name: str,
														
 
															     post_id: str,
														
 
															-    query: str,
														
 
															+    query_list: List[str],
														
 
															     context: Optional[ToolContext] = None,
														
 
															 ) -> ToolResult:
														
 
															     """
														
 
															     搜索帖子并评估是否与账号人设匹配，提取关键词并匹配选题点。
														
 
															+    支持多个 query 并发处理，结果按 query 分组返回。
														
 
															+    本地文件缓存：.cache/search/{account_name}/{post_id}/ 下每个 query 一个 JSON 文件。
														
 
															     Args:
														
 
															         account_name: 账号名称，用于读取人设数据和选题点文件
														
 
															         post_id: 帖子ID，用于定位选题点匹配文件
														
 
															-        query: 搜索词
														
 
															+        query_list: 搜索词列表，每个元素为一个 query 字符串
														
 
															     Returns:
														
 
															-        ToolResult，output 为 JSON 格式的帖子评估结果列表，每项包含：
														
 
															-        - channel_content_id: 帖子ID
														
 
															-        - title: 标题
														
 
															-        - body_text: 正文
														
 
															-        - images: 图集URL列表
														
 
															+        ToolResult，output 为 JSON 格式的按 query 分组的结果字典：
														
 
															+        {
														
 
															+          "query1": [帖子评估结果列表],
														
 
															+          "query2": [帖子评估结果列表],
														
 
															+          ...
														
 
															+        }
														
 
															+        每个帖子评估结果包含：
														
 
															+        - channel_content_id, title, body_text, images
														
 
															         - persona_match_result: 是否与账号人设匹配（bool）
														
 
															         - post_keywords: 提取的帖子关键词列表
														
 
															-        - point_match_results: 关键词与帖子选题点的匹配结果列表，
														
 
															-          每项含「推导选题点」「帖子选题点」「匹配分数」
														
 
															+        - point_match_results: 关键词与帖子选题点的匹配结果列表
														
 
															     """
														
 
															     logger.info(
														
 
															-        "search_and_eval: account_name=%s post_id=%s query=%s",
														
 
															+        "search_and_eval: account_name=%s post_id=%s query_list=%s",
														
 
															         account_name,
														
 
															         post_id,
														
 
															-        query,
														
 
															+        query_list,
														
 
															     )
														
 
															-    try:
														
 
															-        # 1. 搜索帖子
														
 
															-        posts = await _search_posts(query)
														
 
															-        if not posts:
														
 
															-            logger.warning("search_and_eval: no posts found for query=%s", query)
														
 
															-            return ToolResult(
														
 
															-                title=f"搜索评估: {query}",
														
 
															-                output="[]",
														
 
															-                long_term_memory=f"search_and_eval: query='{query}', no posts found",
														
 
															-            )
														
 
															-        logger.info("search_and_eval: got %d posts, loading prompt and persona", len(posts))
														
 
															-        # 2. 构建 system prompt（替换账号人设）
														
 
															+    if not query_list:
														
 
															+        return ToolResult(
														
 
															+            title="搜索评估: 空 query_list",
														
 
															+            output="{}",
														
 
															+        )
														
 
															+
														
 
															+    try:
														
 
															         prompt_template = _load_match_and_extract_prompt()
														
 
															         persona_text = _load_persona_text(account_name)
														
 
															         system_prompt = prompt_template.replace("{persona}", persona_text)
														
 
															-        # 3. 并发评估所有帖子
														
 
															         tasks = [
														
 
															-            _eval_single_post(post, system_prompt, account_name, post_id)
														
 
															-            for post in posts
														
 
															+            _search_and_eval_single_query(q, system_prompt, account_name, post_id)
														
 
															+            for q in query_list
														
 
															         ]
														
 
															-        results: List[dict] = await asyncio.gather(*tasks)
														
 
															+        all_results: List[List[dict]] = await asyncio.gather(*tasks)
														
 
															+
														
 
															+        grouped: Dict[str, List[dict]] = {}
														
 
															+        total_posts = 0
														
 
															+        total_matched = 0
														
 
															+        for query, results in zip(query_list, all_results):
														
 
															+            grouped[query] = results
														
 
															+            total_posts += len(results)
														
 
															+            total_matched += sum(1 for r in results if r.get("persona_match_result"))
														
 
															-        matched_count = sum(1 for r in results if r.get("persona_match_result"))
														
 
															-        error_count = sum(1 for r in results if r.get("error"))
														
 
															         logger.info(
														
 
															-            "search_and_eval: done. total=%d persona_matched=%d errors=%d",
														
 
															-            len(results),
														
 
															-            matched_count,
														
 
															-            error_count,
														
 
															+            "search_and_eval: done. queries=%d total_posts=%d persona_matched=%d",
														
 
															+            len(query_list),
														
 
															+            total_posts,
														
 
															+            total_matched,
														
 
															         )
														
 
															-        output = json.dumps(results, ensure_ascii=False, indent=2)
														
 
															-        logger.info("search_and_eval: output=%s", output)
														
 
															+        output = json.dumps(grouped, ensure_ascii=False, indent=2)
														
 
															         return ToolResult(
														
 
															             title=(
														
 
															-                f"搜索评估: {query} "
														
 
															-                f"（共 {len(results)} 条，{matched_count} 条匹配人设）"
														
 
															+                f"搜索评估: {len(query_list)} 个 query "
														
 
															+                f"（共 {total_posts} 条帖子，{total_matched} 条匹配人设）"
														
 
															             ),
														
 
															             output=output,
														
 
															-            long_term_memory=(
														
 
															-                f"search_and_eval: query='{query}', "
														
 
															-                f"found {len(results)} posts, {matched_count} matched persona"
														
 
															-            ),
														
 
															-            metadata={"items": results},
														
 
															+            metadata={"search_and_eval summary": f"{len(query_list)} queries, found {total_posts} posts, {total_matched} matched persona"},
														
 
															         )
														
 
															     except Exception as e:
														
@@ -329,7 +399,7 @@ def main() -> None:
 
															     )
														
 
															     account_name = "家有大志"
														
 
															     post_id = "68fb6a5c000000000302e5de"
														
 
															-    query = "柴犬 鞋子"
														
 
															+    query_list = ["柴犬 鞋子", "柴犬 日常"]
														
 
															     async def run():
														
 
															         if ToolResult is None:
														
@@ -338,19 +408,21 @@ def main() -> None:
 
															         result = await search_and_eval(
														
 
															             account_name=account_name,
														
 
															             post_id=post_id,
														
 
															-            query=query,
														
 
															+            query_list=query_list,
														
 
															         )
														
 
															         if result.error:
														
 
															             print(f"Error: {result.error}")
														
 
															         else:
														
 
															             print(result.title)
														
 
															-            data = json.loads(result.output)
														
 
															-            for item in data:
														
 
															-                print(
														
 
															-                    f"  [{item.get('persona_match_result')}] {item.get('title', '')[:30]}"
														
 
															-                    f" | keywords: {item.get('post_keywords')}"
														
 
															-                    f" | matches: {len(item.get('point_match_results', []))}"
														
 
															-                )
														
 
															+            grouped = json.loads(result.output)
														
 
															+            for query, items in grouped.items():
														
 
															+                print(f"\n  === query: {query} ({len(items)} posts) ===")
														
 
															+                for item in items:
														
 
															+                    print(
														
 
															+                        f"    [{item.get('persona_match_result')}] {item.get('title', '')[:30]}"
														
 
															+                        f" | keywords: {item.get('post_keywords')}"
														
 
															+                        f" | matches: {len(item.get('point_match_results', []))}"
														
 
															+                    )
														
 
															     asyncio.run(run())
														
--- a/examples_how/overall_derivation/utils/conditional_ratio_calc.py
+++ b/examples_how/overall_derivation/utils/conditional_ratio_calc.py
@@ -126,7 +126,7 @@ def calc_node_conditional_ratio(
 
															             num = len(derived_post_ids & set_n)
														
 
															             ratio = min(1.0, num / den)
														
 
															             max_ratio = max(max_ratio, ratio)
														
 
															-    return max_ratio
														
 
															+    return round(max_ratio, 4)
														
 
															 def _pattern_nodes_and_post_count(pattern: dict[str, Any]) -> tuple[list[str], int, float]:
														
@@ -193,8 +193,8 @@ def calc_pattern_conditional_ratio(
 
															     if common is None or len(common) == 0:
														
 
															         return pattern_s
														
 
															     den = len(common)
														
 
															-    # 分子为 pattern 本身的帖子数（post_count），分母为条件集合大小
														
 
															-    return min(1.0, post_count / den)
														
 
															+    # 分子为 pattern 本身的帖子数（post_count），分母为条件集合大小；条件概率最多保留 4 位小数
														
 
															+    return round(min(1.0, post_count / den), 4)
														
 
															 def _test_with_user_example() -> None: