1 mēnesi atpakaļ · 76853dc862
--- a/agent/llm/openrouter.py
+++ b/agent/llm/openrouter.py
@@ -621,7 +621,6 @@ async def openrouter_llm_call(
 
															         }
														
 
															     """
														
 
															     api_key = os.getenv("OPEN_ROUTER_API_KEY")
														
 
															-    api_key = "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"
														
 
															     if not api_key:
														
 
															         raise ValueError("OPEN_ROUTER_API_KEY environment variable not set")
														
--- a/tests/content_finder.prompt
+++ b/tests/content_finder.prompt
@@ -34,22 +34,26 @@ $system$
 
															 3. **账号沉淀阶段（account_precipitation）**
														
 
															    - 对通过过滤的文章逐条调用 `fetch_weixin_account` 获取账号信息。
														
 
															-   - 账号聚合去重规则：优先 `account_id`，缺失时用 `account_name` 兜底。
														
 
															+   - 账号聚合去重规则：优先 `wx_gh`，缺失时用 `account_name` 兜底。
														
 
															    - 统计账号命中文章数与代表文章（用于内部沉淀与后续复用）。
														
 
															-   - 账号沉淀为流程内步骤，不改变本次最终输出 Schema。
														
 
															+   - 生成账号结果 `accounts` 与文章-账号关系 `article_account_relations`。
														
 
															-4. **输出阶段**
														
 
															+4. **数据记录阶段（本地）**
														
 
															    - 先按 `output_schema` 生成并写入 `output.json`（JSON 格式）。
														
 
															-   - 仅输出通过过滤后的文章结果（按综合排序取前 M 条）。
														
 
															+   - 输出必须包含：文章结果 + 账号结果 + 文章-账号关系。
														
 
															+   - 本阶段仅保存本地文件，不写数据库。
														
 
															 ## 强制要求（违反即为错误）
														
 
															 ### 输出字段必须严格遵循 Schema
														
 
															-- 顶层字段只能有：`trace_id`、`query`、`demand_id`、`summary`、`contents`
														
 
															-- 每条内容字段只能有：`title`、`url`、`statistics`
														
 
															+- 顶层字段只能有：`trace_id`、`query`、`demand_id`、`summary`、`contents`、`accounts`、`article_account_relations`
														
 
															+- `contents` 每条字段只能有：`title`、`url`、`statistics`、`reason`
														
 
															+- `accounts` 每条字段只能有：`wx_gh`、`account_name`、`channel_account_id`、`biz_info`、`article_count`、`sample_articles`、`source_urls`
														
 
															+- `article_account_relations` 每条字段只能有：`article_url`、`wx_gh`
														
 
															 - **禁止自创字段**（如 `results`、`metrics`、`tags`、`platform` 等）
														
 
															 - **禁止使用中文 key**
														
 
															-- `summary` 中需简要说明三阶段执行情况（搜索数量、过滤后数量、沉淀账号数）
														
 
															+- `summary` 必须包含：`candidate_count`、`filtered_in_count`、`account_count`
														
 
															+- `title` 中英文双引号 `"` 必须标准化为中文双引号 `“`、`”`
														
 
															 ## 流程自检
														
@@ -57,12 +61,12 @@ $system$
 
															 ### 输出、校验、入库顺序是否正确
														
 
															 - 无需写数据库，直接写入 `output.json` 即可。
														
 
															-- 输出沉淀的账号
														
 
															+- 必须输出沉淀的账号以及文章-账号关系
														
 
															 - **禁止**：未校验 Schema 就直接入库。
														
 
															 $user$
														
 
															-任务：找10个与「%query%」相关的、老年人感兴趣的文章。
														
 
															+任务：找 20 个与「%query%」相关的、老年人感兴趣的文章。
														
 
															 要求：
														
 
															 - 适合老年人分享观看
														
 
															 - 热度要高，质量要好
														
--- a/tests/run_single.py
+++ b/tests/run_single.py
@@ -1,6 +1,10 @@
 
															+from dotenv import load_dotenv
														
 
															+load_dotenv()
														
 
															+
														
 
															 from typing import Dict, Any, Optional
														
 
															 import os
														
 
															 from pathlib import Path
														
 
															+import json
														
 
															 from tools import fetch_account_article_list, fetch_weixin_account, weixin_search
														
@@ -19,6 +23,56 @@ logger = logging.getLogger(__name__)
 
															 PROJECT_ROOT = Path(__file__).resolve().parent
														
 
															+def _normalize_ascii_double_quotes(text: str) -> str:
														
 
															+    """将字符串中的 ASCII 双引号 `"` 规范化为中文双引号 `“`、`”`。"""
														
 
															+    if '"' not in text:
														
 
															+        return text
														
 
															+
														
 
															+    chars: list[str] = []
														
 
															+    open_quote = True
														
 
															+    for ch in text:
														
 
															+        if ch == '"':
														
 
															+            chars.append("“" if open_quote else "”")
														
 
															+            open_quote = not open_quote
														
 
															+        else:
														
 
															+            chars.append(ch)
														
 
															+    return "".join(chars)
														
 
															+
														
 
															+
														
 
															+def _sanitize_json_strings(value: Any) -> Any:
														
 
															+    if isinstance(value, str):
														
 
															+        return _normalize_ascii_double_quotes(value)
														
 
															+    if isinstance(value, list):
														
 
															+        return [_sanitize_json_strings(v) for v in value]
														
 
															+    if isinstance(value, dict):
														
 
															+        return {k: _sanitize_json_strings(v) for k, v in value.items()}
														
 
															+    return value
														
 
															+
														
 
															+
														
 
															+def _sanitize_output_json(output_json_path: Path) -> None:
														
 
															+    """
														
 
															+    任务完成后对 output.json 做后处理：
														
 
															+    - 递归清洗所有字符串值中的英文双引号 `"`
														
 
															+    - 保持合法 JSON
														
 
															+    """
														
 
															+    if not output_json_path.exists():
														
 
															+        logger.warning(f"未找到 output.json，跳过清洗: {output_json_path}")
														
 
															+        return
														
 
															+
														
 
															+    try:
														
 
															+        data = json.loads(output_json_path.read_text(encoding="utf-8"))
														
 
															+    except Exception as e:
														
 
															+        logger.warning(f"output.json 解析失败，跳过清洗: {e}")
														
 
															+        return
														
 
															+
														
 
															+    cleaned = _sanitize_json_strings(data)
														
 
															+    output_json_path.write_text(
														
 
															+        json.dumps(cleaned, ensure_ascii=False, indent=2),
														
 
															+        encoding="utf-8"
														
 
															+    )
														
 
															+    logger.info(f"已完成 output.json 引号清洗: {output_json_path}")
														
 
															+
														
 
															+
														
 
															 async def run_agent(
														
 
															         query: Optional[str] = None,
														
 
															         demand_id: Optional[int] = None,
														
@@ -47,14 +101,14 @@ async def run_agent(
 
															     prompt = SimplePrompt(prompt_path)
														
 
															     # output 目录
														
 
															-    output_dir = str(PROJECT_ROOT / "tests" / "output")
														
 
															+    output_dir = str(PROJECT_ROOT / "output")
														
 
															     # 构建消息（替换 %query%、%output_dir%、%demand_id%）
														
 
															     demand_id_str = str(demand_id) if demand_id is not None else ""
														
 
															     messages = prompt.build_messages(query=query, output_dir=output_dir, demand_id=demand_id_str)
														
 
															     # 初始化配置
														
 
															-    api_key = "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"
														
 
															+    api_key = os.getenv("OPEN_ROUTER_API_KEY")
														
 
															     if not api_key:
														
 
															         raise ValueError("OPEN_ROUTER_API_KEY 未设置")
														
@@ -74,6 +128,7 @@ async def run_agent(
 
															         "weixin_search",
														
 
															         "fetch_weixin_account",
														
 
															         "fetch_account_article_list",
														
 
															+        "fetch_article_detail",
														
 
															     ]
														
 
															     runner = AgentRunner(
														
@@ -90,14 +145,14 @@ async def run_agent(
 
															         tools=allowed_tools,
														
 
															         extra_llm_params={"max_tokens": 8192},
														
 
															         knowledge=KnowledgeConfig(
														
 
															-            enable_extraction=True,
														
 
															-            enable_completion_extraction=True,
														
 
															-            enable_injection=True,
														
 
															-            owner="content_finder_agent",
														
 
															-            default_tags={"project": "content_finder"},
														
 
															-            default_scopes=["com.piaoquantv.supply"],
														
 
															-            default_search_types=["tool", "usecase", "definition"],
														
 
															-            default_search_owner="content_finder_agent"
														
 
															+            enable_extraction=False,
														
 
															+            enable_completion_extraction=False,
														
 
															+            enable_injection=False,
														
 
															+            # owner="content_finder_agent",
														
 
															+            # default_tags={"project": "content_finder"},
														
 
															+            # default_scopes=["com.piaoquantv.supply"],
														
 
															+            # default_search_types=["tool", "usecase", "definition"],
														
 
															+            # default_search_owner="content_finder_agent"
														
 
															         )
														
 
															     )
														
@@ -110,6 +165,9 @@ async def run_agent(
 
															                 trace_id = item.trace_id
														
 
															                 if item.status == "completed":
														
 
															+                    if trace_id:
														
 
															+                        output_json_path = Path(output_dir) / trace_id / "output.json"
														
 
															+                        _sanitize_output_json(output_json_path)
														
 
															                     logger.info(f"Agent 执行完成: trace_id={trace_id}")
														
 
															                     return {
														
 
															                         "trace_id": trace_id,
														
--- a/tests/skills/account_precipitation.md
+++ b/tests/skills/account_precipitation.md
@@ -14,13 +14,33 @@ description: 账号沉淀策略（公众号账号信息提取与聚合）
 
															 ## 输入与输出
														
 
															 ### 输入
														
 
															-- 文章列表（建议来自 `article_filter_strategy` 输出）
														
 
															-- 每条至少包含 `url`、`title`
														
 
															-- url链接必须是完整的链接，绝对不能修改或者截断
														
 
															+- `filtered_articles`：`array<object>`，建议来自 `article_filter_strategy` 输出
														
 
															+- `filtered_articles[i]` 必须包含：
														
 
															+  - `url`：`string`，完整文章链接（原样透传）
														
 
															+  - `title`：`string`，文章标题
														
 
															+
														
 
															+### 输入强约束（必须满足）
														
 
															+- 逐条以文章 `url` 作为 `fetch_weixin_account` 的 `content_link` 入参
														
 
															+- 禁止把 `wx_gh` 当作 `fetch_weixin_account` 的输入参数
														
 
															+- 禁止修改、截断、重写 `url`
														
 
															+- 任一条缺失 `url` 或 `title`，该条直接跳过
														
 
															 ### 输出
														
 
															-- 去重后的账号列表
														
 
															-- 每个账号包含：`account_id`、`account_name`、`article_count`、`sample_articles`、`tags`（可选）
														
 
															+- `accounts`：`array<object>`，去重后的账号列表
														
 
															+- `accounts[i]` 固定字段：
														
 
															+  - `account_name`：`string`
														
 
															+  - `wx_gh`：`string`（公众号 ID）
														
 
															+  - `channel_account_id`：`string | int | null`
														
 
															+  - `biz_info`：`object | null`
														
 
															+  - `article_count`：`int`
														
 
															+  - `sample_articles`：`array<string>`（最多 5 条）
														
 
															+  - `source_urls`：`array<string>`（可追溯来源链接）
														
 
															+
														
 
															+### 输出强约束（必须满足）
														
 
															+- 仅允许输出上述字段，禁止额外字段
														
 
															+- `account_name/wx_gh/channel_account_id/biz_info` 必须来自 `metadata.account_info`
														
 
															+- `article_count` 必须等于该账号聚合到的文章数
														
 
															+- `source_urls` 必须为真实输入文章链接，不得编造
														
 
															 ---
														
@@ -29,9 +49,20 @@ description: 账号沉淀策略（公众号账号信息提取与聚合）
 
															 ### 必用工具
														
 
															 - `fetch_weixin_account`：根据文章查询其所属公众号账号信息
														
 
															+### 工具 I/O 契约（强约束）
														
 
															+- 入参固定：
														
 
															+  - `content_link` ← 文章 `url`
														
 
															+- 仅从以下路径读取账号信息：
														
 
															+  - `metadata.account_info.account_name`
														
 
															+  - `metadata.account_info.wx_gh`
														
 
															+  - `metadata.account_info.biz_info`
														
 
															+  - `metadata.account_info.channel_account_id`
														
 
															+- 返回 `None` 或缺失 `metadata.account_info` 时：
														
 
															+  - 当前文章不参与账号沉淀，记录为失败样本但不编造账号
														
 
															+
														
 
															 ### 调用顺序
														
 
															-1. 遍历文章，逐条调用 `fetch_weixin_account`， 输入的信息是 `wx_gh`, 是一个以 `gh_` 开头的字符串
														
 
															-2. 将返回账号按唯一键聚合（优先 `account_id`）
														
 
															+1. 遍历文章，逐条调用 `fetch_weixin_account(content_link=url)`
														
 
															+2. 将返回账号按唯一键聚合（优先 `wx_gh`，若缺失则用 `account_name`）
														
 
															 3. 统计每个账号命中文章数与代表文章
														
 
															 4. 生成账号沉淀结果
														
@@ -39,10 +70,11 @@ description: 账号沉淀策略（公众号账号信息提取与聚合）
 
															 ## 聚合与去重规则
														
 
															-- 优先用 `account_id` 去重；若无 `account_id`，使用 `account_name` 兜底
														
 
															+- 优先用 `wx_gh` 去重；若无 `wx_gh`，使用 `account_name` 兜底
														
 
															 - 同一账号下累计：
														
 
															   - `article_count += 1`
														
 
															-  - 将文章标题加入 `sample_articles`（保留前 3-5 条示例）
														
 
															+  - 将文章标题加入 `sample_articles`（最多保留 5 条）
														
 
															+  - 将文章链接加入 `source_urls`（去重保留）
														
 
															 - 若账号字段冲突，采用“非空优先 + 最新记录优先”策略
														
 
															 ---
														
@@ -63,20 +95,25 @@ description: 账号沉淀策略（公众号账号信息提取与聚合）
 
															 - 账号信息必须来自 `fetch_weixin_account` 返回
														
 
															 - 不得根据标题或 URL 猜测账号名
														
 
															-- 文章与账号映射关系必须可追溯（建议保留 `source_urls`）
														
 
															+- 文章与账号映射关系必须可追溯（必须保留 `source_urls`）
														
 
															+- 禁止解析工具 `output` 文本，必须使用 `metadata.account_info`
														
 
															 ---
														
 
															-## 输出建议格式
														
 
															+## 输出固定格式（必须严格遵守）
														
 
															 ```json
														
 
															-[
														
 
															-  {
														
 
															-    "account_id": "gh_xxx",
														
 
															-    "account_name": "示例公众号",
														
 
															-    "article_count": 4,
														
 
															-    "sample_articles": ["标题A", "标题B", "标题C"],
														
 
															-    "tags": ["健康科普", "防诈骗"]
														
 
															-  }
														
 
															-]
														
 
															+{
														
 
															+  "accounts": [
														
 
															+    {
														
 
															+      "account_name": "示例公众号",
														
 
															+      "wx_gh": "gh_xxx",
														
 
															+      "channel_account_id": "12345",
														
 
															+      "biz_info": {},
														
 
															+      "article_count": 4,
														
 
															+      "sample_articles": ["标题A", "标题B", "标题C"],
														
 
															+      "source_urls": ["https://mp.weixin.qq.com/s?..."]
														
 
															+    }
														
 
															+  ]
														
 
															+}
														
 
															 ```
														
--- a/tests/skills/article_filter_strategy.md
+++ b/tests/skills/article_filter_strategy.md
@@ -7,21 +7,44 @@ description: 文章过滤与筛选策略（老年人兴趣向）
 
															 ## 目标
														
 
															-在 `article_finding_strategy` 拿到候选文章后，调用 `fetch_article_detail` 获取详情，筛选出更适合老年人、且老年人更感兴趣的内容，输出可直接使用的高质量文章集合。
														
 
															+在 `article_finding_strategy` 拿到候选文章后，调用 `fetch_article_detail` 获取详情，筛选出更适合老年人、且老年人更感兴趣的内容，
														
 
															+输出可直接使用的高质量文章集合。
														
 
															 ---
														
 
															 ## 输入与输出
														
 
															 ### 输入
														
 
															-- `input_query`（用户原始需求）
														
 
															-- 候选文章列表（至少包含 `url`、`title`、`statistics.time`）
														
 
															-- 目标数量 **M**
														
 
															+- `input_query`：`string`，用户原始需求，非空
														
 
															+- `target_count`：`int`，目标数量，`target_count >= 1`
														
 
															+- `candidate_articles`：`array<object>`，候选文章列表，非空数组
														
 
															+- `candidate_articles[i]` 必须包含：
														
 
															+  - `title`：`string`，非空
														
 
															+  - `url`：`string`，完整文章链接，禁止截断/改写
														
 
															+  - `statistics.time`：`int`，秒级时间戳
														
 
															+
														
 
															+### 输入强约束（必须满足）
														
 
															+- 只接受上述字段；禁止传入未定义字段作为筛选依据
														
 
															+- 严禁从工具 `output` 文本解析结构化数据
														
 
															+- 候选文章字段必须来自同一条 `metadata.search_results` 记录
														
 
															+- 任一候选缺失 `title/url/statistics.time`，该条直接丢弃，不补造字段
														
 
															 ### 输出
														
 
															-- 通过筛选的文章列表（最多 M 条）
														
 
															-- 每条保留：`title`、`url`、`publish_time`、`reason`（入选原因）
														
 
															-- 可选附加：`score`（用于内部排序）
														
 
															+- `filtered_articles`：`array<object>`，最多 `target_count` 条
														
 
															+- `filtered_articles[i]` 字段固定为：
														
 
															+  - `title`：`string`
														
 
															+  - `url`：`string`
														
 
															+  - `publish_time`：`int`（秒级时间戳）
														
 
															+  - `reason`：`string`（基于证据的入选原因）
														
 
															+  - `relevance_level`：`"high" | "medium" | "low"`
														
 
															+  - `interest_level`：`"high" | "medium" | "low"`
														
 
															+
														
 
															+### 输出强约束（必须满足）
														
 
															+- 只允许输出上述字段，禁止增加自定义 key
														
 
															+- `title/url/publish_time` 必须与原候选记录或详情记录一致，不得改写
														
 
															+- `publish_time` 统一为秒级：若详情返回毫秒时间戳（`publish_timestamp`），需整除 1000
														
 
															+- `reason` 必须可被详情证据支撑，禁止主观臆测
														
 
															+- `title` 必须做引号标准化：若包含英文双引号 `"`，统一替换为中文双引号 `“` 和 `”`（成对），禁止保留英文双引号
														
 
															 ---
														
@@ -30,11 +53,24 @@ description: 文章过滤与筛选策略（老年人兴趣向）
 
															 ### 必用工具
														
 
															 - `fetch_article_detail`：用于查询文章详情（正文、摘要、标签、互动信息等）
														
 
															+### 工具 I/O 契约（强约束）
														
 
															+- 入参固定：
														
 
															+  - `article_link` ← 候选文章 `url`
														
 
															+  - `is_count` 使用默认值（不强制改写）
														
 
															+  - `is_cache` 使用默认值（不强制改写）
														
 
															+- 只从以下路径读取详情：
														
 
															+  - `metadata.article_info.title`
														
 
															+  - `metadata.article_info.content_link`
														
 
															+  - `metadata.article_info.body_text`
														
 
															+  - `metadata.article_info.publish_timestamp`（毫秒）
														
 
															+- 调用失败（返回 `None` 或缺失 `metadata.article_info`）：
														
 
															+  - 当前文章标记为“详情缺失”，不进入最终结果
														
 
															+
														
 
															 ### 调用顺序
														
 
															 1. 先基于候选列表做初筛（去重、基础质量过滤）
														
 
															 2. 对初筛后的文章逐条调用 `fetch_article_detail`
														
 
															 3. 根据详情做老年人兴趣打分与淘汰
														
 
															-4. 按得分排序，输出前 M 条
														
 
															+4. 按得分排序，输出前 `target_count` 条
														
 
															 ---
														
@@ -75,9 +111,9 @@ description: 文章过滤与筛选策略（老年人兴趣向）
 
															 ## 结果数量控制
														
 
															-- 若通过数 **C >= M**：按评分取前 M 条输出
														
 
															-- 若 **M × 0.8 <= C < M**：输出当前结果并标注“数量接近目标”
														
 
															-- 若 **C < M × 0.8**：返回上游继续补充候选，再进入本流程
														
 
															+- 若通过数 **C >= target_count**：按评分取前 `target_count` 条输出
														
 
															+- 若 **target_count × 0.8 <= C < target_count**：输出当前结果并标注“数量接近目标”
														
 
															+- 若 **C < target_count × 0.8**：返回上游继续补充候选，再进入本流程
														
 
															 ---
														
@@ -86,18 +122,25 @@ description: 文章过滤与筛选策略（老年人兴趣向）
 
															 - 仅使用工具返回数据，不编造字段
														
 
															 - `title`、`url`、`publish_time` 必须来自同一条记录
														
 
															 - `reason` 必须基于可验证内容生成（例如“涉及防诈骗案例且表达清晰”）
														
 
															+- 优先使用结构化路径：搜索结果取 `metadata.search_results`，详情取 `metadata.article_info`
														
 
															+- 禁止把不同文章的字段进行拼接混用
														
 
															+- 允许在不改变语义的前提下对 `title` 做最小清洗：去 HTML 标签 + 英文双引号标准化（`"` -> `“”`）
														
 
															 ---
														
 
															-## 输出建议格式
														
 
															+## 输出固定格式（必须严格遵守）
														
 
															 ```json
														
 
															-[
														
 
															-  {
														
 
															-    "title": "文章标题",
														
 
															-    "url": "完整链接",
														
 
															-    "publish_time": 1710000000,
														
 
															-    "reason": "适合老年人阅读：主题实用、表达清晰、信息可信"
														
 
															-  }
														
 
															-]
														
 
															+{
														
 
															+  "filtered_articles": [
														
 
															+    {
														
 
															+      "title": "文章标题",
														
 
															+      "url": "完整链接",
														
 
															+      "publish_time": 1710000000,
														
 
															+      "reason": "适合老年人阅读：围绕 query、信息清晰且证据充分",
														
 
															+      "relevance_level": "high",
														
 
															+      "interest_level": "high"
														
 
															+    }
														
 
															+  ]
														
 
															+}
														
 
															 ```
														
--- a/tests/skills/article_finding_strategy.md
+++ b/tests/skills/article_finding_strategy.md
@@ -34,14 +34,13 @@ description: 内容搜索方法论
 
															 ### 字段完整性要求
														
 
															 - `url`：文章链接，必须**逐字符完整复制**，不能截断或修改。
														
 
															-- `title`: 文章标题，必须来自**同一条记录**，不能混用，去掉标题中的 html 符号（如 `<p>`、`</p>` 等）。
														
 
															-- `title`: 标题中若出现英文双引号（`"`），需要把标题中的双引号换成中文双引号(`“”`）。
														
 
															+- `title`: 文章标题，必须来自**同一条记录**，不能混用，去掉标题中的 html 符号（如 `<p>`、`</p>` 等）；标题中若出现英文双引号（`"`），必须标准化为中文双引号（成对 `“`、`”`），禁止保留英文双引号。
														
 
															 - `statistics.time`: 文章发布时间戳（秒），必须来自**同一条记录**，不能混用。
														
 
															 ### 正确做法
														
 
															 ```python
														
 
															 item = metadata.search_results[0]
														
 
															 url = item["url"]
														
 
															-title = item["title"].replace('"', '“')  # 完整复制符
														
 
															+title = normalize_quotes(item["title"])  # 英文双引号标准化为中文双引号“”
														
 
															 ```
														
 
															 ### 禁止行为
														
--- a/tests/skills/output_schema.md
+++ b/tests/skills/output_schema.md
@@ -1,85 +1,101 @@
 
															 ---
														
 
															 name: output_schema
														
 
															-description: 输出结果指南
														
 
															+description: 微信文章搜索任务输出结构规范（文章+账号+关系）
														
 
															 ---
														
 
															 ## 输出结果指南
														
 
															-### 输出目录
														
 
															-输出 JSON 写入到output_dir目录下当次执行的 trace_id 目录内的 `output.json` 文件。
														
 
															-**获取路径方式**：先调用 `get_current_context` 获取 `trace_id` 和 `output_dir`，再使用 `write_file` 写入 `{output_dir}/{trace_id}/output.json`。
														
 
															+### 输出目录（本地 JSON）
														
 
															+输出结果必须写入当前任务目录下的 `output.json` 文件。
														
 
															-### **输出 JSON Schema**
														
 
															+- 先调用 `get_current_context` 获取 `trace_id` 和 `output_dir`
														
 
															+- 再调用 `write_file` 写入 `{output_dir}/{trace_id}/output.json`
														
 
															-> ⚠️ 所有字段名必须与下面完全一致，禁止自创字段名（如 `results`、`metrics`、`like_count`、`age_distribution`、`platform` 等）
														
 
															+---
														
 
															+
														
 
															+## 输出 JSON Schema（强约束）
														
 
															+
														
 
															+> 所有字段名必须与下面完全一致；禁止增删顶层字段、禁止自创字段名。
														
 
															 ```json
														
 
															 {
														
 
															-  "trace_id": "<由系统生成的真实 trace_id；如果你不知道就填空字符串，程序会覆盖修正>",
														
 
															-  "query": "<本次任务的 query>",
														
 
															-  "demand_id": "<来自 user 消息的搜索词 id>",
														
 
															+  "trace_id": "<真实 trace_id>",
														
 
															+  "query": "<本次任务 query>",
														
 
															+  "demand_id": "<搜索词 id，无则空字符串>",
														
 
															   "summary": {
														
 
															     "candidate_count": 0,
														
 
															-    "portrait_content_like_count": 0,
														
 
															-    "portrait_account_fans_count": 0,
														
 
															-    "portrait_none_count": 0,
														
 
															-    "filtered_in_count": 0
														
 
															-  },
														
 
															-  "good_account_expansion": {
														
 
															-    "enabled": false,
														
 
															-    "accounts": [
														
 
															-      {
														
 
															-        "author_nickname": "<作者名>",
														
 
															-        "author_sec_uid": "<完整 sec_uid>",
														
 
															-        "age_50_plus_ratio": null,
														
 
															-        "age_50_plus_tgi": null
														
 
															-      }
														
 
															-    ]
														
 
															+    "filtered_in_count": 0,
														
 
															+    "account_count": 0
														
 
															   },
														
 
															   "contents": [
														
 
															     {
														
 
															-      "title": "<来自 metadata 的标题/desc>",
														
 
															-      "aweme_id": "内容id",
														
 
															-      "rank": 1,
														
 
															-      "video_url": "https://www.douyin.com/video/<aweme_id>",
														
 
															-      "author_nickname": "作者名",
														
 
															-      "author_sec_uid": "作者id",
														
 
															-      "author_url": "https://www.douyin.com/user/<author_sec_uid>",
														
 
															+      "title": "<文章标题>",
														
 
															+      "url": "<完整文章链接>",
														
 
															       "statistics": {
														
 
															-        "digg_count": 0,
														
 
															-        "comment_count": 0,
														
 
															-        "share_count": 0
														
 
															-      },
														
 
															-      "portrait_data": {
														
 
															-        "source": "content_like | account_fans | none",
														
 
															-        "age_50_plus_ratio": null,
														
 
															-        "age_50_plus_tgi": null,
														
 
															-        "url": "画像链接"
														
 
															+        "time": 1710000000
														
 
															       },
														
 
															-      "reason": "<入选理由>"
														
 
															+      "reason": "<入选原因>"
														
 
															+    }
														
 
															+  ],
														
 
															+  "accounts": [
														
 
															+    {
														
 
															+      "wx_gh": "<公众号ID>",
														
 
															+      "account_name": "<公众号名称>",
														
 
															+      "channel_account_id": "<内部账号ID或空字符串>",
														
 
															+      "biz_info": {},
														
 
															+      "article_count": 0,
														
 
															+      "sample_articles": [],
														
 
															+      "source_urls": []
														
 
															+    }
														
 
															+  ],
														
 
															+  "article_account_relations": [
														
 
															+    {
														
 
															+      "article_url": "<文章链接>",
														
 
															+      "wx_gh": "<公众号ID>"
														
 
															     }
														
 
															   ]
														
 
															 }
														
 
															 ```
														
 
															-### 易错字段说明
														
 
															+---
														
 
															-| 字段 | 正确写法 | 错误写法（禁止） |
														
 
															-|---|---|---|
														
 
															-| 点赞数 | `statistics.digg_count` | `statistics.like_count` / `metrics.likes` |
														
 
															-| 50岁以上占比 | `portrait_data.age_50_plus_ratio` | `portrait_data.age_distribution["50+"]` |
														
 
															-| 50岁以上偏好度 | `portrait_data.age_50_plus_tgi` | 任何其他写法 |
														
 
															-| 画像来源 | `portrait_data.source` 值为 `content_like` / `account_fans` / `none` | `"content"` / `"account"` 等缩写 |
														
 
															-| 优质账号扩展 | `good_account_expansion` 为**对象**，含 `enabled` + `accounts` | 直接输出为**数组** |
														
 
															-| 摘要 | `summary` 为**对象**，含 `candidate_count` 等字段 | `summary` 为字符串 |
														
 
															+## 字段约束（必须遵守）
														
 
															-### portrait_data 字段规则
														
 
															+### 顶层字段
														
 
															+- 顶层字段只能有：`trace_id`、`query`、`demand_id`、`summary`、`contents`、`accounts`、`article_account_relations`
														
 
															+- `trace_id` 必须使用真实 trace_id，不得伪造
														
 
															+
														
 
															+### contents
														
 
															+- 每条内容字段只能有：`title`、`url`、`statistics`、`reason`
														
 
															+- `statistics` 只能有：`time`
														
 
															+- `title` 若含英文双引号 `"`，必须标准化为中文双引号 `“`、`”`
														
 
															+- `url` 必须是完整原始链接，禁止截断/改写
														
 
															+
														
 
															+### accounts
														
 
															+- 每条账号字段只能有：`wx_gh`、`account_name`、`channel_account_id`、`biz_info`、`article_count`、`sample_articles`、`source_urls`
														
 
															+- `wx_gh`、`account_name` 必须来自 `fetch_weixin_account` 的 `metadata.account_info`
														
 
															+- `article_count` 必须等于该账号下实际沉淀文章数
														
 
															+- `source_urls` 内链接必须可在 `contents[*].url` 找到对应来源
														
 
															+
														
 
															+### article_account_relations
														
 
															+- 每条关系字段只能有：`article_url`、`wx_gh`
														
 
															+- `article_url` 必须存在于 `contents[*].url`
														
 
															+- `wx_gh` 必须存在于 `accounts[*].wx_gh`
														
 
															+
														
 
															+---
														
 
															+
														
 
															+## 一致性校验（写文件前必须检查）
														
 
															+
														
 
															+- `summary.candidate_count >= summary.filtered_in_count`
														
 
															+- `summary.account_count == len(accounts)`
														
 
															+- `len(contents) == summary.filtered_in_count`（若策略要求输出全部入选）
														
 
															+- 每个 `accounts[*].article_count` 应等于该账号在 `article_account_relations` 中出现次数
														
 
															+- 禁止输出 `null` 作为必填字符串字段（改为空字符串）
														
 
															+
														
 
															+---
														
 
															-- `source="content_like"` → `url = https://douhot.douyin.com/video/detail?active_tab=video_fans&video_id={aweme_id}`
														
 
															-- `source="account_fans"` → `url = https://douhot.douyin.com/creator/detail?active_tab=creator_fans_portrait&creator_id={author_sec_uid}`
														
 
															-- `source="none"` → `url=null`，`age_50_plus_ratio=null`，`age_50_plus_tgi=null`
														
 
															+## 本地记录要求
														
 
															-## JSON 编写规范
														
 
															-- 字符串值中若有双引号 `"`，必须写成 `\"`（反斜杠 + 双引号）
														
 
															-- 若有反斜杠 `\`，必须写成 `\\`
														
 
															-- 若标题含引号，建议使用中文引号「」避免转义，或严格转义为 \"
														
 
															+- 本阶段只做本地落盘：`output.json`
														
 
															+- 不写数据库，不调用入库工具
														
 
															+- 文件内容必须是合法 JSON（可被 `json.loads` 解析）
														
--- a/tests/tools/weixin_tools.py
+++ b/tests/tools/weixin_tools.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 
															 import json
														
 
															 import logging
														
 
															-from agent.tools import tool, ToolResult
														
 
															+from agent.tools import tool, ToolContext, ToolResult
														
 
															 from src.infra.shared.http_client import AsyncHttpClient
														
 
															 from src.infra.shared.common import extract_history_articles
														
@@ -14,8 +14,23 @@ base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
 
															 headers = {"Content-Type": "application/json"}
														
 
															+def _build_success_result(title: str, response: dict) -> ToolResult:
														
 
															+    """Normalize an upstream response dict into a ToolResult.
														
 
															+
														
 
															+    ``output`` is ``response["output"]`` when that value is truthy;
														
 
															+    otherwise the whole response is serialized as JSON text.
														
 
															+    ``metadata`` carries the raw response under ``raw_data`` unless the
														
 
															+    upstream metadata dict already provides that key.
														
 
															+    """
														
 
															+    output = response.get("output")
														
 
															+    if not output:
														
 
															+        output = json.dumps(response, ensure_ascii=False)
														
 
															+
														
 
															+    metadata = response.get("metadata")
														
 
															+    if not isinstance(metadata, dict):
														
 
															+        metadata = {"raw_data": response}
														
 
															+    elif "raw_data" not in metadata:
														
 
															+        # Copy instead of mutating response["metadata"]: writing raw_data
														
 
															+        # into it would make response contain itself (a reference cycle
														
 
															+        # that json.dumps rejects) and would alter the caller's dict.
														
 
															+        metadata = {**metadata, "raw_data": response}
														
 
															+
														
 
															+    return ToolResult(title=title, output=output, metadata=metadata)
														
 
															+
														
 
															+
														
 
															 @tool(description="通过关键词搜索微信文章")
														
 
															-async def weixin_search(keyword: str, page="1") -> dict | None:
														
 
															+async def weixin_search(keyword: str, page: str = "1", ctx: ToolContext = None) -> ToolResult:
														
 
															     """
														
 
															         微信关键词搜索
														
@@ -44,16 +59,19 @@ async def weixin_search(keyword: str, page="1") -> dict | None:
 
															     try:
														
 
															         async with AsyncHttpClient(timeout=120) as http_client:
														
 
															             response = await http_client.post(url=url, headers=headers, data=payload)
														
 
															-
														
 
															+        return _build_success_result("微信文章搜索结果", response)
														
 
															     except Exception as e:
														
 
															-        print(e)
														
 
															-        return None
														
 
															-    print(json.dumps(response, ensure_ascii=False, indent=4))
														
 
															-    return response
														
 
															+        logger.exception("weixin_search failed")
														
 
															+        return ToolResult(
														
 
															+            title="微信文章搜索失败",
														
 
															+            output="",
														
 
															+            error=str(e),
														
 
															+            metadata={"keyword": keyword, "page": page},
														
 
															+        )
														
 
															 @tool(description="通过公众号文章链接获取公众号详情信息")
														
 
															-async def fetch_weixin_account(content_link: str) -> dict | None:
														
 
															+async def fetch_weixin_account(content_link: str, ctx: ToolContext = None) -> ToolResult:
														
 
															     """
														
 
															         通过公众号文章链接获取公众号的详情信息
														
@@ -79,16 +97,24 @@ async def fetch_weixin_account(content_link: str) -> dict | None:
 
															     try:
														
 
															         async with AsyncHttpClient(timeout=120) as http_client:
														
 
															             response = await http_client.post(url=url, headers=headers, data=payload)
														
 
															-
														
 
															+        return _build_success_result("公众号详情信息", response)
														
 
															     except Exception as e:
														
 
															-        logger.error(e)
														
 
															-        return None
														
 
															-    print(json.dumps(response, ensure_ascii=False, indent=4))
														
 
															-    return response
														
 
															+        logger.exception("fetch_weixin_account failed")
														
 
															+        return ToolResult(
														
 
															+            title="公众号详情获取失败",
														
 
															+            output="",
														
 
															+            error=str(e),
														
 
															+            metadata={"content_link": content_link},
														
 
															+        )
														
 
															 @tool(description="通过微信公众号的 wx_gh 获取微信公众号的历史发文列表")
														
 
															-async def fetch_account_article_list(wx_gh: str, index=None, is_cache=True) -> dict | None:
														
 
															+async def fetch_account_article_list(
														
 
															+    wx_gh: str,
														
 
															+    index: str | None = None,
														
 
															+    is_cache: bool = True,
														
 
															+    ctx: ToolContext = None,
														
 
															+) -> ToolResult:
														
 
															     """
														
 
															     通过公众号的 wx_gh 获取历史发文列表
														
@@ -134,16 +160,25 @@ async def fetch_account_article_list(wx_gh: str, index=None, is_cache=True) -> d
 
															     try:
														
 
															         async with AsyncHttpClient(timeout=120) as http_client:
														
 
															             response = await http_client.post(url=url, headers=headers, data=payload)
														
 
															-
														
 
															+        normalized = extract_history_articles(response)
														
 
															+        return _build_success_result("公众号历史发文列表", normalized)
														
 
															     except Exception as e:
														
 
															-        logger.error(e)
														
 
															-        return None
														
 
															-
														
 
															-    return extract_history_articles(response)
														
 
															+        logger.exception("fetch_account_article_list failed")
														
 
															+        return ToolResult(
														
 
															+            title="公众号历史发文获取失败",
														
 
															+            output="",
														
 
															+            error=str(e),
														
 
															+            metadata={"wx_gh": wx_gh, "index": index, "is_cache": is_cache},
														
 
															+        )
														
 
															 @tool(description="通过公众号文章链接获取文章详情")
														
 
															-async def fetch_article_detail(article_link: str, is_count: bool = False, is_cache: bool = True) -> dict | None:
														
 
															+async def fetch_article_detail(
														
 
															+    article_link: str,
														
 
															+    is_count: bool = False,
														
 
															+    is_cache: bool = True,
														
 
															+    ctx: ToolContext = None,
														
 
															+) -> ToolResult:
														
 
															     """
														
 
															     通过公众号的 文章链接获取文章详情
														
 
															     Args:
														
@@ -180,11 +215,19 @@ async def fetch_article_detail(article_link: str, is_count: bool = False, is_cac
 
															     try:
														
 
															         async with AsyncHttpClient(timeout=10) as http_client:
														
 
															             response = await http_client.post(target_url, headers=headers, data=payload)
														
 
															+        return _build_success_result("文章详情信息", response)
														
 
															     except Exception as e:
														
 
															-        print(e)
														
 
															-        return None
														
 
															-
														
 
															-    return response
														
 
															+        logger.exception("fetch_article_detail failed")
														
 
															+        return ToolResult(
														
 
															+            title="文章详情获取失败",
														
 
															+            output="",
														
 
															+            error=str(e),
														
 
															+            metadata={
														
 
															+                "article_link": article_link,
														
 
															+                "is_count": is_count,
														
 
															+                "is_cache": is_cache,
														
 
															+            },
														
 
															+        )
														
 
															 if __name__ == "__main__":