3 месяца назад · 76853dc862
--- a/agent/llm/openrouter.py
+++ b/agent/llm/openrouter.py
@@ -621,7 +621,6 @@ async def openrouter_llm_call(
 
				         }
			
 
				     """
			
 
				     api_key = os.getenv("OPEN_ROUTER_API_KEY")
			
 
				-    api_key = "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"
			
 
				     if not api_key:
			
 
				         raise ValueError("OPEN_ROUTER_API_KEY environment variable not set")
			
 
				 
			
--- a/tests/content_finder.prompt
+++ b/tests/content_finder.prompt
@@ -34,22 +34,26 @@ $system$
 
				 
			
 
				 3. **账号沉淀阶段（account_precipitation）**
			
 
				    - 对通过过滤的文章逐条调用 `fetch_weixin_account` 获取账号信息。
			
 
				-   - 账号聚合去重规则：优先 `account_id`，缺失时用 `account_name` 兜底。
			
 
				+   - 账号聚合去重规则：优先 `wx_gh`，缺失时用 `account_name` 兜底。
			
 
				    - 统计账号命中文章数与代表文章（用于内部沉淀与后续复用）。
			
 
				-   - 账号沉淀为流程内步骤，不改变本次最终输出 Schema。
			
 
				+   - 生成账号结果 `accounts` 与文章-账号关系 `article_account_relations`。
			
 
				 
			
 
				-4. **输出阶段**
			
 
				+4. **数据记录阶段（本地）**
			
 
				    - 先按 `output_schema` 生成并写入 `output.json`（JSON 格式）。
			
 
				-   - 仅输出通过过滤后的文章结果（按综合排序取前 M 条）。
			
 
				+   - 输出必须包含：文章结果 + 账号结果 + 文章-账号关系。
			
 
				+   - 本阶段仅保存本地文件，不写数据库。
			
 
				 
			
 
				 ## 强制要求（违反即为错误）
			
 
				 
			
 
				 ### 输出字段必须严格遵循 Schema
			
 
				-- 顶层字段只能有：`trace_id`、`query`、`demand_id`、`summary`、`contents`
			
 
				-- 每条内容字段只能有：`title`、`url`、`statistics`
			
 
				+- 顶层字段只能有：`trace_id`、`query`、`demand_id`、`summary`、`contents`、`accounts`、`article_account_relations`
			
 
				+- `contents` 每条字段只能有：`title`、`url`、`statistics`、`reason`
			
 
				+- `accounts` 每条字段只能有：`wx_gh`、`account_name`、`channel_account_id`、`biz_info`、`article_count`、`sample_articles`、`source_urls`
			
 
				+- `article_account_relations` 每条字段只能有：`article_url`、`wx_gh`
			
 
				 - **禁止自创字段**（如 `results`、`metrics`、`tags`、`platform` 等）
			
 
				 - **禁止使用中文 key**
			
 
				-- `summary` 中需简要说明三阶段执行情况（搜索数量、过滤后数量、沉淀账号数）
			
 
				+- `summary` 必须包含：`candidate_count`、`filtered_in_count`、`account_count`
			
 
				+- `title` 中英文双引号 `"` 必须标准化为中文双引号 `“`、`”`
			
 
				 
			
 
				 ## 流程自检
			
 
				 
			
@@ -57,12 +61,12 @@ $system$
 
				 
			
 
				 ### 输出、校验、入库顺序是否正确
			
 
				 - 无需写数据库，直接写入 `output.json` 即可。
			
 
				-- 输出沉淀的账号
			
 
				+- 必须输出沉淀的账号以及文章-账号关系
			
 
				 - **禁止**：未校验 Schema 就直接写入 `output.json`。
			
 
				 
			
 
				 
			
 
				 $user$
			
 
				-任务：找10个与「%query%」相关的、老年人感兴趣的文章。
			
 
				+任务：找 20 个与「%query%」相关的、老年人感兴趣的文章。
			
 
				 要求：
			
 
				 - 适合老年人分享观看
			
 
				 - 热度要高，质量要好
			
--- a/tests/run_single.py
+++ b/tests/run_single.py
@@ -1,6 +1,10 @@
 
				+from dotenv import load_dotenv
			
 
				+load_dotenv()
			
 
				+
			
 
				 from typing import Dict, Any, Optional
			
 
				 import os
			
 
				 from pathlib import Path
			
 
				+import json
			
 
				 
			
 
				 from tools import fetch_account_article_list, fetch_weixin_account, weixin_search
			
 
				 
			
@@ -19,6 +23,56 @@ logger = logging.getLogger(__name__)
 
				 PROJECT_ROOT = Path(__file__).resolve().parent
			
 
				 
			
 
				 
			
 
				+def _normalize_ascii_double_quotes(text: str) -> str:
			
 
				+    """将字符串中的 ASCII 双引号 `"` 规范化为中文双引号 `“`、`”`。"""
			
 
				+    if '"' not in text:
			
 
				+        return text
			
 
				+
			
 
				+    chars: list[str] = []
			
 
				+    open_quote = True
			
 
				+    for ch in text:
			
 
				+        if ch == '"':
			
 
				+            chars.append("“" if open_quote else "”")
			
 
				+            open_quote = not open_quote
			
 
				+        else:
			
 
				+            chars.append(ch)
			
 
				+    return "".join(chars)
			
 
				+
			
 
				+
			
 
				+def _sanitize_json_strings(value: Any) -> Any:
			
 
				+    if isinstance(value, str):
			
 
				+        return _normalize_ascii_double_quotes(value)
			
 
				+    if isinstance(value, list):
			
 
				+        return [_sanitize_json_strings(v) for v in value]
			
 
				+    if isinstance(value, dict):
			
 
				+        return {k: _sanitize_json_strings(v) for k, v in value.items()}
			
 
				+    return value
			
 
				+
			
 
				+
			
 
				+def _sanitize_output_json(output_json_path: Path) -> None:
			
 
				+    """
			
 
				+    任务完成后对 output.json 做后处理：
			
 
				+    - 递归清洗所有字符串值中的英文双引号 `"`
			
 
				+    - 保持合法 JSON
			
 
				+    """
			
 
				+    if not output_json_path.exists():
			
 
				+        logger.warning(f"未找到 output.json，跳过清洗: {output_json_path}")
			
 
				+        return
			
 
				+
			
 
				+    try:
			
 
				+        data = json.loads(output_json_path.read_text(encoding="utf-8"))
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f"output.json 解析失败，跳过清洗: {e}")
			
 
				+        return
			
 
				+
			
 
				+    cleaned = _sanitize_json_strings(data)
			
 
				+    output_json_path.write_text(
			
 
				+        json.dumps(cleaned, ensure_ascii=False, indent=2),
			
 
				+        encoding="utf-8"
			
 
				+    )
			
 
				+    logger.info(f"已完成 output.json 引号清洗: {output_json_path}")
			
 
				+
			
 
				+
			
 
				 async def run_agent(
			
 
				         query: Optional[str] = None,
			
 
				         demand_id: Optional[int] = None,
			
@@ -47,14 +101,14 @@ async def run_agent(
 
				     prompt = SimplePrompt(prompt_path)
			
 
				 
			
 
				     # output 目录
			
 
				-    output_dir = str(PROJECT_ROOT / "tests" / "output")
			
 
				+    output_dir = str(PROJECT_ROOT / "output")
			
 
				 
			
 
				     # 构建消息（替换 %query%、%output_dir%、%demand_id%）
			
 
				     demand_id_str = str(demand_id) if demand_id is not None else ""
			
 
				     messages = prompt.build_messages(query=query, output_dir=output_dir, demand_id=demand_id_str)
			
 
				 
			
 
				     # 初始化配置
			
 
				-    api_key = "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"
			
 
				+    api_key = os.getenv("OPEN_ROUTER_API_KEY")
			
 
				     if not api_key:
			
 
				         raise ValueError("OPEN_ROUTER_API_KEY 未设置")
			
 
				 
			
@@ -74,6 +128,7 @@ async def run_agent(
 
				         "weixin_search",
			
 
				         "fetch_weixin_account",
			
 
				         "fetch_account_article_list",
			
 
				+        "fetch_article_detail",
			
 
				     ]
			
 
				 
			
 
				     runner = AgentRunner(
			
@@ -90,14 +145,14 @@ async def run_agent(
 
				         tools=allowed_tools,
			
 
				         extra_llm_params={"max_tokens": 8192},
			
 
				         knowledge=KnowledgeConfig(
			
 
				-            enable_extraction=True,
			
 
				-            enable_completion_extraction=True,
			
 
				-            enable_injection=True,
			
 
				-            owner="content_finder_agent",
			
 
				-            default_tags={"project": "content_finder"},
			
 
				-            default_scopes=["com.piaoquantv.supply"],
			
 
				-            default_search_types=["tool", "usecase", "definition"],
			
 
				-            default_search_owner="content_finder_agent"
			
 
				+            enable_extraction=False,
			
 
				+            enable_completion_extraction=False,
			
 
				+            enable_injection=False,
			
 
				+            # owner="content_finder_agent",
			
 
				+            # default_tags={"project": "content_finder"},
			
 
				+            # default_scopes=["com.piaoquantv.supply"],
			
 
				+            # default_search_types=["tool", "usecase", "definition"],
			
 
				+            # default_search_owner="content_finder_agent"
			
 
				         )
			
 
				     )
			
 
				 
			
@@ -110,6 +165,9 @@ async def run_agent(
 
				                 trace_id = item.trace_id
			
 
				 
			
 
				                 if item.status == "completed":
			
 
				+                    if trace_id:
			
 
				+                        output_json_path = Path(output_dir) / trace_id / "output.json"
			
 
				+                        _sanitize_output_json(output_json_path)
			
 
				                     logger.info(f"Agent 执行完成: trace_id={trace_id}")
			
 
				                     return {
			
 
				                         "trace_id": trace_id,
			
--- a/tests/skills/account_precipitation.md
+++ b/tests/skills/account_precipitation.md
@@ -14,13 +14,33 @@ description: 账号沉淀策略（公众号账号信息提取与聚合）
 
				 ## 输入与输出
			
 
				 
			
 
				 ### 输入
			
 
				-- 文章列表（建议来自 `article_filter_strategy` 输出）
			
 
				-- 每条至少包含 `url`、`title`
			
 
				-- url链接必须是完整的链接，绝对不能修改或者截断
			
 
				+- `filtered_articles`：`array<object>`，建议来自 `article_filter_strategy` 输出
			
 
				+- `filtered_articles[i]` 必须包含：
			
 
				+  - `url`：`string`，完整文章链接（原样透传）
			
 
				+  - `title`：`string`，文章标题
			
 
				+
			
 
				+### 输入强约束（必须满足）
			
 
				+- 逐条以文章 `url` 作为 `fetch_weixin_account` 的 `content_link` 入参
			
 
				+- 禁止把 `wx_gh` 当作 `fetch_weixin_account` 的输入参数
			
 
				+- 禁止修改、截断、重写 `url`
			
 
				+- 任一条缺失 `url` 或 `title`，该条直接跳过
			
 
				 
			
 
				 ### 输出
			
 
				-- 去重后的账号列表
			
 
				-- 每个账号包含：`account_id`、`account_name`、`article_count`、`sample_articles`、`tags`（可选）
			
 
				+- `accounts`：`array<object>`，去重后的账号列表
			
 
				+- `accounts[i]` 固定字段：
			
 
				+  - `account_name`：`string`
			
 
				+  - `wx_gh`：`string`（公众号 ID）
			
 
				+  - `channel_account_id`：`string | int | null`
			
 
				+  - `biz_info`：`object | null`
			
 
				+  - `article_count`：`int`
			
 
				+  - `sample_articles`：`array<string>`（最多 5 条）
			
 
				+  - `source_urls`：`array<string>`（可追溯来源链接）
			
 
				+
			
 
				+### 输出强约束（必须满足）
			
 
				+- 仅允许输出上述字段，禁止额外字段
			
 
				+- `account_name/wx_gh/channel_account_id/biz_info` 必须来自 `metadata.account_info`
			
 
				+- `article_count` 必须等于该账号聚合到的文章数
			
 
				+- `source_urls` 必须为真实输入文章链接，不得编造
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -29,9 +49,20 @@ description: 账号沉淀策略（公众号账号信息提取与聚合）
 
				 ### 必用工具
			
 
				 - `fetch_weixin_account`：根据文章查询其所属公众号账号信息
			
 
				 
			
 
				+### 工具 I/O 契约（强约束）
			
 
				+- 入参固定：
			
 
				+  - `content_link` ← 文章 `url`
			
 
				+- 仅从以下路径读取账号信息：
			
 
				+  - `metadata.account_info.account_name`
			
 
				+  - `metadata.account_info.wx_gh`
			
 
				+  - `metadata.account_info.biz_info`
			
 
				+  - `metadata.account_info.channel_account_id`
			
 
				+- 返回 `None` 或缺失 `metadata.account_info` 时：
			
 
				+  - 当前文章不参与账号沉淀，记录为失败样本但不编造账号
			
 
				+
			
 
				 ### 调用顺序
			
 
				-1. 遍历文章，逐条调用 `fetch_weixin_account`， 输入的信息是 `wx_gh`, 是一个以 `gh_` 开头的字符串
			
 
				-2. 将返回账号按唯一键聚合（优先 `account_id`）
			
 
				+1. 遍历文章，逐条调用 `fetch_weixin_account(content_link=url)`
			
 
				+2. 将返回账号按唯一键聚合（优先 `wx_gh`，若缺失则用 `account_name`）
			
 
				 3. 统计每个账号命中文章数与代表文章
			
 
				 4. 生成账号沉淀结果
			
 
				 
			
@@ -39,10 +70,11 @@ description: 账号沉淀策略（公众号账号信息提取与聚合）
 
				 
			
 
				 ## 聚合与去重规则
			
 
				 
			
 
				-- 优先用 `account_id` 去重；若无 `account_id`，使用 `account_name` 兜底
			
 
				+- 优先用 `wx_gh` 去重；若无 `wx_gh`，使用 `account_name` 兜底
			
 
				 - 同一账号下累计：
			
 
				   - `article_count += 1`
			
 
				-  - 将文章标题加入 `sample_articles`（保留前 3-5 条示例）
			
 
				+  - 将文章标题加入 `sample_articles`（最多保留 5 条）
			
 
				+  - 将文章链接加入 `source_urls`（去重保留）
			
 
				 - 若账号字段冲突，采用“非空优先 + 最新记录优先”策略
			
 
				 
			
 
				 ---
			
@@ -63,20 +95,25 @@ description: 账号沉淀策略（公众号账号信息提取与聚合）
 
				 
			
 
				 - 账号信息必须来自 `fetch_weixin_account` 返回
			
 
				 - 不得根据标题或 URL 猜测账号名
			
 
				-- 文章与账号映射关系必须可追溯（建议保留 `source_urls`）
			
 
				+- 文章与账号映射关系必须可追溯（必须保留 `source_urls`）
			
 
				+- 禁止解析工具 `output` 文本，必须使用 `metadata.account_info`
			
 
				 
			
 
				 ---
			
 
				 
			
 
				-## 输出建议格式
			
 
				+## 输出固定格式（必须严格遵守）
			
 
				 
			
 
				 ```json
			
 
				-[
			
 
				-  {
			
 
				-    "account_id": "gh_xxx",
			
 
				-    "account_name": "示例公众号",
			
 
				-    "article_count": 4,
			
 
				-    "sample_articles": ["标题A", "标题B", "标题C"],
			
 
				-    "tags": ["健康科普", "防诈骗"]
			
 
				-  }
			
 
				-]
			
 
				+{
			
 
				+  "accounts": [
			
 
				+    {
			
 
				+      "account_name": "示例公众号",
			
 
				+      "wx_gh": "gh_xxx",
			
 
				+      "channel_account_id": "12345",
			
 
				+      "biz_info": {},
			
 
				+      "article_count": 4,
			
 
				+      "sample_articles": ["标题A", "标题B", "标题C"],
			
 
				+      "source_urls": ["https://mp.weixin.qq.com/s?..."]
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
 
				 ```
			
--- a/tests/skills/article_filter_strategy.md
+++ b/tests/skills/article_filter_strategy.md
@@ -7,21 +7,44 @@ description: 文章过滤与筛选策略（老年人兴趣向）
 
				 
			
 
				 ## 目标
			
 
				 
			
 
				-在 `article_finding_strategy` 拿到候选文章后，调用 `fetch_article_detail` 获取详情，筛选出更适合老年人、且老年人更感兴趣的内容，输出可直接使用的高质量文章集合。
			
 
				+在 `article_finding_strategy` 拿到候选文章后，调用 `fetch_article_detail` 获取详情，筛选出更适合老年人、且老年人更感兴趣的内容，
			
 
				+输出可直接使用的高质量文章集合。
			
 
				 
			
 
				 ---
			
 
				 
			
 
				 ## 输入与输出
			
 
				 
			
 
				 ### 输入
			
 
				-- `input_query`（用户原始需求）
			
 
				-- 候选文章列表（至少包含 `url`、`title`、`statistics.time`）
			
 
				-- 目标数量 **M**
			
 
				+- `input_query`：`string`，用户原始需求，非空
			
 
				+- `target_count`：`int`，目标数量，`target_count >= 1`
			
 
				+- `candidate_articles`：`array<object>`，候选文章列表，非空数组
			
 
				+- `candidate_articles[i]` 必须包含：
			
 
				+  - `title`：`string`，非空
			
 
				+  - `url`：`string`，完整文章链接，禁止截断/改写
			
 
				+  - `statistics.time`：`int`，秒级时间戳
			
 
				+
			
 
				+### 输入强约束（必须满足）
			
 
				+- 只接受上述字段；禁止传入未定义字段作为筛选依据
			
 
				+- 严禁从工具 `output` 文本解析结构化数据
			
 
				+- 候选文章字段必须来自同一条 `metadata.search_results` 记录
			
 
				+- 任一候选缺失 `title/url/statistics.time`，该条直接丢弃，不补造字段
			
 
				 
			
 
				 ### 输出
			
 
				-- 通过筛选的文章列表（最多 M 条）
			
 
				-- 每条保留：`title`、`url`、`publish_time`、`reason`（入选原因）
			
 
				-- 可选附加：`score`（用于内部排序）
			
 
				+- `filtered_articles`：`array<object>`，最多 `target_count` 条
			
 
				+- `filtered_articles[i]` 字段固定为：
			
 
				+  - `title`：`string`
			
 
				+  - `url`：`string`
			
 
				+  - `publish_time`：`int`（秒级时间戳）
			
 
				+  - `reason`：`string`（基于证据的入选原因）
			
 
				+  - `relevance_level`：`"high" | "medium" | "low"`
			
 
				+  - `interest_level`：`"high" | "medium" | "low"`
			
 
				+
			
 
				+### 输出强约束（必须满足）
			
 
				+- 只允许输出上述字段，禁止增加自定义 key
			
 
				+- `title/url/publish_time` 必须与原候选记录或详情记录一致，不得改写
			
 
				+- `publish_time` 统一为秒级：若详情返回毫秒时间戳（`publish_timestamp`），需整除 1000
			
 
				+- `reason` 必须可被详情证据支撑，禁止主观臆测
			
 
				+- `title` 必须做引号标准化：若包含英文双引号 `"`，统一替换为中文双引号 `“` 和 `”`（成对），禁止保留英文双引号
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -30,11 +53,24 @@ description: 文章过滤与筛选策略（老年人兴趣向）
 
				 ### 必用工具
			
 
				 - `fetch_article_detail`：用于查询文章详情（正文、摘要、标签、互动信息等）
			
 
				 
			
 
				+### 工具 I/O 契约（强约束）
			
 
				+- 入参固定：
			
 
				+  - `article_link` ← 候选文章 `url`
			
 
				+  - `is_count` 使用默认值（不强制改写）
			
 
				+  - `is_cache` 使用默认值（不强制改写）
			
 
				+- 只从以下路径读取详情：
			
 
				+  - `metadata.article_info.title`
			
 
				+  - `metadata.article_info.content_link`
			
 
				+  - `metadata.article_info.body_text`
			
 
				+  - `metadata.article_info.publish_timestamp`（毫秒）
			
 
				+- 调用失败（返回 `None` 或缺失 `metadata.article_info`）：
			
 
				+  - 当前文章标记为“详情缺失”，不进入最终结果
			
 
				+
			
 
				 ### 调用顺序
			
 
				 1. 先基于候选列表做初筛（去重、基础质量过滤）
			
 
				 2. 对初筛后的文章逐条调用 `fetch_article_detail`
			
 
				 3. 根据详情做老年人兴趣打分与淘汰
			
 
				-4. 按得分排序，输出前 M 条
			
 
				+4. 按得分排序，输出前 `target_count` 条
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -75,9 +111,9 @@ description: 文章过滤与筛选策略（老年人兴趣向）
 
				 
			
 
				 ## 结果数量控制
			
 
				 
			
 
				-- 若通过数 **C >= M**：按评分取前 M 条输出
			
 
				-- 若 **M × 0.8 <= C < M**：输出当前结果并标注“数量接近目标”
			
 
				-- 若 **C < M × 0.8**：返回上游继续补充候选，再进入本流程
			
 
				+- 若通过数 **C >= target_count**：按评分取前 `target_count` 条输出
			
 
				+- 若 **target_count × 0.8 <= C < target_count**：输出当前结果并标注“数量接近目标”
			
 
				+- 若 **C < target_count × 0.8**：返回上游继续补充候选，再进入本流程
			
 
				 
			
 
				 ---
			
 
				 
			
@@ -86,18 +122,25 @@ description: 文章过滤与筛选策略（老年人兴趣向）
 
				 - 仅使用工具返回数据，不编造字段
			
 
				 - `title`、`url`、`publish_time` 必须来自同一条记录
			
 
				 - `reason` 必须基于可验证内容生成（例如“涉及防诈骗案例且表达清晰”）
			
 
				+- 优先使用结构化路径：搜索结果取 `metadata.search_results`，详情取 `metadata.article_info`
			
 
				+- 禁止把不同文章的字段进行拼接混用
			
 
				+- 允许在不改变语义的前提下对 `title` 做最小清洗：去 HTML 标签 + 英文双引号标准化（`"` -> `“”`）
			
 
				 
			
 
				 ---
			
 
				 
			
 
				-## 输出建议格式
			
 
				+## 输出固定格式（必须严格遵守）
			
 
				 
			
 
				 ```json
			
 
				-[
			
 
				-  {
			
 
				-    "title": "文章标题",
			
 
				-    "url": "完整链接",
			
 
				-    "publish_time": 1710000000,
			
 
				-    "reason": "适合老年人阅读：主题实用、表达清晰、信息可信"
			
 
				-  }
			
 
				-]
			
 
				+{
			
 
				+  "filtered_articles": [
			
 
				+    {
			
 
				+      "title": "文章标题",
			
 
				+      "url": "完整链接",
			
 
				+      "publish_time": 1710000000,
			
 
				+      "reason": "适合老年人阅读：围绕 query、信息清晰且证据充分",
			
 
				+      "relevance_level": "high",
			
 
				+      "interest_level": "high"
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
 
				 ```
			
--- a/tests/skills/article_finding_strategy.md
+++ b/tests/skills/article_finding_strategy.md
@@ -34,14 +34,13 @@ description: 内容搜索方法论
 
				 
			
 
				 ### 字段完整性要求
			
 
				 - `url`：文章链接，必须**逐字符完整复制**，不能截断或修改。
			
 
				-- `title`: 文章标题，必须来自**同一条记录**，不能混用，去掉标题中的 html 符号（如 `<p>`、`</p>` 等）。
			
 
				-- `title`: 标题中若出现英文双引号（`"`），需要把标题中的双引号换成中文双引号(`“”`）。
			
 
				+- `title`: 文章标题，必须来自**同一条记录**，不能混用，去掉标题中的 html 符号（如 `<p>`、`</p>` 等）；标题中若出现英文双引号（`"`），必须标准化为中文双引号（成对 `“`、`”`），禁止保留英文双引号。
			
 
				 - `statistics.time`: 文章发布时间戳（秒），必须来自**同一条记录**，不能混用。
			
 
				 ### 正确做法
			
 
				 ```python
			
 
				 item = metadata.search_results[0]
			
 
				 url = item["url"]
			
 
				-title = item["title"].replace('"', '“')  # 完整复制符
			
 
				+title = normalize_quotes(item["title"])  # 英文双引号标准化为中文双引号“”
			
 
				 ```
			
 
				 
			
 
				 ### 禁止行为
			
--- a/tests/skills/output_schema.md
+++ b/tests/skills/output_schema.md
@@ -1,85 +1,101 @@
 
				 ---
			
 
				 name: output_schema
			
 
				-description: 输出结果指南
			
 
				+description: 微信文章搜索任务输出结构规范（文章+账号+关系）
			
 
				 ---
			
 
				 
			
 
				 ## 输出结果指南
			
 
				 
			
 
				-### 输出目录
			
 
				-输出 JSON 写入到output_dir目录下当次执行的 trace_id 目录内的 `output.json` 文件。
			
 
				-**获取路径方式**：先调用 `get_current_context` 获取 `trace_id` 和 `output_dir`，再使用 `write_file` 写入 `{output_dir}/{trace_id}/output.json`。
			
 
				+### 输出目录（本地 JSON）
			
 
				+输出结果必须写入当前任务目录下的 `output.json` 文件。
			
 
				 
			
 
				-### **输出 JSON Schema**
			
 
				+- 先调用 `get_current_context` 获取 `trace_id` 和 `output_dir`
			
 
				+- 再调用 `write_file` 写入 `{output_dir}/{trace_id}/output.json`
			
 
				 
			
 
				-> ⚠️ 所有字段名必须与下面完全一致，禁止自创字段名（如 `results`、`metrics`、`like_count`、`age_distribution`、`platform` 等）
			
 
				+---
			
 
				+
			
 
				+## 输出 JSON Schema（强约束）
			
 
				+
			
 
				+> 所有字段名必须与下面完全一致；禁止增删顶层字段、禁止自创字段名。
			
 
				 
			
 
				 ```json
			
 
				 {
			
 
				-  "trace_id": "<由系统生成的真实 trace_id；如果你不知道就填空字符串，程序会覆盖修正>",
			
 
				-  "query": "<本次任务的 query>",
			
 
				-  "demand_id": "<来自 user 消息的搜索词 id>",
			
 
				+  "trace_id": "<真实 trace_id>",
			
 
				+  "query": "<本次任务 query>",
			
 
				+  "demand_id": "<搜索词 id，无则空字符串>",
			
 
				   "summary": {
			
 
				     "candidate_count": 0,
			
 
				-    "portrait_content_like_count": 0,
			
 
				-    "portrait_account_fans_count": 0,
			
 
				-    "portrait_none_count": 0,
			
 
				-    "filtered_in_count": 0
			
 
				-  },
			
 
				-  "good_account_expansion": {
			
 
				-    "enabled": false,
			
 
				-    "accounts": [
			
 
				-      {
			
 
				-        "author_nickname": "<作者名>",
			
 
				-        "author_sec_uid": "<完整 sec_uid>",
			
 
				-        "age_50_plus_ratio": null,
			
 
				-        "age_50_plus_tgi": null
			
 
				-      }
			
 
				-    ]
			
 
				+    "filtered_in_count": 0,
			
 
				+    "account_count": 0
			
 
				   },
			
 
				   "contents": [
			
 
				     {
			
 
				-      "title": "<来自 metadata 的标题/desc>",
			
 
				-      "aweme_id": "内容id",
			
 
				-      "rank": 1,
			
 
				-      "video_url": "https://www.douyin.com/video/<aweme_id>",
			
 
				-      "author_nickname": "作者名",
			
 
				-      "author_sec_uid": "作者id",
			
 
				-      "author_url": "https://www.douyin.com/user/<author_sec_uid>",
			
 
				+      "title": "<文章标题>",
			
 
				+      "url": "<完整文章链接>",
			
 
				       "statistics": {
			
 
				-        "digg_count": 0,
			
 
				-        "comment_count": 0,
			
 
				-        "share_count": 0
			
 
				-      },
			
 
				-      "portrait_data": {
			
 
				-        "source": "content_like | account_fans | none",
			
 
				-        "age_50_plus_ratio": null,
			
 
				-        "age_50_plus_tgi": null,
			
 
				-        "url": "画像链接"
			
 
				+        "time": 1710000000
			
 
				       },
			
 
				-      "reason": "<入选理由>"
			
 
				+      "reason": "<入选原因>"
			
 
				+    }
			
 
				+  ],
			
 
				+  "accounts": [
			
 
				+    {
			
 
				+      "wx_gh": "<公众号ID>",
			
 
				+      "account_name": "<公众号名称>",
			
 
				+      "channel_account_id": "<内部账号ID或空字符串>",
			
 
				+      "biz_info": {},
			
 
				+      "article_count": 0,
			
 
				+      "sample_articles": [],
			
 
				+      "source_urls": []
			
 
				+    }
			
 
				+  ],
			
 
				+  "article_account_relations": [
			
 
				+    {
			
 
				+      "article_url": "<文章链接>",
			
 
				+      "wx_gh": "<公众号ID>"
			
 
				     }
			
 
				   ]
			
 
				 }
			
 
				 ```
			
 
				 
			
 
				-### 易错字段说明
			
 
				+---
			
 
				 
			
 
				-| 字段 | 正确写法 | 错误写法（禁止） |
			
 
				-|---|---|---|
			
 
				-| 点赞数 | `statistics.digg_count` | `statistics.like_count` / `metrics.likes` |
			
 
				-| 50岁以上占比 | `portrait_data.age_50_plus_ratio` | `portrait_data.age_distribution["50+"]` |
			
 
				-| 50岁以上偏好度 | `portrait_data.age_50_plus_tgi` | 任何其他写法 |
			
 
				-| 画像来源 | `portrait_data.source` 值为 `content_like` / `account_fans` / `none` | `"content"` / `"account"` 等缩写 |
			
 
				-| 优质账号扩展 | `good_account_expansion` 为**对象**，含 `enabled` + `accounts` | 直接输出为**数组** |
			
 
				-| 摘要 | `summary` 为**对象**，含 `candidate_count` 等字段 | `summary` 为字符串 |
			
 
				+## 字段约束（必须遵守）
			
 
				 
			
 
				-### portrait_data 字段规则
			
 
				+### 顶层字段
			
 
				+- 顶层字段只能有：`trace_id`、`query`、`demand_id`、`summary`、`contents`、`accounts`、`article_account_relations`
			
 
				+- `trace_id` 必须使用真实 trace_id，不得伪造
			
 
				+
			
 
				+### contents
			
 
				+- 每条内容字段只能有：`title`、`url`、`statistics`、`reason`
			
 
				+- `statistics` 只能有：`time`
			
 
				+- `title` 若含英文双引号 `"`，必须标准化为中文双引号 `“`、`”`
			
 
				+- `url` 必须是完整原始链接，禁止截断/改写
			
 
				+
			
 
				+### accounts
			
 
				+- 每条账号字段只能有：`wx_gh`、`account_name`、`channel_account_id`、`biz_info`、`article_count`、`sample_articles`、`source_urls`
			
 
				+- `wx_gh`、`account_name` 必须来自 `fetch_weixin_account` 的 `metadata.account_info`
			
 
				+- `article_count` 必须等于该账号下实际沉淀文章数
			
 
				+- `source_urls` 内链接必须可在 `contents[*].url` 找到对应来源
			
 
				+
			
 
				+### article_account_relations
			
 
				+- 每条关系字段只能有：`article_url`、`wx_gh`
			
 
				+- `article_url` 必须存在于 `contents[*].url`
			
 
				+- `wx_gh` 必须存在于 `accounts[*].wx_gh`
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 一致性校验（写文件前必须检查）
			
 
				+
			
 
				+- `summary.candidate_count >= summary.filtered_in_count`
			
 
				+- `summary.account_count == len(accounts)`
			
 
				+- `len(contents) == summary.filtered_in_count`（若策略要求输出全部入选）
			
 
				+- 每个 `accounts[*].article_count` 应等于该账号在 `article_account_relations` 中出现次数
			
 
				+- 禁止输出 `null` 作为必填字符串字段（改为空字符串）
			
 
				+
			
 
				+---
			
 
				 
			
 
				-- `source="content_like"` → `url = https://douhot.douyin.com/video/detail?active_tab=video_fans&video_id={aweme_id}`
			
 
				-- `source="account_fans"` → `url = https://douhot.douyin.com/creator/detail?active_tab=creator_fans_portrait&creator_id={author_sec_uid}`
			
 
				-- `source="none"` → `url=null`，`age_50_plus_ratio=null`，`age_50_plus_tgi=null`
			
 
				+## 本地记录要求
			
 
				 
			
 
				-## JSON 编写规范
			
 
				-- 字符串值中若有双引号 `"`，必须写成 `\"`（反斜杠 + 双引号）
			
 
				-- 若有反斜杠 `\`，必须写成 `\\`
			
 
				-- 若标题含引号，建议使用中文引号「」避免转义，或严格转义为 \"
			
 
				+- 本阶段只做本地落盘：`output.json`
			
 
				+- 不写数据库，不调用入库工具
			
 
				+- 文件内容必须是合法 JSON（可被 `json.loads` 解析）
			
--- a/tests/tools/weixin_tools.py
+++ b/tests/tools/weixin_tools.py
@@ -3,7 +3,7 @@ from __future__ import annotations
 
				 import json
			
 
				 import logging
			
 
				 
			
 
				-from agent.tools import tool, ToolResult
			
 
				+from agent.tools import tool, ToolContext, ToolResult
			
 
				 from src.infra.shared.http_client import AsyncHttpClient
			
 
				 from src.infra.shared.common import extract_history_articles
			
 
				 
			
@@ -14,8 +14,23 @@ base_url = "http://crawler-cn.aiddit.com/crawler/wei_xin"
 
				 headers = {"Content-Type": "application/json"}
			
 
				 
			
 
				 
			
 
				+def _build_success_result(title: str, response: dict) -> ToolResult:
			
 
				+    """把上游响应规范为 ToolResult。"""
			
 
				+    output = response.get("output")
			
 
				+    if not output:
			
 
				+        output = json.dumps(response, ensure_ascii=False)
			
 
				+
			
 
				+    metadata = response.get("metadata")
			
 
				+    if not isinstance(metadata, dict):
			
 
				+        metadata = {"raw_data": response}
			
 
				+    elif "raw_data" not in metadata:
			
 
				+        metadata["raw_data"] = response
			
 
				+
			
 
				+    return ToolResult(title=title, output=output, metadata=metadata)
			
 
				+
			
 
				+
			
 
				 @tool(description="通过关键词搜索微信文章")
			
 
				-async def weixin_search(keyword: str, page="1") -> dict | None:
			
 
				+async def weixin_search(keyword: str, page: str = "1", ctx: ToolContext = None) -> ToolResult:
			
 
				     """
			
 
				         微信关键词搜索
			
 
				 
			
@@ -44,16 +59,19 @@ async def weixin_search(keyword: str, page="1") -> dict | None:
 
				     try:
			
 
				         async with AsyncHttpClient(timeout=120) as http_client:
			
 
				             response = await http_client.post(url=url, headers=headers, data=payload)
			
 
				-
			
 
				+        return _build_success_result("微信文章搜索结果", response)
			
 
				     except Exception as e:
			
 
				-        print(e)
			
 
				-        return None
			
 
				-    print(json.dumps(response, ensure_ascii=False, indent=4))
			
 
				-    return response
			
 
				+        logger.exception("weixin_search failed")
			
 
				+        return ToolResult(
			
 
				+            title="微信文章搜索失败",
			
 
				+            output="",
			
 
				+            error=str(e),
			
 
				+            metadata={"keyword": keyword, "page": page},
			
 
				+        )
			
 
				 
			
 
				 
			
 
				 @tool(description="通过公众号文章链接获取公众号详情信息")
			
 
				-async def fetch_weixin_account(content_link: str) -> dict | None:
			
 
				+async def fetch_weixin_account(content_link: str, ctx: ToolContext = None) -> ToolResult:
			
 
				     """
			
 
				         通过公众号文章链接获取公众号的详情信息
			
 
				 
			
@@ -79,16 +97,24 @@ async def fetch_weixin_account(content_link: str) -> dict | None:
 
				     try:
			
 
				         async with AsyncHttpClient(timeout=120) as http_client:
			
 
				             response = await http_client.post(url=url, headers=headers, data=payload)
			
 
				-
			
 
				+        return _build_success_result("公众号详情信息", response)
			
 
				     except Exception as e:
			
 
				-        logger.error(e)
			
 
				-        return None
			
 
				-    print(json.dumps(response, ensure_ascii=False, indent=4))
			
 
				-    return response
			
 
				+        logger.exception("fetch_weixin_account failed")
			
 
				+        return ToolResult(
			
 
				+            title="公众号详情获取失败",
			
 
				+            output="",
			
 
				+            error=str(e),
			
 
				+            metadata={"content_link": content_link},
			
 
				+        )
			
 
				 
			
 
				 
			
 
				 @tool(description="通过微信公众号的 wx_gh 获取微信公众号的历史发文列表")
			
 
				-async def fetch_account_article_list(wx_gh: str, index=None, is_cache=True) -> dict | None:
			
 
				+async def fetch_account_article_list(
			
 
				+    wx_gh: str,
			
 
				+    index: str | None = None,
			
 
				+    is_cache: bool = True,
			
 
				+    ctx: ToolContext = None,
			
 
				+) -> ToolResult:
			
 
				     """
			
 
				     通过公众号的 wx_gh 获取历史发文列表
			
 
				 
			
@@ -134,16 +160,25 @@ async def fetch_account_article_list(wx_gh: str, index=None, is_cache=True) -> d
 
				     try:
			
 
				         async with AsyncHttpClient(timeout=120) as http_client:
			
 
				             response = await http_client.post(url=url, headers=headers, data=payload)
			
 
				-
			
 
				+        normalized = extract_history_articles(response)
			
 
				+        return _build_success_result("公众号历史发文列表", normalized)
			
 
				     except Exception as e:
			
 
				-        logger.error(e)
			
 
				-        return None
			
 
				-
			
 
				-    return extract_history_articles(response)
			
 
				+        logger.exception("fetch_account_article_list failed")
			
 
				+        return ToolResult(
			
 
				+            title="公众号历史发文获取失败",
			
 
				+            output="",
			
 
				+            error=str(e),
			
 
				+            metadata={"wx_gh": wx_gh, "index": index, "is_cache": is_cache},
			
 
				+        )
			
 
				 
			
 
				 
			
 
				 @tool(description="通过公众号文章链接获取文章详情")
			
 
				-async def fetch_article_detail(article_link: str, is_count: bool = False, is_cache: bool = True) -> dict | None:
			
 
				+async def fetch_article_detail(
			
 
				+    article_link: str,
			
 
				+    is_count: bool = False,
			
 
				+    is_cache: bool = True,
			
 
				+    ctx: ToolContext = None,
			
 
				+) -> ToolResult:
			
 
				     """
			
 
				     通过公众号的 文章链接获取文章详情
			
 
				     Args:
			
@@ -180,11 +215,19 @@ async def fetch_article_detail(article_link: str, is_count: bool = False, is_cac
 
				     try:
			
 
				         async with AsyncHttpClient(timeout=10) as http_client:
			
 
				             response = await http_client.post(target_url, headers=headers, data=payload)
			
 
				+        return _build_success_result("文章详情信息", response)
			
 
				     except Exception as e:
			
 
				-        print(e)
			
 
				-        return None
			
 
				-
			
 
				-    return response
			
 
				+        logger.exception("fetch_article_detail failed")
			
 
				+        return ToolResult(
			
 
				+            title="文章详情获取失败",
			
 
				+            output="",
			
 
				+            error=str(e),
			
 
				+            metadata={
			
 
				+                "article_link": article_link,
			
 
				+                "is_count": is_count,
			
 
				+                "is_cache": is_cache,
			
 
				+            },
			
 
				+        )
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":