1 mese fa · 887a97001b
--- a/.claude/settings.local.json
+++ b/.claude/settings.local.json
@@ -20,9 +20,38 @@
 
				       "Bash(TOOL_AGENT_ROUTER_URL=\"http://43.106.118.91:8001\" python:*)",
			
 
				       "Bash(rm /Users/sunlit/.claude/skills/agent /Users/sunlit/.claude/skills/toolhub /Users/sunlit/.claude/skills/knowhub /Users/sunlit/.claude/skills/content-search)",
			
 
				       "Read(//Users/sunlit/.claude/skills/**)",
			
 
				-      "Read(//c/c/Users/11304/gitlab/cybertogether/Agent-tao_test/**)",
			
 
				-      "Read(//c/c/Users/11304/gitlab/cybertogether/Agent-tao_test/agent/tools/**)",
			
 
				-      "Read(//c/c/Users/11304/gitlab/cybertogether/Agent-tao_test/agent/llm/**)"
			
 
				+      "Bash(git checkout *)",
			
 
				+      "Bash(git stash *)",
			
 
				+      "Bash(python3 -c ' *)",
			
 
				+      "Bash(python3 -c \"import json;d=json.load\\(open\\('evaluation/high_priority_queries_full.json'\\)\\);q=d.get\\('queries',d\\);print\\('queries:',len\\(q\\)\\);print\\('sample:',json.dumps\\(q[0],ensure_ascii=False\\)[:200]\\)\")",
			
 
				+      "Bash(rmdir fixed_query_eval/runs)",
			
 
				+      "Bash(mkdir -p fixed_query_eval/runs_full)",
			
 
				+      "Bash(cp server.py fixed_query_eval/server.py)",
			
 
				+      "Bash(cp index.html fixed_query_eval/index.html)",
			
 
				+      "Bash(.venv/bin/python -c \"import PIL; print\\('PIL OK', PIL.__version__\\)\")",
			
 
				+      "Bash(.venv/bin/pip list *)",
			
 
				+      "Bash(command -v uv)",
			
 
				+      "Bash(uv run *)",
			
 
				+      "Bash(.venv/bin/python -c \"from examples.process_pipeline.script.llm_evaluate_sources import build_eval_llm_call\")",
			
 
				+      "mcp__plugin_superpowers-chrome_chrome__use_browser",
			
 
				+      "Bash(kill 14972)",
			
 
				+      "Bash(rm -rf examples/process_pipeline/script/search_eval/fixed_query_eval/runs_full/q0000 examples/process_pipeline/script/search_eval/fixed_query_eval/runs_full/q0001 examples/process_pipeline/script/search_eval/fixed_query_eval/runs_full/summary.json fqe_verify)",
			
 
				+      "Bash(ls -la \"examples/process_pipeline/script/search_eval/fixed_query_eval/runs_full/\")",
			
 
				+      "Bash(lsof -nP -iTCP:8770 -sTCP:LISTEN)",
			
 
				+      "Bash(/usr/libexec/ApplicationFirewall/socketfilterfw --getglobalstate)",
			
 
				+      "Bash(/usr/libexec/ApplicationFirewall/socketfilterfw --getblockall)",
			
 
				+      "Bash(/usr/libexec/ApplicationFirewall/socketfilterfw --getstealthmode)",
			
 
				+      "Bash(ipconfig getifaddr *)",
			
 
				+      "Bash(env)",
			
 
				+      "Bash(cloudflared --version)",
			
 
				+      "Bash(brew --version)",
			
 
				+      "Bash(brew install *)",
			
 
				+      "Bash(rm -f /tmp/cf_tunnel.log)",
			
 
				+      "Bash(nohup cloudflared tunnel --url http://localhost:8770)",
			
 
				+      "Bash(echo \"cloudflared PID: $!\")",
			
 
				+      "Bash(xargs kill -9)",
			
 
				+      "Bash(awk '{print $9, $5}')",
			
 
				+      "Bash(awk '{printf \"%-18s %s bytes\\\\n\", $9, $5}')"
			
 
				     ],
			
 
				     "deny": [],
			
 
				     "ask": []
			
--- a/.gitignore
+++ b/.gitignore
@@ -110,3 +110,6 @@ data/.mcp.json
 
				 *.bat
			
 
				 HOW_IT_RUNS.md
			
 
				 PROJECT_STRUCTURE.md
			
 
				+runs_full/
			
 
				+.ocr_cache
			
 
				+fixed_query_eval/docs/
			
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/README.md
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/README.md
@@ -0,0 +1,201 @@
 
				+# fixed_query_eval · 固定 Query 搜索评估
			
 
				+
			
 
				+> 在 `search_eval/` 下新建的自包含模块。**用写死的 4 组 query（不走动作×类型正交矩阵选词）**，
			
 
				+> 对每组 query 做「同义扩展 + 多渠道搜索 + 合并去重 + LLM 评分」，并提供一个去掉矩阵的查看界面。
			
 
				+>
			
 
				+> **不改动任何原 `search_eval/` 文件**——搜索/评估引擎全部 `import` 复用，server/index 是复制后改造。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 1. 这个模块做什么
			
 
				+
			
 
				+```
			
 
				+4 组写死的 query（产品名 + 意图词）
			
 
				+   │  产品名锁死，只对意图词扩同义（评测→测评/实测/体验；案例→作品/效果/实例）
			
 
				+   ▼
			
 
				+每组多措辞 × 3 渠道(xhs/x/gzh) 各搜 10 条
			
 
				+   │  search_all 内建按 (platform, cid) 去重 + found_by_queries 记录命中措辞
			
 
				+   ▼
			
 
				+合并去重后的帖子池 → 视频转写 → LLM 多模态评分（同 search_eval 的 rubric）
			
 
				+   ▼
			
 
				+runs_full/q000N/form_A.json  →  server.py(:8770) 卡片界面浏览/筛选/排序/工序提取
			
 
				+```
			
 
				+
			
 
				+与原 `batch_3forms.py` 的区别：**不生成 query、不做 A/B/C 三形式对比**，只用固定 4 组 query 直接搜，单一形式（沿用 `form_A` 字段名让前端零改动）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 2. 文件结构
			
 
				+
			
 
				+```
			
 
				+fixed_query_eval/
			
 
				+├── run_search.py    ★ 生产脚本：固定 query + 同义扩展 + 合并去重 + 评分 → runs_full/
			
 
				+├── tool_extract.py  ★ 工具解构：帖子 → 结构化工具知识条目（gemini-3.1-flash-lite）
			
 
				+├── server.py        查看后端（复制自 ../server.py + 加 sys.path + 工具解构端点；矩阵自动降级）
			
 
				+├── index.html       查看前端（删正交矩阵 + 4-query 选择条 + 工具解构按钮/弹层/详情 tab）
			
 
				+├── runs_full/       产出目录
			
 
				+│   └── q000N/
			
 
				+│       ├── form_A.json          搜索+评分结果
			
 
				+│       └── tools/{case_id}.json  工具解构结果（每帖一份）
			
 
				+└── README.md        本文件
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 3. 改 query / 同义词（最常改的地方）
			
 
				+
			
 
				+全部集中在 `run_search.py` 顶部常量，改完直接重跑：
			
 
				+
			
 
				+```python
			
 
				+QUERIES = [
			
 
				+    {"id": "q0000", "product": "GPT image2",      "intent": "评测"},
			
 
				+    {"id": "q0001", "product": "GPT image2",      "intent": "案例"},
			
 
				+    {"id": "q0002", "product": "nano banana pro", "intent": "评测"},
			
 
				+    {"id": "q0003", "product": "nano banana pro", "intent": "案例"},
			
 
				+]
			
 
				+INTENT_SYNONYMS = {
			
 
				+    "评测": ["评测", "测评", "实测", "体验"],     # 产品名 + 这些词分别搜，再合并去重
			
 
				+    "案例": ["案例", "作品", "效果", "实例"],
			
 
				+}
			
 
				+DEFAULT_PLATFORMS = "xhs,x,gzh"
			
 
				+DEFAULT_MAX_COUNT = 10   # 每条措辞每渠道取帖数（默认 10，控制成本）
			
 
				+```
			
 
				+
			
 
				+> 加产品 / 加意图词：往 `QUERIES` 加条目即可；新意图词记得在 `INTENT_SYNONYMS` 配同义（没配则原样单条搜）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 4. 怎么跑
			
 
				+
			
 
				+```bash
			
 
				+cd examples/process_pipeline/script/search_eval/fixed_query_eval
			
 
				+
			
 
				+# ① 跑搜索 + 评分（已存在 form_A.json 的 query 默认跳过）
			
 
				+python run_search.py                       # 全部 4 组
			
 
				+python run_search.py --only-q q0,q2        # 只跑指定
			
 
				+python run_search.py --force               # 覆盖重跑
			
 
				+python run_search.py --no-eval             # 只搜不评分（省钱，验证召回用）
			
 
				+
			
 
				+# ② 起查看界面（端口 8770，与原 search_eval server 同端口，别同时开）
			
 
				+python server.py                           # http://0.0.0.0:8770
			
 
				+```
			
 
				+
			
 
				+`run_search.py` 主要参数：`--platforms` `--max-count` `--eval-model`（默认 gemini-flash-lite）
			
 
				+`--max-concurrent` `--no-transcribe` `--no-images` `--no-eval` `--only-q` `--force`。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 5. 查看界面（server.py :8770）
			
 
				+
			
 
				+复用原 search_eval 界面的**全部功能**，只去掉正交矩阵：
			
 
				+- 顶部 **Query 选择条**（4 组 query，替代原矩阵导航）→ 点击切换看某组结果
			
 
				+- 渠道 tab（全部 / 小红书 / X / 公众号）
			
 
				+- 卡片：标题 / 配图 / 互动数 / 知识类型标签
			
 
				+- 评分详情弹窗、相关性过滤阈值、综合分排序
			
 
				+- 「重评当前 query」「工序提取」按钮保留
			
 
				+
			
 
				+接口（与原 server 一致）：`GET /api/data`、`POST /api/generate_procedure`、`POST /api/reeval`、`GET /api/procedure_status`、`GET /api/procedure_log`。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 5.5 工具解构（从帖子提取结构化工具知识）
			
 
				+
			
 
				+把帖子（正文 + 配图）里提到的工具，用 **gemini-3.1-flash-lite** 提炼成结构化条目，每个工具一条。
			
 
				+
			
 
				+**两个入口**：
			
 
				+- **批量**：query 选择条上「🔧 工具解构」按钮 → 弹层勾选「当前 query + 当前渠道」的帖子 → 确认 → 后台批量解构。
			
 
				+- **单帖**：帖子详情弹窗的「工具解构」tab（在「对应工序」后面）→ 未解构则「开始解构」，已解构则展示工具卡片。
			
 
				+
			
 
				+**解构结果字段**（每个工具）：工具名称 / 实质作用域 / 形式作用域 / 创作层级（制作层·创作层）/ 来源链接 / 输入 / 输出 / 用法 / 案例 / 缺点 / 最新更新时间。null 字段在 UI 自动省略。
			
 
				+
			
 
				+**实现要点**：
			
 
				+- `tool_extract.py` 单次多模态 LLM 调用（正文 + 配图），比工序解构（多轮 agent）轻得多；复用引擎的收图 / 帖子格式化 / JSON 重试封装。
			
 
				+- 模型固定 `google/gemini-3.1-flash-lite`（`build_eval_llm_call("gemini-flash-lite")`，OpenRouter 后端）；评分筛选用的也是 OpenRouter，与之一致。
			
 
				+- 结果存 `runs_full/{q}/tools/{case_id}.json`，**已解构默认跳过**（详情页「重新解构」可强制覆盖）。
			
 
				+- server 走 subprocess 起 `tool_extract.py`（同工序解构模式），LLM 重依赖留子进程，server 本身保持轻量。
			
 
				+
			
 
				+相关接口：`POST /api/extract_tools {q, case_ids[], force?}`、`GET /api/tools_status?q=&case_id=`、`GET /api/tools_data?q=&case_id=`。
			
 
				+
			
 
				+手动跑（一般由界面触发）：
			
 
				+```bash
			
 
				+python tool_extract.py --q q0000 --case-ids xhs_abc,gzh_def [--force]
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 6. 产出结构（runs_full/q000N/form_A.json）
			
 
				+
			
 
				+```jsonc
			
 
				+{
			
 
				+  "form": "A",
			
 
				+  "query": "GPT image2 评测",          // 评估锚点（用户真实意图）
			
 
				+  "original_q": "GPT image2 评测",     // server 用作 query 标签
			
 
				+  "phrasings": ["GPT image2 评测", "GPT image2 测评", "GPT image2 实测", "GPT image2 体验"],
			
 
				+  "platforms": ["xhs", "x", "gzh"],
			
 
				+  "total": 80,
			
 
				+  "results": [ /* 去重池：帖子 + llm_evaluation + found_by_queries（哪些措辞命中）*/ ]
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+schema 与 `batch_3forms.py` 完全一致，所以本目录 server/index 与原版数据契约相同。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 6.5 数据库存储（双写 MySQL）
			
 
				+
			
 
				+搜索/评分结果和工具解构结果在落本地 json 的**同时**写入 MySQL（`.env` 的 `MYSQL_*`，host `rm-t4n…`，库 `agent-…`）。
			
 
				+
			
 
				+**两张表**（`db.py` 自动建，幂等）：
			
 
				+- `fqe_posts` —— 每行一个 (query, 帖子)：q / query_text / case_id / platform / title / url / body / images / like_count / publish_time / found_by / knowledge_type / overall_score / llm_evaluation。唯一键 `(q, case_id)`，重跑 upsert 不重复。
			
 
				+- `fqe_tools` —— 每行一个解构出的工具：q / case_id / 工具名称 / 实质作用域 / 形式作用域 / 创作层级 / 来源链接 / 输入 / 输出 / 用法 / 案例 / 缺点 / 最新更新时间 / model。重新解构按 `(q, case_id)` 先删旧再插新。
			
 
				+
			
 
				+**特性**：
			
 
				+- **双写**：DB 是附加存储，本地 json 仍是查看界面的主数据源；**DB 写入失败不阻断**（打印告警，本地 json 已落盘）。
			
 
				+- **本地清空时自动回退读库**：`server.py` 的 `scan_runs()` 先读本地 `runs_full/*/form_A.json`，**本地缺的 query 自动从 `fqe_posts` 补**；详情页工具 tab（`/api/tools_data`）本地无结果时也回退读 `fqe_tools`。所以即使 `runs_full` 被清空，界面仍能从库展示数据（本地优先）。
			
 
				+- **从库重建本地**：`python db.py rebuild` —— 把库里数据写回本地 `runs_full/{q}/form_A.json` 和 `tools/{case_id}.json`。界面回退只让「看」有数据，但提取脚本（tool_extract / 工序）仍读本地文件，故本地文件丢失后用此命令完整恢复。
			
 
				+- **建表**：首次跑一次 `python db.py init`（用 `CREATE TABLE IF NOT EXISTS`，可重复跑）。
			
 
				+- 驱动：`pymysql`（仓库 MySQL 约定）。装：`pip install pymysql`。
			
 
				+
			
 
				+```bash
			
 
				+python db.py init                              # 建表（幂等）
			
 
				+python db.py rebuild                           # 本地文件丢失后，从库重建 runs_full
			
 
				+python run_search.py                           # 搜索结果自动双写 fqe_posts
			
 
				+python tool_extract.py --q q0000 --case-ids …  # 工具结果自动双写 fqe_tools
			
 
				+```
			
 
				+
			
 
				+> 默认每措辞每渠道搜 **10** 条（`run_search.py` 顶部 `DEFAULT_MAX_COUNT`，原为 20，调小控成本）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 7. 环境依赖注意 ⚠️
			
 
				+
			
 
				+`run_search.py` 会触发 `agent.tools.builtin` 的完整注册链（同原 `batch_3forms.py`），需要：
			
 
				+- `Pillow`（PIL，图像处理）
			
 
				+- `fastapi` 等 `[server]` 依赖
			
 
				+
			
 
				+若报 `ModuleNotFoundError: No module named 'PIL' / 'fastapi'`，说明当前 venv 不全，装齐依赖即可：
			
 
				+
			
 
				+```bash
			
 
				+pip install -e '.[all]'          # 在仓库根
			
 
				+# 或最小补：pip install Pillow fastapi uvicorn websockets
			
 
				+```
			
 
				+
			
 
				+`server.py`（查看界面）**不依赖**上述链，缺这些也能起、能看已有产出。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 8. 与原 search_eval 的关系
			
 
				+
			
 
				+| | 原 `search_eval/` | 本 `fixed_query_eval/` |
			
 
				+|---|---|---|
			
 
				+| query 来源 | 动作×类型正交矩阵生成 + 三形式(A/B/C) | **写死 4 组**，单一形式 |
			
 
				+| 搜索引擎 | `search_and_evaluate.py` | **import 复用同一份**（零改动） |
			
 
				+| 评分 rubric | `eval_prompt_template.md` | 同上（复用） |
			
 
				+| 查看界面 | 矩阵导航 + 卡片 | **去矩阵**，4-query 选择条 + 卡片 |
			
 
				+| 产出 | `runs_full/q*/form_A\|B\|C.json` | `runs_full/q*/form_A.json` |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+*实现说明：搜索/评估/转写/中译英全部复用 `../search_and_evaluate.py` 的函数（只读 import）；
			
 
				+server/index 复制自原版后做最小改造（server 加一行 sys.path 复用兄弟模块 + 矩阵自动降级；
			
 
				+index 用 CSS 隐藏矩阵 DOM 并新增 #navQ 选择条，矩阵元素保留在 DOM 以免 JS 取空崩溃）。*
			
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/db.py
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/db.py
@@ -0,0 +1,354 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""fixed_query_eval · MySQL 持久化（双写：本地 json + 数据库）
			
 
				+================================================================================
			
 
				+
			
 
				+读 .env 的 MYSQL_* 连接 MySQL（仓库 MySQL 约定是 pymysql）。两张表：
			
 
				+  fqe_posts —— 每行一个 (query, 帖子)：搜索 + llm 评分结果
			
 
				+  fqe_tools —— 每行一个解构出的工具：工具解构结果
			
 
				+
			
 
				+设计原则：
			
 
				+- **失败不阻断**：所有写入用 try/except 包，DB 挂了不影响本地 json 写入（文件是主存储）。
			
 
				+- **幂等**：posts 用 (q, case_id) 唯一键 upsert；tools 先按 (q, case_id) 删旧再插新（重新解构会覆盖）。
			
 
				+- 建表：init_tables() 用 CREATE TABLE IF NOT EXISTS，跑 `python db.py init` 即建表。
			
 
				+"""
			
 
				+import os
			
 
				+import json
			
 
				+import sys
			
 
				+from pathlib import Path
			
 
				+
			
 
				+PROJECT_ROOT = Path(__file__).resolve().parents[5]
			
 
				+sys.path.insert(0, str(PROJECT_ROOT))
			
 
				+
			
 
				+from dotenv import load_dotenv
			
 
				+load_dotenv()
			
 
				+
			
 
				+try:
			
 
				+    import pymysql
			
 
				+    from pymysql.cursors import DictCursor
			
 
				+except ImportError:
			
 
				+    pymysql = None
			
 
				+
			
 
				+
			
 
				def _enabled() -> bool:
				    """Report whether MySQL persistence can be used.

				    True only when the optional ``pymysql`` import succeeded and the
				    ``MYSQL_HOST`` environment variable holds a non-empty value.
				    """
				    if pymysql is None:
				        return False
				    host = os.getenv("MYSQL_HOST")
				    return bool(host)
			
 
				+
			
 
				+
			
 
				def _conn():
				    """Open a new pymysql connection configured from the ``MYSQL_*`` env vars.

				    Reads MYSQL_HOST / MYSQL_PORT / MYSQL_USER / MYSQL_PASSWORD /
				    MYSQL_DATABASE. The connection uses utf8mb4, returns rows as dicts
				    (DictCursor), runs in autocommit mode and gives up connecting after
				    10 seconds. Callers are responsible for closing it.

				    Raises:
				        ValueError: if MYSQL_PORT is set to a non-numeric value.
				        Any connection error raised by ``pymysql.connect``.
				    """
				    # os.getenv's default only applies when the variable is unset; a blank
				    # `MYSQL_PORT=` line in .env yields "" and int("") would raise, so fall
				    # back to 3306 for blank values as well.
				    raw_port = (os.getenv("MYSQL_PORT") or "").strip()
				    port = int(raw_port) if raw_port else 3306
				    return pymysql.connect(
				        host=os.getenv("MYSQL_HOST"),
				        port=port,
				        user=os.getenv("MYSQL_USER"),
				        password=os.getenv("MYSQL_PASSWORD"),
				        database=os.getenv("MYSQL_DATABASE"),
				        charset="utf8mb4",
				        cursorclass=DictCursor,
				        autocommit=True,
				        connect_timeout=10,
				    )
			
 
				+
			
 
				+
			
 
				+# ── DDL ──────────────────────────────────────────────────────────────────────
			
 
				+
			
 
				+DDL_POSTS = """
			
 
				+CREATE TABLE IF NOT EXISTS fqe_posts (
			
 
				+  id            BIGINT AUTO_INCREMENT PRIMARY KEY,
			
 
				+  q             VARCHAR(16)   NOT NULL COMMENT 'query 目录名 q0000',
			
 
				+  query_text    VARCHAR(255)  NULL     COMMENT '基准 query（如 GPT image2 评测）',
			
 
				+  case_id       VARCHAR(128)  NOT NULL COMMENT 'platform_channelContentId',
			
 
				+  platform      VARCHAR(32)   NULL,
			
 
				+  channel_content_id VARCHAR(128) NULL,
			
 
				+  title         VARCHAR(512)  NULL,
			
 
				+  url           VARCHAR(1024) NULL,
			
 
				+  body          MEDIUMTEXT    NULL,
			
 
				+  images        JSON          NULL     COMMENT '图片 URL 数组',
			
 
				+  like_count    INT           NULL,
			
 
				+  publish_time  VARCHAR(64)   NULL,
			
 
				+  found_by      JSON          NULL     COMMENT '命中的措辞数组',
			
 
				+  knowledge_type JSON         NULL     COMMENT 'llm 判定的知识类型',
			
 
				+  overall_score FLOAT         NULL     COMMENT '综合分（相关性两子项均值，便于排序）',
			
 
				+  llm_evaluation JSON         NULL     COMMENT '完整评分 blob',
			
 
				+  created_at    TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
			
 
				+  updated_at    TIMESTAMP     DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
			
 
				+  UNIQUE KEY uk_q_case (q, case_id),
			
 
				+  KEY idx_platform (platform),
			
 
				+  KEY idx_query_text (query_text)
			
 
				+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='固定query搜索+评分结果';
			
 
				+"""
			
 
				+
			
 
				+DDL_TOOLS = """
			
 
				+CREATE TABLE IF NOT EXISTS fqe_tools (
			
 
				+  id            BIGINT AUTO_INCREMENT PRIMARY KEY,
			
 
				+  q             VARCHAR(16)   NOT NULL,
			
 
				+  case_id       VARCHAR(128)  NOT NULL,
			
 
				+  platform      VARCHAR(32)   NULL,
			
 
				+  post_title    VARCHAR(512)  NULL,
			
 
				+  tool_name     VARCHAR(255)  NULL     COMMENT '工具名称',
			
 
				+  substance_scope VARCHAR(255) NULL    COMMENT '实质作用域',
			
 
				+  form_scope    VARCHAR(255)  NULL     COMMENT '形式作用域',
			
 
				+  creation_layer VARCHAR(32)  NULL     COMMENT '创作层级：制作层/创作层',
			
 
				+  source_link   VARCHAR(1024) NULL     COMMENT '来源链接',
			
 
				+  input_desc    TEXT          NULL     COMMENT '输入',
			
 
				+  output_desc   TEXT          NULL     COMMENT '输出',
			
 
				+  usage_json    JSON          NULL     COMMENT '用法数组',
			
 
				+  cases_json    JSON          NULL     COMMENT '案例数组',
			
 
				+  defects_json  JSON          NULL     COMMENT '缺点数组',
			
 
				+  updated_time  VARCHAR(64)   NULL     COMMENT '工具最新更新时间',
			
 
				+  model         VARCHAR(64)   NULL     COMMENT '解构模型',
			
 
				+  created_at    TIMESTAMP     DEFAULT CURRENT_TIMESTAMP,
			
 
				+  KEY idx_q_case (q, case_id),
			
 
				+  KEY idx_tool_name (tool_name)
			
 
				+) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='工具解构结果（每行一个工具）';
			
 
				+"""
			
 
				+
			
 
				+
			
 
				def init_tables():
				    """Create the fqe_posts and fqe_tools tables if they do not exist yet.

				    Returns:
				        True once both DDL statements have been executed, False when MySQL
				        is not enabled (a notice is printed). Connection and SQL errors are
				        not caught here and propagate to the caller.
				    """
				    if not _enabled():
				        print("⚠️ MySQL 未启用（缺 pymysql 或 MYSQL_HOST），跳过建表")
				        return False
				    connection = _conn()
				    try:
				        with connection.cursor() as cursor:
				            # Both statements use CREATE TABLE IF NOT EXISTS, so re-running is safe.
				            for ddl in (DDL_POSTS, DDL_TOOLS):
				                cursor.execute(ddl)
				        print("✅ 建表完成：fqe_posts, fqe_tools")
				        return True
				    finally:
				        connection.close()
			
 
				+
			
 
				+
			
 
				+# ── 写入 ─────────────────────────────────────────────────────────────────────
			
 
				+
			
 
				+def _overall_from_eval(e):
			
 
				+    """从 mod schema 评分粗算综合分（相关性两子项均值）。算不出返回 None。"""
			
 
				+    try:
			
 
				+        rel = (e or {}).get("相关性") or {}
			
 
				+        vals = []
			
 
				+        for k in ("和内容制作知识相关", "和 query 相关"):
			
 
				+            v = (rel.get(k) or {}).get("得分")
			
 
				+            if v is not None:
			
 
				+                vals.append(float(v))
			
 
				+        return round(sum(vals) / len(vals), 2) if vals else None
			
 
				+    except Exception:
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				def upsert_posts(q, query_text, results):
				    """Write one query's search results into fqe_posts, upserting on (q, case_id).

				    This is a best-effort secondary write: any failure, including a
				    malformed entry in ``results``, is reported with a warning and never
				    raised to the caller.

				    Args:
				        q: query directory name (e.g. "q0000").
				        query_text: the base query text stored alongside every row.
				        results: iterable of result dicts; each may carry "case_id",
				            "platform", "channel_content_id", "source_url",
				            "found_by_queries", "llm_evaluation" and a nested "post" dict.

				    Returns:
				        Number of rows sent to the database, or 0 when MySQL is disabled,
				        there is nothing to write, or the write failed.
				    """
				    if not _enabled() or not results:
				        return 0
				    sql = """
				    INSERT INTO fqe_posts
				      (q, query_text, case_id, platform, channel_content_id, title, url, body,
				       images, like_count, publish_time, found_by, knowledge_type, overall_score, llm_evaluation)
				    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
				    ON DUPLICATE KEY UPDATE
				      query_text=VALUES(query_text), platform=VALUES(platform),
				      channel_content_id=VALUES(channel_content_id), title=VALUES(title), url=VALUES(url),
				      body=VALUES(body), images=VALUES(images), like_count=VALUES(like_count),
				      publish_time=VALUES(publish_time), found_by=VALUES(found_by),
				      knowledge_type=VALUES(knowledge_type), overall_score=VALUES(overall_score),
				      llm_evaluation=VALUES(llm_evaluation);
				    """
				    try:
				        # Row building lives inside the try so that an unexpected shape in
				        # one result cannot raise into the caller.
				        rows = []
				        skipped = 0
				        for r in results:
				            # case_id is NOT NULL and part of the unique key; a row without
				            # it would make the whole executemany batch fail.
				            if not isinstance(r, dict) or not r.get("case_id"):
				                skipped += 1
				                continue
				            post = r.get("post") or {}
				            e = r.get("llm_evaluation") or {}
				            rows.append((
				                q, query_text, r.get("case_id"), r.get("platform"), r.get("channel_content_id"),
				                (post.get("title") or post.get("desc") or "")[:500],
				                r.get("source_url"),
				                post.get("body_text") or post.get("desc") or "",
				                json.dumps(post.get("images") or [], ensure_ascii=False),
				                post.get("like_count"),
				                str(post.get("publish_time") or post.get("publish_timestamp") or "")[:64],
				                json.dumps(r.get("found_by_queries") or [], ensure_ascii=False),
				                json.dumps(e.get("知识类型") or [], ensure_ascii=False),
				                _overall_from_eval(e),
				                json.dumps(e, ensure_ascii=False),
				            ))
				        if skipped:
				            print(f"⚠️ fqe_posts 跳过 {skipped} 条缺少 case_id 的结果")
				        if not rows:
				            return 0
				        conn = _conn()
				        try:
				            with conn.cursor() as cur:
				                cur.executemany(sql, rows)
				            return len(rows)
				        finally:
				            conn.close()
				    except Exception as ex:
				        print(f"⚠️ fqe_posts 写库失败（不影响本地 json）：{ex}")
				        return 0
			
 
				+
			
 
				+
			
 
				def upsert_tools(q, case_id, model, tools, platform=None, post_title=None):
				    """Replace the stored tool-extraction rows of one post in fqe_tools.

				    The old rows for (q, case_id) are deleted and the new ones inserted
				    inside a single transaction, so a failed insert rolls the delete back
				    and the previously stored rows survive. Failures are reported with a
				    warning and never raised.

				    Args:
				        q: query directory name.
				        case_id: post identifier.
				        model: name of the model that produced the extraction.
				        tools: list of tool dicts (Chinese field names); None or empty
				            just clears the existing rows.
				        platform: optional platform name stored on every row.
				        post_title: optional post title, truncated to 500 characters.

				    Returns:
				        Number of tools written, or 0 when MySQL is disabled or on failure.
				    """
				    if not _enabled():
				        return 0
				    tools = tools or []  # treat None as "no tools" instead of failing on len()
				    try:
				        conn = _conn()
				        try:
				            # The connection is opened with autocommit=True; an explicit
				            # BEGIN groups the DELETE and the INSERTs into one atomic unit.
				            conn.begin()
				            try:
				                with conn.cursor() as cur:
				                    cur.execute("DELETE FROM fqe_tools WHERE q=%s AND case_id=%s", (q, case_id))
				                    if tools:
				                        rows = [(
				                            q, case_id, platform, (post_title or "")[:500],
				                            t.get("工具名称"), t.get("实质作用域"), t.get("形式作用域"),
				                            t.get("创作层级"), t.get("来源链接"), t.get("输入"), t.get("输出"),
				                            json.dumps(t.get("用法"), ensure_ascii=False) if t.get("用法") is not None else None,
				                            json.dumps(t.get("案例"), ensure_ascii=False) if t.get("案例") is not None else None,
				                            json.dumps(t.get("缺点"), ensure_ascii=False) if t.get("缺点") is not None else None,
				                            t.get("最新更新时间"), model,
				                        ) for t in tools]
				                        cur.executemany("""
				                        INSERT INTO fqe_tools
				                          (q, case_id, platform, post_title, tool_name, substance_scope, form_scope,
				                           creation_layer, source_link, input_desc, output_desc,
				                           usage_json, cases_json, defects_json, updated_time, model)
				                        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
				                        """, rows)
				                conn.commit()
				            except Exception:
				                # Undo the DELETE so the previous extraction is not lost.
				                conn.rollback()
				                raise
				            return len(tools)
				        finally:
				            conn.close()
				    except Exception as ex:
				        print(f"⚠️ fqe_tools 写库失败（不影响本地 json）：{ex}")
				        return 0
			
 
				+
			
 
				+
			
 
				+# ── 读取（本地 runs_full 被清空时，server 回退读库重建视图）────────────────────────
			
 
				+
			
 
				+def _loads(v, default=None):
			
 
				+    """pymysql 的 JSON 列可能返回字符串，统一解析。"""
			
 
				+    if v is None:
			
 
				+        return default
			
 
				+    if isinstance(v, (list, dict)):
			
 
				+        return v
			
 
				+    try:
			
 
				+        return json.loads(v)
			
 
				+    except Exception:
			
 
				+        return default
			
 
				+
			
 
				+
			
 
				def fetch_posts_grouped():
				    """Rebuild ``{q: {"query_text": ..., "results": [...]}}`` from fqe_posts.

				    Every result dict carries case_id, platform, channel_content_id,
				    source_url, found_by_queries, llm_evaluation and a nested ``post`` dict
				    (title, body_text, images, like_count, publish_timestamp). Rows are read
				    ordered by q, then by overall_score descending. The ``query_text`` of a
				    group is taken from the first row seen for that q.

				    Returns:
				        The grouped dict, or ``{}`` when MySQL is not enabled, the table is
				        empty, or the read fails (a warning is printed in that case).
				    """
				    if not _enabled():
				        return {}
				    try:
				        connection = _conn()
				        try:
				            with connection.cursor() as cursor:
				                cursor.execute("SELECT * FROM fqe_posts ORDER BY q, overall_score DESC")
				                db_rows = cursor.fetchall()
				        finally:
				            connection.close()
				    except Exception as ex:
				        print(f"⚠️ 读 fqe_posts 失败：{ex}")
				        return {}

				    grouped = {}
				    for record in db_rows:
				        result = {
				            "case_id": record["case_id"],
				            "platform": record["platform"],
				            "channel_content_id": record["channel_content_id"],
				            "source_url": record["url"],
				            "found_by_queries": _loads(record["found_by"], []),
				            "llm_evaluation": _loads(record["llm_evaluation"], {}),
				            "post": {
				                "title": record["title"],
				                "body_text": record["body"],
				                "images": _loads(record["images"], []),
				                "like_count": record["like_count"],
				                "publish_timestamp": record["publish_time"],
				            },
				        }
				        key = record["q"]
				        if key not in grouped:
				            grouped[key] = {"query_text": record["query_text"], "results": []}
				        grouped[key]["results"].append(result)
				    return grouped
			
 
				+
			
 
				+
			
 
				def fetch_tools(q, case_id):
				    """Rebuild one post's tool-extraction payload from fqe_tools.

				    Args:
				        q: query directory name.
				        case_id: post identifier.

				    Returns:
				        A dict with case_id, platform, title, model, tool_count and tools
				        (one dict per row, in insertion order by id, using the Chinese
				        field names), or None when MySQL is not enabled, no row matches,
				        or the read fails (a warning is printed in that case).
				    """
				    if not _enabled():
				        return None
				    try:
				        connection = _conn()
				        try:
				            with connection.cursor() as cursor:
				                cursor.execute("SELECT * FROM fqe_tools WHERE q=%s AND case_id=%s ORDER BY id", (q, case_id))
				                db_rows = cursor.fetchall()
				        finally:
				            connection.close()
				    except Exception as ex:
				        print(f"⚠️ 读 fqe_tools 失败：{ex}")
				        return None
				    if not db_rows:
				        return None

				    tools = []
				    for record in db_rows:
				        tools.append({
				            "工具名称": record["tool_name"],
				            "实质作用域": record["substance_scope"],
				            "形式作用域": record["form_scope"],
				            "创作层级": record["creation_layer"],
				            "来源链接": record["source_link"],
				            "输入": record["input_desc"],
				            "输出": record["output_desc"],
				            "用法": _loads(record["usage_json"]),
				            "案例": _loads(record["cases_json"]),
				            "缺点": _loads(record["defects_json"]),
				            "最新更新时间": record["updated_time"],
				        })
				    # Post-level fields are duplicated on every row; read them from the first.
				    head = db_rows[0]
				    return {
				        "case_id": case_id,
				        "platform": head["platform"],
				        "title": head["post_title"],
				        "model": head["model"],
				        "tool_count": len(tools),
				        "tools": tools,
				    }
			
 
				+
			
 
				+
			
 
				def has_tools(q, case_id):
				    """Tell whether fqe_tools holds at least one row for the given post.

				    Args:
				        q: query directory name.
				        case_id: post identifier.

				    Returns:
				        True when a matching row exists; False when none exists, MySQL is
				        not enabled, or the lookup fails. A failed lookup prints a warning,
				        like the other readers in this module, so that a database outage is
				        not mistaken for "not extracted yet".
				    """
				    if not _enabled():
				        return False
				    try:
				        conn = _conn()
				        try:
				            with conn.cursor() as cur:
				                cur.execute("SELECT 1 FROM fqe_tools WHERE q=%s AND case_id=%s LIMIT 1", (q, case_id))
				                return cur.fetchone() is not None
				        finally:
				            conn.close()
				    except Exception as ex:
				        print(f"⚠️ 读 fqe_tools 失败：{ex}")
				        return False
			
 
				+
			
 
				+
			
 
				def rebuild_local(runs_dir=None):
				    """Recreate the local runs_full tree from the database.

				    Writes ``{runs_dir}/{q}/form_A.json`` for every query found in
				    fqe_posts and ``{runs_dir}/{q}/tools/{case_id}.json`` for every
				    (q, case_id) pair found in fqe_tools, then prints a summary.

				    Args:
				        runs_dir: target directory; defaults to ``runs_full`` next to this
				            file.

				    Returns:
				        None. Prints a notice and returns early when MySQL is not enabled.
				        Errors while rebuilding tool files are printed, not raised.
				    """
				    if not _enabled():
				        print("⚠️ MySQL 未启用，无法重建")
				        return
				    if runs_dir:
				        runs_dir = Path(runs_dir)
				    else:
				        runs_dir = Path(__file__).resolve().parent / "runs_full"

				    post_files = 0
				    tool_files = 0
				    for q, group in fetch_posts_grouped().items():
				        query_text = group["query_text"]
				        results = group["results"]
				        payload = {
				            "form": "A",
				            "query": query_text,
				            "original_q": query_text or "",
				            "platforms": [],
				            "total": len(results),
				            "failed": 0,
				            "results": results,
				        }
				        target = runs_dir / q / "form_A.json"
				        target.parent.mkdir(parents=True, exist_ok=True)
				        target.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
				        post_files += 1

				    # tools
				    try:
				        connection = _conn()
				        try:
				            with connection.cursor() as cursor:
				                cursor.execute("SELECT DISTINCT q, case_id FROM fqe_tools")
				                pairs = cursor.fetchall()
				        finally:
				            connection.close()
				        for pair in pairs:
				            data = fetch_tools(pair["q"], pair["case_id"])
				            if not data:
				                continue
				            tool_path = runs_dir / pair["q"] / "tools" / f"{pair['case_id']}.json"
				            tool_path.parent.mkdir(parents=True, exist_ok=True)
				            tool_path.write_text(json.dumps(data, ensure_ascii=False, indent=2), encoding="utf-8")
				            tool_files += 1
				    except Exception as ex:
				        print(f"⚠️ 重建 tools 失败：{ex}")
				    print(f"✅ 从库重建本地：{post_files} 个 query 的 form_A.json，{tool_files} 帖工具结果 → {runs_dir}")
			
 
				+
			
 
				+
			
 
				if __name__ == "__main__":
				    # Command-line entry: `init` creates the tables, `rebuild` restores the
				    # local runs_full tree from the database; anything else prints usage.
				    cmd = sys.argv[1] if len(sys.argv) > 1 else ""
				    handler = {"init": init_tables, "rebuild": rebuild_local}.get(cmd)
				    if handler is not None:
				        handler()
				    else:
				        print("用法：\n  python db.py init      # 建表\n  python db.py rebuild   # 从库重建本地 runs_full（本地文件丢失后恢复）")
			
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/docs/导读-process_pipeline.md
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/docs/导读-process_pipeline.md
@@ -0,0 +1,400 @@
 
				+# process_pipeline 导读
			
 
				+
			
 
				+> 给刚接手这个目录的人。读完你能回答：**这个流水线在做什么生意、由哪两代系统组成、每个文件夹干什么、数据怎么一步步变形、我该从哪读起。**
			
 
				+>
			
 
				+> 一句话：**这是一条「内容挖矿」流水线——把社媒上真实的 AI 图文/视频作品，反向拆解成结构化、可复用、可检索的「工序知识」，沉淀进公司的内容知识库。**
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 0. 业务目标：到底在生产什么
			
 
				+
			
 
				+公司做的是 **AI 内容生产中台**。问题是：怎么知道「一张爆款 AI 图 / 一条爆款视频是怎么做出来的」？答案不能靠拍脑袋，要从**全网真实案例**里挖。
			
 
				+
			
 
				+这条流水线就是那台「挖矿机」：
			
 
				+
			
 
				+```
			
 
				+一条小红书/抖音/知乎/公众号/YouTube 上的 AI 作品帖
			
 
				+        │  （输入：别人做好的成品 + 教程正文 + 配图）
			
 
				+        ▼   反向工程
			
 
				+一份结构化「工序表」 = 第1步用什么工具做什么 → 产出什么 → 喂给第2步 → …
			
 
				+        │  （输出：可教学、可复现、可自动化的生产流程）
			
 
				+        ▼   归类入库
			
 
				+挂到公司的「内容树」分类体系上（实质维度 + 形式维度）→ 可检索、可复用
			
 
				+```
			
 
				+
			
 
				+**最终价值**：建一个**案例驱动的工序知识库**——下次要生产某类内容时，能直接检索「这类内容业内是怎么做的、用哪些工具、什么参数」。
			
 
				+
			
 
				+> 判断业务的硬证据（来自代码注释/prompt）：
			
 
				+> - `eval_prompt_template.md`：「所有『成品』『效果』『用例』均指 **AI 生成的图片或视频**…纯文字输出、代码生成、论文写作、生活记录**不属于本管线范围**」
			
 
				+> - `db_requirements.json`：116 条需求，如「生成人物在不同场景下呈现丰富面部表情的图片，例如夸张的痛苦、无奈…」——这些就是流水线的「选题」。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 1. ⚠️ 最重要：这里有「两代」流水线，别搞混
			
 
				+
			
 
				+目录里 1168 个文件，绝大多数是数据产出。代码其实分两套系统，**做的事相似但实现不同、产出目录不同**：
			
 
				+
			
 
				+| | **第一代：主流水线** | **第二代：search_eval（当前活跃）** |
			
 
				+|---|---|---|
			
 
				+| 入口 | `run_pipeline.py` / `batch.py` | `script/search_eval/` 下各脚本 |
			
 
				+| 形态 | 5 步 agent 流水线，多轮迭代 | 批量「搜+评+抽」，单轮、更快更便宜 |
			
 
				+| 驱动 | 按需求 `--index N` 跑 | 按 query 矩阵（224 条）批量跑 |
			
 
				+| 产出在 | `output/{NNN}/`（已不在仓库里，gitignore） | `script/search_eval/runs_full/q{NNNN}/` |
			
 
				+| 前端 | 根目录 `server.py` + `ui/` | `search_eval/server.py` + `index.html` |
			
 
				+| 工序提取 | `decode_workflow_agent/`（LangChain+Gemini） | `procedure-dsl/`（Claude，DSL 规范驱动） |
			
 
				+| 当前状态 | 较完整，但开发重心已转移 | **git 最近提交都在动它**，是现在的主战场 |
			
 
				+
			
 
				+**给新人的建议**：先理解**第一代**建立全局心智模型（它的步骤命名更直白），再看**第二代**（它是第一代的演进——更工程化、加了「query 三形式对比」和「工序 DSL 标准」）。
			
 
				+
			
 
				+二者**既不是替代关系，也不是并行跑**——search_eval 是把「搜索→评估→工序提取」这段重新做了一套更专注、更可批量的实现，并引入了**工序 DSL**这个标准中间格式。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 2. 核心概念词典（先背下来，否则看不懂代码）
			
 
				+
			
 
				+整条流水线围绕「**数据一层层变结构化**」展开。这些名词在 schema、文件名、注释里反复出现：
			
 
				+
			
 
				+| 概念 | 中文 | 是什么 | 对应文件 |
			
 
				+|------|------|--------|---------|
			
 
				+| **source** | 数据源 | 一条原始社媒帖（标题+正文+图+互动数），跨平台字段已归一 | `source.schema.json`、`source.json` |
			
 
				+| **case** | 案例 | source + 提取出的工序结构；流水线的核心载体 | `case_detailed.schema.json`、`case.json` |
			
 
				+| **workflow / 工序** | 工序 | 把一条作品拆成「步骤序列 + 数据流 DAG」：每步有动作/工具/输入/输出 | `workflow.json`、`decode_workflow.prompt` |
			
 
				+| **capability** | 能力 | 比工序更原子的「单个技能」（如局部重绘、一致性保持） | `capabilities.schema.json` |
			
 
				+| **strategy** | 策略 | 工序的最终态：每一步都挂到了分类树上的运行手册 | `apply_to_grounding_strategy.schema.json` |
			
 
				+| **grounding** | 接地/映射 | 把能力映射到公司「内容树」分类库的精确节点 | `apply_to_grounding_agent.py` |
			
 
				+| **实质 / 形式** | — | 内容树的两个维度：**实质**=内容是什么（题材/叙事）；**形式**=怎么呈现（镜头/口播/排版） | `分类库导出_实质_*.json`、`分类库导出_形式_*.json` |
			
 
				+| **method vocab** | 方法词库 | 给工序步骤打标签的受控词表。v5 是 10 维结构化（流程角色/模态/主动作/动作方式/工件类型…） | `script/resource/method_vocab_v5.json` |
			
 
				+
			
 
				+**三种知识类型**（评估时第一步就要分类，贯穿 search_eval）：
			
 
				+- **工序**：端到端流程，目标是产出一张 AI 图/视频（**只有工序帖能抽出 procedure**）。
			
 
				+- **能力**：直接影响最终画面的原子操作（太原子，抽不出完整流程）。
			
 
				+- **工具**：讲某个具体工具怎么用。
			
 
				+
			
 
				+> 数据变形主线（记住这条就抓住了整个流水线）：
			
 
				+> **帖子(source) → 标准化(case) → 反向拆解(workflow/工序) → 打标签(capability) → 挂分类树(grounding→strategy)**
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 3. 目录结构逐层讲解
			
 
				+
			
 
				+```
			
 
				+process_pipeline/
			
 
				+│
			
 
				+├── 【第一代 · 编排层】
			
 
				+│   ├── run_pipeline.py        ★ 主编排：5 步流水线 (research→source→generate-case→decode-workflow→apply-grounding)
			
 
				+│   │                            CLI 丰富：--index N 选需求，--only-step / --start-from / --end-at 控制步骤
			
 
				+│   ├── batch.py                多开 CMD 窗口并行跑多个需求（round-robin 分配 + 失败重试）。注意：Windows .bat，面向 Windows 环境
			
 
				+│   ├── presets.json            定义 "researcher" agent（Phase 1 爬虫角色）
			
 
				+│   ├── server.py (43KB)        第一代的 Web 后端（FastAPI，给 ui/ 提供 API）
			
 
				+│   └── ui/                     第一代前端（原生 JS）：app.js/render.js/modals.js/lightbox.js/scratchpad.js
			
 
				+│                               功能：选需求、配置并触发流水线、看 case/能力/工序、改 prompt+schema、看日志
			
 
				+│
			
 
				+├── 【输入 / 配置数据】
			
 
				+│   ├── db_requirements.json    ★ 116 条业务需求（流水线的「选题库」，--index 指向它）
			
 
				+│   ├── run_metrics.json        221 条历史运行的成本/耗时指标（自动累积）
			
 
				+│   └── prompts/                ★ 所有 prompt + JSON Schema（数据模型的事实来源）
			
 
				+│       ├── researcher.prompt + .schema.json          Phase1 爬虫的系统提示 + 输出契约
			
 
				+│       ├── apply_to_grounding_agent.prompt           grounding agent 提示
			
 
				+│       ├── apply_to_grounding_oneshot.prompt         grounding 的 oneshot（更快）变体
			
 
				+│       ├── apply_to_grounding_*.schema.json          grounding / strategy 输出契约
			
 
				+│       └── temp_schema/        核心实体 schema：source / case_detailed / process / capabilities / capabilities_extracted
			
 
				+│                               （schema 里 "-boundary"/"-ref" 后缀 = 稳定引用层，与易变内容分离，便于版本化）
			
 
				+│
			
 
				+├── 【第一代 · 脚本层】script/   ★ run_pipeline.py 的具体步骤实现
			
 
				+│   ├── extract_sources.py        源提取：解析帖子 URL → 匹配缓存原文 → 廉价预筛(正文>30字/近半年) → 视频自动转写(Deepgram)
			
 
				+│   ├── generate_case.py          案例标准化：跨平台字段归一 + 图片下载 + 上传 OSS CDN(res.cybertogether.net)
			
 
				+│   ├── extract_decode_workflow.py  工序拆解入口（调下面的 agent）
			
 
				+│   ├── decode_workflow_agent/    ★ 工序拆解 agent（LangChain + Gemini）
			
 
				+│   │   ├── DecodeProcessAgent.py   主体：用 add_step/add_step_input/... 工具增量构造工序 DAG
			
 
				+│   │   ├── decode_process_prompt.md  系统提示，核心原则=「客观还原，禁止臆造，每步必须有图/文依据」
			
 
				+│   │   ├── workflow_store.py        原子持久化（tmp→replace），定义 step/input/output 字段 schema
			
 
				+│   │   └── visualize_workflow.py    工序 → HTML 表格可视化
			
 
				+│   ├── apply_to_grounding_agent.py  grounding：把 capability 映射到 实质/形式 分类库（Claude SDK 或 OpenRouter）
			
 
				+│   ├── llm_evaluate_sources.py + evaluate_source_quality.py  源质量的 LLM rubric 评估
			
 
				+│   ├── generate_case.py / case_history.py  案例生成 + 快照（支持回滚）
			
 
				+│   ├── llm_helper.py             ★ 统一 LLM 调用封装（重试 + schema 校验 + JSON 修复反馈环）
			
 
				+│   ├── schema_manager.py / validate_schema.py / fix_json_quotes.py  schema 校验 + JSON 修复基础设施
			
 
				+│   ├── recover.py / update_schema.py  运维/迁移脚本
			
 
				+│   └── resource/               ★ 词库与分类树（grounding 的知识底座）
			
 
				+│       ├── method_vocab_v5.json    10 维结构化方法词库（当前版）
			
 
				+│       ├── method_vocab.json       旧版 3 维（作用/模态/动作）
			
 
				+│       ├── category_tree_56.json   内容树（3MB+，实质/形式两根，最多 6 层）
			
 
				+│       ├── query_tree.py           内容树查询 CLI
			
 
				+│       └── 分类库导出_实质/形式_*.json  导出的两份分类库（grounding 直接读这两份）
			
 
				+│
			
 
				+└── 【第二代 · 当前主战场】script/search_eval/   ★★ 批量「搜+评+抽工序」
			
 
				+    ├── search_and_evaluate.py     主入口：query 多渠道搜索 + LLM 逐条评估（不走 agent、不写 source.json，自包含）
			
 
				+    ├── batch_3forms.py            ★ 同一 query 用「三种形式」搜索做对比：
			
 
				+    │                                A=原词组合, B=自然句填充, C=同义替换。检验 query 表达对召回的影响
			
 
				+    ├── batch_extract_procedures.py  ★ 批量工序提取编排：筛「工序帖」→ 打分 → 去重 → 并发起子进程抽工序
			
 
				+    ├── build_workflows.py         合并：帖子元信息(llm_evaluation/query) + 抽出的 workflow.json → workflows/*.json（含 HTTP API:8771）
			
 
				+    ├── eval_one_sample.py         单样本评估调试器（render→execute→dump，改 rubric 时用）
			
 
				+    ├── eval_prompt_template.md    ★ 评估 rubric 的事实来源（知识类型/相关性/质量 多维打分标准）
			
 
				+    ├── eval_prompt_sample-mod.md  渲染后的样例（SYSTEM/USER 块 + 内嵌帖子 JSON + 多模态图）
			
 
				+    ├── server.py + index.html     ★ 第二代 Web UI（:8770）：按 query→三形式→渠道 浏览卡片、看评分、按需触发工序提取、看矩阵
			
 
				+    ├── evaluation/               评估配置：high_priority_queries_full.json(224条结构化query) / synonym_pools.json / judged_matrix.json / type_action_scores.json
			
 
				+    ├── procedure-dsl/            ★★ 工序 DSL（第二代的工序提取标准）
			
 
				+    │   ├── run_procedure_dsl.py     提取入口（默认 claude-sonnet-4-6）
			
 
				+    │   ├── spec/                    DSL 规范（事实来源）：三阶段提取(phase1骨架→phase2归一→phase3定稿) + taxonomy(action/effect/type) + schema
			
 
				+    │   ├── input/                   提取输入暂存
			
 
				+    │   └── .ocr_cache/              图片 OCR 缓存
			
 
				+    ├── runs_full/                数据产出：q0000…q0223 每个 query 一个目录
			
 
				+    │   └── q{NNNN}/
			
 
				+    │       ├── form_A/B/C.json      三种形式各自的「帖子 + llm_evaluation」（每份 ~50 帖）
			
 
				+    │       └── procedures/{form}_{platform}_{hash}/   抽出的工序
			
 
				+    │           ├── _source.json _meta.json workflow.json   输入/元信息/工序表
			
 
				+    │           ├── case-*.html understanding.md            可视化 + 理解笔记
			
 
				+    │           └── _trace*.md .trace_id                    执行轨迹（关联 Agent Core 的 Trace）
			
 
				+    ├── workflows/               build_workflows.py 的合并产出（每帖一份）
			
 
				+    └── scratch/                 临时实验脚本（run_prompt / xhs_fetch）
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 4. 第一代主流水线：5 步数据变形（`run_pipeline.py`）
			
 
				+
			
 
				+拓扑：`research → source → generate-case → decode-workflow → apply-grounding`（默认跑前 4 步，grounding 手动触发）。
			
 
				+
			
 
				+| 步 | 名字 | 输入 | 干什么 | 产出 |
			
 
				+|----|------|------|--------|------|
			
 
				+| 1 | **research** | 需求 + 平台列表 | `researcher` agent 用 `content_search` 在**单个渠道**广度搜索，质量优先地挑帖（含自评分） | `raw_cases/case_<platform>.json` |
			
 
				+| 2 | **source** | 上一步的帖子链接 | 解析 URL → 从缓存匹配原文 → 预筛(正文长度/时效) → 视频转写 → 可选 LLM 评分 | `source.json` / `filtered_cases.json` |
			
 
				+| 3 | **generate-case** | `source.json` | 跨平台字段归一、图片下载 + 上传 CDN、生成统一 case 结构 | `case.json` |
			
 
				+| 4 | **decode-workflow** | `case.json` | Gemini agent 看「标题+正文+图」**反向拆解工序 DAG**，每步必须有依据（禁臆造） | `decode_workflows/case_*.json` + `.html` |
			
 
				+| 5 | **apply-grounding** | `case.json` + 两份分类库 | 把每个 capability 映射到 实质/形式 内容树节点，并可建议新增分类 | 回写 `case.capability.apply_to` |
			
 
				+
			
 
				+> research⇄source 在 Phase1 内部是个**循环**（最多 50 轮），直到每个平台凑够约 15 个合格 case。这对 CLI 是透明的。
			
 
				+> 成本按步骤自动核算，写进 `run_metrics.json`（`costs_breakdown`）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 5. 第二代 search_eval：批量「搜+评+抽」（当前重点）
			
 
				+
			
 
				+和第一代最大的区别：**不迭代、不维护 source.json、单轮产出**，为的是**快速、可批量地**在 224 条 query 矩阵上跑。
			
 
				+
			
 
				+数据流：
			
 
				+
			
 
				+```
			
 
				+high_priority_queries_full.json (224 条结构化 query：动作×类型 矩阵)
			
 
				+        │
			
 
				+        ▼  batch_3forms.py —— 每条 query 生成三种表达形式
			
 
				+   form_A(原词) / form_B(自然句) / form_C(同义替换)
			
 
				+        │   各自多渠道搜索 + LLM rubric 逐条评估(eval_prompt_template.md)
			
 
				+        ▼
			
 
				+   runs_full/q{NNNN}/form_A|B|C.json   （帖子 + 知识类型/相关性/质量 评分）
			
 
				+        │
			
 
				+        ▼  batch_extract_procedures.py —— 筛「知识类型=工序」的高分帖 → 去重 → 并发抽取
			
 
				+   procedure-dsl/run_procedure_dsl.py（Claude，按 DSL 三阶段规范抽工序）
			
 
				+        │
			
 
				+        ▼
			
 
				+   runs_full/q{NNNN}/procedures/{form}_{platform}_{hash}/workflow.json (+ html)
			
 
				+        │
			
 
				+        ▼  build_workflows.py —— 合并帖子元信息 + 工序表
			
 
				+   workflows/q{NNNN}_{form}_{platform}_{hash}.json
			
 
				+        │
			
 
				+        ▼  server.py(:8770) + index.html —— 人工浏览/复核/触发再提取
			
 
				+```
			
 
				+
			
 
				+**为什么要「三种形式」(batch_3forms)**：同一个需求，用「原词堆叠」「自然句」「近义替换」三种 query 去搜，召回到的帖子差异很大。这是在**实验哪种 query 写法能挖到最好的工序帖**——本质是搜索召回的 A/B 测试。
			
 
				+
			
 
				+**工序 DSL（procedure-dsl/spec）**：把「散文式教程 → 机器可读工序表」标准化成一套领域语言，分三阶段：
			
 
				+- Phase1 骨架：识别工序与步骤、连数据流。
			
 
				+- Phase2 归一：用 taxonomy（action 动作 / effect 作用 / type 类型）给每步打标签，分实质/形式。
			
 
				+- Phase3 定稿：lint 检查覆盖度 → 渲染 HTML。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 6. 数据产出长什么样（一个真实样本）
			
 
				+
			
 
				+`runs_full/q0000/procedures/A_gzh_8f5fbfb0/` —— 命名 = `{form}_{platform}_{hash}`：
			
 
				+
			
 
				+| 文件 | 内容 |
			
 
				+|------|------|
			
 
				+| `_source.json` | 喂给提取器的输入（标题/正文/图/链接） |
			
 
				+| `_meta.json` | 元信息：case_id、来自哪个 q、form、分数、时间 |
			
 
				+| `workflow.json` | ★ 抽出的工序表（steps + 输入输出数据流） |
			
 
				+| `case-*.html` | 工序的网页可视化 |
			
 
				+| `understanding.md` | agent 对这条 case 的理解笔记 |
			
 
				+| `_trace.md` / `.trace_id` | 执行轨迹，关联 Agent Core 的 Trace 系统 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 7. 怎么跑（完整 RUNBOOK，按顺序执行）
			
 
				+
			
 
				+> 命令均从各脚本真实 argparse / server 路由核对而来，可直接复制。先看你要跑**哪一代**——产出与接口都不同。
			
 
				+
			
 
				+### 7.0 先决条件（两代都要）
			
 
				+
			
 
				+```bash
			
 
				+cd /Users/max_liu/max_liu/company/Agent   # 示例路径：换成你本地的仓库根目录（下文各处 cd 同理）
			
 
				+pip install -e .          # 让 from agent / from examples 能 import
			
 
				+# .env 需有 QWEN_API_KEY / OPEN_ROUTER_API_KEY；grounding 走 Claude SDK 需 OAuth
			
 
				+```
			
 
				+
			
 
				+> ⚠️ **端口提示**：业务 server 用 18080 / 8770 / 8771，**不是 8000**。仓库根的 `api_server.py`/`gateway_server.py`（:8000，Agent Core）与这套业务 server 是两回事，别混。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 7.1 第一代流水线 — `run_pipeline.py` → 接口 `server.py`(:18080)
			
 
				+
			
 
				+拓扑：`research → source → generate-case → decode-workflow → apply-grounding`
			
 
				+
			
 
				+```bash
			
 
				+cd /Users/max_liu/max_liu/company/Agent/examples/process_pipeline
			
 
				+
			
 
				+# ① 默认跑前 4 步（--index 是 db_requirements.json 的 0-based 下标；产出落 output/{(index+1):03d}/）
			
 
				+python run_pipeline.py --index 0
			
 
				+python run_pipeline.py --index 0 --platforms xhs,zhihu,gzh,youtube,douyin,sph   # 指定渠道
			
 
				+
			
 
				+# ② grounding 默认不跑，手动触发（三种后端任选）
			
 
				+python run_pipeline.py --index 0 --only-step apply-grounding                    # oneshot（默认）
			
 
				+python run_pipeline.py --index 0 --only-step apply-grounding --use-claude-sdk   # 走 Claude SDK
			
 
				+python run_pipeline.py --index 0 --only-step apply-grounding --model gemini      # 走 OpenRouter
			
 
				+
			
 
				+# 调试：单步 / 区间 / 增量
			
 
				+python run_pipeline.py --index 0 --only-step decode-workflow --case-index 3
			
 
				+python run_pipeline.py --index 0 --start-from source --end-at decode-workflow
			
 
				+python run_pipeline.py --index 0 --skip-existing
			
 
				+
			
 
				+# 批量（⚠️ 仅 Windows，生成 .bat 开多窗口；--start/--end 是 1-based）
			
 
				+python batch.py --start 1 --end 20 --workers 7
			
 
				+```
			
 
				+
			
 
				+**→ 最终接口**：根 `server.py`（FastAPI，端口 **18080**）
			
 
				+
			
 
				+```bash
			
 
				+python server.py     # http://localhost:18080/  （Web UI + API）
			
 
				+```
			
 
				+
			
 
				+| 接口 | 方法 | 作用 |
			
 
				+|------|------|------|
			
 
				+| `/api/requirements` | GET | 列出所有需求 |
			
 
				+| `/api/requirements/{index}/data` | GET | **取某需求的最终产出**（case/能力/工序） |
			
 
				+| `/api/requirements/{index}/pipeline-status` | GET | 跑批进度 |
			
 
				+| `/api/pipeline/run/{index}` | POST | **用接口触发整条流水线**（替代命令行） |
			
 
				+| `/api/pipeline/stop/{index}` | POST | 停止 |
			
 
				+
			
 
				+```bash
			
 
				+curl http://localhost:18080/api/requirements
			
 
				+curl http://localhost:18080/api/requirements/0/data
			
 
				+curl -X POST http://localhost:18080/api/pipeline/run/0 -H "Content-Type: application/json" -d '{}'
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 7.2 第二代流水线 — search_eval（当前主力）→ 接口 `server.py`(:8770)
			
 
				+
			
 
				+> ⚠️ 所有脚本**必须在 `search_eval/` 目录内跑**（同目录相对 import）。默认 query 源 = `evaluation/high_priority_queries_full.json`（此处核对为共 **2808 条**，与本文 §1/§3/§5 所写的 224 条、q0000…q0223 不一致，请以文件实际条数为准；用 `--start/--count` 或 `--only-q` 取子集）。
			
 
				+
			
 
				+```bash
			
 
				+cd /Users/max_liu/max_liu/company/Agent/examples/process_pipeline/script/search_eval
			
 
				+
			
 
				+# ① 搜索 + 评估（三形式）→ runs_full/q*/form_A|B|C.json （--output-dir 必填）
			
 
				+python batch_3forms.py --output-dir runs_full --start 0 --count 10
			
 
				+python batch_3forms.py --output-dir runs_full --only-q q0,q5,q9 --platforms xhs,gzh,zhihu
			
 
				+python batch_3forms.py --output-dir runs_full --reeval --only-q q0     # 只重评分不重搜
			
 
				+python batch_3forms.py --output-dir runs_full --append --only-q q0     # 加渠道不重搜
			
 
				+
			
 
				+# ② 抽工序（筛工序帖→打分→去重→并发调 procedure-dsl）→ runs_full/q*/procedures/{form}_{platform}_{hash}/
			
 
				+python batch_extract_procedures.py --output-dir runs_full --start 0 --count 10
			
 
				+python batch_extract_procedures.py --output-dir runs_full --only-q q0 --top-k 1 --model claude-sonnet-4-6
			
 
				+python batch_extract_procedures.py --output-dir runs_full --dry-run     # 只列要跑啥，不真跑
			
 
				+
			
 
				+# ③ 合并工序 → workflows/*.json
			
 
				+python build_workflows.py q0003          # 合并某个 run
			
 
				+python build_workflows.py serve 8771     # 起合并接口（:8771）
			
 
				+
			
 
				+# 调试：单条评估
			
 
				+python eval_one_sample.py render -q 1 --title "AI 抠图"
			
 
				+```
			
 
				+
			
 
				+**→ 最终接口**：`search_eval/server.py`（端口 **8770**）
			
 
				+
			
 
				+```bash
			
 
				+python server.py     # 或 python server.py 8770 ；浏览器开 http://localhost:8770
			
 
				+```
			
 
				+
			
 
				+| 接口 | 方法 | 作用 |
			
 
				+|------|------|------|
			
 
				+| `/api/data` | GET | **扫描 runs_full，返回全部帖子+评分** |
			
 
				+| `/api/procedure_status` | GET | 工序提取进度 |
			
 
				+| `/api/generate_procedure` | POST | **对某帖按需触发工序提取** |
			
 
				+| `/api/reeval` | POST | 重新评分 |
			
 
				+| `/workflows`（:8771） | GET | build_workflows serve 的合并产出 |
			
 
				+
			
 
				+```bash
			
 
				+curl http://localhost:8770/api/data
			
 
				+curl http://localhost:8771/workflows
			
 
				+curl -X POST http://localhost:8771/build -H "Content-Type: application/json" -d '{"q":"q0003"}'
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 7.3 单独抽一条工序（最小单元，不走批量）
			
 
				+
			
 
				+```bash
			
 
				+cd /Users/max_liu/max_liu/company/Agent/examples/process_pipeline/script/search_eval/procedure-dsl
			
 
				+python run_procedure_dsl.py <原始post.json> --out-dir <输出目录> --model claude-sonnet-4-6
			
 
				+# 例：python run_procedure_dsl.py input/case-5-raw.json --out-dir outputs/case-5
			
 
				+# 产出：<输出目录>/workflow.json + case-<slug>.html
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 7.4 执行坑位（务必先知道）
			
 
				+
			
 
				+| 坑 | 真相 |
			
 
				+|----|------|
			
 
				+| 端口以为是 8000 | 根 `server.py`=**18080**，search_eval `server.py`=**8770**，build_workflows serve=**8771**；三个可同时起。 |
			
 
				+| search_eval 脚本在仓库根跑 | 会 `ImportError`（相对 import）；**必须 `cd script/search_eval`**。 |
			
 
				+| `run_pipeline.py` 在哪跑 | 它自己 `os.chdir` 到仓库根 + `sys.path.insert`，在哪跑都行，但需 `pip install -e .`；它复用 `examples/process_research/config.py` 的 TRACE_STORE/SKILLS。 |
			
 
				+| Mac/Linux 跑 `batch.py` | 它生成 Windows `.bat` 开 CMD 窗口，**仅 Windows**。 |
			
 
				+| `--index` 与 output 目录 | `--index N`（0-based）→ 产出 `output/{(N+1):03d}/`（如 index 0 → output/001/）。 |
			
 
				+| 两代触发哲学不同 | 第一代「一条命令跑完整条」（接口也是触发整条）；第二代「每步独立脚本手动接力」（便于中间任意环节复跑/复核）。 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 8. 推荐阅读顺序
			
 
				+
			
 
				+1. **本文 §0~§2** —— 先建立业务直觉 + 背下核心词典（source/case/工序/能力/strategy/实质形式）。
			
 
				+2. `db_requirements.json` 翻几条 —— 看「选题」长什么样。
			
 
				+3. `prompts/researcher.prompt` —— 看 Phase1 怎么搜、怎么判质量（业务标准最直白）。
			
 
				+4. `run_pipeline.py` 顶部 docstring（前 100 行）—— 看 5 步拓扑和 CLI，**先懂流程别抠实现**。
			
 
				+5. `prompts/temp_schema/*.schema.json` —— 看数据模型（每一步产出的精确结构）。
			
 
				+6. `script/decode_workflow_agent/decode_process_prompt.md` —— 看「工序」到底怎么从一条帖子里拆出来（核心业务逻辑）。
			
 
				+7. 切到第二代：`script/search_eval/eval_prompt_template.md`（评分 rubric）→ `batch_3forms.py`（三形式）→ `procedure-dsl/spec/README.md`（工序 DSL）。
			
 
				+8. 起两个前端各点一遍：根目录 `python server.py`（:18080）+ `search_eval/server.py`（:8770），对着 UI 理解产出。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 9. 新人易踩的坑
			
 
				+
			
 
				+| 坑 | 真相 |
			
 
				+|----|------|
			
 
				+| 以为只有一套流水线 | **两代并存**：`run_pipeline.py`(老) vs `search_eval/`(新，当前重点)。 |
			
 
				+| 想在仓库里找 `output/{NNN}/` | 第一代产出**被 gitignore**，仓库里看不到；第二代产出在 `runs_full/`（在仓库里）。 |
			
 
				+| 把 `script/` 1168 个文件当代码 | 真代码只有 `script/*.py` 十几个 + `decode_workflow_agent/` + `search_eval/*.py`；其余全是数据产出。 |
			
 
				+| 混淆「工序/能力/工具」 | 三种**知识类型**，只有「工序」能抽出端到端 procedure。 |
			
 
				+| 混淆「实质/形式」 | 内容树的**两个分类维度**：实质=内容是什么，形式=怎么呈现。grounding 要同时映射两边。 |
			
 
				+| 在 Mac/Linux 跑 `batch.py` | 它生成 Windows `.bat` 开 CMD 窗口，**面向 Windows**。 |
			
 
				+| 直接改 schema | schema 里 `-boundary`/`-ref` 后缀是稳定引用契约，`schema_manager.py` 会校验，别乱动。 |
			
 
				+| 以为 decode 和 procedure-dsl 是一回事 | 同样是「抽工序」，但 decode(老,Gemini/LangChain) 与 procedure-dsl(新,Claude+DSL规范) 是两套实现。 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 10. 速查表
			
 
				+
			
 
				+| 我想… | 去看 |
			
 
				+|-------|------|
			
 
				+| 懂业务 | 本文 §0 + `eval_prompt_template.md` + `db_requirements.json` |
			
 
				+| 懂数据模型 | `prompts/temp_schema/*.schema.json` + 本文 §2 |
			
 
				+| 懂老流水线 5 步 | `run_pipeline.py` docstring + `script/` 对应 .py |
			
 
				+| 懂「工序怎么拆」 | `decode_workflow_agent/decode_process_prompt.md`（老）/ `procedure-dsl/spec/`（新） |
			
 
				+| 懂 grounding/分类 | `apply_to_grounding_agent.py` + `resource/分类库导出_*.json` + `query_tree.py` |
			
 
				+| 懂当前在做什么 | `script/search_eval/`（batch_3forms → batch_extract_procedures → build_workflows） |
			
 
				+| 看产出/复核 | 起 `search_eval/server.py`(:8770)，浏览 `runs_full/` |
			
 
				+| 调评分标准 | `eval_prompt_template.md` + `eval_one_sample.py` |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+*本文基于对源码 + 数据样本的系统遍历生成（含 4 路并行深挖）。如与代码冲突，以代码为准并请更新本文。*
			
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/docs/导读-工序DSL与grounding.md
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/docs/导读-工序DSL与grounding.md
@@ -0,0 +1,270 @@
 
				+# 工序 DSL 与 Grounding 详解（两大核心机制）
			
 
				+
			
 
				+> 配套 `导读-process_pipeline.md`。这两块是整条流水线里**业务逻辑最重、最值钱**的部分：
			
 
				+> - **工序 DSL**（第二代）：把一篇散文教程，还原成机器可读的「工序表」。回答 **「这东西是怎么一步步做出来的」**。
			
 
				+> - **Grounding**（第一代）：把提炼出的能力，挂到公司的「内容树」分类库上。回答 **「这东西在我们的知识体系里属于哪一类」**。
			
 
				+>
			
 
				+> ⚠️ **先记住一个最容易混淆的点**：两者**都用「实质 / 形式」二分**，但**不是同一个东西**——
			
 
				+> | | 工序 DSL 的 substance/form | Grounding 的 实质/形式 |
			
 
				+> |---|---|---|
			
 
				+> | 层面 | **step 级**（每个步骤一对） | **capability 级**（整个能力挂分类） |
			
 
				+> | 有无词表 | **无词表**，读懂步骤直接提炼 | **有外部分类库**（实质 911 路径 / 形式 565 路径） |
			
 
				+> | 产物 | 写进 `workflow.json` 的字段 | 写进 `case.capability.apply_to` |
			
 
				+> | 谁做 | procedure-dsl Phase2（Claude） | apply_to_grounding_agent（Claude SDK 探索分类库） |
			
 
				+>
			
 
				+> 共同的哲学是同一句话：**实质 = 内容讲什么（What）；形式 = 怎么讲（How）**。但一个在「单步」上用，一个在「整条能力挂库」上用。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 第一部分 · 工序 DSL（procedure-dsl）
			
 
				+
			
 
				+## 1. 它解决什么
			
 
				+
			
 
				+输入一篇 AI 创作教程/案例（公众号长文 / 小红书 / 视频转写 + 配图），输出一张**结构化工序表** `workflow.json`，再渲染成网页。
			
 
				+
			
 
				+> spec/README 原话：「读一篇 AI 创作教程/案例，把它背后的『做法』还原成一张工序表，存成 `workflow.json`，再渲染成一个网页。」
			
 
				+
			
 
				+核心约束是 **「客观还原，禁止臆造」**：每一步必须有图/正文的可观察依据；文本类的值（prompt/数据）必须**逐字搬全**，不能概括——所以工具链强制用 `@quote|起锚|止锚` 从原文回填，而不是 agent 自己粘。
			
 
				+
			
 
				+## 2. 数据模型：一张工序表怎么拆
			
 
				+
			
 
				+`case-data.schema.json` 定义的结构（这是机器裁判的事实来源）：
			
 
				+
			
 
				+```
			
 
				+case_data
			
 
				+├── page_title                      网页标题
			
 
				+├── source                          原帖信息（跨所有工序共享，一篇文章一份）
			
 
				+└── procedures[]                    ★ 一篇 case 可含 1~N 个独立工序（如"简单做法"+"进阶做法"）
			
 
				+    └── Procedure
			
 
				+        ├── id / name / purpose      工序标识 + 一句话意图
			
 
				+        ├── category                 顶层分类（如"产物创造"）
			
 
				+        ├── platform / author        来源平台 / 作者
			
 
				+        ├── declarations             声明块（初始输入素材）
			
 
				+        ├── type_registry            ★ 本工序内自造类型的"挂靠"表（自造词 extends 一个标准词）
			
 
				+        ├── steps[]                  ★ 步骤数组，按执行顺序
			
 
				+        └── return_row               返回行（↩ 收尾）
			
 
				+```
			
 
				+
			
 
				+**每个 Step 的字段**（理解工序的关键——一步操作被拆成 9 个层面）：
			
 
				+
			
 
				+```
			
 
				+Step (required: id, kind, via, inputs, outputs, intent)
			
 
				+├── id            步骤编号（s1/s2…；输出编号 s2o1 供后面引用）
			
 
				+├── kind          step（普通）| block（控制流父节点，分组）| nested（block 的子步骤）
			
 
				+├── group         当 kind=nested 时，指向父 block 的 id
			
 
				+│
			
 
				+│   ┌─ 整步一个的字段（描述这一步的"性质"）───────────────────┐
			
 
				+├── effect        【作用/工序位置】查 effect.json，9 个叶子枚举：
			
 
				+│                  预处理/主体生成/装配/后期/工艺规约/预准备/配套伴生/检验/交付
			
 
				+├── action        【动作】查 action.json，根→叶路径，如 "生成/元素生成"、"提取/化学提取/反推"
			
 
				+├── via           【工具】规范化的 L1 工具名：manus / nano_banana_pro / human / <llm-agent>
			
 
				+├── substance     【实质】这步内容"是什么"（理念 vs 表象层）——无词表，直接提炼，没有则 null
			
 
				+├── form          【形式】这步内容"怎么呈现"（呈现 vs 架构层）——无词表，直接提炼
			
 
				+├── directive     【字面 prompt】真正喂给工具的 prompt 字符串（用 @quote 从原文逐字回填）
			
 
				+├── intent        【目的】≤25 字一句话概括这步在干嘛
			
 
				+│   └────────────────────────────────────────────────────────┘
			
 
				+│
			
 
				+│   ┌─ 每个输入/输出各一个的字段（数据流）──────────────────────┐
			
 
				+├── inputs[]      输入：{ type 数据类型, value 实际内容, anchor 来源锚点, id }
			
 
				+└── outputs[]     输出：{ type, value, id（如 s2o1，供下一步 input 的 anchor 引用） }
			
 
				+    └────────────────────────────────────────────────────────┘
			
 
				+```
			
 
				+
			
 
				+> 数据流是怎么连起来的：第 2 步的某个 input 的 `anchor` 指向第 1 步某个 output 的 `id`（如 `s1o1`）。这样 steps 就织成一张 **DAG**——前一步的产物喂给后一步。`verify-io.py` 专门校验这张图连得对不对。
			
 
				+
			
 
				+## 3. 三个受控词表（只在 Phase2 查）
			
 
				+
			
 
				+| 维度 | 字段 | 词表文件 | 规模 | 说明 |
			
 
				+|------|------|---------|------|------|
			
 
				+| **作用** | `effect` | `taxonomy/effect.json` | 6 大类→9 叶子 | "制作"链里的工序阶段位置（前置/主体/装配/修饰/验收…） |
			
 
				+| **动作** | `action` | `taxonomy/action.json` | 8 大类(获取/提取/生成/修改/存储…)树 | 干了什么，用根→叶路径 |
			
 
				+| **类型** | `type` | `taxonomy/type.json` | 6 大类→50 叶子 | 数据的角色（程序控制/数据复用/内容/知识…） |
			
 
				+| **特性** | — | `taxonomy/feature.json` | 执行特征+控制流 | 随机/幂等/人工/读写外部 + 并行/遍历/分支… |
			
 
				+
			
 
				+**关键规则**：
			
 
				+- **实质 / 形式 没有词表**——Phase2 读懂步骤后直接提炼元素点，查不到就 null。
			
 
				+- **自造类型要挂靠**：词表里没有的类型词，写进该工序的 `type_registry`，声明它"算作"哪个标准词（如 `主角图` 挂靠 `参考图`）。既能自由起名又不污染标准词表。新类型由 `lint-case.py` 在 Phase3 **自动登记**进 `type_suggestions.md`。
			
 
				+
			
 
				+## 4. 三阶段提取流程（围绕同一个 workflow.json 滚动演化）
			
 
				+
			
 
				+```
			
 
				+┌─ Phase 1 · 搭骨架（只管结构）────────────────────────────────┐
			
 
				+│  读懂案例 → 判断有几个工序 → 切步骤 → 建干骨架                 │
			
 
				+│  此时：只有结构 + 随手起的 type 标签                          │
			
 
				+│        value / anchor / directive 全空，不查词表             │
			
 
				+│  产物：workflow.json 的骨架                                   │
			
 
				+│  (Cyber 引擎调 plan_procedures 工具自动生成；其他引擎手写)     │
			
 
				+└──────────────────────────┬───────────────────────────────────┘
			
 
				+                           ▼
			
 
				+┌─ Phase 2 · 填内容 + 归类标注 ────────────────────────────────┐
			
 
				+│  2.0 填 value/directive（用 @quote 从原文拽真值）             │
			
 
				+│      → 连数据流（input.anchor 指向前面 output.id）            │
			
 
				+│      → 跑 verify-io.py 校验数据流                             │
			
 
				+│  2.1 effect / action / type 归到三张词表                     │
			
 
				+│  2.2 substance / form 直接提炼（无词表，没涉及设 null）        │
			
 
				+│  2.3 自造类型登记进 type_registry                            │
			
 
				+│  2.4 每步填 intent（目的列）                                  │
			
 
				+└──────────────────────────┬───────────────────────────────────┘
			
 
				+                           ▼
			
 
				+┌─ Phase 3 · 检查收尾 ─────────────────────────────────────────┐
			
 
				+│  lint-case.py 检查覆盖度（缺字段/孤儿节点/未登记类型）         │
			
 
				+│  render-case.py 渲染出 case-{N}-<slug>.html                  │
			
 
				+│  ⚠️ 文本类 value 若留 <占位> 会被拒绝出 HTML（逼你填真内容）  │
			
 
				+└──────────────────────────────────────────────────────────────┘
			
 
				+```
			
 
				+
			
 
				+## 5. 工具链（全程不手写 workflow.json）
			
 
				+
			
 
				+| 脚本 | 干什么 | 为什么 |
			
 
				+|------|--------|--------|
			
 
				+| `wf-patch.py` | **唯一**的 workflow.json 写入口（`--set path=值` 或 `--patch 清单`，缺路径自动建） | 手写嵌套 JSON 易崩、edit 在 p1/p2 重复结构上会撞多匹配 |
			
 
				+| `quote-source.py` | 把 `@quote\|锚点` 标记换成原文/OCR 里的逐字真内容 | agent 不直接读长原文，省 token + 保证逐字 |
			
 
				+| `verify-io.py` | 校验数据流（input.anchor 是否都指向真实 output.id） | 保证 DAG 连通 |
			
 
				+| `lint-case.py` | 检查覆盖度 + 自动登记新类型 | Phase3 质量门 |
			
 
				+| `render-case.py` | 渲染 HTML（`renderer.py`/`styles.css`/`script.js`） | 最终产物 |
			
 
				+
			
 
				+> 入口：`procedure-dsl/run_procedure_dsl.py`（默认 claude-sonnet-4-6）。被 `search_eval/batch_extract_procedures.py` 批量并发拉起。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 第二部分 · Grounding（内容树映射，apply_to_grounding）
			
 
				+
			
 
				+## 1. 它解决什么
			
 
				+
			
 
				+把一段「做法描述」（capability 的 body），**映射到公司已有的内容分类库**——同时匹配**实质库**和**形式库**两边的节点，并对库里没有的概念**建议新增**（只新增实质）。
			
 
				+
			
 
				+业务价值：新挖到的能力与已有知识库**建立关联**，从而可被**分类检索、复用推荐**；同时持续**反哺扩充**分类库。
			
 
				+
			
 
				+## 2. 两份分类库长什么样（grounding 的知识底座）
			
 
				+
			
 
				+都在 `script/resource/`，是「path → 叶子元素数组」的扁平结构：
			
 
				+
			
 
				+```
			
 
				+实质库  分类库导出_实质_*.json   911 条路径   根 = /理念   回答"内容讲什么 What"
			
 
				+  /理念/事件/军事事件/军人事迹/英勇战功/壮烈牺牲
			
 
				+      → ["牺牲遗言", "赵一曼抗日事迹", "以身殉国", "牺牲战友"]
			
 
				+  最深到 L6，每个 path 挂一组具体"元素点"
			
 
				+
			
 
				+形式库  分类库导出_形式_*.json   565 条路径   根 = /呈现   回答"怎么讲 How"
			
 
				+  /呈现/听觉/语音/表达形式/口播讲述/叙事口播
			
 
				+      → ["口播描述", "叙事化口播", "叙事性故事口播", "纪实性口播"]
			
 
				+```
			
 
				+
			
 
				+> 这两份是从 `category_tree_56.json`（3MB 完整内容树）导出的扁平视图，专供 grounding agent 用 Grep/Read 检索。
			
 
				+
			
 
				+## 3. 判断标准：实质 vs 形式（prompt 里的核心定义）
			
 
				+
			
 
				+| | 实质（What） | 形式（How） |
			
 
				+|---|---|---|
			
 
				+| 回答 | 内容呈现/讲述了什么 | 为增强表现力采用的手法 |
			
 
				+| 判据 | 图里可见 / 文字提到；**去掉它主题就变** | **去掉它内容还在**，只是呈现方式变了 |
			
 
				+| 例（"文生图生成废弃医院"） | 「废弃医院」= 实质（产出物本体） | 「AI 生成 / prompt / 文生图」= 形式（制作手段） |
			
 
				+
			
 
				+**两者都要匹配**，用 `category_type` 字段区分。
			
 
				+
			
 
				+## 4. Agent 工作流（带 token 预算的探索）
			
 
				+
			
 
				+```
			
 
				+输入：capability.body（做法描述）+ 实质库路径 + 形式库路径
			
 
				+        │
			
 
				+        ▼  Claude Agent SDK（工具：Read / Grep / Glob，cwd=resource/）
			
 
				+┌────────────────────────────────────────────────────────────┐
			
 
				+│ 推荐高效路径（目标 8-12 次工具调用内收敛，>15 次强制收尾）：  │
			
 
				+│  1. Read 实质库前 ~1500 行 → 看顶层 path 结构                 │
			
 
				+│  2. Read 形式库前 ~1500 行 → 同上                            │
			
 
				+│  3. 针对 body 核心概念，Grep 1-3 次定位候选 path            │
			
 
				+│  4. 必要时 Read 局部 200-500 行精确定位                      │
			
 
				+│  5. 直接输出最终 JSON（不反复二次确认）                      │
			
 
				+└────────────────────────────────┬───────────────────────────┘
			
 
				+                                 ▼
			
 
				+输出：唯一一个 ```json 代码块（中间过程不许 dump 预览）
			
 
				+{
			
 
				+  "matched": [                       ← 匹配到的已有节点
			
 
				+    {
			
 
				+      "category_path": "/理念/.../壮烈牺牲",
			
 
				+      "category_type": "实质" | "形式",
			
 
				+      "action": "对该节点的操作要求（含动词，如"提示词设定"）",
			
 
				+      "ability_type": "prompt / 步骤 / 参数 / 模板 …",
			
 
				+      "matched_elements": [{ "名称": "元素名" }],
			
 
				+      "structured_content": ["与该节点相关的完整描述。排除：…"]
			
 
				+    }
			
 
				+  ],
			
 
				+  "suggested_additions": [           ← 建议新增（只新增实质，不新增形式）
			
 
				+    {
			
 
				+      "category_type": "实质",
			
 
				+      "parent_path": "库中真实存在的最深父级（从 L1 起则空串）",
			
 
				+      "suggested_level": "L3/L4/L5/L6",
			
 
				+      "reason": "缺失原因（一句话）",
			
 
				+      "suggested_categories": [{ "category_name": "…", "category_description": "…" }],
			
 
				+      ...
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+**关键规则**：
			
 
				+- 只输出库中**真实存在**的节点，`element_id` 不得捏造；匹配粒度到 **L6**。
			
 
				+- 新增**只允许实质**：仅当库里**没有任何 L6 能容纳**该概念时才建议；从 L1 逐层比对，定位到第一个不存在的层级，`parent_path` 必须是真实路径。
			
 
				+- Token 纪律：每轮工具调用都会让下轮 input 累积此前所有 tool_result，总 token 消耗随轮数**超线性增长**——所以"少调多读"（单次 Read 可到 1500-2000 行）。
			
 
				+
			
 
				+## 5. 两种执行模式
			
 
				+
			
 
				+| 模式 | 文件 | 特点 |
			
 
				+|------|------|------|
			
 
				+| **oneshot** | `apply_to_grounding_oneshot.prompt` | 把两份库全文塞进 prompt，1 次 LLM 调用，快；逐个 capability 跑 |
			
 
				+| **agent** | `apply_to_grounding_agent.prompt` | Claude SDK 带工具自己 Grep/Read 探索，更准但慢（OAuth + 多轮） |
			
 
				+
			
 
				+入口 `script/apply_to_grounding_agent.py`：支持单 case（`--case-index N`）或全量并发（max_concurrent=3），结果回写 `case.capability.apply_to` / `suggest_apply_to`，并快照到 history（可回滚）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 第三部分 · 两者如何串起来（全局位置）
			
 
				+
			
 
				+```
			
 
				+                          一条社媒帖（AI 作品 + 教程）
			
 
				+                                    │
			
 
				+        ┌───────────────────────────┴────────────────────────────┐
			
 
				+        │ 第二代 search_eval                  第一代 run_pipeline   │
			
 
				+        ▼                                                          ▼
			
 
				+   ╔═══════════════════╗                              ╔════════════════════╗
			
 
				+   ║  工序 DSL          ║                              ║  decode-workflow    ║
			
 
				+   ║  (procedure-dsl)   ║  ← 两套"抽工序"的不同实现 →   ║  (Gemini+LangChain) ║
			
 
				+   ║  Claude + DSL规范  ║                              ║                     ║
			
 
				+   ╚═════════╤═════════╝                              ╚═══════════╤════════╝
			
 
				+             │ workflow.json                                       │ case.json
			
 
				+             │ (step: effect/action/via/                           │ (含 capability)
			
 
				+             │  substance/form/io/intent)                          ▼
			
 
				+             │                                          ╔════════════════════╗
			
 
				+             │ ← step 级 substance/form                 ║  Grounding          ║
			
 
				+             │   (无词表，直接提炼)                       ║  apply_to_grounding ║
			
 
				+             │                                          ║  capability → 分类库 ║
			
 
				+             │                                          ║  (实质911/形式565)  ║
			
 
				+             │                                          ╚═══════════╤════════╝
			
 
				+             ▼                                                      ▼
			
 
				+   build_workflows.py → workflows/*.json          case.capability.apply_to[]
			
 
				+   (帖子元信息 + 工序表)                            (挂到内容树，可检索复用)
			
 
				+```
			
 
				+
			
 
				+**一句话总结二者关系**：
			
 
				+- **工序 DSL** 关注「**一条作品内部**怎么一步步做」——它的 substance/form 是描述**每一步**的内容性质，**不连外部分类库**。
			
 
				+- **Grounding** 关注「**这个能力在公司知识体系里属于哪类**」——它把 capability **挂到外部分类库**的精确节点，实现跨案例的检索与复用。
			
 
				+- 两者都用「实质=What / 形式=How」这把尺子，只是一个量「步骤」，一个量「能力归属」。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+# 速查
			
 
				+
			
 
				+| 我想… | 去看 |
			
 
				+|-------|------|
			
 
				+| 懂工序表字段 | `procedure-dsl/spec/format/case-data.schema.json` + 本文第一部分 §2 |
			
 
				+| 懂工序三阶段 | `procedure-dsl/spec/extraction/phase1/2/3-*.md` + 本文第一部分 §4 |
			
 
				+| 懂三张词表 | `procedure-dsl/spec/taxonomy/{effect,action,type}.json` |
			
 
				+| 跑工序提取 | `procedure-dsl/run_procedure_dsl.py`（被 `batch_extract_procedures.py` 批量调） |
			
 
				+| 懂 grounding 逻辑 | `prompts/apply_to_grounding_agent.prompt` + 本文第二部分 |
			
 
				+| 看分类库 | `script/resource/分类库导出_{实质,形式}_*.json` + `query_tree.py` |
			
 
				+| 跑 grounding | `script/apply_to_grounding_agent.py`（或 `run_pipeline.py --only-step apply-grounding`） |
			
 
				+| 分清两个"实质/形式" | 本文开头的对比表 + 第三部分 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+*本文基于 procedure-dsl/spec 与 apply_to_grounding prompt/分类库的实际字段生成。如与代码冲突，以代码为准并请更新本文。*
			
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/docs/项目导读.md
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/docs/项目导读.md
@@ -0,0 +1,323 @@
 
				+# 项目导读（ONBOARDING）
			
 
				+
			
 
				+> **这份文档是给「刚接手这个项目的人」看的。** 目标是让你在半天内回答三个问题：
			
 
				+> 1. 这个项目**在现实里到底做什么生意**？（业务指向）
			
 
				+> 2. 它由**哪几块**组成，每块负责什么？（功能模块）
			
 
				+> 3. 我**按什么顺序读**才能最快建立心智模型？（阅读路径）
			
 
				+>
			
 
				+> 仓库里已有两份文档，定位不同，配合本文一起看：
			
 
				+> - `PROJECT_STRUCTURE.md` —— **静态结构**：每个目录/文件的职责（查字典用）。
			
 
				+> - `HOW_IT_RUNS.md` —— **运行时机制**：一次 Agent 跑起来发生了什么（调试用）。
			
 
				+> - 本文 `项目导读.md` —— **业务主线 + 阅读顺序**（入门用，先读我）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 0. 一句话定位
			
 
				+
			
 
				+**这是一家中国内容公司的「AI 图文内容生产平台」，底层是一套自研的模块化 Agent 框架。**
			
 
				+
			
 
				+- **业务层**：把「创作需求」自动变成「AI 生成的图片/图文」——典型场景是小红书 / 抖音风格的内容、电商商品图、品牌视觉。
			
 
				+- **技术层**：一套通用的多 Agent 编排框架（包名 `cyber-agent`），核心抽象是「**所有 Agent 都是一条 Trace**」。
			
 
				+- **关系**：业务跑在框架上。`examples/` 里每个目录就是一个真实业务项目，框架（`agent/`）是它们共享的底座。
			
 
				+
			
 
				+> ⚠️ 不要被「examples」这个名字骗了——它们**不是教学示例，是真实的生产项目**（`production_plan` / `production_restore` / `requirement_extract` 等都是线上业务）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 1. 业务指向：这个项目到底在生产什么？
			
 
				+
			
 
				+把散落在各项目 `requirement.prompt` 里的中文 prompt 串起来，业务闭环长这样：
			
 
				+
			
 
				+```
			
 
				+创作简报 / 内容选题
			
 
				+      │
			
 
				+      ▼
			
 
				+①需求提取  requirement_extract     从「制作点 / 亮点」描述里抽出可执行的制作需求
			
 
				+      │   （如："人物白裙V字露背设计清晰可见"、"调色板颜料厚度感、立体笔触纹理"）
			
 
				+      ▼
			
 
				+②内容分析  content_tree_analyst    分析内容分类树，推断需要哪些视觉生产能力
			
 
				+      │
			
 
				+      ▼
			
 
				+③工具调研  tool_research           深度调研 SOTA 图像工具（ComfyUI / Flux / ControlNet / LoRA…）
			
 
				+      │                            把「工具怎么用、参数、案例」沉淀成知识
			
 
				+      ▼
			
 
				+④工序规划  production_plan         在执行前设计「制作工序计划」（哪步用哪个工具、怎么串）
			
 
				+      │
			
 
				+      ▼
			
 
				+⑤生产执行  production_restore      Craftsman 角色按 pipeline 调工具，真正把图生出来 / 还原出来
			
 
				+      │
			
 
				+      ▼
			
 
				+⑥质量评估  （evaluate 工具 / 子分支）  评估产出质量，不达标则回到上游迭代
			
 
				+      │
			
 
				+      ▼
			
 
				+⑦知识沉淀  knowledge_extract → KnowHub   把好用的工具用法、工序、设计模式存进集体记忆，下次复用
			
 
				+```
			
 
				+
			
 
				+**判断业务领域的证据**（都来自真实 prompt / 配置）：
			
 
				+
			
 
				+| 证据 | 出处 | 说明 |
			
 
				+|------|------|------|
			
 
				+| 全中文 prompt，"制作 / 工序 / 需求 / 质感" 术语 | `examples/*/requirement.prompt` | 面向中文创作 |
			
 
				+| "图片 / 画面 / 风格 / 质感 / 笔触" 高频 | 各 `*.prompt` | 视觉内容生产 |
			
 
				+| 调研 ComfyUI / Flux / ControlNet / LoRA | `tool_research/`、`production_plan/` | 真实 AI 绘图工具栈 |
			
 
				+| 小红书 / B站 / 抖音 / YouTube 内容搜索 | `agent/tools/builtin/content/platforms/` | 社媒内容生产/选题 |
			
 
				+| 飞书联系人（多名团队成员） | `config/feishu_contacts.json` | 内部团队协作（飞书办公） |
			
 
				+| 默认 LLM 用 Qwen（阿里千问） | 各 `config.py` 的 `RUN_CONFIG` | 中文任务优先 |
			
 
				+| 组织作用域 `org:cybertogether` | 各 `config.py` 的 KnowledgeConfig | 公司名线索 |
			
 
				+
			
 
				+**结论**：这是一个 **AI 驱动的内容生产中台**——用多 Agent 自动完成「需求 → 调研 → 规划 → 生成 → 评估 → 知识复用」的视觉内容生产流水线。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 2. 系统全景：五大子系统
			
 
				+
			
 
				+这是一个 **monorepo（多子系统单仓库）**，不是一个 app。包名 `cyber-agent`，但源码目录叫 `agent/`（pip 名 ≠ import 名）。几乎所有子系统都假设你跑过 `pip install -e .`。
			
 
				+
			
 
				+```
			
 
				+        用户 / 飞书                                    Agent ↔ Agent / 用户
			
 
				+          │                                                  │
			
 
				+          ▼  管理-执行关系                                    ▼  协作-沟通关系
			
 
				+   ┌─────────────┐                                   ┌──────────────────────┐
			
 
				+   │  gateway/   │  任务下发、Agent 注册              │ im-server / im-client │  对等消息、数字员工队列
			
 
				+   │ （半实现）  │                                   │     （已完整）        │
			
 
				+   └──────┬──────┘                                   └──────────┬───────────┘
			
 
				+          │                                                     │
			
 
				+          └──────────────────┐               ┌──────────────────┘
			
 
				+                             ▼               ▼
			
 
				+              ┌──────────────────────────────────────────┐
			
 
				+              │            agent/  （Agent Core）          │  ★一切的底座
			
 
				+              │  AgentRunner · Trace · GoalTree · Message  │
			
 
				+              │  Tool 注册表（core/browser/content/im…）   │
			
 
				+              │  Skill 注入（system-prompt 级知识）        │
			
 
				+              │  LLM Provider（qwen/openrouter/gemini…）    │
			
 
				+              └───────┬──────────────────────────┬─────────┘
			
 
				+                      │ HTTP                       │ 本进程
			
 
				+                      ▼                            ▼
			
 
				+        ┌──────────────────────────┐   ┌──────────────────────────┐
			
 
				+        │        knowhub/          │   │      examples/<proj>/     │
			
 
				+        │  集体记忆（PG+pgvector）  │   │  真实业务项目（生产流水线）│
			
 
				+        │  Librarian / Research     │   │  config.py + prompt + 工具 │
			
 
				+        └────────────┬─────────────┘   └──────────────────────────┘
			
 
				+                     │
			
 
				+          ┌──────────┴──────────┐
			
 
				+          ▼                     ▼
			
 
				+   knowhub/frontend        frontend/react-template
			
 
				+   知识管理 UI(React19)     Trace/目标树可视化(React18+D3)
			
 
				+```
			
 
				+
			
 
				+### 核心抽象：「所有 Agent 都是一条 Trace」
			
 
				+
			
 
				+这是理解整个项目的**第一性原理**，务必先吃透：
			
 
				+
			
 
				+- 一次 Agent 执行 = 一个 `Trace` 对象（`agent/trace/models.py:Trace`）。
			
 
				+- 主 Agent、子 Agent、人类协助——**都是同一种数据类型 Trace**，靠 `parent_trace_id` / `parent_goal_id` 链成一棵树。
			
 
				+- 子 Trace 的 ID 是层级式的：`{父uuid}@{mode}-{时间戳}-{序号}`。
			
 
				+- 每一条 REST API、每一种上下文压缩策略、前端那张 DAG 图，**全都围绕这个模型展开**。
			
 
				+
			
 
				+记住这句话，后面所有设计你都能自洽地解释。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 3. 各功能块逐一拆解
			
 
				+
			
 
				+### 3.1 `agent/` —— Agent Core（核心运行时）★最重要
			
 
				+
			
 
				+> 独立于其他子系统，可单独使用。它就是那台「Agent 引擎」。
			
 
				+
			
 
				+| 子模块 | 职责 | 关键文件 |
			
 
				+|--------|------|---------|
			
 
				+| `core/` | 执行引擎 | `runner.py:AgentRunner`（≈3000 行，LLM↔工具主循环）、`presets.py`（角色预设）、`dream.py`（离线记忆总结） |
			
 
				+| `trace/` | 执行追踪 + 计划 + REST/WS | `models.py:Trace/Message`、`goal_models.py:GoalTree/Goal`、`store.py:FileSystemTraceStore`、`compaction.py`（上下文压缩） |
			
 
				+| `tools/` | 工具系统 | `registry.py`（`@tool` 装饰器 + 注册表）、`builtin/`（50+ 内置工具） |
			
 
				+| `skill/` | 技能注入 | `skill_loader.py`（加载 .md）、`skills/*.md`（core/planning/browser/research） |
			
 
				+| `llm/` | LLM 适配 + 计费 | `qwen.py`（默认）、`openrouter.py`、`gemini.py`、`pricing.py`（读 `config/pricing.yaml`） |
			
 
				+| `client.py` | **公开 SDK 入口** | `invoke_agent()`——`remote_*` 前缀走 HTTP，否则走本进程 Runner |
			
 
				+
			
 
				+**三层参数模型**（理解 `AgentRunner` 配置的关键）：
			
 
				+1. **基础设施**（构造时注入一次）：`trace_store`、`llm_call`、`skills_dir`。
			
 
				+2. **`RunConfig`**（每次运行）：`model`、`trace_id`（续跑）、`agent_type`（预设）、`tools`/`tool_groups`、`goal_compression`、`knowledge`、`max_iterations`。
			
 
				+3. **Messages**：OpenAI 格式对话数组。
			
 
				+
			
 
				+**主循环（ReAct）干了什么**（`runner.py:_agent_loop`）：
			
 
				+```
			
 
				+准备 Trace → 构建历史(+注入 system prompt/skills) → 循环{
			
 
				+    检查上下文是否超限 → 必要时进「侧分支」压缩
			
 
				+    调 LLM(history + tools)
			
 
				+    存 assistant 消息
			
 
				+    执行 tool_call → 存 tool 结果
			
 
				+} 直到 end_turn / 达到 max_iterations / 被取消
			
 
				+```
			
 
				+
			
 
				+**工具分组（白名单机制）**：每个工具 `@tool(groups=[...])` 声明分组；`RunConfig.tool_groups` 是白名单（默认 `["core"]`）。分组有：`core` / `browser` / `content`（跨平台内容搜索）/ `toolhub`（远程 AI 工具库，图像生成在这）/ `feishu` / `im` / `resource` / `knowledge_internal`（librarian 专用）。
			
 
				+
			
 
				+**侧分支（side branch）**——容易踩坑，改 `runner.py` 前必读 `compaction.py`：
			
 
				+压缩 / 反思 / 知识评估 这三类操作**不污染主对话**，而是临时开一个分支跑几轮，把摘要合并回主线再继续。三种压缩模式：`none` / `on_complete`（目标完成即压）/ `on_overflow`（超限才压）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3.2 `knowhub/` —— 集体记忆平台（共享经验库）
			
 
				+
			
 
				+> 解决的问题：现有生态（Glama/Smithery 等）只管「有哪些工具」，**不管「Agent 实际怎么用这些工具」**。KnowHub 存的是「用了工具之后发生了什么」这一层经验。
			
 
				+
			
 
				+- **形态**：FastAPI + PostgreSQL + pgvector（向量检索），≈2800 行的 `server.py`，约 36 个 API 端点。
			
 
				+- **数据模型**（`docs/schema.md`）：6 个实体（`knowledge` 知识 / `resource` 原始素材 / `capability` 原子能力 / `requirement` 业务需求 / `tool` 工具 / `strategy` 工序策略）+ 一堆关联表。
			
 
				+- **职责分工**：Agent 端负责「搜索、评估、提炼、抽取」；服务端负责「存储、去重、简单聚合」。**它不是工具目录，是经验层**。
			
 
				+- **两个远端 Agent**（通过 HTTP `POST /api/agent` 被调用，每次请求起一个 `AgentRunner`）：
			
 
				+  - `remote_librarian`（`agents/librarian.py`）：知识查询整合（ask 模式）/ 知识上传编排（upload 模式）。
			
 
				+  - `remote_research`（`agents/research.py`）：全网深度调研（联网搜索 + 浏览器 + 综合）。
			
 
				+- **知识处理流水线**（`server.py:KnowledgeProcessor`）：新知识 `pending → 去重(向量召回+LLM判断) → 工具关联分析 → approved`。
			
 
				+- **前端**：`knowhub/frontend`（React 19 + Tailwind v4），管理知识库、能力、需求、工具、工序的 UI。
			
 
				+
			
 
				+**关键调用链**（务必记住）：
			
 
				+```
			
 
				+业务 Agent 调 agent(agent_type="remote_librarian", task=..., skills=["ask_strategy"])
			
 
				+  └─► agent/tools/builtin/subagent.py  →  HTTP POST  KNOWHUB_API/api/agent
			
 
				+       └─► knowhub/server.py  →  knowhub/agents/librarian.py
			
 
				+            └─► 内部起 AgentRunner（tool_groups=knowledge_internal）→ 读写 PostgreSQL
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3.3 `gateway/` —— 用户→Agent 任务下发（管理-执行关系）
			
 
				+
			
 
				+> ⚠️ **文档与现实有差距，先看代码再动手。**
			
 
				+
			
 
				+- **设计**（README 画的）：channels（飞书渠道）/ lifecycle（Trace/工作区生命周期）/ executor（任务调度）三层。
			
 
				+- **现实**：**只落地了** `core/registry.py`（Agent 注册表 + 心跳）和 `core/router.py`（WebSocket 消息路由）。channels / lifecycle / executor **还没有代码**（router.py 里能看到 TODO）。
			
 
				+- **现状定性**：它目前是一个「**Agent 注册表 + 消息中转**」，**还不是任务调度器**，也还没和 Trace/工作区打通。
			
 
				+- 入口：`gateway_server.py`（绑定 `:8000`，**不能和 `api_server.py` 同时跑**）。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3.4 `im-server/` + `im-client/` —— Agent 对等通讯（协作-沟通关系）
			
 
				+
			
 
				+> 与 gateway 相反，**这两块是完整可用的**。
			
 
				+
			
 
				+- **概念**：「数字员工」消息队列。每个 Agent 可以有多个「窗口」(`chat_id`)，IM Server 记录哪些窗口在线并路由消息。
			
 
				+- `im-server/main.py`：FastAPI + WebSocket，内存路由表 `(contact_id, chat_id) → WebSocket`；`contact_store.py` 文件持久化联系人。
			
 
				+- `im-client/client.py`：`IMClient` 管理一个 Agent 的多个 `ChatWindow`；存储在 `~/.gateway/workspaces/{id}/...`，含 `chatbox.jsonl`（历史）/ `in_pending.json`（待处理）；全异步（监听/发送/通知三个 worker）+ 自动重连。
			
 
				+- `im-client/tools.py`：给 Agent 在循环里用的 `im_setup / send / receive / check_notification / get_contacts / ...` 工具。
			
 
				+
			
 
				+**gateway vs im 的本质区别**（别搞混）：
			
 
				+
			
 
				+| | gateway | im-server/client |
			
 
				+|---|---|---|
			
 
				+| 关系 | 管理-执行（用户下任务、Agent 汇报） | 协作-沟通（Agent ↔ Agent/用户 平等对话） |
			
 
				+| 形态 | 飞书个人助理 | 数字员工消息队列 |
			
 
				+| 现状 | 半实现（仅注册+路由） | 已完整 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3.5 `frontend/` —— Trace / 目标树可视化
			
 
				+
			
 
				+- `frontend/react-template`（React 18 + Semi UI + **D3**）：当前主力前端。把一条 Trace 的执行过程画成 **GoalTree DAG（有向无环图）**——节点是目标里程碑，边是目标之间的消息序列，子 Trace 可展开/折叠，实时 WebSocket 推送更新。
			
 
				+- **前后端契约在 `frontend/API.md`（v4.0）**——改任何 `/api/traces*` 接口或 WS payload **之前必读**，DAG 渲染器强依赖 `cumulative_stats` / `sub_trace_ids` 的精确形状。
			
 
				+- 核心渲染：`src/components/FlowChart/FlowChart.tsx` + `hooks/useFlowChartData.ts`（GoalTree → DAG 布局）。
			
 
				+- `frontend/htmlTemplate`、`frontend/image`：旧版静态模板和设计稿，非主力。
			
 
				+
			
 
				+> 注意：`frontend/react-template`（Trace 可视化，React 18 + Semi UI）和 `knowhub/frontend`（知识管理，React 19 + Tailwind）是**两个不同的前端**，UI 框架不同，组件不要互相复制。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+### 3.6 配套件：`examples/`、`skills4claude/`、`config/`、`vendor/`
			
 
				+
			
 
				+- **`examples/<proj>/`** —— 真实业务项目，结构约定：
			
 
				+  ```
			
 
				+  config.py          # 必须定义 RUN_CONFIG（RunConfig 实例）
			
 
				+  presets.json       # 可选：角色预设
			
 
				+  requirement.prompt # 可选：$system$ / $user$ 分段的提示模板
			
 
				+  skills/  tools/    # 可选：项目私有 skill / 工具（import tools 即触发 @tool 注册）
			
 
				+  run.py             # 入口
			
 
				+  ```
			
 
				+  `invoke_agent(project_root=...)` 会自动加载以上。
			
 
				+- **`skills4claude/`** —— **Claude Code（开发工具）的 skill 源**，靠 `install.sh` symlink 到 `~/.claude/skills/`。含 `agent`（调 invoke_agent）/ `knowhub` / `toolhub`（远程 AI 工具，图像生成）/ `content-search`（11 平台内容搜索）。
			
 
				+  > **别和 `agent/skill/` 搞混**：`skills4claude/` 是给「在仓库里用 Claude Code 的人」的 CLI；`agent/skill/` 是注入到「运行时 Agent 的 LLM 上下文」里的知识片段。两者互补。
			
 
				+- **`config/`** —— 全局配置：`pricing.yaml`（各家 LLM 定价，用于成本核算）、`feishu_contacts.json`（飞书联系人缓存）。
			
 
				+- **`vendor/opencode`** —— git 子模块，被 `agent/tools/adapters/opencode_bun_adapter.py` 调用。空的话跑 `git submodule update --init`。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 4. 跑起来：五个进程入口
			
 
				+
			
 
				+| 进程 | 命令 | 作用 | 端口 |
			
 
				+|------|------|------|------|
			
 
				+| Trace API | `python api_server.py` | Trace/Goal/Message 的 REST + WebSocket | :8000 |
			
 
				+| Gateway | `python gateway_server.py` | Agent 注册 + 路由 | :8000 |
			
 
				+| KnowHub | `python -m knowhub.server` | 知识库 + 远端 Agent | （自带） |
			
 
				+| Trace UI | `cd frontend/react-template && npm run dev` | 可视化 | :3000 |
			
 
				+| KnowHub UI | `cd knowhub/frontend && npm run dev` | 知识管理 | （vite 默认） |
			
 
				+
			
 
				+> `api_server.py` 和 `gateway_server.py` **都绑 :8000，不能同时跑**（除非改端口）。
			
 
				+> `api_server.py` 用 `reload=True`，启动时任何 import 异常会被 uvicorn 静默吞掉并反复重载——服务没响应就去看 `info.log` / `debug.log`。
			
 
				+
			
 
				+环境变量（复制 `.env.template` → `.env`）：`KNOWHUB_API`、`QWEN_BASE_URL/QWEN_API_KEY`、`OPEN_ROUTER_API_KEY`、`KNOWHUB_DB_*`。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 5. 推荐阅读顺序（新人路径）
			
 
				+
			
 
				+按这个顺序走，每一步都建立在上一步之上，避免一上来就陷进 3000 行的 `runner.py`。
			
 
				+
			
 
				+### 第 0 步 · 建立业务直觉（30 分钟）
			
 
				+1. **本文第 1 节**（业务闭环）——先知道这个项目在生产什么。
			
 
				+2. 翻 2~3 个真实项目的 `requirement.prompt`，**亲眼看业务长什么样**：
			
 
				+   - `examples/requirement_extract/requirement.prompt`（需求怎么抽）
			
 
				+   - `examples/production_plan/requirement.prompt`（工序怎么规划）
			
 
				+   - `examples/production_restore/requirement.prompt`（图怎么生产）
			
 
				+
			
 
				+### 第 1 步 · 吃透核心抽象（半天）
			
 
				+3. `agent/README.md` —— Agent Core 总览（工具分组、preset、REST API）。
			
 
				+4. **本文第 2 节的「所有 Agent 都是一条 Trace」** + `agent/trace/models.py`（`Trace`/`Message`）、`agent/trace/goal_models.py`（`GoalTree`/`Goal`）。这是地基。
			
 
				+5. `agent/docs/architecture.md` —— Runner + Trace + Tool 三者怎么交互。
			
 
				+6. `HOW_IT_RUNS.md` —— 一次运行的完整时序（配合读 `agent/core/runner.py:_agent_loop`，**先读流程别抠细节**）。
			
 
				+
			
 
				+### 第 2 步 · 看一条完整调用链（半天）
			
 
				+7. `agent/client.py:invoke_agent` —— 本地 vs 远端（`remote_`）分发。
			
 
				+8. 跟一遍**本地 Agent 启动链**：`examples/<proj>/run.py → invoke_agent → AgentRunner`。
			
 
				+9. 跟一遍**远端 Agent 链**：`subagent.py → HTTP → knowhub/server.py → librarian.py`（配合 `knowhub/docs/remote-agents.md`）。
			
 
				+
			
 
				+### 第 3 步 · 按需深入（用到再看）
			
 
				+10. 改后端 API / 前端：先读 `frontend/API.md`（契约），再看 `frontend/react-template/src/components/FlowChart/`。
			
 
				+11. 碰知识库：`knowhub/docs/schema.md`（数据模型）+ `knowhub/docs/processing-pipeline.md`（去重流水线）。
			
 
				+12. 碰上下文压缩 / 长任务：`agent/trace/compaction.py`（侧分支，**改 runner 前必读**）。
			
 
				+13. 碰 Agent 间通讯：`im-server/README.md` + `im-client/AGENT_GUIDE.md`。
			
 
				+
			
 
				+### 第 4 步 · 全局收口
			
 
				+14. 回到 `PROJECT_STRUCTURE.md` 当字典，按需查任何目录/文件的职责。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 6. 新人最容易踩的坑（务必先知道）
			
 
				+
			
 
				+| 坑 | 真相 |
			
 
				+|----|------|
			
 
				+| 以为 `examples/` 是教学示例 | 它们是**真实生产项目**（production_* / requirement_extract 等）。 |
			
 
				+| 照 `gateway/README.md` 找 channels/lifecycle/executor | **没实现**，只有 registry + router。 |
			
 
				+| 把两个前端的组件互相复制 | React 18(Semi) vs React 19(Tailwind)，**不同栈**。 |
			
 
				+| 同时跑 `api_server.py` 和 `gateway_server.py` | **都绑 :8000**，冲突。 |
			
 
				+| 直接改 `/api/traces*` 接口 | DAG 渲染器强依赖精确 payload，**先读 `frontend/API.md`**。 |
			
 
				+| 直接改 `runner.py` 的消息迭代 | **先读 `compaction.py` 的侧分支**，否则会破坏压缩/反思。 |
			
 
				+| `api_server.py` 改完没反应 | `reload=True` 会静默吞 import 异常，去看 `info.log`/`debug.log`。 |
			
 
				+| 把 `skills4claude/` 和 `agent/skill/` 当一回事 | 前者给 Claude Code 用户的 CLI，后者注入运行时 Agent 的 LLM 上下文。 |
			
 
				+| `from agent import ...` 报错 | 先 `pip install -e .`（包名 `cyber-agent`，目录 `agent/`）。 |
			
 
				+| `vendor/opencode` 是空的 | `git submodule update --init`。 |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 7. 速查表
			
 
				+
			
 
				+| 我想… | 去看 |
			
 
				+|-------|------|
			
 
				+| 理解业务 | 本文 §1 + `examples/*/requirement.prompt` |
			
 
				+| 理解核心抽象 | 本文 §2 + `agent/trace/models.py`、`goal_models.py` |
			
 
				+| 看主循环 | `agent/core/runner.py:_agent_loop` + `HOW_IT_RUNS.md` |
			
 
				+| 加一个工具 | `agent/tools/registry.py`（`@tool`）+ `agent/tools/builtin/` 找同类 |
			
 
				+| 加一个技能 | `agent/skill/skills/*.md` 仿写 |
			
 
				+| 接知识库 | `knowhub/docs/schema.md` + `knowhub/server.py` |
			
 
				+| 改可视化 | `frontend/API.md` + `frontend/react-template/src/components/FlowChart/` |
			
 
				+| 起一个新业务项目 | 仿照 `examples/<proj>/` 的 `config.py + requirement.prompt + tools/` |
			
 
				+| 查任意目录职责 | `PROJECT_STRUCTURE.md` |
			
 
				+
			
 
				+---
			
 
				+
			
 
				+*本文档基于对源码的系统遍历 + 5 路并行探查生成。如与代码冲突，以代码为准，并请更新本文。*
			
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/index.html
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/index.html
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/run_search.py
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/run_search.py
@@ -0,0 +1,212 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""固定 query · 同义扩展 + 合并去重 + 评估
			
 
				+================================================================================
			
 
				+
			
 
				+和 batch_3forms.py 的区别：
			
 
				+  - 不生成 / 不选 query（不依赖动作×类型矩阵），直接用本文件顶部写死的 4 组 query；
			
 
				+  - 每组 query = 产品名（锁死）+ 意图词（扩同义）；多措辞一次性丢给 search_all，
			
 
				+    search_all 内建按 (platform, cid) 去重 + found_by_queries 记录命中措辞 → 天然「合并去重」；
			
 
				+  - 产出落 runs_full/q000N/form_A.json（单一形式 A），schema 与 batch_3forms 完全一致，
			
 
				+    所以本目录的 server.py / index.html 直接复用，无需改数据契约。
			
 
				+
			
 
				+搜索 / 评估 / 转写 / 英文平台中译英 全部复用 ../search_and_evaluate.py 的函数（只读，不改原文件）。
			
 
				+
			
 
				+用法：
			
 
				+  python run_search.py                       # 跑全部 4 组 query（已存在的 form_A.json 跳过）
			
 
				+  python run_search.py --only-q q0,q2        # 只跑指定 query
			
 
				+  python run_search.py --force               # 覆盖已存在产物
			
 
				+  python run_search.py --max-count 20 --platforms xhs,x,gzh --eval-model gemini-flash-lite
			
 
				+"""
			
 
				+import argparse
			
 
				+import asyncio
			
 
				+import json
			
 
				+import os
			
 
				+import sys
			
 
				+from pathlib import Path
			
 
				+
			
 
				+# ── 项目根入 sys.path，让 from examples... / from agent... 可 import ──────────────
			
 
				+PROJECT_ROOT = Path(__file__).resolve().parents[5]   # …/Agent
			
 
				+sys.path.insert(0, str(PROJECT_ROOT))
			
 
				+
			
 
				+from dotenv import load_dotenv
			
 
				+load_dotenv()
			
 
				+
			
 
				+# 复用引擎函数（只读 import，原文件零改动）
			
 
				+from examples.process_pipeline.script.search_eval.search_and_evaluate import (
			
 
				+    search_all, evaluate_posts, transcribe_video_posts, build_query_overrides,
			
 
				+)
			
 
				+from examples.process_pipeline.script.llm_evaluate_sources import (
			
 
				+    build_eval_llm_call, EVAL_MODELS, DEFAULT_EVAL_MODEL,
			
 
				+)
			
 
				+
			
 
				+# 同目录 db 模块（双写 MySQL）；不可用则降级为只写本地 json
			
 
				+sys.path.insert(0, str(Path(__file__).resolve().parent))
			
 
				+try:
			
 
				+    import db as _db
			
 
				+except Exception:
			
 
				+    _db = None
			
 
				+
			
 
				+
			
 
				+# ════════════════════════════════════════════════════════════════════════════════
			
 
				+#  ★ 业务配置：4 组 query + 意图词同义表（要改词直接改这里）
			
 
				+# ════════════════════════════════════════════════════════════════════════════════
			
 
				+
			
 
				+QUERIES = [
			
 
				+    {"id": "q0000", "product": "GPT image2",      "intent": "评测"},
			
 
				+    {"id": "q0001", "product": "GPT image2",      "intent": "案例"},
			
 
				+    {"id": "q0002", "product": "nano banana pro", "intent": "评测"},
			
 
				+    {"id": "q0003", "product": "nano banana pro", "intent": "案例"},
			
 
				+]
			
 
				+
			
 
				+# 产品名锁死，只对意图词扩同义；措辞 = f"{product} {syn}"
			
 
				+INTENT_SYNONYMS = {
			
 
				+    "评测": ["评测", "测评", "实测", "体验"],
			
 
				+    "案例": ["案例", "作品", "效果", "实例"],
			
 
				+}
			
 
				+
			
 
				+DEFAULT_PLATFORMS = "xhs,x,gzh"
			
 
				+DEFAULT_MAX_COUNT = 10   # 每条措辞每渠道取帖数上限（默认 10，控制成本；去重后池子更小）
			
 
				+
			
 
				+
			
 
				+def phrasings_for(product: str, intent: str) -> list[str]:
			
 
				+    """产品名 + 各意图同义 → 措辞列表（去重保序）。意图词无同义表则原样单条。"""
			
 
				+    syns = INTENT_SYNONYMS.get(intent, [intent])
			
 
				+    seen, out = set(), []
			
 
				+    for s in syns:
			
 
				+        q = f"{product} {s}".strip()
			
 
				+        if q not in seen:
			
 
				+            seen.add(q); out.append(q)
			
 
				+    return out
			
 
				+
			
 
				+
			
 
				+# ── 单个 query：多措辞搜索 → 去重池 → 转写 → 评估 → 落盘 ───────────────────────────
			
 
				+
			
 
				+async def run_one_query(spec, args, eval_llm, eval_model_id, out_file: Path) -> dict:
			
 
				+    product, intent = spec["product"], spec["intent"]
			
 
				+    base_query = f"{product} {intent}"               # 评估锚点 = 用户真实意图
			
 
				+    phrasings = phrasings_for(product, intent)
			
 
				+    platforms = [p.strip() for p in args.platforms.split(",") if p.strip()]
			
 
				+
			
 
				+    print(f"\n▶ {spec['id']}  基准={base_query!r}  措辞={phrasings}")
			
 
				+
			
 
				+    # 英文平台(x/youtube)：把全部措辞一次性翻成英文（中文在 x 上召回差）
			
 
				+    overrides = await build_query_overrides(platforms, phrasings, eval_llm, eval_model_id)
			
 
				+
			
 
				+    # search_all：对 (platform × 每个措辞) 并发搜 → 按 (platform, cid) 去重合并，
			
 
				+    # found_by_queries 记录是哪些措辞命中（便于回溯哪个同义词最有效）
			
 
				+    sources = await search_all(platforms, phrasings, args.max_count, args.max_concurrent,
			
 
				+                               query_overrides=overrides)
			
 
				+
			
 
				+    try:
			
 
				+        from examples.process_pipeline.script.extract_sources import _convert_timestamps
			
 
				+        _convert_timestamps(sources)
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+    # 视频帖转写：字幕并入正文再评估（默认开）
			
 
				+    if not args.no_transcribe and sources:
			
 
				+        n = await transcribe_video_posts(sources, concurrency=args.max_concurrent)
			
 
				+        if n:
			
 
				+            print(f"   🎙️  视频转写 {n} 条")
			
 
				+
			
 
				+    cost = 0.0
			
 
				+    if not args.no_eval and sources:
			
 
				+        sources, cost = await evaluate_posts(
			
 
				+            sources, "", eval_llm, eval_model_id, args.max_concurrent,
			
 
				+            include_images=not args.no_images, max_images=args.max_images,
			
 
				+            image_mode=args.image_mode, query=base_query,
			
 
				+        )
			
 
				+    for s in sources:
			
 
				+        imgs = s.pop("_image_data_urls", None)
			
 
				+        if imgs is not None:
			
 
				+            s["images_sent"] = len(imgs)
			
 
				+
			
 
				+    failed = sum(1 for s in sources if (s.get("llm_evaluation") or {}).get("_error"))
			
 
				+
			
 
				+    out_file.parent.mkdir(parents=True, exist_ok=True)
			
 
				+    out_file.write_text(json.dumps({
			
 
				+        "form": "A",                       # 单一形式；沿用 "A" 让 server/index 渲染零改动
			
 
				+        "query": base_query,               # 评估锚点
			
 
				+        "original_q": base_query,          # server 用它做 query 标签
			
 
				+        "requirement": base_query,
			
 
				+        "phrasings": phrasings,            # 本次实际搜索用的全部措辞（溯源）
			
 
				+        "platforms": platforms,
			
 
				+        "total": len(sources), "failed": failed,
			
 
				+        "results": sources,                # 去重池：帖子 + llm_evaluation + found_by_queries
			
 
				+    }, ensure_ascii=False, indent=2), encoding="utf-8")
			
 
				+    print(f"   ✅ {spec['id']} → total={len(sources)} failed={failed} cost=${cost:.4f} → {out_file}")
			
 
				+
			
 
				+    # 双写 MySQL（失败不阻断，本地 json 已落盘）
			
 
				+    if _db:
			
 
				+        n = _db.upsert_posts(spec["id"], base_query, sources)
			
 
				+        if n:
			
 
				+            print(f"   🗄️  写库 fqe_posts: {n} 行")
			
 
				+
			
 
				+    return {"id": spec["id"], "total": len(sources), "failed": failed, "cost": round(cost, 4)}
			
 
				+
			
 
				+
			
 
				+# ── 主流程 ───────────────────────────────────────────────────────────────────────
			
 
				+
			
 
				+async def run(args):
			
 
				+    output_dir = Path(args.output_dir)
			
 
				+    if not output_dir.is_absolute():
			
 
				+        output_dir = Path(__file__).resolve().parent / args.output_dir
			
 
				+    output_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    # --only-q 过滤（接受 q0 / q0000 / 0 等写法，按尾号匹配）
			
 
				+    specs = QUERIES
			
 
				+    if args.only_q:
			
 
				+        wanted = set()
			
 
				+        for t in args.only_q.split(","):
			
 
				+            t = t.strip().lstrip("q").lstrip("Q")
			
 
				+            if t.isdigit():
			
 
				+                wanted.add(int(t))
			
 
				+        specs = [s for s in QUERIES if int(s["id"][1:]) in wanted]
			
 
				+        if not specs:
			
 
				+            print(f"❌ --only-q {args.only_q!r} 没匹配到任何 query"); return
			
 
				+
			
 
				+    eval_llm, eval_model_id = build_eval_llm_call(args.eval_model)
			
 
				+    print(f"🧠 评估模型 {args.eval_model} -> {eval_model_id} | 渠道 {args.platforms} | 每措辞每渠道≤{args.max_count}")
			
 
				+
			
 
				+    summary = []
			
 
				+    for spec in specs:
			
 
				+        out_file = output_dir / spec["id"] / "form_A.json"
			
 
				+        if out_file.exists() and not args.force:
			
 
				+            print(f"⏭️  {spec['id']} 已存在 form_A.json → 跳过（--force 覆盖）")
			
 
				+            continue
			
 
				+        stat = await run_one_query(spec, args, eval_llm, eval_model_id, out_file)
			
 
				+        summary.append(stat)
			
 
				+
			
 
				+    (output_dir / "summary.json").write_text(json.dumps({
			
 
				+        "platforms": args.platforms, "eval_model": eval_model_id,
			
 
				+        "max_count": args.max_count, "per_query": summary,
			
 
				+    }, ensure_ascii=False, indent=2), encoding="utf-8")
			
 
				+    if summary:
			
 
				+        tot = sum(s["total"] for s in summary)
			
 
				+        cost = sum(s["cost"] for s in summary)
			
 
				+        print(f"\n{'='*60}\n📊 完成 {len(summary)} 组 query · 去重后共 {tot} 帖 · 评估成本 ${cost:.4f}")
			
 
				+        print(f"→ 启动查看：cd {Path(__file__).resolve().parent}  &&  python server.py  (http://0.0.0.0:8770)")
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    p = argparse.ArgumentParser(description="固定 query · 同义扩展 + 合并去重 + 评估")
			
 
				+    p.add_argument("--output-dir", default="runs_full", help="产出目录（默认 ./runs_full，server 扫这里）")
			
 
				+    p.add_argument("--only-q", default=None, help="只跑指定 query，逗号分隔，如 q0,q2")
			
 
				+    p.add_argument("--platforms", default=DEFAULT_PLATFORMS, help=f"逗号分隔渠道（默认 {DEFAULT_PLATFORMS}）")
			
 
				+    p.add_argument("--max-count", type=int, default=DEFAULT_MAX_COUNT, help="每条措辞每渠道取帖数上限")
			
 
				+    p.add_argument("--eval-model", default=DEFAULT_EVAL_MODEL, choices=list(EVAL_MODELS),
			
 
				+                   help="评估模型（默认随 search_eval 默认，多模态）")
			
 
				+    p.add_argument("--max-concurrent", type=int, default=3, help="搜索 / 评估并发上限")
			
 
				+    p.add_argument("--max-images", type=int, default=4, help="每帖最多发给模型几张配图")
			
 
				+    p.add_argument("--image-mode", choices=["url", "base64"], default="url",
			
 
				+                   help="配图传给模型的方式（防盗链平台用 base64）")
			
 
				+    p.add_argument("--no-images", action="store_true", help="不发图（纯文本评估）")
			
 
				+    p.add_argument("--no-transcribe", action="store_true", help="不对视频帖转写")
			
 
				+    p.add_argument("--no-eval", action="store_true", help="只搜不评估")
			
 
				+    p.add_argument("--force", action="store_true", help="覆盖已存在的 form_A.json")
			
 
				+    args = p.parse_args()
			
 
				+    asyncio.run(run(args))
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/server.py
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/server.py
@@ -0,0 +1,997 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""搜索评估案例查看 server。
			
 
				+沿用 图文排版搜索评估.html 的版式(卡片 + dialog 详情 + rubric 评分条)，
			
 
				+数据实时扫描 runs_full/*/form_*.json —— runs_full 下每新增一个 q 文件夹，刷新即出现。
			
 
				+分页：query → 三种形式(A/B/C) → 三个渠道 三行从上到下。
			
 
				+
			
 
				+用法：python server.py [port]   默认 8770，浏览器开 http://0.0.0.0:8770
			
 
				+"""
			
 
				+import json, re, glob, sys, pathlib, subprocess, threading
			
 
				+from datetime import datetime
			
 
				+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
			
 
				+from urllib.parse import urlparse, parse_qs
			
 
				+
			
 
				+try:  # Windows 控制台默认 cp1252，中文 print 会崩，统一切 utf-8
			
 
				+    sys.stdout.reconfigure(encoding="utf-8")
			
 
				+except Exception:
			
 
				+    pass
			
 
				+
			
 
				+HERE = pathlib.Path(__file__).parent
			
 
				+sys.path.insert(0, str(HERE))
			
 
				+sys.path.insert(0, str(HERE.parent))   # fixed_query_eval：让兄弟模块 batch_extract_procedures 可 import
			
 
				+PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 8770
			
 
				+
			
 
				+PLAT = {"xhs": "小红书", "gzh": "公众号", "zhihu": "知乎", "x": "X", "bili": "B站", "douyin": "抖音",
			
 
				+        "sph": "视频号", "youtube": "YouTube", "github": "GitHub", "toutiao": "头条", "weibo": "微博"}
			
 
				+KT = {"procedure": "工序", "step": "步骤", "tool": "工具"}
			
 
				+
			
 
				+# 从 taxonomy 取动作叶子/类型名，用于把 original_q 解析回原始维度(动作×类型 正交)
			
 
				+# 路径优先级：search_eval/evaluation/（主源，IDE 编辑那份就是 runtime 实际读的）
			
 
				+# → test_script/evaluation/（历史副本兜底）→ script/evaluation/（更老兜底）
			
 
				+# 谁也找不到时整目录扫空，server 仍能起。
			
 
				+EVALDIR = HERE / "evaluation"
			
 
				+if not EVALDIR.exists():
			
 
				+    EVALDIR = HERE.parent.parent / "test_script" / "evaluation"
			
 
				+if not EVALDIR.exists():
			
 
				+    EVALDIR = HERE.parent / "evaluation"
			
 
				+try:
			
 
				+    _jm = json.load(open(EVALDIR / "judged_matrix.json", encoding="utf-8"))
			
 
				+    ACT_L1 = {a["name"]: a["l1"] for a in _jm["actions"]}
			
 
				+    ACTION_SET = set(ACT_L1)
			
 
				+    TYPE_SET = {t["name"] for t in _jm["types"]}
			
 
				+    ACTIONS_TAX = [{"name": a["name"], "l1": a["l1"], "l2": a.get("l2", "")} for a in _jm["actions"]]
			
 
				+    TYPES_TAX = [{"name": t["name"], "l1": t["l1"]} for t in _jm["types"]]
			
 
				+    # taxonomy 顺序沿用 judged_matrix(严格版)；矩阵分值改用 type_action_scores(宽松版) —
			
 
				+    # 两份是同一组 27×50 cell 的独立 gemini judging，前者只 53 格到 tier3，后者 156 格到 score3
			
 
				+    _tas = json.load(open(EVALDIR / "type_action_scores.json", encoding="utf-8"))["scores"]
			
 
				+    _MATRIX = []
			
 
				+    for a in _jm["actions"]:
			
 
				+        row = []
			
 
				+        for t in _jm["types"]:
			
 
				+            rec = _tas.get(t["name"], {}).get(a["name"])
			
 
				+            row.append({"tier": rec["score"], "r": rec.get("reason", "")} if rec else {})
			
 
				+        _MATRIX.append(row)
			
 
				+except Exception:
			
 
				+    ACT_L1, ACTION_SET, TYPE_SET, ACTIONS_TAX, TYPES_TAX, _MATRIX = {}, set(), set(), [], [], []
			
 
				+
			
 
				+ACTIVE_TASKS = {}
			
 
				+ACTIVE_REEVALS = {}
			
 
				+
			
 
				+from batch_extract_procedures import _short_case, _source_to_dsl_input, _write_meta, _composite_score
			
 
				+
			
 
				+def run_extraction_task(q, folder_name, src_path, out_dir, engine, model):
			
 
				+    task_key = f"{q}/{folder_name}"
			
 
				+    log_path = out_dir / "_extract.log"
			
 
				+    try:
			
 
				+        out_dir.mkdir(parents=True, exist_ok=True)
			
 
				+        if engine == "cyber_runner":
			
 
				+            script_path = HERE / "procedure-dsl" / "run_cyber.py"
			
 
				+        else:
			
 
				+            script_path = HERE / "procedure-dsl" / "run_procedure_dsl.py"
			
 
				+            
			
 
				+        cmd = [
			
 
				+            sys.executable, "-u", str(script_path),
			
 
				+            str(src_path),
			
 
				+            "--out-dir", str(out_dir),
			
 
				+            "--model", model,
			
 
				+            "--max-turns", "300"
			
 
				+        ]
			
 
				+        if engine != "cyber_runner":
			
 
				+            cmd.extend(["--max-retries", "3"])
			
 
				+        
			
 
				+        flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
			
 
				+        with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
			
 
				+            proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
			
 
				+                                    cwd=str(HERE), creationflags=flags)
			
 
				+            ACTIVE_TASKS[task_key]["pid"] = proc.pid
			
 
				+            proc.wait()
			
 
				+            
			
 
				+            if proc.returncode == 0:
			
 
				+                try:
			
 
				+                    import build_workflows
			
 
				+                    build_workflows.write_one(q, folder_name, runs_dir=HERE / "runs_full")
			
 
				+                    ACTIVE_TASKS[task_key]["status"] = "success"
			
 
				+                except Exception as ex:
			
 
				+                    ACTIVE_TASKS[task_key]["status"] = "failed"
			
 
				+                    ACTIVE_TASKS[task_key]["error"] = f"Workflow compilation failed: {ex}"
			
 
				+                    with open(log_path, "a", encoding="utf-8") as f_err:
			
 
				+                        f_err.write(f"\n[server error] Workflow compilation failed: {ex}\n")
			
 
				+            else:
			
 
				+                ACTIVE_TASKS[task_key]["status"] = "failed"
			
 
				+                ACTIVE_TASKS[task_key]["error"] = f"Runner failed with exit code {proc.returncode}"
			
 
				+    except Exception as e:
			
 
				+        ACTIVE_TASKS[task_key]["status"] = "failed"
			
 
				+        ACTIVE_TASKS[task_key]["error"] = str(e)
			
 
				+        try:
			
 
				+            with open(log_path, "a", encoding="utf-8") as f_err:
			
 
				+                f_err.write(f"\n[server error] Extraction failed: {e}\n")
			
 
				+        except Exception:
			
 
				+            pass
			
 
				+
			
 
				+
			
 
				+# ── 工具解构（fixed_query_eval 新增）──────────────────────────────────────────────
			
 
				+# 单次 LLM 调用（gemini-3.1-flash-lite）即可，比工序解构（多轮 agent）轻得多。
			
 
				+# 仍走 subprocess（tool_extract.py），让 LLM 重依赖留在子进程，server 本身保持轻量。
			
 
				+TOOL_TASKS = {}   # {q: {"status", "case_ids", "error", "start_time"}}
			
 
				+
			
 
				+def run_tool_extraction_task(q, case_ids, force):
			
 
				+    log_path = HERE / "runs_full" / q / "tools" / "_extract.log"
			
 
				+    try:
			
 
				+        log_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				+        cmd = [sys.executable, "-u", str(HERE / "tool_extract.py"),
			
 
				+               "--q", q, "--case-ids", ",".join(case_ids)]
			
 
				+        if force:
			
 
				+            cmd.append("--force")
			
 
				+        flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
			
 
				+        with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
			
 
				+            proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
			
 
				+                                    cwd=str(HERE), creationflags=flags)
			
 
				+            TOOL_TASKS[q]["pid"] = proc.pid
			
 
				+            proc.wait()
			
 
				+            TOOL_TASKS[q]["status"] = "success" if proc.returncode == 0 else "failed"
			
 
				+            if proc.returncode != 0:
			
 
				+                TOOL_TASKS[q]["error"] = f"tool_extract exit code {proc.returncode}"
			
 
				+    except Exception as e:
			
 
				+        TOOL_TASKS[q]["status"] = "failed"
			
 
				+        TOOL_TASKS[q]["error"] = str(e)
			
 
				+
			
 
				+
			
 
				+MODSET = {"文", "图", "视频", "音频"}
			
 
				+TOOLQUAL = {"AI": "AI 模型", "软件": "桌面 APP", "电脑端": "桌面 APP", "在线": "云端 Web",
			
 
				+            "网页版": "云端 Web", "代码": "API·CLI", "命令行": "API·CLI", "插件": "插件扩展"}
			
 
				+
			
 
				+def parse_dims(oq):
			
 
				+    """把组合 query(如 '文 元素生成 提示词 教程')解析回 {动作, 类型, 动作L1, 约束}。"""
			
 
				+    toks = (oq or "").split()
			
 
				+    action = next((t for t in toks if t in ACTION_SET), None)
			
 
				+    type_ = next((t for t in toks if t in TYPE_SET), None)
			
 
				+    cons = None
			
 
				+    if toks:
			
 
				+        t0 = toks[0]
			
 
				+        if t0 in MODSET:
			
 
				+            cons = {"kind": "模态", "value": t0}
			
 
				+        elif t0 in TOOLQUAL:
			
 
				+            cons = {"kind": "工具类型", "value": TOOLQUAL[t0]}
			
 
				+    return {"action": action, "type": type_, "action_l1": ACT_L1.get(action, ""), "constraint": cons}
			
 
				+
			
 
				+def flat_scores(sc):
			
 
				+    f = {}
			
 
				+    for k, v in (sc or {}).items():
			
 
				+        if isinstance(v, dict):
			
 
				+            for kk, vv in v.items():
			
 
				+                try: f[kk] = int(vv)
			
 
				+                except Exception: pass
			
 
				+        else:
			
 
				+            try: f[k] = int(v)
			
 
				+            except Exception: pass
			
 
				+    return f
			
 
				+
			
 
				+def _recency_hard(date_str):
			
 
				+    """按 publish_timestamp 头 10 字符（YYYY-MM-DD）算硬时效：半年内=3 / 两年内=2 / 更早=1。
			
 
				+
			
 
				+    取代原 LLM 评的 recency 维度——脚本算更稳，发布时间在帖子抓取时就有，无需 LLM token。
			
 
				+    """
			
 
				+    try:
			
 
				+        d = datetime.strptime((date_str or "")[:10], "%Y-%m-%d")
			
 
				+    except (ValueError, TypeError):
			
 
				+        return None
			
 
				+    days = (datetime.now() - d).days
			
 
				+    if days <= 180: return 3
			
 
				+    if days <= 730: return 2
			
 
				+    return 1
			
 
				+
			
 
				+
			
 
				+def adapt(r, run, form_name=None):
			
 
				+    p = r.get("post", {}); e = r.get("llm_evaluation", {})
			
 
				+    
			
 
				+    # 1. 解析 知识类型 (knowledge_type)
			
 
				+    kt = []
			
 
				+    kt_raw = e.get("知识类型") or e.get("knowledge_type") or []
			
 
				+    for k in kt_raw:
			
 
				+        if k in ("工序", "procedure"): kt.append("procedure")
			
 
				+        elif k in ("能力", "步骤", "step"): kt.append("step")
			
 
				+        elif k in ("工具", "tool"): kt.append("tool")
			
 
				+
			
 
				+    fs = {}
			
 
				+    score_reasons = {}
			
 
				+
			
 
				+    # 检测是否为 eval_prompt_sample-mod 里的新版 0-10 分数 schema
			
 
				+    is_mod_schema = "相关性" in e and isinstance(e["相关性"], dict) and ("和内容制作知识相关" in e["相关性"] or "和 query 相关" in e["相关性"])
			
 
				+
			
 
				+    if is_mod_schema:
			
 
				+        # 新版 0-10 分数格式解析
			
 
				+        # 1. 相关性
			
 
				+        rel = e.get("相关性") or {}
			
 
				+        for subkey, item in rel.items():
			
 
				+            if isinstance(item, dict):
			
 
				+                score_val = item.get("得分")
			
 
				+                reason_val = item.get("理由")
			
 
				+                code_key = None
			
 
				+                if "内容制作" in subkey or "知识" in subkey:
			
 
				+                    code_key = "relevance_production"
			
 
				+                elif "query" in subkey or "检索" in subkey:
			
 
				+                    code_key = "relevance_query"
			
 
				+                if code_key and score_val is not None:
			
 
				+                    try:
			
 
				+                        fs[code_key] = float(score_val)
			
 
				+                        if reason_val:
			
 
				+                            score_reasons[code_key] = reason_val
			
 
				+                    except Exception:
			
 
				+                        pass
			
 
				+
			
 
				+        # 2. 质量
			
 
				+        q_block = e.get("质量") or {}
			
 
				+        fixed = q_block.get("固定维度") or {}
			
 
				+        
			
 
				+        # 固定维度
			
 
				+        fixed_keys = {
			
 
				+            "时效性": "recency",
			
 
				+            "热度性": "popularity",
			
 
				+            "评论反馈": "feedback"
			
 
				+        }
			
 
				+        for cn, code in fixed_keys.items():
			
 
				+            item = fixed.get(cn)
			
 
				+            if isinstance(item, dict):
			
 
				+                score_val = item.get("得分")
			
 
				+                reason_val = item.get("理由")
			
 
				+                if score_val is not None:
			
 
				+                    try:
			
 
				+                        fs[code] = float(score_val)
			
 
				+                        if reason_val:
			
 
				+                            score_reasons[code] = reason_val
			
 
				+                    except Exception:
			
 
				+                        pass
			
 
				+                        
			
 
				+        # 用例 (真实感, 表现力)
			
 
				+        usecase = fixed.get("用例") or {}
			
 
				+        usecase_keys = {
			
 
				+            "真实感": "realism",
			
 
				+            "表现力": "expressiveness"
			
 
				+        }
			
 
				+        for cn, code in usecase_keys.items():
			
 
				+            item = usecase.get(cn)
			
 
				+            if isinstance(item, dict):
			
 
				+                score_val = item.get("得分")
			
 
				+                reason_val = item.get("理由")
			
 
				+                if score_val is not None:
			
 
				+                    try:
			
 
				+                        fs[code] = float(score_val)
			
 
				+                        if reason_val:
			
 
				+                            score_reasons[code] = reason_val
			
 
				+                    except Exception:
			
 
				+                        pass
			
 
				+
			
 
				+        # 动态维度
			
 
				+        dynamic = q_block.get("动态维度") or {}
			
 
				+        
			
 
				+        # 工序
			
 
				+        proc = dynamic.get("工序") or {}
			
 
				+        if proc:
			
 
				+            item = proc.get("流程完整性")
			
 
				+            if isinstance(item, dict):
			
 
				+                score_val = item.get("得分")
			
 
				+                reason_val = item.get("理由")
			
 
				+                if score_val is not None:
			
 
				+                    try:
			
 
				+                        fs["procedure_completeness"] = float(score_val)
			
 
				+                        if reason_val:
			
 
				+                            score_reasons["procedure_completeness"] = reason_val
			
 
				+                    except Exception:
			
 
				+                        pass
			
 
				+            field = proc.get("字段完整性") or {}
			
 
				+            field_keys = {
			
 
				+                "输入完整性": "procedure_input",
			
 
				+                "实现完整性": "procedure_implementation",
			
 
				+                "输出完整性": "procedure_output"
			
 
				+            }
			
 
				+            for cn, code in field_keys.items():
			
 
				+                item = field.get(cn)
			
 
				+                if isinstance(item, dict):
			
 
				+                    score_val = item.get("得分")
			
 
				+                    reason_val = item.get("理由")
			
 
				+                    if score_val is not None:
			
 
				+                        try:
			
 
				+                            fs[code] = float(score_val)
			
 
				+                            if reason_val:
			
 
				+                                score_reasons[code] = reason_val
			
 
				+                        except Exception:
			
 
				+                            pass
			
 
				+            item = proc.get("泛化性")
			
 
				+            if isinstance(item, dict):
			
 
				+                score_val = item.get("得分")
			
 
				+                reason_val = item.get("理由")
			
 
				+                if score_val is not None:
			
 
				+                    try:
			
 
				+                        fs["procedure_generality"] = float(score_val)
			
 
				+                        if reason_val:
			
 
				+                            score_reasons["procedure_generality"] = reason_val
			
 
				+                    except Exception:
			
 
				+                        pass
			
 
				+
			
 
				+        # 能力
			
 
				+        cap = dynamic.get("能力") or dynamic.get("步骤") or {}
			
 
				+        if cap:
			
 
				+            field = cap.get("字段完整性") or {}
			
 
				+            field_keys = {
			
 
				+                "输入完整性": "step_input",
			
 
				+                "实现完整性": "step_implementation",
			
 
				+                "输出完整性": "step_output"
			
 
				+            }
			
 
				+            for cn, code in field_keys.items():
			
 
				+                item = field.get(cn)
			
 
				+                if isinstance(item, dict):
			
 
				+                    score_val = item.get("得分")
			
 
				+                    reason_val = item.get("理由")
			
 
				+                    if score_val is not None:
			
 
				+                        try:
			
 
				+                            fs[code] = float(score_val)
			
 
				+                            if reason_val:
			
 
				+                                score_reasons[code] = reason_val
			
 
				+                        except Exception:
			
 
				+                            pass
			
 
				+            item = cap.get("泛化性")
			
 
				+            if isinstance(item, dict):
			
 
				+                score_val = item.get("得分")
			
 
				+                reason_val = item.get("理由")
			
 
				+                if score_val is not None:
			
 
				+                    try:
			
 
				+                        fs["step_generality"] = float(score_val)
			
 
				+                        if reason_val:
			
 
				+                            score_reasons["step_generality"] = reason_val
			
 
				+                    except Exception:
			
 
				+                        pass
			
 
				+
			
 
				+        # 工具
			
 
				+        tool = dynamic.get("工具") or {}
			
 
				+        if tool:
			
 
				+            tool_keys = {
			
 
				+                "能力边界覆盖": "tool_boundary",
			
 
				+                "有效比较": "tool_comparison",
			
 
				+                "参数/接口具体性": "tool_specificity",
			
 
				+                "实操示例": "tool_example",
			
 
				+                "版本&限制": "tool_limits"
			
 
				+            }
			
 
				+            for cn, code in tool_keys.items():
			
 
				+                item = tool.get(cn)
			
 
				+                if isinstance(item, dict):
			
 
				+                    score_val = item.get("得分")
			
 
				+                    reason_val = item.get("理由")
			
 
				+                    if score_val is not None:
			
 
				+                        try:
			
 
				+                            fs[code] = float(score_val)
			
 
				+                            if reason_val:
			
 
				+                                score_reasons[code] = reason_val
			
 
				+                        except Exception:
			
 
				+                            pass
			
 
				+
			
 
				+    else:
			
 
				+        # 兼容老版 1-5 分数 schema (带 "评分" 或 old-style flatness)
			
 
				+        is_new_schema = "评分" in e or "知识类型" in e or "制作相关性" in e
			
 
				+        CN_TO_EN = {
			
 
				+            "相关性": "relevance",
			
 
				+            "成品质量": "result_quality",
			
 
				+            "可信度": "credibility",
			
 
				+            "具体用例": "concrete_use_case",
			
 
				+            "完整性": "completeness",
			
 
				+            "步骤结构": "step_structure",
			
 
				+            "步骤可复现": "step_reproducibility",
			
 
				+            "步骤可复现性": "step_reproducibility",
			
 
				+            "能力定义": "capability_definition",
			
 
				+            "实现深度": "implementation_depth",
			
 
				+            "边界失败": "boundary_failure_eval",
			
 
				+            "通用性": "generality",
			
 
				+            "能力覆盖": "capability_coverage",
			
 
				+            "有效对比": "effective_comparison",
			
 
				+            "参数具体": "param_specificity",
			
 
				+            "实操示例": "worked_example",
			
 
				+            "实操用例": "worked_example",
			
 
				+            "示例完整": "worked_example",
			
 
				+            "版本限制": "version_limits",
			
 
				+            "版本说明": "version_limits",
			
 
				+            "限制说明": "version_limits",
			
 
				+        }
			
 
				+        
			
 
				+        if is_new_schema:
			
 
				+            pf = e.get("评分") or {}
			
 
				+            for cat, metrics in pf.items():
			
 
				+                if isinstance(metrics, dict):
			
 
				+                    for metric, val in metrics.items():
			
 
				+                        en_key = CN_TO_EN.get(metric, metric)
			
 
				+                        if isinstance(val, dict) and "得分" in val:
			
 
				+                            try: fs[en_key] = int(val["得分"])
			
 
				+                            except Exception: pass
			
 
				+                        elif isinstance(val, (int, float)):
			
 
				+                            fs[en_key] = int(val)
			
 
				+                        
			
 
				+                        if isinstance(val, dict) and "理由" in val:
			
 
				+                            score_reasons[en_key] = val["理由"]
			
 
				+        else:
			
 
				+            fs = flat_scores(e.get("scores", {}))
			
 
				+        
			
 
				+    # 计算均分 (overall)
			
 
				+    if is_mod_schema:
			
 
				+        rel_keys = {"relevance_production", "relevance_query"}
			
 
				+        rel_vals = [v for k, v in fs.items() if k in rel_keys]
			
 
				+        qual_vals = [v for k, v in fs.items() if k not in rel_keys]
			
 
				+        
			
 
				+        rel_avg = sum(rel_vals) / len(rel_vals) if rel_vals else None
			
 
				+        qual_avg = sum(qual_vals) / len(qual_vals) if qual_vals else None
			
 
				+        
			
 
				+        if rel_avg is not None and qual_avg is not None:
			
 
				+            overall = round((rel_avg + qual_avg) / 2, 1)
			
 
				+        elif rel_avg is not None:
			
 
				+            overall = round(rel_avg, 1)
			
 
				+        elif qual_avg is not None:
			
 
				+            overall = round(qual_avg, 1)
			
 
				+        else:
			
 
				+            overall = 0.0
			
 
				+    else:
			
 
				+        overall = round(sum(fs.values()) / len(fs), 1) if fs else 0
			
 
				+    anomaly = bool(e.get("error")) or not fs
			
 
				+    grade = p.get("_quality_grade", "")
			
 
				+    fb = r.get("found_by_queries", [])
			
 
				+    
			
 
				+    # 4. 解析 制作相关性 (production_relevance)
			
 
				+    if is_mod_schema:
			
 
				+        # 新版使用 "相关性" 中的 "和内容制作知识相关" 代表制作相关性
			
 
				+        production_relevance = fs.get("relevance_production")
			
 
				+    else:
			
 
				+        if is_new_schema:
			
 
				+            pr_block = e.get("制作相关性") or {}
			
 
				+            pr_raw = pr_block.get("得分") if isinstance(pr_block, dict) else pr_block
			
 
				+            if isinstance(pr_block, dict) and "理由" in pr_block:
			
 
				+                score_reasons["production_relevance"] = pr_block["理由"]
			
 
				+        else:
			
 
				+            pr_raw = e.get("production_relevance")
			
 
				+            
			
 
				+        try: production_relevance = int(float(pr_raw)) if pr_raw is not None else None
			
 
				+        except (TypeError, ValueError): production_relevance = None
			
 
				+    
			
 
				+    recency_hard = _recency_hard(p.get("publish_timestamp", ""))
			
 
				+    
			
 
				+    # 5. 解析 判定决策 (decision) 和 理由 (reason)
			
 
				+    reason = e.get("判定理由") or e.get("reason") or ""
			
 
				+    
			
 
				+    # 根据过滤指标决定是否保留 (过滤指标判定逻辑优先，不依赖文字匹配)
			
 
				+    is_discard = False
			
 
				+    
			
 
				+    # 制作相关性低于阈值则丢弃 (新版 0-10 满分，因此低于 4 丢弃；老版低于 2 丢弃)
			
 
				+    if production_relevance is not None:
			
 
				+        threshold = 4 if is_mod_schema else 2
			
 
				+        if production_relevance < threshold:
			
 
				+            is_discard = True
			
 
				+            
			
 
				+    # 时效性低于 2 被丢弃（发布时间超两年的老帖）
			
 
				+    if recency_hard is not None and recency_hard < 2:
			
 
				+        is_discard = True
			
 
				+        
			
 
				+    # 综合均分低于阈值被丢弃 (新版低于 6 丢弃；老版低于 3 丢弃)
			
 
				+    if overall is not None:
			
 
				+        threshold_ov = 6 if is_mod_schema else 3
			
 
				+        if overall < threshold_ov:
			
 
				+            is_discard = True
			
 
				+        
			
 
				+    decision = "discard" if is_discard else "report"
			
 
				+
			
 
				+    # Find matching procedure html
			
 
				+    procedure_html = None
			
 
				+    case_id = r.get("case_id", "")
			
 
				+    title = p.get("title", "")
			
 
				+    run_dir = HERE / "runs_full" / run
			
 
				+
			
 
				+    if run_dir.is_dir():
			
 
				+        # 1. 优先扫描该帖子对应的文件夹下的任何 HTML 文件 (不限名称)
			
 
				+        # 文件夹名格式: {form}_{platform}_{channel_content_id[:8]}
			
 
				+        content_id = r.get("channel_content_id") or ""
			
 
				+        if not content_id and case_id and "_" in case_id:
			
 
				+            content_id = case_id.split("_", 1)[1]
			
 
				+        plat_key = r.get("platform") or ""
			
 
				+        
			
 
				+        if form_name and plat_key and content_id:
			
 
				+            folder_name = f"{form_name}_{plat_key}_{content_id[:8]}"
			
 
				+            case_dir = run_dir / "procedures" / folder_name
			
 
				+            if case_dir.is_dir():
			
 
				+                html_files = list(case_dir.glob("*.html"))
			
 
				+                if html_files:
			
 
				+                    procedure_html = f"runs_full/{run}/procedures/{folder_name}/{html_files[0].name}"
			
 
				+
			
 
				+        # 2. 其次匹配标准文件名: case-{case_id}.html 或 {case_id}.html
			
 
				+        candidate_dirs = [run_dir, run_dir / "procedures"]
			
 
				+        if not procedure_html and case_id:
			
 
				+            named_files = [f"case-{case_id}.html", f"{case_id}.html"]
			
 
				+            for d_dir in candidate_dirs:
			
 
				+                if d_dir.is_dir():
			
 
				+                    for name in named_files:
			
 
				+                        if (d_dir / name).is_file():
			
 
				+                            procedure_html = f"runs_full/{run}/procedures/{name}" if d_dir.name == "procedures" else f"runs_full/{run}/{name}"
			
 
				+                            break
			
 
				+                if procedure_html:
			
 
				+                    break
			
 
				+
			
 
				+        # 3. 再次匹配 HTML 内部的标准声明 (meta 标签或 HTML 注释)
			
 
				+        if not procedure_html and case_id:
			
 
				+            for d_dir in candidate_dirs:
			
 
				+                if d_dir.is_dir():
			
 
				+                    for html_path in d_dir.glob("*.html"):
			
 
				+                        try:
			
 
				+                            content = html_path.read_text(encoding="utf-8")
			
 
				+                            if f'name="case-id" content="{case_id}"' in content or \
			
 
				+                               f'name="case_id" content="{case_id}"' in content or \
			
 
				+                               f'<!-- case_id: {case_id} -->' in content or \
			
 
				+                               f'<!-- case-id: {case_id} -->' in content:
			
 
				+                                procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
			
 
				+                                break
			
 
				+                        except Exception:
			
 
				+                            continue
			
 
				+                if procedure_html:
			
 
				+                    break
			
 
				+
			
 
				+        # 4. 最后使用标题作为兜底模糊匹配
			
 
				+        if not procedure_html and title:
			
 
				+            for d_dir in candidate_dirs:
			
 
				+                if d_dir.is_dir():
			
 
				+                    for html_path in d_dir.glob("*.html"):
			
 
				+                        try:
			
 
				+                            content = html_path.read_text(encoding="utf-8")
			
 
				+                            if title in content:
			
 
				+                                procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
			
 
				+                                break
			
 
				+                        except Exception:
			
 
				+                            continue
			
 
				+                if procedure_html:
			
 
				+                    break
			
 
				+
			
 
				+    return {
			
 
				+        "case_id": r.get("case_id", ""),
			
 
				+        "platform": PLAT.get(r.get("platform"), r.get("platform")), "platformKey": r.get("platform"),
			
 
				+        "title": p.get("title", "") or "(无标题)", "date": (p.get("publish_timestamp", "") or "")[:10],
			
 
				+        "url": r.get("source_url", ""), "engagement": f'{p.get("like_count", 0)} 赞',
			
 
				+        "knowledge_type": kt, "decision": decision,
			
 
				+        "tools": [KT.get(k, k) for k in kt] + ([f"质量 {grade}"] if grade else []), "found_by": fb,
			
 
				+        "images": (p.get("images") or [])[:6], "text": p.get("body_text", "") or "",
			
 
				+        "scores": fs, "overall": overall, "reason": reason, "score_reasons": score_reasons,
			
 
				+        "grade": grade, "qscore": p.get("_quality_score", 0), "anomaly": anomaly,
			
 
				+        "production_relevance": production_relevance, "recency_hard": recency_hard,
			
 
				+        "run": run, "procedure_html": procedure_html,
			
 
				+    }
			
 
				+
			
 
				+def scan_runs():
			
 
				+    runs = {}
			
 
				+    for f in sorted(glob.glob(str(HERE / "runs_full" / "*" / "form_*.json"))):
			
 
				+        try:
			
 
				+            d = json.load(open(f, encoding="utf-8"))
			
 
				+        except Exception:
			
 
				+            continue
			
 
				+        run = pathlib.Path(f).parent.name
			
 
				+        form_name = d.get("form") or ""
			
 
				+        results = [adapt(r, run, form_name) for r in d.get("results", [])]
			
 
				+        report_val = sum(1 for r in results if r.get("decision") == "report" and not r.get("anomaly"))
			
 
				+        discard_val = sum(1 for r in results if r.get("decision") == "discard" and not r.get("anomaly"))
			
 
				+        
			
 
				+        runs.setdefault(run, []).append({
			
 
				+            "form": d.get("form"), "query": d.get("query"), "original_q": d.get("original_q", ""),
			
 
				+            "requirement": d.get("requirement", ""),
			
 
				+            "platforms": d.get("platforms", []), "total": d.get("total"),
			
 
				+            "report": report_val, "discard": discard_val,
			
 
				+            "results": results,
			
 
				+        })
			
 
				+
			
 
				+    # 数据库回退：仅当本地 runs_full 完全为空时才读库（本地清空后界面仍有数据）。
			
 
				+    # 本地有任何数据就信本地，避免每次 /api/data 都查远程 DB 拖慢。
			
 
				+    try:
			
 
				+        import db
			
 
				+        for q, g in (db.fetch_posts_grouped().items() if not runs else []):
			
 
				+            if q in runs:
			
 
				+                continue
			
 
				+            results = [adapt(r, q, "A") for r in g["results"]]
			
 
				+            runs[q] = [{
			
 
				+                "form": "A", "query": g.get("query_text"), "original_q": g.get("query_text") or "",
			
 
				+                "requirement": "", "platforms": [], "total": len(results),
			
 
				+                "report": sum(1 for r in results if r.get("decision") == "report" and not r.get("anomaly")),
			
 
				+                "discard": sum(1 for r in results if r.get("decision") == "discard" and not r.get("anomaly")),
			
 
				+                "results": results, "_from_db": True,
			
 
				+            }]
			
 
				+    except Exception as e:
			
 
				+        print(f"⚠️ DB 回退读取失败（仅用本地数据）：{e}")
			
 
				+
			
 
				+    for v in runs.values():
			
 
				+        v.sort(key=lambda x: x.get("form") or "")
			
 
				+
			
 
				+    def _qnum(name):  # "q156" → 156，按数字排，避免 "q156" < "q99" 的字符串误排
			
 
				+        m = re.search(r"\d+", name)
			
 
				+        return (int(m.group()) if m else 0, name)
			
 
				+    out = []
			
 
				+    for k, v in sorted(runs.items(), key=lambda kv: _qnum(kv[0])):
			
 
				+        oq = v[0].get("original_q") or v[0].get("query") or ""
			
 
				+        seen, hits = set(), 0   # 知识命中数 = 各形式采纳(report)且非异常、按 url 去重后的帖子数
			
 
				+        for f in v:
			
 
				+            for r in f.get("results", []):
			
 
				+                if r.get("decision") == "report" and not r.get("anomaly") and r.get("url") not in seen:
			
 
				+                    seen.add(r.get("url")); hits += 1
			
 
				+        out.append({"key": k, "forms": v, "dims": parse_dims(oq), "original_q": oq,
			
 
				+                    "hits": hits, "tot": sum((f.get("total") or 0) for f in v)})
			
 
				+    active_reevals = {k: v["status"] for k, v in ACTIVE_REEVALS.items()}
			
 
				+    return {"queries": out, "actions": ACTIONS_TAX, "types": TYPES_TAX, "matrix": _MATRIX, "active_reevals": active_reevals}
			
 
				+
			
 
				+class H(BaseHTTPRequestHandler):
			
 
				+    def _send(self, code, body, ctype):
			
 
				+        b = body.encode("utf-8") if isinstance(body, str) else body
			
 
				+        self.send_response(code)
			
 
				+        if ctype.startswith("text/") or ctype == "application/json" or ctype == "application/javascript":
			
 
				+            self.send_header("Content-Type", ctype + "; charset=utf-8")
			
 
				+        else:
			
 
				+            self.send_header("Content-Type", ctype)
			
 
				+        self.send_header("Content-Length", str(len(b))); self.end_headers(); self.wfile.write(b)
			
 
				+    def do_GET(self):
			
 
				+        parsed = urlparse(self.path)
			
 
				+        path = parsed.path
			
 
				+        params = parse_qs(parsed.query)
			
 
				+
			
 
				+        if path in ("/", "/index.html"):
			
 
				+            try:
			
 
				+                page = (HERE / "index.html").read_text(encoding="utf-8")
			
 
				+                self._send(200, page, "text/html")
			
 
				+            except Exception as e:
			
 
				+                self._send(500, f"Error reading index.html: {e}", "text/plain")
			
 
				+        elif path == "/api/data":
			
 
				+            self._send(200, json.dumps(scan_runs(), ensure_ascii=False), "application/json")
			
 
				+        elif path == "/api/tools_status":
			
 
				+            # 工具解构状态：done=结果文件已存在；running=该 q 解构任务仍在跑
			
 
				+            q = (params.get("q") or [""])[0].strip()
			
 
				+            case_id = (params.get("case_id") or [""])[0].strip()
			
 
				+            if not q or not case_id:
			
 
				+                self._send(400, "missing q or case_id", "text/plain"); return
			
 
				+            done = (HERE / "runs_full" / q / "tools" / f"{case_id}.json").is_file()
			
 
				+            if not done:                       # 本地无 → 看库里有没有（本地清空后仍算已解构）
			
 
				+                try:
			
 
				+                    import db
			
 
				+                    done = db.has_tools(q, case_id)
			
 
				+                except Exception:
			
 
				+                    pass
			
 
				+            task = TOOL_TASKS.get(q) or {}
			
 
				+            running = task.get("status") == "running" and case_id in (task.get("case_ids") or [])
			
 
				+            self._send(200, json.dumps({
			
 
				+                "done": done, "running": running, "error": task.get("error"),
			
 
				+            }, ensure_ascii=False), "application/json")
			
 
				+        elif path == "/api/tools_data":
			
 
				+            # 取某帖的工具解构结果
			
 
				+            q = (params.get("q") or [""])[0].strip()
			
 
				+            case_id = (params.get("case_id") or [""])[0].strip()
			
 
				+            if not q or not case_id:
			
 
				+                self._send(400, "missing q or case_id", "text/plain"); return
			
 
				+            f = HERE / "runs_full" / q / "tools" / f"{case_id}.json"
			
 
				+            if not f.is_file():
			
 
				+                # 本地无 → 回退读库重建
			
 
				+                try:
			
 
				+                    import db
			
 
				+                    dbdata = db.fetch_tools(q, case_id)
			
 
				+                except Exception:
			
 
				+                    dbdata = None
			
 
				+                if dbdata:
			
 
				+                    dbdata["exists"] = True
			
 
				+                    self._send(200, json.dumps(dbdata, ensure_ascii=False), "application/json"); return
			
 
				+                self._send(200, json.dumps({"exists": False}, ensure_ascii=False), "application/json"); return
			
 
				+            try:
			
 
				+                data = json.loads(f.read_text(encoding="utf-8"))
			
 
				+                data["exists"] = True
			
 
				+                self._send(200, json.dumps(data, ensure_ascii=False), "application/json")
			
 
				+            except Exception as e:
			
 
				+                self._send(500, json.dumps({"error": f"read failed: {e}"}, ensure_ascii=False), "application/json")
			
 
				+        elif path == "/api/procedure_status":
			
 
				+            q = (params.get("q") or [""])[0].strip()
			
 
				+            form = (params.get("form") or [""])[0].strip()
			
 
				+            case_id = (params.get("case_id") or [""])[0].strip()
			
 
				+            
			
 
				+            if not q or not form or not case_id:
			
 
				+                self._send(400, "missing q, form, or case_id", "text/plain")
			
 
				+                return
			
 
				+                
			
 
				+            folder_name = f"{form}_{_short_case(case_id)}"
			
 
				+            task_key = f"{q}/{folder_name}"
			
 
				+            
			
 
				+            if task_key in ACTIVE_TASKS:
			
 
				+                task = ACTIVE_TASKS[task_key]
			
 
				+                res = {
			
 
				+                    "status": task["status"],
			
 
				+                    "error": task["error"]
			
 
				+                }
			
 
				+                if task["status"] == "success":
			
 
				+                    out_dir = HERE / "runs_full" / q / "procedures" / folder_name
			
 
				+                    html_files = list(out_dir.glob("*.html")) if out_dir.is_dir() else []
			
 
				+                    if html_files:
			
 
				+                        res["procedure_html"] = f"runs_full/{q}/procedures/{folder_name}/{html_files[0].name}"
			
 
				+                self._send(200, json.dumps(res, ensure_ascii=False), "application/json")
			
 
				+                return
			
 
				+                
			
 
				+            out_dir = HERE / "runs_full" / q / "procedures" / folder_name
			
 
				+            html_files = list(out_dir.glob("*.html")) if out_dir.is_dir() else []
			
 
				+            if html_files:
			
 
				+                self._send(200, json.dumps({
			
 
				+                    "status": "success",
			
 
				+                    "procedure_html": f"runs_full/{q}/procedures/{folder_name}/{html_files[0].name}"
			
 
				+                }, ensure_ascii=False), "application/json")
			
 
				+                return
			
 
				+                
			
 
				+            log_path = out_dir / "_extract.log"
			
 
				+            if log_path.is_file():
			
 
				+                self._send(200, json.dumps({"status": "failed", "error": "Not running, but no HTML output found (possibly crashed)."}, ensure_ascii=False), "application/json")
			
 
				+                return
			
 
				+                
			
 
				+            self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
			
 
				+        elif path == "/api/procedure_log":
			
 
				+            q = (params.get("q") or [""])[0].strip()
			
 
				+            form = (params.get("form") or [""])[0].strip()
			
 
				+            case_id = (params.get("case_id") or [""])[0].strip()
			
 
				+            
			
 
				+            if not q or not form or not case_id:
			
 
				+                self._send(400, "missing q, form, or case_id", "text/plain")
			
 
				+                return
			
 
				+                
			
 
				+            folder_name = f"{form}_{_short_case(case_id)}"
			
 
				+            log_path = HERE / "runs_full" / q / "procedures" / folder_name / "_extract.log"
			
 
				+            
			
 
				+            if not log_path.is_file():
			
 
				+                self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
			
 
				+                return
			
 
				+                
			
 
				+            try:
			
 
				+                content = log_path.read_text(encoding="utf-8", errors="replace")
			
 
				+                self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
			
 
				+            except Exception as e:
			
 
				+                self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
			
 
				+        elif path == "/api/spec_content":
			
 
				+            file_name = (params.get("file") or [""])[0].strip()
			
 
				+            allowed = [
			
 
				+                "README.md",
			
 
				+                "tools.md",
			
 
				+                "extraction/phase1-skeleton.md",
			
 
				+                "extraction/phase2-normalize.md",
			
 
				+                "extraction/phase3-finalize.md",
			
 
				+                "taxonomy/type_suggestions.md"
			
 
				+            ]
			
 
				+            if file_name not in allowed:
			
 
				+                self._send(400, "invalid file parameter", "text/plain")
			
 
				+                return
			
 
				+            target_path = HERE / "procedure-dsl" / "spec" / file_name
			
 
				+            if not target_path.is_file():
			
 
				+                self._send(404, "spec file not found", "text/plain")
			
 
				+                return
			
 
				+            try:
			
 
				+                content = target_path.read_text(encoding="utf-8", errors="replace")
			
 
				+                self._send(200, json.dumps({"content": content}, ensure_ascii=False), "application/json")
			
 
				+            except Exception as e:
			
 
				+                self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
			
 
				+        elif path == "/api/reeval_status":
			
 
				+            q = (params.get("q") or [""])[0].strip()
			
 
				+            if not q:
			
 
				+                self._send(400, "missing q", "text/plain")
			
 
				+                return
			
 
				+            if q in ACTIVE_REEVALS:
			
 
				+                self._send(200, json.dumps({
			
 
				+                    "status": ACTIVE_REEVALS[q]["status"],
			
 
				+                    "error": ACTIVE_REEVALS[q].get("error")
			
 
				+                }, ensure_ascii=False), "application/json")
			
 
				+            else:
			
 
				+                self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
			
 
				+        elif self.path.startswith("/runs_full/"):
			
 
				+            try:
			
 
				+                clean_path = self.path.split("?")[0]
			
 
				+                parts = clean_path.strip("/").split("/")
			
 
				+                target_file = HERE
			
 
				+                for part in parts:
			
 
				+                    target_file = target_file / part
			
 
				+                runs_dir = HERE / "runs_full"
			
 
				+                if runs_dir.resolve() in target_file.resolve().parents and target_file.is_file():
			
 
				+                    content = target_file.read_bytes()
			
 
				+                    ext = target_file.suffix.lower()
			
 
				+                    ctype = "text/html"
			
 
				+                    if ext in (".png", ".webp"):
			
 
				+                        ctype = f"image/{ext[1:]}"
			
 
				+                    elif ext in (".jpg", ".jpeg"):
			
 
				+                        ctype = "image/jpeg"
			
 
				+                    elif ext == ".json":
			
 
				+                        ctype = "application/json"
			
 
				+                    elif ext == ".js":
			
 
				+                        ctype = "application/javascript"
			
 
				+                    elif ext == ".css":
			
 
				+                        ctype = "text/css"
			
 
				+                    self._send(200, content, ctype)
			
 
				+                else:
			
 
				+                    self._send(404, "not found", "text/plain")
			
 
				+            except Exception as e:
			
 
				+                self._send(500, f"Error: {e}", "text/plain")
			
 
				+        else:
			
 
				+            self._send(404, "not found", "text/plain")
			
 
				+    def do_POST(self):
			
 
				+        if self.path == "/api/generate_procedure":
			
 
				+            length = int(self.headers.get("Content-Length") or 0)
			
 
				+            raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
			
 
				+            try:
			
 
				+                payload = json.loads(raw)
			
 
				+            except Exception as e:
			
 
				+                self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
			
 
				+                
			
 
				+            q = (payload.get("q") or "").strip()
			
 
				+            form = (payload.get("form") or "").strip()
			
 
				+            case_id = (payload.get("case_id") or "").strip()
			
 
				+            engine = (payload.get("engine") or "cyber_runner").strip()
			
 
				+            model = (payload.get("model") or "google/gemini-3.1-flash-lite").strip()
			
 
				+            
			
 
				+            if not re.match(r"^q\d+$", q):
			
 
				+                self._send(400, json.dumps({"error": f"bad q (expect 'qNN'): {q!r}"}, ensure_ascii=False), "application/json"); return
			
 
				+            if form not in ("A", "B", "C"):
			
 
				+                self._send(400, json.dumps({"error": f"bad form: {form!r}"}, ensure_ascii=False), "application/json"); return
			
 
				+            if not case_id:
			
 
				+                self._send(400, json.dumps({"error": "missing case_id"}, ensure_ascii=False), "application/json"); return
			
 
				+                
			
 
				+            q_dir = HERE / "runs_full" / q
			
 
				+            form_file = q_dir / f"form_{form}.json"
			
 
				+            if not form_file.is_file():
			
 
				+                self._send(404, json.dumps({"error": f"form file not found: {form_file.name}"}, ensure_ascii=False), "application/json"); return
			
 
				+                
			
 
				+            try:
			
 
				+                with open(form_file, encoding="utf-8") as f:
			
 
				+                    form_data = json.load(f)
			
 
				+            except Exception as e:
			
 
				+                self._send(500, json.dumps({"error": f"failed to read form: {e}"}, ensure_ascii=False), "application/json"); return
			
 
				+                
			
 
				+            matching_result = None
			
 
				+            for r in form_data.get("results", []):
			
 
				+                if r.get("case_id") == case_id:
			
 
				+                    matching_result = r
			
 
				+                    break
			
 
				+                    
			
 
				+            if not matching_result:
			
 
				+                self._send(404, json.dumps({"error": f"case_id {case_id} not found in form {form}"}, ensure_ascii=False), "application/json"); return
			
 
				+                
			
 
				+            folder_name = f"{form}_{_short_case(case_id)}"
			
 
				+            out_dir = q_dir / "procedures" / folder_name
			
 
				+            out_dir.mkdir(parents=True, exist_ok=True)
			
 
				+            
			
 
				+            src_path = out_dir / "_source.json"
			
 
				+            try:
			
 
				+                with open(src_path, "w", encoding="utf-8") as f:
			
 
				+                    json.dump(_source_to_dsl_input(matching_result), f, ensure_ascii=False, indent=2)
			
 
				+                    
			
 
				+                score = _composite_score(matching_result.get("llm_evaluation") or {})
			
 
				+                _write_meta(out_dir, case_id=case_id, from_q=q, form=form, score=score)
			
 
				+            except Exception as e:
			
 
				+                self._send(500, json.dumps({"error": f"failed to write inputs: {e}"}, ensure_ascii=False), "application/json"); return
			
 
				+                
			
 
				+            task_key = f"{q}/{folder_name}"
			
 
				+            ACTIVE_TASKS[task_key] = {
			
 
				+                "status": "running",
			
 
				+                "start_time": datetime.now().isoformat(),
			
 
				+                "pid": None,
			
 
				+                "error": None
			
 
				+            }
			
 
				+            
			
 
				+            t = threading.Thread(target=run_extraction_task, args=(q, folder_name, src_path, out_dir, engine, model))
			
 
				+            t.daemon = True
			
 
				+            t.start()
			
 
				+            
			
 
				+            self._send(200, json.dumps({
			
 
				+                "status": "started",
			
 
				+                "task_key": task_key,
			
 
				+                "log": f"runs_full/{q}/procedures/{folder_name}/_extract.log"
			
 
				+            }, ensure_ascii=False), "application/json")
			
 
				+        elif self.path == "/api/extract_tools":
			
 
				+            # 工具解构：body {q, case_ids:[...], force?} → 起 tool_extract.py 子进程
			
 
				+            length = int(self.headers.get("Content-Length") or 0)
			
 
				+            raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
			
 
				+            try:
			
 
				+                payload = json.loads(raw)
			
 
				+            except Exception as e:
			
 
				+                self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
			
 
				+            q = (payload.get("q") or "").strip()
			
 
				+            case_ids = payload.get("case_ids") or []
			
 
				+            force = bool(payload.get("force"))
			
 
				+            if not re.match(r"^q\d+$", q):
			
 
				+                self._send(400, json.dumps({"error": f"bad q (expect 'qNN'): {q!r}"}, ensure_ascii=False), "application/json"); return
			
 
				+            if not isinstance(case_ids, list) or not case_ids:
			
 
				+                self._send(400, json.dumps({"error": "case_ids must be a non-empty list"}, ensure_ascii=False), "application/json"); return
			
 
				+            if not (HERE / "runs_full" / q / "form_A.json").is_file():
			
 
				+                self._send(404, json.dumps({"error": f"runs_full/{q}/form_A.json not found"}, ensure_ascii=False), "application/json"); return
			
 
				+            TOOL_TASKS[q] = {
			
 
				+                "status": "running", "case_ids": case_ids,
			
 
				+                "start_time": datetime.now().isoformat(), "pid": None, "error": None,
			
 
				+            }
			
 
				+            t = threading.Thread(target=run_tool_extraction_task, args=(q, case_ids, force))
			
 
				+            t.daemon = True
			
 
				+            t.start()
			
 
				+            self._send(200, json.dumps({
			
 
				+                "status": "started", "q": q, "count": len(case_ids),
			
 
				+                "log": f"runs_full/{q}/tools/_extract.log",
			
 
				+            }, ensure_ascii=False), "application/json")
			
 
				+        elif self.path == "/api/reeval":
			
 
				+            length = int(self.headers.get("Content-Length") or 0)
			
 
				+            raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
			
 
				+            try:
			
 
				+                payload = json.loads(raw)
			
 
				+            except Exception as e:
			
 
				+                self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
			
 
				+            q = (payload.get("q") or "").strip()
			
 
				+            if not re.match(r"^q\d+$", q):
			
 
				+                self._send(400, json.dumps({"error": f"bad q (expect 'qNN'): {q!r}"},
			
 
				+                                           ensure_ascii=False), "application/json"); return
			
 
				+            q_dir = HERE / "runs_full" / q
			
 
				+            if not q_dir.is_dir():
			
 
				+                self._send(404, json.dumps({"error": f"runs_full/{q} not found"}, ensure_ascii=False),
			
 
				+                           "application/json"); return
			
 
				+            log_path = q_dir / "_reeval.log"
			
 
				+            try:
			
 
				+                log_fh = open(log_path, "w", encoding="utf-8", buffering=1)
			
 
				+                cmd = [sys.executable, "-u", str(HERE / "batch_3forms.py"),
			
 
				+                       "--reeval", "--reeval-q", q, "--output-dir", str(HERE / "runs_full")]
			
 
				+                flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
			
 
				+                proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
			
 
				+                                        cwd=str(HERE), creationflags=flags)
			
 
				+                
			
 
				+                ACTIVE_REEVALS[q] = {
			
 
				+                    "status": "running",
			
 
				+                    "pid": proc.pid,
			
 
				+                    "error": None
			
 
				+                }
			
 
				+                
			
 
				+                def wait_reeval(q_key, p_obj, fh):
			
 
				+                    try:
			
 
				+                        p_obj.wait()
			
 
				+                        if p_obj.returncode == 0:
			
 
				+                            ACTIVE_REEVALS[q_key]["status"] = "success"
			
 
				+                        else:
			
 
				+                            ACTIVE_REEVALS[q_key]["status"] = "failed"
			
 
				+                            ACTIVE_REEVALS[q_key]["error"] = f"Subprocess exited with code {p_obj.returncode}"
			
 
				+                    except Exception as ex:
			
 
				+                        ACTIVE_REEVALS[q_key]["status"] = "failed"
			
 
				+                        ACTIVE_REEVALS[q_key]["error"] = str(ex)
			
 
				+                    finally:
			
 
				+                        try:
			
 
				+                            fh.close()
			
 
				+                        except Exception:
			
 
				+                            pass
			
 
				+                
			
 
				+                t = threading.Thread(target=wait_reeval, args=(q, proc, log_fh))
			
 
				+                t.daemon = True
			
 
				+                t.start()
			
 
				+                
			
 
				+                self._send(200, json.dumps(
			
 
				+                    {"status": "started", "pid": proc.pid, "q": q,
			
 
				+                     "log": str(log_path.relative_to(HERE))},
			
 
				+                    ensure_ascii=False), "application/json")
			
 
				+            except Exception as e:
			
 
				+                self._send(500, json.dumps({"error": f"failed to start: {e}"},
			
 
				+                                           ensure_ascii=False), "application/json")
			
 
				+        elif self.path == "/api/save_spec":
			
 
				+            length = int(self.headers.get("Content-Length") or 0)
			
 
				+            raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
			
 
				+            try:
			
 
				+                payload = json.loads(raw)
			
 
				+            except Exception as e:
			
 
				+                self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
			
 
				+            file_name = (payload.get("file") or "").strip()
			
 
				+            content = payload.get("content") or ""
			
 
				+            allowed = [
			
 
				+                "README.md",
			
 
				+                "tools.md",
			
 
				+                "extraction/phase1-skeleton.md",
			
 
				+                "extraction/phase2-normalize.md",
			
 
				+                "extraction/phase3-finalize.md",
			
 
				+                "taxonomy/type_suggestions.md"
			
 
				+            ]
			
 
				+            if file_name not in allowed:
			
 
				+                self._send(400, json.dumps({"error": "invalid file parameter"}), "application/json"); return
			
 
				+            target_path = HERE / "procedure-dsl" / "spec" / file_name
			
 
				+            try:
			
 
				+                target_path.parent.mkdir(parents=True, exist_ok=True)
			
 
				+                target_path.write_text(content, encoding="utf-8")
			
 
				+                self._send(200, json.dumps({"status": "ok"}, ensure_ascii=False), "application/json")
			
 
				+            except Exception as e:
			
 
				+                self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
			
 
				+        else:
			
 
				+            self._send(404, json.dumps({"error": "not found"}), "application/json")
			
 
    def log_message(self, *a):
        """Silence the per-request log line; accepts and ignores any arguments."""
        return None
			
 
				+
			
 
if __name__ == "__main__":
    # Scan once at startup only to show the query count in the banner;
    # /api/data calls scan_runs() again on every request.
    n = len(scan_runs()["queries"])
    print(f"搜索评估查看 server：http://0.0.0.0:{PORT}   (runs_full/ 下 {n} 个 query，实时扫描)")
    # Bind to all interfaces and serve until the process is stopped.
    ThreadingHTTPServer(("0.0.0.0", PORT), H).serve_forever()
			
--- a/examples/process_pipeline/script/search_eval/fixed_query_eval/tool_extract.py
+++ b/examples/process_pipeline/script/search_eval/fixed_query_eval/tool_extract.py
@@ -0,0 +1,218 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""工具解构 · 从帖子提取结构化工具知识条目
			
 
				+================================================================================
			
 
				+
			
 
				+对一条或多条帖子（runs_full/{q}/form_A.json 里的 results），调 gemini-3.1-flash-lite
			
 
				+多模态（正文 + 配图）抽取「工具信息」，每个工具一条 JSON，写 runs_full/{q}/tools/{case_id}.json。
			
 
				+
			
 
				+- 模型固定 google/gemini-3.1-flash-lite（build_eval_llm_call("gemini-flash-lite")，OpenRouter 后端）
			
 
				+- 多模态：复用 search_and_evaluate 的 _attach_image_refs / llm_evaluate_sources 的 _format_post_for_eval
			
 
				+- JSON：复用 llm_helper.call_llm_with_retry（带重试 + 自动修复）。注意它只认 JSON 对象 {…}，
			
 
				+  所以把 prompt 输出包成 {"tools":[…]}。「用法」「案例」已改为结构化对象数组（见 TOOL_SYSTEM），以便复现
			
 
				+
			
 
				+用法（一般由 server.py 起子进程调，不直接手敲）：
			
 
				+  python tool_extract.py --q q0000 --case-ids xhs_abc,gzh_def
			
 
				+  python tool_extract.py --q q0000 --case-ids xhs_abc --force --model google/gemini-3.1-flash-lite
			
 
				+"""
			
 
				+import argparse
			
 
				+import asyncio
			
 
				+import json
			
 
				+import sys
			
 
				+from pathlib import Path
			
 
				+
			
 
				+PROJECT_ROOT = Path(__file__).resolve().parents[5]   # …/Agent
			
 
				+sys.path.insert(0, str(PROJECT_ROOT))
			
 
				+
			
 
				+from dotenv import load_dotenv
			
 
				+load_dotenv()
			
 
				+
			
 
				+from examples.process_pipeline.script.search_eval.search_and_evaluate import _attach_image_refs
			
 
				+from examples.process_pipeline.script.llm_evaluate_sources import _format_post_for_eval, build_eval_llm_call
			
 
				+from examples.process_pipeline.script.llm_helper import call_llm_with_retry
			
 
				+
			
 
				+HERE = Path(__file__).resolve().parent
			
 
				+
			
 
				+# 同目录 db 模块（双写 MySQL）；不可用则降级为只写本地 json
			
 
				+sys.path.insert(0, str(HERE))
			
 
				+try:
			
 
				+    import db as _db
			
 
				+except Exception:
			
 
				+    _db = None
			
 
				+DEFAULT_MODEL_CHOICE = "gemini-flash-lite"   # → google/gemini-3.1-flash-lite
			
 
				+MAX_IMAGES = 6
			
 
				+
			
 
				+
			
 
				+# ── 解构 Prompt（用户指定；仅把输出容器从裸数组改成 {"tools":[…]} 以适配 JSON 对象提取器）──
			
 
				+
			
 
				+TOOL_SYSTEM = """你是一个内容知识提取助手，将网络帖子（公众号文章、视频号图文、短视频等）中的工具信息提炼为「可复现」的结构化知识条目。
			
 
				+
			
 
				+你的输出会被别人当作操作手册来照着复现，所以核心目标是：**读者只看你的「用法」和「案例」，就能还原出帖子里展示的效果**。宁可信息多，不要丢失任何可操作的细节。
			
 
				+
			
 
				+提取规则：
			
 
				+- **图片 / 视频帧里的信息必须逐字提取成文字**——教程的真正干货（提示词原文、参数数值、按钮/菜单名、步骤顺序、操作前后的对比图说明）几乎都在配图里，不要只看正文。
			
 
				+- **保留具体细节，不要抽象化**：看到提示词就抄原文（中英文照搬），看到参数就记数值，看到操作就写清是哪个按钮/功能、第几步。禁止把「输入提示词 xxx 生成底图」压缩成「用提示词生成底图」这种丢失原文的空话。
			
 
				+- 去除废话：广告语、情绪渲染、关注引导、重复内容、无实质信息的过渡句。
			
 
				+- 一篇帖子中每提到一个工具，输出一条独立知识条目。
			
 
				+- 所有字段无信息则填 null，**只提取帖子真实出现的内容，不猜测、不杜撰、不补全**。区分「帖子明说的」和「你推断的」——只要后者一律不写。
			
 
				+
			
 
				+---
			
 
				+
			
 
				+请识别内容中提到的所有工具。**只输出一个 JSON 对象**，格式如下（每个工具一个对象，放进 tools 数组）：
			
 
				+
			
 
				+{
			
 
				+  "tools": [
			
 
				+    {
			
 
				+      "工具名称": string,
			
 
				+      "实质作用域": string,       // 工具适合处理的内容主体，如「人物」「产品」「风景」；若未提及，填「图片」或「视频」等泛称
			
 
				+      "形式作用域": string | null,  // 工具擅长的风格或表现形式，如「氛围感」「写实」「动漫」；若未提及填 null
			
 
				+      "创作层级": "制作层" | "创作层",  // 制作层 = 生产内容（出图/出视频/剪辑/配音）；创作层 = 辅助方向（选题/灵感/脚本）
			
 
				+      "来源链接": string | null,
			
 
				+      "输入": string | null,        // 该工具需要喂入什么（素材类型 + 提示词），如「角色参考图、光影参考图、提示词」
			
 
				+      "输出": string | null,        // 该工具产出什么，如「底图」「渲染图」
			
 
				+      "用法": [                      // 可复现的操作步骤数组；每一步都要让读者能照做。无则 null
			
 
				+        {
			
 
				+          "步骤": string,           // 这一步在做什么操作（动作 + 用到的按钮/功能/模式）
			
 
				+          "提示词或参数": string | null,  // 这一步用到的提示词原文、参数数值、设置项；逐字抄，无则 null
			
 
				+          "目的": string | null     // 这一步是为了达成什么效果 / 解决什么问题；让读者理解为什么这么做
			
 
				+        }
			
 
				+      ],
			
 
				+      "案例": [                      // 帖子里展示的具体复现实例（输入→操作→输出的完整链路）数组。无则 null
			
 
				+        {
			
 
				+          "场景": string,           // 这个案例要做成什么，如「生成粉色氛围的角色肖像」
			
 
				+          "输入": string,           // 具体喂了什么：垫了哪些参考图（几张/什么图）+ 用的提示词原文
			
 
				+          "使用的用法": string,     // 用了本工具（或配合其它工具）的哪个用法/按钮，按操作顺序串起来
			
 
				+          "输出": string,           // 最终产出了什么图 / 视频 / 文件
			
 
				+          "效果": string | null     // 达成的具体效果、或操作前后的对比差异；无则 null
			
 
				+        }
			
 
				+      ],
			
 
				+      "缺点": string[] | null,       // 帖子中提到的工具局限、不足或注意事项，每条为一个独立缺点
			
 
				+      "最新更新时间": string | null
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
 
				+
			
 
				+若识别到多个工具，tools 数组中包含多个对象。若未识别到任何工具，输出 {"tools": []}。
			
 
				+不要输出 JSON 以外的任何内容（不要 markdown 代码块标记、不要解释）。"""
			
 
				+
			
 
				+TOOL_USER_PREFIX = "【内容】\n"
			
 
				+
			
 
				+
			
 
				+def _validate_tools(data):
			
 
				+    if not isinstance(data, dict) or "tools" not in data:
			
 
				+        return '缺少顶层 "tools" 字段'
			
 
				+    if not isinstance(data["tools"], list):
			
 
				+        return '"tools" 必须是数组'
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+async def extract_one(source, llm_call, model):
			
 
				+    """对一条 source 抽工具，返回 (tools_list, cost)。失败返回 ([], cost)。"""
			
 
				+    post_block = _format_post_for_eval(source)
			
 
				+    image_urls = source.get("_image_data_urls") or None
			
 
				+    user_text = TOOL_USER_PREFIX + post_block
			
 
				+
			
 
				+    if image_urls:
			
 
				+        user_content = [{"type": "text", "text": user_text}]
			
 
				+        for u in image_urls:
			
 
				+            user_content.append({"type": "image_url", "image_url": {"url": u}})
			
 
				+        messages = [{"role": "system", "content": TOOL_SYSTEM},
			
 
				+                    {"role": "user", "content": user_content}]
			
 
				+    else:
			
 
				+        messages = [{"role": "system", "content": TOOL_SYSTEM},
			
 
				+                    {"role": "user", "content": user_text}]
			
 
				+
			
 
				+    data, cost = await call_llm_with_retry(
			
 
				+        llm_call=llm_call, messages=messages, model=model,
			
 
				+        temperature=0.1, max_tokens=4000,
			
 
				+        validate_fn=_validate_tools,
			
 
				+        task_name=f"ToolExtract[{source.get('case_id', '?')}]",
			
 
				+    )
			
 
				+    if not data:
			
 
				+        return [], cost
			
 
				+    return data.get("tools", []), cost
			
 
				+
			
 
				+
			
 
				+async def run(args):
			
 
				+    q = args.q
			
 
				+    form_file = HERE / "runs_full" / q / "form_A.json"
			
 
				+    if not form_file.exists():
			
 
				+        print(f"❌ 找不到 {form_file}"); return
			
 
				+    form = json.loads(form_file.read_text(encoding="utf-8"))
			
 
				+    by_cid = {r.get("case_id"): r for r in form.get("results", [])}
			
 
				+
			
 
				+    case_ids = [c.strip() for c in args.case_ids.split(",") if c.strip()]
			
 
				+    selected = []
			
 
				+    for cid in case_ids:
			
 
				+        if cid in by_cid:
			
 
				+            selected.append(by_cid[cid])
			
 
				+        else:
			
 
				+            print(f"⚠️ {cid} 不在 {q}/form_A.json，跳过")
			
 
				+    if not selected:
			
 
				+        print("❌ 没有可解构的帖子"); return
			
 
				+
			
 
				+    out_dir = HERE / "runs_full" / q / "tools"
			
 
				+    out_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    # 已解构跳过（除非 --force）
			
 
				+    todo = []
			
 
				+    for s in selected:
			
 
				+        of = out_dir / f"{s['case_id']}.json"
			
 
				+        if of.exists() and not args.force:
			
 
				+            print(f"⏭️  {s['case_id']} 已解构 → 跳过（--force 覆盖）")
			
 
				+            continue
			
 
				+        todo.append(s)
			
 
				+    if not todo:
			
 
				+        print("✅ 选中帖子均已解构"); return
			
 
				+
			
 
				+    # 模型：默认 gemini-3.1-flash-lite
			
 
				+    if args.model and "/" in args.model:
			
 
				+        from agent.llm.openrouter import create_openrouter_llm_call
			
 
				+        llm_call, model_id = create_openrouter_llm_call(model=args.model), args.model
			
 
				+    else:
			
 
				+        llm_call, model_id = build_eval_llm_call(args.model or DEFAULT_MODEL_CHOICE)
			
 
				+    print(f"🔧 工具解构 {len(todo)} 帖 · 模型 {model_id}")
			
 
				+
			
 
				+    # 收配图（多模态）
			
 
				+    await _attach_image_refs(todo, MAX_IMAGES, max(2, args.max_concurrent * 2), "url")
			
 
				+
			
 
				+    sem = asyncio.Semaphore(args.max_concurrent)
			
 
				+
			
 
				+    async def _work(s):
			
 
				+        async with sem:
			
 
				+            tools, cost = await extract_one(s, llm_call, model_id)
			
 
				+        of = out_dir / f"{s['case_id']}.json"
			
 
				+        of.write_text(json.dumps({
			
 
				+            "case_id": s["case_id"],
			
 
				+            "platform": s.get("platform"),
			
 
				+            "title": (s.get("post") or {}).get("title", ""),
			
 
				+            "url": s.get("source_url", ""),
			
 
				+            "model": model_id,
			
 
				+            "tool_count": len(tools),
			
 
				+            "tools": tools,
			
 
				+        }, ensure_ascii=False, indent=2), encoding="utf-8")
			
 
				+        print(f"   ✅ {s['case_id']} → {len(tools)} 个工具 · ${cost:.4f} → {of.name}")
			
 
				+
			
 
				+        # 双写 MySQL（失败不阻断，本地 json 已落盘）
			
 
				+        if _db:
			
 
				+            _db.upsert_tools(q, s["case_id"], model_id, tools,
			
 
				+                             platform=s.get("platform"),
			
 
				+                             post_title=(s.get("post") or {}).get("title", ""))
			
 
				+        return cost
			
 
				+
			
 
				+    costs = await asyncio.gather(*[_work(s) for s in todo])
			
 
				+    print(f"\n📊 完成 {len(todo)} 帖 · 总成本 ${sum(costs):.4f}")
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    p = argparse.ArgumentParser(description="工具解构：帖子 → 结构化工具知识条目")
			
 
				+    p.add_argument("--q", required=True, help="query 目录名，如 q0000")
			
 
				+    p.add_argument("--case-ids", required=True, help="逗号分隔的 case_id 列表")
			
 
				+    p.add_argument("--model", default=None, help="模型（默认 gemini-3.1-flash-lite；可传 OpenRouter model id）")
			
 
				+    p.add_argument("--max-concurrent", type=int, default=3)
			
 
				+    p.add_argument("--force", action="store_true", help="覆盖已解构的帖子")
			
 
				+    args = p.parse_args()
			
 
				+    asyncio.run(run(args))
			
 
				+
			
 
				+
			
 
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
			
--- a/examples/process_pipeline/script/search_eval/procedure-dsl/spec/taxonomy/type_suggestions.md
+++ b/examples/process_pipeline/script/search_eval/procedure-dsl/spec/taxonomy/type_suggestions.md
@@ -61,3 +61,84 @@
 
				 - `要点总结`: (无 desc)  (来自 case-A_xhs_69abf820, extends `描述`)
			
 
				 - `参数表`: (无 desc)  (来自 case-A_xhs_69abf820, extends `生成参数`)
			
 
				 - `参数与词汇表`: (无 desc)  (来自 case-A_xhs_69abf820, extends `生成参数`)
			
 
				+
			
 
				+- `创作主题词`: 用户在公共平台搜索参考图时的风格/主题关键词  (来自 case-A_zhihu_20281424, extends `描述`)
			
 
				+- `参考场景图`: 从公共图库选取的场景风格参考图，用于后续提示词反推和图生图  (来自 case-A_zhihu_20281424, extends `参考图`)
			
 
				+- `全景场景提示词`: LLM针对参考场景反推生成的720度全景AI提示词，含核心视角、场景要素和氛围描述  (来自 case-A_zhihu_20281424, extends `提示词`)
			
 
				+- `720度全景场景图`: AI图生图产出的720度全景场景底图，待后期超分增强  (来自 case-A_zhihu_20281424, extends `底图`)
			
 
				+- `高清全景图`: 经AI画质增强放大的高清720度全景图，用于全景查看器取景  (来自 case-A_zhihu_20281424, extends `底图`)
			
 
				+- `角色三视图`: 用于图生图角色合成的人物三视图参考图集  (来自 case-A_zhihu_20281424, extends `参考图`)
			
 
				+
			
 
				+- `视频语言拆解报告`: 由AI从目标视频反推出的完整镜头语言拆解，含时间分段明细（画面/景别/运镜/动作/情绪）与整体节奏/运镜/光影总结  (来自 case-B_xhs_6a12cb56, extends `分镜脚本`)
			
 
				+
			
 
				+- `扣子账户`: 访问扣子(Coze)工作流平台所需的账户凭证，用于创建和管理工作流项目  (来自 case-C_zhihu_20401967, extends `生成参数`)
			
 
				+- `抖音分享链接`: 抖音短视频的分享链接，指向被反推提示词的目标爆款视频  (来自 case-C_zhihu_20401967, extends `对标内容`)
			
 
				+- `API密钥`: 米核(miheai)平台的API密钥，用于调用抖音视频获取和爆款视频二创提示词两个插件  (来自 case-C_zhihu_20401967, extends `生成参数`)
			
 
				+
			
 
				+- `拆解指令`: 向豆包发送的固定分析指令文本，要求豆包对图片进行解构并生成文生图提示词  (来自 case-A_zhihu_19611618, extends `提示词`)
			
 
				+- `复刻图片`: 基于豆包反推提示词在即梦中生成的最终图片，与原参考图相似度90%+  (来自 case-A_zhihu_19611618, extends `成品图`)
			
 
				+
			
 
				+- `ComfyUI节点插件`: ComfyUI-QwenVL 插件，为 ComfyUI 画布新增 QwenVL GGUF 多模态推理节点  (来自 case-B_zhihu_20147454, extends `工作流`)
			
 
				+- `推理运行库`: llama_cpp_python GGUF 视觉推理运行时库及其 Python binding  (来自 case-B_zhihu_20147454, extends `模型权重`)
			
 
				+- `系统动态库`: CUDA GPU 动态链接库（cudart64_12、cublas64_12 等），llama_cpp GGUF 推理引擎所需 GPU 依赖  (来自 case-B_zhihu_20147454, extends `模型权重`)
			
 
				+- `模型配置文件`: gguf_models.json 插件模型注册表，定义 GGUF 模型的仓库 ID、文件名及默认运行参数  (来自 case-B_zhihu_20147454, extends `生成参数`)
			
 
				+- `GGUF视觉推理环境`: P1 产出的 Qwen3.5 GGUF 视觉推理环境，含插件、依赖库及模型文件  (来自 case-B_zhihu_20147454, extends `工作流`)
			
 
				+
			
 
				+- `ComfyUI节点插件`: ComfyUI-QwenVL 插件，为 ComfyUI 画布新增 QwenVL GGUF 多模态推理节点  (来自 case-?, extends `工作流`)
			
 
				+- `推理运行库`: llama_cpp_python GGUF 视觉推理运行时库及其 Python binding  (来自 case-?, extends `模型权重`)
			
 
				+- `系统动态库`: CUDA GPU 动态链接库（cudart64_12、cublas64_12 等），llama_cpp GGUF 推理引擎所需 GPU 依赖  (来自 case-?, extends `模型权重`)
			
 
				+- `模型配置文件`: gguf_models.json 插件模型注册表，定义 GGUF 模型的仓库 ID、文件名及默认运行参数  (来自 case-?, extends `生成参数`)
			
 
				+- `GGUF视觉推理环境`: P1 产出的 Qwen3.5 GGUF 视觉推理环境，含插件、依赖库及模型文件  (来自 case-?, extends `工作流`)
			
 
				+
			
 
				+- `反推提示词`: 豆包按核心主体/环境/细节/构图/光色/风格六维结构反推图片所得的结构化提示词  (来自 case-?, extends `提示词`)
			
 
				+- `反推框架模板`: 用于反推任意 AI 绘画图片提示词的六维结构模板，含核心主体/环境/细节/构图/光色/风格  (来自 case-?, extends `模板`)
			
 
				+
			
 
				+- `多维表格`: 飞书多维表格各阶段配置状态，AI字段捷径驱动的自动化数据处理表格  (来自 case-C_gzh_ca661ccd, extends `工作流`)
			
 
				+
			
 
				+- `平台账号`: 访问 Coze 平台所需的账号凭证  (来自 case-A_gzh_abf2b17e, extends `规格参数`)
			
 
				+- `API密钥`: 调用米核AI插件所需的 api_key（从 miheai.com 获取）  (来自 case-A_gzh_abf2b17e, extends `规格参数`)
			
 
				+- `视频分享链接`: 抖音视频分享链接 URL，作为工作流的触发输入参数  (来自 case-A_gzh_abf2b17e, extends `规格参数`)
			
 
				+- `视频URL`: video_detail 插件返回的视频真实播放 URL，供 ve_video_to_prompt 使用  (来自 case-A_gzh_abf2b17e, extends `规格参数`)
			
 
				+
			
 
				+- `缩放图像`: 经KJNodes调整至1024x1024的输入图像，供JoyCaption分析  (来自 case-B_zhihu_19508812, extends `参考图`)
			
 
				+- `模型配置`: 指定JoyCaption模型名称/路径的配置参数  (来自 case-B_zhihu_19508812, extends `生成参数`)
			
 
				+- `模型管道`: 已实例化的JoyTwoPipeline对象  (来自 case-B_zhihu_19508812, extends `模型权重`)
			
 
				+- `SD模型`: Stable Diffusion图像生成模型（dreamshaper/primemix）  (来自 case-B_zhihu_19508812, extends `模型权重`)
			
 
				+
			
 
				+- `视频分享文本`: 用户输入的抖音视频分享链接文本，含短链接和话题标签  (来自 case-?, extends `描述`)
			
 
				+- `视频下载地址`: 从分享文本解析得到的无水印视频真实播放URL  (来自 case-?, extends `描述`)
			
 
				+- `API响应数据`: Coze工作流API返回的字符串格式JSON响应  (来自 case-?, extends `描述`)
			
 
				+- `解析后JSON`: 经JavaScript解析后的Coze响应JSON对象，含output字段  (来自 case-?, extends `描述`)
			
 
				+- `视频生成任务信息`: Sora2提交视频生成任务后的响应数据，含task_id等字段  (来自 case-?, extends `生成参数`)
			
 
				+- `进度查询结果`: 轮询Sora2任务状态接口返回的进度数据，含status和progress字段  (来自 case-?, extends `生成参数`)
			
 
				+- `完成任务结果`: 状态为SUCCESS的Sora2任务完成数据，含data.output视频URL  (来自 case-?, extends `生成参数`)
			
 
				+- `未完成查询结果`: 状态为未完成的进度数据，触发Wait等待后重新轮询  (来自 case-?, extends `生成参数`)
			
 
				+- `等待完成信号`: Wait节点等待片刻后发出的继续执行控制信号  (来自 case-?, extends `描述`)
			
 
				+- `生成视频URL`: Sora2生成完成的视频文件下载地址URL  (来自 case-?, extends `描述`)
			
 
				+
			
 
				+- `VLM推理实例`: 经 Llama-cpp Model Loader 加载的 VLM 模型运行实例，含主模型权重与多模态投影权重  (来自 case-B_gzh_0ee46f34, extends `模型权重`)
			
 
				+- `视频帧序列`: ComfyUI 加载视频后解码出的帧序列对象，用于逐帧 VLM 推理  (来自 case-B_gzh_0ee46f34, extends `参考视频`)
			
 
				+- `扩散模型实例`: 经 ComfyUI Load Diffusion Model 加载的 z-image/Flux 扩散模型运行实例  (来自 case-B_gzh_0ee46f34, extends `模型权重`)
			
 
				+- `CLIP实例`: 经 ComfyUI 加载的 CLIP 文本编码器运行实例（qwen3_4b lumina2）  (来自 case-B_gzh_0ee46f34, extends `模型权重`)
			
 
				+- `VAE实例`: 经 ComfyUI 加载的 VAE 解码器运行实例（flux_ae.safetensors）  (来自 case-B_gzh_0ee46f34, extends `模型权重`)
			
 
				+- `潜变量`: 扩散模型在潜空间中的张量表示，包含采样中间态及最终潜码  (来自 case-B_gzh_0ee46f34, extends `生成参数`)
			
 
				+
			
 
				+- `目标参考图`: 待分析反推提示词的目标大片图  (来自 case-C_gzh_3a1fb7ba, extends `参考图`)
			
 
				+
			
 
				+- `CLIP权重`: 基础模型中 CLIP 文本编码器的权重，用于把提示词编码为条件向量  (来自 case-A_gzh_11ddd2df, extends `模型权重`)
			
 
				+- `VAE权重`: 基础模型中 VAE 的权重，用于潜空间与像素空间之间的转换  (来自 case-A_gzh_11ddd2df, extends `模型权重`)
			
 
				+- `风格参数`: WujiSuperEditor 的风格预设参数（艺术流派/摄影风格等），控制图像生成的风格方向  (来自 case-A_gzh_11ddd2df, extends `生成参数`)
			
 
				+- `正向条件向量`: 由 CLIP 编码的正向条件嵌入张量，作为扩散采样的正向引导信号  (来自 case-A_gzh_11ddd2df, extends `提示词`)
			
 
				+- `负向条件向量`: 由 CLIP 编码的负向条件嵌入张量，作为扩散采样的排斥信号  (来自 case-A_gzh_11ddd2df, extends `负向提示词`)
			
 
				+- `潜空间图像`: VAE 编码的潜变量图像，作为扩散采样的初始潜空间输入  (来自 case-A_gzh_11ddd2df, extends `底图`)
			
 
				+- `放大参数`: WujiUpscaler 的放大配置（放大模型 seedvr2_ema_3b、最短边尺寸 1080px、颜色校对 lab 色彩空间）  (来自 case-A_gzh_11ddd2df, extends `规格参数`)
			
 
				+
			
 
				+- `提示词结构维度表`: 将生图提示词拆解为维度（风格/场景/人物/光线/构图/配色/氛围等）的结构化对照表  (来自 case-C_gzh_b55de63d, extends `大纲`)
			
 
				+- `案例库文件夹`: 按文案类/设计类/视频类分类的反推案例库文件夹，是案例条目的归档容器  (来自 case-C_gzh_b55de63d, extends `知识库`)
			
 
				+- `案例库条目`: 经AI拆解后按类型入库的单条反推案例，含提示词结构或文案模板，可复用于类似场景  (来自 case-C_gzh_b55de63d, extends `模板`)
			
 
				+
			
 
				+- `提示词结构维度表`: 将生图提示词拆解为维度（风格/场景/人物/光线/构图/配色/氛围等）的结构化对照表  (来自 case-?, extends `大纲`)
			
 
				+- `案例库文件夹`: 按文案类/设计类/视频类分类的反推案例库文件夹，是案例条目的归档容器  (来自 case-?, extends `知识库`)
			
 
				+- `案例库条目`: 经AI拆解后按类型入库的单条反推案例，含提示词结构或文案模板，可复用于类似场景  (来自 case-?, extends `模板`)
			
 
				+
			
 
				+- `图片分析提示词模板`: 发给豆包的图片分析指令模板，引导豆包从六个维度（主体内容/场景设定/风格参考/色彩色调/构图视角/附加细节）反推文生图提示词  (来自 case-B_xhs_6837120e, extends `提示词`)