Talegorithm 1 месяц назад
Родитель
Сommit
690b1feb5a

+ 2 - 2
agent/core/memory.py

@@ -1,7 +1,7 @@
 """
 Memory 系统(Phase 2+)
 
-详见 agent/docs/memory-plan.md。核心概念:
+详见 agent/docs/memory.md。核心概念:
 - Memory:Agent 身份私有的主观记忆,Markdown 文件,人类可读写
 - Dream:记忆反思操作(回顾多个 trace 的执行历史,更新记忆文件)
 
@@ -22,7 +22,7 @@ logger = logging.getLogger(__name__)
 
 @dataclass
 class MemoryConfig:
-    """持久化记忆配置(见 agent/docs/memory-plan.md 第五节)"""
+    """持久化记忆配置(见 agent/docs/memory.md 第五节)"""
 
     base_path: str = ""
     # 记忆文件根目录。所有文件路径相对此目录解析。

+ 1 - 1
agent/core/prompts/knowledge.py

@@ -10,7 +10,7 @@
 
 "pending" 语义:条目落到 cognition_log 的 extraction_pending 事件,
 等待人工(或 reflect_auto_commit=True 时由框架自动)review + commit 才进入 KnowHub。
-详见 agent/docs/memory-plan.md 第三节"提取-审核-提交两阶段"。
+详见 agent/docs/memory.md 第三节"提取-审核-提交两阶段"。
 """
 
 # ===== 压缩时阶段性反思 =====

+ 2 - 2
agent/core/runner.py

@@ -143,7 +143,7 @@ class RunConfig:
     enable_research_flow: bool = True  # 是否启用自动研究流程(知识检索→经验检索→调研→计划)
     # --- 知识管理配置 ---
     knowledge: KnowledgeConfig = field(default_factory=KnowledgeConfig)
-    # --- Memory 配置(见 agent/docs/memory-plan.md) ---
+    # --- Memory 配置(见 agent/docs/memory.md) ---
     # None = 默认 Agent(无长期记忆);赋值 MemoryConfig 使该 Agent 成为 memory-bearing Agent
     memory: Optional["MemoryConfig"] = None
 
@@ -3023,7 +3023,7 @@ class AgentRunner:
         if config.max_iterations and config.max_iterations > 0:
             system_prompt += f"\n\n## Execution Constraint\n这是一项有严格步数限制的任务。你最多可以用 {config.max_iterations} 轮交互来解决问题。\n请务必【边查边写、随时存档】!每当你收集或得出一个有价值的独立结果(如收集到一个独立 Case),请立刻调用工具写入或追加到结果文件中,绝对不要等到所有任务都做完再最后一次性输出。这样即使触达步数上限被强制打断,你已经收集的成果也能安全保留!"
         # Memory 注入(memory-bearing Agent)——在 system prompt 末尾追加
-        # 初版选择 system prompt 追加(见 agent/docs/memory-plan.md 待定问题 1)。
+        # 初版选择 system prompt 追加(见 agent/docs/memory.md 待定问题 1)。
         # 好处:run 启动一次性注入、所有后续轮次都能看到、与 skills 注入方式一致。
         # 代价:若记忆文件很大会持续占 prompt tokens —— 待观察后决定是否切换方案。
         if config.memory:

+ 0 - 296
agent/docs/cognition-log-plan.md

@@ -1,296 +0,0 @@
-# Cognition Log 与知识反馈设计
-
-## 文档维护规范
-
-0. **先改文档,再动代码** - 新功能或重大修改需先完成文档更新、并完成审阅后,再进行代码实现;除非改动较小、不被文档涵盖
-1. **文档分层,链接代码** - 重要或复杂设计可以另有详细文档;关键实现需标注代码文件路径;格式:`module/file.py:function_name`
-2. **简洁快照,日志分离** - 只记录最重要的、与代码准确对应的或者明确的已完成的设计的信息,避免推测、建议、决策历史、修改日志、大量代码;决策依据或修改日志若有必要,可在 `knowhub/docs/decisions.md` 另行记录
-
----
-
-## 概述
-
-每个 trace 维护一个 `cognition_log.json`,按时间顺序记录所有认知事件(知识查询、评估、提取、反思),为知识质量反馈和 Memory 系统的 dream 操作(详见 `agent/docs/memory-plan.md`)提供数据。
-
-> 此文件原名 `knowledge_log.json`,扩展为统一事件流后更名。
-
----
-
-## Cognition Log 数据结构
-
-**位置**:`.trace/{trace_id}/cognition_log.json`
-
-```json
-{
-  "trace_id": "trace-xxx",
-  "events": [
-    { "type": "query", "sequence": 42, ... },
-    { "type": "evaluation", "sequence": 66, ... },
-    { "type": "extraction", "sequence": 88, ... },
-    { "type": "reflection", "sequence": 120, ... }
-  ]
-}
-```
-
-所有事件按 `sequence` 排列,保持在 trace 中的时间顺序。
-
----
-
-## 事件类型
-
-### `query`:知识查询
-
-Agent 通过 `POST /api/knowledge/ask` 查询知识时记录。一次查询返回 KM Agent 的整合回答及引用的各 source,作为一个整体记录。
-
-```json
-{
-  "type": "query",
-  "sequence": 42,
-  "goal_id": "1",
-  "query": "goal 的描述文本",
-  "response": "KM Agent 整合后的回答...",
-  "source_ids": ["knowledge-a1b2", "knowledge-c3d4", "knowledge-e5f6"],
-  "sources": [
-    {"id": "knowledge-a1b2", "task": "...", "content": "...(截断500字)"},
-    {"id": "knowledge-c3d4", "task": "...", "content": "..."}
-  ],
-  "timestamp": "2026-03-20T10:00:00"
-}
-```
-
-**写入时机**:`agent/trace/goal_tool.py:inject_knowledge_for_goal`,`POST /api/knowledge/ask` 返回后。
-
-### `evaluation`:知识评估
-
-对某次 query 中各 source 的使用效果评估。通过 `query_sequence` 关联到对应的 query 事件。
-
-```json
-{
-  "type": "evaluation",
-  "sequence": 66,
-  "query_sequence": 42,
-  "trigger": "goal_completion",
-  "assessments": [
-    {"source_id": "knowledge-a1b2", "status": "helpful", "reason": "准确定位了问题"},
-    {"source_id": "knowledge-c3d4", "status": "irrelevant", "reason": "与当前任务无关"},
-    {"source_id": "knowledge-e5f6", "status": "harmful", "reason": "建议的方法已过时"}
-  ],
-  "timestamp": "2026-03-20T10:05:00"
-}
-```
-
-**`status` 可能的值**:
-
-| 状态 | 含义 |
-|---|---|
-| `irrelevant` | 知识与当前任务无关 |
-| `unused` | 知识相关但未被使用 |
-| `helpful` | 知识对任务有实质帮助 |
-| `harmful` | 知识对任务产生负面作用 |
-| `neutral` | 知识相关但无明显影响 |
-
-### `extraction`:知识提取
-
-Agent 通过 reflection 侧分支将知识上传到 KnowHub 时记录。
-
-```json
-{
-  "type": "extraction",
-  "sequence": 88,
-  "trigger": "compression",
-  "items": [
-    {"knowledge_id": "knowledge-new-1", "type": "experience", "task": "...", "content": "...(截断500字)"}
-  ],
-  "timestamp": "2026-03-20T10:10:00"
-}
-```
-
-**写入时机**:reflection 侧分支中 `upload_knowledge` 调用成功后。
-
-### `reflection`:记忆反思
-
-仅 memory-bearing Agent 使用(详见 `agent/docs/memory-plan.md`)。Dream 操作触发的 per-trace 记忆反思。
-
-```json
-{
-  "type": "reflection",
-  "sequence": 120,
-  "reflected_range": [43, 120],
-  "summary": "这次执行中发现用户偏好XX方向...",
-  "timestamp": "2026-04-07T20:00:00"
-}
-```
-
-**写入时机**:dream 操作中 per-trace 反思完成后。
-
----
-
-## 评估触发机制
-
-评估针对的是未评估的 query 事件(即存在 query 事件但没有对应 evaluation 事件的)。
-
-**判断待评估条件**:查找 cognition_log 中所有 `type: "query"` 事件,检查是否存在 `query_sequence` 指向该 query 的 `type: "evaluation"` 事件。
-
-### 触发点 1:Goal 完成
-
-**时机**:Goal status 变为 `completed` 或 `abandoned`
-
-**触发逻辑**(`agent/trace/store.py:update_goal`):
-
-```
-Goal 完成
-  ↓
-查询 cognition_log 中未评估的 query 事件
-  ↓
-如果有待评估
-  → 设置 trace.context["pending_knowledge_eval"] = true
-  → 设置 trace.context["knowledge_eval_trigger"] = "goal_completion"
-  ↓
-Runner 主循环下一次迭代开头检测到标志(agent/core/runner.py:_agent_loop)
-  → 清除标志
-  → 将 "knowledge_eval" 加入 force_side_branch 队列
-```
-
-### 触发点 2:压缩
-
-**时机**:上下文 token 数超过阈值,即将执行压缩
-
-**触发逻辑**(`agent/core/runner.py:_manage_context_usage`):
-
-```
-压缩条件触发
-  ↓
-查询 cognition_log 中未评估的 query 事件
-  ↓
-如果有待评估
-  → 设置 trace.context["knowledge_eval_trigger"] = "compression"
-  → 侧分支队列:["reflection", "knowledge_eval", "compression"](启用知识提取时)
-  →            或 ["knowledge_eval", "compression"](未启用时)
-  → 返回"需要进入侧分支"信号,暂缓压缩
-```
-
-压缩会删除消息历史,必须在压缩前完成评估。
-
-### 触发点 3:任务结束(兜底)
-
-**时机**:主路径无工具调用,Agent 即将结束
-
-**触发逻辑**(`agent/core/runner.py:_agent_loop`):
-
-```
-任务即将结束
-  ↓
-查询 cognition_log 中未评估的 query 事件
-  ↓
-如果有待评估
-  → 设置 trace.context["knowledge_eval_trigger"] = "task_completion"
-  → 将 ["knowledge_eval"] 加入 force_side_branch 队列
-  → continue(不 break,下一轮执行评估)
-```
-
----
-
-## 侧分支评估流程
-
-### 侧分支类型
-
-复用 `SideBranchContext` 机制,类型 `"knowledge_eval"`(`agent/trace/models.py:Message.branch_type`)。
-
-### 评估 Prompt 结构
-
-完整实现:`agent/core/runner.py:_build_knowledge_eval_prompt`
-
-```
-你是知识评估助手。请评估以下知识查询结果在本次任务执行中的实际效果。
-
-## 当前任务(Mission)       ← trace.task
-## 当前 Goal                ← goal_tree.current 的 description
-## 待评估知识查询            ← 未评估的 query 事件列表
-  对每个 query:展示 query 文本、整合回答、各 source 的 id/task/content
-## 评估要求                  ← 按 source_id 逐一评估
-## 评估分类                  ← 5 个 status 选项
-## 输出格式                  ← JSON
-```
-
-Prompt 中**不包含消息历史**。LLM 依据对话上下文中已有的执行过程作出判断。
-
-### 评估输出格式
-
-LLM 直接输出 JSON:
-
-```json
-{
-  "evaluations": [
-    {
-      "query_sequence": 42,
-      "assessments": [
-        {"source_id": "knowledge-a1b2", "status": "helpful", "reason": "..."},
-        {"source_id": "knowledge-c3d4", "status": "irrelevant", "reason": "..."}
-      ]
-    }
-  ]
-}
-```
-
-### 即时写入
-
-每次 LLM 回复后立即解析,三种策略降级:整体 JSON → ` ```json ` 代码块 → 正则裸对象。
-
-解析成功 → 为每个 query 写入对应的 `evaluation` 事件到 cognition_log。解析失败记日志,不中断。
-
----
-
-## 数据流
-
-```
-知识查询(agent/trace/goal_tool.py:inject_knowledge_for_goal)
-  ↓
-POST /api/knowledge/ask → KM Agent 整合回答
-  ↓
-写入 cognition_log: type="query"(含 response + source_ids)
-  ↓
-  ┌─────────────────────────────────────────────┐
-  │  触发点 A:Goal 完成(goal_completion)       │
-  │  触发点 B:压缩执行前(compression)          │
-  │  触发点 C:任务自然结束(task_completion)    │
-  └─────────────────────────────────────────────┘
-  ↓
-Runner 进入 knowledge_eval 侧分支
-  ↓
-LLM 按 query 维度、逐 source 评估,输出 JSON
-  ↓
-写入 cognition_log: type="evaluation"(含 assessments per source)
-  ↓
-侧分支退出 → 恢复主路径
-
-                    ···
-
-知识提取(reflection 侧分支中 upload_knowledge 成功后)
-  ↓
-写入 cognition_log: type="extraction"
-
-                    ···
-
-Dream 触发(memory-bearing Agent,详见 agent/docs/memory-plan.md)
-  ↓
-读取 cognition_log 全部事件 → per-trace 记忆反思
-  ↓
-写入 cognition_log: type="reflection"
-```
-
----
-
-## 与现有系统的集成点
-
-| 集成位置 | 文件 | 说明 |
-|---|---|---|
-| 知识查询时写 log | `agent/trace/goal_tool.py:inject_knowledge_for_goal` | `goal(focus=...)` 触发 ask → 写入 `query` 事件 |
-| Goal 完成时设置标志 | `agent/trace/store.py:update_goal` | 设置 `trace.context["pending_knowledge_eval"]` |
-| 主循环检测标志 | `agent/core/runner.py:_agent_loop` | 每轮迭代开头检测,触发 `["knowledge_eval"]` |
-| 压缩前触发评估 | `agent/core/runner.py:_manage_context_usage` | 压缩前检查 pending,先评估再压缩 |
-| 任务结束兜底 | `agent/core/runner.py:_agent_loop` | 退出前检查 pending,强制触发评估 |
-| 侧分支类型 | `agent/trace/models.py:Message.branch_type` | Literal 中包含 `"knowledge_eval"` |
-| 即时写入评估 | `agent/core/runner.py:_agent_loop` | 解析 JSON 后写入 `evaluation` 事件 |
-| 知识提取记录 | `agent/core/runner.py` | reflection 侧分支中 upload 成功后写入 `extraction` 事件 |
-| 记忆反思记录 | dream 操作 | per-trace 反思后写入 `reflection` 事件 |
-| Log 文件管理 | `agent/trace/store.py` | 待重构:从 entries[] 改为 events[] |

+ 396 - 0
agent/docs/cognition-log.md

@@ -0,0 +1,396 @@
+# Cognition Log 与知识反馈
+
+> 状态:已实现(2026-04)。本文档同时承担**设计理由**和**使用规范**。
+> 实现入口与代码位置见文末"七、实现与入口"。
+
+## 文档维护规范
+
+0. **先改文档,再动代码** - 新功能或重大修改需先完成文档更新、并完成审阅后,再进行代码实现;除非改动较小、不被文档涵盖
+1. **文档分层,链接代码** - 重要或复杂设计可以另有详细文档;关键实现需标注代码文件路径;格式:`module/file.py:function_name`
+2. **简洁快照,日志分离** - 只记录最重要的、与代码准确对应的或者明确的已完成的设计的信息,避免推测、建议、决策历史、修改日志、大量代码;决策依据或修改日志若有必要,可在 `knowhub/docs/decisions.md` 另行记录
+
+---
+
+## 概述
+
+每个 trace 维护一个 `cognition_log.json`,按时间顺序记录所有认知事件(知识查询、评估、提取三态、记忆反思),为知识质量反馈和 Memory 系统的 dream 操作(详见 `agent/docs/memory.md`)提供数据。
+
+> 此文件原名 `knowledge_log.json`,扩展为统一事件流后更名。读取时仍兼容旧文件名和 `entries[]` 字段。
+
+---
+
+## Cognition Log 数据结构
+
+**位置**:`.trace/{trace_id}/cognition_log.json`
+
+```json
+{
+  "trace_id": "trace-xxx",
+  "events": [
+    { "type": "query", "sequence": 42, ... },
+    { "type": "evaluation", "query_sequence": 42, ... },
+    { "type": "extraction_pending", "extraction_id": "pending-abc", ... },
+    { "type": "extraction_reviewed", "extraction_id": "pending-abc", "decision": "approve", ... },
+    { "type": "extraction_committed", "extraction_id": "pending-abc", "knowledge_id": "knowledge-xyz", ... },
+    { "type": "reflection", "sequence_range": [43, 120], ... }
+  ]
+}
+```
+
+所有事件按写入顺序追加(`query` 和 `reflection` 关联 trace 中 message 的 `sequence`;`evaluation` / `extraction_*` 不强依赖 sequence)。框架自动给每个事件写入 `timestamp`。
+
+---
+
+## 事件类型
+
+### `query`:知识查询
+
+Agent 通过 `POST /api/knowledge/ask` 查询知识时记录。一次查询返回 KM Agent 的整合回答及引用的各 source,作为一个整体记录。
+
+```json
+{
+  "type": "query",
+  "sequence": 42,
+  "goal_id": "1",
+  "query": "goal 的描述文本",
+  "response": "KM Agent 整合后的回答...",
+  "source_ids": ["knowledge-a1b2", "knowledge-c3d4", "knowledge-e5f6"],
+  "sources": [
+    {"id": "knowledge-a1b2", "task": "...", "content": "...(截断500字)"},
+    {"id": "knowledge-c3d4", "task": "...", "content": "..."}
+  ],
+  "timestamp": "2026-03-20T10:00:00"
+}
+```
+
+**写入时机**:`agent/trace/goal_tool.py:inject_knowledge_for_goal`,`POST /api/knowledge/ask` 返回后。
+
+### `evaluation`:知识评估
+
+对某次 query 中各 source 的使用效果评估。通过 `query_sequence` 关联到对应的 query 事件。实际按 knowledge_id 逐条写入(每条知识一个 evaluation 事件)。
+
+```json
+{
+  "type": "evaluation",
+  "query_sequence": 42,
+  "knowledge_id": "knowledge-a1b2",
+  "eval_result": {"status": "helpful", "reason": "准确定位了问题"},
+  "evaluated_at_trigger": "goal_completion",
+  "timestamp": "2026-03-20T10:05:00"
+}
+```
+
+LLM 在侧分支内按 `{evaluations: [{query_sequence, assessments: [{knowledge_id, eval_status, reason}]}]}` 输出;runner 解析后分条写入(一个 evaluation 事件对应一条 knowledge)。
+
+**`status` 可能的值**:
+
+| 状态 | 含义 |
+|---|---|
+| `irrelevant` | 知识与当前任务无关 |
+| `unused` | 知识相关但未被使用 |
+| `helpful` | 知识对任务有实质帮助 |
+| `harmful` | 知识对任务产生负面作用 |
+| `neutral` | 知识相关但无明显影响 |
+
+### `extraction_pending` / `extraction_reviewed` / `extraction_committed`:知识提取三态
+
+Agent 的反思侧分支不再直接 upload 到 KnowHub,而是先暂存为 pending:默认经人工 review 后才 commit;`reflect_auto_commit=True` 时由框架在侧分支退出时自动 approve + commit。详见 `agent/docs/memory.md` 第三节"提取-审核-提交两阶段"。
+
+#### `extraction_pending`
+
+反思 LLM 调用 `knowledge_save_pending` 工具时写入。payload 字段与 `knowledge_save` 参数一一对应(review 通过后字段透传)。
+
+```json
+{
+  "type": "extraction_pending",
+  "extraction_id": "pending-a1b2c3d4",
+  "sequence": 88,
+  "goal_id": "g1",
+  "branch_id": "reflection-branch-xxx",
+  "payload": {
+    "task": "...", "content": "...", "types": ["tool"],
+    "tags": {...}, "score": 4, "scopes": null, "owner": null,
+    "resource_ids": [], "source_name": "", "source_category": "exp",
+    "urls": [], "agent_id": "...", "submitted_by": "",
+    "capability_ids": [], "tool_ids": []
+  },
+  "timestamp": "2026-03-20T10:10:00"
+}
+```
+
+**写入时机**:`agent/tools/builtin/knowledge.py:knowledge_save_pending` 工具调用时。
+
+#### `extraction_reviewed`
+
+人工审核决策。同一 `extraction_id` 可能被多次 review(以最新一条为准),`edit` 决策携带 `edited_payload` 覆盖原 payload。
+
+```json
+{
+  "type": "extraction_reviewed",
+  "extraction_id": "pending-a1b2c3d4",
+  "decision": "approve",
+  "timestamp": "2026-04-15T14:00:00"
+}
+```
+
+`decision` 取值:`approve` / `edit` / `discard`。`edit` 时附加 `edited_payload` 字段。
+
+**写入时机**:
+- CLI:`python -m agent.cli.extraction_review --review`
+- Interactive 菜单第 8 项
+- HTTP:`POST /api/traces/{tid}/extractions/{eid}/review`
+- 共享核心:`agent/trace/extraction_review.py:review_one`
+
+#### `extraction_committed`
+
+将 approved/edited 的条目实际上传到 KnowHub 后写入。失败条目不写此事件,只记录在 CommitReport 里。
+
+```json
+{
+  "type": "extraction_committed",
+  "extraction_id": "pending-a1b2c3d4",
+  "knowledge_id": "knowledge-new-1",
+  "timestamp": "2026-04-15T14:05:00"
+}
+```
+
+**写入时机**:`agent/trace/extraction_review.py:commit_approved`(调用 `knowledge_save` 成功后)。`reflect_auto_commit=True` 时由反思侧分支退出 hook 自动触发 `auto_commit_branch`。
+
+### `reflection`:记忆反思
+
+仅 memory-bearing Agent 使用(详见 `agent/docs/memory.md`)。Dream 操作触发的 per-trace 记忆反思。
+
+```json
+{
+  "type": "reflection",
+  "sequence_range": [43, 120],
+  "summary": "这次执行中发现用户偏好XX方向...",
+  "consumed_at": "2026-04-07T20:05:00",
+  "timestamp": "2026-04-07T20:00:00"
+}
+```
+
+`sequence_range` 是本次反思覆盖的消息区间 `[start, end]`。`consumed_at` 在跨 trace 整合(dream 的第二阶段)消化了该反思后写入;未消化时此字段缺省。
+
+**写入时机**:`agent/core/dream.py:per_trace_reflect`(写入时 `consumed_at` 缺省);`agent/core/dream.py:cross_trace_integrate`(整合后补 `consumed_at`)。
+
+---
+
+## 评估触发机制
+
+评估针对的是未评估的 query 事件(即存在 query 事件但没有对应 evaluation 事件的)。
+
+**判断待评估条件**:查找 cognition_log 中所有 `type: "query"` 事件,检查是否存在 `query_sequence` 指向该 query 的 `type: "evaluation"` 事件。
+
+### 触发点 1:Goal 完成
+
+**时机**:Goal status 变为 `completed` 或 `abandoned`
+
+**触发逻辑**(`agent/trace/store.py:update_goal`):
+
+```
+Goal 完成
+  ↓
+查询 cognition_log 中未评估的 query 事件
+  ↓
+如果有待评估
+  → 设置 trace.context["pending_knowledge_eval"] = true
+  → 设置 trace.context["knowledge_eval_trigger"] = "goal_completion"
+  ↓
+Runner 主循环下一次迭代开头检测到标志(agent/core/runner.py:_agent_loop)
+  → 清除标志
+  → 将 "knowledge_eval" 加入 force_side_branch 队列
+```
+
+### 触发点 2:压缩
+
+**时机**:上下文 token 数超过阈值,即将执行压缩
+
+**触发逻辑**(`agent/core/runner.py:_manage_context_usage`):
+
+```
+压缩条件触发
+  ↓
+查询 cognition_log 中未评估的 query 事件
+  ↓
+如果有待评估
+  → 设置 trace.context["knowledge_eval_trigger"] = "compression"
+  → 侧分支队列:["reflection", "knowledge_eval", "compression"](启用知识提取时)
+  →            或 ["knowledge_eval", "compression"](未启用时)
+  → 返回"需要进入侧分支"信号,暂缓压缩
+```
+
+压缩会删除消息历史,必须在压缩前完成评估。
+
+### 触发点 3:任务结束(兜底)
+
+**时机**:主路径无工具调用,Agent 即将结束
+
+**触发逻辑**(`agent/core/runner.py:_agent_loop`):
+
+```
+任务即将结束
+  ↓
+查询 cognition_log 中未评估的 query 事件
+  ↓
+如果有待评估
+  → 设置 trace.context["knowledge_eval_trigger"] = "task_completion"
+  → 将 ["knowledge_eval"] 加入 force_side_branch 队列
+  → continue(不 break,下一轮执行评估)
+```
+
+---
+
+## 侧分支评估流程
+
+### 侧分支类型
+
+复用 `SideBranchContext` 机制,类型 `"knowledge_eval"`(`agent/trace/models.py:Message.branch_type`)。
+
+### 评估 Prompt 结构
+
+完整实现:`agent/core/runner.py:_build_knowledge_eval_prompt`
+
+```
+你是知识评估助手。请评估以下知识查询结果在本次任务执行中的实际效果。
+
+## 当前任务(Mission)       ← trace.task
+## 当前 Goal                ← goal_tree.current 的 description
+## 待评估知识查询            ← 未评估的 query 事件列表
+  对每个 query:展示 query 文本、整合回答、各 source 的 id/task/content
+## 评估要求                  ← 按 source_id 逐一评估
+## 评估分类                  ← 5 个 status 选项
+## 输出格式                  ← JSON
+```
+
+Prompt 中**不包含消息历史**。LLM 依据对话上下文中已有的执行过程作出判断。
+
+### 评估输出格式
+
+LLM 直接输出 JSON:
+
+```json
+{
+  "evaluations": [
+    {
+      "query_sequence": 42,
+      "assessments": [
+        {"source_id": "knowledge-a1b2", "status": "helpful", "reason": "..."},
+        {"source_id": "knowledge-c3d4", "status": "irrelevant", "reason": "..."}
+      ]
+    }
+  ]
+}
+```
+
+### 即时写入
+
+每次 LLM 回复后立即解析,三种策略降级:整体 JSON → ` ```json ` 代码块 → 正则裸对象。
+
+解析成功 → 按 knowledge 逐条写入 `evaluation` 事件到 cognition_log(每条被评估的知识对应一个事件,通过 `query_sequence` 关联回所属 query)。解析失败记日志,不中断。
+
+---
+
+## 数据流
+
+```
+知识查询(agent/trace/goal_tool.py:inject_knowledge_for_goal)
+  ↓
+POST /api/knowledge/ask → KM Agent 整合回答
+  ↓
+写入 cognition_log: type="query"(含 response + source_ids)
+  ↓
+  ┌─────────────────────────────────────────────┐
+  │  触发点 A:Goal 完成(goal_completion)       │
+  │  触发点 B:压缩执行前(compression)          │
+  │  触发点 C:任务自然结束(task_completion)    │
+  └─────────────────────────────────────────────┘
+  ↓
+Runner 进入 knowledge_eval 侧分支
+  ↓
+LLM 按 query 维度、逐 source 评估,输出 JSON
+  ↓
+写入 cognition_log: type="evaluation"(每条 knowledge 一个事件,带 query_sequence)
+  ↓
+侧分支退出 → 恢复主路径
+
+                    ···
+
+知识提取(reflection 侧分支 LLM 调 knowledge_save_pending)
+  ↓
+写入 cognition_log: type="extraction_pending"
+  ↓
+  ┌─ reflect_auto_commit=True ─→ 侧分支退出 hook 自动 approve + commit
+  │                              写 extraction_reviewed + extraction_committed
+  │
+  └─ reflect_auto_commit=False(默认)
+     ↓
+     人工 CLI / Interactive 菜单 / HTTP API 触发
+     ↓
+     review → 写 extraction_reviewed(approve/edit/discard)
+     ↓
+     commit → 调 knowledge_save → 写 extraction_committed
+
+                    ···
+
+Dream 触发(memory-bearing Agent,详见 agent/docs/memory.md)
+  ↓
+Phase 1: per_trace_reflect(逐 trace,reflected_at_sequence < last_sequence)
+  ↓ 读取增量消息 + cognition_log 中 query/evaluation/extraction_* 事件
+  ↓ 写入 cognition_log: type="reflection"(consumed_at 暂缺)
+  ↓ 更新 Trace.reflected_at_sequence
+
+Phase 2: cross_trace_integrate
+  ↓ 汇总未消化的 reflection + 当前记忆文件
+  ↓ LLM 输出 {updates:[{path,new_content}]} JSON 计划
+  ↓ 写记忆文件 + 给参与的 reflection 补 consumed_at
+```
+
+---
+
+## 与现有系统的集成点
+
+| 集成位置 | 文件 | 说明 |
+|---|---|---|
+| 知识查询时写 log | `agent/trace/goal_tool.py:inject_knowledge_for_goal` | `goal(focus=...)` 触发 ask → 写入 `query` 事件 |
+| Goal 完成时设置标志 | `agent/trace/store.py:update_goal` | 设置 `trace.context["pending_knowledge_eval"]` |
+| 主循环检测标志 | `agent/core/runner.py:_agent_loop` | 每轮迭代开头检测,触发 `["knowledge_eval"]` |
+| 压缩前触发评估 | `agent/core/runner.py:_manage_context_usage` | 压缩前检查 pending,先评估再压缩 |
+| 任务结束兜底 | `agent/core/runner.py:_agent_loop` | 退出前检查 pending,强制触发评估 |
+| 侧分支类型 | `agent/trace/models.py:Message.branch_type` | Literal 中包含 `"knowledge_eval"` |
+| 即时写入评估 | `agent/core/runner.py:_agent_loop` | 解析 JSON 后调 `store.update_knowledge_evaluation` |
+| 知识提取暂存 | `agent/tools/builtin/knowledge.py:knowledge_save_pending` | LLM 工具,写 `extraction_pending` 事件 |
+| 提取审核 / 提交 | `agent/trace/extraction_review.py` | 写 `extraction_reviewed` / `extraction_committed` 事件 |
+| 反思侧分支 auto-commit | `agent/core/runner.py`(反思分支退出分支) | `reflect_auto_commit=True` 时调 `auto_commit_branch` |
+| 记忆反思写入 | `agent/core/dream.py:per_trace_reflect` | 写 `reflection` 事件(consumed_at 缺省) |
+| Reflection 消化标记 | `agent/core/dream.py:cross_trace_integrate` | 整合后补 `consumed_at` |
+| Log 文件格式 | `agent/trace/store.py` | ✅ 已从 entries[] 迁移到 events[];读写兼容旧文件名 |
+
+---
+
+## 七、实现与入口
+
+### 7.1 存储与访问
+
+| 职责 | 位置 |
+|---|---|
+| Cognition log 文件位置 | `.trace/{trace_id}/cognition_log.json`(新建时文件名);旧 trace 兼容读 `knowledge_log.json` |
+| 读取 | `store.py:get_cognition_log`(两种文件名都认;`entries[]` 自动迁移为 `events[]`) |
+| 追加 | `store.py:append_cognition_event`(接受任意 event type,自动补 `timestamp`) |
+| 事件 schema 权威清单 | `store.py:append_cognition_event` 的 docstring |
+
+### 7.2 各事件类型的写入与读取
+
+| 事件类型 | 主要写入者 | 主要读取者 |
+|---|---|---|
+| `query` | `goal_tool.py:inject_knowledge_for_goal` | `_build_knowledge_eval_prompt` 找待评估;`dream.py:_build_reflect_input` 组装反思上下文 |
+| `evaluation` | `store.py:update_knowledge_evaluation`(runner 解析 LLM JSON 后调用) | `dream.py:_build_reflect_input` |
+| `extraction_pending` | `knowledge_save_pending` 工具 | `extraction_review.py:list_pending`;CLI / HTTP API 显示 |
+| `extraction_reviewed` | `extraction_review.py:review_one` | `extraction_review.py:commit_approved`(决定要不要 commit) |
+| `extraction_committed` | `extraction_review.py:commit_approved` | `list_pending`(标记 committed 状态) |
+| `reflection` | `dream.py:per_trace_reflect` | `dream.py:cross_trace_integrate` |
+
+### 7.3 相关文档
+
+- **Memory 系统整体**:`agent/docs/memory.md` —— cognition_log 是其数据底座,Memory/Dream 设计与使用规范在那里
+- **KnowHub 决策历史**:`knowhub/docs/decisions.md` —— 如果需要 knowledge_log → cognition_log 重构等历史决策的背景

+ 2 - 2
agent/docs/memory.md

@@ -37,7 +37,7 @@
 
 **目的**:评估被注入的知识是否有用,记录到本地 `cognition_log.json`(原名 `knowledge_log.json`)。
 
-**触发时机**(详见 `knowhub/docs/cognition-log-plan.md`):
+**触发时机**(详见 `agent/docs/cognition-log.md`):
 - Goal 完成时(`store.py:update_goal`,设置 `pending_knowledge_eval` 标志)
 - 压缩前(必须在压缩前完成评估,否则执行上下文丢失)
 - 任务结束时(兜底)
@@ -176,7 +176,7 @@ Trace 模型新增字段:
                                            # None = 从未被记忆反思处理
 ```
 
-反思摘要不存在 Trace 模型中,而是作为 `reflection` 事件写入 `cognition_log.json`(详见 `knowhub/docs/cognition-log-plan.md`)。
+反思摘要不存在 Trace 模型中,而是作为 `reflection` 事件写入 `cognition_log.json`(详见 `agent/docs/cognition-log.md`)。
 
 - Agent run 产生新 message → `reflected_at_sequence` 自然落后于实际 sequence
 - 记忆反思完成 → 更新 `reflected_at_sequence` 为当前最新 sequence

+ 1 - 1
agent/tools/builtin/__init__.py

@@ -18,7 +18,7 @@ from agent.tools.builtin.skill import skill, list_skills
 from agent.tools.builtin.subagent import agent, evaluate
 # sandbox 工具已废弃(2026-04);search.py / crawler.py 已重构为 content/ 工具族(2026-04)
 from agent.tools.builtin.knowledge import(knowledge_search,knowledge_save,knowledge_save_pending,knowledge_list,knowledge_update,knowledge_batch_update,knowledge_slim)
-# Memory / Dream(见 agent/docs/memory-plan.md)
+# Memory / Dream(见 agent/docs/memory.md)
 from agent.tools.builtin.memory import dream
 # 知识上传/查询已统一到 agent 工具:
 #   agent(agent_type="remote_librarian", task=...)         # 查询

+ 1 - 1
agent/tools/builtin/knowledge.py

@@ -35,7 +35,7 @@ class KnowledgeConfig:
     enable_completion_extraction: bool = True      # 是否在运行完成后提取知识
     completion_reflect_prompt: str = ""            # 自定义复盘 prompt;空则使用默认,见 agent/core/prompts/knowledge.py:COMPLETION_REFLECT_PROMPT
 
-    # 提取-审核-提交两阶段开关(见 agent/docs/memory-plan.md 第三节)
+    # 提取-审核-提交两阶段开关(见 agent/docs/memory.md 第三节)
     reflect_auto_commit: bool = False
     # False(默认): reflection 仅写 cognition_log: type="extraction_pending",
     #               人工通过 CLI(agent/cli/extraction_review.py)review + commit 才进 KnowHub

+ 1 - 1
agent/tools/builtin/memory.py

@@ -1,5 +1,5 @@
 """
-Memory 相关工具 —— 目前只包含 dream 操作(见 agent/docs/memory-plan.md 第四节)。
+Memory 相关工具 —— 目前只包含 dream 操作(见 agent/docs/memory.md 第四节)。
 
 dream 整理 Agent 身份的长期记忆:回顾最近 trace 的执行历史,
 逐个 trace 做反思,再跨 trace 整合写回记忆文件。

+ 1 - 1
agent/trace/models.py

@@ -79,7 +79,7 @@ class Trace:
     # 当前焦点 goal
     current_goal_id: Optional[str] = None
 
-    # Memory 系统 - 记忆反思的进度追踪(见 agent/docs/memory-plan.md 第四节)
+    # Memory 系统 - 记忆反思的进度追踪(见 agent/docs/memory.md 第四节)
     # dream 操作扫描 reflected_at_sequence < latest_sequence 的 trace 做反思;
     # None 表示该 trace 从未被记忆反思处理过。
     reflected_at_sequence: Optional[int] = None

+ 1 - 1
agent/trace/run_api.py

@@ -114,7 +114,7 @@ class CompactResponse(BaseModel):
     message: str = ""
 
 
-# ===== 提取审核(见 agent/docs/memory-plan.md 第三节) =====
+# ===== 提取审核(见 agent/docs/memory.md 第三节) =====
 
 class PendingExtractionModel(BaseModel):
     extraction_id: str

+ 1 - 1
knowhub/README.md

@@ -58,7 +58,7 @@ Agent(端侧)
 | 文档 | 内容 |
 |------|------|
 | [Schema 迁移](docs/schema-migration-plan.md) | JSONB 软关联 → 关联表 |
-| [Cognition Log](../agent/docs/cognition-log-plan.md) | Agent 侧认知日志事件流(在 agent/docs/ 中) |
+| [Cognition Log](../agent/docs/cognition-log.md) | Agent 侧认知日志事件流(在 agent/docs/ 中) |
 | [前端重构](docs/frontend-restructure-plan.md) | 原子能力为中心的前端重构 |
 | [Dashboard](docs/dashboard-plan.md) | 知识库可视化 Dashboard |
 | [用户反馈](docs/user-feedback-plan.md) | 用户反馈 UI、API、数据模型 |

+ 225 - 0
knowhub/docs/2026-04-21_rebuild_handoff.md

@@ -0,0 +1,225 @@
+# KnowHub 重建与去重工作交接文档
+
+**日期**:2026-04-21
+**上下文**:修复同事 agent 失控污染的 tao_dev_1 数据,统一为 `howard_dedup` 版本;同时系统化去重,建立正确的需求-能力关系。
+
+---
+
+## 一、DB 当前状态
+
+| 表 | 计数 | 版本分布 |
+|---|---|---|
+| capability | **315** | 全部 `howard_dedup` |
+| strategy | **94** | 全部 `howard_dedup` |
+| resource | **2508** | 全部 `howard_dedup` |
+| requirement | 99 | 全部 `v0`(**未动**,不要动) |
+| knowledge | 1046 | 未动 |
+
+| junction 表 | 计数 |
+|---|---|
+| requirement_capability | 659 ⚠️ 不完整(见待办 1) |
+| capability_tool | 1275 |
+| capability_knowledge | 0(源数据无) |
+| capability_resource | 0(源数据无) |
+| strategy_capability | 660 |
+| strategy_resource | 2585 |
+| requirement_strategy | 94 |
+| requirement_resource | 2585 |
+
+### 关键 schema 提醒
+
+| 表 | 是否有 relation_type 列 |
+|---|---|
+| strategy_capability | ✅ 有(值:`'compose'`) |
+| capability_knowledge | ✅ 有 |
+| strategy_knowledge | ✅ 有 |
+| **requirement_capability** | ❌ **没有,不要加**(按 A 方案,语义 = 研究发现的所有 cap) |
+| 其他 junction | 都没有(只有 FK 两列) |
+
+---
+
+## 二、已完成的工作
+
+### 2.1 历次 capability 去重(ROUND 1 → 2 → C → 4)
+对原 `tao_dev_1` + `v0` 数据做了 4 轮合并:
+- **Round 1**:手动聚类 35 簇,合并 112 条 member + 删 22 条 VCAP(`CAP-tao_dev_1-NN-NN` 占位)→ 465 → 331
+- **Round 2**:26 簇跨领域合并 → 331 → 289
+- **Round C**:6 条跨版本(v0 foundation 吸收 tao_dev_1 同义条)→ 289 → 283
+- **Round 4**:修复 rebuild 时 alias 漏判产生的 32 条新 dup → 354 → 315
+
+### 2.2 数据重建(从 `/Users/sunlit/Downloads/output 2/` 99 folder)
+由于同事的 agent 失控污染:
+- purge 所有 capability/strategy/resource(v0 + tao_dev_1)+ 所有 junction
+- seed 历次合并 canonical + 21 条 v0 foundation(+重建 CAP-006)
+- 从 99 folder 重 ingest,全部标 version=`howard_dedup`
+- 别名表 536 条确保**跨所有历史名字**都能命中已有 canonical,避免二次重复
+
+### 2.3 手动修复 6 个 malformed folder
+源数据 schema 不标准但有内容可抢救:
+- **004**: `strategy.strategy.phases` + `capability_mapping` (cap name only)
+- **031**: `strategy.strategy.phases` + `capability_mapping` (capability_id/name)
+- **044**: `capabilities_extracted` 用老 schema(`capability_id`/`capability_name`)
+- **053**: `phases[]` 顶层 list + `core_workflow` 文本
+- **066**: `execution_phases` + `key_capabilities`
+- **070**: `capabilities_mapping` only
+
+全部已经手动解析 + DB 写入。
+
+### 2.4 Rename & 扩写
+32 条 canonical 的 name/description 被改写以覆盖合并后的语义范围(应用 `rename_merged_capabilities.py`)。
+
+### 2.5 capability_tool 回填
+从 `capabilities_extracted.json` 的 `implements` 字段反向回填了 1275 条 cap-tool 关系。
+
+---
+
+## 三、已创建的脚本(`knowhub/scripts/`)
+
+| 脚本 | 作用 | 幂等 |
+|---|---|---|
+| `merge_capabilities.py` | 4 轮 canonical 合并映射 + junction 冲突处理 | ✅ |
+| `rename_merged_capabilities.py` | 32 条 canonical 改名 | ✅ |
+| `rebuild_howard_dedup.py` | 全量重建:purge → seed → ingest output 2/ → 改名 | ✅(resume mode) |
+| `dedup_howard_round4.py` | Round 4 修复 rebuild 产生的 dup | ✅ |
+| `salvage_malformed_folders.py` | 早期尝试处理 malformed folder 的脚本(未使用,我后来手动处理了) | - |
+
+所有脚本使用 `autocommit=True`,每次操作独立提交——连接断了可以直接重跑。
+
+---
+
+## 四、备份位置
+
+`/tmp/knowhub_backup_2026-04-21/`(执行 rebuild 前完整快照)
+- `capability.json` (365 rows, 原 v0+tao_dev_1 混合状态)
+- `strategy.json`、`resource.json`、`requirement.json`、`knowledge.json`
+- 所有 junction 表的 JSON 快照
+
+`/tmp/capabilities_all.md`(465 条 tao_dev_1 pre-round-1 原始快照,重建时作为别名源)
+
+---
+
+## 五、待完成工作(按优先级)
+
+### 待办 1:补齐历史 requirement_capability 到"研究全集"(方案 A)
+
+**问题**:当前 `req_cap=659` 基本等于 `strat_cap=660` 的镜像,只含 strategy 的 `workflow_outline` 子集。按 A 方案设计应该是所有 `capabilities_extracted.json` 里出现的 caps 全集(research-discovered)。
+
+**操作**:
+1. 遍历 `/Users/sunlit/Downloads/output 2/` 的 99 个 folder
+2. 读 `capabilities_extracted.json`,对每个 cap 用 **alias map 解析成 howard_dedup canonical ID**(逻辑与 `rebuild_howard_dedup.py` 的 cap resolution 部分一致)
+3. `INSERT INTO requirement_capability (requirement_id, capability_id) VALUES (REQ_NN, CAP-X) ON CONFLICT DO NOTHING`
+4. 对 malformed 格式的 folder(044 用 `capability_id`/`capability_name`、053 用 `phases` 里的 caps 等)也要按特殊结构处理——参考 `salvage_malformed_folders.py` 的逐 folder 处理逻辑
+
+**预期增量**:1016 cap_entries(含重复) - dedup → 约 900 个 (req, cap) pairs;现有 659 → 预期 ~900+。
+
+**不要**:
+- 不要加 `relation_type` 列
+- 不要动 strategy_capability(保持 compose 语义)
+
+### 待办 2:处理 5 份重跑数据(等用户发来)
+
+User 将提供 032/046/069/085/097 的重跑数据(可能是新的 folder 替换)。
+处理时必须:
+
+1. **检查现有孤儿数据**:
+   - `REQ_032`:DB 已有 24 条 resource(folder=032 tag),没 junction
+   - `REQ_069`:25 条
+   - `REQ_085`:36 条
+   - `REQ_097`:24 条
+   - `REQ_046`:0 条(从未入库)
+   - 如果新数据含这些 URL:`INSERT ... ON CONFLICT` 会更新同样的 resource(URL hash ID 确定)
+   - 如果新数据不含某些 URL:旧 resource 就是孤儿(考虑删除或保留)
+
+2. **按 rebuild_howard_dedup.py 的 ingest_folder 逻辑处理**:
+   - match requirement by exact description
+   - create resources(howard_dedup version)
+   - resolve capabilities via alias map
+   - create strategy(howard_dedup,按 is_selected=true 选 1 条)
+   - wire ALL junctions including 新的 req_cap 全集逻辑(待办 1 的延伸)
+
+3. **关键:req_cap 要写 ALL caps in capabilities_extracted,不只是 strategy 用到的**(A 方案)
+
+### 待办 3(可选,之前说"先不忙"):Embedding 重算
+
+所有 315 条 capability + 94 条 strategy 的 embedding 都是空。
+当用户想启用语义搜索时,批量调用 `knowhub/embeddings.py` 的 `get_embeddings_batch` 重算:
+```python
+for each cap: embedding = get_embedding(f"{name} {description}")
+```
+
+### 待办 4(可选):effects/criterion 从备份回灌
+
+前三轮合并(Round 1 / 2 / C)时丢失了 member caps 的 `effects` + `criterion` 数据。
+用户有备份,可从 `/tmp/knowhub_backup_2026-04-21/capability.json` 的旧数据里,对当前 howard_dedup caps 做:
+- 如果当前 canonical 的 effects 是 `[]`、criterion 是 `''`:从备份找同名或同 ID 的老记录合并进来
+
+---
+
+## 六、已知数据问题(转告同事)
+
+### 6.1 Pipeline 输出 schema 不统一
+- 5 folder 的 `strategy.json` 各造一格(`phases`/`execution_phases`/`capabilities_mapping` 等字段名)
+- 1 folder(044)的 `capabilities_extracted.json` 用了老 schema
+- **建议**:Pipeline 加 schema validator
+
+### 6.2 LLM 空输出
+- 069/085:`strategies: []`(没产出策略)
+- 008/034/053:`extracted_capabilities: []`(没产出能力)
+- **建议**:重试或 fallback
+
+### 6.3 JSON 文件损坏
+- 032:只含 `{"ping"}` 
+- 046:`strategies` 是 str
+- 077:JSONDecodeError
+- 097:JSONDecodeError
+- **建议**:atomic write(tmp+rename)
+
+### 6.4 LLM is_new 判定不可靠
+新批次 is_new=true 的 cap 大量是已有能力的重新命名。
+**建议**:pipeline 在产出 is_new=true 前先做 DB 语义查重(name 精确匹配 + embedding cos)。
+
+### 6.5 VCAP 机制已退役但 code 还在
+- `decisions.md §18` 决议 VCAP 退役,但 `ingest_research_output.py` 还在造 `CAP-tao_dev_1-NN-NN`
+- **建议**:下线 VCAP 分支,is_new=true 必须 pre-resolve 或降级为错误
+
+---
+
+## 七、下次 session 快速重建上下文的步骤
+
+1. **读本文档**(`knowhub/docs/2026-04-21_rebuild_handoff.md`)
+2. **读历史脚本**(`knowhub/scripts/rebuild_howard_dedup.py` 是主逻辑,内含 alias 构建、canonical 选择、junction 写入的全套实现)
+3. **查询 DB 现状**确认未被外部干扰:
+   ```bash
+   python -c "
+   from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
+   s=PostgreSQLCapabilityStore(); cur=s._get_cursor()
+   for t in ['capability','strategy','resource']:
+       cur.execute(f'SELECT version, COUNT(*) AS c FROM {t} GROUP BY version')
+       for r in cur.fetchall(): print(f'{t}/{r[\"version\"]}: {r[\"c\"]}')
+   "
+   ```
+4. **开工**:优先做 **待办 1(补齐 req_cap)** 再等 **待办 2(5 份重跑数据)**
+
+---
+
+## 八、写入 DB 时的通用约束(务必遵守)
+
+1. **ID 规范**(都用短 hash,ID 里不带 version):
+   - capability: `CAP-<hash8(normalized_name)>` 或 `CAP-NNN`(v0 foundation)
+   - strategy: `strategy-<hash8(req_text + "|" + strategy_name)>`
+   - resource: `resource/research/<platform.lower()>/<hash12(url)>`
+
+2. **Alias 构建传递闭包**(避免 CAP-A → CAP-B → CAP-C 这类链式映射只解析一层、没有落到最终 canonical):
+   ```python
+   member_to_canonical = {m: final_canonical for ...}
+   # 遇到循环或多层嵌套时一定要解析到 final
+   ```
+
+3. **AnalyticDB 约束**:
+   - 新加列 + ON CONFLICT DO UPDATE 对 beam 表不兼容 → 都改 `DELETE + INSERT`
+   - `autocommit=True` 强制(否则 idle-in-tx 会长期持锁)
+   - 连接不稳定 → 脚本必须幂等、支持断点续跑
+
+4. **junction 写入全部用 `ON CONFLICT DO NOTHING`**,避免重复 PK 错误
+
+5. **不要修改** `requirement`、`knowledge` 表及其相关 junction

+ 2 - 2
knowhub/docs/remote-agents.md

@@ -115,7 +115,7 @@ KnowHub 服务器托管的远端 Agent,供客户端通过统一的 `agent` 工
 
 ### 知识注入(框架级)
 
-`inject_knowledge_for_goal`(`agent/trace/goal_tool.py`)在 Goal 开始时自动通过 `agent_type="remote_librarian"` 调用 `/api/agent`,把返回的 summary 作为 cognition_log 的 `query` 事件记录(详见 `agent/docs/cognition-log-plan.md`)。
+`inject_knowledge_for_goal`(`agent/trace/goal_tool.py`)在 Goal 开始时自动通过 `agent_type="remote_librarian"` 调用 `/api/agent`,把返回的 summary 作为 cognition_log 的 `query` 事件记录(详见 `agent/docs/cognition-log.md`)。
 
 ---
 
@@ -134,7 +134,7 @@ KnowHub 服务器托管的远端 Agent,供客户端通过统一的 `agent` 工
 
 ## 与 Cognition Log 的关系
 
-每次 `remote_librarian` 调用在 Agent 侧产生一个 `query` 事件,记录查询和整合回答。后续评估以 query 为单位。详见 [cognition-log-plan.md](cognition-log-plan.md)。
+每次 `remote_librarian` 调用在 Agent 侧产生一个 `query` 事件,记录查询和整合回答。后续评估以 query 为单位。详见 [cognition-log.md](cognition-log.md)。
 
 ## 与知识处理流水线的关系
 

+ 227 - 0
knowhub/scripts/backfill_req_cap.py

@@ -0,0 +1,227 @@
+#!/usr/bin/env python3
+"""
+系统性补齐 requirement_capability 到"研究全集"(A 方案)。
+
+对每个 requirement (folder),读其 capabilities_extracted.json 中所有 cap,
+用 alias 解析成 canonical capability_id,
+INSERT requirement_capability ON CONFLICT DO NOTHING。
+
+现有 req_cap 大多只是 strategy 的 workflow_outline 子集,
+此脚本会把所有 research-discovered 的 cap 都补进来。
+
+源数据:
+  - 94 folder:/Users/sunlit/Downloads/output 2/<NN>/
+  - 5 folder(重跑数据):/Users/sunlit/Downloads/5/<NN>/
+
+非标准 schema 处理:
+  - 008/034:capabilities_extracted.json 没有 extracted_capabilities key,
+             用 'capabilities' list
+  - 044:旧 schema,用 capability_id / capability_name
+  - 053:extracted_capabilities=[],跳过(已有 strategy-subset 即是全集)
+  - 077:JSON parse error,用正则 fallback 提取 name + id
+  - 004/031/066/070:虽 strategy 格式异常,但 caps 文件是标准的
+"""
+import hashlib
+import json
+import re
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
+from knowhub.scripts.merge_capabilities import MERGE_CLUSTERS
+from knowhub.scripts.rename_merged_capabilities import RENAMES
+from knowhub.scripts.llm_renames import LLM_RENAMES
+
+OUTPUT_DIR = Path('/Users/sunlit/Downloads/output 2')
+RERUN_DIR = Path('/Users/sunlit/Downloads/5')
+RERUN_FOLDERS = {'032', '046', '069', '085', '097'}
+
+
+def norm(s):
+    """Return ``s`` stripped and lower-cased; any falsy value yields ''."""
+    if not s:
+        return ''
+    return s.strip().lower()
+
+
+def build_alias_map(cur):
+    """Build the ``norm(name) -> capability id`` lookup used to resolve caps.
+
+    Layers, later ones overriding earlier ones on a name clash:
+      1. every ``(id, name)`` row currently in the ``capability`` table,
+      2. the new names in ``RENAMES``, mapped to the final canonical id,
+      3. the names in ``LLM_RENAMES``, mapped to the final canonical id.
+
+    ``cur`` must return rows indexable by column name.
+    """
+    member_to_canonical = {
+        member: canonical
+        for canonical, members in MERGE_CLUSTERS.items()
+        for member in members
+    }
+
+    def final(cid, limit=10):
+        # Follow member -> canonical links until reaching an id that is not a
+        # member, revisiting an id (cycle), or using up ``limit`` hops.
+        visited = set()
+        for _ in range(limit):
+            if cid not in member_to_canonical or cid in visited:
+                break
+            visited.add(cid)
+            cid = member_to_canonical[cid]
+        return cid
+
+    # Collapse chains so every member points straight at its final canonical.
+    for member in list(member_to_canonical):
+        member_to_canonical[member] = final(member)
+
+    cur.execute('SELECT id, name FROM capability')
+    alias = {norm(row['name']): row['id'] for row in cur.fetchall()}
+    alias.update((norm(new_name), final(cid))
+                 for cid, (new_name, _) in RENAMES.items())
+    # LLM-generated renames (aliases for LLM-invented cap names that are actually dups)
+    alias.update((norm(llm_name), final(canonical))
+                 for llm_name, canonical in LLM_RENAMES.items())
+    return alias
+
+
+def extract_caps_from_file(folder_path):
+    """Read ``capabilities_extracted.json`` under ``folder_path``.
+
+    Returns a ``(caps, schema)`` tuple. ``caps`` is a list of
+    ``{'id': ..., 'name': ...}`` dicts (``id`` may be ``None``); ``schema``
+    is a tag naming the branch that produced it:
+
+      - ``'no_file'``          the file does not exist
+      - ``'parse_err_regex'``  invalid JSON; names/ids recovered by regex
+      - ``'alt_key'``          caps stored under ``'capabilities'``
+      - ``'empty'``            ``extracted_capabilities`` missing or empty
+      - ``'old_schema'``       entries use ``capability_id`` / ``capability_name``
+      - ``'standard'``         entries use ``id`` / ``name``
+      - ``'bad_shape'``        valid JSON whose top level is not an object, or
+                               whose capability collection is not a list
+    """
+    fp = folder_path / 'capabilities_extracted.json'
+    if not fp.exists():
+        return [], 'no_file'
+
+    text = fp.read_text(encoding='utf-8')
+
+    try:
+        data = json.loads(text)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        # Regex fallback: names and ids are paired purely by order of
+        # appearance, so the pairing is best-effort only.
+        names = re.findall(r'"name"\s*:\s*"([^"]+)"', text)
+        ids = re.findall(r'"id"\s*:\s*(?:"([^"]+)"|null)', text)
+        # findall yields '' when the ``null`` alternative matched; map to None.
+        caps = [{'id': (ids[i] or None) if i < len(ids) else None, 'name': n}
+                for i, n in enumerate(names)]
+        return caps, 'parse_err_regex'
+
+    if not isinstance(data, dict):
+        return [], 'bad_shape'
+
+    # 008/034: 'capabilities' key
+    if 'extracted_capabilities' not in data and 'capabilities' in data:
+        ec = data['capabilities']
+        if not isinstance(ec, list):
+            return [], 'bad_shape'
+        return [{'id': c.get('id') or c.get('cap_id'), 'name': c.get('name', '')}
+                for c in ec if isinstance(c, dict)], 'alt_key'
+
+    ec = data.get('extracted_capabilities', [])
+    if not ec:
+        return [], 'empty'
+    if not isinstance(ec, list):
+        return [], 'bad_shape'
+
+    # 044: old schema capability_id/capability_name (decided by the first entry)
+    first = ec[0]
+    if isinstance(first, dict) and 'capability_name' in first and 'name' not in first:
+        return [{'id': c.get('capability_id'),
+                 'name': c.get('capability_name', '')}
+                for c in ec if isinstance(c, dict)], 'old_schema'
+
+    return [{'id': c.get('id'), 'name': c.get('name', '')}
+            for c in ec if isinstance(c, dict)], 'standard'
+
+
+def get_req_text_and_id(folder_path, cur):
+    """Find the folder's requirement text and its id in the ``requirement`` table.
+
+    ``blueprint.json``, ``strategy.json`` and ``capabilities_extracted.json``
+    are tried in that order. The first file whose ``requirement`` value
+    matches a ``requirement.description`` row wins and ``(text, id)`` is
+    returned. A file that is missing, fails to load, or has no matching row
+    is skipped; any exception raised while handling a file is swallowed.
+    Returns ``(None, None)`` when no file produces a match.
+    """
+    candidate_names = ('blueprint.json', 'strategy.json', 'capabilities_extracted.json')
+    for path in (folder_path / name for name in candidate_names):
+        if not path.exists():
+            continue
+        try:
+            payload = json.loads(path.read_text(encoding='utf-8'))
+            requirement = payload.get('requirement', '')
+            if not requirement:
+                continue
+            cur.execute('SELECT id FROM requirement WHERE description = %s LIMIT 1',
+                        (requirement,))
+            match = cur.fetchone()
+            if match:
+                return requirement, match['id']
+        except Exception:
+            continue
+    return None, None
+
+
+def main():
+    """Backfill ``requirement_capability`` rows for every research folder.
+
+    For each directory under ``OUTPUT_DIR`` (folders listed in
+    ``RERUN_FOLDERS`` are read from ``RERUN_DIR`` instead) the requirement is
+    looked up, each extracted capability is resolved to an id present in the
+    ``capability`` table (by source id first, then by alias on the name), and
+    one junction row per resolved id is inserted with
+    ``ON CONFLICT DO NOTHING``. Per-folder lines and overall totals are
+    printed; nothing is returned.
+    """
+    s = PostgreSQLCapabilityStore()
+    cur = s._get_cursor()
+    try:
+        print('Building alias map...', flush=True)
+        alias = build_alias_map(cur)
+        print(f'  alias entries: {len(alias)}', flush=True)
+
+        # resolve source folder per requirement folder
+        folders = []
+        for d in sorted(OUTPUT_DIR.iterdir()):
+            if not d.is_dir():
+                continue
+            key = d.name
+            if key in RERUN_FOLDERS:
+                folders.append(RERUN_DIR / key)  # use re-run data
+            else:
+                folders.append(d)
+
+        totals = {'folders_processed': 0, 'folders_no_req': 0,
+                  'caps_resolved': 0, 'caps_unresolved': 0,
+                  'inserted': 0, 'already_present': 0,
+                  'schema_counts': {}, 'unresolved_names': []}
+
+        for folder in folders:
+            fk = folder.name
+            req_text, req_id = get_req_text_and_id(folder, cur)
+            if not req_id:
+                totals['folders_no_req'] += 1
+                print(f'[{fk}] ⚠️  no matching req', flush=True)
+                continue
+
+            caps, schema = extract_caps_from_file(folder)
+            totals['schema_counts'][schema] = totals['schema_counts'].get(schema, 0) + 1
+
+            # pre-count current req_cap for this req
+            cur.execute('SELECT COUNT(*) c FROM requirement_capability WHERE requirement_id=%s',
+                        (req_id,))
+            before = cur.fetchone()['c']
+
+            # A set: several source caps may resolve to the same canonical id.
+            resolved_ids = set()
+            unresolved = []
+            for cap in caps:
+                cid = cap.get('id')
+                name = cap.get('name', '')
+                found = None
+                # (1) id exists in DB?
+                if cid:
+                    cur.execute('SELECT 1 FROM capability WHERE id = %s', (cid,))
+                    if cur.fetchone():
+                        found = cid
+                # (2) alias by name?
+                if not found and name:
+                    cand = alias.get(norm(name))
+                    if cand:
+                        # The alias target must itself exist in the table.
+                        cur.execute('SELECT 1 FROM capability WHERE id = %s', (cand,))
+                        if cur.fetchone():
+                            found = cand
+                if found:
+                    resolved_ids.add(found)
+                    totals['caps_resolved'] += 1
+                else:
+                    unresolved.append(f'{cid}/{name[:30]}')
+                    totals['caps_unresolved'] += 1
+
+            for cid in resolved_ids:
+                cur.execute("""INSERT INTO requirement_capability (requirement_id, capability_id)
+                               VALUES (%s, %s) ON CONFLICT DO NOTHING""", (req_id, cid))
+
+            cur.execute('SELECT COUNT(*) c FROM requirement_capability WHERE requirement_id=%s',
+                        (req_id,))
+            after = cur.fetchone()['c']
+
+            # New rows are measured by the before/after row counts rather than
+            # by the cursor's rowcount.
+            newly_inserted = after - before
+            totals['inserted'] += newly_inserted
+            # Resolved ids that added no row were already linked. Clamped at 0
+            # in case the table changed between the two COUNT queries.
+            totals['already_present'] += max(0, len(resolved_ids) - newly_inserted)
+            totals['folders_processed'] += 1
+            if unresolved:
+                # Keep at most three samples per folder.
+                totals['unresolved_names'].extend(unresolved[:3])
+            print(f'[{fk}] ({schema}) req={req_id} caps_in_file={len(caps)} resolved={len(resolved_ids)} '
+                  f'new_inserted={after-before} (was {before} → now {after}) '
+                  f'unresolved={len(unresolved)}',
+                  flush=True)
+
+        print(f'\n{"="*60}\nTotals:', flush=True)
+        for k, v in totals.items():
+            if isinstance(v, list):
+                print(f'  {k}: {len(v)} (sample: {v[:5]})', flush=True)
+            else:
+                print(f'  {k}: {v}', flush=True)
+    finally:
+        cur.close()
+        s.close()
+
+
+if __name__ == '__main__':
+    main()

+ 148 - 0
knowhub/scripts/dump_unresolved_caps.py

@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+"""
+把 backfill_req_cap.py 没匹配到 canonical 的 cap 全部 dump 出来,
+附带 folder / is_new / description / implements,供人工判断。
+"""
+import hashlib
+import json
+import re
+import sys
+from collections import defaultdict
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
+from knowhub.scripts.merge_capabilities import MERGE_CLUSTERS
+from knowhub.scripts.rename_merged_capabilities import RENAMES
+
+OUTPUT_DIR = Path('/Users/sunlit/Downloads/output 2')
+RERUN_DIR = Path('/Users/sunlit/Downloads/5')
+RERUN_FOLDERS = {'032', '046', '069', '085', '097'}
+
+
+def norm(s):
+    """Normalize a capability name for lookup: strip, then lower-case.
+
+    Falsy input (``None``, ``''``) is treated as the empty string.
+    """
+    text = s if s else ''
+    return text.strip().lower()
+
+
+def build_alias(cur):
+    """Build a ``norm(name) -> capability id`` map from the DB plus ``RENAMES``.
+
+    ``MERGE_CLUSTERS`` is flattened into a member -> final canonical table
+    first, so ``RENAMES`` entries land on the end of any merge chain.
+    ``cur`` must return rows indexable by column name.
+    """
+    m2c = {
+        member: canonical
+        for canonical, members in MERGE_CLUSTERS.items()
+        for member in members
+    }
+
+    def final(cid, limit=10):
+        # Walk member -> canonical links; stop at a non-member, on a repeated
+        # id (cycle), or after ``limit`` hops.
+        visited = set()
+        for _ in range(limit):
+            if cid not in m2c or cid in visited:
+                break
+            visited.add(cid)
+            cid = m2c[cid]
+        return cid
+
+    for member in list(m2c):
+        m2c[member] = final(member)
+
+    cur.execute('SELECT id, name FROM capability')
+    alias = {norm(row['name']): row['id'] for row in cur.fetchall()}
+    alias.update((norm(new_name), final(cid))
+                 for cid, (new_name, _) in RENAMES.items())
+    return alias
+
+
+def extract_caps(folder):
+    """Load capability entries from ``folder/capabilities_extracted.json``.
+
+    Returns a list of dicts with keys ``id``, ``name``, ``is_new``,
+    ``description`` and ``implements``. Field names from the alternative
+    schemas (``cap_id`` / ``capability_id``, ``capability_name``,
+    ``why_needed`` / ``relevance_reason``, ``suggested_tools``) are folded
+    into those keys.
+
+    Returns ``[]`` when the file is missing, when the top-level JSON value is
+    not an object, or when the capability collection is not a list. When the
+    file is not valid JSON, names and ids are recovered by regex and the
+    remaining fields are left empty.
+    """
+    fp = folder / 'capabilities_extracted.json'
+    if not fp.exists():
+        return []
+    text = fp.read_text(encoding='utf-8')
+    try:
+        data = json.loads(text)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        names = re.findall(r'"name"\s*:\s*"([^"]+)"', text)
+        ids = re.findall(r'"id"\s*:\s*(?:"([^"]+)"|null)', text)
+        # is_new on same cap boundary — harder, skip for regex case.
+        # findall yields '' when the ``null`` alternative matched; map to None.
+        return [{'id': (ids[i] or None) if i < len(ids) else None,
+                 'name': n, 'is_new': None, 'description': '', 'implements': {}}
+                for i, n in enumerate(names)]
+    if not isinstance(data, dict):
+        return []
+    ec = data.get('extracted_capabilities', data.get('capabilities', []))
+    if not isinstance(ec, list):
+        return []
+    out = []
+    for c in ec:
+        if not isinstance(c, dict):
+            continue
+        out.append({
+            'id': c.get('id') or c.get('cap_id') or c.get('capability_id'),
+            'name': c.get('name') or c.get('capability_name', ''),
+            'is_new': c.get('is_new'),
+            'description': c.get('description', '') or c.get('why_needed', '')
+                           or c.get('relevance_reason', ''),
+            'implements': c.get('implements') or c.get('suggested_tools', []),
+        })
+    return out
+
+
+def main():
+    """Dump every capability that cannot be resolved to an existing id.
+
+    Walks the same folders as the backfill script (``OUTPUT_DIR``, with
+    ``RERUN_FOLDERS`` read from ``RERUN_DIR``), tries to resolve each
+    extracted capability by source id and then by alias, and groups the
+    failures by normalized name. The grouped list is written to
+    ``/tmp/unresolved_caps.md`` as UTF-8 and the 30 most frequent names are
+    printed to the terminal.
+    """
+    s = PostgreSQLCapabilityStore()
+    cur = s._get_cursor()
+    try:
+        alias = build_alias(cur)
+        # Group unresolved by normalized name (to see duplicates across folders)
+        by_name = defaultdict(lambda: {'folders': [], 'desc': '', 'impl': '', 'is_new_votes': []})
+        folders = []
+        for d in sorted(OUTPUT_DIR.iterdir()):
+            if not d.is_dir():
+                continue
+            if d.name in RERUN_FOLDERS:
+                folders.append(RERUN_DIR / d.name)
+            else:
+                folders.append(d)
+        for folder in folders:
+            caps = extract_caps(folder)
+            for cap in caps:
+                cid = cap.get('id')
+                name = cap.get('name', '')
+                if not name:
+                    continue
+                resolved = None
+                if cid:
+                    cur.execute('SELECT 1 FROM capability WHERE id=%s', (cid,))
+                    if cur.fetchone():
+                        resolved = cid
+                if not resolved:
+                    cand = alias.get(norm(name))
+                    if cand:
+                        # The alias target must itself exist in the table.
+                        cur.execute('SELECT 1 FROM capability WHERE id=%s', (cand,))
+                        if cur.fetchone():
+                            resolved = cand
+                if resolved:
+                    continue
+                key = norm(name)
+                entry = by_name[key]
+                entry['name'] = name  # the last spelling seen is the one shown
+                entry['folders'].append(folder.name)
+                # Keep the first non-empty description / implements (truncated).
+                if not entry['desc'] and cap.get('description'):
+                    entry['desc'] = cap['description'][:300]
+                if not entry['impl'] and cap.get('implements'):
+                    entry['impl'] = str(cap['implements'])[:200]
+                entry['is_new_votes'].append(cap.get('is_new'))
+
+        # Sort by frequency (most common first)
+        sorted_list = sorted(by_name.items(), key=lambda x: -len(x[1]['folders']))
+        total_occ = sum(len(v['folders']) for v in by_name.values())
+        print(f'UNIQUE UNRESOLVED CAPS: {len(sorted_list)}', flush=True)
+        print(f'TOTAL OCCURRENCES: {total_occ}', flush=True)
+        print()
+
+        # Write to file for easier review. The encoding is explicit because
+        # the names are mostly CJK and the platform default may not be UTF-8.
+        out_path = Path('/tmp/unresolved_caps.md')
+        with out_path.open('w', encoding='utf-8') as f:
+            f.write(f'# Unresolved Caps ({len(sorted_list)} unique)\n\n')
+            for key, v in sorted_list:
+                f.write(f'## {v["name"]}\n')
+                f.write(f'- folders ({len(v["folders"])}): {v["folders"]}\n')
+                f.write(f'- is_new votes: {v["is_new_votes"]}\n')
+                if v['desc']:
+                    f.write(f'- desc: {v["desc"]}\n')
+                if v['impl']:
+                    f.write(f'- impl: {v["impl"]}\n')
+                f.write('\n')
+        print(f'Written: {out_path}', flush=True)
+        # print top 30 to terminal
+        for i, (key, v) in enumerate(sorted_list[:30]):
+            n_fold = len(v['folders'])
+            nm = v['name']
+            ds = v['desc'][:120] if v['desc'] else ''
+            print(f'{i+1:3d}. [{n_fold}x] {nm}', flush=True)
+            if ds:
+                print(f'     desc: {ds}', flush=True)
+    finally:
+        cur.close()
+        s.close()
+
+
+if __name__ == '__main__':
+    main()

+ 395 - 0
knowhub/scripts/ingest_reruns.py

@@ -0,0 +1,395 @@
+#!/usr/bin/env python3
+"""
+处理 5 个重跑数据 folder:032/046/069/085/097。
+
+策略(对每个 folder):
+  1. 定位 req_id(blueprint.json 解析失败时 fallback strategy.json)
+  2. 清理旧数据:
+     - 删除 folder 标签为 F 的 resource 及其所有 junction
+     - 删除 req 关联的 strategy + 其 junction
+     - 删除 requirement_capability (req, *) 条目(后续重建全集)
+  3. 重新 ingest:
+     - resources from raw_cases/(case_bili.json parse fail → 正则 fallback)
+     - capabilities via alias(不存在则新建 howard_dedup)
+     - strategy(is_selected 或第一条)
+     - junctions:
+       · req_res / strat_res: 按 resource 逐条写
+       · strat_cap: workflow_outline 的 caps(relation_type='compose')
+       · req_strat: 1 条
+       · req_cap: capabilities_extracted.json 的所有 caps(A 方案:研究全集)
+
+所有操作 autocommit=True;脚本幂等、允许断点重跑。
+"""
+import hashlib
+import json
+import re
+import sys
+import time
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
+from knowhub.scripts.merge_capabilities import MERGE_CLUSTERS
+from knowhub.scripts.rename_merged_capabilities import RENAMES
+
+RERUN_DIR = Path('/Users/sunlit/Downloads/5')
+FOLDERS = ['032', '046', '069', '085', '097']
+DEDUP_VERSION = 'howard_dedup'
+
+
+def norm(s):
+    """Return ``s`` with surrounding whitespace removed and lower-cased.
+
+    ``None`` and other falsy values normalize to ''.
+    """
+    if not s:
+        return ''
+    return s.strip().lower()
+
+
+def hash8(text):
+    """Return the first 8 hex characters of the SHA-256 of ``text`` (UTF-8)."""
+    digest = hashlib.sha256(text.encode('utf-8'))
+    return digest.hexdigest()[:8]
+
+
+def hash12(text):
+    """Return the first 12 hex characters of the SHA-256 of ``text`` (UTF-8)."""
+    digest = hashlib.sha256(text.encode('utf-8'))
+    return digest.hexdigest()[:12]
+
+
+def gen_cap_id(name):
+    """Return the capability id ``CAP-<hash8 of the normalized name>``."""
+    normalized = norm(name)
+    return 'CAP-' + hash8(normalized)
+
+
+def gen_resource_id(platform, url):
+    """Return ``resource/research/<platform>/<hash12(url)>``.
+
+    The platform is lower-cased and stripped; a falsy platform becomes
+    ``'unknown'``.
+    """
+    platform_slug = (platform if platform else 'unknown').lower().strip()
+    url_hash = hash12(url)
+    return f'resource/research/{platform_slug}/{url_hash}'
+
+
+def gen_strategy_id(req_text, strategy_name):
+    """Return ``strategy-<hash8(req_text + "|" + strategy_name)>``.
+
+    A falsy ``req_text`` or ``strategy_name`` is hashed as the empty string.
+    """
+    seed = '|'.join((req_text or '', strategy_name or ''))
+    return 'strategy-' + hash8(seed)
+
+
+# ═══════════════════════════════════════════════════════════
+def build_alias_map(cur):
+    """Build norm(name) -> canonical_id alias from current DB + MERGE_CLUSTERS + RENAMES.
+
+    ``cur`` must return rows indexable by column name. Names from ``RENAMES``
+    override a same-named row read from the ``capability`` table.
+
+    NOTE(review): unlike the alias map in backfill_req_cap.py, this one does
+    not consult LLM_RENAMES — confirm that is intended.
+    """
+    # Step A: member -> canonical, then collapsed to the end of each chain.
+    member_to_canonical = {
+        member: canonical
+        for canonical, members in MERGE_CLUSTERS.items()
+        for member in members
+    }
+
+    def final(cid, limit=10):
+        # Stop at a non-member id, on a repeated id (cycle), or after
+        # ``limit`` hops.
+        visited = set()
+        for _ in range(limit):
+            if cid not in member_to_canonical or cid in visited:
+                break
+            visited.add(cid)
+            cid = member_to_canonical[cid]
+        return cid
+
+    for member in list(member_to_canonical):
+        member_to_canonical[member] = final(member)
+
+    # Step B: current DB names.
+    cur.execute('SELECT id, name FROM capability')
+    alias = {norm(row['name']): row['id'] for row in cur.fetchall()}
+
+    # Step C: RENAMES new names, pointed at the final canonical id.
+    alias.update((norm(new_name), final(cid))
+                 for cid, (new_name, _) in RENAMES.items())
+
+    return alias
+
+
+# ═══════════════════════════════════════════════════════════
+def load_raw_cases(folder_path):
+    """Collect case dicts from every ``raw_cases/*.json`` under ``folder_path``.
+
+    Files are visited in sorted order. The platform is the file stem with
+    ``case_`` removed, and is set on each case that has no ``platform`` key.
+    A file may hold either ``{"cases": [...]}`` or a bare list; non-dict
+    entries are dropped.
+
+    If a file fails to parse, or parses to something that is not a list of
+    cases, its text is scanned by regex for ``source_url`` values and minimal
+    case dicts (empty title) are built from them. Returns ``[]`` when the
+    ``raw_cases`` directory does not exist.
+    """
+    raw_dir = folder_path / 'raw_cases'
+    collected = []
+    if not raw_dir.exists():
+        return collected
+    for case_file in sorted(raw_dir.glob('*.json')):
+        platform = case_file.stem.replace('case_', '')
+        parsed_ok = False
+        try:
+            payload = json.loads(case_file.read_text(encoding='utf-8'))
+            entries = payload.get('cases', []) if isinstance(payload, dict) else payload
+            if isinstance(entries, list):
+                for entry in entries:
+                    if isinstance(entry, dict):
+                        entry.setdefault('platform', platform)
+                        collected.append(entry)
+                parsed_ok = True
+        except Exception as e:
+            print(f'    ⚠️  {case_file.name} parse fail ({e}); trying regex fallback', flush=True)
+        if parsed_ok:
+            continue
+        # regex fallback: anchor by source_url (titles too unreliable)
+        text = case_file.read_text(encoding='utf-8')
+        urls = re.findall(r'"source_url"\s*:\s*"([^"]+)"', text)
+        ids = re.findall(r'"id"\s*:\s*"(case_[^"]+)"', text)
+        for idx, url in enumerate(urls):
+            # Ids are paired with urls by position; synthesize one if short.
+            case_id = ids[idx] if idx < len(ids) else f'{platform}_fallback_{idx}'
+            collected.append({
+                'id': case_id,
+                'title': '',  # unreliable without proper JSON parse
+                'platform': platform,
+                'source_url': url,
+            })
+        print(f'    ⇒ recovered {len(urls)} {platform} cases via regex (titles skipped)',
+              flush=True)
+    return collected
+
+
+# ═══════════════════════════════════════════════════════════
+def cleanup_folder_data(cur, req_id, folder_key, stats):
+    """Delete the data previously ingested for one requirement / folder.
+
+    Removes, in order:
+      1. every resource whose metadata ``folder`` equals ``folder_key``,
+         after its requirement/strategy/capability junction rows;
+      2. all remaining ``requirement_resource`` rows of ``req_id``;
+      3. every strategy linked to ``req_id``, after its junction rows;
+      4. all ``requirement_capability`` rows of ``req_id``.
+
+    Records the counts in ``stats['deleted_resources']`` and
+    ``stats['deleted_strategies']``.
+    """
+    # resources tagged with this folder
+    cur.execute("SELECT id FROM resource WHERE metadata::jsonb->>'folder' = %s", (folder_key,))
+    stale_resources = [row['id'] for row in cur.fetchall()]
+    resource_junctions = ('requirement_resource', 'strategy_resource', 'capability_resource')
+    for rid in stale_resources:
+        # Junction rows are removed before the resource row itself.
+        for table in resource_junctions:
+            cur.execute(f'DELETE FROM {table} WHERE resource_id = %s', (rid,))
+        cur.execute('DELETE FROM resource WHERE id = %s', (rid,))
+    stats['deleted_resources'] = len(stale_resources)
+
+    # any remaining req_res junctions for this req (untagged orphans)
+    cur.execute('DELETE FROM requirement_resource WHERE requirement_id = %s', (req_id,))
+
+    # strategies linked to this req
+    cur.execute('SELECT strategy_id FROM requirement_strategy WHERE requirement_id = %s', (req_id,))
+    stale_strategies = [row['strategy_id'] for row in cur.fetchall()]
+    strategy_junctions = ('requirement_strategy', 'strategy_capability',
+                          'strategy_resource', 'strategy_knowledge')
+    for sid in stale_strategies:
+        for table in strategy_junctions:
+            cur.execute(f'DELETE FROM {table} WHERE strategy_id = %s', (sid,))
+        cur.execute('DELETE FROM strategy WHERE id = %s', (sid,))
+    stats['deleted_strategies'] = len(stale_strategies)
+
+    # req_cap for this req (will be rebuilt)
+    cur.execute('DELETE FROM requirement_capability WHERE requirement_id = %s', (req_id,))
+
+
+# ═══════════════════════════════════════════════════════════
+def ingest_folder(folder_path, cur, alias, stats):
+    """Re-ingest one re-run folder, replacing whatever was stored for it.
+
+    Steps:
+      1. read the requirement text (``blueprint.json``, falling back to
+         ``strategy.json``) and look up its id; return early if none matches;
+      2. delete the old resources / strategies / req_cap rows;
+      3. insert one resource per raw case that has a URL;
+      4. resolve every extracted capability (source id, then alias, else
+         create a new one) and backfill empty ``criterion`` / ``effects``;
+      5. insert the selected strategy (``is_selected``, else the first one)
+         and resolve the capabilities named in its ``workflow_outline``;
+      6. write the junction rows.
+
+    ``alias`` is mutated: names of newly created capabilities are added.
+    ``stats`` is updated in place with the per-folder counters.
+
+    Malformed shapes in the input files (non-dict payloads, a non-list
+    ``extracted_capabilities`` / ``strategies``, non-dict entries) are
+    skipped with a warning instead of raising: by the time they are read,
+    step 2 has already deleted the old rows.
+    """
+    folder_key = folder_path.name
+
+    # (1) requirement text — try blueprint, fallback to strategy
+    req_text = ''
+    try:
+        bp = json.loads((folder_path / 'blueprint.json').read_text(encoding='utf-8'))
+        req_text = bp.get('requirement', '')
+    except Exception as e:
+        print(f'    ⚠️  blueprint parse fail ({e}); trying strategy.json', flush=True)
+    if not req_text:
+        try:
+            sd = json.loads((folder_path / 'strategy.json').read_text(encoding='utf-8'))
+            req_text = sd.get('requirement', '')
+        except Exception as e:
+            print(f'    ❌ no requirement text available ({e})', flush=True)
+            return
+
+    cur.execute('SELECT id FROM requirement WHERE description = %s LIMIT 1', (req_text,))
+    row = cur.fetchone()
+    if not row:
+        print(f'    ❌ no matching requirement for {folder_key}', flush=True)
+        return
+    req_id = row['id']
+    print(f'    → req_id={req_id}', flush=True)
+
+    # (2) cleanup
+    cleanup_folder_data(cur, req_id, folder_key, stats)
+    del_r, del_s = stats['deleted_resources'], stats['deleted_strategies']
+    print(f'    cleaned: del_res={del_r}, del_strat={del_s}', flush=True)
+
+    # (3) resources
+    cases = load_raw_cases(folder_path)
+    resource_ids = []
+    for case in cases:
+        url = case.get('source_url') or case.get('url')
+        if not url:
+            continue
+        platform = case.get('platform') or 'unknown'
+        rid = gen_resource_id(platform, url)
+        title = (case.get('title') or '')[:200]
+        metrics = case.get('metrics') if isinstance(case.get('metrics'), dict) else {}
+        likes = (metrics.get('likes') or 0) if metrics else 0
+
+        # DELETE + INSERT (instead of an upsert) keeps a re-run idempotent.
+        cur.execute('DELETE FROM resource WHERE id = %s', (rid,))
+        cur.execute(
+            """INSERT INTO resource (id, title, body, content_type, images, metadata, sort_order, version)
+               VALUES (%s, %s, %s, %s, %s, %s, %s, %s)""",
+            (rid, title,
+             json.dumps(case, ensure_ascii=False)[:8000],
+             'research_case',
+             json.dumps(case.get('images', []) or [], ensure_ascii=False),
+             json.dumps({'platform': platform, 'source_url': url,
+                         'metrics': metrics, 'folder': folder_key},
+                        ensure_ascii=False),
+             -int(likes), DEDUP_VERSION))  # sort_order is the negated like count
+        resource_ids.append(rid)
+    stats['resources'] = len(resource_ids)
+
+    # (4) capabilities (from capabilities_extracted.json) — track ALL of them for req_cap superset
+    caps_path = folder_path / 'capabilities_extracted.json'
+    all_cap_ids_research = set()  # for req_cap (A-plan research superset)
+    cap_key_to_id = {}  # source_key -> resolved_id (for strat_cap resolution)
+    if caps_path.exists():
+        try:
+            caps_data = json.loads(caps_path.read_text(encoding='utf-8'))
+        except Exception as e:
+            print(f'    ⚠️  capabilities_extracted parse fail: {e}', flush=True)
+            caps_data = {'extracted_capabilities': []}
+        raw_caps = (caps_data.get('extracted_capabilities', [])
+                    if isinstance(caps_data, dict) else None)
+        if not isinstance(raw_caps, list):
+            print('    ⚠️  capabilities_extracted has unexpected shape; no capabilities ingested',
+                  flush=True)
+            raw_caps = []
+        for cap in raw_caps:
+            if not isinstance(cap, dict):
+                continue
+            name = (cap.get('name') or '').strip()
+            if not name:
+                continue
+            src_id = cap.get('id')
+            resolved = None
+            # (a) source id exists?
+            if src_id:
+                cur.execute('SELECT 1 FROM capability WHERE id = %s', (src_id,))
+                if cur.fetchone():
+                    resolved = src_id
+            # (b) alias by name
+            if not resolved:
+                cand = alias.get(norm(name))
+                if cand:
+                    cur.execute('SELECT 1 FROM capability WHERE id = %s', (cand,))
+                    if cur.fetchone():
+                        resolved = cand
+            # (c) create new
+            if not resolved:
+                new_id = gen_cap_id(name)
+                cur.execute('SELECT 1 FROM capability WHERE id = %s', (new_id,))
+                if not cur.fetchone():
+                    cur.execute(
+                        """INSERT INTO capability (id, name, criterion, description, effects, version)
+                           VALUES (%s, %s, %s, %s, %s, %s)""",
+                        (new_id, name, cap.get('criterion', '') or '',
+                         cap.get('description', '') or '',
+                         json.dumps(cap.get('effects', []) or [], ensure_ascii=False, default=str),
+                         DEDUP_VERSION))
+                    alias[norm(name)] = new_id
+                    stats['cap_new'] += 1
+                resolved = new_id
+            else:
+                # backfill criterion/effects if missing
+                cur.execute('SELECT criterion, effects FROM capability WHERE id = %s', (resolved,))
+                ex = cur.fetchone()
+                if ex:
+                    if (not (ex.get('criterion') or '').strip()) and cap.get('criterion'):
+                        cur.execute('UPDATE capability SET criterion = %s WHERE id = %s',
+                                    (cap['criterion'], resolved))
+                    cur_eff = ex.get('effects')
+                    if (not cur_eff or cur_eff in ([], '[]')) and cap.get('effects'):
+                        cur.execute('UPDATE capability SET effects = %s WHERE id = %s',
+                                    (json.dumps(cap['effects'], ensure_ascii=False, default=str), resolved))
+                stats['cap_linked'] += 1
+            all_cap_ids_research.add(resolved)
+            cap_key_to_id[src_id or name] = resolved
+
+    # (5) strategy
+    strat_path = folder_path / 'strategy.json'
+    strat_id = None
+    strat_cap_ids = set()
+    if strat_path.exists():
+        try:
+            strat_data = json.loads(strat_path.read_text(encoding='utf-8'))
+        except Exception as e:
+            print(f'    ⚠️  strategy parse fail: {e}', flush=True)
+            strat_data = {'strategies': []}
+        strategies = (strat_data.get('strategies', [])
+                      if isinstance(strat_data, dict) else None)
+        if not isinstance(strategies, list):
+            print('    ⚠️  strategy.json has unexpected shape; no strategy ingested', flush=True)
+            strategies = []
+        # Only dict entries can be inspected for is_selected / name / outline.
+        strategies = [s for s in strategies if isinstance(s, dict)]
+        selected = next((s for s in strategies if s.get('is_selected')), None)
+        if not selected and strategies:
+            selected = strategies[0]
+        if selected:
+            strategy_name = selected.get('name') or f'Strategy-{folder_key}'
+            strat_id = gen_strategy_id(req_text, strategy_name)
+            now = int(time.time())
+            cur.execute('DELETE FROM strategy WHERE id = %s', (strat_id,))
+            cur.execute(
+                """INSERT INTO strategy (id, name, description, body, status, created_at, updated_at, version)
+                   VALUES (%s, %s, %s, %s, %s, %s, %s, %s)""",
+                (strat_id, strategy_name, (selected.get('reasoning') or '')[:2000],
+                 json.dumps(selected, ensure_ascii=False, indent=2),
+                 'draft', now, now, DEDUP_VERSION))
+            stats['strategies'] = 1
+            # workflow_outline cap resolution
+            wo = selected.get('workflow_outline') or []
+            if isinstance(wo, list):
+                for phase in wo:
+                    if not isinstance(phase, dict):
+                        continue
+                    caps = phase.get('capabilities') or []
+                    if not isinstance(caps, list):
+                        continue
+                    for cref in caps:
+                        if not isinstance(cref, dict):
+                            continue
+                        # Prefer the id resolved in step (4); else alias by name.
+                        key = cref.get('id') or cref.get('name', '')
+                        resolved = cap_key_to_id.get(key) or alias.get(norm(cref.get('name', '')))
+                        if resolved:
+                            strat_cap_ids.add(resolved)
+
+    # (6) wire junctions
+    for rid in resource_ids:
+        cur.execute("""INSERT INTO requirement_resource (requirement_id, resource_id)
+                       VALUES (%s, %s) ON CONFLICT DO NOTHING""", (req_id, rid))
+        if strat_id:
+            cur.execute("""INSERT INTO strategy_resource (strategy_id, resource_id)
+                           VALUES (%s, %s) ON CONFLICT DO NOTHING""", (strat_id, rid))
+    if strat_id:
+        cur.execute("""INSERT INTO requirement_strategy (requirement_id, strategy_id)
+                       VALUES (%s, %s) ON CONFLICT DO NOTHING""", (req_id, strat_id))
+        for cid in strat_cap_ids:
+            cur.execute("""INSERT INTO strategy_capability (strategy_id, capability_id, relation_type)
+                           VALUES (%s, %s, 'compose') ON CONFLICT DO NOTHING""", (strat_id, cid))
+
+    # req_cap: research superset (A plan)
+    for cid in all_cap_ids_research:
+        cur.execute("""INSERT INTO requirement_capability (requirement_id, capability_id)
+                       VALUES (%s, %s) ON CONFLICT DO NOTHING""", (req_id, cid))
+    # also include strat-only caps (in case some are in workflow_outline but not in extracted list)
+    for cid in strat_cap_ids:
+        cur.execute("""INSERT INTO requirement_capability (requirement_id, capability_id)
+                       VALUES (%s, %s) ON CONFLICT DO NOTHING""", (req_id, cid))
+
+    stats['req_cap_wired'] = len(all_cap_ids_research | strat_cap_ids)
+    stats['strat_cap_wired'] = len(strat_cap_ids)
+    rc_n, sc_n = stats['req_cap_wired'], stats['strat_cap_wired']
+    s_n = 1 if strat_id else 0
+    print(f'    ingested: res={len(resource_ids)}, strat={s_n}, req_cap={rc_n}, strat_cap={sc_n}', flush=True)
+
+
+# ═══════════════════════════════════════════════════════════
+def main():
+    """Re-ingest every folder in ``FOLDERS`` and print the summed counters.
+
+    A folder that raises is reported and skipped; the cursor is replaced with
+    a fresh one before moving on to the next folder. The cursor and the store
+    are closed on the way out.
+    """
+    counter_keys = ('deleted_resources', 'deleted_strategies',
+                    'resources', 'cap_new', 'cap_linked',
+                    'strategies', 'req_cap_wired', 'strat_cap_wired')
+    s = PostgreSQLCapabilityStore()
+    cur = s._get_cursor()
+    try:
+        print('Building alias map...', flush=True)
+        alias = build_alias_map(cur)
+        print(f'  alias entries: {len(alias)}', flush=True)
+
+        totals = dict.fromkeys(counter_keys, 0)
+
+        for f in FOLDERS:
+            print(f'\n=== {f} ===', flush=True)
+            # Fresh per-folder counters; ingest_folder fills them in place.
+            stats = dict.fromkeys(counter_keys, 0)
+            try:
+                ingest_folder(RERUN_DIR / f, cur, alias, stats)
+                for key in counter_keys:
+                    totals[key] += stats.get(key, 0)
+            except Exception as e:
+                print(f'    ❌ {type(e).__name__}: {e}', flush=True)
+                # Drop the cursor that saw the failure and continue on a new one.
+                try:
+                    cur.close()
+                except Exception:
+                    pass
+                cur = s._get_cursor()
+
+        print(f'\n{"="*50}\nTOTALS: {totals}', flush=True)
+    finally:
+        cur.close()
+        s.close()
+
+
+if __name__ == '__main__':
+    main()

+ 318 - 0
knowhub/scripts/llm_renames.py

@@ -0,0 +1,318 @@
+"""
+LLM-generated renames: 224 条 LLM 在各 folder 的 capabilities_extracted 阶段即兴命名的
+"新" cap,经人工审核全部判定为已有 canonical 的重新表述。
+
+格式:LLM_name -> canonical_id
+
+被 backfill_req_cap.py 加载,作为 alias map 的额外一层。
+数据来源:2026-04-22 人工审核 /tmp/unresolved_caps.md(见 knowhub/docs 的 session 记录)
+"""
+
+LLM_RENAMES = {
+    # folder 001
+    '音频驱动口型与表情同步': 'CAP-98490894',
+    '多格表情矩阵布局生成': 'CAP-306c15fe',
+    # folder 002
+    '虚拟试衣与服装道具融合': 'CAP-d92ffc99',
+    # folder 003
+    '拟人化角色形象构建': 'CAP-e962c3ef',
+    '情感叙事故事脚本自动生成': 'CAP-da51c2ec',
+    '电影级写实提示词工程': 'CAP-aaaef688',
+    # folder 004
+    '拟人化非人类主体生成': 'CAP-e962c3ef',
+    '角色服装与配饰迁移': 'CAP-d92ffc99',
+    '角色概念表多视角生成': 'CAP-5342ad19',
+    # folder 007
+    '超写实人像去AI感(皮肤材质化与光线结构化)': 'CAP-3b0de1ce',
+    # folder 009
+    '结构化Prompt驱动的多格网格图生成': 'CAP-306c15fe',
+    '照片主体智能抠图': 'CAP-12d2aa10',
+    '多图色调一致性统一': 'CAP-76d7f3af',
+    # folder 010
+    '多格宫格构图约束生成': 'CAP-306c15fe',
+    '卡片式信息模块化排版': 'CAP-6e77db54',
+    '宫格图像切割与拼接导出': 'CAP-fddd3349',
+    # folder 012
+    '数据可视化图表 AI 生成': 'CAP-8d6ec160',
+    '多视觉元素版面自动布局合成': 'CAP-562d91c1',
+    # folder 013
+    '暖色调全局色调锁定生成': 'CAP-fc3c58a4',
+    '配色方案智能推荐与应用': 'CAP-689bac61',
+    # folder 014
+    '霓虹光效与发光元素精准生成': 'CAP-a35e7966',
+    '粒子流与流光动态背景生成': 'CAP-8467736a',
+    '科技感场景元素组合生成': 'CAP-d1f429ff',
+    # folder 015
+    '低饱和度冷色调氛围精准控制': 'CAP-298dcb55',
+    # folder 016
+    '手部结构专项修复与增强': 'CAP-0ba3159e',
+    # folder 017
+    '角色多视图主体库构建与复用': 'CAP-5342ad19',
+    # folder 018
+    '身体局部特写精准构图生成': 'CAP-26100ea8',
+    # folder 020
+    '数据源驱动批量内容填充': 'CAP-a08749c3',
+    # folder 021
+    '霓虹光效与流光线条背景生成': 'CAP-a35e7966',
+    '海报多视觉元素自动排版合成': 'CAP-562d91c1',
+    'AI 生成图像内文字编辑与风格保持': 'CAP-021',
+    '结构化 Prompt 工程驱动 AI 生图风格控制': 'CAP-5b000814',
+    'AI 辅助海报构图与信息层级设计': 'CAP-ac9c30ba',
+    # folder 022
+    '动物拟人化角色扮演场景生成': 'CAP-e962c3ef',
+    '宠物服装虚拟试穿上身生成': 'CAP-7c8532dc',
+    # folder 023
+    'AI虚拟换装(服装迁移)': 'CAP-b4092cfe',
+    '首尾帧约束视频生成': 'CAP-7b9d2baf',
+    # folder 024
+    '双重曝光人像景观融合': 'CAP-19e5402a',
+    '超现实合成光影透视一致性校正': 'CAP-d93a0ac2',
+    # folder 025
+    '全身到局部细节的多景别构图生成': 'CAP-e80e4194',
+    # folder 027
+    '服装平铺图到模特上身转换': 'CAP-b4092cfe',
+    '网格拼贴布局生成': 'CAP-306c15fe',
+    '角色一致性故事创作': 'CAP-003',
+    # folder 028
+    'AI智能抠图与前景分离': 'CAP-12d2aa10',
+    '多格拼贴版式自动排版': 'CAP-41ac8100',
+    '表情情绪驱动的角色替换生成': 'CAP-5a1ac59d',
+    # folder 029
+    'AI 智能主体抠图': 'CAP-12d2aa10',
+    # folder 030
+    '单画布多格布局强制生成': 'CAP-306c15fe',
+    'LLM驱动的图像提示词自动生成': 'CAP-4d8ba002',
+    '多格漫画叙事结构生成': 'CAP-2671cd39',
+    'AI智能主体抠图': 'CAP-12d2aa10',
+    # folder 031
+    '真人照片 AI 全自动卡通化': 'CAP-8d69865f',
+    '角色概念设计表多视图生成': 'CAP-5342ad19',
+    # folder 034
+    '自然光线提示词工程': 'CAP-e8a77f70',
+    '真实世界背景融合': 'CAP-ffb20b0d',
+    'AI 塑料感消除': 'CAP-3b0de1ce',
+    '肢体末端细节生成': 'CAP-0ba3159e',
+    '皮肤质感描述增强': 'CAP-3b0de1ce',
+    '戏剧性明暗对比控制': 'CAP-1649b549',
+    # folder 035
+    'AI 生成内容与真实素材 VFX 合成': 'CAP-3b8df701',
+    '角色唇形同步': 'CAP-98490894',
+    # folder 036
+    '专业摄影参数模拟': 'CAP-ef0a4c0c',
+    '工作室级布光效果生成': 'CAP-c2c42fc7',
+    '材质质感超写实渲染': 'CAP-0dc2a15b',
+    '产品爆炸图/分解图生成': 'CAP-c9426dcc',
+    '色彩配色方案生成': 'CAP-689bac61',
+    # folder 037
+    '多角度角色表一键生成': 'CAP-5342ad19',
+    '单图多视角转换': 'CAP-ee7df476',
+    '多帧拼贴布局生成': 'CAP-41ac8100',
+    # folder 038
+    '电影级虚拟相机运动控制': 'CAP-49175b92',
+    '环境粒子特效生成': 'CAP-8467736a',
+    '专业摄影设备参数模拟': 'CAP-ef0a4c0c',
+    # folder 039
+    '胶片质感与颗粒模拟': 'CAP-6c14041c',
+    '自然光照与氛围渲染': 'CAP-e8a77f70',
+    # folder 040
+    '多情绪表情集批量生成': 'CAP-5a1ac59d',
+    'AI 辅助图文卡片排版合成': 'CAP-87ba3b7d',
+    # folder 041
+    '多图拼贴布局排版': 'CAP-41ac8100',
+    'AI智能抠图/背景移除': 'CAP-12d2aa10',
+    '叙事顺序规划': 'CAP-d1764148',
+    '色彩弹出/局部去色效果': 'CAP-3178172e',
+    # folder 042
+    '信息层级可视化生成': 'CAP-f459d6a8',
+    # folder 043
+    'AI原生文字渲染与图像融合': 'CAP-014',
+    'AI驱动的多层次文字排版布局优化': 'CAP-94c648d6',
+    'AI辅助的图像文字精准修复与替换': 'CAP-021',
+    # folder 044
+    '多情绪表情批量生成': 'CAP-5a1ac59d',
+    # folder 045
+    '多宫格网格布局自动排版': 'CAP-306c15fe',
+    '长文自动分页为图片序列': 'CAP-b5822b4a',
+    '宫格大图自动切割为独立子图': 'CAP-fddd3349',
+    'AI 脚本与分镜文案自动生成': 'CAP-da51c2ec',
+    # folder 047
+    '自然语言驱动多类型数据图表自动生成': 'CAP-34f85267',
+    # folder 049
+    'AI 提示词驱动网格分镜图一次性生成': 'CAP-306c15fe',
+    '网格大图自动切割拆分为独立子图': 'CAP-fddd3349',
+    '多格子叙事一致性分镜布局生成': 'CAP-e9b763d2',
+    '社交媒体多帖无缝横幅拼接对齐': 'CAP-8c805e0e',
+    # folder 050
+    'AI 版面自动排布与信息图生成': 'CAP-562d91c1',
+    '多图版面智能排布规则生成': 'CAP-41ac8100',
+    '数字剪贴簿与轮播图自动化生成': 'CAP-20409fa6',
+    # folder 051
+    '多格叙事分镜版式自动编排': 'CAP-2671cd39',
+    '角色设定卡(Character Sheet)生成': 'CAP-5342ad19',
+    '漫画对话气泡与文字叙事排版生成': 'CAP-fc2bd5cf',
+    '跨格场景连贯性约束生成': 'CAP-e9b763d2',
+    # folder 052
+    '多格网格拼贴布局生成': 'CAP-306c15fe',
+    '跨格视觉节奏与色调统一': 'CAP-76d7f3af',
+    '无缝轮播图像切割与拼接': 'CAP-8c805e0e',
+    '几何形状蒙版图像嵌入': 'CAP-47d6893f',
+    # folder 054
+    '多格子网格布局自动生成': 'CAP-306c15fe',
+    '图文卡片单元自动填充与适配': 'CAP-6e77db54',
+    '多格画面配色主题一致性控制': 'CAP-76d7f3af',
+    '图像智能切割与多格分发': 'CAP-fddd3349',
+    # folder 055
+    '极端特写构图与画面填充控制': 'CAP-26100ea8',
+    '透视感与广角镜头畸变控制': 'CAP-0e3d61ca',
+    '选择性着色(局部保色)': 'CAP-3178172e',
+    '图像反推提示词(图生文)': 'CAP-4d8ba002',
+    '人物微表情与情绪细节控制': 'CAP-a3b3ab31',
+    '皮肤质感与次表面散射细节增强': 'CAP-3b0de1ce',
+    # folder 056
+    '浅景深背景虚化(Bokeh)效果生成': 'CAP-792fd807',
+    '人像皮肤真实感与微纹理生成': 'CAP-3b0de1ce',
+    '人像光影风格精准控制': 'CAP-ed4b506e',
+    '胶片质感与色调风格渲染': 'CAP-6c14041c',
+    '人像构图与景别精准控制': 'CAP-4f15a85f',
+    # folder 057
+    '微距特写构图与画幅填充控制': 'CAP-26100ea8',
+    # folder 058
+    '透视纵深感精确控制生成': 'CAP-3c49ff0a',
+    '室内物理光照精确模拟': 'CAP-5fb6dd66',
+    '建筑空间框景与层次构图生成': 'CAP-8531d74f',
+    '地板线条透视引导生成': 'CAP-3c49ff0a',
+    '360度全景室内图生成': 'CAP-1b3e966f',
+    '体积光与大气光束效果生成': 'CAP-3086677b',
+    '相机参数精确锁定与模拟': 'CAP-ef0a4c0c',
+    # folder 059
+    '鱼眼镜头球形畸变效果生成': 'CAP-0e3d61ca',
+    '极端仰拍/俯拍透视变形控制': 'CAP-0c30af82',
+    '广角焦段透视拉伸模拟': 'CAP-4c471e0d',
+    '荷兰角/倾斜构图视觉冲击生成': 'CAP-0c30af82',
+    # folder 060
+    '超现实矛盾空间与不可能几何生成': 'CAP-1f898cd9',
+    # folder 061
+    '双色调冷暖对撞构图控制': 'CAP-a185d6d2',
+    '高饱和度色彩强度精准控制': 'CAP-2de278d6',
+    '波普艺术网点与几何纹理叠加': 'CAP-9359b49f',
+    '双重曝光与剪影融合生成': 'CAP-19e5402a',
+    # folder 062
+    '暗调长调配色(Low-Key with High Contrast)': 'CAP-1649b549',
+    '明暗对照法(Chiaroscuro/Tenebrism)': 'CAP-1649b549',
+    '局部发光效果(Glow Effect)': 'CAP-a35e7966',
+    '暖光点睛(Warm Light Accent)': 'CAP-a185d6d2',
+    # folder 063
+    '单一主色调全画面主导生成': 'CAP-fc3c58a4',
+    '参考图色彩提取与 HEX 色板生成': 'CAP-689bac61',
+    'LUT 风格提示词调色模板': 'CAP-79590b09',
+    '胶片光学效果模拟(柔焦/散景/颗粒/暗角)': 'CAP-6c14041c',
+    # folder 064
+    '冷暖色调对比设计': 'CAP-a185d6d2',
+    '分段式氛围提示词结构': 'CAP-5b000814',
+    '光晕与泛光效果生成': 'CAP-a35e7966',
+    '渐变映射全局调色': 'CAP-59d2256e',
+    '多层景深空间感营造': 'CAP-3c49ff0a',
+    'AI文生视频氛围场景生成': 'CAP-009',
+    # folder 065
+    '图像参考反向提示词提取': 'CAP-4d8ba002',
+    '撞色配色方案智能推荐': 'CAP-6485105e',
+    # folder 066
+    '暖色调室内风格精准提示词控制': 'CAP-9b71604d',
+    '室内空间氛围光层次生成': 'CAP-5fb6dd66',
+    '室内自然材质与有机形态生成': 'CAP-56368e3a',
+    # folder 067
+    '多格分镜叙事布局生成': 'CAP-7423a8b2',
+    # folder 068
+    '毛绒质感超写实渲染': 'CAP-96182b8f',
+    '角色情绪多态表情生成': 'CAP-5a1ac59d',
+    'AI动画唇同步驱动': 'CAP-98490894',
+    'AI动画脚本与逐场景提示词生成': 'CAP-da51c2ec',
+    # folder 070
+    '多格拼贴画布自动排版合成': 'CAP-41ac8100',
+    '情绪驱动角色表情变体批量生成': 'CAP-5a1ac59d',
+    '漫画叙事分镜脚本自动生成': 'CAP-da51c2ec',
+    # folder 071
+    'AI 智能背景去除与主体抠图': 'CAP-12d2aa10',
+    # folder 072
+    'AI智能抠图与主体分离': 'CAP-12d2aa10',
+    # folder 074
+    'AI智能抠图与背景去除': 'CAP-12d2aa10',
+    # folder 075
+    '戏剧性明暗对比(Chiaroscuro/Low-Key)控制': 'CAP-1649b549',
+    '镜头光学效果生成(光晕/光斑/散景)': 'CAP-6c14041c',
+    '水面/镜面反射光影生成': 'CAP-3b51102e',
+    '3D模型截图转摄影级效果图(光影增强图生图)': 'CAP-019',
+    '经典布光模式精准复现(伦勃朗/蝴蝶光/侧逆光)': 'CAP-ed4b506e',
+    '黄金时刻/蓝调时刻自然光氛围生成': 'CAP-e8a77f70',
+    # folder 076
+    '半色调与孔版印刷纹理模拟': 'CAP-9359b49f',
+    # folder 077 (materials all alias to 2D material cap)
+    'AI PBR材质贴图自动生成': 'CAP-0dc2a15b',
+    '无缝材质纹理平铺生成': 'CAP-0dc2a15b',
+    '材质质感关键词精准控制生成': 'CAP-0dc2a15b',
+    '参考图驱动材质替换': 'CAP-92b0fd72',
+    '真实几何置换表面细节生成': 'CAP-0dc2a15b',
+    '织物/绒毛/布艺质感精准生成': 'CAP-96182b8f',
+    '石材/大理石光滑反射质感生成': 'CAP-0dc2a15b',
+    '木纹/木材质感精准生成': 'CAP-0dc2a15b',
+    '编织/藤编纹理精准生成': 'CAP-56368e3a',
+    '3D白模AI自动贴材质': 'CAP-0dc2a15b',
+    # folder 078
+    '文字描边样式渲染': 'CAP-16c5174b',
+    '文字阴影与投影效果渲染': 'CAP-16c5174b',
+    '字幕与文字动画自动生成': 'CAP-00c474e2',
+    'LLM故事文案生成': 'CAP-da51c2ec',
+    # folder 079
+    '长文本智能分页拆分为多图': 'CAP-b5822b4a',
+    # folder 080
+    '多图网格拼贴自动排版': 'CAP-41ac8100',
+    '信息层级分区布局生成': 'CAP-f459d6a8',
+    # folder 081
+    '活动视觉品牌统一化输出': 'CAP-832e80ac',
+    # folder 082
+    '参数化方位角/距离多视角生成': 'CAP-ee7df476',
+    '画面景深空间分层构图生成': 'CAP-3c49ff0a',
+    # folder 083
+    'LoRA 多角度坐标系精确控制': 'CAP-ee7df476',
+    '图像视角重构与转换': 'CAP-ec45f9c7',
+    # folder 084
+    '空间透视与消失点纵深控制': 'CAP-3c49ff0a',
+    # folder 086
+    '高饱和多色并置色彩方案生成': 'CAP-2de278d6',
+    '互补色对比张力精准控制': 'CAP-2de278d6',
+    '复古印刷质感与叠印效果生成': 'CAP-9359b49f',
+    # folder 088
+    '电影级镜头运动控制': 'CAP-49175b92',
+    'AI 情感配乐生成': 'CAP-5f9644fb',
+    'AI 辅助超现实场景提示词创作': 'CAP-4d8ba002',
+    # folder 089
+    '霓虹发光效果生成': 'CAP-a35e7966',
+    '电影级色彩分级与氛围色调控制': 'CAP-79590b09',
+    '3D透视文字与场景表面融合': 'CAP-bd4828fc',
+    '海报构图与版式智能设计': 'CAP-ac9c30ba',
+    # folder 090
+    '室内多材质纹理精准生成': 'CAP-56368e3a',
+    # folder 091
+    '角色三视图生成': 'CAP-5342ad19',
+    '口型同步与配音驱动动画': 'CAP-98490894',
+    '角色换装(保持外观不变)': 'CAP-b4092cfe',
+    # folder 092
+    '多板块图文混排版式自动生成': 'CAP-562d91c1',
+    '主题色彩系统约束生成': 'CAP-2bd87e28',
+    # folder 094
+    '戏剧性光影对比生成(侧光/逆光/硬光方向控制)': 'CAP-1649b549',
+    '丁达尔体积光效果生成(Crepuscular Rays)': 'CAP-3086677b',
+    '摄影级镜头参数模拟(焦距/光圈/景深/色温)': 'CAP-ef0a4c0c',
+    # folder 095
+    '多光源分层照明提示词工程': 'CAP-5fb6dd66',
+    '电影级光照提示词工程': 'CAP-aaaef688',
+    # folder 096
+    '体积光/上帝光电影级光束渲染': 'CAP-3086677b',
+    '多彩粒子光效场景生成': 'CAP-8467736a',
+    # folder 098
+    '文字透视变形与空间感叠加': 'CAP-bd4828fc',
+    # folder 099
+    'AI一键多页轮播/幻灯片内容生成': 'CAP-20409fa6',
+    '多宫格/Bento网格布局自动排版': 'CAP-306c15fe',
+    '视觉层次排版控制(大标题突出核心信息)': 'CAP-94c648d6',
+}