Просмотр исходного кода

Merge branch 'main' into dev_tao

guantao 1 неделя назад
Родитель
Commit
2f5f9a6bac
87 измененных файлов с 4710 добавлено и 468 удалено
  1. 1 5
      .env.template
  2. 18 6
      agent/core/presets.py
  3. 226 31
      agent/core/runner.py
  4. 458 3
      agent/llm/openrouter.py
  5. 7 1
      agent/llm/yescode.py
  6. 35 0
      agent/memory/skills/browser.md
  7. 65 0
      agent/memory/skills/planning.md
  8. 10 0
      agent/memory/skills/research.md
  9. 2 0
      agent/tools/builtin/__init__.py
  10. 101 9
      agent/tools/builtin/file/read.py
  11. 572 0
      agent/tools/builtin/nanobanana.py
  12. 61 4
      agent/tools/builtin/subagent.py
  13. 0 4
      agent/trace/__init__.py
  14. 43 2
      agent/trace/compaction.py
  15. 8 27
      agent/trace/goal_tool.py
  16. 7 5
      agent/trace/models.py
  17. 28 15
      agent/trace/run_api.py
  18. 38 9
      docs/README.md
  19. 98 0
      docs/ref/create.md
  20. 357 0
      docs/ref/deconstruct_old.md
  21. 5 5
      docs/trace-api.md
  22. 12 0
      examples/how/README.md
  23. 46 0
      examples/how/analyze_images.py
  24. 12 0
      examples/how/encode_images.py
  25. 0 0
      examples/how/features/images_b64.json
  26. 0 0
      examples/how/features/img1_b64.txt
  27. 0 0
      examples/how/features/img2_b64.txt
  28. 0 0
      examples/how/features/img3_b64.txt
  29. 0 0
      examples/how/features/img4_b64.txt
  30. 0 0
      examples/how/features/img5_b64.txt
  31. 0 0
      examples/how/features/img6_b64.txt
  32. 0 0
      examples/how/features/img7_b64.txt
  33. 0 0
      examples/how/features/img8_b64.txt
  34. 0 0
      examples/how/features/img9_b64.txt
  35. BIN
      examples/how/features/thumb_1.jpg
  36. BIN
      examples/how/features/thumb_2.jpg
  37. BIN
      examples/how/features/thumb_3.jpg
  38. BIN
      examples/how/features/thumb_4.jpg
  39. BIN
      examples/how/features/thumb_5.jpg
  40. BIN
      examples/how/features/thumb_6.jpg
  41. BIN
      examples/how/features/thumb_7.jpg
  42. BIN
      examples/how/features/thumb_8.jpg
  43. BIN
      examples/how/features/thumb_9.jpg
  44. BIN
      examples/how/input/1.jpeg
  45. BIN
      examples/how/input/3.jpeg
  46. BIN
      examples/how/input/7.jpeg
  47. 9 0
      examples/how/input/《秋日际遇》写生油画.json
  48. 9 0
      examples/how/load_imgs.py
  49. 14 0
      examples/how/presets.json
  50. 48 0
      examples/how/production.prompt
  51. 26 0
      examples/how/resource/input_cloud_archive/《秋日际遇》写生油画.json
  52. BIN
      examples/how/resource/input_local_archive/1.jpeg
  53. BIN
      examples/how/resource/input_local_archive/2.jpeg
  54. BIN
      examples/how/resource/input_local_archive/3.jpeg
  55. BIN
      examples/how/resource/input_local_archive/4.jpeg
  56. BIN
      examples/how/resource/input_local_archive/5.jpeg
  57. BIN
      examples/how/resource/input_local_archive/6.jpeg
  58. BIN
      examples/how/resource/input_local_archive/7.jpeg
  59. BIN
      examples/how/resource/input_local_archive/8.jpeg
  60. BIN
      examples/how/resource/input_local_archive/9.jpeg
  61. 26 0
      examples/how/resource/input_local_archive/《秋日际遇》写生油画.json
  62. 529 0
      examples/how/run.py
  63. 8 0
      examples/how/save_b64.py
  64. 48 0
      examples/how/skills/construct.md
  65. 120 0
      examples/how/skills/deconstruct.md
  66. 128 0
      examples/test_cache/run.py
  67. 138 0
      examples/test_cache/run_multi.py
  68. 259 0
      examples/test_cache/run_same_trace.py
  69. 244 0
      examples/test_cache/run_strict.py
  70. 39 1
      frontend/react-template/package-lock.json
  71. 7 6
      frontend/react-template/package.json
  72. 25 15
      frontend/react-template/src/App.tsx
  73. 20 4
      frontend/react-template/src/api/traceApi.ts
  74. 67 76
      frontend/react-template/src/components/DetailPanel/DetailPanel.module.css
  75. 17 8
      frontend/react-template/src/components/DetailPanel/DetailPanel.tsx
  76. 120 29
      frontend/react-template/src/components/FlowChart/FlowChart.tsx
  77. 77 31
      frontend/react-template/src/components/FlowChart/hooks/useFlowChartData.ts
  78. 35 18
      frontend/react-template/src/components/FlowChart/styles/FlowChart.module.css
  79. 32 0
      frontend/react-template/src/components/FlowChart/utils/retryLogic.ts
  80. 45 44
      frontend/react-template/src/components/MainContent/MainContent.module.css
  81. 62 17
      frontend/react-template/src/components/MainContent/MainContent.tsx
  82. 102 47
      frontend/react-template/src/components/TopBar/TopBar.module.css
  83. 143 30
      frontend/react-template/src/components/TopBar/TopBar.tsx
  84. 13 10
      frontend/react-template/src/hooks/useTrace.ts
  85. 28 0
      frontend/react-template/src/styles/global.css
  86. 60 6
      frontend/react-template/src/styles/variables.css
  87. 2 0
      frontend/react-template/src/types/message.ts

+ 1 - 5
.env.template

@@ -1,12 +1,8 @@
 # 完成配置后,将 .env.template 重命名为 .env
 
 
-# OpenRouter API Key
+# OpenRouter API Key,用于sonnet-4.6模型
 OPEN_ROUTER_API_KEY=
 
 # BrowserUse API Key
 BROWSER_USE_API_KEY=
-
-# Yescode 代理
-YESCODE_BASE_URL=
-YESCODE_API_KEY=

+ 18 - 6
agent/core/presets.py

@@ -21,29 +21,41 @@ class AgentPreset:
     max_iterations: int = 30
     temperature: Optional[float] = None
 
+    # Skills(注入 system prompt 的 skill 名称列表;None = 加载全部)
+    skills: Optional[List[str]] = None
+
     # 描述
     description: Optional[str] = None
 
 
 # 内置预设
+_DEFAULT_SKILLS = ["planning", "research", "browser"]
+
 AGENT_PRESETS = {
     "default": AgentPreset(
         allowed_tools=None,
         max_iterations=30,
+        skills=_DEFAULT_SKILLS,
         description="默认 Agent,拥有全部工具权限",
     ),
+    "delegate": AgentPreset(
+        allowed_tools=None,
+        max_iterations=30,
+        skills=_DEFAULT_SKILLS,
+        description="委托子 Agent,拥有全部工具权限(由 agent 工具创建)",
+    ),
     "explore": AgentPreset(
         allowed_tools=["read", "glob", "grep", "list_files"],
         denied_tools=["write", "edit", "bash", "task"],
         max_iterations=15,
+        skills=["planning"],
         description="探索型 Agent,只读权限,用于代码分析",
     ),
-    "analyst": AgentPreset(
-        allowed_tools=["read", "glob", "grep", "web_search", "webfetch"],
-        denied_tools=["write", "edit", "bash", "task"],
-        temperature=0.3,
-        max_iterations=25,
-        description="分析型 Agent,用于深度分析和研究",
+    "evaluate": AgentPreset(
+        allowed_tools=["read_file", "grep_content", "glob_files", "goal"],
+        max_iterations=10,
+        skills=["planning"],
+        description="评估型 Agent,只读权限,用于结果评估",
     ),
 }
 

+ 226 - 31
agent/core/runner.py

@@ -62,9 +62,11 @@ class RunConfig:
     agent_type: str = "default"
     uid: Optional[str] = None
     system_prompt: Optional[str] = None        # None = 从 skills 自动构建
+    skills: Optional[List[str]] = None         # 注入 system prompt 的 skill 名称列表;None = 按 preset 决定
     enable_memory: bool = True
     auto_execute_tools: bool = True
     name: Optional[str] = None                 # 显示名称(空则由 utility_llm 自动生成)
+    enable_prompt_caching: bool = True         # 启用 Anthropic Prompt Caching(仅 Claude 模型有效)
 
     # --- Trace 控制 ---
     trace_id: Optional[str] = None             # None = 新建
@@ -295,16 +297,22 @@ class AgentRunner:
         self,
         messages: List[Dict],
         config: Optional[RunConfig] = None,
+        on_event: Optional[Callable] = None,
     ) -> Dict[str, Any]:
         """
         结果模式 — 消费 run(),返回结构化结果。
 
         主要用于 agent/evaluate 工具内部。
+
+        Args:
+            on_event: 可选回调,每个 Trace/Message 事件触发一次,用于实时输出子 Agent 执行过程。
         """
         last_assistant_text = ""
         final_trace: Optional[Trace] = None
 
         async for item in self.run(messages=messages, config=config):
+            if on_event:
+                on_event(item)
             if isinstance(item, Message) and item.role == "assistant":
                 content = item.content
                 text = ""
@@ -473,6 +481,10 @@ class AgentRunner:
             raise ValueError(f"Trace not found: {config.trace_id}")
 
         goal_tree = await self.trace_store.get_goal_tree(config.trace_id)
+        if goal_tree is None:
+            # 防御性兜底:trace 存在但 goal.json 丢失时,创建空树
+            goal_tree = GoalTree(mission=trace_obj.task or "Agent task")
+            await self.trace_store.update_goal_tree(config.trace_id, goal_tree)
 
         # 自动判断行为:after_sequence 为 None 或 == head → 续跑;< head → 回溯
         after_seq = config.after_sequence
@@ -572,25 +584,38 @@ class AgentRunner:
                 if main_path:
                     head_seq = main_path[-1].sequence
 
-        # 2. 构建 system prompt(如果历史中没有 system message)
+        # 2. 构建/注入 skills 到 system prompt
         has_system = any(m.get("role") == "system" for m in history)
         has_system_in_new = any(m.get("role") == "system" for m in new_messages)
 
-        if not has_system and not has_system_in_new:
-            system_prompt = await self._build_system_prompt(config)
-            if system_prompt:
-                history = [{"role": "system", "content": system_prompt}] + history
+        if not has_system:
+            if has_system_in_new:
+                # 入参消息已含 system,将 skills 注入其中(在 step 4 持久化之前)
+                augmented = []
+                for msg in new_messages:
+                    if msg.get("role") == "system":
+                        base = msg.get("content") or ""
+                        enriched = await self._build_system_prompt(config, base_prompt=base)
+                        augmented.append({**msg, "content": enriched or base})
+                    else:
+                        augmented.append(msg)
+                new_messages = augmented
+            else:
+                # 没有 system,自动构建并插入历史
+                system_prompt = await self._build_system_prompt(config)
+                if system_prompt:
+                    history = [{"role": "system", "content": system_prompt}] + history
 
-                if self.trace_store:
-                    system_msg = Message.create(
-                        trace_id=trace_id, role="system", sequence=sequence,
-                        goal_id=None, content=system_prompt,
-                        parent_sequence=None,  # system message 是 root
-                    )
-                    await self.trace_store.add_message(system_msg)
-                    created_messages.append(system_msg)
-                    head_seq = sequence
-                    sequence += 1
+                    if self.trace_store:
+                        system_msg = Message.create(
+                            trace_id=trace_id, role="system", sequence=sequence,
+                            goal_id=None, content=system_prompt,
+                            parent_sequence=None,  # system message 是 root
+                        )
+                        await self.trace_store.add_message(system_msg)
+                        created_messages.append(system_msg)
+                        head_seq = sequence
+                        sequence += 1
 
         # 3. 追加新 messages(设置 parent_sequence 链接到当前 head)
         for msg_dict in new_messages:
@@ -629,11 +654,6 @@ class AgentRunner:
         # 当前主路径头节点的 sequence(用于设置 parent_sequence)
         head_seq = trace.head_sequence
 
-        # 设置 goal_tree 到 goal 工具
-        if goal_tree and self.trace_store:
-            from agent.trace.goal_tool import set_goal_tree
-            set_goal_tree(goal_tree)
-
         # 经验检索缓存:只在 goal 切换时重新检索
         _last_goal_id = None
         _cached_exp_text = ""
@@ -660,6 +680,22 @@ class AgentRunner:
             token_count = estimate_tokens(history)
             max_tokens = compression_config.get_max_tokens(config.model)
 
+            # 压缩评估日志
+            progress_pct = (token_count / max_tokens * 100) if max_tokens > 0 else 0
+            msg_count = len(history)
+            img_count = sum(
+                1 for msg in history
+                if isinstance(msg.get("content"), list)
+                for part in msg["content"]
+                if isinstance(part, dict) and part.get("type") in ("image", "image_url")
+            )
+            print(f"\n[压缩评估] 消息数: {msg_count} | 图片数: {img_count} | Token: {token_count:,} / {max_tokens:,} ({progress_pct:.1f}%)")
+
+            if token_count > max_tokens:
+                print(f"[压缩评估] ⚠️  超过阈值,触发压缩流程")
+            else:
+                print(f"[压缩评估] ✅ 未超阈值,无需压缩")
+
             if token_count > max_tokens and self.trace_store and goal_tree:
                 # 使用本地 head_seq(store 中的 head_sequence 在 loop 期间未更新,是过时的)
                 if head_seq > 0:
@@ -668,12 +704,21 @@ class AgentRunner:
                     )
                     filtered_msgs = filter_by_goal_status(main_path_msgs, goal_tree)
                     if len(filtered_msgs) < len(main_path_msgs):
+                        filtered_tokens = estimate_tokens([msg.to_llm_dict() for msg in filtered_msgs])
+                        print(
+                            f"[Level 1 压缩] 消息: {len(main_path_msgs)} → {len(filtered_msgs)} 条 | "
+                            f"Token: {token_count:,} → ~{filtered_tokens:,}"
+                        )
                         logger.info(
                             "Level 1 压缩: %d -> %d 条消息 (tokens ~%d, 阈值 %d)",
                             len(main_path_msgs), len(filtered_msgs), token_count, max_tokens,
                         )
                         history = [msg.to_llm_dict() for msg in filtered_msgs]
                     else:
+                        print(
+                            f"[Level 1 压缩] 无可过滤消息 ({len(main_path_msgs)} 条全部保留, "
+                            f"completed/abandoned goals={sum(1 for g in goal_tree.goals if g.status in ('completed', 'abandoned'))})"
+                        )
                         logger.info(
                             "Level 1 压缩: 无可过滤消息 (%d 条全部保留, completed/abandoned goals=%d)",
                             len(main_path_msgs),
@@ -681,6 +726,7 @@ class AgentRunner:
                                 if g.status in ("completed", "abandoned")),
                         )
             elif token_count > max_tokens:
+                print("[压缩评估] ⚠️  无法执行 Level 1 压缩(缺少 store 或 goal_tree)")
                 logger.warning(
                     "消息 token 数 (%d) 超过阈值 (%d),但无法执行 Level 1 压缩(缺少 store 或 goal_tree)",
                     token_count, max_tokens,
@@ -689,6 +735,11 @@ class AgentRunner:
             # Level 2 压缩:LLM 总结(Level 1 后仍超阈值时触发)
             token_count_after = estimate_tokens(history)
             if token_count_after > max_tokens:
+                progress_pct_after = (token_count_after / max_tokens * 100) if max_tokens > 0 else 0
+                print(
+                    f"[Level 2 压缩] Level 1 后仍超阈值: {token_count_after:,} / {max_tokens:,} ({progress_pct_after:.1f}%) "
+                    f"→ 触发 LLM 总结"
+                )
                 logger.info(
                     "Level 1 后 token 仍超阈值 (%d > %d),触发 Level 2 压缩",
                     token_count_after, max_tokens,
@@ -696,6 +747,12 @@ class AgentRunner:
                 history, head_seq, sequence = await self._compress_history(
                     trace_id, history, goal_tree, config, sequence, head_seq,
                 )
+                final_tokens = estimate_tokens(history)
+                print(f"[Level 2 压缩] 完成: Token {token_count_after:,} → {final_tokens:,}")
+            elif token_count > max_tokens:
+                # Level 1 压缩成功,未触发 Level 2
+                print(f"[压缩评估] ✅ Level 1 压缩后达标: {token_count_after:,} / {max_tokens:,}")
+            print()  # 空行分隔
 
             # 构建 LLM messages(注入上下文)
             llm_messages = list(history)
@@ -735,6 +792,12 @@ class AgentRunner:
 
             if _cached_exp_text:
                 llm_messages.append({"role": "system", "content": _cached_exp_text})
+            # 应用 Prompt Caching(不修改原始 history,只在发送给 LLM 时添加缓存标记)
+            llm_messages = self._add_cache_control(
+                llm_messages,
+                config.model,
+                config.enable_prompt_caching
+            )
 
             # 调用 LLM
             result = await self.llm_call(
@@ -751,6 +814,8 @@ class AgentRunner:
             prompt_tokens = result.get("prompt_tokens", 0)
             completion_tokens = result.get("completion_tokens", 0)
             step_cost = result.get("cost", 0)
+            cache_creation_tokens = result.get("cache_creation_tokens")
+            cache_read_tokens = result.get("cache_read_tokens")
 
             # 按需自动创建 root goal
             if goal_tree and not goal_tree.goals and tool_calls:
@@ -768,8 +833,8 @@ class AgentRunner:
                     )
                     goal_tree.focus(goal_tree.goals[0].id)
                     if self.trace_store:
-                        await self.trace_store.update_goal_tree(trace_id, goal_tree)
                         await self.trace_store.add_goal(trace_id, goal_tree.goals[0])
+                        await self.trace_store.update_goal_tree(trace_id, goal_tree)
                     logger.info(f"自动创建 root goal: {goal_tree.goals[0].id}")
 
             # 获取当前 goal_id
@@ -785,6 +850,8 @@ class AgentRunner:
                 content={"text": response_content, "tool_calls": tool_calls},
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
+                cache_creation_tokens=cache_creation_tokens,
+                cache_read_tokens=cache_read_tokens,
                 finish_reason=finish_reason,
                 cost=step_cost,
             )
@@ -849,6 +916,7 @@ class AgentRunner:
                             "trace_id": trace_id,
                             "goal_id": current_goal_id,
                             "runner": self,
+                            "goal_tree": goal_tree,
                         }
                     )
 
@@ -880,7 +948,8 @@ class AgentRunner:
                         goal_id=current_goal_id,
                         parent_sequence=head_seq,
                         tool_call_id=tc["id"],
-                        content={"tool_name": tool_name, "result": tool_result_text},
+                        # 存储完整内容:有图片时保留 list(含 image_url),纯文本时存字符串
+                        content={"tool_name": tool_name, "result": tool_content_for_llm},
                     )
 
                     if self.trace_store:
@@ -982,6 +1051,13 @@ class AgentRunner:
             reflect_prompt = build_reflect_prompt()
             reflect_messages = list(history) + [{"role": "user", "content": reflect_prompt}]
 
+            # 应用 Prompt Caching
+            reflect_messages = self._add_cache_control(
+                reflect_messages,
+                config.model,
+                config.enable_prompt_caching
+            )
+
             reflect_result = await self.llm_call(
                 messages=reflect_messages,
                 model=config.model,
@@ -1040,6 +1116,13 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
         compress_prompt = build_compression_prompt(goal_tree, used_ex_ids=self.used_ex_ids)
         compress_messages = list(history) + [{"role": "user", "content": compress_prompt}]
 
+        # 应用 Prompt Caching
+        compress_messages = self._add_cache_control(
+            compress_messages,
+            config.model,
+            config.enable_prompt_caching
+        )
+
         compress_result = await self.llm_call(
             messages=compress_messages,
             model=config.model,
@@ -1417,6 +1500,97 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 
     # ===== 辅助方法 =====
 
+    def _add_cache_control(
+        self,
+        messages: List[Dict],
+        model: str,
+        enable: bool
+    ) -> List[Dict]:
+        """
+        为支持的模型添加 Prompt Caching 标记
+
+        策略:
+        1. system message 添加缓存(如果存在且足够长)
+        2. 倒数第 3-5 条 user/assistant 消息添加缓存点
+
+        Args:
+            messages: 原始消息列表
+            model: 模型名称
+            enable: 是否启用缓存
+
+        Returns:
+            添加了 cache_control 的消息列表(深拷贝)
+        """
+        if not enable:
+            return messages
+
+        # 只对 Claude 模型启用
+        if "claude" not in model.lower():
+            return messages
+
+        # 深拷贝避免修改原始数据
+        import copy
+        messages = copy.deepcopy(messages)
+
+        # 策略 1: 为 system message 添加缓存
+        for msg in messages:
+            if msg.get("role") == "system":
+                content = msg.get("content", "")
+                # 只有足够长的 system prompt 才值得缓存(>1024 tokens 约 4000 字符)
+                if isinstance(content, str) and len(content) > 1000:
+                    # Anthropic API 格式:在 content 的最后一个 block 添加 cache_control
+                    # 如果 content 是 string,需要转换为 list 格式
+                    msg["content"] = [
+                        {
+                            "type": "text",
+                            "text": content,
+                            "cache_control": {"type": "ephemeral"}
+                        }
+                    ]
+                    logger.debug(f"[Cache] 为 system message 添加缓存标记 (len={len(content)})")
+                break
+
+        # 策略 2: 为倒数第 3-5 条消息添加缓存点
+        # 这样可以缓存大部分历史对话,只有最新的几条消息是新的
+        cache_positions = []
+        user_assistant_msgs = [
+            (i, msg) for i, msg in enumerate(messages)
+            if msg.get("role") in ("user", "assistant")
+        ]
+
+        if len(user_assistant_msgs) >= 5:
+            # 在倒数第 5 条添加缓存点
+            cache_positions.append(user_assistant_msgs[-5][0])
+        elif len(user_assistant_msgs) >= 3:
+            # 在倒数第 3 条添加缓存点
+            cache_positions.append(user_assistant_msgs[-3][0])
+
+        for idx in cache_positions:
+            msg = messages[idx]
+            content = msg.get("content", "")
+
+            # 处理 string content
+            if isinstance(content, str):
+                msg["content"] = [
+                    {
+                        "type": "text",
+                        "text": content,
+                        "cache_control": {"type": "ephemeral"}
+                    }
+                ]
+                logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 添加缓存标记")
+
+            # 处理 list content(多模态消息)
+            elif isinstance(content, list) and len(content) > 0:
+                # 在最后一个 text block 添加 cache_control
+                for i in range(len(content) - 1, -1, -1):
+                    if isinstance(content[i], dict) and content[i].get("type") == "text":
+                        content[i]["cache_control"] = {"type": "ephemeral"}
+                        logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 的 content[{i}] 添加缓存标记")
+                        break
+
+        return messages
+
     def _get_tool_schemas(self, tools: Optional[List[str]]) -> List[Dict]:
         """
         获取工具 Schema
@@ -1438,17 +1612,38 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
     # 默认 system prompt 前缀(当 config.system_prompt 和前端都未提供 system message 时使用)
     DEFAULT_SYSTEM_PREFIX = "你是最顶尖的AI助手,可以拆分并调用工具逐步解决复杂问题。"
 
-    async def _build_system_prompt(self, config: RunConfig) -> Optional[str]:
-        """构建 system prompt(注入 skills)"""
-        system_prompt = config.system_prompt
+    async def _build_system_prompt(self, config: RunConfig, base_prompt: Optional[str] = None) -> Optional[str]:
+        """构建 system prompt(注入 skills)
+
+        优先级:
+        1. config.skills 显式指定 → 按名称过滤
+        2. config.skills 为 None → 查 preset 的默认 skills 列表
+        3. preset 也无 skills(None)→ 加载全部(向后兼容)
+
+        Args:
+            base_prompt: 已有 system 内容(来自消息或 config.system_prompt),
+                         None 时使用 config.system_prompt
+        """
+        from agent.core.presets import AGENT_PRESETS
+
+        system_prompt = base_prompt if base_prompt is not None else config.system_prompt
+
+        # 确定要加载哪些 skills
+        skills_filter: Optional[List[str]] = config.skills
+        if skills_filter is None:
+            preset = AGENT_PRESETS.get(config.agent_type)
+            if preset is not None:
+                skills_filter = preset.skills  # 可能仍为 None(加载全部)
+
+        # 加载并过滤
+        all_skills = load_skills_from_dir(self.skills_dir)
+        if skills_filter is not None:
+            skills = [s for s in all_skills if s.name in skills_filter]
+        else:
+            skills = all_skills
 
-        # 加载 Skills
-        skills_text = ""
-        skills = load_skills_from_dir(self.skills_dir)
-        if skills:
-            skills_text = self._format_skills(skills)
+        skills_text = self._format_skills(skills) if skills else ""
 
-        # 拼装:有自定义 system_prompt 则用它,否则用默认前缀
         if system_prompt:
             if skills_text:
                 system_prompt += f"\n\n## Skills\n{skills_text}"

+ 458 - 3
agent/llm/openrouter.py

@@ -2,7 +2,11 @@
 OpenRouter Provider
 
 使用 OpenRouter API 调用各种模型(包括 Claude Sonnet 4.5)
-支持 OpenAI 兼容的 API 格式
+
+路由策略:
+- Claude 模型:走 OpenRouter 的 Anthropic 原生端点(/api/v1/messages),
+  使用自包含的格式转换逻辑,确保多模态工具结果(截图等)正确传递。
+- 其他模型:走 OpenAI 兼容端点(/api/v1/chat/completions)。
 
 OpenRouter 转发多种模型,需要根据实际模型处理不同的 usage 格式:
 - OpenAI 模型: prompt_tokens, completion_tokens, completion_tokens_details.reasoning_tokens
@@ -15,6 +19,7 @@ import json
 import asyncio
 import logging
 import httpx
+from pathlib import Path
 from typing import List, Dict, Any, Optional
 
 from .usage import TokenUsage, create_usage_from_response
@@ -34,6 +39,325 @@ _RETRYABLE_EXCEPTIONS = (
 )
 
 
+# ── OpenRouter Anthropic endpoint: model name mapping ──────────────────────
+# Local copy of yescode's model tables so this module is self-contained.
+_OR_MODEL_EXACT = {
+    "claude-sonnet-4-6": "claude-sonnet-4-6",
+    "claude-sonnet-4.6": "claude-sonnet-4-6",
+    "claude-sonnet-4-5-20250929": "claude-sonnet-4-5-20250929",
+    "claude-sonnet-4-5": "claude-sonnet-4-5-20250929",
+    "claude-sonnet-4.5": "claude-sonnet-4-5-20250929",
+    "claude-opus-4-6": "claude-opus-4-6",
+    "claude-opus-4-5-20251101": "claude-opus-4-5-20251101",
+    "claude-opus-4-5": "claude-opus-4-5-20251101",
+    "claude-opus-4-1-20250805": "claude-opus-4-1-20250805",
+    "claude-opus-4-1": "claude-opus-4-1-20250805",
+    "claude-haiku-4-5-20251001": "claude-haiku-4-5-20251001",
+    "claude-haiku-4-5": "claude-haiku-4-5-20251001",
+}
+
+_OR_MODEL_FUZZY = [
+    ("sonnet-4-6", "claude-sonnet-4-6"),
+    ("sonnet-4.6", "claude-sonnet-4-6"),
+    ("sonnet-4-5", "claude-sonnet-4-5-20250929"),
+    ("sonnet-4.5", "claude-sonnet-4-5-20250929"),
+    ("opus-4-6", "claude-opus-4-6"),
+    ("opus-4.6", "claude-opus-4-6"),
+    ("opus-4-5", "claude-opus-4-5-20251101"),
+    ("opus-4.5", "claude-opus-4-5-20251101"),
+    ("opus-4-1", "claude-opus-4-1-20250805"),
+    ("opus-4.1", "claude-opus-4-1-20250805"),
+    ("haiku-4-5", "claude-haiku-4-5-20251001"),
+    ("haiku-4.5", "claude-haiku-4-5-20251001"),
+    ("sonnet", "claude-sonnet-4-6"),
+    ("opus", "claude-opus-4-6"),
+    ("haiku", "claude-haiku-4-5-20251001"),
+]
+
+
+def _resolve_openrouter_model(model: str) -> str:
+    """Normalize a model name for OpenRouter's Anthropic endpoint.
+
+    Strips ``anthropic/`` prefix, resolves aliases / dot-notation,
+    and re-prepends ``anthropic/`` for OpenRouter routing.
+    """
+    # 1. Strip provider prefix
+    bare = model.split("/", 1)[1] if "/" in model else model
+
+    # 2. Exact match
+    if bare in _OR_MODEL_EXACT:
+        return f"anthropic/{_OR_MODEL_EXACT[bare]}"
+
+    # 3. Fuzzy keyword match (case-insensitive)
+    bare_lower = bare.lower()
+    for keyword, target in _OR_MODEL_FUZZY:
+        if keyword in bare_lower:
+            logger.info("[OpenRouter] Model fuzzy match: %s → anthropic/%s", model, target)
+            return f"anthropic/{target}"
+
+    # 4. Fallback – return as-is (let API report the error)
+    logger.warning("[OpenRouter] Could not resolve model name: %s, passing as-is", model)
+    return model
+
+
+# ── OpenRouter Anthropic endpoint: format conversion helpers ───────────────
+
+def _get_image_dimensions(data: bytes) -> Optional[tuple]:
+    """从图片二进制数据的文件头解析宽高,支持 PNG/JPEG。不依赖 PIL。"""
+    try:
+        # PNG: 前 8 字节签名,IHDR chunk 在 16-24 字节存宽高 (big-endian uint32)
+        if data[:8] == b'\x89PNG\r\n\x1a\n' and len(data) >= 24:
+            import struct
+            w, h = struct.unpack('>II', data[16:24])
+            return (w, h)
+        # JPEG: 扫描 SOF0/SOF2 marker (0xFFC0/0xFFC2)
+        if data[:2] == b'\xff\xd8':
+            import struct
+            i = 2
+            while i < len(data) - 9:
+                if data[i] != 0xFF:
+                    break
+                marker = data[i + 1]
+                if marker in (0xC0, 0xC2):
+                    h, w = struct.unpack('>HH', data[i + 5:i + 9])
+                    return (w, h)
+                length = struct.unpack('>H', data[i + 2:i + 4])[0]
+                i += 2 + length
+    except Exception:
+        pass
+    return None
+
+
+def _to_anthropic_content(content: Any) -> Any:
+    """Convert OpenAI-style *content* (string or block list) to Anthropic format.
+
+    Handles ``image_url`` blocks → Anthropic ``image`` blocks (base64 or url).
+    Passes through ``text`` blocks and ``cache_control`` unchanged.
+    """
+    if not isinstance(content, list):
+        return content
+
+    result = []
+    for block in content:
+        if not isinstance(block, dict):
+            result.append(block)
+            continue
+
+        if block.get("type") == "image_url":
+            image_url_obj = block.get("image_url", {})
+            url = image_url_obj.get("url", "") if isinstance(image_url_obj, dict) else str(image_url_obj)
+            if url.startswith("data:"):
+                header, _, data = url.partition(",")
+                media_type = header.split(":")[1].split(";")[0] if ":" in header else "image/png"
+                import base64 as b64mod
+                raw = b64mod.b64decode(data)
+                dims = _get_image_dimensions(raw)
+                img_block = {
+                    "type": "image",
+                    "source": {
+                        "type": "base64",
+                        "media_type": media_type,
+                        "data": data,
+                    },
+                }
+                if dims:
+                    img_block["_image_meta"] = {"width": dims[0], "height": dims[1]}
+                result.append(img_block)
+            else:
+                # 检测本地文件路径,自动转 base64
+                local_path = Path(url)
+                if local_path.exists() and local_path.is_file():
+                    import base64 as b64mod
+                    import mimetypes
+                    mime_type, _ = mimetypes.guess_type(str(local_path))
+                    mime_type = mime_type or "image/png"
+                    raw = local_path.read_bytes()
+                    dims = _get_image_dimensions(raw)
+                    b64_data = b64mod.b64encode(raw).decode("ascii")
+                    logger.info(f"[OpenRouter] 本地图片自动转 base64: {url} ({len(raw)} bytes)")
+                    img_block = {
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": mime_type,
+                            "data": b64_data,
+                        },
+                    }
+                    if dims:
+                        img_block["_image_meta"] = {"width": dims[0], "height": dims[1]}
+                    result.append(img_block)
+                else:
+                    result.append({
+                        "type": "image",
+                        "source": {"type": "url", "url": url},
+                    })
+        else:
+            result.append(block)
+    return result
+
+
def _to_anthropic_messages(messages: List[Dict[str, Any]]) -> tuple:
    """Convert an OpenAI-format message list to Anthropic Messages API format.

    Returns ``(system_prompt, anthropic_messages)`` where *system_prompt* is
    ``None`` or a string extracted from ``role=system`` messages (if several
    system messages appear, the last one wins), and *anthropic_messages* is
    the converted list.
    """
    system_prompt = None
    anthropic_messages: List[Dict[str, Any]] = []

    for msg in messages:
        role = msg.get("role", "")
        content = msg.get("content", "")

        if role == "system":
            # Anthropic takes the system prompt as a top-level request field,
            # not as a message in the list.
            system_prompt = content

        elif role == "user":
            anthropic_messages.append({
                "role": "user",
                "content": _to_anthropic_content(content),
            })

        elif role == "assistant":
            tool_calls = msg.get("tool_calls")
            if tool_calls:
                content_blocks: List[Dict[str, Any]] = []
                if content:
                    # content may be a plain string or already a block list
                    # (e.g. after cache_control injection); handle both so we
                    # never emit {"type": "text", "text": [...]}.
                    converted = _to_anthropic_content(content)
                    if isinstance(converted, list):
                        content_blocks.extend(converted)
                    elif isinstance(converted, str) and converted.strip():
                        content_blocks.append({"type": "text", "text": converted})
                for tc in tool_calls:
                    func = tc.get("function", {})
                    args_str = func.get("arguments", "{}")
                    try:
                        args = json.loads(args_str) if isinstance(args_str, str) else args_str
                    except json.JSONDecodeError:
                        # Malformed tool arguments degrade to an empty input.
                        args = {}
                    content_blocks.append({
                        "type": "tool_use",
                        "id": tc.get("id", ""),
                        "name": func.get("name", ""),
                        "input": args,
                    })
                anthropic_messages.append({"role": "assistant", "content": content_blocks})
            else:
                anthropic_messages.append({"role": "assistant", "content": content})

        elif role == "tool":
            # Split tool result into text-only tool_result + sibling image blocks.
            # Images nested inside tool_result.content are not reliably passed
            # through by all proxies (e.g. OpenRouter).  Placing them as sibling
            # content blocks in the same user message is more compatible.
            converted = _to_anthropic_content(content)
            text_parts: List[Dict[str, Any]] = []
            image_parts: List[Dict[str, Any]] = []
            if isinstance(converted, list):
                for block in converted:
                    if isinstance(block, dict) and block.get("type") == "image":
                        image_parts.append(block)
                    else:
                        text_parts.append(block)
            elif isinstance(converted, str):
                text_parts = [{"type": "text", "text": converted}] if converted else []

            # tool_result keeps only text content
            tool_result_block: Dict[str, Any] = {
                "type": "tool_result",
                "tool_use_id": msg.get("tool_call_id", ""),
            }
            if len(text_parts) == 1 and text_parts[0].get("type") == "text":
                # A single text block collapses to a plain string payload.
                tool_result_block["content"] = text_parts[0]["text"]
            elif text_parts:
                tool_result_block["content"] = text_parts
            # (omit content key entirely when empty – Anthropic accepts this)

            # Build the blocks to append: tool_result first, then any images
            new_blocks = [tool_result_block] + image_parts

            # Merge consecutive tool results into one user message
            if (anthropic_messages
                    and anthropic_messages[-1].get("role") == "user"
                    and isinstance(anthropic_messages[-1].get("content"), list)
                    and anthropic_messages[-1]["content"]
                    and anthropic_messages[-1]["content"][0].get("type") == "tool_result"):
                anthropic_messages[-1]["content"].extend(new_blocks)
            else:
                anthropic_messages.append({
                    "role": "user",
                    "content": new_blocks,
                })

    return system_prompt, anthropic_messages
+
+
+def _to_anthropic_tools(tools: List[Dict]) -> List[Dict]:
+    """Convert OpenAI tool definitions to Anthropic format."""
+    anthropic_tools = []
+    for tool in tools:
+        if tool.get("type") == "function":
+            func = tool["function"]
+            anthropic_tools.append({
+                "name": func.get("name", ""),
+                "description": func.get("description", ""),
+                "input_schema": func.get("parameters", {"type": "object", "properties": {}}),
+            })
+    return anthropic_tools
+
+
def _parse_anthropic_response(result: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize an Anthropic Messages API response body.

    Returns a dict with keys: ``content`` (all text blocks joined with
    newlines), ``tool_calls`` (OpenAI-style list, or None when absent),
    ``finish_reason`` (mapped to OpenAI naming), ``usage`` (TokenUsage).
    """
    texts: List[str] = []
    calls: List[Dict[str, Any]] = []
    for part in result.get("content", []):
        kind = part.get("type")
        if kind == "text":
            texts.append(part.get("text", ""))
        elif kind == "tool_use":
            calls.append({
                "id": part.get("id", ""),
                "type": "function",
                "function": {
                    "name": part.get("name", ""),
                    "arguments": json.dumps(part.get("input", {}), ensure_ascii=False),
                },
            })

    # Anthropic stop reasons → OpenAI-style finish reasons; unknown values
    # are passed through unchanged.
    reason_map = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
    }
    stop = result.get("stop_reason", "end_turn")

    raw_usage = result.get("usage", {})

    return {
        "content": "\n".join(texts),
        "tool_calls": calls or None,
        "finish_reason": reason_map.get(stop, stop),
        "usage": TokenUsage(
            input_tokens=raw_usage.get("input_tokens", 0),
            output_tokens=raw_usage.get("output_tokens", 0),
            cache_creation_tokens=raw_usage.get("cache_creation_input_tokens", 0),
            cache_read_tokens=raw_usage.get("cache_read_input_tokens", 0),
        ),
    }
+
+
+# ── Provider detection / usage parsing ─────────────────────────────────────
+
 def _detect_provider_from_model(model: str) -> str:
     """根据模型名称检测提供商"""
     model_lower = model.lower()
@@ -60,11 +384,20 @@ def _parse_openrouter_usage(usage: Dict[str, Any], model: str) -> TokenUsage:
     # OpenRouter 通常返回 OpenAI 格式,但可能包含额外字段
     if provider == "anthropic":
         # Claude 模型可能有缓存字段
+        # OpenRouter 使用 prompt_tokens_details 嵌套结构
+        prompt_details = usage.get("prompt_tokens_details", {})
+
+        # 调试:打印原始 usage
+        if logger.isEnabledFor(logging.DEBUG):
+            logger.debug(f"[OpenRouter] Raw usage: {usage}")
+            logger.debug(f"[OpenRouter] prompt_tokens_details: {prompt_details}")
+
         return TokenUsage(
             input_tokens=usage.get("prompt_tokens") or usage.get("input_tokens", 0),
             output_tokens=usage.get("completion_tokens") or usage.get("output_tokens", 0),
-            cache_creation_tokens=usage.get("cache_creation_input_tokens", 0),
-            cache_read_tokens=usage.get("cache_read_input_tokens", 0),
+            # OpenRouter 格式:prompt_tokens_details.cached_tokens / cache_write_tokens
+            cache_read_tokens=prompt_details.get("cached_tokens", 0),
+            cache_creation_tokens=prompt_details.get("cache_write_tokens", 0),
         )
     elif provider == "deepseek":
         # DeepSeek 可能有 reasoning_tokens
@@ -130,6 +463,122 @@ def _normalize_tool_call_ids(messages: List[Dict[str, Any]], target_prefix: str)
     return result
 
 
async def _openrouter_anthropic_call(
    messages: List[Dict[str, Any]],
    model: str,
    tools: Optional[List[Dict]],
    api_key: str,
    **kwargs,
) -> Dict[str, Any]:
    """
    Call a Claude model through OpenRouter's native Anthropic endpoint.

    Uses the Anthropic Messages API format (/api/v1/messages) with the
    self-contained conversion helpers in this module, so multimodal tool
    results (screenshots etc.) are passed through correctly.

    Args:
        messages: Conversation history in OpenAI chat format.
        model: Requested model name (aliases resolved via
            _resolve_openrouter_model).
        tools: OpenAI-style tool definitions, or None.
        api_key: OpenRouter API key.
        **kwargs: Optional ``max_tokens`` (default 16384) and ``temperature``.

    Returns:
        Unified response dict: content, tool_calls, token counts,
        finish_reason, cost and the raw TokenUsage object.

    Raises:
        httpx.HTTPStatusError: on non-retryable HTTP errors, or once retries
            are exhausted for retryable status codes.
    """
    endpoint = "https://openrouter.ai/api/v1/messages"

    # Resolve model name for OpenRouter (e.g. "claude-sonnet-4.5" → "anthropic/claude-sonnet-4-5-20250929")
    resolved_model = _resolve_openrouter_model(model)
    logger.info("[OpenRouter/Anthropic] model: %s → %s", model, resolved_model)

    # When resuming a run across providers, rewrite incompatible tool_call_ids
    # to the Anthropic-style "toolu_" prefix.
    messages = _normalize_tool_call_ids(messages, "toolu")

    # OpenAI format → Anthropic format
    system_prompt, anthropic_messages = _to_anthropic_messages(messages)

    # Diagnostic: count image blocks in the payload so multimodal issues are
    # visible in the logs (avoid print() – stdout belongs to the host app).
    img_count = sum(
        1
        for m in anthropic_messages
        if isinstance(m.get("content"), list)
        for b in m["content"]
        if isinstance(b, dict) and b.get("type") == "image"
    )
    if img_count:
        logger.info("[OpenRouter/Anthropic] payload contains %d image block(s)", img_count)

    payload: Dict[str, Any] = {
        "model": resolved_model,
        "messages": anthropic_messages,
        "max_tokens": kwargs.get("max_tokens", 16384),
    }
    if system_prompt is not None:
        payload["system"] = system_prompt
    if tools:
        payload["tools"] = _to_anthropic_tools(tools)
    if "temperature" in kwargs:
        payload["temperature"] = kwargs["temperature"]

    headers = {
        "Authorization": f"Bearer {api_key}",
        "anthropic-version": "2023-06-01",
        "content-type": "application/json",
        "HTTP-Referer": "https://github.com/your-repo",
        "X-Title": "Agent Framework",
    }

    max_retries = 3
    last_exception = None
    for attempt in range(max_retries):
        async with httpx.AsyncClient(timeout=300.0) as client:
            try:
                response = await client.post(endpoint, json=payload, headers=headers)
                response.raise_for_status()
                result = response.json()
                break

            except httpx.HTTPStatusError as e:
                status = e.response.status_code
                error_body = e.response.text
                # Retry transient server-side failures with exponential backoff.
                if status in (429, 500, 502, 503, 504) and attempt < max_retries - 1:
                    wait = 2 ** attempt * 2
                    logger.warning(
                        "[OpenRouter/Anthropic] HTTP %d (attempt %d/%d), retrying in %ds: %s",
                        status, attempt + 1, max_retries, wait, error_body[:200],
                    )
                    await asyncio.sleep(wait)
                    last_exception = e
                    continue
                logger.error("[OpenRouter/Anthropic] HTTP %d error body: %s", status, error_body)
                raise

            except _RETRYABLE_EXCEPTIONS as e:
                last_exception = e
                if attempt < max_retries - 1:
                    wait = 2 ** attempt * 2
                    logger.warning(
                        "[OpenRouter/Anthropic] %s (attempt %d/%d), retrying in %ds",
                        type(e).__name__, attempt + 1, max_retries, wait,
                    )
                    await asyncio.sleep(wait)
                    continue
                raise
    else:
        # Defensive: loop exhausted without a break – surface the last failure.
        raise last_exception  # type: ignore[misc]

    # Parse the Anthropic response into the unified format.
    parsed = _parse_anthropic_response(result)
    usage = parsed["usage"]
    cost = calculate_cost(model, usage)

    return {
        "content": parsed["content"],
        "tool_calls": parsed["tool_calls"],
        "prompt_tokens": usage.input_tokens,
        "completion_tokens": usage.output_tokens,
        "reasoning_tokens": usage.reasoning_tokens,
        "cache_creation_tokens": usage.cache_creation_tokens,
        "cache_read_tokens": usage.cache_read_tokens,
        "finish_reason": parsed["finish_reason"],
        "cost": cost,
        "usage": usage,
    }
+
+
 async def openrouter_llm_call(
     messages: List[Dict[str, Any]],
     model: str = "anthropic/claude-sonnet-4.5",
@@ -159,6 +608,12 @@ async def openrouter_llm_call(
     if not api_key:
         raise ValueError("OPEN_ROUTER_API_KEY environment variable not set")
 
+    # Claude 模型走 Anthropic 原生端点,其余走 OpenAI 兼容端点
+    provider = _detect_provider_from_model(model)
+    if provider == "anthropic":
+        logger.debug("[OpenRouter] Routing Claude model to Anthropic native endpoint")
+        return await _openrouter_anthropic_call(messages, model, tools, api_key, **kwargs)
+
     base_url = "https://openrouter.ai/api/v1"
     endpoint = f"{base_url}/chat/completions"
 

+ 7 - 1
agent/llm/yescode.py

@@ -212,7 +212,13 @@ def _convert_messages_to_anthropic(messages: List[Dict[str, Any]]) -> tuple:
             if tool_calls:
                 content_blocks = []
                 if content:
-                    content_blocks.append({"type": "text", "text": content})
+                    # content 可能已被 _add_cache_control 转成 list(含 cache_control),
+                    # 也可能是普通字符串。两者都需要正确处理,避免产生 {"type":"text","text":[...]}
+                    converted = _convert_content_to_anthropic(content)
+                    if isinstance(converted, list):
+                        content_blocks.extend(converted)
+                    elif isinstance(converted, str) and converted.strip():
+                        content_blocks.append({"type": "text", "text": converted})
                 for tc in tool_calls:
                     func = tc.get("function", {})
                     args_str = func.get("arguments", "{}")

+ 35 - 0
agent/memory/skills/browser.md

@@ -0,0 +1,35 @@
+---
+name: browser
+description: 浏览器自动化工具使用指南
+---
+
+## 浏览器工具使用指南
+
+所有浏览器工具都以 `browser_` 为前缀。浏览器会话会持久化,无需每次重新启动。
+
+### 基本工作流程
+
+1. **页面导航**: 使用 `browser_navigate_to_url` 或 `browser_search_web` 到达目标页面
+2. **等待加载**: 页面跳转后调用 `browser_wait(seconds=2)` 等待内容加载
+3. **获取元素索引**: 调用 `browser_get_visual_selector_map` 获取可交互元素的索引映射和当前界面的截图
+4. **执行交互**: 使用 `browser_click_element`、`browser_input_text` 等工具操作页面
+5. **提取内容**: 使用 `browser_extract_content`, `browser_read_long_content`, `browser_get_page_html` 获取数据
+
+### 关键原则
+
+- **禁止模拟结果**:不要输出你认为的搜索结果,而是要调用工具获取真实结果
+- **必须先获取索引**: 所有 `index` 参数都需要先通过 `browser_get_selector_map` 或 `browser_get_visual_selector_map` 获取
+- **高级工具**:优先使用 `browser_extract_content`, `browser_read_long_content` 等工具获取数据,而不是使用 `browser_get_selector_map` 获取索引后手动解析
+- **操作后等待**: 任何可能触发页面变化的操作(点击、输入、滚动)后都要调用 `browser_wait`
+- **登录处理**:
+  - **正常登录**:当遇到需要登录的网页时,使用 `browser_load_cookies` 来登录
+  - **首次登录**:当没有该网站的 cookie 时,点击进入登录界面,然后等待人类来登录,登录后使用 `browser_export_cookies` 将账户信息存储下来
+- **复杂操作用JS**: 当标准工具无法满足时,使用 `browser_evaluate` 执行 JavaScript 代码
+
+### 工具分类
+
+**导航**: browser_navigate_to_url, browser_search_web, browser_go_back, browser_wait
+**交互**: browser_click_element, browser_input_text, browser_send_keys, browser_upload_file
+**视图**: browser_scroll_page, browser_find_text, browser_screenshot
+**提取**: browser_extract_content, browser_read_long_content, browser_get_page_html, browser_get_selector_map, browser_get_visual_selector_map
+**高级**: browser_evaluate, browser_load_cookies, browser_export_cookies, browser_wait_for_user_action, browser_download_direct_url

+ 65 - 0
agent/memory/skills/planning.md

@@ -0,0 +1,65 @@
+---
+name: planning
+description: 计划管理,使用 goal 工具管理执行计划和目标树
+---
+
+## 计划与执行
+
+使用 `goal` 工具管理执行计划。目标树是你的工作记忆——系统会定期将当前计划注入给你,帮助你追踪进度和关键结论。
+
+### 核心原则
+
+- **先明确目标再行动**:开始执行前,用 `goal` 明确当前要做什么
+- **灵活运用,不受约束**:
+  - 可以先做全局规划再行动:`goal(add="调研方案, 实现方案, 测试验证")`
+  - 可以走一步看一步,每次只规划下一个目标
+  - 行动中可以动态放弃并调整:`goal(abandon="方案不可行")`
+  - 规划本身可以作为一个目标(如 "调研并确定技术方案")
+- **简单任务只需一个目标**:`goal(add="将CSV转换为JSON")` 即可,不需要强制拆分
+
+### 使用方式
+
+创建目标:
+
+```
+goal(add="调研并确定方案, 执行方案, 评估结果")
+```
+
+聚焦并开始执行(使用计划视图中的 ID,如 "1", "2.1"):
+
+```
+goal(focus="1")
+```
+
+完成目标,记录**关键结论**(不是过程描述):
+
+```
+goal(done="最佳方案是openpose,精度高且支持多人检测")
+```
+
+完成并切换到下一个:
+
+```
+goal(done="openpose方案确认可行", focus="2")
+```
+
+添加子目标或同级目标:
+
+```
+goal(add="设计接口, 实现代码", under="2")
+goal(add="编写文档", after="2")
+```
+
+放弃不可行的目标:
+
+```
+goal(abandon="方案A需要Redis,环境没有")
+```
+
+### 使用规范
+
+1. **聚焦到具体目标**:始终将焦点放在你正在执行的最具体的子目标上,而不是父目标。创建子目标后立即 `focus` 到第一个要执行的子目标。完成后用 `done` + `focus` 切换到下一个。
+2. **同时只有一个目标处于执行中**:完成当前目标后再切换
+3. **summary 记录结论**:记录关键发现,而非 "已完成调研" 这样无信息量的描述
+4. **计划可调整**:根据执行情况随时追加、跳过或放弃目标
+5. **使用 ID 定位**:focus、after、under 参数使用目标的 ID(如 "1", "2.1")

+ 10 - 0
agent/memory/skills/research.md

@@ -0,0 +1,10 @@
+---
+name: research
+description: 信息调研,使用搜索工具和浏览器获取外部信息
+---
+
+## 信息调研
+
+你可以通过联网搜索工具 `search_posts` 获取来自 Github、小红书、微信公众号、知乎等渠道的信息。对于需要深度交互的网页内容,使用浏览器工具进行操作。
+
+调研过程可能需要多次搜索,比如基于搜索结果中获得的启发或信息启动新的搜索,直到得到令人满意的答案。你可以使用 `goal` 工具管理搜索的过程,或者使用文档记录搜索的中间或最终结果。

+ 2 - 0
agent/tools/builtin/__init__.py

@@ -17,6 +17,7 @@ from agent.tools.builtin.skill import skill, list_skills
 from agent.tools.builtin.subagent import agent, evaluate
 from agent.tools.builtin.experience import get_experience
 from agent.tools.builtin.search import search_posts, get_search_suggestions
+from agent.tools.builtin.nanobanana import nanobanana_extract_features
 from agent.tools.builtin.sandbox import (sandbox_create_environment, sandbox_run_shell,
                                          sandbox_rebuild_with_ports,sandbox_destroy_environment)
 
@@ -41,6 +42,7 @@ __all__ = [
     "evaluate",
     "search_posts",
     "get_search_suggestions",
+    "nanobanana_extract_features",
     "sandbox_create_environment",
     "sandbox_run_shell",
     "sandbox_rebuild_with_ports",

+ 101 - 9
agent/tools/builtin/file/read.py

@@ -11,9 +11,13 @@ Read Tool - 文件读取工具
 """
 
 import os
+import base64
 import mimetypes
 from pathlib import Path
 from typing import Optional
+from urllib.parse import urlparse
+
+import httpx
 
 from agent.tools import tool, ToolResult, ToolContext
 
@@ -23,7 +27,7 @@ MAX_LINE_LENGTH = 2000
 MAX_BYTES = 50 * 1024  # 50KB
 
 
-@tool(description="读取文件内容,支持文本文件、图片、PDF 等多种格式")
+@tool(description="读取文件内容,支持文本文件、图片、PDF 等多种格式,也支持 HTTP/HTTPS URL")
 async def read_file(
     file_path: str,
     offset: int = 0,
@@ -36,7 +40,7 @@ async def read_file(
     参考 OpenCode 实现
 
     Args:
-        file_path: 文件路径(绝对路径或相对路径
+        file_path: 文件路径(绝对路径、相对路径或 HTTP/HTTPS URL
         offset: 起始行号(从 0 开始)
         limit: 读取行数(默认 2000 行)
         context: 工具上下文
@@ -44,6 +48,11 @@ async def read_file(
     Returns:
         ToolResult: 文件内容
     """
+    # 检测是否为 HTTP/HTTPS URL
+    parsed = urlparse(file_path)
+    if parsed.scheme in ("http", "https"):
+        return await _read_from_url(file_path)
+
     # 解析路径
     path = Path(file_path)
     if not path.is_absolute():
@@ -79,13 +88,25 @@ async def read_file(
 
     # 图片文件(参考 opencode:66-91)
     if mime_type.startswith("image/") and mime_type not in ["image/svg+xml", "image/vnd.fastbidsheet"]:
-        # 注意:实际项目中需要实现图片的 base64 编码
-        # 这里简化处理
-        return ToolResult(
-            title=path.name,
-            output=f"图片文件: {path.name} (MIME: {mime_type})",
-            metadata={"mime_type": mime_type, "truncated": False}
-        )
+        try:
+            raw = path.read_bytes()
+            b64_data = base64.b64encode(raw).decode("ascii")
+            return ToolResult(
+                title=path.name,
+                output=f"图片文件: {path.name} (MIME: {mime_type}, {len(raw)} bytes)",
+                metadata={"mime_type": mime_type, "truncated": False},
+                images=[{
+                    "type": "base64",
+                    "media_type": mime_type,
+                    "data": b64_data,
+                }],
+            )
+        except Exception as e:
+            return ToolResult(
+                title=path.name,
+                output=f"图片文件读取失败: {path.name}: {e}",
+                error=str(e),
+            )
 
     # PDF 文件
     if mime_type == "application/pdf":
@@ -225,3 +246,74 @@ def _is_binary_file(path: Path) -> bool:
 
     except Exception:
         return False
+
+
async def _read_from_url(url: str) -> ToolResult:
    """
    Read file content from an HTTP/HTTPS URL.

    Mainly intended for images and other media: image responses are
    converted to base64 attachments, text responses are returned inline,
    and anything else is reported as opaque binary.
    """
    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.get(url)
            response.raise_for_status()

            content_type = response.headers.get("content-type", "")
            raw = response.content

            # Derive a display name from the URL path (urlparse comes from the
            # module-level import).
            parsed = urlparse(url)
            filename = Path(parsed.path).name or "downloaded_file"

            # Image: detected by Content-Type header or by file extension.
            if content_type.startswith("image/") or any(url.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"]):
                mime_type = content_type.split(";")[0] if content_type else "image/jpeg"
                b64_data = base64.b64encode(raw).decode("ascii")
                return ToolResult(
                    title=filename,
                    output=f"图片文件: {filename} (URL: {url}, MIME: {mime_type}, {len(raw)} bytes)",
                    metadata={"mime_type": mime_type, "url": url, "truncated": False},
                    images=[{
                        "type": "base64",
                        "media_type": mime_type,
                        "data": b64_data,
                    }],
                )

            # Text / JSON: decode leniently and return the full body.
            if content_type.startswith("text/") or content_type == "application/json":
                text = raw.decode("utf-8", errors="replace")
                lines = text.split("\n")
                preview = "\n".join(lines[:20])
                return ToolResult(
                    title=filename,
                    output=f"<file>\n{text}\n</file>",
                    metadata={
                        "preview": preview,
                        "url": url,
                        "mime_type": content_type,
                        "total_lines": len(lines),
                    }
                )

            # Anything else: report as opaque binary.
            return ToolResult(
                title=filename,
                output=f"二进制文件: {filename} (URL: {url}, {len(raw)} bytes)",
                metadata={"url": url, "mime_type": content_type, "size": len(raw)}
            )

    except httpx.HTTPStatusError as e:
        return ToolResult(
            title="HTTP 错误",
            output=f"无法下载文件: {url}\nHTTP {e.response.status_code}: {e.response.reason_phrase}",
            error=str(e)
        )
    except Exception as e:
        # Best-effort tool: surface any network/decoding failure as a result
        # rather than crashing the agent loop.
        return ToolResult(
            title="下载失败",
            output=f"无法从 URL 读取文件: {url}\n错误: {str(e)}",
            error=str(e)
        )

+ 572 - 0
agent/tools/builtin/nanobanana.py

@@ -0,0 +1,572 @@
+"""
+NanoBanana Tool - 图像特征提取与图像生成
+
+该工具可以提取图片中的特征,也可以根据描述生成图片。
+支持通过 OpenRouter 调用多模态模型,提取结构化的图像特征并保存为 JSON,
+或基于输入图像生成新的图像。
+"""
+
+import base64
+import json
+import mimetypes
+import os
+import re
+from pathlib import Path
+from typing import Optional, Dict, Any, List, Tuple
+
+import httpx
+from dotenv import load_dotenv
+
+from agent.tools import tool, ToolResult
+
+OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+DEFAULT_TIMEOUT = 120.0
+
+DEFAULT_EXTRACTION_PROMPT = (
+    "请从这张图像中提取跨场景相对稳定、可复用的视觉不变特征。"
+    "输出严格 JSON,字段包含:identity_features、pose_features、appearance_features、"
+    "material_features、style_features、uncertainty、notes。"
+    "每个字段给出简洁要点,避免臆测。"
+)
+
+DEFAULT_IMAGE_PROMPT = (
+    "基于输入图像生成一张保留主体身份与关键视觉特征的新图像。"
+    "保持人物核心特征一致,同时提升清晰度与可用性。"
+)
+
+DEFAULT_IMAGE_MODEL_CANDIDATES = [
+    "google/gemini-2.5-flash-image",
+    "google/gemini-3-pro-image-preview",
+    "black-forest-labs/flux.2-flex",
+    "black-forest-labs/flux.2-pro",
+]
+
+
+def _resolve_api_key() -> Optional[str]:
+    """优先读取环境变量,缺失时尝试从 .env 加载。"""
+    api_key = os.getenv("OPENROUTER_API_KEY") or os.getenv("OPEN_ROUTER_API_KEY")
+    if api_key:
+        return api_key
+
+    load_dotenv()
+    return os.getenv("OPENROUTER_API_KEY") or os.getenv("OPEN_ROUTER_API_KEY")
+
+
+def _image_to_data_url(image_path: Path) -> str:
+    """将图片文件编码为 data URL。"""
+    mime_type = mimetypes.guess_type(str(image_path))[0] or "application/octet-stream"
+    raw = image_path.read_bytes()
+    b64 = base64.b64encode(raw).decode("utf-8")
+    return f"data:{mime_type};base64,{b64}"
+
+
+def _safe_json_parse(content: str) -> Dict[str, Any]:
+    """尽量从模型文本中提取 JSON。"""
+    try:
+        return json.loads(content)
+    except json.JSONDecodeError:
+        start = content.find("{")
+        end = content.rfind("}")
+        if start != -1 and end != -1 and end > start:
+            candidate = content[start:end + 1]
+            return json.loads(candidate)
+        raise
+
+
+def _extract_data_url_images(message: Dict[str, Any]) -> List[Tuple[str, str]]:
+    """
+    从 OpenRouter 响应消息中提取 data URL 图片。
+
+    Returns:
+        List[(mime_type, base64_data)]
+    """
+    extracted: List[Tuple[str, str]] = []
+
+    # 官方文档中的主要位置:message.images[]
+    for img in message.get("images", []) or []:
+        if not isinstance(img, dict):
+            continue
+        if img.get("type") != "image_url":
+            continue
+        data_url = ((img.get("image_url") or {}).get("url") or "").strip()
+        if not data_url.startswith("data:"):
+            continue
+        m = re.match(r"^data:([^;]+);base64,(.+)$", data_url, flags=re.DOTALL)
+        if not m:
+            continue
+        extracted.append((m.group(1), m.group(2)))
+
+    # 兼容某些模型可能把 image_url 放在 content 数组中
+    content = message.get("content")
+    if isinstance(content, list):
+        for part in content:
+            if not isinstance(part, dict):
+                continue
+            if part.get("type") != "image_url":
+                continue
+            data_url = ((part.get("image_url") or {}).get("url") or "").strip()
+            if not data_url.startswith("data:"):
+                continue
+            m = re.match(r"^data:([^;]+);base64,(.+)$", data_url, flags=re.DOTALL)
+            if not m:
+                continue
+            extracted.append((m.group(1), m.group(2)))
+
+    return extracted
+
+
+def _extract_image_refs(choice: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, str]]:
+    """
+    尝试从不同响应格式中提取图片引用。
+
+    返回格式:
+    - {"kind": "data_url", "value": "data:image/png;base64,..."}
+    - {"kind": "base64", "value": "...", "mime_type": "image/png"}
+    - {"kind": "url", "value": "https://..."}
+    """
+    refs: List[Dict[str, str]] = []
+
+    # 1) 标准 message.images
+    for img in message.get("images", []) or []:
+        if not isinstance(img, dict):
+            continue
+        # image_url 结构
+        data_url = ((img.get("image_url") or {}).get("url") or "").strip()
+        if data_url.startswith("data:"):
+            refs.append({"kind": "data_url", "value": data_url})
+            continue
+        if data_url.startswith("http"):
+            refs.append({"kind": "url", "value": data_url})
+            continue
+
+        # 兼容 base64 字段
+        b64 = (img.get("b64_json") or img.get("base64") or "").strip()
+        if b64:
+            refs.append({"kind": "base64", "value": b64, "mime_type": img.get("mime_type", "image/png")})
+
+    # 2) 某些格式可能在 choice.images
+    for img in choice.get("images", []) or []:
+        if not isinstance(img, dict):
+            continue
+        data_url = ((img.get("image_url") or {}).get("url") or "").strip()
+        if data_url.startswith("data:"):
+            refs.append({"kind": "data_url", "value": data_url})
+            continue
+        if data_url.startswith("http"):
+            refs.append({"kind": "url", "value": data_url})
+            continue
+        b64 = (img.get("b64_json") or img.get("base64") or "").strip()
+        if b64:
+            refs.append({"kind": "base64", "value": b64, "mime_type": img.get("mime_type", "image/png")})
+
+    # 3) content 数组里的 image_url
+    content = message.get("content")
+    if isinstance(content, list):
+        for part in content:
+            if not isinstance(part, dict):
+                continue
+            if part.get("type") != "image_url":
+                continue
+            url = ((part.get("image_url") or {}).get("url") or "").strip()
+            if url.startswith("data:"):
+                refs.append({"kind": "data_url", "value": url})
+            elif url.startswith("http"):
+                refs.append({"kind": "url", "value": url})
+
+    # 4) 极端兼容:文本中可能出现 data:image 或 http 图片 URL
+    if isinstance(content, str):
+        # data URL
+        for m in re.finditer(r"(data:image\/[a-zA-Z0-9.+-]+;base64,[A-Za-z0-9+/=]+)", content):
+            refs.append({"kind": "data_url", "value": m.group(1)})
+        # http(s) 图片链接
+        for m in re.finditer(r"(https?://\S+\.(?:png|jpg|jpeg|webp))", content, flags=re.IGNORECASE):
+            refs.append({"kind": "url", "value": m.group(1)})
+
+    return refs
+
+
+def _mime_to_ext(mime_type: str) -> str:
+    """MIME 类型映射到扩展名。"""
+    mapping = {
+        "image/png": ".png",
+        "image/jpeg": ".jpg",
+        "image/webp": ".webp",
+    }
+    return mapping.get(mime_type.lower(), ".png")
+
+
+def _normalize_model_id(model_id: str) -> str:
+    """
+    规范化常见误写模型 ID,减少无效重试。
+    """
+    if not model_id:
+        return model_id
+    m = model_id.strip()
+    # 常见误写:gemini/gemini-xxx -> google/gemini-xxx
+    if m.startswith("gemini/"):
+        m = "google/" + m.split("/", 1)[1]
+    # 常见顺序误写:preview-image -> image
+    if "gemini-2.5-flash-preview-image" in m:
+        m = m.replace("gemini-2.5-flash-preview-image", "gemini-2.5-flash-image")
+    # 兼容旧 ID 到当前可用 ID
+    if "gemini-2.5-flash-image-preview" in m:
+        m = m.replace("gemini-2.5-flash-image-preview", "gemini-2.5-flash-image")
+    return m
+
+
+@tool(description="可以提取图片中的特征,也可以根据描述生成图片")
+async def nanobanana_extract_features(
+    image_path: str = "",
+    image_paths: Optional[List[str]] = None,
+    output_file: Optional[str] = None,
+    prompt: Optional[str] = None,
+    model: Optional[str] = None,
+    max_tokens: int = 1200,
+    generate_image: bool = False,
+    image_output_path: Optional[str] = None,
+) -> ToolResult:
+    """
+    可以提取图片中的特征,也可以根据描述生成图片。
+
+    Args:
+        image_path: 输入图片路径(单图模式,可选)
+        image_paths: 输入图片路径列表(多图整体模式,可选)
+        output_file: 输出 JSON 文件路径(可选,用于特征提取模式)
+        prompt: 自定义提取指令或生成描述(可选)
+        model: OpenRouter 模型名(可选,默认读取 NANOBANANA_MODEL 或使用 Gemini 视觉模型)
+        max_tokens: 最大输出 token
+        generate_image: 是否生成图片(False=提取特征,True=生成图片)
+        image_output_path: 生成图片保存路径(generate_image=True 时可选)
+
+    Returns:
+        ToolResult: 包含结构化特征和输出文件路径,或生成的图片路径
+    """
+    raw_paths: List[str] = []
+    if image_paths:
+        raw_paths.extend(image_paths)
+    if image_path:
+        raw_paths.append(image_path)
+    if not raw_paths:
+        return ToolResult(
+            title="NanoBanana 提取失败",
+            output="",
+            error="未提供输入图片,请传入 image_path 或 image_paths",
+        )
+
+    # 去重并检查路径
+    unique_raw: List[str] = []
+    seen = set()
+    for p in raw_paths:
+        if p and p not in seen:
+            unique_raw.append(p)
+            seen.add(p)
+
+    input_paths: List[Path] = [Path(p) for p in unique_raw]
+    invalid = [str(p) for p in input_paths if (not p.exists() or not p.is_file())]
+    if invalid:
+        return ToolResult(
+            title="NanoBanana 提取失败",
+            output="",
+            error=f"以下图片不存在或不可读: {invalid}",
+        )
+
+    api_key = _resolve_api_key()
+    if not api_key:
+        return ToolResult(
+            title="NanoBanana 提取失败",
+            output="",
+            error="未找到 OpenRouter API Key,请设置 OPENROUTER_API_KEY 或 OPEN_ROUTER_API_KEY",
+        )
+
+    if generate_image:
+        user_prompt = prompt or DEFAULT_IMAGE_PROMPT
+    else:
+        chosen_model = model or os.getenv("NANOBANANA_MODEL") or "google/gemini-2.5-flash"
+        user_prompt = prompt or DEFAULT_EXTRACTION_PROMPT
+
+    try:
+        image_data_urls = [_image_to_data_url(p) for p in input_paths]
+    except Exception as e:
+        return ToolResult(
+            title="NanoBanana 提取失败",
+            output="",
+            error=f"图片编码失败: {e}",
+        )
+
+    user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_prompt}]
+    for u in image_data_urls:
+        user_content.append({"type": "image_url", "image_url": {"url": u}})
+
+    payload: Dict[str, Any] = {
+        "messages": [
+            {
+                "role": "system",
+                "content": (
+                    "你是视觉助手。"
+                    "当任务为特征提取时输出 JSON 对象,不要输出 markdown。"
+                    "当任务为图像生成时请返回图像。"
+                ),
+            },
+            {
+                "role": "user",
+                "content": user_content,
+            },
+        ],
+        "temperature": 0.2,
+        "max_tokens": max_tokens,
+    }
+    if generate_image:
+        payload["modalities"] = ["image", "text"]
+
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json",
+        "HTTP-Referer": "https://local-agent",
+        "X-Title": "Agent NanoBanana Tool",
+    }
+
+    endpoint = f"{OPENROUTER_BASE_URL}/chat/completions"
+
+    # 图像生成模式:自动尝试多个可用模型,减少 404/invalid model 影响
+    if generate_image:
+        candidates: List[str] = []
+        if model:
+            candidates.append(_normalize_model_id(model))
+        if env_model := os.getenv("NANOBANANA_IMAGE_MODEL"):
+            candidates.append(_normalize_model_id(env_model))
+        candidates.extend([_normalize_model_id(x) for x in DEFAULT_IMAGE_MODEL_CANDIDATES])
+        # 去重并保持顺序
+        dedup: List[str] = []
+        seen = set()
+        for m in candidates:
+            if m and m not in seen:
+                dedup.append(m)
+                seen.add(m)
+        candidates = dedup
+    else:
+        candidates = [chosen_model]
+
+    data: Optional[Dict[str, Any]] = None
+    used_model: Optional[str] = None
+    errors: List[Dict[str, Any]] = []
+
+    for cand in candidates:
+        modality_attempts: List[Optional[List[str]]] = [None]
+        if generate_image:
+            modality_attempts = [["image", "text"], ["image"], None]
+
+        for mods in modality_attempts:
+            trial_payload = dict(payload)
+            trial_payload["model"] = cand
+
+            if mods is None:
+                trial_payload.pop("modalities", None)
+            else:
+                trial_payload["modalities"] = mods
+
+            try:
+                async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
+                    resp = await client.post(endpoint, json=trial_payload, headers=headers)
+                    resp.raise_for_status()
+                    data = resp.json()
+                    used_model = cand
+                    break
+            except httpx.HTTPStatusError as e:
+                errors.append({
+                    "model": cand,
+                    "modalities": mods,
+                    "status_code": e.response.status_code,
+                    "body": e.response.text[:600],
+                })
+                continue
+            except Exception as e:
+                errors.append({
+                    "model": cand,
+                    "modalities": mods,
+                    "status_code": None,
+                    "body": str(e)[:600],
+                })
+                continue
+
+        if data is not None:
+            break
+
+    if data is None:
+        title = "NanoBanana 生成失败" if generate_image else "NanoBanana 提取失败"
+        return ToolResult(
+            title=title,
+            output=json.dumps({"attempted_models": candidates, "errors": errors}, ensure_ascii=False, indent=2),
+            long_term_memory="All candidate models failed for this request",
+            metadata={"attempted_models": candidates, "errors": errors},
+        )
+
+    chosen_model = used_model or candidates[0]
+
+    choices = data.get("choices") or []
+    message = choices[0].get("message", {}) if choices else {}
+
+    # 图像生成分支
+    if generate_image:
+        refs = _extract_image_refs(choices[0] if choices else {}, message)
+        if not refs:
+            content = message.get("content")
+            preview = ""
+            if isinstance(content, str):
+                preview = content[:500]
+            elif isinstance(content, list):
+                preview = json.dumps(content[:3], ensure_ascii=False)[:500]
+
+            return ToolResult(
+                title="NanoBanana 生成失败",
+                output=json.dumps(data, ensure_ascii=False, indent=2),
+                error="模型未返回可解析图片(未在 message.images/choice.images/content 中发现图片)",
+                metadata={
+                    "model": chosen_model,
+                    "choice_keys": list((choices[0] if choices else {}).keys()),
+                    "message_keys": list(message.keys()) if isinstance(message, dict) else [],
+                    "content_preview": preview,
+                },
+            )
+
+        output_paths: List[str] = []
+        if image_output_path:
+            base_path = Path(image_output_path)
+        else:
+            if len(input_paths) > 1:
+                base_path = input_paths[0].parent / "set_generated.png"
+            else:
+                base_path = input_paths[0].parent / f"{input_paths[0].stem}_generated.png"
+        base_path.parent.mkdir(parents=True, exist_ok=True)
+
+        for idx, ref in enumerate(refs):
+            kind = ref.get("kind", "")
+            mime_type = "image/png"
+            raw_bytes: Optional[bytes] = None
+
+            if kind == "data_url":
+                m = re.match(r"^data:([^;]+);base64,(.+)$", ref.get("value", ""), flags=re.DOTALL)
+                if not m:
+                    continue
+                mime_type = m.group(1)
+                raw_bytes = base64.b64decode(m.group(2))
+            elif kind == "base64":
+                mime_type = ref.get("mime_type", "image/png")
+                raw_bytes = base64.b64decode(ref.get("value", ""))
+            elif kind == "url":
+                url = ref.get("value", "")
+                try:
+                    with httpx.Client(timeout=DEFAULT_TIMEOUT) as client:
+                        r = client.get(url)
+                        r.raise_for_status()
+                        raw_bytes = r.content
+                        mime_type = r.headers.get("content-type", "image/png").split(";")[0]
+                except Exception:
+                    continue
+            else:
+                continue
+
+            if not raw_bytes:
+                continue
+
+            ext = _mime_to_ext(mime_type)
+            if len(refs) == 1:
+                target = base_path
+                if target.suffix.lower() not in [".png", ".jpg", ".jpeg", ".webp"]:
+                    target = target.with_suffix(ext)
+            else:
+                stem = base_path.stem
+                target = base_path.with_name(f"{stem}_{idx+1}{ext}")
+            try:
+                target.write_bytes(raw_bytes)
+                output_paths.append(str(target))
+            except Exception as e:
+                return ToolResult(
+                    title="NanoBanana 生成失败",
+                    output="",
+                    error=f"写入生成图片失败: {e}",
+                    metadata={"model": chosen_model},
+                )
+
+        if not output_paths:
+            return ToolResult(
+                title="NanoBanana 生成失败",
+                output=json.dumps(data, ensure_ascii=False, indent=2),
+                error="检测到图片引用但写入失败(可能是无效 base64 或 URL 不可访问)",
+                metadata={"model": chosen_model, "ref_count": len(refs)},
+            )
+
+        usage = data.get("usage", {})
+        prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens", 0)
+        completion_tokens = usage.get("completion_tokens") or usage.get("output_tokens", 0)
+        summary = {
+            "model": chosen_model,
+            "input_images": [str(p) for p in input_paths],
+            "input_count": len(input_paths),
+            "generated_images": output_paths,
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+        }
+        return ToolResult(
+            title="NanoBanana 图片生成完成",
+            output=json.dumps({"summary": summary}, ensure_ascii=False, indent=2),
+            long_term_memory=f"Generated {len(output_paths)} image(s) from {len(input_paths)} input image(s) using {chosen_model}",
+            attachments=output_paths,
+            metadata=summary,
+        )
+
+    content = message.get("content") or ""
+    if not content:
+        return ToolResult(
+            title="NanoBanana 提取失败",
+            output=json.dumps(data, ensure_ascii=False, indent=2),
+            error="模型未返回内容",
+        )
+
+    try:
+        parsed = _safe_json_parse(content)
+    except Exception as e:
+        return ToolResult(
+            title="NanoBanana 提取失败",
+            output=content,
+            error=f"模型返回非 JSON 内容,解析失败: {e}",
+            metadata={"model": chosen_model},
+        )
+
+    if output_file:
+        out_path = Path(output_file)
+    else:
+        if len(input_paths) > 1:
+            out_path = input_paths[0].parent / "set_invariant_features.json"
+        else:
+            out_path = input_paths[0].parent / f"{input_paths[0].stem}_invariant_features.json"
+
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    out_path.write_text(json.dumps(parsed, ensure_ascii=False, indent=2), encoding="utf-8")
+
+    usage = data.get("usage", {})
+    prompt_tokens = usage.get("prompt_tokens") or usage.get("input_tokens", 0)
+    completion_tokens = usage.get("completion_tokens") or usage.get("output_tokens", 0)
+
+    summary = {
+        "model": chosen_model,
+        "input_images": [str(p) for p in input_paths],
+        "input_count": len(input_paths),
+        "output_file": str(out_path),
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+    }
+
+    return ToolResult(
+        title="NanoBanana 不变特征提取完成",
+        output=json.dumps(
+            {
+                "summary": summary,
+                "features": parsed,
+            },
+            ensure_ascii=False,
+            indent=2,
+        ),
+        long_term_memory=f"Extracted invariant features from {len(input_paths)} input image(s) using {chosen_model}",
+        attachments=[str(out_path)],
+        metadata=summary,
+    )

+ 61 - 4
agent/tools/builtin/subagent.py

@@ -276,6 +276,48 @@ def _build_evaluate_prompt(goal_description: str, messages: Optional[Messages])
     return "\n".join(lines)
 
 
+def _make_event_printer(label: str):
+    """
+    创建子 Agent 执行过程打印函数。
+
+    当父 runner.debug=True 时,传给 run_result(on_event=...),
+    实时输出子 Agent 的工具调用和助手消息。
+    """
+    prefix = f"  [{label}]"
+
+    def on_event(item):
+        from agent.trace.models import Trace, Message
+        if isinstance(item, Message):
+            if item.role == "assistant":
+                content = item.content
+                if isinstance(content, dict):
+                    text = content.get("text", "")
+                    tool_calls = content.get("tool_calls")
+                    if text:
+                        preview = text[:120] + "..." if len(text) > 120 else text
+                        print(f"{prefix} {preview}")
+                    if tool_calls:
+                        for tc in tool_calls:
+                            name = tc.get("function", {}).get("name", "unknown")
+                            print(f"{prefix} 🛠️  {name}")
+            elif item.role == "tool":
+                content = item.content
+                if isinstance(content, dict):
+                    name = content.get("tool_name", "unknown")
+                    desc = item.description or ""
+                    desc_short = (desc[:60] + "...") if len(desc) > 60 else desc
+                    suffix = f": {desc_short}" if desc_short else ""
+                    print(f"{prefix} ✅ {name}{suffix}")
+        elif isinstance(item, Trace):
+            if item.status == "completed":
+                print(f"{prefix} ✓ 完成")
+            elif item.status == "failed":
+                err = (item.error_message or "")[:80]
+                print(f"{prefix} ✗ 失败: {err}")
+
+    return on_event
+
+
 # ===== 统一内部执行函数 =====
 
 async def _run_agents(
@@ -283,6 +325,8 @@ async def _run_agents(
     per_agent_msgs: List[Messages],
     continue_from: Optional[str],
     store, trace_id: str, goal_id: str, runner, context: dict,
+    agent_type: Optional[str] = None,
+    skills: Optional[List[str]] = None,
 ) -> Dict[str, Any]:
     """
     统一 agent 执行逻辑。
@@ -317,7 +361,7 @@ async def _run_agents(
             # continue_from 已经设置了 sub_trace_id
             pass
         else:
-            agent_type = "delegate" if single else "explore"
+            resolved_agent_type = agent_type or ("delegate" if single else "explore")
             suffix = "delegate" if single else f"explore-{i+1:03d}"
             stid = generate_sub_trace_id(trace_id, suffix)
 
@@ -327,7 +371,7 @@ async def _run_agents(
                 task=task_item,
                 parent_trace_id=trace_id,
                 parent_goal_id=goal_id,
-                agent_type=agent_type,
+                agent_type=resolved_agent_type,
                 uid=parent_trace.uid if parent_trace else None,
                 model=parent_trace.model if parent_trace else None,
                 status="running",
@@ -342,7 +386,7 @@ async def _run_agents(
             # 广播 sub_trace_started
             await broadcast_sub_trace_started(
                 trace_id, stid, goal_id or "",
-                agent_type, task_item,
+                resolved_agent_type, task_item,
             )
 
             if single:
@@ -363,16 +407,22 @@ async def _run_agents(
         agent_msgs = list(msgs) + [{"role": "user", "content": task_item}]
         allowed_tools = _get_allowed_tools(single, context)
 
+        debug = getattr(runner, 'debug', False)
+        agent_label = (agent_type or ("delegate" if single else f"explore-{i+1}"))
+        on_event = _make_event_printer(agent_label) if debug else None
+
         coro = runner.run_result(
             messages=agent_msgs,
             config=_make_run_config(
                 trace_id=cur_stid,
-                agent_type="delegate" if single else "explore",
+                agent_type=agent_type or ("delegate" if single else "explore"),
                 model=parent_trace.model if parent_trace else "gpt-4o",
                 uid=parent_trace.uid if parent_trace else None,
                 tools=allowed_tools,
                 name=task_item[:50],
+                skills=skills,
             ),
+            on_event=on_event,
         )
         coros.append((i, cur_stid, collab_name, coro))
 
@@ -492,6 +542,8 @@ async def agent(
     task: Union[str, List[str]],
     messages: Optional[Union[Messages, List[Messages]]] = None,
     continue_from: Optional[str] = None,
+    agent_type: Optional[str] = None,
+    skills: Optional[List[str]] = None,
     context: Optional[dict] = None,
 ) -> Dict[str, Any]:
     """
@@ -504,6 +556,8 @@ async def agent(
         task: 任务描述。字符串=单任务,列表=多任务并行
         messages: 预置消息。1D 列表=所有 agent 共享;2D 列表=per-agent
         continue_from: 继续已有 trace(仅单任务)
+        agent_type: 子 Agent 类型,决定 preset 和默认 skills(如 "deconstruct")
+        skills: 附加到 system prompt 的 skill 名称列表,覆盖 preset 默认值
         context: 框架自动注入的上下文
     """
     if not context:
@@ -545,6 +599,8 @@ async def agent(
     return await _run_agents(
         tasks, per_agent_msgs, continue_from,
         store, trace_id, goal_id, runner, context,
+        agent_type=agent_type,
+        skills=skills,
     )
 
 
@@ -655,6 +711,7 @@ async def evaluate(
                 tools=allowed_tools,
                 name=f"评估: {goal_id}",
             ),
+            on_event=_make_event_printer("evaluate") if getattr(runner, 'debug', False) else None,
         )
 
         await broadcast_sub_trace_completed(

+ 0 - 4
agent/trace/__init__.py

@@ -14,7 +14,6 @@ from .goal_models import Goal, GoalTree, GoalStatus, GoalType, GoalStats
 from .protocols import TraceStore
 from .store import FileSystemTraceStore
 from .trace_id import generate_trace_id, generate_sub_trace_id, parse_parent_trace_id
-from .goal_tool import set_goal_tree, get_goal_tree
 
 __all__ = [
     # Models
@@ -32,7 +31,4 @@ __all__ = [
     "generate_trace_id",
     "generate_sub_trace_id",
     "parse_parent_trace_id",
-    # Goal tool
-    "set_goal_tree",
-    "get_goal_tree",
 ]

+ 43 - 2
agent/trace/compaction.py

@@ -190,8 +190,11 @@ def estimate_tokens(messages: List[Dict[str, Any]]) -> int:
             total_tokens += _estimate_text_tokens(content)
         elif isinstance(content, list):
             for part in content:
-                if isinstance(part, dict) and part.get("type") == "text":
-                    total_tokens += _estimate_text_tokens(part.get("text", ""))
+                if isinstance(part, dict):
+                    if part.get("type") == "text":
+                        total_tokens += _estimate_text_tokens(part.get("text", ""))
+                    elif part.get("type") in ("image_url", "image"):
+                        total_tokens += _estimate_image_tokens(part)
         # tool_calls
         tool_calls = msg.get("tool_calls")
         if tool_calls and isinstance(tool_calls, list):
@@ -226,6 +229,44 @@ def _estimate_text_tokens(text: str) -> int:
     return int(cjk_chars * 1.5) + other_chars // 4
 
 
+def _estimate_image_tokens(block: Dict[str, Any]) -> int:
+    """
+    估算图片块的 token 消耗。
+
+    Anthropic 计算方式:tokens = (width * height) / 750
+    优先从 _image_meta 读取真实尺寸,其次从 base64 数据量粗估,最小 1600 tokens。
+    """
+    MIN_IMAGE_TOKENS = 1600
+
+    # 优先使用 _image_meta 中的真实尺寸
+    meta = block.get("_image_meta")
+    if meta and meta.get("width") and meta.get("height"):
+        tokens = (meta["width"] * meta["height"]) // 750
+        return max(MIN_IMAGE_TOKENS, tokens)
+
+    # 回退:从 base64 数据长度粗估
+    b64_data = ""
+    if block.get("type") == "image":
+        source = block.get("source", {})
+        if source.get("type") == "base64":
+            b64_data = source.get("data", "")
+    elif block.get("type") == "image_url":
+        url_obj = block.get("image_url", {})
+        url = url_obj.get("url", "") if isinstance(url_obj, dict) else str(url_obj)
+        if url.startswith("data:"):
+            _, _, b64_data = url.partition(",")
+
+    if b64_data:
+        # base64 编码后大小约为原始字节的 4/3
+        raw_bytes = len(b64_data) * 3 // 4
+        # 粗估:假设 JPEG 压缩率 ~10:1,像素数 ≈ raw_bytes * 10 / 3 (RGB)
+        estimated_pixels = raw_bytes * 10 // 3
+        estimated_tokens = estimated_pixels // 750
+        return max(MIN_IMAGE_TOKENS, estimated_tokens)
+
+    return MIN_IMAGE_TOKENS
+
+
 def _is_cjk(ch: str) -> bool:
     """判断字符是否为 CJK(中日韩)字符"""
     cp = ord(ch)

+ 8 - 27
agent/trace/goal_tool.py

@@ -13,22 +13,6 @@ if TYPE_CHECKING:
     from .protocols import TraceStore
 
 
-# ===== 全局 GoalTree 状态管理 =====
-
-_current_goal_tree = None
-
-
-def set_goal_tree(tree):
-    """设置当前 GoalTree(由 AgentRunner 调用)"""
-    global _current_goal_tree
-    _current_goal_tree = tree
-
-
-def get_goal_tree():
-    """获取当前 GoalTree"""
-    return _current_goal_tree
-
-
 # ===== LLM 可调用的 goal 工具 =====
 
 @tool(description="管理执行计划,添加/完成/放弃目标,切换焦点")
@@ -53,12 +37,13 @@ async def goal(
         done: 完成当前目标,值为 summary
         abandon: 放弃当前目标,值为原因
         focus: 切换焦点到指定 ID
-        context: 工具执行上下文(包含 store 和 trace_id
+        context: 工具执行上下文(包含 store、trace_id、goal_tree
 
     Returns:
         str: 更新后的计划状态文本
     """
-    tree = get_goal_tree()
+    # GoalTree 从 context 获取,每个 agent 实例独立,不再依赖全局变量
+    tree = context.get("goal_tree") if context else None
     if tree is None:
         return "错误:GoalTree 未初始化"
 
@@ -130,10 +115,7 @@ async def goal_tool(
 
         # 推送事件
         if store and trace_id:
-            print(f"[DEBUG] goal_tool: calling store.update_goal for done: goal_id={goal.id}")
             await store.update_goal(trace_id, goal.id, status="completed", summary=done)
-        else:
-            print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
 
         # 检查是否有级联完成的父目标(complete方法已经处理,这里只需要记录)
         if goal.parent_id:
@@ -163,10 +145,7 @@ async def goal_tool(
 
         # 推送事件
         if store and trace_id:
-            print(f"[DEBUG] goal_tool: calling store.update_goal for abandon: goal_id={goal.id}")
             await store.update_goal(trace_id, goal.id, status="abandoned", summary=abandon)
-        else:
-            print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
 
     # 4. 处理 add
     if add is not None:
@@ -218,11 +197,8 @@ async def goal_tool(
 
             # 推送事件
             if store and trace_id:
-                print(f"[DEBUG] goal_tool: calling store.add_goal for {len(new_goals)} new goals")
                 for goal in new_goals:
                     await store.add_goal(trace_id, goal)
-            else:
-                print(f"[DEBUG] goal_tool: skip event push (store={store}, trace_id={trace_id})")
 
             # 如果没有焦点且添加了目标,自动 focus 到第一个新目标
             if not tree.current_id and new_goals:
@@ -230,6 +206,11 @@ async def goal_tool(
                 display_id = tree._generate_display_id(new_goals[0])
                 changes.append(f"自动切换焦点: {display_id}")
 
+    # 将完整内存树状态(含 current_id)同步到存储,
+    # 因为 store.add_goal / update_goal 各自从磁盘加载,不包含 focus 等内存变更
+    if store and trace_id and changes:
+        await store.update_goal_tree(trace_id, tree)
+
     # 返回当前状态
     result = []
     if changes:

+ 7 - 5
agent/trace/models.py

@@ -200,12 +200,14 @@ class Message:
         msg: Dict[str, Any] = {"role": self.role}
 
         if self.role == "tool":
-            # tool message: tool_call_id + name + content(string)
+            # tool message: tool_call_id + name + content
             if self.tool_call_id:
                 msg["tool_call_id"] = self.tool_call_id
                 msg["name"] = self.description or "unknown"
             if isinstance(self.content, dict):
-                msg["content"] = str(self.content.get("result", self.content))
+                result = self.content.get("result", self.content)
+                # result 可能是 list(含图片的多模态内容)或字符串
+                msg["content"] = result if isinstance(result, list) else str(result)
             else:
                 msg["content"] = str(self.content) if self.content is not None else ""
 
@@ -405,11 +407,11 @@ class Message:
         # 只添加非空的可选字段
         if self.abandoned_at:
             result["abandoned_at"] = self.abandoned_at.isoformat()
-        if self.reasoning_tokens:
+        if self.reasoning_tokens is not None:
             result["reasoning_tokens"] = self.reasoning_tokens
-        if self.cache_creation_tokens:
+        if self.cache_creation_tokens is not None:
             result["cache_creation_tokens"] = self.cache_creation_tokens
-        if self.cache_read_tokens:
+        if self.cache_read_tokens is not None:
             result["cache_read_tokens"] = self.cache_read_tokens
         return result
 

+ 28 - 15
agent/trace/run_api.py

@@ -75,9 +75,9 @@ class TraceRunRequest(BaseModel):
         default_factory=list,
         description="追加的新消息(可为空,用于重新生成场景)",
     )
-    after_sequence: Optional[int] = Field(
+    after_message_id: Optional[str] = Field(
         None,
-        description="从哪条消息后续跑。None = 从末尾续跑,int = 从该 sequence 后运行(自动判断续跑/回溯)",
+        description="从哪条消息后续跑。None = 从末尾续跑,message_id = 从该消息后运行(自动判断续跑/回溯)",
     )
 
 
@@ -273,17 +273,25 @@ async def _cleanup_incomplete_tool_calls(store, trace_id: str, after_sequence: i
     return safe
 
 
+def _parse_sequence_from_message_id(message_id: str) -> int:
+    """从 message_id 末尾解析 sequence 整数(格式:{trace_id}-{sequence:04d})"""
+    try:
+        return int(message_id.rsplit("-", 1)[-1])
+    except (ValueError, IndexError):
+        raise HTTPException(
+            status_code=422,
+            detail=f"Invalid after_message_id format: {message_id!r}",
+        )
+
+
 @router.post("/{trace_id}/run", response_model=RunResponse)
 async def run_trace(trace_id: str, req: TraceRunRequest):
     """
     运行已有 Trace(统一续跑 + 回溯)
 
-    - after_sequence 为 null(或省略):从末尾续跑
-    - after_sequence 为 int:从该 sequence 后运行(Runner 自动判断续跑/回溯)
-    - messages 为空 + after_sequence 为 int:重新生成(从该位置重跑,不插入新消息)
-
-    after_sequence 的值是 message 的 sequence 号。如果指定的 sequence 是一条带
-    tool_calls 的 assistant 消息,系统会自动扩展截断点到其所有 tool response 之后。
+    - after_message_id 为 null(或省略):从末尾续跑
+    - after_message_id 为 message_id 字符串:从该消息后运行(Runner 自动判断续跑/回溯)
+    - messages 为空 + after_message_id 有值:重新生成(从该位置重跑,不插入新消息)
 
     **自动清理不完整工具调用**:
     如果人工插入 message 的位置打断了一个工具调用过程(assistant 消息有 tool_calls
@@ -293,6 +301,11 @@ async def run_trace(trace_id: str, req: TraceRunRequest):
 
     runner = _get_runner()
 
+    # 将 message_id 转换为内部使用的 sequence 整数
+    after_sequence: Optional[int] = None
+    if req.after_message_id is not None:
+        after_sequence = _parse_sequence_from_message_id(req.after_message_id)
+
     # 验证 trace 存在
     if runner.trace_store:
         trace = await runner.trace_store.get_trace(trace_id)
@@ -300,25 +313,25 @@ async def run_trace(trace_id: str, req: TraceRunRequest):
             raise HTTPException(status_code=404, detail=f"Trace not found: {trace_id}")
 
         # 自动检查并清理不完整的工具调用
-        if req.after_sequence is not None and req.messages:
+        if after_sequence is not None and req.messages:
             adjusted_seq = await _cleanup_incomplete_tool_calls(
-                runner.trace_store, trace_id, req.after_sequence
+                runner.trace_store, trace_id, after_sequence
             )
-            if adjusted_seq != req.after_sequence:
+            if adjusted_seq != after_sequence:
                 logger.info(
-                    f"已自动调整插入位置:{req.after_sequence} -> {adjusted_seq}"
+                    f"已自动调整插入位置:{after_sequence} -> {adjusted_seq}"
                 )
-                req.after_sequence = adjusted_seq
+                after_sequence = adjusted_seq
 
     # 检查是否已在运行
     if trace_id in _running_tasks and not _running_tasks[trace_id].done():
         raise HTTPException(status_code=409, detail="Trace is already running")
 
-    config = RunConfig(trace_id=trace_id, after_sequence=req.after_sequence)
+    config = RunConfig(trace_id=trace_id, after_sequence=after_sequence)
     task = asyncio.create_task(_run_in_background(trace_id, req.messages, config))
     _running_tasks[trace_id] = task
 
-    mode = "rewind" if req.after_sequence is not None else "continue"
+    mode = "rewind" if after_sequence is not None else "continue"
     return RunResponse(
         trace_id=trace_id,
         status="started",

+ 38 - 9
docs/README.md

@@ -60,8 +60,10 @@ agent/
 │   ├── protocols.py       # MemoryStore 接口
 │   ├── stores.py          # 存储实现
 │   ├── skill_loader.py    # Skill 加载器
-│   └── skills/            # 内置 Skills
-│       └── core.md        # Core Skill(自动加载)
+│   └── skills/            # 内置 Skills(自动注入 system prompt)
+│       ├── planning.md    # 计划与 Goal 工具使用
+│       ├── research.md    # 搜索与内容研究
+│       └── browser.md     # 浏览器自动化
 ├── llm/                   # LLM 集成
 │   ├── gemini.py          # Gemini Provider
@@ -167,6 +169,7 @@ class RunConfig:
     agent_type: str = "default"
     uid: Optional[str] = None
     system_prompt: Optional[str] = None        # None = 从 skills 自动构建
+    skills: Optional[List[str]] = None         # 注入 system prompt 的 skill 名称列表;None = 按 preset 决定
     enable_memory: bool = True
     auto_execute_tools: bool = True
     name: Optional[str] = None                 # 显示名称(空则由 utility_llm 自动生成)
@@ -304,7 +307,7 @@ agent 工具的合成结果对齐正常返回值格式(含 `sub_trace_id` 字
 **实现**:`agent/core/runner.py:AgentRunner._heal_orphaned_tool_calls`
 
 - `run(messages, config)`:**核心方法**,流式返回 `AsyncIterator[Union[Trace, Message]]`
-- `run_result(messages, config)`:便利方法,内部消费 `run()`,返回结构化结果。主要用于 `agent`/`evaluate` 工具内部
+- `run_result(messages, config, on_event=None)`:便利方法,内部消费 `run()`,返回结构化结果。`on_event` 回调可实时接收每个 Trace/Message 事件(用于调试时输出子 Agent 执行过程)。主要用于 `agent`/`evaluate` 工具内部
 
 ### REST API
 
@@ -544,19 +547,24 @@ class AgentPreset:
     denied_tools: Optional[List[str]] = None   # 黑名单
     max_iterations: int = 30
     temperature: Optional[float] = None
+    skills: Optional[List[str]] = None         # 注入 system prompt 的 skill 名称列表;None = 加载全部
     description: Optional[str] = None
 
 
+_DEFAULT_SKILLS = ["planning", "research", "browser"]
+
 AGENT_PRESETS = {
     "default": AgentPreset(
         allowed_tools=None,
         max_iterations=30,
+        skills=_DEFAULT_SKILLS,
         description="默认 Agent,拥有全部工具权限",
     ),
     "explore": AgentPreset(
         allowed_tools=["read", "glob", "grep", "list_files"],
         denied_tools=["write", "edit", "bash", "task"],
         max_iterations=15,
+        skills=["planning"],
         description="探索型 Agent,只读权限,用于代码分析",
     ),
     "analyst": AgentPreset(
@@ -564,6 +572,7 @@ AGENT_PRESETS = {
         denied_tools=["write", "edit", "bash", "task"],
         temperature=0.3,
         max_iterations=25,
+        skills=["planning", "research"],
         description="分析型 Agent,用于深度分析和研究",
     ),
 }
@@ -571,7 +580,7 @@ AGENT_PRESETS = {
 
 **实现**:`agent/core/presets.py`
 
-**用户自定义**:项目级配置 `.agent/presets.json` 可覆盖或添加预设。
+**用户自定义**:项目级配置文件(如 `examples/how/presets.json`)可通过 `register_preset()` 注册额外预设。项目专用的 Agent 类型建议放在项目目录下,而非内置预设。
 
 ---
 
@@ -589,10 +598,15 @@ async def agent(
     task: Union[str, List[str]],
     messages: Optional[Union[Messages, List[Messages]]] = None,
     continue_from: Optional[str] = None,
+    agent_type: Optional[str] = None,
+    skills: Optional[List[str]] = None,
     context: Optional[dict] = None,
 ) -> Dict[str, Any]:
 ```
 
+- `agent_type`: 子 Agent 类型,决定工具权限和默认 skills(对应 `AgentPreset` 名称,如 `"deconstruct"`)
+- `skills`: 覆盖 preset 默认值,显式指定注入 system prompt 的 skill 列表
+
 **单任务(delegate)**:`task: str`
 - 创建单个 Sub-Trace
 - 完整工具权限(除 agent/evaluate 外,防止递归)
@@ -748,17 +762,32 @@ ToolResult(
 
 | 类型 | 加载位置 | 加载时机 |
 |------|---------|---------|
-| **Core Skill** | System Prompt | Agent 启动时自动加载 |
+| **内置 Skill** | System Prompt | Agent 启动时自动注入 |
+| **项目 Skill** | System Prompt | Agent 启动时按 preset/call-site 过滤后注入 |
 | **普通 Skill** | 对话消息 | 模型调用 `skill` 工具时 |
 
 ### 目录结构
 
 ```
-agent/memory/skills/
-├── core.md              # Core Skill(自动加载到 System Prompt)
-└── browser_use/         # 普通 Skill(按需加载)
+agent/memory/skills/         # 内置 Skills(始终加载)
+├── planning.md              # 计划与 Goal 工具使用
+├── research.md              # 搜索与内容研究
+└── browser.md               # 浏览器自动化
+
+./skills/                    # 项目自定义 Skills
+```
 
-./skills/                # 项目自定义 Skills(按需加载)
+### Skills 过滤(call-site 选择)
+
+不同 Agent 类型所需的 skills 不同。过滤优先级:
+
+1. `agent()` 工具的 `skills` 参数(显式指定,最高优先级)
+2. `AgentPreset.skills`(preset 默认值)
+3. `None`(加载全部,向后兼容)
+
+示例:调用子 Agent 时只注入解构相关 skill:
+```python
+agent(task="...", agent_type="deconstruct", skills=["planning", "deconstruct"])
 ```
 
 **实现**:`agent/memory/skill_loader.py`

+ 98 - 0
docs/ref/create.md

@@ -0,0 +1,98 @@
+---
+name: create
+description: 从创作层解构社交媒体帖子,提取叙事策略与选题价值(研究用,未接入系统)
+---
+
+## 角色
+
+你是内容创作策略分析专家。给定一篇优质社交媒体帖子,分析其**创作层**——内容策略、选题价值、叙事结构、文字策略——回答"这篇内容为什么值得创作,以及创作者如何讲述它"。
+
+与制作层解构(How to make)不同,创作层回答的是:**Why this content + How to tell it**。
+
+---
+
+## 创作层的核心概念
+
+**选题价值**(三点框架)
+- **灵感点**:是什么触发了创作者创作这篇内容?来自生活、趋势、热点、个人经历?
+- **目的点**:创作者想通过这篇内容达到什么?吸粉、种草、共鸣、教育?
+- **关键点**:这篇内容的核心价值主张是什么?受众为什么会喜欢?
+
+**内容权重**
+- 这篇内容以图片为主还是文字为主?谁承载了更多核心信息?
+- 图文是相辅相成,还是各自独立承载信息?
+
+**叙事结构**:创作者如何组织内容流程——从什么开始,经过什么,以什么结尾?图片之间的叙事逻辑是什么?
+
+---
+
+## 分析维度
+
+**内容品类**:这是什么类型的内容?(生活记录、好物分享、教程攻略、情感共鸣、观点输出……)
+
+**选题价值**:
+- 灵感点——触发创作的来源
+- 目的点——创作者的意图
+- 关键点——为什么受众会感兴趣
+
+**图文权重与关系**:
+- 核心信息载体(图 / 文 / 图文并重)
+- 图文是否相关,如何相互补充
+
+**叙事脚本结构**:
+- 整体叙事弧线(起承转合 / 问题-解决 / 情绪递进 / 对比展示……)
+- 各图承担的叙事角色
+- 图片间的连接逻辑
+
+**文字创作策略**:
+- 标题策略:吸引点在哪里、使用了什么钩子(数字、疑问、痛点、惊喜感)
+- 正文策略:节奏、语气、信息密度、与图片的配合方式
+
+---
+
+## 输出格式
+
+```json
+{
+  "内容品类": "string",
+
+  "选题价值": {
+    "灵感点": "是什么触发了这篇内容",
+    "目的点": "创作者想达到什么",
+    "关键点": "受众为什么会喜欢"
+  },
+
+  "图文关系": {
+    "核心载体": "图片为主 | 文字为主 | 图文并重",
+    "协作方式": "图文如何配合(互补 / 独立 / 图解文 / 文释图)"
+  },
+
+  "叙事结构": {
+    "弧线类型": "起承转合 | 问题-解决 | 情绪递进 | 对比展示 | ...",
+    "图片叙事": [
+      {"图片": "图片1", "叙事角色": "引入主体 / 建立情境..."},
+      {"图片": "图片2", "叙事角色": "展开 / 对比..."}
+    ]
+  },
+
+  "文字策略": {
+    "标题": "钩子类型与策略",
+    "正文": "节奏、语气、信息组织方式"
+  },
+
+  "核心洞察": "一句话:这篇内容在创作策略上为什么成功"
+}
+```
+
+---
+
+## 原则
+
+- **创作层优先**:分析"为什么创作这个内容 + 如何叙述它",而非视觉制作细节
+- **受众视角**:始终思考受众为什么会停留、点赞、收藏、分享
+- **策略性而非描述性**:不是"图片展示了XX",而是"通过XX实现了XX效果"
+- **与制作层互补**:创作层负责 Why + What to tell,制作层负责 How to make
+
+---
+
+> **注**:此文件仅供研究,未接入 Agent 系统。对应的系统工具是 `deconstruct`(制作层)。

+ 357 - 0
docs/ref/deconstruct_old.md

@@ -0,0 +1,357 @@
+---
+name: deconstruct
+description: 制作还原解构方法论:将优质社交媒体帖子解构为可还原的结构化制作脚本
+---
+
+## 角色定位
+
+你是制作还原解构顾问。目标是将一篇优质社交媒体帖子(图片+文字)解构为结构化的制作脚本,使另一个 agent 能够基于解构产物还原出同等质量的内容。
+
+**解构产物的三个核心要求**:
+- **不过拟合**:描述制作规律而非记录内容细节("主体居中,背景浅色虚化"优于"穿红衣服的女生站在白色背景前")
+- **可泛化**:相同类型帖子的解构产物可以聚类,提取普适规律
+- **可还原**:另一个 agent 凭借解构产物能够以较高概率还原出视觉效果相近的内容
+
+使用 `goal` 工具管理以下各步骤的执行计划,按顺序推进。
+
+---
+
+## 步骤 1:内容过滤
+
+过滤正文中与核心主题无关的话题标签(hashtag)。
+
+**保留标准**(两项均通过才保留):
+1. 与帖子主题或产品有直接关联
+2. 移除后不影响对核心内容的理解
+
+输出:过滤后的正文文本。
+
+---
+
+## 步骤 2:入口分析(内容视角)
+
+通过多图对比,判断这篇内容的核心表达方式。
+
+**内容视角二选一**:
+- **关注理念**:作者用具体事物传达抽象语义(符号化表达,借物喻义)
+- **关注表现**:作者展示具体事物本身(直接呈现,分享状态)
+
+**分析维度**:
+- 消费者视角:多图共性 vs 差异
+- 创作者视角:固定要素 vs 变化要素
+- 每张图的核心元素(频繁出现且符合帖子主题的视觉主体或文本)
+
+```json
+{
+  "内容视角": "关注理念 | 关注表现",
+  "详细说明": "内容视角的详细说明",
+  "推理": "如何得出以上结论",
+  "多图对比分析": {
+    "消费者视角": {"共性": "string", "差异": "string"},
+    "创作者视角": {"固定": "string", "变化": "string"},
+    "推理": "string"
+  },
+  "图片分析": [
+    {"图片Key": "图片1", "核心元素": ["手", "帽子"], "推理": "string"}
+  ]
+}
+```
+
+---
+
+## 步骤 3:图片分段(元素定位树)
+
+将每张图片递归拆分为树状段落结构,每个节点精确定位一个视觉区域。
+
+### 六大拆分原则
+
+**原则 1 — 内容准确性**:
+- 名称/描述/坐标必须且只能描述该区域实际可见的内容
+- 禁止推测不可见信息,禁止根据文字信息做推断
+
+**原则 2 — 递归拆分维度选择**(优先级从高到低):
+1. 创作者语义拆分(最高优先):作者创作意图导致的自然分组,如"标题区 vs 内容区"
+2. XY 轴拆分:水平或垂直方向的空间分割
+3. 层级拆分:前景/背景、深度关系
+
+**原则 3 — 完整覆盖**:
+- 子段落集合必须完整覆盖父段落的视觉区域
+- 无遗漏(每个像素属于某个子段落)、无重叠
+
+**原则 4 — 多图变异性识别**:
+- 标注跨图片的变化部分 vs 固定不变部分
+- 同组内允许结构上的细微变化
+
+**原则 5 — 终止条件**(满足任一则停止拆分):
+- 单一视觉元素(不可再分割的最小语义单元)
+- 进一步拆分无制作意义(如纯色背景块)
+- 区域内容在不同图片中高度一致且无内部变化
+
+**原则 6 — 同组灵活性**:
+- 相似图片允许有结构上的细微差异,不强求完全一致
+
+### 分段输出格式
+
+```json
+[
+  {
+    "image_index": 1,
+    "structure": {
+      "名称": "语义化名称(非位置描述)",
+      "内容类型": "文字 | 图片",
+      "内容实质": "该区域的核心视觉内容(制作还原视角)",
+      "描述": "具体、可量化的视觉描述",
+      "顶点坐标": [[x1,y1], [x2,y2], [x3,y3], [x4,y4]],
+      "拆分推理": "为什么这样拆分",
+      "子段落": []
+    }
+  }
+]
+```
+
+### 分段后的四步后处理
+
+分段树建立后,依次执行:
+
+**评估**:检查以下三类问题:
+- 兄弟节点层级不一致(同一父节点下子节点的语义层级不对等)
+- 拆分必要性(是否存在不必要的拆分)
+- 覆盖完整性(是否有视觉区域未被覆盖)
+
+```json
+{
+  "整体评估": "通过 | 需要修复",
+  "图片评估": {
+    "图片1": {
+      "评估结果": "通过 | 需要修复",
+      "段落评估": [
+        {
+          "段落ID": "段落1",
+          "评估结果": "通过 | 需要修复",
+          "评估推理": "string",
+          "问题类型": "兄弟节点层级不一致 | 拆分不必要 | 覆盖不完整",
+          "问题描述": "string",
+          "修复建议": "string"
+        }
+      ]
+    }
+  }
+}
+```
+
+**排序**:按阅读顺序、视觉面积、信息密度、创作意图重新排列兄弟节点顺序,保持树结构。
+
+**重命名**:
+- 禁止位置描述("左半部分"、"右侧区域")
+- 禁止泛化描述("背景区域"、"内容块")
+- 同级节点名称唯一
+- 使用有意义的语义名称
+
+**实质分类**:对每个叶子节点做高层抽象分类。
+- 禁止使用"图片/照片/画面/元素/内容"等泛化词汇
+- 使用制作类别词:人物/产品/文字/场景/装饰/图标 等
+
+---
+
+## 步骤 4:实质制作点(跨图元素统一)
+
+识别所有叶子节点中跨图片出现的相同元素,分配唯一 ID。
+
+### 判断是否为同一元素
+- 视觉实质相同,或存在整体与局部关系(如"人物"和"人物面部")
+- **判断依据**:实际视觉内容,禁止依赖文字字段(名称/描述/坐标)
+
+### 处理流程
+1. 收集所有叶子节点
+2. 文字元素:按内容实质分组(代码化,精确匹配)
+3. 图片元素:LLM 视觉比较分组
+4. 反思合并:识别被错误分开的组,合并为同一元素
+5. 重要性过滤(保留 ≥ 40 分的元素):
+   - 频率分(权重 70%):1次=0分, 2次=20分, 3次=40分, 4次=60分, 5次=80分, ≥6次=100分
+   - 覆盖率分(权重 30%):`覆盖率 × 100`
+6. 统一命名(使用上位概念,避免歧义)
+7. 分配元素 ID:`元素1`, `元素2` ...
+
+```json
+[
+  {
+    "元素ID": "元素1",
+    "统一名称": "人物",
+    "统一描述": "女性,长发,戴眼镜,职业装,站立姿态",
+    "出现段落": ["段落1.1.1", "段落2.1", "段落3.1"],
+    "重要性得分": 85
+  }
+]
+```
+
+---
+
+## 步骤 5:图片形式分析
+
+从"如何还原元素"的视角,提取每个段落/元素的视觉呈现方式。
+
+**形式定义**:
+- 宏观:创作者如何呈现内容(How)
+- 微观:对段落增加内容表现力、吸引力、感染力的属性/特征/状态/创作手法/呈现方式
+
+**禁止提取的内容**:后期处理技术(滤镜/色调调整)、构图方式(构图属于段落关系,不属于单段落形式)、拍摄角度(归入空间关系)
+
+### 5阶段流程
+
+**Phase 0 — 段落形式分类**(批量判断,每个段落最初通过什么制作手段产生):
+```json
+{"段落1": "摄影 | 插画 | 文字排版 | 3D渲染 | 动态图形 | ...", "段落1.2": "..."}
+```
+
+**Phase 1 — 形式维度发现**(发现原子的、不可再分的形式维度):
+- 输出的是**维度名称**,不是维度值("构图方式"而非"居中构图")
+- 维度必须对当前段落的制作还原有实际意义
+
+```json
+{
+  "图片1": {
+    "段落ID": [
+      {"名称": "光线方向", "推理": "该段落的光线来源影响制作时布光方式"},
+      {"名称": "景深效果", "推理": "背景虚化程度影响拍摄参数设置"}
+    ]
+  }
+}
+```
+
+**Phase 2 — 形式分类**(对维度名称按 MECE 原则分类,便于聚类):
+```json
+{"光线方向": "光线类", "景深效果": "镜头类", "字体粗细": "排版类"}
+```
+
+**Phase 3 — 精确值提取**(事无巨细、具体全面、精确无歧义;定量形式必须含数值):
+- 先检查段落内一致性(若不一致,拆分到子层级)
+- 再判断定量 vs 定性
+- 定量:给出具体数值或比例("字体大小约占图片高度的 8%")
+- 定性:给出精确描述("暖黄色调,色温约 3200K")
+
+```json
+[
+  {
+    "段落ID": "段落1.1",
+    "形式": [
+      {"名称": "光线方向", "描述": "右侧 45° 侧光,形成明显的明暗分界", "是否可定量": false},
+      {"名称": "景深效果", "描述": "背景虚化,估计光圈 f/1.8~f/2.8", "是否可定量": true}
+    ]
+  }
+]
+```
+
+---
+
+## 步骤 6:段内关系分析
+
+分析每个父段落与其**直接子节点**之间的关系。
+
+**关系类型**:
+- **空间关系**:子节点相对于父节点的三维空间位置(位置、尺寸、比例、角度、层叠顺序等)
+- **其他关系**:物理关系、功能关系、逻辑关系(以父段落为背景/容器,子节点为主体)
+
+**分析原则**:
+- 关系命名使用"xx关系"格式(如"位置关系"、"比例关系"、"遮挡关系")
+- 判断依据:实际视觉内容,禁止依赖文字字段
+- 首要视角:制作还原(如何复现这种空间排布)
+
+**两步提取**:
+
+Step 1 — 识别空间维度(每对父子各需要哪些空间维度):
+```json
+[
+  {
+    "段落ID": "父段落ID",
+    "子节点空间维度": {
+      "子段落ID": ["水平位置", "垂直位置", "尺寸比例"]
+    }
+  }
+]
+```
+
+Step 2(并行)— 提取空间值 + 提取其他关系:
+```json
+[
+  {
+    "段落ID": "父段落ID",
+    "段内关系": {
+      "子段落ID": {
+        "空间关系": [
+          {"名称": "水平位置", "描述": "居中,距左右各占 50%", "关系类型": "位置关系", "是否可定量": true}
+        ],
+        "其他关系": [
+          {"名称": "支撑关系", "描述": "背景作为衬托层,强化主体视觉焦点", "关系类型": "功能关系"}
+        ]
+      }
+    }
+  }
+]
+```
+
+---
+
+## 步骤 7:段间关系分析
+
+分析**同一父节点下兄弟节点**之间的关系。
+
+**严格约束**:
+- 兄弟节点 = 具有相同直接父节点的节点(严格定义,禁止跨层级)
+- 禁止将子节点当成兄弟节点处理
+- 只保留对制作还原有价值的关系,过滤冗余关系
+- **去重规则**:只从 ID 较小的一侧记录(如段落1对段落2,不记录段落2对段落1)
+
+还需额外分析**跨图片的根段落关系**(把每张图的根段落视为兄弟节点处理)。
+
+```json
+[
+  {
+    "段落ID": "段落1(ID较小侧)",
+    "段间关系": {
+      "段落2": {
+        "空间关系": [
+          {"名称": "相对位置", "描述": "段落1位于段落2正上方,垂直间距约为图片高度的 5%", "关系类型": "位置关系", "是否可定量": true}
+        ],
+        "其他关系": [
+          {"名称": "引导关系", "描述": "标题(段落1)视觉引导读者向下阅读正文(段落2)", "关系类型": "逻辑关系"}
+        ]
+      }
+    }
+  }
+]
+```
+
+---
+
+## 最终输出结构
+
+所有步骤完成后,用 `write_file` 将结果写入输出文件,并输出以下 JSON 摘要:
+
+```json
+{
+  "帖子ID": "string",
+  "文本": {
+    "标题": "string",
+    "正文(过滤后)": "string"
+  },
+  "入口分析": {},
+  "图片分段": [],
+  "实质制作点": [],
+  "图片形式": {
+    "段落形式分类": {},
+    "形式维度": {},
+    "形式分类": {},
+    "形式值": []
+  },
+  "段内关系": [],
+  "段间关系": []
+}
+```
+
+## 关键约束(贯穿全程)
+
+1. **泛化优先**:始终描述制作规律,而非内容细节
+2. **视觉判断优先**:所有判断基于实际可见内容,禁止依赖名称/描述等文字字段
+3. **制作还原视角**:始终从"如何制作出这个效果"的角度分析
+4. **结构化输出**:每步严格按 JSON schema 输出,不允许随意变更结构
+5. **步骤间数据复用**:后续步骤引用前面步骤的段落 ID,保持一致性

+ 5 - 5
docs/trace-api.md

@@ -246,15 +246,15 @@ Content-Type: application/json
 
 {
   "messages": [{"role": "user", "content": "..."}],
-  "after_sequence": null
+  "after_message_id": null
 }
 ```
 
-- `after_sequence: null`(或省略)→ 从末尾续跑
-- `after_sequence: N`(主路径上且 < head)→ 回溯到 sequence N 后运行
-- `messages: []` + `after_sequence: N` → 重新生成
+- `after_message_id: null`(或省略)→ 从末尾续跑
+- `after_message_id: "<message_id>"`(主路径上且 < head)→ 回溯到该消息后运行
+- `messages: []` + `after_message_id: "<message_id>"` → 重新生成
 
-Runner 根据 `after_sequence` 与 `head_sequence` 的关系自动判断续跑/回溯行为。
+Runner 根据解析出的 sequence 与 `head_sequence` 的关系自动判断续跑/回溯行为。
 
 #### 6. 停止运行中的 Trace
 

+ 12 - 0
examples/how/README.md

@@ -0,0 +1,12 @@
+
+## 运行方法
+1. 输入:将原始帖子内容放到 `examples/how/input/` 文件夹下
+2. 运行:在项目根目录下运行 `python examples/how/run.py`
+3. 输出:在 `examples/how/output` 中查看
+
+## prompt调试
+- 主Agent(调度与评估):修改 `examples/how/production.prompt`
+    - 原始输入/参考资料等等都可以在这里输入文件路径
+    - 可以在这里指定各类输出的保存路径
+- 制作表解构Agent:修改 `examples/how/skills/deconstruct.md` , 这部分内容会在主Agent创建子Agent时作为子Agent的system prompt
+- 还原Agent:修改 `examples/how/skills/construct.md` , 这部分内容会在主Agent创建子Agent时作为子Agent的system prompt

+ 46 - 0
examples/how/analyze_images.py

@@ -0,0 +1,46 @@
+import warnings
+warnings.filterwarnings('ignore')
+from PIL import Image
+import os, json
+
+os.makedirs('examples/how/features', exist_ok=True)
+
+results = []
+for i in range(1, 10):
+    path = f'examples/how/input_local_archive/{i}.jpeg'
+    img = Image.open(path)
+    img_rgb = img.convert('RGB')
+    
+    # Save thumbnail
+    thumb = img_rgb.resize((360, 480))
+    thumb.save(f'examples/how/features/thumb_{i}.jpg', 'JPEG', quality=85)
+    
+    # Get color info
+    small = img_rgb.resize((50, 50))
+    pixels = list(small.getdata())
+    r = sum(p[0] for p in pixels) // len(pixels)
+    g = sum(p[1] for p in pixels) // len(pixels)
+    b = sum(p[2] for p in pixels) // len(pixels)
+    
+    # Get quadrant colors (top/bottom/left/right)
+    w, h = img_rgb.size
+    top = img_rgb.crop((0, 0, w, h//3)).resize((10,10))
+    mid = img_rgb.crop((0, h//3, w, 2*h//3)).resize((10,10))
+    bot = img_rgb.crop((0, 2*h//3, w, h)).resize((10,10))
+    
+    def avg_color(region):
+        px = list(region.getdata())
+        return (sum(p[0] for p in px)//len(px), sum(p[1] for p in px)//len(px), sum(p[2] for p in px)//len(px))
+    
+    results.append({
+        'index': i,
+        'size': img.size,
+        'format': img.format,
+        'avg_rgb': (r, g, b),
+        'top_rgb': avg_color(top),
+        'mid_rgb': avg_color(mid),
+        'bot_rgb': avg_color(bot),
+    })
+    print(f'{i}.jpeg: size={img.size}, avg=({r},{g},{b}), top={avg_color(top)}, mid={avg_color(mid)}, bot={avg_color(bot)}')
+
+print('\nDone! Thumbnails saved to examples/how/features/')

+ 12 - 0
examples/how/encode_images.py

@@ -0,0 +1,12 @@
+import base64, json
+
+images = {}
+for i in range(1, 10):
+    path = f'examples/how/input_local_archive/{i}.jpeg'
+    with open(path, 'rb') as f:
+        data = base64.b64encode(f.read()).decode()
+    images[str(i)] = data
+
+with open('examples/how/features/images_b64.json', 'w') as f:
+    json.dump(images, f)
+print('done')

Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/images_b64.json


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img1_b64.txt


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img2_b64.txt


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img3_b64.txt


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img4_b64.txt


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img5_b64.txt


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img6_b64.txt


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img7_b64.txt


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img8_b64.txt


Разница между файлами не показана из-за своего большого размера
+ 0 - 0
examples/how/features/img9_b64.txt


BIN
examples/how/features/thumb_1.jpg


BIN
examples/how/features/thumb_2.jpg


BIN
examples/how/features/thumb_3.jpg


BIN
examples/how/features/thumb_4.jpg


BIN
examples/how/features/thumb_5.jpg


BIN
examples/how/features/thumb_6.jpg


BIN
examples/how/features/thumb_7.jpg


BIN
examples/how/features/thumb_8.jpg


BIN
examples/how/features/thumb_9.jpg


BIN
examples/how/input/1.jpeg


BIN
examples/how/input/3.jpeg


BIN
examples/how/input/7.jpeg


+ 9 - 0
examples/how/input/《秋日际遇》写生油画.json

@@ -0,0 +1,9 @@
+{
+  "images": [
+    "examples/how/input/1.jpeg",
+    "examples/how/input/3.jpeg",
+    "examples/how/input/7.jpeg"
+  ],
+  "body_text": "听闻秋日是倒放的春天\n于是我心中有一座秋日的花园\n栽种着一簇簇淡却温暖的花\n风沿着远边的山吹来\n热情的阳光里秋风微凉\n与颜料一起酝酿出的画面\n白裙是一抹无暇\n迎着光绘画出\n那片在我心上开满\n限定的浪漫\n被画架支起\n绿草坪还驻留了匆匆而过的热闹\n再添一笔白\n为我画一枝玫瑰的奇遇\n———@万淮 #草地拍照[话题]##画画[话题]#",
+  "title": "《秋日际遇》写生油画"
+}

+ 9 - 0
examples/how/load_imgs.py

@@ -0,0 +1,9 @@
+import base64
+
+imgs = {}
+for i in range(1, 10):
+    with open(f'examples/how/features/img{i}_b64.txt') as f:
+        imgs[i] = f.read().strip()
+
+for i, d in imgs.items():
+    print(f'img{i}: len={len(d)}')

+ 14 - 0
examples/how/presets.json

@@ -0,0 +1,14 @@
+{
+  "deconstruct": {
+    "max_iterations": 500,
+    "temperature": 0.3,
+    "skills": ["planning", "research", "browser", "deconstruct"],
+    "description": "解构 Agent,将社交媒体帖子解构为可还原的结构化制作脚本"
+  },
+  "construct": {
+    "max_iterations": 500,
+    "temperature": 0.7,
+    "skills": ["planning", "research", "browser", "construct"],
+    "description": "建构 Agent,基于解构产物生成内容并输出执行报告"
+  }
+}

+ 48 - 0
examples/how/production.prompt

@@ -0,0 +1,48 @@
+---
+model: sonnet-4.6
+temperature: 0.5
+---
+
+$system$
+你是社交媒体内容研究员。目标是通过「解构→建构→评估」的迭代循环,产出一份优质帖子的高质量解构产物(制作表)。
+
+解构产物的价值不在于建构本身,而在于:它能揭示让这篇内容优秀的关键创作规律,并且能在不同内容上泛化。
+
+## 工作流程
+
+**第一轮**:
+1. 调用 deconstruct agent,传入原帖的完整多模态内容,获取 制作表;注意:
+    - 你可以直接给deconstruct agent输入文件夹路径
+    - 它会自动加载如何解构内容的skill:examples/how/skills/deconstruct.md作为system prompt
+    - 指定解构结果的保存路径
+2. 调用 construct agent,传入解构产物 制作表,得到生成内容
+3. 对比建构结果与原帖,做出评估
+
+**后续迭代**(如有必要):
+4. 根据建构 agent 的执行报告和你的对比观察,判断解构哪里不够准确或不够完整,或者建构做的不够好
+5. 带着具体的修改意见再次调用解构或建构 agent(通过 `continue_from` 复用已有 trace,或重新调用并说明改进方向)
+6. 评估结果,并重复以上环节,直到满意
+
+## 评估标准
+
+评估时关注以下维度(抓最关键的差距,不需要面面俱到):
+- **核心洞察是否被体现**:建构内容有没有抓住原帖"为什么好"的关键
+- **视觉结构是否对应**:主要元素的位置、层级、比例关系
+- **形式感是否一致**:整体调性、制作手段、视觉风格
+- **文字是否匹配**:标题逻辑、正文节奏
+
+## 终止条件
+
+满足以下任一条件时停止迭代,输出最终解构产物:
+- 建构结果与原帖在核心维度高度吻合
+- 差距来自建构工具能力上限,而非解构质量问题
+- 迭代超过 3 轮且边际改善明显收窄
+
+## 输出
+
+注意输出过程中的制作表和还原产物,每一轮次的结果应该输出到examples/how/output中的一个子文件夹。
+输出最终解构产物 制作表JSON 和相关特征 以及 还原结果,保存到examples/how/output/final,并附上一段简短的研究备注(这篇内容的核心创作规律是什么,迭代过程中发现了什么)。
+
+$user$
+请对下面这篇社交媒体帖子进行解构-建构-评估迭代,产出高质量解构产物。
+原始帖子信息:examples/how/input/《秋日际遇》写生油画.json

+ 26 - 0
examples/how/resource/input_cloud_archive/《秋日际遇》写生油画.json

@@ -0,0 +1,26 @@
+{
+  "channel_content_id": "616192600000000021034642",
+  "link": "https://www.xiaohongshu.com/explore/616192600000000021034642",
+  "comment_count": 0,
+  "images": [
+    "http://res.cybertogether.net/crawler/image/5b94399f3bdef0a80b98e2734e110ca2.jpeg",
+    "http://res.cybertogether.net/crawler/image/6d80c193ccd0b047e0f3354ed6aca355.jpeg",
+    "http://res.cybertogether.net/crawler/image/2ba333062a7370ce229696fc36b9a060.jpeg",
+    "http://res.cybertogether.net/crawler/image/8187a1ad4e56295ab13d881d0ef7c934.jpeg",
+    "http://res.cybertogether.net/crawler/image/16fc8596b7c12031e910eb517859045c.jpeg",
+    "http://res.cybertogether.net/crawler/image/15a29cb486344bc10e90402371e21c92.jpeg",
+    "http://res.cybertogether.net/crawler/image/e70bbea964cfcf0225744da00e8e7939.jpeg",
+    "http://res.cybertogether.net/crawler/image/d20b73ad445c7dce64983159bc6cdae0.jpeg",
+    "http://res.cybertogether.net/crawler/image/c4c73c1b32f8066cc40a43ce61f61364.jpeg"
+  ],
+  "like_count": 411,
+  "body_text": "听闻秋日是倒放的春天\n于是我心中有一座秋日的花园\n栽种着一簇簇淡却温暖的花\n风沿着远边的山吹来\n热情的阳光里秋风微凉\n与颜料一起酝酿出的画面\n白裙是一抹无暇\n迎着光绘画出\n那片在我心上开满\n限定的浪漫\n被画架支起\n绿草坪还驻留了匆匆而过的热闹\n再添一笔白\n为我画一枝玫瑰的奇遇\n———@万淮 #草地拍照[话题]##画画[话题]#",
+  "title": "《秋日际遇》写生油画",
+  "collect_count": 181,
+  "channel_account_id": "584fc4a36a6a693eef600ec3",
+  "channel_account_name": "糯米和Kilala",
+  "content_type": "note",
+  "video": "",
+  "publish_timestamp": 1633784416000,
+  "publish_time": "2021-10-09 21:00:16"
+}

BIN
examples/how/resource/input_local_archive/1.jpeg


BIN
examples/how/resource/input_local_archive/2.jpeg


BIN
examples/how/resource/input_local_archive/3.jpeg


BIN
examples/how/resource/input_local_archive/4.jpeg


BIN
examples/how/resource/input_local_archive/5.jpeg


BIN
examples/how/resource/input_local_archive/6.jpeg


BIN
examples/how/resource/input_local_archive/7.jpeg


BIN
examples/how/resource/input_local_archive/8.jpeg


BIN
examples/how/resource/input_local_archive/9.jpeg


+ 26 - 0
examples/how/resource/input_local_archive/《秋日际遇》写生油画.json

@@ -0,0 +1,26 @@
+{
+  "channel_content_id": "616192600000000021034642",
+  "link": "https://www.xiaohongshu.com/explore/616192600000000021034642",
+  "comment_count": 0,
+  "images": [
+    "examples/how/input/1.jpeg",
+    "examples/how/input/2.jpeg",
+    "examples/how/input/3.jpeg",
+    "examples/how/input/4.jpeg",
+    "examples/how/input/5.jpeg",
+    "examples/how/input/6.jpeg",    
+    "examples/how/input/7.jpeg",
+    "examples/how/input/8.jpeg",
+    "examples/how/input/9.jpeg"
+  ],
+  "like_count": 411,
+  "body_text": "听闻秋日是倒放的春天\n于是我心中有一座秋日的花园\n栽种着一簇簇淡却温暖的花\n风沿着远边的山吹来\n热情的阳光里秋风微凉\n与颜料一起酝酿出的画面\n白裙是一抹无暇\n迎着光绘画出\n那片在我心上开满\n限定的浪漫\n被画架支起\n绿草坪还驻留了匆匆而过的热闹\n再添一笔白\n为我画一枝玫瑰的奇遇\n———@万淮 #草地拍照[话题]##画画[话题]#",
+  "title": "《秋日际遇》写生油画",
+  "collect_count": 181,
+  "channel_account_id": "584fc4a36a6a693eef600ec3",
+  "channel_account_name": "糯米和Kilala",
+  "content_type": "note",
+  "video": "",
+  "publish_timestamp": 1633784416000,
+  "publish_time": "2021-10-09 21:00:16"
+}

+ 529 - 0
examples/how/run.py

@@ -0,0 +1,529 @@
+"""
+示例(增强版)
+
+使用 Agent 模式 + Skills
+
+新增功能:
+1. 支持命令行随时打断(输入 'p' 暂停,'q' 退出)
+2. 暂停后可插入干预消息
+3. 支持触发经验总结
+4. 查看当前 GoalTree
+5. 框架层自动清理不完整的工具调用
+6. 支持通过 --trace <ID> 恢复已有 Trace 继续执行
+"""
+
+import argparse
+import os
+import sys
+import select
+import asyncio
+from pathlib import Path
+
+# Clash Verge TUN 模式兼容:禁止 httpx/urllib 自动检测系统 HTTP 代理
+# TUN 虚拟网卡已在网络层接管所有流量,不需要应用层再走 HTTP 代理,
+# 否则 httpx 检测到 macOS 系统代理 (127.0.0.1:7897) 会导致 ConnectError
+os.environ.setdefault("no_proxy", "*")
+
+# 添加项目根目录到 Python 路径
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+from agent.llm.prompts import SimplePrompt
+from agent.core.runner import AgentRunner, RunConfig
+from agent.core.presets import AgentPreset, register_preset
+from agent.trace import (
+    FileSystemTraceStore,
+    Trace,
+    Message,
+)
+from agent.llm import create_openrouter_llm_call
+
+
+# ===== 非阻塞 stdin 检测 =====
+if sys.platform == 'win32':
+    import msvcrt
+
+def check_stdin() -> str | None:
+    """
+    跨平台非阻塞检查 stdin 输入。
+    Windows: 使用 msvcrt.kbhit()
+    macOS/Linux: 使用 select.select()
+    """
+    if sys.platform == 'win32':
+        # 检查是否有按键按下
+        if msvcrt.kbhit():
+            # 读取按下的字符(msvcrt.getwch 是非阻塞读取宽字符)
+            ch = msvcrt.getwch().lower()
+            if ch == 'p':
+                return 'pause'
+            if ch == 'q':
+                return 'quit'
+            # 如果是其他按键,可以选择消耗掉或者忽略
+        return None
+    else:
+        # Unix/Mac 逻辑
+        ready, _, _ = select.select([sys.stdin], [], [], 0)
+        if ready:
+            line = sys.stdin.readline().strip().lower()
+            if line in ('p', 'pause'):
+                return 'pause'
+            if line in ('q', 'quit'):
+                return 'quit'
+        return None
+
+
+# ===== 交互菜单 =====
+
+def _read_multiline() -> str:
+    """
+    读取多行输入,以连续两次回车(空行)结束。
+
+    单次回车只是换行,不会提前终止输入。
+    """
+    print("\n请输入干预消息(连续输入两次回车结束):")
+    lines: list[str] = []
+    blank_count = 0
+    while True:
+        line = input()
+        if line == "":
+            blank_count += 1
+            if blank_count >= 2:
+                break
+            lines.append("")          # 保留单个空行
+        else:
+            blank_count = 0
+            lines.append(line)
+
+    # 去掉尾部多余空行
+    while lines and lines[-1] == "":
+        lines.pop()
+    return "\n".join(lines)
+
+
+async def show_interactive_menu(
+    runner: AgentRunner,
+    trace_id: str,
+    current_sequence: int,
+    store: FileSystemTraceStore,
+):
+    """
+    显示交互式菜单,让用户选择操作。
+
+    进入本函数前不再有后台线程占用 stdin,所以 input() 能正常工作。
+    """
+    print("\n" + "=" * 60)
+    print("  执行已暂停")
+    print("=" * 60)
+    print("请选择操作:")
+    print("  1. 插入干预消息并继续")
+    print("  2. 触发经验总结(reflect)")
+    print("  3. 查看当前 GoalTree")
+    print("  4. 继续执行")
+    print("  5. 停止执行")
+    print("=" * 60)
+
+    while True:
+        choice = input("请输入选项 (1-5): ").strip()
+
+        if choice == "1":
+            text = _read_multiline()
+            if not text:
+                print("未输入任何内容,取消操作")
+                continue
+
+            print(f"\n将插入干预消息并继续执行...")
+            # 从 store 读取实际的 last_sequence,避免本地 current_sequence 过时
+            live_trace = await store.get_trace(trace_id)
+            actual_sequence = live_trace.last_sequence if live_trace and live_trace.last_sequence else current_sequence
+            return {
+                "action": "continue",
+                "messages": [{"role": "user", "content": text}],
+                "after_sequence": actual_sequence,
+            }
+
+        elif choice == "2":
+            # 触发经验总结
+            print("\n触发经验总结...")
+            focus = input("请输入反思重点(可选,直接回车跳过): ").strip()
+
+            from agent.trace.compaction import build_reflect_prompt
+
+            # 保存当前 head_sequence
+            trace = await store.get_trace(trace_id)
+            saved_head = trace.head_sequence
+
+            prompt = build_reflect_prompt()
+            if focus:
+                prompt += f"\n\n请特别关注:{focus}"
+
+            print("正在生成反思...")
+            reflect_cfg = RunConfig(trace_id=trace_id, max_iterations=1, tools=[])
+
+            reflection_text = ""
+            try:
+                result = await runner.run_result(
+                    messages=[{"role": "user", "content": prompt}],
+                    config=reflect_cfg,
+                )
+                reflection_text = result.get("summary", "")
+            finally:
+                # 恢复 head_sequence(反思消息成为侧枝)
+                await store.update_trace(trace_id, head_sequence=saved_head)
+
+            # 追加到 experiences 文件
+            if reflection_text:
+                from datetime import datetime
+                experiences_path = runner.experiences_path or "./.cache/experiences.md"
+                os.makedirs(os.path.dirname(experiences_path), exist_ok=True)
+                header = f"\n\n---\n\n## {trace_id} ({datetime.now().strftime('%Y-%m-%d %H:%M')})\n\n"
+                with open(experiences_path, "a", encoding="utf-8") as f:
+                    f.write(header + reflection_text + "\n")
+                print(f"\n反思已保存到: {experiences_path}")
+                print("\n--- 反思内容 ---")
+                print(reflection_text)
+                print("--- 结束 ---\n")
+            else:
+                print("未生成反思内容")
+
+            continue
+
+        elif choice == "3":
+            goal_tree = await store.get_goal_tree(trace_id)
+            if goal_tree and goal_tree.goals:
+                print("\n当前 GoalTree:")
+                print(goal_tree.to_prompt())
+            else:
+                print("\n当前没有 Goal")
+            continue
+
+        elif choice == "4":
+            print("\n继续执行...")
+            return {"action": "continue"}
+
+        elif choice == "5":
+            print("\n停止执行...")
+            return {"action": "stop"}
+
+        else:
+            print("无效选项,请重新输入")
+
+
+async def main():
+    # 解析命令行参数
+    parser = argparse.ArgumentParser(description="任务 (Agent 模式 + 交互增强)")
+    parser.add_argument(
+        "--trace", type=str, default=None,
+        help="已有的 Trace ID,用于恢复继续执行(不指定则新建)",
+    )
+    args = parser.parse_args()
+
+    # 路径配置
+    base_dir = Path(__file__).parent
+    project_root = base_dir.parent.parent
+    prompt_path = base_dir / "production.prompt"
+    output_dir = base_dir / "output_1"
+    output_dir.mkdir(exist_ok=True)
+
+    # 加载项目级 presets(examples/how/presets.json)
+    presets_path = base_dir / "presets.json"
+    if presets_path.exists():
+        import json
+        with open(presets_path, "r", encoding="utf-8") as f:
+            project_presets = json.load(f)
+        for name, cfg in project_presets.items():
+            register_preset(name, AgentPreset(**cfg))
+        print(f"   - 已加载项目 presets: {list(project_presets.keys())}")
+
+    # Skills 目录(可选:用户自定义 skills)
+    # 注意:内置 skills(agent/memory/skills/)会自动加载
+    skills_dir = str(base_dir / "skills")
+
+    print("=" * 60)
+    print("mcp/skills 发现、获取、评价 分析任务 (Agent 模式 + 交互增强)")
+    print("=" * 60)
+    print()
+    print("💡 交互提示:")
+    print("   - 执行过程中输入 'p' 或 'pause' 暂停并进入交互模式")
+    print("   - 执行过程中输入 'q' 或 'quit' 停止执行")
+    print("=" * 60)
+    print()
+
+    # 1. 加载 prompt
+    print("1. 加载 prompt 配置...")
+    prompt = SimplePrompt(prompt_path)
+
+    # 2. 构建消息(仅新建时使用,恢复时消息已在 trace 中)
+    print("2. 构建任务消息...")
+    messages = prompt.build_messages()
+
+    # 3. 创建 Agent Runner(配置 skills)
+    print("3. 创建 Agent Runner...")
+    print(f"   - Skills 目录: {skills_dir}")
+    print(f"   - 模型: {prompt.config.get('model', 'sonnet-4.5')}")
+
+    store = FileSystemTraceStore(base_path=".trace")
+    runner = AgentRunner(
+        trace_store=store,
+        llm_call=create_openrouter_llm_call(model=f"anthropic/claude-{prompt.config.get('model', 'sonnet-4.5')}"),
+        skills_dir=skills_dir,
+        debug=True
+    )
+
+    # 4. 判断是新建还是恢复
+    resume_trace_id = args.trace
+    if resume_trace_id:
+        # 验证 trace 存在
+        existing_trace = await store.get_trace(resume_trace_id)
+        if not existing_trace:
+            print(f"\n错误: Trace 不存在: {resume_trace_id}")
+            sys.exit(1)
+        print(f"4. 恢复已有 Trace: {resume_trace_id[:8]}...")
+        print(f"   - 状态: {existing_trace.status}")
+        print(f"   - 消息数: {existing_trace.total_messages}")
+        print(f"   - 任务: {existing_trace.task}")
+    else:
+        print(f"4. 启动新 Agent 模式...")
+
+    print()
+
+    final_response = ""
+    current_trace_id = resume_trace_id
+    current_sequence = 0
+    should_exit = False
+
+    try:
+        # 恢复模式:不发送初始消息,只指定 trace_id 续跑
+        if resume_trace_id:
+            initial_messages = None  # None = 未设置,触发早期菜单检查
+            config = RunConfig(
+                model=f"claude-{prompt.config.get('model', 'sonnet-4.5')}",
+                temperature=float(prompt.config.get('temperature', 0.3)),
+                max_iterations=1000,
+                trace_id=resume_trace_id,
+            )
+        else:
+            initial_messages = messages
+            config = RunConfig(
+                model=f"claude-{prompt.config.get('model', 'sonnet-4.5')}",
+                temperature=float(prompt.config.get('temperature', 0.3)),
+                max_iterations=1000,
+                name="社交媒体内容解构、建构、评估任务",
+            )
+
+        while not should_exit:
+            # 如果是续跑,需要指定 trace_id
+            if current_trace_id:
+                config.trace_id = current_trace_id
+
+            # 清理上一轮的响应,避免失败后显示旧内容
+            final_response = ""
+
+            # 如果 trace 已完成/失败且没有新消息,直接进入交互菜单
+            # 注意:initial_messages 为 None 表示未设置(首次加载),[] 表示有意为空(用户选择"继续")
+            if current_trace_id and initial_messages is None:
+                check_trace = await store.get_trace(current_trace_id)
+                if check_trace and check_trace.status in ("completed", "failed"):
+                    if check_trace.status == "completed":
+                        print(f"\n[Trace] ✅ 已完成")
+                        print(f"  - Total messages: {check_trace.total_messages}")
+                        print(f"  - Total cost: ${check_trace.total_cost:.4f}")
+                    else:
+                        print(f"\n[Trace] ❌ 已失败: {check_trace.error_message}")
+                    current_sequence = check_trace.head_sequence
+
+                    menu_result = await show_interactive_menu(
+                        runner, current_trace_id, current_sequence, store
+                    )
+
+                    if menu_result["action"] == "stop":
+                        break
+                    elif menu_result["action"] == "continue":
+                        new_messages = menu_result.get("messages", [])
+                        if new_messages:
+                            initial_messages = new_messages
+                            config.after_sequence = menu_result.get("after_sequence")
+                        else:
+                            # 无新消息:对 failed trace 意味着重试,对 completed 意味着继续
+                            initial_messages = []
+                            config.after_sequence = None
+                        continue
+                    break
+
+                # 对 stopped/running 等非终态的 trace,直接续跑
+                initial_messages = []
+
+            print(f"{'▶️ 开始执行...' if not current_trace_id else '▶️ 继续执行...'}")
+
+            # 执行 Agent
+            paused = False
+            try:
+                async for item in runner.run(messages=initial_messages, config=config):
+                    # 检查用户中断
+                    cmd = check_stdin()
+                    if cmd == 'pause':
+                        # 暂停执行
+                        print("\n⏸️ 正在暂停执行...")
+                        if current_trace_id:
+                            await runner.stop(current_trace_id)
+
+                        # 等待一小段时间让 runner 处理 stop 信号
+                        await asyncio.sleep(0.5)
+
+                        # 显示交互菜单
+                        menu_result = await show_interactive_menu(
+                            runner, current_trace_id, current_sequence, store
+                        )
+
+                        if menu_result["action"] == "stop":
+                            should_exit = True
+                            paused = True
+                            break
+                        elif menu_result["action"] == "continue":
+                            # 检查是否有新消息需要插入
+                            new_messages = menu_result.get("messages", [])
+                            if new_messages:
+                                # 有干预消息,需要重新启动循环
+                                initial_messages = new_messages
+                                after_seq = menu_result.get("after_sequence")
+                                if after_seq is not None:
+                                    config.after_sequence = after_seq
+                                paused = True
+                                break
+                            else:
+                                # 没有新消息,需要重启执行
+                                initial_messages = []
+                                config.after_sequence = None
+                                paused = True
+                                break
+
+                    elif cmd == 'quit':
+                        print("\n🛑 用户请求停止...")
+                        if current_trace_id:
+                            await runner.stop(current_trace_id)
+                        should_exit = True
+                        break
+
+                    # 处理 Trace 对象(整体状态变化)
+                    if isinstance(item, Trace):
+                        current_trace_id = item.trace_id
+                        if item.status == "running":
+                            print(f"[Trace] 开始: {item.trace_id[:8]}...")
+                        elif item.status == "completed":
+                            print(f"\n[Trace] ✅ 完成")
+                            print(f"  - Total messages: {item.total_messages}")
+                            print(f"  - Total tokens: {item.total_tokens}")
+                            print(f"  - Total cost: ${item.total_cost:.4f}")
+                        elif item.status == "failed":
+                            print(f"\n[Trace] ❌ 失败: {item.error_message}")
+                        elif item.status == "stopped":
+                            print(f"\n[Trace] ⏸️ 已停止")
+
+                    # 处理 Message 对象(执行过程)
+                    elif isinstance(item, Message):
+                        current_sequence = item.sequence
+
+                        if item.role == "assistant":
+                            content = item.content
+                            if isinstance(content, dict):
+                                text = content.get("text", "")
+                                tool_calls = content.get("tool_calls")
+
+                                if text and not tool_calls:
+                                    # 纯文本回复(最终响应)
+                                    final_response = text
+                                    print(f"\n[Response] Agent 回复:")
+                                    print(text)
+                                elif text:
+                                    preview = text[:150] + "..." if len(text) > 150 else text
+                                    print(f"[Assistant] {preview}")
+
+                                if tool_calls:
+                                    for tc in tool_calls:
+                                        tool_name = tc.get("function", {}).get("name", "unknown")
+                                        print(f"[Tool Call] 🛠️  {tool_name}")
+
+                        elif item.role == "tool":
+                            content = item.content
+                            if isinstance(content, dict):
+                                tool_name = content.get("tool_name", "unknown")
+                                print(f"[Tool Result] ✅ {tool_name}")
+                            if item.description:
+                                desc = item.description[:80] if len(item.description) > 80 else item.description
+                                print(f"  {desc}...")
+
+            except Exception as e:
+                print(f"\n执行出错: {e}")
+                import traceback
+                traceback.print_exc()
+
+            # paused → 菜单已在暂停时内联显示过
+            if paused:
+                if should_exit:
+                    break
+                continue
+
+            # quit → 直接退出
+            if should_exit:
+                break
+
+            # Runner 退出(完成/失败/停止/异常)→ 显示交互菜单
+            if current_trace_id:
+                menu_result = await show_interactive_menu(
+                    runner, current_trace_id, current_sequence, store
+                )
+
+                if menu_result["action"] == "stop":
+                    break
+                elif menu_result["action"] == "continue":
+                    new_messages = menu_result.get("messages", [])
+                    if new_messages:
+                        initial_messages = new_messages
+                        config.after_sequence = menu_result.get("after_sequence")
+                    else:
+                        initial_messages = []
+                        config.after_sequence = None
+                    continue
+            break
+
+    except KeyboardInterrupt:
+        print("\n\n用户中断 (Ctrl+C)")
+        if current_trace_id:
+            await runner.stop(current_trace_id)
+
+    # 6. 输出结果
+    if final_response:
+        print()
+        print("=" * 60)
+        print("Agent 响应:")
+        print("=" * 60)
+        print(final_response)
+        print("=" * 60)
+        print()
+
+        # 7. 保存结果
+        output_file = output_dir / "result.txt"
+        with open(output_file, 'w', encoding='utf-8') as f:
+            f.write(final_response)
+
+        print(f"✓ 结果已保存到: {output_file}")
+        print()
+
+    # 可视化提示
+    if current_trace_id:
+        print("=" * 60)
+        print("可视化 Step Tree:")
+        print("=" * 60)
+        print("1. 启动 API Server:")
+        print("   python3 api_server.py")
+        print()
+        print("2. 浏览器访问:")
+        print("   http://localhost:8000/api/traces")
+        print()
+        print(f"3. Trace ID: {current_trace_id}")
+        print("=" * 60)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 8 - 0
examples/how/save_b64.py

@@ -0,0 +1,8 @@
+import base64, os
+os.makedirs('examples/how/features', exist_ok=True)
+for i in range(1, 10):
+    with open(f'examples/how/input_local_archive/{i}.jpeg', 'rb') as f:
+        d = base64.b64encode(f.read()).decode()
+    with open(f'examples/how/features/img{i}_b64.txt', 'w') as out:
+        out.write(d)
+    print(f'saved img{i}_b64.txt, len={len(d)}')

+ 48 - 0
examples/how/skills/construct.md

@@ -0,0 +1,48 @@
+---
+name: construct
+description: 建构社交媒体帖子内容
+---
+
+## 角色
+
+你是社媒内容生成专家。给定一份制作表,将其转化为实际内容。
+
+---
+
+## 输出
+
+将最终的生成内容组织到输出文件夹中。不同版本的输出应该分别是一个子文件夹。
+
+此外,应该输出执行报告:
+
+```json
+{
+  "建构结果": {
+    "保存路径": "examples/how/output",
+    "文本": {
+      "标题": "string",
+      "正文": "string"
+    },
+    "图片": [
+      {
+        "图片": "图片1",
+        "生成prompt": "用于生成该图的完整 prompt",
+        "生成方式": "使用了哪个工具/API"
+      }
+    ]
+  },
+  "执行报告": {
+    "成功体现": ["哪些关键创作决策被清晰落地"],
+    "未能落地": ["哪些字段无法体现,或结果不确定是否达到"],
+    "疑问": ["解构中哪些信息不够清晰,影响了建构判断,需要主 agent 注意"]
+  }
+}
+```
+
+---
+
+## 原则
+
+- **解构优先**:以解构 JSON 为主要依据,在建构目标允许的探索空间内发挥
+- **如实报告**:不确定的地方直接标注,便于主 agent 评估和迭代
+- **宁缺毋滥**:不确定的决策宁可省略,不要随意填充导致方向偏离

+ 120 - 0
examples/how/skills/deconstruct.md

@@ -0,0 +1,120 @@
+---
+name: deconstruct
+description: 从制作层解构社交媒体帖子,提取视觉制作决策
+---
+
+## 角色
+
+你是制作还原解构专家。给定一篇优质社交媒体帖子(图片 + 文字),分析其**制作层**——视觉结构、元素形式、元素关系——提取能够支撑还原这篇内容的制作脚本。
+
+**核心问题**:这篇帖子里,哪些决策让它优于同类内容?去掉某个决策后内容会明显变差,才值得记录。
+
+---
+
+## 制作层的核心概念
+
+这两组区分是制作解构的基础,分析时始终从这个视角出发:
+
+**实质 vs 形式**
+- **实质**:元素是什么、包含什么——人物、产品、文字内容、场景
+- **形式**:元素如何呈现——构图、色调、比例、质感、字体、层次、光影
+
+**形式分类**:追溯每个元素最初通过什么手段产生。不是后期处理方式,是**源头制作方式**。常见分类:拍摄、插画、排版、AI 生成、截图、后期合成。
+
+---
+
+## 多模态特征提取
+
+文字描述无法精确表达某些视觉信息——人物的姿态骨架、面部轮廓、色彩分布、深度层次。对这类信息,**提取多模态特征文件**,并在制作表中保留文件索引。
+
+**何时提取**:当某个元素的视觉特征对还原至关重要,且纯文字描述会丢失关键精度时。常见场景:
+- 人物主体:姿态(骨骼关键点图)、面部特征(面部网格/特征点图)
+- 整体色调:色彩分布(调色板图、色彩分割图)
+- 空间结构:深度图、构图线条图(用于 ControlNet)
+- 特定纹理或材质:局部纹理提取图
+
+**提取原则**:
+- 使用图像/数值等多模态格式,不使用自然语言作为唯一表示
+- 特征文件保存至 `./features/<元素名>/` 子目录
+- 制作表中只记录文件路径(不嵌入文件内容)
+- 只对还原必要的关键元素提取,不是每个元素都需要
+
+---
+
+## 分析视角
+
+**内容视角**(先判断,影响对图片的解读角度):
+- **关注理念**:作者借具体事物传达抽象含义(符号化,借物喻义)
+- **关注表现**:作者直接展示事物本身的状态与细节
+
+**多图对比**(如有多图):
+- **固定**:跨图保持不变的制作要素 → 往往是创作者刻意为之的核心设计
+- **变化**:跨图有意变化的制作要素 → 往往是叙事或节奏策略
+
+---
+
+## 输出格式
+
+输出一个 JSON,并将其保存到指定输出目录下。**只填写对这篇帖子有意义的字段**,不强制填写所有字段,不强制填满每个层级。
+
+特征文件保存至 `./features/<元素名>/`,制作表中以路径引用。
+
+```json
+{
+  "内容视角": "关注理念 | 关注表现,一句话说明",
+  "核心洞察": "一句话:这篇内容在制作上为什么优秀",
+
+  "多图规律": {
+    "固定": "跨图保持一致的制作要素",
+    "变化": "跨图有意变化的制作要素"
+  },
+
+  "图片制作": [
+    {
+      "图片": "图片1",
+      "元素": [
+        {
+          "名称": "语义化名称",
+          "内容类型": "文字 | 图片",
+          "实质": "是什么(简短)",
+          "形式分类": "拍摄 | 插画 | 排版 | AI生成 | 后期合成 | ...",
+          "关键形式": ["影响视觉效果的原子属性,如:居中构图、暖光氛围、衬线字体"],
+          "特征文件": {
+            "姿态": "./features/主体人物/pose.png",
+            "面部": "./features/主体人物/face_mesh.png",
+            "深度图": "./features/主体人物/depth.png"
+          },
+          "子元素": []
+        }
+      ],
+      "元素关系": [
+        "主体居中占画面 60%,文字叠加于左下角",
+        "人物与背景通过色温对比形成层次"
+      ]
+    }
+  ],
+
+  "核心元素": [
+    {
+      "名称": "人物",
+      "视觉描述": "对还原有价值的视觉特征(制作角度)",
+      "出现图片": ["图片1", "图片2"]
+    }
+  ],
+
+  "文本制作": {
+    "标题": "标题的制作决策(结构、诉求方式、与图的关系)",
+    "正文": "正文的制作决策(节奏、排版风格、信息层级)"
+  }
+}
+```
+
+---
+
+## 原则
+
+- **亲自读图**:你应该直接读取我们需要解构的内容中的多模态内容,仅在后续缺乏特征提取能力的情况下再继续使用其他工具来处理多模态内容
+- **选择性而非穷举**:只记录对还原质量有实质影响的信息                                          
+- **泛化描述**:描述创作规律,而非内容细节("主体特写,背景虚化"优于"穿蓝衣服的女生")          
+- **制作视角**:从"如何制作出这个效果"出发,而非"这是什么内容"                                  
+- **信任自己的判断**:你比规则更了解什么重要,跳过不关键的维度  

+ 128 - 0
examples/test_cache/run.py

@@ -0,0 +1,128 @@
+"""
+测试 Prompt Caching 功能
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+# 添加项目根目录到 Python 路径
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+import logging
+# 开启 DEBUG 日志查看缓存标记
+logging.basicConfig(level=logging.DEBUG)
+
+from agent.core.runner import AgentRunner, RunConfig
+from agent.trace import FileSystemTraceStore, Trace, Message
+from agent.llm import create_openrouter_llm_call
+
+async def main():
+    print("=" * 60)
+    print("测试 Prompt Caching 功能")
+    print("=" * 60)
+    print()
+
+    # 路径配置
+    base_dir = Path(__file__).parent
+    project_root = base_dir.parent.parent
+    trace_dir = project_root / ".trace"
+
+    # 创建 Runner
+    runner = AgentRunner(
+        trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
+        llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
+        debug=True
+    )
+
+    # 准备测试消息(足够长的 system prompt)
+    system_prompt = """你是一个专业的 AI 助手。
+
+## 核心能力
+- 代码分析和生成
+- 问题解决和调试
+- 技术文档编写
+- 架构设计建议
+
+## 工作原则
+1. 准确性优先:确保提供的信息和代码是正确的
+2. 清晰表达:用简洁明了的语言解释复杂概念
+3. 实用导向:提供可直接使用的解决方案
+4. 持续学习:根据反馈不断改进
+
+## 技术栈
+- Python, JavaScript, TypeScript
+- React, Vue, Node.js
+- Docker, Kubernetes
+- PostgreSQL, MongoDB, Redis
+- AWS, GCP, Azure
+
+这是一个足够长的 system prompt,用于测试 Anthropic Prompt Caching 功能。
+缓存需要至少 1024 tokens 才能生效,所以我们需要让这个 prompt 足够长。
+""" * 3  # 重复 3 次确保足够长
+
+    messages = [
+        {"role": "user", "content": "请简单介绍一下 Python 的特点,用 3 句话概括"}
+    ]
+
+    print("第一次调用(创建缓存)...")
+    print("-" * 60)
+
+    trace_id = None
+    iteration = 0
+
+    async for item in runner.run(
+        messages=messages,
+        config=RunConfig(
+            system_prompt=system_prompt,
+            model="anthropic/claude-sonnet-4.5",
+            temperature=0.3,
+            max_iterations=3,
+            enable_prompt_caching=True,  # 启用缓存
+            name="缓存测试"
+        )
+    ):
+        if isinstance(item, Trace):
+            trace_id = item.trace_id
+            if item.status == "completed":
+                print(f"\n✓ Trace 完成")
+                print(f"  Total tokens: {item.total_tokens}")
+                print(f"  Total cost: ${item.total_cost:.6f}")
+
+        elif isinstance(item, Message):
+            if item.role == "assistant":
+                iteration += 1
+                print(f"\n[Iteration {iteration}]")
+                print(f"  Prompt tokens: {item.prompt_tokens}")
+                print(f"  Completion tokens: {item.completion_tokens}")
+                print(f"  Cache creation: {item.cache_creation_tokens}")
+                print(f"  Cache read: {item.cache_read_tokens}")
+                print(f"  Cost: ${item.cost:.6f}")
+
+                content = item.content
+                if isinstance(content, dict):
+                    text = content.get("text", "")
+                    if text:
+                        preview = text[:100] + "..." if len(text) > 100 else text
+                        print(f"  Response: {preview}")
+
+    print()
+    print("=" * 60)
+    print("测试完成")
+    print("=" * 60)
+    print()
+
+    if trace_id:
+        print("验证要点:")
+        print("1. 第一次调用应该有 cache_creation_tokens > 0")
+        print("2. 后续调用应该有 cache_read_tokens > 0")
+        print("3. cache_read_tokens 的成本应该是正常 input tokens 的 10%")
+        print()
+        print(f"Trace ID: {trace_id}")
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 138 - 0
examples/test_cache/run_multi.py

@@ -0,0 +1,138 @@
+"""
+测试多轮对话的 Prompt Caching
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+from agent.core.runner import AgentRunner, RunConfig
+from agent.trace import FileSystemTraceStore, Trace, Message
+from agent.llm import create_openrouter_llm_call
+
+async def main():
+    print("=" * 60)
+    print("测试多轮对话 Prompt Caching")
+    print("=" * 60)
+    print()
+
+    base_dir = Path(__file__).parent
+    project_root = base_dir.parent.parent
+    trace_dir = project_root / ".trace"
+
+    runner = AgentRunner(
+        trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
+        llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
+        debug=True
+    )
+
+    # 超长 system prompt 确保 >1024 tokens
+    system_prompt = """你是一个专业的 AI 助手,专注于帮助用户解决技术问题。
+
+## 核心能力
+- 代码分析和生成
+- 问题解决和调试
+- 技术文档编写
+- 架构设计建议
+- 性能优化建议
+- 安全审计
+
+## 工作原则
+1. 准确性优先:确保提供的信息和代码是正确的
+2. 清晰表达:用简洁明了的语言解释复杂概念
+3. 实用导向:提供可直接使用的解决方案
+4. 持续学习:根据反馈不断改进
+5. 安全意识:始终考虑安全性和最佳实践
+6. 性能考虑:提供高效的解决方案
+
+## 技术栈
+- 编程语言:Python, JavaScript, TypeScript, Go, Rust, Java
+- 前端框架:React, Vue, Angular, Svelte
+- 后端框架:Node.js, Django, Flask, FastAPI, Spring Boot
+- 数据库:PostgreSQL, MongoDB, Redis, MySQL, Elasticsearch
+- 云平台:AWS, GCP, Azure
+- DevOps:Docker, Kubernetes, CI/CD, Terraform
+- 机器学习:TensorFlow, PyTorch, scikit-learn
+
+## 响应格式
+- 提供清晰的步骤说明
+- 包含代码示例
+- 解释关键概念
+- 指出潜在问题
+- 给出最佳实践建议
+
+这是一个足够长的 system prompt,用于测试 Anthropic Prompt Caching 功能。
+缓存需要至少 1024 tokens 才能生效,所以我们需要让这个 prompt 足够长。
+""" * 5  # 重复 5 次确保足够长
+
+    messages = [
+        {"role": "user", "content": "请用一句话介绍 Python"}
+    ]
+
+    print("开始多轮对话测试...")
+    print("-" * 60)
+
+    trace_id = None
+    iteration = 0
+
+    async for item in runner.run(
+        messages=messages,
+        config=RunConfig(
+            system_prompt=system_prompt,
+            model="anthropic/claude-sonnet-4.5",
+            temperature=0.3,
+            max_iterations=5,  # 多轮对话
+            enable_prompt_caching=True,
+            name="多轮缓存测试"
+        )
+    ):
+        if isinstance(item, Trace):
+            trace_id = item.trace_id
+            if item.status == "completed":
+                print(f"\n✓ Trace 完成")
+                print(f"  Total messages: {item.total_messages}")
+                print(f"  Total tokens: {item.total_tokens}")
+                print(f"  Total cache creation: {item.total_cache_creation_tokens}")
+                print(f"  Total cache read: {item.total_cache_read_tokens}")
+                print(f"  Total cost: ${item.total_cost:.6f}")
+
+        elif isinstance(item, Message):
+            if item.role == "assistant":
+                iteration += 1
+                print(f"\n[Iteration {iteration}]")
+                print(f"  Prompt tokens: {item.prompt_tokens}")
+                print(f"  Completion tokens: {item.completion_tokens}")
+                print(f"  Cache creation: {item.cache_creation_tokens}")
+                print(f"  Cache read: {item.cache_read_tokens}")
+                print(f"  Cost: ${item.cost:.6f}")
+
+                content = item.content
+                if isinstance(content, dict):
+                    text = content.get("text", "")
+                    tool_calls = content.get("tool_calls")
+                    if text and not tool_calls:
+                        preview = text[:80] + "..." if len(text) > 80 else text
+                        print(f"  Response: {preview}")
+                    if tool_calls:
+                        print(f"  Tool calls: {len(tool_calls)}")
+
+    print()
+    print("=" * 60)
+    print("测试完成")
+    print("=" * 60)
+    print()
+
+    if trace_id:
+        print("分析:")
+        print("- 第 1 次调用:应该有 cache_creation_tokens > 0(创建缓存)")
+        print("- 第 2+ 次调用:应该有 cache_read_tokens > 0(命中缓存)")
+        print(f"\nTrace ID: {trace_id}")
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 259 - 0
examples/test_cache/run_same_trace.py

@@ -0,0 +1,259 @@
+"""
+在同一个 Trace 内测试 Prompt Caching
+
+测试场景:
+1. 第一轮对话:创建缓存(system prompt + 工具定义)
+2. 第二轮对话:命中缓存(system prompt + 工具定义 + 第一轮历史)
+3. 第三轮对话:命中更多缓存(system prompt + 工具定义 + 前两轮历史)
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+from agent.core.runner import AgentRunner, RunConfig
+from agent.trace import FileSystemTraceStore, Trace, Message
+from agent.llm import create_openrouter_llm_call
+
+async def main():
+    print("=" * 60)
+    print("同一 Trace 内的 Prompt Caching 测试")
+    print("=" * 60)
+    print()
+
+    base_dir = Path(__file__).parent
+    project_root = base_dir.parent.parent
+    trace_dir = project_root / ".trace"
+
+    runner = AgentRunner(
+        trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
+        llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
+        debug=True
+    )
+
+    # 构造 >1500 tokens 的稳定前缀
+    stable_prefix = """你是一个专业的 AI 技术顾问,专注于软件工程和系统架构。
+
+## 核心专业领域
+
+### 1. 编程语言与框架
+- **Python**: Django, Flask, FastAPI, Celery, SQLAlchemy, Pandas, NumPy
+- **JavaScript/TypeScript**: React, Vue, Angular, Node.js, Express, NestJS
+- **Go**: Gin, Echo, gRPC, Cobra
+- **Rust**: Actix, Rocket, Tokio
+- **Java**: Spring Boot, Hibernate, Maven, Gradle
+
+### 2. 数据库技术
+- **关系型数据库**: PostgreSQL, MySQL, Oracle, SQL Server
+- **NoSQL 数据库**: MongoDB, Redis, Cassandra, DynamoDB
+- **时序数据库**: InfluxDB, TimescaleDB
+- **图数据库**: Neo4j, ArangoDB
+- **搜索引擎**: Elasticsearch, Solr
+
+### 3. 云平台与基础设施
+- **AWS**: EC2, S3, Lambda, RDS, DynamoDB, CloudFormation, ECS, EKS
+- **GCP**: Compute Engine, Cloud Storage, Cloud Functions, BigQuery, GKE
+- **Azure**: Virtual Machines, Blob Storage, Functions, Cosmos DB, AKS
+- **容器化**: Docker, Docker Compose, Podman
+- **编排**: Kubernetes, Helm, Istio, Linkerd
+
+### 4. DevOps 与 CI/CD
+- **版本控制**: Git, GitHub, GitLab, Bitbucket
+- **CI/CD**: Jenkins, GitLab CI, GitHub Actions, CircleCI, Travis CI
+- **配置管理**: Ansible, Terraform, Puppet, Chef
+- **监控告警**: Prometheus, Grafana, ELK Stack, Datadog, New Relic
+- **日志管理**: Fluentd, Logstash, Loki
+
+### 5. 架构模式
+- **微服务架构**: 服务拆分、API 网关、服务发现、熔断降级
+- **事件驱动架构**: 消息队列、事件溯源、CQRS
+- **Serverless 架构**: FaaS、BaaS、无服务器框架
+- **分布式系统**: CAP 理论、一致性协议、分布式事务
+- **高可用设计**: 负载均衡、故障转移、灾备恢复
+
+### 6. 安全最佳实践
+- **认证授权**: OAuth 2.0, JWT, SAML, OpenID Connect
+- **加密技术**: TLS/SSL, AES, RSA, 哈希算法
+- **安全审计**: 漏洞扫描、渗透测试、安全合规
+- **数据保护**: 数据脱敏、访问控制、审计日志
+
+### 7. 性能优化
+- **缓存策略**: Redis, Memcached, CDN, 浏览器缓存
+- **数据库优化**: 索引设计、查询优化、分库分表
+- **代码优化**: 算法复杂度、并发编程、异步处理
+- **系统调优**: 负载测试、性能分析、资源监控
+
+### 8. 机器学习与 AI
+- **深度学习框架**: TensorFlow, PyTorch, Keras
+- **模型部署**: TensorFlow Serving, TorchServe, ONNX
+- **MLOps**: MLflow, Kubeflow, SageMaker
+- **自然语言处理**: Transformers, BERT, GPT, LangChain
+
+## 工作原则
+
+1. **准确性优先**: 提供经过验证的技术方案,避免误导
+2. **实用导向**: 给出可直接应用的代码示例和配置
+3. **最佳实践**: 遵循行业标准和社区共识
+4. **安全意识**: 始终考虑安全性和隐私保护
+5. **性能考虑**: 关注系统性能和资源效率
+6. **可维护性**: 代码清晰、文档完善、易于扩展
+7. **成本意识**: 平衡技术方案与成本投入
+
+## 响应格式
+
+### 问题分析
+- 理解用户需求和上下文
+- 识别关键技术挑战
+- 评估可行性和风险
+
+### 解决方案
+- 提供清晰的实现步骤
+- 包含完整的代码示例
+- 解释关键技术点
+- 指出潜在问题和注意事项
+
+### 最佳实践建议
+- 性能优化建议
+- 安全加固措施
+- 可扩展性考虑
+- 运维监控方案
+
+### 替代方案
+- 列出其他可行方案
+- 对比优缺点
+- 给出选择建议
+
+## 技术栈版本参考
+
+- Python: 3.11+
+- Node.js: 20 LTS
+- PostgreSQL: 15+
+- Redis: 7+
+- Kubernetes: 1.28+
+- Docker: 24+
+
+这是一个足够长且稳定的 system prompt,用于测试 Anthropic Prompt Caching。
+此内容在所有请求中保持完全一致,以确保缓存能够命中。
+Version: 3.0
+""" * 2  # 重复 2 次,确保 >1500 tokens
+
+    print(f"System prompt 长度: {len(stable_prefix)} 字符")
+    print(f"预估 tokens: ~{len(stable_prefix) // 4}")
+    print()
+
+    trace_id = None
+
+    # 第一轮对话
+    print("=" * 60)
+    print("第 1 轮对话:创建缓存")
+    print("=" * 60)
+
+    async for item in runner.run(
+        messages=[{"role": "user", "content": "请用一句话介绍 Python"}],
+        config=RunConfig(
+            system_prompt=stable_prefix,
+            model="anthropic/claude-sonnet-4.5",
+            temperature=0.3,
+            max_iterations=1,
+            enable_prompt_caching=True,
+            name="同一Trace缓存测试"
+        )
+    ):
+        if isinstance(item, Trace):
+            trace_id = item.trace_id
+            if item.status == "completed":
+                print(f"\n✓ 第 1 轮完成")
+                print(f"  Total tokens: {item.total_tokens}")
+                print(f"  Cache write: {item.total_cache_creation_tokens}")
+                print(f"  Cache read: {item.total_cache_read_tokens}")
+                print(f"  Cost: ${item.total_cost:.6f}")
+        elif isinstance(item, Message) and item.role == "assistant":
+            print(f"\n[Response] {item.content.get('text', '')[:100]}...")
+            print(f"  Prompt tokens: {item.prompt_tokens}")
+            print(f"  Cache write: {item.cache_creation_tokens}")
+            print(f"  Cache read: {item.cache_read_tokens}")
+
+    print("\n等待 2 秒...")
+    await asyncio.sleep(2)
+
+    # 第二轮对话(续跑同一个 trace)
+    print("\n" + "=" * 60)
+    print("第 2 轮对话:应该命中缓存(system + 第1轮历史)")
+    print("=" * 60)
+
+    async for item in runner.run(
+        messages=[{"role": "user", "content": "请用一句话介绍 JavaScript"}],
+        config=RunConfig(
+            trace_id=trace_id,  # 续跑同一个 trace
+            system_prompt=stable_prefix,
+            model="anthropic/claude-sonnet-4.5",
+            temperature=0.3,
+            max_iterations=1,
+            enable_prompt_caching=True,
+        )
+    ):
+        if isinstance(item, Trace) and item.status == "completed":
+            print(f"\n✓ 第 2 轮完成")
+            print(f"  Total tokens: {item.total_tokens}")
+            print(f"  Cache write: {item.total_cache_creation_tokens}")
+            print(f"  Cache read: {item.total_cache_read_tokens}")
+            print(f"  Cost: ${item.total_cost:.6f}")
+        elif isinstance(item, Message) and item.role == "assistant":
+            print(f"\n[Response] {item.content.get('text', '')[:100]}...")
+            print(f"  Prompt tokens: {item.prompt_tokens}")
+            print(f"  Cache write: {item.cache_creation_tokens}")
+            print(f"  Cache read: {item.cache_read_tokens}")
+
+    print("\n等待 2 秒...")
+    await asyncio.sleep(2)
+
+    # 第三轮对话(续跑同一个 trace)
+    print("\n" + "=" * 60)
+    print("第 3 轮对话:应该命中更多缓存(system + 前2轮历史)")
+    print("=" * 60)
+
+    async for item in runner.run(
+        messages=[{"role": "user", "content": "请用一句话介绍 Go"}],
+        config=RunConfig(
+            trace_id=trace_id,  # 续跑同一个 trace
+            system_prompt=stable_prefix,
+            model="anthropic/claude-sonnet-4.5",
+            temperature=0.3,
+            max_iterations=1,
+            enable_prompt_caching=True,
+        )
+    ):
+        if isinstance(item, Trace) and item.status == "completed":
+            print(f"\n✓ 第 3 轮完成")
+            print(f"  Total tokens: {item.total_tokens}")
+            print(f"  Cache write: {item.total_cache_creation_tokens}")
+            print(f"  Cache read: {item.total_cache_read_tokens}")
+            print(f"  Cost: ${item.total_cost:.6f}")
+        elif isinstance(item, Message) and item.role == "assistant":
+            print(f"\n[Response] {item.content.get('text', '')[:100]}...")
+            print(f"  Prompt tokens: {item.prompt_tokens}")
+            print(f"  Cache write: {item.cache_creation_tokens}")
+            print(f"  Cache read: {item.cache_read_tokens}")
+
+    print("\n" + "=" * 60)
+    print("测试完成")
+    print("=" * 60)
+    print()
+    print("预期结果:")
+    print("- 第 1 轮:cache_write > 0(创建缓存)")
+    print("- 第 2 轮:cache_read > 0(命中 system prompt 缓存)")
+    print("- 第 3 轮:cache_read 更大(命中 system + 历史消息缓存)")
+    print()
+    print(f"Trace ID: {trace_id}")
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 244 - 0
examples/test_cache/run_strict.py

@@ -0,0 +1,244 @@
+"""
+严格的 Prompt Caching 验证测试
+
+按照 OpenRouter + Anthropic 的规范:
+1. 使用 prompt_tokens_details.cached_tokens / cache_write_tokens
+2. 锁定 provider 为 Anthropic
+3. 使用 >1500 tokens 的稳定前缀
+4. 在 5 分钟内多次请求
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+import time
+
+sys.path.insert(0, str(Path(__file__).parent.parent.parent))
+
+from dotenv import load_dotenv
+load_dotenv()
+
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+from agent.core.runner import AgentRunner, RunConfig
+from agent.trace import FileSystemTraceStore, Trace, Message
+from agent.llm import create_openrouter_llm_call
+
+async def main():
+    print("=" * 60)
+    print("严格的 Prompt Caching 验证测试")
+    print("=" * 60)
+    print()
+
+    base_dir = Path(__file__).parent
+    project_root = base_dir.parent.parent
+    trace_dir = project_root / ".trace"
+
+    runner = AgentRunner(
+        trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
+        llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
+        debug=True
+    )
+
+    # 构造 >1500 tokens 的稳定前缀(约 6000 字符)
+    # 这段内容在所有请求中完全不变
+    stable_prefix = """你是一个专业的 AI 技术顾问,专注于软件工程和系统架构。
+
+## 核心专业领域
+
+### 1. 编程语言与框架
+- **Python**: Django, Flask, FastAPI, Celery, SQLAlchemy, Pandas, NumPy
+- **JavaScript/TypeScript**: React, Vue, Angular, Node.js, Express, NestJS
+- **Go**: Gin, Echo, gRPC, Cobra
+- **Rust**: Actix, Rocket, Tokio
+- **Java**: Spring Boot, Hibernate, Maven, Gradle
+
+### 2. 数据库技术
+- **关系型数据库**: PostgreSQL, MySQL, Oracle, SQL Server
+- **NoSQL 数据库**: MongoDB, Redis, Cassandra, DynamoDB
+- **时序数据库**: InfluxDB, TimescaleDB
+- **图数据库**: Neo4j, ArangoDB
+- **搜索引擎**: Elasticsearch, Solr
+
+### 3. 云平台与基础设施
+- **AWS**: EC2, S3, Lambda, RDS, DynamoDB, CloudFormation, ECS, EKS
+- **GCP**: Compute Engine, Cloud Storage, Cloud Functions, BigQuery, GKE
+- **Azure**: Virtual Machines, Blob Storage, Functions, Cosmos DB, AKS
+- **容器化**: Docker, Docker Compose, Podman
+- **编排**: Kubernetes, Helm, Istio, Linkerd
+
+### 4. DevOps 与 CI/CD
+- **版本控制**: Git, GitHub, GitLab, Bitbucket
+- **CI/CD**: Jenkins, GitLab CI, GitHub Actions, CircleCI, Travis CI
+- **配置管理**: Ansible, Terraform, Puppet, Chef
+- **监控告警**: Prometheus, Grafana, ELK Stack, Datadog, New Relic
+- **日志管理**: Fluentd, Logstash, Loki
+
+### 5. 架构模式
+- **微服务架构**: 服务拆分、API 网关、服务发现、熔断降级
+- **事件驱动架构**: 消息队列、事件溯源、CQRS
+- **Serverless 架构**: FaaS、BaaS、无服务器框架
+- **分布式系统**: CAP 理论、一致性协议、分布式事务
+- **高可用设计**: 负载均衡、故障转移、灾备恢复
+
+### 6. 安全最佳实践
+- **认证授权**: OAuth 2.0, JWT, SAML, OpenID Connect
+- **加密技术**: TLS/SSL, AES, RSA, 哈希算法
+- **安全审计**: 漏洞扫描、渗透测试、安全合规
+- **数据保护**: 数据脱敏、访问控制、审计日志
+
+### 7. 性能优化
+- **缓存策略**: Redis, Memcached, CDN, 浏览器缓存
+- **数据库优化**: 索引设计、查询优化、分库分表
+- **代码优化**: 算法复杂度、并发编程、异步处理
+- **系统调优**: 负载测试、性能分析、资源监控
+
+### 8. 机器学习与 AI
+- **深度学习框架**: TensorFlow, PyTorch, Keras
+- **模型部署**: TensorFlow Serving, TorchServe, ONNX
+- **MLOps**: MLflow, Kubeflow, SageMaker
+- **自然语言处理**: Transformers, BERT, GPT, LangChain
+
+## 工作原则
+
+1. **准确性优先**: 提供经过验证的技术方案,避免误导
+2. **实用导向**: 给出可直接应用的代码示例和配置
+3. **最佳实践**: 遵循行业标准和社区共识
+4. **安全意识**: 始终考虑安全性和隐私保护
+5. **性能考虑**: 关注系统性能和资源效率
+6. **可维护性**: 代码清晰、文档完善、易于扩展
+7. **成本意识**: 平衡技术方案与成本投入
+
+## 响应格式
+
+### 问题分析
+- 理解用户需求和上下文
+- 识别关键技术挑战
+- 评估可行性和风险
+
+### 解决方案
+- 提供清晰的实现步骤
+- 包含完整的代码示例
+- 解释关键技术点
+- 指出潜在问题和注意事项
+
+### 最佳实践建议
+- 性能优化建议
+- 安全加固措施
+- 可扩展性考虑
+- 运维监控方案
+
+### 替代方案
+- 列出其他可行方案
+- 对比优缺点
+- 给出选择建议
+
+## 技术栈版本参考
+
+- Python: 3.11+
+- Node.js: 20 LTS
+- PostgreSQL: 15+
+- Redis: 7+
+- Kubernetes: 1.28+
+- Docker: 24+
+
+这是一个足够长且稳定的 system prompt,用于测试 Anthropic Prompt Caching。
+此内容在所有请求中保持完全一致,以确保缓存能够命中。
+Version: 2.0
+""" * 2  # 重复 2 次,确保 >1500 tokens
+
+    print(f"System prompt 长度: {len(stable_prefix)} 字符")
+    print(f"预估 tokens: ~{len(stable_prefix) // 4}")
+    print()
+
+    # 第一次请求:创建缓存
+    print("=" * 60)
+    print("第 1 次请求:创建缓存")
+    print("=" * 60)
+
+    messages1 = [
+        {"role": "user", "content": "请用一句话介绍 Python"}
+    ]
+
+    trace_id_1 = None
+    async for item in runner.run(
+        messages=messages1,
+        config=RunConfig(
+            system_prompt=stable_prefix,
+            model="anthropic/claude-sonnet-4.5",
+            temperature=0.3,
+            max_iterations=1,
+            enable_prompt_caching=True,
+            name="缓存测试-第1次"
+        )
+    ):
+        if isinstance(item, Trace):
+            trace_id_1 = item.trace_id
+            if item.status == "completed":
+                print(f"\n✓ 第 1 次完成")
+                print(f"  Total tokens: {item.total_tokens}")
+                print(f"  Cache write: {item.total_cache_creation_tokens}")
+                print(f"  Cache read: {item.total_cache_read_tokens}")
+                print(f"  Cost: ${item.total_cost:.6f}")
+
+        elif isinstance(item, Message) and item.role == "assistant":
+            print(f"\n[Response]")
+            print(f"  Prompt tokens: {item.prompt_tokens}")
+            print(f"  Cache write: {item.cache_creation_tokens}")
+            print(f"  Cache read: {item.cache_read_tokens}")
+
+    # 等待 2 秒,确保缓存已生效
+    print("\n等待 2 秒...")
+    await asyncio.sleep(2)
+
+    # 第二次请求:应该命中缓存
+    print("\n" + "=" * 60)
+    print("第 2 次请求:应该命中缓存")
+    print("=" * 60)
+
+    messages2 = [
+        {"role": "user", "content": "请用一句话介绍 JavaScript"}
+    ]
+
+    trace_id_2 = None
+    async for item in runner.run(
+        messages=messages2,
+        config=RunConfig(
+            system_prompt=stable_prefix,  # 完全相同的 system prompt
+            model="anthropic/claude-sonnet-4.5",
+            temperature=0.3,
+            max_iterations=1,
+            enable_prompt_caching=True,
+            name="缓存测试-第2次"
+        )
+    ):
+        if isinstance(item, Trace):
+            trace_id_2 = item.trace_id
+            if item.status == "completed":
+                print(f"\n✓ 第 2 次完成")
+                print(f"  Total tokens: {item.total_tokens}")
+                print(f"  Cache write: {item.total_cache_creation_tokens}")
+                print(f"  Cache read: {item.total_cache_read_tokens}")
+                print(f"  Cost: ${item.total_cost:.6f}")
+
+        elif isinstance(item, Message) and item.role == "assistant":
+            print(f"\n[Response]")
+            print(f"  Prompt tokens: {item.prompt_tokens}")
+            print(f"  Cache write: {item.cache_creation_tokens}")
+            print(f"  Cache read: {item.cache_read_tokens}")
+
+    print("\n" + "=" * 60)
+    print("测试完成")
+    print("=" * 60)
+    print()
+    print("预期结果:")
+    print("- 第 1 次:cache_write_tokens > 0(创建缓存)")
+    print("- 第 2 次:cached_tokens > 0(命中缓存)")
+    print()
+    print(f"Trace 1: {trace_id_1}")
+    print(f"Trace 2: {trace_id_2}")
+
+if __name__ == "__main__":
+    asyncio.run(main())

+ 39 - 1
frontend/react-template/package-lock.json

@@ -13,7 +13,8 @@
         "axios": "^1.6.0",
         "d3": "^7.8.5",
         "react": "^18.2.0",
-        "react-dom": "^18.2.0"
+        "react-dom": "^18.2.0",
+        "react-markdown": "^10.1.0"
       },
       "devDependencies": {
         "@tailwindcss/postcss": "^4.0.0",
@@ -5099,6 +5100,16 @@
         "url": "https://opencollective.com/unified"
       }
     },
+    "node_modules/html-url-attributes": {
+      "version": "3.0.1",
+      "resolved": "https://registry.npmmirror.com/html-url-attributes/-/html-url-attributes-3.0.1.tgz",
+      "integrity": "sha512-ol6UPyBWqsrO6EJySPz2O7ZSr856WDrEzM5zMqp+FJJLGMW35cLYmmZnl0vztAZxRUoNZJFTCohfjuIJ8I4QBQ==",
+      "license": "MIT",
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      }
+    },
     "node_modules/iconv-lite": {
       "version": "0.6.3",
       "resolved": "https://registry.npmmirror.com/iconv-lite/-/iconv-lite-0.6.3.tgz",
@@ -7458,6 +7469,33 @@
       "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==",
       "license": "MIT"
     },
+    "node_modules/react-markdown": {
+      "version": "10.1.0",
+      "resolved": "https://registry.npmmirror.com/react-markdown/-/react-markdown-10.1.0.tgz",
+      "integrity": "sha512-qKxVopLT/TyA6BX3Ue5NwabOsAzm0Q7kAPwq6L+wWDwisYs7R8vZ0nRXqq6rkueboxpkjvLGU9fWifiX/ZZFxQ==",
+      "license": "MIT",
+      "dependencies": {
+        "@types/hast": "^3.0.0",
+        "@types/mdast": "^4.0.0",
+        "devlop": "^1.0.0",
+        "hast-util-to-jsx-runtime": "^2.0.0",
+        "html-url-attributes": "^3.0.0",
+        "mdast-util-to-hast": "^13.0.0",
+        "remark-parse": "^11.0.0",
+        "remark-rehype": "^11.0.0",
+        "unified": "^11.0.0",
+        "unist-util-visit": "^5.0.0",
+        "vfile": "^6.0.0"
+      },
+      "funding": {
+        "type": "opencollective",
+        "url": "https://opencollective.com/unified"
+      },
+      "peerDependencies": {
+        "@types/react": ">=18",
+        "react": ">=18"
+      }
+    },
     "node_modules/react-refresh": {
       "version": "0.17.0",
       "resolved": "https://registry.npmmirror.com/react-refresh/-/react-refresh-0.17.0.tgz",

+ 7 - 6
frontend/react-template/package.json

@@ -12,18 +12,19 @@
   "dependencies": {
     "@douyinfe/semi-icons": "^2.56.0",
     "@douyinfe/semi-ui": "^2.56.0",
+    "axios": "^1.6.0",
+    "d3": "^7.8.5",
     "react": "^18.2.0",
     "react-dom": "^18.2.0",
-    "axios": "^1.6.0",
-    "d3": "^7.8.5"
+    "react-markdown": "^10.1.0"
   },
   "devDependencies": {
-    "@types/react": "^18.2.43",
-    "@types/react-dom": "^18.2.17",
-    "@types/d3": "^7.4.3",
-    "@types/node": "^20.11.5",
     "@tailwindcss/postcss": "^4.0.0",
     "@tailwindcss/vite": "^4.0.0",
+    "@types/d3": "^7.4.3",
+    "@types/node": "^20.11.5",
+    "@types/react": "^18.2.43",
+    "@types/react-dom": "^18.2.17",
     "@typescript-eslint/eslint-plugin": "^6.14.0",
     "@typescript-eslint/parser": "^6.14.0",
     "@vitejs/plugin-react": "^4.2.1",

+ 25 - 15
frontend/react-template/src/App.tsx

@@ -5,22 +5,21 @@ import { DetailPanel } from "./components/DetailPanel/DetailPanel";
 import type { Goal } from "./types/goal";
 import type { Edge, Message } from "./types/message";
 import { useFlowChartData } from "./components/FlowChart/hooks/useFlowChartData";
-import { useTrace } from "./hooks/useTrace";
 import "./styles/global.css";
 
 function App() {
   const [selectedTraceId, setSelectedTraceId] = useState<string | null>(null);
+  const [selectedTraceTitle, setSelectedTraceTitle] = useState("流程图可视化系统");
   const [selectedNode, setSelectedNode] = useState<Goal | Message | null>(null);
   const [selectedEdge, setSelectedEdge] = useState<Edge | null>(null);
   const [rightWidth, setRightWidth] = useState(360);
   const [isDragging, setIsDragging] = useState(false);
   const [refreshTrigger, setRefreshTrigger] = useState(0);
+  const [messageRefreshTrigger, setMessageRefreshTrigger] = useState(0);
   const bodyRef = useRef<HTMLDivElement | null>(null);
 
   // 获取数据以传递给 DetailPanel
-  const { trace } = useTrace(selectedTraceId);
-  const initialGoals = useMemo(() => trace?.goal_tree?.goals ?? [], [trace]);
-  const { msgGroups } = useFlowChartData(selectedTraceId, initialGoals);
+  const { msgGroups } = useFlowChartData(selectedTraceId, messageRefreshTrigger);
 
   const handleNodeClick = (node: Goal | Message, edge?: Edge) => {
     setSelectedNode(node);
@@ -56,7 +55,7 @@ function App() {
       const rect = bodyRef.current?.getBoundingClientRect();
       if (!rect) return;
       const next = rect.right - event.clientX;
-      const clamped = Math.min(500, Math.max(240, next));
+      const clamped = Math.min(800, Math.max(240, next));
       setRightWidth(clamped);
     };
     const handleUp = () => {
@@ -72,23 +71,34 @@ function App() {
 
   return (
     <div className="app">
+      <div className="app-top">
+        <TopBar
+          selectedTraceId={selectedTraceId}
+          selectedNode={selectedNode}
+          title={selectedTraceTitle}
+          onTraceSelect={(id, title) => {
+            setSelectedTraceId(id);
+            if (title) setSelectedTraceTitle(title);
+          }}
+          onTraceCreated={() => setRefreshTrigger((t) => t + 1)}
+          onMessageInserted={() => setMessageRefreshTrigger((t) => t + 1)}
+        />
+      </div>
       <div
         className="app-body"
         ref={bodyRef}
         style={{ userSelect: isDragging ? "none" : "auto" }}
       >
         <div className="app-main">
-          <TopBar
-            selectedTraceId={selectedTraceId}
-            selectedNode={selectedNode}
-            onTraceSelect={setSelectedTraceId}
-            onTraceCreated={() => setRefreshTrigger((t) => t + 1)}
-          />
           <MainContent
             traceId={selectedTraceId}
             onNodeClick={handleNodeClick}
-            onTraceChange={setSelectedTraceId}
+            onTraceChange={(id, title) => {
+              setSelectedTraceId(id);
+              if (title) setSelectedTraceTitle(title);
+            }}
             refreshTrigger={refreshTrigger}
+            messageRefreshTrigger={messageRefreshTrigger}
           />
         </div>
         {(selectedNode || selectedEdge) && (
@@ -100,16 +110,16 @@ function App() {
               aria-orientation="vertical"
               aria-valuenow={rightWidth}
               aria-valuemin={240}
-              aria-valuemax={500}
+              aria-valuemax={800}
             />
             <div
               className="app-right"
               style={{ width: rightWidth }}
             >
               <DetailPanel
-                node={selectedNode && isGoalNode(selectedNode) ? (selectedNode as Goal) : null}
+                node={selectedNode}
                 edge={selectedEdge}
-                messages={selectedMessages}
+                messages={selectedMessages as Message[]}
                 onClose={handleCloseDetail}
               />
             </div>

+ 20 - 4
frontend/react-template/src/api/traceApi.ts

@@ -13,15 +13,30 @@ export const traceApi = {
   fetchTraceDetail(traceId: string) {
     return request<TraceDetailResponse>(`/api/traces/${traceId}`);
   },
-  createTrace(data: { system_prompt: string; user_prompt: string }) {
+  createTrace(data: {
+    messages: Array<{ role: "system" | "user" | "assistant" | "tool"; content: unknown }>;
+    model?: string;
+    temperature?: number;
+    max_iterations?: number;
+    tools?: string[] | null;
+    name?: string;
+    uid?: string;
+  }) {
     return request<{ trace_id: string }>("/api/traces", {
       method: "POST",
       data,
     });
   },
-  runTrace(messageId: string) {
-    return request<void>(`/api/traces/${messageId}/run`, {
+  runTrace(
+    traceId: string,
+    data?: {
+      messages?: Array<{ role: "system" | "user" | "assistant" | "tool"; content: unknown }>;
+      after_message_id?: string | null;
+    },
+  ) {
+    return request<void>(`/api/traces/${traceId}/run`, {
       method: "POST",
+      data,
     });
   },
   stopTrace(traceId: string) {
@@ -29,9 +44,10 @@ export const traceApi = {
       method: "POST",
     });
   },
-  reflectTrace(traceId: string) {
+  reflectTrace(traceId: string, data?: { focus?: string | null }) {
     return request<void>(`/api/traces/${traceId}/reflect`, {
       method: "POST",
+      data,
     });
   },
   getExperiences() {

+ 67 - 76
frontend/react-template/src/components/DetailPanel/DetailPanel.module.css

@@ -1,141 +1,132 @@
 .panel {
   width: 100%;
   height: 100%;
-  /* border-left: 1px solid var(--border-color, #e0e0e0); */
-  background: #ffffff;
+  background: var(--bg-panel);
   display: flex;
   flex-direction: column;
+  box-shadow: -1px 0 0 0 var(--border-light);
 }
 
 .header {
-  padding: 12px 16px;
-  border-bottom: 1px solid var(--border-color, #e0e0e0);
+  height: var(--topbar-height);
+  padding: 0 var(--space-lg);
+  border-bottom: 1px solid var(--border-light);
   display: flex;
   align-items: center;
   justify-content: space-between;
+  flex-shrink: 0;
 }
 
 .title {
-  font-size: 14px;
+  font-size: 16px;
   font-weight: 600;
-  color: var(--text-primary, #333);
+  color: var(--text-primary);
 }
 
 .close {
   border: none;
   background: transparent;
-  font-size: 16px;
+  width: 28px;
+  height: 28px;
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  font-size: 20px;
+  color: var(--text-tertiary);
   cursor: pointer;
+  border-radius: var(--radius-sm);
+  transition: all var(--transition-fast);
+}
+
+.close:hover {
+  background: var(--bg-surface-hover);
+  color: var(--text-secondary);
 }
 
 .content {
-  padding: 16px;
-  overflow: auto;
+  padding: var(--space-lg);
+  overflow-y: auto;
+  overflow-x: hidden;
   flex: 1;
 }
 
 .sectionTitle {
-  font-size: 16px;
+  font-size: 14px;
   font-weight: 600;
-  color: #333;
-  margin-bottom: 12px;
+  color: var(--text-secondary);
+  margin: var(--space-lg) 0 var(--space-md);
+  text-transform: uppercase;
+  letter-spacing: 0.05em;
   display: flex;
   align-items: center;
-  gap: 8px;
 }
 
-.sectionTitle::before {
-  content: "";
-  display: block;
-  width: 4px;
-  height: 16px;
-  background: #2d72d2;
-  border-radius: 2px;
+.sectionTitle:first-child {
+  margin-top: 0;
 }
 
 .section {
-  margin-bottom: 16px;
+  margin-bottom: var(--space-lg);
 }
 
 .label {
   font-size: 12px;
-  color: #666;
-  margin-bottom: 6px;
+  font-weight: 500;
+  color: var(--text-tertiary);
+  margin-bottom: var(--space-xs);
 }
 
 .value {
-  font-size: 13px;
-  color: #333;
-  word-break: break-all;
+  font-size: 14px;
+  line-height: 1.6;
+  color: var(--text-primary);
+  word-break: break-word;
+  background: var(--bg-surface-hover);
+  padding: var(--space-sm) var(--space-md);
+  border-radius: var(--radius-md);
+  border: 1px solid var(--border-light);
 }
 
 .toolCalls {
-  margin-top: 4px;
+  display: flex;
+  flex-direction: column;
+  gap: var(--space-sm);
 }
 
 .toolCall {
-  background: #f5f5f5;
-  border-radius: 4px;
-  padding: 8px;
-  margin-bottom: 8px;
-}
-
-.toolCall:last-child {
-  margin-bottom: 0;
+  background: var(--bg-node-goal);
+  border: 1px solid var(--color-primary);
+  border-radius: var(--radius-md);
+  padding: var(--space-sm);
 }
 
 .toolName {
-  font-size: 12px;
   font-weight: 600;
-  color: #555;
-  margin-bottom: 4px;
+  color: var(--color-primary);
+  margin-bottom: var(--space-xs);
+  font-size: 13px;
 }
 
 .toolArgs {
-  font-family: monospace;
-  font-size: 11px;
-  color: #666;
-  white-space: pre-wrap;
-  word-break: break-all;
+  font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
+  font-size: 12px;
+  background: rgba(255, 255, 255, 0.5);
+  padding: var(--space-xs) var(--space-sm);
+  border-radius: var(--radius-sm);
   margin: 0;
-  background: rgba(0, 0, 0, 0.03);
-  padding: 4px;
-  border-radius: 2px;
+  white-space: pre-wrap;
+  color: var(--text-secondary);
 }
 
 .messages {
-  margin-top: 24px;
-  border-top: 1px solid #eee;
-  padding-top: 16px;
-}
-
-.messageList {
-  /* max-height removed to allow full content scrolling */
-}
-
-/* Custom scrollbar for message list */
-.messageList::-webkit-scrollbar {
-  width: 4px;
-}
-
-.messageList::-webkit-scrollbar-thumb {
-  background: #ccc;
-  border-radius: 2px;
-}
-
-.messageList::-webkit-scrollbar-track {
-  background: transparent;
+  display: flex;
+  flex-direction: column;
+  gap: var(--space-md);
 }
 
 .messageItem {
-  background: #fff;
-  border: 1px solid #eee;
-  border-radius: 8px;
-  padding: 12px;
-  margin-bottom: 12px;
-  box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
+  border-left: 2px solid var(--border-medium);
+  padding-left: var(--space-md);
 }
 
-.messageItem:last-child {
-  margin-bottom: 0;
-}
+/* Removed old styles */

+ 17 - 8
frontend/react-template/src/components/DetailPanel/DetailPanel.tsx

@@ -3,7 +3,7 @@ import type { Edge, Message } from "../../types/message";
 import styles from "./DetailPanel.module.css";
 
 interface DetailPanelProps {
-  node: Goal | null;
+  node: Goal | Message | null;
   edge: Edge | null;
   messages?: Message[];
   onClose: () => void;
@@ -39,6 +39,13 @@ export const DetailPanel = ({ node, edge, messages = [], onClose }: DetailPanelP
     return JSON.stringify(content);
   };
 
+  const isGoal = (node: Goal | Message): node is Goal => {
+    return "status" in node;
+  };
+
+  const isMessageNode = (node: Goal | Message): node is Message =>
+    "message_id" in node || "role" in node || "content" in node || "goal_id" in node || "tokens" in node;
+
   return (
     <aside className={styles.panel}>
       <div className={styles.header}>
@@ -57,28 +64,30 @@ export const DetailPanel = ({ node, edge, messages = [], onClose }: DetailPanelP
             <div className={styles.sectionTitle}>节点</div>
             <div className={styles.section}>
               <div className={styles.label}>ID</div>
-              <div className={styles.value}>{node.id}</div>
+              <div className={styles.value}>{isMessageNode(node) ? node.message_id || node.id : node.id}</div>
             </div>
             <div className={styles.section}>
               <div className={styles.label}>目标描述</div>
               <div className={styles.value}>{node.description}</div>
             </div>
-            {node.reason && (
+            {isGoal(node) && node.reason && (
               <div className={styles.section}>
                 <div className={styles.label}>创建理由</div>
                 <div className={styles.value}>{node.reason}</div>
               </div>
             )}
-            {node.summary && (
+            {isGoal(node) && node.summary && (
               <div className={styles.section}>
                 <div className={styles.label}>总结</div>
                 <div className={styles.value}>{node.summary}</div>
               </div>
             )}
-            <div className={styles.section}>
-              <div className={styles.label}>状态</div>
-              <div className={styles.value}>{node.status}</div>
-            </div>
+            {isGoal(node) && (
+              <div className={styles.section}>
+                <div className={styles.label}>状态</div>
+                <div className={styles.value}>{node.status}</div>
+              </div>
+            )}
           </>
         )}
         {messages && messages.length > 0 && (

+ 120 - 29
frontend/react-template/src/components/FlowChart/FlowChart.tsx

@@ -23,6 +23,7 @@ import { Tooltip } from "@douyinfe/semi-ui";
 interface FlowChartProps {
   goals: Goal[]; // 目标节点列表
   msgGroups?: Record<string, Message[]>; // 消息组,key 是 goal_id
+  invalidBranches?: Message[][]; // 失效分支列表
   onNodeClick?: (node: Goal | Message, edge?: EdgeType) => void; // 节点点击回调
   onSubTraceClick?: (parentGoal: Goal, entry: SubTraceEntry) => void; // 子追踪点击回调
 }
@@ -52,6 +53,7 @@ interface LayoutNode {
   type: "goal" | "subgoal" | "message"; // 节点类型
   level: number; // 嵌套层级(0 表示主链节点,1 表示子节点,2 表示孙节点...)
   parentId?: string; // 父节点 ID
+  isInvalid?: boolean; // 是否为失效节点
 }
 
 /**
@@ -67,18 +69,16 @@ interface LayoutEdge {
   collapsible: boolean; // 是否可折叠
   collapsed: boolean; // 是否已折叠
   children?: LayoutNode[]; // 折叠时隐藏的子节点列表
+  isInvalid?: boolean; // 是否为失效连接线
 }
 
 const FlowChartComponent: ForwardRefRenderFunction<FlowChartRef, FlowChartProps> = (
-  { goals, msgGroups = {}, onNodeClick, onSubTraceClick },
+  { goals, msgGroups = {}, invalidBranches, onNodeClick, onSubTraceClick },
   ref,
 ) => {
-  console.log("%c [ msgGroups ]-33", "font-size:13px; background:pink; color:#bf2c9f;", msgGroups);
   // 过滤掉有父节点的 goals,只保留主链节点
   goals = goals.filter((g) => !g.parent_id);
 
-  console.log("%c [ FlowChart-goals ]-33", "font-size:13px; background:pink; color:#bf2c9f;", goals);
-
   // 确保 goals 中包含 END 节点,如果没有则自动添加
   const displayGoals = useMemo(() => {
     if (!goals) return [];
@@ -278,7 +278,6 @@ const FlowChartComponent: ForwardRefRenderFunction<FlowChartRef, FlowChartProps>
         allNodes: result.nodes,
       });
     });
-    console.log("%c [ displayGoals ]-261", "font-size:13px; background:pink; color:#bf2c9f;", displayGoals);
 
     /**
      * 生成连接线
@@ -293,7 +292,6 @@ const FlowChartComponent: ForwardRefRenderFunction<FlowChartRef, FlowChartProps>
      * 3. 递归处理所有层级的节点
      */
 
-    console.log("%c [ mainChainInfo ]-285", "font-size:13px; background:pink; color:#bf2c9f;", mainChainInfo);
     for (let i = 0; i < mainChainInfo.length - 1; i++) {
       const current = mainChainInfo[i];
       const next = mainChainInfo[i + 1];
@@ -497,8 +495,90 @@ const FlowChartComponent: ForwardRefRenderFunction<FlowChartRef, FlowChartProps>
       }
     }
 
+    // 处理失效分支(invalidBranches)
+    if (invalidBranches && invalidBranches.length > 0) {
+      const validMsgMap = new Map<number, LayoutNode>();
+      nodes.forEach((n) => {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const seq = (n.data as any).sequence;
+        if (typeof seq === "number") {
+          validMsgMap.set(seq, n);
+        }
+      });
+
+      // Map to store invalid nodes by their anchor parent ID
+      const invalidNodesByAnchor = new Map<string, LayoutNode[]>();
+
+      invalidBranches.forEach((branch) => {
+        if (branch.length === 0) return;
+        const firstMsg = branch[0];
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const pSeq = (firstMsg as any).parent_sequence;
+
+        if (typeof pSeq === "number") {
+          const parentNode = validMsgMap.get(pSeq);
+          if (parentNode) {
+            let currentParent = parentNode;
+            const X_OFFSET = -200; // 向左偏移
+            const currentBranchNodes: LayoutNode[] = [];
+
+            branch.forEach((msg, idx) => {
+              const nodeId = `invalid-${msg.id || Math.random()}`;
+              const node: LayoutNode = {
+                id: nodeId,
+                x: parentNode.x + X_OFFSET,
+                y: parentNode.y + (idx + 1) * NODE_HEIGHT,
+                data: msg,
+                type: "message",
+                level: parentNode.level,
+                parentId: parentNode.id,
+                isInvalid: true,
+              };
+              nodes.push(node);
+              currentBranchNodes.push(node);
+
+              edges.push({
+                id: `edge-${currentParent.id}-${node.id}`,
+                source: currentParent,
+                target: node,
+                type: "line",
+                level: 0,
+                collapsible: false,
+                collapsed: false,
+                isInvalid: true,
+              });
+
+              currentParent = node;
+            });
+
+            // Store in map
+            if (!invalidNodesByAnchor.has(parentNode.id)) {
+              invalidNodesByAnchor.set(parentNode.id, []);
+            }
+            invalidNodesByAnchor.get(parentNode.id)!.push(...currentBranchNodes);
+          }
+        }
+      });
+
+      // Associate invalid nodes with collapsible edges
+      // If a parent node is hidden (part of a collapsed edge), its invalid children should also be hidden
+      edges.forEach((edge) => {
+        if (edge.collapsible && edge.children) {
+          const extraChildren: LayoutNode[] = [];
+          edge.children.forEach((child) => {
+            if (invalidNodesByAnchor.has(child.id)) {
+              extraChildren.push(...invalidNodesByAnchor.get(child.id)!);
+            }
+          });
+          if (extraChildren.length > 0) {
+            edge.children.push(...extraChildren);
+          }
+        }
+      });
+    }
+
     return { nodes, edges };
-  }, [displayGoals, dimensions, msgGroups, collapsedEdges]);
+  }, [displayGoals, dimensions, msgGroups, collapsedEdges, invalidBranches]);
 
   // 暴露给父组件的方法
   useImperativeHandle(
@@ -846,13 +926,13 @@ const FlowChartComponent: ForwardRefRenderFunction<FlowChartRef, FlowChartProps>
 
                 if (sourceIsMessage || targetIsMessage) {
                   // msgGroup 相关的连接线用灰色
-                  color = "#9E9E9E";
+                  color = "#94a3b8"; // Slate 400
                 } else if (sourceIsMainGoal && targetIsMainGoal) {
                   // 主节点之间的连接线用绿色
-                  color = "#4CAF50";
+                  color = "#10b981"; // Emerald 500
                 } else {
                   // sub_goals 之间的连接线用蓝色
-                  color = "#2196F3";
+                  color = "#3b82f6"; // Blue 500
                 }
 
                 return (
@@ -860,9 +940,10 @@ const FlowChartComponent: ForwardRefRenderFunction<FlowChartRef, FlowChartProps>
                     <path
                       d={path}
                       fill="none"
-                      stroke={color}
+                      stroke={edge.isInvalid ? "#cbd5e1" : color} // 失效边使用浅灰色 (Slate 300)
                       strokeWidth={strokeWidth}
-                      markerEnd="url(#arrow-default)" // 箭头
+                      strokeDasharray={edge.isInvalid ? "5,5" : undefined} // 失效边使用虚线
+                      markerEnd={edge.isInvalid ? undefined : "url(#arrow-default)"} // 失效边不显示箭头
                       style={{ cursor: edge.collapsible ? "pointer" : "default" }} // 可折叠的显示手型光标
                       onClick={() => edge.collapsible && toggleCollapse(edge.id)} // 点击切换折叠状态
                     />
@@ -908,11 +989,15 @@ const FlowChartComponent: ForwardRefRenderFunction<FlowChartRef, FlowChartProps>
                 const data = node.data as Goal;
                 const text = isGoal ? data.description : (node.data as Message).description || "";
 
-                let textColor = "#2196F3"; // 默认蓝色
+                let textColor = "#3b82f6"; // Blue 500
                 if (node.type === "message") {
-                  textColor = "#9E9E9E"; // 消息节点灰色
+                  textColor = "#64748b"; // Slate 500
                 } else if (node.type === "goal" && node.level === 0) {
-                  textColor = "#4CAF50"; // 主节点绿色
+                  textColor = "#10b981"; // Emerald 500
+                }
+
+                if (node.isInvalid) {
+                  textColor = "#94a3b8"; // Slate 400
                 }
 
                 return (
@@ -929,29 +1014,35 @@ const FlowChartComponent: ForwardRefRenderFunction<FlowChartRef, FlowChartProps>
                       width={150}
                       height={50}
                       rx={8}
-                      fill={isGoal ? "#E3F2FD" : "#F5F5F5"} // 目标节点浅蓝色,消息节点灰色
-                      stroke={selectedNodeId === node.id ? "#2196F3" : "#BDBDBD"} // 选中节点蓝色边框
+                      fill={isGoal ? "#eff6ff" : "#f8fafc"} // Blue 50 / Slate 50
+                      stroke={selectedNodeId === node.id ? "#3b82f6" : node.isInvalid ? "#cbd5e1" : "#e2e8f0"} // Selected: Blue 500, Invalid: Slate 300, Default: Slate 200
                       strokeWidth={selectedNodeId === node.id ? 2 : 1}
+                      strokeDasharray={node.isInvalid ? "5,5" : undefined} // 失效节点虚线边框
+                      style={{
+                        filter:
+                          selectedNodeId === node.id
+                            ? "drop-shadow(0 4px 6px rgb(59 130 246 / 0.3))"
+                            : "drop-shadow(0 1px 2px rgb(0 0 0 / 0.05))",
+                      }}
                     />
                     {/* 节点文本(带 Tooltip) */}
-                    <Tooltip content={text}>
-                      <foreignObject
-                        x={-70}
-                        y={-25}
-                        width={150}
-                        height={50}
-                      >
+                    <foreignObject
+                      x={-70}
+                      y={-25}
+                      width={150}
+                      height={50}
+                    >
+                      <Tooltip content={text}>
                         <div
-                          className="w-full h-full flex items-center justify-center text-xs text-center leading-[1.2] px-1 box-border line-clamp-3"
+                          className="w-full h-full overflow-hidden flex items-center justify-center"
                           style={{
                             color: textColor,
-                            WebkitBoxPack: "center", // 垂直居中
                           }}
                         >
-                          {text}
+                          <span className="text-xs line-clamp-3 px-1">{text}</span>
                         </div>
-                      </foreignObject>
-                    </Tooltip>
+                      </Tooltip>
+                    </foreignObject>
                   </g>
                 );
               })}

+ 77 - 31
frontend/react-template/src/components/FlowChart/hooks/useFlowChartData.ts

@@ -40,27 +40,26 @@ const buildSubGoals = (flatGoals: Goal[]): Goal[] => {
   });
 };
 
+import { processRetryLogic } from "../utils/retryLogic";
+
 // FlowChart 专用数据 Hook:处理实时事件并聚合消息组
-export const useFlowChartData = (traceId: string | null, initialGoals: Goal[]) => {
-  const [goals, setGoals] = useState<Goal[]>(initialGoals);
+export const useFlowChartData = (traceId: string | null, refreshTrigger?: number) => {
+  const [goals, setGoals] = useState<Goal[]>([]);
   const [messages, setMessages] = useState<Message[]>([]);
   const [msgGroups, setMsgGroups] = useState<Record<string, Message[]>>({});
   const [sinceEventId, setSinceEventId] = useState(0);
+  const [readyToConnect, setReadyToConnect] = useState(false);
   const currentEventIdRef = useRef(0);
   const restReloadingRef = useRef(false);
+  const [reloading, setReloading] = useState(false);
+  const [invalidBranches, setInvalidBranches] = useState<Message[][]>([]);
 
-  const messageSortKey = useCallback((message: Message): number => {
-    const mid =
-      typeof message.message_id === "string"
-        ? message.message_id
-        : typeof message.id === "string"
-          ? message.id
-          : undefined;
-    if (!mid) return 0;
-    if (!mid.includes("-")) return 0;
-    const suffix = mid.slice(mid.lastIndexOf("-") + 1);
-    const num = Number.parseInt(suffix, 10);
-    return Number.isFinite(num) ? num : 0;
+  const messageComparator = useCallback((a: Message, b: Message): number => {
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const seqA = typeof (a as any).sequence === "number" ? (a as any).sequence : 0;
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const seqB = typeof (b as any).sequence === "number" ? (b as any).sequence : 0;
+    return seqA - seqB;
   }, []);
 
   const updateMessageGroups = useCallback(
@@ -81,30 +80,33 @@ export const useFlowChartData = (traceId: string | null, initialGoals: Goal[]) =
       setMsgGroups((prev) => {
         const existing = prev[groupKey] ? [...prev[groupKey]] : [];
         existing.push(message);
-        existing.sort((a, b) => messageSortKey(a) - messageSortKey(b));
+        existing.sort(messageComparator);
         return { ...prev, [groupKey]: existing };
       });
     },
-    [messageSortKey],
+    [messageComparator],
   );
 
   useEffect(() => {
-    setGoals(initialGoals);
+    setGoals([]);
     setMessages([]);
     setMsgGroups({});
     setSinceEventId(0);
+    setReadyToConnect(false);
     currentEventIdRef.current = 0;
     restReloadingRef.current = false;
-  }, [initialGoals, traceId]);
+  }, [traceId]);
 
   const reloadViaRest = useCallback(async () => {
     if (!traceId) return;
     if (restReloadingRef.current) return;
     restReloadingRef.current = true;
+    setReloading(true);
+    let nextSinceEventId: number | null = null;
     try {
       const [traceRes, messagesRes] = await Promise.all([
         fetch(`http://localhost:8000/api/traces/${traceId}`),
-        fetch(`http://localhost:8000/api/traces/${traceId}/messages`),
+        fetch(`http://localhost:8000/api/traces/${traceId}/messages?mode=all`),
       ]);
 
       if (traceRes.ok) {
@@ -118,6 +120,7 @@ export const useFlowChartData = (traceId: string | null, initialGoals: Goal[]) =
         if (typeof lastEventId === "number") {
           currentEventIdRef.current = Math.max(currentEventIdRef.current, lastEventId);
           setSinceEventId(lastEventId);
+          nextSinceEventId = lastEventId;
         }
 
         if (goalList.length > 0) {
@@ -143,16 +146,23 @@ export const useFlowChartData = (traceId: string | null, initialGoals: Goal[]) =
         const json = (await messagesRes.json()) as unknown;
         const root = isRecord(json) ? json : {};
         const list = Array.isArray(root.messages) ? (root.messages as Message[]) : [];
-        const nextMessages = [...list].sort((a, b) => messageSortKey(a) - messageSortKey(b));
-        setMessages(nextMessages);
+        console.log("%c [ list ]-149", "font-size:13px; background:pink; color:#bf2c9f;", list);
+
+        const filtered = list.filter((message) => (message as { status?: string }).status !== "abandoned");
+        const nextMessages = [...filtered].sort(messageComparator);
+
+        const { availableData: finalMessages, invalidBranches: invalidBranchesTemp } = processRetryLogic(nextMessages);
+
+        setMessages(finalMessages);
+        setInvalidBranches(invalidBranchesTemp);
         const grouped: Record<string, Message[]> = {};
-        nextMessages.forEach((message) => {
+        finalMessages.forEach((message) => {
           const groupKey = typeof message.goal_id === "string" && message.goal_id ? message.goal_id : "START";
           if (!grouped[groupKey]) grouped[groupKey] = [];
           grouped[groupKey].push(message);
         });
         Object.keys(grouped).forEach((key) => {
-          grouped[key].sort((a, b) => messageSortKey(a) - messageSortKey(b));
+          grouped[key].sort(messageComparator);
         });
         setMsgGroups(grouped);
 
@@ -169,16 +179,49 @@ export const useFlowChartData = (traceId: string | null, initialGoals: Goal[]) =
           });
         }
       }
+
+      // REST 请求完成后,允许建立 WebSocket 连接
+      setReadyToConnect(true);
     } finally {
       restReloadingRef.current = false;
+      setReloading(false);
     }
-  }, [messageSortKey, traceId]);
+    return nextSinceEventId;
+  }, [messageComparator, traceId]);
+
+  const prevTraceIdRef = useRef<string | null>(null);
+  const prevRefreshTriggerRef = useRef<number | undefined>(undefined);
+
+  useEffect(() => {
+    // 确保 traceId 存在
+    if (!traceId) {
+      prevTraceIdRef.current = null;
+      return;
+    }
+
+    // 检查是否发生了变化
+    const traceChanged = traceId !== prevTraceIdRef.current;
+    const refreshTriggerChanged = refreshTrigger !== prevRefreshTriggerRef.current;
+
+    // 只有当 traceId 真正变化,或者 refreshTrigger 真正变化时,才执行加载
+    if (traceChanged || (typeof refreshTrigger === "number" && refreshTrigger > 0 && refreshTriggerChanged)) {
+      prevTraceIdRef.current = traceId;
+      prevRefreshTriggerRef.current = refreshTrigger;
+
+      // 注意:traceId 变化时,另外一个清理 useEffect 也会执行,这里只负责触发加载
+      // 这里不直接调用,而是通过一个 setTimeout 0 来确保清理操作已经完成
+      // 但实际上清理操作是在副作用执行前发生的(对于同一组件)
+
+      void reloadViaRest();
+    }
+  }, [traceId, refreshTrigger, reloadViaRest]); // 添加 reloadViaRest 到依赖列表,但由于我们用了 ref 来控制,所以不会因为它的变化而重复执行
 
   const handleWebSocketMessage = useCallback(
     (payload: unknown) => {
       const raw = isRecord(payload) ? payload : {};
       const event = (typeof raw.event === "string" && raw.event) || (typeof raw.type === "string" && raw.type) || "";
       const data = isRecord(raw.data) ? raw.data : raw;
+      console.log("%c [ data ]-182", "font-size:13px; background:pink; color:#bf2c9f;", data);
 
       const eventId = typeof raw.event_id === "number" ? raw.event_id : undefined;
       if (typeof eventId === "number") {
@@ -191,9 +234,11 @@ export const useFlowChartData = (traceId: string | null, initialGoals: Goal[]) =
           (typeof raw.message === "string" ? raw.message : undefined) ||
           "";
         if (message.includes("Too many missed events")) {
-          void reloadViaRest();
-          const nextSince = currentEventIdRef.current;
-          if (nextSince > 0) setSinceEventId(nextSince);
+          void reloadViaRest().then((nextSince) => {
+            if (typeof nextSince === "number") return;
+            const fallbackSince = currentEventIdRef.current;
+            if (fallbackSince > 0) setSinceEventId(fallbackSince);
+          });
         }
         return;
       }
@@ -293,14 +338,14 @@ export const useFlowChartData = (traceId: string | null, initialGoals: Goal[]) =
         if (message) {
           setMessages((prev) => {
             const next = [...prev, message];
-            next.sort((a, b) => messageSortKey(a) - messageSortKey(b));
+            next.sort(messageComparator);
             return next;
           });
           updateMessageGroups(message);
         }
       }
     },
-    [messageSortKey, reloadViaRest, updateMessageGroups],
+    [messageComparator, reloadViaRest, updateMessageGroups],
   );
 
   // 主 Trace 连接
@@ -308,7 +353,8 @@ export const useFlowChartData = (traceId: string | null, initialGoals: Goal[]) =
     () => ({ onMessage: handleWebSocketMessage, sinceEventId }),
     [handleWebSocketMessage, sinceEventId],
   );
-  const { connected } = useWebSocket(traceId, wsOptions);
+  // 只有当 traceId 存在且 REST 加载完成 (readyToConnect) 后才连接 WebSocket
+  const { connected } = useWebSocket(readyToConnect ? traceId : null, wsOptions);
 
-  return { goals, messages, msgGroups, connected };
+  return { goals, messages, msgGroups, connected, reloading, invalidBranches };
 };

+ 35 - 18
frontend/react-template/src/components/FlowChart/styles/FlowChart.module.css

@@ -1,8 +1,9 @@
 .container {
   width: 100%;
   height: 100%;
-  background: #fafafa;
+  background: var(--bg-app);
   position: relative;
+  overflow: hidden;
 }
 
 .scrollContainer {
@@ -13,46 +14,62 @@
 .svg {
   width: 100%;
   height: 100%;
-  background: #fff;
+  background-image: radial-gradient(var(--border-light) 1px, transparent 1px);
+  background-size: 20px 20px;
+  background-color: var(--bg-app);
   cursor: grab;
   overscroll-behavior: contain;
 }
+
 .panning {
   cursor: grabbing;
 }
+
 .links path {
-  transition: all 0.2s ease;
+  transition: all var(--transition-normal);
 }
+
 .nodes text {
   pointer-events: none;
 }
 
 .controls {
   position: absolute;
-  right: 16px;
-  bottom: 16px;
+  right: var(--space-lg);
+  bottom: var(--space-lg);
   display: flex;
-  gap: 8px;
-  background: rgba(255, 255, 255, 0.9);
-  border: 1px solid #e5e7eb;
-  border-radius: 10px;
-  padding: 6px;
-  box-shadow: 0 6px 18px rgba(0, 0, 0, 0.08);
+  gap: var(--space-sm);
+  background: var(--bg-surface);
+  border: 1px solid var(--border-light);
+  border-radius: var(--radius-lg);
+  padding: var(--space-xs);
+  box-shadow: var(--shadow-lg);
   z-index: 10;
 }
 
 .controlButton {
-  min-width: 36px;
+  min-width: 32px;
   height: 32px;
-  border: 1px solid #d0d7de;
-  border-radius: 8px;
-  background: #fff;
-  color: #333;
+  padding: 0 var(--space-sm);
+  border: 1px solid transparent;
+  border-radius: var(--radius-md);
+  background: transparent;
+  color: var(--text-secondary);
   font-size: 14px;
+  font-weight: 500;
   cursor: pointer;
-  position: relative;
+  transition: all var(--transition-fast);
+  display: flex;
+  align-items: center;
+  justify-content: center;
 }
 
 .controlButton:hover {
-  background: #f5f7fb;
+  background: var(--bg-surface-hover);
+  color: var(--text-primary);
+}
+
+.controlButton:active {
+  background: var(--border-light);
 }
+/* Removed old styles */

+ 32 - 0
frontend/react-template/src/components/FlowChart/utils/retryLogic.ts

@@ -0,0 +1,32 @@
+import { Message } from "../../../types/message";
+
+export const processRetryLogic = (
+  messages: Message[],
+): { availableData: Message[]; invalidBranches: Message[][] } => {
+  const invalidBranches: Message[][] = [];
+  const parentLastIndexMap = new Map<number, number>();
+  const invalidIndices = new Set<number>();
+
+  messages.forEach((msg, index) => {
+    const pSeq = msg.parent_sequence;
+    if (typeof pSeq === "number") {
+      if (parentLastIndexMap.has(pSeq)) {
+        const lastIndex = parentLastIndexMap.get(pSeq)!;
+        // 提取失效分支:从上一次出现位置到当前位置之前
+        const invalidBranch = messages.slice(lastIndex, index);
+        if (invalidBranch.length > 0) {
+          invalidBranches.push(invalidBranch);
+          for (let k = lastIndex; k < index; k++) {
+            invalidIndices.add(k);
+          }
+        }
+        parentLastIndexMap.set(pSeq, index);
+      } else {
+        parentLastIndexMap.set(pSeq, index);
+      }
+    }
+  });
+
+  const availableData = messages.filter((_, index) => !invalidIndices.has(index));
+  return { availableData, invalidBranches };
+};

+ 45 - 44
frontend/react-template/src/components/MainContent/MainContent.module.css

@@ -3,86 +3,87 @@
   display: flex;
   flex-direction: column;
   overflow: hidden;
+  background: var(--bg-app);
 }
 
 .header {
-  padding: 12px 16px;
-  border-bottom: 1px solid var(--border-color, #e0e0e0);
+  height: var(--topbar-height);
+  padding: 0 var(--space-lg);
+  border-bottom: 1px solid var(--border-light);
   display: flex;
   align-items: center;
   justify-content: space-between;
-  background: #fff;
+  background: var(--bg-surface);
+  flex-shrink: 0;
+  box-shadow: var(--shadow-sm);
+  z-index: 5;
 }
 
 .title {
   font-size: 14px;
-  color: var(--text-primary, #333);
+  font-weight: 500;
+  color: var(--text-secondary);
 }
 
 .status {
   font-size: 12px;
-  color: #666;
+  color: var(--text-tertiary);
 }
 
 .headerRight {
   display: flex;
   align-items: center;
-  gap: 16px;
+  gap: var(--space-md);
 }
 
-.legend {
-  display: flex;
+.btn {
+  display: inline-flex;
   align-items: center;
-  gap: 12px;
+  justify-content: center;
+  height: 32px;
+  padding: 0 var(--space-md);
+  border-radius: var(--radius-md);
+  font-size: 13px;
+  font-weight: 500;
+  cursor: pointer;
+  transition: all var(--transition-fast);
+  border: 1px solid var(--border-medium);
+  background: var(--bg-surface);
+  color: var(--text-secondary);
+  outline: none;
 }
 
-.legendItem {
-  display: flex;
-  align-items: center;
-  gap: 6px;
-  font-size: 12px;
-  color: #666;
+.btn:hover {
+  background: var(--bg-surface-hover);
+  color: var(--text-primary);
+  border-color: var(--border-medium);
 }
 
-.legendDot {
-  width: 10px;
-  height: 10px;
-  border-radius: 50%;
-  display: inline-block;
+.btn:active {
+  background: var(--border-light);
 }
 
 .content {
   flex: 1;
   overflow: hidden;
-  background: #fafafa;
-  display: flex;
-  flex-direction: column;
   position: relative;
+  background: var(--bg-app);
 }
 
-.empty {
-  color: #999;
+.loading {
+  display: flex;
+  align-items: center;
+  justify-content: center;
+  height: 100%;
+  color: var(--text-tertiary);
   font-size: 14px;
-  margin: auto;
 }
 
-.buttons {
+.empty {
   display: flex;
-  gap: 8px;
-}
-
-.btn {
-  padding: 4px 12px;
-  border: 1px solid #d9d9d9;
-  border-radius: 4px;
-  background: #fff;
-  cursor: pointer;
-  font-size: 12px;
-  color: #333;
-  transition: all 0.3s;
-}
-
-.btn:hover {
-  color: #1890ff;
-  border-color: #1890ff;
+  align-items: center;
+  justify-content: center;
+  height: 100%;
+  color: var(--text-tertiary);
+  font-size: 14px;
 }

+ 62 - 17
frontend/react-template/src/components/MainContent/MainContent.tsx

@@ -1,9 +1,8 @@
-import { useMemo, useRef, useState, useEffect } from "react";
+import { useRef, useState, useEffect } from "react";
 import type { FC } from "react";
 import { Select } from "@douyinfe/semi-ui";
 import { FlowChart } from "../FlowChart/FlowChart";
 import type { FlowChartRef } from "../FlowChart/FlowChart";
-import { useTrace } from "../../hooks/useTrace";
 import { useFlowChartData } from "../FlowChart/hooks/useFlowChartData";
 import { traceApi } from "../../api/traceApi";
 import type { Goal } from "../../types/goal";
@@ -14,17 +13,30 @@ import styles from "./MainContent.module.css";
 interface MainContentProps {
   traceId: string | null;
   onNodeClick?: (node: Goal | Message, edge?: Edge) => void;
-  onTraceChange?: (traceId: string) => void;
+  onTraceChange?: (traceId: string, title?: string) => void;
   refreshTrigger?: number;
+  messageRefreshTrigger?: number;
 }
 
-export const MainContent: FC<MainContentProps> = ({ traceId, onNodeClick, onTraceChange, refreshTrigger }) => {
+export const MainContent: FC<MainContentProps> = ({
+  traceId,
+  onNodeClick,
+  onTraceChange,
+  refreshTrigger,
+  messageRefreshTrigger,
+}) => {
   const flowChartRef = useRef<FlowChartRef>(null);
   const [isAllExpanded, setIsAllExpanded] = useState(true);
   const [traceList, setTraceList] = useState<TraceListItem[]>([]);
-  const { trace, loading } = useTrace(traceId);
-  const initialGoals = useMemo(() => trace?.goal_tree?.goals ?? [], [trace]);
-  const { goals, connected, msgGroups } = useFlowChartData(traceId, initialGoals);
+  const [cachedGoals, setCachedGoals] = useState<Goal[]>([]);
+  const [cachedMsgGroups, setCachedMsgGroups] = useState<Record<string, Message[]>>({});
+  const [cachedInvalidBranches, setCachedInvalidBranches] = useState<Message[][]>([]);
+  const { goals, connected, msgGroups, reloading, invalidBranches } = useFlowChartData(traceId, messageRefreshTrigger);
+  console.log("%c [ msgGroups ]-34", "font-size:13px; background:pink; color:#bf2c9f;", msgGroups);
+  const displayGoals = goals.length > 0 ? goals : cachedGoals;
+  const displayMsgGroups = Object.keys(msgGroups).length > 0 ? msgGroups : cachedMsgGroups;
+  const displayInvalidBranches =
+    invalidBranches && invalidBranches.length > 0 ? invalidBranches : cachedInvalidBranches;
 
   useEffect(() => {
     const fetchTraces = async () => {
@@ -38,9 +50,35 @@ export const MainContent: FC<MainContentProps> = ({ traceId, onNodeClick, onTrac
     fetchTraces();
   }, [refreshTrigger]);
 
-  console.log("%c [ MainContent-goals ]-19", "font-size:13px; background:pink; color:#bf2c9f;", goals);
+  useEffect(() => {
+    // 移除 reload 调用,因为 useFlowChartData 内部会监听 messageRefreshTrigger 并重新加载
+  }, [messageRefreshTrigger]);
+
+  useEffect(() => {
+    if (goals.length > 0) {
+      setCachedGoals(goals);
+    }
+  }, [goals]);
+
+  useEffect(() => {
+    if (Object.keys(msgGroups).length > 0) {
+      setCachedMsgGroups(msgGroups);
+    }
+  }, [msgGroups]);
+
+  useEffect(() => {
+    if (invalidBranches && invalidBranches.length > 0) {
+      setCachedInvalidBranches(invalidBranches);
+    }
+  }, [invalidBranches]);
+
+  useEffect(() => {
+    setCachedGoals([]);
+    setCachedMsgGroups({});
+    setCachedInvalidBranches([]);
+  }, [traceId]);
 
-  if (!traceId && !loading) {
+  if (!traceId && !reloading) {
     return (
       <div className={styles.main}>
         <div className={styles.header}>
@@ -61,11 +99,14 @@ export const MainContent: FC<MainContentProps> = ({ traceId, onNodeClick, onTrac
         <div className={styles.headerRight}>
           <Select
             value={traceId}
-            onChange={(value: unknown) => onTraceChange?.(value as string)}
+            onChange={(value: unknown) => {
+              const trace = traceList.find((t) => t.trace_id === value);
+              onTraceChange?.(value as string, trace?.task || trace?.trace_id);
+            }}
             style={{ width: 200 }}
             placeholder="选择 Trace"
             optionList={traceList.map((t) => ({
-              label: t.task || t.trace_id,
+              label: t.task?.length > 15 ? `${t.task.slice(0, 15)}...` : t.task || t.trace_id,
               value: t.trace_id,
             }))}
           />
@@ -118,16 +159,20 @@ export const MainContent: FC<MainContentProps> = ({ traceId, onNodeClick, onTrac
         </div>
       </div>
       <div className={styles.content}>
-        {loading ? (
-          <div className={styles.empty}>加载中...</div>
-        ) : goals.length === 0 ? (
-          <div className={styles.empty}>暂无节点</div>
+        {reloading ? (
+          <div className={styles.loading}>加载中...</div>
+        ) : displayGoals.length === 0 ? (
+          <div className={styles.empty}>暂无数据</div>
         ) : (
           <FlowChart
             ref={flowChartRef}
-            goals={goals}
-            msgGroups={msgGroups}
+            goals={displayGoals}
+            msgGroups={displayMsgGroups}
+            invalidBranches={displayInvalidBranches}
             onNodeClick={onNodeClick}
+            onSubTraceClick={(_parentGoal, entry) => {
+              onTraceChange?.(entry.id, entry.mission || entry.id);
+            }}
           />
         )}
       </div>

+ 102 - 47
frontend/react-template/src/components/TopBar/TopBar.module.css

@@ -1,80 +1,135 @@
 .topbar {
-  height: 60px;
-  background: var(--bg-primary, #ffffff);
-  border-bottom: 1px solid var(--border-color, #e0e0e0);
+  height: var(--topbar-height);
+  background: var(--bg-surface);
+  border-bottom: 1px solid var(--border-light);
   display: flex;
   align-items: center;
   justify-content: space-between;
-  padding: 0 var(--spacing-lg, 24px);
+  padding: 0 var(--space-lg);
+  box-shadow: var(--shadow-sm);
+  z-index: 10;
+}
+
+.title {
+  display: flex;
+  align-items: center;
+  gap: var(--space-md);
+  flex: 1;
+  min-width: 0;
 }
 
 .title h1 {
-  font-size: var(--font-size-lg, 16px);
+  font-size: 18px;
   font-weight: 600;
-  color: var(--text-primary, #333333);
+  color: var(--text-primary);
   margin: 0;
+  white-space: nowrap;
+  overflow: hidden;
+  text-overflow: ellipsis;
 }
 
-.filters {
+.actions {
   display: flex;
-  gap: var(--spacing-md, 16px);
+  gap: var(--space-sm);
+  align-items: center;
 }
 
-.select {
-  padding: 8px 12px;
-  border: 1px solid #ddd;
-  border-radius: 4px;
+.button {
+  display: inline-flex;
+  align-items: center;
+  justify-content: center;
+  height: 32px;
+  padding: 0 var(--space-md);
+  border-radius: var(--radius-md);
   font-size: 14px;
-  background: white;
+  font-weight: 500;
   cursor: pointer;
-  min-width: 200px;
-}
-
-.select:disabled {
-  opacity: 0.6;
-  cursor: not-allowed;
+  transition: all var(--transition-fast);
+  border: 1px solid transparent;
+  outline: none;
+  white-space: nowrap;
 }
 
+/* Default Button (Secondary/Ghost) */
 .button {
-  padding: 8px 16px;
-  border: 1px solid #d9d9d9;
-  border-radius: 4px;
-  font-size: 14px;
-  background: #fff;
-  color: #666;
-  cursor: pointer;
-  transition: all 0.2s;
+  background: var(--bg-surface);
+  border-color: var(--border-medium);
+  color: var(--text-secondary);
 }
 
 .button:hover:not(:disabled) {
-  color: #0070f3;
-  border-color: #0070f3;
+  background: var(--bg-surface-hover);
+  color: var(--text-primary);
+  border-color: var(--border-medium);
 }
 
-.button:disabled {
-  opacity: 0.6;
-  cursor: not-allowed;
+.button:active:not(:disabled) {
+  background: var(--border-light);
+}
+
+/* Primary Button */
+.button.primary {
+  background: var(--bg-surface);
+  color: var(--color-primary);
+  border-color: var(--color-primary);
+}
+
+.button.primary:hover:not(:disabled) {
+  background: var(--color-primary-hover);
+}
+
+.button.primary:active:not(:disabled) {
+  background: var(--color-primary-active);
+}
+
+/* Danger Button */
+.button.danger {
+  background: var(--bg-surface);
+  color: var(--color-danger);
+  border-color: var(--color-danger);
+}
+
+.button.danger:hover:not(:disabled) {
+  background: #fef2f2; /* Red 50 */
+}
+
+.button.danger:active:not(:disabled) {
+  background: #fee2e2; /* Red 100 */
 }
 
-.buttonPrimary {
-  composes: button;
-  background: #0070f3;
-  color: white;
-  border-color: #0070f3;
+/* Success Button */
+.button.success {
+  background: var(--bg-surface);
+  color: var(--color-success);
+  border-color: var(--color-success);
 }
 
-.buttonPrimary:hover:not(:disabled) {
-  background: #0051cc;
-  color: white;
+.button.success:hover:not(:disabled) {
+  background: #ecfdf5; /* Emerald 50 */
 }
 
-.buttonDanger {
-  composes: button;
-  color: #f44336;
-  border-color: #f44336;
+.button.success:active:not(:disabled) {
+  background: #d1fae5; /* Emerald 100 */
 }
 
-.buttonDanger:hover:not(:disabled) {
-  background: #fff1f0;
-  color: #f44336;
+/* Warning Button */
+.button.warning {
+  background: var(--bg-surface);
+  color: var(--color-warning);
+  border-color: var(--color-warning);
+}
+
+.button.warning:hover:not(:disabled) {
+  background: #fffbeb; /* Amber 50 */
+}
+
+.button.warning:active:not(:disabled) {
+  background: #fef3c7; /* Amber 100 */
+}
+
+.button:disabled {
+  opacity: 0.5;
+  cursor: not-allowed;
+  pointer-events: none;
 }
+/* Removed old styles */

+ 143 - 30
frontend/react-template/src/components/TopBar/TopBar.tsx

@@ -1,5 +1,6 @@
 import { useCallback, useEffect, useState, useRef } from "react";
 import type { FC } from "react";
+import ReactMarkdown from "react-markdown";
 import { Modal, Form, Toast } from "@douyinfe/semi-ui";
 import { traceApi } from "../../api/traceApi";
 import type { Goal } from "../../types/goal";
@@ -9,16 +10,28 @@ import styles from "./TopBar.module.css";
 interface TopBarProps {
   selectedTraceId: string | null;
   selectedNode: Goal | Message | null;
-  onTraceSelect: (traceId: string) => void;
+  title: string;
+  onTraceSelect: (traceId: string, title?: string) => void;
   onTraceCreated?: () => void;
+  onMessageInserted?: () => void;
 }
 
-export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTraceSelect, onTraceCreated }) => {
-  const [title, setTitle] = useState("流程图可视化系统");
+export const TopBar: FC<TopBarProps> = ({
+  selectedTraceId,
+  selectedNode,
+  title,
+  onTraceSelect,
+  onTraceCreated,
+  onMessageInserted,
+}) => {
   const [isModalVisible, setIsModalVisible] = useState(false);
+  const [isInsertModalVisible, setIsInsertModalVisible] = useState(false);
+  const [isReflectModalVisible, setIsReflectModalVisible] = useState(false);
   const [isExperienceModalVisible, setIsExperienceModalVisible] = useState(false);
   const [experienceContent, setExperienceContent] = useState("");
   const formApiRef = useRef<{ getValues: () => { system_prompt: string; user_prompt: string } } | null>(null);
+  const insertFormApiRef = useRef<{ getValues: () => { insert_prompt: string } } | null>(null);
+  const reflectFormApiRef = useRef<{ getValues: () => { reflect_focus: string } } | null>(null);
 
   const isMessageNode = (node: Goal | Message): node is Message =>
     "message_id" in node || "role" in node || "content" in node || "goal_id" in node || "tokens" in node;
@@ -35,20 +48,20 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
           status: status || undefined,
           limit: 20,
         });
-        const firstTrace = data.traces[0];
-        const traceId = firstTrace?.parent_trace_id || firstTrace.trace_id;
-        if (firstTrace) {
-          setTitle(firstTrace.task);
-          onTraceSelect(traceId);
-        } else {
-          setTitle("流程图可视化系统");
-          onTraceSelect("");
+        // 初始加载时不自动选择,或者如果 selectedTraceId 为空才选择第一个
+        if (!selectedTraceId && data.traces.length > 0) {
+          const firstTrace = data.traces[0];
+          const traceId = firstTrace?.parent_trace_id || firstTrace.trace_id;
+          onTraceSelect(traceId, firstTrace.task);
+        } else if (data.traces.length === 0) {
+          onTraceSelect("", "流程图可视化系统");
         }
       } catch (error) {
         console.error("Failed to load traces:", error);
+        Toast.error("加载任务列表失败");
       }
     },
-    [onTraceSelect],
+    [onTraceSelect, selectedTraceId],
   );
 
   useEffect(() => {
@@ -64,7 +77,15 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
       const values = formApiRef.current?.getValues();
       if (!values) return;
 
-      await traceApi.createTrace(values);
+      const messages: Array<{ role: "system" | "user"; content: string }> = [];
+      if (values.system_prompt) {
+        messages.push({ role: "system", content: values.system_prompt });
+      }
+      if (values.user_prompt) {
+        messages.push({ role: "user", content: values.user_prompt });
+      }
+
+      await traceApi.createTrace({ messages });
       await loadTraces();
       onTraceCreated?.();
       setIsModalVisible(false);
@@ -75,9 +96,7 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
     }
   };
 
-  const handleRun = async () => {
-    console.log("%c [ selectedNode ]-72", "font-size:13px; background:pink; color:#bf2c9f;", selectedNode);
-
+  const handleRun = () => {
     if (!selectedNode) {
       Toast.warning("请选择插入节点");
       return;
@@ -88,18 +107,50 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
       return;
     }
 
-    const messageId = getMessageId(selectedNode);
+    setIsInsertModalVisible(true);
+  };
+
+  const handleInsertConfirm = async () => {
+    const node = selectedNode;
+    if (!node) {
+      Toast.warning("请选择插入节点");
+      return;
+    }
+    if (!selectedTraceId) {
+      Toast.warning("请先选择一个 Trace");
+      return;
+    }
+
+    if (!isMessageNode(node)) {
+      Toast.warning("插入位置错误");
+      return;
+    }
+
+    const values = insertFormApiRef.current?.getValues();
+    const insertPrompt = values?.insert_prompt?.trim();
+    if (!insertPrompt) {
+      Toast.warning("请输入指令");
+      return;
+    }
+
+    const messageId = getMessageId(node);
     if (!messageId) {
       Toast.error("消息ID缺失");
       return;
     }
 
     try {
-      await traceApi.runTrace(messageId);
-      Toast.success("已开始运行");
+      const payload = {
+        messages: [{ role: "user" as const, content: insertPrompt }],
+        after_message_id: messageId,
+      };
+      await traceApi.runTrace(selectedTraceId, payload);
+      Toast.success("插入成功");
+      setIsInsertModalVisible(false);
+      onMessageInserted?.();
     } catch (error) {
       console.error("Failed to run trace:", error);
-      Toast.error("运行请求失败");
+      Toast.error("插入失败");
     }
   };
 
@@ -128,9 +179,20 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
       Toast.warning("请先选择一个 Trace");
       return;
     }
+    setIsReflectModalVisible(true);
+  };
+
+  const handleReflectConfirm = async () => {
+    if (!selectedTraceId) {
+      Toast.warning("请先选择一个 Trace");
+      return;
+    }
+    const values = reflectFormApiRef.current?.getValues();
+    const focus = values?.reflect_focus?.trim();
     try {
-      await traceApi.reflectTrace(selectedTraceId);
+      await traceApi.reflectTrace(selectedTraceId, focus ? { focus } : {});
       Toast.success("已触发反思");
+      setIsReflectModalVisible(false);
     } catch (error) {
       console.error("Failed to reflect trace:", error);
       Toast.error("反思请求失败");
@@ -140,7 +202,17 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
   const handleExperience = async () => {
     try {
       const content = await traceApi.getExperiences();
-      setExperienceContent(typeof content === "string" ? content : JSON.stringify(content, null, 2));
+      // 尝试解析 JSON 格式
+      let displayContent = typeof content === "string" ? content : JSON.stringify(content, null, 2);
+      try {
+        const parsed = typeof content === "string" ? JSON.parse(content) : content;
+        if (parsed && typeof parsed === "object" && "content" in parsed) {
+          displayContent = parsed.content;
+        }
+      } catch (e) {
+        // 解析失败则使用原始字符串
+      }
+      setExperienceContent(displayContent);
       setIsExperienceModalVisible(true);
     } catch (error) {
       console.error("Failed to get experiences:", error);
@@ -150,24 +222,27 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
 
   return (
     <header className={styles.topbar}>
-      <div className={styles.title}>
+      <div
+        className={styles.title}
+        title={title}
+      >
         <h1>{title}</h1>
       </div>
-      <div className={styles.filters}>
+      <div className={styles.actions}>
         <button
-          className={styles.button}
+          className={`${styles.button} ${styles.success}`}
           onClick={handleNewTask}
         >
           新任务
         </button>
         <button
-          className={styles.buttonPrimary}
+          className={`${styles.button} ${styles.primary}`}
           onClick={handleRun}
         >
-          运行
+          插入
         </button>
         <button
-          className={styles.buttonDanger}
+          className={`${styles.button} ${styles.danger}`}
           onClick={handleStop}
         >
           停止
@@ -179,7 +254,7 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
           反思
         </button>
         <button
-          className={styles.button}
+          className={`${styles.button} ${styles.warning}`}
           onClick={handleExperience}
         >
           经验
@@ -209,6 +284,42 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
           />
         </Form>
       </Modal>
+      <Modal
+        title="插入指令"
+        visible={isInsertModalVisible}
+        onOk={handleInsertConfirm}
+        onCancel={() => setIsInsertModalVisible(false)}
+        centered
+        style={{ width: 600 }}
+      >
+        {/* eslint-disable-next-line @typescript-eslint/no-explicit-any */}
+        <Form getFormApi={(api: any) => (insertFormApiRef.current = api)}>
+          <Form.TextArea
+            field="insert_prompt"
+            label="指令"
+            placeholder="请输入插入指令"
+            autosize={{ minRows: 3, maxRows: 6 }}
+          />
+        </Form>
+      </Modal>
+      <Modal
+        title="反思"
+        visible={isReflectModalVisible}
+        onOk={handleReflectConfirm}
+        onCancel={() => setIsReflectModalVisible(false)}
+        centered
+        style={{ width: 600 }}
+      >
+        {/* eslint-disable-next-line @typescript-eslint/no-explicit-any */}
+        <Form getFormApi={(api: any) => (reflectFormApiRef.current = api)}>
+          <Form.TextArea
+            field="reflect_focus"
+            label="反思重点"
+            placeholder="请输入反思重点(可选)"
+            autosize={{ minRows: 3, maxRows: 6 }}
+          />
+        </Form>
+      </Modal>
       <Modal
         title="经验列表"
         visible={isExperienceModalVisible}
@@ -218,7 +329,9 @@ export const TopBar: FC<TopBarProps> = ({ selectedTraceId, selectedNode, onTrace
         style={{ width: 800 }}
         bodyStyle={{ maxHeight: "70vh", overflow: "auto" }}
       >
-        <pre style={{ whiteSpace: "pre-wrap", wordWrap: "break-word" }}>{experienceContent || "暂无经验数据"}</pre>
+        <div style={{ whiteSpace: "pre-wrap", wordWrap: "break-word" }}>
+          {experienceContent ? <ReactMarkdown>{experienceContent}</ReactMarkdown> : "暂无经验数据"}
+        </div>
       </Modal>
     </header>
   );

+ 13 - 10
frontend/react-template/src/hooks/useTrace.ts

@@ -1,4 +1,4 @@
-import { useState, useEffect } from "react";
+import { useState, useEffect, useCallback } from "react";
 import { traceApi } from "../api/traceApi";
 import type { TraceDetailResponse } from "../types/trace";
 
@@ -7,24 +7,27 @@ export const useTrace = (traceId: string | null) => {
   const [loading, setLoading] = useState(false);
   const [error, setError] = useState<Error | null>(null);
 
-  useEffect(() => {
-    if (!traceId) return;
-
-    const loadTrace = async () => {
+  const reload = useCallback(
+    async (idOverride?: string | null) => {
+      const id = typeof idOverride === "string" ? idOverride : traceId;
+      if (!id) return;
       setLoading(true);
       setError(null);
       try {
-        const data = await traceApi.fetchTraceDetail(traceId);
+        const data = await traceApi.fetchTraceDetail(id);
         setTrace(data);
       } catch (err) {
         setError(err as Error);
       } finally {
         setLoading(false);
       }
-    };
+    },
+    [traceId],
+  );
 
-    loadTrace();
-  }, [traceId]);
+  useEffect(() => {
+    void reload();
+  }, [reload]);
 
-  return { trace, loading, error };
+  return { trace, loading, error, reload };
 };

+ 28 - 0
frontend/react-template/src/styles/global.css

@@ -27,12 +27,19 @@ body {
   height: 100%;
   display: flex;
   flex-direction: column;
+  background-color: var(--bg-app);
+}
+
+.app-top {
+  flex: 0 0 auto;
+  z-index: 20;
 }
 
 .app-body {
   flex: 1;
   display: flex;
   overflow: hidden;
+  position: relative;
 }
 
 .app-main {
@@ -40,12 +47,33 @@ body {
   min-width: 0;
   display: flex;
   flex-direction: column;
+  position: relative;
 }
+
 .app-right {
   flex: 0 0 auto;
   min-width: 0;
   display: flex;
   flex-direction: column;
+  background: var(--bg-panel);
+  z-index: 10;
+}
+
+.app-splitter {
+  width: 1px;
+  cursor: col-resize;
+  background-color: var(--border-light);
+  transition: background-color 0.2s;
+  position: relative;
+  z-index: 20;
+}
+
+.app-splitter:hover,
+.app-splitter:active {
+  background-color: var(--color-primary);
+  width: 4px; /* Make it easier to grab visually */
+  margin-left: -1.5px; /* Center alignment adjustment */
+  margin-right: -1.5px;
 }
 
 .app-splitter {

+ 60 - 6
frontend/react-template/src/styles/variables.css

@@ -1,8 +1,62 @@
 :root {
-  --bg-primary: #ffffff;
-  --border-color: #e0e0e0;
-  --text-primary: #333333;
-  --spacing-md: 16px;
-  --spacing-lg: 24px;
-  --font-size-lg: 16px;
+  /* Brand Colors */
+  --color-primary: #3b82f6; /* Blue 500 */
+  --color-primary-hover: #2563eb; /* Blue 600 */
+  --color-primary-active: #1d4ed8; /* Blue 700 */
+  --color-secondary: #64748b; /* Slate 500 */
+
+  /* Semantic Colors */
+  --color-success: #10b981; /* Emerald 500 */
+  --color-warning: #f59e0b; /* Amber 500 */
+  --color-danger: #ef4444; /* Red 500 */
+  --color-info: #3b82f6; /* Blue 500 */
+
+  /* Background Colors */
+  --bg-app: #f8fafc; /* Slate 50 */
+  --bg-surface: #ffffff;
+  --bg-surface-hover: #f1f5f9; /* Slate 100 */
+  --bg-panel: #ffffff;
+  --bg-node: #ffffff;
+  --bg-node-goal: #eff6ff; /* Blue 50 */
+  --bg-node-message: #f8fafc; /* Slate 50 */
+
+  /* Text Colors */
+  --text-primary: #0f172a; /* Slate 900 */
+  --text-secondary: #475569; /* Slate 600 */
+  --text-tertiary: #94a3b8; /* Slate 400 */
+  --text-inverse: #ffffff;
+
+  /* Border Colors */
+  --border-light: #e2e8f0; /* Slate 200 */
+  --border-medium: #cbd5e1; /* Slate 300 */
+  --border-focus: #3b82f6; /* Blue 500 */
+
+  /* Spacing */
+  --space-xs: 4px;
+  --space-sm: 8px;
+  --space-md: 16px;
+  --space-lg: 24px;
+  --space-xl: 32px;
+  --space-2xl: 48px;
+
+  /* Radius */
+  --radius-sm: 4px;
+  --radius-md: 8px;
+  --radius-lg: 12px;
+  --radius-xl: 16px;
+  --radius-full: 9999px;
+
+  /* Shadows */
+  --shadow-sm: 0 1px 2px 0 rgb(0 0 0 / 0.05);
+  --shadow-md: 0 4px 6px -1px rgb(0 0 0 / 0.1), 0 2px 4px -2px rgb(0 0 0 / 0.1);
+  --shadow-lg: 0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1);
+  --shadow-xl: 0 20px 25px -5px rgb(0 0 0 / 0.1), 0 8px 10px -6px rgb(0 0 0 / 0.1);
+
+  /* Transitions */
+  --transition-fast: 150ms cubic-bezier(0.4, 0, 0.2, 1);
+  --transition-normal: 300ms cubic-bezier(0.4, 0, 0.2, 1);
+
+  /* Layout */
+  --topbar-height: 64px;
+  --panel-width: 400px;
 }

+ 2 - 0
frontend/react-template/src/types/message.ts

@@ -17,6 +17,8 @@ export interface Message {
   content?: string | MessageContent;
   description?: string;
   tokens?: number | null;
+  parent_sequence?: number | null;
+  sequence?: number | null;
 }
 
 export interface Edge {

Некоторые файлы не были показаны из-за большого количества измененных файлов