Procházet zdrojové kódy

feat: change model to qwen

elksmmx před 4 dny
rodič
revize
04fd92349a

+ 14 - 2
agent/core/runner.py

@@ -57,6 +57,8 @@ class RunConfig:
     temperature: float = 0.3
     temperature: float = 0.3
     max_iterations: int = 200
     max_iterations: int = 200
     tools: Optional[List[str]] = None          # None = 全部已注册工具
     tools: Optional[List[str]] = None          # None = 全部已注册工具
+    enable_thinking: bool = False              # 启用 extended thinking 模式(仅 Claude 模型有效)
+    thinking_budget_tokens: int = 10000        # thinking token 预算
 
 
     # --- 框架层参数 ---
     # --- 框架层参数 ---
     agent_type: str = "default"
     agent_type: str = "default"
@@ -397,7 +399,11 @@ class AgentRunner:
         if trace and self.trace_store and trace_id:
         if trace and self.trace_store and trace_id:
             msg = Message.create(
             msg = Message.create(
                 trace_id=trace_id, role="assistant", sequence=1, goal_id=None,
                 trace_id=trace_id, role="assistant", sequence=1, goal_id=None,
-                content={"text": result.get("content", ""), "tool_calls": result.get("tool_calls")},
+                content={
+                    "text": result.get("content", ""),
+                    "tool_calls": result.get("tool_calls"),
+                    "thinking": result.get("thinking"),
+                },
                 prompt_tokens=result.get("prompt_tokens", 0),
                 prompt_tokens=result.get("prompt_tokens", 0),
                 completion_tokens=result.get("completion_tokens", 0),
                 completion_tokens=result.get("completion_tokens", 0),
                 finish_reason=result.get("finish_reason"),
                 finish_reason=result.get("finish_reason"),
@@ -811,6 +817,8 @@ class AgentRunner:
                 model=config.model,
                 model=config.model,
                 tools=tool_schemas,
                 tools=tool_schemas,
                 temperature=config.temperature,
                 temperature=config.temperature,
+                enable_thinking=config.enable_thinking,
+                thinking_budget_tokens=config.thinking_budget_tokens,
                 **config.extra_llm_params,
                 **config.extra_llm_params,
             )
             )
 
 
@@ -853,7 +861,11 @@ class AgentRunner:
                 sequence=sequence,
                 sequence=sequence,
                 goal_id=current_goal_id,
                 goal_id=current_goal_id,
                 parent_sequence=head_seq if head_seq > 0 else None,
                 parent_sequence=head_seq if head_seq > 0 else None,
-                content={"text": response_content, "tool_calls": tool_calls},
+                content={
+                    "text": response_content,
+                    "tool_calls": tool_calls,
+                    "thinking": result.get("thinking"),
+                },
                 prompt_tokens=prompt_tokens,
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
                 completion_tokens=completion_tokens,
                 cache_creation_tokens=cache_creation_tokens,
                 cache_creation_tokens=cache_creation_tokens,

+ 17 - 1
agent/llm/openrouter.py

@@ -310,15 +310,19 @@ def _to_anthropic_tools(tools: List[Dict]) -> List[Dict]:
 def _parse_anthropic_response(result: Dict[str, Any]) -> Dict[str, Any]:
 def _parse_anthropic_response(result: Dict[str, Any]) -> Dict[str, Any]:
     """Parse an Anthropic Messages API response into the unified format.
     """Parse an Anthropic Messages API response into the unified format.
 
 
-    Returns a dict with keys: content, tool_calls, finish_reason, usage.
+    Returns a dict with keys: content, tool_calls, finish_reason, usage, thinking.
     """
     """
     content_blocks = result.get("content", [])
     content_blocks = result.get("content", [])
 
 
     text_parts = []
     text_parts = []
     tool_calls = []
     tool_calls = []
+    thinking_parts = []
+
     for block in content_blocks:
     for block in content_blocks:
         if block.get("type") == "text":
         if block.get("type") == "text":
             text_parts.append(block.get("text", ""))
             text_parts.append(block.get("text", ""))
+        elif block.get("type") == "thinking":
+            thinking_parts.append(block.get("thinking", ""))
         elif block.get("type") == "tool_use":
         elif block.get("type") == "tool_use":
             tool_calls.append({
             tool_calls.append({
                 "id": block.get("id", ""),
                 "id": block.get("id", ""),
@@ -330,6 +334,7 @@ def _parse_anthropic_response(result: Dict[str, Any]) -> Dict[str, Any]:
             })
             })
 
 
     content = "\n".join(text_parts)
     content = "\n".join(text_parts)
+    thinking = "\n".join(thinking_parts) if thinking_parts else None
 
 
     stop_reason = result.get("stop_reason", "end_turn")
     stop_reason = result.get("stop_reason", "end_turn")
     finish_reason_map = {
     finish_reason_map = {
@@ -353,6 +358,7 @@ def _parse_anthropic_response(result: Dict[str, Any]) -> Dict[str, Any]:
         "tool_calls": tool_calls if tool_calls else None,
         "tool_calls": tool_calls if tool_calls else None,
         "finish_reason": finish_reason,
         "finish_reason": finish_reason,
         "usage": usage,
         "usage": usage,
+        "thinking": thinking,
     }
     }
 
 
 
 
@@ -511,6 +517,15 @@ async def _openrouter_anthropic_call(
     if "temperature" in kwargs:
     if "temperature" in kwargs:
         payload["temperature"] = kwargs["temperature"]
         payload["temperature"] = kwargs["temperature"]
 
 
+    # 可选:启用 extended thinking 模式
+    if kwargs.get("enable_thinking", False):
+        thinking_config = {
+            "type": "enabled",
+            "budget_tokens": kwargs.get("thinking_budget_tokens", 10000)
+        }
+        payload["thinking"] = thinking_config
+        logger.info(f"[OpenRouter/Anthropic] Extended thinking enabled (budget: {thinking_config['budget_tokens']} tokens)")
+
     # Debug: 检查 cache_control 是否存在
     # Debug: 检查 cache_control 是否存在
     cache_control_count = 0
     cache_control_count = 0
     if isinstance(system_prompt, list):
     if isinstance(system_prompt, list):
@@ -592,6 +607,7 @@ async def _openrouter_anthropic_call(
         "finish_reason": parsed["finish_reason"],
         "finish_reason": parsed["finish_reason"],
         "cost": cost,
         "cost": cost,
         "usage": usage,
         "usage": usage,
+        "thinking": parsed.get("thinking"),
     }
     }
 
 
 
 

+ 6 - 1
examples/find knowledge/README.md

@@ -59,7 +59,12 @@ python3 run.py --trace <trace-id>
 默认使用 OpenRouter 调用 `anthropic/claude-sonnet-4.6`,可在 `test.prompt` 中修改:
 默认使用 OpenRouter 调用 `anthropic/claude-sonnet-4.6`,可在 `test.prompt` 中修改:
 ```yaml
 ```yaml
 ---
 ---
-model: sonnet-4.6
+model: anthropic/claude-sonnet-4.6
 temperature: 0.3
 temperature: 0.3
 ---
 ---
 ```
 ```
+
+支持的模型格式(通过 OpenRouter):
+- Anthropic Claude: `anthropic/claude-sonnet-4.6`, `anthropic/claude-opus-4`
+- Google Gemini: `google/gemini-2.5-flash-lite`, `google/gemini-2.5-pro`
+- 其他 OpenRouter 支持的模型

+ 11 - 4
examples/find knowledge/run.py

@@ -164,12 +164,13 @@ async def main():
 
 
     # 创建 Agent Runner
     # 创建 Agent Runner
     print("3. 创建 Agent Runner...")
     print("3. 创建 Agent Runner...")
-    print(f"   - 模型: {prompt.config.get('model', 'sonnet-4.6')}")
+    model_name = prompt.config.get('model', 'anthropic/claude-sonnet-4.6')
+    print(f"   - 模型: {model_name}")
 
 
     store = FileSystemTraceStore(base_path=".trace")
     store = FileSystemTraceStore(base_path=".trace")
     runner = AgentRunner(
     runner = AgentRunner(
         trace_store=store,
         trace_store=store,
-        llm_call=create_openrouter_llm_call(model=f"anthropic/claude-{prompt.config.get('model', 'sonnet-4.6')}"),
+        llm_call=create_openrouter_llm_call(model=model_name),
         skills_dir=None,
         skills_dir=None,
         debug=True
         debug=True
     )
     )
@@ -195,21 +196,27 @@ async def main():
     should_exit = False
     should_exit = False
 
 
     try:
     try:
+        model_name = prompt.config.get('model', 'anthropic/claude-sonnet-4.6')
+
         if resume_trace_id:
         if resume_trace_id:
             initial_messages = None
             initial_messages = None
             config = RunConfig(
             config = RunConfig(
-                model=f"anthropic/claude-{prompt.config.get('model', 'sonnet-4.6')}",
+                model=model_name,
                 temperature=float(prompt.config.get('temperature', 0.3)),
                 temperature=float(prompt.config.get('temperature', 0.3)),
                 max_iterations=1000,
                 max_iterations=1000,
                 trace_id=resume_trace_id,
                 trace_id=resume_trace_id,
+                enable_thinking=prompt.config.get('enable_thinking', False),
+                thinking_budget_tokens=prompt.config.get('thinking_budget_tokens', 10000),
             )
             )
         else:
         else:
             initial_messages = messages
             initial_messages = messages
             config = RunConfig(
             config = RunConfig(
-                model=f"anthropic/claude-{prompt.config.get('model', 'sonnet-4.6')}",
+                model=model_name,
                 temperature=float(prompt.config.get('temperature', 0.3)),
                 temperature=float(prompt.config.get('temperature', 0.3)),
                 max_iterations=1000,
                 max_iterations=1000,
                 name="图片模态特征提取研究",
                 name="图片模态特征提取研究",
+                enable_thinking=prompt.config.get('enable_thinking', False),
+                thinking_budget_tokens=prompt.config.get('thinking_budget_tokens', 10000),
             )
             )
 
 
         while not should_exit:
         while not should_exit:

+ 11 - 17
examples/find knowledge/test.prompt

@@ -1,26 +1,20 @@
 ---
 ---
-model: sonnet-4.6
+model: qwen/qwen3.5-122b-a10b
 temperature: 0.3
 temperature: 0.3
+enable_thinking: false
+thinking_budget_tokens: 3000
 ---
 ---
 
 
 $system$
 $system$
 你是面向可逆特征建模的多模态分析专家。你的核心目标是:构建可逆的多模态特征空间,使生成模型能够基于特征重建原始图片。生成模型可以是任何AI模型或工具。
 你是面向可逆特征建模的多模态分析专家。你的核心目标是:构建可逆的多模态特征空间,使生成模型能够基于特征重建原始图片。生成模型可以是任何AI模型或工具。
-在整个任务中,你必须遵守以下输出规范(强制执行,不得省略):
---------------------------------
-**逐步推理原则**:
-每一个“步骤”的输出中,都必须包含以下字段:
-
-- Step:当前步骤名称(简短)
-- Inputs:本步骤使用了哪些输入(列出:图片/制作表路径/亮点条目/制作点/搜索结果 URL 等)
-- Observation:从 Inputs 中观察到的事实(只写事实,不写推断)
-- Reasoning:你如何从 Observation 推导到结论
-- Decision:本步骤最终做出的选择(这一步的结果)
-- Rationale:为什么做出该 Decision(必须对应到 Observation/搜索证据)
-- Checks:你做了哪些验证(例如:是否可控、是否可复用、是否过像原图、是否可提取)
-
-注意:
-- 如果证据不足,无法支撑起合理的推理,必须写明缺口,并触发“继续搜索/继续分析”,不得强行下结论。
-- 该规范的核心目的不是约束输出,而是将思考过程显式化。
+你必须输出“可审计理由链”(Audit Rationale),覆盖每一步决策与行动。
+
+规则:
+1) 每次生成任何行动(包括:给结论、提出假设、选择/不选择工具、修改 query、筛选信息、做归纳)前,先输出一个 STEP 区块。
+2) 每个 STEP 必须包含:ACTION、WHY(2-4条)、EVIDENCE(1-3条,引用输入/工具返回的字段或原句)、UNCERTAINTY(可选)、NEXT。
+3) WHY 必须是“面向读者的简短理由”,不得输出长篇内心独白;用可验证的依据支撑。
+4) 如果要调用工具:必须在每次工具调用前,先输出一段短理由:为什么选这个工具、为什么现在调用、备选工具为什么不用、期望返回什么...
+5) 若没有足够依据:在 UNCERTAINTY 中说明缺口,并给出降低不确定性的下一步(通常是调用工具或改写 query)。
 
 
 $user$
 $user$
 # 任务目标
 # 任务目标