elksmmx 6 дней назад
Родитель
Commit
aafb9bab48
3 измененных файлов с 71 добавлено и 26 удалено
  1. 52 23
      agent/core/runner.py
  2. 16 0
      agent/llm/openrouter.py
  3. 3 3
      examples/how/tool/nanobanana.py

+ 52 - 23
agent/core/runner.py

@@ -758,6 +758,15 @@ class AgentRunner:
             # 构建 LLM messages(注入上下文)
             # 构建 LLM messages(注入上下文)
             llm_messages = list(history)
             llm_messages = list(history)
 
 
+            # 先对历史消息应用 Prompt Caching(在注入动态内容之前)
+            # 这样可以确保历史消息的缓存点固定,不受动态注入影响
+            llm_messages = self._add_cache_control(
+                llm_messages,
+                config.model,
+                config.enable_prompt_caching
+            )
+
+            # 然后追加动态注入的内容(不影响已缓存的历史消息)
             # 周期性注入 GoalTree + Collaborators
             # 周期性注入 GoalTree + Collaborators
             if iteration % CONTEXT_INJECTION_INTERVAL == 0:
             if iteration % CONTEXT_INJECTION_INTERVAL == 0:
                 context_injection = self._build_context_injection(trace, goal_tree)
                 context_injection = self._build_context_injection(trace, goal_tree)
@@ -792,14 +801,9 @@ class AgentRunner:
                         logger.warning("经验检索失败: %s", e)
                         logger.warning("经验检索失败: %s", e)
                         _cached_exp_text = ""
                         _cached_exp_text = ""
 
 
+            # 经验注入:goal切换时注入相关历史经验
             if _cached_exp_text:
             if _cached_exp_text:
                 llm_messages.append({"role": "system", "content": _cached_exp_text})
                 llm_messages.append({"role": "system", "content": _cached_exp_text})
-            # 应用 Prompt Caching(不修改原始 history,只在发送给 LLM 时添加缓存标记)
-            llm_messages = self._add_cache_control(
-                llm_messages,
-                config.model,
-                config.enable_prompt_caching
-            )
 
 
             # 调用 LLM
             # 调用 LLM
             result = await self.llm_call(
             result = await self.llm_call(
@@ -1511,9 +1515,10 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
         """
         """
         为支持的模型添加 Prompt Caching 标记
         为支持的模型添加 Prompt Caching 标记
 
 
-        策略:
+        策略:固定位置缓存点,提高缓存命中率
         1. system message 添加缓存(如果存在且足够长)
         1. system message 添加缓存(如果存在且足够长)
-        2. 倒数第 3-5 条 user/assistant 消息添加缓存点
+        2. 每 20 条 user/assistant/tool 消息添加一个固定缓存点(位置:20, 40, 60)
+        3. 最多使用 4 个缓存点(含 system)
 
 
         Args:
         Args:
             messages: 原始消息列表
             messages: 原始消息列表
@@ -1535,13 +1540,12 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
         messages = copy.deepcopy(messages)
         messages = copy.deepcopy(messages)
 
 
         # 策略 1: 为 system message 添加缓存
         # 策略 1: 为 system message 添加缓存
+        system_cached = False
         for msg in messages:
         for msg in messages:
             if msg.get("role") == "system":
             if msg.get("role") == "system":
                 content = msg.get("content", "")
                 content = msg.get("content", "")
                 # 只有足够长的 system prompt 才值得缓存(>1024 tokens 约 4000 字符)
                 # 只有足够长的 system prompt 才值得缓存(>1024 tokens 约 4000 字符)
                 if isinstance(content, str) and len(content) > 1000:
                 if isinstance(content, str) and len(content) > 1000:
-                    # Anthropic API 格式:在 content 的最后一个 block 添加 cache_control
-                    # 如果 content 是 string,需要转换为 list 格式
                     msg["content"] = [
                     msg["content"] = [
                         {
                         {
                             "type": "text",
                             "type": "text",
@@ -1549,27 +1553,39 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
                             "cache_control": {"type": "ephemeral"}
                             "cache_control": {"type": "ephemeral"}
                         }
                         }
                     ]
                     ]
+                    system_cached = True
                     logger.debug(f"[Cache] 为 system message 添加缓存标记 (len={len(content)})")
                     logger.debug(f"[Cache] 为 system message 添加缓存标记 (len={len(content)})")
                 break
                 break
 
 
-        # 策略 2: 为倒数第 3-5 条消息添加缓存点
-        # 这样可以缓存大部分历史对话,只有最新的几条消息是新的
+        # 策略 2: 按总消息数计算缓存点(包括 tool 消息)
+        # 但只能在 user/assistant 消息上添加 cache_control
+        total_msgs = len(messages)
+        if total_msgs == 0:
+            return messages
+
+        # 每 20 条总消息添加一个缓存点
+        # 原因:Anthropic 要求每个缓存点至少 1024 tokens
+        # 每 15 条消息约 1050 tokens,太接近边界,改为 20 条确保足够(约 1400 tokens)
+        CACHE_INTERVAL = 20
+        max_cache_points = 3 if system_cached else 4
+
         cache_positions = []
         cache_positions = []
-        user_assistant_msgs = [
-            (i, msg) for i, msg in enumerate(messages)
-            if msg.get("role") in ("user", "assistant")
-        ]
-
-        if len(user_assistant_msgs) >= 5:
-            # 在倒数第 5 条添加缓存点
-            cache_positions.append(user_assistant_msgs[-5][0])
-        elif len(user_assistant_msgs) >= 3:
-            # 在倒数第 3 条添加缓存点
-            cache_positions.append(user_assistant_msgs[-3][0])
+        for i in range(1, max_cache_points + 1):
+            target_pos = i * CACHE_INTERVAL - 1  # 第 20, 40, 60, 80 条
+            if target_pos < total_msgs:
+                # 从 target_pos 往前找最近的 user/assistant 消息
+                for j in range(target_pos, -1, -1):
+                    if messages[j].get("role") in ("user", "assistant"):
+                        cache_positions.append(j)
+                        break
 
 
+        # 应用缓存标记
         for idx in cache_positions:
         for idx in cache_positions:
             msg = messages[idx]
             msg = messages[idx]
             content = msg.get("content", "")
             content = msg.get("content", "")
+            role = msg.get("role", "")
+
+            print(f"[Cache] 尝试为 message[{idx}] (role={role}, content_type={type(content).__name__}) 添加缓存标记")
 
 
             # 处理 string content
             # 处理 string content
             if isinstance(content, str):
             if isinstance(content, str):
@@ -1580,6 +1596,7 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
                         "cache_control": {"type": "ephemeral"}
                         "cache_control": {"type": "ephemeral"}
                     }
                     }
                 ]
                 ]
+                print(f"[Cache] ✓ 为 message[{idx}] ({role}) 添加缓存标记 (str->list)")
                 logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 添加缓存标记")
                 logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 添加缓存标记")
 
 
             # 处理 list content(多模态消息)
             # 处理 list content(多模态消息)
@@ -1588,9 +1605,21 @@ created_at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
                 for i in range(len(content) - 1, -1, -1):
                 for i in range(len(content) - 1, -1, -1):
                     if isinstance(content[i], dict) and content[i].get("type") == "text":
                     if isinstance(content[i], dict) and content[i].get("type") == "text":
                         content[i]["cache_control"] = {"type": "ephemeral"}
                         content[i]["cache_control"] = {"type": "ephemeral"}
+                        print(f"[Cache] ✓ 为 message[{idx}] ({role}) 的 content[{i}] 添加缓存标记 (list)")
                         logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 的 content[{i}] 添加缓存标记")
                         logger.debug(f"[Cache] 为 message[{idx}] ({msg.get('role')}) 的 content[{i}] 添加缓存标记")
                         break
                         break
+            else:
+                print(f"[Cache] ✗ message[{idx}] ({role}) 的 content 类型不支持: {type(content).__name__}, len={len(content) if isinstance(content, (list, str)) else 'N/A'}")
 
 
+        total_cache_points = len(cache_positions) + (1 if system_cached else 0)
+        print(
+            f"[Cache] 总消息: {len(messages)}, "
+            f"缓存点: {total_cache_points} at positions: {cache_positions}"
+        )
+        logger.debug(
+            f"[Cache] 总消息: {len(messages)}, "
+            f"缓存点: {total_cache_points} at positions: {cache_positions}"
+        )
         return messages
         return messages
 
 
     def _get_tool_schemas(self, tools: Optional[List[str]]) -> List[Dict]:
     def _get_tool_schemas(self, tools: Optional[List[str]]) -> List[Dict]:

+ 16 - 0
agent/llm/openrouter.py

@@ -511,6 +511,22 @@ async def _openrouter_anthropic_call(
     if "temperature" in kwargs:
     if "temperature" in kwargs:
         payload["temperature"] = kwargs["temperature"]
         payload["temperature"] = kwargs["temperature"]
 
 
+    # Debug: 检查 cache_control 是否存在
+    cache_control_count = 0
+    if isinstance(system_prompt, list):
+        for block in system_prompt:
+            if isinstance(block, dict) and "cache_control" in block:
+                cache_control_count += 1
+    for msg in anthropic_messages:
+        content = msg.get("content", "")
+        if isinstance(content, list):
+            for block in content:
+                if isinstance(block, dict) and "cache_control" in block:
+                    cache_control_count += 1
+    if cache_control_count > 0:
+        print(f"[OpenRouter/Anthropic] 发现 {cache_control_count} 个 cache_control 标记")
+        logger.info(f"[OpenRouter/Anthropic] 发现 {cache_control_count} 个 cache_control 标记")
+
     headers = {
     headers = {
         "Authorization": f"Bearer {api_key}",
         "Authorization": f"Bearer {api_key}",
         "anthropic-version": "2023-06-01",
         "anthropic-version": "2023-06-01",

+ 3 - 3
examples/how/tool/nanobanana.py

@@ -36,9 +36,9 @@ DEFAULT_IMAGE_PROMPT = (
 
 
 DEFAULT_IMAGE_MODEL_CANDIDATES = [
 DEFAULT_IMAGE_MODEL_CANDIDATES = [
     "google/gemini-2.5-flash-image",
     "google/gemini-2.5-flash-image",
-    "google/gemini-3-pro-image-preview",
-    "black-forest-labs/flux.2-flex",
-    "black-forest-labs/flux.2-pro",
+    # "google/gemini-3-pro-image-preview",
+    # "black-forest-labs/flux.2-flex",
+    # "black-forest-labs/flux.2-pro",
 ]
 ]