|
@@ -0,0 +1,259 @@
|
|
|
|
|
"""
Test Prompt Caching within a single Trace.

Test scenario:
1. Round 1: create the cache (system prompt + tool definitions)
2. Round 2: hit the cache (system prompt + tool definitions + round-1 history)
3. Round 3: hit more cache (system prompt + tool definitions + first two rounds of history)
"""

import asyncio
import os  # NOTE(review): appears unused in this file — confirm before removing
import sys
from pathlib import Path

# Make the project root importable when this file is run directly as a script.
# Must happen before the `agent.*` imports below.
sys.path.insert(0, str(Path(__file__).parent.parent.parent))

from dotenv import load_dotenv
# Load environment variables from .env before the OpenRouter client is built
# (presumably provides the OpenRouter API key — confirm against agent.llm).
load_dotenv()

import logging
# DEBUG level so per-request cache read/write details are visible in the output.
logging.basicConfig(level=logging.DEBUG)

from agent.core.runner import AgentRunner, RunConfig
from agent.trace import FileSystemTraceStore, Trace, Message
from agent.llm import create_openrouter_llm_call
|
|
|
|
|
async def _run_round(runner, stable_prefix, *, round_no, question, trace_id=None, name=None):
    """Run one conversation round and print its token/cache statistics.

    Args:
        runner: the AgentRunner to execute the round on.
        stable_prefix: the byte-for-byte stable system prompt (cache key prefix).
        round_no: 1-based round number, used only for the progress labels.
        question: user message content for this round.
        trace_id: when not None, continue this existing trace instead of
            starting a new one (required for cache hits on prior history).
        name: optional trace name; only passed for the round that creates the trace.

    Returns:
        The trace_id observed on the emitted Trace items, so follow-up rounds
        can continue the same trace.
    """
    # Build the config kwargs dynamically: trace_id/name are only included
    # when provided, matching the original per-round RunConfig calls.
    config_kwargs = dict(
        system_prompt=stable_prefix,
        model="anthropic/claude-sonnet-4.5",
        temperature=0.3,
        max_iterations=1,
        enable_prompt_caching=True,
    )
    if trace_id is not None:
        config_kwargs["trace_id"] = trace_id  # 续跑同一个 trace
    if name is not None:
        config_kwargs["name"] = name

    async for item in runner.run(
        messages=[{"role": "user", "content": question}],
        config=RunConfig(**config_kwargs),
    ):
        if isinstance(item, Trace):
            # Remember the trace id from any Trace item (it is constant
            # within a run); stats are only printed on completion.
            trace_id = item.trace_id
            if item.status == "completed":
                print(f"\n✓ 第 {round_no} 轮完成")
                print(f" Total tokens: {item.total_tokens}")
                print(f" Cache write: {item.total_cache_creation_tokens}")
                print(f" Cache read: {item.total_cache_read_tokens}")
                print(f" Cost: ${item.total_cost:.6f}")
        elif isinstance(item, Message) and item.role == "assistant":
            print(f"\n[Response] {item.content.get('text', '')[:100]}...")
            print(f" Prompt tokens: {item.prompt_tokens}")
            print(f" Cache write: {item.cache_creation_tokens}")
            print(f" Cache read: {item.cache_read_tokens}")
    return trace_id


async def main():
    """Run three chat rounds inside one trace to exercise prompt caching.

    Expectation: round 1 writes the cache (cache_write > 0); rounds 2 and 3
    continue the same trace_id and should report growing cache_read values.
    """
    print("=" * 60)
    print("同一 Trace 内的 Prompt Caching 测试")
    print("=" * 60)
    print()

    # Store traces under <project_root>/.trace so runs are easy to inspect.
    base_dir = Path(__file__).parent
    project_root = base_dir.parent.parent
    trace_dir = project_root / ".trace"

    runner = AgentRunner(
        trace_store=FileSystemTraceStore(base_path=str(trace_dir)),
        llm_call=create_openrouter_llm_call(model="anthropic/claude-sonnet-4.5"),
        debug=True
    )

    # A long, byte-for-byte stable system prompt: Anthropic only caches
    # prefixes above a minimum token count, so the text is doubled below.
    stable_prefix = """你是一个专业的 AI 技术顾问,专注于软件工程和系统架构。

## 核心专业领域

### 1. 编程语言与框架
- **Python**: Django, Flask, FastAPI, Celery, SQLAlchemy, Pandas, NumPy
- **JavaScript/TypeScript**: React, Vue, Angular, Node.js, Express, NestJS
- **Go**: Gin, Echo, gRPC, Cobra
- **Rust**: Actix, Rocket, Tokio
- **Java**: Spring Boot, Hibernate, Maven, Gradle

### 2. 数据库技术
- **关系型数据库**: PostgreSQL, MySQL, Oracle, SQL Server
- **NoSQL 数据库**: MongoDB, Redis, Cassandra, DynamoDB
- **时序数据库**: InfluxDB, TimescaleDB
- **图数据库**: Neo4j, ArangoDB
- **搜索引擎**: Elasticsearch, Solr

### 3. 云平台与基础设施
- **AWS**: EC2, S3, Lambda, RDS, DynamoDB, CloudFormation, ECS, EKS
- **GCP**: Compute Engine, Cloud Storage, Cloud Functions, BigQuery, GKE
- **Azure**: Virtual Machines, Blob Storage, Functions, Cosmos DB, AKS
- **容器化**: Docker, Docker Compose, Podman
- **编排**: Kubernetes, Helm, Istio, Linkerd

### 4. DevOps 与 CI/CD
- **版本控制**: Git, GitHub, GitLab, Bitbucket
- **CI/CD**: Jenkins, GitLab CI, GitHub Actions, CircleCI, Travis CI
- **配置管理**: Ansible, Terraform, Puppet, Chef
- **监控告警**: Prometheus, Grafana, ELK Stack, Datadog, New Relic
- **日志管理**: Fluentd, Logstash, Loki

### 5. 架构模式
- **微服务架构**: 服务拆分、API 网关、服务发现、熔断降级
- **事件驱动架构**: 消息队列、事件溯源、CQRS
- **Serverless 架构**: FaaS、BaaS、无服务器框架
- **分布式系统**: CAP 理论、一致性协议、分布式事务
- **高可用设计**: 负载均衡、故障转移、灾备恢复

### 6. 安全最佳实践
- **认证授权**: OAuth 2.0, JWT, SAML, OpenID Connect
- **加密技术**: TLS/SSL, AES, RSA, 哈希算法
- **安全审计**: 漏洞扫描、渗透测试、安全合规
- **数据保护**: 数据脱敏、访问控制、审计日志

### 7. 性能优化
- **缓存策略**: Redis, Memcached, CDN, 浏览器缓存
- **数据库优化**: 索引设计、查询优化、分库分表
- **代码优化**: 算法复杂度、并发编程、异步处理
- **系统调优**: 负载测试、性能分析、资源监控

### 8. 机器学习与 AI
- **深度学习框架**: TensorFlow, PyTorch, Keras
- **模型部署**: TensorFlow Serving, TorchServe, ONNX
- **MLOps**: MLflow, Kubeflow, SageMaker
- **自然语言处理**: Transformers, BERT, GPT, LangChain

## 工作原则

1. **准确性优先**: 提供经过验证的技术方案,避免误导
2. **实用导向**: 给出可直接应用的代码示例和配置
3. **最佳实践**: 遵循行业标准和社区共识
4. **安全意识**: 始终考虑安全性和隐私保护
5. **性能考虑**: 关注系统性能和资源效率
6. **可维护性**: 代码清晰、文档完善、易于扩展
7. **成本意识**: 平衡技术方案与成本投入

## 响应格式

### 问题分析
- 理解用户需求和上下文
- 识别关键技术挑战
- 评估可行性和风险

### 解决方案
- 提供清晰的实现步骤
- 包含完整的代码示例
- 解释关键技术点
- 指出潜在问题和注意事项

### 最佳实践建议
- 性能优化建议
- 安全加固措施
- 可扩展性考虑
- 运维监控方案

### 替代方案
- 列出其他可行方案
- 对比优缺点
- 给出选择建议

## 技术栈版本参考

- Python: 3.11+
- Node.js: 20 LTS
- PostgreSQL: 15+
- Redis: 7+
- Kubernetes: 1.28+
- Docker: 24+

这是一个足够长且稳定的 system prompt,用于测试 Anthropic Prompt Caching。
此内容在所有请求中保持完全一致,以确保缓存能够命中。
Version: 3.0
""" * 2  # 重复 2 次,确保 >1500 tokens

    print(f"System prompt 长度: {len(stable_prefix)} 字符")
    print(f"预估 tokens: ~{len(stable_prefix) // 4}")
    print()

    # Round 1: starts a fresh trace, so the cache should be written here.
    print("=" * 60)
    print("第 1 轮对话:创建缓存")
    print("=" * 60)
    trace_id = await _run_round(
        runner, stable_prefix,
        round_no=1,
        question="请用一句话介绍 Python",
        name="同一Trace缓存测试",
    )

    # Small pause so the cache entry is settled before the next request.
    print("\n等待 2 秒...")
    await asyncio.sleep(2)

    # Round 2: continues the same trace — the system prompt prefix should
    # now be read from cache.
    print("\n" + "=" * 60)
    print("第 2 轮对话:应该命中缓存(system + 第1轮历史)")
    print("=" * 60)
    await _run_round(
        runner, stable_prefix,
        round_no=2,
        question="请用一句话介绍 JavaScript",
        trace_id=trace_id,
    )

    print("\n等待 2 秒...")
    await asyncio.sleep(2)

    # Round 3: continues again — cache_read should grow, since both the
    # system prompt and the first two rounds of history are now cached.
    print("\n" + "=" * 60)
    print("第 3 轮对话:应该命中更多缓存(system + 前2轮历史)")
    print("=" * 60)
    await _run_round(
        runner, stable_prefix,
        round_no=3,
        question="请用一句话介绍 Go",
        trace_id=trace_id,
    )

    print("\n" + "=" * 60)
    print("测试完成")
    print("=" * 60)
    print()
    print("预期结果:")
    print("- 第 1 轮:cache_write > 0(创建缓存)")
    print("- 第 2 轮:cache_read > 0(命中 system prompt 缓存)")
    print("- 第 3 轮:cache_read 更大(命中 system + 历史消息缓存)")
    print()
    print(f"Trace ID: {trace_id}")
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the three-round prompt-caching test.
    asyncio.run(main())
|