"""
Token usage data model.

Provides a provider-independent view of LLM token statistics:
- basic tokens: input / output
- thinking tokens: reasoning / thinking (OpenAI o1/o3, DeepSeek R1, Gemini 2.x)
- cache tokens: cache_creation / cache_read (Claude)
- other: cached_content (Gemini)

Design:
- TokenUsage: immutable dataclass describing the token usage of one call
- TokenUsageAccumulator: accumulator that sums the usage of several calls
- PricingCalculator: strategy that computes cost from a pricing table
  (not defined in this section of the file)
"""

from dataclasses import dataclass, field
from typing import Dict, Any, Optional
import copy


def _count(data: Optional[Dict[str, Any]], *keys: str) -> int:
    """
    Return the first truthy value found under ``keys`` in ``data``, else 0.

    Provider payloads may omit a counter or carry an explicit ``null`` for
    it; both cases are normalised to 0 so that a ``TokenUsage`` never holds
    ``None`` (which would break arithmetic later on).

    Args:
        data: Usage mapping, or ``None`` when the payload is absent.
        *keys: Candidate key names, tried in order.

    Returns:
        The first truthy value, or 0 if none of the keys has one.
    """
    if not data:
        return 0
    for key in keys:
        value = data.get(key)
        if value:
            return value
    return 0


@dataclass(frozen=True)
class TokenUsage:
    """
    Token usage of a single call (immutable).

    Unifies the token statistics fields of all providers; fields that a
    provider does not report stay at 0.
    """

    # Basic tokens (reported by every provider)
    input_tokens: int = 0   # input tokens (prompt_tokens)
    output_tokens: int = 0  # output tokens (completion_tokens)

    # Thinking / reasoning tokens (some models only)
    # - OpenAI o1/o3: reasoning_tokens (inside completion_tokens_details)
    # - DeepSeek R1: reasoning_tokens
    # - Gemini 2.x thinking mode: thoughtsTokenCount
    reasoning_tokens: int = 0

    # Cache related tokens (Claude)
    # - cache_creation_input_tokens: tokens spent creating a cache entry
    # - cache_read_input_tokens: tokens read from the cache
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0

    # Gemini specific
    cached_content_tokens: int = 0  # cachedContentTokenCount

    @property
    def total_tokens(self) -> int:
        """Total tokens: input + output (reasoning_tokens is not added)."""
        return self.input_tokens + self.output_tokens

    @property
    def total_input_tokens(self) -> int:
        """Total input tokens; currently simply ``input_tokens``."""
        return self.input_tokens

    @property
    def total_output_tokens(self) -> int:
        """Total output tokens; currently simply ``output_tokens``."""
        return self.output_tokens

    @property
    def billable_input_tokens(self) -> int:
        """
        Billable input tokens.

        No cache discount is applied here: the raw ``input_tokens`` value is
        returned and any cache-aware pricing is left to the pricing layer.
        """
        return self.input_tokens

    def __add__(self, other: "TokenUsage") -> "TokenUsage":
        """Return a new TokenUsage holding the field-wise sum of both operands."""
        if not isinstance(other, TokenUsage):
            return NotImplemented
        return TokenUsage(
            input_tokens=self.input_tokens + other.input_tokens,
            output_tokens=self.output_tokens + other.output_tokens,
            reasoning_tokens=self.reasoning_tokens + other.reasoning_tokens,
            cache_creation_tokens=(
                self.cache_creation_tokens + other.cache_creation_tokens
            ),
            cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens,
            cached_content_tokens=(
                self.cached_content_tokens + other.cached_content_tokens
            ),
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert to a dictionary.

        ``input_tokens``, ``output_tokens`` and ``total_tokens`` are always
        present; the optional counters are included only when non-zero.
        """
        result = {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.total_tokens,
        }
        # Only add the optional fields that are non-zero
        if self.reasoning_tokens:
            result["reasoning_tokens"] = self.reasoning_tokens
        if self.cache_creation_tokens:
            result["cache_creation_tokens"] = self.cache_creation_tokens
        if self.cache_read_tokens:
            result["cache_read_tokens"] = self.cache_read_tokens
        if self.cached_content_tokens:
            result["cached_content_tokens"] = self.cached_content_tokens
        return result

    @classmethod
    def from_dict(cls, data: Optional[Dict[str, Any]]) -> "TokenUsage":
        """
        Create from a dictionary (compatible with the old key names).

        ``input_tokens`` / ``output_tokens`` fall back to ``prompt_tokens`` /
        ``completion_tokens`` when missing or falsy. Missing or ``None``
        values become 0.
        """
        return cls(
            input_tokens=_count(data, "input_tokens", "prompt_tokens"),
            output_tokens=_count(data, "output_tokens", "completion_tokens"),
            reasoning_tokens=_count(data, "reasoning_tokens"),
            cache_creation_tokens=_count(data, "cache_creation_tokens"),
            cache_read_tokens=_count(data, "cache_read_tokens"),
            cached_content_tokens=_count(data, "cached_content_tokens"),
        )

    @classmethod
    def from_openai(cls, usage: Optional[Dict[str, Any]]) -> "TokenUsage":
        """
        Create from an OpenAI-format usage mapping.

        Expected format:
            {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150,
                "completion_tokens_details": {
                    "reasoning_tokens": 20   # o1/o3 models
                }
            }

        Missing or ``None`` values (including a ``None`` payload) become 0.
        """
        usage = usage or {}
        return cls(
            input_tokens=_count(usage, "prompt_tokens"),
            output_tokens=_count(usage, "completion_tokens"),
            reasoning_tokens=_count(
                usage.get("completion_tokens_details"), "reasoning_tokens"
            ),
        )

    @classmethod
    def from_anthropic(cls, usage: Optional[Dict[str, Any]]) -> "TokenUsage":
        """
        Create from an Anthropic/Claude-format usage mapping.

        Expected format:
            {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_creation_input_tokens": 1000,  # optional
                "cache_read_input_tokens": 500        # optional
            }

        Missing or ``None`` values (including a ``None`` payload) become 0.
        """
        return cls(
            input_tokens=_count(usage, "input_tokens"),
            output_tokens=_count(usage, "output_tokens"),
            cache_creation_tokens=_count(usage, "cache_creation_input_tokens"),
            cache_read_tokens=_count(usage, "cache_read_input_tokens"),
        )

    @classmethod
    def from_gemini(
        cls, usage_metadata: Optional[Dict[str, Any]]
    ) -> "TokenUsage":
        """
        Create from a Gemini-format usage metadata mapping.

        Expected format:
            {
                "promptTokenCount": 100,
                "candidatesTokenCount": 50,
                "totalTokenCount": 150,
                "cachedContentTokenCount": 0,  # optional
                "thoughtsTokenCount": 20       # Gemini 2.x thinking mode
            }

        Missing or ``None`` values (including a ``None`` payload) become 0.
        """
        return cls(
            input_tokens=_count(usage_metadata, "promptTokenCount"),
            output_tokens=_count(usage_metadata, "candidatesTokenCount"),
            reasoning_tokens=_count(usage_metadata, "thoughtsTokenCount"),
            cached_content_tokens=_count(
                usage_metadata, "cachedContentTokenCount"
            ),
        )

    @classmethod
    def from_deepseek(cls, usage: Optional[Dict[str, Any]]) -> "TokenUsage":
        """
        Create from a DeepSeek-format usage mapping.

        Expected format (OpenAI compatible + extension):
            {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "reasoning_tokens": 30,
                "total_tokens": 150
            }

        A top-level ``reasoning_tokens`` is preferred; when it is absent or
        zero, ``completion_tokens_details.reasoning_tokens`` is used instead.
        Missing or ``None`` values (including a ``None`` payload) become 0.
        """
        usage = usage or {}
        reasoning = _count(usage, "reasoning_tokens")
        if not reasoning:
            # OpenAI-compatible location of the same counter.
            reasoning = _count(
                usage.get("completion_tokens_details"), "reasoning_tokens"
            )
        return cls(
            input_tokens=_count(usage, "prompt_tokens"),
            output_tokens=_count(usage, "completion_tokens"),
            reasoning_tokens=reasoning,
        )


class TokenUsageAccumulator:
    """
    Token usage accumulator.

    Sums the token usage of several LLM calls and counts how many calls
    were added.
    """

    def __init__(self):
        self._input_tokens: int = 0
        self._output_tokens: int = 0
        self._reasoning_tokens: int = 0
        self._cache_creation_tokens: int = 0
        self._cache_read_tokens: int = 0
        self._cached_content_tokens: int = 0
        self._call_count: int = 0

    def add(self, usage: TokenUsage) -> None:
        """Add the token usage of one call and increment the call count."""
        self._input_tokens += usage.input_tokens
        self._output_tokens += usage.output_tokens
        self._reasoning_tokens += usage.reasoning_tokens
        self._cache_creation_tokens += usage.cache_creation_tokens
        self._cache_read_tokens += usage.cache_read_tokens
        self._cached_content_tokens += usage.cached_content_tokens
        self._call_count += 1

    @property
    def total(self) -> TokenUsage:
        """Accumulated usage as a new immutable TokenUsage."""
        return TokenUsage(
            input_tokens=self._input_tokens,
            output_tokens=self._output_tokens,
            reasoning_tokens=self._reasoning_tokens,
            cache_creation_tokens=self._cache_creation_tokens,
            cache_read_tokens=self._cache_read_tokens,
            cached_content_tokens=self._cached_content_tokens,
        )

    @property
    def call_count(self) -> int:
        """Number of calls added so far."""
        return self._call_count

    def to_dict(self) -> Dict[str, Any]:
        """Return ``total.to_dict()`` extended with a ``call_count`` entry."""
        result = self.total.to_dict()
        result["call_count"] = self._call_count
        return result


# Lower-cased provider name -> TokenUsage factory for that provider's format.
_PROVIDER_FACTORIES = {
    "openai": TokenUsage.from_openai,
    "openrouter": TokenUsage.from_openai,
    "anthropic": TokenUsage.from_anthropic,
    "claude": TokenUsage.from_anthropic,
    "gemini": TokenUsage.from_gemini,
    "deepseek": TokenUsage.from_deepseek,
}


def create_usage_from_response(
    provider: str,
    usage_data: Dict[str, Any]
) -> TokenUsage:
    """
    Create a TokenUsage from a provider's usage payload.

    Args:
        provider: Provider name, case-insensitive ("openai", "openrouter",
            "anthropic", "claude", "gemini", "deepseek"). Unknown or empty
            names are parsed with the OpenAI format.
        usage_data: The usage data returned by the API.

    Returns:
        A TokenUsage instance.
    """
    # `provider or ""` keeps a missing provider on the default (OpenAI) path
    # instead of raising on `None.lower()`.
    factory = _PROVIDER_FACTORIES.get(
        (provider or "").lower(), TokenUsage.from_openai
    )
    return factory(usage_data)