| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297 |
"""
Token usage data models and cost calculation.

Supports complete token accounting for the various LLM providers:
- Base tokens: input/output
- Thinking tokens: reasoning/thinking (OpenAI o1/o3, DeepSeek R1, Gemini 2.x)
- Cache tokens: cache_creation/cache_read (Claude)
- Other: cached_content (Gemini)

Design patterns:
- TokenUsage: immutable dataclass describing a single call's token usage
- TokenUsageAccumulator: accumulator for aggregating many calls
- PricingCalculator: strategy pattern computing cost from a pricing table
"""
- from dataclasses import dataclass, field
- from typing import Dict, Any, Optional
- import copy
@dataclass(frozen=True)
class TokenUsage:
    """
    Token usage for a single LLM call (immutable).

    Unifies the token accounting fields of every supported provider;
    fields a provider does not report stay at 0.
    """

    # Base tokens (reported by every provider)
    input_tokens: int = 0   # input tokens (prompt_tokens)
    output_tokens: int = 0  # output tokens (completion_tokens)

    # Thinking/reasoning tokens (only some models)
    # - OpenAI o1/o3: reasoning_tokens (inside completion_tokens_details)
    # - DeepSeek R1: reasoning_tokens
    # - Gemini 2.x thinking mode: thoughts_tokens
    reasoning_tokens: int = 0

    # Cache-related tokens (Claude)
    # - cache_creation_input_tokens: tokens spent creating a cache entry
    # - cache_read_input_tokens: tokens read back from cache (usually cheaper)
    cache_creation_tokens: int = 0
    cache_read_tokens: int = 0

    # Gemini specific
    cached_content_tokens: int = 0  # cachedContentTokenCount

    @property
    def total_tokens(self) -> int:
        """Total tokens (input + output; reasoning is not added separately)."""
        return self.input_tokens + self.output_tokens

    @property
    def total_input_tokens(self) -> int:
        """
        Total input tokens.

        For Claude with caching:
            actual input = input_tokens (already includes cache_read)
            billed input = input_tokens - cache_read_tokens + cache_creation_tokens
        """
        return self.input_tokens

    @property
    def total_output_tokens(self) -> int:
        """
        Total output tokens.

        For reasoning models, output_tokens usually already includes
        reasoning_tokens.
        """
        return self.output_tokens

    @property
    def billable_input_tokens(self) -> int:
        """
        Billable input tokens.

        Claude cache pricing weights the components differently:
        - cache_read: 0.1x price
        - cache_creation: 1.25x price
        - regular input: 1x price

        That weighting is applied by PricingCalculator; this property
        deliberately returns the raw input token count.
        """
        return self.input_tokens

    def __add__(self, other: "TokenUsage") -> "TokenUsage":
        """Support accumulation with the + operator."""
        if not isinstance(other, TokenUsage):
            return NotImplemented
        return TokenUsage(
            input_tokens=self.input_tokens + other.input_tokens,
            output_tokens=self.output_tokens + other.output_tokens,
            reasoning_tokens=self.reasoning_tokens + other.reasoning_tokens,
            cache_creation_tokens=self.cache_creation_tokens + other.cache_creation_tokens,
            cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens,
            cached_content_tokens=self.cached_content_tokens + other.cached_content_tokens,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dict (optional fields only included when non-zero)."""
        result = {
            "input_tokens": self.input_tokens,
            "output_tokens": self.output_tokens,
            "total_tokens": self.total_tokens,
        }
        # Only add the optional fields that are non-zero
        if self.reasoning_tokens:
            result["reasoning_tokens"] = self.reasoning_tokens
        if self.cache_creation_tokens:
            result["cache_creation_tokens"] = self.cache_creation_tokens
        if self.cache_read_tokens:
            result["cache_read_tokens"] = self.cache_read_tokens
        if self.cached_content_tokens:
            result["cached_content_tokens"] = self.cached_content_tokens
        return result

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "TokenUsage":
        """
        Create from a dict (compatible with legacy key names).

        A key that is explicitly present (even with value 0) takes
        precedence over its legacy alias; None values are coerced to 0.
        """
        def _field(key: str, legacy: Optional[str] = None) -> int:
            value = data.get(key)
            if value is None and legacy is not None:
                value = data.get(legacy)
            return value or 0

        return cls(
            input_tokens=_field("input_tokens", "prompt_tokens"),
            output_tokens=_field("output_tokens", "completion_tokens"),
            reasoning_tokens=_field("reasoning_tokens"),
            cache_creation_tokens=_field("cache_creation_tokens"),
            cache_read_tokens=_field("cache_read_tokens"),
            cached_content_tokens=_field("cached_content_tokens"),
        )

    @classmethod
    def from_openai(cls, usage: Dict[str, Any]) -> "TokenUsage":
        """
        Create from the OpenAI usage format.

        OpenAI format:
            {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "total_tokens": 150,
                "completion_tokens_details": {
                    "reasoning_tokens": 20  # o1/o3 models
                }
            }
        """
        reasoning = 0
        if details := usage.get("completion_tokens_details"):
            # APIs sometimes emit explicit nulls; coerce them to 0.
            reasoning = details.get("reasoning_tokens") or 0
        return cls(
            input_tokens=usage.get("prompt_tokens") or 0,
            output_tokens=usage.get("completion_tokens") or 0,
            reasoning_tokens=reasoning,
        )

    @classmethod
    def from_anthropic(cls, usage: Dict[str, Any]) -> "TokenUsage":
        """
        Create from the Anthropic/Claude usage format.

        Claude format:
            {
                "input_tokens": 100,
                "output_tokens": 50,
                "cache_creation_input_tokens": 1000,  # optional
                "cache_read_input_tokens": 500        # optional
            }
        """
        return cls(
            input_tokens=usage.get("input_tokens") or 0,
            output_tokens=usage.get("output_tokens") or 0,
            cache_creation_tokens=usage.get("cache_creation_input_tokens") or 0,
            cache_read_tokens=usage.get("cache_read_input_tokens") or 0,
        )

    @classmethod
    def from_gemini(cls, usage_metadata: Dict[str, Any]) -> "TokenUsage":
        """
        Create from the Gemini usage format.

        Gemini format:
            {
                "promptTokenCount": 100,
                "candidatesTokenCount": 50,
                "totalTokenCount": 150,
                "cachedContentTokenCount": 0,  # optional
                "thoughtsTokenCount": 20       # Gemini 2.x thinking mode
            }
        """
        return cls(
            input_tokens=usage_metadata.get("promptTokenCount") or 0,
            output_tokens=usage_metadata.get("candidatesTokenCount") or 0,
            reasoning_tokens=usage_metadata.get("thoughtsTokenCount") or 0,
            cached_content_tokens=usage_metadata.get("cachedContentTokenCount") or 0,
        )

    @classmethod
    def from_deepseek(cls, usage: Dict[str, Any]) -> "TokenUsage":
        """
        Create from the DeepSeek usage format.

        DeepSeek R1 format (OpenAI-compatible plus extensions):
            {
                "prompt_tokens": 100,
                "completion_tokens": 50,
                "reasoning_tokens": 30,  # DeepSeek R1 specific
                "total_tokens": 150
            }
        """
        return cls(
            input_tokens=usage.get("prompt_tokens") or 0,
            output_tokens=usage.get("completion_tokens") or 0,
            reasoning_tokens=usage.get("reasoning_tokens") or 0,
        )
class TokenUsageAccumulator:
    """
    Token usage accumulator.

    Aggregates the token usage of multiple LLM calls at trace level.
    """

    def __init__(self):
        # Running sum, kept as an immutable TokenUsage value object so
        # `total` can hand it out directly without copying.
        self._running_total = TokenUsage()
        self._calls = 0

    def add(self, usage: TokenUsage) -> None:
        """Fold one call's token usage into the running total."""
        self._running_total = self._running_total + usage
        self._calls += 1

    @property
    def total(self) -> TokenUsage:
        """The accumulated TokenUsage across all recorded calls."""
        return self._running_total

    @property
    def call_count(self) -> int:
        """Number of calls recorded so far."""
        return self._calls

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the accumulated totals plus the call count."""
        payload = self.total.to_dict()
        payload["call_count"] = self._calls
        return payload
# Backward-compatible alias
def create_usage_from_response(
    provider: str,
    usage_data: Dict[str, Any]
) -> TokenUsage:
    """
    Build a TokenUsage from a provider-specific usage payload.

    Args:
        provider: Provider name ("openai", "anthropic", "gemini",
            "deepseek", "openrouter"); matched case-insensitively.
        usage_data: The raw usage data returned by the provider API.

    Returns:
        A TokenUsage instance.
    """
    normalized = provider.lower()
    if normalized in ("anthropic", "claude"):
        return TokenUsage.from_anthropic(usage_data)
    if normalized == "gemini":
        return TokenUsage.from_gemini(usage_data)
    if normalized == "deepseek":
        return TokenUsage.from_deepseek(usage_data)
    # "openai", "openrouter", and any unknown provider fall back to the
    # OpenAI schema, matching the original dispatch's default branch.
    return TokenUsage.from_openai(usage_data)
|