# usage.py
"""
Token usage data models and cost calculation.

Supports complete token accounting for the various LLM providers:
- Base tokens: input/output
- Thinking tokens: reasoning/thinking (OpenAI o1/o3, DeepSeek R1, Gemini 2.x)
- Cache tokens: cache_creation/cache_read (Claude)
- Other: cached_content (Gemini)

Design patterns:
- TokenUsage: immutable dataclass representing one call's token usage
- TokenUsageAccumulator: accumulator for totals across many calls
- PricingCalculator: strategy pattern computing cost from a pricing table
"""
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
import copy
  16. @dataclass(frozen=True)
  17. class TokenUsage:
  18. """
  19. Token 使用量(不可变)
  20. 统一所有提供商的 token 统计字段,未使用的字段为 0
  21. """
  22. # 基础 tokens(所有提供商都有)
  23. input_tokens: int = 0 # 输入 tokens (prompt_tokens)
  24. output_tokens: int = 0 # 输出 tokens (completion_tokens)
  25. # 思考/推理 tokens(部分模型)
  26. # - OpenAI o1/o3: reasoning_tokens (在 completion_tokens_details 中)
  27. # - DeepSeek R1: reasoning_tokens
  28. # - Gemini 2.x thinking mode: thoughts_tokens
  29. reasoning_tokens: int = 0
  30. # 缓存相关 tokens(Claude)
  31. # - cache_creation_input_tokens: 创建缓存消耗的 tokens
  32. # - cache_read_input_tokens: 读取缓存的 tokens(通常更便宜)
  33. cache_creation_tokens: int = 0
  34. cache_read_tokens: int = 0
  35. # Gemini 特有
  36. cached_content_tokens: int = 0 # cachedContentTokenCount
  37. @property
  38. def total_tokens(self) -> int:
  39. """总 tokens(input + output,不含 reasoning)"""
  40. return self.input_tokens + self.output_tokens
  41. @property
  42. def total_input_tokens(self) -> int:
  43. """
  44. 总输入 tokens
  45. 对于 Claude 带缓存的情况:
  46. 实际输入 = input_tokens(已包含 cache_read)
  47. 计费输入 = input_tokens - cache_read_tokens + cache_creation_tokens
  48. """
  49. return self.input_tokens
  50. @property
  51. def total_output_tokens(self) -> int:
  52. """
  53. 总输出 tokens
  54. 对于有 reasoning 的模型:
  55. output_tokens 通常已包含 reasoning_tokens
  56. """
  57. return self.output_tokens
  58. @property
  59. def billable_input_tokens(self) -> int:
  60. """
  61. 计费输入 tokens(考虑缓存折扣)
  62. Claude 缓存定价:
  63. - cache_read: 0.1x 价格
  64. - cache_creation: 1.25x 价格
  65. - 普通 input: 1x 价格
  66. 这里返回等效的全价 tokens 数
  67. """
  68. # 普通输入 = 总输入 - 缓存读取
  69. regular_input = self.input_tokens - self.cache_read_tokens
  70. # 等效计费 = 普通输入 + 缓存读取*0.1 + 缓存创建*1.25
  71. # 简化:返回原始值,让 PricingCalculator 处理
  72. return self.input_tokens
  73. def __add__(self, other: "TokenUsage") -> "TokenUsage":
  74. """支持 + 运算符累加"""
  75. if not isinstance(other, TokenUsage):
  76. return NotImplemented
  77. return TokenUsage(
  78. input_tokens=self.input_tokens + other.input_tokens,
  79. output_tokens=self.output_tokens + other.output_tokens,
  80. reasoning_tokens=self.reasoning_tokens + other.reasoning_tokens,
  81. cache_creation_tokens=self.cache_creation_tokens + other.cache_creation_tokens,
  82. cache_read_tokens=self.cache_read_tokens + other.cache_read_tokens,
  83. cached_content_tokens=self.cached_content_tokens + other.cached_content_tokens,
  84. )
  85. def to_dict(self) -> Dict[str, Any]:
  86. """转换为字典(只包含非零字段)"""
  87. result = {
  88. "input_tokens": self.input_tokens,
  89. "output_tokens": self.output_tokens,
  90. "total_tokens": self.total_tokens,
  91. }
  92. # 只添加非零的可选字段
  93. if self.reasoning_tokens:
  94. result["reasoning_tokens"] = self.reasoning_tokens
  95. if self.cache_creation_tokens:
  96. result["cache_creation_tokens"] = self.cache_creation_tokens
  97. if self.cache_read_tokens:
  98. result["cache_read_tokens"] = self.cache_read_tokens
  99. if self.cached_content_tokens:
  100. result["cached_content_tokens"] = self.cached_content_tokens
  101. return result
  102. @classmethod
  103. def from_dict(cls, data: Dict[str, Any]) -> "TokenUsage":
  104. """从字典创建(兼容旧格式)"""
  105. return cls(
  106. input_tokens=data.get("input_tokens") or data.get("prompt_tokens", 0),
  107. output_tokens=data.get("output_tokens") or data.get("completion_tokens", 0),
  108. reasoning_tokens=data.get("reasoning_tokens", 0),
  109. cache_creation_tokens=data.get("cache_creation_tokens", 0),
  110. cache_read_tokens=data.get("cache_read_tokens", 0),
  111. cached_content_tokens=data.get("cached_content_tokens", 0),
  112. )
  113. @classmethod
  114. def from_openai(cls, usage: Dict[str, Any]) -> "TokenUsage":
  115. """
  116. 从 OpenAI 格式创建
  117. OpenAI 格式:
  118. {
  119. "prompt_tokens": 100,
  120. "completion_tokens": 50,
  121. "total_tokens": 150,
  122. "completion_tokens_details": {
  123. "reasoning_tokens": 20 # o1/o3 模型
  124. }
  125. }
  126. """
  127. reasoning = 0
  128. if details := usage.get("completion_tokens_details"):
  129. reasoning = details.get("reasoning_tokens", 0)
  130. return cls(
  131. input_tokens=usage.get("prompt_tokens", 0),
  132. output_tokens=usage.get("completion_tokens", 0),
  133. reasoning_tokens=reasoning,
  134. )
  135. @classmethod
  136. def from_anthropic(cls, usage: Dict[str, Any]) -> "TokenUsage":
  137. """
  138. 从 Anthropic/Claude 格式创建
  139. Claude 格式:
  140. {
  141. "input_tokens": 100,
  142. "output_tokens": 50,
  143. "cache_creation_input_tokens": 1000, # 可选
  144. "cache_read_input_tokens": 500 # 可选
  145. }
  146. """
  147. return cls(
  148. input_tokens=usage.get("input_tokens", 0),
  149. output_tokens=usage.get("output_tokens", 0),
  150. cache_creation_tokens=usage.get("cache_creation_input_tokens", 0),
  151. cache_read_tokens=usage.get("cache_read_input_tokens", 0),
  152. )
  153. @classmethod
  154. def from_gemini(cls, usage_metadata: Dict[str, Any]) -> "TokenUsage":
  155. """
  156. 从 Gemini 格式创建
  157. Gemini 格式:
  158. {
  159. "promptTokenCount": 100,
  160. "candidatesTokenCount": 50,
  161. "totalTokenCount": 150,
  162. "cachedContentTokenCount": 0, # 可选
  163. "thoughtsTokenCount": 20 # Gemini 2.x thinking mode
  164. }
  165. """
  166. return cls(
  167. input_tokens=usage_metadata.get("promptTokenCount", 0),
  168. output_tokens=usage_metadata.get("candidatesTokenCount", 0),
  169. reasoning_tokens=usage_metadata.get("thoughtsTokenCount", 0),
  170. cached_content_tokens=usage_metadata.get("cachedContentTokenCount", 0),
  171. )
  172. @classmethod
  173. def from_deepseek(cls, usage: Dict[str, Any]) -> "TokenUsage":
  174. """
  175. 从 DeepSeek 格式创建
  176. DeepSeek R1 格式(OpenAI 兼容 + 扩展):
  177. {
  178. "prompt_tokens": 100,
  179. "completion_tokens": 50,
  180. "reasoning_tokens": 30, # DeepSeek R1 特有
  181. "total_tokens": 150
  182. }
  183. """
  184. return cls(
  185. input_tokens=usage.get("prompt_tokens", 0),
  186. output_tokens=usage.get("completion_tokens", 0),
  187. reasoning_tokens=usage.get("reasoning_tokens", 0),
  188. )
  189. class TokenUsageAccumulator:
  190. """
  191. Token 使用量累加器
  192. 用于在 Trace 级别累计多次 LLM 调用的 token 使用
  193. """
  194. def __init__(self):
  195. self._input_tokens: int = 0
  196. self._output_tokens: int = 0
  197. self._reasoning_tokens: int = 0
  198. self._cache_creation_tokens: int = 0
  199. self._cache_read_tokens: int = 0
  200. self._cached_content_tokens: int = 0
  201. self._call_count: int = 0
  202. def add(self, usage: TokenUsage) -> None:
  203. """累加一次调用的 token 使用"""
  204. self._input_tokens += usage.input_tokens
  205. self._output_tokens += usage.output_tokens
  206. self._reasoning_tokens += usage.reasoning_tokens
  207. self._cache_creation_tokens += usage.cache_creation_tokens
  208. self._cache_read_tokens += usage.cache_read_tokens
  209. self._cached_content_tokens += usage.cached_content_tokens
  210. self._call_count += 1
  211. @property
  212. def total(self) -> TokenUsage:
  213. """获取累计的 TokenUsage"""
  214. return TokenUsage(
  215. input_tokens=self._input_tokens,
  216. output_tokens=self._output_tokens,
  217. reasoning_tokens=self._reasoning_tokens,
  218. cache_creation_tokens=self._cache_creation_tokens,
  219. cache_read_tokens=self._cache_read_tokens,
  220. cached_content_tokens=self._cached_content_tokens,
  221. )
  222. @property
  223. def call_count(self) -> int:
  224. """调用次数"""
  225. return self._call_count
  226. def to_dict(self) -> Dict[str, Any]:
  227. """转换为字典"""
  228. result = self.total.to_dict()
  229. result["call_count"] = self._call_count
  230. return result
  231. # 向后兼容的别名
  232. def create_usage_from_response(
  233. provider: str,
  234. usage_data: Dict[str, Any]
  235. ) -> TokenUsage:
  236. """
  237. 根据提供商创建 TokenUsage
  238. Args:
  239. provider: 提供商名称 ("openai", "anthropic", "gemini", "deepseek", "openrouter")
  240. usage_data: API 返回的 usage 数据
  241. Returns:
  242. TokenUsage 实例
  243. """
  244. provider = provider.lower()
  245. if provider in ("openai", "openrouter"):
  246. return TokenUsage.from_openai(usage_data)
  247. elif provider in ("anthropic", "claude"):
  248. return TokenUsage.from_anthropic(usage_data)
  249. elif provider == "gemini":
  250. return TokenUsage.from_gemini(usage_data)
  251. elif provider == "deepseek":
  252. return TokenUsage.from_deepseek(usage_data)
  253. else:
  254. # 默认使用 OpenAI 格式
  255. return TokenUsage.from_openai(usage_data)