text_quota_test.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441
  1. package service
  2. import (
  3. "net/http/httptest"
  4. "testing"
  5. "time"
  6. "github.com/QuantumNous/new-api/constant"
  7. "github.com/QuantumNous/new-api/dto"
  8. "github.com/QuantumNous/new-api/pkg/billingexpr"
  9. relaycommon "github.com/QuantumNous/new-api/relay/common"
  10. "github.com/QuantumNous/new-api/types"
  11. "github.com/gin-gonic/gin"
  12. "github.com/stretchr/testify/require"
  13. )
  14. func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) {
  15. gin.SetMode(gin.TestMode)
  16. w := httptest.NewRecorder()
  17. ctx, _ := gin.CreateTestContext(w)
  18. usage := &dto.Usage{
  19. PromptTokens: 1000,
  20. CompletionTokens: 200,
  21. PromptTokensDetails: dto.InputTokenDetails{
  22. CachedTokens: 100,
  23. CachedCreationTokens: 50,
  24. },
  25. ClaudeCacheCreation5mTokens: 10,
  26. ClaudeCacheCreation1hTokens: 20,
  27. }
  28. priceData := types.PriceData{
  29. ModelRatio: 1,
  30. CompletionRatio: 2,
  31. CacheRatio: 0.1,
  32. CacheCreationRatio: 1.25,
  33. CacheCreation5mRatio: 1.25,
  34. CacheCreation1hRatio: 2,
  35. GroupRatioInfo: types.GroupRatioInfo{
  36. GroupRatio: 1,
  37. },
  38. }
  39. chatRelayInfo := &relaycommon.RelayInfo{
  40. RelayFormat: types.RelayFormatOpenAI,
  41. FinalRequestRelayFormat: types.RelayFormatClaude,
  42. OriginModelName: "claude-3-7-sonnet",
  43. PriceData: priceData,
  44. StartTime: time.Now(),
  45. }
  46. messageRelayInfo := &relaycommon.RelayInfo{
  47. RelayFormat: types.RelayFormatClaude,
  48. FinalRequestRelayFormat: types.RelayFormatClaude,
  49. OriginModelName: "claude-3-7-sonnet",
  50. PriceData: priceData,
  51. StartTime: time.Now(),
  52. }
  53. chatSummary := calculateTextQuotaSummary(ctx, chatRelayInfo, usage)
  54. messageSummary := calculateTextQuotaSummary(ctx, messageRelayInfo, usage)
  55. require.Equal(t, messageSummary.Quota, chatSummary.Quota)
  56. require.Equal(t, messageSummary.CacheCreationTokens5m, chatSummary.CacheCreationTokens5m)
  57. require.Equal(t, messageSummary.CacheCreationTokens1h, chatSummary.CacheCreationTokens1h)
  58. require.True(t, chatSummary.IsClaudeUsageSemantic)
  59. require.Equal(t, 1488, chatSummary.Quota)
  60. }
  61. func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) {
  62. gin.SetMode(gin.TestMode)
  63. w := httptest.NewRecorder()
  64. ctx, _ := gin.CreateTestContext(w)
  65. relayInfo := &relaycommon.RelayInfo{
  66. RelayFormat: types.RelayFormatOpenAI,
  67. FinalRequestRelayFormat: types.RelayFormatClaude,
  68. OriginModelName: "claude-3-7-sonnet",
  69. PriceData: types.PriceData{
  70. ModelRatio: 1,
  71. CompletionRatio: 1,
  72. CacheRatio: 0,
  73. CacheCreationRatio: 1,
  74. CacheCreation5mRatio: 2,
  75. CacheCreation1hRatio: 3,
  76. GroupRatioInfo: types.GroupRatioInfo{
  77. GroupRatio: 1,
  78. },
  79. },
  80. StartTime: time.Now(),
  81. }
  82. usage := &dto.Usage{
  83. PromptTokens: 100,
  84. CompletionTokens: 0,
  85. PromptTokensDetails: dto.InputTokenDetails{
  86. CachedCreationTokens: 10,
  87. },
  88. ClaudeCacheCreation5mTokens: 2,
  89. ClaudeCacheCreation1hTokens: 3,
  90. }
  91. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  92. // 100 + remaining(5)*1 + 2*2 + 3*3 = 118
  93. require.Equal(t, 118, summary.Quota)
  94. }
  95. func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) {
  96. gin.SetMode(gin.TestMode)
  97. w := httptest.NewRecorder()
  98. ctx, _ := gin.CreateTestContext(w)
  99. relayInfo := &relaycommon.RelayInfo{
  100. RelayFormat: types.RelayFormatOpenAI,
  101. OriginModelName: "claude-3-7-sonnet",
  102. PriceData: types.PriceData{
  103. ModelRatio: 1,
  104. CompletionRatio: 2,
  105. CacheRatio: 0.1,
  106. CacheCreationRatio: 1.25,
  107. CacheCreation5mRatio: 1.25,
  108. CacheCreation1hRatio: 2,
  109. GroupRatioInfo: types.GroupRatioInfo{
  110. GroupRatio: 1,
  111. },
  112. },
  113. StartTime: time.Now(),
  114. }
  115. usage := &dto.Usage{
  116. PromptTokens: 1000,
  117. CompletionTokens: 200,
  118. UsageSemantic: "anthropic",
  119. PromptTokensDetails: dto.InputTokenDetails{
  120. CachedTokens: 100,
  121. CachedCreationTokens: 50,
  122. },
  123. ClaudeCacheCreation5mTokens: 10,
  124. ClaudeCacheCreation1hTokens: 20,
  125. }
  126. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  127. require.True(t, summary.IsClaudeUsageSemantic)
  128. require.Equal(t, "anthropic", summary.UsageSemantic)
  129. require.Equal(t, 1488, summary.Quota)
  130. }
  131. func TestCacheWriteTokensTotal(t *testing.T) {
  132. t.Run("split cache creation", func(t *testing.T) {
  133. summary := textQuotaSummary{
  134. CacheCreationTokens: 50,
  135. CacheCreationTokens5m: 10,
  136. CacheCreationTokens1h: 20,
  137. }
  138. require.Equal(t, 50, cacheWriteTokensTotal(summary))
  139. })
  140. t.Run("legacy cache creation", func(t *testing.T) {
  141. summary := textQuotaSummary{CacheCreationTokens: 50}
  142. require.Equal(t, 50, cacheWriteTokensTotal(summary))
  143. })
  144. t.Run("split cache creation without aggregate remainder", func(t *testing.T) {
  145. summary := textQuotaSummary{
  146. CacheCreationTokens5m: 10,
  147. CacheCreationTokens1h: 20,
  148. }
  149. require.Equal(t, 30, cacheWriteTokensTotal(summary))
  150. })
  151. }
  152. func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) {
  153. gin.SetMode(gin.TestMode)
  154. w := httptest.NewRecorder()
  155. ctx, _ := gin.CreateTestContext(w)
  156. relayInfo := &relaycommon.RelayInfo{
  157. RelayFormat: types.RelayFormatOpenAI,
  158. OriginModelName: "claude-3-7-sonnet",
  159. PriceData: types.PriceData{
  160. ModelRatio: 1,
  161. CompletionRatio: 5,
  162. CacheRatio: 0.1,
  163. CacheCreationRatio: 1.25,
  164. CacheCreation5mRatio: 1.25,
  165. CacheCreation1hRatio: 2,
  166. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  167. },
  168. StartTime: time.Now(),
  169. }
  170. usage := &dto.Usage{
  171. PromptTokens: 62,
  172. CompletionTokens: 95,
  173. PromptTokensDetails: dto.InputTokenDetails{
  174. CachedTokens: 3544,
  175. },
  176. ClaudeCacheCreation5mTokens: 586,
  177. }
  178. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  179. // 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
  180. require.Equal(t, 1624, summary.Quota)
  181. }
  182. func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) {
  183. gin.SetMode(gin.TestMode)
  184. w := httptest.NewRecorder()
  185. ctx, _ := gin.CreateTestContext(w)
  186. relayInfo := &relaycommon.RelayInfo{
  187. OriginModelName: "openai/gpt-4.1",
  188. ChannelMeta: &relaycommon.ChannelMeta{
  189. ChannelType: constant.ChannelTypeOpenRouter,
  190. },
  191. PriceData: types.PriceData{
  192. ModelRatio: 1,
  193. CompletionRatio: 1,
  194. CacheRatio: 0.1,
  195. CacheCreationRatio: 1.25,
  196. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  197. },
  198. StartTime: time.Now(),
  199. }
  200. usage := &dto.Usage{
  201. PromptTokens: 2604,
  202. CompletionTokens: 383,
  203. PromptTokensDetails: dto.InputTokenDetails{
  204. CachedTokens: 2432,
  205. },
  206. }
  207. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  208. // OpenRouter OpenAI-format display keeps prompt_tokens as total input,
  209. // but billing still separates normal input from cache read tokens.
  210. // quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798
  211. require.Equal(t, 2604, summary.PromptTokens)
  212. require.Equal(t, 798, summary.Quota)
  213. }
  214. func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) {
  215. gin.SetMode(gin.TestMode)
  216. w := httptest.NewRecorder()
  217. ctx, _ := gin.CreateTestContext(w)
  218. relayInfo := &relaycommon.RelayInfo{
  219. OriginModelName: "openai/gpt-4.1",
  220. ChannelMeta: &relaycommon.ChannelMeta{
  221. ChannelType: constant.ChannelTypeOpenRouter,
  222. },
  223. PriceData: types.PriceData{
  224. ModelRatio: 1,
  225. CompletionRatio: 1,
  226. CacheCreationRatio: 1.25,
  227. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  228. },
  229. StartTime: time.Now(),
  230. }
  231. usage := &dto.Usage{
  232. PromptTokens: 2604,
  233. CompletionTokens: 383,
  234. PromptTokensDetails: dto.InputTokenDetails{
  235. CachedCreationTokens: 100,
  236. },
  237. }
  238. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  239. // prompt_tokens is still logged as total input, but cache creation is billed separately.
  240. // quota = (2604 - 100) + 100*1.25 + 383 = 3012
  241. require.Equal(t, 2604, summary.PromptTokens)
  242. require.Equal(t, 3012, summary.Quota)
  243. }
  244. func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
  245. gin.SetMode(gin.TestMode)
  246. w := httptest.NewRecorder()
  247. ctx, _ := gin.CreateTestContext(w)
  248. relayInfo := &relaycommon.RelayInfo{
  249. FinalRequestRelayFormat: types.RelayFormatClaude,
  250. OriginModelName: "anthropic/claude-3.7-sonnet",
  251. ChannelMeta: &relaycommon.ChannelMeta{
  252. ChannelType: constant.ChannelTypeOpenRouter,
  253. },
  254. PriceData: types.PriceData{
  255. ModelRatio: 1,
  256. CompletionRatio: 1,
  257. CacheRatio: 0.1,
  258. CacheCreationRatio: 1.25,
  259. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  260. },
  261. StartTime: time.Now(),
  262. }
  263. usage := &dto.Usage{
  264. PromptTokens: 2604,
  265. CompletionTokens: 383,
  266. PromptTokensDetails: dto.InputTokenDetails{
  267. CachedTokens: 2432,
  268. },
  269. }
  270. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  271. // Pre-PR PostClaudeConsumeQuota behavior for OpenRouter:
  272. // prompt = 2604 - 2432 = 172
  273. // quota = 172 + 2432*0.1 + 383 = 798.2 => 798
  274. require.True(t, summary.IsClaudeUsageSemantic)
  275. require.Equal(t, 172, summary.PromptTokens)
  276. require.Equal(t, 798, summary.Quota)
  277. }
  278. func TestComposeTieredTextQuotaKeepsToolCallSurcharges(t *testing.T) {
  279. gin.SetMode(gin.TestMode)
  280. w := httptest.NewRecorder()
  281. ctx, _ := gin.CreateTestContext(w)
  282. ctx.Set("image_generation_call", true)
  283. ctx.Set("image_generation_call_quality", "low")
  284. ctx.Set("image_generation_call_size", "1024x1024")
  285. relayInfo := &relaycommon.RelayInfo{
  286. OriginModelName: "o1",
  287. PriceData: types.PriceData{
  288. ModelRatio: 1,
  289. CompletionRatio: 1,
  290. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  291. },
  292. ResponsesUsageInfo: &relaycommon.ResponsesUsageInfo{
  293. BuiltInTools: map[string]*relaycommon.BuildInToolInfo{
  294. dto.BuildInToolWebSearchPreview: &relaycommon.BuildInToolInfo{
  295. CallCount: 1,
  296. },
  297. dto.BuildInToolFileSearch: &relaycommon.BuildInToolInfo{
  298. CallCount: 2,
  299. },
  300. },
  301. },
  302. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  303. BillingMode: "tiered_expr",
  304. GroupRatio: 1,
  305. EstimatedQuotaBeforeGroup: 1000,
  306. },
  307. StartTime: time.Now(),
  308. }
  309. usage := &dto.Usage{
  310. PromptTokens: 100,
  311. CompletionTokens: 50,
  312. TotalTokens: 150,
  313. }
  314. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  315. quota := composeTieredTextQuota(relayInfo, summary, 1000, &billingexpr.TieredResult{
  316. ActualQuotaBeforeGroup: 1000,
  317. ActualQuotaAfterGroup: 1000,
  318. })
  319. require.Equal(t, int64(13000), summary.ToolCallSurchargeQuota.Round(0).IntPart())
  320. require.Equal(t, 14000, quota)
  321. }
  322. func TestComposeTieredTextQuotaFallbackKeepsToolCallSurcharges(t *testing.T) {
  323. gin.SetMode(gin.TestMode)
  324. w := httptest.NewRecorder()
  325. ctx, _ := gin.CreateTestContext(w)
  326. ctx.Set("claude_web_search_requests", 2)
  327. relayInfo := &relaycommon.RelayInfo{
  328. OriginModelName: "claude-3-7-sonnet",
  329. PriceData: types.PriceData{
  330. ModelRatio: 1,
  331. CompletionRatio: 1,
  332. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1.25},
  333. },
  334. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  335. BillingMode: "tiered_expr",
  336. GroupRatio: 1.25,
  337. EstimatedQuotaBeforeGroup: 1000,
  338. },
  339. StartTime: time.Now(),
  340. }
  341. usage := &dto.Usage{
  342. PromptTokens: 100,
  343. CompletionTokens: 50,
  344. TotalTokens: 150,
  345. }
  346. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  347. quota := composeTieredTextQuota(relayInfo, summary, 1250, nil)
  348. require.Equal(t, int64(12500), summary.ToolCallSurchargeQuota.Round(0).IntPart())
  349. require.Equal(t, 13750, quota)
  350. }
  351. func TestComposeTieredTextQuotaErrorFallbackUsesPreConsumedQuota(t *testing.T) {
  352. gin.SetMode(gin.TestMode)
  353. w := httptest.NewRecorder()
  354. ctx, _ := gin.CreateTestContext(w)
  355. ctx.Set("claude_web_search_requests", 2)
  356. relayInfo := &relaycommon.RelayInfo{
  357. OriginModelName: "claude-3-7-sonnet",
  358. PriceData: types.PriceData{
  359. ModelRatio: 1,
  360. CompletionRatio: 1,
  361. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1.25},
  362. },
  363. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  364. BillingMode: "tiered_expr",
  365. GroupRatio: 1.25,
  366. EstimatedQuotaBeforeGroup: 1000,
  367. },
  368. StartTime: time.Now(),
  369. }
  370. usage := &dto.Usage{
  371. PromptTokens: 100,
  372. CompletionTokens: 50,
  373. TotalTokens: 150,
  374. }
  375. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  376. // tieredResult=nil simulates a settlement error where TryTieredSettle
  377. // falls back to FinalPreConsumedQuota (2000), which differs from
  378. // EstimatedQuotaBeforeGroup * GroupRatio (1250).
  379. preConsumedFallback := 2000
  380. quota := composeTieredTextQuota(relayInfo, summary, preConsumedFallback, nil)
  381. require.Equal(t, int64(12500), summary.ToolCallSurchargeQuota.Round(0).IntPart())
  382. require.Equal(t, 14500, quota)
  383. }