text_quota_test.go 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. package service
  2. import (
  3. "net/http/httptest"
  4. "testing"
  5. "time"
  6. "github.com/QuantumNous/new-api/constant"
  7. "github.com/QuantumNous/new-api/dto"
  8. relaycommon "github.com/QuantumNous/new-api/relay/common"
  9. "github.com/QuantumNous/new-api/types"
  10. "github.com/gin-gonic/gin"
  11. "github.com/stretchr/testify/require"
  12. )
  13. func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) {
  14. gin.SetMode(gin.TestMode)
  15. w := httptest.NewRecorder()
  16. ctx, _ := gin.CreateTestContext(w)
  17. usage := &dto.Usage{
  18. PromptTokens: 1000,
  19. CompletionTokens: 200,
  20. PromptTokensDetails: dto.InputTokenDetails{
  21. CachedTokens: 100,
  22. CachedCreationTokens: 50,
  23. },
  24. ClaudeCacheCreation5mTokens: 10,
  25. ClaudeCacheCreation1hTokens: 20,
  26. }
  27. priceData := types.PriceData{
  28. ModelRatio: 1,
  29. CompletionRatio: 2,
  30. CacheRatio: 0.1,
  31. CacheCreationRatio: 1.25,
  32. CacheCreation5mRatio: 1.25,
  33. CacheCreation1hRatio: 2,
  34. GroupRatioInfo: types.GroupRatioInfo{
  35. GroupRatio: 1,
  36. },
  37. }
  38. chatRelayInfo := &relaycommon.RelayInfo{
  39. RelayFormat: types.RelayFormatOpenAI,
  40. FinalRequestRelayFormat: types.RelayFormatClaude,
  41. OriginModelName: "claude-3-7-sonnet",
  42. PriceData: priceData,
  43. StartTime: time.Now(),
  44. }
  45. messageRelayInfo := &relaycommon.RelayInfo{
  46. RelayFormat: types.RelayFormatClaude,
  47. FinalRequestRelayFormat: types.RelayFormatClaude,
  48. OriginModelName: "claude-3-7-sonnet",
  49. PriceData: priceData,
  50. StartTime: time.Now(),
  51. }
  52. chatSummary := calculateTextQuotaSummary(ctx, chatRelayInfo, usage)
  53. messageSummary := calculateTextQuotaSummary(ctx, messageRelayInfo, usage)
  54. require.Equal(t, messageSummary.Quota, chatSummary.Quota)
  55. require.Equal(t, messageSummary.CacheCreationTokens5m, chatSummary.CacheCreationTokens5m)
  56. require.Equal(t, messageSummary.CacheCreationTokens1h, chatSummary.CacheCreationTokens1h)
  57. require.True(t, chatSummary.IsClaudeUsageSemantic)
  58. require.Equal(t, 1488, chatSummary.Quota)
  59. }
  60. func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) {
  61. gin.SetMode(gin.TestMode)
  62. w := httptest.NewRecorder()
  63. ctx, _ := gin.CreateTestContext(w)
  64. relayInfo := &relaycommon.RelayInfo{
  65. RelayFormat: types.RelayFormatOpenAI,
  66. FinalRequestRelayFormat: types.RelayFormatClaude,
  67. OriginModelName: "claude-3-7-sonnet",
  68. PriceData: types.PriceData{
  69. ModelRatio: 1,
  70. CompletionRatio: 1,
  71. CacheRatio: 0,
  72. CacheCreationRatio: 1,
  73. CacheCreation5mRatio: 2,
  74. CacheCreation1hRatio: 3,
  75. GroupRatioInfo: types.GroupRatioInfo{
  76. GroupRatio: 1,
  77. },
  78. },
  79. StartTime: time.Now(),
  80. }
  81. usage := &dto.Usage{
  82. PromptTokens: 100,
  83. CompletionTokens: 0,
  84. PromptTokensDetails: dto.InputTokenDetails{
  85. CachedCreationTokens: 10,
  86. },
  87. ClaudeCacheCreation5mTokens: 2,
  88. ClaudeCacheCreation1hTokens: 3,
  89. }
  90. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  91. // 100 + remaining(5)*1 + 2*2 + 3*3 = 118
  92. require.Equal(t, 118, summary.Quota)
  93. }
  94. func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) {
  95. gin.SetMode(gin.TestMode)
  96. w := httptest.NewRecorder()
  97. ctx, _ := gin.CreateTestContext(w)
  98. relayInfo := &relaycommon.RelayInfo{
  99. RelayFormat: types.RelayFormatOpenAI,
  100. OriginModelName: "claude-3-7-sonnet",
  101. PriceData: types.PriceData{
  102. ModelRatio: 1,
  103. CompletionRatio: 2,
  104. CacheRatio: 0.1,
  105. CacheCreationRatio: 1.25,
  106. CacheCreation5mRatio: 1.25,
  107. CacheCreation1hRatio: 2,
  108. GroupRatioInfo: types.GroupRatioInfo{
  109. GroupRatio: 1,
  110. },
  111. },
  112. StartTime: time.Now(),
  113. }
  114. usage := &dto.Usage{
  115. PromptTokens: 1000,
  116. CompletionTokens: 200,
  117. UsageSemantic: "anthropic",
  118. PromptTokensDetails: dto.InputTokenDetails{
  119. CachedTokens: 100,
  120. CachedCreationTokens: 50,
  121. },
  122. ClaudeCacheCreation5mTokens: 10,
  123. ClaudeCacheCreation1hTokens: 20,
  124. }
  125. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  126. require.True(t, summary.IsClaudeUsageSemantic)
  127. require.Equal(t, "anthropic", summary.UsageSemantic)
  128. require.Equal(t, 1488, summary.Quota)
  129. }
  130. func TestCacheWriteTokensTotal(t *testing.T) {
  131. t.Run("split cache creation", func(t *testing.T) {
  132. summary := textQuotaSummary{
  133. CacheCreationTokens: 50,
  134. CacheCreationTokens5m: 10,
  135. CacheCreationTokens1h: 20,
  136. }
  137. require.Equal(t, 50, cacheWriteTokensTotal(summary))
  138. })
  139. t.Run("legacy cache creation", func(t *testing.T) {
  140. summary := textQuotaSummary{CacheCreationTokens: 50}
  141. require.Equal(t, 50, cacheWriteTokensTotal(summary))
  142. })
  143. t.Run("split cache creation without aggregate remainder", func(t *testing.T) {
  144. summary := textQuotaSummary{
  145. CacheCreationTokens5m: 10,
  146. CacheCreationTokens1h: 20,
  147. }
  148. require.Equal(t, 30, cacheWriteTokensTotal(summary))
  149. })
  150. }
  151. func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) {
  152. gin.SetMode(gin.TestMode)
  153. w := httptest.NewRecorder()
  154. ctx, _ := gin.CreateTestContext(w)
  155. relayInfo := &relaycommon.RelayInfo{
  156. RelayFormat: types.RelayFormatOpenAI,
  157. OriginModelName: "claude-3-7-sonnet",
  158. PriceData: types.PriceData{
  159. ModelRatio: 1,
  160. CompletionRatio: 5,
  161. CacheRatio: 0.1,
  162. CacheCreationRatio: 1.25,
  163. CacheCreation5mRatio: 1.25,
  164. CacheCreation1hRatio: 2,
  165. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  166. },
  167. StartTime: time.Now(),
  168. }
  169. usage := &dto.Usage{
  170. PromptTokens: 62,
  171. CompletionTokens: 95,
  172. PromptTokensDetails: dto.InputTokenDetails{
  173. CachedTokens: 3544,
  174. },
  175. ClaudeCacheCreation5mTokens: 586,
  176. }
  177. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  178. // 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
  179. require.Equal(t, 1624, summary.Quota)
  180. }
  181. func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) {
  182. gin.SetMode(gin.TestMode)
  183. w := httptest.NewRecorder()
  184. ctx, _ := gin.CreateTestContext(w)
  185. relayInfo := &relaycommon.RelayInfo{
  186. OriginModelName: "openai/gpt-4.1",
  187. ChannelMeta: &relaycommon.ChannelMeta{
  188. ChannelType: constant.ChannelTypeOpenRouter,
  189. },
  190. PriceData: types.PriceData{
  191. ModelRatio: 1,
  192. CompletionRatio: 1,
  193. CacheRatio: 0.1,
  194. CacheCreationRatio: 1.25,
  195. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  196. },
  197. StartTime: time.Now(),
  198. }
  199. usage := &dto.Usage{
  200. PromptTokens: 2604,
  201. CompletionTokens: 383,
  202. PromptTokensDetails: dto.InputTokenDetails{
  203. CachedTokens: 2432,
  204. },
  205. }
  206. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  207. // OpenRouter OpenAI-format display keeps prompt_tokens as total input,
  208. // but billing still separates normal input from cache read tokens.
  209. // quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798
  210. require.Equal(t, 2604, summary.PromptTokens)
  211. require.Equal(t, 798, summary.Quota)
  212. }
  213. func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) {
  214. gin.SetMode(gin.TestMode)
  215. w := httptest.NewRecorder()
  216. ctx, _ := gin.CreateTestContext(w)
  217. relayInfo := &relaycommon.RelayInfo{
  218. OriginModelName: "openai/gpt-4.1",
  219. ChannelMeta: &relaycommon.ChannelMeta{
  220. ChannelType: constant.ChannelTypeOpenRouter,
  221. },
  222. PriceData: types.PriceData{
  223. ModelRatio: 1,
  224. CompletionRatio: 1,
  225. CacheCreationRatio: 1.25,
  226. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  227. },
  228. StartTime: time.Now(),
  229. }
  230. usage := &dto.Usage{
  231. PromptTokens: 2604,
  232. CompletionTokens: 383,
  233. PromptTokensDetails: dto.InputTokenDetails{
  234. CachedCreationTokens: 100,
  235. },
  236. }
  237. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  238. // prompt_tokens is still logged as total input, but cache creation is billed separately.
  239. // quota = (2604 - 100) + 100*1.25 + 383 = 3012
  240. require.Equal(t, 2604, summary.PromptTokens)
  241. require.Equal(t, 3012, summary.Quota)
  242. }
  243. func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
  244. gin.SetMode(gin.TestMode)
  245. w := httptest.NewRecorder()
  246. ctx, _ := gin.CreateTestContext(w)
  247. relayInfo := &relaycommon.RelayInfo{
  248. FinalRequestRelayFormat: types.RelayFormatClaude,
  249. OriginModelName: "anthropic/claude-3.7-sonnet",
  250. ChannelMeta: &relaycommon.ChannelMeta{
  251. ChannelType: constant.ChannelTypeOpenRouter,
  252. },
  253. PriceData: types.PriceData{
  254. ModelRatio: 1,
  255. CompletionRatio: 1,
  256. CacheRatio: 0.1,
  257. CacheCreationRatio: 1.25,
  258. GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
  259. },
  260. StartTime: time.Now(),
  261. }
  262. usage := &dto.Usage{
  263. PromptTokens: 2604,
  264. CompletionTokens: 383,
  265. PromptTokensDetails: dto.InputTokenDetails{
  266. CachedTokens: 2432,
  267. },
  268. }
  269. summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
  270. // Pre-PR PostClaudeConsumeQuota behavior for OpenRouter:
  271. // prompt = 2604 - 2432 = 172
  272. // quota = 172 + 2432*0.1 + 383 = 798.2 => 798
  273. require.True(t, summary.IsClaudeUsageSemantic)
  274. require.Equal(t, 172, summary.PromptTokens)
  275. require.Equal(t, 798, summary.Quota)
  276. }