tiered_settle_test.go 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589
  1. package service
  2. import (
  3. "math"
  4. "testing"
  5. "github.com/QuantumNous/new-api/dto"
  6. "github.com/QuantumNous/new-api/pkg/billingexpr"
  7. relaycommon "github.com/QuantumNous/new-api/relay/common"
  8. )
  9. // Claude Sonnet-style tiered expression: standard vs long-context
  10. const sonnetTieredExpr = `p <= 200000 ? tier("standard", p * 1.5 + c * 7.5) : tier("long_context", p * 3 + c * 11.25)`
  11. // Simple flat expression
  12. const flatExpr = `tier("default", p * 2 + c * 10)`
  13. // Expression with cache tokens
  14. const cacheExpr = `tier("default", p * 2 + c * 10 + cr * 0.2 + cc * 2.5 + cc1h * 4)`
  15. // Expression with request probes
  16. const probeExpr = `param("service_tier") == "fast" ? tier("fast", p * 4 + c * 20) : tier("normal", p * 2 + c * 10)`
  17. const testQuotaPerUnit = 500_000.0
  18. func makeSnapshot(expr string, groupRatio float64, estPrompt, estCompletion int) *billingexpr.BillingSnapshot {
  19. return &billingexpr.BillingSnapshot{
  20. BillingMode: "tiered_expr",
  21. ExprString: expr,
  22. ExprHash: billingexpr.ExprHashString(expr),
  23. GroupRatio: groupRatio,
  24. EstimatedPromptTokens: estPrompt,
  25. EstimatedCompletionTokens: estCompletion,
  26. QuotaPerUnit: testQuotaPerUnit,
  27. }
  28. }
  29. func makeRelayInfo(expr string, groupRatio float64, estPrompt, estCompletion int) *relaycommon.RelayInfo {
  30. snap := makeSnapshot(expr, groupRatio, estPrompt, estCompletion)
  31. cost, trace, _ := billingexpr.RunExpr(expr, billingexpr.TokenParams{P: float64(estPrompt), C: float64(estCompletion)})
  32. quotaBeforeGroup := cost / 1_000_000 * testQuotaPerUnit
  33. snap.EstimatedQuotaBeforeGroup = quotaBeforeGroup
  34. snap.EstimatedQuotaAfterGroup = billingexpr.QuotaRound(quotaBeforeGroup * groupRatio)
  35. snap.EstimatedTier = trace.MatchedTier
  36. return &relaycommon.RelayInfo{
  37. TieredBillingSnapshot: snap,
  38. FinalPreConsumedQuota: snap.EstimatedQuotaAfterGroup,
  39. }
  40. }
  41. // ---------------------------------------------------------------------------
  42. // Existing tests (preserved)
  43. // ---------------------------------------------------------------------------
  44. func TestTryTieredSettleUsesFrozenRequestInput(t *testing.T) {
  45. exprStr := `param("service_tier") == "fast" ? tier("fast", p * 2) : tier("normal", p)`
  46. relayInfo := &relaycommon.RelayInfo{
  47. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  48. BillingMode: "tiered_expr",
  49. ExprString: exprStr,
  50. ExprHash: billingexpr.ExprHashString(exprStr),
  51. GroupRatio: 1.0,
  52. EstimatedPromptTokens: 100,
  53. EstimatedCompletionTokens: 0,
  54. EstimatedQuotaAfterGroup: 50,
  55. QuotaPerUnit: testQuotaPerUnit,
  56. },
  57. BillingRequestInput: &billingexpr.RequestInput{
  58. Body: []byte(`{"service_tier":"fast"}`),
  59. },
  60. }
  61. ok, quota, result := TryTieredSettle(relayInfo, billingexpr.TokenParams{P: 100})
  62. if !ok {
  63. t.Fatal("expected tiered settle to apply")
  64. }
  65. // fast: p*2 = 200; quota = 200 / 1M * 500K = 100
  66. if quota != 100 {
  67. t.Fatalf("quota = %d, want 100", quota)
  68. }
  69. if result == nil || result.MatchedTier != "fast" {
  70. t.Fatalf("matched tier = %v, want fast", result)
  71. }
  72. }
  73. func TestTryTieredSettleFallsBackToFrozenPreConsumeOnExprError(t *testing.T) {
  74. relayInfo := &relaycommon.RelayInfo{
  75. FinalPreConsumedQuota: 321,
  76. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  77. BillingMode: "tiered_expr",
  78. ExprString: `invalid +-+ expr`,
  79. ExprHash: billingexpr.ExprHashString(`invalid +-+ expr`),
  80. GroupRatio: 1.0,
  81. EstimatedQuotaAfterGroup: 123,
  82. },
  83. }
  84. ok, quota, result := TryTieredSettle(relayInfo, billingexpr.TokenParams{P: 100})
  85. if !ok {
  86. t.Fatal("expected tiered settle to apply")
  87. }
  88. if quota != 321 {
  89. t.Fatalf("quota = %d, want 321", quota)
  90. }
  91. if result != nil {
  92. t.Fatalf("result = %#v, want nil", result)
  93. }
  94. }
  95. // ---------------------------------------------------------------------------
  96. // Pre-consume vs Post-consume consistency
  97. // ---------------------------------------------------------------------------
  98. func TestTryTieredSettle_PreConsumeMatchesPostConsume(t *testing.T) {
  99. info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
  100. params := billingexpr.TokenParams{P: 1000, C: 500}
  101. ok, quota, _ := TryTieredSettle(info, params)
  102. if !ok {
  103. t.Fatal("expected tiered settle")
  104. }
  105. // p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
  106. if quota != 3500 {
  107. t.Fatalf("quota = %d, want 3500", quota)
  108. }
  109. if quota != info.FinalPreConsumedQuota {
  110. t.Fatalf("pre-consume %d != post-consume %d", info.FinalPreConsumedQuota, quota)
  111. }
  112. }
  113. func TestTryTieredSettle_PostConsumeOverPreConsume(t *testing.T) {
  114. info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
  115. preConsumed := info.FinalPreConsumedQuota // 3500
  116. // Actual usage is higher than estimated
  117. params := billingexpr.TokenParams{P: 2000, C: 1000}
  118. ok, quota, _ := TryTieredSettle(info, params)
  119. if !ok {
  120. t.Fatal("expected tiered settle")
  121. }
  122. // p*2 + c*10 = 14000; quota = 14000 / 1M * 500K = 7000
  123. if quota != 7000 {
  124. t.Fatalf("quota = %d, want 7000", quota)
  125. }
  126. if quota <= preConsumed {
  127. t.Fatalf("expected supplement: actual %d should > pre-consumed %d", quota, preConsumed)
  128. }
  129. }
  130. func TestTryTieredSettle_PostConsumeUnderPreConsume(t *testing.T) {
  131. info := makeRelayInfo(flatExpr, 1.0, 1000, 500)
  132. preConsumed := info.FinalPreConsumedQuota // 3500
  133. // Actual usage is lower than estimated
  134. params := billingexpr.TokenParams{P: 100, C: 50}
  135. ok, quota, _ := TryTieredSettle(info, params)
  136. if !ok {
  137. t.Fatal("expected tiered settle")
  138. }
  139. // p*2 + c*10 = 700; quota = 700 / 1M * 500K = 350
  140. if quota != 350 {
  141. t.Fatalf("quota = %d, want 350", quota)
  142. }
  143. if quota >= preConsumed {
  144. t.Fatalf("expected refund: actual %d should < pre-consumed %d", quota, preConsumed)
  145. }
  146. }
  147. // ---------------------------------------------------------------------------
  148. // Tiered boundary conditions
  149. // ---------------------------------------------------------------------------
  150. func TestTryTieredSettle_ExactBoundary(t *testing.T) {
  151. info := makeRelayInfo(sonnetTieredExpr, 1.0, 200000, 1000)
  152. // p == 200000 => standard tier (p <= 200000)
  153. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 200000, C: 1000})
  154. if !ok {
  155. t.Fatal("expected tiered settle")
  156. }
  157. // standard: p*1.5 + c*7.5 = 307500; quota = 307500 / 1M * 500K = 153750
  158. if quota != 153750 {
  159. t.Fatalf("quota = %d, want 153750", quota)
  160. }
  161. if result.MatchedTier != "standard" {
  162. t.Fatalf("tier = %s, want standard", result.MatchedTier)
  163. }
  164. }
  165. func TestTryTieredSettle_BoundaryPlusOne(t *testing.T) {
  166. info := makeRelayInfo(sonnetTieredExpr, 1.0, 200000, 1000)
  167. // p == 200001 => crosses to long_context tier
  168. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 200001, C: 1000})
  169. if !ok {
  170. t.Fatal("expected tiered settle")
  171. }
  172. // long_context: p*3 + c*11.25 = 611253; quota = round(611253 / 1M * 500K) = 305627
  173. if quota != 305627 {
  174. t.Fatalf("quota = %d, want 305627", quota)
  175. }
  176. if result.MatchedTier != "long_context" {
  177. t.Fatalf("tier = %s, want long_context", result.MatchedTier)
  178. }
  179. if !result.CrossedTier {
  180. t.Fatal("expected CrossedTier = true")
  181. }
  182. }
  183. func TestTryTieredSettle_ZeroTokens(t *testing.T) {
  184. info := makeRelayInfo(flatExpr, 1.0, 0, 0)
  185. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 0, C: 0})
  186. if !ok {
  187. t.Fatal("expected tiered settle")
  188. }
  189. if quota != 0 {
  190. t.Fatalf("quota = %d, want 0", quota)
  191. }
  192. if result == nil {
  193. t.Fatal("result should not be nil")
  194. }
  195. }
  196. func TestTryTieredSettle_HugeTokens(t *testing.T) {
  197. info := makeRelayInfo(flatExpr, 1.0, 10000000, 5000000)
  198. ok, quota, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 10000000, C: 5000000})
  199. if !ok {
  200. t.Fatal("expected tiered settle")
  201. }
  202. // p*2 + c*10 = 70000000; quota = 70000000 / 1M * 500K = 35000000
  203. if quota != 35000000 {
  204. t.Fatalf("quota = %d, want 35000000", quota)
  205. }
  206. }
  207. func TestTryTieredSettle_CacheTokensAffectSettlement(t *testing.T) {
  208. info := makeRelayInfo(cacheExpr, 1.0, 1000, 500)
  209. // Without cache tokens
  210. ok1, quota1, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  211. if !ok1 {
  212. t.Fatal("expected tiered settle")
  213. }
  214. // p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
  215. // With cache tokens
  216. ok2, quota2, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500, CR: 10000, CC: 5000, CC1h: 2000})
  217. if !ok2 {
  218. t.Fatal("expected tiered settle")
  219. }
  220. // 2000 + 5000 + 2000 + 12500 + 8000 = 29500; quota = 29500 / 1M * 500K = 14750
  221. if quota2 <= quota1 {
  222. t.Fatalf("cache tokens should increase quota: without=%d, with=%d", quota1, quota2)
  223. }
  224. if quota1 != 3500 {
  225. t.Fatalf("no-cache quota = %d, want 3500", quota1)
  226. }
  227. if quota2 != 14750 {
  228. t.Fatalf("cache quota = %d, want 14750", quota2)
  229. }
  230. }
  231. // ---------------------------------------------------------------------------
  232. // Request probe tests
  233. // ---------------------------------------------------------------------------
  234. func TestTryTieredSettle_RequestProbeInfluencesBilling(t *testing.T) {
  235. info := makeRelayInfo(probeExpr, 1.0, 1000, 500)
  236. info.BillingRequestInput = &billingexpr.RequestInput{
  237. Body: []byte(`{"service_tier":"fast"}`),
  238. }
  239. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  240. if !ok {
  241. t.Fatal("expected tiered settle")
  242. }
  243. // fast: p*4 + c*20 = 14000; quota = 14000 / 1M * 500K = 7000
  244. if quota != 7000 {
  245. t.Fatalf("quota = %d, want 7000", quota)
  246. }
  247. if result.MatchedTier != "fast" {
  248. t.Fatalf("tier = %s, want fast", result.MatchedTier)
  249. }
  250. }
  251. func TestTryTieredSettle_NoRequestInput_FallsBackToDefault(t *testing.T) {
  252. info := makeRelayInfo(probeExpr, 1.0, 1000, 500)
  253. // No BillingRequestInput set — param("service_tier") returns nil, not "fast"
  254. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  255. if !ok {
  256. t.Fatal("expected tiered settle")
  257. }
  258. // normal: p*2 + c*10 = 7000; quota = 7000 / 1M * 500K = 3500
  259. if quota != 3500 {
  260. t.Fatalf("quota = %d, want 3500", quota)
  261. }
  262. if result.MatchedTier != "normal" {
  263. t.Fatalf("tier = %s, want normal", result.MatchedTier)
  264. }
  265. }
  266. // ---------------------------------------------------------------------------
  267. // Group ratio tests
  268. // ---------------------------------------------------------------------------
  269. func TestTryTieredSettle_GroupRatioScaling(t *testing.T) {
  270. info := makeRelayInfo(flatExpr, 1.5, 1000, 500)
  271. ok, quota, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  272. if !ok {
  273. t.Fatal("expected tiered settle")
  274. }
  275. // exprCost = 7000, quotaBeforeGroup = 3500, afterGroup = round(3500 * 1.5) = 5250
  276. if quota != 5250 {
  277. t.Fatalf("quota = %d, want 5250", quota)
  278. }
  279. }
  280. func TestTryTieredSettle_GroupRatioZero(t *testing.T) {
  281. info := makeRelayInfo(flatExpr, 0, 1000, 500)
  282. ok, quota, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  283. if !ok {
  284. t.Fatal("expected tiered settle")
  285. }
  286. if quota != 0 {
  287. t.Fatalf("quota = %d, want 0 (group ratio = 0)", quota)
  288. }
  289. }
  290. // ---------------------------------------------------------------------------
  291. // Ratio mode (negative tests) — TryTieredSettle must return false
  292. // ---------------------------------------------------------------------------
  293. func TestTryTieredSettle_RatioMode_NilSnapshot(t *testing.T) {
  294. info := &relaycommon.RelayInfo{
  295. TieredBillingSnapshot: nil,
  296. }
  297. ok, _, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  298. if ok {
  299. t.Fatal("expected TryTieredSettle to return false when snapshot is nil")
  300. }
  301. }
  302. func TestTryTieredSettle_RatioMode_WrongBillingMode(t *testing.T) {
  303. info := &relaycommon.RelayInfo{
  304. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  305. BillingMode: "ratio",
  306. ExprString: flatExpr,
  307. ExprHash: billingexpr.ExprHashString(flatExpr),
  308. GroupRatio: 1.0,
  309. },
  310. }
  311. ok, _, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  312. if ok {
  313. t.Fatal("expected TryTieredSettle to return false for ratio billing mode")
  314. }
  315. }
  316. func TestTryTieredSettle_RatioMode_EmptyBillingMode(t *testing.T) {
  317. info := &relaycommon.RelayInfo{
  318. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  319. BillingMode: "",
  320. ExprString: flatExpr,
  321. ExprHash: billingexpr.ExprHashString(flatExpr),
  322. GroupRatio: 1.0,
  323. },
  324. }
  325. ok, _, _ := TryTieredSettle(info, billingexpr.TokenParams{P: 1000, C: 500})
  326. if ok {
  327. t.Fatal("expected TryTieredSettle to return false for empty billing mode")
  328. }
  329. }
  330. // ---------------------------------------------------------------------------
  331. // Fallback tests
  332. // ---------------------------------------------------------------------------
  333. func TestTryTieredSettle_ErrorFallbackToEstimatedQuotaAfterGroup(t *testing.T) {
  334. info := &relaycommon.RelayInfo{
  335. FinalPreConsumedQuota: 0,
  336. TieredBillingSnapshot: &billingexpr.BillingSnapshot{
  337. BillingMode: "tiered_expr",
  338. ExprString: `invalid expr!!!`,
  339. ExprHash: billingexpr.ExprHashString(`invalid expr!!!`),
  340. GroupRatio: 1.0,
  341. EstimatedQuotaAfterGroup: 999,
  342. },
  343. }
  344. ok, quota, result := TryTieredSettle(info, billingexpr.TokenParams{P: 100})
  345. if !ok {
  346. t.Fatal("expected tiered settle to apply")
  347. }
  348. // FinalPreConsumedQuota is 0, should fall back to EstimatedQuotaAfterGroup
  349. if quota != 999 {
  350. t.Fatalf("quota = %d, want 999", quota)
  351. }
  352. if result != nil {
  353. t.Fatal("result should be nil on error fallback")
  354. }
  355. }
  356. // ---------------------------------------------------------------------------
  357. // BuildTieredTokenParams: token normalization and ratio parity tests
  358. // ---------------------------------------------------------------------------
  359. func tieredQuota(exprStr string, usage *dto.Usage, isClaudeSemantic bool, groupRatio float64) float64 {
  360. usedVars := billingexpr.UsedVars(exprStr)
  361. params := BuildTieredTokenParams(usage, isClaudeSemantic, usedVars)
  362. cost, _, _ := billingexpr.RunExpr(exprStr, params)
  363. return cost / 1_000_000 * testQuotaPerUnit * groupRatio
  364. }
  365. func ratioQuota(usage *dto.Usage, isClaudeSemantic bool, modelRatio, completionRatio, cacheRatio, imageRatio, groupRatio float64) float64 {
  366. baseTokens := float64(usage.PromptTokens)
  367. cacheTokens := float64(usage.PromptTokensDetails.CachedTokens)
  368. ccTokens := float64(usage.PromptTokensDetails.CachedCreationTokens)
  369. imgTokens := float64(usage.PromptTokensDetails.ImageTokens)
  370. if !isClaudeSemantic {
  371. baseTokens -= cacheTokens
  372. baseTokens -= ccTokens
  373. baseTokens -= imgTokens
  374. }
  375. promptQuota := baseTokens + cacheTokens*cacheRatio + imgTokens*imageRatio
  376. completionQuota := float64(usage.CompletionTokens) * completionRatio
  377. return (promptQuota + completionQuota) * modelRatio * groupRatio
  378. }
  379. func TestBuildTieredTokenParams_GPT_WithCache(t *testing.T) {
  380. usage := &dto.Usage{
  381. PromptTokens: 1000,
  382. CompletionTokens: 500,
  383. PromptTokensDetails: dto.InputTokenDetails{
  384. CachedTokens: 200,
  385. TextTokens: 800,
  386. },
  387. }
  388. expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
  389. got := tieredQuota(expr, usage, false, 1.0)
  390. // P=800, C=500, CR=200 → (800*2.5 + 500*15 + 200*0.25) * 0.5 = 4775
  391. want := 4775.0
  392. if math.Abs(got-want) > 0.01 {
  393. t.Fatalf("quota = %f, want %f", got, want)
  394. }
  395. }
  396. func TestBuildTieredTokenParams_GPT_NoCacheVar(t *testing.T) {
  397. usage := &dto.Usage{
  398. PromptTokens: 1000,
  399. CompletionTokens: 500,
  400. PromptTokensDetails: dto.InputTokenDetails{
  401. CachedTokens: 200,
  402. TextTokens: 800,
  403. },
  404. }
  405. expr := `tier("base", p * 2.5 + c * 15)`
  406. got := tieredQuota(expr, usage, false, 1.0)
  407. // No cr → P=1000 (cache stays in P), C=500 → (1000*2.5 + 500*15) * 0.5 = 5000
  408. want := 5000.0
  409. if math.Abs(got-want) > 0.01 {
  410. t.Fatalf("quota = %f, want %f", got, want)
  411. }
  412. }
  413. func TestBuildTieredTokenParams_GPT_WithImage(t *testing.T) {
  414. usage := &dto.Usage{
  415. PromptTokens: 1000,
  416. CompletionTokens: 500,
  417. PromptTokensDetails: dto.InputTokenDetails{
  418. ImageTokens: 200,
  419. TextTokens: 800,
  420. },
  421. }
  422. expr := `tier("base", p * 2 + c * 8 + img * 2.5)`
  423. got := tieredQuota(expr, usage, false, 1.0)
  424. // P=800, C=500, Img=200 → (800*2 + 500*8 + 200*2.5) * 0.5 = 3050
  425. want := 3050.0
  426. if math.Abs(got-want) > 0.01 {
  427. t.Fatalf("quota = %f, want %f", got, want)
  428. }
  429. }
  430. func TestBuildTieredTokenParams_Claude_WithCache(t *testing.T) {
  431. usage := &dto.Usage{
  432. PromptTokens: 800,
  433. CompletionTokens: 500,
  434. PromptTokensDetails: dto.InputTokenDetails{
  435. CachedTokens: 200,
  436. TextTokens: 800,
  437. },
  438. }
  439. expr := `tier("base", p * 3 + c * 15 + cr * 0.3)`
  440. got := tieredQuota(expr, usage, true, 1.0)
  441. // Claude: P=800 (no subtraction), C=500, CR=200 → (800*3 + 500*15 + 200*0.3) * 0.5 = 4980
  442. want := 4980.0
  443. if math.Abs(got-want) > 0.01 {
  444. t.Fatalf("quota = %f, want %f", got, want)
  445. }
  446. }
  447. func TestBuildTieredTokenParams_GPT_AudioOutput(t *testing.T) {
  448. usage := &dto.Usage{
  449. PromptTokens: 1000,
  450. CompletionTokens: 600,
  451. CompletionTokenDetails: dto.OutputTokenDetails{
  452. AudioTokens: 100,
  453. TextTokens: 500,
  454. },
  455. }
  456. expr := `tier("base", p * 2 + c * 10 + ao * 50)`
  457. got := tieredQuota(expr, usage, false, 1.0)
  458. // C=600-100=500, AO=100 → (1000*2 + 500*10 + 100*50) * 0.5 = 6000
  459. want := 6000.0
  460. if math.Abs(got-want) > 0.01 {
  461. t.Fatalf("quota = %f, want %f", got, want)
  462. }
  463. }
  464. func TestBuildTieredTokenParams_GPT_AudioOutputNoVar(t *testing.T) {
  465. usage := &dto.Usage{
  466. PromptTokens: 1000,
  467. CompletionTokens: 600,
  468. CompletionTokenDetails: dto.OutputTokenDetails{
  469. AudioTokens: 100,
  470. TextTokens: 500,
  471. },
  472. }
  473. expr := `tier("base", p * 2 + c * 10)`
  474. got := tieredQuota(expr, usage, false, 1.0)
  475. // No ao → C=600 (audio stays in C) → (1000*2 + 600*10) * 0.5 = 4000
  476. want := 4000.0
  477. if math.Abs(got-want) > 0.01 {
  478. t.Fatalf("quota = %f, want %f", got, want)
  479. }
  480. }
  481. func TestBuildTieredTokenParams_ParityWithRatio(t *testing.T) {
  482. // GPT-5.4 prices: input=$2.5, output=$15, cacheRead=$0.25
  483. // Ratio equivalents: modelRatio=1.25, completionRatio=6, cacheRatio=0.1
  484. usage := &dto.Usage{
  485. PromptTokens: 10000,
  486. CompletionTokens: 2000,
  487. PromptTokensDetails: dto.InputTokenDetails{
  488. CachedTokens: 3000,
  489. TextTokens: 7000,
  490. },
  491. }
  492. expr := `tier("base", p * 2.5 + c * 15 + cr * 0.25)`
  493. for _, gr := range []float64{1.0, 1.5, 2.0, 0.5} {
  494. tq := tieredQuota(expr, usage, false, gr)
  495. rq := ratioQuota(usage, false, 1.25, 6, 0.1, 0, gr)
  496. if math.Abs(tq-rq) > 0.01 {
  497. t.Fatalf("groupRatio=%v: tiered=%f ratio=%f (mismatch)", gr, tq, rq)
  498. }
  499. }
  500. }
  501. func TestBuildTieredTokenParams_ParityWithRatio_Image(t *testing.T) {
  502. // gpt-image-1-mini prices: input=$2, output=$8, image=$2.5
  503. // Ratio equivalents: modelRatio=1, completionRatio=4, imageRatio=1.25
  504. usage := &dto.Usage{
  505. PromptTokens: 5000,
  506. CompletionTokens: 4000,
  507. PromptTokensDetails: dto.InputTokenDetails{
  508. ImageTokens: 1000,
  509. TextTokens: 4000,
  510. },
  511. }
  512. expr := `tier("base", p * 2 + c * 8 + img * 2.5)`
  513. tq := tieredQuota(expr, usage, false, 1.0)
  514. rq := ratioQuota(usage, false, 1.0, 4, 0, 1.25, 1.0)
  515. if math.Abs(tq-rq) > 0.01 {
  516. t.Fatalf("tiered=%f ratio=%f (mismatch)", tq, rq)
  517. }
  518. }