tiered_settle.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. package service
  2. import (
  3. "github.com/QuantumNous/new-api/dto"
  4. "github.com/QuantumNous/new-api/pkg/billingexpr"
  5. relaycommon "github.com/QuantumNous/new-api/relay/common"
  6. )
  7. // TieredResultWrapper is a type alias (not a distinct wrapper type) for billingexpr.TieredResult, exposing that type under a service-layer name; values of the two are interchangeable.
  8. type TieredResultWrapper = billingexpr.TieredResult
  9. // BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
  10. // normalizing P and C so they mean "tokens not separately priced by the
  11. // expression". Sub-categories (cache, image, audio) are only subtracted
  12. // when the expression references them via their own variable.
  13. //
  14. // GPT-format APIs report prompt_tokens / completion_tokens as totals that
  15. // include all sub-categories (cache, image, audio). Claude-format APIs
  16. // report them as text-only. This function normalizes to text-only when
  17. // sub-categories are separately priced.
  18. func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
  19. p := float64(usage.PromptTokens)
  20. c := float64(usage.CompletionTokens)
  21. cr := float64(usage.PromptTokensDetails.CachedTokens)
  22. cc5m := float64(usage.PromptTokensDetails.CachedCreationTokens)
  23. cc1h := float64(0)
  24. if usage.UsageSemantic == "anthropic" {
  25. cc1h = float64(usage.ClaudeCacheCreation1hTokens)
  26. cc5m = float64(usage.ClaudeCacheCreation5mTokens)
  27. }
  28. img := float64(usage.PromptTokensDetails.ImageTokens)
  29. ai := float64(usage.PromptTokensDetails.AudioTokens)
  30. imgO := float64(usage.CompletionTokenDetails.ImageTokens)
  31. ao := float64(usage.CompletionTokenDetails.AudioTokens)
  32. // len = total input context length for tier condition evaluation.
  33. // Non-Claude: prompt_tokens already includes everything.
  34. // Claude: input_tokens is text-only, so add cache read + cache creation.
  35. inputLen := p
  36. if isClaudeUsageSemantic {
  37. inputLen = p + cr + cc5m + cc1h
  38. }
  39. if !isClaudeUsageSemantic {
  40. if usedVars["cr"] {
  41. p -= cr
  42. }
  43. if usedVars["cc"] {
  44. p -= cc5m
  45. }
  46. if usedVars["cc1h"] {
  47. p -= cc1h
  48. }
  49. if usedVars["img"] {
  50. p -= img
  51. }
  52. if usedVars["ai"] {
  53. p -= ai
  54. }
  55. if usedVars["img_o"] {
  56. c -= imgO
  57. }
  58. if usedVars["ao"] {
  59. c -= ao
  60. }
  61. }
  62. if p < 0 {
  63. p = 0
  64. }
  65. if c < 0 {
  66. c = 0
  67. }
  68. return billingexpr.TokenParams{
  69. P: p,
  70. C: c,
  71. Len: inputLen,
  72. CR: cr,
  73. CC: cc5m,
  74. CC1h: cc1h,
  75. Img: img,
  76. ImgO: imgO,
  77. AI: ai,
  78. AO: ao,
  79. }
  80. }
  81. // TryTieredSettle checks if the request uses tiered_expr billing and, if so,
  82. // computes the actual quota using the frozen BillingSnapshot. Returns:
  83. // - ok=true, quota, result when tiered billing applies
  84. // - ok=false, 0, nil when it doesn't (caller should fall through to existing logic)
  85. func TryTieredSettle(relayInfo *relaycommon.RelayInfo, params billingexpr.TokenParams) (ok bool, quota int, result *billingexpr.TieredResult) {
  86. snap := relayInfo.TieredBillingSnapshot
  87. if snap == nil || snap.BillingMode != "tiered_expr" {
  88. return false, 0, nil
  89. }
  90. requestInput := billingexpr.RequestInput{}
  91. if relayInfo.BillingRequestInput != nil {
  92. requestInput = *relayInfo.BillingRequestInput
  93. }
  94. tr, err := billingexpr.ComputeTieredQuotaWithRequest(snap, params, requestInput)
  95. if err != nil {
  96. quota = relayInfo.FinalPreConsumedQuota
  97. if quota <= 0 {
  98. quota = snap.EstimatedQuotaAfterGroup
  99. }
  100. return true, quota, nil
  101. }
  102. return true, tr.ActualQuotaAfterGroup, &tr
  103. }