tiered_settle.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. package service
  2. import (
  3. "github.com/QuantumNous/new-api/dto"
  4. "github.com/QuantumNous/new-api/pkg/billingexpr"
  5. relaycommon "github.com/QuantumNous/new-api/relay/common"
  6. )
  7. // TieredResultWrapper is a type alias (declared with "=") for
  // billingexpr.TieredResult, making that result type available under the
  // service package name. Because it is an alias rather than a new type, the
  // two names are fully interchangeable.
  8. type TieredResultWrapper = billingexpr.TieredResult
  9. // BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
  10. // normalizing P and C so they mean "tokens not separately priced by the
  11. // expression". Sub-categories (cache, image, audio) are only subtracted
  12. // when the expression references them via their own variable.
  13. //
  14. // GPT-format APIs report prompt_tokens / completion_tokens as totals that
  15. // include all sub-categories (cache, image, audio). Claude-format APIs
  16. // report them as text-only. This function normalizes to text-only when
  17. // sub-categories are separately priced.
  18. func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
  19. p := float64(usage.PromptTokens)
  20. c := float64(usage.CompletionTokens)
  21. cr := float64(usage.PromptTokensDetails.CachedTokens)
  22. cc5m := float64(usage.PromptTokensDetails.CachedCreationTokens)
  23. cc1h := float64(0)
  24. if usage.UsageSemantic == "anthropic" {
  25. cc1h = float64(usage.ClaudeCacheCreation1hTokens)
  26. cc5m = float64(usage.ClaudeCacheCreation5mTokens)
  27. }
  28. img := float64(usage.PromptTokensDetails.ImageTokens)
  29. ai := float64(usage.PromptTokensDetails.AudioTokens)
  30. imgO := float64(usage.CompletionTokenDetails.ImageTokens)
  31. ao := float64(usage.CompletionTokenDetails.AudioTokens)
  32. if !isClaudeUsageSemantic {
  33. if usedVars["cr"] {
  34. p -= cr
  35. }
  36. if usedVars["cc"] {
  37. p -= cc5m
  38. }
  39. if usedVars["cc1h"] {
  40. p -= cc1h
  41. }
  42. if usedVars["img"] {
  43. p -= img
  44. }
  45. if usedVars["ai"] {
  46. p -= ai
  47. }
  48. if usedVars["img_o"] {
  49. c -= imgO
  50. }
  51. if usedVars["ao"] {
  52. c -= ao
  53. }
  54. }
  55. if p < 0 {
  56. p = 0
  57. }
  58. if c < 0 {
  59. c = 0
  60. }
  61. return billingexpr.TokenParams{
  62. P: p,
  63. C: c,
  64. CR: cr,
  65. CC: cc5m,
  66. CC1h: cc1h,
  67. Img: img,
  68. ImgO: imgO,
  69. AI: ai,
  70. AO: ao,
  71. }
  72. }
  73. // TryTieredSettle checks if the request uses tiered_expr billing and, if so,
  74. // computes the actual quota using the frozen BillingSnapshot. Returns:
  75. // - ok=true, quota, result when tiered billing applies
  76. // - ok=false, 0, nil when it doesn't (caller should fall through to existing logic)
  77. func TryTieredSettle(relayInfo *relaycommon.RelayInfo, params billingexpr.TokenParams) (ok bool, quota int, result *billingexpr.TieredResult) {
  78. snap := relayInfo.TieredBillingSnapshot
  79. if snap == nil || snap.BillingMode != "tiered_expr" {
  80. return false, 0, nil
  81. }
  82. requestInput := billingexpr.RequestInput{}
  83. if relayInfo.BillingRequestInput != nil {
  84. requestInput = *relayInfo.BillingRequestInput
  85. }
  86. tr, err := billingexpr.ComputeTieredQuotaWithRequest(snap, params, requestInput)
  87. if err != nil {
  88. quota = relayInfo.FinalPreConsumedQuota
  89. if quota <= 0 {
  90. quota = snap.EstimatedQuotaAfterGroup
  91. }
  92. return true, quota, nil
  93. }
  94. return true, tr.ActualQuotaAfterGroup, &tr
  95. }