tiered_settle.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. package service
  2. import (
  3. "github.com/QuantumNous/new-api/dto"
  4. "github.com/QuantumNous/new-api/pkg/billingexpr"
  5. relaycommon "github.com/QuantumNous/new-api/relay/common"
  6. )
// TieredResultWrapper is an alias for billingexpr.TieredResult, exposed for
// use at the service layer. Because it is declared with "=", it is the very
// same type as billingexpr.TieredResult (not a distinct wrapper type), so
// values of the two are interchangeable without conversion.
type TieredResultWrapper = billingexpr.TieredResult
  9. // BuildTieredTokenParams constructs billingexpr.TokenParams from a dto.Usage,
  10. // normalizing P and C so they mean "tokens not separately priced by the
  11. // expression". Sub-categories (cache, image, audio) are only subtracted
  12. // when the expression references them via their own variable.
  13. //
  14. // GPT-format APIs report prompt_tokens / completion_tokens as totals that
  15. // include all sub-categories (cache, image, audio). Claude-format APIs
  16. // report them as text-only. This function normalizes to text-only when
  17. // sub-categories are separately priced.
  18. func BuildTieredTokenParams(usage *dto.Usage, isClaudeUsageSemantic bool, usedVars map[string]bool) billingexpr.TokenParams {
  19. p := float64(usage.PromptTokens)
  20. c := float64(usage.CompletionTokens)
  21. cr := float64(usage.PromptTokensDetails.CachedTokens)
  22. ccTotal := float64(usage.PromptTokensDetails.CachedCreationTokens)
  23. cc1h := float64(usage.ClaudeCacheCreation1hTokens)
  24. img := float64(usage.PromptTokensDetails.ImageTokens)
  25. ai := float64(usage.PromptTokensDetails.AudioTokens)
  26. ao := float64(usage.CompletionTokenDetails.AudioTokens)
  27. if !isClaudeUsageSemantic {
  28. if usedVars["cr"] || usedVars["cache_read_tokens"] {
  29. p -= cr
  30. }
  31. if usedVars["cc"] || usedVars["cc1h"] || usedVars["cache_create_tokens"] || usedVars["cache_create_1h_tokens"] {
  32. p -= ccTotal
  33. }
  34. if usedVars["img"] || usedVars["image_tokens"] {
  35. p -= img
  36. }
  37. if usedVars["ai"] || usedVars["audio_input_tokens"] {
  38. p -= ai
  39. }
  40. if usedVars["ao"] || usedVars["audio_output_tokens"] {
  41. c -= ao
  42. }
  43. }
  44. if p < 0 {
  45. p = 0
  46. }
  47. if c < 0 {
  48. c = 0
  49. }
  50. return billingexpr.TokenParams{
  51. P: p,
  52. C: c,
  53. CR: cr,
  54. CC: ccTotal - cc1h,
  55. CC1h: cc1h,
  56. Img: img,
  57. AI: ai,
  58. AO: ao,
  59. }
  60. }
  61. // TryTieredSettle checks if the request uses tiered_expr billing and, if so,
  62. // computes the actual quota using the frozen BillingSnapshot. Returns:
  63. // - ok=true, quota, result when tiered billing applies
  64. // - ok=false, 0, nil when it doesn't (caller should fall through to existing logic)
  65. func TryTieredSettle(relayInfo *relaycommon.RelayInfo, params billingexpr.TokenParams) (ok bool, quota int, result *billingexpr.TieredResult) {
  66. snap := relayInfo.TieredBillingSnapshot
  67. if snap == nil || snap.BillingMode != "tiered_expr" {
  68. return false, 0, nil
  69. }
  70. requestInput := billingexpr.RequestInput{}
  71. if relayInfo.BillingRequestInput != nil {
  72. requestInput = *relayInfo.BillingRequestInput
  73. }
  74. tr, err := billingexpr.ComputeTieredQuotaWithRequest(snap, params, requestInput)
  75. if err != nil {
  76. quota = relayInfo.FinalPreConsumedQuota
  77. if quota <= 0 {
  78. quota = snap.EstimatedQuotaAfterGroup
  79. }
  80. return true, quota, nil
  81. }
  82. return true, tr.ActualQuotaAfterGroup, &tr
  83. }