1 mesiac pred · 0191a68d4e
--- a/dto/openai_response.go
+++ b/dto/openai_response.go
@@ -220,10 +220,12 @@ type CompletionsStreamResponse struct {
 
				 }
			
 
				 
			
 
				 type Usage struct {
			
 
				-	PromptTokens         int `json:"prompt_tokens"`
			
 
				-	CompletionTokens     int `json:"completion_tokens"`
			
 
				-	TotalTokens          int `json:"total_tokens"`
			
 
				-	PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
			
 
				+	PromptTokens         int    `json:"prompt_tokens"`
			
 
				+	CompletionTokens     int    `json:"completion_tokens"`
			
 
				+	TotalTokens          int    `json:"total_tokens"`
			
 
				+	PromptCacheHitTokens int    `json:"prompt_cache_hit_tokens,omitempty"`
			
 
				+	UsageSemantic        string `json:"usage_semantic,omitempty"`
			
 
				+	UsageSource          string `json:"usage_source,omitempty"`
			
 
				 
			
 
				 	PromptTokensDetails    InputTokenDetails  `json:"prompt_tokens_details"`
			
 
				 	CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
			
@@ -251,7 +253,7 @@ type OpenAIVideoResponse struct {
 
				 
			
 
				 type InputTokenDetails struct {
			
 
				 	CachedTokens         int `json:"cached_tokens"`
			
 
				-	CachedCreationTokens int `json:"-"`
			
 
				+	CachedCreationTokens int `json:"cached_creation_tokens,omitempty"`
			
 
				 	TextTokens           int `json:"text_tokens"`
			
 
				 	AudioTokens          int `json:"audio_tokens"`
			
 
				 	ImageTokens          int `json:"image_tokens"`
			
--- a/relay/audio_handler.go
+++ b/relay/audio_handler.go
@@ -70,7 +70,7 @@ func AudioHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
 
				 	if usage.(*dto.Usage).CompletionTokenDetails.AudioTokens > 0 || usage.(*dto.Usage).PromptTokensDetails.AudioTokens > 0 {
			
 
				 		service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
			
 
				 	} else {
			
 
				-		postConsumeQuota(c, info, usage.(*dto.Usage))
			
 
				+		service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
			
 
				 	}
			
 
				 
			
 
				 	return nil
			
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -555,6 +555,35 @@ type ClaudeResponseInfo struct {
 
				 	Done         bool
			
 
				 }
			
 
				 
			
 
				+// cacheCreationTokensForOpenAIUsage selects the cache-creation token count that
				+// is folded into an OpenAI-style prompt total.
				+//
				+// Two figures are compared: the aggregate stored in
				+// PromptTokensDetails.CachedCreationTokens and the sum of the 5m and 1h split
				+// counters. The aggregate is returned when the split sum is zero or when the
				+// aggregate is strictly larger than the split sum; otherwise the split sum is
				+// returned. A nil usage yields 0.
				+func cacheCreationTokensForOpenAIUsage(usage *dto.Usage) int {
				+	if usage == nil {
				+		return 0
				+	}
				+	aggregate := usage.PromptTokensDetails.CachedCreationTokens
				+	split := usage.ClaudeCacheCreation5mTokens + usage.ClaudeCacheCreation1hTokens
				+	// With no split data the aggregate is the only figure available; when both
				+	// exist, the larger one is kept so no part of the aggregate is dropped.
				+	if split == 0 || aggregate > split {
				+		return aggregate
				+	}
				+	return split
				+}
			
 
				+
			
 
				+// buildOpenAIStyleUsageFromClaudeUsage returns a copy of usage in which the
				+// prompt-side counters are totals that include cache traffic.
				+//
				+// The total is PromptTokens + PromptTokensDetails.CachedTokens + the value
				+// chosen by cacheCreationTokensForOpenAIUsage. It is written to both
				+// PromptTokens and InputTokens, and TotalTokens becomes that total plus
				+// CompletionTokens. The copy is tagged UsageSemantic "openai" and UsageSource
				+// "anthropic". The struct pointed to by usage is not modified; every field not
				+// listed above is carried over by the struct copy. A nil usage yields the zero
				+// dto.Usage.
				+func buildOpenAIStyleUsageFromClaudeUsage(usage *dto.Usage) dto.Usage {
				+	if usage == nil {
				+		return dto.Usage{}
				+	}
				+
				+	inputTotal := usage.PromptTokens +
				+		usage.PromptTokensDetails.CachedTokens +
				+		cacheCreationTokensForOpenAIUsage(usage)
				+
				+	converted := *usage // value copy, so the caller's struct stays untouched
				+	converted.PromptTokens = inputTotal
				+	converted.InputTokens = inputTotal
				+	converted.TotalTokens = inputTotal + usage.CompletionTokens
				+	converted.UsageSemantic = "openai"
				+	converted.UsageSource = "anthropic"
				+	return converted
				+}
			
 
				+
			
 
				 func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo *ClaudeResponseInfo) *dto.ClaudeUsage {
			
 
				 	usage := &dto.ClaudeUsage{}
			
 
				 	if claudeResponse != nil && claudeResponse.Usage != nil {
			
@@ -643,6 +672,7 @@ func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *d
 
				 		// message_start, 获取usage
			
 
				 		if claudeResponse.Message != nil && claudeResponse.Message.Usage != nil {
			
 
				 			claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
			
 
				+			claudeInfo.Usage.UsageSemantic = "anthropic"
			
 
				 			claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
			
 
				 			claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
			
 
				 			claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens()
			
@@ -661,6 +691,7 @@ func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *d
 
				 	} else if claudeResponse.Type == "message_delta" {
			
 
				 		// 最终的usage获取
			
 
				 		if claudeResponse.Usage != nil {
			
 
				+			claudeInfo.Usage.UsageSemantic = "anthropic"
			
 
				 			if claudeResponse.Usage.InputTokens > 0 {
			
 
				 				// 不叠加，只取最新的
			
 
				 				claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
			
@@ -754,12 +785,16 @@ func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, clau
 
				 		}
			
 
				 		claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
			
 
				 	}
			
 
				+	if claudeInfo.Usage != nil {
			
 
				+		claudeInfo.Usage.UsageSemantic = "anthropic"
			
 
				+	}
			
 
				 
			
 
				 	if info.RelayFormat == types.RelayFormatClaude {
			
 
				 		//
			
 
				 	} else if info.RelayFormat == types.RelayFormatOpenAI {
			
 
				 		if info.ShouldIncludeUsage {
			
 
				-			response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, claudeInfo.Created, info.UpstreamModelName, *claudeInfo.Usage)
			
 
				+			openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(claudeInfo.Usage)
			
 
				+			response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, claudeInfo.Created, info.UpstreamModelName, openAIUsage)
			
 
				 			err := helper.ObjectData(c, response)
			
 
				 			if err != nil {
			
 
				 				common.SysLog("send final response failed: " + err.Error())
			
@@ -810,6 +845,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
 
				 		claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
			
 
				 		claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
			
 
				 		claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
			
 
				+		claudeInfo.Usage.UsageSemantic = "anthropic"
			
 
				 		claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
			
 
				 		claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
			
 
				 		claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens()
			
@@ -819,7 +855,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
 
				 	switch info.RelayFormat {
			
 
				 	case types.RelayFormatOpenAI:
			
 
				 		openaiResponse := ResponseClaude2OpenAI(&claudeResponse)
			
 
				-		openaiResponse.Usage = *claudeInfo.Usage
			
 
				+		openaiResponse.Usage = buildOpenAIStyleUsageFromClaudeUsage(claudeInfo.Usage)
			
 
				 		responseData, err = json.Marshal(openaiResponse)
			
 
				 		if err != nil {
			
 
				 			return types.NewError(err, types.ErrorCodeBadResponseBody)
			
--- a/relay/channel/claude/relay_claude_test.go
+++ b/relay/channel/claude/relay_claude_test.go
@@ -173,3 +173,85 @@ func TestFormatClaudeResponseInfo_ContentBlockDelta(t *testing.T) {
 
				 		t.Errorf("ResponseText = %q, want %q", claudeInfo.ResponseText.String(), "hello")
			
 
				 	}
			
 
				 }
			
 
				+
			
 
				+// TestBuildOpenAIStyleUsageFromClaudeUsage converts one usage record that has
				+// both an aggregate cache-creation count and split 5m/1h counts, then checks
				+// the resulting totals and the semantic/source tags.
				+func TestBuildOpenAIStyleUsageFromClaudeUsage(t *testing.T) {
				+	source := &dto.Usage{
				+		PromptTokens:     100,
				+		CompletionTokens: 20,
				+		PromptTokensDetails: dto.InputTokenDetails{
				+			CachedTokens:         30,
				+			CachedCreationTokens: 50,
				+		},
				+		ClaudeCacheCreation5mTokens: 10,
				+		ClaudeCacheCreation1hTokens: 20,
				+		UsageSemantic:               "anthropic",
				+	}
				+
				+	converted := buildOpenAIStyleUsageFromClaudeUsage(source)
				+
				+	// 100 prompt + 30 cached + 50 cache creation (aggregate 50 beats split 10+20).
				+	if got := converted.PromptTokens; got != 180 {
				+		t.Fatalf("PromptTokens = %d, want 180", got)
				+	}
				+	if got := converted.InputTokens; got != 180 {
				+		t.Fatalf("InputTokens = %d, want 180", got)
				+	}
				+	// 180 input + 20 completion.
				+	if got := converted.TotalTokens; got != 200 {
				+		t.Fatalf("TotalTokens = %d, want 200", got)
				+	}
				+	if got := converted.UsageSemantic; got != "openai" {
				+		t.Fatalf("UsageSemantic = %s, want openai", got)
				+	}
				+	if got := converted.UsageSource; got != "anthropic" {
				+		t.Fatalf("UsageSource = %s, want anthropic", got)
				+	}
				+}
			
 
				+
			
 
				+// TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder runs
				+// the conversion over a table of aggregate/split cache-creation combinations
				+// and checks that PromptTokens and InputTokens both equal the expected input
				+// total. Every case uses 100 prompt tokens and 30 cached tokens as the base.
				+func TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder(t *testing.T) {
				+	cases := []struct {
				+		name      string
				+		aggregate int
				+		split5m   int
				+		split1h   int
				+		wantInput int
				+	}{
				+		{
				+			// 100 + 30 + 50: the aggregate exceeds the split sum of 30.
				+			name:      "prefers aggregate when it includes remainder",
				+			aggregate: 50,
				+			split5m:   10,
				+			split1h:   20,
				+			wantInput: 180,
				+		},
				+		{
				+			// 100 + 30 + 30: no aggregate, so the split sum is used.
				+			name:      "falls back to split tokens when aggregate missing",
				+			aggregate: 0,
				+			split5m:   10,
				+			split1h:   20,
				+			wantInput: 160,
				+		},
				+	}
				+
				+	for _, tc := range cases {
				+		t.Run(tc.name, func(t *testing.T) {
				+			source := &dto.Usage{
				+				PromptTokens:     100,
				+				CompletionTokens: 20,
				+				PromptTokensDetails: dto.InputTokenDetails{
				+					CachedTokens:         30,
				+					CachedCreationTokens: tc.aggregate,
				+				},
				+				ClaudeCacheCreation5mTokens: tc.split5m,
				+				ClaudeCacheCreation1hTokens: tc.split1h,
				+				UsageSemantic:               "anthropic",
				+			}
				+
				+			converted := buildOpenAIStyleUsageFromClaudeUsage(source)
				+
				+			if got := converted.PromptTokens; got != tc.wantInput {
				+				t.Fatalf("PromptTokens = %d, want %d", got, tc.wantInput)
				+			}
				+			if got := converted.InputTokens; got != tc.wantInput {
				+				t.Fatalf("InputTokens = %d, want %d", got, tc.wantInput)
				+			}
				+		})
				+	}
				+}
			
--- a/relay/claude_handler.go
+++ b/relay/claude_handler.go
@@ -122,7 +122,7 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 
				 			return newApiErr
			
 
				 		}
			
 
				 
			
 
				-		service.PostClaudeConsumeQuota(c, info, usage)
			
 
				+		service.PostTextConsumeQuota(c, info, usage, nil)
			
 
				 		return nil
			
 
				 	}
			
 
				 
			
@@ -190,6 +190,6 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 
				 		return newAPIError
			
 
				 	}
			
 
				 
			
 
				-	service.PostClaudeConsumeQuota(c, info, usage.(*dto.Usage))
			
 
				+	service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
			
 
				 	return nil
			
 
				 }
			
--- a/relay/compatible_handler.go
+++ b/relay/compatible_handler.go
@@ -6,25 +6,20 @@ import (
 
				 	"io"
			
 
				 	"net/http"
			
 
				 	"strings"
			
 
				-	"time"
			
 
				 
			
 
				 	"github.com/QuantumNous/new-api/common"
			
 
				 	"github.com/QuantumNous/new-api/constant"
			
 
				 	"github.com/QuantumNous/new-api/dto"
			
 
				 	"github.com/QuantumNous/new-api/logger"
			
 
				-	"github.com/QuantumNous/new-api/model"
			
 
				 	relaycommon "github.com/QuantumNous/new-api/relay/common"
			
 
				 	relayconstant "github.com/QuantumNous/new-api/relay/constant"
			
 
				 	"github.com/QuantumNous/new-api/relay/helper"
			
 
				 	"github.com/QuantumNous/new-api/service"
			
 
				 	"github.com/QuantumNous/new-api/setting/model_setting"
			
 
				-	"github.com/QuantumNous/new-api/setting/operation_setting"
			
 
				 	"github.com/QuantumNous/new-api/setting/ratio_setting"
			
 
				 	"github.com/QuantumNous/new-api/types"
			
 
				 	"github.com/samber/lo"
			
 
				 
			
 
				-	"github.com/shopspring/decimal"
			
 
				-
			
 
				 	"github.com/gin-gonic/gin"
			
 
				 )
			
 
				 
			
@@ -93,7 +88,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
 
				 		if containAudioTokens && containsAudioRatios {
			
 
				 			service.PostAudioConsumeQuota(c, info, usage, "")
			
 
				 		} else {
			
 
				-			postConsumeQuota(c, info, usage)
			
 
				+			service.PostTextConsumeQuota(c, info, usage, nil)
			
 
				 		}
			
 
				 		return nil
			
 
				 	}
			
@@ -216,293 +211,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
 
				 	if containAudioTokens && containsAudioRatios {
			
 
				 		service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
			
 
				 	} else {
			
 
				-		postConsumeQuota(c, info, usage.(*dto.Usage))
			
 
				+		service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
			
 
				 	}
			
 
				 	return nil
			
 
				 }
			
 
				-
			
 
				-func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent ...string) {
			
 
				-	originUsage := usage
			
 
				-	if usage == nil {
			
 
				-		usage = &dto.Usage{
			
 
				-			PromptTokens:     relayInfo.GetEstimatePromptTokens(),
			
 
				-			CompletionTokens: 0,
			
 
				-			TotalTokens:      relayInfo.GetEstimatePromptTokens(),
			
 
				-		}
			
 
				-		extraContent = append(extraContent, "上游无计费信息")
			
 
				-	}
			
 
				-
			
 
				-	if originUsage != nil {
			
 
				-		service.ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
			
 
				-	}
			
 
				-
			
 
				-	adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
			
 
				-
			
 
				-	useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
			
 
				-	promptTokens := usage.PromptTokens
			
 
				-	cacheTokens := usage.PromptTokensDetails.CachedTokens
			
 
				-	imageTokens := usage.PromptTokensDetails.ImageTokens
			
 
				-	audioTokens := usage.PromptTokensDetails.AudioTokens
			
 
				-	completionTokens := usage.CompletionTokens
			
 
				-	cachedCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
			
 
				-
			
 
				-	modelName := relayInfo.OriginModelName
			
 
				-
			
 
				-	tokenName := ctx.GetString("token_name")
			
 
				-	completionRatio := relayInfo.PriceData.CompletionRatio
			
 
				-	cacheRatio := relayInfo.PriceData.CacheRatio
			
 
				-	imageRatio := relayInfo.PriceData.ImageRatio
			
 
				-	modelRatio := relayInfo.PriceData.ModelRatio
			
 
				-	groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio
			
 
				-	modelPrice := relayInfo.PriceData.ModelPrice
			
 
				-	cachedCreationRatio := relayInfo.PriceData.CacheCreationRatio
			
 
				-
			
 
				-	// Convert values to decimal for precise calculation
			
 
				-	dPromptTokens := decimal.NewFromInt(int64(promptTokens))
			
 
				-	dCacheTokens := decimal.NewFromInt(int64(cacheTokens))
			
 
				-	dImageTokens := decimal.NewFromInt(int64(imageTokens))
			
 
				-	dAudioTokens := decimal.NewFromInt(int64(audioTokens))
			
 
				-	dCompletionTokens := decimal.NewFromInt(int64(completionTokens))
			
 
				-	dCachedCreationTokens := decimal.NewFromInt(int64(cachedCreationTokens))
			
 
				-	dCompletionRatio := decimal.NewFromFloat(completionRatio)
			
 
				-	dCacheRatio := decimal.NewFromFloat(cacheRatio)
			
 
				-	dImageRatio := decimal.NewFromFloat(imageRatio)
			
 
				-	dModelRatio := decimal.NewFromFloat(modelRatio)
			
 
				-	dGroupRatio := decimal.NewFromFloat(groupRatio)
			
 
				-	dModelPrice := decimal.NewFromFloat(modelPrice)
			
 
				-	dCachedCreationRatio := decimal.NewFromFloat(cachedCreationRatio)
			
 
				-	dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
			
 
				-
			
 
				-	ratio := dModelRatio.Mul(dGroupRatio)
			
 
				-
			
 
				-	// openai web search 工具计费
			
 
				-	var dWebSearchQuota decimal.Decimal
			
 
				-	var webSearchPrice float64
			
 
				-	// response api 格式工具计费
			
 
				-	if relayInfo.ResponsesUsageInfo != nil {
			
 
				-		if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 {
			
 
				-			// 计算 web search 调用的配额 (配额 = 价格 * 调用次数 / 1000 * 分组倍率)
			
 
				-			webSearchPrice = operation_setting.GetWebSearchPricePerThousand(modelName, webSearchTool.SearchContextSize)
			
 
				-			dWebSearchQuota = decimal.NewFromFloat(webSearchPrice).
			
 
				-				Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))).
			
 
				-				Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				-			extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次，上下文大小 %s，调用花费 %s",
			
 
				-				webSearchTool.CallCount, webSearchTool.SearchContextSize, dWebSearchQuota.String()))
			
 
				-		}
			
 
				-	} else if strings.HasSuffix(modelName, "search-preview") {
			
 
				-		// search-preview 模型不支持 response api
			
 
				-		searchContextSize := ctx.GetString("chat_completion_web_search_context_size")
			
 
				-		if searchContextSize == "" {
			
 
				-			searchContextSize = "medium"
			
 
				-		}
			
 
				-		webSearchPrice = operation_setting.GetWebSearchPricePerThousand(modelName, searchContextSize)
			
 
				-		dWebSearchQuota = decimal.NewFromFloat(webSearchPrice).
			
 
				-			Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				-		extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 1 次，上下文大小 %s，调用花费 %s",
			
 
				-			searchContextSize, dWebSearchQuota.String()))
			
 
				-	}
			
 
				-	// claude web search tool 计费
			
 
				-	var dClaudeWebSearchQuota decimal.Decimal
			
 
				-	var claudeWebSearchPrice float64
			
 
				-	claudeWebSearchCallCount := ctx.GetInt("claude_web_search_requests")
			
 
				-	if claudeWebSearchCallCount > 0 {
			
 
				-		claudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand()
			
 
				-		dClaudeWebSearchQuota = decimal.NewFromFloat(claudeWebSearchPrice).
			
 
				-			Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit).Mul(decimal.NewFromInt(int64(claudeWebSearchCallCount)))
			
 
				-		extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次，调用花费 %s",
			
 
				-			claudeWebSearchCallCount, dClaudeWebSearchQuota.String()))
			
 
				-	}
			
 
				-	// file search tool 计费
			
 
				-	var dFileSearchQuota decimal.Decimal
			
 
				-	var fileSearchPrice float64
			
 
				-	if relayInfo.ResponsesUsageInfo != nil {
			
 
				-		if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 {
			
 
				-			fileSearchPrice = operation_setting.GetFileSearchPricePerThousand()
			
 
				-			dFileSearchQuota = decimal.NewFromFloat(fileSearchPrice).
			
 
				-				Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
			
 
				-				Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				-			extraContent = append(extraContent, fmt.Sprintf("File Search 调用 %d 次，调用花费 %s",
			
 
				-				fileSearchTool.CallCount, dFileSearchQuota.String()))
			
 
				-		}
			
 
				-	}
			
 
				-	var dImageGenerationCallQuota decimal.Decimal
			
 
				-	var imageGenerationCallPrice float64
			
 
				-	if ctx.GetBool("image_generation_call") {
			
 
				-		imageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size"))
			
 
				-		dImageGenerationCallQuota = decimal.NewFromFloat(imageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				-		extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", dImageGenerationCallQuota.String()))
			
 
				-	}
			
 
				-
			
 
				-	var quotaCalculateDecimal decimal.Decimal
			
 
				-
			
 
				-	var audioInputQuota decimal.Decimal
			
 
				-	var audioInputPrice float64
			
 
				-	isClaudeUsageSemantic := relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude
			
 
				-	if !relayInfo.PriceData.UsePrice {
			
 
				-		baseTokens := dPromptTokens
			
 
				-		// 减去 cached tokens
			
 
				-		// Anthropic API 的 input_tokens 已经不包含缓存 tokens，不需要减去
			
 
				-		// OpenAI/OpenRouter 等 API 的 prompt_tokens 包含缓存 tokens，需要减去
			
 
				-		var cachedTokensWithRatio decimal.Decimal
			
 
				-		if !dCacheTokens.IsZero() {
			
 
				-			if !isClaudeUsageSemantic {
			
 
				-				baseTokens = baseTokens.Sub(dCacheTokens)
			
 
				-			}
			
 
				-			cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
			
 
				-		}
			
 
				-		var dCachedCreationTokensWithRatio decimal.Decimal
			
 
				-		if !dCachedCreationTokens.IsZero() {
			
 
				-			if !isClaudeUsageSemantic {
			
 
				-				baseTokens = baseTokens.Sub(dCachedCreationTokens)
			
 
				-			}
			
 
				-			dCachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCachedCreationRatio)
			
 
				-		}
			
 
				-
			
 
				-		// 减去 image tokens
			
 
				-		var imageTokensWithRatio decimal.Decimal
			
 
				-		if !dImageTokens.IsZero() {
			
 
				-			baseTokens = baseTokens.Sub(dImageTokens)
			
 
				-			imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
			
 
				-		}
			
 
				-
			
 
				-		// 减去 Gemini audio tokens
			
 
				-		if !dAudioTokens.IsZero() {
			
 
				-			audioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(modelName)
			
 
				-			if audioInputPrice > 0 {
			
 
				-				// 重新计算 base tokens
			
 
				-				baseTokens = baseTokens.Sub(dAudioTokens)
			
 
				-				audioInputQuota = decimal.NewFromFloat(audioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				-				extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", audioInputQuota.String()))
			
 
				-			}
			
 
				-		}
			
 
				-		promptQuota := baseTokens.Add(cachedTokensWithRatio).
			
 
				-			Add(imageTokensWithRatio).
			
 
				-			Add(dCachedCreationTokensWithRatio)
			
 
				-
			
 
				-		completionQuota := dCompletionTokens.Mul(dCompletionRatio)
			
 
				-
			
 
				-		quotaCalculateDecimal = promptQuota.Add(completionQuota).Mul(ratio)
			
 
				-
			
 
				-		if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) {
			
 
				-			quotaCalculateDecimal = decimal.NewFromInt(1)
			
 
				-		}
			
 
				-	} else {
			
 
				-		quotaCalculateDecimal = dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio)
			
 
				-	}
			
 
				-	// 添加 responses tools call 调用的配额
			
 
				-	quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
			
 
				-	quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
			
 
				-	// 添加 audio input 独立计费
			
 
				-	quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
			
 
				-	// 添加 image generation call 计费
			
 
				-	quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
			
 
				-
			
 
				-	if len(relayInfo.PriceData.OtherRatios) > 0 {
			
 
				-		for key, otherRatio := range relayInfo.PriceData.OtherRatios {
			
 
				-			dOtherRatio := decimal.NewFromFloat(otherRatio)
			
 
				-			quotaCalculateDecimal = quotaCalculateDecimal.Mul(dOtherRatio)
			
 
				-			extraContent = append(extraContent, fmt.Sprintf("其他倍率 %s: %f", key, otherRatio))
			
 
				-		}
			
 
				-	}
			
 
				-
			
 
				-	quota := int(quotaCalculateDecimal.Round(0).IntPart())
			
 
				-	totalTokens := promptTokens + completionTokens
			
 
				-
			
 
				-	//var logContent string
			
 
				-
			
 
				-	// record all the consume log even if quota is 0
			
 
				-	if totalTokens == 0 {
			
 
				-		// in this case, must be some error happened
			
 
				-		// we cannot just return, because we may have to return the pre-consumed quota
			
 
				-		quota = 0
			
 
				-		extraContent = append(extraContent, "上游没有返回计费信息，无法扣费（可能是上游超时）")
			
 
				-		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
			
 
				-			"tokenId %d, model %s， pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
			
 
				-	} else {
			
 
				-		if !ratio.IsZero() && quota == 0 {
			
 
				-			quota = 1
			
 
				-		}
			
 
				-		model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
			
 
				-		model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
			
 
				-	}
			
 
				-
			
 
				-	if err := service.SettleBilling(ctx, relayInfo, quota); err != nil {
			
 
				-		logger.LogError(ctx, "error settling billing: "+err.Error())
			
 
				-	}
			
 
				-
			
 
				-	logModel := modelName
			
 
				-	if strings.HasPrefix(logModel, "gpt-4-gizmo") {
			
 
				-		logModel = "gpt-4-gizmo-*"
			
 
				-		extraContent = append(extraContent, fmt.Sprintf("模型 %s", modelName))
			
 
				-	}
			
 
				-	if strings.HasPrefix(logModel, "gpt-4o-gizmo") {
			
 
				-		logModel = "gpt-4o-gizmo-*"
			
 
				-		extraContent = append(extraContent, fmt.Sprintf("模型 %s", modelName))
			
 
				-	}
			
 
				-	logContent := strings.Join(extraContent, ", ")
			
 
				-	other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
			
 
				-	if adminRejectReason != "" {
			
 
				-		other["reject_reason"] = adminRejectReason
			
 
				-	}
			
 
				-	// For chat-based calls to the Claude model, tagging is required. Using Claude's rendering logs, the two approaches handle input rendering differently.
			
 
				-	if isClaudeUsageSemantic {
			
 
				-		other["claude"] = true
			
 
				-		other["usage_semantic"] = "anthropic"
			
 
				-	}
			
 
				-	if imageTokens != 0 {
			
 
				-		other["image"] = true
			
 
				-		other["image_ratio"] = imageRatio
			
 
				-		other["image_output"] = imageTokens
			
 
				-	}
			
 
				-	if cachedCreationTokens != 0 {
			
 
				-		other["cache_creation_tokens"] = cachedCreationTokens
			
 
				-		other["cache_creation_ratio"] = cachedCreationRatio
			
 
				-	}
			
 
				-	if !dWebSearchQuota.IsZero() {
			
 
				-		if relayInfo.ResponsesUsageInfo != nil {
			
 
				-			if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists {
			
 
				-				other["web_search"] = true
			
 
				-				other["web_search_call_count"] = webSearchTool.CallCount
			
 
				-				other["web_search_price"] = webSearchPrice
			
 
				-			}
			
 
				-		} else if strings.HasSuffix(modelName, "search-preview") {
			
 
				-			other["web_search"] = true
			
 
				-			other["web_search_call_count"] = 1
			
 
				-			other["web_search_price"] = webSearchPrice
			
 
				-		}
			
 
				-	} else if !dClaudeWebSearchQuota.IsZero() {
			
 
				-		other["web_search"] = true
			
 
				-		other["web_search_call_count"] = claudeWebSearchCallCount
			
 
				-		other["web_search_price"] = claudeWebSearchPrice
			
 
				-	}
			
 
				-	if !dFileSearchQuota.IsZero() && relayInfo.ResponsesUsageInfo != nil {
			
 
				-		if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists {
			
 
				-			other["file_search"] = true
			
 
				-			other["file_search_call_count"] = fileSearchTool.CallCount
			
 
				-			other["file_search_price"] = fileSearchPrice
			
 
				-		}
			
 
				-	}
			
 
				-	if !audioInputQuota.IsZero() {
			
 
				-		other["audio_input_seperate_price"] = true
			
 
				-		other["audio_input_token_count"] = audioTokens
			
 
				-		other["audio_input_price"] = audioInputPrice
			
 
				-	}
			
 
				-	if !dImageGenerationCallQuota.IsZero() {
			
 
				-		other["image_generation_call"] = true
			
 
				-		other["image_generation_call_price"] = imageGenerationCallPrice
			
 
				-	}
			
 
				-	model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
			
 
				-		ChannelId:        relayInfo.ChannelId,
			
 
				-		PromptTokens:     promptTokens,
			
 
				-		CompletionTokens: completionTokens,
			
 
				-		ModelName:        logModel,
			
 
				-		TokenName:        tokenName,
			
 
				-		Quota:            quota,
			
 
				-		Content:          logContent,
			
 
				-		TokenId:          relayInfo.TokenId,
			
 
				-		UseTimeSeconds:   int(useTimeSeconds),
			
 
				-		IsStream:         relayInfo.IsStream,
			
 
				-		Group:            relayInfo.UsingGroup,
			
 
				-		Other:            other,
			
 
				-	})
			
 
				-}
			
--- a/relay/embedding_handler.go
+++ b/relay/embedding_handler.go
@@ -82,6 +82,6 @@ func EmbeddingHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
 
				 		service.ResetStatusCode(newAPIError, statusCodeMappingStr)
			
 
				 		return newAPIError
			
 
				 	}
			
 
				-	postConsumeQuota(c, info, usage.(*dto.Usage))
			
 
				+	service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
			
 
				 	return nil
			
 
				 }
			
--- a/relay/gemini_handler.go
+++ b/relay/gemini_handler.go
@@ -194,7 +194,7 @@ func GeminiHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 
				 		return openaiErr
			
 
				 	}
			
 
				 
			
 
				-	postConsumeQuota(c, info, usage.(*dto.Usage))
			
 
				+	service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
			
 
				 	return nil
			
 
				 }
			
 
				 
			
@@ -288,6 +288,6 @@ func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo) (newAPI
 
				 		return openaiErr
			
 
				 	}
			
 
				 
			
 
				-	postConsumeQuota(c, info, usage.(*dto.Usage))
			
 
				+	service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
			
 
				 	return nil
			
 
				 }
			
--- a/relay/image_handler.go
+++ b/relay/image_handler.go
@@ -141,6 +141,6 @@ func ImageHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
 
				 		logContent = append(logContent, fmt.Sprintf("生成数量 %d", imageN))
			
 
				 	}
			
 
				 
			
 
				-	postConsumeQuota(c, info, usage.(*dto.Usage), logContent...)
			
 
				+	service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), logContent)
			
 
				 	return nil
			
 
				 }
			
--- a/relay/rerank_handler.go
+++ b/relay/rerank_handler.go
@@ -96,6 +96,6 @@ func RerankHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 
				 		service.ResetStatusCode(newAPIError, statusCodeMappingStr)
			
 
				 		return newAPIError
			
 
				 	}
			
 
				-	postConsumeQuota(c, info, usage.(*dto.Usage))
			
 
				+	service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
			
 
				 	return nil
			
 
				 }
			
--- a/relay/responses_handler.go
+++ b/relay/responses_handler.go
@@ -145,7 +145,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
 
				 			info.PriceData = originPriceData
			
 
				 			return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry())
			
 
				 		}
			
 
				-		postConsumeQuota(c, info, usageDto)
			
 
				+		service.PostTextConsumeQuota(c, info, usageDto, nil)
			
 
				 
			
 
				 		info.OriginModelName = originModelName
			
 
				 		info.PriceData = originPriceData
			
@@ -155,7 +155,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
 
				 	if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
			
 
				 		service.PostAudioConsumeQuota(c, info, usageDto, "")
			
 
				 	} else {
			
 
				-		postConsumeQuota(c, info, usageDto)
			
 
				+		service.PostTextConsumeQuota(c, info, usageDto, nil)
			
 
				 	}
			
 
				 	return nil
			
 
				 }
			
--- a/service/convert.go
+++ b/service/convert.go
@@ -223,6 +223,25 @@ func generateStopBlock(index int) *dto.ClaudeResponse {
 
				 	}
			
 
				 }
			
 
				 
			
 
				+// buildClaudeUsageFromOpenAIUsage maps an OpenAI-style usage record onto the
			
 
				+// Claude usage shape. A nil input yields nil. The 5m/1h cache-creation
			
 
				+// breakdown is attached only when at least one of the two counters is positive.
			
 
				+func buildClaudeUsageFromOpenAIUsage(oaiUsage *dto.Usage) *dto.ClaudeUsage {
			
 
				+	if oaiUsage == nil {
			
 
				+		return nil
			
 
				+	}
			
 
				+
			
 
				+	details := oaiUsage.PromptTokensDetails
			
 
				+	claudeUsage := new(dto.ClaudeUsage)
			
 
				+	claudeUsage.InputTokens = oaiUsage.PromptTokens
			
 
				+	claudeUsage.OutputTokens = oaiUsage.CompletionTokens
			
 
				+	claudeUsage.CacheCreationInputTokens = details.CachedCreationTokens
			
 
				+	claudeUsage.CacheReadInputTokens = details.CachedTokens
			
 
				+
			
 
				+	fiveMin := oaiUsage.ClaudeCacheCreation5mTokens
			
 
				+	oneHour := oaiUsage.ClaudeCacheCreation1hTokens
			
 
				+	if fiveMin <= 0 && oneHour <= 0 {
			
 
				+		// No split counters reported: leave CacheCreation unset.
			
 
				+		return claudeUsage
			
 
				+	}
			
 
				+	claudeUsage.CacheCreation = &dto.ClaudeCacheCreationUsage{
			
 
				+		Ephemeral5mInputTokens: fiveMin,
			
 
				+		Ephemeral1hInputTokens: oneHour,
			
 
				+	}
			
 
				+	return claudeUsage
			
 
				+}
			
 
				+
			
 
				 func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
			
 
				 	if info.ClaudeConvertInfo.Done {
			
 
				 		return nil
			
@@ -391,13 +410,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
 
				 			}
			
 
				 			if oaiUsage != nil {
			
 
				 				claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
			
 
				-					Type: "message_delta",
			
 
				-					Usage: &dto.ClaudeUsage{
			
 
				-						InputTokens:              oaiUsage.PromptTokens,
			
 
				-						OutputTokens:             oaiUsage.CompletionTokens,
			
 
				-						CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
			
 
				-						CacheReadInputTokens:     oaiUsage.PromptTokensDetails.CachedTokens,
			
 
				-					},
			
 
				+					Type:  "message_delta",
			
 
				+					Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
			
 
				 					Delta: &dto.ClaudeMediaMessage{
			
 
				 						StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
			
 
				 					},
			
@@ -419,13 +433,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
 
				 			oaiUsage := info.ClaudeConvertInfo.Usage
			
 
				 			if oaiUsage != nil {
			
 
				 				claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
			
 
				-					Type: "message_delta",
			
 
				-					Usage: &dto.ClaudeUsage{
			
 
				-						InputTokens:              oaiUsage.PromptTokens,
			
 
				-						OutputTokens:             oaiUsage.CompletionTokens,
			
 
				-						CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
			
 
				-						CacheReadInputTokens:     oaiUsage.PromptTokensDetails.CachedTokens,
			
 
				-					},
			
 
				+					Type:  "message_delta",
			
 
				+					Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
			
 
				 					Delta: &dto.ClaudeMediaMessage{
			
 
				 						StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
			
 
				 					},
			
@@ -555,13 +564,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
 
				 			}
			
 
				 			if oaiUsage != nil {
			
 
				 				claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
			
 
				-					Type: "message_delta",
			
 
				-					Usage: &dto.ClaudeUsage{
			
 
				-						InputTokens:              oaiUsage.PromptTokens,
			
 
				-						OutputTokens:             oaiUsage.CompletionTokens,
			
 
				-						CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
			
 
				-						CacheReadInputTokens:     oaiUsage.PromptTokensDetails.CachedTokens,
			
 
				-					},
			
 
				+					Type:  "message_delta",
			
 
				+					Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
			
 
				 					Delta: &dto.ClaudeMediaMessage{
			
 
				 						StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
			
 
				 					},
			
--- a/service/log_info_generate.go
+++ b/service/log_info_generate.go
@@ -73,6 +73,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
 
				 	other["admin_info"] = adminInfo
			
 
				 	appendRequestPath(ctx, relayInfo, other)
			
 
				 	appendRequestConversionChain(relayInfo, other)
			
 
				+	appendFinalRequestFormat(relayInfo, other)
			
 
				 	appendBillingInfo(relayInfo, other)
			
 
				 	appendParamOverrideInfo(relayInfo, other)
			
 
				 	return other
			
@@ -167,6 +168,17 @@ func appendRequestConversionChain(relayInfo *relaycommon.RelayInfo, other map[st
 
				 	other["request_conversion"] = chain
			
 
				 }
			
 
				 
			
 
				+// appendFinalRequestFormat sets other["claude"] = true when the final upstream
			
 
				+// request format is Claude Messages; frontend log rendering uses the flag to
			
 
				+// keep the original Claude input display. It does nothing when relayInfo or
			
 
				+// other is nil, or when the final format is anything else.
			
 
				+func appendFinalRequestFormat(relayInfo *relaycommon.RelayInfo, other map[string]interface{}) {
			
 
				+	switch {
			
 
				+	case relayInfo == nil, other == nil:
			
 
				+		return
			
 
				+	case relayInfo.GetFinalRequestRelayFormat() != types.RelayFormatClaude:
			
 
				+		return
			
 
				+	}
			
 
				+	other["claude"] = true
			
 
				+}
			
 
				+
			
 
				 func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice, userGroupRatio float64) map[string]interface{} {
			
 
				 	info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice, userGroupRatio)
			
 
				 	info["ws"] = true
			
--- a/service/quota.go
+++ b/service/quota.go
@@ -235,108 +235,6 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
 
				 	})
			
 
				 }
			
 
				 
			
 
				-func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) {
			
 
				-	if usage != nil {
			
 
				-		ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
			
 
				-	}
			
 
				-
			
 
				-	useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
			
 
				-	promptTokens := usage.PromptTokens
			
 
				-	completionTokens := usage.CompletionTokens
			
 
				-	modelName := relayInfo.OriginModelName
			
 
				-
			
 
				-	tokenName := ctx.GetString("token_name")
			
 
				-	completionRatio := relayInfo.PriceData.CompletionRatio
			
 
				-	modelRatio := relayInfo.PriceData.ModelRatio
			
 
				-	groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio
			
 
				-	modelPrice := relayInfo.PriceData.ModelPrice
			
 
				-	cacheRatio := relayInfo.PriceData.CacheRatio
			
 
				-	cacheTokens := usage.PromptTokensDetails.CachedTokens
			
 
				-
			
 
				-	cacheCreationRatio := relayInfo.PriceData.CacheCreationRatio
			
 
				-	cacheCreationRatio5m := relayInfo.PriceData.CacheCreation5mRatio
			
 
				-	cacheCreationRatio1h := relayInfo.PriceData.CacheCreation1hRatio
			
 
				-	cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
			
 
				-	cacheCreationTokens5m := usage.ClaudeCacheCreation5mTokens
			
 
				-	cacheCreationTokens1h := usage.ClaudeCacheCreation1hTokens
			
 
				-
			
 
				-	if relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
			
 
				-		promptTokens -= cacheTokens
			
 
				-		isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(modelName, relayInfo.PriceData.ModelRatio)
			
 
				-		if cacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
			
 
				-			maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData)
			
 
				-			if maybeCacheCreationTokens >= 0 && promptTokens >= maybeCacheCreationTokens {
			
 
				-				cacheCreationTokens = maybeCacheCreationTokens
			
 
				-			}
			
 
				-		}
			
 
				-		promptTokens -= cacheCreationTokens
			
 
				-	}
			
 
				-
			
 
				-	calculateQuota := 0.0
			
 
				-	if !relayInfo.PriceData.UsePrice {
			
 
				-		calculateQuota = float64(promptTokens)
			
 
				-		calculateQuota += float64(cacheTokens) * cacheRatio
			
 
				-		calculateQuota += float64(cacheCreationTokens5m) * cacheCreationRatio5m
			
 
				-		calculateQuota += float64(cacheCreationTokens1h) * cacheCreationRatio1h
			
 
				-		remainingCacheCreationTokens := cacheCreationTokens - cacheCreationTokens5m - cacheCreationTokens1h
			
 
				-		if remainingCacheCreationTokens > 0 {
			
 
				-			calculateQuota += float64(remainingCacheCreationTokens) * cacheCreationRatio
			
 
				-		}
			
 
				-		calculateQuota += float64(completionTokens) * completionRatio
			
 
				-		calculateQuota = calculateQuota * groupRatio * modelRatio
			
 
				-	} else {
			
 
				-		calculateQuota = modelPrice * common.QuotaPerUnit * groupRatio
			
 
				-	}
			
 
				-
			
 
				-	if modelRatio != 0 && calculateQuota <= 0 {
			
 
				-		calculateQuota = 1
			
 
				-	}
			
 
				-
			
 
				-	quota := int(calculateQuota)
			
 
				-
			
 
				-	totalTokens := promptTokens + completionTokens
			
 
				-
			
 
				-	var logContent string
			
 
				-	// record all the consume log even if quota is 0
			
 
				-	if totalTokens == 0 {
			
 
				-		// in this case, must be some error happened
			
 
				-		// we cannot just return, because we may have to return the pre-consumed quota
			
 
				-		quota = 0
			
 
				-		logContent += fmt.Sprintf("（可能是上游出错）")
			
 
				-		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
			
 
				-			"tokenId %d, model %s， pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
			
 
				-	} else {
			
 
				-		model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
			
 
				-		model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
			
 
				-	}
			
 
				-
			
 
				-	if err := SettleBilling(ctx, relayInfo, quota); err != nil {
			
 
				-		logger.LogError(ctx, "error settling billing: "+err.Error())
			
 
				-	}
			
 
				-
			
 
				-	other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio,
			
 
				-		cacheTokens, cacheRatio,
			
 
				-		cacheCreationTokens, cacheCreationRatio,
			
 
				-		cacheCreationTokens5m, cacheCreationRatio5m,
			
 
				-		cacheCreationTokens1h, cacheCreationRatio1h,
			
 
				-		modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
			
 
				-	model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
			
 
				-		ChannelId:        relayInfo.ChannelId,
			
 
				-		PromptTokens:     promptTokens,
			
 
				-		CompletionTokens: completionTokens,
			
 
				-		ModelName:        modelName,
			
 
				-		TokenName:        tokenName,
			
 
				-		Quota:            quota,
			
 
				-		Content:          logContent,
			
 
				-		TokenId:          relayInfo.TokenId,
			
 
				-		UseTimeSeconds:   int(useTimeSeconds),
			
 
				-		IsStream:         relayInfo.IsStream,
			
 
				-		Group:            relayInfo.UsingGroup,
			
 
				-		Other:            other,
			
 
				-	})
			
 
				-
			
 
				-}
			
 
				-
			
 
				 func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData types.PriceData) int {
			
 
				 	if priceData.CacheCreationRatio == 1 {
			
 
				 		return 0
			
--- a/service/text_quota.go
+++ b/service/text_quota.go
@@ -0,0 +1,427 @@
 
				+package service
			
 
				+
			
 
				+import (
			
 
				+	"fmt"
			
 
				+	"strings"
			
 
				+	"time"
			
 
				+
			
 
				+	"github.com/QuantumNous/new-api/common"
			
 
				+	"github.com/QuantumNous/new-api/constant"
			
 
				+	"github.com/QuantumNous/new-api/dto"
			
 
				+	"github.com/QuantumNous/new-api/logger"
			
 
				+	"github.com/QuantumNous/new-api/model"
			
 
				+	relaycommon "github.com/QuantumNous/new-api/relay/common"
			
 
				+	"github.com/QuantumNous/new-api/setting/operation_setting"
			
 
				+	"github.com/QuantumNous/new-api/types"
			
 
				+
			
 
				+	"github.com/gin-gonic/gin"
			
 
				+	"github.com/shopspring/decimal"
			
 
				+)
			
 
				+
			
 
				+// textQuotaSummary is the result of calculateTextQuotaSummary: the token
			
 
				+// counts taken from the usage record, the ratios and prices copied from
			
 
				+// relayInfo.PriceData, the per-tool prices and call counts, and the final Quota.
			
 
				+type textQuotaSummary struct {
			
 
				+	PromptTokens             int // usage.PromptTokens; reduced by cache read/creation tokens on OpenRouter channels
			
 
				+	CompletionTokens         int
			
 
				+	TotalTokens              int // usage.PromptTokens + usage.CompletionTokens, taken before any OpenRouter adjustment
			
 
				+	CacheTokens              int // usage.PromptTokensDetails.CachedTokens
			
 
				+	CacheCreationTokens      int // usage.PromptTokensDetails.CachedCreationTokens, or an inferred value on OpenRouter
			
 
				+	CacheCreationTokens5m    int // usage.ClaudeCacheCreation5mTokens
			
 
				+	CacheCreationTokens1h    int // usage.ClaudeCacheCreation1hTokens
			
 
				+	ImageTokens              int
			
 
				+	AudioTokens              int
			
 
				+	ModelName                string // relayInfo.OriginModelName
			
 
				+	TokenName                string // ctx value "token_name"
			
 
				+	UseTimeSeconds           int64 // seconds elapsed since relayInfo.StartTime
			
 
				+	CompletionRatio          float64
			
 
				+	CacheRatio               float64
			
 
				+	ImageRatio               float64
			
 
				+	ModelRatio               float64
			
 
				+	GroupRatio               float64
			
 
				+	ModelPrice               float64
			
 
				+	CacheCreationRatio       float64
			
 
				+	CacheCreationRatio5m     float64
			
 
				+	CacheCreationRatio1h     float64
			
 
				+	Quota                    int  // final quota to charge
			
 
				+	IsClaudeUsageSemantic    bool // true when UsageSemantic == "anthropic"
			
 
				+	UsageSemantic            string
			
 
				+	WebSearchPrice           float64 // divided by 1000 per call when billed
			
 
				+	WebSearchCallCount       int
			
 
				+	ClaudeWebSearchPrice     float64 // divided by 1000 per call when billed
			
 
				+	ClaudeWebSearchCallCount int     // ctx value "claude_web_search_requests"
			
 
				+	FileSearchPrice          float64 // divided by 1000 per call when billed
			
 
				+	FileSearchCallCount      int
			
 
				+	AudioInputPrice          float64 // divided by 1,000,000 per audio token when billed
			
 
				+	ImageGenerationCallPrice float64 // set only when ctx flag "image_generation_call" is true
			
 
				+}
			
 
				+
			
 
				+// cacheWriteTokensTotal returns the cache-creation token count of summary.
			
 
				+// When a 5m or 1h split counter is positive, the result is the larger of the
			
 
				+// aggregate CacheCreationTokens and the sum of the two split counters;
			
 
				+// otherwise it is the aggregate count unchanged.
			
 
				+func cacheWriteTokensTotal(summary textQuotaSummary) int {
			
 
				+	aggregate := summary.CacheCreationTokens
			
 
				+	if summary.CacheCreationTokens5m <= 0 && summary.CacheCreationTokens1h <= 0 {
			
 
				+		return aggregate
			
 
				+	}
			
 
				+	if split := summary.CacheCreationTokens5m + summary.CacheCreationTokens1h; split >= aggregate {
			
 
				+		return split
			
 
				+	}
			
 
				+	return aggregate
			
 
				+}
			
 
				+
			
 
				+// isLegacyClaudeDerivedOpenAIUsage reports whether usage carries Claude 5m/1h
			
 
				+// cache-creation counters although the final request format is not Claude and
			
 
				+// neither UsageSource nor UsageSemantic is set. It returns false when
			
 
				+// relayInfo or usage is nil.
			
 
				+func isLegacyClaudeDerivedOpenAIUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) bool {
			
 
				+	switch {
			
 
				+	case relayInfo == nil || usage == nil:
			
 
				+		return false
			
 
				+	case relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude:
			
 
				+		return false
			
 
				+	case usage.UsageSource != "" || usage.UsageSemantic != "":
			
 
				+		// Usage that is explicitly tagged is never treated as legacy.
			
 
				+		return false
			
 
				+	default:
			
 
				+		return usage.ClaudeCacheCreation5mTokens > 0 || usage.ClaudeCacheCreation1hTokens > 0
			
 
				+	}
			
 
				+}
			
 
				+
			
 
				+// calculateTextQuotaSummary derives the billing summary for a text request
			
 
				+// from relayInfo.PriceData, request-scoped values stored on ctx, and usage.
			
 
				+// When usage is nil it falls back to relayInfo.GetEstimatePromptTokens() with
			
 
				+// zero completion tokens. The returned summary carries the token counts, the
			
 
				+// ratios and prices that were applied, and the final Quota.
			
 
				+func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) textQuotaSummary {
			
 
				+	summary := textQuotaSummary{
			
 
				+		ModelName:            relayInfo.OriginModelName,
			
 
				+		TokenName:            ctx.GetString("token_name"),
			
 
				+		UseTimeSeconds:       time.Now().Unix() - relayInfo.StartTime.Unix(),
			
 
				+		CompletionRatio:      relayInfo.PriceData.CompletionRatio,
			
 
				+		CacheRatio:           relayInfo.PriceData.CacheRatio,
			
 
				+		ImageRatio:           relayInfo.PriceData.ImageRatio,
			
 
				+		ModelRatio:           relayInfo.PriceData.ModelRatio,
			
 
				+		GroupRatio:           relayInfo.PriceData.GroupRatioInfo.GroupRatio,
			
 
				+		ModelPrice:           relayInfo.PriceData.ModelPrice,
			
 
				+		CacheCreationRatio:   relayInfo.PriceData.CacheCreationRatio,
			
 
				+		CacheCreationRatio5m: relayInfo.PriceData.CacheCreation5mRatio,
			
 
				+		CacheCreationRatio1h: relayInfo.PriceData.CacheCreation1hRatio,
			
 
				+		UsageSemantic:        usageSemanticFromUsage(relayInfo, usage),
			
 
				+	}
			
 
				+	summary.IsClaudeUsageSemantic = summary.UsageSemantic == "anthropic"
			
 
				+
			
 
				+	// No usage supplied: bill the estimated prompt tokens only.
			
 
				+	if usage == nil {
			
 
				+		usage = &dto.Usage{
			
 
				+			PromptTokens:     relayInfo.GetEstimatePromptTokens(),
			
 
				+			CompletionTokens: 0,
			
 
				+			TotalTokens:      relayInfo.GetEstimatePromptTokens(),
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	summary.PromptTokens = usage.PromptTokens
			
 
				+	summary.CompletionTokens = usage.CompletionTokens
			
 
				+	summary.TotalTokens = usage.PromptTokens + usage.CompletionTokens
			
 
				+	summary.CacheTokens = usage.PromptTokensDetails.CachedTokens
			
 
				+	summary.CacheCreationTokens = usage.PromptTokensDetails.CachedCreationTokens
			
 
				+	summary.CacheCreationTokens5m = usage.ClaudeCacheCreation5mTokens
			
 
				+	summary.CacheCreationTokens1h = usage.ClaudeCacheCreation1hTokens
			
 
				+	summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
			
 
				+	summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
			
 
				+	legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
			
 
				+
			
 
				+	// OpenRouter channels: strip cache read and cache creation tokens from the
			
 
				+	// prompt count. When no creation count was reported, try to infer one via
			
 
				+	// CalcOpenRouterCacheCreateTokens (only if a cost is present and no custom
			
 
				+	// price/ratio settings are in use).
			
 
				+	// NOTE(review): for non-Claude, non-legacy usage the same cache tokens are
			
 
				+	// subtracted from baseTokens again below — confirm this is intended.
			
 
				+	if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
			
 
				+		summary.PromptTokens -= summary.CacheTokens
			
 
				+		isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
			
 
				+		if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
			
 
				+			maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData)
			
 
				+			if maybeCacheCreationTokens >= 0 && summary.PromptTokens >= maybeCacheCreationTokens {
			
 
				+				summary.CacheCreationTokens = maybeCacheCreationTokens
			
 
				+			}
			
 
				+		}
			
 
				+		summary.PromptTokens -= summary.CacheCreationTokens
			
 
				+	}
			
 
				+
			
 
				+	dPromptTokens := decimal.NewFromInt(int64(summary.PromptTokens))
			
 
				+	dCacheTokens := decimal.NewFromInt(int64(summary.CacheTokens))
			
 
				+	dImageTokens := decimal.NewFromInt(int64(summary.ImageTokens))
			
 
				+	dAudioTokens := decimal.NewFromInt(int64(summary.AudioTokens))
			
 
				+	dCompletionTokens := decimal.NewFromInt(int64(summary.CompletionTokens))
			
 
				+	dCachedCreationTokens := decimal.NewFromInt(int64(summary.CacheCreationTokens))
			
 
				+	dCompletionRatio := decimal.NewFromFloat(summary.CompletionRatio)
			
 
				+	dCacheRatio := decimal.NewFromFloat(summary.CacheRatio)
			
 
				+	dImageRatio := decimal.NewFromFloat(summary.ImageRatio)
			
 
				+	dModelRatio := decimal.NewFromFloat(summary.ModelRatio)
			
 
				+	dGroupRatio := decimal.NewFromFloat(summary.GroupRatio)
			
 
				+	dModelPrice := decimal.NewFromFloat(summary.ModelPrice)
			
 
				+	dCacheCreationRatio := decimal.NewFromFloat(summary.CacheCreationRatio)
			
 
				+	dCacheCreationRatio5m := decimal.NewFromFloat(summary.CacheCreationRatio5m)
			
 
				+	dCacheCreationRatio1h := decimal.NewFromFloat(summary.CacheCreationRatio1h)
			
 
				+	dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
			
 
				+
			
 
				+	ratio := dModelRatio.Mul(dGroupRatio)
			
 
				+
			
 
				+	// Web search surcharge: per-call count from the Responses built-in tool info
			
 
				+	// when present, otherwise one flat call for models named "*search-preview".
			
 
				+	var dWebSearchQuota decimal.Decimal
			
 
				+	if relayInfo.ResponsesUsageInfo != nil {
			
 
				+		if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 {
			
 
				+			summary.WebSearchCallCount = webSearchTool.CallCount
			
 
				+			summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, webSearchTool.SearchContextSize)
			
 
				+			dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice).
			
 
				+				Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))).
			
 
				+				Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				+		}
			
 
				+	} else if strings.HasSuffix(summary.ModelName, "search-preview") {
			
 
				+		searchContextSize := ctx.GetString("chat_completion_web_search_context_size")
			
 
				+		if searchContextSize == "" {
			
 
				+			searchContextSize = "medium"
			
 
				+		}
			
 
				+		summary.WebSearchCallCount = 1
			
 
				+		summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, searchContextSize)
			
 
				+		dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice).
			
 
				+			Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				+	}
			
 
				+
			
 
				+	// Claude web search surcharge: the request count is read from the gin context.
			
 
				+	var dClaudeWebSearchQuota decimal.Decimal
			
 
				+	summary.ClaudeWebSearchCallCount = ctx.GetInt("claude_web_search_requests")
			
 
				+	if summary.ClaudeWebSearchCallCount > 0 {
			
 
				+		summary.ClaudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand()
			
 
				+		dClaudeWebSearchQuota = decimal.NewFromFloat(summary.ClaudeWebSearchPrice).
			
 
				+			Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit).
			
 
				+			Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount)))
			
 
				+	}
			
 
				+
			
 
				+	var dFileSearchQuota decimal.Decimal
			
 
				+	if relayInfo.ResponsesUsageInfo != nil {
			
 
				+		if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 {
			
 
				+			summary.FileSearchCallCount = fileSearchTool.CallCount
			
 
				+			summary.FileSearchPrice = operation_setting.GetFileSearchPricePerThousand()
			
 
				+			dFileSearchQuota = decimal.NewFromFloat(summary.FileSearchPrice).
			
 
				+				Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
			
 
				+				Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	var dImageGenerationCallQuota decimal.Decimal
			
 
				+	if ctx.GetBool("image_generation_call") {
			
 
				+		summary.ImageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size"))
			
 
				+		dImageGenerationCallQuota = decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				+	}
			
 
				+
			
 
				+	var audioInputQuota decimal.Decimal
			
 
				+	// Ratio-based billing: weight each token class, scale the sum by
			
 
				+	// model ratio * group ratio, then add the tool surcharges.
			
 
				+	if !relayInfo.PriceData.UsePrice {
			
 
				+		baseTokens := dPromptTokens
			
 
				+
			
 
				+		var cachedTokensWithRatio decimal.Decimal
			
 
				+		if !dCacheTokens.IsZero() {
			
 
				+			// Under Claude semantics, or legacy Claude-derived usage, cache tokens
			
 
				+			// are not subtracted from baseTokens.
			
 
				+			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
			
 
				+				baseTokens = baseTokens.Sub(dCacheTokens)
			
 
				+			}
			
 
				+			cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
			
 
				+		}
			
 
				+
			
 
				+		var cachedCreationTokensWithRatio decimal.Decimal
			
 
				+		hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0
			
 
				+		if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens {
			
 
				+			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
			
 
				+				baseTokens = baseTokens.Sub(dCachedCreationTokens)
			
 
				+				cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio)
			
 
				+			} else {
			
 
				+				// The 5m and 1h counters use their own ratios; whatever is left of the
			
 
				+				// aggregate count (clamped at zero) uses the generic creation ratio.
			
 
				+				remaining := summary.CacheCreationTokens - summary.CacheCreationTokens5m - summary.CacheCreationTokens1h
			
 
				+				if remaining < 0 {
			
 
				+					remaining = 0
			
 
				+				}
			
 
				+				cachedCreationTokensWithRatio = decimal.NewFromInt(int64(remaining)).Mul(dCacheCreationRatio)
			
 
				+				cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens5m)).Mul(dCacheCreationRatio5m))
			
 
				+				cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens1h)).Mul(dCacheCreationRatio1h))
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		var imageTokensWithRatio decimal.Decimal
			
 
				+		if !dImageTokens.IsZero() {
			
 
				+			baseTokens = baseTokens.Sub(dImageTokens)
			
 
				+			imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
			
 
				+		}
			
 
				+
			
 
				+		// Audio tokens are billed separately only when a positive audio input
			
 
				+		// price is returned; otherwise they stay in baseTokens.
			
 
				+		if !dAudioTokens.IsZero() {
			
 
				+			summary.AudioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(summary.ModelName)
			
 
				+			if summary.AudioInputPrice > 0 {
			
 
				+				baseTokens = baseTokens.Sub(dAudioTokens)
			
 
				+				audioInputQuota = decimal.NewFromFloat(summary.AudioInputPrice).
			
 
				+					Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio).Add(cachedCreationTokensWithRatio)
			
 
				+		completionQuota := dCompletionTokens.Mul(dCompletionRatio)
			
 
				+		quotaCalculateDecimal := promptQuota.Add(completionQuota).Mul(ratio)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
			
 
				+
			
 
				+		if len(relayInfo.PriceData.OtherRatios) > 0 {
			
 
				+			for _, otherRatio := range relayInfo.PriceData.OtherRatios {
			
 
				+				quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio))
			
 
				+			}
			
 
				+		}
			
 
				+
			
 
				+		// With a non-zero ratio the charge is never below 1.
			
 
				+		if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) {
			
 
				+			quotaCalculateDecimal = decimal.NewFromInt(1)
			
 
				+		}
			
 
				+		summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart())
			
 
				+	} else {
			
 
				+		// Fixed-price billing: model price * quota-per-unit * group ratio plus the
			
 
				+		// tool surcharges. audioInputQuota is only assigned in the ratio branch,
			
 
				+		// so it is still zero here.
			
 
				+		quotaCalculateDecimal := dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
			
 
				+		quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
			
 
				+		if len(relayInfo.PriceData.OtherRatios) > 0 {
			
 
				+			for _, otherRatio := range relayInfo.PriceData.OtherRatios {
			
 
				+				quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio))
			
 
				+			}
			
 
				+		}
			
 
				+		summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart())
			
 
				+	}
			
 
				+
			
 
				+	// Zero total tokens means nothing is charged; otherwise a non-zero ratio
			
 
				+	// enforces a minimum charge of 1.
			
 
				+	if summary.TotalTokens == 0 {
			
 
				+		summary.Quota = 0
			
 
				+	} else if !ratio.IsZero() && summary.Quota == 0 {
			
 
				+		summary.Quota = 1
			
 
				+	}
			
 
				+
			
 
				+	return summary
			
 
				+}
			
 
				+
			
 
				+// usageSemanticFromUsage picks the usage semantic for billing. An explicit,
			
 
				+// non-empty usage.UsageSemantic wins; otherwise the result is "anthropic" when
			
 
				+// the final request format is Claude and "openai" in every other case,
			
 
				+// including nil relayInfo or nil usage.
			
 
				+func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) string {
			
 
				+	if usage != nil {
			
 
				+		if explicit := usage.UsageSemantic; explicit != "" {
			
 
				+			return explicit
			
 
				+		}
			
 
				+	}
			
 
				+
			
 
				+	semantic := "openai"
			
 
				+	if relayInfo != nil && relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
			
 
				+		semantic = "anthropic"
			
 
				+	}
			
 
				+	return semantic
			
 
				+}
			
 
				+
			
 
				+func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent []string) {
			
 
				+	originUsage := usage
			
 
				+	if usage == nil {
			
 
				+		extraContent = append(extraContent, "上游无计费信息")
			
 
				+	}
			
 
				+	if originUsage != nil {
			
 
				+		ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
			
 
				+	}
			
 
				+
			
 
				+	adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
			
 
				+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
			
 
				+
			
 
				+	if summary.WebSearchCallCount > 0 {
			
 
				+		extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次，调用花费 %s", summary.WebSearchCallCount, decimal.NewFromFloat(summary.WebSearchPrice).Mul(decimal.NewFromInt(int64(summary.WebSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
			
 
				+	}
			
 
				+	if summary.ClaudeWebSearchCallCount > 0 {
			
 
				+		extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次，调用花费 %s", summary.ClaudeWebSearchCallCount, decimal.NewFromFloat(summary.ClaudeWebSearchPrice).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount))).String()))
			
 
				+	}
			
 
				+	if summary.FileSearchCallCount > 0 {
			
 
				+		extraContent = append(extraContent, fmt.Sprintf("File Search 调用 %d 次，调用花费 %s", summary.FileSearchCallCount, decimal.NewFromFloat(summary.FileSearchPrice).Mul(decimal.NewFromInt(int64(summary.FileSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
			
 
				+	}
			
 
				+	if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 {
			
 
				+		extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", decimal.NewFromFloat(summary.AudioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(decimal.NewFromInt(int64(summary.AudioTokens))).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
			
 
				+	}
			
 
				+	if summary.ImageGenerationCallPrice > 0 {
			
 
				+		extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
			
 
				+	}
			
 
				+
			
 
				+	if summary.TotalTokens == 0 {
			
 
				+		extraContent = append(extraContent, "上游没有返回计费信息，无法扣费（可能是上游超时）")
			
 
				+		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, tokenId %d, model %s， pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, summary.ModelName, relayInfo.FinalPreConsumedQuota))
			
 
				+	} else {
			
 
				+		model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, summary.Quota)
			
 
				+		model.UpdateChannelUsedQuota(relayInfo.ChannelId, summary.Quota)
			
 
				+	}
			
 
				+
			
 
				+	if err := SettleBilling(ctx, relayInfo, summary.Quota); err != nil {
			
 
				+		logger.LogError(ctx, "error settling billing: "+err.Error())
			
 
				+	}
			
 
				+
			
 
				+	logModel := summary.ModelName
			
 
				+	if strings.HasPrefix(logModel, "gpt-4-gizmo") {
			
 
				+		logModel = "gpt-4-gizmo-*"
			
 
				+		extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName))
			
 
				+	}
			
 
				+	if strings.HasPrefix(logModel, "gpt-4o-gizmo") {
			
 
				+		logModel = "gpt-4o-gizmo-*"
			
 
				+		extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName))
			
 
				+	}
			
 
				+
			
 
				+	logContent := strings.Join(extraContent, ", ")
			
 
				+	var other map[string]interface{}
			
 
				+	if summary.IsClaudeUsageSemantic {
			
 
				+		other = GenerateClaudeOtherInfo(ctx, relayInfo,
			
 
				+			summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio,
			
 
				+			summary.CacheTokens, summary.CacheRatio,
			
 
				+			summary.CacheCreationTokens, summary.CacheCreationRatio,
			
 
				+			summary.CacheCreationTokens5m, summary.CacheCreationRatio5m,
			
 
				+			summary.CacheCreationTokens1h, summary.CacheCreationRatio1h,
			
 
				+			summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
			
 
				+		other["usage_semantic"] = "anthropic"
			
 
				+	} else {
			
 
				+		other = GenerateTextOtherInfo(ctx, relayInfo, summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio, summary.CacheTokens, summary.CacheRatio, summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
			
 
				+	}
			
 
				+	if adminRejectReason != "" {
			
 
				+		other["reject_reason"] = adminRejectReason
			
 
				+	}
			
 
				+	if summary.ImageTokens != 0 {
			
 
				+		other["image"] = true
			
 
				+		other["image_ratio"] = summary.ImageRatio
			
 
				+		other["image_output"] = summary.ImageTokens
			
 
				+	}
			
 
				+	if summary.WebSearchCallCount > 0 {
			
 
				+		other["web_search"] = true
			
 
				+		other["web_search_call_count"] = summary.WebSearchCallCount
			
 
				+		other["web_search_price"] = summary.WebSearchPrice
			
 
				+	} else if summary.ClaudeWebSearchCallCount > 0 {
			
 
				+		other["web_search"] = true
			
 
				+		other["web_search_call_count"] = summary.ClaudeWebSearchCallCount
			
 
				+		other["web_search_price"] = summary.ClaudeWebSearchPrice
			
 
				+	}
			
 
				+	if summary.FileSearchCallCount > 0 {
			
 
				+		other["file_search"] = true
			
 
				+		other["file_search_call_count"] = summary.FileSearchCallCount
			
 
				+		other["file_search_price"] = summary.FileSearchPrice
			
 
				+	}
			
 
				+	if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 {
			
 
				+		other["audio_input_seperate_price"] = true
			
 
				+		other["audio_input_token_count"] = summary.AudioTokens
			
 
				+		other["audio_input_price"] = summary.AudioInputPrice
			
 
				+	}
			
 
				+	if summary.ImageGenerationCallPrice > 0 {
			
 
				+		other["image_generation_call"] = true
			
 
				+		other["image_generation_call_price"] = summary.ImageGenerationCallPrice
			
 
				+	}
			
 
				+	if summary.CacheCreationTokens > 0 {
			
 
				+		other["cache_creation_tokens"] = summary.CacheCreationTokens
			
 
				+		other["cache_creation_ratio"] = summary.CacheCreationRatio
			
 
				+	}
			
 
				+	if summary.CacheCreationTokens5m > 0 {
			
 
				+		other["cache_creation_tokens_5m"] = summary.CacheCreationTokens5m
			
 
				+		other["cache_creation_ratio_5m"] = summary.CacheCreationRatio5m
			
 
				+	}
			
 
				+	if summary.CacheCreationTokens1h > 0 {
			
 
				+		other["cache_creation_tokens_1h"] = summary.CacheCreationTokens1h
			
 
				+		other["cache_creation_ratio_1h"] = summary.CacheCreationRatio1h
			
 
				+	}
			
 
				+	cacheWriteTokens := cacheWriteTokensTotal(summary)
			
 
				+	if cacheWriteTokens > 0 {
			
 
				+		// cache_write_tokens: normalized cache creation total for UI display.
			
 
				+		// If split 5m/1h values are present, this is their sum; otherwise it falls back
			
 
				+		// to cache_creation_tokens.
			
 
				+		other["cache_write_tokens"] = cacheWriteTokens
			
 
				+	}
			
 
				+	if relayInfo.GetFinalRequestRelayFormat() != types.RelayFormatClaude && usage != nil && usage.UsageSource != "" && usage.InputTokens > 0 {
			
 
				+		// input_tokens_total: explicit normalized total input used by the usage log UI.
			
 
				+		// Only write this field when upstream/current conversion has already provided a
			
 
				+		// reliable total input value and tagged the usage source. Do not infer it from
			
 
				+		// prompt/cache fields here, otherwise old upstream payloads may be double-counted.
			
 
				+		other["input_tokens_total"] = usage.InputTokens
			
 
				+	}
			
 
				+
			
 
				+	model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
			
 
				+		ChannelId:        relayInfo.ChannelId,
			
 
				+		PromptTokens:     summary.PromptTokens,
			
 
				+		CompletionTokens: summary.CompletionTokens,
			
 
				+		ModelName:        logModel,
			
 
				+		TokenName:        summary.TokenName,
			
 
				+		Quota:            summary.Quota,
			
 
				+		Content:          logContent,
			
 
				+		TokenId:          relayInfo.TokenId,
			
 
				+		UseTimeSeconds:   int(summary.UseTimeSeconds),
			
 
				+		IsStream:         relayInfo.IsStream,
			
 
				+		Group:            relayInfo.UsingGroup,
			
 
				+		Other:            other,
			
 
				+	})
			
 
				+}
			
--- a/service/text_quota_test.go
+++ b/service/text_quota_test.go
@@ -0,0 +1,206 @@
 
				+package service
			
 
				+
			
 
				+import (
			
 
				+	"net/http/httptest"
			
 
				+	"testing"
			
 
				+	"time"
			
 
				+
			
 
				+	"github.com/QuantumNous/new-api/dto"
			
 
				+	relaycommon "github.com/QuantumNous/new-api/relay/common"
			
 
				+	"github.com/QuantumNous/new-api/types"
			
 
				+
			
 
				+	"github.com/gin-gonic/gin"
			
 
				+	"github.com/stretchr/testify/require"
			
 
				+)
			
 
				+
			
 
				+// TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic feeds one usage payload
				+// through calculateTextQuotaSummary twice: once with an OpenAI client-facing
				+// relay format and once with a Claude one, both with a Claude final request
				+// format. The two summaries must agree on quota and split cache-creation tokens.
				+func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) {
				+	gin.SetMode(gin.TestMode)
				+	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())
				+
				+	tokens := &dto.Usage{
				+		PromptTokens:     1000,
				+		CompletionTokens: 200,
				+		PromptTokensDetails: dto.InputTokenDetails{
				+			CachedTokens:         100,
				+			CachedCreationTokens: 50,
				+		},
				+		ClaudeCacheCreation5mTokens: 10,
				+		ClaudeCacheCreation1hTokens: 20,
				+	}
				+
				+	prices := types.PriceData{
				+		ModelRatio:           1,
				+		CompletionRatio:      2,
				+		CacheRatio:           0.1,
				+		CacheCreationRatio:   1.25,
				+		CacheCreation5mRatio: 1.25,
				+		CacheCreation1hRatio: 2,
				+		GroupRatioInfo: types.GroupRatioInfo{
				+			GroupRatio: 1,
				+		},
				+	}
				+
				+	// buildInfo produces a relay info whose final request format is always
				+	// Claude; only the client-facing relay format differs between the two cases.
				+	buildInfo := func(claudeClient bool) *relaycommon.RelayInfo {
				+		info := &relaycommon.RelayInfo{
				+			RelayFormat:             types.RelayFormatOpenAI,
				+			FinalRequestRelayFormat: types.RelayFormatClaude,
				+			OriginModelName:         "claude-3-7-sonnet",
				+			PriceData:               prices,
				+			StartTime:               time.Now(),
				+		}
				+		if claudeClient {
				+			info.RelayFormat = types.RelayFormatClaude
				+		}
				+		return info
				+	}
				+	viaChat := buildInfo(false)
				+	viaMessages := buildInfo(true)
				+
				+	chatSummary := calculateTextQuotaSummary(ginCtx, viaChat, tokens)
				+	messageSummary := calculateTextQuotaSummary(ginCtx, viaMessages, tokens)
				+
				+	// Both entry formats must be billed identically.
				+	require.Equal(t, messageSummary.Quota, chatSummary.Quota)
				+	require.Equal(t, messageSummary.CacheCreationTokens5m, chatSummary.CacheCreationTokens5m)
				+	require.Equal(t, messageSummary.CacheCreationTokens1h, chatSummary.CacheCreationTokens1h)
				+	require.True(t, chatSummary.IsClaudeUsageSemantic)
				+	// NOTE(review): presumably 1000 + 100*0.1 + 20*1.25 + 10*1.25 + 20*2 + 200*2
				+	// = 1487.5, rounded to 1488 — confirm against calculateTextQuotaSummary.
				+	require.Equal(t, 1488, chatSummary.Quota)
				+}
			
 
				+
			
 
				+// TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios gives the
				+// aggregate, 5m and 1h cache-creation buckets three different ratios and checks
				+// that the resulting quota prices each bucket with its own ratio.
				+func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) {
				+	gin.SetMode(gin.TestMode)
				+	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())
				+
				+	// Distinct ratios (1 / 2 / 3) make it visible which bucket used which ratio.
				+	prices := types.PriceData{
				+		ModelRatio:           1,
				+		CompletionRatio:      1,
				+		CacheRatio:           0,
				+		CacheCreationRatio:   1,
				+		CacheCreation5mRatio: 2,
				+		CacheCreation1hRatio: 3,
				+		GroupRatioInfo: types.GroupRatioInfo{
				+			GroupRatio: 1,
				+		},
				+	}
				+	info := &relaycommon.RelayInfo{
				+		RelayFormat:             types.RelayFormatOpenAI,
				+		FinalRequestRelayFormat: types.RelayFormatClaude,
				+		OriginModelName:         "claude-3-7-sonnet",
				+		PriceData:               prices,
				+		StartTime:               time.Now(),
				+	}
				+
				+	tokens := &dto.Usage{
				+		PromptTokens:     100,
				+		CompletionTokens: 0,
				+		PromptTokensDetails: dto.InputTokenDetails{
				+			CachedCreationTokens: 10,
				+		},
				+		ClaudeCacheCreation5mTokens: 2,
				+		ClaudeCacheCreation1hTokens: 3,
				+	}
				+
				+	got := calculateTextQuotaSummary(ginCtx, info, tokens)
				+
				+	// 100 prompt + 5 remaining aggregate tokens * 1 + 2 (5m) * 2 + 3 (1h) * 3 = 118
				+	require.Equal(t, 118, got.Quota)
				+}
			
 
				+
			
 
				+// TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage leaves
				+// FinalRequestRelayFormat unset on the relay info and instead tags the usage
				+// itself with UsageSemantic "anthropic". The summary must still be flagged as
				+// Claude usage semantic and carry the tag through.
				+func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) {
				+	gin.SetMode(gin.TestMode)
				+	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())
				+
				+	prices := types.PriceData{
				+		ModelRatio:           1,
				+		CompletionRatio:      2,
				+		CacheRatio:           0.1,
				+		CacheCreationRatio:   1.25,
				+		CacheCreation5mRatio: 1.25,
				+		CacheCreation1hRatio: 2,
				+		GroupRatioInfo: types.GroupRatioInfo{
				+			GroupRatio: 1,
				+		},
				+	}
				+	info := &relaycommon.RelayInfo{
				+		RelayFormat:     types.RelayFormatOpenAI,
				+		OriginModelName: "claude-3-7-sonnet",
				+		PriceData:       prices,
				+		StartTime:       time.Now(),
				+	}
				+
				+	// The semantic tag on the usage is the only Claude signal in this scenario.
				+	tokens := &dto.Usage{
				+		PromptTokens:     1000,
				+		CompletionTokens: 200,
				+		UsageSemantic:    "anthropic",
				+		PromptTokensDetails: dto.InputTokenDetails{
				+			CachedTokens:         100,
				+			CachedCreationTokens: 50,
				+		},
				+		ClaudeCacheCreation5mTokens: 10,
				+		ClaudeCacheCreation1hTokens: 20,
				+	}
				+
				+	got := calculateTextQuotaSummary(ginCtx, info, tokens)
				+
				+	require.True(t, got.IsClaudeUsageSemantic)
				+	require.Equal(t, "anthropic", got.UsageSemantic)
				+	require.Equal(t, 1488, got.Quota)
				+}
			
 
				+
			
 
				+// TestCacheWriteTokensTotal checks cacheWriteTokensTotal against three summary
				+// shapes: aggregate plus split counts, aggregate only, and split counts only.
				+func TestCacheWriteTokensTotal(t *testing.T) {
				+	scenarios := []struct {
				+		name    string
				+		summary textQuotaSummary
				+		want    int
				+	}{
				+		{
				+			name: "split cache creation",
				+			summary: textQuotaSummary{
				+				CacheCreationTokens:   50,
				+				CacheCreationTokens5m: 10,
				+				CacheCreationTokens1h: 20,
				+			},
				+			want: 50,
				+		},
				+		{
				+			name:    "legacy cache creation",
				+			summary: textQuotaSummary{CacheCreationTokens: 50},
				+			want:    50,
				+		},
				+		{
				+			name: "split cache creation without aggregate remainder",
				+			summary: textQuotaSummary{
				+				CacheCreationTokens5m: 10,
				+				CacheCreationTokens1h: 20,
				+			},
				+			want: 30,
				+		},
				+	}
				+
				+	// Subtests run synchronously, in the order listed above.
				+	for _, sc := range scenarios {
				+		t.Run(sc.name, func(t *testing.T) {
				+			require.Equal(t, sc.want, cacheWriteTokensTotal(sc.summary))
				+		})
				+	}
				+}
			
 
				+
			
 
				+// TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage covers a
				+// usage payload with neither a UsageSemantic tag nor a Claude final request
				+// format on the relay info, but with cached tokens and a Claude 5m
				+// cache-creation count present. It pins the quota expected for that payload.
				+func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) {
				+	gin.SetMode(gin.TestMode)
				+	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())
				+
				+	prices := types.PriceData{
				+		ModelRatio:           1,
				+		CompletionRatio:      5,
				+		CacheRatio:           0.1,
				+		CacheCreationRatio:   1.25,
				+		CacheCreation5mRatio: 1.25,
				+		CacheCreation1hRatio: 2,
				+		GroupRatioInfo:       types.GroupRatioInfo{GroupRatio: 1},
				+	}
				+	info := &relaycommon.RelayInfo{
				+		RelayFormat:     types.RelayFormatOpenAI,
				+		OriginModelName: "claude-3-7-sonnet",
				+		PriceData:       prices,
				+		StartTime:       time.Now(),
				+	}
				+
				+	tokens := &dto.Usage{
				+		PromptTokens:     62,
				+		CompletionTokens: 95,
				+		PromptTokensDetails: dto.InputTokenDetails{
				+			CachedTokens: 3544,
				+		},
				+		ClaudeCacheCreation5mTokens: 586,
				+	}
				+
				+	got := calculateTextQuotaSummary(ginCtx, info, tokens)
				+
				+	// 62 + 3544*0.1 + 586*1.25 + 95*5 = 62 + 354.4 + 732.5 + 475 = 1623.9,
				+	// which the expected quota below treats as 1624 (rounded).
				+	require.Equal(t, 1624, got.Quota)
				+}