1 maand geleden · ff66288e3a
--- a/service/convert.go
+++ b/service/convert.go
@@ -616,10 +616,7 @@ func ResponseOpenAI2Claude(openAIResponse *dto.OpenAITextResponse, info *relayco
 
				 	}
			
 
				 	claudeResponse.Content = contents
			
 
				 	claudeResponse.StopReason = stopReason
			
 
				-	claudeResponse.Usage = &dto.ClaudeUsage{
			
 
				-		InputTokens:  openAIResponse.PromptTokens,
			
 
				-		OutputTokens: openAIResponse.CompletionTokens,
			
 
				-	}
			
 
				+	claudeResponse.Usage = buildClaudeUsageFromOpenAIUsage(&openAIResponse.Usage)
			
 
				 
			
 
				 	return claudeResponse
			
 
				 }
			
--- a/service/text_quota.go
+++ b/service/text_quota.go
@@ -113,8 +113,11 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 
				 	summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
			
 
				 	summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
			
 
				 	legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
			
 
				+	isOpenRouterClaudeBilling := relayInfo.ChannelMeta != nil &&
			
 
				+		relayInfo.ChannelType == constant.ChannelTypeOpenRouter &&
			
 
				+		summary.IsClaudeUsageSemantic
			
 
				 
			
 
				-	if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
			
 
				+	if isOpenRouterClaudeBilling {
			
 
				 		summary.PromptTokens -= summary.CacheTokens
			
 
				 		isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
			
 
				 		if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
			
--- a/service/text_quota_test.go
+++ b/service/text_quota_test.go
@@ -5,6 +5,7 @@ import (
 
				 	"testing"
			
 
				 	"time"
			
 
				 
			
 
				+	"github.com/QuantumNous/new-api/constant"
			
 
				 	"github.com/QuantumNous/new-api/dto"
			
 
				 	relaycommon "github.com/QuantumNous/new-api/relay/common"
			
 
				 	"github.com/QuantumNous/new-api/types"
			
@@ -204,3 +205,114 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi
 
				 	// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1623.9 => 1624
			
 
				 	require.Equal(t, 1624, summary.Quota)
			
 
				 }
			
 
				+
			
 
				+func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) {
			
 
				+	gin.SetMode(gin.TestMode)
			
 
				+	w := httptest.NewRecorder()
			
 
				+	ctx, _ := gin.CreateTestContext(w)
			
 
				+
			
 
				+	relayInfo := &relaycommon.RelayInfo{
			
 
				+		OriginModelName: "openai/gpt-4.1",
			
 
				+		ChannelMeta: &relaycommon.ChannelMeta{
			
 
				+			ChannelType: constant.ChannelTypeOpenRouter,
			
 
				+		},
			
 
				+		PriceData: types.PriceData{
			
 
				+			ModelRatio:         1,
			
 
				+			CompletionRatio:    1,
			
 
				+			CacheRatio:         0.1,
			
 
				+			CacheCreationRatio: 1.25,
			
 
				+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
			
 
				+		},
			
 
				+		StartTime: time.Now(),
			
 
				+	}
			
 
				+
			
 
				+	usage := &dto.Usage{
			
 
				+		PromptTokens:     2604,
			
 
				+		CompletionTokens: 383,
			
 
				+		PromptTokensDetails: dto.InputTokenDetails{
			
 
				+			CachedTokens: 2432,
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
			
 
				+
			
 
				+	// For an OpenAI-format response on an OpenRouter channel, summary.PromptTokens must stay at the full input total (2604),
			
 
				+	// while the quota still prices the 2432 cache-read tokens at CacheRatio (0.1) rather than at the normal input rate.
			
 
				+	// quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798
			
 
				+	require.Equal(t, 2604, summary.PromptTokens)
			
 
				+	require.Equal(t, 798, summary.Quota)
			
 
				+}
			
 
				+
			
 
				+func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) {
			
 
				+	gin.SetMode(gin.TestMode)
			
 
				+	w := httptest.NewRecorder()
			
 
				+	ctx, _ := gin.CreateTestContext(w)
			
 
				+
			
 
				+	relayInfo := &relaycommon.RelayInfo{
			
 
				+		OriginModelName: "openai/gpt-4.1",
			
 
				+		ChannelMeta: &relaycommon.ChannelMeta{
			
 
				+			ChannelType: constant.ChannelTypeOpenRouter,
			
 
				+		},
			
 
				+		PriceData: types.PriceData{
			
 
				+			ModelRatio:         1,
			
 
				+			CompletionRatio:    1,
			
 
				+			CacheCreationRatio: 1.25,
			
 
				+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
			
 
				+		},
			
 
				+		StartTime: time.Now(),
			
 
				+	}
			
 
				+
			
 
				+	usage := &dto.Usage{
			
 
				+		PromptTokens:     2604,
			
 
				+		CompletionTokens: 383,
			
 
				+		PromptTokensDetails: dto.InputTokenDetails{
			
 
				+			CachedCreationTokens: 100,
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
			
 
				+
			
 
				+	// summary.PromptTokens must stay at the full input total (2604); the 100 cache-creation tokens are priced at CacheCreationRatio (1.25).
			
 
				+	// quota = (2604 - 100) + 100*1.25 + 383 = 3012
			
 
				+	require.Equal(t, 2604, summary.PromptTokens)
			
 
				+	require.Equal(t, 3012, summary.Quota)
			
 
				+}
			
 
				+
			
 
				+func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
			
 
				+	gin.SetMode(gin.TestMode)
			
 
				+	w := httptest.NewRecorder()
			
 
				+	ctx, _ := gin.CreateTestContext(w)
			
 
				+
			
 
				+	relayInfo := &relaycommon.RelayInfo{
			
 
				+		FinalRequestRelayFormat: types.RelayFormatClaude,
			
 
				+		OriginModelName:         "anthropic/claude-3.7-sonnet",
			
 
				+		ChannelMeta: &relaycommon.ChannelMeta{
			
 
				+			ChannelType: constant.ChannelTypeOpenRouter,
			
 
				+		},
			
 
				+		PriceData: types.PriceData{
			
 
				+			ModelRatio:         1,
			
 
				+			CompletionRatio:    1,
			
 
				+			CacheRatio:         0.1,
			
 
				+			CacheCreationRatio: 1.25,
			
 
				+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
			
 
				+		},
			
 
				+		StartTime: time.Now(),
			
 
				+	}
			
 
				+
			
 
				+	usage := &dto.Usage{
			
 
				+		PromptTokens:     2604,
			
 
				+		CompletionTokens: 383,
			
 
				+		PromptTokensDetails: dto.InputTokenDetails{
			
 
				+			CachedTokens: 2432,
			
 
				+		},
			
 
				+	}
			
 
				+
			
 
				+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
			
 
				+
			
 
				+	// With the Claude relay format on an OpenRouter channel, the pre-PR PostClaudeConsumeQuota behavior must be kept:
			
 
				+	// cached tokens are subtracted from the reported prompt count, so prompt = 2604 - 2432 = 172
			
 
				+	// quota = 172 + 2432*0.1 + 383 = 798.2 => 798
			
 
				+	require.True(t, summary.IsClaudeUsageSemantic)
			
 
				+	require.Equal(t, 172, summary.PromptTokens)
			
 
				+	require.Equal(t, 798, summary.Quota)
			
 
				+}