Просмотр исходного кода

Merge pull request #2798 from RedwindA/feat/GeminiCacheBilling

feat(gemini): support cached token billing
Seefs 1 месяц назад
Родитель
Сommit
76050e66ca

+ 6 - 5
dto/gemini.go

@@ -449,11 +449,12 @@ type GeminiChatResponse struct {
 }
 
 type GeminiUsageMetadata struct {
-	PromptTokenCount     int                         `json:"promptTokenCount"`
-	CandidatesTokenCount int                         `json:"candidatesTokenCount"`
-	TotalTokenCount      int                         `json:"totalTokenCount"`
-	ThoughtsTokenCount   int                         `json:"thoughtsTokenCount"`
-	PromptTokensDetails  []GeminiPromptTokensDetails `json:"promptTokensDetails"`
+	PromptTokenCount        int                         `json:"promptTokenCount"`
+	CandidatesTokenCount    int                         `json:"candidatesTokenCount"`
+	TotalTokenCount         int                         `json:"totalTokenCount"`
+	ThoughtsTokenCount      int                         `json:"thoughtsTokenCount"`
+	CachedContentTokenCount int                         `json:"cachedContentTokenCount"`
+	PromptTokensDetails     []GeminiPromptTokensDetails `json:"promptTokensDetails"`
 }
 
 type GeminiPromptTokensDetails struct {

+ 1 - 0
relay/channel/gemini/relay-gemini-native.go

@@ -49,6 +49,7 @@ func GeminiTextGenerationHandler(c *gin.Context, info *relaycommon.RelayInfo, re
 	}
 
 	usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+	usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
 
 	for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
 		if detail.Modality == "AUDIO" {

+ 3 - 0
relay/channel/gemini/relay-gemini.go

@@ -1251,6 +1251,7 @@ func geminiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
 			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
 			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
+			usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
 			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
 				if detail.Modality == "AUDIO" {
 					usage.PromptTokensDetails.AudioTokens = detail.TokenCount
@@ -1395,6 +1396,7 @@ func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
 			PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
 		}
 		usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+		usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
 		for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
 			if detail.Modality == "AUDIO" {
 				usage.PromptTokensDetails.AudioTokens = detail.TokenCount
@@ -1447,6 +1449,7 @@ func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.R
 	}
 
 	usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
+	usage.PromptTokensDetails.CachedTokens = geminiResponse.UsageMetadata.CachedContentTokenCount
 	usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
 
 	for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {

+ 2 - 0
setting/ratio_setting/cache_ratio.go

@@ -8,6 +8,8 @@ import (
 )
 
 var defaultCacheRatio = map[string]float64{
+	"gemini-3-flash-preview":              0.25,
+	"gemini-3-pro-preview":                0.25,
 	"gpt-4":                               0.5,
 	"o1":                                  0.5,
 	"o1-2024-12-17":                       0.5,