Explorar o código

Merge pull request #1509 from QuantumNous/responses-input-cache-token

fix: responses cache token 未计费
Calcium-Ion hai 7 meses
pai
achega
7370b4fbcd

+ 16 - 6
relay/channel/openai/relay_responses.go

@@ -37,9 +37,14 @@ func OaiResponsesHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http
 
 	// compute usage
 	usage := dto.Usage{}
-	usage.PromptTokens = responsesResponse.Usage.InputTokens
-	usage.CompletionTokens = responsesResponse.Usage.OutputTokens
-	usage.TotalTokens = responsesResponse.Usage.TotalTokens
+	if responsesResponse.Usage != nil {
+		usage.PromptTokens = responsesResponse.Usage.InputTokens
+		usage.CompletionTokens = responsesResponse.Usage.OutputTokens
+		usage.TotalTokens = responsesResponse.Usage.TotalTokens
+		if responsesResponse.Usage.InputTokensDetails != nil {
+			usage.PromptTokensDetails.CachedTokens = responsesResponse.Usage.InputTokensDetails.CachedTokens
+		}
+	}
 	// 解析 Tools 用量
 	for _, tool := range responsesResponse.Tools {
 		info.ResponsesUsageInfo.BuiltInTools[common.Interface2String(tool["type"])].CallCount++
@@ -64,9 +69,14 @@ func OaiResponsesStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp
 			sendResponsesStreamData(c, streamResponse, data)
 			switch streamResponse.Type {
 			case "response.completed":
-				usage.PromptTokens = streamResponse.Response.Usage.InputTokens
-				usage.CompletionTokens = streamResponse.Response.Usage.OutputTokens
-				usage.TotalTokens = streamResponse.Response.Usage.TotalTokens
+				if streamResponse.Response.Usage != nil {
+					usage.PromptTokens = streamResponse.Response.Usage.InputTokens
+					usage.CompletionTokens = streamResponse.Response.Usage.OutputTokens
+					usage.TotalTokens = streamResponse.Response.Usage.TotalTokens
+					if streamResponse.Response.Usage.InputTokensDetails != nil {
+						usage.PromptTokensDetails.CachedTokens = streamResponse.Response.Usage.InputTokensDetails.CachedTokens
+					}
+				}
 			case "response.output_text.delta":
 				// 处理输出文本
 				responseTextBuilder.WriteString(streamResponse.Delta)

+ 7 - 3
web/src/helpers/render.js

@@ -1156,6 +1156,7 @@ export function renderLogContent(
   modelPrice = -1,
   groupRatio,
   user_group_ratio,
+  cacheRatio = 1.0,
   image = false,
   imageRatio = 1.0,
   webSearch = false,
@@ -1174,9 +1175,10 @@ export function renderLogContent(
   } else {
     if (image) {
       return i18next.t(
-        '模型倍率 {{modelRatio}},输出倍率 {{completionRatio}},图片输入倍率 {{imageRatio}},{{ratioType}} {{ratio}}',
+        '模型倍率 {{modelRatio}},缓存倍率 {{cacheRatio}},输出倍率 {{completionRatio}},图片输入倍率 {{imageRatio}},{{ratioType}} {{ratio}}',
         {
           modelRatio: modelRatio,
+          cacheRatio: cacheRatio,
           completionRatio: completionRatio,
           imageRatio: imageRatio,
           ratioType: ratioLabel,
@@ -1185,9 +1187,10 @@ export function renderLogContent(
       );
     } else if (webSearch) {
       return i18next.t(
-        '模型倍率 {{modelRatio}},输出倍率 {{completionRatio}},{{ratioType}} {{ratio}},Web 搜索调用 {{webSearchCallCount}} 次',
+        '模型倍率 {{modelRatio}},缓存倍率 {{cacheRatio}},输出倍率 {{completionRatio}},{{ratioType}} {{ratio}},Web 搜索调用 {{webSearchCallCount}} 次',
         {
           modelRatio: modelRatio,
+          cacheRatio: cacheRatio,
           completionRatio: completionRatio,
           ratioType: ratioLabel,
           ratio,
@@ -1196,9 +1199,10 @@ export function renderLogContent(
       );
     } else {
       return i18next.t(
-        '模型倍率 {{modelRatio}},输出倍率 {{completionRatio}},{{ratioType}} {{ratio}}',
+        '模型倍率 {{modelRatio}},缓存倍率 {{cacheRatio}},输出倍率 {{completionRatio}},{{ratioType}} {{ratio}}',
         {
           modelRatio: modelRatio,
+          cacheRatio: cacheRatio,
           completionRatio: completionRatio,
           ratioType: ratioLabel,
           ratio,

+ 1 - 0
web/src/hooks/usage-logs/useUsageLogsData.js

@@ -366,6 +366,7 @@ export const useLogsData = () => {
               other.model_price,
               other.group_ratio,
               other?.user_group_ratio,
+              other.cache_ratio || 1.0,
               false,
               1.0,
               other.web_search || false,