|
@@ -627,6 +627,12 @@ func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, res
|
|
|
usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
|
|
usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
+ case constant.ChannelTypeOpenAI:
|
|
|
|
|
+ if usage.PromptTokensDetails.CachedTokens == 0 {
|
|
|
|
|
+ if cachedTokens, ok := extractLlamaCachedTokensFromBody(responseBody); ok {
|
|
|
|
|
+ usage.PromptTokensDetails.CachedTokens = cachedTokens
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -689,3 +695,25 @@ func extractMoonshotCachedTokensFromBody(body []byte) (int, bool) {
|
|
|
|
|
|
|
|
return 0, false
|
|
return 0, false
|
|
|
}
|
|
}
|
|
|
|
|
+
|
|
|
|
|
+// extractLlamaCachedTokensFromBody 从llama.cpp的非标准位置提取cache_n
|
|
|
|
|
+func extractLlamaCachedTokensFromBody(body []byte) (int, bool) {
|
|
|
|
|
+ if len(body) == 0 {
|
|
|
|
|
+ return 0, false
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ var payload struct {
|
|
|
|
|
+ Timings struct {
|
|
|
|
|
+ CachedTokens *int `json:"cache_n"`
|
|
|
|
|
+ } `json:"timings"`
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if err := common.Unmarshal(body, &payload); err != nil {
|
|
|
|
|
+ return 0, false
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if payload.Timings.CachedTokens == nil {
|
|
|
|
|
+ return 0, false
|
|
|
|
|
+ }
|
|
|
|
|
+ return *payload.Timings.CachedTokens, true
|
|
|
|
|
+}
|