Просмотр исходного кода

feat: enhance token usage details for upstream OpenRouter

neotf 9 месяцев назад
Родитель
Commit
66778efcc5
3 измененных файлов с 48 добавлено и 40 удалено
  1. 39 37
      dto/openai_request.go
  2. 3 0
      relay/channel/openai/adaptor.go
  3. 6 3
      service/convert.go

+ 39 - 37
dto/openai_request.go

@@ -18,43 +18,45 @@ type FormatJsonSchema struct {
 }
 
 type GeneralOpenAIRequest struct {
-	Model               string         `json:"model,omitempty"`
-	Messages            []Message      `json:"messages,omitempty"`
-	Prompt              any            `json:"prompt,omitempty"`
-	Prefix              any            `json:"prefix,omitempty"`
-	Suffix              any            `json:"suffix,omitempty"`
-	Stream              bool           `json:"stream,omitempty"`
-	StreamOptions       *StreamOptions `json:"stream_options,omitempty"`
-	MaxTokens           uint           `json:"max_tokens,omitempty"`
-	MaxCompletionTokens uint           `json:"max_completion_tokens,omitempty"`
-	ReasoningEffort     string         `json:"reasoning_effort,omitempty"`
-	//Reasoning           json.RawMessage   `json:"reasoning,omitempty"`
-	Temperature      *float64          `json:"temperature,omitempty"`
-	TopP             float64           `json:"top_p,omitempty"`
-	TopK             int               `json:"top_k,omitempty"`
-	Stop             any               `json:"stop,omitempty"`
-	N                int               `json:"n,omitempty"`
-	Input            any               `json:"input,omitempty"`
-	Instruction      string            `json:"instruction,omitempty"`
-	Size             string            `json:"size,omitempty"`
-	Functions        any               `json:"functions,omitempty"`
-	FrequencyPenalty float64           `json:"frequency_penalty,omitempty"`
-	PresencePenalty  float64           `json:"presence_penalty,omitempty"`
-	ResponseFormat   *ResponseFormat   `json:"response_format,omitempty"`
-	EncodingFormat   any               `json:"encoding_format,omitempty"`
-	Seed             float64           `json:"seed,omitempty"`
-	ParallelTooCalls *bool             `json:"parallel_tool_calls,omitempty"`
-	Tools            []ToolCallRequest `json:"tools,omitempty"`
-	ToolChoice       any               `json:"tool_choice,omitempty"`
-	User             string            `json:"user,omitempty"`
-	LogProbs         bool              `json:"logprobs,omitempty"`
-	TopLogProbs      int               `json:"top_logprobs,omitempty"`
-	Dimensions       int               `json:"dimensions,omitempty"`
-	Modalities       any               `json:"modalities,omitempty"`
-	Audio            any               `json:"audio,omitempty"`
-	EnableThinking   any               `json:"enable_thinking,omitempty"` // ali
-	ExtraBody        any               `json:"extra_body,omitempty"`
-	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
+	Model               string            `json:"model,omitempty"`
+	Messages            []Message         `json:"messages,omitempty"`
+	Prompt              any               `json:"prompt,omitempty"`
+	Prefix              any               `json:"prefix,omitempty"`
+	Suffix              any               `json:"suffix,omitempty"`
+	Stream              bool              `json:"stream,omitempty"`
+	StreamOptions       *StreamOptions    `json:"stream_options,omitempty"`
+	MaxTokens           uint              `json:"max_tokens,omitempty"`
+	MaxCompletionTokens uint              `json:"max_completion_tokens,omitempty"`
+	ReasoningEffort     string            `json:"reasoning_effort,omitempty"`
+	Temperature         *float64          `json:"temperature,omitempty"`
+	TopP                float64           `json:"top_p,omitempty"`
+	TopK                int               `json:"top_k,omitempty"`
+	Stop                any               `json:"stop,omitempty"`
+	N                   int               `json:"n,omitempty"`
+	Input               any               `json:"input,omitempty"`
+	Instruction         string            `json:"instruction,omitempty"`
+	Size                string            `json:"size,omitempty"`
+	Functions           any               `json:"functions,omitempty"`
+	FrequencyPenalty    float64           `json:"frequency_penalty,omitempty"`
+	PresencePenalty     float64           `json:"presence_penalty,omitempty"`
+	ResponseFormat      *ResponseFormat   `json:"response_format,omitempty"`
+	EncodingFormat      any               `json:"encoding_format,omitempty"`
+	Seed                float64           `json:"seed,omitempty"`
+	ParallelTooCalls    *bool             `json:"parallel_tool_calls,omitempty"`
+	Tools               []ToolCallRequest `json:"tools,omitempty"`
+	ToolChoice          any               `json:"tool_choice,omitempty"`
+	User                string            `json:"user,omitempty"`
+	LogProbs            bool              `json:"logprobs,omitempty"`
+	TopLogProbs         int               `json:"top_logprobs,omitempty"`
+	Dimensions          int               `json:"dimensions,omitempty"`
+	Modalities          any               `json:"modalities,omitempty"`
+	Audio               any               `json:"audio,omitempty"`
+	EnableThinking      any               `json:"enable_thinking,omitempty"` // ali
+	ExtraBody           any               `json:"extra_body,omitempty"`
+	WebSearchOptions    *WebSearchOptions `json:"web_search_options,omitempty"`
+	// OpenRouter Params
+	Usage     json.RawMessage `json:"usage,omitempty"`
+	Reasoning json.RawMessage `json:"reasoning,omitempty"`
 }
 
 type ToolCallRequest struct {

+ 3 - 0
relay/channel/openai/adaptor.go

@@ -152,6 +152,9 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure {
 		request.StreamOptions = nil
 	}
+	if info.ChannelType == common.ChannelTypeOpenRouter {
+		request.Usage = json.RawMessage("{\"include\": true}")
+	}
 	if strings.HasPrefix(request.Model, "o") {
 		if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 {
 			request.MaxCompletionTokens = request.MaxTokens

+ 6 - 3
service/convert.go

@@ -246,12 +246,15 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
 		}
 		if info.Done {
 			claudeResponses = append(claudeResponses, generateStopBlock(info.ClaudeConvertInfo.Index))
-			if info.ClaudeConvertInfo.Usage != nil {
+			oaiUsage := info.ClaudeConvertInfo.Usage
+			if oaiUsage != nil {
 				claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
 					Type: "message_delta",
 					Usage: &dto.ClaudeUsage{
-						InputTokens:  info.ClaudeConvertInfo.Usage.PromptTokens,
-						OutputTokens: info.ClaudeConvertInfo.Usage.CompletionTokens,
+						InputTokens:              oaiUsage.PromptTokens,
+						OutputTokens:             oaiUsage.CompletionTokens,
+						CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
+						CacheReadInputTokens:     oaiUsage.PromptTokensDetails.CachedTokens,
 					},
 					Delta: &dto.ClaudeMediaMessage{
 						StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),