فهرست منبع

feat: openai response /v1/responses/compact (#2644)

* feat: openai response /v1/response/compact

* feat: /v1/response/compact bill

* feat: /v1/response/compact

* feat: /v1/responses/compact -> codex channel

* feat: /v1/responses/compact -> codex channel

* feat: /v1/responses/compact -> codex channel

* feat: codex channel default models

* feat: compact model price

* feat: /v1/responses/compact test
Seefs 1 ماه پیش
والد
کامیت
cc1da72d10

+ 8 - 7
common/endpoint_defaults.go

@@ -17,13 +17,14 @@ type EndpointInfo struct {
 
 // defaultEndpointInfoMap 保存内置端点的默认 Path 与 Method
 var defaultEndpointInfoMap = map[constant.EndpointType]EndpointInfo{
-	constant.EndpointTypeOpenAI:          {Path: "/v1/chat/completions", Method: "POST"},
-	constant.EndpointTypeOpenAIResponse:  {Path: "/v1/responses", Method: "POST"},
-	constant.EndpointTypeAnthropic:       {Path: "/v1/messages", Method: "POST"},
-	constant.EndpointTypeGemini:          {Path: "/v1beta/models/{model}:generateContent", Method: "POST"},
-	constant.EndpointTypeJinaRerank:      {Path: "/v1/rerank", Method: "POST"},
-	constant.EndpointTypeImageGeneration: {Path: "/v1/images/generations", Method: "POST"},
-	constant.EndpointTypeEmbeddings:      {Path: "/v1/embeddings", Method: "POST"},
+	constant.EndpointTypeOpenAI:                {Path: "/v1/chat/completions", Method: "POST"},
+	constant.EndpointTypeOpenAIResponse:        {Path: "/v1/responses", Method: "POST"},
+	constant.EndpointTypeOpenAIResponseCompact: {Path: "/v1/responses/compact", Method: "POST"},
+	constant.EndpointTypeAnthropic:             {Path: "/v1/messages", Method: "POST"},
+	constant.EndpointTypeGemini:                {Path: "/v1beta/models/{model}:generateContent", Method: "POST"},
+	constant.EndpointTypeJinaRerank:            {Path: "/v1/rerank", Method: "POST"},
+	constant.EndpointTypeImageGeneration:       {Path: "/v1/images/generations", Method: "POST"},
+	constant.EndpointTypeEmbeddings:            {Path: "/v1/embeddings", Method: "POST"},
 }
 
 // GetDefaultEndpointInfo 返回指定端点类型的默认信息以及是否存在

+ 9 - 8
constant/endpoint_type.go

@@ -3,14 +3,15 @@ package constant
 type EndpointType string
 
 const (
-	EndpointTypeOpenAI          EndpointType = "openai"
-	EndpointTypeOpenAIResponse  EndpointType = "openai-response"
-	EndpointTypeAnthropic       EndpointType = "anthropic"
-	EndpointTypeGemini          EndpointType = "gemini"
-	EndpointTypeJinaRerank      EndpointType = "jina-rerank"
-	EndpointTypeImageGeneration EndpointType = "image-generation"
-	EndpointTypeEmbeddings      EndpointType = "embeddings"
-	EndpointTypeOpenAIVideo     EndpointType = "openai-video"
+	EndpointTypeOpenAI                EndpointType = "openai"
+	EndpointTypeOpenAIResponse        EndpointType = "openai-response"
+	EndpointTypeOpenAIResponseCompact EndpointType = "openai-response-compact"
+	EndpointTypeAnthropic             EndpointType = "anthropic"
+	EndpointTypeGemini                EndpointType = "gemini"
+	EndpointTypeJinaRerank            EndpointType = "jina-rerank"
+	EndpointTypeImageGeneration       EndpointType = "image-generation"
+	EndpointTypeEmbeddings            EndpointType = "embeddings"
+	EndpointTypeOpenAIVideo           EndpointType = "openai-video"
 	//EndpointTypeMidjourney     EndpointType = "midjourney-proxy"
 	//EndpointTypeSuno           EndpointType = "suno-proxy"
 	//EndpointTypeKling          EndpointType = "kling"

+ 58 - 0
controller/channel-test.go

@@ -26,6 +26,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/operation_setting"
+	"github.com/QuantumNous/new-api/setting/ratio_setting"
 	"github.com/QuantumNous/new-api/types"
 
 	"github.com/bytedance/gopkg/util/gopool"
@@ -107,6 +108,14 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 		if strings.Contains(strings.ToLower(testModel), "codex") {
 			requestPath = "/v1/responses"
 		}
+
+		// responses compaction models (must use /v1/responses/compact)
+		if strings.HasSuffix(testModel, ratio_setting.CompactModelSuffix) {
+			requestPath = "/v1/responses/compact"
+		}
+	}
+	if strings.HasPrefix(requestPath, "/v1/responses/compact") {
+		testModel = ratio_setting.WithCompactModelSuffix(testModel)
 	}
 
 	c.Request = &http.Request{
@@ -150,6 +159,8 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 			relayFormat = types.RelayFormatOpenAI
 		case constant.EndpointTypeOpenAIResponse:
 			relayFormat = types.RelayFormatOpenAIResponses
+		case constant.EndpointTypeOpenAIResponseCompact:
+			relayFormat = types.RelayFormatOpenAIResponsesCompaction
 		case constant.EndpointTypeAnthropic:
 			relayFormat = types.RelayFormatClaude
 		case constant.EndpointTypeGemini:
@@ -184,6 +195,9 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 		if c.Request.URL.Path == "/v1/responses" {
 			relayFormat = types.RelayFormatOpenAIResponses
 		}
+		if strings.HasPrefix(c.Request.URL.Path, "/v1/responses/compact") {
+			relayFormat = types.RelayFormatOpenAIResponsesCompaction
+		}
 	}
 
 	request := buildTestRequest(testModel, endpointType, channel)
@@ -215,6 +229,15 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 	request.SetModelName(testModel)
 
 	apiType, _ := common.ChannelType2APIType(channel.Type)
+	if info.RelayMode == relayconstant.RelayModeResponsesCompact &&
+		apiType != constant.APITypeOpenAI &&
+		apiType != constant.APITypeCodex {
+		return testResult{
+			context:     c,
+			localErr:    fmt.Errorf("responses compaction test only supports openai/codex channels, got api type %d", apiType),
+			newAPIError: types.NewError(fmt.Errorf("unsupported api type: %d", apiType), types.ErrorCodeInvalidApiType),
+		}
+	}
 	adaptor := relay.GetAdaptor(apiType)
 	if adaptor == nil {
 		return testResult{
@@ -287,6 +310,25 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 				newAPIError: types.NewError(errors.New("invalid response request type"), types.ErrorCodeConvertRequestFailed),
 			}
 		}
+	case relayconstant.RelayModeResponsesCompact:
+		// Response compaction request - convert to OpenAIResponsesRequest before adapting
+		switch req := request.(type) {
+		case *dto.OpenAIResponsesCompactionRequest:
+			convertedRequest, err = adaptor.ConvertOpenAIResponsesRequest(c, info, dto.OpenAIResponsesRequest{
+				Model:              req.Model,
+				Input:              req.Input,
+				Instructions:       req.Instructions,
+				PreviousResponseID: req.PreviousResponseID,
+			})
+		case *dto.OpenAIResponsesRequest:
+			convertedRequest, err = adaptor.ConvertOpenAIResponsesRequest(c, info, *req)
+		default:
+			return testResult{
+				context:     c,
+				localErr:    errors.New("invalid response compaction request type"),
+				newAPIError: types.NewError(errors.New("invalid response compaction request type"), types.ErrorCodeConvertRequestFailed),
+			}
+		}
 	default:
 		// Chat/Completion 等其他请求类型
 		if generalReq, ok := request.(*dto.GeneralOpenAIRequest); ok {
@@ -432,6 +474,8 @@ func testChannel(channel *model.Channel, testModel string, endpointType string)
 }
 
 func buildTestRequest(model string, endpointType string, channel *model.Channel) dto.Request {
+	testResponsesInput := json.RawMessage(`[{"role":"user","content":"hi"}]`)
+
 	// 根据端点类型构建不同的测试请求
 	if endpointType != "" {
 		switch constant.EndpointType(endpointType) {
@@ -463,6 +507,12 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel)
 				Model: model,
 				Input: json.RawMessage(`[{"role":"user","content":"hi"}]`),
 			}
+		case constant.EndpointTypeOpenAIResponseCompact:
+			// 返回 OpenAIResponsesCompactionRequest
+			return &dto.OpenAIResponsesCompactionRequest{
+				Model: model,
+				Input: testResponsesInput,
+			}
 		case constant.EndpointTypeAnthropic, constant.EndpointTypeGemini, constant.EndpointTypeOpenAI:
 			// 返回 GeneralOpenAIRequest
 			maxTokens := uint(16)
@@ -504,6 +554,14 @@ func buildTestRequest(model string, endpointType string, channel *model.Channel)
 		}
 	}
 
+	// Responses compaction models (must use /v1/responses/compact)
+	if strings.HasSuffix(model, ratio_setting.CompactModelSuffix) {
+		return &dto.OpenAIResponsesCompactionRequest{
+			Model: model,
+			Input: testResponsesInput,
+		}
+	}
+
 	// Responses-only models (e.g. codex series)
 	if strings.Contains(strings.ToLower(model), "codex") {
 		return &dto.OpenAIResponsesRequest{

+ 1 - 1
controller/relay.go

@@ -45,7 +45,7 @@ func relayHandler(c *gin.Context, info *relaycommon.RelayInfo) *types.NewAPIErro
 		err = relay.RerankHelper(c, info)
 	case relayconstant.RelayModeEmbeddings:
 		err = relay.EmbeddingHelper(c, info)
-	case relayconstant.RelayModeResponses:
+	case relayconstant.RelayModeResponses, relayconstant.RelayModeResponsesCompact:
 		err = relay.ResponsesHelper(c, info)
 	default:
 		err = relay.TextHelper(c, info)

+ 106 - 5
docs/openapi/relay.json

@@ -284,6 +284,46 @@
           }
         ]
       }
+    },
+	    "/v1/responses/compact": {
+	      "post": {
+	        "summary": "压缩对话 (OpenAI Responses API)",
+	        "deprecated": false,
+	        "description": "OpenAI Responses API,用于对长对话进行 compaction。",
+	        "operationId": "compactResponse",
+        "tags": [
+          "OpenAI格式(Responses)"
+        ],
+        "parameters": [],
+	        "requestBody": {
+	          "content": {
+	            "application/json": {
+	              "schema": {
+	                "$ref": "#/components/schemas/ResponsesCompactionRequest"
+	              }
+	            }
+	          },
+	          "required": true
+	        },
+        "responses": {
+          "200": {
+            "description": "成功压缩对话",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ResponsesCompactionResponse"
+                }
+              }
+            },
+            "headers": {}
+          }
+        },
+        "security": [
+          {
+            "BearerAuth": []
+          }
+        ]
+      }
     },
     "/v1/images/generations": {
       "post": {
@@ -3130,10 +3170,71 @@
           }
         }
       },
-      "ResponsesStreamResponse": {
-        "type": "object",
-        "properties": {
-          "type": {
+	      "ResponsesCompactionResponse": {
+	        "type": "object",
+	        "properties": {
+          "id": {
+            "type": "string"
+          },
+          "object": {
+            "type": "string",
+            "example": "response.compaction"
+          },
+          "created_at": {
+            "type": "integer"
+          },
+          "output": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {}
+            }
+          },
+          "usage": {
+            "$ref": "#/components/schemas/Usage"
+          },
+          "error": {
+            "type": "object",
+            "properties": {}
+          }
+	        }
+	      },
+	      "ResponsesCompactionRequest": {
+	        "type": "object",
+	        "required": [
+	          "model"
+	        ],
+	        "properties": {
+	          "model": {
+	            "type": "string"
+	          },
+	          "input": {
+	            "description": "输入内容,可以是字符串或消息数组",
+	            "oneOf": [
+	              {
+	                "type": "string"
+	              },
+	              {
+	                "type": "array",
+	                "items": {
+	                  "type": "object",
+	                  "properties": {}
+	                }
+	              }
+	            ]
+	          },
+	          "instructions": {
+	            "type": "string"
+	          },
+	          "previous_response_id": {
+	            "type": "string"
+	          }
+	        }
+	      },
+	      "ResponsesStreamResponse": {
+	        "type": "object",
+	        "properties": {
+	          "type": {
             "type": "string"
           },
           "response": {
@@ -7138,4 +7239,4 @@
       "BearerAuth": []
     }
   ]
-}
+}

+ 20 - 0
dto/openai_compaction.go

@@ -0,0 +1,20 @@
+package dto
+
+import (
+	"encoding/json"
+
+	"github.com/QuantumNous/new-api/types"
+)
+
+type OpenAIResponsesCompactionResponse struct {
+	ID        string          `json:"id"`
+	Object    string          `json:"object"`
+	CreatedAt int             `json:"created_at"`
+	Output    json.RawMessage `json:"output"`
+	Usage     *Usage          `json:"usage"`
+	Error     any             `json:"error,omitempty"`
+}
+
+func (o *OpenAIResponsesCompactionResponse) GetOpenAIError() *types.OpenAIError {
+	return GetOpenAIError(o.Error)
+}

+ 40 - 0
dto/openai_responses_compaction_request.go

@@ -0,0 +1,40 @@
+package dto
+
+import (
+	"encoding/json"
+	"strings"
+
+	"github.com/QuantumNous/new-api/types"
+
+	"github.com/gin-gonic/gin"
+)
+
+type OpenAIResponsesCompactionRequest struct {
+	Model              string          `json:"model"`
+	Input              json.RawMessage `json:"input,omitempty"`
+	Instructions       json.RawMessage `json:"instructions,omitempty"`
+	PreviousResponseID string          `json:"previous_response_id,omitempty"`
+}
+
+func (r *OpenAIResponsesCompactionRequest) GetTokenCountMeta() *types.TokenCountMeta {
+	var parts []string
+	if len(r.Instructions) > 0 {
+		parts = append(parts, string(r.Instructions))
+	}
+	if len(r.Input) > 0 {
+		parts = append(parts, string(r.Input))
+	}
+	return &types.TokenCountMeta{
+		CombineText: strings.Join(parts, "\n"),
+	}
+}
+
+func (r *OpenAIResponsesCompactionRequest) IsStream(c *gin.Context) bool {
+	return false
+}
+
+func (r *OpenAIResponsesCompactionRequest) SetModelName(modelName string) {
+	if modelName != "" {
+		r.Model = modelName
+	}
+}

+ 4 - 0
middleware/distributor.go

@@ -329,6 +329,10 @@ func getModelRequest(c *gin.Context) (*ModelRequest, bool, error) {
 		modelRequest.Group = req.Group
 		common.SetContextKey(c, constant.ContextKeyTokenGroup, modelRequest.Group)
 	}
+
+	if strings.HasPrefix(c.Request.URL.Path, "/v1/responses/compact") && modelRequest.Model != "" {
+		modelRequest.Model = ratio_setting.WithCompactModelSuffix(modelRequest.Model)
+	}
 	return &modelRequest, shouldSelectChannel, nil
 }
 

+ 17 - 5
relay/channel/codex/adaptor.go

@@ -53,6 +53,8 @@ func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.Rela
 }
 
 func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
+	isCompact := info != nil && info.RelayMode == relayconstant.RelayModeResponsesCompact
+
 	if info != nil && info.ChannelSetting.SystemPrompt != "" {
 		systemPrompt := info.ChannelSetting.SystemPrompt
 
@@ -88,7 +90,9 @@ func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommo
 			}
 		}
 	}
-
+	if isCompact {
+		return request, nil
+	}
 	// codex: store must be false
 	request.Store = json.RawMessage("false")
 	// rm max_output_tokens
@@ -102,10 +106,14 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 }
 
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
-	if info.RelayMode != relayconstant.RelayModeResponses {
+	if info.RelayMode != relayconstant.RelayModeResponses && info.RelayMode != relayconstant.RelayModeResponsesCompact {
 		return nil, types.NewError(errors.New("codex channel: endpoint not supported"), types.ErrorCodeInvalidRequest)
 	}
 
+	if info.RelayMode == relayconstant.RelayModeResponsesCompact {
+		return openai.OaiResponsesCompactionHandler(c, resp)
+	}
+
 	if info.IsStream {
 		return openai.OaiResponsesStreamHandler(c, info, resp)
 	}
@@ -121,10 +129,14 @@ func (a *Adaptor) GetChannelName() string {
 }
 
 func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
-	if info.RelayMode != relayconstant.RelayModeResponses {
-		return "", errors.New("codex channel: only /v1/responses is supported")
+	if info.RelayMode != relayconstant.RelayModeResponses && info.RelayMode != relayconstant.RelayModeResponsesCompact {
+		return "", errors.New("codex channel: only /v1/responses and /v1/responses/compact are supported")
+	}
+	path := "/backend-api/codex/responses"
+	if info.RelayMode == relayconstant.RelayModeResponsesCompact {
+		path = "/backend-api/codex/responses/compact"
 	}
-	return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, "/backend-api/codex/responses", info.ChannelType), nil
+	return relaycommon.GetFullRequestURL(info.ChannelBaseUrl, path, info.ChannelType), nil
 }
 
 func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {

+ 17 - 1
relay/channel/codex/constants.go

@@ -1,9 +1,25 @@
 package codex
 
-var ModelList = []string{
+import (
+	"github.com/QuantumNous/new-api/setting/ratio_setting"
+	"github.com/samber/lo"
+)
+
+var baseModelList = []string{
 	"gpt-5", "gpt-5-codex", "gpt-5-codex-mini",
 	"gpt-5.1", "gpt-5.1-codex", "gpt-5.1-codex-max", "gpt-5.1-codex-mini",
 	"gpt-5.2", "gpt-5.2-codex",
 }
 
+var ModelList = withCompactModelSuffix(baseModelList)
+
 const ChannelName = "codex"
+
+func withCompactModelSuffix(models []string) []string {
+	out := make([]string, 0, len(models)*2)
+	out = append(out, models...)
+	out = append(out, lo.Map(models, func(model string, _ int) string {
+		return ratio_setting.WithCompactModelSuffix(model)
+	})...)
+	return lo.Uniq(out)
+}

+ 2 - 0
relay/channel/openai/adaptor.go

@@ -620,6 +620,8 @@ func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycom
 		} else {
 			usage, err = OaiResponsesHandler(c, info, resp)
 		}
+	case relayconstant.RelayModeResponsesCompact:
+		usage, err = OaiResponsesCompactionHandler(c, resp)
 	default:
 		if info.IsStream {
 			usage, err = OaiStreamHandler(c, info, resp)

+ 44 - 0
relay/channel/openai/relay_responses_compact.go

@@ -0,0 +1,44 @@
+package openai
+
+import (
+	"io"
+	"net/http"
+
+	"github.com/QuantumNous/new-api/common"
+	"github.com/QuantumNous/new-api/dto"
+	"github.com/QuantumNous/new-api/service"
+	"github.com/QuantumNous/new-api/types"
+
+	"github.com/gin-gonic/gin"
+)
+
+func OaiResponsesCompactionHandler(c *gin.Context, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
+	defer service.CloseResponseBodyGracefully(resp)
+
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return nil, types.NewOpenAIError(err, types.ErrorCodeReadResponseBodyFailed, http.StatusInternalServerError)
+	}
+
+	var compactResp dto.OpenAIResponsesCompactionResponse
+	if err := common.Unmarshal(responseBody, &compactResp); err != nil {
+		return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
+	}
+	if oaiError := compactResp.GetOpenAIError(); oaiError != nil && oaiError.Type != "" {
+		return nil, types.WithOpenAIError(*oaiError, resp.StatusCode)
+	}
+
+	service.IOCopyBytesGracefully(c, resp, responseBody)
+
+	usage := dto.Usage{}
+	if compactResp.Usage != nil {
+		usage.PromptTokens = compactResp.Usage.InputTokens
+		usage.CompletionTokens = compactResp.Usage.OutputTokens
+		usage.TotalTokens = compactResp.Usage.TotalTokens
+		if compactResp.Usage.InputTokensDetails != nil {
+			usage.PromptTokensDetails.CachedTokens = compactResp.Usage.InputTokensDetails.CachedTokens
+		}
+	}
+
+	return &usage, nil
+}

+ 14 - 0
relay/common/relay_info.go

@@ -481,6 +481,11 @@ func GenRelayInfo(c *gin.Context, relayFormat types.RelayFormat, request dto.Req
 			break
 		}
 		err = errors.New("request is not a OpenAIResponsesRequest")
+	case types.RelayFormatOpenAIResponsesCompaction:
+		if request, ok := request.(*dto.OpenAIResponsesCompactionRequest); ok {
+			return GenRelayInfoResponsesCompaction(c, request), nil
+		}
+		return nil, errors.New("request is not a OpenAIResponsesCompactionRequest")
 	case types.RelayFormatTask:
 		info = genBaseRelayInfo(c, nil)
 	case types.RelayFormatMjProxy:
@@ -531,6 +536,15 @@ func (info *RelayInfo) AppendRequestConversion(format types.RelayFormat) {
 	info.RequestConversionChain = append(info.RequestConversionChain, format)
 }
 
+func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo {
+	info := genBaseRelayInfo(c, request)
+	if info.RelayMode == relayconstant.RelayModeUnknown {
+		info.RelayMode = relayconstant.RelayModeResponsesCompact
+	}
+	info.RelayFormat = types.RelayFormatOpenAIResponsesCompaction
+	return info
+}
+
 //func (info *RelayInfo) SetPromptTokens(promptTokens int) {
 //	info.promptTokens = promptTokens
 //}

+ 4 - 0
relay/constant/relay_mode.go

@@ -50,6 +50,8 @@ const (
 	RelayModeRealtime
 
 	RelayModeGemini
+
+	RelayModeResponsesCompact
 )
 
 func Path2RelayMode(path string) int {
@@ -70,6 +72,8 @@ func Path2RelayMode(path string) int {
 		relayMode = RelayModeImagesEdits
 	} else if strings.HasPrefix(path, "/v1/edits") {
 		relayMode = RelayModeEdits
+	} else if strings.HasPrefix(path, "/v1/responses/compact") {
+		relayMode = RelayModeResponsesCompact
 	} else if strings.HasPrefix(path, "/v1/responses") {
 		relayMode = RelayModeResponses
 	} else if strings.HasPrefix(path, "/v1/audio/speech") {

+ 24 - 1
relay/helper/model_mapped.go

@@ -4,13 +4,27 @@ import (
 	"encoding/json"
 	"errors"
 	"fmt"
+	"strings"
 
 	"github.com/QuantumNous/new-api/dto"
 	"github.com/QuantumNous/new-api/relay/common"
+	relayconstant "github.com/QuantumNous/new-api/relay/constant"
+	"github.com/QuantumNous/new-api/setting/ratio_setting"
 	"github.com/gin-gonic/gin"
 )
 
 func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Request) error {
+	if info.ChannelMeta == nil {
+		info.ChannelMeta = &common.ChannelMeta{}
+	}
+
+	isResponsesCompact := info.RelayMode == relayconstant.RelayModeResponsesCompact
+	originModelName := info.OriginModelName
+	mappingModelName := originModelName
+	if isResponsesCompact && strings.HasSuffix(originModelName, ratio_setting.CompactModelSuffix) {
+		mappingModelName = strings.TrimSuffix(originModelName, ratio_setting.CompactModelSuffix)
+	}
+
 	// map model name
 	modelMapping := c.GetString("model_mapping")
 	if modelMapping != "" && modelMapping != "{}" {
@@ -21,7 +35,7 @@ func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Reque
 		}
 
 		// 支持链式模型重定向,最终使用链尾的模型
-		currentModel := info.OriginModelName
+		currentModel := mappingModelName
 		visitedModels := map[string]bool{
 			currentModel: true,
 		}
@@ -51,6 +65,15 @@ func ModelMappedHelper(c *gin.Context, info *common.RelayInfo, request dto.Reque
 			info.UpstreamModelName = currentModel
 		}
 	}
+
+	if isResponsesCompact {
+		finalUpstreamModelName := mappingModelName
+		if info.IsModelMapped && info.UpstreamModelName != "" {
+			finalUpstreamModelName = info.UpstreamModelName
+		}
+		info.UpstreamModelName = finalUpstreamModelName
+		info.OriginModelName = ratio_setting.WithCompactModelSuffix(finalUpstreamModelName)
+	}
 	if request != nil {
 		request.SetModelName(info.UpstreamModelName)
 	}

+ 13 - 0
relay/helper/valid_request.go

@@ -34,6 +34,8 @@ func GetAndValidateRequest(c *gin.Context, format types.RelayFormat) (request dt
 		request, err = GetAndValidateClaudeRequest(c)
 	case types.RelayFormatOpenAIResponses:
 		request, err = GetAndValidateResponsesRequest(c)
+	case types.RelayFormatOpenAIResponsesCompaction:
+		request, err = GetAndValidateResponsesCompactionRequest(c)
 
 	case types.RelayFormatOpenAIImage:
 		request, err = GetAndValidOpenAIImageRequest(c, relayMode)
@@ -125,6 +127,17 @@ func GetAndValidateResponsesRequest(c *gin.Context) (*dto.OpenAIResponsesRequest
 	return request, nil
 }
 
+func GetAndValidateResponsesCompactionRequest(c *gin.Context) (*dto.OpenAIResponsesCompactionRequest, error) {
+	request := &dto.OpenAIResponsesCompactionRequest{}
+	if err := common.UnmarshalBodyReusable(c, request); err != nil {
+		return nil, err
+	}
+	if request.Model == "" {
+		return nil, errors.New("model is required")
+	}
+	return request, nil
+}
+
 func GetAndValidOpenAIImageRequest(c *gin.Context, relayMode int) (*dto.ImageRequest, error) {
 	imageRequest := &dto.ImageRequest{}
 

+ 52 - 5
relay/responses_handler.go

@@ -8,8 +8,10 @@ import (
 	"strings"
 
 	"github.com/QuantumNous/new-api/common"
+	appconstant "github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
+	relayconstant "github.com/QuantumNous/new-api/relay/constant"
 	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/model_setting"
@@ -20,10 +22,37 @@ import (
 
 func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types.NewAPIError) {
 	info.InitChannelMeta(c)
+	if info.RelayMode == relayconstant.RelayModeResponsesCompact {
+		switch info.ApiType {
+		case appconstant.APITypeOpenAI, appconstant.APITypeCodex:
+		default:
+			return types.NewErrorWithStatusCode(
+				fmt.Errorf("unsupported endpoint %q for api type %d", "/v1/responses/compact", info.ApiType),
+				types.ErrorCodeInvalidRequest,
+				http.StatusBadRequest,
+				types.ErrOptionWithSkipRetry(),
+			)
+		}
+	}
 
-	responsesReq, ok := info.Request.(*dto.OpenAIResponsesRequest)
-	if !ok {
-		return types.NewErrorWithStatusCode(fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest, got %T", info.Request), types.ErrorCodeInvalidRequest, http.StatusBadRequest, types.ErrOptionWithSkipRetry())
+	var responsesReq *dto.OpenAIResponsesRequest
+	switch req := info.Request.(type) {
+	case *dto.OpenAIResponsesRequest:
+		responsesReq = req
+	case *dto.OpenAIResponsesCompactionRequest:
+		responsesReq = &dto.OpenAIResponsesRequest{
+			Model:              req.Model,
+			Input:              req.Input,
+			Instructions:       req.Instructions,
+			PreviousResponseID: req.PreviousResponseID,
+		}
+	default:
+		return types.NewErrorWithStatusCode(
+			fmt.Errorf("invalid request type, expected dto.OpenAIResponsesRequest or dto.OpenAIResponsesCompactionRequest, got %T", info.Request),
+			types.ErrorCodeInvalidRequest,
+			http.StatusBadRequest,
+			types.ErrOptionWithSkipRetry(),
+		)
 	}
 
 	request, err := common.DeepCopy(responsesReq)
@@ -105,10 +134,28 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
 		return newAPIError
 	}
 
+	usageDto := usage.(*dto.Usage)
+	if info.RelayMode == relayconstant.RelayModeResponsesCompact {
+		originModelName := info.OriginModelName
+		originPriceData := info.PriceData
+
+		_, err := helper.ModelPriceHelper(c, info, info.GetEstimatePromptTokens(), &types.TokenCountMeta{})
+		if err != nil {
+			info.OriginModelName = originModelName
+			info.PriceData = originPriceData
+			return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry())
+		}
+		postConsumeQuota(c, info, usageDto)
+
+		info.OriginModelName = originModelName
+		info.PriceData = originPriceData
+		return nil
+	}
+
 	if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
-		service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
+		service.PostAudioConsumeQuota(c, info, usageDto, "")
 	} else {
-		postConsumeQuota(c, info, usage.(*dto.Usage))
+		postConsumeQuota(c, info, usageDto)
 	}
 	return nil
 }

+ 3 - 0
router/relay-router.go

@@ -93,6 +93,9 @@ func SetRelayRouter(router *gin.Engine) {
 		httpRouter.POST("/responses", func(c *gin.Context) {
 			controller.Relay(c, types.RelayFormatOpenAIResponses)
 		})
+		httpRouter.POST("/responses/compact", func(c *gin.Context) {
+			controller.Relay(c, types.RelayFormatOpenAIResponsesCompaction)
+		})
 
 		// image related routes
 		httpRouter.POST("/edits", func(c *gin.Context) {

+ 13 - 0
setting/ratio_setting/compact_suffix.go

@@ -0,0 +1,13 @@
+package ratio_setting
+
+import "strings"
+
+const CompactModelSuffix = "-openai-compact"
+const CompactWildcardModelKey = "*" + CompactModelSuffix
+
+func WithCompactModelSuffix(modelName string) string {
+	if strings.HasSuffix(modelName, CompactModelSuffix) {
+		return modelName
+	}
+	return modelName + CompactModelSuffix
+}

+ 17 - 0
setting/ratio_setting/model_ratio.go

@@ -411,6 +411,17 @@ func GetModelPrice(name string, printErr bool) (float64, bool) {
 
 	name = FormatMatchingModelName(name)
 
+	if strings.HasSuffix(name, CompactModelSuffix) {
+		price, ok := modelPriceMap[CompactWildcardModelKey]
+		if !ok {
+			if printErr {
+				common.SysError("model price not found: " + name)
+			}
+			return -1, false
+		}
+		return price, true
+	}
+
 	price, ok := modelPriceMap[name]
 	if !ok {
 		if printErr {
@@ -448,6 +459,12 @@ func GetModelRatio(name string) (float64, bool, string) {
 
 	ratio, ok := modelRatioMap[name]
 	if !ok {
+		if strings.HasSuffix(name, CompactModelSuffix) {
+			if wildcardRatio, ok := modelRatioMap[CompactWildcardModelKey]; ok {
+				return wildcardRatio, true, name
+			}
+			return 0, true, name
+		}
 		return 37.5, operation_setting.SelfUseModeEnabled, name
 	}
 	return ratio, true, name

+ 10 - 9
types/relay_format.go

@@ -3,15 +3,16 @@ package types
 type RelayFormat string
 
 const (
-	RelayFormatOpenAI          RelayFormat = "openai"
-	RelayFormatClaude                      = "claude"
-	RelayFormatGemini                      = "gemini"
-	RelayFormatOpenAIResponses             = "openai_responses"
-	RelayFormatOpenAIAudio                 = "openai_audio"
-	RelayFormatOpenAIImage                 = "openai_image"
-	RelayFormatOpenAIRealtime              = "openai_realtime"
-	RelayFormatRerank                      = "rerank"
-	RelayFormatEmbedding                   = "embedding"
+	RelayFormatOpenAI                    RelayFormat = "openai"
+	RelayFormatClaude                                = "claude"
+	RelayFormatGemini                                = "gemini"
+	RelayFormatOpenAIResponses                       = "openai_responses"
+	RelayFormatOpenAIResponsesCompaction             = "openai_responses_compaction"
+	RelayFormatOpenAIAudio                           = "openai_audio"
+	RelayFormatOpenAIImage                           = "openai_image"
+	RelayFormatOpenAIRealtime                        = "openai_realtime"
+	RelayFormatRerank                                = "rerank"
+	RelayFormatEmbedding                             = "embedding"
 
 	RelayFormatTask    = "task"
 	RelayFormatMjProxy = "mj_proxy"

+ 4 - 0
web/src/components/table/channels/modals/ModelTestModal.jsx

@@ -66,6 +66,10 @@ const ModelTestModal = ({
     { value: '', label: t('自动检测') },
     { value: 'openai', label: 'OpenAI (/v1/chat/completions)' },
     { value: 'openai-response', label: 'OpenAI Response (/v1/responses)' },
+    {
+      value: 'openai-response-compact',
+      label: 'OpenAI Response Compaction (/v1/responses/compact)',
+    },
     { value: 'anthropic', label: 'Anthropic (/v1/messages)' },
     {
       value: 'gemini',

+ 1 - 0
web/src/components/table/models/modals/EditModelModal.jsx

@@ -45,6 +45,7 @@ const { Text, Title } = Typography;
 const ENDPOINT_TEMPLATE = {
   openai: { path: '/v1/chat/completions', method: 'POST' },
   'openai-response': { path: '/v1/responses', method: 'POST' },
+  'openai-response-compact': { path: '/v1/responses/compact', method: 'POST' },
   anthropic: { path: '/v1/messages', method: 'POST' },
   gemini: { path: '/v1beta/models/{model}:generateContent', method: 'POST' },
   'jina-rerank': { path: '/v1/rerank', method: 'POST' },

+ 1 - 0
web/src/components/table/models/modals/EditPrefillGroupModal.jsx

@@ -43,6 +43,7 @@ const { Text, Title } = Typography;
 const ENDPOINT_TEMPLATE = {
   openai: { path: '/v1/chat/completions', method: 'POST' },
   'openai-response': { path: '/v1/responses', method: 'POST' },
+  'openai-response-compact': { path: '/v1/responses/compact', method: 'POST' },
   anthropic: { path: '/v1/messages', method: 'POST' },
   gemini: { path: '/v1beta/models/{model}:generateContent', method: 'POST' },
   'jina-rerank': { path: '/v1/rerank', method: 'POST' },

+ 1 - 0
web/src/constants/common.constant.js

@@ -26,6 +26,7 @@ export const TABLE_COMPACT_MODES_KEY = 'table_compact_modes';
 export const API_ENDPOINTS = [
   '/v1/chat/completions',
   '/v1/responses',
+  '/v1/responses/compact',
   '/v1/messages',
   '/v1beta/models',
   '/v1/embeddings',