瀏覽代碼

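chore: openai stream — OpenaiStreamHandler now returns only (error, usage); remove the per-adaptor ResponseText2Usage fallback in the ollama, perplexity and zhipu_4v adaptors; add glm-4-alltools and reprice glm-4v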

CalciumIon 1 年之前
父節點
當前提交
e262a9bd2c

+ 7 - 6
common/model-ratio.go

@@ -105,12 +105,13 @@ var defaultModelRatio = map[string]float64{
 	"gemini-1.0-pro-latest":          1,
 	"gemini-1.0-pro-vision-latest":   1,
 	"gemini-ultra":                   1,
-	"chatglm_turbo":                  0.3572, // ¥0.005 / 1k tokens
-	"chatglm_pro":                    0.7143, // ¥0.01 / 1k tokens
-	"chatglm_std":                    0.3572, // ¥0.005 / 1k tokens
-	"chatglm_lite":                   0.1429, // ¥0.002 / 1k tokens
-	"glm-4":                          7.143,  // ¥0.1 / 1k tokens
-	"glm-4v":                         7.143,  // ¥0.1 / 1k tokens
+	"chatglm_turbo":                  0.3572,     // ¥0.005 / 1k tokens
+	"chatglm_pro":                    0.7143,     // ¥0.01 / 1k tokens
+	"chatglm_std":                    0.3572,     // ¥0.005 / 1k tokens
+	"chatglm_lite":                   0.1429,     // ¥0.002 / 1k tokens
+	"glm-4":                          7.143,      // ¥0.1 / 1k tokens
+	"glm-4v":                         0.05 * RMB, // ¥0.05 / 1k tokens
+	"glm-4-alltools":                 0.1 * RMB,  // ¥0.1 / 1k tokens
 	"glm-3-turbo":                    0.3572,
 	"qwen-turbo":                     0.8572, // ¥0.012 / 1k tokens
 	"qwen-plus":                      10,     // ¥0.14 / 1k tokens

+ 1 - 6
relay/channel/ollama/adaptor.go

@@ -10,7 +10,6 @@ import (
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
 	relayconstant "one-api/relay/constant"
-	"one-api/service"
 )
 
 type Adaptor struct {
@@ -58,11 +57,7 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage *dto.Usage, err *dto.OpenAIErrorWithStatusCode) {
 	if info.IsStream {
-		var responseText string
-		err, usage, responseText, _ = openai.OpenaiStreamHandler(c, resp, info)
-		if usage == nil || usage.TotalTokens == 0 || (usage.PromptTokens+usage.CompletionTokens) == 0 {
-			usage, _ = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
-		}
+		err, usage = openai.OpenaiStreamHandler(c, resp, info)
 	} else {
 		if info.RelayMode == relayconstant.RelayModeEmbeddings {
 			err, usage = ollamaEmbeddingHandler(c, resp, info.PromptTokens, info.UpstreamModelName, info.RelayMode)

+ 1 - 1
relay/channel/openai/adaptor.go

@@ -89,7 +89,7 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage *dto.Usage, err *dto.OpenAIErrorWithStatusCode) {
 	if info.IsStream {
-		err, usage, _, _ = OpenaiStreamHandler(c, resp, info)
+		err, usage = OpenaiStreamHandler(c, resp, info)
 	} else {
 		err, usage = OpenaiHandler(c, resp, info.PromptTokens, info.UpstreamModelName)
 	}

+ 2 - 2
relay/channel/openai/relay-openai.go

@@ -17,7 +17,7 @@ import (
 	"time"
 )
 
-func OpenaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage, string, int) {
+func OpenaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
 	hasStreamUsage := false
 	responseId := ""
 	var createAt int64 = 0
@@ -168,7 +168,7 @@ func OpenaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.
 	if err != nil {
 		common.LogError(c, "close_response_body_failed: "+err.Error())
 	}
-	return nil, usage, responseTextBuilder.String(), toolCount
+	return nil, usage
 }
 
 func OpenaiHandler(c *gin.Context, resp *http.Response, promptTokens int, model string) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {

+ 1 - 6
relay/channel/perplexity/adaptor.go

@@ -10,7 +10,6 @@ import (
 	"one-api/relay/channel"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
-	"one-api/service"
 )
 
 type Adaptor struct {
@@ -54,11 +53,7 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage *dto.Usage, err *dto.OpenAIErrorWithStatusCode) {
 	if info.IsStream {
-		var responseText string
-		err, usage, responseText, _ = openai.OpenaiStreamHandler(c, resp, info)
-		if usage == nil || usage.TotalTokens == 0 || (usage.PromptTokens+usage.CompletionTokens) == 0 {
-			usage, _ = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
-		}
+		err, usage = openai.OpenaiStreamHandler(c, resp, info)
 	} else {
 		err, usage = openai.OpenaiHandler(c, resp, info.PromptTokens, info.UpstreamModelName)
 	}

+ 1 - 8
relay/channel/zhipu_4v/adaptor.go

@@ -10,7 +10,6 @@ import (
 	"one-api/relay/channel"
 	"one-api/relay/channel/openai"
 	relaycommon "one-api/relay/common"
-	"one-api/service"
 )
 
 type Adaptor struct {
@@ -55,13 +54,7 @@ func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, request
 
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage *dto.Usage, err *dto.OpenAIErrorWithStatusCode) {
 	if info.IsStream {
-		var responseText string
-		var toolCount int
-		err, usage, responseText, toolCount = openai.OpenaiStreamHandler(c, resp, info)
-		if usage == nil || usage.TotalTokens == 0 || (usage.PromptTokens+usage.CompletionTokens) == 0 {
-			usage, _ = service.ResponseText2Usage(responseText, info.UpstreamModelName, info.PromptTokens)
-			usage.CompletionTokens += toolCount * 7
-		}
+		err, usage = openai.OpenaiStreamHandler(c, resp, info)
 	} else {
 		err, usage = openai.OpenaiHandler(c, resp, info.PromptTokens, info.UpstreamModelName)
 	}

+ 1 - 1
relay/channel/zhipu_4v/constants.go

@@ -1,7 +1,7 @@
 package zhipu_4v
 
 var ModelList = []string{
-	"glm-4", "glm-4v", "glm-3-turbo",
+	"glm-4", "glm-4v", "glm-3-turbo", "glm-4-alltools",
 }
 
 var ChannelName = "zhipu_4v"