Просмотр исходного кода

Merge branch 'Calcium-Ion:main' into main

GuoRuqiang 1 год назад
Родитель
Коммит
a0673ef2b6

+ 33 - 20
common/model-ratio.go

@@ -119,6 +119,13 @@ var defaultModelRatio = map[string]float64{
 	"glm-4v":                         0.05 * RMB, // ¥0.05 / 1k tokens
 	"glm-4-alltools":                 0.1 * RMB,  // ¥0.1 / 1k tokens
 	"glm-3-turbo":                    0.3572,
+	"glm-4-plus":                     0.05 * RMB,
+	"glm-4-0520":                     0.1 * RMB,
+	"glm-4-air":                      0.001 * RMB,
+	"glm-4-airx":                     0.01 * RMB,
+	"glm-4-long":                     0.001 * RMB,
+	"glm-4-flash":                    0,
+	"glm-4v-plus":                    0.01 * RMB,
 	"qwen-turbo":                     0.8572, // ¥0.012 / 1k tokens
 	"qwen-plus":                      10,     // ¥0.14 / 1k tokens
 	"text-embedding-v1":              0.05,   // ¥0.0007 / 1k tokens
@@ -137,26 +144,28 @@ var defaultModelRatio = map[string]float64{
 	"hunyuan":                        7.143,  // ¥0.1 / 1k tokens  // https://cloud.tencent.com/document/product/1729/97731#e0e6be58-60c8-469f-bdeb-6c264ce3b4d0
 	// https://platform.lingyiwanwu.com/docs#-计费单元
 	// 已经按照 7.2 来换算美元价格
-	"yi-34b-chat-0205":      0.18,
-	"yi-34b-chat-200k":      0.864,
-	"yi-vl-plus":            0.432,
-	"yi-large":              20.0 / 1000 * RMB,
-	"yi-medium":             2.5 / 1000 * RMB,
-	"yi-vision":             6.0 / 1000 * RMB,
-	"yi-medium-200k":        12.0 / 1000 * RMB,
-	"yi-spark":              1.0 / 1000 * RMB,
-	"yi-large-rag":          25.0 / 1000 * RMB,
-	"yi-large-turbo":        12.0 / 1000 * RMB,
-	"yi-large-preview":      20.0 / 1000 * RMB,
-	"yi-large-rag-preview":  25.0 / 1000 * RMB,
-	"command":               0.5,
-	"command-nightly":       0.5,
-	"command-light":         0.5,
-	"command-light-nightly": 0.5,
-	"command-r":             0.25,
-	"command-r-plus	":       1.5,
-	"deepseek-chat":         0.07,
-	"deepseek-coder":        0.07,
+	"yi-34b-chat-0205":       0.18,
+	"yi-34b-chat-200k":       0.864,
+	"yi-vl-plus":             0.432,
+	"yi-large":               20.0 / 1000 * RMB,
+	"yi-medium":              2.5 / 1000 * RMB,
+	"yi-vision":              6.0 / 1000 * RMB,
+	"yi-medium-200k":         12.0 / 1000 * RMB,
+	"yi-spark":               1.0 / 1000 * RMB,
+	"yi-large-rag":           25.0 / 1000 * RMB,
+	"yi-large-turbo":         12.0 / 1000 * RMB,
+	"yi-large-preview":       20.0 / 1000 * RMB,
+	"yi-large-rag-preview":   25.0 / 1000 * RMB,
+	"command":                0.5,
+	"command-nightly":        0.5,
+	"command-light":          0.5,
+	"command-light-nightly":  0.5,
+	"command-r":              0.25,
+	"command-r-plus":         1.5,
+	"command-r-08-2024":      0.075,
+	"command-r-plus-08-2024": 1.25,
+	"deepseek-chat":          0.07,
+	"deepseek-coder":         0.07,
 	// Perplexity online 模型对搜索额外收费,有需要应自行调整,此处不计入搜索费用
 	"llama-3-sonar-small-32k-chat":   0.2 / 1000 * USD,
 	"llama-3-sonar-small-32k-online": 0.2 / 1000 * USD,
@@ -365,6 +374,10 @@ func GetCompletionRatio(name string) float64 {
 			return 3
 		case "command-r-plus":
 			return 5
+		case "command-r-08-2024":
+			return 4
+		case "command-r-plus-08-2024":
+			return 4
 		default:
 			return 2
 		}

+ 4 - 1
relay/channel/cohere/constant.go

@@ -1,7 +1,10 @@
 package cohere
 
 var ModelList = []string{
-	"command-r", "command-r-plus", "command-light", "command-light-nightly", "command", "command-nightly",
+	"command-r", "command-r-plus",
+	"command-r-08-2024", "command-r-plus-08-2024",
+	"c4ai-aya-23-35b", "c4ai-aya-23-8b",
+	"command-light", "command-light-nightly", "command", "command-nightly",
 	"rerank-english-v3.0", "rerank-multilingual-v3.0", "rerank-english-v2.0", "rerank-multilingual-v2.0",
 }
 

+ 3 - 1
relay/channel/jina/adaptor.go

@@ -32,7 +32,7 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
 	if info.RelayMode == constant.RelayModeRerank {
 		return fmt.Sprintf("%s/v1/rerank", info.BaseUrl), nil
 	} else if info.RelayMode == constant.RelayModeEmbeddings {
-		return fmt.Sprintf("%s/v1/embeddings ", info.BaseUrl), nil
+		return fmt.Sprintf("%s/v1/embeddings", info.BaseUrl), nil
 	}
 	return "", errors.New("invalid relay mode")
 }
@@ -58,6 +58,8 @@ func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dt
 func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage *dto.Usage, err *dto.OpenAIErrorWithStatusCode) {
 	if info.RelayMode == constant.RelayModeRerank {
 		err, usage = jinaRerankHandler(c, resp)
+	} else if info.RelayMode == constant.RelayModeEmbeddings {
+		err, usage = jinaEmbeddingHandler(c, resp)
 	}
 	return
 }

+ 25 - 0
relay/channel/jina/relay-jina.go

@@ -33,3 +33,28 @@ func jinaRerankHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWit
 	_, err = c.Writer.Write(jsonResponse)
 	return nil, &jinaResp.Usage
 }
+
+func jinaEmbeddingHandler(c *gin.Context, resp *http.Response) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
+	responseBody, err := io.ReadAll(resp.Body)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
+	}
+	err = resp.Body.Close()
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
+	}
+	var jinaResp dto.OpenAIEmbeddingResponse
+	err = json.Unmarshal(responseBody, &jinaResp)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
+	}
+
+	jsonResponse, err := json.Marshal(jinaResp)
+	if err != nil {
+		return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
+	}
+	c.Writer.Header().Set("Content-Type", "application/json")
+	c.Writer.WriteHeader(resp.StatusCode)
+	_, err = c.Writer.Write(jsonResponse)
+	return nil, &jinaResp.Usage
+}

+ 1 - 1
relay/channel/zhipu_4v/constants.go

@@ -1,7 +1,7 @@
 package zhipu_4v
 
 var ModelList = []string{
-	"glm-4", "glm-4v", "glm-3-turbo", "glm-4-alltools",
+	"glm-4", "glm-4v", "glm-3-turbo", "glm-4-alltools", "glm-4-plus", "glm-4-0520", "glm-4-air", "glm-4-airx", "glm-4-long", "glm-4-flash", "glm-4v-plus",
 }
 
 var ChannelName = "zhipu_4v"

+ 1 - 1
relay/relay-text.go

@@ -52,7 +52,7 @@ func getAndValidateTextRequest(c *gin.Context, relayInfo *relaycommon.RelayInfo)
 		}
 	case relayconstant.RelayModeEmbeddings:
 	case relayconstant.RelayModeModerations:
-		if textRequest.Input == "" {
+		if textRequest.Input == "" || textRequest.Input == nil {
 			return nil, errors.New("field input is required")
 		}
 	case relayconstant.RelayModeEdits: