Przeglądaj źródła

chore: update gpt3.5 completion ratio

JustSong 2 lata temu
rodzic
commit
38668e7331
2 zmienionych plików z 12 dodań i 11 usunięć
  1. 7 5
      common/model-ratio.go
  2. 5 6
      controller/relay.go

+ 7 - 5
common/model-ratio.go

@@ -2,9 +2,11 @@ package common
 
 import "encoding/json"
 
+// ModelRatio
 // https://platform.openai.com/docs/models/model-endpoint-compatibility
 // https://openai.com/pricing
 // TODO: when a new api is enabled, check the pricing here
+// 1 === $0.002 / 1K tokens
 var ModelRatio = map[string]float64{
 	"gpt-4":                   15,
 	"gpt-4-0314":              15,
 	"gpt-4":                   15,
 	"gpt-4":                   15,
 	"gpt-4-0314":              15,
 	"gpt-4-0314":              15,
@@ -12,11 +14,11 @@ var ModelRatio = map[string]float64{
 	"gpt-4-32k":               30,
 	"gpt-4-32k-0314":          30,
 	"gpt-4-32k-0613":          30,
-	"gpt-3.5-turbo":           1, // $0.002 / 1K tokens
-	"gpt-3.5-turbo-0301":      1,
-	"gpt-3.5-turbo-0613":      1,
-	"gpt-3.5-turbo-16k":       2, // $0.004 / 1K tokens
-	"gpt-3.5-turbo-16k-0613":  2,
+	"gpt-3.5-turbo":           0.75, // $0.0015 / 1K tokens
+	"gpt-3.5-turbo-0301":      0.75,
+	"gpt-3.5-turbo-0613":      0.75,
+	"gpt-3.5-turbo-16k":       1.5, // $0.003 / 1K tokens
+	"gpt-3.5-turbo-16k-0613":  1.5,
 	"text-ada-001":            0.2,
 	"text-babbage-001":        0.25,
 	"text-curie-001":          1,

+ 5 - 6
controller/relay.go

@@ -239,16 +239,15 @@ func relayHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
 	defer func() {
 		if consumeQuota {
 			quota := 0
-			usingGPT4 := strings.HasPrefix(textRequest.Model, "gpt-4")
-			completionRatio := 1
-			if usingGPT4 {
+			completionRatio := 1.34 // default for gpt-3
+			if strings.HasPrefix(textRequest.Model, "gpt-4") {
 				completionRatio = 2
 			}
 			if isStream {
 				responseTokens := countTokenText(streamResponseText, textRequest.Model)
-				quota = promptTokens + responseTokens*completionRatio
+				quota = promptTokens + int(float64(responseTokens)*completionRatio)
 			} else {
-				quota = textResponse.Usage.PromptTokens + textResponse.Usage.CompletionTokens*completionRatio
+				quota = textResponse.Usage.PromptTokens + int(float64(textResponse.Usage.CompletionTokens)*completionRatio)
 			}
 			quota = int(float64(quota) * ratio)
 			if ratio != 0 && quota <= 0 {
@@ -260,7 +259,7 @@ func relayHelper(c *gin.Context, relayMode int) *OpenAIErrorWithStatusCode {
 				common.SysError("Error consuming token remain quota: " + err.Error())
 			}
 			userId := c.GetInt("id")
-			model.RecordLog(userId, model.LogTypeConsume, fmt.Sprintf("使用模型 %s 消耗 %d 点额度(模型倍率 %.2f,分组倍率 %.2f)", textRequest.Model, quota, modelRatio, groupRatio))
+			model.RecordLog(userId, model.LogTypeConsume, fmt.Sprintf("使用模型 %s 消耗 %d 点额度(模型倍率 %.2f,分组倍率 %.2f,补全倍率 %.2f)", textRequest.Model, quota, modelRatio, groupRatio, completionRatio))
 		}
 	}()
 	}()