Просмотр исходного кода

Merge pull request #1247 from RedwindA/feat/25lite-thinking

feat: improve gemini thinking budget adaption
Calcium-Ion 8 месяцев назад
Родитель
Коммит
0b9b21eafd
1 изменённый файл: 53 добавлено и 62 удалено
  1. 53 62
      relay/channel/gemini/relay-gemini.go

+ 53 - 62
relay/channel/gemini/relay-gemini.go

@@ -39,11 +39,45 @@ var geminiSupportedMimeTypes = map[string]bool{
 
 // Thinking-budget ranges allowed by Gemini
 const (
-	pro25MinBudget   = 128
-	pro25MaxBudget   = 32768
-	flash25MaxBudget = 24576
+	pro25MinBudget       = 128
+	pro25MaxBudget       = 32768
+	flash25MaxBudget     = 24576
+	flash25LiteMinBudget = 512
+	flash25LiteMaxBudget = 24576
 )
 
+// clampThinkingBudget returns budget clamped to the range selected by modelName's prefix: "gemini-2.5-flash-lite" uses [flash25LiteMinBudget, flash25LiteMaxBudget]; "gemini-2.5-pro" (except the preview-05-06 and preview-03-25 builds) uses [pro25MinBudget, pro25MaxBudget]; every other model uses [0, flash25MaxBudget].
+func clampThinkingBudget(modelName string, budget int) int {
+	isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
+		!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
+		!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25") // the two older previews are excluded and fall through to the default range
+	is25FlashLite := strings.HasPrefix(modelName, "gemini-2.5-flash-lite")
+
+	if is25FlashLite { // flash-lite: raise to its minimum, cap at its maximum
+		if budget < flash25LiteMinBudget {
+			return flash25LiteMinBudget
+		}
+		if budget > flash25LiteMaxBudget {
+			return flash25LiteMaxBudget
+		}
+	} else if isNew25Pro { // newer 2.5 pro: the lower bound is pro25MinBudget, so the result is never 0
+		if budget < pro25MinBudget {
+			return pro25MinBudget
+		}
+		if budget > pro25MaxBudget {
+			return pro25MaxBudget
+		}
+	} else { // all other models: negative budgets become 0, large ones are capped
+		if budget < 0 {
+			return 0
+		}
+		if budget > flash25MaxBudget {
+			return flash25MaxBudget
+		}
+	}
+	return budget // already inside the selected range
+}
+
 // Setting safety to the lowest possible values since Gemini is already powerless enough
 func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*GeminiChatRequest, error) {
 
@@ -65,49 +99,31 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
 	}
 
 	if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
-		// 新增逻辑:处理 -thinking-<budget> 格式
-		if strings.Contains(info.OriginModelName, "-thinking-") {
-			parts := strings.SplitN(info.OriginModelName, "-thinking-", 2)
+		modelName := info.OriginModelName
+		isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
+			!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
+			!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25")
+		is25FlashLite := strings.HasPrefix(modelName, "gemini-2.5-flash-lite")
+
+		if strings.Contains(modelName, "-thinking-") {
+			parts := strings.SplitN(modelName, "-thinking-", 2)
 			if len(parts) == 2 && parts[1] != "" {
 				if budgetTokens, err := strconv.Atoi(parts[1]); err == nil {
-					// 从模型名称成功解析预算
-					isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
-						!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
-						!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
-
-					if isNew25Pro {
-						// 新的2.5pro模型:ThinkingBudget范围为128-32768
-						if budgetTokens < pro25MinBudget {
-							budgetTokens = pro25MinBudget
-						} else if budgetTokens > pro25MaxBudget {
-							budgetTokens = pro25MaxBudget
-						}
-					} else {
-						// 其他模型:ThinkingBudget范围为0-24576
-						if budgetTokens < 0 {
-							budgetTokens = 0
-						} else if budgetTokens > flash25MaxBudget {
-							budgetTokens = flash25MaxBudget
-						}
-					}
-
+					clampedBudget := clampThinkingBudget(modelName, budgetTokens)
 					geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
-						ThinkingBudget:  common.GetPointer(budgetTokens),
+						ThinkingBudget:  common.GetPointer(clampedBudget),
 						IncludeThoughts: true,
 					}
 				}
-				// 如果解析失败,则不设置ThinkingConfig,静默处理
 			}
-		} else if strings.HasSuffix(info.OriginModelName, "-thinking") { // 保留旧逻辑以兼容
-			// 硬编码不支持 ThinkingBudget 的旧模型
+		} else if strings.HasSuffix(modelName, "-thinking") {
 			unsupportedModels := []string{
 				"gemini-2.5-pro-preview-05-06",
 				"gemini-2.5-pro-preview-03-25",
 			}
-
 			isUnsupported := false
 			for _, unsupportedModel := range unsupportedModels {
-				if strings.HasPrefix(info.OriginModelName, unsupportedModel) {
+				if strings.HasPrefix(modelName, unsupportedModel) {
 					isUnsupported = true
 					break
 				}
@@ -119,39 +135,14 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
 				}
 			} else {
 				budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
-
-				// 检查是否为新的2.5pro模型(支持ThinkingBudget但有特殊范围)
-				isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
-					!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
-					!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
-
-				if isNew25Pro {
-					// 新的2.5pro模型:ThinkingBudget范围为128-32768
-					if budgetTokens == 0 || budgetTokens < 128 {
-						budgetTokens = 128
-					} else if budgetTokens > 32768 {
-						budgetTokens = 32768
-					}
-				} else {
-					// 其他模型:ThinkingBudget范围为0-24576
-					if budgetTokens == 0 || budgetTokens > 24576 {
-						budgetTokens = 24576
-					}
-				}
-
+				clampedBudget := clampThinkingBudget(modelName, int(budgetTokens))
 				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
-					ThinkingBudget:  common.GetPointer(int(budgetTokens)),
+					ThinkingBudget:  common.GetPointer(clampedBudget),
 					IncludeThoughts: true,
 				}
 			}
-		} else if strings.HasSuffix(info.OriginModelName, "-nothinking") {
-			// 检查是否为新的2.5pro模型(不支持-nothinking,因为最低值只能为128)
-			isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") &&
-				!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") &&
-				!strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25")
-
-			if !isNew25Pro {
-				// 只有非新2.5pro模型才支持-nothinking
+		} else if strings.HasSuffix(modelName, "-nothinking") {
+			if !isNew25Pro && !is25FlashLite {
 				geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{
 					ThinkingBudget: common.GetPointer(0),
 				}