|
@@ -49,12 +49,20 @@ const (
|
|
|
flash25LiteMaxBudget = 24576
|
|
flash25LiteMaxBudget = 24576
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
-// clampThinkingBudget 根据模型名称将预算限制在允许的范围内
|
|
|
|
|
-func clampThinkingBudget(modelName string, budget int) int {
|
|
|
|
|
- isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
|
|
|
|
|
|
// isNew25ProModel reports whether modelName starts with "gemini-2.5-pro"
// without being one of the 05-06 or 03-25 preview releases (all matched by
// prefix, so suffixed variants of those previews are excluded as well).
func isNew25ProModel(modelName string) bool {
	if !strings.HasPrefix(modelName, "gemini-2.5-pro") {
		return false
	}
	excluded := [...]string{
		"gemini-2.5-pro-preview-05-06",
		"gemini-2.5-pro-preview-03-25",
	}
	for _, prefix := range excluded {
		if strings.HasPrefix(modelName, prefix) {
			return false
		}
	}
	return true
}
|
|
|
|
|
+
|
|
|
|
|
// is25FlashLiteModel reports whether modelName starts with the
// "gemini-2.5-flash-lite" prefix.
func is25FlashLiteModel(modelName string) bool {
	const flashLitePrefix = "gemini-2.5-flash-lite"
	return strings.HasPrefix(modelName, flashLitePrefix)
}
|
|
|
|
|
+
|
|
|
|
|
+// clampThinkingBudget 根据模型名称将预算限制在允许的范围内
|
|
|
|
|
+func clampThinkingBudget(modelName string, budget int) int {
|
|
|
|
|
+ isNew25Pro := isNew25ProModel(modelName)
|
|
|
|
|
+ is25FlashLite := is25FlashLiteModel(modelName)
|
|
|
|
|
|
|
|
if is25FlashLite {
|
|
if is25FlashLite {
|
|
|
if budget < flash25LiteMinBudget {
|
|
if budget < flash25LiteMinBudget {
|
|
@@ -81,7 +89,34 @@ func clampThinkingBudget(modelName string, budget int) int {
|
|
|
return budget
|
|
return budget
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
-func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo) {
|
|
|
|
|
|
|
+// "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens)
|
|
|
|
|
+// "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens)
|
|
|
|
|
+// "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens)
|
|
|
|
|
+func clampThinkingBudgetByEffort(modelName string, effort string) int {
|
|
|
|
|
+ isNew25Pro := isNew25ProModel(modelName)
|
|
|
|
|
+ is25FlashLite := is25FlashLiteModel(modelName)
|
|
|
|
|
+
|
|
|
|
|
+ maxBudget := 0
|
|
|
|
|
+ if is25FlashLite {
|
|
|
|
|
+ maxBudget = flash25LiteMaxBudget
|
|
|
|
|
+ }
|
|
|
|
|
+ if isNew25Pro {
|
|
|
|
|
+ maxBudget = pro25MaxBudget
|
|
|
|
|
+ } else {
|
|
|
|
|
+ maxBudget = flash25MaxBudget
|
|
|
|
|
+ }
|
|
|
|
|
+ switch effort {
|
|
|
|
|
+ case "high":
|
|
|
|
|
+ return maxBudget * 80 / 100
|
|
|
|
|
+ case "medium":
|
|
|
|
|
+ return maxBudget * 50 / 100
|
|
|
|
|
+ case "low":
|
|
|
|
|
+ return maxBudget * 20 / 100
|
|
|
|
|
+ }
|
|
|
|
|
+ return maxBudget * 50 / 100 // 默认medium
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
|
|
|
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
|
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
|
|
modelName := info.UpstreamModelName
|
|
modelName := info.UpstreamModelName
|
|
|
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
|
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
|
@@ -124,6 +159,11 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
|
|
|
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
|
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
|
|
clampedBudget := clampThinkingBudget(modelName, int(budgetTokens))
|
|
clampedBudget := clampThinkingBudget(modelName, int(budgetTokens))
|
|
|
geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampedBudget)
|
|
geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampedBudget)
|
|
|
|
|
+ } else {
|
|
|
|
|
+ if len(oaiRequest) > 0 {
|
|
|
|
|
+ // 如果有reasoningEffort参数,则根据其值设置思考预算
|
|
|
|
|
+ geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampThinkingBudgetByEffort(modelName, oaiRequest[0].ReasoningEffort))
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
} else if strings.HasSuffix(modelName, "-nothinking") {
|
|
} else if strings.HasSuffix(modelName, "-nothinking") {
|
|
@@ -156,7 +196,37 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- ThinkingAdaptor(&geminiRequest, info)
|
|
|
|
|
|
|
+ adaptorWithExtraBody := false
|
|
|
|
|
+
|
|
|
|
|
+ if len(textRequest.ExtraBody) > 0 {
|
|
|
|
|
+ if !strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
|
|
|
|
|
+ var extraBody map[string]interface{}
|
|
|
|
|
+ if err := common.Unmarshal(textRequest.ExtraBody, &extraBody); err != nil {
|
|
|
|
|
+ return nil, fmt.Errorf("invalid extra body: %w", err)
|
|
|
|
|
+ }
|
|
|
|
|
+ // eg. {"google":{"thinking_config":{"thinking_budget":5324,"include_thoughts":true}}}
|
|
|
|
|
+ if googleBody, ok := extraBody["google"].(map[string]interface{}); ok {
|
|
|
|
|
+ adaptorWithExtraBody = true
|
|
|
|
|
+ if thinkingConfig, ok := googleBody["thinking_config"].(map[string]interface{}); ok {
|
|
|
|
|
+ if budget, ok := thinkingConfig["thinking_budget"].(float64); ok {
|
|
|
|
|
+ budgetInt := int(budget)
|
|
|
|
|
+ geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
|
|
|
|
|
+ ThinkingBudget: common.GetPointer(budgetInt),
|
|
|
|
|
+ IncludeThoughts: true,
|
|
|
|
|
+ }
|
|
|
|
|
+ } else {
|
|
|
|
|
+ geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
|
|
|
|
|
+ IncludeThoughts: true,
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ if !adaptorWithExtraBody {
|
|
|
|
|
+ ThinkingAdaptor(&geminiRequest, info, textRequest)
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
safetySettings := make([]dto.GeminiChatSafetySettings, 0, len(SafetySettingList))
|
|
safetySettings := make([]dto.GeminiChatSafetySettings, 0, len(SafetySettingList))
|
|
|
for _, category := range SafetySettingList {
|
|
for _, category := range SafetySettingList {
|