relay-gemini.go 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255
  1. package gemini
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "io"
  7. "net/http"
  8. "strconv"
  9. "strings"
  10. "unicode/utf8"
  11. "github.com/QuantumNous/new-api/common"
  12. "github.com/QuantumNous/new-api/constant"
  13. "github.com/QuantumNous/new-api/dto"
  14. "github.com/QuantumNous/new-api/logger"
  15. "github.com/QuantumNous/new-api/relay/channel/openai"
  16. relaycommon "github.com/QuantumNous/new-api/relay/common"
  17. "github.com/QuantumNous/new-api/relay/helper"
  18. "github.com/QuantumNous/new-api/service"
  19. "github.com/QuantumNous/new-api/setting/model_setting"
  20. "github.com/QuantumNous/new-api/types"
  21. "github.com/gin-gonic/gin"
  22. )
// geminiSupportedMimeTypes is the whitelist of MIME types accepted for inline
// media fetched from remote URLs, per the Vertex AI inference reference below.
// Lookups are done against the lowercased MIME type.
// https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference?hl=zh-cn#blob
var geminiSupportedMimeTypes = map[string]bool{
	"application/pdf": true,
	"audio/mpeg":      true,
	"audio/mp3":       true,
	"audio/wav":       true,
	"image/png":       true,
	"image/jpeg":      true,
	"image/webp":      true,
	"text/plain":      true,
	"video/mov":       true,
	"video/mpeg":      true,
	"video/mp4":       true,
	"video/mpg":       true,
	"video/avi":       true,
	"video/wmv":       true,
	"video/mpegps":    true,
	"video/flv":       true,
}
// thoughtSignatureBypassValue is attached as a thoughtSignature on replayed
// function-call parts so Gemini accepts tool-call history that lacks the
// original signature. NOTE(review): appears to be a known bypass token —
// confirm it is still accepted upstream.
const thoughtSignatureBypassValue = "context_engineering_is_the_way_to_go"

// Thinking-budget ranges allowed by Gemini, per model family.
const (
	pro25MinBudget       = 128
	pro25MaxBudget       = 32768
	flash25MaxBudget     = 24576
	flash25LiteMinBudget = 512
	flash25LiteMaxBudget = 24576
)
  51. func isNew25ProModel(modelName string) bool {
  52. return strings.HasPrefix(modelName, "gemini-2.5-pro") &&
  53. !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
  54. !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25")
  55. }
  56. func is25FlashLiteModel(modelName string) bool {
  57. return strings.HasPrefix(modelName, "gemini-2.5-flash-lite")
  58. }
  59. // clampThinkingBudget 根据模型名称将预算限制在允许的范围内
  60. func clampThinkingBudget(modelName string, budget int) int {
  61. isNew25Pro := isNew25ProModel(modelName)
  62. is25FlashLite := is25FlashLiteModel(modelName)
  63. if is25FlashLite {
  64. if budget < flash25LiteMinBudget {
  65. return flash25LiteMinBudget
  66. }
  67. if budget > flash25LiteMaxBudget {
  68. return flash25LiteMaxBudget
  69. }
  70. } else if isNew25Pro {
  71. if budget < pro25MinBudget {
  72. return pro25MinBudget
  73. }
  74. if budget > pro25MaxBudget {
  75. return pro25MaxBudget
  76. }
  77. } else { // 其他模型
  78. if budget < 0 {
  79. return 0
  80. }
  81. if budget > flash25MaxBudget {
  82. return flash25MaxBudget
  83. }
  84. }
  85. return budget
  86. }
  87. // "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens)
  88. // "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens)
  89. // "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens)
  90. func clampThinkingBudgetByEffort(modelName string, effort string) int {
  91. isNew25Pro := isNew25ProModel(modelName)
  92. is25FlashLite := is25FlashLiteModel(modelName)
  93. maxBudget := 0
  94. if is25FlashLite {
  95. maxBudget = flash25LiteMaxBudget
  96. }
  97. if isNew25Pro {
  98. maxBudget = pro25MaxBudget
  99. } else {
  100. maxBudget = flash25MaxBudget
  101. }
  102. switch effort {
  103. case "high":
  104. maxBudget = maxBudget * 80 / 100
  105. case "medium":
  106. maxBudget = maxBudget * 50 / 100
  107. case "low":
  108. maxBudget = maxBudget * 20 / 100
  109. }
  110. return clampThinkingBudget(modelName, maxBudget)
  111. }
  112. func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
  113. if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
  114. modelName := info.UpstreamModelName
  115. isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
  116. !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
  117. !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25")
  118. if strings.Contains(modelName, "-thinking-") {
  119. parts := strings.SplitN(modelName, "-thinking-", 2)
  120. if len(parts) == 2 && parts[1] != "" {
  121. if budgetTokens, err := strconv.Atoi(parts[1]); err == nil {
  122. clampedBudget := clampThinkingBudget(modelName, budgetTokens)
  123. geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
  124. ThinkingBudget: common.GetPointer(clampedBudget),
  125. IncludeThoughts: true,
  126. }
  127. }
  128. }
  129. } else if strings.HasSuffix(modelName, "-thinking") {
  130. unsupportedModels := []string{
  131. "gemini-2.5-pro-preview-05-06",
  132. "gemini-2.5-pro-preview-03-25",
  133. }
  134. isUnsupported := false
  135. for _, unsupportedModel := range unsupportedModels {
  136. if strings.HasPrefix(modelName, unsupportedModel) {
  137. isUnsupported = true
  138. break
  139. }
  140. }
  141. if isUnsupported {
  142. geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
  143. IncludeThoughts: true,
  144. }
  145. } else {
  146. geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
  147. IncludeThoughts: true,
  148. }
  149. if geminiRequest.GenerationConfig.MaxOutputTokens > 0 {
  150. budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
  151. clampedBudget := clampThinkingBudget(modelName, int(budgetTokens))
  152. geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampedBudget)
  153. } else {
  154. if len(oaiRequest) > 0 {
  155. // 如果有reasoningEffort参数,则根据其值设置思考预算
  156. geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampThinkingBudgetByEffort(modelName, oaiRequest[0].ReasoningEffort))
  157. }
  158. }
  159. }
  160. } else if strings.HasSuffix(modelName, "-nothinking") {
  161. if !isNew25Pro {
  162. geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
  163. ThinkingBudget: common.GetPointer(0),
  164. }
  165. }
  166. }
  167. }
  168. }
// CovertOpenAI2Gemini converts an OpenAI-style chat request into a Gemini
// chat request: generation parameters, thinking configuration (from
// extra_body.google.thinking_config or via ThinkingAdaptor), safety settings,
// tools (including the googleSearch/codeExecution/urlContext pseudo-tools),
// JSON response formats, and the message history (text, images, files, audio,
// tool calls and tool results).
//
// NOTE(review): "Covert" looks like a long-standing typo for "Convert", kept
// as-is for caller compatibility.
//
// Setting safety to the lowest possible values since Gemini is already powerless enough
func CovertOpenAI2Gemini(c *gin.Context, textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*dto.GeminiChatRequest, error) {
	geminiRequest := dto.GeminiChatRequest{
		Contents: make([]dto.GeminiChatContent, 0, len(textRequest.Messages)),
		GenerationConfig: dto.GeminiChatGenerationConfig{
			Temperature:     textRequest.Temperature,
			TopP:            textRequest.TopP,
			MaxOutputTokens: textRequest.GetMaxTokens(),
			Seed:            int64(textRequest.Seed),
		},
	}
	// Thought signatures are only attached for Gemini/Vertex channels and only
	// when enabled in settings.
	attachThoughtSignature := (info.ChannelType == constant.ChannelTypeGemini ||
		info.ChannelType == constant.ChannelTypeVertexAi) &&
		model_setting.GetGeminiSettings().FunctionCallThoughtSignatureEnabled
	if model_setting.IsGeminiModelSupportImagine(info.UpstreamModelName) {
		geminiRequest.GenerationConfig.ResponseModalities = []string{
			"TEXT",
			"IMAGE",
		}
	}
	// When the client supplies a google thinking config via extra_body, it
	// takes precedence over the suffix-based ThinkingAdaptor below.
	adaptorWithExtraBody := false
	if len(textRequest.ExtraBody) > 0 {
		if !strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
			var extraBody map[string]interface{}
			if err := common.Unmarshal(textRequest.ExtraBody, &extraBody); err != nil {
				return nil, fmt.Errorf("invalid extra body: %w", err)
			}
			// eg. {"google":{"thinking_config":{"thinking_budget":5324,"include_thoughts":true}}}
			if googleBody, ok := extraBody["google"].(map[string]interface{}); ok {
				adaptorWithExtraBody = true
				// Reject the camelCase misspelling: must be thinking_config.
				if _, hasErrorParam := googleBody["thinkingConfig"]; hasErrorParam {
					return nil, errors.New("extra_body.google.thinkingConfig is not supported, use extra_body.google.thinking_config instead")
				}
				if thinkingConfig, ok := googleBody["thinking_config"].(map[string]interface{}); ok {
					// Reject the camelCase misspelling: must be thinking_budget.
					if _, hasErrorParam := thinkingConfig["thinkingBudget"]; hasErrorParam {
						return nil, errors.New("extra_body.google.thinking_config.thinkingBudget is not supported, use extra_body.google.thinking_config.thinking_budget instead")
					}
					// JSON numbers decode as float64; truncate to int.
					if budget, ok := thinkingConfig["thinking_budget"].(float64); ok {
						budgetInt := int(budget)
						geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
							ThinkingBudget:  common.GetPointer(budgetInt),
							IncludeThoughts: true,
						}
					} else {
						geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
							IncludeThoughts: true,
						}
					}
				}
			}
		}
	}
	if !adaptorWithExtraBody {
		ThinkingAdaptor(&geminiRequest, info, textRequest)
	}
	safetySettings := make([]dto.GeminiChatSafetySettings, 0, len(SafetySettingList))
	for _, category := range SafetySettingList {
		safetySettings = append(safetySettings, dto.GeminiChatSafetySettings{
			Category:  category,
			Threshold: model_setting.GetGeminiSafetySetting(category),
		})
	}
	geminiRequest.SafetySettings = safetySettings
	// openaiContent.FuncToToolCalls()
	if textRequest.Tools != nil {
		functions := make([]dto.FunctionRequest, 0, len(textRequest.Tools))
		googleSearch := false
		codeExecution := false
		urlContext := false
		for _, tool := range textRequest.Tools {
			// The three reserved names select Gemini built-in tools instead of
			// function declarations.
			if tool.Function.Name == "googleSearch" {
				googleSearch = true
				continue
			}
			if tool.Function.Name == "codeExecution" {
				codeExecution = true
				continue
			}
			if tool.Function.Name == "urlContext" {
				urlContext = true
				continue
			}
			// Gemini rejects an empty properties object; drop parameters then.
			if tool.Function.Parameters != nil {
				params, ok := tool.Function.Parameters.(map[string]interface{})
				if ok {
					if props, hasProps := params["properties"].(map[string]interface{}); hasProps {
						if len(props) == 0 {
							tool.Function.Parameters = nil
						}
					}
				}
			}
			// Clean the parameters before appending
			cleanedParams := cleanFunctionParameters(tool.Function.Parameters)
			tool.Function.Parameters = cleanedParams
			functions = append(functions, tool.Function)
		}
		geminiTools := geminiRequest.GetTools()
		if codeExecution {
			geminiTools = append(geminiTools, dto.GeminiChatTool{
				CodeExecution: make(map[string]string),
			})
		}
		if googleSearch {
			geminiTools = append(geminiTools, dto.GeminiChatTool{
				GoogleSearch: make(map[string]string),
			})
		}
		if urlContext {
			geminiTools = append(geminiTools, dto.GeminiChatTool{
				URLContext: make(map[string]string),
			})
		}
		if len(functions) > 0 {
			geminiTools = append(geminiTools, dto.GeminiChatTool{
				FunctionDeclarations: functions,
			})
		}
		geminiRequest.SetTools(geminiTools)
	}
	if textRequest.ResponseFormat != nil && (textRequest.ResponseFormat.Type == "json_schema" || textRequest.ResponseFormat.Type == "json_object") {
		geminiRequest.GenerationConfig.ResponseMimeType = "application/json"
		if len(textRequest.ResponseFormat.JsonSchema) > 0 {
			// Decode the raw json.RawMessage schema first; on failure the
			// response schema is simply omitted (best-effort).
			var jsonSchema dto.FormatJsonSchema
			if err := common.Unmarshal(textRequest.ResponseFormat.JsonSchema, &jsonSchema); err == nil {
				cleanedSchema := removeAdditionalPropertiesWithDepth(jsonSchema.Schema, 0)
				geminiRequest.GenerationConfig.ResponseSchema = cleanedSchema
			}
		}
	}
	// Maps tool-call IDs to function names so later tool-result messages can
	// be matched back to the function that was called.
	tool_call_ids := make(map[string]string)
	var system_content []string
	//shouldAddDummyModelMessage := false
	for _, message := range textRequest.Messages {
		if message.Role == "system" {
			// System messages are collected and sent as SystemInstructions.
			system_content = append(system_content, message.StringContent())
			continue
		} else if message.Role == "tool" || message.Role == "function" {
			// Tool results must live in a "user" turn; reuse the previous one
			// unless the last turn was the model's.
			if len(geminiRequest.Contents) == 0 || geminiRequest.Contents[len(geminiRequest.Contents)-1].Role == "model" {
				geminiRequest.Contents = append(geminiRequest.Contents, dto.GeminiChatContent{
					Role: "user",
				})
			}
			var parts = &geminiRequest.Contents[len(geminiRequest.Contents)-1].Parts
			name := ""
			if message.Name != nil {
				name = *message.Name
			} else if val, exists := tool_call_ids[message.ToolCallId]; exists {
				name = val
			}
			var contentMap map[string]interface{}
			contentStr := message.StringContent()
			// 1. Try to parse the content as a JSON object.
			if err := json.Unmarshal([]byte(contentStr), &contentMap); err != nil {
				// 2. On failure, try to parse it as a JSON array.
				var contentSlice []interface{}
				if err := json.Unmarshal([]byte(contentStr), &contentSlice); err == nil {
					// Arrays get wrapped in an object.
					contentMap = map[string]interface{}{"result": contentSlice}
				} else {
					// 3. Otherwise treat it as plain text.
					contentMap = map[string]interface{}{"content": contentStr}
				}
			}
			functionResp := &dto.GeminiFunctionResponse{
				Name:     name,
				Response: contentMap,
			}
			*parts = append(*parts, dto.GeminiPart{
				FunctionResponse: functionResp,
			})
			continue
		}
		var parts []dto.GeminiPart
		content := dto.GeminiChatContent{
			Role: message.Role,
		}
		// At most one thought signature is attached per assistant message.
		shouldAttachThoughtSignature := attachThoughtSignature && (message.Role == "assistant" || message.Role == "model")
		signatureAttached := false
		// isToolCall := false
		if message.ToolCalls != nil {
			// message.Role = "model"
			// isToolCall = true
			for _, call := range message.ParseToolCalls() {
				args := map[string]interface{}{}
				if call.Function.Arguments != "" {
					if json.Unmarshal([]byte(call.Function.Arguments), &args) != nil {
						return nil, fmt.Errorf("invalid arguments for function %s, args: %s", call.Function.Name, call.Function.Arguments)
					}
				}
				toolCall := dto.GeminiPart{
					FunctionCall: &dto.FunctionCall{
						FunctionName: call.Function.Name,
						Arguments:    args,
					},
				}
				if shouldAttachThoughtSignature && !signatureAttached && hasFunctionCallContent(toolCall.FunctionCall) && len(toolCall.ThoughtSignature) == 0 {
					toolCall.ThoughtSignature = json.RawMessage(strconv.Quote(thoughtSignatureBypassValue))
					signatureAttached = true
				}
				parts = append(parts, toolCall)
				tool_call_ids[call.ID] = call.Function.Name
			}
		}
		openaiContent := message.ParseContent()
		imageNum := 0
		for _, part := range openaiContent {
			if part.Type == dto.ContentTypeText {
				if part.Text == "" {
					continue
				}
				parts = append(parts, dto.GeminiPart{
					Text: part.Text,
				})
			} else if part.Type == dto.ContentTypeImageURL {
				imageNum += 1
				// -1 means unlimited.
				if constant.GeminiVisionMaxImageNum != -1 && imageNum > constant.GeminiVisionMaxImageNum {
					return nil, fmt.Errorf("too many images in the message, max allowed is %d", constant.GeminiVisionMaxImageNum)
				}
				// Check whether the image is referenced by URL.
				if strings.HasPrefix(part.GetImageMedia().Url, "http") {
					// URL: fetch the file's MIME type and base64-encoded data.
					fileData, err := service.GetFileBase64FromUrl(c, part.GetImageMedia().Url, "formatting image for Gemini")
					if err != nil {
						return nil, fmt.Errorf("get file base64 from url '%s' failed: %w", part.GetImageMedia().Url, err)
					}
					// Validate the MIME type against Gemini's whitelist.
					if _, ok := geminiSupportedMimeTypes[strings.ToLower(fileData.MimeType)]; !ok {
						url := part.GetImageMedia().Url
						return nil, fmt.Errorf("mime type is not supported by Gemini: '%s', url: '%s', supported types are: %v", fileData.MimeType, url, getSupportedMimeTypesList())
					}
					parts = append(parts, dto.GeminiPart{
						InlineData: &dto.GeminiInlineData{
							MimeType: fileData.MimeType, // keep the original MimeType; its casing may matter to the API
							Data:     fileData.Base64Data,
						},
					})
				} else {
					// Otherwise assume a base64 data URL.
					format, base64String, err := service.DecodeBase64FileData(part.GetImageMedia().Url)
					if err != nil {
						return nil, fmt.Errorf("decode base64 image data failed: %s", err.Error())
					}
					parts = append(parts, dto.GeminiPart{
						InlineData: &dto.GeminiInlineData{
							MimeType: format,
							Data:     base64String,
						},
					})
				}
			} else if part.Type == dto.ContentTypeFile {
				if part.GetFile().FileId != "" {
					return nil, fmt.Errorf("only base64 file is supported in gemini")
				}
				format, base64String, err := service.DecodeBase64FileData(part.GetFile().FileData)
				if err != nil {
					return nil, fmt.Errorf("decode base64 file data failed: %s", err.Error())
				}
				parts = append(parts, dto.GeminiPart{
					InlineData: &dto.GeminiInlineData{
						MimeType: format,
						Data:     base64String,
					},
				})
			} else if part.Type == dto.ContentTypeInputAudio {
				if part.GetInputAudio().Data == "" {
					return nil, fmt.Errorf("only base64 audio is supported in gemini")
				}
				base64String, err := service.DecodeBase64AudioData(part.GetInputAudio().Data)
				if err != nil {
					return nil, fmt.Errorf("decode base64 audio data failed: %s", err.Error())
				}
				parts = append(parts, dto.GeminiPart{
					InlineData: &dto.GeminiInlineData{
						MimeType: "audio/" + part.GetInputAudio().Format,
						Data:     base64String,
					},
				})
			}
		}
		content.Parts = parts
		// there's no assistant role in gemini and API shall vomit if Role is not user or model
		if content.Role == "assistant" {
			content.Role = "model"
		}
		// Empty turns are dropped entirely.
		if len(content.Parts) > 0 {
			geminiRequest.Contents = append(geminiRequest.Contents, content)
		}
	}
	if len(system_content) > 0 {
		geminiRequest.SystemInstructions = &dto.GeminiChatContent{
			Parts: []dto.GeminiPart{
				{
					Text: strings.Join(system_content, "\n"),
				},
			},
		}
	}
	return &geminiRequest, nil
}
  471. func hasFunctionCallContent(call *dto.FunctionCall) bool {
  472. if call == nil {
  473. return false
  474. }
  475. if strings.TrimSpace(call.FunctionName) != "" {
  476. return true
  477. }
  478. switch v := call.Arguments.(type) {
  479. case nil:
  480. return false
  481. case string:
  482. return strings.TrimSpace(v) != ""
  483. case map[string]interface{}:
  484. return len(v) > 0
  485. case []interface{}:
  486. return len(v) > 0
  487. default:
  488. return true
  489. }
  490. }
  491. // Helper function to get a list of supported MIME types for error messages
  492. func getSupportedMimeTypesList() []string {
  493. keys := make([]string, 0, len(geminiSupportedMimeTypes))
  494. for k := range geminiSupportedMimeTypes {
  495. keys = append(keys, k)
  496. }
  497. return keys
  498. }
// cleanFunctionParameters recursively removes unsupported fields from Gemini function parameters.
//
// Gemini's function-declaration schema rejects several JSON Schema keywords
// ("default", "exclusiveMaximum", "exclusiveMinimum", "$schema",
// "additionalProperties") and restricts "format" on string types. Each map
// node is shallow-copied before modification, so the caller's maps are not
// mutated at the levels this function rewrites; values reached through keys
// it does not touch remain shared with the original.
func cleanFunctionParameters(params interface{}) interface{} {
	if params == nil {
		return nil
	}
	switch v := params.(type) {
	case map[string]interface{}:
		// Create a copy to avoid modifying the original
		cleanedMap := make(map[string]interface{})
		for k, val := range v {
			cleanedMap[k] = val
		}
		// Remove unsupported root-level fields
		delete(cleanedMap, "default")
		delete(cleanedMap, "exclusiveMaximum")
		delete(cleanedMap, "exclusiveMinimum")
		delete(cleanedMap, "$schema")
		delete(cleanedMap, "additionalProperties")
		// Check and clean 'format' for string types: only "enum" and
		// "date-time" are kept.
		if propType, typeExists := cleanedMap["type"].(string); typeExists && propType == "string" {
			if formatValue, formatExists := cleanedMap["format"].(string); formatExists {
				if formatValue != "enum" && formatValue != "date-time" {
					delete(cleanedMap, "format")
				}
			}
		}
		// Clean properties
		if props, ok := cleanedMap["properties"].(map[string]interface{}); ok && props != nil {
			cleanedProps := make(map[string]interface{})
			for propName, propValue := range props {
				cleanedProps[propName] = cleanFunctionParameters(propValue)
			}
			cleanedMap["properties"] = cleanedProps
		}
		// Recursively clean items in arrays
		if items, ok := cleanedMap["items"].(map[string]interface{}); ok && items != nil {
			cleanedMap["items"] = cleanFunctionParameters(items)
		}
		// Also handle items if it's an array of schemas
		if itemsArray, ok := cleanedMap["items"].([]interface{}); ok {
			cleanedItemsArray := make([]interface{}, len(itemsArray))
			for i, item := range itemsArray {
				cleanedItemsArray[i] = cleanFunctionParameters(item)
			}
			cleanedMap["items"] = cleanedItemsArray
		}
		// Recursively clean other schema composition keywords
		for _, field := range []string{"allOf", "anyOf", "oneOf"} {
			if nested, ok := cleanedMap[field].([]interface{}); ok {
				cleanedNested := make([]interface{}, len(nested))
				for i, item := range nested {
					cleanedNested[i] = cleanFunctionParameters(item)
				}
				cleanedMap[field] = cleanedNested
			}
		}
		// Recursively clean patternProperties
		if patternProps, ok := cleanedMap["patternProperties"].(map[string]interface{}); ok {
			cleanedPatternProps := make(map[string]interface{})
			for pattern, schema := range patternProps {
				cleanedPatternProps[pattern] = cleanFunctionParameters(schema)
			}
			cleanedMap["patternProperties"] = cleanedPatternProps
		}
		// Recursively clean definitions
		if definitions, ok := cleanedMap["definitions"].(map[string]interface{}); ok {
			cleanedDefinitions := make(map[string]interface{})
			for defName, defSchema := range definitions {
				cleanedDefinitions[defName] = cleanFunctionParameters(defSchema)
			}
			cleanedMap["definitions"] = cleanedDefinitions
		}
		// Recursively clean $defs (newer JSON Schema draft)
		if defs, ok := cleanedMap["$defs"].(map[string]interface{}); ok {
			cleanedDefs := make(map[string]interface{})
			for defName, defSchema := range defs {
				cleanedDefs[defName] = cleanFunctionParameters(defSchema)
			}
			cleanedMap["$defs"] = cleanedDefs
		}
		// Clean conditional keywords
		for _, field := range []string{"if", "then", "else", "not"} {
			if nested, ok := cleanedMap[field]; ok {
				cleanedMap[field] = cleanFunctionParameters(nested)
			}
		}
		return cleanedMap
	case []interface{}:
		// Handle arrays of schemas
		cleanedArray := make([]interface{}, len(v))
		for i, item := range v {
			cleanedArray[i] = cleanFunctionParameters(item)
		}
		return cleanedArray
	default:
		// Not a map or array, return as is (e.g., could be a primitive)
		return params
	}
}
// removeAdditionalPropertiesWithDepth strips fields Gemini's response schema
// rejects ("title", "$schema", and "additionalProperties" on objects) from a
// JSON schema, recursing into object properties, allOf/anyOf/oneOf, and array
// items. Recursion stops at depth 5 to bound the work on deep input.
//
// NOTE(review): unlike cleanFunctionParameters, this mutates the input map in
// place and returns it — callers sharing the schema will see the changes.
func removeAdditionalPropertiesWithDepth(schema interface{}, depth int) interface{} {
	if depth >= 5 {
		return schema
	}
	v, ok := schema.(map[string]interface{})
	if !ok || len(v) == 0 {
		return schema
	}
	// Drop fields Gemini rejects regardless of schema type.
	delete(v, "title")
	delete(v, "$schema")
	// Only "object" and "array" schemas need recursive handling.
	if typeVal, exists := v["type"]; !exists || (typeVal != "object" && typeVal != "array") {
		return schema
	}
	switch v["type"] {
	case "object":
		delete(v, "additionalProperties")
		// Recurse into each property schema.
		if properties, ok := v["properties"].(map[string]interface{}); ok {
			for key, value := range properties {
				properties[key] = removeAdditionalPropertiesWithDepth(value, depth+1)
			}
		}
		for _, field := range []string{"allOf", "anyOf", "oneOf"} {
			if nested, ok := v[field].([]interface{}); ok {
				for i, item := range nested {
					nested[i] = removeAdditionalPropertiesWithDepth(item, depth+1)
				}
			}
		}
	case "array":
		if items, ok := v["items"].(map[string]interface{}); ok {
			v["items"] = removeAdditionalPropertiesWithDepth(items, depth+1)
		}
	}
	return v
}
  636. func unescapeString(s string) (string, error) {
  637. var result []rune
  638. escaped := false
  639. i := 0
  640. for i < len(s) {
  641. r, size := utf8.DecodeRuneInString(s[i:]) // 正确解码UTF-8字符
  642. if r == utf8.RuneError {
  643. return "", fmt.Errorf("invalid UTF-8 encoding")
  644. }
  645. if escaped {
  646. // 如果是转义符后的字符,检查其类型
  647. switch r {
  648. case '"':
  649. result = append(result, '"')
  650. case '\\':
  651. result = append(result, '\\')
  652. case '/':
  653. result = append(result, '/')
  654. case 'b':
  655. result = append(result, '\b')
  656. case 'f':
  657. result = append(result, '\f')
  658. case 'n':
  659. result = append(result, '\n')
  660. case 'r':
  661. result = append(result, '\r')
  662. case 't':
  663. result = append(result, '\t')
  664. case '\'':
  665. result = append(result, '\'')
  666. default:
  667. // 如果遇到一个非法的转义字符,直接按原样输出
  668. result = append(result, '\\', r)
  669. }
  670. escaped = false
  671. } else {
  672. if r == '\\' {
  673. escaped = true // 记录反斜杠作为转义符
  674. } else {
  675. result = append(result, r)
  676. }
  677. }
  678. i += size // 移动到下一个字符
  679. }
  680. return string(result), nil
  681. }
  682. func unescapeMapOrSlice(data interface{}) interface{} {
  683. switch v := data.(type) {
  684. case map[string]interface{}:
  685. for k, val := range v {
  686. v[k] = unescapeMapOrSlice(val)
  687. }
  688. case []interface{}:
  689. for i, val := range v {
  690. v[i] = unescapeMapOrSlice(val)
  691. }
  692. case string:
  693. if unescaped, err := unescapeString(v); err != nil {
  694. return v
  695. } else {
  696. return unescaped
  697. }
  698. }
  699. return data
  700. }
  701. func getResponseToolCall(item *dto.GeminiPart) *dto.ToolCallResponse {
  702. var argsBytes []byte
  703. var err error
  704. if result, ok := item.FunctionCall.Arguments.(map[string]interface{}); ok {
  705. argsBytes, err = json.Marshal(unescapeMapOrSlice(result))
  706. } else {
  707. argsBytes, err = json.Marshal(item.FunctionCall.Arguments)
  708. }
  709. if err != nil {
  710. return nil
  711. }
  712. return &dto.ToolCallResponse{
  713. ID: fmt.Sprintf("call_%s", common.GetUUID()),
  714. Type: "function",
  715. Function: dto.FunctionResponse{
  716. Arguments: string(argsBytes),
  717. Name: item.FunctionCall.FunctionName,
  718. },
  719. }
  720. }
  721. func responseGeminiChat2OpenAI(c *gin.Context, response *dto.GeminiChatResponse) *dto.OpenAITextResponse {
  722. fullTextResponse := dto.OpenAITextResponse{
  723. Id: helper.GetResponseID(c),
  724. Object: "chat.completion",
  725. Created: common.GetTimestamp(),
  726. Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
  727. }
  728. isToolCall := false
  729. for _, candidate := range response.Candidates {
  730. choice := dto.OpenAITextResponseChoice{
  731. Index: int(candidate.Index),
  732. Message: dto.Message{
  733. Role: "assistant",
  734. Content: "",
  735. },
  736. FinishReason: constant.FinishReasonStop,
  737. }
  738. if len(candidate.Content.Parts) > 0 {
  739. var texts []string
  740. var toolCalls []dto.ToolCallResponse
  741. for _, part := range candidate.Content.Parts {
  742. if part.InlineData != nil {
  743. // 媒体内容
  744. if strings.HasPrefix(part.InlineData.MimeType, "image") {
  745. imgText := "![image](data:" + part.InlineData.MimeType + ";base64," + part.InlineData.Data + ")"
  746. texts = append(texts, imgText)
  747. } else {
  748. // 其他媒体类型,直接显示链接
  749. texts = append(texts, fmt.Sprintf("[media](data:%s;base64,%s)", part.InlineData.MimeType, part.InlineData.Data))
  750. }
  751. } else if part.FunctionCall != nil {
  752. choice.FinishReason = constant.FinishReasonToolCalls
  753. if call := getResponseToolCall(&part); call != nil {
  754. toolCalls = append(toolCalls, *call)
  755. }
  756. } else if part.Thought {
  757. choice.Message.ReasoningContent = part.Text
  758. } else {
  759. if part.ExecutableCode != nil {
  760. texts = append(texts, "```"+part.ExecutableCode.Language+"\n"+part.ExecutableCode.Code+"\n```")
  761. } else if part.CodeExecutionResult != nil {
  762. texts = append(texts, "```output\n"+part.CodeExecutionResult.Output+"\n```")
  763. } else {
  764. // 过滤掉空行
  765. if part.Text != "\n" {
  766. texts = append(texts, part.Text)
  767. }
  768. }
  769. }
  770. }
  771. if len(toolCalls) > 0 {
  772. choice.Message.SetToolCalls(toolCalls)
  773. isToolCall = true
  774. }
  775. choice.Message.SetStringContent(strings.Join(texts, "\n"))
  776. }
  777. if candidate.FinishReason != nil {
  778. switch *candidate.FinishReason {
  779. case "STOP":
  780. choice.FinishReason = constant.FinishReasonStop
  781. case "MAX_TOKENS":
  782. choice.FinishReason = constant.FinishReasonLength
  783. default:
  784. choice.FinishReason = constant.FinishReasonContentFilter
  785. }
  786. }
  787. if isToolCall {
  788. choice.FinishReason = constant.FinishReasonToolCalls
  789. }
  790. fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
  791. }
  792. return &fullTextResponse
  793. }
  794. func streamResponseGeminiChat2OpenAI(geminiResponse *dto.GeminiChatResponse) (*dto.ChatCompletionsStreamResponse, bool) {
  795. choices := make([]dto.ChatCompletionsStreamResponseChoice, 0, len(geminiResponse.Candidates))
  796. isStop := false
  797. for _, candidate := range geminiResponse.Candidates {
  798. if candidate.FinishReason != nil && *candidate.FinishReason == "STOP" {
  799. isStop = true
  800. candidate.FinishReason = nil
  801. }
  802. choice := dto.ChatCompletionsStreamResponseChoice{
  803. Index: int(candidate.Index),
  804. Delta: dto.ChatCompletionsStreamResponseChoiceDelta{
  805. //Role: "assistant",
  806. },
  807. }
  808. var texts []string
  809. isTools := false
  810. isThought := false
  811. if candidate.FinishReason != nil {
  812. // p := GeminiConvertFinishReason(*candidate.FinishReason)
  813. switch *candidate.FinishReason {
  814. case "STOP":
  815. choice.FinishReason = &constant.FinishReasonStop
  816. case "MAX_TOKENS":
  817. choice.FinishReason = &constant.FinishReasonLength
  818. default:
  819. choice.FinishReason = &constant.FinishReasonContentFilter
  820. }
  821. }
  822. for _, part := range candidate.Content.Parts {
  823. if part.InlineData != nil {
  824. if strings.HasPrefix(part.InlineData.MimeType, "image") {
  825. imgText := "![image](data:" + part.InlineData.MimeType + ";base64," + part.InlineData.Data + ")"
  826. texts = append(texts, imgText)
  827. }
  828. } else if part.FunctionCall != nil {
  829. isTools = true
  830. if call := getResponseToolCall(&part); call != nil {
  831. call.SetIndex(len(choice.Delta.ToolCalls))
  832. choice.Delta.ToolCalls = append(choice.Delta.ToolCalls, *call)
  833. }
  834. } else if part.Thought {
  835. isThought = true
  836. texts = append(texts, part.Text)
  837. } else {
  838. if part.ExecutableCode != nil {
  839. texts = append(texts, "```"+part.ExecutableCode.Language+"\n"+part.ExecutableCode.Code+"\n```\n")
  840. } else if part.CodeExecutionResult != nil {
  841. texts = append(texts, "```output\n"+part.CodeExecutionResult.Output+"\n```\n")
  842. } else {
  843. if part.Text != "\n" {
  844. texts = append(texts, part.Text)
  845. }
  846. }
  847. }
  848. }
  849. if isThought {
  850. choice.Delta.SetReasoningContent(strings.Join(texts, "\n"))
  851. } else {
  852. choice.Delta.SetContentString(strings.Join(texts, "\n"))
  853. }
  854. if isTools {
  855. choice.FinishReason = &constant.FinishReasonToolCalls
  856. }
  857. choices = append(choices, choice)
  858. }
  859. var response dto.ChatCompletionsStreamResponse
  860. response.Object = "chat.completion.chunk"
  861. response.Choices = choices
  862. return &response, isStop
  863. }
  864. func handleStream(c *gin.Context, info *relaycommon.RelayInfo, resp *dto.ChatCompletionsStreamResponse) error {
  865. streamData, err := common.Marshal(resp)
  866. if err != nil {
  867. return fmt.Errorf("failed to marshal stream response: %w", err)
  868. }
  869. err = openai.HandleStreamFormat(c, info, string(streamData), info.ChannelSetting.ForceFormat, info.ChannelSetting.ThinkingToContent)
  870. if err != nil {
  871. return fmt.Errorf("failed to handle stream format: %w", err)
  872. }
  873. return nil
  874. }
  875. func handleFinalStream(c *gin.Context, info *relaycommon.RelayInfo, resp *dto.ChatCompletionsStreamResponse) error {
  876. streamData, err := common.Marshal(resp)
  877. if err != nil {
  878. return fmt.Errorf("failed to marshal stream response: %w", err)
  879. }
  880. openai.HandleFinalResponse(c, info, string(streamData), resp.Id, resp.Created, resp.Model, resp.GetSystemFingerprint(), resp.Usage, false)
  881. return nil
  882. }
// geminiStreamHandler scans the Gemini SSE stream, aggregates usage metadata
// from each chunk, and forwards every decoded chunk to callback. The callback
// returns false to stop consuming the stream. Returns the aggregated usage,
// falling back to per-image estimates or text-based token counting when the
// upstream reports no usable completion count.
func geminiStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response, callback func(data string, geminiResponse *dto.GeminiChatResponse) bool) (*dto.Usage, *types.NewAPIError) {
	var usage = &dto.Usage{}
	var imageCount int
	responseText := strings.Builder{}
	helper.StreamScannerHandler(c, resp, info, func(data string) bool {
		var geminiResponse dto.GeminiChatResponse
		err := common.UnmarshalJsonStr(data, &geminiResponse)
		if err != nil {
			logger.LogError(c, "error unmarshalling stream response: "+err.Error())
			// A malformed chunk aborts the scan.
			return false
		}
		// Count inline images and collect text for the token-estimation
		// fallbacks applied after the stream ends.
		for _, candidate := range geminiResponse.Candidates {
			for _, part := range candidate.Content.Parts {
				if part.InlineData != nil && part.InlineData.MimeType != "" {
					imageCount++
				}
				if part.Text != "" {
					responseText.WriteString(part.Text)
				}
			}
		}
		// Update usage stats; later chunks overwrite earlier values
		// (presumably the metadata is cumulative per chunk — confirm upstream).
		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
			// Completion count includes the model's "thought" tokens.
			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount + geminiResponse.UsageMetadata.ThoughtsTokenCount
			usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
			usage.TotalTokens = geminiResponse.UsageMetadata.TotalTokenCount
			for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
				if detail.Modality == "AUDIO" {
					usage.PromptTokensDetails.AudioTokens = detail.TokenCount
				} else if detail.Modality == "TEXT" {
					usage.PromptTokensDetails.TextTokens = detail.TokenCount
				}
			}
		}
		return callback(data, &geminiResponse)
	})
	// Fallback: stream produced images but no completion count — bill a fixed
	// estimate per image.
	if imageCount != 0 {
		if usage.CompletionTokens == 0 {
			usage.CompletionTokens = imageCount * 1400
		}
	}
	// NOTE(review): this unconditionally overwrites the per-modality TEXT
	// detail collected in the loop above with the full prompt count — confirm
	// that is intended.
	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
	if usage.TotalTokens > 0 {
		// Derive completion tokens from the total so thought tokens are
		// included; this also overrides the image estimate above.
		usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
	}
	if usage.CompletionTokens <= 0 {
		// No usable upstream usage: estimate from the collected text, or
		// report zero usage when there was no text at all.
		str := responseText.String()
		if len(str) > 0 {
			usage = service.ResponseText2Usage(c, responseText.String(), info.UpstreamModelName, info.PromptTokens)
		} else {
			usage = &dto.Usage{}
		}
	}
	return usage, nil
}
// GeminiChatStreamHandler relays a Gemini streaming chat response to the
// client in OpenAI SSE format: an initial empty "start" chunk on the first
// send, one converted chunk per upstream chunk, a stop chunk when the
// upstream signals STOP, and a final usage chunk.
func GeminiChatStreamHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
	id := helper.GetResponseID(c)
	createAt := common.GetTimestamp()
	finishReason := constant.FinishReasonStop
	usage, err := geminiStreamHandler(c, info, resp, func(data string, geminiResponse *dto.GeminiChatResponse) bool {
		response, isStop := streamResponseGeminiChat2OpenAI(geminiResponse)
		response.Id = id
		response.Created = createAt
		response.Model = info.UpstreamModelName
		logger.LogDebug(c, fmt.Sprintf("info.SendResponseCount = %d", info.SendResponseCount))
		if info.SendResponseCount == 0 {
			// send first response
			emptyResponse := helper.GenerateStartEmptyResponse(id, createAt, info.UpstreamModelName, nil)
			if response.IsToolCall() {
				// Copy the tool calls into the start chunk with their arguments
				// cleared — presumably so clients see the call metadata before
				// any argument deltas; verify against consumer expectations.
				if len(emptyResponse.Choices) > 0 && len(response.Choices) > 0 {
					toolCalls := response.Choices[0].Delta.ToolCalls
					copiedToolCalls := make([]dto.ToolCallResponse, len(toolCalls))
					for idx := range toolCalls {
						copiedToolCalls[idx] = toolCalls[idx]
						copiedToolCalls[idx].Function.Arguments = ""
					}
					emptyResponse.Choices[0].Delta.ToolCalls = copiedToolCalls
				}
				finishReason = constant.FinishReasonToolCalls
				err := handleStream(c, info, emptyResponse)
				if err != nil {
					logger.LogError(c, err.Error())
				}
				// The start chunk already announced the tool calls; strip them
				// and any finish reason from the first real chunk.
				response.ClearToolCalls()
				if response.IsFinished() {
					response.Choices[0].FinishReason = nil
				}
			} else {
				err := handleStream(c, info, emptyResponse)
				if err != nil {
					logger.LogError(c, err.Error())
				}
			}
		}
		err := handleStream(c, info, response)
		if err != nil {
			logger.LogError(c, err.Error())
		}
		if isStop {
			// Upstream finished with STOP: emit a dedicated stop chunk.
			_ = handleStream(c, info, helper.GenerateStopResponse(id, createAt, info.UpstreamModelName, finishReason))
		}
		// Keep consuming the stream regardless of per-chunk send errors.
		return true
	})
	if err != nil {
		return usage, err
	}
	// Final chunk carrying the aggregated usage.
	response := helper.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
	handleErr := handleFinalStream(c, info, response)
	if handleErr != nil {
		common.SysLog("send final response failed: " + handleErr.Error())
	}
	return usage, nil
}
  998. func GeminiChatHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
  999. responseBody, err := io.ReadAll(resp.Body)
  1000. if err != nil {
  1001. return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1002. }
  1003. service.CloseResponseBodyGracefully(resp)
  1004. if common.DebugEnabled {
  1005. println(string(responseBody))
  1006. }
  1007. var geminiResponse dto.GeminiChatResponse
  1008. err = common.Unmarshal(responseBody, &geminiResponse)
  1009. if err != nil {
  1010. return nil, types.NewOpenAIError(err, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1011. }
  1012. if len(geminiResponse.Candidates) == 0 {
  1013. //return nil, types.NewOpenAIError(errors.New("no candidates returned"), types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1014. //if geminiResponse.PromptFeedback != nil && geminiResponse.PromptFeedback.BlockReason != nil {
  1015. // return nil, types.NewOpenAIError(errors.New("request blocked by Gemini API: "+*geminiResponse.PromptFeedback.BlockReason), types.ErrorCodePromptBlocked, http.StatusBadRequest)
  1016. //} else {
  1017. // return nil, types.NewOpenAIError(errors.New("empty response from Gemini API"), types.ErrorCodeEmptyResponse, http.StatusInternalServerError)
  1018. //}
  1019. }
  1020. fullTextResponse := responseGeminiChat2OpenAI(c, &geminiResponse)
  1021. fullTextResponse.Model = info.UpstreamModelName
  1022. usage := dto.Usage{
  1023. PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
  1024. CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount,
  1025. TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount,
  1026. }
  1027. usage.CompletionTokenDetails.ReasoningTokens = geminiResponse.UsageMetadata.ThoughtsTokenCount
  1028. usage.CompletionTokens = usage.TotalTokens - usage.PromptTokens
  1029. for _, detail := range geminiResponse.UsageMetadata.PromptTokensDetails {
  1030. if detail.Modality == "AUDIO" {
  1031. usage.PromptTokensDetails.AudioTokens = detail.TokenCount
  1032. } else if detail.Modality == "TEXT" {
  1033. usage.PromptTokensDetails.TextTokens = detail.TokenCount
  1034. }
  1035. }
  1036. fullTextResponse.Usage = usage
  1037. switch info.RelayFormat {
  1038. case types.RelayFormatOpenAI:
  1039. responseBody, err = common.Marshal(fullTextResponse)
  1040. if err != nil {
  1041. return nil, types.NewError(err, types.ErrorCodeBadResponseBody)
  1042. }
  1043. case types.RelayFormatClaude:
  1044. claudeResp := service.ResponseOpenAI2Claude(fullTextResponse, info)
  1045. claudeRespStr, err := common.Marshal(claudeResp)
  1046. if err != nil {
  1047. return nil, types.NewError(err, types.ErrorCodeBadResponseBody)
  1048. }
  1049. responseBody = claudeRespStr
  1050. case types.RelayFormatGemini:
  1051. break
  1052. }
  1053. service.IOCopyBytesGracefully(c, resp, responseBody)
  1054. return &usage, nil
  1055. }
  1056. func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
  1057. defer service.CloseResponseBodyGracefully(resp)
  1058. responseBody, readErr := io.ReadAll(resp.Body)
  1059. if readErr != nil {
  1060. return nil, types.NewOpenAIError(readErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1061. }
  1062. var geminiResponse dto.GeminiBatchEmbeddingResponse
  1063. if jsonErr := common.Unmarshal(responseBody, &geminiResponse); jsonErr != nil {
  1064. return nil, types.NewOpenAIError(jsonErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1065. }
  1066. // convert to openai format response
  1067. openAIResponse := dto.OpenAIEmbeddingResponse{
  1068. Object: "list",
  1069. Data: make([]dto.OpenAIEmbeddingResponseItem, 0, len(geminiResponse.Embeddings)),
  1070. Model: info.UpstreamModelName,
  1071. }
  1072. for i, embedding := range geminiResponse.Embeddings {
  1073. openAIResponse.Data = append(openAIResponse.Data, dto.OpenAIEmbeddingResponseItem{
  1074. Object: "embedding",
  1075. Embedding: embedding.Values,
  1076. Index: i,
  1077. })
  1078. }
  1079. // calculate usage
  1080. // https://ai.google.dev/gemini-api/docs/pricing?hl=zh-cn#text-embedding-004
  1081. // Google has not yet clarified how embedding models will be billed
  1082. // refer to openai billing method to use input tokens billing
  1083. // https://platform.openai.com/docs/guides/embeddings#what-are-embeddings
  1084. usage := &dto.Usage{
  1085. PromptTokens: info.PromptTokens,
  1086. CompletionTokens: 0,
  1087. TotalTokens: info.PromptTokens,
  1088. }
  1089. openAIResponse.Usage = *usage
  1090. jsonResponse, jsonErr := common.Marshal(openAIResponse)
  1091. if jsonErr != nil {
  1092. return nil, types.NewOpenAIError(jsonErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1093. }
  1094. service.IOCopyBytesGracefully(c, resp, jsonResponse)
  1095. return usage, nil
  1096. }
  1097. func GeminiImageHandler(c *gin.Context, info *relaycommon.RelayInfo, resp *http.Response) (*dto.Usage, *types.NewAPIError) {
  1098. responseBody, readErr := io.ReadAll(resp.Body)
  1099. if readErr != nil {
  1100. return nil, types.NewOpenAIError(readErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1101. }
  1102. _ = resp.Body.Close()
  1103. var geminiResponse dto.GeminiImageResponse
  1104. if jsonErr := common.Unmarshal(responseBody, &geminiResponse); jsonErr != nil {
  1105. return nil, types.NewOpenAIError(jsonErr, types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1106. }
  1107. if len(geminiResponse.Predictions) == 0 {
  1108. return nil, types.NewOpenAIError(errors.New("no images generated"), types.ErrorCodeBadResponseBody, http.StatusInternalServerError)
  1109. }
  1110. // convert to openai format response
  1111. openAIResponse := dto.ImageResponse{
  1112. Created: common.GetTimestamp(),
  1113. Data: make([]dto.ImageData, 0, len(geminiResponse.Predictions)),
  1114. }
  1115. for _, prediction := range geminiResponse.Predictions {
  1116. if prediction.RaiFilteredReason != "" {
  1117. continue // skip filtered image
  1118. }
  1119. openAIResponse.Data = append(openAIResponse.Data, dto.ImageData{
  1120. B64Json: prediction.BytesBase64Encoded,
  1121. })
  1122. }
  1123. jsonResponse, jsonErr := json.Marshal(openAIResponse)
  1124. if jsonErr != nil {
  1125. return nil, types.NewError(jsonErr, types.ErrorCodeBadResponseBody)
  1126. }
  1127. c.Writer.Header().Set("Content-Type", "application/json")
  1128. c.Writer.WriteHeader(resp.StatusCode)
  1129. _, _ = c.Writer.Write(jsonResponse)
  1130. // https://github.com/google-gemini/cookbook/blob/719a27d752aac33f39de18a8d3cb42a70874917e/quickstarts/Counting_Tokens.ipynb
  1131. // each image has fixed 258 tokens
  1132. const imageTokens = 258
  1133. generatedImages := len(openAIResponse.Data)
  1134. usage := &dto.Usage{
  1135. PromptTokens: imageTokens * generatedImages, // each generated image has fixed 258 tokens
  1136. CompletionTokens: 0, // image generation does not calculate completion tokens
  1137. TotalTokens: imageTokens * generatedImages,
  1138. }
  1139. return usage, nil
  1140. }