// relay-gemini.go
  1. package gemini
  2. import (
  3. "bufio"
  4. "encoding/json"
  5. "fmt"
  6. "io"
  7. "net/http"
  8. "one-api/common"
  9. "one-api/constant"
  10. "one-api/dto"
  11. relaycommon "one-api/relay/common"
  12. "one-api/service"
  13. "strings"
  14. "unicode/utf8"
  15. "github.com/gin-gonic/gin"
  16. )
// Setting safety to the lowest possible values since Gemini is already powerless enough
//
// CovertGemini2OpenAI converts an OpenAI-format chat completion request into a
// Gemini chat request. (NOTE(review): "Covert" looks like a typo for
// "Convert", but the exported name is kept — renaming would break callers.)
//
// It maps:
//   - generation parameters (temperature, top_p, max_tokens, seed);
//   - tools/functions, where the pseudo-function names "googleSearch" and
//     "codeExecution" select Gemini's built-in tools instead of declarations;
//   - response_format of type json_schema/json_object to Gemini's JSON
//     response mode (with the schema cleaned of unsupported keywords);
//   - "system" messages, collected into SystemInstructions;
//   - "tool"/"function" result messages into functionResponse parts;
//   - text and image content parts (image URLs are fetched and inlined as
//     base64, other image values are decoded as base64 payloads).
//
// Returns an error for unparseable tool-call arguments, too many images, or
// image data that cannot be fetched or decoded.
func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest) (*GeminiChatRequest, error) {
	geminiRequest := GeminiChatRequest{
		Contents: make([]GeminiChatContent, 0, len(textRequest.Messages)),
		SafetySettings: []GeminiChatSafetySettings{
			{
				Category:  "HARM_CATEGORY_HARASSMENT",
				Threshold: common.GeminiSafetySetting,
			},
			{
				Category:  "HARM_CATEGORY_HATE_SPEECH",
				Threshold: common.GeminiSafetySetting,
			},
			{
				Category:  "HARM_CATEGORY_SEXUALLY_EXPLICIT",
				Threshold: common.GeminiSafetySetting,
			},
			{
				Category:  "HARM_CATEGORY_DANGEROUS_CONTENT",
				Threshold: common.GeminiSafetySetting,
			},
			{
				Category:  "HARM_CATEGORY_CIVIC_INTEGRITY",
				Threshold: common.GeminiSafetySetting,
			},
		},
		GenerationConfig: GeminiChatGenerationConfig{
			Temperature:     textRequest.Temperature,
			TopP:            textRequest.TopP,
			MaxOutputTokens: textRequest.MaxTokens,
			Seed:            int64(textRequest.Seed),
		},
	}
	// openaiContent.FuncToToolCalls()
	if textRequest.Tools != nil {
		functions := make([]dto.FunctionCall, 0, len(textRequest.Tools))
		googleSearch := false
		codeExecution := false
		for _, tool := range textRequest.Tools {
			// "googleSearch" / "codeExecution" are not real function
			// declarations; they enable Gemini's native tools below.
			if tool.Function.Name == "googleSearch" {
				googleSearch = true
				continue
			}
			if tool.Function.Name == "codeExecution" {
				codeExecution = true
				continue
			}
			if tool.Function.Parameters != nil {
				params, ok := tool.Function.Parameters.(map[string]interface{})
				if ok {
					if props, hasProps := params["properties"].(map[string]interface{}); hasProps {
						if len(props) == 0 {
							// An empty "properties" object is dropped entirely
							// (presumably rejected upstream — TODO confirm).
							tool.Function.Parameters = nil
						}
					}
				}
			}
			functions = append(functions, tool.Function)
		}
		if codeExecution {
			geminiRequest.Tools = append(geminiRequest.Tools, GeminiChatTool{
				CodeExecution: make(map[string]string),
			})
		}
		if googleSearch {
			geminiRequest.Tools = append(geminiRequest.Tools, GeminiChatTool{
				GoogleSearch: make(map[string]string),
			})
		}
		if len(functions) > 0 {
			geminiRequest.Tools = append(geminiRequest.Tools, GeminiChatTool{
				FunctionDeclarations: functions,
			})
		}
		// common.SysLog("tools: " + fmt.Sprintf("%+v", geminiRequest.Tools))
		// json_data, _ := json.Marshal(geminiRequest.Tools)
		// common.SysLog("tools_json: " + string(json_data))
	} else if textRequest.Functions != nil {
		// Legacy "functions" field (pre-"tools" OpenAI API).
		geminiRequest.Tools = []GeminiChatTool{
			{
				FunctionDeclarations: textRequest.Functions,
			},
		}
	}
	if textRequest.ResponseFormat != nil && (textRequest.ResponseFormat.Type == "json_schema" || textRequest.ResponseFormat.Type == "json_object") {
		geminiRequest.GenerationConfig.ResponseMimeType = "application/json"
		if textRequest.ResponseFormat.JsonSchema != nil && textRequest.ResponseFormat.JsonSchema.Schema != nil {
			// Strip JSON-Schema keywords ("title", "additionalProperties")
			// before handing the schema to Gemini.
			cleanedSchema := removeAdditionalPropertiesWithDepth(textRequest.ResponseFormat.JsonSchema.Schema, 0)
			geminiRequest.GenerationConfig.ResponseSchema = cleanedSchema
		}
	}
	// tool_call_ids maps tool_call_id -> function name so that later
	// tool-result messages can be attributed to the calling function.
	tool_call_ids := make(map[string]string)
	var system_content []string
	//shouldAddDummyModelMessage := false
	for _, message := range textRequest.Messages {
		if message.Role == "system" {
			// System messages are collected and emitted once at the end
			// as SystemInstructions.
			system_content = append(system_content, message.StringContent())
			continue
		} else if message.Role == "tool" || message.Role == "function" {
			// Tool results must ride in a "user" turn: reuse the last content
			// entry unless it belongs to the model (or none exists yet).
			if len(geminiRequest.Contents) == 0 || geminiRequest.Contents[len(geminiRequest.Contents)-1].Role == "model" {
				geminiRequest.Contents = append(geminiRequest.Contents, GeminiChatContent{
					Role: "user",
				})
			}
			var parts = &geminiRequest.Contents[len(geminiRequest.Contents)-1].Parts
			name := ""
			if message.Name != nil {
				name = *message.Name
			} else if val, exists := tool_call_ids[message.ToolCallId]; exists {
				name = val
			}
			content := common.StrToMap(message.StringContent())
			functionResp := &FunctionResponse{
				Name: name,
				Response: GeminiFunctionResponseContent{
					Name:    name,
					Content: content,
				},
			}
			if content == nil {
				// The result was not a JSON object; pass the raw string through.
				functionResp.Response.Content = message.StringContent()
			}
			*parts = append(*parts, GeminiPart{
				FunctionResponse: functionResp,
			})
			continue
		}
		var parts []GeminiPart
		content := GeminiChatContent{
			Role: message.Role,
		}
		// isToolCall := false
		if message.ToolCalls != nil {
			// message.Role = "model"
			// isToolCall = true
			for _, call := range message.ParseToolCalls() {
				args := map[string]interface{}{}
				if call.Function.Arguments != "" {
					if json.Unmarshal([]byte(call.Function.Arguments), &args) != nil {
						return nil, fmt.Errorf("invalid arguments for function %s, args: %s", call.Function.Name, call.Function.Arguments)
					}
				}
				toolCall := GeminiPart{
					FunctionCall: &FunctionCall{
						FunctionName: call.Function.Name,
						Arguments:    args,
					},
				}
				parts = append(parts, toolCall)
				// Remember the id->name mapping for later tool-result messages.
				tool_call_ids[call.ID] = call.Function.Name
			}
		}
		openaiContent := message.ParseContent()
		imageNum := 0
		for _, part := range openaiContent {
			if part.Type == dto.ContentTypeText {
				if part.Text == "" {
					continue
				}
				parts = append(parts, GeminiPart{
					Text: part.Text,
				})
			} else if part.Type == dto.ContentTypeImageURL {
				imageNum += 1
				// GeminiVisionMaxImageNum == -1 means "no limit".
				if constant.GeminiVisionMaxImageNum != -1 && imageNum > constant.GeminiVisionMaxImageNum {
					return nil, fmt.Errorf("too many images in the message, max allowed is %d", constant.GeminiVisionMaxImageNum)
				}
				// Check whether the image is given as a URL.
				if strings.HasPrefix(part.ImageUrl.(dto.MessageImageUrl).Url, "http") {
					// It is a URL: fetch the image and inline its MIME type
					// and base64-encoded data.
					fileData, err := service.GetFileBase64FromUrl(part.ImageUrl.(dto.MessageImageUrl).Url)
					if err != nil {
						return nil, fmt.Errorf("get file base64 from url failed: %s", err.Error())
					}
					parts = append(parts, GeminiPart{
						InlineData: &GeminiInlineData{
							MimeType: fileData.MimeType,
							Data:     fileData.Base64Data,
						},
					})
				} else {
					// Otherwise treat the value as a base64 data payload.
					format, base64String, err := service.DecodeBase64FileData(part.ImageUrl.(dto.MessageImageUrl).Url)
					if err != nil {
						return nil, fmt.Errorf("decode base64 image data failed: %s", err.Error())
					}
					parts = append(parts, GeminiPart{
						InlineData: &GeminiInlineData{
							MimeType: format,
							Data:     base64String,
						},
					})
				}
			}
		}
		content.Parts = parts
		// there's no assistant role in gemini and API shall vomit if Role is not user or model
		if content.Role == "assistant" {
			content.Role = "model"
		}
		geminiRequest.Contents = append(geminiRequest.Contents, content)
	}
	if len(system_content) > 0 {
		geminiRequest.SystemInstructions = &GeminiChatContent{
			Parts: []GeminiPart{
				{
					Text: strings.Join(system_content, "\n"),
				},
			},
		}
	}
	return &geminiRequest, nil
}
  229. func removeAdditionalPropertiesWithDepth(schema interface{}, depth int) interface{} {
  230. if depth >= 5 {
  231. return schema
  232. }
  233. v, ok := schema.(map[string]interface{})
  234. if !ok || len(v) == 0 {
  235. return schema
  236. }
  237. // 删除所有的title字段
  238. delete(v, "title")
  239. // 如果type不为object和array,则直接返回
  240. if typeVal, exists := v["type"]; !exists || (typeVal != "object" && typeVal != "array") {
  241. return schema
  242. }
  243. switch v["type"] {
  244. case "object":
  245. delete(v, "additionalProperties")
  246. // 处理 properties
  247. if properties, ok := v["properties"].(map[string]interface{}); ok {
  248. for key, value := range properties {
  249. properties[key] = removeAdditionalPropertiesWithDepth(value, depth+1)
  250. }
  251. }
  252. for _, field := range []string{"allOf", "anyOf", "oneOf"} {
  253. if nested, ok := v[field].([]interface{}); ok {
  254. for i, item := range nested {
  255. nested[i] = removeAdditionalPropertiesWithDepth(item, depth+1)
  256. }
  257. }
  258. }
  259. case "array":
  260. if items, ok := v["items"].(map[string]interface{}); ok {
  261. v["items"] = removeAdditionalPropertiesWithDepth(items, depth+1)
  262. }
  263. }
  264. return v
  265. }
  266. func unescapeString(s string) (string, error) {
  267. var result []rune
  268. escaped := false
  269. i := 0
  270. for i < len(s) {
  271. r, size := utf8.DecodeRuneInString(s[i:]) // 正确解码UTF-8字符
  272. if r == utf8.RuneError {
  273. return "", fmt.Errorf("invalid UTF-8 encoding")
  274. }
  275. if escaped {
  276. // 如果是转义符后的字符,检查其类型
  277. switch r {
  278. case '"':
  279. result = append(result, '"')
  280. case '\\':
  281. result = append(result, '\\')
  282. case '/':
  283. result = append(result, '/')
  284. case 'b':
  285. result = append(result, '\b')
  286. case 'f':
  287. result = append(result, '\f')
  288. case 'n':
  289. result = append(result, '\n')
  290. case 'r':
  291. result = append(result, '\r')
  292. case 't':
  293. result = append(result, '\t')
  294. case '\'':
  295. result = append(result, '\'')
  296. default:
  297. // 如果遇到一个非法的转义字符,直接按原样输出
  298. result = append(result, '\\', r)
  299. }
  300. escaped = false
  301. } else {
  302. if r == '\\' {
  303. escaped = true // 记录反斜杠作为转义符
  304. } else {
  305. result = append(result, r)
  306. }
  307. }
  308. i += size // 移动到下一个字符
  309. }
  310. return string(result), nil
  311. }
  312. func unescapeMapOrSlice(data interface{}) interface{} {
  313. switch v := data.(type) {
  314. case map[string]interface{}:
  315. for k, val := range v {
  316. v[k] = unescapeMapOrSlice(val)
  317. }
  318. case []interface{}:
  319. for i, val := range v {
  320. v[i] = unescapeMapOrSlice(val)
  321. }
  322. case string:
  323. if unescaped, err := unescapeString(v); err != nil {
  324. return v
  325. } else {
  326. return unescaped
  327. }
  328. }
  329. return data
  330. }
  331. func getToolCall(item *GeminiPart) *dto.ToolCall {
  332. var argsBytes []byte
  333. var err error
  334. if result, ok := item.FunctionCall.Arguments.(map[string]interface{}); ok {
  335. argsBytes, err = json.Marshal(unescapeMapOrSlice(result))
  336. } else {
  337. argsBytes, err = json.Marshal(item.FunctionCall.Arguments)
  338. }
  339. if err != nil {
  340. return nil
  341. }
  342. return &dto.ToolCall{
  343. ID: fmt.Sprintf("call_%s", common.GetUUID()),
  344. Type: "function",
  345. Function: dto.FunctionCall{
  346. Arguments: string(argsBytes),
  347. Name: item.FunctionCall.FunctionName,
  348. },
  349. }
  350. }
// responseGeminiChat2OpenAI converts a non-streaming Gemini chat response into
// an OpenAI-format chat completion response. Executable-code and
// code-execution-result parts are rendered as fenced code blocks, function
// calls become OpenAI tool calls, and Gemini finish reasons are mapped to
// OpenAI ones. Model and usage fields are filled in by the caller.
func responseGeminiChat2OpenAI(response *GeminiChatResponse) *dto.OpenAITextResponse {
	fullTextResponse := dto.OpenAITextResponse{
		Id:      fmt.Sprintf("chatcmpl-%s", common.GetUUID()),
		Object:  "chat.completion",
		Created: common.GetTimestamp(),
		Choices: make([]dto.OpenAITextResponseChoice, 0, len(response.Candidates)),
	}
	// Default message content: the JSON encoding of an empty string (`""`).
	content, _ := json.Marshal("")
	is_tool_call := false
	for _, candidate := range response.Candidates {
		choice := dto.OpenAITextResponseChoice{
			Index: int(candidate.Index),
			Message: dto.Message{
				Role:    "assistant",
				Content: content,
			},
			FinishReason: constant.FinishReasonStop,
		}
		if len(candidate.Content.Parts) > 0 {
			var texts []string
			var tool_calls []dto.ToolCall
			for _, part := range candidate.Content.Parts {
				if part.FunctionCall != nil {
					choice.FinishReason = constant.FinishReasonToolCalls
					if call := getToolCall(&part); call != nil {
						tool_calls = append(tool_calls, *call)
					}
				} else {
					if part.ExecutableCode != nil {
						// Render generated code as a fenced block tagged with its language.
						texts = append(texts, "```"+part.ExecutableCode.Language+"\n"+part.ExecutableCode.Code+"\n```")
					} else if part.CodeExecutionResult != nil {
						texts = append(texts, "```output\n"+part.CodeExecutionResult.Output+"\n```")
					} else {
						// Filter out parts that are a bare newline.
						if part.Text != "\n" {
							texts = append(texts, part.Text)
						}
					}
				}
			}
			if len(tool_calls) > 0 {
				choice.Message.SetToolCalls(tool_calls)
				is_tool_call = true
			}
			choice.Message.SetStringContent(strings.Join(texts, "\n"))
		}
		if candidate.FinishReason != nil {
			switch *candidate.FinishReason {
			case "STOP":
				choice.FinishReason = constant.FinishReasonStop
			case "MAX_TOKENS":
				choice.FinishReason = constant.FinishReasonLength
			default:
				// Any other reason (safety blocks etc.) maps to content_filter.
				choice.FinishReason = constant.FinishReasonContentFilter
			}
		}
		// NOTE(review): is_tool_call is never reset, so a tool call in one
		// candidate forces finish_reason=tool_calls on all later candidates
		// too — confirm this carry-over is intentional.
		if is_tool_call {
			choice.FinishReason = constant.FinishReasonToolCalls
		}
		fullTextResponse.Choices = append(fullTextResponse.Choices, choice)
	}
	return &fullTextResponse
}
// streamResponseGeminiChat2OpenAI converts one streamed Gemini chunk into an
// OpenAI chat.completion.chunk. The boolean return reports whether any
// candidate finished with "STOP", so the caller can emit a dedicated final
// stop chunk. Id, Created, and Model are overwritten by the caller.
func streamResponseGeminiChat2OpenAI(geminiResponse *GeminiChatResponse) (*dto.ChatCompletionsStreamResponse, bool) {
	choices := make([]dto.ChatCompletionsStreamResponseChoice, 0, len(geminiResponse.Candidates))
	is_stop := false
	for _, candidate := range geminiResponse.Candidates {
		if candidate.FinishReason != nil && *candidate.FinishReason == "STOP" {
			// Suppress STOP here; the caller sends a separate stop chunk.
			// (candidate is a range copy, so geminiResponse is not mutated.)
			is_stop = true
			candidate.FinishReason = nil
		}
		choice := dto.ChatCompletionsStreamResponseChoice{
			Index: int(candidate.Index),
			Delta: dto.ChatCompletionsStreamResponseChoiceDelta{
				Role: "assistant",
			},
		}
		var texts []string
		isTools := false
		if candidate.FinishReason != nil {
			// p := GeminiConvertFinishReason(*candidate.FinishReason)
			switch *candidate.FinishReason {
			case "STOP":
				// NOTE(review): unreachable — STOP was cleared to nil above.
				choice.FinishReason = &constant.FinishReasonStop
			case "MAX_TOKENS":
				choice.FinishReason = &constant.FinishReasonLength
			default:
				// Safety blocks and other reasons map to content_filter.
				choice.FinishReason = &constant.FinishReasonContentFilter
			}
		}
		for _, part := range candidate.Content.Parts {
			if part.FunctionCall != nil {
				isTools = true
				if call := getToolCall(&part); call != nil {
					// Tool-call index is positional within this delta.
					call.SetIndex(len(choice.Delta.ToolCalls))
					choice.Delta.ToolCalls = append(choice.Delta.ToolCalls, *call)
				}
			} else {
				if part.ExecutableCode != nil {
					// Render generated code as a fenced block tagged with its language.
					texts = append(texts, "```"+part.ExecutableCode.Language+"\n"+part.ExecutableCode.Code+"\n```\n")
				} else if part.CodeExecutionResult != nil {
					texts = append(texts, "```output\n"+part.CodeExecutionResult.Output+"\n```\n")
				} else {
					// Filter out parts that are a bare newline.
					if part.Text != "\n" {
						texts = append(texts, part.Text)
					}
				}
			}
		}
		choice.Delta.SetContentString(strings.Join(texts, "\n"))
		if isTools {
			choice.FinishReason = &constant.FinishReasonToolCalls
		}
		choices = append(choices, choice)
	}
	var response dto.ChatCompletionsStreamResponse
	response.Object = "chat.completion.chunk"
	response.Model = "gemini"
	response.Choices = choices
	return &response, is_stop
}
// GeminiChatStreamHandler consumes a streaming Gemini response (SSE lines
// prefixed with "data: ") from resp.Body, converts each chunk to an OpenAI
// chat.completion.chunk, and forwards it to the client via c. It returns the
// token usage taken from the last chunk that carried usageMetadata. The
// error return is always nil; per-chunk failures are logged and skipped.
func GeminiChatStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
	// responseText := ""
	id := fmt.Sprintf("chatcmpl-%s", common.GetUUID())
	createAt := common.GetTimestamp()
	var usage = &dto.Usage{}
	scanner := bufio.NewScanner(resp.Body)
	scanner.Split(bufio.ScanLines)
	service.SetEventStreamHeaders(c)
	for scanner.Scan() {
		data := scanner.Text()
		// Record time-to-first-response for this relay.
		info.SetFirstResponseTime()
		data = strings.TrimSpace(data)
		// Skip anything that is not an SSE data line (blank keep-alives etc.).
		if !strings.HasPrefix(data, "data: ") {
			continue
		}
		data = strings.TrimPrefix(data, "data: ")
		// NOTE(review): trimming one trailing quote looks like a workaround
		// for a malformed upstream payload — confirm it is still needed.
		data = strings.TrimSuffix(data, "\"")
		var geminiResponse GeminiChatResponse
		err := json.Unmarshal([]byte(data), &geminiResponse)
		if err != nil {
			// Malformed chunks are skipped rather than aborting the stream.
			common.LogError(c, "error unmarshalling stream response: "+err.Error())
			continue
		}
		response, is_stop := streamResponseGeminiChat2OpenAI(&geminiResponse)
		response.Id = id
		response.Created = createAt
		response.Model = info.UpstreamModelName
		// responseText += response.Choices[0].Delta.GetContentString()
		// Later chunks overwrite earlier counts; the last non-zero chunk wins.
		if geminiResponse.UsageMetadata.TotalTokenCount != 0 {
			usage.PromptTokens = geminiResponse.UsageMetadata.PromptTokenCount
			usage.CompletionTokens = geminiResponse.UsageMetadata.CandidatesTokenCount
		}
		err = service.ObjectData(c, response)
		if err != nil {
			common.LogError(c, err.Error())
		}
		if is_stop {
			// Emit an explicit finish chunk with reason "stop".
			response := service.GenerateStopResponse(id, createAt, info.UpstreamModelName, constant.FinishReasonStop)
			service.ObjectData(c, response)
		}
	}
	// NOTE(review): scanner.Err() is never checked — a mid-stream read error
	// ends the loop silently and is reported as a normal completion.
	var response *dto.ChatCompletionsStreamResponse
	usage.TotalTokens = usage.PromptTokens + usage.CompletionTokens
	usage.PromptTokensDetails.TextTokens = usage.PromptTokens
	usage.CompletionTokenDetails.TextTokens = usage.CompletionTokens
	if info.ShouldIncludeUsage {
		// Optionally emit a final usage-only chunk before closing the stream.
		response = service.GenerateFinalUsageResponse(id, createAt, info.UpstreamModelName, *usage)
		err := service.ObjectData(c, response)
		if err != nil {
			common.SysError("send final response failed: " + err.Error())
		}
	}
	service.Done(c)
	resp.Body.Close()
	return nil, usage
}
  528. func GeminiChatHandler(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (*dto.OpenAIErrorWithStatusCode, *dto.Usage) {
  529. responseBody, err := io.ReadAll(resp.Body)
  530. if err != nil {
  531. return service.OpenAIErrorWrapper(err, "read_response_body_failed", http.StatusInternalServerError), nil
  532. }
  533. err = resp.Body.Close()
  534. if err != nil {
  535. return service.OpenAIErrorWrapper(err, "close_response_body_failed", http.StatusInternalServerError), nil
  536. }
  537. var geminiResponse GeminiChatResponse
  538. err = json.Unmarshal(responseBody, &geminiResponse)
  539. if err != nil {
  540. return service.OpenAIErrorWrapper(err, "unmarshal_response_body_failed", http.StatusInternalServerError), nil
  541. }
  542. if len(geminiResponse.Candidates) == 0 {
  543. return &dto.OpenAIErrorWithStatusCode{
  544. Error: dto.OpenAIError{
  545. Message: "No candidates returned",
  546. Type: "server_error",
  547. Param: "",
  548. Code: 500,
  549. },
  550. StatusCode: resp.StatusCode,
  551. }, nil
  552. }
  553. fullTextResponse := responseGeminiChat2OpenAI(&geminiResponse)
  554. fullTextResponse.Model = info.UpstreamModelName
  555. usage := dto.Usage{
  556. PromptTokens: geminiResponse.UsageMetadata.PromptTokenCount,
  557. CompletionTokens: geminiResponse.UsageMetadata.CandidatesTokenCount,
  558. TotalTokens: geminiResponse.UsageMetadata.TotalTokenCount,
  559. }
  560. fullTextResponse.Usage = usage
  561. jsonResponse, err := json.Marshal(fullTextResponse)
  562. if err != nil {
  563. return service.OpenAIErrorWrapper(err, "marshal_response_body_failed", http.StatusInternalServerError), nil
  564. }
  565. c.Writer.Header().Set("Content-Type", "application/json")
  566. c.Writer.WriteHeader(resp.StatusCode)
  567. _, err = c.Writer.Write(jsonResponse)
  568. return nil, &usage
  569. }