chat_to_responses.go 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402
  1. package openaicompat
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "strings"
  7. "github.com/QuantumNous/new-api/common"
  8. "github.com/QuantumNous/new-api/dto"
  9. "github.com/samber/lo"
  10. )
  11. func normalizeChatImageURLToString(v any) any {
  12. switch vv := v.(type) {
  13. case string:
  14. return vv
  15. case map[string]any:
  16. if url := common.Interface2String(vv["url"]); url != "" {
  17. return url
  18. }
  19. return v
  20. case dto.MessageImageUrl:
  21. if vv.Url != "" {
  22. return vv.Url
  23. }
  24. return v
  25. case *dto.MessageImageUrl:
  26. if vv != nil && vv.Url != "" {
  27. return vv.Url
  28. }
  29. return v
  30. default:
  31. return v
  32. }
  33. }
  34. func convertChatResponseFormatToResponsesText(reqFormat *dto.ResponseFormat) json.RawMessage {
  35. if reqFormat == nil || strings.TrimSpace(reqFormat.Type) == "" {
  36. return nil
  37. }
  38. format := map[string]any{
  39. "type": reqFormat.Type,
  40. }
  41. if reqFormat.Type == "json_schema" && len(reqFormat.JsonSchema) > 0 {
  42. var chatSchema map[string]any
  43. if err := common.Unmarshal(reqFormat.JsonSchema, &chatSchema); err == nil {
  44. for key, value := range chatSchema {
  45. if key == "type" {
  46. continue
  47. }
  48. format[key] = value
  49. }
  50. if nested, ok := format["json_schema"].(map[string]any); ok {
  51. for key, value := range nested {
  52. if _, exists := format[key]; !exists {
  53. format[key] = value
  54. }
  55. }
  56. delete(format, "json_schema")
  57. }
  58. } else {
  59. format["json_schema"] = reqFormat.JsonSchema
  60. }
  61. }
  62. textRaw, _ := common.Marshal(map[string]any{
  63. "format": format,
  64. })
  65. return textRaw
  66. }
  67. func ChatCompletionsRequestToResponsesRequest(req *dto.GeneralOpenAIRequest) (*dto.OpenAIResponsesRequest, error) {
  68. if req == nil {
  69. return nil, errors.New("request is nil")
  70. }
  71. if req.Model == "" {
  72. return nil, errors.New("model is required")
  73. }
  74. if lo.FromPtrOr(req.N, 1) > 1 {
  75. return nil, fmt.Errorf("n>1 is not supported in responses compatibility mode")
  76. }
  77. var instructionsParts []string
  78. inputItems := make([]map[string]any, 0, len(req.Messages))
  79. for _, msg := range req.Messages {
  80. role := strings.TrimSpace(msg.Role)
  81. if role == "" {
  82. continue
  83. }
  84. if role == "tool" || role == "function" {
  85. callID := strings.TrimSpace(msg.ToolCallId)
  86. var output any
  87. if msg.Content == nil {
  88. output = ""
  89. } else if msg.IsStringContent() {
  90. output = msg.StringContent()
  91. } else {
  92. if b, err := common.Marshal(msg.Content); err == nil {
  93. output = string(b)
  94. } else {
  95. output = fmt.Sprintf("%v", msg.Content)
  96. }
  97. }
  98. if callID == "" {
  99. inputItems = append(inputItems, map[string]any{
  100. "role": "user",
  101. "content": fmt.Sprintf("[tool_output_missing_call_id] %v", output),
  102. })
  103. continue
  104. }
  105. inputItems = append(inputItems, map[string]any{
  106. "type": "function_call_output",
  107. "call_id": callID,
  108. "output": output,
  109. })
  110. continue
  111. }
  112. // Prefer mapping system/developer messages into `instructions`.
  113. if role == "system" || role == "developer" {
  114. if msg.Content == nil {
  115. continue
  116. }
  117. if msg.IsStringContent() {
  118. if s := strings.TrimSpace(msg.StringContent()); s != "" {
  119. instructionsParts = append(instructionsParts, s)
  120. }
  121. continue
  122. }
  123. parts := msg.ParseContent()
  124. var sb strings.Builder
  125. for _, part := range parts {
  126. if part.Type == dto.ContentTypeText && strings.TrimSpace(part.Text) != "" {
  127. if sb.Len() > 0 {
  128. sb.WriteString("\n")
  129. }
  130. sb.WriteString(part.Text)
  131. }
  132. }
  133. if s := strings.TrimSpace(sb.String()); s != "" {
  134. instructionsParts = append(instructionsParts, s)
  135. }
  136. continue
  137. }
  138. item := map[string]any{
  139. "role": role,
  140. }
  141. if msg.Content == nil {
  142. item["content"] = ""
  143. inputItems = append(inputItems, item)
  144. if role == "assistant" {
  145. for _, tc := range msg.ParseToolCalls() {
  146. if strings.TrimSpace(tc.ID) == "" {
  147. continue
  148. }
  149. if tc.Type != "" && tc.Type != "function" {
  150. continue
  151. }
  152. name := strings.TrimSpace(tc.Function.Name)
  153. if name == "" {
  154. continue
  155. }
  156. inputItems = append(inputItems, map[string]any{
  157. "type": "function_call",
  158. "call_id": tc.ID,
  159. "name": name,
  160. "arguments": tc.Function.Arguments,
  161. })
  162. }
  163. }
  164. continue
  165. }
  166. if msg.IsStringContent() {
  167. item["content"] = msg.StringContent()
  168. inputItems = append(inputItems, item)
  169. if role == "assistant" {
  170. for _, tc := range msg.ParseToolCalls() {
  171. if strings.TrimSpace(tc.ID) == "" {
  172. continue
  173. }
  174. if tc.Type != "" && tc.Type != "function" {
  175. continue
  176. }
  177. name := strings.TrimSpace(tc.Function.Name)
  178. if name == "" {
  179. continue
  180. }
  181. inputItems = append(inputItems, map[string]any{
  182. "type": "function_call",
  183. "call_id": tc.ID,
  184. "name": name,
  185. "arguments": tc.Function.Arguments,
  186. })
  187. }
  188. }
  189. continue
  190. }
  191. parts := msg.ParseContent()
  192. contentParts := make([]map[string]any, 0, len(parts))
  193. for _, part := range parts {
  194. switch part.Type {
  195. case dto.ContentTypeText:
  196. textType := "input_text"
  197. if role == "assistant" {
  198. textType = "output_text"
  199. }
  200. contentParts = append(contentParts, map[string]any{
  201. "type": textType,
  202. "text": part.Text,
  203. })
  204. case dto.ContentTypeImageURL:
  205. contentParts = append(contentParts, map[string]any{
  206. "type": "input_image",
  207. "image_url": normalizeChatImageURLToString(part.ImageUrl),
  208. })
  209. case dto.ContentTypeInputAudio:
  210. contentParts = append(contentParts, map[string]any{
  211. "type": "input_audio",
  212. "input_audio": part.InputAudio,
  213. })
  214. case dto.ContentTypeFile:
  215. contentParts = append(contentParts, map[string]any{
  216. "type": "input_file",
  217. "file": part.File,
  218. })
  219. case dto.ContentTypeVideoUrl:
  220. contentParts = append(contentParts, map[string]any{
  221. "type": "input_video",
  222. "video_url": part.VideoUrl,
  223. })
  224. default:
  225. contentParts = append(contentParts, map[string]any{
  226. "type": part.Type,
  227. })
  228. }
  229. }
  230. item["content"] = contentParts
  231. inputItems = append(inputItems, item)
  232. if role == "assistant" {
  233. for _, tc := range msg.ParseToolCalls() {
  234. if strings.TrimSpace(tc.ID) == "" {
  235. continue
  236. }
  237. if tc.Type != "" && tc.Type != "function" {
  238. continue
  239. }
  240. name := strings.TrimSpace(tc.Function.Name)
  241. if name == "" {
  242. continue
  243. }
  244. inputItems = append(inputItems, map[string]any{
  245. "type": "function_call",
  246. "call_id": tc.ID,
  247. "name": name,
  248. "arguments": tc.Function.Arguments,
  249. })
  250. }
  251. }
  252. }
  253. inputRaw, err := common.Marshal(inputItems)
  254. if err != nil {
  255. return nil, err
  256. }
  257. var instructionsRaw json.RawMessage
  258. if len(instructionsParts) > 0 {
  259. instructions := strings.Join(instructionsParts, "\n\n")
  260. instructionsRaw, _ = common.Marshal(instructions)
  261. }
  262. var toolsRaw json.RawMessage
  263. if req.Tools != nil {
  264. tools := make([]map[string]any, 0, len(req.Tools))
  265. for _, tool := range req.Tools {
  266. switch tool.Type {
  267. case "function":
  268. tools = append(tools, map[string]any{
  269. "type": "function",
  270. "name": tool.Function.Name,
  271. "description": tool.Function.Description,
  272. "parameters": tool.Function.Parameters,
  273. })
  274. default:
  275. // Best-effort: keep original tool shape for unknown types.
  276. var m map[string]any
  277. if b, err := common.Marshal(tool); err == nil {
  278. _ = common.Unmarshal(b, &m)
  279. }
  280. if len(m) == 0 {
  281. m = map[string]any{"type": tool.Type}
  282. }
  283. tools = append(tools, m)
  284. }
  285. }
  286. toolsRaw, _ = common.Marshal(tools)
  287. }
  288. var toolChoiceRaw json.RawMessage
  289. if req.ToolChoice != nil {
  290. switch v := req.ToolChoice.(type) {
  291. case string:
  292. toolChoiceRaw, _ = common.Marshal(v)
  293. default:
  294. var m map[string]any
  295. if b, err := common.Marshal(v); err == nil {
  296. _ = common.Unmarshal(b, &m)
  297. }
  298. if m == nil {
  299. toolChoiceRaw, _ = common.Marshal(v)
  300. } else if t, _ := m["type"].(string); t == "function" {
  301. // Chat: {"type":"function","function":{"name":"..."}}
  302. // Responses: {"type":"function","name":"..."}
  303. if name, ok := m["name"].(string); ok && name != "" {
  304. toolChoiceRaw, _ = common.Marshal(map[string]any{
  305. "type": "function",
  306. "name": name,
  307. })
  308. } else if fn, ok := m["function"].(map[string]any); ok {
  309. if name, ok := fn["name"].(string); ok && name != "" {
  310. toolChoiceRaw, _ = common.Marshal(map[string]any{
  311. "type": "function",
  312. "name": name,
  313. })
  314. } else {
  315. toolChoiceRaw, _ = common.Marshal(v)
  316. }
  317. } else {
  318. toolChoiceRaw, _ = common.Marshal(v)
  319. }
  320. } else {
  321. toolChoiceRaw, _ = common.Marshal(v)
  322. }
  323. }
  324. }
  325. var parallelToolCallsRaw json.RawMessage
  326. if req.ParallelTooCalls != nil {
  327. parallelToolCallsRaw, _ = common.Marshal(*req.ParallelTooCalls)
  328. }
  329. textRaw := convertChatResponseFormatToResponsesText(req.ResponseFormat)
  330. maxOutputTokens := lo.FromPtrOr(req.MaxTokens, uint(0))
  331. maxCompletionTokens := lo.FromPtrOr(req.MaxCompletionTokens, uint(0))
  332. if maxCompletionTokens > maxOutputTokens {
  333. maxOutputTokens = maxCompletionTokens
  334. }
  335. // OpenAI Responses API rejects max_output_tokens < 16 when explicitly provided.
  336. //if maxOutputTokens > 0 && maxOutputTokens < 16 {
  337. // maxOutputTokens = 16
  338. //}
  339. var topP *float64
  340. if req.TopP != nil {
  341. topP = common.GetPointer(lo.FromPtr(req.TopP))
  342. }
  343. out := &dto.OpenAIResponsesRequest{
  344. Model: req.Model,
  345. Input: inputRaw,
  346. Instructions: instructionsRaw,
  347. Stream: req.Stream,
  348. Temperature: req.Temperature,
  349. Text: textRaw,
  350. ToolChoice: toolChoiceRaw,
  351. Tools: toolsRaw,
  352. TopP: topP,
  353. User: req.User,
  354. ParallelToolCalls: parallelToolCallsRaw,
  355. Store: req.Store,
  356. Metadata: req.Metadata,
  357. }
  358. if req.MaxTokens != nil || req.MaxCompletionTokens != nil {
  359. out.MaxOutputTokens = lo.ToPtr(maxOutputTokens)
  360. }
  361. if req.ReasoningEffort != "" {
  362. out.Reasoning = &dto.Reasoning{
  363. Effort: req.ReasoningEffort,
  364. Summary: "detailed",
  365. }
  366. }
  367. return out, nil
  368. }