chat_to_responses.go 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. package openaicompat
  2. import (
  3. "encoding/json"
  4. "errors"
  5. "fmt"
  6. "strings"
  7. "github.com/QuantumNous/new-api/common"
  8. "github.com/QuantumNous/new-api/dto"
  9. )
  10. func normalizeChatImageURLToString(v any) any {
  11. switch vv := v.(type) {
  12. case string:
  13. return vv
  14. case map[string]any:
  15. if url := common.Interface2String(vv["url"]); url != "" {
  16. return url
  17. }
  18. return v
  19. case dto.MessageImageUrl:
  20. if vv.Url != "" {
  21. return vv.Url
  22. }
  23. return v
  24. case *dto.MessageImageUrl:
  25. if vv != nil && vv.Url != "" {
  26. return vv.Url
  27. }
  28. return v
  29. default:
  30. return v
  31. }
  32. }
  33. func convertChatResponseFormatToResponsesText(reqFormat *dto.ResponseFormat) json.RawMessage {
  34. if reqFormat == nil || strings.TrimSpace(reqFormat.Type) == "" {
  35. return nil
  36. }
  37. format := map[string]any{
  38. "type": reqFormat.Type,
  39. }
  40. if reqFormat.Type == "json_schema" && len(reqFormat.JsonSchema) > 0 {
  41. var chatSchema map[string]any
  42. if err := common.Unmarshal(reqFormat.JsonSchema, &chatSchema); err == nil {
  43. for key, value := range chatSchema {
  44. if key == "type" {
  45. continue
  46. }
  47. format[key] = value
  48. }
  49. if nested, ok := format["json_schema"].(map[string]any); ok {
  50. for key, value := range nested {
  51. if _, exists := format[key]; !exists {
  52. format[key] = value
  53. }
  54. }
  55. delete(format, "json_schema")
  56. }
  57. } else {
  58. format["json_schema"] = reqFormat.JsonSchema
  59. }
  60. }
  61. textRaw, _ := common.Marshal(map[string]any{
  62. "format": format,
  63. })
  64. return textRaw
  65. }
  66. func ChatCompletionsRequestToResponsesRequest(req *dto.GeneralOpenAIRequest) (*dto.OpenAIResponsesRequest, error) {
  67. if req == nil {
  68. return nil, errors.New("request is nil")
  69. }
  70. if req.Model == "" {
  71. return nil, errors.New("model is required")
  72. }
  73. if req.N > 1 {
  74. return nil, fmt.Errorf("n>1 is not supported in responses compatibility mode")
  75. }
  76. var instructionsParts []string
  77. inputItems := make([]map[string]any, 0, len(req.Messages))
  78. for _, msg := range req.Messages {
  79. role := strings.TrimSpace(msg.Role)
  80. if role == "" {
  81. continue
  82. }
  83. if role == "tool" || role == "function" {
  84. callID := strings.TrimSpace(msg.ToolCallId)
  85. var output any
  86. if msg.Content == nil {
  87. output = ""
  88. } else if msg.IsStringContent() {
  89. output = msg.StringContent()
  90. } else {
  91. if b, err := common.Marshal(msg.Content); err == nil {
  92. output = string(b)
  93. } else {
  94. output = fmt.Sprintf("%v", msg.Content)
  95. }
  96. }
  97. if callID == "" {
  98. inputItems = append(inputItems, map[string]any{
  99. "role": "user",
  100. "content": fmt.Sprintf("[tool_output_missing_call_id] %v", output),
  101. })
  102. continue
  103. }
  104. inputItems = append(inputItems, map[string]any{
  105. "type": "function_call_output",
  106. "call_id": callID,
  107. "output": output,
  108. })
  109. continue
  110. }
  111. // Prefer mapping system/developer messages into `instructions`.
  112. if role == "system" || role == "developer" {
  113. if msg.Content == nil {
  114. continue
  115. }
  116. if msg.IsStringContent() {
  117. if s := strings.TrimSpace(msg.StringContent()); s != "" {
  118. instructionsParts = append(instructionsParts, s)
  119. }
  120. continue
  121. }
  122. parts := msg.ParseContent()
  123. var sb strings.Builder
  124. for _, part := range parts {
  125. if part.Type == dto.ContentTypeText && strings.TrimSpace(part.Text) != "" {
  126. if sb.Len() > 0 {
  127. sb.WriteString("\n")
  128. }
  129. sb.WriteString(part.Text)
  130. }
  131. }
  132. if s := strings.TrimSpace(sb.String()); s != "" {
  133. instructionsParts = append(instructionsParts, s)
  134. }
  135. continue
  136. }
  137. item := map[string]any{
  138. "role": role,
  139. }
  140. if msg.Content == nil {
  141. item["content"] = ""
  142. inputItems = append(inputItems, item)
  143. if role == "assistant" {
  144. for _, tc := range msg.ParseToolCalls() {
  145. if strings.TrimSpace(tc.ID) == "" {
  146. continue
  147. }
  148. if tc.Type != "" && tc.Type != "function" {
  149. continue
  150. }
  151. name := strings.TrimSpace(tc.Function.Name)
  152. if name == "" {
  153. continue
  154. }
  155. inputItems = append(inputItems, map[string]any{
  156. "type": "function_call",
  157. "call_id": tc.ID,
  158. "name": name,
  159. "arguments": tc.Function.Arguments,
  160. })
  161. }
  162. }
  163. continue
  164. }
  165. if msg.IsStringContent() {
  166. item["content"] = msg.StringContent()
  167. inputItems = append(inputItems, item)
  168. if role == "assistant" {
  169. for _, tc := range msg.ParseToolCalls() {
  170. if strings.TrimSpace(tc.ID) == "" {
  171. continue
  172. }
  173. if tc.Type != "" && tc.Type != "function" {
  174. continue
  175. }
  176. name := strings.TrimSpace(tc.Function.Name)
  177. if name == "" {
  178. continue
  179. }
  180. inputItems = append(inputItems, map[string]any{
  181. "type": "function_call",
  182. "call_id": tc.ID,
  183. "name": name,
  184. "arguments": tc.Function.Arguments,
  185. })
  186. }
  187. }
  188. continue
  189. }
  190. parts := msg.ParseContent()
  191. contentParts := make([]map[string]any, 0, len(parts))
  192. for _, part := range parts {
  193. switch part.Type {
  194. case dto.ContentTypeText:
  195. textType := "input_text"
  196. if role == "assistant" {
  197. textType = "output_text"
  198. }
  199. contentParts = append(contentParts, map[string]any{
  200. "type": textType,
  201. "text": part.Text,
  202. })
  203. case dto.ContentTypeImageURL:
  204. contentParts = append(contentParts, map[string]any{
  205. "type": "input_image",
  206. "image_url": normalizeChatImageURLToString(part.ImageUrl),
  207. })
  208. case dto.ContentTypeInputAudio:
  209. contentParts = append(contentParts, map[string]any{
  210. "type": "input_audio",
  211. "input_audio": part.InputAudio,
  212. })
  213. case dto.ContentTypeFile:
  214. contentParts = append(contentParts, map[string]any{
  215. "type": "input_file",
  216. "file": part.File,
  217. })
  218. case dto.ContentTypeVideoUrl:
  219. contentParts = append(contentParts, map[string]any{
  220. "type": "input_video",
  221. "video_url": part.VideoUrl,
  222. })
  223. default:
  224. contentParts = append(contentParts, map[string]any{
  225. "type": part.Type,
  226. })
  227. }
  228. }
  229. item["content"] = contentParts
  230. inputItems = append(inputItems, item)
  231. if role == "assistant" {
  232. for _, tc := range msg.ParseToolCalls() {
  233. if strings.TrimSpace(tc.ID) == "" {
  234. continue
  235. }
  236. if tc.Type != "" && tc.Type != "function" {
  237. continue
  238. }
  239. name := strings.TrimSpace(tc.Function.Name)
  240. if name == "" {
  241. continue
  242. }
  243. inputItems = append(inputItems, map[string]any{
  244. "type": "function_call",
  245. "call_id": tc.ID,
  246. "name": name,
  247. "arguments": tc.Function.Arguments,
  248. })
  249. }
  250. }
  251. }
  252. inputRaw, err := common.Marshal(inputItems)
  253. if err != nil {
  254. return nil, err
  255. }
  256. var instructionsRaw json.RawMessage
  257. if len(instructionsParts) > 0 {
  258. instructions := strings.Join(instructionsParts, "\n\n")
  259. instructionsRaw, _ = common.Marshal(instructions)
  260. }
  261. var toolsRaw json.RawMessage
  262. if req.Tools != nil {
  263. tools := make([]map[string]any, 0, len(req.Tools))
  264. for _, tool := range req.Tools {
  265. switch tool.Type {
  266. case "function":
  267. tools = append(tools, map[string]any{
  268. "type": "function",
  269. "name": tool.Function.Name,
  270. "description": tool.Function.Description,
  271. "parameters": tool.Function.Parameters,
  272. })
  273. default:
  274. // Best-effort: keep original tool shape for unknown types.
  275. var m map[string]any
  276. if b, err := common.Marshal(tool); err == nil {
  277. _ = common.Unmarshal(b, &m)
  278. }
  279. if len(m) == 0 {
  280. m = map[string]any{"type": tool.Type}
  281. }
  282. tools = append(tools, m)
  283. }
  284. }
  285. toolsRaw, _ = common.Marshal(tools)
  286. }
  287. var toolChoiceRaw json.RawMessage
  288. if req.ToolChoice != nil {
  289. switch v := req.ToolChoice.(type) {
  290. case string:
  291. toolChoiceRaw, _ = common.Marshal(v)
  292. default:
  293. var m map[string]any
  294. if b, err := common.Marshal(v); err == nil {
  295. _ = common.Unmarshal(b, &m)
  296. }
  297. if m == nil {
  298. toolChoiceRaw, _ = common.Marshal(v)
  299. } else if t, _ := m["type"].(string); t == "function" {
  300. // Chat: {"type":"function","function":{"name":"..."}}
  301. // Responses: {"type":"function","name":"..."}
  302. if name, ok := m["name"].(string); ok && name != "" {
  303. toolChoiceRaw, _ = common.Marshal(map[string]any{
  304. "type": "function",
  305. "name": name,
  306. })
  307. } else if fn, ok := m["function"].(map[string]any); ok {
  308. if name, ok := fn["name"].(string); ok && name != "" {
  309. toolChoiceRaw, _ = common.Marshal(map[string]any{
  310. "type": "function",
  311. "name": name,
  312. })
  313. } else {
  314. toolChoiceRaw, _ = common.Marshal(v)
  315. }
  316. } else {
  317. toolChoiceRaw, _ = common.Marshal(v)
  318. }
  319. } else {
  320. toolChoiceRaw, _ = common.Marshal(v)
  321. }
  322. }
  323. }
  324. var parallelToolCallsRaw json.RawMessage
  325. if req.ParallelTooCalls != nil {
  326. parallelToolCallsRaw, _ = common.Marshal(*req.ParallelTooCalls)
  327. }
  328. textRaw := convertChatResponseFormatToResponsesText(req.ResponseFormat)
  329. maxOutputTokens := req.MaxTokens
  330. if req.MaxCompletionTokens > maxOutputTokens {
  331. maxOutputTokens = req.MaxCompletionTokens
  332. }
  333. // OpenAI Responses API rejects max_output_tokens < 16 when explicitly provided.
  334. //if maxOutputTokens > 0 && maxOutputTokens < 16 {
  335. // maxOutputTokens = 16
  336. //}
  337. var topP *float64
  338. if req.TopP != 0 {
  339. topP = common.GetPointer(req.TopP)
  340. }
  341. out := &dto.OpenAIResponsesRequest{
  342. Model: req.Model,
  343. Input: inputRaw,
  344. Instructions: instructionsRaw,
  345. MaxOutputTokens: maxOutputTokens,
  346. Stream: req.Stream,
  347. Temperature: req.Temperature,
  348. Text: textRaw,
  349. ToolChoice: toolChoiceRaw,
  350. Tools: toolsRaw,
  351. TopP: topP,
  352. User: req.User,
  353. ParallelToolCalls: parallelToolCallsRaw,
  354. Store: req.Store,
  355. Metadata: req.Metadata,
  356. }
  357. if req.ReasoningEffort != "" {
  358. out.Reasoning = &dto.Reasoning{
  359. Effort: req.ReasoningEffort,
  360. Summary: "detailed",
  361. }
  362. }
  363. return out, nil
  364. }