openai_request.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/samber/lo"
  9. "github.com/gin-gonic/gin"
  10. )
// ResponseFormat mirrors OpenAI's `response_format` request field.
type ResponseFormat struct {
	Type       string          `json:"type,omitempty"`        // e.g. "text", "json_object", "json_schema"
	JsonSchema json.RawMessage `json:"json_schema,omitempty"` // raw schema payload, passed through undecoded
}
// FormatJsonSchema describes a named JSON schema for structured output
// (the `json_schema` variant of response_format).
type FormatJsonSchema struct {
	Description string          `json:"description,omitempty"`
	Name        string          `json:"name"`
	Schema      any             `json:"schema,omitempty"`
	Strict      json.RawMessage `json:"strict,omitempty"` // kept raw so true/false/null pass through unchanged
}
// GeneralOpenAIRequest represents a general request structure for OpenAI-compatible APIs.
// Convention for adding parameters: fields that this codebase never reads must use
// json.RawMessage (opaque pass-through) and carry the omitempty tag.
type GeneralOpenAIRequest struct {
	Model               string          `json:"model,omitempty"`
	Messages            []Message       `json:"messages,omitempty"`
	Prompt              any             `json:"prompt,omitempty"` // legacy completions: string or []any
	Prefix              any             `json:"prefix,omitempty"`
	Suffix              any             `json:"suffix,omitempty"`
	Stream              *bool           `json:"stream,omitempty"`
	StreamOptions       *StreamOptions  `json:"stream_options,omitempty"`
	MaxTokens           *uint           `json:"max_tokens,omitempty"`
	MaxCompletionTokens *uint           `json:"max_completion_tokens,omitempty"`
	ReasoningEffort     string          `json:"reasoning_effort,omitempty"`
	Verbosity           json.RawMessage `json:"verbosity,omitempty"` // gpt-5
	Temperature         *float64        `json:"temperature,omitempty"`
	TopP                *float64        `json:"top_p,omitempty"`
	TopK                *int            `json:"top_k,omitempty"`
	Stop                any             `json:"stop,omitempty"` // string or []string on the wire
	N                   *int            `json:"n,omitempty"`
	Input               any             `json:"input,omitempty"` // embeddings/responses input: string or []any
	Instruction         string          `json:"instruction,omitempty"`
	Size                string          `json:"size,omitempty"`
	Functions           json.RawMessage `json:"functions,omitempty"` // deprecated predecessor of tools
	FrequencyPenalty    *float64        `json:"frequency_penalty,omitempty"`
	PresencePenalty     *float64        `json:"presence_penalty,omitempty"`
	ResponseFormat      *ResponseFormat `json:"response_format,omitempty"`
	EncodingFormat      json.RawMessage `json:"encoding_format,omitempty"`
	// Seed is a float64 pointer, presumably to tolerate clients that send
	// non-integer values — TODO confirm; OpenAI documents seed as an integer.
	Seed *float64 `json:"seed,omitempty"`
	// NOTE(review): field name has a typo ("Too" instead of "Tool"); the json
	// tag is correct, and the Go name is kept to avoid breaking callers.
	ParallelTooCalls *bool             `json:"parallel_tool_calls,omitempty"`
	Tools            []ToolCallRequest `json:"tools,omitempty"`
	ToolChoice       any               `json:"tool_choice,omitempty"` // string ("auto"/"none") or object
	FunctionCall     json.RawMessage   `json:"function_call,omitempty"`
	User             string            `json:"user,omitempty"`
	// ServiceTier specifies upstream service level and may affect billing.
	// This field is filtered by default and can be enabled via channel setting allow_service_tier.
	ServiceTier string          `json:"service_tier,omitempty"`
	LogProbs    *bool           `json:"logprobs,omitempty"`
	TopLogProbs *int            `json:"top_logprobs,omitempty"`
	Dimensions  *int            `json:"dimensions,omitempty"`
	Modalities  json.RawMessage `json:"modalities,omitempty"`
	Audio       json.RawMessage `json:"audio,omitempty"`
	// SafetyIdentifier helps OpenAI detect application users who may violate
	// usage policies. NOTE: this field sends user-identifying information to
	// OpenAI; it is filtered by default and can be enabled via allow_safety_identifier.
	SafetyIdentifier string `json:"safety_identifier,omitempty"`
	// Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
	// Pass-through is allowed by default and can be disabled via disable_store;
	// disabling it may break Codex usage.
	Store json.RawMessage `json:"store,omitempty"`
	// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
	PromptCacheKey       string          `json:"prompt_cache_key,omitempty"`
	PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
	LogitBias            json.RawMessage `json:"logit_bias,omitempty"`
	Metadata             json.RawMessage `json:"metadata,omitempty"`
	Prediction           json.RawMessage `json:"prediction,omitempty"`
	// gemini
	ExtraBody json.RawMessage `json:"extra_body,omitempty"`
	// xai
	SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
	// claude
	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
	// OpenRouter Params
	Usage     json.RawMessage `json:"usage,omitempty"`
	Reasoning json.RawMessage `json:"reasoning,omitempty"`
	// Ali Qwen Params
	VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
	EnableThinking         json.RawMessage `json:"enable_thinking,omitempty"`
	ChatTemplateKwargs     json.RawMessage `json:"chat_template_kwargs,omitempty"`
	EnableSearch           json.RawMessage `json:"enable_search,omitempty"`
	// ollama Params
	Think json.RawMessage `json:"think,omitempty"`
	// baidu v2
	WebSearch json.RawMessage `json:"web_search,omitempty"`
	// doubao,zhipu_v4
	THINKING json.RawMessage `json:"thinking,omitempty"`
	// pplx Params
	SearchDomainFilter     json.RawMessage `json:"search_domain_filter,omitempty"`
	SearchRecencyFilter    string          `json:"search_recency_filter,omitempty"`
	ReturnImages           *bool           `json:"return_images,omitempty"`
	ReturnRelatedQuestions *bool           `json:"return_related_questions,omitempty"`
	SearchMode             string          `json:"search_mode,omitempty"`
	// Minimax
	ReasoningSplit json.RawMessage `json:"reasoning_split,omitempty"`
}
  104. // createFileSource 根据数据内容创建正确类型的 FileSource
  105. func createFileSource(data string) *types.FileSource {
  106. if strings.HasPrefix(data, "http://") || strings.HasPrefix(data, "https://") {
  107. return types.NewURLFileSource(data)
  108. }
  109. return types.NewBase64FileSource(data, "")
  110. }
// GetTokenCountMeta gathers everything the token counter needs from the
// request: all countable text joined into one string, message/name/tool
// counters, attached media (images, audio, files, video), and the effective
// max-token cap.
func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
	var tokenCountMeta types.TokenCountMeta
	var texts = make([]string, 0)
	var fileMeta = make([]*types.FileMeta, 0)
	// Legacy completions prompt: a string, a list (only string elements are
	// counted), or anything else formatted with %v as a fallback.
	if r.Prompt != nil {
		switch v := r.Prompt.(type) {
		case string:
			texts = append(texts, v)
		case []any:
			for _, item := range v {
				if str, ok := item.(string); ok {
					texts = append(texts, str)
				}
			}
		default:
			texts = append(texts, fmt.Sprintf("%v", r.Prompt))
		}
	}
	if r.Input != nil {
		inputs := r.ParseInput()
		texts = append(texts, inputs...)
	}
	// The larger of max_tokens / max_completion_tokens wins (0 when both unset).
	maxTokens := lo.FromPtrOr(r.MaxTokens, uint(0))
	maxCompletionTokens := lo.FromPtrOr(r.MaxCompletionTokens, uint(0))
	if maxCompletionTokens > maxTokens {
		tokenCountMeta.MaxTokens = int(maxCompletionTokens)
	} else {
		tokenCountMeta.MaxTokens = int(maxTokens)
	}
	for _, message := range r.Messages {
		tokenCountMeta.MessagesCount++
		texts = append(texts, message.Role)
		if message.Content != nil {
			// Note: the name is only counted when content is non-nil,
			// because both live inside this content guard.
			if message.Name != nil {
				tokenCountMeta.NameCount++
				texts = append(texts, *message.Name)
			}
			arrayContent := message.ParseContent()
			for _, m := range arrayContent {
				if m.Type == ContentTypeImageURL {
					imageUrl := m.GetImageMedia()
					if imageUrl != nil && imageUrl.Url != "" {
						source := createFileSource(imageUrl.Url)
						fileMeta = append(fileMeta, &types.FileMeta{
							FileType: types.FileTypeImage,
							Source:   source,
							Detail:   imageUrl.Detail,
						})
					}
				} else if m.Type == ContentTypeInputAudio {
					inputAudio := m.GetInputAudio()
					if inputAudio != nil && inputAudio.Data != "" {
						source := createFileSource(inputAudio.Data)
						fileMeta = append(fileMeta, &types.FileMeta{
							FileType: types.FileTypeAudio,
							Source:   source,
						})
					}
				} else if m.Type == ContentTypeFile {
					file := m.GetFile()
					if file != nil && file.FileData != "" {
						source := createFileSource(file.FileData)
						fileMeta = append(fileMeta, &types.FileMeta{
							FileType: types.FileTypeFile,
							Source:   source,
						})
					}
				} else if m.Type == ContentTypeVideoUrl {
					videoUrl := m.GetVideoUrl()
					if videoUrl != nil && videoUrl.Url != "" {
						source := createFileSource(videoUrl.Url)
						fileMeta = append(fileMeta, &types.FileMeta{
							FileType: types.FileTypeVideo,
							Source:   source,
						})
					}
				} else {
					// Anything else (including plain text parts) counts as text.
					texts = append(texts, m.Text)
				}
			}
		}
	}
	if r.Tools != nil {
		openaiTools := r.Tools
		for _, tool := range openaiTools {
			tokenCountMeta.ToolsCount++
			texts = append(texts, tool.Function.Name)
			if tool.Function.Description != "" {
				texts = append(texts, tool.Function.Description)
			}
			if tool.Function.Parameters != nil {
				texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
			}
		}
		//toolTokens := CountTokenInput(countStr, request.Model)
		//tkm += 8
		//tkm += toolTokens
	}
	tokenCountMeta.CombineText = strings.Join(texts, "\n")
	tokenCountMeta.Files = fileMeta
	return &tokenCountMeta
}
  213. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  214. return lo.FromPtrOr(r.Stream, false)
  215. }
  216. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  217. if modelName != "" {
  218. r.Model = modelName
  219. }
  220. }
  221. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  222. result := make(map[string]any)
  223. data, _ := common.Marshal(r)
  224. _ = common.Unmarshal(data, &result)
  225. return result
  226. }
  227. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  228. if strings.HasPrefix(r.Model, "o") {
  229. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  230. return "developer"
  231. }
  232. } else if strings.HasPrefix(r.Model, "gpt-5") {
  233. return "developer"
  234. }
  235. return "system"
  236. }
// CustomType marks a tool call of the "custom" (non-function) variety.
const CustomType = "custom"

// ToolCallRequest is one entry of the `tools` / `tool_calls` arrays.
type ToolCallRequest struct {
	ID       string          `json:"id,omitempty"`
	Type     string          `json:"type"` // usually "function"; see CustomType
	Function FunctionRequest `json:"function,omitempty"`
	Custom   json.RawMessage `json:"custom,omitempty"` // raw payload for custom tools
}
// FunctionRequest describes a callable function: its schema when declared
// under `tools`, or its serialized arguments when echoed back in a tool call.
type FunctionRequest struct {
	Description string `json:"description,omitempty"`
	Name        string `json:"name"`
	Parameters  any    `json:"parameters,omitempty"` // JSON-schema object
	Arguments   string `json:"arguments,omitempty"`  // JSON-encoded call arguments
}
// StreamOptions tunes streaming behavior (the `stream_options` field).
type StreamOptions struct {
	// IncludeUsage asks the upstream to append a usage chunk to the stream.
	IncludeUsage bool `json:"include_usage,omitempty"`
	// IncludeObfuscation is only for /v1/responses stream payload.
	// This field is filtered by default and can be enabled via channel setting allow_include_obfuscation.
	IncludeObfuscation bool `json:"include_obfuscation,omitempty"`
}
  256. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  257. maxCompletionTokens := lo.FromPtrOr(r.MaxCompletionTokens, uint(0))
  258. if maxCompletionTokens != 0 {
  259. return maxCompletionTokens
  260. }
  261. return lo.FromPtrOr(r.MaxTokens, uint(0))
  262. }
  263. func (r *GeneralOpenAIRequest) ParseInput() []string {
  264. if r.Input == nil {
  265. return nil
  266. }
  267. var input []string
  268. switch r.Input.(type) {
  269. case string:
  270. input = []string{r.Input.(string)}
  271. case []any:
  272. input = make([]string, 0, len(r.Input.([]any)))
  273. for _, item := range r.Input.([]any) {
  274. if str, ok := item.(string); ok {
  275. input = append(input, str)
  276. }
  277. }
  278. }
  279. return input
  280. }
// Message is a single chat-completion message. Content is `any` because the
// wire format accepts either a plain string or an array of content parts;
// use StringContent/ParseContent for typed access.
type Message struct {
	Role             string          `json:"role"`
	Content          any             `json:"content"` // string, []any, []MediaContent, or nil
	Name             *string         `json:"name,omitempty"`
	Prefix           *bool           `json:"prefix,omitempty"`
	ReasoningContent string          `json:"reasoning_content,omitempty"`
	Reasoning        string          `json:"reasoning,omitempty"`
	ToolCalls        json.RawMessage `json:"tool_calls,omitempty"` // decoded lazily by ParseToolCalls
	ToolCallId       string          `json:"tool_call_id,omitempty"`
	parsedContent    []MediaContent  // lazy cache populated by ParseContent
	//parsedStringContent *string
}
// MediaContent is one part of a multi-part message content. Exactly one of
// the media fields is expected to be set, discriminated by Type (see the
// ContentType* constants). The media fields are `any` because they may hold
// either decoded structs or raw JSON maps; use the Get* accessors.
type MediaContent struct {
	Type       string `json:"type"`
	Text       string `json:"text,omitempty"`
	ImageUrl   any    `json:"image_url,omitempty"`    // *MessageImageUrl or map[string]any
	InputAudio any    `json:"input_audio,omitempty"`  // *MessageInputAudio or map[string]any
	File       any    `json:"file,omitempty"`         // *MessageFile or map[string]any
	VideoUrl   any    `json:"video_url,omitempty"`    // *MessageVideoUrl or map[string]any
	// OpenRouter Params
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
  303. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  304. if m.ImageUrl != nil {
  305. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  306. return m.ImageUrl.(*MessageImageUrl)
  307. }
  308. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  309. out := &MessageImageUrl{
  310. Url: common.Interface2String(itemMap["url"]),
  311. Detail: common.Interface2String(itemMap["detail"]),
  312. MimeType: common.Interface2String(itemMap["mime_type"]),
  313. }
  314. return out
  315. }
  316. }
  317. return nil
  318. }
  319. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  320. if m.InputAudio != nil {
  321. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  322. return m.InputAudio.(*MessageInputAudio)
  323. }
  324. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  325. out := &MessageInputAudio{
  326. Data: common.Interface2String(itemMap["data"]),
  327. Format: common.Interface2String(itemMap["format"]),
  328. }
  329. return out
  330. }
  331. }
  332. return nil
  333. }
  334. func (m *MediaContent) GetFile() *MessageFile {
  335. if m.File != nil {
  336. if _, ok := m.File.(*MessageFile); ok {
  337. return m.File.(*MessageFile)
  338. }
  339. if itemMap, ok := m.File.(map[string]any); ok {
  340. out := &MessageFile{
  341. FileName: common.Interface2String(itemMap["file_name"]),
  342. FileData: common.Interface2String(itemMap["file_data"]),
  343. FileId: common.Interface2String(itemMap["file_id"]),
  344. }
  345. return out
  346. }
  347. }
  348. return nil
  349. }
  350. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  351. if m.VideoUrl != nil {
  352. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  353. return m.VideoUrl.(*MessageVideoUrl)
  354. }
  355. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  356. out := &MessageVideoUrl{
  357. Url: common.Interface2String(itemMap["url"]),
  358. }
  359. return out
  360. }
  361. }
  362. return nil
  363. }
// MessageImageUrl is the decoded form of an image_url content part.
type MessageImageUrl struct {
	Url    string `json:"url"`    // remote URL or inline base64 data
	Detail string `json:"detail"` // e.g. "low"/"high"/"auto"
	// MimeType has no json tag, so it marshals as "MimeType" — presumably
	// internal-only metadata; confirm before relying on its wire form.
	MimeType string
}
// IsRemoteImage reports whether the URL points at a remote resource.
// This is a bare "http" prefix check, so it matches both http:// and
// https:// (and any other string that merely starts with "http").
func (m *MessageImageUrl) IsRemoteImage() bool {
	return strings.HasPrefix(m.Url, "http")
}
// MessageInputAudio is the decoded form of an input_audio content part.
type MessageInputAudio struct {
	Data   string `json:"data"` //base64
	Format string `json:"format"`
}
// MessageFile is the decoded form of a file content part: either an inline
// payload (filename + file_data) or a reference to an uploaded file (file_id).
type MessageFile struct {
	FileName string `json:"filename,omitempty"`
	FileData string `json:"file_data,omitempty"`
	FileId   string `json:"file_id,omitempty"`
}
// MessageVideoUrl is the decoded form of a video_url content part.
type MessageVideoUrl struct {
	Url string `json:"url"`
}
// Content-part type discriminators used in MediaContent.Type.
const (
	ContentTypeText       = "text"
	ContentTypeImageURL   = "image_url"
	ContentTypeInputAudio = "input_audio"
	ContentTypeFile       = "file"
	ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
	//ContentTypeAudioUrl = "audio_url"
)
  392. func (m *Message) GetPrefix() bool {
  393. if m.Prefix == nil {
  394. return false
  395. }
  396. return *m.Prefix
  397. }
// SetPrefix sets the prefix flag, allocating its backing bool.
func (m *Message) SetPrefix(prefix bool) {
	m.Prefix = &prefix
}
  401. func (m *Message) ParseToolCalls() []ToolCallRequest {
  402. if m.ToolCalls == nil {
  403. return nil
  404. }
  405. var toolCalls []ToolCallRequest
  406. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  407. return toolCalls
  408. }
  409. return toolCalls
  410. }
// SetToolCalls replaces the raw tool_calls payload with the JSON encoding
// of toolCalls. A marshal error is silently ignored, leaving ToolCalls nil.
func (m *Message) SetToolCalls(toolCalls any) {
	toolCallsJson, _ := json.Marshal(toolCalls)
	m.ToolCalls = toolCallsJson
}
  415. func (m *Message) StringContent() string {
  416. switch m.Content.(type) {
  417. case string:
  418. return m.Content.(string)
  419. case []any:
  420. var contentStr string
  421. for _, contentItem := range m.Content.([]any) {
  422. contentMap, ok := contentItem.(map[string]any)
  423. if !ok {
  424. continue
  425. }
  426. if contentMap["type"] == ContentTypeText {
  427. if subStr, ok := contentMap["text"].(string); ok {
  428. contentStr += subStr
  429. }
  430. }
  431. }
  432. return contentStr
  433. }
  434. return ""
  435. }
// SetNullContent clears the content and invalidates the parsed cache.
func (m *Message) SetNullContent() {
	m.Content = nil
	m.parsedContent = nil
}
// SetStringContent replaces the content with a plain string and invalidates
// the parsed cache (ParseContent will rebuild it on demand).
func (m *Message) SetStringContent(content string) {
	m.Content = content
	m.parsedContent = nil
}
// SetMediaContent replaces the content with typed parts and primes the
// parsed cache with the same slice.
func (m *Message) SetMediaContent(content []MediaContent) {
	m.Content = content
	m.parsedContent = content
}
  448. func (m *Message) IsStringContent() bool {
  449. _, ok := m.Content.(string)
  450. if ok {
  451. return true
  452. }
  453. return false
  454. }
// ParseContent normalizes the content into a []MediaContent, caching the
// result on the message. Accepted shapes: nil (returns nil), a plain string
// (wrapped as one text part), or an array whose elements are MediaContent
// values or raw JSON maps; unrecognized elements are silently skipped.
func (m *Message) ParseContent() []MediaContent {
	if m.Content == nil {
		return nil
	}
	// Cache hit: note an empty parse result is never cached, so it is
	// recomputed on every call.
	if len(m.parsedContent) > 0 {
		return m.parsedContent
	}
	var contentList []MediaContent
	// Try to interpret the content as a plain string first.
	content, ok := m.Content.(string)
	if ok {
		contentList = []MediaContent{{
			Type: ContentTypeText,
			Text: content,
		}}
		m.parsedContent = contentList
		return contentList
	}
	// Otherwise expect an array of parts.
	//var arrayContent []map[string]interface{}
	arrayContent, ok := m.Content.([]any)
	if !ok {
		return contentList
	}
	for _, contentItemAny := range arrayContent {
		// Already-typed parts (e.g. set via SetMediaContent) pass straight through.
		mediaItem, ok := contentItemAny.(MediaContent)
		if ok {
			contentList = append(contentList, mediaItem)
			continue
		}
		contentItem, ok := contentItemAny.(map[string]any)
		if !ok {
			continue
		}
		contentType, ok := contentItem["type"].(string)
		if !ok {
			continue
		}
		switch contentType {
		case ContentTypeText:
			if text, ok := contentItem["text"].(string); ok {
				contentList = append(contentList, MediaContent{
					Type: ContentTypeText,
					Text: text,
				})
			}
		case ContentTypeImageURL:
			// image_url may be a bare URL string or an object; detail
			// defaults to "high" when absent.
			imageUrl := contentItem["image_url"]
			temp := &MessageImageUrl{
				Detail: "high",
			}
			switch v := imageUrl.(type) {
			case string:
				temp.Url = v
			case map[string]interface{}:
				url, ok1 := v["url"].(string)
				detail, ok2 := v["detail"].(string)
				if ok2 {
					temp.Detail = detail
				}
				if ok1 {
					temp.Url = url
				}
			}
			contentList = append(contentList, MediaContent{
				Type:     ContentTypeImageURL,
				ImageUrl: temp,
			})
		case ContentTypeInputAudio:
			if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
				data, ok1 := audioData["data"].(string)
				format, ok2 := audioData["format"].(string)
				if ok1 && ok2 {
					temp := &MessageInputAudio{
						Data:   data,
						Format: format,
					}
					contentList = append(contentList, MediaContent{
						Type:       ContentTypeInputAudio,
						InputAudio: temp,
					})
				}
			}
		case ContentTypeFile:
			// file_id takes precedence; otherwise require both filename
			// and file_data for an inline payload.
			if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
				fileId, ok3 := fileData["file_id"].(string)
				if ok3 {
					contentList = append(contentList, MediaContent{
						Type: ContentTypeFile,
						File: &MessageFile{
							FileId: fileId,
						},
					})
				} else {
					fileName, ok1 := fileData["filename"].(string)
					fileDataStr, ok2 := fileData["file_data"].(string)
					if ok1 && ok2 {
						contentList = append(contentList, MediaContent{
							Type: ContentTypeFile,
							File: &MessageFile{
								FileName: fileName,
								FileData: fileDataStr,
							},
						})
					}
				}
			}
		case ContentTypeVideoUrl:
			// Note: unlike image_url, video_url is only accepted as a bare string here.
			if videoUrl, ok := contentItem["video_url"].(string); ok {
				contentList = append(contentList, MediaContent{
					Type: ContentTypeVideoUrl,
					VideoUrl: &MessageVideoUrl{
						Url: videoUrl,
					},
				})
			}
		}
	}
	if len(contentList) > 0 {
		m.parsedContent = contentList
	}
	return contentList
}
  578. // old code
  579. /*func (m *Message) StringContent() string {
  580. if m.parsedStringContent != nil {
  581. return *m.parsedStringContent
  582. }
  583. var stringContent string
  584. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  585. m.parsedStringContent = &stringContent
  586. return stringContent
  587. }
  588. contentStr := new(strings.Builder)
  589. arrayContent := m.ParseContent()
  590. for _, content := range arrayContent {
  591. if content.Type == ContentTypeText {
  592. contentStr.WriteString(content.Text)
  593. }
  594. }
  595. stringContent = contentStr.String()
  596. m.parsedStringContent = &stringContent
  597. return stringContent
  598. }
  599. func (m *Message) SetNullContent() {
  600. m.Content = nil
  601. m.parsedStringContent = nil
  602. m.parsedContent = nil
  603. }
  604. func (m *Message) SetStringContent(content string) {
  605. jsonContent, _ := json.Marshal(content)
  606. m.Content = jsonContent
  607. m.parsedStringContent = &content
  608. m.parsedContent = nil
  609. }
  610. func (m *Message) SetMediaContent(content []MediaContent) {
  611. jsonContent, _ := json.Marshal(content)
  612. m.Content = jsonContent
  613. m.parsedContent = nil
  614. m.parsedStringContent = nil
  615. }
  616. func (m *Message) IsStringContent() bool {
  617. if m.parsedStringContent != nil {
  618. return true
  619. }
  620. var stringContent string
  621. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  622. m.parsedStringContent = &stringContent
  623. return true
  624. }
  625. return false
  626. }
  627. func (m *Message) ParseContent() []MediaContent {
  628. if m.parsedContent != nil {
  629. return m.parsedContent
  630. }
  631. var contentList []MediaContent
  632. // 先尝试解析为字符串
  633. var stringContent string
  634. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  635. contentList = []MediaContent{{
  636. Type: ContentTypeText,
  637. Text: stringContent,
  638. }}
  639. m.parsedContent = contentList
  640. return contentList
  641. }
  642. // 尝试解析为数组
  643. var arrayContent []map[string]interface{}
  644. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  645. for _, contentItem := range arrayContent {
  646. contentType, ok := contentItem["type"].(string)
  647. if !ok {
  648. continue
  649. }
  650. switch contentType {
  651. case ContentTypeText:
  652. if text, ok := contentItem["text"].(string); ok {
  653. contentList = append(contentList, MediaContent{
  654. Type: ContentTypeText,
  655. Text: text,
  656. })
  657. }
  658. case ContentTypeImageURL:
  659. imageUrl := contentItem["image_url"]
  660. temp := &MessageImageUrl{
  661. Detail: "high",
  662. }
  663. switch v := imageUrl.(type) {
  664. case string:
  665. temp.Url = v
  666. case map[string]interface{}:
  667. url, ok1 := v["url"].(string)
  668. detail, ok2 := v["detail"].(string)
  669. if ok2 {
  670. temp.Detail = detail
  671. }
  672. if ok1 {
  673. temp.Url = url
  674. }
  675. }
  676. contentList = append(contentList, MediaContent{
  677. Type: ContentTypeImageURL,
  678. ImageUrl: temp,
  679. })
  680. case ContentTypeInputAudio:
  681. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  682. data, ok1 := audioData["data"].(string)
  683. format, ok2 := audioData["format"].(string)
  684. if ok1 && ok2 {
  685. temp := &MessageInputAudio{
  686. Data: data,
  687. Format: format,
  688. }
  689. contentList = append(contentList, MediaContent{
  690. Type: ContentTypeInputAudio,
  691. InputAudio: temp,
  692. })
  693. }
  694. }
  695. case ContentTypeFile:
  696. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  697. fileId, ok3 := fileData["file_id"].(string)
  698. if ok3 {
  699. contentList = append(contentList, MediaContent{
  700. Type: ContentTypeFile,
  701. File: &MessageFile{
  702. FileId: fileId,
  703. },
  704. })
  705. } else {
  706. fileName, ok1 := fileData["filename"].(string)
  707. fileDataStr, ok2 := fileData["file_data"].(string)
  708. if ok1 && ok2 {
  709. contentList = append(contentList, MediaContent{
  710. Type: ContentTypeFile,
  711. File: &MessageFile{
  712. FileName: fileName,
  713. FileData: fileDataStr,
  714. },
  715. })
  716. }
  717. }
  718. }
  719. case ContentTypeVideoUrl:
  720. if videoUrl, ok := contentItem["video_url"].(string); ok {
  721. contentList = append(contentList, MediaContent{
  722. Type: ContentTypeVideoUrl,
  723. VideoUrl: &MessageVideoUrl{
  724. Url: videoUrl,
  725. },
  726. })
  727. }
  728. }
  729. }
  730. }
  731. if len(contentList) > 0 {
  732. m.parsedContent = contentList
  733. }
  734. return contentList
  735. }*/
// WebSearchOptions configures web search for providers that support it
// (the `web_search_options` field).
type WebSearchOptions struct {
	SearchContextSize string          `json:"search_context_size,omitempty"`
	UserLocation      json.RawMessage `json:"user_location,omitempty"` // passed through undecoded
}
// OpenAIResponsesRequest models a /v1/responses request body.
// https://platform.openai.com/docs/api-reference/responses/create
type OpenAIResponsesRequest struct {
	Model   string          `json:"model"`
	Input   json.RawMessage `json:"input,omitempty"`
	Include json.RawMessage `json:"include,omitempty"`
	// Background reasoning is not yet supported by the dependent endpoints.
	// Background json.RawMessage `json:"background,omitempty"`
	Conversation      json.RawMessage `json:"conversation,omitempty"`
	ContextManagement json.RawMessage `json:"context_management,omitempty"`
	Instructions      json.RawMessage `json:"instructions,omitempty"`
	MaxOutputTokens   *uint           `json:"max_output_tokens,omitempty"`
	TopLogProbs       *int            `json:"top_logprobs,omitempty"`
	Metadata          json.RawMessage `json:"metadata,omitempty"`
	ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
	PreviousResponseID string         `json:"previous_response_id,omitempty"`
	Reasoning         *Reasoning      `json:"reasoning,omitempty"`
	// ServiceTier specifies upstream service level and may affect billing.
	// This field is filtered by default and can be enabled via channel setting allow_service_tier.
	ServiceTier string `json:"service_tier,omitempty"`
	// Store controls whether upstream may store request/response data.
	// This field is allowed by default and can be disabled via channel setting disable_store.
	Store                json.RawMessage `json:"store,omitempty"`
	PromptCacheKey       json.RawMessage `json:"prompt_cache_key,omitempty"`
	PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
	// SafetyIdentifier carries client identity for policy abuse detection.
	// This field is filtered by default and can be enabled via channel setting allow_safety_identifier.
	SafetyIdentifier string          `json:"safety_identifier,omitempty"`
	Stream           *bool           `json:"stream,omitempty"`
	StreamOptions    *StreamOptions  `json:"stream_options,omitempty"`
	Temperature      *float64        `json:"temperature,omitempty"`
	Text             json.RawMessage `json:"text,omitempty"`
	ToolChoice       json.RawMessage `json:"tool_choice,omitempty"`
	// Tools is kept raw: only a few of its parameters need handling here, and
	// MCP tools carry too many open-ended parameters to type statically.
	Tools        json.RawMessage `json:"tools,omitempty"`
	TopP         *float64        `json:"top_p,omitempty"`
	Truncation   string          `json:"truncation,omitempty"`
	User         string          `json:"user,omitempty"`
	MaxToolCalls *uint           `json:"max_tool_calls,omitempty"`
	Prompt       json.RawMessage `json:"prompt,omitempty"`
	// qwen
	EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
	// perplexity
	Preset json.RawMessage `json:"preset,omitempty"`
}
// GetTokenCountMeta collects token-counting inputs for a /v1/responses
// request: text drawn from the parsed input plus the raw JSON of several
// pass-through fields, attached image/file sources, and the output cap.
func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
	var fileMeta = make([]*types.FileMeta, 0)
	var texts = make([]string, 0)
	if r.Input != nil {
		inputs := r.ParseInput()
		for _, input := range inputs {
			if input.Type == "input_image" {
				if input.ImageUrl != "" {
					fileMeta = append(fileMeta, &types.FileMeta{
						FileType: types.FileTypeImage,
						Source:   createFileSource(input.ImageUrl),
						Detail:   input.Detail,
					})
				}
			} else if input.Type == "input_file" {
				if input.FileUrl != "" {
					fileMeta = append(fileMeta, &types.FileMeta{
						FileType: types.FileTypeFile,
						Source:   createFileSource(input.FileUrl),
					})
				}
			} else {
				texts = append(texts, input.Text)
			}
		}
	}
	// Raw pass-through fields are counted as their literal JSON text.
	if len(r.Instructions) > 0 {
		texts = append(texts, string(r.Instructions))
	}
	if len(r.Metadata) > 0 {
		texts = append(texts, string(r.Metadata))
	}
	if len(r.Text) > 0 {
		texts = append(texts, string(r.Text))
	}
	if len(r.ToolChoice) > 0 {
		texts = append(texts, string(r.ToolChoice))
	}
	if len(r.Prompt) > 0 {
		texts = append(texts, string(r.Prompt))
	}
	if len(r.Tools) > 0 {
		texts = append(texts, string(r.Tools))
	}
	return &types.TokenCountMeta{
		CombineText: strings.Join(texts, "\n"),
		Files:       fileMeta,
		MaxTokens:   int(lo.FromPtrOr(r.MaxOutputTokens, uint(0))),
	}
}
  833. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  834. return lo.FromPtrOr(r.Stream, false)
  835. }
  836. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  837. if modelName != "" {
  838. r.Model = modelName
  839. }
  840. }
  841. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  842. var toolsMap []map[string]any
  843. if len(r.Tools) > 0 {
  844. _ = common.Unmarshal(r.Tools, &toolsMap)
  845. }
  846. return toolsMap
  847. }
// Reasoning mirrors the OpenAI Responses API `reasoning` request object.
type Reasoning struct {
	// Effort selects the reasoning effort level — presumably values like
	// "low"/"medium"/"high"; confirm against the upstream API docs.
	Effort string `json:"effort,omitempty"`
	// Summary requests a reasoning-summary mode; accepted values are not
	// visible here — TODO confirm.
	Summary string `json:"summary,omitempty"`
}
// Input is one element of the Responses API `input` array. Content stays raw
// because callers may send either a plain string or an array of typed parts;
// ParseInput dispatches on its JSON type.
type Input struct {
	Type    string          `json:"type,omitempty"`
	Role    string          `json:"role,omitempty"`
	Content json.RawMessage `json:"content,omitempty"`
}
// MediaInput is the normalized form of a single input part produced by
// ParseInput. Depending on Type, at most one of Text, FileUrl or ImageUrl
// is populated.
type MediaInput struct {
	Type     string `json:"type"`
	Text     string `json:"text,omitempty"`
	FileUrl  string `json:"file_url,omitempty"`
	ImageUrl string `json:"image_url,omitempty"`
	Detail   string `json:"detail,omitempty"` // only meaningful for input_image
}
  864. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  865. // Reference implementation mirrors Message.ParseContent:
  866. // - input can be a string, treated as an input_text item
  867. // - input can be an array of objects with a `type` field
  868. // supported types: input_text, input_image, input_file
  869. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  870. if r.Input == nil {
  871. return nil
  872. }
  873. var mediaInputs []MediaInput
  874. // Try string first
  875. // if str, ok := common.GetJsonType(r.Input); ok {
  876. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  877. // return inputs
  878. // }
  879. if common.GetJsonType(r.Input) == "string" {
  880. var str string
  881. _ = common.Unmarshal(r.Input, &str)
  882. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  883. return mediaInputs
  884. }
  885. // Try array of parts
  886. if common.GetJsonType(r.Input) == "array" {
  887. var inputs []Input
  888. _ = common.Unmarshal(r.Input, &inputs)
  889. for _, input := range inputs {
  890. if common.GetJsonType(input.Content) == "string" {
  891. var str string
  892. _ = common.Unmarshal(input.Content, &str)
  893. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  894. }
  895. if common.GetJsonType(input.Content) == "array" {
  896. var array []any
  897. _ = common.Unmarshal(input.Content, &array)
  898. for _, itemAny := range array {
  899. // Already parsed MediaContent
  900. if media, ok := itemAny.(MediaInput); ok {
  901. mediaInputs = append(mediaInputs, media)
  902. continue
  903. }
  904. // Generic map
  905. item, ok := itemAny.(map[string]any)
  906. if !ok {
  907. continue
  908. }
  909. typeVal, ok := item["type"].(string)
  910. if !ok {
  911. continue
  912. }
  913. switch typeVal {
  914. case "input_text":
  915. text, _ := item["text"].(string)
  916. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  917. case "input_image":
  918. // image_url may be string or object with url field
  919. var imageUrl string
  920. switch v := item["image_url"].(type) {
  921. case string:
  922. imageUrl = v
  923. case map[string]any:
  924. if url, ok := v["url"].(string); ok {
  925. imageUrl = url
  926. }
  927. }
  928. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  929. case "input_file":
  930. // file_url may be string or object with url field
  931. var fileUrl string
  932. switch v := item["file_url"].(type) {
  933. case string:
  934. fileUrl = v
  935. case map[string]any:
  936. if url, ok := v["url"].(string); ok {
  937. fileUrl = url
  938. }
  939. }
  940. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  941. }
  942. }
  943. }
  944. }
  945. }
  946. return mediaInputs
  947. }