openai_request.go 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/samber/lo"
  9. "github.com/gin-gonic/gin"
  10. )
  // ResponseFormat is the `response_format` object of a request.
  // JsonSchema is held as raw, undecoded JSON.
  type ResponseFormat struct {
  	Type       string          `json:"type,omitempty"`
  	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  }
  // FormatJsonSchema describes a named JSON schema. Name is always
  // serialized; the remaining fields are omitted when empty. Strict is held
  // as raw, undecoded JSON.
  type FormatJsonSchema struct {
  	Description string          `json:"description,omitempty"`
  	Name        string          `json:"name"`
  	Schema      any             `json:"schema,omitempty"`
  	Strict      json.RawMessage `json:"strict,omitempty"`
  }
  21. // GeneralOpenAIRequest represents a general request structure for OpenAI-compatible APIs.
  22. // 参数增加规范:无引用的参数必须使用json.RawMessage类型,并添加omitempty标签
  23. type GeneralOpenAIRequest struct {
  24. Model string `json:"model,omitempty"`
  25. Messages []Message `json:"messages,omitempty"`
  26. Prompt any `json:"prompt,omitempty"`
  27. Prefix any `json:"prefix,omitempty"`
  28. Suffix any `json:"suffix,omitempty"`
  29. Stream *bool `json:"stream,omitempty"`
  30. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  31. MaxTokens *uint `json:"max_tokens,omitempty"`
  32. MaxCompletionTokens *uint `json:"max_completion_tokens,omitempty"`
  33. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  34. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  35. Temperature *float64 `json:"temperature,omitempty"`
  36. TopP *float64 `json:"top_p,omitempty"`
  37. TopK *int `json:"top_k,omitempty"`
  38. Stop any `json:"stop,omitempty"`
  39. N *int `json:"n,omitempty"`
  40. Input any `json:"input,omitempty"`
  41. Instruction string `json:"instruction,omitempty"`
  42. Size string `json:"size,omitempty"`
  43. Functions json.RawMessage `json:"functions,omitempty"`
  44. FrequencyPenalty *float64 `json:"frequency_penalty,omitempty"`
  45. PresencePenalty *float64 `json:"presence_penalty,omitempty"`
  46. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  47. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  48. Seed *float64 `json:"seed,omitempty"`
  49. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  50. Tools []ToolCallRequest `json:"tools,omitempty"`
  51. ToolChoice any `json:"tool_choice,omitempty"`
  52. FunctionCall json.RawMessage `json:"function_call,omitempty"`
  53. User json.RawMessage `json:"user,omitempty"`
  54. // ServiceTier specifies upstream service level and may affect billing.
  55. // This field is filtered by default and can be enabled via channel setting allow_service_tier.
  56. ServiceTier json.RawMessage `json:"service_tier,omitempty"`
  57. LogProbs *bool `json:"logprobs,omitempty"`
  58. TopLogProbs *int `json:"top_logprobs,omitempty"`
  59. Dimensions *int `json:"dimensions,omitempty"`
  60. Modalities json.RawMessage `json:"modalities,omitempty"`
  61. Audio json.RawMessage `json:"audio,omitempty"`
  62. // 安全标识符,用于帮助 OpenAI 检测可能违反使用政策的应用程序用户
  63. // 注意:此字段会向 OpenAI 发送用户标识信息,默认过滤,可通过 allow_safety_identifier 开启
  64. SafetyIdentifier json.RawMessage `json:"safety_identifier,omitempty"`
  65. // Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
  66. // 是否存储此次请求数据供 OpenAI 用于评估和优化产品
  67. // 注意:默认允许透传,可通过 disable_store 禁用;禁用后可能导致 Codex 无法正常使用
  68. Store json.RawMessage `json:"store,omitempty"`
  69. // Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
  70. PromptCacheKey string `json:"prompt_cache_key,omitempty"`
  71. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  72. LogitBias json.RawMessage `json:"logit_bias,omitempty"`
  73. Metadata json.RawMessage `json:"metadata,omitempty"`
  74. Prediction json.RawMessage `json:"prediction,omitempty"`
  75. // gemini
  76. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  77. //xai
  78. SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
  79. // claude
  80. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  81. // OpenRouter Params
  82. Usage json.RawMessage `json:"usage,omitempty"`
  83. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  84. // Ali Qwen Params
  85. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  86. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  87. ChatTemplateKwargs json.RawMessage `json:"chat_template_kwargs,omitempty"`
  88. EnableSearch json.RawMessage `json:"enable_search,omitempty"`
  89. // ollama Params
  90. Think json.RawMessage `json:"think,omitempty"`
  91. // baidu v2
  92. WebSearch json.RawMessage `json:"web_search,omitempty"`
  93. // doubao,zhipu_v4
  94. THINKING json.RawMessage `json:"thinking,omitempty"`
  95. // pplx Params
  96. SearchDomainFilter json.RawMessage `json:"search_domain_filter,omitempty"`
  97. SearchRecencyFilter json.RawMessage `json:"search_recency_filter,omitempty"`
  98. ReturnImages *bool `json:"return_images,omitempty"`
  99. ReturnRelatedQuestions *bool `json:"return_related_questions,omitempty"`
  100. SearchMode json.RawMessage `json:"search_mode,omitempty"`
  101. // Minimax
  102. ReasoningSplit json.RawMessage `json:"reasoning_split,omitempty"`
  103. }
  104. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  105. var tokenCountMeta types.TokenCountMeta
  106. var texts = make([]string, 0)
  107. var fileMeta = make([]*types.FileMeta, 0)
  108. if r.Prompt != nil {
  109. switch v := r.Prompt.(type) {
  110. case string:
  111. texts = append(texts, v)
  112. case []any:
  113. for _, item := range v {
  114. if str, ok := item.(string); ok {
  115. texts = append(texts, str)
  116. }
  117. }
  118. default:
  119. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  120. }
  121. }
  122. if r.Input != nil {
  123. inputs := r.ParseInput()
  124. texts = append(texts, inputs...)
  125. }
  126. maxTokens := lo.FromPtrOr(r.MaxTokens, uint(0))
  127. maxCompletionTokens := lo.FromPtrOr(r.MaxCompletionTokens, uint(0))
  128. if maxCompletionTokens > maxTokens {
  129. tokenCountMeta.MaxTokens = int(maxCompletionTokens)
  130. } else {
  131. tokenCountMeta.MaxTokens = int(maxTokens)
  132. }
  133. for _, message := range r.Messages {
  134. tokenCountMeta.MessagesCount++
  135. texts = append(texts, message.Role)
  136. if message.Content != nil {
  137. if message.Name != nil {
  138. tokenCountMeta.NameCount++
  139. texts = append(texts, *message.Name)
  140. }
  141. arrayContent := message.ParseContent()
  142. for _, m := range arrayContent {
  143. source := m.ToFileSource()
  144. if source != nil {
  145. meta := &types.FileMeta{Source: source}
  146. switch m.Type {
  147. case ContentTypeImageURL:
  148. meta.FileType = types.FileTypeImage
  149. if img := m.GetImageMedia(); img != nil {
  150. meta.Detail = img.Detail
  151. }
  152. case ContentTypeInputAudio:
  153. meta.FileType = types.FileTypeAudio
  154. case ContentTypeFile:
  155. meta.FileType = types.FileTypeFile
  156. case ContentTypeVideoUrl:
  157. meta.FileType = types.FileTypeVideo
  158. }
  159. fileMeta = append(fileMeta, meta)
  160. } else if m.Type == ContentTypeText {
  161. texts = append(texts, m.Text)
  162. }
  163. }
  164. }
  165. }
  166. if r.Tools != nil {
  167. openaiTools := r.Tools
  168. for _, tool := range openaiTools {
  169. tokenCountMeta.ToolsCount++
  170. texts = append(texts, tool.Function.Name)
  171. if tool.Function.Description != "" {
  172. texts = append(texts, tool.Function.Description)
  173. }
  174. if tool.Function.Parameters != nil {
  175. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  176. }
  177. }
  178. //toolTokens := CountTokenInput(countStr, request.Model)
  179. //tkm += 8
  180. //tkm += toolTokens
  181. }
  182. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  183. tokenCountMeta.Files = fileMeta
  184. return &tokenCountMeta
  185. }
  186. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  187. return lo.FromPtrOr(r.Stream, false)
  188. }
  189. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  190. if modelName != "" {
  191. r.Model = modelName
  192. }
  193. }
  194. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  195. result := make(map[string]any)
  196. data, _ := common.Marshal(r)
  197. _ = common.Unmarshal(data, &result)
  198. return result
  199. }
  200. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  201. if strings.HasPrefix(r.Model, "o") {
  202. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  203. return "developer"
  204. }
  205. } else if strings.HasPrefix(r.Model, "gpt-5") {
  206. return "developer"
  207. }
  208. return "system"
  209. }
  // CustomType is the literal "custom".
  // NOTE(review): presumably the ToolCallRequest.Type value for custom tools — confirm against callers.
  const CustomType = "custom"
  211. type ToolCallRequest struct {
  212. ID string `json:"id,omitempty"`
  213. Type string `json:"type"`
  214. Function FunctionRequest `json:"function,omitempty"`
  215. Custom json.RawMessage `json:"custom,omitempty"`
  216. }
  // FunctionRequest holds the function part of a tool: its name, an
  // optional description, its parameters and an arguments string.
  // Name is always serialized; the other fields are omitted when empty.
  type FunctionRequest struct {
  	Description string `json:"description,omitempty"`
  	Name        string `json:"name"`
  	Parameters  any    `json:"parameters,omitempty"`
  	Arguments   string `json:"arguments,omitempty"`
  }
  // StreamOptions is the `stream_options` object of a request.
  // Both flags are omitted from the JSON output when false.
  type StreamOptions struct {
  	IncludeUsage bool `json:"include_usage,omitempty"`

  	// IncludeObfuscation is only for /v1/responses stream payload.
  	// This field is filtered by default and can be enabled via channel setting allow_include_obfuscation.
  	IncludeObfuscation bool `json:"include_obfuscation,omitempty"`
  }
  229. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  230. maxCompletionTokens := lo.FromPtrOr(r.MaxCompletionTokens, uint(0))
  231. if maxCompletionTokens != 0 {
  232. return maxCompletionTokens
  233. }
  234. return lo.FromPtrOr(r.MaxTokens, uint(0))
  235. }
  236. func (r *GeneralOpenAIRequest) ParseInput() []string {
  237. if r.Input == nil {
  238. return nil
  239. }
  240. var input []string
  241. switch r.Input.(type) {
  242. case string:
  243. input = []string{r.Input.(string)}
  244. case []any:
  245. input = make([]string, 0, len(r.Input.([]any)))
  246. for _, item := range r.Input.([]any) {
  247. if str, ok := item.(string); ok {
  248. input = append(input, str)
  249. }
  250. }
  251. }
  252. return input
  253. }
  254. type Message struct {
  255. Role string `json:"role"`
  256. Content any `json:"content"`
  257. Name *string `json:"name,omitempty"`
  258. Prefix *bool `json:"prefix,omitempty"`
  259. ReasoningContent *string `json:"reasoning_content,omitempty"`
  260. Reasoning *string `json:"reasoning,omitempty"`
  261. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  262. ToolCallId string `json:"tool_call_id,omitempty"`
  263. parsedContent []MediaContent
  264. //parsedStringContent *string
  265. }
  // MediaContent is one part of a multi-part message content.
  //
  // Type names the part kind (see the ContentType* constants). The media
  // fields are untyped because they may hold either a typed pointer or a
  // generically decoded map; use the Get* accessors to read them.
  type MediaContent struct {
  	Type       string `json:"type"`
  	Text       string `json:"text,omitempty"`
  	ImageUrl   any    `json:"image_url,omitempty"`
  	InputAudio any    `json:"input_audio,omitempty"`
  	File       any    `json:"file,omitempty"`
  	VideoUrl   any    `json:"video_url,omitempty"`

  	// OpenRouter Params
  	CacheControl json.RawMessage `json:"cache_control,omitempty"`
  }
  276. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  277. if m.ImageUrl != nil {
  278. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  279. return m.ImageUrl.(*MessageImageUrl)
  280. }
  281. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  282. out := &MessageImageUrl{
  283. Url: common.Interface2String(itemMap["url"]),
  284. Detail: common.Interface2String(itemMap["detail"]),
  285. MimeType: common.Interface2String(itemMap["mime_type"]),
  286. }
  287. return out
  288. }
  289. }
  290. return nil
  291. }
  292. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  293. if m.InputAudio != nil {
  294. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  295. return m.InputAudio.(*MessageInputAudio)
  296. }
  297. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  298. out := &MessageInputAudio{
  299. Data: common.Interface2String(itemMap["data"]),
  300. Format: common.Interface2String(itemMap["format"]),
  301. }
  302. return out
  303. }
  304. }
  305. return nil
  306. }
  307. func (m *MediaContent) GetFile() *MessageFile {
  308. if m.File != nil {
  309. if _, ok := m.File.(*MessageFile); ok {
  310. return m.File.(*MessageFile)
  311. }
  312. if itemMap, ok := m.File.(map[string]any); ok {
  313. out := &MessageFile{
  314. FileName: common.Interface2String(itemMap["file_name"]),
  315. FileData: common.Interface2String(itemMap["file_data"]),
  316. FileId: common.Interface2String(itemMap["file_id"]),
  317. }
  318. return out
  319. }
  320. }
  321. return nil
  322. }
  323. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  324. if m.VideoUrl != nil {
  325. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  326. return m.VideoUrl.(*MessageVideoUrl)
  327. }
  328. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  329. out := &MessageVideoUrl{
  330. Url: common.Interface2String(itemMap["url"]),
  331. }
  332. return out
  333. }
  334. }
  335. return nil
  336. }
  337. func (m *MediaContent) ToFileSource() types.FileSource {
  338. switch m.Type {
  339. case ContentTypeImageURL:
  340. img := m.GetImageMedia()
  341. if img == nil || img.Url == "" {
  342. return nil
  343. }
  344. return types.NewFileSourceFromData(img.Url, img.MimeType)
  345. case ContentTypeInputAudio:
  346. audio := m.GetInputAudio()
  347. if audio == nil || audio.Data == "" {
  348. return nil
  349. }
  350. mimeType := ""
  351. if audio.Format != "" {
  352. mimeType = "audio/" + audio.Format
  353. }
  354. return types.NewFileSourceFromData(audio.Data, mimeType)
  355. case ContentTypeFile:
  356. file := m.GetFile()
  357. if file == nil || file.FileData == "" {
  358. return nil
  359. }
  360. return types.NewFileSourceFromData(file.FileData, "")
  361. case ContentTypeVideoUrl:
  362. video := m.GetVideoUrl()
  363. if video == nil || video.Url == "" {
  364. return nil
  365. }
  366. return types.NewFileSourceFromData(video.Url, "")
  367. }
  368. return nil
  369. }
  // MessageImageUrl is the payload of an image content part.
  //
  // MimeType has no JSON tag, so encoding/json serializes it under the key
  // "MimeType" and never omits it.
  type MessageImageUrl struct {
  	Url      string `json:"url"`
  	Detail   string `json:"detail,omitempty"`
  	MimeType string
  }

  // IsRemoteImage reports whether Url begins with "http".
  func (m *MessageImageUrl) IsRemoteImage() bool {
  	const remotePrefix = "http"
  	return strings.HasPrefix(m.Url, remotePrefix)
  }
  // MessageInputAudio is the payload of an audio content part.
  type MessageInputAudio struct {
  	// Data is the base64-encoded audio.
  	Data   string `json:"data"`
  	Format string `json:"format"`
  }
  // MessageFile is the payload of a file content part. Every field is
  // optional and omitted from the JSON output when empty.
  type MessageFile struct {
  	FileName string `json:"filename,omitempty"`
  	FileData string `json:"file_data,omitempty"`
  	FileId   string `json:"file_id,omitempty"`
  }
  // MessageVideoUrl is the payload of a video content part.
  type MessageVideoUrl struct {
  	Url string `json:"url"`
  }
  // Values of MediaContent.Type recognized by this package.
  const (
  	ContentTypeText       = "text"
  	ContentTypeImageURL   = "image_url"
  	ContentTypeInputAudio = "input_audio"
  	ContentTypeFile       = "file"
  	ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
  	//ContentTypeAudioUrl = "audio_url"
  )
  398. func (m *Message) GetReasoningContent() string {
  399. if m.ReasoningContent == nil && m.Reasoning == nil {
  400. return ""
  401. }
  402. if m.ReasoningContent != nil {
  403. return *m.ReasoningContent
  404. }
  405. return *m.Reasoning
  406. }
  407. func (m *Message) GetPrefix() bool {
  408. if m.Prefix == nil {
  409. return false
  410. }
  411. return *m.Prefix
  412. }
  413. func (m *Message) SetPrefix(prefix bool) {
  414. m.Prefix = &prefix
  415. }
  416. func (m *Message) ParseToolCalls() []ToolCallRequest {
  417. if m.ToolCalls == nil {
  418. return nil
  419. }
  420. var toolCalls []ToolCallRequest
  421. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  422. return toolCalls
  423. }
  424. return toolCalls
  425. }
  426. func (m *Message) SetToolCalls(toolCalls any) {
  427. toolCallsJson, _ := json.Marshal(toolCalls)
  428. m.ToolCalls = toolCallsJson
  429. }
  430. func (m *Message) StringContent() string {
  431. switch m.Content.(type) {
  432. case string:
  433. return m.Content.(string)
  434. case []any:
  435. var contentStr string
  436. for _, contentItem := range m.Content.([]any) {
  437. contentMap, ok := contentItem.(map[string]any)
  438. if !ok {
  439. continue
  440. }
  441. if contentMap["type"] == ContentTypeText {
  442. if subStr, ok := contentMap["text"].(string); ok {
  443. contentStr += subStr
  444. }
  445. }
  446. }
  447. return contentStr
  448. }
  449. return ""
  450. }
  451. func (m *Message) SetNullContent() {
  452. m.Content = nil
  453. m.parsedContent = nil
  454. }
  455. func (m *Message) SetStringContent(content string) {
  456. m.Content = content
  457. m.parsedContent = nil
  458. }
  459. func (m *Message) SetMediaContent(content []MediaContent) {
  460. m.Content = content
  461. m.parsedContent = content
  462. }
  463. func (m *Message) IsStringContent() bool {
  464. _, ok := m.Content.(string)
  465. if ok {
  466. return true
  467. }
  468. return false
  469. }
  470. func (m *Message) ParseContent() []MediaContent {
  471. if m.Content == nil {
  472. return nil
  473. }
  474. if len(m.parsedContent) > 0 {
  475. return m.parsedContent
  476. }
  477. var contentList []MediaContent
  478. // 先尝试解析为字符串
  479. content, ok := m.Content.(string)
  480. if ok {
  481. contentList = []MediaContent{{
  482. Type: ContentTypeText,
  483. Text: content,
  484. }}
  485. m.parsedContent = contentList
  486. return contentList
  487. }
  488. // 尝试解析为数组
  489. //var arrayContent []map[string]interface{}
  490. arrayContent, ok := m.Content.([]any)
  491. if !ok {
  492. return contentList
  493. }
  494. for _, contentItemAny := range arrayContent {
  495. mediaItem, ok := contentItemAny.(MediaContent)
  496. if ok {
  497. contentList = append(contentList, mediaItem)
  498. continue
  499. }
  500. contentItem, ok := contentItemAny.(map[string]any)
  501. if !ok {
  502. continue
  503. }
  504. contentType, ok := contentItem["type"].(string)
  505. if !ok {
  506. continue
  507. }
  508. switch contentType {
  509. case ContentTypeText:
  510. if text, ok := contentItem["text"].(string); ok {
  511. contentList = append(contentList, MediaContent{
  512. Type: ContentTypeText,
  513. Text: text,
  514. })
  515. }
  516. case ContentTypeImageURL:
  517. imageUrl := contentItem["image_url"]
  518. temp := &MessageImageUrl{
  519. Detail: "high",
  520. }
  521. switch v := imageUrl.(type) {
  522. case string:
  523. temp.Url = v
  524. case map[string]interface{}:
  525. url, ok1 := v["url"].(string)
  526. detail, ok2 := v["detail"].(string)
  527. if ok2 {
  528. temp.Detail = detail
  529. }
  530. if ok1 {
  531. temp.Url = url
  532. }
  533. }
  534. contentList = append(contentList, MediaContent{
  535. Type: ContentTypeImageURL,
  536. ImageUrl: temp,
  537. })
  538. case ContentTypeInputAudio:
  539. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  540. data, ok1 := audioData["data"].(string)
  541. format, ok2 := audioData["format"].(string)
  542. if ok1 && ok2 {
  543. temp := &MessageInputAudio{
  544. Data: data,
  545. Format: format,
  546. }
  547. contentList = append(contentList, MediaContent{
  548. Type: ContentTypeInputAudio,
  549. InputAudio: temp,
  550. })
  551. }
  552. }
  553. case ContentTypeFile:
  554. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  555. fileId, ok3 := fileData["file_id"].(string)
  556. if ok3 {
  557. contentList = append(contentList, MediaContent{
  558. Type: ContentTypeFile,
  559. File: &MessageFile{
  560. FileId: fileId,
  561. },
  562. })
  563. } else {
  564. fileName, ok1 := fileData["filename"].(string)
  565. fileDataStr, ok2 := fileData["file_data"].(string)
  566. if ok1 && ok2 {
  567. contentList = append(contentList, MediaContent{
  568. Type: ContentTypeFile,
  569. File: &MessageFile{
  570. FileName: fileName,
  571. FileData: fileDataStr,
  572. },
  573. })
  574. }
  575. }
  576. }
  577. case ContentTypeVideoUrl:
  578. if videoUrl, ok := contentItem["video_url"].(string); ok {
  579. contentList = append(contentList, MediaContent{
  580. Type: ContentTypeVideoUrl,
  581. VideoUrl: &MessageVideoUrl{
  582. Url: videoUrl,
  583. },
  584. })
  585. }
  586. }
  587. }
  588. if len(contentList) > 0 {
  589. m.parsedContent = contentList
  590. }
  591. return contentList
  592. }
  593. // old code
  594. /*func (m *Message) StringContent() string {
  595. if m.parsedStringContent != nil {
  596. return *m.parsedStringContent
  597. }
  598. var stringContent string
  599. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  600. m.parsedStringContent = &stringContent
  601. return stringContent
  602. }
  603. contentStr := new(strings.Builder)
  604. arrayContent := m.ParseContent()
  605. for _, content := range arrayContent {
  606. if content.Type == ContentTypeText {
  607. contentStr.WriteString(content.Text)
  608. }
  609. }
  610. stringContent = contentStr.String()
  611. m.parsedStringContent = &stringContent
  612. return stringContent
  613. }
  614. func (m *Message) SetNullContent() {
  615. m.Content = nil
  616. m.parsedStringContent = nil
  617. m.parsedContent = nil
  618. }
  619. func (m *Message) SetStringContent(content string) {
  620. jsonContent, _ := json.Marshal(content)
  621. m.Content = jsonContent
  622. m.parsedStringContent = &content
  623. m.parsedContent = nil
  624. }
  625. func (m *Message) SetMediaContent(content []MediaContent) {
  626. jsonContent, _ := json.Marshal(content)
  627. m.Content = jsonContent
  628. m.parsedContent = nil
  629. m.parsedStringContent = nil
  630. }
  631. func (m *Message) IsStringContent() bool {
  632. if m.parsedStringContent != nil {
  633. return true
  634. }
  635. var stringContent string
  636. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  637. m.parsedStringContent = &stringContent
  638. return true
  639. }
  640. return false
  641. }
  642. func (m *Message) ParseContent() []MediaContent {
  643. if m.parsedContent != nil {
  644. return m.parsedContent
  645. }
  646. var contentList []MediaContent
  647. // 先尝试解析为字符串
  648. var stringContent string
  649. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  650. contentList = []MediaContent{{
  651. Type: ContentTypeText,
  652. Text: stringContent,
  653. }}
  654. m.parsedContent = contentList
  655. return contentList
  656. }
  657. // 尝试解析为数组
  658. var arrayContent []map[string]interface{}
  659. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  660. for _, contentItem := range arrayContent {
  661. contentType, ok := contentItem["type"].(string)
  662. if !ok {
  663. continue
  664. }
  665. switch contentType {
  666. case ContentTypeText:
  667. if text, ok := contentItem["text"].(string); ok {
  668. contentList = append(contentList, MediaContent{
  669. Type: ContentTypeText,
  670. Text: text,
  671. })
  672. }
  673. case ContentTypeImageURL:
  674. imageUrl := contentItem["image_url"]
  675. temp := &MessageImageUrl{
  676. Detail: "high",
  677. }
  678. switch v := imageUrl.(type) {
  679. case string:
  680. temp.Url = v
  681. case map[string]interface{}:
  682. url, ok1 := v["url"].(string)
  683. detail, ok2 := v["detail"].(string)
  684. if ok2 {
  685. temp.Detail = detail
  686. }
  687. if ok1 {
  688. temp.Url = url
  689. }
  690. }
  691. contentList = append(contentList, MediaContent{
  692. Type: ContentTypeImageURL,
  693. ImageUrl: temp,
  694. })
  695. case ContentTypeInputAudio:
  696. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  697. data, ok1 := audioData["data"].(string)
  698. format, ok2 := audioData["format"].(string)
  699. if ok1 && ok2 {
  700. temp := &MessageInputAudio{
  701. Data: data,
  702. Format: format,
  703. }
  704. contentList = append(contentList, MediaContent{
  705. Type: ContentTypeInputAudio,
  706. InputAudio: temp,
  707. })
  708. }
  709. }
  710. case ContentTypeFile:
  711. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  712. fileId, ok3 := fileData["file_id"].(string)
  713. if ok3 {
  714. contentList = append(contentList, MediaContent{
  715. Type: ContentTypeFile,
  716. File: &MessageFile{
  717. FileId: fileId,
  718. },
  719. })
  720. } else {
  721. fileName, ok1 := fileData["filename"].(string)
  722. fileDataStr, ok2 := fileData["file_data"].(string)
  723. if ok1 && ok2 {
  724. contentList = append(contentList, MediaContent{
  725. Type: ContentTypeFile,
  726. File: &MessageFile{
  727. FileName: fileName,
  728. FileData: fileDataStr,
  729. },
  730. })
  731. }
  732. }
  733. }
  734. case ContentTypeVideoUrl:
  735. if videoUrl, ok := contentItem["video_url"].(string); ok {
  736. contentList = append(contentList, MediaContent{
  737. Type: ContentTypeVideoUrl,
  738. VideoUrl: &MessageVideoUrl{
  739. Url: videoUrl,
  740. },
  741. })
  742. }
  743. }
  744. }
  745. }
  746. if len(contentList) > 0 {
  747. m.parsedContent = contentList
  748. }
  749. return contentList
  750. }*/
  // WebSearchOptions is the `web_search_options` object of a request.
  // UserLocation is held as raw, undecoded JSON.
  type WebSearchOptions struct {
  	SearchContextSize string          `json:"search_context_size,omitempty"`
  	UserLocation      json.RawMessage `json:"user_location,omitempty"`
  }
  755. // https://platform.openai.com/docs/api-reference/responses/create
  756. type OpenAIResponsesRequest struct {
  757. Model string `json:"model"`
  758. Input json.RawMessage `json:"input,omitempty"`
  759. Include json.RawMessage `json:"include,omitempty"`
  760. // 在后台运行推理,暂时还不支持依赖的接口
  761. // Background json.RawMessage `json:"background,omitempty"`
  762. Conversation json.RawMessage `json:"conversation,omitempty"`
  763. ContextManagement json.RawMessage `json:"context_management,omitempty"`
  764. Instructions json.RawMessage `json:"instructions,omitempty"`
  765. MaxOutputTokens *uint `json:"max_output_tokens,omitempty"`
  766. TopLogProbs *int `json:"top_logprobs,omitempty"`
  767. Metadata json.RawMessage `json:"metadata,omitempty"`
  768. ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
  769. PreviousResponseID string `json:"previous_response_id,omitempty"`
  770. Reasoning *Reasoning `json:"reasoning,omitempty"`
  771. // ServiceTier specifies upstream service level and may affect billing.
  772. // This field is filtered by default and can be enabled via channel setting allow_service_tier.
  773. ServiceTier string `json:"service_tier,omitempty"`
  774. // Store controls whether upstream may store request/response data.
  775. // This field is allowed by default and can be disabled via channel setting disable_store.
  776. Store json.RawMessage `json:"store,omitempty"`
  777. PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
  778. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  779. // SafetyIdentifier carries client identity for policy abuse detection.
  780. // This field is filtered by default and can be enabled via channel setting allow_safety_identifier.
  781. SafetyIdentifier json.RawMessage `json:"safety_identifier,omitempty"`
  782. Stream *bool `json:"stream,omitempty"`
  783. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  784. Temperature *float64 `json:"temperature,omitempty"`
  785. Text json.RawMessage `json:"text,omitempty"`
  786. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  787. Tools json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  788. TopP *float64 `json:"top_p,omitempty"`
  789. Truncation json.RawMessage `json:"truncation,omitempty"`
  790. User json.RawMessage `json:"user,omitempty"`
  791. MaxToolCalls *uint `json:"max_tool_calls,omitempty"`
  792. Prompt json.RawMessage `json:"prompt,omitempty"`
  793. // qwen
  794. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  795. // perplexity
  796. Preset json.RawMessage `json:"preset,omitempty"`
  797. }
  798. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  799. var fileMeta = make([]*types.FileMeta, 0)
  800. var texts = make([]string, 0)
  801. if r.Input != nil {
  802. inputs := r.ParseInput()
  803. for _, input := range inputs {
  804. if input.Type == "input_image" {
  805. if input.ImageUrl != "" {
  806. fileMeta = append(fileMeta, &types.FileMeta{
  807. FileType: types.FileTypeImage,
  808. Source: types.NewFileSourceFromData(input.ImageUrl, ""),
  809. Detail: input.Detail,
  810. })
  811. }
  812. } else if input.Type == "input_file" {
  813. if input.FileUrl != "" {
  814. fileMeta = append(fileMeta, &types.FileMeta{
  815. FileType: types.FileTypeFile,
  816. Source: types.NewFileSourceFromData(input.FileUrl, ""),
  817. })
  818. }
  819. } else {
  820. texts = append(texts, input.Text)
  821. }
  822. }
  823. }
  824. if len(r.Instructions) > 0 {
  825. texts = append(texts, string(r.Instructions))
  826. }
  827. if len(r.Metadata) > 0 {
  828. texts = append(texts, string(r.Metadata))
  829. }
  830. if len(r.Text) > 0 {
  831. texts = append(texts, string(r.Text))
  832. }
  833. if len(r.ToolChoice) > 0 {
  834. texts = append(texts, string(r.ToolChoice))
  835. }
  836. if len(r.Prompt) > 0 {
  837. texts = append(texts, string(r.Prompt))
  838. }
  839. if len(r.Tools) > 0 {
  840. texts = append(texts, string(r.Tools))
  841. }
  842. return &types.TokenCountMeta{
  843. CombineText: strings.Join(texts, "\n"),
  844. Files: fileMeta,
  845. MaxTokens: int(lo.FromPtrOr(r.MaxOutputTokens, uint(0))),
  846. }
  847. }
  848. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  849. return lo.FromPtrOr(r.Stream, false)
  850. }
  851. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  852. if modelName != "" {
  853. r.Model = modelName
  854. }
  855. }
  856. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  857. var toolsMap []map[string]any
  858. if len(r.Tools) > 0 {
  859. _ = common.Unmarshal(r.Tools, &toolsMap)
  860. }
  861. return toolsMap
  862. }
  863. type Reasoning struct {
  864. Effort string `json:"effort,omitempty"`
  865. Summary string `json:"summary,omitempty"`
  866. }
  867. type Input struct {
  868. Type string `json:"type,omitempty"`
  869. Role string `json:"role,omitempty"`
  870. Content json.RawMessage `json:"content,omitempty"`
  871. }
  872. type MediaInput struct {
  873. Type string `json:"type"`
  874. Text string `json:"text,omitempty"`
  875. FileUrl string `json:"file_url,omitempty"`
  876. ImageUrl string `json:"image_url,omitempty"`
  877. Detail string `json:"detail,omitempty"` // 仅 input_image 有效
  878. }
  879. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  880. // Reference implementation mirrors Message.ParseContent:
  881. // - input can be a string, treated as an input_text item
  882. // - input can be an array of objects with a `type` field
  883. // supported types: input_text, input_image, input_file
  884. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  885. if r.Input == nil {
  886. return nil
  887. }
  888. var mediaInputs []MediaInput
  889. // Try string first
  890. // if str, ok := common.GetJsonType(r.Input); ok {
  891. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  892. // return inputs
  893. // }
  894. if common.GetJsonType(r.Input) == "string" {
  895. var str string
  896. _ = common.Unmarshal(r.Input, &str)
  897. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  898. return mediaInputs
  899. }
  900. // Try array of parts
  901. if common.GetJsonType(r.Input) == "array" {
  902. var inputs []Input
  903. _ = common.Unmarshal(r.Input, &inputs)
  904. for _, input := range inputs {
  905. if common.GetJsonType(input.Content) == "string" {
  906. var str string
  907. _ = common.Unmarshal(input.Content, &str)
  908. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  909. }
  910. if common.GetJsonType(input.Content) == "array" {
  911. var array []any
  912. _ = common.Unmarshal(input.Content, &array)
  913. for _, itemAny := range array {
  914. // Already parsed MediaContent
  915. if media, ok := itemAny.(MediaInput); ok {
  916. mediaInputs = append(mediaInputs, media)
  917. continue
  918. }
  919. // Generic map
  920. item, ok := itemAny.(map[string]any)
  921. if !ok {
  922. continue
  923. }
  924. typeVal, ok := item["type"].(string)
  925. if !ok {
  926. continue
  927. }
  928. switch typeVal {
  929. case "input_text":
  930. text, _ := item["text"].(string)
  931. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  932. case "input_image":
  933. // image_url may be string or object with url field
  934. var imageUrl string
  935. switch v := item["image_url"].(type) {
  936. case string:
  937. imageUrl = v
  938. case map[string]any:
  939. if url, ok := v["url"].(string); ok {
  940. imageUrl = url
  941. }
  942. }
  943. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  944. case "input_file":
  945. // file_url may be string or object with url field
  946. var fileUrl string
  947. switch v := item["file_url"].(type) {
  948. case string:
  949. fileUrl = v
  950. case map[string]any:
  951. if url, ok := v["url"].(string); ok {
  952. fileUrl = url
  953. }
  954. }
  955. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  956. }
  957. }
  958. }
  959. }
  960. }
  961. return mediaInputs
  962. }