openai_request.go 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
  10. type ResponseFormat struct {
  11. Type string `json:"type,omitempty"`
  12. JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  13. }
  14. type FormatJsonSchema struct {
  15. Description string `json:"description,omitempty"`
  16. Name string `json:"name"`
  17. Schema any `json:"schema,omitempty"`
  18. Strict json.RawMessage `json:"strict,omitempty"`
  19. }
  20. // GeneralOpenAIRequest represents a general request structure for OpenAI-compatible APIs.
  21. // 参数增加规范:无引用的参数必须使用json.RawMessage类型,并添加omitempty标签
  22. type GeneralOpenAIRequest struct {
  23. Model string `json:"model,omitempty"`
  24. Messages []Message `json:"messages,omitempty"`
  25. Prompt any `json:"prompt,omitempty"`
  26. Prefix any `json:"prefix,omitempty"`
  27. Suffix any `json:"suffix,omitempty"`
  28. Stream bool `json:"stream,omitempty"`
  29. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  30. MaxTokens uint `json:"max_tokens,omitempty"`
  31. MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
  32. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  33. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  34. Temperature *float64 `json:"temperature,omitempty"`
  35. TopP float64 `json:"top_p,omitempty"`
  36. TopK int `json:"top_k,omitempty"`
  37. Stop any `json:"stop,omitempty"`
  38. N int `json:"n,omitempty"`
  39. Input any `json:"input,omitempty"`
  40. Instruction string `json:"instruction,omitempty"`
  41. Size string `json:"size,omitempty"`
  42. Functions json.RawMessage `json:"functions,omitempty"`
  43. FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
  44. PresencePenalty float64 `json:"presence_penalty,omitempty"`
  45. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  46. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  47. Seed float64 `json:"seed,omitempty"`
  48. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  49. Tools []ToolCallRequest `json:"tools,omitempty"`
  50. ToolChoice any `json:"tool_choice,omitempty"`
  51. FunctionCall json.RawMessage `json:"function_call,omitempty"`
  52. User string `json:"user,omitempty"`
  53. ServiceTier string `json:"service_tier,omitempty"`
  54. LogProbs bool `json:"logprobs,omitempty"`
  55. TopLogProbs int `json:"top_logprobs,omitempty"`
  56. Dimensions int `json:"dimensions,omitempty"`
  57. Modalities json.RawMessage `json:"modalities,omitempty"`
  58. Audio json.RawMessage `json:"audio,omitempty"`
  59. // 安全标识符,用于帮助 OpenAI 检测可能违反使用政策的应用程序用户
  60. // 注意:此字段会向 OpenAI 发送用户标识信息,默认过滤以保护用户隐私
  61. SafetyIdentifier string `json:"safety_identifier,omitempty"`
  62. // Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
  63. // 是否存储此次请求数据供 OpenAI 用于评估和优化产品
  64. // 注意:默认过滤此字段以保护用户隐私,但过滤后可能导致 Codex 无法正常使用
  65. Store json.RawMessage `json:"store,omitempty"`
  66. // Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
  67. PromptCacheKey string `json:"prompt_cache_key,omitempty"`
  68. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  69. LogitBias json.RawMessage `json:"logit_bias,omitempty"`
  70. Metadata json.RawMessage `json:"metadata,omitempty"`
  71. Prediction json.RawMessage `json:"prediction,omitempty"`
  72. // gemini
  73. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  74. //xai
  75. SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
  76. // claude
  77. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  78. // OpenRouter Params
  79. Usage json.RawMessage `json:"usage,omitempty"`
  80. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  81. // Ali Qwen Params
  82. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  83. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  84. ChatTemplateKwargs json.RawMessage `json:"chat_template_kwargs,omitempty"`
  85. EnableSearch json.RawMessage `json:"enable_search,omitempty"`
  86. // ollama Params
  87. Think json.RawMessage `json:"think,omitempty"`
  88. // baidu v2
  89. WebSearch json.RawMessage `json:"web_search,omitempty"`
  90. // doubao,zhipu_v4
  91. THINKING json.RawMessage `json:"thinking,omitempty"`
  92. // pplx Params
  93. SearchDomainFilter json.RawMessage `json:"search_domain_filter,omitempty"`
  94. SearchRecencyFilter string `json:"search_recency_filter,omitempty"`
  95. ReturnImages bool `json:"return_images,omitempty"`
  96. ReturnRelatedQuestions bool `json:"return_related_questions,omitempty"`
  97. SearchMode string `json:"search_mode,omitempty"`
  98. }
  99. // createFileSource 根据数据内容创建正确类型的 FileSource
  100. func createFileSource(data string) *types.FileSource {
  101. if strings.HasPrefix(data, "http://") || strings.HasPrefix(data, "https://") {
  102. return types.NewURLFileSource(data)
  103. }
  104. return types.NewBase64FileSource(data, "")
  105. }
  106. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  107. var tokenCountMeta types.TokenCountMeta
  108. var texts = make([]string, 0)
  109. var fileMeta = make([]*types.FileMeta, 0)
  110. if r.Prompt != nil {
  111. switch v := r.Prompt.(type) {
  112. case string:
  113. texts = append(texts, v)
  114. case []any:
  115. for _, item := range v {
  116. if str, ok := item.(string); ok {
  117. texts = append(texts, str)
  118. }
  119. }
  120. default:
  121. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  122. }
  123. }
  124. if r.Input != nil {
  125. inputs := r.ParseInput()
  126. texts = append(texts, inputs...)
  127. }
  128. if r.MaxCompletionTokens > r.MaxTokens {
  129. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  130. } else {
  131. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  132. }
  133. for _, message := range r.Messages {
  134. tokenCountMeta.MessagesCount++
  135. texts = append(texts, message.Role)
  136. if message.Content != nil {
  137. if message.Name != nil {
  138. tokenCountMeta.NameCount++
  139. texts = append(texts, *message.Name)
  140. }
  141. arrayContent := message.ParseContent()
  142. for _, m := range arrayContent {
  143. if m.Type == ContentTypeImageURL {
  144. imageUrl := m.GetImageMedia()
  145. if imageUrl != nil && imageUrl.Url != "" {
  146. source := createFileSource(imageUrl.Url)
  147. fileMeta = append(fileMeta, &types.FileMeta{
  148. FileType: types.FileTypeImage,
  149. Source: source,
  150. Detail: imageUrl.Detail,
  151. })
  152. }
  153. } else if m.Type == ContentTypeInputAudio {
  154. inputAudio := m.GetInputAudio()
  155. if inputAudio != nil && inputAudio.Data != "" {
  156. source := createFileSource(inputAudio.Data)
  157. fileMeta = append(fileMeta, &types.FileMeta{
  158. FileType: types.FileTypeAudio,
  159. Source: source,
  160. })
  161. }
  162. } else if m.Type == ContentTypeFile {
  163. file := m.GetFile()
  164. if file != nil && file.FileData != "" {
  165. source := createFileSource(file.FileData)
  166. fileMeta = append(fileMeta, &types.FileMeta{
  167. FileType: types.FileTypeFile,
  168. Source: source,
  169. })
  170. }
  171. } else if m.Type == ContentTypeVideoUrl {
  172. videoUrl := m.GetVideoUrl()
  173. if videoUrl != nil && videoUrl.Url != "" {
  174. source := createFileSource(videoUrl.Url)
  175. fileMeta = append(fileMeta, &types.FileMeta{
  176. FileType: types.FileTypeVideo,
  177. Source: source,
  178. })
  179. }
  180. } else {
  181. texts = append(texts, m.Text)
  182. }
  183. }
  184. }
  185. }
  186. if r.Tools != nil {
  187. openaiTools := r.Tools
  188. for _, tool := range openaiTools {
  189. tokenCountMeta.ToolsCount++
  190. texts = append(texts, tool.Function.Name)
  191. if tool.Function.Description != "" {
  192. texts = append(texts, tool.Function.Description)
  193. }
  194. if tool.Function.Parameters != nil {
  195. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  196. }
  197. }
  198. //toolTokens := CountTokenInput(countStr, request.Model)
  199. //tkm += 8
  200. //tkm += toolTokens
  201. }
  202. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  203. tokenCountMeta.Files = fileMeta
  204. return &tokenCountMeta
  205. }
  206. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  207. return r.Stream
  208. }
  209. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  210. if modelName != "" {
  211. r.Model = modelName
  212. }
  213. }
  214. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  215. result := make(map[string]any)
  216. data, _ := common.Marshal(r)
  217. _ = common.Unmarshal(data, &result)
  218. return result
  219. }
  220. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  221. if strings.HasPrefix(r.Model, "o") {
  222. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  223. return "developer"
  224. }
  225. } else if strings.HasPrefix(r.Model, "gpt-5") {
  226. return "developer"
  227. }
  228. return "system"
  229. }
  230. const CustomType = "custom"
  231. type ToolCallRequest struct {
  232. ID string `json:"id,omitempty"`
  233. Type string `json:"type"`
  234. Function FunctionRequest `json:"function,omitempty"`
  235. Custom json.RawMessage `json:"custom,omitempty"`
  236. }
  237. type FunctionRequest struct {
  238. Description string `json:"description,omitempty"`
  239. Name string `json:"name"`
  240. Parameters any `json:"parameters,omitempty"`
  241. Arguments string `json:"arguments,omitempty"`
  242. }
  243. type StreamOptions struct {
  244. IncludeUsage bool `json:"include_usage,omitempty"`
  245. // for /v1/responses
  246. IncludeObfuscation bool `json:"include_obfuscation,omitempty"`
  247. }
  248. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  249. if r.MaxCompletionTokens != 0 {
  250. return r.MaxCompletionTokens
  251. }
  252. return r.MaxTokens
  253. }
  254. func (r *GeneralOpenAIRequest) ParseInput() []string {
  255. if r.Input == nil {
  256. return nil
  257. }
  258. var input []string
  259. switch r.Input.(type) {
  260. case string:
  261. input = []string{r.Input.(string)}
  262. case []any:
  263. input = make([]string, 0, len(r.Input.([]any)))
  264. for _, item := range r.Input.([]any) {
  265. if str, ok := item.(string); ok {
  266. input = append(input, str)
  267. }
  268. }
  269. }
  270. return input
  271. }
  272. type Message struct {
  273. Role string `json:"role"`
  274. Content any `json:"content"`
  275. Name *string `json:"name,omitempty"`
  276. Prefix *bool `json:"prefix,omitempty"`
  277. ReasoningContent string `json:"reasoning_content,omitempty"`
  278. Reasoning string `json:"reasoning,omitempty"`
  279. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  280. ToolCallId string `json:"tool_call_id,omitempty"`
  281. parsedContent []MediaContent
  282. //parsedStringContent *string
  283. }
  284. type MediaContent struct {
  285. Type string `json:"type"`
  286. Text string `json:"text,omitempty"`
  287. ImageUrl any `json:"image_url,omitempty"`
  288. InputAudio any `json:"input_audio,omitempty"`
  289. File any `json:"file,omitempty"`
  290. VideoUrl any `json:"video_url,omitempty"`
  291. // OpenRouter Params
  292. CacheControl json.RawMessage `json:"cache_control,omitempty"`
  293. }
  294. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  295. if m.ImageUrl != nil {
  296. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  297. return m.ImageUrl.(*MessageImageUrl)
  298. }
  299. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  300. out := &MessageImageUrl{
  301. Url: common.Interface2String(itemMap["url"]),
  302. Detail: common.Interface2String(itemMap["detail"]),
  303. MimeType: common.Interface2String(itemMap["mime_type"]),
  304. }
  305. return out
  306. }
  307. }
  308. return nil
  309. }
  310. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  311. if m.InputAudio != nil {
  312. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  313. return m.InputAudio.(*MessageInputAudio)
  314. }
  315. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  316. out := &MessageInputAudio{
  317. Data: common.Interface2String(itemMap["data"]),
  318. Format: common.Interface2String(itemMap["format"]),
  319. }
  320. return out
  321. }
  322. }
  323. return nil
  324. }
  325. func (m *MediaContent) GetFile() *MessageFile {
  326. if m.File != nil {
  327. if _, ok := m.File.(*MessageFile); ok {
  328. return m.File.(*MessageFile)
  329. }
  330. if itemMap, ok := m.File.(map[string]any); ok {
  331. out := &MessageFile{
  332. FileName: common.Interface2String(itemMap["file_name"]),
  333. FileData: common.Interface2String(itemMap["file_data"]),
  334. FileId: common.Interface2String(itemMap["file_id"]),
  335. }
  336. return out
  337. }
  338. }
  339. return nil
  340. }
  341. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  342. if m.VideoUrl != nil {
  343. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  344. return m.VideoUrl.(*MessageVideoUrl)
  345. }
  346. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  347. out := &MessageVideoUrl{
  348. Url: common.Interface2String(itemMap["url"]),
  349. }
  350. return out
  351. }
  352. }
  353. return nil
  354. }
  355. type MessageImageUrl struct {
  356. Url string `json:"url"`
  357. Detail string `json:"detail"`
  358. MimeType string
  359. }
  360. func (m *MessageImageUrl) IsRemoteImage() bool {
  361. return strings.HasPrefix(m.Url, "http")
  362. }
  363. type MessageInputAudio struct {
  364. Data string `json:"data"` //base64
  365. Format string `json:"format"`
  366. }
  367. type MessageFile struct {
  368. FileName string `json:"filename,omitempty"`
  369. FileData string `json:"file_data,omitempty"`
  370. FileId string `json:"file_id,omitempty"`
  371. }
  372. type MessageVideoUrl struct {
  373. Url string `json:"url"`
  374. }
  375. const (
  376. ContentTypeText = "text"
  377. ContentTypeImageURL = "image_url"
  378. ContentTypeInputAudio = "input_audio"
  379. ContentTypeFile = "file"
  380. ContentTypeVideoUrl = "video_url" // 阿里百炼视频识别
  381. //ContentTypeAudioUrl = "audio_url"
  382. )
  383. func (m *Message) GetPrefix() bool {
  384. if m.Prefix == nil {
  385. return false
  386. }
  387. return *m.Prefix
  388. }
  389. func (m *Message) SetPrefix(prefix bool) {
  390. m.Prefix = &prefix
  391. }
  392. func (m *Message) ParseToolCalls() []ToolCallRequest {
  393. if m.ToolCalls == nil {
  394. return nil
  395. }
  396. var toolCalls []ToolCallRequest
  397. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  398. return toolCalls
  399. }
  400. return toolCalls
  401. }
  402. func (m *Message) SetToolCalls(toolCalls any) {
  403. toolCallsJson, _ := json.Marshal(toolCalls)
  404. m.ToolCalls = toolCallsJson
  405. }
  406. func (m *Message) StringContent() string {
  407. switch m.Content.(type) {
  408. case string:
  409. return m.Content.(string)
  410. case []any:
  411. var contentStr string
  412. for _, contentItem := range m.Content.([]any) {
  413. contentMap, ok := contentItem.(map[string]any)
  414. if !ok {
  415. continue
  416. }
  417. if contentMap["type"] == ContentTypeText {
  418. if subStr, ok := contentMap["text"].(string); ok {
  419. contentStr += subStr
  420. }
  421. }
  422. }
  423. return contentStr
  424. }
  425. return ""
  426. }
  427. func (m *Message) SetNullContent() {
  428. m.Content = nil
  429. m.parsedContent = nil
  430. }
  431. func (m *Message) SetStringContent(content string) {
  432. m.Content = content
  433. m.parsedContent = nil
  434. }
  435. func (m *Message) SetMediaContent(content []MediaContent) {
  436. m.Content = content
  437. m.parsedContent = content
  438. }
  439. func (m *Message) IsStringContent() bool {
  440. _, ok := m.Content.(string)
  441. if ok {
  442. return true
  443. }
  444. return false
  445. }
  446. func (m *Message) ParseContent() []MediaContent {
  447. if m.Content == nil {
  448. return nil
  449. }
  450. if len(m.parsedContent) > 0 {
  451. return m.parsedContent
  452. }
  453. var contentList []MediaContent
  454. // 先尝试解析为字符串
  455. content, ok := m.Content.(string)
  456. if ok {
  457. contentList = []MediaContent{{
  458. Type: ContentTypeText,
  459. Text: content,
  460. }}
  461. m.parsedContent = contentList
  462. return contentList
  463. }
  464. // 尝试解析为数组
  465. //var arrayContent []map[string]interface{}
  466. arrayContent, ok := m.Content.([]any)
  467. if !ok {
  468. return contentList
  469. }
  470. for _, contentItemAny := range arrayContent {
  471. mediaItem, ok := contentItemAny.(MediaContent)
  472. if ok {
  473. contentList = append(contentList, mediaItem)
  474. continue
  475. }
  476. contentItem, ok := contentItemAny.(map[string]any)
  477. if !ok {
  478. continue
  479. }
  480. contentType, ok := contentItem["type"].(string)
  481. if !ok {
  482. continue
  483. }
  484. switch contentType {
  485. case ContentTypeText:
  486. if text, ok := contentItem["text"].(string); ok {
  487. contentList = append(contentList, MediaContent{
  488. Type: ContentTypeText,
  489. Text: text,
  490. })
  491. }
  492. case ContentTypeImageURL:
  493. imageUrl := contentItem["image_url"]
  494. temp := &MessageImageUrl{
  495. Detail: "high",
  496. }
  497. switch v := imageUrl.(type) {
  498. case string:
  499. temp.Url = v
  500. case map[string]interface{}:
  501. url, ok1 := v["url"].(string)
  502. detail, ok2 := v["detail"].(string)
  503. if ok2 {
  504. temp.Detail = detail
  505. }
  506. if ok1 {
  507. temp.Url = url
  508. }
  509. }
  510. contentList = append(contentList, MediaContent{
  511. Type: ContentTypeImageURL,
  512. ImageUrl: temp,
  513. })
  514. case ContentTypeInputAudio:
  515. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  516. data, ok1 := audioData["data"].(string)
  517. format, ok2 := audioData["format"].(string)
  518. if ok1 && ok2 {
  519. temp := &MessageInputAudio{
  520. Data: data,
  521. Format: format,
  522. }
  523. contentList = append(contentList, MediaContent{
  524. Type: ContentTypeInputAudio,
  525. InputAudio: temp,
  526. })
  527. }
  528. }
  529. case ContentTypeFile:
  530. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  531. fileId, ok3 := fileData["file_id"].(string)
  532. if ok3 {
  533. contentList = append(contentList, MediaContent{
  534. Type: ContentTypeFile,
  535. File: &MessageFile{
  536. FileId: fileId,
  537. },
  538. })
  539. } else {
  540. fileName, ok1 := fileData["filename"].(string)
  541. fileDataStr, ok2 := fileData["file_data"].(string)
  542. if ok1 && ok2 {
  543. contentList = append(contentList, MediaContent{
  544. Type: ContentTypeFile,
  545. File: &MessageFile{
  546. FileName: fileName,
  547. FileData: fileDataStr,
  548. },
  549. })
  550. }
  551. }
  552. }
  553. case ContentTypeVideoUrl:
  554. if videoUrl, ok := contentItem["video_url"].(string); ok {
  555. contentList = append(contentList, MediaContent{
  556. Type: ContentTypeVideoUrl,
  557. VideoUrl: &MessageVideoUrl{
  558. Url: videoUrl,
  559. },
  560. })
  561. }
  562. }
  563. }
  564. if len(contentList) > 0 {
  565. m.parsedContent = contentList
  566. }
  567. return contentList
  568. }
  569. // old code
  570. /*func (m *Message) StringContent() string {
  571. if m.parsedStringContent != nil {
  572. return *m.parsedStringContent
  573. }
  574. var stringContent string
  575. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  576. m.parsedStringContent = &stringContent
  577. return stringContent
  578. }
  579. contentStr := new(strings.Builder)
  580. arrayContent := m.ParseContent()
  581. for _, content := range arrayContent {
  582. if content.Type == ContentTypeText {
  583. contentStr.WriteString(content.Text)
  584. }
  585. }
  586. stringContent = contentStr.String()
  587. m.parsedStringContent = &stringContent
  588. return stringContent
  589. }
  590. func (m *Message) SetNullContent() {
  591. m.Content = nil
  592. m.parsedStringContent = nil
  593. m.parsedContent = nil
  594. }
  595. func (m *Message) SetStringContent(content string) {
  596. jsonContent, _ := json.Marshal(content)
  597. m.Content = jsonContent
  598. m.parsedStringContent = &content
  599. m.parsedContent = nil
  600. }
  601. func (m *Message) SetMediaContent(content []MediaContent) {
  602. jsonContent, _ := json.Marshal(content)
  603. m.Content = jsonContent
  604. m.parsedContent = nil
  605. m.parsedStringContent = nil
  606. }
  607. func (m *Message) IsStringContent() bool {
  608. if m.parsedStringContent != nil {
  609. return true
  610. }
  611. var stringContent string
  612. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  613. m.parsedStringContent = &stringContent
  614. return true
  615. }
  616. return false
  617. }
  618. func (m *Message) ParseContent() []MediaContent {
  619. if m.parsedContent != nil {
  620. return m.parsedContent
  621. }
  622. var contentList []MediaContent
  623. // 先尝试解析为字符串
  624. var stringContent string
  625. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  626. contentList = []MediaContent{{
  627. Type: ContentTypeText,
  628. Text: stringContent,
  629. }}
  630. m.parsedContent = contentList
  631. return contentList
  632. }
  633. // 尝试解析为数组
  634. var arrayContent []map[string]interface{}
  635. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  636. for _, contentItem := range arrayContent {
  637. contentType, ok := contentItem["type"].(string)
  638. if !ok {
  639. continue
  640. }
  641. switch contentType {
  642. case ContentTypeText:
  643. if text, ok := contentItem["text"].(string); ok {
  644. contentList = append(contentList, MediaContent{
  645. Type: ContentTypeText,
  646. Text: text,
  647. })
  648. }
  649. case ContentTypeImageURL:
  650. imageUrl := contentItem["image_url"]
  651. temp := &MessageImageUrl{
  652. Detail: "high",
  653. }
  654. switch v := imageUrl.(type) {
  655. case string:
  656. temp.Url = v
  657. case map[string]interface{}:
  658. url, ok1 := v["url"].(string)
  659. detail, ok2 := v["detail"].(string)
  660. if ok2 {
  661. temp.Detail = detail
  662. }
  663. if ok1 {
  664. temp.Url = url
  665. }
  666. }
  667. contentList = append(contentList, MediaContent{
  668. Type: ContentTypeImageURL,
  669. ImageUrl: temp,
  670. })
  671. case ContentTypeInputAudio:
  672. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  673. data, ok1 := audioData["data"].(string)
  674. format, ok2 := audioData["format"].(string)
  675. if ok1 && ok2 {
  676. temp := &MessageInputAudio{
  677. Data: data,
  678. Format: format,
  679. }
  680. contentList = append(contentList, MediaContent{
  681. Type: ContentTypeInputAudio,
  682. InputAudio: temp,
  683. })
  684. }
  685. }
  686. case ContentTypeFile:
  687. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  688. fileId, ok3 := fileData["file_id"].(string)
  689. if ok3 {
  690. contentList = append(contentList, MediaContent{
  691. Type: ContentTypeFile,
  692. File: &MessageFile{
  693. FileId: fileId,
  694. },
  695. })
  696. } else {
  697. fileName, ok1 := fileData["filename"].(string)
  698. fileDataStr, ok2 := fileData["file_data"].(string)
  699. if ok1 && ok2 {
  700. contentList = append(contentList, MediaContent{
  701. Type: ContentTypeFile,
  702. File: &MessageFile{
  703. FileName: fileName,
  704. FileData: fileDataStr,
  705. },
  706. })
  707. }
  708. }
  709. }
  710. case ContentTypeVideoUrl:
  711. if videoUrl, ok := contentItem["video_url"].(string); ok {
  712. contentList = append(contentList, MediaContent{
  713. Type: ContentTypeVideoUrl,
  714. VideoUrl: &MessageVideoUrl{
  715. Url: videoUrl,
  716. },
  717. })
  718. }
  719. }
  720. }
  721. }
  722. if len(contentList) > 0 {
  723. m.parsedContent = contentList
  724. }
  725. return contentList
  726. }*/
  727. type WebSearchOptions struct {
  728. SearchContextSize string `json:"search_context_size,omitempty"`
  729. UserLocation json.RawMessage `json:"user_location,omitempty"`
  730. }
  731. // https://platform.openai.com/docs/api-reference/responses/create
  732. type OpenAIResponsesRequest struct {
  733. Model string `json:"model"`
  734. Input json.RawMessage `json:"input,omitempty"`
  735. Include json.RawMessage `json:"include,omitempty"`
  736. // 在后台运行推理,暂时还不支持依赖的接口
  737. // Background json.RawMessage `json:"background,omitempty"`
  738. Conversation json.RawMessage `json:"conversation,omitempty"`
  739. ContextManagement json.RawMessage `json:"context_management,omitempty"`
  740. Instructions json.RawMessage `json:"instructions,omitempty"`
  741. MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
  742. TopLogProbs *int `json:"top_logprobs,omitempty"`
  743. Metadata json.RawMessage `json:"metadata,omitempty"`
  744. ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
  745. PreviousResponseID string `json:"previous_response_id,omitempty"`
  746. Reasoning *Reasoning `json:"reasoning,omitempty"`
  747. // 服务层级字段,用于指定 API 服务等级。允许透传可能导致实际计费高于预期,默认应过滤
  748. ServiceTier string `json:"service_tier,omitempty"`
  749. Store json.RawMessage `json:"store,omitempty"`
  750. PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
  751. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  752. SafetyIdentifier string `json:"safety_identifier,omitempty"`
  753. Stream bool `json:"stream,omitempty"`
  754. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  755. Temperature *float64 `json:"temperature,omitempty"`
  756. Text json.RawMessage `json:"text,omitempty"`
  757. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  758. Tools json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  759. TopP *float64 `json:"top_p,omitempty"`
  760. Truncation string `json:"truncation,omitempty"`
  761. User string `json:"user,omitempty"`
  762. MaxToolCalls uint `json:"max_tool_calls,omitempty"`
  763. Prompt json.RawMessage `json:"prompt,omitempty"`
  764. // qwen
  765. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  766. // perplexity
  767. Preset json.RawMessage `json:"preset,omitempty"`
  768. }
  769. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  770. var fileMeta = make([]*types.FileMeta, 0)
  771. var texts = make([]string, 0)
  772. if r.Input != nil {
  773. inputs := r.ParseInput()
  774. for _, input := range inputs {
  775. if input.Type == "input_image" {
  776. if input.ImageUrl != "" {
  777. fileMeta = append(fileMeta, &types.FileMeta{
  778. FileType: types.FileTypeImage,
  779. Source: createFileSource(input.ImageUrl),
  780. Detail: input.Detail,
  781. })
  782. }
  783. } else if input.Type == "input_file" {
  784. if input.FileUrl != "" {
  785. fileMeta = append(fileMeta, &types.FileMeta{
  786. FileType: types.FileTypeFile,
  787. Source: createFileSource(input.FileUrl),
  788. })
  789. }
  790. } else {
  791. texts = append(texts, input.Text)
  792. }
  793. }
  794. }
  795. if len(r.Instructions) > 0 {
  796. texts = append(texts, string(r.Instructions))
  797. }
  798. if len(r.Metadata) > 0 {
  799. texts = append(texts, string(r.Metadata))
  800. }
  801. if len(r.Text) > 0 {
  802. texts = append(texts, string(r.Text))
  803. }
  804. if len(r.ToolChoice) > 0 {
  805. texts = append(texts, string(r.ToolChoice))
  806. }
  807. if len(r.Prompt) > 0 {
  808. texts = append(texts, string(r.Prompt))
  809. }
  810. if len(r.Tools) > 0 {
  811. texts = append(texts, string(r.Tools))
  812. }
  813. return &types.TokenCountMeta{
  814. CombineText: strings.Join(texts, "\n"),
  815. Files: fileMeta,
  816. MaxTokens: int(r.MaxOutputTokens),
  817. }
  818. }
  819. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  820. return r.Stream
  821. }
  822. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  823. if modelName != "" {
  824. r.Model = modelName
  825. }
  826. }
  827. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  828. var toolsMap []map[string]any
  829. if len(r.Tools) > 0 {
  830. _ = common.Unmarshal(r.Tools, &toolsMap)
  831. }
  832. return toolsMap
  833. }
  834. type Reasoning struct {
  835. Effort string `json:"effort,omitempty"`
  836. Summary string `json:"summary,omitempty"`
  837. }
  838. type Input struct {
  839. Type string `json:"type,omitempty"`
  840. Role string `json:"role,omitempty"`
  841. Content json.RawMessage `json:"content,omitempty"`
  842. }
  843. type MediaInput struct {
  844. Type string `json:"type"`
  845. Text string `json:"text,omitempty"`
  846. FileUrl string `json:"file_url,omitempty"`
  847. ImageUrl string `json:"image_url,omitempty"`
  848. Detail string `json:"detail,omitempty"` // 仅 input_image 有效
  849. }
  850. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  851. // Reference implementation mirrors Message.ParseContent:
  852. // - input can be a string, treated as an input_text item
  853. // - input can be an array of objects with a `type` field
  854. // supported types: input_text, input_image, input_file
  855. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  856. if r.Input == nil {
  857. return nil
  858. }
  859. var mediaInputs []MediaInput
  860. // Try string first
  861. // if str, ok := common.GetJsonType(r.Input); ok {
  862. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  863. // return inputs
  864. // }
  865. if common.GetJsonType(r.Input) == "string" {
  866. var str string
  867. _ = common.Unmarshal(r.Input, &str)
  868. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  869. return mediaInputs
  870. }
  871. // Try array of parts
  872. if common.GetJsonType(r.Input) == "array" {
  873. var inputs []Input
  874. _ = common.Unmarshal(r.Input, &inputs)
  875. for _, input := range inputs {
  876. if common.GetJsonType(input.Content) == "string" {
  877. var str string
  878. _ = common.Unmarshal(input.Content, &str)
  879. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  880. }
  881. if common.GetJsonType(input.Content) == "array" {
  882. var array []any
  883. _ = common.Unmarshal(input.Content, &array)
  884. for _, itemAny := range array {
  885. // Already parsed MediaContent
  886. if media, ok := itemAny.(MediaInput); ok {
  887. mediaInputs = append(mediaInputs, media)
  888. continue
  889. }
  890. // Generic map
  891. item, ok := itemAny.(map[string]any)
  892. if !ok {
  893. continue
  894. }
  895. typeVal, ok := item["type"].(string)
  896. if !ok {
  897. continue
  898. }
  899. switch typeVal {
  900. case "input_text":
  901. text, _ := item["text"].(string)
  902. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  903. case "input_image":
  904. // image_url may be string or object with url field
  905. var imageUrl string
  906. switch v := item["image_url"].(type) {
  907. case string:
  908. imageUrl = v
  909. case map[string]any:
  910. if url, ok := v["url"].(string); ok {
  911. imageUrl = url
  912. }
  913. }
  914. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  915. case "input_file":
  916. // file_url may be string or object with url field
  917. var fileUrl string
  918. switch v := item["file_url"].(type) {
  919. case string:
  920. fileUrl = v
  921. case map[string]any:
  922. if url, ok := v["url"].(string); ok {
  923. fileUrl = url
  924. }
  925. }
  926. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  927. }
  928. }
  929. }
  930. }
  931. }
  932. return mediaInputs
  933. }