openai_request.go 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "one-api/common"
  6. "one-api/types"
  7. "strings"
  8. "github.com/gin-gonic/gin"
  9. )
  // ResponseFormat is the value of the `response_format` request parameter.
  //
  // JsonSchema is stored as raw JSON, so this type neither decodes nor
  // validates the schema payload; both fields are omitted when empty.
  type ResponseFormat struct {
      Type       string          `json:"type,omitempty"`
      JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  }
  // FormatJsonSchema models a named JSON schema definition.
  //
  // Name is always serialized; Description, Schema and Strict are omitted when
  // empty. Strict is kept as raw JSON and is not interpreted by this type.
  type FormatJsonSchema struct {
      Description string          `json:"description,omitempty"`
      Name        string          `json:"name"`
      Schema      any             `json:"schema,omitempty"`
      Strict      json.RawMessage `json:"strict,omitempty"`
  }
  20. type GeneralOpenAIRequest struct {
  21. Model string `json:"model,omitempty"`
  22. Messages []Message `json:"messages,omitempty"`
  23. Prompt any `json:"prompt,omitempty"`
  24. Prefix any `json:"prefix,omitempty"`
  25. Suffix any `json:"suffix,omitempty"`
  26. Stream bool `json:"stream,omitempty"`
  27. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  28. MaxTokens uint `json:"max_tokens,omitempty"`
  29. MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
  30. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  31. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  32. Temperature *float64 `json:"temperature,omitempty"`
  33. TopP float64 `json:"top_p,omitempty"`
  34. TopK int `json:"top_k,omitempty"`
  35. Stop any `json:"stop,omitempty"`
  36. N int `json:"n,omitempty"`
  37. Input any `json:"input,omitempty"`
  38. Instruction string `json:"instruction,omitempty"`
  39. Size string `json:"size,omitempty"`
  40. Functions json.RawMessage `json:"functions,omitempty"`
  41. FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
  42. PresencePenalty float64 `json:"presence_penalty,omitempty"`
  43. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  44. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  45. Seed float64 `json:"seed,omitempty"`
  46. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  47. Tools []ToolCallRequest `json:"tools,omitempty"`
  48. ToolChoice any `json:"tool_choice,omitempty"`
  49. User string `json:"user,omitempty"`
  50. LogProbs bool `json:"logprobs,omitempty"`
  51. TopLogProbs int `json:"top_logprobs,omitempty"`
  52. Dimensions int `json:"dimensions,omitempty"`
  53. Modalities json.RawMessage `json:"modalities,omitempty"`
  54. Audio json.RawMessage `json:"audio,omitempty"`
  55. EnableThinking any `json:"enable_thinking,omitempty"` // ali
  56. THINKING json.RawMessage `json:"thinking,omitempty"` // doubao,zhipu_v4
  57. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  58. SearchParameters any `json:"search_parameters,omitempty"` //xai
  59. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  60. // OpenRouter Params
  61. Usage json.RawMessage `json:"usage,omitempty"`
  62. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  63. // Ali Qwen Params
  64. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  65. // 用匿名参数接收额外参数,例如ollama的think参数在此接收
  66. Extra map[string]json.RawMessage `json:"-"`
  67. }
  68. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  69. var tokenCountMeta types.TokenCountMeta
  70. var texts = make([]string, 0)
  71. var fileMeta = make([]*types.FileMeta, 0)
  72. if r.Prompt != nil {
  73. switch v := r.Prompt.(type) {
  74. case string:
  75. texts = append(texts, v)
  76. case []any:
  77. for _, item := range v {
  78. if str, ok := item.(string); ok {
  79. texts = append(texts, str)
  80. }
  81. }
  82. default:
  83. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  84. }
  85. }
  86. if r.Input != nil {
  87. inputs := r.ParseInput()
  88. texts = append(texts, inputs...)
  89. }
  90. if r.MaxCompletionTokens > r.MaxTokens {
  91. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  92. } else {
  93. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  94. }
  95. for _, message := range r.Messages {
  96. tokenCountMeta.MessagesCount++
  97. texts = append(texts, message.Role)
  98. if message.Content != nil {
  99. if message.Name != nil {
  100. tokenCountMeta.NameCount++
  101. texts = append(texts, *message.Name)
  102. }
  103. arrayContent := message.ParseContent()
  104. for _, m := range arrayContent {
  105. if m.Type == ContentTypeImageURL {
  106. imageUrl := m.GetImageMedia()
  107. if imageUrl != nil {
  108. meta := &types.FileMeta{
  109. FileType: types.FileTypeImage,
  110. }
  111. meta.OriginData = imageUrl.Url
  112. meta.Detail = imageUrl.Detail
  113. fileMeta = append(fileMeta, meta)
  114. }
  115. } else if m.Type == ContentTypeInputAudio {
  116. inputAudio := m.GetInputAudio()
  117. if inputAudio != nil {
  118. meta := &types.FileMeta{
  119. FileType: types.FileTypeAudio,
  120. }
  121. meta.OriginData = inputAudio.Data
  122. fileMeta = append(fileMeta, meta)
  123. }
  124. } else if m.Type == ContentTypeFile {
  125. file := m.GetFile()
  126. if file != nil {
  127. meta := &types.FileMeta{
  128. FileType: types.FileTypeFile,
  129. }
  130. meta.OriginData = file.FileData
  131. fileMeta = append(fileMeta, meta)
  132. }
  133. } else if m.Type == ContentTypeVideoUrl {
  134. videoUrl := m.GetVideoUrl()
  135. if videoUrl != nil {
  136. meta := &types.FileMeta{
  137. FileType: types.FileTypeVideo,
  138. }
  139. meta.OriginData = videoUrl.Url
  140. fileMeta = append(fileMeta, meta)
  141. }
  142. } else {
  143. texts = append(texts, m.Text)
  144. }
  145. }
  146. }
  147. }
  148. if r.Tools != nil {
  149. openaiTools := r.Tools
  150. for _, tool := range openaiTools {
  151. tokenCountMeta.ToolsCount++
  152. texts = append(texts, tool.Function.Name)
  153. if tool.Function.Description != "" {
  154. texts = append(texts, tool.Function.Description)
  155. }
  156. if tool.Function.Parameters != nil {
  157. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  158. }
  159. }
  160. //toolTokens := CountTokenInput(countStr, request.Model)
  161. //tkm += 8
  162. //tkm += toolTokens
  163. }
  164. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  165. tokenCountMeta.Files = fileMeta
  166. return &tokenCountMeta
  167. }
  168. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  169. return r.Stream
  170. }
  171. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  172. result := make(map[string]any)
  173. data, _ := common.Marshal(r)
  174. _ = common.Unmarshal(data, &result)
  175. return result
  176. }
  177. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  178. if strings.HasPrefix(r.Model, "o") {
  179. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  180. return "developer"
  181. }
  182. } else if strings.HasPrefix(r.Model, "gpt-5") {
  183. return "developer"
  184. }
  185. return "system"
  186. }
  187. type ToolCallRequest struct {
  188. ID string `json:"id,omitempty"`
  189. Type string `json:"type"`
  190. Function FunctionRequest `json:"function"`
  191. }
  // FunctionRequest is the function part of a ToolCallRequest. Name is always
  // serialized; Description, Parameters and Arguments are omitted when empty.
  type FunctionRequest struct {
      Description string `json:"description,omitempty"`
      Name        string `json:"name"`
      Parameters  any    `json:"parameters,omitempty"`
      Arguments   string `json:"arguments,omitempty"`
  }
  // StreamOptions is the `stream_options` request object. IncludeUsage is
  // omitted from the JSON when false.
  type StreamOptions struct {
      IncludeUsage bool `json:"include_usage,omitempty"`
  }
  201. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  202. if r.MaxCompletionTokens != 0 {
  203. return r.MaxCompletionTokens
  204. }
  205. return r.MaxTokens
  206. }
  207. func (r *GeneralOpenAIRequest) ParseInput() []string {
  208. if r.Input == nil {
  209. return nil
  210. }
  211. var input []string
  212. switch r.Input.(type) {
  213. case string:
  214. input = []string{r.Input.(string)}
  215. case []any:
  216. input = make([]string, 0, len(r.Input.([]any)))
  217. for _, item := range r.Input.([]any) {
  218. if str, ok := item.(string); ok {
  219. input = append(input, str)
  220. }
  221. }
  222. }
  223. return input
  224. }
  225. type Message struct {
  226. Role string `json:"role"`
  227. Content any `json:"content"`
  228. Name *string `json:"name,omitempty"`
  229. Prefix *bool `json:"prefix,omitempty"`
  230. ReasoningContent string `json:"reasoning_content,omitempty"`
  231. Reasoning string `json:"reasoning,omitempty"`
  232. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  233. ToolCallId string `json:"tool_call_id,omitempty"`
  234. parsedContent []MediaContent
  235. //parsedStringContent *string
  236. }
  // MediaContent is one part of a multi-part message content.
  //
  // Type selects which payload field is relevant. The payload fields are typed
  // any because they may hold either a typed pointer (as built by
  // Message.ParseContent) or a generic decoded map; use GetImageMedia,
  // GetInputAudio, GetFile and GetVideoUrl to read them.
  type MediaContent struct {
      Type       string `json:"type"`
      Text       string `json:"text,omitempty"`
      ImageUrl   any    `json:"image_url,omitempty"`
      InputAudio any    `json:"input_audio,omitempty"`
      File       any    `json:"file,omitempty"`
      VideoUrl   any    `json:"video_url,omitempty"`

      // OpenRouter params.
      CacheControl json.RawMessage `json:"cache_control,omitempty"`
  }
  247. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  248. if m.ImageUrl != nil {
  249. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  250. return m.ImageUrl.(*MessageImageUrl)
  251. }
  252. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  253. out := &MessageImageUrl{
  254. Url: common.Interface2String(itemMap["url"]),
  255. Detail: common.Interface2String(itemMap["detail"]),
  256. MimeType: common.Interface2String(itemMap["mime_type"]),
  257. }
  258. return out
  259. }
  260. }
  261. return nil
  262. }
  263. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  264. if m.InputAudio != nil {
  265. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  266. return m.InputAudio.(*MessageInputAudio)
  267. }
  268. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  269. out := &MessageInputAudio{
  270. Data: common.Interface2String(itemMap["data"]),
  271. Format: common.Interface2String(itemMap["format"]),
  272. }
  273. return out
  274. }
  275. }
  276. return nil
  277. }
  278. func (m *MediaContent) GetFile() *MessageFile {
  279. if m.File != nil {
  280. if _, ok := m.File.(*MessageFile); ok {
  281. return m.File.(*MessageFile)
  282. }
  283. if itemMap, ok := m.File.(map[string]any); ok {
  284. out := &MessageFile{
  285. FileName: common.Interface2String(itemMap["file_name"]),
  286. FileData: common.Interface2String(itemMap["file_data"]),
  287. FileId: common.Interface2String(itemMap["file_id"]),
  288. }
  289. return out
  290. }
  291. }
  292. return nil
  293. }
  294. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  295. if m.VideoUrl != nil {
  296. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  297. return m.VideoUrl.(*MessageVideoUrl)
  298. }
  299. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  300. out := &MessageVideoUrl{
  301. Url: common.Interface2String(itemMap["url"]),
  302. }
  303. return out
  304. }
  305. }
  306. return nil
  307. }
  // MessageImageUrl is the payload of an image_url content part.
  //
  // NOTE(review): MimeType has no JSON tag, so encoding/json emits it under
  // the key "MimeType" (even when empty), while GetImageMedia reads
  // "mime_type" from generic maps. Confirm which wire name is intended.
  type MessageImageUrl struct {
      Url      string `json:"url"`
      Detail   string `json:"detail"`
      MimeType string
  }

  // IsRemoteImage reports whether Url starts with "http". Any other value,
  // such as a data: URI or an empty string, is treated as not remote.
  func (m *MessageImageUrl) IsRemoteImage() bool {
      const remotePrefix = "http"
      return strings.HasPrefix(m.Url, remotePrefix)
  }
  // MessageInputAudio is the payload of an input_audio content part.
  type MessageInputAudio struct {
      Data   string `json:"data"` // base64-encoded audio
      Format string `json:"format"`
  }
  // MessageFile is the payload of a file content part. All fields are omitted
  // from the JSON when empty; note that the file name is serialized under the
  // key "filename".
  type MessageFile struct {
      FileName string `json:"filename,omitempty"`
      FileData string `json:"file_data,omitempty"`
      FileId   string `json:"file_id,omitempty"`
  }
  // MessageVideoUrl is the payload of a video_url content part.
  type MessageVideoUrl struct {
      Url string `json:"url"`
  }
  // Values of the "type" field of a message content part, as recognized by
  // Message.ParseContent.
  const (
      ContentTypeText       = "text"
      ContentTypeImageURL   = "image_url"
      ContentTypeInputAudio = "input_audio"
      ContentTypeFile       = "file"
      ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
      // ContentTypeAudioUrl = "audio_url"
  )
  336. func (m *Message) GetPrefix() bool {
  337. if m.Prefix == nil {
  338. return false
  339. }
  340. return *m.Prefix
  341. }
  342. func (m *Message) SetPrefix(prefix bool) {
  343. m.Prefix = &prefix
  344. }
  345. func (m *Message) ParseToolCalls() []ToolCallRequest {
  346. if m.ToolCalls == nil {
  347. return nil
  348. }
  349. var toolCalls []ToolCallRequest
  350. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  351. return toolCalls
  352. }
  353. return toolCalls
  354. }
  355. func (m *Message) SetToolCalls(toolCalls any) {
  356. toolCallsJson, _ := json.Marshal(toolCalls)
  357. m.ToolCalls = toolCallsJson
  358. }
  359. func (m *Message) StringContent() string {
  360. switch m.Content.(type) {
  361. case string:
  362. return m.Content.(string)
  363. case []any:
  364. var contentStr string
  365. for _, contentItem := range m.Content.([]any) {
  366. contentMap, ok := contentItem.(map[string]any)
  367. if !ok {
  368. continue
  369. }
  370. if contentMap["type"] == ContentTypeText {
  371. if subStr, ok := contentMap["text"].(string); ok {
  372. contentStr += subStr
  373. }
  374. }
  375. }
  376. return contentStr
  377. }
  378. return ""
  379. }
  380. func (m *Message) SetNullContent() {
  381. m.Content = nil
  382. m.parsedContent = nil
  383. }
  384. func (m *Message) SetStringContent(content string) {
  385. m.Content = content
  386. m.parsedContent = nil
  387. }
  388. func (m *Message) SetMediaContent(content []MediaContent) {
  389. m.Content = content
  390. m.parsedContent = content
  391. }
  392. func (m *Message) IsStringContent() bool {
  393. _, ok := m.Content.(string)
  394. if ok {
  395. return true
  396. }
  397. return false
  398. }
  399. func (m *Message) ParseContent() []MediaContent {
  400. if m.Content == nil {
  401. return nil
  402. }
  403. if len(m.parsedContent) > 0 {
  404. return m.parsedContent
  405. }
  406. var contentList []MediaContent
  407. // 先尝试解析为字符串
  408. content, ok := m.Content.(string)
  409. if ok {
  410. contentList = []MediaContent{{
  411. Type: ContentTypeText,
  412. Text: content,
  413. }}
  414. m.parsedContent = contentList
  415. return contentList
  416. }
  417. // 尝试解析为数组
  418. //var arrayContent []map[string]interface{}
  419. arrayContent, ok := m.Content.([]any)
  420. if !ok {
  421. return contentList
  422. }
  423. for _, contentItemAny := range arrayContent {
  424. mediaItem, ok := contentItemAny.(MediaContent)
  425. if ok {
  426. contentList = append(contentList, mediaItem)
  427. continue
  428. }
  429. contentItem, ok := contentItemAny.(map[string]any)
  430. if !ok {
  431. continue
  432. }
  433. contentType, ok := contentItem["type"].(string)
  434. if !ok {
  435. continue
  436. }
  437. switch contentType {
  438. case ContentTypeText:
  439. if text, ok := contentItem["text"].(string); ok {
  440. contentList = append(contentList, MediaContent{
  441. Type: ContentTypeText,
  442. Text: text,
  443. })
  444. }
  445. case ContentTypeImageURL:
  446. imageUrl := contentItem["image_url"]
  447. temp := &MessageImageUrl{
  448. Detail: "high",
  449. }
  450. switch v := imageUrl.(type) {
  451. case string:
  452. temp.Url = v
  453. case map[string]interface{}:
  454. url, ok1 := v["url"].(string)
  455. detail, ok2 := v["detail"].(string)
  456. if ok2 {
  457. temp.Detail = detail
  458. }
  459. if ok1 {
  460. temp.Url = url
  461. }
  462. }
  463. contentList = append(contentList, MediaContent{
  464. Type: ContentTypeImageURL,
  465. ImageUrl: temp,
  466. })
  467. case ContentTypeInputAudio:
  468. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  469. data, ok1 := audioData["data"].(string)
  470. format, ok2 := audioData["format"].(string)
  471. if ok1 && ok2 {
  472. temp := &MessageInputAudio{
  473. Data: data,
  474. Format: format,
  475. }
  476. contentList = append(contentList, MediaContent{
  477. Type: ContentTypeInputAudio,
  478. InputAudio: temp,
  479. })
  480. }
  481. }
  482. case ContentTypeFile:
  483. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  484. fileId, ok3 := fileData["file_id"].(string)
  485. if ok3 {
  486. contentList = append(contentList, MediaContent{
  487. Type: ContentTypeFile,
  488. File: &MessageFile{
  489. FileId: fileId,
  490. },
  491. })
  492. } else {
  493. fileName, ok1 := fileData["filename"].(string)
  494. fileDataStr, ok2 := fileData["file_data"].(string)
  495. if ok1 && ok2 {
  496. contentList = append(contentList, MediaContent{
  497. Type: ContentTypeFile,
  498. File: &MessageFile{
  499. FileName: fileName,
  500. FileData: fileDataStr,
  501. },
  502. })
  503. }
  504. }
  505. }
  506. case ContentTypeVideoUrl:
  507. if videoUrl, ok := contentItem["video_url"].(string); ok {
  508. contentList = append(contentList, MediaContent{
  509. Type: ContentTypeVideoUrl,
  510. VideoUrl: &MessageVideoUrl{
  511. Url: videoUrl,
  512. },
  513. })
  514. }
  515. }
  516. }
  517. if len(contentList) > 0 {
  518. m.parsedContent = contentList
  519. }
  520. return contentList
  521. }
  522. // old code
  523. /*func (m *Message) StringContent() string {
  524. if m.parsedStringContent != nil {
  525. return *m.parsedStringContent
  526. }
  527. var stringContent string
  528. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  529. m.parsedStringContent = &stringContent
  530. return stringContent
  531. }
  532. contentStr := new(strings.Builder)
  533. arrayContent := m.ParseContent()
  534. for _, content := range arrayContent {
  535. if content.Type == ContentTypeText {
  536. contentStr.WriteString(content.Text)
  537. }
  538. }
  539. stringContent = contentStr.String()
  540. m.parsedStringContent = &stringContent
  541. return stringContent
  542. }
  543. func (m *Message) SetNullContent() {
  544. m.Content = nil
  545. m.parsedStringContent = nil
  546. m.parsedContent = nil
  547. }
  548. func (m *Message) SetStringContent(content string) {
  549. jsonContent, _ := json.Marshal(content)
  550. m.Content = jsonContent
  551. m.parsedStringContent = &content
  552. m.parsedContent = nil
  553. }
  554. func (m *Message) SetMediaContent(content []MediaContent) {
  555. jsonContent, _ := json.Marshal(content)
  556. m.Content = jsonContent
  557. m.parsedContent = nil
  558. m.parsedStringContent = nil
  559. }
  560. func (m *Message) IsStringContent() bool {
  561. if m.parsedStringContent != nil {
  562. return true
  563. }
  564. var stringContent string
  565. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  566. m.parsedStringContent = &stringContent
  567. return true
  568. }
  569. return false
  570. }
  571. func (m *Message) ParseContent() []MediaContent {
  572. if m.parsedContent != nil {
  573. return m.parsedContent
  574. }
  575. var contentList []MediaContent
  576. // 先尝试解析为字符串
  577. var stringContent string
  578. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  579. contentList = []MediaContent{{
  580. Type: ContentTypeText,
  581. Text: stringContent,
  582. }}
  583. m.parsedContent = contentList
  584. return contentList
  585. }
  586. // 尝试解析为数组
  587. var arrayContent []map[string]interface{}
  588. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  589. for _, contentItem := range arrayContent {
  590. contentType, ok := contentItem["type"].(string)
  591. if !ok {
  592. continue
  593. }
  594. switch contentType {
  595. case ContentTypeText:
  596. if text, ok := contentItem["text"].(string); ok {
  597. contentList = append(contentList, MediaContent{
  598. Type: ContentTypeText,
  599. Text: text,
  600. })
  601. }
  602. case ContentTypeImageURL:
  603. imageUrl := contentItem["image_url"]
  604. temp := &MessageImageUrl{
  605. Detail: "high",
  606. }
  607. switch v := imageUrl.(type) {
  608. case string:
  609. temp.Url = v
  610. case map[string]interface{}:
  611. url, ok1 := v["url"].(string)
  612. detail, ok2 := v["detail"].(string)
  613. if ok2 {
  614. temp.Detail = detail
  615. }
  616. if ok1 {
  617. temp.Url = url
  618. }
  619. }
  620. contentList = append(contentList, MediaContent{
  621. Type: ContentTypeImageURL,
  622. ImageUrl: temp,
  623. })
  624. case ContentTypeInputAudio:
  625. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  626. data, ok1 := audioData["data"].(string)
  627. format, ok2 := audioData["format"].(string)
  628. if ok1 && ok2 {
  629. temp := &MessageInputAudio{
  630. Data: data,
  631. Format: format,
  632. }
  633. contentList = append(contentList, MediaContent{
  634. Type: ContentTypeInputAudio,
  635. InputAudio: temp,
  636. })
  637. }
  638. }
  639. case ContentTypeFile:
  640. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  641. fileId, ok3 := fileData["file_id"].(string)
  642. if ok3 {
  643. contentList = append(contentList, MediaContent{
  644. Type: ContentTypeFile,
  645. File: &MessageFile{
  646. FileId: fileId,
  647. },
  648. })
  649. } else {
  650. fileName, ok1 := fileData["filename"].(string)
  651. fileDataStr, ok2 := fileData["file_data"].(string)
  652. if ok1 && ok2 {
  653. contentList = append(contentList, MediaContent{
  654. Type: ContentTypeFile,
  655. File: &MessageFile{
  656. FileName: fileName,
  657. FileData: fileDataStr,
  658. },
  659. })
  660. }
  661. }
  662. }
  663. case ContentTypeVideoUrl:
  664. if videoUrl, ok := contentItem["video_url"].(string); ok {
  665. contentList = append(contentList, MediaContent{
  666. Type: ContentTypeVideoUrl,
  667. VideoUrl: &MessageVideoUrl{
  668. Url: videoUrl,
  669. },
  670. })
  671. }
  672. }
  673. }
  674. }
  675. if len(contentList) > 0 {
  676. m.parsedContent = contentList
  677. }
  678. return contentList
  679. }*/
  // WebSearchOptions is the `web_search_options` request object. UserLocation
  // is kept as raw JSON and is not interpreted by this type.
  type WebSearchOptions struct {
      SearchContextSize string          `json:"search_context_size,omitempty"`
      UserLocation      json.RawMessage `json:"user_location,omitempty"`
  }
  684. // https://platform.openai.com/docs/api-reference/responses/create
  685. type OpenAIResponsesRequest struct {
  686. Model string `json:"model"`
  687. Input any `json:"input,omitempty"`
  688. Include json.RawMessage `json:"include,omitempty"`
  689. Instructions json.RawMessage `json:"instructions,omitempty"`
  690. MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
  691. Metadata json.RawMessage `json:"metadata,omitempty"`
  692. ParallelToolCalls bool `json:"parallel_tool_calls,omitempty"`
  693. PreviousResponseID string `json:"previous_response_id,omitempty"`
  694. Reasoning *Reasoning `json:"reasoning,omitempty"`
  695. ServiceTier string `json:"service_tier,omitempty"`
  696. Store bool `json:"store,omitempty"`
  697. Stream bool `json:"stream,omitempty"`
  698. Temperature float64 `json:"temperature,omitempty"`
  699. Text json.RawMessage `json:"text,omitempty"`
  700. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  701. Tools []map[string]any `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  702. TopP float64 `json:"top_p,omitempty"`
  703. Truncation string `json:"truncation,omitempty"`
  704. User string `json:"user,omitempty"`
  705. MaxToolCalls uint `json:"max_tool_calls,omitempty"`
  706. Prompt json.RawMessage `json:"prompt,omitempty"`
  707. }
  708. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  709. var fileMeta = make([]*types.FileMeta, 0)
  710. var texts = make([]string, 0)
  711. if r.Input != nil {
  712. inputs := r.ParseInput()
  713. for _, input := range inputs {
  714. if input.Type == "input_image" {
  715. fileMeta = append(fileMeta, &types.FileMeta{
  716. FileType: types.FileTypeImage,
  717. OriginData: input.ImageUrl,
  718. Detail: input.Detail,
  719. })
  720. } else if input.Type == "input_file" {
  721. fileMeta = append(fileMeta, &types.FileMeta{
  722. FileType: types.FileTypeFile,
  723. OriginData: input.FileUrl,
  724. })
  725. } else {
  726. texts = append(texts, input.Text)
  727. }
  728. }
  729. }
  730. if len(r.Instructions) > 0 {
  731. texts = append(texts, string(r.Instructions))
  732. }
  733. if len(r.Metadata) > 0 {
  734. texts = append(texts, string(r.Metadata))
  735. }
  736. if len(r.Text) > 0 {
  737. texts = append(texts, string(r.Text))
  738. }
  739. if len(r.ToolChoice) > 0 {
  740. texts = append(texts, string(r.ToolChoice))
  741. }
  742. if len(r.Prompt) > 0 {
  743. texts = append(texts, string(r.Prompt))
  744. }
  745. if len(r.Tools) > 0 {
  746. toolStr, _ := common.Marshal(r.Tools)
  747. texts = append(texts, string(toolStr))
  748. }
  749. return &types.TokenCountMeta{
  750. CombineText: strings.Join(texts, "\n"),
  751. Files: fileMeta,
  752. MaxTokens: int(r.MaxOutputTokens),
  753. }
  754. }
  755. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  756. return r.Stream
  757. }
  // Reasoning is the `reasoning` object of a Responses request. Both fields
  // are omitted from the JSON when empty.
  type Reasoning struct {
      Effort  string `json:"effort,omitempty"`
      Summary string `json:"summary,omitempty"`
  }
  // MediaInput is one normalized item of a Responses request's input, as
  // produced by OpenAIResponsesRequest.ParseInput. Type selects which of the
  // other fields is relevant.
  type MediaInput struct {
      Type     string `json:"type"`
      Text     string `json:"text,omitempty"`
      FileUrl  string `json:"file_url,omitempty"`
      ImageUrl string `json:"image_url,omitempty"`
      Detail   string `json:"detail,omitempty"` // only meaningful for input_image
  }
  769. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  770. // Reference implementation mirrors Message.ParseContent:
  771. // - input can be a string, treated as an input_text item
  772. // - input can be an array of objects with a `type` field
  773. // supported types: input_text, input_image, input_file
  774. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  775. if r.Input == nil {
  776. return nil
  777. }
  778. var inputs []MediaInput
  779. // Try string first
  780. if str, ok := r.Input.(string); ok {
  781. inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  782. return inputs
  783. }
  784. // Try array of parts
  785. if array, ok := r.Input.([]any); ok {
  786. for _, itemAny := range array {
  787. // Already parsed MediaInput
  788. if media, ok := itemAny.(MediaInput); ok {
  789. inputs = append(inputs, media)
  790. continue
  791. }
  792. // Generic map
  793. item, ok := itemAny.(map[string]any)
  794. if !ok {
  795. continue
  796. }
  797. typeVal, ok := item["type"].(string)
  798. if !ok {
  799. continue
  800. }
  801. switch typeVal {
  802. case "input_text":
  803. text, _ := item["text"].(string)
  804. inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
  805. case "input_image":
  806. // image_url may be string or object with url field
  807. var imageUrl string
  808. switch v := item["image_url"].(type) {
  809. case string:
  810. imageUrl = v
  811. case map[string]any:
  812. if url, ok := v["url"].(string); ok {
  813. imageUrl = url
  814. }
  815. }
  816. inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  817. case "input_file":
  818. // file_url may be string or object with url field
  819. var fileUrl string
  820. switch v := item["file_url"].(type) {
  821. case string:
  822. fileUrl = v
  823. case map[string]any:
  824. if url, ok := v["url"].(string); ok {
  825. fileUrl = url
  826. }
  827. }
  828. inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  829. }
  830. }
  831. }
  832. return inputs
  833. }