openai_request.go 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "one-api/common"
  6. "one-api/types"
  7. "strings"
  8. "github.com/gin-gonic/gin"
  9. )
  // ResponseFormat is the `response_format` object of a request. The schema
  // payload is kept as raw JSON rather than being decoded into a Go type.
  type ResponseFormat struct {
  	// Type is serialized as "type" and omitted when empty.
  	Type string `json:"type,omitempty"`

  	// JsonSchema carries the undecoded "json_schema" value.
  	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  }
  // FormatJsonSchema describes a named JSON schema.
  // NOTE(review): presumably the decoded shape of ResponseFormat.JsonSchema —
  // confirm against callers.
  type FormatJsonSchema struct {
  	// Description is optional and omitted when empty.
  	Description string `json:"description,omitempty"`

  	// Name is always serialized, even when empty.
  	Name string `json:"name"`

  	// Schema is the schema document in whatever shape it was supplied.
  	Schema any `json:"schema,omitempty"`

  	// Strict is kept as raw JSON so the supplied value is preserved verbatim.
  	Strict json.RawMessage `json:"strict,omitempty"`
  }
  20. type GeneralOpenAIRequest struct {
  21. Model string `json:"model,omitempty"`
  22. Messages []Message `json:"messages,omitempty"`
  23. Prompt any `json:"prompt,omitempty"`
  24. Prefix any `json:"prefix,omitempty"`
  25. Suffix any `json:"suffix,omitempty"`
  26. Stream bool `json:"stream,omitempty"`
  27. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  28. MaxTokens uint `json:"max_tokens,omitempty"`
  29. MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
  30. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  31. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  32. Temperature *float64 `json:"temperature,omitempty"`
  33. TopP float64 `json:"top_p,omitempty"`
  34. TopK int `json:"top_k,omitempty"`
  35. Stop any `json:"stop,omitempty"`
  36. N int `json:"n,omitempty"`
  37. Input any `json:"input,omitempty"`
  38. Instruction string `json:"instruction,omitempty"`
  39. Size string `json:"size,omitempty"`
  40. Functions json.RawMessage `json:"functions,omitempty"`
  41. FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
  42. PresencePenalty float64 `json:"presence_penalty,omitempty"`
  43. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  44. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  45. Seed float64 `json:"seed,omitempty"`
  46. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  47. Tools []ToolCallRequest `json:"tools,omitempty"`
  48. ToolChoice any `json:"tool_choice,omitempty"`
  49. User string `json:"user,omitempty"`
  50. LogProbs bool `json:"logprobs,omitempty"`
  51. TopLogProbs int `json:"top_logprobs,omitempty"`
  52. Dimensions int `json:"dimensions,omitempty"`
  53. Modalities json.RawMessage `json:"modalities,omitempty"`
  54. Audio json.RawMessage `json:"audio,omitempty"`
  55. EnableThinking any `json:"enable_thinking,omitempty"` // ali
  56. THINKING json.RawMessage `json:"thinking,omitempty"` // doubao,zhipu_v4
  57. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  58. SearchParameters any `json:"search_parameters,omitempty"` //xai
  59. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  60. // OpenRouter Params
  61. Usage json.RawMessage `json:"usage,omitempty"`
  62. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  63. // Ali Qwen Params
  64. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  65. // 用匿名参数接收额外参数,例如ollama的think参数在此接收
  66. Extra map[string]json.RawMessage `json:"-"`
  67. }
  68. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  69. var tokenCountMeta types.TokenCountMeta
  70. var texts = make([]string, 0)
  71. var fileMeta = make([]*types.FileMeta, 0)
  72. if r.Prompt != nil {
  73. switch v := r.Prompt.(type) {
  74. case string:
  75. texts = append(texts, v)
  76. case []any:
  77. for _, item := range v {
  78. if str, ok := item.(string); ok {
  79. texts = append(texts, str)
  80. }
  81. }
  82. default:
  83. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  84. }
  85. }
  86. if r.Input != nil {
  87. inputs := r.ParseInput()
  88. texts = append(texts, inputs...)
  89. }
  90. if r.MaxCompletionTokens > r.MaxTokens {
  91. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  92. } else {
  93. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  94. }
  95. for _, message := range r.Messages {
  96. tokenCountMeta.MessagesCount++
  97. texts = append(texts, message.Role)
  98. if message.Content != nil {
  99. if message.Name != nil {
  100. tokenCountMeta.NameCount++
  101. texts = append(texts, *message.Name)
  102. }
  103. arrayContent := message.ParseContent()
  104. for _, m := range arrayContent {
  105. if m.Type == ContentTypeImageURL {
  106. imageUrl := m.GetImageMedia()
  107. if imageUrl != nil {
  108. if imageUrl.Url != "" {
  109. meta := &types.FileMeta{
  110. FileType: types.FileTypeImage,
  111. }
  112. meta.OriginData = imageUrl.Url
  113. meta.Detail = imageUrl.Detail
  114. fileMeta = append(fileMeta, meta)
  115. }
  116. }
  117. } else if m.Type == ContentTypeInputAudio {
  118. inputAudio := m.GetInputAudio()
  119. if inputAudio != nil {
  120. meta := &types.FileMeta{
  121. FileType: types.FileTypeAudio,
  122. }
  123. meta.OriginData = inputAudio.Data
  124. fileMeta = append(fileMeta, meta)
  125. }
  126. } else if m.Type == ContentTypeFile {
  127. file := m.GetFile()
  128. if file != nil {
  129. meta := &types.FileMeta{
  130. FileType: types.FileTypeFile,
  131. }
  132. meta.OriginData = file.FileData
  133. fileMeta = append(fileMeta, meta)
  134. }
  135. } else if m.Type == ContentTypeVideoUrl {
  136. videoUrl := m.GetVideoUrl()
  137. if videoUrl != nil && videoUrl.Url != "" {
  138. meta := &types.FileMeta{
  139. FileType: types.FileTypeVideo,
  140. }
  141. meta.OriginData = videoUrl.Url
  142. fileMeta = append(fileMeta, meta)
  143. }
  144. } else {
  145. texts = append(texts, m.Text)
  146. }
  147. }
  148. }
  149. }
  150. if r.Tools != nil {
  151. openaiTools := r.Tools
  152. for _, tool := range openaiTools {
  153. tokenCountMeta.ToolsCount++
  154. texts = append(texts, tool.Function.Name)
  155. if tool.Function.Description != "" {
  156. texts = append(texts, tool.Function.Description)
  157. }
  158. if tool.Function.Parameters != nil {
  159. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  160. }
  161. }
  162. //toolTokens := CountTokenInput(countStr, request.Model)
  163. //tkm += 8
  164. //tkm += toolTokens
  165. }
  166. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  167. tokenCountMeta.Files = fileMeta
  168. return &tokenCountMeta
  169. }
  170. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  171. return r.Stream
  172. }
  173. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  174. if modelName != "" {
  175. r.Model = modelName
  176. }
  177. }
  178. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  179. result := make(map[string]any)
  180. data, _ := common.Marshal(r)
  181. _ = common.Unmarshal(data, &result)
  182. return result
  183. }
  184. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  185. if strings.HasPrefix(r.Model, "o") {
  186. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  187. return "developer"
  188. }
  189. } else if strings.HasPrefix(r.Model, "gpt-5") {
  190. return "developer"
  191. }
  192. return "system"
  193. }
  194. type ToolCallRequest struct {
  195. ID string `json:"id,omitempty"`
  196. Type string `json:"type"`
  197. Function FunctionRequest `json:"function"`
  198. }
  // FunctionRequest describes a function inside a ToolCallRequest.
  type FunctionRequest struct {
  	// Description is optional and omitted when empty.
  	Description string `json:"description,omitempty"`

  	// Name is always serialized.
  	Name string `json:"name"`

  	// Parameters is held in whatever shape it was supplied.
  	Parameters any `json:"parameters,omitempty"`

  	// Arguments is a plain string and omitted when empty.
  	Arguments string `json:"arguments,omitempty"`
  }
  // StreamOptions is the `stream_options` object of a request.
  type StreamOptions struct {
  	// IncludeUsage is serialized as "include_usage" and omitted when false.
  	IncludeUsage bool `json:"include_usage,omitempty"`
  }
  208. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  209. if r.MaxCompletionTokens != 0 {
  210. return r.MaxCompletionTokens
  211. }
  212. return r.MaxTokens
  213. }
  214. func (r *GeneralOpenAIRequest) ParseInput() []string {
  215. if r.Input == nil {
  216. return nil
  217. }
  218. var input []string
  219. switch r.Input.(type) {
  220. case string:
  221. input = []string{r.Input.(string)}
  222. case []any:
  223. input = make([]string, 0, len(r.Input.([]any)))
  224. for _, item := range r.Input.([]any) {
  225. if str, ok := item.(string); ok {
  226. input = append(input, str)
  227. }
  228. }
  229. }
  230. return input
  231. }
  232. type Message struct {
  233. Role string `json:"role"`
  234. Content any `json:"content"`
  235. Name *string `json:"name,omitempty"`
  236. Prefix *bool `json:"prefix,omitempty"`
  237. ReasoningContent string `json:"reasoning_content,omitempty"`
  238. Reasoning string `json:"reasoning,omitempty"`
  239. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  240. ToolCallId string `json:"tool_call_id,omitempty"`
  241. parsedContent []MediaContent
  242. //parsedStringContent *string
  243. }
  // MediaContent is one part of a multi-part message content. Type selects
  // which of the payload fields is meaningful.
  //
  // The media fields are untyped; the Get* accessors accept either the typed
  // pointer (e.g. *MessageImageUrl) or a generic map[string]any.
  type MediaContent struct {
  	Type string `json:"type"`
  	Text string `json:"text,omitempty"`

  	ImageUrl   any `json:"image_url,omitempty"`
  	InputAudio any `json:"input_audio,omitempty"`
  	File       any `json:"file,omitempty"`
  	VideoUrl   any `json:"video_url,omitempty"`

  	// OpenRouter params.
  	CacheControl json.RawMessage `json:"cache_control,omitempty"`
  }
  254. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  255. if m.ImageUrl != nil {
  256. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  257. return m.ImageUrl.(*MessageImageUrl)
  258. }
  259. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  260. out := &MessageImageUrl{
  261. Url: common.Interface2String(itemMap["url"]),
  262. Detail: common.Interface2String(itemMap["detail"]),
  263. MimeType: common.Interface2String(itemMap["mime_type"]),
  264. }
  265. return out
  266. }
  267. }
  268. return nil
  269. }
  270. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  271. if m.InputAudio != nil {
  272. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  273. return m.InputAudio.(*MessageInputAudio)
  274. }
  275. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  276. out := &MessageInputAudio{
  277. Data: common.Interface2String(itemMap["data"]),
  278. Format: common.Interface2String(itemMap["format"]),
  279. }
  280. return out
  281. }
  282. }
  283. return nil
  284. }
  285. func (m *MediaContent) GetFile() *MessageFile {
  286. if m.File != nil {
  287. if _, ok := m.File.(*MessageFile); ok {
  288. return m.File.(*MessageFile)
  289. }
  290. if itemMap, ok := m.File.(map[string]any); ok {
  291. out := &MessageFile{
  292. FileName: common.Interface2String(itemMap["file_name"]),
  293. FileData: common.Interface2String(itemMap["file_data"]),
  294. FileId: common.Interface2String(itemMap["file_id"]),
  295. }
  296. return out
  297. }
  298. }
  299. return nil
  300. }
  301. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  302. if m.VideoUrl != nil {
  303. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  304. return m.VideoUrl.(*MessageVideoUrl)
  305. }
  306. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  307. out := &MessageVideoUrl{
  308. Url: common.Interface2String(itemMap["url"]),
  309. }
  310. return out
  311. }
  312. }
  313. return nil
  314. }
  // MessageImageUrl is the typed form of an image content part.
  type MessageImageUrl struct {
  	// Url is tested by IsRemoteImage for an "http" prefix.
  	Url string `json:"url"`

  	// Detail is always serialized; ParseContent defaults it to "high".
  	Detail string `json:"detail"`

  	// MimeType has no JSON tag, so encoding/json emits it under the key
  	// "MimeType".
  	MimeType string
  }
  320. func (m *MessageImageUrl) IsRemoteImage() bool {
  321. return strings.HasPrefix(m.Url, "http")
  322. }
  // MessageInputAudio is the typed form of an audio content part.
  type MessageInputAudio struct {
  	// Data is the base64 audio payload.
  	Data string `json:"data"`

  	// Format is always serialized alongside Data.
  	Format string `json:"format"`
  }
  // MessageFile is the typed form of a file content part. Every field is
  // optional and omitted from JSON when empty.
  type MessageFile struct {
  	// FileName is serialized under the key "filename".
  	FileName string `json:"filename,omitempty"`

  	// FileData is serialized under the key "file_data".
  	FileData string `json:"file_data,omitempty"`

  	// FileId is serialized under the key "file_id".
  	FileId string `json:"file_id,omitempty"`
  }
  // MessageVideoUrl is the typed form of a video content part.
  type MessageVideoUrl struct {
  	// Url is always serialized, under the key "url".
  	Url string `json:"url"`
  }
  // Values of MediaContent.Type recognised by Message.ParseContent.
  const (
  	// ContentTypeText marks a plain text part.
  	ContentTypeText = "text"
  	// ContentTypeImageURL marks an image part.
  	ContentTypeImageURL = "image_url"
  	// ContentTypeInputAudio marks an audio part.
  	ContentTypeInputAudio = "input_audio"
  	// ContentTypeFile marks a file part.
  	ContentTypeFile = "file"
  	// ContentTypeVideoUrl marks a video part (Alibaba Bailian video
  	// recognition).
  	ContentTypeVideoUrl = "video_url"
  	//ContentTypeAudioUrl = "audio_url"
  )
  343. func (m *Message) GetPrefix() bool {
  344. if m.Prefix == nil {
  345. return false
  346. }
  347. return *m.Prefix
  348. }
  349. func (m *Message) SetPrefix(prefix bool) {
  350. m.Prefix = &prefix
  351. }
  352. func (m *Message) ParseToolCalls() []ToolCallRequest {
  353. if m.ToolCalls == nil {
  354. return nil
  355. }
  356. var toolCalls []ToolCallRequest
  357. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  358. return toolCalls
  359. }
  360. return toolCalls
  361. }
  362. func (m *Message) SetToolCalls(toolCalls any) {
  363. toolCallsJson, _ := json.Marshal(toolCalls)
  364. m.ToolCalls = toolCallsJson
  365. }
  366. func (m *Message) StringContent() string {
  367. switch m.Content.(type) {
  368. case string:
  369. return m.Content.(string)
  370. case []any:
  371. var contentStr string
  372. for _, contentItem := range m.Content.([]any) {
  373. contentMap, ok := contentItem.(map[string]any)
  374. if !ok {
  375. continue
  376. }
  377. if contentMap["type"] == ContentTypeText {
  378. if subStr, ok := contentMap["text"].(string); ok {
  379. contentStr += subStr
  380. }
  381. }
  382. }
  383. return contentStr
  384. }
  385. return ""
  386. }
  387. func (m *Message) SetNullContent() {
  388. m.Content = nil
  389. m.parsedContent = nil
  390. }
  391. func (m *Message) SetStringContent(content string) {
  392. m.Content = content
  393. m.parsedContent = nil
  394. }
  395. func (m *Message) SetMediaContent(content []MediaContent) {
  396. m.Content = content
  397. m.parsedContent = content
  398. }
  399. func (m *Message) IsStringContent() bool {
  400. _, ok := m.Content.(string)
  401. if ok {
  402. return true
  403. }
  404. return false
  405. }
  406. func (m *Message) ParseContent() []MediaContent {
  407. if m.Content == nil {
  408. return nil
  409. }
  410. if len(m.parsedContent) > 0 {
  411. return m.parsedContent
  412. }
  413. var contentList []MediaContent
  414. // 先尝试解析为字符串
  415. content, ok := m.Content.(string)
  416. if ok {
  417. contentList = []MediaContent{{
  418. Type: ContentTypeText,
  419. Text: content,
  420. }}
  421. m.parsedContent = contentList
  422. return contentList
  423. }
  424. // 尝试解析为数组
  425. //var arrayContent []map[string]interface{}
  426. arrayContent, ok := m.Content.([]any)
  427. if !ok {
  428. return contentList
  429. }
  430. for _, contentItemAny := range arrayContent {
  431. mediaItem, ok := contentItemAny.(MediaContent)
  432. if ok {
  433. contentList = append(contentList, mediaItem)
  434. continue
  435. }
  436. contentItem, ok := contentItemAny.(map[string]any)
  437. if !ok {
  438. continue
  439. }
  440. contentType, ok := contentItem["type"].(string)
  441. if !ok {
  442. continue
  443. }
  444. switch contentType {
  445. case ContentTypeText:
  446. if text, ok := contentItem["text"].(string); ok {
  447. contentList = append(contentList, MediaContent{
  448. Type: ContentTypeText,
  449. Text: text,
  450. })
  451. }
  452. case ContentTypeImageURL:
  453. imageUrl := contentItem["image_url"]
  454. temp := &MessageImageUrl{
  455. Detail: "high",
  456. }
  457. switch v := imageUrl.(type) {
  458. case string:
  459. temp.Url = v
  460. case map[string]interface{}:
  461. url, ok1 := v["url"].(string)
  462. detail, ok2 := v["detail"].(string)
  463. if ok2 {
  464. temp.Detail = detail
  465. }
  466. if ok1 {
  467. temp.Url = url
  468. }
  469. }
  470. contentList = append(contentList, MediaContent{
  471. Type: ContentTypeImageURL,
  472. ImageUrl: temp,
  473. })
  474. case ContentTypeInputAudio:
  475. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  476. data, ok1 := audioData["data"].(string)
  477. format, ok2 := audioData["format"].(string)
  478. if ok1 && ok2 {
  479. temp := &MessageInputAudio{
  480. Data: data,
  481. Format: format,
  482. }
  483. contentList = append(contentList, MediaContent{
  484. Type: ContentTypeInputAudio,
  485. InputAudio: temp,
  486. })
  487. }
  488. }
  489. case ContentTypeFile:
  490. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  491. fileId, ok3 := fileData["file_id"].(string)
  492. if ok3 {
  493. contentList = append(contentList, MediaContent{
  494. Type: ContentTypeFile,
  495. File: &MessageFile{
  496. FileId: fileId,
  497. },
  498. })
  499. } else {
  500. fileName, ok1 := fileData["filename"].(string)
  501. fileDataStr, ok2 := fileData["file_data"].(string)
  502. if ok1 && ok2 {
  503. contentList = append(contentList, MediaContent{
  504. Type: ContentTypeFile,
  505. File: &MessageFile{
  506. FileName: fileName,
  507. FileData: fileDataStr,
  508. },
  509. })
  510. }
  511. }
  512. }
  513. case ContentTypeVideoUrl:
  514. if videoUrl, ok := contentItem["video_url"].(string); ok {
  515. contentList = append(contentList, MediaContent{
  516. Type: ContentTypeVideoUrl,
  517. VideoUrl: &MessageVideoUrl{
  518. Url: videoUrl,
  519. },
  520. })
  521. }
  522. }
  523. }
  524. if len(contentList) > 0 {
  525. m.parsedContent = contentList
  526. }
  527. return contentList
  528. }
  529. // old code
  530. /*func (m *Message) StringContent() string {
  531. if m.parsedStringContent != nil {
  532. return *m.parsedStringContent
  533. }
  534. var stringContent string
  535. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  536. m.parsedStringContent = &stringContent
  537. return stringContent
  538. }
  539. contentStr := new(strings.Builder)
  540. arrayContent := m.ParseContent()
  541. for _, content := range arrayContent {
  542. if content.Type == ContentTypeText {
  543. contentStr.WriteString(content.Text)
  544. }
  545. }
  546. stringContent = contentStr.String()
  547. m.parsedStringContent = &stringContent
  548. return stringContent
  549. }
  550. func (m *Message) SetNullContent() {
  551. m.Content = nil
  552. m.parsedStringContent = nil
  553. m.parsedContent = nil
  554. }
  555. func (m *Message) SetStringContent(content string) {
  556. jsonContent, _ := json.Marshal(content)
  557. m.Content = jsonContent
  558. m.parsedStringContent = &content
  559. m.parsedContent = nil
  560. }
  561. func (m *Message) SetMediaContent(content []MediaContent) {
  562. jsonContent, _ := json.Marshal(content)
  563. m.Content = jsonContent
  564. m.parsedContent = nil
  565. m.parsedStringContent = nil
  566. }
  567. func (m *Message) IsStringContent() bool {
  568. if m.parsedStringContent != nil {
  569. return true
  570. }
  571. var stringContent string
  572. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  573. m.parsedStringContent = &stringContent
  574. return true
  575. }
  576. return false
  577. }
  578. func (m *Message) ParseContent() []MediaContent {
  579. if m.parsedContent != nil {
  580. return m.parsedContent
  581. }
  582. var contentList []MediaContent
  583. // 先尝试解析为字符串
  584. var stringContent string
  585. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  586. contentList = []MediaContent{{
  587. Type: ContentTypeText,
  588. Text: stringContent,
  589. }}
  590. m.parsedContent = contentList
  591. return contentList
  592. }
  593. // 尝试解析为数组
  594. var arrayContent []map[string]interface{}
  595. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  596. for _, contentItem := range arrayContent {
  597. contentType, ok := contentItem["type"].(string)
  598. if !ok {
  599. continue
  600. }
  601. switch contentType {
  602. case ContentTypeText:
  603. if text, ok := contentItem["text"].(string); ok {
  604. contentList = append(contentList, MediaContent{
  605. Type: ContentTypeText,
  606. Text: text,
  607. })
  608. }
  609. case ContentTypeImageURL:
  610. imageUrl := contentItem["image_url"]
  611. temp := &MessageImageUrl{
  612. Detail: "high",
  613. }
  614. switch v := imageUrl.(type) {
  615. case string:
  616. temp.Url = v
  617. case map[string]interface{}:
  618. url, ok1 := v["url"].(string)
  619. detail, ok2 := v["detail"].(string)
  620. if ok2 {
  621. temp.Detail = detail
  622. }
  623. if ok1 {
  624. temp.Url = url
  625. }
  626. }
  627. contentList = append(contentList, MediaContent{
  628. Type: ContentTypeImageURL,
  629. ImageUrl: temp,
  630. })
  631. case ContentTypeInputAudio:
  632. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  633. data, ok1 := audioData["data"].(string)
  634. format, ok2 := audioData["format"].(string)
  635. if ok1 && ok2 {
  636. temp := &MessageInputAudio{
  637. Data: data,
  638. Format: format,
  639. }
  640. contentList = append(contentList, MediaContent{
  641. Type: ContentTypeInputAudio,
  642. InputAudio: temp,
  643. })
  644. }
  645. }
  646. case ContentTypeFile:
  647. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  648. fileId, ok3 := fileData["file_id"].(string)
  649. if ok3 {
  650. contentList = append(contentList, MediaContent{
  651. Type: ContentTypeFile,
  652. File: &MessageFile{
  653. FileId: fileId,
  654. },
  655. })
  656. } else {
  657. fileName, ok1 := fileData["filename"].(string)
  658. fileDataStr, ok2 := fileData["file_data"].(string)
  659. if ok1 && ok2 {
  660. contentList = append(contentList, MediaContent{
  661. Type: ContentTypeFile,
  662. File: &MessageFile{
  663. FileName: fileName,
  664. FileData: fileDataStr,
  665. },
  666. })
  667. }
  668. }
  669. }
  670. case ContentTypeVideoUrl:
  671. if videoUrl, ok := contentItem["video_url"].(string); ok {
  672. contentList = append(contentList, MediaContent{
  673. Type: ContentTypeVideoUrl,
  674. VideoUrl: &MessageVideoUrl{
  675. Url: videoUrl,
  676. },
  677. })
  678. }
  679. }
  680. }
  681. }
  682. if len(contentList) > 0 {
  683. m.parsedContent = contentList
  684. }
  685. return contentList
  686. }*/
  // WebSearchOptions is the `web_search_options` object of a request.
  type WebSearchOptions struct {
  	// SearchContextSize is omitted when empty.
  	SearchContextSize string `json:"search_context_size,omitempty"`

  	// UserLocation is kept as raw JSON and omitted when empty.
  	UserLocation json.RawMessage `json:"user_location,omitempty"`
  }
  691. // https://platform.openai.com/docs/api-reference/responses/create
  692. type OpenAIResponsesRequest struct {
  693. Model string `json:"model"`
  694. Input any `json:"input,omitempty"`
  695. Include json.RawMessage `json:"include,omitempty"`
  696. Instructions json.RawMessage `json:"instructions,omitempty"`
  697. MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
  698. Metadata json.RawMessage `json:"metadata,omitempty"`
  699. ParallelToolCalls bool `json:"parallel_tool_calls,omitempty"`
  700. PreviousResponseID string `json:"previous_response_id,omitempty"`
  701. Reasoning *Reasoning `json:"reasoning,omitempty"`
  702. ServiceTier string `json:"service_tier,omitempty"`
  703. Store bool `json:"store,omitempty"`
  704. Stream bool `json:"stream,omitempty"`
  705. Temperature float64 `json:"temperature,omitempty"`
  706. Text json.RawMessage `json:"text,omitempty"`
  707. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  708. Tools []map[string]any `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  709. TopP float64 `json:"top_p,omitempty"`
  710. Truncation string `json:"truncation,omitempty"`
  711. User string `json:"user,omitempty"`
  712. MaxToolCalls uint `json:"max_tool_calls,omitempty"`
  713. Prompt json.RawMessage `json:"prompt,omitempty"`
  714. }
  715. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  716. var fileMeta = make([]*types.FileMeta, 0)
  717. var texts = make([]string, 0)
  718. if r.Input != nil {
  719. inputs := r.ParseInput()
  720. for _, input := range inputs {
  721. if input.Type == "input_image" {
  722. if input.ImageUrl != "" {
  723. fileMeta = append(fileMeta, &types.FileMeta{
  724. FileType: types.FileTypeImage,
  725. OriginData: input.ImageUrl,
  726. Detail: input.Detail,
  727. })
  728. }
  729. } else if input.Type == "input_file" {
  730. if input.FileUrl != "" {
  731. fileMeta = append(fileMeta, &types.FileMeta{
  732. FileType: types.FileTypeFile,
  733. OriginData: input.FileUrl,
  734. })
  735. }
  736. } else {
  737. texts = append(texts, input.Text)
  738. }
  739. }
  740. }
  741. if len(r.Instructions) > 0 {
  742. texts = append(texts, string(r.Instructions))
  743. }
  744. if len(r.Metadata) > 0 {
  745. texts = append(texts, string(r.Metadata))
  746. }
  747. if len(r.Text) > 0 {
  748. texts = append(texts, string(r.Text))
  749. }
  750. if len(r.ToolChoice) > 0 {
  751. texts = append(texts, string(r.ToolChoice))
  752. }
  753. if len(r.Prompt) > 0 {
  754. texts = append(texts, string(r.Prompt))
  755. }
  756. if len(r.Tools) > 0 {
  757. toolStr, _ := common.Marshal(r.Tools)
  758. texts = append(texts, string(toolStr))
  759. }
  760. return &types.TokenCountMeta{
  761. CombineText: strings.Join(texts, "\n"),
  762. Files: fileMeta,
  763. MaxTokens: int(r.MaxOutputTokens),
  764. }
  765. }
  766. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  767. return r.Stream
  768. }
  769. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  770. if modelName != "" {
  771. r.Model = modelName
  772. }
  773. }
  // Reasoning is the `reasoning` object of an OpenAIResponsesRequest. Both
  // fields are omitted from JSON when empty.
  type Reasoning struct {
  	// Effort is serialized under the key "effort".
  	Effort string `json:"effort,omitempty"`

  	// Summary is serialized under the key "summary".
  	Summary string `json:"summary,omitempty"`
  }
  // MediaInput is one normalized item of a Responses API input, as produced by
  // OpenAIResponsesRequest.ParseInput. Type selects which payload field is
  // meaningful.
  type MediaInput struct {
  	Type string `json:"type"`

  	// Text is the payload of an input_text item.
  	Text string `json:"text,omitempty"`

  	// FileUrl is the payload of an input_file item.
  	FileUrl string `json:"file_url,omitempty"`

  	// ImageUrl is the payload of an input_image item.
  	ImageUrl string `json:"image_url,omitempty"`

  	// Detail is only meaningful for input_image items.
  	Detail string `json:"detail,omitempty"`
  }
  785. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  786. // Reference implementation mirrors Message.ParseContent:
  787. // - input can be a string, treated as an input_text item
  788. // - input can be an array of objects with a `type` field
  789. // supported types: input_text, input_image, input_file
  790. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  791. if r.Input == nil {
  792. return nil
  793. }
  794. var inputs []MediaInput
  795. // Try string first
  796. if str, ok := r.Input.(string); ok {
  797. inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  798. return inputs
  799. }
  800. // Try array of parts
  801. if array, ok := r.Input.([]any); ok {
  802. for _, itemAny := range array {
  803. // Already parsed MediaInput
  804. if media, ok := itemAny.(MediaInput); ok {
  805. inputs = append(inputs, media)
  806. continue
  807. }
  808. // Generic map
  809. item, ok := itemAny.(map[string]any)
  810. if !ok {
  811. continue
  812. }
  813. typeVal, ok := item["type"].(string)
  814. if !ok {
  815. continue
  816. }
  817. switch typeVal {
  818. case "input_text":
  819. text, _ := item["text"].(string)
  820. inputs = append(inputs, MediaInput{Type: "input_text", Text: text})
  821. case "input_image":
  822. // image_url may be string or object with url field
  823. var imageUrl string
  824. switch v := item["image_url"].(type) {
  825. case string:
  826. imageUrl = v
  827. case map[string]any:
  828. if url, ok := v["url"].(string); ok {
  829. imageUrl = url
  830. }
  831. }
  832. inputs = append(inputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  833. case "input_file":
  834. // file_url may be string or object with url field
  835. var fileUrl string
  836. switch v := item["file_url"].(type) {
  837. case string:
  838. fileUrl = v
  839. case map[string]any:
  840. if url, ok := v["url"].(string); ok {
  841. fileUrl = url
  842. }
  843. }
  844. inputs = append(inputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  845. }
  846. }
  847. }
  848. return inputs
  849. }