openai_request.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
  10. type ResponseFormat struct {
  11. Type string `json:"type,omitempty"`
  12. JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  13. }
  14. type FormatJsonSchema struct {
  15. Description string `json:"description,omitempty"`
  16. Name string `json:"name"`
  17. Schema any `json:"schema,omitempty"`
  18. Strict json.RawMessage `json:"strict,omitempty"`
  19. }
  20. // GeneralOpenAIRequest represents a general request structure for OpenAI-compatible APIs.
  21. // 参数增加规范:无引用的参数必须使用json.RawMessage类型,并添加omitempty标签
  22. type GeneralOpenAIRequest struct {
  23. Model string `json:"model,omitempty"`
  24. Messages []Message `json:"messages,omitempty"`
  25. Prompt any `json:"prompt,omitempty"`
  26. Prefix any `json:"prefix,omitempty"`
  27. Suffix any `json:"suffix,omitempty"`
  28. Stream bool `json:"stream,omitempty"`
  29. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  30. MaxTokens uint `json:"max_tokens,omitempty"`
  31. MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
  32. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  33. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  34. Temperature *float64 `json:"temperature,omitempty"`
  35. TopP float64 `json:"top_p,omitempty"`
  36. TopK int `json:"top_k,omitempty"`
  37. Stop any `json:"stop,omitempty"`
  38. N int `json:"n,omitempty"`
  39. Input any `json:"input,omitempty"`
  40. Instruction string `json:"instruction,omitempty"`
  41. Size string `json:"size,omitempty"`
  42. Functions json.RawMessage `json:"functions,omitempty"`
  43. FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
  44. PresencePenalty float64 `json:"presence_penalty,omitempty"`
  45. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  46. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  47. Seed float64 `json:"seed,omitempty"`
  48. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  49. Tools []ToolCallRequest `json:"tools,omitempty"`
  50. ToolChoice any `json:"tool_choice,omitempty"`
  51. FunctionCall json.RawMessage `json:"function_call,omitempty"`
  52. User string `json:"user,omitempty"`
  53. // ServiceTier specifies upstream service level and may affect billing.
  54. // This field is filtered by default and can be enabled via channel setting allow_service_tier.
  55. ServiceTier string `json:"service_tier,omitempty"`
  56. LogProbs bool `json:"logprobs,omitempty"`
  57. TopLogProbs int `json:"top_logprobs,omitempty"`
  58. Dimensions int `json:"dimensions,omitempty"`
  59. Modalities json.RawMessage `json:"modalities,omitempty"`
  60. Audio json.RawMessage `json:"audio,omitempty"`
  61. // 安全标识符,用于帮助 OpenAI 检测可能违反使用政策的应用程序用户
  62. // 注意:此字段会向 OpenAI 发送用户标识信息,默认过滤,可通过 allow_safety_identifier 开启
  63. SafetyIdentifier string `json:"safety_identifier,omitempty"`
  64. // Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
  65. // 是否存储此次请求数据供 OpenAI 用于评估和优化产品
  66. // 注意:默认允许透传,可通过 disable_store 禁用;禁用后可能导致 Codex 无法正常使用
  67. Store json.RawMessage `json:"store,omitempty"`
  68. // Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
  69. PromptCacheKey string `json:"prompt_cache_key,omitempty"`
  70. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  71. LogitBias json.RawMessage `json:"logit_bias,omitempty"`
  72. Metadata json.RawMessage `json:"metadata,omitempty"`
  73. Prediction json.RawMessage `json:"prediction,omitempty"`
  74. // gemini
  75. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  76. //xai
  77. SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
  78. // claude
  79. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  80. // OpenRouter Params
  81. Usage json.RawMessage `json:"usage,omitempty"`
  82. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  83. // Ali Qwen Params
  84. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  85. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  86. ChatTemplateKwargs json.RawMessage `json:"chat_template_kwargs,omitempty"`
  87. EnableSearch json.RawMessage `json:"enable_search,omitempty"`
  88. // ollama Params
  89. Think json.RawMessage `json:"think,omitempty"`
  90. // baidu v2
  91. WebSearch json.RawMessage `json:"web_search,omitempty"`
  92. // doubao,zhipu_v4
  93. THINKING json.RawMessage `json:"thinking,omitempty"`
  94. // pplx Params
  95. SearchDomainFilter json.RawMessage `json:"search_domain_filter,omitempty"`
  96. SearchRecencyFilter string `json:"search_recency_filter,omitempty"`
  97. ReturnImages bool `json:"return_images,omitempty"`
  98. ReturnRelatedQuestions bool `json:"return_related_questions,omitempty"`
  99. SearchMode string `json:"search_mode,omitempty"`
  100. // Minimax
  101. ReasoningSplit json.RawMessage `json:"reasoning_split,omitempty"`
  102. }
  103. // createFileSource 根据数据内容创建正确类型的 FileSource
  104. func createFileSource(data string) *types.FileSource {
  105. if strings.HasPrefix(data, "http://") || strings.HasPrefix(data, "https://") {
  106. return types.NewURLFileSource(data)
  107. }
  108. return types.NewBase64FileSource(data, "")
  109. }
  110. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  111. var tokenCountMeta types.TokenCountMeta
  112. var texts = make([]string, 0)
  113. var fileMeta = make([]*types.FileMeta, 0)
  114. if r.Prompt != nil {
  115. switch v := r.Prompt.(type) {
  116. case string:
  117. texts = append(texts, v)
  118. case []any:
  119. for _, item := range v {
  120. if str, ok := item.(string); ok {
  121. texts = append(texts, str)
  122. }
  123. }
  124. default:
  125. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  126. }
  127. }
  128. if r.Input != nil {
  129. inputs := r.ParseInput()
  130. texts = append(texts, inputs...)
  131. }
  132. if r.MaxCompletionTokens > r.MaxTokens {
  133. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  134. } else {
  135. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  136. }
  137. for _, message := range r.Messages {
  138. tokenCountMeta.MessagesCount++
  139. texts = append(texts, message.Role)
  140. if message.Content != nil {
  141. if message.Name != nil {
  142. tokenCountMeta.NameCount++
  143. texts = append(texts, *message.Name)
  144. }
  145. arrayContent := message.ParseContent()
  146. for _, m := range arrayContent {
  147. if m.Type == ContentTypeImageURL {
  148. imageUrl := m.GetImageMedia()
  149. if imageUrl != nil && imageUrl.Url != "" {
  150. source := createFileSource(imageUrl.Url)
  151. fileMeta = append(fileMeta, &types.FileMeta{
  152. FileType: types.FileTypeImage,
  153. Source: source,
  154. Detail: imageUrl.Detail,
  155. })
  156. }
  157. } else if m.Type == ContentTypeInputAudio {
  158. inputAudio := m.GetInputAudio()
  159. if inputAudio != nil && inputAudio.Data != "" {
  160. source := createFileSource(inputAudio.Data)
  161. fileMeta = append(fileMeta, &types.FileMeta{
  162. FileType: types.FileTypeAudio,
  163. Source: source,
  164. })
  165. }
  166. } else if m.Type == ContentTypeFile {
  167. file := m.GetFile()
  168. if file != nil && file.FileData != "" {
  169. source := createFileSource(file.FileData)
  170. fileMeta = append(fileMeta, &types.FileMeta{
  171. FileType: types.FileTypeFile,
  172. Source: source,
  173. })
  174. }
  175. } else if m.Type == ContentTypeVideoUrl {
  176. videoUrl := m.GetVideoUrl()
  177. if videoUrl != nil && videoUrl.Url != "" {
  178. source := createFileSource(videoUrl.Url)
  179. fileMeta = append(fileMeta, &types.FileMeta{
  180. FileType: types.FileTypeVideo,
  181. Source: source,
  182. })
  183. }
  184. } else {
  185. texts = append(texts, m.Text)
  186. }
  187. }
  188. }
  189. }
  190. if r.Tools != nil {
  191. openaiTools := r.Tools
  192. for _, tool := range openaiTools {
  193. tokenCountMeta.ToolsCount++
  194. texts = append(texts, tool.Function.Name)
  195. if tool.Function.Description != "" {
  196. texts = append(texts, tool.Function.Description)
  197. }
  198. if tool.Function.Parameters != nil {
  199. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  200. }
  201. }
  202. //toolTokens := CountTokenInput(countStr, request.Model)
  203. //tkm += 8
  204. //tkm += toolTokens
  205. }
  206. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  207. tokenCountMeta.Files = fileMeta
  208. return &tokenCountMeta
  209. }
  210. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  211. return r.Stream
  212. }
  213. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  214. if modelName != "" {
  215. r.Model = modelName
  216. }
  217. }
  218. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  219. result := make(map[string]any)
  220. data, _ := common.Marshal(r)
  221. _ = common.Unmarshal(data, &result)
  222. return result
  223. }
  224. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  225. if strings.HasPrefix(r.Model, "o") {
  226. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  227. return "developer"
  228. }
  229. } else if strings.HasPrefix(r.Model, "gpt-5") {
  230. return "developer"
  231. }
  232. return "system"
  233. }
  234. const CustomType = "custom"
  235. type ToolCallRequest struct {
  236. ID string `json:"id,omitempty"`
  237. Type string `json:"type"`
  238. Function FunctionRequest `json:"function,omitempty"`
  239. Custom json.RawMessage `json:"custom,omitempty"`
  240. }
  241. type FunctionRequest struct {
  242. Description string `json:"description,omitempty"`
  243. Name string `json:"name"`
  244. Parameters any `json:"parameters,omitempty"`
  245. Arguments string `json:"arguments,omitempty"`
  246. }
  247. type StreamOptions struct {
  248. IncludeUsage bool `json:"include_usage,omitempty"`
  249. // IncludeObfuscation is only for /v1/responses stream payload.
  250. // This field is filtered by default and can be enabled via channel setting allow_include_obfuscation.
  251. IncludeObfuscation bool `json:"include_obfuscation,omitempty"`
  252. }
  253. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  254. if r.MaxCompletionTokens != 0 {
  255. return r.MaxCompletionTokens
  256. }
  257. return r.MaxTokens
  258. }
  259. func (r *GeneralOpenAIRequest) ParseInput() []string {
  260. if r.Input == nil {
  261. return nil
  262. }
  263. var input []string
  264. switch r.Input.(type) {
  265. case string:
  266. input = []string{r.Input.(string)}
  267. case []any:
  268. input = make([]string, 0, len(r.Input.([]any)))
  269. for _, item := range r.Input.([]any) {
  270. if str, ok := item.(string); ok {
  271. input = append(input, str)
  272. }
  273. }
  274. }
  275. return input
  276. }
  277. type Message struct {
  278. Role string `json:"role"`
  279. Content any `json:"content"`
  280. Name *string `json:"name,omitempty"`
  281. Prefix *bool `json:"prefix,omitempty"`
  282. ReasoningContent string `json:"reasoning_content,omitempty"`
  283. Reasoning string `json:"reasoning,omitempty"`
  284. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  285. ToolCallId string `json:"tool_call_id,omitempty"`
  286. parsedContent []MediaContent
  287. //parsedStringContent *string
  288. }
  289. type MediaContent struct {
  290. Type string `json:"type"`
  291. Text string `json:"text,omitempty"`
  292. ImageUrl any `json:"image_url,omitempty"`
  293. InputAudio any `json:"input_audio,omitempty"`
  294. File any `json:"file,omitempty"`
  295. VideoUrl any `json:"video_url,omitempty"`
  296. // OpenRouter Params
  297. CacheControl json.RawMessage `json:"cache_control,omitempty"`
  298. }
  299. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  300. if m.ImageUrl != nil {
  301. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  302. return m.ImageUrl.(*MessageImageUrl)
  303. }
  304. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  305. out := &MessageImageUrl{
  306. Url: common.Interface2String(itemMap["url"]),
  307. Detail: common.Interface2String(itemMap["detail"]),
  308. MimeType: common.Interface2String(itemMap["mime_type"]),
  309. }
  310. return out
  311. }
  312. }
  313. return nil
  314. }
  315. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  316. if m.InputAudio != nil {
  317. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  318. return m.InputAudio.(*MessageInputAudio)
  319. }
  320. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  321. out := &MessageInputAudio{
  322. Data: common.Interface2String(itemMap["data"]),
  323. Format: common.Interface2String(itemMap["format"]),
  324. }
  325. return out
  326. }
  327. }
  328. return nil
  329. }
  330. func (m *MediaContent) GetFile() *MessageFile {
  331. if m.File != nil {
  332. if _, ok := m.File.(*MessageFile); ok {
  333. return m.File.(*MessageFile)
  334. }
  335. if itemMap, ok := m.File.(map[string]any); ok {
  336. out := &MessageFile{
  337. FileName: common.Interface2String(itemMap["file_name"]),
  338. FileData: common.Interface2String(itemMap["file_data"]),
  339. FileId: common.Interface2String(itemMap["file_id"]),
  340. }
  341. return out
  342. }
  343. }
  344. return nil
  345. }
  346. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  347. if m.VideoUrl != nil {
  348. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  349. return m.VideoUrl.(*MessageVideoUrl)
  350. }
  351. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  352. out := &MessageVideoUrl{
  353. Url: common.Interface2String(itemMap["url"]),
  354. }
  355. return out
  356. }
  357. }
  358. return nil
  359. }
  360. type MessageImageUrl struct {
  361. Url string `json:"url"`
  362. Detail string `json:"detail"`
  363. MimeType string
  364. }
  365. func (m *MessageImageUrl) IsRemoteImage() bool {
  366. return strings.HasPrefix(m.Url, "http")
  367. }
  368. type MessageInputAudio struct {
  369. Data string `json:"data"` //base64
  370. Format string `json:"format"`
  371. }
  372. type MessageFile struct {
  373. FileName string `json:"filename,omitempty"`
  374. FileData string `json:"file_data,omitempty"`
  375. FileId string `json:"file_id,omitempty"`
  376. }
  377. type MessageVideoUrl struct {
  378. Url string `json:"url"`
  379. }
  380. const (
  381. ContentTypeText = "text"
  382. ContentTypeImageURL = "image_url"
  383. ContentTypeInputAudio = "input_audio"
  384. ContentTypeFile = "file"
  385. ContentTypeVideoUrl = "video_url" // 阿里百炼视频识别
  386. //ContentTypeAudioUrl = "audio_url"
  387. )
  388. func (m *Message) GetPrefix() bool {
  389. if m.Prefix == nil {
  390. return false
  391. }
  392. return *m.Prefix
  393. }
  394. func (m *Message) SetPrefix(prefix bool) {
  395. m.Prefix = &prefix
  396. }
  397. func (m *Message) ParseToolCalls() []ToolCallRequest {
  398. if m.ToolCalls == nil {
  399. return nil
  400. }
  401. var toolCalls []ToolCallRequest
  402. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  403. return toolCalls
  404. }
  405. return toolCalls
  406. }
  407. func (m *Message) SetToolCalls(toolCalls any) {
  408. toolCallsJson, _ := json.Marshal(toolCalls)
  409. m.ToolCalls = toolCallsJson
  410. }
  411. func (m *Message) StringContent() string {
  412. switch m.Content.(type) {
  413. case string:
  414. return m.Content.(string)
  415. case []any:
  416. var contentStr string
  417. for _, contentItem := range m.Content.([]any) {
  418. contentMap, ok := contentItem.(map[string]any)
  419. if !ok {
  420. continue
  421. }
  422. if contentMap["type"] == ContentTypeText {
  423. if subStr, ok := contentMap["text"].(string); ok {
  424. contentStr += subStr
  425. }
  426. }
  427. }
  428. return contentStr
  429. }
  430. return ""
  431. }
  432. func (m *Message) SetNullContent() {
  433. m.Content = nil
  434. m.parsedContent = nil
  435. }
  436. func (m *Message) SetStringContent(content string) {
  437. m.Content = content
  438. m.parsedContent = nil
  439. }
  440. func (m *Message) SetMediaContent(content []MediaContent) {
  441. m.Content = content
  442. m.parsedContent = content
  443. }
  444. func (m *Message) IsStringContent() bool {
  445. _, ok := m.Content.(string)
  446. if ok {
  447. return true
  448. }
  449. return false
  450. }
  451. func (m *Message) ParseContent() []MediaContent {
  452. if m.Content == nil {
  453. return nil
  454. }
  455. if len(m.parsedContent) > 0 {
  456. return m.parsedContent
  457. }
  458. var contentList []MediaContent
  459. // 先尝试解析为字符串
  460. content, ok := m.Content.(string)
  461. if ok {
  462. contentList = []MediaContent{{
  463. Type: ContentTypeText,
  464. Text: content,
  465. }}
  466. m.parsedContent = contentList
  467. return contentList
  468. }
  469. // 尝试解析为数组
  470. //var arrayContent []map[string]interface{}
  471. arrayContent, ok := m.Content.([]any)
  472. if !ok {
  473. return contentList
  474. }
  475. for _, contentItemAny := range arrayContent {
  476. mediaItem, ok := contentItemAny.(MediaContent)
  477. if ok {
  478. contentList = append(contentList, mediaItem)
  479. continue
  480. }
  481. contentItem, ok := contentItemAny.(map[string]any)
  482. if !ok {
  483. continue
  484. }
  485. contentType, ok := contentItem["type"].(string)
  486. if !ok {
  487. continue
  488. }
  489. switch contentType {
  490. case ContentTypeText:
  491. if text, ok := contentItem["text"].(string); ok {
  492. contentList = append(contentList, MediaContent{
  493. Type: ContentTypeText,
  494. Text: text,
  495. })
  496. }
  497. case ContentTypeImageURL:
  498. imageUrl := contentItem["image_url"]
  499. temp := &MessageImageUrl{
  500. Detail: "high",
  501. }
  502. switch v := imageUrl.(type) {
  503. case string:
  504. temp.Url = v
  505. case map[string]interface{}:
  506. url, ok1 := v["url"].(string)
  507. detail, ok2 := v["detail"].(string)
  508. if ok2 {
  509. temp.Detail = detail
  510. }
  511. if ok1 {
  512. temp.Url = url
  513. }
  514. }
  515. contentList = append(contentList, MediaContent{
  516. Type: ContentTypeImageURL,
  517. ImageUrl: temp,
  518. })
  519. case ContentTypeInputAudio:
  520. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  521. data, ok1 := audioData["data"].(string)
  522. format, ok2 := audioData["format"].(string)
  523. if ok1 && ok2 {
  524. temp := &MessageInputAudio{
  525. Data: data,
  526. Format: format,
  527. }
  528. contentList = append(contentList, MediaContent{
  529. Type: ContentTypeInputAudio,
  530. InputAudio: temp,
  531. })
  532. }
  533. }
  534. case ContentTypeFile:
  535. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  536. fileId, ok3 := fileData["file_id"].(string)
  537. if ok3 {
  538. contentList = append(contentList, MediaContent{
  539. Type: ContentTypeFile,
  540. File: &MessageFile{
  541. FileId: fileId,
  542. },
  543. })
  544. } else {
  545. fileName, ok1 := fileData["filename"].(string)
  546. fileDataStr, ok2 := fileData["file_data"].(string)
  547. if ok1 && ok2 {
  548. contentList = append(contentList, MediaContent{
  549. Type: ContentTypeFile,
  550. File: &MessageFile{
  551. FileName: fileName,
  552. FileData: fileDataStr,
  553. },
  554. })
  555. }
  556. }
  557. }
  558. case ContentTypeVideoUrl:
  559. if videoUrl, ok := contentItem["video_url"].(string); ok {
  560. contentList = append(contentList, MediaContent{
  561. Type: ContentTypeVideoUrl,
  562. VideoUrl: &MessageVideoUrl{
  563. Url: videoUrl,
  564. },
  565. })
  566. }
  567. }
  568. }
  569. if len(contentList) > 0 {
  570. m.parsedContent = contentList
  571. }
  572. return contentList
  573. }
// Old implementation, kept for reference only: it dates from when Message.Content held raw JSON bytes.
  575. /*func (m *Message) StringContent() string {
  576. if m.parsedStringContent != nil {
  577. return *m.parsedStringContent
  578. }
  579. var stringContent string
  580. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  581. m.parsedStringContent = &stringContent
  582. return stringContent
  583. }
  584. contentStr := new(strings.Builder)
  585. arrayContent := m.ParseContent()
  586. for _, content := range arrayContent {
  587. if content.Type == ContentTypeText {
  588. contentStr.WriteString(content.Text)
  589. }
  590. }
  591. stringContent = contentStr.String()
  592. m.parsedStringContent = &stringContent
  593. return stringContent
  594. }
  595. func (m *Message) SetNullContent() {
  596. m.Content = nil
  597. m.parsedStringContent = nil
  598. m.parsedContent = nil
  599. }
  600. func (m *Message) SetStringContent(content string) {
  601. jsonContent, _ := json.Marshal(content)
  602. m.Content = jsonContent
  603. m.parsedStringContent = &content
  604. m.parsedContent = nil
  605. }
  606. func (m *Message) SetMediaContent(content []MediaContent) {
  607. jsonContent, _ := json.Marshal(content)
  608. m.Content = jsonContent
  609. m.parsedContent = nil
  610. m.parsedStringContent = nil
  611. }
  612. func (m *Message) IsStringContent() bool {
  613. if m.parsedStringContent != nil {
  614. return true
  615. }
  616. var stringContent string
  617. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  618. m.parsedStringContent = &stringContent
  619. return true
  620. }
  621. return false
  622. }
  623. func (m *Message) ParseContent() []MediaContent {
  624. if m.parsedContent != nil {
  625. return m.parsedContent
  626. }
  627. var contentList []MediaContent
  628. // 先尝试解析为字符串
  629. var stringContent string
  630. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  631. contentList = []MediaContent{{
  632. Type: ContentTypeText,
  633. Text: stringContent,
  634. }}
  635. m.parsedContent = contentList
  636. return contentList
  637. }
  638. // 尝试解析为数组
  639. var arrayContent []map[string]interface{}
  640. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  641. for _, contentItem := range arrayContent {
  642. contentType, ok := contentItem["type"].(string)
  643. if !ok {
  644. continue
  645. }
  646. switch contentType {
  647. case ContentTypeText:
  648. if text, ok := contentItem["text"].(string); ok {
  649. contentList = append(contentList, MediaContent{
  650. Type: ContentTypeText,
  651. Text: text,
  652. })
  653. }
  654. case ContentTypeImageURL:
  655. imageUrl := contentItem["image_url"]
  656. temp := &MessageImageUrl{
  657. Detail: "high",
  658. }
  659. switch v := imageUrl.(type) {
  660. case string:
  661. temp.Url = v
  662. case map[string]interface{}:
  663. url, ok1 := v["url"].(string)
  664. detail, ok2 := v["detail"].(string)
  665. if ok2 {
  666. temp.Detail = detail
  667. }
  668. if ok1 {
  669. temp.Url = url
  670. }
  671. }
  672. contentList = append(contentList, MediaContent{
  673. Type: ContentTypeImageURL,
  674. ImageUrl: temp,
  675. })
  676. case ContentTypeInputAudio:
  677. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  678. data, ok1 := audioData["data"].(string)
  679. format, ok2 := audioData["format"].(string)
  680. if ok1 && ok2 {
  681. temp := &MessageInputAudio{
  682. Data: data,
  683. Format: format,
  684. }
  685. contentList = append(contentList, MediaContent{
  686. Type: ContentTypeInputAudio,
  687. InputAudio: temp,
  688. })
  689. }
  690. }
  691. case ContentTypeFile:
  692. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  693. fileId, ok3 := fileData["file_id"].(string)
  694. if ok3 {
  695. contentList = append(contentList, MediaContent{
  696. Type: ContentTypeFile,
  697. File: &MessageFile{
  698. FileId: fileId,
  699. },
  700. })
  701. } else {
  702. fileName, ok1 := fileData["filename"].(string)
  703. fileDataStr, ok2 := fileData["file_data"].(string)
  704. if ok1 && ok2 {
  705. contentList = append(contentList, MediaContent{
  706. Type: ContentTypeFile,
  707. File: &MessageFile{
  708. FileName: fileName,
  709. FileData: fileDataStr,
  710. },
  711. })
  712. }
  713. }
  714. }
  715. case ContentTypeVideoUrl:
  716. if videoUrl, ok := contentItem["video_url"].(string); ok {
  717. contentList = append(contentList, MediaContent{
  718. Type: ContentTypeVideoUrl,
  719. VideoUrl: &MessageVideoUrl{
  720. Url: videoUrl,
  721. },
  722. })
  723. }
  724. }
  725. }
  726. }
  727. if len(contentList) > 0 {
  728. m.parsedContent = contentList
  729. }
  730. return contentList
  731. }*/
  732. type WebSearchOptions struct {
  733. SearchContextSize string `json:"search_context_size,omitempty"`
  734. UserLocation json.RawMessage `json:"user_location,omitempty"`
  735. }
  736. // https://platform.openai.com/docs/api-reference/responses/create
  737. type OpenAIResponsesRequest struct {
  738. Model string `json:"model"`
  739. Input json.RawMessage `json:"input,omitempty"`
  740. Include json.RawMessage `json:"include,omitempty"`
  741. // 在后台运行推理,暂时还不支持依赖的接口
  742. // Background json.RawMessage `json:"background,omitempty"`
  743. Conversation json.RawMessage `json:"conversation,omitempty"`
  744. ContextManagement json.RawMessage `json:"context_management,omitempty"`
  745. Instructions json.RawMessage `json:"instructions,omitempty"`
  746. MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
  747. TopLogProbs *int `json:"top_logprobs,omitempty"`
  748. Metadata json.RawMessage `json:"metadata,omitempty"`
  749. ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
  750. PreviousResponseID string `json:"previous_response_id,omitempty"`
  751. Reasoning *Reasoning `json:"reasoning,omitempty"`
  752. // ServiceTier specifies upstream service level and may affect billing.
  753. // This field is filtered by default and can be enabled via channel setting allow_service_tier.
  754. ServiceTier string `json:"service_tier,omitempty"`
  755. // Store controls whether upstream may store request/response data.
  756. // This field is allowed by default and can be disabled via channel setting disable_store.
  757. Store json.RawMessage `json:"store,omitempty"`
  758. PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
  759. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  760. // SafetyIdentifier carries client identity for policy abuse detection.
  761. // This field is filtered by default and can be enabled via channel setting allow_safety_identifier.
  762. SafetyIdentifier string `json:"safety_identifier,omitempty"`
  763. Stream bool `json:"stream,omitempty"`
  764. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  765. Temperature *float64 `json:"temperature,omitempty"`
  766. Text json.RawMessage `json:"text,omitempty"`
  767. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  768. Tools json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  769. TopP *float64 `json:"top_p,omitempty"`
  770. Truncation string `json:"truncation,omitempty"`
  771. User string `json:"user,omitempty"`
  772. MaxToolCalls uint `json:"max_tool_calls,omitempty"`
  773. Prompt json.RawMessage `json:"prompt,omitempty"`
  774. // qwen
  775. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  776. // perplexity
  777. Preset json.RawMessage `json:"preset,omitempty"`
  778. }
  779. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  780. var fileMeta = make([]*types.FileMeta, 0)
  781. var texts = make([]string, 0)
  782. if r.Input != nil {
  783. inputs := r.ParseInput()
  784. for _, input := range inputs {
  785. if input.Type == "input_image" {
  786. if input.ImageUrl != "" {
  787. fileMeta = append(fileMeta, &types.FileMeta{
  788. FileType: types.FileTypeImage,
  789. Source: createFileSource(input.ImageUrl),
  790. Detail: input.Detail,
  791. })
  792. }
  793. } else if input.Type == "input_file" {
  794. if input.FileUrl != "" {
  795. fileMeta = append(fileMeta, &types.FileMeta{
  796. FileType: types.FileTypeFile,
  797. Source: createFileSource(input.FileUrl),
  798. })
  799. }
  800. } else {
  801. texts = append(texts, input.Text)
  802. }
  803. }
  804. }
  805. if len(r.Instructions) > 0 {
  806. texts = append(texts, string(r.Instructions))
  807. }
  808. if len(r.Metadata) > 0 {
  809. texts = append(texts, string(r.Metadata))
  810. }
  811. if len(r.Text) > 0 {
  812. texts = append(texts, string(r.Text))
  813. }
  814. if len(r.ToolChoice) > 0 {
  815. texts = append(texts, string(r.ToolChoice))
  816. }
  817. if len(r.Prompt) > 0 {
  818. texts = append(texts, string(r.Prompt))
  819. }
  820. if len(r.Tools) > 0 {
  821. texts = append(texts, string(r.Tools))
  822. }
  823. return &types.TokenCountMeta{
  824. CombineText: strings.Join(texts, "\n"),
  825. Files: fileMeta,
  826. MaxTokens: int(r.MaxOutputTokens),
  827. }
  828. }
  829. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  830. return r.Stream
  831. }
  832. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  833. if modelName != "" {
  834. r.Model = modelName
  835. }
  836. }
  837. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  838. var toolsMap []map[string]any
  839. if len(r.Tools) > 0 {
  840. _ = common.Unmarshal(r.Tools, &toolsMap)
  841. }
  842. return toolsMap
  843. }
  // Reasoning holds the `effort` and `summary` settings of a request's
  // reasoning configuration. Both fields are omitted from JSON when empty.
  type Reasoning struct {
  	// Effort is serialized as `effort`.
  	Effort string `json:"effort,omitempty"`
  	// Summary is serialized as `summary`.
  	Summary string `json:"summary,omitempty"`
  }
  // Input is one element of the request's `input` array, as decoded by
  // ParseInput.
  type Input struct {
  	// Type is the item's `type` discriminator.
  	Type string `json:"type,omitempty"`
  	// Role is the item's `role` value.
  	Role string `json:"role,omitempty"`
  	// Content is kept raw because ParseInput accepts it either as a JSON
  	// string or as a JSON array of parts.
  	Content json.RawMessage `json:"content,omitempty"`
  }
  // MediaInput is the normalized form of a single input part produced by
  // ParseInput. Which of the optional fields is populated depends on Type.
  type MediaInput struct {
  	// Type is the part kind; ParseInput emits input_text, input_image and
  	// input_file.
  	Type string `json:"type"`
  	// Text is the content of an input_text part.
  	Text string `json:"text,omitempty"`
  	// FileUrl is the URL of an input_file part.
  	FileUrl string `json:"file_url,omitempty"`
  	// ImageUrl is the URL of an input_image part.
  	ImageUrl string `json:"image_url,omitempty"`
  	// Detail is only meaningful for input_image parts.
  	Detail string `json:"detail,omitempty"`
  }
  860. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  861. // Reference implementation mirrors Message.ParseContent:
  862. // - input can be a string, treated as an input_text item
  863. // - input can be an array of objects with a `type` field
  864. // supported types: input_text, input_image, input_file
  865. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  866. if r.Input == nil {
  867. return nil
  868. }
  869. var mediaInputs []MediaInput
  870. // Try string first
  871. // if str, ok := common.GetJsonType(r.Input); ok {
  872. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  873. // return inputs
  874. // }
  875. if common.GetJsonType(r.Input) == "string" {
  876. var str string
  877. _ = common.Unmarshal(r.Input, &str)
  878. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  879. return mediaInputs
  880. }
  881. // Try array of parts
  882. if common.GetJsonType(r.Input) == "array" {
  883. var inputs []Input
  884. _ = common.Unmarshal(r.Input, &inputs)
  885. for _, input := range inputs {
  886. if common.GetJsonType(input.Content) == "string" {
  887. var str string
  888. _ = common.Unmarshal(input.Content, &str)
  889. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  890. }
  891. if common.GetJsonType(input.Content) == "array" {
  892. var array []any
  893. _ = common.Unmarshal(input.Content, &array)
  894. for _, itemAny := range array {
  895. // Already parsed MediaContent
  896. if media, ok := itemAny.(MediaInput); ok {
  897. mediaInputs = append(mediaInputs, media)
  898. continue
  899. }
  900. // Generic map
  901. item, ok := itemAny.(map[string]any)
  902. if !ok {
  903. continue
  904. }
  905. typeVal, ok := item["type"].(string)
  906. if !ok {
  907. continue
  908. }
  909. switch typeVal {
  910. case "input_text":
  911. text, _ := item["text"].(string)
  912. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  913. case "input_image":
  914. // image_url may be string or object with url field
  915. var imageUrl string
  916. switch v := item["image_url"].(type) {
  917. case string:
  918. imageUrl = v
  919. case map[string]any:
  920. if url, ok := v["url"].(string); ok {
  921. imageUrl = url
  922. }
  923. }
  924. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  925. case "input_file":
  926. // file_url may be string or object with url field
  927. var fileUrl string
  928. switch v := item["file_url"].(type) {
  929. case string:
  930. fileUrl = v
  931. case map[string]any:
  932. if url, ok := v["url"].(string); ok {
  933. fileUrl = url
  934. }
  935. }
  936. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  937. }
  938. }
  939. }
  940. }
  941. }
  942. return mediaInputs
  943. }