openai_request.go 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
// ResponseFormat mirrors the OpenAI `response_format` request field,
// selecting plain text, json_object, or json_schema output modes.
type ResponseFormat struct {
	Type string `json:"type,omitempty"`
	// JsonSchema is kept raw and passed through untouched to the upstream.
	JsonSchema json.RawMessage `json:"json_schema,omitempty"`
}
// FormatJsonSchema describes a `json_schema` response format entry
// (name, optional description, the schema itself, and the strict flag).
type FormatJsonSchema struct {
	Description string `json:"description,omitempty"`
	Name        string `json:"name"`
	Schema      any    `json:"schema,omitempty"`
	// Strict is raw JSON so that bool/null variants pass through unchanged.
	Strict json.RawMessage `json:"strict,omitempty"`
}
// GeneralOpenAIRequest represents a general request structure for OpenAI-compatible APIs.
// Convention for adding parameters: fields that are never referenced by code must use
// json.RawMessage and carry the omitempty tag (so they pass through untouched).
type GeneralOpenAIRequest struct {
	Model               string          `json:"model,omitempty"`
	Messages            []Message       `json:"messages,omitempty"`
	Prompt              any             `json:"prompt,omitempty"`
	Prefix              any             `json:"prefix,omitempty"`
	Suffix              any             `json:"suffix,omitempty"`
	Stream              bool            `json:"stream,omitempty"`
	StreamOptions       *StreamOptions  `json:"stream_options,omitempty"`
	MaxTokens           uint            `json:"max_tokens,omitempty"`
	MaxCompletionTokens uint            `json:"max_completion_tokens,omitempty"`
	ReasoningEffort     string          `json:"reasoning_effort,omitempty"`
	Verbosity           json.RawMessage `json:"verbosity,omitempty"` // gpt-5
	Temperature         *float64        `json:"temperature,omitempty"`
	TopP                float64         `json:"top_p,omitempty"`
	TopK                int             `json:"top_k,omitempty"`
	Stop                any             `json:"stop,omitempty"`
	N                   int             `json:"n,omitempty"`
	Input               any             `json:"input,omitempty"`
	Instruction         string          `json:"instruction,omitempty"`
	Size                string          `json:"size,omitempty"`
	Functions           json.RawMessage `json:"functions,omitempty"`
	FrequencyPenalty    float64         `json:"frequency_penalty,omitempty"`
	PresencePenalty     float64         `json:"presence_penalty,omitempty"`
	ResponseFormat      *ResponseFormat `json:"response_format,omitempty"`
	EncodingFormat      json.RawMessage `json:"encoding_format,omitempty"`
	Seed                float64         `json:"seed,omitempty"`
	// NOTE(review): field name has a typo ("Too" vs "Tool"); the json tag is
	// correct, and renaming the exported field would break external callers.
	ParallelTooCalls *bool             `json:"parallel_tool_calls,omitempty"`
	Tools            []ToolCallRequest `json:"tools,omitempty"`
	ToolChoice       any               `json:"tool_choice,omitempty"`
	FunctionCall     json.RawMessage   `json:"function_call,omitempty"`
	User             string            `json:"user,omitempty"`
	// ServiceTier specifies upstream service level and may affect billing.
	// This field is filtered by default and can be enabled via channel setting allow_service_tier.
	ServiceTier string          `json:"service_tier,omitempty"`
	LogProbs    bool            `json:"logprobs,omitempty"`
	TopLogProbs int             `json:"top_logprobs,omitempty"`
	Dimensions  int             `json:"dimensions,omitempty"`
	Modalities  json.RawMessage `json:"modalities,omitempty"`
	Audio       json.RawMessage `json:"audio,omitempty"`
	// Safety identifier used to help OpenAI detect application users who may
	// violate usage policies.
	// Note: this field sends user-identifying information to OpenAI. It is
	// filtered by default and can be enabled via allow_safety_identifier.
	SafetyIdentifier string `json:"safety_identifier,omitempty"`
	// Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
	// Controls whether this request's data may be stored by OpenAI for evals/optimization.
	// Note: passed through by default; can be disabled via disable_store, but
	// disabling it may break Codex usage.
	Store json.RawMessage `json:"store,omitempty"`
	// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
	PromptCacheKey       string          `json:"prompt_cache_key,omitempty"`
	PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
	LogitBias            json.RawMessage `json:"logit_bias,omitempty"`
	Metadata             json.RawMessage `json:"metadata,omitempty"`
	Prediction           json.RawMessage `json:"prediction,omitempty"`
	// gemini
	ExtraBody json.RawMessage `json:"extra_body,omitempty"`
	// xai
	SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
	// claude
	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
	// OpenRouter Params
	Usage     json.RawMessage `json:"usage,omitempty"`
	Reasoning json.RawMessage `json:"reasoning,omitempty"`
	// Ali Qwen Params
	VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
	EnableThinking         json.RawMessage `json:"enable_thinking,omitempty"`
	ChatTemplateKwargs     json.RawMessage `json:"chat_template_kwargs,omitempty"`
	EnableSearch           json.RawMessage `json:"enable_search,omitempty"`
	// ollama Params
	Think json.RawMessage `json:"think,omitempty"`
	// baidu v2
	WebSearch json.RawMessage `json:"web_search,omitempty"`
	// doubao,zhipu_v4
	THINKING json.RawMessage `json:"thinking,omitempty"`
	// pplx Params
	SearchDomainFilter     json.RawMessage `json:"search_domain_filter,omitempty"`
	SearchRecencyFilter    string          `json:"search_recency_filter,omitempty"`
	ReturnImages           bool            `json:"return_images,omitempty"`
	ReturnRelatedQuestions bool            `json:"return_related_questions,omitempty"`
	SearchMode             string          `json:"search_mode,omitempty"`
}
  101. // createFileSource 根据数据内容创建正确类型的 FileSource
  102. func createFileSource(data string) *types.FileSource {
  103. if strings.HasPrefix(data, "http://") || strings.HasPrefix(data, "https://") {
  104. return types.NewURLFileSource(data)
  105. }
  106. return types.NewBase64FileSource(data, "")
  107. }
  108. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  109. var tokenCountMeta types.TokenCountMeta
  110. var texts = make([]string, 0)
  111. var fileMeta = make([]*types.FileMeta, 0)
  112. if r.Prompt != nil {
  113. switch v := r.Prompt.(type) {
  114. case string:
  115. texts = append(texts, v)
  116. case []any:
  117. for _, item := range v {
  118. if str, ok := item.(string); ok {
  119. texts = append(texts, str)
  120. }
  121. }
  122. default:
  123. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  124. }
  125. }
  126. if r.Input != nil {
  127. inputs := r.ParseInput()
  128. texts = append(texts, inputs...)
  129. }
  130. if r.MaxCompletionTokens > r.MaxTokens {
  131. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  132. } else {
  133. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  134. }
  135. for _, message := range r.Messages {
  136. tokenCountMeta.MessagesCount++
  137. texts = append(texts, message.Role)
  138. if message.Content != nil {
  139. if message.Name != nil {
  140. tokenCountMeta.NameCount++
  141. texts = append(texts, *message.Name)
  142. }
  143. arrayContent := message.ParseContent()
  144. for _, m := range arrayContent {
  145. if m.Type == ContentTypeImageURL {
  146. imageUrl := m.GetImageMedia()
  147. if imageUrl != nil && imageUrl.Url != "" {
  148. source := createFileSource(imageUrl.Url)
  149. fileMeta = append(fileMeta, &types.FileMeta{
  150. FileType: types.FileTypeImage,
  151. Source: source,
  152. Detail: imageUrl.Detail,
  153. })
  154. }
  155. } else if m.Type == ContentTypeInputAudio {
  156. inputAudio := m.GetInputAudio()
  157. if inputAudio != nil && inputAudio.Data != "" {
  158. source := createFileSource(inputAudio.Data)
  159. fileMeta = append(fileMeta, &types.FileMeta{
  160. FileType: types.FileTypeAudio,
  161. Source: source,
  162. })
  163. }
  164. } else if m.Type == ContentTypeFile {
  165. file := m.GetFile()
  166. if file != nil && file.FileData != "" {
  167. source := createFileSource(file.FileData)
  168. fileMeta = append(fileMeta, &types.FileMeta{
  169. FileType: types.FileTypeFile,
  170. Source: source,
  171. })
  172. }
  173. } else if m.Type == ContentTypeVideoUrl {
  174. videoUrl := m.GetVideoUrl()
  175. if videoUrl != nil && videoUrl.Url != "" {
  176. source := createFileSource(videoUrl.Url)
  177. fileMeta = append(fileMeta, &types.FileMeta{
  178. FileType: types.FileTypeVideo,
  179. Source: source,
  180. })
  181. }
  182. } else {
  183. texts = append(texts, m.Text)
  184. }
  185. }
  186. }
  187. }
  188. if r.Tools != nil {
  189. openaiTools := r.Tools
  190. for _, tool := range openaiTools {
  191. tokenCountMeta.ToolsCount++
  192. texts = append(texts, tool.Function.Name)
  193. if tool.Function.Description != "" {
  194. texts = append(texts, tool.Function.Description)
  195. }
  196. if tool.Function.Parameters != nil {
  197. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  198. }
  199. }
  200. //toolTokens := CountTokenInput(countStr, request.Model)
  201. //tkm += 8
  202. //tkm += toolTokens
  203. }
  204. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  205. tokenCountMeta.Files = fileMeta
  206. return &tokenCountMeta
  207. }
// IsStream reports whether the client asked for a streaming response.
// The gin context is unused here; presumably the parameter exists to satisfy
// a shared request interface — confirm against the interface definition.
func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
	return r.Stream
}
  211. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  212. if modelName != "" {
  213. r.Model = modelName
  214. }
  215. }
  216. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  217. result := make(map[string]any)
  218. data, _ := common.Marshal(r)
  219. _ = common.Unmarshal(data, &result)
  220. return result
  221. }
  222. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  223. if strings.HasPrefix(r.Model, "o") {
  224. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  225. return "developer"
  226. }
  227. } else if strings.HasPrefix(r.Model, "gpt-5") {
  228. return "developer"
  229. }
  230. return "system"
  231. }
  232. const CustomType = "custom"
// ToolCallRequest is a single entry of the `tools` array (or a tool call),
// carrying either a function definition or a raw custom payload.
type ToolCallRequest struct {
	ID       string          `json:"id,omitempty"`
	Type     string          `json:"type"`
	Function FunctionRequest `json:"function,omitempty"`
	// Custom holds the payload when Type == CustomType; kept raw.
	Custom json.RawMessage `json:"custom,omitempty"`
}
// FunctionRequest describes a callable function: its schema (Parameters)
// when declared as a tool, or its Arguments when invoked in a tool call.
type FunctionRequest struct {
	Description string `json:"description,omitempty"`
	Name        string `json:"name"`
	Parameters  any    `json:"parameters,omitempty"`
	Arguments   string `json:"arguments,omitempty"`
}
// StreamOptions mirrors the OpenAI `stream_options` request field.
type StreamOptions struct {
	// IncludeUsage asks the upstream to emit a final usage chunk in the stream.
	IncludeUsage bool `json:"include_usage,omitempty"`
	// IncludeObfuscation is only for /v1/responses stream payload.
	// This field is filtered by default and can be enabled via channel setting allow_include_obfuscation.
	IncludeObfuscation bool `json:"include_obfuscation,omitempty"`
}
  251. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  252. if r.MaxCompletionTokens != 0 {
  253. return r.MaxCompletionTokens
  254. }
  255. return r.MaxTokens
  256. }
  257. func (r *GeneralOpenAIRequest) ParseInput() []string {
  258. if r.Input == nil {
  259. return nil
  260. }
  261. var input []string
  262. switch r.Input.(type) {
  263. case string:
  264. input = []string{r.Input.(string)}
  265. case []any:
  266. input = make([]string, 0, len(r.Input.([]any)))
  267. for _, item := range r.Input.([]any) {
  268. if str, ok := item.(string); ok {
  269. input = append(input, str)
  270. }
  271. }
  272. }
  273. return input
  274. }
// Message is a single chat message. Content may be a plain string, a
// []any of decoded-JSON part maps, or a []MediaContent (see ParseContent).
type Message struct {
	Role    string  `json:"role"`
	Content any     `json:"content"`
	Name    *string `json:"name,omitempty"`
	// Prefix enables prefix-completion mode on providers that support it.
	Prefix           *bool  `json:"prefix,omitempty"`
	ReasoningContent string `json:"reasoning_content,omitempty"`
	Reasoning        string `json:"reasoning,omitempty"`
	// ToolCalls is kept raw; decode on demand via ParseToolCalls.
	ToolCalls  json.RawMessage `json:"tool_calls,omitempty"`
	ToolCallId string          `json:"tool_call_id,omitempty"`
	// parsedContent caches the result of ParseContent.
	parsedContent []MediaContent
	//parsedStringContent *string
}
// MediaContent is one part of a multimodal message content array; exactly
// one of the media fields is populated according to Type. The media fields
// are `any` because they may hold either typed values or raw decoded maps
// (see the Get* accessors).
type MediaContent struct {
	Type       string `json:"type"`
	Text       string `json:"text,omitempty"`
	ImageUrl   any    `json:"image_url,omitempty"`
	InputAudio any    `json:"input_audio,omitempty"`
	File       any    `json:"file,omitempty"`
	VideoUrl   any    `json:"video_url,omitempty"`
	// OpenRouter Params
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}
  297. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  298. if m.ImageUrl != nil {
  299. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  300. return m.ImageUrl.(*MessageImageUrl)
  301. }
  302. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  303. out := &MessageImageUrl{
  304. Url: common.Interface2String(itemMap["url"]),
  305. Detail: common.Interface2String(itemMap["detail"]),
  306. MimeType: common.Interface2String(itemMap["mime_type"]),
  307. }
  308. return out
  309. }
  310. }
  311. return nil
  312. }
  313. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  314. if m.InputAudio != nil {
  315. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  316. return m.InputAudio.(*MessageInputAudio)
  317. }
  318. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  319. out := &MessageInputAudio{
  320. Data: common.Interface2String(itemMap["data"]),
  321. Format: common.Interface2String(itemMap["format"]),
  322. }
  323. return out
  324. }
  325. }
  326. return nil
  327. }
  328. func (m *MediaContent) GetFile() *MessageFile {
  329. if m.File != nil {
  330. if _, ok := m.File.(*MessageFile); ok {
  331. return m.File.(*MessageFile)
  332. }
  333. if itemMap, ok := m.File.(map[string]any); ok {
  334. out := &MessageFile{
  335. FileName: common.Interface2String(itemMap["file_name"]),
  336. FileData: common.Interface2String(itemMap["file_data"]),
  337. FileId: common.Interface2String(itemMap["file_id"]),
  338. }
  339. return out
  340. }
  341. }
  342. return nil
  343. }
  344. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  345. if m.VideoUrl != nil {
  346. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  347. return m.VideoUrl.(*MessageVideoUrl)
  348. }
  349. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  350. out := &MessageVideoUrl{
  351. Url: common.Interface2String(itemMap["url"]),
  352. }
  353. return out
  354. }
  355. }
  356. return nil
  357. }
// MessageImageUrl is a normalized image_url content part.
type MessageImageUrl struct {
	Url    string `json:"url"`
	Detail string `json:"detail"`
	// MimeType has no json tag, so it marshals as "MimeType"; presumably it
	// is internal-only metadata — confirm before adding a tag (wire change).
	MimeType string
}
// IsRemoteImage reports whether Url points at a remote resource
// (any "http"-prefixed scheme) rather than inline base64 data.
func (m *MessageImageUrl) IsRemoteImage() bool {
	return strings.HasPrefix(m.Url, "http")
}
// MessageInputAudio is a normalized input_audio content part.
type MessageInputAudio struct {
	Data   string `json:"data"` // base64-encoded audio payload
	Format string `json:"format"`
}
// MessageFile is a normalized file content part: either an uploaded-file
// reference (FileId) or inline data (FileName + FileData).
type MessageFile struct {
	FileName string `json:"filename,omitempty"`
	FileData string `json:"file_data,omitempty"`
	FileId   string `json:"file_id,omitempty"`
}
// MessageVideoUrl is a normalized video_url content part.
type MessageVideoUrl struct {
	Url string `json:"url"`
}
// Content part types recognized in multimodal message content arrays.
const (
	ContentTypeText       = "text"
	ContentTypeImageURL   = "image_url"
	ContentTypeInputAudio = "input_audio"
	ContentTypeFile       = "file"
	ContentTypeVideoUrl   = "video_url" // Alibaba Bailian video recognition
	//ContentTypeAudioUrl = "audio_url"
)
  386. func (m *Message) GetPrefix() bool {
  387. if m.Prefix == nil {
  388. return false
  389. }
  390. return *m.Prefix
  391. }
// SetPrefix sets the prefix-completion flag on the message.
func (m *Message) SetPrefix(prefix bool) {
	m.Prefix = &prefix
}
  395. func (m *Message) ParseToolCalls() []ToolCallRequest {
  396. if m.ToolCalls == nil {
  397. return nil
  398. }
  399. var toolCalls []ToolCallRequest
  400. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  401. return toolCalls
  402. }
  403. return toolCalls
  404. }
// SetToolCalls marshals an arbitrary value into the raw ToolCalls field.
// The marshal error is ignored; on failure ToolCalls becomes nil.
func (m *Message) SetToolCalls(toolCalls any) {
	toolCallsJson, _ := json.Marshal(toolCalls)
	m.ToolCalls = toolCallsJson
}
  409. func (m *Message) StringContent() string {
  410. switch m.Content.(type) {
  411. case string:
  412. return m.Content.(string)
  413. case []any:
  414. var contentStr string
  415. for _, contentItem := range m.Content.([]any) {
  416. contentMap, ok := contentItem.(map[string]any)
  417. if !ok {
  418. continue
  419. }
  420. if contentMap["type"] == ContentTypeText {
  421. if subStr, ok := contentMap["text"].(string); ok {
  422. contentStr += subStr
  423. }
  424. }
  425. }
  426. return contentStr
  427. }
  428. return ""
  429. }
// SetNullContent clears the content and its parse cache.
func (m *Message) SetNullContent() {
	m.Content = nil
	m.parsedContent = nil
}
// SetStringContent replaces the content with a plain string and
// invalidates the parse cache.
func (m *Message) SetStringContent(content string) {
	m.Content = content
	m.parsedContent = nil
}
// SetMediaContent replaces the content with typed media parts and primes
// the parse cache with the same slice.
func (m *Message) SetMediaContent(content []MediaContent) {
	m.Content = content
	m.parsedContent = content
}
  442. func (m *Message) IsStringContent() bool {
  443. _, ok := m.Content.(string)
  444. if ok {
  445. return true
  446. }
  447. return false
  448. }
// ParseContent normalizes Message.Content into []MediaContent and caches
// the result on the message. Supported shapes:
//   - string: wrapped as a single text part
//   - []any of MediaContent values, or decoded-JSON maps with a "type" key
//
// Unknown or malformed items are skipped. Returns nil when Content is nil.
func (m *Message) ParseContent() []MediaContent {
	if m.Content == nil {
		return nil
	}
	// Serve the cached parse when present.
	if len(m.parsedContent) > 0 {
		return m.parsedContent
	}
	var contentList []MediaContent
	// Try string content first.
	content, ok := m.Content.(string)
	if ok {
		contentList = []MediaContent{{
			Type: ContentTypeText,
			Text: content,
		}}
		m.parsedContent = contentList
		return contentList
	}
	// Otherwise expect an array of content parts.
	arrayContent, ok := m.Content.([]any)
	if !ok {
		return contentList
	}
	for _, contentItemAny := range arrayContent {
		// Already-typed parts pass through unchanged.
		mediaItem, ok := contentItemAny.(MediaContent)
		if ok {
			contentList = append(contentList, mediaItem)
			continue
		}
		contentItem, ok := contentItemAny.(map[string]any)
		if !ok {
			continue
		}
		contentType, ok := contentItem["type"].(string)
		if !ok {
			continue
		}
		switch contentType {
		case ContentTypeText:
			if text, ok := contentItem["text"].(string); ok {
				contentList = append(contentList, MediaContent{
					Type: ContentTypeText,
					Text: text,
				})
			}
		case ContentTypeImageURL:
			imageUrl := contentItem["image_url"]
			// Detail defaults to "high" unless the payload overrides it.
			temp := &MessageImageUrl{
				Detail: "high",
			}
			switch v := imageUrl.(type) {
			case string:
				temp.Url = v
			case map[string]interface{}:
				url, ok1 := v["url"].(string)
				detail, ok2 := v["detail"].(string)
				if ok2 {
					temp.Detail = detail
				}
				if ok1 {
					temp.Url = url
				}
			}
			contentList = append(contentList, MediaContent{
				Type:     ContentTypeImageURL,
				ImageUrl: temp,
			})
		case ContentTypeInputAudio:
			if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
				data, ok1 := audioData["data"].(string)
				format, ok2 := audioData["format"].(string)
				// Both data and format are required for an audio part.
				if ok1 && ok2 {
					temp := &MessageInputAudio{
						Data:   data,
						Format: format,
					}
					contentList = append(contentList, MediaContent{
						Type:       ContentTypeInputAudio,
						InputAudio: temp,
					})
				}
			}
		case ContentTypeFile:
			if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
				// Prefer a file_id reference; fall back to inline filename + data.
				fileId, ok3 := fileData["file_id"].(string)
				if ok3 {
					contentList = append(contentList, MediaContent{
						Type: ContentTypeFile,
						File: &MessageFile{
							FileId: fileId,
						},
					})
				} else {
					fileName, ok1 := fileData["filename"].(string)
					fileDataStr, ok2 := fileData["file_data"].(string)
					if ok1 && ok2 {
						contentList = append(contentList, MediaContent{
							Type: ContentTypeFile,
							File: &MessageFile{
								FileName: fileName,
								FileData: fileDataStr,
							},
						})
					}
				}
			}
		case ContentTypeVideoUrl:
			if videoUrl, ok := contentItem["video_url"].(string); ok {
				contentList = append(contentList, MediaContent{
					Type: ContentTypeVideoUrl,
					VideoUrl: &MessageVideoUrl{
						Url: videoUrl,
					},
				})
			}
		}
	}
	// Cache only non-empty results.
	if len(contentList) > 0 {
		m.parsedContent = contentList
	}
	return contentList
}
  572. // old code
  573. /*func (m *Message) StringContent() string {
  574. if m.parsedStringContent != nil {
  575. return *m.parsedStringContent
  576. }
  577. var stringContent string
  578. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  579. m.parsedStringContent = &stringContent
  580. return stringContent
  581. }
  582. contentStr := new(strings.Builder)
  583. arrayContent := m.ParseContent()
  584. for _, content := range arrayContent {
  585. if content.Type == ContentTypeText {
  586. contentStr.WriteString(content.Text)
  587. }
  588. }
  589. stringContent = contentStr.String()
  590. m.parsedStringContent = &stringContent
  591. return stringContent
  592. }
  593. func (m *Message) SetNullContent() {
  594. m.Content = nil
  595. m.parsedStringContent = nil
  596. m.parsedContent = nil
  597. }
  598. func (m *Message) SetStringContent(content string) {
  599. jsonContent, _ := json.Marshal(content)
  600. m.Content = jsonContent
  601. m.parsedStringContent = &content
  602. m.parsedContent = nil
  603. }
  604. func (m *Message) SetMediaContent(content []MediaContent) {
  605. jsonContent, _ := json.Marshal(content)
  606. m.Content = jsonContent
  607. m.parsedContent = nil
  608. m.parsedStringContent = nil
  609. }
  610. func (m *Message) IsStringContent() bool {
  611. if m.parsedStringContent != nil {
  612. return true
  613. }
  614. var stringContent string
  615. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  616. m.parsedStringContent = &stringContent
  617. return true
  618. }
  619. return false
  620. }
  621. func (m *Message) ParseContent() []MediaContent {
  622. if m.parsedContent != nil {
  623. return m.parsedContent
  624. }
  625. var contentList []MediaContent
  626. // 先尝试解析为字符串
  627. var stringContent string
  628. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  629. contentList = []MediaContent{{
  630. Type: ContentTypeText,
  631. Text: stringContent,
  632. }}
  633. m.parsedContent = contentList
  634. return contentList
  635. }
  636. // 尝试解析为数组
  637. var arrayContent []map[string]interface{}
  638. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  639. for _, contentItem := range arrayContent {
  640. contentType, ok := contentItem["type"].(string)
  641. if !ok {
  642. continue
  643. }
  644. switch contentType {
  645. case ContentTypeText:
  646. if text, ok := contentItem["text"].(string); ok {
  647. contentList = append(contentList, MediaContent{
  648. Type: ContentTypeText,
  649. Text: text,
  650. })
  651. }
  652. case ContentTypeImageURL:
  653. imageUrl := contentItem["image_url"]
  654. temp := &MessageImageUrl{
  655. Detail: "high",
  656. }
  657. switch v := imageUrl.(type) {
  658. case string:
  659. temp.Url = v
  660. case map[string]interface{}:
  661. url, ok1 := v["url"].(string)
  662. detail, ok2 := v["detail"].(string)
  663. if ok2 {
  664. temp.Detail = detail
  665. }
  666. if ok1 {
  667. temp.Url = url
  668. }
  669. }
  670. contentList = append(contentList, MediaContent{
  671. Type: ContentTypeImageURL,
  672. ImageUrl: temp,
  673. })
  674. case ContentTypeInputAudio:
  675. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  676. data, ok1 := audioData["data"].(string)
  677. format, ok2 := audioData["format"].(string)
  678. if ok1 && ok2 {
  679. temp := &MessageInputAudio{
  680. Data: data,
  681. Format: format,
  682. }
  683. contentList = append(contentList, MediaContent{
  684. Type: ContentTypeInputAudio,
  685. InputAudio: temp,
  686. })
  687. }
  688. }
  689. case ContentTypeFile:
  690. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  691. fileId, ok3 := fileData["file_id"].(string)
  692. if ok3 {
  693. contentList = append(contentList, MediaContent{
  694. Type: ContentTypeFile,
  695. File: &MessageFile{
  696. FileId: fileId,
  697. },
  698. })
  699. } else {
  700. fileName, ok1 := fileData["filename"].(string)
  701. fileDataStr, ok2 := fileData["file_data"].(string)
  702. if ok1 && ok2 {
  703. contentList = append(contentList, MediaContent{
  704. Type: ContentTypeFile,
  705. File: &MessageFile{
  706. FileName: fileName,
  707. FileData: fileDataStr,
  708. },
  709. })
  710. }
  711. }
  712. }
  713. case ContentTypeVideoUrl:
  714. if videoUrl, ok := contentItem["video_url"].(string); ok {
  715. contentList = append(contentList, MediaContent{
  716. Type: ContentTypeVideoUrl,
  717. VideoUrl: &MessageVideoUrl{
  718. Url: videoUrl,
  719. },
  720. })
  721. }
  722. }
  723. }
  724. }
  725. if len(contentList) > 0 {
  726. m.parsedContent = contentList
  727. }
  728. return contentList
  729. }*/
// WebSearchOptions mirrors the `web_search_options` request field.
type WebSearchOptions struct {
	SearchContextSize string `json:"search_context_size,omitempty"`
	// UserLocation is kept raw and passed through untouched.
	UserLocation json.RawMessage `json:"user_location,omitempty"`
}
// OpenAIResponsesRequest models a /v1/responses request.
// https://platform.openai.com/docs/api-reference/responses/create
type OpenAIResponsesRequest struct {
	Model   string          `json:"model"`
	Input   json.RawMessage `json:"input,omitempty"`
	Include json.RawMessage `json:"include,omitempty"`
	// Background reasoning is not supported yet (dependent endpoints missing).
	// Background json.RawMessage `json:"background,omitempty"`
	Conversation      json.RawMessage `json:"conversation,omitempty"`
	ContextManagement json.RawMessage `json:"context_management,omitempty"`
	Instructions      json.RawMessage `json:"instructions,omitempty"`
	MaxOutputTokens   uint            `json:"max_output_tokens,omitempty"`
	TopLogProbs       *int            `json:"top_logprobs,omitempty"`
	Metadata          json.RawMessage `json:"metadata,omitempty"`
	ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
	PreviousResponseID string         `json:"previous_response_id,omitempty"`
	Reasoning          *Reasoning     `json:"reasoning,omitempty"`
	// ServiceTier specifies upstream service level and may affect billing.
	// This field is filtered by default and can be enabled via channel setting allow_service_tier.
	ServiceTier string `json:"service_tier,omitempty"`
	// Store controls whether upstream may store request/response data.
	// This field is allowed by default and can be disabled via channel setting disable_store.
	Store                json.RawMessage `json:"store,omitempty"`
	PromptCacheKey       json.RawMessage `json:"prompt_cache_key,omitempty"`
	PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
	// SafetyIdentifier carries client identity for policy abuse detection.
	// This field is filtered by default and can be enabled via channel setting allow_safety_identifier.
	SafetyIdentifier string          `json:"safety_identifier,omitempty"`
	Stream           bool            `json:"stream,omitempty"`
	StreamOptions    *StreamOptions  `json:"stream_options,omitempty"`
	Temperature      *float64        `json:"temperature,omitempty"`
	Text             json.RawMessage `json:"text,omitempty"`
	ToolChoice       json.RawMessage `json:"tool_choice,omitempty"`
	// Tools is kept raw: few fields need handling, and MCP entries carry too
	// many uncertain parameters to model with structs.
	Tools        json.RawMessage `json:"tools,omitempty"`
	TopP         *float64        `json:"top_p,omitempty"`
	Truncation   string          `json:"truncation,omitempty"`
	User         string          `json:"user,omitempty"`
	MaxToolCalls uint            `json:"max_tool_calls,omitempty"`
	Prompt       json.RawMessage `json:"prompt,omitempty"`
	// qwen
	EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
	// perplexity
	Preset json.RawMessage `json:"preset,omitempty"`
}
  777. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  778. var fileMeta = make([]*types.FileMeta, 0)
  779. var texts = make([]string, 0)
  780. if r.Input != nil {
  781. inputs := r.ParseInput()
  782. for _, input := range inputs {
  783. if input.Type == "input_image" {
  784. if input.ImageUrl != "" {
  785. fileMeta = append(fileMeta, &types.FileMeta{
  786. FileType: types.FileTypeImage,
  787. Source: createFileSource(input.ImageUrl),
  788. Detail: input.Detail,
  789. })
  790. }
  791. } else if input.Type == "input_file" {
  792. if input.FileUrl != "" {
  793. fileMeta = append(fileMeta, &types.FileMeta{
  794. FileType: types.FileTypeFile,
  795. Source: createFileSource(input.FileUrl),
  796. })
  797. }
  798. } else {
  799. texts = append(texts, input.Text)
  800. }
  801. }
  802. }
  803. if len(r.Instructions) > 0 {
  804. texts = append(texts, string(r.Instructions))
  805. }
  806. if len(r.Metadata) > 0 {
  807. texts = append(texts, string(r.Metadata))
  808. }
  809. if len(r.Text) > 0 {
  810. texts = append(texts, string(r.Text))
  811. }
  812. if len(r.ToolChoice) > 0 {
  813. texts = append(texts, string(r.ToolChoice))
  814. }
  815. if len(r.Prompt) > 0 {
  816. texts = append(texts, string(r.Prompt))
  817. }
  818. if len(r.Tools) > 0 {
  819. texts = append(texts, string(r.Tools))
  820. }
  821. return &types.TokenCountMeta{
  822. CombineText: strings.Join(texts, "\n"),
  823. Files: fileMeta,
  824. MaxTokens: int(r.MaxOutputTokens),
  825. }
  826. }
  827. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  828. return r.Stream
  829. }
  830. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  831. if modelName != "" {
  832. r.Model = modelName
  833. }
  834. }
  835. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  836. var toolsMap []map[string]any
  837. if len(r.Tools) > 0 {
  838. _ = common.Unmarshal(r.Tools, &toolsMap)
  839. }
  840. return toolsMap
  841. }
// Reasoning mirrors the Responses API `reasoning` object, carrying the
// requested reasoning effort and summary settings.
type Reasoning struct {
	Effort  string `json:"effort,omitempty"`
	Summary string `json:"summary,omitempty"`
}
// Input is one item of the Responses API `input` array. Content is kept as
// raw JSON because it may be either a plain string or an array of typed
// content parts; ParseInput performs the actual decoding.
type Input struct {
	Type    string          `json:"type,omitempty"`
	Role    string          `json:"role,omitempty"`
	Content json.RawMessage `json:"content,omitempty"`
}
// MediaInput is a normalized content part produced by ParseInput. Depending
// on Type ("input_text", "input_image", or "input_file"), one of Text,
// ImageUrl, or FileUrl carries the payload.
type MediaInput struct {
	Type     string `json:"type"`
	Text     string `json:"text,omitempty"`
	FileUrl  string `json:"file_url,omitempty"`
	ImageUrl string `json:"image_url,omitempty"`
	Detail   string `json:"detail,omitempty"` // only meaningful for input_image
}
  858. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  859. // Reference implementation mirrors Message.ParseContent:
  860. // - input can be a string, treated as an input_text item
  861. // - input can be an array of objects with a `type` field
  862. // supported types: input_text, input_image, input_file
  863. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  864. if r.Input == nil {
  865. return nil
  866. }
  867. var mediaInputs []MediaInput
  868. // Try string first
  869. // if str, ok := common.GetJsonType(r.Input); ok {
  870. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  871. // return inputs
  872. // }
  873. if common.GetJsonType(r.Input) == "string" {
  874. var str string
  875. _ = common.Unmarshal(r.Input, &str)
  876. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  877. return mediaInputs
  878. }
  879. // Try array of parts
  880. if common.GetJsonType(r.Input) == "array" {
  881. var inputs []Input
  882. _ = common.Unmarshal(r.Input, &inputs)
  883. for _, input := range inputs {
  884. if common.GetJsonType(input.Content) == "string" {
  885. var str string
  886. _ = common.Unmarshal(input.Content, &str)
  887. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  888. }
  889. if common.GetJsonType(input.Content) == "array" {
  890. var array []any
  891. _ = common.Unmarshal(input.Content, &array)
  892. for _, itemAny := range array {
  893. // Already parsed MediaContent
  894. if media, ok := itemAny.(MediaInput); ok {
  895. mediaInputs = append(mediaInputs, media)
  896. continue
  897. }
  898. // Generic map
  899. item, ok := itemAny.(map[string]any)
  900. if !ok {
  901. continue
  902. }
  903. typeVal, ok := item["type"].(string)
  904. if !ok {
  905. continue
  906. }
  907. switch typeVal {
  908. case "input_text":
  909. text, _ := item["text"].(string)
  910. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  911. case "input_image":
  912. // image_url may be string or object with url field
  913. var imageUrl string
  914. switch v := item["image_url"].(type) {
  915. case string:
  916. imageUrl = v
  917. case map[string]any:
  918. if url, ok := v["url"].(string); ok {
  919. imageUrl = url
  920. }
  921. }
  922. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  923. case "input_file":
  924. // file_url may be string or object with url field
  925. var fileUrl string
  926. switch v := item["file_url"].(type) {
  927. case string:
  928. fileUrl = v
  929. case map[string]any:
  930. if url, ok := v["url"].(string); ok {
  931. fileUrl = url
  932. }
  933. }
  934. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  935. }
  936. }
  937. }
  938. }
  939. }
  940. return mediaInputs
  941. }