openai_request.go 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012
  1. package dto
  2. import (
  3. "encoding/json"
  4. "fmt"
  5. "strings"
  6. "github.com/QuantumNous/new-api/common"
  7. "github.com/QuantumNous/new-api/types"
  8. "github.com/gin-gonic/gin"
  9. )
  10. type ResponseFormat struct {
  11. Type string `json:"type,omitempty"`
  12. JsonSchema json.RawMessage `json:"json_schema,omitempty"`
  13. }
  14. type FormatJsonSchema struct {
  15. Description string `json:"description,omitempty"`
  16. Name string `json:"name"`
  17. Schema any `json:"schema,omitempty"`
  18. Strict json.RawMessage `json:"strict,omitempty"`
  19. }
  20. // GeneralOpenAIRequest represents a general request structure for OpenAI-compatible APIs.
  21. // 参数增加规范:无引用的参数必须使用json.RawMessage类型,并添加omitempty标签
  22. type GeneralOpenAIRequest struct {
  23. Model string `json:"model,omitempty"`
  24. Messages []Message `json:"messages,omitempty"`
  25. Prompt any `json:"prompt,omitempty"`
  26. Prefix any `json:"prefix,omitempty"`
  27. Suffix any `json:"suffix,omitempty"`
  28. Stream bool `json:"stream,omitempty"`
  29. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  30. MaxTokens uint `json:"max_tokens,omitempty"`
  31. MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"`
  32. ReasoningEffort string `json:"reasoning_effort,omitempty"`
  33. Verbosity json.RawMessage `json:"verbosity,omitempty"` // gpt-5
  34. Temperature *float64 `json:"temperature,omitempty"`
  35. TopP float64 `json:"top_p,omitempty"`
  36. TopK int `json:"top_k,omitempty"`
  37. Stop any `json:"stop,omitempty"`
  38. N int `json:"n,omitempty"`
  39. Input any `json:"input,omitempty"`
  40. Instruction string `json:"instruction,omitempty"`
  41. Size string `json:"size,omitempty"`
  42. Functions json.RawMessage `json:"functions,omitempty"`
  43. FrequencyPenalty float64 `json:"frequency_penalty,omitempty"`
  44. PresencePenalty float64 `json:"presence_penalty,omitempty"`
  45. ResponseFormat *ResponseFormat `json:"response_format,omitempty"`
  46. EncodingFormat json.RawMessage `json:"encoding_format,omitempty"`
  47. Seed float64 `json:"seed,omitempty"`
  48. ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"`
  49. Tools []ToolCallRequest `json:"tools,omitempty"`
  50. ToolChoice any `json:"tool_choice,omitempty"`
  51. User string `json:"user,omitempty"`
  52. LogProbs bool `json:"logprobs,omitempty"`
  53. TopLogProbs int `json:"top_logprobs,omitempty"`
  54. Dimensions int `json:"dimensions,omitempty"`
  55. Modalities json.RawMessage `json:"modalities,omitempty"`
  56. Audio json.RawMessage `json:"audio,omitempty"`
  57. // 安全标识符,用于帮助 OpenAI 检测可能违反使用政策的应用程序用户
  58. // 注意:此字段会向 OpenAI 发送用户标识信息,默认过滤以保护用户隐私
  59. SafetyIdentifier string `json:"safety_identifier,omitempty"`
  60. // Whether or not to store the output of this chat completion request for use in our model distillation or evals products.
  61. // 是否存储此次请求数据供 OpenAI 用于评估和优化产品
  62. // 注意:默认过滤此字段以保护用户隐私,但过滤后可能导致 Codex 无法正常使用
  63. Store json.RawMessage `json:"store,omitempty"`
  64. // Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces the user field
  65. PromptCacheKey string `json:"prompt_cache_key,omitempty"`
  66. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  67. LogitBias json.RawMessage `json:"logit_bias,omitempty"`
  68. Metadata json.RawMessage `json:"metadata,omitempty"`
  69. Prediction json.RawMessage `json:"prediction,omitempty"`
  70. // gemini
  71. ExtraBody json.RawMessage `json:"extra_body,omitempty"`
  72. //xai
  73. SearchParameters json.RawMessage `json:"search_parameters,omitempty"`
  74. // claude
  75. WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
  76. // OpenRouter Params
  77. Usage json.RawMessage `json:"usage,omitempty"`
  78. Reasoning json.RawMessage `json:"reasoning,omitempty"`
  79. // Ali Qwen Params
  80. VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"`
  81. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  82. ChatTemplateKwargs json.RawMessage `json:"chat_template_kwargs,omitempty"`
  83. EnableSearch json.RawMessage `json:"enable_search,omitempty"`
  84. // ollama Params
  85. Think json.RawMessage `json:"think,omitempty"`
  86. // baidu v2
  87. WebSearch json.RawMessage `json:"web_search,omitempty"`
  88. // doubao,zhipu_v4
  89. THINKING json.RawMessage `json:"thinking,omitempty"`
  90. // pplx Params
  91. SearchDomainFilter json.RawMessage `json:"search_domain_filter,omitempty"`
  92. SearchRecencyFilter string `json:"search_recency_filter,omitempty"`
  93. ReturnImages bool `json:"return_images,omitempty"`
  94. ReturnRelatedQuestions bool `json:"return_related_questions,omitempty"`
  95. SearchMode string `json:"search_mode,omitempty"`
  96. }
  97. func (r *GeneralOpenAIRequest) GetTokenCountMeta() *types.TokenCountMeta {
  98. var tokenCountMeta types.TokenCountMeta
  99. var texts = make([]string, 0)
  100. var fileMeta = make([]*types.FileMeta, 0)
  101. if r.Prompt != nil {
  102. switch v := r.Prompt.(type) {
  103. case string:
  104. texts = append(texts, v)
  105. case []any:
  106. for _, item := range v {
  107. if str, ok := item.(string); ok {
  108. texts = append(texts, str)
  109. }
  110. }
  111. default:
  112. texts = append(texts, fmt.Sprintf("%v", r.Prompt))
  113. }
  114. }
  115. if r.Input != nil {
  116. inputs := r.ParseInput()
  117. texts = append(texts, inputs...)
  118. }
  119. if r.MaxCompletionTokens > r.MaxTokens {
  120. tokenCountMeta.MaxTokens = int(r.MaxCompletionTokens)
  121. } else {
  122. tokenCountMeta.MaxTokens = int(r.MaxTokens)
  123. }
  124. for _, message := range r.Messages {
  125. tokenCountMeta.MessagesCount++
  126. texts = append(texts, message.Role)
  127. if message.Content != nil {
  128. if message.Name != nil {
  129. tokenCountMeta.NameCount++
  130. texts = append(texts, *message.Name)
  131. }
  132. arrayContent := message.ParseContent()
  133. for _, m := range arrayContent {
  134. if m.Type == ContentTypeImageURL {
  135. imageUrl := m.GetImageMedia()
  136. if imageUrl != nil {
  137. if imageUrl.Url != "" {
  138. meta := &types.FileMeta{
  139. FileType: types.FileTypeImage,
  140. }
  141. meta.OriginData = imageUrl.Url
  142. meta.Detail = imageUrl.Detail
  143. fileMeta = append(fileMeta, meta)
  144. }
  145. }
  146. } else if m.Type == ContentTypeInputAudio {
  147. inputAudio := m.GetInputAudio()
  148. if inputAudio != nil {
  149. meta := &types.FileMeta{
  150. FileType: types.FileTypeAudio,
  151. }
  152. meta.OriginData = inputAudio.Data
  153. fileMeta = append(fileMeta, meta)
  154. }
  155. } else if m.Type == ContentTypeFile {
  156. file := m.GetFile()
  157. if file != nil {
  158. meta := &types.FileMeta{
  159. FileType: types.FileTypeFile,
  160. }
  161. meta.OriginData = file.FileData
  162. fileMeta = append(fileMeta, meta)
  163. }
  164. } else if m.Type == ContentTypeVideoUrl {
  165. videoUrl := m.GetVideoUrl()
  166. if videoUrl != nil && videoUrl.Url != "" {
  167. meta := &types.FileMeta{
  168. FileType: types.FileTypeVideo,
  169. }
  170. meta.OriginData = videoUrl.Url
  171. fileMeta = append(fileMeta, meta)
  172. }
  173. } else {
  174. texts = append(texts, m.Text)
  175. }
  176. }
  177. }
  178. }
  179. if r.Tools != nil {
  180. openaiTools := r.Tools
  181. for _, tool := range openaiTools {
  182. tokenCountMeta.ToolsCount++
  183. texts = append(texts, tool.Function.Name)
  184. if tool.Function.Description != "" {
  185. texts = append(texts, tool.Function.Description)
  186. }
  187. if tool.Function.Parameters != nil {
  188. texts = append(texts, fmt.Sprintf("%v", tool.Function.Parameters))
  189. }
  190. }
  191. //toolTokens := CountTokenInput(countStr, request.Model)
  192. //tkm += 8
  193. //tkm += toolTokens
  194. }
  195. tokenCountMeta.CombineText = strings.Join(texts, "\n")
  196. tokenCountMeta.Files = fileMeta
  197. return &tokenCountMeta
  198. }
  199. func (r *GeneralOpenAIRequest) IsStream(c *gin.Context) bool {
  200. return r.Stream
  201. }
  202. func (r *GeneralOpenAIRequest) SetModelName(modelName string) {
  203. if modelName != "" {
  204. r.Model = modelName
  205. }
  206. }
  207. func (r *GeneralOpenAIRequest) ToMap() map[string]any {
  208. result := make(map[string]any)
  209. data, _ := common.Marshal(r)
  210. _ = common.Unmarshal(data, &result)
  211. return result
  212. }
  213. func (r *GeneralOpenAIRequest) GetSystemRoleName() string {
  214. if strings.HasPrefix(r.Model, "o") {
  215. if !strings.HasPrefix(r.Model, "o1-mini") && !strings.HasPrefix(r.Model, "o1-preview") {
  216. return "developer"
  217. }
  218. } else if strings.HasPrefix(r.Model, "gpt-5") {
  219. return "developer"
  220. }
  221. return "system"
  222. }
  223. const CustomType = "custom"
  224. type ToolCallRequest struct {
  225. ID string `json:"id,omitempty"`
  226. Type string `json:"type"`
  227. Function FunctionRequest `json:"function,omitempty"`
  228. Custom json.RawMessage `json:"custom,omitempty"`
  229. }
  230. type FunctionRequest struct {
  231. Description string `json:"description,omitempty"`
  232. Name string `json:"name"`
  233. Parameters any `json:"parameters,omitempty"`
  234. Arguments string `json:"arguments,omitempty"`
  235. }
  236. type StreamOptions struct {
  237. IncludeUsage bool `json:"include_usage,omitempty"`
  238. }
  239. func (r *GeneralOpenAIRequest) GetMaxTokens() uint {
  240. if r.MaxCompletionTokens != 0 {
  241. return r.MaxCompletionTokens
  242. }
  243. return r.MaxTokens
  244. }
  245. func (r *GeneralOpenAIRequest) ParseInput() []string {
  246. if r.Input == nil {
  247. return nil
  248. }
  249. var input []string
  250. switch r.Input.(type) {
  251. case string:
  252. input = []string{r.Input.(string)}
  253. case []any:
  254. input = make([]string, 0, len(r.Input.([]any)))
  255. for _, item := range r.Input.([]any) {
  256. if str, ok := item.(string); ok {
  257. input = append(input, str)
  258. }
  259. }
  260. }
  261. return input
  262. }
  263. type Message struct {
  264. Role string `json:"role"`
  265. Content any `json:"content"`
  266. Name *string `json:"name,omitempty"`
  267. Prefix *bool `json:"prefix,omitempty"`
  268. ReasoningContent string `json:"reasoning_content,omitempty"`
  269. Reasoning string `json:"reasoning,omitempty"`
  270. ToolCalls json.RawMessage `json:"tool_calls,omitempty"`
  271. ToolCallId string `json:"tool_call_id,omitempty"`
  272. parsedContent []MediaContent
  273. //parsedStringContent *string
  274. }
  275. type MediaContent struct {
  276. Type string `json:"type"`
  277. Text string `json:"text,omitempty"`
  278. ImageUrl any `json:"image_url,omitempty"`
  279. InputAudio any `json:"input_audio,omitempty"`
  280. File any `json:"file,omitempty"`
  281. VideoUrl any `json:"video_url,omitempty"`
  282. // OpenRouter Params
  283. CacheControl json.RawMessage `json:"cache_control,omitempty"`
  284. }
  285. func (m *MediaContent) GetImageMedia() *MessageImageUrl {
  286. if m.ImageUrl != nil {
  287. if _, ok := m.ImageUrl.(*MessageImageUrl); ok {
  288. return m.ImageUrl.(*MessageImageUrl)
  289. }
  290. if itemMap, ok := m.ImageUrl.(map[string]any); ok {
  291. out := &MessageImageUrl{
  292. Url: common.Interface2String(itemMap["url"]),
  293. Detail: common.Interface2String(itemMap["detail"]),
  294. MimeType: common.Interface2String(itemMap["mime_type"]),
  295. }
  296. return out
  297. }
  298. }
  299. return nil
  300. }
  301. func (m *MediaContent) GetInputAudio() *MessageInputAudio {
  302. if m.InputAudio != nil {
  303. if _, ok := m.InputAudio.(*MessageInputAudio); ok {
  304. return m.InputAudio.(*MessageInputAudio)
  305. }
  306. if itemMap, ok := m.InputAudio.(map[string]any); ok {
  307. out := &MessageInputAudio{
  308. Data: common.Interface2String(itemMap["data"]),
  309. Format: common.Interface2String(itemMap["format"]),
  310. }
  311. return out
  312. }
  313. }
  314. return nil
  315. }
  316. func (m *MediaContent) GetFile() *MessageFile {
  317. if m.File != nil {
  318. if _, ok := m.File.(*MessageFile); ok {
  319. return m.File.(*MessageFile)
  320. }
  321. if itemMap, ok := m.File.(map[string]any); ok {
  322. out := &MessageFile{
  323. FileName: common.Interface2String(itemMap["file_name"]),
  324. FileData: common.Interface2String(itemMap["file_data"]),
  325. FileId: common.Interface2String(itemMap["file_id"]),
  326. }
  327. return out
  328. }
  329. }
  330. return nil
  331. }
  332. func (m *MediaContent) GetVideoUrl() *MessageVideoUrl {
  333. if m.VideoUrl != nil {
  334. if _, ok := m.VideoUrl.(*MessageVideoUrl); ok {
  335. return m.VideoUrl.(*MessageVideoUrl)
  336. }
  337. if itemMap, ok := m.VideoUrl.(map[string]any); ok {
  338. out := &MessageVideoUrl{
  339. Url: common.Interface2String(itemMap["url"]),
  340. }
  341. return out
  342. }
  343. }
  344. return nil
  345. }
  346. type MessageImageUrl struct {
  347. Url string `json:"url"`
  348. Detail string `json:"detail"`
  349. MimeType string
  350. }
  351. func (m *MessageImageUrl) IsRemoteImage() bool {
  352. return strings.HasPrefix(m.Url, "http")
  353. }
  354. type MessageInputAudio struct {
  355. Data string `json:"data"` //base64
  356. Format string `json:"format"`
  357. }
  358. type MessageFile struct {
  359. FileName string `json:"filename,omitempty"`
  360. FileData string `json:"file_data,omitempty"`
  361. FileId string `json:"file_id,omitempty"`
  362. }
  363. type MessageVideoUrl struct {
  364. Url string `json:"url"`
  365. }
  366. const (
  367. ContentTypeText = "text"
  368. ContentTypeImageURL = "image_url"
  369. ContentTypeInputAudio = "input_audio"
  370. ContentTypeFile = "file"
  371. ContentTypeVideoUrl = "video_url" // 阿里百炼视频识别
  372. //ContentTypeAudioUrl = "audio_url"
  373. )
  374. func (m *Message) GetPrefix() bool {
  375. if m.Prefix == nil {
  376. return false
  377. }
  378. return *m.Prefix
  379. }
  380. func (m *Message) SetPrefix(prefix bool) {
  381. m.Prefix = &prefix
  382. }
  383. func (m *Message) ParseToolCalls() []ToolCallRequest {
  384. if m.ToolCalls == nil {
  385. return nil
  386. }
  387. var toolCalls []ToolCallRequest
  388. if err := json.Unmarshal(m.ToolCalls, &toolCalls); err == nil {
  389. return toolCalls
  390. }
  391. return toolCalls
  392. }
  393. func (m *Message) SetToolCalls(toolCalls any) {
  394. toolCallsJson, _ := json.Marshal(toolCalls)
  395. m.ToolCalls = toolCallsJson
  396. }
  397. func (m *Message) StringContent() string {
  398. switch m.Content.(type) {
  399. case string:
  400. return m.Content.(string)
  401. case []any:
  402. var contentStr string
  403. for _, contentItem := range m.Content.([]any) {
  404. contentMap, ok := contentItem.(map[string]any)
  405. if !ok {
  406. continue
  407. }
  408. if contentMap["type"] == ContentTypeText {
  409. if subStr, ok := contentMap["text"].(string); ok {
  410. contentStr += subStr
  411. }
  412. }
  413. }
  414. return contentStr
  415. }
  416. return ""
  417. }
  418. func (m *Message) SetNullContent() {
  419. m.Content = nil
  420. m.parsedContent = nil
  421. }
  422. func (m *Message) SetStringContent(content string) {
  423. m.Content = content
  424. m.parsedContent = nil
  425. }
  426. func (m *Message) SetMediaContent(content []MediaContent) {
  427. m.Content = content
  428. m.parsedContent = content
  429. }
  430. func (m *Message) IsStringContent() bool {
  431. _, ok := m.Content.(string)
  432. if ok {
  433. return true
  434. }
  435. return false
  436. }
  437. func (m *Message) ParseContent() []MediaContent {
  438. if m.Content == nil {
  439. return nil
  440. }
  441. if len(m.parsedContent) > 0 {
  442. return m.parsedContent
  443. }
  444. var contentList []MediaContent
  445. // 先尝试解析为字符串
  446. content, ok := m.Content.(string)
  447. if ok {
  448. contentList = []MediaContent{{
  449. Type: ContentTypeText,
  450. Text: content,
  451. }}
  452. m.parsedContent = contentList
  453. return contentList
  454. }
  455. // 尝试解析为数组
  456. //var arrayContent []map[string]interface{}
  457. arrayContent, ok := m.Content.([]any)
  458. if !ok {
  459. return contentList
  460. }
  461. for _, contentItemAny := range arrayContent {
  462. mediaItem, ok := contentItemAny.(MediaContent)
  463. if ok {
  464. contentList = append(contentList, mediaItem)
  465. continue
  466. }
  467. contentItem, ok := contentItemAny.(map[string]any)
  468. if !ok {
  469. continue
  470. }
  471. contentType, ok := contentItem["type"].(string)
  472. if !ok {
  473. continue
  474. }
  475. switch contentType {
  476. case ContentTypeText:
  477. if text, ok := contentItem["text"].(string); ok {
  478. contentList = append(contentList, MediaContent{
  479. Type: ContentTypeText,
  480. Text: text,
  481. })
  482. }
  483. case ContentTypeImageURL:
  484. imageUrl := contentItem["image_url"]
  485. temp := &MessageImageUrl{
  486. Detail: "high",
  487. }
  488. switch v := imageUrl.(type) {
  489. case string:
  490. temp.Url = v
  491. case map[string]interface{}:
  492. url, ok1 := v["url"].(string)
  493. detail, ok2 := v["detail"].(string)
  494. if ok2 {
  495. temp.Detail = detail
  496. }
  497. if ok1 {
  498. temp.Url = url
  499. }
  500. }
  501. contentList = append(contentList, MediaContent{
  502. Type: ContentTypeImageURL,
  503. ImageUrl: temp,
  504. })
  505. case ContentTypeInputAudio:
  506. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  507. data, ok1 := audioData["data"].(string)
  508. format, ok2 := audioData["format"].(string)
  509. if ok1 && ok2 {
  510. temp := &MessageInputAudio{
  511. Data: data,
  512. Format: format,
  513. }
  514. contentList = append(contentList, MediaContent{
  515. Type: ContentTypeInputAudio,
  516. InputAudio: temp,
  517. })
  518. }
  519. }
  520. case ContentTypeFile:
  521. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  522. fileId, ok3 := fileData["file_id"].(string)
  523. if ok3 {
  524. contentList = append(contentList, MediaContent{
  525. Type: ContentTypeFile,
  526. File: &MessageFile{
  527. FileId: fileId,
  528. },
  529. })
  530. } else {
  531. fileName, ok1 := fileData["filename"].(string)
  532. fileDataStr, ok2 := fileData["file_data"].(string)
  533. if ok1 && ok2 {
  534. contentList = append(contentList, MediaContent{
  535. Type: ContentTypeFile,
  536. File: &MessageFile{
  537. FileName: fileName,
  538. FileData: fileDataStr,
  539. },
  540. })
  541. }
  542. }
  543. }
  544. case ContentTypeVideoUrl:
  545. if videoUrl, ok := contentItem["video_url"].(string); ok {
  546. contentList = append(contentList, MediaContent{
  547. Type: ContentTypeVideoUrl,
  548. VideoUrl: &MessageVideoUrl{
  549. Url: videoUrl,
  550. },
  551. })
  552. }
  553. }
  554. }
  555. if len(contentList) > 0 {
  556. m.parsedContent = contentList
  557. }
  558. return contentList
  559. }
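// Legacy implementation, kept commented out for reference only: it dates from when Message.Content held raw JSON bytes that were decoded with json.Unmarshal.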
  561. /*func (m *Message) StringContent() string {
  562. if m.parsedStringContent != nil {
  563. return *m.parsedStringContent
  564. }
  565. var stringContent string
  566. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  567. m.parsedStringContent = &stringContent
  568. return stringContent
  569. }
  570. contentStr := new(strings.Builder)
  571. arrayContent := m.ParseContent()
  572. for _, content := range arrayContent {
  573. if content.Type == ContentTypeText {
  574. contentStr.WriteString(content.Text)
  575. }
  576. }
  577. stringContent = contentStr.String()
  578. m.parsedStringContent = &stringContent
  579. return stringContent
  580. }
  581. func (m *Message) SetNullContent() {
  582. m.Content = nil
  583. m.parsedStringContent = nil
  584. m.parsedContent = nil
  585. }
  586. func (m *Message) SetStringContent(content string) {
  587. jsonContent, _ := json.Marshal(content)
  588. m.Content = jsonContent
  589. m.parsedStringContent = &content
  590. m.parsedContent = nil
  591. }
  592. func (m *Message) SetMediaContent(content []MediaContent) {
  593. jsonContent, _ := json.Marshal(content)
  594. m.Content = jsonContent
  595. m.parsedContent = nil
  596. m.parsedStringContent = nil
  597. }
  598. func (m *Message) IsStringContent() bool {
  599. if m.parsedStringContent != nil {
  600. return true
  601. }
  602. var stringContent string
  603. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  604. m.parsedStringContent = &stringContent
  605. return true
  606. }
  607. return false
  608. }
  609. func (m *Message) ParseContent() []MediaContent {
  610. if m.parsedContent != nil {
  611. return m.parsedContent
  612. }
  613. var contentList []MediaContent
  614. // 先尝试解析为字符串
  615. var stringContent string
  616. if err := json.Unmarshal(m.Content, &stringContent); err == nil {
  617. contentList = []MediaContent{{
  618. Type: ContentTypeText,
  619. Text: stringContent,
  620. }}
  621. m.parsedContent = contentList
  622. return contentList
  623. }
  624. // 尝试解析为数组
  625. var arrayContent []map[string]interface{}
  626. if err := json.Unmarshal(m.Content, &arrayContent); err == nil {
  627. for _, contentItem := range arrayContent {
  628. contentType, ok := contentItem["type"].(string)
  629. if !ok {
  630. continue
  631. }
  632. switch contentType {
  633. case ContentTypeText:
  634. if text, ok := contentItem["text"].(string); ok {
  635. contentList = append(contentList, MediaContent{
  636. Type: ContentTypeText,
  637. Text: text,
  638. })
  639. }
  640. case ContentTypeImageURL:
  641. imageUrl := contentItem["image_url"]
  642. temp := &MessageImageUrl{
  643. Detail: "high",
  644. }
  645. switch v := imageUrl.(type) {
  646. case string:
  647. temp.Url = v
  648. case map[string]interface{}:
  649. url, ok1 := v["url"].(string)
  650. detail, ok2 := v["detail"].(string)
  651. if ok2 {
  652. temp.Detail = detail
  653. }
  654. if ok1 {
  655. temp.Url = url
  656. }
  657. }
  658. contentList = append(contentList, MediaContent{
  659. Type: ContentTypeImageURL,
  660. ImageUrl: temp,
  661. })
  662. case ContentTypeInputAudio:
  663. if audioData, ok := contentItem["input_audio"].(map[string]interface{}); ok {
  664. data, ok1 := audioData["data"].(string)
  665. format, ok2 := audioData["format"].(string)
  666. if ok1 && ok2 {
  667. temp := &MessageInputAudio{
  668. Data: data,
  669. Format: format,
  670. }
  671. contentList = append(contentList, MediaContent{
  672. Type: ContentTypeInputAudio,
  673. InputAudio: temp,
  674. })
  675. }
  676. }
  677. case ContentTypeFile:
  678. if fileData, ok := contentItem["file"].(map[string]interface{}); ok {
  679. fileId, ok3 := fileData["file_id"].(string)
  680. if ok3 {
  681. contentList = append(contentList, MediaContent{
  682. Type: ContentTypeFile,
  683. File: &MessageFile{
  684. FileId: fileId,
  685. },
  686. })
  687. } else {
  688. fileName, ok1 := fileData["filename"].(string)
  689. fileDataStr, ok2 := fileData["file_data"].(string)
  690. if ok1 && ok2 {
  691. contentList = append(contentList, MediaContent{
  692. Type: ContentTypeFile,
  693. File: &MessageFile{
  694. FileName: fileName,
  695. FileData: fileDataStr,
  696. },
  697. })
  698. }
  699. }
  700. }
  701. case ContentTypeVideoUrl:
  702. if videoUrl, ok := contentItem["video_url"].(string); ok {
  703. contentList = append(contentList, MediaContent{
  704. Type: ContentTypeVideoUrl,
  705. VideoUrl: &MessageVideoUrl{
  706. Url: videoUrl,
  707. },
  708. })
  709. }
  710. }
  711. }
  712. }
  713. if len(contentList) > 0 {
  714. m.parsedContent = contentList
  715. }
  716. return contentList
  717. }*/
  718. type WebSearchOptions struct {
  719. SearchContextSize string `json:"search_context_size,omitempty"`
  720. UserLocation json.RawMessage `json:"user_location,omitempty"`
  721. }
  722. // https://platform.openai.com/docs/api-reference/responses/create
  723. type OpenAIResponsesRequest struct {
  724. Model string `json:"model"`
  725. Input json.RawMessage `json:"input,omitempty"`
  726. Include json.RawMessage `json:"include,omitempty"`
  727. Instructions json.RawMessage `json:"instructions,omitempty"`
  728. MaxOutputTokens uint `json:"max_output_tokens,omitempty"`
  729. Metadata json.RawMessage `json:"metadata,omitempty"`
  730. ParallelToolCalls json.RawMessage `json:"parallel_tool_calls,omitempty"`
  731. PreviousResponseID string `json:"previous_response_id,omitempty"`
  732. Reasoning *Reasoning `json:"reasoning,omitempty"`
  733. // 服务层级字段,用于指定 API 服务等级。允许透传可能导致实际计费高于预期,默认应过滤
  734. ServiceTier string `json:"service_tier,omitempty"`
  735. Store json.RawMessage `json:"store,omitempty"`
  736. PromptCacheKey json.RawMessage `json:"prompt_cache_key,omitempty"`
  737. PromptCacheRetention json.RawMessage `json:"prompt_cache_retention,omitempty"`
  738. Stream bool `json:"stream,omitempty"`
  739. Temperature *float64 `json:"temperature,omitempty"`
  740. Text json.RawMessage `json:"text,omitempty"`
  741. ToolChoice json.RawMessage `json:"tool_choice,omitempty"`
  742. Tools json.RawMessage `json:"tools,omitempty"` // 需要处理的参数很少,MCP 参数太多不确定,所以用 map
  743. TopP *float64 `json:"top_p,omitempty"`
  744. Truncation string `json:"truncation,omitempty"`
  745. User string `json:"user,omitempty"`
  746. MaxToolCalls uint `json:"max_tool_calls,omitempty"`
  747. Prompt json.RawMessage `json:"prompt,omitempty"`
  748. // qwen
  749. EnableThinking json.RawMessage `json:"enable_thinking,omitempty"`
  750. // perplexity
  751. Preset json.RawMessage `json:"preset,omitempty"`
  752. }
  753. func (r *OpenAIResponsesRequest) GetTokenCountMeta() *types.TokenCountMeta {
  754. var fileMeta = make([]*types.FileMeta, 0)
  755. var texts = make([]string, 0)
  756. if r.Input != nil {
  757. inputs := r.ParseInput()
  758. for _, input := range inputs {
  759. if input.Type == "input_image" {
  760. if input.ImageUrl != "" {
  761. fileMeta = append(fileMeta, &types.FileMeta{
  762. FileType: types.FileTypeImage,
  763. OriginData: input.ImageUrl,
  764. Detail: input.Detail,
  765. })
  766. }
  767. } else if input.Type == "input_file" {
  768. if input.FileUrl != "" {
  769. fileMeta = append(fileMeta, &types.FileMeta{
  770. FileType: types.FileTypeFile,
  771. OriginData: input.FileUrl,
  772. })
  773. }
  774. } else {
  775. texts = append(texts, input.Text)
  776. }
  777. }
  778. }
  779. if len(r.Instructions) > 0 {
  780. texts = append(texts, string(r.Instructions))
  781. }
  782. if len(r.Metadata) > 0 {
  783. texts = append(texts, string(r.Metadata))
  784. }
  785. if len(r.Text) > 0 {
  786. texts = append(texts, string(r.Text))
  787. }
  788. if len(r.ToolChoice) > 0 {
  789. texts = append(texts, string(r.ToolChoice))
  790. }
  791. if len(r.Prompt) > 0 {
  792. texts = append(texts, string(r.Prompt))
  793. }
  794. if len(r.Tools) > 0 {
  795. texts = append(texts, string(r.Tools))
  796. }
  797. return &types.TokenCountMeta{
  798. CombineText: strings.Join(texts, "\n"),
  799. Files: fileMeta,
  800. MaxTokens: int(r.MaxOutputTokens),
  801. }
  802. }
  803. func (r *OpenAIResponsesRequest) IsStream(c *gin.Context) bool {
  804. return r.Stream
  805. }
  806. func (r *OpenAIResponsesRequest) SetModelName(modelName string) {
  807. if modelName != "" {
  808. r.Model = modelName
  809. }
  810. }
  811. func (r *OpenAIResponsesRequest) GetToolsMap() []map[string]any {
  812. var toolsMap []map[string]any
  813. if len(r.Tools) > 0 {
  814. _ = common.Unmarshal(r.Tools, &toolsMap)
  815. }
  816. return toolsMap
  817. }
  818. type Reasoning struct {
  819. Effort string `json:"effort,omitempty"`
  820. Summary string `json:"summary,omitempty"`
  821. }
  822. type Input struct {
  823. Type string `json:"type,omitempty"`
  824. Role string `json:"role,omitempty"`
  825. Content json.RawMessage `json:"content,omitempty"`
  826. }
  827. type MediaInput struct {
  828. Type string `json:"type"`
  829. Text string `json:"text,omitempty"`
  830. FileUrl string `json:"file_url,omitempty"`
  831. ImageUrl string `json:"image_url,omitempty"`
  832. Detail string `json:"detail,omitempty"` // 仅 input_image 有效
  833. }
  834. // ParseInput parses the Responses API `input` field into a normalized slice of MediaInput.
  835. // Reference implementation mirrors Message.ParseContent:
  836. // - input can be a string, treated as an input_text item
  837. // - input can be an array of objects with a `type` field
  838. // supported types: input_text, input_image, input_file
  839. func (r *OpenAIResponsesRequest) ParseInput() []MediaInput {
  840. if r.Input == nil {
  841. return nil
  842. }
  843. var mediaInputs []MediaInput
  844. // Try string first
  845. // if str, ok := common.GetJsonType(r.Input); ok {
  846. // inputs = append(inputs, MediaInput{Type: "input_text", Text: str})
  847. // return inputs
  848. // }
  849. if common.GetJsonType(r.Input) == "string" {
  850. var str string
  851. _ = common.Unmarshal(r.Input, &str)
  852. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  853. return mediaInputs
  854. }
  855. // Try array of parts
  856. if common.GetJsonType(r.Input) == "array" {
  857. var inputs []Input
  858. _ = common.Unmarshal(r.Input, &inputs)
  859. for _, input := range inputs {
  860. if common.GetJsonType(input.Content) == "string" {
  861. var str string
  862. _ = common.Unmarshal(input.Content, &str)
  863. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: str})
  864. }
  865. if common.GetJsonType(input.Content) == "array" {
  866. var array []any
  867. _ = common.Unmarshal(input.Content, &array)
  868. for _, itemAny := range array {
  869. // Already parsed MediaContent
  870. if media, ok := itemAny.(MediaInput); ok {
  871. mediaInputs = append(mediaInputs, media)
  872. continue
  873. }
  874. // Generic map
  875. item, ok := itemAny.(map[string]any)
  876. if !ok {
  877. continue
  878. }
  879. typeVal, ok := item["type"].(string)
  880. if !ok {
  881. continue
  882. }
  883. switch typeVal {
  884. case "input_text":
  885. text, _ := item["text"].(string)
  886. mediaInputs = append(mediaInputs, MediaInput{Type: "input_text", Text: text})
  887. case "input_image":
  888. // image_url may be string or object with url field
  889. var imageUrl string
  890. switch v := item["image_url"].(type) {
  891. case string:
  892. imageUrl = v
  893. case map[string]any:
  894. if url, ok := v["url"].(string); ok {
  895. imageUrl = url
  896. }
  897. }
  898. mediaInputs = append(mediaInputs, MediaInput{Type: "input_image", ImageUrl: imageUrl})
  899. case "input_file":
  900. // file_url may be string or object with url field
  901. var fileUrl string
  902. switch v := item["file_url"].(type) {
  903. case string:
  904. fileUrl = v
  905. case map[string]any:
  906. if url, ok := v["url"].(string); ok {
  907. fileUrl = url
  908. }
  909. }
  910. mediaInputs = append(mediaInputs, MediaInput{Type: "input_file", FileUrl: fileUrl})
  911. }
  912. }
  913. }
  914. }
  915. }
  916. return mediaInputs
  917. }