// adaptor.go (12 KB)
  1. package volcengine
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "mime/multipart"
  9. "net/http"
  10. "net/textproto"
  11. "path/filepath"
  12. "strings"
  13. channelconstant "github.com/QuantumNous/new-api/constant"
  14. "github.com/QuantumNous/new-api/dto"
  15. "github.com/QuantumNous/new-api/relay/channel"
  16. "github.com/QuantumNous/new-api/relay/channel/openai"
  17. relaycommon "github.com/QuantumNous/new-api/relay/common"
  18. "github.com/QuantumNous/new-api/relay/constant"
  19. "github.com/QuantumNous/new-api/types"
  20. "github.com/gin-gonic/gin"
  21. )
  22. const (
  23. // Context keys for passing data between methods
  24. contextKeyTTSRequest = "volcengine_tts_request"
  25. contextKeyResponseFormat = "response_format"
  26. )
  27. type Adaptor struct {
  28. }
  29. func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
  30. //TODO implement me
  31. return nil, errors.New("not implemented")
  32. }
  33. func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
  34. adaptor := openai.Adaptor{}
  35. return adaptor.ConvertClaudeRequest(c, info, req)
  36. }
  37. func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
  38. if info.RelayMode != constant.RelayModeAudioSpeech {
  39. return nil, errors.New("unsupported audio relay mode")
  40. }
  41. appID, token, err := parseVolcengineAuth(info.ApiKey)
  42. if err != nil {
  43. return nil, err
  44. }
  45. voiceType := mapVoiceType(request.Voice)
  46. speedRatio := request.Speed
  47. encoding := mapEncoding(request.ResponseFormat)
  48. c.Set(contextKeyResponseFormat, encoding)
  49. volcRequest := VolcengineTTSRequest{
  50. App: VolcengineTTSApp{
  51. AppID: appID,
  52. Token: token,
  53. Cluster: "volcano_tts",
  54. },
  55. User: VolcengineTTSUser{
  56. UID: "openai_relay_user",
  57. },
  58. Audio: VolcengineTTSAudio{
  59. VoiceType: voiceType,
  60. Encoding: encoding,
  61. SpeedRatio: speedRatio,
  62. Rate: 24000,
  63. },
  64. Request: VolcengineTTSReqInfo{
  65. ReqID: generateRequestID(),
  66. Text: request.Input,
  67. Operation: "submit", // default WebSocket uses "submit"
  68. Model: info.OriginModelName,
  69. },
  70. }
  71. // 同步扩展字段的厂商自定义metadata
  72. if len(request.Metadata) > 0 {
  73. if err = json.Unmarshal(request.Metadata, &volcRequest); err != nil {
  74. return nil, fmt.Errorf("error unmarshalling metadata to volcengine request: %w", err)
  75. }
  76. }
  77. // Store the request in context for WebSocket handler
  78. c.Set(contextKeyTTSRequest, volcRequest)
  79. // https://www.volcengine.com/docs/6561/1257584
  80. // operation需要设置为submit才是流式返回
  81. if volcRequest.Request.Operation == "submit" {
  82. info.IsStream = true
  83. }
  84. // Return nil as WebSocket doesn't use traditional request body
  85. jsonData, err := json.Marshal(volcRequest)
  86. if err != nil {
  87. return nil, fmt.Errorf("error marshalling volcengine request: %w", err)
  88. }
  89. return bytes.NewReader(jsonData), nil
  90. }
  91. func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
  92. switch info.RelayMode {
  93. case constant.RelayModeImagesGenerations:
  94. return request, nil
  95. case constant.RelayModeImagesEdits:
  96. var requestBody bytes.Buffer
  97. writer := multipart.NewWriter(&requestBody)
  98. writer.WriteField("model", request.Model)
  99. // 获取所有表单字段
  100. formData := c.Request.PostForm
  101. // 遍历表单字段并打印输出
  102. for key, values := range formData {
  103. if key == "model" {
  104. continue
  105. }
  106. for _, value := range values {
  107. writer.WriteField(key, value)
  108. }
  109. }
  110. // Parse the multipart form to handle both single image and multiple images
  111. if err := c.Request.ParseMultipartForm(32 << 20); err != nil { // 32MB max memory
  112. return nil, errors.New("failed to parse multipart form")
  113. }
  114. if c.Request.MultipartForm != nil && c.Request.MultipartForm.File != nil {
  115. // Check if "image" field exists in any form, including array notation
  116. var imageFiles []*multipart.FileHeader
  117. var exists bool
  118. // First check for standard "image" field
  119. if imageFiles, exists = c.Request.MultipartForm.File["image"]; !exists || len(imageFiles) == 0 {
  120. // If not found, check for "image[]" field
  121. if imageFiles, exists = c.Request.MultipartForm.File["image[]"]; !exists || len(imageFiles) == 0 {
  122. // If still not found, iterate through all fields to find any that start with "image["
  123. foundArrayImages := false
  124. for fieldName, files := range c.Request.MultipartForm.File {
  125. if strings.HasPrefix(fieldName, "image[") && len(files) > 0 {
  126. foundArrayImages = true
  127. for _, file := range files {
  128. imageFiles = append(imageFiles, file)
  129. }
  130. }
  131. }
  132. // If no image fields found at all
  133. if !foundArrayImages && (len(imageFiles) == 0) {
  134. return nil, errors.New("image is required")
  135. }
  136. }
  137. }
  138. // Process all image files
  139. for i, fileHeader := range imageFiles {
  140. file, err := fileHeader.Open()
  141. if err != nil {
  142. return nil, fmt.Errorf("failed to open image file %d: %w", i, err)
  143. }
  144. defer file.Close()
  145. // If multiple images, use image[] as the field name
  146. fieldName := "image"
  147. if len(imageFiles) > 1 {
  148. fieldName = "image[]"
  149. }
  150. // Determine MIME type based on file extension
  151. mimeType := detectImageMimeType(fileHeader.Filename)
  152. // Create a form file with the appropriate content type
  153. h := make(textproto.MIMEHeader)
  154. h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="%s"; filename="%s"`, fieldName, fileHeader.Filename))
  155. h.Set("Content-Type", mimeType)
  156. part, err := writer.CreatePart(h)
  157. if err != nil {
  158. return nil, fmt.Errorf("create form part failed for image %d: %w", i, err)
  159. }
  160. if _, err := io.Copy(part, file); err != nil {
  161. return nil, fmt.Errorf("copy file failed for image %d: %w", i, err)
  162. }
  163. }
  164. // Handle mask file if present
  165. if maskFiles, exists := c.Request.MultipartForm.File["mask"]; exists && len(maskFiles) > 0 {
  166. maskFile, err := maskFiles[0].Open()
  167. if err != nil {
  168. return nil, errors.New("failed to open mask file")
  169. }
  170. defer maskFile.Close()
  171. // Determine MIME type for mask file
  172. mimeType := detectImageMimeType(maskFiles[0].Filename)
  173. // Create a form file with the appropriate content type
  174. h := make(textproto.MIMEHeader)
  175. h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="mask"; filename="%s"`, maskFiles[0].Filename))
  176. h.Set("Content-Type", mimeType)
  177. maskPart, err := writer.CreatePart(h)
  178. if err != nil {
  179. return nil, errors.New("create form file failed for mask")
  180. }
  181. if _, err := io.Copy(maskPart, maskFile); err != nil {
  182. return nil, errors.New("copy mask file failed")
  183. }
  184. }
  185. } else {
  186. return nil, errors.New("no multipart form data found")
  187. }
  188. // 关闭 multipart 编写器以设置分界线
  189. writer.Close()
  190. c.Request.Header.Set("Content-Type", writer.FormDataContentType())
  191. return bytes.NewReader(requestBody.Bytes()), nil
  192. default:
  193. return request, nil
  194. }
  195. }
  196. // detectImageMimeType determines the MIME type based on the file extension
  197. func detectImageMimeType(filename string) string {
  198. ext := strings.ToLower(filepath.Ext(filename))
  199. switch ext {
  200. case ".jpg", ".jpeg":
  201. return "image/jpeg"
  202. case ".png":
  203. return "image/png"
  204. case ".webp":
  205. return "image/webp"
  206. default:
  207. // Try to detect from extension if possible
  208. if strings.HasPrefix(ext, ".jp") {
  209. return "image/jpeg"
  210. }
  211. // Default to png as a fallback
  212. return "image/png"
  213. }
  214. }
  215. func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
  216. }
  217. func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
  218. baseUrl := info.ChannelBaseUrl
  219. if baseUrl == "" {
  220. baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
  221. }
  222. switch info.RelayFormat {
  223. case types.RelayFormatClaude:
  224. if strings.HasPrefix(info.UpstreamModelName, "bot") {
  225. return fmt.Sprintf("%s/api/v3/bots/chat/completions", baseUrl), nil
  226. }
  227. return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil
  228. default:
  229. switch info.RelayMode {
  230. case constant.RelayModeChatCompletions:
  231. if strings.HasPrefix(info.UpstreamModelName, "bot") {
  232. return fmt.Sprintf("%s/api/v3/bots/chat/completions", baseUrl), nil
  233. }
  234. return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil
  235. case constant.RelayModeEmbeddings:
  236. return fmt.Sprintf("%s/api/v3/embeddings", baseUrl), nil
  237. case constant.RelayModeImagesGenerations:
  238. return fmt.Sprintf("%s/api/v3/images/generations", baseUrl), nil
  239. case constant.RelayModeImagesEdits:
  240. return fmt.Sprintf("%s/api/v3/images/edits", baseUrl), nil
  241. case constant.RelayModeRerank:
  242. return fmt.Sprintf("%s/api/v3/rerank", baseUrl), nil
  243. case constant.RelayModeAudioSpeech:
  244. // 只有当 baseUrl 是火山默认的官方Url时才改为官方的的TTS接口,否则走透传的New接口
  245. if baseUrl == channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine] {
  246. return "wss://openspeech.bytedance.com/api/v1/tts/ws_binary", nil
  247. }
  248. return fmt.Sprintf("%s/v1/audio/speech", baseUrl), nil
  249. default:
  250. }
  251. }
  252. return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
  253. }
  254. func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
  255. channel.SetupApiRequestHeader(info, c, req)
  256. if info.RelayMode == constant.RelayModeAudioSpeech {
  257. parts := strings.Split(info.ApiKey, "|")
  258. if len(parts) == 2 {
  259. req.Set("Authorization", "Bearer;"+parts[1])
  260. }
  261. req.Set("Content-Type", "application/json")
  262. return nil
  263. }
  264. req.Set("Authorization", "Bearer "+info.ApiKey)
  265. return nil
  266. }
  267. func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
  268. if request == nil {
  269. return nil, errors.New("request is nil")
  270. }
  271. // 适配 方舟deepseek混合模型 的 thinking 后缀
  272. if strings.HasSuffix(info.UpstreamModelName, "-thinking") && strings.HasPrefix(info.UpstreamModelName, "deepseek") {
  273. info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
  274. request.Model = info.UpstreamModelName
  275. request.THINKING = json.RawMessage(`{"type": "enabled"}`)
  276. }
  277. return request, nil
  278. }
  279. func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
  280. return nil, nil
  281. }
  282. func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
  283. return request, nil
  284. }
  285. func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
  286. // TODO implement me
  287. return nil, errors.New("not implemented")
  288. }
  289. func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
  290. // For TTS with WebSocket, skip traditional HTTP request
  291. if info.RelayMode == constant.RelayModeAudioSpeech {
  292. baseUrl := info.ChannelBaseUrl
  293. if baseUrl == "" {
  294. baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
  295. }
  296. // Only use WebSocket for official Volcengine endpoint
  297. if baseUrl == channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine] {
  298. if info.IsStream {
  299. return nil, nil
  300. }
  301. }
  302. }
  303. return channel.DoApiRequest(a, c, info, requestBody)
  304. }
  305. func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
  306. if info.RelayMode == constant.RelayModeAudioSpeech {
  307. encoding := mapEncoding(c.GetString(contextKeyResponseFormat))
  308. if info.IsStream {
  309. volcRequestInterface, exists := c.Get(contextKeyTTSRequest)
  310. if !exists {
  311. return nil, types.NewErrorWithStatusCode(
  312. errors.New("volcengine TTS request not found in context"),
  313. types.ErrorCodeBadRequestBody,
  314. http.StatusInternalServerError,
  315. )
  316. }
  317. volcRequest, ok := volcRequestInterface.(VolcengineTTSRequest)
  318. if !ok {
  319. return nil, types.NewErrorWithStatusCode(
  320. errors.New("invalid volcengine TTS request type"),
  321. types.ErrorCodeBadRequestBody,
  322. http.StatusInternalServerError,
  323. )
  324. }
  325. // Get the WebSocket URL
  326. requestURL, urlErr := a.GetRequestURL(info)
  327. if urlErr != nil {
  328. return nil, types.NewErrorWithStatusCode(
  329. urlErr,
  330. types.ErrorCodeBadRequestBody,
  331. http.StatusInternalServerError,
  332. )
  333. }
  334. return handleTTSWebSocketResponse(c, requestURL, volcRequest, info, encoding)
  335. }
  336. return handleTTSResponse(c, resp, info, encoding)
  337. }
  338. adaptor := openai.Adaptor{}
  339. usage, err = adaptor.DoResponse(c, resp, info)
  340. return
  341. }
  342. func (a *Adaptor) GetModelList() []string {
  343. return ModelList
  344. }
  345. func (a *Adaptor) GetChannelName() string {
  346. return ChannelName
  347. }