adaptor.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. package volcengine
  2. import (
  3. "bytes"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "mime/multipart"
  9. "net/http"
  10. "net/textproto"
  11. "path/filepath"
  12. "strings"
  13. channelconstant "github.com/QuantumNous/new-api/constant"
  14. "github.com/QuantumNous/new-api/dto"
  15. "github.com/QuantumNous/new-api/relay/channel"
  16. "github.com/QuantumNous/new-api/relay/channel/openai"
  17. relaycommon "github.com/QuantumNous/new-api/relay/common"
  18. "github.com/QuantumNous/new-api/relay/constant"
  19. "github.com/QuantumNous/new-api/types"
  20. "github.com/gin-gonic/gin"
  21. )
  22. const (
  23. contextKeyTTSRequest = "volcengine_tts_request"
  24. contextKeyResponseFormat = "response_format"
  25. )
  26. type Adaptor struct {
  27. }
  28. func (a *Adaptor) ConvertGeminiRequest(*gin.Context, *relaycommon.RelayInfo, *dto.GeminiChatRequest) (any, error) {
  29. //TODO implement me
  30. return nil, errors.New("not implemented")
  31. }
  32. func (a *Adaptor) ConvertClaudeRequest(c *gin.Context, info *relaycommon.RelayInfo, req *dto.ClaudeRequest) (any, error) {
  33. adaptor := openai.Adaptor{}
  34. return adaptor.ConvertClaudeRequest(c, info, req)
  35. }
  36. func (a *Adaptor) ConvertAudioRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.AudioRequest) (io.Reader, error) {
  37. if info.RelayMode != constant.RelayModeAudioSpeech {
  38. return nil, errors.New("unsupported audio relay mode")
  39. }
  40. appID, token, err := parseVolcengineAuth(info.ApiKey)
  41. if err != nil {
  42. return nil, err
  43. }
  44. voiceType := mapVoiceType(request.Voice)
  45. speedRatio := request.Speed
  46. encoding := mapEncoding(request.ResponseFormat)
  47. c.Set(contextKeyResponseFormat, encoding)
  48. volcRequest := VolcengineTTSRequest{
  49. App: VolcengineTTSApp{
  50. AppID: appID,
  51. Token: token,
  52. Cluster: "volcano_tts",
  53. },
  54. User: VolcengineTTSUser{
  55. UID: "openai_relay_user",
  56. },
  57. Audio: VolcengineTTSAudio{
  58. VoiceType: voiceType,
  59. Encoding: encoding,
  60. SpeedRatio: speedRatio,
  61. Rate: 24000,
  62. },
  63. Request: VolcengineTTSReqInfo{
  64. ReqID: generateRequestID(),
  65. Text: request.Input,
  66. Operation: "submit",
  67. Model: info.OriginModelName,
  68. },
  69. }
  70. if len(request.Metadata) > 0 {
  71. if err = json.Unmarshal(request.Metadata, &volcRequest); err != nil {
  72. return nil, fmt.Errorf("error unmarshalling metadata to volcengine request: %w", err)
  73. }
  74. }
  75. c.Set(contextKeyTTSRequest, volcRequest)
  76. if volcRequest.Request.Operation == "submit" {
  77. info.IsStream = true
  78. }
  79. jsonData, err := json.Marshal(volcRequest)
  80. if err != nil {
  81. return nil, fmt.Errorf("error marshalling volcengine request: %w", err)
  82. }
  83. return bytes.NewReader(jsonData), nil
  84. }
  85. func (a *Adaptor) ConvertImageRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.ImageRequest) (any, error) {
  86. switch info.RelayMode {
  87. case constant.RelayModeImagesGenerations:
  88. return request, nil
  89. case constant.RelayModeImagesEdits:
  90. var requestBody bytes.Buffer
  91. writer := multipart.NewWriter(&requestBody)
  92. writer.WriteField("model", request.Model)
  93. formData := c.Request.PostForm
  94. for key, values := range formData {
  95. if key == "model" {
  96. continue
  97. }
  98. for _, value := range values {
  99. writer.WriteField(key, value)
  100. }
  101. }
  102. if err := c.Request.ParseMultipartForm(32 << 20); err != nil {
  103. return nil, errors.New("failed to parse multipart form")
  104. }
  105. if c.Request.MultipartForm != nil && c.Request.MultipartForm.File != nil {
  106. var imageFiles []*multipart.FileHeader
  107. var exists bool
  108. if imageFiles, exists = c.Request.MultipartForm.File["image"]; !exists || len(imageFiles) == 0 {
  109. if imageFiles, exists = c.Request.MultipartForm.File["image[]"]; !exists || len(imageFiles) == 0 {
  110. foundArrayImages := false
  111. for fieldName, files := range c.Request.MultipartForm.File {
  112. if strings.HasPrefix(fieldName, "image[") && len(files) > 0 {
  113. foundArrayImages = true
  114. for _, file := range files {
  115. imageFiles = append(imageFiles, file)
  116. }
  117. }
  118. }
  119. if !foundArrayImages && (len(imageFiles) == 0) {
  120. return nil, errors.New("image is required")
  121. }
  122. }
  123. }
  124. for i, fileHeader := range imageFiles {
  125. file, err := fileHeader.Open()
  126. if err != nil {
  127. return nil, fmt.Errorf("failed to open image file %d: %w", i, err)
  128. }
  129. defer file.Close()
  130. fieldName := "image"
  131. if len(imageFiles) > 1 {
  132. fieldName = "image[]"
  133. }
  134. mimeType := detectImageMimeType(fileHeader.Filename)
  135. h := make(textproto.MIMEHeader)
  136. h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="%s"; filename="%s"`, fieldName, fileHeader.Filename))
  137. h.Set("Content-Type", mimeType)
  138. part, err := writer.CreatePart(h)
  139. if err != nil {
  140. return nil, fmt.Errorf("create form part failed for image %d: %w", i, err)
  141. }
  142. if _, err := io.Copy(part, file); err != nil {
  143. return nil, fmt.Errorf("copy file failed for image %d: %w", i, err)
  144. }
  145. }
  146. if maskFiles, exists := c.Request.MultipartForm.File["mask"]; exists && len(maskFiles) > 0 {
  147. maskFile, err := maskFiles[0].Open()
  148. if err != nil {
  149. return nil, errors.New("failed to open mask file")
  150. }
  151. defer maskFile.Close()
  152. mimeType := detectImageMimeType(maskFiles[0].Filename)
  153. h := make(textproto.MIMEHeader)
  154. h.Set("Content-Disposition", fmt.Sprintf(`form-data; name="mask"; filename="%s"`, maskFiles[0].Filename))
  155. h.Set("Content-Type", mimeType)
  156. maskPart, err := writer.CreatePart(h)
  157. if err != nil {
  158. return nil, errors.New("create form file failed for mask")
  159. }
  160. if _, err := io.Copy(maskPart, maskFile); err != nil {
  161. return nil, errors.New("copy mask file failed")
  162. }
  163. }
  164. } else {
  165. return nil, errors.New("no multipart form data found")
  166. }
  167. writer.Close()
  168. c.Request.Header.Set("Content-Type", writer.FormDataContentType())
  169. return bytes.NewReader(requestBody.Bytes()), nil
  170. default:
  171. return request, nil
  172. }
  173. }
  174. func detectImageMimeType(filename string) string {
  175. ext := strings.ToLower(filepath.Ext(filename))
  176. switch ext {
  177. case ".jpg", ".jpeg":
  178. return "image/jpeg"
  179. case ".png":
  180. return "image/png"
  181. case ".webp":
  182. return "image/webp"
  183. default:
  184. if strings.HasPrefix(ext, ".jp") {
  185. return "image/jpeg"
  186. }
  187. return "image/png"
  188. }
  189. }
  190. func (a *Adaptor) Init(info *relaycommon.RelayInfo) {
  191. }
  192. func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
  193. baseUrl := info.ChannelBaseUrl
  194. if baseUrl == "" {
  195. baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
  196. }
  197. switch info.RelayFormat {
  198. case types.RelayFormatClaude:
  199. if strings.HasPrefix(info.UpstreamModelName, "bot") {
  200. return fmt.Sprintf("%s/api/v3/bots/chat/completions", baseUrl), nil
  201. }
  202. return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil
  203. default:
  204. switch info.RelayMode {
  205. case constant.RelayModeChatCompletions:
  206. if strings.HasPrefix(info.UpstreamModelName, "bot") {
  207. return fmt.Sprintf("%s/api/v3/bots/chat/completions", baseUrl), nil
  208. }
  209. return fmt.Sprintf("%s/api/v3/chat/completions", baseUrl), nil
  210. case constant.RelayModeEmbeddings:
  211. return fmt.Sprintf("%s/api/v3/embeddings", baseUrl), nil
  212. case constant.RelayModeImagesGenerations:
  213. return fmt.Sprintf("%s/api/v3/images/generations", baseUrl), nil
  214. case constant.RelayModeImagesEdits:
  215. return fmt.Sprintf("%s/api/v3/images/edits", baseUrl), nil
  216. case constant.RelayModeRerank:
  217. return fmt.Sprintf("%s/api/v3/rerank", baseUrl), nil
  218. case constant.RelayModeAudioSpeech:
  219. if baseUrl == channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine] {
  220. return "wss://openspeech.bytedance.com/api/v1/tts/ws_binary", nil
  221. }
  222. return fmt.Sprintf("%s/v1/audio/speech", baseUrl), nil
  223. default:
  224. }
  225. }
  226. return "", fmt.Errorf("unsupported relay mode: %d", info.RelayMode)
  227. }
  228. func (a *Adaptor) SetupRequestHeader(c *gin.Context, req *http.Header, info *relaycommon.RelayInfo) error {
  229. channel.SetupApiRequestHeader(info, c, req)
  230. if info.RelayMode == constant.RelayModeAudioSpeech {
  231. parts := strings.Split(info.ApiKey, "|")
  232. if len(parts) == 2 {
  233. req.Set("Authorization", "Bearer;"+parts[1])
  234. }
  235. req.Set("Content-Type", "application/json")
  236. return nil
  237. }
  238. req.Set("Authorization", "Bearer "+info.ApiKey)
  239. return nil
  240. }
  241. func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayInfo, request *dto.GeneralOpenAIRequest) (any, error) {
  242. if request == nil {
  243. return nil, errors.New("request is nil")
  244. }
  245. if strings.HasSuffix(info.UpstreamModelName, "-thinking") && strings.HasPrefix(info.UpstreamModelName, "deepseek") {
  246. info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
  247. request.Model = info.UpstreamModelName
  248. request.THINKING = json.RawMessage(`{"type": "enabled"}`)
  249. }
  250. return request, nil
  251. }
  252. func (a *Adaptor) ConvertRerankRequest(c *gin.Context, relayMode int, request dto.RerankRequest) (any, error) {
  253. return nil, nil
  254. }
  255. func (a *Adaptor) ConvertEmbeddingRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.EmbeddingRequest) (any, error) {
  256. return request, nil
  257. }
  258. func (a *Adaptor) ConvertOpenAIResponsesRequest(c *gin.Context, info *relaycommon.RelayInfo, request dto.OpenAIResponsesRequest) (any, error) {
  259. return nil, errors.New("not implemented")
  260. }
  261. func (a *Adaptor) DoRequest(c *gin.Context, info *relaycommon.RelayInfo, requestBody io.Reader) (any, error) {
  262. if info.RelayMode == constant.RelayModeAudioSpeech {
  263. baseUrl := info.ChannelBaseUrl
  264. if baseUrl == "" {
  265. baseUrl = channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine]
  266. }
  267. if baseUrl == channelconstant.ChannelBaseURLs[channelconstant.ChannelTypeVolcEngine] {
  268. if info.IsStream {
  269. return nil, nil
  270. }
  271. }
  272. }
  273. return channel.DoApiRequest(a, c, info, requestBody)
  274. }
  275. func (a *Adaptor) DoResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
  276. if info.RelayMode == constant.RelayModeAudioSpeech {
  277. encoding := mapEncoding(c.GetString(contextKeyResponseFormat))
  278. if info.IsStream {
  279. volcRequestInterface, exists := c.Get(contextKeyTTSRequest)
  280. if !exists {
  281. return nil, types.NewErrorWithStatusCode(
  282. errors.New("volcengine TTS request not found in context"),
  283. types.ErrorCodeBadRequestBody,
  284. http.StatusInternalServerError,
  285. )
  286. }
  287. volcRequest, ok := volcRequestInterface.(VolcengineTTSRequest)
  288. if !ok {
  289. return nil, types.NewErrorWithStatusCode(
  290. errors.New("invalid volcengine TTS request type"),
  291. types.ErrorCodeBadRequestBody,
  292. http.StatusInternalServerError,
  293. )
  294. }
  295. // Get the WebSocket URL
  296. requestURL, urlErr := a.GetRequestURL(info)
  297. if urlErr != nil {
  298. return nil, types.NewErrorWithStatusCode(
  299. urlErr,
  300. types.ErrorCodeBadRequestBody,
  301. http.StatusInternalServerError,
  302. )
  303. }
  304. return handleTTSWebSocketResponse(c, requestURL, volcRequest, info, encoding)
  305. }
  306. return handleTTSResponse(c, resp, info, encoding)
  307. }
  308. adaptor := openai.Adaptor{}
  309. usage, err = adaptor.DoResponse(c, resp, info)
  310. return
  311. }
  312. func (a *Adaptor) GetModelList() []string {
  313. return ModelList
  314. }
  315. func (a *Adaptor) GetChannelName() string {
  316. return ChannelName
  317. }