tts.go 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. package minimax
  2. import (
  3. "encoding/hex"
  4. "encoding/json"
  5. "errors"
  6. "fmt"
  7. "io"
  8. "net/http"
  9. "strings"
  10. "github.com/QuantumNous/new-api/dto"
  11. relaycommon "github.com/QuantumNous/new-api/relay/common"
  12. "github.com/QuantumNous/new-api/types"
  13. "github.com/gin-gonic/gin"
  14. )
  15. type MiniMaxTTSRequest struct {
  16. Model string `json:"model"`
  17. Text string `json:"text"`
  18. Stream bool `json:"stream,omitempty"`
  19. StreamOptions *StreamOptions `json:"stream_options,omitempty"`
  20. VoiceSetting VoiceSetting `json:"voice_setting"`
  21. PronunciationDict *PronunciationDict `json:"pronunciation_dict,omitempty"`
  22. AudioSetting *AudioSetting `json:"audio_setting,omitempty"`
  23. TimbreWeights []TimbreWeight `json:"timbre_weights,omitempty"`
  24. LanguageBoost string `json:"language_boost,omitempty"`
  25. VoiceModify *VoiceModify `json:"voice_modify,omitempty"`
  26. SubtitleEnable bool `json:"subtitle_enable,omitempty"`
  27. OutputFormat string `json:"output_format,omitempty"`
  28. AigcWatermark bool `json:"aigc_watermark,omitempty"`
  29. }
  30. type StreamOptions struct {
  31. ExcludeAggregatedAudio bool `json:"exclude_aggregated_audio,omitempty"`
  32. }
  33. type VoiceSetting struct {
  34. VoiceID string `json:"voice_id"`
  35. Speed float64 `json:"speed,omitempty"`
  36. Vol float64 `json:"vol,omitempty"`
  37. Pitch int `json:"pitch,omitempty"`
  38. Emotion string `json:"emotion,omitempty"`
  39. TextNormalization bool `json:"text_normalization,omitempty"`
  40. LatexRead bool `json:"latex_read,omitempty"`
  41. }
  42. type PronunciationDict struct {
  43. Tone []string `json:"tone,omitempty"`
  44. }
  45. type AudioSetting struct {
  46. SampleRate int `json:"sample_rate,omitempty"`
  47. Bitrate int `json:"bitrate,omitempty"`
  48. Format string `json:"format,omitempty"`
  49. Channel int `json:"channel,omitempty"`
  50. ForceCbr bool `json:"force_cbr,omitempty"`
  51. }
  52. type TimbreWeight struct {
  53. VoiceID string `json:"voice_id"`
  54. Weight int `json:"weight"`
  55. }
  56. type VoiceModify struct {
  57. Pitch int `json:"pitch,omitempty"`
  58. Intensity int `json:"intensity,omitempty"`
  59. Timbre int `json:"timbre,omitempty"`
  60. SoundEffects string `json:"sound_effects,omitempty"`
  61. }
  62. type MiniMaxTTSResponse struct {
  63. Data MiniMaxTTSData `json:"data"`
  64. ExtraInfo MiniMaxExtraInfo `json:"extra_info"`
  65. TraceID string `json:"trace_id"`
  66. BaseResp MiniMaxBaseResp `json:"base_resp"`
  67. }
  68. type MiniMaxTTSData struct {
  69. Audio string `json:"audio"`
  70. Status int `json:"status"`
  71. }
  72. type MiniMaxExtraInfo struct {
  73. UsageCharacters int64 `json:"usage_characters"`
  74. }
  75. type MiniMaxBaseResp struct {
  76. StatusCode int64 `json:"status_code"`
  77. StatusMsg string `json:"status_msg"`
  78. }
  79. func getContentTypeByFormat(format string) string {
  80. contentTypeMap := map[string]string{
  81. "mp3": "audio/mpeg",
  82. "wav": "audio/wav",
  83. "flac": "audio/flac",
  84. "aac": "audio/aac",
  85. "pcm": "audio/pcm",
  86. }
  87. if ct, ok := contentTypeMap[format]; ok {
  88. return ct
  89. }
  90. return "audio/mpeg" // default to mp3
  91. }
  92. func handleTTSResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
  93. body, readErr := io.ReadAll(resp.Body)
  94. if readErr != nil {
  95. return nil, types.NewErrorWithStatusCode(
  96. fmt.Errorf("failed to read minimax response: %w", readErr),
  97. types.ErrorCodeReadResponseBodyFailed,
  98. http.StatusInternalServerError,
  99. )
  100. }
  101. defer resp.Body.Close()
  102. // Parse response
  103. var minimaxResp MiniMaxTTSResponse
  104. if unmarshalErr := json.Unmarshal(body, &minimaxResp); unmarshalErr != nil {
  105. return nil, types.NewErrorWithStatusCode(
  106. fmt.Errorf("failed to unmarshal minimax TTS response: %w", unmarshalErr),
  107. types.ErrorCodeBadResponseBody,
  108. http.StatusInternalServerError,
  109. )
  110. }
  111. // Check base_resp status code
  112. if minimaxResp.BaseResp.StatusCode != 0 {
  113. return nil, types.NewErrorWithStatusCode(
  114. fmt.Errorf("minimax TTS error: %d - %s", minimaxResp.BaseResp.StatusCode, minimaxResp.BaseResp.StatusMsg),
  115. types.ErrorCodeBadResponse,
  116. http.StatusBadRequest,
  117. )
  118. }
  119. // Check if we have audio data
  120. if minimaxResp.Data.Audio == "" {
  121. return nil, types.NewErrorWithStatusCode(
  122. fmt.Errorf("no audio data in minimax TTS response"),
  123. types.ErrorCodeBadResponse,
  124. http.StatusBadRequest,
  125. )
  126. }
  127. if strings.HasPrefix(minimaxResp.Data.Audio, "http") {
  128. c.Redirect(http.StatusFound, minimaxResp.Data.Audio)
  129. } else {
  130. // Handle hex-encoded audio data
  131. audioData, decodeErr := hex.DecodeString(minimaxResp.Data.Audio)
  132. if decodeErr != nil {
  133. return nil, types.NewErrorWithStatusCode(
  134. fmt.Errorf("failed to decode hex audio data: %w", decodeErr),
  135. types.ErrorCodeBadResponse,
  136. http.StatusInternalServerError,
  137. )
  138. }
  139. // Determine content type - default to mp3
  140. contentType := "audio/mpeg"
  141. c.Data(http.StatusOK, contentType, audioData)
  142. }
  143. usage = &dto.Usage{
  144. PromptTokens: info.GetEstimatePromptTokens(),
  145. CompletionTokens: 0,
  146. TotalTokens: int(minimaxResp.ExtraInfo.UsageCharacters),
  147. }
  148. return usage, nil
  149. }
  150. func handleChatCompletionResponse(c *gin.Context, resp *http.Response, info *relaycommon.RelayInfo) (usage any, err *types.NewAPIError) {
  151. body, readErr := io.ReadAll(resp.Body)
  152. if readErr != nil {
  153. return nil, types.NewErrorWithStatusCode(
  154. errors.New("failed to read minimax response"),
  155. types.ErrorCodeReadResponseBodyFailed,
  156. http.StatusInternalServerError,
  157. )
  158. }
  159. defer resp.Body.Close()
  160. // Set response headers
  161. for key, values := range resp.Header {
  162. for _, value := range values {
  163. c.Header(key, value)
  164. }
  165. }
  166. c.Data(resp.StatusCode, "application/json", body)
  167. return nil, nil
  168. }