audio.go 2.5 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. package dto
  2. import (
  3. "encoding/json"
  4. "strings"
  5. "github.com/QuantumNous/new-api/types"
  6. "github.com/gin-gonic/gin"
  7. )
  8. type AudioRequest struct {
  9. Model string `json:"model"`
  10. Input string `json:"input"`
  11. Voice string `json:"voice"`
  12. Instructions string `json:"instructions,omitempty"`
  13. ResponseFormat string `json:"response_format,omitempty"`
  14. Speed *float64 `json:"speed,omitempty"`
  15. StreamFormat string `json:"stream_format,omitempty"`
  16. Metadata json.RawMessage `json:"metadata,omitempty"`
  17. // vllm-omini
  18. TaskType json.RawMessage `json:"task_type,omitempty"`
  19. Language json.RawMessage `json:"language,omitempty"`
  20. RefAudio json.RawMessage `json:"ref_audio,omitempty"`
  21. RefText json.RawMessage `json:"ref_text,omitempty"`
  22. XVectorOnlyMode json.RawMessage `json:"x_vector_only_mode,omitempty"`
  23. MaxNewTokens json.RawMessage `json:"max_new_tokens,omitempty"`
  24. InitialCodecChunkFrames json.RawMessage `json:"initial_codec_chunk_frames,omitempty"`
  25. // TODO:ensure that the logic remains correct after the stream is started.
  26. //Stream json.RawMessage `json:"stream,omitempty"`
  27. }
  28. func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
  29. meta := &types.TokenCountMeta{
  30. CombineText: r.Input,
  31. TokenType: types.TokenTypeTextNumber,
  32. }
  33. if strings.Contains(r.Model, "gpt") {
  34. meta.TokenType = types.TokenTypeTokenizer
  35. }
  36. return meta
  37. }
  38. func (r *AudioRequest) IsStream(c *gin.Context) bool {
  39. return r.StreamFormat == "sse"
  40. }
  41. func (r *AudioRequest) SetModelName(modelName string) {
  42. if modelName != "" {
  43. r.Model = modelName
  44. }
  45. }
  46. type AudioResponse struct {
  47. Text string `json:"text"`
  48. }
  49. type WhisperVerboseJSONResponse struct {
  50. Task string `json:"task,omitempty"`
  51. Language string `json:"language,omitempty"`
  52. Duration float64 `json:"duration,omitempty"`
  53. Text string `json:"text,omitempty"`
  54. Segments []Segment `json:"segments,omitempty"`
  55. }
  56. type Segment struct {
  57. Id int `json:"id"`
  58. Seek int `json:"seek"`
  59. Start float64 `json:"start"`
  60. End float64 `json:"end"`
  61. Text string `json:"text"`
  62. Tokens []int `json:"tokens"`
  63. Temperature float64 `json:"temperature"`
  64. AvgLogprob float64 `json:"avg_logprob"`
  65. CompressionRatio float64 `json:"compression_ratio"`
  66. NoSpeechProb float64 `json:"no_speech_prob"`
  67. }