audio.go 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960
  1. package dto
  2. import (
  3. "github.com/QuantumNous/new-api/types"
  4. "github.com/gin-gonic/gin"
  5. )
  // AudioRequest is the JSON body of an audio request handled by this package.
  //
  // NOTE(review): the field set looks like the OpenAI text-to-speech request
  // body — confirm against the relay handlers that consume it.
  type AudioRequest struct {
  	// Always present in the encoded JSON, even when empty.
  	Model string `json:"model"`
  	Input string `json:"input"`
  	Voice string `json:"voice"`

  	// Optional: dropped from the encoded JSON when left at the zero value.
  	Instructions   string  `json:"instructions,omitempty"`
  	ResponseFormat string  `json:"response_format,omitempty"`
  	Speed          float64 `json:"speed,omitempty"`
  	StreamFormat   string  `json:"stream_format,omitempty"`
  }
  15. func (r *AudioRequest) GetTokenCountMeta() *types.TokenCountMeta {
  16. meta := &types.TokenCountMeta{
  17. CombineText: r.Input,
  18. TokenType: types.TokenTypeTextNumber,
  19. }
  20. return meta
  21. }
  22. func (r *AudioRequest) IsStream(c *gin.Context) bool {
  23. return false
  24. }
  25. func (r *AudioRequest) SetModelName(modelName string) {
  26. if modelName != "" {
  27. r.Model = modelName
  28. }
  29. }
  // AudioResponse is a JSON payload holding a single "text" value.
  // The key is always emitted when encoding, even for an empty string.
  type AudioResponse struct {
  	Text string `json:"text"`
  }
  33. type WhisperVerboseJSONResponse struct {
  34. Task string `json:"task,omitempty"`
  35. Language string `json:"language,omitempty"`
  36. Duration float64 `json:"duration,omitempty"`
  37. Text string `json:"text,omitempty"`
  38. Segments []Segment `json:"segments,omitempty"`
  39. }
  // Segment is one element of WhisperVerboseJSONResponse.Segments.
  //
  // None of the fields use omitempty, so every key is written when encoding,
  // including zero values; a nil Tokens slice is encoded as JSON null.
  type Segment struct {
  	// Identification and position of the segment.
  	Id    int     `json:"id"`
  	Seek  int     `json:"seek"`
  	Start float64 `json:"start"`
  	End   float64 `json:"end"`

  	// Segment content.
  	Text   string `json:"text"`
  	Tokens []int  `json:"tokens"`

  	// Numeric statistics reported alongside the segment.
  	Temperature      float64 `json:"temperature"`
  	AvgLogprob       float64 `json:"avg_logprob"`
  	CompressionRatio float64 `json:"compression_ratio"`
  	NoSpeechProb     float64 `json:"no_speech_prob"`
  }