utils.py 1.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
  1. import io
  2. import wave
  3. from dataclasses import dataclass
  4. from typing import Literal, Optional, Tuple
  5. import numpy as np
  6. from fish_speech.text.chn_text_norm.text import Text as ChnNormedText
  7. @dataclass
  8. class InferenceResult:
  9. code: Literal["header", "segment", "error", "final"]
  10. audio: Optional[Tuple[int, np.ndarray]]
  11. error: Optional[Exception]
  12. def normalize_text(user_input: str, use_normalization: bool) -> str:
  13. """Normalize user input text if needed."""
  14. if use_normalization:
  15. return ChnNormedText(raw_text=user_input).normalize()
  16. else:
  17. return user_input
  18. def wav_chunk_header(
  19. sample_rate: int = 44100, bit_depth: int = 16, channels: int = 1
  20. ) -> np.ndarray:
  21. buffer = io.BytesIO()
  22. with wave.open(buffer, "wb") as wav_file:
  23. wav_file.setnchannels(channels)
  24. wav_file.setsampwidth(bit_depth // 8)
  25. wav_file.setframerate(sample_rate)
  26. wav_header_bytes = buffer.getvalue()
  27. buffer.close()
  28. # Convert to numpy array
  29. wav_header = np.frombuffer(wav_header_bytes, dtype=np.uint8)
  30. return wav_header