# merge_asr_files.py
import os
from collections import defaultdict
from pathlib import Path

from pydub import AudioSegment
from tqdm import tqdm

from fish_speech.utils.file import AUDIO_EXTENSIONS, list_files


  6. def merge_and_delete_files(save_dir, original_files):
  7. save_path = Path(save_dir)
  8. audio_slice_files = list_files(
  9. path=save_dir, extensions=AUDIO_EXTENSIONS.union([".lab"]), recursive=True
  10. )
  11. audio_files = {}
  12. label_files = {}
  13. for file_path in tqdm(audio_slice_files, desc="Merging audio files"):
  14. rel_path = Path(file_path).relative_to(save_path)
  15. (save_path / rel_path.parent).mkdir(parents=True, exist_ok=True)
  16. if file_path.suffix == ".wav":
  17. prefix = rel_path.parent / file_path.stem.rsplit("-", 1)[0]
  18. if prefix == rel_path.parent / file_path.stem:
  19. continue
  20. audio = AudioSegment.from_wav(file_path)
  21. if prefix in audio_files.keys():
  22. audio_files[prefix] = audio_files[prefix] + audio
  23. else:
  24. audio_files[prefix] = audio
  25. elif file_path.suffix == ".lab":
  26. prefix = rel_path.parent / file_path.stem.rsplit("-", 1)[0]
  27. if prefix == rel_path.parent / file_path.stem:
  28. continue
  29. with open(file_path, "r", encoding="utf-8") as f:
  30. label = f.read()
  31. if prefix in label_files.keys():
  32. label_files[prefix] = label_files[prefix] + ", " + label
  33. else:
  34. label_files[prefix] = label
  35. for prefix, audio in audio_files.items():
  36. output_audio_path = save_path / f"{prefix}.wav"
  37. audio.export(output_audio_path, format="wav")
  38. for prefix, label in label_files.items():
  39. output_label_path = save_path / f"{prefix}.lab"
  40. with open(output_label_path, "w", encoding="utf-8") as f:
  41. f.write(label)
  42. for file_path in original_files:
  43. os.remove(file_path)
  44. if __name__ == "__main__":
  45. merge_and_delete_files("/made/by/spicysama/laziman", [__file__])