# to_flac.py

import random
import shutil
import subprocess
from multiprocessing import Pool, cpu_count
from pathlib import Path

from tqdm import tqdm
  6. def convert_to_flac(src_file_path):
  7. dst_file_path = src_file_path.with_suffix(".flac")
  8. dst_file_path.parent.mkdir(parents=True, exist_ok=True)
  9. try:
  10. subprocess.check_call(
  11. ["ffmpeg", "-y", "-i", str(src_file_path), "-acodec", "flac", "-threads", "0", str(dst_file_path)],
  12. stdout=subprocess.DEVNULL,
  13. stderr=subprocess.DEVNULL,
  14. )
  15. # remove the input file
  16. src_file_path.unlink()
  17. return True
  18. except subprocess.CalledProcessError:
  19. return False
  20. if __name__ == "__main__":
  21. src_dir = Path("dataset/tts/WenetSpeech/cleaned")
  22. wav_files = list(src_dir.rglob("*.wav"))
  23. random.shuffle(wav_files)
  24. print(f"Found {len(wav_files)} wav files")
  25. success_counter = 0
  26. fail_counter = 0
  27. with Pool(processes=cpu_count(), maxtasksperchild=100) as pool:
  28. with tqdm(pool.imap_unordered(convert_to_flac, wav_files), total=len(wav_files)) as pbar:
  29. for success in pbar:
  30. if success:
  31. success_counter += 1
  32. else:
  33. fail_counter += 1
  34. pbar.set_description(f"Success: {success_counter}, Fail: {fail_counter}")
  35. print(f"Successfully converted: {success_counter}")
  36. print(f"Failed conversions: {fail_counter}")