| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
- from pathlib import Path
- import subprocess
- from multiprocessing import Pool, cpu_count
- from tqdm import tqdm
- import random
def convert_to_flac(src_file_path):
    """Convert one audio file to FLAC with ffmpeg, then delete the original.

    The output is written next to the input, with the input's suffix
    replaced by ``.flac``. ffmpeg's stdout and stderr are discarded.

    Args:
        src_file_path: ``pathlib.Path`` of the audio file to convert.

    Returns:
        ``True`` if ffmpeg exited with status 0 and the source file was
        removed; ``False`` if ffmpeg exited with a non-zero status. On
        failure the source file is kept, and an output file created by this
        call is removed so no truncated ``.flac`` is left behind.

    Raises:
        OSError: If ffmpeg cannot be launched (e.g. it is not on ``PATH``)
            or the source file cannot be deleted after a successful
            conversion.
    """
    dst_file_path = src_file_path.with_suffix(".flac")

    # Remember whether the destination was already there: on failure we only
    # clean up files this call created. This also protects the source itself
    # when the input already carries a ".flac" suffix (dst == src).
    dst_existed = dst_file_path.exists()

    command = [
        "ffmpeg",
        "-y",  # overwrite the destination without prompting
        "-i",
        str(src_file_path),
        "-acodec",
        "flac",
        "-threads",
        "0",
        str(dst_file_path),
    ]

    try:
        subprocess.check_call(
            command,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        # ffmpeg may have written part of the output before failing; drop it
        # so a broken .flac is not mistaken for a finished conversion.
        if not dst_existed:
            try:
                dst_file_path.unlink()
            except FileNotFoundError:
                pass
        return False

    # Conversion succeeded: remove the input file.
    src_file_path.unlink()
    return True
if __name__ == "__main__":
    # Collect every .wav file under the dataset directory (recursively) and
    # process them in random order.
    dataset_root = Path("dataset/tts/WenetSpeech/cleaned")
    pending = [*dataset_root.rglob("*.wav")]
    random.shuffle(pending)
    print(f"Found {len(pending)} wav files")

    converted = 0
    failed = 0

    # One worker per CPU; each worker is replaced after 100 tasks.
    with Pool(processes=cpu_count(), maxtasksperchild=100) as workers:
        # Results arrive in completion order, not submission order.
        outcomes = workers.imap_unordered(convert_to_flac, pending)
        with tqdm(outcomes, total=len(pending)) as progress:
            for ok in progress:
                if not ok:
                    failed += 1
                else:
                    converted += 1

                # Keep the running tally visible on the progress bar.
                progress.set_description(f"Success: {converted}, Fail: {failed}")

    print(f"Successfully converted: {converted}")
    print(f"Failed conversions: {failed}")
|