create_train_split.py 832 B

1234567891011121314151617181920212223242526272829303132333435
  1. from pathlib import Path
  2. from random import Random
  3. import click
  4. from tqdm import tqdm
  5. from fish_speech.utils.file import AUDIO_EXTENSIONS, list_files
  6. @click.command()
  7. @click.argument("root", type=click.Path(exists=True, path_type=Path))
  8. def main(root):
  9. files = list_files(root, AUDIO_EXTENSIONS, recursive=True)
  10. print(f"Found {len(files)} files")
  11. files = [
  12. str(file.relative_to(root))
  13. for file in tqdm(files)
  14. if file.with_suffix(".npy").exists()
  15. ]
  16. print(f"Found {len(files)} files with features")
  17. Random(42).shuffle(files)
  18. with open(root / "vq_train_filelist.txt", "w") as f:
  19. f.write("\n".join(files[:-100]))
  20. with open(root / "vq_val_filelist.txt", "w") as f:
  21. f.write("\n".join(files[-100:]))
  22. print("Done")
  23. if __name__ == "__main__":
  24. main()