|
|
@@ -41,7 +41,7 @@ logger.add(sys.stderr, format=logger_format)
|
|
|
|
|
|
@lru_cache(maxsize=1)
|
|
|
def get_model(
|
|
|
- config_name: str = "vqgan",
|
|
|
+ config_name: str = "vqgan_pretrain",
|
|
|
checkpoint_path: str = "checkpoints/vqgan/step_000380000.ckpt",
|
|
|
):
|
|
|
with initialize(version_base="1.3", config_path="../../fish_speech/configs"):
|
|
|
@@ -72,7 +72,9 @@ def process_batch(files: list[Path], model) -> float:
|
|
|
|
|
|
for file in files:
|
|
|
try:
|
|
|
- wav, sr = torchaudio.load(file)
|
|
|
+ wav, sr = torchaudio.load(
|
|
|
+ str(file), backend="sox"
|
|
|
+ ) # Need to install libsox-dev
|
|
|
except Exception as e:
|
|
|
logger.error(f"Error reading {file}: {e}")
|
|
|
continue
|
|
|
@@ -169,11 +171,10 @@ def main(
|
|
|
if filelist:
|
|
|
files = [i[0] for i in load_filelist(filelist)]
|
|
|
else:
|
|
|
- files = list_files(folder, AUDIO_EXTENSIONS, recursive=True, sort=True)
|
|
|
+ files = list_files(folder, AUDIO_EXTENSIONS, recursive=True, sort=False)
|
|
|
|
|
|
print(f"Found {len(files)} files")
|
|
|
- files = [Path(f) for f in files if not Path(f).with_suffix(".npy").exists()]
|
|
|
- Random(42).shuffle(files)
|
|
|
+ # files = [Path(f) for f in files if not Path(f).with_suffix(".npy").exists()]
|
|
|
|
|
|
total_files = len(files)
|
|
|
files = files[RANK::WORLD_SIZE]
|