|
@@ -44,31 +44,30 @@ def task_generator_yaml(config):
|
|
|
|
|
|
|
|
logger.info(f"Found {len(grouped_files)} groups in {root}")
|
|
logger.info(f"Found {len(grouped_files)} groups in {root}")
|
|
|
for name, subset in grouped_files.items():
|
|
for name, subset in grouped_files.items():
|
|
|
- yield name, subset, source, languages, extension, None
|
|
|
|
|
|
|
+ yield name, subset, source, languages, extension
|
|
|
|
|
|
|
|
|
|
|
|
|
def task_generator_filelist(filelist):
|
|
def task_generator_filelist(filelist):
|
|
|
grouped_files = defaultdict(list)
|
|
grouped_files = defaultdict(list)
|
|
|
for filename, speaker, languages, text in load_filelist(filelist):
|
|
for filename, speaker, languages, text in load_filelist(filelist):
|
|
|
- if speaker in grouped_files:
|
|
|
|
|
- assert (
|
|
|
|
|
- languages == grouped_files[speaker][0][2]
|
|
|
|
|
- ), f"Speaker {speaker} has different languages"
|
|
|
|
|
-
|
|
|
|
|
grouped_files[speaker].append((Path(filename), text, languages))
|
|
grouped_files[speaker].append((Path(filename), text, languages))
|
|
|
|
|
|
|
|
logger.info(f"Found {len(grouped_files)} groups in {filelist}")
|
|
logger.info(f"Found {len(grouped_files)} groups in {filelist}")
|
|
|
for speaker, values in grouped_files.items():
|
|
for speaker, values in grouped_files.items():
|
|
|
- for filename, txt, languages in values:
|
|
|
|
|
- yield speaker, filename, "filelist", languages, None, txt
|
|
|
|
|
|
|
+ yield speaker, values, "filelist", languages, None
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_task(task):
|
|
def run_task(task):
|
|
|
- name, subset, source, languages, extension, text = task
|
|
|
|
|
|
|
+ name, subset, source, languages, extension = task
|
|
|
|
|
|
|
|
# Parse the files
|
|
# Parse the files
|
|
|
sentences = []
|
|
sentences = []
|
|
|
for file in subset:
|
|
for file in subset:
|
|
|
|
|
+ if isinstance(file, tuple):
|
|
|
|
|
+ file, text, languages = file
|
|
|
|
|
+ else:
|
|
|
|
|
+ text = None
|
|
|
|
|
+
|
|
|
np_file = file.with_suffix(".npy")
|
|
np_file = file.with_suffix(".npy")
|
|
|
if np_file.exists() is False:
|
|
if np_file.exists() is False:
|
|
|
logger.warning(f"Can't find {np_file}")
|
|
logger.warning(f"Can't find {np_file}")
|