Explorar o código

Fix utf-8 encoding

Lengyue hai 2 anos
pai
achega
1854897e3c
Modificouse 1 ficheiro con 1 adición e 1 borrado
  1. 1 1
      preparing_data/whisper_asr.py

+ 1 - 1
preparing_data/whisper_asr.py

@@ -142,7 +142,7 @@ def main(folder: str, rank: int, world_size: int, num_workers: int):
 
 
         # Write to file
         # Write to file
         for file, transcription in zip(batch, trascriptions):
         for file, transcription in zip(batch, trascriptions):
-            Path(file).with_suffix(".whisper.txt").write_text(transcription)
+            Path(file).with_suffix(".whisper.txt").write_text(transcription, encoding="utf-8")
 
 
     logger.info(
     logger.info(
         f"{RANK_STR}Finished processing {len(files)} files, {total_time / 3600:.2f} hours of audio"
         f"{RANK_STR}Finished processing {len(files)} files, {total_time / 3600:.2f} hours of audio"