datafileprocess.py 1.5 KB

1234567891011121314151617181920212223242526272829303132
  1. import os
  2. def geberateAudioPath(filepath,orifilepath):
  3. data_path = "/Users/tzld/Downloads/LJSpeech-1.1/wavs/"
  4. with open(filepath, 'w') as f:
  5. with open(orifilepath,'r') as f1:
  6. while True:
  7. content = f1.readline()
  8. if content =='':
  9. break
  10. # try:
  11. filename = content.split('|')[0].split('/')[-1]
  12. path = data_path+filename
  13. str = path + '|' + content.split('|')[1] + '|' + content.split('|')[2]
  14. f.write(str)
  15. # except:
  16. # print(content)
  17. if __name__ == '__main__':
  18. trainfilepath = './ljs_audiopaths_text_sid_train_filelist.txt'
  19. valfilepath = './ljs_audiopaths_text_sid_val_filelist.txt'
  20. targettrainfilepath = './ljs_audiopaths_text_sid_train_filelist_new.txt'
  21. targetvalfilepath = './ljs_audiopaths_text_sid_val_filelist_new.txt'
  22. geberateAudioPath(targettrainfilepath,trainfilepath)
  23. geberateAudioPath(targetvalfilepath,valfilepath)
  24. #
  25. #
  26. # origin_tarin_data_df["ID"] = origin_tarin_data_df.apply(lambda s:data_path + s['ID'].split('/')[-1],axis=1)
  27. # origin_val_data_df["ID"] = origin_tarin_data_df.apply(lambda s:data_path + s['ID'].split('/')[-1],axis=1)
  28. #
  29. # origin_tarin_data_df.to_csv("./ljs_audiopaths_text_sid_train_filelist_new.txt",sep="|",header=False,index=False)
  30. # origin_val_data_df.to_csv("./ljs_audiopaths_text_sid_val_filelist_new.txt",sep="|",header=False,index=False)