123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168 |
- import os
- import shutil
- import json
- import datetime
- import sys
- import time
- import traceback
- import requests
- import multiprocessing
- from threading import Timer
- from whisper_asr import get_whisper_asr
- from gpt_tag import request_gpt
- from config import set_config
- from log import Log
# Module-level setup, executed once at import time.
config_ = set_config()  # value returned by config.set_config()
log_ = Log()  # instance of log.Log
# Field names of a video record; not referenced in the visible part of this
# file -- presumably consumed elsewhere (TODO confirm).
features = [
    'videoid',
    'title',
    'video_path',
]
def get_asr(video_id, download_folder, asr_folder):
    """Transcribe the first video found for ``video_id`` and save the text.

    Scans ``download_folder/video_id`` for the first entry whose extension is
    ``mp4`` or ``m3u8`` (case-insensitive), runs ``get_whisper_asr`` on it,
    writes the result to ``asr_folder/<video_id>.txt`` and then removes the
    video's download directory. When no matching file is found nothing is
    written and the directory is left in place.

    Args:
        video_id: Name of the per-video sub-directory in ``download_folder``.
        download_folder: Directory holding one sub-directory per video.
        asr_folder: Existing directory that receives the transcript file.

    Returns:
        None.
    """
    video_folder = os.path.join(download_folder, video_id)

    # The caller submits every entry of the download folder, so a stray
    # regular file would otherwise make os.listdir raise NotADirectoryError.
    if not os.path.isdir(video_folder):
        print(f"{video_folder} is not a directory, skipped!")
        return

    for filename in os.listdir(video_folder):
        # Compare the extension case-insensitively so "clip.MP4" is accepted.
        video_type = filename.split('.')[-1].lower()
        if video_type not in ('mp4', 'm3u8'):
            continue

        video_file = os.path.join(video_folder, filename)

        # NOTE(review): get_whisper_asr is assumed to return a str, since the
        # result is written to a text file below -- confirm against whisper_asr.
        asr_res_initial = get_whisper_asr(video=video_file)
        print(video_id, asr_res_initial)

        asr_path = os.path.join(asr_folder, f"{video_id}.txt")
        with open(asr_path, 'w', encoding='utf-8') as wf:
            wf.write(asr_res_initial)

        # The download is deleted only after the transcript has been written.
        shutil.rmtree(video_folder)
        break
def main(argv=None):
    """Run ASR over every video directory of one GPU's download folder.

    Reads the CUDA id from ``argv[1]``, looks for ``videos_<cuda_id>`` in the
    current working directory and submits each of its entries to ``get_asr``
    on a pool of five worker processes. Transcripts go to ``asr_res``.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.

    Returns:
        Process exit status: 1 when the CUDA id argument is missing, 0
        otherwise (including when there is nothing to process).
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'script'
        print(f"usage: {prog} <cuda_id>", file=sys.stderr)
        return 1

    cuda_id = argv[1]
    download_folder = f'videos_{cuda_id}'
    if not os.path.exists(download_folder):
        print(f"download_folder: {download_folder} not exists!")
        return 0

    video_folder_list = os.listdir(download_folder)
    if not video_folder_list:
        print("video_folder_list is empty!")
        return 0

    asr_folder = 'asr_res'
    os.makedirs(asr_folder, exist_ok=True)

    pool = multiprocessing.Pool(processes=5)
    # Keep the AsyncResult handles: apply_async stores a worker's exception
    # in the handle and it is lost unless .get() is called.
    pending = [
        (
            video_id,
            pool.apply_async(
                func=get_asr,
                args=(video_id, download_folder, asr_folder),
            ),
        )
        for video_id in video_folder_list
    ]
    pool.close()
    pool.join()

    # All tasks are done after join(); .get() returns or re-raises at once.
    for video_id, result in pending:
        try:
            result.get()
        except Exception:
            # Report and continue so one bad video does not hide the others.
            print(f"asr failed for {video_id}:", file=sys.stderr)
            traceback.print_exc()

    print("videos asr finished!")
    return 0


if __name__ == '__main__':
    sys.exit(main())
|