sunxy 1 рік тому
батько
коміт
fa78e38c3c
4 змінені файли: 19 додано, 16 видалено
  1. asr_task.py (+4 -2)
  2. asr_task.sh (+1 -1)
  3. requirements.txt (+10 -10)
  4. whisper_asr.py (+4 -3)

+ 4 - 2
asr_task.py

@@ -11,6 +11,7 @@ from threading import Timer
 from whisper_asr import get_whisper_asr
 from gpt_tag import request_gpt
 from config import set_config
+from audio_process import get_wav
 from log import Log
 config_ = set_config()
 log_ = Log()
@@ -23,15 +24,16 @@ def get_asr(video_id, download_folder, asr_folder):
         video_type = filename.split('.')[-1]
         if video_type in ['mp4', 'm3u8']:
             video_file = os.path.join(video_folder, filename)
+            audio_path = get_wav(video_file)
             # 1. asr识别
-            asr_res_initial = get_whisper_asr(video=video_file)
+            asr_res_initial = get_whisper_asr(audio=audio_path)
             print(video_id, asr_res_initial)
             # 2. 识别结果写入文件
             asr_path = os.path.join(asr_folder, f"{video_id}.txt")
             with open(asr_path, 'w', encoding='utf-8') as wf:
                 wf.write(asr_res_initial)
             # 将处理过的视频进行删除
-            shutil.rmtree(os.path.join(download_folder, video_id))
+            # shutil.rmtree(os.path.join(download_folder, video_id))
             break
 
 

+ 1 - 1
asr_task.sh

@@ -2,7 +2,7 @@ ps -ef | grep asr_task.py | grep -v grep | awk '{print $2}' | xargs kill -9
 
 rm -r asr_res/
 
-source activate whisper
+source activate aigc-test
 
 CUDA_VISIBLE_DEVICES=0 python asr_task.py 0 > logs/asr_task_0.log &
 CUDA_VISIBLE_DEVICES=1 python asr_task.py 1 > logs/asr_task_1.log

+ 10 - 10
requirements.txt

@@ -1,10 +1,10 @@
-pyodps==0.10.7
-moviepy==1.0.3
-requests==2.31.0
-openai_whisper==20230314
-zhconv==1.4.3
-oss2==2.14.0
-pandas==1.1.3
-aliyun_python_sdk==2.2.0
-odps==3.5.1
-whisper==1.1.10
+pyodps
+moviepy
+requests
+openai_whisper
+zhconv
+oss2
+pandas
+aliyun_python_sdk
+odps
+git+https://github.com/openai/whisper.git

+ 4 - 3
whisper_asr.py

@@ -2,13 +2,14 @@ import whisper
 from zhconv import convert
 
 
-def get_whisper_asr(video, model='medium'):
+def get_whisper_asr(audio, model='medium'):
     try:
         model = whisper.load_model(model)
-        result = model.transcribe(video, language='Chinese')
+        result = model.transcribe(audio, language='Chinese')
         # 简繁转换
         # zh-cn 大陆简体
         # zh-hant 繁体
         return convert(result['text'], 'zh-cn')
-    except:
+    except Exception as e:
+        print(f'asr error: {audio}. e: {e}')
         return ''