liqian 1 năm trước cách đây
mục cha
commit
6b0719c7a3
1 tập tin đã thay đổi với 43 bổ sung và 4 xóa
  1. 43 4
      ai_tag_task.py

+ 43 - 4
ai_tag_task.py

@@ -17,7 +17,7 @@ log_ = Log()
 features = ['videoid', 'title', 'video_path']
 
 
-def get_video_ai_tags(video_id, video_file, video_info):
+def get_video_ai_tags(video_id, asr_file, video_info):
     try:
         st_time = time.time()
         log_message = {
@@ -26,8 +26,10 @@ def get_video_ai_tags(video_id, video_file, video_info):
         title = video_info.get('title')
         log_message['videoPath'] = video_info.get('video_path')
         log_message['title'] = video_info.get('title')
-        # 1. asr
-        asr_res_initial = get_whisper_asr(video=video_file)
+        # 1. 获取asr结果
+        # asr_res_initial = get_whisper_asr(video=video_file)
+        with open(asr_file, 'r', encoding='utf-8') as rf:
+            asr_res_initial = rf.read()
         log_message['asrRes'] = asr_res_initial
         # 2. 判断asr识别的文本是否有效
         validity = asr_validity_discrimination(text=asr_res_initial)
@@ -156,6 +158,43 @@ def ai_tags(project, table, dt):
                         shutil.rmtree(os.path.join(download_folder, video_id))
 
 
+def ai_tags_new(project, table, dt):  # Tag videos from ASR text files already present in 'asr_res'.
+    # Fetch the feature rows (columns named in the module-level `features`) for project/table/dt.
+    feature_df = get_feature_data(project=project, table=table, dt=dt, features=features)
+    video_id_list = feature_df['videoid'].to_list()  # presumably a pandas DataFrame - TODO confirm
+    video_info = {}  # video_id -> {'title', 'video_path'}; only videos with a non-empty title
+    for video_id in video_id_list:
+        title = feature_df[feature_df['videoid'] == video_id]['title'].values[0]  # first matching row
+        video_path = feature_df[feature_df['videoid'] == video_id]['video_path'].values[0]
+        if title is None:  # no title: leave this video out of video_info
+            continue
+        title = title.strip()
+        if len(title) > 0:  # whitespace-only titles are left out as well
+            video_info[video_id] = {'title': title, 'video_path': video_path}
+            # print(video_id, title)
+    print(len(video_info))  # number of videos that have a usable title
+    # Process the videos whose ASR result file already exists in the folder below.
+    asr_folder = 'asr_res'  # relative path, resolved against the current working directory
+    retry = 0  # counts only the "folder nearly empty" waits; never reset or bumped after processing
+    while retry < 5:  # exits after five waits in total (not necessarily consecutive)
+        asr_file_list = os.listdir(asr_folder)
+        if len(asr_file_list) < 2:  # NOTE(review): a single leftover file is never processed - intended?
+            retry += 1
+            time.sleep(60)  # wait one minute before listing the folder again
+            continue
+
+        for asr_filename in asr_file_list:
+            video_id = asr_filename[:-4]  # drops the last 4 chars; presumably a '.txt' suffix - TODO confirm
+            if video_id not in video_id_list:  # NOTE(review): str vs. videoid dtype; skipped files stay, loop may spin
+                continue
+            asr_file = os.path.join(asr_folder, asr_filename)
+            if video_info.get(video_id, None) is None:  # known id but no usable title: drop the ASR file
+                os.remove(asr_file)
+            else:
+                get_video_ai_tags(video_id=video_id, asr_file=asr_file, video_info=video_info.get(video_id))
+                os.remove(asr_file)  # remove once tagged; not reached if the call above raises
+
+
 def timer_check():
     try:
         project = config_.DAILY_VIDEO['project']
@@ -168,7 +207,7 @@ def timer_check():
         if data_count > 0:
             print(f'videos count = {data_count}')
             # 数据准备好,进行视频下载
-            ai_tags(project=project, table=table, dt=dt)
+            ai_tags_new(project=project, table=table, dt=dt)
             print(f"videos ai tag finished!")
 
         else: