1 year ago · e1dab34bca
--- a/ai_tag_task.py
+++ b/ai_tag_task.py
@@ -21,7 +21,7 @@ log_ = Log()
 
				 features = ['videoid', 'title', 'video_path']
			
 
				 
			
 
				 
			
 
				-def get_video_ai_tags(video_id, asr_file, video_info):
			
 
				+def get_video_ai_tags(video_id, video_info):
			
 
				     try:
			
 
				         st_time = time.time()
			
 
				         log_message = {
			
@@ -31,9 +31,7 @@ def get_video_ai_tags(video_id, asr_file, video_info):
 
				         log_message['videoPath'] = video_info.get('video_path')
			
 
				         log_message['title'] = video_info.get('title')
			
 
				         # 1. 获取asr结果
			
 
				-        # asr_res_initial = get_whisper_asr(video=video_file)
			
 
				-        with open(asr_file, 'r', encoding='utf-8') as rf:
			
 
				-            asr_res_initial = rf.read()
			
 
				+        asr_res_initial = video_info.get('asr_res', '')
			
 
				         log_message['asrRes'] = asr_res_initial
			
 
				         # 2. 判断asr识别的文本是否有效
			
 
				         validity = asr_validity_discrimination(text=asr_res_initial)
			
@@ -198,6 +196,32 @@ def ai_tags(project, table, dt):
 
				                         shutil.rmtree(os.path.join(download_folder, video_id))
			
 
				 
			
 
				 
			
 
				+def get_asr_res(video_id):
			
 
				+    # URL of the API endpoint
			
 
				+    url = 'http://61.48.133.26:5999/video_to_text'
			
 
				+
			
 
				+    # Headers for the request
			
 
				+    headers = {
			
 
				+        'Content-Type': 'application/json'
			
 
				+    }
			
 
				+
			
 
				+    # Data to be sent in the request
			
 
				+    data = {
			
 
				+        "video_id": f"{video_id}"
			
 
				+    }
			
 
				+
			
 
				+    # Making the POST request
			
 
				+    response = requests.post(url, headers=headers, json=data)
			
 
				+
			
 
				+    # Checking if the request was successful
			
 
				+    if response.status_code == 200:
			
 
				+        # Extracting the 'text' field from the JSON response
			
 
				+        result_text = response.json().get('text', '无内容')
			
 
				+        return result_text
			
 
				+    else:
			
 
				+        return '无内容'
			
 
				+
			
 
				+
			
 
				 def ai_tags_new(project, table, dt):
			
 
				     # 获取特征数据
			
 
				     feature_df = get_feature_data(
			
@@ -212,31 +236,15 @@ def ai_tags_new(project, table, dt):
 
				         if title is None:
			
 
				             continue
			
 
				         title = title.strip()
			
 
				-        if len(title) > 0:
			
 
				-            video_info[video_id] = {'title': title, 'video_path': video_path}
			
 
				-            # print(video_id, title)
			
 
				-    print(len(video_info))
			
 
				-    # 获取已asr识别的视频
			
 
				-    asr_folder = 'asr_res'
			
 
				-    retry = 0
			
 
				-    while retry < 30:
			
 
				-        asr_file_list = os.listdir(asr_folder)
			
 
				-        if len(asr_file_list) < 1:
			
 
				-            retry += 1
			
 
				-            time.sleep(60)
			
 
				+        if len(title) < 1:
			
 
				             continue
			
 
				-        retry = 0
			
 
				-        for asr_filename in asr_file_list:
			
 
				-            video_id = asr_filename[:-4]
			
 
				-            if video_id not in video_id_list:
			
 
				-                continue
			
 
				-            asr_file = os.path.join(asr_folder, asr_filename)
			
 
				-            if video_info.get(video_id, None) is None:
			
 
				-                os.remove(asr_file)
			
 
				-            else:
			
 
				-                get_video_ai_tags(
			
 
				-                    video_id=video_id, asr_file=asr_file, video_info=video_info.get(video_id))
			
 
				-                os.remove(asr_file)
			
 
				+        # 获取asr结果
			
 
				+        asr_res = get_asr_res(video_id)
			
 
				+
			
 
				+        video_info[video_id] = {'title': title,
			
 
				+                                'video_path': video_path, 'asr_res': asr_res}
			
 
				+        get_video_ai_tags(video_id=video_id,
			
 
				+                          video_info=video_info.get(video_id))
			
 
				 
			
 
				 
			
 
				 def timer_check():