Browse Source

changeAsr

sunxy 11 months ago
parent
commit
e1dab34bca
1 changed files with 36 additions and 28 deletions
  1. 36 28
      ai_tag_task.py

+ 36 - 28
ai_tag_task.py

@@ -21,7 +21,7 @@ log_ = Log()
 features = ['videoid', 'title', 'video_path']
 
 
-def get_video_ai_tags(video_id, asr_file, video_info):
+def get_video_ai_tags(video_id, video_info):
     try:
         st_time = time.time()
         log_message = {
@@ -31,9 +31,7 @@ def get_video_ai_tags(video_id, asr_file, video_info):
         log_message['videoPath'] = video_info.get('video_path')
         log_message['title'] = video_info.get('title')
         # 1. 获取asr结果
-        # asr_res_initial = get_whisper_asr(video=video_file)
-        with open(asr_file, 'r', encoding='utf-8') as rf:
-            asr_res_initial = rf.read()
+        asr_res_initial = video_info.get('asr_res', '')
         log_message['asrRes'] = asr_res_initial
         # 2. 判断asr识别的文本是否有效
         validity = asr_validity_discrimination(text=asr_res_initial)
@@ -198,6 +196,32 @@ def ai_tags(project, table, dt):
                         shutil.rmtree(os.path.join(download_folder, video_id))
 
 
+def get_asr_res(video_id, timeout=(10, 600)):
+    """Fetch the ASR transcript for a video from the video_to_text service.
+
+    Args:
+        video_id: Identifier of the video; sent to the service as a string.
+        timeout: ``requests`` timeout, either a number of seconds or a
+            ``(connect, read)`` tuple. The default read timeout is a guess
+            at how long a transcription may take -- TODO confirm against
+            the service's real latency.
+
+    Returns:
+        The ``text`` field of the JSON response, or ``'无内容'`` when the
+        request fails, the status code is not 200, the body is not a JSON
+        object, or the field is absent.
+    """
+    import logging
+
+    fallback = '无内容'
+    url = 'http://61.48.133.26:5999/video_to_text'
+    headers = {'Content-Type': 'application/json'}
+    payload = {'video_id': str(video_id)}
+
+    try:
+        # Without a timeout a stalled connection would block the caller's
+        # batch loop indefinitely.
+        response = requests.post(
+            url, headers=headers, json=payload, timeout=timeout)
+        if response.status_code != 200:
+            return fallback
+        body = response.json()
+    except (requests.RequestException, ValueError) as err:
+        # A network failure or a malformed body is treated like any other
+        # "no transcript" outcome instead of aborting the whole batch.
+        logging.getLogger(__name__).warning(
+            'get_asr_res failed for video_id=%s: %s', video_id, err)
+        return fallback
+
+    # Guard against a JSON payload that is valid but not an object.
+    if not isinstance(body, dict):
+        return fallback
+    return body.get('text', fallback)
+
+
 def ai_tags_new(project, table, dt):
     # 获取特征数据
     feature_df = get_feature_data(
@@ -212,31 +236,15 @@ def ai_tags_new(project, table, dt):
         if title is None:
             continue
         title = title.strip()
-        if len(title) > 0:
-            video_info[video_id] = {'title': title, 'video_path': video_path}
-            # print(video_id, title)
-    print(len(video_info))
-    # 获取已asr识别的视频
-    asr_folder = 'asr_res'
-    retry = 0
-    while retry < 30:
-        asr_file_list = os.listdir(asr_folder)
-        if len(asr_file_list) < 1:
-            retry += 1
-            time.sleep(60)
+        if len(title) < 1:
             continue
-        retry = 0
-        for asr_filename in asr_file_list:
-            video_id = asr_filename[:-4]
-            if video_id not in video_id_list:
-                continue
-            asr_file = os.path.join(asr_folder, asr_filename)
-            if video_info.get(video_id, None) is None:
-                os.remove(asr_file)
-            else:
-                get_video_ai_tags(
-                    video_id=video_id, asr_file=asr_file, video_info=video_info.get(video_id))
-                os.remove(asr_file)
+        # 获取asr结果
+        asr_res = get_asr_res(video_id)
+
+        video_info[video_id] = {'title': title,
+                                'video_path': video_path, 'asr_res': asr_res}
+        get_video_ai_tags(video_id=video_id,
+                          video_info=video_info.get(video_id))
 
 
 def timer_check():