|
@@ -17,7 +17,7 @@ log_ = Log()
|
|
|
# Feature-table columns requested from get_feature_data for each video.
features = ['videoid', 'title', 'video_path']
|
|
|
|
|
|
|
|
|
-def get_video_ai_tags(video_id, video_file, video_info):
|
|
|
+def get_video_ai_tags(video_id, asr_file, video_info):
|
|
|
try:
|
|
|
st_time = time.time()
|
|
|
log_message = {
|
|
@@ -26,8 +26,10 @@ def get_video_ai_tags(video_id, video_file, video_info):
|
|
|
title = video_info.get('title')
|
|
|
log_message['videoPath'] = video_info.get('video_path')
|
|
|
log_message['title'] = video_info.get('title')
|
|
|
- # 1. asr
|
|
|
- asr_res_initial = get_whisper_asr(video=video_file)
|
|
|
+ # 1. 获取asr结果
|
|
|
+ # asr_res_initial = get_whisper_asr(video=video_file)
|
|
|
+ with open(asr_file, 'r', encoding='utf-8') as rf:
|
|
|
+ asr_res_initial = rf.read()
|
|
|
log_message['asrRes'] = asr_res_initial
|
|
|
# 2. 判断asr识别的文本是否有效
|
|
|
validity = asr_validity_discrimination(text=asr_res_initial)
|
|
@@ -156,6 +158,43 @@ def ai_tags(project, table, dt):
|
|
|
shutil.rmtree(os.path.join(download_folder, video_id))
|
|
|
|
|
|
|
|
|
def ai_tags_new(project, table, dt):
    """Run AI tagging for videos whose ASR transcripts are already on disk.

    Loads the feature rows for the requested partition, keeps the videos
    that have a non-empty title, then polls the ``asr_res`` folder and calls
    ``get_video_ai_tags`` for every transcript file that belongs to this
    batch. A transcript file is deleted once it has been handled (tagged, or
    discarded because its video has no usable title). Files whose id is not
    in the feature data are left untouched.

    Polling ends after 5 idle passes; each idle pass is followed by a
    60-second sleep. A pass is idle when the folder holds fewer than two
    files or when none of the files present belongs to this batch.

    Args:
        project: Forwarded to ``get_feature_data``.
        table: Forwarded to ``get_feature_data``.
        dt: Forwarded to ``get_feature_data``.
    """
    # Fetch the feature data for this partition.
    feature_df = get_feature_data(project=project, table=table, dt=dt, features=features)
    video_id_list = feature_df['videoid'].to_list()
    # Set membership keeps the per-file check O(1) instead of scanning the list.
    video_id_set = set(video_id_list)

    # Build video_id -> {'title', 'video_path'} in a single pass over the rows.
    # Only the first row of each video id is considered, matching a
    # "first match wins" lookup on the frame.
    video_info = {}
    seen_ids = set()
    rows = zip(video_id_list, feature_df['title'], feature_df['video_path'])
    for video_id, title, video_path in rows:
        if video_id in seen_ids:
            continue
        seen_ids.add(video_id)
        # Missing titles may arrive as None or as a non-string NaN; skip both.
        if not isinstance(title, str):
            continue
        title = title.strip()
        if title:
            video_info[video_id] = {'title': title, 'video_path': video_path}
    print(len(video_info))

    # Pick up the videos whose ASR output has already been produced.
    asr_folder = 'asr_res'
    retry = 0
    while retry < 5:
        asr_file_list = os.listdir(asr_folder)
        # NOTE(review): the "< 2" threshold presumably leaves room for a file
        # that is still being written - confirm with the ASR producer.
        if len(asr_file_list) < 2:
            retry += 1
            time.sleep(60)
            continue

        handled = 0
        for asr_filename in asr_file_list:
            # Strip the 4-character suffix; assumes names like '<id>.txt'.
            # NOTE(review): the id is a str here - verify 'videoid' values
            # in the feature data are strings as well.
            video_id = asr_filename[:-4]
            if video_id not in video_id_set:
                continue
            asr_file = os.path.join(asr_folder, asr_filename)
            info = video_info.get(video_id)
            if info is not None:
                get_video_ai_tags(video_id=video_id, asr_file=asr_file, video_info=info)
            # The transcript is consumed whether or not it was tagged.
            os.remove(asr_file)
            handled += 1

        if handled == 0:
            # Nothing in the folder belongs to this batch. Count the pass as
            # idle so leftover files cannot keep the loop spinning forever
            # without sleeping.
            retry += 1
            time.sleep(60)
|
|
|
+
|
|
|
+
|
|
|
def timer_check():
|
|
|
try:
|
|
|
project = config_.DAILY_VIDEO['project']
|
|
@@ -168,7 +207,7 @@ def timer_check():
|
|
|
if data_count > 0:
|
|
|
print(f'videos count = {data_count}')
|
|
|
# 数据准备好,进行视频下载
|
|
|
- ai_tags(project=project, table=table, dt=dt)
|
|
|
+ ai_tags_new(project=project, table=table, dt=dt)
|
|
|
print(f"videos ai tag finished!")
|
|
|
|
|
|
else:
|