|
@@ -1,3 +1,4 @@
|
|
|
+import json
|
|
|
import traceback
|
|
|
|
|
|
from feishu import FeiShuHelper
|
|
@@ -46,79 +47,114 @@ def main(sheet_info_config):
|
|
|
# 1. 下载视频
|
|
|
video_id = video['videoId']
|
|
|
video_path = video['videoPath']
|
|
|
- video_file = download_video(video_path=video_path, video_id=video_id, download_folder='videos')
|
|
|
- # print(video_file)
|
|
|
- log_.info(f"video_path = {video_file}")
|
|
|
-
|
|
|
- # 2. 获取视频中的音频
|
|
|
- audio_path = get_wav(video_path=video_file)
|
|
|
- # print(audio_path)
|
|
|
- log_.info(f"audio_path = {audio_path}")
|
|
|
-
|
|
|
- # 3. asr
|
|
|
- dialogue_path, asr_res_initial = call_asr(audio_path=audio_path)
|
|
|
- # print(asr_res)
|
|
|
- log_.info(f"asr_res_initial = {asr_res_initial}")
|
|
|
-
|
|
|
- # 4. 判断asr识别的文本是否有效
|
|
|
- validity = asr_validity_discrimination(text=asr_res_initial)
|
|
|
- log_.info(f"validity = {validity}")
|
|
|
-
|
|
|
- # 5. 对asr结果进行清洗
|
|
|
- asr_res = asr_res_initial.strip().replace('\n', '')
|
|
|
- for stop_word in config_.STOP_WORDS:
|
|
|
- asr_res = asr_res.replace(stop_word, '')
|
|
|
- # token限制: 字数 <= 2500
|
|
|
- asr_res = asr_res[-2500:]
|
|
|
-
|
|
|
- # 6. gpt产出结果
|
|
|
- prompt = f"{config_.GPT_PROMPT['tags']['prompt4']}{asr_res.strip()}"
|
|
|
- gpt_res = get_tag(prompt=prompt)
|
|
|
- # print(gpt_res)
|
|
|
- log_.info(f"gpt_res = {gpt_res}")
|
|
|
-
|
|
|
- # 7. 结果写入飞书表格
|
|
|
- result = [[video_id, video_path, video['title'], str(validity), asr_res_initial, gpt_res, prompt]]
|
|
|
- log_.info(f"result = {result}")
|
|
|
- if len(result) > 0:
|
|
|
- feishu_helper.data_to_feishu_sheet(
|
|
|
- sheet_token=res_spreadsheet_token,
|
|
|
- sheet_id=res_sheet_id,
|
|
|
- data=result,
|
|
|
- start_row=write_start_row,
|
|
|
- start_column=write_start_col,
|
|
|
- end_column=write_end_col
|
|
|
- )
|
|
|
- log_.info(f"write to feishu success!")
|
|
|
- write_start_row += 1
|
|
|
+ if video_path[-4:] != '.mp4':
|
|
|
+ result = [[video_id, video_path, video['title'], '', '', '', '', '']]
|
|
|
+ log_.info(f"result = {result}")
|
|
|
+ if len(result) > 0:
|
|
|
+ feishu_helper.data_to_feishu_sheet(
|
|
|
+ sheet_token=res_spreadsheet_token,
|
|
|
+ sheet_id=res_sheet_id,
|
|
|
+ data=result,
|
|
|
+ start_row=write_start_row,
|
|
|
+ start_column=write_start_col,
|
|
|
+ end_column=write_end_col
|
|
|
+ )
|
|
|
+ log_.info(f"write to feishu success!")
|
|
|
+ write_start_row += 1
|
|
|
+ else:
|
|
|
+ try:
|
|
|
+ video_file = download_video(video_path=video_path, video_id=video_id, download_folder='videos')
|
|
|
+ # print(video_file)
|
|
|
+ log_.info(f"video_path = {video_file}")
|
|
|
+
|
|
|
+ # 2. 获取视频中的音频
|
|
|
+ audio_path = get_wav(video_path=video_file)
|
|
|
+ # print(audio_path)
|
|
|
+ log_.info(f"audio_path = {audio_path}")
|
|
|
+
|
|
|
+ # 3. asr
|
|
|
+ dialogue_path, asr_res_initial = call_asr(audio_path=audio_path)
|
|
|
+ # print(asr_res)
|
|
|
+ log_.info(f"asr_res_initial = {asr_res_initial}")
|
|
|
+ except:
|
|
|
+ log_.error(traceback.format_exc())
|
|
|
+ result = [[video_id, video_path, video['title'], '', '', '', '', '']]
|
|
|
+ log_.info(f"result = {result}")
|
|
|
+ if len(result) > 0:
|
|
|
+ feishu_helper.data_to_feishu_sheet(
|
|
|
+ sheet_token=res_spreadsheet_token,
|
|
|
+ sheet_id=res_sheet_id,
|
|
|
+ data=result,
|
|
|
+ start_row=write_start_row,
|
|
|
+ start_column=write_start_col,
|
|
|
+ end_column=write_end_col
|
|
|
+ )
|
|
|
+ log_.info(f"write to feishu success!")
|
|
|
+ write_start_row += 1
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 4. 判断asr识别的文本是否有效
|
|
|
+ validity = asr_validity_discrimination(text=asr_res_initial)
|
|
|
+ log_.info(f"validity = {validity}")
|
|
|
+ if validity is True:
|
|
|
+ # 5. 对asr结果进行清洗
|
|
|
+ asr_res = asr_res_initial.strip().replace('\n', '')
|
|
|
+ for stop_word in config_.STOP_WORDS:
|
|
|
+ asr_res = asr_res.replace(stop_word, '')
|
|
|
+ # token限制: 字数 <= 2500
|
|
|
+ asr_res = asr_res[-2500:]
|
|
|
+
|
|
|
+ # 6. gpt产出结果
|
|
|
+ prompt = f"{config_.GPT_PROMPT['tags']['prompt5']}{asr_res.strip()}"
|
|
|
+ gpt_res = get_tag(prompt=prompt)
|
|
|
+ # print(gpt_res)
|
|
|
+ log_.info(f"gpt_res = {gpt_res}, type = {type(gpt_res)}")
|
|
|
+
|
|
|
+ # 7. 结果写入飞书表格
|
|
|
+ if gpt_res is None:
|
|
|
+ result = [[video_id, video_path, video['title'], str(validity), asr_res_initial, prompt, '', '']]
|
|
|
+ else:
|
|
|
+ confidence_up_list = []
|
|
|
+ try:
|
|
|
+ for item in json.loads(gpt_res):
|
|
|
+ if item['confidence'] > 0.5:
|
|
|
+ confidence_up_list.append(item['category'])
|
|
|
+ except:
|
|
|
+ pass
|
|
|
+ confidence_up = ', '.join(confidence_up_list)
|
|
|
+ result = [[video_id, video_path, video['title'], str(validity), asr_res_initial,
|
|
|
+ prompt, gpt_res, confidence_up]]
|
|
|
+ else:
|
|
|
+ result = [[video_id, video_path, video['title'], str(validity), asr_res_initial, '', '', '']]
|
|
|
+ log_.info(f"result = {result}")
|
|
|
+ if len(result) > 0:
|
|
|
+ feishu_helper.data_to_feishu_sheet(
|
|
|
+ sheet_token=res_spreadsheet_token,
|
|
|
+ sheet_id=res_sheet_id,
|
|
|
+ data=result,
|
|
|
+ start_row=write_start_row,
|
|
|
+ start_column=write_start_col,
|
|
|
+ end_column=write_end_col
|
|
|
+ )
|
|
|
+ log_.info(f"write to feishu success!")
|
|
|
+ write_start_row += 1
|
|
|
except Exception as e:
|
|
|
log_.error(e)
|
|
|
log_.error(traceback.format_exc())
|
|
|
continue
|
|
|
|
|
|
- # 6. 结果写入飞书表格
|
|
|
- # if len(result) > 0:
|
|
|
- # feishu_helper.data_to_feishu_sheet(
|
|
|
- # sheet_token=res_spreadsheet_token,
|
|
|
- # sheet_id=res_sheet_id,
|
|
|
- # data=result,
|
|
|
- # start_row=write_start_row,
|
|
|
- # start_column=write_start_col,
|
|
|
- # end_column=write_end_col
|
|
|
- # )
|
|
|
-
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
sheet_info = {
|
|
|
'历史视频top5000回流倒叙排列': {
|
|
|
'video_spreadsheet_token': 'L4ywsRaV2hFLv1t4Athcdw71nde',
|
|
|
'video_sheet_id': 'hRjMrL',
|
|
|
- 'read_start_row': 2,
|
|
|
+ 'read_start_row': 44,
|
|
|
'res_spreadsheet_token': 'DkiUsqwJ6hmBxstBYyEcNE4ante',
|
|
|
'res_sheet_id': '7Fua00',
|
|
|
- 'write_start_row': 3,
|
|
|
+ 'write_start_row': 44,
|
|
|
'write_start_col': 'A',
|
|
|
- 'write_end_col': 'I'
|
|
|
+ 'write_end_col': 'H'
|
|
|
}
|
|
|
}
|
|
|
|