123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- import json
- import traceback
- from feishu import FeiShuHelper
- from audio_process import get_wav
- from xunfei_asr import call_asr
- from utils import download_video, asr_validity_discrimination
- from gpt_tag import get_tag, request_gpt
- from config import set_config
- from log import Log
# Module-level singletons shared by main(): config_ is read for STOP_WORDS and GPT_PROMPT, log_ is used for info/error logging.
- config_ = set_config()
- log_ = Log()
# Number of cells in every result row, in order: validity, prompt1, gpt_res1,
# summary, keywords, prompt2, gpt_res2, confidence_up.
_RESULT_ROW_WIDTH = 8


def _load_videos(rows):
    """Convert raw sheet rows into video dicts, skipping rows that are too short.

    Each row is indexed positionally: column 0 is the video id, column 2 the
    ASR text and column 3 the title.
    """
    videos = []
    for row in rows:
        try:
            videos.append({
                'videoId': row[0],
                'title': row[3],
                'asrRes': row[2],
            })
        except (IndexError, KeyError, TypeError):
            # Best effort: a short or empty row is skipped, not fatal.
            log_.info(f"skip malformed row: {row}")
    return videos


def _high_confidence_categories(gpt_res2):
    """Return the comma-joined categories whose confidence exceeds 0.5.

    ``gpt_res2`` is parsed as JSON and iterated; each entry is read through
    its 'confidence' and 'category' keys. If parsing or iteration fails part
    way through, the categories collected so far are still returned.
    """
    categories = []
    try:
        for entry in json.loads(gpt_res2):
            if entry['confidence'] > 0.5:
                categories.append(entry['category'])
    except Exception:
        # Malformed model output must not discard the rest of the row.
        log_.error(traceback.format_exc())
    return ', '.join(categories)


def _build_result_row(video):
    """Build one fixed-width result row for a single video.

    The row always has ``_RESULT_ROW_WIDTH`` cells; stages that did not run
    or failed are left as empty strings. Exceptions raised before the first
    GPT response is received (validity check, text cleaning, first request)
    propagate to the caller.
    """
    asr_text = video['asrRes']
    title = video['title']

    validity = asr_validity_discrimination(text=asr_text)
    log_.info(f"validity = {validity}")
    row = [str(validity)]

    if validity is True:
        # Clean the ASR text: drop newlines and every configured stop word.
        cleaned = asr_text.strip().replace('\n', '')
        for stop_word in config_.STOP_WORDS:
            cleaned = cleaned.replace(stop_word, '')
        # Keep only the last 2500 characters (presumably to bound prompt size).
        cleaned = cleaned[-2500:]

        # Stage 1: ask for a JSON object with 'summary' and 'keywords'.
        prompt1 = f"{config_.GPT_PROMPT['tags']['prompt6']}{cleaned.strip()}"
        gpt_res1 = request_gpt(prompt=prompt1)
        log_.info(f"gpt_res1 = {gpt_res1}, type = {type(gpt_res1)}")
        row.append(prompt1)

        if gpt_res1 is not None:
            row.append(gpt_res1)
            try:
                parsed = json.loads(gpt_res1)
                summary = parsed['summary']
                keywords = parsed['keywords']
                # Appended before stage 2 so they survive a stage-2 failure.
                row.extend([summary, str(keywords)])

                # Stage 2: classify from title + summary + keywords.
                prompt2_param = f"标题:{title}\n概况:{summary}\n关键词:{keywords}"
                prompt2 = f"{config_.GPT_PROMPT['tags']['prompt7']}{prompt2_param}"
                log_.info(f"prompt2: {prompt2}")
                gpt_res2 = request_gpt(prompt=prompt2)
                log_.info(f"gpt_res2 = {gpt_res2}, type = {type(gpt_res2)}")

                if gpt_res2 is not None:
                    row.extend([
                        prompt2,
                        gpt_res2,
                        _high_confidence_categories(gpt_res2),
                    ])
            except Exception:
                # Keep whatever was collected; the padding below fills the rest.
                log_.error(traceback.format_exc())

    # Every exit path yields the same number of cells, so the row always
    # matches the width of the target column range.
    row.extend([''] * (_RESULT_ROW_WIDTH - len(row)))
    return row


def main(sheet_info_config):
    """Tag every video listed in a Feishu sheet and write one result row each.

    Args:
        sheet_info_config: dict with the keys 'video_spreadsheet_token',
            'video_sheet_id', 'read_start_row' (index of the first data row
            in the fetched sheet data), 'res_spreadsheet_token',
            'res_sheet_id', 'write_start_row', 'write_start_col' and
            'write_end_col' (target range for the result rows).

    A failure while processing one video is logged and the loop moves on to
    the next video.
    """
    video_spreadsheet_token = sheet_info_config['video_spreadsheet_token']
    video_sheet_id = sheet_info_config['video_sheet_id']
    read_start_row = sheet_info_config['read_start_row']
    res_spreadsheet_token = sheet_info_config['res_spreadsheet_token']
    res_sheet_id = sheet_info_config['res_sheet_id']
    write_start_row = sheet_info_config['write_start_row']
    write_start_col = sheet_info_config['write_start_col']
    write_end_col = sheet_info_config['write_end_col']

    feishu_helper = FeiShuHelper()
    data = feishu_helper.get_data(
        spreadsheet_token=video_spreadsheet_token,
        sheet_id=video_sheet_id,
    )
    videos = _load_videos(data[read_start_row:])
    log_.info(f"videos count: {len(videos)}")

    for i, video in enumerate(videos):
        try:
            log_.info(f"i = {i}, video = {video}")
            result = [_build_result_row(video)]
            log_.info(f"result = {result}")
            feishu_helper.update_values(
                sheet_token=res_spreadsheet_token,
                sheet_id=res_sheet_id,
                data=result,
                start_row=write_start_row,
                start_column=write_start_col,
                end_column=write_end_col,
            )
            log_.info("write to feishu success!")
            # NOTE(review): the target row only advances after a successful
            # write, so a failed video leaves no gap and later results shift
            # up by one row -- confirm this is intended when results are
            # written next to their source rows.
            write_start_row += 1
        except Exception as e:
            log_.error(e)
            log_.error(traceback.format_exc())
            continue
if __name__ == '__main__':
    # The source rows and the results share one spreadsheet and one sheet.
    spreadsheet_token = 'DkiUsqwJ6hmBxstBYyEcNE4ante'
    sheet_id = 'tbd971'

    # One entry per job: tag -> sheet configuration passed to main().
    sheet_info = {
        'top100新promt-0605': dict(
            video_spreadsheet_token=spreadsheet_token,
            video_sheet_id=sheet_id,
            read_start_row=1,
            res_spreadsheet_token=spreadsheet_token,
            res_sheet_id=sheet_id,
            write_start_row=2,
            write_start_col='E',
            write_end_col='L',
        ),
    }

    for sheet_tag in sheet_info:
        print(sheet_tag)
        main(sheet_info_config=sheet_info[sheet_tag])
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
|