import os.path
import traceback

import requests

from feishu import FeiShuHelper
from audio_process import get_wav
from xunfei_asr import RequestApi
from gpt_tag import get_tag
from config import set_config
from log import Log

config_ = set_config()
log_ = Log()

# Maximum number of sheet rows processed by a single call to main().
MAX_VIDEOS_PER_RUN = 100


def download_video(video_url, video_id, download_folder, ftype='mp4',
                   timeout=(10, 60)):
    """Download a video to ``{download_folder}/{video_id}.{ftype}``.

    :param video_url: URL the video is fetched from.
    :param video_id: identifier used as the local file name (no extension).
    :param download_folder: target directory; created if it does not exist.
    :param ftype: file extension of the saved file.
    :param timeout: ``requests`` timeout, ``(connect, read)`` in seconds, so
        that a stalled server cannot block the pipeline forever.
    :return: path of the saved file, or ``None`` when the server does not
        answer with HTTP 200.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(download_folder, exist_ok=True)
    filename = f"{download_folder}/{video_id}.{ftype}"
    # The context manager releases the streamed connection on every path.
    with requests.get(video_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            log_.error(
                f"download failed, status_code = {response.status_code}, "
                f"video_url = {video_url}"
            )
            return None
        with open(filename, "wb") as video_file:
            for chunk in response.iter_content(chunk_size=8192):
                video_file.write(chunk)
    return filename


def call_asr(audio_path):
    """Run ASR on an audio file and store the transcript next to it.

    The transcript is written to the audio path with its extension replaced
    by ``.txt``.

    :param audio_path: path of the audio file to upload.
    :return: the parsed ASR text as returned by ``RequestApi.parse_lattice``.
    """
    api = RequestApi(
        appid=config_.XFASR_CONFIG['appid'],
        secret_key=config_.XFASR_CONFIG['secret_key'],
        upload_file_path=audio_path,
    )
    order_id = api.upload()
    result = api.get_result(order_id)
    asr_res = api.parse_lattice(result)
    # Swap only the final extension: str.replace('.wav', '.txt') would touch
    # every occurrence in the path and, for a path without '.wav', would
    # make the transcript overwrite the audio file itself.
    dialogue_path = os.path.splitext(audio_path)[0] + '.txt'
    # Explicit UTF-8 so transcripts do not depend on the platform locale.
    with open(dialogue_path, 'w', encoding='utf-8') as f:
        f.write(asr_res)
    return asr_res


def main(sheet_info_config):
    """Process the videos listed in a Feishu sheet and write back the results.

    For each video row: download the video, extract its audio, run ASR, get
    the GPT tagging result and write one result row to the output sheet.
    A failure on one video is logged and does not stop the others.

    :param sheet_info_config: dict with the keys ``video_spreadsheet_token``,
        ``video_sheet_id``, ``read_start_row``, ``res_spreadsheet_token``,
        ``res_sheet_id``, ``write_start_row``, ``write_start_col`` and
        ``write_end_col``.
    """
    video_spreadsheet_token = sheet_info_config['video_spreadsheet_token']
    video_sheet_id = sheet_info_config['video_sheet_id']
    read_start_row = sheet_info_config['read_start_row']
    res_spreadsheet_token = sheet_info_config['res_spreadsheet_token']
    res_sheet_id = sheet_info_config['res_sheet_id']
    write_start_row = sheet_info_config['write_start_row']
    write_start_col = sheet_info_config['write_start_col']
    write_end_col = sheet_info_config['write_end_col']

    # 1. Read the Feishu sheet to get each video's url and videoId.
    feishu_helper = FeiShuHelper()
    data = feishu_helper.get_data(
        spreadsheet_token=video_spreadsheet_token,
        sheet_id=video_sheet_id,
    )
    videos = []
    # The column layout depends on which source sheet is being read; rows
    # of any other sheet id are ignored.
    for item in data[read_start_row:read_start_row + MAX_VIDEOS_PER_RUN]:
        if video_sheet_id == 'nz1pRo':
            videos.append(
                {
                    'videoId': item[1],
                    'url': item[2][0]['text'],
                    'title': item[6],
                }
            )
        elif video_sheet_id == '3ba53c':
            videos.append(
                {
                    'videoId': item[0],
                    'url': item[1][0]['text'],
                }
            )
    log_.info(f"videos count: {len(videos)}")

    for i, video in enumerate(videos):
        try:
            log_.info(f"i = {i}, video = {video}")
            # 2. Download the video.
            video_id = video['videoId']
            video_url = video['url']
            video_path = download_video(
                video_url=video_url,
                video_id=video_id,
                download_folder='videos',
            )
            if video_path is None:
                # Nothing was saved, so there is nothing to transcribe.
                log_.error(f"download failed, skip video_id = {video_id}")
                continue
            print(video_path)
            log_.info(f"video_path = {video_path}")

            # 3. Extract the audio track from the video.
            audio_path = get_wav(video_path=video_path)
            print(audio_path)
            log_.info(f"audio_path = {audio_path}")

            # 4. ASR.
            asr_res = call_asr(audio_path=audio_path)
            print(asr_res)
            log_.info(f"asr_res = {asr_res}")

            # 5. GPT result.
            gpt_res = get_tag(text=asr_res)
            print(gpt_res)
            log_.info(f"gpt_res = {gpt_res}")

            # Reset per video so a row from an earlier iteration can never
            # be written a second time.
            result = []
            if video_sheet_id == 'nz1pRo':
                result = [[video_id, video_url, video['title'], asr_res, gpt_res]]
            elif video_sheet_id == '3ba53c':
                result = [[video_id, video_url, asr_res, gpt_res]]
            log_.info(f"result = {result}")

            # 6. Write the result row to the Feishu sheet; the target row
            # only advances after a successful write.
            if len(result) > 0:
                feishu_helper.data_to_feishu_sheet(
                    sheet_token=res_spreadsheet_token,
                    sheet_id=res_sheet_id,
                    data=result,
                    start_row=write_start_row,
                    start_column=write_start_col,
                    end_column=write_end_col
                )
                log_.info("write to feishu success!")
                write_start_row += 1
        except Exception as e:
            # Best effort: log the failure and move on to the next video.
            log_.error(e)
            log_.error(traceback.format_exc())
            continue


if __name__ == '__main__':
    # Batch mode over the configured sheets (currently disabled). The dict
    # keys are only printed as labels for each run.
    # sheet_info = {
    #     'daily title review records': {
    #         'video_spreadsheet_token': 'shtcn1fmHJ2z0oc3j9OScBOlAbe',
    #         'video_sheet_id': 'nz1pRo',
    #         'read_start_row': 1,
    #         'res_spreadsheet_token': 'DkiUsqwJ6hmBxstBYyEcNE4ante',
    #         'res_sheet_id': '08d4cc',
    #         'write_start_row': 2,
    #         'write_start_col': 'A',
    #         'write_end_col': 'E'
    #     },
    #     'top videos whose content topic etc. needs to be identified': {
    #         'video_spreadsheet_token': 'shtcndUUt61ItHYp8C8goBp7Sah',
    #         'video_sheet_id': '3ba53c',
    #         'read_start_row': 1,
    #         'res_spreadsheet_token': 'DkiUsqwJ6hmBxstBYyEcNE4ante',
    #         'res_sheet_id': 'LErgi2',
    #         'write_start_row': 2,
    #         'write_start_col': 'A',
    #         'write_end_col': 'D'
    #     }
    # }
    #
    # for sheet_tag, sheet_item in sheet_info.items():
    #     print(sheet_tag)
    #     main(sheet_info_config=sheet_item)

    # Single-video run of the whole pipeline.
    video_path = download_video(
        video_url='http://rescdn.yishihui.com/longvideo/video/vpc/20230420/22421791F3yZJNHSelDuvs04zd',
        video_id='001',
        download_folder='videos',
        ftype='mp4')
    if video_path is None:
        raise SystemExit("video download failed")
    print(video_path)

    # 3. Extract the audio track from the video.
    audio_path = get_wav(video_path=video_path)
    print(audio_path)
    log_.info(f"audio_path = {audio_path}")

    # 4. ASR.
    asr_res = call_asr(audio_path=audio_path)
    print(asr_res)
    log_.info(f"asr_res = {asr_res}")

    # 5. GPT result.
    gpt_res = get_tag(text=asr_res)
    print(gpt_res)