# Pipeline script: read video rows from a Feishu sheet, download each video, extract its audio, run Xunfei ASR, tag the transcript with GPT, and write the results back to Feishu.
- import os.path
- import traceback
- import requests
- from feishu import FeiShuHelper
- from audio_process import get_wav
- from xunfei_asr import RequestApi
- from gpt_tag import get_tag
- from config import set_config
- from log import Log
# Project configuration object; call_asr reads XFASR_CONFIG['appid'] and
# XFASR_CONFIG['secret_key'] from it.
config_ = set_config()
# Shared logger; the pipeline uses its info() and error() methods.
log_ = Log()
def download_video(video_url, video_id, download_folder, ftype='mp4', timeout=60):
    """Download a video over HTTP and save it to disk.

    The file is written to ``<download_folder>/<video_id>.<ftype>`` in
    8 KiB chunks so the whole video is never held in memory.

    Args:
        video_url: URL to fetch.
        video_id: Identifier used as the file name stem.
        download_folder: Target directory; created if it does not exist.
        ftype: File extension without the leading dot.
        timeout: Seconds handed to ``requests.get`` so a stalled server
            cannot block the pipeline forever.

    Returns:
        The path of the written file.

    Raises:
        requests.HTTPError: If the server answers with a status other than
            200. Previously this case left ``filename`` unbound.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(download_folder, exist_ok=True)
    filename = f"{download_folder}/{video_id}.{ftype}"

    # The context manager releases the streamed connection on every path.
    with requests.get(video_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            raise requests.HTTPError(
                f"download failed for {video_url}: HTTP {response.status_code}",
                response=response,
            )
        with open(filename, "wb") as video_file:
            for chunk in response.iter_content(chunk_size=8192):
                video_file.write(chunk)
    return filename
def call_asr(audio_path):
    """Run speech recognition on an audio file and save the transcript.

    The audio is uploaded through ``RequestApi`` using the credentials in
    ``config_.XFASR_CONFIG``, the result for the returned order id is
    fetched, and ``parse_lattice`` turns it into the transcript. The
    transcript is also written next to the audio file with a ``.txt``
    extension.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The value produced by ``api.parse_lattice`` (presumably the
        transcript text; it is written to the file as-is).
    """
    api = RequestApi(
        appid=config_.XFASR_CONFIG['appid'],
        secret_key=config_.XFASR_CONFIG['secret_key'],
        upload_file_path=audio_path,
    )
    order_id = api.upload()
    result = api.get_result(order_id)
    asr_res = api.parse_lattice(result)

    # Swap only the final extension. str.replace('.wav', '.txt') would also
    # rewrite directory names, and for a path without '.wav' it would leave
    # the path unchanged and overwrite the audio file with the transcript.
    dialogue_path = os.path.splitext(audio_path)[0] + '.txt'
    # Explicit UTF-8 so non-ASCII transcripts do not depend on the platform
    # default encoding.
    with open(dialogue_path, 'w', encoding='utf-8') as f:
        f.write(asr_res)
    return asr_res
def _parse_video_rows(rows, video_sheet_id):
    """Convert raw sheet rows into video dicts, skipping rows that cannot be parsed.

    The column layout depends on the source sheet id. An unrecognised sheet
    id yields an empty list.
    """
    videos = []
    for row_no, item in enumerate(rows):
        try:
            if video_sheet_id == 'nz1pRo':
                videos.append({
                    'videoId': item[1],
                    'url': item[2][0]['text'],
                    'title': item[6],
                })
            elif video_sheet_id == '3ba53c':
                videos.append({
                    'videoId': item[0],
                    'url': item[1][0]['text'],
                })
        except (IndexError, KeyError, TypeError):
            # A blank or short row must not abort the whole batch.
            log_.error(f"skip malformed row {row_no}: {item}")
            log_.error(traceback.format_exc())
    return videos


def main(sheet_info_config):
    """Process the videos listed in one Feishu sheet and write results to another.

    For every video row: download the video, extract its audio, run ASR,
    obtain the GPT tag result, and write one result row to the output
    sheet. A failure on one video is logged and the loop moves on to the
    next; the output row only advances after a successful write.

    Args:
        sheet_info_config: Dict with the keys ``video_spreadsheet_token``,
            ``video_sheet_id``, ``read_start_row``,
            ``res_spreadsheet_token``, ``res_sheet_id``,
            ``write_start_row``, ``write_start_col`` and ``write_end_col``.
            The optional key ``read_max_rows`` (default 100) limits how
            many rows are read starting at ``read_start_row``.
    """
    video_spreadsheet_token = sheet_info_config['video_spreadsheet_token']
    video_sheet_id = sheet_info_config['video_sheet_id']
    read_start_row = sheet_info_config['read_start_row']
    res_spreadsheet_token = sheet_info_config['res_spreadsheet_token']
    res_sheet_id = sheet_info_config['res_sheet_id']
    write_start_row = sheet_info_config['write_start_row']
    write_start_col = sheet_info_config['write_start_col']
    write_end_col = sheet_info_config['write_end_col']
    # Formerly a hard-coded 100; the default keeps existing configs working.
    read_max_rows = sheet_info_config.get('read_max_rows', 100)

    # 1. Read the Feishu sheet to get each video's url and videoId.
    feishu_helper = FeiShuHelper()
    data = feishu_helper.get_data(spreadsheet_token=video_spreadsheet_token, sheet_id=video_sheet_id)
    # Guard against get_data returning nothing at all.
    rows = (data or [])[read_start_row:read_start_row + read_max_rows]
    videos = _parse_video_rows(rows, video_sheet_id)
    log_.info(f"videos count: {len(videos)}")

    for i, video in enumerate(videos):
        try:
            log_.info(f"i = {i}, video = {video}")
            # 2. Download the video.
            video_id = video['videoId']
            video_url = video['url']
            video_path = download_video(video_url=video_url, video_id=video_id, download_folder='videos')
            print(video_path)
            log_.info(f"video_path = {video_path}")

            # 3. Extract the audio track from the video.
            audio_path = get_wav(video_path=video_path)
            print(audio_path)
            log_.info(f"audio_path = {audio_path}")

            # 4. Speech recognition.
            asr_res = call_asr(audio_path=audio_path)
            print(asr_res)
            log_.info(f"asr_res = {asr_res}")

            # 5. GPT tagging result.
            gpt_res = get_tag(text=asr_res)
            print(gpt_res)
            log_.info(f"gpt_res = {gpt_res}")

            # Only rows from the 'nz1pRo' sheet carry a title; every other
            # parsed video comes from '3ba53c'.
            if 'title' in video:
                result = [[video_id, video_url, video['title'], asr_res, gpt_res]]
            else:
                result = [[video_id, video_url, asr_res, gpt_res]]
            log_.info(f"result = {result}")

            # 6. Write the result row to the Feishu output sheet.
            feishu_helper.data_to_feishu_sheet(
                sheet_token=res_spreadsheet_token,
                sheet_id=res_sheet_id,
                data=result,
                start_row=write_start_row,
                start_column=write_start_col,
                end_column=write_end_col
            )
            log_.info("write to feishu success!")
            # Advance only after a successful write so failures leave no gaps.
            write_start_row += 1
        except Exception as e:
            # Per-video boundary: log and carry on with the next video.
            log_.error(e)
            log_.error(traceback.format_exc())
if __name__ == '__main__':
    # Batch mode over the configured sheets is currently disabled; the
    # configuration is kept below for reference. The two entries are the
    # daily title review record sheet and the top-videos sheet that needs
    # topic recognition.
    # sheet_info = {
    #     '每日标题审核记录': {
    #         'video_spreadsheet_token': 'shtcn1fmHJ2z0oc3j9OScBOlAbe',
    #         'video_sheet_id': 'nz1pRo',
    #         'read_start_row': 1,
    #         'res_spreadsheet_token': 'DkiUsqwJ6hmBxstBYyEcNE4ante',
    #         'res_sheet_id': '08d4cc',
    #         'write_start_row': 2,
    #         'write_start_col': 'A',
    #         'write_end_col': 'E'
    #     },
    #     'top 视频需要识别内容主题等信息': {
    #         'video_spreadsheet_token': 'shtcndUUt61ItHYp8C8goBp7Sah',
    #         'video_sheet_id': '3ba53c',
    #         'read_start_row': 1,
    #         'res_spreadsheet_token': 'DkiUsqwJ6hmBxstBYyEcNE4ante',
    #         'res_sheet_id': 'LErgi2',
    #         'write_start_row': 2,
    #         'write_start_col': 'A',
    #         'write_end_col': 'D'
    #     }
    # }
    #
    # for sheet_tag, sheet_item in sheet_info.items():
    #     print(sheet_tag)
    #     main(sheet_info_config=sheet_item)

    # Single-video smoke run of the pipeline stages.
    sample_download = {
        'video_url': 'http://rescdn.yishihui.com/longvideo/video/vpc/20230420/22421791F3yZJNHSelDuvs04zd',
        'video_id': '001',
        'download_folder': 'videos',
        'ftype': 'mp4',
    }
    local_video = download_video(**sample_download)
    print(local_video)

    # Extract the audio track from the downloaded video.
    wav_file = get_wav(video_path=local_video)
    print(wav_file)
    log_.info(f"audio_path = {wav_file}")

    # Speech recognition on the extracted audio.
    transcript = call_asr(audio_path=wav_file)
    print(transcript)
    log_.info(f"asr_res = {transcript}")

    # GPT tagging of the transcript.
    tags = get_tag(text=transcript)
    print(tags)
|