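"""Video tagging pipeline: read video rows from a Feishu spreadsheet, download
each video, extract its audio, run Xunfei ASR on the audio, tag the transcript
with GPT, and write the per-video results back to a Feishu result sheet.
"""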
import json
import traceback

from feishu import FeiShuHelper
from audio_process import get_wav
from xunfei_asr import call_asr
from utils import download_video, asr_validity_discrimination
from gpt_tag import get_tag, request_gpt
from config import set_config
from log import Log

config_ = set_config()
log_ = Log()


def main(sheet_info_config):
    video_spreadsheet_token = sheet_info_config['video_spreadsheet_token']
    video_sheet_id = sheet_info_config['video_sheet_id']
    read_start_row = sheet_info_config['read_start_row']
    res_spreadsheet_token = sheet_info_config['res_spreadsheet_token']
    res_sheet_id = sheet_info_config['res_sheet_id']
    write_start_row = sheet_info_config['write_start_row']
    write_start_col = sheet_info_config['write_start_col']
    write_end_col = sheet_info_config['write_end_col']
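
    # read_start_row offsets into the rows fetched from the source sheet;
    # write_start_row is a row cursor in the result sheet and is advanced by
    # one after each result row is written.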

    # 1. Read the Feishu sheet to get each video's URL and videoId.
    feishu_helper = FeiShuHelper()
    data = feishu_helper.get_data(spreadsheet_token=video_spreadsheet_token, sheet_id=video_sheet_id)
    videos = []
    for item in data[read_start_row:]:
        print(item)
        try:
            videos.append(
                {
                    'videoId': item[0],
                    'title': item[1],
                    'videoPath': item[3][0]['text'],
                }
            )
        except Exception:
            # Skip rows that do not match the expected column layout.
            continue
    log_.info(f"videos count: {len(videos)}")
    def write_result_row(result):
        # Append one result row to the result sheet, then advance the row
        # cursor (factors out the write block repeated at three call sites).
        nonlocal write_start_row
        if len(result) > 0:
            feishu_helper.data_to_feishu_sheet(
                sheet_token=res_spreadsheet_token,
                sheet_id=res_sheet_id,
                data=result,
                start_row=write_start_row,
                start_column=write_start_col,
                end_column=write_end_col
            )
            log_.info("write to feishu success!")
            write_start_row += 1

    for i, video in enumerate(videos):
        try:
            log_.info(f"i = {i}, video = {video}")
            # 2. Download the video.
            video_id = video['videoId']
            video_path = video['videoPath']
            if not video_path.endswith('.mp4'):
                # Not a direct .mp4 link: record a row with empty result fields.
                result = [[video_id, video_path, video['title'], '', '', '', '', '']]
                log_.info(f"result = {result}")
                write_result_row(result)
            else:
                try:
                    video_file = download_video(video_path=video_path, video_id=video_id, download_folder='videos')
                    log_.info(f"video_path = {video_file}")
                    # 3. Extract the audio track from the video.
                    audio_path = get_wav(video_path=video_file)
                    log_.info(f"audio_path = {audio_path}")
                    # 4. Run ASR on the audio.
                    dialogue_path, asr_res_initial = call_asr(audio_path=audio_path)
                    log_.info(f"asr_res_initial = {asr_res_initial}")
                except Exception:
                    # Download, audio extraction, or ASR failed: record a row
                    # with empty result fields and move on to the next video.
                    log_.error(traceback.format_exc())
                    result = [[video_id, video_path, video['title'], '', '', '', '', '']]
                    log_.info(f"result = {result}")
                    write_result_row(result)
                    continue

                # 5. Decide whether the ASR transcript is valid/usable.
                validity = asr_validity_discrimination(text=asr_res_initial)
                log_.info(f"validity = {validity}")
                if validity is True:
                    # 6. Clean the ASR transcript.
                    asr_res = asr_res_initial.strip().replace('\n', '')
                    for stop_word in config_.STOP_WORDS:
                        asr_res = asr_res.replace(stop_word, '')
                    # Token limit: keep only the last 2500 characters.
                    asr_res = asr_res[-2500:]
                    # 7. Tag the transcript with GPT.
                    prompt = f"{config_.GPT_PROMPT['tags']['prompt5']}{asr_res.strip()}"
                    # gpt_res = get_tag(prompt=prompt)
                    gpt_res = request_gpt(prompt=prompt)
                    log_.info(f"gpt_res = {gpt_res}, type = {type(gpt_res)}")
                    # 8. Assemble the result row for the Feishu sheet.
                    if gpt_res is None:
                        result = [[video_id, video_path, video['title'], str(validity), asr_res_initial, prompt, '', '']]
                    else:
                        # Keep only the categories tagged with confidence > 0.5.
                        confidence_up_list = []
                        try:
                            for item in json.loads(gpt_res):
                                if item['confidence'] > 0.5:
                                    confidence_up_list.append(item['category'])
                        except Exception:
                            pass
                        confidence_up = ', '.join(confidence_up_list)
                        result = [[video_id, video_path, video['title'], str(validity), asr_res_initial,
                                   prompt, gpt_res, confidence_up]]
                else:
                    # Transcript judged invalid: record the validity flag only.
                    result = [[video_id, video_path, video['title'], str(validity), asr_res_initial, '', '', '']]
                log_.info(f"result = {result}")
                write_result_row(result)
        except Exception as e:
            log_.error(e)
            log_.error(traceback.format_exc())
            continue


if __name__ == '__main__':
    sheet_info = {
        '历史视频top5000回流倒叙排列': {  # top 5000 historical videos by returning users, descending
            'video_spreadsheet_token': 'L4ywsRaV2hFLv1t4Athcdw71nde',
            'video_sheet_id': 'hRjMrL',
            'read_start_row': 1456,
            'res_spreadsheet_token': 'DkiUsqwJ6hmBxstBYyEcNE4ante',
            'res_sheet_id': '7Fua00',
            'write_start_row': 1455,
            'write_start_col': 'A',
            'write_end_col': 'H'
        }
    }
    for sheet_tag, sheet_item in sheet_info.items():
        print(sheet_tag)
        main(sheet_info_config=sheet_item)
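
    # Scratch code for manually testing individual pipeline steps: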
    # video_path = download_video(
    #     video_url='http://rescdn.yishihui.com/longvideo/video/vpc/20230420/22421791F3yZJNHSelDuvs04zd',
    #     video_id='001', download_folder='videos', ftype='mp4')
    # print(video_path)
    # # Extract the audio track from the video.
    # audio_path = get_wav(video_path=video_path)
    # print(audio_path)
    # log_.info(f"audio_path = {audio_path}")
    # # Run ASR on the audio.
    # asr_res = call_asr(audio_path=audio_path)
    # print(asr_res)
    # log_.info(f"asr_res = {asr_res}")
    # # Tag the transcript with GPT.
    # gpt_res = get_tag(text=asr_res)
    # print(gpt_res)