1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- import os
- import time
- import requests
- import json
- import traceback
- from utils import download_video
- from audio_process import get_wav
- from xunfei_asr import call_asr
- from config import set_config
- from log import Log
- config_ = set_config()  # Configuration object; this file reads GPT_URL, RETRY_MAX_COUNT, STOP_WORDS and GPT_PROMPT from it.
- log_ = Log()  # Logger instance; title_generate uses it to record the ASR result.
def request_gpt(prompt):
    """
    Send a prompt to the GPT service and return the generated text.

    The prompt is POSTed to ``config_.GPT_URL`` as JSON ``{'content': prompt}``.
    A reply is treated as successful when its JSON body has ``code == 0``; the
    text is then read from ``data.choices[0].message.content``. Any other
    outcome (non-zero code, network error, timeout, malformed body) is logged
    and retried, for at most ``config_.RETRY_MAX_COUNT`` attempts in total.

    :param prompt: full prompt text to send
    :return: the generated content, or None if every attempt failed
    """
    # Pause between failed attempts, in seconds.
    retry_interval = 10
    # (connect, read) timeout in seconds. Without one, a stalled connection
    # blocks forever and the retry loop never gets a chance to run.
    request_timeout = (10, 300)

    max_attempts = config_.RETRY_MAX_COUNT
    attempt = 0
    while attempt < max_attempts:
        attempt += 1
        try:
            response = requests.post(
                url=config_.GPT_URL,
                json={'content': prompt},
                timeout=request_timeout,
            )
            res_data = json.loads(response.text)
            if res_data['code'] == 0:
                # Return on the first success so no further requests are sent.
                return res_data['data']['choices'][0]['message']['content']
            log_.info({
                'requestGptFailed': {'attempt': attempt, 'code': res_data['code']}
            })
        except Exception:
            # Deliberately broad: any failure here (network, JSON decoding,
            # unexpected response shape) should trigger a retry, but it must
            # not disappear silently.
            log_.info({
                'requestGptError': {'attempt': attempt, 'traceback': traceback.format_exc()}
            })
        # No point in sleeping once the last attempt has been used up.
        if attempt < max_attempts:
            time.sleep(retry_interval)
    return None
def title_generate(video_id, video_path):
    """
    Generate a title for a video from its speech transcript.

    Pipeline: download the video, extract its audio, run ASR on the audio,
    clean the transcript, then send it to GPT together with the title prompt.

    :param video_id: identifier of the video
    :param video_path: source location of the video, passed to download_video
    :return: tuple ``(gpt_res, generate_filepath)`` - the value returned by
        request_gpt, and a dict with the keys 'video_file_path', 'audio_path'
        and 'dialogue_path' for the files produced along the way
    """
    # Step 1: download the video into the 'videos' folder.
    local_video = download_video(video_path=video_path, video_id=video_id, download_folder='videos')

    # Step 2: extract the audio track from the downloaded video.
    wav_file = get_wav(video_path=local_video)

    # Step 3: speech recognition on the extracted audio.
    dialogue_file, transcript = call_asr(audio_path=wav_file)
    log_.info({
        'asrResult': {'videoId': video_id, 'asrRes': transcript}
    })

    # Step 4: clean the ASR text before handing it to GPT.
    cleaned = transcript.strip()
    cleaned = cleaned.replace('\n', '')
    for word in config_.STOP_WORDS:
        cleaned = cleaned.replace(word, '')
    # Token limit: keep at most the trailing 2500 characters.
    cleaned = cleaned[-2500:]

    title_prompt = config_.GPT_PROMPT['title']['prompt2']
    gpt_res = request_gpt(prompt=f"{title_prompt}{cleaned}")

    generate_filepath = {
        'video_file_path': local_video,
        'audio_path': wav_file,
        'dialogue_path': dialogue_file,
    }
    return gpt_res, generate_filepath
- if __name__ == '__main__':  # Script entry point: only runs when the file is executed directly.
- title_generate(video_id='001', video_path='')  # Single run with video_id '001' and an empty video_path; the return value is discarded.
|