gpt_process.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. import os
  2. import time
  3. import requests
  4. import json
  5. import traceback
  6. from utils import download_video
  7. from audio_process import get_wav
  8. from xunfei_asr import call_asr
  9. from config import set_config
  10. from log import Log
# Module-wide configuration object, created once at import time by set_config();
# the functions below read their settings (GPT_URL, RETRY_MAX_COUNT, STOP_WORDS,
# GPT_PROMPT) from it.
config_ = set_config()
# Module-wide logger instance; title_generate() uses it to record the ASR result.
log_ = Log()
  13. def request_gpt(prompt):
  14. """
  15. headers = {
  16. 'Content-Type': 'application/json',
  17. 'Authorization': f'Bearer {config_.GPT_OPENAI_API_KEY}',
  18. }
  19. proxies = config_.PROXIES
  20. json_data = {
  21. 'model': 'gpt-3.5-turbo',
  22. 'messages': [
  23. {
  24. 'role': 'user',
  25. 'content': prompt,
  26. },
  27. ],
  28. }
  29. response = requests.post(url=config_.GPT_HOST, headers=headers, json=json_data, proxies=proxies)
  30. """
  31. retry_count = 0
  32. result_content = None
  33. while retry_count < config_.RETRY_MAX_COUNT:
  34. retry_count += 1
  35. try:
  36. # response = requests.post(url=config_.GPT_URL, json={'content': prompt, 'auth': config_.GPT_OPENAI_API_KEY})
  37. response = requests.post(url=config_.GPT_URL, json={'content': prompt})
  38. # print(response.json())
  39. # print(response.json()['choices'][0]['message']['content'])
  40. # print('\n')
  41. # result_content = response.json()['choices'][0]['message']['content']
  42. # log_.info(f"response.text: {response.text}")
  43. res_data = json.loads(response.text)
  44. if res_data['code'] != 0:
  45. time.sleep(10)
  46. continue
  47. result_content = res_data['data']['choices'][0]['message']['content']
  48. except Exception:
  49. time.sleep(10)
  50. continue
  51. return result_content
  52. def title_generate(video_id, video_path):
  53. """
  54. 视频生成标题
  55. :param video_id: videoId
  56. :param video_path: videoPath
  57. :return:
  58. """
  59. generate_filepath = dict()
  60. # 1. 下载视频
  61. # log_.info(f"debug: title_generate 1")
  62. video_file_path = download_video(video_path=video_path, video_id=video_id, download_folder='videos')
  63. generate_filepath['video_file_path'] = video_file_path
  64. # log_.info({'videoId': video_id, 'video_file_path': video_file_path})
  65. # 2. 获取视频中的音频
  66. # log_.info(f"debug: title_generate 2")
  67. audio_path = get_wav(video_path=video_file_path)
  68. generate_filepath['audio_path'] = audio_path
  69. # log_.info({'videoId': video_id, 'audio_path': audio_path})
  70. # 3. asr
  71. # log_.info(f"debug: title_generate 3")
  72. dialogue_path, asr_res = call_asr(audio_path=audio_path)
  73. generate_filepath['dialogue_path'] = dialogue_path
  74. log_.info({
  75. 'asrResult': {'videoId': video_id, 'asrRes': asr_res}
  76. })
  77. # 4. gpt产出结果
  78. # log_.info(f"debug: title_generate 4")
  79. # 对asr结果进行清洗
  80. asr_res = asr_res.strip().replace('\n', '')
  81. for stop_word in config_.STOP_WORDS:
  82. asr_res = asr_res.replace(stop_word, '')
  83. # token限制: 字数 <= 2500
  84. asr_res = asr_res[-2500:]
  85. prompt = f"{config_.GPT_PROMPT['title']['prompt2']}{asr_res}"
  86. gpt_res = request_gpt(prompt=prompt)
  87. return gpt_res, generate_filepath
  88. if __name__ == '__main__':
  89. title_generate(video_id='001', video_path='')