main_process.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. import os.path
  2. import traceback
  3. import requests
  4. from feishu import FeiShuHelper
  5. from audio_process import get_wav
  6. from xunfei_asr import RequestApi
  7. from gpt_tag import get_tag
  8. from config import set_config
  9. from log import Log
# Project configuration object; call_asr() reads the ASR credentials
# (XFASR_CONFIG) from it.
config_ = set_config()
# Logger shared by the functions in this module.
log_ = Log()
  12. def download_video(video_url, video_id, download_folder, ftype='mp4'):
  13. if not os.path.exists(download_folder):
  14. os.makedirs(download_folder)
  15. response = requests.get(video_url, stream=True)
  16. if response.status_code == 200:
  17. filename = f"{download_folder}/{video_id}.{ftype}"
  18. with open(filename, "wb") as video_file:
  19. for chunk in response.iter_content(chunk_size=8192):
  20. video_file.write(chunk)
  21. return filename
  22. def call_asr(audio_path):
  23. api = RequestApi(appid=config_.XFASR_CONFIG['appid'],
  24. secret_key=config_.XFASR_CONFIG['secret_key'],
  25. upload_file_path=audio_path)
  26. order_id = api.upload()
  27. result = api.get_result(order_id)
  28. asr_res = api.parse_lattice(result)
  29. dialogue_path = audio_path.replace('.wav', '.txt')
  30. with open(dialogue_path, 'w') as f:
  31. f.write(asr_res)
  32. return asr_res
  33. def main(sheet_info_config):
  34. video_spreadsheet_token = sheet_info_config['video_spreadsheet_token']
  35. video_sheet_id = sheet_info_config['video_sheet_id']
  36. read_start_row = sheet_info_config['read_start_row']
  37. res_spreadsheet_token = sheet_info_config['res_spreadsheet_token']
  38. res_sheet_id = sheet_info_config['res_sheet_id']
  39. write_start_row = sheet_info_config['write_start_row']
  40. write_start_col = sheet_info_config['write_start_col']
  41. write_end_col = sheet_info_config['write_end_col']
  42. # 1. 读取飞书表格,获取视频url和videoId
  43. feishu_helper = FeiShuHelper()
  44. data = feishu_helper.get_data(spreadsheet_token=video_spreadsheet_token, sheet_id=video_sheet_id)
  45. videos = []
  46. for item in data[read_start_row:read_start_row+100]:
  47. if video_sheet_id == 'nz1pRo':
  48. videos.append(
  49. {
  50. 'videoId': item[1],
  51. 'url': item[2][0]['text'],
  52. 'title': item[6]
  53. }
  54. )
  55. elif video_sheet_id == '3ba53c':
  56. videos.append(
  57. {
  58. 'videoId': item[0],
  59. 'url': item[1][0]['text']
  60. }
  61. )
  62. log_.info(f"videos count: {len(videos)}")
  63. result = []
  64. for i, video in enumerate(videos):
  65. try:
  66. log_.info(f"i = {i}, video = {video}")
  67. # 2. 下载视频
  68. video_id = video['videoId']
  69. video_url = video['url']
  70. video_path = download_video(video_url=video_url, video_id=video_id, download_folder='videos')
  71. print(video_path)
  72. log_.info(f"video_path = {video_path}")
  73. # 3. 获取视频中的音频
  74. audio_path = get_wav(video_path=video_path)
  75. print(audio_path)
  76. log_.info(f"audio_path = {audio_path}")
  77. # 4. asr
  78. asr_res = call_asr(audio_path=audio_path)
  79. print(asr_res)
  80. log_.info(f"asr_res = {asr_res}")
  81. # 5. gpt产出结果
  82. gpt_res = get_tag(text=asr_res)
  83. print(gpt_res)
  84. log_.info(f"gpt_res = {gpt_res}")
  85. if video_sheet_id == 'nz1pRo':
  86. result = [[video_id, video_url, video['title'], asr_res, gpt_res]]
  87. elif video_sheet_id == '3ba53c':
  88. result = [[video_id, video_url, asr_res, gpt_res]]
  89. log_.info(f"result = {result}")
  90. # 6. 结果写入飞书表格
  91. if len(result) > 0:
  92. feishu_helper.data_to_feishu_sheet(
  93. sheet_token=res_spreadsheet_token,
  94. sheet_id=res_sheet_id,
  95. data=result,
  96. start_row=write_start_row,
  97. start_column=write_start_col,
  98. end_column=write_end_col
  99. )
  100. log_.info(f"write to feishu success!")
  101. write_start_row += 1
  102. except Exception as e:
  103. log_.error(e)
  104. log_.error(traceback.format_exc())
  105. continue
  106. # 6. 结果写入飞书表格
  107. # if len(result) > 0:
  108. # feishu_helper.data_to_feishu_sheet(
  109. # sheet_token=res_spreadsheet_token,
  110. # sheet_id=res_sheet_id,
  111. # data=result,
  112. # start_row=write_start_row,
  113. # start_column=write_start_col,
  114. # end_column=write_end_col
  115. # )
  116. if __name__ == '__main__':
  117. # sheet_info = {
  118. # '每日标题审核记录': {
  119. # 'video_spreadsheet_token': 'shtcn1fmHJ2z0oc3j9OScBOlAbe',
  120. # 'video_sheet_id': 'nz1pRo',
  121. # 'read_start_row': 1,
  122. # 'res_spreadsheet_token': 'DkiUsqwJ6hmBxstBYyEcNE4ante',
  123. # 'res_sheet_id': '08d4cc',
  124. # 'write_start_row': 2,
  125. # 'write_start_col': 'A',
  126. # 'write_end_col': 'E'
  127. # },
  128. # 'top 视频需要识别内容主题等信息': {
  129. # 'video_spreadsheet_token': 'shtcndUUt61ItHYp8C8goBp7Sah',
  130. # 'video_sheet_id': '3ba53c',
  131. # 'read_start_row': 1,
  132. # 'res_spreadsheet_token': 'DkiUsqwJ6hmBxstBYyEcNE4ante',
  133. # 'res_sheet_id': 'LErgi2',
  134. # 'write_start_row': 2,
  135. # 'write_start_col': 'A',
  136. # 'write_end_col': 'D'
  137. # }
  138. # }
  139. #
  140. # for sheet_tag, sheet_item in sheet_info.items():
  141. # print(sheet_tag)
  142. # main(sheet_info_config=sheet_item)
  143. video_path = download_video(
  144. video_url='http://rescdn.yishihui.com/longvideo/video/vpc/20230420/22421791F3yZJNHSelDuvs04zd',
  145. video_id='001', download_folder='videos', ftype='mp4')
  146. print(video_path)
  147. # 3. 获取视频中的音频
  148. audio_path = get_wav(video_path=video_path)
  149. print(audio_path)
  150. log_.info(f"audio_path = {audio_path}")
  151. # 4. asr
  152. asr_res = call_asr(audio_path=audio_path)
  153. print(asr_res)
  154. log_.info(f"asr_res = {asr_res}")
  155. # 5. gpt产出结果
  156. gpt_res = get_tag(text=asr_res)
  157. print(gpt_res)