nrfx_carry_video.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341
  1. import html
  2. import json
  3. import os
  4. import random
  5. import re
  6. import time
  7. import uuid
  8. import requests
  9. from datetime import datetime
  10. from urllib.parse import urlparse, parse_qs
  11. from loguru import logger
  12. from common import Oss, Feishu, AliyunLogger, Material
  13. from common.download_video import DownLoad
  14. from common.ffmpeg import FFmpeg
  15. from common.google_ai_studio import GoogleAI
  16. from common.gpt4o_mini_help import GPT4oMini
  17. from common.redis import in_carry_video_data
  18. from common.sql_help import sqlCollect
  19. from common.tag_video import Tag
  20. from common.tts_help import TTS
  21. from data_channel.piaoquan import PQ
  22. class NrfxCarryViode:
  23. def get_text_dy_video(self,url):
  24. max_retries = 3
  25. retry_count = 0
  26. while retry_count < max_retries:
  27. try:
  28. if "&vid=" in url:
  29. parsed_url = urlparse(url)
  30. params = parse_qs(parsed_url.query)
  31. video_id = params.get('vid', [None])[0]
  32. elif "?modal_id=" in url:
  33. parsed_url = urlparse(url)
  34. params = parse_qs(parsed_url.query)
  35. video_id = params.get('modal_id', [None])[0]
  36. else:
  37. headers = {
  38. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
  39. 'q=0.8,application/signed-exchange;v=b3;q=0.7',
  40. 'Accept-Language': 'zh-CN,zh;q=0.9',
  41. 'Cache-Control': 'no-cache',
  42. 'Pragma': 'no-cache',
  43. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
  44. 'Chrome/127.0.0.0 Safari/537.36',
  45. }
  46. response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout=30)
  47. location = response.headers.get('Location', None)
  48. video_id = re.search(r'/video/(\d+)/?', location.split('?')[0] if location else url).group(1)
  49. url = "http://8.217.192.46:8889/crawler/dou_yin/detail"
  50. if not video_id or not video_id.strip():
  51. return None, None, None
  52. payload = json.dumps({
  53. "content_id": str(video_id)
  54. })
  55. headers = {
  56. 'Content-Type': 'application/json'
  57. }
  58. response = requests.request("POST", url, headers=headers, data=payload, timeout= 60)
  59. response = response.json()
  60. code = response["code"]
  61. if code == 0:
  62. data = response["data"]["data"]
  63. video_url = data["video_url_list"][0]["video_url"]
  64. original_title = data["title"]
  65. return video_url, original_title, video_id
  66. if code == 22002:
  67. if '抖音内容已被删除或无法访问' in response['msg']:
  68. return "作品不存在", None, None
  69. except Exception as e:
  70. retry_count += 1
  71. logger.error(f"[+] 抖音{url}获取视频链接失败,失败信息{e}")
  72. time.sleep(1)
  73. return None, None, None
  74. def get_text_ks_video(self,url):
  75. try:
  76. headers = {
  77. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
  78. 'q=0.8,application/signed-exchange;v=b3;q=0.7',
  79. 'Accept-Language': 'zh-CN,zh;q=0.9',
  80. 'Cache-Control': 'no-cache',
  81. 'Pragma': 'no-cache',
  82. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
  83. 'Chrome/127.0.0.0 Safari/537.36',
  84. }
  85. response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout= 30)
  86. location = response.headers.get('Location', None)
  87. video_id = re.search(r'/(f|photo|short-video|long-video)/(.*)/?',
  88. location.split('?')[0] if location else url).group(2)
  89. url = "http://8.217.192.46:8889/crawler/kuai_shou/detail"
  90. if not video_id or not video_id.strip():
  91. return None, None, None
  92. payload = json.dumps({
  93. "content_id": str(video_id)
  94. })
  95. headers = {
  96. 'Content-Type': 'application/json'
  97. }
  98. time.sleep(random.uniform(10, 30))
  99. response = requests.request("POST", url, headers=headers, data=payload, timeout= 30)
  100. response = response.json()
  101. code = response["code"]
  102. if code == 0:
  103. data = response["data"]["data"]
  104. content_type = data['content_type']
  105. if content_type == 'note':
  106. return "note","note"
  107. video_url = data["video_url_list"][0]["video_url"]
  108. original_title = data["title"]
  109. return video_url, original_title, video_id
  110. elif code == 27006:
  111. if "作品不存在" in response['msg'] or "不存在" in response['msg'] or "私密作品" in response['msg']:
  112. return "作品不存在", None, None
  113. time.sleep(3)
  114. except Exception as e:
  115. logger.error(f"[+] 快手{url}获取视频链接失败,失败信息{e}")
  116. return None, None,None
  117. def insert_pq(self, data, oss_object_key, title, cover):
  118. logger.info(f"[+] 开始写入票圈")
  119. code = PQ.insert_piaoquantv(oss_object_key, title, '50322062')
  120. if not code:
  121. logger.error(f"[+] 写入票圈后台失败")
  122. text = (
  123. f"**渠道**: {data['channel']}\n"
  124. f"**内容**: {data}\n"
  125. f"**失败信息**: 写入票圈后台失败\n"
  126. )
  127. Feishu.finish_bot(text,
  128. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  129. f"【 内容理解-{data['channel']}失败通知 】")
  130. return
  131. logger.info(f"[+] 写入票圈成功,返回视频id{code}")
  132. tag_status = Tag.video_tag(code, "lev-供给,rol-机器,#str-搬运改造内容理解引导语实验_60")
  133. Tag.video_tag(data["videoid"], "lev-供给,rol-机器,#str-搬运改造内容理解引导语base_61")
  134. if tag_status == 0:
  135. logger.info(f"[+] 写入标签成功,后台视频ID为{code}")
  136. try:
  137. current_time = datetime.now()
  138. formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
  139. sqlCollect.insert_machine_making_data(data["channel"], data["name"], data["name"],
  140. data["videoid"], data["videoid"], "50322062",
  141. title,
  142. code,
  143. formatted_time, data["title_category"], oss_object_key)
  144. pq_url = f'https://admin.piaoquantv.com/cms/post-detail/{code}/detail' # 站内视频链接
  145. values = [
  146. [
  147. data["videoid"],
  148. code,
  149. data["channel"],
  150. data["dt"],
  151. formatted_time,
  152. pq_url
  153. ]
  154. ]
  155. Feishu.insert_columns("R4dLsce8Jhz9oCtDMr9ccpFHnbI", '1Ycd37', "ROWS", 1, 2)
  156. time.sleep(0.5)
  157. Feishu.update_values("R4dLsce8Jhz9oCtDMr9ccpFHnbI", '1Ycd37', "A2:Z2", values)
  158. logger.info(f"[+] 写入飞书成功")
  159. return
  160. except Exception as e:
  161. logger.error(f"[+] 写入飞书失败{e}")
  162. return
  163. def main(self, data, file_path, GEMINI_API_KEY):
  164. REDIS_NAME = 'task:carry_redis_by_nrfx'
  165. video_id = data["videoid"]
  166. AliyunLogger.logging(data["type"],"内容分析", data["channel"], video_id, "扫描到一条视频", "2001", str(data))
  167. AliyunLogger.logging(data["type"],"内容分析", data["channel"], video_id, "符合规则等待改造", "2004", str(data))
  168. logger.info(f"[+] 获取{video_id}的视频链接")
  169. video_path, cover_path, old_title = PQ.get_pq_oss_path(video_id)
  170. if not video_path:
  171. AliyunLogger.logging(data["type"], "内容分析", data["channel"], video_id, "没有获取到视频链接", "3001",
  172. str(data))
  173. text = (
  174. f"**渠道**: {data['channel']}\n"
  175. f"**内容**: {data}\n"
  176. f"**失败信息**: 没有获取到视频链接\n"
  177. )
  178. Feishu.finish_bot(text,
  179. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  180. f"【 内容理解-{data['channel']}失败通知 】")
  181. return
  182. video_url = f"http://rescdn.yishihui.com/{video_path}"
  183. video_path = DownLoad.download_video(video_url, file_path, '', video_id)
  184. if not os.path.exists(video_path) or os.path.getsize(video_path) == 0:
  185. in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
  186. logger.error(f"[+] {video_url}下载失败")
  187. AliyunLogger.logging(data["type"], "内容分析", data["channel"], video_id, "视频下载失败等待重新处理", "3002",
  188. str(data))
  189. text = (
  190. f"**渠道**: {data['channel']}\n"
  191. f"**内容**: {data}\n"
  192. f"**失败信息**: 视频下载失败\n"
  193. )
  194. Feishu.finish_bot(text,
  195. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  196. f"【 内容理解-{data['channel']}失败通知 】")
  197. return
  198. logger.info(f"[+] {video_url}视频下载成功")
  199. logger.info(f"[+] {video_url}开始处理标题")
  200. logger.info(f"[+] 视频更改分辨率处理成功")
  201. logger.info(f"[+] 内容分析-开始获取视频口播内容")
  202. video_text = GoogleAI.run(GEMINI_API_KEY, video_path)
  203. if not video_text:
  204. AliyunLogger.logging(data["type"], "内容分析", data["channel"], video_id, "内容分析,获取口播文案失败",
  205. "3003",
  206. str(data))
  207. text = (
  208. f"**渠道**: {data['channel']}\n"
  209. f"**内容**: {data}\n"
  210. f"**失败信息**: 获取口播文案失败\n"
  211. )
  212. Feishu.finish_bot(text,
  213. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  214. f"【 内容理解-{data['channel']}失败通知 】")
  215. return
  216. logger.info(f"[+] 内容分析-开始获取AI片尾")
  217. pw_srt_text = GPT4oMini.get_content_understanding_pw(video_text)
  218. pw_url = TTS.get_pw_zm(pw_srt_text, 'zhifeng_emo')
  219. if not pw_url:
  220. logger.error(f"[+] 内容分析-片尾获取失败")
  221. data["transform_rule"] = "仅改造"
  222. AliyunLogger.logging(data["type"], "内容分析", data["channel"], video_id, "内容分析,片尾获取失败",
  223. "3003",
  224. str(data))
  225. text = (
  226. f"**渠道**: {data['channel']}\n"
  227. f"**内容**: {data}\n"
  228. f"**失败信息**: 片尾获取失败\n"
  229. )
  230. Feishu.finish_bot(text,
  231. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  232. f"【 内容理解-{data['channel']}失败通知 】")
  233. return
  234. logger.info(f"[+] 内容分析-片尾获取成功")
  235. pw_srt = TTS.getSrt(pw_url)
  236. if not pw_srt:
  237. AliyunLogger.logging(data["type"], "内容分析", data["channel"], video_id, "内容分析,片尾音频获取失败",
  238. "3003",
  239. str(data))
  240. text = (
  241. f"**渠道**: {data['channel']}\n"
  242. f"**内容**: {data}\n"
  243. f"**失败信息**: 片尾音频获取失败\n"
  244. )
  245. Feishu.finish_bot(text,
  246. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  247. f"【 内容理解-{data['channel']}失败通知 】")
  248. return
  249. pw_mp3_path = TTS.download_mp3(pw_url, file_path)
  250. if not pw_mp3_path:
  251. AliyunLogger.logging(data["type"], "内容分析", data["channel"], video_id, "内容分析,片尾音频下载失败",
  252. "3003",
  253. str(data))
  254. text = (
  255. f"**渠道**: {data['channel']}\n"
  256. f"**内容**: {data}\n"
  257. f"**失败信息**: 片尾音频下载失败\n"
  258. )
  259. Feishu.finish_bot(text,
  260. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  261. f"【 内容理解-{data['channel']}失败通知 】")
  262. return
  263. logger.info(f"[+] 内容分析-片尾音频下载成功")
  264. logger.info(f"[+] 内容分析-片尾获取最后一帧成功")
  265. jpg_path = FFmpeg.video_png(video_path, file_path) # 生成视频最后一帧jpg
  266. pw_path = FFmpeg.pw_video(jpg_path, file_path, pw_mp3_path, pw_srt) # 生成片尾视频
  267. if not os.path.exists(pw_path) or os.path.getsize(pw_path) == 0:
  268. logger.error(f"[+] 内容分析-片尾拼接失败")
  269. AliyunLogger.logging(data["name"], "内容分析", "", data["video_url"],
  270. "内容分析,片尾拼接失败", "3003", str(data))
  271. text = (
  272. f"**渠道**: {data['channel']}\n"
  273. f"**内容**: {data}\n"
  274. f"**失败信息**: 片尾拼接失败\n"
  275. )
  276. Feishu.finish_bot(text,
  277. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  278. f"【 内容理解-{data['channel']}失败通知 】")
  279. return
  280. logger.info(f"[+] 内容分析-合并开始拼接")
  281. video_path = FFmpeg.h_b_video(video_path, pw_path, file_path)
  282. if not os.path.exists(video_path) or os.path.getsize(video_path) == 0:
  283. in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
  284. logger.error(f"[+] 内容分析-添加片尾失败")
  285. text = (
  286. f"**渠道**: {data['channel']}\n"
  287. f"**内容**: {data}\n"
  288. f"**失败信息**: 添加片尾失败\n"
  289. )
  290. Feishu.finish_bot(text,
  291. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  292. f"【 内容理解-{data['channel']}失败通知 】")
  293. return
  294. logger.info(f"[+] 内容分析-开始发送oss")
  295. oss_object_key = Oss.stitching_sync_upload_oss(video_path, str(uuid.uuid4())) # 视频发送OSS
  296. status = oss_object_key.get("status")
  297. if status != 200:
  298. logger.error(f"[+] 内容分析-发送oss失败")
  299. AliyunLogger.logging(data["type"], "内容分析", data["channel"], video_id, "内容分析,发送oss失败",
  300. "3003",
  301. str(data))
  302. text = (
  303. f"**渠道**: {data['channel']}\n"
  304. f"**内容**: {data}\n"
  305. f"**失败信息**: 发送oss失败\n"
  306. )
  307. Feishu.finish_bot(text,
  308. "https://open.feishu.cn/open-apis/bot/v2/hook/65bc5463-dee9-46d0-bc2d-ec6c49a8f3cd",
  309. f"【 内容理解-{data['channel']}失败通知 】")
  310. return
  311. logger.info(f"[+] 内容分析-发送oss成功")
  312. oss_object_key = oss_object_key.get("oss_object_key")
  313. self.insert_pq(data, oss_object_key, old_title, cover_path)
  314. return