# nrfx_carry_video.py (19 KB)
  1. import html
  2. import json
  3. import os
  4. import random
  5. import re
  6. import time
  7. import uuid
  8. import requests
  9. from datetime import datetime
  10. from urllib.parse import urlparse, parse_qs
  11. from loguru import logger
  12. from common import Oss, Feishu, AliyunLogger, Material
  13. from common.download_video import DownLoad
  14. from common.ffmpeg import FFmpeg
  15. from common.google_ai_studio import GoogleAI
  16. from common.gpt4o_mini_help import GPT4oMini
  17. from common.redis import in_carry_video_data
  18. from common.sql_help import sqlCollect
  19. from common.tag_video import Tag
  20. from common.tts_help import TTS
  21. from data_channel.piaoquan import PQ
  22. class NrfxCarryViode:
  23. def get_text_dy_video(self,url):
  24. max_retries = 3
  25. retry_count = 0
  26. while retry_count < max_retries:
  27. try:
  28. if "&vid=" in url:
  29. parsed_url = urlparse(url)
  30. params = parse_qs(parsed_url.query)
  31. video_id = params.get('vid', [None])[0]
  32. elif "?modal_id=" in url:
  33. parsed_url = urlparse(url)
  34. params = parse_qs(parsed_url.query)
  35. video_id = params.get('modal_id', [None])[0]
  36. else:
  37. headers = {
  38. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
  39. 'q=0.8,application/signed-exchange;v=b3;q=0.7',
  40. 'Accept-Language': 'zh-CN,zh;q=0.9',
  41. 'Cache-Control': 'no-cache',
  42. 'Pragma': 'no-cache',
  43. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
  44. 'Chrome/127.0.0.0 Safari/537.36',
  45. }
  46. response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout=30)
  47. location = response.headers.get('Location', None)
  48. video_id = re.search(r'/video/(\d+)/?', location.split('?')[0] if location else url).group(1)
  49. url = "http://8.217.192.46:8889/crawler/dou_yin/detail"
  50. if not video_id or not video_id.strip():
  51. return None, None, None
  52. payload = json.dumps({
  53. "content_id": str(video_id)
  54. })
  55. headers = {
  56. 'Content-Type': 'application/json'
  57. }
  58. response = requests.request("POST", url, headers=headers, data=payload, timeout= 60)
  59. response = response.json()
  60. code = response["code"]
  61. if code == 0:
  62. data = response["data"]["data"]
  63. video_url = data["video_url_list"][0]["video_url"]
  64. original_title = data["title"]
  65. return video_url, original_title, video_id
  66. if code == 22002:
  67. if '抖音内容已被删除或无法访问' in response['msg']:
  68. return "作品不存在", None, None
  69. except Exception as e:
  70. retry_count += 1
  71. logger.error(f"[+] 抖音{url}获取视频链接失败,失败信息{e}")
  72. time.sleep(1)
  73. return None, None, None
  74. def get_text_ks_video(self,url):
  75. try:
  76. headers = {
  77. 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;'
  78. 'q=0.8,application/signed-exchange;v=b3;q=0.7',
  79. 'Accept-Language': 'zh-CN,zh;q=0.9',
  80. 'Cache-Control': 'no-cache',
  81. 'Pragma': 'no-cache',
  82. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) '
  83. 'Chrome/127.0.0.0 Safari/537.36',
  84. }
  85. response = requests.request(url=url, method='GET', headers=headers, allow_redirects=False, timeout= 30)
  86. location = response.headers.get('Location', None)
  87. video_id = re.search(r'/(f|photo|short-video|long-video)/(.*)/?',
  88. location.split('?')[0] if location else url).group(2)
  89. url = "http://8.217.192.46:8889/crawler/kuai_shou/detail"
  90. if not video_id or not video_id.strip():
  91. return None, None, None
  92. payload = json.dumps({
  93. "content_id": str(video_id)
  94. })
  95. headers = {
  96. 'Content-Type': 'application/json'
  97. }
  98. time.sleep(random.uniform(10, 30))
  99. response = requests.request("POST", url, headers=headers, data=payload, timeout= 30)
  100. response = response.json()
  101. code = response["code"]
  102. if code == 0:
  103. data = response["data"]["data"]
  104. content_type = data['content_type']
  105. if content_type == 'note':
  106. return "note","note"
  107. video_url = data["video_url_list"][0]["video_url"]
  108. original_title = data["title"]
  109. return video_url, original_title, video_id
  110. elif code == 27006:
  111. if "作品不存在" in response['msg'] or "不存在" in response['msg'] or "私密作品" in response['msg']:
  112. return "作品不存在", None, None
  113. time.sleep(3)
  114. except Exception as e:
  115. logger.error(f"[+] 快手{url}获取视频链接失败,失败信息{e}")
  116. return None, None,None
  117. def insert_pq(self, REDIS_NAME, data, oss_object_key, title, tags, tag_transport_channel, channel_mark, n_ids, type):
  118. logger.info(f"[+] {REDIS_NAME}的{data}开始写入票圈")
  119. if ',' in n_ids:
  120. n_id_list = n_ids.split(',')
  121. else:
  122. n_id_list = [n_ids]
  123. pq_list = []
  124. for n_id in n_id_list:
  125. code = PQ.insert_piaoquantv(oss_object_key, title, n_id)
  126. if not code:
  127. logger.error(f"[+] {REDIS_NAME}的{data}写入票圈后台失败")
  128. AliyunLogger.logging(data["name"], type, tag_transport_channel, data["video_url"],
  129. "改造失败,写入票圈后台失败", "3003", str(data))
  130. continue
  131. pq_list.append(code)
  132. logger.info(f"[+] {REDIS_NAME}的{data}写入票圈成功,返回视频id{code}")
  133. tag_status = Tag.video_tag(code, str(tags))
  134. if tag_status == 0:
  135. logger.info(f"[+] {REDIS_NAME}的{data}写入标签成功,后台视频ID为{code}")
  136. try:
  137. current_time = datetime.now()
  138. formatted_time = current_time.strftime("%Y-%m-%d %H:%M:%S")
  139. sqlCollect.insert_machine_making_data(data["name"], type, tag_transport_channel,
  140. data["video_url"], data["video_url"], data["pq_ids"],
  141. data["title_category"],
  142. code,
  143. formatted_time, data["title_category"], oss_object_key)
  144. pq_url = f'https://admin.piaoquantv.com/cms/post-detail/{code}/detail' # 站内视频链接
  145. values = [
  146. [
  147. str(code),
  148. str(n_id),
  149. formatted_time,
  150. channel_mark,
  151. data["name"],
  152. data["pq_ids"],
  153. data["pq_label"],
  154. data["activate_data"],
  155. data["video_url"],
  156. data["title_category"],
  157. tag_transport_channel,
  158. data["tag_transport_scene"],
  159. data["tag_transport_keyword"],
  160. data["tag"],
  161. data["transform_rule"],
  162. data["video_share"],
  163. data["trailer_share"],
  164. data["trailer_share_audio"],
  165. data["video_clipping"],
  166. data["video_clipping_time"],
  167. data["title_transform"],
  168. pq_url
  169. ]
  170. ]
  171. Feishu.insert_columns("R4dLsce8Jhz9oCtDMr9ccpFHnbI", 'Um1nWA', "ROWS", 1, 2)
  172. time.sleep(0.5)
  173. Feishu.update_values("R4dLsce8Jhz9oCtDMr9ccpFHnbI", 'Um1nWA', "A2:Z2", values)
  174. logger.info(f"[+] {REDIS_NAME}的{data}写入飞书成功")
  175. except Exception as e:
  176. logger.error(f"[+] {REDIS_NAME}的{data}写入飞书失败{e}")
  177. pass
  178. AliyunLogger.logging(data["name"], "内容分析", tag_transport_channel, data["video_url"],
  179. "改造成功", "1000", str(data), str(pq_list))
  180. return
  181. def main(self, data, file_path, GEMINI_API_KEY):
  182. REDIS_NAME = 'task:carry_redis_nrfx'
  183. try:
  184. if data["transform_rule"] == '否':
  185. return
  186. url = data['video_url']
  187. if "&vid=" in url or "?modal_id=" in url:
  188. host = urlparse(url).netloc
  189. else:
  190. msg = html.unescape(url).split('?')[0]
  191. pattern = re.search(r'https?://[^\s<>"\'\u4e00-\u9fff]+', msg)
  192. if not pattern:
  193. in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
  194. return
  195. url = pattern.group()
  196. host = urlparse(url).netloc
  197. if host in ['v.douyin.com', 'www.douyin.com', 'www.iesdouyin.com']:
  198. tag_transport_channel = "抖音"
  199. logger.info(f"[+] {url}开始获取抖音视频链接")
  200. url, original_title, video_id = self.get_text_dy_video(url=url)
  201. elif host in ['v.kuaishou.com', 'www.kuaishou.com', 'v.m.chenzhongtech.com', 'creater.eozatvmq.com']:
  202. tag_transport_channel = "快手"
  203. logger.info(f"[+] {url}开始获取快手视频链接")
  204. url, original_title, video_id = self.get_text_ks_video(url=url)
  205. else:
  206. AliyunLogger.logging(data["name"], "内容分析", "", data["video_url"], "扫描到一条视频",
  207. "2001", str(data))
  208. logger.error(f"[+] {url}该链接不是抖/快 不做处理")
  209. AliyunLogger.logging(data["name"], "内容分析","", data["video_url"],
  210. "不是抖/快不做处理", "1001", str(data))
  211. return
  212. if url == "作品不存在":
  213. return
  214. except Exception as e:
  215. logger.info(f"[+] 获取视频链接异常{e}")
  216. in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
  217. return
  218. AliyunLogger.logging(data["name"],"内容分析", tag_transport_channel, data["video_url"], "扫描到一条视频", "2001", str(data))
  219. AliyunLogger.logging(data["name"], "内容分析",tag_transport_channel, data["video_url"], "符合规则等待改造", "2004", str(data))
  220. if not url:
  221. in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
  222. logger.info(f"[+] {url}没有获取到视频链接,等待重新处理")
  223. AliyunLogger.logging(data["name"], "内容分析",tag_transport_channel, data["video_url"],
  224. "没有获取到视频链接,等待重新处理", "1002", str(data))
  225. return
  226. if url == "note":
  227. logger.info(f"[+] {url}是图文不做处理")
  228. AliyunLogger.logging(data["name"], "内容分析", tag_transport_channel, data["video_url"],
  229. "是图文不做处理", "1002", str(data))
  230. return
  231. logger.info(f"[+] {url}开始下载视频")
  232. video_path = DownLoad.download_video(url, file_path, tag_transport_channel, video_id)
  233. if not os.path.exists(video_path) or os.path.getsize(video_path) == 0:
  234. in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
  235. logger.error(f"[+] {url}下载失败")
  236. AliyunLogger.logging(data["name"],"内容分析", tag_transport_channel, data["video_url"],
  237. "视频下载失败等待重新处理", "3002", str(data))
  238. return
  239. logger.info(f"[+] {url}开始视频下载成功")
  240. logger.info(f"[+] {url}开始处理标题")
  241. if data["title_category"] == "AI标题" or data["trailer_share"] == "AI标题":
  242. title = GPT4oMini.get_ai_mini_title(
  243. original_title if data["title_category"] == "AI标题" else data["title_category"])
  244. else:
  245. title = original_title if data["title_category"] == "原标题" else data["title_category"]
  246. if tag_transport_channel == "抖音":
  247. if "复制打开抖音" in data['video_url']:
  248. channel_mark = "APP"
  249. else:
  250. channel_mark = "PC"
  251. else:
  252. if "https://www.kuaishou.com/f" in data['video_url']:
  253. channel_mark = "PC"
  254. else:
  255. channel_mark = "APP"
  256. if data["transform_rule"] == "仅改造" or data["transform_rule"] == "是":
  257. width, height = FFmpeg.get_w_h_size(video_path)
  258. if width < height: # 判断是否需要修改为竖屏
  259. video_path = FFmpeg.update_video_h_w(video_path, file_path)
  260. logger.info(f"[+] {REDIS_NAME}的{data}视频更改分辨率处理")
  261. video_path = FFmpeg.video_640(video_path, file_path)
  262. if not os.path.exists(video_path) or os.path.getsize(video_path) == 0:
  263. in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
  264. logger.error(f"[+] {REDIS_NAME}的{data}视频更改分辨率失败")
  265. AliyunLogger.logging(data["name"], "内容分析", tag_transport_channel, data["video_url"],
  266. "改造失败,片尾拼接失败", "3001", str(data))
  267. return
  268. logger.info(f"[+] {REDIS_NAME}的{data}视频更改分辨率处理成功")
  269. if data["video_clipping"]: # 判断是否需要裁剪
  270. video_path = FFmpeg.video_crop(video_path, file_path)
  271. if data["video_clipping_time"]: # 判断是否需要指定视频时长
  272. video_path = FFmpeg.video_ggduration(video_path, file_path, data["video_clipping_time"])
  273. logger.info(f"[+] 内容分析-开始获取视频口播内容")
  274. video_text = GoogleAI.run(GEMINI_API_KEY, video_path)
  275. if not video_text:
  276. AliyunLogger.logging(data["name"], "内容分析", "", data["video_url"],
  277. "内容分析,获取口播文案失败", "3003", str(data))
  278. return
  279. logger.info(f"[+] 内容分析-开始获取AI片尾")
  280. pw_srt_text = GPT4oMini.get_content_understanding_pw(video_text)
  281. voice = data['trailer_share_audio']
  282. if voice:
  283. if ',' in voice:
  284. voices = voice.split(',')
  285. else:
  286. voices = [voice]
  287. voice = random.choice(voices)
  288. else:
  289. voice = "zhifeng_emo"
  290. pw_url = TTS.get_pw_zm(pw_srt_text, voice)
  291. if not pw_url:
  292. logger.error(f"[+] 内容分析-片尾获取失败")
  293. data["transform_rule"] = "仅改造"
  294. AliyunLogger.logging(data["name"], "内容分析", "", data["video_url"],
  295. "内容分析,片尾获取失败", "3003", str(data))
  296. return
  297. logger.info(f"[+] 内容分析-片尾获取成功")
  298. pw_srt = TTS.getSrt(pw_url)
  299. if not pw_srt:
  300. AliyunLogger.logging(data["name"], "内容分析", "", data["video_url"],
  301. "内容分析,片尾音频下载失败", "3003", str(data))
  302. return
  303. pw_mp3_path = TTS.download_mp3(pw_url, file_path)
  304. if not pw_mp3_path:
  305. AliyunLogger.logging(data["name"], "内容分析", "", data["video_url"],
  306. "内容分析,片尾音频下载失败", "3003", str(data))
  307. return
  308. logger.info(f"[+] 内容分析-片尾音频下载成功")
  309. logger.info(f"[+] 内容分析-片尾获取最后一帧成功")
  310. jpg_path = FFmpeg.video_png(video_path, file_path) # 生成视频最后一帧jpg
  311. pw_path = FFmpeg.nrfx_pw_video(jpg_path, file_path, pw_mp3_path, pw_srt) # 生成片尾视频
  312. if not os.path.exists(pw_path) or os.path.getsize(pw_path) == 0:
  313. logger.error(f"[+] 内容分析-片尾拼接失败")
  314. AliyunLogger.logging(data["name"], "内容分析", "", data["video_url"],
  315. "内容分析,片尾拼接失败", "3003", str(data))
  316. return
  317. logger.info(f"[+] 内容分析-合并开始拼接")
  318. video_path = FFmpeg.h_b_video(video_path, pw_path, file_path)
  319. single_video_path = FFmpeg.single_video(video_path, file_path, data["video_share"])
  320. if not os.path.exists(single_video_path) or os.path.getsize(single_video_path) == 0:
  321. data["transform_rule"] = "仅改造"
  322. in_carry_video_data(REDIS_NAME, json.dumps(data, ensure_ascii=False, indent=4))
  323. logger.error(f"[+] 内容分析-添加片中字幕失败")
  324. AliyunLogger.logging(data["name"], "内容分析", tag_transport_channel, data["video_url"],
  325. "内容分析,添加片中字幕失败", "3003", str(data))
  326. return
  327. logger.info(f"[+] 内容分析-添加片中字幕成功")
  328. logger.info(f"[+] 内容分析-开始发送oss")
  329. oss_object_key = Oss.stitching_sync_upload_oss(single_video_path, str(uuid.uuid4())) # 视频发送OSS
  330. status = oss_object_key.get("status")
  331. if status != 200:
  332. logger.error(f"[+] 内容分析-发送oss失败")
  333. AliyunLogger.logging(data["name"], "内容分析", tag_transport_channel, data["video_url"],
  334. "内容分析,发送oss失败", "3003", str(data))
  335. return
  336. logger.info(f"[+] 内容分析-发送oss成功")
  337. oss_object_key = oss_object_key.get("oss_object_key")
  338. tags = 'lev-供给,rol-机器,#str-搬运改造内容理解引导语实验_60'
  339. self.insert_pq(REDIS_NAME, data, oss_object_key, title, tags, tag_transport_channel, channel_mark, "50322062", "内容分析")
  340. return
  341. return