# zongjiao.py (18 KB)
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/11/28
  4. import json
  5. # import os
  6. # import sys
  7. import random
  8. import shutil
  9. import time
  10. import ffmpeg
  11. import requests
  12. import urllib3
  13. from selenium.webdriver import DesiredCapabilities
  14. from selenium.webdriver.chrome.service import Service
  15. from selenium.webdriver.common.by import By
  16. from selenium import webdriver
  17. from main.common import Common
  18. from main.feishu_lib import Feishu
  19. from main.zongjiao_publish import Publish
  20. class ZongJiao:
  21. # 翻页参数
  22. begin = 0
  23. # 获取已下载视频宽高、时长等信息
  24. @classmethod
  25. def get_video_info_from_ffmpeg(cls, log_type, video_path):
  26. probe = ffmpeg.probe(video_path)
  27. video_stream = next((stream for stream in probe['streams'] if stream['codec_type'] == 'video'), None)
  28. if video_stream is None:
  29. Common.logger(log_type).info('No video stream found!')
  30. return
  31. width = int(video_stream['width'])
  32. height = int(video_stream['height'])
  33. duration = float(video_stream['duration'])
  34. return width, height, duration
  35. # 过滤词库
  36. @classmethod
  37. def filter_words(cls, log_type):
  38. try:
  39. filter_word_list = []
  40. filter_sheet = Feishu.get_values_batch(log_type, 'zongjiao', 'KeAfT7')
  41. for x in filter_sheet:
  42. for y in x:
  43. if y is None:
  44. pass
  45. else:
  46. filter_word_list.append(y)
  47. return filter_word_list
  48. except Exception as e:
  49. Common.logger(log_type).info(f'filter_words异常:{e}\n')
  50. # 获取 token
  51. @classmethod
  52. def get_token(cls, log_type):
  53. try:
  54. sheet = Feishu.get_values_batch(log_type, "zongjiao", "LpKzTD")
  55. token = sheet[0][1]
  56. cookie = sheet[1][1]
  57. token_dict = {'token': token, 'cookie': cookie}
  58. return token_dict
  59. except Exception as e:
  60. Common.logger(log_type).error(f"get_cookie_token异常:{e}\n")
  61. # 获取用户 fakeid
  62. @classmethod
  63. def get_fakeid(cls, log_type, user, index):
  64. try:
  65. url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
  66. headers = {
  67. "accept": "*/*",
  68. "accept-encoding": "gzip, deflate, br",
  69. "accept-language": "zh-CN,zh;q=0.9",
  70. "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
  71. "t=media/appmsg_edit_v2&action=edit&isNew=1"
  72. "&type=77&createType=5&token=1011071554&lang=zh_CN",
  73. 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
  74. "sec-ch-ua-mobile": "?0",
  75. "sec-ch-ua-platform": '"Windows"',
  76. "sec-fetch-dest": "empty",
  77. "sec-fetch-mode": "cors",
  78. "sec-fetch-site": "same-origin",
  79. "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
  80. " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
  81. "x-requested-with": "XMLHttpRequest",
  82. 'cookie': cls.get_token(log_type)['cookie'],
  83. }
  84. params = {
  85. "action": "search_biz",
  86. "begin": "0",
  87. "count": "5",
  88. "query": str(user),
  89. "token": cls.get_token(log_type)['token'],
  90. "lang": "zh_CN",
  91. "f": "json",
  92. "ajax": "1",
  93. }
  94. urllib3.disable_warnings()
  95. r = requests.get(url=url, headers=headers, params=params, verify=False)
  96. if "list" not in r.json() or len(r.json()["list"]) == 0:
  97. Common.logger(log_type).warning(f"get_fakeid:{r.text},随机休眠 3-5 分钟\n")
  98. time.sleep(random.randint(60 * 3, 60 * 5))
  99. else:
  100. fakeid = r.json()["list"][int(index) - 1]["fakeid"]
  101. head_url = r.json()["list"][int(index) - 1]["round_head_img"]
  102. fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
  103. return fakeid_dict
  104. except Exception as e:
  105. Common.logger(log_type).error(f"get_fakeid异常:{e}\n")
  106. # 获取腾讯视频下载链接
  107. @classmethod
  108. def get_tencent_video_url(cls, log_type, video_id):
  109. try:
  110. url = 'https://vv.video.qq.com/getinfo?vids='+str(video_id)+'&platform=101001&charge=0&otype=json'
  111. response = requests.get(url=url).text.replace('QZOutputJson=', '').replace('"};', '"}')
  112. response = json.loads(response)
  113. url = response['vl']['vi'][0]['ul']['ui'][0]['url']
  114. fvkey = response['vl']['vi'][0]['fvkey']
  115. video_url = url + str(video_id) + '.mp4?vkey=' + fvkey
  116. return video_url
  117. except Exception as e:
  118. Common.logger(log_type).error(f"get_tencent_video_url异常:{e}\n")
  119. @classmethod
  120. def get_video_url(cls, log_type, article_url):
  121. try:
  122. # 打印请求配置
  123. ca = DesiredCapabilities.CHROME
  124. ca["goog:loggingPrefs"] = {"performance": "ALL"}
  125. # 不打开浏览器运行
  126. chrome_options = webdriver.ChromeOptions()
  127. chrome_options.add_argument("headless")
  128. chrome_options.add_argument(f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
  129. chrome_options.add_argument("--no-sandbox")
  130. # driver初始化
  131. # Common.logger(log_type).info('初始化 webdriver')
  132. driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
  133. # driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
  134. # service=Service('/root/chrome/chromedriver'))
  135. # driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver_v107/chromedriver'))
  136. driver.implicitly_wait(10)
  137. # Common.logger(log_type).info('打开文章链接')
  138. driver.get(article_url)
  139. time.sleep(5)
  140. if len(driver.find_elements(By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]')) != 0:
  141. video_url = driver.find_element(
  142. By.XPATH, '//div[@class="js_video_poster video_poster"]/*[2]').get_attribute('src')
  143. elif len(driver.find_elements(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]')) != 0:
  144. iframe = driver.find_element(By.XPATH, '//span[@class="js_tx_video_container"]/*[1]').get_attribute('src')
  145. video_id = iframe.split('vid=')[-1].split('&')[0]
  146. video_url = cls.get_tencent_video_url(log_type, video_id)
  147. else:
  148. video_url = 0
  149. return video_url
  150. except Exception as e:
  151. Common.logger(log_type).info(f'get_video_url异常:{e}\n')
  152. # 获取文章列表
  153. @classmethod
  154. def get_articles(cls, log_type, user, index, env):
  155. fakeid_dict = cls.get_fakeid(log_type, user, index)
  156. while True:
  157. try:
  158. url = "https://mp.weixin.qq.com/cgi-bin/appmsg?"
  159. headers = {
  160. "accept": "*/*",
  161. "accept-encoding": "gzip, deflate, br",
  162. "accept-language": "zh-CN,zh;q=0.9",
  163. "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?"
  164. "t=media/appmsg_edit_v2&action=edit&isNew=1"
  165. "&type=77&createType=5&token=" + str(cls.get_token(log_type)['token']) + "&lang=zh_CN",
  166. 'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
  167. "sec-ch-ua-mobile": "?0",
  168. "sec-ch-ua-platform": '"Windows"',
  169. "sec-fetch-dest": "empty",
  170. "sec-fetch-mode": "cors",
  171. "sec-fetch-site": "same-origin",
  172. "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
  173. " (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36",
  174. "x-requested-with": "XMLHttpRequest",
  175. 'cookie': cls.get_token(log_type)['cookie'],
  176. }
  177. params = {
  178. "action": "list_ex",
  179. "begin": str(cls.begin),
  180. "count": "5",
  181. "fakeid": fakeid_dict['fakeid'],
  182. "type": "9",
  183. "query": "",
  184. "token": str(cls.get_token(log_type)['token']),
  185. "lang": "zh_CN",
  186. "f": "json",
  187. "ajax": "1",
  188. }
  189. urllib3.disable_warnings()
  190. r = requests.get(url=url, headers=headers, params=params, verify=False)
  191. cls.begin += 5
  192. if 'app_msg_list' not in r.json():
  193. Common.logger(log_type).warning(f"get_gzh_url:{r.text}\n")
  194. break
  195. elif len(r.json()['app_msg_list']) == 0:
  196. Common.logger(log_type).info('没有更多视频了\n')
  197. else:
  198. app_msg_list = r.json()['app_msg_list']
  199. for article_url in app_msg_list:
  200. # title
  201. if 'title' in article_url:
  202. title = article_url['title'].replace('/', '').replace('\n', '')\
  203. .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')
  204. else:
  205. title = 0
  206. # aid
  207. if 'aid' in article_url:
  208. aid = article_url['aid']
  209. else:
  210. aid = 0
  211. # create_time
  212. if 'create_time' in article_url:
  213. create_time = article_url['create_time']
  214. else:
  215. create_time = 0
  216. head_url = fakeid_dict['head_url']
  217. # cover_url
  218. if 'cover' in article_url:
  219. cover_url = article_url['cover']
  220. else:
  221. cover_url = 0
  222. # article_url
  223. if 'link' in article_url:
  224. article_url = article_url['link']
  225. else:
  226. article_url = 0
  227. video_url = cls.get_video_url(log_type, article_url)
  228. Common.logger(log_type).info(f"title:{title}")
  229. # Common.logger(log_type).info(f"aid:{aid}, type{type(aid)}")
  230. # Common.logger(log_type).info("create_time:{}", create_time)
  231. # Common.logger(log_type).info("head_url:{}", head_url)
  232. # Common.logger(log_type).info("cover_url:{}", cover_url)
  233. Common.logger(log_type).info(f"article_url:{article_url}")
  234. Common.logger(log_type).info(f"video_url:{video_url}")
  235. video_dict = {
  236. 'video_title': title,
  237. 'aid': aid,
  238. 'create_time': create_time,
  239. 'user_name': user,
  240. 'user_id': fakeid_dict['fakeid'],
  241. 'head_url': head_url,
  242. 'cover_url': cover_url,
  243. 'article_url': article_url,
  244. 'video_url': video_url
  245. }
  246. cls.download_publish(log_type, video_dict, env)
  247. Common.logger(log_type).info('休眠 10 秒\n')
  248. time.sleep(10)
  249. except Exception as e:
  250. Common.logger(log_type).error("get_gzh_url异常:{}\n", e)
  251. # 下载/上传
  252. @classmethod
  253. def download_publish(cls, log_type, video_dict, env):
  254. try:
  255. if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
  256. Common.logger(log_type).info("文章涉嫌违反相关法律法规和政策\n")
  257. # 标题敏感词过滤
  258. elif any(word if word in video_dict['video_title'] else False for word in cls.filter_words(log_type)) is True:
  259. Common.logger(log_type).info("标题已中过滤词\n")
  260. # 已下载判断
  261. elif video_dict['aid'] in [x for y in Feishu.get_values_batch(log_type, 'zongjiao', 'xf9wC2') for x in y]:
  262. Common.logger(log_type).info("视频已下载\n")
  263. else:
  264. # 下载视频
  265. Common.download_method(log_type, "video", video_dict['video_title'], video_dict['video_url'])
  266. # 获取视频时长
  267. video_info = cls.get_video_info_from_ffmpeg(log_type, "./videos/" + video_dict['video_title'] + "/video.mp4")
  268. video_width = str(video_info[0])
  269. video_height = str(video_info[1])
  270. duration = video_info[2]
  271. # 视频时长<50s,直接删除
  272. if int(duration) < 50:
  273. # 删除视频文件夹
  274. shutil.rmtree("./videos/" + video_dict['video_title'] + "/")
  275. Common.logger(log_type).info("时长:{}<50秒,删除成功\n")
  276. return
  277. else:
  278. # 下载封面
  279. Common.download_method(log_type, 'cover', video_dict['video_title'], video_dict['cover_url'])
  280. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  281. with open("./videos/" + video_dict['video_title'] + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  282. f_a.write(str(video_dict['aid']) + "\n" +
  283. video_dict['video_title'] + "\n" +
  284. str(int(duration)) + "\n" +
  285. '0' + "\n" +
  286. '0' + "\n" +
  287. '0' + "\n" +
  288. '0' + "\n" +
  289. str(video_width)+'*'+str(video_height) + "\n" +
  290. str(video_dict['create_time']) + "\n" +
  291. video_dict['user_name'] + "\n" +
  292. video_dict['head_url'] + "\n" +
  293. video_dict['video_url'] + "\n" +
  294. video_dict['cover_url'] + "\n" +
  295. "zongjiao"+str(time.time()) + "\n")
  296. Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
  297. # 上传视频
  298. Common.logger(log_type).info("开始上传视频")
  299. our_video_id = Publish.upload_and_publish(log_type, env, "play")
  300. if env == 'prod':
  301. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  302. else:
  303. our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  304. Common.logger(log_type).info("视频上传完成")
  305. # 保存视频 ID 到云文档
  306. Common.logger(log_type).info("保存视频信息至云文档")
  307. # 视频ID工作表,插入首行
  308. Feishu.insert_columns(log_type, "zongjiao", "xf9wC2", "ROWS", 1, 2)
  309. # 视频ID工作表,首行写入数据
  310. upload_time = int(time.time())
  311. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  312. "宗教公众号",
  313. video_dict['video_title'],
  314. video_dict['aid'],
  315. our_video_link,
  316. int(duration),
  317. str(video_width)+'*'+str(video_height),
  318. time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(video_dict['create_time'])),
  319. video_dict['user_name'],
  320. video_dict['user_id'],
  321. video_dict['head_url'],
  322. video_dict['cover_url'],
  323. video_dict['article_url'],
  324. video_dict['video_url']]]
  325. time.sleep(1)
  326. Feishu.update_values(log_type, "zongjiao", "xf9wC2", "F2:Z2", values)
  327. Common.logger(log_type).info("视频下载/上传成功\n")
  328. except Exception as e:
  329. Common.logger(log_type).error(f"download_publish异常:{e}\n")
  330. @classmethod
  331. def get_all_videos(cls, log_type, env):
  332. try:
  333. user_sheet = Feishu.get_values_batch(log_type, 'zongjiao', '7cac48')
  334. for i in range(2, len(user_sheet)):
  335. user_name = user_sheet[i][2]
  336. index = user_sheet[i][5]
  337. Common.logger(log_type).info(f'获取 {user_name} 公众号视频\n')
  338. cls.get_articles(log_type, user_name, index, env)
  339. cls.begin = 0
  340. Common.logger(log_type).info('休眠1分钟')
  341. time.sleep(60)
  342. except Exception as e:
  343. Common.logger(log_type).info(f'get_all_videos异常:{e}\n')
  344. if __name__ == "__main__":
  345. print(ZongJiao.get_video_info_from_ffmpeg('demo', ''))
  346. pass