# bszf_recommend.py (19 KB)
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/4/25
  4. """
  5. 从 微信小程序-本山祝福短视频 中,下载符合规则的视频
  6. """
  7. import json
  8. import os
  9. import random
  10. import shutil
  11. import sys
  12. import time
  13. from urllib import parse
  14. import ffmpeg
  15. import requests
  16. import urllib3
  17. sys.path.append(os.getcwd())
  18. from main.common import Common
  19. from main.bszf_publish import Publish
  20. from main.feishu_lib import Feishu
  21. proxies = {"http": None, "https": None}
  class Recommend:
      """Crawl the recommend feed of the "本山祝福" WeChat mini-program.

      Candidate videos are first staged in the Feishu sheet ``CcHgO7``
      (recommend_feeds).  Staged rows that pass the filters are downloaded,
      published through ``Publish`` and recorded in sheet ``440018``
      (already-downloaded videos).
      """

      # Pagination state: the visitor key returned by the server is sent back
      # as ``ini_id`` on the next request.
      visitor_key = ""
      # Incremented after every successful feed request (the request itself
      # sends a random page number).
      page = 1
      # Seconds before an HTTP request is abandoned; without a timeout a
      # stalled server would block the crawler forever.
      request_timeout = 30

      # Ordered (old, new) substitutions applied to feed titles.  The title is
      # later used as a directory name, so path-hostile characters are removed.
      # Order matters: removing a character may create a new match for a later
      # rule.  The original chain listed the " " and "?" removals twice in a
      # row; the repeats were no-ops and are omitted here.
      _TITLE_REPLACEMENTS = (
          ("\n", ""),
          ("/", ""),
          ("本山祝福", ""),
          (" ", ""),
          ("&NBSP", ""),
          ("\r", ""),
          ("#", ""),
          (".", "。"),
          ("\\", ""),
          (":", ""),
          ("*", ""),
          ("?", ""),
          ('"', ""),
          ("<", ""),
          (">", ""),
          ("|", ""),
      )

      @classmethod
      def sensitive_words(cls, log_type):
          """Return every non-empty cell of the sensitive-word sheet ``DjXfqG``.

          :param log_type: logger name forwarded to ``Feishu``.
          :return: flat list of the cell values that are not ``None``.
          """
          rows = Feishu.get_values_batch(log_type, "bszf", "DjXfqG")
          return [cell for row in rows for cell in row if cell is not None]

      @classmethod
      def get_video_info_from_local(cls, video_path):
          """Probe a downloaded file for its width, height and duration.

          :param video_path: path of the local video file.
          :return: ``(width, height, duration_seconds)`` or ``None`` when the
              file contains no video stream.
          :raises KeyError: when neither the stream nor the container reports
              a duration.
          """
          probe = ffmpeg.probe(video_path)
          video_stream = next(
              (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
              None,
          )
          if video_stream is None:
              print('No video stream found!')
              return None

          width = int(video_stream['width'])
          height = int(video_stream['height'])
          # Some containers only report the duration at the format level, so
          # fall back to it instead of failing with KeyError.
          duration = video_stream.get('duration')
          if duration is None:
              duration = probe.get('format', {}).get('duration')
          if duration is None:
              raise KeyError('duration')
          return width, height, float(duration)

      @classmethod
      def _clean_title(cls, title):
          """Strip *title* and apply ``_TITLE_REPLACEMENTS`` in order."""
          cleaned = title.strip()
          for old, new in cls._TITLE_REPLACEMENTS:
              cleaned = cleaned.replace(old, new)
          return cleaned

      @classmethod
      def _sheet_contains(cls, log_type, sheet_id, video_id):
          """Return True when ``str(video_id)`` equals any cell of *sheet_id*."""
          wanted = str(video_id)
          rows = Feishu.get_values_batch(log_type, "bszf", sheet_id)
          return any(cell == wanted for row in rows for cell in row)

      @classmethod
      def _delete_feed_row(cls, log_type, row_number):
          """Delete row *row_number* (1-based) from the recommend_feeds sheet."""
          Feishu.dimension_range(log_type, "bszf", "CcHgO7", "ROWS", row_number, row_number)

      @classmethod
      def get_recommend(cls, log_type):
          """Fetch one page of the recommend feed and stage the new videos.

          New videos are written to
          https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?sheet=CcHgO7

          Any exception is logged and swallowed so that one failed request
          does not stop the caller.

          :param log_type: logger name forwarded to ``Common`` / ``Feishu``.
          """
          now = int(time.time() * 1000)
          url = "https://bszf.wentingyou.cn/index.php/v111/index/index?parameter="
          header = {
              "Connection": "keep-alive",
              "vision": "1.1.0",
              "content-type": "application/x-www-form-urlencoded",
              "scene": "1008",
              "content-time": str(now),
              "token": "",
              "visitorKey": "165086930003741",
              "chatKey": "wx0fb8149da961d3b0",
              "cache-time": str(now),
              "Accept-Encoding": "gzip,compress,br,deflate",
              "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
                            "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
                            "MicroMessenger/8.0.20(0x1800142d) NetType/WIFI Language/zh_CN",
              "Referer": "https://servicewechat.com/wx0fb8149da961d3b0/2/page-frame.html",
          }
          parameter = {
              "cid": "",
              "page": random.randint(1, 76),
              "is_ads": 1,
              "model": "iPhone 11<iPhone12,1>",
              "mini_version": "8.0.25",
              "origin_channel": "-1",
              "origin_type": "2",
              "origin_level": "0",
              "ini_id": cls.visitor_key,
          }
          # The API expects the JSON document URL-encoded in the query string.
          url = url + parse.quote(json.dumps(parameter))

          try:
              urllib3.disable_warnings()
              # NOTE(review): verify=False disables TLS certificate validation;
              # kept as in the original, confirm whether it is still required.
              r = requests.get(headers=header, url=url, proxies=proxies,
                               verify=False, timeout=cls.request_timeout)
              response = json.loads(r.content.decode("utf8"))

              # Validate the payload BEFORE reading from it; the original read
              # response["data"] first, which made this branch unreachable.
              if "data" not in response:
                  Common.logger(log_type).warning("get_recommend, response:{}".format(response))
                  time.sleep(3)
                  return

              data = response["data"]
              # Pagination: remember the visitor key for the next request and
              # keep the previous one when the server does not return it.
              cls.visitor_key = data.get("visitor_key", cls.visitor_key)
              cls.page += 1

              for feed in data["list"]:
                  cls._stage_feed(log_type, feed)
          except Exception as e:
              Common.logger(log_type).error("get_recommend异常:{}".format(e))

      @classmethod
      def _stage_feed(cls, log_type, feed):
          """Validate one feed item and append it to the recommend_feeds sheet."""
          # Missing fields are represented by 0, as in the original code.
          video_id = feed.get("nid", 0)
          cover_url = feed.get("video_cover", 0)
          video_url = feed.get("video_url", 0)
          # Only mp4 links are accepted.
          if not isinstance(video_url, str) or ".mp4" not in video_url:
              video_url = 0
          video_comment_cnt = feed.get("commentCount", 0)
          video_send_time = feed.get("update_time", 0)
          # Drop topic markers and characters that are unsafe in a folder name.
          video_title = cls._clean_title(feed["title"]) if "title" in feed else 0

          # The feed exposes no counters or author data, so fixed values are used.
          user_name = "本山祝福"
          user_id = "benshanzhufu"
          head_url = cover_url
          send_time_text = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time)))

          Common.logger(log_type).info("video_title:{}".format(video_title))
          Common.logger(log_type).info("video_id:{}".format(video_id))
          Common.logger(log_type).info("video_send_time:{}", send_time_text)
          Common.logger(log_type).info("video_url:{}".format(video_url))

          # Discard invalid videos.
          if video_id == 0 or cover_url == 0 or video_url == 0:
              Common.logger(log_type).info("无效视频\n")
              return
          # De-duplicate against the downloaded sheet:
          # https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?sheet=440018
          if cls._sheet_contains(log_type, "440018", video_id):
              Common.logger(log_type).info("视频已下载\n")
              return
          # De-duplicate against the recommend_feeds sheet:
          # https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?sheet=CcHgO7
          if cls._sheet_contains(log_type, "CcHgO7", video_id):
              Common.logger(log_type).info("视频已在recommend_feeds表中\n")
              return
          # NOTE: de-duplication against the vertical-video sheet (dAcOWt) was
          # disabled (commented out) in the original code and is not performed.

          time.sleep(1)
          # Make room for the new record, then fill it in as row 2.
          Feishu.insert_columns(log_type, "bszf", "CcHgO7", "ROWS", 1, 2)
          fetched_at = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(time.time())))
          values = [[fetched_at,
                     "推荐榜",
                     str(video_id),
                     video_title,
                     0,  # play count (not provided by the feed)
                     0,  # like count (not provided by the feed)
                     0,  # share count (not provided by the feed)
                     int(video_comment_cnt),
                     send_time_text,
                     user_name,
                     user_id,
                     head_url,
                     cover_url,
                     video_url]]
          time.sleep(1)
          Feishu.update_values(log_type, "bszf", "CcHgO7", "A2:N2", values)
          Common.logger(log_type).info("添加至recommend_feeds成功\n")

      @classmethod
      def download_publish(cls, log_type, env):
          """Process the first non-empty row of the recommend_feeds sheet.

          The row is either rejected (sensitive word, already downloaded, no
          video stream, shorter than 40 s) or downloaded and published.  In
          every case the row is deleted and the method returns, because the
          deletion shifts the remaining rows; the caller is expected to call
          again for the next row.

          Any exception is logged and swallowed.

          :param log_type: logger name forwarded to ``Common`` / ``Feishu``.
          :param env: ``"dev"`` for the test environment, ``"prod"`` for
              production; forwarded to ``Publish.upload_and_publish``.
          """
          try:
              recommend_feeds_sheet = Feishu.get_values_batch(log_type, "bszf", "CcHgO7")
              # The first sheet row is skipped (presumably the header); sheet
              # rows are 1-based, so the second list entry is row 2.
              for row_number, row in enumerate(recommend_feeds_sheet[1:], start=2):
                  fields = row[2:14]
                  (video_id, title, play_cnt, _like_cnt, _share_cnt, _comment_cnt,
                   send_time, _user_name, _user_id, _head_url, _cover_url,
                   video_url) = fields

                  Common.logger(log_type).info("正在判断第{}行", row_number)
                  Common.logger(log_type).info("download_video_title:{}", title)
                  Common.logger(log_type).info("download_video_send_time:{}", send_time)
                  Common.logger(log_type).info("download_video_url:{}", video_url)

                  # Skip empty rows and keep looking.
                  if video_id is None or title is None or play_cnt is None:
                      Common.logger(log_type).warning("空行,略过\n")
                      continue

                  # Sheet cells are not guaranteed to be strings.
                  title_text = str(title)

                  # Reject titles that contain a sensitive word.
                  if any(word and str(word) in title_text
                         for word in cls.sensitive_words(log_type)):
                      cls._delete_feed_row(log_type, row_number)
                      Common.logger(log_type).info("视频已中敏感词,删除成功\n")
                      return

                  # Reject videos that are already in the downloaded sheet.
                  if cls._sheet_contains(log_type, "440018", video_id):
                      cls._delete_feed_row(log_type, row_number)
                      Common.logger(log_type).info("该视频已下载,删除成功\n")
                      return

                  # The row satisfies the download rules.
                  cls._publish_row(log_type, env, row_number, fields)
                  return
          except Exception as e:
              Common.logger(log_type).error("download_publish异常:{}", e)

      @classmethod
      def _publish_row(cls, log_type, env, row_number, fields):
          """Download, validate, publish and record one staged video row."""
          (video_id, title, play_cnt, like_cnt, share_cnt, comment_cnt,
           send_time, user_name, user_id, head_url, cover_url, video_url) = fields
          title = str(title)
          video_dir = "./videos/" + title + "/"

          # Download the video file.
          Common.download_method(log_type=log_type, text="video",
                                 d_name=title, d_url=str(video_url))

          # Read resolution and duration from the downloaded file.
          video_info = cls.get_video_info_from_local(video_dir + "video.mp4")
          if video_info is None:
              # A file without a video stream can never be published; drop it
              # instead of failing and retrying the same row forever.
              shutil.rmtree(video_dir, ignore_errors=True)
              cls._delete_feed_row(log_type, row_number)
              Common.logger(log_type).warning("视频文件无视频流,删除成功\n")
              return

          width, height, duration = video_info
          resolution = str(width) + "*" + str(height)

          # Videos shorter than 40 seconds are discarded.
          if int(duration) < 40:
              shutil.rmtree(video_dir)
              cls._delete_feed_row(log_type, row_number)
              Common.logger(log_type).info("时长:{}<40秒,删除成功\n", int(duration))
              return
          # NOTE: the original code contained a disabled (commented-out) branch
          # that moved vertical videos to sheet dAcOWt; it is not performed.

          # Download the cover image.
          Common.download_method(log_type=log_type, text="cover",
                                 d_name=title, d_url=str(cover_url))

          # Save the metadata to "./videos/{title}/info.txt", one field per line.
          send_timestamp = int(time.mktime(time.strptime(send_time, "%Y/%m/%d %H:%M:%S")))
          info_lines = [
              str(video_id),
              title,
              str(int(duration)),
              str(play_cnt),
              str(comment_cnt),
              str(like_cnt),
              str(share_cnt),
              resolution,
              str(send_timestamp),
              str(user_name),
              str(head_url),
              str(video_url),
              str(cover_url),
              "benshanzhufu",
          ]
          with open(video_dir + "info.txt", "a", encoding="UTF-8") as f_a:
              f_a.write("\n".join(info_lines))
          Common.logger(log_type).info("==========视频信息已保存至info.txt==========")

          # Upload and publish the video.
          Common.logger(log_type).info("开始上传视频:{}".format(title))
          our_video_id = Publish.upload_and_publish(log_type, env, "play")
          our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
          Common.logger(log_type).info("视频上传完成:{}", title)

          # Record the video in the downloaded sheet:
          # https://w42nne6hzg.feishu.cn/sheets/shtcnGh2rrsPYM4iVNEBO7OqWrb?sheet=440018
          Common.logger(log_type).info("保存视频ID至云文档:{}", title)
          # Make room for the new record, then fill it in as row 2.
          Feishu.insert_columns(log_type, "bszf", "440018", "ROWS", 1, 2)
          uploaded_at = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(time.time())))
          values = [[uploaded_at,
                     "推荐榜",
                     str(video_id),
                     title,
                     our_video_link,
                     int(play_cnt),
                     int(comment_cnt),
                     int(like_cnt),
                     int(share_cnt),
                     int(duration),
                     resolution,
                     str(send_time),
                     str(user_name),
                     str(user_id),
                     str(head_url),
                     str(cover_url),
                     str(video_url)]]
          time.sleep(1)
          Feishu.update_values(log_type, "bszf", "440018", "E2:V2", values)

          # The staged row has been handled; remove it from recommend_feeds.
          cls._delete_feed_row(log_type, row_number)
          Common.logger(log_type).info("视频:{},下载/上传成功\n", title)

      @classmethod
      def run_download_publish(cls, log_type, env):
          """Call ``download_publish`` until the recommend_feeds sheet is drained.

          :param log_type: logger name forwarded to ``Common`` / ``Feishu``.
          :param env: ``"dev"`` or ``"prod"``, forwarded to ``download_publish``.
          """
          try:
              while True:
                  time.sleep(1)
                  recommend_feeds_sheet = Feishu.get_values_batch(log_type, "bszf", "CcHgO7")
                  # One remaining row is presumably the header.  ``<=`` also
                  # stops on a completely empty sheet, which would otherwise
                  # loop forever.
                  if len(recommend_feeds_sheet) <= 1:
                      Common.logger(log_type).info("下载/上传完成\n")
                      break
                  cls.download_publish(log_type, env)
                  # Random pause between videos.
                  time.sleep(random.randint(5, 10))
          except Exception as e:
              Common.logger(log_type).error("run_download_publish异常:{}", e)
  if __name__ == "__main__":
      # Manual run: stage one page of the recommend feed, then download and
      # publish the staged videos against the "dev" environment.
      log_name = "recommend"
      crawler = Recommend()
      crawler.get_recommend(log_name)
      crawler.run_download_publish(log_name, "dev")
  351. recommend.run_download_publish("recommend", "dev")