# recommend.py
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/4/8
  4. import json
  5. import os
  6. import sys
  7. import time
  8. import requests
  9. import urllib3
  10. sys.path.append(os.getcwd())
  11. from main.common import Common
  12. from main.feishu_lib import Feishu
  13. from main.publish import Publish
  14. proxies = {"http": None, "https": None}
  15. class DownloadRecommend:
  16. # 配置微信号
  17. Referer = Feishu.get_range_value("recommend", "9fTK1f", "C3:C3")[0]
  18. wesee_openid = Feishu.get_range_value("recommend", "9fTK1f", "C4:C4")[0]
  19. wesee_openkey = Feishu.get_range_value("recommend", "9fTK1f", "C5:C5")[0]
  20. wesee_personid = Feishu.get_range_value("recommend", "9fTK1f", "C6:C6")[0]
  21. wesee_access_token = Feishu.get_range_value("recommend", "9fTK1f", "C7:C7")[0]
  22. wesee_thr_appid = Feishu.get_range_value("recommend", "9fTK1f", "C8:C8")[0]
  23. # 过滤词库
  24. @classmethod
  25. def sensitive_words(cls):
  26. # 敏感词库列表
  27. word_list = []
  28. # 从云文档读取所有敏感词,添加到词库列表
  29. lists = Feishu.get_values_batch("recommend", "2Oxf8C")
  30. for a in lists:
  31. for j in a:
  32. # 过滤空的单元格内容
  33. if j is None:
  34. pass
  35. else:
  36. word_list.append(j)
  37. return word_list
  38. # 抓取基础规则
  39. @staticmethod
  40. def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
  41. """
  42. 下载视频的基本规则
  43. :param d_duration: 时长
  44. :param d_width: 宽
  45. :param d_height: 高
  46. :param d_play_cnt: 播放量
  47. :param d_like_cnt: 点赞量
  48. :param d_share_cnt: 分享量
  49. :return: 满足规则,返回 True;反之,返回 False
  50. """
  51. if int(float(d_duration)) >= 30:
  52. if int(d_width) >= 720 or int(d_height) >= 720:
  53. if int(d_play_cnt) >= 0:
  54. if int(d_like_cnt) >= 0:
  55. if int(d_share_cnt) >= 0:
  56. return True
  57. else:
  58. return False
  59. else:
  60. return False
  61. else:
  62. return False
  63. return False
  64. return False
  65. # 抓取列表
  66. @classmethod
  67. def get_feeds(cls):
  68. """
  69. 1.从微视小程序首页推荐,获取视频列表
  70. 2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=caa3fa 中去重
  71. 3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr 中去重
  72. 4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr
  73. """
  74. url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFeedList"
  75. headers = {
  76. "content-type": "application/json",
  77. "Accept-Encoding": "gzip,compress,br,deflate",
  78. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
  79. " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
  80. " MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN",
  81. "Referer": str(cls.Referer)
  82. }
  83. cookies = {
  84. "wesee_authtype": "3",
  85. "wesee_openid": str(cls.wesee_openid),
  86. "wesee_openkey": str(cls.wesee_openkey),
  87. "wesee_personid": str(cls.wesee_personid),
  88. "wesee_refresh_token": "",
  89. "wesee_access_token": str(cls.wesee_access_token),
  90. "wesee_thr_appid": str(cls.wesee_thr_appid),
  91. "wesee_ichid": "8"
  92. }
  93. json_data = {
  94. "req_body": {
  95. "requestType": 16,
  96. "isrefresh": 1,
  97. "isfirst": 1,
  98. "attachInfo": "",
  99. "scene_id": 22,
  100. "requestExt": {
  101. "mini_openid": str(cls.wesee_openid),
  102. "notLogin-personid": str(cls.wesee_personid)
  103. }
  104. },
  105. "req_header": {
  106. "mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\"}"
  107. }
  108. }
  109. try:
  110. urllib3.disable_warnings()
  111. r = requests.post(headers=headers, url=url, cookies=cookies, json=json_data, proxies=proxies, verify=False)
  112. response = json.loads(r.content.decode("utf8"))
  113. feeds = response["rsp_body"]["feeds"]
  114. for i in range(len(feeds)):
  115. # 视频标题过滤话题及处理特殊字符
  116. weishi_title = feeds[i]["desc"]
  117. title_split1 = weishi_title.split(" #")
  118. if title_split1[0] != "":
  119. title1 = title_split1[0]
  120. else:
  121. title1 = title_split1[-1]
  122. title_split2 = title1.split(" #")
  123. if title_split2[0] != "":
  124. title2 = title_split2[0]
  125. else:
  126. title2 = title_split2[-1]
  127. title_split3 = title2.split("@")
  128. if title_split3[0] != "":
  129. title3 = title_split3[0]
  130. else:
  131. title3 = title_split3[-1]
  132. # 视频标题
  133. video_title = title3.strip().replace("\n", "") \
  134. .replace("/", "").replace("快手", "").replace(" ", "") \
  135. .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
  136. .replace("#", "").replace(".", "。").replace("\\", "") \
  137. .replace(":", "").replace("*", "").replace("?", "") \
  138. .replace("?", "").replace('"', "").replace("<", "") \
  139. .replace(">", "").replace("|", "").replace("微视", "")
  140. # 视频 ID
  141. if "id" not in feeds[i]["video"]:
  142. video_id = 0
  143. else:
  144. video_id = feeds[i]["video"]["id"]
  145. # 播放数
  146. if "playNum" not in feeds[i]["ugcData"]:
  147. video_play_cnt = 0
  148. else:
  149. video_play_cnt = feeds[i]["ugcData"]["playNum"]
  150. # 点赞数
  151. if "dingCount" not in feeds[i]["ugcData"]:
  152. video_like_cnt = 0
  153. else:
  154. video_like_cnt = feeds[i]["ugcData"]["dingCount"]
  155. # 分享数
  156. if "shareNum" not in feeds[i]["ugcData"]:
  157. video_share_cnt = 0
  158. else:
  159. video_share_cnt = feeds[i]["ugcData"]["shareNum"]
  160. # 评论数
  161. if "totalCommentNum" not in feeds[i]["ugcData"]:
  162. video_comment_cnt = 0
  163. else:
  164. video_comment_cnt = feeds[i]["ugcData"]["totalCommentNum"]
  165. # 视频时长
  166. if "duration" not in feeds[i]["video"]:
  167. video_duration = 0
  168. else:
  169. video_duration = int(int(feeds[i]["video"]["duration"]) / 1000)
  170. # 视频宽高
  171. if "width" not in feeds[i]["video"] or "height" not in feeds[i]["video"]:
  172. video_width = 0
  173. video_height = 0
  174. video_resolution = str(video_width) + "*" + str(video_height)
  175. else:
  176. video_width = feeds[i]["video"]["width"]
  177. video_height = feeds[i]["video"]["height"]
  178. video_resolution = str(video_width) + "*" + str(video_height)
  179. # 视频发布时间
  180. if "createTime" not in feeds[i]:
  181. video_send_time = 0
  182. else:
  183. video_send_time = int(feeds[i]["createTime"]) * 1000
  184. # 用户昵称
  185. user_name = feeds[i]["poster"]["nick"].strip().replace("\n", "") \
  186. .replace("/", "").replace("快手", "").replace(" ", "") \
  187. .replace(" ", "").replace("&NBSP", "").replace("\r", "").replace("微视", "")
  188. # 用户 ID
  189. user_id = feeds[i]["poster"]["id"]
  190. # 用户头像地址
  191. if "thumbURL" not in feeds[i]["material"] and "avatar" not in feeds[i]["poster"]:
  192. head_url = 0
  193. elif "thumbURL" in feeds[i]["material"]:
  194. head_url = feeds[i]["material"]["thumbURL"]
  195. else:
  196. head_url = feeds[i]["poster"]["avatar"]
  197. # 视频封面地址
  198. if len(feeds[i]["images"]) == 0:
  199. cover_url = 0
  200. else:
  201. cover_url = feeds[i]["images"][0]["url"]
  202. # 视频播放地址
  203. if "url" not in feeds[i]["video"]:
  204. video_url = 0
  205. else:
  206. video_url = feeds[i]["video"]["url"]
  207. Common.logger("recommend").info("video_title:{}".format(video_title))
  208. Common.logger("recommend").info("video_id:{}".format(video_id))
  209. Common.logger("recommend").info("video_play_cnt:{}".format(video_play_cnt))
  210. Common.logger("recommend").info("video_like_cnt:{}".format(video_like_cnt))
  211. Common.logger("recommend").info("video_share_cnt:{}".format(video_share_cnt))
  212. # Common.logger("recommend").info("video_comment_cnt:{}".format(video_comment_cnt))
  213. Common.logger("recommend").info("video_duration:{}秒".format(video_duration))
  214. # Common.logger("recommend").info("video_resolution:{}".format(video_resolution))
  215. Common.logger("recommend").info(
  216. "video_send_time:{}".format(time.strftime(
  217. "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
  218. Common.logger("recommend").info("user_name:{}".format(user_name))
  219. # Common.logger("recommend").info("user_id:{}".format(user_id))
  220. # Common.logger("recommend").info("head_url:{}".format(head_url))
  221. # Common.logger("recommend").info("cover_url:{}".format(cover_url))
  222. Common.logger("recommend").info("video_url:{}".format(video_url))
  223. # 过滤无效视频
  224. if video_id == 0 or video_duration == 0 or video_send_time == 0 or head_url == 0 \
  225. or cover_url == 0 or video_url == 0:
  226. Common.logger("recommend").info("无效视频")
  227. # 判断基础规则
  228. elif cls.download_rule(video_duration, video_width, video_height,
  229. video_play_cnt, video_like_cnt, video_share_cnt) is False:
  230. Common.logger("recommend").info("不满足基础规则")
  231. # 判断敏感词
  232. elif any(word if word in weishi_title else False for word in cls.sensitive_words()) is True:
  233. Common.logger("recommend").info("视频已中敏感词:{}".format(weishi_title))
  234. # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=caa3fa
  235. elif video_id in [j for m in Feishu.get_values_batch("recommend", "caa3fa") for j in m]:
  236. Common.logger("recommend").info("该视频已下载:{}", video_title)
  237. # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr
  238. elif video_id in [j for n in Feishu.get_values_batch("recommend", "O7fCzr") for j in n]:
  239. Common.logger("recommend").info("该视频已在feeds中:{}", video_title)
  240. else:
  241. Common.logger("recommend").info("该视频未下载,添加至feeds中:{}".format(video_title))
  242. # feeds工作表,插入首行
  243. time.sleep(1)
  244. Feishu.insert_columns("recommend", "O7fCzr", "ROWS", 1, 2)
  245. # 获取当前时间
  246. get_feeds_time = int(time.time())
  247. # 工作表 feeds 中写入数据
  248. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
  249. "推荐榜",
  250. video_id,
  251. video_title,
  252. video_play_cnt,
  253. video_comment_cnt,
  254. video_like_cnt,
  255. video_share_cnt,
  256. video_duration,
  257. video_resolution,
  258. time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time / 1000))),
  259. user_name,
  260. user_id,
  261. head_url,
  262. cover_url,
  263. video_url]]
  264. # 等待 1s,防止操作云文档太频繁,导致报错
  265. time.sleep(1)
  266. Feishu.update_values("recommend", "O7fCzr", "A2:P2", values)
  267. except Exception as e:
  268. Common.logger("recommend").error("获取微视视频list异常:{}".format(e))
  269. # 下载/上传视频
  270. @classmethod
  271. def download_publish(cls):
  272. try:
  273. for i in range(1, len(Feishu.get_values_batch("recommend", "O7fCzr")) + 1):
  274. time.sleep(1)
  275. download_video_id = Feishu.get_values_batch("recommend", "O7fCzr")[i][2]
  276. download_video_title = Feishu.get_values_batch("recommend", "O7fCzr")[i][3]
  277. download_video_play_cnt = Feishu.get_values_batch("recommend", "O7fCzr")[i][4]
  278. download_video_comment_cnt = Feishu.get_values_batch("recommend", "O7fCzr")[i][5]
  279. download_video_like_cnt = Feishu.get_values_batch("recommend", "O7fCzr")[i][6]
  280. download_video_share_cnt = Feishu.get_values_batch("recommend", "O7fCzr")[i][7]
  281. download_video_duration = Feishu.get_values_batch("recommend", "O7fCzr")[i][8]
  282. download_video_resolution = Feishu.get_values_batch("recommend", "O7fCzr")[i][9]
  283. # download_video_width = download_video_resolution.split("*")[0]
  284. # download_video_height = download_video_resolution.split("*")[-1]
  285. download_video_send_time = Feishu.get_values_batch("recommend", "O7fCzr")[i][10]
  286. download_user_name = Feishu.get_values_batch("recommend", "O7fCzr")[i][11]
  287. download_user_id = Feishu.get_values_batch("recommend", "O7fCzr")[i][12]
  288. download_head_url = Feishu.get_values_batch("recommend", "O7fCzr")[i][13]
  289. download_cover_url = Feishu.get_values_batch("recommend", "O7fCzr")[i][14]
  290. download_video_url = Feishu.get_values_batch("recommend", "O7fCzr")[i][15]
  291. # Common.logger("recommend").info("download_video_id:{}", download_video_id)
  292. # Common.logger("recommend").info("download_video_title:{}", download_video_title)
  293. # Common.logger("recommend").info("download_video_play_cnt:{}", download_video_play_cnt)
  294. # Common.logger("recommend").info("download_video_comment_cnt:{}", download_video_comment_cnt)
  295. # Common.logger("recommend").info("download_video_like_cnt:{}", download_video_like_cnt)
  296. # Common.logger("recommend").info("download_video_share_cnt:{}", download_video_share_cnt)
  297. # Common.logger("recommend").info("download_video_duration:{}", download_video_duration)
  298. # Common.logger("recommend").info("download_video_resolution:{}", download_video_resolution)
  299. # Common.logger("recommend").info("download_video_send_time:{}", download_video_send_time)
  300. # Common.logger("recommend").info("download_user_name:{}", download_user_name)
  301. # Common.logger("recommend").info("download_user_id:{}", download_user_id)
  302. # Common.logger("recommend").info("download_head_url:{}", download_head_url)
  303. # Common.logger("recommend").info("download_cover_url:{}", download_cover_url)
  304. # Common.logger("recommend").info("download_video_url:{}", download_video_url)
  305. Common.logger("recommend").info("正在判断第{}行,视频:{}", i, download_video_title)
  306. # 过滤空行
  307. if download_video_id is None \
  308. or download_video_id == "" \
  309. or download_video_title is None \
  310. or download_video_title == "":
  311. Common.logger("recommend").warning("空行,删除")
  312. # 删除行或列,可选 ROWS、COLUMNS
  313. Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
  314. return
  315. # 分享量>=1000
  316. elif int(download_video_share_cnt) < 1000:
  317. Common.logger("recommend").info("分享量:{} < 1000", download_video_share_cnt)
  318. # 删除行或列,可选 ROWS、COLUMNS
  319. Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
  320. return
  321. # 去重
  322. elif download_video_id in [j for m in Feishu.get_values_batch("recommend", "caa3fa") for j in m]:
  323. Common.logger("recommend").info("该视频已下载:{}", download_video_title)
  324. # 删除行或列,可选 ROWS、COLUMNS
  325. Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
  326. return
  327. else:
  328. Common.logger("recommend").info("开始下载视频:{}", download_video_title)
  329. # 下载封面
  330. Common.download_method(job="recommend", text="cover",
  331. d_name=str(download_video_title), d_url=str(download_cover_url))
  332. # 下载视频
  333. Common.download_method(job="recommend", text="video",
  334. d_name=str(download_video_title), d_url=str(download_video_url))
  335. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  336. with open("./videos/" + download_video_title
  337. + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  338. f_a.write(str(download_video_id) + "\n" +
  339. str(download_video_title) + "\n" +
  340. str(download_video_duration) + "\n" +
  341. str(download_video_play_cnt) + "\n" +
  342. str(download_video_comment_cnt) + "\n" +
  343. str(download_video_like_cnt) + "\n" +
  344. str(download_video_share_cnt) + "\n" +
  345. str(download_video_resolution) + "\n" +
  346. str(int(time.mktime(
  347. time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
  348. str(download_user_name) + "\n" +
  349. str(download_head_url) + "\n" +
  350. str(download_video_url) + "\n" +
  351. str(download_cover_url) + "\n" +
  352. str(cls.wesee_access_token))
  353. Common.logger("recommend").info("==========视频信息已保存至info.txt==========")
  354. # 上传视频
  355. Common.logger("recommend").info("开始上传视频:{}".format(download_video_title))
  356. Publish.upload_and_publish("recommend", "prod", "play")
  357. # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=caa3fa
  358. Common.logger("recommend").info("保存视频ID至云文档:{}", download_video_title)
  359. # 视频ID工作表,插入首行
  360. Feishu.insert_columns("recommend", "caa3fa", "ROWS", 1, 2)
  361. # 视频ID工作表,首行写入数据
  362. upload_time = int(time.time())
  363. values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time))),
  364. "推荐榜",
  365. str(download_video_id),
  366. str(download_video_title),
  367. download_video_play_cnt,
  368. download_video_comment_cnt,
  369. download_video_like_cnt,
  370. download_video_share_cnt,
  371. download_video_duration,
  372. str(download_video_resolution),
  373. str(download_video_send_time),
  374. str(download_user_name),
  375. str(download_user_id),
  376. str(download_head_url),
  377. str(download_cover_url),
  378. str(download_video_url)]]
  379. time.sleep(1)
  380. Feishu.update_values("recommend", "caa3fa", "A2:Q2", values)
  381. # 删除行或列,可选 ROWS、COLUMNS
  382. Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
  383. return
  384. except Exception as e:
  385. Common.logger("recommend").error("下载/上传视频异常:{}", e)
  386. Feishu.dimension_range("recommend", "O7fCzr", "ROWS", 2, 2)
  387. if __name__ == "__main__":
  388. weishi = DownloadRecommend()
  389. for n in range(2):
  390. Common.logger("recommend").info("正在抓取第{}页视频", n + 1)
  391. weishi.get_feeds()
  392. # print(weishi.Referer)
  393. # print(weishi.wesee_openid)
  394. # print(weishi.wesee_openkey)
  395. # print(weishi.wesee_personid)
  396. # print(weishi.wesee_access_token)
  397. # print(weishi.wesee_thr_appid)
  398. # print(weishi.json_text)