# kanyikan_moment.py
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/6/10
import os
import random
import sys
import time
import requests
import urllib3
# Append the current working directory to the import path; presumably so the
# project-local `main` package below resolves when the script is launched from
# the repository root -- TODO confirm.
sys.path.append(os.getcwd())
from main.feishu_lib import Feishu
from main.common import Common
from main.kanyikan_moment_publish import Publish
# Passed as `proxies=` to requests.get in Moment.get_recommend; both schemes are
# mapped to None (presumably to bypass any configured proxy -- verify).
proxies = {"http": None, "https": None}
  15. class Moment:
  16. # 过滤词库
  17. @classmethod
  18. def sensitive_words(cls):
  19. word_list = []
  20. # 从云文档读取所有敏感词,添加到词库列表
  21. lists = Feishu.get_values_batch("moment", "kanyikan", "rofdM5")
  22. for i in lists:
  23. for j in i:
  24. # 过滤空的单元格内容
  25. if j is None:
  26. pass
  27. else:
  28. word_list.append(j)
  29. return word_list
  30. # 朋友圈视频 ID
  31. @classmethod
  32. def moment_videoids(cls):
  33. try:
  34. videoid_list = []
  35. # 从云文档读取所有敏感词,添加到词库列表
  36. lists = Feishu.get_values_batch("moment", "kanyikan", "iK58HX")
  37. for i in lists:
  38. for j in i:
  39. # 过滤空的单元格内容
  40. if j is None:
  41. pass
  42. else:
  43. videoid_list.append(j)
  44. return videoid_list
  45. except Exception as e:
  46. Common.logger("moment").error("获取朋友圈视频ID异常:{}", e)
  47. return "t3256lo1cmk"
  48. # 抓取基础规则
  49. @staticmethod
  50. def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
  51. """
  52. 抓取基础规则
  53. :param d_duration: 时长
  54. :param d_width: 宽
  55. :param d_height: 高
  56. :param d_play_cnt: 播放量
  57. :param d_like_cnt: 点赞量
  58. :param d_share_cnt: 分享量
  59. :return: 满足规则,返回 True;反之,返回 False
  60. """
  61. if int(float(d_duration)) >= 60:
  62. if int(d_width) >= 0 or int(d_height) >= 0:
  63. if int(d_play_cnt) >= 100000:
  64. if int(d_like_cnt) >= 0:
  65. if int(d_share_cnt) >= 0:
  66. return True
  67. else:
  68. return False
  69. else:
  70. return False
  71. else:
  72. return False
  73. return False
  74. return False
  75. # 获取推荐视频列表
  76. @classmethod
  77. def get_recommend(cls):
  78. url = "https://search.weixin.qq.com/cgi-bin/recwxa/snsgetvideoinfo?"
  79. headers = {
  80. "content-type": "application/json",
  81. "Accept-Encoding": "gzip,compress,br,deflate",
  82. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
  83. " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
  84. " MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN",
  85. "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/236/page-frame.html"
  86. }
  87. time.sleep(1)
  88. videoid = random.choice(cls.moment_videoids())
  89. # Common.logger("moment").info("videoid:{}", videoid)
  90. params = {
  91. "vid": videoid,
  92. "openid": "1924336296754305",
  93. "model": "iPhone 11<iPhone12,1>14.7.1",
  94. "sharesearchid": "8406805193800900989",
  95. "shareOpenid": "oh_m45YffSEGxvDH--6s6g9ZkPxg",
  96. }
  97. try:
  98. urllib3.disable_warnings()
  99. r = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
  100. # Common.logger("moment").info("response:{}", r.json())
  101. if "rec_video_list" not in r.json()["data"]:
  102. Common.logger("moment").warning("该视频无推荐视频列表:{}", videoid)
  103. else:
  104. feeds = r.json()["data"]["rec_video_list"]
  105. for i in range(len(feeds)):
  106. # video_id
  107. if "vid" in feeds[i]:
  108. video_id = feeds[i]["vid"]
  109. else:
  110. video_id = 0
  111. # video_title
  112. if "title" in feeds[i]:
  113. video_title = feeds[i]["title"].strip().replace("\n", "") \
  114. .replace("/", "").replace("\\", "").replace("\r", "") \
  115. .replace(":", "").replace("*", "").replace("?", "") \
  116. .replace("?", "").replace('"', "").replace("<", "") \
  117. .replace(">", "").replace("|", "").replace(" ", "") \
  118. .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
  119. .replace("小年糕", "").replace("#", "").replace("Merge", "")
  120. else:
  121. video_title = 0
  122. # video_play_cnt
  123. if "played_cnt" in feeds[i]:
  124. video_play_cnt = feeds[i]["played_cnt"]
  125. else:
  126. video_play_cnt = 0
  127. # video_comment_cnt
  128. if "comment_cnt" in feeds[i]:
  129. video_comment_cnt = feeds[i]["comment_cnt"]
  130. else:
  131. video_comment_cnt = 0
  132. # video_liked_cnt
  133. if "liked_cnt" in feeds[i]:
  134. video_liked_cnt = feeds[i]["liked_cnt"]
  135. else:
  136. video_liked_cnt = 0
  137. # video_share_cnt
  138. if "shared_cnt" in feeds[i]:
  139. video_share_cnt = feeds[i]["shared_cnt"]
  140. else:
  141. video_share_cnt = 0
  142. # video_duration
  143. if "duration" in feeds[i]:
  144. video_duration = feeds[i]["duration"]
  145. else:
  146. video_duration = 0
  147. # video_width / video_height
  148. if "width" in feeds[i] or "height" in feeds[i]:
  149. video_width = feeds[i]["width"]
  150. video_height = feeds[i]["height"]
  151. else:
  152. video_width = 0
  153. video_height = 0
  154. # video_send_time
  155. if "upload_time" in feeds[i]:
  156. video_send_time = feeds[i]["upload_time"]
  157. else:
  158. video_send_time = 0
  159. # user_name
  160. if "user_info" not in feeds[i]:
  161. user_name = 0
  162. elif "nickname" not in feeds[i]["user_info"]:
  163. user_name = 0
  164. else:
  165. user_name = feeds[i]["user_info"]["nickname"].strip().replace("\n", "")
  166. # user_id
  167. if "user_info" not in feeds[i]:
  168. user_id = 0
  169. elif "openid" not in feeds[i]["user_info"]:
  170. user_id = 0
  171. else:
  172. user_id = feeds[i]["user_info"]["openid"]
  173. # head_url
  174. if "user_info" not in feeds[i]:
  175. head_url = 0
  176. elif "headimg_url" not in feeds[i]["user_info"]:
  177. head_url = 0
  178. else:
  179. head_url = feeds[i]["user_info"]["headimg_url"]
  180. # cover_url
  181. if "cover_url" not in feeds[i]:
  182. cover_url = 0
  183. else:
  184. cover_url = feeds[i]["cover_url"]
  185. # video_url
  186. if "play_info" not in feeds[i]:
  187. video_url = 0
  188. elif "items" not in feeds[i]["play_info"]:
  189. video_url = 0
  190. else:
  191. video_url = feeds[i]["play_info"]["items"][-1]["play_url"]
  192. Common.logger("moment").info("video_id:{}", video_id)
  193. Common.logger("moment").info("video_title:{}", video_title)
  194. Common.logger("moment").info("user_name:{}", user_name)
  195. Common.logger("moment").info("video_play_cnt:{}", video_play_cnt)
  196. Common.logger("moment").info("video_liked_cnt:{}", video_liked_cnt)
  197. Common.logger("moment").info("video_share_cnt:{}", video_share_cnt)
  198. Common.logger("moment").info("video_duration:{}", video_duration)
  199. Common.logger("moment").info("video_width * video_height:{}*{}", video_width, video_height)
  200. Common.logger("moment").info("video_url:{}", video_url)
  201. # 过滤无效视频
  202. if video_id == 0 or video_title == 0 or video_duration == 0 or video_send_time == 0 or user_id == 0\
  203. or head_url == 0 or cover_url == 0 or video_url == 0:
  204. Common.logger("moment").warning("无效视频")
  205. # 抓取基础规则
  206. elif cls.download_rule(
  207. d_duration=video_duration, d_width=video_width, d_height=video_height,
  208. d_play_cnt=video_play_cnt, d_like_cnt=video_liked_cnt,
  209. d_share_cnt=video_share_cnt) is False:
  210. Common.logger("moment").info("不满足基础规则:{}", video_title)
  211. elif int(video_send_time) < 1659283200:
  212. Common.logger("moment").info('发布时间{}<2022-08-01', video_send_time)
  213. # 过滤词库
  214. elif any(word if word in video_title else False for word in cls.sensitive_words()) is True:
  215. Common.logger("moment").info("视频已中过滤词:{}".format(video_title))
  216. # 从已下载视频表去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
  217. elif video_id in [j for m in Feishu.get_values_batch("moment", "kanyikan", "20ce0c") for j in m]:
  218. Common.logger("moment").info("该视频已下载:{}", video_title)
  219. # 从feeds视频表去重:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=tGqZMX
  220. elif video_id in [j for n in Feishu.get_values_batch("moment", "kanyikan", "tGqZMX") for j in n]:
  221. Common.logger("moment").info("该视频已在moment_feeds中:{}", video_title)
  222. else:
  223. Common.logger("moment").info("该视频未下载,添加至moment_feeds中:{}", video_title)
  224. # 看一看+工作表,插入首行
  225. Feishu.insert_columns("moment", "kanyikan", "tGqZMX", "ROWS", 1, 2)
  226. # 获取当前时间
  227. get_feeds_time = int(time.time())
  228. # 准备写入云文档的数据
  229. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time)),
  230. "朋友圈",
  231. video_id,
  232. video_title,
  233. video_play_cnt,
  234. video_comment_cnt,
  235. video_liked_cnt,
  236. video_share_cnt,
  237. video_duration,
  238. str(video_width)+"*"+str(video_height),
  239. time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(video_send_time)),
  240. user_name,
  241. user_id,
  242. head_url,
  243. cover_url,
  244. video_url]]
  245. time.sleep(1)
  246. Feishu.update_values("moment", "kanyikan", "tGqZMX", "A2:P2", values)
  247. except Exception as e:
  248. Common.logger("moment").error("获取视频列表异常:{}", e)
  249. # 下载/上传视频
  250. @classmethod
  251. def download_publish(cls, env):
  252. try:
  253. moment_feeds = Feishu.get_values_batch("moment", "kanyikan", "tGqZMX")
  254. for i in range(1, len(moment_feeds) + 1):
  255. time.sleep(1)
  256. # download_push_time = moment_feeds[i][0]
  257. download_video_id = moment_feeds[i][2]
  258. download_video_title = moment_feeds[i][3]
  259. download_video_play_cnt = moment_feeds[i][4]
  260. download_video_comment_cnt = moment_feeds[i][5]
  261. download_video_like_cnt = moment_feeds[i][6]
  262. download_video_share_cnt = moment_feeds[i][7]
  263. download_video_duration = moment_feeds[i][8]
  264. download_video_resolution = moment_feeds[i][9]
  265. download_video_send_time = moment_feeds[i][10]
  266. download_user_name = moment_feeds[i][11]
  267. download_user_id = moment_feeds[i][12]
  268. download_head_url = moment_feeds[i][13]
  269. download_cover_url = moment_feeds[i][14]
  270. download_video_url = moment_feeds[i][15]
  271. Common.logger("moment").info("正在判断第{}行,视频:{}", i, download_video_title)
  272. # 发布时间的时间戳格式(秒为单位)
  273. v_send_time = int(time.mktime(time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))
  274. # 抓取时间的时间戳格式(秒为单位)
  275. # v_push_time = int(time.mktime(time.strptime(download_push_time, "%Y/%m/%d %H:%M:%S")))
  276. # 过滤空行及空标题视频
  277. if download_video_id is None\
  278. or download_video_id == ""\
  279. or download_video_title is None\
  280. or download_video_title == "":
  281. Common.logger("moment").warning("标题为空或空行,删除")
  282. # 删除行或列,可选 ROWS、COLUMNS
  283. Feishu.dimension_range("moment", "kanyikan", "tGqZMX", "ROWS", i + 1, i + 1)
  284. return
  285. # # 视频的抓取时间小于 2 天
  286. # elif int(time.time()) - v_push_time > 172800:
  287. # Common.logger("moment").info("抓取时间超过2天:{}", download_video_title)
  288. # # 删除行或列,可选 ROWS、COLUMNS
  289. # Feishu.dimension_range("tGqZMX", "ROWS", i + 1, i + 1)
  290. # return
  291. # 视频发布时间不小于 2021-06-01 00:00:00
  292. elif v_send_time < 1622476800:
  293. Common.logger("moment").info(
  294. "发布时间小于2021年6月:{},{}", download_video_title, download_video_send_time)
  295. # 删除行或列,可选 ROWS、COLUMNS
  296. Feishu.dimension_range("moment", "kanyikan", "tGqZMX", "ROWS", i + 1, i + 1)
  297. return
  298. # 从已下载视频表中去重
  299. elif download_video_id in [j for m in Feishu.get_values_batch(
  300. "moment", "kanyikan", "20ce0c") for j in m]:
  301. Common.logger("moment").info("视频已下载:{}", download_video_title)
  302. # 删除行或列,可选 ROWS、COLUMNS
  303. Feishu.dimension_range("moment", "kanyikan", "tGqZMX", "ROWS", i + 1, i + 1)
  304. return
  305. else:
  306. Common.logger("moment").info("开始下载视频:{}", download_video_title)
  307. # 下载封面
  308. Common.download_method(log_type="moment", text="cover",
  309. d_name=str(download_video_title), d_url=str(download_cover_url))
  310. # 下载视频
  311. Common.download_method(log_type="moment", text="video",
  312. d_name=str(download_video_title), d_url=str(download_video_url))
  313. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  314. with open("./videos/" + download_video_title + "/" + "info.txt",
  315. "a", encoding="UTF-8") as f_a:
  316. f_a.write(str(download_video_id) + "\n" +
  317. str(download_video_title) + "\n" +
  318. str(download_video_duration) + "\n" +
  319. str(download_video_play_cnt) + "\n" +
  320. str(download_video_comment_cnt) + "\n" +
  321. str(download_video_like_cnt) + "\n" +
  322. str(download_video_share_cnt) + "\n" +
  323. str(download_video_resolution) + "\n" +
  324. str(int(time.mktime(
  325. time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
  326. str(download_user_name) + "\n" +
  327. str(download_head_url) + "\n" +
  328. str(download_video_url) + "\n" +
  329. str(download_cover_url) + "\n" +
  330. "KANYIKAN_MOMENT")
  331. Common.logger("moment").info("==========视频信息已保存至info.txt==========")
  332. # 上传视频
  333. Common.logger("moment").info("开始上传视频:{}".format(download_video_title))
  334. our_video_id = Publish.upload_and_publish("moment", env, "play")
  335. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  336. Common.logger("moment").info("视频上传完成:{}", download_video_title)
  337. # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=20ce0c
  338. Common.logger("moment").info("保存视频ID至云文档:{}", download_video_title)
  339. # 视频ID工作表,插入首行
  340. Feishu.insert_columns("moment", "kanyikan", "20ce0c", "ROWS", 1, 2)
  341. # 视频ID工作表,首行写入数据
  342. upload_time = int(time.time())
  343. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
  344. "朋友圈",
  345. str(download_video_id),
  346. str(download_video_title),
  347. our_video_link,
  348. download_video_play_cnt,
  349. download_video_comment_cnt,
  350. download_video_like_cnt,
  351. download_video_share_cnt,
  352. download_video_duration,
  353. str(download_video_resolution),
  354. str(download_video_send_time),
  355. str(download_user_name),
  356. str(download_user_id),
  357. str(download_head_url),
  358. str(download_cover_url),
  359. str(download_video_url)]]
  360. time.sleep(1)
  361. Feishu.update_values("moment", "kanyikan", "20ce0c", "F2:W2", values)
  362. # 保存视频信息到监控表
  363. Common.logger("moment").info("添加视频到监控表:{}", download_video_title)
  364. # 插入空行
  365. time.sleep(1)
  366. Feishu.insert_columns("moment", "monitor", "6fed97", "ROWS", 1, 2)
  367. # 视频信息写入监控表
  368. values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(upload_time))),
  369. str(download_video_id),
  370. download_video_title,
  371. our_video_link,
  372. download_video_duration,
  373. str(download_video_send_time),
  374. download_video_play_cnt]]
  375. time.sleep(1)
  376. Feishu.update_values("moment", "monitor", "6fed97", "F2:L2", values)
  377. # 删除行或列,可选 ROWS、COLUMNS
  378. Feishu.dimension_range("moment", "kanyikan", "tGqZMX", "ROWS", i + 1, i + 1)
  379. return
  380. except Exception as e:
  381. Common.logger("moment").error("下载视频异常:{}", e)
  382. # 删除行或列,可选 ROWS、COLUMNS
  383. Feishu.dimension_range("moment", "kanyikan", "tGqZMX", "ROWS", 2, 2)
  384. # 执行下载/上传
  385. @classmethod
  386. def run_download_publish(cls, env):
  387. try:
  388. while True:
  389. if len(Feishu.get_values_batch("moment", "kanyikan", "tGqZMX")) == 1:
  390. break
  391. else:
  392. cls.download_publish(env)
  393. except Exception as e:
  394. Common.logger("moment").error("执行下载/上传异常:{}", e)
  395. if __name__ == "__main__":
  396. kuaishou = Moment()
  397. kuaishou.run_download_publish("dev")
  398. pass