# recommend_list.py
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/3/29
"""
Download videos that satisfy the download rules from the Kuaishou
short-video WeChat mini program.
"""
import json
import os
import sys
import time
import requests
import urllib3
# Put the current working directory on the import path; presumably this lets the
# project-local `main` package imported below resolve when run from the project root.
sys.path.append(os.getcwd())
from main.common import Common
from main.feishu_lib import Feishu
from main.publish import Publish
# Proxy mapping handed to requests.post() in KuaiShou.get_feeds; both schemes are None.
proxies = {"http": None, "https": None}
class KuaiShou:
    # WeChat account configuration.
    # The sheet "WFF4jw" is fetched once, while the class body is executed (i.e. at
    # import time); each setting is read from index [row][2] of the returned rows.
    wechat_sheet = Feishu.get_values_batch("recommend", "kuaishou", "WFF4jw")
    # Sent as the "Referer" request header by get_feeds.
    Referer = wechat_sheet[2][2]
    # Sent as the "__NS_sig3" / "__NS_sig3_origin" query parameters by get_feeds.
    NS_sig3 = wechat_sheet[3][2]
    NS_sig3_origin = wechat_sheet[4][2]
    # The remaining values are sent as request cookies by get_feeds.
    did = wechat_sheet[5][2]
    session_key = wechat_sheet[6][2]
    unionid = wechat_sheet[7][2]
    eUserStableOpenId = wechat_sheet[8][2]
    openId = wechat_sheet[9][2]
    eOpenUserId = wechat_sheet[10][2]
    kuaishou_wechat_app_st = wechat_sheet[11][2]
    passToken = wechat_sheet[12][2]
    # Also sent, converted with int(), as "thirdPartyUserId" in the request body.
    userId = wechat_sheet[13][2]
  33. @classmethod
  34. def sensitive_words(cls):
  35. # 敏感词库列表
  36. word_list = []
  37. # 从云文档读取所有敏感词,添加到词库列表
  38. lists = Feishu.get_values_batch("recommend", "kuaishou", "HIKVvs")
  39. for i in lists:
  40. for j in i:
  41. # 过滤空的单元格内容
  42. if j is None:
  43. pass
  44. else:
  45. word_list.append(j)
  46. return word_list
  47. @staticmethod
  48. def kuaishou_download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
  49. """
  50. 下载视频的基本规则
  51. :param d_duration: 时长
  52. :param d_width: 宽
  53. :param d_height: 高
  54. :param d_play_cnt: 播放量
  55. :param d_like_cnt: 点赞量
  56. :param d_share_cnt: 分享量
  57. :return: 满足规则,返回 True;反之,返回 False
  58. """
  59. if 600 >= int(float(d_duration)) >= 60:
  60. if int(d_width) >= 720 or int(d_height) >= 720:
  61. if int(d_play_cnt) >= 50000:
  62. if int(d_like_cnt) >= 50000:
  63. if int(d_share_cnt) >= 2000:
  64. return True
  65. else:
  66. return False
  67. else:
  68. return False
  69. else:
  70. return False
  71. return False
  72. return False
  73. @classmethod
  74. def get_feeds(cls, log_type):
  75. """
  76. 1.从快手小程序首页推荐,获取视频列表
  77. 2.先在 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128 中去重
  78. 3.再从 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=JK6npf 中去重
  79. 4.添加视频信息至 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=JK6npf
  80. """
  81. url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/recommend"
  82. headers = {
  83. "content-type": "application/json",
  84. "Accept-Encoding": "gzip,compress,br,deflate",
  85. "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
  86. ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
  87. ' MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN',
  88. "Referer": str(cls.Referer),
  89. }
  90. params = {
  91. "__NS_sig3": str(cls.NS_sig3),
  92. "__NS_sig3_origin": str(cls.NS_sig3_origin)
  93. }
  94. cookies = {
  95. "did": str(cls.did),
  96. "preMinaVersion": "v3.109.0",
  97. "sid": "kuaishou.wechat.app",
  98. "appId": "ks_wechat_small_app_2",
  99. "clientid": "13",
  100. "client_key": "f60ac815",
  101. "kpn": "WECHAT_SMALL_APP",
  102. "kpf": "OUTSIDE_ANDROID_H5",
  103. "language": "zh_CN",
  104. "smallAppVersion": "v3.114.0",
  105. "session_key": str(cls.session_key),
  106. "unionid": str(cls.unionid),
  107. "eUserStableOpenId": str(cls.eUserStableOpenId),
  108. "openId": str(cls.openId),
  109. "eOpenUserId": str(cls.eOpenUserId),
  110. "kuaishou.wechat.app_st": str(cls.kuaishou_wechat_app_st),
  111. "passToken": str(cls.passToken),
  112. "userId": str(cls.userId)
  113. }
  114. json_data = {
  115. "count": 10,
  116. "portal": 1,
  117. "pageType": 2,
  118. "extraRequestInfo": "{\"scene\":1089,\"fid\":\"\",\"sharerUserId\":\"\",\"curPhotoIndex\":0,"
  119. "\"adShow\":true,\"weChatAd\":{},\"headurl\":\"https://js2.a.kwimgs.com/udata/pkg"
  120. "/fe/profiel_icon_photo_normal@3x.fb3ec1af.png\",\"page\":0}",
  121. "needLivestream": True,
  122. "pcursor": 0,
  123. "sourceFrom": 2,
  124. "thirdPartyUserId": int(cls.userId)
  125. }
  126. try:
  127. urllib3.disable_warnings()
  128. r = requests.post(url=url, headers=headers, params=params,
  129. cookies=cookies, json=json_data, proxies=proxies, verify=False)
  130. response = json.loads(r.content.decode("utf8"))
  131. feeds = response["feeds"]
  132. for i in range(len(feeds)):
  133. # 视频标题过滤话题及处理特殊字符
  134. kuaishou_title = feeds[i]["caption"]
  135. title_split1 = kuaishou_title.split(" #")
  136. if title_split1[0] != "":
  137. title1 = title_split1[0]
  138. else:
  139. title1 = title_split1[-1]
  140. title_split2 = title1.split(" #")
  141. if title_split2[0] != "":
  142. title2 = title_split2[0]
  143. else:
  144. title2 = title_split2[-1]
  145. title_split3 = title2.split("@")
  146. if title_split3[0] != "":
  147. title3 = title_split3[0]
  148. else:
  149. title3 = title_split3[-1]
  150. video_title = title3.strip().replace("\n", "") \
  151. .replace("/", "").replace("快手", "").replace(" ", "") \
  152. .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
  153. .replace("#", "").replace(".", "。").replace("\\", "") \
  154. .replace(":", "").replace("*", "").replace("?", "") \
  155. .replace("?", "").replace('"', "").replace("<", "") \
  156. .replace(">", "").replace("|", "")
  157. if "photoId" not in feeds[i]:
  158. video_id = "0"
  159. else:
  160. video_id = feeds[i]["photoId"]
  161. if "viewCount" not in feeds[i]:
  162. video_play_cnt = "0"
  163. else:
  164. video_play_cnt = feeds[i]["viewCount"]
  165. if "likeCount" not in feeds[i]:
  166. video_like_cnt = "0"
  167. else:
  168. video_like_cnt = feeds[i]["likeCount"]
  169. if "shareCount" not in feeds[i]:
  170. video_share_cnt = "0"
  171. else:
  172. video_share_cnt = feeds[i]["shareCount"]
  173. if "commentCount" not in feeds[i]:
  174. video_comment_cnt = "0"
  175. else:
  176. video_comment_cnt = feeds[i]["commentCount"]
  177. if "duration" not in feeds[i]:
  178. video_duration = "0"
  179. else:
  180. video_duration = int(int(feeds[i]["duration"]) / 1000)
  181. if "width" not in feeds[i] or "height" not in feeds[i]:
  182. video_width = "0"
  183. video_height = "0"
  184. video_resolution = str(video_width) + "*" + str(video_height)
  185. else:
  186. video_width = feeds[i]["width"]
  187. video_height = feeds[i]["height"]
  188. video_resolution = str(video_width) + "*" + str(video_height)
  189. if "timestamp" not in feeds[i]:
  190. video_send_time = "0"
  191. else:
  192. video_send_time = feeds[i]["timestamp"]
  193. user_name = feeds[i]["userName"].strip().replace("\n", "") \
  194. .replace("/", "").replace("快手", "").replace(" ", "") \
  195. .replace(" ", "").replace("&NBSP", "").replace("\r", "")
  196. user_id = feeds[i]["userId"]
  197. if "headUrl" not in feeds[i]:
  198. head_url = "0"
  199. else:
  200. head_url = feeds[i]["headUrl"]
  201. if len(feeds[i]["coverUrls"]) == 0:
  202. cover_url = "0"
  203. else:
  204. cover_url = feeds[i]["coverUrls"][0]["url"]
  205. if len(feeds[i]["mainMvUrls"]) == 0:
  206. video_url = "0"
  207. else:
  208. video_url = feeds[i]["mainMvUrls"][0]["url"]
  209. Common.logger(log_type).info("video_title:{}".format(video_title))
  210. Common.logger(log_type).info("user_name:{}".format(user_name))
  211. Common.logger(log_type).info("video_id:{}".format(video_id))
  212. Common.logger(log_type).info("video_play_cnt:{}".format(video_play_cnt))
  213. Common.logger(log_type).info("video_like_cnt:{}".format(video_like_cnt))
  214. Common.logger(log_type).info("video_share_cnt:{}".format(video_share_cnt))
  215. # Common.logger(log_type).info("video_comment_cnt:{}".format(video_comment_cnt))
  216. Common.logger(log_type).info("video_duration:{}秒".format(video_duration))
  217. # Common.logger(log_type).info("video_resolution:{}".format(video_resolution))
  218. Common.logger(log_type).info("video_send_time:{}".format(
  219. time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
  220. # Common.logger(log_type).info("user_id:{}".format(user_id))
  221. # Common.logger(log_type).info("head_url:{}".format(head_url))
  222. # Common.logger(log_type).info("cover_url:{}".format(cover_url))
  223. Common.logger(log_type).info("video_url:{}".format(video_url))
  224. # 过滤无效视频
  225. if video_id == "0" \
  226. or head_url == "0" \
  227. or cover_url == "0" \
  228. or video_url == "0" \
  229. or video_duration == "0" \
  230. or video_send_time == "0" \
  231. or user_name == "" \
  232. or video_title == "":
  233. Common.logger(log_type).info("无效视频\n")
  234. # 判断敏感词
  235. elif any(word if word in kuaishou_title else False for word in cls.sensitive_words()) is True:
  236. Common.logger(log_type).info("视频已中敏感词:{}\n".format(kuaishou_title))
  237. # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
  238. elif video_id in [j for m in Feishu.get_values_batch(log_type, "kuaishou", "3cd128") for j in m]:
  239. Common.logger(log_type).info("该视频已下载:{}\n", video_title)
  240. # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=JK6npf
  241. elif video_id in [j for n in Feishu.get_values_batch(log_type, "kuaishou", "JK6npf") for j in n]:
  242. Common.logger(log_type).info("该视频已在feeds中:{}\n", video_title)
  243. else:
  244. # feeds工作表,插入首行
  245. time.sleep(1)
  246. Feishu.insert_columns(log_type, "kuaishou", "JK6npf", "ROWS", 1, 2)
  247. # 获取当前时间
  248. get_feeds_time = int(time.time())
  249. # 看一看云文档,工作表 kanyikan_feeds_1 中写入数据
  250. values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time))),
  251. "推荐榜",
  252. video_id,
  253. video_title,
  254. video_play_cnt,
  255. video_comment_cnt,
  256. video_like_cnt,
  257. video_share_cnt,
  258. video_duration,
  259. video_resolution,
  260. time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
  261. user_name,
  262. user_id,
  263. head_url,
  264. cover_url,
  265. video_url]]
  266. # 等待 1s,防止操作云文档太频繁,导致报错
  267. time.sleep(1)
  268. Feishu.update_values(log_type, "kuaishou", "JK6npf", "A2:P2", values)
  269. Common.logger(log_type).info("视频:{}添加至feeds成功\n".format(video_title))
  270. except Exception as e:
  271. # Feishu.bot(log_type, "recommend:get_feeds异常"+format(e))
  272. Common.logger(log_type).error("获取视频 list 异常:{}\n".format(e))
  273. @classmethod
  274. def download_publish(cls, log_type, env):
  275. """
  276. 1.从 https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=JK6npf 中读取视频信息
  277. 2.下载并上传符合规则的视频
  278. 测试环境:env == dev
  279. 正式环境:env == prod
  280. """
  281. try:
  282. recommend_feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")
  283. for i in range(1, len(recommend_feeds_sheet) + 1):
  284. download_video_id = recommend_feeds_sheet[i][2]
  285. download_video_title = recommend_feeds_sheet[i][3]
  286. download_video_play_cnt = recommend_feeds_sheet[i][4]
  287. download_video_comment_cnt = recommend_feeds_sheet[i][5]
  288. download_video_like_cnt = recommend_feeds_sheet[i][6]
  289. download_video_share_cnt = recommend_feeds_sheet[i][7]
  290. download_video_duration = recommend_feeds_sheet[i][8]
  291. download_video_resolution = recommend_feeds_sheet[i][9]
  292. download_video_send_time = recommend_feeds_sheet[i][10]
  293. download_user_name = recommend_feeds_sheet[i][11]
  294. download_user_id = recommend_feeds_sheet[i][12]
  295. download_head_url = recommend_feeds_sheet[i][13]
  296. download_cover_url = recommend_feeds_sheet[i][14]
  297. download_video_url = recommend_feeds_sheet[i][15]
  298. Common.logger(log_type).info("正在判断第{}行,视频:{}", i + 1, download_video_title)
  299. # Common.logger(log_type).info("download_video_id:{}", download_video_id)
  300. # Common.logger(log_type).info("download_video_title:{}", download_video_title)
  301. # Common.logger(log_type).info("download_video_play_cnt:{}", download_video_play_cnt)
  302. # Common.logger(log_type).info("download_video_comment_cnt:{}", download_video_comment_cnt)
  303. # Common.logger(log_type).info("download_video_like_cnt:{}", download_video_like_cnt)
  304. # Common.logger(log_type).info("download_video_share_cnt:{}", download_video_share_cnt)
  305. # Common.logger(log_type).info("download_video_duration:{}", download_video_duration)
  306. # Common.logger(log_type).info("download_video_resolution:{}", download_video_resolution)
  307. # Common.logger(log_type).info("download_video_send_time:{}", download_video_send_time)
  308. # Common.logger(log_type).info("download_user_name:{}", download_user_name)
  309. # Common.logger(log_type).info("download_user_id:{}", download_user_id)
  310. # Common.logger(log_type).info("download_head_url:{}", download_head_url)
  311. # Common.logger(log_type).info("download_cover_url:{}", download_cover_url)
  312. # Common.logger(log_type).info("download_video_url:{}", download_video_url)
  313. # 过滤空行
  314. if download_video_id is None or download_video_title is None or download_video_play_cnt is None:
  315. # 删除行或列,可选 ROWS、COLUMNS
  316. Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", i + 1, i + 1)
  317. Common.logger(log_type).info("空行,删除成功\n")
  318. return
  319. # 去重
  320. elif download_video_id in [j for m in Feishu.get_values_batch(log_type, "kuaishou", "3cd128") for j in
  321. m]:
  322. # 删除行或列,可选 ROWS、COLUMNS
  323. Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", i + 1, i + 1)
  324. Common.logger(log_type).info("该视频已下载:{},删除成功\n", download_video_title)
  325. return
  326. # 下载规则
  327. elif cls.kuaishou_download_rule(
  328. download_video_duration, download_video_resolution.split("*")[0],
  329. download_video_resolution.split("*")[-1],
  330. download_video_play_cnt, download_video_like_cnt, download_video_share_cnt) is True:
  331. # 下载封面
  332. Common.download_method(log_type=log_type, text="cover",
  333. d_name=str(download_video_title), d_url=str(download_cover_url))
  334. # 下载视频
  335. Common.download_method(log_type=log_type, text="video",
  336. d_name=str(download_video_title), d_url=str(download_video_url))
  337. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  338. with open("./videos/" + download_video_title
  339. + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  340. f_a.write(str(download_video_id) + "\n" +
  341. str(download_video_title) + "\n" +
  342. str(download_video_duration) + "\n" +
  343. str(download_video_play_cnt) + "\n" +
  344. str(download_video_comment_cnt) + "\n" +
  345. str(download_video_like_cnt) + "\n" +
  346. str(download_video_share_cnt) + "\n" +
  347. str(download_video_resolution) + "\n" +
  348. str(int(time.mktime(
  349. time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
  350. str(download_user_name) + "\n" +
  351. str(download_head_url) + "\n" +
  352. str(download_video_url) + "\n" +
  353. str(download_cover_url) + "\n" +
  354. str(cls.did))
  355. Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
  356. # 上传视频
  357. Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
  358. our_video_id = Publish.upload_and_publish(log_type, env, "play")
  359. our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
  360. Common.logger(log_type).info("视频上传完成:{}", download_video_title)
  361. # 保存视频 ID 到云文档:https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
  362. Common.logger(log_type).info("保存视频ID至云文档:{}", download_video_title)
  363. # 视频ID工作表,插入首行
  364. Feishu.insert_columns(log_type, "kuaishou", "3cd128", "ROWS", 1, 2)
  365. # 视频ID工作表,首行写入数据
  366. upload_time = int(time.time())
  367. values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time))),
  368. "推荐榜",
  369. str(download_video_id),
  370. str(download_video_title),
  371. our_video_link,
  372. download_video_play_cnt,
  373. download_video_comment_cnt,
  374. download_video_like_cnt,
  375. download_video_share_cnt,
  376. download_video_duration,
  377. str(download_video_resolution),
  378. str(download_video_send_time),
  379. str(download_user_name),
  380. str(download_user_id),
  381. str(download_head_url),
  382. str(download_cover_url),
  383. str(download_video_url)]]
  384. time.sleep(1)
  385. Feishu.update_values(log_type, "kuaishou", "3cd128", "F2:V2", values)
  386. # 删除行或列,可选 ROWS、COLUMNS
  387. time.sleep(1)
  388. Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", i + 1, i + 1)
  389. Common.logger(log_type).info("从云文档删除该视频信息成功:{}\n", download_video_title)
  390. return
  391. else:
  392. # 删除行或列,可选 ROWS、COLUMNS
  393. Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", i + 1, i + 1)
  394. # 从云文档删除该视频信息:https://w42nne6hzg.feishu.cn/sheets/shtcngRPoDYAi24x52j2nDuHMih?sheet=JK6npf
  395. Common.logger(log_type).info("该视频不满足下载规则,删除在云文档中的信息:{}\n", download_video_title)
  396. return
  397. except Exception as e:
  398. # Feishu.bot(log_type, "recommend:download_publish异常" + format(e))
  399. Common.logger(log_type).error("视频 info 异常,删除该视频信息\n", e)
  400. # 删除行或列,可选 ROWS、COLUMNS
  401. Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", 2, 2)
  402. return
  403. # 执行上传及下载
  404. @classmethod
  405. def run_download_publish(cls, log_type, env):
  406. try:
  407. while True:
  408. if len(Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")) == 1:
  409. break
  410. else:
  411. cls.download_publish(log_type, env)
  412. except Exception as e:
  413. Common.logger(log_type).error("执行下载/上传异常:{}", e)
  414. if __name__ == "__main__":
  415. kuaishou = KuaiShou()
  416. print(kuaishou.sensitive_words())