# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/5/18
# @File: person_list.py
import time
from pathlib import Path

import requests
import urllib3

from main.common import Common
from main.feishu_lib import Feishu
from main.publish import Publish
  10. proxies = {"http": None, "https": None}
  11. class Person:
  12. # 翻页初始值
  13. next_t_list = [-1]
  14. person_x_b3_traceid = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C3:C3")[0]
  15. person_x_token_id = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C4:C4")[0]
  16. person_referer = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"]
  17. person_uid = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C6:C6")[0]
  18. person_token = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C7:C7")[0]
  19. # 过滤敏感词
  20. @classmethod
  21. def sensitive_words(cls):
  22. # 敏感词库列表
  23. word_list = []
  24. # 从云文档读取所有敏感词,添加到词库列表
  25. lists = Feishu.get_values_batch("person-logs", "xiaoniangao", "DRAnZh")
  26. for i in lists:
  27. for j in i:
  28. # 过滤空的单元格内容
  29. if j is None:
  30. pass
  31. else:
  32. word_list.append(j)
  33. return word_list
  34. # 基础门槛规则
  35. @staticmethod
  36. def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt, d_send_time):
  37. """
  38. 下载视频的基本规则
  39. :param d_duration: 时长
  40. :param d_width: 宽
  41. :param d_height: 高
  42. :param d_play_cnt: 播放量
  43. :param d_like_cnt: 点赞量
  44. :param d_share_cnt: 分享量
  45. :param d_send_time: 发布时间
  46. :return: 满足规则,返回 True;反之,返回 False
  47. """
  48. # 视频时长
  49. if 600 >= int(float(d_duration)) >= 60:
  50. # 宽或高
  51. if int(d_width) >= 0 or int(d_height) >= 0:
  52. # 播放量
  53. if int(d_play_cnt) >= 5000:
  54. # 点赞量
  55. if int(d_like_cnt) >= 0:
  56. # 分享量
  57. if int(d_share_cnt) >= 0:
  58. # 发布时间 <= 48 小时
  59. if int(time.time()) - int(d_send_time) / 1000 <= 172800:
  60. return True
  61. else:
  62. return False
  63. else:
  64. return False
  65. else:
  66. return False
  67. else:
  68. return False
  69. return False
  70. return False
  71. # 获取关注用户列表
  72. @classmethod
  73. def follow_person_list(cls):
  74. try:
  75. if len(Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi")) == 1:
  76. Common.person_logger().info("暂无定向爬取账号")
  77. else:
  78. person_list = []
  79. nick_list = []
  80. for i in range(2, len(Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi")) + 1):
  81. time.sleep(0.5)
  82. profile_mid = Feishu.get_range_value(
  83. "person-logs", "xiaoniangao", "oNpThi", "B" + str(i) + ":" + "B" + str(i))[0]
  84. time.sleep(0.5)
  85. nick = \
  86. Feishu.get_range_value("person-logs", "xiaoniangao", "oNpThi",
  87. "C" + str(i) + ":" + "C" + str(i))[0]
  88. nick_list.append(nick)
  89. person_list.append(profile_mid)
  90. Common.person_logger().info("已获取用户列表:{}", nick_list)
  91. return person_list
  92. except Exception as e:
  93. Common.person_logger().error("获取用户列表异常:{}", e)
  94. # 获取取消关注用户列表
  95. @classmethod
  96. def unfollow_person_list(cls):
  97. try:
  98. if len(Feishu.get_values_batch("person-logs", "xiaoniangao", "tuMNhn")) == 1:
  99. Common.person_logger().info("暂无定向账号")
  100. else:
  101. unfollow_person_list = []
  102. nick_list = []
  103. for i in range(2, len(Feishu.get_values_batch("person-logs", "xiaoniangao", "tuMNhn")) + 1):
  104. time.sleep(0.5)
  105. profile_mid = Feishu.get_range_value(
  106. "person-logs", "xiaoniangao", "tuMNhn", "B" + str(i) + ":" + "B" + str(i))[0]
  107. time.sleep(0.5)
  108. nick = \
  109. Feishu.get_range_value("person-logs", "xiaoniangao", "tuMNhn",
  110. "C" + str(i) + ":" + "C" + str(i))[0]
  111. nick_list.append(nick)
  112. unfollow_person_list.append(profile_mid)
  113. Common.person_logger().info("取消关注用户列表:{}", nick_list)
  114. return unfollow_person_list
  115. except Exception as e:
  116. Common.person_logger().error("获取用户列表异常:{}", e)
  117. # 关注列表中的用户
  118. @classmethod
  119. def sub_persons(cls):
  120. profile_mids = cls.follow_person_list()
  121. for profile_mid in profile_mids:
  122. url = "https://api.xiaoniangao.cn/V1/account/sub_user"
  123. headers = {
  124. "x-b3-traceid": cls.person_x_b3_traceid,
  125. "X-Token-Id": cls.person_x_token_id,
  126. "content-type": "application/json",
  127. "uuid": cls.person_uid,
  128. "Accept-Encoding": "gzip,compress,br,deflate",
  129. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
  130. " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
  131. "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
  132. "Referer": cls.person_referer
  133. }
  134. data = {
  135. "visited_mid": int(profile_mid),
  136. "log_common_params": {
  137. "e": [{
  138. "data": {
  139. "page": "profilePage",
  140. "topic": "public",
  141. "type": "follow",
  142. "name": "user",
  143. "smid": str(profile_mid)
  144. },
  145. "ab": {}
  146. }],
  147. "ext": {
  148. "brand": "iPhone",
  149. "device": "iPhone 11",
  150. "os": "iOS 14.7.1",
  151. "weixinver": "8.0.20",
  152. "srcver": "2.24.2",
  153. "net": "wifi",
  154. "scene": "1089"
  155. },
  156. "pj": "1",
  157. "pf": "2",
  158. "session_id": "d53b6125-942b-4ec1-8d22-f9451a35e9f9"
  159. },
  160. "token": cls.person_token,
  161. "uid": cls.person_uid,
  162. "proj": "ma",
  163. "wx_ver": "8.0.20",
  164. "code_ver": "3.62.0"
  165. }
  166. try:
  167. urllib3.disable_warnings()
  168. r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
  169. Common.person_logger().info("关注用户:{},{}", profile_mid, r)
  170. except Exception as e:
  171. Common.person_logger().error("关注用户异常:{}", e)
  172. # 取消关注
  173. @classmethod
  174. def unsub_persons(cls):
  175. unsub_profile_mids = cls.unfollow_person_list()
  176. for profile_mid in unsub_profile_mids:
  177. url = "https://api.xiaoniangao.cn/V1/account/unsub_user"
  178. headers = {
  179. "x-b3-traceid": cls.person_x_b3_traceid,
  180. "X-Token-Id": cls.person_x_token_id,
  181. "content-type": "application/json",
  182. "uuid": cls.person_uid,
  183. "Accept-Encoding": "gzip,compress,br,deflate",
  184. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
  185. " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
  186. "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
  187. "Referer": cls.person_referer
  188. }
  189. data = {
  190. "visited_mid": int(profile_mid),
  191. "log_common_params": {
  192. "e": [{
  193. "data": {
  194. "page": "profilePage",
  195. "topic": "public",
  196. "type": "unfollow",
  197. "name": "user",
  198. "smid": str(profile_mid)
  199. },
  200. "ab": {}
  201. }],
  202. "ext": {
  203. "brand": "iPhone",
  204. "device": "iPhone 11",
  205. "os": "iOS 14.7.1",
  206. "weixinver": "8.0.20",
  207. "srcver": "2.24.4",
  208. "net": "wifi",
  209. "scene": "1089"
  210. },
  211. "pj": "1",
  212. "pf": "2",
  213. "session_id": "6a2959c7-3f98-411f-8bc9-8d2a8a5c6f16"
  214. },
  215. "token": cls.person_token,
  216. "uid": cls.person_uid,
  217. "proj": "ma",
  218. "wx_ver": "8.0.20",
  219. "code_ver": "3.64.1"}
  220. try:
  221. urllib3.disable_warnings()
  222. r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
  223. Common.person_logger().info("取消关注:{},{}", profile_mid, r)
  224. except Exception as e:
  225. Common.person_logger().error("取消关注异常:{}", e)
  226. # 从关注列表获取视频,并下载符合规则的视频,再进行上传
  227. @classmethod
  228. def download_from_sub(cls, endtime):
  229. url = "https://api.xiaoniangao.cn/album/get_user_trends"
  230. headers = {
  231. "x-b3-traceid": cls.person_x_b3_traceid,
  232. "X-Token-Id": cls.person_x_token_id,
  233. "content-type": "application/json",
  234. "uuid": cls.person_uid,
  235. "Accept-Encoding": "gzip,compress,br,deflate",
  236. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
  237. " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
  238. "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
  239. "Referer": cls.person_referer
  240. }
  241. data = {
  242. "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
  243. "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
  244. "start_t": int(cls.next_t_list[-1]),
  245. "limit": 5,
  246. "share_width": 625,
  247. "share_height": 500,
  248. "token": cls.person_token,
  249. "uid": cls.person_uid,
  250. "proj": "ma",
  251. "wx_ver": "8.0.20",
  252. "code_ver": "3.62.0",
  253. "log_common_params": {
  254. "e": [{
  255. "data": {
  256. "page": "discoverIndexPage",
  257. "topic": "follow"
  258. }
  259. }],
  260. "ext": {
  261. "brand": "iPhone",
  262. "device": "iPhone 11",
  263. "os": "iOS 14.7.1",
  264. "weixinver": "8.0.20",
  265. "srcver": "2.24.2",
  266. "net": "wifi",
  267. "scene": "1089"
  268. },
  269. "pj": "1",
  270. "pf": "2",
  271. "session_id": "18da9157-5aa6-4955-a849-9160f07ee912"
  272. }
  273. }
  274. try:
  275. urllib3.disable_warnings()
  276. r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
  277. next_t = r.json()["data"]["next_t"]
  278. cls.next_t_list.append(next_t)
  279. feeds = r.json()["data"]["list"]
  280. for i in range(len(feeds)):
  281. # 标题
  282. video_title = feeds[i]["title"].strip().replace("\n", "") \
  283. .replace("/", "").replace("\r", "").replace("#", "") \
  284. .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
  285. .replace(":", "").replace("*", "").replace("?", "") \
  286. .replace("?", "").replace('"', "").replace("<", "") \
  287. .replace(">", "").replace("|", "").replace(" ", "")
  288. Common.person_logger().info("标题:{}", video_title)
  289. # 用户名
  290. user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
  291. .replace("/", "").replace("快手", "").replace(" ", "") \
  292. .replace(" ", "").replace("&NBSP", "").replace("\r", "")
  293. Common.person_logger().info("用户名:{}", user_name)
  294. # 视频 ID
  295. video_id = feeds[i]["vid"]
  296. Common.person_logger().info("视频ID:{}", video_id)
  297. # 播放量
  298. video_play_cnt = feeds[i]["play_pv"]
  299. Common.person_logger().info("播放量:{}", video_play_cnt)
  300. # 评论数
  301. video_comment_cnt = feeds[i]["comment_count"]
  302. # 点赞
  303. video_like_cnt = feeds[i]["favor"]["total"]
  304. # 分享
  305. video_share_cnt = feeds[i]["share"]
  306. # 时长
  307. video_duration = int(feeds[i]["du"] / 1000)
  308. # 发布时间
  309. video_send_time = feeds[i]["t"]
  310. Common.person_logger().info(
  311. "发布时间:{}", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
  312. # 宽和高
  313. video_width = feeds[i]["w"]
  314. video_height = feeds[i]["h"]
  315. # 头像
  316. head_url = feeds[i]["user"]["hurl"]
  317. # 用户 ID
  318. profile_id = feeds[i]["id"]
  319. # 用户 mid
  320. profile_mid = feeds[i]["user"]["mid"]
  321. # 封面
  322. cover_url = feeds[i]["url"]
  323. # 视频播放地址
  324. video_url = feeds[i]["v_url"]
  325. Common.person_logger().info("播放地址:{}", video_url)
  326. # 过滤无效视频
  327. if video_id == "" or video_url == "" or video_send_time == "":
  328. Common.person_logger().info("无效视频")
  329. elif int(video_send_time) < endtime:
  330. Common.person_logger().info("发布时间超过 48 小时")
  331. elif cls.download_rule(
  332. video_duration, video_width, video_height, video_play_cnt,
  333. video_like_cnt, video_share_cnt, video_send_time) is False:
  334. Common.person_logger().info("不满足基础门槛规则")
  335. # 过滤敏感词
  336. elif any(word if word in video_title else False for word in cls.sensitive_words()) is True:
  337. Common.person_logger().info("视频已中敏感词:{}".format(video_title))
  338. # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
  339. elif video_id in [j for i in Feishu.get_values_batch("person-logs", "xiaoniangao", "yatRv2") for j in i]:
  340. Common.person_logger().info("该视频已下载:{}", video_title)
  341. # 满足抓取规则
  342. else:
  343. Common.person_logger().info("开始下载视频:{}", video_title)
  344. # 下载封面
  345. Common.download_method(
  346. log_path="person-logs", text="cover", d_name=video_title, d_url=cover_url)
  347. # 下载视频
  348. Common.download_method(
  349. log_path="person-logs", text="video", d_name=video_title, d_url=video_url)
  350. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  351. with open(r"./videos/" + video_title
  352. + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  353. f_a.write(str(video_id) + "\n" +
  354. str(video_title) + "\n" +
  355. str(video_duration) + "\n" +
  356. str(video_play_cnt) + "\n" +
  357. str(video_comment_cnt) + "\n" +
  358. str(video_like_cnt) + "\n" +
  359. str(video_share_cnt) + "\n" +
  360. str(video_width) + "*" + str(video_height) + "\n" +
  361. str(video_send_time) + "\n" +
  362. str(user_name) + "\n" +
  363. str(head_url) + "\n" +
  364. str(video_url) + "\n" +
  365. str(cover_url) + "\n" +
  366. str("xiaoniangao"))
  367. Common.person_logger().info("==========视频信息已保存至info.txt==========")
  368. # 上传视频
  369. Common.person_logger().info("开始上传视频:{}".format(video_title))
  370. Publish.upload_and_publish("prod", "play")
  371. Common.person_logger().info("视频上传完成:{}", video_title)
  372. # 上传完成时间
  373. upload_time = int(time.time())
  374. # 保存视频信息到云文档
  375. Common.person_logger().info("添加视频到云文档:{}", video_title)
  376. # 插入空行
  377. time.sleep(1)
  378. Feishu.insert_columns("person-logs", "xiaoniangao", "yatRv2", "ROWS", 1, 2)
  379. # 视频信息写入云文档
  380. values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(upload_time))),
  381. "定向账号爬取",
  382. video_id,
  383. video_title,
  384. video_play_cnt,
  385. video_comment_cnt,
  386. video_like_cnt,
  387. video_share_cnt,
  388. video_duration,
  389. str(video_width) + "*" + str(video_height),
  390. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
  391. user_name,
  392. profile_id,
  393. profile_mid,
  394. head_url,
  395. cover_url,
  396. video_url]]
  397. time.sleep(1)
  398. Feishu.update_values("person-logs", "xiaoniangao", "yatRv2", "A2:Q2", values)
  399. return int(video_send_time)
  400. except Exception as e:
  401. Common.person_logger().error("请求关注列表异常:{}", e)
  402. if __name__ == "__main__":
  403. person = Person()
  404. # person.person_list()
  405. # person.download_person_videos()
  406. # person.sub_persons()
  407. # print(person.unfollow_person_list())
  408. person.unsub_persons()