# person_list.py (14 KB)
# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/5/18
  4. import time
  5. import requests
  6. import urllib3
  7. from main.common import Common
  8. from main.feishu_lib import Feishu
  9. from main.publish import Publish
  10. proxies = {"http": None, "https": None}
  11. class Person:
  12. # 翻页初始值
  13. next_t_list = [-1]
  14. # 过滤敏感词
  15. @classmethod
  16. def sensitive_words(cls):
  17. # 敏感词库列表
  18. word_list = []
  19. # 从云文档读取所有敏感词,添加到词库列表
  20. lists = Feishu.get_values_batch("person-logs", "xiaoniangao", "DRAnZh")
  21. for i in lists:
  22. for j in i:
  23. # 过滤空的单元格内容
  24. if j is None:
  25. pass
  26. else:
  27. word_list.append(j)
  28. return word_list
  29. # 获取用户列表
  30. @classmethod
  31. def person_list(cls):
  32. try:
  33. if len(Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi")) == 1:
  34. Common.person_logger().info("暂无定向爬取账号")
  35. else:
  36. person_list = []
  37. nick_list = []
  38. for i in range(2, len(Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi")) + 1):
  39. time.sleep(0.5)
  40. profile_mid = Feishu.get_range_value(
  41. "person-logs", "xiaoniangao", "oNpThi", "B" + str(i) + ":" + "B" + str(i))[0]
  42. time.sleep(0.5)
  43. nick = \
  44. Feishu.get_range_value("person-logs", "xiaoniangao", "oNpThi",
  45. "C" + str(i) + ":" + "C" + str(i))[0]
  46. nick_list.append(nick)
  47. person_list.append(profile_mid)
  48. Common.person_logger().info("已获取用户列表:{}", nick_list)
  49. return person_list
  50. except Exception as e:
  51. Common.person_logger().error("获取用户列表异常:{}", e)
  52. # 关注列表中的用户
  53. @classmethod
  54. def sub_persons(cls):
  55. profile_mids = cls.person_list()
  56. for profile_mid in profile_mids:
  57. url = "https://api.xiaoniangao.cn/V1/account/sub_user"
  58. headers = {
  59. "X-Mid": "1164637358",
  60. "X-Token-Id": "af9c47bb6c942236ff35ee10d355f3b0-1164637358",
  61. "content-type": "application/json",
  62. "uuid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
  63. "Accept-Encoding": "gzip,compress,br,deflate",
  64. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
  65. " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
  66. "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
  67. "Referer": "https://servicewechat.com/wxd7911e4c177690e4/617/page-frame.html"
  68. }
  69. data = {
  70. "visited_mid": int(profile_mid),
  71. "log_common_params": {
  72. "e": [{
  73. "data": {
  74. "page": "profilePage",
  75. "topic": "public",
  76. "type": "follow",
  77. "name": "user",
  78. "smid": str(profile_mid)
  79. },
  80. "ab": {}
  81. }],
  82. "ext": {
  83. "brand": "iPhone",
  84. "device": "iPhone 11",
  85. "os": "iOS 14.7.1",
  86. "weixinver": "8.0.20",
  87. "srcver": "2.24.2",
  88. "net": "wifi",
  89. "scene": "1089"
  90. },
  91. "pj": "1",
  92. "pf": "2",
  93. "session_id": "d53b6125-942b-4ec1-8d22-f9451a35e9f9"
  94. },
  95. "token": "451273638af2c8bb90266bcfaf601a68",
  96. "uid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
  97. "proj": "ma",
  98. "wx_ver": "8.0.20",
  99. "code_ver": "3.62.0"
  100. }
  101. try:
  102. urllib3.disable_warnings()
  103. r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
  104. Common.person_logger().info("关注用户:{},{}", profile_mid, r)
  105. except Exception as e:
  106. Common.person_logger().error("关注用户异常:{}", e)
  107. # 从关注列表获取视频,并下载符合规则的视频,再进行上传
  108. @classmethod
  109. def download_from_sub(cls, endtime):
  110. url = "https://api.xiaoniangao.cn/album/get_user_trends"
  111. headers = {
  112. "X-Mid": "1164637358",
  113. "X-Token-Id": "af9c47bb6c942236ff35ee10d355f3b0-1164637358",
  114. "content-type": "application/json",
  115. "uuid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
  116. "Accept-Encoding": "gzip,compress,br,deflate",
  117. "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
  118. " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
  119. "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
  120. "Referer": "https://servicewechat.com/wxd7911e4c177690e4/617/page-frame.html"
  121. }
  122. data = {
  123. "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
  124. "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
  125. "start_t": int(cls.next_t_list[-1]),
  126. "limit": 5,
  127. "share_width": 625,
  128. "share_height": 500,
  129. "token": "451273638af2c8bb90266bcfaf601a68",
  130. "uid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
  131. "proj": "ma",
  132. "wx_ver": "8.0.20",
  133. "code_ver": "3.62.0",
  134. "log_common_params": {
  135. "e": [{
  136. "data": {
  137. "page": "discoverIndexPage",
  138. "topic": "follow"
  139. }
  140. }],
  141. "ext": {
  142. "brand": "iPhone",
  143. "device": "iPhone 11",
  144. "os": "iOS 14.7.1",
  145. "weixinver": "8.0.20",
  146. "srcver": "2.24.2",
  147. "net": "wifi",
  148. "scene": "1089"
  149. },
  150. "pj": "1",
  151. "pf": "2",
  152. "session_id": "18da9157-5aa6-4955-a849-9160f07ee912"
  153. }
  154. }
  155. try:
  156. urllib3.disable_warnings()
  157. r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
  158. next_t = r.json()["data"]["next_t"]
  159. cls.next_t_list.append(next_t)
  160. feeds = r.json()["data"]["list"]
  161. for i in range(len(feeds)):
  162. # 标题
  163. video_title = feeds[i]["title"].strip().replace("\n", "") \
  164. .replace("/", "").replace("\r", "").replace("#", "") \
  165. .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
  166. .replace(":", "").replace("*", "").replace("?", "") \
  167. .replace("?", "").replace('"', "").replace("<", "") \
  168. .replace(">", "").replace("|", "").replace(" ", "")
  169. Common.person_logger().info("标题:{}", video_title)
  170. # 用户名
  171. user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
  172. .replace("/", "").replace("快手", "").replace(" ", "") \
  173. .replace(" ", "").replace("&NBSP", "").replace("\r", "")
  174. Common.person_logger().info("用户名:{}", user_name)
  175. # 视频 ID
  176. video_id = feeds[i]["vid"]
  177. Common.person_logger().info("视频ID:{}", video_id)
  178. # 播放量
  179. video_play_cnt = feeds[i]["play_pv"]
  180. Common.person_logger().info("播放量:{}", video_play_cnt)
  181. # 评论数
  182. video_comment_cnt = feeds[i]["comment_count"]
  183. # 点赞
  184. video_like_cnt = feeds[i]["favor"]["total"]
  185. # 分享
  186. video_share_cnt = feeds[i]["share"]
  187. # 时长
  188. video_duration = int(feeds[i]["du"] / 1000)
  189. # 发布时间
  190. video_send_time = feeds[i]["t"]
  191. Common.person_logger().info(
  192. "发布时间:{}", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
  193. # 宽和高
  194. video_width = feeds[i]["w"]
  195. video_height = feeds[i]["h"]
  196. # 头像
  197. head_url = feeds[i]["user"]["hurl"]
  198. # 用户 ID
  199. profile_id = feeds[i]["id"]
  200. # 用户 mid
  201. profile_mid = feeds[i]["user"]["mid"]
  202. # 封面
  203. cover_url = feeds[i]["url"]
  204. # 视频播放地址
  205. video_url = feeds[i]["v_url"]
  206. Common.person_logger().info("播放地址:{}", video_url)
  207. # 过滤无效视频
  208. if video_id == "" or video_url == "" or video_send_time == "":
  209. Common.person_logger().info("无效视频")
  210. # 判断发布时间:2022年5月18日以后发布
  211. elif int(video_send_time) < endtime:
  212. Common.person_logger().info(
  213. "发布时间:{},在2022年5月18日之前",
  214. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))
  215. # 判断视频播放量大于1000
  216. elif int(video_play_cnt) < 1000:
  217. Common.person_logger().info("视频:{},播放量:{}<1000", video_title, video_play_cnt)
  218. # 过滤敏感词
  219. elif any(word if word in video_title else False for word in cls.sensitive_words()) is True:
  220. Common.person_logger().info("视频已中敏感词:{}".format(video_title))
  221. # 从 云文档 去重:https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
  222. elif video_id in [j for i in Feishu.get_values_batch(
  223. "person-logs", "xiaoniangao", "yatRv2") for j in i]:
  224. Common.person_logger().info("该视频已下载:{}", video_title)
  225. # 满足抓取规则
  226. else:
  227. Common.person_logger().info("开始下载视频:{}", video_title)
  228. # 下载封面
  229. Common.download_method(
  230. log_path="person-logs", text="cover", d_name=video_title, d_url=cover_url)
  231. # 下载视频
  232. Common.download_method(
  233. log_path="person-logs", text="video", d_name=video_title, d_url=video_url)
  234. # 保存视频信息至 "./videos/{download_video_title}/info.txt"
  235. with open(r"./videos/" + video_title
  236. + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
  237. f_a.write(str(video_id) + "\n" +
  238. str(video_title) + "\n" +
  239. str(video_duration) + "\n" +
  240. str(video_play_cnt) + "\n" +
  241. str(video_comment_cnt) + "\n" +
  242. str(video_like_cnt) + "\n" +
  243. str(video_share_cnt) + "\n" +
  244. str(video_width)+"*"+str(video_height) + "\n" +
  245. str(video_send_time) + "\n" +
  246. str(user_name) + "\n" +
  247. str(head_url) + "\n" +
  248. str(video_url) + "\n" +
  249. str(cover_url) + "\n" +
  250. str("xiaoniangao"))
  251. Common.person_logger().info("==========视频信息已保存至info.txt==========")
  252. # 上传视频
  253. Common.person_logger().info("开始上传视频:{}".format(video_title))
  254. Publish.upload_and_publish("prod", "play")
  255. Common.person_logger().info("视频上传完成:{}", video_title)
  256. # 上传完成时间
  257. upload_time = int(time.time())
  258. # 保存视频信息到云文档
  259. Common.person_logger().info("添加视频到云文档:{}", video_title)
  260. # 插入空行
  261. time.sleep(1)
  262. Feishu.insert_columns("person-logs", "xiaoniangao", "yatRv2", "ROWS", 1, 2)
  263. # 视频信息写入云文档
  264. values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(upload_time))),
  265. "定向账号爬取",
  266. video_id,
  267. video_title,
  268. video_play_cnt,
  269. video_comment_cnt,
  270. video_like_cnt,
  271. video_share_cnt,
  272. video_duration,
  273. str(video_width)+"*"+str(video_height),
  274. time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time)/1000)),
  275. user_name,
  276. profile_id,
  277. profile_mid,
  278. head_url,
  279. cover_url,
  280. video_url]]
  281. time.sleep(1)
  282. Feishu.update_values("person-logs", "xiaoniangao", "yatRv2", "A2:Q2", values)
  283. return int(video_send_time)
  284. except Exception as e:
  285. Common.person_logger().error("请求关注列表异常:{}", e)
  286. if __name__ == "__main__":
  287. person = Person()
  288. # person.person_list()
  289. # person.download_person_videos()
  290. person.sub_persons()