person_list.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2022/5/18
  4. import time
  5. import requests
  6. import urllib3
  7. from main.common import Common
  8. from main.feishu_lib import Feishu
  9. from main.publish import Publish
  10. proxies = {"http": None, "https": None}
  class Person:
      """Crawl videos published by a hand-picked list of Xiaoniangao accounts.

      Workflow implemented by this class:

      1. ``person_list`` reads the target account ids from a Feishu sheet.
      2. ``sub_persons`` follows each of those accounts through the API.
      3. ``download_from_sub`` fetches one page of the "follow" feed, filters the
         videos, downloads and publishes the ones that pass, and records them in
         a Feishu sheet.

      NOTE(review): the request headers/payloads embed account credentials
      (``token``, ``X-Token-Id``, ``uid``) and TLS verification is disabled
      (``verify=False``). Both are kept as-is to preserve behaviour; consider
      moving the credentials to configuration.
      """

      # Cursor of the follow feed: sent as "start_t" and overwritten with the
      # "next_t" value of every response. -1 is what the first request sends.
      next_t = -1

      _SUB_USER_URL = "https://api.xiaoniangao.cn/V1/account/sub_user"
      _USER_TRENDS_URL = "https://api.xiaoniangao.cn/album/get_user_trends"

      # HTTP headers shared by every API call of this class.
      _HEADERS = {
          "X-Mid": "1164637358",
          "X-Token-Id": "af9c47bb6c942236ff35ee10d355f3b0-1164637358",
          "content-type": "application/json",
          "uuid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
          "Accept-Encoding": "gzip,compress,br,deflate",
          "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
                        " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
                        "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
          "Referer": "https://servicewechat.com/wxd7911e4c177690e4/617/page-frame.html",
      }

      # Body fields that are identical in both request payloads.
      _AUTH_PARAMS = {
          "token": "451273638af2c8bb90266bcfaf601a68",
          "uid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
          "proj": "ma",
          "wx_ver": "8.0.20",
          "code_ver": "3.62.0",
      }

      # "ext" section of "log_common_params", identical in both payloads.
      _CLIENT_EXT = {
          "brand": "iPhone",
          "device": "iPhone 11",
          "os": "iOS 14.7.1",
          "weixinver": "8.0.20",
          "srcver": "2.24.2",
          "net": "wifi",
          "scene": "1089",
      }

      # Ordered (old, new) replacements applied to a video title. The order is
      # kept exactly as in the original chained ``str.replace`` calls.
      _TITLE_REPLACEMENTS = (
          ("\n", ""), ("/", ""), ("\r", ""), ("#", ""),
          (".", "。"), ("\\", ""), ("&NBSP", ""),
          (":", ""), ("*", ""), ("?", ""),
          ("?", ""), ('"', ""), ("<", ""),
          (">", ""), ("|", ""), (" ", ""),
      )

      # Ordered (old, new) replacements applied to a user nickname.
      _NICK_REPLACEMENTS = (
          ("\n", ""), ("/", ""), ("快手", ""), (" ", ""),
          (" ", ""), ("&NBSP", ""), ("\r", ""),
      )

      @staticmethod
      def _scrub(text, replacements):
          """Strip *text* and apply every ``(old, new)`` replacement in order."""
          cleaned = text.strip()
          for old, new in replacements:
              cleaned = cleaned.replace(old, new)
          return cleaned

      @staticmethod
      def _has_sensitive_word(title, words):
          """Return True when any non-empty entry of *words* occurs in *title*.

          Empty entries are ignored (an empty string is contained in every
          title), and non-string cells are compared by their string form.
          """
          return any(bool(word) and str(word) in title for word in words)

      @classmethod
      def _parse_feed(cls, feed):
          """Extract the fields used by the crawler from one feed item.

          :param feed: one element of ``data.list`` from the feed response.
          :return: dict with the cleaned title/nickname and the raw counters,
              urls and ids of the video.
          :raises KeyError: when the item lacks one of the expected keys.
          """
          user = feed["user"]
          return {
              "video_title": cls._scrub(feed["title"], cls._TITLE_REPLACEMENTS),
              "user_name": cls._scrub(user["nick"], cls._NICK_REPLACEMENTS),
              "video_id": feed["vid"],
              "play_cnt": feed["play_pv"],
              "comment_cnt": feed["comment_count"],
              "like_cnt": feed["favor"]["total"],
              "share_cnt": feed["share"],
              # "du" is divided by 1000 and truncated, as in the original code.
              "duration": int(feed["du"] / 1000),
              "send_time": feed["t"],
              "resolution": str(feed["w"]) + "*" + str(feed["h"]),
              "head_url": user["hurl"],
              "profile_id": feed["id"],
              "profile_mid": user["mid"],
              "cover_url": feed["url"],
              "video_url": feed["v_url"],
          }

      # Sensitive-word filter
      @classmethod
      def sensitive_words(cls):
          """Return every cell that is not ``None`` from the sensitive-word sheet.

          :return: flat list of the cell values of Feishu sheet ``DRAnZh``.
          """
          rows = Feishu.get_values_batch("person-logs", "xiaoniangao", "DRAnZh")
          return [cell for row in rows for cell in row if cell is not None]

      # Target account list
      @classmethod
      def person_list(cls):
          """Read the ids of the accounts to crawl from Feishu sheet ``oNpThi``.

          Row 1 is treated as a header; for every following row column B is read
          as the account id and column C as the nickname (logged only).

          :return: list of account ids, or ``None`` when the sheet only has one
              row or when reading it fails (the error is logged).
          """
          try:
              # Fetch the sheet once; it is needed both for the emptiness check
              # and for the number of rows to walk.
              sheet = Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi")
              if len(sheet) == 1:
                  Common.person_logger().info("暂无定向爬取账号")
                  return None

              person_list = []
              nick_list = []
              for row_no in range(2, len(sheet) + 1):
                  # The pauses between the Feishu calls are kept from the
                  # original code (presumably rate limiting — TODO confirm).
                  time.sleep(0.5)
                  profile_mid = Feishu.get_range_value(
                      "person-logs", "xiaoniangao", "oNpThi",
                      "B" + str(row_no) + ":" + "B" + str(row_no))[0]
                  time.sleep(0.5)
                  nick = Feishu.get_range_value(
                      "person-logs", "xiaoniangao", "oNpThi",
                      "C" + str(row_no) + ":" + "C" + str(row_no))[0]
                  nick_list.append(nick)
                  person_list.append(profile_mid)

              Common.person_logger().info("已获取用户列表:{}", nick_list)
              return person_list
          except Exception as e:
              Common.person_logger().error("获取用户列表异常:{}", e)
              return None

      # Follow the accounts of the list
      @classmethod
      def sub_persons(cls):
          """Follow every account returned by ``person_list``.

          A failure for one account (bad id in the sheet, network error) is
          logged and does not stop the remaining accounts.
          """
          # person_list() returns None when there is nothing to crawl or when
          # the sheet could not be read; treat both as "no accounts".
          profile_mids = cls.person_list() or []
          if not profile_mids:
              return

          urllib3.disable_warnings()
          for profile_mid in profile_mids:
              try:
                  # Built inside the try block: int() raises for a malformed id.
                  payload = {
                      "visited_mid": int(profile_mid),
                      "log_common_params": {
                          "e": [{
                              "data": {
                                  "page": "profilePage",
                                  "topic": "public",
                                  "type": "follow",
                                  "name": "user",
                                  "smid": str(profile_mid),
                              },
                              "ab": {},
                          }],
                          "ext": dict(cls._CLIENT_EXT),
                          "pj": "1",
                          "pf": "2",
                          "session_id": "d53b6125-942b-4ec1-8d22-f9451a35e9f9",
                      },
                      **cls._AUTH_PARAMS,
                  }
                  r = requests.post(headers=dict(cls._HEADERS), url=cls._SUB_USER_URL,
                                    json=payload, proxies=proxies, verify=False)
                  Common.person_logger().info("关注用户:{},{}", profile_mid, r)
              except Exception as e:
                  Common.person_logger().error("关注用户异常:{}", e)

      @classmethod
      def _download_and_publish(cls, video, publish_time):
          """Download, publish and record one video that passed every filter.

          :param video: dict produced by ``_parse_feed``.
          :param publish_time: formatted publish time written to the sheet.
          """
          video_title = video["video_title"]
          Common.person_logger().info("开始下载视频:{}", video_title)
          # Cover first, then the video itself.
          Common.download_method(
              log_path="person-logs", text="cover", d_name=video_title, d_url=video["cover_url"])
          Common.download_method(
              log_path="person-logs", text="video", d_name=video_title, d_url=video["video_url"])

          # Save the video information to "./videos/{title}/info.txt",
          # one value per line, no trailing newline.
          info_lines = [
              video["video_id"],
              video_title,
              video["duration"],
              video["play_cnt"],
              video["comment_cnt"],
              video["like_cnt"],
              video["share_cnt"],
              video["resolution"],
              video["send_time"],
              video["user_name"],
              video["head_url"],
              video["video_url"],
              video["cover_url"],
              "xiaoniangao",
          ]
          with open("./videos/" + video_title + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
              f_a.write("\n".join(str(line) for line in info_lines))
          Common.person_logger().info("==========视频信息已保存至info.txt==========")

          # Upload
          Common.person_logger().info("开始上传视频:{}", video_title)
          Publish.upload_and_publish("dev", "play")
          Common.person_logger().info("视频上传完成:{}", video_title)
          upload_time = int(time.time())

          # Record the video in the Feishu sheet: insert an empty row 2, then
          # fill columns A-Q of that row.
          Common.person_logger().info("添加视频到云文档:{}", video_title)
          time.sleep(1)
          Feishu.insert_columns("person-logs", "xiaoniangao", "yatRv2", "ROWS", 1, 2)
          values = [[
              time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(upload_time)),
              "定向账号爬取",
              video["video_id"],
              video_title,
              video["play_cnt"],
              video["comment_cnt"],
              video["like_cnt"],
              video["share_cnt"],
              video["duration"],
              video["resolution"],
              publish_time,
              video["user_name"],
              video["profile_id"],
              video["profile_mid"],
              video["head_url"],
              video["cover_url"],
              video["video_url"],
          ]]
          time.sleep(1)
          Feishu.update_values("person-logs", "xiaoniangao", "yatRv2", "A2:Q2", values)

      @classmethod
      def _process_feed(cls, feed, endtime):
          """Filter one feed item and download/publish it when it qualifies.

          :param feed: one element of ``data.list`` from the feed response.
          :param endtime: lower bound for the item's "t" value.
          :return: the item's "t" value as int, or ``None`` when it is empty.
          """
          video = cls._parse_feed(feed)
          video_title = video["video_title"]
          video_id = video["video_id"]
          video_url = video["video_url"]
          video_send_time = video["send_time"]

          Common.person_logger().info("标题:{}", video_title)
          Common.person_logger().info("用户名:{}", video["user_name"])
          Common.person_logger().info("视频ID:{}", video_id)
          Common.person_logger().info("播放量:{}", video["play_cnt"])

          # Drop invalid items before touching the timestamp: formatting an
          # empty "t" would raise and abort the whole page.
          if video_id == "" or video_url == "" or video_send_time == "":
              Common.person_logger().info("无效视频")
              return int(video_send_time) if video_send_time != "" else None

          send_ts = int(video_send_time)
          # "t" is divided by 1000 before being handed to time.localtime().
          publish_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(send_ts / 1000))
          Common.person_logger().info("发布时间:{}", publish_time)
          Common.person_logger().info("播放地址:{}", video_url)

          if send_ts < endtime:
              # Published before the cut-off.
              Common.person_logger().info("发布时间:{},在2022年5月18日之前", publish_time)
          elif int(video["play_cnt"]) < 1000:
              # Fewer than 1000 plays.
              Common.person_logger().info("视频:{},播放量:{}<1000", video_title, video["play_cnt"])
          elif cls._has_sensitive_word(video_title, cls.sensitive_words()):
              Common.person_logger().info("视频已中敏感词:{}", video_title)
          # De-duplicate against the Feishu sheet of already downloaded videos:
          # https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
          # The sheet is re-read for every item because a successful upload
          # appends a row to it.
          elif video_id in [cell for row in Feishu.get_values_batch(
                  "person-logs", "xiaoniangao", "yatRv2") for cell in row]:
              Common.person_logger().info("该视频已下载:{}", video_title)
          else:
              cls._download_and_publish(video, publish_time)
          return send_ts

      # Fetch the follow feed, download the videos that match the rules, upload them
      @classmethod
      def download_from_sub(cls, endtime):
          """Process one page (5 items) of the follow feed.

          Sends ``cls.next_t`` as the page cursor and stores the cursor of the
          response back into ``cls.next_t``, so consecutive calls walk through
          the feed.

          :param endtime: lower bound for a video's "t" value; older videos are
              skipped. Presumably a millisecond timestamp, since "t" is divided
              by 1000 before formatting — TODO confirm with the caller.
          :return: the "t" value (int) of the last item of the page, or ``None``
              when the page is empty, the last item has no timestamp, or the
              request/processing fails (the error is logged).

          NOTE(review): the original indentation of this file was lost; the
          return statement is assumed to follow the loop over the page.
          """
          try:
              payload = {
                  "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
                  "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
                  "start_t": int(cls.next_t),
                  "limit": 5,
                  "share_width": 625,
                  "share_height": 500,
                  **cls._AUTH_PARAMS,
                  "log_common_params": {
                      "e": [{
                          "data": {
                              "page": "discoverIndexPage",
                              "topic": "follow",
                          },
                      }],
                      "ext": dict(cls._CLIENT_EXT),
                      "pj": "1",
                      "pf": "2",
                      "session_id": "18da9157-5aa6-4955-a849-9160f07ee912",
                  },
              }
              urllib3.disable_warnings()
              r = requests.post(headers=dict(cls._HEADERS), url=cls._USER_TRENDS_URL,
                                json=payload, proxies=proxies, verify=False)
              # Decode the body once instead of once per field.
              page = r.json()["data"]
              cls.next_t = page["next_t"]

              last_send_time = None
              for feed in page["list"]:
                  last_send_time = cls._process_feed(feed, endtime)
              return last_send_time
          except Exception as e:
              Common.person_logger().error("请求关注列表异常:{}", e)
              return None
  if __name__ == "__main__":
      # Script entry point: follow every account returned by Person.person_list().
      crawler = Person()
      crawler.sub_persons()