# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/5/18
"""Follow a configured list of xiaoniangao accounts and crawl their videos.

The account list, the sensitive-word list and the record of already
downloaded videos are all read from (and written to) Feishu sheets through
``Feishu``; downloads go through ``Common.download_method`` and uploads
through ``Publish.upload_and_publish``.
"""
import time

import requests
import urllib3

from main.common import Common
from main.feishu_lib import Feishu
from main.publish import Publish

# Passed to every request so that no HTTP/HTTPS proxy is configured by this
# module.
proxies = {"http": None, "https": None}


class Person:
    """Crawler for videos published by the followed xiaoniangao accounts."""

    # Pagination cursors: the last element is sent as ``start_t`` of the next
    # feed request; -1 is the initial value.
    next_t_list = [-1]

    # Request headers shared by the "follow" and the "feed" endpoints.
    # NOTE(review): the session credentials below are hard-coded; consider
    # loading them from configuration instead of source code.
    _HEADERS = {
        "X-Mid": "1164637358",
        "X-Token-Id": "af9c47bb6c942236ff35ee10d355f3b0-1164637358",
        "content-type": "application/json",
        "uuid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
        "Accept-Encoding": "gzip,compress,br,deflate",
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
                      " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
                      "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
        "Referer": "https://servicewechat.com/wxd7911e4c177690e4/617/page-frame.html"
    }

    @staticmethod
    def _format_ms(timestamp_ms):
        """Return a millisecond timestamp as a local ``YYYY-mm-dd HH:MM:SS`` string."""
        return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(timestamp_ms) / 1000))

    @staticmethod
    def _clean_title(raw_title):
        """Strip *raw_title* and remove/replace the characters the crawler rejects.

        The title is later used as a directory name under ``./videos/``.
        """
        title = raw_title.strip()
        # Order is kept exactly as in the original replace chain.
        for old, new in (
                ("\n", ""), ("/", ""), ("\r", ""), ("#", ""),
                (".", "。"), ("\\", ""), ("&NBSP", ""),
                (":", ""), ("*", ""), ("?", ""),
                ("?", ""), ('"', ""), ("<", ""),
                (">", ""), ("|", ""), (" ", "")):
            title = title.replace(old, new)
        return title

    @staticmethod
    def _clean_nick(raw_nick):
        """Strip *raw_nick* and remove unwanted substrings from the nickname."""
        nick = raw_nick.strip()
        # Order is kept exactly as in the original replace chain.
        for old in ("\n", "/", "快手", " ", " ", "&NBSP", "\r"):
            nick = nick.replace(old, "")
        return nick

    @classmethod
    def sensitive_words(cls):
        """Return every non-empty cell of the sensitive-word sheet as a flat list."""
        rows = Feishu.get_values_batch("person-logs", "xiaoniangao", "DRAnZh")
        # Empty cells come back as None and are skipped.
        return [cell for row in rows for cell in row if cell is not None]

    @classmethod
    def person_list(cls):
        """Return the ``profile_mid`` values of the accounts to crawl.

        Row 1 of the sheet is treated as a header; for every following row,
        column B is read as the account's mid and column C as its nickname
        (nicknames are only logged).

        Returns:
            A list of mids. An empty list is returned when the sheet has no
            account rows or when reading it fails, so callers can always
            iterate the result.
        """
        try:
            # Fetch the sheet once; only its row count is needed here.
            row_count = len(Feishu.get_values_batch("person-logs", "xiaoniangao", "oNpThi"))
            if row_count == 1:
                Common.person_logger().info("暂无定向爬取账号")
                return []

            person_list = []
            nick_list = []
            for row in range(2, row_count + 1):
                # The sleeps space out the per-cell sheet requests.
                time.sleep(0.5)
                profile_mid = Feishu.get_range_value(
                    "person-logs", "xiaoniangao", "oNpThi", "B{0}:B{0}".format(row))[0]
                time.sleep(0.5)
                nick = Feishu.get_range_value(
                    "person-logs", "xiaoniangao", "oNpThi", "C{0}:C{0}".format(row))[0]
                nick_list.append(nick)
                person_list.append(profile_mid)

            Common.person_logger().info("已获取用户列表:{}", nick_list)
            return person_list
        except Exception as e:
            Common.person_logger().error("获取用户列表异常:{}", e)
            return []

    @classmethod
    def sub_persons(cls):
        """Send a follow request for every account returned by ``person_list``.

        Each account is handled independently: a bad mid or a failed request
        is logged and the loop moves on to the next account.
        """
        url = "https://api.xiaoniangao.cn/V1/account/sub_user"
        for profile_mid in cls.person_list() or []:
            try:
                # Built inside the try block: int() raises on an empty or
                # non-numeric cell, which must not abort the remaining accounts.
                data = {
                    "visited_mid": int(profile_mid),
                    "log_common_params": {
                        "e": [{
                            "data": {
                                "page": "profilePage",
                                "topic": "public",
                                "type": "follow",
                                "name": "user",
                                "smid": str(profile_mid)
                            },
                            "ab": {}
                        }],
                        "ext": {
                            "brand": "iPhone",
                            "device": "iPhone 11",
                            "os": "iOS 14.7.1",
                            "weixinver": "8.0.20",
                            "srcver": "2.24.2",
                            "net": "wifi",
                            "scene": "1089"
                        },
                        "pj": "1",
                        "pf": "2",
                        "session_id": "d53b6125-942b-4ec1-8d22-f9451a35e9f9"
                    },
                    "token": "451273638af2c8bb90266bcfaf601a68",
                    "uid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
                    "proj": "ma",
                    "wx_ver": "8.0.20",
                    "code_ver": "3.62.0"
                }
                # verify=False below would otherwise emit an
                # InsecureRequestWarning on every call.
                urllib3.disable_warnings()
                r = requests.post(headers=cls._HEADERS, url=url, json=data,
                                  proxies=proxies, verify=False)
                Common.person_logger().info("关注用户:{},{}", profile_mid, r)
            except Exception as e:
                Common.person_logger().error("关注用户异常:{}", e)

    @classmethod
    def download_from_sub(cls, endtime):
        """Fetch one page of the followed-accounts feed and process its videos.

        A video is downloaded, uploaded and recorded in the cloud sheet only
        when it has an id, a play URL and a send time, was sent at or after
        *endtime*, has at least 1000 plays, contains no sensitive word in its
        title and is not already listed in the cloud sheet.

        Args:
            endtime: Lower bound for a video's send time; compared directly
                with the feed's ``t`` field (which this code treats as
                milliseconds when formatting it).

        Returns:
            The send time (``int``) of the last video on the page, or ``None``
            when the page is empty or any error occurred (errors are logged).
        """
        url = "https://api.xiaoniangao.cn/album/get_user_trends"
        data = {
            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
            "start_t": int(cls.next_t_list[-1]),
            "limit": 5,
            "share_width": 625,
            "share_height": 500,
            "token": "451273638af2c8bb90266bcfaf601a68",
            "uid": "3d460a1b-ab85-426b-bd80-62029acaa2c0",
            "proj": "ma",
            "wx_ver": "8.0.20",
            "code_ver": "3.62.0",
            "log_common_params": {
                "e": [{
                    "data": {
                        "page": "discoverIndexPage",
                        "topic": "follow"
                    }
                }],
                "ext": {
                    "brand": "iPhone",
                    "device": "iPhone 11",
                    "os": "iOS 14.7.1",
                    "weixinver": "8.0.20",
                    "srcver": "2.24.2",
                    "net": "wifi",
                    "scene": "1089"
                },
                "pj": "1",
                "pf": "2",
                "session_id": "18da9157-5aa6-4955-a849-9160f07ee912"
            }
        }

        try:
            urllib3.disable_warnings()
            r = requests.post(headers=cls._HEADERS, url=url, json=data,
                              proxies=proxies, verify=False)
            # Parse the body once instead of once per field.
            payload = r.json()["data"]
            # Remember the cursor so the next call requests the following page.
            cls.next_t_list.append(payload["next_t"])
            feeds = payload["list"]

            if not feeds:
                Common.person_logger().info("关注列表暂无视频")
                return None

            # Loaded lazily, at most once per page, instead of once per video.
            sensitive_words = None
            video_send_time = None

            for feed in feeds:
                # Title
                video_title = cls._clean_title(feed["title"])
                Common.person_logger().info("标题:{}", video_title)

                # User nickname
                user_name = cls._clean_nick(feed["user"]["nick"])
                Common.person_logger().info("用户名:{}", user_name)

                # Video id
                video_id = feed["vid"]
                Common.person_logger().info("视频ID:{}", video_id)

                # Play count
                video_play_cnt = feed["play_pv"]
                Common.person_logger().info("播放量:{}", video_play_cnt)

                # Comment / like / share counts
                video_comment_cnt = feed["comment_count"]
                video_like_cnt = feed["favor"]["total"]
                video_share_cnt = feed["share"]

                # Duration: "du" is divided by 1000 before being stored
                video_duration = int(feed["du"] / 1000)

                # Send time
                video_send_time = feed["t"]
                Common.person_logger().info("发布时间:{}", cls._format_ms(video_send_time))

                # Width and height
                video_width = feed["w"]
                video_height = feed["h"]
                resolution = str(video_width) + "*" + str(video_height)

                # Avatar, ids, cover and play URL
                head_url = feed["user"]["hurl"]
                profile_id = feed["id"]
                profile_mid = feed["user"]["mid"]
                cover_url = feed["url"]
                video_url = feed["v_url"]
                Common.person_logger().info("播放地址:{}", video_url)

                # Skip entries that miss any mandatory field.
                if video_id == "" or video_url == "" or video_send_time == "":
                    Common.person_logger().info("无效视频")
                    continue

                # Skip videos sent before the cut-off.
                if int(video_send_time) < endtime:
                    Common.person_logger().info(
                        "发布时间:{},在2022年5月18日之前", cls._format_ms(video_send_time))
                    continue

                # Skip videos with fewer than 1000 plays.
                if int(video_play_cnt) < 1000:
                    Common.person_logger().info(
                        "视频:{},播放量:{}<1000", video_title, video_play_cnt)
                    continue

                # Skip titles containing a sensitive word; empty words never match.
                if sensitive_words is None:
                    sensitive_words = cls.sensitive_words()
                if any(word in video_title for word in sensitive_words if word):
                    Common.person_logger().info("视频已中敏感词:{}".format(video_title))
                    continue

                # De-duplicate against the cloud sheet. This is re-read for
                # every candidate because the sheet grows during this loop.
                downloaded = Feishu.get_values_batch("person-logs", "xiaoniangao", "yatRv2")
                if video_id in [cell for row in downloaded for cell in row]:
                    Common.person_logger().info("该视频已下载:{}", video_title)
                    continue

                # The video passed every rule: download cover and video.
                Common.person_logger().info("开始下载视频:{}", video_title)
                Common.download_method(
                    log_path="person-logs", text="cover", d_name=video_title, d_url=cover_url)
                Common.download_method(
                    log_path="person-logs", text="video", d_name=video_title, d_url=video_url)

                # Save the metadata, one field per line, next to the download.
                info_fields = [
                    video_id, video_title, video_duration, video_play_cnt,
                    video_comment_cnt, video_like_cnt, video_share_cnt,
                    resolution, video_send_time, user_name, head_url,
                    video_url, cover_url, "xiaoniangao"]
                with open(r"./videos/" + video_title + "/" + "info.txt",
                          "a", encoding="UTF-8") as f_a:
                    f_a.write("\n".join(str(field) for field in info_fields))
                Common.person_logger().info("==========视频信息已保存至info.txt==========")

                # Upload
                Common.person_logger().info("开始上传视频:{}".format(video_title))
                Publish.upload_and_publish("prod", "play")
                Common.person_logger().info("视频上传完成:{}", video_title)
                upload_time = int(time.time())

                # Record the video in the cloud sheet: insert an empty row,
                # then write the values into A2:Q2.
                Common.person_logger().info("添加视频到云文档:{}", video_title)
                time.sleep(1)
                Feishu.insert_columns("person-logs", "xiaoniangao", "yatRv2", "ROWS", 1, 2)
                values = [[
                    time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(upload_time))),
                    "定向账号爬取",
                    video_id,
                    video_title,
                    video_play_cnt,
                    video_comment_cnt,
                    video_like_cnt,
                    video_share_cnt,
                    video_duration,
                    resolution,
                    cls._format_ms(video_send_time),
                    user_name,
                    profile_id,
                    profile_mid,
                    head_url,
                    cover_url,
                    video_url]]
                time.sleep(1)
                Feishu.update_values("person-logs", "xiaoniangao", "yatRv2", "A2:Q2", values)

            # NOTE(review): the original formatting was lost, so the nesting
            # of this return is an assumption. It is placed after the loop so
            # that every video of the page is processed - confirm with callers.
            return int(video_send_time)
        except Exception as e:
            Common.person_logger().error("请求关注列表异常:{}", e)
            return None


if __name__ == "__main__":
    person = Person()
    # person.person_list()
    # person.download_person_videos()
    person.sub_persons()