# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/5/18
import time

import requests
import urllib3

from main.common import Common
from main.feishu_lib import Feishu
from main.publish import Publish

# Passed as ``proxies=`` to every requests call below; both schemes are None.
proxies = {"http": None, "https": None}


class Person:
    """Follow/unfollow Xiaoniangao accounts and crawl videos of followed accounts.

    Account lists, credentials, the sensitive-word list and the record of
    already-downloaded videos are all read from Feishu sheets.
    """

    # Pagination cursors: the last element is sent as "start_t" of the next
    # feed request, and each response's "next_t" is appended.
    next_t_list = [-1]

    # Request credentials, read from Feishu sheet "dzcWHw" while the class body
    # executes (i.e. when this module is imported).
    # NOTE(review): these five calls pass "person-log" whereas every other call
    # in this file passes "person-logs" - confirm whether that is intentional.
    person_x_b3_traceid = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C3:C3")[0]
    person_x_token_id = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C4:C4")[0]
    person_referer = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C5:C5")[0][0]["link"]
    person_uid = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C6:C6")[0]
    person_token = Feishu.get_range_value("person-log", "xiaoniangao", "dzcWHw", "C7:C7")[0]

    @classmethod
    def sensitive_words(cls):
        """Return every non-None cell of the sensitive-word sheet as a flat list."""
        rows = Feishu.get_values_batch("person-logs", "xiaoniangao", "DRAnZh")
        return [cell for row in rows for cell in row if cell is not None]

    @staticmethod
    def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt, d_send_time):
        """
        Basic threshold rules a video must satisfy to be downloaded.

        :param d_duration: duration in seconds
        :param d_width: width
        :param d_height: height
        :param d_play_cnt: play count
        :param d_like_cnt: like count
        :param d_share_cnt: share count
        :param d_send_time: publish time (divided by 1000 before being compared
            with ``time.time()``, so presumably a millisecond timestamp)
        :return: True when every rule is satisfied, otherwise False
        """
        # Duration must be between 60 and 600 seconds inclusive.
        if not 60 <= int(float(d_duration)) <= 600:
            return False
        # Width or height must be non-negative.
        if not (int(d_width) >= 0 or int(d_height) >= 0):
            return False
        # At least 5000 plays.
        if int(d_play_cnt) < 5000:
            return False
        # Like and share counts must be non-negative.
        if int(d_like_cnt) < 0:
            return False
        if int(d_share_cnt) < 0:
            return False
        # Published within the last 48 hours (172800 seconds).
        return int(time.time()) - int(d_send_time) / 1000 <= 172800

    @classmethod
    def _read_person_sheet(cls, sheet_id, empty_msg, listed_msg):
        """Read column B (profile mid) of every data row of ``sheet_id``.

        Column C (nickname) is read as well, but only for logging.

        :param sheet_id: Feishu sheet id holding the account list
        :param empty_msg: message logged when the sheet has exactly one row
        :param listed_msg: log template receiving the list of nicknames
        :return: list of profile mids; None when the sheet has exactly one row
            or when reading fails (the failure is logged)
        """
        try:
            # Fetch the sheet once; its length drives both the emptiness check
            # and the row range below.
            rows = Feishu.get_values_batch("person-logs", "xiaoniangao", sheet_id)
            if len(rows) == 1:
                Common.person_logger().info(empty_msg)
                return None

            mids = []
            nicks = []
            # Sheet rows are 1-based; row 2 is the first row that is read.
            for row_no in range(2, len(rows) + 1):
                time.sleep(0.5)
                profile_mid = Feishu.get_range_value(
                    "person-logs", "xiaoniangao", sheet_id, "B" + str(row_no) + ":" + "B" + str(row_no))[0]
                time.sleep(0.5)
                nick = Feishu.get_range_value(
                    "person-logs", "xiaoniangao", sheet_id, "C" + str(row_no) + ":" + "C" + str(row_no))[0]
                nicks.append(nick)
                mids.append(profile_mid)

            Common.person_logger().info(listed_msg, nicks)
            return mids
        except Exception as e:
            Common.person_logger().error("获取用户列表异常:{}", e)

    @classmethod
    def follow_person_list(cls):
        """Return the profile mids listed in the follow sheet ("oNpThi").

        :return: list of mids, or None when the sheet has no data rows or the
            read failed
        """
        return cls._read_person_sheet("oNpThi", "暂无定向爬取账号", "已获取用户列表:{}")

    @classmethod
    def unfollow_person_list(cls):
        """Return the profile mids listed in the unfollow sheet ("tuMNhn").

        :return: list of mids, or None when the sheet has no data rows or the
            read failed
        """
        return cls._read_person_sheet("tuMNhn", "暂无定向账号", "取消关注用户列表:{}")

    @classmethod
    def _request_headers(cls):
        """Build the HTTP headers shared by every API request in this class."""
        return {
            "x-b3-traceid": cls.person_x_b3_traceid,
            "X-Token-Id": cls.person_x_token_id,
            "content-type": "application/json",
            "uuid": cls.person_uid,
            "Accept-Encoding": "gzip,compress,br,deflate",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
                          " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
                          "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
            "Referer": cls.person_referer
        }

    @classmethod
    def _follow_payload(cls, profile_mid, action, srcver, session_id, code_ver):
        """Build the JSON body of a follow/unfollow request for ``profile_mid``.

        :param profile_mid: target account mid; must be convertible with ``int``
        :param action: value of the logged event "type" ("follow" / "unfollow")
        :param srcver: value of "ext.srcver"
        :param session_id: value of "log_common_params.session_id"
        :param code_ver: value of "code_ver"
        """
        return {
            "visited_mid": int(profile_mid),
            "log_common_params": {
                "e": [{
                    "data": {
                        "page": "profilePage",
                        "topic": "public",
                        "type": action,
                        "name": "user",
                        "smid": str(profile_mid)
                    },
                    "ab": {}
                }],
                "ext": {
                    "brand": "iPhone",
                    "device": "iPhone 11",
                    "os": "iOS 14.7.1",
                    "weixinver": "8.0.20",
                    "srcver": srcver,
                    "net": "wifi",
                    "scene": "1089"
                },
                "pj": "1",
                "pf": "2",
                "session_id": session_id
            },
            "token": cls.person_token,
            "uid": cls.person_uid,
            "proj": "ma",
            "wx_ver": "8.0.20",
            "code_ver": code_ver
        }

    @classmethod
    def sub_persons(cls):
        """Send a follow request for every account in the follow sheet.

        Failures for one account are logged and do not stop the remaining ones.
        """
        url = "https://api.xiaoniangao.cn/V1/account/sub_user"
        headers = cls._request_headers()
        urllib3.disable_warnings()
        # follow_person_list() returns None when there is nothing to follow or
        # the sheet could not be read; treat that as an empty list.
        for profile_mid in cls.follow_person_list() or []:
            try:
                data = cls._follow_payload(
                    profile_mid, "follow", "2.24.2",
                    "d53b6125-942b-4ec1-8d22-f9451a35e9f9", "3.62.0")
                r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
                Common.person_logger().info("关注用户:{},{}", profile_mid, r)
            except Exception as e:
                Common.person_logger().error("关注用户异常:{}", e)

    @classmethod
    def unsub_persons(cls):
        """Send an unfollow request for every account in the unfollow sheet.

        Failures for one account are logged and do not stop the remaining ones.
        """
        url = "https://api.xiaoniangao.cn/V1/account/unsub_user"
        headers = cls._request_headers()
        urllib3.disable_warnings()
        # unfollow_person_list() returns None when there is nothing to unfollow
        # or the sheet could not be read; treat that as an empty list.
        for profile_mid in cls.unfollow_person_list() or []:
            try:
                data = cls._follow_payload(
                    profile_mid, "unfollow", "2.24.4",
                    "6a2959c7-3f98-411f-8bc9-8d2a8a5c6f16", "3.64.1")
                r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
                Common.person_logger().info("取消关注:{},{}", profile_mid, r)
            except Exception as e:
                Common.person_logger().error("取消关注异常:{}", e)

    @staticmethod
    def _clean_title(raw_title):
        """Strip ``raw_title`` and apply the title character replacements in order.

        The title is later used as a directory name under ./videos/.
        """
        # NOTE(review): "?" and " " each appear twice, as in the original; one
        # of each pair may originally have been a full-width character - confirm.
        replacements = (
            ("\n", ""), ("/", ""), ("\r", ""), ("#", ""),
            (".", "。"), ("\\", ""), ("&NBSP", ""),
            (":", ""), ("*", ""), ("?", ""),
            ("?", ""), ('"', ""), ("<", ""),
            (">", ""), ("|", ""), (" ", ""),
        )
        title = raw_title.strip()
        for old, new in replacements:
            title = title.replace(old, new)
        return title

    @staticmethod
    def _clean_nick(raw_nick):
        """Strip ``raw_nick`` and apply the nickname replacements in order."""
        replacements = (
            ("\n", ""), ("/", ""), ("快手", ""), (" ", ""),
            (" ", ""), ("&NBSP", ""), ("\r", ""),
        )
        nick = raw_nick.strip()
        for old, new in replacements:
            nick = nick.replace(old, new)
        return nick

    @classmethod
    def _process_feed(cls, feed, endtime):
        """Filter one feed item and, when it passes, download/upload/record it.

        :param feed: one element of the "list" array of the feed response
        :param endtime: items whose "t" is smaller than this are skipped
        :return: the item's raw "t" (publish time) value
        """
        video_title = cls._clean_title(feed["title"])
        Common.person_logger().info("标题:{}", video_title)

        user_name = cls._clean_nick(feed["user"]["nick"])
        Common.person_logger().info("用户名:{}", user_name)

        video_id = feed["vid"]
        Common.person_logger().info("视频ID:{}", video_id)

        video_play_cnt = feed["play_pv"]
        Common.person_logger().info("播放量:{}", video_play_cnt)

        video_comment_cnt = feed["comment_count"]
        video_like_cnt = feed["favor"]["total"]
        video_share_cnt = feed["share"]
        # "du" is divided by 1000 to obtain whole seconds.
        video_duration = int(feed["du"] / 1000)

        video_send_time = feed["t"]
        Common.person_logger().info(
            "发布时间:{}", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)))

        video_width = feed["w"]
        video_height = feed["h"]
        head_url = feed["user"]["hurl"]
        profile_id = feed["id"]
        profile_mid = feed["user"]["mid"]
        cover_url = feed["url"]
        video_url = feed["v_url"]
        Common.person_logger().info("播放地址:{}", video_url)

        # Skip items with missing essentials.
        if video_id == "" or video_url == "" or video_send_time == "":
            Common.person_logger().info("无效视频")
            return video_send_time

        # Skip items published before the caller-supplied cut-off.
        if int(video_send_time) < endtime:
            Common.person_logger().info("发布时间超过 48 小时")
            return video_send_time

        if cls.download_rule(
                video_duration, video_width, video_height, video_play_cnt,
                video_like_cnt, video_share_cnt, video_send_time) is False:
            Common.person_logger().info("不满足基础门槛规则")
            return video_send_time

        # Skip titles containing any sensitive word (empty entries never match).
        if any(word and word in video_title for word in cls.sensitive_words()):
            Common.person_logger().info("视频已中敏感词:{}".format(video_title))
            return video_send_time

        # De-duplicate against the download-record sheet:
        # https://w42nne6hzg.feishu.cn/sheets/shtcnYxiyQ1wLklo1W5Kdqc9cGh?sheet=yatRv2
        recorded = [cell
                    for row in Feishu.get_values_batch("person-logs", "xiaoniangao", "yatRv2")
                    for cell in row]
        if video_id in recorded:
            Common.person_logger().info("该视频已下载:{}", video_title)
            return video_send_time

        # All rules passed: download cover and video.
        Common.person_logger().info("开始下载视频:{}", video_title)
        Common.download_method(
            log_path="person-logs", text="cover", d_name=video_title, d_url=cover_url)
        Common.download_method(
            log_path="person-logs", text="video", d_name=video_title, d_url=video_url)

        # Save the video metadata to "./videos/{video_title}/info.txt".
        info_lines = [
            str(video_id),
            str(video_title),
            str(video_duration),
            str(video_play_cnt),
            str(video_comment_cnt),
            str(video_like_cnt),
            str(video_share_cnt),
            str(video_width) + "*" + str(video_height),
            str(video_send_time),
            str(user_name),
            str(head_url),
            str(video_url),
            str(cover_url),
            str("xiaoniangao"),
        ]
        with open(r"./videos/" + video_title + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
            f_a.write("\n".join(info_lines))
        Common.person_logger().info("==========视频信息已保存至info.txt==========")

        # Upload the video.
        Common.person_logger().info("开始上传视频:{}".format(video_title))
        Publish.upload_and_publish("prod", "play")
        Common.person_logger().info("视频上传完成:{}", video_title)

        upload_time = int(time.time())

        # Record the video in the Feishu sheet: insert a row, then fill A2:Q2.
        Common.person_logger().info("添加视频到云文档:{}", video_title)
        time.sleep(1)
        Feishu.insert_columns("person-logs", "xiaoniangao", "yatRv2", "ROWS", 1, 2)
        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(upload_time))),
                   "定向账号爬取",
                   video_id,
                   video_title,
                   video_play_cnt,
                   video_comment_cnt,
                   video_like_cnt,
                   video_share_cnt,
                   video_duration,
                   str(video_width) + "*" + str(video_height),
                   time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time) / 1000)),
                   user_name,
                   profile_id,
                   profile_mid,
                   head_url,
                   cover_url,
                   video_url]]
        time.sleep(1)
        Feishu.update_values("person-logs", "xiaoniangao", "yatRv2", "A2:Q2", values)
        return video_send_time

    @classmethod
    def download_from_sub(cls, endtime):
        """Fetch one page of the follow feed and process every item in it.

        Items that pass all filters are downloaded, uploaded and recorded in
        the Feishu sheet. The response's "next_t" is appended to
        ``next_t_list`` so the next call requests the following page.

        :param endtime: items whose publish time "t" is smaller than this are
            skipped
        :return: ``int`` publish time of the last item on the page; None when
            the page is empty or when any error occurred (the error is logged)
        """
        url = "https://api.xiaoniangao.cn/album/get_user_trends"
        headers = cls._request_headers()
        data = {
            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
            "start_t": int(cls.next_t_list[-1]),
            "limit": 5,
            "share_width": 625,
            "share_height": 500,
            "token": cls.person_token,
            "uid": cls.person_uid,
            "proj": "ma",
            "wx_ver": "8.0.20",
            "code_ver": "3.62.0",
            "log_common_params": {
                "e": [{
                    "data": {
                        "page": "discoverIndexPage",
                        "topic": "follow"
                    }
                }],
                "ext": {
                    "brand": "iPhone",
                    "device": "iPhone 11",
                    "os": "iOS 14.7.1",
                    "weixinver": "8.0.20",
                    "srcver": "2.24.2",
                    "net": "wifi",
                    "scene": "1089"
                },
                "pj": "1",
                "pf": "2",
                "session_id": "18da9157-5aa6-4955-a849-9160f07ee912"
            }
        }

        try:
            urllib3.disable_warnings()
            r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
            # Parse the body once and reuse it.
            page = r.json()["data"]
            cls.next_t_list.append(page["next_t"])

            last_send_time = None
            for feed in page["list"]:
                last_send_time = cls._process_feed(feed, endtime)

            # NOTE(review): the original layout was lost; the return is placed
            # after the loop (publish time of the last item) - confirm against
            # the caller.
            if last_send_time is None:
                return None
            return int(last_send_time)
        except Exception as e:
            Common.person_logger().error("请求关注列表异常:{}", e)


if __name__ == "__main__":
    # Manual entry point; other available actions are person.sub_persons()
    # and print(person.unfollow_person_list()).
    person = Person()
    person.unsub_persons()