# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/6/15
import os
import sys
import time
import requests
import urllib3

# The project root must be on sys.path before the project-local imports below.
sys.path.append(os.getcwd())
from main.weishi_publish import Publish
from main.common import Common
from main.feishu_lib import Feishu

# Explicitly disable any proxy for outgoing requests.
proxies = {"http": None, "https": None}


class DownloadFollow:
    """Crawl the Weishi mini-program follow feed and download/publish its videos.

    ``get_feeds`` queues new videos in the Feishu sheet "PamIy1";
    ``download_publish`` consumes one queued row per call and records it in
    the Feishu sheet "caa3fa".
    """

    # WeChat account configuration, read from the Feishu sheet "9fTK1f"
    # when the class body is executed (i.e. at import time).
    Referer = Feishu.get_range_value("follow", "9fTK1f", "C3:C3")[0]
    wesee_openid = Feishu.get_range_value("follow", "9fTK1f", "C4:C4")[0]
    wesee_openkey = Feishu.get_range_value("follow", "9fTK1f", "C5:C5")[0]
    wesee_personid = Feishu.get_range_value("follow", "9fTK1f", "C6:C6")[0]
    wesee_access_token = Feishu.get_range_value("follow", "9fTK1f", "C7:C7")[0]
    wesee_thr_appid = Feishu.get_range_value("follow", "9fTK1f", "C8:C8")[0]

    # Pagination cursor: sent with every feed request and replaced by the
    # value returned in the response.
    attachInfo = ""

    # Ordered (old, new) replacements applied to a video title. Order matters
    # because the replacements run sequentially. Some entries appear twice in
    # a row, exactly as in the original chain.
    # NOTE(review): the duplicated entries may originally have been
    # look-alike characters (e.g. a non-breaking space or a full-width "?")
    # - confirm against the upstream file.
    _TITLE_REPLACEMENTS = (
        ("\n", ""), ("/", ""), ("快手", ""), (" ", ""),
        (" ", ""), ("&NBSP", ""), ("\r", ""),
        ("#", ""), (".", "。"), ("\\", ""),
        (":", ""), ("*", ""), ("?", ""),
        ("?", ""), ('"', ""), ("<", ""),
        (">", ""), ("|", ""), ("微视", ""),
    )

    # Ordered (old, new) replacements applied to a user nickname.
    _USER_NAME_REPLACEMENTS = (
        ("\n", ""), ("/", ""), ("快手", ""), (" ", ""),
        (" ", ""), ("&NBSP", ""), ("\r", ""), ("微视", ""),
    )

    @staticmethod
    def _replace_all(text, replacements):
        """Apply each (old, new) pair of ``replacements`` to ``text`` in order."""
        for old, new in replacements:
            text = text.replace(old, new)
        return text

    @classmethod
    def _clean_title(cls, desc):
        """Strip topic tags / mentions from ``desc`` and remove special characters.

        For each separator the text is split and the first piece is kept,
        unless that piece is empty, in which case the last piece is kept.
        """
        title = desc
        for separator in (" #", " #", "@"):
            pieces = title.split(separator)
            title = pieces[0] if pieces[0] != "" else pieces[-1]
        return cls._replace_all(title.strip(), cls._TITLE_REPLACEMENTS)

    @staticmethod
    def _sheet_contains(sheet_id, value):
        """Return True if ``value`` equals any cell of sheet ``sheet_id`` in "follow"."""
        return any(value in row for row in Feishu.get_values_batch("follow", sheet_id))

    # Sensitive-word list
    @classmethod
    def sensitive_words(cls):
        """Return every non-empty cell of the Feishu sheet "2Oxf8C" as a flat list."""
        rows = Feishu.get_values_batch("follow", "2Oxf8C")
        # Cells that come back as None (empty cells) are skipped.
        return [cell for row in rows for cell in row if cell is not None]

    # Download rule
    @staticmethod
    def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
        """
        Basic rule deciding whether a video should be downloaded.

        :param d_duration: duration (compared against a minimum of 20)
        :param d_width: width
        :param d_height: height
        :param d_play_cnt: play count
        :param d_like_cnt: like count
        :param d_share_cnt: share count
        :return: True when the rule is satisfied, otherwise False
        """
        if int(float(d_duration)) < 20:
            return False
        if not (int(d_width) >= 0 or int(d_height) >= 0):
            return False
        if int(d_play_cnt) < 0:
            return False
        return int(d_like_cnt) >= 0 or int(d_share_cnt) >= 0

    # Fetch the feed list
    @classmethod
    def get_feeds(cls):
        """
        Fetch one page of the follow feed and queue new videos for download.

        1. Request the video list from the WxminiGetFollowFeedList endpoint.
        2. Skip videos whose ID is already in sheet "caa3fa" of
           https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh
        3. Skip videos whose ID is already in sheet "PamIy1" of the same document.
        4. Write the remaining videos into row 2 of sheet "PamIy1".

        Any exception is logged and swallowed, which ends processing of the
        current page.
        """
        url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFollowFeedList"
        headers = {
            "content-type": "application/json",
            "Accept-Encoding": "gzip,compress,br,deflate",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
                          " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
                          " MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN",
            "Referer": str(cls.Referer)
        }
        cookies = {
            "wesee_authtype": "3",
            "wesee_openid": str(cls.wesee_openid),
            "wesee_openkey": str(cls.wesee_openkey),
            "wesee_personid": str(cls.wesee_personid),
            "wesee_refresh_token": "",
            "wesee_access_token": str(cls.wesee_access_token),
            "wesee_thr_appid": str(cls.wesee_thr_appid),
            "wesee_ichid": "8"
        }
        json_data = {
            "req_body": {
                "attachInfo": str(cls.attachInfo)
            },
            "req_header": {
                "mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\","
                          "\"weseeCostTag\":\"WxMiniProgram\"}"
            }
        }

        try:
            urllib3.disable_warnings()
            r = requests.post(headers=headers, url=url, cookies=cookies, json=json_data,
                              proxies=proxies, verify=False)
            # Decode the body once instead of re-parsing it for every field.
            payload = r.json()
            err_msg = payload["rsp_header"]["errMsg"]
            if err_msg != "":
                Common.logger("follow").error("errMsg:{}", err_msg)

            # Remember the cursor so the next call requests the following page.
            cls.attachInfo = payload["rsp_body"]["attachInfo"]
            feeds = payload["rsp_body"]["feeds"]

            for feed in feeds:
                video = feed["video"]
                ugc_data = feed["ugcData"]
                poster = feed["poster"]
                material = feed["material"]
                images = feed["images"]

                # Raw description; also used for the sensitive-word check.
                weishi_title = feed["desc"]
                # Video title with topics, mentions and special characters removed.
                video_title = cls._clean_title(weishi_title)

                # Missing fields fall back to 0, which marks the video invalid below.
                video_id = video.get("id", 0)
                video_play_cnt = ugc_data.get("playNum", 0)
                video_like_cnt = ugc_data.get("dingCount", 0)
                video_share_cnt = ugc_data.get("shareNum", 0)
                video_comment_cnt = ugc_data.get("totalCommentNum", 0)

                # Duration: the raw value is divided by 1000 and truncated.
                if "duration" in video:
                    video_duration = int(int(video["duration"]) / 1000)
                else:
                    video_duration = 0

                # Width / height: both are required, otherwise both become 0.
                if "width" in video and "height" in video:
                    video_width = video["width"]
                    video_height = video["height"]
                else:
                    video_width = 0
                    video_height = 0
                video_resolution = str(video_width) + "*" + str(video_height)

                # Publish time: the raw value is multiplied by 1000.
                if "createTime" in feed:
                    video_send_time = int(feed["createTime"]) * 1000
                else:
                    video_send_time = 0

                # User nickname and ID.
                user_name = cls._replace_all(poster["nick"].strip(), cls._USER_NAME_REPLACEMENTS)
                user_id = poster["id"]

                # Avatar URL: material thumbnail first, then the poster avatar.
                if "thumbURL" in material:
                    head_url = material["thumbURL"]
                else:
                    head_url = poster.get("avatar", 0)

                # Cover URL: first image, if any.
                cover_url = images[0]["url"] if images else 0

                # Playback URL.
                video_url = video.get("url", 0)

                Common.logger("follow").info("video_title:{}".format(video_title))
                Common.logger("follow").info("video_id:{}".format(video_id))
                Common.logger("follow").info("video_play_cnt:{}".format(video_play_cnt))
                Common.logger("follow").info("video_like_cnt:{}".format(video_like_cnt))
                Common.logger("follow").info("video_share_cnt:{}".format(video_share_cnt))
                Common.logger("follow").info("video_duration:{}秒".format(video_duration))
                Common.logger("follow").info(
                    "video_send_time:{}".format(time.strftime(
                        "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
                Common.logger("follow").info("user_name:{}".format(user_name))
                Common.logger("follow").info("video_url:{}".format(video_url))

                # Filter out invalid videos (any mandatory field missing).
                if video_id == 0 or video_duration == 0 or video_send_time == 0 \
                        or head_url == 0 or cover_url == 0 or video_url == 0:
                    Common.logger("follow").info("无效视频")
                # Basic download rule.
                elif not cls.download_rule(video_duration, video_width, video_height,
                                           video_play_cnt, video_like_cnt, video_share_cnt):
                    Common.logger("follow").info("不满足基础规则")
                # Sensitive words: any non-empty word contained in the raw description.
                elif any(word for word in cls.sensitive_words() if word in weishi_title):
                    Common.logger("follow").info("视频已中敏感词:{}".format(weishi_title))
                # De-duplicate against the already-downloaded sheet "caa3fa".
                elif cls._sheet_contains("caa3fa", video_id):
                    Common.logger("follow").info("该视频已下载:{}", video_title)
                # De-duplicate against the pending feeds sheet "PamIy1".
                elif cls._sheet_contains("PamIy1", video_id):
                    Common.logger("follow").info("该视频已在feeds中:{}", video_title)
                else:
                    Common.logger("follow").info("该视频未下载,添加至feeds中:{}".format(video_title))
                    # Insert a fresh row at the top of the feeds sheet.
                    time.sleep(1)
                    Feishu.insert_columns("follow", "PamIy1", "ROWS", 1, 2)
                    # Current time, recorded as the crawl time.
                    get_feeds_time = int(time.time())
                    # Row written to the sheet.
                    values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(get_feeds_time))),
                               "关注榜",
                               video_id,
                               video_title,
                               video_play_cnt,
                               video_comment_cnt,
                               video_like_cnt,
                               video_share_cnt,
                               video_duration,
                               video_resolution,
                               time.strftime("%Y/%m/%d %H:%M:%S",
                                             time.localtime(int(video_send_time) / 1000)),
                               user_name,
                               user_id,
                               head_url,
                               cover_url,
                               video_url]]
                    # Wait 1s so the cloud document is not hit too frequently.
                    time.sleep(1)
                    Feishu.update_values("follow", "PamIy1", "A2:P2", values)

        except Exception as e:
            Common.logger("follow").error("抓取关注列表异常:{}", e)

    # Download and upload
    @classmethod
    def download_publish(cls):
        """
        Process the first queued row of sheet "PamIy1" (one video per call).

        The sheet is fetched once. The row at list index 1 (sheet row 2) is
        examined and then removed from the sheet in every case:

        * empty ID or title -> the row is deleted;
        * ID already present in sheet "caa3fa" -> the row is deleted;
        * otherwise the cover and video are downloaded, ``info.txt`` is
          written, the video is uploaded, a record is added to sheet
          "caa3fa" and the row is deleted.

        When the sheet has no row at index 1 nothing is done. Any exception
        is logged and sheet row 2 is deleted so that a broken row cannot
        block the queue.
        """
        try:
            sheet = Feishu.get_values_batch("follow", "PamIy1")
            # Index 0 is never written by this class (presumably a header row);
            # queued videos start at index 1, i.e. sheet row 2.
            row_index = 1
            if len(sheet) <= row_index:
                # Nothing queued: avoid indexing past the end of the sheet.
                return

            time.sleep(1)
            row = sheet[row_index]
            download_video_id = row[2]
            download_video_title = row[3]
            download_video_play_cnt = row[4]
            download_video_comment_cnt = row[5]
            download_video_like_cnt = row[6]
            download_video_share_cnt = row[7]
            download_video_duration = row[8]
            download_video_resolution = row[9]
            download_video_send_time = row[10]
            download_user_name = row[11]
            download_user_id = row[12]
            download_head_url = row[13]
            download_cover_url = row[14]
            download_video_url = row[15]

            Common.logger("follow").info("正在判断第{}行,视频:{}", row_index, download_video_title)

            # Filter out empty rows.
            if download_video_id in (None, "") or download_video_title in (None, ""):
                Common.logger("follow").warning("空行,删除")
                # Delete the row (the dimension may be ROWS or COLUMNS).
                Feishu.dimension_range("follow", "PamIy1", "ROWS", row_index + 1, row_index + 1)
                return

            # De-duplicate against the already-downloaded sheet.
            if cls._sheet_contains("caa3fa", download_video_id):
                Common.logger("follow").info("该视频已下载:{}", download_video_title)
                # Delete the row (the dimension may be ROWS or COLUMNS).
                Feishu.dimension_range("follow", "PamIy1", "ROWS", row_index + 1, row_index + 1)
                return

            Common.logger("follow").info("开始下载视频:{}", download_video_title)
            # Download the cover.
            Common.download_method(job="follow", text="cover",
                                   d_name=str(download_video_title), d_url=str(download_cover_url))
            # Download the video.
            Common.download_method(job="follow", text="video",
                                   d_name=str(download_video_title), d_url=str(download_video_url))

            # Save the video information to "./videos/{download_video_title}/info.txt".
            info_path = "./videos/{}/info.txt".format(download_video_title)
            with open(info_path, "a", encoding="UTF-8") as f_a:
                # One field per line, no trailing newline. The publish time is
                # converted back to an integer timestamp.
                f_a.write("\n".join([
                    str(download_video_id),
                    str(download_video_title),
                    str(download_video_duration),
                    str(download_video_play_cnt),
                    str(download_video_comment_cnt),
                    str(download_video_like_cnt),
                    str(download_video_share_cnt),
                    str(download_video_resolution),
                    str(int(time.mktime(
                        time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))),
                    str(download_user_name),
                    str(download_head_url),
                    str(download_video_url),
                    str(download_cover_url),
                    str(cls.wesee_access_token),
                ]))
            Common.logger("follow").info("==========视频信息已保存至info.txt==========")

            # Upload the video.
            Common.logger("follow").info("开始上传视频:{}".format(download_video_title))
            Publish.upload_and_publish("follow", "prod", "play")

            # Record the video in sheet "caa3fa" of
            # https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh
            Common.logger("follow").info("保存视频ID至云文档:{}", download_video_title)
            # Insert a fresh row at the top of the video-ID sheet.
            Feishu.insert_columns("follow", "caa3fa", "ROWS", 1, 2)
            # Row written to the video-ID sheet.
            upload_time = int(time.time())
            values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time))),
                       "关注榜",
                       str(download_video_id),
                       str(download_video_title),
                       download_video_play_cnt,
                       download_video_comment_cnt,
                       download_video_like_cnt,
                       download_video_share_cnt,
                       download_video_duration,
                       str(download_video_resolution),
                       str(download_video_send_time),
                       str(download_user_name),
                       str(download_user_id),
                       str(download_head_url),
                       str(download_cover_url),
                       str(download_video_url)]]
            time.sleep(1)
            # NOTE(review): the range spans 17 columns (A-Q) but 16 values are
            # written - confirm whether "A2:P2" was intended.
            Feishu.update_values("follow", "caa3fa", "A2:Q2", values)

            # Delete the processed row (the dimension may be ROWS or COLUMNS).
            Feishu.dimension_range("follow", "PamIy1", "ROWS", row_index + 1, row_index + 1)
            return

        except Exception as e:
            Common.logger("follow").error("下载/上传视频异常:{}", e)
            # Drop the row that caused the failure so the queue keeps moving.
            Feishu.dimension_range("follow", "PamIy1", "ROWS", 2, 2)


if __name__ == "__main__":
    download_follow = DownloadFollow()
    download_follow.get_feeds()
    download_follow.download_publish()