# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2022/3/29
"""
Download rule-compliant videos from the WeChat mini program "Kuaishou Short Video".
"""
import json
import os
import sys
import time

import requests
import urllib3

sys.path.append(os.getcwd())
from main.common import Common
from main.feishu_lib import Feishu
from main.publish import Publish

proxies = {"http": None, "https": None}


class KuaiShou:
    """Crawl the Kuaishou recommend feed, stage candidates in a sheet, then download and publish."""

    # WeChat account configuration. NOTE: this sheet is read when the class body
    # is executed, i.e. at import time of this module.
    wechat_sheet = Feishu.get_values_batch("recommend", "kuaishou", "WFF4jw")
    Referer = wechat_sheet[2][2]
    NS_sig3 = wechat_sheet[3][2]
    NS_sig3_origin = wechat_sheet[4][2]
    did = wechat_sheet[5][2]
    session_key = wechat_sheet[6][2]
    unionid = wechat_sheet[7][2]
    eUserStableOpenId = wechat_sheet[8][2]
    openId = wechat_sheet[9][2]
    eOpenUserId = wechat_sheet[10][2]
    kuaishou_wechat_app_st = wechat_sheet[11][2]
    passToken = wechat_sheet[12][2]
    userId = wechat_sheet[13][2]

    # Timestamp layout used for every date written to the sheets and to info.txt.
    _TIME_FORMAT = "%Y/%m/%d %H:%M:%S"

    # Ordered (old, new) substitutions applied to user names and, first, to titles.
    # NOTE(review): some adjacent entries look identical here (" " twice, and "?"
    # twice in the title list below); presumably they were distinct characters
    # (e.g. full-width variants) before the file was re-encoded - confirm.
    _NAME_REPLACEMENTS = (
        ("\n", ""), ("/", ""), ("快手", ""), (" ", ""),
        (" ", ""), ("&NBSP", ""), ("\r", ""),
    )
    # Additional substitutions applied to titles only, after _NAME_REPLACEMENTS.
    _TITLE_EXTRA_REPLACEMENTS = (
        ("#", ""), (".", "。"), ("\\", ""),
        (":", ""), ("*", ""), ("?", ""),
        ("?", ""), ('"', ""), ("<", ""),
        (">", ""), ("|", ""),
    )

    @classmethod
    def sensitive_words(cls):
        """
        Read the sensitive-word list from the cloud sheet.
        :return: flat list of every non-empty cell in sheet "HIKVvs"
        """
        rows = Feishu.get_values_batch("recommend", "kuaishou", "HIKVvs")
        # Skip empty cells.
        return [cell for row in rows for cell in row if cell is not None]

    @staticmethod
    def kuaishou_download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
        """
        Basic rule deciding whether a video should be downloaded.
        :param d_duration: duration in seconds
        :param d_width: width
        :param d_height: height
        :param d_play_cnt: play count
        :param d_like_cnt: like count
        :param d_share_cnt: share count
        :return: True when every threshold is met, otherwise False
        """
        # Conditions are evaluated left to right and short-circuit, so a value that
        # cannot be converted is only touched when all earlier checks passed.
        return (
            600 >= int(float(d_duration)) >= 60
            and (int(d_width) >= 720 or int(d_height) >= 720)
            and int(d_play_cnt) >= 50000
            and int(d_like_cnt) >= 50000
            and int(d_share_cnt) >= 2000
        )

    @staticmethod
    def _first_segment(text, separator):
        """Return the part of text before the first separator, or the last part when that is empty."""
        parts = text.split(separator)
        return parts[0] if parts[0] != "" else parts[-1]

    @staticmethod
    def _apply_replacements(text, replacements):
        """Strip text, then apply each (old, new) substitution in order."""
        cleaned = text.strip()
        for old, new in replacements:
            cleaned = cleaned.replace(old, new)
        return cleaned

    @classmethod
    def _clean_title(cls, caption):
        """Drop hashtags / mentions from a caption and remove characters unsafe in a directory name."""
        # NOTE(review): the two " #" separators look identical here; presumably one
        # used a different whitespace character originally - confirm.
        title = cls._first_segment(caption, " #")
        title = cls._first_segment(title, " #")
        title = cls._first_segment(title, "@")
        return cls._apply_replacements(title, cls._NAME_REPLACEMENTS + cls._TITLE_EXTRA_REPLACEMENTS)

    @classmethod
    def _format_ms_timestamp(cls, timestamp_ms):
        """Format a millisecond epoch timestamp as local time using _TIME_FORMAT."""
        return time.strftime(cls._TIME_FORMAT, time.localtime(int(timestamp_ms) / 1000))

    @staticmethod
    def _sheet_cells(log_type, sheet_id):
        """Return every cell of the given "kuaishou" sheet as one flat list (used for de-duplication)."""
        return [cell for row in Feishu.get_values_batch(log_type, "kuaishou", sheet_id) for cell in row]

    @classmethod
    def get_feeds(cls, log_type):
        """
        1. Fetch the video list from the Kuaishou mini program home recommendation feed
        2. De-duplicate against https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
        3. De-duplicate against https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=JK6npf
        4. Append the video info to https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=JK6npf
        :param log_type: passed to Common.logger and to every Feishu call
        :return: None; failures are logged, not raised
        """
        url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/recommend"
        headers = {
            "content-type": "application/json",
            "Accept-Encoding": "gzip,compress,br,deflate",
            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148'
                          ' MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN',
            "Referer": str(cls.Referer),
        }
        params = {
            "__NS_sig3": str(cls.NS_sig3),
            "__NS_sig3_origin": str(cls.NS_sig3_origin)
        }
        cookies = {
            "did": str(cls.did),
            "preMinaVersion": "v3.109.0",
            "sid": "kuaishou.wechat.app",
            "appId": "ks_wechat_small_app_2",
            "clientid": "13",
            "client_key": "f60ac815",
            "kpn": "WECHAT_SMALL_APP",
            "kpf": "OUTSIDE_ANDROID_H5",
            "language": "zh_CN",
            "smallAppVersion": "v3.114.0",
            "session_key": str(cls.session_key),
            "unionid": str(cls.unionid),
            "eUserStableOpenId": str(cls.eUserStableOpenId),
            "openId": str(cls.openId),
            "eOpenUserId": str(cls.eOpenUserId),
            "kuaishou.wechat.app_st": str(cls.kuaishou_wechat_app_st),
            "passToken": str(cls.passToken),
            "userId": str(cls.userId)
        }
        json_data = {
            "count": 10,
            "portal": 1,
            "pageType": 2,
            "extraRequestInfo": "{\"scene\":1089,\"fid\":\"\",\"sharerUserId\":\"\",\"curPhotoIndex\":0,"
                                "\"adShow\":true,\"weChatAd\":{},\"headurl\":\"https://js2.a.kwimgs.com/udata/pkg"
                                "/fe/profiel_icon_photo_normal@3x.fb3ec1af.png\",\"page\":0}",
            "needLivestream": True,
            "pcursor": 0,
            "sourceFrom": 2,
            "thirdPartyUserId": int(cls.userId)
        }

        logger = Common.logger(log_type)
        try:
            urllib3.disable_warnings()
            r = requests.post(url=url, headers=headers, params=params,
                              cookies=cookies, json=json_data, proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            feeds = response["feeds"]

            # Fetched lazily, at most once per call, instead of once per feed.
            sensitive_words = None

            for feed in feeds:
                # A missing caption / userName yields "" and is rejected as invalid below.
                kuaishou_title = feed.get("caption") or ""
                video_title = cls._clean_title(kuaishou_title)

                # "0" is the sentinel for a missing field.
                video_id = feed.get("photoId", "0")
                video_play_cnt = feed.get("viewCount", "0")
                video_like_cnt = feed.get("likeCount", "0")
                video_share_cnt = feed.get("shareCount", "0")
                video_comment_cnt = feed.get("commentCount", "0")

                if "duration" not in feed:
                    video_duration = "0"
                else:
                    # The feed value is divided by 1000 and logged/stored as seconds.
                    video_duration = int(int(feed["duration"]) / 1000)

                # Resolution is only trusted when both dimensions are present.
                if "width" not in feed or "height" not in feed:
                    video_width = "0"
                    video_height = "0"
                else:
                    video_width = feed["width"]
                    video_height = feed["height"]
                video_resolution = str(video_width) + "*" + str(video_height)

                video_send_time = feed.get("timestamp", "0")

                user_name = cls._apply_replacements(feed.get("userName") or "", cls._NAME_REPLACEMENTS)
                # userId has no sentinel: a missing key raises and is reported by the handler below.
                user_id = feed["userId"]
                head_url = feed.get("headUrl", "0")

                cover_urls = feed.get("coverUrls") or []
                cover_url = cover_urls[0]["url"] if cover_urls else "0"
                video_urls = feed.get("mainMvUrls") or []
                video_url = video_urls[0]["url"] if video_urls else "0"

                send_time_text = cls._format_ms_timestamp(video_send_time)

                logger.info("video_title:{}".format(video_title))
                logger.info("user_name:{}".format(user_name))
                logger.info("video_id:{}".format(video_id))
                logger.info("video_play_cnt:{}".format(video_play_cnt))
                logger.info("video_like_cnt:{}".format(video_like_cnt))
                logger.info("video_share_cnt:{}".format(video_share_cnt))
                logger.info("video_duration:{}秒".format(video_duration))
                logger.info("video_send_time:{}".format(send_time_text))
                logger.info("video_url:{}".format(video_url))

                # Filter out invalid videos.
                if video_id == "0" \
                        or head_url == "0" \
                        or cover_url == "0" \
                        or video_url == "0" \
                        or video_duration == "0" \
                        or video_send_time == "0" \
                        or user_name == "" \
                        or video_title == "":
                    logger.info("无效视频\n")
                    continue

                # Sensitive-word check against the raw caption.
                if sensitive_words is None:
                    sensitive_words = cls.sensitive_words()
                if any(word if word in kuaishou_title else False for word in sensitive_words):
                    logger.info("视频已中敏感词:{}\n".format(kuaishou_title))
                    continue

                # De-duplicate against the downloaded-videos sheet:
                # https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
                if video_id in cls._sheet_cells(log_type, "3cd128"):
                    logger.info("该视频已下载:{}\n".format(video_title))
                    continue

                # De-duplicate against the feeds sheet (re-read each time because it grows in this loop):
                # https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=JK6npf
                if video_id in cls._sheet_cells(log_type, "JK6npf"):
                    logger.info("该视频已在feeds中:{}\n".format(video_title))
                    continue

                # Insert a blank first data row into the feeds sheet.
                time.sleep(1)
                Feishu.insert_columns(log_type, "kuaishou", "JK6npf", "ROWS", 1, 2)

                get_feeds_time = int(time.time())
                values = [[str(time.strftime(cls._TIME_FORMAT, time.localtime(get_feeds_time))),
                           "推荐榜",
                           video_id,
                           video_title,
                           video_play_cnt,
                           video_comment_cnt,
                           video_like_cnt,
                           video_share_cnt,
                           video_duration,
                           video_resolution,
                           send_time_text,
                           user_name,
                           user_id,
                           head_url,
                           cover_url,
                           video_url]]
                # Wait 1s so the cloud sheet is not hit too frequently.
                time.sleep(1)
                Feishu.update_values(log_type, "kuaishou", "JK6npf", "A2:P2", values)
                logger.info("视频:{}添加至feeds成功\n".format(video_title))
        except Exception as e:
            logger.error("获取视频 list 异常:{}\n".format(e))

    @classmethod
    def _save_video_info(cls, title, info_lines):
        """Append the given values, one per line, to ./videos/<title>/info.txt."""
        with open("./videos/" + str(title) + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
            f_a.write("\n".join(str(item) for item in info_lines))

    @classmethod
    def download_publish(cls, log_type, env):
        """
        1. Read video info from https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=JK6npf
        2. Download and upload the video when it satisfies the rule
        Test environment: env == dev
        Production environment: env == prod

        Each call handles exactly one row - the first data row (sheet row 2) - and
        always removes it from the feeds sheet afterwards; run_download_publish
        calls this repeatedly until only the header row is left.
        :return: None; failures are logged, not raised
        """
        logger = Common.logger(log_type)
        try:
            recommend_feeds_sheet = Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")
            # Nothing but the header (or nothing at all): no row to process.
            if len(recommend_feeds_sheet) < 2:
                return

            row = recommend_feeds_sheet[1]
            sheet_row = 2  # 1-based sheet row number of `row`

            download_video_id = row[2]
            download_video_title = row[3]
            download_video_play_cnt = row[4]
            download_video_comment_cnt = row[5]
            download_video_like_cnt = row[6]
            download_video_share_cnt = row[7]
            download_video_duration = row[8]
            download_video_resolution = row[9]
            download_video_send_time = row[10]
            download_user_name = row[11]
            download_user_id = row[12]
            download_head_url = row[13]
            download_cover_url = row[14]
            download_video_url = row[15]

            logger.info("正在判断第{}行,视频:{}".format(sheet_row, download_video_title))

            # Empty row: just delete it.
            if download_video_id is None or download_video_title is None or download_video_play_cnt is None:
                Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", sheet_row, sheet_row)
                logger.info("空行,删除成功\n")
                return

            # Already downloaded: delete the row.
            if download_video_id in cls._sheet_cells(log_type, "3cd128"):
                Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", sheet_row, sheet_row)
                logger.info("该视频已下载:{},删除成功\n".format(download_video_title))
                return

            # Does not satisfy the download rule: delete the row.
            resolution_parts = download_video_resolution.split("*")
            if not cls.kuaishou_download_rule(
                    download_video_duration,
                    resolution_parts[0],
                    resolution_parts[-1],
                    download_video_play_cnt,
                    download_video_like_cnt,
                    download_video_share_cnt):
                Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", sheet_row, sheet_row)
                logger.info("该视频不满足下载规则,删除在云文档中的信息:{}\n".format(download_video_title))
                return

            # Download the cover, then the video.
            Common.download_method(log_type=log_type, text="cover",
                                   d_name=str(download_video_title), d_url=str(download_cover_url))
            Common.download_method(log_type=log_type, text="video",
                                   d_name=str(download_video_title), d_url=str(download_video_url))

            # Save the video info to "./videos/{download_video_title}/info.txt".
            send_time_epoch = int(time.mktime(time.strptime(download_video_send_time, cls._TIME_FORMAT)))
            cls._save_video_info(download_video_title, [
                download_video_id,
                download_video_title,
                download_video_duration,
                download_video_play_cnt,
                download_video_comment_cnt,
                download_video_like_cnt,
                download_video_share_cnt,
                download_video_resolution,
                send_time_epoch,
                download_user_name,
                download_head_url,
                download_video_url,
                download_cover_url,
                cls.did,
            ])
            logger.info("==========视频信息已保存至info.txt==========")

            # Upload the video.
            logger.info("开始上传视频:{}".format(download_video_title))
            our_video_id = Publish.upload_and_publish(log_type, env, "play")
            our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(our_video_id) + "/info"
            logger.info("视频上传完成:{}".format(download_video_title))

            # Record the video ID in the downloaded-videos sheet:
            # https://w42nne6hzg.feishu.cn/sheets/shtcnp4SaJt37q6OOOrYzPMjQkg?sheet=3cd128
            logger.info("保存视频ID至云文档:{}".format(download_video_title))
            Feishu.insert_columns(log_type, "kuaishou", "3cd128", "ROWS", 1, 2)
            upload_time = int(time.time())
            values = [[str(time.strftime(cls._TIME_FORMAT, time.localtime(upload_time))),
                       "推荐榜",
                       str(download_video_id),
                       str(download_video_title),
                       our_video_link,
                       download_video_play_cnt,
                       download_video_comment_cnt,
                       download_video_like_cnt,
                       download_video_share_cnt,
                       download_video_duration,
                       str(download_video_resolution),
                       str(download_video_send_time),
                       str(download_user_name),
                       str(download_user_id),
                       str(download_head_url),
                       str(download_cover_url),
                       str(download_video_url)]]
            time.sleep(1)
            Feishu.update_values(log_type, "kuaishou", "3cd128", "F2:V2", values)

            # Remove the processed row from the feeds sheet.
            time.sleep(1)
            Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", sheet_row, sheet_row)
            logger.info("从云文档删除该视频信息成功:{}\n".format(download_video_title))
        except Exception as e:
            # The placeholder is required for the exception text to appear in the log.
            logger.error("视频 info 异常,删除该视频信息:{}\n".format(e))
            # Drop the offending row so the caller's loop can move on.
            Feishu.dimension_range(log_type, "kuaishou", "JK6npf", "ROWS", 2, 2)

    @classmethod
    def run_download_publish(cls, log_type, env):
        """Run download_publish until the feeds sheet has no data rows left."""
        try:
            # "> 1" rather than "!= 1" so that an empty sheet also ends the loop.
            while len(Feishu.get_values_batch(log_type, "kuaishou", "JK6npf")) > 1:
                cls.download_publish(log_type, env)
        except Exception as e:
            Common.logger(log_type).error("执行下载/上传异常:{}".format(e))


if __name__ == "__main__":
    kuaishou = KuaiShou()
    print(kuaishou.sensitive_words())