瀏覽代碼

push readme

wangkun 2 年之前
父節點
當前提交
609eda4c26
共有 12 個文件被更改,包括 534 次插入 和 969 次刪除
  1. 23 1
      README.md
  2. 0 462
      main/download_kuaishou.py
  3. 0 346
      main/download_weishi.py
  4. 432 0
      main/recommend.py
  5. 0 138
      main/run.py
  6. 79 0
      main/run_recommend.py
  7. 0 3
      txt/__init__.py
  8. 0 0
      txt/kuaishou_feeds.txt
  9. 0 0
      txt/kuaishou_videoid.txt
  10. 0 0
      txt/weishi_feeds.txt
  11. 0 0
      txt/weishi_videoid.txt
  12. 0 19
      抓取规则.txt

+ 23 - 1
README.md

@@ -1 +1,23 @@
-快手和微视小程序的爬虫
+# 微视小程序爬虫
+
+https://git.yishihui.com/Server/crawler_weishi.git
+
+
+# 依赖库文件
+python==3.10.0
+loguru==0.6.0
+oss2==2.15.0
+requests==2.27.1
+urllib3==1.26.9
+
+
+# 执行入口
+
+cd ./crawler_weishi
+
+python3 main/run_xxx.py
+
+
+# 需求
+
+2022/8/18 需求链接 https://w42nne6hzg.feishu.cn/docs/doccnbLbxAzkzwJJigo1ii5c3ih

+ 0 - 462
main/download_kuaishou.py

@@ -1,462 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/3/29
-"""
-从 微信小程序-快手短视频 中,下载符合规则的视频
-"""
-import json
-import time
-
-import requests
-import urllib3
-from main.common import Common
-from main.publish import Publish
-
-proxies = {"http": None, "https": None}
-
-
-class KuaiShou:
-    # 已下载视频列表
-    download_video_list = []
-
-    @classmethod
-    def kuaishou_sensitive_words(cls):
-        sensitive_words = [
-            "集结吧光合创作者",
-            "电影解说",
-            "快来露两手",
-            "分享家常美食教程",
-            "光合作者助手",
-            "创作者中心",
-            "创作者学院",
-            "娱乐星熠计划",
-            "解说电影",
-            "电影剪辑",
-            "放映室",
-            "老剧",
-            "影视剪辑",
-            "精彩片段",
-            "冬日影娱大作战",
-            "春日追剧计划单",
-            "影视解说",
-            "中视频影视混剪计划",
-            "众志成城共抗疫情",
-            "我在追好剧",
-            "娱乐星灿计划",
-            "电影",
-            "电视剧",
-            "毛泽东",
-            "毛主席",
-            "周恩来",
-            "林彪",
-            "习近平",
-            "习大大",
-            "彭丽媛",
-            "怀旧经典影视",
-                           ]
-        return sensitive_words
-
-    @staticmethod
-    def kuaishou_download_rule(d_duration, d_width, d_height,
-                               d_play_cnt, d_like_cnt, d_share_cnt):
-        """
-        下载视频的基本规则
-        :param d_duration: 时长
-        :param d_width: 宽
-        :param d_height: 高
-        :param d_play_cnt: 播放量
-        :param d_like_cnt: 点赞量
-        :param d_share_cnt: 分享量
-        :return: 满足规则,返回 True;反之,返回 False
-        """
-        if 600 >= int(float(d_duration)) >= 60:
-            if int(d_width) >= 720 or int(d_height) >= 720:
-                if int(d_play_cnt) >= 50000:
-                    if int(d_like_cnt) >= 50000:
-                        if int(d_share_cnt) >= 2000:
-                            return True
-                        else:
-                            return False
-                    else:
-                        return False
-                else:
-                    return False
-            return False
-        return False
-
-    @classmethod
-    def kuaishou_get_recommend(cls):
-        """
-        从快手小程序首页推荐获取视频list:
-            1.在 kuaishou_videoid.txt 中去重
-            2.在 kuaishou_feeds.txt 中去重
-            3.添加视频信息到 kuaishou_feeds.txt
-        """
-        url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/recommend"
-        params = {
-            "__NS_sig3": "e6f6b281ea31e3d7d1bbb8b91f662576fc25f7c3a7a7a5a5aaaba8b2",
-            "__NS_sig3_origin": "3sCt3iAAAAAAAAAAAAAAAwEQBv2b8ewCwkZKaiAAAAAPg0soi"
-                                "e7GiOlU vF4zPrG1Nl6xvaoBgFd3MwTzOed9w=="
-        }
-        cookies = {
-            "did": "wxo_05f915ac6b1deca87db36cea1a0fd18fae6c",
-            "preMinaVersion": "v3.109.0",
-            "sid": "kuaishou.wechat.app",
-            "appId": "ks_wechat_small_app_2",
-            "clientid": "13",
-            "client_key": "f60ac815",
-            "kpn": "WECHAT_SMALL_APP",
-            "kpf": "OUTSIDE_ANDROID_H5",
-            "language": "zh_CN",
-            "smallAppVersion": "v3.109.0",
-            "session_key": "123005bcc551a92aac29cdb96190251c9f492c29d4ba6c502dc"
-                           "0d2f8b8d18df356a2f7a22d6924d1dd34b8554a64af49b1bb1a"
-                           "1236cd2f69c25d4ac2a2531ebcd28c179da14b222023f9e111c"
-                           "c4d3b064ac7b0915d8c9fdaccb59e4048e96a5c38a32b2ce9f4abf628053001",
-            "unionid": "V2:1230b56c8337908c3eecba63142a58daca05535c1f14bf67d3d8"
-                       "85cace91a7db335c5572d204762d075f24aa84412e2955711a12bb9"
-                       "2bd9c2290489ba7a733708a4a446de83822205ab727650489dda0db"
-                       "9d2a226c5ddb66d88a1f1373283a3d3b959611d816660028053001",
-            "eUserStableOpenId": "12303325e8710eb802137c70fd1fb65997a4e5e33d82"
-                                 "cddd409d335d096e20873e07ee472090133bc7a67e5c"
-                                 "749da045d9a31a12da4c4c26181d432b873ec39432f4"
-                                 "10196c6c2220323d0e6b562d1b3786aefb352b4e509c"
-                                 "d96f3466b7b2e5e74b904a94c40792d928053001",
-            "openId": "o5otV45DcV1EUsWw4fAUk_iq0YSA",
-            "eOpenUserId": "124074b7726c996283f25044a42e2c7427e929cd6d968c5342"
-                           "330e61fc8939e57b0da4ffe21887f3abc8784175f73e1a267d"
-                           "671247273806f293f64c9c8c2adc00a21a12bb92bd9c229048"
-                           "9ba7a733708a4a446de8382220534aa79c69b74866bb09187e"
-                           "eceec880fa1e0fa421b7df8b3289dab603b17c4828053001",
-            "kuaishou.wechat.app_st": "ChZrdWFpc2hvdS53ZWNoYXQuYXBwLnN0ErAB8aO"
-                                      "EcB6jh4CMSJ-p_4BJFCId0PKNa_5IeFfeV_tj7q"
-                                      "CjdXK0y13CSte6-KHbNK9BPo6Rjy3OGny0sh4Zb"
-                                      "5AUl3Q_zqVXe2TunW7_F3nlTdJOdZ6iVIhPrHa1"
-                                      "CM0Y-cG9gS4FDDzTvejfWaTI0CbjfNN0RZXzYVE"
-                                      "AUVT_BNgUVDtYBbEY792gPylMfXxwxKMSzkhaDe"
-                                      "eaHkGCWUj62FGCFYQ9Fw2W3d7suCXFsNylqT4aE"
-                                      "s8oNwmycUiygfvfKuoXlHkbeSIgOhEFMZ3ArImS"
-                                      "vFY_OwLJDHak1iXRO8g5TwzHTvBT3WcoBTAB",
-            "passToken": "ChNwYXNzcG9ydC5wYXNzLXRva2VuEpABI42IhPCJHfFngXC3i-vF"
-                         "3daRTB-EtnAYyE6HpfWcPoZ6VSRDvKrom_RvltQ2zKk1T3_FJteb"
-                         "mv7ZzQLD7IicnTypaGoeflb7KQVrAv50Mp_JL4ObfBu_xTiwI53t"
-                         "bTlM6iML0G7DFd16K5z0jZZ1xECKVQQbk_vIqnseUujFIWAsKcDz"
-                         "BqqfnQNbUU5DzDUkGhKgKyzmNjRDxLfpDU5SPFhJmG0iIGBZ_Vd-"
-                         "7eT8i_Xit9ZPM-zdFpnRZFveFE9iplMg8Z06KAUwAQ",
-            "userId": "2845397958"
-        }
-        json_data = {
-            "thirdPartyUserId": 2845397958,
-            "photoId": "5250352807040393911",
-            "forwardUserId": 2845397958,
-            "count": 10,
-            "portal": 2,
-            "pageType": 2,
-            "needLivestream": "true",
-            "extraRequestInfo": "{\"scene\":1074,\"fid\":\"2845397958\","
-                                "\"sharerUserId\":\"2845397958\",\"curPhotoIndex\":0,"
-                                "\"adShow\":true,\"weChatAd\":{},\"page\":0}",
-            "pcursor": 0,
-            "sourceFrom": 2,
-        }
-
-        try:
-            urllib3.disable_warnings()
-            r = requests.post(url=url, params=params, cookies=cookies, json=json_data, proxies=proxies, verify=False)
-            response = json.loads(r.content.decode("utf8"))
-            if "feeds" not in response:
-                Common.crawler_log().info("获取快手视频 list 出错:{},休眠 10s".format(response))
-                time.sleep(10)
-            else:
-                feeds = response["feeds"]
-                for i in range(len(feeds)):
-                    if "photoId" not in feeds[i]:
-                        photo_id = "0"
-                        Common.crawler_log().info("photo_id:{}".format(photo_id))
-                    else:
-                        photo_id = feeds[i]["photoId"]
-                        Common.crawler_log().info("photo_id:{}".format(photo_id))
-
-                    if "viewCount" not in feeds[i]:
-                        video_play_cnt = "0"
-                        Common.crawler_log().info("video_play_cnt:0")
-                    else:
-                        video_play_cnt = feeds[i]["viewCount"]
-                        Common.crawler_log().info("video_play_cnt:{}".format(video_play_cnt))
-
-                    if "likeCount" not in feeds[i]:
-                        video_like_cnt = "0"
-                        Common.crawler_log().info("video_like_cnt:0")
-                    else:
-                        video_like_cnt = feeds[i]["likeCount"]
-                        Common.crawler_log().info("video_like_cnt:{}".format(video_like_cnt))
-
-                    if "headUrl" not in feeds[i]:
-                        head_url = "0"
-                        Common.crawler_log().info("head_url:不存在")
-                    else:
-                        head_url = feeds[i]["headUrl"]
-                        Common.crawler_log().info("head_url:{}".format(head_url))
-
-                    if len(feeds[i]["coverUrls"]) == 0:
-                        cover_url = "0"
-                        Common.crawler_log().info("cover_url:不存在")
-                    else:
-                        cover_url = feeds[i]["coverUrls"][0]["url"]
-                        Common.crawler_log().info("cover_url:{}".format(cover_url))
-
-                    if len(feeds[i]["mainMvUrls"]) == 0:
-                        video_url = "0"
-                        Common.crawler_log().info("video_url:不存在")
-                    else:
-                        video_url = feeds[i]["mainMvUrls"][0]["url"]
-                        Common.crawler_log().info("video_url:{}".format(video_url))
-
-                    if "shareCount" not in feeds[i]:
-                        video_share_cnt = "0"
-                        Common.crawler_log().info("video_share_cnt:0")
-                    else:
-                        video_share_cnt = feeds[i]["shareCount"]
-                        Common.crawler_log().info("video_share_cnt:{}".format(video_share_cnt))
-
-                    if "width" not in feeds[i] or "height"not in feeds[i]:
-                        video_width = "0"
-                        video_height = "0"
-                        video_resolution = str(video_width) + "*" + str(video_height)
-                        Common.crawler_log().info("无分辨率")
-                    else:
-                        video_width = feeds[i]["width"]
-                        video_height = feeds[i]["height"]
-                        video_resolution = str(video_width) + "*" + str(video_height)
-                        Common.crawler_log().info("video_resolution:{}".format(video_resolution))
-
-                    if "commentCount" not in feeds[i]:
-                        video_comment_cnt = "0"
-                        Common.crawler_log().info("video_comment_cnt:0")
-                    else:
-                        video_comment_cnt = feeds[i]["commentCount"]
-                        Common.crawler_log().info("video_comment_cnt:{}".format(video_comment_cnt))
-
-                    if "duration" not in feeds[i]:
-                        video_duration = "0"
-                        Common.crawler_log().info("video_duration:不存在")
-                    else:
-                        video_duration = int(int(feeds[i]["duration"])/1000)
-                        Common.crawler_log().info("video_duration:{}秒".format(video_duration))
-
-                    if "timestamp" not in feeds[i]:
-                        video_send_time = "0"
-                        Common.crawler_log().info("video_send_time:不存在")
-                    else:
-                        video_send_time = feeds[i]["timestamp"]
-                        Common.crawler_log().info("video_send_time:{}".format(
-                            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time)/1000))))
-
-                    user_name = feeds[i]["userName"].strip().replace("\n", "")\
-                        .replace("/", "").replace("快手", "").replace(" ", "")\
-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                    Common.crawler_log().info("user_name:{}".format(user_name))
-
-                    user_id = feeds[i]["userId"]
-                    Common.crawler_log().info("user_id:{}".format(user_id))
-
-                    # 视频标题过滤话题及处理特殊字符
-                    kuaishou_title = feeds[i]["caption"]
-                    title_split1 = kuaishou_title.split(" #")
-                    if title_split1[0] != "":
-                        title1 = title_split1[0]
-                    else:
-                        title1 = title_split1[-1]
-
-                    title_split2 = title1.split(" #")
-                    if title_split2[0] != "":
-                        title2 = title_split2[0]
-                    else:
-                        title2 = title_split2[-1]
-
-                    title_split3 = title2.split("@")
-                    if title_split3[0] != "":
-                        title3 = title_split3[0]
-                    else:
-                        title3 = title_split3[-1]
-
-                    video_title = title3.strip().replace("\n", "")\
-                        .replace("/", "").replace("快手", "").replace(" ", "")\
-                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")\
-                        .replace("#", "").replace(".", "。").replace("\\", "")\
-                        .replace(":", "").replace("*", "").replace("?", "")\
-                        .replace("?", "").replace('"', "").replace("<", "")\
-                        .replace(">", "").replace("|", "")
-
-                    Common.crawler_log().info("video_title:{}".format(video_title))
-
-                    # 从 kuaishou_videoid.txt 中去重
-                    photo_ids = Common.read_txt("kuaishou_videoid.txt")
-                    if photo_id in [p_id.strip() for p_id in photo_ids]:
-                        Common.crawler_log().info("该视频已下载:{}".format(video_title))
-                        pass
-                    else:
-                        Common.crawler_log().info("该视频未下载:{}".format(video_title))
-
-                        # 从 kuaishou_feeds.txt 中去重
-                        contents = Common.read_txt("kuaishou_feeds.txt")
-                        # kuaishou_feeds.txt 为空时,直接保存
-                        if len(contents) == 0 and head_url != "0" \
-                                and cover_url != "0" and video_url != "0" \
-                                and video_duration != "0" and photo_id != "0":
-                            # 判断敏感词
-                            if any(word if word in kuaishou_title else False
-                                   for word in cls.kuaishou_sensitive_words()) is True:
-                                Common.crawler_log().info("视频已中敏感词:{}".format(kuaishou_title))
-                            else:
-                                basic_time = int(time.time())
-                                Common.crawler_log().info("添加视频信息至kuaishou_feeds.txt:{}".format(video_title))
-                                with open(r"./txt/kuaishou_feeds.txt", "a", encoding="UTF-8") as f_a:
-                                    f_a.write(str(basic_time) + " + " +
-                                              str(photo_id) + " + " +
-                                              str(video_play_cnt) + " + " +
-                                              str(video_title) + " + " +
-                                              str(video_duration) + " + " +
-                                              str(video_comment_cnt) + " + " +
-                                              str(video_like_cnt) + " + " +
-                                              str(video_share_cnt) + " + " +
-                                              str(video_resolution) + " + " +
-                                              str(video_send_time) + " + " +
-                                              str(user_name) + " + " +
-                                              str(head_url) + " + " +
-                                              str(cover_url) + " + " +
-                                              str(video_url) + " + " +
-                                              str(user_id) + " + " +
-                                              str("wxo_b07ba02ad4340205d89b47c76030bb090977") + "\n")
-                        else:
-                            if photo_id in [content.split(" + ")[1] for content in contents]:
-                                Common.crawler_log().info("该视频已在 kuaishou_feeds.txt 中:{}".format(video_title))
-                            elif head_url == "0" or cover_url == "0" \
-                                    or video_url == "0" or video_duration == "0" or photo_id == "0":
-                                Common.crawler_log().info("视频封面/播放地址/播放时长/用户头像不存在")
-                            else:
-                                # 判断敏感词
-                                if any(word if word in kuaishou_title else False
-                                       for word in cls.kuaishou_sensitive_words()) is True:
-                                    Common.crawler_log().info("视频已中敏感词:{}".format(kuaishou_title))
-                                else:
-                                    basic_time = int(time.time())
-                                    Common.crawler_log().info("添加视频信息至kuaishou_feeds.txt:{}".format(video_title))
-                                    with open(r"./txt/kuaishou_feeds.txt", "a", encoding="UTF-8") as f_a:
-                                        f_a.write(str(basic_time) + " + " +
-                                                  str(photo_id) + " + " +
-                                                  str(video_play_cnt) + " + " +
-                                                  str(video_title) + " + " +
-                                                  str(video_duration) + " + " +
-                                                  str(video_comment_cnt) + " + " +
-                                                  str(video_like_cnt) + " + " +
-                                                  str(video_share_cnt) + " + " +
-                                                  str(video_resolution) + " + " +
-                                                  str(video_send_time) + " + " +
-                                                  str(user_name) + " + " +
-                                                  str(head_url) + " + " +
-                                                  str(cover_url) + " + " +
-                                                  str(video_url) + " + " +
-                                                  str(user_id) + " + " +
-                                                  str("wxo_b07ba02ad4340205d89b47c76030bb090977") + "\n")
-        except Exception as e:
-            Common.crawler_log().error("获取视频 list 异常:{}".format(e))
-
-    @classmethod
-    def kuaishou_download_play_video(cls, env):
-        """
-        下载播放量视频
-        测试环境:env == dev
-        正式环境:env == prod
-        """
-        videos = Common.read_txt("kuaishou_feeds.txt")
-        for video in videos:
-            download_photo_id = video.strip().split(" + ")[1]
-            download_video_title = video.strip().split(" + ")[3]
-            download_video_duration = video.strip().split(" + ")[4]
-            download_video_play_cnt = video.strip().split(" + ")[2]
-            download_video_comment_cnt = video.strip().split(" + ")[5]
-            download_video_like_cnt = video.strip().split(" + ")[6]
-            download_video_share_cnt = video.strip().split(" + ")[7]
-            download_video_resolution = video.strip().split(" + ")[8]
-            download_video_width = download_video_resolution.split("*")[0]
-            download_video_height = download_video_resolution.split("*")[-1]
-            download_video_send_time = video.strip().split(" + ")[9]
-            download_user_name = video.strip().split(" + ")[10]
-            download_head_url = video.strip().split(" + ")[11]
-            download_cover_url = video.strip().split(" + ")[12]
-            download_video_url = video.strip().split(" + ")[13]
-            download_video_session = video.strip().split(" + ")[-1]
-
-            if cls.kuaishou_download_rule(download_video_duration,
-                                          download_video_width,
-                                          download_video_height,
-                                          download_video_play_cnt,
-                                          download_video_like_cnt,
-                                          download_video_share_cnt) is True:
-                Common.crawler_log().info("开始下载快手视频:{}".format(download_video_title))
-                # 下载封面
-                Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
-                # 下载视频
-                Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
-
-                # 保存视频信息至 kuaishou_videoid.txt
-                with open(r"./txt/kuaishou_videoid.txt", "a", encoding="UTF-8") as fa:
-                    fa.write(download_photo_id + "\n")
-
-                # 添加视频 ID 到 list,用于统计当次下载总数
-                cls.download_video_list.append(download_photo_id)
-
-                # # 保存视频信息至 {today}_kuaishou_videoid.txt
-                # with open("./txt/" + str(Common.today) + "_kuaishou_videoid.txt", "a", encoding="UTF-8") as fc:
-                #     fc.write(download_photo_id + "\n")
-
-                # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                with open(r"./videos/" + download_video_title + "/info.txt", "a", encoding="UTF-8") as f_a:
-                    f_a.write(str(download_photo_id) + "\n" +
-                              str(download_video_title) + "\n" +
-                              str(download_video_duration) + "\n" +
-                              str(download_video_play_cnt) + "\n" +
-                              str(download_video_comment_cnt) + "\n" +
-                              str(download_video_like_cnt) + "\n" +
-                              str(download_video_share_cnt) + "\n" +
-                              str(download_video_resolution) + "\n" +
-                              str(download_video_send_time) + "\n" +
-                              str(download_user_name) + "\n" +
-                              str(download_head_url) + "\n" +
-                              str(download_video_url) + "\n" +
-                              str(download_cover_url) + "\n" +
-                              str(download_video_session))
-
-                # 上传视频
-                if env == "dev":
-                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
-                    Publish.upload_and_publish("dev", "play")
-                elif env == "prod":
-                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
-                    Publish.upload_and_publish("prod", "play")
-
-                # 删除该视频在kuaishou_feeds.txt中的信息
-                Common.crawler_log().info("删除该视频在kuaishou_feeds.txt中的信息:{}".format(download_video_title))
-                with open(r"./txt/kuaishou_feeds.txt", "r", encoding="UTF-8") as f_r:
-                    lines = f_r.readlines()
-                with open(r"./txt/kuaishou_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line in lines:
-                        if download_photo_id in line.split(" + ")[1]:
-                            continue
-                        f_w.write(line)
-            else:
-                # 删除该视频在 recommend.txt中的信息
-                Common.crawler_log().info("该视频不满足下载规则,删除在kuaishou_feeds.txt中的信息:{}".format(download_video_title))
-                with open(r"./txt/kuaishou_feeds.txt", "r", encoding="UTF-8") as f_r:
-                    lines = f_r.readlines()
-                with open(r"./txt/kuaishou_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line in lines:
-                        if download_photo_id in line.split(" + ")[1]:
-                            continue
-                        f_w.write(line)
-
-
-if __name__ == "__main__":
-    kuaishou = KuaiShou()
-    kuaishou.kuaishou_get_recommend()

+ 0 - 346
main/download_weishi.py

@@ -1,346 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/4/8
-import json
-import time
-
-import requests
-import urllib3
-from main.common import Common
-from main.publish import Publish
-
-proxies = {"http": None, "https": None}
-
-
-class Weishi:
-    @staticmethod
-    def weishi_download_rule(d_duration, d_width, d_height, d_play_cnt):
-        """
-        下载视频的基本规则
-        :param d_duration: 时长
-        :param d_width: 宽
-        :param d_height: 高
-        :param d_play_cnt: 播放量
-        :return: 满足规则,返回 True;反之,返回 False
-        """
-        if 600 >= int(float(d_duration)) >= 60:
-            if int(d_width) >= 720 or int(d_height) >= 720:
-                if int(d_play_cnt) >= 100000:
-                    return True
-                else:
-                    return False
-            return False
-        return False
-
-    @classmethod
-    def get_weishi_recommend(cls):
-        """
-        从微视小程序首页推荐获取视频list:
-            1.在 weishi_videoid.txt 中去重
-            2.在 weishi_feeds.txt 中去重
-            3.添加视频信息到 weishi_feeds.txt
-        """
-        url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFeedList"
-        cookies = {
-            "wesee_authtype": "3",
-            "wesee_openid": "oWGa05FrwkuUvT-4n1qGeQuhVsc8",
-            "wesee_openkey": "8c3ec202f5d679fb5ee6d9f643640d9a2580ba504612e2d979a881d3169caf189e2a5c1d532eeff172bc21cf2"
-                             "6230941ccbc10243a7879e8165ca608c17060de606a6d08afe0a3abd5250629314f9a99e9d1003b201bf5ec",
-            "wesee_personid": "1593522421826902",
-            "wesee_refresh_token": "",
-            "wesee_access_token": "8c3ec202f5d679fb5ee6d9f643640d9a2580ba504612e2d979a881d3169caf18"
-                                  "9e2a5c1d532eeff172bc21cf26230941ccbc10243a7879e8165ca608c17060de6"
-                                  "06a6d08afe0a3abd5250629314f9a99e9d1003b201bf5ec",
-            "wesee_thr_appid": "wx75ee9f19b93e5c46",
-            "wesee_ichid": "8"
-        }
-        json_data = {
-            "req_body": {
-                "requestType": 16,
-                "isrefresh": 0,
-                "isfirst": 0,
-                "attachInfo": "",
-                "scene_id": 22,
-                "requestExt": {
-                    "mini_openid": "oWGa05FrwkuUvT-4n1qGeQuhVsc8",
-                    "notLogin-personid": "1593522421826902"
-                }
-            },
-            "req_header": {
-                "mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\"}"
-            }
-        }
-
-        try:
-            urllib3.disable_warnings()
-            r = requests.post(url=url, cookies=cookies, json=json_data, proxies=proxies, verify=False)
-            response = json.loads(r.content.decode("utf8"))
-            if "rsp_body" not in response:
-                Common.crawler_log().info("获取微视视频 list 出错:{},休眠 10s".format(response))
-                time.sleep(10)
-            else:
-                feeds = response["rsp_body"]["feeds"]
-                for i in range(len(feeds)):
-                    if "video" not in feeds[i]:
-                        Common.crawler_log().info("无视频信息")
-                    else:
-                        # 视频 ID
-                        if "id" not in feeds[i]["video"]:
-                            video_id = "0"
-                            Common.crawler_log().info("video_id:{}".format(video_id))
-                        else:
-                            video_id = feeds[i]["video"]["id"]
-                            Common.crawler_log().info("video_id:{}".format(video_id))
-
-                        # 视频标题
-                        video_title = feeds[i]["desc"].strip().replace("\n", "") \
-                            .replace("/", "").replace("快手", "").replace(" ", "") \
-                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                        Common.crawler_log().info("video_title:{}".format(video_title))
-
-                        # 视频发布时间
-                        if "createTime" not in feeds[i]:
-                            video_send_time = "0"
-                            Common.crawler_log().info("video_send_time:不存在")
-                        else:
-                            video_send_time = int(feeds[i]["createTime"])*1000
-                            Common.crawler_log().info(
-                                "video_send_time:{}".format(time.strftime(
-                                    "%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time)/1000))))
-
-                        # 视频封面地址
-                        if len(feeds[i]["images"]) == 0:
-                            cover_url = "0"
-                            Common.crawler_log().info("cover_url:不存在")
-                        else:
-                            cover_url = feeds[i]["images"][0]["url"]
-                            Common.crawler_log().info("cover_url:{}".format(cover_url))
-
-                        # 视频播放地址
-                        if "url" not in feeds[i]["video"]:
-                            video_url = "0"
-                            Common.crawler_log().info("video_url:不存在")
-                        else:
-                            video_url = feeds[i]["video"]["url"]
-                            Common.crawler_log().info("video_url:{}".format(video_url))
-
-                        # 视频分辨率
-                        if "width" not in feeds[i]["video"] or "height" not in feeds[i]["video"]:
-                            video_width = "0"
-                            video_height = "0"
-                            video_resolution = str(video_width) + "*" + str(video_height)
-                            Common.crawler_log().info("无分辨率")
-                        else:
-                            video_width = feeds[i]["video"]["width"]
-                            video_height = feeds[i]["video"]["height"]
-                            video_resolution = str(video_width) + "*" + str(video_height)
-                            Common.crawler_log().info("video_resolution:{}".format(video_resolution))
-
-                        # 视频时长
-                        if "duration" not in feeds[i]["video"]:
-                            video_duration = "0"
-                            Common.crawler_log().info("video_duration:不存在")
-                        else:
-                            video_duration = int(int(feeds[i]["video"]["duration"]) / 1000)
-                            Common.crawler_log().info("video_duration:{}秒".format(video_duration))
-
-                        # 播放数
-                        if "playNum" not in feeds[i]["ugcData"]:
-                            video_play_cnt = "0"
-                            Common.crawler_log().info("video_play_cnt:{}".format(video_play_cnt))
-                        else:
-                            video_play_cnt = feeds[i]["ugcData"]["playNum"]
-                            Common.crawler_log().info("video_play_cnt:{}".format(video_play_cnt))
-
-                        # 点赞数
-                        if "dingCount" not in feeds[i]["ugcData"]:
-                            video_like_cnt = "0"
-                            Common.crawler_log().info("video_like_cnt:{}".format(video_like_cnt))
-                        else:
-                            video_like_cnt = feeds[i]["ugcData"]["dingCount"]
-                            Common.crawler_log().info("video_like_cnt:{}".format(video_like_cnt))
-
-                        # 分享数
-                        if "shareNum" not in feeds[i]["ugcData"]:
-                            video_share_cnt = "0"
-                            Common.crawler_log().info("video_share_cnt:{}".format(video_share_cnt))
-                        else:
-                            video_share_cnt = feeds[i]["ugcData"]["shareNum"]
-                            Common.crawler_log().info("video_share_cnt:{}".format(video_share_cnt))
-
-                        # 评论数
-                        if "totalCommentNum" not in feeds[i]["ugcData"]:
-                            video_comment_cnt = "0"
-                            Common.crawler_log().info("video_comment_cnt:{}".format(video_comment_cnt))
-                        else:
-                            video_comment_cnt = feeds[i]["ugcData"]["totalCommentNum"]
-                            Common.crawler_log().info("video_comment_cnt:{}".format(video_comment_cnt))
-
-                        # 用户 ID
-                        user_id = feeds[i]["poster"]["id"]
-                        Common.crawler_log().info("user_id:{}".format(user_id))
-
-                        # 用户昵称
-                        user_name = feeds[i]["poster"]["nick"].strip().replace("\n", "") \
-                            .replace("/", "").replace("快手", "").replace(" ", "") \
-                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                        Common.crawler_log().info("user_name:{}".format(user_name))
-
-                        # 用户头像地址
-                        if "thumbURL" not in feeds[i]["material"] and "avatar" not in feeds[i]["poster"]:
-                            head_url = "0"
-                            Common.crawler_log().info("head_url:不存在")
-                        elif "thumbURL" in feeds[i]["material"]:
-                            head_url = feeds[i]["material"]["thumbURL"]
-                            Common.crawler_log().info("head_url:{}".format(head_url))
-                        else:
-                            head_url = feeds[i]["poster"]["avatar"]
-                            Common.crawler_log().info("head_url:{}".format(head_url))
-
-                        # 从 weishi_videoid.txt 中去重
-                        videos_ids = Common.read_txt("weishi_videoid.txt")
-                        if video_id in [v_id.strip() for v_id in videos_ids]:
-                            Common.crawler_log().info("该视频已下载:{}".format(video_title))
-                            pass
-                        else:
-                            Common.crawler_log().info("该视频未下载:{}".format(video_title))
-
-                            # 从 weishi_feeds.txt 中去重
-                            contents = Common.read_txt("weishi_feeds.txt")
-                            # 若 weishi_feeds.txt 为空时,直接保存
-                            if len(contents) == 0 and head_url != "0" \
-                                    and cover_url != "0" and video_url != "0" \
-                                    and video_duration != "0" and video_id != "0":
-                                basic_time = int(time.time())
-                                Common.crawler_log().info("添加视频信息至weishi_feeds.txt:{}".format(video_title))
-                                with open(r"./txt/weishi_feeds.txt", "a", encoding="UTF-8") as f_a:
-                                    f_a.write(str(basic_time) + " + " +
-                                              str(video_id) + " + " +
-                                              str(video_play_cnt) + " + " +
-                                              str(video_title) + " + " +
-                                              str(video_duration) + " + " +
-                                              str(video_comment_cnt) + " + " +
-                                              str(video_like_cnt) + " + " +
-                                              str(video_share_cnt) + " + " +
-                                              str(video_resolution) + " + " +
-                                              str(video_send_time) + " + " +
-                                              str(user_name) + " + " +
-                                              str(head_url) + " + " +
-                                              str(cover_url) + " + " +
-                                              str(video_url) + " + " +
-                                              str(user_id) + " + " +
-                                              str("oWGa05FrwkuUvT-4n1qGeQuhVsc8") + "\n")
-                            else:
-                                if video_id in [content.split(" + ")[1] for content in contents]:
-                                    Common.crawler_log().info("该视频已在 weishi_feeds.txt 中:{}".format(video_title))
-                                elif head_url == "0" or cover_url == "0" \
-                                        or video_url == "0" or video_duration == "0" or video_id == "0":
-                                    Common.crawler_log().info("视频封面/播放地址/播放时长/用户头像不存在")
-                                else:
-                                    basic_time = int(time.time())
-                                    Common.crawler_log().info("添加视频信息至weishi_feeds.txt:{}".format(video_title))
-                                    with open(r"./txt/weishi_feeds.txt", "a", encoding="UTF-8") as f_a:
-                                        f_a.write(str(basic_time) + " + " +
-                                                  str(video_id) + " + " +
-                                                  str(video_play_cnt) + " + " +
-                                                  str(video_title) + " + " +
-                                                  str(video_duration) + " + " +
-                                                  str(video_comment_cnt) + " + " +
-                                                  str(video_like_cnt) + " + " +
-                                                  str(video_share_cnt) + " + " +
-                                                  str(video_resolution) + " + " +
-                                                  str(video_send_time) + " + " +
-                                                  str(user_name) + " + " +
-                                                  str(head_url) + " + " +
-                                                  str(cover_url) + " + " +
-                                                  str(video_url) + " + " +
-                                                  str(user_id) + " + " +
-                                                  str("oWGa05FrwkuUvT-4n1qGeQuhVsc8") + "\n")
-        except Exception as e:
-            Common.crawler_log().error("获取微视视频 list 异常:{}".format(e))
-
-    @classmethod
-    def download_weishi_play_video(cls, env):
-        """
-        下载播放量视频
-        测试环境:env == dev
-        正式环境:env == prod
-        """
-        videos = Common.read_txt("weishi_feeds.txt")
-        for video in videos:
-            download_video_id = video.strip().split(" + ")[1]
-            download_video_title = video.strip().split(" + ")[3]
-            download_video_duration = video.strip().split(" + ")[4]
-            download_video_play_cnt = video.strip().split(" + ")[2]
-            download_video_comment_cnt = video.strip().split(" + ")[5]
-            download_video_like_cnt = video.strip().split(" + ")[6]
-            download_video_share_cnt = video.strip().split(" + ")[7]
-            download_video_resolution = video.strip().split(" + ")[8]
-            download_video_width = download_video_resolution.split("*")[0]
-            download_video_height = download_video_resolution.split("*")[-1]
-            download_video_send_time = video.strip().split(" + ")[9]
-            download_user_name = video.strip().split(" + ")[10]
-            download_head_url = video.strip().split(" + ")[11]
-            download_cover_url = video.strip().split(" + ")[12]
-            download_video_url = video.strip().split(" + ")[13]
-            download_video_session = video.strip().split(" + ")[-1]
-
-            if cls.weishi_download_rule(download_video_duration, download_video_width,
-                                        download_video_height, download_video_play_cnt) is True:
-                Common.crawler_log().info("开始下载视频:{}".format(download_video_title))
-                # 下载封面
-                Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
-                # 下载视频
-                Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
-                # 保存视频信息至 weishi_videoid.txt
-                with open(r"./txt/weishi_videoid.txt", "a", encoding="UTF-8") as fa:
-                    fa.write(download_video_id + "\n")
-                # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                with open(r"./videos/" + download_video_title + "/info.txt", "a", encoding="UTF-8") as f_a:
-                    f_a.write(str(download_video_id) + "\n" +
-                              str(download_video_title) + "\n" +
-                              str(download_video_duration) + "\n" +
-                              str(download_video_play_cnt) + "\n" +
-                              str(download_video_comment_cnt) + "\n" +
-                              str(download_video_like_cnt) + "\n" +
-                              str(download_video_share_cnt) + "\n" +
-                              str(download_video_resolution) + "\n" +
-                              str(download_video_send_time) + "\n" +
-                              str(download_user_name) + "\n" +
-                              str(download_head_url) + "\n" +
-                              str(download_video_url) + "\n" +
-                              str(download_cover_url) + "\n" +
-                              str(download_video_session))
-
-                # 上传视频
-                if env == "dev":
-                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
-                    Publish.upload_and_publish("dev", "play")
-                elif env == "prod":
-                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
-                    Publish.upload_and_publish("prod", "play")
-
-                # 删除该视频在weishi_feeds.txt中的信息
-                Common.crawler_log().info("删除该视频在weishi_feeds.txt中的信息:{}".format(download_video_title))
-                with open(r"./txt/weishi_feeds.txt", "r", encoding="UTF-8") as f_r:
-                    lines = f_r.readlines()
-                with open(r"./txt/weishi_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line in lines:
-                        if download_video_id in line.split(" + ")[1]:
-                            continue
-                        f_w.write(line)
-            else:
-                # 删除该视频在weishi_feeds.txt中的信息
-                Common.crawler_log().info("该视频不满足下载规则,删除在weishi_feeds.txt中的信息:{}".format(download_video_title))
-                with open(r"./txt/weishi_feeds.txt", "r", encoding="UTF-8") as f_r:
-                    lines = f_r.readlines()
-                with open(r"./txt/weishi_feeds.txt", "w", encoding="utf-8") as f_w:
-                    for line in lines:
-                        if download_video_id in line.split(" + ")[1]:
-                            continue
-                        f_w.write(line)
-
-
-if __name__ == "__main__":
-    weishi = Weishi()
-    weishi.get_weishi_recommend()

+ 432 - 0
main/recommend.py

@@ -0,0 +1,432 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/4/8
+import json
+import os
+import sys
+import time
+import requests
+import urllib3
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.feishu_lib import Feishu
+from main.publish import Publish
+proxies = {"http": None, "https": None}
+
+
class DownloadRecommend:
    """Crawler for the Weishi mini-program "recommend" feed.

    Workflow:
      1. ``get_feeds`` pulls one page of the recommend feed and appends
         qualifying videos to the Feishu "feeds" sheet (O7fCzr).
      2. ``download_publish`` takes one row from that sheet, downloads and
         uploads the video, then archives its ID in the "downloaded"
         sheet (caa3fa) for de-duplication.

    All credentials/config and crawl state live in Feishu spreadsheets.
    """

    # Mini-program session credentials, read from the Feishu config sheet.
    # NOTE(review): these calls run at import time; if Feishu is unreachable
    # the module fails to import — consider loading lazily.
    Referer = Feishu.get_range_value("recommend", "9fTK1f", "C3:C3")[0]
    wesee_openid = Feishu.get_range_value("recommend", "9fTK1f", "C4:C4")[0]
    wesee_openkey = Feishu.get_range_value("recommend", "9fTK1f", "C5:C5")[0]
    wesee_personid = Feishu.get_range_value("recommend", "9fTK1f", "C6:C6")[0]
    wesee_access_token = Feishu.get_range_value("recommend", "9fTK1f", "C7:C7")[0]
    wesee_thr_appid = Feishu.get_range_value("recommend", "9fTK1f", "C8:C8")[0]

    # 过滤词库
    @classmethod
    def sensitive_words(cls):
        """Return all sensitive words from the Feishu word sheet (2Oxf8C).

        Empty cells are skipped. Each call performs one HTTP request, so
        hoist the result out of loops.
        """
        rows = Feishu.get_values_batch("recommend", "2Oxf8C")
        return [cell for row in rows for cell in row if cell is not None]

    # 抓取基础规则
    @staticmethod
    def download_rule(d_duration, d_width, d_height, d_play_cnt, d_like_cnt, d_share_cnt):
        """Basic qualification rule for downloading a video.

        :param d_duration: duration in seconds
        :param d_width: width in pixels
        :param d_height: height in pixels
        :param d_play_cnt: play count
        :param d_like_cnt: like count
        :param d_share_cnt: share count
        :return: True when the video qualifies, otherwise False
        """
        # Flattened form of the original nested ifs: at least 30s long and
        # at least one side >= 720px. The count thresholds are currently 0,
        # i.e. effectively disabled, but kept for easy tuning.
        return (
            int(float(d_duration)) >= 30
            and (int(d_width) >= 720 or int(d_height) >= 720)
            and int(d_play_cnt) >= 0
            and int(d_like_cnt) >= 0
            and int(d_share_cnt) >= 0
        )

    @staticmethod
    def _before_sep(text, sep):
        """Return the part of *text* before the first *sep*.

        When *text* starts with *sep* (empty head), return the LAST
        fragment instead — this mirrors the original topic-stripping
        behaviour exactly.
        """
        parts = text.split(sep)
        return parts[0] if parts[0] != "" else parts[-1]

    # 抓取列表
    @classmethod
    def get_feeds(cls):
        """Fetch one page of the recommend feed and stage qualifying videos.

        1. Request the Weishi mini-program recommend feed.
        2. De-duplicate against the "downloaded" sheet:
           https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=caa3fa
        3. De-duplicate against the "feeds" sheet:
           https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=O7fCzr
        4. Insert the remaining videos at the top of the feeds sheet.
        """
        url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFeedList"
        headers = {
            "content-type": "application/json",
            "Accept-Encoding": "gzip,compress,br,deflate",
            "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
                          " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"
                          " MicroMessenger/8.0.20(0x18001442) NetType/WIFI Language/zh_CN",
            "Referer": str(cls.Referer)
        }
        cookies = {
            "wesee_authtype": "3",
            "wesee_openid": str(cls.wesee_openid),
            "wesee_openkey": str(cls.wesee_openkey),
            "wesee_personid": str(cls.wesee_personid),
            "wesee_refresh_token": "",
            "wesee_access_token": str(cls.wesee_access_token),
            "wesee_thr_appid": str(cls.wesee_thr_appid),
            "wesee_ichid": "8"
        }
        json_data = {
            "req_body": {
                "requestType": 16,
                "isrefresh": 1,
                "isfirst": 1,
                "attachInfo": "",
                "scene_id": 22,
                "requestExt": {
                    "mini_openid": str(cls.wesee_openid),
                    "notLogin-personid": str(cls.wesee_personid)
                }
            },
            "req_header": {
                "mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\"}"
            }
        }

        try:
            urllib3.disable_warnings()
            r = requests.post(headers=headers, url=url, cookies=cookies, json=json_data,
                              proxies=proxies, verify=False)
            response = json.loads(r.content.decode("utf8"))
            feeds = response["rsp_body"]["feeds"]
            # One word-list fetch per page instead of one per video.
            word_list = cls.sensitive_words()
            for feed in feeds:
                video_info = feed["video"]
                ugc_data = feed["ugcData"]

                # Strip topic tags ("#...") and @-mentions from the title.
                weishi_title = feed["desc"]
                title1 = cls._before_sep(weishi_title, " #")
                # NOTE(review): the second pass also splits on " #", identical
                # to the first — possibly intended to be "#". Kept as-is to
                # preserve behaviour.
                title2 = cls._before_sep(title1, " #")
                title3 = cls._before_sep(title2, "@")
                # Remove characters that break file paths / downstream systems.
                video_title = title3.strip().replace("\n", "") \
                    .replace("/", "").replace("快手", "").replace(" ", "") \
                    .replace(" ", "").replace("&NBSP", "").replace("\r", "") \
                    .replace("#", "").replace(".", "。").replace("\\", "") \
                    .replace(":", "").replace("*", "").replace("?", "") \
                    .replace("?", "").replace('"', "").replace("<", "") \
                    .replace(">", "").replace("|", "").replace("微视", "")

                # Missing fields default to 0 and are filtered out below.
                video_id = video_info.get("id", 0)
                video_play_cnt = ugc_data.get("playNum", 0)
                video_like_cnt = ugc_data.get("dingCount", 0)
                video_share_cnt = ugc_data.get("shareNum", 0)
                video_comment_cnt = ugc_data.get("totalCommentNum", 0)

                # Duration is reported in milliseconds.
                if "duration" in video_info:
                    video_duration = int(int(video_info["duration"]) / 1000)
                else:
                    video_duration = 0

                if "width" in video_info and "height" in video_info:
                    video_width = video_info["width"]
                    video_height = video_info["height"]
                else:
                    video_width = 0
                    video_height = 0
                video_resolution = str(video_width) + "*" + str(video_height)

                # Publish time, converted from seconds to milliseconds.
                if "createTime" in feed:
                    video_send_time = int(feed["createTime"]) * 1000
                else:
                    video_send_time = 0

                user_name = feed["poster"]["nick"].strip().replace("\n", "") \
                    .replace("/", "").replace("快手", "").replace(" ", "") \
                    .replace(" ", "").replace("&NBSP", "").replace("\r", "").replace("微视", "")
                user_id = feed["poster"]["id"]

                # Prefer the material thumbnail, fall back to the avatar.
                if "thumbURL" in feed["material"]:
                    head_url = feed["material"]["thumbURL"]
                elif "avatar" in feed["poster"]:
                    head_url = feed["poster"]["avatar"]
                else:
                    head_url = 0

                cover_url = feed["images"][0]["url"] if len(feed["images"]) != 0 else 0
                video_url = video_info.get("url", 0)

                Common.logger("recommend").info("video_title:{}".format(video_title))
                Common.logger("recommend").info("video_id:{}".format(video_id))
                Common.logger("recommend").info("video_play_cnt:{}".format(video_play_cnt))
                Common.logger("recommend").info("video_like_cnt:{}".format(video_like_cnt))
                Common.logger("recommend").info("video_share_cnt:{}".format(video_share_cnt))
                Common.logger("recommend").info("video_duration:{}秒".format(video_duration))
                Common.logger("recommend").info(
                    "video_send_time:{}".format(time.strftime(
                        "%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time) / 1000))))
                Common.logger("recommend").info("user_name:{}".format(user_name))
                Common.logger("recommend").info("video_url:{}".format(video_url))

                # Skip videos missing any essential field.
                if video_id == 0 or video_duration == 0 or video_send_time == 0 \
                        or head_url == 0 or cover_url == 0 or video_url == 0:
                    Common.logger("recommend").info("无效视频")
                # Basic qualification rule.
                elif cls.download_rule(video_duration, video_width, video_height,
                                       video_play_cnt, video_like_cnt, video_share_cnt) is False:
                    Common.logger("recommend").info("不满足基础规则")
                # Sensitive-word filter; empty words are ignored, matching the
                # original `any(word if word in ... else False ...)` check.
                elif any(word and word in weishi_title for word in word_list):
                    Common.logger("recommend").info("视频已中敏感词:{}".format(weishi_title))
                # De-dup against the "downloaded" sheet (caa3fa).
                elif video_id in [cell for row in Feishu.get_values_batch("recommend", "caa3fa") for cell in row]:
                    Common.logger("recommend").info("该视频已下载:{}", video_title)
                # De-dup against the "feeds" sheet (O7fCzr).
                elif video_id in [cell for row in Feishu.get_values_batch("recommend", "O7fCzr") for cell in row]:
                    Common.logger("recommend").info("该视频已在feeds中:{}", video_title)
                else:
                    Common.logger("recommend").info("该视频未下载,添加至feeds中:{}".format(video_title))
                    # Insert a fresh first row, then write into it; the sleeps
                    # keep us under the Feishu API rate limit.
                    time.sleep(1)
                    Feishu.insert_columns("recommend", "O7fCzr", "ROWS", 1, 2)
                    get_feeds_time = int(time.time())
                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(get_feeds_time))),
                               "推荐榜",
                               video_id,
                               video_title,
                               video_play_cnt,
                               video_comment_cnt,
                               video_like_cnt,
                               video_share_cnt,
                               video_duration,
                               video_resolution,
                               time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(int(video_send_time / 1000))),
                               user_name,
                               user_id,
                               head_url,
                               cover_url,
                               video_url]]
                    time.sleep(1)
                    Feishu.update_values("recommend", "O7fCzr", "A2:P2", values)
        except Exception as e:
            Common.logger("recommend").error("获取微视视频list异常:{}".format(e))

    # 下载/上传视频
    @classmethod
    def download_publish(cls):
        """Download and publish at most ONE video from the feeds sheet.

        Walks the data rows of the feeds sheet (O7fCzr) and, for the first
        actionable row, either drops it (blank row / share count < 1000 /
        already downloaded) or downloads + uploads the video, archives it
        in the "downloaded" sheet (caa3fa) and removes it from feeds.
        Returns after handling a single row because sheet row indices shift
        once a row is deleted.
        """
        try:
            # Fetch the sheet ONCE. The original re-fetched it for every
            # cell (16+ HTTP calls per row) and iterated up to
            # len(...) + 1, which raised IndexError past the last row and
            # tripped the destructive except handler below.
            feeds_sheet = Feishu.get_values_batch("recommend", "O7fCzr")
            for i in range(1, len(feeds_sheet)):  # row 0 is the header
                time.sleep(1)
                row = feeds_sheet[i]
                download_video_id = row[2]
                download_video_title = row[3]
                download_video_play_cnt = row[4]
                download_video_comment_cnt = row[5]
                download_video_like_cnt = row[6]
                download_video_share_cnt = row[7]
                download_video_duration = row[8]
                download_video_resolution = row[9]
                download_video_send_time = row[10]
                download_user_name = row[11]
                download_user_id = row[12]
                download_head_url = row[13]
                download_cover_url = row[14]
                download_video_url = row[15]

                Common.logger("recommend").info("正在判断第{}行,视频:{}", i, download_video_title)

                # Blank row: delete it and stop this run.
                if download_video_id is None \
                        or download_video_id == "" \
                        or download_video_title is None \
                        or download_video_title == "":
                    Common.logger("recommend").warning("空行,删除")
                    # Sheet rows are 1-based and row 1 is the header, hence i + 1.
                    Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
                    return
                # Require share count >= 1000.
                elif int(download_video_share_cnt) < 1000:
                    Common.logger("recommend").info("分享量:{} < 1000", download_video_share_cnt)
                    Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
                    return
                # Already downloaded: drop the duplicate row.
                elif download_video_id in [cell for r in Feishu.get_values_batch("recommend", "caa3fa") for cell in r]:
                    Common.logger("recommend").info("该视频已下载:{}", download_video_title)
                    Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
                    return
                else:
                    Common.logger("recommend").info("开始下载视频:{}", download_video_title)
                    # Download cover image, then the video itself.
                    Common.download_method(job="recommend", text="cover",
                                           d_name=str(download_video_title), d_url=str(download_cover_url))
                    Common.download_method(job="recommend", text="video",
                                           d_name=str(download_video_title), d_url=str(download_video_url))
                    # Persist metadata next to the downloaded files for Publish.
                    with open("./videos/" + download_video_title
                              + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
                        f_a.write(str(download_video_id) + "\n" +
                                  str(download_video_title) + "\n" +
                                  str(download_video_duration) + "\n" +
                                  str(download_video_play_cnt) + "\n" +
                                  str(download_video_comment_cnt) + "\n" +
                                  str(download_video_like_cnt) + "\n" +
                                  str(download_video_share_cnt) + "\n" +
                                  str(download_video_resolution) + "\n" +
                                  str(int(time.mktime(
                                      time.strptime(download_video_send_time, "%Y/%m/%d %H:%M:%S")))) + "\n" +
                                  str(download_user_name) + "\n" +
                                  str(download_head_url) + "\n" +
                                  str(download_video_url) + "\n" +
                                  str(download_cover_url) + "\n" +
                                  str(cls.wesee_access_token))
                    Common.logger("recommend").info("==========视频信息已保存至info.txt==========")

                    Common.logger("recommend").info("开始上传视频:{}".format(download_video_title))
                    Publish.upload_and_publish("recommend", "prod", "play")

                    # Archive the video ID in the "downloaded" sheet for dedup:
                    # https://w42nne6hzg.feishu.cn/sheets/shtcn5YSWg91JfVGzj0SFZIRRPh?sheet=caa3fa
                    Common.logger("recommend").info("保存视频ID至云文档:{}", download_video_title)
                    Feishu.insert_columns("recommend", "caa3fa", "ROWS", 1, 2)
                    upload_time = int(time.time())
                    values = [[str(time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time))),
                               "推荐榜",
                               str(download_video_id),
                               str(download_video_title),
                               download_video_play_cnt,
                               download_video_comment_cnt,
                               download_video_like_cnt,
                               download_video_share_cnt,
                               download_video_duration,
                               str(download_video_resolution),
                               str(download_video_send_time),
                               str(download_user_name),
                               str(download_user_id),
                               str(download_head_url),
                               str(download_cover_url),
                               str(download_video_url)]]
                    time.sleep(1)
                    # NOTE(review): 16 values written into the 17-column range
                    # A2:Q2 — confirm the Feishu API tolerates the mismatch.
                    Feishu.update_values("recommend", "caa3fa", "A2:Q2", values)

                    # Remove the processed row from the feeds sheet.
                    Feishu.dimension_range("recommend", "O7fCzr", "ROWS", i + 1, i + 1)
                    return
        except Exception as e:
            Common.logger("recommend").error("下载/上传视频异常:{}", e)
            # Drop the top data row so one poisoned row cannot wedge the job
            # forever. NOTE(review): this also discards rows on transient
            # errors (network, rate limit) — consider retrying instead.
            Feishu.dimension_range("recommend", "O7fCzr", "ROWS", 2, 2)
+
+
+if __name__ == "__main__":
+    weishi = DownloadRecommend()
+    for n in range(2):
+        Common.logger("recommend").info("正在抓取第{}页视频", n + 1)
+        weishi.get_feeds()
+
+    # print(weishi.Referer)
+    # print(weishi.wesee_openid)
+    # print(weishi.wesee_openkey)
+    # print(weishi.wesee_personid)
+    # print(weishi.wesee_access_token)
+    # print(weishi.wesee_thr_appid)
+    # print(weishi.json_text)

+ 0 - 138
main/run.py

@@ -1,138 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/3/30
-import os
-import random
-import sys
-import time
-from apscheduler.schedulers.blocking import BlockingScheduler
-sys.path.append(os.getcwd())
-from main.common import Common
-from main.download_weishi import Weishi
-from main.download_kuaishou import KuaiShou
-
-
-def kuaishou_dev_job():
-    """
-    执行测试环境快手脚本
-    """
-    while True:
-        # 当天下载及上传的视频数:20 条
-        if len(KuaiShou.download_video_list) >= 20:
-            time.sleep(60)
-            break
-        else:
-            Common.crawler_log().info("开始抓取快手视频")
-            time.sleep(1)
-
-            # 抓取符合规则的视频,写入 kuaishou_feeds.txt
-            KuaiShou.kuaishou_get_recommend()
-            # 下载视频,并上传
-            KuaiShou.kuaishou_download_play_video("dev")
-            # 随机睡眠1-3s
-            time.sleep(random.randint(1, 3))
-
-    # 删除冗余日志
-    Common.del_logs()
-    # 统计下载视频数
-    Common.kuaishou_download_count()
-
-
-def weishi_dev_job():
-    """
-    执行测试环境微视脚本
-    """
-    while True:
-        if 14 >= Common.now.hour >= 5:
-            Common.crawler_log().info("结束抓取及上传任务")
-            break
-        else:
-            # 抓取符合规则的视频,写入 weishi_feeds.txt
-            Weishi.get_weishi_recommend()
-            # 下载视频,并上传
-            Weishi.download_weishi_play_video("dev")
-            # 随机睡眠1-3s
-            time.sleep(random.randint(1, 3))
-
-    # 删除冗余日志
-    Common.del_logs()
-    # 统计下载视频数
-    Common.weishi_download_count()
-
-
-def main_dev():
-    """
-    测试环境主函数
-    """
-    scheduler = BlockingScheduler(timezone="Asia/Shanghai")
-    # 抓取视频的定时任务,在每天10点的40分,运行一次 job 方法
-    scheduler.add_job(kuaishou_dev_job, 'cron', hour=19, minute=10, misfire_grace_time=60)
-    # 开始运行脚本
-    scheduler.start()
-
-
-def weishi_prod_job():
-    """
-    执行正式环境微视脚本
-    """
-    while True:
-        if 20 >= Common.now.hour >= 5:
-            Common.crawler_log().info("结束抓取微视视频任务")
-            break
-        else:
-            # 抓取符合规则的视频,写入 weishi_feeds.txt
-            Weishi.get_weishi_recommend()
-            # 下载视频,并上传
-            Weishi.download_weishi_play_video("prod")
-            # 随机睡眠1-3s
-            time.sleep(random.randint(1, 3))
-
-    # 删除冗余日志
-    Common.del_logs()
-    # 统计下载视频数
-    Common.weishi_download_count()
-
-
-def kuaishou_prod_job():
-    """
-    执行正式环境快手脚本
-    """
-    # while True:
-    #     # 当天下载及上传的视频数:200 条
-    #     if len(KuaiShou.download_video_list) >= 200:
-    #         time.sleep(60)
-    #         break
-    #     else:
-
-    Common.crawler_log().info("开始抓取快手视频")
-
-    # 抓取符合规则的视频,写入 kuaishou_feeds.txt
-    KuaiShou.kuaishou_get_recommend()
-    # 下载视频,并上传
-    KuaiShou.kuaishou_download_play_video("prod")
-    # 随机睡眠1-3s
-    time.sleep(random.randint(1, 3))
-
-    # 删除冗余日志
-    Common.del_logs()
-    # 统计下载视频数
-    Common.kuaishou_download_count()
-
-
-def main_prod():
-    """
-    正式环境主函数
-    """
-    while True:
-        kuaishou_prod_job()
-
-    # scheduler = BlockingScheduler(timezone="Asia/Shanghai")
-    # # 抓取视频的定时任务,在每天10点的40分,运行一次 job 方法
-    # scheduler.add_job(kuaishou_prod_job, 'cron', hour=8, minute=00, misfire_grace_time=60)
-    # # 开始运行脚本
-    # scheduler.start()
-
-
-if __name__ == "__main__":
-    # main_dev()
-    main_prod()

+ 79 - 0
main/run_recommend.py

@@ -0,0 +1,79 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/30
+import datetime
+import os
+import random
+import sys
+import time
+from apscheduler.schedulers.blocking import BlockingScheduler
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.download_recommend import DownloadRecommend
+
+
+def weishi_dev_job():
+    """
+    执行测试环境微视脚本
+    """
+    while True:
+        weishi_dev_time = datetime.datetime.now()
+        if weishi_dev_time.hour >= 20 or weishi_dev_time.hour <= 10:
+            # 抓取符合规则的视频,写入 weishi_feeds.txt
+            DownloadRecommend.get_weishi_recommend()
+            # 下载视频,并上传
+            DownloadRecommend.download_weishi_play_video("dev")
+            # 随机睡眠1-3s
+            time.sleep(random.randint(1, 3))
+        else:
+            Common.crawler_log().info("结束抓取及上传任务")
+            break
+
+    # 删除冗余日志
+    Common.del_logs()
+    # 统计下载视频数
+    Common.weishi_download_count()
+
+
+def main_dev():
+    """
+    测试环境主函数
+    """
+    while True:
+        # Common.crawler_log().info("开始抓取微视视频")
+        weishi_dev_job()
+
+
+def weishi_prod_job():
+    """
+    执行正式环境微视脚本
+    """
+    while True:
+        if 20 >= Common.now.hour >= 5:
+            Common.crawler_log().info("结束抓取微视视频任务")
+            break
+        else:
+            # 抓取符合规则的视频,写入 weishi_feeds.txt
+            DownloadRecommend.get_weishi_recommend()
+            # 下载视频,并上传
+            DownloadRecommend.download_weishi_play_video("prod")
+            # 随机睡眠1-3s
+            time.sleep(random.randint(1, 3))
+
+    # 删除冗余日志
+    Common.del_logs()
+    # 统计下载视频数
+    Common.weishi_download_count()
+
+
+def main_prod():
+    """
+    正式环境主函数
+    """
+    while True:
+        weishi_prod_job()
+
+
+if __name__ == "__main__":
+    main_dev()
+    # main_prod()

+ 0 - 3
txt/__init__.py

@@ -1,3 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/3/30

+ 0 - 0
txt/kuaishou_feeds.txt


+ 0 - 0
txt/kuaishou_videoid.txt


+ 0 - 0
txt/weishi_feeds.txt


+ 0 - 0
txt/weishi_videoid.txt


+ 0 - 19
抓取规则.txt

@@ -1,19 +0,0 @@
-==========2022/4/15===========
-一、按照数据指标抓取
-1、任务开始时间:
-- 每天早上8点-晚上21点
-2、抓取规则:
-  - 视频播放量点赞量5万+ ,分享量2000+
-  - 视频时长1分钟以上,10分钟以下
-  - 视频分辨率720以上
-  - 站内标题=快手视频原标题 (需要过滤掉标题中的话题#  #)
-  - 站内封面图=快手视频原封面图
-3、站内承接:
-- 每日入库200条视频
-- 视频随机分配到10个虚拟账号。
-4、特别注意:
-- 视频需要排重,已经抓取过得视频,不要重复抓取
-- 需要对视频库进行持续扫描:如1条视频上周未达到5万+点赞,本周达到了5万点赞,则进行抓取。
-5、新增爬虫视频标题过滤词
--  集结吧光合创作者、电影解说、快来露两手、分享家常美食教程、光合作者助手、创作者中心、创作者学院、娱乐星熠计划、解说电影、电影剪辑、放映室、老剧、影视剪辑、精彩片段、冬日影娱大作战、春日追剧计划单、影视解说、中视频影视混剪计划、众志成城共抗疫情、我在追好剧、娱乐星灿计划、电影、电视剧、毛泽东、毛主席、周恩来、林彪、习近平、习大大、彭丽媛、怀旧经典影视
-==============================