qingqu-git 3 years ago
commit
6dda8b1f2b

+ 63 - 0
.gitignore

@@ -0,0 +1,63 @@
+# ---> Python
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+.DS_Store
+.idea/
+

+ 1 - 0
README.md

@@ -0,0 +1 @@
+Crawler for the Kuaishou and Weishi WeChat mini programs

+ 3 - 0
logs/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/30

+ 3 - 0
main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/29

+ 180 - 0
main/common.py

@@ -0,0 +1,180 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/30
+"""
+Shared helpers: log creation / log cleanup / download helpers / file reading / download counters
+"""
+from datetime import date, timedelta
+import datetime
+import logging
+import os
+import time
+import requests
+import urllib3
+
+
+class Common:
+    # Current time, e.g. <class 'datetime.datetime'>  2022-04-14 20:13:51.244472
+    now = datetime.datetime.now()
+    # Yesterday, e.g. <class 'str'>  2022-04-13
+    yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
+    # Today, e.g. <class 'datetime.date'>  2022-04-14
+    today = date.today()
+    # Tomorrow, e.g. <class 'str'>  2022-04-15
+    tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+
+    @staticmethod
+    def crawler_log():
+        """
+        生成 log 日志
+        """
+        # 日志路径
+        log_dir = "./logs/"
+        log_path = os.getcwd() + os.sep + log_dir
+        if not os.path.isdir(log_path):
+            os.makedirs(log_path)
+
+        # 日志参数
+        log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        date_format = "%Y-%m-%d %p %H:%M:%S"
+        log_name = time.strftime("%Y-%m-%d", time.localtime(time.time())) + '.log'
+
+        # 日志初始化
+        logging.basicConfig(filename=log_path + log_name, level=logging.INFO, format=log_format, datefmt=date_format)
+        crawler_logger = logging.getLogger("crawler-log")
+
+        return crawler_logger
+
+    @classmethod
+    def del_logs(cls):
+        """
+        清除冗余日志文件
+        :return: 保留最近 7 个日志
+        """
+        log_dir = "./logs/"
+        all_files = sorted(os.listdir(log_dir))
+        all_logs = []
+        for log in all_files:
+            name = os.path.splitext(log)[-1]
+            if name == ".log":
+                all_logs.append(log)
+
+        if len(all_logs) <= 7:
+            pass
+        else:
+            for file in all_logs[:len(all_logs) - 7]:
+                os.remove(log_dir + file)
+        cls.crawler_log().info("清除冗余日志成功")
+
+    @classmethod
+    def download_method(cls, text, d_name, d_url):
+        """
+        下载封面:text == "cover" ; 下载视频:text == "video"
+        需要下载的视频标题:d_title
+        视频封面,或视频播放地址:d_url
+        下载保存路径:"./files/{d_title}/"
+        """
+        # 首先创建一个保存该视频相关信息的文件夹
+        video_dir = "./videos/" + d_name + "/"
+        if not os.path.exists(video_dir):
+            os.mkdir(video_dir)
+
+        # 下载视频
+        if text == "video":
+            # 需要下载的视频地址
+            video_url = d_url
+            # 视频名
+            video_name = "video.mp4"
+
+            # 下载视频
+            urllib3.disable_warnings()
+            response = requests.get(video_url, stream=True, verify=False)
+            try:
+                with open(video_dir + video_name, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=10240):
+                        f.write(chunk)
+                cls.crawler_log().info("==========视频下载完成==========")
+            except Exception as e:
+                cls.crawler_log().info("视频下载失败:{}".format(e))
+            # except FileNotFoundError:
+            #     cls.kuaishou_log().info("==========视频下载失败==========")
+
+        # 下载封面
+        elif text == "cover":
+            # 需要下载的封面地址
+            cover_url = d_url
+            # 封面名
+            cover_name = "image.jpg"
+
+            # 下载封面
+            urllib3.disable_warnings()
+            response = requests.get(cover_url, verify=False)
+            try:
+                with open(video_dir + cover_name, "wb") as f:
+                    f.write(response.content)
+                cls.crawler_log().info("==========封面下载完成==========")
+            except Exception as e:
+                cls.crawler_log().info("封面下载失败:{}".format(e))
+            # except FileNotFoundError:
+            #     cls.kuaishou_log().info("==========封面下载失败==========")
+
+    @staticmethod
+    def read_txt(t_name):
+        """
+        读取 txt 文件
+        :param t_name: 文件名
+        :return: 文件内容
+        """
+        with open("./txt/" + t_name, "r", encoding="utf8") as f:
+            return f.readlines()
+
+    @classmethod
+    def kuaishou_download_count(cls):
+        videoid_path = "./txt/kuaishou_videoid.txt"
+        count = 0
+        for count, line in enumerate(open(videoid_path, "rb").readlines()):
+            count += 1
+        cls.crawler_log().info('累计下载视频数: {}\n'.format(count))
+
+    @classmethod
+    def weishi_download_count(cls):
+        videoid_path = "./txt/weishi_videoid.txt"
+        count = 0
+        for count, line in enumerate(open(videoid_path, "rb").readlines()):
+            count += 1
+        cls.crawler_log().info('累计下载视频数: {}\n'.format(count))
+
+    @classmethod
+    def kuaishou_today_download_count(cls):
+        """
+        统计快手渠道当日下载视频数
+        :return:
+        """
+        # 创建空文件
+        with open("./txt/" + str(cls.today) + "_kuaishou_videoid.txt", "a") as f:
+            f.write("")
+        videoid_path = "./txt/" + str(cls.today) + "_kuaishou_videoid.txt"
+        count = 0
+        for count, line in enumerate(open(videoid_path, "rb").readlines()):
+            count += 1
+        return count
+
+    @classmethod
+    def del_yesterday_kuaishou_videoid_txt(cls):
+        """
+        删除快手渠道昨日下载视频数的 txt 文件
+        :return:
+        """
+        yesterday_kuaishou_videoid_txt_dir = "./txt/"
+        all_files = sorted(os.listdir(yesterday_kuaishou_videoid_txt_dir))
+        for file in all_files:
+            name = os.path.splitext(file)[0]
+            if name == cls.yesterday + "_kuaishou_videoid":
+                os.remove(yesterday_kuaishou_videoid_txt_dir + file)
+        Common.crawler_log().info("删除快手昨天下载统计文件成功")
+
+
+if __name__ == "__main__":
+    common = Common()
+    common.del_yesterday_kuaishou_videoid_txt()
+    print(common.kuaishou_today_download_count())

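Note: kuaishou_download_count, weishi_download_count and kuaishou_today_download_count in main/common.py all repeat the same line-counting pattern. A minimal sketch of a shared helper (count_lines is a hypothetical name, not part of this commit):

    import os

    def count_lines(path):
        # Count records in a one-ID-per-line txt file; a missing file counts as 0.
        if not os.path.exists(path):
            return 0
        with open(path, "rb") as f:
            return sum(1 for _ in f)

Each counter would then reduce to, e.g., count_lines("./txt/kuaishou_videoid.txt").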
+ 260 - 0
main/demo.py

@@ -0,0 +1,260 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/31
+from datetime import date, timedelta
+import datetime
+import json
+import re
+import time
+
+import requests
+import urllib3
+
+
+class Demo:
+    @classmethod
+    def demo1(cls):
+        download_video_resolution = "720*1280"
+        download_video_width = download_video_resolution.split("*")[0]
+        download_video_height = download_video_resolution.split("*")[-1]
+        print(download_video_resolution)
+        print(download_video_width)
+        print(download_video_height)
+
+    @classmethod
+    def time(cls):
+        # Recommended: millisecond timestamp
+        time1 = int(time.time()*1000)
+        print(time1)
+        # Not recommended: rounds to whole seconds first, so the ms part is lost
+        time2 = round(time.time())*1000
+        print(time2)
+
+        # Current time
+        now = datetime.datetime.now()
+        print(type(now))
+        print(f"now:{now}")
+        # Yesterday
+        yesterday = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d")
+        print(type(yesterday))
+        print(f"昨天:{yesterday}")
+        # Today
+        today = date.today()
+        print(type(today))
+        print(f"今天:{today}")
+        # Tomorrow
+        tomorrow = (date.today() + timedelta(days=1)).strftime("%Y-%m-%d")
+        print(type(tomorrow))
+        print(f"明天:{tomorrow}")
+
+    @classmethod
+    def get_douyin_feeds(cls):
+        """
+        Fetch the Douyin feed video list from https://www.douyin.com
+        """
+        url = "https://www.douyin.com/aweme/v1/web/tab/feed/?"
+        params = {
+            "device_platform": "webapp",
+            "aid": "6383",
+            "channel": "channel_pc_web",
+            "count": "10",
+            "refresh_index": "4",
+            "video_type_select": "0",
+            "version_code": "170400",
+            "version_name": "17.4.0",
+            "cookie_enabled": "true",
+            "screen_width": "1920",
+            "screen_height": "1080",
+            "browser_language": "zh-CN",
+            "browser_platform": "MacIntel",
+            "browser_name": "Chrome",
+            "browser_version": "99.0.4844.84",
+            "browser_online": "true",
+            "engine_name": "Blink",
+            "engine_version": "99.0.4844.84",
+            "os_name": "Mac OS",
+            "os_version": "10.15.7",
+            "platform": "PC",
+            "cpu_core_num": "8",
+            "device_memory": "8",
+            "downlink": "10",
+            "effective_type": "4g",
+            "round_trip_time": "50",
+            "msToken": "304uY1lV7HmHkR1G1QUaFqg0yrL5_WqrFOR8qCbl3hOsl8aSNI_18vIfpTGNhNRVZx7ysRiCHpcBKhpujTsbbC"
+                       "ZEDbG7pllZzlO3tlrBOs2TFYUgJdsvbw==",
+            "X-Bogus": "DFSzswVYPVsANat/Sl8eGc3WxM23",
+            "_signature": "qaJgTwAAy.aVqLslyfC7aKmiYF"
+        }
+        cookies = {
+            "_tea_utm_cache_6383": "undefined",
+            "ttwid": "1%7CETZk6sDMDSBgewWhKJXghFN4cwXTz0fLuhsLEngD_Nk%7C1648812136%7Cfa66fa81ccfe3f552f4"
+                     "e8b8327e72cbbc5e897141c25a5fcd32defaed1466d3e",
+            "passport_csrf_token": "e2d0f1ed9fd22463be9f389137a781ce",
+            "passport_csrf_token_default": "e2d0f1ed9fd22463be9f389137a781ce",
+            "s_v_web_id": "verify_l1h7nzwr_ABN0FA2f_BTrM_4zSH_8WPN_2KY2iZFmbhE2",
+            "_tea_utm_cache_1300": "undefined",
+            "_tea_utm_cache_2285": "undefined",
+            "ttcid": "3220eeda36a244beadd32a4b44d2044b31",
+            "douyin.com": "",
+            "__ac_nonce": "06247fb0f00f050ccc9b2",
+            "__ac_signature": "_02B4Z6wo00f01AN7DoAAAIDB5nv.qI7xGZQDWwoAAGKfo4rd5YCAYF8o5PyppIpsdKxV0k2NerO"
+                              "f1VEQr3eJftkpgon9tcveDVpmfY555vzTTvRznegS1ax3KJXnoav2ZdEoYzwR3wDszPCk5d",
+            "strategyABtestKey": "1648865029.449",
+            "AB_LOGIN_GUIDE_TIMESTAMP": "1648865029279",
+            "THEME_STAY_TIME": "299621",
+            "IS_HIDE_THEME_CHANGE": "1",
+            "home_can_add_dy_2_desktop": "0",
+            "tt_scid": "vUl8CBW1SMQp2l5GmUIja5A6ziY1LByrsoN.P-wvKuutiB8ftvlfK.9ZEeehNC5u821d",
+            "pwa_guide_count": "2",
+            "msToken": "EHCmp9Qw7PAChI3do-MQPjOR29hf4ZFLYNrGl89HkFKdO5Iwb8n7z5fpETrgim2zFTIkGT"
+                       "ObOxH7HCrHCLVEX5eAuwAS1A2sjKH4MHEfjfPqA06Lo4v9Pw==",
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.get(url=url, params=params, cookies=cookies, verify=False)
+            # response = json.loads(r.content.decode("utf8"))
+            print(r)
+            print(type(r.text))
+            print(r.text)
+        except Exception as e:
+            print(e)
+
+    @classmethod
+    def demo2(cls):
+        s = "0"
+        print(int(int(s) / 10))
+
+    @classmethod
+    def get_weishi_feeds(cls):
+        url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFeedList"
+        cookies = {
+            "wesee_authtype": "3",
+            "wesee_openid":	"oWGa05FrwkuUvT-4n1qGeQuhVsc8",
+            "wesee_openkey": "8c3ec202f5d679fb5ee6d9f643640d9a2580ba504612e2d979a881d3169caf189e2a5c1d532eeff172bc21cf2"
+                             "6230941ccbc10243a7879e8165ca608c17060de606a6d08afe0a3abd5250629314f9a99e9d1003b201bf5ec",
+            "wesee_personid": "1593522421826902",
+            "wesee_refresh_token": "",
+            "wesee_access_token": "8c3ec202f5d679fb5ee6d9f643640d9a2580ba504612e2d979a881d3169caf18"
+                                  "9e2a5c1d532eeff172bc21cf26230941ccbc10243a7879e8165ca608c17060de6"
+                                  "06a6d08afe0a3abd5250629314f9a99e9d1003b201bf5ec",
+            "wesee_thr_appid": "wx75ee9f19b93e5c46",
+            "wesee_ichid": "8"
+        }
+        json_data = {
+            "req_body": {
+                "requestType": 16,
+                "isrefresh": 0,
+                "isfirst": 0,
+                "attachInfo": "",
+                "scene_id": 22,
+                "requestExt": {
+                    "mini_openid": "oWGa05FrwkuUvT-4n1qGeQuhVsc8",
+                    "notLogin-personid": "1593522421826902"
+                }
+            },
+            "req_header": {
+                "mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\"}"
+            }
+        }
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=url, cookies=cookies, json=json_data, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            feeds = response["rsp_body"]["feeds"]
+            for feed in feeds:
+                print(feed)
+        except Exception as e:
+            print(e)
+
+    @classmethod
+    def edit_str(cls):
+        title_list = ["#上海战疫 上海累计感染超20万!这条被淹没的热搜,令全网泪目… 疫情一定要攻克,但所有人都不该遗忘这些弱者。#上海累计报告本土阳性感染者超20万例 #农民工",
+                      "#重庆地火村 #地火村 #旅行",
+                      "第79集 | 湖南最值得去的六个景区,每一个都是绝色…… #快手带你去旅行 #旅游胜地 #旅游",
+                      "霸王条款不废除,断供有多可怕。 #涨知识 #生活小常识 # 生活常识",
+                      "秦始皇还活着?地宫中有不明物体缓缓移动 #历史 #秦始皇 #新春寄语  @快手热点(O40300129)",
+                      "#夏日荷花  #国花牡丹 #昙花一现",
+                      "国内最良心的8个景区,这才是景区最该有的样子,看看你去过几个? #旅行  #旅游 ",
+                      "狗子呆在水里三天三夜,终于练成捕鱼神功,一口一个大鲶鱼 #狗狗  #神奇动物  #快手放映室  @快手热点(O40300129) ",
+                      "#集结吧光合创作者  养鸡小伙:喂鸡摆出各种造型,被称为鸡司令。",
+                      "89岁农民老艺人自食其力,街头卖艺表演“捏碎碗片”绝技,现场听到咔吱咔吱响,人狠功夫硬!这功夫已失传,以后再看不到了!#集结吧光合创作者 #农民 #街头表演  @快手光合作者助手(O40300118)  @快手热点(O40300129)  @我要上热门(O1907752910)",
+                      "我国最贵最有名的三棵树,你知道哪三棵吗?#旅游 #旅行攻略 #黄山迎客松",
+                      "潘长江带来热舞,蔡明 郭达也来了!太嗨了!歌词太棒了! @快手涨粉助手(O1815060199)  @快手热点(O40300129)  @快手平台帐号(O90041) #潘长江 #搞笑 #集结吧光合创作者",
+                      "#带你看世界 给大家带来一期烟花盛宴,希望大家能够喜欢,带上你的那个她一起来看吧 #烟花 #视觉震撼"
+                      ]
+        for title in title_list:
+            title_split1 = title.split(" #")
+            if title_split1[0] != "":
+                title1 = title_split1[0]
+            else:
+                title1 = title_split1[0]
+
+            title_split2 = title1.split(" #")
+            if title_split2[0] != "":
+                title2 = title_split2[0]
+            else:
+                title2 = title_split2[-1]
+
+            title_split3 = title2.split("@")
+            if title_split3[0] != "":
+                title3 = title_split3[0]
+            else:
+                title3 = title_split3[-1]
+
+            print(title3)
+            title = title3.replace("\n", "").replace("#", "").replace("/", "").replace("\r", "")
+            print(title)
+
+        # new_title = re.compile(r'(#)(.*)(#)')
+        # print(new_title.sub(r'', title))
+
+    @classmethod
+    def kuaishou_sensitive_words(cls):
+        sensitive_words = [
+            "汽车",
+            "电影解说",
+            "放映室",
+            "解说电影",
+            "断供",
+        ]
+        return sensitive_words
+
+    @classmethod
+    def sensitive_words(cls):
+        title_list = ["#上海战疫 上海累计感染超20万!这条被淹没的热搜,令全网泪目… 疫情一定要攻克,但所有人都不该遗忘这些弱者。#上海累计报告本土阳性感染者超20万例 #农民工",
+                      "#重庆地火村 #地火村 #旅行",
+                      "第79集 | 湖南最值得去的六个景区,每一个都是绝色…… #快手带你去旅行 #旅游胜地 #旅游",
+                      "霸王条款不废除,断供有多可怕。 #涨知识 #生活小常识 # 生活常识",
+                      "秦始皇还活着?地宫中有不明物体缓缓移动 #历史 #秦始皇 #新春寄语  @快手热点(O40300129)",
+                      "#夏日荷花  #国花牡丹 #昙花一现",
+                      "国内最良心的8个景区,这才是景区最该有的样子,看看你去过几个? #旅行  #旅游 ",
+                      "狗子呆在水里三天三夜,终于练成捕鱼神功,一口一个大鲶鱼 #狗狗  #神奇动物  #快手放映室  @快手热点(O40300129) ",
+                      "#集结吧光合创作者  养鸡小伙:喂鸡摆出各种造型,被称为鸡司令。",
+                      "89岁农民老艺人自食其力,街头卖艺表演“捏碎碗片”绝技,现场听到咔吱咔吱响,人狠功夫硬!这功夫已失传,以后再看不到了!#集结吧光合创作者 #农民 #街头表演  @快手光合作者助手(O40300118)  @快手热点(O40300129)  @我要上热门(O1907752910)",
+                      "我国最贵最有名的三棵树,你知道哪三棵吗?#旅游 #旅行攻略 #黄山迎客松",
+                      "潘长江带来热舞,蔡明 郭达也来了!太嗨了!歌词太棒了! @快手涨粉助手(O1815060199)  @快手热点(O40300129)  @快手平台帐号(O90041) #潘长江 #搞笑 #集结吧光合创作者",
+                      "#带你看世界 给大家带来一期烟花盛宴,希望大家能够喜欢,带上你的那个她一起来看吧 #烟花 #视觉震撼"
+                      ]
+        print(cls.kuaishou_sensitive_words())
+        for title in title_list:
+            # kuaishou_sensitive_words() returns a fresh list on every call, so
+            # calling .remove() on it had no effect; collect matches instead
+            matched = [word for word in cls.kuaishou_sensitive_words() if word in title]
+            if matched:
+                print(f"敏感词:{matched}")
+                print(f"敏感词视频:{title}")
+            else:
+                print(f"正常视频:{title}")
+
+
+if __name__ == "__main__":
+    demo = Demo()
+    # demo.demo1()
+    demo.time()
+    # demo.get_douyin_feeds()
+    # demo.demo2()
+    # demo.get_weishi_feeds()
+    # demo.edit_str()
+    # demo.sensitive_words()
+
+    pass

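Note: the split-based title cleaning in demo.py (and in main/download_kuaishou.py) can be expressed as a single regex pass, which the commented-out re.compile line hints at. A minimal sketch (clean_title is a hypothetical helper, not part of this commit):

    import re

    def clean_title(raw):
        # Split out "#topic" tags and "@user(id)" mentions, keep the first
        # non-empty remaining segment, then strip control characters.
        parts = [p.strip() for p in re.split(r"#\S*|@\S+", raw) if p.strip()]
        text = parts[0] if parts else ""
        return text.replace("\n", "").replace("\r", "").replace("/", "")

    print(clean_title("秦始皇还活着?地宫中有不明物体缓缓移动 #历史 #秦始皇 @快手热点(O40300129)"))
    # -> 秦始皇还活着?地宫中有不明物体缓缓移动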
+ 457 - 0
main/download_kuaishou.py

@@ -0,0 +1,457 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/29
+"""
+Download rule-matching videos from the Kuaishou short-video WeChat mini program.
+"""
+import json
+import time
+
+import requests
+import urllib3
+from main.common import Common
+from main.publish import Publish
+
+
+class KuaiShou:
+    # 已下载视频列表
+    download_video_list = []
+
+    @classmethod
+    def kuaishou_sensitive_words(cls):
+        sensitive_words = [
+            "集结吧光合创作者",
+            "电影解说",
+            "快来露两手",
+            "分享家常美食教程",
+            "光合作者助手",
+            "创作者中心",
+            "创作者学院",
+            "娱乐星熠计划",
+            "解说电影",
+            "电影剪辑",
+            "放映室",
+            "老剧",
+            "影视剪辑",
+            "精彩片段",
+            "冬日影娱大作战",
+            "春日追剧计划单",
+            "影视解说",
+            "中视频影视混剪计划",
+            "众志成城共抗疫情",
+            "我在追好剧",
+            "娱乐星灿计划",
+            "电影",
+            "电视剧",
+            "毛泽东",
+            "毛主席",
+            "周恩来",
+            "林彪",
+            "习近平",
+            "习大大",
+            "彭丽媛",
+            "怀旧经典影视",
+        ]
+        return sensitive_words
+
+    @staticmethod
+    def kuaishou_download_rule(d_duration, d_width, d_height,
+                               d_play_cnt, d_like_cnt, d_share_cnt):
+        """
+        下载视频的基本规则
+        :param d_duration: 时长
+        :param d_width: 宽
+        :param d_height: 高
+        :param d_play_cnt: 播放量
+        :param d_like_cnt: 点赞量
+        :param d_share_cnt: 分享量
+        :return: 满足规则,返回 True;反之,返回 False
+        """
+        return (60 <= int(float(d_duration)) <= 600
+                and (int(d_width) >= 720 or int(d_height) >= 720)
+                and int(d_play_cnt) >= 50000
+                and int(d_like_cnt) >= 50000
+                and int(d_share_cnt) >= 2000)
+
+    @classmethod
+    def kuaishou_get_recommend(cls):
+        """
+        Fetch the video list from the Kuaishou mini-program home-page feed:
+            1. Deduplicate against kuaishou_videoid.txt
+            2. Deduplicate against kuaishou_feeds.txt
+            3. Append the video info to kuaishou_feeds.txt
+        """
+        url = "https://wxmini-api.uyouqu.com/rest/wd/wechatApp/feed/recommend"
+        params = {
+            "__NS_sig3": "e6f6b281ea31e3d7d1bbb8b91f662576fc25f7c3a7a7a5a5aaaba8b2",
+            "__NS_sig3_origin": "3sCt3iAAAAAAAAAAAAAAAwEQBv2b8ewCwkZKaiAAAAAPg0soi"
+                                "e7GiOlU vF4zPrG1Nl6xvaoBgFd3MwTzOed9w=="
+        }
+        cookies = {
+            "did": "wxo_05f915ac6b1deca87db36cea1a0fd18fae6c",
+            "preMinaVersion": "v3.109.0",
+            "sid": "kuaishou.wechat.app",
+            "appId": "ks_wechat_small_app_2",
+            "clientid": "13",
+            "client_key": "f60ac815",
+            "kpn": "WECHAT_SMALL_APP",
+            "kpf": "OUTSIDE_ANDROID_H5",
+            "language": "zh_CN",
+            "smallAppVersion": "v3.109.0",
+            "session_key": "123005bcc551a92aac29cdb96190251c9f492c29d4ba6c502dc"
+                           "0d2f8b8d18df356a2f7a22d6924d1dd34b8554a64af49b1bb1a"
+                           "1236cd2f69c25d4ac2a2531ebcd28c179da14b222023f9e111c"
+                           "c4d3b064ac7b0915d8c9fdaccb59e4048e96a5c38a32b2ce9f4abf628053001",
+            "unionid": "V2:1230b56c8337908c3eecba63142a58daca05535c1f14bf67d3d8"
+                       "85cace91a7db335c5572d204762d075f24aa84412e2955711a12bb9"
+                       "2bd9c2290489ba7a733708a4a446de83822205ab727650489dda0db"
+                       "9d2a226c5ddb66d88a1f1373283a3d3b959611d816660028053001",
+            "eUserStableOpenId": "12303325e8710eb802137c70fd1fb65997a4e5e33d82"
+                                 "cddd409d335d096e20873e07ee472090133bc7a67e5c"
+                                 "749da045d9a31a12da4c4c26181d432b873ec39432f4"
+                                 "10196c6c2220323d0e6b562d1b3786aefb352b4e509c"
+                                 "d96f3466b7b2e5e74b904a94c40792d928053001",
+            "openId": "o5otV45DcV1EUsWw4fAUk_iq0YSA",
+            "eOpenUserId": "124074b7726c996283f25044a42e2c7427e929cd6d968c5342"
+                           "330e61fc8939e57b0da4ffe21887f3abc8784175f73e1a267d"
+                           "671247273806f293f64c9c8c2adc00a21a12bb92bd9c229048"
+                           "9ba7a733708a4a446de8382220534aa79c69b74866bb09187e"
+                           "eceec880fa1e0fa421b7df8b3289dab603b17c4828053001",
+            "kuaishou.wechat.app_st": "ChZrdWFpc2hvdS53ZWNoYXQuYXBwLnN0ErAB8aO"
+                                      "EcB6jh4CMSJ-p_4BJFCId0PKNa_5IeFfeV_tj7q"
+                                      "CjdXK0y13CSte6-KHbNK9BPo6Rjy3OGny0sh4Zb"
+                                      "5AUl3Q_zqVXe2TunW7_F3nlTdJOdZ6iVIhPrHa1"
+                                      "CM0Y-cG9gS4FDDzTvejfWaTI0CbjfNN0RZXzYVE"
+                                      "AUVT_BNgUVDtYBbEY792gPylMfXxwxKMSzkhaDe"
+                                      "eaHkGCWUj62FGCFYQ9Fw2W3d7suCXFsNylqT4aE"
+                                      "s8oNwmycUiygfvfKuoXlHkbeSIgOhEFMZ3ArImS"
+                                      "vFY_OwLJDHak1iXRO8g5TwzHTvBT3WcoBTAB",
+            "passToken": "ChNwYXNzcG9ydC5wYXNzLXRva2VuEpABI42IhPCJHfFngXC3i-vF"
+                         "3daRTB-EtnAYyE6HpfWcPoZ6VSRDvKrom_RvltQ2zKk1T3_FJteb"
+                         "mv7ZzQLD7IicnTypaGoeflb7KQVrAv50Mp_JL4ObfBu_xTiwI53t"
+                         "bTlM6iML0G7DFd16K5z0jZZ1xECKVQQbk_vIqnseUujFIWAsKcDz"
+                         "BqqfnQNbUU5DzDUkGhKgKyzmNjRDxLfpDU5SPFhJmG0iIGBZ_Vd-"
+                         "7eT8i_Xit9ZPM-zdFpnRZFveFE9iplMg8Z06KAUwAQ",
+            "userId": "2845397958"
+        }
+        json_data = {
+            "thirdPartyUserId": 2845397958,
+            "photoId": "5250352807040393911",
+            "forwardUserId": 2845397958,
+            "count": 10,
+            "portal": 2,
+            "pageType": 2,
+            "needLivestream": "true",
+            "extraRequestInfo": "{\"scene\":1074,\"fid\":\"2845397958\","
+                                "\"sharerUserId\":\"2845397958\",\"curPhotoIndex\":0,"
+                                "\"adShow\":true,\"weChatAd\":{},\"page\":0}",
+            "pcursor": 0,
+            "sourceFrom": 2,
+        }
+
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=url, params=params, cookies=cookies, json=json_data, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            if "feeds" not in response:
+                Common.crawler_log().info("获取快手视频 list 出错:{},休眠 10s".format(response))
+                time.sleep(10)
+            else:
+                feeds = response["feeds"]
+                for i in range(len(feeds)):
+                    if "photoId" not in feeds[i]:
+                        photo_id = "0"
+                        Common.crawler_log().info("photo_id:{}".format(photo_id))
+                    else:
+                        photo_id = feeds[i]["photoId"]
+                        Common.crawler_log().info("photo_id:{}".format(photo_id))
+
+                    if "viewCount" not in feeds[i]:
+                        video_play_cnt = "0"
+                        Common.crawler_log().info("video_play_cnt:0")
+                    else:
+                        video_play_cnt = feeds[i]["viewCount"]
+                        Common.crawler_log().info("video_play_cnt:{}".format(video_play_cnt))
+
+                    if "likeCount" not in feeds[i]:
+                        video_like_cnt = "0"
+                        Common.crawler_log().info("video_like_cnt:0")
+                    else:
+                        video_like_cnt = feeds[i]["likeCount"]
+                        Common.crawler_log().info("video_like_cnt:{}".format(video_like_cnt))
+
+                    if "headUrl" not in feeds[i]:
+                        head_url = "0"
+                        Common.crawler_log().info("head_url:不存在")
+                    else:
+                        head_url = feeds[i]["headUrl"]
+                        Common.crawler_log().info("head_url:{}".format(head_url))
+
+                    if len(feeds[i]["coverUrls"]) == 0:
+                        cover_url = "0"
+                        Common.crawler_log().info("cover_url:不存在")
+                    else:
+                        cover_url = feeds[i]["coverUrls"][0]["url"]
+                        Common.crawler_log().info("cover_url:{}".format(cover_url))
+
+                    if len(feeds[i]["mainMvUrls"]) == 0:
+                        video_url = "0"
+                        Common.crawler_log().info("video_url:不存在")
+                    else:
+                        video_url = feeds[i]["mainMvUrls"][0]["url"]
+                        Common.crawler_log().info("video_url:{}".format(video_url))
+
+                    if "shareCount" not in feeds[i]:
+                        video_share_cnt = "0"
+                        Common.crawler_log().info("video_share_cnt:0")
+                    else:
+                        video_share_cnt = feeds[i]["shareCount"]
+                        Common.crawler_log().info("video_share_cnt:{}".format(video_share_cnt))
+
+                    if "width" not in feeds[i] or "height"not in feeds[i]:
+                        video_width = "0"
+                        video_height = "0"
+                        video_resolution = str(video_width) + "*" + str(video_height)
+                        Common.crawler_log().info("无分辨率")
+                    else:
+                        video_width = feeds[i]["width"]
+                        video_height = feeds[i]["height"]
+                        video_resolution = str(video_width) + "*" + str(video_height)
+                        Common.crawler_log().info("video_resolution:{}".format(video_resolution))
+
+                    if "commentCount" not in feeds[i]:
+                        video_comment_cnt = "0"
+                        Common.crawler_log().info("video_comment_cnt:0")
+                    else:
+                        video_comment_cnt = feeds[i]["commentCount"]
+                        Common.crawler_log().info("video_comment_cnt:{}".format(video_comment_cnt))
+
+                    if "duration" not in feeds[i]:
+                        video_duration = "0"
+                        Common.crawler_log().info("video_duration:不存在")
+                    else:
+                        video_duration = int(int(feeds[i]["duration"])/1000)
+                        Common.crawler_log().info("video_duration:{}秒".format(video_duration))
+
+                    if "timestamp" not in feeds[i]:
+                        video_send_time = "0"
+                        Common.crawler_log().info("video_send_time:不存在")
+                    else:
+                        video_send_time = feeds[i]["timestamp"]
+                        Common.crawler_log().info("video_send_time:{}".format(
+                            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time)/1000))))
+
+                    user_name = feeds[i]["userName"].strip().replace("\n", "")\
+                        .replace("/", "").replace("快手", "").replace(" ", "")\
+                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
+                    Common.crawler_log().info("user_name:{}".format(user_name))
+
+                    user_id = feeds[i]["userId"]
+                    Common.crawler_log().info("user_id:{}".format(user_id))
+
+                    # Strip topic tags and special characters from the title
+                    kuaishou_title = feeds[i]["caption"]
+                    title_split1 = kuaishou_title.split(" #")
+                    if title_split1[0] != "":
+                        title1 = title_split1[0]
+                    else:
+                        title1 = title_split1[-1]
+
+                    title_split2 = title1.split(" #")
+                    if title_split2[0] != "":
+                        title2 = title_split2[0]
+                    else:
+                        title2 = title_split2[-1]
+
+                    title_split3 = title2.split("@")
+                    if title_split3[0] != "":
+                        title3 = title_split3[0]
+                    else:
+                        title3 = title_split3[-1]
+
+                    video_title = title3.strip().replace("\n", "")\
+                        .replace("/", "").replace("快手", "").replace(" ", "")\
+                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")\
+                        .replace("#", "").replace(".", "。")
+
+                    Common.crawler_log().info("video_title:{}".format(video_title))
+
+                    # Deduplicate against kuaishou_videoid.txt
+                    photo_ids = Common.read_txt("kuaishou_videoid.txt")
+                    if photo_id in [p_id.strip() for p_id in photo_ids]:
+                        Common.crawler_log().info("该视频已下载:{}".format(video_title))
+                    else:
+                        Common.crawler_log().info("该视频未下载:{}".format(video_title))
+
+                        # Deduplicate against kuaishou_feeds.txt
+                        contents = Common.read_txt("kuaishou_feeds.txt")
+                        # If kuaishou_feeds.txt is empty, save directly
+                        if len(contents) == 0 and head_url != "0" \
+                                and cover_url != "0" and video_url != "0" \
+                                and video_duration != "0" and photo_id != "0":
+                            # Sensitive-word check
+                            if any(word in kuaishou_title
+                                   for word in cls.kuaishou_sensitive_words()):
+                                Common.crawler_log().info("视频已中敏感词:{}".format(kuaishou_title))
+                            else:
+                                basic_time = int(time.time())
+                                Common.crawler_log().info("添加视频信息至kuaishou_feeds.txt:{}".format(video_title))
+                                with open("./txt/kuaishou_feeds.txt", "a", encoding="utf8") as f_a:
+                                    f_a.write(str(basic_time) + " + " +
+                                              str(photo_id) + " + " +
+                                              str(video_play_cnt) + " + " +
+                                              str(video_title) + " + " +
+                                              str(video_duration) + " + " +
+                                              str(video_comment_cnt) + " + " +
+                                              str(video_like_cnt) + " + " +
+                                              str(video_share_cnt) + " + " +
+                                              str(video_resolution) + " + " +
+                                              str(video_send_time) + " + " +
+                                              str(user_name) + " + " +
+                                              str(head_url) + " + " +
+                                              str(cover_url) + " + " +
+                                              str(video_url) + " + " +
+                                              str(user_id) + " + " +
+                                              str("wxo_b07ba02ad4340205d89b47c76030bb090977") + "\n")
+                        else:
+                            if photo_id in [content.split(" + ")[1] for content in contents]:
+                                Common.crawler_log().info("该视频已在 kuaishou_feeds.txt 中:{}".format(video_title))
+                            elif head_url == "0" or cover_url == "0" \
+                                    or video_url == "0" or video_duration == "0" or photo_id == "0":
+                                Common.crawler_log().info("视频封面/播放地址/播放时长/用户头像不存在")
+                            else:
+                                # Sensitive-word check
+                                if any(word in kuaishou_title
+                                       for word in cls.kuaishou_sensitive_words()):
+                                    Common.crawler_log().info("视频已中敏感词:{}".format(kuaishou_title))
+                                else:
+                                    basic_time = int(time.time())
+                                    Common.crawler_log().info("添加视频信息至kuaishou_feeds.txt:{}".format(video_title))
+                                    with open("./txt/kuaishou_feeds.txt", "a", encoding="utf8") as f_a:
+                                        f_a.write(str(basic_time) + " + " +
+                                                  str(photo_id) + " + " +
+                                                  str(video_play_cnt) + " + " +
+                                                  str(video_title) + " + " +
+                                                  str(video_duration) + " + " +
+                                                  str(video_comment_cnt) + " + " +
+                                                  str(video_like_cnt) + " + " +
+                                                  str(video_share_cnt) + " + " +
+                                                  str(video_resolution) + " + " +
+                                                  str(video_send_time) + " + " +
+                                                  str(user_name) + " + " +
+                                                  str(head_url) + " + " +
+                                                  str(cover_url) + " + " +
+                                                  str(video_url) + " + " +
+                                                  str(user_id) + " + " +
+                                                  str("wxo_b07ba02ad4340205d89b47c76030bb090977") + "\n")
+        except Exception as e:
+            Common.crawler_log().error("获取视频 list 异常:{}".format(e))
+
+    @classmethod
+    def kuaishou_download_play_video(cls, env):
+        """
+        Download videos that satisfy the play-count rule.
+        Test environment: env == "dev"
+        Production environment: env == "prod"
+        """
+        videos = Common.read_txt("kuaishou_feeds.txt")
+        for video in videos:
+            download_photo_id = video.strip().split(" + ")[1]
+            download_video_title = video.strip().split(" + ")[3]
+            download_video_duration = video.strip().split(" + ")[4]
+            download_video_play_cnt = video.strip().split(" + ")[2]
+            download_video_comment_cnt = video.strip().split(" + ")[5]
+            download_video_like_cnt = video.strip().split(" + ")[6]
+            download_video_share_cnt = video.strip().split(" + ")[7]
+            download_video_resolution = video.strip().split(" + ")[8]
+            download_video_width = download_video_resolution.split("*")[0]
+            download_video_height = download_video_resolution.split("*")[-1]
+            download_video_send_time = video.strip().split(" + ")[9]
+            download_user_name = video.strip().split(" + ")[10]
+            download_head_url = video.strip().split(" + ")[11]
+            download_cover_url = video.strip().split(" + ")[12]
+            download_video_url = video.strip().split(" + ")[13]
+            download_video_session = video.strip().split(" + ")[-1]
+
+            if cls.kuaishou_download_rule(download_video_duration,
+                                          download_video_width,
+                                          download_video_height,
+                                          download_video_play_cnt,
+                                          download_video_like_cnt,
+                                          download_video_share_cnt):
+                Common.crawler_log().info("开始下载快手视频:{}".format(download_video_title))
+                # 下载封面
+                Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
+                # 下载视频
+                Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
+
+                # 保存视频信息至 kuaishou_videoid.txt
+                with open("./txt/kuaishou_videoid.txt", "a", encoding="utf8") as fa:
+                    fa.write(download_photo_id + "\n")
+
+                # 添加视频 ID 到 list,用于统计当次下载总数
+                cls.download_video_list.append(download_photo_id)
+
+                # # 保存视频信息至 {today}_kuaishou_videoid.txt
+                # with open("./txt/" + str(Common.today) + "_kuaishou_videoid.txt", "a", encoding="utf8") as fc:
+                #     fc.write(download_photo_id + "\n")
+
+                # 保存视频信息至 "./videos/{download_video_title}/info.txt"
+                with open("./videos/" + download_video_title + "/info.txt", "a", encoding="utf8") as f_a:
+                    f_a.write(str(download_photo_id) + "\n" +
+                              str(download_video_title) + "\n" +
+                              str(download_video_duration) + "\n" +
+                              str(download_video_play_cnt) + "\n" +
+                              str(download_video_comment_cnt) + "\n" +
+                              str(download_video_like_cnt) + "\n" +
+                              str(download_video_share_cnt) + "\n" +
+                              str(download_video_resolution) + "\n" +
+                              str(download_video_send_time) + "\n" +
+                              str(download_user_name) + "\n" +
+                              str(download_head_url) + "\n" +
+                              str(download_video_url) + "\n" +
+                              str(download_cover_url) + "\n" +
+                              str(download_video_session))
+
+                # Upload the video
+                if env == "dev":
+                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
+                    Publish.upload_and_publish("dev", "play")
+                elif env == "prod":
+                    Common.crawler_log().info("开始上传视频:{}".format(download_video_title))
+                    Publish.upload_and_publish("prod", "play")
+
+                # Remove this video's record from kuaishou_feeds.txt
+                Common.crawler_log().info("删除该视频在kuaishou_feeds.txt中的信息:{}".format(download_video_title))
+                with open("./txt/kuaishou_feeds.txt", "r", encoding="utf8") as f_r:
+                    lines = f_r.readlines()
+                with open("./txt/kuaishou_feeds.txt", "w", encoding="utf-8") as f_w:
+                    for line in lines:
+                        if download_photo_id == line.split(" + ")[1]:
+                            continue
+                        f_w.write(line)
+            else:
+                # The video fails the rule; remove its record from kuaishou_feeds.txt
+                Common.crawler_log().info("该视频不满足下载规则,删除在kuaishou_feeds.txt中的信息:{}".format(download_video_title))
+                with open("./txt/kuaishou_feeds.txt", "r", encoding="utf8") as f_r:
+                    lines = f_r.readlines()
+                with open("./txt/kuaishou_feeds.txt", "w", encoding="utf-8") as f_w:
+                    for line in lines:
+                        if download_photo_id == line.split(" + ")[1]:
+                            continue
+                        f_w.write(line)
+
+
+if __name__ == "__main__":
+    kuaishou = KuaiShou()
+    kuaishou.kuaishou_get_recommend()

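Note: kuaishou_feeds.txt / weishi_feeds.txt store one record per line as 16 fields joined by " + " (crawl time, video id, play count, title, duration, comment/like/share counts, resolution, send time, user name, avatar/cover/video URLs, user id, session). A sketch of parsing a record into named fields instead of positional indices (FeedRecord and parse_feed_line are hypothetical names, not part of this commit):

    from typing import NamedTuple

    class FeedRecord(NamedTuple):
        crawl_time: str
        video_id: str
        play_cnt: str
        title: str
        duration: str
        comment_cnt: str
        like_cnt: str
        share_cnt: str
        resolution: str
        send_time: str
        user_name: str
        head_url: str
        cover_url: str
        video_url: str
        user_id: str
        session: str

    def parse_feed_line(line: str) -> FeedRecord:
        # One record = 16 " + "-separated fields, newline-terminated.
        return FeedRecord(*line.strip().split(" + "))

Named access (record.play_cnt) would replace the video.strip().split(" + ")[2] indexing used in both download modules.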
+ 344 - 0
main/download_weishi.py

@@ -0,0 +1,344 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/4/8
+import json
+import time
+
+import requests
+import urllib3
+from main.common import Common
+from main.publish import Publish
+
+
+class Weishi:
+    @staticmethod
+    def weishi_download_rule(d_duration, d_width, d_height, d_play_cnt):
+        """
+        下载视频的基本规则
+        :param d_duration: 时长
+        :param d_width: 宽
+        :param d_height: 高
+        :param d_play_cnt: 播放量
+        :return: 满足规则,返回 True;反之,返回 False
+        """
+        return (60 <= int(float(d_duration)) <= 600
+                and (int(d_width) >= 720 or int(d_height) >= 720)
+                and int(d_play_cnt) >= 100000)
+
+    @classmethod
+    def get_weishi_recommend(cls):
+        """
+        Fetch the video list from the Weishi mini-program home-page feed:
+            1. Deduplicate against weishi_videoid.txt
+            2. Deduplicate against weishi_feeds.txt
+            3. Append the video info to weishi_feeds.txt
+        """
+        url = "https://api.weishi.qq.com/trpc.weishi.weishi_h5_proxy.weishi_h5_proxy/WxminiGetFeedList"
+        cookies = {
+            "wesee_authtype": "3",
+            "wesee_openid": "oWGa05FrwkuUvT-4n1qGeQuhVsc8",
+            "wesee_openkey": "8c3ec202f5d679fb5ee6d9f643640d9a2580ba504612e2d979a881d3169caf189e2a5c1d532eeff172bc21cf2"
+                             "6230941ccbc10243a7879e8165ca608c17060de606a6d08afe0a3abd5250629314f9a99e9d1003b201bf5ec",
+            "wesee_personid": "1593522421826902",
+            "wesee_refresh_token": "",
+            "wesee_access_token": "8c3ec202f5d679fb5ee6d9f643640d9a2580ba504612e2d979a881d3169caf18"
+                                  "9e2a5c1d532eeff172bc21cf26230941ccbc10243a7879e8165ca608c17060de6"
+                                  "06a6d08afe0a3abd5250629314f9a99e9d1003b201bf5ec",
+            "wesee_thr_appid": "wx75ee9f19b93e5c46",
+            "wesee_ichid": "8"
+        }
+        json_data = {
+            "req_body": {
+                "requestType": 16,
+                "isrefresh": 0,
+                "isfirst": 0,
+                "attachInfo": "",
+                "scene_id": 22,
+                "requestExt": {
+                    "mini_openid": "oWGa05FrwkuUvT-4n1qGeQuhVsc8",
+                    "notLogin-personid": "1593522421826902"
+                }
+            },
+            "req_header": {
+                "mapExt": "{\"imageSize\":\"480\",\"adaptScene\":\"PicHDWebpLimitScene\"}"
+            }
+        }
+
+        try:
+            urllib3.disable_warnings()
+            r = requests.post(url=url, cookies=cookies, json=json_data, verify=False)
+            response = json.loads(r.content.decode("utf8"))
+            if "rsp_body" not in response:
+                Common.crawler_log().info("获取微视视频 list 出错:{},休眠 10s".format(response))
+                time.sleep(10)
+            else:
+                feeds = response["rsp_body"]["feeds"]
+                for i in range(len(feeds)):
+                    if "video" not in feeds[i]:
+                        Common.crawler_log().info("无视频信息")
+                    else:
+                        # Video ID
+                        if "id" not in feeds[i]["video"]:
+                            video_id = "0"
+                            Common.crawler_log().info("video_id:{}".format(video_id))
+                        else:
+                            video_id = feeds[i]["video"]["id"]
+                            Common.crawler_log().info("video_id:{}".format(video_id))
+
+                        # Video title
+                        video_title = feeds[i]["desc"].strip().replace("\n", "") \
+                            .replace("/", "").replace("快手", "").replace(" ", "") \
+                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")
+                        Common.crawler_log().info("video_title:{}".format(video_title))
+
+                        # Video publish time
+                        if "createTime" not in feeds[i]:
+                            video_send_time = "0"
+                            Common.crawler_log().info("video_send_time:不存在")
+                        else:
+                            video_send_time = int(feeds[i]["createTime"])*1000
+                            Common.crawler_log().info(
+                                "video_send_time:{}".format(time.strftime(
+                                    "%Y-%m-%d %H:%M:%S", time.localtime(int(video_send_time)/1000))))
+
+                        # Cover image URL
+                        if len(feeds[i]["images"]) == 0:
+                            cover_url = "0"
+                            Common.crawler_log().info("cover_url:不存在")
+                        else:
+                            cover_url = feeds[i]["images"][0]["url"]
+                            Common.crawler_log().info("cover_url:{}".format(cover_url))
+
+                        # Video playback URL
+                        if "url" not in feeds[i]["video"]:
+                            video_url = "0"
+                            Common.crawler_log().info("video_url:不存在")
+                        else:
+                            video_url = feeds[i]["video"]["url"]
+                            Common.crawler_log().info("video_url:{}".format(video_url))
+
+                        # Video resolution
+                        if "width" not in feeds[i]["video"] or "height" not in feeds[i]["video"]:
+                            video_width = "0"
+                            video_height = "0"
+                            video_resolution = str(video_width) + "*" + str(video_height)
+                            Common.crawler_log().info("无分辨率")
+                        else:
+                            video_width = feeds[i]["video"]["width"]
+                            video_height = feeds[i]["video"]["height"]
+                            video_resolution = str(video_width) + "*" + str(video_height)
+                            Common.crawler_log().info("video_resolution:{}".format(video_resolution))
+
+                        # Video duration
+                        if "duration" not in feeds[i]["video"]:
+                            video_duration = "0"
+                            Common.crawler_log().info("video_duration:不存在")
+                        else:
+                            video_duration = int(int(feeds[i]["video"]["duration"]) / 1000)
+                            Common.crawler_log().info("video_duration:{}秒".format(video_duration))
+
+                        # Play count
+                        if "playNum" not in feeds[i]["ugcData"]:
+                            video_play_cnt = "0"
+                            Common.crawler_log().info("video_play_cnt:{}".format(video_play_cnt))
+                        else:
+                            video_play_cnt = feeds[i]["ugcData"]["playNum"]
+                            Common.crawler_log().info("video_play_cnt:{}".format(video_play_cnt))
+
+                        # Like count
+                        if "dingCount" not in feeds[i]["ugcData"]:
+                            video_like_cnt = "0"
+                            Common.crawler_log().info("video_like_cnt:{}".format(video_like_cnt))
+                        else:
+                            video_like_cnt = feeds[i]["ugcData"]["dingCount"]
+                            Common.crawler_log().info("video_like_cnt:{}".format(video_like_cnt))
+
+                        # Share count
+                        if "shareNum" not in feeds[i]["ugcData"]:
+                            video_share_cnt = "0"
+                            Common.crawler_log().info("video_share_cnt:{}".format(video_share_cnt))
+                        else:
+                            video_share_cnt = feeds[i]["ugcData"]["shareNum"]
+                            Common.crawler_log().info("video_share_cnt:{}".format(video_share_cnt))
+
+                        # Comment count
+                        if "totalCommentNum" not in feeds[i]["ugcData"]:
+                            video_comment_cnt = "0"
+                            Common.crawler_log().info("video_comment_cnt:{}".format(video_comment_cnt))
+                        else:
+                            video_comment_cnt = feeds[i]["ugcData"]["totalCommentNum"]
+                            Common.crawler_log().info("video_comment_cnt:{}".format(video_comment_cnt))
+
+                        # User ID
+                        user_id = feeds[i]["poster"]["id"]
+                        Common.crawler_log().info("user_id:{}".format(user_id))
+
+                        # User nickname
+                        user_name = feeds[i]["poster"]["nick"].strip().replace("\n", "") \
+                            .replace("/", "").replace("快手", "").replace(" ", "") \
+                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")
+                        Common.crawler_log().info("user_name:{}".format(user_name))
+
+                        # User avatar URL
+                        if "thumbURL" not in feeds[i]["material"] and "avatar" not in feeds[i]["poster"]:
+                            head_url = "0"
+                            Common.crawler_log().info("head_url:不存在")
+                        elif "thumbURL" in feeds[i]["material"]:
+                            head_url = feeds[i]["material"]["thumbURL"]
+                            Common.crawler_log().info("head_url:{}".format(head_url))
+                        else:
+                            head_url = feeds[i]["poster"]["avatar"]
+                            Common.crawler_log().info("head_url:{}".format(head_url))
+
+                        # Deduplicate against weishi_videoid.txt
+                        videos_ids = Common.read_txt("weishi_videoid.txt")
+                        if video_id in [v_id.strip() for v_id in videos_ids]:
+                            Common.crawler_log().info("该视频已下载:{}".format(video_title))
+                        else:
+                            Common.crawler_log().info("该视频未下载:{}".format(video_title))
+
+                            # Deduplicate against weishi_feeds.txt
+                            contents = Common.read_txt("weishi_feeds.txt")
+                            # If weishi_feeds.txt is empty, save directly
+                            if len(contents) == 0 and head_url != "0" \
+                                    and cover_url != "0" and video_url != "0" \
+                                    and video_duration != "0" and video_id != "0":
+                                basic_time = int(time.time())
+                                Common.crawler_log().info("添加视频信息至weishi_feeds.txt:{}".format(video_title))
+                                with open("./txt/weishi_feeds.txt", "a", encoding="utf8") as f_a:
+                                    f_a.write(str(basic_time) + " + " +
+                                              str(video_id) + " + " +
+                                              str(video_play_cnt) + " + " +
+                                              str(video_title) + " + " +
+                                              str(video_duration) + " + " +
+                                              str(video_comment_cnt) + " + " +
+                                              str(video_like_cnt) + " + " +
+                                              str(video_share_cnt) + " + " +
+                                              str(video_resolution) + " + " +
+                                              str(video_send_time) + " + " +
+                                              str(user_name) + " + " +
+                                              str(head_url) + " + " +
+                                              str(cover_url) + " + " +
+                                              str(video_url) + " + " +
+                                              str(user_id) + " + " +
+                                              str("oWGa05FrwkuUvT-4n1qGeQuhVsc8") + "\n")
+                            else:
+                                if video_id in [content.split(" + ")[1] for content in contents]:
+                                    Common.crawler_log().info("该视频已在 weishi_feeds.txt 中:{}".format(video_title))
+                                elif head_url == "0" or cover_url == "0" \
+                                        or video_url == "0" or video_duration == "0" or video_id == "0":
+                                    Common.crawler_log().info("视频封面/播放地址/播放时长/用户头像不存在")
+                                else:
+                                    basic_time = int(time.time())
+                                    Common.crawler_log().info("添加视频信息至weishi_feeds.txt:{}".format(video_title))
+                                    with open("./txt/weishi_feeds.txt", "a", encoding="utf8") as f_a:
+                                        f_a.write(str(basic_time) + " + " +
+                                                  str(video_id) + " + " +
+                                                  str(video_play_cnt) + " + " +
+                                                  str(video_title) + " + " +
+                                                  str(video_duration) + " + " +
+                                                  str(video_comment_cnt) + " + " +
+                                                  str(video_like_cnt) + " + " +
+                                                  str(video_share_cnt) + " + " +
+                                                  str(video_resolution) + " + " +
+                                                  str(video_send_time) + " + " +
+                                                  str(user_name) + " + " +
+                                                  str(head_url) + " + " +
+                                                  str(cover_url) + " + " +
+                                                  str(video_url) + " + " +
+                                                  str(user_id) + " + " +
+                                                  str("oWGa05FrwkuUvT-4n1qGeQuhVsc8") + "\n")
+        except Exception as e:
+            Common.crawler_log().error("获取微视视频 list 异常:{}".format(e))
+
+    @classmethod
+    def download_weishi_play_video(cls, env):
+        """
+        Download videos that satisfy the play-count rule.
+        Test environment: env == "dev"
+        Production environment: env == "prod"
+        """
+        videos = Common.read_txt("weishi_feeds.txt")
+        for video in videos:
+            download_video_id = video.strip().split(" + ")[1]
+            download_video_title = video.strip().split(" + ")[3]
+            download_video_duration = video.strip().split(" + ")[4]
+            download_video_play_cnt = video.strip().split(" + ")[2]
+            download_video_comment_cnt = video.strip().split(" + ")[5]
+            download_video_like_cnt = video.strip().split(" + ")[6]
+            download_video_share_cnt = video.strip().split(" + ")[7]
+            download_video_resolution = video.strip().split(" + ")[8]
+            download_video_width = download_video_resolution.split("*")[0]
+            download_video_height = download_video_resolution.split("*")[-1]
+            download_video_send_time = video.strip().split(" + ")[9]
+            download_user_name = video.strip().split(" + ")[10]
+            download_head_url = video.strip().split(" + ")[11]
+            download_cover_url = video.strip().split(" + ")[12]
+            download_video_url = video.strip().split(" + ")[13]
+            download_video_session = video.strip().split(" + ")[-1]
+
+            if cls.weishi_download_rule(download_video_duration, download_video_width,
+                                        download_video_height, download_video_play_cnt) is True:
+                Common.crawler_log().info("Start downloading video: {}".format(download_video_title))
+                # download the cover image
+                Common.download_method(text="cover", d_name=download_video_title, d_url=download_cover_url)
+                # download the video file
+                Common.download_method(text="video", d_name=download_video_title, d_url=download_video_url)
+                # record the video ID in weishi_videoid.txt (dedup list)
+                with open("./txt/weishi_videoid.txt", "a", encoding="utf8") as fa:
+                    fa.write(download_video_id + "\n")
+                # save video info to "./videos/{download_video_title}/info.txt"
+                with open("./videos/" + download_video_title + "/info.txt", "a", encoding="utf8") as f_a:
+                    f_a.write(str(download_video_id) + "\n" +
+                              str(download_video_title) + "\n" +
+                              str(download_video_duration) + "\n" +
+                              str(download_video_play_cnt) + "\n" +
+                              str(download_video_comment_cnt) + "\n" +
+                              str(download_video_like_cnt) + "\n" +
+                              str(download_video_share_cnt) + "\n" +
+                              str(download_video_resolution) + "\n" +
+                              str(download_video_send_time) + "\n" +
+                              str(download_user_name) + "\n" +
+                              str(download_head_url) + "\n" +
+                              str(download_video_url) + "\n" +
+                              str(download_cover_url) + "\n" +
+                              str(download_video_session))
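+                # publish.upload_and_publish later reads these 14 lines back by index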
+
+                # upload the video
+                if env == "dev":
+                    Common.crawler_log().info("Start uploading video: {}".format(download_video_title))
+                    Publish.upload_and_publish("dev", "play")
+                elif env == "prod":
+                    Common.crawler_log().info("Start uploading video: {}".format(download_video_title))
+                    Publish.upload_and_publish("prod", "play")
+
+                # remove this video's record from weishi_feeds.txt
+                Common.crawler_log().info("Removing record from weishi_feeds.txt: {}".format(download_video_title))
+                with open("./txt/weishi_feeds.txt", "r", encoding="utf8") as f_r:
+                    lines = f_r.readlines()
+                with open("./txt/weishi_feeds.txt", "w", encoding="utf-8") as f_w:
+                    for line in lines:
+                        if download_video_id in line.split(" + ")[1]:
+                            continue
+                        f_w.write(line)
+            else:
+                # remove this video's record from weishi_feeds.txt
+                Common.crawler_log().info("Video fails the download rules; removing its record from weishi_feeds.txt: {}".format(download_video_title))
+                with open("./txt/weishi_feeds.txt", "r", encoding="utf8") as f_r:
+                    lines = f_r.readlines()
+                with open("./txt/weishi_feeds.txt", "w", encoding="utf-8") as f_w:
+                    for line in lines:
+                        if download_video_id in line.split(" + ")[1]:
+                            continue
+                        f_w.write(line)
+
+
+if __name__ == "__main__":
+    weishi = Weishi()
+    weishi.get_weishi_recommend()

+ 248 - 0
main/publish.py

@@ -0,0 +1,248 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/30
+"""
+上传视频到阿里云 OSS
+上传视频到管理后台
+"""
+import json
+import os
+import random
+import time
+
+import oss2
+import requests
+import urllib3
+from main.common import Common
+
+
+class Publish:
+    @classmethod
+    def publish_video_dev(cls, request_data):
+        """
+        loginUid  on-site uid (picked at random)
+        appType  default: 888888
+        crawlerSrcId  off-site video ID
+        crawlerSrcCode  channel (custom, e.g. KYK)
+        crawlerSrcPublishTimestamp  original publish time of the video
+        crawlerTaskTimestamp  crawler task creation time (current time is fine)
+        videoPath  OSS path of the video
+        coverImgPath  OSS path of the video cover
+        title  title
+        totalTime  video duration
+        viewStatus  video validity status, default 1
+        versionCode  version, default 1
+        :return:
+        """
+        # Common.crawler_log().info('publish request data: {}'.format(request_data))
+        result = cls.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', request_data)
+        Common.crawler_log().info('publish result: {}'.format(result))
+        if result['code'] != 0:
+            Common.crawler_log().error('publish failure msg = {}'.format(result['msg']))
+        else:
+            Common.crawler_log().info('publish success video_id = {}'.format(request_data['crawlerSrcId']))
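+    # illustrative request_data (placeholder values, not real IDs):
+    # {'loginUid': '6267141', 'appType': '888888',
+    #  'crawlerSrcId': '<video-id>', 'crawlerSrcCode': 'KANYIKAN',
+    #  'crawlerSrcPublishTimestamp': '1650000000000', 'crawlerTaskTimestamp': '1650000000000',
+    #  'videoPath': 'longvideo/crawler_local/video/dev/20220415/<video-id>',
+    #  'coverImgPath': 'longvideo/crawler_local/image/dev/20220415/<video-id>',
+    #  'title': '<title>', 'totalTime': '55', 'viewStatus': '1', 'versionCode': '1'}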
+
+    @classmethod
+    def publish_video_prod(cls, request_data):
+        """
+        loginUid  on-site uid (picked at random)
+        appType  default: 888888
+        crawlerSrcId  off-site video ID
+        crawlerSrcCode  channel (custom, e.g. KYK)
+        crawlerSrcPublishTimestamp  original publish time of the video
+        crawlerTaskTimestamp  crawler task creation time (current time is fine)
+        videoPath  OSS path of the video
+        coverImgPath  OSS path of the video cover
+        title  title
+        totalTime  video duration
+        viewStatus  video validity status, default 1
+        versionCode  version, default 1
+        :return:
+        """
+        # Common.crawler_log().info('publish request data: {}'.format(request_data))
+        result = cls.request_post('https://longvideoapi.piaoquantv.com/longvideoapi/crawler/video/send', request_data)
+        Common.crawler_log().info('publish result: {}'.format(result))
+        if result['code'] != 0:
+            Common.crawler_log().error('publish failure msg = {}'.format(result['msg']))
+        else:
+            Common.crawler_log().info('publish success video_id = {}'.format(request_data['crawlerSrcId']))
+
+    @classmethod
+    def request_post(cls, request_url, request_data):
+        """
+        POST to an HTTP API
+        :param request_url: API URL
+        :param request_data: request parameters
+        :return: res_data as parsed JSON (None when the status code is not 200)
+        """
+        urllib3.disable_warnings()
+        response = requests.post(url=request_url, data=request_data, verify=False)
+        if response.status_code == 200:
+            res_data = json.loads(response.text)
+            return res_data
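+    # usage sketch (URL and payload are illustrative):
+    #   res = Publish.request_post('https://videotest.yishihui.com/longvideoapi/crawler/video/send', data)
+    #   if res is not None and res['code'] == 0: ...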
+
+    # The code below demonstrates basic file upload, download, listing and deletion usage.
+
+    # First initialize AccessKeyId, AccessKeySecret, Endpoint and so on.
+    # They are read from environment variables; alternatively, replace placeholders such as "<your AccessKeyId>" with real values.
+    #
+    # Taking the Hangzhou region as an example, the Endpoint can be:
+    #   http://oss-cn-hangzhou.aliyuncs.com
+    #   https://oss-cn-hangzhou.aliyuncs.com
+    # accessed over HTTP and HTTPS respectively.
+    access_key_id = os.getenv('OSS_TEST_ACCESS_KEY_ID', 'LTAIP6x1l3DXfSxm')
+    access_key_secret = os.getenv('OSS_TEST_ACCESS_KEY_SECRET', 'KbTaM9ars4OX3PMS6Xm7rtxGr1FLon')
+    bucket_name = os.getenv('OSS_TEST_BUCKET', 'art-pubbucket')
+    # endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou-internal.aliyuncs.com')
+    endpoint = os.getenv('OSS_TEST_ENDPOINT', 'oss-cn-hangzhou.aliyuncs.com')
+
+    # make sure the parameters above are filled in correctly
+    for param in (access_key_id, access_key_secret, bucket_name, endpoint):
+        assert '<' not in param, 'Please set parameter: ' + param
+
+    # create the Bucket object; all object-related operations go through it
+    bucket = oss2.Bucket(oss2.Auth(access_key_id, access_key_secret), endpoint, bucket_name)
+
+    """
+    Processing flow:
+    1. On a schedule (once a day at 01:00), iterate over the local videos directory; layout: videos -> per-video folder -> video file + cover image + basic info
+    2. Upload the video file and cover to OSS
+    - video OSS path   longvideo/crawler_local/video/prod/<file name>
+    - cover OSS path   longvideo/crawler_local/image/prod/<file name>
+    3. Publish the video
+    - read the basic info and call the publish API
+    """
+    # template slots: {env} / {date, e.g. 20220225} / {file name}
+    oss_file_path_video = 'longvideo/crawler_local/video/{}/{}/{}'
+    oss_file_path_image = 'longvideo/crawler_local/image/{}/{}/{}'
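+    # e.g. oss_file_path_video.format("prod", "20220415", "abc123")
+    #   -> 'longvideo/crawler_local/video/prod/20220415/abc123'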
+
+    @classmethod
+    def put_file(cls, oss_file, local_file):
+        Common.crawler_log().info("put oss file = {}, local file = {}".format(oss_file, local_file))
+        cls.bucket.put_object_from_file(oss_file, local_file)
+        Common.crawler_log().info("put oss file = {}, local file = {} success".format(oss_file, local_file))
+
+    # remove a local file
+    @classmethod
+    def remove_local_file(cls, local_file):
+        Common.crawler_log().info("remove local file = {}".format(local_file))
+        os.remove(local_file)
+        Common.crawler_log().info("remove local file = {} success".format(local_file))
+
+    # remove a local directory (expected to be empty; its files are removed first)
+    @classmethod
+    def remove_local_file_dir(cls, local_file):
+        Common.crawler_log().info("remove local file dir = {}".format(local_file))
+        os.rmdir(local_file)
+        Common.crawler_log().info("remove local file dir = {} success".format(local_file))
+
+    local_file_path = './videos'
+    video_file = 'video'
+    image_file = 'image'
+    info_file = 'info'
+    uids_dev_up = [6267140]
+    uids_dev_play = [6267141]
+    uids_prod_up = [20631208, 20631209, 20631210, 20631211, 20631212,
+                    20631213, 20631214, 20631215, 20631216, 20631217]
+    uids_prod_play = [20631228, 20631229, 20631230, 20631231, 20631232,
+                      20631233, 20631234, 20631235, 20631236, 20631237]
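+    # virtual account pools: each published video gets a random uid
+    # from the pool matching the env/job combination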
+
+    @classmethod
+    def upload_and_publish(cls, env, job):
+        """
+        Upload videos to OSS and publish them
+        :param env: test environment: dev; production environment: prod
+        :param job: rising-rank list: up; play-count list: play
+        """
+        Common.crawler_log().info("upload_and_publish starting...")
+        today = time.strftime("%Y%m%d", time.localtime())
+        # all per-video folders under the videos directory
+        files = os.listdir(cls.local_file_path)
+        for f in files:
+            try:
+                # one video folder
+                fi_d = os.path.join(cls.local_file_path, f)
+                # make sure it is a directory
+                if os.path.isdir(fi_d):
+                    Common.crawler_log().info('dir = {}'.format(fi_d))
+                    # list all files inside this video folder
+                    dir_files = os.listdir(fi_d)
+                    data = {'appType': '888888', 'crawlerSrcCode': 'KANYIKAN', 'viewStatus': '1', 'versionCode': '1'}
+                    # crawler task timestamp in milliseconds
+                    now_timestamp = int(round(time.time() * 1000))
+                    data['crawlerTaskTimestamp'] = str(now_timestamp)
+                    if env == "dev" and job == "up":
+                        uid = str(random.choice(cls.uids_dev_up))
+                    elif env == "dev" and job == "play":
+                        uid = str(random.choice(cls.uids_dev_play))
+                    elif env == "prod" and job == "up":
+                        uid = str(random.choice(cls.uids_prod_up))
+                    else:
+                        uid = str(random.choice(cls.uids_prod_play))
+                    data['loginUid'] = uid
+                    # iterate over the files in this video folder
+                    for fi in dir_files:
+                        # full path of each file
+                        fi_path = fi_d + '/' + fi
+                        Common.crawler_log().info('dir fi_path = {}'.format(fi_path))
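+                        # info.txt line order (as written by the download step):
+                        # 0 id, 1 title, 2 duration, 3 play_cnt, 4 comment_cnt,
+                        # 5 like_cnt, 6 share_cnt, 7 resolution, 8 send_time,
+                        # 9 user_name, 10 head_url, 11 video_url, 12 cover_url, 13 session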
+                        # read info.txt and populate the publish payload
+                        if cls.info_file in fi:
+                            # use a handle name distinct from the outer loop variable `f`
+                            with open(fi_path) as info_fp:
+                                # values were validated when written, so read them as-is
+                                for i in range(14):
+                                    line = info_fp.readline().replace('\n', '')
+                                    if line and not line.isspace():
+                                        Common.crawler_log().info("line = {}".format(line))
+                                        if i == 0:
+                                            data['crawlerSrcId'] = line
+                                        elif i == 1:
+                                            data['title'] = line
+                                        elif i == 2:
+                                            data['totalTime'] = line
+                                        elif i == 8:
+                                            data['crawlerSrcPublishTimestamp'] = line
+                                    else:
+                                        Common.crawler_log().warning("{} line is empty".format(fi_path))
+                            # remove info.txt once parsed
+                            cls.remove_local_file(fi_path)
+                    # refresh the file list (info.txt is gone now)
+                    dir_files = os.listdir(fi_d)
+                    for fi in dir_files:
+                        fi_path = fi_d + '/' + fi
+                        Common.crawler_log().info('dir fi_path = {}'.format(fi_path))
+                        # upload to OSS
+                        if cls.video_file in fi:
+                            if env == "dev":
+                                oss_video_file = cls.oss_file_path_video.format("dev", today, data['crawlerSrcId'])
+                            else:
+                                oss_video_file = cls.oss_file_path_video.format("prod", today, data['crawlerSrcId'])
+                            Common.crawler_log().info("oss_video_file = {}".format(oss_video_file))
+                            cls.put_file(oss_video_file, fi_path)
+                            data['videoPath'] = oss_video_file
+                            Common.crawler_log().info("videoPath = {}".format(oss_video_file))
+                        elif cls.image_file in fi:
+                            if env == "dev":
+                                oss_image_file = cls.oss_file_path_image.format("dev", today, data['crawlerSrcId'])
+                            else:
+                                oss_image_file = cls.oss_file_path_image.format("prod", today, data['crawlerSrcId'])
+                            Common.crawler_log().info("oss_image_file = {}".format(oss_image_file))
+                            cls.put_file(oss_image_file, fi_path)
+                            data['coverImgPath'] = oss_image_file
+                            Common.crawler_log().info("coverImgPath = {}".format(oss_image_file))
+                        # remove the local file after upload
+                        cls.remove_local_file(fi_path)
+
+                    # publish the video
+                    if env == "dev":
+                        cls.publish_video_dev(data)
+                    elif env == "prod":
+                        cls.publish_video_prod(data)
+                    cls.remove_local_file_dir(fi_d)
+
+                else:
+                    Common.crawler_log().error('file not a dir = {}'.format(fi_d))
+            except Exception as e:
+                Common.crawler_log().exception('upload_and_publish error: {}'.format(e))

+ 135 - 0
main/run.py

@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/30
+import datetime
+import os
+import random
+import sys
+import time
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.download_weishi import Weishi
+from main.download_kuaishou import KuaiShou
+
+
+def kuaishou_dev_job():
+    """
+    Run the Kuaishou script against the test environment
+    """
+    while True:
+        # daily download-and-upload cap: 20 videos
+        if len(KuaiShou.download_video_list) >= 20:
+            time.sleep(60)
+            break
+        else:
+            Common.crawler_log().info("Start crawling Kuaishou videos")
+            time.sleep(1)
+
+            # crawl videos that match the rules and write them to kuaishou_feeds.txt
+            KuaiShou.kuaishou_get_recommend()
+            # download matching videos and upload them
+            KuaiShou.kuaishou_download_play_video("dev")
+            # sleep 1-3 s at random
+            time.sleep(random.randint(1, 3))
+
+    # delete stale logs
+    Common.del_logs()
+    # count downloaded videos
+    Common.kuaishou_download_count()
+
+
+def weishi_dev_job():
+    """
+    Run the Weishi script against the test environment
+    """
+    while True:
+        if 14 >= datetime.datetime.now().hour >= 5:
+            Common.crawler_log().info("Stop the crawl-and-upload task")
+            break
+        else:
+            # crawl videos that match the rules and write them to weishi_feeds.txt
+            Weishi.get_weishi_recommend()
+            # download matching videos and upload them
+            Weishi.download_weishi_play_video("dev")
+            # sleep 1-3 s at random
+            time.sleep(random.randint(1, 3))
+
+    # delete stale logs
+    Common.del_logs()
+    # count downloaded videos
+    Common.weishi_download_count()
+
+
+def main_dev():
+    """
+    Test-environment entry point
+    """
+    scheduler = BlockingScheduler(timezone="Asia/Shanghai")
+    # cron trigger: run the crawl job once a day at 19:10
+    scheduler.add_job(kuaishou_dev_job, 'cron', hour=19, minute=10, misfire_grace_time=60)
+    # start the scheduler
+    scheduler.start()
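+    # note: BlockingScheduler.start() blocks the calling thread; the process
+    # stays alive and fires the cron trigger at the configured local time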
+
+
+def kuaishou_prod_job():
+    """
+    Run the Kuaishou script against the production environment
+    """
+    while True:
+        # daily download-and-upload cap: 200 videos
+        if len(KuaiShou.download_video_list) >= 200:
+            time.sleep(60)
+            break
+        else:
+            Common.crawler_log().info("Start crawling Kuaishou videos")
+            time.sleep(1)
+
+            # crawl videos that match the rules and write them to kuaishou_feeds.txt
+            KuaiShou.kuaishou_get_recommend()
+            # download matching videos and upload them
+            KuaiShou.kuaishou_download_play_video("prod")
+            # sleep 1-3 s at random
+            time.sleep(random.randint(1, 3))
+
+    # delete stale logs
+    Common.del_logs()
+    # count downloaded videos
+    Common.kuaishou_download_count()
+
+
+def weishi_prod_job():
+    """
+    Run the Weishi script against the production environment
+    """
+    while True:
+        if 20 >= datetime.datetime.now().hour >= 5:
+            Common.crawler_log().info("Stop the Weishi crawl task")
+            break
+        else:
+            # crawl videos that match the rules and write them to weishi_feeds.txt
+            Weishi.get_weishi_recommend()
+            # download matching videos and upload them
+            Weishi.download_weishi_play_video("prod")
+            # sleep 1-3 s at random
+            time.sleep(random.randint(1, 3))
+
+    # delete stale logs
+    Common.del_logs()
+    # count downloaded videos
+    Common.weishi_download_count()
+
+
+def main_prod():
+    """
+    Production entry point
+    """
+    scheduler = BlockingScheduler(timezone="Asia/Shanghai")
+    # cron trigger: run the crawl job once a day at 08:00
+    scheduler.add_job(kuaishou_prod_job, 'cron', hour=8, minute=0, misfire_grace_time=60)
+    # start the scheduler
+    scheduler.start()
+
+
+if __name__ == "__main__":
+    # main_dev()
+    main_prod()

+ 3 - 0
txt/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/30

+ 0 - 0
txt/kuaishou_feeds.txt


+ 0 - 0
txt/kuaishou_videoid.txt


+ 0 - 0
txt/weishi_feeds.txt


+ 0 - 0
txt/weishi_videoid.txt


+ 3 - 0
videos/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/3/30

+ 19 - 0
抓取规则.txt

@@ -0,0 +1,19 @@
+==========2022/4/15===========
+I. Crawl by data metrics
+1. Task window:
+- every day from 08:00 to 21:00
+2. Crawl rules:
+  - play count and like count 50,000+, share count 2,000+
+  - duration longer than 1 minute and shorter than 10 minutes
+  - resolution 720 or higher
+  - on-site title = original Kuaishou title (hashtag topics "#...#" must be stripped from the title)
+  - on-site cover = original Kuaishou cover image
+3. On-site intake:
+- 200 videos stored per day
+- videos are spread at random across 10 virtual accounts.
+4. Special notes:
+- deduplicate: videos that have already been crawled must not be crawled again
+- keep rescanning the video pool: e.g. if a video was below 50,000 likes last week but reaches 50,000 this week, crawl it.
+5. Added crawler title filter words
+-  集结吧光合创作者、电影解说、快来露两手、分享家常美食教程、光合作者助手、创作者中心、创作者学院、娱乐星熠计划、解说电影、电影剪辑、放映室、老剧、影视剪辑、精彩片段、冬日影娱大作战、春日追剧计划单、影视解说、中视频影视混剪计划、众志成城共抗疫情、我在追好剧、娱乐星灿计划、电影、电视剧、毛泽东、毛主席、周恩来、林彪、习近平、习大大、彭丽媛、怀旧经典影视
+==============================
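
A minimal sketch of the metric rule above (illustrative only; the actual check
lives in main/download_kuaishou.py, and the name and signature of
kuaishou_download_rule below are assumptions):

    def kuaishou_download_rule(duration, width, height, play_cnt, like_cnt, share_cnt):
        """Return True when a video meets the crawl rules listed above."""
        # play count and like count 50,000+, share count 2,000+
        if int(play_cnt) >= 50000 and int(like_cnt) >= 50000 and int(share_cnt) >= 2000:
            # duration between 1 and 10 minutes (in seconds)
            if 60 <= int(duration) <= 600:
                # resolution 720 or higher on either dimension
                if int(width) >= 720 or int(height) >= 720:
                    return True
        return False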