wangkun · 1 year ago
commit 8a2d46cbd6

+ 69 - 0
dev/dev_script/get_cpu_mem.py

@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/7/19
+import psutil
+
+
+class GetCpuMem:
+    @classmethod
+    def get_pid(cls, script):
+        # Iterate over all running processes
+        for proc in psutil.process_iter():
+            try:
+                # Get the process's command-line arguments
+                cmds = proc.cmdline()
+                # Check whether any argument contains the crawler script's name or keyword
+                for cmd in cmds:
+                    if script in cmd:
+                        print(f"cmd:{cmd}")
+                        # Return the PID of the matching process
+                        pid = proc.pid
+                        return pid
+            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
+                pass
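+        # If no process matches, the loop falls through and the method implicitly returns None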
+
+    @classmethod
+    def get_cpu_mem(cls, script):
+        # Look up the PID of the running script
+        pid = cls.get_pid(script)
+        print(f"pid:{pid}")
+        if pid is None:
+            print(f"no running process found for {script}\n")
+            return
+        proc = psutil.Process(pid)
+        # CPU usage, sampled over a 1-second interval
+        # (a bare cpu_percent() call reports 0.0 on its first invocation)
+        cpu_percent = round(proc.cpu_percent(interval=1), 2)
+
+        # Memory usage as a percentage of total system memory
+        memory_percent = round(proc.memory_percent(), 2)
+
+        print(f"CPU usage:{cpu_percent}")
+        print(f"Memory usage:{memory_percent}")
+
+    @classmethod
+    def get_all_cpu_mem(cls):
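+        # Entry-point script names of the crawlers whose resource usage should be checked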
+        script_list = [
+            "run_xg_search",
+            "run_xg_author",
+            "run_xng_author",
+            "run_xng_play",
+            "run_xng_hour",
+            "run_dy_author",
+            "run_dy_recommend",
+            "run_ks_recommend",
+            "run_ks_author",
+            "run_bszf_recommend",
+            "run_ssnnyfq_recommend",
+            "run_gzh_author",
+            "run_weixinzhishu_score",
+            "get_cpu_mem",
+        ]
+
+        for script in script_list:
+            print(f"script:{script}")
+            cls.get_cpu_mem(script)
+            print("\n")
+
+
+if __name__ == "__main__":
+    # GetCpuMem.get_cpu_mem("get_cpu_mem")
+    GetCpuMem.get_all_cpu_mem()
+    pass

+ 71 - 0
dev/dev_script/get_intervals.py

@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/7/18
+import datetime
+
+# Read the log file and store each line in a list (uncomment a different path below to analyze another crawler's log)
+# with open('../logs/benshanzhufu-recommend-2023-07-17.log', 'r') as file:
+# with open('../logs/douyin-author-2023-07-17.log', 'r') as file:
+with open('../logs/douyin-recommend-2023-07-19.log', 'r') as file:
+# with open('../logs/kuaishou-author-2023-07-17.log', 'r') as file:
+# with open('../logs/kuaishou-recommend-2023-07-17.log', 'r') as file:
+# with open('../logs/xigua-author-2023-07-17.log', 'r') as file:
+# with open('../logs/xigua-search-2023-07-17.log', 'r') as file:
+# with open('../logs/xiaoniangao-author-2023-07-17.log', 'r') as file:
+# with open('../logs/xiaoniangao-play-2023-07-17.log', 'r') as file:
+# with open('../logs/xiaoniangao-hour-2023-07-17.log', 'r') as file:
+# with open('../logs/suisuiniannianyingfuqi-recommend-2023-07-17.log', 'r') as file:
+# with open('../logs/gongzhonghao-author1-2023-07-18.log', 'r') as file:
+# with open('../logs/gongzhonghao-author2-2023-07-18.log', 'r') as file:
+# with open('../logs/gongzhonghao-author3-2023-07-18.log', 'r') as file:
+# with open('../logs/gongzhonghao-author4-2023-07-18.log', 'r') as file:
+# with open('../logs/gongzhonghao-author5-2023-07-18.log', 'r') as file:
+# with open('../logs/gongzhonghao-author6-2023-07-18.log', 'r') as file:
+    log_lines = file.readlines()
+
+# List of intervals between consecutive log entries
+intervals = []
+
+# Walk the log lines and compute the interval between each pair of adjacent entries
+for i in range(1, len(log_lines)):
+    if "2023-" not in log_lines[i-1] or "2023-" not in log_lines[i]:
+        continue
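+    # Each remaining line is assumed to begin with a "YYYY-MM-DD HH:MM:SS.ffffff" timestamp,
+    # so everything after the first "." is stripped before parsing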
+    # Parse the timestamps
+    timestamp1 = datetime.datetime.strptime(log_lines[i - 1].split(".")[0], '%Y-%m-%d %H:%M:%S')
+    timestamp2 = datetime.datetime.strptime(log_lines[i].split(".")[0], '%Y-%m-%d %H:%M:%S')
+
+    # Compute the interval
+    interval = timestamp2 - timestamp1
+
+    # Append the interval to the list
+    intervals.append(interval)
+
+# Sort the intervals in descending order
+intervals.sort(reverse=True)
+
+# Take the 10 largest intervals
+top_10_intervals = intervals[:10]
+# Convert the top-10 intervals to whole seconds
+top_10_intervals_seconds = [int(interval.total_seconds()) for interval in top_10_intervals]
+
+# Print the result
+print(top_10_intervals_seconds)
+
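+# Observed top-10 gaps (in seconds) per crawler log: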
+# benshanzhufu  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+# douyin_author [30, 28, 25, 20, 18, 18, 17, 17, 17, 16]
+# douyin_recommend  [62, 50, 38, 34, 33, 31, 31, 31, 31, 31]
+# kuaishou_author  [31, 27, 23, 21, 21, 21, 19, 19, 17, 15]
+# kuaishou_recommend  [27, 23, 23, 23, 23, 22, 22, 22, 21, 21]
+# xigua_author  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+# xigua_search  [18, 14, 14, 13, 13, 12, 11, 11, 11, 10]
+# xiaoniangao_author  [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
+# xiaoniangao_play  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+# xiaoniangao_hour  [61, 3, 2, 2, 2, 2, 2, 2, 1, 1]
+# suisuiniannian  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
+# gzh1  [26, 26, 26, 25, 25, 25, 25, 24, 24, 24]
+# gzh2  [28, 26, 25, 25, 24, 24, 24, 24, 24, 24]
+# gzh3  [27, 26, 26, 25, 25, 25, 24, 24, 24, 24]
+# gzh4  [26, 26, 25, 24, 24, 24, 24, 24, 24, 24]
+# gzh5  [29, 26, 25, 25, 24, 24, 24, 24, 24, 24]
+# gzh6  [26, 25, 25, 25, 25, 24, 24, 24, 24, 24]
+

+ 0 - 312
dev/dev_script/xg_recommend.py

@@ -1,312 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/7/6
-import os
-import sys
-import time
-import cv2
-from selenium.webdriver.common.keys import Keys
-from selenium.webdriver import DesiredCapabilities
-from selenium import webdriver
-from selenium.webdriver.chrome.service import Service
-from selenium.webdriver.common.by import By
-sys.path.append(os.getcwd())
-from common.common import Common
-
-
-class XGRecommend(object):
-
-    def __init__(self, log_type, crawler, env):
-        """
-        Launch Chrome locally on remote-debugging port 12306:
-        open -a "Google Chrome" --args --remote-debugging-port=12306
-        """
-        Common.logger(log_type, crawler).info("启动 Chrome 浏览器")
-        cmd = 'open -a "Google Chrome" --args --remote-debugging-port=12306'
-        os.system(cmd)
-
-        if env == "dev":
-            chromedriver = "/Users/wangkun/Downloads/chromedriver/chromedriver_v114/chromedriver"
-        else:
-            chromedriver = "/usr/bin/chromedriver"
-
-        # Request logging configuration
-        ca = DesiredCapabilities.CHROME
-        ca["goog:loggingPrefs"] = {"performance": "ALL"}
-        # Initialize browser options
-        self.browser = webdriver.ChromeOptions()
-        self.browser.add_experimental_option("debuggerAddress", "127.0.0.1:12306")
-        # # Set the user-agent
-        # self.browser.add_argument(
-        #     f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
-        # # Hide the "Chrome is being controlled by automated test software" infobar
-        # self.browser.add_argument('--disable-infobars')
-
-        # # Disable GPU acceleration
-        # self.browser.add_argument('--disable-gpu')
-        # # Disable the automation extension ("developer mode")
-        # self.browser.add_experimental_option("useAutomationExtension", False)
-        # # Add options as key-value pairs
-        # self.browser.add_experimental_option('excludeSwitches', ['enable-automation'])
-        # # Disable Blink runtime automation features
-        # self.browser.add_argument('--disable-blink-features=AutomationControlled')
-        # Run without opening a browser window (headless)
-        # self.browser.add_argument("--headless")
-        # # Required for headless mode on Linux
-        # self.browser.add_argument("--no-sandbox")
-        # # Set the browser window size
-        # self.browser.add_argument("--window-size=1920,1080")
-
-        # Initialize the driver
-        self.driver = webdriver.Chrome(desired_capabilities=ca, options=self.browser, service=Service(chromedriver))
-        self.driver.implicitly_wait(10)
-        Common.logger(log_type, crawler).info("打开西瓜推荐页")
-        self.driver.get(f"https://www.ixigua.com/")
-        # Open a new tab from the current page
-        self.driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.COMMAND + 't')
-        # Switch to the newly opened tab
-        self.driver.switch_to.window(self.driver.window_handles[-1])
-        self.username = "19831265541"
-        self.password = "Test111111"
-        time.sleep(2)
-
-    def quit(self, log_type, crawler):
-        Common.logger(log_type, crawler).info("退出浏览器")
-        self.driver.quit()
-
-    #  Given the local paths of the slider background image and the slider image, return the distance from the slider to the gap
-    @staticmethod
-    def findPic(log_type, crawler, img_bg_path, img_slider_path):
-        """
-        Find the position of the best match in the image
-        :param log_type: log
-        :param crawler: crawler name
-        :param img_bg_path: local path of the slider background image
-        :param img_slider_path: local path of the slider image
-        :return: the x coordinates of the worst match and the best match
-        """
-
-        # Read the slider background image; the argument is the image path. OpenCV uses BGR mode by default.
-        # cv2.imread() is short for "image read"
-        # img_bg is a numpy ndarray object
-        img_bg = cv2.imread(img_bg_path)
-
-        # Convert the slider background image from BGR to grayscale (i.e. a black-and-white image)
-        # Why convert? BGR (color) data is larger than grayscale data, so converting speeds up the matching
-        # BGR vs. the more common RGB mode:
-        # R = red, G = green, B = blue
-        # In RGB the red channel sits in the high position, green in the middle and blue in the low position; BGR is the reverse.
-        # e.g. pure red is (255, 0, 0) in RGB and (0, 0, 255) in BGR
-        img_bg_gray = cv2.cvtColor(img_bg, cv2.COLOR_BGR2GRAY)
-
-        # Read the slider image; argument 1 is the path, argument 2 (0) means grayscale mode
-        img_slider_gray = cv2.imread(img_slider_path, 0)
-
-        # Match the slider inside the background image; cv2.TM_CCOEFF_NORMED is one of OpenCV's matching algorithms
-        res = cv2.matchTemplate(img_bg_gray, img_slider_gray, cv2.TM_CCOEFF_NORMED)
-
-        Common.logger(log_type, crawler).info(f"{'#' * 50}")
-        Common.logger(log_type, crawler).info(type(res))  # prints: <class 'numpy.ndarray'>
-        Common.logger(log_type, crawler).info(res)
-        # prints a two-dimensional ndarray, e.g.:
-        # [[0.05604218  0.05557462  0.06844381... - 0.1784117 - 0.1811338 - 0.18415523]
-        #  [0.06151756  0.04408009  0.07010461... - 0.18493137 - 0.18440475 - 0.1843424]
-        # [0.0643926    0.06221284  0.0719175... - 0.18742703 - 0.18535161 - 0.1823346]
-        # ...
-        # [-0.07755355 - 0.08177952 - 0.08642308... - 0.16476074 - 0.16210903 - 0.15467581]
-        # [-0.06975575 - 0.07566144 - 0.07783117... - 0.1412715 - 0.15145643 - 0.14800543]
-        # [-0.08476129 - 0.08415948 - 0.0949327... - 0.1371379 - 0.14271489 - 0.14166716]]
-
-        Common.logger(log_type, crawler).info(f"{'#' * 50}")
-
-        # cv2.minMaxLoc() finds the minimum and maximum values in the ndarray and their coordinates
-        value = cv2.minMaxLoc(res)
-        # the resulting value looks like: (-0.1653602570295334, 0.6102921366691589, (144, 1), (141, 56))
-
-        Common.logger(log_type, crawler).info(f"{value, '#' * 30}")
-
-        # Return the x coordinates, e.g. 144 and 141 above
-        return value[2:][0][0], value[2:][1][0]
-
-    # Return two lists: one for the accelerated forward drag, one for the decelerating backward drag
-    @staticmethod
-    def generate_tracks(distance):
-        # Add 20 px to the distance; the slider overshoots the gap and then eases back to it while decelerating
-        distance += 20
-        v = 0
-        t = 0.2
-        forward_tracks = []
-        current = 0
-        mid = distance * 3 / 5  # deceleration threshold
-        while current < distance:
-            if current < mid:
-                a = 2  # acceleration of +2
-            else:
-                a = -3  # acceleration of -3
-            s = v * t + 0.5 * a * (t ** 2)
-            v = v + a * t
-            current += s
-            forward_tracks.append(round(s))
-
-        back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1]
-        return forward_tracks, back_tracks
-
-    # Compute the list of drag step distances
-    @staticmethod
-    def get_tracks(distance):
-        """
-        Simulate a human drag: uniform acceleration followed by uniform deceleration
-        Basic equations of uniformly accelerated motion:
-        v = v0 + a*t
-        s = v0*t + 1/2*a*t^2
-        """
-        # Initial velocity
-        v = 0
-        # Time step
-        t = 0.3
-        # List of per-step distances
-        tracks = []
-        # Distance covered so far
-        current = 0
-        # Switch point at 4/5 of the total distance
-        mid = distance * 4 / 5
-        while current < distance:
-            if current < mid:
-                # Acceleration phase
-                a = 2
-            else:
-                # Deceleration phase
-                a = -3
-            # Current velocity
-            v0 = v
-            # Displacement in this step
-            s = v0 * t + 0.5 * a * t ** 2
-            # Update the velocity
-            v = v0 + a * t
-            # Update the distance covered
-            current += s
-            # Append the step to the track list
-            tracks.append(round(s))
-        return tracks
-
-    @staticmethod
-    def FindPic(log_type, crawler, target, template):
-        """
-        Find the position of the best match in the image
-        :param log_type: log type
-        :param crawler: crawler name
-        :param target: the target, i.e. the background image
-        :param template: the template, i.e. the image to find
-        :return: the best match, the worst match and their coordinates
-        """
-        target_rgb = cv2.imread(target)
-        target_gray = cv2.cvtColor(target_rgb, cv2.COLOR_BGR2GRAY)
-        template_rgb = cv2.imread(template, 0)
-        res = cv2.matchTemplate(target_gray, template_rgb, cv2.TM_CCOEFF_NORMED)
-        value = cv2.minMaxLoc(res)
-        Common.logger(log_type, crawler).info(value)
-        # Compute the x-axis distance of the gap
-        x_val = int(value[3][0])
-        Common.logger(log_type, crawler).info(f"缺口的 X 轴距离:{x_val}")
-        # Get the width and height of the template image
-        template_height, template_width, template_c = cv2.imread(template).shape
-        Common.logger(log_type, crawler).info(f"模板高:{template_height}")
-        Common.logger(log_type, crawler).info(f"模板宽:{template_width}")
-        Common.logger(log_type, crawler).info(f"图片的通道数:{template_c}")
-        # Compute the distance the slider needs to move
-        move_val = x_val - template_width
-        Common.logger(log_type, crawler).info(f"需要滑动的距离:{move_val}")
-        return x_val
-
-    def login(self, log_type, crawler, env):
-        # Common.logger(log_type, crawler).info("点击登录")
-        # self.driver.find_element(By.XPATH, '//*[@class="xg-button xg-button-primary xg-button-middle loginButton"]').click()
-        # time.sleep(random.randint(1, 2))
-        # Common.logger(log_type, crawler).info("点击密码登录")
-        # self.driver.find_element(By.XPATH, '//*[@class="web-login-link-list__item__text"]').click()
-        # time.sleep(random.randint(1, 2))
-        # Common.logger(log_type, crawler).info("输入手机号")
-        # self.driver.find_element(By.XPATH, '//*[@class="web-login-normal-input__input"]').send_keys(self.username)
-        # time.sleep(random.randint(1, 2))
-        # Common.logger(log_type, crawler).info("输入密码")
-        # self.driver.find_element(By.XPATH, '//*[@class="web-login-button-input__input"]').send_keys(self.password)
-        # time.sleep(random.randint(1, 2))
-        # Common.logger(log_type, crawler).info("点击登录")
-        # self.driver.find_element(By.XPATH, '//*[@class="web-login-account-password__button-wrapper"]/*[1]').click()
-        # time.sleep(random.randint(1, 2))
-
-        # # Locate the slider
-        # Common.logger(log_type, crawler).info("获取滑块")
-        # move_btns = self.driver.find_elements(By.XPATH, '//*[@class="sc-kkGfuU bujTgx"]')
-        # if len(move_btns) == 0:
-        #     Common.logger(log_type, crawler).info("未发现滑块,3-5 秒后重试")
-        #     self.quit(log_type, crawler)
-        #     time.sleep(random.randint(3, 5))
-        #     self.__init__(log_type, crawler, env)
-        #     self.login(log_type, crawler, env)
-        # move_btn = move_btns[0]
-        #
-        # while True:
-        #
-        #     # Download the slider image with requests
-        #     slide_url = self.driver.find_element(By.XPATH,
-        #                                          '//*[@class="captcha_verify_img_slide react-draggable sc-VigVT ggNWOG"]').get_attribute(
-        #         "src")
-        #     slide_dir = f"./{crawler}/photo/img_slide.png"
-        #     urllib3.disable_warnings()
-        #     slide_url_response = requests.get(slide_url, verify=False)
-        #     with open(slide_dir, "wb") as file:
-        #         file.write(slide_url_response.content)
-        #
-        #     # Download the background image with urllib
-        #     bg_image_url = self.driver.find_element(By.XPATH, '//*[@id="captcha-verify-image"]').get_attribute("src")
-        #     bg_image_dir = f"./{crawler}/photo/img_bg.png"
-        #     urllib3.disable_warnings()
-        #     bg_image_url_response = requests.get(bg_image_url, verify=False)
-        #     with open(bg_image_dir, "wb") as file:
-        #         file.write(bg_image_url_response.content)
-        #
-        #     offset = self.FindPic(log_type, crawler, bg_image_dir, slide_dir)
-        #     Common.logger(log_type, crawler).info(f"offset:{offset}")
-        #
-        #     # Pause on the slider
-        #     Common.logger(log_type, crawler).info("在滑块上暂停")
-        #     ActionChains(self.driver).click_and_hold(on_element=move_btn).perform()
-        #     # Drag the slider
-        #     Common.logger(log_type, crawler).info("拖动滑块0.7*距离")
-        #     ActionChains(self.driver).move_to_element_with_offset(to_element=move_btn, xoffset=int(0.5*offset), yoffset=0).perform()
-        #     # Drag the remaining pixels
-        #     Common.logger(log_type, crawler).info("拖动剩余像素")
-        #     tracks = self.get_tracks(int(0.15*offset))
-        #     # Iterate over each step distance
-        #     for track in tracks:
-        #         # Move the slider by the corresponding distance
-        #         ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=0).perform()
-        #     # Rest for 1s
-        #     Common.logger(log_type, crawler).info("休息1s")
-        #     time.sleep(1)
-        #     # Release the slider
-        #     Common.logger(log_type, crawler).info("释放滑块")
-        #     ActionChains(self.driver).release().perform()
-        #
-        #     if len(move_btns) != 0:
-        #         time.sleep(1)
-        #         continue
-        #     break
-        Common.logger(log_type, crawler).info("刷新页面")
-        self.driver.refresh()
-
-        Common.logger(log_type, crawler).info("关闭当前标签页")
-        time.sleep(5)
-        # Close the current tab
-        self.driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.COMMAND + 'w')
-        Common.logger(log_type, crawler).info("已关闭")
-        Common.logger(log_type, crawler).info("退出浏览器")
-        self.quit(log_type, crawler)
-
-
-
-if __name__ == "__main__":
-    Recommend = XGRecommend("search", "dev", "dev")
-    Recommend.login("search", "dev", "dev")
-    pass

+ 127 - 126
kanyikan/kanyikan_recommend/kanyikan_recommend0627.py

@@ -33,135 +33,136 @@ class KanyikanRecommend:
     @classmethod
     def get_videoList(cls, log_type, crawler, our_uid, rule_dict, env):
         mq = MQ(topic_name="topic_crawler_etl_" + env)
-        try:
-            Common.logger(log_type, crawler).info(f"正在抓取列表页")
-            Common.logging(log_type, crawler, env, f"正在抓取列表页")
-            session = Common.get_session(log_type, crawler, env)
-            if session is None:
-                time.sleep(1)
-                cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
-            url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
-            header = {
-                "Connection": "keep-alive",
-                "content-type": "application/json",
-                "Accept-Encoding": "gzip,compress,br,deflate",
-                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
-                              "NetType/WIFI Language/zh_CN",
-                "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
-            }
-            params = {
-                'session': session,
-                "offset": 0,
-                "wxaVersion": "3.9.2",
-                "count": "10",
-                "channelid": "208",
-                "scene": '310',
-                "subscene": '1089',
-                "clientVersion": '8.0.18',
-                "sharesearchid": '0',
-                "nettype": 'wifi',
-                "switchprofile": "0",
-                "switchnewuser": "0",
-            }
-            urllib3.disable_warnings()
-            response = requests.get(url=url, headers=header, params=params, proxies=proxies, verify=False)
-            if "data" not in response.text:
-                Common.logger(log_type, crawler).info("获取视频list时,session过期,随机睡眠 31-50 秒")
-                Common.logging(log_type, crawler, env, "获取视频list时,session过期,随机睡眠 31-50 秒")
-                # If the response is empty, sleep for a random 31-40 seconds
-                time.sleep(random.randint(31, 40))
-                cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
-            elif "items" not in response.json()["data"]:
-                Common.logger(log_type, crawler).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
-                Common.logging(log_type, crawler, env, f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
-                # If the response is empty, sleep for a random 1-3 minutes
-                time.sleep(random.randint(60, 180))
-                cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
-            feeds = response.json().get("data", {}).get("items", "")
-            if feeds == "":
-                Common.logger(log_type, crawler).info(f"feeds:{feeds}")
-                Common.logging(log_type, crawler, env, f"feeds:{feeds}")
-                return
-            for i in range(len(feeds)):
-                try:
-                    video_title = feeds[i].get("title", "").strip().replace("\n", "") \
-                        .replace("/", "").replace("\\", "").replace("\r", "") \
-                        .replace(":", "").replace("*", "").replace("?", "") \
-                        .replace("?", "").replace('"', "").replace("<", "") \
-                        .replace(">", "").replace("|", "").replace(" ", "") \
-                        .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
-                        .replace("'", "").replace("#", "").replace("Merge", "")
-                    publish_time_stamp = feeds[i].get("date", 0)
-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                    # Get the playback URL
-                    if "videoInfo" not in feeds[i]:
-                        video_url = ""
-                    elif "mpInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
-                        if len(feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
-                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
+        for page in range(1, 3):
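+            # Attempt two passes over the recommend list (range(1, 3) yields pages 1 and 2)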
+            try:
+                Common.logger(log_type, crawler).info(f"正在抓取第{page}页")
+                Common.logging(log_type, crawler, env, f"正在抓取第{page}页")
+                session = Common.get_session(log_type, crawler, env)
+                if session is None:
+                    time.sleep(1)
+                    cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
+                url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
+                header = {
+                    "Connection": "keep-alive",
+                    "content-type": "application/json",
+                    "Accept-Encoding": "gzip,compress,br,deflate",
+                    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
+                                  "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
+                                  "NetType/WIFI Language/zh_CN",
+                    "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
+                }
+                params = {
+                    'session': session,
+                    "offset": 0,
+                    "wxaVersion": "3.9.2",
+                    "count": "10",
+                    "channelid": "208",
+                    "scene": '310',
+                    "subscene": '1089',
+                    "clientVersion": '8.0.18',
+                    "sharesearchid": '0',
+                    "nettype": 'wifi',
+                    "switchprofile": "0",
+                    "switchnewuser": "0",
+                }
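+                # Silence urllib3 warnings (the request below uses verify=False)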
+                urllib3.disable_warnings()
+                response = requests.get(url=url, headers=header, params=params, proxies=proxies, verify=False)
+                if "data" not in response.text:
+                    Common.logger(log_type, crawler).info("获取视频list时,session过期,随机睡眠 31-50 秒")
+                    Common.logging(log_type, crawler, env, "获取视频list时,session过期,随机睡眠 31-50 秒")
+                    # If the response is empty, sleep for a random 31-40 seconds
+                    time.sleep(random.randint(31, 40))
+                    cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
+                elif "items" not in response.json()["data"]:
+                    Common.logger(log_type, crawler).info(f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                    Common.logging(log_type, crawler, env, f"get_feeds:{response.json()},随机睡眠 1-3 分钟")
+                    # If the response is empty, sleep for a random 1-3 minutes
+                    time.sleep(random.randint(60, 180))
+                    cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
+                feeds = response.json().get("data", {}).get("items", "")
+                if feeds == "":
+                    Common.logger(log_type, crawler).info(f"feeds:{feeds}")
+                    Common.logging(log_type, crawler, env, f"feeds:{feeds}")
+                    return
+                for i in range(len(feeds)):
+                    try:
+                        video_title = feeds[i].get("title", "").strip().replace("\n", "") \
+                            .replace("/", "").replace("\\", "").replace("\r", "") \
+                            .replace(":", "").replace("*", "").replace("?", "") \
+                            .replace("?", "").replace('"', "").replace("<", "") \
+                            .replace(">", "").replace("|", "").replace(" ", "") \
+                            .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
+                            .replace("'", "").replace("#", "").replace("Merge", "")
+                        publish_time_stamp = feeds[i].get("date", 0)
+                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+                        # Get the playback URL
+                        if "videoInfo" not in feeds[i]:
+                            video_url = ""
+                        elif "mpInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
+                            if len(feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
+                                video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
+                            else:
+                                video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
+                        elif "ctnInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
+                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
                         else:
-                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
-                    elif "ctnInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
-                        video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
-                    else:
-                        video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
-                    video_dict = {
-                        "video_title": video_title,
-                        "video_id": feeds[i].get("videoId", ""),
-                        "play_cnt": feeds[i].get("playCount", 0),
-                        "like_cnt": feeds[i].get("liked_cnt", 0),
-                        "comment_cnt": feeds[i].get("comment_cnt", 0),
-                        "share_cnt": feeds[i].get("shared_cnt", 0),
-                        "duration": feeds[i].get("mediaDuration", 0),
-                        "video_width": feeds[i].get("short_video_info", {}).get("width", 0),
-                        "video_height": feeds[i].get("short_video_info", {}).get("height", 0),
-                        "publish_time_stamp": publish_time_stamp,
-                        "publish_time_str": publish_time_str,
-                        "user_name": feeds[i].get("source", "").strip().replace("\n", ""),
-                        "user_id": feeds[i].get("openid", ""),
-                        "avatar_url": feeds[i].get("bizIcon", ""),
-                        "cover_url": feeds[i].get("thumbUrl", ""),
-                        "video_url": video_url,
-                        "session": session,
-                    }
-                    for k, v in video_dict.items():
-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
-                    Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
+                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
+                        video_dict = {
+                            "video_title": video_title,
+                            "video_id": feeds[i].get("videoId", ""),
+                            "play_cnt": feeds[i].get("playCount", 0),
+                            "like_cnt": feeds[i].get("liked_cnt", 0),
+                            "comment_cnt": feeds[i].get("comment_cnt", 0),
+                            "share_cnt": feeds[i].get("shared_cnt", 0),
+                            "duration": feeds[i].get("mediaDuration", 0),
+                            "video_width": feeds[i].get("short_video_info", {}).get("width", 0),
+                            "video_height": feeds[i].get("short_video_info", {}).get("height", 0),
+                            "publish_time_stamp": publish_time_stamp,
+                            "publish_time_str": publish_time_str,
+                            "user_name": feeds[i].get("source", "").strip().replace("\n", ""),
+                            "user_id": feeds[i].get("openid", ""),
+                            "avatar_url": feeds[i].get("bizIcon", ""),
+                            "cover_url": feeds[i].get("thumbUrl", ""),
+                            "video_url": video_url,
+                            "session": session,
+                        }
+                        for k, v in video_dict.items():
+                            Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
 
-                    if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
-                        Common.logger(log_type, crawler).info("无效视频\n")
-                        Common.logging(log_type, crawler, env, "无效视频\n")
-                    elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
-                        Common.logger(log_type, crawler).info("不满足抓取规则\n")
-                        Common.logging(log_type, crawler, env, "不满足抓取规则\n")
-                    elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
-                        Common.logger(log_type, crawler).info('视频已下载\n')
-                        Common.logging(log_type, crawler, env, '视频已下载\n')
-                    else:
-                        # cls.download_publish(log_type=log_type,
-                        #                      crawler=crawler,
-                        #                      our_uid=our_uid,
-                        #                      video_dict=video_dict,
-                        #                      rule_dict=rule_dict,
-                        #                      env=env)
-                        video_dict["out_user_id"] = video_dict["user_id"]
-                        video_dict["platform"] = crawler
-                        video_dict["strategy"] = log_type
-                        video_dict["out_video_id"] = video_dict["video_id"]
-                        video_dict["width"] = video_dict["video_width"]
-                        video_dict["height"] = video_dict["video_height"]
-                        video_dict["crawler_rule"] = json.dumps(rule_dict)
-                        video_dict["user_id"] = our_uid
-                        video_dict["publish_time"] = video_dict["publish_time_str"]
+                        if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
+                            Common.logger(log_type, crawler).info("无效视频\n")
+                            Common.logging(log_type, crawler, env, "无效视频\n")
+                        elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
+                            Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                            Common.logging(log_type, crawler, env, "不满足抓取规则\n")
+                        elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
+                            Common.logger(log_type, crawler).info('视频已下载\n')
+                            Common.logging(log_type, crawler, env, '视频已下载\n')
+                        else:
+                            # cls.download_publish(log_type=log_type,
+                            #                      crawler=crawler,
+                            #                      our_uid=our_uid,
+                            #                      video_dict=video_dict,
+                            #                      rule_dict=rule_dict,
+                            #                      env=env)
+                            video_dict["out_user_id"] = video_dict["user_id"]
+                            video_dict["platform"] = crawler
+                            video_dict["strategy"] = log_type
+                            video_dict["out_video_id"] = video_dict["video_id"]
+                            video_dict["width"] = video_dict["video_width"]
+                            video_dict["height"] = video_dict["video_height"]
+                            video_dict["crawler_rule"] = json.dumps(rule_dict)
+                            video_dict["user_id"] = our_uid
+                            video_dict["publish_time"] = video_dict["publish_time_str"]
 
-                        mq.send_msg(video_dict)
-                except Exception as e:
-                    Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
-                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"抓取列表页时异常:{e}\n")
-            Common.logging(log_type, crawler, env, f"抓取列表页时异常:{e}\n")
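+                            # Push the assembled record to the crawler ETL message queue (topic_crawler_etl_<env>)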
+                            mq.send_msg(video_dict)
+                    except Exception as e:
+                        Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
+            except Exception as e:
+                Common.logger(log_type, crawler).error(f"抓取第{page}页时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取第{page}页时异常:{e}\n")
 
     @classmethod
     def download_publish(cls, log_type, crawler, our_uid, video_dict, rule_dict, env):