2 年前 · 8a2d46cbd6
--- a/dev/dev_script/get_cpu_mem.py
+++ b/dev/dev_script/get_cpu_mem.py
@@ -0,0 +1,69 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2023/7/19
			
 
				+import psutil
			
 
				+
			
 
				+
			
 
				+class GetCpuMen:
			
 
				+    @classmethod
			
 
				+    def get_pid(cls, script):
			
 
				+        # 遍历所有正在运行的进程
			
 
				+        for proc in psutil.process_iter():
			
 
				+            try:
			
 
				+                # 获取进程的命令行参数
			
 
				+                cmds = proc.cmdline()
			
 
				+                # 检查命令行参数是否包含爬虫脚本的名称或关键字
			
 
				+                for cmd in cmds:
			
 
				+                    if script in cmd:
			
 
				+                        print(f"cmd:{cmd}")
			
 
				+                        # 获取进程的PID
			
 
				+                        pid = proc.pid
			
 
				+                        return pid
			
 
				+            except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
			
 
				+                pass
			
 
				+
			
 
				+    @classmethod
			
 
				+    def get_cpu_mem(cls, script):
			
 
				+        import psutil
			
 
				+
			
 
				+        # 获取当前进程的PID
			
 
				+        pid = cls.get_pid(script)
			
 
				+        print(f"pid:{pid}")
			
 
				+        # 获取CPU的使用情况
			
 
				+        cpu_percent = round(psutil.Process(pid).cpu_percent(), 2)
			
 
				+
			
 
				+        # 获取内存的使用情况
			
 
				+        memory_percent = round(psutil.Process(pid).memory_percent(), 2)
			
 
				+
			
 
				+        print(f"CPU使用率：{cpu_percent}")
			
 
				+        print(f"内存使用率：{memory_percent}")
			
 
				+
			
 
				+    @classmethod
			
 
				+    def get_all_cpu_mem(cls):
			
 
				+        script_list = [
			
 
				+            "run_xg_search",
			
 
				+            "run_xg_author",
			
 
				+            "run_xng_author",
			
 
				+            "run_xng_play",
			
 
				+            "run_xng_hour",
			
 
				+            "run_dy_author",
			
 
				+            "run_dy_recommend",
			
 
				+            "run_ks_recommend",
			
 
				+            "run_ks_author",
			
 
				+            "run_bszf_recommend",
			
 
				+            "run_ssnnyfq_recommend",
			
 
				+            "run_gzh_author",
			
 
				+            "run_weixinzhishu_score",
			
 
				+            "get_cpu_mem",
			
 
				+        ]
			
 
				+
			
 
				+        for scrip in script_list:
			
 
				+            print(f"scrip:{scrip}")
			
 
				+            cls.get_cpu_mem(scrip)
			
 
				+            print("\n")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # GetCpuMen.get_cpu_mem("get_cpu_mem")
			
 
				+    GetCpuMen.get_all_cpu_mem()
			
 
				+    pass
			
--- a/dev/dev_script/get_intervals.py
+++ b/dev/dev_script/get_intervals.py
@@ -0,0 +1,71 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+# @Author: wangkun
			
 
				+# @Time: 2023/7/18
			
 
				+import datetime
			
 
				+
			
 
				+# 读取日志文件并将每行日志存储到一个列表中
			
 
				+# with open('../logs/benshanzhufu-recommend-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/douyin-author-2023-07-17.log', 'r') as file:
			
 
				+with open('../logs/douyin-recommend-2023-07-19.log', 'r') as file:
			
 
				+# with open('../logs/kuaishou-author-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/kuaishou-recommend-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/xigua-author-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/xigua-search-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/xiaoniangao-author-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/xiaoniangao-play-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/xiaoniangao-hour-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/suisuiniannianyingfuqi-recommend-2023-07-17.log', 'r') as file:
			
 
				+# with open('../logs/gongzhonghao-author1-2023-07-18.log', 'r') as file:
			
 
				+# with open('../logs/gongzhonghao-author2-2023-07-18.log', 'r') as file:
			
 
				+# with open('../logs/gongzhonghao-author3-2023-07-18.log', 'r') as file:
			
 
				+# with open('../logs/gongzhonghao-author4-2023-07-18.log', 'r') as file:
			
 
				+# with open('../logs/gongzhonghao-author5-2023-07-18.log', 'r') as file:
			
 
				+# with open('../logs/gongzhonghao-author6-2023-07-18.log', 'r') as file:
			
 
				+    log_lines = file.readlines()
			
 
				+
			
 
				+# 存储间隔时间的列表
			
 
				+intervals = []
			
 
				+
			
 
				+# 遍历日志列表，计算相邻两条日志的时间间隔
			
 
				+for i in range(1, len(log_lines)):
			
 
				+    if "2023-" not in log_lines[i-1] or "2023-" not in log_lines[i]:
			
 
				+        continue
			
 
				+    # 解析时间戳
			
 
				+    timestamp1 = datetime.datetime.strptime(log_lines[i - 1].split(".")[0], '%Y-%m-%d %H:%M:%S')
			
 
				+    timestamp2 = datetime.datetime.strptime(log_lines[i].split(".")[0], '%Y-%m-%d %H:%M:%S')
			
 
				+
			
 
				+    # 计算时间间隔
			
 
				+    interval = timestamp2 - timestamp1
			
 
				+
			
 
				+    # 将时间间隔添加到间隔时间列表中
			
 
				+    intervals.append(interval)
			
 
				+
			
 
				+# 对间隔时间列表进行倒序排序
			
 
				+intervals.sort(reverse=True)
			
 
				+
			
 
				+# 取前10条间隔时间
			
 
				+top_10_intervals = intervals[:10]
			
 
				+# 取前10条间隔时间的秒数
			
 
				+top_10_intervals_seconds = [int(interval.total_seconds()) for interval in top_10_intervals]
			
 
				+
			
 
				+# 打印结果
			
 
				+print(top_10_intervals_seconds)
			
 
				+
			
 
				+# benshanzhufu  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
			
 
				+# douyin_author [30, 28, 25, 20, 18, 18, 17, 17, 17, 16]
			
 
				+# douyin_recommend  [62, 50, 38, 34, 33, 31, 31, 31, 31, 31]
			
 
				+# kuaishou_author  [31, 27, 23, 21, 21, 21, 19, 19, 17, 15]
			
 
				+# kuaishou_recommend  [27, 23, 23, 23, 23, 22, 22, 22, 21, 21]
			
 
				+# xigua_author  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
			
 
				+# xigua_search  [18, 14, 14, 13, 13, 12, 11, 11, 11, 10]
			
 
				+# xiaoniangao_author  [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
			
 
				+# xiaoniangao_play  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
			
 
				+# xiaoniangao_hour  [61, 3, 2, 2, 2, 2, 2, 2, 1, 1]
			
 
				+# suisuiniannian  [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
			
 
				+# gzh1  [26, 26, 26, 25, 25, 25, 25, 24, 24, 24]
			
 
				+# gzh2  [28, 26, 25, 25, 24, 24, 24, 24, 24, 24]
			
 
				+# gzh3  [27, 26, 26, 25, 25, 25, 24, 24, 24, 24]
			
 
				+# gzh4  [26, 26, 25, 24, 24, 24, 24, 24, 24, 24]
			
 
				+# gzh5  [29, 26, 25, 25, 24, 24, 24, 24, 24, 24]
			
 
				+# gzh6  [26, 25, 25, 25, 25, 24, 24, 24, 24, 24]
			
 
				+
			
--- a/dev/dev_script/xg_recommend.py
+++ b/dev/dev_script/xg_recommend.py
@@ -1,312 +0,0 @@
 
				-# -*- coding: utf-8 -*-
			
 
				-# @Author: wangkun
			
 
				-# @Time: 2023/7/6
			
 
				-import os
			
 
				-import sys
			
 
				-import time
			
 
				-import cv2
			
 
				-from selenium.webdriver.common.keys import Keys
			
 
				-from selenium.webdriver import DesiredCapabilities
			
 
				-from selenium import webdriver
			
 
				-from selenium.webdriver.chrome.service import Service
			
 
				-from selenium.webdriver.common.by import By
			
 
				-sys.path.append(os.getcwd())
			
 
				-from common.common import Common
			
 
				-
			
 
				-
			
 
				-class XGRecommend(object):
			
 
				-
			
 
				-    def __init__(self, log_type, crawler, env):
			
 
				-        """
			
 
				-        本地启动 Chrome，指定端口号:12306
			
 
				-        open -a "Google Chrome" --args --remote-debugging-port=12306
			
 
				-        """
			
 
				-        Common.logger(log_type, crawler).info("启动 Chrome 浏览器")
			
 
				-        cmd = 'open -a "Google Chrome" --args --remote-debugging-port=12306'
			
 
				-        os.system(cmd)
			
 
				-
			
 
				-        if env == "dev":
			
 
				-            chromedriver = "/Users/wangkun/Downloads/chromedriver/chromedriver_v114/chromedriver"
			
 
				-        else:
			
 
				-            chromedriver = "/usr/bin/chromedriver"
			
 
				-
			
 
				-        # 打印请求配置
			
 
				-        ca = DesiredCapabilities.CHROME
			
 
				-        ca["goog:loggingPrefs"] = {"performance": "ALL"}
			
 
				-        # 初始化浏览器
			
 
				-        self.browser = webdriver.ChromeOptions()
			
 
				-        self.browser.add_experimental_option("debuggerAddress", "127.0.0.1:12306")
			
 
				-        # # 设置user-agent
			
 
				-        # self.browser.add_argument(
			
 
				-        #     f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
			
 
				-        # # 去掉提示：Chrome正收到自动测试软件的控制
			
 
				-        # self.browser.add_argument('--disable-infobars')
			
 
				-
			
 
				-        # # 禁用GPU加速
			
 
				-        # self.browser.add_argument('--disable-gpu')
			
 
				-        # # 关闭开发者模式
			
 
				-        # self.browser.add_experimental_option("useAutomationExtension", False)
			
 
				-        # # 以键值对的形式加入参数
			
 
				-        # self.browser.add_experimental_option('excludeSwitches', ['enable-automation'])
			
 
				-        # # 禁用启用Blink运行时的功能
			
 
				-        # self.browser.add_argument('--disable-blink-features=AutomationControlled')
			
 
				-        # 不打开浏览器运行
			
 
				-        # self.browser.add_argument("--headless")
			
 
				-        # # linux 环境中，静默模式必须要加的参数
			
 
				-        # self.browser.add_argument("--no-sandbox")
			
 
				-        # # 设置浏览器size
			
 
				-        # self.browser.add_argument("--window-size=1920,1080")
			
 
				-
			
 
				-        # driver初始化
			
 
				-        self.driver = webdriver.Chrome(desired_capabilities=ca, options=self.browser, service=Service(chromedriver))
			
 
				-        self.driver.implicitly_wait(10)
			
 
				-        Common.logger(log_type, crawler).info("打开西瓜推荐页")
			
 
				-        self.driver.get(f"https://www.ixigua.com/")
			
 
				-        # 在当前页面打开新的标签页
			
 
				-        self.driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.COMMAND + 't')
			
 
				-        # 切换到新打开的标签页
			
 
				-        self.driver.switch_to.window(self.driver.window_handles[-1])
			
 
				-        self.username = "19831265541"
			
 
				-        self.password = "Test111111"
			
 
				-        time.sleep(2)
			
 
				-
			
 
				-    def quit(self, log_type, crawler):
			
 
				-        Common.logger(log_type, crawler).info("退出浏览器")
			
 
				-        self.driver.quit()
			
 
				-
			
 
				-    #  传入滑块背景图片本地路径和滑块本地路径，返回滑块到缺口的距离
			
 
				-    @staticmethod
			
 
				-    def findPic(log_type, crawler, img_bg_path, img_slider_path):
			
 
				-        """
			
 
				-        找出图像中最佳匹配位置
			
 
				-        :param log_type: log
			
 
				-        :param crawler: 爬虫
			
 
				-        :param img_bg_path: 滑块背景图本地路径
			
 
				-        :param img_slider_path: 滑块图片本地路径
			
 
				-        :return: 返回最差匹配、最佳匹配对应的x坐标
			
 
				-        """
			
 
				-
			
 
				-        # 读取滑块背景图片，参数是图片路径，Opencv2默认使用BGR模式
			
 
				-        # cv2.imread()是 image read的简写
			
 
				-        # img_bg 是一个numpy库ndarray数组对象
			
 
				-        img_bg = cv2.imread(img_bg_path)
			
 
				-
			
 
				-        # 对滑块背景图片进行处理，由BGR模式转为gray模式（即灰度模式，也就是黑白图片）
			
 
				-        # 为什么要处理？ BGR模式（彩色图片）的数据比黑白图片的数据大，处理后可以加快算法的计算
			
 
				-        # BGR模式：常见的是RGB模式
			
 
				-        # R代表红，red; G代表绿，green;  B代表蓝，blue。
			
 
				-        # RGB模式就是，色彩数据模式，R在高位，G在中间，B在低位。BGR正好相反。
			
 
				-        # 如红色：RGB模式是(255,0,0)，BGR模式是(0,0,255)
			
 
				-        img_bg_gray = cv2.cvtColor(img_bg, cv2.COLOR_BGR2GRAY)
			
 
				-
			
 
				-        # 读取滑块，参数1是图片路径，参数2是使用灰度模式
			
 
				-        img_slider_gray = cv2.imread(img_slider_path, 0)
			
 
				-
			
 
				-        # 在滑块背景图中匹配滑块。参数cv2.TM_CCOEFF_NORMED是opencv2中的一种算法
			
 
				-        res = cv2.matchTemplate(img_bg_gray, img_slider_gray, cv2.TM_CCOEFF_NORMED)
			
 
				-
			
 
				-        Common.logger(log_type, crawler).info(f"{'#' * 50}")
			
 
				-        Common.logger(log_type, crawler).info(type(res))  # 打印：<class 'numpy.ndarray'>
			
 
				-        Common.logger(log_type, crawler).info(res)
			
 
				-        # 打印：一个二维的ndarray数组
			
 
				-        # [[0.05604218  0.05557462  0.06844381... - 0.1784117 - 0.1811338 - 0.18415523]
			
 
				-        #  [0.06151756  0.04408009  0.07010461... - 0.18493137 - 0.18440475 - 0.1843424]
			
 
				-        # [0.0643926    0.06221284  0.0719175... - 0.18742703 - 0.18535161 - 0.1823346]
			
 
				-        # ...
			
 
				-        # [-0.07755355 - 0.08177952 - 0.08642308... - 0.16476074 - 0.16210903 - 0.15467581]
			
 
				-        # [-0.06975575 - 0.07566144 - 0.07783117... - 0.1412715 - 0.15145643 - 0.14800543]
			
 
				-        # [-0.08476129 - 0.08415948 - 0.0949327... - 0.1371379 - 0.14271489 - 0.14166716]]
			
 
				-
			
 
				-        Common.logger(log_type, crawler).info(f"{'#' * 50}")
			
 
				-
			
 
				-        # cv22.minMaxLoc() 从ndarray数组中找到最小值、最大值及他们的坐标
			
 
				-        value = cv2.minMaxLoc(res)
			
 
				-        # 得到的value，如：(-0.1653602570295334, 0.6102921366691589, (144, 1), (141, 56))
			
 
				-
			
 
				-        Common.logger(log_type, crawler).info(f"{value, '#' * 30}")
			
 
				-
			
 
				-        # 获取x坐标，如上面的144、141
			
 
				-        return value[2:][0][0], value[2:][1][0]
			
 
				-
			
 
				-    # 返回两个数组：一个用于加速拖动滑块，一个用于减速拖动滑块
			
 
				-    @staticmethod
			
 
				-    def generate_tracks(distance):
			
 
				-        # 给距离加上20，这20像素用在滑块滑过缺口后，减速折返回到缺口
			
 
				-        distance += 20
			
 
				-        v = 0
			
 
				-        t = 0.2
			
 
				-        forward_tracks = []
			
 
				-        current = 0
			
 
				-        mid = distance * 3 / 5  # 减速阀值
			
 
				-        while current < distance:
			
 
				-            if current < mid:
			
 
				-                a = 2  # 加速度为+2
			
 
				-            else:
			
 
				-                a = -3  # 加速度-3
			
 
				-            s = v * t + 0.5 * a * (t ** 2)
			
 
				-            v = v + a * t
			
 
				-            current += s
			
 
				-            forward_tracks.append(round(s))
			
 
				-
			
 
				-        back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1]
			
 
				-        return forward_tracks, back_tracks
			
 
				-
			
 
				-    # 获取距离值
			
 
				-    @staticmethod
			
 
				-    def get_tracks(distance):
			
 
				-        """
			
 
				-        模拟人的滑动行为，先匀加速后匀减速
			
 
				-        匀变速基本公式
			
 
				-        v=v0+at
			
 
				-        s=vot+1/2at2
			
 
				-        """
			
 
				-        # 初速度
			
 
				-        v = 0
			
 
				-        # 设置时间
			
 
				-        t = 0.3
			
 
				-        # 存储每段距离值
			
 
				-        tracks = []
			
 
				-        # 当前距离
			
 
				-        current = 0
			
 
				-        # 中间位置为4/5距离处
			
 
				-        mid = distance * 4 / 5
			
 
				-        while current < distance:
			
 
				-            if current < mid:
			
 
				-                # 加速阶段
			
 
				-                a = 2
			
 
				-            else:
			
 
				-                # 减速阶段
			
 
				-                a = -3
			
 
				-            # 当前速度
			
 
				-            v0 = v
			
 
				-            # 当前位移
			
 
				-            s = v0 * t + 0.5 * a * t ** 2
			
 
				-            # 更新当前速度
			
 
				-            v = v0 + a * t
			
 
				-            # 更新当前位移
			
 
				-            current += s
			
 
				-            # 添加到轨迹列表
			
 
				-            tracks.append(round(s))
			
 
				-        return tracks
			
 
				-
			
 
				-    @staticmethod
			
 
				-    def FindPic(log_type, crawler, target, template):
			
 
				-        """
			
 
				-        找出图像中最佳匹配位置
			
 
				-        :param log_type: 日志
			
 
				-        :param crawler: 爬虫
			
 
				-        :param target: 目标即背景图
			
 
				-        :param template: 模板即需要找到的图
			
 
				-        :return: 返回最佳匹配及其最差匹配和对应的坐标
			
 
				-        """
			
 
				-        target_rgb = cv2.imread(target)
			
 
				-        target_gray = cv2.cvtColor(target_rgb, cv2.COLOR_BGR2GRAY)
			
 
				-        template_rgb = cv2.imread(template, 0)
			
 
				-        res = cv2.matchTemplate(target_gray, template_rgb, cv2.TM_CCOEFF_NORMED)
			
 
				-        value = cv2.minMaxLoc(res)
			
 
				-        Common.logger(log_type, crawler).info(value)
			
 
				-        # 计算缺口的 X 轴距离
			
 
				-        x_val = int(value[3][0])
			
 
				-        Common.logger(log_type, crawler).info(f"缺口的 X 轴距离:{x_val}")
			
 
				-        # 获取模板图的宽高
			
 
				-        template_height, template_width, template_c = cv2.imread(template).shape
			
 
				-        Common.logger(log_type, crawler).info(f"模板高:{template_height}")
			
 
				-        Common.logger(log_type, crawler).info(f"模板宽:{template_width}")
			
 
				-        Common.logger(log_type, crawler).info(f"图片的通道数:{template_c}")
			
 
				-        # 计算需要滑动的距离
			
 
				-        move_val = x_val - template_width
			
 
				-        Common.logger(log_type, crawler).info(f"需要滑动的距离:{move_val}")
			
 
				-        return x_val
			
 
				-
			
 
				-    def login(self, log_type, crawler, env):
			
 
				-        # Common.logger(log_type, crawler).info("点击登录")
			
 
				-        # self.driver.find_element(By.XPATH, '//*[@class="xg-button xg-button-primary xg-button-middle loginButton"]').click()
			
 
				-        # time.sleep(random.randint(1, 2))
			
 
				-        # Common.logger(log_type, crawler).info("点击密码登录")
			
 
				-        # self.driver.find_element(By.XPATH, '//*[@class="web-login-link-list__item__text"]').click()
			
 
				-        # time.sleep(random.randint(1, 2))
			
 
				-        # Common.logger(log_type, crawler).info("输入手机号")
			
 
				-        # self.driver.find_element(By.XPATH, '//*[@class="web-login-normal-input__input"]').send_keys(self.username)
			
 
				-        # time.sleep(random.randint(1, 2))
			
 
				-        # Common.logger(log_type, crawler).info("输入密码")
			
 
				-        # self.driver.find_element(By.XPATH, '//*[@class="web-login-button-input__input"]').send_keys(self.password)
			
 
				-        # time.sleep(random.randint(1, 2))
			
 
				-        # Common.logger(log_type, crawler).info("点击登录")
			
 
				-        # self.driver.find_element(By.XPATH, '//*[@class="web-login-account-password__button-wrapper"]/*[1]').click()
			
 
				-        # time.sleep(random.randint(1, 2))
			
 
				-
			
 
				-        # # 获取滑块
			
 
				-        # Common.logger(log_type, crawler).info("获取滑块")
			
 
				-        # move_btns = self.driver.find_elements(By.XPATH, '//*[@class="sc-kkGfuU bujTgx"]')
			
 
				-        # if len(move_btns) == 0:
			
 
				-        #     Common.logger(log_type, crawler).info("未发现滑块，3-5 秒后重试")
			
 
				-        #     self.quit(log_type, crawler)
			
 
				-        #     time.sleep(random.randint(3, 5))
			
 
				-        #     self.__init__(log_type, crawler, env)
			
 
				-        #     self.login(log_type, crawler, env)
			
 
				-        # move_btn = move_btns[0]
			
 
				-        #
			
 
				-        # while True:
			
 
				-        #
			
 
				-        #     # 使用requests下载滑块
			
 
				-        #     slide_url = self.driver.find_element(By.XPATH,
			
 
				-        #                                          '//*[@class="captcha_verify_img_slide react-draggable sc-VigVT ggNWOG"]').get_attribute(
			
 
				-        #         "src")
			
 
				-        #     slide_dir = f"./{crawler}/photo/img_slide.png"
			
 
				-        #     urllib3.disable_warnings()
			
 
				-        #     slide_url_response = requests.get(slide_url, verify=False)
			
 
				-        #     with open(slide_dir, "wb") as file:
			
 
				-        #         file.write(slide_url_response.content)
			
 
				-        #
			
 
				-        #     # 使用urllib下载背景图
			
 
				-        #     bg_image_url = self.driver.find_element(By.XPATH, '//*[@id="captcha-verify-image"]').get_attribute("src")
			
 
				-        #     bg_image_dir = f"./{crawler}/photo/img_bg.png"
			
 
				-        #     urllib3.disable_warnings()
			
 
				-        #     bg_image_url_response = requests.get(bg_image_url, verify=False)
			
 
				-        #     with open(bg_image_dir, "wb") as file:
			
 
				-        #         file.write(bg_image_url_response.content)
			
 
				-        #
			
 
				-        #     offset = self.FindPic(log_type, crawler, bg_image_dir, slide_dir)
			
 
				-        #     Common.logger(log_type, crawler).info(f"offset:{offset}")
			
 
				-        #
			
 
				-        #     # 在滑块上暂停
			
 
				-        #     Common.logger(log_type, crawler).info("在滑块上暂停")
			
 
				-        #     ActionChains(self.driver).click_and_hold(on_element=move_btn).perform()
			
 
				-        #     # 拖动滑块
			
 
				-        #     Common.logger(log_type, crawler).info("拖动滑块0.7*距离")
			
 
				-        #     ActionChains(self.driver).move_to_element_with_offset(to_element=move_btn, xoffset=int(0.5*offset), yoffset=0).perform()
			
 
				-        #     # 拖动剩余像素
			
 
				-        #     Common.logger(log_type, crawler).info("拖动剩余像素")
			
 
				-        #     tracks = self.get_tracks(int(0.15*offset))
			
 
				-        #     # 遍历梅一段距离
			
 
				-        #     for track in tracks:
			
 
				-        #         # 滑块移动响应距离
			
 
				-        #         ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=0).perform()
			
 
				-        #     # 休息1s
			
 
				-        #     Common.logger(log_type, crawler).info("休息1s")
			
 
				-        #     time.sleep(1)
			
 
				-        #     # 释放滑块
			
 
				-        #     Common.logger(log_type, crawler).info("释放滑块")
			
 
				-        #     ActionChains(self.driver).release().perform()
			
 
				-        #
			
 
				-        #     if len(move_btns) != 0:
			
 
				-        #         time.sleep(1)
			
 
				-        #         continue
			
 
				-        #     break
			
 
				-        Common.logger(log_type, crawler).info("刷新页面")
			
 
				-        self.driver.refresh()
			
 
				-
			
 
				-        Common.logger(log_type, crawler).info("关闭当前标签页")
			
 
				-        time.sleep(5)
			
 
				-        # 关闭当前标签页
			
 
				-        self.driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.COMMAND + 'w')
			
 
				-        Common.logger(log_type, crawler).info("已关闭")
			
 
				-        Common.logger(log_type, crawler).info("退出浏览器")
			
 
				-        self.quit(log_type, crawler)
			
 
				-
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    Recommend = XGRecommend("search", "dev", "dev")
			
 
				-    Recommend.login("search", "dev", "dev")
			
 
				-    pass
			
--- a/kanyikan/kanyikan_recommend/kanyikan_recommend0627.py
+++ b/kanyikan/kanyikan_recommend/kanyikan_recommend0627.py
@@ -33,135 +33,136 @@ class KanyikanRecommend:
 
				     @classmethod
			
 
				     def get_videoList(cls, log_type, crawler, our_uid, rule_dict, env):
			
 
				         mq = MQ(topic_name="topic_crawler_etl_" + env)
			
 
				-        try:
			
 
				-            Common.logger(log_type, crawler).info(f"正在抓取列表页")
			
 
				-            Common.logging(log_type, crawler, env, f"正在抓取列表页")
			
 
				-            session = Common.get_session(log_type, crawler, env)
			
 
				-            if session is None:
			
 
				-                time.sleep(1)
			
 
				-                cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
			
 
				-            url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
			
 
				-            header = {
			
 
				-                "Connection": "keep-alive",
			
 
				-                "content-type": "application/json",
			
 
				-                "Accept-Encoding": "gzip,compress,br,deflate",
			
 
				-                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
			
 
				-                              "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
			
 
				-                              "NetType/WIFI Language/zh_CN",
			
 
				-                "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
			
 
				-            }
			
 
				-            params = {
			
 
				-                'session': session,
			
 
				-                "offset": 0,
			
 
				-                "wxaVersion": "3.9.2",
			
 
				-                "count": "10",
			
 
				-                "channelid": "208",
			
 
				-                "scene": '310',
			
 
				-                "subscene": '1089',
			
 
				-                "clientVersion": '8.0.18',
			
 
				-                "sharesearchid": '0',
			
 
				-                "nettype": 'wifi',
			
 
				-                "switchprofile": "0",
			
 
				-                "switchnewuser": "0",
			
 
				-            }
			
 
				-            urllib3.disable_warnings()
			
 
				-            response = requests.get(url=url, headers=header, params=params, proxies=proxies, verify=False)
			
 
				-            if "data" not in response.text:
			
 
				-                Common.logger(log_type, crawler).info("获取视频list时，session过期，随机睡眠 31-50 秒")
			
 
				-                Common.logging(log_type, crawler, env, "获取视频list时，session过期，随机睡眠 31-50 秒")
			
 
				-                # 如果返回空信息，则随机睡眠 31-40 秒
			
 
				-                time.sleep(random.randint(31, 40))
			
 
				-                cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
			
 
				-            elif "items" not in response.json()["data"]:
			
 
				-                Common.logger(log_type, crawler).info(f"get_feeds:{response.json()}，随机睡眠 1-3 分钟")
			
 
				-                Common.logging(log_type, crawler, env, f"get_feeds:{response.json()}，随机睡眠 1-3 分钟")
			
 
				-                # 如果返回空信息，则随机睡眠 1-3 分钟
			
 
				-                time.sleep(random.randint(60, 180))
			
 
				-                cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
			
 
				-            feeds = response.json().get("data", {}).get("items", "")
			
 
				-            if feeds == "":
			
 
				-                Common.logger(log_type, crawler).info(f"feeds:{feeds}")
			
 
				-                Common.logging(log_type, crawler, env, f"feeds:{feeds}")
			
 
				-                return
			
 
				-            for i in range(len(feeds)):
			
 
				-                try:
			
 
				-                    video_title = feeds[i].get("title", "").strip().replace("\n", "") \
			
 
				-                        .replace("/", "").replace("\\", "").replace("\r", "") \
			
 
				-                        .replace(":", "").replace("*", "").replace("？", "") \
			
 
				-                        .replace("?", "").replace('"', "").replace("<", "") \
			
 
				-                        .replace(">", "").replace("|", "").replace(" ", "") \
			
 
				-                        .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
			
 
				-                        .replace("'", "").replace("#", "").replace("Merge", "")
			
 
				-                    publish_time_stamp = feeds[i].get("date", 0)
			
 
				-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
			
 
				-                    # 获取播放地址
			
 
				-                    if "videoInfo" not in feeds[i]:
			
 
				-                        video_url = ""
			
 
				-                    elif "mpInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
			
 
				-                        if len(feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
			
 
				-                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
			
 
				+        for page in range(1, 3):
			
 
				+            try:
			
 
				+                Common.logger(log_type, crawler).info(f"正在抓取第{page}页")
			
 
				+                Common.logging(log_type, crawler, env, f"正在抓取第{page}页")
			
 
				+                session = Common.get_session(log_type, crawler, env)
			
 
				+                if session is None:
			
 
				+                    time.sleep(1)
			
 
				+                    cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
			
 
				+                url = 'https://search.weixin.qq.com/cgi-bin/recwxa/recwxavideolist?'
			
 
				+                header = {
			
 
				+                    "Connection": "keep-alive",
			
 
				+                    "content-type": "application/json",
			
 
				+                    "Accept-Encoding": "gzip,compress,br,deflate",
			
 
				+                    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) "
			
 
				+                                  "AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.18(0x18001236) "
			
 
				+                                  "NetType/WIFI Language/zh_CN",
			
 
				+                    "Referer": "https://servicewechat.com/wxbb9a805eb4f9533c/234/page-frame.html",
			
 
				+                }
			
 
				+                params = {
			
 
				+                    'session': session,
			
 
				+                    "offset": 0,
			
 
				+                    "wxaVersion": "3.9.2",
			
 
				+                    "count": "10",
			
 
				+                    "channelid": "208",
			
 
				+                    "scene": '310',
			
 
				+                    "subscene": '1089',
			
 
				+                    "clientVersion": '8.0.18',
			
 
				+                    "sharesearchid": '0',
			
 
				+                    "nettype": 'wifi',
			
 
				+                    "switchprofile": "0",
			
 
				+                    "switchnewuser": "0",
			
 
				+                }
			
 
				+                urllib3.disable_warnings()
			
 
				+                response = requests.get(url=url, headers=header, params=params, proxies=proxies, verify=False)
			
 
				+                if "data" not in response.text:
			
 
				+                    Common.logger(log_type, crawler).info("获取视频list时，session过期，随机睡眠 31-50 秒")
			
 
				+                    Common.logging(log_type, crawler, env, "获取视频list时，session过期，随机睡眠 31-50 秒")
			
 
				+                    # 如果返回空信息，则随机睡眠 31-40 秒
			
 
				+                    time.sleep(random.randint(31, 40))
			
 
				+                    cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
			
 
				+                elif "items" not in response.json()["data"]:
			
 
				+                    Common.logger(log_type, crawler).info(f"get_feeds:{response.json()}，随机睡眠 1-3 分钟")
			
 
				+                    Common.logging(log_type, crawler, env, f"get_feeds:{response.json()}，随机睡眠 1-3 分钟")
			
 
				+                    # 如果返回空信息，则随机睡眠 1-3 分钟
			
 
				+                    time.sleep(random.randint(60, 180))
			
 
				+                    cls.get_videoList(log_type, crawler, our_uid, rule_dict, env)
			
 
				+                feeds = response.json().get("data", {}).get("items", "")
			
 
				+                if feeds == "":
			
 
				+                    Common.logger(log_type, crawler).info(f"feeds:{feeds}")
			
 
				+                    Common.logging(log_type, crawler, env, f"feeds:{feeds}")
			
 
				+                    return
			
 
				+                for i in range(len(feeds)):
			
 
				+                    try:
			
 
				+                        video_title = feeds[i].get("title", "").strip().replace("\n", "") \
			
 
				+                            .replace("/", "").replace("\\", "").replace("\r", "") \
			
 
				+                            .replace(":", "").replace("*", "").replace("？", "") \
			
 
				+                            .replace("?", "").replace('"', "").replace("<", "") \
			
 
				+                            .replace(">", "").replace("|", "").replace(" ", "") \
			
 
				+                            .replace("&NBSP", "").replace(".", "。").replace(" ", "") \
			
 
				+                            .replace("'", "").replace("#", "").replace("Merge", "")
			
 
				+                        publish_time_stamp = feeds[i].get("date", 0)
			
 
				+                        publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
			
 
				+                        # 获取播放地址
			
 
				+                        if "videoInfo" not in feeds[i]:
			
 
				+                            video_url = ""
			
 
				+                        elif "mpInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
			
 
				+                            if len(feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"]) > 2:
			
 
				+                                video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][2]["url"]
			
 
				+                            else:
			
 
				+                                video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
			
 
				+                        elif "ctnInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
			
 
				+                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
			
 
				                         else:
			
 
				-                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["mpInfo"]["urlInfo"][0]["url"]
			
 
				-                    elif "ctnInfo" in feeds[i]["videoInfo"]["videoCdnInfo"]:
			
 
				-                        video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["ctnInfo"]["urlInfo"][0]["url"]
			
 
				-                    else:
			
 
				-                        video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
			
 
				-                    video_dict = {
			
 
				-                        "video_title": video_title,
			
 
				-                        "video_id": feeds[i].get("videoId", ""),
			
 
				-                        "play_cnt": feeds[i].get("playCount", 0),
			
 
				-                        "like_cnt": feeds[i].get("liked_cnt", 0),
			
 
				-                        "comment_cnt": feeds[i].get("comment_cnt", 0),
			
 
				-                        "share_cnt": feeds[i].get("shared_cnt", 0),
			
 
				-                        "duration": feeds[i].get("mediaDuration", 0),
			
 
				-                        "video_width": feeds[i].get("short_video_info", {}).get("width", 0),
			
 
				-                        "video_height": feeds[i].get("short_video_info", {}).get("height", 0),
			
 
				-                        "publish_time_stamp": publish_time_stamp,
			
 
				-                        "publish_time_str": publish_time_str,
			
 
				-                        "user_name": feeds[i].get("source", "").strip().replace("\n", ""),
			
 
				-                        "user_id": feeds[i].get("openid", ""),
			
 
				-                        "avatar_url": feeds[i].get("bizIcon", ""),
			
 
				-                        "cover_url": feeds[i].get("thumbUrl", ""),
			
 
				-                        "video_url": video_url,
			
 
				-                        "session": session,
			
 
				-                    }
			
 
				-                    for k, v in video_dict.items():
			
 
				-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
			
 
				-                    Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
			
 
				+                            video_url = feeds[i]["videoInfo"]["videoCdnInfo"]["urlInfo"][0]["url"]
			
 
				+                        video_dict = {
			
 
				+                            "video_title": video_title,
			
 
				+                            "video_id": feeds[i].get("videoId", ""),
			
 
				+                            "play_cnt": feeds[i].get("playCount", 0),
			
 
				+                            "like_cnt": feeds[i].get("liked_cnt", 0),
			
 
				+                            "comment_cnt": feeds[i].get("comment_cnt", 0),
			
 
				+                            "share_cnt": feeds[i].get("shared_cnt", 0),
			
 
				+                            "duration": feeds[i].get("mediaDuration", 0),
			
 
				+                            "video_width": feeds[i].get("short_video_info", {}).get("width", 0),
			
 
				+                            "video_height": feeds[i].get("short_video_info", {}).get("height", 0),
			
 
				+                            "publish_time_stamp": publish_time_stamp,
			
 
				+                            "publish_time_str": publish_time_str,
			
 
				+                            "user_name": feeds[i].get("source", "").strip().replace("\n", ""),
			
 
				+                            "user_id": feeds[i].get("openid", ""),
			
 
				+                            "avatar_url": feeds[i].get("bizIcon", ""),
			
 
				+                            "cover_url": feeds[i].get("thumbUrl", ""),
			
 
				+                            "video_url": video_url,
			
 
				+                            "session": session,
			
 
				+                        }
			
 
				+                        for k, v in video_dict.items():
			
 
				+                            Common.logger(log_type, crawler).info(f"{k}:{v}")
			
 
				+                        Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
			
 
				 
			
 
				-                    if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
			
 
				-                        Common.logger(log_type, crawler).info("无效视频\n")
			
 
				-                        Common.logging(log_type, crawler, env, "无效视频\n")
			
 
				-                    elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
			
 
				-                        Common.logger(log_type, crawler).info("不满足抓取规则\n")
			
 
				-                        Common.logging(log_type, crawler, env, "不满足抓取规则\n")
			
 
				-                    elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
			
 
				-                        Common.logger(log_type, crawler).info('视频已下载\n')
			
 
				-                        Common.logging(log_type, crawler, env, '视频已下载\n')
			
 
				-                    else:
			
 
				-                        # cls.download_publish(log_type=log_type,
			
 
				-                        #                      crawler=crawler,
			
 
				-                        #                      our_uid=our_uid,
			
 
				-                        #                      video_dict=video_dict,
			
 
				-                        #                      rule_dict=rule_dict,
			
 
				-                        #                      env=env)
			
 
				-                        video_dict["out_user_id"] = video_dict["user_id"]
			
 
				-                        video_dict["platform"] = crawler
			
 
				-                        video_dict["strategy"] = log_type
			
 
				-                        video_dict["out_video_id"] = video_dict["video_id"]
			
 
				-                        video_dict["width"] = video_dict["video_width"]
			
 
				-                        video_dict["height"] = video_dict["video_height"]
			
 
				-                        video_dict["crawler_rule"] = json.dumps(rule_dict)
			
 
				-                        video_dict["user_id"] = our_uid
			
 
				-                        video_dict["publish_time"] = video_dict["publish_time_str"]
			
 
				+                        if video_dict["video_id"] == "" or video_dict["video_title"] == "" or video_dict["video_url"] == "":
			
 
				+                            Common.logger(log_type, crawler).info("无效视频\n")
			
 
				+                            Common.logging(log_type, crawler, env, "无效视频\n")
			
 
				+                        elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
			
 
				+                            Common.logger(log_type, crawler).info("不满足抓取规则\n")
			
 
				+                            Common.logging(log_type, crawler, env, "不满足抓取规则\n")
			
 
				+                        elif cls.repeat_video(log_type, crawler, video_dict["video_id"], env) != 0:
			
 
				+                            Common.logger(log_type, crawler).info('视频已下载\n')
			
 
				+                            Common.logging(log_type, crawler, env, '视频已下载\n')
			
 
				+                        else:
			
 
				+                            # cls.download_publish(log_type=log_type,
			
 
				+                            #                      crawler=crawler,
			
 
				+                            #                      our_uid=our_uid,
			
 
				+                            #                      video_dict=video_dict,
			
 
				+                            #                      rule_dict=rule_dict,
			
 
				+                            #                      env=env)
			
 
				+                            video_dict["out_user_id"] = video_dict["user_id"]
			
 
				+                            video_dict["platform"] = crawler
			
 
				+                            video_dict["strategy"] = log_type
			
 
				+                            video_dict["out_video_id"] = video_dict["video_id"]
			
 
				+                            video_dict["width"] = video_dict["video_width"]
			
 
				+                            video_dict["height"] = video_dict["video_height"]
			
 
				+                            video_dict["crawler_rule"] = json.dumps(rule_dict)
			
 
				+                            video_dict["user_id"] = our_uid
			
 
				+                            video_dict["publish_time"] = video_dict["publish_time_str"]
			
 
				 
			
 
				-                        mq.send_msg(video_dict)
			
 
				-                except Exception as e:
			
 
				-                    Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
			
 
				-                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
			
 
				-        except Exception as e:
			
 
				-            Common.logger(log_type, crawler).error(f"抓取列表页时异常:{e}\n")
			
 
				-            Common.logging(log_type, crawler, env, f"抓取列表页时异常:{e}\n")
			
 
				+                            mq.send_msg(video_dict)
			
 
				+                    except Exception as e:
			
 
				+                        Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
			
 
				+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
			
 
				+            except Exception as e:
			
 
				+                Common.logger(log_type, crawler).error(f"抓取第{page}页时异常:{e}\n")
			
 
				+                Common.logging(log_type, crawler, env, f"抓取第{page}页时异常:{e}\n")
			
 
				 
			
 
				     @classmethod
			
 
				     def download_publish(cls, log_type, crawler, our_uid, video_dict, rule_dict, env):