# -*- coding: utf-8 -*-
# @Author: wangkun
# @Time: 2023/7/6
import os
import random
import sys
import time
import cv2
import numpy as np
from selenium.webdriver.common.keys import Keys
import requests
import urllib3
from selenium.webdriver import DesiredCapabilities, ActionChains
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
sys.path.append(os.getcwd())
from common.common import Common


class XGRecommend(object):
    def __init__(self, log_type, crawler, env):
        """
        Launch Chrome locally with remote debugging on port 12306:
        open -a "Google Chrome" --args --remote-debugging-port=12306
        """
        Common.logger(log_type, crawler).info("Launching the Chrome browser")
        cmd = 'open -a "Google Chrome" --args --remote-debugging-port=12306'
        os.system(cmd)
        if env == "dev":
            chromedriver = "/Users/wangkun/Downloads/chromedriver/chromedriver_v114/chromedriver"
        else:
            chromedriver = "/usr/bin/chromedriver"
        # Logging configuration: capture Chrome performance (network) logs
        ca = DesiredCapabilities.CHROME
        ca["goog:loggingPrefs"] = {"performance": "ALL"}
        # Initialize browser options and attach to the already-running Chrome instance
        self.browser = webdriver.ChromeOptions()
        self.browser.add_experimental_option("debuggerAddress", "127.0.0.1:12306")
        # # Set the user-agent
        # self.browser.add_argument(
        #     f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
        # # Remove the "Chrome is being controlled by automated test software" infobar
        # self.browser.add_argument('--disable-infobars')
        # # Disable GPU acceleration
        # self.browser.add_argument('--disable-gpu')
        # # Turn off developer mode
        # self.browser.add_experimental_option("useAutomationExtension", False)
        # # Pass switches as key-value pairs
        # self.browser.add_experimental_option('excludeSwitches', ['enable-automation'])
        # # Disable Blink features that expose automation
        # self.browser.add_argument('--disable-blink-features=AutomationControlled')
        # Run without opening a browser window
        # self.browser.add_argument("--headless")
        # # Required when running headless on Linux
        # self.browser.add_argument("--no-sandbox")
        # # Set the browser window size
        # self.browser.add_argument("--window-size=1920,1080")
        # Initialize the driver (the desired_capabilities argument works on Selenium < 4.10)
        self.driver = webdriver.Chrome(desired_capabilities=ca, options=self.browser, service=Service(chromedriver))
        self.driver.implicitly_wait(10)
        Common.logger(log_type, crawler).info("Opening the Xigua recommend page")
        self.driver.get("https://www.ixigua.com/")
        # Open a new tab in the current window
        self.driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.COMMAND + 't')
        # Switch to the newly opened tab
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.username = "19831265541"
        self.password = "Test111111"
        time.sleep(2)
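
    # Note: a minimal sketch, assuming Selenium 4.10+ where the desired_capabilities
    # argument was removed; the logging preference would instead be set on the options:
    #   self.browser.set_capability("goog:loggingPrefs", {"performance": "ALL"})
    #   self.driver = webdriver.Chrome(options=self.browser, service=Service(chromedriver))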

    def quit(self, log_type, crawler):
        Common.logger(log_type, crawler).info("Quitting the browser")
        self.driver.quit()

    # Given the local paths of the slider background image and the slider image,
    # return the x coordinates needed to locate the gap.
    @staticmethod
    def findPic(log_type, crawler, img_bg_path, img_slider_path):
        """
        Find the best match position of the slider inside the background image.
        :param log_type: log type
        :param crawler: crawler name
        :param img_bg_path: local path of the slider background image
        :param img_slider_path: local path of the slider image
        :return: the x coordinates of the worst match and the best match
        """
        # Read the slider background image; the argument is the image path.
        # cv2.imread() is short for "image read"; OpenCV uses BGR order by default.
        # img_bg is a numpy ndarray.
        img_bg = cv2.imread(img_bg_path)
        # Convert the background image from BGR to grayscale (black and white).
        # Why? Grayscale data is much smaller than color data, which speeds up template matching.
        # BGR vs. the more common RGB: R = red, G = green, B = blue.
        # RGB stores R in the high position and B in the low position; BGR is the reverse.
        # E.g. pure red is (255, 0, 0) in RGB and (0, 0, 255) in BGR.
        img_bg_gray = cv2.cvtColor(img_bg, cv2.COLOR_BGR2GRAY)
        # Read the slider image; the second argument 0 means grayscale mode.
        img_slider_gray = cv2.imread(img_slider_path, 0)
        # Match the slider against the background. cv2.TM_CCOEFF_NORMED is one of OpenCV's matching methods.
        res = cv2.matchTemplate(img_bg_gray, img_slider_gray, cv2.TM_CCOEFF_NORMED)
        Common.logger(log_type, crawler).info(f"{'#' * 50}")
        Common.logger(log_type, crawler).info(type(res))  # Prints: <class 'numpy.ndarray'>
        Common.logger(log_type, crawler).info(res)
        # res is a 2D ndarray of correlation scores, e.g.:
        # [[ 0.056  0.055  0.068 ... -0.178 -0.181 -0.184]
        #  [ 0.061  0.044  0.070 ... -0.184 -0.184 -0.184]
        #  ...
        #  [-0.084 -0.084 -0.094 ... -0.137 -0.142 -0.141]]
        Common.logger(log_type, crawler).info(f"{'#' * 50}")
        # cv2.minMaxLoc() finds the minimum and maximum values in the ndarray and their coordinates.
        value = cv2.minMaxLoc(res)
        # value looks like: (-0.1653602570295334, 0.6102921366691589, (144, 1), (141, 56))
        Common.logger(log_type, crawler).info(f"{value, '#' * 30}")
        # Extract the x coordinates, e.g. 144 and 141 above
        return value[2:][0][0], value[2:][1][0]
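
    # A minimal usage sketch (hypothetical log_type/crawler arguments and image paths,
    # assuming both images were already saved locally):
    #   min_x, max_x = XGRecommend.findPic("search", "xigua", "./img_bg.png", "./img_slide.png")
    # With TM_CCOEFF_NORMED, max_x (the best-match x) marks the left edge of the gap in the
    # background image; that is the value usually fed into the track-generation helpers below.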

    # Returns two lists: one of forward (accelerating then decelerating) drag steps,
    # and one of backward steps used to ease back onto the gap.
    @staticmethod
    def generate_tracks(distance):
        # Add 20 px to the distance; these extra pixels let the slider overshoot
        # the gap and then decelerate back onto it.
        distance += 20
        v = 0
        t = 0.2
        forward_tracks = []
        current = 0
        mid = distance * 3 / 5  # deceleration threshold
        while current < distance:
            if current < mid:
                a = 2  # acceleration +2
            else:
                a = -3  # acceleration -3
            s = v * t + 0.5 * a * (t ** 2)
            v = v + a * t
            current += s
            forward_tracks.append(round(s))
        back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1]
        return forward_tracks, back_tracks
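
    # A minimal sketch of how the two lists are typically replayed (hypothetical
    # `driver` and `slider_btn` objects, not defined in this file):
    #   forward, back = XGRecommend.generate_tracks(offset)
    #   ActionChains(driver).click_and_hold(on_element=slider_btn).perform()
    #   for step in forward + back:
    #       ActionChains(driver).move_by_offset(xoffset=step, yoffset=0).perform()
    #   ActionChains(driver).release().perform()
    # The forward steps sum to roughly offset + 20 px and the back steps sum to
    # exactly -20 px, so the slider ends up close to the gap.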

    # Generate the list of per-step drag distances
    @staticmethod
    def get_tracks(distance):
        """
        Simulate human-like sliding: uniform acceleration followed by uniform deceleration.
        Basic equations of uniformly accelerated motion:
        v = v0 + a*t
        s = v0*t + (1/2)*a*t^2
        """
        # Initial velocity
        v = 0
        # Time step
        t = 0.3
        # Per-step distances
        tracks = []
        # Distance covered so far
        current = 0
        # Switch from acceleration to deceleration at 4/5 of the distance
        mid = distance * 4 / 5
        while current < distance:
            if current < mid:
                # Acceleration phase
                a = 2
            else:
                # Deceleration phase
                a = -3
            # Velocity at the start of this step
            v0 = v
            # Displacement during this step
            s = v0 * t + 0.5 * a * t ** 2
            # Update the velocity
            v = v0 + a * t
            # Update the distance covered
            current += s
            # Append to the track list
            tracks.append(round(s))
        return tracks
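
    # For example, get_tracks(100) yields small rounded steps (0, 0, 0, 1, 1, ...) whose
    # sum is roughly 100; the commented-out login flow below replays each step with
    # ActionChains(driver).move_by_offset() so the drag looks human-like.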

    # Locate the slider gap in the background image with an image-processing library
    # (OpenCV): edge detection followed by contour filtering.
    @staticmethod
    def get_slider_offset(log_type, crawler, image_bg):
        image = cv2.imread(image_bg)
        # Gaussian blur
        blurred = cv2.GaussianBlur(image, (5, 5), 0)
        # Edge detection
        canny = cv2.Canny(blurred, 200, 400)
        # Contour detection
        contours, hierarchy = cv2.findContours(canny, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        for i, contour in enumerate(contours):
            m = cv2.moments(contour)
            if m['m00'] == 0:
                cx = cy = 0
            else:
                cx, cy = m['m10'] / m['m00'], m['m01'] / m['m00']
            if 6000 < cv2.contourArea(contour) < 8000 and 370 < cv2.arcLength(contour, True) < 390:
                if cx < 400:
                    continue
                x, y, w, h = cv2.boundingRect(contour)  # bounding rectangle of the gap
                cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
                # cv2.imshow('image', image)  # show the detection result
                Common.logger(log_type, crawler).info('[Gap detection] {x}px'.format(x=x / 2))
                return x / 2
        return 0
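
    # Note: the contour-area (6000-8000), perimeter (370-390) and cx < 400 filters above
    # are tuned for one specific captcha layout, and the returned x / 2 assumes the page
    # renders the downloaded background at half its natural size; if the captcha image
    # dimensions change, these constants and the / 2 scaling need to be re-tuned.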

    @staticmethod
    def FindPic(log_type, crawler, target, template):
        """
        Find the best match position of the template inside the target image.
        :param log_type: log type
        :param crawler: crawler name
        :param target: the target, i.e. the background image
        :param template: the template, i.e. the image to locate
        :return: the x coordinate of the best match
        """
        target_rgb = cv2.imread(target)
        target_gray = cv2.cvtColor(target_rgb, cv2.COLOR_BGR2GRAY)
        template_gray = cv2.imread(template, 0)
        res = cv2.matchTemplate(target_gray, template_gray, cv2.TM_CCOEFF_NORMED)
        value = cv2.minMaxLoc(res)
        Common.logger(log_type, crawler).info(value)
        # X-axis position of the gap (x of the best-match location)
        x_val = int(value[3][0])
        Common.logger(log_type, crawler).info(f"X-axis position of the gap: {x_val}")
        # Get the height, width and channel count of the template image
        template_height, template_width, template_c = cv2.imread(template).shape
        Common.logger(log_type, crawler).info(f"Template height: {template_height}")
        Common.logger(log_type, crawler).info(f"Template width: {template_width}")
        Common.logger(log_type, crawler).info(f"Template channels: {template_c}")
        # Distance the slider needs to travel
        move_val = x_val - template_width
        Common.logger(log_type, crawler).info(f"Distance to slide: {move_val}")
        return x_val
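
    # A minimal sketch mirroring the commented-out login flow below: download the captcha
    # background and slider images first, then
    #   offset = self.FindPic(log_type, crawler, bg_image_dir, slide_dir)
    #   tracks = self.get_tracks(offset)
    # and replay the tracks with ActionChains while holding the slider button.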

    def login(self, log_type, crawler, env):
        # Common.logger(log_type, crawler).info("Click login")
        # self.driver.find_element(By.XPATH, '//*[@class="xg-button xg-button-primary xg-button-middle loginButton"]').click()
        # time.sleep(random.randint(1, 2))
        # Common.logger(log_type, crawler).info("Click password login")
        # self.driver.find_element(By.XPATH, '//*[@class="web-login-link-list__item__text"]').click()
        # time.sleep(random.randint(1, 2))
        # Common.logger(log_type, crawler).info("Enter the phone number")
        # self.driver.find_element(By.XPATH, '//*[@class="web-login-normal-input__input"]').send_keys(self.username)
        # time.sleep(random.randint(1, 2))
        # Common.logger(log_type, crawler).info("Enter the password")
        # self.driver.find_element(By.XPATH, '//*[@class="web-login-button-input__input"]').send_keys(self.password)
        # time.sleep(random.randint(1, 2))
        # Common.logger(log_type, crawler).info("Click login")
        # self.driver.find_element(By.XPATH, '//*[@class="web-login-account-password__button-wrapper"]/*[1]').click()
        # time.sleep(random.randint(1, 2))
        # # Get the slider
        # Common.logger(log_type, crawler).info("Getting the slider")
        # move_btns = self.driver.find_elements(By.XPATH, '//*[@class="sc-kkGfuU bujTgx"]')
        # if len(move_btns) == 0:
        #     Common.logger(log_type, crawler).info("Slider not found, retrying in 3-5 seconds")
        #     self.quit(log_type, crawler)
        #     time.sleep(random.randint(3, 5))
        #     self.__init__(log_type, crawler, env)
        #     self.login(log_type, crawler, env)
        # move_btn = move_btns[0]
        #
        # while True:
        #
        #     # Download the slider image with requests
        #     slide_url = self.driver.find_element(By.XPATH,
        #                                          '//*[@class="captcha_verify_img_slide react-draggable sc-VigVT ggNWOG"]').get_attribute(
        #         "src")
        #     slide_dir = f"./{crawler}/photo/img_slide.png"
        #     urllib3.disable_warnings()
        #     slide_url_response = requests.get(slide_url, verify=False)
        #     with open(slide_dir, "wb") as file:
        #         file.write(slide_url_response.content)
        #
        #     # Download the background image with requests
        #     bg_image_url = self.driver.find_element(By.XPATH, '//*[@id="captcha-verify-image"]').get_attribute("src")
        #     bg_image_dir = f"./{crawler}/photo/img_bg.png"
        #     urllib3.disable_warnings()
        #     bg_image_url_response = requests.get(bg_image_url, verify=False)
        #     with open(bg_image_dir, "wb") as file:
        #         file.write(bg_image_url_response.content)
        #
        #     offset = self.FindPic(log_type, crawler, bg_image_dir, slide_dir)
        #     Common.logger(log_type, crawler).info(f"offset:{offset}")
        #
        #     # Press and hold the slider
        #     Common.logger(log_type, crawler).info("Pressing and holding the slider")
        #     ActionChains(self.driver).click_and_hold(on_element=move_btn).perform()
        #     # Drag the slider
        #     Common.logger(log_type, crawler).info("Dragging the slider by 0.5 * offset")
        #     ActionChains(self.driver).move_to_element_with_offset(to_element=move_btn, xoffset=int(0.5*offset), yoffset=0).perform()
        #     # Drag the remaining pixels
        #     Common.logger(log_type, crawler).info("Dragging the remaining pixels")
        #     tracks = self.get_tracks(int(0.15*offset))
        #     # Replay each track segment
        #     for track in tracks:
        #         # Move the slider by the corresponding offset
        #         ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=0).perform()
        #     # Rest for 1s
        #     Common.logger(log_type, crawler).info("Resting for 1s")
        #     time.sleep(1)
        #     # Release the slider
        #     Common.logger(log_type, crawler).info("Releasing the slider")
        #     ActionChains(self.driver).release().perform()
        #
        #     if len(move_btns) != 0:
        #         time.sleep(1)
        #         continue
        #     break
        Common.logger(log_type, crawler).info("Refreshing the page")
        self.driver.refresh()
        Common.logger(log_type, crawler).info("Closing the current tab")
        time.sleep(5)
        # Close the current tab
        self.driver.find_element(By.TAG_NAME, 'body').send_keys(Keys.COMMAND + 'w')
        Common.logger(log_type, crawler).info("Closed")
        Common.logger(log_type, crawler).info("Quitting the browser")
        self.quit(log_type, crawler)


if __name__ == "__main__":
    Recommend = XGRecommend("search", "dev", "dev")
    Recommend.login("search", "dev", "dev")
    pass
|