xg_recommend2.py 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. # -*- coding: utf-8 -*-
  2. # @Author: wangkun
  3. # @Time: 2023/7/6
  4. import os
  5. import random
  6. import sys
  7. import time
  8. import cv2
  9. import requests
  10. import urllib3
  11. from selenium.webdriver import DesiredCapabilities, ActionChains
  12. from selenium import webdriver
  13. from selenium.webdriver.chrome.service import Service
  14. from selenium.webdriver.common.by import By
  15. sys.path.append(os.getcwd())
  16. from common.common import Common
  17. class XGRecommend(object):
  18. def __init__(self, log_type, crawler, env):
  19. """
  20. 本地启动 Chrome,指定端口号:12306
  21. open -a "Google Chrome" --args --remote-debugging-port=12306
  22. """
  23. # Common.logger(log_type, crawler).info("启动 Chrome 浏览器")
  24. # cmd = 'open -a "Google Chrome" --args --remote-debugging-port=12306'
  25. # os.system(cmd)
  26. if env == "dev":
  27. chromedriver = "/Users/wangkun/Downloads/chromedriver/chromedriver_v114/chromedriver"
  28. else:
  29. chromedriver = "/usr/bin/chromedriver"
  30. # 打印请求配置
  31. ca = DesiredCapabilities.CHROME
  32. ca["goog:loggingPrefs"] = {"performance": "ALL"}
  33. # 初始化浏览器
  34. self.browser = webdriver.ChromeOptions()
  35. # 设置user-agent
  36. self.browser.add_argument(
  37. f'user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
  38. # 去掉提示:Chrome正收到自动测试软件的控制
  39. self.browser.add_argument('--disable-infobars')
  40. # 禁用GPU加速
  41. self.browser.add_argument('--disable-gpu')
  42. # 关闭开发者模式
  43. self.browser.add_experimental_option("useAutomationExtension", False)
  44. # 以键值对的形式加入参数
  45. self.browser.add_experimental_option('excludeSwitches', ['enable-automation'])
  46. # 禁用启用Blink运行时的功能
  47. self.browser.add_argument('--disable-blink-features=AutomationControlled')
  48. # 不打开浏览器运行
  49. # self.browser.add_argument("--headless")
  50. # linux 环境中,静默模式必须要加的参数
  51. # self.browser.add_argument("--no-sandbox")
  52. # 设置浏览器size
  53. self.browser.add_argument("--window-size=1920,1080")
  54. # driver初始化
  55. self.driver = webdriver.Chrome(desired_capabilities=ca, options=self.browser, service=Service(chromedriver))
  56. self.driver.implicitly_wait(10)
  57. Common.logger(log_type, crawler).info("打开西瓜推荐页")
  58. self.driver.get(f"https://www.ixigua.com/")
  59. self.username = "19831265541"
  60. self.password = "Test111111"
  61. time.sleep(2)
  62. def quit(self, log_type, crawler):
  63. Common.logger(log_type, crawler).info("退出浏览器")
  64. self.driver.quit()
  65. # 返回两个数组:一个用于加速拖动滑块,一个用于减速拖动滑块
  66. @staticmethod
  67. def generate_tracks(distance):
  68. # 给距离加上20,这20像素用在滑块滑过缺口后,减速折返回到缺口
  69. distance += 20
  70. v = 0
  71. t = 0.2
  72. forward_tracks = []
  73. current = 0
  74. mid = distance * 3 / 5 # 减速阀值
  75. while current < distance:
  76. if current < mid:
  77. a = 2 # 加速度为+2
  78. else:
  79. a = -3 # 加速度-3
  80. s = v * t + 0.5 * a * (t ** 2)
  81. v = v + a * t
  82. current += s
  83. forward_tracks.append(round(s))
  84. back_tracks = [-3, -3, -2, -2, -2, -2, -2, -1, -1, -1, -1]
  85. return forward_tracks, back_tracks
  86. # 获取距离值
  87. @staticmethod
  88. def get_tracks(distance):
  89. """
  90. 模拟人的滑动行为,先匀加速后匀减速
  91. 匀变速基本公式
  92. v=v0+at
  93. s=vot+1/2at2
  94. """
  95. # 初速度
  96. v = 0
  97. # 设置时间
  98. t = 0.3
  99. # 存储每段距离值
  100. tracks = []
  101. # 当前距离
  102. current = 0
  103. # 中间位置为4/5距离处
  104. mid = distance * 4 / 5
  105. while current < distance:
  106. if current < mid:
  107. # 加速阶段
  108. a = 2
  109. else:
  110. # 减速阶段
  111. a = -3
  112. # 当前速度
  113. v0 = v
  114. # 当前位移
  115. s = v0 * t + 0.5 * a * t ** 2
  116. # 更新当前速度
  117. v = v0 + a * t
  118. # 更新当前位移
  119. current += s
  120. # 添加到轨迹列表
  121. tracks.append(round(s))
  122. return tracks
  123. @staticmethod
  124. def FindPic(log_type, crawler, target, template):
  125. """
  126. 找出图像中最佳匹配位置
  127. :param log_type: 日志
  128. :param crawler: 爬虫
  129. :param target: 目标即背景图
  130. :param template: 模板即需要找到的图
  131. :return: 返回最佳匹配及其最差匹配和对应的坐标
  132. """
  133. target_rgb = cv2.imread(target)
  134. target_gray = cv2.cvtColor(target_rgb, cv2.COLOR_BGR2GRAY)
  135. template_rgb = cv2.imread(template, 0)
  136. res = cv2.matchTemplate(target_gray, template_rgb, cv2.TM_CCOEFF_NORMED)
  137. value = cv2.minMaxLoc(res)
  138. Common.logger(log_type, crawler).info(value)
  139. # 计算缺口的 X 轴距离
  140. x_val = int(value[3][0])
  141. Common.logger(log_type, crawler).info(f"缺口的 X 轴距离:{x_val}")
  142. # 获取模板图的宽高
  143. template_height, template_width, template_c = cv2.imread(template).shape
  144. Common.logger(log_type, crawler).info(f"模板高:{template_height}")
  145. Common.logger(log_type, crawler).info(f"模板宽:{template_width}")
  146. Common.logger(log_type, crawler).info(f"图片的通道数:{template_c}")
  147. # 计算需要滑动的距离
  148. move_val = x_val - template_width
  149. Common.logger(log_type, crawler).info(f"需要滑动的距离:{move_val}")
  150. return x_val
  151. def login(self, log_type, crawler, env):
  152. Common.logger(log_type, crawler).info("点击登录")
  153. self.driver.find_element(By.XPATH, '//*[@class="xg-button xg-button-primary xg-button-middle loginButton"]').click()
  154. time.sleep(random.randint(1, 2))
  155. Common.logger(log_type, crawler).info("点击密码登录")
  156. self.driver.find_element(By.XPATH, '//*[@class="web-login-link-list__item__text"]').click()
  157. time.sleep(random.randint(1, 2))
  158. Common.logger(log_type, crawler).info("输入手机号")
  159. self.driver.find_element(By.XPATH, '//*[@class="web-login-normal-input__input"]').send_keys(self.username)
  160. time.sleep(random.randint(1, 2))
  161. Common.logger(log_type, crawler).info("输入密码")
  162. self.driver.find_element(By.XPATH, '//*[@class="web-login-button-input__input"]').send_keys(self.password)
  163. time.sleep(random.randint(1, 2))
  164. Common.logger(log_type, crawler).info("点击登录")
  165. self.driver.find_element(By.XPATH, '//*[@class="web-login-account-password__button-wrapper"]/*[1]').click()
  166. time.sleep(random.randint(1, 2))
  167. # 获取滑块
  168. Common.logger(log_type, crawler).info("获取滑块")
  169. move_btns = self.driver.find_elements(By.XPATH, '//*[@class="sc-kkGfuU bujTgx"]')
  170. if len(move_btns) == 0:
  171. Common.logger(log_type, crawler).info("未发现滑块,3-5 秒后重试")
  172. self.quit(log_type, crawler)
  173. time.sleep(random.randint(3, 5))
  174. self.__init__(log_type, crawler, env)
  175. self.login(log_type, crawler, env)
  176. move_btn = move_btns[0]
  177. while True:
  178. # 使用requests下载滑块
  179. slide_url = self.driver.find_element(By.XPATH,
  180. '//*[@class="captcha_verify_img_slide react-draggable sc-VigVT ggNWOG"]').get_attribute(
  181. "src")
  182. slide_dir = f"./{crawler}/photo/img_slide.png"
  183. urllib3.disable_warnings()
  184. slide_url_response = requests.get(slide_url, verify=False)
  185. with open(slide_dir, "wb") as file:
  186. file.write(slide_url_response.content)
  187. # 使用urllib下载背景图
  188. bg_image_url = self.driver.find_element(By.XPATH, '//*[@id="captcha-verify-image"]').get_attribute("src")
  189. bg_image_dir = f"./{crawler}/photo/img_bg.png"
  190. urllib3.disable_warnings()
  191. bg_image_url_response = requests.get(bg_image_url, verify=False)
  192. with open(bg_image_dir, "wb") as file:
  193. file.write(bg_image_url_response.content)
  194. offset = self.FindPic(log_type, crawler, bg_image_dir, slide_dir)
  195. Common.logger(log_type, crawler).info(f"offset:{offset}")
  196. # 在滑块上暂停
  197. Common.logger(log_type, crawler).info("在滑块上暂停")
  198. ActionChains(self.driver).click_and_hold(on_element=move_btn).perform()
  199. # 拖动滑块
  200. Common.logger(log_type, crawler).info("拖动滑块0.7*距离")
  201. ActionChains(self.driver).move_to_element_with_offset(to_element=move_btn, xoffset=int(0.5*offset), yoffset=0).perform()
  202. # 拖动剩余像素
  203. Common.logger(log_type, crawler).info("拖动剩余像素")
  204. tracks = self.get_tracks(int(0.15*offset))
  205. # 遍历梅一段距离
  206. for track in tracks:
  207. # 滑块移动响应距离
  208. ActionChains(self.driver).move_by_offset(xoffset=track, yoffset=0).perform()
  209. # 休息1s
  210. Common.logger(log_type, crawler).info("休息1s")
  211. time.sleep(1)
  212. # 释放滑块
  213. Common.logger(log_type, crawler).info("释放滑块")
  214. ActionChains(self.driver).release().perform()
  215. if len(move_btns) != 0:
  216. time.sleep(1)
  217. continue
  218. break
  219. time.sleep(5)
  220. Common.logger(log_type, crawler).info("退出浏览器")
  221. self.quit(log_type, crawler)
  222. if __name__ == "__main__":
  223. Recommend = XGRecommend("search", "dev", "dev")
  224. Recommend.login("search", "dev", "dev")
  225. pass