# shipinhao_search.py (6.5 KB)
"""
WeChat Channels (视频号) search feature.
"""
  4. import json
  5. import os
  6. import random
  7. import sys
  8. import time
  9. import uuid
  10. from hashlib import md5
  11. from appium import webdriver
  12. from appium.webdriver.extensions.android.nativekey import AndroidKey
  13. from bs4 import BeautifulSoup
  14. from selenium.common.exceptions import NoSuchElementException
  15. from selenium.webdriver.common.by import By
  16. import multiprocessing
  17. sys.path.append(os.getcwd())
  18. from application.common.log import AliyunLogger, Local
  19. from application.common.messageQueue import MQ
  20. from application.functions import get_redirect_url
  21. from application.pipeline import PiaoQuanPipeline
  22. class ShiPinHaoSearch(object):
  23. """
  24. 视频号搜索爬虫
  25. """
  26. def __init__(self, platform, mode, env, rule_dict, our_uid):
  27. self.mq = MQ(topic_name="topic_crawler_etl_" + env)
  28. self.download_cnt = 0
  29. self.element_list = []
  30. self.count = 0
  31. self.swipe_count = 0
  32. self.platform = platform
  33. self.mode = mode
  34. self.env = env
  35. self.rule_dict = rule_dict
  36. self.our_uid = our_uid
  37. chromedriverExecutable = "/usr/bin/chromedriver"
  38. self.aliyun_log = AliyunLogger(platform=platform, mode=mode, env=env)
  39. Local.logger(platform=self.platform, mode=self.mode).info("启动微信")
  40. # 微信的配置文件
  41. caps = {
  42. "platformName": "Android",
  43. "devicesName": "Android",
  44. "appPackage": "com.tencent.mm",
  45. "appActivity": ".ui.LauncherUI",
  46. "autoGrantPermissions": True,
  47. "noReset": True,
  48. "resetkeyboard": True,
  49. "unicodekeyboard": True,
  50. "showChromedriverLog": True,
  51. "printPageSourceOnFailure": True,
  52. "recreateChromeDriverSessions": True,
  53. "enableWebviewDetailsCollection": True,
  54. "setWebContentsDebuggingEnabled": True,
  55. "newCommandTimeout": 6000,
  56. "automationName": "UiAutomator2",
  57. "chromedriverExecutable": chromedriverExecutable,
  58. "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
  59. }
  60. try:
  61. self.driver = webdriver.Remote("http://localhost:4750/wd/hub", caps)
  62. except Exception as e:
  63. print(e)
  64. self.aliyun_log.logging(
  65. code="3002",
  66. message=f'appium 启动异常: {e}'
  67. )
  68. return
  69. self.driver.implicitly_wait(30)
  70. for i in range(120):
  71. try:
  72. if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
  73. Local.logger(self.log_type, self.crawler).info("微信启动成功")
  74. # Common.logging(self.log_type, self.crawler, self.env, '微信启动成功')
  75. self.aliyun_log.logging(
  76. code="1000",
  77. message="启动微信成功"
  78. )
  79. break
  80. elif self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
  81. Local.logger(self.log_type, self.crawler).info("发现并关闭系统下拉菜单")
  82. # Common.logging(self.log_type, self.crawler, self.env, '发现并关闭系统下拉菜单')
  83. self.aliyun_log.logging(
  84. code="1000",
  85. message="发现并关闭系统下拉菜单"
  86. )
  87. size = self.driver.get_window_size()
  88. self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.8),
  89. int(size['width'] * 0.5), int(size['height'] * 0.2), 200)
  90. else:
  91. pass
  92. except NoSuchElementException:
  93. self.aliyun_log.logging(
  94. code="3001",
  95. message="打开微信异常"
  96. )
  97. time.sleep(1)
  98. def search(self, keyword):
  99. """搜索"""
  100. self.driver.find_element('com.tencent.mm:id/j5t').click()
  101. time.sleep(1)
  102. self.driver.find_element('com.tencent.mm:id/cd7').clear().send_keys(keyword)
  103. self.driver.press_keycode(AndroidKey.ENTER)
  104. time.sleep(5)
  105. # 切换到 webview
  106. self.check_to_webview(xpath='//div[@class="unit"]')
  107. time.sleep(1)
  108. # 切换到“视频号”分类
  109. shipinhao_tags = self.find_elements_by_xpath('//div[@class="unit"]/*[2]')
  110. Local.logger(platform=self.platform, mode=self.mode).info("点击视频号分类")
  111. shipinhao_tags[0].click()
  112. index = 0
  113. while True:
  114. if not self.find_elements_by_xpath('//*[@class="mixed-box__bd"]'):
  115. Local.logger(self.platform, self.mode).info("窗口已销毁")
  116. return
  117. Local.logger(self.platform, self.mode).info("开始获取视频列表")
  118. video_list = self.find_elements_by_xpath('//div[@class="rich-media active__absolute"]')
  119. if video_list:
  120. print(video_list)
  121. def check_to_webview(self, xpath):
  122. """
  123. 切换到了 webview
  124. :param xpath:
  125. :return:
  126. """
  127. webViews = self.driver.contexts
  128. self.driver.switch_to.context(webViews[-1])
  129. windowHandles = self.driver.window_handles
  130. for handle in windowHandles:
  131. self.driver.switch_to.window(handle)
  132. time.sleep(1)
  133. try:
  134. self.driver.find_element(By.XPATH, xpath)
  135. Local.logger(self.log_type, self.crawler).info("切换到WebView成功\n")
  136. # Common.logging(self.log_type, self.crawler, self.env, '切换到WebView成功\n')
  137. self.aliyun_log.logging(
  138. code="1000",
  139. message="成功切换到 webview"
  140. )
  141. return
  142. except NoSuchElementException:
  143. time.sleep(1)
  144. def find_elements_by_xpath(self, xpath):
  145. """
  146. 通过 xpath 获取 Element
  147. :param xpath:
  148. :return:
  149. """
  150. windowHandles = self.driver.window_handles
  151. for handle in windowHandles:
  152. self.driver.switch_to.window(handle)
  153. time.sleep(1)
  154. try:
  155. elements = self.driver.find_elements(By.XPATH, xpath)
  156. if elements:
  157. return elements
  158. except NoSuchElementException as e:
  159. Local.logger(platform=self.platform, mode=self.mode).info("未找到元素{}".format(xpath))
  160. return None