"""WeChat Channels (视频号) search crawler driven through Appium."""
import json
import multiprocessing
import os
import random
import sys
import time
import uuid
from hashlib import md5

from appium import webdriver
from appium.webdriver.extensions.android.nativekey import AndroidKey
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Append the current working directory to sys.path before importing the
# application packages below.
sys.path.append(os.getcwd())

from application.common.log import AliyunLogger, Local
from application.common.messageQueue import MQ
from application.functions import get_redirect_url
from application.pipeline import PiaoQuanPipeline


class ShiPinHaoSearch(object):
    """Search crawler for WeChat Channels, automated through Appium.

    Constructing an instance opens an Appium session against WeChat and
    polls until the WeChat main screen is detected. If the session cannot
    be created, ``self.driver`` is left as ``None`` and the failure is
    logged; callers should check ``self.driver`` before calling ``search``.
    """

    def __init__(self, platform, mode, env, rule_dict, our_uid):
        """Store the crawl settings and start WeChat through Appium.

        :param platform: platform name, used for logging.
        :param mode: crawl mode, used for logging.
        :param env: environment name; also used as the MQ topic suffix.
        :param rule_dict: crawl rules, stored on the instance.
        :param our_uid: user id, stored on the instance.
        """
        self.mq = MQ(topic_name="topic_crawler_etl_" + env)
        self.download_cnt = 0
        self.element_list = []
        self.count = 0
        self.swipe_count = 0
        self.platform = platform
        self.mode = mode
        self.env = env
        self.rule_dict = rule_dict
        self.our_uid = our_uid
        # Defined up front so the attribute exists even when the Appium
        # session fails to start.
        self.driver = None
        chromedriver_executable = "/usr/bin/chromedriver"
        self.aliyun_log = AliyunLogger(platform=platform, mode=mode, env=env)
        self._local_logger().info("启动微信")

        # Desired capabilities for the WeChat session.
        # NOTE(review): "devicesName", "resetkeyboard" and "unicodekeyboard"
        # look like misspellings of the Appium capabilities "deviceName",
        # "resetKeyboard" and "unicodeKeyboard". They are left unchanged
        # here because correcting them would change device behaviour;
        # confirm against the Appium capability reference.
        caps = {
            "platformName": "Android",
            "devicesName": "Android",
            "appPackage": "com.tencent.mm",
            "appActivity": ".ui.LauncherUI",
            "autoGrantPermissions": True,
            "noReset": True,
            "resetkeyboard": True,
            "unicodekeyboard": True,
            "showChromedriverLog": True,
            "printPageSourceOnFailure": True,
            "recreateChromeDriverSessions": True,
            "enableWebviewDetailsCollection": True,
            "setWebContentsDebuggingEnabled": True,
            "newCommandTimeout": 6000,
            "automationName": "UiAutomator2",
            "chromedriverExecutable": chromedriver_executable,
            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
        }
        try:
            self.driver = webdriver.Remote("http://localhost:4750/wd/hub", caps)
        except Exception as e:
            print(e)
            self.aliyun_log.logging(
                code="3002",
                message=f'appium 启动异常: {e}'
            )
            return
        self.driver.implicitly_wait(30)

        # Poll up to 120 times for the WeChat main screen; if the system
        # pull-down panel is showing instead, swipe it away and retry.
        for _ in range(120):
            try:
                if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
                    self._local_logger().info("微信启动成功")
                    self.aliyun_log.logging(
                        code="1000",
                        message="启动微信成功"
                    )
                    break
                elif self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
                    self._local_logger().info("发现并关闭系统下拉菜单")
                    self.aliyun_log.logging(
                        code="1000",
                        message="发现并关闭系统下拉菜单"
                    )
                    size = self.driver.get_window_size()
                    # Swipe upwards along the vertical centre line.
                    self.driver.swipe(
                        int(size['width'] * 0.5),
                        int(size['height'] * 0.8),
                        int(size['width'] * 0.5),
                        int(size['height'] * 0.2),
                        200,
                    )
            except NoSuchElementException:
                self.aliyun_log.logging(
                    code="3001",
                    message="打开微信异常"
                )
                time.sleep(1)

    def _local_logger(self):
        """Return the local logger for this crawler's platform and mode."""
        return Local.logger(platform=self.platform, mode=self.mode)

    def search(self, keyword):
        """Search WeChat for ``keyword`` and poll the Channels result list.

        Opens the search box, submits the keyword, switches into the
        result WebView and clicks the Channels category. It then polls the
        result page, printing the located video elements, and returns once
        the result container can no longer be found.

        :param keyword: the text to search for.
        :return: None
        """
        self.driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
        time.sleep(1)
        # clear() and send_keys() are issued separately so this does not
        # depend on clear() returning the element.
        search_box = self.driver.find_element(By.ID, 'com.tencent.mm:id/cd7')
        search_box.clear()
        search_box.send_keys(keyword)
        self.driver.press_keycode(AndroidKey.ENTER)
        time.sleep(5)
        # Switch to the WebView that renders the search results.
        self.check_to_webview(xpath='//div[@class="unit"]')
        time.sleep(1)
        # Switch to the Channels ("视频号") category.
        shipinhao_tags = self.find_elements_by_xpath('//div[@class="unit"]/*[2]')
        if not shipinhao_tags:
            # find_elements_by_xpath returns None when nothing matched.
            self._local_logger().info("未找到视频号分类")
            return
        self._local_logger().info("点击视频号分类")
        shipinhao_tags[0].click()
        # NOTE(review): this loop only ends when the result container
        # disappears, and it currently just prints the located elements.
        while True:
            if not self.find_elements_by_xpath('//*[@class="mixed-box__bd"]'):
                self._local_logger().info("窗口已销毁")
                return
            self._local_logger().info("开始获取视频列表")
            video_list = self.find_elements_by_xpath('//div[@class="rich-media active__absolute"]')
            if video_list:
                print(video_list)

    def check_to_webview(self, xpath):
        """Switch the driver into the WebView window that contains ``xpath``.

        Switches to the last context reported by the driver, then tries
        each window handle in turn until one contains an element matching
        ``xpath``. If no window matches, the driver is left on the last
        handle that was tried.

        :param xpath: XPath that identifies the target window.
        :return: None
        """
        contexts = self.driver.contexts
        self.driver.switch_to.context(contexts[-1])
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                self.driver.find_element(By.XPATH, xpath)
                self._local_logger().info("切换到WebView成功\n")
                self.aliyun_log.logging(
                    code="1000",
                    message="成功切换到 webview"
                )
                return
            except NoSuchElementException:
                time.sleep(1)

    def find_elements_by_xpath(self, xpath):
        """Find elements matching ``xpath`` in any open window.

        Switches through the driver's window handles in order and returns
        the first non-empty match list.

        :param xpath: XPath expression to look up.
        :return: list of matching elements, or None when no window has a
            match.
        """
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                elements = self.driver.find_elements(By.XPATH, xpath)
                if elements:
                    return elements
            except NoSuchElementException:
                self._local_logger().info("未找到元素{}".format(xpath))
        return None