"""Search feature for WeChat Channels (Shipinhao)."""
import json
import multiprocessing
import os
import random
import sys
import time
import uuid
from hashlib import md5

from appium import webdriver
from appium.webdriver.extensions.android.nativekey import AndroidKey
from bs4 import BeautifulSoup
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

# Must run before the ``application`` imports below: it puts the current
# working directory on the import path.
sys.path.append(os.getcwd())

from application.common.log import AliyunLogger, Local
from application.common.messageQueue import MQ
from application.functions import get_redirect_url
from application.pipeline import PiaoQuanPipeline


class ShiPinHaoSearch(object):
    """Search crawler for WeChat Channels, driven through Appium.

    Constructing an instance opens an Appium session against the WeChat
    Android app and waits for the app's main screen. ``search`` then types
    a keyword into WeChat's search box and inspects the result WebView.
    """

    def __init__(self, platform, mode, env, rule_dict, our_uid):
        """Start WeChat through Appium and wait for it to become ready.

        If the Appium session cannot be created, the error is printed and
        logged with code ``3002``, ``self.driver`` is left as ``None`` and
        the constructor returns without raising.

        :param platform: platform name, passed to the local and Aliyun loggers
        :param mode: crawl mode, passed to the local and Aliyun loggers
        :param env: environment name; suffix of the MQ topic and passed to
            the Aliyun logger
        :param rule_dict: stored on the instance (not used in this class)
        :param our_uid: stored on the instance (not used in this class)
        """
        self.mq = MQ(topic_name="topic_crawler_etl_" + env)
        self.download_cnt = 0
        self.element_list = []
        self.count = 0
        self.swipe_count = 0
        self.platform = platform
        self.mode = mode
        self.env = env
        self.rule_dict = rule_dict
        self.our_uid = our_uid
        # Defined up front so the attribute exists even when the Appium
        # session below cannot be created.
        self.driver = None
        chromedriverExecutable = "/usr/bin/chromedriver"
        self.aliyun_log = AliyunLogger(platform=platform, mode=mode, env=env)
        Local.logger(platform=self.platform, mode=self.mode).info("启动微信")
        # Appium capabilities for the WeChat session.
        # NOTE(review): Appium documents these capabilities as "deviceName",
        # "resetKeyboard" and "unicodeKeyboard"; the spellings below look
        # like typos. Left untouched because correcting them would change
        # device behaviour - confirm before renaming.
        caps = {
            "platformName": "Android",
            "devicesName": "Android",
            "appPackage": "com.tencent.mm",
            "appActivity": ".ui.LauncherUI",
            "autoGrantPermissions": True,
            "noReset": True,
            "resetkeyboard": True,
            "unicodekeyboard": True,
            "showChromedriverLog": True,
            "printPageSourceOnFailure": True,
            "recreateChromeDriverSessions": True,
            "enableWebviewDetailsCollection": True,
            "setWebContentsDebuggingEnabled": True,
            "newCommandTimeout": 6000,
            "automationName": "UiAutomator2",
            "chromedriverExecutable": chromedriverExecutable,
            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
        }
        try:
            self.driver = webdriver.Remote("http://localhost:4750/wd/hub", caps)
        except Exception as e:
            print(e)
            self.aliyun_log.logging(
                code="3002",
                message=f'appium 启动异常: {e}'
            )
            return
        self.driver.implicitly_wait(30)
        self._wait_for_wechat()

    def _wait_for_wechat(self):
        """Poll up to 120 times until WeChat's main screen element appears.

        If the system notification shade is found instead, it is swiped
        away and polling continues.
        """
        for _ in range(120):
            try:
                if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
                    Local.logger(platform=self.platform, mode=self.mode).info("微信启动成功")
                    self.aliyun_log.logging(
                        code="1000",
                        message="启动微信成功"
                    )
                    break
                # find_element raises NoSuchElementException when the shade
                # is absent, so reaching the body means it is on screen.
                if self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
                    Local.logger(platform=self.platform, mode=self.mode).info("发现并关闭系统下拉菜单")
                    self.aliyun_log.logging(
                        code="1000",
                        message="发现并关闭系统下拉菜单"
                    )
                    size = self.driver.get_window_size()
                    # Swipe upwards along the vertical centre line.
                    self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.8),
                                      int(size['width'] * 0.5), int(size['height'] * 0.2), 200)
            except NoSuchElementException:
                self.aliyun_log.logging(
                    code="3001",
                    message="打开微信异常"
                )
                time.sleep(1)

    def search(self, keyword):
        """Search WeChat for ``keyword`` and open the Channels result tab.

        :param keyword: text typed into WeChat's search box
        :return: None
        """
        self.driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
        time.sleep(1)
        # clear() and send_keys() are issued separately so the code does not
        # depend on clear() returning the element.
        search_box = self.driver.find_element(By.ID, 'com.tencent.mm:id/cd7')
        search_box.clear()
        search_box.send_keys(keyword)
        self.driver.press_keycode(AndroidKey.ENTER)
        time.sleep(5)
        # Switch to the WebView that renders the search results.
        self.check_to_webview(xpath='//div[@class="unit"]')
        time.sleep(1)
        # Switch to the "Channels" category.
        shipinhao_tags = self.find_elements_by_xpath('//div[@class="unit"]/*[2]')
        if not shipinhao_tags:
            Local.logger(platform=self.platform, mode=self.mode).info(
                "Channels category tab not found; aborting search"
            )
            return
        Local.logger(platform=self.platform, mode=self.mode).info("点击视频号分类")
        shipinhao_tags[0].click()
        # NOTE(review): this loop only exits once the result container
        # disappears; while it exists the same list is fetched and printed
        # again on every pass. Looks unfinished - confirm intended handling.
        while True:
            if not self.find_elements_by_xpath('//*[@class="mixed-box__bd"]'):
                Local.logger(self.platform, self.mode).info("窗口已销毁")
                return
            Local.logger(self.platform, self.mode).info("开始获取视频列表")
            video_list = self.find_elements_by_xpath('//div[@class="rich-media active__absolute"]')
            if video_list:
                print(video_list)

    def check_to_webview(self, xpath):
        """Switch to the last context, then to the window containing ``xpath``.

        Windows are tried in order; the driver stays on the first one where
        the element is found. If none matches, the driver is left on the
        last window tried.

        :param xpath: XPath that identifies the wanted window
        :return: None
        """
        webViews = self.driver.contexts
        self.driver.switch_to.context(webViews[-1])
        windowHandles = self.driver.window_handles
        for handle in windowHandles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                self.driver.find_element(By.XPATH, xpath)
            except NoSuchElementException:
                time.sleep(1)
                continue
            Local.logger(platform=self.platform, mode=self.mode).info("切换到WebView成功\n")
            self.aliyun_log.logging(
                code="1000",
                message="成功切换到 webview"
            )
            return

    def find_elements_by_xpath(self, xpath):
        """Return elements matching ``xpath`` from the first window that has any.

        :param xpath: XPath expression to look up
        :return: non-empty list of elements, or ``None`` when no window
            contains a match
        """
        for handle in self.driver.window_handles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                elements = self.driver.find_elements(By.XPATH, xpath)
            except NoSuchElementException:
                Local.logger(platform=self.platform, mode=self.mode).info("未找到元素{}".format(xpath))
                return None
            if elements:
                return elements
        return None