|
@@ -0,0 +1,172 @@
|
|
|
+"""
|
|
|
+视频号搜索功能
|
|
|
+"""
|
|
|
+import json
|
|
|
+import os
|
|
|
+import random
|
|
|
+import sys
|
|
|
+import time
|
|
|
+import uuid
|
|
|
+from hashlib import md5
|
|
|
+
|
|
|
+from appium import webdriver
|
|
|
+from appium.webdriver.extensions.android.nativekey import AndroidKey
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+from selenium.common.exceptions import NoSuchElementException
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+import multiprocessing
|
|
|
+
|
|
|
+sys.path.append(os.getcwd())
|
|
|
+
|
|
|
+from application.common.log import AliyunLogger, Local
|
|
|
+from application.common.messageQueue import MQ
|
|
|
+from application.functions import get_redirect_url
|
|
|
+from application.pipeline import PiaoQuanPipeline
|
|
|
+
|
|
|
+
|
|
|
class ShiPinHaoSearch(object):
    """Appium-driven crawler for the WeChat Channels (视频号) search page.

    Constructing an instance connects to a local Appium server, launches
    WeChat and waits for its main screen.  ``search`` then types a keyword,
    switches into the result WebView and polls the video list.
    """

    def __init__(self, platform, mode, env, rule_dict, our_uid):
        """Store the crawl configuration and start WeChat through Appium.

        If the Appium session cannot be created the error is logged and the
        constructor returns early with ``self.driver`` left as ``None``.

        :param platform: platform label handed to the loggers.
        :param mode: crawl-mode label handed to the loggers.
        :param env: environment name; also the suffix of the MQ topic.
        :param rule_dict: stored on the instance (not read inside this class).
        :param our_uid: stored on the instance (not read inside this class).
        """
        self.mq = MQ(topic_name="topic_crawler_etl_" + env)
        self.download_cnt = 0
        self.element_list = []
        self.count = 0
        self.swipe_count = 0
        self.platform = platform
        self.mode = mode
        self.env = env
        self.rule_dict = rule_dict
        self.our_uid = our_uid
        # Defined up front so a failed session start leaves a well-formed
        # (if unusable) object instead of a missing attribute.
        self.driver = None
        chromedriverExecutable = "/usr/bin/chromedriver"
        self.aliyun_log = AliyunLogger(platform=platform, mode=mode, env=env)
        Local.logger(platform=self.platform, mode=self.mode).info("启动微信")
        # Desired capabilities for the WeChat session.
        # NOTE(review): "devicesName", "resetkeyboard" and "unicodekeyboard"
        # do not match the documented Appium spellings ("deviceName",
        # "resetKeyboard", "unicodeKeyboard"); they are kept verbatim here
        # because correcting them would change device behaviour - confirm.
        caps = {
            "platformName": "Android",
            "devicesName": "Android",
            "appPackage": "com.tencent.mm",
            "appActivity": ".ui.LauncherUI",
            "autoGrantPermissions": True,
            "noReset": True,
            "resetkeyboard": True,
            "unicodekeyboard": True,
            "showChromedriverLog": True,
            "printPageSourceOnFailure": True,
            "recreateChromeDriverSessions": True,
            "enableWebviewDetailsCollection": True,
            "setWebContentsDebuggingEnabled": True,
            "newCommandTimeout": 6000,
            "automationName": "UiAutomator2",
            "chromedriverExecutable": chromedriverExecutable,
            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
        }
        try:
            self.driver = webdriver.Remote("http://localhost:4750/wd/hub", caps)
        except Exception as e:
            print(e)
            self.aliyun_log.logging(
                code="3002",
                message=f'appium 启动异常: {e}'
            )
            return
        self.driver.implicitly_wait(30)

        # Poll (at most 120 rounds) until the WeChat main screen shows up.
        for _ in range(120):
            try:
                if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
                    Local.logger(platform=self.platform, mode=self.mode).info("微信启动成功")
                    self.aliyun_log.logging(
                        code="1000",
                        message="启动微信成功"
                    )
                    break
                # find_element raises NoSuchElementException when the system
                # pull-down shade is absent; that case is handled below.
                if self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
                    Local.logger(platform=self.platform, mode=self.mode).info("发现并关闭系统下拉菜单")
                    self.aliyun_log.logging(
                        code="1000",
                        message="发现并关闭系统下拉菜单"
                    )
                    # Swipe upwards along the vertical centre line to close it.
                    size = self.driver.get_window_size()
                    self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.8),
                                      int(size['width'] * 0.5), int(size['height'] * 0.2), 200)
            except NoSuchElementException:
                self.aliyun_log.logging(
                    code="3001",
                    message="打开微信异常"
                )
                time.sleep(1)

    def search(self, keyword):
        """Search WeChat for ``keyword`` and poll the Channels video list.

        :param keyword: text typed into the WeChat search box.
        :return: ``None``; returns once the result container is gone or the
            Channels tab cannot be located.
        """
        # Open the search entry, type the keyword and submit it.
        self.driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
        time.sleep(1)
        search_box = self.driver.find_element(By.ID, 'com.tencent.mm:id/cd7')
        search_box.clear()
        search_box.send_keys(keyword)
        self.driver.press_keycode(AndroidKey.ENTER)
        time.sleep(5)
        # Switch into the WebView that renders the search results.
        self.check_to_webview(xpath='//div[@class="unit"]')
        time.sleep(1)
        # Switch to the "视频号" (Channels) category tab.
        shipinhao_tags = self.find_elements_by_xpath('//div[@class="unit"]/*[2]')
        if not shipinhao_tags:
            # The helper returns None when no window contains the tab.
            Local.logger(platform=self.platform, mode=self.mode).info("未找到视频号分类")
            return
        Local.logger(platform=self.platform, mode=self.mode).info("点击视频号分类")
        shipinhao_tags[0].click()

        # NOTE(review): this loop only ends when the result container
        # disappears; while it exists the same list is fetched and printed
        # repeatedly. Looks unfinished - confirm the intended exit condition.
        while True:
            if not self.find_elements_by_xpath('//*[@class="mixed-box__bd"]'):
                Local.logger(self.platform, self.mode).info("窗口已销毁")
                return
            Local.logger(self.platform, self.mode).info("开始获取视频列表")

            video_list = self.find_elements_by_xpath('//div[@class="rich-media active__absolute"]')
            if video_list:
                print(video_list)

    def check_to_webview(self, xpath):
        """Switch the driver to the WebView window that contains ``xpath``.

        Switches to the last context reported by the driver, then visits
        each window handle until one contains an element matching ``xpath``.
        If no window matches, the driver stays on the last handle visited.

        :param xpath: XPath expected to exist inside the target window.
        :return: ``None``
        """
        webViews = self.driver.contexts
        self.driver.switch_to.context(webViews[-1])
        windowHandles = self.driver.window_handles
        for handle in windowHandles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                self.driver.find_element(By.XPATH, xpath)
                Local.logger(platform=self.platform, mode=self.mode).info("切换到WebView成功\n")
                self.aliyun_log.logging(
                    code="1000",
                    message="成功切换到 webview"
                )
                return
            except NoSuchElementException:
                # Not this window; pause briefly and try the next handle.
                time.sleep(1)

    def find_elements_by_xpath(self, xpath):
        """Return the elements matching ``xpath`` from the first window that has any.

        Every window handle is visited in order; the driver is left switched
        to the window in which the match was found.

        :param xpath: XPath expression to look up.
        :return: non-empty list of elements, or ``None`` when no window
            contains a match.
        """
        windowHandles = self.driver.window_handles
        for handle in windowHandles:
            self.driver.switch_to.window(handle)
            time.sleep(1)
            try:
                elements = self.driver.find_elements(By.XPATH, xpath)
            except NoSuchElementException:
                # find_elements normally returns [] rather than raising;
                # kept so a driver that does raise is still treated as a miss.
                continue
            if elements:
                return elements
        # Reached only when no window produced a match.
        Local.logger(platform=self.platform, mode=self.mode).info("未找到元素{}".format(xpath))
        return None
|