|
@@ -26,13 +26,13 @@ class ShipinhaoSearch:
|
|
|
# try:
|
|
|
Common.logger(log_type, crawler).info('启动微信')
|
|
|
if env == "dev":
|
|
|
- chromedriverExecutable = "/Users/wangkun/Downloads/chromedriver/chromedriver_v86/chromedriver"
|
|
|
+ chromedriverExecutable = "/Users/wangkun/Downloads/chromedriver/chromedriver_v107/chromedriver"
|
|
|
else:
|
|
|
chromedriverExecutable = '/Users/piaoquan/Downloads/chromedriver'
|
|
|
caps = {
|
|
|
"platformName": "Android", # 手机操作系统 Android / iOS
|
|
|
"deviceName": "Android", # 连接的设备名(模拟器或真机),安卓可以随便写
|
|
|
- "platforVersion": "11", # 手机对应的系统版本(Android 11)
|
|
|
+ "platforVersion": "13", # 手机对应的系统版本(Android 13)
|
|
|
"appPackage": "com.tencent.mm", # 被测APP的包名,乐活圈 Android
|
|
|
"appActivity": ".ui.LauncherUI", # 启动的Activity名
|
|
|
"autoGrantPermissions": True, # 让 appium 自动授权 base 权限,
|
|
@@ -46,21 +46,21 @@ class ShipinhaoSearch:
|
|
|
"automationName": "UiAutomator2", # 使用引擎,默认为 Appium,
|
|
|
# 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
|
|
|
"showChromedriverLog": True,
|
|
|
- "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
|
|
|
- # "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
|
|
|
+ # "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
|
|
|
+ "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
|
|
|
'enableWebviewDetailsCollection': True,
|
|
|
'setWebContentsDebuggingEnabled': True,
|
|
|
'chromedriverExecutable': chromedriverExecutable,
|
|
|
}
|
|
|
driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
|
|
|
driver.implicitly_wait(10)
|
|
|
-
|
|
|
+ if len(driver.find_elements(By.ID, 'android:id/text1')) != 0:
|
|
|
+ driver.find_elements(By.ID, 'android:id/text1')[0].click()
|
|
|
time.sleep(5)
|
|
|
# 根据词搜索视频
|
|
|
cls.search_video(log_type=log_type,
|
|
|
crawler=crawler,
|
|
|
driver=driver,
|
|
|
- sheetid=sheetid,
|
|
|
env=env)
|
|
|
|
|
|
# except Exception as e:
|
|
@@ -83,77 +83,112 @@ class ShipinhaoSearch:
|
|
|
|
|
|
@classmethod
|
|
|
def check_to_webview(cls, log_type, crawler, driver: WebDriver):
    """Switch the driver into the webview window that shows the search page.

    The driver is moved to the second entry of ``driver.contexts`` and every
    window handle of that context is then probed for a marker element.

    :param log_type: forwarded to ``Common.logger``.
    :param crawler: forwarded to ``Common.logger``.
    :param driver: Appium driver attached to the app under test.
    :return: ``"成功"`` as soon as a window containing the marker element is
        found; ``None`` when there is no second context or no window matches.
    """
    Common.logger(log_type, crawler).info('切换到webview')
    webviews = driver.contexts

    # The code always uses the context at index 1 (index 0 is presumably
    # NATIVE_APP -- TODO confirm).  Without this guard a missing webview
    # raised IndexError instead of reporting failure through the None
    # return value that the caller checks.
    if len(webviews) < 2:
        Common.logger(log_type, crawler).info(f"未找到 webview 上下文:{webviews}\n")
        return None

    driver.switch_to.context(webviews[1])
    time.sleep(1)

    for handle in driver.window_handles:
        try:
            # Switching is inside the try so that a handle which became
            # stale after being listed is logged and skipped rather than
            # aborting the whole probe.
            driver.switch_to.window(handle)
            # find_element raises when nothing matches, so reaching the
            # line after it already means the marker element exists.
            # NOTE(review): "unit11111" looks like a debugging leftover
            # (other code in this file matches class "unit") -- confirm.
            driver.find_element(By.XPATH, '//div[@class="unit11111"]')
        except Exception as e:
            Common.logger(log_type, crawler).info(f"{e}\n")
            continue
        Common.logger(log_type, crawler).info('切换到视频号 webview 成功\n')
        return "成功"

    # No window contained the marker element.
    return None
|
|
|
|
|
|
@classmethod
|
|
|
def search_video(cls, log_type, crawler, driver: WebDriver, env):
    """Run an in-app search for every configured search word.

    The word list is read with ``get_config_from_mysql(..., "search_word")``.
    For each word the method taps the search button, types the word into the
    search box, presses ENTER, clicks the element with id
    ``com.tencent.mm:id/m94`` and then calls ``check_to_webview``.

    :param log_type: forwarded to ``Common.logger`` and the config lookup.
    :param crawler: forwarded to ``Common.logger`` and the config lookup.
    :param driver: Appium driver attached to the app under test.
    :param env: environment name forwarded to the config lookup.
    :return: ``None``. The method returns early, skipping any remaining
        words, the first time ``check_to_webview`` returns ``None``.
    """
    search_words = get_config_from_mysql(log_type, crawler, env, "search_word", action="")

    for word in search_words:
        driver.implicitly_wait(10)

        Common.logger(log_type, crawler).info('点击搜索按钮')
        search_button = driver.find_element(By.ID, 'com.tencent.mm:id/j5t')
        search_button.click()
        time.sleep(0.5)

        Common.logger(log_type, crawler).info(f'输入搜索词:{word}')
        search_box = driver.find_element(By.ID, 'com.tencent.mm:id/cd7')
        # send_keys is called on whatever clear() returns, exactly as the
        # chained call did.
        cleared_box = search_box.clear()
        cleared_box.send_keys(word)

        Common.logger(log_type, crawler).info('点击搜索')
        driver.press_keycode(AndroidKey.ENTER)
        result_entry = driver.find_element(By.ID, 'com.tencent.mm:id/m94')
        result_entry.click()
        time.sleep(5)

        switch_result = cls.check_to_webview(log_type, crawler, driver)
        if switch_result is None:
            Common.logger(log_type, crawler).info("切换到视频号 webview 失败\n")
            return

        # NOTE(review): the per-video crawl step that used to follow here
        # (select the category tab, scroll the result list, call
        # get_video_info) is currently disabled, so a successful switch
        # moves straight on to the next word while the driver is still in
        # the webview context -- confirm this is intended.
|
|
|
|
|
|
@classmethod
|
|
|
def get_video_info(cls, log_type, crawler, driver: WebDriver, sheetid):
|