|
@@ -6,6 +6,7 @@ import os
|
|
|
import shutil
|
|
|
import sys
|
|
|
import time
|
|
|
+import uuid
|
|
|
from hashlib import md5
|
|
|
from appium import webdriver
|
|
|
# from appium.webdriver.common.touch_action import TouchAction
|
|
@@ -13,7 +14,10 @@ from appium.webdriver.extensions.android.nativekey import AndroidKey
|
|
|
from appium.webdriver.webdriver import WebDriver
|
|
|
from selenium.common import NoSuchElementException
|
|
|
from selenium.webdriver.common.by import By
|
|
|
+
|
|
|
+
|
|
|
sys.path.append(os.getcwd())
|
|
|
+from common import AliyunLogger, PiaoQuanPipeline
|
|
|
from common.feishu import Feishu
|
|
|
from common.common import Common
|
|
|
from common.publish import Publish
|
|
@@ -87,23 +91,54 @@ class JixiangxingfuRecommend:
|
|
|
# "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
|
|
|
'browserName': ''
|
|
|
}
|
|
|
- driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
|
|
|
+ try:
|
|
|
+ driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
|
|
|
+ except:
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="3002",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message="appium 启动异常"
|
|
|
+ )
|
|
|
+ return \
|
|
|
driver.implicitly_wait(30)
|
|
|
# 向下滑动页面,展示出小程序选择面板
|
|
|
for i in range(120):
|
|
|
try:
|
|
|
# 发现微信消息 TAB,代表微信已启动成功
|
|
|
if driver.find_elements(By.ID, 'com.tencent.mm:id/f2s'):
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message="微信启动成功"
|
|
|
+ )
|
|
|
break
|
|
|
# 发现并关闭系统菜单栏
|
|
|
elif driver.find_element(By.ID, 'com.android.systemui:id/dismiss_view'):
|
|
|
Common.logger(log_type, crawler).info('发现并关闭系统下拉菜单栏')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message="发现并关闭系统下拉菜单栏"
|
|
|
+ )
|
|
|
driver.find_element(By.ID, 'com.android.systemui:id/dismiss_view').click()
|
|
|
else:
|
|
|
pass
|
|
|
except NoSuchElementException:
|
|
|
time.sleep(1)
|
|
|
Common.logger(log_type, crawler).info('下滑,展示小程序选择面板')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message="下滑,展示小程序选择面板"
|
|
|
+ )
|
|
|
size = driver.get_window_size()
|
|
|
driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2), int(size['width'] * 0.5),
|
|
|
int(size['height'] * 0.8), 200)
|
|
@@ -111,7 +146,13 @@ class JixiangxingfuRecommend:
|
|
|
time.sleep(5)
|
|
|
Common.logger(log_type, crawler).info('打开小程序"祝福每天好运来相伴"')
|
|
|
driver.find_elements(By.XPATH, '//*[@text="祝福每天好运来相伴"]')[-1].click()
|
|
|
-
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message='打开小程序"祝福每天好运来相伴"成功'
|
|
|
+ )
|
|
|
# 获取视频信息
|
|
|
time.sleep(10)
|
|
|
# time.sleep(60)
|
|
@@ -226,10 +267,6 @@ class JixiangxingfuRecommend:
|
|
|
@classmethod
|
|
|
def get_videoList(cls, log_type, crawler, driver, env):
|
|
|
driver.implicitly_wait(20)
|
|
|
- # # 鼠标左键点击, 1为x坐标, 2为y坐标
|
|
|
- # Common.logger(log_type, crawler).info('关闭广告')
|
|
|
- # size = driver.get_window_size()
|
|
|
- # TouchAction(driver).tap(x=int(size['width'] * 0.5), y=int(size['height'] * 0.1)).perform()
|
|
|
driver.press_keycode(AndroidKey.BACK)
|
|
|
# 切换到小程序
|
|
|
cls.check_to_applet(log_type, crawler, driver)
|
|
@@ -240,33 +277,80 @@ class JixiangxingfuRecommend:
|
|
|
try:
|
|
|
if cls.search_elements(driver, '//wx-view[@class="list-container"]') is None:
|
|
|
Common.logger(log_type, crawler).info('窗口已销毁\n')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="3000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message='窗口已销毁'
|
|
|
+ )
|
|
|
return
|
|
|
|
|
|
Common.logger(log_type, crawler).info('获取视频列表\n')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message='获取视频列表'
|
|
|
+ )
|
|
|
video_elements = cls.search_elements(driver, '//wx-view[@class="listCardVideo--video-title"]')
|
|
|
if video_elements is None:
|
|
|
Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message=f'video_elements:{video_elements}'
|
|
|
+ )
|
|
|
return
|
|
|
|
|
|
video_element_temp = video_elements[index:]
|
|
|
if len(video_element_temp) == 0:
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message='到底啦~~~~~~~~~~~~~\n'
|
|
|
+ )
|
|
|
Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
|
|
|
return
|
|
|
|
|
|
for i, video_element in enumerate(video_element_temp):
|
|
|
if video_element is None:
|
|
|
Common.logger(log_type, crawler).info('到底啦~\n')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message='到底啦~~~~~~~~~~~~~\n'
|
|
|
+ )
|
|
|
return
|
|
|
cls.i += 1
|
|
|
cls.search_elements(driver, '//wx-view[@class="listCardVideo--video-title"]')
|
|
|
|
|
|
Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message=f'拖动"视频"列表第{cls.i}个至屏幕中间'
|
|
|
+ )
|
|
|
time.sleep(3)
|
|
|
driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
|
|
|
-
|
|
|
- # video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="video-title"]')[cls.i - 1].text
|
|
|
- # cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="video-cover-img"]')[cls.i - 1].get_attribute('src')
|
|
|
- # play_cnt = video_element.find_elements(By.XPATH, '//wx-view[@class="video-play-num"]')[cls.i - 1].text
|
|
|
+ trace_id = crawler + str(uuid.uuid1())
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1001",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ trace_id=trace_id,
|
|
|
+ message="扫描到一条视频",
|
|
|
+ )
|
|
|
|
|
|
video_title = video_element.find_elements(By.XPATH, '//wx-view[@class="listCardVideo--video-title"]')[index+i].text
|
|
|
cover_url = video_element.find_elements(By.XPATH, '//wx-image[@class="listCardVideo--cover-img"]')[index+i].get_attribute('src')
|
|
@@ -274,41 +358,56 @@ class JixiangxingfuRecommend:
|
|
|
|
|
|
if "万" in play_cnt:
|
|
|
play_cnt = int(play_cnt.split("万")[0])*10000
|
|
|
- # play_btn = video_element.find_elements(By.XPATH, '//wx-image[@class="video-play-img"]')[cls.i - 1]
|
|
|
out_video_id = md5(video_title.encode('utf8')).hexdigest()
|
|
|
video_dict = {
|
|
|
'video_title': video_title,
|
|
|
'video_id': out_video_id,
|
|
|
+ 'out_video_id': out_video_id,
|
|
|
'play_cnt': play_cnt,
|
|
|
'comment_cnt': 0,
|
|
|
'like_cnt': 0,
|
|
|
'share_cnt': 0,
|
|
|
'publish_time_stamp': int(time.time()),
|
|
|
'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
|
|
|
+ 'update_time_stamp': int(time.time()),
|
|
|
'user_name': "jixiangxingfu",
|
|
|
'user_id': "jixiangxingfu",
|
|
|
'avatar_url': cover_url,
|
|
|
'cover_url': cover_url,
|
|
|
'session': f"jixiangxingfu-{int(time.time())}"
|
|
|
}
|
|
|
- for k, v in video_dict.items():
|
|
|
- Common.logger(log_type, crawler).info(f"{k}:{v}")
|
|
|
-
|
|
|
- if video_title is None or cover_url is None:
|
|
|
- Common.logger(log_type, crawler).info("无效视频\n")
|
|
|
- elif any(str(word) if str(word) in video_title else False for word in
|
|
|
- cls.jixiangxingfu_config(log_type, crawler, "filter", env)) is True:
|
|
|
- Common.logger(log_type, crawler).info('已中过滤词\n')
|
|
|
- elif cls.repeat_out_video_id(log_type, crawler, out_video_id, env) != 0:
|
|
|
- Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
- else:
|
|
|
+ rule_dict = {}
|
|
|
+ pipeline = PiaoQuanPipeline(
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ item=video_dict,
|
|
|
+ rule_dict=rule_dict,
|
|
|
+ env=env,
|
|
|
+ trace_id=trace_id
|
|
|
+ )
|
|
|
+ flag = pipeline.process_item()
|
|
|
+ if flag:
|
|
|
video_url = cls.get_video_url(log_type, crawler, driver, video_element)
|
|
|
|
|
|
if video_url is None:
|
|
|
Common.logger(log_type, crawler).info("未获取到视频播放地址\n")
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message="未获取到视频播放地址"
|
|
|
+ )
|
|
|
driver.press_keycode(AndroidKey.BACK)
|
|
|
elif cls.repeat_video_url(log_type, crawler, video_url, env) != 0:
|
|
|
Common.logger(log_type, crawler).info('视频已下载\n')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message="视频已下载"
|
|
|
+ )
|
|
|
driver.press_keycode(AndroidKey.BACK)
|
|
|
else:
|
|
|
video_dict["video_url"] = video_url
|
|
@@ -316,9 +415,23 @@ class JixiangxingfuRecommend:
|
|
|
# driver.press_keycode(AndroidKey.BACK)
|
|
|
cls.download_publish(log_type, crawler, video_dict, env, driver)
|
|
|
Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="1000",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message="已抓取完一组,休眠 5 秒\n"
|
|
|
+ )
|
|
|
time.sleep(10)
|
|
|
index = index + len(video_element_temp)
|
|
|
except Exception as e:
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="3001",
|
|
|
+ platform=JixiangxingfuRecommend.platform,
|
|
|
+ mode=log_type,
|
|
|
+ env=env,
|
|
|
+ message=f"get_videoList:{e}\n"
|
|
|
+ )
|
|
|
Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
|
|
|
cls.i = 0
|
|
|
return
|