|
@@ -3,6 +3,7 @@
|
|
# @Time: 2023/12/18
|
|
# @Time: 2023/12/18
|
|
import json
|
|
import json
|
|
import os
|
|
import os
|
|
|
|
+import random
|
|
import sys
|
|
import sys
|
|
import time
|
|
import time
|
|
import uuid
|
|
import uuid
|
|
@@ -19,15 +20,19 @@ sys.path.append(os.getcwd())
|
|
from application.functions import get_redirect_url
|
|
from application.functions import get_redirect_url
|
|
from application.pipeline import PiaoQuanPipelineTest
|
|
from application.pipeline import PiaoQuanPipelineTest
|
|
from application.common.messageQueue import MQ
|
|
from application.common.messageQueue import MQ
|
|
-from application.common.log import AliyunLogger
|
|
|
|
|
|
+from application.common.log import Local, AliyunLogger
|
|
|
|
|
|
|
|
|
|
class XiaoNianGaoPlusRecommend(object):
|
|
class XiaoNianGaoPlusRecommend(object):
|
|
|
|
+ """
|
|
|
|
+ 小年糕+线下爬虫
|
|
|
|
+ """
|
|
|
|
+
|
|
def __init__(self, log_type, crawler, env, rule_dict, our_uid):
|
|
def __init__(self, log_type, crawler, env, rule_dict, our_uid):
|
|
self.mq = None
|
|
self.mq = None
|
|
self.platform = "xiaoniangaoplus"
|
|
self.platform = "xiaoniangaoplus"
|
|
self.download_cnt = 0
|
|
self.download_cnt = 0
|
|
- self.element_list = [ ]
|
|
|
|
|
|
+ self.element_list = []
|
|
self.count = 0
|
|
self.count = 0
|
|
self.swipe_count = 0
|
|
self.swipe_count = 0
|
|
self.log_type = log_type
|
|
self.log_type = log_type
|
|
@@ -35,7 +40,7 @@ class XiaoNianGaoPlusRecommend(object):
|
|
self.env = env
|
|
self.env = env
|
|
self.rule_dict = rule_dict
|
|
self.rule_dict = rule_dict
|
|
self.our_uid = our_uid
|
|
self.our_uid = our_uid
|
|
- chromedriverExecutable = "/usr/bin/chromedriver"
|
|
|
|
|
|
+ chromedriverExecutable = "/Users/luojunhui/Downloads/chromedriver_mac_116/chromedriver"
|
|
print("启动微信")
|
|
print("启动微信")
|
|
# 微信的配置文件
|
|
# 微信的配置文件
|
|
caps = {
|
|
caps = {
|
|
@@ -43,7 +48,7 @@ class XiaoNianGaoPlusRecommend(object):
|
|
"devicesName": "Android",
|
|
"devicesName": "Android",
|
|
"appPackage": "com.tencent.mm",
|
|
"appPackage": "com.tencent.mm",
|
|
"appActivity": ".ui.LauncherUI",
|
|
"appActivity": ".ui.LauncherUI",
|
|
- "autoGrantPermissions": "true",
|
|
|
|
|
|
+ "autoGrantPermissions": True,
|
|
"noReset": True,
|
|
"noReset": True,
|
|
"resetkeyboard": True,
|
|
"resetkeyboard": True,
|
|
"unicodekeyboard": True,
|
|
"unicodekeyboard": True,
|
|
@@ -58,7 +63,7 @@ class XiaoNianGaoPlusRecommend(object):
|
|
"chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
|
|
"chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
|
|
}
|
|
}
|
|
try:
|
|
try:
|
|
- self.driver = webdriver.Remote("http://localhost:4750/wd/hub", caps)
|
|
|
|
|
|
+ self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
print(e)
|
|
print(e)
|
|
return
|
|
return
|
|
@@ -245,42 +250,51 @@ class XiaoNianGaoPlusRecommend(object):
|
|
"cover_url": cover_url,
|
|
"cover_url": cover_url,
|
|
"session": f"xiaoniangao-{int(time.time())}",
|
|
"session": f"xiaoniangao-{int(time.time())}",
|
|
}
|
|
}
|
|
- pipeline = PiaoQuanPipelineTest(
|
|
|
|
- platform=self.crawler,
|
|
|
|
- mode=self.log_type,
|
|
|
|
- item=video_dict,
|
|
|
|
- rule_dict=self.rule_dict,
|
|
|
|
- env=self.env,
|
|
|
|
- trace_id=trace_id,
|
|
|
|
|
|
+ print(json.dumps(video_dict, ensure_ascii=False, indent=4))
|
|
|
|
+ Local.logger(platform=self.platform, mode=self.log_type).info(
|
|
|
|
+ "scan_data_" + json.dumps(video_dict, ensure_ascii=False))
|
|
|
|
+ AliyunLogger(platform=self.platform, mode=self.log_type).logging(
|
|
|
|
+ code="7000",
|
|
|
|
+ message="监控到一条视频",
|
|
|
|
+ data=video_dict
|
|
)
|
|
)
|
|
- flag = pipeline.process_item()
|
|
|
|
- if flag:
|
|
|
|
- video_title_element = self.search_elements(
|
|
|
|
- f'//*[contains(text(), "{video_title}")]'
|
|
|
|
- )
|
|
|
|
- if video_title_element is None:
|
|
|
|
- return
|
|
|
|
- print("点击标题,进入视频详情页")
|
|
|
|
- video_url = self.get_video_url(video_title_element)
|
|
|
|
- print(video_url)
|
|
|
|
- video_url = get_redirect_url(video_url)
|
|
|
|
- print(video_url)
|
|
|
|
- if video_url is None:
|
|
|
|
- self.driver.press_keycode(AndroidKey.BACK)
|
|
|
|
- time.sleep(5)
|
|
|
|
- return
|
|
|
|
- video_dict["video_url"] = video_url
|
|
|
|
- video_dict["platform"] = self.crawler
|
|
|
|
- video_dict["strategy"] = self.log_type
|
|
|
|
- video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
|
- video_dict["crawler_rule"] = json.dumps(self.rule_dict)
|
|
|
|
- video_dict["user_id"] = self.our_uid
|
|
|
|
- video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
|
- print(json.dumps(video_dict, ensure_ascii=False, indent=4))
|
|
|
|
- self.download_cnt += 1
|
|
|
|
- self.driver.press_keycode(AndroidKey.BACK)
|
|
|
|
- time.sleep(5)
|
|
|
|
|
|
|
|
|
|
+ # pipeline = PiaoQuanPipelineTest(
|
|
|
|
+ # platform=self.crawler,
|
|
|
|
+ # mode=self.log_type,
|
|
|
|
+ # item=video_dict,
|
|
|
|
+ # rule_dict=self.rule_dict,
|
|
|
|
+ # env=self.env,
|
|
|
|
+ # trace_id=trace_id,
|
|
|
|
+ # )
|
|
|
|
+ # flag = pipeline.process_item()
|
|
|
|
+ # if flag:
|
|
|
|
+ # video_title_element = self.search_elements(
|
|
|
|
+ # f'//*[contains(text(), "{video_title}")]'
|
|
|
|
+ # )
|
|
|
|
+ # if video_title_element is None:
|
|
|
|
+ # return
|
|
|
|
+ # print("点击标题,进入视频详情页")
|
|
|
|
+ # video_url = self.get_video_url(video_title_element)
|
|
|
|
+ # print(video_url)
|
|
|
|
+ # video_url = get_redirect_url(video_url)
|
|
|
|
+ # print(video_url)
|
|
|
|
+ # if video_url is None:
|
|
|
|
+ # self.driver.press_keycode(AndroidKey.BACK)
|
|
|
|
+ # time.sleep(5)
|
|
|
|
+ # return
|
|
|
|
+ # video_dict["video_url"] = video_url
|
|
|
|
+ # video_dict["platform"] = self.crawler
|
|
|
|
+ # video_dict["strategy"] = self.log_type
|
|
|
|
+ # video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
|
+ # video_dict["crawler_rule"] = json.dumps(self.rule_dict)
|
|
|
|
+ # video_dict["user_id"] = self.our_uid
|
|
|
|
+ # video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
|
+ # print(json.dumps(video_dict, ensure_ascii=False, indent=4))
|
|
|
|
+ # self.download_cnt += 1
|
|
|
|
+ # self.driver.press_keycode(AndroidKey.BACK)
|
|
|
|
+ # time.sleep(5)
|
|
|
|
+ #
|
|
def get_video_info(self, video_element):
|
|
def get_video_info(self, video_element):
|
|
try:
|
|
try:
|
|
self.get_video_info_2(video_element)
|
|
self.get_video_info_2(video_element)
|
|
@@ -289,12 +303,16 @@ class XiaoNianGaoPlusRecommend(object):
|
|
print(f"抓取单条视频异常:{e}\n")
|
|
print(f"抓取单条视频异常:{e}\n")
|
|
|
|
|
|
def get_videoList(self):
|
|
def get_videoList(self):
|
|
|
|
+ """
|
|
|
|
+ 获取视频列表
|
|
|
|
+ :return:
|
|
|
|
+ """
|
|
|
|
+ # while True:
|
|
self.driver.implicitly_wait(20)
|
|
self.driver.implicitly_wait(20)
|
|
# 切换到 web_view
|
|
# 切换到 web_view
|
|
self.check_to_applet(xpath='//*[@class="tab-bar--tab tab-bar--tab-selected"]')
|
|
self.check_to_applet(xpath='//*[@class="tab-bar--tab tab-bar--tab-selected"]')
|
|
print("切换到 webview 成功")
|
|
print("切换到 webview 成功")
|
|
time.sleep(1)
|
|
time.sleep(1)
|
|
- page = 0
|
|
|
|
if self.search_elements('//*[@class="list-list--list"]') is None:
|
|
if self.search_elements('//*[@class="list-list--list"]') is None:
|
|
print("窗口已销毁")
|
|
print("窗口已销毁")
|
|
self.count = 0
|
|
self.count = 0
|
|
@@ -308,8 +326,8 @@ class XiaoNianGaoPlusRecommend(object):
|
|
element = self.parse_detail(i)
|
|
element = self.parse_detail(i)
|
|
self.get_video_info(element)
|
|
self.get_video_info(element)
|
|
self.swipe_up()
|
|
self.swipe_up()
|
|
- time.sleep(1)
|
|
|
|
- if self.swipe_count > 100:
|
|
|
|
- return
|
|
|
|
- print("已抓取完一组,休眠 5 秒\n")
|
|
|
|
- time.sleep(5)
|
|
|
|
|
|
+ time.sleep(random.randint(1, 5))
|
|
|
|
+ # if self.swipe_count > 100:
|
|
|
|
+ # return
|
|
|
|
+ print("已抓取完一组,休眠 600 秒\n")
|
|
|
|
+ # time.sleep(600)
|