|
@@ -30,7 +30,7 @@ class XiaoNianGaoPlusRecommend(object):
|
|
|
"""
|
|
|
|
|
|
def __init__(self, log_type, crawler, env, rule_dict, our_uid):
|
|
|
- self.mq = None
|
|
|
+ self.mq = MQ(topic_name="topic_crawler_etl_prod")
|
|
|
self.platform = "xiaoniangaoplus"
|
|
|
self.download_cnt = 0
|
|
|
self.element_list = []
|
|
@@ -41,7 +41,7 @@ class XiaoNianGaoPlusRecommend(object):
|
|
|
self.env = env
|
|
|
self.rule_dict = rule_dict
|
|
|
self.our_uid = our_uid
|
|
|
- chromedriverExecutable = "/Users/luojunhui/Downloads/chromedriver_mac_116/chromedriver"
|
|
|
+ chromedriverExecutable = "/Users/luojunhui/chromedriver/chromedriver_v116/chromedriver"
|
|
|
print("启动微信")
|
|
|
# 微信的配置文件
|
|
|
caps = {
|
|
@@ -260,48 +260,49 @@ class XiaoNianGaoPlusRecommend(object):
|
|
|
print(json.dumps(video_dict, ensure_ascii=False, indent=4))
|
|
|
Local.logger(platform=self.platform, mode=self.log_type).info(
|
|
|
"scan_data_" + json.dumps(video_dict, ensure_ascii=False))
|
|
|
- AliyunLogger(platform=self.platform, mode=self.log_type).logging(
|
|
|
- code="7000",
|
|
|
- message="监控到一条视频",
|
|
|
- data=video_dict
|
|
|
+
|
|
|
+ pipeline = PiaoQuanPipelineTest(
|
|
|
+ platform=self.crawler,
|
|
|
+ mode=self.log_type,
|
|
|
+ item=video_dict,
|
|
|
+ rule_dict=self.rule_dict,
|
|
|
+ env=self.env,
|
|
|
+ trace_id=trace_id,
|
|
|
)
|
|
|
+ flag = pipeline.process_item()
|
|
|
+ if flag:
|
|
|
+ video_title_element = self.search_elements(
|
|
|
+ f'//*[contains(text(), "{video_title}")]'
|
|
|
+ )
|
|
|
+ if video_title_element is None:
|
|
|
+ return
|
|
|
+ print("点击标题,进入视频详情页")
|
|
|
+ video_url = self.get_video_url(video_title_element)
|
|
|
+ print(video_url)
|
|
|
+ video_url = get_redirect_url(video_url)
|
|
|
+ print(video_url)
|
|
|
+ if video_url is None:
|
|
|
+ self.driver.press_keycode(AndroidKey.BACK)
|
|
|
+ time.sleep(5)
|
|
|
+ return
|
|
|
+ video_dict["video_url"] = video_url
|
|
|
+ video_dict["platform"] = self.crawler
|
|
|
+ video_dict["strategy"] = self.log_type
|
|
|
+ video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
+ video_dict["crawler_rule"] = json.dumps(self.rule_dict)
|
|
|
+ video_dict["user_id"] = random.choice(self.our_uid)
|
|
|
+ video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
+ print(json.dumps(video_dict, ensure_ascii=False, indent=4))
|
|
|
+ self.mq.send_msg(video_dict)
|
|
|
+ AliyunLogger(platform=self.platform, mode=self.log_type).logging(
|
|
|
+ code="1002",
|
|
|
+ message="发送视频至 ETL",
|
|
|
+ data=video_dict
|
|
|
+ )
|
|
|
+ self.download_cnt += 1
|
|
|
+ self.driver.press_keycode(AndroidKey.BACK)
|
|
|
+ time.sleep(5)
|
|
|
|
|
|
- # pipeline = PiaoQuanPipelineTest(
|
|
|
- # platform=self.crawler,
|
|
|
- # mode=self.log_type,
|
|
|
- # item=video_dict,
|
|
|
- # rule_dict=self.rule_dict,
|
|
|
- # env=self.env,
|
|
|
- # trace_id=trace_id,
|
|
|
- # )
|
|
|
- # flag = pipeline.process_item()
|
|
|
- # if flag:
|
|
|
- # video_title_element = self.search_elements(
|
|
|
- # f'//*[contains(text(), "{video_title}")]'
|
|
|
- # )
|
|
|
- # if video_title_element is None:
|
|
|
- # return
|
|
|
- # print("点击标题,进入视频详情页")
|
|
|
- # video_url = self.get_video_url(video_title_element)
|
|
|
- # print(video_url)
|
|
|
- # video_url = get_redirect_url(video_url)
|
|
|
- # print(video_url)
|
|
|
- # if video_url is None:
|
|
|
- # self.driver.press_keycode(AndroidKey.BACK)
|
|
|
- # time.sleep(5)
|
|
|
- # return
|
|
|
- # video_dict["video_url"] = video_url
|
|
|
- # video_dict["platform"] = self.crawler
|
|
|
- # video_dict["strategy"] = self.log_type
|
|
|
- # video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
- # video_dict["crawler_rule"] = json.dumps(self.rule_dict)
|
|
|
- # video_dict["user_id"] = self.our_uid
|
|
|
- # video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
- # print(json.dumps(video_dict, ensure_ascii=False, indent=4))
|
|
|
- # self.download_cnt += 1
|
|
|
- # self.driver.press_keycode(AndroidKey.BACK)
|
|
|
- # time.sleep(5)
|
|
|
- #
|
|
|
def get_video_info(self, video_element):
|
|
|
try:
|
|
|
self.get_video_info_2(video_element)
|