罗俊辉 1 rok temu
rodzic
commit
0a45c371ff
1 zmieniony plik z 43 dodaniami i 42 usunięciami
  1. 43 42
      spider/crawler_offline/xiaoniangao_plus.py

+ 43 - 42
spider/crawler_offline/xiaoniangao_plus.py

@@ -30,7 +30,7 @@ class XiaoNianGaoPlusRecommend(object):
     """
 
     def __init__(self, log_type, crawler, env, rule_dict, our_uid):
-        self.mq = None
+        self.mq = MQ(topic_name="topic_crawler_etl_prod")
         self.platform = "xiaoniangaoplus"
         self.download_cnt = 0
         self.element_list = []
@@ -41,7 +41,7 @@ class XiaoNianGaoPlusRecommend(object):
         self.env = env
         self.rule_dict = rule_dict
         self.our_uid = our_uid
-        chromedriverExecutable = "/Users/luojunhui/Downloads/chromedriver_mac_116/chromedriver"
+        chromedriverExecutable = "/Users/luojunhui/chromedriver/chromedriver_v116/chromedriver"
         print("启动微信")
         # 微信的配置文件
         caps = {
@@ -260,48 +260,49 @@ class XiaoNianGaoPlusRecommend(object):
         print(json.dumps(video_dict, ensure_ascii=False, indent=4))
         Local.logger(platform=self.platform, mode=self.log_type).info(
             "scan_data_" + json.dumps(video_dict, ensure_ascii=False))
-        AliyunLogger(platform=self.platform, mode=self.log_type).logging(
-            code="7000",
-            message="监控到一条视频",
-            data=video_dict
+
+        pipeline = PiaoQuanPipelineTest(
+            platform=self.crawler,
+            mode=self.log_type,
+            item=video_dict,
+            rule_dict=self.rule_dict,
+            env=self.env,
+            trace_id=trace_id,
         )
+        flag = pipeline.process_item()
+        if flag:
+            video_title_element = self.search_elements(
+                f'//*[contains(text(), "{video_title}")]'
+            )
+            if video_title_element is None:
+                return
+            print("点击标题,进入视频详情页")
+            video_url = self.get_video_url(video_title_element)
+            print(video_url)
+            video_url = get_redirect_url(video_url)
+            print(video_url)
+            if video_url is None:
+                self.driver.press_keycode(AndroidKey.BACK)
+                time.sleep(5)
+                return
+            video_dict["video_url"] = video_url
+            video_dict["platform"] = self.crawler
+            video_dict["strategy"] = self.log_type
+            video_dict["out_video_id"] = video_dict["video_id"]
+            video_dict["crawler_rule"] = json.dumps(self.rule_dict)
+            video_dict["user_id"] = random.choice(self.our_uid)
+            video_dict["publish_time"] = video_dict["publish_time_str"]
+            print(json.dumps(video_dict, ensure_ascii=False, indent=4))
+            self.mq.send_msg(video_dict)
+            AliyunLogger(platform=self.platform, mode=self.log_type).logging(
+                code="1002",
+                message="发送视频至 ETL",
+                data=video_dict
+            )
+            self.download_cnt += 1
+            self.driver.press_keycode(AndroidKey.BACK)
+            time.sleep(5)
 
-    #     pipeline = PiaoQuanPipelineTest(
-    #         platform=self.crawler,
-    #         mode=self.log_type,
-    #         item=video_dict,
-    #         rule_dict=self.rule_dict,
-    #         env=self.env,
-    #         trace_id=trace_id,
-    #     )
-    #     flag = pipeline.process_item()
-    #     if flag:
-    #         video_title_element = self.search_elements(
-    #             f'//*[contains(text(), "{video_title}")]'
-    #         )
-    #         if video_title_element is None:
-    #             return
-    #         print("点击标题,进入视频详情页")
-    #         video_url = self.get_video_url(video_title_element)
-    #         print(video_url)
-    #         video_url = get_redirect_url(video_url)
-    #         print(video_url)
-    #         if video_url is None:
-    #             self.driver.press_keycode(AndroidKey.BACK)
-    #             time.sleep(5)
-    #             return
-    #         video_dict["video_url"] = video_url
-    #         video_dict["platform"] = self.crawler
-    #         video_dict["strategy"] = self.log_type
-    #         video_dict["out_video_id"] = video_dict["video_id"]
-    #         video_dict["crawler_rule"] = json.dumps(self.rule_dict)
-    #         video_dict["user_id"] = self.our_uid
-    #         video_dict["publish_time"] = video_dict["publish_time_str"]
-    #         print(json.dumps(video_dict, ensure_ascii=False, indent=4))
-    #         self.download_cnt += 1
-    #         self.driver.press_keycode(AndroidKey.BACK)
-    #         time.sleep(5)
-    #
     def get_video_info(self, video_element):
         try:
             self.get_video_info_2(video_element)