Преглед на файлове

Merge branch 'master' of https://git.yishihui.com/Server/automatic_crawler

zhangyong преди 1 година
родител
ревизия
04a7a7255a
променени са 2 файла, в които са добавени 59 реда и са изтрити 42 реда
  1. 16 0
      app/festival_reminder.py
  2. 43 42
      spider/crawler_offline/xiaoniangao_plus.py

+ 16 - 0
app/festival_reminder.py

@@ -16,6 +16,8 @@ def bot(festival_info, days_count, date_str):
     level = festival_info["level"]
     festival = festival_info["festival"]
     category = festival_info["category"]
+    if festival == "龙年龙月龙日龙时":
+        date_str ="\n" + date_str + " 07:00-09:00"
     url = "https://open.feishu.cn/open-apis/bot/v2/hook/8eda2f73-e6f5-4fcd-944e-5a34579e1049"
     # url = "https://open.feishu.cn/open-apis/bot/v2/hook/f32c0456-847f-41f3-97db-33fcc1616bcd"
     headers = {"Content-Type": "application/json"}
@@ -405,6 +407,20 @@ class FestivalReminder(object):
                         "category": "节气"
                     }
                 ],
+                "2024-04-22": [
+                    {
+                        "level": "P1",
+                        "festival": "龙年龙月龙日龙时",
+                        "category": "热点"
+                    }
+                ],
+                "2024-05-04": [
+                    {
+                        "level": "P1",
+                        "festival": "龙年龙月龙日龙时",
+                        "category": "热点"
+                    }
+                ],
                 "2024-05-05": [
                     {
                         "level": "P0",

+ 43 - 42
spider/crawler_offline/xiaoniangao_plus.py

@@ -30,7 +30,7 @@ class XiaoNianGaoPlusRecommend(object):
     """
 
     def __init__(self, log_type, crawler, env, rule_dict, our_uid):
-        self.mq = None
+        self.mq = MQ(topic_name="topic_crawler_etl_prod")
         self.platform = "xiaoniangaoplus"
         self.download_cnt = 0
         self.element_list = []
@@ -41,7 +41,7 @@ class XiaoNianGaoPlusRecommend(object):
         self.env = env
         self.rule_dict = rule_dict
         self.our_uid = our_uid
-        chromedriverExecutable = "/Users/luojunhui/Downloads/chromedriver_mac_116/chromedriver"
+        chromedriverExecutable = "/Users/luojunhui/chromedriver/chromedriver_v116/chromedriver"
         print("启动微信")
         # 微信的配置文件
         caps = {
@@ -260,48 +260,49 @@ class XiaoNianGaoPlusRecommend(object):
         print(json.dumps(video_dict, ensure_ascii=False, indent=4))
         Local.logger(platform=self.platform, mode=self.log_type).info(
             "scan_data_" + json.dumps(video_dict, ensure_ascii=False))
-        AliyunLogger(platform=self.platform, mode=self.log_type).logging(
-            code="7000",
-            message="监控到一条视频",
-            data=video_dict
+
+        pipeline = PiaoQuanPipelineTest(
+            platform=self.crawler,
+            mode=self.log_type,
+            item=video_dict,
+            rule_dict=self.rule_dict,
+            env=self.env,
+            trace_id=trace_id,
         )
+        flag = pipeline.process_item()
+        if flag:
+            video_title_element = self.search_elements(
+                f'//*[contains(text(), "{video_title}")]'
+            )
+            if video_title_element is None:
+                return
+            print("点击标题,进入视频详情页")
+            video_url = self.get_video_url(video_title_element)
+            print(video_url)
+            video_url = get_redirect_url(video_url)
+            print(video_url)
+            if video_url is None:
+                self.driver.press_keycode(AndroidKey.BACK)
+                time.sleep(5)
+                return
+            video_dict["video_url"] = video_url
+            video_dict["platform"] = self.crawler
+            video_dict["strategy"] = self.log_type
+            video_dict["out_video_id"] = video_dict["video_id"]
+            video_dict["crawler_rule"] = json.dumps(self.rule_dict)
+            video_dict["user_id"] = random.choice(self.our_uid)
+            video_dict["publish_time"] = video_dict["publish_time_str"]
+            print(json.dumps(video_dict, ensure_ascii=False, indent=4))
+            self.mq.send_msg(video_dict)
+            AliyunLogger(platform=self.platform, mode=self.log_type).logging(
+                code="1002",
+                message="发送视频至 ETL",
+                data=video_dict
+            )
+            self.download_cnt += 1
+            self.driver.press_keycode(AndroidKey.BACK)
+            time.sleep(5)
 
-    #     pipeline = PiaoQuanPipelineTest(
-    #         platform=self.crawler,
-    #         mode=self.log_type,
-    #         item=video_dict,
-    #         rule_dict=self.rule_dict,
-    #         env=self.env,
-    #         trace_id=trace_id,
-    #     )
-    #     flag = pipeline.process_item()
-    #     if flag:
-    #         video_title_element = self.search_elements(
-    #             f'//*[contains(text(), "{video_title}")]'
-    #         )
-    #         if video_title_element is None:
-    #             return
-    #         print("点击标题,进入视频详情页")
-    #         video_url = self.get_video_url(video_title_element)
-    #         print(video_url)
-    #         video_url = get_redirect_url(video_url)
-    #         print(video_url)
-    #         if video_url is None:
-    #             self.driver.press_keycode(AndroidKey.BACK)
-    #             time.sleep(5)
-    #             return
-    #         video_dict["video_url"] = video_url
-    #         video_dict["platform"] = self.crawler
-    #         video_dict["strategy"] = self.log_type
-    #         video_dict["out_video_id"] = video_dict["video_id"]
-    #         video_dict["crawler_rule"] = json.dumps(self.rule_dict)
-    #         video_dict["user_id"] = self.our_uid
-    #         video_dict["publish_time"] = video_dict["publish_time_str"]
-    #         print(json.dumps(video_dict, ensure_ascii=False, indent=4))
-    #         self.download_cnt += 1
-    #         self.driver.press_keycode(AndroidKey.BACK)
-    #         time.sleep(5)
-    #
     def get_video_info(self, video_element):
         try:
             self.get_video_info_2(video_element)