Browse Source

西瓜代码修改

罗俊辉 1 year ago
parent
commit
5c6e5c4774
1 changed file with 32 additions and 14 deletions
  1. 32 14
      xigua/xigua_author/xigua_author.py

+ 32 - 14
xigua/xigua_author/xigua_author.py

@@ -654,20 +654,20 @@ class XiGuaAuthor:
 
     def get_author_list(self):
         # 每轮只抓取定量的数据,到达数量后自己退出
-        max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
+        # max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
         for user_dict in self.user_list:
-            if self.download_count <= max_count:
-                self.get_video_list(user_dict)
-                time.sleep(random.randint(1, 15))
-            else:
-                AliyunLogger.logging(
-                    code="2000",
-                    platform=self.platform,
-                    mode=self.mode,
-                    env=self.env,
-                    message="本轮已经抓取足够数量的视频,已经自动退出",
-                )
-                return
+            # if self.download_count <= max_count:
+            self.get_video_list(user_dict)
+            #     time.sleep(random.randint(1, 15))
+            # else:
+            #     AliyunLogger.logging(
+            #         code="2000",
+            #         platform=self.platform,
+            #         mode=self.mode,
+            #         env=self.env,
+            #         message="本轮已经抓取足够数量的视频,已经自动退出",
+            #     )
+            #     return
 
     def get_video_list(self, user_dict):
         offset = 0
@@ -728,7 +728,9 @@ class XiGuaAuthor:
                             env=self.env,
                             message="扫描到一条视频",
                         )
-                        self.process_video_obj(video_obj, user_dict)
+                        date_flag = self.process_video_obj(video_obj, user_dict)
+                        if not date_flag:
+                            return
                     except Exception as e:
                         AliyunLogger.logging(
                             code="3000",
@@ -766,6 +768,21 @@ class XiGuaAuthor:
         video_dict["publish_time"] = video_dict["publish_time_str"]
         video_dict["strategy_type"] = self.mode
         video_dict["update_time_stamp"] = int(time.time())
+        if (
+                int(time.time()) - video_dict['publish_time_stamp']
+                > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
+        ):
+            AliyunLogger.logging(
+                code="2004",
+                platform=self.platform,
+                mode=self.mode,
+                env=self.env,
+                data=video_dict,
+                message="发布时间超过{}天".format(
+                    int(self.rule_dict.get("period", {}).get("max", 1000))
+                ),
+            )
+            return False
         pipeline = PiaoQuanPipeline(
             platform=self.platform,
             mode=self.mode,
@@ -788,6 +805,7 @@ class XiGuaAuthor:
                 trace_id=trace_id,
                 message="成功发送 MQ 至 ETL",
             )
+        return True
 
     def get_video_info(self, item_id, trace_id):
         url = "https://www.ixigua.com/api/mixVideo/information?"