|
@@ -654,20 +654,20 @@ class XiGuaAuthor:
|
|
|
|
|
|
def get_author_list(self):
|
|
|
# 每轮只抓取定量的数据,到达数量后自己退出
|
|
|
- max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
|
|
|
+ # max_count = int(self.rule_dict.get("videos_cnt", {}).get("min", 300))
|
|
|
for user_dict in self.user_list:
|
|
|
- if self.download_count <= max_count:
|
|
|
- self.get_video_list(user_dict)
|
|
|
- time.sleep(random.randint(1, 15))
|
|
|
- else:
|
|
|
- AliyunLogger.logging(
|
|
|
- code="2000",
|
|
|
- platform=self.platform,
|
|
|
- mode=self.mode,
|
|
|
- env=self.env,
|
|
|
- message="本轮已经抓取足够数量的视频,已经自动退出",
|
|
|
- )
|
|
|
- return
|
|
|
+ # if self.download_count <= max_count:
|
|
|
+ self.get_video_list(user_dict)
|
|
|
+ # time.sleep(random.randint(1, 15))
|
|
|
+ # else:
|
|
|
+ # AliyunLogger.logging(
|
|
|
+ # code="2000",
|
|
|
+ # platform=self.platform,
|
|
|
+ # mode=self.mode,
|
|
|
+ # env=self.env,
|
|
|
+ # message="本轮已经抓取足够数量的视频,已经自动退出",
|
|
|
+ # )
|
|
|
+ # return
|
|
|
|
|
|
def get_video_list(self, user_dict):
|
|
|
offset = 0
|
|
@@ -728,7 +728,9 @@ class XiGuaAuthor:
|
|
|
env=self.env,
|
|
|
message="扫描到一条视频",
|
|
|
)
|
|
|
- self.process_video_obj(video_obj, user_dict)
|
|
|
+ date_flag = self.process_video_obj(video_obj, user_dict)
|
|
|
+ if not date_flag:
|
|
|
+ return
|
|
|
except Exception as e:
|
|
|
AliyunLogger.logging(
|
|
|
code="3000",
|
|
@@ -766,6 +768,21 @@ class XiGuaAuthor:
|
|
|
video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
video_dict["strategy_type"] = self.mode
|
|
|
video_dict["update_time_stamp"] = int(time.time())
|
|
|
+ if (
|
|
|
+ int(time.time()) - video_dict['publish_time_stamp']
|
|
|
+ > 3600 * 24 * int(self.rule_dict.get("period", {}).get("max", 1000))
|
|
|
+ ):
|
|
|
+ AliyunLogger.logging(
|
|
|
+ code="2004",
|
|
|
+ platform=self.platform,
|
|
|
+ mode=self.mode,
|
|
|
+ env=self.env,
|
|
|
+ data=video_dict,
|
|
|
+ message="发布时间超过{}天".format(
|
|
|
+ int(self.rule_dict.get("period", {}).get("max", 1000))
|
|
|
+ ),
|
|
|
+ )
|
|
|
+ return False
|
|
|
pipeline = PiaoQuanPipeline(
|
|
|
platform=self.platform,
|
|
|
mode=self.mode,
|
|
@@ -788,6 +805,7 @@ class XiGuaAuthor:
|
|
|
trace_id=trace_id,
|
|
|
message="成功发送 MQ 至 ETL",
|
|
|
)
|
|
|
+ return True
|
|
|
|
|
|
def get_video_info(self, item_id, trace_id):
|
|
|
url = "https://www.ixigua.com/api/mixVideo/information?"
|