|
|
@@ -238,7 +238,15 @@ class CrawlerGzhAccountArticles(CrawlerGzhBaseStrategy):
|
|
|
"""crawler single account"""
|
|
|
current_cursor = None
|
|
|
gh_id = account["gh_id"]
|
|
|
- latest_timestamp = account["latest_update_time"].timestamp()
|
|
|
+ # latest_timestamp = account["latest_update_time"].timestamp()
|
|
|
+ latest_update_time = account["latest_update_time"]
|
|
|
+ if latest_update_time:
|
|
|
+ latest_timestamp = latest_update_time.timestamp()
|
|
|
+ else:
|
|
|
+ latest_timestamp = self.DEFAULT_TIMESTAMP
|
|
|
+
|
|
|
+ print("最新更新时间:", timestamp_to_str(latest_timestamp))
|
|
|
+
|
|
|
while True:
|
|
|
# fetch response from weixin
|
|
|
response = await get_article_list_from_account(
|
|
|
@@ -246,6 +254,7 @@ class CrawlerGzhAccountArticles(CrawlerGzhBaseStrategy):
|
|
|
)
|
|
|
msg_list = response.get("data", {}).get("data")
|
|
|
if not msg_list:
|
|
|
+ print("No msg, Please check your data")
|
|
|
break
|
|
|
|
|
|
# process current page
|
|
|
@@ -256,7 +265,7 @@ class CrawlerGzhAccountArticles(CrawlerGzhBaseStrategy):
|
|
|
last_time_stamp_in_this_msg = last_article_in_this_page["AppMsg"][
|
|
|
"BaseInfo"
|
|
|
]["UpdateTime"]
|
|
|
- if last_time_stamp_in_this_msg > latest_timestamp:
|
|
|
+ if last_time_stamp_in_this_msg <= latest_timestamp:
|
|
|
await self.update_account_latest_timestamp(gh_id)
|
|
|
break
|
|
|
|