luojunhui 2 달 전
부모
커밋
be93c3c816
2개의 변경된 파일에 12개의 추가 그리고 2개의 삭제
  1. 3 1
      applications/tasks/crawler_tasks/crawler_account_manager.py
  2. 9 1
      applications/tasks/monitor_tasks/gzh_article_monitor.py

+ 3 - 1
applications/tasks/crawler_tasks/crawler_account_manager.py

@@ -115,7 +115,9 @@ class WeixinAccountManager(CrawlerAccountManager):
 
         # 计算发文频率
         publish_times = pd.to_numeric(dataframe["publish_time"], errors="coerce")
-        publish_times = publish_times.replace([float("inf"), float("-inf")], pd.NA).dropna()
+        publish_times = publish_times.replace(
+            [float("inf"), float("-inf")], pd.NA
+        ).dropna()
         if len(publish_times) >= 2:
             dates = pd.to_datetime(publish_times, unit="s").dt.normalize()
             days_delta = max(int((dates.max() - dates.min()).days) + 1, 1)

+ 9 - 1
applications/tasks/monitor_tasks/gzh_article_monitor.py

@@ -273,7 +273,15 @@ class InnerGzhArticlesMonitor(MonitorConst):
             return response
 
     async def check_each_article(self, article: dict):
-        gh_id, account_name, title, url, wx_sn, publish_date = article
+        # gh_id, account_name, title, url, wx_sn, publish_date = article
+        gh_id, account_name, title, url, wx_sn, publish_date = (
+            article["ghId"],
+            article["accountName"],
+            article["title"],
+            article["ContentUrl"],
+            article["wx_sn"],
+            article["publish_timestamp"],
+        )
         try:
             response = await get_article_detail(url, is_cache=False)
             response_code = response["code"]