luojunhui пре 1 месец
родитељ
комит
57fe09b4d5

+ 19 - 4
applications/tasks/crawler_tasks/crawler_account_manager.py

@@ -1,6 +1,7 @@
 import time
 import math
 
+from datetime import datetime
 from typing import Optional, List, Dict
 from pandas import DataFrame
 from scipy import stats
@@ -32,9 +33,18 @@ class CrawlerAccountManager(CrawlerAccountManagerConst):
         """把 None / NaN / Inf 统一替换成指定默认值"""
         return default if x is None or not math.isfinite(x) else float(x)
 
-    async def get_crawling_accounts(self, platform):
+    async def get_crawling_accounts(self, platform) -> List[str]:
         """获取抓取账号信息"""
-        pass
+        match platform:
+            case "weixin":
+                query = f"""
+                    select gh_id as 'account_id' from long_articles_accounts where is_using = 1;
+                """
+            case _:
+                raise RuntimeError(f"Unknown platform: {platform}")
+
+        account_list = await self.pool.async_fetch(query)
+        return [i['account_id'] for i in account_list]
 
 
 class WeixinAccountManager(CrawlerAccountManager):
@@ -72,7 +82,7 @@ class WeixinAccountManager(CrawlerAccountManager):
         query = f"""
             update long_articles_accounts 
             set history_publish_frequency = %s, history_score_ci_lower = %s, 
-                recent_publish_frequency= %s, recent_score_ci_lower = %s
+                recent_publish_frequency= %s, recent_score_ci_lower = %s, update_date = %s
             where gh_id = %s;
         """
         return await self.pool.async_save(
@@ -82,6 +92,7 @@ class WeixinAccountManager(CrawlerAccountManager):
                 recently_info["publish_frequency"],
                 recently_info["ci_lower"],
                 account_id,
+                datetime.today().strftime("%Y-%m-%d"),
             )
         )
 
@@ -123,7 +134,11 @@ class WeixinAccountManager(CrawlerAccountManager):
         recent_30_days_analysis = self.analysis_dataframe(recent_30_days_df)
         await self.update_account_stat_detail(account_id, history_articles_analysis, recent_30_days_analysis)
 
-    async def deal(self, account_id_list: List[str]):
+    async def deal(self, platform, account_id_list: Optional[List[str]] = None) -> None:
+        """deal"""
+        if not account_id_list:
+            account_id_list = await self.get_crawling_accounts(platform=platform)
+
         for account_id in account_id_list:
             await self.analysis_single_account(account_id)
 

+ 2 - 3
applications/tasks/task_handler.py

@@ -125,8 +125,7 @@ class TaskHandler(TaskMapper):
     async def _crawler_account_manager_handler(self) -> int:
         platform = self.data.get("platform", "weixin")
         account_id_list = self.data.get("account_id_list")
-        if not account_id_list:
-            return self.TASK_FAILED_STATUS
+
         match platform:
             case "weixin":
                 task = WeixinAccountManager(
@@ -135,5 +134,5 @@ class TaskHandler(TaskMapper):
             case _:
                 raise ValueError(f"Unsupported platform {platform}")
 
-        await task.deal(account_id_list=account_id_list)
+        await task.deal(platform=platform, account_id_list=account_id_list)
         return self.TASK_SUCCESS_STATUS