|
@@ -1,6 +1,7 @@
|
|
|
import time
|
|
|
import math
|
|
|
|
|
|
+from datetime import datetime
|
|
|
from typing import Optional, List, Dict
|
|
|
from pandas import DataFrame
|
|
|
from scipy import stats
|
|
@@ -32,9 +33,18 @@ class CrawlerAccountManager(CrawlerAccountManagerConst):
|
|
|
"""把 None / NaN / Inf 统一替换成指定默认值"""
|
|
|
return default if x is None or not math.isfinite(x) else float(x)
|
|
|
|
|
|
- async def get_crawling_accounts(self, platform):
|
|
|
+ async def get_crawling_accounts(self, platform) -> List[str]:
|
|
|
"""获取抓取账号信息"""
|
|
|
- pass
|
|
|
+ match platform:
|
|
|
+ case "weixin":
|
|
|
+ query = f"""
|
|
|
+ select gh_id as 'account_id' from long_articles_accounts where is_using = 1;
|
|
|
+ """
|
|
|
+ case _:
|
|
|
+ raise RuntimeError(f"Unknown platform: {platform}")
|
|
|
+
|
|
|
+ account_list = await self.pool.async_fetch(query)
|
|
|
+ return [i['account_id'] for i in account_list]
|
|
|
|
|
|
|
|
|
class WeixinAccountManager(CrawlerAccountManager):
|
|
@@ -72,7 +82,7 @@ class WeixinAccountManager(CrawlerAccountManager):
|
|
|
query = f"""
|
|
|
update long_articles_accounts
|
|
|
set history_publish_frequency = %s, history_score_ci_lower = %s,
|
|
|
- recent_publish_frequency= %s, recent_score_ci_lower = %s
|
|
|
+ recent_publish_frequency= %s, recent_score_ci_lower = %s, update_date = %s
|
|
|
where gh_id = %s;
|
|
|
"""
|
|
|
return await self.pool.async_save(
|
|
@@ -82,6 +92,7 @@ class WeixinAccountManager(CrawlerAccountManager):
|
|
|
recently_info["publish_frequency"],
|
|
|
recently_info["ci_lower"],
|
|
|
account_id,
|
|
|
+ datetime.today().strftime("%Y-%m-%d"),
|
|
|
)
|
|
|
)
|
|
|
|
|
@@ -123,7 +134,11 @@ class WeixinAccountManager(CrawlerAccountManager):
|
|
|
recent_30_days_analysis = self.analysis_dataframe(recent_30_days_df)
|
|
|
await self.update_account_stat_detail(account_id, history_articles_analysis, recent_30_days_analysis)
|
|
|
|
|
|
- async def deal(self, account_id_list: List[str]):
|
|
|
+ async def deal(self, platform, account_id_list: Optional[List[str]] = None) -> None:
|
|
|
+ """deal"""
|
|
|
+ if not account_id_list:
|
|
|
+ account_id_list = await self.get_crawling_accounts(platform=platform)
|
|
|
+
|
|
|
for account_id in account_id_list:
|
|
|
await self.analysis_single_account(account_id)
|
|
|
|