|
@@ -1,10 +1,14 @@
|
|
import json
|
|
import json
|
|
import time
|
|
import time
|
|
import urllib.parse
|
|
import urllib.parse
|
|
|
|
+from datetime import datetime
|
|
|
|
+from typing import Optional
|
|
|
|
+
|
|
from tqdm import tqdm
|
|
from tqdm import tqdm
|
|
|
|
|
|
from pymysql.cursors import DictCursor
|
|
from pymysql.cursors import DictCursor
|
|
|
|
|
|
|
|
+from applications.api import FeishuBotApi
|
|
from applications.db import DatabaseConnector
|
|
from applications.db import DatabaseConnector
|
|
from applications.utils import str_to_md5
|
|
from applications.utils import str_to_md5
|
|
from cold_start.crawler.wechat import get_article_detail
|
|
from cold_start.crawler.wechat import get_article_detail
|
|
@@ -31,6 +35,8 @@ class FwhDataRecycle:
|
|
self.piaoquan_client = DatabaseConnector(piaoquan_crawler_config)
|
|
self.piaoquan_client = DatabaseConnector(piaoquan_crawler_config)
|
|
self.piaoquan_client.connect()
|
|
self.piaoquan_client.connect()
|
|
|
|
|
|
|
|
+ self.feishu_robot = FeishuBotApi()
|
|
|
|
+
|
|
def get_group_server_accounts(self):
|
|
def get_group_server_accounts(self):
|
|
fetch_query = f"""
|
|
fetch_query = f"""
|
|
select gzh_id from article_gzh_developer;
|
|
select gzh_id from article_gzh_developer;
|
|
@@ -40,6 +46,13 @@ class FwhDataRecycle:
|
|
# gh_id_list = ['gh_5e543853d8f0']
|
|
# gh_id_list = ['gh_5e543853d8f0']
|
|
return gh_id_list
|
|
return gh_id_list
|
|
|
|
|
|
|
|
def get_server_account_name(self, gh_id: str) -> Optional[str]:
    """
    Look up the account name recorded for a server account.

    Issues a one-row query against ``long_articles_group_send_result``
    through ``self.long_articles_client``, filtered by ``gh_id``.

    :param gh_id: account id used as the ``gh_id`` filter value.
    :return: ``account_name`` of the first returned row, or ``None`` when
        the fetch result is empty or otherwise falsy.
    """
    name_query = """
        select account_name from long_articles_group_send_result where gh_id = %s limit 1;
    """
    rows = self.long_articles_client.fetch(
        name_query, cursor_type=DictCursor, params=(gh_id,)
    )
    # No matching row (or a falsy fetch result): the name is unknown.
    if not rows:
        return None
    return rows[0]["account_name"]
|
|
|
|
|
|
class FwhGroupPublishRecordManager(FwhDataRecycle):
|
|
class FwhGroupPublishRecordManager(FwhDataRecycle):
|
|
|
|
|
|
@@ -131,6 +144,45 @@ class FwhGroupPublishRecordManager(FwhDataRecycle):
|
|
record_id, self.RECYCLE_PROCESSING_STATUS, self.RECYCLE_INIT_STATUS
|
|
record_id, self.RECYCLE_PROCESSING_STATUS, self.RECYCLE_INIT_STATUS
|
|
)
|
|
)
|
|
|
|
|
|
|
|
def monitor(self, date_string: Optional[str] = None):
    """
    Report per-account publish records for a date and flag silent accounts.

    The check only runs when the local wall-clock hour is before 12; at any
    later hour the method returns without querying or sending anything.

    Steps performed:
      1. Load the account ids from ``self.get_group_server_accounts()``.
      2. Query ``long_articles_group_send_result`` for ``date_string``,
         grouped by account name and ``gh_id``, and send the rows through
         ``self.feishu_robot.bot`` (without mention).
      3. Collect every account id that has no row for that date and, if
         any exist, send a second message listing them.

    :param date_string: publish date used as the ``publish_date`` filter,
        formatted ``YYYY-MM-DD``. ``None`` (the default) means today's
        date, resolved at call time.
    :return: None
    """
    # Resolve the default per call: a default expression in the signature
    # is evaluated only once, when the function is defined, and would go
    # stale in a long-running process.
    if date_string is None:
        date_string = datetime.today().strftime("%Y-%m-%d")

    # NOTE(review): block nesting was reconstructed from flattened source;
    # the whole check is treated as gated on the morning window — confirm.
    if datetime.now().hour >= 12:
        return

    account_list = self.get_group_server_accounts()
    sql = """
        select account_name as '账号名称', gh_id, count(distinct user_group_id) as '发文组数'
        from long_articles_group_send_result
        where publish_date = %s
        group by account_name, gh_id;
    """
    publish_records = self.long_articles_client.fetch(
        query=sql, cursor_type=DictCursor, params=(date_string,)
    )
    self.feishu_robot.bot(
        title=f"{date_string}服务号发文记录",
        mention=False,
        detail=publish_records,
        env="server_account_publish_monitor",
    )

    # Tolerate a falsy fetch result (e.g. None) instead of raising while
    # iterating; every account is then reported as not published.
    published_ids = {record["gh_id"] for record in publish_records or ()}
    do_not_publish_account = [
        {
            "account_name": self.get_server_account_name(account_id),
            "gh_id": account_id,
        }
        for account_id in account_list
        if account_id not in published_ids
    ]

    if do_not_publish_account:
        self.feishu_robot.bot(
            title=f"{date_string}发现服务号存在未发文情况",
            detail=do_not_publish_account,
            env="server_account_publish_monitor",
        )
|
|
|
|
+
|
|
|
|
+
|
|
|
|
|
|
class SaveFwhDataToDatabase(FwhDataRecycle):
|
|
class SaveFwhDataToDatabase(FwhDataRecycle):
|
|
|
|
|
|
@@ -313,3 +365,8 @@ class FwhDataExportTemp(FwhDataRecycle):
|
|
print(f"article {article['ContentUrl']} is not available, skip it")
|
|
print(f"article {article['ContentUrl']} is not available, skip it")
|
|
df = pd.DataFrame(L)
|
|
df = pd.DataFrame(L)
|
|
df.to_csv("temp2.csv", index=False)
|
|
df.to_csv("temp2.csv", index=False)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
if __name__ == "__main__":
    # Manual entry point: run the publish monitor once for a fixed date.
    record_manager = FwhGroupPublishRecordManager()
    record_manager.monitor("2025-06-21")
|