@@ -1,10 +1,14 @@
 import json
 import time
 import urllib.parse
+from datetime import datetime
+from typing import Optional
+
 from tqdm import tqdm
 
 from pymysql.cursors import DictCursor
 
+from applications.api import FeishuBotApi
 from applications.db import DatabaseConnector
 from applications.utils import str_to_md5
 from cold_start.crawler.wechat import get_article_detail
@@ -31,6 +35,8 @@ class FwhDataRecycle:
         self.piaoquan_client = DatabaseConnector(piaoquan_crawler_config)
         self.piaoquan_client.connect()
 
+        self.feishu_robot = FeishuBotApi()
+
     def get_group_server_accounts(self):
         fetch_query = f"""
             select gzh_id from article_gzh_developer;
@@ -40,6 +46,14 @@ class FwhDataRecycle:
             # gh_id_list = ['gh_5e543853d8f0']
         return gh_id_list
 
+    def get_server_account_name(self, gh_id: str) -> Optional[str]:
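+        """Return the account name recorded for this gh_id, or None if no record exists."""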
+        fetch_query = """
+            select account_name from long_articles_group_send_result where gh_id = %s limit 1;
+        """
+        fetch_response = self.long_articles_client.fetch(fetch_query, cursor_type=DictCursor, params=(gh_id,))
+        account_name = fetch_response[0]["account_name"] if fetch_response else None
+        return account_name
 
 
 class FwhGroupPublishRecordManager(FwhDataRecycle):
@@ -131,6 +145,52 @@ class FwhGroupPublishRecordManager(FwhDataRecycle):
             record_id, self.RECYCLE_PROCESSING_STATUS, self.RECYCLE_INIT_STATUS
         )
 
 
+    def monitor(self, date_string: Optional[str] = None):
+        """
+        Check the day's group-send publish records and alert via Feishu when any server account has not published.
+        """
+        if date_string is None:
+            # default to today's date, resolved at call time; a datetime default in the signature would be frozen at import
+            date_string = datetime.today().strftime("%Y-%m-%d")
+        now = datetime.now()
+        if now.hour < 12:
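+            # only run the check in the morning; later invocations do nothing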
+            account_list = self.get_group_server_accounts()
+            do_not_publish_account = []
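+            # the Chinese aliases (账号名称 = account name, 发文组数 = publish group count) label the rows sent to Feishu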
+            sql = """
+                select account_name as '账号名称', gh_id, count(distinct user_group_id) as '发文组数'
+                from long_articles_group_send_result
+                where publish_date = %s
+                group by account_name, gh_id;
+            """
+            publish_records = self.long_articles_client.fetch(query=sql, cursor_type=DictCursor, params=(date_string,))
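+            # push the day's publish summary to Feishu regardless of whether anything is missing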
+            self.feishu_robot.bot(
+                title=f"{date_string}服务号发文记录",
+                mention=False,
+                detail=publish_records,
+                env="server_account_publish_monitor",
+            )
+
+            publish_account_id_set = {i["gh_id"] for i in publish_records}
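+            # collect every server account that has no publish record for the day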
+            for account_id in account_list:
+                if account_id not in publish_account_id_set:
+                    account_name = self.get_server_account_name(account_id)
+                    do_not_publish_account.append({
+                        "account_name": account_name,
+                        "gh_id": account_id,
+                    })
+
+            if do_not_publish_account:
+                self.feishu_robot.bot(
+                    title=f"{date_string}发现服务号存在未发文情况",
+                    detail=do_not_publish_account,
+                    env="server_account_publish_monitor",
+                )
+
+
 class SaveFwhDataToDatabase(FwhDataRecycle):
 
@@ -313,3 +373,9 @@ class FwhDataExportTemp(FwhDataRecycle):
                 print(f"article {article['ContentUrl']} is not available, skip it")
         df = pd.DataFrame(L)
         df.to_csv("temp2.csv", index=False)
+
+
+if __name__ == "__main__":
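+    # manual run: check publish records for a specific date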
+    w = FwhGroupPublishRecordManager()
+    w.monitor("2025-06-21")