Преглед на файлове

add server account monitor

luojunhui преди 4 месеца
родител
ревизия
39c0f88f74
променени са 2 файла, в които са добавени 62 реда и са изтрити 0 реда
  1. 5 0
      applications/api/feishu_api.py
  2. 57 0
      tasks/data_tasks/fwh_data_recycle.py

+ 5 - 0
applications/api/feishu_api.py

@@ -3,6 +3,9 @@ import requests
 
 
 class Feishu:
+    # 服务号分组群发监测机器人
+    server_account_publish_monitor_bot = "https://open.feishu.cn/open-apis/bot/v2/hook/380fdecf-402e-4426-85b6-7d9dbd2a9f59"
+
     # 外部服务号投流监测机器人
     outside_gzh_monitor_bot = "https://open.feishu.cn/open-apis/bot/v2/hook/0899d43d-9f65-48ce-a419-f83ac935bf59"
 
@@ -192,6 +195,8 @@ class FeishuBotApi(Feishu):
                 url = self.long_articles_bot
             case "outside_gzh_monitor":
                 url = self.outside_gzh_monitor_bot
+            case "server_account_publish_monitor":
+                url = self.server_account_publish_monitor_bot
             case _:
                 url = self.long_articles_bot_dev
 

+ 57 - 0
tasks/data_tasks/fwh_data_recycle.py

@@ -1,10 +1,14 @@
 import json
 import time
 import urllib.parse
+from datetime import datetime
+from typing import Optional
+
 from tqdm import tqdm
 
 from pymysql.cursors import DictCursor
 
+from applications.api import FeishuBotApi
 from applications.db import DatabaseConnector
 from applications.utils import str_to_md5
 from cold_start.crawler.wechat import get_article_detail
@@ -31,6 +35,8 @@ class FwhDataRecycle:
         self.piaoquan_client = DatabaseConnector(piaoquan_crawler_config)
         self.piaoquan_client.connect()
 
+        self.feishu_robot = FeishuBotApi()
+
     def get_group_server_accounts(self):
         fetch_query = f"""
             select gzh_id from article_gzh_developer;
@@ -40,6 +46,13 @@ class FwhDataRecycle:
         # gh_id_list = ['gh_5e543853d8f0']
         return gh_id_list
 
+    def get_server_account_name(self, gh_id: str) -> Optional[str]:
+        fetch_query = f"""
+            select account_name from long_articles_group_send_result where gh_id = %s limit 1;
+        """
+        fetch_response = self.long_articles_client.fetch(fetch_query, cursor_type=DictCursor, params=(gh_id,))
+        account_name = fetch_response[0]["account_name"] if fetch_response else None
+        return account_name
 
 class FwhGroupPublishRecordManager(FwhDataRecycle):
 
@@ -131,6 +144,45 @@ class FwhGroupPublishRecordManager(FwhDataRecycle):
                     record_id, self.RECYCLE_PROCESSING_STATUS, self.RECYCLE_INIT_STATUS
                 )
 
+    def monitor(self, date_string: str = datetime.today().strftime("%Y-%m-%d")):
+        """
+        monitor the publish record
+        """
+        now = datetime.now()
+        if now.hour < 12:
+            account_list = self.get_group_server_accounts()
+            do_not_publish_account = []
+            sql = f"""
+                select account_name as '账号名称', gh_id, count(distinct user_group_id) as '发文组数'
+                from long_articles_group_send_result
+                where publish_date = %s
+                group by account_name, gh_id; 
+            """
+            publish_records = self.long_articles_client.fetch(query=sql, cursor_type=DictCursor, params=(date_string,))
+            self.feishu_robot.bot(
+                title=f"{date_string}服务号发文记录",
+                mention=False,
+                detail=publish_records,
+                env="server_account_publish_monitor"
+            )
+
+            publish_account_id_set = set([i['gh_id'] for i in publish_records])
+            for account_id in account_list:
+                if account_id not in publish_account_id_set:
+                    account_name = self.get_server_account_name(account_id)
+                    do_not_publish_account.append({
+                        "account_name": account_name,
+                        "gh_id": account_id,
+                    })
+
+            if do_not_publish_account:
+                self.feishu_robot.bot(
+                    title=f"{date_string}发现服务号存在未发文情况",
+                    detail=do_not_publish_account,
+                    env="server_account_publish_monitor",
+                )
+
+
 
 class SaveFwhDataToDatabase(FwhDataRecycle):
 
@@ -313,3 +365,8 @@ class FwhDataExportTemp(FwhDataRecycle):
                     print(f"article {article['ContentUrl']} is not available, skip it")
         df = pd.DataFrame(L)
         df.to_csv("temp2.csv", index=False)
+
+
+if __name__ == "__main__":
+    w = FwhGroupPublishRecordManager()
+    w.monitor('2025-06-21')