Browse Source

Merge branch '2025-06-23-add-server-account-monitor' of luojunhui/LongArticlesJob into master

luojunhui 4 tháng trước cách đây
mục cha
commit
eb502afd13

+ 5 - 0
applications/api/feishu_api.py

@@ -3,6 +3,9 @@ import requests
 
 
 class Feishu:
+    # 服务号分组群发监测机器人
+    server_account_publish_monitor_bot = "https://open.feishu.cn/open-apis/bot/v2/hook/380fdecf-402e-4426-85b6-7d9dbd2a9f59"
+
     # 外部服务号投流监测机器人
     outside_gzh_monitor_bot = "https://open.feishu.cn/open-apis/bot/v2/hook/0899d43d-9f65-48ce-a419-f83ac935bf59"
 
@@ -192,6 +195,8 @@ class FeishuBotApi(Feishu):
                 url = self.long_articles_bot
             case "outside_gzh_monitor":
                 url = self.outside_gzh_monitor_bot
+            case "server_account_publish_monitor":
+                url = self.server_account_publish_monitor_bot
             case _:
                 url = self.long_articles_bot_dev
 

+ 1 - 0
fwh_data_manager.py

@@ -6,6 +6,7 @@ if __name__ == '__main__':
     # 1. 从 aigc 获取数据
     fwh_group_publish_record_manager = FwhGroupPublishRecordManager()
     fwh_group_publish_record_manager.deal()
+    fwh_group_publish_record_manager.monitor()
 
     # 2. 保存数据到数据库
     save_fwh_data_to_database = SaveFwhDataToDatabase()

+ 58 - 0
tasks/data_tasks/fwh_data_recycle.py

@@ -1,10 +1,14 @@
 import json
 import time
 import urllib.parse
+from datetime import datetime
+from typing import Optional
+
 from tqdm import tqdm
 
 from pymysql.cursors import DictCursor
 
+from applications.api import FeishuBotApi
 from applications.db import DatabaseConnector
 from applications.utils import str_to_md5
 from cold_start.crawler.wechat import get_article_detail
@@ -31,6 +35,8 @@ class FwhDataRecycle:
         self.piaoquan_client = DatabaseConnector(piaoquan_crawler_config)
         self.piaoquan_client.connect()
 
+        self.feishu_robot = FeishuBotApi()
+
     def get_group_server_accounts(self):
         fetch_query = f"""
             select gzh_id from article_gzh_developer;
@@ -40,6 +46,16 @@ class FwhDataRecycle:
         # gh_id_list = ['gh_5e543853d8f0']
         return gh_id_list
 
+    def get_server_account_name(self, gh_id: str) -> Optional[str]:
+        fetch_query = f"""
+            select account_name from long_articles_group_send_result where gh_id = %s limit 1;
+        """
+        fetch_response = self.long_articles_client.fetch(
+            fetch_query, cursor_type=DictCursor, params=(gh_id,)
+        )
+        account_name = fetch_response[0]["account_name"] if fetch_response else None
+        return account_name
+
 
 class FwhGroupPublishRecordManager(FwhDataRecycle):
 
@@ -131,6 +147,48 @@ class FwhGroupPublishRecordManager(FwhDataRecycle):
                     record_id, self.RECYCLE_PROCESSING_STATUS, self.RECYCLE_INIT_STATUS
                 )
 
+    def monitor(self, date_string: str = datetime.today().strftime("%Y-%m-%d")):
+        """
+        monitor the publish record
+        """
+        now = datetime.now()
+        if now.hour < 12:
+            account_list = self.get_group_server_accounts()
+            do_not_publish_account = []
+            sql = f"""
+                select account_name as '账号名称', gh_id, count(distinct user_group_id) as '发文组数'
+                from long_articles_group_send_result
+                where publish_date = %s
+                group by account_name, gh_id; 
+            """
+            publish_records = self.long_articles_client.fetch(
+                query=sql, cursor_type=DictCursor, params=(date_string,)
+            )
+            self.feishu_robot.bot(
+                title=f"{date_string}服务号发文记录",
+                mention=False,
+                detail=publish_records,
+                env="server_account_publish_monitor",
+            )
+
+            publish_account_id_set = set([i["gh_id"] for i in publish_records])
+            for account_id in account_list:
+                if account_id not in publish_account_id_set:
+                    account_name = self.get_server_account_name(account_id)
+                    do_not_publish_account.append(
+                        {
+                            "account_name": account_name,
+                            "gh_id": account_id,
+                        }
+                    )
+
+            if do_not_publish_account:
+                self.feishu_robot.bot(
+                    title=f"{date_string}发现服务号存在未发文情况",
+                    detail=do_not_publish_account,
+                    env="server_account_publish_monitor",
+                )
+
 
 class SaveFwhDataToDatabase(FwhDataRecycle):