浏览代码

外部发文监测

luojunhui 5 个月之前
父节点
当前提交
136933bece
共有 1 个文件被更改,包括 41 次插入和 32 次删除
  1. 41 32
      tasks/monitor_tasks/outside_gzh_articles_monitor.py

+ 41 - 32
tasks/monitor_tasks/outside_gzh_articles_monitor.py

@@ -20,21 +20,16 @@ class OutsideGzhArticlesManager:
         self.denet_client.connect()
         self.feishu_bot_api = FeishuBotApi()
 
-    def process_illegal_article(
-        self, account_name, title, reason, publish_timestamp, account_source
-    ):
-        self.feishu_bot_api.bot(
-            title="文章违规告警",
-            detail={
-                "account_name": account_name,
-                "title": title,
-                "reason": reason,
-                "publish_timestamp": publish_timestamp,
-                "account_source": account_source,
-            },
-            env="dev"
+    def update_article_illegal_status(self, article_id, illegal_reason):
+        update_query = f"""
+            update outside_gzh_account_monitor
+            set illegal_status = %s, illegal_reason = %s
+            where id = %s and illegal_reason = %s
+        """
+        self.long_articles_client.save(
+            query=update_query,
+            params=(1, illegal_reason, article_id, 0)
         )
-        return
 
 
 class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
@@ -59,7 +54,7 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
         fetch_response = get_article_list_from_account(gh_id)
         msg_list = fetch_response.get("data", {}).get("data", [])
         if msg_list:
-            for msg in msg_list[:1]:
+            for msg in tqdm(msg_list, desc=f"insert account {account['account_name']}"):
                 self.save_each_msg_to_db(msg, account)
 
         else:
@@ -70,20 +65,31 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
         detail_info = msg["AppMsg"]["DetailInfo"]
         app_msg_id = base_info["AppMsgId"]
         create_timestamp = base_info["CreateTime"]
-        update_timestamp = base_info["UpdateTime"]
         publish_type = base_info["Type"]
 
         # insert each article
-        for article in detail_info[:1]:
+        for article in detail_info:
             link = article["ContentUrl"]
             article_detail = get_article_detail(link)
             response_code = article_detail["code"]
             if response_code == 25012:
                 illegal_reason = article_detail.get("msg")
                 # bot and return
+                self.feishu_bot_api.bot(
+                    title="文章违规告警",
+                    detail={
+                        "account_name": article["account_name"],
+                        "title": article['title'],
+                        "reason": illegal_reason,
+                        "publish_timestamp": create_timestamp,
+                        "account_source": article["account_source"]
+                    },
+                    env="dev"
+                )
+
             elif response_code == 0:
                 insert_query = f"""
-                    insert into outside_gzh_account_monitor
+                    insert ignore into outside_gzh_account_monitor
                     (account_name, gh_id, account_source, account_type, app_msg_id, publish_type, position, title, link, 
                     channel_content_id, crawler_timestamp, publish_timestamp)
                     values
@@ -102,8 +108,6 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
                         article["Title"],
                         link,
                         article_detail["data"]["data"]["channel_content_id"],
-                        create_timestamp,
-                        update_timestamp,
                         int(time.time()),
                         int(article_detail["data"]["data"]["publish_timestamp"] / 1000),
                     ),
@@ -112,12 +116,13 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
                 continue
 
 
-class OutsideGzhArticlesMonitor(OutsideGzhArticlesCollector):
+class OutsideGzhArticlesMonitor(OutsideGzhArticlesManager):
 
     def fetch_article_list_to_check(self):
         publish_timestamp_threshold = int(time.time()) - 7 * 24 * 3600
         fetch_query = f"""
-            select account_name, gh_id, account_source, account_type, title, link
+            select id, account_name, gh_id, account_source, account_type, 
+                title, link, from_unixtime(publish_timestamp) as publish_date
             from outside_gzh_account_monitor
             where illegal_status = 0 and publish_timestamp > {publish_timestamp_threshold};
         """
@@ -134,13 +139,19 @@ class OutsideGzhArticlesMonitor(OutsideGzhArticlesCollector):
         response_code = article_detail["code"]
         if response_code == 25012:
             illegal_reason = article_detail.get("msg")
-            self.process_illegal_article(
-                account_name=article["account_name"],
-                title=article["title"],
-                reason=illegal_reason,
-                publish_timestamp=article["publish_timestamp"],
-                account_source=article["account_source"],
+            self.feishu_bot_api.bot(
+                title="文章违规告警",
+                detail={
+                    "account_name": article["account_name"],
+                    "title": article['title'],
+                    "reason": illegal_reason,
+                    "publish_date": article["publish_date"],
+                    "account_source": article["account_source"]
+                },
+                env="dev"
             )
+            article_id = article["id"]
+            self.update_article_illegal_status(article_id, illegal_reason)
         else:
             return
 
@@ -151,7 +162,5 @@ class OutsideGzhArticlesMonitor(OutsideGzhArticlesCollector):
 
 
 if __name__ == "__main__":
-    collector = OutsideGzhArticlesCollector()
-    accounts = collector.fetch_outside_account_list()
-    for account_ in tqdm(accounts[1:2]):
-        collector.fetch_each_account(account_)
+    monitor = OutsideGzhArticlesMonitor()
+    monitor.deal()