Jelajahi Sumber

代码版本迭代:
1. 增加违禁的 gh_id
2. 自动回复兼容群发文章

luojunhui 1 bulan lalu
induk
melakukan
3ced25d80c

+ 2 - 0
app/domains/data_recycle_tasks/recycle_daily_publish_articles.py

@@ -48,6 +48,8 @@ class Const:
         "gh_dd4c857bbb36",
         "gh_ff487cb5dab3",
         "gh_ac43eb24376d",
+        "gh_b15de7c99912",
+        "gh_56ca3dae948c"
     ]
 
     # NOT USED SERVER ACCOUNT

+ 52 - 25
app/domains/monitor_tasks/auto_reply_cards_monitor.py

@@ -4,6 +4,7 @@ import json
 import time
 import traceback
 import uuid
+from typing import List, Dict
 import xml.etree.ElementTree as ET
 
 from tqdm import tqdm
@@ -55,7 +56,7 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
             result[key] = elem.text if elem is not None and elem.text else default
         return result
 
-    def extract_reply_cards(self, msg_type, root):
+    def extract_reply_cards(self, msg_type: str, root) -> List[Dict]:
         fields = {
             "title": ".//title",
             "page_path": ".//pagepath",
@@ -67,9 +68,10 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
 
         data = self.parse_fields(root, fields)
         data["msg_type"] = msg_type
-        return data
+        results = [data]
+        return results
 
-    def extract_reply_articles(self, msg_type, root):
+    def extract_reply_articles(self, msg_type, root) -> Dict:
         fields = {
             "title": "appmsg/title",
             "url": "appmsg/url",
@@ -82,6 +84,23 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
         data["msg_type"] = msg_type
         return data
 
+    @staticmethod
+    def extract_group_reply_articles(msg_type, root) -> List[Dict]:
+        items = []
+        for item in root.findall(".//item"):
+            data = {
+                "title": item.findtext("title"),
+                "url": item.findtext("url"),
+                "cover_url": item.findtext("cover"),
+                "account_name": item.findtext("sources/source/name"),
+                "gh_id": "",
+                "desc": "",
+                "msg_type": msg_type
+            }
+            items.append(data)
+
+        return items
+
     # 解析 xml
     def extract_callback_xml(self, xml_text):
         try:
@@ -89,7 +108,8 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
             msg_type = root.find("appmsg/type").text
             match msg_type:
                 case "5":
-                    return self.extract_reply_articles(msg_type, root)
+                    # return self.extract_reply_articles(msg_type, root)
+                    return self.extract_group_reply_articles(msg_type, root)
 
                 case "33":
                     return self.extract_reply_cards(msg_type, root)
@@ -98,13 +118,13 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
                     return self.extract_reply_cards(msg_type, root)
 
                 case _:
-                    return {}
+                    return []
 
         except Exception as e:
             print(xml_text)
             print(e)
             print(traceback.format_exc())
-            return {}
+            return []
 
     # 解析 page_path
     @staticmethod
@@ -368,12 +388,17 @@ class AutoReplyCardsMonitorMapper(AutoReplyCardsMonitorUtils):
 
     # 获取带解析的任务
     async def get_extract_tasks(self):
+        # query = """
+        #     SELECT task_id, result FROM cooperate_accounts_task WHERE extract_status = %s AND task_status = %s;
+        # """
+        # return await self.pool.async_fetch(
+        #     query=query, params=(self.INIT_STATUS, self.SUCCESS_STATUS)
+        # )
+
         query = """
-            SELECT task_id, result FROM cooperate_accounts_task WHERE extract_status = %s AND task_status = %s;
-        """
-        return await self.pool.async_fetch(
-            query=query, params=(self.INIT_STATUS, self.SUCCESS_STATUS)
-        )
+                SELECT task_id, result FROM cooperate_accounts_task WHERE task_id = 'auto_reply_08faad23-7e2b-414f-a422-7472d8354b30';
+            """
+        return await self.pool.async_fetch(query=query)
 
     # 存储解析结果
     async def store_extract_result(self, query, row_table):
@@ -485,7 +510,6 @@ class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
             await self.store_extract_result(query, insert_row)
 
         else:
-            print(article_link)
             article_detail = fetch_response["data"]["data"]
             article_text = article_detail["body_text"]
             article_images = article_detail["image_url_list"]
@@ -708,19 +732,22 @@ class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
         try:
             # parse xml
             xml_list = json.loads(result) if type(result) == str else result
-            for index, item in enumerate(xml_list, 1):
-                xml_obj = self.extract_callback_xml(item)
-                if xml_obj:
-                    msg_type = xml_obj.get("msg_type", None)
-                    match msg_type:
-                        case "33":
-                            await self.store_card(task_id, index, msg_type, xml_obj)
-
-                        case "5":
-                            await self.store_article(task_id, index, msg_type, xml_obj)
-
-                        case _:
-                            continue
+            index = 0
+            for item in xml_list:
+                xml_obj_list = self.extract_callback_xml(item)
+                if xml_obj_list:
+                    for xml_obj in xml_obj_list:
+                        index += 1
+                        msg_type = xml_obj.get("msg_type", None)
+                        match msg_type:
+                            case "33":
+                                await self.store_card(task_id, index, msg_type, xml_obj)
+
+                            case "5":
+                                await self.store_article(task_id, index, msg_type, xml_obj)
+
+                            case _:
+                                continue
 
                 await asyncio.sleep(5)