|
|
@@ -4,6 +4,7 @@ import json
|
|
|
import time
|
|
|
import traceback
|
|
|
import uuid
|
|
|
+from typing import List, Dict
|
|
|
import xml.etree.ElementTree as ET
|
|
|
|
|
|
from tqdm import tqdm
|
|
|
@@ -55,7 +56,7 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
|
|
|
result[key] = elem.text if elem is not None and elem.text else default
|
|
|
return result
|
|
|
|
|
|
- def extract_reply_cards(self, msg_type, root):
|
|
|
+ def extract_reply_cards(self, msg_type: str, root) -> List[Dict]:
|
|
|
fields = {
|
|
|
"title": ".//title",
|
|
|
"page_path": ".//pagepath",
|
|
|
@@ -67,9 +68,10 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
|
|
|
|
|
|
data = self.parse_fields(root, fields)
|
|
|
data["msg_type"] = msg_type
|
|
|
- return data
|
|
|
+ results = [data]
|
|
|
+ return results
|
|
|
|
|
|
- def extract_reply_articles(self, msg_type, root):
|
|
|
+ def extract_reply_articles(self, msg_type, root) -> Dict:
|
|
|
fields = {
|
|
|
"title": "appmsg/title",
|
|
|
"url": "appmsg/url",
|
|
|
@@ -82,6 +84,23 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
|
|
|
data["msg_type"] = msg_type
|
|
|
return data
|
|
|
|
|
|
+ @staticmethod
|
|
|
+ def extract_group_reply_articles(msg_type, root) -> List[Dict]:
|
|
|
+ items = []
|
|
|
+ for item in root.findall(".//item"):
|
|
|
+ data = {
|
|
|
+ "title": item.findtext("title"),
|
|
|
+ "url": item.findtext("url"),
|
|
|
+ "cover_url": item.findtext("cover"),
|
|
|
+ "account_name": item.findtext("sources/source/name"),
|
|
|
+ "gh_id": "",
|
|
|
+ "desc": "",
|
|
|
+ "msg_type": msg_type
|
|
|
+ }
|
|
|
+ items.append(data)
|
|
|
+
|
|
|
+ return items
|
|
|
+
|
|
|
# 解析 xml
|
|
|
def extract_callback_xml(self, xml_text):
|
|
|
try:
|
|
|
@@ -89,7 +108,8 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
|
|
|
msg_type = root.find("appmsg/type").text
|
|
|
match msg_type:
|
|
|
case "5":
|
|
|
- return self.extract_reply_articles(msg_type, root)
|
|
|
+ # return self.extract_reply_articles(msg_type, root)
|
|
|
+ return self.extract_group_reply_articles(msg_type, root)
|
|
|
|
|
|
case "33":
|
|
|
return self.extract_reply_cards(msg_type, root)
|
|
|
@@ -98,13 +118,13 @@ class AutoReplyCardsMonitorUtils(AutoReplyCardsMonitorConst):
|
|
|
return self.extract_reply_cards(msg_type, root)
|
|
|
|
|
|
case _:
|
|
|
- return {}
|
|
|
+ return []
|
|
|
|
|
|
except Exception as e:
|
|
|
print(xml_text)
|
|
|
print(e)
|
|
|
print(traceback.format_exc())
|
|
|
- return {}
|
|
|
+ return []
|
|
|
|
|
|
# 解析 page_path
|
|
|
@staticmethod
|
|
|
@@ -368,12 +388,17 @@ class AutoReplyCardsMonitorMapper(AutoReplyCardsMonitorUtils):
|
|
|
|
|
|
# 获取带解析的任务
|
|
|
async def get_extract_tasks(self):
    """Fetch the tasks whose results still have to be parsed.

    Selects ``task_id`` and ``result`` from ``cooperate_accounts_task`` for
    rows whose ``extract_status`` equals ``self.INIT_STATUS`` and whose
    ``task_status`` equals ``self.SUCCESS_STATUS``.

    Returns:
        Whatever ``self.pool.async_fetch`` returns for the query
        (presumably the matching rows; confirm against the pool wrapper).
    """
    # Filter by status with bound parameters. A hard-coded task_id used for
    # local debugging must not ship here, since it would make every run
    # re-process the same single task and ignore all pending ones.
    query = """
        SELECT task_id, result FROM cooperate_accounts_task WHERE extract_status = %s AND task_status = %s;
    """
    return await self.pool.async_fetch(
        query=query, params=(self.INIT_STATUS, self.SUCCESS_STATUS)
    )
|
|
|
|
|
|
# 存储解析结果
|
|
|
async def store_extract_result(self, query, row_table):
|
|
|
@@ -485,7 +510,6 @@ class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
|
|
|
await self.store_extract_result(query, insert_row)
|
|
|
|
|
|
else:
|
|
|
- print(article_link)
|
|
|
article_detail = fetch_response["data"]["data"]
|
|
|
article_text = article_detail["body_text"]
|
|
|
article_images = article_detail["image_url_list"]
|
|
|
@@ -708,19 +732,22 @@ class AutoReplyCardsMonitor(AutoReplyCardsMonitorMapper):
|
|
|
try:
|
|
|
# parse xml
|
|
|
xml_list = json.loads(result) if type(result) == str else result
|
|
|
- for index, item in enumerate(xml_list, 1):
|
|
|
- xml_obj = self.extract_callback_xml(item)
|
|
|
- if xml_obj:
|
|
|
- msg_type = xml_obj.get("msg_type", None)
|
|
|
- match msg_type:
|
|
|
- case "33":
|
|
|
- await self.store_card(task_id, index, msg_type, xml_obj)
|
|
|
-
|
|
|
- case "5":
|
|
|
- await self.store_article(task_id, index, msg_type, xml_obj)
|
|
|
-
|
|
|
- case _:
|
|
|
- continue
|
|
|
+ index = 0
|
|
|
+ for item in xml_list:
|
|
|
+ xml_obj_list = self.extract_callback_xml(item)
|
|
|
+ if xml_obj_list:
|
|
|
+ for xml_obj in xml_obj_list:
|
|
|
+ index += 1
|
|
|
+ msg_type = xml_obj.get("msg_type", None)
|
|
|
+ match msg_type:
|
|
|
+ case "33":
|
|
|
+ await self.store_card(task_id, index, msg_type, xml_obj)
|
|
|
+
|
|
|
+ case "5":
|
|
|
+ await self.store_article(task_id, index, msg_type, xml_obj)
|
|
|
+
|
|
|
+ case _:
|
|
|
+ continue
|
|
|
|
|
|
await asyncio.sleep(5)
|
|
|
|