|
@@ -1,9 +1,11 @@
|
|
|
import time
|
|
|
import datetime
|
|
|
+from typing import Optional, List
|
|
|
|
|
|
from tqdm import tqdm
|
|
|
|
|
|
from applications.api import feishu_robot
|
|
|
+from applications.api import delete_illegal_gzh_articles
|
|
|
from applications.crawler.wechat import get_article_detail
|
|
|
from applications.crawler.wechat import get_article_list_from_account
|
|
|
|
|
@@ -14,7 +16,7 @@ class MonitorConst:
|
|
|
INIT_STATUS = 0
|
|
|
|
|
|
# 监测周期
|
|
|
- MONITOR_CYCLE = 5 * 24 * 3600
|
|
|
+ MONITOR_CYCLE = 3 * 24 * 3600
|
|
|
|
|
|
# article code
|
|
|
ARTICLE_ILLEGAL_CODE = 25012
|
|
@@ -81,7 +83,7 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
|
|
|
where
|
|
|
t1.mode_type = '代运营服务号';
|
|
|
"""
|
|
|
- response, error = await self.pool.async_fetch(query=query, db_name="aigc_db_pool")
|
|
|
+ response, error = await self.pool.async_fetch(query=query, db_name="aigc")
|
|
|
return response
|
|
|
|
|
|
async def fetch_each_account(self, account: dict):
|
|
@@ -128,11 +130,13 @@ class OutsideGzhArticlesCollector(OutsideGzhArticlesManager):
|
|
|
"账号名称": article["account_name"],
|
|
|
"标题": article["title"],
|
|
|
"违规理由": illegal_reason,
|
|
|
- "发布日期": datetime.datetime.fromtimestamp(create_timestamp).strftime('%Y-%m-%d %H:%M:%S'),
|
|
|
+ "发布日期": datetime.datetime.fromtimestamp(
|
|
|
+ create_timestamp
|
|
|
+ ).strftime("%Y-%m-%d %H:%M:%S"),
|
|
|
"账号合作商": article["account_source"],
|
|
|
},
|
|
|
env="outside_gzh_monitor",
|
|
|
- mention=False
|
|
|
+ mention=False,
|
|
|
)
|
|
|
|
|
|
elif response_code == self.ARTICLE_SUCCESS_CODE:
|
|
@@ -205,7 +209,7 @@ class OutsideGzhArticlesMonitor(OutsideGzhArticlesManager):
|
|
|
"账号合作商": article["account_source"],
|
|
|
},
|
|
|
env="outside_gzh_monitor",
|
|
|
- mention=False
|
|
|
+ mention=False,
|
|
|
)
|
|
|
article_id = article["id"]
|
|
|
await self.update_article_illegal_status(article_id, illegal_reason)
|
|
@@ -225,4 +229,85 @@ class OutsideGzhArticlesMonitor(OutsideGzhArticlesManager):
|
|
|
f"title: {article['title']}\n"
|
|
|
f"error: {e}\n"
|
|
|
)
|
|
|
- return self.TASK_SUCCESS_CODE
|
|
|
+ return self.TASK_SUCCESS_CODE
|
|
|
+
|
|
|
+
|
|
|
class InnerGzhArticlesMonitor(MonitorConst):
    """Re-check recently published in-house WeChat official-account articles.

    Articles published within the last ``MONITOR_CYCLE`` seconds are read
    from ``official_articles_v2``. Each one is fetched again through
    ``get_article_detail``; when the response code equals
    ``ARTICLE_ILLEGAL_CODE`` the article is recorded in ``illegal_articles``,
    a Feishu alert is sent and ``delete_illegal_gzh_articles`` is called.
    """

    # Column names produced by the select in ``fetch_article_list_to_check``,
    # in select order. Used to read dict-shaped rows.
    _ARTICLE_COLUMNS = (
        "ghId",
        "accountName",
        "title",
        "ContentUrl",
        "wx_sn",
        "publish_timestamp",
    )

    def __init__(self, pool):
        """Store the database pool used for all queries.

        :param pool: object exposing ``async_fetch`` and ``async_save``.
        """
        self.pool = pool

    async def fetch_article_list_to_check(
        self, run_date: Optional[str] = None
    ) -> Optional[List]:
        """Return the articles published inside the monitoring window.

        :param run_date: execution date formatted as ``%Y-%m-%d``; defaults
            to today when empty or ``None``.
        :return: the rows returned by the pool, or ``None`` when the query
            reported an error (a Feishu alert is sent in that case).
        :raises ValueError: if ``run_date`` does not match ``%Y-%m-%d``.
        """
        if not run_date:
            run_date = datetime.datetime.today().strftime("%Y-%m-%d")

        run_timestamp = int(
            datetime.datetime.strptime(run_date, "%Y-%m-%d").timestamp()
        )
        start_timestamp = run_timestamp - self.MONITOR_CYCLE
        # start_timestamp is an int computed locally, so interpolating it
        # into the statement cannot inject SQL.
        query = f"""
            select ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) as publish_timestamp
            from official_articles_v2
            where publish_timestamp >= {start_timestamp}
            order by publish_timestamp desc;
        """
        response, error = await self.pool.async_fetch(
            query=query, db_name="piaoquan_crawler"
        )
        if error:
            await feishu_robot.bot(
                title="站内微信公众号发文监测任务异常",
                detail={"error": error, "message": "查询数据库异常"},
            )
            return None
        return response

    def _unpack_article(self, article):
        """Return the six selected columns of a row, whatever its shape.

        Dict rows are read by column name; any other row is unpacked
        positionally in select order.
        """
        if isinstance(article, dict):
            return tuple(article[column] for column in self._ARTICLE_COLUMNS)
        gh_id, account_name, title, url, wx_sn, publish_date = article
        return gh_id, account_name, title, url, wx_sn, publish_date

    async def check_each_article(self, article):
        """Re-fetch one article and record/alert/delete it if it is illegal.

        :param article: one row returned by ``fetch_article_list_to_check``
            (dict keyed by column name, or a 6-item sequence in select order).
        :return: ``None``. Errors raised while checking are printed and
            swallowed so that one bad article does not stop the batch.
        """
        gh_id, account_name, title, url, wx_sn, publish_date = (
            self._unpack_article(article)
        )
        try:
            # NOTE(review): called without ``await``; if this helper performs
            # blocking network I/O it will stall the event loop - confirm.
            response = get_article_detail(url, is_cache=False)
            response_code = response["code"]
            if response_code != self.ARTICLE_ILLEGAL_CODE:
                return

            # The violation reason comes from the detail response; the
            # article row has no ``msg`` column.
            # NOTE(review): assumes the reason is under "msg" - confirm.
            error_detail = response.get("msg")
            query = """
                insert ignore into illegal_articles
                    (gh_id, account_name, title, wx_sn, publish_date, illegal_reason)
                values
                    (%s, %s, %s, %s, %s, %s);
            """
            affected_row = await self.pool.async_save(
                query=query,
                params=(
                    gh_id,
                    account_name,
                    title,
                    wx_sn,
                    publish_date,
                    error_detail,
                ),
            )
            # ``insert ignore`` affects no row when the article is already
            # recorded, so alert and delete only on the first detection.
            if affected_row:
                if isinstance(wx_sn, (bytes, bytearray)):
                    wx_sn_text = wx_sn.decode("utf-8")
                else:
                    wx_sn_text = wx_sn
                await feishu_robot.bot(
                    title="文章违规告警",
                    detail={
                        "account_name": account_name,
                        "gh_id": gh_id,
                        "title": title,
                        "wx_sn": wx_sn_text,
                        "publish_date": str(publish_date),
                        "error_detail": error_detail,
                    },
                    mention=False,
                )
                await delete_illegal_gzh_articles(gh_id, title)

        except Exception as e:
            # Use the already-unpacked name: indexing the row by key here
            # would itself raise for sequence-shaped rows.
            print(f"crawler failed: {account_name}, error: {e}")

    async def deal(self):
        """Check every article in the current monitoring window, in order."""
        article_list = await self.fetch_article_list_to_check()
        # ``None`` means the query failed (already alerted); nothing to do.
        if not article_list:
            return
        for article in tqdm(article_list):
            await self.check_each_article(article)
|