|
@@ -10,13 +10,15 @@ import traceback
|
|
|
import urllib.parse
|
|
|
from tqdm import tqdm
|
|
|
from datetime import datetime
|
|
|
+from argparse import ArgumentParser
|
|
|
|
|
|
-
|
|
|
-from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
|
|
|
+from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi, longArticlesMySQL
|
|
|
from applications.const import updatePublishedMsgTaskConst
|
|
|
|
|
|
# Table name interpolated into the SELECT that monitor() runs for recently published articles.
ARTICLE_TABLE = "official_articles_v2"
|
|
|
# Task constants; this file reads response codes, status values and MONITOR_PERIOD from it.
const = updatePublishedMsgTaskConst()
|
|
|
# Module-level spider instance; monitor() calls spider.get_article_text() on it.
+spider = WeixinSpider()
|
|
|
# Module-level helper instance; whether_title_unsafe() calls functions.str_to_md5() on it.
+functions = Functions()
|
|
|
|
|
|
|
|
|
def get_account_using_status():
|
|
@@ -418,6 +420,10 @@ def check_job():
|
|
|
for sub_item in tqdm(account_list):
|
|
|
res = check_single_account(db_client, sub_item)
|
|
|
if not res:
|
|
|
+ sub_item.drop('account_type')
|
|
|
+ sub_item.drop('account_auth')
|
|
|
+ init_timestamp = sub_item.pop('account_init_timestamp')
|
|
|
+ sub_item['account_init_date'] = datetime.fromtimestamp(init_timestamp).strftime('%Y-%m-%d %H:%M:%S')
|
|
|
fail_list.append(sub_item)
|
|
|
if fail_list:
|
|
|
try:
|
|
@@ -467,6 +473,9 @@ def update_publish_timestamp(db_client, row):
|
|
|
if response_code == const.ARTICLE_DELETE_CODE:
|
|
|
publish_timestamp_s = const.DELETE_STATUS
|
|
|
root_source_id_list = []
|
|
|
+ elif response_code == const.ARTICLE_ILLEGAL_CODE:
|
|
|
+ publish_timestamp_s = const.ILLEGAL_STATUS
|
|
|
+ root_source_id_list = []
|
|
|
elif response_code == const.ARTICLE_SUCCESS_CODE:
|
|
|
data = response['data']['data']
|
|
|
publish_timestamp_ms = data['publish_timestamp']
|
|
@@ -582,14 +591,138 @@ def get_article_detail_job():
|
|
|
)
|
|
|
|
|
|
|
|
|
def whether_title_unsafe(db_client, title):
    """
    Tell whether a violation record already exists for an article title.

    :param db_client: client whose ``select(sql)`` is run against article_unsafe_title
    :param title: article title; it is looked up by the digest from functions.str_to_md5
    :return: True when the query result is truthy (non-empty), otherwise False
    """
    digest = functions.str_to_md5(title)
    lookup_sql = f"""
        SELECT title_md5
        FROM article_unsafe_title
        WHERE title_md5 = '{digest}';
    """
    return bool(db_client.select(lookup_sql))
|
|
|
+
|
|
|
+
|
|
|
def monitor(run_date):
    """
    Monitoring job: re-check recently published articles and alert on violations.

    Every article whose publish_timestamp is at or after
    ``run_date - const.MONITOR_PERIOD`` (the window is described as 7 days)
    is fetched again through the spider. When the response code equals
    ``const.ARTICLE_ILLEGAL_CODE`` a bot alert is sent and
    ``aiditApi.delete_articles`` is called for that account and title.
    Titles that already have a violation record are skipped.

    :param run_date: day to run for, formatted as %Y-%m-%d; a falsy value means today
    :return: None
    """
    try:
        pq_client = PQMySQL()
        lam_client = longArticlesMySQL()
    except Exception as e:
        # Without both database clients nothing can be checked: alert and stop.
        bot(
            title="监控任务连接数据库失败",
            detail={
                "job": "monitor",
                "error": str(e),
                "msg": traceback.format_exc()
            }
        )
        return

    run_date = run_date or datetime.today().strftime("%Y-%m-%d")
    run_day_start = int(datetime.strptime(run_date, "%Y-%m-%d").timestamp())
    window_start = run_day_start - const.MONITOR_PERIOD

    select_sql = f"""
        SELECT ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) AS publish_timestamp
        FROM {ARTICLE_TABLE}
        WHERE publish_timestamp >= {window_start};
    """
    rows = pq_client.select(select_sql)

    # Column order follows the SELECT list above.
    for gh_id, account_name, title, url, wx_sn, publish_date in tqdm(rows, desc="monitor article list"):
        # Skip titles that already have a violation record.
        if whether_title_unsafe(lam_client, title):
            continue

        try:
            response = spider.get_article_text(url, is_cache=False)
            if response['code'] != const.ARTICLE_ILLEGAL_CODE:
                continue

            bot(
                title="文章违规告警",
                detail={
                    "ghId": gh_id,
                    "accountName": account_name,
                    "title": title,
                    "wx_sn": str(wx_sn),
                    "publish_date": str(publish_date)
                },
                mention=False
            )
            aiditApi.delete_articles(
                gh_id=gh_id,
                title=title
            )
        except Exception as e:
            # A failure on one article is logged and must not stop the remaining checks.
            log(
                task="monitor",
                function="monitor",
                message="请求文章详情失败",
                data={
                    "ghId": gh_id,
                    "accountName": account_name,
                    "title": title,
                    "wx_sn": str(wx_sn),
                    "error": str(e),
                    "msg": traceback.format_exc()
                }
            )
|
|
|
+
|
|
|
+
|
|
|
def main():
    """
    Command-line entry point.

    ``--run_task`` selects a single job: ``update`` (update_job), ``check``
    (check_job), ``detail`` (get_article_detail_job) or ``monitor`` (monitor).
    When it is omitted, update_job, check_job and get_article_detail_job run
    in that order. ``--run_date`` (YYYY-MM-DD) is only forwarded to the
    monitor task; an unknown task name prints a hint and does nothing else.

    :return: None
    """
    parser = ArgumentParser()
    parser.add_argument(
        "--run_task",
        help="update: update_job, check: check_job, detail: get_article_detail_job, monitor: monitor",
    )
    parser.add_argument(
        "--run_date",
        # argparse %-formats help text, so literal percent signs must be doubled;
        # a bare "%Y" makes `--help` fail with ValueError.
        help="--run_date %%Y-%%m-%%d",
    )
    args = parser.parse_args()

    run_task = args.run_task
    if not run_task:
        # No explicit task: run the regular pipeline in its original order.
        update_job()
        check_job()
        get_article_detail_job()
        return

    if run_task == "update":
        update_job()
    elif run_task == "check":
        check_job()
    elif run_task == "detail":
        get_article_detail_job()
    elif run_task == "monitor":
        # monitor() falls back to today's date when run_date is None.
        monitor(args.run_date or None)
    else:
        print(
            "No such task, input update: update_job, check: check_job, "
            "detail: get_article_detail_job, monitor: monitor"
        )
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|