|
@@ -12,12 +12,13 @@ from tqdm import tqdm
|
|
|
from datetime import datetime
|
|
|
from argparse import ArgumentParser
|
|
|
|
|
|
-from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
|
|
|
+from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi, longArticlesMySQL
|
|
|
from applications.const import updatePublishedMsgTaskConst
|
|
|
|
|
|
ARTICLE_TABLE = "official_articles_v2"
|
|
|
const = updatePublishedMsgTaskConst()
|
|
|
spider = WeixinSpider()
|
|
|
+functions = Functions()
|
|
|
|
|
|
|
|
|
def get_account_using_status():
|
|
@@ -590,13 +591,34 @@ def get_article_detail_job():
|
|
|
)
|
|
|
|
|
|
|
|
|
-def monitor():
|
|
|
+def whether_title_unsafe(db_client, title):
|
|
|
+ """
|
|
|
+ Check whether the article title already has a violation record in article_unsafe_title.
|
|
|
+ :param db_client: database client; its select(sql) method is used to run the lookup query
|
|
|
+ :param title: article title; it is hashed with functions.str_to_md5 before the lookup
|
|
|
+ :return: True if the lookup returns a non-empty result, otherwise False
|
|
|
+ """
|
|
|
+ title_md5 = functions.str_to_md5(title)
|
|
|
+ sql = f"""
|
|
|
+ SELECT title_md5
|
|
|
+ FROM article_unsafe_title
|
|
|
+ WHERE title_md5 = '{title_md5}';
|
|
|
+ """
|
|
|
+ res = db_client.select(sql)
|
|
|
+ if res:
|
|
|
+ return True
|
|
|
+ else:
|
|
|
+ return False
|
|
|
+
|
|
|
+
|
|
|
+def monitor(run_date):
|
|
|
"""
|
|
|
监控任务, 监测周期为7天,监测文章是否被违规,若监测到违规文章,则进行告警
|
|
|
:return:
|
|
|
"""
|
|
|
try:
|
|
|
- db_client = PQMySQL()
|
|
|
+ pq_client = PQMySQL()
|
|
|
+ lam_client = longArticlesMySQL()
|
|
|
except Exception as e:
|
|
|
error_msg = traceback.format_exc()
|
|
|
bot(
|
|
@@ -609,18 +631,23 @@ def monitor():
|
|
|
)
|
|
|
return
|
|
|
|
|
|
- now_time = int(time.time())
|
|
|
- monitor_start_timestamp = now_time - const.MONITOR_PERIOD
|
|
|
+ if not run_date:
|
|
|
+ run_date = datetime.today().strftime("%Y-%m-%d")
|
|
|
+
|
|
|
+ monitor_start_timestamp = int(datetime.strptime(run_date, "%Y-%m-%d").timestamp()) - const.MONITOR_PERIOD
|
|
|
select_sql = f"""
|
|
|
SELECT ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) AS publish_timestamp
|
|
|
FROM {ARTICLE_TABLE}
|
|
|
WHERE publish_timestamp >= {monitor_start_timestamp};
|
|
|
"""
|
|
|
- article_list = db_client.select(select_sql)
|
|
|
+ article_list = pq_client.select(select_sql)
|
|
|
for article in tqdm(article_list, desc="monitor article list"):
|
|
|
gh_id = article[0]
|
|
|
account_name = article[1]
|
|
|
title = article[2]
|
|
|
+ # Skip articles whose title already has a violation record
|
|
|
+ if whether_title_unsafe(lam_client, title):
|
|
|
+ continue
|
|
|
url = article[3]
|
|
|
wx_sn = article[4]
|
|
|
publish_date = article[5]
|
|
@@ -665,15 +692,14 @@ def main():
|
|
|
main
|
|
|
:return:
|
|
|
"""
|
|
|
- update_job()
|
|
|
- check_job()
|
|
|
- get_article_detail_job()
|
|
|
-
|
|
|
-
|
|
|
-if __name__ == '__main__':
|
|
|
parser = ArgumentParser()
|
|
|
- parser.add_argument("--run_task",
|
|
|
- help="update: update_job, check: check_job, detail: get_article_detail_job, monitor: monitor")
|
|
|
+ parser.add_argument(
|
|
|
+ "--run_task",  # flag strings are positional; add_argument() has no name= keyword and raises TypeError
|
|
|
+ help="update: update_job, check: check_job, detail: get_article_detail_job, monitor: monitor")
|
|
|
+ parser.add_argument(
|
|
|
+ "--run_date",  # optional date override, read later as args.run_date
|
|
|
+ help="--run_date %%Y-%%m-%%d",  # argparse %-formats help text, so a literal % must be written as %%
|
|
|
+ )
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
if args.run_task:
|
|
@@ -686,8 +712,18 @@ if __name__ == '__main__':
|
|
|
case "detail":
|
|
|
get_article_detail_job()
|
|
|
case "monitor":
|
|
|
- monitor()
|
|
|
+ if args.run_date:
|
|
|
+ run_date = args.run_date
|
|
|
+ else:
|
|
|
+ run_date = None
|
|
|
+ monitor(run_date)
|
|
|
case _:
|
|
|
print("No such task, input update: update_job, check: check_job, detail: get_article_detail_job")
|
|
|
else:
|
|
|
- main()
|
|
|
+ update_job()
|
|
|
+ check_job()
|
|
|
+ get_article_detail_job()
|
|
|
+
|
|
|
+
|
|
|
+if __name__ == '__main__':
|
|
|
+ main()
|