|
@@ -10,13 +10,32 @@ import traceback
|
|
|
import urllib.parse
|
|
|
from tqdm import tqdm
|
|
|
from datetime import datetime
|
|
|
+from argparse import ArgumentParser
|
|
|
|
|
|
-
|
|
|
-from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
|
|
|
+from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi, longArticlesMySQL, \
|
|
|
+ create_feishu_columns_sheet
|
|
|
from applications.const import updatePublishedMsgTaskConst
|
|
|
|
|
|
# Name of the article table that monitor() queries for recently published articles.
ARTICLE_TABLE = "official_articles_v2"
# Shared task constants (response codes, status values, MONITOR_PERIOD).
const = updatePublishedMsgTaskConst()
# Module-level spider instance; monitor() uses it to fetch article details.
spider = WeixinSpider()
# Module-level helper instance; whether_title_unsafe() uses its str_to_md5().
functions = Functions()
|
|
|
+
|
|
|
+
|
|
|
def generate_bot_columns():
    """
    Build the column definitions used for table-style bot messages.

    Every column is created through create_feishu_columns_sheet, in the
    order in which it should be displayed.
    :return: list of column definitions
    """
    # (sheet_type, sheet_name, display_name) for each column, in display order
    column_specs = (
        ("plain_text", "name", "公众号名称"),
        ("plain_text", "ghId", "ghId"),
        ("number", "follower_count", "粉丝数"),
        ("date", "account_init_timestamp", "账号接入系统时间"),
        ("plain_text", "using_status", "利用状态"),
    )
    return [
        create_feishu_columns_sheet(
            sheet_type=sheet_type,
            sheet_name=sheet_name,
            display_name=display_name,
        )
        for sheet_type, sheet_name, display_name in column_specs
    ]
|
|
|
|
|
|
|
|
|
def get_account_using_status():
|
|
@@ -45,8 +64,11 @@ def get_accounts():
|
|
|
"""
|
|
|
using_account_set = get_account_using_status()
|
|
|
account_list_with_out_using_status = aiditApi.get_publish_account_from_aigc()
|
|
|
+ # only_auto_reply_accounts_set = aiditApi.get_only_auto_reply_accounts()
|
|
|
account_list = []
|
|
|
for item in account_list_with_out_using_status:
|
|
|
+ # if item['account_id'] in only_auto_reply_accounts_set:
|
|
|
+ # continue
|
|
|
if item['ghId'] in using_account_set:
|
|
|
item['using_status'] = 1
|
|
|
else:
|
|
@@ -418,12 +440,20 @@ def check_job():
|
|
|
for sub_item in tqdm(account_list):
|
|
|
res = check_single_account(db_client, sub_item)
|
|
|
if not res:
|
|
|
+ # 去掉三个不需要查看的字段
|
|
|
+ sub_item.pop('account_type', None)
|
|
|
+ sub_item.pop('account_auth', None)
|
|
|
+ sub_item.pop('account_id', None)
|
|
|
fail_list.append(sub_item)
|
|
|
if fail_list:
|
|
|
try:
|
|
|
bot(
|
|
|
title="日常报警, 存在账号更新失败",
|
|
|
- detail=fail_list
|
|
|
+ detail={
|
|
|
+ "columns": generate_bot_columns(),
|
|
|
+ "rows": fail_list
|
|
|
+ },
|
|
|
+ table=True
|
|
|
)
|
|
|
except Exception as e:
|
|
|
print("Timeout Error: {}".format(e))
|
|
@@ -467,6 +497,9 @@ def update_publish_timestamp(db_client, row):
|
|
|
if response_code == const.ARTICLE_DELETE_CODE:
|
|
|
publish_timestamp_s = const.DELETE_STATUS
|
|
|
root_source_id_list = []
|
|
|
+ elif response_code == const.ARTICLE_ILLEGAL_CODE:
|
|
|
+ publish_timestamp_s = const.ILLEGAL_STATUS
|
|
|
+ root_source_id_list = []
|
|
|
elif response_code == const.ARTICLE_SUCCESS_CODE:
|
|
|
data = response['data']['data']
|
|
|
publish_timestamp_ms = data['publish_timestamp']
|
|
@@ -486,7 +519,7 @@ def update_publish_timestamp(db_client, row):
|
|
|
root_source_id_list = []
|
|
|
except Exception as e:
|
|
|
publish_timestamp_s = const.REQUEST_FAIL_STATUS
|
|
|
- root_source_id_list = []
|
|
|
+ root_source_id_list = None
|
|
|
error_msg = traceback.format_exc()
|
|
|
print(e, error_msg)
|
|
|
|
|
@@ -582,14 +615,138 @@ def get_article_detail_job():
|
|
|
)
|
|
|
|
|
|
|
|
|
def whether_title_unsafe(db_client, title):
    """
    Check whether an article title already has a violation record.

    The title is hashed with functions.str_to_md5 and the hash is looked up
    in the article_unsafe_title table.
    :param db_client: database client exposing select(sql)
    :param title: article title to check
    :return: True if a matching record exists, otherwise False
    """
    md5_of_title = functions.str_to_md5(title)
    lookup_sql = f"""
        SELECT title_md5
        FROM article_unsafe_title
        WHERE title_md5 = '{md5_of_title}';
    """
    # any non-empty result means the title has been flagged before
    return bool(db_client.select(lookup_sql))
|
|
|
+
|
|
|
+
|
|
|
def monitor(run_date):
    """
    Monitoring task: re-check recently published articles for violations.

    Articles whose publish_timestamp is within const.MONITOR_PERIOD before
    run_date are fetched again through the spider. When the response code is
    const.ARTICLE_ILLEGAL_CODE, an alert is sent through bot and
    aiditApi.delete_articles is called for that article. Titles that already
    have a violation record are skipped.
    NOTE(review): the original note describes the window as 7 days; the
    effective value is whatever const.MONITOR_PERIOD holds - confirm.
    :param run_date: date string in %Y-%m-%d format; a falsy value means today
    :return: None
    """
    try:
        pq_client = PQMySQL()
        lam_client = longArticlesMySQL()
    except Exception as e:
        # without both database clients there is nothing to monitor: alert and stop
        bot(
            title="监控任务连接数据库失败",
            detail={
                "job": "monitor",
                "error": str(e),
                "msg": traceback.format_exc()
            }
        )
        return

    run_date = run_date or datetime.today().strftime("%Y-%m-%d")
    run_day_timestamp = int(datetime.strptime(run_date, "%Y-%m-%d").timestamp())
    monitor_start_timestamp = run_day_timestamp - const.MONITOR_PERIOD

    select_sql = f"""
        SELECT ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) AS publish_timestamp
        FROM {ARTICLE_TABLE}
        WHERE publish_timestamp >= {monitor_start_timestamp};
    """
    for article in tqdm(pq_client.select(select_sql), desc="monitor article list"):
        # column order follows the SELECT list above
        gh_id, account_name, title, url, wx_sn, publish_date = article

        # skip titles that already have a violation record
        if whether_title_unsafe(lam_client, title):
            continue

        try:
            response = spider.get_article_text(url, is_cache=False)
            if response['code'] != const.ARTICLE_ILLEGAL_CODE:
                continue

            bot(
                title="文章违规告警",
                detail={
                    "ghId": gh_id,
                    "accountName": account_name,
                    "title": title,
                    "wx_sn": str(wx_sn),
                    "publish_date": str(publish_date)
                },
                mention=False
            )
            aiditApi.delete_articles(
                gh_id=gh_id,
                title=title
            )
        except Exception as e:
            # a failure on one article is logged and must not stop the scan
            log(
                task="monitor",
                function="monitor",
                message="请求文章详情失败",
                data={
                    "ghId": gh_id,
                    "accountName": account_name,
                    "title": title,
                    "wx_sn": str(wx_sn),
                    "error": str(e),
                    "msg": traceback.format_exc()
                }
            )
|
|
|
+
|
|
|
+
|
|
|
def main():
|
|
|
"""
|
|
|
main
|
|
|
:return:
|
|
|
"""
|
|
|
- update_job()
|
|
|
- check_job()
|
|
|
- get_article_detail_job()
|
|
|
+ parser = ArgumentParser()
|
|
|
+ parser.add_argument(
|
|
|
+ "--run_task",
|
|
|
+ help="update: update_job, check: check_job, detail: get_article_detail_job, monitor: monitor")
|
|
|
+ parser.add_argument(
|
|
|
+ "--run_date",
|
|
|
+ help="--run_date %Y-%m-%d",
|
|
|
+ )
|
|
|
+ args = parser.parse_args()
|
|
|
+
|
|
|
+ if args.run_task:
|
|
|
+ run_task = args.run_task
|
|
|
+ match run_task:
|
|
|
+ case "update":
|
|
|
+ update_job()
|
|
|
+ case "check":
|
|
|
+ check_job()
|
|
|
+ case "detail":
|
|
|
+ get_article_detail_job()
|
|
|
+ case "monitor":
|
|
|
+ if args.run_date:
|
|
|
+ run_date = args.run_date
|
|
|
+ else:
|
|
|
+ run_date = None
|
|
|
+ monitor(run_date)
|
|
|
+ case _:
|
|
|
+ print("No such task, input update: update_job, check: check_job, detail: get_article_detail_job")
|
|
|
+ else:
|
|
|
+ update_job()
|
|
|
+ check_job()
|
|
|
+ get_article_detail_job()
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|