Browse Source

Merge branch 'refs/heads/master' into 2024-11-29-luojunhui-developing-video-crawler

# Conflicts:
#	applications/wxSpiderApi.py
luojunhui 10 months ago
parent
commit
20d8dc4f25

+ 1 - 1
applications/__init__.py

@@ -10,4 +10,4 @@ from .wxSpiderApi import WeixinSpider
 from .algApi import AlgApi
 from .pqFunctionApi import PQAPI
 from .aliyunLogApi import log
-from .feishuBotApi import bot
+from .feishuBotApi import bot, create_feishu_columns_sheet

+ 36 - 4
applications/aiditApi.py

@@ -99,7 +99,8 @@ def get_publish_account_from_aigc():
             t3.follower_count,
             t3.create_timestamp,
             t4.service_type_info,
-            t4.verify_type_info
+            t4.verify_type_info,
+            t3.id
         FROM
             publish_plan t1
             JOIN publish_plan_account t2 ON t1.id = t2.plan_id
@@ -117,9 +118,10 @@ def get_publish_account_from_aigc():
             "name": line[0],
             "ghId": line[1],
             "follower_count": line[2],
-            "account_init_timestamp": int(line[3] / 1000),
+            "account_init_timestamp": int(line[3]),
             "account_type": line[4],
-            "account_auth": line[5]
+            "account_auth": line[5],
+            "account_id": line[6]
         } for line in info_tuple
     ]
     return info_list
@@ -325,7 +327,7 @@ def bind_crawler_task_to_publish_task(target_publish_task_id, crawler_task_name,
                 "publishTimeInterval": publish_task_detail_data.get("publishTimeInterval"),
                 "publishWindowEnd": publish_task_detail_data.get("publishWindowEnd"),
                 "publishWindowStart": publish_task_detail_data.get("publishWindowStart"),
-                "wxContentInsert":  publish_task_detail_data.get("wxContentInsert"),
+                "wxContentInsert": publish_task_detail_data.get("wxContentInsert"),
                 "wxVideoPublishAccountSetting": publish_task_detail_data.get("wxVideoPublishAccountSetting"),
                 "scoreJudgeFlag": publish_task_detail_data.get("scoreJudgeFlag"),
                 "scoreJudgeTasks": publish_task_detail_data.get("scoreJudgeTasks"),
@@ -363,3 +365,33 @@ def bind_crawler_task_to_publish_task(target_publish_task_id, crawler_task_name,
     else:
         return
 
+
def delete_articles(gh_id, title):
    """
    Ask the article-audit service to delete official-account articles.

    :param gh_id: sent as the ``ghId`` field of the JSON request body
    :param title: sent as the ``title`` field of the JSON request body
    :return: the ``requests`` response object, returned as-is (status is not checked here)
    """
    endpoint = "http://101.37.174.139:80/articleAudit/titleDangerFindDelete"
    request_headers = {'Content-Type': 'application/json;charset=UTF-8'}
    body = {"ghId": gh_id, 'title': title}
    # the call may block for up to 600 seconds before timing out
    return requests.request("POST", endpoint, headers=request_headers, json=body, timeout=600)
+
+
def get_only_auto_reply_accounts():
    """
    Fetch the accounts whose remark contains "即转".

    :return: set of ``publish_account_id`` values selected from ``publish_account_remark``
    """
    query = "select publish_account_id from publish_account_remark where remark like '%即转%';"
    rows = DeNetMysql().select(query)
    # first column of every row is the publish_account_id
    return {row[0] for row in rows}

+ 6 - 0
applications/const.py

@@ -18,8 +18,10 @@ class updatePublishedMsgTaskConst:
     更新已发布文章消息常量配置
     """
     # 爬虫详情接口返回code
+    ARTICLE_ILLEGAL_CODE = 25012
     ARTICLE_DELETE_CODE = 25005
     ARTICLE_SUCCESS_CODE = 0
+    ARTICLE_UNKNOWN_CODE = 10000
 
     # 请求爬虫详情接口状态码
     # 记录默认状态
@@ -30,12 +32,16 @@ class updatePublishedMsgTaskConst:
     DELETE_STATUS = -2
     # 未知原因无信息返回状态
     UNKNOWN_STATUS = -3
+    # 文章违规状态
+    ILLEGAL_STATUS = -4
 
     # 公众号类型(订阅号 or 服务号)
     # 订阅号
     SUBSCRIBE_TYPE_SET = {0, 1}
     # 服务号
     SERVICE_TYPE = 2
+    # 监测周期(秒)
+    MONITOR_PERIOD = 60 * 60 * 24 * 7
 
 
 class updateAccountReadRateTaskConst:

+ 120 - 14
applications/feishuBotApi.py

@@ -6,26 +6,128 @@ import requests
 
 from applications.decoratorApi import retryOnTimeout
 
+mention_all = {
+    "content": "<at id=all></at>\n",
+    "tag": "lark_md",
+}
 
-@retryOnTimeout()
-def bot(title, detail, mention=True):
+
+def create_feishu_columns_sheet(sheet_type, sheet_name, display_name, width="auto",
+                                vertical_align="top", horizontal_align="left", number_format=None):
     """
-    机器人
+    create feishu sheet
+    :param number_format:
+    :param horizontal_align:
+    :param vertical_align:
+    :param width:
+    :param display_name:
+    :param sheet_type:
+    :param sheet_name:
     """
-    title_obj = {
-        "content": "{}<at id=all></at>\n".format(title) if mention else "{}\n".format(title),
-        "tag": "lark_md",
+    match sheet_type:
+        case "plain_text":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "width": width,
+                "data_type": "text",
+                "vertical_align": vertical_align,
+                "horizontal_align": horizontal_align
+            }
+
+        case "lark_md":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "data_type": "lark_md"
+            }
+
+        case "number":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "data_type": "number",
+                "format": number_format,
+                "width": width
+            }
+
+        case "date":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "data_type": "date",
+                "date_format": "YYYY/MM/DD"
+            }
+        case "options":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "data_type": "options"
+            }
+
+
def create_feishu_table(title, columns, rows, mention):
    """
    Build a Feishu interactive card that shows ``rows`` as a table.

    :param title: plain-text content of the card header
    :param columns: column definitions placed under the table's "columns" key
    :param rows: row dicts placed under the table's "rows" key
    :param mention: when truthy, a div holding ``mention_all`` is put before the table
    :return: dict with the card's "header" and "elements"
    """
    table_element = {
        "tag": "table",
        # NOTE(review): page size grows with the row count; confirm the upper bound Feishu accepts
        "page_size": len(rows) + 1,
        "row_height": "low",
        "header_style": {
            "text_align": "left",
            "text_size": "normal",
            "background_style": "grey",
            "text_color": "default",
            "bold": True,
            "lines": 1
        },
        "columns": columns,
        "rows": rows
    }
    elements = [table_element]
    if mention:
        # only add the mention block when asked for, so no empty, tag-less element ends up in the card
        elements.insert(0, {"tag": "div", "text": mention_all})
    return {
        "header": {
            "template": "blue",
            "title": {
                "content": title,
                "tag": "plain_text"
            }
        },
        "elements": elements
    }
+
+
+@retryOnTimeout()
+def bot(title, detail, mention=True, table=False):
+    """
+    报警机器人
+    """
     url = "https://open.feishu.cn/open-apis/bot/v2/hook/b44333f2-16c0-4cb1-af01-d135f8704410"
+    # url = "https://open.feishu.cn/open-apis/bot/v2/hook/f32c0456-847f-41f3-97db-33fcc1616bcd"
     headers = {"Content-Type": "application/json"}
-    payload = {
-        "msg_type": "interactive",
-        "card": {
+    if table:
+        card = create_feishu_table(
+            title=title,
+            columns=detail["columns"],
+            rows=detail["rows"],
+            mention=mention
+        )
+    else:
+        card = {
             "elements": [
                 {
                     "tag": "div",
-                    "text": title_obj,
+                    "text": mention_all if mention else {},
                 },
                 {
                     "tag": "div",
@@ -37,7 +139,11 @@ def bot(title, detail, mention=True):
                     },
                 },
             ],
-            "header": {"title": {"content": head_title, "tag": "plain_text"}},
-        },
+            "header": {"title": {"content": title, "tag": "plain_text"}},
+        }
+    payload = {
+        "msg_type": "interactive",
+        "card": card
     }
-    requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
+    res = requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
+    return res

+ 35 - 13
cal_account_read_rate_avg_daily.py

@@ -2,15 +2,19 @@
 @author: luojunhui
 cal each account && position reading rate
 """
+import json
 from tqdm import tqdm
 from pandas import DataFrame
 from argparse import ArgumentParser
 from datetime import datetime
 
-from applications import DeNetMysql, PQMySQL, longArticlesMySQL, bot, Functions
+from applications import DeNetMysql, PQMySQL, longArticlesMySQL, bot, Functions, create_feishu_columns_sheet
 from applications.const import updateAccountReadRateTaskConst
+from config import apolloConfig
 
 const = updateAccountReadRateTaskConst()
+config = apolloConfig()
+unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 functions = Functions()
 read_rate_table = "long_articles_read_rate"
 
@@ -88,7 +92,7 @@ def get_publishing_accounts(db_client) -> list[dict]:
     WHERE
         t1.plan_status = 1
         AND t3.channel = 5
-        AND t3.follower_count > 0
+        -- AND t3.follower_count > 0
         GROUP BY t3.id;
     """
     account_list = db_client.select(sql)
@@ -145,6 +149,8 @@ def cal_account_read_rate(gh_id_tuple) -> DataFrame:
         gh_id = line['ghId']
         dt = functions.timestamp_to_str(timestamp=line['publish_timestamp'], string_format='%Y-%m-%d')
         fans = fans_dict_each_day.get(gh_id, {}).get(dt, 0)
+        if not fans:
+            fans = int(unauthorized_account.get(gh_id, 0))
         line['fans'] = fans
         if fans > 1000:
             line['readRate'] = line['show_view_count'] / fans if fans else 0
@@ -207,11 +213,16 @@ def check_each_position(db_client, gh_id, index, dt, avg_rate) -> dict:
             return {}
         else:
             response = {
-                "账号名称": account_name,
-                "位置": index,
-                "当天阅读率均值": Functions().float_to_percentage(avg_rate),
-                "前一天阅读率均值": Functions().float_to_percentage(previous_read_rate_avg),
-                "相对变化率": Functions().float_to_percentage(relative_value)
+                "account_name": account_name,
+                "position": index,
+                "read_rate_avg_yesterday": Functions().float_to_percentage(avg_rate),
+                "read_rate_avg_the_day_before_yesterday": Functions().float_to_percentage(previous_read_rate_avg),
+                "relative_change_rate": [
+                    {
+                        "text": Functions().float_to_percentage(relative_value),
+                        "color": "red" if relative_value < 0 else "green"
+                    }
+                ]
             }
             return response
 
@@ -235,7 +246,7 @@ def update_single_day(dt, account_list, article_df, lam):
         string_format='%Y-%m-%d'
     )
 
-    for account in tqdm(account_list):
+    for account in tqdm(account_list, desc=dt):
         for index in const.ARTICLE_INDEX_LIST:
             read_rate_detail = cal_avg_account_read_rate(
                 df=article_df,
@@ -258,7 +269,6 @@ def update_single_day(dt, account_list, article_df, lam):
                     )
                     if error_obj:
                         error_list.append(error_obj)
-                # continue
                 try:
                     if not read_rate_avg:
                         continue
@@ -284,7 +294,7 @@ def update_single_day(dt, account_list, article_df, lam):
                         )
                     )
                 except Exception as e:
-                    insert_error_list.append(e)
+                    insert_error_list.append(str(e))
 
     if insert_error_list:
         bot(
@@ -293,12 +303,24 @@ def update_single_day(dt, account_list, article_df, lam):
         )
 
     if error_list:
+        columns = [
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="account_name", display_name="账号名称"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="position", display_name="文章位置"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_yesterday",
+                                        display_name="昨日阅读率均值"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_the_day_before_yesterday",
+                                        display_name="前天阅读率均值"),
+            create_feishu_columns_sheet(sheet_type="options", sheet_name="relative_change_rate",
+                                        display_name="相对变化率")
+        ]
         bot(
             title="更新阅读率均值,头次出现异常值通知",
             detail={
-                "时间": dt,
-                "异常列表": error_list
-            }
+                "columns": columns,
+                "rows": error_list
+            },
+            table=True,
+            mention=False
         )
 
     if not error_list and not insert_error_list:

+ 56 - 0
kimi_balance_monitor.py

@@ -0,0 +1,56 @@
+"""
+@author: luojunhui
+"""
+import requests
+import traceback
+
+from applications import bot
+from applications.decoratorApi import retryOnTimeout
+
+BALANCE_LIMIT_THRESHOLD = 200.0
+
+
@retryOnTimeout(retries=5, delay=5)
def check_kimi_balance():
    """
    Query the kimi balance endpoint and alert through ``bot`` when something is wrong.

    An alert is sent when:
      * the available balance is below ``BALANCE_LIMIT_THRESHOLD``;
      * the endpoint answers 200 but the body cannot be decoded or lacks the expected fields;
      * the endpoint answers with any status code other than 200.

    :return: None
    """
    url = "https://api.moonshot.cn/v1/users/me/balance"
    # NOTE(review): hard-coded API credential committed to the repository -- rotate it and load it
    # from configuration or the environment instead.
    headers = {
        'Authorization': 'Bearer sk-5DqYCa88kche6nwIWjLE1p4oMm8nXrR9kQMKbBolNAWERu7q'
    }
    response = requests.request("GET", url, headers=headers, timeout=10)
    if response.status_code != 200:
        bot(
            title="kimi余额接口调用失败",
            detail={
                "response": response.text
            }
        )
        return

    try:
        # decode inside the try block so that a non-JSON body is reported like any other malformed payload
        balance = response.json()['data']['available_balance']
        if balance < BALANCE_LIMIT_THRESHOLD:
            bot(
                title="kimi余额小于 {} 块".format(BALANCE_LIMIT_THRESHOLD),
                detail={
                    "balance": balance
                }
            )
    except Exception as e:
        error_stack = traceback.format_exc()
        bot(
            title="kimi余额接口处理失败,数据结构异常",
            detail={
                "error": str(e),
                "error_msg": error_stack
            }
        )
+
+
+if __name__ == '__main__':
+    check_kimi_balance()

+ 16 - 0
sh/published_articles_monitor.sh

@@ -0,0 +1,16 @@
#!/bin/bash

# Current date, formatted as YYYY-MM-DD
CURRENT_DATE=$(date +%F)

# Log file path; the date is part of the file name
LOG_FILE="/root/luojunhui/logs/article_monitor_task_log_$CURRENT_DATE.txt"

# Append all output of this script to the dated log file
exec >> "$LOG_FILE" 2>&1

# Abort if the project directory is missing, rather than starting the job from the wrong directory
cd /root/luojunhui/LongArticlesJob || exit 1
source /root/miniconda3/etc/profile.d/conda.sh
conda activate tasks

# Run the monitor task in the background
nohup python3 updatePublishedMsgDaily.py --run_task monitor >> "${LOG_FILE}" 2>&1 &

+ 16 - 0
sh/run_kimi_balance_monitor.sh

@@ -0,0 +1,16 @@
#!/bin/bash

# Current date, formatted as YYYY-MM-DD
CURRENT_DATE=$(date +%F)

# Log file path; the date is part of the file name
LOG_FILE="/root/luojunhui/logs/kimi_monitor_task_log_$CURRENT_DATE.txt"

# Append all output of this script to the dated log file
exec >> "$LOG_FILE" 2>&1

# Abort if the project directory is missing, rather than starting the job from the wrong directory
cd /root/luojunhui/LongArticlesJob || exit 1
source /root/miniconda3/etc/profile.d/conda.sh
conda activate tasks

# Run the balance check in the background
nohup python3 kimi_balance_monitor.py >> "${LOG_FILE}" 2>&1 &

+ 9 - 5
sh/run_update_published_articles_daily.sh

@@ -13,14 +13,18 @@ then
     echo "$(date '+%Y-%m-%d %H:%M:%S') - updatePublishedMsgDaily.py is running"
 else
     echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart updatePublishedMsgDaily.py"
-    # 切换到指定目录
     cd /root/luojunhui/LongArticlesJob
-
-    # 激活 Conda 环境
     source /root/miniconda3/etc/profile.d/conda.sh
     conda activate tasks
 
-    # 在后台运行 Python 脚本并重定向日志输出
-    nohup python3 updatePublishedMsgDaily.py >> "${LOG_FILE}" 2>&1 &
+    current_time=$(date +%H:%M)
+    target_time="19:00"
+
+    if [[ "$current_time" < "$target_time" ]]; then
+        nohup python3 updatePublishedMsgDaily.py --run_task update >> "${LOG_FILE}" 2>&1 &
+        nohup python3 updatePublishedMsgDaily.py --run_task detail >> "${LOG_FILE}" 2>&1 &
+    else
+        nohup python3 updatePublishedMsgDaily.py >> "${LOG_FILE}" 2>&1 &
     echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted updatePublishedMsgDaily.py"
+    fi
 fi

+ 12 - 6
updateAccountV3.py

@@ -10,6 +10,10 @@ from argparse import ArgumentParser
 
 from applications import PQMySQL, DeNetMysql, longArticlesMySQL
 from applications.const import updateAccountReadAvgTaskConst
+from config import apolloConfig
+
+config = apolloConfig()
+unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 
 
 def get_account_fans_by_dt(db_client) -> dict:
@@ -125,13 +129,15 @@ class UpdateAccountInfoVersion3(object):
         fans_dict = get_account_fans_by_dt(db_client=self.de)
         account_list = self.get_publishing_accounts()
         rate_dict = self.get_account_position_read_rate(dt)
-        for account in tqdm(account_list):
-            business_type = self.const.TOULIU if account[
-                                                     'gh_id'] in self.const.TOULIU_ACCOUNTS else self.const.ARTICLES_DAILY
-            fans = fans_dict.get(account['gh_id'], {}).get(dt, 0)
+        for account in tqdm(account_list, desc=dt):
+            gh_id = account["gh_id"]
+            business_type = self.const.TOULIU if gh_id in self.const.TOULIU_ACCOUNTS else self.const.ARTICLES_DAILY
+            fans = fans_dict.get(gh_id, {}).get(dt, 0)
+            if not fans:
+                fans = int(unauthorized_account.get(gh_id, 0))
             if fans:
                 for index in range(1, 9):
-                    gh_id_position = "{}_{}".format(account['gh_id'], index)
+                    gh_id_position = "{}_{}".format(gh_id, index)
                     if rate_dict.get(gh_id_position):
                         rate = rate_dict[gh_id_position]
                         read_avg = fans * rate
@@ -146,7 +152,7 @@ class UpdateAccountInfoVersion3(object):
                             self.pq.update(
                                 sql=insert_sql,
                                 params=(
-                                    account['gh_id'],
+                                    gh_id,
                                     index,
                                     dt,
                                     account['account_name'],

+ 164 - 7
updatePublishedMsgDaily.py

@@ -10,13 +10,32 @@ import traceback
 import urllib.parse
 from tqdm import tqdm
 from datetime import datetime
+from argparse import ArgumentParser
 
-
-from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
+from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi, longArticlesMySQL, \
+    create_feishu_columns_sheet
 from applications.const import updatePublishedMsgTaskConst
 
 ARTICLE_TABLE = "official_articles_v2"
 const = updatePublishedMsgTaskConst()
+spider = WeixinSpider()
+functions = Functions()
+
+
def generate_bot_columns():
    """
    Build the column definitions for the account table sent through ``bot``.

    :return: list of five column definitions, in display order
    """
    # (sheet_type, sheet_name, display_name) for every column
    column_specs = (
        ("plain_text", "name", "公众号名称"),
        ("plain_text", "ghId", "ghId"),
        ("number", "follower_count", "粉丝数"),
        ("date", "account_init_timestamp", "账号接入系统时间"),
        ("plain_text", "using_status", "利用状态"),
    )
    return [
        create_feishu_columns_sheet(sheet_type=kind, sheet_name=field, display_name=label)
        for kind, field, label in column_specs
    ]
 
 
 def get_account_using_status():
@@ -45,8 +64,11 @@ def get_accounts():
     """
     using_account_set = get_account_using_status()
     account_list_with_out_using_status = aiditApi.get_publish_account_from_aigc()
+    # only_auto_reply_accounts_set = aiditApi.get_only_auto_reply_accounts()
     account_list = []
     for item in account_list_with_out_using_status:
+        # if item['account_id'] in only_auto_reply_accounts_set:
+        #     continue
         if item['ghId'] in using_account_set:
             item['using_status'] = 1
         else:
@@ -418,12 +440,20 @@ def check_job():
     for sub_item in tqdm(account_list):
         res = check_single_account(db_client, sub_item)
         if not res:
+            # 去掉三个不需要查看的字段
+            sub_item.pop('account_type', None)
+            sub_item.pop('account_auth', None)
+            sub_item.pop('account_id', None)
             fail_list.append(sub_item)
     if fail_list:
         try:
             bot(
                 title="日常报警, 存在账号更新失败",
-                detail=fail_list
+                detail={
+                    "columns": generate_bot_columns(),
+                    "rows": fail_list
+                },
+                table=True
             )
         except Exception as e:
             print("Timeout Error: {}".format(e))
@@ -467,6 +497,9 @@ def update_publish_timestamp(db_client, row):
         if response_code == const.ARTICLE_DELETE_CODE:
             publish_timestamp_s = const.DELETE_STATUS
             root_source_id_list = []
+        elif response_code == const.ARTICLE_ILLEGAL_CODE:
+            publish_timestamp_s = const.ILLEGAL_STATUS
+            root_source_id_list = []
         elif response_code == const.ARTICLE_SUCCESS_CODE:
             data = response['data']['data']
             publish_timestamp_ms = data['publish_timestamp']
@@ -486,7 +519,7 @@ def update_publish_timestamp(db_client, row):
             root_source_id_list = []
     except Exception as e:
         publish_timestamp_s = const.REQUEST_FAIL_STATUS
-        root_source_id_list = []
+        root_source_id_list = None
         error_msg = traceback.format_exc()
         print(e, error_msg)
 
@@ -582,14 +615,138 @@ def get_article_detail_job():
         )
 
 
def whether_title_unsafe(db_client, title):
    """
    Tell whether the title already has a record in ``article_unsafe_title``.

    :param db_client: client whose ``select`` method runs the lookup
    :param title: article title; it is looked up by the value of ``functions.str_to_md5(title)``
    :return: True when the query returns at least one row, otherwise False
    """
    digest = functions.str_to_md5(title)
    query = f"""
        SELECT title_md5
        FROM article_unsafe_title
        WHERE title_md5 = '{digest}';
    """
    return bool(db_client.select(query))
+
+
def monitor(run_date):
    """
    Monitoring task: re-check recently published articles and alert on the ones flagged as illegal.

    Articles whose ``publish_timestamp`` falls within ``const.MONITOR_PERIOD`` seconds before
    ``run_date`` are fetched again through the spider. For every response whose code equals
    ``const.ARTICLE_ILLEGAL_CODE`` an alert is sent through ``bot`` and
    ``aiditApi.delete_articles`` is called. Titles that already have an unsafe record are skipped.

    :param run_date: date string in ``%Y-%m-%d`` format; today's date is used when it is empty
    :return: None
    """
    try:
        pq_client = PQMySQL()
        lam_client = longArticlesMySQL()
    except Exception as e:
        # without both database clients nothing can be checked: report and stop
        bot(
            title="监控任务连接数据库失败",
            detail={
                "job": "monitor",
                "error": str(e),
                "msg": traceback.format_exc()
            }
        )
        return

    run_date = run_date or datetime.today().strftime("%Y-%m-%d")
    window_start = int(datetime.strptime(run_date, "%Y-%m-%d").timestamp()) - const.MONITOR_PERIOD
    select_sql = f"""
        SELECT ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) AS publish_timestamp
        FROM {ARTICLE_TABLE}
        WHERE publish_timestamp >= {window_start};
    """
    article_list = pq_client.select(select_sql)
    for gh_id, account_name, title, url, wx_sn, publish_date in tqdm(article_list, desc="monitor article list"):
        # skip titles that already have an unsafe record
        if whether_title_unsafe(lam_client, title):
            continue
        try:
            response = spider.get_article_text(url, is_cache=False)
            if response['code'] != const.ARTICLE_ILLEGAL_CODE:
                continue
            bot(
                title="文章违规告警",
                detail={
                    "ghId": gh_id,
                    "accountName": account_name,
                    "title": title,
                    "wx_sn": str(wx_sn),
                    "publish_date": str(publish_date)
                },
                mention=False
            )
            aiditApi.delete_articles(gh_id=gh_id, title=title)
        except Exception as e:
            # a failure on one article is logged and the scan moves on to the next one
            log(
                task="monitor",
                function="monitor",
                message="请求文章详情失败",
                data={
                    "ghId": gh_id,
                    "accountName": account_name,
                    "title": title,
                    "wx_sn": str(wx_sn),
                    "error": str(e),
                    "msg": traceback.format_exc()
                }
            )
+
+
 def main():
     """
     main
     :return:
     """
-    update_job()
-    check_job()
-    get_article_detail_job()
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--run_task",
+        help="update: update_job, check: check_job, detail: get_article_detail_job, monitor: monitor")
+    parser.add_argument(
+        "--run_date",
+        help="--run_date %Y-%m-%d",
+    )
+    args = parser.parse_args()
+
+    if args.run_task:
+        run_task = args.run_task
+        match run_task:
+            case "update":
+                update_job()
+            case "check":
+                check_job()
+            case "detail":
+                get_article_detail_job()
+            case "monitor":
+                if args.run_date:
+                    run_date = args.run_date
+                else:
+                    run_date = None
+                monitor(run_date)
+            case _:
+                print("No such task, input update: update_job, check: check_job, detail: get_article_detail_job")
+    else:
+        update_job()
+        check_job()
+        get_article_detail_job()
 
 
 if __name__ == '__main__':