Merge branch 'refs/heads/master' into 2024-11-29-luojunhui-developing-video-crawler

# Conflicts:
#	applications/wxSpiderApi.py
luojunhui · 4 months ago
Commit: 20d8dc4f25

applications/__init__.py (+1 -1)

@@ -10,4 +10,4 @@ from .wxSpiderApi import WeixinSpider
 from .algApi import AlgApi
 from .pqFunctionApi import PQAPI
 from .aliyunLogApi import log
-from .feishuBotApi import bot
+from .feishuBotApi import bot, create_feishu_columns_sheet

applications/aiditApi.py (+36 -4)

@@ -99,7 +99,8 @@ def get_publish_account_from_aigc():
             t3.follower_count,
             t3.create_timestamp,
             t4.service_type_info,
-            t4.verify_type_info
+            t4.verify_type_info,
+            t3.id
         FROM
             publish_plan t1
             JOIN publish_plan_account t2 ON t1.id = t2.plan_id
@@ -117,9 +118,10 @@ def get_publish_account_from_aigc():
             "name": line[0],
             "ghId": line[1],
             "follower_count": line[2],
-            "account_init_timestamp": int(line[3] / 1000),
+            "account_init_timestamp": int(line[3]),
             "account_type": line[4],
-            "account_auth": line[5]
+            "account_auth": line[5],
+            "account_id": line[6]
         } for line in info_tuple
     ]
     return info_list
@@ -325,7 +327,7 @@ def bind_crawler_task_to_publish_task(target_publish_task_id, crawler_task_name,
                 "publishTimeInterval": publish_task_detail_data.get("publishTimeInterval"),
                 "publishWindowEnd": publish_task_detail_data.get("publishWindowEnd"),
                 "publishWindowStart": publish_task_detail_data.get("publishWindowStart"),
-                "wxContentInsert":  publish_task_detail_data.get("wxContentInsert"),
+                "wxContentInsert": publish_task_detail_data.get("wxContentInsert"),
                 "wxVideoPublishAccountSetting": publish_task_detail_data.get("wxVideoPublishAccountSetting"),
                 "scoreJudgeFlag": publish_task_detail_data.get("scoreJudgeFlag"),
                 "scoreJudgeTasks": publish_task_detail_data.get("scoreJudgeTasks"),
@@ -363,3 +365,33 @@ def bind_crawler_task_to_publish_task(target_publish_task_id, crawler_task_name,
     else:
         return
 
+
+def delete_articles(gh_id, title):
+    """
+    删除公众号文章
+    :param gh_id: 公众号 gh_id
+    :param title: 文章标题
+    :return: requests.Response 对象
+    """
+    url = "http://101.37.174.139:80/articleAudit/titleDangerFindDelete"
+
+    payload = {
+        "ghId": gh_id,
+        'title': title
+    }
+    headers = {
+        'Content-Type': 'application/json;charset=UTF-8'
+    }
+    response = requests.request("POST", url, headers=headers, json=payload, timeout=600)
+    return response
+
+
+def get_only_auto_reply_accounts():
+    """
+    获取即转的账号
+    """
+    sql = "select publish_account_id from publish_account_remark where remark like '%即转%';"
+    denet = DeNetMysql()
+    result = denet.select(sql)
+    account_id_list = [i[0] for i in result]
+    return set(account_id_list)
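
A minimal usage sketch for the two new aiditApi helpers (the gh_id and title values below are placeholders; the endpoint, payload shape and the "即转" remark filter are taken from the diff above — in the real call sites the article data comes from official_articles_v2):

    from applications import aiditApi

    # Accounts whose remark contains "即转" (auto-reply only) can be filtered out upstream
    only_auto_reply_ids = aiditApi.get_only_auto_reply_accounts()
    print(len(only_auto_reply_ids))

    # Ask the article-audit service to take down a published article by gh_id + title
    # (placeholder values, for illustration only)
    response = aiditApi.delete_articles(gh_id="gh_xxxxxxxxxxxx", title="示例标题")
    print(response.status_code, response.text)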

applications/const.py (+6 -0)

@@ -18,8 +18,10 @@ class updatePublishedMsgTaskConst:
     更新已发布文章消息常量配置
     """
     # 爬虫详情接口返回code
+    ARTICLE_ILLEGAL_CODE = 25012
     ARTICLE_DELETE_CODE = 25005
     ARTICLE_SUCCESS_CODE = 0
+    ARTICLE_UNKNOWN_CODE = 10000
 
     # 请求爬虫详情接口状态码
     # 记录默认状态
@@ -30,12 +32,16 @@ class updatePublishedMsgTaskConst:
     DELETE_STATUS = -2
     # 未知原因无信息返回状态
     UNKNOWN_STATUS = -3
+    # 文章违规状态
+    ILLEGAL_STATUS = -4
 
     # 公众号类型(订阅号 or 服务号)
     # 订阅号
     SUBSCRIBE_TYPE_SET = {0, 1}
     # 服务号
     SERVICE_TYPE = 2
+    # 监测周期(秒)
+    MONITOR_PERIOD = 60 * 60 * 24 * 7
 
 
 class updateAccountReadRateTaskConst:

applications/feishuBotApi.py (+120 -14)

@@ -6,26 +6,128 @@ import requests
 
 from applications.decoratorApi import retryOnTimeout
 
+mention_all = {
+    "content": "<at id=all></at>\n",
+    "tag": "lark_md",
+}
 
-@retryOnTimeout()
-def bot(title, detail, mention=True):
+
+def create_feishu_columns_sheet(sheet_type, sheet_name, display_name, width="auto",
+                                vertical_align="top", horizontal_align="left", number_format=None):
     """
-    机器人
+    create a column definition for a feishu table card
+    :param number_format: number display format, used when sheet_type is "number"
+    :param horizontal_align: horizontal alignment of the column content
+    :param vertical_align: vertical alignment of the column content
+    :param width: column width
+    :param display_name: column header text shown in the card
+    :param sheet_type: column data type (plain_text / lark_md / number / date / options)
+    :param sheet_name: key used to read each row's value
     """
-    title_obj = {
-        "content": "{}<at id=all></at>\n".format(title) if mention else "{}\n".format(title),
-        "tag": "lark_md",
+    match sheet_type:
+        case "plain_text":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "width": width,
+                "data_type": "text",
+                "vertical_align": vertical_align,
+                "horizontal_align": horizontal_align
+            }
+
+        case "lark_md":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "data_type": "lark_md"
+            }
+
+        case "number":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "data_type": "number",
+                "format": number_format,
+                "width": width
+            }
+
+        case "date":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "data_type": "date",
+                "date_format": "YYYY/MM/DD"
+            }
+        case "options":
+            return {
+                "name": sheet_name,
+                "display_name": display_name,
+                "data_type": "options"
+            }
+
+
+def create_feishu_table(title, columns, rows, mention):
+    """
+    create feishu table
+    :param mention:
+    :param title:
+    :param columns:
+    :param rows:
+    """
+    table_base = {
+        "header": {
+            "template": "blue",
+            "title": {
+                "content": title,
+                "tag": "plain_text"
+            }
+        },
+        "elements": [
+            {
+                "tag": "div",
+                "text": mention_all,
+            } if mention else {},
+            {
+                "tag": "table",
+                "page_size": len(rows) + 1,
+                "row_height": "low",
+                "header_style": {
+                    "text_align": "left",
+                    "text_size": "normal",
+                    "background_style": "grey",
+                    "text_color": "default",
+                    "bold": True,
+                    "lines": 1
+                },
+                "columns": columns,
+                "rows": rows
+            }
+        ]
     }
-    head_title = "【重点关注】" if mention else "【普通通知】"
+    return table_base
+
+
+@retryOnTimeout()
+def bot(title, detail, mention=True, table=False):
+    """
+    报警机器人
+    """
     url = "https://open.feishu.cn/open-apis/bot/v2/hook/b44333f2-16c0-4cb1-af01-d135f8704410"
+    # url = "https://open.feishu.cn/open-apis/bot/v2/hook/f32c0456-847f-41f3-97db-33fcc1616bcd"
     headers = {"Content-Type": "application/json"}
-    payload = {
-        "msg_type": "interactive",
-        "card": {
+    if table:
+        card = create_feishu_table(
+            title=title,
+            columns=detail["columns"],
+            rows=detail["rows"],
+            mention=mention
+        )
+    else:
+        card = {
             "elements": [
                 {
                     "tag": "div",
-                    "text": title_obj,
+                    "text": mention_all if mention else {},
                 },
                 {
                     "tag": "div",
@@ -37,7 +139,11 @@ def bot(title, detail, mention=True):
                     },
                 },
             ],
-            "header": {"title": {"content": head_title, "tag": "plain_text"}},
-        },
+            "header": {"title": {"content": title, "tag": "plain_text"}},
+        }
+    payload = {
+        "msg_type": "interactive",
+        "card": card
     }
-    requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
+    res = requests.request("POST", url=url, headers=headers, data=json.dumps(payload), timeout=10)
+    return res
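
A small sketch of driving the reworked bot() with a table card; the column/row values here are illustrative, while the columns/rows contract, the "options" cell format, and the table flag come from the diff above. Note that calling it posts to the real webhook:

    from applications import bot, create_feishu_columns_sheet

    # Column definitions: sheet_name must match the keys used in each row dict
    columns = [
        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="account_name", display_name="账号名称"),
        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="position", display_name="文章位置"),
        create_feishu_columns_sheet(sheet_type="options", sheet_name="relative_change_rate", display_name="相对变化率"),
    ]
    rows = [
        {
            "account_name": "示例账号",
            "position": 1,
            "relative_change_rate": [{"text": "-12.3%", "color": "red"}],
        }
    ]

    # table=True routes detail["columns"] / detail["rows"] through create_feishu_table();
    # table=False keeps the plain title + detail JSON card
    bot(
        title="表格告警示例",
        detail={"columns": columns, "rows": rows},
        table=True,
        mention=False,
    )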

cal_account_read_rate_avg_daily.py (+35 -13)

@@ -2,15 +2,19 @@
 @author: luojunhui
 cal each account && position reading rate
 """
+import json
 from tqdm import tqdm
 from pandas import DataFrame
 from argparse import ArgumentParser
 from datetime import datetime
 
-from applications import DeNetMysql, PQMySQL, longArticlesMySQL, bot, Functions
+from applications import DeNetMysql, PQMySQL, longArticlesMySQL, bot, Functions, create_feishu_columns_sheet
 from applications.const import updateAccountReadRateTaskConst
+from config import apolloConfig
 
 const = updateAccountReadRateTaskConst()
+config = apolloConfig()
+unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 functions = Functions()
 read_rate_table = "long_articles_read_rate"
 
@@ -88,7 +92,7 @@ def get_publishing_accounts(db_client) -> list[dict]:
     WHERE
         t1.plan_status = 1
         AND t3.channel = 5
-        AND t3.follower_count > 0
+        -- AND t3.follower_count > 0
         GROUP BY t3.id;
     """
     account_list = db_client.select(sql)
@@ -145,6 +149,8 @@ def cal_account_read_rate(gh_id_tuple) -> DataFrame:
         gh_id = line['ghId']
         dt = functions.timestamp_to_str(timestamp=line['publish_timestamp'], string_format='%Y-%m-%d')
         fans = fans_dict_each_day.get(gh_id, {}).get(dt, 0)
+        if not fans:
+            fans = int(unauthorized_account.get(gh_id, 0))
         line['fans'] = fans
         if fans > 1000:
             line['readRate'] = line['show_view_count'] / fans if fans else 0
@@ -207,11 +213,16 @@ def check_each_position(db_client, gh_id, index, dt, avg_rate) -> dict:
             return {}
         else:
             response = {
-                "账号名称": account_name,
-                "位置": index,
-                "当天阅读率均值": Functions().float_to_percentage(avg_rate),
-                "前一天阅读率均值": Functions().float_to_percentage(previous_read_rate_avg),
-                "相对变化率": Functions().float_to_percentage(relative_value)
+                "account_name": account_name,
+                "position": index,
+                "read_rate_avg_yesterday": Functions().float_to_percentage(avg_rate),
+                "read_rate_avg_the_day_before_yesterday": Functions().float_to_percentage(previous_read_rate_avg),
+                "relative_change_rate": [
+                    {
+                        "text": Functions().float_to_percentage(relative_value),
+                        "color": "red" if relative_value < 0 else "green"
+                    }
+                ]
             }
             return response
 
@@ -235,7 +246,7 @@ def update_single_day(dt, account_list, article_df, lam):
         string_format='%Y-%m-%d'
     )
 
-    for account in tqdm(account_list):
+    for account in tqdm(account_list, desc=dt):
         for index in const.ARTICLE_INDEX_LIST:
             read_rate_detail = cal_avg_account_read_rate(
                 df=article_df,
@@ -258,7 +269,6 @@ def update_single_day(dt, account_list, article_df, lam):
                     )
                     if error_obj:
                         error_list.append(error_obj)
-                # continue
                 try:
                     if not read_rate_avg:
                         continue
@@ -284,7 +294,7 @@ def update_single_day(dt, account_list, article_df, lam):
                         )
                     )
                 except Exception as e:
-                    insert_error_list.append(e)
+                    insert_error_list.append(str(e))
 
     if insert_error_list:
         bot(
@@ -293,12 +303,24 @@ def update_single_day(dt, account_list, article_df, lam):
         )
 
     if error_list:
+        columns = [
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="account_name", display_name="账号名称"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="position", display_name="文章位置"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_yesterday",
+                                        display_name="昨日阅读率均值"),
+            create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="read_rate_avg_the_day_before_yesterday",
+                                        display_name="前天阅读率均值"),
+            create_feishu_columns_sheet(sheet_type="options", sheet_name="relative_change_rate",
+                                        display_name="相对变化率")
+        ]
         bot(
             title="更新阅读率均值,头次出现异常值通知",
             detail={
-                "时间": dt,
-                "异常列表": error_list
-            }
+                "columns": columns,
+                "rows": error_list
+            },
+            table=True,
+            mention=False
         )
 
     if not error_list and not insert_error_list:

kimi_balance_monitor.py (+56 -0)

@@ -0,0 +1,56 @@
+"""
+@author: luojunhui
+"""
+import requests
+import traceback
+
+from applications import bot
+from applications.decoratorApi import retryOnTimeout
+
+BALANCE_LIMIT_THRESHOLD = 200.0
+
+
+@retryOnTimeout(retries=5, delay=5)
+def check_kimi_balance():
+    """
+    校验kimi余额
+    :return:
+    """
+    url = "https://api.moonshot.cn/v1/users/me/balance"
+
+    payload = {}
+    headers = {
+        'Authorization': 'Bearer sk-5DqYCa88kche6nwIWjLE1p4oMm8nXrR9kQMKbBolNAWERu7q'
+    }
+    response = requests.request("GET", url, headers=headers, data=payload, timeout=10)
+    if response.status_code == 200:
+        response_json = response.json()
+        try:
+            balance = response_json['data']['available_balance']
+            if balance < BALANCE_LIMIT_THRESHOLD:
+                bot(
+                    title="kimi余额小于 {} 块".format(BALANCE_LIMIT_THRESHOLD),
+                    detail={
+                        "balance": balance
+                    }
+                )
+        except Exception as e:
+            error_stack = traceback.format_exc()
+            bot(
+                title="kimi余额接口处理失败,数据结构异常",
+                detail={
+                    "error": str(e),
+                    "error_msg": error_stack
+                }
+            )
+    else:
+        bot(
+            title="kimi余额接口调用失败",
+            detail={
+                "response": response.text
+            }
+        )
+
+
+if __name__ == '__main__':
+    check_kimi_balance()

sh/published_articles_monitor.sh (+16 -0)

@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# 获取当前日期,格式为 YYYY-MM-DD
+CURRENT_DATE=$(date +%F)
+
+# 日志文件路径,包含日期
+LOG_FILE="/root/luojunhui/logs/article_monitor_task_log_$CURRENT_DATE.txt"
+
+# 重定向整个脚本的输出到带日期的日志文件
+exec >> "$LOG_FILE" 2>&1
+
+cd /root/luojunhui/LongArticlesJob
+source /root/miniconda3/etc/profile.d/conda.sh
+conda activate tasks
+
+nohup python3 updatePublishedMsgDaily.py --run_task monitor >> "${LOG_FILE}" 2>&1 &

sh/run_kimi_balance_monitor.sh (+16 -0)

@@ -0,0 +1,16 @@
+#!/bin/bash
+
+# 获取当前日期,格式为 YYYY-MM-DD
+CURRENT_DATE=$(date +%F)
+
+# 日志文件路径,包含日期
+LOG_FILE="/root/luojunhui/logs/kimi_monitor_task_log_$CURRENT_DATE.txt"
+
+# 重定向整个脚本的输出到带日期的日志文件
+exec >> "$LOG_FILE" 2>&1
+
+cd /root/luojunhui/LongArticlesJob
+source /root/miniconda3/etc/profile.d/conda.sh
+conda activate tasks
+
+nohup python3 kimi_balance_monitor.py >> "${LOG_FILE}" 2>&1 &

sh/run_update_published_articles_daily.sh (+9 -5)

@@ -13,14 +13,18 @@ then
     echo "$(date '+%Y-%m-%d %H:%M:%S') - updatePublishedMsgDaily.py is running"
 else
     echo "$(date '+%Y-%m-%d %H:%M:%S') - trying to restart updatePublishedMsgDaily.py"
-    # 切换到指定目录
     cd /root/luojunhui/LongArticlesJob
-
-    # 激活 Conda 环境
     source /root/miniconda3/etc/profile.d/conda.sh
     conda activate tasks
 
-    # 在后台运行 Python 脚本并重定向日志输出
-    nohup python3 updatePublishedMsgDaily.py >> "${LOG_FILE}" 2>&1 &
+    current_time=$(date +%H:%M)
+    target_time="19:00"
+
+    if [[ "$current_time" < "$target_time" ]]; then
+        nohup python3 updatePublishedMsgDaily.py --run_task update >> "${LOG_FILE}" 2>&1 &
+        nohup python3 updatePublishedMsgDaily.py --run_task detail >> "${LOG_FILE}" 2>&1 &
+    else
+        nohup python3 updatePublishedMsgDaily.py >> "${LOG_FILE}" 2>&1 &
     echo "$(date '+%Y-%m-%d %H:%M:%S') - successfully restarted updatePublishedMsgDaily.py"
+    fi
 fi

updateAccountV3.py (+12 -6)

@@ -10,6 +10,10 @@ from argparse import ArgumentParser
 
 from applications import PQMySQL, DeNetMysql, longArticlesMySQL
 from applications.const import updateAccountReadAvgTaskConst
+from config import apolloConfig
 
+unauthorized_account = json.loads(config.getConfigValue("unauthorized_gh_id_fans"))
 
 
 
 
 def get_account_fans_by_dt(db_client) -> dict:
 def get_account_fans_by_dt(db_client) -> dict:
@@ -125,13 +129,15 @@ class UpdateAccountInfoVersion3(object):
         fans_dict = get_account_fans_by_dt(db_client=self.de)
         fans_dict = get_account_fans_by_dt(db_client=self.de)
         account_list = self.get_publishing_accounts()
         rate_dict = self.get_account_position_read_rate(dt)
-            business_type = self.const.TOULIU if account[
-                                                     'gh_id'] in self.const.TOULIU_ACCOUNTS else self.const.ARTICLES_DAILY
-            fans = fans_dict.get(account['gh_id'], {}).get(dt, 0)
+        for account in tqdm(account_list, desc=dt):
+            gh_id = account["gh_id"]
+            business_type = self.const.TOULIU if gh_id in self.const.TOULIU_ACCOUNTS else self.const.ARTICLES_DAILY
+            fans = fans_dict.get(gh_id, {}).get(dt, 0)
+            if not fans:
+                fans = int(unauthorized_account.get(gh_id, 0))
             if fans:
             if fans:
                 for index in range(1, 9):
+                    gh_id_position = "{}_{}".format(gh_id, index)
                     if rate_dict.get(gh_id_position):
                     if rate_dict.get(gh_id_position):
                         rate = rate_dict[gh_id_position]
                         read_avg = fans * rate
                             self.pq.update(
                             self.pq.update(
                                 sql=insert_sql,
                                 params=(
+                                    gh_id,
                                     index,
                                     index,
                                     dt,
                                     account['account_name'],
updatePublishedMsgDaily.py (+164 -7)

@@ -10,13 +10,32 @@ import traceback
 import urllib.parse
 from tqdm import tqdm
 from datetime import datetime
+from argparse import ArgumentParser
 
-
-from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi
+from applications import PQMySQL, WeixinSpider, Functions, log, bot, aiditApi, longArticlesMySQL, \
+    create_feishu_columns_sheet
 from applications.const import updatePublishedMsgTaskConst
 
 ARTICLE_TABLE = "official_articles_v2"
 const = updatePublishedMsgTaskConst()
+spider = WeixinSpider()
+functions = Functions()
+
+
+def generate_bot_columns():
+    """
+    生成列
+    :return:
+    """
+    columns = [
+        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="name", display_name="公众号名称"),
+        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="ghId", display_name="ghId"),
+        create_feishu_columns_sheet(sheet_type="number", sheet_name="follower_count", display_name="粉丝数"),
+        create_feishu_columns_sheet(sheet_type="date", sheet_name="account_init_timestamp",
+                                    display_name="账号接入系统时间"),
+        create_feishu_columns_sheet(sheet_type="plain_text", sheet_name="using_status", display_name="利用状态")
+    ]
+    return columns
 
 
 def get_account_using_status():
@@ -45,8 +64,11 @@ def get_accounts():
     """
     using_account_set = get_account_using_status()
     account_list_with_out_using_status = aiditApi.get_publish_account_from_aigc()
+    # only_auto_reply_accounts_set = aiditApi.get_only_auto_reply_accounts()
     account_list = []
     for item in account_list_with_out_using_status:
+        # if item['account_id'] in only_auto_reply_accounts_set:
+        #     continue
         if item['ghId'] in using_account_set:
             item['using_status'] = 1
         else:
@@ -418,12 +440,20 @@ def check_job():
     for sub_item in tqdm(account_list):
         res = check_single_account(db_client, sub_item)
         if not res:
+            # 去掉三个不需要查看的字段
+            sub_item.pop('account_type', None)
+            sub_item.pop('account_auth', None)
+            sub_item.pop('account_id', None)
             fail_list.append(sub_item)
     if fail_list:
         try:
             bot(
                 title="日常报警, 存在账号更新失败",
-                detail=fail_list
+                detail={
+                    "columns": generate_bot_columns(),
+                    "rows": fail_list
+                },
+                table=True
             )
         except Exception as e:
             print("Timeout Error: {}".format(e))
@@ -467,6 +497,9 @@ def update_publish_timestamp(db_client, row):
         if response_code == const.ARTICLE_DELETE_CODE:
             publish_timestamp_s = const.DELETE_STATUS
             root_source_id_list = []
+        elif response_code == const.ARTICLE_ILLEGAL_CODE:
+            publish_timestamp_s = const.ILLEGAL_STATUS
+            root_source_id_list = []
         elif response_code == const.ARTICLE_SUCCESS_CODE:
             data = response['data']['data']
             publish_timestamp_ms = data['publish_timestamp']
@@ -486,7 +519,7 @@ def update_publish_timestamp(db_client, row):
             root_source_id_list = []
     except Exception as e:
         publish_timestamp_s = const.REQUEST_FAIL_STATUS
-        root_source_id_list = []
+        root_source_id_list = None
         error_msg = traceback.format_exc()
         print(e, error_msg)
 
@@ -582,14 +615,138 @@ def get_article_detail_job():
         )
 
 
+def whether_title_unsafe(db_client, title):
+    """
+    检查文章标题是否已经存在违规记录
+    :param db_client:
+    :param title:
+    :return:
+    """
+    title_md5 = functions.str_to_md5(title)
+    sql = f"""
+        SELECT title_md5
+        FROM article_unsafe_title
+        WHERE title_md5 = '{title_md5}';
+    """
+    res = db_client.select(sql)
+    if res:
+        return True
+    else:
+        return False
+
+
+def monitor(run_date):
+    """
+    监控任务, 监测周期为7天,监测文章是否被违规,若监测到违规文章,则进行告警
+    :return:
+    """
+    try:
+        pq_client = PQMySQL()
+        lam_client = longArticlesMySQL()
+    except Exception as e:
+        error_msg = traceback.format_exc()
+        bot(
+            title="监控任务连接数据库失败",
+            detail={
+                "job": "monitor",
+                "error": str(e),
+                "msg": error_msg
+            }
+        )
+        return
+
+    if not run_date:
+        run_date = datetime.today().strftime("%Y-%m-%d")
+
+    monitor_start_timestamp = int(datetime.strptime(run_date, "%Y-%m-%d").timestamp()) - const.MONITOR_PERIOD
+    select_sql = f"""
+        SELECT ghId, accountName, title, ContentUrl, wx_sn, from_unixtime(publish_timestamp) AS publish_timestamp
+        FROM {ARTICLE_TABLE}
+        WHERE publish_timestamp >= {monitor_start_timestamp};
+    """
+    article_list = pq_client.select(select_sql)
+    for article in tqdm(article_list, desc="monitor article list"):
+        gh_id = article[0]
+        account_name = article[1]
+        title = article[2]
+        # 判断标题是否存在违规记录
+        if whether_title_unsafe(lam_client, title):
+            continue
+        url = article[3]
+        wx_sn = article[4]
+        publish_date = article[5]
+        try:
+            response = spider.get_article_text(url, is_cache=False)
+            response_code = response['code']
+            if response_code == const.ARTICLE_ILLEGAL_CODE:
+                bot(
+                    title="文章违规告警",
+                    detail={
+                        "ghId": gh_id,
+                        "accountName": account_name,
+                        "title": title,
+                        "wx_sn": str(wx_sn),
+                        "publish_date": str(publish_date)
+                    },
+                    mention=False
+                )
+                aiditApi.delete_articles(
+                    gh_id=gh_id,
+                    title=title
+                )
+        except Exception as e:
+            error_msg = traceback.format_exc()
+            log(
+                task="monitor",
+                function="monitor",
+                message="请求文章详情失败",
+                data={
+                    "ghId": gh_id,
+                    "accountName": account_name,
+                    "title": title,
+                    "wx_sn": str(wx_sn),
+                    "error": str(e),
+                    "msg": error_msg
+                }
+            )
+
+
 def main():
     """
     main
     :return:
     """
-    update_job()
-    check_job()
-    get_article_detail_job()
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--run_task",
+        help="update: update_job, check: check_job, detail: get_article_detail_job, monitor: monitor")
+    parser.add_argument(
+        "--run_date",
+        help="--run_date %Y-%m-%d",
+    )
+    args = parser.parse_args()
+
+    if args.run_task:
+        run_task = args.run_task
+        match run_task:
+            case "update":
+                update_job()
+            case "check":
+                check_job()
+            case "detail":
+                get_article_detail_job()
+            case "monitor":
+                if args.run_date:
+                    run_date = args.run_date
+                else:
+                    run_date = None
+                monitor(run_date)
+            case _:
+                print("No such task, input update: update_job, check: check_job, detail: get_article_detail_job, monitor: monitor")
+    else:
+        update_job()
+        check_job()
+        get_article_detail_job()
 
 
 if __name__ == '__main__':