Просмотр исходного кода

bugfix: 修改警告的判断标准

jihuaqiang 6 дней назад
Родитель
Коммит
25bfb36c07
2 изменённых файла: 23 добавления и 3 удаления
  1. 1 1
      scheduler/bootstrap.py
  2. 22 2
      scheduler/decode_hourly_stats_job.py

+ 1 - 1
scheduler/bootstrap.py

@@ -30,7 +30,7 @@ def start_scheduler() -> None:
     )
     _scheduler.add_job(
         run_decode_hourly_stats_job,
-        trigger=CronTrigger(minute=0),
+        trigger=CronTrigger(minute=6),
         id="decode_hourly_stats",
         replace_existing=True,
     )

+ 22 - 2
scheduler/decode_hourly_stats_job.py

@@ -25,6 +25,7 @@ _TZ = ZoneInfo("Asia/Shanghai")
 _FEISHU_WEBHOOK = "https://open.feishu.cn/open-apis/bot/v2/hook/af94b535-ed47-47d8-87f4-d893e1077276"
 _FEISHU_SIGN_SECRET = "lebZtBVkKJrbaVFlss2Pcf"
 _ALERT_FAIL_RATE = 0.15
+_ALERT_SUCCESS_RATE = 0.20
 
 
 def _previous_hour_window(now: datetime) -> tuple[datetime, datetime]:
@@ -85,6 +86,18 @@ def _fetch_today_stats(now: datetime) -> Dict[str, int]:
     }
 
 
+def _has_overdue_pending_task(now: datetime) -> bool:  # Return True when at least one status-0/1 task row was created 30+ minutes before `now`.
+    overdue_before = now - timedelta(minutes=30)  # Cutoff instant: rows created at or before it count as overdue.
+    sql = """
+        SELECT COUNT(1) AS total_count
+        FROM aigc_topic_decode_task_result
+        WHERE status IN (0, 1)
+          AND create_time <= %s
+    """
+    row = mysql.fetchone(sql, (overdue_before.strftime("%Y-%m-%d %H:%M:%S"),))  # Cutoff is bound as a query parameter; the format drops any tz offset. No lower bound on create_time, so rows of any age match. NOTE(review): status 0/1 presumably mean pending/running - confirm.
+    return int((row or {}).get("total_count") or 0) > 0  # A missing row or NULL/absent count is treated as 0, i.e. no overdue task.
+
+
 def _gen_feishu_sign(timestamp: str, secret: str) -> str:
     # 与飞书文档一致:把 timestamp + "\n" + 密钥 作为 key,对空串做 HmacSHA256 再 Base64
     # https://open.feishu.cn/document/client-docs/bot-v3/add-custom-bot
@@ -218,10 +231,15 @@ def run_decode_hourly_stats_job() -> None:
         running = stats["running"]
         fail = stats["fail"]
         fail_rate = (fail / total) if total > 0 else 0.0
+        success_rate = (success / total) if total > 0 else 0.0
         today_stats = _fetch_today_stats(now)
         today_executed_total = today_stats["executed_total"]
         today_pending_total = today_stats["pending_total"]
-        is_alert = total > 0 and fail_rate >= _ALERT_FAIL_RATE
+        has_overdue_pending_task = _has_overdue_pending_task(now)
+        is_alert = total > 0 and (
+            fail_rate > _ALERT_FAIL_RATE
+            or (success_rate < _ALERT_SUCCESS_RATE and has_overdue_pending_task)
+        )
         card = _build_feishu_card(
             is_alert=is_alert,
             window_start=window_start,
@@ -237,13 +255,15 @@ def run_decode_hourly_stats_job() -> None:
         )
         _send_feishu_card(card)
         logger.info(
-            "解构小时统计推送完成 total={} success={} pending={} running={} fail={} fail_rate={:.2%} is_alert={}",
+            "解构小时统计推送完成 total={} success={} pending={} running={} fail={} fail_rate={:.2%} success_rate={:.2%} has_overdue_pending_task={} is_alert={}",
             total,
             success,
             pending,
             running,
             fail,
             fail_rate,
+            success_rate,
+            has_overdue_pending_task,
             is_alert,
         )
     except Exception as exc: