Browse Source

增加报警重试逻辑

xueyiming 2 weeks ago
parent
commit
eed389b1a5
1 changed file with 53 additions and 30 deletions
  1. 53 30
      app/services/demand_pool_strategy_daily_alert.py

+ 53 - 30
app/services/demand_pool_strategy_daily_alert.py

@@ -1,6 +1,7 @@
 import json
 import re
 import ssl
+import time
 import urllib.error
 import urllib.request
 from datetime import datetime
@@ -13,6 +14,11 @@ IDENTIFIER_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*$")
 DATE_PARTITION_RE = re.compile(r"^\d{8}$")
 SHANGHAI_TZ = ZoneInfo("Asia/Shanghai")
 
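+# The Feishu Open Platform rate-limits bot / card endpoints; a response with code=11232 means the call was throttled and may be retried after a delay.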
+_FEISHU_API_FREQUENCY_LIMIT = 11232
+_FEISHU_WEBHOOK_MAX_ATTEMPTS = 5
+_FEISHU_WEBHOOK_RETRY_BASE_SECONDS = 15.0
+
 
 def _safe_table_identifier(name: str) -> str:
     if not IDENTIFIER_RE.match(name):
@@ -123,37 +129,54 @@ def _feishu_interactive_card_payload(partition_dt: str, rows: list[tuple[str, in
 
 def _send_feishu_webhook(webhook_url: str, payload: dict[str, object]) -> None:
     body_bytes = json.dumps(payload, ensure_ascii=False).encode("utf-8")
-    request_obj = urllib.request.Request(
-        webhook_url,
-        data=body_bytes,
-        headers={"Content-Type": "application/json"},
-        method="POST",
-    )
     ssl_context = _feishu_https_context()
-    try:
-        with urllib.request.urlopen(
-            request_obj,
-            timeout=settings.feishu_webhook_timeout_seconds,
-            context=ssl_context,
-        ) as resp:
-            raw = resp.read().decode("utf-8")
-    except urllib.error.HTTPError as exc:
-        detail = exc.read().decode("utf-8", errors="replace")
-        raise RuntimeError(f"feishu webhook http error: {exc.code} {detail}") from exc
-    except urllib.error.URLError as exc:
-        raise RuntimeError(f"feishu webhook url error: {exc}") from exc
-
-    try:
-        body = json.loads(raw) if raw else {}
-    except json.JSONDecodeError as exc:
-        raise RuntimeError(f"feishu webhook invalid json: {raw!r}") from exc
-
-    code = body.get("code")
-    if code is not None and int(code) != 0:
-        raise RuntimeError(f"feishu webhook api error: {body}")
-    status_code = body.get("StatusCode")
-    if status_code is not None and int(status_code) != 0:
-        raise RuntimeError(f"feishu webhook status error: {body}")
+
+    for attempt in range(_FEISHU_WEBHOOK_MAX_ATTEMPTS):
+        request_obj = urllib.request.Request(
+            webhook_url,
+            data=body_bytes,
+            headers={"Content-Type": "application/json"},
+            method="POST",
+        )
+        try:
+            with urllib.request.urlopen(
+                request_obj,
+                timeout=settings.feishu_webhook_timeout_seconds,
+                context=ssl_context,
+            ) as resp:
+                raw = resp.read().decode("utf-8")
+        except urllib.error.HTTPError as exc:
+            detail = exc.read().decode("utf-8", errors="replace")
+            raise RuntimeError(f"feishu webhook http error: {exc.code} {detail}") from exc
+        except urllib.error.URLError as exc:
+            raise RuntimeError(f"feishu webhook url error: {exc}") from exc
+
+        try:
+            body = json.loads(raw) if raw else {}
+        except json.JSONDecodeError as exc:
+            raise RuntimeError(f"feishu webhook invalid json: {raw!r}") from exc
+
+        code = body.get("code")
+        if code is not None:
+            code_int = int(code)
+            if (
+                code_int == _FEISHU_API_FREQUENCY_LIMIT
+                and attempt < _FEISHU_WEBHOOK_MAX_ATTEMPTS - 1
+            ):
+                delay = _FEISHU_WEBHOOK_RETRY_BASE_SECONDS * (2**attempt)
+                print(
+                    "[demand_pool_daily_alert] feishu frequency limited (11232), "
+                    f"sleep {delay:.0f}s then retry ({attempt + 1}/{_FEISHU_WEBHOOK_MAX_ATTEMPTS})"
+                )
+                time.sleep(delay)
+                continue
+            if code_int != 0:
+                raise RuntimeError(f"feishu webhook api error: {body}")
+
+        status_code = body.get("StatusCode")
+        if status_code is not None and int(status_code) != 0:
+            raise RuntimeError(f"feishu webhook status error: {body}")
+        return
 
 
 def run_daily_strategy_alert(