1 månad sedan · 6cf025c50d
--- a/content_agent/business_modules/content_discovery/pattern_recall/recall_decision.py
+++ b/content_agent/business_modules/content_discovery/pattern_recall/recall_decision.py
@@ -14,6 +14,7 @@ from datetime import datetime, timezone
 
				 from typing import Any
			
 
				 
			
 
				 from content_agent.constants import RUNTIME_RECORD_SCHEMA_VERSION
			
 
				+from content_agent.integrations.gemini_video import _fail
			
 
				 from content_agent.integrations.walk_graph_json import WalkGraphStore
			
 
				 from content_agent.interfaces import GeminiVideoClient, RuntimeFileStore
			
 
				 
			
@@ -81,13 +82,7 @@ def _collect_judgments(
 
				         future_to_offset = {}
			
 
				         for offset, item in enumerate(discovered_content_items):
			
 
				             if offset >= submitted:
			
 
				-                judgments[offset] = {
			
 
				-                    "fit_senior_50plus": False,
			
 
				-                    "fit_confidence": 0.0,
			
 
				-                    "relevance_score": 0.0,
			
 
				-                    "reason": "gemini_quota_exhausted",
			
 
				-                    "status": "failed",
			
 
				-                }
			
 
				+                judgments[offset] = _fail("gemini_quota_exhausted")
			
 
				                 continue
			
 
				             future = pool.submit(
			
 
				                 _safe_analyze,
			
@@ -113,13 +108,7 @@ def _safe_analyze(
 
				     try:
			
 
				         return client.analyze(item, media, source_context)
			
 
				     except Exception as exc:
			
 
				-        return {
			
 
				-            "fit_senior_50plus": False,
			
 
				-            "fit_confidence": 0.0,
			
 
				-            "relevance_score": 0.0,
			
 
				-            "reason": f"analyze_raised: {type(exc).__name__}",
			
 
				-            "status": "failed",
			
 
				-        }
			
 
				+        return _fail(f"analyze_raised: {type(exc).__name__}")
			
 
				 
			
 
				 
			
 
				 def _resolve_max_workers() -> int:
			
--- a/content_agent/business_modules/run_record/validation.py
+++ b/content_agent/business_modules/run_record/validation.py
@@ -5,6 +5,7 @@ from collections import Counter
 
				 from typing import Any
			
 
				 
			
 
				 from content_agent.constants import RUNTIME_RECORD_SCHEMA_VERSION, RUNTIME_SCHEMA_VERSION
			
 
				+from content_agent.findings import fail as _fail
			
 
				 from content_agent.integrations.runtime_files import RUNTIME_FILENAMES
			
 
				 from content_agent.interfaces import RuntimeFileStore
			
 
				 
			
@@ -852,5 +853,3 @@ def _result(run_id: str, findings: list[dict[str, Any]]) -> dict[str, Any]:
 
				     }
			
 
				 
			
 
				 
			
 
				-def _fail(findings: list[dict[str, Any]], check_id: str, message: str) -> None:
			
 
				-    findings.append({"level": "fail", "check_id": check_id, "message": message})
			
--- a/content_agent/findings.py
+++ b/content_agent/findings.py
@@ -0,0 +1,9 @@
 
				+"""校验 findings 的共享构造(原 validation / walk_strategy_json / walk_graph_json 各抄一份)。"""
			
 
				+
			
 
				+from __future__ import annotations
			
 
				+
			
 
				+from typing import Any
			
 
				+
			
 
				+
			
 
				+def fail(findings: list[dict[str, Any]], check_id: str, message: str) -> None:
			
 
				+    findings.append({"level": "fail", "check_id": check_id, "message": message})
			
--- a/content_agent/integrations/config_store.py
+++ b/content_agent/integrations/config_store.py
@@ -19,10 +19,24 @@ def read_text(path: Path) -> str:
 
				     return Path(path).read_text(encoding="utf-8")
			
 
				 
			
 
				 
			
 
				+# mtime+size 键缓存:同一 run 内 walk_policy 等配置会被读 3+ 次(walk_engine + 每次 recall),
			
 
				+# 文件未变时不重复读盘/parse。调用方约定不就地修改返回的 parsed 对象(全仓配置均只读消费)。
			
 
				+_CACHE: dict[str, tuple[tuple[int, int], Any, str]] = {}
			
 
				+
			
 
				+
			
 
				 def load_json(path: Path) -> tuple[Any, str]:
			
 
				     """Return (parsed, raw_text). raw_text is the exact on-disk text for hashing."""
			
 
				-    raw = read_text(path)
			
 
				-    return json.loads(raw), raw
			
 
				+    resolved = Path(path)
			
 
				+    stat = resolved.stat()
			
 
				+    key = str(resolved)
			
 
				+    stamp = (stat.st_mtime_ns, stat.st_size)
			
 
				+    cached = _CACHE.get(key)
			
 
				+    if cached and cached[0] == stamp:
			
 
				+        return cached[1], cached[2]
			
 
				+    raw = read_text(resolved)
			
 
				+    parsed = json.loads(raw)
			
 
				+    _CACHE[key] = (stamp, parsed, raw)
			
 
				+    return parsed, raw
			
 
				 
			
 
				 
			
 
				 def sha256_text(text: str) -> str:
			
--- a/content_agent/integrations/database_runtime.py
+++ b/content_agent/integrations/database_runtime.py
@@ -232,18 +232,21 @@ class DatabaseRuntimeStore:
 
				 
			
 
				     def append_jsonl(self, run_id: str, filename: str, rows: list[dict[str, Any]]) -> Path:
			
 
				         table = _table_for_runtime_file(filename)
			
 
				+        # 整批共用一个连接、一次 commit:避免每行新建连接+commit 的 N 次网络往返。
			
 
				+        statements: list[tuple[str, list[Any]]] = []
			
 
				         for row in rows:
			
 
				             if row.get("run_id") != run_id:
			
 
				                 raise ValueError(f"{filename} row run_id does not match runtime run_id")
			
 
				             record = _record_for_jsonl(filename, row)
			
 
				-            if filename in JSONL_UPSERT_KEYS:
			
 
				-                self._upsert(
			
 
				-                    table,
			
 
				-                    record,
			
 
				-                    key_columns=JSONL_UPSERT_KEYS[filename],
			
 
				-                )
			
 
				-            else:
			
 
				-                self._insert(table, record)
			
 
				+            statements.append(
			
 
				+                self._row_sql(table, record, key_columns=JSONL_UPSERT_KEYS.get(filename))
			
 
				+            )
			
 
				+        if statements:
			
 
				+            with self._connection_factory() as conn:
			
 
				+                with conn.cursor() as cur:
			
 
				+                    for sql, values in statements:
			
 
				+                        cur.execute(sql, values)
			
 
				+                conn.commit()
			
 
				         return self.run_dir(run_id) / filename
			
 
				 
			
 
				     def read_json(self, run_id: str, filename: str) -> dict[str, Any]:
			
@@ -376,7 +379,12 @@ class DatabaseRuntimeStore:
 
				                 key_columns=("run_id", "policy_run_id", "clue_id"),
			
 
				             )
			
 
				 
			
 
				-    def _insert(self, table: str, record: dict[str, Any]) -> None:
			
 
				+    def _row_sql(
			
 
				+        self,
			
 
				+        table: str,
			
 
				+        record: dict[str, Any],
			
 
				+        key_columns: tuple[str, ...] | None = None,
			
 
				+    ) -> tuple[str, list[Any]]:
			
 
				         sanitized = _sanitize_record(table, record)
			
 
				         columns = list(sanitized)
			
 
				         placeholders = ", ".join(["%s"] * len(columns))
			
@@ -385,12 +393,19 @@ class DatabaseRuntimeStore:
 
				             _db_value(table, column, sanitized[column])
			
 
				             for column in columns
			
 
				         ]
			
 
				+        sql = f"INSERT INTO `{table}` ({column_sql}) VALUES ({placeholders})"
			
 
				+        if key_columns:
			
 
				+            update_columns = [column for column in columns if column not in key_columns]
			
 
				+            assignments = ", ".join(f"`{column}` = VALUES(`{column}`)" for column in update_columns)
			
 
				+            if assignments:
			
 
				+                sql += f" ON DUPLICATE KEY UPDATE {assignments}"
			
 
				+        return sql, values
			
 
				+
			
 
				+    def _insert(self, table: str, record: dict[str, Any]) -> None:
			
 
				+        sql, values = self._row_sql(table, record)
			
 
				         with self._connection_factory() as conn:
			
 
				             with conn.cursor() as cur:
			
 
				-                cur.execute(
			
 
				-                    f"INSERT INTO `{table}` ({column_sql}) VALUES ({placeholders})",
			
 
				-                    values,
			
 
				-                )
			
 
				+                cur.execute(sql, values)
			
 
				             conn.commit()
			
 
				 
			
 
				     def _upsert(
			
@@ -399,19 +414,7 @@ class DatabaseRuntimeStore:
 
				         record: dict[str, Any],
			
 
				         key_columns: tuple[str, ...],
			
 
				     ) -> None:
			
 
				-        sanitized = _sanitize_record(table, record)
			
 
				-        columns = list(sanitized)
			
 
				-        placeholders = ", ".join(["%s"] * len(columns))
			
 
				-        column_sql = ", ".join(f"`{column}`" for column in columns)
			
 
				-        update_columns = [column for column in columns if column not in key_columns]
			
 
				-        assignments = ", ".join(f"`{column}` = VALUES(`{column}`)" for column in update_columns)
			
 
				-        values = [
			
 
				-            _db_value(table, column, sanitized[column])
			
 
				-            for column in columns
			
 
				-        ]
			
 
				-        sql = f"INSERT INTO `{table}` ({column_sql}) VALUES ({placeholders})"
			
 
				-        if assignments:
			
 
				-            sql += f" ON DUPLICATE KEY UPDATE {assignments}"
			
 
				+        sql, values = self._row_sql(table, record, key_columns=key_columns)
			
 
				         with self._connection_factory() as conn:
			
 
				             with conn.cursor() as cur:
			
 
				                 cur.execute(sql, values)
			
--- a/content_agent/integrations/video_fetch.py
+++ b/content_agent/integrations/video_fetch.py
@@ -28,6 +28,7 @@ _PLATFORM_DOWNLOAD_HEADERS = {
 
				 # 已拍板压缩档:360p / 1fps / 低清,实测 ~4MB(memory/video-multimodal-analysis)。
			
 
				 _FFMPEG_ARGS = ["-vf", "scale=360:-2,fps=1", "-crf", "33", "-c:a", "aac", "-b:a", "32k", "-ac", "1"]
			
 
				 MAX_INLINE_BYTES = 30 * 1024 * 1024  # OpenRouter inline base64 平台硬上限
			
 
				+COMPRESS_TIMEOUT_SECONDS = 120.0  # 实测 64MB/720p 压缩 ~8s,120s 足够余量
			
 
				 
			
 
				 
			
 
				 class VideoFetchError(RuntimeError):
			
@@ -41,13 +42,18 @@ def _download_headers(platform: str, override: dict[str, str] | None) -> dict[st
 
				 
			
 
				 
			
 
				 def _compress(raw: bytes, ffmpeg_exe: str) -> bytes:
			
 
				-    proc = subprocess.run(
			
 
				-        [ffmpeg_exe, "-i", "pipe:0", *_FFMPEG_ARGS, "-f", "mp4",
			
 
				-         "-movflags", "frag_keyframe+empty_moov", "pipe:1"],
			
 
				-        input=raw,
			
 
				-        stdout=subprocess.PIPE,
			
 
				-        stderr=subprocess.PIPE,
			
 
				-    )
			
 
				+    # 超时保护:坏视频会让 ffmpeg 卡死,进而挂住一个判定并发线程(实测正常压缩 ~8s)。
			
 
				+    try:
			
 
				+        proc = subprocess.run(
			
 
				+            [ffmpeg_exe, "-i", "pipe:0", *_FFMPEG_ARGS, "-f", "mp4",
			
 
				+             "-movflags", "frag_keyframe+empty_moov", "pipe:1"],
			
 
				+            input=raw,
			
 
				+            stdout=subprocess.PIPE,
			
 
				+            stderr=subprocess.PIPE,
			
 
				+            timeout=COMPRESS_TIMEOUT_SECONDS,
			
 
				+        )
			
 
				+    except subprocess.TimeoutExpired as exc:
			
 
				+        raise VideoFetchError("ffmpeg compression timeout") from exc
			
 
				     if proc.returncode != 0 or not proc.stdout:
			
 
				         raise VideoFetchError("ffmpeg compression failed")
			
 
				     return proc.stdout
			
--- a/content_agent/integrations/walk_graph_json.py
+++ b/content_agent/integrations/walk_graph_json.py
@@ -10,6 +10,8 @@ from dataclasses import dataclass
 
				 from pathlib import Path
			
 
				 from typing import Any
			
 
				 
			
 
				+from content_agent.findings import fail as _fail
			
 
				+
			
 
				 WALK_GRAPH_PATH = Path("tech_documents/数据接口与来源/walk_graph.json")
			
 
				 WALK_POLICY_PATH = Path("tech_documents/数据接口与来源/walk_policy.json")
			
 
				 PROFILE_DIR = Path("tech_documents/数据接口与来源/platform_profiles")
			
@@ -127,8 +129,6 @@ def _validate_profile(profile: dict[str, Any], edge_ids: set[str]) -> list[dict[
 
				     return findings
			
 
				 
			
 
				 
			
 
				-def _fail(findings: list[dict[str, Any]], check_id: str, message: str) -> None:
			
 
				-    findings.append({"level": "fail", "check_id": check_id, "message": message})
			
 
				 
			
 
				 
			
 
				 def _raise_on_fail(findings: list[dict[str, Any]], label: str) -> None:
			
--- a/content_agent/integrations/walk_strategy_json.py
+++ b/content_agent/integrations/walk_strategy_json.py
@@ -5,6 +5,8 @@ from dataclasses import dataclass
 
				 from pathlib import Path
			
 
				 from typing import Any
			
 
				 
			
 
				+from content_agent.findings import fail as _fail
			
 
				+
			
 
				 
			
 
				 WALK_STRATEGY_PATH = Path("product_documents/抖音游走策略/douyin_walk_strategy.v1.json")
			
 
				 RULE_PACK_PATH = Path("product_documents/规则包/douyin_rule_packs.v1.json")
			
@@ -199,7 +201,3 @@ def _check_rule_pack_bindings(
 
				 
			
 
				 def _ids(rows: list[dict[str, Any]], field: str) -> set[str]:
			
 
				     return {str(row[field]) for row in rows if row.get(field)}
			
 
				-
			
 
				-
			
 
				-def _fail(findings: list[dict[str, Any]], check_id: str, message: str) -> None:
			
 
				-    findings.append({"level": "fail", "check_id": check_id, "message": message})