|
|
@@ -139,6 +139,18 @@ for table in [
|
|
|
"content_agent_strategy_reviews",
|
|
|
]:
|
|
|
REQUIRED_COLUMNS_BY_TABLE[table].add("raw_payload")
|
|
|
+REQUIRED_COLUMNS_BY_TABLE["content_agent_runs"].update(
|
|
|
+ {"status", "error_code", "error_message", "error_detail"}
|
|
|
+)
|
|
|
+REQUIRED_COLUMNS_BY_TABLE["content_agent_queries"].update(
|
|
|
+ {"search_query_id", "search_query", "search_query_effect_status"}
|
|
|
+)
|
|
|
+REQUIRED_COLUMNS_BY_TABLE["content_agent_search_clues"].update(
|
|
|
+ {"search_query_id", "search_query_effect_status"}
|
|
|
+)
|
|
|
+REQUIRED_COLUMNS_BY_TABLE["content_agent_run_events"].update(
|
|
|
+ {"event_type", "status", "error_code", "message"}
|
|
|
+)
|
|
|
REQUIRED_UNIQUE_INDEXES_BY_TABLE = {
|
|
|
"content_agent_runs": [{"run_id"}],
|
|
|
"content_agent_source_contexts": [{"run_id"}],
|
|
|
@@ -333,13 +345,19 @@ def main() -> int:
|
|
|
}
|
|
|
result["column_status"] = column_status
|
|
|
result["index_status"] = index_status
|
|
|
+ if args.audit_run_id:
|
|
|
+ result["audit_runs"] = _audit_query_failures(cur, args.audit_run_id)
|
|
|
|
|
|
schema_ready = _schema_ready(result.get("column_status", {}), result.get("index_status", {}))
|
|
|
result["schema_ready"] = schema_ready
|
|
|
print(json.dumps(result, ensure_ascii=False, indent=2, default=str))
|
|
|
if args.allow_missing_tables:
|
|
|
return 0
|
|
|
- if result["missing_tables"] or not result["has_runtime_privileges"] or not schema_ready:
|
|
|
+ audit_ready = all(
|
|
|
+ audit.get("status") in {"pass", "skipped"}
|
|
|
+ for audit in result.get("audit_runs", [])
|
|
|
+ )
|
|
|
+ if result["missing_tables"] or not result["has_runtime_privileges"] or not schema_ready or not audit_ready:
|
|
|
return 1
|
|
|
return 0
|
|
|
|
|
|
@@ -368,6 +386,12 @@ def _parse_args() -> argparse.Namespace:
|
|
|
parser.add_argument("--password", default=None)
|
|
|
parser.add_argument("--timeout", type=int, default=8)
|
|
|
parser.add_argument("--run-label-like", default="cfa_mysql_100_20260607_batch%")
|
|
|
+ parser.add_argument(
|
|
|
+ "--audit-run-id",
|
|
|
+ action="append",
|
|
|
+ default=[],
|
|
|
+ help="Optional exact run_id to audit query failure closure. May be passed more than once.",
|
|
|
+ )
|
|
|
parser.add_argument(
|
|
|
"--allow-missing-tables",
|
|
|
action="store_true",
|
|
|
@@ -376,6 +400,134 @@ def _parse_args() -> argparse.Namespace:
|
|
|
return parser.parse_args()
|
|
|
|
|
|
|
|
|
+def _audit_query_failures(cur: Any, run_ids: list[str]) -> list[dict[str, Any]]:
|
|
|
+ audits: list[dict[str, Any]] = []
|
|
|
+ for run_id in run_ids:
|
|
|
+ cur.execute(
|
|
|
+ "SELECT `run_id`, `error_code`, `error_detail` "
|
|
|
+ "FROM `content_agent_runs` WHERE `run_id` = %s",
|
|
|
+ (run_id,),
|
|
|
+ )
|
|
|
+ run_row = cur.fetchone()
|
|
|
+ if not run_row:
|
|
|
+ audits.append({"run_id": run_id, "status": "fail", "findings": ["run_not_found"]})
|
|
|
+ continue
|
|
|
+ error_detail = _decode_json(run_row.get("error_detail")) or {}
|
|
|
+ query_failures = error_detail.get("query_failures")
|
|
|
+ if not isinstance(query_failures, list) or not query_failures:
|
|
|
+ audits.append({"run_id": run_id, "status": "skipped", "reason": "no_query_failures"})
|
|
|
+ continue
|
|
|
+ failure_ids = [
|
|
|
+ str(failure.get("search_query_id"))
|
|
|
+ for failure in query_failures
|
|
|
+ if isinstance(failure, dict) and failure.get("search_query_id")
|
|
|
+ ]
|
|
|
+ findings: list[str] = []
|
|
|
+ if not failure_ids:
|
|
|
+ findings.append("query_failures_missing_search_query_id")
|
|
|
+
|
|
|
+ query_rows = _fetch_rows_for_ids(
|
|
|
+ cur,
|
|
|
+ "content_agent_queries",
|
|
|
+ run_id,
|
|
|
+ failure_ids,
|
|
|
+ "search_query_id, search_query_effect_status, raw_payload",
|
|
|
+ )
|
|
|
+ query_by_id = {row["search_query_id"]: row for row in query_rows}
|
|
|
+ for query_id in failure_ids:
|
|
|
+ row = query_by_id.get(query_id)
|
|
|
+ if not row:
|
|
|
+ findings.append(f"query_missing:{query_id}")
|
|
|
+ continue
|
|
|
+ if row.get("search_query_effect_status") != "failed":
|
|
|
+ findings.append(f"query_not_failed:{query_id}")
|
|
|
+ raw_payload = _decode_json(row.get("raw_payload")) or {}
|
|
|
+ if not isinstance(raw_payload.get("query_failure"), dict):
|
|
|
+ findings.append(f"query_failure_payload_missing:{query_id}")
|
|
|
+
|
|
|
+ clue_rows = _fetch_rows_for_ids(
|
|
|
+ cur,
|
|
|
+ "content_agent_search_clues",
|
|
|
+ run_id,
|
|
|
+ failure_ids,
|
|
|
+ "search_query_id, search_query_effect_status, raw_payload",
|
|
|
+ )
|
|
|
+ clue_by_id = {row["search_query_id"]: row for row in clue_rows}
|
|
|
+ for query_id in failure_ids:
|
|
|
+ row = clue_by_id.get(query_id)
|
|
|
+ if not row:
|
|
|
+ findings.append(f"search_clue_missing:{query_id}")
|
|
|
+ continue
|
|
|
+ if row.get("search_query_effect_status") != "failed":
|
|
|
+ findings.append(f"search_clue_not_failed:{query_id}")
|
|
|
+
|
|
|
+ event_refs = [f"search_queries.jsonl:{query_id}" for query_id in failure_ids]
|
|
|
+ event_rows = _fetch_rows_for_values(
|
|
|
+ cur,
|
|
|
+ "content_agent_run_events",
|
|
|
+ run_id,
|
|
|
+ event_refs,
|
|
|
+ "input_ref",
|
|
|
+ "input_ref, event_type, status",
|
|
|
+ extra_where="AND event_type = 'platform_query_failed'",
|
|
|
+ )
|
|
|
+ event_by_ref = {row["input_ref"]: row for row in event_rows}
|
|
|
+ for event_ref in event_refs:
|
|
|
+ row = event_by_ref.get(event_ref)
|
|
|
+ if not row:
|
|
|
+ findings.append(f"platform_query_failed_event_missing:{event_ref}")
|
|
|
+ continue
|
|
|
+ if row.get("status") != "failed":
|
|
|
+ findings.append(f"platform_query_failed_event_status:{event_ref}")
|
|
|
+
|
|
|
+ audits.append(
|
|
|
+ {
|
|
|
+ "run_id": run_id,
|
|
|
+ "query_failure_count": len(failure_ids),
|
|
|
+ "status": "fail" if findings else "pass",
|
|
|
+ "findings": findings,
|
|
|
+ }
|
|
|
+ )
|
|
|
+ return audits
|
|
|
+
|
|
|
+
|
|
|
+def _fetch_rows_for_ids(
|
|
|
+ cur: Any,
|
|
|
+ table: str,
|
|
|
+ run_id: str,
|
|
|
+ ids: list[str],
|
|
|
+ columns: str,
|
|
|
+) -> list[dict[str, Any]]:
|
|
|
+ return _fetch_rows_for_values(cur, table, run_id, ids, "search_query_id", columns)
|
|
|
+
|
|
|
+
|
|
|
+def _fetch_rows_for_values(
|
|
|
+ cur: Any,
|
|
|
+ table: str,
|
|
|
+ run_id: str,
|
|
|
+ values: list[str],
|
|
|
+ column: str,
|
|
|
+ columns: str,
|
|
|
+ *,
|
|
|
+ extra_where: str = "",
|
|
|
+) -> list[dict[str, Any]]:
|
|
|
+ if not values:
|
|
|
+ return []
|
|
|
+ placeholders = ", ".join(["%s"] * len(values))
|
|
|
+ cur.execute(
|
|
|
+ f"SELECT {columns} FROM `{table}` "
|
|
|
+ f"WHERE `run_id` = %s AND `{column}` IN ({placeholders}) {extra_where}",
|
|
|
+ [run_id, *values],
|
|
|
+ )
|
|
|
+ return list(cur.fetchall())
|
|
|
+
|
|
|
+
|
|
|
+def _decode_json(value: Any) -> Any:
|
|
|
+ if value is None or isinstance(value, (dict, list)):
|
|
|
+ return value
|
|
|
+ return json.loads(value)
|
|
|
+
|
|
|
+
|
|
|
def _load_env_file(path_value: str | None) -> dict[str, str]:
|
|
|
if not path_value:
|
|
|
return {}
|