|
|
@@ -31,6 +31,10 @@ from content_agent.interfaces import (
|
|
|
from content_agent.record_payload import with_raw_payload
|
|
|
|
|
|
|
|
|
# Value of scorecard["schema_version"] that marks a rule decision as a v4 decision.
V4_SCORECARD_SCHEMA_VERSION = "v4_scorecard.v1"
# Reason code attached to walk actions that are skipped because a v4 decision
# did not grant allow_walk; also reported as walk_gate_reason_code.
V4_ALLOW_WALK_DENIED_REASON = "v4_allow_walk_denied"
# Edge ids that are denied when a v4 decision does not grant allow_walk
# (consulted by _edge_permission_for, which also gates "video_to_hashtag").
V4_WALK_GATE_EDGES = {"query_next_page", "hashtag_to_query", "author_to_works"}
|
|
|
+
|
|
|
BLOCKED_TAG_TERMS = {
|
|
|
"h" + "ot",
|
|
|
"trend" + "ing",
|
|
|
@@ -250,18 +254,57 @@ def _expand_queries(
|
|
|
page_binding, _ = _resolve_edge_binding("query_next_page", walk_strategy)
|
|
|
query_by_id = {row["search_query_id"]: row for row in search_queries}
|
|
|
query_effect_by_id = _query_effect_by_search_query_id(discovered_content_items, rule_decisions)
|
|
|
+ query_gate_by_id = _query_v4_walk_gate_by_search_query_id(discovered_content_items, rule_decisions)
|
|
|
seen_queries: set[str] = set()
|
|
|
for item in discovered_content_items:
|
|
|
search_query_id = item.get("search_query_id")
|
|
|
cursor = item.get("next_cursor")
|
|
|
if not item.get("has_more") or not cursor or search_query_id in seen_queries:
|
|
|
continue
|
|
|
- if not _can_fetch_next_page(search_query_id, query_effect_by_id):
|
|
|
+ if not _can_fetch_next_page(search_query_id, query_effect_by_id, query_gate_by_id):
|
|
|
+ denied_decision = (query_gate_by_id.get(search_query_id) or {}).get("decision")
|
|
|
+ if (
|
|
|
+ query_effect_by_id.get(search_query_id) == "success"
|
|
|
+ and denied_decision
|
|
|
+ and _v4_walk_gate_denied(denied_decision)
|
|
|
+ ):
|
|
|
+ skipped_actions.append(
|
|
|
+ _walk_action(
|
|
|
+ run_id,
|
|
|
+ policy_run_id,
|
|
|
+ _walk_action_id(
|
|
|
+ run_id, policy_run_id, "query_next_page", search_query_id, "allow_walk"
|
|
|
+ ),
|
|
|
+ "query_next_page",
|
|
|
+ "query",
|
|
|
+ "SearchQuery",
|
|
|
+ search_query_id,
|
|
|
+ "SearchQuery",
|
|
|
+ "next_page_skipped",
|
|
|
+ "fetch_next_page",
|
|
|
+ "skipped",
|
|
|
+ created_at,
|
|
|
+ reason_code=V4_ALLOW_WALK_DENIED_REASON,
|
|
|
+ budget_tier="blocked",
|
|
|
+ rule_pack_binding=page_binding,
|
|
|
+ rule_pack_execution=_execution_record(
|
|
|
+ denied_decision,
|
|
|
+ content_pack_id=content_pack["rule_pack_id"],
|
|
|
+ ),
|
|
|
+ fallback_rule_pack=content_pack,
|
|
|
+ raw_extra={
|
|
|
+ "parent_search_query_id": search_query_id,
|
|
|
+ **_decision_context(denied_decision, denied=True),
|
|
|
+ },
|
|
|
+ )
|
|
|
+ )
|
|
|
+ seen_queries.add(search_query_id)
|
|
|
continue
|
|
|
source = query_by_id.get(search_query_id)
|
|
|
if not source:
|
|
|
continue
|
|
|
seen_queries.add(search_query_id)
|
|
|
+ gate_decision = (query_gate_by_id.get(search_query_id) or {}).get("decision")
|
|
|
if len(page_rows) >= page_budget:
|
|
|
skipped_actions.append(
|
|
|
_walk_action(
|
|
|
@@ -302,7 +345,10 @@ def _expand_queries(
|
|
|
"query_next_page",
|
|
|
created_at,
|
|
|
page_cursor=str(cursor),
|
|
|
- raw_extra={"parent_search_query_id": search_query_id},
|
|
|
+ raw_extra={
|
|
|
+ "parent_search_query_id": search_query_id,
|
|
|
+ **_decision_context(gate_decision),
|
|
|
+ },
|
|
|
)
|
|
|
)
|
|
|
|
|
|
@@ -319,6 +365,9 @@ def _expand_queries(
|
|
|
if _edge_permission_for(decision, "video_to_hashtag", policy) == "deny":
|
|
|
if item.get("tags"):
|
|
|
reason_code = (
|
|
|
+ V4_ALLOW_WALK_DENIED_REASON
|
|
|
+ if _v4_walk_gate_denied(decision)
|
|
|
+ else
|
|
|
"review_tag_expansion_disabled"
|
|
|
if decision.get("decision_action") == "KEEP_CONTENT_FOR_REVIEW"
|
|
|
else "blocked_by_rule_decision"
|
|
|
@@ -344,7 +393,10 @@ def _expand_queries(
|
|
|
rule_pack_binding=tag_binding,
|
|
|
rule_pack_execution=_execution_record(decision, content_pack_id=content_pack["rule_pack_id"]),
|
|
|
fallback_rule_pack=content_pack,
|
|
|
- raw_extra=_decision_context(decision),
|
|
|
+ raw_extra=_decision_context(
|
|
|
+ decision,
|
|
|
+ denied=reason_code == V4_ALLOW_WALK_DENIED_REASON,
|
|
|
+ ),
|
|
|
)
|
|
|
)
|
|
|
continue
|
|
|
@@ -394,6 +446,7 @@ def _expand_queries(
|
|
|
raw_extra={
|
|
|
"hashtag": normalized,
|
|
|
"source_content_id": item.get("platform_content_id"),
|
|
|
+ **_decision_context(decision),
|
|
|
},
|
|
|
)
|
|
|
)
|
|
|
@@ -470,10 +523,15 @@ def _expand_authors(
|
|
|
decision = decision_by_content_id.get(item.get("platform_content_id"))
|
|
|
permission = _edge_permission_for(decision, "author_to_works", policy)
|
|
|
if permission == "deny":
|
|
|
+ reason_code = (
|
|
|
+ V4_ALLOW_WALK_DENIED_REASON
|
|
|
+ if _v4_walk_gate_denied(decision)
|
|
|
+ else "blocked_by_rule_decision"
|
|
|
+ )
|
|
|
walk_actions.append(
|
|
|
_author_walk_action(
|
|
|
run_id, policy_run_id, author_id, "skipped", created_at,
|
|
|
- reason_code="blocked_by_rule_decision",
|
|
|
+ reason_code=reason_code,
|
|
|
budget_tier="blocked",
|
|
|
binding=binding,
|
|
|
decision=decision,
|
|
|
@@ -792,6 +850,7 @@ def _query_actions(
|
|
|
"reason": "content_decision_reused_for_walk_gate",
|
|
|
},
|
|
|
fallback_rule_pack=content_pack,
|
|
|
+ raw_extra=_walk_gate_context_from_query_row(row),
|
|
|
)
|
|
|
)
|
|
|
return actions
|
|
|
@@ -911,8 +970,41 @@ def _query_effect_by_search_query_id(
|
|
|
return effects
|
|
|
|
|
|
|
|
|
-def _can_fetch_next_page(search_query_id: str, query_effect_by_id: dict[str, str]) -> bool:
|
|
|
- return query_effect_by_id.get(search_query_id) == "success"
|
|
|
def _query_v4_walk_gate_by_search_query_id(
    discovered_content_items: list[dict[str, Any]],
    rule_decisions: list[dict[str, Any]],
) -> dict[str, dict[str, Any]]:
    """Aggregate v4 walk-gate state per search query.

    Every discovered item whose rule decision is a v4 decision contributes to
    the gate of each query it came from (``query_sources``, falling back to the
    item's own ``search_query_id``). A gate opens as soon as one contributing
    decision grants ``allow_walk``.

    Args:
        discovered_content_items: Content rows; read for ``platform_content_id``,
            ``query_sources`` and ``search_query_id``.
        rule_decisions: Rule decisions, indexed by content id via
            ``_decision_by_content_id``.

    Returns:
        Mapping of search query id to ``{"has_v4", "allow_walk", "decision"}``.
        ``decision`` is the most recent allowing decision once the gate is
        open, otherwise the most recent non-allowing v4 decision.
    """
    decisions = _decision_by_content_id(rule_decisions)
    gate_by_query: dict[str, dict[str, Any]] = {}

    for content in discovered_content_items:
        verdict = decisions.get(content.get("platform_content_id"))
        if not _is_v4_decision(verdict):
            continue

        walk_granted = _v4_allow_walk_allowed(verdict)
        sources = content.get("query_sources")
        if not sources:
            # No explicit sources: attribute the item to its own query.
            sources = [{"search_query_id": content.get("search_query_id")}]

        for source in sources:
            query_id = source.get("search_query_id")
            if not query_id:
                continue
            if query_id not in gate_by_query:
                gate_by_query[query_id] = {"has_v4": True, "allow_walk": False, "decision": verdict}
            entry = gate_by_query[query_id]
            if walk_granted:
                entry["allow_walk"] = True
                entry["decision"] = verdict
            elif not entry.get("allow_walk"):
                # Still closed: remember the latest denying decision for reporting.
                entry["decision"] = verdict

    return gate_by_query
|
|
|
+
|
|
|
+
|
|
|
+def _can_fetch_next_page(
|
|
|
+ search_query_id: str,
|
|
|
+ query_effect_by_id: dict[str, str],
|
|
|
+ query_gate_by_id: dict[str, dict[str, Any]] | None = None,
|
|
|
+) -> bool:
|
|
|
+ if query_effect_by_id.get(search_query_id) != "success":
|
|
|
+ return False
|
|
|
+ gate = (query_gate_by_id or {}).get(search_query_id)
|
|
|
+ if gate and gate.get("has_v4"):
|
|
|
+ return bool(gate.get("allow_walk"))
|
|
|
+ return True
|
|
|
|
|
|
|
|
|
def _edge_permission_for(
|
|
|
@@ -921,9 +1013,28 @@ def _edge_permission_for(
|
|
|
"""判定→边通行证:无判定 / 查询 rule_blocked 一律 deny,其余查 edge_permissions。"""
|
|
|
if not decision or decision.get("search_query_effect_status") == "rule_blocked":
|
|
|
return "deny"
|
|
|
+ if edge_id in V4_WALK_GATE_EDGES or edge_id == "video_to_hashtag":
|
|
|
+ if _is_v4_decision(decision) and not _v4_allow_walk_allowed(decision):
|
|
|
+ return "deny"
|
|
|
return edge_permission(policy, decision.get("decision_action"), edge_id)
|
|
|
|
|
|
|
|
|
def _is_v4_decision(decision: dict[str, Any] | None) -> bool:
    """Return True when the decision carries a v4 scorecard.

    A decision counts as v4 only if its ``scorecard`` is a dict whose
    ``schema_version`` equals ``V4_SCORECARD_SCHEMA_VERSION``. A missing
    decision, a missing scorecard, or a non-dict scorecard all yield False.
    """
    if not decision:
        return False
    scorecard = decision.get("scorecard")
    if not scorecard or not isinstance(scorecard, dict):
        return False
    return scorecard.get("schema_version") == V4_SCORECARD_SCHEMA_VERSION
|
|
|
+
|
|
|
+
|
|
|
def _v4_allow_walk_allowed(decision: dict[str, Any] | None) -> bool:
    """Return True only when a v4 decision explicitly grants ``allow_walk``.

    Args:
        decision: Rule decision, or None.

    Returns:
        True when the decision is a v4 decision and
        ``decision_replay_data["allow_walk"]`` is exactly ``True``. Non-v4
        decisions, missing replay data, malformed (non-dict) replay data and
        any other ``allow_walk`` value all return False.
    """
    if not _is_v4_decision(decision):
        return False
    replay_data = (decision or {}).get("decision_replay_data")
    # Fail closed on malformed replay data instead of raising AttributeError,
    # mirroring the isinstance guard _is_v4_decision applies to the scorecard.
    if not isinstance(replay_data, dict):
        return False
    # Identity check on purpose: truthy values such as 1 or "true" do not count.
    return replay_data.get("allow_walk") is True
|
|
|
+
|
|
|
+
|
|
|
def _v4_walk_gate_denied(decision: dict[str, Any] | None) -> bool:
    """Return True when a v4 decision withholds ``allow_walk``.

    Non-v4 (or missing) decisions are never reported as denied by this gate.
    """
    if not _is_v4_decision(decision):
        return False
    return not _v4_allow_walk_allowed(decision)
|
|
|
+
|
|
|
+
|
|
|
def _binding_by_edge_id(walk_strategy: dict[str, Any]) -> dict[str, dict[str, Any]]:
|
|
|
return {row["edge_id"]: row for row in walk_strategy.get("walk_rule_pack_binding", [])}
|
|
|
|
|
|
@@ -951,13 +1062,43 @@ def _execution_record(decision: dict[str, Any] | None, *, content_pack_id: str)
|
|
|
}
|
|
|
|
|
|
|
|
|
-def _decision_context(decision: dict[str, Any] | None) -> dict[str, Any]:
|
|
|
+def _decision_context(decision: dict[str, Any] | None, *, denied: bool = False) -> dict[str, Any]:
|
|
|
if not decision:
|
|
|
return {"decision_action": None, "search_query_effect_status": None}
|
|
|
- return {
|
|
|
+ context = {
|
|
|
+ "decision_id": decision.get("decision_id"),
|
|
|
"decision_action": decision.get("decision_action"),
|
|
|
"search_query_effect_status": decision.get("search_query_effect_status"),
|
|
|
}
|
|
|
+ if _is_v4_decision(decision):
|
|
|
+ context.update(_v4_walk_gate_context(decision, denied=denied))
|
|
|
+ return context
|
|
|
+
|
|
|
+
|
|
|
+def _v4_walk_gate_context(decision: dict[str, Any], *, denied: bool = False) -> dict[str, Any]:
|
|
|
+ replay_data = decision.get("decision_replay_data") or {}
|
|
|
+ allow_walk = replay_data.get("allow_walk")
|
|
|
+ is_denied = denied or allow_walk is not True
|
|
|
+ return {
|
|
|
+ "allow_walk": allow_walk,
|
|
|
+ "allow_walk_reason": replay_data.get("allow_walk_reason"),
|
|
|
+ "walk_gate_snapshot": replay_data.get("walk_gate_snapshot"),
|
|
|
+ "walk_gate_status": "denied" if is_denied else "allowed",
|
|
|
+ "walk_gate_reason_code": V4_ALLOW_WALK_DENIED_REASON if is_denied else None,
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+def _walk_gate_context_from_query_row(row: dict[str, Any]) -> dict[str, Any]:
|
|
|
+ payload = row.get("raw_payload") or {}
|
|
|
+ fields = [
|
|
|
+ "decision_id",
|
|
|
+ "allow_walk",
|
|
|
+ "allow_walk_reason",
|
|
|
+ "walk_gate_snapshot",
|
|
|
+ "walk_gate_status",
|
|
|
+ "walk_gate_reason_code",
|
|
|
+ ]
|
|
|
+ return {field: payload[field] for field in fields if field in payload}
|
|
|
|
|
|
|
|
|
def _merge_batch(context: dict[str, list[dict[str, Any]]], batch: dict[str, list[dict[str, Any]]]) -> None:
|