|
|
@@ -1,3 +1,11 @@
|
|
|
+"""有界游走(V3-M4):配置驱动的统一 frontier 流程,取代三段硬编码。
|
|
|
+
|
|
|
+每条边走同一套闸:platform_profiles 是否 supported(blocked 显式 skip 不调用平台)
|
|
|
+→ walk_policy.edge_permissions 按判定结果放行(取代 _can_expand_from_decision 硬编码)
|
|
|
+→ walk_policy.edge_budgets 预算(取代 [:2]/[:3]/>=1 散落硬限;预算耗尽静默,对齐 v1 行为)。
|
|
|
+终端边(commit/downgrade/stop)与 3 类血缘并入本模块终端阶段(原 plan_walk 节点已删)。
|
|
|
+"""
|
|
|
+
|
|
|
from __future__ import annotations
|
|
|
|
|
|
import hashlib
|
|
|
@@ -5,9 +13,15 @@ from datetime import datetime, timezone
|
|
|
from typing import Any
|
|
|
|
|
|
from content_agent.business_modules import content_discovery, platform_access, rule_judgment
|
|
|
+from content_agent.business_modules import walk_strategy as walk_terminal
|
|
|
from content_agent.business_modules.content_discovery import pattern_recall
|
|
|
from content_agent.constants import RUNTIME_RECORD_SCHEMA_VERSION
|
|
|
from content_agent.errors import ContentAgentError
|
|
|
+from content_agent.integrations.walk_graph_json import (
|
|
|
+ WalkGraphStore,
|
|
|
+ edge_permission,
|
|
|
+ edge_supported,
|
|
|
+)
|
|
|
from content_agent.integrations.walk_strategy_json import WalkStrategyStore
|
|
|
from content_agent.interfaces import (
|
|
|
GeminiVideoClient,
|
|
|
@@ -45,7 +59,17 @@ def run_bounded_walk(
|
|
|
) -> dict[str, list[dict[str, Any]]]:
|
|
|
created_at = datetime.now(timezone.utc).isoformat()
|
|
|
walk_strategy = WalkStrategyStore().load_walk_strategy()
|
|
|
- content_pack_id = policy_bundle["rule_pack_id"]
|
|
|
+ store = WalkGraphStore()
|
|
|
+ policy = store.load_policy()
|
|
|
+ platform = next(
|
|
|
+ (item["platform"] for item in discovered_content_items if item.get("platform")),
|
|
|
+ "douyin",
|
|
|
+ )
|
|
|
+ profile = store.load_profile(platform)
|
|
|
+ content_pack = {
|
|
|
+ "rule_pack_id": policy_bundle["rule_pack_id"],
|
|
|
+ "rule_pack_version": policy_bundle["rule_pack_version"],
|
|
|
+ }
|
|
|
context = {
|
|
|
"search_queries": list(search_queries),
|
|
|
"discovered_content_items": list(discovered_content_items),
|
|
|
@@ -57,15 +81,12 @@ def run_bounded_walk(
|
|
|
next_decision_index = len(rule_decisions) + 1
|
|
|
next_recall_index = len(discovered_content_items) + 1
|
|
|
|
|
|
- query_rows = _pagination_queries(
|
|
|
- run_id, policy_run_id, context["search_queries"], discovered_content_items, rule_decisions, created_at
|
|
|
+ query_rows, query_skipped_actions = _expand_queries(
|
|
|
+ run_id, policy_run_id, context["search_queries"], discovered_content_items,
|
|
|
+ rule_decisions, created_at,
|
|
|
+ policy=policy, profile=profile, walk_strategy=walk_strategy, content_pack=content_pack,
|
|
|
)
|
|
|
- tag_rows, tag_skipped_actions = _tag_queries(
|
|
|
- run_id, policy_run_id, discovered_content_items, rule_decisions, created_at,
|
|
|
- walk_strategy=walk_strategy, content_pack_id=content_pack_id,
|
|
|
- )
|
|
|
- query_rows.extend(tag_rows)
|
|
|
- context["walk_actions"].extend(tag_skipped_actions)
|
|
|
+ context["walk_actions"].extend(query_skipped_actions)
|
|
|
if query_rows:
|
|
|
runtime.append_jsonl(run_id, "search_queries.jsonl", query_rows)
|
|
|
context["search_queries"].extend(query_rows)
|
|
|
@@ -91,19 +112,21 @@ def run_bounded_walk(
|
|
|
context["walk_actions"].extend(
|
|
|
_query_actions(
|
|
|
query_rows, batch.get("query_failures", []), created_at,
|
|
|
- walk_strategy=walk_strategy, content_pack_id=content_pack_id,
|
|
|
+ walk_strategy=walk_strategy, content_pack=content_pack,
|
|
|
)
|
|
|
)
|
|
|
|
|
|
- author_batch = _execute_author_edges(
|
|
|
+ author_batch = _expand_authors(
|
|
|
run_id=run_id,
|
|
|
policy_run_id=policy_run_id,
|
|
|
- pattern_seed_pack=pattern_seed_pack,
|
|
|
source_context=source_context,
|
|
|
discovered_content_items=context["discovered_content_items"],
|
|
|
rule_decisions=context["rule_decisions"],
|
|
|
+ policy=policy,
|
|
|
+ profile=profile,
|
|
|
walk_strategy=walk_strategy,
|
|
|
policy_bundle=policy_bundle,
|
|
|
+ content_pack=content_pack,
|
|
|
platform_client=platform_client,
|
|
|
runtime=runtime,
|
|
|
gemini_video_client=gemini_video_client,
|
|
|
@@ -113,13 +136,23 @@ def run_bounded_walk(
|
|
|
)
|
|
|
_merge_batch(context, author_batch)
|
|
|
context["walk_actions"].extend(author_batch.get("walk_actions", []))
|
|
|
- # 作者作品的血缘 query 行:落盘后 walk_strategy(pattern_to_search_query 路径)与
|
|
|
+ # 作者作品的血缘 query 行:落盘后终端阶段(pattern_to_search_query 路径)与
|
|
|
# recorder(search_clue 聚合)即可经既有机制覆盖作者内容,validate_run 不再断链。
|
|
|
author_queries = author_batch.get("search_queries", [])
|
|
|
if author_queries:
|
|
|
runtime.append_jsonl(run_id, "search_queries.jsonl", author_queries)
|
|
|
context["search_queries"].extend(author_queries)
|
|
|
|
|
|
+ terminal = _terminal_stage(
|
|
|
+ pattern_seed_pack,
|
|
|
+ context["search_queries"],
|
|
|
+ context["discovered_content_items"],
|
|
|
+ context["rule_decisions"],
|
|
|
+ walk_strategy,
|
|
|
+ created_at,
|
|
|
+ )
|
|
|
+ context["walk_actions"].extend(terminal["walk_actions"])
|
|
|
+ context["source_path_record_basis"] = terminal["source_path_record_basis"]
|
|
|
return context
|
|
|
|
|
|
|
|
|
@@ -192,16 +225,141 @@ def _execute_query_batch(
|
|
|
}
|
|
|
|
|
|
|
|
|
-def _execute_author_edges(
|
|
|
+def _expand_queries(
|
|
|
+ run_id: str,
|
|
|
+ policy_run_id: str,
|
|
|
+ search_queries: list[dict[str, Any]],
|
|
|
+ discovered_content_items: list[dict[str, Any]],
|
|
|
+ rule_decisions: list[dict[str, Any]],
|
|
|
+ created_at: str,
|
|
|
+ *,
|
|
|
+ policy: dict[str, Any],
|
|
|
+ profile: dict[str, Any],
|
|
|
+ walk_strategy: dict[str, Any],
|
|
|
+ content_pack: dict[str, Any],
|
|
|
+) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
|
|
|
+ budgets = policy["edge_budgets_by_id"]
|
|
|
+ decision_by_content_id = _decision_by_content_id(rule_decisions)
|
|
|
+ skipped_actions: list[dict[str, Any]] = []
|
|
|
+
|
|
|
+ # 边 query_next_page:has_more+cursor、查询 effect=success 才翻页;预算耗尽静默(v1 行为)。
|
|
|
+ page_rows: list[dict[str, Any]] = []
|
|
|
+ if edge_supported(profile, "query_next_page"):
|
|
|
+ page_budget = budgets["query_next_page"]["max_total_actions"]
|
|
|
+ query_by_id = {row["search_query_id"]: row for row in search_queries}
|
|
|
+ query_effect_by_id = _query_effect_by_search_query_id(discovered_content_items, rule_decisions)
|
|
|
+ seen_queries: set[str] = set()
|
|
|
+ for item in discovered_content_items:
|
|
|
+ if len(page_rows) >= page_budget:
|
|
|
+ break
|
|
|
+ search_query_id = item.get("search_query_id")
|
|
|
+ cursor = item.get("next_cursor")
|
|
|
+ if not item.get("has_more") or not cursor or search_query_id in seen_queries:
|
|
|
+ continue
|
|
|
+ if not _can_fetch_next_page(search_query_id, query_effect_by_id):
|
|
|
+ continue
|
|
|
+ source = query_by_id.get(search_query_id)
|
|
|
+ if not source:
|
|
|
+ continue
|
|
|
+ seen_queries.add(search_query_id)
|
|
|
+ page_rows.append(
|
|
|
+ _search_query_row(
|
|
|
+ run_id,
|
|
|
+ policy_run_id,
|
|
|
+ f"{search_query_id}_page_002",
|
|
|
+ source["search_query"],
|
|
|
+ "query_next_page",
|
|
|
+ source.get("discovery_start_source", "pattern_itemset"),
|
|
|
+ "query_next_page",
|
|
|
+ created_at,
|
|
|
+ page_cursor=str(cursor),
|
|
|
+ raw_extra={"parent_search_query_id": search_query_id},
|
|
|
+ )
|
|
|
+ )
|
|
|
+
|
|
|
+ # 回灌链 video_to_hashtag → hashtag_to_query:keep_only 门由 edge_permissions 查表。
|
|
|
+ tag_rows: list[dict[str, Any]] = []
|
|
|
+ if edge_supported(profile, "video_to_hashtag") and edge_supported(profile, "hashtag_to_query"):
|
|
|
+ tag_budget = budgets["hashtag_to_query"]["max_total_actions"]
|
|
|
+ tag_binding, _ = _resolve_edge_binding("hashtag_to_query", walk_strategy)
|
|
|
+ seen_tags: set[str] = set()
|
|
|
+ for item in discovered_content_items:
|
|
|
+ if len(tag_rows) >= tag_budget:
|
|
|
+ break
|
|
|
+ decision = decision_by_content_id.get(item.get("platform_content_id"))
|
|
|
+ if not decision:
|
|
|
+ continue
|
|
|
+ if _edge_permission_for(decision, "video_to_hashtag", policy) == "deny":
|
|
|
+ if item.get("tags"):
|
|
|
+ reason_code = (
|
|
|
+ "review_tag_expansion_disabled"
|
|
|
+ if decision.get("decision_action") == "KEEP_CONTENT_FOR_REVIEW"
|
|
|
+ else "blocked_by_rule_decision"
|
|
|
+ )
|
|
|
+ skipped_actions.append(
|
|
|
+ _walk_action(
|
|
|
+ run_id,
|
|
|
+ policy_run_id,
|
|
|
+ _walk_action_id(
|
|
|
+ run_id, policy_run_id, "hashtag_to_query", item["platform_content_id"], "tags"
|
|
|
+ ),
|
|
|
+ "hashtag_to_query",
|
|
|
+ "tag_query",
|
|
|
+ "Content",
|
|
|
+ item["platform_content_id"],
|
|
|
+ "SearchQuery",
|
|
|
+ "tag_query_skipped",
|
|
|
+ "create_tag_query",
|
|
|
+ "skipped",
|
|
|
+ created_at,
|
|
|
+ reason_code=reason_code,
|
|
|
+ budget_tier="blocked",
|
|
|
+ rule_pack_binding=tag_binding,
|
|
|
+ rule_pack_execution=_execution_record(decision, content_pack_id=content_pack["rule_pack_id"]),
|
|
|
+ fallback_rule_pack=content_pack,
|
|
|
+ raw_extra=_decision_context(decision),
|
|
|
+ )
|
|
|
+ )
|
|
|
+ continue
|
|
|
+ for tag in item.get("tags") or []:
|
|
|
+ normalized = str(tag).lstrip("#").strip()
|
|
|
+ if not normalized or normalized in seen_tags or _blocked_tag(normalized):
|
|
|
+ continue
|
|
|
+ seen_tags.add(normalized)
|
|
|
+ tag_rows.append(
|
|
|
+ _search_query_row(
|
|
|
+ run_id,
|
|
|
+ policy_run_id,
|
|
|
+ f"tag_{_short_hash(normalized)}",
|
|
|
+ normalized,
|
|
|
+ "tag_query",
|
|
|
+ item.get("discovery_start_source", "pattern_itemset"),
|
|
|
+ "hashtag_to_query",
|
|
|
+ created_at,
|
|
|
+ raw_extra={
|
|
|
+ "hashtag": normalized,
|
|
|
+ "source_content_id": item.get("platform_content_id"),
|
|
|
+ },
|
|
|
+ )
|
|
|
+ )
|
|
|
+ if len(tag_rows) >= tag_budget:
|
|
|
+ break
|
|
|
+
|
|
|
+ return [*page_rows, *tag_rows], skipped_actions
|
|
|
+
|
|
|
+
|
|
|
+def _expand_authors(
|
|
|
*,
|
|
|
run_id: str,
|
|
|
policy_run_id: str,
|
|
|
- pattern_seed_pack: dict[str, Any],
|
|
|
source_context: dict[str, Any],
|
|
|
discovered_content_items: list[dict[str, Any]],
|
|
|
rule_decisions: list[dict[str, Any]],
|
|
|
+ policy: dict[str, Any],
|
|
|
+ profile: dict[str, Any],
|
|
|
walk_strategy: dict[str, Any],
|
|
|
policy_bundle: dict[str, Any],
|
|
|
+ content_pack: dict[str, Any],
|
|
|
platform_client: PlatformSearchClient,
|
|
|
runtime: RuntimeFileStore,
|
|
|
gemini_video_client: GeminiVideoClient,
|
|
|
@@ -209,15 +367,37 @@ def _execute_author_edges(
|
|
|
start_decision_index: int,
|
|
|
created_at: str,
|
|
|
) -> dict[str, list[dict[str, Any]]]:
|
|
|
+ budgets = policy["edge_budgets_by_id"]["author_to_works"]
|
|
|
+ decision_by_content_id = _decision_by_content_id(rule_decisions)
|
|
|
+ binding, _ = _resolve_edge_binding("author_to_works", walk_strategy)
|
|
|
+ author_items = _unique_authors(discovered_content_items)[: budgets["max_total_actions"]]
|
|
|
+ walk_actions: list[dict[str, Any]] = []
|
|
|
+
|
|
|
+ # 平台不支持作者边(如视频号 blogger blocked):显式 skip 留痕、不调用平台,游走自然退化。
|
|
|
+ if not edge_supported(profile, "author_to_works"):
|
|
|
+ for item in author_items:
|
|
|
+ author_id = item.get("platform_author_id")
|
|
|
+ if not author_id:
|
|
|
+ continue
|
|
|
+ decision = decision_by_content_id.get(item.get("platform_content_id"))
|
|
|
+ walk_actions.append(
|
|
|
+ _author_walk_action(
|
|
|
+ run_id, policy_run_id, author_id, "skipped", created_at,
|
|
|
+ reason_code="edge_blocked_by_platform_profile",
|
|
|
+ budget_tier="blocked",
|
|
|
+ binding=binding,
|
|
|
+ decision=decision,
|
|
|
+ content_pack=content_pack,
|
|
|
+ )
|
|
|
+ )
|
|
|
+ return {**_empty_batch(), "walk_actions": walk_actions}
|
|
|
+
|
|
|
fetch_author_works = getattr(platform_client, "fetch_author_works", None) or getattr(
|
|
|
platform_client, "author_works", None
|
|
|
)
|
|
|
if not callable(fetch_author_works):
|
|
|
return _empty_batch()
|
|
|
|
|
|
- decision_by_content_id = _decision_by_content_id(rule_decisions)
|
|
|
- binding, _ = _resolve_edge_binding("author_to_works", walk_strategy)
|
|
|
- author_items = _unique_authors(discovered_content_items)[:2]
|
|
|
# 作者近期作品天然可能包含首轮已发现的同一条视频;不去重会撞
|
|
|
# uk_ca_items_run_policy_content 唯一索引(真实 E2E v1_run_e6ba21f7543b 实证)。
|
|
|
seen_content_ids = {
|
|
|
@@ -226,38 +406,26 @@ def _execute_author_edges(
|
|
|
if item.get("platform_content_id")
|
|
|
}
|
|
|
platform_results: list[dict[str, Any]] = []
|
|
|
- walk_actions: list[dict[str, Any]] = []
|
|
|
author_search_queries: list[dict[str, Any]] = []
|
|
|
for item in author_items:
|
|
|
author_id = item.get("platform_author_id")
|
|
|
if not author_id:
|
|
|
continue
|
|
|
- action_id = _walk_action_id(run_id, policy_run_id, "author_to_works", author_id, "works")
|
|
|
decision = decision_by_content_id.get(item.get("platform_content_id"))
|
|
|
- if not _can_expand_from_decision(decision, "author_to_works"):
|
|
|
+ permission = _edge_permission_for(decision, "author_to_works", policy)
|
|
|
+ if permission == "deny":
|
|
|
walk_actions.append(
|
|
|
- _walk_action(
|
|
|
- run_id,
|
|
|
- policy_run_id,
|
|
|
- action_id,
|
|
|
- "author_to_works",
|
|
|
- "author",
|
|
|
- "Author",
|
|
|
- author_id,
|
|
|
- "AuthorWorksPage",
|
|
|
- author_id,
|
|
|
- "fetch_author_works",
|
|
|
- "skipped",
|
|
|
- created_at,
|
|
|
+ _author_walk_action(
|
|
|
+ run_id, policy_run_id, author_id, "skipped", created_at,
|
|
|
reason_code="blocked_by_rule_decision",
|
|
|
budget_tier="blocked",
|
|
|
- rule_pack_binding=binding,
|
|
|
- rule_pack_execution=_execution_record(decision, content_pack_id=policy_bundle["rule_pack_id"]),
|
|
|
- raw_extra=_decision_context(decision),
|
|
|
+ binding=binding,
|
|
|
+ decision=decision,
|
|
|
+ content_pack=content_pack,
|
|
|
)
|
|
|
)
|
|
|
continue
|
|
|
- budget_tier = _walk_budget_for_decision(decision)
|
|
|
+ budget_tier = "low_budget" if permission == "allow_low_budget" else "normal"
|
|
|
try:
|
|
|
works = fetch_author_works(
|
|
|
{
|
|
|
@@ -268,45 +436,23 @@ def _execute_author_edges(
|
|
|
)
|
|
|
except Exception as exc:
|
|
|
walk_actions.append(
|
|
|
- _walk_action(
|
|
|
- run_id,
|
|
|
- policy_run_id,
|
|
|
- action_id,
|
|
|
- "author_to_works",
|
|
|
- "author",
|
|
|
- "Author",
|
|
|
- author_id,
|
|
|
- "AuthorWorksPage",
|
|
|
- author_id,
|
|
|
- "fetch_author_works",
|
|
|
- "failed",
|
|
|
- created_at,
|
|
|
+ _author_walk_action(
|
|
|
+ run_id, policy_run_id, author_id, "failed", created_at,
|
|
|
reason_code=type(exc).__name__,
|
|
|
budget_tier=budget_tier,
|
|
|
- rule_pack_binding=binding,
|
|
|
- rule_pack_execution=_execution_record(decision, content_pack_id=policy_bundle["rule_pack_id"]),
|
|
|
- raw_extra=_decision_context(decision),
|
|
|
+ binding=binding,
|
|
|
+ decision=decision,
|
|
|
+ content_pack=content_pack,
|
|
|
)
|
|
|
)
|
|
|
continue
|
|
|
walk_actions.append(
|
|
|
- _walk_action(
|
|
|
- run_id,
|
|
|
- policy_run_id,
|
|
|
- action_id,
|
|
|
- "author_to_works",
|
|
|
- "author",
|
|
|
- "Author",
|
|
|
- author_id,
|
|
|
- "AuthorWorksPage",
|
|
|
- author_id,
|
|
|
- "fetch_author_works",
|
|
|
- "success",
|
|
|
- created_at,
|
|
|
+ _author_walk_action(
|
|
|
+ run_id, policy_run_id, author_id, "success", created_at,
|
|
|
budget_tier=budget_tier,
|
|
|
- rule_pack_binding=binding,
|
|
|
- rule_pack_execution=_execution_record(decision, content_pack_id=policy_bundle["rule_pack_id"]),
|
|
|
- raw_extra=_decision_context(decision),
|
|
|
+ binding=binding,
|
|
|
+ decision=decision,
|
|
|
+ content_pack=content_pack,
|
|
|
)
|
|
|
)
|
|
|
new_works = [
|
|
|
@@ -330,7 +476,7 @@ def _execute_author_edges(
|
|
|
raw_extra={"platform_author_id": author_id},
|
|
|
)
|
|
|
)
|
|
|
- for index, work in enumerate(new_works[:3], start=1):
|
|
|
+ for index, work in enumerate(new_works[: budgets["max_works_per_author"]], start=1):
|
|
|
seen_content_ids.add(str(work["platform_content_id"]))
|
|
|
platform_results.append(
|
|
|
{
|
|
|
@@ -344,7 +490,7 @@ def _execute_author_edges(
|
|
|
}
|
|
|
)
|
|
|
if not platform_results:
|
|
|
- return {**_empty_batch(), "walk_actions": walk_actions}
|
|
|
+ return {**_empty_batch(), "walk_actions": walk_actions, "search_queries": author_search_queries}
|
|
|
|
|
|
discovered = content_discovery.run(run_id, policy_run_id, platform_results, source_context, runtime)
|
|
|
recalled = pattern_recall.run(
|
|
|
@@ -377,120 +523,156 @@ def _execute_author_edges(
|
|
|
}
|
|
|
|
|
|
|
|
|
-def _pagination_queries(
|
|
|
+def _author_walk_action(
|
|
|
run_id: str,
|
|
|
policy_run_id: str,
|
|
|
- search_queries: list[dict[str, Any]],
|
|
|
- discovered_content_items: list[dict[str, Any]],
|
|
|
- rule_decisions: list[dict[str, Any]],
|
|
|
+ author_id: str,
|
|
|
+ walk_status: str,
|
|
|
created_at: str,
|
|
|
-) -> list[dict[str, Any]]:
|
|
|
- query_by_id = {row["search_query_id"]: row for row in search_queries}
|
|
|
- query_effect_by_id = _query_effect_by_search_query_id(discovered_content_items, rule_decisions)
|
|
|
- seen: set[str] = set()
|
|
|
- rows: list[dict[str, Any]] = []
|
|
|
- for item in discovered_content_items:
|
|
|
- search_query_id = item.get("search_query_id")
|
|
|
- cursor = item.get("next_cursor")
|
|
|
- if not item.get("has_more") or not cursor or search_query_id in seen:
|
|
|
- continue
|
|
|
- if not _can_fetch_next_page(search_query_id, query_effect_by_id):
|
|
|
- continue
|
|
|
- source = query_by_id.get(search_query_id)
|
|
|
- if not source:
|
|
|
- continue
|
|
|
- seen.add(search_query_id)
|
|
|
- rows.append(
|
|
|
- _search_query_row(
|
|
|
- run_id,
|
|
|
- policy_run_id,
|
|
|
- f"{search_query_id}_page_002",
|
|
|
- source["search_query"],
|
|
|
- "query_next_page",
|
|
|
- source.get("discovery_start_source", "pattern_itemset"),
|
|
|
- "query_next_page",
|
|
|
- created_at,
|
|
|
- page_cursor=str(cursor),
|
|
|
- raw_extra={"parent_search_query_id": search_query_id},
|
|
|
- )
|
|
|
- )
|
|
|
- return rows[:3]
|
|
|
+ *,
|
|
|
+ budget_tier: str,
|
|
|
+ binding: dict[str, Any],
|
|
|
+ decision: dict[str, Any] | None,
|
|
|
+ content_pack: dict[str, Any],
|
|
|
+ reason_code: str | None = None,
|
|
|
+) -> dict[str, Any]:
|
|
|
+ return _walk_action(
|
|
|
+ run_id,
|
|
|
+ policy_run_id,
|
|
|
+ _walk_action_id(run_id, policy_run_id, "author_to_works", author_id, "works"),
|
|
|
+ "author_to_works",
|
|
|
+ "author",
|
|
|
+ "Author",
|
|
|
+ author_id,
|
|
|
+ "AuthorWorksPage",
|
|
|
+ author_id,
|
|
|
+ "fetch_author_works",
|
|
|
+ walk_status,
|
|
|
+ created_at,
|
|
|
+ reason_code=reason_code,
|
|
|
+ budget_tier=budget_tier,
|
|
|
+ rule_pack_binding=binding,
|
|
|
+ rule_pack_execution=_execution_record(decision, content_pack_id=content_pack["rule_pack_id"]),
|
|
|
+ fallback_rule_pack=content_pack,
|
|
|
+ raw_extra=_decision_context(decision),
|
|
|
+ )
|
|
|
|
|
|
|
|
|
-def _tag_queries(
|
|
|
- run_id: str,
|
|
|
- policy_run_id: str,
|
|
|
- items: list[dict[str, Any]],
|
|
|
+def _terminal_stage(
|
|
|
+ pattern_seed_pack: dict[str, Any],
|
|
|
+ search_queries: list[dict[str, Any]],
|
|
|
+ discovered_content_items: list[dict[str, Any]],
|
|
|
decisions: list[dict[str, Any]],
|
|
|
- created_at: str,
|
|
|
- *,
|
|
|
walk_strategy: dict[str, Any],
|
|
|
- content_pack_id: str,
|
|
|
-) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
|
|
|
- decision_by_content_id = _decision_by_content_id(decisions)
|
|
|
- binding, _ = _resolve_edge_binding("hashtag_to_query", walk_strategy)
|
|
|
- rows: list[dict[str, Any]] = []
|
|
|
- skipped_actions: list[dict[str, Any]] = []
|
|
|
- seen: set[str] = set()
|
|
|
- for item in items:
|
|
|
- decision = decision_by_content_id.get(item.get("platform_content_id"))
|
|
|
+ created_at: str,
|
|
|
+) -> dict[str, list[dict[str, Any]]]:
|
|
|
+ """终端边(decision_to_asset/budget_downgrade/path_stop)+ 3 类血缘(原 plan_walk 语义)。"""
|
|
|
+ binding_by_edge = _binding_by_edge_id(walk_strategy)
|
|
|
+ decision_by_target_id = {decision["decision_target_id"]: decision for decision in decisions}
|
|
|
+ walk_actions: list[dict[str, Any]] = []
|
|
|
+ source_path_record_basis: list[dict[str, Any]] = []
|
|
|
+
|
|
|
+ for search_query in search_queries:
|
|
|
+ source_path_record_basis.append(
|
|
|
+ {
|
|
|
+ "policy_run_id": search_query["policy_run_id"],
|
|
|
+ "record_schema_version": search_query["record_schema_version"],
|
|
|
+ "from_node_type": "PatternSeed",
|
|
|
+ "from_node_id": pattern_seed_pack["pattern_execution_id"],
|
|
|
+ "to_node_type": "SearchQuery",
|
|
|
+ "to_node_id": search_query["search_query_id"],
|
|
|
+ "source_path_type": "pattern_to_search_query",
|
|
|
+ "rule_pack_id": None,
|
|
|
+ "decision_id": None,
|
|
|
+ "discovery_start_source": search_query["discovery_start_source"],
|
|
|
+ "previous_discovery_step": search_query["previous_discovery_step"],
|
|
|
+ "origin_path_id": f"pattern_to_search_query:{search_query['search_query_id']}",
|
|
|
+ "source_evidence_ref": "source_context.json#ext_data.evidence_pack",
|
|
|
+ }
|
|
|
+ )
|
|
|
+
|
|
|
+ for item in discovered_content_items:
|
|
|
+ # 无判定内容不产终端动作;判定覆盖完整性由 validate_run 把关,这里不掩盖。
|
|
|
+ decision = decision_by_target_id.get(item["platform_content_id"])
|
|
|
if not decision:
|
|
|
continue
|
|
|
- if not _can_expand_from_decision(decision, "hashtag_to_query"):
|
|
|
- if item.get("tags"):
|
|
|
- reason_code = (
|
|
|
- "review_tag_expansion_disabled"
|
|
|
- if decision.get("decision_action") == "KEEP_CONTENT_FOR_REVIEW"
|
|
|
- else "blocked_by_rule_decision"
|
|
|
- )
|
|
|
- skipped_actions.append(
|
|
|
- _walk_action(
|
|
|
- run_id,
|
|
|
- policy_run_id,
|
|
|
- _walk_action_id(
|
|
|
- run_id, policy_run_id, "hashtag_to_query", item["platform_content_id"], "tags"
|
|
|
- ),
|
|
|
- "hashtag_to_query",
|
|
|
- "tag_query",
|
|
|
- "Content",
|
|
|
- item["platform_content_id"],
|
|
|
- "SearchQuery",
|
|
|
- "tag_query_skipped",
|
|
|
- "create_tag_query",
|
|
|
- "skipped",
|
|
|
- created_at,
|
|
|
- reason_code=reason_code,
|
|
|
- budget_tier="blocked",
|
|
|
- rule_pack_binding=binding,
|
|
|
- rule_pack_execution=_execution_record(decision, content_pack_id=content_pack_id),
|
|
|
- raw_extra=_decision_context(decision),
|
|
|
- )
|
|
|
- )
|
|
|
- continue
|
|
|
- for tag in item.get("tags") or []:
|
|
|
- normalized = str(tag).lstrip("#").strip()
|
|
|
- if not normalized or normalized in seen or _blocked_tag(normalized):
|
|
|
- continue
|
|
|
- seen.add(normalized)
|
|
|
- rows.append(
|
|
|
- _search_query_row(
|
|
|
- run_id,
|
|
|
- policy_run_id,
|
|
|
- f"tag_{_short_hash(normalized)}",
|
|
|
- normalized,
|
|
|
- "tag_query",
|
|
|
- item.get("discovery_start_source", "pattern_itemset"),
|
|
|
- "hashtag_to_query",
|
|
|
- created_at,
|
|
|
- raw_extra={
|
|
|
- "hashtag": normalized,
|
|
|
- "source_content_id": item.get("platform_content_id"),
|
|
|
- },
|
|
|
- )
|
|
|
+ decision_action = walk_terminal._action_for_decision(decision["decision_action"])
|
|
|
+ binding = binding_by_edge.get(decision_action["edge_id"]) or {}
|
|
|
+ execution = {
|
|
|
+ "executed": True,
|
|
|
+ "executed_rule_pack_id": decision["rule_pack_id"],
|
|
|
+ "reason": "content_decision_reused_for_walk_gate",
|
|
|
+ }
|
|
|
+ walk_action_id = walk_terminal._walk_action_id(
|
|
|
+ decision["run_id"],
|
|
|
+ decision["policy_run_id"],
|
|
|
+ decision_action["edge_id"],
|
|
|
+ item["platform_content_id"],
|
|
|
+ decision["decision_id"],
|
|
|
+ )
|
|
|
+ query_sources = item.get("query_sources") or [
|
|
|
+ {
|
|
|
+ "search_query_id": item["search_query_id"],
|
|
|
+ "search_query": item.get("search_query"),
|
|
|
+ "search_query_generation_method": item.get("search_query_generation_method"),
|
|
|
+ }
|
|
|
+ ]
|
|
|
+ for query_source in query_sources:
|
|
|
+ search_query_id = query_source["search_query_id"]
|
|
|
+ source_path_record_basis.append(
|
|
|
+ {
|
|
|
+ "policy_run_id": decision["policy_run_id"],
|
|
|
+ "record_schema_version": decision["record_schema_version"],
|
|
|
+ "from_node_type": "SearchQuery",
|
|
|
+ "from_node_id": search_query_id,
|
|
|
+ "to_node_type": "Content",
|
|
|
+ "to_node_id": item["platform_content_id"],
|
|
|
+ "source_path_type": "search_query_to_content",
|
|
|
+ "rule_pack_id": decision["rule_pack_id"],
|
|
|
+ "decision_id": decision["decision_id"],
|
|
|
+ "discovery_start_source": item["discovery_start_source"],
|
|
|
+ "previous_discovery_step": item["previous_discovery_step"],
|
|
|
+ "origin_path_id": (
|
|
|
+ f"search_query_to_content:{search_query_id}:"
|
|
|
+ f"{item['platform_content_id']}"
|
|
|
+ ),
|
|
|
+ "source_evidence_ref": decision["decision_input_snapshot_id"],
|
|
|
+ "walk_action_id": walk_action_id,
|
|
|
+ "rule_pack_binding": binding,
|
|
|
+ "rule_pack_execution": execution,
|
|
|
+ }
|
|
|
+ )
|
|
|
+ walk_actions.append(
|
|
|
+ walk_terminal._walk_action_row(
|
|
|
+ decision, item, decision_action, walk_action_id, created_at, binding, execution
|
|
|
)
|
|
|
- if len(rows) >= 1:
|
|
|
- return rows, skipped_actions
|
|
|
- return rows, skipped_actions
|
|
|
+ )
|
|
|
+ if decision["decision_action"] == "ADD_TO_CONTENT_POOL":
|
|
|
+ source_path_record_basis.append(
|
|
|
+ {
|
|
|
+ "policy_run_id": decision["policy_run_id"],
|
|
|
+ "record_schema_version": decision["record_schema_version"],
|
|
|
+ "from_node_type": "RuleDecision",
|
|
|
+ "from_node_id": decision["decision_id"],
|
|
|
+ "to_node_type": "ContentAsset",
|
|
|
+ "to_node_id": item["platform_content_id"],
|
|
|
+ "source_path_type": "decision_to_asset",
|
|
|
+ "rule_pack_id": decision["rule_pack_id"],
|
|
|
+ "decision_id": decision["decision_id"],
|
|
|
+ "discovery_start_source": item["discovery_start_source"],
|
|
|
+ "previous_discovery_step": "asset_commit",
|
|
|
+ "origin_path_id": (
|
|
|
+ f"decision_to_asset:{decision['decision_id']}:"
|
|
|
+ f"{item['platform_content_id']}"
|
|
|
+ ),
|
|
|
+ "source_evidence_ref": decision["decision_input_snapshot_id"],
|
|
|
+ "walk_action_id": walk_action_id,
|
|
|
+ "rule_pack_binding": binding,
|
|
|
+ "rule_pack_execution": execution,
|
|
|
+ }
|
|
|
+ )
|
|
|
+
|
|
|
+ return {"walk_actions": walk_actions, "source_path_record_basis": source_path_record_basis}
|
|
|
|
|
|
|
|
|
def _query_actions(
|
|
|
@@ -499,7 +681,7 @@ def _query_actions(
|
|
|
created_at: str,
|
|
|
*,
|
|
|
walk_strategy: dict[str, Any],
|
|
|
- content_pack_id: str,
|
|
|
+ content_pack: dict[str, Any],
|
|
|
) -> list[dict[str, Any]]:
|
|
|
failure_ids = {row["search_query_id"] for row in query_failures}
|
|
|
actions: list[dict[str, Any]] = []
|
|
|
@@ -536,9 +718,10 @@ def _query_actions(
|
|
|
rule_pack_binding=binding,
|
|
|
rule_pack_execution={
|
|
|
"executed": True,
|
|
|
- "executed_rule_pack_id": content_pack_id,
|
|
|
+ "executed_rule_pack_id": content_pack["rule_pack_id"],
|
|
|
"reason": "content_decision_reused_for_walk_gate",
|
|
|
},
|
|
|
+ fallback_rule_pack=content_pack,
|
|
|
)
|
|
|
)
|
|
|
return actions
|
|
|
@@ -593,6 +776,7 @@ def _walk_action(
|
|
|
budget_tier: str | None = None,
|
|
|
rule_pack_binding: dict[str, Any] | None = None,
|
|
|
rule_pack_execution: dict[str, Any] | None = None,
|
|
|
+ fallback_rule_pack: dict[str, Any] | None = None,
|
|
|
raw_extra: dict[str, Any] | None = None,
|
|
|
) -> dict[str, Any]:
|
|
|
row = with_raw_payload(
|
|
|
@@ -613,8 +797,10 @@ def _walk_action(
|
|
|
"depth": 1,
|
|
|
"page_cursor": page_cursor,
|
|
|
"reason_code": reason_code,
|
|
|
- "rule_pack_id": (rule_pack_binding or {}).get("rule_pack_id"),
|
|
|
- "rule_pack_version": (rule_pack_binding or {}).get("rule_pack_version"),
|
|
|
+ "rule_pack_id": (rule_pack_binding or {}).get("rule_pack_id")
|
|
|
+ or (fallback_rule_pack or {}).get("rule_pack_id"),
|
|
|
+ "rule_pack_version": (rule_pack_binding or {}).get("rule_pack_version")
|
|
|
+ or (fallback_rule_pack or {}).get("rule_pack_version"),
|
|
|
"created_at": created_at,
|
|
|
}
|
|
|
)
|
|
|
@@ -659,26 +845,13 @@ def _can_fetch_next_page(search_query_id: str, query_effect_by_id: dict[str, str
|
|
|
return query_effect_by_id.get(search_query_id) == "success"
|
|
|
|
|
|
|
|
|
-def _walk_budget_for_decision(decision: dict[str, Any] | None) -> str:
|
|
|
- if not decision or decision.get("search_query_effect_status") == "rule_blocked":
|
|
|
- return "blocked"
|
|
|
- action = decision.get("decision_action")
|
|
|
- if action == "ADD_TO_CONTENT_POOL":
|
|
|
- return "normal"
|
|
|
- if action == "KEEP_CONTENT_FOR_REVIEW":
|
|
|
- return "low_budget"
|
|
|
- return "blocked"
|
|
|
-
|
|
|
-
|
|
|
-def _can_expand_from_decision(decision: dict[str, Any] | None, edge_id: str) -> bool:
|
|
|
+def _edge_permission_for(
|
|
|
+ decision: dict[str, Any] | None, edge_id: str, policy: dict[str, Any]
|
|
|
+) -> str:
|
|
|
+ """判定→边通行证:无判定 / 查询 rule_blocked 一律 deny,其余查 edge_permissions。"""
|
|
|
if not decision or decision.get("search_query_effect_status") == "rule_blocked":
|
|
|
- return False
|
|
|
- action = decision.get("decision_action")
|
|
|
- if edge_id == "author_to_works":
|
|
|
- return action in {"ADD_TO_CONTENT_POOL", "KEEP_CONTENT_FOR_REVIEW"}
|
|
|
- if edge_id == "hashtag_to_query":
|
|
|
- return action == "ADD_TO_CONTENT_POOL"
|
|
|
- return False
|
|
|
+ return "deny"
|
|
|
+ return edge_permission(policy, decision.get("decision_action"), edge_id)
|
|
|
|
|
|
|
|
|
def _binding_by_edge_id(walk_strategy: dict[str, Any]) -> dict[str, dict[str, Any]]:
|
|
|
@@ -749,6 +922,7 @@ def _empty_batch() -> dict[str, list[dict[str, Any]]]:
|
|
|
"content_media_records": [],
|
|
|
"evidence_bundles": [],
|
|
|
"rule_decisions": [],
|
|
|
+ "search_queries": [],
|
|
|
"query_failures": [],
|
|
|
"walk_actions": [],
|
|
|
}
|