test_config_case_matrix.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. """config x case matrix (V2-M0E).
  2. Replays the same captured case under different configurations to prove the
  3. "foolproof config" safety net: changing config changes the case outcome,
  4. visibly (snapshot diff), without breaking the pipeline. Variants that depend on
  5. later modules (M3 per-entity dispatch) are xfail until then.
  6. """
  7. from __future__ import annotations
  8. import json
  9. import shutil
  10. from pathlib import Path
  11. import pytest
  12. from content_agent.integrations.policy_json import JsonPolicyBundleStore
  13. from tests.replay_harness import replay_case
  14. from tests.snapshot import assert_matches
# Directory two levels above this file (the parent of the directory holding it);
# presumably the repository root — the paths below are resolved against it.
ROOT = Path(__file__).resolve().parents[1]
# Location of the rule-pack JSON, relative to ROOT (and to any config root that mirrors it).
_RULE_PACK_REL = "product_documents/规则包/douyin_rule_packs.v1.json"
# Location of the walk-strategy JSON, relative to ROOT (and to any config root that mirrors it).
_WALK_REL = "product_documents/抖音游走策略/douyin_walk_strategy.v1.json"
  18. def _senior_block_store(root: Path) -> JsonPolicyBundleStore:
  19. """M3 config variant: flip the not_fit_senior gate to fire on fit_senior_50plus == true.
  20. The captured case's mock Gemini judgment marks every item fit (fit_senior_50plus=true),
  21. so inverting the gate's expected value blocks the whole batch by config alone — a clean
  22. counterproof that the hard gate (and the downstream walk) is config-driven, not hardcoded.
  23. """
  24. (root / _RULE_PACK_REL).parent.mkdir(parents=True, exist_ok=True)
  25. (root / _WALK_REL).parent.mkdir(parents=True, exist_ok=True)
  26. shutil.copy(ROOT / _WALK_REL, root / _WALK_REL)
  27. package = json.loads((ROOT / _RULE_PACK_REL).read_text(encoding="utf-8"))
  28. for pack in package.get("rule_packs", []):
  29. for gate in pack.get("hard_gates", []):
  30. if gate.get("gate_id") == "not_fit_senior":
  31. gate["when"]["value"] = True
  32. (root / _RULE_PACK_REL).write_text(json.dumps(package, ensure_ascii=False, indent=2), encoding="utf-8")
  33. return JsonPolicyBundleStore(root)
  34. def _outcome(artifacts) -> dict:
  35. return {
  36. "reasons": sorted(d.get("decision_reason_code") for d in artifacts.decisions),
  37. "effect_status_counts": artifacts.summary.get("effect_status_counts"),
  38. "pooled": artifacts.summary.get("pooled_content_count"),
  39. "rejected": artifacts.summary.get("rejected_content_count"),
  40. }
  41. def _variant_overrides(variant: str, cfg_dir: Path):
  42. if variant == "default":
  43. return None
  44. if variant == "senior_block":
  45. return {"policy_store": _senior_block_store(cfg_dir)}
  46. raise ValueError(variant)
  47. @pytest.mark.parametrize("variant", ["default", "senior_block"])
  48. def test_matrix_real_id45(variant, tmp_path):
  49. overrides = _variant_overrides(variant, tmp_path / "cfg")
  50. artifacts = replay_case("real_id45", runtime_root=tmp_path / "rt", config_overrides=overrides)
  51. assert artifacts.state["status"] == "success" # config change must not break the chain
  52. assert_matches(f"matrix/real_id45__{variant}", _outcome(artifacts))
  53. def test_senior_block_changes_outcome(tmp_path):
  54. base = _outcome(replay_case("real_id45", runtime_root=tmp_path / "rt0"))
  55. blocked = _outcome(
  56. replay_case(
  57. "real_id45",
  58. runtime_root=tmp_path / "rt1",
  59. config_overrides={"policy_store": _senior_block_store(tmp_path / "cfg")},
  60. )
  61. )
  62. # Decoupling proof: one config edit on the not_fit_senior gate visibly moves the outcome.
  63. assert base != blocked
  64. # Default: no item is blocked by the senior-fit gate; items flow into pool / review.
  65. assert "content_not_fit_senior" not in base["reasons"]
  66. assert base["effect_status_counts"]["rule_blocked"] == 0
  67. # R3 第二步: 四字段热度复合后 real_id45 默认 3 进池(原 2)。
  68. assert base["pooled"] == 3
  69. # Blocked variant: every item trips the (config-inverted) hard gate -> rule_blocked reject.
  70. assert blocked["reasons"] == ["content_not_fit_senior"] * 4
  71. assert blocked["effect_status_counts"]["rule_blocked"] == 4
  72. assert blocked["pooled"] == 0
  73. assert blocked["rejected"] == 4
  74. def test_matrix_query_profile_variant():
  75. from scripts.validate_query_prompts_config import validate_query_prompts_config
  76. config = json.loads((ROOT / "product_documents/配置/query_prompts.v1.json").read_text(encoding="utf-8"))
  77. assert validate_query_prompts_config(config) == []
  78. def test_decoupling_counterproof():
  79. # M3A removed the Content hardcode: dispatch is parametrized by target_entity,
  80. # so a non-Content (e.g. Author) pack can be routed without falling back.
  81. source = (ROOT / "content_agent/integrations/policy_json.py").read_text(encoding="utf-8")
  82. assert 'target_entity") == "Content"' not in source
  83. def test_senior_block_blocks_all_walk_expansion(tmp_path):
  84. # M4 受控变化: 全拦截(rule_blocked)时翻页/作者/tag 全停;砍包后 path_stop 戳=内容包。
  85. artifacts = replay_case(
  86. "real_id45",
  87. runtime_root=tmp_path / "rt",
  88. config_overrides={"policy_store": _senior_block_store(tmp_path / "cfg")},
  89. )
  90. walk_actions = artifacts.files["walk_actions.jsonl"]
  91. assert not [row for row in walk_actions if row["edge_id"] == "query_next_page"]
  92. expansions = [
  93. row for row in walk_actions if row["edge_id"] in {"author_to_works", "hashtag_to_query"}
  94. ]
  95. assert expansions
  96. assert all(row["walk_status"] == "skipped" for row in expansions)
  97. assert all(row["reason_code"] == "blocked_by_rule_decision" for row in expansions)
  98. path_stops = [row for row in walk_actions if row["edge_id"] == "path_stop"]
  99. assert len(path_stops) == 4
  100. for row in path_stops:
  101. assert row["rule_pack_id"] == "douyin_content_discovery_rule_pack_v1"
  102. assert row["raw_payload"]["rule_pack_execution"]["executed_rule_pack_id"] == (
  103. "douyin_content_discovery_rule_pack_v1"
  104. )