test_rule_judgment_hard_gates.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. from __future__ import annotations
  2. from copy import deepcopy
  3. import pytest
  4. from content_agent.business_modules.rule_judgment.evaluator import decide
  5. from content_agent.run_service import RunService
  6. from content_agent.schemas import RunStartRequest
  7. from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
  8. def test_hard_gate_outputs_rule_blocked_and_primary_reason(tmp_path):
  9. state = _state(tmp_path)
  10. bundle = deepcopy(state["evidence_bundles"][0])
  11. bundle["content"]["platform_content_id"] = ""
  12. bundle["source_evidence"] = {}
  13. decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
  14. assert decision["decision_action"] == "REJECT_CONTENT"
  15. assert decision["decision_reason_code"] == "missing_platform_content_id"
  16. assert decision["search_query_effect_status"] == "rule_blocked"
  17. assert set(decision["triggered_blocking_rules"]) >= {
  18. "missing_platform_content_id",
  19. "missing_source_evidence",
  20. }
  21. replay = decision["decision_replay_data"]
  22. assert replay["primary_gate_id"] == "missing_platform_content_id"
  23. assert replay["primary_reason_code"] == "missing_platform_content_id"
  24. assert replay["effect_mapping_id"] == "map_reject_rule_blocked"
  25. def test_unknown_hard_gate_operator_fails_fast(tmp_path):
  26. state = _state(tmp_path)
  27. policy_bundle = deepcopy(state["policy_bundle"])
  28. policy_bundle["rule_pack"]["hard_gates"][0]["when"]["op"] = "starts_with"
  29. with pytest.raises(ValueError, match="unsupported rule operator"):
  30. decide(
  31. state["run_id"],
  32. state["policy_run_id"],
  33. 1,
  34. state["evidence_bundles"][0],
  35. policy_bundle,
  36. )
  37. def test_not_fit_senior_is_a_blocking_hard_gate(tmp_path):
  38. # M3: pattern_recall gate retired. The senior-fit judgment is now the blocking gate.
  39. state = _state(tmp_path)
  40. bundle = deepcopy(state["evidence_bundles"][0])
  41. bundle["pattern_match_result"]["fit_senior_50plus"] = False
  42. decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
  43. assert decision["decision_action"] == "REJECT_CONTENT"
  44. assert decision["decision_reason_code"] == "content_not_fit_senior"
  45. assert decision["search_query_effect_status"] == "rule_blocked"
  46. assert decision["triggered_blocking_rules"] == ["not_fit_senior"]
  47. # Retired pattern-recall reason code must no longer surface.
  48. assert decision["decision_reason_code"] != "content_pattern_recall_required"
  49. def test_hard_gate_action_is_taken_from_rule_config(tmp_path):
  50. # The judge_failed gate defaults to KEEP_CONTENT_FOR_REVIEW; flipping its config to
  51. # REJECT_CONTENT must change the decision purely via config, with no code special-case.
  52. state = _state(tmp_path)
  53. bundle = deepcopy(state["evidence_bundles"][0])
  54. bundle["pattern_match_result"]["judge_status"] = "failed"
  55. decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
  56. assert decision["decision_action"] == "KEEP_CONTENT_FOR_REVIEW"
  57. flipped = deepcopy(state["policy_bundle"])
  58. for gate in flipped["rule_pack"]["hard_gates"]:
  59. if gate["gate_id"] == "judge_failed":
  60. gate["decision_action"] = "REJECT_CONTENT"
  61. gate["severity"] = "fatal"
  62. decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, flipped)
  63. assert decision["decision_action"] == "REJECT_CONTENT"
  64. assert decision["search_query_effect_status"] == "rule_blocked"
  65. def test_judge_failed_review_is_config_driven_not_code_special_case(tmp_path):
  66. # M3: the review/pending hard gate is now judge_failed (Gemini technical failure parks
  67. # content for human review). This is config-driven, not a code special-case.
  68. state = _state(tmp_path)
  69. bundle = deepcopy(state["evidence_bundles"][0])
  70. bundle["pattern_match_result"]["judge_status"] = "failed"
  71. decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
  72. assert decision["decision_action"] == "KEEP_CONTENT_FOR_REVIEW"
  73. assert decision["decision_reason_code"] == "content_judge_failed"
  74. assert decision["search_query_effect_status"] == "pending"
  75. assert decision["triggered_blocking_rules"] == ["judge_failed"]
  76. assert decision["score"] is None
  77. assert decision["decision_replay_data"]["effect_mapping_id"] == "map_keep_for_review_pending_hard_gate"
  78. def test_low_confidence_below_threshold_rejects(tmp_path):
  79. # M3: age_50_plus_weak gate retired. Low Gemini confidence is now the blocking gate
  80. # (fit_confidence lt 0.6 -> REJECT_CONTENT / content_low_confidence / rule_blocked).
  81. state = _state(tmp_path)
  82. bundle = deepcopy(state["evidence_bundles"][0])
  83. bundle["pattern_match_result"]["fit_confidence"] = 0.4
  84. decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
  85. assert decision["decision_action"] == "REJECT_CONTENT"
  86. assert decision["decision_reason_code"] == "content_low_confidence"
  87. assert decision["search_query_effect_status"] == "rule_blocked"
  88. assert decision["triggered_blocking_rules"] == ["low_confidence"]
  89. # Retired age gate / reason code must no longer surface.
  90. assert decision["decision_reason_code"] != "age_50_plus_weak"
  91. def test_low_confidence_lt_boundary_passes_at_threshold(tmp_path):
  92. # lt 0.6 boundary: exactly 0.6 is NOT low confidence, so the gate does not fire and
  93. # the content falls through to scoring instead of being rule-blocked.
  94. state = _state(tmp_path)
  95. bundle = deepcopy(state["evidence_bundles"][0])
  96. bundle["pattern_match_result"]["fit_confidence"] = 0.6
  97. decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
  98. assert "low_confidence" not in decision["triggered_blocking_rules"]
  99. assert decision["decision_reason_code"] != "content_low_confidence"
  100. assert decision["search_query_effect_status"] != "rule_blocked"
  101. def _state(tmp_path):
  102. service = RunService(
  103. runtime_root=tmp_path / "runtime" / "v1",
  104. query_variant_client=FakeQueryVariantClient(),
  105. )
  106. return service.start_run(
  107. RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
  108. )