| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217 |
- from __future__ import annotations
- from copy import deepcopy
- import pytest
- from content_agent.business_modules.rule_judgment.evaluator import decide
- from content_agent.run_service import RunService
- from content_agent.schemas import RunStartRequest
- from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
def test_scorecard_uses_active_dimensions_and_thresholds(tmp_path):
    """Score a bundle on both active dimensions and expect it to be pooled."""
    run_state = _state(tmp_path)
    evidence = deepcopy(run_state["evidence_bundles"][0])
    # M3 two-dimension scorecard: relevance >= 0.8 earns 60 and
    # platform_heat >= 0.4 earns 20, which totals 80 (content pool).
    evidence["pattern_match_result"]["relevance_score"] = 0.8
    evidence["content_engagement_metrics"]["platform_heat"] = 0.4

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        evidence,
        run_state["policy_bundle"],
    )

    assert outcome["decision_action"] == "ADD_TO_CONTENT_POOL"
    assert outcome["score"] == 80

    # Index the scorecard rows by dimension key for direct lookup.
    by_key = {}
    for entry in outcome["scorecard"]["dimensions"]:
        by_key[entry["key"]] = entry
    assert by_key["relevance"]["score"] == 60
    assert by_key["platform_heat"]["score"] == 20
    # 2026-06-12 cleanup: the five deprecated dimensions were physically
    # removed from the rule pack, so only the two active ones remain.
    assert set(by_key) == {"relevance", "platform_heat"}
def test_rule_pack_scorecard_has_only_two_active_dimensions():
    """Pin the rule-pack file to exactly the two active scorecard dimensions."""
    # Configuration-level guard: each rule pack keeps only relevance and
    # platform_heat; the five deprecated dimension definitions are gone.
    import json
    from pathlib import Path

    # NOTE(review): the path is relative, so this presumably relies on the
    # tests being launched from the repository root — confirm.
    pack_file = Path("product_documents/规则包/douyin_rule_packs.v1.json")
    document = json.loads(pack_file.read_text(encoding="utf-8"))

    for pack in document["rule_packs"]:
        keys = [dim["key"] for dim in pack["scorecard"]["dimensions"]]
        assert keys == ["relevance", "platform_heat"], keys
        assert all(
            dim["runtime_status"] == "active"
            for dim in pack["scorecard"]["dimensions"]
        )
def test_missing_scoring_rules_fail_fast(tmp_path):
    """An emptied scoring-rule list must make ``decide`` raise ``ValueError``."""
    run_state = _state(tmp_path)
    stripped_policy = deepcopy(run_state["policy_bundle"])
    # Remove every scoring rule while leaving the scorecard dimensions as-is.
    stripped_policy["rule_pack"]["scorecard"]["scoring_rules"] = []

    expected_error = pytest.raises(
        ValueError, match="active scorecard dimensions require"
    )
    with expected_error:
        decide(
            run_state["run_id"],
            run_state["policy_run_id"],
            1,
            run_state["evidence_bundles"][0],
            stripped_policy,
        )
def test_no_scoring_evidence_uses_missing_score_policy(tmp_path):
    """With no evidence for either active dimension the content is rejected."""
    run_state = _state(tmp_path)
    evidence = deepcopy(run_state["evidence_bundles"][0])
    # Drop the evidence behind both active dimensions (relevance and
    # platform_heat) so that no scoring rule can match.
    for section, field in (
        ("pattern_match_result", "relevance_score"),
        ("content_engagement_metrics", "platform_heat"),
    ):
        evidence[section].pop(field, None)

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        evidence,
        run_state["policy_bundle"],
    )

    assert outcome["decision_action"] == "REJECT_CONTENT"
    assert outcome["decision_reason_code"] == "missing_score"
    assert outcome["search_query_effect_status"] == "failed"
    assert outcome["score"] is None
@pytest.mark.parametrize(
    ("total_score", "expected_action", "expected_status"),
    [
        # Values on either side of the two action boundaries (60 and 70).
        (59, "REJECT_CONTENT", "failed"),
        (60, "KEEP_CONTENT_FOR_REVIEW", "pending"),
        (69, "KEEP_CONTENT_FOR_REVIEW", "pending"),
        (70, "ADD_TO_CONTENT_POOL", "success"),
    ],
)
def test_score_threshold_boundaries(tmp_path, total_score, expected_action, expected_status):
    """Check the action and query-effect status chosen for each exact total."""
    run_state = _state(tmp_path)
    # Swap in scoring rules that are built to add up to ``total_score``.
    forced_policy = _policy_with_total_score(run_state["policy_bundle"], total_score)

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        run_state["evidence_bundles"][0],
        forced_policy,
    )

    assert outcome["score"] == total_score
    assert outcome["decision_action"] == expected_action
    assert outcome["search_query_effect_status"] == expected_status
def test_scoring_rule_unknown_operator_fails_fast(tmp_path):
    """A scoring rule with an unsupported operator makes ``decide`` raise."""
    run_state = _state(tmp_path)
    tampered_policy = deepcopy(run_state["policy_bundle"])

    # Rewrite the operator on every rule carrying the targeted id.
    targeted = (
        candidate
        for candidate in tampered_policy["rule_pack"]["scorecard"]["scoring_rules"]
        if candidate["scoring_rule_id"] == "score_relevance_high"
    )
    for candidate in targeted:
        candidate["operator"] = "contains"

    with pytest.raises(ValueError, match="unsupported rule operator"):
        decide(
            run_state["run_id"],
            run_state["policy_run_id"],
            1,
            run_state["evidence_bundles"][0],
            tampered_policy,
        )
def test_single_missing_dimension_scores_zero_and_keeps_threshold_flow(tmp_path):
    """One dimension without evidence scores 0 but does not trigger missing_score."""
    run_state = _state(tmp_path)
    evidence = deepcopy(run_state["evidence_bundles"][0])
    # Relevance evidence is present (0.8 -> 60); platform_heat evidence is
    # absent, so that dimension scores 0 rather than flagging missing_score.
    evidence["pattern_match_result"]["relevance_score"] = 0.8
    evidence["content_engagement_metrics"].pop("platform_heat", None)

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        evidence,
        run_state["policy_bundle"],
    )
    by_key = {}
    for entry in outcome["scorecard"]["dimensions"]:
        by_key[entry["key"]] = entry

    heat_row = by_key["platform_heat"]
    assert heat_row["score_missing"] is True
    assert heat_row["score"] == 0
    relevance_row = by_key["relevance"]
    assert relevance_row["score_missing"] is False
    assert relevance_row["score"] == 60
    assert outcome["score"] == 60
    assert outcome["decision_reason_code"] != "missing_score"
    assert outcome["scorecard"]["score_missing"] is False
def test_all_dimensions_missing_uses_score_missing_policy(tmp_path):
    """When every active dimension lacks evidence the scorecard is marked missing."""
    run_state = _state(tmp_path)
    evidence = deepcopy(run_state["evidence_bundles"][0])
    # Both active dimensions (relevance and platform_heat) lose their
    # evidence, which should route the decision to the score_missing policy.
    for section, field in (
        ("pattern_match_result", "relevance_score"),
        ("content_engagement_metrics", "platform_heat"),
    ):
        evidence[section].pop(field, None)

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        evidence,
        run_state["policy_bundle"],
    )

    assert outcome["decision_action"] == "REJECT_CONTENT"
    assert outcome["decision_reason_code"] == "missing_score"
    assert outcome["score"] is None
    scorecard = outcome["scorecard"]
    assert scorecard["score_missing"] is True
    assert all(entry["score_missing"] for entry in scorecard["dimensions"])
def test_dimension_missing_metadata_is_recorded(tmp_path):
    """Replay data lists the dimensions that had no evidence, or none at all."""
    run_state = _state(tmp_path)

    # First decision: platform_heat evidence removed.
    without_heat = deepcopy(run_state["evidence_bundles"][0])
    without_heat["content_engagement_metrics"].pop("platform_heat", None)
    partial = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        without_heat,
        run_state["policy_bundle"],
    )
    replay = partial["decision_replay_data"]
    assert replay["missing_dimensions"] == ["platform_heat"]

    # Second decision: a fresh copy with platform_heat evidence supplied.
    with_heat = deepcopy(run_state["evidence_bundles"][0])
    with_heat["content_engagement_metrics"]["platform_heat"] = 0.8
    complete = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        2,
        with_heat,
        run_state["policy_bundle"],
    )
    assert complete["decision_replay_data"]["missing_dimensions"] == []
def _state(tmp_path):
    """Start a mock-platform run under ``tmp_path`` and return ``start_run``'s result.

    The tests in this module index the returned value by ``run_id``,
    ``policy_run_id``, ``evidence_bundles`` and ``policy_bundle``.
    """
    runtime_root = tmp_path / "runtime" / "v1"
    run_service = RunService(
        runtime_root=runtime_root,
        query_variant_client=FakeQueryVariantClient(),
    )
    request = RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
    return run_service.start_run(request)
- def _policy_with_total_score(policy_bundle, total_score):
- """Build an exact total score from the two M3 active dims (relevance max60, platform_heat max40).
- Replaces every scoring rule with one always-matching rule per active dimension whose
- score_value sums to ``total_score`` (relevance carries up to 60, heat the remainder).
- """
- policy_bundle = deepcopy(policy_bundle)
- scorecard = policy_bundle["rule_pack"]["scorecard"]
- relevance_score = min(total_score, 60)
- heat_score = total_score - relevance_score
- assert heat_score <= 40, "total_score exceeds combined active-dimension caps"
- scorecard["scoring_rules"] = [
- {
- "scoring_rule_id": "test_relevance_score",
- "dimension_key": "relevance",
- "field_path": "content.decision_target_type",
- "operator": "eq",
- "expected_value": "content",
- "score_value": relevance_score,
- "priority": 1,
- "enabled": True,
- },
- {
- "scoring_rule_id": "test_heat_score",
- "dimension_key": "platform_heat",
- "field_path": "content.decision_target_type",
- "operator": "eq",
- "expected_value": "content",
- "score_value": heat_score,
- "priority": 1,
- "enabled": True,
- },
- ]
- return policy_bundle
|