lisihan
/
content-find-agent-new


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217
							from __future__ import annotations

from copy import deepcopy

import pytest

from content_agent.business_modules.rule_judgment.evaluator import decide
from content_agent.run_service import RunService
from content_agent.schemas import RunStartRequest
from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE


def test_scorecard_uses_active_dimensions_and_thresholds(tmp_path):
    """Score a bundle with both active dimensions present and check the outcome.

    With relevance_score 0.8 and platform_heat 0.4 the decision is expected to
    total 80, land in the content pool, and expose exactly two scorecard
    dimensions.
    """
    run_state = _state(tmp_path)
    evidence = deepcopy(run_state["evidence_bundles"][0])
    # M3 two-dimension scorecard: relevance >= 0.8 earns 60 and
    # platform_heat >= 0.4 earns 20, so the total is 80 (pool).
    evidence["pattern_match_result"]["relevance_score"] = 0.8
    evidence["content_engagement_metrics"]["platform_heat"] = 0.4

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        evidence,
        run_state["policy_bundle"],
    )

    assert outcome["decision_action"] == "ADD_TO_CONTENT_POOL"
    assert outcome["score"] == 80

    # Index the scorecard rows by dimension key for direct lookups.
    rows_by_key = {}
    for row in outcome["scorecard"]["dimensions"]:
        rows_by_key[row["key"]] = row
    assert rows_by_key["relevance"]["score"] == 60
    assert rows_by_key["platform_heat"]["score"] == 20
    # 2026-06-12 cleanup: the 5 deprecated dimensions were physically removed
    # from the rule pack, so the scorecard keeps only the 2 active dimensions.
    assert set(rows_by_key) == {"relevance", "platform_heat"}


def test_rule_pack_scorecard_has_only_two_active_dimensions():
    """Pin the rule-pack configuration to the two active scorecard dimensions.

    Every pack in the rule-pack file must list exactly ``relevance`` and
    ``platform_heat`` (in that order), each with ``runtime_status`` equal to
    ``"active"``; the five deprecated dimension definitions must be gone.
    """
    import json
    from pathlib import Path

    # NOTE(review): this path is relative, so it resolves against the current
    # working directory -- presumably the tests run from the repository root;
    # confirm before running from elsewhere.
    rule_pack_path = Path("product_documents/规则包/douyin_rule_packs.v1.json")
    rule_pack = json.loads(rule_pack_path.read_text(encoding="utf-8"))

    packs = rule_pack["rule_packs"]
    # Without this guard an empty "rule_packs" list would skip the loop below
    # and the test would pass without checking anything.
    assert packs, "rule pack file contains no rule_packs to check"

    for pack in packs:
        dimensions = pack["scorecard"]["dimensions"]
        keys = [dim["key"] for dim in dimensions]
        assert keys == ["relevance", "platform_heat"], keys
        assert all(dim["runtime_status"] == "active" for dim in dimensions)


def test_missing_scoring_rules_fail_fast(tmp_path):
    """Expect decide() to raise ValueError when the scoring rules list is empty."""
    run_state = _state(tmp_path)
    stripped_policy = deepcopy(run_state["policy_bundle"])
    # Remove every scoring rule from the copied policy only.
    stripped_policy["rule_pack"]["scorecard"]["scoring_rules"] = []

    expected_error = pytest.raises(
        ValueError, match="active scorecard dimensions require"
    )
    with expected_error:
        decide(
            run_state["run_id"],
            run_state["policy_run_id"],
            1,
            run_state["evidence_bundles"][0],
            stripped_policy,
        )


def test_no_scoring_evidence_uses_missing_score_policy(tmp_path):
    """With no evidence for either active dimension, expect a missing_score rejection."""
    run_state = _state(tmp_path)
    evidence = deepcopy(run_state["evidence_bundles"][0])
    # Drop the evidence for both active dimensions (relevance and
    # platform_heat) so that no scoring rule can match.
    removed_fields = (
        ("pattern_match_result", "relevance_score"),
        ("content_engagement_metrics", "platform_heat"),
    )
    for section, field in removed_fields:
        evidence[section].pop(field, None)

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        evidence,
        run_state["policy_bundle"],
    )

    assert outcome["decision_action"] == "REJECT_CONTENT"
    assert outcome["decision_reason_code"] == "missing_score"
    assert outcome["search_query_effect_status"] == "failed"
    assert outcome["score"] is None


@pytest.mark.parametrize(
    "total_score, expected_action, expected_status",
    [
        (59, "REJECT_CONTENT", "failed"),
        (60, "KEEP_CONTENT_FOR_REVIEW", "pending"),
        (69, "KEEP_CONTENT_FOR_REVIEW", "pending"),
        (70, "ADD_TO_CONTENT_POOL", "success"),
    ],
)
def test_score_threshold_boundaries(tmp_path, total_score, expected_action, expected_status):
    """Check the action and query-effect status on each side of the 60 and 70 boundaries.

    The policy is rewritten so the scorecard yields exactly ``total_score``;
    the decision must then report that score together with the expected
    action and search-query effect status.
    """
    run_state = _state(tmp_path)
    forced_policy = _policy_with_total_score(run_state["policy_bundle"], total_score)
    first_bundle = run_state["evidence_bundles"][0]

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        first_bundle,
        forced_policy,
    )

    observed = (
        outcome["score"],
        outcome["decision_action"],
        outcome["search_query_effect_status"],
    )
    assert observed == (total_score, expected_action, expected_status)


def test_scoring_rule_unknown_operator_fails_fast(tmp_path):
    """Expect decide() to raise ValueError when a scoring rule uses the operator "contains"."""
    run_state = _state(tmp_path)
    broken_policy = deepcopy(run_state["policy_bundle"])
    # Swap the operator on the "score_relevance_high" rule only; every other
    # rule in the copied policy is left as it was.
    for rule in broken_policy["rule_pack"]["scorecard"]["scoring_rules"]:
        if rule["scoring_rule_id"] != "score_relevance_high":
            continue
        rule["operator"] = "contains"

    expected_error = pytest.raises(ValueError, match="unsupported rule operator")
    with expected_error:
        decide(
            run_state["run_id"],
            run_state["policy_run_id"],
            1,
            run_state["evidence_bundles"][0],
            broken_policy,
        )


def test_single_missing_dimension_scores_zero_and_keeps_threshold_flow(tmp_path):
    """One dimension without evidence scores 0 but does not trigger missing_score."""
    run_state = _state(tmp_path)
    evidence = deepcopy(run_state["evidence_bundles"][0])
    # Relevance evidence is present (0.8 -> 60); platform_heat evidence is
    # removed, so that dimension should score 0 rather than cause missing_score.
    evidence["pattern_match_result"]["relevance_score"] = 0.8
    evidence["content_engagement_metrics"].pop("platform_heat", None)

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        evidence,
        run_state["policy_bundle"],
    )

    rows_by_key = {}
    for row in outcome["scorecard"]["dimensions"]:
        rows_by_key[row["key"]] = row

    heat_row = rows_by_key["platform_heat"]
    assert heat_row["score_missing"] is True
    assert heat_row["score"] == 0

    relevance_row = rows_by_key["relevance"]
    assert relevance_row["score_missing"] is False
    assert relevance_row["score"] == 60

    assert outcome["score"] == 60
    assert outcome["decision_reason_code"] != "missing_score"
    assert outcome["scorecard"]["score_missing"] is False


def test_all_dimensions_missing_uses_score_missing_policy(tmp_path):
    """With both active dimensions lacking evidence, every row and the scorecard are marked missing."""
    run_state = _state(tmp_path)
    evidence = deepcopy(run_state["evidence_bundles"][0])
    # Both active dimensions (relevance and platform_heat) lose their
    # evidence, which should route the decision to the score_missing policy.
    removed_fields = (
        ("pattern_match_result", "relevance_score"),
        ("content_engagement_metrics", "platform_heat"),
    )
    for section, field in removed_fields:
        evidence[section].pop(field, None)

    outcome = decide(
        run_state["run_id"],
        run_state["policy_run_id"],
        1,
        evidence,
        run_state["policy_bundle"],
    )

    assert outcome["decision_action"] == "REJECT_CONTENT"
    assert outcome["decision_reason_code"] == "missing_score"
    assert outcome["score"] is None
    scorecard = outcome["scorecard"]
    assert scorecard["score_missing"] is True
    for row in scorecard["dimensions"]:
        assert row["score_missing"]


def test_dimension_missing_metadata_is_recorded(tmp_path):
    """Check that decision_replay_data lists the dimensions that had no evidence."""
    run_state = _state(tmp_path)
    run_id = run_state["run_id"]
    policy_run_id = run_state["policy_run_id"]
    policy = run_state["policy_bundle"]

    # Case 1: platform_heat evidence removed -> it is reported as missing.
    without_heat = deepcopy(run_state["evidence_bundles"][0])
    without_heat["content_engagement_metrics"].pop("platform_heat", None)
    partial_outcome = decide(run_id, policy_run_id, 1, without_heat, policy)
    assert partial_outcome["decision_replay_data"]["missing_dimensions"] == ["platform_heat"]

    # Case 2: platform_heat explicitly set -> nothing is reported as missing.
    with_heat = deepcopy(run_state["evidence_bundles"][0])
    with_heat["content_engagement_metrics"]["platform_heat"] = 0.8
    complete_outcome = decide(run_id, policy_run_id, 2, with_heat, policy)
    assert complete_outcome["decision_replay_data"]["missing_dimensions"] == []


def _state(tmp_path):
    """Start a mock-platform run under ``tmp_path`` and return what ``start_run`` produces.

    The run service writes under ``tmp_path / "runtime" / "v1"`` and uses a
    fake query-variant client; the run source is ``REAL_SOURCE_FIXTURE``.
    """
    runtime_root = tmp_path / "runtime" / "v1"
    run_service = RunService(
        runtime_root=runtime_root,
        query_variant_client=FakeQueryVariantClient(),
    )
    start_request = RunStartRequest(
        platform_mode="mock",
        source=str(REAL_SOURCE_FIXTURE),
    )
    return run_service.start_run(start_request)


def _policy_with_total_score(policy_bundle, total_score):
    """Return a copy of ``policy_bundle`` whose scoring rules add up to ``total_score``.

    The copy's scoring rules are replaced by two rules, one for ``relevance``
    and one for ``platform_heat``. Both test ``content.decision_target_type``
    for equality with ``"content"`` and are intended to always match.
    Relevance carries up to 60 points and platform_heat the remainder, which
    must not exceed 40. The input bundle is not modified.
    """
    patched = deepcopy(policy_bundle)
    target_scorecard = patched["rule_pack"]["scorecard"]

    # Fill relevance first (capped at 60); whatever is left goes to heat.
    relevance_points = min(total_score, 60)
    heat_points = total_score - relevance_points
    assert heat_points <= 40, "total_score exceeds combined active-dimension caps"

    def _fixed_rule(rule_id, dimension_key, points):
        # Both replacement rules share everything except id, dimension and score.
        return {
            "scoring_rule_id": rule_id,
            "dimension_key": dimension_key,
            "field_path": "content.decision_target_type",
            "operator": "eq",
            "expected_value": "content",
            "score_value": points,
            "priority": 1,
            "enabled": True,
        }

    target_scorecard["scoring_rules"] = [
        _fixed_rule("test_relevance_score", "relevance", relevance_points),
        _fixed_rule("test_heat_score", "platform_heat", heat_points),
    ]
    return patched