lisihan
/
content-find-agent-new


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
							from content_agent.run_service import RunService
from content_agent.schemas import RunStartRequest
from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE


def test_source_evidence_inherits_evidence_pack_without_rewriting_origin(tmp_path):
    """Check that a decision's source evidence mirrors the run's evidence pack.

    Starts a mock-platform run against ``REAL_SOURCE_FIXTURE`` and compares the
    ``source_evidence`` of the first rule decision with the ``evidence_pack``
    stored under ``ext_data`` in ``source_context.json``. The discovered content
    id must stay distinct from the origin post ids, and the evidence must also
    be present in ``final_output.json``.
    """
    # Fields that must be carried over from the evidence pack unchanged.
    inherited_fields = (
        "source_kind",
        "pattern_source_system",
        "case_id_type",
        "source_post_id",
        "pattern_execution_id",
        "mining_config_id",
        "itemset_ids",
        "itemset_items",
        "category_bindings",
        "element_bindings",
        "support",
        "absolute_support",
        "matched_post_ids",
        "video_ids",
        "case_ids",
        "decode_case_ids",
        "seed_terms",
        "run_id",
        "source_certainty",
        "validation_status",
    )

    runner = RunService(
        runtime_root=tmp_path / "runtime" / "v1",
        query_variant_client=FakeQueryVariantClient(),
    )
    request = RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
    run_state = runner.start_run(request)
    run_id = run_state["run_id"]

    context = runner.read_json(run_id, "source_context.json")
    pack = context["ext_data"]["evidence_pack"]
    rule_decisions = runner.read_jsonl(run_id, "rule_decisions.jsonl")
    output = runner.read_json(run_id, "final_output.json")

    evidence = rule_decisions[0]["source_evidence"]
    assert evidence["policy_run_id"] == run_state["policy_run_id"]
    for name in inherited_fields:
        assert evidence[name] == pack[name]

    # The discovered content must not be confused with the origin posts.
    discovered_id = evidence["discovered_platform_content_id"]
    assert discovered_id != evidence["source_post_id"]
    assert discovered_id not in evidence["matched_post_ids"]
    assert evidence["discovery_relation"] == "mock_pattern_matched"

    # M3: mock content scores 60 (relevance 60 + zero heat), so all three items
    # land in review and the inherited source_evidence surfaces on
    # review_records rather than content_assets. What is under test is the
    # inheritance itself (carrying source_path_record_ids), not the
    # pool/review band.
    first_review = output["review_records"][0]
    assert first_review["source_evidence"]["source_path_record_ids"]

    decision_ids = {entry["decision_id"] for entry in output["decision_records"]}
    assert decision_ids == {"d_001", "d_002", "d_003"}


def test_source_evidence_tracks_multiple_query_sources_without_polluting_origin(tmp_path):
    """Check that content found by several queries keeps every query id.

    Starts a mock-platform run against ``REAL_SOURCE_FIXTURE``, picks the first
    discovered item with more than one entry in ``query_sources``, and verifies
    that the item, its rule decision's ``source_evidence``, and the
    ``search_query_to_content`` path records all reference the same query ids,
    while the discovered content id stays distinct from the origin post ids.
    """
    expected_query_ids = ["q_002", "q_003", "q_004"]

    runner = RunService(
        runtime_root=tmp_path / "runtime" / "v1",
        query_variant_client=FakeQueryVariantClient(),
    )
    request = RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
    run_state = runner.start_run(request)
    run_id = run_state["run_id"]

    discovered = runner.read_jsonl(run_id, "discovered_content_items.jsonl")
    # First item reached through more than one query source.
    multi_hit = next(
        candidate
        for candidate in discovered
        if len(candidate.get("query_sources", [])) > 1
    )

    rule_decisions = runner.read_jsonl(run_id, "rule_decisions.jsonl")
    matching_decision = next(
        entry
        for entry in rule_decisions
        if entry["decision_target_id"] == multi_hit["platform_content_id"]
    )
    evidence = matching_decision["source_evidence"]

    assert multi_hit["matched_search_query_ids"] == expected_query_ids
    assert evidence["matched_search_query_ids"] == expected_query_ids

    # The discovered content must not be confused with the origin posts.
    discovered_id = evidence["discovered_platform_content_id"]
    assert discovered_id != evidence["source_post_id"]
    assert discovered_id not in evidence["matched_post_ids"]

    # Collect the query nodes that point at the multi-source item.
    content_id = multi_hit["platform_content_id"]
    originating_queries = set()
    for record in runner.read_jsonl(run_id, "source_path_records.jsonl"):
        if record["source_path_type"] != "search_query_to_content":
            continue
        if record["to_node_id"] != content_id:
            continue
        originating_queries.add(record["from_node_id"])

    assert originating_queries == {
        "q_002",
        "q_003",
        "q_004",
    }