| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104 |
- from content_agent.run_service import RunService
- from content_agent.schemas import RunStartRequest
- from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
def test_source_evidence_inherits_evidence_pack_without_rewriting_origin(tmp_path):
    """Verify a rule decision's source_evidence mirrors the stored evidence pack.

    A mock-platform run is started against ``REAL_SOURCE_FIXTURE``. The first
    row of ``rule_decisions.jsonl`` must carry the same values as the
    ``evidence_pack`` found under ``ext_data`` in ``source_context.json`` for
    every inherited field, while the discovered content id stays distinct from
    the origin post ids.
    """
    runtime_root = tmp_path / "runtime" / "v1"
    service = RunService(
        runtime_root=runtime_root,
        query_variant_client=FakeQueryVariantClient(),
    )
    request = RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
    state = service.start_run(request)
    run_id = state["run_id"]

    # Load the run artifacts in the same order the assertions consume them.
    context = service.read_json(run_id, "source_context.json")
    pack = context["ext_data"]["evidence_pack"]
    decision_rows = service.read_jsonl(run_id, "rule_decisions.jsonl")
    output = service.read_json(run_id, "final_output.json")
    evidence = decision_rows[0]["source_evidence"]

    assert evidence["policy_run_id"] == state["policy_run_id"]

    # Fields that must be copied from the evidence pack without modification.
    inherited_fields = (
        "source_kind",
        "pattern_source_system",
        "case_id_type",
        "source_post_id",
        "pattern_execution_id",
        "mining_config_id",
        "itemset_ids",
        "itemset_items",
        "category_bindings",
        "element_bindings",
        "support",
        "absolute_support",
        "matched_post_ids",
        "video_ids",
        "case_ids",
        "decode_case_ids",
        "seed_terms",
        "run_id",
        "source_certainty",
        "validation_status",
    )
    for name in inherited_fields:
        assert evidence[name] == pack[name]

    # The discovered content must not be confused with the origin posts.
    discovered_id = evidence["discovered_platform_content_id"]
    assert discovered_id != evidence["source_post_id"]
    assert discovered_id not in evidence["matched_post_ids"]
    assert evidence["discovery_relation"] == "mock_pattern_matched"

    # M3: mock content scores 60 (relevance 60 + zero heat), so all three items
    # land in review. The inherited source_evidence therefore shows up on
    # review_records instead of content_assets. What is being tested is the
    # inheritance itself (source_path_record_ids being carried along), not
    # which pool/review band the items fall into.
    first_review = output["review_records"][0]
    assert first_review["source_evidence"]["source_path_record_ids"]

    decision_ids = {entry["decision_id"] for entry in output["decision_records"]}
    assert decision_ids == {"d_001", "d_002", "d_003"}
def test_source_evidence_tracks_multiple_query_sources_without_polluting_origin(tmp_path):
    """Verify content reached via several queries keeps every query id.

    A mock-platform run is started against ``REAL_SOURCE_FIXTURE``. The first
    discovered item with more than one entry in ``query_sources`` is located,
    and its matched query ids must appear on the item, on the matching rule
    decision's ``source_evidence``, and as the origin nodes of the
    ``search_query_to_content`` path records pointing at it. The discovered
    content id must stay distinct from the origin post ids.
    """
    runtime_root = tmp_path / "runtime" / "v1"
    service = RunService(
        runtime_root=runtime_root,
        query_variant_client=FakeQueryVariantClient(),
    )
    request = RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
    state = service.start_run(request)
    run_id = state["run_id"]

    expected_query_ids = ["q_002", "q_003", "q_004"]

    # Pick the first discovered item that was reached through several queries.
    discovered = service.read_jsonl(run_id, "discovered_content_items.jsonl")
    multi_hit = next(
        entry for entry in discovered if len(entry.get("query_sources", [])) > 1
    )
    content_id = multi_hit["platform_content_id"]

    # Find the rule decision that targets that same piece of content.
    decision_rows = service.read_jsonl(run_id, "rule_decisions.jsonl")
    matching_decision = next(
        row for row in decision_rows if row["decision_target_id"] == content_id
    )
    evidence = matching_decision["source_evidence"]

    assert multi_hit["matched_search_query_ids"] == expected_query_ids
    assert evidence["matched_search_query_ids"] == expected_query_ids

    # The discovered content must not be confused with the origin posts.
    discovered_id = evidence["discovered_platform_content_id"]
    assert discovered_id != evidence["source_post_id"]
    assert discovered_id not in evidence["matched_post_ids"]

    # Each matched query must have its own query -> content path record.
    path_rows = service.read_jsonl(run_id, "source_path_records.jsonl")
    origin_query_ids = {
        row["from_node_id"]
        for row in path_rows
        if row["source_path_type"] == "search_query_to_content"
        and row["to_node_id"] == content_id
    }
    assert origin_query_ids == set(expected_query_ids)
|