from content_agent.run_service import RunService
from content_agent.schemas import RunStartRequest
from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE


def _start_mock_run(tmp_path):
    """Start a mock-platform run on REAL_SOURCE_FIXTURE.

    Returns a ``(service, state)`` pair: the ``RunService`` rooted under
    ``tmp_path / "runtime" / "v1"`` and whatever ``start_run`` returned.
    """
    run_service = RunService(
        runtime_root=tmp_path / "runtime" / "v1",
        query_variant_client=FakeQueryVariantClient(),
    )
    request = RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
    return run_service, run_service.start_run(request)


def test_source_evidence_inherits_evidence_pack_without_rewriting_origin(tmp_path):
    """The first rule decision copies the evidence pack fields verbatim.

    Also checks that the discovered content id is kept distinct from the
    origin post ids, and that the final output exposes the inherited
    evidence together with the expected decision ids.
    """
    service, state = _start_mock_run(tmp_path)
    run_id = state["run_id"]

    context = service.read_json(run_id, "source_context.json")
    pack = context["ext_data"]["evidence_pack"]
    decision_rows = service.read_jsonl(run_id, "rule_decisions.jsonl")
    output = service.read_json(run_id, "final_output.json")
    evidence = decision_rows[0]["source_evidence"]

    assert evidence["policy_run_id"] == state["policy_run_id"]

    # Fields that must be identical in the decision's source_evidence and in
    # the evidence pack stored in source_context.json.
    inherited_fields = (
        "source_kind",
        "pattern_source_system",
        "case_id_type",
        "source_post_id",
        "pattern_execution_id",
        "mining_config_id",
        "itemset_ids",
        "itemset_items",
        "category_bindings",
        "element_bindings",
        "support",
        "absolute_support",
        "matched_post_ids",
        "video_ids",
        "case_ids",
        "decode_case_ids",
        "seed_terms",
        "run_id",
        "source_certainty",
        "validation_status",
    )
    for name in inherited_fields:
        assert evidence[name] == pack[name]

    # The discovered content must not be confused with the origin post(s).
    discovered_id = evidence["discovered_platform_content_id"]
    assert discovered_id != evidence["source_post_id"]
    assert discovered_id not in evidence["matched_post_ids"]
    assert evidence["discovery_relation"] == "mock_pattern_matched"

    # M3: mock content scores 60 (relevance 60 + zero heat) → all three land in
    # review, so the inherited source_evidence now surfaces on review_records
    # rather than content_assets; the inheritance (carrying
    # source_path_record_ids) is the property under test, not the pool/review
    # band.
    first_review = output["review_records"][0]
    assert first_review["source_evidence"]["source_path_record_ids"]

    recorded_decision_ids = {
        entry["decision_id"] for entry in output["decision_records"]
    }
    assert recorded_decision_ids == {"d_001", "d_002", "d_003"}


def test_source_evidence_tracks_multiple_query_sources_without_polluting_origin(tmp_path):
    """An item reached through several queries keeps all of their ids.

    The matched query ids must appear on the discovered item, on its rule
    decision's source_evidence and as ``search_query_to_content`` path
    records, while the origin post ids stay separate from the discovered id.
    """
    service, state = _start_mock_run(tmp_path)
    run_id = state["run_id"]

    discovered = service.read_jsonl(run_id, "discovered_content_items.jsonl")
    # First discovered item that lists more than one query source.
    multi_source_item = next(
        candidate
        for candidate in discovered
        if len(candidate.get("query_sources", [])) > 1
    )

    decision_rows = service.read_jsonl(run_id, "rule_decisions.jsonl")
    matching_decision = next(
        candidate
        for candidate in decision_rows
        if candidate["decision_target_id"] == multi_source_item["platform_content_id"]
    )
    evidence = matching_decision["source_evidence"]

    expected_query_ids = ["q_002", "q_003", "q_004"]
    assert multi_source_item["matched_search_query_ids"] == expected_query_ids
    assert evidence["matched_search_query_ids"] == expected_query_ids

    # The discovered content must not be confused with the origin post(s).
    discovered_id = evidence["discovered_platform_content_id"]
    assert discovered_id != evidence["source_post_id"]
    assert discovered_id not in evidence["matched_post_ids"]

    path_rows = service.read_jsonl(run_id, "source_path_records.jsonl")
    target_id = multi_source_item["platform_content_id"]
    # Query nodes that have a query -> content path ending at the item.
    linked_query_ids = {
        row["from_node_id"]
        for row in path_rows
        if row["source_path_type"] == "search_query_to_content"
        and row["to_node_id"] == target_id
    }
    assert linked_query_ids == {"q_002", "q_003", "q_004"}