test_source_evidence.py 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. from content_agent.run_service import RunService
  2. from content_agent.schemas import RunStartRequest
  3. from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
  4. def test_source_evidence_inherits_evidence_pack_without_rewriting_origin(tmp_path):
  5. service = RunService(
  6. runtime_root=tmp_path / "runtime" / "v1",
  7. query_variant_client=FakeQueryVariantClient(),
  8. )
  9. state = service.start_run(
  10. RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
  11. )
  12. run_id = state["run_id"]
  13. source_context = service.read_json(run_id, "source_context.json")
  14. evidence_pack = source_context["ext_data"]["evidence_pack"]
  15. decisions = service.read_jsonl(run_id, "rule_decisions.jsonl")
  16. final_output = service.read_json(run_id, "final_output.json")
  17. source_evidence = decisions[0]["source_evidence"]
  18. assert source_evidence["policy_run_id"] == state["policy_run_id"]
  19. for field in [
  20. "source_kind",
  21. "pattern_source_system",
  22. "case_id_type",
  23. "source_post_id",
  24. "pattern_execution_id",
  25. "mining_config_id",
  26. "itemset_ids",
  27. "itemset_items",
  28. "category_bindings",
  29. "element_bindings",
  30. "support",
  31. "absolute_support",
  32. "matched_post_ids",
  33. "video_ids",
  34. "case_ids",
  35. "decode_case_ids",
  36. "seed_terms",
  37. "run_id",
  38. "source_certainty",
  39. "validation_status",
  40. ]:
  41. assert source_evidence[field] == evidence_pack[field]
  42. assert source_evidence["discovered_platform_content_id"] != source_evidence["source_post_id"]
  43. assert (
  44. source_evidence["discovered_platform_content_id"]
  45. not in source_evidence["matched_post_ids"]
  46. )
  47. assert source_evidence["discovery_relation"] == "mock_pattern_matched"
  48. # M3: mock content scores 60 (relevance 60 + zero heat) → all three land in
  49. # review, so the inherited source_evidence now surfaces on review_records rather
  50. # than content_assets; the inheritance (carrying source_path_record_ids) is the
  51. # property under test, not the pool/review band.
  52. assert final_output["review_records"][0]["source_evidence"]["source_path_record_ids"]
  53. assert {
  54. record["decision_id"] for record in final_output["decision_records"]
  55. } == {"d_001", "d_002", "d_003"}
  56. def test_source_evidence_tracks_multiple_query_sources_without_polluting_origin(tmp_path):
  57. service = RunService(
  58. runtime_root=tmp_path / "runtime" / "v1",
  59. query_variant_client=FakeQueryVariantClient(),
  60. )
  61. state = service.start_run(
  62. RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
  63. )
  64. run_id = state["run_id"]
  65. items = service.read_jsonl(run_id, "discovered_content_items.jsonl")
  66. multi_source_item = next(
  67. item for item in items if len(item.get("query_sources", [])) > 1
  68. )
  69. decisions = service.read_jsonl(run_id, "rule_decisions.jsonl")
  70. decision = next(
  71. row
  72. for row in decisions
  73. if row["decision_target_id"] == multi_source_item["platform_content_id"]
  74. )
  75. source_evidence = decision["source_evidence"]
  76. assert multi_source_item["matched_search_query_ids"] == ["q_002", "q_003", "q_004"]
  77. assert source_evidence["matched_search_query_ids"] == ["q_002", "q_003", "q_004"]
  78. assert source_evidence["discovered_platform_content_id"] != source_evidence["source_post_id"]
  79. assert (
  80. source_evidence["discovered_platform_content_id"]
  81. not in source_evidence["matched_post_ids"]
  82. )
  83. paths = service.read_jsonl(run_id, "source_path_records.jsonl")
  84. query_content_paths = [
  85. path
  86. for path in paths
  87. if path["source_path_type"] == "search_query_to_content"
  88. and path["to_node_id"] == multi_source_item["platform_content_id"]
  89. ]
  90. assert {path["from_node_id"] for path in query_content_paths} == {
  91. "q_002",
  92. "q_003",
  93. "q_004",
  94. }