# test_p7_author_assets.py

  1. from content_agent.business_modules import result_source_lookup
  2. from content_agent.integrations.runtime_files import LocalRuntimeFileStore
  3. from content_agent.run_service import RunService
  4. from content_agent.schemas import RunStartRequest
  5. from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
  6. class CapturingRuntimeStore(LocalRuntimeFileStore):
  7. def __init__(self, base_dir):
  8. super().__init__(base_dir)
  9. self.author_assets = []
  10. self.author_asset_roles = []
  11. self.publish_jobs = []
  12. def write_author_assets(self, rows):
  13. self.author_assets.extend(rows)
  14. def write_author_asset_roles(self, rows):
  15. self.author_asset_roles.extend(rows)
  16. def write_publish_jobs(self, run_id, policy_run_id, rows):
  17. self.publish_jobs.extend(rows)
  18. def test_author_assets_are_written_only_when_sampling_rules_pass(tmp_path):
  19. runtime = CapturingRuntimeStore(tmp_path / "runtime")
  20. runtime.prepare_run("run_001")
  21. final_output = result_source_lookup.run(
  22. "run_001",
  23. "policy_run_001",
  24. _policy_bundle(),
  25. _items(),
  26. _media_records(),
  27. _decisions(),
  28. _source_paths(),
  29. [],
  30. runtime,
  31. )
  32. assert len(final_output["author_assets"]) == 1
  33. author_asset = final_output["author_assets"][0]
  34. assert author_asset["platform_author_id"] == "author_001"
  35. assert author_asset["asset_status"] == "active"
  36. assert author_asset["eligible_as_source"] is True
  37. assert author_asset["roles"] == ["author_asset", "source_seed", "high_50plus_profile"]
  38. assert len(author_asset["decision_ids"]) == 9
  39. assert runtime.author_assets[0]["source_type"] == "new_discovery"
  40. assert runtime.author_assets[0]["validation_status"] == "rule_validated"
  41. assert {row["role"] for row in runtime.author_asset_roles} == {
  42. "author_asset",
  43. "source_seed",
  44. "high_50plus_profile",
  45. }
  46. def test_mock_full_run_does_not_create_author_asset_without_enough_samples(tmp_path):
  47. runtime = CapturingRuntimeStore(tmp_path / "runtime" / "v1")
  48. service = RunService(
  49. runtime=runtime,
  50. query_variant_client=FakeQueryVariantClient(),
  51. )
  52. state = service.start_run(
  53. RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
  54. )
  55. final_output = service.read_json(state["run_id"], "final_output.json")
  56. assert final_output["author_assets"] == []
  57. assert runtime.author_assets == []
  58. assert runtime.author_asset_roles == []
  59. def _items():
  60. return [
  61. {
  62. "platform": "douyin",
  63. "platform_content_id": f"content_{index:03d}",
  64. "content_discovery_id": f"discovery_{index:03d}",
  65. "search_query_id": "q_001",
  66. "platform_author_id": "author_001",
  67. "author_display_name": "作者一号",
  68. "tags": ["人物故事"],
  69. "previous_discovery_step": "author_work",
  70. "discovery_start_source": "pattern_itemset",
  71. }
  72. for index in range(1, 10)
  73. ]
  74. def _media_records():
  75. return [
  76. {
  77. "platform_content_id": f"content_{index:03d}",
  78. "content_media_status": "metadata_only",
  79. }
  80. for index in range(1, 10)
  81. ]
  82. def _decisions():
  83. decisions = []
  84. for index in range(1, 10):
  85. action = "ADD_TO_CONTENT_POOL" if index <= 3 else "KEEP_CONTENT_FOR_REVIEW"
  86. decisions.append(
  87. {
  88. "decision_id": f"d_{index:03d}",
  89. "decision_target_id": f"content_{index:03d}",
  90. "decision_action": action,
  91. "decision_reason_code": "score_pass" if index <= 3 else "review_needed",
  92. "rule_pack_id": "rule_pack_v1",
  93. "rule_pack_version": "1.0.0",
  94. "strategy_version": "V1",
  95. "search_query_effect_status": "success" if index <= 3 else "pending",
  96. "source_evidence": {"source_kind": "pattern_itemset"},
  97. "age_50_plus_level": "medium",
  98. }
  99. )
  100. return decisions
  101. def _source_paths():
  102. paths = [
  103. {
  104. "source_path_record_id": "path_pattern_q_001",
  105. "source_path_type": "pattern_to_search_query",
  106. "from_node_type": "PatternSeed",
  107. "from_node_id": 581,
  108. "to_node_type": "SearchQuery",
  109. "to_node_id": "q_001",
  110. }
  111. ]
  112. for index in range(1, 10):
  113. content_id = f"content_{index:03d}"
  114. decision_id = f"d_{index:03d}"
  115. paths.append(
  116. {
  117. "source_path_record_id": f"path_query_content_{index:03d}",
  118. "source_path_type": "search_query_to_content",
  119. "from_node_type": "SearchQuery",
  120. "from_node_id": "q_001",
  121. "to_node_type": "Content",
  122. "to_node_id": content_id,
  123. "decision_id": decision_id,
  124. }
  125. )
  126. if index <= 3:
  127. paths.append(
  128. {
  129. "source_path_record_id": f"path_decision_asset_{index:03d}",
  130. "source_path_type": "decision_to_asset",
  131. "from_node_type": "RuleDecision",
  132. "from_node_id": decision_id,
  133. "to_node_type": "ContentAsset",
  134. "to_node_id": content_id,
  135. "decision_id": decision_id,
  136. }
  137. )
  138. return paths
  139. def _policy_bundle():
  140. return {
  141. "policy_bundle_id": "douyin_policy_bundle_v1",
  142. "strategy_id": "douyin_content_find_v1",
  143. "strategy_version": "V1",
  144. "rule_pack_id": "rule_pack_v1",
  145. "rule_pack_version": "1.0.0",
  146. "policy_bundle_hash": "hash_001",
  147. "strategy_source_ref": {"file": "rule_pack.json"},
  148. "rule_pack_source_ref": {"file": "rule_pack.json"},
  149. }