p6_walk_helpers.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. from __future__ import annotations
  2. from pathlib import Path
  3. from typing import Any
  4. from content_agent.business_modules import content_discovery, rule_judgment, source_seed
  5. from content_agent.business_modules.content_discovery import pattern_recall
  6. from content_agent.constants import RUNTIME_RECORD_SCHEMA_VERSION
  7. from content_agent.integrations.policy_json import JsonPolicyBundleStore
  8. from content_agent.integrations.runtime_files import LocalRuntimeFileStore
  9. from content_agent.record_payload import with_raw_payload
  10. from tests.gemini_helpers import FakeGeminiVideoClient
  11. from tests.p1_helpers import real_source_payload
  12. class FakeWalkPlatformClient:
  13. def __init__(self, fail_next_page: bool = False, tags: list[str] | None = None) -> None:
  14. self.fail_next_page = fail_next_page
  15. self.tags = tags or ["#人物故事"]
  16. self.search_calls: list[dict[str, Any]] = []
  17. self.author_calls: list[dict[str, Any]] = []
  18. def search(self, query: dict[str, Any]) -> list[dict[str, Any]]:
  19. self.search_calls.append(dict(query))
  20. method = query.get("search_query_generation_method")
  21. if method == "query_next_page" and self.fail_next_page:
  22. raise RuntimeError("page unavailable")
  23. if method == "query_next_page":
  24. return [_platform_result(query, "7390000000000000101", "下一页内容", [])]
  25. if method == "tag_query":
  26. return [_platform_result(query, "7390000000000000201", "标签内容", [])]
  27. return []
  28. def fetch_author_works(self, query: dict[str, Any]) -> list[dict[str, Any]]:
  29. self.author_calls.append(dict(query))
  30. return [_platform_result(query, "7390000000000000301", "作者作品", [])]
  31. def build_initial_walk_context(tmp_path: Path, *, tags: list[str] | None = None) -> dict[str, Any]:
  32. run_id = "run_001"
  33. policy_run_id = "policy_run_001"
  34. runtime = LocalRuntimeFileStore(tmp_path / "runtime")
  35. runtime.prepare_run(run_id)
  36. seed = source_seed.run(run_id, policy_run_id, real_source_payload(), runtime)
  37. search_query = with_raw_payload(
  38. {
  39. "record_schema_version": RUNTIME_RECORD_SCHEMA_VERSION,
  40. "run_id": run_id,
  41. "policy_run_id": policy_run_id,
  42. "search_query_id": "q_001",
  43. "search_query": "人物故事",
  44. "search_query_generation_method": "item_single",
  45. "discovery_start_source": "pattern_itemset",
  46. "previous_discovery_step": "search_query_generated",
  47. "pattern_seed_ref": {"seed_term": "人物故事"},
  48. }
  49. )
  50. runtime.append_jsonl(run_id, "search_queries.jsonl", [search_query])
  51. initial_result = _platform_result(search_query, "7390000000000000001", "首轮内容", tags or ["#人物故事"])
  52. initial_result["has_more"] = True
  53. initial_result["next_cursor"] = "10"
  54. discovered = content_discovery.run(
  55. run_id,
  56. policy_run_id,
  57. [initial_result],
  58. seed["source_context"],
  59. runtime,
  60. )
  61. recalled = pattern_recall.run(
  62. run_id,
  63. policy_run_id,
  64. discovered["discovered_content_items"],
  65. discovered["content_media_records"],
  66. discovered["evidence_bundles"],
  67. seed["source_context"],
  68. runtime,
  69. FakeGeminiVideoClient(),
  70. )
  71. policy_bundle = JsonPolicyBundleStore(Path(".")).load_policy_bundle("V1")
  72. decisions = rule_judgment.run(
  73. run_id,
  74. policy_run_id,
  75. recalled["evidence_bundles"],
  76. policy_bundle,
  77. runtime,
  78. )
  79. return {
  80. "run_id": run_id,
  81. "policy_run_id": policy_run_id,
  82. "runtime": runtime,
  83. "source_context": seed["source_context"],
  84. "pattern_seed_pack": seed["pattern_seed_pack"],
  85. "search_queries": [search_query],
  86. "discovered_content_items": recalled["discovered_content_items"],
  87. "content_media_records": discovered["content_media_records"],
  88. "evidence_bundles": recalled["evidence_bundles"],
  89. "rule_decisions": decisions,
  90. "policy_bundle": policy_bundle,
  91. "gemini_video_client": FakeGeminiVideoClient(),
  92. }
  93. def _platform_result(
  94. query: dict[str, Any],
  95. platform_content_id: str,
  96. description: str,
  97. tags: list[str],
  98. ) -> dict[str, Any]:
  99. return {
  100. "content_discovery_id": f"{query['search_query_id']}_content_{platform_content_id[-3:]}",
  101. "search_query_id": query["search_query_id"],
  102. "platform": "douyin",
  103. "platform_content_id": platform_content_id,
  104. "platform_content_format": "video",
  105. "description": description,
  106. "platform_author_id": "MS4wLjABAAAA001",
  107. "author_display_name": "作者",
  108. "statistics": {"digg_count": 9000, "comment_count": 800, "share_count": 700},
  109. "tags": tags,
  110. "score": 72,
  111. "risk_level": "low",
  112. "availability": "available",
  113. "discovery_relation": "fake_walk",
  114. "discovery_start_source": query.get("discovery_start_source", "pattern_itemset"),
  115. "previous_discovery_step": query.get("previous_discovery_step", "search_query_direct"),
  116. "content_metadata_source": "fake_platform_search",
  117. "platform_raw_payload": {"content_id": platform_content_id},
  118. }