Kaynağa Gözat

feat(v3-m3): 规则包判定重写(Gemini硬门槛+相关性/平台热度打分)

- M3A 平台热度:platform_heat.py 点赞对数归一化(按平台 floor/ceil 锚点),注入 content_engagement_metrics.platform_heat;evaluator 加 lt 算子
- M3B 内容包重写(Excel+JSON 同步,config gate 5闸全过):硬门槛删5留5增3(not_fit_senior/low_confidence/judge_failed)、scorecard 旧3维退役改 relevance(max60)+platform_heat(max40)gte分档、input_contract 改读 Gemini 字段、新增3 reason_code 旧标签退役
- M3C(部分):删 recall_decision M2→M3 桥接键;砍4 future包推迟到M4(实测砍包破坏 walk_strategy binding 闭合性致115测试崩,越界M4)
- 连带修生产 bug:validation._check_pattern_recall_evidence 改以 judge_status 把关(桥接键删后原 pattern_recall=="matched" 判据失效成死代码)
- M3D:规则单测+端到端+回放快照按新口径更新(实跑生成,无削弱);基线 306→312 passed,config gate pass
- 经验收岗 6/6 抽验(改动面最小、无越界、无测试削弱、口径一致)

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
Sam Lee 2 gün önce
ebeveyn
işleme
4761e2caa4
31 değiştirilmiş dosya ile 559 ekleme ve 347 silme
  1. 5 0
      content_agent/business_modules/content_discovery/content_discovery_builder.py
  2. 0 3
      content_agent/business_modules/content_discovery/pattern_recall/recall_decision.py
  3. 29 0
      content_agent/business_modules/content_discovery/platform_heat.py
  4. 2 0
      content_agent/business_modules/rule_judgment/evaluator.py
  5. 3 1
      content_agent/business_modules/run_record/validation.py
  6. 153 138
      product_documents/规则包/douyin_rule_packs.v1.json
  7. BIN
      tech_documents/规则包映射/规则包映射配置表.xlsx
  8. 7 7
      tests/fixtures/snapshots/matrix/real_id45__default.json
  9. 0 16
      tests/fixtures/snapshots/matrix/real_id45__relaxed_portrait.json
  10. 4 4
      tests/fixtures/snapshots/matrix/real_id45__senior_block.json
  11. 2 2
      tests/fixtures/snapshots/real_id45/decision_summary.json
  12. 4 1
      tests/test_api.py
  13. 39 18
      tests/test_case_replay.py
  14. 28 38
      tests/test_config_case_matrix.py
  15. 10 3
      tests/test_dual_channel_gemini_replay.py
  16. 14 8
      tests/test_p7_lineage_validation.py
  17. 20 2
      tests/test_p7_publish_jobs.py
  18. 38 0
      tests/test_platform_heat.py
  19. 4 7
      tests/test_policy_replay_data.py
  20. 7 6
      tests/test_query_effect_aggregation.py
  21. 56 15
      tests/test_replay_gemini_seam.py
  22. 10 9
      tests/test_rule_decision_effect_status.py
  23. 33 16
      tests/test_rule_judgment_hard_gates.py
  24. 47 42
      tests/test_rule_judgment_scorecard.py
  25. 11 5
      tests/test_rule_pack_reading.py
  26. 5 1
      tests/test_source_evidence.py
  27. 6 5
      tests/test_v1_graph.py
  28. 3 0
      tests/test_walk_engine_author.py
  29. 10 0
      tests/test_walk_engine_loop.py
  30. 6 0
      tests/test_walk_engine_pagination.py
  31. 3 0
      tests/test_walk_engine_tag.py

+ 5 - 0
content_agent/business_modules/content_discovery/content_discovery_builder.py

@@ -3,6 +3,7 @@ from __future__ import annotations
 from datetime import datetime, timezone
 from typing import Any
 
+from content_agent.business_modules.content_discovery.platform_heat import heat_score
 from content_agent.business_modules.content_discovery.source_evidence import (
     build_source_evidence,
 )
@@ -155,6 +156,10 @@ def _build_evidence_bundle(
         "content_engagement_metrics": {
             "statistics": result["statistics"],
             **result["statistics"],
+            "platform_heat": heat_score(
+                result["statistics"].get("digg_count"),
+                discovered_content_item["platform"],
+            ),
         },
         "content_audience_profile": content_audience_profile,
         "author_audience_profile": {

+ 0 - 3
content_agent/business_modules/content_discovery/pattern_recall/recall_decision.py

@@ -63,9 +63,6 @@ def _build_pattern_match_result(judgment: dict[str, Any], recall_evidence_id: st
         "relevance_score": float(judgment.get("relevance_score") or 0.0),
         "reason": str(judgment.get("reason") or ""),
         "judge_status": str(judgment.get("status") or "ok"),
-        # M2→M3 桥接键:让未重写的旧 hard_gate(not_in ["matched"])不误拒。M3 删旧门槛后移除。
-        "pattern_recall": "matched",
-        "category_or_element_binding": "matched",
         "pattern_recall_evidence_id": recall_evidence_id,
     }
 

+ 29 - 0
content_agent/business_modules/content_discovery/platform_heat.py

@@ -0,0 +1,29 @@
+"""Platform heat normalization (V3-M3A).
+
+跨平台唯一公共互动指标是点赞(digg_count;视频号仅 like_count→digg_count)。
+绝对值不可跨平台比较(抖音爆款 5e6 vs 视频号样本 ~1e2),故按平台锚点做
+对数归一化:digg ≤ floor → 0,≥ ceil → 1,之间按 log10 插值。
+锚点为拍板起步值,M7 真实跑测后按数据标定。
+"""
+
+from __future__ import annotations
+
+from math import log10
+from typing import Any
+
+# (floor, ceil) per platform — 起步值,M7 标定。
+_HEAT_ANCHORS: dict[str, tuple[float, float]] = {
+    "douyin": (10000.0, 1000000.0),
+    "shipinhao": (50.0, 50000.0),
+}
+_DEFAULT_ANCHOR = (100.0, 100000.0)
+
+
+def heat_score(digg_count: Any, platform: str) -> float:
+    """Return the platform-normalized heat of a like count, in [0.0, 1.0].
+
+    The like count is placed on a log10 scale between the platform's
+    (floor, ceil) anchors from ``_HEAT_ANCHORS``; a platform without an
+    entry falls back to ``_DEFAULT_ANCHOR``. The result is clamped to
+    [0, 1] and rounded to 4 decimal places.
+
+    Args:
+        digg_count: Raw like count. ``None``, negative numbers, and any value
+            ``int()`` cannot convert (unparsable strings, NaN, infinity) are
+            treated as 0.
+        platform: Platform key used to look up the anchors.
+
+    Returns:
+        The normalized heat as a float between 0.0 and 1.0 inclusive.
+    """
+    floor, ceil = _HEAT_ANCHORS.get(platform, _DEFAULT_ANCHOR)
+    try:
+        digg = max(int(digg_count or 0), 0)
+    except (TypeError, ValueError, OverflowError):
+        # int(float("inf")) raises OverflowError, which is neither a TypeError
+        # nor a ValueError; handle it like every other unusable count instead
+        # of letting it propagate to the caller.
+        digg = 0
+    # The +1 keeps log10 defined when the like count is zero.
+    raw = (log10(digg + 1) - log10(floor)) / (log10(ceil) - log10(floor))
+    return round(min(max(raw, 0.0), 1.0), 4)

+ 2 - 0
content_agent/business_modules/rule_judgment/evaluator.py

@@ -183,6 +183,8 @@ def _condition_matches(bundle: dict[str, Any], condition: dict[str, Any]) -> boo
         return value == expected
     if op == "gte":
         return value is not None and value >= expected
+    if op == "lt":
+        return value is not None and value < expected
     if op == "lte":
         return value is not None and value <= expected
     raise ValueError(f"unsupported rule operator: {op}")

+ 3 - 1
content_agent/business_modules/run_record/validation.py

@@ -524,7 +524,9 @@ def _check_pattern_recall_evidence(
 
     for item in data.get("discovered_content_items.jsonl", []):
         pattern_match = item.get("pattern_match_result") or {}
-        if pattern_match.get("pattern_recall") != "matched":
+        # V3(M3):桥接键 pattern_recall 已退役;改以"是否被判定过"(judge_status 存在)为准——
+        # 每条经 Gemini 判定的内容必须能解析到真实 evidence 行,否则视为血缘损坏。
+        if not pattern_match.get("judge_status"):
             continue
         evidence_id = pattern_match.get("pattern_recall_evidence_id")
         evidence = evidence_by_id.get(evidence_id)

+ 153 - 138
product_documents/规则包/douyin_rule_packs.v1.json

@@ -255,12 +255,10 @@
           "content.platform_content_id",
           "source_evidence",
           "run_context.run_id",
-          "pattern_match_result.pattern_recall",
-          "pattern_match_result.category_or_element_binding",
-          "content_risk_check.risk_level",
-          "content_engagement_metrics.statistics",
-          "content_audience_profile",
-          "content_audience_profile.age_50_plus_level"
+          "pattern_match_result.fit_senior_50plus",
+          "pattern_match_result.fit_confidence",
+          "pattern_match_result.relevance_score",
+          "content_engagement_metrics.platform_heat"
         ],
         "missing_policy": "fail_hard_gate",
         "score_missing_policy": {
@@ -298,57 +296,6 @@
           "stop_scoring": true,
           "priority": 30
         },
-        {
-          "gate_id": "pattern_recall_required",
-          "label": "视频必须回扣 Pattern",
-          "when": {
-            "field": "pattern_match_result.pattern_recall",
-            "op": "not_in",
-            "value": [
-              "matched"
-            ]
-          },
-          "decision_action": "REJECT_CONTENT",
-          "decision_reason_code": "content_pattern_recall_required",
-          "severity": "fatal",
-          "stop_scoring": true,
-          "priority": 20
-        },
-        {
-          "gate_id": "category_or_element_binding_required",
-          "label": "视频必须绑定分类或元素",
-          "when": {
-            "field": "pattern_match_result.category_or_element_binding",
-            "op": "not_in",
-            "value": [
-              "direct_match",
-              "tree_walk_match",
-              "matched"
-            ]
-          },
-          "decision_action": "REJECT_CONTENT",
-          "decision_reason_code": "category_or_element_binding_required",
-          "severity": "fatal",
-          "stop_scoring": true,
-          "priority": 40
-        },
-        {
-          "gate_id": "obvious_drift",
-          "label": "明显跑偏",
-          "when": {
-            "field": "pattern_match_result.level",
-            "op": "in",
-            "value": [
-              "drift",
-              "unrelated"
-            ]
-          },
-          "decision_action": "REJECT_CONTENT",
-          "decision_reason_code": "obvious_drift",
-          "severity": "fatal",
-          "stop_scoring": true,
-          "priority": 71
-        },
         {
           "gate_id": "high_risk_content",
           "label": "高风险内容",
@@ -384,49 +331,62 @@
           "priority": 70
         },
         {
-          "gate_id": "missing_content_portrait",
-          "label": "无内容画像",
+          "gate_id": "missing_platform_author_id",
+          "label": "无作者 platform_author_id",
           "when": {
-            "field": "content_audience_profile",
+            "field": "content.author.platform_author_id",
             "op": "is_empty"
           },
-          "decision_action": "KEEP_CONTENT_FOR_REVIEW",
-          "decision_reason_code": "missing_content_portrait",
-          "severity": "review",
+          "decision_action": "DO_NOT_EXPAND_AUTHOR",
+          "decision_reason_code": "missing_platform_author_id",
+          "severity": "partial",
+          "stop_scoring": false,
+          "affects_outputs": [
+            "author_expand_decision"
+          ],
+          "priority": 90
+        },
+        {
+          "gate_id": "not_fit_senior",
+          "label": "不适合中国50岁以上老年人观看",
+          "when": {
+            "field": "pattern_match_result.fit_senior_50plus",
+            "op": "eq",
+            "value": false
+          },
+          "decision_action": "REJECT_CONTENT",
+          "decision_reason_code": "content_not_fit_senior",
+          "severity": "fatal",
           "stop_scoring": true,
-          "priority": 50
+          "priority": 20
         },
         {
-          "gate_id": "age_50_plus_weak",
-          "label": "50+ 内容画像弱或缺失",
+          "gate_id": "low_confidence",
+          "label": "Gemini 判定置信度过低",
           "when": {
-            "field": "content_audience_profile.age_50_plus_level",
-            "op": "in",
-            "value": [
-              "weak"
-            ]
+            "field": "pattern_match_result.fit_confidence",
+            "op": "lt",
+            "value": 0.6
           },
           "decision_action": "REJECT_CONTENT",
-          "decision_reason_code": "age_50_plus_weak",
+          "decision_reason_code": "content_low_confidence",
           "severity": "fatal",
           "stop_scoring": true,
-          "priority": 51
+          "priority": 25
         },
         {
-          "gate_id": "missing_platform_author_id",
-          "label": "无作者 platform_author_id",
+          "gate_id": "judge_failed",
+          "label": "Gemini 判定技术失败",
           "when": {
-            "field": "content.author.platform_author_id",
-            "op": "is_empty"
+            "field": "pattern_match_result.judge_status",
+            "op": "eq",
+            "value": "failed"
           },
-          "decision_action": "DO_NOT_EXPAND_AUTHOR",
-          "decision_reason_code": "missing_platform_author_id",
-          "severity": "partial",
-          "stop_scoring": false,
-          "affects_outputs": [
-            "author_expand_decision"
-          ],
-          "priority": 90
+          "decision_action": "KEEP_CONTENT_FOR_REVIEW",
+          "decision_reason_code": "content_judge_failed",
+          "severity": "review",
+          "stop_scoring": true,
+          "priority": 15
         }
       ],
       "scorecard": {
@@ -441,9 +401,9 @@
           {
             "key": "content_audience_profile",
             "label": "内容画像",
-            "max_score": 50,
-            "weight_percent": 50,
-            "runtime_status": "active",
+            "max_score": 0,
+            "weight_percent": 0,
+            "runtime_status": "deprecated",
             "evidence_paths": [
               "content_audience_profile.age_50_plus_level",
               "content_audience_profile.age_distribution",
@@ -453,9 +413,9 @@
           {
             "key": "interaction_performance",
             "label": "互动表现",
-            "max_score": 30,
-            "weight_percent": 30,
-            "runtime_status": "active",
+            "max_score": 0,
+            "weight_percent": 0,
+            "runtime_status": "deprecated",
             "evidence_paths": [
               "content_engagement_metrics.statistics"
             ]
@@ -463,9 +423,9 @@
           {
             "key": "freshness_available",
             "label": "新鲜度 / 可用状态",
-            "max_score": 20,
-            "weight_percent": 20,
-            "runtime_status": "active",
+            "max_score": 0,
+            "weight_percent": 0,
+            "runtime_status": "deprecated",
             "evidence_paths": [
               "content_risk_check.availability",
               "content.create_time"
@@ -494,80 +454,112 @@
               "pattern_match_result.adaptability"
             ],
             "notes": "旧 5 维 scorecard 维度,deprecated,不进入新 V1 runtime。"
+          },
+          {
+            "key": "relevance",
+            "label": "相关性",
+            "max_score": 60,
+            "weight_percent": 60,
+            "runtime_status": "active",
+            "evidence_paths": [
+              "pattern_match_result.relevance_score"
+            ]
+          },
+          {
+            "key": "platform_heat",
+            "label": "平台热度",
+            "max_score": 40,
+            "weight_percent": 40,
+            "runtime_status": "active",
+            "evidence_paths": [
+              "content_engagement_metrics.platform_heat"
+            ]
           }
         ],
         "scoring_rules": [
           {
-            "scoring_rule_id": "score_content_profile_strong",
-            "dimension_key": "content_audience_profile",
-            "field_path": "content_audience_profile.age_50_plus_level",
-            "operator": "eq",
-            "expected_value": "strong",
-            "score_value": 50,
+            "scoring_rule_id": "score_relevance_high",
+            "dimension_key": "relevance",
+            "field_path": "pattern_match_result.relevance_score",
+            "operator": "gte",
+            "expected_value": 0.8,
+            "score_value": 60,
             "missing_policy": "score_0_or_fail_by_dimension",
             "priority": 1,
             "enabled": true,
-            "notes": "Excel 初稿同步:50+ 画像 strong 给满分。"
+            "notes": "M3 重写:Gemini 相关性 + 平台热度对数归一化打分。"
           },
           {
-            "scoring_rule_id": "score_content_profile_medium",
-            "dimension_key": "content_audience_profile",
-            "field_path": "content_audience_profile.age_50_plus_level",
-            "operator": "eq",
-            "expected_value": "medium",
-            "score_value": 35,
+            "scoring_rule_id": "score_relevance_mid",
+            "dimension_key": "relevance",
+            "field_path": "pattern_match_result.relevance_score",
+            "operator": "gte",
+            "expected_value": 0.6,
+            "score_value": 45,
             "missing_policy": "score_0_or_fail_by_dimension",
             "priority": 2,
             "enabled": true,
-            "notes": "Excel 初稿同步:50+ 画像 medium 给 35 分。"
+            "notes": "M3 重写:Gemini 相关性 + 平台热度对数归一化打分。"
           },
           {
-            "scoring_rule_id": "score_interaction_high",
-            "dimension_key": "interaction_performance",
-            "field_path": "content_engagement_metrics.statistics.total_interactions",
+            "scoring_rule_id": "score_relevance_low",
+            "dimension_key": "relevance",
+            "field_path": "pattern_match_result.relevance_score",
             "operator": "gte",
-            "expected_value": 10000,
-            "score_value": 30,
+            "expected_value": 0.4,
+            "score_value": 25,
             "missing_policy": "score_0_or_fail_by_dimension",
             "priority": 3,
             "enabled": true,
-            "notes": "Excel 初稿同步:互动总量 >=10000 给满分。"
+            "notes": "M3 重写:Gemini 相关性 + 平台热度对数归一化打分。"
           },
           {
-            "scoring_rule_id": "score_interaction_medium",
-            "dimension_key": "interaction_performance",
-            "field_path": "content_engagement_metrics.statistics.total_interactions",
+            "scoring_rule_id": "score_heat_high",
+            "dimension_key": "platform_heat",
+            "field_path": "content_engagement_metrics.platform_heat",
             "operator": "gte",
-            "expected_value": 3000,
-            "score_value": 20,
+            "expected_value": 0.8,
+            "score_value": 40,
             "missing_policy": "score_0_or_fail_by_dimension",
             "priority": 4,
             "enabled": true,
-            "notes": "Excel 初稿同步:互动总量 >=3000 给 20 分。"
+            "notes": "M3 重写:Gemini 相关性 + 平台热度对数归一化打分。"
           },
           {
-            "scoring_rule_id": "score_freshness_recent",
-            "dimension_key": "freshness_available",
-            "field_path": "content.create_time_days_ago",
-            "operator": "lte",
-            "expected_value": 30,
-            "score_value": 20,
+            "scoring_rule_id": "score_heat_mid",
+            "dimension_key": "platform_heat",
+            "field_path": "content_engagement_metrics.platform_heat",
+            "operator": "gte",
+            "expected_value": 0.6,
+            "score_value": 30,
             "missing_policy": "score_0_or_fail_by_dimension",
             "priority": 5,
             "enabled": true,
-            "notes": "Excel 初稿同步:30 天内给满分。"
+            "notes": "M3 重写:Gemini 相关性 + 平台热度对数归一化打分。"
           },
           {
-            "scoring_rule_id": "score_freshness_available",
-            "dimension_key": "freshness_available",
-            "field_path": "content_risk_check.availability",
-            "operator": "eq",
-            "expected_value": "available",
-            "score_value": 12,
+            "scoring_rule_id": "score_heat_low",
+            "dimension_key": "platform_heat",
+            "field_path": "content_engagement_metrics.platform_heat",
+            "operator": "gte",
+            "expected_value": 0.4,
+            "score_value": 20,
             "missing_policy": "score_0_or_fail_by_dimension",
             "priority": 6,
             "enabled": true,
-            "notes": "Excel 初稿同步:可用但较旧时给保底分。"
+            "notes": "M3 重写:Gemini 相关性 + 平台热度对数归一化打分。"
+          },
+          {
+            "scoring_rule_id": "score_heat_min",
+            "dimension_key": "platform_heat",
+            "field_path": "content_engagement_metrics.platform_heat",
+            "operator": "gte",
+            "expected_value": 0.2,
+            "score_value": 10,
+            "missing_policy": "score_0_or_fail_by_dimension",
+            "priority": 7,
+            "enabled": true,
+            "notes": "M3 重写:Gemini 相关性 + 平台热度对数归一化打分。"
           }
         ]
       },
@@ -610,11 +602,10 @@
       "dispatch_enabled": true,
       "hard_gate_primary_reason_priority": [
         "missing_platform_content_id",
-        "pattern_recall_required",
+        "content_not_fit_senior",
+        "content_low_confidence",
         "missing_source_evidence",
-        "category_or_element_binding_required",
-        "missing_content_portrait",
-        "age_50_plus_weak",
+        "content_judge_failed",
         "content_score_reject",
         "high_risk_content"
       ]
@@ -1459,6 +1450,30 @@
       "is_hard_gate": true,
       "priority": 80,
       "business_explanation": "内容风险等级 high / blocked,不允许入池。"
+    },
+    {
+      "decision_reason_code": "content_not_fit_senior",
+      "reason_label": "不适合50+老人",
+      "reason_category": "not_fit_senior",
+      "is_hard_gate": true,
+      "priority": 20,
+      "business_explanation": "Gemini 判定内容不适合中国50岁以上老年人观看。"
+    },
+    {
+      "decision_reason_code": "content_low_confidence",
+      "reason_label": "判定置信度低",
+      "reason_category": "low_confidence",
+      "is_hard_gate": true,
+      "priority": 25,
+      "business_explanation": "Gemini 对 50+ 适配判定的置信度低于 0.6。"
+    },
+    {
+      "decision_reason_code": "content_judge_failed",
+      "reason_label": "判定技术失败",
+      "reason_category": "judge_failed",
+      "is_hard_gate": true,
+      "priority": 15,
+      "business_explanation": "视频下载/压缩/Gemini 调用技术失败,降级待复看。"
     }
   ]
 }

BIN
tech_documents/规则包映射/规则包映射配置表.xlsx


+ 7 - 7
tests/fixtures/snapshots/matrix/real_id45__default.json

@@ -1,16 +1,16 @@
 {
   "effect_status_counts": {
     "failed": 0,
-    "pending": 4,
+    "pending": 2,
     "rule_blocked": 0,
-    "success": 0
+    "success": 2
   },
-  "pooled": 0,
+  "pooled": 2,
   "reasons": [
-    "missing_content_portrait",
-    "missing_content_portrait",
-    "missing_content_portrait",
-    "missing_content_portrait"
+    "content_score_pool",
+    "content_score_pool",
+    "content_score_review",
+    "content_score_review"
   ],
   "rejected": 0
 }

+ 0 - 16
tests/fixtures/snapshots/matrix/real_id45__relaxed_portrait.json

@@ -1,16 +0,0 @@
-{
-  "effect_status_counts": {
-    "failed": 4,
-    "pending": 0,
-    "rule_blocked": 0,
-    "success": 0
-  },
-  "pooled": 0,
-  "reasons": [
-    "content_score_reject",
-    "content_score_reject",
-    "content_score_reject",
-    "missing_score"
-  ],
-  "rejected": 4
-}

+ 4 - 4
tests/fixtures/snapshots/matrix/real_id45__portrait_reject.json → tests/fixtures/snapshots/matrix/real_id45__senior_block.json

@@ -7,10 +7,10 @@
   },
   "pooled": 0,
   "reasons": [
-    "missing_content_portrait",
-    "missing_content_portrait",
-    "missing_content_portrait",
-    "missing_content_portrait"
+    "content_not_fit_senior",
+    "content_not_fit_senior",
+    "content_not_fit_senior",
+    "content_not_fit_senior"
   ],
   "rejected": 4
 }

+ 2 - 2
tests/fixtures/snapshots/real_id45/decision_summary.json

@@ -1,6 +1,6 @@
 {
   "pending_content_count": 0,
-  "pooled_content_count": 0,
+  "pooled_content_count": 2,
   "rejected_content_count": 0,
-  "review_content_count": 4
+  "review_content_count": 2
 }

+ 4 - 1
tests/test_api.py

@@ -40,7 +40,10 @@ def test_api_runs_and_queries_mock_chain(tmp_path, monkeypatch):
         assert get_response.status_code == 200, path
 
     review = client.get(f"/runs/{run_id}/strategy-review").json()["data"]
-    assert review["summary"]["pooled_content_count"] == 1
+    # M3 受控变化: 画像门槛退役,改 Gemini 相关性 + 平台热度打分;mock 链热度不足,
+    # 三条内容落复看带(原 1 进池)。
+    assert review["summary"]["pooled_content_count"] == 0
+    assert review["summary"]["review_content_count"] == 3
     assert review["suggestions"]
 
     validation = client.get(f"/runs/{run_id}/validation").json()

+ 39 - 18
tests/test_case_replay.py

@@ -1,12 +1,12 @@
 """Real + synthetic case replay tests (V2-M0D).
 
-- real_id45: the harvested production baseline (demand_content.id=45). Pre-M3 it
-  replayed to all-REJECT; M3C (portrait missing -> KEEP_CONTENT_FOR_REVIEW/pending)
-  moved it to all-KEEP, because replay recomputes pattern recall with fake success
-  clients so the portrait gate is the only blocker (harvested DB facts differ:
-  1 missing_content_portrait + 3 content_pattern_recall_required).
-- syn_pool / syn_review: synthetic corpora (authored with full portrait fields)
-  exercise the ADD / KEEP paths the real baseline cannot (its portrait is empty).
+- real_id45: the harvested production baseline (demand_content.id=45). M3 受控变化:
+  画像门槛(missing_content_portrait / pattern_recall_required 等)整体退役,改由
+  Gemini 相关性(max60)+ 平台热度(max40)打分,≥70 进池 / 60-69 复看 / <60 拒。
+  默认 FakeGeminiVideoClient 给 relevance_score=0.85(→relevance 60),热度按各 item
+  digg_count 对数归一化,real_id45 因此落 2 进池 + 2 复看(原全 KEEP)。
+- syn_pool / syn_review: synthetic corpora (authored with high/low engagement)
+  exercise the ADD / KEEP paths via the same relevance + platform-heat scoring.
 
 Snapshots lock the deterministic replay output; regenerate with UPDATE_SNAPSHOTS=1.
 """
@@ -74,14 +74,28 @@ def _synthetic_item(content_id: str, *, age_level: str, digg: int) -> dict[str,
     }
 
 
-def test_replay_id45_baseline_portrait_review(tmp_path):
+def test_replay_id45_baseline_gemini_score(tmp_path):
+    # M3 受控变化: 画像门槛退役,改 Gemini 相关性 + 平台热度打分。
+    # 默认 FakeGeminiVideoClient 返回 fit_senior_50plus=true / relevance_score=0.85
+    # → relevance=60(满分)。平台热度按各 item digg_count 对数归一化:
+    # real_id45 digg=[72459,20801,491098,24]。digg 491098 → heat≥0.8 → 40 → 总分100,
+    # digg 72459 → heat≈0.4 → 20 → 总分80(两条 ≥70 进池);digg 20801、24 热度过低
+    # → 0 → 总分60(两条落 60-69 复看)。无拒、无 pending。
     artifacts = replay_case("real_id45", runtime_root=tmp_path / "rt")
     assert artifacts.state["status"] == "success"
-    # M3C 受控变化: portrait-missing content goes to review instead of reject.
-    assert artifacts.summary["review_content_count"] == 4
+    assert artifacts.summary["pooled_content_count"] == 2
+    assert artifacts.summary["review_content_count"] == 2
     assert artifacts.summary["rejected_content_count"] == 0
-    assert artifacts.summary["pooled_content_count"] == 0
-    assert _decision_counts(artifacts) == {"KEEP_CONTENT_FOR_REVIEW": 4}
+    assert artifacts.summary["pending_content_count"] == 0
+    assert _decision_counts(artifacts) == {
+        "ADD_TO_CONTENT_POOL": 2,
+        "KEEP_CONTENT_FOR_REVIEW": 2,
+    }
+    # 全部命中相关性+热度打分门(旧画像 reason_code 已退役)。
+    assert {d.get("decision_reason_code") for d in artifacts.decisions} == {
+        "content_score_pool",
+        "content_score_review",
+    }
     assert_matches("real_id45/decision_summary", artifacts.summary, subset_keys=_SUMMARY_KEYS)
 
 
@@ -105,23 +119,30 @@ def test_replay_synthetic_review_case(tmp_path):
 
 
 def test_replay_id45_walk_obeys_decisions_after_m4(tmp_path):
-    # M4 受控变化: KEEP/pending 的 query 不翻页、tag 不扩、作者只走低预算,动作全部带归属包与执行事实。
+    # M3 受控变化: 画像门槛退役后 real_id45 拿到 2 进池 + 2 复看(原全 KEEP)。
+    # 进池内容驱动正常预算扩散——query 翻页、tag 扩词、作者抓作品均 success/normal;
+    # 仅 2 条复看内容触发 budget_downgrade(low_budget)。动作仍全部带归属包与执行事实。
     artifacts = replay_case("real_id45", runtime_root=tmp_path / "rt")
     walk_actions = artifacts.files["walk_actions.jsonl"]
 
-    assert not [row for row in walk_actions if row["edge_id"] == "query_next_page"]
+    next_page = [row for row in walk_actions if row["edge_id"] == "query_next_page"]
+    assert next_page
+    assert all(row["walk_status"] == "success" for row in next_page)
 
     tag_actions = [row for row in walk_actions if row["edge_id"] == "hashtag_to_query"]
     assert tag_actions
-    assert all(row["walk_status"] == "skipped" for row in tag_actions)
-    assert all(row["reason_code"] == "review_tag_expansion_disabled" for row in tag_actions)
+    assert all(row["walk_status"] == "success" for row in tag_actions)
+    assert all(row["budget_tier"] == "normal" for row in tag_actions)
 
     author_actions = [row for row in walk_actions if row["edge_id"] == "author_to_works"]
     assert author_actions
-    assert all(row["budget_tier"] == "low_budget" for row in author_actions)
+    assert all(row["walk_status"] == "success" for row in author_actions)
+    assert all(row["budget_tier"] == "normal" for row in author_actions)
 
     downgrades = [row for row in walk_actions if row["edge_id"] == "budget_downgrade"]
-    assert len(downgrades) == 4
+    assert len(downgrades) == 2
+    assert all(row["budget_tier"] == "low_budget" for row in downgrades)
+    assert all(row["reason_code"] == "content_score_review" for row in downgrades)
     assert all(row["rule_pack_id"] == "douyin_budget_observe_rule_pack_v1" for row in downgrades)
 
     for row in walk_actions:

+ 28 - 38
tests/test_config_case_matrix.py

@@ -21,19 +21,23 @@ from tests.snapshot import assert_matches
 ROOT = Path(__file__).resolve().parents[1]
 _RULE_PACK_REL = "product_documents/规则包/douyin_rule_packs.v1.json"
 _WALK_REL = "product_documents/抖音游走策略/douyin_walk_strategy.v1.json"
-_PORTRAIT_GATES = {"missing_content_portrait", "age_50_plus_weak"}
 
 
-def _relaxed_portrait_store(root: Path) -> JsonPolicyBundleStore:
-    """A config variant root with the portrait hard gates removed."""
+def _senior_block_store(root: Path) -> JsonPolicyBundleStore:
+    """M3 config variant: flip the not_fit_senior gate to fire on fit_senior_50plus == true.
+
+    The captured case's mock Gemini judgment marks every item fit (fit_senior_50plus=true),
+    so inverting the gate's expected value blocks the whole batch by config alone — a clean
+    counterproof that the hard gate (and the downstream walk) is config-driven, not hardcoded.
+    """
     (root / _RULE_PACK_REL).parent.mkdir(parents=True, exist_ok=True)
     (root / _WALK_REL).parent.mkdir(parents=True, exist_ok=True)
     shutil.copy(ROOT / _WALK_REL, root / _WALK_REL)
     package = json.loads((ROOT / _RULE_PACK_REL).read_text(encoding="utf-8"))
     for pack in package.get("rule_packs", []):
-        pack["hard_gates"] = [
-            gate for gate in pack.get("hard_gates", []) if gate.get("gate_id") not in _PORTRAIT_GATES
-        ]
+        for gate in pack.get("hard_gates", []):
+            if gate.get("gate_id") == "not_fit_senior":
+                gate["when"]["value"] = True
     (root / _RULE_PACK_REL).write_text(json.dumps(package, ensure_ascii=False, indent=2), encoding="utf-8")
     return JsonPolicyBundleStore(root)
 
@@ -47,32 +51,15 @@ def _outcome(artifacts) -> dict:
     }
 
 
-def _portrait_reject_store(root: Path) -> JsonPolicyBundleStore:
-    """M3C counterproof variant: flip missing_content_portrait back to REJECT by config only."""
-    (root / _RULE_PACK_REL).parent.mkdir(parents=True, exist_ok=True)
-    (root / _WALK_REL).parent.mkdir(parents=True, exist_ok=True)
-    shutil.copy(ROOT / _WALK_REL, root / _WALK_REL)
-    package = json.loads((ROOT / _RULE_PACK_REL).read_text(encoding="utf-8"))
-    for pack in package.get("rule_packs", []):
-        for gate in pack.get("hard_gates", []):
-            if gate.get("gate_id") == "missing_content_portrait":
-                gate["decision_action"] = "REJECT_CONTENT"
-                gate["severity"] = "fatal"
-    (root / _RULE_PACK_REL).write_text(json.dumps(package, ensure_ascii=False, indent=2), encoding="utf-8")
-    return JsonPolicyBundleStore(root)
-
-
 def _variant_overrides(variant: str, cfg_dir: Path):
     if variant == "default":
         return None
-    if variant == "relaxed_portrait":
-        return {"policy_store": _relaxed_portrait_store(cfg_dir)}
-    if variant == "portrait_reject":
-        return {"policy_store": _portrait_reject_store(cfg_dir)}
+    if variant == "senior_block":
+        return {"policy_store": _senior_block_store(cfg_dir)}
     raise ValueError(variant)
 
 
-@pytest.mark.parametrize("variant", ["default", "relaxed_portrait", "portrait_reject"])
+@pytest.mark.parametrize("variant", ["default", "senior_block"])
 def test_matrix_real_id45(variant, tmp_path):
     overrides = _variant_overrides(variant, tmp_path / "cfg")
     artifacts = replay_case("real_id45", runtime_root=tmp_path / "rt", config_overrides=overrides)
@@ -80,23 +67,26 @@ def test_matrix_real_id45(variant, tmp_path):
     assert_matches(f"matrix/real_id45__{variant}", _outcome(artifacts))
 
 
-def test_relaxed_portrait_changes_outcome(tmp_path):
+def test_senior_block_changes_outcome(tmp_path):
     base = _outcome(replay_case("real_id45", runtime_root=tmp_path / "rt0"))
-    relaxed = _outcome(
+    blocked = _outcome(
         replay_case(
             "real_id45",
             runtime_root=tmp_path / "rt1",
-            config_overrides={"policy_store": _relaxed_portrait_store(tmp_path / "cfg")},
+            config_overrides={"policy_store": _senior_block_store(tmp_path / "cfg")},
         )
     )
-    # Decoupling proof: one config edit visibly moves the captured case's outcome.
-    assert base != relaxed
-    assert "missing_content_portrait" in base["reasons"]
-    assert "missing_content_portrait" not in relaxed["reasons"]
-    # M3C 受控变化: default config now parks portrait-missing content as pending.
-    assert base["effect_status_counts"]["pending"] == 4
+    # Decoupling proof: one config edit on the not_fit_senior gate visibly moves the outcome.
+    assert base != blocked
+    # Default: no item is blocked by the senior-fit gate; items flow into pool / review.
+    assert "content_not_fit_senior" not in base["reasons"]
     assert base["effect_status_counts"]["rule_blocked"] == 0
-    assert relaxed["effect_status_counts"]["failed"] == 4
+    assert base["pooled"] == 2
+    # Blocked variant: every item trips the (config-inverted) hard gate -> rule_blocked reject.
+    assert blocked["reasons"] == ["content_not_fit_senior"] * 4
+    assert blocked["effect_status_counts"]["rule_blocked"] == 4
+    assert blocked["pooled"] == 0
+    assert blocked["rejected"] == 4
 
 
 def test_matrix_query_profile_variant():
@@ -113,12 +103,12 @@ def test_decoupling_counterproof():
     assert 'target_entity") == "Content"' not in source
 
 
-def test_portrait_reject_blocks_all_walk_expansion(tmp_path):
+def test_senior_block_blocks_all_walk_expansion(tmp_path):
     # M4 受控变化: 全拦截(rule_blocked)时翻页/作者/tag 全停,path_stop 归属 Path 包但执行包是 Content。
     artifacts = replay_case(
         "real_id45",
         runtime_root=tmp_path / "rt",
-        config_overrides={"policy_store": _portrait_reject_store(tmp_path / "cfg")},
+        config_overrides={"policy_store": _senior_block_store(tmp_path / "cfg")},
     )
     walk_actions = artifacts.files["walk_actions.jsonl"]
 

+ 10 - 3
tests/test_dual_channel_gemini_replay.py

@@ -16,6 +16,11 @@ def _items(artifacts):
 
 
 def test_replay_writes_gemini_fields_to_pattern_match_result(tmp_path):
+    # M3 受控变化: pattern_match_result 落 Gemini 4 字段(fit_senior_50plus /
+    # fit_confidence / relevance_score / reason)+ judge_status,旧 M2→M3 桥接键
+    # (pattern_recall / category_or_element_binding)随画像门槛退役而移除。
+    # pool stub(relevance 0.85 → relevance 60)在视频号 sph_caihong(digg=[92,282,469,
+    # 1153,1272],锚 50/5e4)上:digg 92 热度过低落 60-69 复看,其余 4 条进池。
     artifacts = replay_case(
         "sph_caihong",
         runtime_root=tmp_path / "rt",
@@ -29,12 +34,14 @@ def test_replay_writes_gemini_fields_to_pattern_match_result(tmp_path):
         assert pmr["fit_senior_50plus"] is True
         assert pmr["relevance_score"] == 0.85
         assert set(pmr) >= {"fit_senior_50plus", "fit_confidence", "relevance_score", "reason"}
-        # M2→M3 桥接键
-        assert pmr["pattern_recall"] == "matched"
-        assert pmr["category_or_element_binding"] == "matched"
+        assert pmr["judge_status"] == "ok"
         # 画像列镜像
         assert item["content_audience_profile"]["fit_senior_50plus"] is True
 
+    assert artifacts.summary["pooled_content_count"] == 4
+    assert artifacts.summary["review_content_count"] == 1
+    assert artifacts.summary["rejected_content_count"] == 0
+
 
 def test_replay_real_id45_validation_pass_with_bridge(tmp_path):
     artifacts = replay_case(

+ 14 - 8
tests/test_p7_lineage_validation.py

@@ -1,20 +1,26 @@
 import json
 
 from content_agent.run_service import RunService
-from content_agent.schemas import RunStartRequest
-from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
+from tests.p1_helpers import FakeQueryVariantClient
+from tests.replay_harness import replay_case
 
 
 def _start_mock_run(tmp_path):
+    # M3: the mock platform's digg counts fall below the douyin heat floor, so the
+    # plain mock source yields only review decisions (relevance caps at 60) and zero
+    # pooled content_assets — leaving nothing for the decision_to_asset checks to
+    # bite on. Drive a real pooled run via the real_id45 replay corpus (2 pooled +
+    # 2 review under the M3 relevance+heat scoring), then re-attach a RunService to
+    # the same runtime root to validate and tamper the produced lineage.
+    runtime_root = tmp_path / "rt"
+    artifacts = replay_case("real_id45", runtime_root=runtime_root)
+    assert artifacts.state["status"] == "success"
+    assert artifacts.files["final_output.json"]["content_assets"]
     service = RunService(
-        runtime_root=tmp_path / "runtime" / "v1",
+        runtime_root=runtime_root,
         query_variant_client=FakeQueryVariantClient(),
     )
-    state = service.start_run(
-        RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
-    )
-    assert state["status"] == "success"
-    return service, state["run_id"]
+    return service, artifacts.run_id
 
 
 def test_content_asset_requires_decision_to_asset_path(tmp_path):

+ 20 - 2
tests/test_p7_publish_jobs.py

@@ -1,7 +1,12 @@
+import json
+
 from content_agent.integrations.runtime_files import LocalRuntimeFileStore, RUNTIME_FILENAMES
 from content_agent.run_service import RunService
 from content_agent.schemas import RunStartRequest
-from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
+from tests.gemini_helpers import FakeGeminiVideoClient
+from tests.p1_helpers import FakeQueryVariantClient
+from tests.replay_clients import CorpusPlatformClient
+from tests.replay_harness import load_corpus
 
 
 class CapturingRuntimeStore(LocalRuntimeFileStore):
@@ -16,14 +21,27 @@ class CapturingRuntimeStore(LocalRuntimeFileStore):
 
 
 def test_pooled_content_generates_db_only_publish_jobs(tmp_path):
+    # M3: the mock platform's digg counts sit below the douyin heat floor, so a plain
+    # mock source pools nothing (relevance caps at 60 → review). Feed the real_id45
+    # replay corpus through the same pipeline (2 items pool under M3 relevance+heat
+    # scoring) so there are pooled content_assets to emit publish jobs for.
+    corpus = load_corpus("real_id45")
+    source_path = tmp_path / "source_context.json"
+    source_path.write_text(
+        json.dumps(corpus["source_context.json"], ensure_ascii=False), encoding="utf-8"
+    )
+    discovered = corpus.get("discovered_content_items.jsonl", [])
+
     runtime = CapturingRuntimeStore(tmp_path / "runtime" / "v1")
     service = RunService(
         runtime=runtime,
         query_variant_client=FakeQueryVariantClient(),
+        gemini_video_client=FakeGeminiVideoClient(),
     )
+    service._platform_client = lambda platform, platform_mode: CorpusPlatformClient(discovered)
 
     state = service.start_run(
-        RunStartRequest(platform_mode="mock", source=str(REAL_SOURCE_FIXTURE))
+        RunStartRequest(platform_mode="mock", source=str(source_path))
     )
 
     assert state["status"] == "success"

+ 38 - 0
tests/test_platform_heat.py

@@ -0,0 +1,38 @@
+"""V3-M3A: platform heat log-normalization unit tests."""
+
+from __future__ import annotations
+
+from content_agent.business_modules.content_discovery.platform_heat import heat_score
+
+
+def test_heat_floor_maps_to_zero():
+    assert heat_score(10000, "douyin") == 0.0
+    assert heat_score(3, "douyin") == 0.0
+
+
+def test_heat_ceil_maps_to_one():
+    assert heat_score(1000000, "douyin") == 1.0
+    assert heat_score(5034215, "douyin") == 1.0
+
+
+def test_heat_midpoint_between_zero_and_one():
+    # geometric midpoint of (1e4, 1e6) is 1e5 -> ~0.5
+    assert abs(heat_score(100000, "douyin") - 0.5) < 0.01
+
+
+def test_heat_missing_or_zero_digg_is_zero():
+    assert heat_score(None, "douyin") == 0.0
+    assert heat_score(0, "shipinhao") == 0.0
+    assert heat_score("not-a-number", "douyin") == 0.0
+
+
+def test_heat_unknown_platform_uses_default_anchor():
+    # default anchors (100, 1e5): below floor -> 0, at/above ceil -> 1
+    assert heat_score(50, "bilibili") == 0.0
+    assert heat_score(100000, "bilibili") == 1.0
+
+
+def test_heat_shipinhao_low_digg_not_unfairly_zero():
+    # 92 likes is meaningful on shipinhao (anchors 50..5e4) though tiny on douyin.
+    assert heat_score(92, "shipinhao") > 0.05
+    assert heat_score(92, "douyin") == 0.0

+ 4 - 7
tests/test_policy_replay_data.py

@@ -21,13 +21,10 @@ def test_rule_decisions_and_policy_run_record_include_replay_metadata(tmp_path):
     assert replay["policy_bundle_hash"] == state["policy_bundle"]["policy_bundle_hash"]
     assert replay["dispatch_id"] == "dispatch_content"
     assert replay["rule_pack_id"] == "douyin_content_discovery_rule_pack_v1"
-    assert replay["matched_threshold"] == "70<=score<=100"
-    assert replay["effect_mapping_id"] == "map_add_to_pool_success"
-    assert replay["matched_scoring_rules"] == [
-        "score_content_profile_medium",
-        "score_interaction_high",
-        "score_freshness_available",
-    ]
+    # M3: mock judgment scores relevance 60 + zero platform_heat = 60 → review band.
+    assert replay["matched_threshold"] == "60<=score<=69"
+    assert replay["effect_mapping_id"] == "map_keep_for_review_pending"
+    assert replay["matched_scoring_rules"] == ["score_relevance_high"]
 
     policy_run = runtime.policy_runs[0]
     assert policy_run["policy_bundle_hash"] == state["policy_bundle"]["policy_bundle_hash"]

+ 7 - 6
tests/test_query_effect_aggregation.py

@@ -21,12 +21,13 @@ def test_search_clues_aggregate_query_effect_status_from_decisions(tmp_path):
         for clue in service.read_jsonl(state["run_id"], "search_clues.jsonl")
     }
 
-    assert clues["q_001"]["search_query_effect_status"] == "success"
-    # M3C 受控变化: the portrait-missing mock content is pending (was rule_blocked).
-    assert clues["q_001"]["effect_status_counts"] == {"success": 1, "pending": 1}
-    assert clues["q_001"]["query_aggregation_id"] == "agg_query_success"
-    assert clues["q_001"]["raw_payload"]["query_aggregation_id"] == "agg_query_success"
-    assert clues["q_001"]["walk_next_step"] == "keep_or_consider_next_page"
+    # M3 受控变化: mock content scores relevance 60 + zero platform_heat = 60, so both
+    # of q_001's contents land in the review band → query aggregates to pending.
+    assert clues["q_001"]["search_query_effect_status"] == "pending"
+    assert clues["q_001"]["effect_status_counts"] == {"pending": 2}
+    assert clues["q_001"]["query_aggregation_id"] == "agg_query_pending"
+    assert clues["q_001"]["raw_payload"]["query_aggregation_id"] == "agg_query_pending"
+    assert clues["q_001"]["walk_next_step"] == "review_later_or_small_budget"
     assert clues["q_002"]["search_query_effect_status"] == "pending"
     assert clues["q_002"]["effect_status_counts"] == {"pending": 1}
     assert clues["q_002"]["query_aggregation_id"] == "agg_query_pending"

+ 56 - 15
tests/test_replay_gemini_seam.py

@@ -1,36 +1,77 @@
-"""V3-M0C: gemini_video_client injection seam on the replay harness.
+"""V3-M3: gemini_video_client drives the judgment seam end-to-end.
 
-The seam is additive: in M0 the real pipeline does not consume
-`_gemini_video_client`, so passing a fake must not change replay results.
-M2 wires recall_pattern to the client and upgrades these assertions.
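+M3 controlled change: the portrait gate is retired; the 4 Gemini fields (fit_senior_50plus /
+fit_confidence / relevance_score / reason) plus judge_status are persisted via recall_pattern,
+then scored on relevance (max 60) + platform heat (max 40). Three outcomes are pinned here:
+default (pool stub, relevance 0.85) -> mostly pooled; review stub (relevance 0.45) -> downgraded
+by score; fail (judge_status=failed) -> every item held for review as content_judge_failed.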
 """
 
 from __future__ import annotations
 
-from tests.gemini_helpers import FakeGeminiVideoClient, fake_gemini_review
+from tests.gemini_helpers import (
+    FakeGeminiVideoClient,
+    fake_gemini_fail,
+    fake_gemini_review,
+)
 from tests.replay_harness import replay_case
 
 
-def test_replay_default_unchanged_baseline(tmp_path):
+def test_replay_default_pool_stub_scores_into_pool(tmp_path):
+    # 默认 FakeGeminiVideoClient = pool stub(relevance 0.85 → relevance 60)。
+    # real_id45 digg=[72459,20801,491098,24]:高热两条进池、低热两条 60-69 复看。
     artifacts = replay_case("real_id45", runtime_root=tmp_path / "rt")
     assert artifacts.state["status"] == "success"
-    assert artifacts.summary["review_content_count"] == 4
-    assert artifacts.summary["pooled_content_count"] == 0
+    assert artifacts.summary["pooled_content_count"] == 2
+    assert artifacts.summary["review_content_count"] == 2
+    assert artifacts.summary["rejected_content_count"] == 0
+    assert [d["decision_reason_code"] for d in artifacts.decisions] == [
+        "content_score_pool",
+        "content_score_pool",
+        "content_score_review",
+        "content_score_review",
+    ]
 
 
-def test_replay_accepts_gemini_client_without_breaking(tmp_path):
-    baseline = replay_case("real_id45", runtime_root=tmp_path / "rt_base")
-    seamed = replay_case(
+def test_replay_review_stub_scores_by_relevance(tmp_path):
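+    # review stub: relevance_score 0.45 -> the relevance dimension earns only 25. After adding each item's
+    # heat, only the hottest item reaches the 60-69 review band; the other three score <60 and are rejected (none of them pooled or pending).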
+    artifacts = replay_case(
         "real_id45",
-        runtime_root=tmp_path / "rt_seam",
+        runtime_root=tmp_path / "rt",
         gemini_video_client=FakeGeminiVideoClient(default_result=fake_gemini_review()),
     )
-    assert seamed.summary == baseline.summary
-    assert [d["decision_action"] for d in seamed.decisions] == [
-        d["decision_action"] for d in baseline.decisions
+    assert artifacts.state["status"] == "success"
+    assert artifacts.summary["pooled_content_count"] == 0
+    assert artifacts.summary["review_content_count"] == 1
+    assert artifacts.summary["rejected_content_count"] == 3
+    assert sorted(d["decision_reason_code"] for d in artifacts.decisions) == [
+        "content_score_reject",
+        "content_score_reject",
+        "content_score_reject",
+        "content_score_review",
     ]
 
 
+def test_replay_fail_stub_routes_to_judge_failed_review(tmp_path):
+    # judge_status=failed → 硬门槛 content_judge_failed,全部 KEEP_CONTENT_FOR_REVIEW
+    # 待复看(不进池、不拒)。
+    artifacts = replay_case(
+        "real_id45",
+        runtime_root=tmp_path / "rt",
+        gemini_video_client=FakeGeminiVideoClient(default_result=fake_gemini_fail()),
+    )
+    assert artifacts.state["status"] == "success"
+    assert artifacts.summary["review_content_count"] == 4
+    assert artifacts.summary["pooled_content_count"] == 0
+    assert artifacts.summary["rejected_content_count"] == 0
+    assert all(
+        d["decision_action"] == "KEEP_CONTENT_FOR_REVIEW"
+        and d["decision_reason_code"] == "content_judge_failed"
+        for d in artifacts.decisions
+    )
+
+
 def test_replay_sph_caihong_with_gemini_seam(tmp_path):
     artifacts = replay_case(
         "sph_caihong",

+ 10 - 9
tests/test_rule_decision_effect_status.py

@@ -11,23 +11,23 @@ from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
 def test_effect_status_mapping_for_success_pending_failed_and_rule_blocked(tmp_path):
     state = _state(tmp_path)
 
+    # M3: success = relevance 60 + platform_heat 40 (heat >= 0.8) = 100 (>= 70 pool).
+    success_bundle = deepcopy(state["evidence_bundles"][0])
+    success_bundle["content_engagement_metrics"]["platform_heat"] = 0.9
     success = decide(
         state["run_id"],
         state["policy_run_id"],
         1,
-        state["evidence_bundles"][0],
+        success_bundle,
         state["policy_bundle"],
     )
     assert success["decision_action"] == "ADD_TO_CONTENT_POOL"
     assert success["search_query_effect_status"] == "success"
     assert success["decision_replay_data"]["effect_mapping_id"] == "map_add_to_pool_success"
 
+    # M3: pending = relevance 60 + zero platform_heat = 60 (60-69 review band).
     pending_bundle = deepcopy(state["evidence_bundles"][0])
-    pending_bundle["content_engagement_metrics"]["statistics"] = {
-        "digg_count": 3000,
-        "comment_count": 20,
-        "share_count": 70,
-    }
+    pending_bundle["content_engagement_metrics"]["platform_heat"] = 0.0
     pending = decide(
         state["run_id"],
         state["policy_run_id"],
@@ -39,10 +39,11 @@ def test_effect_status_mapping_for_success_pending_failed_and_rule_blocked(tmp_p
     assert pending["search_query_effect_status"] == "pending"
     assert pending["decision_replay_data"]["effect_mapping_id"] == "map_keep_for_review_pending"
 
+    # M3: failed = no scoring rule matches either active dimension (relevance and
+    # platform_heat both below their lowest gte band) → missing_score.
     failed_bundle = deepcopy(state["evidence_bundles"][0])
-    failed_bundle["content_audience_profile"]["age_50_plus_level"] = "unknown"
-    failed_bundle["content_engagement_metrics"]["statistics"] = {}
-    failed_bundle["content_risk_check"]["availability"] = "metadata_only"
+    failed_bundle["pattern_match_result"]["relevance_score"] = 0.0
+    failed_bundle["content_engagement_metrics"]["platform_heat"] = 0.0
     failed = decide(
         state["run_id"],
         state["policy_run_id"],

+ 33 - 16
tests/test_rule_judgment_hard_gates.py

@@ -46,28 +46,35 @@ def test_unknown_hard_gate_operator_fails_fast(tmp_path):
         )
 
 
-def test_pattern_recall_pending_cannot_pass_hard_gate(tmp_path):
+def test_not_fit_senior_is_a_blocking_hard_gate(tmp_path):
+    # M3: pattern_recall gate retired. The senior-fit judgment is now the blocking gate.
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["pattern_match_result"]["pattern_recall"] = "pattern_recall_pending"
+    bundle["pattern_match_result"]["fit_senior_50plus"] = False
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
 
-    assert decision["decision_reason_code"] == "content_pattern_recall_required"
+    assert decision["decision_action"] == "REJECT_CONTENT"
+    assert decision["decision_reason_code"] == "content_not_fit_senior"
     assert decision["search_query_effect_status"] == "rule_blocked"
+    assert decision["triggered_blocking_rules"] == ["not_fit_senior"]
+    # Retired pattern-recall reason code must no longer surface.
+    assert decision["decision_reason_code"] != "content_pattern_recall_required"
 
 
 def test_hard_gate_action_is_taken_from_rule_config(tmp_path):
+    # The judge_failed gate defaults to KEEP_CONTENT_FOR_REVIEW; flipping its config to
+    # REJECT_CONTENT must change the decision purely via config, with no code special-case.
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_audience_profile"] = {}
+    bundle["pattern_match_result"]["judge_status"] = "failed"
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
     assert decision["decision_action"] == "KEEP_CONTENT_FOR_REVIEW"
 
     flipped = deepcopy(state["policy_bundle"])
     for gate in flipped["rule_pack"]["hard_gates"]:
-        if gate["gate_id"] == "missing_content_portrait":
+        if gate["gate_id"] == "judge_failed":
             gate["decision_action"] = "REJECT_CONTENT"
             gate["severity"] = "fatal"
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, flipped)
@@ -75,42 +82,52 @@ def test_hard_gate_action_is_taken_from_rule_config(tmp_path):
     assert decision["search_query_effect_status"] == "rule_blocked"
 
 
-def test_missing_portrait_review_is_config_driven_not_code_special_case(tmp_path):
+def test_judge_failed_review_is_config_driven_not_code_special_case(tmp_path):
+    # M3: the review/pending hard gate is now judge_failed (Gemini technical failure parks
+    # content for human review). This is config-driven, not a code special-case.
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_audience_profile"] = {}
+    bundle["pattern_match_result"]["judge_status"] = "failed"
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
 
     assert decision["decision_action"] == "KEEP_CONTENT_FOR_REVIEW"
-    assert decision["decision_reason_code"] == "missing_content_portrait"
+    assert decision["decision_reason_code"] == "content_judge_failed"
     assert decision["search_query_effect_status"] == "pending"
-    assert decision["triggered_blocking_rules"] == ["missing_content_portrait"]
+    assert decision["triggered_blocking_rules"] == ["judge_failed"]
     assert decision["score"] is None
     assert decision["decision_replay_data"]["effect_mapping_id"] == "map_keep_for_review_pending_hard_gate"
 
 
-def test_age_50_plus_weak_still_rejects_weak_only(tmp_path):
+def test_low_confidence_below_threshold_rejects(tmp_path):
+    # M3: age_50_plus_weak gate retired. Low Gemini confidence is now the blocking gate
+    # (fit_confidence lt 0.6 -> REJECT_CONTENT / content_low_confidence / rule_blocked).
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_audience_profile"]["age_50_plus_level"] = "weak"
+    bundle["pattern_match_result"]["fit_confidence"] = 0.4
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
 
     assert decision["decision_action"] == "REJECT_CONTENT"
-    assert decision["decision_reason_code"] == "age_50_plus_weak"
+    assert decision["decision_reason_code"] == "content_low_confidence"
     assert decision["search_query_effect_status"] == "rule_blocked"
+    assert decision["triggered_blocking_rules"] == ["low_confidence"]
+    # Retired age gate / reason code must no longer surface.
+    assert decision["decision_reason_code"] != "age_50_plus_weak"
 
 
-def test_missing_value_no_longer_hits_age_50_plus_weak_gate(tmp_path):
+def test_low_confidence_lt_boundary_passes_at_threshold(tmp_path):
+    # lt 0.6 boundary: exactly 0.6 is NOT low confidence, so the gate does not fire and
+    # the content falls through to scoring instead of being rule-blocked.
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_audience_profile"]["age_50_plus_level"] = "missing"
+    bundle["pattern_match_result"]["fit_confidence"] = 0.6
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
 
-    assert "age_50_plus_weak" not in decision["triggered_blocking_rules"]
-    assert decision["decision_reason_code"] != "age_50_plus_weak"
+    assert "low_confidence" not in decision["triggered_blocking_rules"]
+    assert decision["decision_reason_code"] != "content_low_confidence"
+    assert decision["search_query_effect_status"] != "rule_blocked"
 
 
 def _state(tmp_path):

+ 47 - 42
tests/test_rule_judgment_scorecard.py

@@ -12,20 +12,28 @@ from tests.p1_helpers import FakeQueryVariantClient, REAL_SOURCE_FIXTURE
 
 def test_scorecard_uses_active_dimensions_and_thresholds(tmp_path):
     state = _state(tmp_path)
+    bundle = deepcopy(state["evidence_bundles"][0])
+    # M3 2-dim scorecard: relevance gte0.8 -> 60, platform_heat gte0.4 -> 20 => 80 (pool).
+    bundle["pattern_match_result"]["relevance_score"] = 0.8
+    bundle["content_engagement_metrics"]["platform_heat"] = 0.4
+
     decision = decide(
         state["run_id"],
         state["policy_run_id"],
         1,
-        state["evidence_bundles"][0],
+        bundle,
         state["policy_bundle"],
     )
 
     assert decision["decision_action"] == "ADD_TO_CONTENT_POOL"
-    assert decision["score"] == 77
+    assert decision["score"] == 80
     dimensions = {row["key"]: row for row in decision["scorecard"]["dimensions"]}
-    assert dimensions["content_audience_profile"]["score"] == 35
-    assert dimensions["interaction_performance"]["score"] == 30
-    assert dimensions["freshness_available"]["score"] == 12
+    assert dimensions["relevance"]["score"] == 60
+    assert dimensions["platform_heat"]["score"] == 20
+    # Deprecated 3-dim/5-dim scorecard dimensions must not enter the runtime scorecard.
+    assert "content_audience_profile" not in dimensions
+    assert "interaction_performance" not in dimensions
+    assert "freshness_available" not in dimensions
     assert "douyin_tone" not in dimensions
 
 
@@ -47,9 +55,9 @@ def test_missing_scoring_rules_fail_fast(tmp_path):
 def test_no_scoring_evidence_uses_missing_score_policy(tmp_path):
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_audience_profile"]["age_50_plus_level"] = "unknown"
-    bundle["content_engagement_metrics"]["statistics"] = {}
-    bundle["content_risk_check"]["availability"] = "metadata_only"
+    # Drop evidence for both active dims (relevance + platform_heat) so no scoring rule matches.
+    bundle["pattern_match_result"].pop("relevance_score", None)
+    bundle["content_engagement_metrics"].pop("platform_heat", None)
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
 
@@ -90,7 +98,7 @@ def test_scoring_rule_unknown_operator_fails_fast(tmp_path):
     policy_bundle = deepcopy(state["policy_bundle"])
     scoring_rules = policy_bundle["rule_pack"]["scorecard"]["scoring_rules"]
     for rule in scoring_rules:
-        if rule["scoring_rule_id"] == "score_content_profile_medium":
+        if rule["scoring_rule_id"] == "score_relevance_high":
             rule["operator"] = "contains"
 
     with pytest.raises(ValueError, match="unsupported rule operator"):
@@ -106,15 +114,18 @@ def test_scoring_rule_unknown_operator_fails_fast(tmp_path):
 def test_single_missing_dimension_scores_zero_and_keeps_threshold_flow(tmp_path):
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_engagement_metrics"]["statistics"] = {}
+    # relevance evidence present (0.8 -> 60); platform_heat evidence absent -> scores 0, not missing_score.
+    bundle["pattern_match_result"]["relevance_score"] = 0.8
+    bundle["content_engagement_metrics"].pop("platform_heat", None)
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
 
     dimensions = {row["key"]: row for row in decision["scorecard"]["dimensions"]}
-    assert dimensions["interaction_performance"]["score_missing"] is True
-    assert dimensions["interaction_performance"]["score"] == 0
-    assert dimensions["content_audience_profile"]["score_missing"] is False
-    assert decision["score"] == 47
+    assert dimensions["platform_heat"]["score_missing"] is True
+    assert dimensions["platform_heat"]["score"] == 0
+    assert dimensions["relevance"]["score_missing"] is False
+    assert dimensions["relevance"]["score"] == 60
+    assert decision["score"] == 60
     assert decision["decision_reason_code"] != "missing_score"
     assert decision["scorecard"]["score_missing"] is False
 
@@ -122,9 +133,9 @@ def test_single_missing_dimension_scores_zero_and_keeps_threshold_flow(tmp_path)
 def test_all_dimensions_missing_uses_score_missing_policy(tmp_path):
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_audience_profile"]["age_50_plus_level"] = "unknown"
-    bundle["content_engagement_metrics"]["statistics"] = {}
-    bundle["content_risk_check"]["availability"] = "metadata_only"
+    # Both active dims (relevance + platform_heat) lack evidence -> score_missing policy.
+    bundle["pattern_match_result"].pop("relevance_score", None)
+    bundle["content_engagement_metrics"].pop("platform_heat", None)
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
 
@@ -138,14 +149,14 @@ def test_all_dimensions_missing_uses_score_missing_policy(tmp_path):
 def test_dimension_missing_metadata_is_recorded(tmp_path):
     state = _state(tmp_path)
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_engagement_metrics"]["statistics"] = {}
+    bundle["content_engagement_metrics"].pop("platform_heat", None)
 
     decision = decide(state["run_id"], state["policy_run_id"], 1, bundle, state["policy_bundle"])
-    assert decision["decision_replay_data"]["missing_dimensions"] == ["interaction_performance"]
+    assert decision["decision_replay_data"]["missing_dimensions"] == ["platform_heat"]
 
-    full_decision = decide(
-        state["run_id"], state["policy_run_id"], 2, state["evidence_bundles"][0], state["policy_bundle"]
-    )
+    full = deepcopy(state["evidence_bundles"][0])
+    full["content_engagement_metrics"]["platform_heat"] = 0.8
+    full_decision = decide(state["run_id"], state["policy_run_id"], 2, full, state["policy_bundle"])
     assert full_decision["decision_replay_data"]["missing_dimensions"] == []
 
 
@@ -160,40 +171,34 @@ def _state(tmp_path):
 
 
 def _policy_with_total_score(policy_bundle, total_score):
+    """Build an exact total score from the two M3 active dims (relevance max60, platform_heat max40).
+
+    Replaces every scoring rule with one always-matching rule per active dimension; the two rules'
+    score_value fields add up to ``total_score`` (relevance carries up to 60, heat the remainder).
+    """
     policy_bundle = deepcopy(policy_bundle)
     scorecard = policy_bundle["rule_pack"]["scorecard"]
-    content_score = min(total_score, 50)
-    remaining = total_score - content_score
-    interaction_score = min(remaining, 30)
-    freshness_score = remaining - interaction_score
+    relevance_score = min(total_score, 60)
+    heat_score = total_score - relevance_score
+    assert heat_score <= 40, "total_score exceeds combined active-dimension caps"
     scorecard["scoring_rules"] = [
         {
-            "scoring_rule_id": "test_content_score",
-            "dimension_key": "content_audience_profile",
-            "field_path": "content.decision_target_type",
-            "operator": "eq",
-            "expected_value": "content",
-            "score_value": content_score,
-            "priority": 1,
-            "enabled": True,
-        },
-        {
-            "scoring_rule_id": "test_interaction_zero",
-            "dimension_key": "interaction_performance",
+            "scoring_rule_id": "test_relevance_score",
+            "dimension_key": "relevance",
             "field_path": "content.decision_target_type",
             "operator": "eq",
             "expected_value": "content",
-            "score_value": interaction_score,
+            "score_value": relevance_score,
             "priority": 1,
             "enabled": True,
         },
         {
-            "scoring_rule_id": "test_freshness_zero",
-            "dimension_key": "freshness_available",
+            "scoring_rule_id": "test_heat_score",
+            "dimension_key": "platform_heat",
             "field_path": "content.decision_target_type",
             "operator": "eq",
             "expected_value": "content",
-            "score_value": freshness_score,
+            "score_value": heat_score,
             "priority": 1,
             "enabled": True,
         },

+ 11 - 5
tests/test_rule_pack_reading.py

@@ -24,7 +24,14 @@ def test_rule_pack_thresholds_drive_decision(tmp_path):
     thresholds[1]["min_score"] = 70
     thresholds[1]["max_score"] = 79
 
-    decision = decide(run_id, state["policy_run_id"], 1, state["evidence_bundles"][0], policy_bundle)
+    # M3 2-dim score: relevance 0.6 -> 45, platform_heat 0.6 -> 30 => 75, lands in the
+    # reconfigured 70<=score<=79 review band.
+    bundle = deepcopy(state["evidence_bundles"][0])
+    bundle["pattern_match_result"]["relevance_score"] = 0.6
+    bundle["content_engagement_metrics"]["platform_heat"] = 0.6
+
+    decision = decide(run_id, state["policy_run_id"], 1, bundle, policy_bundle)
+    assert decision["score"] == 75
     assert decision["decision_action"] == "KEEP_CONTENT_FOR_REVIEW"
     assert decision["decision_reason_code"] == "content_score_review"
     assert decision["decision_replay_data"]["matched_threshold"] == "70<=score<=79"
@@ -64,10 +71,9 @@ def test_missing_score_uses_rule_pack_missing_policy(tmp_path):
     run_id = state["run_id"]
 
     bundle = deepcopy(state["evidence_bundles"][0])
-    bundle["content_audience_profile"]["age_50_plus_level"] = "unknown"
-    bundle["content_engagement_metrics"]["statistics"] = {}
-    bundle["content_risk_check"]["availability"] = "metadata_only"
-    bundle["content"].pop("create_time_days_ago", None)
+    # No evidence for either active dim -> rule pack's score_missing_policy applies.
+    bundle["pattern_match_result"].pop("relevance_score", None)
+    bundle["content_engagement_metrics"].pop("platform_heat", None)
 
     decision = decide(run_id, state["policy_run_id"], 1, bundle, state["policy_bundle"])
     assert decision["decision_action"] == "REJECT_CONTENT"

+ 5 - 1
tests/test_source_evidence.py

@@ -50,7 +50,11 @@ def test_source_evidence_inherits_evidence_pack_without_rewriting_origin(tmp_pat
         not in source_evidence["matched_post_ids"]
     )
     assert source_evidence["discovery_relation"] == "mock_pattern_matched"
-    assert final_output["content_assets"][0]["source_evidence"]["source_path_record_ids"]
+    # M3: mock content scores 60 (relevance 60 + zero heat) → all three land in
+    # review, so the inherited source_evidence now surfaces on review_records rather
+    # than content_assets; the inheritance (carrying source_path_record_ids) is the
+    # property under test, not the pool/review band.
+    assert final_output["review_records"][0]["source_evidence"]["source_path_record_ids"]
     assert {
         record["decision_id"] for record in final_output["decision_records"]
     } == {"d_001", "d_002", "d_003"}

+ 6 - 5
tests/test_v1_graph.py

@@ -22,14 +22,15 @@ def test_v1_graph_generates_all_runtime_files(tmp_path):
 
     final_output = service.read_json(run_id, "final_output.json")
     assert final_output["policy_run_id"] == state["policy_run_id"]
-    assert final_output["summary"]["pooled_content_count"] == 1
-    # M3C 受控变化: the portrait-missing mock content moved to review (was rejected).
-    assert final_output["summary"]["review_content_count"] == 2
+    # M3 controlled change: portrait gate retired for Gemini relevance + platform-heat scoring.
+    # Mock Gemini relevance is 60 and mock heat is too low -> all 3 land in the 60-69 review band (none pooled/rejected).
+    assert final_output["summary"]["pooled_content_count"] == 0
+    assert final_output["summary"]["review_content_count"] == 3
     assert final_output["summary"]["pending_content_count"] == 0
     assert final_output["summary"]["rejected_content_count"] == 0
     assert final_output["summary"]["effect_status_counts"] == {
-        "success": 1,
-        "pending": 2,
+        "success": 0,
+        "pending": 3,
         "failed": 0,
         "rule_blocked": 0,
     }

+ 3 - 0
tests/test_walk_engine_author.py

@@ -52,6 +52,9 @@ def test_author_edge_skips_rejected_content(tmp_path):
 
 def test_author_edge_allows_add_content_pool(tmp_path):
     context = build_initial_walk_context(tmp_path)
+    # M3: mock judgment scores 60 (review/low_budget); promote the seed decision to
+    # pool so the author edge runs at normal budget, which is what this test asserts.
+    _override_decisions(context, "ADD_TO_CONTENT_POOL", "success")
     client = FakeWalkPlatformClient()
 
     result = run_bounded_walk(platform_client=client, **context)

+ 10 - 0
tests/test_walk_engine_loop.py

@@ -2,8 +2,17 @@ from content_agent.business_modules.walk_engine import run_bounded_walk
 from tests.p6_walk_helpers import FakeWalkPlatformClient, build_initial_walk_context
 
 
+def _pool_decisions(context):
+    # M3: mock judgment scores 60 (review/pending); the query_next_page edge needs a
+    # pooled success query, so promote every rule decision to pool to exercise the loop.
+    for decision in context["rule_decisions"]:
+        decision["decision_action"] = "ADD_TO_CONTENT_POOL"
+        decision["search_query_effect_status"] = "success"
+
+
 def test_walk_engine_runs_bounded_edges_in_same_run_and_policy(tmp_path):
     context = build_initial_walk_context(tmp_path)
+    _pool_decisions(context)
     client = FakeWalkPlatformClient()
 
     result = run_bounded_walk(platform_client=client, **context)
@@ -18,6 +27,7 @@ def test_walk_engine_runs_bounded_edges_in_same_run_and_policy(tmp_path):
 
 def test_walk_engine_edge_failure_records_failed_action_not_failed_run(tmp_path):
     context = build_initial_walk_context(tmp_path)
+    _pool_decisions(context)
     client = FakeWalkPlatformClient(fail_next_page=True)
 
     result = run_bounded_walk(platform_client=client, **context)

+ 6 - 0
tests/test_walk_engine_pagination.py

@@ -4,6 +4,9 @@ from tests.p6_walk_helpers import FakeWalkPlatformClient, build_initial_walk_con
 
 def test_walk_engine_pagination_uses_explicit_cursor(tmp_path):
     context = build_initial_walk_context(tmp_path)
+    # M3: mock judgment scores 60 (review/pending); pagination requires a pooled
+    # success query, so promote the seed decision to pool to exercise paging.
+    _override_decisions(context, "ADD_TO_CONTENT_POOL", "success")
     client = FakeWalkPlatformClient()
 
     result = run_bounded_walk(platform_client=client, **context)
@@ -80,6 +83,9 @@ def test_missing_decision_does_not_page(tmp_path):
 
 def test_success_query_with_cursor_pages_once(tmp_path):
     context = build_initial_walk_context(tmp_path)
+    # M3: mock judgment now lands in review/pending; pagination requires a pooled
+    # success query, so promote the seed decision to pool.
+    _override_decisions(context, "ADD_TO_CONTENT_POOL", "success")
     client = FakeWalkPlatformClient()
 
     result = run_bounded_walk(platform_client=client, **context)

+ 3 - 0
tests/test_walk_engine_tag.py

@@ -4,6 +4,9 @@ from tests.p6_walk_helpers import FakeWalkPlatformClient, build_initial_walk_con
 
 def test_walk_engine_tag_query_is_created_only_inside_p6(tmp_path):
     context = build_initial_walk_context(tmp_path)
+    # M3: mock judgment scores 60 (review/pending), which does not expand tags by
+    # default; promote the seed decision to pool so the hashtag edge fires.
+    _override_decisions(context, "ADD_TO_CONTENT_POOL", "success")
     client = FakeWalkPlatformClient()
 
     result = run_bounded_walk(platform_client=client, **context)