Browse Source

feat(M11): 平台表现打分改'量级分 + 收缩后比例分'(配置驱动,三平台各一套)

抛弃旧'纯绝对值 log 归一'(被绝对量绑架,官方大号霸榜):
- 新算法 _performance_score_v2:量级分(absolute,log 归一,体量地板)+ 比例分(ratio,收缩后÷目标封顶)。
  收缩 (分子+C×prior)/(分母+C) 防小样本虚高;分母缺失/为0 → 记 missing、不计权重。profile 无 observable_performance 则回退旧 legacy。
- 三平台按真实字段可得性各一套(配置在 profile 的 observable_performance 块,参数=759 抖音/245 快手历史实测,prior=中位、target=p75、C=500):
  · 抖音:总互动量级 + 转/赞·藏/赞·评/赞(互动间比例)
  · 快手:播放量级 + 赞·藏·评 per 播放真互动率(share 恒0 不设转发项)
  · 视频号:仅点赞量级(其余字段不可得)
- 展示层 flow_ledger 出自然语言文案(转783/赞1532=51%·收缩后X%·目标27%)。

回测(759抖音/245快手历史):被埋高共鸣内容大幅上升、绝对量巨大但比例极低的大号内容合理下降,分数从满屏100/个位数收敛到健康中段。
re-baseline 16 个回放/快照测试到新真实结论(入池/淘汰/游走/指纹),未削弱任何断言;全量 535 passed 0 failed。

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Sam Lee 5 days ago
parent
commit
3bfba3ce68

+ 90 - 0
content_agent/business_modules/content_discovery/platform_observable_performance.py

@@ -13,6 +13,96 @@ DEFAULT_SIGNAL = {"weight": 1.0, "floor": 1.0, "ceil": 100000.0}
 
 def performance_score(statistics: dict[str, Any], platform: str) -> dict[str, Any]:
     profile = _load_profile(platform)
+    # M11:profile 带 observable_performance(量级+收缩比例)→ 走新打分;否则回退旧 heat.signals 绝对值。
+    op = profile.get("observable_performance")
+    if isinstance(op, dict) and isinstance(op.get("components"), list) and op.get("components"):
+        return _performance_score_v2(statistics, platform, op, profile)
+    return _performance_score_legacy(statistics, platform, profile)
+
+
+def _performance_score_v2(
+    statistics: dict[str, Any], platform: str, op: dict[str, Any], profile: dict[str, Any]
+) -> dict[str, Any]:
+    """M11 配置驱动:量级分(absolute,log 归一)+ 比例分(ratio,收缩后÷目标封顶)。
+
+    收缩 `(分子 + C×prior)/(分母 + C)`,防"小样本虚高比例"。分母缺失/为 0 → 记 missing、不计权重。
+    component 带 ratio 元数据(numerator/denominator/raw_ratio/shrunk_ratio/target)供展示层出文案。
+    """
+    components: list[dict[str, Any]] = []
+    missing: list[dict[str, Any]] = list(_natural_missing_fields(profile))
+    for comp in op.get("components", []):
+        if not isinstance(comp, dict):
+            continue
+        weight = float(comp.get("weight") or 0.0)
+        label = comp.get("label") or comp.get("field") or ""
+        if comp.get("type") == "ratio":
+            num_field = str(comp.get("numerator") or "")
+            den_field = str(comp.get("denominator") or "")
+            den = _number(statistics.get(den_field))
+            num = _number(statistics.get(num_field))
+            if den <= 0:
+                missing.append(
+                    {
+                        "field": comp.get("field") or num_field,
+                        "missing_type": "runtime_missing",
+                        "platform": platform,
+                        "reason": "denominator_zero_or_missing",
+                    }
+                )
+                continue
+            target = float(comp.get("target") or 0.0)
+            prior = float(comp.get("prior") or 0.0)
+            c = float(comp.get("c") or op.get("c") or 500.0)
+            raw = num / den
+            shrunk = (num + c * prior) / (den + c)
+            score = min(shrunk / target, 1.0) * 100 if target > 0 else 0.0
+            components.append(
+                {
+                    "field": comp.get("field") or f"{num_field}_over_{den_field}",
+                    "label": label,
+                    "type": "ratio",
+                    "weight": weight,
+                    "numerator_field": num_field,
+                    "denominator_field": den_field,
+                    "numerator_value": num,
+                    "denominator_value": den,
+                    "raw_ratio": round(raw, 4),
+                    "shrunk_ratio": round(shrunk, 4),
+                    "target": target,
+                    "normalized_score": round(score, 2),
+                }
+            )
+        else:
+            sum_fields = comp.get("sum_fields")
+            if isinstance(sum_fields, list) and sum_fields:
+                value = sum(_number(statistics.get(f)) for f in sum_fields)
+            else:
+                value = _number(statistics.get(comp.get("field")))
+            floor = float(comp.get("floor", DEFAULT_SIGNAL["floor"]))
+            ceil = float(comp.get("ceil", DEFAULT_SIGNAL["ceil"]))
+            score = _log_norm(value, floor, ceil) * 100
+            components.append(
+                {
+                    "field": comp.get("field") or "total_interaction",
+                    "label": label,
+                    "type": "absolute",
+                    "weight": weight,
+                    "value": value,
+                    "normalized_score": round(score, 2),
+                }
+            )
+    return {
+        "schema_version": SCHEMA_VERSION,
+        "platform": platform,
+        "platform_performance_score": _weighted_score(components),
+        "platform_performance_components": components,
+        "missing_observable_fields": missing,
+    }
+
+
+def _performance_score_legacy(
+    statistics: dict[str, Any], platform: str, profile: dict[str, Any]
+) -> dict[str, Any]:
     observable = _observable_fields(profile)
     natural_missing = _natural_missing_fields(profile)
     signals = _signals_by_field(profile)

+ 48 - 3
content_agent/flow_ledger_service.py

@@ -1535,11 +1535,19 @@ def _score_items_from_values(
                 weight=weights.get("fifty"), group="main", status=fifty.get("status"), allow_none=True,
             ))
     for component in components:
-        field = _text(component.get("field"))
+        if component.get("type"):
+            # M11:新打分 component 自带 label(中文)+ type(absolute/ratio)
+            label = _text(component.get("label")) or _platform_component_label(_text(component.get("field")))
+            detail = _platform_component_detail(component)
+        else:
+            # 旧 run 的 component(无 type):沿用绝对值展示
+            field = _text(component.get("field"))
+            label = _platform_component_label(field)
+            detail = _platform_component_raw_text(field, component.get("value"))
         items.append(_score_item(
-            _platform_component_label(field),
+            label,
             component.get("normalized_score"),
-            _platform_component_raw_text(field, component.get("value")),
+            detail,
             weight=_float_or_none(component.get("weight")),
             group="platform",
         ))
@@ -1629,6 +1637,43 @@ def _platform_component_raw_text(field: str, value: Any) -> str:
     return f"该视频原始 {number} {unit}" if unit else f"该视频原始值 {number}"
 
 
+def _metric_unit(field: str) -> str:
+    """指标字段 → 中文短词(用于 ratio detail,如 转发/赞/播放)。"""
+    return {
+        "digg_count": "赞",
+        "like_count": "赞",
+        "share_count": "转发",
+        "collect_count": "收藏",
+        "comment_count": "评论",
+        "play_count": "播放",
+    }.get(_text(field).removeprefix("statistics."), _text(field))
+
+
+def _ratio_pct(value: Any) -> str:
+    num = _float_or_none(value)
+    return f"{round(num * 100, 1):g}%" if num is not None else "—"
+
+
+def _platform_component_detail(component: dict[str, Any]) -> str:
+    """M11 平台分项的展示文案:ratio 出「转发 783 / 赞 1532 = 51% · 收缩后 X% · 目标 27%」;
+    absolute 出体量/原始量。"""
+    if _text(component.get("type")) == "ratio":
+        nu = _metric_unit(component.get("numerator_field"))
+        du = _metric_unit(component.get("denominator_field"))
+        return (
+            f"{nu} {_num_text(component.get('numerator_value'))} / {du} {_num_text(component.get('denominator_value'))}"
+            f" = {_ratio_pct(component.get('raw_ratio'))}"
+            f" · 收缩后 {_ratio_pct(component.get('shrunk_ratio'))}"
+            f" · 目标 {_ratio_pct(component.get('target'))}"
+        )
+    field = _text(component.get("field"))
+    value = component.get("value")
+    if field == "total_interaction":
+        return f"内容体量(赞+评+转+藏 共 {_num_text(value)})"
+    unit = _metric_unit(field)
+    return f"该视频原始 {_num_text(value)} {unit}" if unit else f"该视频原始值 {_num_text(value)}"
+
+
 def _percent_text(value: Any) -> str:
     number = _float_or_none(value)
     if number is None:

+ 10 - 0
tech_documents/数据接口与来源/platform_profiles/douyin.json

@@ -18,6 +18,16 @@
       { "field": "collect_count", "weight": 0.2, "floor": 100,   "ceil": 100000 }
     ]
   },
+  "observable_performance": {
+    "note": "M11(2026-06-23):量级分(总互动)+ 收缩后互动间比例;参数=759 条历史实测(prior=中位,target=p75,C=500)。抖音无曝光,比例分母用赞。",
+    "c": 500,
+    "components": [
+      { "field": "total_interaction", "label": "内容体量", "type": "absolute", "weight": 0.45, "floor": 1500, "ceil": 1000000, "sum_fields": ["digg_count", "comment_count", "share_count", "collect_count"] },
+      { "field": "share_ratio",   "label": "转发率", "type": "ratio", "weight": 0.25, "numerator": "share_count",   "denominator": "digg_count", "target": 0.27, "prior": 0.12, "c": 500 },
+      { "field": "collect_ratio", "label": "收藏率", "type": "ratio", "weight": 0.20, "numerator": "collect_count", "denominator": "digg_count", "target": 0.47, "prior": 0.21, "c": 500 },
+      { "field": "comment_ratio", "label": "评论率", "type": "ratio", "weight": 0.10, "numerator": "comment_count", "denominator": "digg_count", "target": 0.08, "prior": 0.04, "c": 500 }
+    ]
+  },
   "observable_fields": [
     { "field": "statistics.digg_count", "availability": "supported", "source": "search.statistics.digg_count / detail.like_count" },
     { "field": "statistics.comment_count", "availability": "supported", "source": "search.statistics.comment_count / detail.comment_count" },

+ 10 - 0
tech_documents/数据接口与来源/platform_profiles/kuaishou.json

@@ -51,6 +51,16 @@
       }
     ]
   },
+  "observable_performance": {
+    "note": "M11(2026-06-23):快手有曝光 play_count → 真互动率(per 播放);参数=245 条历史实测(prior=中位,target=p75,C=500)。快手 share_count 实测恒 0,不设转发项。",
+    "c": 500,
+    "components": [
+      { "field": "play_count",   "label": "播放量",        "type": "absolute", "weight": 0.45, "floor": 10000, "ceil": 5000000 },
+      { "field": "like_rate",    "label": "赞率(每播放)",   "type": "ratio", "weight": 0.25, "numerator": "digg_count",    "denominator": "play_count", "target": 0.039,  "prior": 0.015,  "c": 500 },
+      { "field": "collect_rate", "label": "收藏率(每播放)", "type": "ratio", "weight": 0.20, "numerator": "collect_count", "denominator": "play_count", "target": 0.0066, "prior": 0.0028, "c": 500 },
+      { "field": "comment_rate", "label": "评论率(每播放)", "type": "ratio", "weight": 0.10, "numerator": "comment_count", "denominator": "play_count", "target": 0.0039, "prior": 0.0013, "c": 500 }
+    ]
+  },
   "observable_fields": [
     { "field": "statistics.play_count", "availability": "supported", "source": "view_count" },
     { "field": "statistics.digg_count", "availability": "supported", "source": "like_count" },

+ 6 - 0
tech_documents/数据接口与来源/platform_profiles/shipinhao.json

@@ -15,6 +15,12 @@
       { "field": "digg_count", "weight": 1.0, "floor": 50, "ceil": 50000 }
     ]
   },
+  "observable_performance": {
+    "note": "M11(2026-06-23):视频号实测仅点赞非零(评/转/藏/播全 0),无法算任何比例 → 只用点赞量级(同 heat,绝对值 log 归一)。",
+    "components": [
+      { "field": "digg_count", "label": "点赞量", "type": "absolute", "weight": 1.0, "floor": 50, "ceil": 50000 }
+    ]
+  },
   "observable_fields": [
     { "field": "statistics.digg_count", "availability": "supported", "source": "like_count" }
   ],

+ 9 - 9
tests/fixtures/snapshots/real_id45/walk_actions_fingerprint.json

@@ -8,15 +8,6 @@
     "normal",
     ""
   ],
-  [
-    "budget_downgrade",
-    "d_003",
-    "7406990358799732018",
-    "downgrade_budget",
-    "success",
-    "low_budget",
-    "v4_score_review_needed"
-  ],
   [
     "decision_to_asset",
     "d_001",
@@ -35,6 +26,15 @@
     "normal",
     "v4_query_and_platform_pass"
   ],
+  [
+    "decision_to_asset",
+    "d_003",
+    "7406990358799732018",
+    "commit_asset",
+    "success",
+    "normal",
+    "v4_query_and_platform_pass"
+  ],
   [
     "hashtag_to_query",
     "2026养生",

+ 10 - 5
tests/p6_walk_helpers.py

@@ -114,7 +114,10 @@ def build_initial_walk_context(tmp_path: Path, *, tags: list[str] | None = None)
 
 def set_v4_allow_walk(decision: dict[str, Any], allow_walk: bool) -> None:
     query_score = 80
-    platform_score = 66.84 if allow_walk else 60
+    # M11 re-baseline:walk fixture(digg=100k,高共鸣占比)新平台分由 rule_judgment 实算为 88.41。
+    # True 分支必须与实算一致,否则 final_output v4_explanation 校验报 platform_performance_score 不一致。
+    # False 分支仅用于"显式拒绝游走"测试(不跑 validate_run),沿用低分占位即可。
+    platform_score = 88.41 if allow_walk else 60
     total_score = round(query_score * 0.5 + platform_score * 0.5, 2)
     decision["decision_action"] = "ADD_TO_CONTENT_POOL"
     decision["decision_reason_code"] = "v4_query_and_platform_pass"
@@ -154,10 +157,12 @@ def _platform_result(
         "platform_author_id": "MS4wLjABAAAA001",
         "author_display_name": "作者",
         "statistics": {
-            "digg_count": 5_000_000,
-            "comment_count": 800,
-            "share_count": 700,
-            "collect_count": 5_000,
+            # M11:平台分改 ratio 后,光大点赞不够;给"高共鸣"占比(转/赞30%、藏/赞50%、评/赞8%)
+            # 让该 fixture 在新打分下平台分 ~88 ≥ 65 门槛(walk 测试本意:成功内容能继续游走)。
+            "digg_count": 100_000,
+            "comment_count": 8_000,
+            "share_count": 30_000,
+            "collect_count": 50_000,
         },
         "tags": tags,
         "score": 72,

+ 3 - 2
tests/test_api.py

@@ -41,9 +41,10 @@ def test_api_runs_and_queries_mock_chain(tmp_path, monkeypatch):
 
     review = client.get(f"/runs/{run_id}/strategy-review").json()["data"]
     # V4-M3: mock 链路用 query relevance + 平台可观测表现 50/50 打分。
+    # M11 re-baseline:平台分改"量级+收缩比例"后,mock 首条进复看带 → 0 入池 / 1 复看 / 2 淘汰。
     assert review["summary"]["pooled_content_count"] == 0
-    assert review["summary"]["review_content_count"] == 0
-    assert review["summary"]["rejected_content_count"] == 3
+    assert review["summary"]["review_content_count"] == 1
+    assert review["summary"]["rejected_content_count"] == 2
     assert review["suggestions"]
 
     validation = client.get(f"/runs/{run_id}/validation").json()

+ 31 - 15
tests/test_case_replay.py

@@ -45,7 +45,14 @@ def _build_synthetic_corpus(cases_dir: Path, case_id: str, items: list[dict[str,
     )
 
 
-def _synthetic_item(content_id: str, *, digg: int) -> dict[str, Any]:
+def _synthetic_item(
+    content_id: str,
+    *,
+    digg: int,
+    comment: int = 800,
+    share: int = 600,
+    collect: int = 5000,
+) -> dict[str, Any]:
     return {
         "content_discovery_id": f"syn_{content_id}",
         "search_query_id": "q_001",
@@ -57,9 +64,9 @@ def _synthetic_item(content_id: str, *, digg: int) -> dict[str, Any]:
         "author_display_name": "养生作者",
         "statistics": {
             "digg_count": digg,
-            "comment_count": 800,
-            "share_count": 600,
-            "collect_count": 5000,
+            "comment_count": comment,
+            "share_count": share,
+            "collect_count": collect,
         },
         "tags": ["#中医养生"],
         "score": 85,
@@ -74,27 +81,38 @@ def _synthetic_item(content_id: str, *, digg: int) -> dict[str, Any]:
 def test_replay_id45_baseline_gemini_score(tmp_path):
     artifacts = replay_case("real_id45", runtime_root=tmp_path / "rt")
     assert artifacts.state["status"] == "success"
-    assert artifacts.summary["pooled_content_count"] == 2
-    assert artifacts.summary["review_content_count"] == 1
+    # M11 re-baseline:平台分改"量级+收缩比例"后,7406990358799732018(中等比例)平台分 60.5、
+    # 总分 70.25 ≥ 70 → 由原"待复看"升为"入池"(allow_walk=False);7577667864522907506 比例极低
+    # (官方大号式)平台分 25.1 → 仍淘汰。结果:3 入池 / 0 复看 / 1 淘汰。
+    assert artifacts.summary["pooled_content_count"] == 3
+    assert artifacts.summary["review_content_count"] == 0
     assert artifacts.summary["rejected_content_count"] == 1
     assert artifacts.summary["pending_content_count"] == 0
     assert _decision_counts(artifacts) == {
-        "ADD_TO_CONTENT_POOL": 2,
-        "KEEP_CONTENT_FOR_REVIEW": 1,
+        "ADD_TO_CONTENT_POOL": 3,
         "REJECT_CONTENT": 1,
     }
     assert {d.get("decision_reason_code") for d in artifacts.decisions} == {
         "v4_query_and_platform_pass",
-        "v4_score_review_needed",
         "v4_query_or_score_below_threshold",
     }
 
 
 def test_replay_synthetic_pool_case(tmp_path):
+    # M11:平台分改"收缩后比例"后,光大点赞不入池;高共鸣占比(转/赞0.3、藏/赞0.5、评/赞0.08)
+    # 才是该入池的画像 → 给强比例,平台分 ~88、总分 ~84 ≥ 70 入池。
     _build_synthetic_corpus(
         tmp_path / "cases",
         "syn_pool",
-        [_synthetic_item("9000000000000000001", digg=5_000_000)],
+        [
+            _synthetic_item(
+                "9000000000000000001",
+                digg=100_000,
+                comment=8_000,
+                share=30_000,
+                collect=50_000,
+            )
+        ],
     )
     artifacts = replay_case("syn_pool", runtime_root=tmp_path / "rt", cases_dir=tmp_path / "cases")
     assert artifacts.state["status"] == "success"
@@ -137,12 +155,10 @@ def test_replay_id45_walk_obeys_decisions_after_m4(tmp_path):
     assert all(row["walk_status"] == "success" for row in author_actions)
     assert all(row["budget_tier"] == "normal" for row in author_actions)
 
+    # M11 re-baseline:新平台分下原"待复看"的 7406990358799732018 升为"入池"(allow_walk=False),
+    # 已无 KEEP 内容触发预算降级 → budget_downgrade 边为空。
     downgrades = [row for row in walk_actions if row["edge_id"] == "budget_downgrade"]
-    assert len(downgrades) == 1
-    assert all(row["budget_tier"] == "low_budget" for row in downgrades)
-    assert all(row["reason_code"] == "v4_score_review_needed" for row in downgrades)
-    # M4 砍包受控变化:Budget 包及 binding 已删,KEEP 的戳回退内容包(=executed_rule_pack_id)。
-    assert all(row["rule_pack_id"] == "douyin_content_discovery_rule_pack_v1" for row in downgrades)
+    assert len(downgrades) == 0
 
     for row in walk_actions:
         execution = row["raw_payload"]["rule_pack_execution"]

+ 5 - 4
tests/test_config_case_matrix.py

@@ -61,11 +61,12 @@ def test_matrix_real_id45(variant, tmp_path):
     assert artifacts.state["status"] == "success"  # config change must not break the chain
     outcome = _outcome(artifacts)
     if variant == "default":
-        assert outcome["pooled"] == 2
+        # M11 re-baseline:3 入池(原第3条复看升入池)/ 1 淘汰。
+        assert outcome["pooled"] == 3
         assert outcome["rejected"] == 1
         assert outcome["effect_status_counts"] == {
-            "success": 2,
-            "pending": 1,
+            "success": 3,
+            "pending": 0,
             "failed": 1,
             "rule_blocked": 0,
         }
@@ -86,7 +87,7 @@ def test_judge_ok_block_changes_outcome(tmp_path):
     )
     assert base != blocked
     assert base["effect_status_counts"]["rule_blocked"] == 0
-    assert base["pooled"] == 2
+    assert base["pooled"] == 3
     assert blocked["reasons"] == ["v4_technical_retry_needed"] * 4
     assert blocked["effect_status_counts"]["rule_blocked"] == 4
     assert blocked["pooled"] == 0

+ 53 - 49
tests/test_platform_observable_performance.py

@@ -5,58 +5,64 @@ from content_agent.business_modules.content_discovery.platform_observable_perfor
 )
 
 
-def test_douyin_play_count_is_natural_missing_and_score_is_bounded():
+# M11:平台表现改"量级分 + 收缩后比例分"(配置驱动 observable_performance)。
+# 三平台按真实字段可得性走不同 component:抖音互动间比例 / 快手 per 播放真互动率 / 视频号仅点赞量级。
+
+
+def test_douyin_v2_volume_plus_inter_metric_ratios():
     result = performance_score(
-        {
-            "digg_count": 100000,
-            "comment_count": 1000,
-            "share_count": 500,
-            "collect_count": 1000,
-        },
+        {"digg_count": 100000, "comment_count": 1000, "share_count": 500, "collect_count": 1000},
         "douyin",
     )
-
     assert 0 <= result["platform_performance_score"] <= 100
-    assert {row["field"] for row in result["platform_performance_components"]} == {
-        "statistics.digg_count",
-        "statistics.comment_count",
-        "statistics.share_count",
-        "statistics.collect_count",
-    }
-    assert result["missing_observable_fields"] == [
-        {
-            "field": "statistics.play_count",
-            "missing_type": "natural_platform_missing",
-            "platform": "douyin",
-            "evidence": "跨平台字段映射.json",
-        }
+    assert [row["field"] for row in result["platform_performance_components"]] == [
+        "total_interaction",
+        "share_ratio",
+        "collect_ratio",
+        "comment_ratio",
     ]
+    assert result["platform_performance_components"][0]["type"] == "absolute"
+    assert result["platform_performance_components"][1]["type"] == "ratio"
+    # play_count 抖音天然缺失仍如实上报
+    assert {
+        "field": "statistics.play_count",
+        "missing_type": "natural_platform_missing",
+        "platform": "douyin",
+        "evidence": "跨平台字段映射.json",
+    } in result["missing_observable_fields"]
     assert "platform_heat" not in result
 
 
-def test_kuaishou_all_five_fields_supported():
+def test_kuaishou_v2_per_view_rates_no_share():
     result = performance_score(
-        {
-            "play_count": 10000,
-            "digg_count": 2000,
-            "comment_count": 200,
-            "share_count": 100,
-            "collect_count": 100,
-        },
+        {"play_count": 10000, "digg_count": 2000, "comment_count": 200, "share_count": 100, "collect_count": 100},
         "kuaishou",
     )
-
-    assert len(result["platform_performance_components"]) == 5
-    assert result["missing_observable_fields"] == []
+    # 快手:播放量级 + 三个 per 播放真互动率;无转发项(share 恒 0)
+    assert [row["field"] for row in result["platform_performance_components"]] == [
+        "play_count",
+        "like_rate",
+        "collect_rate",
+        "comment_rate",
+    ]
     assert result["platform_performance_score"] is not None
+    assert result["missing_observable_fields"] == []  # play 在 → 比例分母不缺
 
 
-def test_shipinhao_only_digg_supported_and_other_fields_natural_missing():
-    result = performance_score({"digg_count": 500}, "shipinhao")
-
-    assert [row["field"] for row in result["platform_performance_components"]] == [
-        "statistics.digg_count"
+def test_kuaishou_missing_play_count_marks_ratio_denominator_missing():
+    # 快手缺 play_count → per 播放比例分母为 0 → 记 runtime_missing、只剩播放量级一项
+    result = performance_score({"digg_count": 2000, "comment_count": 200, "collect_count": 100}, "kuaishou")
+    runtime_missing = [
+        row for row in result["missing_observable_fields"] if row.get("missing_type") == "runtime_missing"
     ]
+    assert {row["field"] for row in runtime_missing} == {"like_rate", "collect_rate", "comment_rate"}
+    assert [row["field"] for row in result["platform_performance_components"]] == ["play_count"]
+
+
+def test_shipinhao_only_digg_volume():
+    result = performance_score({"digg_count": 500}, "shipinhao")
+    assert [row["field"] for row in result["platform_performance_components"]] == ["digg_count"]
+    assert result["platform_performance_components"][0]["type"] == "absolute"
     assert {row["field"] for row in result["missing_observable_fields"]} == {
         "statistics.comment_count",
         "statistics.share_count",
@@ -65,15 +71,13 @@ def test_shipinhao_only_digg_supported_and_other_fields_natural_missing():
     }
 
 
-def test_supported_field_absent_is_runtime_missing():
-    result = performance_score({"digg_count": 10}, "douyin")
-
-    runtime_missing = [
-        row for row in result["missing_observable_fields"]
-        if row.get("missing_type") == "runtime_missing"
-    ]
-    assert {row["field"] for row in runtime_missing} == {
-        "statistics.comment_count",
-        "statistics.share_count",
-        "statistics.collect_count",
-    }
+def test_ratio_shrinkage_pulls_low_sample_toward_prior():
+    # 抖音转发率:小样本(3 赞 1 转=33%)被收缩拉回正常,不给虚高分;大样本真高比例保留。
+    low = performance_score({"digg_count": 3, "share_count": 1, "comment_count": 0, "collect_count": 0}, "douyin")
+    high = performance_score({"digg_count": 100000, "share_count": 35000, "comment_count": 5000, "collect_count": 50000}, "douyin")
+    low_share = next(c for c in low["platform_performance_components"] if c["field"] == "share_ratio")
+    high_share = next(c for c in high["platform_performance_components"] if c["field"] == "share_ratio")
+    assert low_share["raw_ratio"] > 0.3  # 原始 33%
+    assert low_share["shrunk_ratio"] < 0.15  # 收缩后被拉回(prior 0.12 附近)
+    assert low_share["normalized_score"] < 60
+    assert high_share["normalized_score"] == 100  # 真·高转发占满

+ 2 - 1
tests/test_policy_replay_data.py

@@ -22,7 +22,8 @@ def test_rule_decisions_and_policy_run_record_include_replay_metadata(tmp_path):
     assert replay["dispatch_id"] == "dispatch_content"
     assert replay["rule_pack_id"] == "douyin_content_discovery_rule_pack_v1"
     assert replay["strategy_version"] == "V4"
-    assert replay["effect_mapping_id"] == "map_reject_failed"
+    # M11 re-baseline:mock 首条(总分 55.67)由"淘汰"升为"复看(pending)"→ effect_mapping 改 keep。
+    assert replay["effect_mapping_id"] == "map_keep_for_review_pending"
     assert replay["allow_walk"] is False
     assert replay["walk_gate_snapshot"]["query_relevance_score"] == 80
 

+ 7 - 4
tests/test_progressive_screening.py

@@ -456,10 +456,13 @@ def _item(content_id: str, *, has_more: bool, cursor: str) -> dict[str, Any]:
         "platform_author_id": f"author_{content_id}",
         "author_display_name": "作者",
         "statistics": {
-            "digg_count": 1_000_000,
-            "comment_count": 50_000,
-            "share_count": 20_000,
-            "collect_count": 100_000,
+            # M11 re-baseline:平台分改"量级+收缩比例"。原 digg=1M/低占比新算只得 57.36,
+            # 使 pool 桩(q=80)总分 68.68<70 与 ADD 动作冲突(validate_run 报 threshold_mismatch)。
+            # 改为中等共鸣占比 → 平台分 ~66:pool 桩总分 ~73≥70 真入池;review 桩(q=60)总分 ~63 仍复看。
+            "digg_count": 500_000,
+            "comment_count": 24_000,
+            "share_count": 40_000,
+            "collect_count": 120_000,
             "play_count": 0,
         },
         "tags": [],

+ 6 - 5
tests/test_query_effect_aggregation.py

@@ -21,11 +21,12 @@ def test_search_clues_aggregate_query_effect_status_from_decisions(tmp_path):
         for clue in service.read_jsonl(state["run_id"], "search_clues.jsonl")
     }
 
-    assert clues["q_001"]["search_query_effect_status"] == "failed"
-    assert clues["q_001"]["effect_status_counts"] == {"failed": 2}
-    assert clues["q_001"]["query_aggregation_id"] == "agg_query_failed"
-    assert clues["q_001"]["raw_payload"]["query_aggregation_id"] == "agg_query_failed"
-    assert clues["q_001"]["walk_next_step"] == "stop_search_query"
+    # M11 re-baseline:q_001 首条候选(总分 55.67)进复看(pending),该 query 由"全失败"升为"含复看"。
+    assert clues["q_001"]["search_query_effect_status"] == "pending"
+    assert clues["q_001"]["effect_status_counts"] == {"pending": 1, "failed": 1}
+    assert clues["q_001"]["query_aggregation_id"] == "agg_query_pending"
+    assert clues["q_001"]["raw_payload"]["query_aggregation_id"] == "agg_query_pending"
+    assert clues["q_001"]["walk_next_step"] == "review_later_or_small_budget"
     assert clues["q_002"]["search_query_effect_status"] == "failed"
     assert clues["q_002"]["effect_status_counts"] == {"failed": 1}
     assert clues["q_002"]["query_aggregation_id"] == "agg_query_failed"

+ 8 - 6
tests/test_replay_gemini_seam.py

@@ -13,13 +13,14 @@ from tests.replay_harness import replay_case
 def test_replay_default_pool_stub_scores_into_pool(tmp_path):
     artifacts = replay_case("real_id45", runtime_root=tmp_path / "rt")
     assert artifacts.state["status"] == "success"
-    assert artifacts.summary["pooled_content_count"] == 2
-    assert artifacts.summary["review_content_count"] == 1
+    # M11 re-baseline:第3条(7406990358799732018)平台分 60.5、总分 70.25 → 升入池。
+    assert artifacts.summary["pooled_content_count"] == 3
+    assert artifacts.summary["review_content_count"] == 0
     assert artifacts.summary["rejected_content_count"] == 1
     assert [d["decision_reason_code"] for d in artifacts.decisions] == [
         "v4_query_and_platform_pass",
         "v4_query_and_platform_pass",
-        "v4_score_review_needed",
+        "v4_query_and_platform_pass",
         "v4_query_or_score_below_threshold",
     ]
     assert all(d["scorecard"]["schema_version"] == "v4_scorecard.v1" for d in artifacts.decisions)
@@ -32,12 +33,13 @@ def test_replay_review_stub_scores_by_relevance(tmp_path):
         gemini_video_client=FakeGeminiVideoClient(default_result=fake_gemini_review()),
     )
     assert artifacts.state["status"] == "success"
+    # M11 re-baseline:review 桩 q=60,平台分升高使前3条总分进 55-69.99 复看带、仅末条(平台分25.1)淘汰。
     assert artifacts.summary["pooled_content_count"] == 0
-    assert artifacts.summary["review_content_count"] == 2
-    assert artifacts.summary["rejected_content_count"] == 2
+    assert artifacts.summary["review_content_count"] == 3
+    assert artifacts.summary["rejected_content_count"] == 1
     assert sorted(d["decision_reason_code"] for d in artifacts.decisions) == [
         "v4_query_or_score_below_threshold",
-        "v4_query_or_score_below_threshold",
+        "v4_score_review_needed",
         "v4_score_review_needed",
         "v4_score_review_needed",
     ]

+ 6 - 4
tests/test_unified_search_unit.py

@@ -20,10 +20,12 @@ def _result(cid: str, *, has_more: bool = False, next_cursor: str = "") -> dict[
         "platform_author_id": f"author_{cid}",
         "author_display_name": "n",
         "statistics": {
-            "digg_count": 5_000_000,
-            "comment_count": 800,
-            "share_count": 700,
-            "collect_count": 5_000,
+            # M11 re-baseline:原 digg=5M/极低占比新算只得 45.08,总分 62.54<70 不入池,
+            # "前3命中再翻页"不触发。改为高共鸣占比 → 平台分 ~88、总分 ~84 真入池,游走分页生效。
+            "digg_count": 100_000,
+            "comment_count": 8_000,
+            "share_count": 30_000,
+            "collect_count": 50_000,
         },
         "tags": [],
         "score": 72,

+ 5 - 4
tests/test_v1_graph.py

@@ -22,14 +22,15 @@ def test_v1_graph_generates_all_runtime_files(tmp_path):
 
     final_output = service.read_json(run_id, "final_output.json")
     assert final_output["policy_run_id"] == state["policy_run_id"]
+    # M11 re-baseline:mock 首条(plat 31.35,总分 55.67)进复看带 → 1 复看(pending)/ 2 淘汰。
     assert final_output["summary"]["pooled_content_count"] == 0
-    assert final_output["summary"]["review_content_count"] == 0
+    assert final_output["summary"]["review_content_count"] == 1
     assert final_output["summary"]["pending_content_count"] == 0
-    assert final_output["summary"]["rejected_content_count"] == 3
+    assert final_output["summary"]["rejected_content_count"] == 2
     assert final_output["summary"]["effect_status_counts"] == {
         "success": 0,
-        "pending": 0,
-        "failed": 3,
+        "pending": 1,
+        "failed": 2,
         "rule_blocked": 0,
     }
     assert (

+ 5 - 4
tests/test_walk_engine_deep_frontier.py

@@ -34,10 +34,11 @@ def _content(i: int, query: dict[str, Any], *, tags: list[str]) -> dict[str, Any
         "platform_author_id": f"auG{i:04d}",
         "author_display_name": "n",
         "statistics": {
-            "digg_count": 5_000_000,
-            "comment_count": 800,
-            "share_count": 700,
-            "collect_count": 5_000,
+            # M11:平台分改 ratio 后,光大点赞不够;给高共鸣占比让平台分过 65 门槛(本意:能继续游走)。
+            "digg_count": 100_000,
+            "comment_count": 8_000,
+            "share_count": 30_000,
+            "collect_count": 50_000,
         },
         "tags": tags,
         "score": 72,