1 månad sedan · 7385cb752e
--- a/content_agent/business_modules/content_discovery/content_discovery_builder.py
+++ b/content_agent/business_modules/content_discovery/content_discovery_builder.py
@@ -150,7 +150,7 @@ def _build_evidence_bundle(
 
				             "statistics": result["statistics"],
			
 
				             **result["statistics"],
			
 
				             "platform_heat": heat_score(
			
 
				-                result["statistics"].get("digg_count"),
			
 
				+                result["statistics"],
			
 
				                 discovered_content_item["platform"],
			
 
				             ),
			
 
				         },
			
--- a/content_agent/business_modules/content_discovery/platform_heat.py
+++ b/content_agent/business_modules/content_discovery/platform_heat.py
@@ -1,29 +1,56 @@
 
				-"""Platform heat normalization (V3-M3A).
			
 
				+"""平台热度归一化(V3-M3A;2026-06-12 R3 改配置驱动的每平台互动复合)。
			
 
				 
			
 
				-跨平台唯一公共互动指标是点赞(digg_count;视频号仅 like_count→digg_count)。
			
 
				-绝对值不可跨平台比较(抖音爆款 5e6 vs 视频号样本 ~1e2),故按平台锚点做
			
 
				-对数归一化:digg ≤ floor → 0,≥ ceil → 1,之间按 log10 插值。
			
 
				-锚点为拍板起步值,M7 真实跑测后按数据标定。
			
 
				+各平台能拉到的互动数据天差地别(抖音无播放量、视频号只有点赞、快手/B站有播放量),
			
 
				+一套公式套不上。每平台从自己 profile 的 `heat.signals` 读「用哪些字段、各自锚点和权重」,
			
 
				+每字段先 log 归一化 0~1,再对**在场(>0)**字段加权平均——缺字段/0 值不计入,
			
 
				+权重在在场字段间重新归一。锚点从代码搬进 platform_profiles,加平台/调权重只改 JSON。
			
 
				+
			
 
				+无 heat 段或无 profile 时回退「点赞单信号 + 默认锚点」(=R3 前未单独配置锚点的平台的老行为)。
			
 
				 """
			
 
				 
			
 
				 from __future__ import annotations
			
 
				 
			
 
				 from math import log10
			
 
				+from pathlib import Path
			
 
				 from typing import Any
			
 
				 
			
 
				-# (floor, ceil) per platform — 起步值,M7 标定。
			
 
				-_HEAT_ANCHORS: dict[str, tuple[float, float]] = {
			
 
				-    "douyin": (10000.0, 1000000.0),
			
 
				-    "shipinhao": (50.0, 50000.0),
			
 
				-}
			
 
				-_DEFAULT_ANCHOR = (100.0, 100000.0)
			
 
				+from content_agent.integrations import config_store
			
 
				+
			
 
				+_PROFILE_DIR = Path("tech_documents/数据接口与来源/platform_profiles")
			
 
				+_DEFAULT_ANCHOR = (100.0, 100000.0)  # 兜底:profile 缺 heat 段时,仅用点赞单信号
			
 
				+
			
 
				 
			
 
				+def _log_norm(value: float, floor: float, ceil: float) -> float:
			
 
				+    raw = (log10(value + 1) - log10(floor)) / (log10(ceil) - log10(floor))
			
 
				+    return min(max(raw, 0.0), 1.0)
			
 
				 
			
 
				-def heat_score(digg_count: Any, platform: str) -> float:
			
 
				-    floor, ceil = _HEAT_ANCHORS.get(platform, _DEFAULT_ANCHOR)
			
 
				+
			
 
				+def _heat_signals(platform: str) -> list[dict[str, Any]]:
			
 
				+    """读 profile.heat.signals;无 profile/无 heat 段 → 点赞单信号 + 默认锚点。"""
			
 
				     try:
			
 
				-        digg = max(int(digg_count or 0), 0)
			
 
				-    except (TypeError, ValueError):
			
 
				-        digg = 0
			
 
				-    raw = (log10(digg + 1) - log10(floor)) / (log10(ceil) - log10(floor))
			
 
				-    return round(min(max(raw, 0.0), 1.0), 4)
			
 
				+        profile, _ = config_store.load_json(_PROFILE_DIR / f"{platform}.json")
			
 
				+    except (FileNotFoundError, OSError):
			
 
				+        profile = {}
			
 
				+    signals = (profile.get("heat") or {}).get("signals")
			
 
				+    if signals:
			
 
				+        return signals
			
 
				+    floor, ceil = _DEFAULT_ANCHOR
			
 
				+    return [{"field": "digg_count", "weight": 1.0, "floor": floor, "ceil": ceil}]
			
 
				+
			
 
				+
			
 
				+def heat_score(statistics: dict[str, Any], platform: str) -> float:
			
 
				+    weighted = 0.0
			
 
				+    total_weight = 0.0
			
 
				+    for signal in _heat_signals(platform):
			
 
				+        try:
			
 
				+            value = int(statistics.get(signal["field"]) or 0)
			
 
				+        except (TypeError, ValueError):
			
 
				+            value = 0
			
 
				+        if value <= 0:
			
 
				+            continue  # 缺字段/0 值不计入(每平台数据可得性不同)
			
 
				+        weight = float(signal.get("weight", 1.0))
			
 
				+        weighted += weight * _log_norm(value, float(signal["floor"]), float(signal["ceil"]))
			
 
				+        total_weight += weight
			
 
				+    if total_weight == 0:
			
 
				+        return 0.0
			
 
				+    return round(weighted / total_weight, 4)
			
--- a/tech_documents/数据接口与来源/platform_profiles/douyin.json
+++ b/tech_documents/数据接口与来源/platform_profiles/douyin.json
@@ -9,6 +9,12 @@
 
				     "retry": null,
			
 
				     "video_download": { "downloadable": true, "headers": { "User-Agent": "iOS UA", "Referer": "https://www.douyin.com/" }, "note": "实测 206/video/mp4;play_addr 地址有时效" }
			
 
				   },
			
 
				+  "heat": {
			
 
				+    "note": "R3 第一步(2026-06-12):点赞单信号=R3 前行为;抖音无播放量,第二步加 comment/share/collect 复合",
			
 
				+    "signals": [
			
 
				+      { "field": "digg_count", "weight": 1.0, "floor": 10000, "ceil": 1000000 }
			
 
				+    ]
			
 
				+  },
			
 
				   "endpoints": {
			
 
				     "search":         { "path": "/crawler/dou_yin/keyword", "params": { "keyword": "str", "content_type": "视频|图文", "sort_type": "最多点赞|最多分享", "cursor": "str" }, "response_shape": "raw(aweme 原生结构)" },
			
 
				     "detail":         { "path": "/crawler/dou_yin/detail", "params": { "content_id": "str" }, "response_shape": "normalized(channel_* 归一化)" },
			
--- a/tech_documents/数据接口与来源/platform_profiles/shipinhao.json
+++ b/tech_documents/数据接口与来源/platform_profiles/shipinhao.json
@@ -9,6 +9,12 @@
 
				     "retry": { "search": { "trigger": "仅暂时性故障重试:code=25011 接口异常 / 超时 / 网络错误(code=0 空结果、参数/鉴权错 不重试)", "max_attempts": 3, "backoff_seconds": [1, 2, 4], "on_exhausted": "标 blocked·失败待查,游走绕行不卡流水线", "note": "实测首次 25011、重试 code=0;退避与 rate_limit_seconds=15 取较大值,故退避多被 15s 吸收(2026-06-11 拍板)" } },
			
 
				     "video_download": { "downloadable": true, "headers": { "User-Agent": "PC UA", "Referer": "https://channels.weixin.qq.com/" }, "note": "实测 200/video/mp4;findermp 带 encfilekey" }
			
 
				   },
			
 
				+  "heat": {
			
 
				+    "note": "R3 第一步(2026-06-12):点赞单信号=R3 前行为;视频号 comment/share/collect 实测常空,数据所限只能用点赞",
			
 
				+    "signals": [
			
 
				+      { "field": "digg_count", "weight": 1.0, "floor": 50, "ceil": 50000 }
			
 
				+    ]
			
 
				+  },
			
 
				   "endpoints": {
			
 
				     "search":       { "path": "/crawler/shi_pin_hao/keyword", "params": { "keyword": "str", "cursor": "str" }, "response_shape": "normalized(channel_*)", "stability": "unstable" },
			
 
				     "blogger":      { "path": "/crawler/shi_pin_hao/blogger", "status": "blocked", "note": "实测 code=25011" },
			
--- a/tests/test_platform_heat.py
+++ b/tests/test_platform_heat.py
@@ -1,38 +1,88 @@
 
				-"""V3-M3A: platform heat log-normalization unit tests."""
			
 
				+"""V3-M3A: platform heat 归一化单测;R3(2026-06-12)改配置驱动复合后扩充。"""
			
 
				 
			
 
				 from __future__ import annotations
			
 
				 
			
 
				+from math import log10
			
 
				+
			
 
				 from content_agent.business_modules.content_discovery.platform_heat import heat_score
			
 
				 
			
 
				 
			
 
				+def _stat(digg, **extra):
			
 
				+    return {"digg_count": digg, **extra}
			
 
				+
			
 
				+
			
 
				+# --- R3 第一步:点赞单信号,与 R3 前行为字节等价(锚点现读自 profile heat 段) ---
			
 
				+
			
 
				 def test_heat_floor_maps_to_zero():
			
 
				-    assert heat_score(10000, "douyin") == 0.0
			
 
				-    assert heat_score(3, "douyin") == 0.0
			
 
				+    assert heat_score(_stat(10000), "douyin") == 0.0
			
 
				+    assert heat_score(_stat(3), "douyin") == 0.0
			
 
				 
			
 
				 
			
 
				 def test_heat_ceil_maps_to_one():
			
 
				-    assert heat_score(1000000, "douyin") == 1.0
			
 
				-    assert heat_score(5034215, "douyin") == 1.0
			
 
				+    assert heat_score(_stat(1000000), "douyin") == 1.0
			
 
				+    assert heat_score(_stat(5034215), "douyin") == 1.0
			
 
				 
			
 
				 
			
 
				 def test_heat_midpoint_between_zero_and_one():
			
 
				     # geometric midpoint of (1e4, 1e6) is 1e5 -> ~0.5
			
 
				-    assert abs(heat_score(100000, "douyin") - 0.5) < 0.01
			
 
				+    assert abs(heat_score(_stat(100000), "douyin") - 0.5) < 0.01
			
 
				 
			
 
				 
			
 
				 def test_heat_missing_or_zero_digg_is_zero():
			
 
				-    assert heat_score(None, "douyin") == 0.0
			
 
				-    assert heat_score(0, "shipinhao") == 0.0
			
 
				-    assert heat_score("not-a-number", "douyin") == 0.0
			
 
				+    assert heat_score(_stat(None), "douyin") == 0.0
			
 
				+    assert heat_score(_stat(0), "shipinhao") == 0.0
			
 
				+    assert heat_score(_stat("not-a-number"), "douyin") == 0.0
			
 
				 
			
 
				 
			
 
				 def test_heat_unknown_platform_uses_default_anchor():
			
 
				-    # default anchors (100, 1e5): below floor -> 0, at/above ceil -> 1
			
 
				-    assert heat_score(50, "bilibili") == 0.0
			
 
				-    assert heat_score(100000, "bilibili") == 1.0
			
 
				+    # 无 heat 段平台回退默认锚点 (100, 1e5):below floor -> 0, at/above ceil -> 1
			
 
				+    assert heat_score(_stat(50), "bilibili") == 0.0
			
 
				+    assert heat_score(_stat(100000), "bilibili") == 1.0
			
 
				 
			
 
				 
			
 
				 def test_heat_shipinhao_low_digg_not_unfairly_zero():
			
 
				     # 92 likes is meaningful on shipinhao (anchors 50..5e4) though tiny on douyin.
			
 
				-    assert heat_score(92, "shipinhao") > 0.05
			
 
				-    assert heat_score(92, "douyin") == 0.0
			
 
				+    assert heat_score(_stat(92), "shipinhao") > 0.05
			
 
				+    assert heat_score(_stat(92), "douyin") == 0.0
			
 
				+
			
 
				+
			
 
				+# --- R3 复合机制守卫(放宽到多字段后这些仍成立,无需改) ---
			
 
				+
			
 
				+def test_heat_single_signal_equals_legacy_digg_only():
			
 
				+    # 单信号配置 == 老的纯点赞归一化(机制中性证明)。
			
 
				+    digg = 200000
			
 
				+    expected = round((log10(digg + 1) - log10(10000)) / (log10(1000000) - log10(10000)), 4)
			
 
				+    assert heat_score(_stat(digg), "douyin") == expected
			
 
				+
			
 
				+
			
 
				+def test_heat_ignores_zero_and_missing_fields_in_composite(monkeypatch):
			
 
				+    # 复合时 0 值字段被排除、权重在在场字段间重新归一。
			
 
				+    from content_agent.business_modules.content_discovery import platform_heat
			
 
				+
			
 
				+    def fake_signals(platform):
			
 
				+        return [
			
 
				+            {"field": "digg_count", "weight": 0.5, "floor": 100, "ceil": 100000},
			
 
				+            {"field": "comment_count", "weight": 0.5, "floor": 10, "ceil": 10000},
			
 
				+        ]
			
 
				+
			
 
				+    monkeypatch.setattr(platform_heat, "_heat_signals", fake_signals)
			
 
				+    # comment 为 0 → 只剩 digg,权重重新归一为 1.0 → 等于纯 digg 归一
			
 
				+    digg_only = heat_score({"digg_count": 10000, "comment_count": 0}, "x")
			
 
				+    expected = round((log10(10001) - log10(100)) / (log10(100000) - log10(100)), 4)
			
 
				+    assert digg_only == expected
			
 
				+
			
 
				+
			
 
				+def test_heat_weighted_average_of_two_signals(monkeypatch):
			
 
				+    from content_agent.business_modules.content_discovery import platform_heat
			
 
				+
			
 
				+    def fake_signals(platform):
			
 
				+        return [
			
 
				+            {"field": "digg_count", "weight": 0.5, "floor": 100, "ceil": 100000},
			
 
				+            {"field": "comment_count", "weight": 0.5, "floor": 10, "ceil": 10000},
			
 
				+        ]
			
 
				+
			
 
				+    monkeypatch.setattr(platform_heat, "_heat_signals", fake_signals)
			
 
				+    score = heat_score({"digg_count": 10000, "comment_count": 1000}, "x")
			
 
				+    d = (log10(10001) - log10(100)) / (log10(100000) - log10(100))
			
 
				+    c = (log10(1001) - log10(10)) / (log10(10000) - log10(10))
			
 
				+    assert score == round((0.5 * d + 0.5 * c) / 1.0, 4)