|
|
@@ -1,38 +1,88 @@
|
|
|
-"""V3-M3A: platform heat log-normalization unit tests."""
|
|
|
+"""V3-M3A: platform heat 归一化单测;R3(2026-06-12)改配置驱动复合后扩充。"""
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
+from math import log10
|
|
|
+
|
|
|
from content_agent.business_modules.content_discovery.platform_heat import heat_score
|
|
|
|
|
|
|
|
|
+def _stat(digg, **extra):
|
|
|
+ return {"digg_count": digg, **extra}
|
|
|
+
|
|
|
+
|
|
|
+# --- R3 第一步:点赞单信号,与 R3 前行为字节等价(锚点现读自 profile heat 段) ---
|
|
|
+
|
|
|
def test_heat_floor_maps_to_zero():
|
|
|
- assert heat_score(10000, "douyin") == 0.0
|
|
|
- assert heat_score(3, "douyin") == 0.0
|
|
|
+ assert heat_score(_stat(10000), "douyin") == 0.0
|
|
|
+ assert heat_score(_stat(3), "douyin") == 0.0
|
|
|
|
|
|
|
|
|
def test_heat_ceil_maps_to_one():
|
|
|
- assert heat_score(1000000, "douyin") == 1.0
|
|
|
- assert heat_score(5034215, "douyin") == 1.0
|
|
|
+ assert heat_score(_stat(1000000), "douyin") == 1.0
|
|
|
+ assert heat_score(_stat(5034215), "douyin") == 1.0
|
|
|
|
|
|
|
|
|
def test_heat_midpoint_between_zero_and_one():
|
|
|
# geometric midpoint of (1e4, 1e6) is 1e5 -> ~0.5
|
|
|
- assert abs(heat_score(100000, "douyin") - 0.5) < 0.01
|
|
|
+ assert abs(heat_score(_stat(100000), "douyin") - 0.5) < 0.01
|
|
|
|
|
|
|
|
|
def test_heat_missing_or_zero_digg_is_zero():
|
|
|
- assert heat_score(None, "douyin") == 0.0
|
|
|
- assert heat_score(0, "shipinhao") == 0.0
|
|
|
- assert heat_score("not-a-number", "douyin") == 0.0
|
|
|
+ assert heat_score(_stat(None), "douyin") == 0.0
|
|
|
+ assert heat_score(_stat(0), "shipinhao") == 0.0
|
|
|
+ assert heat_score(_stat("not-a-number"), "douyin") == 0.0
|
|
|
|
|
|
|
|
|
def test_heat_unknown_platform_uses_default_anchor():
|
|
|
- # default anchors (100, 1e5): below floor -> 0, at/above ceil -> 1
|
|
|
- assert heat_score(50, "bilibili") == 0.0
|
|
|
- assert heat_score(100000, "bilibili") == 1.0
|
|
|
+ # 无 heat 段平台回退默认锚点 (100, 1e5):below floor -> 0, at/above ceil -> 1
|
|
|
+ assert heat_score(_stat(50), "bilibili") == 0.0
|
|
|
+ assert heat_score(_stat(100000), "bilibili") == 1.0
|
|
|
|
|
|
|
|
|
def test_heat_shipinhao_low_digg_not_unfairly_zero():
|
|
|
# 92 likes is meaningful on shipinhao (anchors 50..5e4) though tiny on douyin.
|
|
|
- assert heat_score(92, "shipinhao") > 0.05
|
|
|
- assert heat_score(92, "douyin") == 0.0
|
|
|
+ assert heat_score(_stat(92), "shipinhao") > 0.05
|
|
|
+ assert heat_score(_stat(92), "douyin") == 0.0
|
|
|
+
|
|
|
+
|
|
|
+# --- R3 复合机制守卫(放宽到多字段后这些仍成立,无需改) ---
|
|
|
+
|
|
|
+def test_heat_single_signal_equals_legacy_digg_only():
|
|
|
+ # 单信号配置 == 老的纯点赞归一化(机制中性证明)。
|
|
|
+ digg = 200000
|
|
|
+ expected = round((log10(digg + 1) - log10(10000)) / (log10(1000000) - log10(10000)), 4)
|
|
|
+ assert heat_score(_stat(digg), "douyin") == expected
|
|
|
+
|
|
|
+
|
|
|
+def test_heat_ignores_zero_and_missing_fields_in_composite(monkeypatch):
|
|
|
+ # 复合时 0 值字段被排除、权重在在场字段间重新归一。
|
|
|
+ from content_agent.business_modules.content_discovery import platform_heat
|
|
|
+
|
|
|
+ def fake_signals(platform):
|
|
|
+ return [
|
|
|
+ {"field": "digg_count", "weight": 0.5, "floor": 100, "ceil": 100000},
|
|
|
+ {"field": "comment_count", "weight": 0.5, "floor": 10, "ceil": 10000},
|
|
|
+ ]
|
|
|
+
|
|
|
+ monkeypatch.setattr(platform_heat, "_heat_signals", fake_signals)
|
|
|
+ # comment 为 0 → 只剩 digg,权重重新归一为 1.0 → 等于纯 digg 归一
|
|
|
+ digg_only = heat_score({"digg_count": 10000, "comment_count": 0}, "x")
|
|
|
+ expected = round((log10(10001) - log10(100)) / (log10(100000) - log10(100)), 4)
|
|
|
+ assert digg_only == expected
|
|
|
+
|
|
|
+
|
|
|
+def test_heat_weighted_average_of_two_signals(monkeypatch):
|
|
|
+ from content_agent.business_modules.content_discovery import platform_heat
|
|
|
+
|
|
|
+ def fake_signals(platform):
|
|
|
+ return [
|
|
|
+ {"field": "digg_count", "weight": 0.5, "floor": 100, "ceil": 100000},
|
|
|
+ {"field": "comment_count", "weight": 0.5, "floor": 10, "ceil": 10000},
|
|
|
+ ]
|
|
|
+
|
|
|
+ monkeypatch.setattr(platform_heat, "_heat_signals", fake_signals)
|
|
|
+ score = heat_score({"digg_count": 10000, "comment_count": 1000}, "x")
|
|
|
+ d = (log10(10001) - log10(100)) / (log10(100000) - log10(100))
|
|
|
+ c = (log10(1001) - log10(10)) / (log10(10000) - log10(10))
|
|
|
+ assert score == round((0.5 * d + 0.5 * c) / 1.0, 4)
|