Procházet zdrojové kódy

refactor(r3-C2): 删废弃维度的死 evidence 字段 + 死代码(行为中性)

5 个废弃维度的专属 evidence 源,确认无活读者后清除:
- builder: pattern_match_result 删 platform_fit(douyin_tone evidence)/adaptability;
  删 content_audience_profile:{} 初始化
- recall_decision: 删 content_audience_profile 镜像写入 + 过时注释
- evaluator: 删 _total_interactions 死路径(_get_path 特例)+ 函数
  (只服务 deprecated 的 interaction_performance,运行时永不触发)
- 规则包 shared_contracts.required_input_fields 删 content_audience_profile(零代码消费);
  Excel input_contract_fields 清 2 行文档
保留: statistics(R3 热度命根)/create_time/availability/relevance_score 等活字段。
测试: 画像镜像断言改为反向断言("content_audience_profile" not in item);327 passed,replay 的进池/复看分布零变化,
byte-equal + config gate pass。

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
Sam Lee před 1 dnem
rodič
revize
c75a001f71

+ 0 - 3
content_agent/business_modules/content_discovery/content_discovery_builder.py

@@ -145,8 +145,6 @@ def _build_evidence_bundle(
         "pattern_match_result": {
             "level": result.get("relevance_level", "related"),
             "score": result.get("score"),
-            "platform_fit": result.get("platform_fit"),
-            "adaptability": result.get("adaptability"),
         },
         "content_engagement_metrics": {
             "statistics": result["statistics"],
@@ -156,7 +154,6 @@ def _build_evidence_bundle(
                 discovered_content_item["platform"],
             ),
         },
-        "content_audience_profile": {},
         "author_audience_profile": {
             "platform_author_id": discovered_content_item.get("platform_author_id", ""),
             "author": {

+ 1 - 4
content_agent/business_modules/content_discovery/pattern_recall/recall_decision.py

@@ -2,7 +2,7 @@
 
 替换原 decode 异步解构 + 分类树匹配。每条内容调一次 gemini_video_client.analyze,
 把 4 个判定字段(fit_senior_50plus / fit_confidence / relevance_score / reason)写进
-discovered item 的 pattern_match_result,并镜像 fit_senior_50plus 进 content_audience_profile
+discovered item 的 pattern_match_result。
 M5:analyze 纯 IO 且每条独立,用 ThreadPool 并发执行、按 offset 归位回收;
 id 编号、三个 list 的组装与落盘全部留主线程按 offset 串行 → 产物与串行逐条等价。
 """
@@ -136,9 +136,6 @@ def _update_discovered_item(
     updated = {
         **item,
         "pattern_match_result": pattern_match_result,
-        "content_audience_profile": {
-            "fit_senior_50plus": pattern_match_result["fit_senior_50plus"]
-        },
     }
     raw_payload = dict(updated.get("raw_payload") or {})
     raw_payload["pattern_match_result"] = pattern_match_result

+ 0 - 9
content_agent/business_modules/rule_judgment/evaluator.py

@@ -203,10 +203,6 @@ def _match_threshold(score: int | float | None, thresholds: list[dict[str, Any]]
 
 
 def _get_path(data: dict[str, Any], path: str) -> Any:
-    if path == "content_engagement_metrics.statistics.total_interactions":
-        statistics = _get_path(data, "content_engagement_metrics.statistics")
-        if isinstance(statistics, dict):
-            return _total_interactions(statistics)
     current: Any = data
     for part in path.split("."):
         if not part:
@@ -394,11 +390,6 @@ def _gate_replay(gate: dict[str, Any]) -> dict[str, Any]:
     }
 
 
-def _total_interactions(statistics: dict[str, Any]) -> int:
-    keys = ["digg_count", "comment_count", "share_count", "collect_count"]
-    return sum(int(statistics.get(key) or 0) for key in keys)
-
-
 def _evidence_refs(rule_pack: dict[str, Any]) -> list[str]:
     refs = ["source_evidence"]
     for field in rule_pack.get("input_contract", {}).get("required_fields", []):

+ 0 - 1
product_documents/规则包/douyin_rule_packs.v1.json

@@ -100,7 +100,6 @@
   "shared_contracts": {
     "input_type": "EvidenceBundle",
     "required_input_fields": [
-      "content_audience_profile",
       "author_audience_profile",
       "content_engagement_metrics",
       "pattern_match_result",

binární
tech_documents/规则包映射/规则包映射配置表.xlsx


+ 4 - 4
tests/test_dual_channel_gemini_replay.py

@@ -1,8 +1,7 @@
 """V3-M2D: Gemini 判定结果端到端落到 pattern_match_result(经回放 harness)。
 
 验证 M2 的核心契约:recall_pattern 调 GeminiVideoClient,把 4 个判定字段写进
-discovered item 的 pattern_match_result,并镜像 fit_senior_50plus 进 content_audience_profile,
-带 M2→M3 桥接键。real_id45 经桥接键回放零回归(决策 + validation pass)。
+discovered item 的 pattern_match_result。real_id45 回放零回归(决策 + validation pass)。
 """
 
 from __future__ import annotations
@@ -35,8 +34,9 @@ def test_replay_writes_gemini_fields_to_pattern_match_result(tmp_path):
         assert pmr["relevance_score"] == 0.85
         assert set(pmr) >= {"fit_senior_50plus", "fit_confidence", "relevance_score", "reason"}
         assert pmr["judge_status"] == "ok"
-        # 画像列镜像
-        assert item["content_audience_profile"]["fit_senior_50plus"] is True
+        # 2026-06-12 清理: content_audience_profile 画像镜像维度已废弃,不再写入
+        # (fit_senior_50plus 真身在 pattern_match_result)。
+        assert "content_audience_profile" not in item
 
     assert artifacts.summary["pooled_content_count"] == 4
     assert artifacts.summary["review_content_count"] == 1