Ver Fonte

refactor(v3-cleanup-B1): 砍画像调用链+V2 代码死支(-387 行)

- douyin 客户端不再为每条内容白调画像接口(真跑 19 条全调全失败的死成本);
  删 _fetch_content_portrait 及 5 个仅画像用助手、桥接键 pattern_recall/
  category_or_element_binding 写入(M3 已退役)
- builder 删画像分支(content_audience_profile 字段保留,判定写 fit_senior_50plus);
  evaluator 删恒 missing 的 age_50_plus_level;validation 删 recall_status==matched
  的 decode 时代遗留校验循环(已成死代码,非无限循环);dashboard 删 decode recalls 终态回退(V3 无 decode 事件)
- 测试: 删 3 个画像专属用例,搜索/作者用例加反向断言(请求清单仅 1 次+URL 后缀);
  326→323 passed,快照/指纹零重钉(diff 不含 tests/fixtures/snapshots/)
- 验收岗交叉核验: 施工单 7 项+防误删 7 项全 PASS

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
Sam Lee há 1 dia atrás
pai
commit
d17fc13a66

+ 1 - 21
content_agent/business_modules/content_discovery/content_discovery_builder.py

@@ -123,7 +123,6 @@ def _build_evidence_bundle(
     source_evidence = build_source_evidence(
         run_id, policy_run_id, discovered_content_item, result, source_context
     )
-    content_audience_profile = _build_content_audience_profile(result)
     return {
         "schema_version": EVIDENCE_BUNDLE_SCHEMA_VERSION,
         "source_evidence": source_evidence,
@@ -144,10 +143,6 @@ def _build_evidence_bundle(
             "platform_author_id": discovered_content_item.get("platform_author_id", ""),
         },
         "pattern_match_result": {
-            "pattern_recall": result.get("pattern_recall", "pattern_recall_pending"),
-            "category_or_element_binding": result.get(
-                "category_or_element_binding", "pattern_recall_pending"
-            ),
             "level": result.get("relevance_level", "related"),
             "score": result.get("score"),
             "platform_fit": result.get("platform_fit"),
@@ -161,7 +156,7 @@ def _build_evidence_bundle(
                 discovered_content_item["platform"],
             ),
         },
-        "content_audience_profile": content_audience_profile,
+        "content_audience_profile": {},
         "author_audience_profile": {
             "platform_author_id": discovered_content_item.get("platform_author_id", ""),
             "author": {
@@ -187,18 +182,3 @@ def _build_evidence_bundle(
             ),
         },
     }
-
-
-def _build_content_audience_profile(result: dict[str, Any]) -> dict[str, Any]:
-    if not result.get("portrait_available", False):
-        return {}
-    profile = {
-        "age_50_plus_level": result.get("age_50_plus_level", "missing"),
-        "portrait_available": True,
-    }
-    for field in ["age_distribution", "age_50_plus_ratio", "age_50_plus_tgi"]:
-        if field in result:
-            profile[field] = result[field]
-    if "age_50_plus_tgi" in result:
-        profile["tgi"] = result["age_50_plus_tgi"]
-    return profile

+ 0 - 2
content_agent/business_modules/rule_judgment/evaluator.py

@@ -115,7 +115,6 @@ def _build_decision(
     replay_marker: dict[str, Any],
 ) -> dict[str, Any]:
     content = bundle["content"]
-    audience_profile = bundle.get("content_audience_profile") or {}
     decision = {
         "record_schema_version": RUNTIME_RECORD_SCHEMA_VERSION,
         "run_id": run_id,
@@ -133,7 +132,6 @@ def _build_decision(
         "triggered_blocking_rules": triggered_blocking_rules,
         "scorecard": scorecard,
         "score": score,
-        "age_50_plus_level": audience_profile.get("age_50_plus_level", "missing"),
         "decision_action": decision_action,
         "decision_reason_code": decision_reason_code,
         "search_query_effect_status": search_query_effect_status,

+ 0 - 29
content_agent/business_modules/run_record/validation.py

@@ -491,35 +491,6 @@ def _check_pattern_recall_evidence(
 ) -> None:
     evidence_rows = data.get("pattern_recall_evidence.jsonl", [])
     evidence_by_id = {row.get("recall_evidence_id"): row for row in evidence_rows}
-    for row in evidence_rows:
-        recall_status = row.get("recall_status")
-        if recall_status == "matched":
-            missing = [
-                field
-                for field in [
-                    "recall_evidence_id",
-                    "matched_terms",
-                    "matched_category_paths",
-                ]
-                if not row.get(field)
-            ]
-            if missing:
-                _fail(
-                    findings,
-                    "pattern_recall_matched_missing_evidence",
-                    f"matched recall evidence missing {missing}",
-                )
-            if len(row.get("matched_category_paths") or []) > 1:
-                primary_path = (row.get("raw_payload") or {}).get("primary_matched_category_path")
-                if not primary_path:
-                    summary = row.get("evidence_summary") or {}
-                    primary_path = summary.get("primary_matched_category_path")
-                if not primary_path:
-                    _fail(
-                        findings,
-                        "pattern_recall_primary_path_missing",
-                        "matched multi-path recall evidence missing primary path",
-                    )
 
     for item in data.get("discovered_content_items.jsonl", []):
         pattern_match = item.get("pattern_match_result") or {}

+ 3 - 11
content_agent/dashboard_service.py

@@ -281,7 +281,6 @@ class DashboardService:
         run_event_rows = self._read_jsonl_optional(run_id, "run_events.jsonl")
         walk_action_rows = self._read_jsonl_optional(run_id, "walk_actions.jsonl")
         source_path_rows = self._read_jsonl_optional(run_id, "source_path_records.jsonl")
-        recall_rows = self._read_jsonl_optional(run_id, "pattern_recall_evidence.jsonl")
         events = [
             {
                 "source": "run_events.jsonl",
@@ -329,7 +328,7 @@ class DashboardService:
             "total": len(events),
             "data_origin": self._combined_origin(run_id),
             "summary": _timeline_summary(
-                run_event_rows, walk_action_rows, source_path_rows, recall_rows
+                run_event_rows, walk_action_rows, source_path_rows
             ),
         }
 
@@ -877,7 +876,6 @@ def _timeline_summary(
     events: list[dict[str, Any]],
     walk_actions: list[dict[str, Any]],
     source_paths: list[dict[str, Any]],
-    recalls: list[dict[str, Any]],
 ) -> dict[str, Any]:
     stage_duration_ms: dict[str, int] = {}
     error_counts: dict[str, int] = {}
@@ -935,14 +933,8 @@ def _timeline_summary(
         and action.get("walk_status") == "failed"
     )
 
-    # 只统计 decode 事件;run 内零 decode 事件(M6 前旧数据)才整体回退为 recalls 终态计数,不混合。
-    if decode_event_counts:
-        decode_status_counts = decode_event_counts
-    else:
-        decode_status_counts = {}
-        for recall in recalls:
-            status = str(recall.get("decode_status") or "unknown")
-            decode_status_counts[status] = decode_status_counts.get(status, 0) + 1
+    # V3 判定为 Gemini 直读,正常 run 无 decode 事件,此计数恒 {};仅当历史数据带 decode 事件时呈现。
+    decode_status_counts = decode_event_counts
 
     return {
         "total_duration_ms": total_duration_ms,

+ 1 - 130
content_agent/integrations/douyin.py

@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import re
 from pathlib import Path
 from typing import Any
 
@@ -37,7 +36,6 @@ class CrawapiDouyinClient:
         self,
         base_url: str,
         keyword_path: str,
-        content_portrait_path: str,
         blogger_path: str = "",
         detail_path: str = "",
         timeout_seconds: float = 60.0,
@@ -53,7 +51,6 @@ class CrawapiDouyinClient:
     ) -> None:
         self.base_url = base_url.rstrip("/") + "/"
         self.keyword_path = keyword_path.lstrip("/")
-        self.content_portrait_path = content_portrait_path.lstrip("/")
         self.blogger_path = blogger_path.lstrip("/")
         self.detail_path = detail_path.lstrip("/")
         self.timeout_seconds = timeout_seconds
@@ -73,9 +70,6 @@ class CrawapiDouyinClient:
         return cls(
             base_url=_env("CONTENTFIND_API_CRAWAPI_BASE_URL", env, required=True),
             keyword_path=_env("CONTENTFIND_DOUYIN_KEYWORD_PATH", env, required=True),
-            content_portrait_path=_env(
-                "CONTENTFIND_DOUYIN_VIDEO_LIKE_PORTRAIT_PATH", env, required=True
-            ),
             blogger_path=_env("CONTENTFIND_DOUYIN_BLOGGER_PATH", env, required=True),
             detail_path=_env(
                 "CONTENTFIND_DOUYIN_DETAIL_PATH", env, default="/crawler/dou_yin/detail"
@@ -118,10 +112,7 @@ class CrawapiDouyinClient:
         results: list[dict[str, Any]] = []
         selected_items = items[: self.max_results_per_query] if self.max_results_per_query else items
         for index, item in enumerate(selected_items, start=1):
-            normalized = self._normalize_content_item(query, item, index, has_more, next_cursor)
-            portrait = self._fetch_content_portrait(normalized["platform_content_id"])
-            normalized.update(portrait)
-            results.append(normalized)
+            results.append(self._normalize_content_item(query, item, index, has_more, next_cursor))
         return results
 
     def fetch_author_works(self, query: dict[str, Any]) -> list[dict[str, Any]]:
@@ -145,8 +136,6 @@ class CrawapiDouyinClient:
             normalized = self._normalize_content_item(query, item, index, has_more, next_cursor)
             normalized["previous_discovery_step"] = "author_works"
             normalized["content_metadata_source"] = "douyin_blogger"
-            portrait = self._fetch_content_portrait(normalized["platform_content_id"])
-            normalized.update(portrait)
             results.append(normalized)
         return results
 
@@ -186,8 +175,6 @@ class CrawapiDouyinClient:
             "next_cursor": next_cursor,
             "score": _score_from_statistics(statistics),
             "risk_level": "unknown",
-            "pattern_recall": "pattern_recall_pending",
-            "category_or_element_binding": "pattern_recall_pending",
             "discovery_relation": "derived_from_pattern_demand",
             "discovery_start_source": query["discovery_start_source"],
             "previous_discovery_step": "search_query_direct",
@@ -199,47 +186,6 @@ class CrawapiDouyinClient:
             },
         }
 
-    def _fetch_content_portrait(self, platform_content_id: str) -> dict[str, Any]:
-        data = None
-        for _ in range(2):
-            try:
-                data = self._post_json(
-                    self.content_portrait_path,
-                    {
-                        "content_id": platform_content_id,
-                        "need_age": True,
-                        "need_gender": True,
-                        "need_province": True,
-                        "need_city": False,
-                        "need_city_level": False,
-                        "need_phone_brand": False,
-                        "need_phone_price": False,
-                    },
-                    operation="content_portrait",
-                )
-                break
-            except RuntimeError:
-                continue
-        if data is None:
-            return {"portrait_available": False, "age_50_plus_level": "missing"}
-
-        portrait = _extract_portrait_dimensions(data)
-        age_distribution = _normalize_age_distribution(portrait.get("年龄"))
-        if not age_distribution:
-            return {"portrait_available": False, "age_50_plus_level": "missing"}
-
-        age_50_ratio = sum(row["percentage"] for row in age_distribution if row["is_50_plus"])
-        age_50_tgi = max(
-            [row["preference"] for row in age_distribution if row["is_50_plus"]] or [0.0]
-        )
-        return {
-            "portrait_available": True,
-            "age_50_plus_level": _age_level(age_50_ratio, age_50_tgi),
-            "age_distribution": age_distribution,
-            "age_50_plus_ratio": age_50_ratio,
-            "age_50_plus_tgi": age_50_tgi,
-        }
-
     def fetch_detail(self, content_id: str) -> dict[str, Any]:
         data = self._post_json(
             self.detail_path,
@@ -315,78 +261,3 @@ def _extract_tags(item: dict[str, Any]) -> list[str]:
         if isinstance(text, dict) and text.get("hashtag_name"):
             tags.append(f"#{text['hashtag_name']}")
     return list(dict.fromkeys(tags))
-
-
-def _normalize_age_distribution(age_data: Any) -> list[dict[str, Any]]:
-    rows: list[dict[str, Any]] = []
-    items = age_data.items() if isinstance(age_data, dict) else []
-    if isinstance(age_data, list):
-        items = [(row.get("name"), row) for row in age_data if isinstance(row, dict)]
-    for name, value in items:
-        metrics = value if isinstance(value, dict) else {}
-        label = str(name or metrics.get("name") or "")
-        if not label:
-            continue
-        rows.append(
-            {
-                "name": label,
-                "percentage": _to_float(metrics.get("percentage")),
-                "preference": _to_float(metrics.get("preference")),
-                "is_50_plus": _is_50_plus_label(label),
-            }
-        )
-    return rows
-
-
-def _extract_portrait_dimensions(data: dict[str, Any]) -> dict[str, Any]:
-    data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
-    content_blocks = [
-        data_block.get("data"),
-        data_block.get("portrait"),
-        data_block,
-        data,
-    ]
-    for content_block in content_blocks:
-        if not isinstance(content_block, dict):
-            continue
-        dimensions = content_block.get("dimensions")
-        if isinstance(dimensions, dict):
-            return dimensions
-        portrait = content_block.get("portrait")
-        if isinstance(portrait, dict) and isinstance(portrait.get("dimensions"), dict):
-            return portrait["dimensions"]
-        if "年龄" in content_block:
-            return content_block
-    return {}
-
-
-def _to_float(value: Any) -> float:
-    if value is None:
-        return 0.0
-    if isinstance(value, (int, float)):
-        return float(value)
-    text = str(value).strip().replace("%", "")
-    try:
-        parsed = float(text)
-    except ValueError:
-        return 0.0
-    return parsed / 100 if "%" in str(value) else parsed
-
-
-def _is_50_plus_label(label: str) -> bool:
-    if "50+" in label or "50以上" in label or "50-" in label or "老年" in label:
-        return True
-    numbers = [int(value) for value in re.findall(r"\d+", label)]
-    if not numbers:
-        return False
-    if "-" in label and numbers[0] < 50:
-        return False
-    return min(numbers) >= 50
-
-
-def _age_level(ratio: float, tgi: float) -> str:
-    if ratio >= 0.25 or tgi >= 130:
-        return "strong"
-    if ratio >= 0.1 or tgi >= 100:
-        return "medium"
-    return "weak"

+ 0 - 12
content_agent/integrations/mock_platform.py

@@ -24,12 +24,8 @@ class MockPlatformClient:
                     "statistics": {"digg_count": 9000, "comment_count": 800, "share_count": 700},
                     "tags": ["#人物故事"],
                     "score": 72,
-                    "age_50_plus_level": "medium",
                     "risk_level": "low",
                     "availability": "available",
-                    "portrait_available": True,
-                    "pattern_recall": "matched",
-                    "category_or_element_binding": "matched",
                     "discovery_relation": "mock_pattern_matched",
                     "discovery_start_source": "pattern_itemset",
                     "previous_discovery_step": "search_query_direct",
@@ -51,11 +47,7 @@ class MockPlatformClient:
                     "statistics": {"digg_count": 12, "comment_count": 0, "share_count": 1},
                     "tags": [],
                     "score": None,
-                    "age_50_plus_level": "missing",
                     "risk_level": "unknown",
-                    "portrait_available": False,
-                    "pattern_recall": "matched",
-                    "category_or_element_binding": "matched",
                     "discovery_relation": "mock_pattern_matched",
                     "discovery_start_source": "pattern_itemset",
                     "previous_discovery_step": "search_query_direct",
@@ -79,12 +71,8 @@ class MockPlatformClient:
                 "statistics": {"digg_count": 3000, "comment_count": 20, "share_count": 70},
                 "tags": ["#基层治理"],
                 "score": 55,
-                "age_50_plus_level": "medium",
                 "risk_level": "low",
                 "availability": "available",
-                "portrait_available": True,
-                "pattern_recall": "matched",
-                "category_or_element_binding": "matched",
                 "discovery_relation": "mock_pattern_matched",
                 "discovery_start_source": "pattern_itemset",
                 "previous_discovery_step": "search_query_direct",

+ 0 - 2
content_agent/integrations/shipinhao.py

@@ -87,8 +87,6 @@ def _normalize_shipinhao_item(
         "next_cursor": next_cursor,
         "score": score_from_statistics(statistics),
         "risk_level": "unknown",
-        "pattern_recall": "pattern_recall_pending",
-        "category_or_element_binding": "pattern_recall_pending",
         "discovery_relation": "derived_from_pattern_demand",
         "discovery_start_source": query["discovery_start_source"],
         "previous_discovery_step": "search_query_direct",

+ 0 - 4
tests/p6_walk_helpers.py

@@ -118,12 +118,8 @@ def _platform_result(
         "statistics": {"digg_count": 9000, "comment_count": 800, "share_count": 700},
         "tags": tags,
         "score": 72,
-        "age_50_plus_level": "medium",
         "risk_level": "low",
         "availability": "available",
-        "portrait_available": True,
-        "pattern_recall": "matched",
-        "category_or_element_binding": "matched",
         "discovery_relation": "fake_walk",
         "discovery_start_source": query.get("discovery_start_source", "pattern_itemset"),
         "previous_discovery_step": query.get("previous_discovery_step", "search_query_direct"),

+ 4 - 8
tests/test_case_replay.py

@@ -49,7 +49,7 @@ def _build_synthetic_corpus(cases_dir: Path, case_id: str, items: list[dict[str,
     )
 
 
-def _synthetic_item(content_id: str, *, age_level: str, digg: int) -> dict[str, Any]:
+def _synthetic_item(content_id: str, *, digg: int) -> dict[str, Any]:
     return {
         "content_discovery_id": f"syn_{content_id}",
         "search_query_id": "q_001",
@@ -62,12 +62,8 @@ def _synthetic_item(content_id: str, *, age_level: str, digg: int) -> dict[str,
         "statistics": {"digg_count": digg, "comment_count": 800, "share_count": 600},
         "tags": ["#中医养生"],
         "score": 85,
-        "age_50_plus_level": age_level,
         "risk_level": "low",
         "availability": "available",
-        "portrait_available": True,
-        "pattern_recall": "matched",
-        "category_or_element_binding": "matched",
         "discovery_start_source": "pattern_itemset",
         "previous_discovery_step": "search_query_direct",
         "content_metadata_source": "synthetic",
@@ -100,7 +96,7 @@ def test_replay_id45_baseline_gemini_score(tmp_path):
 
 
 def test_replay_synthetic_pool_case(tmp_path):
-    _build_synthetic_corpus(tmp_path / "cases", "syn_pool", [_synthetic_item("9000000000000000001", age_level="strong", digg=50000)])
+    _build_synthetic_corpus(tmp_path / "cases", "syn_pool", [_synthetic_item("9000000000000000001", digg=50000)])
     artifacts = replay_case("syn_pool", runtime_root=tmp_path / "rt", cases_dir=tmp_path / "cases")
     assert artifacts.state["status"] == "success"
     assert artifacts.summary["pooled_content_count"] >= 1
@@ -109,8 +105,8 @@ def test_replay_synthetic_pool_case(tmp_path):
 
 
 def test_replay_synthetic_review_case(tmp_path):
-    # Strong portrait but low engagement scores into the review band (60-69).
-    _build_synthetic_corpus(tmp_path / "cases", "syn_review", [_synthetic_item("9000000000000000002", age_level="strong", digg=500)])
+    # Low engagement scores into the review band (60-69).
+    _build_synthetic_corpus(tmp_path / "cases", "syn_review", [_synthetic_item("9000000000000000002", digg=500)])
     artifacts = replay_case("syn_review", runtime_root=tmp_path / "rt", cases_dir=tmp_path / "cases")
     assert artifacts.state["status"] == "success"
     assert artifacts.summary["review_content_count"] >= 1

+ 7 - 155
tests/test_douyin_client.py

@@ -36,7 +36,6 @@ def _client(responses, rate_limiter=None):
     return CrawapiDouyinClient(
         base_url="http://crawapi.test",
         keyword_path="/crawler/dou_yin/keyword",
-        content_portrait_path="/crawler/dou_yin/re_dian_bao/video_like_portrait",
         blogger_path="/crawler/dou_yin/blogger",
         default_crawapi_account_ref="771431222",
         http_client=FakeHttpClient(responses),
@@ -52,7 +51,7 @@ def _search_query(text="早上好祝福视频"):
     }
 
 
-def test_douyin_keyword_search_maps_content_and_portrait_fields():
+def test_douyin_keyword_search_maps_content_fields():
     client = _client(
         [
             _response(
@@ -81,19 +80,6 @@ def test_douyin_keyword_search_maps_content_and_portrait_fields():
                     }
                 },
             ),
-            _response(
-                200,
-                {
-                    "data": {
-                        "data": {
-                            "年龄": {
-                                "50+": {"percentage": "18.00%", "preference": "135.0"},
-                                "31-40": {"percentage": "20.00%", "preference": "80.0"},
-                            }
-                        }
-                    }
-                },
-            ),
         ]
     )
 
@@ -107,14 +93,13 @@ def test_douyin_keyword_search_maps_content_and_portrait_fields():
     assert result["tags"] == ["#早上好"]
     assert result["has_more"] is True
     assert result["next_cursor"] == "10"
-    assert result["portrait_available"] is True
-    assert result["age_50_plus_level"] == "strong"
-    assert result["pattern_recall"] == "pattern_recall_pending"
     assert result["discovery_relation"] == "derived_from_pattern_demand"
     assert result["platform_auth_mode"] == "no_bearer"
     assert result["platform_raw_payload"][RAW_CONTENT_ID_KEY] == "7615247738577423622"
     assert client.http_client.requests[0]["json"][RAW_AUTHOR_ACCOUNT_KEY] == "771431222"
-    assert len(client.http_client.requests) == 2
+    # V3 清理: 画像调用链已砍,搜索一条内容只发 1 次 keyword 请求,不再追加画像请求。
+    assert len(client.http_client.requests) == 1
+    assert client.http_client.requests[0]["url"].endswith("/crawler/dou_yin/keyword")
 
 
 def test_douyin_keyword_search_returns_empty_list():
@@ -155,7 +140,6 @@ def test_douyin_fetch_author_works_maps_fake_response():
                     }
                 },
             ),
-            _response(200, {"data": {"data": {"年龄": {}}}}),
         ]
     )
 
@@ -172,6 +156,7 @@ def test_douyin_fetch_author_works_maps_fake_response():
     assert results[0]["search_query_id"] == "author_001"
     assert results[0]["previous_discovery_step"] == "author_works"
     assert client.http_client.requests[0]["json"][RAW_AUTHOR_ACCOUNT_KEY] == "MS4wLjABAAAA001"
+    assert len(client.http_client.requests) == 1
 
 
 def test_douyin_keyword_search_http_error_is_sanitized():
@@ -199,7 +184,6 @@ def test_douyin_keyword_search_bad_json_is_sanitized():
     client = CrawapiDouyinClient(
         base_url="http://crawapi.test",
         keyword_path="/crawler/dou_yin/keyword",
-        content_portrait_path="/crawler/dou_yin/re_dian_bao/video_like_portrait",
         http_client=FakeHttpClient(
             [
                 httpx.Response(
@@ -215,90 +199,10 @@ def test_douyin_keyword_search_bad_json_is_sanitized():
         client.search(_search_query("坏 JSON"))
 
 
-def test_douyin_portrait_http_error_retries_before_missing():
-    client = _client(
-        [
-            _response(
-                200,
-                {
-                    "data": {
-                        "data": [
-                            {
-                                RAW_CONTENT_ID_KEY: "7615247738577423622",
-                                "desc": "早上好",
-                                "author": {
-                                    "nickname": "作者",
-                                    RAW_AUTHOR_ID_KEY: "MS4wLjABAAAA001",
-                                },
-                                "statistics": {"digg_count": 1},
-                            }
-                        ]
-                    }
-                },
-            ),
-            _response(500, {"error": "portrait failed"}),
-            _response(500, {"error": "portrait failed again"}),
-        ]
-    )
-
-    result = client.search(_search_query("早上好"))[0]
-
-    assert result["portrait_available"] is False
-    assert result["age_50_plus_level"] == "missing"
-    assert len(client.http_client.requests) == 3
-
-
-def test_douyin_portrait_retry_can_recover():
-    client = _client(
-        [
-            _response(
-                200,
-                {
-                    "data": {
-                        "data": [
-                            {
-                                RAW_CONTENT_ID_KEY: "7615247738577423622",
-                                "desc": "早上好",
-                                "author": {
-                                    "nickname": "作者",
-                                    RAW_AUTHOR_ID_KEY: "MS4wLjABAAAA001",
-                                },
-                                "statistics": {"digg_count": 1},
-                            }
-                        ],
-                        "has_more": True,
-                        "next_cursor": "10",
-                    }
-                },
-            ),
-            _response(500, {"error": "portrait failed"}),
-            _response(
-                200,
-                {
-                    "data": {
-                        "data": {
-                            "年龄": {
-                                "50+": {"percentage": "18.00%", "preference": "135.0"}
-                            }
-                        }
-                    }
-                },
-            ),
-        ]
-    )
-
-    result = client.search(_search_query("早上好"))[0]
-
-    assert result["portrait_available"] is True
-    assert result["age_50_plus_level"] == "strong"
-    assert len(client.http_client.requests) == 3
-
-
 def test_douyin_keyword_search_can_limit_results_per_query():
     client = CrawapiDouyinClient(
         base_url="http://crawapi.test",
         keyword_path="/crawler/dou_yin/keyword",
-        content_portrait_path="/crawler/dou_yin/re_dian_bao/video_like_portrait",
         max_results_per_query=1,
         http_client=FakeHttpClient(
             [
@@ -313,7 +217,6 @@ def test_douyin_keyword_search_can_limit_results_per_query():
                         }
                     },
                 ),
-                _response(200, {"data": {"data": {"年龄": {}}}}),
             ]
         ),
     )
@@ -321,54 +224,7 @@ def test_douyin_keyword_search_can_limit_results_per_query():
     results = client.search(_search_query("限量"))
 
     assert [result["platform_content_id"] for result in results] == ["1"]
-
-
-def test_douyin_portrait_supports_dimensions_shape_and_excludes_41_to_50():
-    client = _client(
-        [
-            _response(
-                200,
-                {
-                    "data": {
-                        "data": [
-                            {
-                                RAW_CONTENT_ID_KEY: "7635992906608060495",
-                                "desc": "高考加油",
-                                "author": {
-                                    "nickname": "一个富贵",
-                                    RAW_AUTHOR_ID_KEY: "MS4wLjABAAAA001",
-                                },
-                                "statistics": {"digg_count": 100},
-                            }
-                        ]
-                    }
-                },
-            ),
-            _response(
-                200,
-                {
-                    "data": {
-                        "data": {
-                            "dimensions": {
-                                "年龄": [
-                                    {"name": "41-50", "percentage": "30.00%", "preference": "150.0"},
-                                    {"name": "50-", "percentage": "4.83%", "preference": "13.80"},
-                                ]
-                            }
-                        }
-                    }
-                },
-            ),
-        ]
-    )
-
-    result = client.search(_search_query("高考加油"))[0]
-
-    assert result["portrait_available"] is True
-    assert result["age_50_plus_ratio"] == 0.0483
-    assert result["age_50_plus_level"] == "weak"
-    assert result["age_distribution"][0]["is_50_plus"] is False
-    assert result["age_distribution"][1]["is_50_plus"] is True
+    assert len(client.http_client.requests) == 1
 
 
 def _author_query(author_id="MS4wLjABAAAA001", **extra):
@@ -442,7 +298,6 @@ def test_fetch_author_works_normalizes_author_work_fields():
                     }
                 ]
             ),
-            _response(200, {"data": {"data": {"年龄": {}}}}),
         ]
     )
 
@@ -454,15 +309,12 @@ def test_fetch_author_works_normalizes_author_work_fields():
     assert results[0]["create_time"] == 1733000000
     assert results[0]["previous_discovery_step"] == "author_works"
     assert results[0]["content_metadata_source"] == "douyin_blogger"
+    assert len(client.http_client.requests) == 1
 
 
 def test_from_env_reads_blogger_path_and_sort_type(monkeypatch, tmp_path):
     monkeypatch.setenv("CONTENTFIND_API_CRAWAPI_BASE_URL", "http://crawapi.test")
     monkeypatch.setenv("CONTENTFIND_DOUYIN_KEYWORD_PATH", "/crawler/dou_yin/keyword")
-    monkeypatch.setenv(
-        "CONTENTFIND_DOUYIN_VIDEO_LIKE_PORTRAIT_PATH",
-        "/crawler/dou_yin/re_dian_bao/video_like_portrait",
-    )
     monkeypatch.setenv("CONTENTFIND_DOUYIN_BLOGGER_PATH", "/crawler/dou_yin/blogger")
     monkeypatch.setenv("CONTENTFIND_DOUYIN_ACCOUNT_WORKS_DEFAULT_SORT_TYPE", "最热")
 

+ 0 - 1
tests/test_douyin_detail.py

@@ -23,7 +23,6 @@ def _client(responses):
     return CrawapiDouyinClient(
         base_url="http://crawapi.test",
         keyword_path="/crawler/dou_yin/keyword",
-        content_portrait_path="/crawler/dou_yin/re_dian_bao/video_like_portrait",
         blogger_path="/crawler/dou_yin/blogger",
         detail_path="/crawler/dou_yin/detail",
         http_client=FakeHttpClient(responses),

+ 0 - 1
tests/test_dual_channel_normalization.py

@@ -19,7 +19,6 @@ def _douyin_client():
     return CrawapiDouyinClient(
         base_url="http://crawapi.test",
         keyword_path="/k",
-        content_portrait_path="/p",
         blogger_path="/b",
         detail_path="/d",
         http_client=object(),

+ 8 - 7
tests/test_run_timeline_observability.py

@@ -98,7 +98,7 @@ def test_timeline_summary_counts_query_failures():
         {"edge_id": "query_next_page", "walk_status": "success"},
     ]
 
-    summary = _timeline_summary([], walk_actions, [], [])
+    summary = _timeline_summary([], walk_actions, [])
 
     assert summary["query_failure_count"] == 2
 
@@ -110,7 +110,7 @@ def test_timeline_summary_counts_platform_rate_limited():
         {"event_type": "platform_query_failed", "error_code": "PLATFORM_REQUEST_FAILED"},
     ]
 
-    summary = _timeline_summary(events, [], [], [])
+    summary = _timeline_summary(events, [], [])
 
     assert summary["platform_rate_limited_count"] == 2
     assert summary["error_counts"] == {
@@ -127,12 +127,13 @@ def test_timeline_summary_counts_decode_statuses():
         {"event_type": "decode_succeeded"},
     ]
 
-    summary = _timeline_summary(events, [], [], [{"decode_status": "pending"}])
-    # 有 decode 事件时只统计事件,不与 recalls 终态混合
+    summary = _timeline_summary(events, [], [])
+    # 历史数据带 decode 事件时按事件计数呈现
     assert summary["decode_status_counts"] == {"submitted": 1, "polling": 2, "succeeded": 1}
 
-    fallback = _timeline_summary([], [], [], [{"decode_status": "pending"}, {"decode_status": "success"}])
-    assert fallback["decode_status_counts"] == {"pending": 1, "success": 1}
+    # V3 清理: recalls 终态回退已删,无 decode 事件时计数恒为空。
+    no_decode = _timeline_summary([], [], [])
+    assert no_decode["decode_status_counts"] == {}
 
 
 def test_timeline_summary_counts_walk_statuses():
@@ -142,7 +143,7 @@ def test_timeline_summary_counts_walk_statuses():
         {"edge_id": "hashtag_to_query", "walk_status": "skipped"},
     ]
 
-    summary = _timeline_summary([], walk_actions, [], [])
+    summary = _timeline_summary([], walk_actions, [])
 
     assert summary["walk_status_counts"] == {"success": 1, "skipped": 2}
 

+ 1 - 4
tests/test_runtime_files.py

@@ -320,10 +320,7 @@ def test_runtime_validation_catches_missing_pattern_recall_evidence(tmp_path):
     validation = service.validate_run(run_id)
     assert validation["status"] == "fail"
     assert any(
-        finding["check_id"] in {
-            "pattern_recall_evidence_missing",
-            "pattern_recall_matched_missing_evidence",
-        }
+        finding["check_id"] == "pattern_recall_evidence_missing"
         for finding in validation["findings"]
     )