1 ヶ月前 · d17fc13a66
--- a/content_agent/business_modules/content_discovery/content_discovery_builder.py
+++ b/content_agent/business_modules/content_discovery/content_discovery_builder.py
@@ -123,7 +123,6 @@ def _build_evidence_bundle(
 
				     source_evidence = build_source_evidence(
			
 
				         run_id, policy_run_id, discovered_content_item, result, source_context
			
 
				     )
			
 
				-    content_audience_profile = _build_content_audience_profile(result)
			
 
				     return {
			
 
				         "schema_version": EVIDENCE_BUNDLE_SCHEMA_VERSION,
			
 
				         "source_evidence": source_evidence,
			
@@ -144,10 +143,6 @@ def _build_evidence_bundle(
 
				             "platform_author_id": discovered_content_item.get("platform_author_id", ""),
			
 
				         },
			
 
				         "pattern_match_result": {
			
 
				-            "pattern_recall": result.get("pattern_recall", "pattern_recall_pending"),
			
 
				-            "category_or_element_binding": result.get(
			
 
				-                "category_or_element_binding", "pattern_recall_pending"
			
 
				-            ),
			
 
				             "level": result.get("relevance_level", "related"),
			
 
				             "score": result.get("score"),
			
 
				             "platform_fit": result.get("platform_fit"),
			
@@ -161,7 +156,7 @@ def _build_evidence_bundle(
 
				                 discovered_content_item["platform"],
			
 
				             ),
			
 
				         },
			
 
				-        "content_audience_profile": content_audience_profile,
			
 
				+        "content_audience_profile": {},
			
 
				         "author_audience_profile": {
			
 
				             "platform_author_id": discovered_content_item.get("platform_author_id", ""),
			
 
				             "author": {
			
@@ -187,18 +182,3 @@ def _build_evidence_bundle(
 
				             ),
			
 
				         },
			
 
				     }
			
 
				-
			
 
				-
			
 
				-def _build_content_audience_profile(result: dict[str, Any]) -> dict[str, Any]:
			
 
				-    if not result.get("portrait_available", False):
			
 
				-        return {}
			
 
				-    profile = {
			
 
				-        "age_50_plus_level": result.get("age_50_plus_level", "missing"),
			
 
				-        "portrait_available": True,
			
 
				-    }
			
 
				-    for field in ["age_distribution", "age_50_plus_ratio", "age_50_plus_tgi"]:
			
 
				-        if field in result:
			
 
				-            profile[field] = result[field]
			
 
				-    if "age_50_plus_tgi" in result:
			
 
				-        profile["tgi"] = result["age_50_plus_tgi"]
			
 
				-    return profile
			
--- a/content_agent/business_modules/rule_judgment/evaluator.py
+++ b/content_agent/business_modules/rule_judgment/evaluator.py
@@ -115,7 +115,6 @@ def _build_decision(
 
				     replay_marker: dict[str, Any],
			
 
				 ) -> dict[str, Any]:
			
 
				     content = bundle["content"]
			
 
				-    audience_profile = bundle.get("content_audience_profile") or {}
			
 
				     decision = {
			
 
				         "record_schema_version": RUNTIME_RECORD_SCHEMA_VERSION,
			
 
				         "run_id": run_id,
			
@@ -133,7 +132,6 @@ def _build_decision(
 
				         "triggered_blocking_rules": triggered_blocking_rules,
			
 
				         "scorecard": scorecard,
			
 
				         "score": score,
			
 
				-        "age_50_plus_level": audience_profile.get("age_50_plus_level", "missing"),
			
 
				         "decision_action": decision_action,
			
 
				         "decision_reason_code": decision_reason_code,
			
 
				         "search_query_effect_status": search_query_effect_status,
			
--- a/content_agent/business_modules/run_record/validation.py
+++ b/content_agent/business_modules/run_record/validation.py
@@ -491,35 +491,6 @@ def _check_pattern_recall_evidence(
 
				 ) -> None:
			
 
				     evidence_rows = data.get("pattern_recall_evidence.jsonl", [])
			
 
				     evidence_by_id = {row.get("recall_evidence_id"): row for row in evidence_rows}
			
 
				-    for row in evidence_rows:
			
 
				-        recall_status = row.get("recall_status")
			
 
				-        if recall_status == "matched":
			
 
				-            missing = [
			
 
				-                field
			
 
				-                for field in [
			
 
				-                    "recall_evidence_id",
			
 
				-                    "matched_terms",
			
 
				-                    "matched_category_paths",
			
 
				-                ]
			
 
				-                if not row.get(field)
			
 
				-            ]
			
 
				-            if missing:
			
 
				-                _fail(
			
 
				-                    findings,
			
 
				-                    "pattern_recall_matched_missing_evidence",
			
 
				-                    f"matched recall evidence missing {missing}",
			
 
				-                )
			
 
				-            if len(row.get("matched_category_paths") or []) > 1:
			
 
				-                primary_path = (row.get("raw_payload") or {}).get("primary_matched_category_path")
			
 
				-                if not primary_path:
			
 
				-                    summary = row.get("evidence_summary") or {}
			
 
				-                    primary_path = summary.get("primary_matched_category_path")
			
 
				-                if not primary_path:
			
 
				-                    _fail(
			
 
				-                        findings,
			
 
				-                        "pattern_recall_primary_path_missing",
			
 
				-                        "matched multi-path recall evidence missing primary path",
			
 
				-                    )
			
 
				 
			
 
				     for item in data.get("discovered_content_items.jsonl", []):
			
 
				         pattern_match = item.get("pattern_match_result") or {}
			
--- a/content_agent/dashboard_service.py
+++ b/content_agent/dashboard_service.py
@@ -281,7 +281,6 @@ class DashboardService:
 
				         run_event_rows = self._read_jsonl_optional(run_id, "run_events.jsonl")
			
 
				         walk_action_rows = self._read_jsonl_optional(run_id, "walk_actions.jsonl")
			
 
				         source_path_rows = self._read_jsonl_optional(run_id, "source_path_records.jsonl")
			
 
				-        recall_rows = self._read_jsonl_optional(run_id, "pattern_recall_evidence.jsonl")
			
 
				         events = [
			
 
				             {
			
 
				                 "source": "run_events.jsonl",
			
@@ -329,7 +328,7 @@ class DashboardService:
 
				             "total": len(events),
			
 
				             "data_origin": self._combined_origin(run_id),
			
 
				             "summary": _timeline_summary(
			
 
				-                run_event_rows, walk_action_rows, source_path_rows, recall_rows
			
 
				+                run_event_rows, walk_action_rows, source_path_rows
			
 
				             ),
			
 
				         }
			
 
				 
			
@@ -877,7 +876,6 @@ def _timeline_summary(
 
				     events: list[dict[str, Any]],
			
 
				     walk_actions: list[dict[str, Any]],
			
 
				     source_paths: list[dict[str, Any]],
			
 
				-    recalls: list[dict[str, Any]],
			
 
				 ) -> dict[str, Any]:
			
 
				     stage_duration_ms: dict[str, int] = {}
			
 
				     error_counts: dict[str, int] = {}
			
@@ -935,14 +933,8 @@ def _timeline_summary(
 
				         and action.get("walk_status") == "failed"
			
 
				     )
			
 
				 
			
 
				-    # 只统计 decode 事件;run 内零 decode 事件(M6 前旧数据)才整体回退为 recalls 终态计数,不混合。
			
 
				-    if decode_event_counts:
			
 
				-        decode_status_counts = decode_event_counts
			
 
				-    else:
			
 
				-        decode_status_counts = {}
			
 
				-        for recall in recalls:
			
 
				-            status = str(recall.get("decode_status") or "unknown")
			
 
				-            decode_status_counts[status] = decode_status_counts.get(status, 0) + 1
			
 
				+    # V3 判定为 Gemini 直读,正常 run 无 decode 事件,此计数恒 {};仅当历史数据带 decode 事件时呈现。
			
 
				+    decode_status_counts = decode_event_counts
			
 
				 
			
 
				     return {
			
 
				         "total_duration_ms": total_duration_ms,
			
--- a/content_agent/integrations/douyin.py
+++ b/content_agent/integrations/douyin.py
@@ -1,6 +1,5 @@
 
				 from __future__ import annotations
			
 
				 
			
 
				-import re
			
 
				 from pathlib import Path
			
 
				 from typing import Any
			
 
				 
			
@@ -37,7 +36,6 @@ class CrawapiDouyinClient:
 
				         self,
			
 
				         base_url: str,
			
 
				         keyword_path: str,
			
 
				-        content_portrait_path: str,
			
 
				         blogger_path: str = "",
			
 
				         detail_path: str = "",
			
 
				         timeout_seconds: float = 60.0,
			
@@ -53,7 +51,6 @@ class CrawapiDouyinClient:
 
				     ) -> None:
			
 
				         self.base_url = base_url.rstrip("/") + "/"
			
 
				         self.keyword_path = keyword_path.lstrip("/")
			
 
				-        self.content_portrait_path = content_portrait_path.lstrip("/")
			
 
				         self.blogger_path = blogger_path.lstrip("/")
			
 
				         self.detail_path = detail_path.lstrip("/")
			
 
				         self.timeout_seconds = timeout_seconds
			
@@ -73,9 +70,6 @@ class CrawapiDouyinClient:
 
				         return cls(
			
 
				             base_url=_env("CONTENTFIND_API_CRAWAPI_BASE_URL", env, required=True),
			
 
				             keyword_path=_env("CONTENTFIND_DOUYIN_KEYWORD_PATH", env, required=True),
			
 
				-            content_portrait_path=_env(
			
 
				-                "CONTENTFIND_DOUYIN_VIDEO_LIKE_PORTRAIT_PATH", env, required=True
			
 
				-            ),
			
 
				             blogger_path=_env("CONTENTFIND_DOUYIN_BLOGGER_PATH", env, required=True),
			
 
				             detail_path=_env(
			
 
				                 "CONTENTFIND_DOUYIN_DETAIL_PATH", env, default="/crawler/dou_yin/detail"
			
@@ -118,10 +112,7 @@ class CrawapiDouyinClient:
 
				         results: list[dict[str, Any]] = []
			
 
				         selected_items = items[: self.max_results_per_query] if self.max_results_per_query else items
			
 
				         for index, item in enumerate(selected_items, start=1):
			
 
				-            normalized = self._normalize_content_item(query, item, index, has_more, next_cursor)
			
 
				-            portrait = self._fetch_content_portrait(normalized["platform_content_id"])
			
 
				-            normalized.update(portrait)
			
 
				-            results.append(normalized)
			
 
				+            results.append(self._normalize_content_item(query, item, index, has_more, next_cursor))
			
 
				         return results
			
 
				 
			
 
				     def fetch_author_works(self, query: dict[str, Any]) -> list[dict[str, Any]]:
			
@@ -145,8 +136,6 @@ class CrawapiDouyinClient:
 
				             normalized = self._normalize_content_item(query, item, index, has_more, next_cursor)
			
 
				             normalized["previous_discovery_step"] = "author_works"
			
 
				             normalized["content_metadata_source"] = "douyin_blogger"
			
 
				-            portrait = self._fetch_content_portrait(normalized["platform_content_id"])
			
 
				-            normalized.update(portrait)
			
 
				             results.append(normalized)
			
 
				         return results
			
 
				 
			
@@ -186,8 +175,6 @@ class CrawapiDouyinClient:
 
				             "next_cursor": next_cursor,
			
 
				             "score": _score_from_statistics(statistics),
			
 
				             "risk_level": "unknown",
			
 
				-            "pattern_recall": "pattern_recall_pending",
			
 
				-            "category_or_element_binding": "pattern_recall_pending",
			
 
				             "discovery_relation": "derived_from_pattern_demand",
			
 
				             "discovery_start_source": query["discovery_start_source"],
			
 
				             "previous_discovery_step": "search_query_direct",
			
@@ -199,47 +186,6 @@ class CrawapiDouyinClient:
 
				             },
			
 
				         }
			
 
				 
			
 
				-    def _fetch_content_portrait(self, platform_content_id: str) -> dict[str, Any]:
			
 
				-        data = None
			
 
				-        for _ in range(2):
			
 
				-            try:
			
 
				-                data = self._post_json(
			
 
				-                    self.content_portrait_path,
			
 
				-                    {
			
 
				-                        "content_id": platform_content_id,
			
 
				-                        "need_age": True,
			
 
				-                        "need_gender": True,
			
 
				-                        "need_province": True,
			
 
				-                        "need_city": False,
			
 
				-                        "need_city_level": False,
			
 
				-                        "need_phone_brand": False,
			
 
				-                        "need_phone_price": False,
			
 
				-                    },
			
 
				-                    operation="content_portrait",
			
 
				-                )
			
 
				-                break
			
 
				-            except RuntimeError:
			
 
				-                continue
			
 
				-        if data is None:
			
 
				-            return {"portrait_available": False, "age_50_plus_level": "missing"}
			
 
				-
			
 
				-        portrait = _extract_portrait_dimensions(data)
			
 
				-        age_distribution = _normalize_age_distribution(portrait.get("年龄"))
			
 
				-        if not age_distribution:
			
 
				-            return {"portrait_available": False, "age_50_plus_level": "missing"}
			
 
				-
			
 
				-        age_50_ratio = sum(row["percentage"] for row in age_distribution if row["is_50_plus"])
			
 
				-        age_50_tgi = max(
			
 
				-            [row["preference"] for row in age_distribution if row["is_50_plus"]] or [0.0]
			
 
				-        )
			
 
				-        return {
			
 
				-            "portrait_available": True,
			
 
				-            "age_50_plus_level": _age_level(age_50_ratio, age_50_tgi),
			
 
				-            "age_distribution": age_distribution,
			
 
				-            "age_50_plus_ratio": age_50_ratio,
			
 
				-            "age_50_plus_tgi": age_50_tgi,
			
 
				-        }
			
 
				-
			
 
				     def fetch_detail(self, content_id: str) -> dict[str, Any]:
			
 
				         data = self._post_json(
			
 
				             self.detail_path,
			
@@ -315,78 +261,3 @@ def _extract_tags(item: dict[str, Any]) -> list[str]:
 
				         if isinstance(text, dict) and text.get("hashtag_name"):
			
 
				             tags.append(f"#{text['hashtag_name']}")
			
 
				     return list(dict.fromkeys(tags))
			
 
				-
			
 
				-
			
 
				-def _normalize_age_distribution(age_data: Any) -> list[dict[str, Any]]:
			
 
				-    rows: list[dict[str, Any]] = []
			
 
				-    items = age_data.items() if isinstance(age_data, dict) else []
			
 
				-    if isinstance(age_data, list):
			
 
				-        items = [(row.get("name"), row) for row in age_data if isinstance(row, dict)]
			
 
				-    for name, value in items:
			
 
				-        metrics = value if isinstance(value, dict) else {}
			
 
				-        label = str(name or metrics.get("name") or "")
			
 
				-        if not label:
			
 
				-            continue
			
 
				-        rows.append(
			
 
				-            {
			
 
				-                "name": label,
			
 
				-                "percentage": _to_float(metrics.get("percentage")),
			
 
				-                "preference": _to_float(metrics.get("preference")),
			
 
				-                "is_50_plus": _is_50_plus_label(label),
			
 
				-            }
			
 
				-        )
			
 
				-    return rows
			
 
				-
			
 
				-
			
 
				-def _extract_portrait_dimensions(data: dict[str, Any]) -> dict[str, Any]:
			
 
				-    data_block = data.get("data", {}) if isinstance(data.get("data"), dict) else {}
			
 
				-    content_blocks = [
			
 
				-        data_block.get("data"),
			
 
				-        data_block.get("portrait"),
			
 
				-        data_block,
			
 
				-        data,
			
 
				-    ]
			
 
				-    for content_block in content_blocks:
			
 
				-        if not isinstance(content_block, dict):
			
 
				-            continue
			
 
				-        dimensions = content_block.get("dimensions")
			
 
				-        if isinstance(dimensions, dict):
			
 
				-            return dimensions
			
 
				-        portrait = content_block.get("portrait")
			
 
				-        if isinstance(portrait, dict) and isinstance(portrait.get("dimensions"), dict):
			
 
				-            return portrait["dimensions"]
			
 
				-        if "年龄" in content_block:
			
 
				-            return content_block
			
 
				-    return {}
			
 
				-
			
 
				-
			
 
				-def _to_float(value: Any) -> float:
			
 
				-    if value is None:
			
 
				-        return 0.0
			
 
				-    if isinstance(value, (int, float)):
			
 
				-        return float(value)
			
 
				-    text = str(value).strip().replace("%", "")
			
 
				-    try:
			
 
				-        parsed = float(text)
			
 
				-    except ValueError:
			
 
				-        return 0.0
			
 
				-    return parsed / 100 if "%" in str(value) else parsed
			
 
				-
			
 
				-
			
 
				-def _is_50_plus_label(label: str) -> bool:
			
 
				-    if "50+" in label or "50以上" in label or "50-" in label or "老年" in label:
			
 
				-        return True
			
 
				-    numbers = [int(value) for value in re.findall(r"\d+", label)]
			
 
				-    if not numbers:
			
 
				-        return False
			
 
				-    if "-" in label and numbers[0] < 50:
			
 
				-        return False
			
 
				-    return min(numbers) >= 50
			
 
				-
			
 
				-
			
 
				-def _age_level(ratio: float, tgi: float) -> str:
			
 
				-    if ratio >= 0.25 or tgi >= 130:
			
 
				-        return "strong"
			
 
				-    if ratio >= 0.1 or tgi >= 100:
			
 
				-        return "medium"
			
 
				-    return "weak"
			
--- a/content_agent/integrations/mock_platform.py
+++ b/content_agent/integrations/mock_platform.py
@@ -24,12 +24,8 @@ class MockPlatformClient:
 
				                     "statistics": {"digg_count": 9000, "comment_count": 800, "share_count": 700},
			
 
				                     "tags": ["#人物故事"],
			
 
				                     "score": 72,
			
 
				-                    "age_50_plus_level": "medium",
			
 
				                     "risk_level": "low",
			
 
				                     "availability": "available",
			
 
				-                    "portrait_available": True,
			
 
				-                    "pattern_recall": "matched",
			
 
				-                    "category_or_element_binding": "matched",
			
 
				                     "discovery_relation": "mock_pattern_matched",
			
 
				                     "discovery_start_source": "pattern_itemset",
			
 
				                     "previous_discovery_step": "search_query_direct",
			
@@ -51,11 +47,7 @@ class MockPlatformClient:
 
				                     "statistics": {"digg_count": 12, "comment_count": 0, "share_count": 1},
			
 
				                     "tags": [],
			
 
				                     "score": None,
			
 
				-                    "age_50_plus_level": "missing",
			
 
				                     "risk_level": "unknown",
			
 
				-                    "portrait_available": False,
			
 
				-                    "pattern_recall": "matched",
			
 
				-                    "category_or_element_binding": "matched",
			
 
				                     "discovery_relation": "mock_pattern_matched",
			
 
				                     "discovery_start_source": "pattern_itemset",
			
 
				                     "previous_discovery_step": "search_query_direct",
			
@@ -79,12 +71,8 @@ class MockPlatformClient:
 
				                 "statistics": {"digg_count": 3000, "comment_count": 20, "share_count": 70},
			
 
				                 "tags": ["#基层治理"],
			
 
				                 "score": 55,
			
 
				-                "age_50_plus_level": "medium",
			
 
				                 "risk_level": "low",
			
 
				                 "availability": "available",
			
 
				-                "portrait_available": True,
			
 
				-                "pattern_recall": "matched",
			
 
				-                "category_or_element_binding": "matched",
			
 
				                 "discovery_relation": "mock_pattern_matched",
			
 
				                 "discovery_start_source": "pattern_itemset",
			
 
				                 "previous_discovery_step": "search_query_direct",
			
--- a/content_agent/integrations/shipinhao.py
+++ b/content_agent/integrations/shipinhao.py
@@ -87,8 +87,6 @@ def _normalize_shipinhao_item(
 
				         "next_cursor": next_cursor,
			
 
				         "score": score_from_statistics(statistics),
			
 
				         "risk_level": "unknown",
			
 
				-        "pattern_recall": "pattern_recall_pending",
			
 
				-        "category_or_element_binding": "pattern_recall_pending",
			
 
				         "discovery_relation": "derived_from_pattern_demand",
			
 
				         "discovery_start_source": query["discovery_start_source"],
			
 
				         "previous_discovery_step": "search_query_direct",
			
--- a/tests/p6_walk_helpers.py
+++ b/tests/p6_walk_helpers.py
@@ -118,12 +118,8 @@ def _platform_result(
 
				         "statistics": {"digg_count": 9000, "comment_count": 800, "share_count": 700},
			
 
				         "tags": tags,
			
 
				         "score": 72,
			
 
				-        "age_50_plus_level": "medium",
			
 
				         "risk_level": "low",
			
 
				         "availability": "available",
			
 
				-        "portrait_available": True,
			
 
				-        "pattern_recall": "matched",
			
 
				-        "category_or_element_binding": "matched",
			
 
				         "discovery_relation": "fake_walk",
			
 
				         "discovery_start_source": query.get("discovery_start_source", "pattern_itemset"),
			
 
				         "previous_discovery_step": query.get("previous_discovery_step", "search_query_direct"),
			
--- a/tests/test_case_replay.py
+++ b/tests/test_case_replay.py
@@ -49,7 +49,7 @@ def _build_synthetic_corpus(cases_dir: Path, case_id: str, items: list[dict[str,
 
				     )
			
 
				 
			
 
				 
			
 
				-def _synthetic_item(content_id: str, *, age_level: str, digg: int) -> dict[str, Any]:
			
 
				+def _synthetic_item(content_id: str, *, digg: int) -> dict[str, Any]:
			
 
				     return {
			
 
				         "content_discovery_id": f"syn_{content_id}",
			
 
				         "search_query_id": "q_001",
			
@@ -62,12 +62,8 @@ def _synthetic_item(content_id: str, *, age_level: str, digg: int) -> dict[str,
 
				         "statistics": {"digg_count": digg, "comment_count": 800, "share_count": 600},
			
 
				         "tags": ["#中医养生"],
			
 
				         "score": 85,
			
 
				-        "age_50_plus_level": age_level,
			
 
				         "risk_level": "low",
			
 
				         "availability": "available",
			
 
				-        "portrait_available": True,
			
 
				-        "pattern_recall": "matched",
			
 
				-        "category_or_element_binding": "matched",
			
 
				         "discovery_start_source": "pattern_itemset",
			
 
				         "previous_discovery_step": "search_query_direct",
			
 
				         "content_metadata_source": "synthetic",
			
@@ -100,7 +96,7 @@ def test_replay_id45_baseline_gemini_score(tmp_path):
 
				 
			
 
				 
			
 
				 def test_replay_synthetic_pool_case(tmp_path):
			
 
				-    _build_synthetic_corpus(tmp_path / "cases", "syn_pool", [_synthetic_item("9000000000000000001", age_level="strong", digg=50000)])
			
 
				+    _build_synthetic_corpus(tmp_path / "cases", "syn_pool", [_synthetic_item("9000000000000000001", digg=50000)])
			
 
				     artifacts = replay_case("syn_pool", runtime_root=tmp_path / "rt", cases_dir=tmp_path / "cases")
			
 
				     assert artifacts.state["status"] == "success"
			
 
				     assert artifacts.summary["pooled_content_count"] >= 1
			
@@ -109,8 +105,8 @@ def test_replay_synthetic_pool_case(tmp_path):
 
				 
			
 
				 
			
 
				 def test_replay_synthetic_review_case(tmp_path):
			
 
				-    # Strong portrait but low engagement scores into the review band (60-69).
			
 
				-    _build_synthetic_corpus(tmp_path / "cases", "syn_review", [_synthetic_item("9000000000000000002", age_level="strong", digg=500)])
			
 
				+    # Low engagement scores into the review band (60-69).
			
 
				+    _build_synthetic_corpus(tmp_path / "cases", "syn_review", [_synthetic_item("9000000000000000002", digg=500)])
			
 
				     artifacts = replay_case("syn_review", runtime_root=tmp_path / "rt", cases_dir=tmp_path / "cases")
			
 
				     assert artifacts.state["status"] == "success"
			
 
				     assert artifacts.summary["review_content_count"] >= 1
			
--- a/tests/test_douyin_client.py
+++ b/tests/test_douyin_client.py
@@ -36,7 +36,6 @@ def _client(responses, rate_limiter=None):
 
				     return CrawapiDouyinClient(
			
 
				         base_url="http://crawapi.test",
			
 
				         keyword_path="/crawler/dou_yin/keyword",
			
 
				-        content_portrait_path="/crawler/dou_yin/re_dian_bao/video_like_portrait",
			
 
				         blogger_path="/crawler/dou_yin/blogger",
			
 
				         default_crawapi_account_ref="771431222",
			
 
				         http_client=FakeHttpClient(responses),
			
@@ -52,7 +51,7 @@ def _search_query(text="早上好祝福视频"):
 
				     }
			
 
				 
			
 
				 
			
 
				-def test_douyin_keyword_search_maps_content_and_portrait_fields():
			
 
				+def test_douyin_keyword_search_maps_content_fields():
			
 
				     client = _client(
			
 
				         [
			
 
				             _response(
			
@@ -81,19 +80,6 @@ def test_douyin_keyword_search_maps_content_and_portrait_fields():
 
				                     }
			
 
				                 },
			
 
				             ),
			
 
				-            _response(
			
 
				-                200,
			
 
				-                {
			
 
				-                    "data": {
			
 
				-                        "data": {
			
 
				-                            "年龄": {
			
 
				-                                "50+": {"percentage": "18.00%", "preference": "135.0"},
			
 
				-                                "31-40": {"percentage": "20.00%", "preference": "80.0"},
			
 
				-                            }
			
 
				-                        }
			
 
				-                    }
			
 
				-                },
			
 
				-            ),
			
 
				         ]
			
 
				     )
			
 
				 
			
@@ -107,14 +93,13 @@ def test_douyin_keyword_search_maps_content_and_portrait_fields():
 
				     assert result["tags"] == ["#早上好"]
			
 
				     assert result["has_more"] is True
			
 
				     assert result["next_cursor"] == "10"
			
 
				-    assert result["portrait_available"] is True
			
 
				-    assert result["age_50_plus_level"] == "strong"
			
 
				-    assert result["pattern_recall"] == "pattern_recall_pending"
			
 
				     assert result["discovery_relation"] == "derived_from_pattern_demand"
			
 
				     assert result["platform_auth_mode"] == "no_bearer"
			
 
				     assert result["platform_raw_payload"][RAW_CONTENT_ID_KEY] == "7615247738577423622"
			
 
				     assert client.http_client.requests[0]["json"][RAW_AUTHOR_ACCOUNT_KEY] == "771431222"
			
 
				-    assert len(client.http_client.requests) == 2
			
 
				+    # V3 清理: 画像调用链已砍,搜索一条内容只发 1 次 keyword 请求,不再追加画像请求。
			
 
				+    assert len(client.http_client.requests) == 1
			
 
				+    assert client.http_client.requests[0]["url"].endswith("/crawler/dou_yin/keyword")
			
 
				 
			
 
				 
			
 
				 def test_douyin_keyword_search_returns_empty_list():
			
@@ -155,7 +140,6 @@ def test_douyin_fetch_author_works_maps_fake_response():
 
				                     }
			
 
				                 },
			
 
				             ),
			
 
				-            _response(200, {"data": {"data": {"年龄": {}}}}),
			
 
				         ]
			
 
				     )
			
 
				 
			
@@ -172,6 +156,7 @@ def test_douyin_fetch_author_works_maps_fake_response():
 
				     assert results[0]["search_query_id"] == "author_001"
			
 
				     assert results[0]["previous_discovery_step"] == "author_works"
			
 
				     assert client.http_client.requests[0]["json"][RAW_AUTHOR_ACCOUNT_KEY] == "MS4wLjABAAAA001"
			
 
				+    assert len(client.http_client.requests) == 1
			
 
				 
			
 
				 
			
 
				 def test_douyin_keyword_search_http_error_is_sanitized():
			
@@ -199,7 +184,6 @@ def test_douyin_keyword_search_bad_json_is_sanitized():
 
				     client = CrawapiDouyinClient(
			
 
				         base_url="http://crawapi.test",
			
 
				         keyword_path="/crawler/dou_yin/keyword",
			
 
				-        content_portrait_path="/crawler/dou_yin/re_dian_bao/video_like_portrait",
			
 
				         http_client=FakeHttpClient(
			
 
				             [
			
 
				                 httpx.Response(
			
@@ -215,90 +199,10 @@ def test_douyin_keyword_search_bad_json_is_sanitized():
 
				         client.search(_search_query("坏 JSON"))
			
 
				 
			
 
				 
			
 
				-def test_douyin_portrait_http_error_retries_before_missing():
			
 
				-    client = _client(
			
 
				-        [
			
 
				-            _response(
			
 
				-                200,
			
 
				-                {
			
 
				-                    "data": {
			
 
				-                        "data": [
			
 
				-                            {
			
 
				-                                RAW_CONTENT_ID_KEY: "7615247738577423622",
			
 
				-                                "desc": "早上好",
			
 
				-                                "author": {
			
 
				-                                    "nickname": "作者",
			
 
				-                                    RAW_AUTHOR_ID_KEY: "MS4wLjABAAAA001",
			
 
				-                                },
			
 
				-                                "statistics": {"digg_count": 1},
			
 
				-                            }
			
 
				-                        ]
			
 
				-                    }
			
 
				-                },
			
 
				-            ),
			
 
				-            _response(500, {"error": "portrait failed"}),
			
 
				-            _response(500, {"error": "portrait failed again"}),
			
 
				-        ]
			
 
				-    )
			
 
				-
			
 
				-    result = client.search(_search_query("早上好"))[0]
			
 
				-
			
 
				-    assert result["portrait_available"] is False
			
 
				-    assert result["age_50_plus_level"] == "missing"
			
 
				-    assert len(client.http_client.requests) == 3
			
 
				-
			
 
				-
			
 
				-def test_douyin_portrait_retry_can_recover():
			
 
				-    client = _client(
			
 
				-        [
			
 
				-            _response(
			
 
				-                200,
			
 
				-                {
			
 
				-                    "data": {
			
 
				-                        "data": [
			
 
				-                            {
			
 
				-                                RAW_CONTENT_ID_KEY: "7615247738577423622",
			
 
				-                                "desc": "早上好",
			
 
				-                                "author": {
			
 
				-                                    "nickname": "作者",
			
 
				-                                    RAW_AUTHOR_ID_KEY: "MS4wLjABAAAA001",
			
 
				-                                },
			
 
				-                                "statistics": {"digg_count": 1},
			
 
				-                            }
			
 
				-                        ],
			
 
				-                        "has_more": True,
			
 
				-                        "next_cursor": "10",
			
 
				-                    }
			
 
				-                },
			
 
				-            ),
			
 
				-            _response(500, {"error": "portrait failed"}),
			
 
				-            _response(
			
 
				-                200,
			
 
				-                {
			
 
				-                    "data": {
			
 
				-                        "data": {
			
 
				-                            "年龄": {
			
 
				-                                "50+": {"percentage": "18.00%", "preference": "135.0"}
			
 
				-                            }
			
 
				-                        }
			
 
				-                    }
			
 
				-                },
			
 
				-            ),
			
 
				-        ]
			
 
				-    )
			
 
				-
			
 
				-    result = client.search(_search_query("早上好"))[0]
			
 
				-
			
 
				-    assert result["portrait_available"] is True
			
 
				-    assert result["age_50_plus_level"] == "strong"
			
 
				-    assert len(client.http_client.requests) == 3
			
 
				-
			
 
				-
			
 
				 def test_douyin_keyword_search_can_limit_results_per_query():
			
 
				     client = CrawapiDouyinClient(
			
 
				         base_url="http://crawapi.test",
			
 
				         keyword_path="/crawler/dou_yin/keyword",
			
 
				-        content_portrait_path="/crawler/dou_yin/re_dian_bao/video_like_portrait",
			
 
				         max_results_per_query=1,
			
 
				         http_client=FakeHttpClient(
			
 
				             [
			
@@ -313,7 +217,6 @@ def test_douyin_keyword_search_can_limit_results_per_query():
 
				                         }
			
 
				                     },
			
 
				                 ),
			
 
				-                _response(200, {"data": {"data": {"年龄": {}}}}),
			
 
				             ]
			
 
				         ),
			
 
				     )
			
@@ -321,54 +224,7 @@ def test_douyin_keyword_search_can_limit_results_per_query():
 
				     results = client.search(_search_query("限量"))
			
 
				 
			
 
				     assert [result["platform_content_id"] for result in results] == ["1"]
			
 
				-
			
 
				-
			
 
				-def test_douyin_portrait_supports_dimensions_shape_and_excludes_41_to_50():
			
 
				-    client = _client(
			
 
				-        [
			
 
				-            _response(
			
 
				-                200,
			
 
				-                {
			
 
				-                    "data": {
			
 
				-                        "data": [
			
 
				-                            {
			
 
				-                                RAW_CONTENT_ID_KEY: "7635992906608060495",
			
 
				-                                "desc": "高考加油",
			
 
				-                                "author": {
			
 
				-                                    "nickname": "一个富贵",
			
 
				-                                    RAW_AUTHOR_ID_KEY: "MS4wLjABAAAA001",
			
 
				-                                },
			
 
				-                                "statistics": {"digg_count": 100},
			
 
				-                            }
			
 
				-                        ]
			
 
				-                    }
			
 
				-                },
			
 
				-            ),
			
 
				-            _response(
			
 
				-                200,
			
 
				-                {
			
 
				-                    "data": {
			
 
				-                        "data": {
			
 
				-                            "dimensions": {
			
 
				-                                "年龄": [
			
 
				-                                    {"name": "41-50", "percentage": "30.00%", "preference": "150.0"},
			
 
				-                                    {"name": "50-", "percentage": "4.83%", "preference": "13.80"},
			
 
				-                                ]
			
 
				-                            }
			
 
				-                        }
			
 
				-                    }
			
 
				-                },
			
 
				-            ),
			
 
				-        ]
			
 
				-    )
			
 
				-
			
 
				-    result = client.search(_search_query("高考加油"))[0]
			
 
				-
			
 
				-    assert result["portrait_available"] is True
			
 
				-    assert result["age_50_plus_ratio"] == 0.0483
			
 
				-    assert result["age_50_plus_level"] == "weak"
			
 
				-    assert result["age_distribution"][0]["is_50_plus"] is False
			
 
				-    assert result["age_distribution"][1]["is_50_plus"] is True
			
 
				+    assert len(client.http_client.requests) == 1
			
 
				 
			
 
				 
			
 
				 def _author_query(author_id="MS4wLjABAAAA001", **extra):
			
@@ -442,7 +298,6 @@ def test_fetch_author_works_normalizes_author_work_fields():
 
				                     }
			
 
				                 ]
			
 
				             ),
			
 
				-            _response(200, {"data": {"data": {"年龄": {}}}}),
			
 
				         ]
			
 
				     )
			
 
				 
			
@@ -454,15 +309,12 @@ def test_fetch_author_works_normalizes_author_work_fields():
 
				     assert results[0]["create_time"] == 1733000000
			
 
				     assert results[0]["previous_discovery_step"] == "author_works"
			
 
				     assert results[0]["content_metadata_source"] == "douyin_blogger"
			
 
				+    assert len(client.http_client.requests) == 1
			
 
				 
			
 
				 
			
 
				 def test_from_env_reads_blogger_path_and_sort_type(monkeypatch, tmp_path):
			
 
				     monkeypatch.setenv("CONTENTFIND_API_CRAWAPI_BASE_URL", "http://crawapi.test")
			
 
				     monkeypatch.setenv("CONTENTFIND_DOUYIN_KEYWORD_PATH", "/crawler/dou_yin/keyword")
			
 
				-    monkeypatch.setenv(
			
 
				-        "CONTENTFIND_DOUYIN_VIDEO_LIKE_PORTRAIT_PATH",
			
 
				-        "/crawler/dou_yin/re_dian_bao/video_like_portrait",
			
 
				-    )
			
 
				     monkeypatch.setenv("CONTENTFIND_DOUYIN_BLOGGER_PATH", "/crawler/dou_yin/blogger")
			
 
				     monkeypatch.setenv("CONTENTFIND_DOUYIN_ACCOUNT_WORKS_DEFAULT_SORT_TYPE", "最热")
			
 
				 
			
--- a/tests/test_douyin_detail.py
+++ b/tests/test_douyin_detail.py
@@ -23,7 +23,6 @@ def _client(responses):
 
				     return CrawapiDouyinClient(
			
 
				         base_url="http://crawapi.test",
			
 
				         keyword_path="/crawler/dou_yin/keyword",
			
 
				-        content_portrait_path="/crawler/dou_yin/re_dian_bao/video_like_portrait",
			
 
				         blogger_path="/crawler/dou_yin/blogger",
			
 
				         detail_path="/crawler/dou_yin/detail",
			
 
				         http_client=FakeHttpClient(responses),
			
--- a/tests/test_dual_channel_normalization.py
+++ b/tests/test_dual_channel_normalization.py
@@ -19,7 +19,6 @@ def _douyin_client():
 
				     return CrawapiDouyinClient(
			
 
				         base_url="http://crawapi.test",
			
 
				         keyword_path="/k",
			
 
				-        content_portrait_path="/p",
			
 
				         blogger_path="/b",
			
 
				         detail_path="/d",
			
 
				         http_client=object(),
			
--- a/tests/test_run_timeline_observability.py
+++ b/tests/test_run_timeline_observability.py
@@ -98,7 +98,7 @@ def test_timeline_summary_counts_query_failures():
 
				         {"edge_id": "query_next_page", "walk_status": "success"},
			
 
				     ]
			
 
				 
			
 
				-    summary = _timeline_summary([], walk_actions, [], [])
			
 
				+    summary = _timeline_summary([], walk_actions, [])
			
 
				 
			
 
				     assert summary["query_failure_count"] == 2
			
 
				 
			
@@ -110,7 +110,7 @@ def test_timeline_summary_counts_platform_rate_limited():
 
				         {"event_type": "platform_query_failed", "error_code": "PLATFORM_REQUEST_FAILED"},
			
 
				     ]
			
 
				 
			
 
				-    summary = _timeline_summary(events, [], [], [])
			
 
				+    summary = _timeline_summary(events, [], [])
			
 
				 
			
 
				     assert summary["platform_rate_limited_count"] == 2
			
 
				     assert summary["error_counts"] == {
			
@@ -127,12 +127,13 @@ def test_timeline_summary_counts_decode_statuses():
 
				         {"event_type": "decode_succeeded"},
			
 
				     ]
			
 
				 
			
 
				-    summary = _timeline_summary(events, [], [], [{"decode_status": "pending"}])
			
 
				-    # 有 decode 事件时只统计事件,不与 recalls 终态混合。
			
 
				+    summary = _timeline_summary(events, [], [])
			
 
				+    # 历史数据带 decode 事件时按事件计数呈现。
			
 
				     assert summary["decode_status_counts"] == {"submitted": 1, "polling": 2, "succeeded": 1}
			
 
				 
			
 
				-    fallback = _timeline_summary([], [], [], [{"decode_status": "pending"}, {"decode_status": "success"}])
			
 
				-    assert fallback["decode_status_counts"] == {"pending": 1, "success": 1}
			
 
				+    # V3 清理: recalls 终态回退已删,无 decode 事件时计数恒为空。
			
 
				+    no_decode = _timeline_summary([], [], [])
			
 
				+    assert no_decode["decode_status_counts"] == {}
			
 
				 
			
 
				 
			
 
				 def test_timeline_summary_counts_walk_statuses():
			
@@ -142,7 +143,7 @@ def test_timeline_summary_counts_walk_statuses():
 
				         {"edge_id": "hashtag_to_query", "walk_status": "skipped"},
			
 
				     ]
			
 
				 
			
 
				-    summary = _timeline_summary([], walk_actions, [], [])
			
 
				+    summary = _timeline_summary([], walk_actions, [])
			
 
				 
			
 
				     assert summary["walk_status_counts"] == {"success": 1, "skipped": 2}
			
 
				 
			
--- a/tests/test_runtime_files.py
+++ b/tests/test_runtime_files.py
@@ -320,10 +320,7 @@ def test_runtime_validation_catches_missing_pattern_recall_evidence(tmp_path):
 
				     validation = service.validate_run(run_id)
			
 
				     assert validation["status"] == "fail"
			
 
				     assert any(
			
 
				-        finding["check_id"] in {
			
 
				-            "pattern_recall_evidence_missing",
			
 
				-            "pattern_recall_matched_missing_evidence",
			
 
				-        }
			
 
				+        finding["check_id"] == "pattern_recall_evidence_missing"
			
 
				         for finding in validation["findings"]
			
 
				     )