guantao 2 days ago
Parent
Commit
2b6e974588

+ 86 - 7
agent/tools/builtin/content/platforms/aigc_channel.py

@@ -116,10 +116,32 @@ async def search(
 
     # 构建概览摘要
     summary_list = []
+    
+    # 动态导入评价模块
+    try:
+        from examples.process_pipeline.script.evaluate_source_quality import SourceQualityEvaluator
+        evaluator = SourceQualityEvaluator()
+    except ImportError:
+        evaluator = None
+        
     for idx, post in enumerate(posts, 1):
         body = post.get("body_text", "") or ""
         title = post.get("title") or body[:20] or ""
-        summary_list.append({
+        
+        score_info = {}
+        if evaluator:
+            try:
+                eval_res = evaluator.evaluate_post(post)
+                score_info = {
+                    "quality_score": eval_res["total_score"],
+                    "quality_grade": eval_res["grade"]
+                }
+                post["_quality_score"] = eval_res["total_score"]
+                post["_quality_grade"] = eval_res["grade"]
+            except Exception:
+                pass
+                
+        summary_item = {
             "index": idx,
             "title": title,
             "body_text": body[:100] + ("..." if len(body) > 100 else ""),
@@ -128,7 +150,9 @@ async def search(
             "channel": post.get("channel"),
             "link": post.get("link"),
             "content_type": post.get("content_type"),
-        })
+        }
+        summary_item.update(score_info)
+        summary_list.append(summary_item)
 
     # 封面拼图
     images = []
@@ -151,18 +175,67 @@ async def search(
 
 # ── 详情实现(从缓存获取,不需要额外 HTTP) ──
 
+MAX_DETAIL_IMAGES = 10  # detail 中保留的图片总数上限(含拼图)
+KEEP_INDIVIDUAL = 8     # 单张图片保留数量;剩余图片合并为 1 张拼图
+
+
+async def _build_images_collage(urls: List[str]) -> Optional[Dict[str, Any]]:
+    """将一组图片 URL 拼成单张网格图"""
+    if not urls:
+        return None
+
+    loaded = await load_images(urls)
+    valid_images = [img for (_, img) in loaded if img is not None]
+    if not valid_images:
+        return None
+
+    grid = build_image_grid(images=valid_images, labels=None)
+    import io
+    buf = io.BytesIO()
+    grid.save(buf, format="PNG")
+    img_bytes = buf.getvalue()
+
+    try:
+        from agent.tools.builtin.file.image_cdn import _upload_bytes_to_oss
+        import hashlib
+
+        md5_hash = hashlib.md5(img_bytes).hexdigest()[:12]
+        filename = f"collage_detail_{md5_hash}.png"
+        cdn_url = await _upload_bytes_to_oss(img_bytes, filename)
+        return {"type": "url", "url": cdn_url}
+    except Exception as e:
+        import logging
+        logging.getLogger(__name__).warning("Failed to upload detail collage to CDN: %s", e)
+        b64, _ = encode_base64(grid, format="PNG")
+        return {"type": "base64", "media_type": "image/png", "data": b64}
+
+
 async def detail(post: Dict[str, Any], extras: Optional[Dict[str, Any]] = None) -> ToolResult:
     """返回单条帖子的完整内容"""
     title = post.get("title") or post.get("body_text", "")[:30] or "无标题"
 
+    img_urls = [u for u in post.get("images", []) if u]
     images = []
-    for img_url in post.get("images", []):
-        if img_url:
-            images.append({"type": "url", "url": img_url})
+    if len(img_urls) > MAX_DETAIL_IMAGES:
+        # 保留前 KEEP_INDIVIDUAL 张原图,剩余拼成 1 张网格图
+        for u in img_urls[:KEEP_INDIVIDUAL]:
+            images.append({"type": "url", "url": u})
+        collage = await _build_images_collage(img_urls[KEEP_INDIVIDUAL:])
+        if collage:
+            images.append(collage)
+    else:
+        for u in img_urls:
+            images.append({"type": "url", "url": u})
+
+    output_json = json.dumps(post, ensure_ascii=False, indent=2)
+    output_text = (
+        output_json
+        + "\n\n---\n请基于以上内容,从信息完整度、内容质量和实用价值三个角度,给出一句简短的内容评价。"
+    )
 
     return ToolResult(
         title=f"详情: {title}",
-        output=json.dumps(post, ensure_ascii=False, indent=2),
+        output=output_text,
         long_term_memory=f"Viewed detail: {title}",
         images=images,
     )
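
The capping rule above (at most `MAX_DETAIL_IMAGES` image entries per detail result, with everything beyond `KEEP_INDIVIDUAL` collapsed into a single collage slot) can be read as one small pure function. A minimal sketch for illustration only; `plan_detail_images` is a hypothetical helper, not part of this commit:

```python
# Sketch of the image-capping rule used in detail(); plan_detail_images is
# a hypothetical helper for illustration, not part of this commit.
from typing import List, Tuple

MAX_DETAIL_IMAGES = 10  # cap on image entries in a detail result (collage included)
KEEP_INDIVIDUAL = 8     # originals kept as-is; the rest are merged into one grid


def plan_detail_images(img_urls: List[str]) -> Tuple[List[str], List[str]]:
    """Split URLs into (kept as individual images, merged into a single collage)."""
    if len(img_urls) > MAX_DETAIL_IMAGES:
        return img_urls[:KEEP_INDIVIDUAL], img_urls[KEEP_INDIVIDUAL:]
    return list(img_urls), []


# 12 input URLs -> 8 individual entries plus 1 collage of the remaining 4,
# so the detail payload never exceeds MAX_DETAIL_IMAGES image entries.
print(plan_detail_images([f"https://example.com/{i}.jpg" for i in range(12)]))
```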
@@ -201,7 +274,13 @@ async def _build_collage(posts: List[Dict[str, Any]]) -> Optional[str]:
         imgs = post.get("images", [])
         if imgs and imgs[0]:
             urls.append(imgs[0])
-            titles.append(post.get("title", "") or "")
+            base_title = post.get("title", "") or ""
+            score = post.get("_quality_score")
+            if score is not None:
+                title_with_score = f"[{score}分] {base_title}"
+            else:
+                title_with_score = base_title
+            titles.append(title_with_score)
 
     if not urls:
         return None
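
For reference, a minimal sketch of the enrichment path that both `search()` and `_build_collage()` rely on. `SourceQualityEvaluator.evaluate_post()` returns at least `total_score` and `grade` (see `evaluate_source_quality.py` later in this commit); the sample post dict below is made up for illustration:

```python
# Sketch: enrich a summary item and a collage label with the evaluator's output.
# The post dict is illustrative; field names match what evaluate_post() inspects.
from examples.process_pipeline.script.evaluate_source_quality import SourceQualityEvaluator

evaluator = SourceQualityEvaluator()
post = {
    "title": "AI 写真教程",
    "body_text": "详细步骤……" * 30,
    "like_count": 256,
    "publish_timestamp": 1760000000,
    "images": ["https://example.com/1.jpg"],
}

eval_res = evaluator.evaluate_post(post)
summary_item = {"index": 1, "title": post["title"]}
summary_item.update({
    "quality_score": eval_res["total_score"],
    "quality_grade": eval_res["grade"],
})

# The score is also cached on the post so collage labels can show it,
# e.g. "[86.5分] AI 写真教程".
post["_quality_score"] = eval_res["total_score"]
label = f"[{post['_quality_score']}分] {post['title']}"
print(summary_item, label)
```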

+ 87 - 6
agent/tools/builtin/content/platforms/x.py

@@ -36,16 +36,39 @@ async def search(
         result_data = data.get("data", {})
         tweets = result_data.get("data", []) if isinstance(result_data, dict) else []
 
+        # 动态导入评价模块
+        try:
+            from examples.process_pipeline.script.evaluate_source_quality import SourceQualityEvaluator
+            evaluator = SourceQualityEvaluator()
+        except ImportError:
+            evaluator = None
+
         summary_list = []
         for idx, tweet in enumerate(tweets[:max_count], 1):
             text = tweet.get("body_text", "")
-            summary_list.append({
+
+            score_info = {}
+            if evaluator:
+                try:
+                    eval_res = evaluator.evaluate_post(tweet)
+                    score_info = {
+                        "quality_score": eval_res["total_score"],
+                        "quality_grade": eval_res["grade"]
+                    }
+                    tweet["_quality_score"] = eval_res["total_score"]
+                except Exception:
+                    pass
+
+            summary_item = {
                 "index": idx,
                 "author": tweet.get("channel_account_name", ""),
                 "body_text": text[:100] + ("..." if len(text) > 100 else ""),
                 "like_count": tweet.get("like_count"),
                 "comment_count": tweet.get("comment_count"),
-            })
+                "link": tweet.get("link"),
+            }
+            summary_item.update(score_info)
+            summary_list.append(summary_item)
 
         # 拼图
         images = []
@@ -65,20 +88,72 @@ async def search(
         return ToolResult(title="X 搜索异常", output="", error=str(e))
 
 
+MAX_DETAIL_IMAGES = 10
+KEEP_INDIVIDUAL = 8
+
+
+async def _build_images_collage(urls: List[str]) -> Optional[Dict[str, Any]]:
+    """将一组图片 URL 拼成单张网格图"""
+    if not urls:
+        return None
+
+    loaded = await load_images(urls)
+    valid_images = [img for (_, img) in loaded if img is not None]
+    if not valid_images:
+        return None
+
+    grid = build_image_grid(images=valid_images, labels=None)
+    import io
+    buf = io.BytesIO()
+    grid.save(buf, format="PNG")
+    img_bytes = buf.getvalue()
+
+    try:
+        from agent.tools.builtin.file.image_cdn import _upload_bytes_to_oss
+        import hashlib
+
+        md5_hash = hashlib.md5(img_bytes).hexdigest()[:12]
+        filename = f"x_detail_collage_{md5_hash}.png"
+        cdn_url = await _upload_bytes_to_oss(img_bytes, filename)
+        return {"type": "url", "url": cdn_url}
+    except Exception as e:
+        import logging
+        logging.getLogger(__name__).warning("Failed to upload x detail collage to CDN: %s", e)
+        b64, _ = encode_base64(grid, format="PNG")
+        return {"type": "base64", "media_type": "image/png", "data": b64}
+
+
 async def detail(post: Dict[str, Any], extras: Optional[Dict[str, Any]] = None) -> ToolResult:
     """X 的详情直接从缓存的搜索结果取完整数据"""
     author = post.get("channel_account_name", "")
     text = post.get("body_text", "")[:30]
 
-    all_images = []
+    img_urls = []
     for img_item in post.get("image_url_list", []):
         url = img_item.get("image_url") if isinstance(img_item, dict) else img_item
         if url:
-            all_images.append({"type": "url", "url": url})
+            img_urls.append(url)
+
+    all_images = []
+    if len(img_urls) > MAX_DETAIL_IMAGES:
+        for u in img_urls[:KEEP_INDIVIDUAL]:
+            all_images.append({"type": "url", "url": u})
+        collage = await _build_images_collage(img_urls[KEEP_INDIVIDUAL:])
+        if collage:
+            all_images.append(collage)
+    else:
+        for u in img_urls:
+            all_images.append({"type": "url", "url": u})
+
+    output_json = json.dumps(post, ensure_ascii=False, indent=2)
+    output_text = (
+        output_json
+        + "\n\n---\n请基于以上内容,从信息完整度、内容质量和实用价值三个角度,给出一句简短的内容评价。"
+    )
 
     return ToolResult(
         title=f"X 详情: @{author}",
-        output=json.dumps(post, ensure_ascii=False, indent=2),
+        output=output_text,
         long_term_memory=f"Viewed X post by @{author}: {text}",
         images=all_images,
     )
@@ -97,7 +172,13 @@ async def _build_tweet_collage(tweets: List[Dict[str, Any]]) -> Optional[str]:
             thumb = tweet.get("cover_url")
         if thumb:
             urls.append(thumb)
-            titles.append(f"@{tweet.get('channel_account_name', '')}")
+            base_title = f"@{tweet.get('channel_account_name', '')}"
+            score = tweet.get("_quality_score")
+            if score is not None:
+                title_with_score = f"[{score}分] {base_title}"
+            else:
+                title_with_score = base_title
+            titles.append(title_with_score)
 
     if not urls:
         return None

+ 38 - 4
agent/tools/builtin/content/platforms/youtube.py

@@ -43,15 +43,37 @@ async def search(
         result_data = data.get("data", {})
         videos = result_data.get("data", []) if isinstance(result_data, dict) else []
 
+        # 动态导入评价模块
+        try:
+            from examples.process_pipeline.script.evaluate_source_quality import SourceQualityEvaluator
+            evaluator = SourceQualityEvaluator()
+        except ImportError:
+            evaluator = None
+
         # 概览
         summary_list = []
         for idx, video in enumerate(videos[:max_count], 1):
-            summary_list.append({
+            score_info = {}
+            if evaluator:
+                try:
+                    eval_res = evaluator.evaluate_post(video)
+                    score_info = {
+                        "quality_score": eval_res["total_score"],
+                        "quality_grade": eval_res["grade"]
+                    }
+                    video["_quality_score"] = eval_res["total_score"]
+                    video["_quality_grade"] = eval_res["grade"]
+                except Exception:
+                    pass
+            
+            summary_item = {
                 "index": idx,
                 "title": video.get("title", ""),
                 "author": video.get("author", ""),
                 "video_id": video.get("video_id", ""),
-            })
+            }
+            summary_item.update(score_info)
+            summary_list.append(summary_item)
 
         # 拼图
         images = []
@@ -142,9 +164,15 @@ async def detail(post: Dict[str, Any], extras: Optional[Dict[str, Any]] = None)
             output_data["video_path"] = video_path
             output_data["video_outline"] = video_outline
 
+        output_json = json.dumps(output_data, ensure_ascii=False, indent=2)
+        output_text = (
+            output_json
+            + "\n\n---\n请基于以上内容,从信息完整度、内容质量和实用价值三个角度,给出一句简短的内容评价。"
+        )
+
         return ToolResult(
             title=f"YouTube 详情: {video_info.get('title', content_id)}",
-            output=json.dumps(output_data, ensure_ascii=False, indent=2),
+            output=output_text,
             long_term_memory=f"YouTube detail for {content_id}" + (" with captions" if captions_text else ""),
         )
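
The same evaluation-prompt suffix is appended to the JSON output in all three `detail()` implementations (aigc_channel, x, youtube). A hypothetical shared helper, not part of this commit, would capture the pattern in one place:

```python
# Hypothetical shared helper (not in this commit) for the output_text pattern that
# aigc_channel.py, x.py and youtube.py each repeat inline in detail().
import json
from typing import Any, Dict

EVAL_SUFFIX = (
    "\n\n---\n请基于以上内容,从信息完整度、内容质量和实用价值三个角度,"
    "给出一句简短的内容评价。"
)


def to_detail_output(data: Dict[str, Any]) -> str:
    """Serialize the cached post/result and append the evaluation instruction."""
    return json.dumps(data, ensure_ascii=False, indent=2) + EVAL_SUFFIX
```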
 
@@ -167,7 +195,13 @@ async def _build_video_collage(videos: List[Dict[str, Any]]) -> Optional[str]:
 
         if thumb:
             urls.append(thumb)
-            titles.append(video.get("title", ""))
+            base_title = video.get("title", "")
+            score = video.get("_quality_score")
+            if score is not None:
+                title_with_score = f"[{score}分] {base_title}"
+            else:
+                title_with_score = base_title
+            titles.append(title_with_score)
 
     if not urls:
         return None

+ 3 - 1
examples/process_pipeline/db_requirements.json

@@ -107,5 +107,7 @@
   "test",
   "用AI生成人物写真组图",
   "按url搜索到的6条帖子",
-  "用AI生成真实感live图"
+  "用AI生成真实感live图",
+  "用ai生成真实摄影的美女写真组图,要求具有真实感,氛围感,人物一致性保持",
+  "图文排版(优先选择使用终端/API/agent友好工具的帖子)"
 ]

+ 169 - 28
examples/process_pipeline/prompts/extract_capability.schema.json

@@ -2,19 +2,41 @@
   "$schema": "http://json-schema.org/draft-07/schema#",
   "title": "extract_capability_output_v5",
   "type": "object",
-  "required": ["skip", "skip_reason", "capabilities"],
+  "required": [
+    "skip",
+    "skip_reason",
+    "capabilities"
+  ],
   "properties": {
-    "skip": { "type": "boolean" },
-    "skip_reason": { "type": "string" },
+    "skip": {
+      "type": "boolean"
+    },
+    "skip_reason": {
+      "type": "string"
+    },
     "capabilities": {
       "type": "array",
       "items": {
         "type": "object",
-        "required": ["action", "inputs", "outputs", "body", "effects", "stage", "tools", "criterion", "apply_to_draft", "unstructured_what"],
+        "required": [
+          "action",
+          "inputs",
+          "outputs",
+          "body",
+          "effects",
+          "stage",
+          "tools",
+          "criterion",
+          "apply_to_draft",
+          "unstructured_what"
+        ],
         "properties": {
           "action": {
             "type": "object",
-            "required": ["main_action", "mechanism"],
+            "required": [
+              "main_action",
+              "mechanism"
+            ],
             "properties": {
               "main_action": {
                 "type": "string",
@@ -32,7 +54,17 @@
             "type": "array",
             "items": {
               "type": "object",
-              "required": ["role", "modality", "artifact_type", "control_target", "target_scope", "constraint_strength", "source", "lifecycle", "description"],
+              "required": [
+                "role",
+                "modality",
+                "artifact_type",
+                "control_target",
+                "target_scope",
+                "constraint_strength",
+                "source",
+                "lifecycle",
+                "description"
+              ],
               "properties": {
                 "role": {
                   "type": "string",
@@ -51,12 +83,16 @@
                 },
                 "control_target": {
                   "type": "array",
-                  "items": { "type": "string" },
+                  "items": {
+                    "type": "string"
+                  },
                   "description": "控制目标:主体、身份一致性、脸部特征、姿势、构图、场景、光线、色彩、质感、画质、局部区域、瑕疵排除"
                 },
                 "target_scope": {
                   "type": "array",
-                  "items": { "type": "string" },
+                  "items": {
+                    "type": "string"
+                  },
                   "description": "作用范围:整图、人物、脸部、身体、背景、局部区域"
                 },
                 "constraint_strength": {
@@ -83,48 +119,153 @@
             "type": "array",
             "items": {
               "type": "object",
-              "required": ["role", "modality", "artifact_type", "control_target", "target_scope", "constraint_strength", "source", "lifecycle", "description"],
+              "required": [
+                "role",
+                "modality",
+                "artifact_type",
+                "control_target",
+                "target_scope",
+                "constraint_strength",
+                "source",
+                "lifecycle",
+                "description"
+              ],
               "properties": {
-                "role": { "type": "string", "minLength": 1 },
-                "modality": { "type": "string", "minLength": 1 },
-                "artifact_type": { "type": "string", "minLength": 1 },
-                "control_target": { "type": "array", "items": { "type": "string" } },
-                "target_scope": { "type": "array", "items": { "type": "string" } },
-                "constraint_strength": { "type": "string" },
-                "source": { "type": "string" },
-                "lifecycle": { "type": "string" },
-                "description": { "type": "string", "minLength": 1 }
+                "role": {
+                  "type": "string",
+                  "minLength": 1
+                },
+                "modality": {
+                  "type": "string",
+                  "minLength": 1
+                },
+                "artifact_type": {
+                  "type": "string",
+                  "minLength": 1
+                },
+                "control_target": {
+                  "type": "array",
+                  "items": {
+                    "type": "string"
+                  }
+                },
+                "target_scope": {
+                  "type": "array",
+                  "items": {
+                    "type": "string"
+                  }
+                },
+                "constraint_strength": {
+                  "type": "string"
+                },
+                "source": {
+                  "type": "string"
+                },
+                "lifecycle": {
+                  "type": "string"
+                },
+                "description": {
+                  "type": "string",
+                  "minLength": 1
+                }
               }
             }
           },
-          "body": { "type": "string", "minLength": 1 },
+          "body": {
+            "type": "string",
+            "minLength": 1
+          },
           "effects": {
             "type": "array",
-            "items": { "type": "string", "pattern": "^实现" }
+            "items": {
+              "type": "object",
+              "required": [
+                "statement",
+                "criteria",
+                "judge_method",
+                "negative_examples"
+              ],
+              "properties": {
+                "statement": {
+                  "type": "string",
+                  "pattern": "^实现"
+                },
+                "criteria": {
+                  "type": "string",
+                  "minLength": 1
+                },
+                "judge_method": {
+                  "type": "string",
+                  "enum": [
+                    "llm",
+                    "vlm",
+                    "rule",
+                    "human"
+                  ]
+                },
+                "negative_examples": {
+                  "type": "array",
+                  "items": {
+                    "type": "string",
+                    "minLength": 1
+                  },
+                  "default": []
+                }
+              }
+            }
           },
           "stage": {
             "type": "array",
-            "items": { "type": "string", "enum": ["preprocess", "generate", "refine"] }
+            "items": {
+              "type": "string",
+              "enum": [
+                "preprocess",
+                "generate",
+                "refine"
+              ]
+            }
           },
           "tools": {
             "type": "array",
-            "items": { "type": "string" }
+            "items": {
+              "type": "string"
+            }
+          },
+          "criterion": {
+            "type": [
+              "string",
+              "null"
+            ]
           },
-          "criterion": { "type": ["string", "null"] },
           "apply_to_draft": {
             "type": "object",
-            "required": ["实质", "形式"],
+            "required": [
+              "实质",
+              "形式"
+            ],
             "properties": {
-              "实质": { "type": "array", "items": { "type": "string" } },
-              "形式": { "type": "array", "items": { "type": "string" } }
+              "实质": {
+                "type": "array",
+                "items": {
+                  "type": "string"
+                }
+              },
+              "形式": {
+                "type": "array",
+                "items": {
+                  "type": "string"
+                }
+              }
             }
           },
           "unstructured_what": {
             "type": "array",
-            "items": { "type": "string" }
+            "items": {
+              "type": "string"
+            }
           }
         }
       }
     }
   }
-}
+}
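
For a quick sanity check of the new `effects` item shape, the fragment below validates one instance against a sub-schema copied from the definition above. It assumes the third-party `jsonschema` package is installed; the sample instance is illustrative:

```python
# Sketch: validate one effects item against the object shape introduced above.
from jsonschema import validate

effect_item_schema = {
    "type": "object",
    "required": ["statement", "criteria", "judge_method", "negative_examples"],
    "properties": {
        "statement": {"type": "string", "pattern": "^实现"},
        "criteria": {"type": "string", "minLength": 1},
        "judge_method": {"type": "string", "enum": ["llm", "vlm", "rule", "human"]},
        "negative_examples": {"type": "array", "items": {"type": "string", "minLength": 1}},
    },
}

# Raises jsonschema.ValidationError if the instance violates the schema.
validate(
    instance={
        "statement": "实现人物一致性保持",
        "criteria": "多张输出图中人物五官与发型保持一致",
        "judge_method": "vlm",
        "negative_examples": ["不同图中人物明显换脸"],
    },
    schema=effect_item_schema,
)
```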

+ 66 - 23
examples/process_pipeline/prompts/extract_workflow.prompt

@@ -26,31 +26,30 @@ $system$
 
 - order:步骤序号,整数。
 - phase: 该步骤所属阶段,取值为「非制作」/「预处理」/「生成」/「编辑」之一;「非制作」指创作与运营层面的行为,如故事构思、选题策划、热点参考、发布节奏等,该阶段不含 action;「预处理」是产出物的目的,不面向最终成品;生成和编辑可复用 action 的定义。
-- action: 该步骤的核心动作,格式为「一级动作:二级动作」,如「编辑:局部重绘」、「生成:融合」;若二级动作与一级相同可省略,如「生成」;非制作阶段的 action 值固定为 null。
+- action: 该步骤的核心动作,格式为「一级动作:二级动作」;若二级动作与一级相同可省略;非制作阶段的 action 值固定为 null。
 - body:具体做法,包含 prompt 写法、参数配置、操作细节等;从帖子原文中提取,未提及则为 null。
-- inputs:该步骤的输入,数组。
-- outputs:该步骤的产出,数组。
+- inputs:该步骤的输入,包含来源,数组。
+- outputs:该步骤的产出,包含去向,数组。
 - tools:使用的工具或平台,数组;未提及则为 []。
 
-# inputs / outputs
+# action 字段
 
-每个输入 / 输出项写成:
+## 格式规定
 
-```json
-{
-  "modality": "文本",
-  "description": "该项在当前步骤中实际起到的作用,用简短名词短语表达,如:场景参考、角色参考、故事情节与镜头要求等",不要写指令,中间产物,等没有信息量的词汇
-}
-```
+- 一级动作,即main_action,应从以下五个选项中选择:
+  - 1.生成。此时output为新内容,input不延续到output,只作为约束或参考
+  - 2.编辑。input延续到output,output只是input的修改
+  - 3.提取。output是来源的子集。(来源可以是input或者外部库)
+  - 4.组织。多项素材的结构化集合/索引/模板
+  - 5.筛选。output是候选集的子集(基于评估)。
+- 每个main_action应该有不同的二级动作,即mechanism:
+  - 生成:直接生成/参考引导/一致性保持/动画化/多模态合成/多候选生成/......
+  - 编辑:局部重绘/风格迁移/颜色调整/蒙板重绘/拼接组合/裁切扩展/......
+  - 提取:提示词反推/关键帧提取/蒙板提取/知识库检索/特征向量化/......
+  - 组织:分类入库/模板化/标签化/变量抽象/结构抽象/......
+  - 筛选:抽卡选优/评分排序top k/人工挑选/阈值过滤/......
 
-要求:
-
-- modality 是数据形态,如 文本 / 图片 / 视频 / 音频 / 特征点 / 参数 / 模型 / 向量。
-- 同一次提交给模型的所有文字描述统一合并为一个输入项,不得按语义功能拆分
-
-# action
-
-action 写成对象:
+## action 写成对象:
 
 ```json
 { "main_action": "编辑", "mechanism": "局部重绘" }
@@ -63,6 +62,31 @@ action 写成对象:
 
 {interface_vocab}
 
+# body 字段
+**重要**:在描述具体做法时,应注意结合已有的架构。遇到相关内容时,*必须*使用已有的术语和架构。已有架构如下:
+- 制作表,是制作帖子的原始输入,代表了制作一个贴子所需要的全部信息。所有对帖子解构的最终结果都应该是**制作表**。
+- 知识库,这是一个庞大的数据库系统,用于保存得到的数据,知识是对数据的抽象提炼,由Agent负责获取。
+- 不能新建、生成知识库或其他数据库,**系统里只有一个知识库**。不同种类的数据都存入这一个知识库中,不额外构建新的数据库,但支持根据标签进行筛选,从而隔离不同知识。
+- 业务Agent,是一个自建的智能体系统,可以从知识库中获取知识;也可以处理数据,将其变成知识存入知识库;处理制作表,完成任务。**所有需要的智能体系统都应该被业务Agent所替代**
+
+# inputs / outputs
+
+每个输入 / 输出项写成:
+
+```json
+{
+  "modality": "文本",
+  "description": "该项在当前步骤中实际起到的作用,用简短名词短语表达,如:场景参考、角色参考、故事情节与镜头要求等",不要写指令,中间产物,等没有信息量的词汇。
+  "relation": "文本",该输入/输出的来源/去向。
+}
+```
+
+要求:
+
+- modality 是数据形态,如 文本 / 图片 / 视频 / 音频 / 特征点 / 参数 / 模型 / 向量。
+- 同一次提交给模型的所有文字描述统一合并为一个输入项,不得按语义功能拆分
+- relation 格式应为:[来源.1O]、[去向.2I]、[来源.原始输入]、[去向.最终成品](含义分别为:来源是序号1的output、去向是序号2的input、从原贴得到的信息、最终的结果,不需要额外的文字描述或标点符号)。input只需要来源,output只需要去向,来源和去向可以有多个。
+
 $user$
 
 # 输入:原帖
@@ -87,20 +111,39 @@ $user$
         "inputs": [
           {
             "modality": "...",
-            "description": "..."
+            "description": "...",
+            "relation": "..."
           }
         ],
         "outputs": [
           {
             "modality": "...",
-            "description": "..."
+            "description": "...",
+            "relation": "..."
           }
         ],
         "tools": [],
         "apply_to_draft": { "实质": ["该步骤操作的内容点"], "形式": ["该步骤的呈现方式"] }
       }
     ],
-    "effects": ["实现 XX 效果", "实现 YY 效果"],
+    "effects": [
+      {
+        "statement": "实现XXX",  // 解决了什么具体需求
+        "criteria": "...",  // 结果是否成功的判定标准
+        "judge_method": "...",  // 可选: llm / vlm / rule / human
+        "negative_examples": [
+          "...",
+          "..."
+        ]
+      },
+      {
+        "statement": "实现YYY",
+        "criteria": "...",
+        "judge_method": "...",
+        "negative_examples": [
+          "..."
+        ]
+      }
+    ],
     "criterion": null,
     "unstructured_what": []
   }
@@ -113,4 +156,4 @@ $user$
 - strategy 顶层不要输出 inputs / outputs / tools / stage。
 - 不要任何前言、解释、标题。
 - 字符串值内禁止出现 ASCII 双引号;需要引号请用中文书名号。
-- **effects 数组中的每个字符串都必须以"实现"开头**,如 "实现快速生成"、"实现风格统一"。
+- **effects 的 statement 都必须以"实现"开头**,如 "实现快速生成"、"实现风格统一"。
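
The relation strings defined above ([来源.1O], [去向.2I], [来源.原始输入], [去向.最终成品]) are compact enough to parse mechanically. A minimal sketch under the assumption that the format is exactly as specified; `parse_relation` is a hypothetical helper, not part of this commit:

```python
# Hypothetical parser for the relation format defined above (not part of this commit).
# "[来源.1O]"       -> direction=来源, target = step 1's output
# "[去向.2I]"       -> direction=去向, target = step 2's input
# "[来源.原始输入]" / "[去向.最终成品]" -> special endpoints without a step number
import re
from typing import Optional, Tuple

_STEP_REF = re.compile(r"^(\d+)([IO])$")


def parse_relation(rel: str) -> Tuple[str, Optional[int], str]:
    """Return (direction, step number or None, endpoint: 'I' / 'O' / 原始输入 / 最终成品)."""
    direction, _, ref = rel.strip("[]").partition(".")
    m = _STEP_REF.match(ref)
    if m:
        return direction, int(m.group(1)), m.group(2)
    return direction, None, ref


print(parse_relation("[来源.1O]"))        # ('来源', 1, 'O')
print(parse_relation("[去向.最终成品]"))  # ('去向', None, '最终成品')
```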

+ 50 - 6
examples/process_pipeline/prompts/extract_workflow.schema.json

@@ -67,12 +67,12 @@
                       "main_action": {
                         "type": "string",
                         "minLength": 1,
-                        "description": "主动作:生成、编辑、提取、改写、合成、修复、增强、训练、评估、剪辑、模板化、排版、转写、配音、匹配、扩展、导出"
+                        "description": "主动作:生成、编辑、提取、组织、筛选"
                       },
                       "mechanism": {
                         "type": "string",
                         "minLength": 1,
-                        "description": "动作方式:直接生成、一致性保持、结构约束、质量收束、局部重绘、扩图、换背景等"
+                        "description": "动作方式:直接生成、局部重绘、提示词反推、分类入库、抽卡选优等"
                       }
                     }
                   },
@@ -88,7 +88,8 @@
                       "type": "object",
                       "required": [
                         "modality",
-                        "description"
+                        "description",
+                        "relation"
                       ],
                       "properties": {
                         "modality": {
@@ -100,6 +101,11 @@
                           "type": "string",
                           "minLength": 1,
                           "description": "功能性描述,不写具体内容what"
+                        },
+                        "relation": {
+                          "type": "string",
+                          "minLength": 1,
+                          "description": "来源与去向,有特定格式"
                         }
                       }
                     }
@@ -110,7 +116,8 @@
                       "type": "object",
                       "required": [
                         "modality",
-                        "description"
+                        "description",
+                        "relation"
                       ],
                       "properties": {
                         "modality": {
@@ -120,6 +127,11 @@
                         "description": {
                           "type": "string",
                           "minLength": 1
+                        },
+                        "relation": {
+                          "type": "string",
+                          "minLength": 1,
+                          "description": "来源与去向,有特定格式"
                         }
                       }
                     }
@@ -157,8 +169,40 @@
             "effects": {
               "type": "array",
               "items": {
-                "type": "string",
-                "pattern": "^实现"
+                "type": "object",
+                "required": [
+                  "statement",
+                  "criteria",
+                  "judge_method",
+                  "negative_examples"
+                ],
+                "properties": {
+                  "statement": {
+                    "type": "string",
+                    "pattern": "^实现"
+                  },
+                  "criteria": {
+                    "type": "string",
+                    "minLength": 1
+                  },
+                  "judge_method": {
+                    "type": "string",
+                    "enum": [
+                      "llm",
+                      "vlm",
+                      "rule",
+                      "human"
+                    ]
+                  },
+                  "negative_examples": {
+                    "type": "array",
+                    "items": {
+                      "type": "string",
+                      "minLength": 1
+                    },
+                    "default": []
+                  }
+                }
               }
             },
             "criterion": {

+ 24 - 12
examples/process_pipeline/prompts/researcher.prompt

@@ -36,13 +36,8 @@ $system$
   - 禁止搜索具体的软件名称,如 MJ,controlnet
 2. **搜索要求**:仅搜索/查看近半年的结果,不要查看过时的帖子
 3. **适度查看内容**:对点赞数高或标题符合业务需求的帖子查看详情(可看图片)。
-4. **结构化提取**:
-    提取规则
-    - 步骤粒度是"做了什么",而非"怎么做"
-    - 若涉及输入内容,描述其目的而非具体内容(如"输入将照片转化为X风格的提示词",而非"输入提示词包含A/B/C参数")
-    - 若涉及上传素材,说明素材类型和用途
-    - 同一工具内的参数配置不拆分为多步
-    - 若本质上只有一步,就输出一步
+    - 当调用 `content_search` 时,你会看到每条结果附带了 `quality_score`(质量得分)。**必须主动剔除得分低于 80 分的结果,只提取高质量帖子**。
+    - 在写入 case 前,你需要针对帖子执行多维度评估,包括:内容本身的知识质量(指导性与可信度)、是否包含多步骤执行、以及需求匹配度(具体解决的需求细分)。
 
 ### 第三步:存储结果文件
 🚨 **绝对不能更改任务规定的 `output_file` 路径名**!
@@ -63,9 +58,26 @@ write_file("{output_file}", 更新后的完整 JSON)
   "采集时间": "string - ISO 8601",
   "cases": [
     {
-      "case_id": "string - 格式:{platform}_{channel_content_id},如 bili_BV1xxx、xhs_694e17e9000000001e006669",
+      "case_id": "string - 格式:{platform}_{channel_content_id}",
       "source_url": "string - 帖子链接",
-      "title": "string - 帖子标题"
+      "title": "string - 帖子标题",
+      "evaluation": {
+        "quality": {
+          "overall_score": "number (0-100) - 总体的知识质量分数",
+          "instructive_score": "number (0-10) - 指导性评分(是否包含可参考的、详细的做法和教程)",
+          "credibility": {
+            "on_feedback": "number (0-10) - 基于互动反馈(如点赞/评论等)的可信度评分",
+            "on_content": "number (0-10) - 基于内容特征的可信度评分(警惕AI水帖,甄别真实分享)",
+            "on_author": "number (0-10) - 基于作者特征的可信度评分(根据提供的作者信息评估,若无则酌情打分)"
+          }
+        },
+        "multi_step": "boolean - 是否为多步骤执行(仅做判断,不影响质量分,用于后续筛选)",
+        "requirement": {
+          "match_score": "number (0-10) - 与原始业务需求的匹配度评分",
+          "description": "string - 简述该内容具体解决了什么需求,是否对需求进行了细分"
+        },
+        "reason": "string - 一句话简述给出上述评分的核心理由"
+      }
     }
   ]
 }
@@ -73,9 +85,9 @@ write_file("{output_file}", 更新后的完整 JSON)
 
 **重要说明**:
 - `case_id` 必须使用 `{platform}_{channel_content_id}` 格式,不要自己编号
-- 只需要输出 case_id、source_url、title 三个字段
-- 不需要提取工序步骤(后续会由专门的模块处理)
-- 每收集到 2~3 个 case,应立即持久化一次
+- 必须基于帖子的真实内容给出 evaluation(评价分数)
+- 不需要提取具体的工序步骤(后续会由专门的模块处理)
+- 每收集到 2~3 个高质量 case,应立即持久化追加一次
 
 $user$
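
A minimal sketch of the ≥80 filter the researcher is instructed to apply to `content_search` results. Field names follow the summary items produced by the platform `search()` implementations in this commit; keeping unscored items (evaluator unavailable) is an assumption for illustration, not something the prompt specifies:

```python
# Sketch: drop search results whose quality_score is below 80, as instructed above.
from typing import Any, Dict, List


def keep_high_quality(results: List[Dict[str, Any]], min_score: float = 80) -> List[Dict[str, Any]]:
    kept = []
    for item in results:
        score = item.get("quality_score")
        # Assumption: items without a score are kept for manual judgement.
        if score is None or score >= min_score:
            kept.append(item)
    return kept
```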
 

+ 41 - 0
examples/process_pipeline/run_metrics.json

@@ -2294,5 +2294,46 @@
     "trace_ids": {},
     "errors": [],
     "timestamp": "2026-05-06T15:46:19.875101"
+  },
+  {
+    "index": 108,
+    "requirement": "用AI生成真实感live图...",
+    "duration_seconds": 915.27,
+    "total_cost_usd": 1.983,
+    "costs_breakdown": {
+      "P1_Research_zhihu": 0.3943,
+      "P1.6a_WorkflowExtraction": 0.3264,
+      "P1.6b_CapabilityExtraction": 0.489,
+      "P1.7_ApplyGrounding": 0.7733
+    },
+    "trace_ids": {
+      "P1_Research_zhihu": "44fdfdff-056e-4cde-93c8-bccbced38c82"
+    },
+    "errors": [
+      "Case generation failed: ImportError: cannot import name 'generate_case' from 'examples.process_pipeline.script.generate_case' (C:\\Users\\11304\\gitlab\\cybertogether\\Agent\\examples\\process_pipeline\\script\\generate_case.py)"
+    ],
+    "timestamp": "2026-05-06T21:15:01.947405"
+  },
+  {
+    "index": 109,
+    "requirement": "用ai生成真实摄影的美女写真组图,要求具有真实感,氛围感,人物一致性保持...",
+    "duration_seconds": 186.76,
+    "total_cost_usd": 0.0,
+    "costs_breakdown": {},
+    "trace_ids": {},
+    "errors": [
+      "Case generation failed: ImportError: cannot import name 'generate_case' from 'examples.process_pipeline.script.generate_case' (C:\\Users\\11304\\gitlab\\cybertogether\\Agent\\examples\\process_pipeline\\script\\generate_case.py)"
+    ],
+    "timestamp": "2026-05-07T13:36:22.884612"
+  },
+  {
+    "index": 109,
+    "requirement": "用ai生成真实摄影的美女写真组图,要求具有真实感,氛围感,人物一致性保持...",
+    "duration_seconds": 0.33,
+    "total_cost_usd": 0.0,
+    "costs_breakdown": {},
+    "trace_ids": {},
+    "errors": [],
+    "timestamp": "2026-05-07T15:48:22.454758"
   }
 ]

+ 157 - 60
examples/process_pipeline/run_pipeline.py

@@ -37,13 +37,15 @@ from examples.process_research.config import (
 )
 from agent.utils import setup_logging
 
-async def run_agent_task(runner: AgentRunner, prompt_name: str, kwargs: dict, task_name: str, model_name: str, skip_validation: bool = False):
+async def run_agent_task(runner: AgentRunner, prompt_name: str, kwargs: dict, task_name: str, model_name: str, skip_validation: bool = False, start_trace_id: str = None, additional_messages: list = None):
     from examples.process_pipeline.script.validate_schema import validate_case, validate_blueprint, validate_capabilities, validate_strategy
     base_dir = Path(__file__).parent
     prompt_path = base_dir / "prompts" / f"{prompt_name}.prompt"
     prompt = SimplePrompt(prompt_path)
 
     messages = prompt.build_messages(**kwargs)
+    if additional_messages:
+        messages.extend(additional_messages)
     target_tools = []
     if prompt_name == "extract_capabilities":
         target_tools = ["capability_search", "capability_list", "tool_search"]
@@ -61,7 +63,7 @@ async def run_agent_task(runner: AgentRunner, prompt_name: str, kwargs: dict, ta
     out_file = kwargs.get("output_file")
 
     max_retries = 3
-    last_trace_id = None
+    last_trace_id = start_trace_id
     last_validation_error = None
     final_trace_id = None  # 用于返回最终成功的 trace_id
 
@@ -199,7 +201,8 @@ async def run_agent_task(runner: AgentRunner, prompt_name: str, kwargs: dict, ta
                 agent_type=prompt_name,
                 tools=target_tools,
                 tool_groups=tool_groups_map.get(prompt_name, ["core"]),
-                knowledge=KnowledgeConfig(enable_completion_extraction=False, enable_extraction=False, enable_injection=False)
+                knowledge=KnowledgeConfig(enable_completion_extraction=False, enable_extraction=False, enable_injection=False),
+                trace_id=last_trace_id
             )
 
             print(f"🚀 [Launch] {task_name} (Attempt {attempt+1})")
@@ -790,6 +793,7 @@ async def main():
         costs_breakdown = {}
         global_errors = []
         strategy_file = None
+        TARGET_QUALIFIED_CASES = 15
 
         # ── --only-step 单步执行模式 ──────────────────────────────
         if args.only_step:
@@ -814,7 +818,7 @@ async def main():
                 single_is_single = args.restart_mode.startswith("single_")
                 phase1_tasks = []
                 for p in single_platforms:
-                    task_desc = f"渠道:{p.upper()}。核心需求:{requirement}"
+                    task_desc = f"渠道:{p.upper()}。核心需求:{requirement}。目标:至少收集 {TARGET_QUALIFIED_CASES} 条高质量案例(评分>=70、正文充实)。"
                     out_file = str(raw_cases_dir / f"case_{p}.json")
                     kwargs = {
                         "task": task_desc,
@@ -832,7 +836,10 @@ async def main():
                 # Phase 1.5: 提取原始 source.json
                 from examples.process_pipeline.script.extract_sources import extract_sources_to_json
                 result = extract_sources_to_json(raw_cases_dir)
-                print(f"   ✓ source.json: matched={result['total_matched']}")
+                print(f"   ✓ source.json: matched={result['total_matched']}, filtered={result['filtered_total']}")
+                if result.get("filtered_reasons"):
+                    for reason, cnt in result["filtered_reasons"].items():
+                        print(f"      - {reason}: {cnt}")
 
             elif step == "generate-case":
                 # Phase 1.5.5: 生成标准化 case.json
@@ -1064,73 +1071,163 @@ async def main():
             return
 
         # ── 正常 pipeline 流程 ──────────────────────────────
-        existing_platforms = []
-        if raw_cases_dir.exists():
-            for f in raw_cases_dir.glob("case_*.json"):
-                plat = f.stem.replace("case_", "")
-                if plat in ["xhs", "youtube", "bili", "x", "zhihu", "gzh"]:
-                    existing_platforms.append(plat)
 
         platforms = [p.strip() for p in args.platforms.split(",") if p.strip()]
 
         # Phase 0: Platform Selection (controlled by --platforms)
         if should_run("research"):
-            new_platforms = [p for p in platforms if p not in existing_platforms]
-            if not new_platforms:
-                print(f"\n--- Phase 0: Skipping Research (All specified platforms already exist: {existing_platforms}) ---")
-                platforms = []
-            else:
-                print(f"\n--- Phase 0: Using specified platforms ---")
-                print(f"📡 Found existing cases: {existing_platforms}. Will research new platforms: {new_platforms}")
-                platforms = new_platforms
+            print(f"\n--- Phase 0: Using specified platforms ---")
+            print(f"📡 Will research platforms: {platforms}")
+            # 注:不再跳过已有平台,因为 agent 是增量追加模式
 
         is_single_step = args.restart_mode.startswith("single_")
 
-        # Phase 1: MAP (Parallel Search) uses Qwen
-        if should_run("research"):
-            print(f"\n--- Phase 1: Distributed Research Map ({qwen_model}) ---")
-            phase1_tasks = []
-            for p in platforms:
-                task_desc = f"渠道:{p.upper()}。核心需求:{requirement}"
-                out_file = str(raw_cases_dir / f"case_{p}.json")
-                kwargs = {
-                    "task": task_desc,
-                    "output_file": out_file
-                }
-                phase1_tasks.append(run_agent_task(runner_qwen, "researcher", kwargs, f"P1_Research_{p}", qwen_model, skip_validation=is_single_step))
-                
-            phase1_results = await asyncio.gather(*phase1_tasks)
+        # Phase 1 & 1.5: MAP (Parallel Search) and Source Extraction Loop
+        if should_run("research") or should_run("source"):
+            print(f"\n--- Phase 1: Distributed Research Map with Source Filter Loop ({qwen_model}) ---")
+
+            from examples.process_pipeline.script.extract_sources import extract_sources_to_json
+
+            MAX_ROUNDS = 50
+
+            platform_traces = {p: None for p in platforms}
+            active_platforms = platforms.copy()
             phase1_trace_ids = {}
-            for (task_cost, task_errors, trace_id), p in zip(phase1_results, platforms):
-                total_cost += task_cost
-                costs_breakdown[f"P1_Research_{p}"] = round(task_cost, 4)
-                phase1_trace_ids[f"P1_Research_{p}"] = trace_id
-                global_errors.extend(task_errors)
-
-                # Check if cases actually got written
-                expected_file = Path(raw_cases_dir / f"case_{p}.json")
-                if not expected_file.exists():
-                    err_msg = f"Missing case file for {p}! Agent likely hit max_iterations without saving."
+
+            if not should_run("research") and should_run("source"):
+                try:
+                    src_stats = extract_sources_to_json(raw_cases_dir, trace_ids=None)
+                    print(f"📎 [Source Extraction] matched={src_stats['total_matched']}, filtered={src_stats['filtered_total']} → {raw_cases_dir / 'source.json'}")
+                    if src_stats.get("filtered_reasons"):
+                        for reason, cnt in src_stats["filtered_reasons"].items():
+                            print(f"      - {reason}: {cnt}")
+                except Exception as e:
+                    err_msg = f"Source extraction failed: {type(e).__name__}: {e}"
                     print(f"⚠️ [Warning] {err_msg}")
                     global_errors.append(err_msg)
-        else:
-            print("\n⏭️  [Skip] Phase 1 Skipped. Using existing cases...")
+            elif should_run("research"):
+                round_idx = 0
+                last_src_stats = None  # 保存上一轮的过滤统计
+                last_platform_count = {}  # 保存上一轮每个平台的合格数
+                while active_platforms and round_idx < MAX_ROUNDS:
+                    print(f"\n   >>> [Research Loop] Round {round_idx+1} - Active platforms: {active_platforms}")
+
+                    if active_platforms:
+                        phase1_tasks = []
+                        for p in active_platforms:
+                            task_desc = f"渠道:{p.upper()}。核心需求:{requirement}。目标:至少收集 {TARGET_QUALIFIED_CASES} 条高质量案例(评分>=70、正文充实)。"
+                            out_file = str(raw_cases_dir / f"case_{p}.json")
+
+                            additional_msgs = None
+                            if round_idx > 0 and last_src_stats:
+                                # 构建带过滤详情的 feedback message
+                                p_count = last_platform_count.get(p, 0)
+                                # 筛选出该平台被过滤的条目
+                                p_filtered = [d for d in last_src_stats.get("filtered_details", []) if d.get("platform") == p]
+                                reason_summary = last_src_stats.get("filtered_reasons", {})
+
+                                feedback_lines = [
+                                    f"【系统反馈】你在上一轮提取的有效案例数量未达标。",
+                                    f"当前 {p.upper()} 合格案例:{p_count}/{TARGET_QUALIFIED_CASES}",
+                                    f"过滤统计:{dict(reason_summary)}" if reason_summary else "",
+                                ]
+
+                                if p_filtered:
+                                    feedback_lines.append(f"\n以下是你提交的被过滤掉的帖子(共{len(p_filtered)}条):")
+                                    for item in p_filtered[:10]:
+                                        feedback_lines.append(f"  - [{item['case_id']}] {item['title']} → 原因: {item['filter_reason']}")
+                                    if len(p_filtered) > 10:
+                                        feedback_lines.append(f"  ... 还有 {len(p_filtered) - 10} 条未列出")
+
+                                feedback_lines.append(
+                                    f"\n请继续搜索并提取更多**全新的、不同的**高质量案例,**追加**写入到你一直在维护的文件中。"
+                                    f"注意:不要重复之前已找过的案例!针对上述被过滤的原因,请确保新案例有详实的正文内容且评分准确。"
+                                )
+
+                                additional_msgs = [{
+                                    "role": "user",
+                                    "content": "\n".join(line for line in feedback_lines if line)
+                                }]
+
+                            kwargs = {
+                                "task": task_desc,
+                                "output_file": out_file
+                            }
+                            phase1_tasks.append(
+                                run_agent_task(
+                                    runner_qwen, "researcher", kwargs,
+                                    f"P1_Research_{p}_R{round_idx+1}", qwen_model,
+                                    skip_validation=is_single_step,
+                                    start_trace_id=platform_traces[p],
+                                    additional_messages=additional_msgs
+                                )
+                            )
+
+                        phase1_results = await asyncio.gather(*phase1_tasks)
+                        for (task_cost, task_errors, trace_id), p in zip(phase1_results, active_platforms):
+                            total_cost += task_cost
+                            cost_key = f"P1_Research_{p}"
+                            costs_breakdown[cost_key] = costs_breakdown.get(cost_key, 0.0) + round(task_cost, 4)
+                            platform_traces[p] = trace_id
+                            phase1_trace_ids[f"P1_Research_{p}"] = trace_id
+                            global_errors.extend(task_errors)
+
+                            expected_file = Path(raw_cases_dir / f"case_{p}.json")
+                            if not expected_file.exists():
+                                err_msg = f"Missing case file for {p}! Agent likely hit max_iterations without saving."
+                                print(f"⚠️ [Warning] {err_msg}")
+                                global_errors.append(err_msg)
+
+                    if should_run("source"):
+                        try:
+                            trace_id_list = [tid for tid in phase1_trace_ids.values() if tid]
+                            src_stats = extract_sources_to_json(raw_cases_dir, trace_ids=trace_id_list)
+                            last_src_stats = src_stats
+                            print(f"   📎 [Source Extraction] matched={src_stats['total_matched']}, filtered={src_stats['filtered_total']} → {raw_cases_dir / 'source.json'}")
+                            if src_stats.get("filtered_reasons"):
+                                for reason, cnt in src_stats["filtered_reasons"].items():
+                                    print(f"      - {reason}: {cnt}")
+                        except Exception as e:
+                            err_msg = f"Source extraction failed: {type(e).__name__}: {e}"
+                            print(f"   ⚠️ [Warning] {err_msg}")
+                            global_errors.append(err_msg)
+
+                    # 判断是否达标:直接看 source.json 里每个平台的条目数
+                    source_file = raw_cases_dir / "source.json"
+                    if source_file.exists():
+                        with open(source_file, "r", encoding="utf-8") as f:
+                            source_data = json.load(f)
+
+                        platform_count = {}
+                        for s in source_data.get("sources", []):
+                            p = s.get("platform")
+                            if p:
+                                platform_count[p] = platform_count.get(p, 0) + 1
+                        last_platform_count = platform_count
+
+                        print(f"   📊 [Source Count] Target: >={TARGET_QUALIFIED_CASES} qualified items per platform")
+                        platforms_to_keep = []
+                        for p in platforms:
+                            count = platform_count.get(p, 0)
+                            if p in active_platforms:
+                                print(f"      - {p}: {count}/{TARGET_QUALIFIED_CASES}")
+                            if count < TARGET_QUALIFIED_CASES:
+                                platforms_to_keep.append(p)
+
+                        active_platforms = platforms_to_keep
+
+                        if not active_platforms:
+                            print(f"   ✅ All platforms reached the target {TARGET_QUALIFIED_CASES} qualified items!")
+                            break
+                    else:
+                        print(f"   ⚠️ source.json not found, continuing loop to retry...")
 
-        # Phase 1.5: Extract raw post data from cache → raw_cases/source.json
-        if should_run("source"):
-            try:
-                from examples.process_pipeline.script.extract_sources import extract_sources_to_json
-                trace_id_list = [tid for tid in phase1_trace_ids.values() if tid] if 'phase1_trace_ids' in dir() else None
-                src_stats = extract_sources_to_json(raw_cases_dir, trace_ids=trace_id_list)
-                print(
-                    f"📎 [Source Extraction] "
-                    f"matched={src_stats['total_matched']} "
-                    f"→ {raw_cases_dir / 'source.json'}"
-                )
-            except Exception as e:
-                err_msg = f"Source extraction failed: {type(e).__name__}: {e}"
-                print(f"⚠️ [Warning] {err_msg}")
-                global_errors.append(err_msg)
+                    round_idx += 1
+
+                if round_idx >= MAX_ROUNDS and active_platforms:
+                    print(f"   ⚠️ [Max Rounds] Reached {MAX_ROUNDS} rounds. Remaining platforms: {active_platforms}")
+        else:
+            print("\n⏭️  [Skip] Phase 1 & 1.5 Skipped. Using existing cases...")
 
         # Phase 1.3: Generate case.json from source.json
         if should_run("generate-case"):

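The loop's stop condition boils down to counting qualified sources per platform in source.json and keeping only the platforms still below the target. A standalone sketch of that check, assuming source.json carries the `{"sources": [{"platform": ...}, ...]}` shape used above:

```python
# Sketch of the per-platform target check driving the research loop above.
import json
from pathlib import Path
from typing import Dict, List

TARGET_QUALIFIED_CASES = 15


def remaining_platforms(source_file: Path, platforms: List[str]) -> List[str]:
    """Return the platforms with fewer than TARGET_QUALIFIED_CASES qualified sources."""
    counts: Dict[str, int] = {}
    if source_file.exists():
        data = json.loads(source_file.read_text(encoding="utf-8"))
        for s in data.get("sources", []):
            p = s.get("platform")
            if p:
                counts[p] = counts.get(p, 0) + 1
    return [p for p in platforms if counts.get(p, 0) < TARGET_QUALIFIED_CASES]
```
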
+ 443 - 0
examples/process_pipeline/script/evaluate_source_quality.py

@@ -0,0 +1,443 @@
+"""
+Source.json 质量评估模块
+
+基于字段完整性和文本量对调研结果进行评分,识别低质量内容并支持二次筛选。
+
+评分维度:
+1. 字段完整性(40分):有效字段占比
+2. 文本质量(40分):body_text 长度和信息密度
+3. 互动数据(20分):点赞数、时间戳等
+"""
+
+import json
+from pathlib import Path
+from typing import Any, Dict, List, Tuple
+from datetime import datetime, timedelta
+
+
+class SourceQualityEvaluator:
+    """Source 数据质量评估器"""
+
+    # 字段权重配置
+    FIELD_WEIGHTS = {
+        "title": 5,
+        "body_text": 15,
+        "like_count": 5,
+        "publish_timestamp": 5,
+        "images": 3,
+        "videos": 3,
+        "link": 2,
+        "content_type": 2,
+    }
+
+    # 文本质量阈值
+    TEXT_LENGTH_THRESHOLDS = {
+        "excellent": 200,   # 优秀:200字以上
+        "good": 100,        # 良好:100-200字
+        "fair": 50,         # 一般:50-100字
+        "poor": 20,         # 较差:20-50字
+        # < 20字:极差
+    }
+
+    def __init__(self, time_window_days: int = 180):
+        """
+        Args:
+            time_window_days: 时效性窗口(天),默认180天(半年)
+        """
+        self.time_window_days = time_window_days
+        self.cutoff_timestamp = (
+            datetime.now() - timedelta(days=time_window_days)
+        ).timestamp()
+
+    def evaluate_post(self, post: dict) -> Dict[str, Any]:
+        """
+        评估单个 post 的质量
+
+        Returns:
+            {
+                "field_score": float,      # 字段完整性得分 (0-40)
+                "text_score": float,       # 文本质量得分 (0-40)
+                "engagement_score": float, # 互动数据得分 (0-20)
+                "total_score": float,      # 总分 (0-100)
+                "grade": str,              # 等级 A/B/C/D/F
+                "issues": List[str],       # 问题列表
+                "valid_fields": int,       # 有效字段数
+                "total_fields": int,       # 总字段数
+            }
+        """
+        result = {
+            "field_score": 0.0,
+            "text_score": 0.0,
+            "engagement_score": 0.0,
+            "total_score": 0.0,
+            "grade": "F",
+            "issues": [],
+            "valid_fields": 0,
+            "total_fields": len(self.FIELD_WEIGHTS),
+        }
+
+        # 1. 字段完整性评分 (0-40分)
+        field_score, valid_count = self._evaluate_fields(post)
+        result["field_score"] = field_score
+        result["valid_fields"] = valid_count
+
+        # 2. 文本质量评分 (0-40分)
+        text_score, text_issues = self._evaluate_text(post)
+        result["text_score"] = text_score
+        result["issues"].extend(text_issues)
+
+        # 3. 互动数据评分 (0-20分)
+        engagement_score, engagement_issues = self._evaluate_engagement(post)
+        result["engagement_score"] = engagement_score
+        result["issues"].extend(engagement_issues)
+
+        # 计算总分和等级
+        result["total_score"] = round(
+            result["field_score"] + result["text_score"] + result["engagement_score"], 2
+        )
+        result["grade"] = self._calculate_grade(result["total_score"])
+
+        return result
+
+    def _evaluate_fields(self, post: dict) -> Tuple[float, int]:
+        """评估字段完整性"""
+        total_weight = sum(self.FIELD_WEIGHTS.values())
+        earned_weight = 0.0
+        valid_count = 0
+
+        for field, weight in self.FIELD_WEIGHTS.items():
+            value = post.get(field)
+            is_valid = False
+
+            if field == "title":
+                is_valid = bool(value and len(str(value).strip()) > 0)
+            elif field == "body_text":
+                is_valid = bool(value and len(str(value).strip()) > 0)
+            elif field == "like_count":
+                is_valid = isinstance(value, (int, float)) and value > 0
+            elif field == "publish_timestamp":
+                is_valid = isinstance(value, (int, float)) and value > 0
+            elif field in ("images", "videos"):
+                is_valid = isinstance(value, list) and len(value) > 0
+            elif field == "link":
+                is_valid = bool(value and len(str(value).strip()) > 0)
+            elif field == "content_type":
+                is_valid = bool(value and len(str(value).strip()) > 0)
+
+            if is_valid:
+                earned_weight += weight
+                valid_count += 1
+
+        # 转换为 0-40 分
+        field_score = (earned_weight / total_weight) * 40
+        return round(field_score, 2), valid_count
+
+    def _evaluate_text(self, post: dict) -> Tuple[float, List[str]]:
+        """评估文本质量"""
+        issues = []
+        body_text = post.get("body_text", "")
+        title = post.get("title", "")
+
+        # 清理 HTML 标签(如 <em class="keyword">)
+        import re
+        body_text_clean = re.sub(r'<[^>]+>', '', body_text)
+        title_clean = re.sub(r'<[^>]+>', '', title)
+
+        body_len = len(body_text_clean.strip())
+        title_len = len(title_clean.strip())
+
+        # 标题评分 (0-10分)
+        if title_len == 0:
+            title_score = 0
+            issues.append("标题为空")
+        elif title_len < 10:
+            title_score = 3
+            issues.append(f"标题过短 ({title_len}字)")
+        elif title_len < 20:
+            title_score = 6
+        else:
+            title_score = 10
+
+        # 正文评分 (0-30分)
+        if body_len == 0:
+            body_score = 0
+            issues.append("正文为空")
+        elif body_len < self.TEXT_LENGTH_THRESHOLDS["poor"]:
+            body_score = 5
+            issues.append(f"正文极短 ({body_len}字)")
+        elif body_len < self.TEXT_LENGTH_THRESHOLDS["fair"]:
+            body_score = 12
+            issues.append(f"正文较短 ({body_len}字)")
+        elif body_len < self.TEXT_LENGTH_THRESHOLDS["good"]:
+            body_score = 20
+        elif body_len < self.TEXT_LENGTH_THRESHOLDS["excellent"]:
+            body_score = 26
+        else:
+            body_score = 30
+
+        text_score = title_score + body_score
+        return round(text_score, 2), issues
+
+    def _evaluate_engagement(self, post: dict) -> Tuple[float, List[str]]:
+        """评估互动数据"""
+        issues = []
+        score = 0.0
+
+        # 点赞数评分 (0-10分)
+        like_count = post.get("like_count", 0)
+        if not isinstance(like_count, (int, float)):
+            like_count = 0
+
+        if like_count == 0:
+            issues.append("无点赞数据")
+        elif like_count < 10:
+            score += 3
+        elif like_count < 100:
+            score += 6
+        elif like_count < 1000:
+            score += 8
+        else:
+            score += 10
+
+        # 时间戳评分 (0-10分)
+        timestamp = post.get("publish_timestamp", 0)
+        if not isinstance(timestamp, (int, float)):
+            timestamp = 0
+
+        if timestamp == 0:
+            issues.append("无发布时间")
+        elif timestamp < self.cutoff_timestamp:
+            issues.append(f"内容过时(超过{self.time_window_days}天)")
+            score += 2
+        else:
+            score += 10
+
+        return round(score, 2), issues
+
+    def _calculate_grade(self, score: float) -> str:
+        """计算等级"""
+        if score >= 80:
+            return "A"
+        elif score >= 60:
+            return "B"
+        elif score >= 40:
+            return "C"
+        elif score >= 20:
+            return "D"
+        else:
+            return "F"
+
+    def evaluate_source_file(self, source_file: Path) -> Dict[str, Any]:
+        """
+        评估整个 source.json 文件
+
+        Returns:
+            {
+                "total_sources": int,
+                "grade_distribution": Dict[str, int],  # A/B/C/D/F 的数量分布
+                "avg_score": float,
+                "low_quality_count": int,  # C/D/F 的数量
+                "low_quality_indices": List[int],  # 低质量条目的索引
+                "details": List[Dict],  # 每条的详细评分
+            }
+        """
+        with open(source_file, "r", encoding="utf-8") as f:
+            data = json.load(f)
+
+        sources = data.get("sources", [])
+        total = len(sources)
+
+        grade_dist = {"A": 0, "B": 0, "C": 0, "D": 0, "F": 0}
+        scores = []
+        low_quality_indices = []
+        details = []
+
+        for idx, source in enumerate(sources):
+            post = source.get("post", {})
+            eval_result = self.evaluate_post(post)
+            eval_result["index"] = idx
+            eval_result["case_id"] = source.get("case_id", "")
+            eval_result["platform"] = source.get("platform", "")
+
+            grade_dist[eval_result["grade"]] += 1
+            scores.append(eval_result["total_score"])
+            details.append(eval_result)
+
+            # C/D/F 视为低质量
+            if eval_result["grade"] in ("C", "D", "F"):
+                low_quality_indices.append(idx)
+
+        avg_score = round(sum(scores) / total, 2) if total > 0 else 0.0
+
+        return {
+            "total_sources": total,
+            "grade_distribution": grade_dist,
+            "avg_score": avg_score,
+            "low_quality_count": len(low_quality_indices),
+            "low_quality_indices": low_quality_indices,
+            "details": details,
+        }
+
+    def filter_low_quality(
+        self, source_file: Path, output_file: Path, min_score: float = 40.0
+    ) -> Dict[str, any]:
+        """
+        过滤低质量内容,生成新的 source.json
+
+        Args:
+            source_file: 原始 source.json 路径
+            output_file: 输出文件路径
+            min_score: 最低分数阈值(默认40分,即C级以上)
+
+        Returns:
+            {
+                "original_count": int,
+                "filtered_count": int,
+                "removed_count": int,
+                "removed_cases": List[str],  # 被移除的 case_id
+            }
+        """
+        with open(source_file, "r", encoding="utf-8") as f:
+            data = json.load(f)
+
+        sources = data.get("sources", [])
+        original_count = len(sources)
+
+        filtered_sources = []
+        removed_sources = []
+        removed_cases = []
+
+        for source in sources:
+            post = source.get("post", {})
+            eval_result = self.evaluate_post(post)
+
+            if eval_result["total_score"] >= min_score:
+                filtered_sources.append(source)
+            else:
+                source["filter_reason"] = f"完备性评分不足 (得分: {eval_result['total_score']} < {min_score})"
+                removed_sources.append(source)
+                removed_cases.append(source.get("case_id", "unknown"))
+
+        # 将被过滤的数据保存到 filtered_cases.json
+        if removed_sources:
+            filtered_cases_file = output_file.parent / "filtered_cases.json"
+            filtered_data = {"total": len(removed_sources), "sources": removed_sources}
+            with open(filtered_cases_file, "w", encoding="utf-8") as f:
+                json.dump(filtered_data, f, ensure_ascii=False, indent=2)
+
+        # 更新数据
+        data["sources"] = filtered_sources
+        data["total"] = len(filtered_sources)
+        data["quality_filter"] = {
+            "min_score": min_score,
+            "original_count": original_count,
+            "filtered_count": len(filtered_sources),
+            "removed_count": len(removed_cases),
+            "filter_timestamp": datetime.now().isoformat(),
+        }
+
+        # 写入新文件
+        with open(output_file, "w", encoding="utf-8") as f:
+            json.dump(data, f, ensure_ascii=False, indent=2)
+
+        return {
+            "original_count": original_count,
+            "filtered_count": len(filtered_sources),
+            "removed_count": len(removed_cases),
+            "removed_cases": removed_cases,
+        }
+
+
+def generate_quality_report(source_file: Path, output_report: Path = None):
+    """生成质量评估报告"""
+    evaluator = SourceQualityEvaluator()
+    result = evaluator.evaluate_source_file(source_file)
+
+    # 生成报告文本
+    report_lines = [
+        "=" * 60,
+        f"Source 质量评估报告",
+        f"文件:{source_file}",
+        f"评估时间:{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
+        "=" * 60,
+        "",
+        f"📊 总体统计",
+        f"   总条目数:{result['total_sources']}",
+        f"   平均得分:{result['avg_score']}/100",
+        f"   低质量数:{result['low_quality_count']} ({result['low_quality_count']/result['total_sources']*100:.1f}%)",
+        "",
+        f"📈 等级分布",
+    ]
+
+    for grade in ["A", "B", "C", "D", "F"]:
+        count = result["grade_distribution"][grade]
+        pct = count / result["total_sources"] * 100 if result["total_sources"] > 0 else 0
+        bar = "█" * int(pct / 2)
+        report_lines.append(f"   {grade}: {count:3d} ({pct:5.1f}%) {bar}")
+
+    report_lines.extend([
+        "",
+        f"⚠️  低质量条目详情 (C/D/F 级)",
+        "",
+    ])
+
+    # 只显示低质量条目
+    low_quality_details = [d for d in result["details"] if d["grade"] in ("C", "D", "F")]
+    low_quality_details.sort(key=lambda x: x["total_score"])
+
+    for detail in low_quality_details[:20]:  # 最多显示20条
+        report_lines.extend([
+            f"[{detail['index']:3d}] {detail['case_id']} | 得分: {detail['total_score']}/100 ({detail['grade']})",
+            f"      字段: {detail['field_score']}/40 ({detail['valid_fields']}/{detail['total_fields']}个有效)",
+            f"      文本: {detail['text_score']}/40",
+            f"      互动: {detail['engagement_score']}/20",
+        ])
+        if detail["issues"]:
+            report_lines.append(f"      问题: {', '.join(detail['issues'])}")
+        report_lines.append("")
+
+    if len(low_quality_details) > 20:
+        report_lines.append(f"   ... 还有 {len(low_quality_details) - 20} 条低质量条目未显示")
+
+    report_text = "\n".join(report_lines)
+
+    # 输出到控制台
+    print(report_text)
+
+    # 保存到文件
+    if output_report:
+        with open(output_report, "w", encoding="utf-8") as f:
+            f.write(report_text)
+        print(f"\n报告已保存到:{output_report}")
+
+    return result
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="评估 source.json 的质量")
+    parser.add_argument("source_file", type=Path, help="source.json 文件路径")
+    parser.add_argument("--report", type=Path, help="输出报告文件路径")
+    parser.add_argument("--filter", type=Path, help="过滤后输出文件路径")
+    parser.add_argument("--min-score", type=float, default=40.0, help="最低分数阈值(默认40)")
+    args = parser.parse_args()
+
+    if not args.source_file.exists():
+        print(f"错误:文件不存在 {args.source_file}")
+        exit(1)
+
+    # 生成评估报告
+    result = generate_quality_report(args.source_file, args.report)
+
+    # 如果指定了过滤输出
+    if args.filter:
+        evaluator = SourceQualityEvaluator()
+        filter_result = evaluator.filter_low_quality(
+            args.source_file, args.filter, args.min_score
+        )
+        print(f"\n🔍 质量过滤完成")
+        print(f"   原始条目:{filter_result['original_count']}")
+        print(f"   保留条目:{filter_result['filtered_count']}")
+        print(f"   移除条目:{filter_result['removed_count']}")
+        print(f"   输出文件:{args.filter}")

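A minimal usage sketch of the evaluator added above, for orientation. It assumes the script directory is on sys.path (the same assumption integrate_quality_check.py below makes) and that SourceQualityEvaluator / generate_quality_report behave as shown in this diff; the source.json path and the 40-point threshold are illustrative.

    from pathlib import Path
    from evaluate_source_quality import SourceQualityEvaluator, generate_quality_report

    source = Path("output/001/raw_cases/source.json")  # illustrative path

    evaluator = SourceQualityEvaluator()

    # Whole-file evaluation: grade distribution, average score, low-quality indices
    report = evaluator.evaluate_source_file(source)
    print(report["avg_score"], report["grade_distribution"])

    # Drop entries scoring below 40 (grade C cut-off); removed entries go to filtered_cases.json
    stats = evaluator.filter_low_quality(source, source.parent / "source_filtered.json", min_score=40.0)
    print(f"removed {stats['removed_count']} of {stats['original_count']}")

    # Console + file report, equivalent to the CLI's --report flag
    generate_quality_report(source, source.parent / "quality_report.txt")
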
+ 181 - 56
examples/process_pipeline/script/extract_sources.py

@@ -49,30 +49,130 @@ def parse_url(url: str) -> Optional[Tuple[str, str]]:
     return None
 
 
-# ── 从 case 文件中抽取所有链接 ────────────────────────────────
+# ── 从 case 文件中抽取所有条目(URL + evaluation) ────────────────────────────────
 
-def extract_urls_from_case(case_data: Any) -> List[str]:
-    """兼容新旧两种格式,返回 case 文件里出现的所有 URL。"""
-    urls: List[str] = []
+def extract_entries_from_case(case_data: Any) -> List[Dict[str, Any]]:
+    """
+    从 case 数据中抽取条目,每条包含 url 和 agent 的 evaluation。
+
+    返回: [{"url": str, "evaluation": dict | None, "title": str | None, "case_id": str | None}]
+    """
+    entries: List[Dict[str, Any]] = []
 
     if not isinstance(case_data, dict):
-        return urls
+        return entries
 
     # 新格式:工序发现[].帖子链接
     for item in case_data.get("工序发现", []) or []:
         if isinstance(item, dict):
             link = item.get("帖子链接") or item.get("source_url")
             if link:
-                urls.append(link)
+                entries.append({
+                    "url": link,
+                    "evaluation": item.get("evaluation"),
+                    "title": item.get("title"),
+                    "case_id": item.get("case_id"),
+                })
 
-    # 旧格式:cases[].source_url
+    # 主格式:cases[]
     for item in case_data.get("cases", []) or []:
         if isinstance(item, dict):
             link = item.get("source_url") or item.get("帖子链接")
             if link:
-                urls.append(link)
+                entries.append({
+                    "url": link,
+                    "evaluation": item.get("evaluation"),
+                    "title": item.get("title"),
+                    "case_id": item.get("case_id"),
+                })
 
-    return urls
+    return entries
+
+
+def extract_urls_from_case(case_data: Any) -> List[str]:
+    """[Legacy] 旧接口,保留供可能的外部调用。内部改用 extract_entries_from_case。"""
+    return [e["url"] for e in extract_entries_from_case(case_data)]
+
+
+# ── 过滤规则(统一入口) ────────────────────────────────
+
+# 默认阈值:body_text 最少字符数、agent 评分下限
+DEFAULT_MIN_BODY_LEN = 30
+DEFAULT_MIN_SCORE = 70.0
+DEFAULT_CUTOFF_DATE = (2025, 10, 1)
+
+
+def _is_before_cutoff(source: Dict[str, Any], cutoff_ts: int) -> bool:
+    """判断帖子是否早于截止时间戳(秒级)
+
+    如果 timestamp 为 0 或不存在,返回 False(保留)
+    """
+    post = source.get("post", {})
+    if not isinstance(post, dict):
+        return False
+
+    ts = post.get("publish_timestamp")
+    # 没有 timestamp 或为 0,保留
+    if not ts or ts == 0:
+        return False
+
+    try:
+        ts = int(ts)
+        # 毫秒级转秒级
+        if ts > 1000000000000:
+            ts = ts / 1000
+        return ts < cutoff_ts
+    except (ValueError, TypeError):
+        pass
+
+    # 尝试解析字符串格式 "2025-05-02 19:25:30"
+    try:
+        from datetime import datetime
+        dt = datetime.strptime(str(ts), "%Y-%m-%d %H:%M:%S")
+        return dt.timestamp() < cutoff_ts
+    except Exception:
+        # 解析失败,保留
+        return False
+
+
+def _check_filters(
+    source: Dict[str, Any],
+    cutoff_ts: int,
+    min_body_len: int,
+    min_score: float,
+) -> Optional[str]:
+    """
+    对一条 source 逐条检查过滤规则。
+
+    返回:
+        None          —— 条目合格
+        非 None 字符串 —— 不合格的原因(写入 filter_reason)
+    """
+    post = source.get("post", {}) or {}
+
+    # 1. body_text 完整性
+    body = post.get("body_text") or post.get("desc") or ""
+    if not isinstance(body, str) or len(body.strip()) < min_body_len:
+        return f"missing_body_text:len={len(body.strip()) if isinstance(body, str) else 0}"
+
+    # 2. agent 评分
+    evaluation = source.get("evaluation")
+    if not isinstance(evaluation, dict):
+        return "missing_evaluation"
+    quality = evaluation.get("quality")
+    if not isinstance(quality, dict):
+        return "missing_evaluation"
+    score = quality.get("overall_score")
+    if not isinstance(score, (int, float)):
+        return "invalid_score"
+    if score < min_score:
+        return f"low_score:{score}"
+
+    # 3. 过时(复用原 _is_before_cutoff 对时间戳的解析)
+    if _is_before_cutoff(source, cutoff_ts):
+        return "outdated"
+
+    return None
 
 
 # ── 从 cache 中构建 (platform, content_id) → post 索引 ────────────────────────────────
@@ -172,10 +272,23 @@ def extract_sources_to_json(
     cache_dir: Optional[Path] = None,
     output_name: str = "source.json",
     trace_ids: Optional[List[str]] = None,
+    min_body_len: int = DEFAULT_MIN_BODY_LEN,
+    min_score: float = DEFAULT_MIN_SCORE,
+    cutoff_date: Tuple[int, int, int] = DEFAULT_CUTOFF_DATE,
 ) -> Dict[str, Any]:
     """
     扫描 raw_cases_dir 下的 case_*.json,
-    从 cache 中找出原始帖子,输出到 {raw_cases_dir}/{output_name}。
+    从 cache 中找出原始帖子,并对结果执行统一过滤(body_text / 评分 / 时效)。
+
+    过滤规则(均可通过参数调整):
+      - body_text 为空或少于 min_body_len 字符  → filter_reason=missing_body_text
+      - agent evaluation 缺失或 quality.overall_score < min_score → filter_reason=missing_evaluation / invalid_score / low_score
+      - publish_timestamp 早于 cutoff_date → filter_reason=outdated
+
+    参数:
+        min_body_len: body_text 最少字符数,默认 30
+        min_score:    agent 评分下限,默认 70
+        cutoff_date:  过时截止日期 (year, month, day),默认 (2025, 10, 1)
 
     返回统计信息 dict。
     """
@@ -253,8 +366,10 @@ def extract_sources_to_json(
                 unmatched.append({"case_file": case_file.name, "error": str(e)})
                 continue
 
-        urls = extract_urls_from_case(case_data)
-        for url in urls:
+        entries = extract_entries_from_case(case_data)
+        for entry in entries:
+            url = entry["url"]
+            evaluation = entry.get("evaluation")
             # 解析 URL 得到 platform 和 content_id
             parsed = parse_url(url)
             if not parsed:
@@ -299,6 +414,7 @@ def extract_sources_to_json(
                     "platform": platform,
                     "channel_content_id": str(actual_cid),
                     "source_url": url,
+                    "evaluation": evaluation,
                     "post": post,
                 })
             else:
@@ -314,13 +430,29 @@ def extract_sources_to_json(
     # 4. 合并已有数据和新匹配的数据
     all_sources = existing_sources + matched
 
-    # 5. 过滤掉 2025-10 之前的过时帖子
+    # 5. 统一过滤:body_text 完整性 / agent 评分 / 时效
     from datetime import datetime as _dt
-    cutoff_ts = int(_dt(2025, 10, 1).timestamp())  # 本地时区的 2025-10-01
-    before_filter = len(all_sources)
-    filtered_sources = [s for s in all_sources if _is_before_cutoff(s, cutoff_ts)]
-    all_sources = [s for s in all_sources if not _is_before_cutoff(s, cutoff_ts)]
-    filtered_count = before_filter - len(all_sources)
+    cutoff_ts = int(_dt(*cutoff_date).timestamp())
+
+    kept_sources: List[Dict[str, Any]] = []
+    filtered_sources: List[Dict[str, Any]] = []
+    reason_counts: Dict[str, int] = {}
+
+    for s in all_sources:
+        reason = _check_filters(s, cutoff_ts, min_body_len, min_score)
+        if reason is None:
+            kept_sources.append(s)
+        else:
+            # 记录过滤原因
+            s_copy = dict(s)
+            s_copy["filter_reason"] = reason
+            filtered_sources.append(s_copy)
+            # 统计原因类型(只取冒号前的类别)
+            category = reason.split(":", 1)[0]
+            reason_counts[category] = reason_counts.get(category, 0) + 1
+
+    all_sources = kept_sources
+    filtered_count = len(filtered_sources)
 
     # 6. 转换 timestamp 为可读格式
     _convert_timestamps(all_sources)
@@ -337,7 +469,7 @@ def extract_sources_to_json(
     with open(output_file, "w", encoding="utf-8") as f:
         json.dump(output, f, ensure_ascii=False, indent=2)
 
-    # 8. 写 filtered_cases.json(被过滤掉的帖子,去重后追加
+    # 8. 写 filtered_cases.json(被过滤掉的帖子,按原因分组)
     if filtered_sources:
         for fs in filtered_sources:
             key = (fs.get("platform"), fs.get("channel_content_id"))
@@ -345,10 +477,22 @@ def extract_sources_to_json(
                 existing_filtered_sources.append(fs)
                 existing_filtered_ids.add(key)
 
+        # 按原因类别分组
+        by_reason: Dict[str, List[Dict[str, Any]]] = {}
+        for fs in existing_filtered_sources:
+            reason = fs.get("filter_reason", "unknown")
+            category = reason.split(":", 1)[0]
+            by_reason.setdefault(category, []).append(fs)
+
         filtered_output = {
             "total": len(existing_filtered_sources),
-            "reason": "publish_timestamp before 2025-10-01",
-            "sources": existing_filtered_sources,
+            "by_reason": {
+                category: {
+                    "count": len(items),
+                    "sources": items,
+                }
+                for category, items in by_reason.items()
+            },
         }
         with open(filtered_output_file, "w", encoding="utf-8") as f:
             json.dump(filtered_output, f, ensure_ascii=False, indent=2)
@@ -356,12 +500,26 @@ def extract_sources_to_json(
     # 9. 下载图片到 raw_cases/images/{case_id}/
     images_downloaded = download_images_for_sources(matched, raw_cases_dir)
 
-    # 返回统计信息(包含 unmatched 用于日志输出)
+    # 10. 构建被过滤条目的摘要(供续跑 feedback 使用)
+    filtered_details: List[Dict[str, Any]] = []
+    for fs in filtered_sources:
+        post = fs.get("post", {}) or {}
+        title = post.get("title") or fs.get("source_url", "")
+        filtered_details.append({
+            "case_id": fs.get("case_id", ""),
+            "platform": fs.get("platform", ""),
+            "title": title[:60] if title else "",
+            "filter_reason": fs.get("filter_reason", ""),
+        })
+
+    # 返回统计信息
     return {
         "total_matched": len(matched),
         "total_existing": len(existing_sources),
         "total_unmatched": len(unmatched),
-        "filtered_outdated": filtered_count,
+        "filtered_total": filtered_count,
+        "filtered_reasons": reason_counts,
+        "filtered_details": filtered_details,
         "images_downloaded": images_downloaded,
         "output_file": str(output_file),
     }
@@ -389,39 +547,6 @@ def _get_image_urls_from_post(post: Dict[str, Any]) -> List[str]:
     return urls
 
 
-def _is_before_cutoff(source: Dict[str, Any], cutoff_ts: int) -> bool:
-    """判断帖子是否早于截止时间戳(秒级)
-
-    如果 timestamp 为 0 或不存在,返回 False(保留)
-    """
-    post = source.get("post", {})
-    if not isinstance(post, dict):
-        return False
-
-    ts = post.get("publish_timestamp")
-    # 没有 timestamp 或为 0,保留
-    if not ts or ts == 0:
-        return False
-
-    try:
-        ts = int(ts)
-        # 毫秒级转秒级
-        if ts > 1000000000000:
-            ts = ts / 1000
-        return ts < cutoff_ts
-    except (ValueError, TypeError):
-        pass
-
-    # 尝试解析字符串格式 "2025-05-02 19:25:30"
-    try:
-        from datetime import datetime
-        dt = datetime.strptime(str(ts), "%Y-%m-%d %H:%M:%S")
-        return dt.timestamp() < cutoff_ts
-    except Exception:
-        # 解析失败,保留
-        return False
-
-
 def _format_timestamp(ts: Any) -> Optional[str]:
     """将时间戳(秒/毫秒)转换为可读格式"""
     from datetime import datetime

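A sketch of calling the reworked extraction entry point with the new filter thresholds. The keyword names come from this diff; the raw_cases_dir positional argument is inferred from the docstring, the path is illustrative, and the remaining parameters are assumed to keep their defaults.

    from pathlib import Path
    from extract_sources import extract_sources_to_json

    stats = extract_sources_to_json(
        Path("output/001/raw_cases"),    # illustrative raw_cases_dir
        min_body_len=30,                 # shorter body_text -> filter_reason=missing_body_text
        min_score=70.0,                  # evaluation.quality.overall_score below this -> low_score
        cutoff_date=(2025, 10, 1),       # publish_timestamp before this -> outdated
    )

    print(stats["total_matched"], "matched;", stats["filtered_total"], "filtered")
    for category, count in stats["filtered_reasons"].items():
        print(f"  {category}: {count}")
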
+ 183 - 0
examples/process_pipeline/script/integrate_quality_check.py

@@ -0,0 +1,183 @@
+"""
+将质量评估集成到 Pipeline 中
+
+在 Phase 1.5 (source.json 生成后) 自动执行质量检查,
+对低质量内容进行二次筛选或触发重新调研。
+"""
+
+import json
+from pathlib import Path
+from typing import Any, Dict, List
+from evaluate_source_quality import SourceQualityEvaluator, generate_quality_report
+
+
+def check_and_filter_source(
+    source_file: Path,
+    min_score: float = 40.0,
+    min_pass_rate: float = 0.6,
+    auto_filter: bool = True,
+) -> Dict[str, Any]:
+    """
+    检查 source.json 质量并决定是否需要重新调研
+
+    Args:
+        source_file: source.json 路径
+        min_score: 单条内容的最低分数阈值(默认40分,C级)
+        min_pass_rate: 整体通过率阈值(默认60%)
+        auto_filter: 是否自动过滤低质量内容
+
+    Returns:
+        {
+            "status": str,  # "pass" | "filtered" | "failed"
+            "message": str,
+            "stats": dict,
+            "action": str,  # "continue" | "rerun_research" | "manual_review"
+        }
+    """
+    evaluator = SourceQualityEvaluator()
+
+    # 1. 生成质量报告
+    print(f"\n{'='*60}")
+    print(f"📊 质量评估:{source_file.name}")
+    print(f"{'='*60}")
+
+    eval_result = evaluator.evaluate_source_file(source_file)
+
+    total = eval_result["total_sources"]
+    low_quality_count = eval_result["low_quality_count"]
+    pass_count = total - low_quality_count
+    pass_rate = pass_count / total if total > 0 else 0.0
+    avg_score = eval_result["avg_score"]
+
+    print(f"\n总条目数:{total}")
+    print(f"平均得分:{avg_score:.1f}/100")
+    print(f"通过率:{pass_rate*100:.1f}% ({pass_count}/{total})")
+    print(f"\n等级分布:")
+    for grade in ["A", "B", "C", "D", "F"]:
+        count = eval_result["grade_distribution"][grade]
+        pct = count / total * 100 if total > 0 else 0
+        print(f"  {grade}: {count:3d} ({pct:5.1f}%)")
+
+    # 2. 判断质量是否达标
+    result = {
+        "status": "unknown",
+        "message": "",
+        "stats": {
+            "total": total,
+            "pass_count": pass_count,
+            "pass_rate": pass_rate,
+            "avg_score": avg_score,
+            "grade_distribution": eval_result["grade_distribution"],
+        },
+        "action": "continue",
+    }
+
+    # 3. 决策逻辑
+    if pass_rate >= min_pass_rate and avg_score >= 50:
+        # 质量良好,直接通过
+        result["status"] = "pass"
+        result["message"] = f"✅ 质量达标(通过率 {pass_rate*100:.1f}%)"
+        result["action"] = "continue"
+        print(f"\n{result['message']}")
+
+    elif pass_rate >= 0.4 and auto_filter:
+        # 质量一般,自动过滤低质量内容
+        print(f"\n⚠️  质量一般(通过率 {pass_rate*100:.1f}%),执行自动过滤...")
+
+        filtered_file = source_file.parent / "source_filtered.json"
+        filter_result = evaluator.filter_low_quality(
+            source_file, filtered_file, min_score
+        )
+
+        # 备份原文件
+        backup_file = source_file.parent / "source_original.json"
+        source_file.rename(backup_file)
+        filtered_file.rename(source_file)
+
+        result["status"] = "filtered"
+        result["message"] = (
+            f"🔍 已过滤 {filter_result['removed_count']} 条低质量内容\n"
+            f"   保留:{filter_result['filtered_count']}/{filter_result['original_count']}\n"
+            f"   原始文件备份至:{backup_file.name}"
+        )
+        result["action"] = "continue"
+        result["stats"]["filtered_count"] = filter_result["filtered_count"]
+        result["stats"]["removed_count"] = filter_result["removed_count"]
+
+        print(f"\n{result['message']}")
+
+    else:
+        # 质量太差,建议重新调研
+        result["status"] = "failed"
+        result["message"] = (
+            f"❌ 质量不达标(通过率 {pass_rate*100:.1f}%,平均分 {avg_score:.1f})\n"
+            f"   建议:重新调研或人工审核"
+        )
+        result["action"] = "manual_review"
+
+        print(f"\n{result['message']}")
+
+        # 显示主要问题
+        print(f"\n主要问题:")
+        issue_summary = _summarize_issues(eval_result["details"])
+        for issue, count in issue_summary.items():
+            print(f"  - {issue}: {count} 条")
+
+    return result
+
+
+def _summarize_issues(details: List[Dict]) -> Dict[str, int]:
+    """汇总常见问题"""
+    issue_counts = {}
+    for detail in details:
+        for issue in detail.get("issues", []):
+            issue_counts[issue] = issue_counts.get(issue, 0) + 1
+    # 按频率排序
+    return dict(sorted(issue_counts.items(), key=lambda x: x[1], reverse=True)[:5])
+
+
+def integrate_into_pipeline(output_dir: Path, min_score: float = 40.0):
+    """
+    集成到 pipeline 的入口函数
+
+    在 Phase 1.5 之后调用,检查 source.json 质量
+    """
+    source_file = output_dir / "raw_cases" / "source.json"
+
+    if not source_file.exists():
+        print(f"⚠️  source.json 不存在,跳过质量检查")
+        return {"status": "skip", "action": "continue"}
+
+    # 执行质量检查
+    result = check_and_filter_source(
+        source_file,
+        min_score=min_score,
+        min_pass_rate=0.6,
+        auto_filter=True,
+    )
+
+    # 保存质量报告
+    report_file = output_dir / "quality_report.txt"
+    generate_quality_report(source_file, report_file)
+    print(f"\n📄 详细报告已保存:{report_file}")
+
+    return result
+
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser(description="集成质量检查到 pipeline")
+    parser.add_argument("output_dir", type=Path, help="输出目录(如 output/001)")
+    parser.add_argument("--min-score", type=float, default=40.0, help="最低分数阈值")
+    args = parser.parse_args()
+
+    result = integrate_into_pipeline(args.output_dir, args.min_score)
+
+    # 根据结果决定是否继续
+    if result["action"] == "manual_review":
+        print(f"\n⚠️  需要人工介入,pipeline 暂停")
+        exit(1)
+    else:
+        print(f"\n✅ 质量检查完成,pipeline 继续")
+        exit(0)

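A hedged sketch of wiring the Phase 1.5 quality gate above into the calling pipeline; the surrounding pipeline code is outside this diff, the output directory is illustrative, and the import again assumes the script directory is on sys.path.

    from pathlib import Path
    from integrate_quality_check import integrate_into_pipeline

    result = integrate_into_pipeline(Path("output/001"), min_score=40.0)

    if result["action"] == "manual_review":
        # quality below threshold: stop and hand off for re-research / manual review
        raise SystemExit("source.json failed the quality gate")
    # action == "continue": source.json either passed or was auto-filtered in place
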
+ 1 - 2
examples/process_pipeline/server.py

@@ -174,11 +174,10 @@ def get_requirements():
         case_json_path = dir_path / "case.json"
         if case_json_path.exists():
             try:
-                import json
                 with open(case_json_path, 'r', encoding='utf-8') as f:
                     case_data = json.load(f)
                     raw_cases_count = len(case_data.get('cases', []))
-            except:
+            except Exception:
                 pass
         
         if raw_cases_count == 0:

+ 177 - 13
examples/process_pipeline/ui/app.js

@@ -235,9 +235,11 @@ function renderRawCases(rawCasesObj) {
     const renderPaneContent = (pList) => {
         let paneHtml = '';
         let totalCases = 0;
-        let gridHtml = '';
         let seenIds = new Set();
         
+        let groupedHtml = {};
+        const getGroupKey = (c, p) => (p === 'filtered_cases' && c.filter_reason) ? `🚫 过滤原因: ${c.filter_reason}` : 'default';
+        
         pList.forEach(p => {
             if (!rawCasesObj[p]) return;
             if (rawCasesObj[p].error) {
@@ -248,11 +250,27 @@ function renderRawCases(rawCasesObj) {
                 </div>`;
                 return;
             }
-            if (rawCasesObj[p].reason) {
-                paneHtml += `<div style="padding:0.5rem 1rem; background:rgba(239, 68, 68, 0.1); border:1px solid var(--danger); border-radius:6px; margin-bottom:1rem; color:var(--danger); font-size:0.9rem;">🛑 ${p} 过滤原因: ${rawCasesObj[p].reason}</div>`;
+            if (rawCasesObj[p].reason && p !== 'filtered_cases') {
+                paneHtml += `<div style="padding:0.5rem 1rem; background:rgba(239, 68, 68, 0.1); border:1px solid var(--danger); border-radius:6px; margin-bottom:1rem; color:var(--danger); font-size:0.9rem;">🛑 ${p} 提示: ${rawCasesObj[p].reason}</div>`;
             }
 
-            const cases = Array.isArray(rawCasesObj[p]) ? rawCasesObj[p] : (rawCasesObj[p].cases || rawCasesObj[p].sources || []);
+            let cases = [];
+            if (Array.isArray(rawCasesObj[p])) {
+                cases = rawCasesObj[p];
+            } else if (rawCasesObj[p].cases) {
+                cases = rawCasesObj[p].cases;
+            } else if (rawCasesObj[p].sources) {
+                cases = rawCasesObj[p].sources;
+            } else if (rawCasesObj[p].by_reason) {
+                Object.entries(rawCasesObj[p].by_reason).forEach(([reasonKey, reasonObj]) => {
+                    if (reasonObj.sources && Array.isArray(reasonObj.sources)) {
+                        reasonObj.sources.forEach(src => {
+                            if (!src.filter_reason) src.filter_reason = reasonKey;
+                            cases.push(src);
+                        });
+                    }
+                });
+            }
             if (cases.length > 0) {
                 if (!rawCasesObj['source'] && p !== 'source_ex' && p !== 'filtered_cases' && p !== 'source') {
                     paneHtml += `<div style="padding:1rem; background:rgba(0, 0, 0, 0.05); border:1px solid rgba(0, 0, 0, 0.1); border-radius:8px; margin-bottom:1rem;">
@@ -266,6 +284,10 @@ function renderRawCases(rawCasesObj) {
                     totalCases++;
                     const cId = c.case_id || (c._raw && c._raw.case_id) || (c.post && c.post.channel_content_id) || `temp_${p}_${idx}`;
                     const cUrl = c.source_url || c.url || (c.post && c.post.link) || '';
+                    
+                    const groupKey = getGroupKey(c, p);
+                    if (!groupedHtml[groupKey]) groupedHtml[groupKey] = '';
+
                     if (cId || cUrl || c.post) {
                         const mappedS = sourceMap[cId] || sourceMap[cUrl] || (c._raw && sourceMap[c._raw.case_id]);
                         if (p !== 'filtered_cases' && p !== 'source' && p !== 'source_ex' && !mappedS) return;
@@ -300,7 +322,7 @@ function renderRawCases(rawCasesObj) {
                             }
                         }
                         
-                        gridHtml += `<div class="masonry-card" style="position:relative;" onclick="openCaseDetail('${p}', ${idx})">
+                        groupedHtml[groupKey] += `<div class="masonry-card" style="position:relative;" onclick="openCaseDetail('${p}', ${idx})">
                             ${platBadge}
                             ${actionBtn}
                             ${allImages.length > 0 ? `<img class="cover-img" src="${coverImgUrl}" onerror="this.onerror=null; this.src='${fallbackImgUrl}';">` : ''}
@@ -314,7 +336,7 @@ function renderRawCases(rawCasesObj) {
                             </div>
                         </div>`;
                     } else {
-                        gridHtml += `<div class="masonry-card" style="padding:12px; font-family:monospace; font-size:0.8em;" onclick="openCaseDetail('${p}', ${idx})">
+                        groupedHtml[groupKey] += `<div class="masonry-card" style="padding:12px; font-family:monospace; font-size:0.8em;" onclick="openCaseDetail('${p}', ${idx})">
                             📝 旧版格式 / 解析失败<br>点击查看详情
                         </div>`;
                     }
@@ -324,8 +346,15 @@ function renderRawCases(rawCasesObj) {
         
         if (totalCases === 0 && pList.length > 0 && !paneHtml.includes('解析失败') && !paneHtml.includes('未进行')) {
             paneHtml += `<div style="padding:1rem; color:var(--text-muted); text-align:center;">暂无数据</div>`;
-        } else if (gridHtml) {
-            paneHtml += `<div class="masonry-grid">${gridHtml}</div>`;
+        } else {
+            Object.entries(groupedHtml).forEach(([groupName, gHtml]) => {
+                if (gHtml) {
+                    if (groupName !== 'default') {
+                        paneHtml += `<h3 style="margin-top: 1rem; margin-bottom: 0.5rem; font-size: 1.1rem; color: var(--text-main); border-left: 4px solid var(--accent-primary); padding-left: 8px;">${groupName}</h3>`;
+                    }
+                    paneHtml += `<div class="masonry-grid">${gHtml}</div>`;
+                }
+            });
         }
         return paneHtml;
     };
@@ -1824,9 +1853,29 @@ window.openCaseDetail = function(p, initialIdx) {
     
     platformsToAggregate.forEach(plat => {
         if (!ctx.rawCasesObj[plat]) return;
-        const platCases = ctx.rawCasesObj[plat].cases || ctx.rawCasesObj[plat].sources || [];
+        let platCases = [];
+        if (Array.isArray(ctx.rawCasesObj[plat])) {
+            platCases = ctx.rawCasesObj[plat];
+        } else if (ctx.rawCasesObj[plat].cases) {
+            platCases = ctx.rawCasesObj[plat].cases;
+        } else if (ctx.rawCasesObj[plat].sources) {
+            platCases = ctx.rawCasesObj[plat].sources;
+        } else if (ctx.rawCasesObj[plat].by_reason) {
+            Object.entries(ctx.rawCasesObj[plat].by_reason).forEach(([reasonKey, reasonObj]) => {
+                if (reasonObj.sources && Array.isArray(reasonObj.sources)) {
+                    reasonObj.sources.forEach(src => {
+                        if (!src.filter_reason) src.filter_reason = reasonKey;
+                        platCases.push(src);
+                    });
+                }
+            });
+        }
         platCases.forEach((c, idx) => {
             const cId = c.case_id || (c._raw && c._raw.case_id) || (c.post && c.post.channel_content_id) || `temp_${plat}_${idx}`;
+            const cUrl = c.source_url || c.url || (c.post && c.post.link) || '';
+            
+            const mappedS = ctx.sourceMap[cId] || ctx.sourceMap[cUrl] || (c._raw && ctx.sourceMap[c._raw.case_id]);
+            if (plat !== 'filtered_cases' && plat !== 'source' && plat !== 'source_ex' && !mappedS) return;
             
             if (seenIds.has(cId)) return;
             seenIds.add(cId);
@@ -1914,13 +1963,40 @@ window.renderSingleCaseDetail = function(idx) {
     const title = post.title || c.title || '无标题';
     const workflowUrl = s.source_url || s.url || cUrl;
     
-    // Header
+    const publishedTime = post.publish_timestamp || post.published_at || '-';
+    const likeCount = post.like_count !== undefined ? post.like_count : (post.likes !== undefined ? post.likes : '-');
+    const collectCount = post.collect_count !== undefined ? post.collect_count : (post.collects !== undefined ? post.collects : '-');
+    const commentCount = post.comment_count !== undefined ? post.comment_count : (post.comments !== undefined ? post.comments : '-');
+    const shareCount = post.share_count !== undefined ? post.share_count : (post.shares !== undefined ? post.shares : '-');
+
     const headerHtml = `
         <h2 style="margin: 0 0 0.5rem 0; font-size: 1.4em; color: var(--text-main);">${title}</h2>
-        <div style="display: flex; gap: 12px; margin-bottom: 1rem;">
+        <div style="display: flex; gap: 12px; margin-bottom: 0.8rem;">
             ${workflowUrl ? `<a href="${workflowUrl}" target="_blank" style="color: var(--accent-primary); text-decoration: none; font-size: 0.9em;">原文 ↗</a>` : ''}
             <span style="color: var(--text-muted); font-size: 0.9em;">平台: ${platformName}</span>
         </div>
+        <div style="display: flex; gap: 10px; margin-bottom: 1rem; flex-wrap: wrap;">
+            <div style="display: flex; flex-direction: column; background: rgba(0,0,0,0.02); border: 1px solid rgba(0,0,0,0.06); padding: 4px 10px; border-radius: 6px;">
+                <span style="font-size: 0.7em; color: var(--text-muted); text-transform: uppercase;">Published</span>
+                <span style="font-size: 0.9rem; font-weight: 500; color: var(--text-main);">${publishedTime}</span>
+            </div>
+            <div style="display: flex; flex-direction: column; background: rgba(0,0,0,0.02); border: 1px solid rgba(0,0,0,0.06); padding: 4px 10px; border-radius: 6px;">
+                <span style="font-size: 0.7em; color: var(--text-muted); text-transform: uppercase;">Likes</span>
+                <span style="font-size: 0.9rem; font-weight: 500; color: var(--text-main);">${likeCount}</span>
+            </div>
+            <div style="display: flex; flex-direction: column; background: rgba(0,0,0,0.02); border: 1px solid rgba(0,0,0,0.06); padding: 4px 10px; border-radius: 6px;">
+                <span style="font-size: 0.7em; color: var(--text-muted); text-transform: uppercase;">Collects</span>
+                <span style="font-size: 0.9rem; font-weight: 500; color: var(--text-main);">${collectCount}</span>
+            </div>
+            <div style="display: flex; flex-direction: column; background: rgba(0,0,0,0.02); border: 1px solid rgba(0,0,0,0.06); padding: 4px 10px; border-radius: 6px;">
+                <span style="font-size: 0.7em; color: var(--text-muted); text-transform: uppercase;">Comments</span>
+                <span style="font-size: 0.9rem; font-weight: 500; color: var(--text-main);">${commentCount}</span>
+            </div>
+            <div style="display: flex; flex-direction: column; background: rgba(0,0,0,0.02); border: 1px solid rgba(0,0,0,0.06); padding: 4px 10px; border-radius: 6px;">
+                <span style="font-size: 0.7em; color: var(--text-muted); text-transform: uppercase;">Shares</span>
+                <span style="font-size: 0.9rem; font-weight: 500; color: var(--text-main);">${shareCount}</span>
+            </div>
+        </div>
     `;
     document.getElementById('modal-main-header').innerHTML = headerHtml;
     
@@ -1969,6 +2045,40 @@ window.renderSingleCaseDetail = function(idx) {
         </div>
     `;
     
+    // Evaluation Panel
+    if (s.evaluation && Object.keys(s.evaluation).length > 0) {
+        const renderEvalNode = (node, indent = 0) => {
+            let html = '';
+            if (typeof node === 'object' && node !== null) {
+                Object.entries(node).forEach(([k, v]) => {
+                    html += `<div style="display: flex; flex-direction: column; padding-left: ${indent}px; margin-bottom: 8px;">
+                        <span style="color: var(--text-muted); font-size: 0.85rem; font-weight: bold; text-transform: uppercase;">${k.replace(/_/g, ' ')}</span>`;
+                    if (typeof v === 'object' && v !== null) {
+                        html += `<div style="margin-top: 4px; border-left: 2px solid rgba(0,0,0,0.1); padding-left: 8px;">${renderEvalNode(v, 0)}</div>`;
+                    } else {
+                        const valColor = typeof v === 'number' ? '#3b82f6' : 'var(--text-main)';
+                        html += `<span style="font-weight: 500; font-size: 0.95rem; color: ${valColor}; margin-top: 2px;">${String(v).replace(/</g, '&lt;').replace(/>/g, '&gt;')}</span>`;
+                    }
+                    html += `</div>`;
+                });
+            }
+            return html;
+        };
+        
+        mainScrollableHtml += `
+        <div class="case-section" style="margin-top: 1rem;">
+            <div style="background: rgba(0,0,0,0.02); border: 1px solid rgba(0,0,0,0.05); border-radius: 8px; padding: 1rem;">
+                <h3 style="margin: 0 0 1rem 0; color: var(--text-main); font-size: 1.1rem; display: flex; align-items: center; gap: 10px;">
+                    <span style="color: #10b981;">📊</span> 质量评估 (Evaluation)
+                </h3>
+                <div style="display: flex; flex-direction: column; gap: 4px;">
+                    ${renderEvalNode(s.evaluation)}
+                </div>
+            </div>
+        </div>`;
+    }
+
+    
     // Extracted Data
     const wf = ctx.detailMap[cId] || (workflowUrl ? ctx.detailMapByUrl[workflowUrl] : null) || c;
     
@@ -2236,11 +2346,57 @@ window.renderStructuredData = function(items, type, parentItem = null) {
         // Stage rendering removed per request
         // Render effects
         if (item.effects && Array.isArray(item.effects) && item.effects.length > 0) {
+            let effectsHtml = '';
+            item.effects.forEach(effectItem => {
+                if (typeof effectItem === 'string') {
+                    effectsHtml += `<li style="margin-bottom: 4px;">${effectItem.replace(/</g, '&lt;').replace(/>/g, '&gt;')}</li>`;
+                } else if (typeof effectItem === 'object' && effectItem !== null) {
+                    const stmt = effectItem.statement ? effectItem.statement.replace(/</g, '&lt;').replace(/>/g, '&gt;') : 'Effect';
+                    let detailsHtml = '';
+                    const excludeKeys = ['statement'];
+                    Object.entries(effectItem).forEach(([k, v]) => {
+                        if (!excludeKeys.includes(k) && v !== null && v !== undefined && v !== '' && (!Array.isArray(v) || v.length > 0)) {
+                            let valStr = '';
+                            if (Array.isArray(v)) {
+                                valStr = `<ul style="margin: 2px 0 0 20px; padding: 0;">` + v.map(vi => `<li>${String(vi).replace(/</g, '&lt;').replace(/>/g, '&gt;')}</li>`).join('') + `</ul>`;
+                            } else if (typeof v === 'object') {
+                                valStr = JSON.stringify(v);
+                            } else {
+                                valStr = String(v).replace(/</g, '&lt;').replace(/>/g, '&gt;');
+                            }
+                            
+                            if (Array.isArray(v)) {
+                                detailsHtml += `<div style="margin-top: 6px; font-size: 0.9em;">
+                                    <div style="color: var(--text-muted); font-weight: 500; margin-bottom: 2px; text-transform: capitalize;">${k.replace(/_/g, ' ')}:</div> 
+                                    <div style="color: var(--text-main);">${valStr}</div>
+                                </div>`;
+                            } else {
+                                detailsHtml += `<div style="margin-top: 4px; font-size: 0.9em;">
+                                    <span style="color: var(--text-muted); font-weight: 500; text-transform: capitalize;">${k.replace(/_/g, ' ')}:</span> 
+                                    <span style="color: var(--text-main);">${valStr}</span>
+                                </div>`;
+                            }
+                        }
+                    });
+                    
+                    effectsHtml += `<li style="list-style: none; margin-bottom: 8px; margin-left: -20px;">
+                        <details style="background: rgba(0,0,0,0.02); border: 1px solid rgba(0,0,0,0.06); border-radius: 6px; padding: 6px 10px;">
+                            <summary style="cursor: pointer; font-weight: 500; color: var(--accent-primary); outline: none;">
+                                ${stmt}
+                            </summary>
+                            <div style="padding-top: 8px; margin-top: 6px; border-top: 1px dashed rgba(0,0,0,0.1);">
+                                ${detailsHtml}
+                            </div>
+                        </details>
+                    </li>`;
+                }
+            });
+            
             html += `<div class="structured-row">
                 <div class="structured-label">effects</div>
                 <div class="structured-value">
                     <ul class="effects-list">
-                        ${item.effects.map(li => `<li>${li.replace(/</g, '&lt;').replace(/>/g, '&gt;')}</li>`).join('')}
+                        ${effectsHtml}
                     </ul>
                 </div>
             </div>`;
@@ -2334,6 +2490,8 @@ window.renderStructuredData = function(items, type, parentItem = null) {
             return list.map(io => {
                 const desc = isValid(io.description) ? io.description.replace(/</g, '&lt;').replace(/>/g, '&gt;') : '';
                 const mod = isValid(io.modality) ? io.modality : '';
+                const relation = isValid(io.relation) ? io.relation.replace(/</g, '&lt;').replace(/>/g, '&gt;') : '';
+                
                 let content = '';
                 if (mod) {
                     content += `<span class="data-type-badge" style="background:#e0e7ff;color:#3730a3;font-weight:normal;margin-right:6px;margin-bottom:2px;display:inline-block;">${mod}</span>`;
@@ -2341,6 +2499,12 @@ window.renderStructuredData = function(items, type, parentItem = null) {
                 if (desc) {
                     content += desc;
                 }
+                
+                let extraHtml = '';
+                if (relation) {
+                    extraHtml = `<div style="font-size: 0.85em; color: #94a3b8; margin-top: 2px; font-weight: normal;">↳ relation: ${relation}</div>`;
+                }
+                
                 if (!content) {
                     const keys = Object.keys(io);
                     if (keys.length === 1 && typeof io[keys[0]] === 'string') {
@@ -2349,7 +2513,7 @@ window.renderStructuredData = function(items, type, parentItem = null) {
                         content = `<span class="data-type-badge">未知</span>`;
                     }
                 }
-                return `<div style="margin-bottom: 6px; color: var(--text-main); font-weight: bold; line-height: 1.5; word-wrap: break-word;">${content}</div>`;
+                return `<div style="margin-bottom: 6px; color: var(--text-main); font-weight: bold; line-height: 1.5; word-wrap: break-word;">${content}${extraHtml}</div>`;
             }).join('');
         };
 

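For reference, a Python-literal sketch of the filtered_cases.json shape that the by_reason handling above consumes. Only the keys mirror what extract_sources.py now writes; platforms, IDs, URLs and titles are illustrative.

    filtered_cases = {
        "total": 2,
        "by_reason": {
            "low_score": {
                "count": 1,
                "sources": [
                    {"platform": "gzh", "channel_content_id": "111", "source_url": "https://example.com/a",
                     "filter_reason": "low_score:55.0", "post": {"title": "示例帖子 A"}},
                ],
            },
            "outdated": {
                "count": 1,
                "sources": [
                    {"platform": "gzh", "channel_content_id": "222", "source_url": "https://example.com/b",
                     "filter_reason": "outdated", "post": {"title": "示例帖子 B"}},
                ],
            },
        },
    }
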
+ 39 - 0
examples/test_gzh_api.py

@@ -0,0 +1,39 @@
+import asyncio
+import json
+import os
+import sys
+
+# 将项目根目录加入 sys.path,以便导入 agent 包
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from agent.tools.builtin.content.platforms.aigc_channel import search
+
+async def main():
+    keyword = "人工智能"
+    print(f"=====================================")
+    print(f"Testing GZH (公众号) search API")
+    print(f"Keyword: {keyword}")
+    print(f"=====================================\n")
+    
+    result = await search(platform_id="gzh", keyword=keyword, max_count=5)
+    
+    if result.error:
+        print(f"❌ [Error] 搜索失败: {result.error}")
+    else:
+        print(f"✅ [Success] 搜索成功!返回结果如下:")
+        try:
+            parsed = json.loads(result.output)
+            print(json.dumps(parsed, ensure_ascii=False, indent=2))
+        except Exception:
+            print(result.output)
+            
+        if result.metadata and "posts" in result.metadata:
+            posts = result.metadata["posts"]
+            print(f"\n[Metadata] 成功获取到底层 posts 完整数据条数: {len(posts)}")
+            if len(posts) > 0:
+                print(f"[Metadata Sample] 第一条数据的标题: {posts[0].get('title', '无标题')}")
+                if "_quality_score" in posts[0]:
+                    print(f"[Quality] 第一条数据的质量评分: {posts[0]['_quality_score']} ({posts[0]['_quality_grade']})")
+
+if __name__ == "__main__":
+    asyncio.run(main())