刘立冬 3 tygodni temu
rodzic
commit
1bc11f27a5

+ 23 - 13
knowledge_search_traverse.py

@@ -18,7 +18,7 @@ REQUIRED_SCORE_GAIN = 0.02
 from script.search_recommendations.xiaohongshu_search_recommendations import XiaohongshuSearchRecommendations
 from script.search.xiaohongshu_search import XiaohongshuSearch
 # from multimodal_extractor import extract_post_images  # 内容提取流程已断开
-from post_evaluator import evaluate_post, apply_evaluation_to_post
+from post_evaluator_v2 import evaluate_post_v2, apply_evaluation_v2_to_post
 
 
 # ============================================================================
@@ -146,13 +146,23 @@ class Post(BaseModel):
     note_id: str = ""
     note_url: str = ""
 
-    # 评估字段
+    # 评估字段(顶层 - 快速访问)
     is_knowledge: bool | None = None  # 是否是知识内容
-    knowledge_reason: str = ""  # 知识判定原因
-    relevance_score: float | None = None  # 与原始query的相关性(0-1)
+    knowledge_reason: str = ""  # 知识判定原因(简短)
+    knowledge_score: float | None = None  # 知识评分(0-100)
+    knowledge_level: int | None = None  # 知识星级(1-5)
+
+    relevance_score: float | None = None  # 相关性综合得分(0-100)
     relevance_level: str = ""  # 相关性分级:"高度相关" | "中度相关" | "低度相关"
-    relevance_reason: str = ""  # 相关性评分原因
+    relevance_reason: str = ""  # 相关性评分原因(简短)
+    relevance_conclusion: str = ""  # 匹配结论: "高度匹配"/"基本匹配"/"部分匹配"/"不匹配"
+
     evaluation_time: str = ""  # 评估时间戳
+    evaluator_version: str = "v2.0"  # 评估器版本
+
+    # 评估字段(嵌套 - 详细信息)
+    knowledge_evaluation: dict | None = None  # 知识评估详情
+    relevance_evaluation: dict | None = None  # 相关性评估详情
 
 
 class Search(Sug):
@@ -2871,11 +2881,11 @@ async def run_round(
         for search in search_list:
             if search.post_list:
                 print(f"  评估来自 '{search.text}' 的 {len(search.post_list)} 个帖子")
-                # 对每个帖子进行评估
+                # 对每个帖子进行评估 (V2)
                 for post in search.post_list:
-                    evaluation = await evaluate_post(post, o, semaphore=None)
-                    if evaluation:
-                        apply_evaluation_to_post(post, evaluation)
+                    knowledge_eval, relevance_eval = await evaluate_post_v2(post, o, semaphore=None)
+                    if knowledge_eval and relevance_eval:
+                        apply_evaluation_v2_to_post(post, knowledge_eval, relevance_eval)
     else:
         print(f"  没有高分建议词,search_list为空")
 
@@ -3523,11 +3533,11 @@ async def run_round_v2(
         for search in search_list:
             if search.post_list:
                 print(f"  评估来自 '{search.text}' 的 {len(search.post_list)} 个帖子")
-                # 对每个帖子进行评估
+                # 对每个帖子进行评估 (V2)
                 for post in search.post_list:
-                    evaluation = await evaluate_post(post, o, semaphore=None)
-                    if evaluation:
-                        apply_evaluation_to_post(post, evaluation)
+                    knowledge_eval, relevance_eval = await evaluate_post_v2(post, o, semaphore=None)
+                    if knowledge_eval and relevance_eval:
+                        apply_evaluation_v2_to_post(post, knowledge_eval, relevance_eval)
 
     # 步骤4: 生成N域组合
     print(f"\n[步骤4] 生成{round_num}域组合...")

+ 570 - 0
post_evaluator_v2.py

@@ -0,0 +1,570 @@
+"""
+帖子评估模块 V2 - 分离的知识评估和相关性评估
+
+改进:
+1. 知识评估: 6维度分层打分系统 (0-100分)
+2. 相关性评估: 目的性(70%) + 品类(30%)
+3. 并发评估: 两个API同时调用
+4. 详细数据: 嵌套结构存储完整评估信息
+"""
+
+import asyncio
+import json
+import os
+from datetime import datetime
+from typing import Optional
+from pydantic import BaseModel, Field
+import requests
+
+MODEL_NAME = "google/gemini-2.5-flash"  # sent as the "model" field of every chat-completions request
+MAX_IMAGES_PER_POST = 10  # only the first N images of a post are attached to a request
+MAX_CONCURRENT_EVALUATIONS = 5  # default semaphore size used by batch_evaluate_posts_v2
+API_TIMEOUT = 120  # passed to requests.post as its timeout
+
+# ============================================================================
+# 数据模型
+# ============================================================================
+
+class KnowledgeEvaluation(BaseModel):
+    """Result of the knowledge evaluation of one post.
+
+    The field names mirror the JSON object that KNOWLEDGE_EVALUATION_PROMPT
+    asks the model to return.
+    """
+    # Whether the post counts as knowledge content.
+    is_knowledge: bool = Field(..., description="是否是知识内容")
+    # Verdict of the quick-exclusion step of the prompt.
+    quick_exclude: bool = Field(False, description="快速排除判定")
+    # Per-dimension scores keyed by dimension name (six dimensions in the prompt).
+    dimension_scores: dict[str, int] = Field(default_factory=dict, description="6维度得分")
+    # Weighted total score on the 0-100 scale.
+    weighted_score: float = Field(..., description="加权总分(0-100)")
+    # Star level, 1-5.
+    level: int = Field(..., description="满足度等级(1-5星)")
+    # Key evidence supporting the verdict.
+    evidence: list[str] = Field(default_factory=list, description="关键证据")
+    # Problems found in the post.
+    issues: list[str] = Field(default_factory=list, description="存在问题")
+    # Short free-text summary.
+    summary: str = Field(..., description="总结陈述")
+
+
+class RelevanceEvaluation(BaseModel):
+    """Result of the relevance evaluation of one post against the original query.
+
+    The field names mirror the JSON object that RELEVANCE_EVALUATION_PROMPT
+    asks the model to return.
+    """
+    # Purpose-match score, 0-100.
+    purpose_score: float = Field(..., description="目的性匹配得分(0-100)")
+    # Category-match score, 0-100.
+    category_score: float = Field(..., description="品类匹配得分(0-100)")
+    # Combined score, 0-100.
+    total_score: float = Field(..., description="综合得分(0-100)")
+    # Match conclusion label returned by the model.
+    conclusion: str = Field(..., description="匹配结论")
+    # Short free-text summary.
+    summary: str = Field(..., description="总结说明")
+
+
+# ============================================================================
+# Prompt 定义
+# ============================================================================
+
+# Prompt template for the knowledge evaluation. It is filled with str.format()
+# using title, body_text and num_images; the doubled braces around the JSON
+# example are str.format() escapes for literal braces.
+# Each dimension is scored 0-10, so the weighted sum is multiplied by 10 to
+# land on the 0-100 scale used by the star levels and the >=60 threshold.
+KNOWLEDGE_EVALUATION_PROMPT = """# 内容知识判定系统
+
+## 角色定义
+你是一个多模态内容评估专家,专门判断社交媒体帖子是否属于"内容知识"类别。
+
+## 内容知识定义
+**内容知识**是指对创作/制作有实际帮助的、具有通用性和可迁移性的知识,包括:
+- ✅ **原理型知识**: 讲解创作背后的原理、逻辑、方法论
+- ✅ **体系型知识**: 提供完整的框架、流程、体系化方法
+- ✅ **案例提炼型知识**: 通过多案例总结出通用规律和可复用方法
+
+**非内容知识**(需严格排除):
+- ❌ **单案例展示**: 仅展示某一个作品/项目,无方法论提炼
+- ❌ **单点细节**: 只讲某个具体细节的操作,缺乏系统性
+- ❌ **纯元素展示**: 配色/字体/素材等单点展示,无创作方法
+- ❌ **作品集型**: 纯粹的作品展示集合,无教学目的
+
+---
+
+## 输入信息
+- **标题**: {title}
+- **正文**: {body_text}
+- **图片数量**: {num_images}张
+
+---
+
+## 判断流程
+
+### 第一步: 快速排除判断(任一项为"是"则直接判定为非内容知识)
+1. 标题是否为纯展示型? (如:"我的XX作品"、"今天做了XX"、"分享一下")
+2. 正文或者图片里内容是否缺乏方法/原理/步骤描述,仅是叙事或展示?
+3. 图片是否全为作品展示,无原理型/体系型/知识提炼型内容元素?
+4. 是否只讲一个具体项目的单次操作,无通用性?
+
+**输出**: "quick_exclude": true/false
+
+---
+
+### 第二步: 分层评估体系(每个维度满分10分)
+
+#### 维度1: 标题语义 (权重15%)
+- 10分: 明确包含"教程/方法/技巧/如何/原理/攻略/指南/X步"等教学词
+- 7分: 包含"合集/总结/分享XX方法"等整理型词汇
+- 4分: 描述性标题但暗示有方法论
+- 0分: 纯展示型标题或单案例描述
+
+#### 维度2: 封面首图 (维度2-4合计权重60%,各占20%)
+- 10分: 包含步骤编号/流程图/对比图/知识框架图
+- 7分: 有明显的教学性文字标注或视觉引导
+- 4分: 有多个知识点的视觉呈现
+- 0分: 单一作品展示或纯美图
+
+#### 维度3: 多图教学性 (维度2-4合计权重60%,各占20%)
+- 10分: 多图形成步骤/对比/原理说明体系,有标注/序号/箭头
+- 7分: 多图展示不同方法/案例,有一定教学逻辑
+- 4分: 多图但教学性不明显
+- 0分: 多图仅为作品多角度展示
+
+#### 维度4: 内容结构 (维度2-4合计权重60%,各占20%)
+- 10分: 有清晰的知识框架(原理→方法→案例,或问题→方案→总结)
+- 7分: 有分层次的内容组织(分章节/要点/步骤展示)
+- 4分: 有一定逻辑但不够系统
+- 0分: 流水账式/单线性叙述
+
+#### 维度5: 正文步骤性 (维度5-6合计权重25%,各占12.5%)
+- 10分: 有清晰的步骤序号和完整流程(≥3步)
+- 7分: 有步骤描述但不够系统化
+- 4分: 有零散的方法提及
+- 0分: 无步骤,纯叙事或展示
+
+#### 维度6: 知识提炼度 (维度5-6合计权重25%,各占12.5%)
+- 10分: 有明确的总结/归纳/对比/框架化输出
+- 7分: 有一定的知识整理
+- 4分: 有零散总结
+- 0分: 无任何知识提炼
+
+---
+
+### 第三步: 综合计算
+
+**加权总分计算**(各维度为0-10分,乘以10换算为0-100分):
+```
+总分 = (维度1×0.15 + (维度2+维度3+维度4)×0.6/3 + (维度5+维度6)×0.25/2) × 10
+```
+
+**满足度等级**:
+- 90-100分: 5星 ⭐⭐⭐⭐⭐ 优质内容知识
+- 75-89分: 4星 ⭐⭐⭐⭐ 良好内容知识
+- 60-74分: 3星 ⭐⭐⭐ 基础内容知识
+- 45-59分: 2星 ⭐⭐ 弱内容知识倾向
+- 0-44分: 1星 ⭐ 非内容知识
+
+---
+
+## 输出格式
+
+请严格按照以下JSON格式输出:
+
+{{
+  "is_knowledge": true/false,
+  "quick_exclude": false,
+  "dimension_scores": {{
+    "标题语义": 8,
+    "封面首图": 9,
+    "多图教学性": 10,
+    "内容结构": 7,
+    "正文步骤性": 9,
+    "知识提炼度": 8
+  }},
+  "weighted_score": 85.25,
+  "level": 4,
+  "evidence": [
+    "证据1",
+    "证据2"
+  ],
+  "issues": [
+    "问题1"
+  ],
+  "summary": "总结陈述(2-3句话)"
+}}
+
+## 重要提示
+- 严格按照评分标准打分
+- 各维度按0-10分打分,weighted_score按0-100分输出
+- 图片层占60%权重,重点评估
+- 综合得分>=60分才判定为知识内容
+"""
+
+# Prompt template for the relevance evaluation. It is filled with str.format()
+# using original_query, title, body_text and num_images; the doubled braces
+# around the JSON example are str.format() escapes for literal braces.
+RELEVANCE_EVALUATION_PROMPT = """# 相关性评估系统
+
+## 角色定义
+你是一位专业的多模态内容评估专家,擅长分析社交媒体UGC平台的帖子内容,能够精准判断帖子与用户搜索需求的匹配程度。
+
+## 任务说明
+评估帖子与原始搜索需求的匹配程度。
+
+---
+
+## 输入信息
+
+**原始搜索需求:** {original_query}
+
+**多模态帖子内容:**
+- **标题:** {title}
+- **正文:** {body_text}
+- **图片数量:** {num_images}张
+
+---
+
+## 评估维度
+
+### 1. 目的性匹配判断(权重:70%)
+
+**分析要点:**
+- 识别原始需求中的**核心动词/意图**(如:推荐、教程、评测、对比、寻找、了解等)
+- 判断帖子是否实质性地**解答或满足**了这个目的
+- 评估帖子内容的**实用性和完整性**
+
+**评分标准(0-100分):**
+- 90-100分:完全解答需求,内容实用且完整
+- 70-89分:基本解答需求,但信息不够全面或深入
+- 40-69分:部分相关,但核心目的未充分满足
+- 10-39分:仅有微弱关联,未真正解答需求
+- 0-9分:完全不相关
+
+---
+
+### 2. 品类匹配判断(权重:30%)
+
+**分析要点:**
+- 从**图片内容**中识别:产品类别、场景、属性特征
+- 从**标题和正文**中提取:品类名称、产品类型、关键词
+- 将提取的品类信息与**原始需求中的品类**进行对比
+- 判断品类的**一致性、包含关系或相关性**
+
+**评分标准(0-100分):**
+- 90-100分:品类完全一致,精准匹配
+- 70-89分:品类高度相关,属于同类或子类
+- 40-69分:品类部分相关,有交叉但存在偏差
+- 10-39分:品类关联较弱,仅边缘相关
+- 0-9分:品类完全不匹配
+
+---
+
+## 综合评分计算
+
+**总分 = 目的性匹配得分 × 0.7 + 品类匹配得分 × 0.3**
+
+**匹配结论:**
+- 85-100分:高度匹配
+- 65-84分:基本匹配
+- 40-64分:部分匹配
+- 0-39分:不匹配
+
+---
+
+## 输出格式
+
+请严格按照以下JSON格式输出:
+
+{{
+  "purpose_score": 85.0,
+  "category_score": 90.0,
+  "total_score": 86.5,
+  "conclusion": "高度匹配",
+  "summary": "总结说明(2-3句话)"
+}}
+
+## 重要提示
+- 目的性权重70%,是评估重点
+- 综合考虑文本和图片信息
+- 评分要客观公正,避免主观偏好
+"""
+
+
+# ============================================================================
+# 核心评估函数
+# ============================================================================
+
+async def evaluate_knowledge_v2(
+    post,
+    semaphore: Optional[asyncio.Semaphore] = None
+) -> Optional[KnowledgeEvaluation]:
+    """
+    Evaluate the knowledge attributes of a post (six-dimension scoring).
+
+    Args:
+        post: Post-like object; `type`, `images` and `note_id` are read here.
+        semaphore: Optional semaphore held for the duration of the API call.
+
+    Returns:
+        The parsed evaluation, or None for video posts and whenever the
+        underlying evaluation raises (the error is printed, not propagated).
+    """
+    if post.type == "video":
+        return None
+
+    # At most MAX_IMAGES_PER_POST images are sent along with the prompt.
+    attached_images = (post.images or [])[:MAX_IMAGES_PER_POST]
+
+    try:
+        if not semaphore:
+            return await _evaluate_knowledge_internal(post, attached_images)
+        async with semaphore:
+            return await _evaluate_knowledge_internal(post, attached_images)
+    except Exception as err:
+        print(f"      ❌ 知识评估失败: {post.note_id} - {str(err)[:100]}")
+        return None
+
+
+def _strip_code_fence(text: str) -> str:
+    """Remove a leading ```json / ``` marker and a trailing ``` marker."""
+    text = text.strip()
+    if text.startswith("```json"):
+        text = text[7:]
+    elif text.startswith("```"):
+        text = text[3:]
+    if text.endswith("```"):
+        text = text[:-3]
+    return text.strip()
+
+
+def _get_or_default(data: dict, key: str, default):
+    """Return data[key], falling back to default when it is missing or null."""
+    value = data.get(key)
+    return default if value is None else value
+
+
+async def _request_json_completion(prompt_text: str, image_urls: list[str]) -> dict:
+    """Send one multimodal chat request and return the model's JSON object.
+
+    Args:
+        prompt_text: Fully formatted prompt, sent as the text part.
+        image_urls: Image URLs attached after the text part.
+
+    Raises:
+        ValueError: OPENROUTER_API_KEY is not set, the response body lacks
+            the expected message content, or the content is not a JSON object.
+        Exception: The API answered with a non-200 status code.
+    """
+    api_key = os.getenv("OPENROUTER_API_KEY")
+    if not api_key:
+        raise ValueError("OPENROUTER_API_KEY environment variable not set")
+
+    content = [{"type": "text", "text": prompt_text}]
+    content.extend(
+        {"type": "image_url", "image_url": {"url": url}} for url in image_urls
+    )
+
+    payload = {
+        "model": MODEL_NAME,
+        "messages": [{"role": "user", "content": content}],
+        "response_format": {"type": "json_object"}
+    }
+
+    headers = {
+        "Authorization": f"Bearer {api_key}",
+        "Content-Type": "application/json"
+    }
+
+    # requests is blocking, so run it in a worker thread to keep the event
+    # loop free for the other evaluation running concurrently.
+    response = await asyncio.to_thread(
+        requests.post,
+        "https://openrouter.ai/api/v1/chat/completions",
+        headers=headers,
+        json=payload,
+        timeout=API_TIMEOUT
+    )
+
+    if response.status_code != 200:
+        raise Exception(f"API error: {response.status_code} - {response.text[:200]}")
+
+    try:
+        content_text = response.json()["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError) as err:
+        raise ValueError(
+            f"Unexpected API response: {response.text[:200]}"
+        ) from err
+    if not isinstance(content_text, str):
+        raise ValueError(f"Unexpected API response: {response.text[:200]}")
+
+    # The model sometimes wraps its JSON in a Markdown code fence.
+    data = json.loads(_strip_code_fence(content_text))
+    if not isinstance(data, dict):
+        raise ValueError(f"Expected a JSON object, got {type(data).__name__}")
+    return data
+
+
+async def _evaluate_knowledge_internal(post, image_urls: list[str]) -> KnowledgeEvaluation:
+    """Run the knowledge prompt for one post and parse the answer.
+
+    Missing or null fields in the model's JSON fall back to the same defaults,
+    so a stray null does not discard the whole evaluation.
+
+    Raises:
+        Whatever _request_json_completion raises, plus pydantic validation
+        errors for values of the wrong type.
+    """
+    prompt_text = KNOWLEDGE_EVALUATION_PROMPT.format(
+        title=post.title,
+        body_text=post.body_text or "",
+        num_images=len(image_urls)
+    )
+
+    data = await _request_json_completion(prompt_text, image_urls)
+
+    return KnowledgeEvaluation(
+        is_knowledge=_get_or_default(data, "is_knowledge", False),
+        quick_exclude=_get_or_default(data, "quick_exclude", False),
+        dimension_scores=_get_or_default(data, "dimension_scores", {}),
+        weighted_score=_get_or_default(data, "weighted_score", 0.0),
+        level=_get_or_default(data, "level", 1),
+        evidence=_get_or_default(data, "evidence", []),
+        issues=_get_or_default(data, "issues", []),
+        summary=_get_or_default(data, "summary", "")
+    )
+
+
+async def evaluate_relevance_v2(
+    post,
+    original_query: str,
+    semaphore: Optional[asyncio.Semaphore] = None
+) -> Optional[RelevanceEvaluation]:
+    """
+    Evaluate how well a post matches the original query (two-dimension scoring).
+
+    Args:
+        post: Post-like object; `type`, `images` and `note_id` are read here.
+        original_query: The original search query the post is compared against.
+        semaphore: Optional semaphore held for the duration of the API call.
+
+    Returns:
+        The parsed evaluation, or None for video posts and whenever the
+        underlying evaluation raises (the error is printed, not propagated).
+    """
+    if post.type == "video":
+        return None
+
+    image_urls = post.images[:MAX_IMAGES_PER_POST] if post.images else []
+
+    try:
+        if semaphore:
+            async with semaphore:
+                result = await _evaluate_relevance_internal(post, original_query, image_urls)
+        else:
+            result = await _evaluate_relevance_internal(post, original_query, image_urls)
+        return result
+    except Exception as e:
+        print(f"      ❌ 相关性评估失败: {post.note_id} - {str(e)[:100]}")
+        return None
+
+
+async def _evaluate_relevance_internal(
+    post,
+    original_query: str,
+    image_urls: list[str]
+) -> RelevanceEvaluation:
+    """Run the relevance prompt for one post and parse the answer.
+
+    Missing or null fields in the model's JSON fall back to the same defaults,
+    so a stray null does not discard the whole evaluation.
+
+    Raises:
+        Whatever _request_json_completion raises, plus pydantic validation
+        errors for values of the wrong type.
+    """
+    prompt_text = RELEVANCE_EVALUATION_PROMPT.format(
+        original_query=original_query,
+        title=post.title,
+        body_text=post.body_text or "",
+        num_images=len(image_urls)
+    )
+
+    data = await _request_json_completion(prompt_text, image_urls)
+
+    return RelevanceEvaluation(
+        purpose_score=_get_or_default(data, "purpose_score", 0.0),
+        category_score=_get_or_default(data, "category_score", 0.0),
+        total_score=_get_or_default(data, "total_score", 0.0),
+        conclusion=_get_or_default(data, "conclusion", "不匹配"),
+        summary=_get_or_default(data, "summary", "")
+    )
+
+
+async def evaluate_post_v2(
+    post,
+    original_query: str,
+    semaphore: Optional[asyncio.Semaphore] = None
+) -> tuple[Optional[KnowledgeEvaluation], Optional[RelevanceEvaluation]]:
+    """
+    Run the knowledge and relevance evaluations of one post concurrently.
+
+    Args:
+        post: Post-like object; `type` and `note_id` are read here.
+        original_query: The original search query used for relevance.
+        semaphore: Optional semaphore forwarded to both evaluations.
+
+    Returns:
+        (knowledge_eval, relevance_eval). Video posts yield (None, None);
+        either element is None when that evaluation returned None.
+    """
+    if post.type == "video":
+        print(f"      ⊗ 跳过视频帖子: {post.note_id}")
+        return None, None
+
+    print(f"      🔍 开始评估帖子: {post.note_id}")
+
+    # Both API calls are awaited together rather than one after the other.
+    knowledge_eval, relevance_eval = await asyncio.gather(
+        evaluate_knowledge_v2(post, semaphore),
+        evaluate_relevance_v2(post, original_query, semaphore),
+    )
+
+    if not (knowledge_eval and relevance_eval):
+        print(f"      ⚠️  部分评估失败: {post.note_id}")
+    else:
+        print(
+            f"      ✅ 评估完成: {post.note_id}"
+            f" | 知识:{knowledge_eval.weighted_score:.1f}分 {knowledge_eval.level}星"
+            f" | 相关性:{relevance_eval.total_score:.1f}分 {relevance_eval.conclusion}"
+        )
+
+    return knowledge_eval, relevance_eval
+
+
+def apply_evaluation_v2_to_post(
+    post,
+    knowledge_eval: Optional[KnowledgeEvaluation],
+    relevance_eval: Optional[RelevanceEvaluation]
+):
+    """
+    Copy V2 evaluation results onto a Post object.
+
+    Each evaluation is optional; a None evaluation leaves its fields on the
+    post untouched. The evaluation time and version are always set.
+    """
+    if knowledge_eval:
+        score = knowledge_eval.weighted_score
+        stars = knowledge_eval.level
+        full_summary = knowledge_eval.summary
+
+        # Top-level fields for quick access; the reason is a shortened summary.
+        post.is_knowledge = knowledge_eval.is_knowledge
+        post.knowledge_score = score
+        post.knowledge_level = stars
+        post.knowledge_reason = full_summary[:100]
+
+        # Nested details.
+        post.knowledge_evaluation = {
+            "quick_exclude": knowledge_eval.quick_exclude,
+            "dimension_scores": knowledge_eval.dimension_scores,
+            "weighted_score": score,
+            "level": stars,
+            "level_text": "⭐" * stars,
+            "evidence": knowledge_eval.evidence,
+            "issues": knowledge_eval.issues,
+            "summary": full_summary
+        }
+
+    if relevance_eval:
+        total = relevance_eval.total_score
+
+        post.relevance_score = total
+        post.relevance_conclusion = relevance_eval.conclusion
+        post.relevance_reason = relevance_eval.summary[:150]
+
+        # Relevance level in the labels used by the older evaluator.
+        post.relevance_level = (
+            "高度相关" if total >= 85
+            else "中度相关" if total >= 65
+            else "低度相关"
+        )
+
+        # Nested details.
+        detail_fields = (
+            "purpose_score",
+            "category_score",
+            "total_score",
+            "conclusion",
+            "summary",
+        )
+        post.relevance_evaluation = {
+            name: getattr(relevance_eval, name) for name in detail_fields
+        }
+
+    # Stamp the evaluation time and evaluator version.
+    post.evaluation_time = datetime.now().isoformat()
+    post.evaluator_version = "v2.0"
+
+
+async def batch_evaluate_posts_v2(
+    posts: list,
+    original_query: str,
+    max_concurrent: int = MAX_CONCURRENT_EVALUATIONS
+) -> int:
+    """
+    Evaluate many posts at once (V2) and apply the results to them.
+
+    Args:
+        posts: Posts to evaluate; they are updated in place.
+        original_query: The original search query used for relevance.
+        max_concurrent: Size of the semaphore shared by all evaluations.
+
+    Returns:
+        Number of posts for which both evaluations succeeded.
+    """
+    semaphore = asyncio.Semaphore(max_concurrent)
+
+    print(f"\n📊 开始批量评估 {len(posts)} 个帖子(并发限制: {max_concurrent})...")
+
+    results = await asyncio.gather(
+        *(evaluate_post_v2(post, original_query, semaphore) for post in posts)
+    )
+
+    success_count = 0
+    for post, (knowledge_eval, relevance_eval) in zip(posts, results):
+        # A post is updated only when both evaluations produced a result.
+        if not (knowledge_eval and relevance_eval):
+            continue
+        apply_evaluation_v2_to_post(post, knowledge_eval, relevance_eval)
+        success_count += 1
+
+    print(f"✅ 批量评估完成: 成功 {success_count}/{len(posts)}")
+
+    return success_count

+ 245 - 0
test_evaluation_v2.py

@@ -0,0 +1,245 @@
+"""
+测试评估V2模块
+从现有run_context.json读取帖子,使用V2评估模块重新评估,生成统计报告
+"""
+
+import asyncio
+import json
+import sys
+from pathlib import Path
+from datetime import datetime
+from collections import defaultdict
+
+# 导入必要的模块
+from knowledge_search_traverse import Post
+from post_evaluator_v2 import evaluate_post_v2, apply_evaluation_v2_to_post
+
+
+def _percent(part: int, whole: int) -> float:
+    """Return part/whole as a percentage, or 0.0 when whole is zero."""
+    return part / whole * 100 if whole else 0.0
+
+
+async def test_evaluation_v2(run_context_path: str, max_posts: int = 10):
+    """
+    Re-evaluate posts from an existing run_context.json with the V2 evaluator.
+
+    Loads the file, evaluates the posts concurrently, writes the results back
+    into the loaded structure, saves it next to the input as
+    "<name>_v2<suffix>" and prints a statistics report.
+
+    Args:
+        run_context_path: Path to run_context.json.
+        max_posts: Maximum number of posts to evaluate (for quick tests);
+            a falsy value evaluates every post.
+
+    Returns:
+        List of (round_idx, search_idx, post_id, post, knowledge_eval,
+        relevance_eval) tuples, one per successfully evaluated post.
+    """
+    print(f"\n{'='*80}")
+    print(f"📊 评估V2测试 - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+    print(f"{'='*80}\n")
+
+    # Load run_context.json.
+    print(f"📂 读取: {run_context_path}")
+    with open(run_context_path, 'r', encoding='utf-8') as f:
+        run_context = json.load(f)
+
+    # The original query is stored under the key 'o'.
+    original_query = run_context.get('o', '')
+    print(f"🔍 原始Query: {original_query}\n")
+
+    # Collect every post (rounds -> search_results -> post_list).
+    post_data_list = []
+    rounds = run_context.get('rounds', [])
+
+    for round_idx, round_data in enumerate(rounds):
+        search_results = round_data.get('search_results', [])
+        for search_idx, search in enumerate(search_results):
+            post_list = search.get('post_list', [])
+            for post_idx, post_data in enumerate(post_list):
+                # Synthetic id, used when the post has no note_id.
+                post_id = f"r{round_idx}_s{search_idx}_p{post_idx}"
+                post_data_list.append((round_idx, search_idx, post_id, post_data))
+
+    total_posts = len(post_data_list)
+    print(f"📝 找到 {total_posts} 个帖子 (来自 {len(rounds)} 轮)")
+
+    # Limit the number of evaluated posts (quick test mode).
+    if max_posts and max_posts < total_posts:
+        post_data_list = post_data_list[:max_posts]
+        print(f"⚡ 快速测试模式: 仅评估前 {max_posts} 个帖子\n")
+    else:
+        print()
+
+    # Build Post objects. The raw dict is kept next to each Post so results
+    # can be written back to exactly the entry they came from, even when
+    # note_id is missing or duplicated.
+    posts = []
+    for round_idx, search_idx, post_id, post_data in post_data_list:
+        post = Post(
+            note_id=post_data.get('note_id', post_id),
+            title=post_data.get('title', ''),
+            body_text=post_data.get('body_text', ''),
+            images=post_data.get('images', []),
+            type=post_data.get('type', 'normal')
+        )
+        posts.append((round_idx, search_idx, post_id, post, post_data))
+
+    # Evaluate all posts concurrently; the semaphore caps in-flight API calls.
+    print(f"🚀 开始批量评估 (并发数: 5)...\n")
+
+    semaphore = asyncio.Semaphore(5)
+    evaluations = await asyncio.gather(
+        *(evaluate_post_v2(post, original_query, semaphore)
+          for _, _, _, post, _ in posts)
+    )
+
+    results = []
+    evaluated_sources = []  # raw post dicts, parallel to `results`
+    for i, (entry, evaluation) in enumerate(zip(posts, evaluations), 1):
+        round_idx, search_idx, post_id, post, post_data = entry
+        knowledge_eval, relevance_eval = evaluation
+        print(f"  [{i}/{len(posts)}] 评估: {post.note_id}")
+
+        if knowledge_eval and relevance_eval:
+            apply_evaluation_v2_to_post(post, knowledge_eval, relevance_eval)
+            results.append((round_idx, search_idx, post_id, post, knowledge_eval, relevance_eval))
+            evaluated_sources.append(post_data)
+            print(f"      ✅ 知识:{post.knowledge_score:.0f}分({post.knowledge_level}⭐) | 相关:{post.relevance_score:.0f}分({post.relevance_conclusion})")
+        else:
+            print(f"      ❌ 评估失败")
+
+    print(f"\n✅ 评估完成: {len(results)}/{len(posts)} 成功\n")
+
+    # Write the results back into the loaded run_context structure.
+    print("💾 更新 run_context.json...")
+    for post_data, (_, _, _, post, _, _) in zip(evaluated_sources, results):
+        post_data.update({
+            # Top-level fields.
+            'is_knowledge': post.is_knowledge,
+            'knowledge_reason': post.knowledge_reason,
+            'knowledge_score': post.knowledge_score,
+            'knowledge_level': post.knowledge_level,
+            'relevance_score': post.relevance_score,
+            'relevance_level': post.relevance_level,
+            'relevance_reason': post.relevance_reason,
+            'relevance_conclusion': post.relevance_conclusion,
+            'evaluation_time': post.evaluation_time,
+            'evaluator_version': post.evaluator_version,
+            # Nested details.
+            'knowledge_evaluation': post.knowledge_evaluation,
+            'relevance_evaluation': post.relevance_evaluation,
+        })
+
+    # Save next to the input. Only the file name changes, so directory names
+    # containing ".json" are untouched and the input is never overwritten.
+    source_path = Path(run_context_path)
+    output_path = source_path.with_name(f"{source_path.stem}_v2{source_path.suffix}")
+    with open(output_path, 'w', encoding='utf-8') as f:
+        json.dump(run_context, f, ensure_ascii=False, indent=2)
+    print(f"✅ 已保存: {output_path}\n")
+
+    # Statistics report.
+    print(f"\n{'='*80}")
+    print("📊 统计报告")
+    print(f"{'='*80}\n")
+
+    # Knowledge statistics.
+    knowledge_counts = defaultdict(int)
+    knowledge_level_counts = defaultdict(int)
+    knowledge_scores = []
+
+    for _, _, _, post, _, _ in results:
+        if post.is_knowledge:
+            knowledge_counts['知识内容'] += 1
+        else:
+            knowledge_counts['非知识内容'] += 1
+
+        if post.knowledge_level:
+            knowledge_level_counts[post.knowledge_level] += 1
+
+        if post.knowledge_score is not None:
+            knowledge_scores.append(post.knowledge_score)
+
+    # total may be 0 when every evaluation failed; _percent guards the division.
+    total = len(results)
+    print("📚 知识评估:")
+    print(f"  知识内容:   {knowledge_counts['知识内容']:3d} / {total} ({_percent(knowledge_counts['知识内容'], total):.1f}%)")
+    print(f"  非知识内容: {knowledge_counts['非知识内容']:3d} / {total} ({_percent(knowledge_counts['非知识内容'], total):.1f}%)")
+    print()
+
+    if knowledge_scores:
+        avg_score = sum(knowledge_scores) / len(knowledge_scores)
+        print(f"  平均得分: {avg_score:.1f}分")
+        print(f"  最高得分: {max(knowledge_scores):.0f}分")
+        print(f"  最低得分: {min(knowledge_scores):.0f}分")
+        print()
+
+    print("  星级分布:")
+    for level in range(1, 6):
+        count = knowledge_level_counts.get(level, 0)
+        bar = '★' * count
+        print(f"    {level}星: {count:3d} {bar}")
+    print()
+
+    # Relevance statistics.
+    relevance_conclusion_counts = defaultdict(int)
+    relevance_scores = []
+    purpose_scores = []
+    category_scores = []
+
+    for _, _, _, post, _, _ in results:
+        if post.relevance_conclusion:
+            relevance_conclusion_counts[post.relevance_conclusion] += 1
+
+        if post.relevance_score is not None:
+            relevance_scores.append(post.relevance_score)
+
+        if post.relevance_evaluation:
+            if 'purpose_score' in post.relevance_evaluation:
+                purpose_scores.append(post.relevance_evaluation['purpose_score'])
+            if 'category_score' in post.relevance_evaluation:
+                category_scores.append(post.relevance_evaluation['category_score'])
+
+    print("🎯 相关性评估:")
+    # Labels as defined by the relevance prompt, followed by any other label
+    # the model returned so nothing is silently left out of the report.
+    known_conclusions = ['高度匹配', '基本匹配', '部分匹配', '不匹配']
+    other_conclusions = [c for c in relevance_conclusion_counts if c not in known_conclusions]
+    for conclusion in known_conclusions + other_conclusions:
+        count = relevance_conclusion_counts.get(conclusion, 0)
+        if count > 0:
+            print(f"  {conclusion}: {count:3d} / {total} ({_percent(count, total):.1f}%)")
+    print()
+
+    if relevance_scores:
+        avg_score = sum(relevance_scores) / len(relevance_scores)
+        high_relevance = sum(1 for s in relevance_scores if s >= 70)
+        print(f"  平均得分:   {avg_score:.1f}分")
+        print(f"  高相关性:   {high_relevance} / {total} ({_percent(high_relevance, total):.1f}%) [≥70分]")
+        print(f"  最高得分:   {max(relevance_scores):.0f}分")
+        print(f"  最低得分:   {min(relevance_scores):.0f}分")
+        print()
+
+    if purpose_scores and category_scores:
+        avg_purpose = sum(purpose_scores) / len(purpose_scores)
+        avg_category = sum(category_scores) / len(category_scores)
+        print(f"  目的性平均: {avg_purpose:.1f}分 (权重70%)")
+        print(f"  品类平均:   {avg_category:.1f}分 (权重30%)")
+        print()
+
+    # Combined view: knowledge content that is also highly relevant.
+    print("🔥 高质量内容 (知识内容 + 高相关性):")
+    high_quality = sum(
+        1 for _, _, _, post, _, _ in results
+        if post.is_knowledge and post.relevance_score and post.relevance_score >= 70
+    )
+    print(f"  {high_quality} / {total} ({_percent(high_quality, total):.1f}%)")
+    print()
+
+    print(f"{'='*80}\n")
+
+    return results
+
+
+if __name__ == "__main__":
+    # Command line: <run_context.json path> [max number of posts to evaluate]
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        usage_lines = [
+            "用法: python3 test_evaluation_v2.py <run_context.json路径> [最大评估数量]",
+            "",
+            "示例:",
+            "  python3 test_evaluation_v2.py input/test_case/output/knowledge_search_traverse/20251112/173512_dc/run_context.json",
+            "  python3 test_evaluation_v2.py input/test_case/output/knowledge_search_traverse/20251112/173512_dc/run_context.json 20",
+        ]
+        print("\n".join(usage_lines))
+        sys.exit(1)
+
+    run_context_path = cli_args[0]
+    # Without a second argument every post is evaluated.
+    max_posts = int(cli_args[1]) if len(cli_args) > 1 else None
+
+    asyncio.run(test_evaluation_v2(run_context_path, max_posts))

+ 8 - 2
visualization/knowledge_search_traverse/convert_v8_to_graph_v3.js

@@ -459,12 +459,18 @@ function convertV8ToGraphV2(runContext, searchResults, extractionData) {
                   interact_info: post.interact_info || {},
                   // 附加多模态提取数据
                   extraction: extractionData && extractionData[post.note_id] ? extractionData[post.note_id] : null,
-                  // 评估数据
+                  // 评估数据 (V2)
                   is_knowledge: post.is_knowledge !== undefined ? post.is_knowledge : null,
                   knowledge_reason: post.knowledge_reason || '',
+                  knowledge_score: post.knowledge_score !== undefined ? post.knowledge_score : null,
+                  knowledge_level: post.knowledge_level !== undefined ? post.knowledge_level : null,
+                  knowledge_evaluation: post.knowledge_evaluation || null,
+
                   post_relevance_score: post.relevance_score !== undefined ? post.relevance_score : null,
                   relevance_level: post.relevance_level || '',
-                  relevance_reason: post.relevance_reason || ''
+                  relevance_reason: post.relevance_reason || '',
+                  relevance_conclusion: post.relevance_conclusion || '',
+                  relevance_evaluation: post.relevance_evaluation || null
                 };
 
                 edges.push({