2 месяцев назад · 2bc234ea9e
--- a/analyze_stage6_results.py
+++ b/analyze_stage6_results.py
@@ -0,0 +1,236 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+Stage 6 评估结果统计分析
			
 
				+分析两层评估的过滤效果和匹配质量
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from typing import Dict, List, Any
			
 
				+from collections import defaultdict
			
 
				+
			
 
				+
			
 
				+def load_stage6_results(file_path: str) -> List[Dict[str, Any]]:
			
 
				+    """加载Stage 6评估结果"""
			
 
				+    with open(file_path, 'r', encoding='utf-8') as f:
			
 
				+        return json.load(f)
			
 
				+
			
 
				+
			
 
				+def analyze_evaluation_results(data: List[Dict[str, Any]]) -> Dict[str, Any]:
			
 
				+    """分析评估结果"""
			
 
				+
			
 
				+    # 全局统计
			
 
				+    global_stats = {
			
 
				+        'total_search_words': 0,
			
 
				+        'total_notes_evaluated': 0,
			
 
				+        'total_filtered': 0,
			
 
				+        'match_distribution': {
			
 
				+            '完全匹配(8-10)': 0,
			
 
				+            '相似匹配(6-7)': 0,
			
 
				+            '弱相似(5-6)': 0,
			
 
				+            '无匹配(≤4)': 0
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    # 按原始特征分组统计
			
 
				+    feature_stats = defaultdict(lambda: {
			
 
				+        'search_words_count': 0,
			
 
				+        'total_notes': 0,
			
 
				+        'total_filtered': 0,
			
 
				+        'match_distribution': {
			
 
				+            '完全匹配(8-10)': 0,
			
 
				+            '相似匹配(6-7)': 0,
			
 
				+            '弱相似(5-6)': 0,
			
 
				+            '无匹配(≤4)': 0
			
 
				+        },
			
 
				+        'search_words': []
			
 
				+    })
			
 
				+
			
 
				+    # 所有搜索词的详细统计
			
 
				+    search_word_details = []
			
 
				+
			
 
				+    # 遍历所有原始特征
			
 
				+    for feature_result in data:
			
 
				+        original_feature = feature_result.get('原始特征名称', 'Unknown')
			
 
				+
			
 
				+        # 从组合评估结果_分组中读取
			
 
				+        grouped_results = feature_result.get('组合评估结果_分组', [])
			
 
				+
			
 
				+        for group in grouped_results:
			
 
				+            base_word = group.get('base_word', '')
			
 
				+
			
 
				+            for eval_item in group.get('top10_searches', []):
			
 
				+                # 检查是否有评估结果
			
 
				+                evaluation = eval_item.get('evaluation_with_filter')
			
 
				+                if not evaluation:
			
 
				+                    continue
			
 
				+
			
 
				+                search_word = eval_item.get('search_word', '')
			
 
				+
			
 
				+                # 提取评估数据
			
 
				+                total_notes = evaluation.get('total_notes', 0)
			
 
				+                evaluated_notes = evaluation.get('evaluated_notes', 0)
			
 
				+                filtered_count = evaluation.get('filtered_count', 0)
			
 
				+                statistics = evaluation.get('statistics', {})
			
 
				+
			
 
				+                # 更新全局统计
			
 
				+                global_stats['total_search_words'] += 1
			
 
				+                global_stats['total_notes_evaluated'] += total_notes
			
 
				+                global_stats['total_filtered'] += filtered_count
			
 
				+
			
 
				+                for key in global_stats['match_distribution']:
			
 
				+                    global_stats['match_distribution'][key] += statistics.get(key, 0)
			
 
				+
			
 
				+                # 更新特征统计
			
 
				+                feature_stats[original_feature]['search_words_count'] += 1
			
 
				+                feature_stats[original_feature]['total_notes'] += total_notes
			
 
				+                feature_stats[original_feature]['total_filtered'] += filtered_count
			
 
				+
			
 
				+                for key in feature_stats[original_feature]['match_distribution']:
			
 
				+                    feature_stats[original_feature]['match_distribution'][key] += statistics.get(key, 0)
			
 
				+
			
 
				+                # 记录搜索词详情
			
 
				+                search_word_info = {
			
 
				+                    'original_feature': original_feature,
			
 
				+                    'base_word': base_word,
			
 
				+                    'search_word': search_word,
			
 
				+                    'total_notes': total_notes,
			
 
				+                    'evaluated_notes': evaluated_notes,
			
 
				+                    'filtered_count': filtered_count,
			
 
				+                    'match_distribution': statistics,
			
 
				+                    'high_quality_count': statistics.get('完全匹配(8-10)', 0),
			
 
				+                    'similar_count': statistics.get('相似匹配(6-7)', 0)
			
 
				+                }
			
 
				+
			
 
				+                search_word_details.append(search_word_info)
			
 
				+                feature_stats[original_feature]['search_words'].append(search_word_info)
			
 
				+
			
 
				+    # 计算全局过滤率
			
 
				+    if global_stats['total_notes_evaluated'] > 0:
			
 
				+        global_stats['filter_rate'] = global_stats['total_filtered'] / global_stats['total_notes_evaluated']
			
 
				+    else:
			
 
				+        global_stats['filter_rate'] = 0.0
			
 
				+
			
 
				+    # 计算每个特征的过滤率
			
 
				+    for feature_name, stats in feature_stats.items():
			
 
				+        if stats['total_notes'] > 0:
			
 
				+            stats['filter_rate'] = stats['total_filtered'] / stats['total_notes']
			
 
				+        else:
			
 
				+            stats['filter_rate'] = 0.0
			
 
				+
			
 
				+    # 按高质量匹配数排序搜索词
			
 
				+    search_word_details.sort(key=lambda x: x['high_quality_count'], reverse=True)
			
 
				+
			
 
				+    return {
			
 
				+        'global_stats': global_stats,
			
 
				+        'feature_stats': dict(feature_stats),
			
 
				+        'search_word_details': search_word_details
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def print_statistics(stats: Dict[str, Any]):
			
 
				+    """打印统计结果"""
			
 
				+    global_stats = stats['global_stats']
			
 
				+    feature_stats = stats['feature_stats']
			
 
				+    search_word_details = stats['search_word_details']
			
 
				+
			
 
				+    print("=" * 80)
			
 
				+    print("Stage 6 评估结果统计分析")
			
 
				+    print("=" * 80)
			
 
				+
			
 
				+    # 全局统计
			
 
				+    print("\n【全局统计】")
			
 
				+    print(f"  总搜索词数: {global_stats['total_search_words']}")
			
 
				+    print(f"  总评估帖子数: {global_stats['total_notes_evaluated']}")
			
 
				+    print(f"  总过滤帖子数: {global_stats['total_filtered']} (过滤率: {global_stats['filter_rate']*100:.1f}%)")
			
 
				+    print(f"\n  匹配度分布:")
			
 
				+    for match_type, count in global_stats['match_distribution'].items():
			
 
				+        print(f"    {match_type}: {count} 个帖子")
			
 
				+
			
 
				+    # 按原始特征统计
			
 
				+    print("\n" + "=" * 80)
			
 
				+    print("【按原始特征统计】")
			
 
				+    print("=" * 80)
			
 
				+
			
 
				+    for feature_name, stats in sorted(feature_stats.items()):
			
 
				+        print(f"\n特征: {feature_name}")
			
 
				+        print(f"  搜索词数: {stats['search_words_count']}")
			
 
				+        print(f"  总评估帖子: {stats['total_notes']}")
			
 
				+        print(f"  总过滤帖子: {stats['total_filtered']} (过滤率: {stats['filter_rate']*100:.1f}%)")
			
 
				+        print(f"  高质量匹配: {stats['match_distribution']['完全匹配(8-10)']} 个帖子")
			
 
				+        print(f"  相似匹配: {stats['match_distribution']['相似匹配(6-7)']} 个帖子")
			
 
				+
			
 
				+        # 找出该特征下高质量匹配最多的搜索词
			
 
				+        best_searches = sorted(stats['search_words'], key=lambda x: x['high_quality_count'], reverse=True)[:3]
			
 
				+        if best_searches:
			
 
				+            print(f"  Top 3 最佳搜索词:")
			
 
				+            for idx, sw in enumerate(best_searches, 1):
			
 
				+                print(f"    {idx}. \"{sw['search_word']}\" - {sw['high_quality_count']}个完全匹配")
			
 
				+
			
 
				+    # Top 10 最佳搜索词
			
 
				+    print("\n" + "=" * 80)
			
 
				+    print("【Top 10 最佳搜索词（按完全匹配数排序）】")
			
 
				+    print("=" * 80)
			
 
				+
			
 
				+    for idx, sw in enumerate(search_word_details[:10], 1):
			
 
				+        print(f"\n{idx}. \"{sw['search_word']}\"")
			
 
				+        print(f"   原始特征: {sw['original_feature']}")
			
 
				+        print(f"   Base Word: {sw['base_word']}")
			
 
				+        print(f"   评估帖子: {sw['total_notes']}, 过滤: {sw['filtered_count']}")
			
 
				+        print(f"   完全匹配(8-10): {sw['high_quality_count']} 个")
			
 
				+        print(f"   相似匹配(6-7): {sw['similar_count']} 个")
			
 
				+
			
 
				+    # 过滤效果分析
			
 
				+    print("\n" + "=" * 80)
			
 
				+    print("【过滤效果分析】")
			
 
				+    print("=" * 80)
			
 
				+
			
 
				+    total_evaluated = global_stats['total_notes_evaluated']
			
 
				+    total_filtered = global_stats['total_filtered']
			
 
				+    total_remaining = total_evaluated - total_filtered
			
 
				+
			
 
				+    total_high_quality = global_stats['match_distribution']['完全匹配(8-10)']
			
 
				+    total_similar = global_stats['match_distribution']['相似匹配(6-7)']
			
 
				+    total_weak = global_stats['match_distribution']['弱相似(5-6)']
			
 
				+    total_no_match = global_stats['match_distribution']['无匹配(≤4)']
			
 
				+
			
 
				+    print(f"  评估帖子总数: {total_evaluated}")
			
 
				+    print(f"  第一层过滤（Query不相关）: {total_filtered} ({total_filtered/total_evaluated*100:.1f}%)")
			
 
				+    print(f"  通过过滤的帖子: {total_remaining} ({total_remaining/total_evaluated*100:.1f}%)")
			
 
				+    print(f"\n  通过过滤后的质量分布:")
			
 
				+    if total_remaining > 0:
			
 
				+        print(f"    完全匹配(8-10): {total_high_quality} ({total_high_quality/total_remaining*100:.1f}%)")
			
 
				+        print(f"    相似匹配(6-7): {total_similar} ({total_similar/total_remaining*100:.1f}%)")
			
 
				+        print(f"    弱相似(5-6): {total_weak} ({total_weak/total_remaining*100:.1f}%)")
			
 
				+        print(f"    无匹配(≤4): {total_no_match} ({total_no_match/total_remaining*100:.1f}%)")
			
 
				+
			
 
				+    print("\n" + "=" * 80)
			
 
				+
			
 
				+
			
 
				+def save_statistics(stats: Dict[str, Any], output_path: str):
			
 
				+    """保存统计结果到JSON文件"""
			
 
				+    with open(output_path, 'w', encoding='utf-8') as f:
			
 
				+        json.dump(stats, f, ensure_ascii=False, indent=2)
			
 
				+    print(f"\n统计结果已保存到: {output_path}")
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """主函数"""
			
 
				+    input_file = "output_v2/stage6_with_evaluations.json"
			
 
				+    output_file = "output_v2/stage6_statistics.json"
			
 
				+
			
 
				+    print("正在加载数据...")
			
 
				+    data = load_stage6_results(input_file)
			
 
				+
			
 
				+    print("正在分析评估结果...")
			
 
				+    stats = analyze_evaluation_results(data)
			
 
				+
			
 
				+    # 打印统计结果
			
 
				+    print_statistics(stats)
			
 
				+
			
 
				+    # 保存结果
			
 
				+    save_statistics(stats, output_file)
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/enhanced_search_v2.py
+++ b/enhanced_search_v2.py
@@ -60,7 +60,7 @@ class EnhancedSearchV2:
 
				         stage7_skip: int = 0,
			
 
				         stage7_sort_by: str = 'score',
			
 
				         stage7_api_url: str = "http://192.168.245.150:7000/what/analysis/single",
			
 
				-        stage7_min_score: float = 8.0
			
 
				+        stage7_min_score: float = 0.8
			
 
				     ):
			
 
				         """
			
 
				         初始化系统
			
--- a/llm_evaluator.py
+++ b/llm_evaluator.py
@@ -597,6 +597,280 @@ class LLMEvaluator:
 
				 
			
 
				     # ========== Stage 6: 两层评估方法 ==========
			
 
				 
			
 
				+    def evaluate_query_relevance_batch(
			
 
				+        self,
			
 
				+        search_query: str,
			
 
				+        notes: List[Dict[str, Any]],
			
 
				+        max_notes: int = 20
			
 
				+    ) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        第一层评估：批量判断搜索结果与 Query 的相关性
			
 
				+
			
 
				+        一次 LLM 调用评估多个笔记的 Query 相关性
			
 
				+
			
 
				+        Args:
			
 
				+            search_query: 搜索Query
			
 
				+            notes: 笔记列表
			
 
				+            max_notes: 最多评估几条笔记
			
 
				+
			
 
				+        Returns:
			
 
				+            {
			
 
				+              "note_0": {"与query相关性": "相关", "说明": "..."},
			
 
				+              "note_1": {"与query相关性": "不相关", "说明": "..."},
			
 
				+              ...
			
 
				+            }
			
 
				+        """
			
 
				+        if not notes:
			
 
				+            return {}
			
 
				+
			
 
				+        notes_to_eval = notes[:max_notes]
			
 
				+
			
 
				+        # 构建笔记列表文本
			
 
				+        notes_text = ""
			
 
				+        for idx, note in enumerate(notes_to_eval):
			
 
				+            note_card = note.get('note_card', {})
			
 
				+            title = note_card.get('display_title', '')
			
 
				+            content = note_card.get('desc', '')[:800]  # 限制长度
			
 
				+            images = note_card.get('image_list', [])
			
 
				+
			
 
				+            notes_text += f"note_{idx}:\n"
			
 
				+            notes_text += f"- 标题: {title}\n"
			
 
				+            notes_text += f"- 正文: {content}\n"
			
 
				+            notes_text += f"- 图像: {len(images)}张图片\n\n"
			
 
				+
			
 
				+        # 构建完整的第一层评估 Prompt（用户提供，不简化）
			
 
				+        prompt = f"""# 任务说明
			
 
				+判断搜索结果是否与搜索Query相关,过滤掉完全无关的结果。
			
 
				+
			
 
				+# 输入信息
			
 
				+
			
 
				+搜索Query: {search_query}
			
 
				+
			
 
				+搜索结果列表:
			
 
				+{notes_text}
			
 
				+
			
 
				+# 判断标准
			
 
				+✅ 相关(保留)
			
 
				+搜索结果的标题、正文或图像内容中包含Query相关的信息:
			
 
				+
			
 
				+Query的核心关键词在结果中出现
			
 
				+或 结果讨论的主题与Query直接相关
			
 
				+或 结果是Query概念的上位/下位/平行概念
			
 
				+
			
 
				+
			
 
				+❌ 不相关(过滤)
			
 
				+搜索结果与Query完全无关:
			
 
				+Query的关键词完全未出现
			
 
				+结果主题与Query无任何关联
			
 
				+仅因搜索引擎误匹配而出现
			
 
				+
			
 
				+
			
 
				+## 判断示例
			
 
				+Query "墨镜搭配" → 结果"太阳镜选购指南" ✅ 保留（墨镜=太阳镜）
			
 
				+Query "墨镜搭配" → 结果"眼镜搭配技巧" ✅ 保留（眼镜是墨镜的上位概念）
			
 
				+Query "墨镜搭配" → 结果"帽子搭配技巧" ❌ 过滤（完全无关）
			
 
				+Query "复古滤镜" → 结果"滤镜调色教程" ✅ 保留（包含滤镜）
			
 
				+Query "复古滤镜" → 结果"相机推荐" ❌ 过滤（主题不相关）
			
 
				+
			
 
				+# 输出格式
			
 
				+{{
			
 
				+  "note_0": {{
			
 
				+    "与query相关性": "相关 / 不相关",
			
 
				+    "说明": ""
			
 
				+  }},
			
 
				+  "note_1": {{
			
 
				+    "与query相关性": "相关 / 不相关",
			
 
				+    "说明": ""
			
 
				+  }}
			
 
				+}}
			
 
				+
			
 
				+# 特殊情况处理
			
 
				+
			
 
				+- 如果OCR提取的图像文字不完整或正文内容缺失,应在说明中注明,并根据实际可获取的信息进行判断
			
 
				+- 当无法明确判断时,倾向于保留(标记为"相关")
			
 
				+
			
 
				+只返回JSON，不要其他内容。"""
			
 
				+
			
 
				+        # 调用 LLM（批量评估）
			
 
				+        result = self.client.chat_json(
			
 
				+            prompt=prompt,
			
 
				+            max_retries=3
			
 
				+        )
			
 
				+
			
 
				+        if result:
			
 
				+            return result
			
 
				+        else:
			
 
				+            logger.error(f"  第一层批量评估失败: Query={search_query}")
			
 
				+            # 返回默认结果（全部标记为"相关"以保守处理）
			
 
				+            default_result = {}
			
 
				+            for idx in range(len(notes_to_eval)):
			
 
				+                default_result[f"note_{idx}"] = {
			
 
				+                    "与query相关性": "相关",
			
 
				+                    "说明": "LLM评估失败，默认保留"
			
 
				+                }
			
 
				+            return default_result
			
 
				+
			
 
				+    def evaluate_feature_matching_single(
			
 
				+        self,
			
 
				+        target_feature: str,
			
 
				+        note_title: str,
			
 
				+        note_content: str,
			
 
				+        note_images: List[str],
			
 
				+        note_index: int
			
 
				+    ) -> Dict[str, Any]:
			
 
				+        """
			
 
				+        第二层评估：评估单个笔记与目标特征的匹配度
			
 
				+
			
 
				+        Args:
			
 
				+            target_feature: 目标特征
			
 
				+            note_title: 笔记标题
			
 
				+            note_content: 笔记正文
			
 
				+            note_images: 图片URL列表
			
 
				+            note_index: 笔记索引
			
 
				+
			
 
				+        Returns:
			
 
				+            {
			
 
				+              "综合得分": 0.9,  # 0-1分
			
 
				+              "匹配类型": "完全匹配",
			
 
				+              "评分说明": "...",
			
 
				+              "关键匹配点": [...]
			
 
				+            }
			
 
				+        """
			
 
				+        # 构建完整的第二层评估 Prompt（用户提供，不简化）
			
 
				+        prompt = f"""# 任务说明
			
 
				+你需要判断搜索到的案例与目标特征的相关性。
			
 
				+
			
 
				+# 输入信息
			
 
				+目标特征：{target_feature}
			
 
				+
			
 
				+搜索结果：
			
 
				+- 标题: {note_title}
			
 
				+- 正文: {note_content[:800]}
			
 
				+- 图像: {len(note_images)}张图片（请仔细分析图片内容，包括OCR提取图片中的文字）
			
 
				+
			
 
				+# 判断流程
			
 
				+## 目标特征匹配度评分
			
 
				+综合考虑语义相似度（概念匹配、层级关系）和场景关联度（应用场景、使用语境）进行评分：
			
 
				+
			
 
				+- 0.8-1分：完全匹配
			
 
				+语义层面：找到与目标特征完全相同或高度一致的内容，核心概念完全一致
			
 
				+场景层面：完全适用于同一场景、受众、平台和语境
			
 
				+
			
 
				+示例：
			
 
				+目标"复古滤镜" + 小红书穿搭场景 vs 结果"小红书复古滤镜调色教程"
			
 
				+目标"墨镜" + 时尚搭配场景 vs 结果"时尚墨镜搭配指南"
			
 
				+
			
 
				+
			
 
				+- 0.6-0.7分：相似匹配
			
 
				+语义层面：
			
 
				+结果是目标的上位概念（更宽泛）或下位概念（更具体）
			
 
				+或属于同一概念的不同表现形式，或属于平行概念（同级不同类）
			
 
				+场景层面：场景相近但有差异，需要筛选或调整后可用
			
 
				+
			
 
				+示例：
			
 
				+目标"墨镜" + 时尚搭配 vs 结果"眼镜搭配技巧"（上位概念，需筛选）
			
 
				+目标"怀旧滤镜" + 人像拍摄 vs 结果"胶片感调色"（不同表现形式）
			
 
				+目标"日常穿搭" + 街拍 vs 结果"通勤穿搭拍照"（场景相近）
			
 
				+
			
 
				+
			
 
				+
			
 
				+- 0.5-0.6分：弱相似
			
 
				+语义层面：属于同一大类但具体方向或侧重点明显不同，仅提供了相关概念
			
 
				+场景层面：场景有明显差异，迁移需要较大改造
			
 
				+
			
 
				+示例：
			
 
				+目标"户外运动穿搭" vs 结果"健身房穿搭指南"
			
 
				+目标"小红书图文笔记" vs 结果"抖音短视频脚本"
			
 
				+
			
 
				+
			
 
				+- 0.4分及以下：无匹配
			
 
				+语义层面：仅表面词汇重叠，实质关联弱，或概念距离过远
			
 
				+场景层面：应用场景基本不同或完全不同
			
 
				+
			
 
				+示例：
			
 
				+目标"墨镜" vs 结果"配饰大全"（概念过于宽泛）
			
 
				+目标"美食摄影构图" vs 结果"美食博主日常vlog"
			
 
				+
			
 
				+
			
 
				+
			
 
				+## 概念层级关系说明
			
 
				+在评分时，需要注意概念层级关系的影响：
			
 
				+完全匹配（同一概念 + 同场景）→ 0.8-1分
			
 
				+目标"墨镜" vs 结果"墨镜搭配"，且都在时尚搭配场景
			
 
				+
			
 
				+
			
 
				+上位/下位概念（层级差一层）→ 通常0.6-0.7分
			
 
				+目标"墨镜" vs 结果"眼镜搭配"（结果更宽泛，需筛选）
			
 
				+目标"眼镜" vs 结果"墨镜选购"（结果更具体，部分适用）
			
 
				+
			
 
				+
			
 
				+平行概念（同级不同类）→ 通常0.6-0.7分
			
 
				+目标"墨镜" vs 结果"近视眼镜"（都是眼镜类，但功能场景不同）
			
 
				+
			
 
				+
			
 
				+远距离概念（层级差两层及以上）→ 0.5分及以下
			
 
				+目标"墨镜" vs 结果"配饰"（概念过于宽泛，指导性弱）
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# 匹配结论判断
			
 
				+根据综合得分判定匹配类型：
			
 
				+
			
 
				+0.8-1.0分：✅ 完全匹配
			
 
				+
			
 
				+判断：找到了目标特征的直接灵感来源
			
 
				+建议：直接采纳为该特征的灵感溯源结果
			
 
				+
			
 
				+
			
 
				+0.6-0.79分：⚠️ 相似匹配
			
 
				+
			
 
				+判断：找到了相关的灵感参考，但存在一定差异
			
 
				+建议：作为候选结果保留，可与其他结果综合判断或继续搜索更精确的匹配
			
 
				+
			
 
				+
			
 
				+0.59分及以下：❌ 无匹配
			
 
				+
			
 
				+判断：该结果与目标特征关联度不足
			
 
				+建议：排除该结果，需要调整搜索策略继续寻找
			
 
				+
			
 
				+
			
 
				+# 输出格式
			
 
				+{{
			
 
				+  "综合得分": 0.7,
			
 
				+  "匹配类型": "相似匹配",
			
 
				+  "评分说明": "结果'眼镜搭配技巧'是目标'墨镜'的上位概念,内容涵盖多种眼镜类型。场景都是时尚搭配,但需要从结果中筛选出墨镜相关的内容。概念关系:上位概念(宽泛一层)",
			
 
				+  "关键匹配点": [
			
 
				+    "眼镜与脸型的搭配原则(部分适用于墨镜)",
			
 
				+    "配饰的风格选择方法"
			
 
				+  ]
			
 
				+}}
			
 
				+
			
 
				+# 特殊情况处理
			
 
				+复合特征评估：如果目标特征是复合型（如"复古滤镜+第一人称视角"），需要分别评估每个子特征的匹配度，然后取平均值作为最终得分
			
 
				+信息不完整：如果OCR提取的图像文字不完整或正文内容缺失，应在说明中注明，并根据实际可获取的信息进行评分
			
 
				+上位概念的实用性：当结果是目标的上位概念时，评分应考虑：内容中目标相关部分的占比；是否提供了可直接应用于目标的知识；场景的一致性程度；如果结果虽是上位概念但完全不涉及目标内容，应降至5-6分或更低
			
 
				+
			
 
				+只返回JSON，不要其他内容。"""
			
 
				+
			
 
				+        # 调用 LLM（传递图片进行多模态分析）
			
 
				+        result = self.client.chat_json(
			
 
				+            prompt=prompt,
			
 
				+            images=note_images if note_images else None,
			
 
				+            max_retries=3
			
 
				+        )
			
 
				+
			
 
				+        if result:
			
 
				+            return result
			
 
				+        else:
			
 
				+            logger.error(f"  第二层评估失败: note {note_index}, target={target_feature}")
			
 
				+            return {
			
 
				+                "综合得分": 0.0,
			
 
				+                "匹配类型": "评估失败",
			
 
				+                "评分说明": "LLM评估失败",
			
 
				+                "关键匹配点": []
			
 
				+            }
			
 
				+
			
 
				     def evaluate_note_with_filter(
			
 
				         self,
			
 
				         search_query: str,
			
@@ -852,7 +1126,10 @@ Query与目标特征的关系：
 
				         max_workers: int = 10
			
 
				     ) -> Dict[str, Any]:
			
 
				         """
			
 
				-        并行评估多个笔记（两层评估）
			
 
				+        两层评估多个笔记（拆分为两次LLM调用）
			
 
				+
			
 
				+        第一层：批量评估Query相关性（1次LLM调用）
			
 
				+        第二层：对"相关"的笔记评估特征匹配度（M次LLM调用）
			
 
				 
			
 
				         Args:
			
 
				             search_query: 搜索Query
			
@@ -874,37 +1151,102 @@ Query与目标特征的关系：
 
				             }
			
 
				 
			
 
				         notes_to_eval = notes[:max_notes]
			
 
				-        evaluated_notes = []
			
 
				+        logger.info(f"    两层评估 {len(notes_to_eval)} 个笔记")
			
 
				+
			
 
				+        # ========== 第一层：批量评估Query相关性 ==========
			
 
				+        logger.info(f"      [第一层] 批量评估Query相关性（1次LLM调用）")
			
 
				+        query_relevance_result = self.evaluate_query_relevance_batch(
			
 
				+            search_query=search_query,
			
 
				+            notes=notes_to_eval,
			
 
				+            max_notes=max_notes
			
 
				+        )
			
 
				 
			
 
				-        logger.info(f"    并行评估 {len(notes_to_eval)} 个笔记（{max_workers}并发）")
			
 
				+        # 解析第一层结果，找出"相关"的笔记
			
 
				+        relevant_notes_info = []
			
 
				+        for idx, note in enumerate(notes_to_eval):
			
 
				+            note_key = f"note_{idx}"
			
 
				+            relevance_info = query_relevance_result.get(note_key, {})
			
 
				+            relevance = relevance_info.get("与query相关性", "相关")  # 默认为"相关"
			
 
				 
			
 
				-        # 并发评估每个笔记
			
 
				-        with ThreadPoolExecutor(max_workers=max_workers) as executor:
			
 
				-            futures = []
			
 
				-            for idx, note in enumerate(notes_to_eval):
			
 
				+            if relevance == "相关":
			
 
				+                # 保留笔记信息用于第二层评估
			
 
				                 note_card = note.get('note_card', {})
			
 
				-                title = note_card.get('display_title', '')
			
 
				-                content = note_card.get('desc', '')
			
 
				-                images = note_card.get('image_list', [])
			
 
				+                relevant_notes_info.append({
			
 
				+                    "note_index": idx,
			
 
				+                    "note_card": note_card,
			
 
				+                    "title": note_card.get('display_title', ''),
			
 
				+                    "content": note_card.get('desc', ''),
			
 
				+                    "images": note_card.get('image_list', []),
			
 
				+                    "第一层评估": relevance_info
			
 
				+                })
			
 
				+
			
 
				+        logger.info(f"      [第一层] 过滤结果: {len(relevant_notes_info)}/{len(notes_to_eval)} 条相关")
			
 
				+
			
 
				+        # ========== 第二层：对相关笔记评估特征匹配度 ==========
			
 
				+        evaluated_notes = []
			
 
				 
			
 
				-                future = executor.submit(
			
 
				-                    self.evaluate_note_with_filter,
			
 
				-                    search_query,
			
 
				-                    target_feature,
			
 
				-                    title,
			
 
				-                    content,
			
 
				-                    images,
			
 
				-                    idx
			
 
				-                )
			
 
				-                futures.append(future)
			
 
				+        if relevant_notes_info:
			
 
				+            logger.info(f"      [第二层] 并行评估特征匹配度（{len(relevant_notes_info)}次LLM调用，{max_workers}并发）")
			
 
				+
			
 
				+            with ThreadPoolExecutor(max_workers=max_workers) as executor:
			
 
				+                futures = []
			
 
				+                for note_info in relevant_notes_info:
			
 
				+                    future = executor.submit(
			
 
				+                        self.evaluate_feature_matching_single,
			
 
				+                        target_feature,
			
 
				+                        note_info["title"],
			
 
				+                        note_info["content"],
			
 
				+                        note_info["images"],
			
 
				+                        note_info["note_index"]
			
 
				+                    )
			
 
				+                    futures.append((future, note_info))
			
 
				+
			
 
				+                # 收集结果并合并
			
 
				+                for future, note_info in futures:
			
 
				+                    try:
			
 
				+                        second_layer_result = future.result()
			
 
				+
			
 
				+                        # 合并两层评估结果
			
 
				+                        merged_result = {
			
 
				+                            "note_index": note_info["note_index"],
			
 
				+                            "Query相关性": "相关",
			
 
				+                            "综合得分": second_layer_result.get("综合得分", 0.0),  # 0-1分制
			
 
				+                            "匹配类型": second_layer_result.get("匹配类型", ""),
			
 
				+                            "评分说明": second_layer_result.get("评分说明", ""),
			
 
				+                            "关键匹配点": second_layer_result.get("关键匹配点", []),
			
 
				+                            "第一层评估": note_info["第一层评估"],
			
 
				+                            "第二层评估": second_layer_result
			
 
				+                        }
			
 
				+                        evaluated_notes.append(merged_result)
			
 
				+                    except Exception as e:
			
 
				+                        logger.error(f"      [第二层] 评估笔记 {note_info['note_index']} 失败: {e}")
			
 
				+                        # 失败的笔记也加入结果
			
 
				+                        evaluated_notes.append({
			
 
				+                            "note_index": note_info["note_index"],
			
 
				+                            "Query相关性": "相关",
			
 
				+                            "综合得分": 0.0,
			
 
				+                            "匹配类型": "评估失败",
			
 
				+                            "评分说明": f"第二层评估失败: {str(e)}",
			
 
				+                            "关键匹配点": [],
			
 
				+                            "第一层评估": note_info["第一层评估"],
			
 
				+                            "第二层评估": {}
			
 
				+                        })
			
 
				 
			
 
				-            # 收集结果
			
 
				-            for future in as_completed(futures):
			
 
				-                try:
			
 
				-                    result = future.result()
			
 
				-                    evaluated_notes.append(result)
			
 
				-                except Exception as e:
			
 
				-                    logger.error(f"    评估笔记失败: {e}")
			
 
				+        # 添加第一层就被过滤的笔记（Query不相关）
			
 
				+        for idx, note in enumerate(notes_to_eval):
			
 
				+            note_key = f"note_{idx}"
			
 
				+            relevance_info = query_relevance_result.get(note_key, {})
			
 
				+            relevance = relevance_info.get("与query相关性", "相关")
			
 
				+
			
 
				+            if relevance == "不相关":
			
 
				+                evaluated_notes.append({
			
 
				+                    "note_index": idx,
			
 
				+                    "Query相关性": "不相关",
			
 
				+                    "综合得分": 0.0,
			
 
				+                    "匹配类型": "过滤",
			
 
				+                    "说明": relevance_info.get("说明", ""),
			
 
				+                    "第一层评估": relevance_info
			
 
				+                })
			
 
				 
			
 
				         # 按note_index排序
			
 
				         evaluated_notes.sort(key=lambda x: x.get('note_index', 0))
			
@@ -914,12 +1256,12 @@ Query与目标特征的关系：
 
				         evaluated_count = len(evaluated_notes)
			
 
				         filtered_count = sum(1 for n in evaluated_notes if n.get('Query相关性') == '不相关')
			
 
				 
			
 
				-        # 匹配度分布统计
			
 
				+        # 匹配度分布统计（使用0-1分制的阈值）
			
 
				         match_distribution = {
			
 
				-            '完全匹配(8-10)': 0,
			
 
				-            '相似匹配(6-7)': 0,
			
 
				-            '弱相似(5-6)': 0,
			
 
				-            '无匹配(≤4)': 0
			
 
				+            '完全匹配(0.8-1.0)': 0,
			
 
				+            '相似匹配(0.6-0.79)': 0,
			
 
				+            '弱相似(0.5-0.59)': 0,
			
 
				+            '无匹配(≤0.4)': 0
			
 
				         }
			
 
				 
			
 
				         for note_eval in evaluated_notes:
			
@@ -927,14 +1269,14 @@ Query与目标特征的关系：
 
				                 continue  # 过滤的不计入分布
			
 
				 
			
 
				             score = note_eval.get('综合得分', 0)
			
 
				-            if score >= 8.0:
			
 
				-                match_distribution['完全匹配(8-10)'] += 1
			
 
				-            elif score >= 6.0:
			
 
				-                match_distribution['相似匹配(6-7)'] += 1
			
 
				-            elif score >= 5.0:
			
 
				-                match_distribution['弱相似(5-6)'] += 1
			
 
				+            if score >= 0.8:
			
 
				+                match_distribution['完全匹配(0.8-1.0)'] += 1
			
 
				+            elif score >= 0.6:
			
 
				+                match_distribution['相似匹配(0.6-0.79)'] += 1
			
 
				+            elif score >= 0.5:
			
 
				+                match_distribution['弱相似(0.5-0.59)'] += 1
			
 
				             else:
			
 
				-                match_distribution['无匹配(≤4)'] += 1
			
 
				+                match_distribution['无匹配(≤0.4)'] += 1
			
 
				 
			
 
				         logger.info(f"    评估完成: 过滤{filtered_count}条, 匹配分布: {match_distribution}")
			
 
				 
			
--- a/run_stage7.py
+++ b/run_stage7.py
@@ -113,8 +113,8 @@ def main():
 
				     parser.add_argument(
			
 
				         '--min-score',
			
 
				         type=float,
			
 
				-        default=8.0,
			
 
				-        help='最低分数阈值，只处理 >= 此分数的帖子（默认: 8.0）'
			
 
				+        default=0.8,
			
 
				+        help='最低分数阈值，只处理 >= 此分数的帖子（默认: 0.8）'
			
 
				     )
			
 
				     parser.add_argument(
			
 
				         '--skip',