刘立冬 3 주 전
부모
커밋
35e82d6e1f
9개의 변경된 파일, 544개의 추가 그리고 54개의 삭제
  1. 100 0
      analyze_content_types.py
  2. 202 0
      analyze_feature_matches.py
  3. 168 0
      analyze_specific_feature.py
  4. 6 6
      enhanced_search_v2.py
  5. 4 4
      llm_evaluator.py
  6. 5 2
      run_stage7.py
  7. 10 2
      stage7_analyzer.py
  8. 22 22
      visualize_stage6_results.py
  9. 27 18
      visualize_stage78_with_deconstruction.py

+ 100 - 0
analyze_content_types.py

@@ -0,0 +1,100 @@
+"""
+分析搜索结果中的内容类型分布(视频 vs 图文)
+"""
+
+import json
+from collections import Counter
+from typing import Dict, Any, List
+
+
def analyze_content_types(stage6_path: str):
    """Summarize the content-type mix (video vs. image/text) of Stage6 search results.

    Loads the Stage6 JSON file, tallies the `note_card.type` field of every
    returned note, and prints the overall distribution, per-feature
    distributions, and a video-ratio diagnosis to stdout.
    """
    with open(stage6_path, 'r', encoding='utf-8') as fh:
        stage6_data = json.load(fh)

    banner = "=" * 80
    print(banner)
    print("Stage6 搜索结果内容类型分析")
    print(banner)

    overall_counter = Counter()   # note type -> count, across every feature
    per_feature_counters = {}     # original feature name -> per-type Counter

    searches_done = 0
    notes_seen = 0

    for entry in stage6_data:
        entry_name = entry['原始特征名称']
        local_counter = Counter()

        for assoc in entry.get('找到的关联', []):
            for feat in assoc.get('特征列表', []):
                result = feat.get('search_result')
                if not result:
                    continue
                searches_done += 1

                # Note payloads live under data.data in the API response.
                notes = result.get('data', {}).get('data', [])
                notes_seen += len(notes)

                for note in notes:
                    kind = note.get('note_card', {}).get('type', 'unknown')
                    overall_counter[kind] += 1
                    local_counter[kind] += 1

        if local_counter:
            per_feature_counters[entry_name] = local_counter

    print(f"\n📊 总体统计:")
    print(f"  已执行搜索: {searches_done} 次")
    print(f"  总帖子数: {notes_seen} 个")

    print(f"\n📋 内容类型分布:")
    for kind, count in overall_counter.most_common():
        print(f"  {kind}: {count} 个 ({count / notes_seen * 100:.1f}%)")

    print(f"\n📊 各原始特征的内容类型分布:")
    for entry_name, counts in per_feature_counters.items():
        subtotal = sum(counts.values())
        print(f"\n  【{entry_name}】 共 {subtotal} 个帖子")

        for kind, count in counts.most_common():
            print(f"    {kind}: {count} 个 ({count / subtotal * 100:.1f}%)")

    # 'video' marks video posts, 'normal' marks image/text posts.
    video_total = overall_counter.get('video', 0)
    normal_total = overall_counter.get('normal', 0)

    print(f"\n🎯 关键发现:")
    if video_total > 0:
        print(f"  ⚠️  发现 {video_total} 个视频帖子 (占比 {video_total / notes_seen * 100:.1f}%)")
        print(f"  ✓ 图文帖子: {normal_total} 个 (占比 {normal_total/notes_seen*100:.1f}%)")
        print(f"\n  问题原因分析:")
        print(f"    - 小红书 API 的 content_type='图文' 参数可能未被严格遵守")
        print(f"    - 或者 API 返回混合类型的内容")
        print(f"    - 建议在客户端侧添加内容类型过滤")
    else:
        print(f"  ✓ 未发现视频内容,全部为图文")

    print("\n" + banner)
+
+
if __name__ == '__main__':
    import sys

    # Default path, overridable via the first CLI argument.
    target = sys.argv[1] if len(sys.argv) > 1 else 'output_v2/stage6_with_evaluations.json'
    analyze_content_types(target)

+ 202 - 0
analyze_feature_matches.py

@@ -0,0 +1,202 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+统计 how 解构文件中所有原始特征匹配到的分类/标签及其路径
+"""
+
+import json
+from collections import defaultdict
+from typing import Dict, List, Set, Any
+
+
def build_classification_path(classification_list: List[str]) -> str:
    """Join classification levels into a single "a/b/c" path string.

    An empty (or otherwise falsy) list yields an empty string.
    """
    return "/".join(classification_list) if classification_list else ""
+
+
def analyze_feature_matches(json_file_path: str) -> Dict[str, Any]:
    """
    Analyze which persona classifications/tags each original feature in the
    "how" deconstruction file matched.

    Only matches with similarity >= 0.8 are kept; an original feature whose
    matches all fall below the threshold does not appear in the result at all.

    Returned structure (keys kept in Chinese to match the data files):
    {
        "<original feature name>": {
            "匹配的分类标签": [
                {
                    "名称": "...",     # persona feature name
                    "类型": "...",     # feature type (tag / classification)
                    "路径": "...",     # "a/b/c" classification path
                    "层级": "...",     # persona feature level
                    "相似度": 0.xxx    # rounded to 3 decimals
                },
                ...                     # sorted by similarity, descending
            ],
            "统计": {
                "高相似度匹配数(>=0.8)": <count>
            }
        }
    }
    """
    # NOTE(review): the previous docstring promised 总匹配数 / 中等 / 低
    # similarity buckets that were never computed; the documentation above
    # now matches the actual behavior (only the >=0.8 bucket exists).
    SIMILARITY_THRESHOLD = 0.8

    with open(json_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Entries are created lazily on first append, so only features with at
    # least one above-threshold match end up in the result.
    feature_matches = defaultdict(lambda: {
        "匹配的分类标签": [],
        "统计": {
            "高相似度匹配数(>=0.8)": 0
        }
    })

    how_result = data.get('how解构结果', {})

    # The three point lists share the same nested layout:
    # point -> how步骤列表 -> 特征列表 -> 匹配结果
    for level_name in ['灵感点列表', '目的点列表', '关键点列表']:
        for item in how_result.get(level_name, []):
            for step in item.get('how步骤列表', []):
                for feature in step.get('特征列表', []):
                    feature_name = feature.get('特征名称', '')
                    if not feature_name:
                        continue

                    for match in feature.get('匹配结果', []):
                        # The nested '匹配结果' dict carries the similarity score.
                        similarity = match.get('匹配结果', {}).get('相似度', 0)
                        if similarity < SIMILARITY_THRESHOLD:
                            continue

                        classification_list = match.get('特征分类', [])
                        match_info = {
                            "名称": match.get('人设特征名称', ''),
                            "类型": match.get('特征类型', ''),
                            "路径": "/".join(classification_list) if classification_list else "",
                            "层级": match.get('人设特征层级', ''),
                            "相似度": round(similarity, 3)
                        }

                        bucket = feature_matches[feature_name]
                        bucket["匹配的分类标签"].append(match_info)
                        bucket["统计"]["高相似度匹配数(>=0.8)"] += 1

    # Present strongest matches first.
    for info in feature_matches.values():
        info["匹配的分类标签"].sort(key=lambda m: m["相似度"], reverse=True)

    return dict(feature_matches)
+
+
def print_summary(results: Dict[str, Any]):
    """Print an aggregate summary (feature counts, total >=0.8 matches) to stdout."""
    divider = "=" * 80
    print(divider)
    print("原始特征匹配统计摘要(仅相似度>=0.8)")
    print(divider)

    feature_total = len(results)
    matched_total = 0
    features_with_matches = 0
    for info in results.values():
        count = info["统计"]["高相似度匹配数(>=0.8)"]
        matched_total += count
        if count > 0:
            features_with_matches += 1

    print(f"\n总原始特征数: {feature_total}")
    print(f"有高相似度匹配的特征数: {features_with_matches}")
    print(f"无匹配的特征数: {feature_total - features_with_matches}")

    print(f"\n总高相似度匹配数(>=0.8): {matched_total}")

    print("\n" + divider)
    print("各原始特征详细匹配情况")
    print(divider)
+
+
def print_detailed_results(results: Dict[str, Any], top_n: int = None):
    """Print each feature's matches, optionally limited to the top_n strongest."""
    for idx, (feature_name, info) in enumerate(results.items(), 1):
        shown = info["匹配的分类标签"]
        count = info["统计"]['高相似度匹配数(>=0.8)']

        # Features with nothing above the similarity threshold are omitted.
        if not count:
            continue

        print(f"\n[{idx}] 原始特征: {feature_name}")
        print(f"    高相似度匹配数(>=0.8): {count}")

        if top_n:
            shown = shown[:top_n]
        print(f"    匹配列表(共{len(shown)}个):")
        for pos, m in enumerate(shown, 1):
            print(f"      {pos}. {m['名称']} ({m['相似度']:.3f})")
            print(f"         类型: {m['类型']}, 层级: {m['层级']}")
            if m['路径']:
                print(f"         路径: {m['路径']}")
            else:
                print(f"         路径: (顶级分类)")
+
+
def save_results(results: Dict[str, Any], output_file: str):
    """Write *results* as pretty-printed UTF-8 JSON and report the location."""
    serialized = json.dumps(results, ensure_ascii=False, indent=2)
    with open(output_file, 'w', encoding='utf-8') as out:
        out.write(serialized)
    print(f"\n详细结果已保存到: {output_file}")
+
+
def main():
    """Entry point: analyze feature matches, print summary/details, save a JSON report.

    The input and output paths default to the original hardcoded locations,
    but can now be overridden from the command line:
    ``python analyze_feature_matches.py [input_json] [output_json]``.
    """
    import sys

    # Defaults preserved for backward compatibility; positional args override.
    input_file = "/Users/liulidong/project/pattern相关文件/optimization/69114f150000000007001f30_how copy.json"
    output_file = "/Users/liulidong/project/pattern相关文件/optimization/feature_matches_analysis.json"
    if len(sys.argv) > 1:
        input_file = sys.argv[1]
    if len(sys.argv) > 2:
        output_file = sys.argv[2]

    print("开始分析特征匹配...")

    # Analyze, then report to stdout and to disk.
    results = analyze_feature_matches(input_file)

    print_summary(results)

    # Show every match (no per-feature cap).
    print_detailed_results(results, top_n=None)

    save_results(results, output_file)

    print("\n分析完成!")


if __name__ == "__main__":
    main()

+ 168 - 0
analyze_specific_feature.py

@@ -0,0 +1,168 @@
+"""
+分析特定原始特征的搜索执行情况
+"""
+
+import json
+import sys
+from typing import Dict, Any, List
+
+
def _find_by_name(items: List[Dict[str, Any]], feature_name: str):
    """Return the first entry whose '原始特征名称' equals feature_name, else None."""
    for item in items:
        if item['原始特征名称'] == feature_name:
            return item
    return None


def _fmt_score(score) -> str:
    """Format an LLM score as e.g. '0.90', or 'N/A' when missing (None)."""
    return f"{score:.2f}" if score is not None else 'N/A'


def analyze_feature_searches(stage4_path: str, stage6_path: str, feature_name: str):
    """Report how one original feature's search words were generated and executed.

    Loads the Stage4 file (generated search words + LLM scores) and the
    Stage6 file (search execution results), then prints statistics, a
    rank-ordered word table, details for searched / unsearched words, and a
    guess at the search strategy used. Prints an error and returns early if
    the feature is missing from either file.
    """
    with open(stage4_path, 'r', encoding='utf-8') as f:
        stage4_data = json.load(f)

    with open(stage6_path, 'r', encoding='utf-8') as f:
        stage6_data = json.load(f)

    stage4_feature = _find_by_name(stage4_data, feature_name)
    stage6_feature = _find_by_name(stage6_data, feature_name)

    if not stage4_feature:
        print(f"❌ 在 Stage4 中未找到特征: {feature_name}")
        return

    if not stage6_feature:
        print(f"❌ 在 Stage6 中未找到特征: {feature_name}")
        return

    print("=" * 80)
    print(f"原始特征: {feature_name}")
    print("=" * 80)

    # Collect every generated search word from Stage4.
    stage4_search_words = []
    for association in stage4_feature.get('找到的关联', []):
        assoc_name = association.get('分类名称', '')
        for feature in association.get('特征列表', []):
            search_word = feature.get('search_word')
            llm_eval = feature.get('llm_evaluation', {})

            if search_word:
                stage4_search_words.append({
                    'search_word': search_word,
                    'association': assoc_name,
                    'feature_name': feature.get('特征名称', ''),
                    'llm_score': llm_eval.get('score'),
                    'llm_rank': llm_eval.get('rank'),
                    'reasoning': llm_eval.get('reasoning', '')
                })

    # Collect Stage6 search words together with their execution status.
    stage6_search_words = []
    for association in stage6_feature.get('找到的关联', []):
        assoc_name = association.get('分类名称', '')
        for feature in association.get('特征列表', []):
            search_word = feature.get('search_word')
            search_result = feature.get('search_result')
            search_metadata = feature.get('search_metadata', {})
            llm_eval = feature.get('llm_evaluation', {})

            if search_word:
                stage6_search_words.append({
                    'search_word': search_word,
                    'association': assoc_name,
                    'feature_name': feature.get('特征名称', ''),
                    'llm_score': llm_eval.get('score'),
                    'llm_rank': llm_eval.get('rank'),
                    'has_result': search_result is not None,
                    'status': search_metadata.get('status', 'not_searched'),
                    'note_count': search_metadata.get('note_count', 0)
                })

    total_stage4 = len(stage4_search_words)
    total_stage6 = len(stage6_search_words)
    searched = sum(1 for w in stage6_search_words if w['has_result'])
    not_searched = total_stage6 - searched
    # FIX: guard against ZeroDivisionError when Stage6 kept no search words.
    search_rate = searched / total_stage6 * 100 if total_stage6 else 0.0

    print(f"\n📊 统计信息:")
    print(f"  Stage4 生成的搜索词数: {total_stage4}")
    print(f"  Stage6 保留的搜索词数: {total_stage6}")
    print(f"  已执行搜索: {searched} 个")
    print(f"  未执行搜索: {not_searched} 个")
    print(f"  搜索执行率: {search_rate:.1f}%")

    # Missing ranks sort last; `is not None` keeps a legitimate rank 0 usable.
    stage6_sorted = sorted(
        stage6_search_words,
        key=lambda x: x['llm_rank'] if x['llm_rank'] is not None else 999
    )

    print(f"\n📋 详细搜索词列表 (按 LLM Rank 排序):")
    print(f"{'Rank':<6} {'评分':<6} {'搜索状态':<12} {'帖子数':<8} 搜索词")
    print("-" * 80)

    for word in stage6_sorted:
        rank = word['llm_rank'] if word['llm_rank'] is not None else 'N/A'
        score = _fmt_score(word['llm_score'])
        status = '✅ 已搜索' if word['has_result'] else '⏸️  未搜索'
        note_count = word['note_count'] if word['has_result'] else '-'

        print(f"{rank:<6} {score:<6} {status:<12} {note_count:<8} {word['search_word']}")

    # Detail section: words that were actually searched.
    searched_words = [w for w in stage6_sorted if w['has_result']]
    if searched_words:
        print(f"\n✅ 已执行搜索的 {len(searched_words)} 个搜索词:")
        for idx, word in enumerate(searched_words, 1):
            print(f"\n  【{idx}】 {word['search_word']}")
            print(f"       关联: {word['association']}")
            print(f"       特征: {word['feature_name']}")
            # FIX: previously crashed with TypeError when llm_score was None.
            print(f"       评分: {_fmt_score(word['llm_score'])}, 排名: #{word['llm_rank']}")
            print(f"       结果: {word['note_count']} 个帖子")

    # Detail section: words that were never searched.
    not_searched_words = [w for w in stage6_sorted if not w['has_result']]
    if not_searched_words:
        print(f"\n⏸️  未执行搜索的 {len(not_searched_words)} 个搜索词:")
        for idx, word in enumerate(not_searched_words, 1):
            print(f"\n  【{idx}】 {word['search_word']}")
            print(f"       关联: {word['association']}")
            print(f"       特征: {word['feature_name']}")
            # FIX: previously crashed with TypeError when llm_score was None.
            print(f"       评分: {_fmt_score(word['llm_score'])}, 排名: #{word['llm_rank']}")

    # Heuristic explanation of why only part of the words were searched.
    print(f"\n🔍 搜索策略分析:")
    if searched == 10:
        print(f"  系统使用了 Top-10 策略")
        top_10_ranks = sorted([w['llm_rank'] for w in searched_words if w['llm_rank']])
        print(f"  实际搜索的 Rank 范围: {top_10_ranks}")

        # Compare against a strict top-10-by-rank selection.
        expected_top_10_ranks = sorted([w['llm_rank'] for w in stage6_sorted[:10] if w['llm_rank']])
        if top_10_ranks == expected_top_10_ranks:
            print(f"  ✓ 严格按照 LLM Rank 取了 Top-10")
        else:
            print(f"  ⚠️  不是严格的 Top-10 (期望: {expected_top_10_ranks})")
    elif searched > 0:
        print(f"  系统执行了 {searched} 个搜索")
    else:
        print(f"  该特征的搜索尚未执行")

    print("\n" + "=" * 80)
+
+
if __name__ == '__main__':
    # Positional CLI overrides: [feature_name] [stage4_path] [stage6_path]
    args = sys.argv[1:]

    feature_name = args[0] if len(args) > 0 else '墨镜'
    stage4_path = args[1] if len(args) > 1 else 'output_v2/stage4_with_llm_scores.json'
    stage6_path = args[2] if len(args) > 2 else 'output_v2/stage6_with_evaluations.json'

    analyze_feature_searches(stage4_path, stage6_path, feature_name)

+ 6 - 6
enhanced_search_v2.py

@@ -88,7 +88,7 @@ class EnhancedSearchV2:
             stage7_skip: Stage 7跳过前N个帖子(默认0)
             stage7_sort_by: Stage 7排序方式:score/time/engagement(默认score)
             stage7_api_url: Stage 7解构API地址
-            stage7_min_score: Stage 7处理的最低分数阈值(默认8.0
+            stage7_min_score: Stage 7处理的最低分数阈值(默认0.8,0-1分制
         """
         self.how_json_path = how_json_path
         self.dimension_associations_path = dimension_associations_path
@@ -1425,7 +1425,7 @@ class EnhancedSearchV2:
 
         遍历所有搜索结果,使用两层评估机制:
         1. 第一层:过滤与搜索Query无关的结果
-        2. 第二层:评估与目标特征的匹配度(8-10/6-7/5-6/≤4)
+        2. 第二层:评估与目标特征的匹配度(0.8-1.0/0.6-0.79/0.5-0.59/≤0.4)
 
         Args:
             features_data: 阶段5的数据
@@ -1551,8 +1551,8 @@ class EnhancedSearchV2:
         match_dist = evaluation.get('match_distribution', {})
 
         logger.info(f"  ✓ 完成: 过滤 {filtered_count}, 评估 {evaluated_count}, "
-                   f"完全匹配 {match_dist.get('完全匹配(8-10)', 0)}, "
-                   f"相似匹配 {match_dist.get('相似匹配(6-7)', 0)}")
+                   f"完全匹配 {match_dist.get('完全匹配(0.8-1.0)', 0)}, "
+                   f"相似匹配 {match_dist.get('相似匹配(0.6-0.79)', 0)}")
 
         return evaluation
 
@@ -1895,8 +1895,8 @@ def main():
     parser.add_argument(
         '--stage7-min-score',
         type=float,
-        default=8.0,
-        help='Stage 7 处理的最低分数阈值(默认8.0)'
+        default=0.8,
+        help='Stage 7 处理的最低分数阈值(默认0.8,0-1分制)'
     )
 
     args = parser.parse_args()

+ 4 - 4
llm_evaluator.py

@@ -182,7 +182,7 @@ class LLMEvaluator:
             prompt = f"""
 
 # 任务说明
-从给定关键词中提取并组合适合在小红书搜索的query词(目标是找到【{original_feature}】相关内容,但query中不能直接出现"{original_feature}"二字)
+模拟你是一个内容创作者,生成的组合词要符合一个创作者在内容平台搜索的习惯。从给定关键词中提取并组合适合在小红书搜索的query词
 
 ## 可选词汇
 {available_words_str}
@@ -193,21 +193,21 @@ class LLMEvaluator:
    - 多个词组合
    - 适当精简
 2. 不能添加可选词汇以外的新词
-3. 按推荐程度排序(越靠前越推荐),取top10
+3. 按推荐程度排序(越靠前越推荐),取top5
 
 ## 输出格式(JSON):
 [
   {{
     "rank": 1,
     "search_word": "组合的搜索词",
-    "source_word": "组合来源词,空格分割",
+    "source_word": "组合来源词,空格分割,组合来源词都是从available_words_str中选取的",
     "score": 0.85,
     "reasoning": "推荐理由"
   }},
   {{
     "index": 2,
     "search_word": "组合的搜索词",
-    "source_word": "组合来源词,空格分割",
+    "source_word": "组合来源词,空格分割,组合来源词都是从available_words_str中选取的",
     "score": 0.80,
     "reasoning": "推荐理由"
   }}

+ 5 - 2
run_stage7.py

@@ -42,6 +42,9 @@ def main():
   # 处理"墨镜"和"耳环"两个特征,每个最多5个
   python3 run_stage7.py --feature "墨镜" "耳环" --max-notes 5
 
+  # 按数据原始顺序处理前50个(不排序)
+  python3 run_stage7.py --sort-by none --max-notes 50
+
   # 处理所有特征,按时间排序,前20个
   python3 run_stage7.py --sort-by time --max-notes 20
 
@@ -130,9 +133,9 @@ def main():
     )
     parser.add_argument(
         '--sort-by',
-        choices=['score', 'time', 'engagement'],
+        choices=['none', 'score', 'time', 'engagement'],
         default='score',
-        help='排序方式: score(评分), time(时间), engagement(互动量)(默认: score)'
+        help='排序方式: none(不排序,保持数据原始顺序), score(评分), time(时间), engagement(互动量)(默认: score)'
     )
 
     # API 配置

+ 10 - 2
stage7_analyzer.py

@@ -53,7 +53,11 @@ class Stage7DeconstructionAnalyzer:
             max_notes: 最多处理多少个帖子(None = 不限制)
             min_score: 最低分数阈值(只处理 >= 此分数的帖子)
             skip_count: 跳过前 N 个
-            sort_by: 排序方式 ('score' | 'time' | 'engagement')
+            sort_by: 排序方式 ('none' | 'score' | 'time' | 'engagement')
+                    - 'none': 不排序,保持Stage6数据原始顺序
+                    - 'score': 按评分降序
+                    - 'time': 按时间降序
+                    - 'engagement': 按互动量降序
             timeout: API 超时时间
             max_retries: API 最大重试次数
             output_dir: 输出目录
@@ -155,7 +159,11 @@ class Stage7DeconstructionAnalyzer:
         Returns:
             排序后的帖子列表
         """
-        if self.sort_by == 'score':
+        if self.sort_by == 'none':
+            # 不排序,保持数据原始顺序
+            return matched_notes
+
+        elif self.sort_by == 'score':
             # 按评分降序(优先处理高分帖子)
             return sorted(
                 matched_notes,

+ 22 - 22
visualize_stage6_results.py

@@ -30,10 +30,10 @@ def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
     # 评估统计
     total_evaluated_notes = 0
     total_filtered = 0
-    match_complete = 0  # 8-10分
-    match_similar = 0   # 6-7分
-    match_weak = 0      # 5-6
-    match_none = 0      # ≤4分
+    match_complete = 0  # 0.8-1.0分
+    match_similar = 0   # 0.6-0.79
+    match_weak = 0      # 0.5-0.59
+    match_none = 0      # ≤0.4分
 
     for feature in data:
         grouped_results = feature.get('组合评估结果_分组', [])
@@ -66,10 +66,10 @@ def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
                         total_filtered += evaluation.get('filtered_count', 0)
 
                         stats = evaluation.get('statistics', {})
-                        match_complete += stats.get('完全匹配(8-10)', 0)
-                        match_similar += stats.get('相似匹配(6-7)', 0)
-                        match_weak += stats.get('弱相似(5-6)', 0)
-                        match_none += stats.get('无匹配(≤4)', 0)
+                        match_complete += stats.get('完全匹配(0.8-1.0)', 0)
+                        match_similar += stats.get('相似匹配(0.6-0.79)', 0)
+                        match_weak += stats.get('弱相似(0.5-0.59)', 0)
+                        match_none += stats.get('无匹配(≤0.4)', 0)
                 else:
                     not_searched_count += 1
 
@@ -966,9 +966,9 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any], output_path
                 return 'filtered';
             }}
             const score = noteEval['综合得分'];
-            if (score >= 8) return 'complete';
-            if (score >= 6) return 'similar';
-            if (score >= 5) return 'weak';
+            if (score >= 0.8) return 'complete';
+            if (score >= 0.6) return 'similar';
+            if (score >= 0.5) return 'weak';
             return 'none';
         }}
 
@@ -1014,7 +1014,7 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any], output_path
                                 <div class="base-word-meta">相似度: ${{baseSimilarity.toFixed(2)}} · ${{searches.length}}个搜索词</div>
                             </div>
                             <div class="base-word-desc" id="base-word-desc-${{featureIdx}}-${{groupIdx}}">
-                                ${{relatedWordNames || '无相关词汇'}}
+                                <strong>关联特征范围(可用词汇池):</strong>${{relatedWordNames || '无相关词汇'}}
                             </div>
                             <div class="search-words-sublist" id="search-words-sublist-${{featureIdx}}-${{groupIdx}}">
                     `;
@@ -1031,10 +1031,10 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any], output_path
                         let evalBadges = '';
                         if (evaluation) {{
                             const stats = evaluation.statistics || {{}};
-                            const complete = stats['完全匹配(8-10)'] || 0;
-                            const similar = stats['相似匹配(6-7)'] || 0;
-                            const weak = stats['弱相似(5-6)'] || 0;
-                            const none = stats['无匹配(≤4)'] || 0;
+                            const complete = stats['完全匹配(0.8-1.0)'] || 0;
+                            const similar = stats['相似匹配(0.6-0.79)'] || 0;
+                            const weak = stats['弱相似(0.5-0.59)'] || 0;
+                            const none = stats['无匹配(≤0.4)'] || 0;
                             const filtered = evaluation.filtered_count || 0;
 
                             if (complete > 0) evalBadges += `<span class="eval-badge eval-complete">🟢${{complete}}</span>`;
@@ -1099,10 +1099,10 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any], output_path
                         let evalStats = '';
                         if (evaluation) {{
                             const stats = evaluation.statistics || {{}};
-                            const complete = stats['完全匹配(8-10)'] || 0;
-                            const similar = stats['相似匹配(6-7)'] || 0;
-                            const weak = stats['弱相似(5-6)'] || 0;
-                            const none = stats['无匹配(≤4)'] || 0;
+                            const complete = stats['完全匹配(0.8-1.0)'] || 0;
+                            const similar = stats['相似匹配(0.6-0.79)'] || 0;
+                            const weak = stats['弱相似(0.5-0.59)'] || 0;
+                            const none = stats['无匹配(≤0.4)'] || 0;
                             const filtered = evaluation.filtered_count || 0;
 
                             if (complete > 0) evalStats += `<span class="stat-badge eval complete">🟢 完全:${{complete}}</span>`;
@@ -1207,8 +1207,8 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any], output_path
             let evalSection = '';
             if (noteEval) {{
                 const score = noteEval['综合得分'];
-                const scoreEmoji = score >= 8 ? '🟢' : score >= 6 ? '🟡' : score >= 5 ? '🟠' : '🔴';
-                const scoreText = score >= 8 ? '完全匹配' : score >= 6 ? '相似匹配' : score >= 5 ? '弱相似' : '无匹配';
+                const scoreEmoji = score >= 0.8 ? '🟢' : score >= 0.6 ? '🟡' : score >= 0.5 ? '🟠' : '🔴';
+                const scoreText = score >= 0.8 ? '完全匹配' : score >= 0.6 ? '相似匹配' : score >= 0.5 ? '弱相似' : '无匹配';
                 const reasoning = noteEval['评分说明'] || '无';
                 const matchingPoints = (noteEval['关键匹配点'] || []).join('、') || '无';
 

+ 27 - 18
visualize_stage78_with_deconstruction.py

@@ -93,10 +93,10 @@ def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
                         total_filtered += evaluation.get('filtered_count', 0)
 
                         stats = evaluation.get('statistics', {})
-                        match_complete += stats.get('完全匹配(8-10)', 0)
-                        match_similar += stats.get('相似匹配(6-7)', 0)
-                        match_weak += stats.get('弱相似(5-6)', 0)
-                        match_none += stats.get('无匹配(≤4)', 0)
+                        match_complete += stats.get('完全匹配(0.8-1.0)', 0)
+                        match_similar += stats.get('相似匹配(0.6-0.79)', 0)
+                        match_weak += stats.get('弱相似(0.5-0.59)', 0)
+                        match_none += stats.get('无匹配(≤0.4)', 0)
                 else:
                     not_searched_count += 1
 
@@ -1332,9 +1332,9 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
                 return 'filtered';
             }}
             const score = noteEval['综合得分'];
-            if (score >= 8) return 'complete';
-            if (score >= 6) return 'similar';
-            if (score >= 5) return 'weak';
+            if (score >= 0.8) return 'complete';
+            if (score >= 0.6) return 'similar';
+            if (score >= 0.5) return 'weak';
             return 'none';
         }}
 
@@ -1391,10 +1391,10 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
                         let evalBadges = '';
                         if (evaluation) {{
                             const stats = evaluation.statistics || {{}};
-                            const complete = stats['完全匹配(8-10)'] || 0;
-                            const similar = stats['相似匹配(6-7)'] || 0;
-                            const weak = stats['弱相似(5-6)'] || 0;
-                            const none = stats['无匹配(≤4)'] || 0;
+                            const complete = stats['完全匹配(0.8-1.0)'] || 0;
+                            const similar = stats['相似匹配(0.6-0.79)'] || 0;
+                            const weak = stats['弱相似(0.5-0.59)'] || 0;
+                            const none = stats['无匹配(≤0.4)'] || 0;
                             const filtered = evaluation.filtered_count || 0;
 
                             if (complete > 0) evalBadges += `<span class="eval-badge eval-complete">🟢${{complete}}</span>`;
@@ -1458,10 +1458,10 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
                         let evalStats = '';
                         if (evaluation) {{
                             const stats = evaluation.statistics || {{}};
-                            const complete = stats['完全匹配(8-10)'] || 0;
-                            const similar = stats['相似匹配(6-7)'] || 0;
-                            const weak = stats['弱相似(5-6)'] || 0;
-                            const none = stats['无匹配(≤4)'] || 0;
+                            const complete = stats['完全匹配(0.8-1.0)'] || 0;
+                            const similar = stats['相似匹配(0.6-0.79)'] || 0;
+                            const weak = stats['弱相似(0.5-0.59)'] || 0;
+                            const none = stats['无匹配(≤0.4)'] || 0;
                             const filtered = evaluation.filtered_count || 0;
 
                             if (complete > 0) evalStats += `<span class="stat-badge eval complete">🟢 完全:${{complete}}</span>`;
@@ -1549,8 +1549,8 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
             let evalSection = '';
             if (noteEval) {{
                 const score = noteEval['综合得分'];
-                const scoreEmoji = score >= 8 ? '🟢' : score >= 6 ? '🟡' : score >= 5 ? '🟠' : '🔴';
-                const scoreText = score >= 8 ? '完全匹配' : score >= 6 ? '相似匹配' : score >= 5 ? '弱相似' : '无匹配';
+                const scoreEmoji = score >= 0.8 ? '🟢' : score >= 0.6 ? '🟡' : score >= 0.5 ? '🟠' : '🔴';
+                const scoreText = score >= 0.8 ? '完全匹配' : score >= 0.6 ? '相似匹配' : score >= 0.5 ? '弱相似' : '无匹配';
                 const reasoning = noteEval['评分说明'] || '无';
                 const matchingPoints = (noteEval['关键匹配点'] || []).join('、') || '无';
 
@@ -1577,7 +1577,16 @@ def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any],
             }}
 
             // 检查是否有解构数据(仅完全匹配)
-            const hasDeconstruction = evalCategory === 'complete' && (stage7Data[noteId] || stage8Data[noteId]);
+            // 调试日志: 记录解构按钮判断过程
+            const inStage7 = !!stage7Data[noteId];
+            const inStage8 = !!stage8Data[noteId];
+            const hasDeconstruction = evalCategory === 'complete' && (inStage7 || inStage8);
+
+            // 只为特定note_id输出调试信息(避免console过多输出)
+            if (noteId === '67bda4680000000029013382' || hasDeconstruction) {{
+                console.log(`[解构按钮判断] noteId=${{noteId}}, evalCategory=${{evalCategory}}, inStage7=${{inStage7}}, inStage8=${{inStage8}}, hasDeconstruction=${{hasDeconstruction}}`);
+            }}
+
             let deconstructionSection = '';
 
             if (hasDeconstruction) {{