#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Quick-run script for similarity analysis.

Loads the deconstruction results produced by the main pipeline
(``main.py --enable-stage5 --enable-stage6``), runs the
SimilarityAnalyzer over them, prints summary statistics, and points
the user at the output files and the next visualization step.
"""
import json
import logging
import os

from src.analyzers.similarity_analyzer import SimilarityAnalyzer

# Logging setup — picked up by the analyzer's internal loggers.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)


def _load_deconstruction_results(path):
    """Return the parsed deconstruction JSON at *path*.

    Prints a user-facing hint and returns None when the file is missing
    or is not valid JSON, so the caller can bail out cleanly.
    """
    if not os.path.exists(path):
        print(f"❌ 解构分析结果不存在: {path}")
        print(" 请先运行: python3 main.py --enable-stage5 --enable-stage6")
        return None
    print(f"📖 加载解构分析结果: {path}")
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        # A truncated/corrupt results file should not dump a raw traceback.
        print(f"❌ 解构分析结果不是有效的 JSON: {e}")
        return None


def _print_summary(similarity_results):
    """Print run statistics, output-file locations, and the next step."""
    meta = similarity_results['metadata']
    stats = meta['overall_statistics']
    print("\n📊 统计结果:")
    print(f" - 处理帖子数: {stats['total_notes']}")
    print(f" - 提取特征总数: {stats['total_features_extracted']}")
    print(f" - 平均特征/帖子: {stats['avg_features_per_note']}")
    print(f" - 平均最高相似度: {stats['avg_max_similarity']}")
    print(f" - 包含高相似度特征的帖子: {stats['notes_with_high_similarity']}")
    print(f" - 总耗时: {meta['processing_time_seconds']}秒")
    print("\n📁 输出文件:")
    print(" - output_v2/similarity_analysis_results.json (相似度分析结果)")
    print(" - output_v2/evaluated_results.json (已更新综合得分P)")
    print("\n🎨 现在可以运行可视化:")
    print(" python3 src/visualizers/deconstruction_visualizer.py")


def main():
    """Entry point: load inputs, run the similarity analysis, report results."""
    deconstruction_path = "output_v2/deep_analysis_results.json"
    deconstruction_results = _load_deconstruction_results(deconstruction_path)
    if deconstruction_results is None:
        return
    print(f"✓ 加载了 {len(deconstruction_results.get('results', []))} 个解构结果")

    print("\n🚀 初始化相似度分析器...")
    analyzer = SimilarityAnalyzer(
        weight_embedding=0.5,           # weight of the embedding-model score
        weight_semantic=0.5,            # weight of the LLM semantic score
        max_workers=5,                  # number of concurrent workers
        min_similarity=0.0,             # minimum similarity threshold (0.0 keeps everything)
        target_features=None,           # None = process all features
        evaluation_results_path='output_v2/evaluated_results.json',
        update_evaluation_scores=True   # recompute the combined score P automatically
    )

    print("\n" + "=" * 60)
    print("开始相似度分析...")
    print("=" * 60)
    similarity_results = analyzer.run(deconstruction_results)
    print("\n" + "=" * 60)
    print("✅ 相似度分析完成!")
    print("=" * 60)

    _print_summary(similarity_results)


if __name__ == '__main__':
    main()