| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Quick-run script for the similarity analysis.
"""
- import os
- import json
- import logging
- from src.analyzers.similarity_analyzer import SimilarityAnalyzer
# Configure logging: INFO level, "timestamp - level - message" line format.
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def main():
    """Run the similarity analysis on previously saved deconstruction results.

    Loads ``output_v2/deep_analysis_results.json``, passes the parsed content
    to ``SimilarityAnalyzer.run`` and prints a summary of the statistics found
    in the returned ``metadata`` entry.

    If the input file is missing, or cannot be decoded/parsed as UTF-8 JSON,
    a message is printed and the function returns without creating the
    analyzer.

    Returns:
        None.
    """
    deconstruction_path = "output_v2/deep_analysis_results.json"
    # Single source of truth: handed to the analyzer and echoed in the summary.
    evaluation_results_path = 'output_v2/evaluated_results.json'

    # Open directly instead of checking os.path.exists() first, so the file
    # cannot disappear between the check and the open.
    try:
        input_file = open(deconstruction_path, 'r', encoding='utf-8')
    except FileNotFoundError:
        print(f"❌ 解构分析结果不存在: {deconstruction_path}")
        print(" 请先运行: python3 main.py --enable-stage5 --enable-stage6")
        return

    print(f"📖 加载解构分析结果: {deconstruction_path}")
    with input_file:
        try:
            deconstruction_results = json.load(input_file)
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            # A truncated or corrupt file should produce a readable message
            # rather than a raw traceback.
            print(f"❌ 解构分析结果无法解析为JSON: {deconstruction_path} ({exc})")
            return
    print(f"✓ 加载了 {len(deconstruction_results.get('results', []))} 个解构结果")

    # Create the analyzer.
    print("\n🚀 初始化相似度分析器...")
    analyzer = SimilarityAnalyzer(
        weight_embedding=0.5,  # weight of the embedding (vector) model
        weight_semantic=0.5,  # weight of the LLM model
        max_workers=5,  # concurrency
        min_similarity=0.0,  # minimum similarity threshold (0.0 keeps everything)
        target_features=None,  # None = process all features
        evaluation_results_path=evaluation_results_path,
        update_evaluation_scores=True,  # automatically compute composite score P
    )

    # Run the analysis.
    banner = "=" * 60
    print("\n" + banner)
    print("开始相似度分析...")
    print(banner)
    similarity_results = analyzer.run(deconstruction_results)
    print("\n" + banner)
    print("✅ 相似度分析完成!")
    print(banner)

    # Print the statistics reported in the result metadata.
    meta = similarity_results['metadata']
    stats = meta['overall_statistics']
    print("\n📊 统计结果:")
    print(f" - 处理帖子数: {stats['total_notes']}")
    print(f" - 提取特征总数: {stats['total_features_extracted']}")
    print(f" - 平均特征/帖子: {stats['avg_features_per_note']}")
    print(f" - 平均最高相似度: {stats['avg_max_similarity']}")
    print(f" - 包含高相似度特征的帖子: {stats['notes_with_high_similarity']}")
    print(f" - 总耗时: {meta['processing_time_seconds']}秒")

    print("\n📁 输出文件:")
    print(" - output_v2/similarity_analysis_results.json (相似度分析结果)")
    print(f" - {evaluation_results_path} (已更新综合得分P)")

    print("\n🎨 现在可以运行可视化:")
    print(" python3 src/visualizers/deconstruction_visualizer.py")
# Script entry point: run the analysis only when executed directly.
if __name__ == "__main__":
    main()
|