#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Stage5 search-result visualization tool.

Generates an interactive HTML page (with image carousels) from the Stage5
search-result JSON file.
"""

import json
import os
from datetime import datetime
from typing import List, Dict, Any


def load_data(json_path: str) -> List[Dict[str, Any]]:
    """Load and return the parsed JSON document stored at ``json_path``.

    Args:
        json_path: Path of a UTF-8 encoded JSON file.

    Returns:
        Whatever ``json.load`` produces for the file; callers in this module
        treat it as a list of feature dictionaries.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _percentage(part: int, total: int) -> float:
    """Return ``part / total`` as a percentage rounded to one decimal (0 if total is 0)."""
    return round(part / total * 100, 1) if total > 0 else 0


def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Compute summary counts over the features, search words and notes.

    For every feature the list under ``'组合评估结果'`` is walked; each entry's
    notes are read from ``entry['search_result']['data']['data']`` and each
    note is classified by ``note['note_card']['type']`` as either ``'video'``
    or anything else ("normal").

    Any of those nested values may be missing *or* explicitly ``null`` in the
    JSON; both cases are treated as empty.  (``dict.get(key, default)`` only
    falls back to the default when the key is absent, so a stored ``None``
    must be coalesced separately.)

    Args:
        data: List of feature dictionaries as returned by ``load_data``.

    Returns:
        A dict with the keys ``total_features``, ``total_search_words``,
        ``total_notes``, ``video_count``, ``normal_count``,
        ``video_percentage`` and ``normal_percentage``.  Percentages are
        rounded to one decimal place and are ``0`` when there are no notes.
    """
    total_features = len(data)
    total_search_words = 0
    total_notes = 0
    video_count = 0
    normal_count = 0

    for feature in data:
        # `or []` / `or {}` turn an explicit JSON null into an empty container.
        search_results = feature.get('组合评估结果') or []
        total_search_words += len(search_results)

        for search_item in search_results:
            search_result = search_item.get('search_result') or {}
            payload = search_result.get('data') or {}
            notes = payload.get('data') or []
            total_notes += len(notes)

            for note in notes:
                note_card = note.get('note_card') or {}
                if note_card.get('type', '') == 'video':
                    video_count += 1
                else:
                    # Every non-video note is counted as "normal" so that
                    # video_count + normal_count always equals total_notes.
                    normal_count += 1

    return {
        'total_features': total_features,
        'total_search_words': total_search_words,
        'total_notes': total_notes,
        'video_count': video_count,
        'normal_count': normal_count,
        'video_percentage': _percentage(video_count, total_notes),
        'normal_percentage': _percentage(normal_count, total_notes),
    }


def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any], output_path: str) -> None:
    """Render the statistics into the page template and write it to disk.

    Args:
        data: The loaded feature list; serialized to JSON for the page.
        stats: The dictionary produced by ``calculate_statistics``.
        output_path: Destination file; written as UTF-8, overwriting any
            existing file.

    Raises:
        KeyError: If ``stats`` lacks one of the keys used by the template.
        OSError: If the output file cannot be written.
    """
    # Serialize the data to JSON (intended for use by JavaScript in the page).
    # NOTE(review): the template as visible here never references `data_json`;
    # presumably the full template embeds it in a script section - confirm.
    data_json = json.dumps(data, ensure_ascii=False, indent=2)

    html_content = f''' Stage5 搜索结果可视化
📊 {stats['total_features']}
原始特征数
🔍 {stats['total_search_words']}
搜索词数
📝 {stats['total_notes']}
帖子总数
🎬 {stats['video_count']}
视频类型 ({stats['video_percentage']}%)
📷 {stats['normal_count']}
图文类型 ({stats['normal_percentage']}%)
'''

    # Write the rendered page to the output file.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html_content)


def main() -> None:
    """Load the Stage5 JSON, print its statistics and generate the HTML page.

    Input is read from ``output_v2/stage5_with_search_results.json`` next to
    this script; output goes to a timestamped file inside ``visualization/``
    (created if necessary), also next to this script.
    """
    # Configure paths relative to the directory containing this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(script_dir, 'output_v2', 'stage5_with_search_results.json')
    output_dir = os.path.join(script_dir, 'visualization')
    os.makedirs(output_dir, exist_ok=True)

    # A timestamp in the file name keeps earlier runs from being overwritten.
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_path = os.path.join(output_dir, f'stage5_interactive_{timestamp}.html')

    # Load the data.
    print(f"📖 加载数据: {json_path}")
    data = load_data(json_path)
    print(f"✓ 加载了 {len(data)} 个原始特征")

    # Compute the statistics.
    print("📊 计算统计数据...")
    stats = calculate_statistics(data)
    print("✓ 统计完成:")
    print(f" - 原始特征: {stats['total_features']}")
    print(f" - 搜索词: {stats['total_search_words']}")
    print(f" - 帖子总数: {stats['total_notes']}")
    print(f" - 视频: {stats['video_count']} ({stats['video_percentage']}%)")
    print(f" - 图文: {stats['normal_count']} ({stats['normal_percentage']}%)")

    # Generate the HTML page.
    print("\n🎨 生成可视化页面...")
    generate_html(data, stats, output_path)
    print(f"✓ 生成完成: {output_path}")

    # Tell the user how to open the result.
    print("\n🌐 在浏览器中打开查看:")
    print(f" file://{output_path}")


if __name__ == '__main__':
    main()