#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Stage 6 evaluation result visualization tool.

Builds an interactive HTML page that combines the two layers of
evaluation results.
"""

import json
import os
from datetime import datetime
from typing import List, Dict, Any


def load_data(json_path: str) -> List[Dict[str, Any]]:
    """Load the evaluation data from a UTF-8 encoded JSON file.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _percentage(part, whole):
    """Return ``part / whole`` as a percentage rounded to one decimal.

    Yields the integer ``0`` when ``whole`` is not positive, so callers never
    divide by zero.
    """
    return round(part / whole * 100, 1) if whole > 0 else 0


def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate search and evaluation counters over all features.

    For every feature the function walks ``组合评估结果_分组`` ->
    ``top10_searches`` and, for each search item:

    * counts it as searched when ``search_result`` is truthy, otherwise as
      not searched;
    * for searched items, counts the notes under ``search_result.data.data``
      and splits them into video notes (``note_card.type == 'video'``) and
      everything else;
    * for searched items that carry a truthy ``evaluation_with_filter``,
      accumulates its ``total_notes``, ``filtered_count`` and the per-bucket
      counters found in ``statistics``.

    Keys that are missing *or* explicitly ``null`` in the JSON are treated as
    empty containers / zero instead of raising.

    Args:
        data: List of feature dictionaries as loaded from the JSON file.

    Returns:
        A flat dictionary of counters and percentages. Percentages are rounded
        to one decimal place and are ``0`` whenever their denominator is not
        positive.
    """
    total_search_words = 0
    searched_count = 0        # search items whose search was executed
    not_searched_count = 0    # search items without a search result
    total_notes = 0
    video_count = 0
    normal_count = 0

    # Evaluation counters.
    total_evaluated_notes = 0
    total_filtered = 0
    match_complete = 0  # bucket keyed '完全匹配(0.8-1.0)'
    match_similar = 0   # bucket keyed '相似匹配(0.6-0.79)'
    match_weak = 0      # bucket keyed '弱相似(0.5-0.59)'
    match_none = 0      # bucket keyed '无匹配(≤0.4)'

    for feature in data:
        # `or []` / `or {}` also covers JSON null, which a .get() default
        # alone would let through as None.
        for group in feature.get('组合评估结果_分组') or []:
            search_items = group.get('top10_searches') or []
            total_search_words += len(search_items)

            for search_item in search_items:
                search_result = search_item.get('search_result')
                if not search_result:
                    not_searched_count += 1
                    continue

                searched_count += 1
                notes = (search_result.get('data') or {}).get('data') or []
                total_notes += len(notes)

                # Anything that is not explicitly a video counts as a
                # normal (image/text) note.
                for note in notes:
                    note_type = (note.get('note_card') or {}).get('type', '')
                    if note_type == 'video':
                        video_count += 1
                    else:
                        normal_count += 1

                evaluation = search_item.get('evaluation_with_filter')
                if not evaluation:
                    continue

                total_evaluated_notes += evaluation.get('total_notes') or 0
                total_filtered += evaluation.get('filtered_count') or 0

                bucket_counts = evaluation.get('statistics') or {}
                match_complete += bucket_counts.get('完全匹配(0.8-1.0)') or 0
                match_similar += bucket_counts.get('相似匹配(0.6-0.79)') or 0
                match_weak += bucket_counts.get('弱相似(0.5-0.59)') or 0
                match_none += bucket_counts.get('无匹配(≤0.4)') or 0

    # Notes that survived filtering; the match rates are relative to these.
    total_remaining = (
        total_evaluated_notes - total_filtered
        if total_evaluated_notes > 0
        else 0
    )

    return {
        'total_features': len(data),
        'total_search_words': total_search_words,
        'searched_count': searched_count,
        'not_searched_count': not_searched_count,
        'searched_percentage': _percentage(searched_count, total_search_words),
        'total_notes': total_notes,
        'video_count': video_count,
        'normal_count': normal_count,
        'video_percentage': _percentage(video_count, total_notes),
        'normal_percentage': _percentage(normal_count, total_notes),
        # Evaluation statistics.
        'total_evaluated': total_evaluated_notes,
        'total_filtered': total_filtered,
        'total_remaining': total_remaining,
        'filter_rate': _percentage(total_filtered, total_evaluated_notes),
        'match_complete': match_complete,
        'match_similar': match_similar,
        'match_weak': match_weak,
        'match_none': match_none,
        'complete_rate': _percentage(match_complete, total_remaining),
        'similar_rate': _percentage(match_similar, total_remaining),
    }