#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Cascade search result visualization tool.

Builds a static HTML page that shows the full flow:
candidate words -> Top3 persona features -> search words -> search results.
"""

import json
import os
import sys
import webbrowser
from datetime import datetime
from html import escape
from pathlib import Path
from typing import Any, Dict, List, Set

# Similarity cut-offs used to bucket the global candidate words.
MATCHED_THRESHOLD = 0.8
PARTIAL_THRESHOLD = 0.5

# (bucket key / CSS modifier, section title, section hint) for the left panel.
_CANDIDATE_SECTIONS = (
    ('matched', '✅ 已匹配', '与人设相似度 ≥ 0.8'),
    ('partial', '🟡 部分匹配', '与人设特征相似度 0.5-0.8'),
    ('unmatched', '❌ 不匹配', '与人设特征相似度 < 0.5'),
)


def _esc(value: Any) -> str:
    """Return *value* as text that is safe in HTML content and attributes."""
    return escape('' if value is None else str(value), quote=True)


def _to_float(value: Any, default: float = 0.0) -> float:
    """Coerce *value* to float; use *default* for None / non-numeric data."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _json_for_script(obj: Any) -> str:
    """Serialize *obj* to JSON that can be inlined inside a <script> element.

    '<' only ever occurs inside JSON string values, so replacing it with its
    unicode escape keeps the data identical while preventing a value such as
    '</script>' from terminating the script element early.
    """
    return json.dumps(obj, ensure_ascii=False).replace('<', '\\u003c')


def load_json(file_path: str) -> Any:
    """Load and return the parsed content of a UTF-8 JSON file."""
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _load_note_mapping(file_path: str) -> Dict[str, Any]:
    """Load an optional result file and index its 'results' by 'note_id'.

    Returns an empty dict when the file does not exist. Entries without a
    truthy 'note_id' are skipped; a later duplicate id overwrites an earlier
    one.
    """
    if not os.path.exists(file_path):
        return {}
    payload = load_json(file_path)
    mapping: Dict[str, Any] = {}
    for result in payload.get('results', []):
        note_id = result.get('note_id')
        if note_id:
            mapping[note_id] = result
    return mapping


def load_all_data(output_dir: str = "output_v2") -> Dict[str, Any]:
    """
    Load every data file the visualization needs.

    Args:
        output_dir: Directory that contains the JSON files.

    Returns:
        Dict with the five required datasets plus 'stage7_mapping' and
        'stage8_mapping' (note_id -> result), which are empty when the
        optional deep-analysis / similarity files are absent.
    """
    print("正在加载数据文件...")

    required_files = {
        'filtered_features': 'filtered_features.json',
        'candidate_words': 'candidate_words.json',
        'search_queries': 'search_queries_evaluated.json',
        'search_results': 'search_results.json',
        'evaluated_results': 'evaluated_results.json',
    }
    data: Dict[str, Any] = {
        key: load_json(os.path.join(output_dir, file_name))
        for key, file_name in required_files.items()
    }

    # Optional deep-analysis data, indexed by note_id.
    data['stage7_mapping'] = _load_note_mapping(
        os.path.join(output_dir, 'deep_analysis_results.json'))
    data['stage8_mapping'] = _load_note_mapping(
        os.path.join(output_dir, 'similarity_analysis_results.json'))

    print(f"  ✓ 已加载 {len(data['filtered_features'])} 个原始特征")
    print(f"  ✓ 已加载 {len(data['candidate_words'])} 个候选词数据")
    print(f"  ✓ 已加载解构数据: {len(data['stage7_mapping'])} 个帖子")
    print(f"  ✓ 已加载相似度数据: {len(data['stage8_mapping'])} 个帖子")

    return data


def extract_global_candidates(data: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
    """
    Collect the candidate words of all features and bucket them by similarity.

    Candidates are de-duplicated by name, keeping the entry with the highest
    similarity. For candidates of type 'post' the '点最高人设相似度' value is
    used when present. Missing / non-numeric similarities count as 0.0.

    Returns:
        {
            'matched': [...],    # similarity >= 0.8
            'partial': [...],    # 0.5 <= similarity < 0.8
            'unmatched': [...]   # similarity < 0.5
        }
        Each list is sorted by similarity, descending.
    """
    print("\n提取全局候选词...")

    best_by_name: Dict[str, Dict[str, Any]] = {}

    for feature_data in data['candidate_words']:
        candidates_by_base = feature_data.get('高相似度候选_按base_word') or {}
        for candidates in candidates_by_base.values():
            for cand in candidates or []:
                cand_name = cand.get('候选词', '')
                if not cand_name:
                    continue

                similarity = cand.get('相似度', 0)
                if cand.get('候选词类型') == 'post':
                    similarity = cand.get('点最高人设相似度', similarity)
                # Coerce so that None / strings cannot break the comparisons.
                similarity = _to_float(similarity)

                previous = best_by_name.get(cand_name)
                if previous is None or similarity > previous['相似度']:
                    best_by_name[cand_name] = {
                        '名称': cand_name,
                        '类型': cand.get('候选词类型', 'unknown'),
                        '相似度': similarity,
                        '特征类型': cand.get('特征类型', ''),
                        '来源路径': cand.get('来源路径', ''),
                        '匹配说明': cand.get('匹配说明', ''),
                    }

    result: Dict[str, List[Dict[str, Any]]] = {
        'matched': [],
        'partial': [],
        'unmatched': [],
    }
    for cand in best_by_name.values():
        similarity = cand['相似度']
        if similarity >= MATCHED_THRESHOLD:
            result['matched'].append(cand)
        elif similarity >= PARTIAL_THRESHOLD:
            result['partial'].append(cand)
        else:
            result['unmatched'].append(cand)

    for bucket in result.values():
        bucket.sort(key=lambda item: item['相似度'], reverse=True)

    print(f"  ✓ 已匹配: {len(result['matched'])} 个")
    print(f"  ✓ 部分匹配: {len(result['partial'])} 个")
    print(f"  ✓ 不匹配: {len(result['unmatched'])} 个")

    return result


def _render_candidate_item(cand: Dict[str, Any], css_class: str) -> str:
    """Render one candidate row of the left panel."""
    is_post = cand.get('类型') == 'post'
    icon = '📝' if is_post else '👤'
    type_label = '帖子' if is_post else '人设'
    name = _esc(cand.get('名称', ''))
    similarity = _to_float(cand.get('相似度'))
    return (
        # title= exposes the full name; the CSS truncates long names.
        f'<div class="candidate-item {css_class}" title="{name}">'
        f'<div class="candidate-icon">{icon}</div>'
        '<div class="candidate-info">'
        f'<div class="candidate-name">{name}</div>'
        '<div class="candidate-meta">'
        f'<span class="candidate-type">{type_label}</span>'
        f'<span class="candidate-similarity">{similarity:.2f}</span>'
        '</div></div></div>\n'
    )


def _render_candidate_section(title: str, hint: str, css_class: str,
                              candidates: List[Dict[str, Any]]) -> str:
    """Render one titled bucket (matched / partial / unmatched) of candidates."""
    items = ''.join(_render_candidate_item(cand, css_class) for cand in candidates)
    return (
        '<div class="candidates-section">\n'
        f'<div class="section-title">{_esc(title)} '
        f'<span class="section-count">({len(candidates)})</span></div>\n'
        f'<div class="section-hint">{_esc(hint)}</div>\n'
        f'<div class="candidates-list">\n{items}</div>\n'
        '</div>\n'
    )


def render_left_candidates_html(global_candidates: Dict[str, List[Dict[str, Any]]]) -> str:
    """
    Render the fixed left-hand candidate panel.

    Args:
        global_candidates: Candidates bucketed under 'matched', 'partial'
            and 'unmatched' (a missing bucket is treated as empty).

    Returns:
        HTML string. All candidate values are HTML-escaped.
    """
    sections = ''.join(
        _render_candidate_section(title, hint, key, global_candidates.get(key) or [])
        for key, title, hint in _CANDIDATE_SECTIONS
    )
    return (
        '<div class="left-candidates-panel">\n'
        '<div class="candidates-header">\n'
        '<div class="candidates-title">📚 可用候选词</div>\n'
        '<div class="candidates-hint">此区域固定展示 不随滚动</div>\n'
        '</div>\n'
        f'<div class="candidates-content">\n{sections}</div>\n'
        '</div>\n'
    )


def render_cascade_flow_html(data: Dict[str, Any]) -> str:
    """
    Render the middle cascade panel (three layers per feature).

    Every feature in data['evaluated_results'] is rendered into its own
    container; the page script shows only the currently selected one, so
    switching features needs no page reload.

    Returns:
        HTML string.
    """
    features = data.get('evaluated_results') or []
    cascades = ''.join(
        render_single_cascade(feature, idx, data)
        for idx, feature in enumerate(features)
    )
    return (
        '<div class="cascade-flow-panel">\n'
        '<div class="cascade-header">\n'
        '<div class="cascade-title">🔄 级联搜索流程</div>\n'
        '</div>\n'
        f'<div class="cascade-content" id="cascadeContent">\n{cascades}</div>\n'
        '</div>\n'
    )


def render_single_cascade(feature_data: Dict[str, Any], feature_idx: int, data: Dict[str, Any]) -> str:
    """
    Render the cascade of a single feature.

    Args:
        feature_data: One entry of the evaluated results.
        feature_idx: Index of the feature; written into data attributes and
            onclick handlers so the page script can address the markup.
        data: All loaded data (not read here; kept for interface
            compatibility).

    Returns:
        HTML string with layer 1 (original feature), layer 2 (up to three
        persona matches) and layer 3 (one search-word group per entry of
        '组合评估结果_分组').
    """
    original_feature = feature_data.get('原始特征名称', '')
    top3_matches = feature_data.get('top3匹配信息') or []
    groups = feature_data.get('组合评估结果_分组') or []

    parts = [f'<div class="feature-cascade" data-feature-idx="{feature_idx}">\n']

    # Layer 1: the original feature plus the button that opens the selector.
    parts.append(
        '<div class="cascade-layer">\n'
        '<div class="layer-title">📌 帖子选题点</div>\n'
        '<div class="selected-feature">\n'
        f'<span class="feature-name">{_esc(original_feature)}</span>\n'
        '<button class="switch-feature-btn" onclick="showFeatureSelector()">切换特征</button>\n'
        '</div>\n'
        '</div>\n'
        '<div class="cascade-arrow">↓</div>\n'
    )

    # Layer 2: Top3 persona features; card N selects search-word group N.
    parts.append(
        '<div class="cascade-layer">\n'
        '<div class="layer-title">🎯 Top3人设特征 相似度(x)</div>\n'
        '<div class="top3-container">\n'
    )
    for match_idx, match in enumerate(top3_matches[:3]):
        card_class = 'top3-card top1-card' if match_idx == 0 else 'top3-card'
        similarity = _to_float(match.get('相似度', 0))
        parts.append(
            f'<div class="{card_class}" data-feature-idx="{feature_idx}" '
            f'data-match-idx="{match_idx}" '
            f'onclick="selectBaseWord({feature_idx}, {match_idx})">\n'
            f'<div class="top3-rank">Top{match_idx + 1}</div>\n'
            f'<div class="top3-name">{_esc(match.get("人设特征名称", ""))}</div>\n'
            f'<div class="top3-similarity">相似度: {similarity:.2f}</div>\n'
            '</div>\n'
        )
    parts.append('</div>\n</div>\n')

    # Layer 3: search words. All groups are emitted; the script shows one.
    if groups:
        parts.append(
            '<div class="cascade-arrow">↓</div>\n'
            '<div class="cascade-layer">\n'
            '<div class="layer-title">🔍 搜索词生成</div>\n'
        )
        for group_idx, group in enumerate(groups):
            parts.append(render_search_words_group(group, feature_idx, group_idx))
        parts.append('</div>\n')

    parts.append('</div>\n')
    return ''.join(parts)


def render_search_words_group(group: Dict[str, Any], feature_idx: int, group_idx: int) -> str:
    """
    Render one search-word group (all search words of one base word).

    Args:
        group: Group data; reads 'base_word', 'top10_searches' and
            'available_words'.
        feature_idx: Index of the owning feature.
        group_idx: Index of the group inside the feature.

    Returns:
        HTML string.
    """
    base_word = group.get('base_word', '')
    searches = group.get('top10_searches') or []
    available_words = group.get('available_words') or []

    cards = ''.join(
        render_search_word_card(search, feature_idx, group_idx, sw_idx, available_words)
        for sw_idx, search in enumerate(searches)
    )
    return (
        f'<div class="search-group" data-feature-idx="{feature_idx}" '
        f'data-group-idx="{group_idx}">\n'
        '<div class="base-word-label">中心词: '
        f'<span class="base-word-value">{_esc(base_word)}</span></div>\n'
        f'{cards}'
        '</div>\n'
    )


def render_search_word_card(search: Dict[str, Any], feature_idx: int, group_idx: int, sw_idx: int, available_words: List) -> str:
    """
    Render the card of a single search word.

    Args:
        search: Search-word data; reads 'search_word', 'score', 'reasoning'
            and 'search_result' (a non-None value marks it as searched).
        feature_idx, group_idx, sw_idx: Position of the search word.
        available_words: Candidate pool; items may be dicts carrying a
            '候选词' key or plain values. Only the first 10 are shown.

    Returns:
        HTML string. All data values are HTML-escaped.
    """
    search_word = search.get('search_word', '')
    score = _to_float(search.get('score', 0))
    reasoning = search.get('reasoning', '')
    has_result = search.get('search_result') is not None

    status_icon = '✅' if has_result else '⏸️'
    status_text = '已搜索' if has_result else '未搜索'
    status_class = 'searched' if has_result else 'not-searched'

    # str() keeps join() from failing on non-string pool entries.
    raw_names = (
        word.get('候选词', '') if isinstance(word, dict) else word
        for word in (available_words or [])[:10]
    )
    cand_names = [str(name) for name in raw_names if name not in (None, '')]
    cand_display = ', '.join(cand_names) if cand_names else '无'

    card_html = (
        f'<div class="search-word-card {status_class}" '
        f'data-feature-idx="{feature_idx}" data-group-idx="{group_idx}" '
        f'data-sw-idx="{sw_idx}" '
        f'onclick="selectSearchWord({feature_idx}, {group_idx}, {sw_idx})">\n'
        '<div class="sw-header">\n'
        f'<span class="sw-status">{status_icon} {status_text}</span>\n'
        f'<span class="sw-rank">#{sw_idx + 1}</span>\n'
        '</div>\n'
        '<div class="sw-candidates-pool">\n'
        '<div class="sw-label">可用候选词池:</div>\n'
        f'<div class="sw-candidates">{_esc(cand_display)}</div>\n'
        '</div>\n'
        '<div class="sw-arrow-container">\n'
        '<div class="sw-arrow">\n'
        '<span class="arrow-line">↓</span>\n'
        f'<span class="arrow-score">score: {score:.2f}</span>\n'
        '</div>\n'
        '</div>\n'
        '<div class="sw-result">\n'
        f'<div class="sw-query">{_esc(search_word)}</div>\n'
        '</div>\n'
        '<div class="sw-reasoning">\n'
        '<div class="reasoning-label">💡 LLM推理理由:</div>\n'
        f'<div class="reasoning-content">{_esc(reasoning)}</div>\n'
        '</div>\n'
        '</div>\n'
    )
    return card_html


def generate_html(data: Dict[str, Any], global_candidates: Dict[str, List[Dict[str, Any]]]) -> str:
    """
    Build the complete HTML page.

    Args:
        data: All loaded data; reads 'evaluated_results' and, when present,
            'stage7_mapping' / 'stage8_mapping'.
        global_candidates: Bucketed global candidates for the left panel.

    Returns:
        Complete HTML document with the stylesheet, the data (as JSON) and
        the page script inlined.
    """
    print("\n正在生成HTML...")

    # Data the page script works on.
    data_json = _json_for_script(data['evaluated_results'])
    stage7_json = _json_for_script(data.get('stage7_mapping', {}))
    stage8_json = _json_for_script(data.get('stage8_mapping', {}))

    left_html = render_left_candidates_html(global_candidates)
    cascade_html = render_cascade_flow_html(data)
    css = get_css_styles()
    js = get_javascript_code()

    html_template = f'''<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>级联搜索结果可视化</title>
<style>
{css}
</style>
</head>
<body>
<div class="page-header">
<div class="header-title">级联搜索结果可视化</div>
</div>
<div class="main-layout">
{left_html}
{cascade_html}
<div class="right-results-panel">
<div class="results-header">
<div class="results-title">📝 搜索结果卡片</div>
<div class="results-subtitle" id="resultsSubtitle">请选择一个搜索词查看结果</div>
</div>
<div class="results-content" id="resultsContent">
<div class="empty-results">
<div class="empty-icon">🔍</div>
<div class="empty-text">选择搜索词后,这里将显示对应的搜索结果</div>
</div>
</div>
</div>
</div>
<div class="modal-overlay" id="featureSelectorModal">
<div class="modal-window">
<div class="modal-header">
<div class="modal-title">选择特征</div>
<button class="modal-close-btn" onclick="closeFeatureSelector()">&times;</button>
</div>
<div class="modal-body">
<div class="feature-list" id="featureList"></div>
</div>
</div>
</div>
<script>
const allData = {data_json};
const stage7Data = {stage7_json};
const stage8Data = {stage8_json};
let currentFeatureIdx = 0;
let currentGroupIdx = 0;
let currentSwIdx = 0;
{js}
</script>
</body>
</html>
'''

    print("  ✓ HTML生成完成")
    return html_template


def get_css_styles() -> str:
    """Return the stylesheet that is inlined into the generated page."""
    return '''
    * { margin: 0; padding: 0; box-sizing: border-box; }
    body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif; background: #f5f7fa; color: #333; overflow-x: hidden; }

    /* 页面头部 */
    .page-header { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; text-align: center; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }
    .header-title { font-size: 24px; font-weight: bold; margin-bottom: 5px; }
    .header-subtitle { font-size: 14px; opacity: 0.9; }

    /* 主布局 - 三栏 */
    .main-layout { display: flex; gap: 20px; padding: 20px; height: calc(100vh - 100px); }

    /* 左侧候选词面板 - 固定 */
    .left-candidates-panel { width: 280px; background: white; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); position: sticky; top: 20px; height: fit-content; max-height: calc(100vh - 140px); display: flex; flex-direction: column; }
    .candidates-header { padding: 15px; border-bottom: 2px solid #e5e7eb; }
    .candidates-title { font-size: 16px; font-weight: 600; color: #374151; margin-bottom: 5px; }
    .candidates-hint { font-size: 11px; color: #ef4444; font-weight: 500; }
    .candidates-content { flex: 1; overflow-y: auto; padding: 10px; }
    .candidates-section { margin-bottom: 15px; }
    .section-title { font-size: 13px; font-weight: 600; margin-bottom: 5px; display: flex; align-items: center; gap: 5px; }
    .section-count { color: #6b7280; font-size: 12px; }
    .section-hint { font-size: 11px; color: #6b7280; margin-bottom: 8px; }
    .candidates-list { display: flex; flex-direction: column; gap: 6px; }
    .candidate-item { display: flex; align-items: center; gap: 8px; padding: 8px; border-radius: 6px; border: 1px solid #e5e7eb; transition: all 0.2s; cursor: pointer; }
    .candidate-item:hover { box-shadow: 0 2px 4px rgba(0,0,0,0.1); transform: translateY(-1px); }
    .candidate-item.matched { background: #f0fdf4; border-color: #86efac; }
    .candidate-item.partial { background: #fffbeb; border-color: #fcd34d; }
    .candidate-item.unmatched { background: #fef2f2; border-color: #fca5a5; }
    .candidate-icon { font-size: 18px; flex-shrink: 0; }
    .candidate-info { flex: 1; min-width: 0; }
    .candidate-name { font-size: 12px; font-weight: 500; color: #374151; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
    .candidate-meta { display: flex; justify-content: space-between; align-items: center; margin-top: 2px; }
    .candidate-type { font-size: 10px; color: #6b7280; }
    .candidate-similarity { font-size: 10px; font-weight: 600; color: #10b981; }

    /* 中间级联流程面板 */
    .cascade-flow-panel { flex: 1; background: white; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); overflow-y: auto; padding: 20px; }
    .cascade-header { margin-bottom: 20px; padding-bottom: 10px; border-bottom: 2px solid #e5e7eb; }
    .cascade-title { font-size: 18px; font-weight: 600; color: #374151; }
    .cascade-content { display: flex; flex-direction: column; gap: 15px; }
    .cascade-layer { background: #f9fafb; border-radius: 8px; padding: 15px; }
    .layer-title { font-size: 14px; font-weight: 600; color: #6b7280; margin-bottom: 10px; }

    /* One container per feature / per search-word group; the script toggles [hidden]. */
    .feature-cascade { display: flex; flex-direction: column; gap: 15px; }
    .feature-cascade[hidden], .search-group[hidden] { display: none; }

    /* 层级1: 特征选择器 */
    .selected-feature { display: flex; justify-content: space-between; align-items: center; padding: 12px; background: white; border-radius: 6px; border: 2px solid #667eea; }
    .feature-name { font-size: 15px; font-weight: 600; color: #374151; }
    .switch-feature-btn { padding: 6px 12px; background: #667eea; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px; transition: all 0.2s; }
    .switch-feature-btn:hover { background: #5568d3; }

    /* 层级2: Top3卡片 */
    .top3-container { display: flex; gap: 10px; }
    .top3-card { flex: 1; padding: 12px; background: white; border-radius: 6px; border: 2px solid #e5e7eb; cursor: pointer; transition: all 0.2s; }
    .top3-card:hover { border-color: #667eea; box-shadow: 0 2px 6px rgba(102, 126, 234, 0.2); }
    .top3-card.top1-card { border-color: #10b981; background: #f0fdf4; }
    .top3-card.top1-card:hover { border-color: #059669; }
    .top3-card.selected { border-color: #667eea; box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.2); }
    .top3-rank { font-size: 11px; font-weight: 600; color: #6b7280; margin-bottom: 4px; }
    .top3-name { font-size: 14px; font-weight: 600; color: #374151; margin-bottom: 4px; }
    .top3-similarity { font-size: 12px; color: #10b981; }

    /* 级联箭头 */
    .cascade-arrow { text-align: center; font-size: 24px; color: #667eea; margin: 5px 0; }

    /* 层级3: 搜索词 */
    .base-word-label { font-size: 13px; color: #6b7280; margin-bottom: 12px; }
    .base-word-value { font-weight: 600; color: #10b981; }
    .search-word-card { background: white; border-radius: 8px; border: 2px solid #e5e7eb; padding: 15px; margin-bottom: 12px; cursor: pointer; transition: all 0.2s; }
    .search-word-card:hover { border-color: #667eea; box-shadow: 0 2px 6px rgba(0,0,0,0.1); }
    .search-word-card.searched { border-color: #10b981; }
    .search-word-card.selected { border-color: #667eea; box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.2); }
    .sw-header { display: flex; justify-content: space-between; align-items: center; margin-bottom: 10px; }
    .sw-status { font-size: 12px; font-weight: 600; color: #10b981; }
    .sw-rank { font-size: 11px; color: #6b7280; }
    .sw-candidates-pool { margin-bottom: 10px; }
    .sw-label { font-size: 11px; color: #6b7280; margin-bottom: 4px; }
    .sw-candidates { font-size: 12px; color: #374151; background: #f9fafb; padding: 6px; border-radius: 4px; }
    .sw-arrow-container { text-align: center; margin: 10px 0; }
    .sw-arrow { display: inline-flex; align-items: center; gap: 8px; }
    .arrow-line { font-size: 20px; color: #667eea; }
    .arrow-score { font-size: 12px; font-weight: 600; color: #667eea; background: #ede9fe; padding: 2px 8px; border-radius: 4px; }
    .sw-result { text-align: center; margin-bottom: 10px; }
    .sw-query { font-size: 16px; font-weight: 600; color: #374151; background: #f0fdf4; padding: 8px; border-radius: 6px; border: 1px solid #86efac; }
    .sw-reasoning { background: #fffbeb; padding: 10px; border-radius: 6px; border: 1px solid #fcd34d; }
    .reasoning-label { font-size: 12px; font-weight: 600; color: #374151; margin-bottom: 4px; }
    .reasoning-content { font-size: 12px; color: #6b7280; line-height: 1.5; }

    /* 右侧结果面板 */
    .right-results-panel { width: 500px; background: white; border-radius: 8px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); overflow-y: auto; display: flex; flex-direction: column; }
    .results-header { padding: 15px; border-bottom: 2px solid #e5e7eb; }
    .results-title { font-size: 16px; font-weight: 600; color: #374151; margin-bottom: 5px; }
    .results-subtitle { font-size: 12px; color: #6b7280; }
    .results-content { flex: 1; padding: 15px; }
    .empty-results { text-align: center; padding: 60px 20px; }
    .empty-icon { font-size: 48px; margin-bottom: 15px; }
    .empty-text { font-size: 14px; color: #6b7280; }
    .note-list { display: flex; flex-direction: column; gap: 12px; }
    .note-card { border: 1px solid #e5e7eb; border-radius: 6px; padding: 10px; }
    .note-card img { max-width: 100%; border-radius: 4px; margin-bottom: 6px; }
    .note-title { font-size: 13px; font-weight: 500; color: #374151; }

    /* Modal */
    .modal-overlay { display: none; position: fixed; top: 0; left: 0; right: 0; bottom: 0; background: rgba(0,0,0,0.5); z-index: 1000; align-items: center; justify-content: center; }
    .modal-overlay.active { display: flex; }
    .modal-window { background: white; border-radius: 12px; box-shadow: 0 10px 40px rgba(0,0,0,0.2); max-width: 600px; width: 90%; max-height: 80vh; display: flex; flex-direction: column; }
    .modal-header { padding: 20px; border-bottom: 1px solid #e5e7eb; display: flex; justify-content: space-between; align-items: center; }
    .modal-title { font-size: 18px; font-weight: 600; color: #374151; }
    .modal-close-btn { background: none; border: none; font-size: 28px; color: #6b7280; cursor: pointer; padding: 0; width: 32px; height: 32px; display: flex; align-items: center; justify-content: center; border-radius: 4px; }
    .modal-close-btn:hover { background: #f3f4f6; }
    .modal-body { flex: 1; overflow-y: auto; padding: 20px; }
    .feature-list { display: flex; flex-direction: column; gap: 10px; }
    .feature-list-item { padding: 12px; background: #f9fafb; border-radius: 6px; border: 2px solid #e5e7eb; cursor: pointer; transition: all 0.2s; }
    .feature-list-item:hover { border-color: #667eea; background: white; }
    .feature-list-item.active { border-color: #10b981; background: #f0fdf4; }
    '''


def get_javascript_code() -> str:
    """Return the page script.

    The script expects the globals allData, currentFeatureIdx,
    currentGroupIdx and currentSwIdx to be declared before it runs;
    generate_html emits them directly in front of this code.
    """
    return '''
    // Initialise once the DOM is ready.
    document.addEventListener('DOMContentLoaded', function() {
        console.log('页面加载完成');
        renderFeatureList();
        updateCascadeView();
    });

    // Escape a value before it is placed into innerHTML.
    function escapeHtml(value) {
        return String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;');
    }

    // Open the feature selector.
    function showFeatureSelector() {
        const modal = document.getElementById('featureSelectorModal');
        if (modal) modal.classList.add('active');
    }

    // Close the feature selector.
    function closeFeatureSelector() {
        const modal = document.getElementById('featureSelectorModal');
        if (modal) modal.classList.remove('active');
    }

    // Render the list of features inside the selector.
    function renderFeatureList() {
        const listEl = document.getElementById('featureList');
        if (!listEl) return;
        let html = '';

        allData.forEach((feature, idx) => {
            const name = feature['原始特征名称'];
            const activeClass = idx === currentFeatureIdx ? 'active' : '';
            html += `<div class="feature-list-item ${activeClass}" onclick="selectFeature(${idx})">${escapeHtml(name)}</div>`;
        });

        listEl.innerHTML = html;
    }

    // Select a feature.
    function selectFeature(featureIdx) {
        currentFeatureIdx = featureIdx;
        currentGroupIdx = 0;
        currentSwIdx = 0;

        closeFeatureSelector();
        updateCascadeView();
        renderFeatureList();
    }

    // Show the cascade of the current feature. Every feature is already in
    // the DOM, so this only toggles visibility (no reload, state is kept).
    function updateCascadeView() {
        document.querySelectorAll('.feature-cascade').forEach(el => {
            el.hidden = Number(el.dataset.featureIdx) !== currentFeatureIdx;
        });
        showGroup(currentFeatureIdx, currentGroupIdx);
    }

    // Show one search-word group and highlight its Top3 card.
    function showGroup(featureIdx, groupIdx) {
        document.querySelectorAll('.search-group').forEach(el => {
            const sameFeature = Number(el.dataset.featureIdx) === featureIdx;
            const sameGroup = Number(el.dataset.groupIdx) === groupIdx;
            el.hidden = !(sameFeature && sameGroup);
        });
        document.querySelectorAll('.top3-card').forEach(card => {
            const isCurrent = Number(card.dataset.featureIdx) === featureIdx
                && Number(card.dataset.matchIdx) === groupIdx;
            card.classList.toggle('selected', isCurrent);
        });
    }

    // Select a base word (Top3 card).
    function selectBaseWord(featureIdx, matchIdx) {
        currentFeatureIdx = featureIdx;
        currentGroupIdx = matchIdx;
        currentSwIdx = 0;

        showGroup(featureIdx, matchIdx);

        const feature = allData[currentFeatureIdx] || {};
        const groups = feature['组合评估结果_分组'] || [];
        if (groups[currentGroupIdx]) {
            console.log('切换到group:', currentGroupIdx);
        }
    }

    // Select a search word.
    function selectSearchWord(featureIdx, groupIdx, swIdx) {
        currentFeatureIdx = featureIdx;
        currentGroupIdx = groupIdx;
        currentSwIdx = swIdx;

        // The card is located through its data attributes rather than the
        // implicit global event object.
        document.querySelectorAll('.search-word-card').forEach(card => {
            const isCurrent = Number(card.dataset.featureIdx) === featureIdx
                && Number(card.dataset.groupIdx) === groupIdx
                && Number(card.dataset.swIdx) === swIdx;
            card.classList.toggle('selected', isCurrent);
        });

        renderSearchResults(featureIdx, groupIdx, swIdx);
    }

    // Render the search results of one search word.
    function renderSearchResults(featureIdx, groupIdx, swIdx) {
        const feature = allData[featureIdx];
        if (!feature) return;
        const groups = feature['组合评估结果_分组'] || [];
        const group = groups[groupIdx];
        if (!group) return;

        const searches = group['top10_searches'] || [];
        const search = searches[swIdx];
        if (!search) return;

        const searchWord = search['search_word'] || '';
        const searchResult = search['search_result'];

        const resultsContent = document.getElementById('resultsContent');
        const resultsSubtitle = document.getElementById('resultsSubtitle');
        if (!resultsContent || !resultsSubtitle) return;

        resultsSubtitle.textContent = `搜索词: ${searchWord}`;

        if (!searchResult) {
            resultsContent.innerHTML = `<div class="empty-results"><div class="empty-text">该搜索词未执行搜索</div></div>`;
            return;
        }

        const notes = searchResult.data?.data || [];

        if (notes.length === 0) {
            resultsContent.innerHTML = `<div class="empty-results"><div class="empty-icon">📭</div><div class="empty-text">未找到匹配的帖子</div></div>`;
            return;
        }

        // Note cards (simplified).
        let html = '<div class="note-list">';
        notes.forEach((note, idx) => {
            const card = note.note_card || {};
            const title = escapeHtml(card.display_title || '无标题');
            // NOTE(review): assumes image_list items are URL strings - confirm.
            const image = escapeHtml((card.image_list || [])[0] || '');

            html += `<div class="note-card">${image ? `<img src="${image}" alt="${title}">` : ''}<div class="note-title">${title}</div></div>`;
        });
        html += '</div>';

        resultsContent.innerHTML = html;
    }
    '''


def main():
    """Load the data, build the page, write it to disk and open it."""
    print("=" * 60)
    print("级联搜索结果可视化工具")
    print("=" * 60)

    data = load_all_data()
    global_candidates = extract_global_candidates(data)
    html_content = generate_html(data, global_candidates)

    output_path = "visualization/cascade_search_results.html"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html_content)

    print(f"\n✓ HTML文件已保存: {output_path}")

    # as_uri() yields a correctly formed, percent-encoded file:// URL on
    # every platform.
    abs_path = os.path.abspath(output_path)
    print("正在打开浏览器...")
    webbrowser.open(Path(abs_path).as_uri())

    print("\n" + "=" * 60)
    print("✅ 可视化生成完成!")
    print("=" * 60)


if __name__ == '__main__':
    main()