| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818 |
- #!/usr/bin/env python3
- # -*- coding: utf-8 -*-
- """
- Stage5搜索结果可视化工具
- 生成带图片轮播的交互式HTML页面
- """
- import json
- import os
- from datetime import datetime
- from typing import List, Dict, Any
def load_data(json_path: str) -> List[Dict[str, Any]]:
    """Read a UTF-8 JSON file and return its parsed content.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        Whatever the file decodes to; callers in this script expect a list
        of feature dictionaries.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_path, mode='r', encoding='utf-8') as source:
        raw_text = source.read()
    return json.loads(raw_text)
def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Aggregate feature, search-word and note counts for the summary panel.

    Every note whose ``note_card.type`` equals ``'video'`` is counted as a
    video; every other note (including ones without a type) is counted as a
    normal (image/text) note.

    Keys that are missing *or* explicitly ``null`` in the input are treated
    as empty, matching the tolerance of the JavaScript renderer embedded in
    the generated page.

    Args:
        data: List of feature dictionaries. Each may carry a list of search
            items under ``'组合评估结果'``; each search item may carry its
            notes under ``search_result -> data -> data``.

    Returns:
        A dictionary with ``total_features``, ``total_search_words``,
        ``total_notes``, ``video_count``, ``normal_count`` and the two
        percentages ``video_percentage`` / ``normal_percentage`` (rounded to
        one decimal place, ``0`` when there are no notes).
    """
    total_search_words = 0
    total_notes = 0
    video_count = 0

    for feature in data:
        # `or` (rather than a .get default) also covers an explicit null.
        search_results = feature.get('组合评估结果') or []
        total_search_words += len(search_results)

        for search_item in search_results:
            search_result = search_item.get('search_result') or {}
            notes = (search_result.get('data') or {}).get('data') or []
            total_notes += len(notes)
            video_count += sum(
                1
                for note in notes
                if ((note or {}).get('note_card') or {}).get('type') == 'video'
            )

    # Everything that is not a video is classified as a normal note.
    normal_count = total_notes - video_count

    def percentage(count: int) -> Any:
        """Return count as a percentage of all notes, or 0 without notes."""
        return round(count / total_notes * 100, 1) if total_notes > 0 else 0

    return {
        'total_features': len(data),
        'total_search_words': total_search_words,
        'total_notes': total_notes,
        'video_count': video_count,
        'normal_count': normal_count,
        'video_percentage': percentage(video_count),
        'normal_percentage': percentage(normal_count),
    }
def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any], output_path: str) -> None:
    """Render the interactive HTML report and write it to ``output_path``.

    The page consists of a statistics banner (filled in from ``stats`` at
    generation time), a collapsible left sidebar listing every feature and
    its search words, and a right pane with one result block per search word
    whose notes are shown as cards with an image carousel. Sidebar and result
    pane are built in the browser from ``data``, which is embedded in the
    page as a JavaScript literal.

    Because ``data`` holds scraped, untrusted text, two layers of escaping
    are applied:

    * the embedded JSON has ``<`` (and U+2028/U+2029) replaced by ``\\uXXXX``
      escapes so a value such as ``</script>`` cannot terminate the script
      element;
    * the page's JavaScript passes every data-derived string through
      ``escapeHtml`` before inserting it into ``innerHTML``.

    Args:
        data: Feature dictionaries as loaded from the JSON input.
        stats: Result of ``calculate_statistics``; the keys
            ``total_features``, ``total_search_words``, ``total_notes``,
            ``video_count``, ``video_percentage``, ``normal_count`` and
            ``normal_percentage`` are read.
        output_path: Destination file; written as UTF-8, overwriting any
            existing file.

    Raises:
        KeyError: If ``stats`` lacks one of the keys listed above.
        OSError: If the output file cannot be written.
    """
    # Serialise the data for the page's JavaScript. The \uXXXX escapes are
    # valid in both JSON and JS string literals, so the decoded values are
    # unchanged while the raw text can no longer break out of <script>.
    data_json = (
        json.dumps(data, ensure_ascii=False, indent=2)
        .replace('<', '\\u003c')
        .replace('\u2028', '\\u2028')
        .replace('\u2029', '\\u2029')
    )

    html_content = f'''<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Stage5 搜索结果可视化</title>
    <style>
        * {{
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }}
        body {{
            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
            background: #f5f7fa;
            color: #333;
            overflow-x: hidden;
        }}
        /* 顶部统计面板 */
        .stats-panel {{
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            padding: 20px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }}
        .stats-container {{
            max-width: 1400px;
            margin: 0 auto;
            display: flex;
            justify-content: space-around;
            align-items: center;
            flex-wrap: wrap;
            gap: 20px;
        }}
        .stat-item {{
            text-align: center;
        }}
        .stat-value {{
            font-size: 32px;
            font-weight: bold;
            margin-bottom: 5px;
        }}
        .stat-label {{
            font-size: 14px;
            opacity: 0.9;
        }}
        /* 主容器 */
        .main-container {{
            display: flex;
            max-width: 1400px;
            margin: 20px auto;
            gap: 20px;
            padding: 0 20px;
            height: calc(100vh - 140px);
        }}
        /* 左侧导航 */
        .left-sidebar {{
            width: 30%;
            background: white;
            border-radius: 8px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            overflow-y: auto;
            position: sticky;
            top: 20px;
            height: fit-content;
            max-height: calc(100vh - 160px);
        }}
        .feature-group {{
            border-bottom: 1px solid #e5e7eb;
        }}
        .feature-header {{
            padding: 15px 20px;
            background: #f9fafb;
            cursor: pointer;
            user-select: none;
            transition: background 0.2s;
        }}
        .feature-header:hover {{
            background: #f3f4f6;
        }}
        .feature-header.active {{
            background: #667eea;
            color: white;
        }}
        .feature-title {{
            font-size: 16px;
            font-weight: 600;
            margin-bottom: 5px;
        }}
        .feature-meta {{
            font-size: 12px;
            color: #6b7280;
        }}
        .feature-header.active .feature-meta {{
            color: rgba(255,255,255,0.8);
        }}
        .search-words-list {{
            display: none;
            padding: 10px 0;
        }}
        .search-words-list.expanded {{
            display: block;
        }}
        .search-word-item {{
            padding: 12px 20px 12px 40px;
            cursor: pointer;
            border-left: 3px solid transparent;
            transition: all 0.2s;
        }}
        .search-word-item:hover {{
            background: #f9fafb;
            border-left-color: #667eea;
        }}
        .search-word-item.active {{
            background: #ede9fe;
            border-left-color: #7c3aed;
        }}
        .search-word-text {{
            font-size: 14px;
            font-weight: 500;
            color: #374151;
            margin-bottom: 4px;
        }}
        .search-word-score {{
            display: inline-block;
            padding: 2px 8px;
            border-radius: 12px;
            font-size: 11px;
            font-weight: 600;
            margin-left: 8px;
        }}
        .score-high {{
            background: #d1fae5;
            color: #065f46;
        }}
        .score-medium {{
            background: #fef3c7;
            color: #92400e;
        }}
        .score-low {{
            background: #fee2e2;
            color: #991b1b;
        }}
        .search-word-reasoning {{
            font-size: 12px;
            color: #6b7280;
            margin-top: 4px;
            display: -webkit-box;
            -webkit-line-clamp: 2;
            -webkit-box-orient: vertical;
            overflow: hidden;
        }}
        /* 右侧结果区 */
        .right-content {{
            flex: 1;
            overflow-y: auto;
            padding-bottom: 40px;
        }}
        .result-block {{
            background: white;
            border-radius: 8px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
            margin-bottom: 30px;
            padding: 20px;
            scroll-margin-top: 20px;
        }}
        .result-header {{
            margin-bottom: 20px;
            padding-bottom: 15px;
            border-bottom: 2px solid #e5e7eb;
        }}
        .result-title {{
            font-size: 20px;
            font-weight: 600;
            color: #111827;
            margin-bottom: 10px;
        }}
        .result-stats {{
            display: flex;
            gap: 15px;
            font-size: 13px;
            color: #6b7280;
        }}
        .stat-badge {{
            background: #f3f4f6;
            padding: 4px 10px;
            border-radius: 4px;
        }}
        /* 帖子网格 */
        .notes-grid {{
            display: grid;
            grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
            gap: 20px;
        }}
        .note-card {{
            border: 1px solid #e5e7eb;
            border-radius: 8px;
            overflow: hidden;
            cursor: pointer;
            transition: all 0.3s;
            background: white;
        }}
        .note-card:hover {{
            transform: translateY(-4px);
            box-shadow: 0 10px 25px rgba(0,0,0,0.15);
        }}
        /* 图片轮播 */
        .image-carousel {{
            position: relative;
            width: 100%;
            height: 280px;
            background: #f3f4f6;
            overflow: hidden;
        }}
        .carousel-images {{
            display: flex;
            height: 100%;
            transition: transform 0.3s ease;
        }}
        .carousel-image {{
            min-width: 100%;
            height: 100%;
            object-fit: cover;
        }}
        .carousel-btn {{
            position: absolute;
            top: 50%;
            transform: translateY(-50%);
            background: rgba(0,0,0,0.5);
            color: white;
            border: none;
            width: 32px;
            height: 32px;
            border-radius: 50%;
            cursor: pointer;
            font-size: 16px;
            display: none;
            align-items: center;
            justify-content: center;
            transition: background 0.2s;
            z-index: 10;
        }}
        .carousel-btn:hover {{
            background: rgba(0,0,0,0.7);
        }}
        .carousel-btn.prev {{
            left: 8px;
        }}
        .carousel-btn.next {{
            right: 8px;
        }}
        .note-card:hover .carousel-btn {{
            display: flex;
        }}
        .carousel-indicators {{
            position: absolute;
            bottom: 10px;
            left: 50%;
            transform: translateX(-50%);
            display: flex;
            gap: 6px;
            z-index: 10;
        }}
        .dot {{
            width: 8px;
            height: 8px;
            border-radius: 50%;
            background: rgba(255,255,255,0.5);
            cursor: pointer;
            transition: all 0.2s;
        }}
        .dot.active {{
            background: white;
            width: 24px;
            border-radius: 4px;
        }}
        .image-counter {{
            position: absolute;
            top: 10px;
            right: 10px;
            background: rgba(0,0,0,0.6);
            color: white;
            padding: 4px 8px;
            border-radius: 4px;
            font-size: 12px;
            z-index: 10;
        }}
        /* 帖子信息 */
        .note-info {{
            padding: 12px;
        }}
        .note-title {{
            font-size: 14px;
            font-weight: 500;
            color: #111827;
            margin-bottom: 8px;
            display: -webkit-box;
            -webkit-line-clamp: 2;
            -webkit-box-orient: vertical;
            overflow: hidden;
            line-height: 1.4;
        }}
        .note-meta {{
            display: flex;
            align-items: center;
            justify-content: space-between;
            font-size: 12px;
            color: #6b7280;
        }}
        .note-type {{
            padding: 3px 8px;
            border-radius: 4px;
            font-weight: 500;
        }}
        .type-video {{
            background: #dbeafe;
            color: #1e40af;
        }}
        .type-normal {{
            background: #d1fae5;
            color: #065f46;
        }}
        .note-author {{
            display: flex;
            align-items: center;
            gap: 6px;
        }}
        .author-avatar {{
            width: 24px;
            height: 24px;
            border-radius: 50%;
        }}
        /* SVG连线层 */
        #connection-svg {{
            position: fixed;
            top: 0;
            left: 0;
            width: 100%;
            height: 100%;
            pointer-events: none;
            z-index: 1;
        }}
        .connection-line {{
            stroke: #cbd5e1;
            stroke-width: 1;
            stroke-dasharray: 5,5;
            fill: none;
            opacity: 0.3;
            transition: all 0.2s;
        }}
        .connection-line.active {{
            stroke: #667eea;
            stroke-width: 2;
            stroke-dasharray: none;
            opacity: 1;
        }}
        /* 滚动条样式 */
        ::-webkit-scrollbar {{
            width: 8px;
            height: 8px;
        }}
        ::-webkit-scrollbar-track {{
            background: #f1f1f1;
        }}
        ::-webkit-scrollbar-thumb {{
            background: #888;
            border-radius: 4px;
        }}
        ::-webkit-scrollbar-thumb:hover {{
            background: #555;
        }}
    </style>
</head>
<body>
    <!-- 统计面板 -->
    <div class="stats-panel">
        <div class="stats-container">
            <div class="stat-item">
                <div class="stat-value">📊 {stats['total_features']}</div>
                <div class="stat-label">原始特征数</div>
            </div>
            <div class="stat-item">
                <div class="stat-value">🔍 {stats['total_search_words']}</div>
                <div class="stat-label">搜索词数</div>
            </div>
            <div class="stat-item">
                <div class="stat-value">📝 {stats['total_notes']}</div>
                <div class="stat-label">帖子总数</div>
            </div>
            <div class="stat-item">
                <div class="stat-value">🎬 {stats['video_count']}</div>
                <div class="stat-label">视频类型 ({stats['video_percentage']}%)</div>
            </div>
            <div class="stat-item">
                <div class="stat-value">📷 {stats['normal_count']}</div>
                <div class="stat-label">图文类型 ({stats['normal_percentage']}%)</div>
            </div>
        </div>
    </div>
    <!-- SVG连线层 -->
    <svg id="connection-svg"></svg>
    <!-- 主容器 -->
    <div class="main-container">
        <!-- 左侧导航 -->
        <div class="left-sidebar" id="leftSidebar">
            <!-- 通过JavaScript动态生成 -->
        </div>
        <!-- 右侧结果区 -->
        <div class="right-content" id="rightContent">
            <!-- 通过JavaScript动态生成 -->
        </div>
    </div>
    <script>
        // 数据
        const data = {data_json};

        // Escape a value for safe use in HTML text and quoted attributes.
        // null/undefined become an empty string.
        function escapeHtml(value) {{
            return String(value ?? '')
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
        }}

        // 渲染左侧导航
        function renderLeftSidebar() {{
            const sidebar = document.getElementById('leftSidebar');
            let html = '';
            data.forEach((feature, featureIdx) => {{
                const searchWords = feature['组合评估结果'] || [];
                // Missing or non-numeric weights fall back to 0 instead of throwing.
                const weight = Number(feature['权重']) || 0;
                html += `
                    <div class="feature-group">
                        <div class="feature-header" onclick="toggleFeature(${{featureIdx}})" id="feature-header-${{featureIdx}}">
                            <div class="feature-title">${{escapeHtml(feature['原始特征名称'])}}</div>
                            <div class="feature-meta">
                                ${{escapeHtml(feature['来源层级'])}} · 权重: ${{weight.toFixed(2)}} · ${{searchWords.length}}个搜索词
                            </div>
                        </div>
                        <div class="search-words-list" id="search-words-${{featureIdx}}">
                `;
                searchWords.forEach((sw, swIdx) => {{
                    const score = Number(sw.score) || 0;
                    const scoreClass = score >= 0.9 ? 'score-high' : score >= 0.7 ? 'score-medium' : 'score-low';
                    const blockId = `block-${{featureIdx}}-${{swIdx}}`;
                    const reasoning = escapeHtml(sw.reasoning);
                    html += `
                        <div class="search-word-item" onclick="scrollToBlock('${{blockId}}')"
                             id="sw-${{featureIdx}}-${{swIdx}}"
                             data-block-id="${{blockId}}">
                            <div class="search-word-text">
                                ${{escapeHtml(sw.search_word)}}
                                <span class="search-word-score ${{scoreClass}}">${{score.toFixed(2)}}</span>
                            </div>
                            <div class="search-word-reasoning" title="${{reasoning}}">
                                ${{reasoning}}
                            </div>
                        </div>
                    `;
                }});
                html += `
                        </div>
                    </div>
                `;
            }});
            sidebar.innerHTML = html;
        }}

        // 渲染右侧结果区
        function renderRightContent() {{
            const content = document.getElementById('rightContent');
            let html = '';
            data.forEach((feature, featureIdx) => {{
                const searchWords = feature['组合评估结果'] || [];
                searchWords.forEach((sw, swIdx) => {{
                    const blockId = `block-${{featureIdx}}-${{swIdx}}`;
                    const searchResult = sw.search_result || {{}};
                    const notes = searchResult.data?.data || [];
                    const videoCount = notes.filter(n => n.note_card?.type === 'video').length;
                    const normalCount = notes.length - videoCount;
                    html += `
                        <div class="result-block" id="${{blockId}}">
                            <div class="result-header">
                                <div class="result-title">${{escapeHtml(sw.search_word)}}</div>
                                <div class="result-stats">
                                    <span class="stat-badge">📝 ${{notes.length}} 条帖子</span>
                                    <span class="stat-badge">🎬 ${{videoCount}} 视频</span>
                                    <span class="stat-badge">📷 ${{normalCount}} 图文</span>
                                </div>
                            </div>
                            <div class="notes-grid">
                                ${{notes.map((note, noteIdx) => renderNoteCard(note, featureIdx, swIdx, noteIdx)).join('')}}
                            </div>
                        </div>
                    `;
                }});
            }});
            content.innerHTML = html;
        }}

        // 渲染单个帖子卡片
        function renderNoteCard(note, featureIdx, swIdx, noteIdx) {{
            const card = note.note_card || {{}};
            const images = card.image_list || [];
            const title = card.display_title || '无标题';
            const noteType = card.type || 'normal';
            // Only two styles exist; every non-video type uses the normal one.
            const typeClass = noteType === 'video' ? 'video' : 'normal';
            const noteId = note.id || '';
            const user = card.user || {{}};
            const userName = user.nick_name || '未知用户';
            const userAvatar = user.avatar || '';
            const carouselId = `carousel-${{featureIdx}}-${{swIdx}}-${{noteIdx}}`;
            // The note id travels in a data attribute rather than inside the
            // inline handler source, so it can never be parsed as script.
            return `
                <div class="note-card" data-note-id="${{escapeHtml(noteId)}}" onclick="openNote(this.dataset.noteId)">
                    <div class="image-carousel" id="${{carouselId}}">
                        <div class="carousel-images">
                            ${{images.map(img => `<img class="carousel-image" src="${{escapeHtml(img)}}" alt="帖子图片" loading="lazy">`).join('')}}
                        </div>
                        ${{images.length > 1 ? `
                            <button class="carousel-btn prev" onclick="event.stopPropagation(); changeImage('${{carouselId}}', -1)">←</button>
                            <button class="carousel-btn next" onclick="event.stopPropagation(); changeImage('${{carouselId}}', 1)">→</button>
                            <div class="carousel-indicators">
                                ${{images.map((_, i) => `<span class="dot ${{i === 0 ? 'active' : ''}}" onclick="event.stopPropagation(); goToImage('${{carouselId}}', ${{i}})"></span>`).join('')}}
                            </div>
                            <span class="image-counter">1/${{images.length}}</span>
                        ` : ''}}
                    </div>
                    <div class="note-info">
                        <div class="note-title">${{escapeHtml(title)}}</div>
                        <div class="note-meta">
                            <span class="note-type type-${{typeClass}}">
                                ${{noteType === 'video' ? '🎬 视频' : '📷 图文'}}
                            </span>
                            <div class="note-author">
                                ${{userAvatar ? `<img class="author-avatar" src="${{escapeHtml(userAvatar)}}" alt="${{escapeHtml(userName)}}">` : ''}}
                                <span>${{escapeHtml(userName)}}</span>
                            </div>
                        </div>
                    </div>
                </div>
            `;
        }}

        // 图片轮播逻辑
        const carouselStates = {{}};
        function changeImage(carouselId, direction) {{
            if (!carouselStates[carouselId]) {{
                carouselStates[carouselId] = {{ currentIndex: 0 }};
            }}
            const carousel = document.getElementById(carouselId);
            const imagesContainer = carousel.querySelector('.carousel-images');
            const images = carousel.querySelectorAll('.carousel-image');
            const dots = carousel.querySelectorAll('.dot');
            const counter = carousel.querySelector('.image-counter');
            let newIndex = carouselStates[carouselId].currentIndex + direction;
            if (newIndex < 0) newIndex = images.length - 1;
            if (newIndex >= images.length) newIndex = 0;
            carouselStates[carouselId].currentIndex = newIndex;
            imagesContainer.style.transform = `translateX(-${{newIndex * 100}}%)`;
            // 更新指示器
            dots.forEach((dot, i) => {{
                dot.classList.toggle('active', i === newIndex);
            }});
            // 更新计数器
            if (counter) {{
                counter.textContent = `${{newIndex + 1}}/${{images.length}}`;
            }}
        }}
        function goToImage(carouselId, index) {{
            if (!carouselStates[carouselId]) {{
                carouselStates[carouselId] = {{ currentIndex: 0 }};
            }}
            const carousel = document.getElementById(carouselId);
            const imagesContainer = carousel.querySelector('.carousel-images');
            const dots = carousel.querySelectorAll('.dot');
            const counter = carousel.querySelector('.image-counter');
            carouselStates[carouselId].currentIndex = index;
            imagesContainer.style.transform = `translateX(-${{index * 100}}%)`;
            // 更新指示器
            dots.forEach((dot, i) => {{
                dot.classList.toggle('active', i === index);
            }});
            // 更新计数器
            if (counter) {{
                counter.textContent = `${{index + 1}}/${{dots.length}}`;
            }}
        }}

        // 展开/折叠特征组
        function toggleFeature(featureIdx) {{
            const searchWordsList = document.getElementById(`search-words-${{featureIdx}}`);
            const featureHeader = document.getElementById(`feature-header-${{featureIdx}}`);
            searchWordsList.classList.toggle('expanded');
            featureHeader.classList.toggle('active');
        }}

        // 滚动到指定结果块
        function scrollToBlock(blockId) {{
            const block = document.getElementById(blockId);
            if (block) {{
                block.scrollIntoView({{ behavior: 'smooth', block: 'start' }});
                // 高亮对应的搜索词
                document.querySelectorAll('.search-word-item').forEach(item => {{
                    item.classList.remove('active');
                }});
                document.querySelectorAll(`[data-block-id="${{blockId}}"]`).forEach(item => {{
                    item.classList.add('active');
                }});
            }}
        }}

        // 打开小红书帖子
        function openNote(noteId) {{
            if (noteId) {{
                window.open(`https://www.xiaohongshu.com/explore/${{encodeURIComponent(noteId)}}`, '_blank');
            }}
        }}

        // 初始化
        document.addEventListener('DOMContentLoaded', () => {{
            renderLeftSidebar();
            renderRightContent();
            // 默认展开第一个特征组
            if (data.length > 0) {{
                toggleFeature(0);
            }}
        }});
    </script>
</body>
</html>
'''

    # Write the finished page to disk.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(html_content)
def main() -> None:
    """Load the Stage5 search results, compute statistics and write the report.

    Paths are resolved relative to this script's directory: the input is
    ``output_v2/stage5_with_search_results.json`` and the report is written
    to ``visualization/stage5_interactive_<timestamp>.html`` (the directory
    is created when missing). Progress is printed to stdout.
    """
    # Function-scope import: only needed to build the file URI printed below.
    from pathlib import Path

    # Resolve input and output locations.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    json_path = os.path.join(script_dir, 'output_v2', 'stage5_with_search_results.json')
    output_dir = os.path.join(script_dir, 'visualization')
    os.makedirs(output_dir, exist_ok=True)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_path = os.path.join(output_dir, f'stage5_interactive_{timestamp}.html')

    # Load the data.
    print(f"📖 加载数据: {json_path}")
    data = load_data(json_path)
    print(f"✓ 加载了 {len(data)} 个原始特征")

    # Compute the statistics.
    print("📊 计算统计数据...")
    stats = calculate_statistics(data)
    print("✓ 统计完成:")
    print(f" - 原始特征: {stats['total_features']}")
    print(f" - 搜索词: {stats['total_search_words']}")
    print(f" - 帖子总数: {stats['total_notes']}")
    print(f" - 视频: {stats['video_count']} ({stats['video_percentage']}%)")
    print(f" - 图文: {stats['normal_count']} ({stats['normal_percentage']}%)")

    # Generate the HTML page.
    print("\n🎨 生成可视化页面...")
    generate_html(data, stats, output_path)
    print(f"✓ 生成完成: {output_path}")

    # Print a URL that can be opened in the browser. as_uri() percent-encodes
    # spaces/non-ASCII characters and handles Windows drive letters, which
    # plain "file://" + path does not. output_path is absolute, as required.
    print("\n🌐 在浏览器中打开查看:")
    print(f" {Path(output_path).as_uri()}")
# Run the generator only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|