#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Stage5 search-result visualization tool (visualize_stage5_results.py).

Generates an interactive HTML page with image carousels.
"""
import json
import os
from datetime import datetime
from pathlib import Path
from typing import List, Dict, Any
  11. def load_data(json_path: str) -> List[Dict[str, Any]]:
  12. """加载JSON数据"""
  13. with open(json_path, 'r', encoding='utf-8') as f:
  14. return json.load(f)
  15. def calculate_statistics(data: List[Dict[str, Any]]) -> Dict[str, Any]:
  16. """计算统计数据"""
  17. total_features = len(data)
  18. total_search_words = 0
  19. total_notes = 0
  20. video_count = 0
  21. normal_count = 0
  22. for feature in data:
  23. search_results = feature.get('组合评估结果', [])
  24. total_search_words += len(search_results)
  25. for search_item in search_results:
  26. search_result = search_item.get('search_result', {})
  27. notes = search_result.get('data', {}).get('data', [])
  28. total_notes += len(notes)
  29. for note in notes:
  30. note_type = note.get('note_card', {}).get('type', '')
  31. if note_type == 'video':
  32. video_count += 1
  33. else:
  34. normal_count += 1
  35. return {
  36. 'total_features': total_features,
  37. 'total_search_words': total_search_words,
  38. 'total_notes': total_notes,
  39. 'video_count': video_count,
  40. 'normal_count': normal_count,
  41. 'video_percentage': round(video_count / total_notes * 100, 1) if total_notes > 0 else 0,
  42. 'normal_percentage': round(normal_count / total_notes * 100, 1) if total_notes > 0 else 0
  43. }
  44. def generate_html(data: List[Dict[str, Any]], stats: Dict[str, Any], output_path: str):
  45. """生成HTML可视化页面"""
  46. # 准备数据JSON(用于JavaScript)
  47. data_json = json.dumps(data, ensure_ascii=False, indent=2)
  48. html_content = f'''<!DOCTYPE html>
  49. <html lang="zh-CN">
  50. <head>
  51. <meta charset="UTF-8">
  52. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  53. <title>Stage5 搜索结果可视化</title>
  54. <style>
  55. * {{
  56. margin: 0;
  57. padding: 0;
  58. box-sizing: border-box;
  59. }}
  60. body {{
  61. font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  62. background: #f5f7fa;
  63. color: #333;
  64. overflow-x: hidden;
  65. }}
  66. /* 顶部统计面板 */
  67. .stats-panel {{
  68. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  69. color: white;
  70. padding: 20px;
  71. box-shadow: 0 2px 10px rgba(0,0,0,0.1);
  72. }}
  73. .stats-container {{
  74. max-width: 1400px;
  75. margin: 0 auto;
  76. display: flex;
  77. justify-content: space-around;
  78. align-items: center;
  79. flex-wrap: wrap;
  80. gap: 20px;
  81. }}
  82. .stat-item {{
  83. text-align: center;
  84. }}
  85. .stat-value {{
  86. font-size: 32px;
  87. font-weight: bold;
  88. margin-bottom: 5px;
  89. }}
  90. .stat-label {{
  91. font-size: 14px;
  92. opacity: 0.9;
  93. }}
  94. /* 主容器 */
  95. .main-container {{
  96. display: flex;
  97. max-width: 1400px;
  98. margin: 20px auto;
  99. gap: 20px;
  100. padding: 0 20px;
  101. height: calc(100vh - 140px);
  102. }}
  103. /* 左侧导航 */
  104. .left-sidebar {{
  105. width: 30%;
  106. background: white;
  107. border-radius: 8px;
  108. box-shadow: 0 2px 8px rgba(0,0,0,0.1);
  109. overflow-y: auto;
  110. position: sticky;
  111. top: 20px;
  112. height: fit-content;
  113. max-height: calc(100vh - 160px);
  114. }}
  115. .feature-group {{
  116. border-bottom: 1px solid #e5e7eb;
  117. }}
  118. .feature-header {{
  119. padding: 15px 20px;
  120. background: #f9fafb;
  121. cursor: pointer;
  122. user-select: none;
  123. transition: background 0.2s;
  124. }}
  125. .feature-header:hover {{
  126. background: #f3f4f6;
  127. }}
  128. .feature-header.active {{
  129. background: #667eea;
  130. color: white;
  131. }}
  132. .feature-title {{
  133. font-size: 16px;
  134. font-weight: 600;
  135. margin-bottom: 5px;
  136. }}
  137. .feature-meta {{
  138. font-size: 12px;
  139. color: #6b7280;
  140. }}
  141. .feature-header.active .feature-meta {{
  142. color: rgba(255,255,255,0.8);
  143. }}
  144. .search-words-list {{
  145. display: none;
  146. padding: 10px 0;
  147. }}
  148. .search-words-list.expanded {{
  149. display: block;
  150. }}
  151. .search-word-item {{
  152. padding: 12px 20px 12px 40px;
  153. cursor: pointer;
  154. border-left: 3px solid transparent;
  155. transition: all 0.2s;
  156. }}
  157. .search-word-item:hover {{
  158. background: #f9fafb;
  159. border-left-color: #667eea;
  160. }}
  161. .search-word-item.active {{
  162. background: #ede9fe;
  163. border-left-color: #7c3aed;
  164. }}
  165. .search-word-text {{
  166. font-size: 14px;
  167. font-weight: 500;
  168. color: #374151;
  169. margin-bottom: 4px;
  170. }}
  171. .search-word-score {{
  172. display: inline-block;
  173. padding: 2px 8px;
  174. border-radius: 12px;
  175. font-size: 11px;
  176. font-weight: 600;
  177. margin-left: 8px;
  178. }}
  179. .score-high {{
  180. background: #d1fae5;
  181. color: #065f46;
  182. }}
  183. .score-medium {{
  184. background: #fef3c7;
  185. color: #92400e;
  186. }}
  187. .score-low {{
  188. background: #fee2e2;
  189. color: #991b1b;
  190. }}
  191. .search-word-reasoning {{
  192. font-size: 12px;
  193. color: #6b7280;
  194. margin-top: 4px;
  195. display: -webkit-box;
  196. -webkit-line-clamp: 2;
  197. -webkit-box-orient: vertical;
  198. overflow: hidden;
  199. }}
  200. /* 右侧结果区 */
  201. .right-content {{
  202. flex: 1;
  203. overflow-y: auto;
  204. padding-bottom: 40px;
  205. }}
  206. .result-block {{
  207. background: white;
  208. border-radius: 8px;
  209. box-shadow: 0 2px 8px rgba(0,0,0,0.1);
  210. margin-bottom: 30px;
  211. padding: 20px;
  212. scroll-margin-top: 20px;
  213. }}
  214. .result-header {{
  215. margin-bottom: 20px;
  216. padding-bottom: 15px;
  217. border-bottom: 2px solid #e5e7eb;
  218. }}
  219. .result-title {{
  220. font-size: 20px;
  221. font-weight: 600;
  222. color: #111827;
  223. margin-bottom: 10px;
  224. }}
  225. .result-stats {{
  226. display: flex;
  227. gap: 15px;
  228. font-size: 13px;
  229. color: #6b7280;
  230. }}
  231. .stat-badge {{
  232. background: #f3f4f6;
  233. padding: 4px 10px;
  234. border-radius: 4px;
  235. }}
  236. /* 帖子网格 */
  237. .notes-grid {{
  238. display: grid;
  239. grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
  240. gap: 20px;
  241. }}
  242. .note-card {{
  243. border: 1px solid #e5e7eb;
  244. border-radius: 8px;
  245. overflow: hidden;
  246. cursor: pointer;
  247. transition: all 0.3s;
  248. background: white;
  249. }}
  250. .note-card:hover {{
  251. transform: translateY(-4px);
  252. box-shadow: 0 10px 25px rgba(0,0,0,0.15);
  253. }}
  254. /* 图片轮播 */
  255. .image-carousel {{
  256. position: relative;
  257. width: 100%;
  258. height: 280px;
  259. background: #f3f4f6;
  260. overflow: hidden;
  261. }}
  262. .carousel-images {{
  263. display: flex;
  264. height: 100%;
  265. transition: transform 0.3s ease;
  266. }}
  267. .carousel-image {{
  268. min-width: 100%;
  269. height: 100%;
  270. object-fit: cover;
  271. }}
  272. .carousel-btn {{
  273. position: absolute;
  274. top: 50%;
  275. transform: translateY(-50%);
  276. background: rgba(0,0,0,0.5);
  277. color: white;
  278. border: none;
  279. width: 32px;
  280. height: 32px;
  281. border-radius: 50%;
  282. cursor: pointer;
  283. font-size: 16px;
  284. display: none;
  285. align-items: center;
  286. justify-content: center;
  287. transition: background 0.2s;
  288. z-index: 10;
  289. }}
  290. .carousel-btn:hover {{
  291. background: rgba(0,0,0,0.7);
  292. }}
  293. .carousel-btn.prev {{
  294. left: 8px;
  295. }}
  296. .carousel-btn.next {{
  297. right: 8px;
  298. }}
  299. .note-card:hover .carousel-btn {{
  300. display: flex;
  301. }}
  302. .carousel-indicators {{
  303. position: absolute;
  304. bottom: 10px;
  305. left: 50%;
  306. transform: translateX(-50%);
  307. display: flex;
  308. gap: 6px;
  309. z-index: 10;
  310. }}
  311. .dot {{
  312. width: 8px;
  313. height: 8px;
  314. border-radius: 50%;
  315. background: rgba(255,255,255,0.5);
  316. cursor: pointer;
  317. transition: all 0.2s;
  318. }}
  319. .dot.active {{
  320. background: white;
  321. width: 24px;
  322. border-radius: 4px;
  323. }}
  324. .image-counter {{
  325. position: absolute;
  326. top: 10px;
  327. right: 10px;
  328. background: rgba(0,0,0,0.6);
  329. color: white;
  330. padding: 4px 8px;
  331. border-radius: 4px;
  332. font-size: 12px;
  333. z-index: 10;
  334. }}
  335. /* 帖子信息 */
  336. .note-info {{
  337. padding: 12px;
  338. }}
  339. .note-title {{
  340. font-size: 14px;
  341. font-weight: 500;
  342. color: #111827;
  343. margin-bottom: 8px;
  344. display: -webkit-box;
  345. -webkit-line-clamp: 2;
  346. -webkit-box-orient: vertical;
  347. overflow: hidden;
  348. line-height: 1.4;
  349. }}
  350. .note-meta {{
  351. display: flex;
  352. align-items: center;
  353. justify-content: space-between;
  354. font-size: 12px;
  355. color: #6b7280;
  356. }}
  357. .note-type {{
  358. padding: 3px 8px;
  359. border-radius: 4px;
  360. font-weight: 500;
  361. }}
  362. .type-video {{
  363. background: #dbeafe;
  364. color: #1e40af;
  365. }}
  366. .type-normal {{
  367. background: #d1fae5;
  368. color: #065f46;
  369. }}
  370. .note-author {{
  371. display: flex;
  372. align-items: center;
  373. gap: 6px;
  374. }}
  375. .author-avatar {{
  376. width: 24px;
  377. height: 24px;
  378. border-radius: 50%;
  379. }}
  380. /* SVG连线层 */
  381. #connection-svg {{
  382. position: fixed;
  383. top: 0;
  384. left: 0;
  385. width: 100%;
  386. height: 100%;
  387. pointer-events: none;
  388. z-index: 1;
  389. }}
  390. .connection-line {{
  391. stroke: #cbd5e1;
  392. stroke-width: 1;
  393. stroke-dasharray: 5,5;
  394. fill: none;
  395. opacity: 0.3;
  396. transition: all 0.2s;
  397. }}
  398. .connection-line.active {{
  399. stroke: #667eea;
  400. stroke-width: 2;
  401. stroke-dasharray: none;
  402. opacity: 1;
  403. }}
  404. /* 滚动条样式 */
  405. ::-webkit-scrollbar {{
  406. width: 8px;
  407. height: 8px;
  408. }}
  409. ::-webkit-scrollbar-track {{
  410. background: #f1f1f1;
  411. }}
  412. ::-webkit-scrollbar-thumb {{
  413. background: #888;
  414. border-radius: 4px;
  415. }}
  416. ::-webkit-scrollbar-thumb:hover {{
  417. background: #555;
  418. }}
  419. </style>
  420. </head>
  421. <body>
  422. <!-- 统计面板 -->
  423. <div class="stats-panel">
  424. <div class="stats-container">
  425. <div class="stat-item">
  426. <div class="stat-value">📊 {stats['total_features']}</div>
  427. <div class="stat-label">原始特征数</div>
  428. </div>
  429. <div class="stat-item">
  430. <div class="stat-value">🔍 {stats['total_search_words']}</div>
  431. <div class="stat-label">搜索词数</div>
  432. </div>
  433. <div class="stat-item">
  434. <div class="stat-value">📝 {stats['total_notes']}</div>
  435. <div class="stat-label">帖子总数</div>
  436. </div>
  437. <div class="stat-item">
  438. <div class="stat-value">🎬 {stats['video_count']}</div>
  439. <div class="stat-label">视频类型 ({stats['video_percentage']}%)</div>
  440. </div>
  441. <div class="stat-item">
  442. <div class="stat-value">📷 {stats['normal_count']}</div>
  443. <div class="stat-label">图文类型 ({stats['normal_percentage']}%)</div>
  444. </div>
  445. </div>
  446. </div>
  447. <!-- SVG连线层 -->
  448. <svg id="connection-svg"></svg>
  449. <!-- 主容器 -->
  450. <div class="main-container">
  451. <!-- 左侧导航 -->
  452. <div class="left-sidebar" id="leftSidebar">
  453. <!-- 通过JavaScript动态生成 -->
  454. </div>
  455. <!-- 右侧结果区 -->
  456. <div class="right-content" id="rightContent">
  457. <!-- 通过JavaScript动态生成 -->
  458. </div>
  459. </div>
  460. <script>
  461. // 数据
  462. const data = {data_json};
  463. // 渲染左侧导航
  464. function renderLeftSidebar() {{
  465. const sidebar = document.getElementById('leftSidebar');
  466. let html = '';
  467. data.forEach((feature, featureIdx) => {{
  468. const searchWords = feature['组合评估结果'] || [];
  469. html += `
  470. <div class="feature-group">
  471. <div class="feature-header" onclick="toggleFeature(${{featureIdx}})" id="feature-header-${{featureIdx}}">
  472. <div class="feature-title">${{feature['原始特征名称']}}</div>
  473. <div class="feature-meta">
  474. ${{feature['来源层级']}} · 权重: ${{feature['权重'].toFixed(2)}} · ${{searchWords.length}}个搜索词
  475. </div>
  476. </div>
  477. <div class="search-words-list" id="search-words-${{featureIdx}}">
  478. `;
  479. searchWords.forEach((sw, swIdx) => {{
  480. const score = sw.score || 0;
  481. const scoreClass = score >= 0.9 ? 'score-high' : score >= 0.7 ? 'score-medium' : 'score-low';
  482. const blockId = `block-${{featureIdx}}-${{swIdx}}`;
  483. html += `
  484. <div class="search-word-item" onclick="scrollToBlock('${{blockId}}')"
  485. id="sw-${{featureIdx}}-${{swIdx}}"
  486. data-block-id="${{blockId}}">
  487. <div class="search-word-text">
  488. ${{sw.search_word}}
  489. <span class="search-word-score ${{scoreClass}}">${{score.toFixed(2)}}</span>
  490. </div>
  491. <div class="search-word-reasoning" title="${{sw.reasoning}}">
  492. ${{sw.reasoning || ''}}
  493. </div>
  494. </div>
  495. `;
  496. }});
  497. html += `
  498. </div>
  499. </div>
  500. `;
  501. }});
  502. sidebar.innerHTML = html;
  503. }}
  504. // 渲染右侧结果区
  505. function renderRightContent() {{
  506. const content = document.getElementById('rightContent');
  507. let html = '';
  508. data.forEach((feature, featureIdx) => {{
  509. const searchWords = feature['组合评估结果'] || [];
  510. searchWords.forEach((sw, swIdx) => {{
  511. const blockId = `block-${{featureIdx}}-${{swIdx}}`;
  512. const searchResult = sw.search_result || {{}};
  513. const notes = searchResult.data?.data || [];
  514. const videoCount = notes.filter(n => n.note_card?.type === 'video').length;
  515. const normalCount = notes.length - videoCount;
  516. html += `
  517. <div class="result-block" id="${{blockId}}">
  518. <div class="result-header">
  519. <div class="result-title">${{sw.search_word}}</div>
  520. <div class="result-stats">
  521. <span class="stat-badge">📝 ${{notes.length}} 条帖子</span>
  522. <span class="stat-badge">🎬 ${{videoCount}} 视频</span>
  523. <span class="stat-badge">📷 ${{normalCount}} 图文</span>
  524. </div>
  525. </div>
  526. <div class="notes-grid">
  527. ${{notes.map((note, noteIdx) => renderNoteCard(note, featureIdx, swIdx, noteIdx)).join('')}}
  528. </div>
  529. </div>
  530. `;
  531. }});
  532. }});
  533. content.innerHTML = html;
  534. }}
  535. // 渲染单个帖子卡片
  536. function renderNoteCard(note, featureIdx, swIdx, noteIdx) {{
  537. const card = note.note_card || {{}};
  538. const images = card.image_list || [];
  539. const title = card.display_title || '无标题';
  540. const noteType = card.type || 'normal';
  541. const noteId = note.id || '';
  542. const user = card.user || {{}};
  543. const userName = user.nick_name || '未知用户';
  544. const userAvatar = user.avatar || '';
  545. const carouselId = `carousel-${{featureIdx}}-${{swIdx}}-${{noteIdx}}`;
  546. return `
  547. <div class="note-card" onclick="openNote('${{noteId}}')">
  548. <div class="image-carousel" id="${{carouselId}}">
  549. <div class="carousel-images">
  550. ${{images.map(img => `<img class="carousel-image" src="${{img}}" alt="帖子图片" loading="lazy">`).join('')}}
  551. </div>
  552. ${{images.length > 1 ? `
  553. <button class="carousel-btn prev" onclick="event.stopPropagation(); changeImage('${{carouselId}}', -1)">←</button>
  554. <button class="carousel-btn next" onclick="event.stopPropagation(); changeImage('${{carouselId}}', 1)">→</button>
  555. <div class="carousel-indicators">
  556. ${{images.map((_, i) => `<span class="dot ${{i === 0 ? 'active' : ''}}" onclick="event.stopPropagation(); goToImage('${{carouselId}}', ${{i}})"></span>`).join('')}}
  557. </div>
  558. <span class="image-counter">1/${{images.length}}</span>
  559. ` : ''}}
  560. </div>
  561. <div class="note-info">
  562. <div class="note-title">${{title}}</div>
  563. <div class="note-meta">
  564. <span class="note-type type-${{noteType}}">
  565. ${{noteType === 'video' ? '🎬 视频' : '📷 图文'}}
  566. </span>
  567. <div class="note-author">
  568. ${{userAvatar ? `<img class="author-avatar" src="${{userAvatar}}" alt="${{userName}}">` : ''}}
  569. <span>${{userName}}</span>
  570. </div>
  571. </div>
  572. </div>
  573. </div>
  574. `;
  575. }}
  576. // 图片轮播逻辑
  577. const carouselStates = {{}};
  578. function changeImage(carouselId, direction) {{
  579. if (!carouselStates[carouselId]) {{
  580. carouselStates[carouselId] = {{ currentIndex: 0 }};
  581. }}
  582. const carousel = document.getElementById(carouselId);
  583. const imagesContainer = carousel.querySelector('.carousel-images');
  584. const images = carousel.querySelectorAll('.carousel-image');
  585. const dots = carousel.querySelectorAll('.dot');
  586. const counter = carousel.querySelector('.image-counter');
  587. let newIndex = carouselStates[carouselId].currentIndex + direction;
  588. if (newIndex < 0) newIndex = images.length - 1;
  589. if (newIndex >= images.length) newIndex = 0;
  590. carouselStates[carouselId].currentIndex = newIndex;
  591. imagesContainer.style.transform = `translateX(-${{newIndex * 100}}%)`;
  592. // 更新指示器
  593. dots.forEach((dot, i) => {{
  594. dot.classList.toggle('active', i === newIndex);
  595. }});
  596. // 更新计数器
  597. if (counter) {{
  598. counter.textContent = `${{newIndex + 1}}/${{images.length}}`;
  599. }}
  600. }}
  601. function goToImage(carouselId, index) {{
  602. if (!carouselStates[carouselId]) {{
  603. carouselStates[carouselId] = {{ currentIndex: 0 }};
  604. }}
  605. const carousel = document.getElementById(carouselId);
  606. const imagesContainer = carousel.querySelector('.carousel-images');
  607. const dots = carousel.querySelectorAll('.dot');
  608. const counter = carousel.querySelector('.image-counter');
  609. carouselStates[carouselId].currentIndex = index;
  610. imagesContainer.style.transform = `translateX(-${{index * 100}}%)`;
  611. // 更新指示器
  612. dots.forEach((dot, i) => {{
  613. dot.classList.toggle('active', i === index);
  614. }});
  615. // 更新计数器
  616. if (counter) {{
  617. counter.textContent = `${{index + 1}}/${{dots.length}}`;
  618. }}
  619. }}
  620. // 展开/折叠特征组
  621. function toggleFeature(featureIdx) {{
  622. const searchWordsList = document.getElementById(`search-words-${{featureIdx}}`);
  623. const featureHeader = document.getElementById(`feature-header-${{featureIdx}}`);
  624. searchWordsList.classList.toggle('expanded');
  625. featureHeader.classList.toggle('active');
  626. }}
  627. // 滚动到指定结果块
  628. function scrollToBlock(blockId) {{
  629. const block = document.getElementById(blockId);
  630. if (block) {{
  631. block.scrollIntoView({{ behavior: 'smooth', block: 'start' }});
  632. // 高亮对应的搜索词
  633. document.querySelectorAll('.search-word-item').forEach(item => {{
  634. item.classList.remove('active');
  635. }});
  636. document.querySelectorAll(`[data-block-id="${{blockId}}"]`).forEach(item => {{
  637. item.classList.add('active');
  638. }});
  639. }}
  640. }}
  641. // 打开小红书帖子
  642. function openNote(noteId) {{
  643. if (noteId) {{
  644. window.open(`https://www.xiaohongshu.com/explore/${{noteId}}`, '_blank');
  645. }}
  646. }}
  647. // 初始化
  648. document.addEventListener('DOMContentLoaded', () => {{
  649. renderLeftSidebar();
  650. renderRightContent();
  651. // 默认展开第一个特征组
  652. if (data.length > 0) {{
  653. toggleFeature(0);
  654. }}
  655. }});
  656. </script>
  657. </body>
  658. </html>
  659. '''
  660. # 写入文件
  661. with open(output_path, 'w', encoding='utf-8') as f:
  662. f.write(html_content)
  663. def main():
  664. """主函数"""
  665. # 配置路径
  666. script_dir = os.path.dirname(os.path.abspath(__file__))
  667. json_path = os.path.join(script_dir, 'output_v2', 'stage5_with_search_results.json')
  668. output_dir = os.path.join(script_dir, 'visualization')
  669. os.makedirs(output_dir, exist_ok=True)
  670. timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
  671. output_path = os.path.join(output_dir, f'stage5_interactive_{timestamp}.html')
  672. # 加载数据
  673. print(f"📖 加载数据: {json_path}")
  674. data = load_data(json_path)
  675. print(f"✓ 加载了 {len(data)} 个原始特征")
  676. # 计算统计
  677. print("📊 计算统计数据...")
  678. stats = calculate_statistics(data)
  679. print(f"✓ 统计完成:")
  680. print(f" - 原始特征: {stats['total_features']}")
  681. print(f" - 搜索词: {stats['total_search_words']}")
  682. print(f" - 帖子总数: {stats['total_notes']}")
  683. print(f" - 视频: {stats['video_count']} ({stats['video_percentage']}%)")
  684. print(f" - 图文: {stats['normal_count']} ({stats['normal_percentage']}%)")
  685. # 生成HTML
  686. print(f"\n🎨 生成可视化页面...")
  687. generate_html(data, stats, output_path)
  688. print(f"✓ 生成完成: {output_path}")
  689. # 打印访问提示
  690. print(f"\n🌐 在浏览器中打开查看:")
  691. print(f" file://{output_path}")
  692. if __name__ == '__main__':
  693. main()