#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# File: visualize_how_results.py
"""
How解构结果可视化脚本
将 how 解构结果转化为 HTML 格式,使用标签页展示多个帖子
"""
import json
import sys
from html import escape
from pathlib import Path
from typing import Dict, List
  11. # 添加项目根目录到路径
  12. project_root = Path(__file__).parent.parent.parent
  13. sys.path.insert(0, str(project_root))
  14. def get_relation_color(relation: str) -> str:
  15. """根据关系类型返回对应的颜色"""
  16. color_map = {
  17. "same": "#10b981", # 绿色 - 同义
  18. "contains": "#3b82f6", # 蓝色 - 包含
  19. "contained_by": "#8b5cf6", # 紫色 - 被包含
  20. "coordinate": "#f59e0b", # 橙色 - 同级
  21. "overlap": "#ec4899", # 粉色 - 部分重叠
  22. "related": "#6366f1", # 靛蓝 - 相关
  23. "unrelated": "#9ca3af" # 灰色 - 无关
  24. }
  25. return color_map.get(relation, "#9ca3af")
  26. def get_relation_label(relation: str) -> str:
  27. """返回关系类型的中文标签"""
  28. label_map = {
  29. "same": "同义",
  30. "contains": "包含",
  31. "contained_by": "被包含",
  32. "coordinate": "同级",
  33. "overlap": "部分重叠",
  34. "related": "相关",
  35. "unrelated": "无关"
  36. }
  37. return label_map.get(relation, relation)
  38. def generate_match_item_html(match: Dict) -> str:
  39. """生成单个匹配项的HTML"""
  40. persona_name = match.get("人设特征名称", "")
  41. match_result = match.get("匹配结果", {})
  42. relation = match_result.get("relation", "unrelated")
  43. score = match_result.get("score", 0.0)
  44. explanation = match_result.get("explanation", "")
  45. color = get_relation_color(relation)
  46. label = get_relation_label(relation)
  47. # 根据分数设置背景透明度
  48. opacity = min(score, 1.0)
  49. bg_color = f"rgba({int(color[1:3], 16)}, {int(color[3:5], 16)}, {int(color[5:7], 16)}, {opacity * 0.15})"
  50. html = f"""
  51. <div class="match-item" style="border-left: 3px solid {color}; background: {bg_color};">
  52. <div class="match-header">
  53. <span class="persona-name">{persona_name}</span>
  54. <span class="relation-badge" style="background: {color};">{label}</span>
  55. <span class="score-badge">分数: {score:.2f}</span>
  56. </div>
  57. <div class="match-explanation">{explanation}</div>
  58. </div>
  59. """
  60. return html
  61. def generate_feature_html(feature_data: Dict) -> str:
  62. """生成单个特征的HTML"""
  63. feature_name = feature_data.get("特征名称", "")
  64. match_results = feature_data.get("匹配结果", [])
  65. # 按分数排序(从高到低)
  66. sorted_matches = sorted(match_results, key=lambda x: x.get("匹配结果", {}).get("score", 0), reverse=True)
  67. # 统计匹配类型
  68. relation_counts = {}
  69. for match in match_results:
  70. relation = match.get("匹配结果", {}).get("relation", "unrelated")
  71. relation_counts[relation] = relation_counts.get(relation, 0) + 1
  72. # 生成统计信息
  73. stats_html = "<div class='relation-stats'>"
  74. for relation, count in sorted(relation_counts.items(), key=lambda x: x[1], reverse=True):
  75. label = get_relation_label(relation)
  76. color = get_relation_color(relation)
  77. stats_html += f"<span class='stat-item' style='color: {color};'>{label}: {count}</span>"
  78. stats_html += "</div>"
  79. matches_html = "".join(generate_match_item_html(match) for match in sorted_matches)
  80. html = f"""
  81. <div class="feature-section">
  82. <div class="feature-header">
  83. <h3>特征: {feature_name}</h3>
  84. {stats_html}
  85. </div>
  86. <div class="matches-container">
  87. {matches_html}
  88. </div>
  89. </div>
  90. """
  91. return html
  92. def generate_inspiration_point_html(point_data: Dict) -> str:
  93. """生成单个灵感点的HTML"""
  94. name = point_data.get("名称", "")
  95. desc = point_data.get("描述", "")
  96. how_steps = point_data.get("how步骤列表", [])
  97. steps_html = ""
  98. for step in how_steps:
  99. step_name = step.get("步骤名称", "")
  100. features = step.get("特征列表", [])
  101. features_html = "".join(generate_feature_html(f) for f in features)
  102. steps_html += f"""
  103. <div class="step-section">
  104. <h4 class="step-name">{step_name}</h4>
  105. {features_html}
  106. </div>
  107. """
  108. html = f"""
  109. <div class="inspiration-point">
  110. <div class="point-header">
  111. <h2>{name}</h2>
  112. </div>
  113. <div class="point-description">{desc}</div>
  114. {steps_html}
  115. </div>
  116. """
  117. return html
  118. def generate_post_content_html(post_data: Dict) -> str:
  119. """生成单个帖子的内容HTML(不包含完整页面结构)"""
  120. post_id = post_data.get("帖子id", "")
  121. post_detail = post_data.get("帖子详情", {})
  122. publish_time = post_detail.get("publish_time", "")
  123. like_count = post_detail.get("like_count", 0)
  124. link = post_detail.get("link", "")
  125. how_result = post_data.get("how解构结果", {})
  126. inspiration_list = how_result.get("灵感点列表", [])
  127. inspiration_html = "".join(generate_inspiration_point_html(p) for p in inspiration_list)
  128. html = f"""
  129. <div class="post-meta-bar">
  130. <div class="meta-item">
  131. <span class="meta-label">帖子ID:</span>
  132. <span class="meta-value">{post_id}</span>
  133. </div>
  134. <div class="meta-item">
  135. <span class="meta-label">发布时间:</span>
  136. <span class="meta-value">{publish_time}</span>
  137. </div>
  138. <div class="meta-item">
  139. <span class="meta-label">点赞数:</span>
  140. <span class="meta-value">{like_count}</span>
  141. </div>
  142. <div class="meta-item">
  143. <a href="{link}" target="_blank" class="view-link">查看原帖 →</a>
  144. </div>
  145. </div>
  146. {inspiration_html}
  147. """
  148. return html
  149. def generate_combined_html(posts_data: List[Dict]) -> str:
  150. """生成包含所有帖子的单一HTML(带标签页)"""
  151. # 生成标签页按钮
  152. tabs_html = ""
  153. for i, post in enumerate(posts_data):
  154. post_detail = post.get("帖子详情", {})
  155. title = post_detail.get("title", "无标题")
  156. active_class = "active" if i == 0 else ""
  157. tabs_html += f'<button class="tab-button {active_class}" onclick="openTab(event, \'post-{i}\')">{title}</button>\n'
  158. # 生成标签页内容
  159. contents_html = ""
  160. for i, post in enumerate(posts_data):
  161. active_class = "active" if i == 0 else ""
  162. content = generate_post_content_html(post)
  163. contents_html += f"""
  164. <div id="post-{i}" class="tab-content {active_class}">
  165. {content}
  166. </div>
  167. """
  168. html = f"""
  169. <!DOCTYPE html>
  170. <html lang="zh-CN">
  171. <head>
  172. <meta charset="UTF-8">
  173. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  174. <title>How解构结果可视化</title>
  175. <style>
  176. * {{
  177. margin: 0;
  178. padding: 0;
  179. box-sizing: border-box;
  180. }}
  181. body {{
  182. font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  183. background: #f5f5f5;
  184. color: #333;
  185. line-height: 1.6;
  186. }}
  187. .container {{
  188. max-width: 1400px;
  189. margin: 0 auto;
  190. background: white;
  191. min-height: 100vh;
  192. box-shadow: 0 0 40px rgba(0,0,0,0.1);
  193. }}
  194. .header {{
  195. background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  196. color: white;
  197. padding: 30px;
  198. text-align: center;
  199. }}
  200. .header h1 {{
  201. font-size: 32px;
  202. font-weight: bold;
  203. margin-bottom: 10px;
  204. }}
  205. .header p {{
  206. font-size: 16px;
  207. opacity: 0.9;
  208. }}
  209. .tabs-container {{
  210. display: flex;
  211. background: #f9fafb;
  212. border-bottom: 2px solid #e5e7eb;
  213. overflow-x: auto;
  214. position: sticky;
  215. top: 0;
  216. z-index: 100;
  217. }}
  218. .tab-button {{
  219. flex: 1;
  220. min-width: 200px;
  221. padding: 18px 30px;
  222. background: transparent;
  223. border: none;
  224. border-bottom: 3px solid transparent;
  225. cursor: pointer;
  226. font-size: 15px;
  227. font-weight: 500;
  228. color: #6b7280;
  229. transition: all 0.3s;
  230. white-space: nowrap;
  231. overflow: hidden;
  232. text-overflow: ellipsis;
  233. }}
  234. .tab-button:hover {{
  235. background: #f3f4f6;
  236. color: #374151;
  237. }}
  238. .tab-button.active {{
  239. color: #667eea;
  240. border-bottom-color: #667eea;
  241. background: white;
  242. }}
  243. .tab-content {{
  244. display: none;
  245. padding: 30px;
  246. animation: fadeIn 0.3s;
  247. }}
  248. .tab-content.active {{
  249. display: block;
  250. }}
  251. @keyframes fadeIn {{
  252. from {{ opacity: 0; transform: translateY(10px); }}
  253. to {{ opacity: 1; transform: translateY(0); }}
  254. }}
  255. .post-meta-bar {{
  256. display: flex;
  257. flex-wrap: wrap;
  258. gap: 25px;
  259. padding: 20px;
  260. background: #f9fafb;
  261. border-radius: 8px;
  262. margin-bottom: 30px;
  263. border: 1px solid #e5e7eb;
  264. }}
  265. .meta-item {{
  266. display: flex;
  267. align-items: center;
  268. gap: 8px;
  269. }}
  270. .meta-label {{
  271. font-weight: 600;
  272. color: #6b7280;
  273. font-size: 14px;
  274. }}
  275. .meta-value {{
  276. color: #111827;
  277. font-size: 14px;
  278. }}
  279. .view-link {{
  280. color: #667eea;
  281. text-decoration: none;
  282. font-weight: 600;
  283. font-size: 14px;
  284. transition: color 0.2s;
  285. }}
  286. .view-link:hover {{
  287. color: #764ba2;
  288. }}
  289. .inspiration-point {{
  290. margin-bottom: 40px;
  291. border: 1px solid #e5e7eb;
  292. border-radius: 8px;
  293. overflow: hidden;
  294. }}
  295. .point-header {{
  296. background: #f9fafb;
  297. padding: 20px;
  298. border-bottom: 2px solid #e5e7eb;
  299. }}
  300. .point-header h2 {{
  301. font-size: 22px;
  302. color: #1f2937;
  303. }}
  304. .point-description {{
  305. padding: 20px;
  306. background: #fefefe;
  307. font-size: 15px;
  308. color: #4b5563;
  309. line-height: 1.8;
  310. border-bottom: 1px solid #e5e7eb;
  311. }}
  312. .step-section {{
  313. padding: 20px;
  314. }}
  315. .step-name {{
  316. font-size: 18px;
  317. color: #374151;
  318. margin-bottom: 20px;
  319. padding-bottom: 10px;
  320. border-bottom: 2px solid #e5e7eb;
  321. }}
  322. .feature-section {{
  323. margin-bottom: 30px;
  324. }}
  325. .feature-header {{
  326. display: flex;
  327. justify-content: space-between;
  328. align-items: center;
  329. margin-bottom: 15px;
  330. padding: 15px;
  331. background: #f3f4f6;
  332. border-radius: 6px;
  333. }}
  334. .feature-header h3 {{
  335. font-size: 18px;
  336. color: #111827;
  337. }}
  338. .relation-stats {{
  339. display: flex;
  340. gap: 15px;
  341. font-size: 13px;
  342. }}
  343. .stat-item {{
  344. font-weight: 600;
  345. }}
  346. .matches-container {{
  347. display: grid;
  348. gap: 10px;
  349. }}
  350. .match-item {{
  351. padding: 15px;
  352. border-radius: 6px;
  353. transition: transform 0.2s;
  354. }}
  355. .match-item:hover {{
  356. transform: translateX(5px);
  357. }}
  358. .match-header {{
  359. display: flex;
  360. align-items: center;
  361. gap: 10px;
  362. margin-bottom: 8px;
  363. }}
  364. .persona-name {{
  365. font-weight: 600;
  366. font-size: 15px;
  367. color: #111827;
  368. }}
  369. .relation-badge {{
  370. padding: 3px 10px;
  371. border-radius: 12px;
  372. color: white;
  373. font-size: 12px;
  374. font-weight: 600;
  375. }}
  376. .score-badge {{
  377. padding: 3px 10px;
  378. border-radius: 12px;
  379. background: #e5e7eb;
  380. color: #374151;
  381. font-size: 12px;
  382. font-weight: 600;
  383. }}
  384. .match-explanation {{
  385. font-size: 14px;
  386. color: #6b7280;
  387. line-height: 1.6;
  388. }}
  389. @media (max-width: 768px) {{
  390. .header {{
  391. padding: 20px;
  392. }}
  393. .header h1 {{
  394. font-size: 24px;
  395. }}
  396. .tab-button {{
  397. min-width: 150px;
  398. padding: 15px 20px;
  399. font-size: 14px;
  400. }}
  401. .tab-content {{
  402. padding: 15px;
  403. }}
  404. .post-meta-bar {{
  405. flex-direction: column;
  406. gap: 10px;
  407. }}
  408. .feature-header {{
  409. flex-direction: column;
  410. align-items: flex-start;
  411. gap: 10px;
  412. }}
  413. .relation-stats {{
  414. flex-wrap: wrap;
  415. }}
  416. }}
  417. </style>
  418. </head>
  419. <body>
  420. <div class="container">
  421. <div class="header">
  422. <h1>How 解构结果可视化</h1>
  423. <p>灵感点特征匹配分析</p>
  424. </div>
  425. <div class="tabs-container">
  426. {tabs_html}
  427. </div>
  428. {contents_html}
  429. </div>
  430. <script>
  431. function openTab(evt, tabId) {{
  432. // 隐藏所有标签页内容
  433. var tabContents = document.getElementsByClassName("tab-content");
  434. for (var i = 0; i < tabContents.length; i++) {{
  435. tabContents[i].classList.remove("active");
  436. }}
  437. // 移除所有按钮的 active 类
  438. var tabButtons = document.getElementsByClassName("tab-button");
  439. for (var i = 0; i < tabButtons.length; i++) {{
  440. tabButtons[i].classList.remove("active");
  441. }}
  442. // 显示当前标签页并添加 active 类
  443. document.getElementById(tabId).classList.add("active");
  444. evt.currentTarget.classList.add("active");
  445. }}
  446. </script>
  447. </body>
  448. </html>
  449. """
  450. return html
  451. def main():
  452. """主函数"""
  453. # 输入输出路径
  454. script_dir = Path(__file__).parent
  455. project_root = script_dir.parent.parent
  456. data_dir = project_root / "data" / "data_1117"
  457. input_dir = data_dir / "当前帖子_how解构结果"
  458. output_file = data_dir / "当前帖子_how解构结果_可视化.html"
  459. print(f"读取 how 解构结果: {input_dir}")
  460. # 获取所有 JSON 文件
  461. json_files = list(input_dir.glob("*_how.json"))
  462. print(f"找到 {len(json_files)} 个文件\n")
  463. # 读取所有帖子数据
  464. posts_data = []
  465. for i, file_path in enumerate(json_files, 1):
  466. print(f"读取文件 [{i}/{len(json_files)}]: {file_path.name}")
  467. with open(file_path, "r", encoding="utf-8") as f:
  468. post_data = json.load(f)
  469. posts_data.append(post_data)
  470. # 生成合并的 HTML
  471. print(f"\n生成合并的 HTML...")
  472. html_content = generate_combined_html(posts_data)
  473. # 保存 HTML 文件
  474. print(f"保存到: {output_file}")
  475. with open(output_file, "w", encoding="utf-8") as f:
  476. f.write(html_content)
  477. print(f"\n完成! 可视化文件已保存")
  478. print(f"请在浏览器中打开: {output_file}")
  479. if __name__ == "__main__":
  480. main()