visualize_match_graph.py 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. 将匹配图谱数据可视化为交互式HTML文件
  5. 输入:match_graph目录下的JSON文件
  6. 输出:单个HTML文件,包含所有帖子的图谱,可通过Tab切换
  7. """
  8. import json
  9. from pathlib import Path
  10. from typing import Dict, List
  11. import sys
# Put the project root on sys.path. The root is taken to be the directory two
# levels above the directory that contains this file. Presumably this is what
# lets the project-local `script` package be imported when the file is run
# directly -- verify against the repository layout.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))
  15. from script.data_processing.path_config import PathConfig
  16. HTML_TEMPLATE = '''<!DOCTYPE html>
  17. <html lang="zh-CN">
  18. <head>
  19. <meta charset="UTF-8">
  20. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  21. <title>匹配图谱可视化</title>
  22. <script src="https://d3js.org/d3.v7.min.js"></script>
  23. <style>
  24. * {{
  25. margin: 0;
  26. padding: 0;
  27. box-sizing: border-box;
  28. }}
  29. body {{
  30. font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  31. background: #1a1a2e;
  32. color: #eee;
  33. overflow: hidden;
  34. }}
  35. #container {{
  36. display: flex;
  37. height: 100vh;
  38. flex-direction: column;
  39. }}
  40. /* Tab样式 */
  41. .tabs {{
  42. display: flex;
  43. background: #0f3460;
  44. padding: 0 20px;
  45. overflow-x: auto;
  46. flex-shrink: 0;
  47. }}
  48. .tab {{
  49. padding: 12px 20px;
  50. cursor: pointer;
  51. border-bottom: 3px solid transparent;
  52. white-space: nowrap;
  53. font-size: 13px;
  54. color: #888;
  55. transition: all 0.2s;
  56. }}
  57. .tab:hover {{
  58. color: #fff;
  59. background: rgba(255,255,255,0.05);
  60. }}
  61. .tab.active {{
  62. color: #e94560;
  63. border-bottom-color: #e94560;
  64. background: rgba(233, 69, 96, 0.1);
  65. }}
  66. /* 主内容区 */
  67. .main-content {{
  68. display: flex;
  69. flex: 1;
  70. overflow: hidden;
  71. }}
  72. #graph {{
  73. flex: 1;
  74. position: relative;
  75. }}
  76. #sidebar {{
  77. width: 280px;
  78. background: #16213e;
  79. padding: 15px;
  80. overflow-y: auto;
  81. border-left: 1px solid #0f3460;
  82. }}
  83. h1 {{
  84. font-size: 15px;
  85. margin-bottom: 10px;
  86. color: #e94560;
  87. }}
  88. h2 {{
  89. font-size: 12px;
  90. margin: 10px 0 6px;
  91. color: #0f9b8e;
  92. }}
  93. .legend {{
  94. margin-top: 10px;
  95. }}
  96. .legend-grid {{
  97. display: grid;
  98. grid-template-columns: 1fr 1fr;
  99. gap: 4px 8px;
  100. }}
  101. .legend-item {{
  102. display: flex;
  103. align-items: center;
  104. font-size: 11px;
  105. }}
  106. .legend-color {{
  107. width: 12px;
  108. height: 12px;
  109. border-radius: 50%;
  110. margin-right: 6px;
  111. flex-shrink: 0;
  112. }}
  113. .legend-line {{
  114. width: 20px;
  115. height: 3px;
  116. margin-right: 6px;
  117. flex-shrink: 0;
  118. }}
  119. .detail-panel {{
  120. margin-top: 20px;
  121. padding: 15px;
  122. background: #0f3460;
  123. border-radius: 8px;
  124. display: none;
  125. }}
  126. .detail-panel.active {{
  127. display: block;
  128. }}
  129. .detail-panel h3 {{
  130. font-size: 14px;
  131. margin-bottom: 10px;
  132. color: #e94560;
  133. }}
  134. .detail-panel p {{
  135. font-size: 12px;
  136. line-height: 1.6;
  137. color: #ccc;
  138. margin: 5px 0;
  139. }}
  140. .detail-panel .label {{
  141. color: #888;
  142. }}
  143. .detail-panel .close-btn {{
  144. position: absolute;
  145. top: 10px;
  146. right: 10px;
  147. background: none;
  148. border: none;
  149. color: #888;
  150. cursor: pointer;
  151. font-size: 16px;
  152. }}
  153. .detail-panel .close-btn:hover {{
  154. color: #e94560;
  155. }}
  156. .detail-panel-wrapper {{
  157. position: relative;
  158. }}
  159. .similarity-score {{
  160. background: #e94560;
  161. color: #fff;
  162. padding: 2px 6px;
  163. border-radius: 4px;
  164. font-weight: bold;
  165. }}
  166. .edge-description {{
  167. background: #1a1a2e;
  168. padding: 10px;
  169. border-radius: 4px;
  170. margin-top: 8px;
  171. font-size: 11px;
  172. line-height: 1.5;
  173. }}
  174. svg {{
  175. width: 100%;
  176. height: 100%;
  177. }}
  178. .node {{
  179. cursor: pointer;
  180. }}
  181. .node circle, .node rect {{
  182. stroke-width: 3px;
  183. }}
  184. .node .post-node {{
  185. stroke: #fff;
  186. stroke-dasharray: 4,2;
  187. }}
  188. .node .persona-node {{
  189. stroke: #fff;
  190. }}
  191. .node text {{
  192. font-size: 11px;
  193. fill: #fff;
  194. pointer-events: none;
  195. }}
  196. .link {{
  197. stroke-opacity: 0.7;
  198. }}
  199. .link-hitarea {{
  200. stroke: transparent;
  201. stroke-width: 15px;
  202. cursor: pointer;
  203. fill: none;
  204. }}
  205. .link-hitarea:hover + .link {{
  206. stroke-opacity: 1;
  207. stroke-width: 3px;
  208. }}
  209. .edge-label {{
  210. font-size: 10px;
  211. fill: #fff;
  212. pointer-events: none;
  213. text-anchor: middle;
  214. }}
  215. .edge-label-bg {{
  216. fill: rgba(0,0,0,0.7);
  217. }}
  218. .link.match {{
  219. stroke: #e94560;
  220. stroke-dasharray: 5,5;
  221. }}
  222. .link.category-cross {{
  223. stroke: #2ecc71;
  224. }}
  225. .link.category-intra {{
  226. stroke: #27ae60;
  227. stroke-dasharray: 3,3;
  228. }}
  229. .link.tag-cooccur {{
  230. stroke: #f39c12;
  231. }}
  232. .link.belong {{
  233. stroke: #9b59b6;
  234. }}
  235. .link.contain {{
  236. stroke: #8e44ad;
  237. stroke-dasharray: 2,2;
  238. }}
  239. /* 镜像边样式(虚线,颜色与原边相同) */
  240. .link.mirror-category-cross {{
  241. stroke: #2ecc71;
  242. stroke-dasharray: 6,3;
  243. }}
  244. .link.mirror-category-intra {{
  245. stroke: #27ae60;
  246. stroke-dasharray: 6,3;
  247. }}
  248. .link.mirror-tag-cooccur {{
  249. stroke: #f39c12;
  250. stroke-dasharray: 6,3;
  251. }}
  252. .link.mirror-belong {{
  253. stroke: #9b59b6;
  254. stroke-dasharray: 6,3;
  255. }}
  256. .link.mirror-contain {{
  257. stroke: #8e44ad;
  258. stroke-dasharray: 6,3;
  259. }}
  260. .tooltip {{
  261. position: absolute;
  262. background: rgba(0,0,0,0.9);
  263. color: #fff;
  264. padding: 10px 15px;
  265. border-radius: 6px;
  266. font-size: 12px;
  267. pointer-events: none;
  268. max-width: 300px;
  269. z-index: 1000;
  270. display: none;
  271. }}
  272. .controls {{
  273. position: absolute;
  274. top: 20px;
  275. left: 20px;
  276. background: rgba(22, 33, 62, 0.9);
  277. padding: 15px;
  278. border-radius: 8px;
  279. z-index: 100;
  280. }}
  281. .controls button {{
  282. background: #0f3460;
  283. color: #fff;
  284. border: none;
  285. padding: 8px 15px;
  286. margin: 5px;
  287. border-radius: 4px;
  288. cursor: pointer;
  289. font-size: 12px;
  290. }}
  291. .controls button:hover {{
  292. background: #e94560;
  293. }}
  294. </style>
  295. </head>
  296. <body>
  297. <div id="container">
  298. <div class="tabs" id="tabs">
  299. {tabs_html}
  300. </div>
  301. <div class="main-content">
  302. <div id="graph">
  303. <div class="controls">
  304. <button onclick="resetZoom()">重置视图</button>
  305. <button onclick="toggleLabels()">切换标签</button>
  306. </div>
  307. <div class="tooltip" id="tooltip"></div>
  308. </div>
  309. <div id="sidebar">
  310. <h1>匹配图谱</h1>
  311. <div class="detail-panel active" id="detailPanel">
  312. <h3 id="detailTitle">点击节点或边查看详情</h3>
  313. <div id="detailContent">
  314. <p style="color: #888; font-size: 11px;">点击图中的节点或边,这里会显示详细信息</p>
  315. </div>
  316. </div>
  317. <div class="legend">
  318. <h2>节点</h2>
  319. <div class="legend-grid">
  320. <div class="legend-item">
  321. <div class="legend-color" style="background: #666; border: 2px dashed #fff;"></div>
  322. <span>帖子(虚线)</span>
  323. </div>
  324. <div class="legend-item">
  325. <div class="legend-color" style="background: #666; border: 2px solid #fff;"></div>
  326. <span>人设(实线)</span>
  327. </div>
  328. <div class="legend-item">
  329. <div class="legend-color" style="background: #666; border-radius: 50%;"></div>
  330. <span>标签(圆)</span>
  331. </div>
  332. <div class="legend-item">
  333. <div class="legend-color" style="background: #666; border-radius: 2px;"></div>
  334. <span>分类(方)</span>
  335. </div>
  336. <div class="legend-item">
  337. <div class="legend-color" style="background: #666; opacity: 1;"></div>
  338. <span>直接匹配</span>
  339. </div>
  340. <div class="legend-item">
  341. <div class="legend-color" style="background: #666; opacity: 0.5;"></div>
  342. <span>扩展节点</span>
  343. </div>
  344. <div class="legend-item">
  345. <div class="legend-color" style="background: #f39c12;"></div>
  346. <span>灵感点</span>
  347. </div>
  348. <div class="legend-item">
  349. <div class="legend-color" style="background: #3498db;"></div>
  350. <span>目的点</span>
  351. </div>
  352. <div class="legend-item">
  353. <div class="legend-color" style="background: #9b59b6;"></div>
  354. <span>关键点</span>
  355. </div>
  356. </div>
  357. <h2>边(人设/实线)</h2>
  358. <div class="legend-grid">
  359. <div class="legend-item">
  360. <div class="legend-line" style="background: #e94560;"></div>
  361. <span>匹配</span>
  362. </div>
  363. <div class="legend-item">
  364. <div class="legend-line" style="background: #2ecc71;"></div>
  365. <span>分类共现(跨)</span>
  366. </div>
  367. <div class="legend-item">
  368. <div class="legend-line" style="background: #27ae60;"></div>
  369. <span>分类共现(内)</span>
  370. </div>
  371. <div class="legend-item">
  372. <div class="legend-line" style="background: #f39c12;"></div>
  373. <span>标签共现</span>
  374. </div>
  375. <div class="legend-item">
  376. <div class="legend-line" style="background: #9b59b6;"></div>
  377. <span>属于</span>
  378. </div>
  379. <div class="legend-item">
  380. <div class="legend-line" style="background: #8e44ad;"></div>
  381. <span>包含</span>
  382. </div>
  383. </div>
  384. <h2>镜像边(帖子/虚线)</h2>
  385. <div class="legend-grid">
  386. <div class="legend-item">
  387. <div class="legend-line" style="background: repeating-linear-gradient(90deg, #2ecc71, #2ecc71 6px, transparent 6px, transparent 9px);"></div>
  388. <span>分类共现</span>
  389. </div>
  390. <div class="legend-item">
  391. <div class="legend-line" style="background: repeating-linear-gradient(90deg, #f39c12, #f39c12 6px, transparent 6px, transparent 9px);"></div>
  392. <span>标签共现</span>
  393. </div>
  394. <div class="legend-item">
  395. <div class="legend-line" style="background: repeating-linear-gradient(90deg, #9b59b6, #9b59b6 6px, transparent 6px, transparent 9px);"></div>
  396. <span>属于</span>
  397. </div>
  398. <div class="legend-item">
  399. <div class="legend-line" style="background: repeating-linear-gradient(90deg, #8e44ad, #8e44ad 6px, transparent 6px, transparent 9px);"></div>
  400. <span>包含</span>
  401. </div>
  402. </div>
  403. </div>
  404. </div>
  405. </div>
  406. </div>
  407. <script>
  408. // 所有帖子的图谱数据
  409. const allGraphData = {all_graph_data};
  410. // 当前选中的帖子索引
  411. let currentIndex = 0;
  412. let simulation = null;
  413. let svg = null;
  414. let g = null;
  415. let zoom = null;
  416. let showLabels = true;
  417. // 初始化
  418. function init() {{
  419. const container = document.getElementById("graph");
  420. const width = container.clientWidth;
  421. const height = container.clientHeight;
  422. svg = d3.select("#graph")
  423. .append("svg")
  424. .attr("width", width)
  425. .attr("height", height);
  426. g = svg.append("g");
  427. zoom = d3.zoom()
  428. .scaleExtent([0.1, 4])
  429. .on("zoom", (event) => {{
  430. g.attr("transform", event.transform);
  431. }});
  432. svg.call(zoom);
  433. // 绑定Tab点击事件
  434. document.querySelectorAll(".tab").forEach((tab, index) => {{
  435. tab.addEventListener("click", () => switchTab(index));
  436. }});
  437. // 显示第一个帖子
  438. switchTab(0);
  439. }}
  440. // 切换Tab
  441. function switchTab(index) {{
  442. currentIndex = index;
  443. // 更新Tab样式
  444. document.querySelectorAll(".tab").forEach((tab, i) => {{
  445. tab.classList.toggle("active", i === index);
  446. }});
  447. // 更新图谱
  448. renderGraph(allGraphData[index]);
  449. }}
  450. // 渲染图谱
  451. function renderGraph(data) {{
  452. // 清空现有图谱
  453. g.selectAll("*").remove();
  454. if (simulation) {{
  455. simulation.stop();
  456. }}
  457. const container = document.getElementById("graph");
  458. const width = container.clientWidth;
  459. const height = container.clientHeight;
  460. // 准备数据
  461. const nodes = data.nodes.map(n => ({{
  462. ...n,
  463. id: n.节点ID,
  464. source: n.节点ID.startsWith("帖子_") ? "帖子" : "人设",
  465. level: n.节点层级
  466. }}));
  467. const links = data.edges.map(e => ({{
  468. ...e,
  469. source: e.源节点ID,
  470. target: e.目标节点ID,
  471. type: e.边类型
  472. }}));
  473. // 分离帖子节点和人设节点
  474. const postNodes = nodes.filter(n => n.source === "帖子");
  475. const personaNodes = nodes.filter(n => n.source === "人设");
  476. const matchLinks = links.filter(l => l.type === "匹配");
  477. // 构建帖子节点到人设节点的映射
  478. const postToPersona = {{}};
  479. const personaToPost = {{}};
  480. matchLinks.forEach(l => {{
  481. const sid = typeof l.source === "object" ? l.source.id : l.source;
  482. const tid = typeof l.target === "object" ? l.target.id : l.target;
  483. if (!postToPersona[sid]) postToPersona[sid] = [];
  484. postToPersona[sid].push(tid);
  485. if (!personaToPost[tid]) personaToPost[tid] = [];
  486. personaToPost[tid].push(sid);
  487. }});
  488. // 找出所有连通分量
  489. function findConnectedComponents(nodes, links) {{
  490. const nodeIds = new Set(nodes.map(n => n.id));
  491. const adj = {{}};
  492. nodeIds.forEach(id => adj[id] = []);
  493. links.forEach(l => {{
  494. const sid = typeof l.source === "object" ? l.source.id : l.source;
  495. const tid = typeof l.target === "object" ? l.target.id : l.target;
  496. if (nodeIds.has(sid) && nodeIds.has(tid)) {{
  497. adj[sid].push(tid);
  498. adj[tid].push(sid);
  499. }}
  500. }});
  501. const visited = new Set();
  502. const components = [];
  503. nodeIds.forEach(startId => {{
  504. if (visited.has(startId)) return;
  505. const component = [];
  506. const queue = [startId];
  507. while (queue.length > 0) {{
  508. const id = queue.shift();
  509. if (visited.has(id)) continue;
  510. visited.add(id);
  511. component.push(id);
  512. adj[id].forEach(neighbor => {{
  513. if (!visited.has(neighbor)) queue.push(neighbor);
  514. }});
  515. }}
  516. components.push(component);
  517. }});
  518. return components;
  519. }}
  520. // 按大小排序连通分量(大的在前)
  521. const components = findConnectedComponents(nodes, links)
  522. .sort((a, b) => b.length - a.length);
  523. console.log(`找到 ${{components.length}} 个连通分量`);
  524. // 为每个节点分配连通分量ID和分量内的X范围
  525. const nodeToComponent = {{}};
  526. const componentCenters = {{}};
  527. const componentBounds = {{}};
  528. const padding = 50; // 分量之间的间距
  529. const totalPadding = padding * (components.length - 1);
  530. const availableWidth = width - totalPadding - 100; // 留边距
  531. // 根据分量大小分配宽度
  532. const totalNodes = nodes.length;
  533. let currentX = 50; // 起始边距
  534. components.forEach((comp, i) => {{
  535. const compWidth = Math.max(150, (comp.length / totalNodes) * availableWidth);
  536. const centerX = currentX + compWidth / 2;
  537. componentCenters[i] = centerX;
  538. componentBounds[i] = {{ start: currentX, end: currentX + compWidth, width: compWidth }};
  539. comp.forEach(nodeId => {{
  540. nodeToComponent[nodeId] = i;
  541. }});
  542. currentX += compWidth + padding;
  543. }});
  544. // 使用重心法(Barycenter)减少边交叉
  545. // 迭代优化:交替调整两层节点的顺序
  546. const nodeTargetX = {{}};
  547. const personaXMap = {{}};
  548. // 对每个连通分量单独处理
  549. components.forEach((comp, compIdx) => {{
  550. const bounds = componentBounds[compIdx];
  551. const compPostNodes = postNodes.filter(n => nodeToComponent[n.id] === compIdx);
  552. const compPersonaNodes = personaNodes.filter(n => nodeToComponent[n.id] === compIdx);
  553. if (compPostNodes.length === 0 || compPersonaNodes.length === 0) {{
  554. // 没有匹配关系的分量,均匀分布
  555. const spacing = bounds.width / (comp.length + 1);
  556. comp.forEach((nodeId, i) => {{
  557. const node = nodes.find(n => n.id === nodeId);
  558. if (node) {{
  559. node.x = bounds.start + spacing * (i + 1);
  560. nodeTargetX[nodeId] = node.x;
  561. if (node.source === "人设") personaXMap[nodeId] = node.x;
  562. }}
  563. }});
  564. return;
  565. }}
  566. // 初始化:给人设节点一个初始顺序
  567. let personaOrder = compPersonaNodes.map((n, i) => ({{ node: n, order: i }}));
  568. // 迭代优化(3轮)
  569. for (let iter = 0; iter < 3; iter++) {{
  570. // 1. 根据人设节点位置,计算帖子节点的重心
  571. const postBarycenter = {{}};
  572. compPostNodes.forEach(pn => {{
  573. const matched = postToPersona[pn.id] || [];
  574. if (matched.length > 0) {{
  575. const avgOrder = matched.reduce((sum, pid) => {{
  576. const po = personaOrder.find(p => p.node.id === pid);
  577. return sum + (po ? po.order : 0);
  578. }}, 0) / matched.length;
  579. postBarycenter[pn.id] = avgOrder;
  580. }} else {{
  581. postBarycenter[pn.id] = 0;
  582. }}
  583. }});
  584. // 按重心排序帖子节点
  585. const sortedPosts = [...compPostNodes].sort((a, b) =>
  586. postBarycenter[a.id] - postBarycenter[b.id]
  587. );
  588. // 2. 根据帖子节点位置,重新计算人设节点的重心
  589. const personaBarycenter = {{}};
  590. compPersonaNodes.forEach(pn => {{
  591. const matched = personaToPost[pn.id] || [];
  592. if (matched.length > 0) {{
  593. const avgOrder = matched.reduce((sum, pid) => {{
  594. const idx = sortedPosts.findIndex(p => p.id === pid);
  595. return sum + (idx >= 0 ? idx : 0);
  596. }}, 0) / matched.length;
  597. personaBarycenter[pn.id] = avgOrder;
  598. }} else {{
  599. personaBarycenter[pn.id] = personaOrder.find(p => p.node.id === pn.id)?.order || 0;
  600. }}
  601. }});
  602. // 更新人设节点顺序
  603. personaOrder = compPersonaNodes
  604. .map(n => ({{ node: n, order: personaBarycenter[n.id] }}))
  605. .sort((a, b) => a.order - b.order)
  606. .map((item, i) => ({{ node: item.node, order: i }}));
  607. }}
  608. // 最终排序
  609. const finalPersonaOrder = personaOrder.map(p => p.node);
  610. const postBarycenter = {{}};
  611. compPostNodes.forEach(pn => {{
  612. const matched = postToPersona[pn.id] || [];
  613. if (matched.length > 0) {{
  614. const avgOrder = matched.reduce((sum, pid) => {{
  615. const idx = finalPersonaOrder.findIndex(n => n.id === pid);
  616. return sum + (idx >= 0 ? idx : 0);
  617. }}, 0) / matched.length;
  618. postBarycenter[pn.id] = avgOrder;
  619. }} else {{
  620. postBarycenter[pn.id] = 0;
  621. }}
  622. }});
  623. const finalPostOrder = [...compPostNodes].sort((a, b) =>
  624. postBarycenter[a.id] - postBarycenter[b.id]
  625. );
  626. // 设置位置
  627. const personaSpacing = bounds.width / (finalPersonaOrder.length + 1);
  628. finalPersonaOrder.forEach((n, i) => {{
  629. n.x = bounds.start + personaSpacing * (i + 1);
  630. nodeTargetX[n.id] = n.x;
  631. personaXMap[n.id] = n.x;
  632. }});
  633. const postSpacing = bounds.width / (finalPostOrder.length + 1);
  634. finalPostOrder.forEach((n, i) => {{
  635. // 帖子节点用重心位置(匹配人设的平均X)
  636. const matched = postToPersona[n.id] || [];
  637. if (matched.length > 0) {{
  638. const avgX = matched.reduce((sum, pid) => sum + (personaXMap[pid] || bounds.start + bounds.width/2), 0) / matched.length;
  639. n.x = avgX;
  640. }} else {{
  641. n.x = bounds.start + postSpacing * (i + 1);
  642. }}
  643. nodeTargetX[n.id] = n.x;
  644. }});
  645. }});
  646. // 节点颜色
  647. const levelColors = {{
  648. "灵感点": "#f39c12",
  649. "目的点": "#3498db",
  650. "关键点": "#9b59b6"
  651. }};
  652. // 两层Y坐标(带倾斜:右边高,左边低)
  653. const postBaseY = height * 0.25; // 帖子节点基准Y
  654. const personaBaseY = height * 0.7; // 人设节点基准Y
  655. const tiltAmount = height * 0.25; // 倾斜幅度(约14度)
  656. // 根据X位置计算Y(右边高,左边低)
  657. function getTiltedY(baseY, x) {{
  658. const tilt = tiltAmount * (0.5 - x / width);
  659. return baseY + tilt;
  660. }}
  661. // 力导向模拟
  662. simulation = d3.forceSimulation(nodes)
  663. .force("link", d3.forceLink(links).id(d => d.id).distance(120).strength(0.1))
  664. .force("charge", d3.forceManyBody().strength(-400)) // 更强的互斥
  665. // X方向:拉向目标位置,但允许被推开
  666. .force("x", d3.forceX(d => nodeTargetX[d.id] || width / 2).strength(0.15))
  667. // Y方向力:带倾斜
  668. .force("y", d3.forceY(d => {{
  669. const baseY = d.source === "帖子" ? postBaseY : personaBaseY;
  670. return getTiltedY(baseY, d.x || width / 2);
  671. }}).strength(0.4))
  672. .force("collision", d3.forceCollide().radius(50)); // 更大的碰撞半径
  673. // 边类型到CSS类的映射
  674. const edgeTypeClass = {{
  675. "匹配": "match",
  676. "分类共现(跨点)": "category-cross",
  677. "分类共现(点内)": "category-intra",
  678. "标签共现": "tag-cooccur",
  679. "属于": "belong",
  680. "包含": "contain",
  681. // 镜像边(帖子节点之间,虚线)
  682. "镜像_分类共现(跨点)": "mirror-category-cross",
  683. "镜像_分类共现(点内)": "mirror-category-intra",
  684. "镜像_标签共现": "mirror-tag-cooccur",
  685. "镜像_属于": "mirror-belong",
  686. "镜像_包含": "mirror-contain"
  687. }};
  688. // 创建边的容器
  689. const linkGroup = g.append("g").attr("class", "links");
  690. // 为每条边创建组
  691. const linkG = linkGroup.selectAll("g")
  692. .data(links)
  693. .join("g")
  694. .attr("class", "link-group");
  695. // 绘制点击热区(透明宽线)
  696. const linkHitarea = linkG.append("line")
  697. .attr("class", "link-hitarea");
  698. // 绘制可见的边
  699. const link = linkG.append("line")
  700. .attr("class", d => "link " + (edgeTypeClass[d.type] || "match"))
  701. .attr("stroke-width", d => d.type === "匹配" ? 2.5 : 1.5);
  702. // 为匹配边添加分数标签
  703. const edgeLabels = linkG.filter(d => d.type === "匹配" && d.边详情 && d.边详情.相似度)
  704. .append("g")
  705. .attr("class", "edge-label-group");
  706. edgeLabels.append("rect")
  707. .attr("class", "edge-label-bg")
  708. .attr("rx", 3)
  709. .attr("ry", 3);
  710. edgeLabels.append("text")
  711. .attr("class", "edge-label")
  712. .text(d => {{
  713. const score = d.边详情.相似度;
  714. return typeof score === "number" ? score.toFixed(2) : score;
  715. }});
  716. // 边的点击事件
  717. linkHitarea.on("click", (event, d) => {{
  718. event.stopPropagation();
  719. showEdgeInfo(d);
  720. }})
  721. .on("mouseover", function(event, d) {{
  722. d3.select(this.parentNode).select(".link")
  723. .attr("stroke-opacity", 1)
  724. .attr("stroke-width", 4);
  725. }})
  726. .on("mouseout", function(event, d) {{
  727. d3.select(this.parentNode).select(".link")
  728. .attr("stroke-opacity", 0.7)
  729. .attr("stroke-width", d.type === "匹配" ? 2.5 : 1.5);
  730. }});
  731. // 绘制节点
  732. const node = g.append("g")
  733. .selectAll("g")
  734. .data(nodes)
  735. .join("g")
  736. .attr("class", "node")
  737. .call(d3.drag()
  738. .on("start", dragstarted)
  739. .on("drag", dragged)
  740. .on("end", dragended));
  741. // 根据节点类型绘制不同形状:标签用圆形,分类用方形
  742. // 扩展节点用较低透明度表示
  743. node.each(function(d) {{
  744. const el = d3.select(this);
  745. const isExpanded = d.是否扩展 === true;
  746. const size = d.source === "帖子" ? 12 : (isExpanded ? 8 : 10);
  747. const fill = levelColors[d.level] || "#666";
  748. const nodeClass = d.source === "帖子" ? "post-node" : "persona-node";
  749. const opacity = isExpanded ? 0.5 : 1;
  750. if (d.节点类型 === "分类") {{
  751. // 方形
  752. el.append("rect")
  753. .attr("width", size * 2)
  754. .attr("height", size * 2)
  755. .attr("x", -size)
  756. .attr("y", -size)
  757. .attr("fill", fill)
  758. .attr("class", nodeClass)
  759. .attr("rx", 3)
  760. .attr("opacity", opacity);
  761. }} else {{
  762. // 圆形(标签)
  763. el.append("circle")
  764. .attr("r", size)
  765. .attr("fill", fill)
  766. .attr("class", nodeClass)
  767. .attr("opacity", opacity);
  768. }}
  769. }});
  770. const labels = node.append("text")
  771. .attr("dx", 15)
  772. .attr("dy", 4)
  773. .text(d => d.节点名称)
  774. .style("display", showLabels ? "block" : "none");
  775. // 工具提示
  776. const tooltip = d3.select("#tooltip");
  777. node.on("mouseover", (event, d) => {{
  778. tooltip.style("display", "block")
  779. .html(`<strong>${{d.节点名称}}</strong><br/>类型: ${{d.节点类型}}<br/>层级: ${{d.节点层级}}`);
  780. }})
  781. .on("mousemove", (event) => {{
  782. tooltip.style("left", (event.pageX + 15) + "px")
  783. .style("top", (event.pageY - 10) + "px");
  784. }})
  785. .on("mouseout", () => {{
  786. tooltip.style("display", "none");
  787. }})
  788. .on("click", (event, d) => {{
  789. showNodeInfo(d);
  790. }});
  791. // 更新位置
  792. simulation.on("tick", () => {{
  793. // 更新热区线
  794. linkHitarea
  795. .attr("x1", d => d.source.x)
  796. .attr("y1", d => d.source.y)
  797. .attr("x2", d => d.target.x)
  798. .attr("y2", d => d.target.y);
  799. // 更新可见边
  800. link
  801. .attr("x1", d => d.source.x)
  802. .attr("y1", d => d.source.y)
  803. .attr("x2", d => d.target.x)
  804. .attr("y2", d => d.target.y);
  805. // 更新边标签位置(放在边的中点)
  806. edgeLabels.attr("transform", d => {{
  807. const midX = (d.source.x + d.target.x) / 2;
  808. const midY = (d.source.y + d.target.y) / 2;
  809. return `translate(${{midX}},${{midY}})`;
  810. }});
  811. // 更新标签背景大小
  812. edgeLabels.each(function(d) {{
  813. const textEl = d3.select(this).select("text").node();
  814. if (textEl) {{
  815. const bbox = textEl.getBBox();
  816. d3.select(this).select("rect")
  817. .attr("x", bbox.x - 3)
  818. .attr("y", bbox.y - 1)
  819. .attr("width", bbox.width + 6)
  820. .attr("height", bbox.height + 2);
  821. }}
  822. }});
  823. node.attr("transform", d => `translate(${{d.x}},${{d.y}})`);
  824. }});
  825. // 拖拽函数
  826. function dragstarted(event, d) {{
  827. if (!event.active) simulation.alphaTarget(0.3).restart();
  828. d.fx = d.x;
  829. d.fy = d.y;
  830. }}
  831. function dragged(event, d) {{
  832. d.fx = event.x;
  833. d.fy = event.y;
  834. }}
  835. function dragended(event, d) {{
  836. if (!event.active) simulation.alphaTarget(0);
  837. d.fx = null;
  838. d.fy = null;
  839. }}
  840. }}
  841. // 控制函数
  842. function resetZoom() {{
  843. const container = document.getElementById("graph");
  844. const width = container.clientWidth;
  845. const height = container.clientHeight;
  846. svg.transition().duration(750).call(
  847. zoom.transform,
  848. d3.zoomIdentity.translate(width/2, height/2).scale(1).translate(-width/2, -height/2)
  849. );
  850. }}
  851. function toggleLabels() {{
  852. showLabels = !showLabels;
  853. g.selectAll(".node text").style("display", showLabels ? "block" : "none");
  854. }}
  855. function showNodeInfo(d) {{
  856. const panel = document.getElementById("detailPanel");
  857. panel.classList.add("active");
  858. document.getElementById("detailTitle").textContent = d.source === "帖子" ? "📌 帖子节点" : "👤 人设节点";
  859. let html = `
  860. <p><span class="label">节点ID:</span> ${{d.节点ID}}</p>
  861. <p><span class="label">名称:</span> <strong>${{d.节点名称}}</strong></p>
  862. <p><span class="label">类型:</span> ${{d.节点类型}}</p>
  863. <p><span class="label">层级:</span> ${{d.节点层级}}</p>
  864. `;
  865. if (d.权重) {{
  866. html += `<p><span class="label">权重:</span> ${{d.权重}}</p>`;
  867. }}
  868. if (d.所属分类 && d.所属分类.length > 0) {{
  869. html += `<p><span class="label">所属分类:</span> ${{d.所属分类.join(" > ")}}</p>`;
  870. }}
  871. if (d.帖子数) {{
  872. html += `<p><span class="label">帖子数:</span> ${{d.帖子数}}</p>`;
  873. }}
  874. document.getElementById("detailContent").innerHTML = html;
  875. }}
  876. function showEdgeInfo(d) {{
  877. const panel = document.getElementById("detailPanel");
  878. panel.classList.add("active");
  879. const sourceNode = typeof d.source === "object" ? d.source : {{ id: d.source }};
  880. const targetNode = typeof d.target === "object" ? d.target : {{ id: d.target }};
  881. // 判断是否为镜像边
  882. const isMirror = d.type.startsWith("镜像_");
  883. document.getElementById("detailTitle").textContent = isMirror ? "🪞 镜像边详情" : "🔗 边详情";
  884. let html = `
  885. <p><span class="label">边类型:</span> <strong>${{d.type}}</strong></p>
  886. <p><span class="label">源节点:</span> ${{sourceNode.节点名称 || sourceNode.id}}</p>
  887. <p><span class="label">目标节点:</span> ${{targetNode.节点名称 || targetNode.id}}</p>
  888. `;
  889. if (d.边详情) {{
  890. if (d.边详情.相似度 !== undefined) {{
  891. const score = typeof d.边详情.相似度 === "number" ? d.边详情.相似度.toFixed(2) : d.边详情.相似度;
  892. html += `<p><span class="label">相似度:</span> <span class="similarity-score">${{score}}</span></p>`;
  893. }}
  894. if (d.边详情.说明) {{
  895. html += `<p><span class="label">说明:</span></p><div class="edge-description">${{d.边详情.说明}}</div>`;
  896. }}
  897. if (d.边详情.共现次数 !== undefined) {{
  898. html += `<p><span class="label">共现次数:</span> ${{d.边详情.共现次数}}</p>`;
  899. }}
  900. // 镜像边特有信息
  901. if (d.边详情.原始边类型) {{
  902. html += `<p><span class="label">原始边类型:</span> ${{d.边详情.原始边类型}}</p>`;
  903. }}
  904. if (d.边详情.源人设节点) {{
  905. html += `<p><span class="label">源人设节点:</span> ${{d.边详情.源人设节点}}</p>`;
  906. }}
  907. if (d.边详情.目标人设节点) {{
  908. html += `<p><span class="label">目标人设节点:</span> ${{d.边详情.目标人设节点}}</p>`;
  909. }}
  910. }}
  911. document.getElementById("detailContent").innerHTML = html;
  912. }}
  913. function closeDetailPanel() {{
  914. document.getElementById("detailPanel").classList.remove("active");
  915. }}
  916. // 页面加载完成后初始化
  917. window.addEventListener("load", init);
  918. window.addEventListener("resize", () => {{
  919. if (currentIndex >= 0) {{
  920. renderGraph(allGraphData[currentIndex]);
  921. }}
  922. }});
  923. </script>
  924. </body>
  925. </html>
  926. '''
  927. def generate_combined_html(all_graph_data: List[Dict], output_file: Path):
  928. """
  929. 生成包含所有帖子图谱的HTML文件
  930. Args:
  931. all_graph_data: 所有帖子的图谱数据列表
  932. output_file: 输出文件路径
  933. """
  934. # 生成Tab HTML
  935. tabs_html = ""
  936. for i, data in enumerate(all_graph_data):
  937. post_title = data.get("postTitle", "")
  938. # 使用帖子标题,如果太长则截断
  939. if post_title:
  940. tab_name = post_title[:15] + "..." if len(post_title) > 15 else post_title
  941. else:
  942. tab_name = f"帖子 {i+1}"
  943. active_class = "active" if i == 0 else ""
  944. tabs_html += f'<div class="tab {active_class}" data-index="{i}">{tab_name}</div>\n'
  945. # 生成HTML
  946. html_content = HTML_TEMPLATE.format(
  947. tabs_html=tabs_html,
  948. all_graph_data=json.dumps(all_graph_data, ensure_ascii=False)
  949. )
  950. with open(output_file, "w", encoding="utf-8") as f:
  951. f.write(html_content)
  952. def main():
  953. # 使用路径配置
  954. config = PathConfig()
  955. print(f"账号: {config.account_name}")
  956. print(f"输出版本: {config.output_version}")
  957. print()
  958. # 输入目录
  959. match_graph_dir = config.intermediate_dir / "match_graph"
  960. # 输出文件
  961. output_file = config.intermediate_dir / "match_graph.html"
  962. print(f"输入目录: {match_graph_dir}")
  963. print(f"输出文件: {output_file}")
  964. print()
  965. # 读取所有匹配图谱文件
  966. graph_files = sorted(match_graph_dir.glob("*_match_graph.json"))
  967. print(f"找到 {len(graph_files)} 个匹配图谱文件")
  968. all_graph_data = []
  969. for i, graph_file in enumerate(graph_files, 1):
  970. print(f" [{i}/{len(graph_files)}] 读取: {graph_file.name}")
  971. with open(graph_file, "r", encoding="utf-8") as f:
  972. match_graph_data = json.load(f)
  973. # 提取需要的数据
  974. graph_data = {
  975. "postId": match_graph_data["说明"]["帖子ID"],
  976. "postTitle": match_graph_data["说明"].get("帖子标题", ""),
  977. "stats": match_graph_data["说明"]["统计"],
  978. "nodes": match_graph_data["节点列表"],
  979. "edges": match_graph_data["边列表"]
  980. }
  981. all_graph_data.append(graph_data)
  982. # 生成HTML
  983. print("\n生成HTML文件...")
  984. generate_combined_html(all_graph_data, output_file)
  985. print("\n" + "="*60)
  986. print("处理完成!")
  987. print(f"输出文件: {output_file}")
# Script entry point: run main() only when the file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()