# tree_html.py - renders the topic-pattern category trees of one execution as a standalone HTML page.
  1. from __future__ import annotations
  2. import html as html_lib
  3. import json
  4. import math
  5. from dataclasses import dataclass, field
  6. from pathlib import Path
  7. from sqlalchemy import text
  8. from examples.demand.db_manager import DatabaseManager
# Module-level database handle; fetch_categories() obtains its sessions from it.
db = DatabaseManager()

# Source types handled by this module, in the order their sections are rendered.
# Each value is also the prefix of the per-type score file and the key into THEME.
SOURCE_TYPES = ["实质", "形式", "意图"]

# JSON field -> displayed dimension name (to add a new dimension, add an entry here).
SCORE_DIMS: dict[str, str] = {"score": "rov"}

# Colour theme per source type:
#   root_bg  - background of the section header
#   line     - colour of the tree connector lines
#   low/high - RGB endpoints blended according to a node's normalised score
#   no_score - background of nodes that have no score
THEME = {
    "实质": {
        "root_bg": "#e8841a",
        "line": "#d4a574",
        "low": (253, 243, 228),
        "high": (195, 80, 5),
        "no_score": "#f5e6d3",
    },
    "形式": {
        "root_bg": "#5b9bd5",
        "line": "#8cb9dc",
        "low": (232, 244, 253),
        "high": (30, 90, 165),
        "no_score": "#d6e9f8",
    },
    "意图": {
        "root_bg": "#70ad47",
        "line": "#8fc270",
        "low": (235, 249, 225),
        "high": (40, 120, 20),
        "no_score": "#dbefd0",
    },
}
  36. # ---------------------------------------------------------------------------
# Color helpers (the Python side renders the initial page; the JS side has an equivalent implementation used when switching dimensions)
  38. # ---------------------------------------------------------------------------
  39. def _luminance(r: int, g: int, b: int) -> float:
  40. return 0.299 * r / 255 + 0.587 * g / 255 + 0.114 * b / 255
  41. def _blend(low: tuple, high: tuple, t: float) -> tuple[int, int, int]:
  42. t = max(0.0, min(1.0, t))
  43. return tuple(int(lo + (hi - lo) * t) for lo, hi in zip(low, high))
  44. def _rgb_hex(rgb: tuple) -> str:
  45. return f"#{rgb[0]:02x}{rgb[1]:02x}{rgb[2]:02x}"
  46. def _normalize_score(score: float, max_score: float) -> float:
  47. if max_score <= 0 or score <= 0:
  48. return 0.0
  49. return math.log1p(score) / math.log1p(max_score)
  50. # ---------------------------------------------------------------------------
  51. # Data model
  52. # ---------------------------------------------------------------------------
  53. @dataclass
  54. class CatNode:
  55. id: int
  56. name: str
  57. source_stable_id: int | None
  58. source_type: str
  59. description: str | None
  60. level: int | None
  61. parent_id: int | None
  62. element_count: int
  63. children: list[CatNode] = field(default_factory=list)
  64. @property
  65. def subtree_element_count(self) -> int:
  66. total = self.element_count or 0
  67. for c in self.children:
  68. total += c.subtree_element_count
  69. return total
  70. # ---------------------------------------------------------------------------
  71. # Data loading
  72. # ---------------------------------------------------------------------------
  73. def fetch_categories(eid: int) -> list[dict]:
  74. session = db.get_session()
  75. try:
  76. rows = session.execute(
  77. text(
  78. "SELECT id, source_stable_id, source_type, name, description, "
  79. "level, parent_id, element_count "
  80. "FROM topic_pattern_category WHERE execution_id = :eid "
  81. "ORDER BY source_type, level, id"
  82. ),
  83. {"eid": eid},
  84. ).mappings().fetchall()
  85. return [dict(r) for r in rows]
  86. finally:
  87. session.close()
  88. def load_scores(eid: int) -> tuple[dict[str, dict[str, dict]], list[str]]:
  89. """读取 data/{eid}/*_分类.json,返回 (scores_data, dim_names)。
  90. scores_data: {source_type: {category_path: {dim_name: float, post_ids_count: int}}}
  91. """
  92. data_dir = Path(__file__).resolve().parent / "data" / str(eid)
  93. result: dict[str, dict[str, dict]] = {}
  94. dims_found: set[str] = set()
  95. for st in SOURCE_TYPES:
  96. fpath = data_dir / f"{st}_分类.json"
  97. if not fpath.exists():
  98. continue
  99. with open(fpath, encoding="utf-8") as f:
  100. items = json.load(f)
  101. lookup: dict[str, dict] = {}
  102. for item in items:
  103. cp = item.get("category_path", "")
  104. if not cp:
  105. continue
  106. entry: dict = {"post_ids_count": item.get("post_ids_count", 0)}
  107. for json_key, dim_name in SCORE_DIMS.items():
  108. if json_key in item:
  109. entry[dim_name] = item[json_key]
  110. dims_found.add(dim_name)
  111. lookup[cp] = entry
  112. result[st] = lookup
  113. return result, sorted(dims_found)
  114. def build_trees(categories: list[dict]) -> dict[str, list[CatNode]]:
  115. nodes: dict[int, CatNode] = {}
  116. for c in categories:
  117. n = CatNode(
  118. id=c["id"],
  119. name=c["name"],
  120. source_stable_id=c.get("source_stable_id"),
  121. source_type=c["source_type"],
  122. description=c.get("description"),
  123. level=c.get("level"),
  124. parent_id=c.get("parent_id"),
  125. element_count=c.get("element_count") or 0,
  126. )
  127. nodes[n.id] = n
  128. roots: dict[str, list[CatNode]] = {}
  129. for n in nodes.values():
  130. if n.parent_id and n.parent_id in nodes:
  131. nodes[n.parent_id].children.append(n)
  132. else:
  133. roots.setdefault(n.source_type, []).append(n)
  134. for n in nodes.values():
  135. n.children.sort(key=lambda x: x.id)
  136. return roots
  137. # ---------------------------------------------------------------------------
  138. # HTML rendering
  139. # ---------------------------------------------------------------------------
  140. def _node_html(
  141. n: CatNode,
  142. th: dict,
  143. depth: int = 0,
  144. parent_path: str = "",
  145. scores: dict[str, dict] | None = None,
  146. max_score: float = 1.0,
  147. default_dim: str = "rov",
  148. ) -> str:
  149. has_ch = bool(n.children)
  150. ec = n.element_count or 0
  151. cc = len(n.children)
  152. cur_path = n.name if not parent_path else f"{parent_path}>{n.name}"
  153. info = scores.get(cur_path) if scores else None
  154. score_val = info.get(default_dim) if info else None
  155. pcount = info.get("post_ids_count") if info else None
  156. if score_val is not None:
  157. t = _normalize_score(score_val, max_score) if score_val > 0 else 0.0
  158. rgb = _blend(th["low"], th["high"], t)
  159. bg = _rgb_hex(rgb)
  160. lum = _luminance(*rgb)
  161. tc = "#fff" if lum < 0.55 else "#4a3520"
  162. else:
  163. bg = th["no_score"]
  164. tc = "#6b5240"
  165. parts: list[str] = []
  166. if has_ch:
  167. parts.append('<span class="ti">\u25BC</span>')
  168. parts.append(f'<span class="nn">{html_lib.escape(n.name)}</span>')
  169. if n.source_stable_id is not None:
  170. parts.append(f'<span class="si">{n.source_stable_id}</span>')
  171. # .sc 始终存在(JS 切换维度时需要更新),无分数时隐藏
  172. if score_val is not None:
  173. parts.append(f'<span class="sc">{score_val:.2f}</span>')
  174. else:
  175. parts.append('<span class="sc" style="display:none"></span>')
  176. if ec:
  177. parts.append(f'<span class="ec">{ec}</span>')
  178. if pcount is not None:
  179. parts.append(f'<span class="pc">{pcount}p</span>')
  180. if cc:
  181. parts.append(f'<span class="cc">{cc}\u25B6</span>')
  182. onclick = ' onclick="tog(this)"' if has_ch else ""
  183. cls = "b e" if has_ch else "b"
  184. title_attr = f' title="{html_lib.escape(n.description)}"' if n.description else ""
  185. esc = html_lib.escape
  186. data_attr = f' data-path="{esc(cur_path)}" data-st="{esc(n.source_type)}"'
  187. h = f'<div class="t" data-depth="{depth}">'
  188. h += f'<div class="{cls}"{data_attr} style="background:{bg};color:{tc}"{onclick}{title_attr}>'
  189. h += "".join(parts)
  190. h += "</div>"
  191. if has_ch:
  192. h += f'<div class="ch" style="--lc:{th["line"]}">'
  193. for child in n.children:
  194. h += _node_html(child, th, depth + 1, cur_path, scores, max_score, default_dim)
  195. h += "</div>"
  196. h += "</div>"
  197. return h
  198. def _section_html(
  199. source_type: str,
  200. roots: list[CatNode],
  201. scores: dict[str, dict] | None,
  202. max_score: float,
  203. default_dim: str = "rov",
  204. ) -> str:
  205. th = THEME.get(source_type, THEME["实质"])
  206. total = sum(r.subtree_element_count for r in roots)
  207. h = '<div class="sec">'
  208. h += (
  209. f'<div class="sh" style="background:{th["root_bg"]}">'
  210. f"\u25BC {html_lib.escape(source_type)} ({total})</div>"
  211. )
  212. h += '<div class="sb">'
  213. for r in roots:
  214. h += _node_html(r, th, depth=0, parent_path="", scores=scores,
  215. max_score=max_score, default_dim=default_dim)
  216. h += "</div></div>"
  217. return h
  218. def generate_tree_html(eid: int) -> str:
  219. categories = fetch_categories(eid)
  220. trees = build_trees(categories)
  221. all_scores, dims = load_scores(eid)
  222. # 每个维度、每个 source_type 的最大分数(用于归一化)
  223. max_scores: dict[str, dict[str, float]] = {}
  224. for dim in dims:
  225. max_scores[dim] = {}
  226. for st in SOURCE_TYPES:
  227. st_scores = all_scores.get(st, {})
  228. mx = max((v.get(dim, 0) for v in st_scores.values()), default=0.0)
  229. max_scores[dim][st] = mx
  230. default_dim = dims[0] if dims else "rov"
  231. sections: list[str] = []
  232. for st in SOURCE_TYPES:
  233. if st not in trees:
  234. continue
  235. st_scores = all_scores.get(st, {})
  236. mx = max_scores.get(default_dim, {}).get(st, 1.0) or 1.0
  237. sections.append(_section_html(st, trees[st], st_scores, mx, default_dim))
  238. body = "\n".join(sections)
  239. # 维度按钮
  240. dim_btns = ""
  241. for i, d in enumerate(dims):
  242. active = " active" if i == 0 else ""
  243. dim_btns += (
  244. f'<button class="dim-btn{active}" data-dim="{html_lib.escape(d)}" '
  245. f"onclick=\"switchDim('{d}')\">{html_lib.escape(d)}</button>"
  246. )
  247. if not dim_btns:
  248. dim_btns = '<span style="color:#999;font-size:12px">无得分数据</span>'
  249. def _safe_json(obj: object) -> str:
  250. return json.dumps(obj, ensure_ascii=False).replace("</", "<\\/")
  251. return (
  252. _PAGE_HTML
  253. .replace("{{EID}}", str(eid))
  254. .replace("{{BODY}}", body)
  255. .replace("{{DIM_BTNS}}", dim_btns)
  256. .replace("{{SCORES}}", _safe_json(all_scores))
  257. .replace("{{DIMS}}", _safe_json(dims))
  258. .replace("{{MAX}}", _safe_json(max_scores))
  259. )
  260. # ---------------------------------------------------------------------------
  261. # Full-page HTML template
  262. # ---------------------------------------------------------------------------
# Page template filled in by generate_tree_html(), which replaces the markers
# {{EID}}, {{BODY}}, {{DIM_BTNS}}, {{SCORES}}, {{DIMS}} and {{MAX}}.
# The literal is a raw string, so the \uXXXX escapes in the script section are
# passed through unchanged and interpreted by JavaScript rather than by Python.
_PAGE_HTML = r"""<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<title>分类树 · execution_id={{EID}}</title>
<style>
*{box-sizing:border-box;margin:0;padding:0}
body{
font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Noto Sans SC",sans-serif;
background:#faf8f5;color:#333;padding:24px;
}
/* ===== header ===== */
.hdr{
display:flex;align-items:center;gap:10px;
margin-bottom:28px;flex-wrap:wrap;
}
.hdr h1{font-size:20px;font-weight:700;color:#4a3520}
.hdr .eid{color:#999;font-size:13px}
.hdr button{
padding:5px 12px;border:1px solid #d0c8c0;border-radius:6px;
background:#fff;cursor:pointer;font-size:12px;color:#4a3520;
}
.hdr button:hover{background:#f5ebe0}
/* dimension selector — 右侧 */
.dim-sel{
margin-left:auto;
display:flex;align-items:center;gap:6px;
background:#fff;border:1px solid #d0c8c0;border-radius:8px;
padding:4px 10px;
}
.dim-label{font-size:12px;color:#888;white-space:nowrap}
.dim-btn{
padding:4px 14px;border:1px solid #d0c8c0;border-radius:5px;
background:#fff;cursor:pointer;font-size:12px;color:#4a3520;
transition:all .15s;
}
.dim-btn:hover{background:#f5ebe0}
.dim-btn.active{
background:#4a3520;color:#fff;border-color:#4a3520;
}
/* ===== section ===== */
.sec{margin-bottom:36px}
.sh{
display:inline-block;padding:8px 18px;border-radius:8px;color:#fff;
font-size:15px;font-weight:700;margin-bottom:14px;
}
.sb{overflow-x:auto;padding:12px 0 12px 4px}
/* ===== horizontal tree ===== */
.t{display:inline-flex;align-items:center}
.b{
display:inline-flex;align-items:center;gap:5px;
padding:4px 10px;border-radius:5px;white-space:nowrap;font-size:13px;
box-shadow:0 1px 2px rgba(0,0,0,.08);user-select:none;
border:1px solid rgba(0,0,0,.06);
transition:background .2s,color .2s;
}
.b.e{cursor:pointer}
.b.e:hover{filter:brightness(1.06);box-shadow:0 2px 6px rgba(0,0,0,.14)}
.ti{font-size:10px;transition:transform .15s ease;display:inline-block}
.nn{font-weight:600}
.si{font-size:11px;opacity:.5}
.sc{
font-size:11px;font-weight:700;
background:rgba(255,255,255,.5);padding:0 5px;border-radius:3px;
line-height:1.6;letter-spacing:.02em;
}
.ec,.cc,.pc{
font-size:11px;background:rgba(255,255,255,.35);
padding:0 4px;border-radius:3px;line-height:1.6;
}
.ec{font-weight:600}
.pc{font-style:italic;opacity:.7}
/* children container & connector lines */
.ch{
display:flex;flex-direction:column;position:relative;
padding-left:28px;margin-left:10px;
}
.ch::before{
content:'';position:absolute;left:0;top:50%;width:14px;
border-top:1.5px solid var(--lc);
}
.ch>.t{position:relative;padding:3px 0}
.ch>.t::before{
content:'';position:absolute;left:-14px;top:0;bottom:0;
border-left:1.5px solid var(--lc);
}
.ch>.t:first-child::before{top:50%}
.ch>.t:last-child::before{bottom:50%}
.ch>.t:only-child::before{display:none}
.ch>.t::after{
content:'';position:absolute;left:-14px;top:50%;width:14px;
border-top:1.5px solid var(--lc);
}
.ch>.t:only-child::after{left:-28px;width:28px}
.t.collapsed>.ch{display:none}
.t.collapsed>.b>.ti{transform:rotate(-90deg)}
/* legend */
.legend{
display:inline-flex;align-items:center;gap:6px;
font-size:12px;color:#888;
}
.legend-bar{
width:100px;height:12px;border-radius:3px;border:1px solid rgba(0,0,0,.1);
}
</style>
</head>
<body>
<div class="hdr">
<h1>选题模式分类树</h1>
<span class="eid">execution_id = {{EID}}</span>
<button onclick="ea()">全部展开</button>
<button onclick="ca()">全部收起</button>
<button onclick="lv(1)">展开1层</button>
<button onclick="lv(2)">展开2层</button>
<button onclick="lv(3)">展开3层</button>
<button onclick="lv(4)">展开4层</button>
<span class="legend">
<span>低分</span>
<span class="legend-bar" style="background:linear-gradient(to right,#fdf3e4,#c35005)"></span>
<span>高分</span>
</span>
<div class="dim-sel">
<span class="dim-label">得分维度</span>
{{DIM_BTNS}}
</div>
</div>
{{BODY}}
<script>
/* ===== 嵌入的分数数据 ===== */
var S={{SCORES}};
var DIMS={{DIMS}};
var MX={{MAX}};
var TH={
"\u5b9e\u8d28":{low:[253,243,228],high:[195,80,5],ns:"#f5e6d3"},
"\u5f62\u5f0f":{low:[232,244,253],high:[30,90,165],ns:"#d6e9f8"},
"\u610f\u56fe":{low:[235,249,225],high:[40,120,20],ns:"#dbefd0"}
};
var curDim=DIMS[0]||'rov';
/* ===== 颜色计算(与 Python 侧等价) ===== */
function norm(s,m){return(m<=0||s<=0)?0:Math.log1p(s)/Math.log1p(m)}
function blend(lo,hi,t){
t=Math.max(0,Math.min(1,t));
return lo.map(function(l,i){return Math.round(l+(hi[i]-l)*t)});
}
function lum(r,g,b){return .299*r/255+.587*g/255+.114*b/255}
function rgbHex(c){return'#'+c.map(function(v){return('0'+v.toString(16)).slice(-2)}).join('')}
/* ===== 切换得分维度 ===== */
function switchDim(dim){
curDim=dim;
document.querySelectorAll('.dim-btn').forEach(function(b){
b.classList.toggle('active',b.dataset.dim===dim);
});
document.querySelectorAll('.b[data-path]').forEach(function(b){
var st=b.dataset.st, path=b.dataset.path;
var info=(S[st]||{})[path];
var sc=(info!=null)?info[dim]:null;
var scEl=b.querySelector('.sc');
var th=TH[st];
if(!th)return;
if(sc!=null){
var mx=((MX[dim]||{})[st])||1;
var t=(sc>0)?norm(sc,mx):0;
var rgb=blend(th.low,th.high,t);
b.style.background=rgbHex(rgb);
var l=lum(rgb[0],rgb[1],rgb[2]);
b.style.color=l<.55?'#fff':'#4a3520';
if(scEl){scEl.textContent=sc.toFixed(2);scEl.style.display=''}
}else{
b.style.background=th.ns;
b.style.color='#6b5240';
if(scEl)scEl.style.display='none';
}
});
}
/* ===== 树操作 ===== */
function tog(el){el.closest('.t').classList.toggle('collapsed')}
function ea(){document.querySelectorAll('.t.collapsed').forEach(function(e){e.classList.remove('collapsed')})}
function ca(){document.querySelectorAll('.t').forEach(function(e){if(e.querySelector(':scope>.ch'))e.classList.add('collapsed')})}
function lv(n){document.querySelectorAll('.t').forEach(function(e){
var ch=e.querySelector(':scope>.ch');if(!ch)return;
var d=+(e.dataset.depth||0);
if(d<n)e.classList.remove('collapsed');else e.classList.add('collapsed');
})}
</script>
</body>
</html>
"""
  451. # ---------------------------------------------------------------------------
  452. # Entry
  453. # ---------------------------------------------------------------------------
  454. if __name__ == "__main__":
  455. execution_id = 58
  456. html_content = generate_tree_html(execution_id)
  457. out = Path(__file__).resolve().parent / "new_result" / f"topic_pattern_tree_{execution_id}.html"
  458. out.parent.mkdir(parents=True, exist_ok=True)
  459. out.write_text(html_content, encoding="utf-8")
  460. print(f"已生成: {out}")