| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529 |
- from __future__ import annotations
- import html as html_lib
- import json
- import math
- from dataclasses import dataclass, field
- from pathlib import Path
- from sqlalchemy import text
- from examples.demand.db_manager import DatabaseManager
- db = DatabaseManager()
# Source types handled by this module; sections are rendered in this order.
SOURCE_TYPES = ["实质", "形式", "意图"]
# JSON field -> displayed dimension name (to expose a new dimension, add it here).
SCORE_DIMS: dict[str, str] = {"score": "rov"}
# Per-source-type palette:
#   root_bg  - background of the section header
#   line     - colour of the connector lines between a node and its children
#   low/high - RGB endpoints blended according to the normalized score
#   no_score - background used for nodes that have no score
THEME = {
    "实质": {
        "root_bg": "#e8841a",
        "line": "#d4a574",
        "low": (253, 243, 228),
        "high": (195, 80, 5),
        "no_score": "#f5e6d3",
    },
    "形式": {
        "root_bg": "#5b9bd5",
        "line": "#8cb9dc",
        "low": (232, 244, 253),
        "high": (30, 90, 165),
        "no_score": "#d6e9f8",
    },
    "意图": {
        "root_bg": "#70ad47",
        "line": "#8fc270",
        "low": (235, 249, 225),
        "high": (40, 120, 20),
        "no_score": "#dbefd0",
    },
}
- # ---------------------------------------------------------------------------
- # Color helpers (the Python side is used for the first render; the JS side has a matching implementation used when switching dimensions)
- # ---------------------------------------------------------------------------
def _luminance(r: int, g: int, b: int) -> float:
    """Return the weighted brightness of an RGB colour on a 0..1 scale.

    Each 0..255 channel is scaled to 0..1 and combined with the weights
    0.299 (red), 0.587 (green) and 0.114 (blue).
    """
    red_part = 0.299 * r / 255
    green_part = 0.587 * g / 255
    blue_part = 0.114 * b / 255
    return red_part + green_part + blue_part
def _blend(low: tuple, high: tuple, t: float) -> tuple[int, int, int]:
    """Linearly interpolate between two RGB colours.

    Args:
        low: Colour returned for ``t == 0``.
        high: Colour returned for ``t == 1``.
        t: Blend factor; values outside ``[0, 1]`` are clamped.

    Returns:
        The interpolated colour as a tuple of integer channels.
    """
    t = max(0.0, min(1.0, t))
    # Round half up (like JavaScript's Math.round) instead of truncating, so
    # the colours of the first server-side render match what the page's JS
    # ``blend`` computes after a dimension switch.
    return tuple(
        math.floor(lo + (hi - lo) * t + 0.5) for lo, hi in zip(low, high)
    )
def _rgb_hex(rgb: tuple) -> str:
    """Format the first three channels of ``rgb`` as a ``#rrggbb`` string."""
    red, green, blue = rgb[0], rgb[1], rgb[2]
    return "#{:02x}{:02x}{:02x}".format(red, green, blue)
def _normalize_score(score: float, max_score: float) -> float:
    """Map ``score`` onto a log scale relative to ``max_score``.

    Returns ``0.0`` when either value is zero or negative; otherwise
    ``log1p(score) / log1p(max_score)``.
    """
    if score <= 0 or max_score <= 0:
        return 0.0
    numerator = math.log1p(score)
    denominator = math.log1p(max_score)
    return numerator / denominator
- # ---------------------------------------------------------------------------
- # Data model
- # ---------------------------------------------------------------------------
@dataclass
class CatNode:
    """One category row plus the child nodes attached to it."""

    id: int
    name: str
    source_stable_id: int | None
    source_type: str
    description: str | None
    level: int | None
    parent_id: int | None
    # Number of elements attached directly to this node (not its descendants).
    element_count: int
    children: list[CatNode] = field(default_factory=list)

    @property
    def subtree_element_count(self) -> int:
        """Element count of this node plus all of its descendants."""
        own = self.element_count or 0
        return own + sum(child.subtree_element_count for child in self.children)
- # ---------------------------------------------------------------------------
- # Data loading
- # ---------------------------------------------------------------------------
def fetch_categories(eid: int) -> list[dict]:
    """Load the category rows of one execution from ``topic_pattern_category``.

    Args:
        eid: Value matched against the ``execution_id`` column.

    Returns:
        One plain dict per row, ordered by source_type, level and id.
    """
    session = db.get_session()
    try:
        query = text(
            "SELECT id, source_stable_id, source_type, name, description, "
            "level, parent_id, element_count "
            "FROM topic_pattern_category WHERE execution_id = :eid "
            "ORDER BY source_type, level, id"
        )
        result = session.execute(query, {"eid": eid})
        return [dict(row) for row in result.mappings().fetchall()]
    finally:
        # Always hand the session back, even if the query fails.
        session.close()
def load_scores(eid: int) -> tuple[dict[str, dict[str, dict]], list[str]]:
    """Read ``data/{eid}/*_分类.json`` and return ``(scores_data, dim_names)``.

    The files are looked up next to this module, one per entry of
    ``SOURCE_TYPES``; missing files are skipped.

    Returns:
        scores_data: ``{source_type: {category_path: {dim_name: value,
            "post_ids_count": int}}}``.
        dim_names: Sorted names of the dimensions that occurred in at least
            one item.
    """
    base_dir = Path(__file__).resolve().parent / "data" / str(eid)
    scores_by_type: dict[str, dict[str, dict]] = {}
    seen_dims: set[str] = set()

    for source_type in SOURCE_TYPES:
        json_path = base_dir / f"{source_type}_分类.json"
        if not json_path.exists():
            continue
        with open(json_path, encoding="utf-8") as fh:
            records = json.load(fh)

        by_path: dict[str, dict] = {}
        for record in records:
            category_path = record.get("category_path", "")
            if not category_path:
                # Items without a path cannot be matched to a tree node.
                continue
            present = {
                dim_name: record[json_key]
                for json_key, dim_name in SCORE_DIMS.items()
                if json_key in record
            }
            seen_dims.update(present)
            by_path[category_path] = {
                "post_ids_count": record.get("post_ids_count", 0),
                **present,
            }
        scores_by_type[source_type] = by_path

    return scores_by_type, sorted(seen_dims)
def build_trees(categories: list[dict]) -> dict[str, list[CatNode]]:
    """Turn flat category rows into per-source-type forests.

    Args:
        categories: Row dicts with at least ``id``, ``name`` and
            ``source_type``; the remaining fields are optional.

    Returns:
        ``{source_type: [root nodes]}``. A node is a root when it has no
        ``parent_id`` or its parent is not among the given rows. Children of
        every node are sorted by id.
    """
    by_id: dict[int, CatNode] = {}
    for row in categories:
        by_id[row["id"]] = CatNode(
            id=row["id"],
            name=row["name"],
            source_stable_id=row.get("source_stable_id"),
            source_type=row["source_type"],
            description=row.get("description"),
            level=row.get("level"),
            parent_id=row.get("parent_id"),
            element_count=row.get("element_count") or 0,
        )

    forest: dict[str, list[CatNode]] = {}
    for node in by_id.values():
        parent = by_id.get(node.parent_id) if node.parent_id else None
        if parent is None:
            forest.setdefault(node.source_type, []).append(node)
        else:
            parent.children.append(node)

    for node in by_id.values():
        node.children.sort(key=lambda child: child.id)
    return forest
- # ---------------------------------------------------------------------------
- # HTML rendering
- # ---------------------------------------------------------------------------
def _node_html(
    n: CatNode,
    th: dict,
    depth: int = 0,
    parent_path: str = "",
    scores: dict[str, dict] | None = None,
    max_score: float = 1.0,
    default_dim: str = "rov",
) -> str:
    """Render ``n`` and, recursively, its children as nested ``<div>`` markup.

    Args:
        n: Node to render.
        th: Palette entry (``low``, ``high``, ``no_score``, ``line``).
        depth: Depth written to the ``data-depth`` attribute.
        parent_path: ``>``-joined names of the ancestors; empty for a root.
        scores: ``{category_path: {dim: value, "post_ids_count": int}}``.
        max_score: Maximum used to normalize the score into a colour.
        default_dim: Dimension whose value colours the node initially.

    Returns:
        The HTML fragment for the whole subtree.
    """
    escape = html_lib.escape
    expandable = bool(n.children)
    element_total = n.element_count or 0
    child_total = len(n.children)
    path = f"{parent_path}>{n.name}" if parent_path else n.name

    record = scores.get(path) if scores else None
    if record:
        score = record.get(default_dim)
        post_count = record.get("post_ids_count")
    else:
        score = post_count = None

    if score is None:
        background = th["no_score"]
        text_color = "#6b5240"
    else:
        ratio = _normalize_score(score, max_score) if score > 0 else 0.0
        rgb = _blend(th["low"], th["high"], ratio)
        background = _rgb_hex(rgb)
        # Light text on dark backgrounds, dark text on light ones.
        text_color = "#fff" if _luminance(*rgb) < 0.55 else "#4a3520"

    badges: list[str] = []
    if expandable:
        badges.append('<span class="ti">\u25BC</span>')
    badges.append(f'<span class="nn">{escape(n.name)}</span>')
    if n.source_stable_id is not None:
        badges.append(f'<span class="si">{n.source_stable_id}</span>')
    # The .sc span is always emitted (the JS dimension switch updates it);
    # it is hidden while the node has no score.
    if score is None:
        badges.append('<span class="sc" style="display:none"></span>')
    else:
        badges.append(f'<span class="sc">{score:.2f}</span>')
    if element_total:
        badges.append(f'<span class="ec">{element_total}</span>')
    if post_count is not None:
        badges.append(f'<span class="pc">{post_count}p</span>')
    if child_total:
        badges.append(f'<span class="cc">{child_total}\u25B6</span>')

    click_attr = ' onclick="tog(this)"' if expandable else ""
    css_class = "b e" if expandable else "b"
    tooltip = f' title="{escape(n.description)}"' if n.description else ""
    data_attrs = f' data-path="{escape(path)}" data-st="{escape(n.source_type)}"'

    pieces: list[str] = [
        f'<div class="t" data-depth="{depth}">',
        f'<div class="{css_class}"{data_attrs} '
        f'style="background:{background};color:{text_color}"{click_attr}{tooltip}>',
        *badges,
        "</div>",
    ]
    if expandable:
        pieces.append(f'<div class="ch" style="--lc:{th["line"]}">')
        pieces.extend(
            _node_html(child, th, depth + 1, path, scores, max_score, default_dim)
            for child in n.children
        )
        pieces.append("</div>")
    pieces.append("</div>")
    return "".join(pieces)
def _section_html(
    source_type: str,
    roots: list[CatNode],
    scores: dict[str, dict] | None,
    max_score: float,
    default_dim: str = "rov",
) -> str:
    """Render one source-type section: a coloured header plus its root trees.

    The header shows the source type and the element count summed over all
    roots. Unknown source types fall back to the "实质" palette.
    """
    theme = THEME.get(source_type, THEME["实质"])
    element_total = sum(root.subtree_element_count for root in roots)
    header = (
        f'<div class="sh" style="background:{theme["root_bg"]}">'
        f"\u25BC {html_lib.escape(source_type)} ({element_total})</div>"
    )
    trees = [
        _node_html(
            root,
            theme,
            depth=0,
            parent_path="",
            scores=scores,
            max_score=max_score,
            default_dim=default_dim,
        )
        for root in roots
    ]
    return "".join(['<div class="sec">', header, '<div class="sb">', *trees, "</div></div>"])
def generate_tree_html(eid: int) -> str:
    """Build the complete category-tree HTML page for one execution.

    Loads the categories and score files for ``eid``, renders one section per
    source type that has categories, and fills the ``_PAGE_HTML`` template
    with the markup, the dimension buttons and the JSON data used by the
    page's JavaScript.

    Args:
        eid: Execution id to render.

    Returns:
        The full HTML document as a string.
    """
    import re  # local import: only needed for the template substitution below

    categories = fetch_categories(eid)
    trees = build_trees(categories)
    all_scores, dims = load_scores(eid)

    # Maximum score per dimension and source type (used for normalization).
    max_scores: dict[str, dict[str, float]] = {}
    for dim in dims:
        max_scores[dim] = {}
        for st in SOURCE_TYPES:
            st_scores = all_scores.get(st, {})
            mx = max((v.get(dim, 0) for v in st_scores.values()), default=0.0)
            max_scores[dim][st] = mx

    default_dim = dims[0] if dims else "rov"

    sections: list[str] = []
    for st in SOURCE_TYPES:
        if st not in trees:
            continue
        st_scores = all_scores.get(st, {})
        # A missing or zero maximum falls back to 1.0.
        mx = max_scores.get(default_dim, {}).get(st, 1.0) or 1.0
        sections.append(_section_html(st, trees[st], st_scores, mx, default_dim))
    body = "\n".join(sections)

    # Dimension buttons. The handler reads the name back from data-dim rather
    # than embedding it in the inline JS string, so the name only ever appears
    # in HTML-escaped positions.
    buttons: list[str] = []
    for i, d in enumerate(dims):
        active = " active" if i == 0 else ""
        safe_dim = html_lib.escape(d)
        buttons.append(
            f'<button class="dim-btn{active}" data-dim="{safe_dim}" '
            f'onclick="switchDim(this.dataset.dim)">{safe_dim}</button>'
        )
    dim_btns = "".join(buttons)
    if not dim_btns:
        dim_btns = '<span style="color:#999;font-size:12px">无得分数据</span>'

    def _safe_json(obj: object) -> str:
        # Escape "</" so embedded data cannot close the surrounding <script>.
        return json.dumps(obj, ensure_ascii=False).replace("</", "<\\/")

    replacements = {
        "EID": str(eid),
        "BODY": body,
        "DIM_BTNS": dim_btns,
        "SCORES": _safe_json(all_scores),
        "DIMS": _safe_json(dims),
        "MAX": _safe_json(max_scores),
    }
    # Substitute every placeholder in a single pass: inserted text (e.g. a
    # category name that happens to contain "{{SCORES}}") is never rescanned.
    placeholder = re.compile(r"\{\{(" + "|".join(replacements) + r")\}\}")
    return placeholder.sub(lambda m: replacements[m.group(1)], _PAGE_HTML)
- # ---------------------------------------------------------------------------
- # Full-page HTML template
- # ---------------------------------------------------------------------------
- _PAGE_HTML = r"""<!DOCTYPE html>
- <html lang="zh-CN">
- <head>
- <meta charset="UTF-8">
- <meta name="viewport" content="width=device-width,initial-scale=1">
- <title>分类树 · execution_id={{EID}}</title>
- <style>
- *{box-sizing:border-box;margin:0;padding:0}
- body{
- font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Noto Sans SC",sans-serif;
- background:#faf8f5;color:#333;padding:24px;
- }
- /* ===== header ===== */
- .hdr{
- display:flex;align-items:center;gap:10px;
- margin-bottom:28px;flex-wrap:wrap;
- }
- .hdr h1{font-size:20px;font-weight:700;color:#4a3520}
- .hdr .eid{color:#999;font-size:13px}
- .hdr button{
- padding:5px 12px;border:1px solid #d0c8c0;border-radius:6px;
- background:#fff;cursor:pointer;font-size:12px;color:#4a3520;
- }
- .hdr button:hover{background:#f5ebe0}
- /* dimension selector — 右侧 */
- .dim-sel{
- margin-left:auto;
- display:flex;align-items:center;gap:6px;
- background:#fff;border:1px solid #d0c8c0;border-radius:8px;
- padding:4px 10px;
- }
- .dim-label{font-size:12px;color:#888;white-space:nowrap}
- .dim-btn{
- padding:4px 14px;border:1px solid #d0c8c0;border-radius:5px;
- background:#fff;cursor:pointer;font-size:12px;color:#4a3520;
- transition:all .15s;
- }
- .dim-btn:hover{background:#f5ebe0}
- .dim-btn.active{
- background:#4a3520;color:#fff;border-color:#4a3520;
- }
- /* ===== section ===== */
- .sec{margin-bottom:36px}
- .sh{
- display:inline-block;padding:8px 18px;border-radius:8px;color:#fff;
- font-size:15px;font-weight:700;margin-bottom:14px;
- }
- .sb{overflow-x:auto;padding:12px 0 12px 4px}
- /* ===== horizontal tree ===== */
- .t{display:inline-flex;align-items:center}
- .b{
- display:inline-flex;align-items:center;gap:5px;
- padding:4px 10px;border-radius:5px;white-space:nowrap;font-size:13px;
- box-shadow:0 1px 2px rgba(0,0,0,.08);user-select:none;
- border:1px solid rgba(0,0,0,.06);
- transition:background .2s,color .2s;
- }
- .b.e{cursor:pointer}
- .b.e:hover{filter:brightness(1.06);box-shadow:0 2px 6px rgba(0,0,0,.14)}
- .ti{font-size:10px;transition:transform .15s ease;display:inline-block}
- .nn{font-weight:600}
- .si{font-size:11px;opacity:.5}
- .sc{
- font-size:11px;font-weight:700;
- background:rgba(255,255,255,.5);padding:0 5px;border-radius:3px;
- line-height:1.6;letter-spacing:.02em;
- }
- .ec,.cc,.pc{
- font-size:11px;background:rgba(255,255,255,.35);
- padding:0 4px;border-radius:3px;line-height:1.6;
- }
- .ec{font-weight:600}
- .pc{font-style:italic;opacity:.7}
- /* children container & connector lines */
- .ch{
- display:flex;flex-direction:column;position:relative;
- padding-left:28px;margin-left:10px;
- }
- .ch::before{
- content:'';position:absolute;left:0;top:50%;width:14px;
- border-top:1.5px solid var(--lc);
- }
- .ch>.t{position:relative;padding:3px 0}
- .ch>.t::before{
- content:'';position:absolute;left:-14px;top:0;bottom:0;
- border-left:1.5px solid var(--lc);
- }
- .ch>.t:first-child::before{top:50%}
- .ch>.t:last-child::before{bottom:50%}
- .ch>.t:only-child::before{display:none}
- .ch>.t::after{
- content:'';position:absolute;left:-14px;top:50%;width:14px;
- border-top:1.5px solid var(--lc);
- }
- .ch>.t:only-child::after{left:-28px;width:28px}
- .t.collapsed>.ch{display:none}
- .t.collapsed>.b>.ti{transform:rotate(-90deg)}
- /* legend */
- .legend{
- display:inline-flex;align-items:center;gap:6px;
- font-size:12px;color:#888;
- }
- .legend-bar{
- width:100px;height:12px;border-radius:3px;border:1px solid rgba(0,0,0,.1);
- }
- </style>
- </head>
- <body>
- <div class="hdr">
- <h1>选题模式分类树</h1>
- <span class="eid">execution_id = {{EID}}</span>
- <button onclick="ea()">全部展开</button>
- <button onclick="ca()">全部收起</button>
- <button onclick="lv(1)">展开1层</button>
- <button onclick="lv(2)">展开2层</button>
- <button onclick="lv(3)">展开3层</button>
- <button onclick="lv(4)">展开4层</button>
- <span class="legend">
- <span>低分</span>
- <span class="legend-bar" style="background:linear-gradient(to right,#fdf3e4,#c35005)"></span>
- <span>高分</span>
- </span>
- <div class="dim-sel">
- <span class="dim-label">得分维度</span>
- {{DIM_BTNS}}
- </div>
- </div>
- {{BODY}}
- <script>
- /* ===== 嵌入的分数数据 ===== */
- var S={{SCORES}};
- var DIMS={{DIMS}};
- var MX={{MAX}};
- var TH={
- "\u5b9e\u8d28":{low:[253,243,228],high:[195,80,5],ns:"#f5e6d3"},
- "\u5f62\u5f0f":{low:[232,244,253],high:[30,90,165],ns:"#d6e9f8"},
- "\u610f\u56fe":{low:[235,249,225],high:[40,120,20],ns:"#dbefd0"}
- };
- var curDim=DIMS[0]||'rov';
- /* ===== 颜色计算(与 Python 侧等价) ===== */
- function norm(s,m){return(m<=0||s<=0)?0:Math.log1p(s)/Math.log1p(m)}
- function blend(lo,hi,t){
- t=Math.max(0,Math.min(1,t));
- return lo.map(function(l,i){return Math.round(l+(hi[i]-l)*t)});
- }
- function lum(r,g,b){return .299*r/255+.587*g/255+.114*b/255}
- function rgbHex(c){return'#'+c.map(function(v){return('0'+v.toString(16)).slice(-2)}).join('')}
- /* ===== 切换得分维度 ===== */
- function switchDim(dim){
- curDim=dim;
- document.querySelectorAll('.dim-btn').forEach(function(b){
- b.classList.toggle('active',b.dataset.dim===dim);
- });
- document.querySelectorAll('.b[data-path]').forEach(function(b){
- var st=b.dataset.st, path=b.dataset.path;
- var info=(S[st]||{})[path];
- var sc=(info!=null)?info[dim]:null;
- var scEl=b.querySelector('.sc');
- var th=TH[st];
- if(!th)return;
- if(sc!=null){
- var mx=((MX[dim]||{})[st])||1;
- var t=(sc>0)?norm(sc,mx):0;
- var rgb=blend(th.low,th.high,t);
- b.style.background=rgbHex(rgb);
- var l=lum(rgb[0],rgb[1],rgb[2]);
- b.style.color=l<.55?'#fff':'#4a3520';
- if(scEl){scEl.textContent=sc.toFixed(2);scEl.style.display=''}
- }else{
- b.style.background=th.ns;
- b.style.color='#6b5240';
- if(scEl)scEl.style.display='none';
- }
- });
- }
- /* ===== 树操作 ===== */
- function tog(el){el.closest('.t').classList.toggle('collapsed')}
- function ea(){document.querySelectorAll('.t.collapsed').forEach(function(e){e.classList.remove('collapsed')})}
- function ca(){document.querySelectorAll('.t').forEach(function(e){if(e.querySelector(':scope>.ch'))e.classList.add('collapsed')})}
- function lv(n){document.querySelectorAll('.t').forEach(function(e){
- var ch=e.querySelector(':scope>.ch');if(!ch)return;
- var d=+(e.dataset.depth||0);
- if(d<n)e.classList.remove('collapsed');else e.classList.add('collapsed');
- })}
- </script>
- </body>
- </html>
- """
- # ---------------------------------------------------------------------------
- # Entry
- # ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Script entry: render the page for one hard-coded execution id and write
    # it to new_result/ next to this file.
    execution_id = 58
    html_content = generate_tree_html(execution_id)
    out_dir = Path(__file__).resolve().parent / "new_result"
    out = out_dir / f"topic_pattern_tree_{execution_id}.html"
    out_dir.mkdir(parents=True, exist_ok=True)
    out.write_text(html_content, encoding="utf-8")
    print(f"已生成: {out}")
|