xueyiming 2 settimane fa
parent
commit
254b84ba38

+ 109 - 0
examples/demand/changwen_weight_rank.py

@@ -0,0 +1,109 @@
+#!/usr/bin/env python3
+"""
+读取长文 JSON,计算权重 = 分发realplay_uv / 当日分发曝光uv,按权重降序输出。
+每个输入文件单独生成一个输出文件(不合并)。
+"""
+from __future__ import annotations
+
+import argparse
+import json
+import math
+from pathlib import Path
+
+
def load_records(path: Path) -> list[dict]:
    """Parse one JSON file into rows of videoid / category / weight.

    Weight = 分发realplay_uv / 当日分发曝光uv. Records missing either metric
    (or carrying non-numeric values) are skipped; a non-positive exposure
    yields NaN so such rows can be sorted last by the caller.
    """
    with open(path, encoding="utf-8") as fh:
        payload = json.load(fh)
    if not isinstance(payload, list):
        raise ValueError(f"{path} 根节点应为数组")

    records: list[dict] = []
    for entry in payload:
        ext = entry.get("ext_data") or {}
        exposure = ext.get("当日分发曝光uv")
        realplay = ext.get("分发realplay_uv")
        if exposure is None or realplay is None:
            continue
        try:
            exposure_val = float(exposure)
            realplay_val = float(realplay)
        except (TypeError, ValueError):
            continue
        weight = realplay_val / exposure_val if exposure_val > 0 else float("nan")
        records.append(
            {
                "videoid": str(entry.get("videoid", "")),
                "二级品类": entry.get("二级品类", ""),
                "权重值": weight,
            }
        )
    return records
+
+
+def _sort_key(r: dict) -> tuple:
+    w = r["权重值"]
+    if math.isnan(w):
+        return (1, 0.0)
+    return (0, -w)
+
+
def default_out_path(inp: Path, out_dir: Path | None) -> Path:
    """Output path: ``<stem>_weight_rank.txt`` in ``out_dir``, or beside the input."""
    target_dir = inp.parent if out_dir is None else out_dir
    return target_dir / f"{inp.stem}_weight_rank.txt"
+
+
def main() -> None:
    """CLI entry point: rank each input JSON by weight, one output file each.

    For every input file, rows are sorted descending by weight (NaN rows
    last, via ``_sort_key``) and written tab-separated as
    ``videoid / 二级品类 / 权重`` to ``<stem>_weight_rank.txt``.
    Exits with an error when any input path does not exist.
    """
    base = Path(__file__).resolve().parent / "data" / "changwen_data"
    parser = argparse.ArgumentParser(
        description="长文数据按 分发realplay_uv/当日分发曝光uv 排序;每个输入单独写一个输出文件"
    )
    parser.add_argument(
        "inputs",
        nargs="*",
        type=Path,
        default=[
            base / "奇观妙技有乾坤.json",
            base / "青史铁事漫谈.json",
        ],
        help="输入 JSON 路径(默认两个账号文件)",
    )
    parser.add_argument(
        "--out-dir",
        type=Path,
        default=None,
        help="可选:把所有输出写到该目录(文件名仍为 {原文件名去扩展}_weight_rank.txt)",
    )
    args = parser.parse_args()

    paths = [p.resolve() for p in args.inputs]
    for p in paths:
        if not p.is_file():
            raise SystemExit(f"文件不存在: {p}")

    out_dir = args.out_dir.resolve() if args.out_dir else None
    if out_dir is not None:
        out_dir.mkdir(parents=True, exist_ok=True)

    for p in paths:
        rows = load_records(p)
        rows.sort(key=_sort_key)
        lines = []
        for r in rows:
            w = r["权重值"]
            # math.isnan is clearer than the `w != w` NaN trick and matches _sort_key.
            w_str = "nan" if math.isnan(w) else f"{w:.6f}"
            lines.append(f"{r['videoid']}\t{r['二级品类']}\t{w_str}")

        out = default_out_path(p, out_dir)
        if out_dir is None:
            out.parent.mkdir(parents=True, exist_ok=True)
        # Trailing newline only when there is content, so empty inputs yield empty files.
        text = "\n".join(lines) + ("\n" if lines else "")
        out.write_text(text, encoding="utf-8")
        print(f"{p.name}: 共 {len(lines)} 行 -> {out}")


if __name__ == "__main__":
    main()

+ 28 - 5
examples/demand/data_query_tools.py

@@ -97,11 +97,11 @@ where t1.缺量>= {count}
         for r in data:
             lack_count = r[9]
             if lack_count > 1000:
-                count = 70
-            elif 500 < lack_count <= 1000:
                 count = 50
-            elif 100 < lack_count <= 500:
+            elif 500 < lack_count <= 1000:
                 count = 30
+            elif 100 < lack_count <= 500:
+                count = 20
             elif 50 < lack_count <= 100:
                 count = 10
             else:
@@ -116,8 +116,10 @@ where t1.缺量>= {count}
     return result_list
 
 
-def get_rov_by_merge_leve2_and_video_ids(merge_leve2, video_ids):
-    merge_level_in_clause = f"'{merge_leve2}'"
+def get_rov_by_merge_leve2_and_video_ids(merge_leve2_list, video_ids):
+    if not merge_leve2_list or not video_ids:
+        return {}
+    merge_level_in_clause = ", ".join([f"'{m}'" for m in merge_leve2_list])
     video_ids_in_clause = ", ".join([f"'{video_id}'" for video_id in video_ids])
     end_date = (date.today() - timedelta(days=1)).strftime("%Y%m%d")
     start_date = (date.today() - timedelta(days=14)).strftime("%Y%m%d")
@@ -495,6 +497,7 @@ GROUP BY 合作方名
 ORDER BY SUM(推荐曝光数)
 ;
     '''
+    print(sql_query)
     result_list = []
     data = get_odps_data(sql_query)
     if data:
@@ -568,6 +571,26 @@ def get_all_decode_task_result_rows():
     )
 
 
def get_channel_content_ids_by_merge_leve2_list(merge_leve2_list):
    """Fetch channel_content_id values for the given merge_leve2 names.

    Runs a single ``merge_leve2 IN (...)`` query against the MySQL table
    ``workflow_decode_task_result`` and collects the non-null
    ``channel_content_id`` values.

    Args:
        merge_leve2_list: iterable of merge_leve2 names; None/blank entries
            are dropped after ``str().strip()`` normalization.

    Returns:
        list of channel_content_id values ([] for empty/all-blank input).
    """
    if not merge_leve2_list:
        return []
    normalized = [str(x).strip() for x in merge_leve2_list if x is not None and str(x).strip()]
    if not normalized:
        return []

    # One %s placeholder per value keeps the query parameterized (no string-built SQL).
    placeholders = ", ".join(["%s"] * len(normalized))
    rows = mysql_db.select(
        "workflow_decode_task_result",
        columns="channel_content_id",
        where=f"merge_leve2 IN ({placeholders})",
        where_params=tuple(normalized),
    )
    return [row.get("channel_content_id") for row in rows if row.get("channel_content_id") is not None]
+
 def update_decode_task_result_merge_leve2(channel_content_id, merge_leve2):
     return mysql_db.update(
         "workflow_decode_task_result",

+ 31 - 18
examples/demand/db_manager.py

@@ -20,22 +20,6 @@ class DatabaseManager:
         return self.SessionLocal()
 
 
-class DatabaseManager2:
-    """数据库管理类"""
-
-    # mysql+pymysql://<用户名>:<密码>@<主机地址>:<端口>/<数据库名>?charset=utf8mb4
-    def __init__(self):
-        connection_string = (
-            f"mysql+pymysql://content_rw:bC1aH4bA1lB0@rm-t4nh1xx6o2a6vj8qu3o.mysql.singapore.rds.aliyuncs.com:3306/open_aigc_pattern?charset=utf8mb4"
-        )
-        self.engine = create_engine(connection_string, pool_pre_ping=True, pool_recycle=3600)
-        self.SessionLocal = sessionmaker(bind=self.engine, autoflush=False, autocommit=False)
-
-    def get_session(self) -> Session:
-        """获取数据库会话"""
-        return self.SessionLocal()
-
-
 def query_video_ids_by_names(execution_id: int, names: Iterable[str]) -> list[str]:
     """按 execution_id + 名称列表查询去重后的 post_id。"""
     clean_names = [str(n).strip() for n in names if n is not None and str(n).strip()]
@@ -93,11 +77,40 @@ def query_video_ids_by_names(execution_id: int, names: Iterable[str]) -> list[st
     return list(video_ids)
 
 
-db2 = DatabaseManager2()
def query_category_level(execution_id: int, name: str) -> int | None:
    """Look up ``topic_pattern_category.level`` for one execution_id + name.

    Picks the newest matching row (``ORDER BY id DESC LIMIT 1``) so a
    category name that appears multiple times within an execution resolves
    to the latest entry.

    Returns:
        The level as int, or None when the name is blank/None, no row
        matches, or the stored level is NULL.
    """
    clean_name = str(name).strip() if name is not None else ""
    if not clean_name:
        return None

    manager = DatabaseManager()
    session = manager.get_session()
    try:
        row = session.execute(
            text(
                """
                SELECT level
                FROM topic_pattern_category
                WHERE execution_id = :execution_id AND name = :name
                ORDER BY id DESC
                LIMIT 1
                """
            ),
            {"execution_id": execution_id, "name": clean_name},
        ).first()
        if not row:
            return None
        level = row[0]
        return int(level) if level is not None else None
    finally:
        # Always release the session, even when the query raises.
        session.close()
+
+
+db = DatabaseManager()
 
 
 def exist_cluster_tree(merge_level2):
-    session = db2.get_session()
+    session = db.get_session()
 
     exec_row = session.execute(
         text("""

+ 100 - 0
examples/demand/demand1.md

@@ -0,0 +1,100 @@
+---
+model: anthropic/claude-sonnet-4.5
+temperature: 0.5
+max_iterations: 200
+---
+
+$system$
+
+# 需求选择 Agent
+
+你是一个需求产生 Agent。你的任务是基于高权重的元素,产生需求,并且根据已经选择的元素,进行拓展,发现更多的需求
+
+**需求 = 一个人带着某种目的或兴趣,能用一个词/短语表达出来**
+
+它的本质公式是:
+
+```
+需求 = 人的渴求 × 内容的可满足性
+```
+
+二者缺一不可:
+
+- 没有人的渴求 → 不是需求,是凭空造词
+- 内容无法满足 → 不是有效需求,是伪需求
+
+
+## 背景知识
+
+### 数据来源
+
+数据来自社交媒体视频的结构化分析。每个帖子被拆解为多个"选题点"(灵感点、目的点、关键点),每个点下有三个维度的元素:
+
+- **实质**: 内容的核心主题/对象(如 "咖啡豆"、"护肤品")
+- **形式**: 内容的呈现形式(如 "测评对比"、"教程")
+- **意图**: 内容的目标/用户意图(如 "购买决策"、"学习技能")
+
+每个元素归属于一个分类树节点(如 实质 > 食品 > 饮品 > 咖啡),形成层级分类结构。
+每个元素或者分类都有自己的权重分,权重分用于评判元素或者分类受欢迎程度(核心要素)
+
+## 数据模型
+
+### DemandItem(核心实体)
+
+需求产生过程 = ADD DemandItem。每个 DemandItem 代表一个需求。
+
+**字段:**
+
+- `element_names`: 元素名称列表
+- `reason`: 产生该需求的理由
+- `desc`: 需求的描述,只描述需求,不要揣测意图
+- `type`: 需求的来源类型(元素/分类/关系/pattern)
+
+## 工具概览
+
+### 查询工具(只读)
+
+- `get_category_tree` — 查看当前分类下的完整分类树(分类)
+- `get_weight_score_topn` — 元素/分类权重排行榜(元素/分类)
+- `get_weight_score_by_name` — 执行元素/分类权重查询
+- `get_post_elements` — 帖子元素
+- `search_elements` / `search_categories` — 关键词搜索
+
+### CRUD 工具
+
+- `create_demand_item` — 创建一个新需求
+- `create_demand_items` — 批量创建新需求
+
+### 输出工具
+
+- `write_execution_summary` — 写入执行总结
+
+## 硬约束(不可违反)
+
+- 执行每个操作前,必须输出自己的思考,为什么要这样做,原因是什么,目的是什么
+- category 级 item 必须来自分类树的真实节点(通过 `search_categories` 查到对应的 `category_id`),不允许凭空编造分类
+- 正确的创建顺序:先 element 后 category
+- result 中出现的每一个具体内容,都必须有对应的 DemandItem
+- `search_elements` / `search_categories`只能用于查询单元素/单分类,不能用于查询完整树,完整树查询用`get_category_tree`
+
+$user$
+
+## 前置条件
+
+`get_category_tree`工具查到的全部分类都属于「%merge_level2%」,`search_categories`查询的只是树中的一个或者多个分类
+
+## 任务
+
+从"高权重叶子元素","高权重分类节点"出发完成需求生成
+
+1. 单元素生成需求,需要先通过`get_weight_score_topn`工具查找高权重元素/高权重分类,判断是否能作为需求,给出理由。满足的进入需求池,不满足的给出丢弃的理由。
+2. 生成的需求,必须要有实际的含义,不符合要求的词,给出理由直接过滤掉
+
+## 要求
+
+1. 共现查询的地点必须来自于高权重分类,不能直接从树上寻找分类
+2. 最终结果的保留,必须要有权重分或者支持度进行支持
+3. 意图分类没有任何实际意义,不能作为需求,严禁使用意图产生需求
+4. 尽可能多地产生需求,尽量保证最终产生的需求数量在「%count%」个左右
+5. 优先返回分类结果,不足用元素结果补充
+

+ 1 - 1
examples/demand/demand_pattern_tools.py

@@ -50,7 +50,7 @@ def _log_tool_output(tool_name: str, result: str) -> str:
 # ============================================================================
 
 @tool(
-    "获取二级品类分类树结构快照。分类树是所有数据的骨架——帖子中的元素按'实质/形式/意图'三个维度归类到树形分类节点中。"
+    "获取二级品类分类树结构快照。分类树是所有数据的骨架——帖子中的元素按'实质/形式'三个维度归类到树形分类节点中。"
     "\n\n返回紧凑文本格式,包含每个分类节点的名称、层级和元素数量。这是理解数据整体结构的起点。"
     "\n\n使用场景:"
     "\n- 启动时调用,了解数据涵盖哪些主题领域和维度"

+ 62 - 0
examples/demand/export.py

@@ -0,0 +1,62 @@
+import argparse
+import json
+from pathlib import Path
+from typing import Any, List, Optional, Union
+
+
def build_lines_from_demand_items(input_path: Union[str, Path]) -> List[str]:
    """Read a demand-items JSON file and format each item as one pipe-joined line.

    Each line is ``name|type|reason|suggestion|score|video_ids``, where
    ``type`` and ``video_ids`` come from the item's JSON-encoded
    ``ext_data`` string field.

    Args:
        input_path: path to a JSON file whose top level is a list of objects.

    Returns:
        One formatted string per item, in input order.

    Raises:
        ValueError: when the top level is not a list, or an entry is not an object.
    """
    path = Path(input_path)
    data: Any = json.loads(path.read_text(encoding="utf-8"))

    if not isinstance(data, list):
        raise ValueError(f"JSON 顶层必须是 list,实际是: {type(data).__name__}")

    lines: List[str] = []
    for idx, item in enumerate(data):
        if not isinstance(item, dict):
            raise ValueError(f"第 {idx + 1} 条记录不是对象: {type(item).__name__}")

        name = item.get("name", [])
        reason = str(item.get("reason", ""))
        suggestion = str(item.get("suggestion", ""))
        score = item.get("score", 0)
        # ext_data is stored as a JSON string; default to an empty object.
        ext_data = json.loads(item.get("ext_data", '{}'))
        # Renamed from `type`: the original shadowed the builtin, so after the
        # first iteration a non-dict entry made `type(item).__name__` above
        # raise TypeError instead of the intended ValueError.
        item_type = ext_data.get('type')
        video_ids = ext_data.get('video_ids', [])

        lines.append(f"{name}|{item_type}|{reason}|{suggestion}|{score}|{video_ids}")

    return lines
+
+
def export_lines(
    input_path: Union[str, Path],
    output_path: Optional[Union[str, Path]] = None,
) -> List[str]:
    """Format a demand-items JSON file and emit the result.

    - ``output_path`` is None: print the joined lines to stdout.
    - ``output_path`` given: write them to that file as UTF-8.

    Returns the individual lines either way.
    """
    lines = build_lines_from_demand_items(input_path)
    joined = "\n".join(lines)

    if output_path is not None:
        Path(output_path).write_text(joined, encoding="utf-8")
    else:
        print(joined)

    return lines
+
+
def main(
    account_name: str = 'R_50*泛知识*生活科普',
    result_dir: Union[str, Path] = '/Users/shimeng/Desktop/py/Agent/examples/demand/result',
) -> None:
    """Export one account's demand items from JSON to a pipe-delimited TXT.

    The defaults reproduce the original hard-coded account name and
    machine-specific result directory, but both can now be overridden by
    callers instead of editing the source.

    Args:
        account_name: basename (without extension) of the input/output files.
        result_dir: directory containing ``<account_name>.json`` and
            receiving ``<account_name>.txt``.
    """
    base = Path(result_dir)
    input_path = base / f'{account_name}.json'
    output_path = base / f'{account_name}.txt'
    export_lines(input_path, output_path)


if __name__ == "__main__":
    main()

+ 11 - 11
examples/demand/pattern_builds/data_operation.py

@@ -615,7 +615,7 @@ def create_classification_mapping(
     session = db_manager.get_session()
     try:
         mapping = ElementClassificationMapping(
-            post_decode_topic_point_element_id=post_element_id,
+            source_element_id=post_element_id,
             post_id=post_id,
             element_name=element_name,
             element_type=element_type,
@@ -640,7 +640,7 @@ def batch_create_classification_mappings(mappings_data: list[dict], execution_id
         count = 0
         for m in mappings_data:
             mapping = ElementClassificationMapping(
-                post_decode_topic_point_element_id=m['post_element_id'],
+                source_element_id=m['post_element_id'],
                 post_id=m['post_id'],
                 element_name=m['element_name'],
                 element_type=m['element_type'],
@@ -687,7 +687,7 @@ def get_unclassified_elements(
     try:
         # 子查询: 已分类的 element ids
         classified_ids_subq = session.query(
-            ElementClassificationMapping.post_decode_topic_point_element_id
+            ElementClassificationMapping.source_element_id
         ).subquery()
 
         # 查询未分类元素并去重
@@ -852,9 +852,9 @@ def commit_auto_link_mappings(
             all_ids = [int(x.strip()) for x in str(elem['all_ids']).split(',') if x.strip()]
             existing_mapped = set(
                 r[0] for r in session.query(
-                    ElementClassificationMapping.post_decode_topic_point_element_id
+                    ElementClassificationMapping.source_element_id
                 ).filter(
-                    ElementClassificationMapping.post_decode_topic_point_element_id.in_(all_ids)
+                    ElementClassificationMapping.source_element_id.in_(all_ids)
                 ).all()
             )
             to_link = [eid for eid in all_ids if eid not in existing_mapped]
@@ -867,7 +867,7 @@ def commit_auto_link_mappings(
                     continue
 
                 mapping = ElementClassificationMapping(
-                    post_decode_topic_point_element_id=element_id,
+                    source_element_id=element_id,
                     post_id=original.post_id,
                     element_name=elem['element_name'],
                     element_type=source_type,
@@ -927,7 +927,7 @@ def backfill_classification_mappings(
     try:
         # 获取代表元素的映射
         rep_mapping = session.query(ElementClassificationMapping).filter(
-            ElementClassificationMapping.post_decode_topic_point_element_id == representative_id,
+            ElementClassificationMapping.source_element_id == representative_id,
         ).first()
 
         if not rep_mapping:
@@ -938,9 +938,9 @@ def backfill_classification_mappings(
         # 排除代表元素自己和已有映射的
         existing_mapped = set(
             r[0] for r in session.query(
-                ElementClassificationMapping.post_decode_topic_point_element_id
+                ElementClassificationMapping.source_element_id
             ).filter(
-                ElementClassificationMapping.post_decode_topic_point_element_id.in_(all_ids)
+                ElementClassificationMapping.source_element_id.in_(all_ids)
             ).all()
         )
 
@@ -955,7 +955,7 @@ def backfill_classification_mappings(
                 continue
 
             mapping = ElementClassificationMapping(
-                post_decode_topic_point_element_id=element_id,
+                source_element_id=element_id,
                 post_id=original.post_id,
                 element_name=rep_mapping.element_name,
                 element_type=rep_mapping.element_type,
@@ -1345,7 +1345,7 @@ def refresh_post_classification_status(
 
             # 统计已分类的元素数
             classified_subq = session.query(
-                ElementClassificationMapping.post_decode_topic_point_element_id
+                ElementClassificationMapping.source_element_id
             ).subquery()
 
             classified = session.query(func.count(PostDecodeTopicPointElement.id)).filter(

+ 2 - 2
examples/demand/pattern_builds/db_manager.py

@@ -5,10 +5,10 @@ from sqlalchemy.orm import sessionmaker, Session
 class DatabaseManager1:
     """数据库管理类"""
 
-    # mysql+pymysql://<用户名>:<密码>@<主机地址>:<端口>/<数据库名>?charset=utf8mb4
+    # postgresql://<用户名>:<密码>@<主机地址>:<端口>/<数据库名>
     def __init__(self):
         connection_string = (
-            f"mysql+pymysql://wx2016_longvideo:wx2016_longvideoP%40assword1234@rm-bp1k5853td1r25g3n690.mysql.rds.aliyuncs.com:3306/open_aigc?charset=utf8mb4"
+            f"postgresql://aiddit_aigc:%25a%26%26yqNxg%5EV1%24toJ%2AWOa%5E-b%5EX%3DQJ@gp-t4n72471pkmt4b9q7o-master.gpdbmaster.singapore.rds.aliyuncs.com:5432/open_aigc"
         )
         self.engine = create_engine(connection_string, pool_pre_ping=True, pool_recycle=3600)
         self.SessionLocal = sessionmaker(bind=self.engine, autoflush=False, autocommit=False)

+ 3 - 2
examples/demand/pattern_builds/models1.py

@@ -208,7 +208,7 @@ class ElementClassificationMapping(Base):
     __tablename__ = 'element_classification_mapping'
 
     id = Column(BigInteger, primary_key=True, autoincrement=True, comment='主键ID')
-    post_decode_topic_point_element_id = Column(BigInteger, nullable=False,
+    source_element_id = Column(BigInteger, nullable=False,
                                                  comment='FK → PostDecodeTopicPointElement.id')
     post_id = Column(String(100), comment='冗余帖子ID,便于查询')
     element_name = Column(String(500), comment='原始元素名称')
@@ -221,9 +221,10 @@ class ElementClassificationMapping(Base):
 
     classify_execution_id = Column(BigInteger, nullable=False, comment='哪次执行分类的')
     created_at = Column(DateTime, nullable=False, default=datetime.now, comment='创建时间')
+    source_table = Column(String(100), comment='')
 
     __table_args__ = (
-        Index('idx_ecm_element_id', 'post_decode_topic_point_element_id'),
+        Index('idx_ecm_element_id', 'source_element_id'),
         Index('idx_ecm_post_id', 'post_id'),
         Index('idx_ecm_global_element_id', 'global_element_id'),
         Index('idx_ecm_category_stable_id', 'global_category_stable_id'),

+ 70 - 1
examples/demand/pattern_builds/pattern_service.py

@@ -13,7 +13,7 @@ from .models2 import (
     TopicPatternItemset, TopicPatternItemsetItem,
     TopicPatternCategory, TopicPatternElement,
 )
-from .post_data_service import export_post_elements
+from .post_data_service import export_post_elements, export_post_elements_by_post_ids
 from .apriori_analysis_post_level import (
     build_transactions_at_depth,
     run_fpgrowth_with_absolute_support,
@@ -462,6 +462,75 @@ def get_merge_level2_crawler_data(merge_level2) -> dict:
         session.close()
 
 
def run_category_element_snapshot(
        post_ids: List[str] = None,
        cluster_name: str = None,
        merge_leve2: str = None,
        platform: str = None,
        account_name: str = None,
        classify_execution_id: int = None,
):
    """Write only topic_pattern_category / topic_pattern_element snapshots.

    Unlike the full mining run, this skips FP-Growth and itemset writes. It
    still creates a topic_pattern_execution row to provide the execution_id
    foreign key, but writes no Post metadata, mining_config or itemsets.
    Posts are fetched by ``post_ids`` in batches (see
    ``export_post_elements_by_post_ids``); ``post_limit`` is repurposed to
    record the number of unique requested ids.

    Returns:
        execution_id on success. Any failure — including the export helper
        returning None, which makes the ``result['data']`` access raise — is
        recorded as status='failed' on the execution row and re-raised.
    """
    ensure_tables()
    session = db.get_session()

    # De-duplicate while preserving order; only the count is stored here.
    _requested_n = len(list(dict.fromkeys(post_ids or [])))
    execution = TopicPatternExecution(
        cluster_name=cluster_name,
        merge_leve2=merge_leve2,
        platform=platform,
        account_name=account_name,
        post_limit=max(_requested_n, 0),
        min_absolute_support=0,
        classify_execution_id=classify_execution_id,
        mining_configs=None,
        status='running',
        start_time=datetime.now(),
    )
    # Commit immediately so the row (and its id) exists even if the export fails.
    session.add(execution)
    session.commit()
    execution_id = execution.id

    try:
        t_start = time.time()
        print(f"[Snapshot {execution_id}] 正在获取数据...")
        result = export_post_elements_by_post_ids(post_ids=post_ids or [])
        source_data = result['data']
        post_count = result['post_count']
        categories = result['categories']
        print(f"[Snapshot {execution_id}] 获取到 {post_count} 个帖子, 耗时 {time.time() - t_start:.2f}s")
        _store_category_tree_snapshot(session, execution_id, categories, source_data)

        # Re-fetch the row in this session before mutating its status fields.
        execution = session.get(TopicPatternExecution, execution_id)
        execution.status = 'success'
        execution.post_count = post_count
        execution.itemset_count = 0
        execution.end_time = datetime.now()
        session.commit()

        print(f"[Snapshot {execution_id}] 分类/元素快照完成, 总耗时 {time.time() - t_start:.2f}s")
        return execution_id

    except Exception as e:
        traceback.print_exc()
        # Best-effort failure bookkeeping, then propagate to the caller.
        execution = session.get(TopicPatternExecution, execution_id)
        if execution:
            execution.status = 'failed'
            execution.error_message = str(e)[:2000]
            execution.end_time = datetime.now()
            session.commit()
        raise
    finally:
        session.close()
+
+
 def run_mining(
         post_ids: List[str] = None,
         cluster_name: str = None,

+ 194 - 2
examples/demand/pattern_builds/post_data_service.py

@@ -7,6 +7,198 @@ from .models1 import Post, PostDecodeTopicPoint, PostDecodeTopicPointElement, El
 
 db_manager = DatabaseManager1()
 
+# 按 post_id 批量查询时,单次 IN 列表长度上限
+POST_ID_QUERY_BATCH_SIZE = 1000
+
+
def export_post_elements_by_post_ids(post_ids: List[str]):
    """Export post elements and the category tree strictly by post_id list.

    Mirrors ``export_post_elements`` over the intersection of "qualified"
    posts and the given post_ids: the same topic-point/element/mapping
    loading, category-tree parent back-fill, virtual intent categories, and
    the same ``data``/``categories`` assembly. Each batch queries at most
    ``POST_ID_QUERY_BATCH_SIZE`` post_ids so IN clauses stay bounded.

    Intentional differences from ``export_post_elements``: posts are NOT
    filtered by merge_leve2/platform/account; there is no post_limit (every
    qualified post in the list is exported); GlobalElement is not queried,
    so the result contains no post_metadata/elements keys.

    Returns:
        ``{"post_count", "data", "categories"}``; None on any exception.
    """
    # Stable shape for the no-op cases.
    empty = {"post_count": 0, "data": {}, "categories": []}

    if not post_ids:
        return empty

    try:
        # De-duplicate while preserving caller order.
        unique_ids = list(dict.fromkeys(post_ids))
        # "Qualified" = posts the classifier helper reports as sufficiently
        # classified for 实质/形式 elements (min_ratio=0.8).
        qualified_post_ids = set(data_operation.get_fully_classified_post_ids(
            required_types=['实质', '形式'], min_ratio=0.8
        ))
        if not qualified_post_ids:
            return empty

        session = db_manager.get_session()
        try:
            from collections import defaultdict

            result = {}
            cat_map = {}
            # Intent elements with no real category mapping are assigned
            # synthetic negative stable_ids, one per distinct name.
            intent_name_to_virtual_sid = {}
            virtual_stable_id_counter = -1

            for i in range(0, len(unique_ids), POST_ID_QUERY_BATCH_SIZE):
                batch = unique_ids[i:i + POST_ID_QUERY_BATCH_SIZE]
                target = [pid for pid in batch if pid in qualified_post_ids]
                if not target:
                    continue

                post_rows = session.query(Post).filter(
                    Post.post_id.in_(target)
                ).order_by(Post.id.desc()).all()
                batch_pids = [p.post_id for p in post_rows]
                if not batch_pids:
                    continue

                # Topic points for this batch, grouped post_id -> point_type -> points.
                points = session.query(PostDecodeTopicPoint).filter(
                    PostDecodeTopicPoint.post_id.in_(batch_pids)
                ).all()

                point_ids = [p.id for p in points]
                points_map = defaultdict(lambda: defaultdict(list))
                for p in points:
                    points_map[p.post_id][p.topic_point_type].append(p)

                # Elements under those points, grouped by topic_point_id.
                elements = session.query(PostDecodeTopicPointElement).filter(
                    PostDecodeTopicPointElement.topic_point_id.in_(point_ids)
                ).all() if point_ids else []

                elem_ids = [e.id for e in elements]
                elems_map = defaultdict(list)
                for e in elements:
                    elems_map[e.topic_point_id].append(e)

                # element_id -> classification mapping (if any).
                mappings = session.query(ElementClassificationMapping).filter(
                    ElementClassificationMapping.source_element_id.in_(elem_ids)
                ).all() if elem_ids else []

                mapping_map = {m.source_element_id: m for m in mappings}

                # Load the directly referenced, non-retired categories.
                direct_stable_ids = {m.global_category_stable_id for m in mappings if m.global_category_stable_id}
                if direct_stable_ids:
                    new_cats = session.query(GlobalCategory).filter(
                        GlobalCategory.stable_id.in_(direct_stable_ids),
                        GlobalCategory.retired_at_execution_id.is_(None),
                    ).all()
                    for c in new_cats:
                        cat_map[c.stable_id] = c

                # Walk up the tree until every referenced parent category is loaded.
                all_stable_ids = set(cat_map.keys())
                missing_parents = set()
                for c in cat_map.values():
                    if c.parent_stable_id and c.parent_stable_id not in all_stable_ids:
                        missing_parents.add(c.parent_stable_id)

                while missing_parents:
                    parent_cats = session.query(GlobalCategory).filter(
                        GlobalCategory.stable_id.in_(missing_parents),
                        GlobalCategory.retired_at_execution_id.is_(None),
                    ).all()
                    if not parent_cats:
                        # Dangling parent ids (retired/absent): stop climbing.
                        break
                    next_missing = set()
                    for c in parent_cats:
                        cat_map[c.stable_id] = c
                        all_stable_ids.add(c.stable_id)
                        if c.parent_stable_id and c.parent_stable_id not in all_stable_ids:
                            next_missing.add(c.parent_stable_id)
                    missing_parents = next_missing

                cat_path_map = {sid: c.path for sid, c in cat_map.items()}

                # Pass 1: register virtual categories for 意图 elements that
                # have no resolvable classification path.
                for pid in batch_pids:
                    for point_type in ['灵感点', '目的点', '关键点']:
                        for point in points_map[pid].get(point_type, []):
                            for elem in elems_map.get(point.id, []):
                                if elem.element_type != '意图':
                                    continue
                                mapping = mapping_map.get(elem.id)
                                path_str = cat_path_map.get(mapping.global_category_stable_id) if mapping else None
                                if not path_str:
                                    name = elem.element_name
                                    if name and name not in intent_name_to_virtual_sid:
                                        intent_name_to_virtual_sid[name] = virtual_stable_id_counter
                                        virtual_stable_id_counter -= 1

                # Pass 2: assemble per-post data grouped by point type.
                for pid in batch_pids:
                    post_data = {}
                    for point_type in ['灵感点', '目的点', '关键点']:
                        point_list = []
                        for point in points_map[pid].get(point_type, []):
                            point_item = {
                                "点": point.topic_point_result,
                                "实质": [], "形式": [], "意图": [],
                            }
                            for elem in elems_map.get(point.id, []):
                                mapping = mapping_map.get(elem.id)
                                path_str = cat_path_map.get(mapping.global_category_stable_id) if mapping else None
                                path_list = [s for s in path_str.split('/') if s] if path_str else []

                                # Unmapped intent elements fall back to their own name as the path.
                                if not path_list and elem.element_type == '意图':
                                    path_list = [elem.element_name]

                                elem_item = {
                                    "名称": elem.element_name,
                                    "详细描述": elem.element_description or "",
                                    "分类路径": path_list,
                                }
                                if elem.element_type in point_item:
                                    point_item[elem.element_type].append(elem_item)

                            point_list.append(point_item)
                        if point_list:
                            post_data[point_type] = point_list

                    if post_data:
                        result[pid] = post_data

            # Real categories first, then the synthesized intent categories.
            categories_out = []
            for c in cat_map.values():
                categories_out.append({
                    "stable_id": c.stable_id,
                    "name": c.name,
                    "description": c.description or "",
                    "category_nature": c.category_nature,
                    "source_type": c.source_type,
                    "path": c.path,
                    "level": c.level,
                    "parent_stable_id": c.parent_stable_id,
                })

            for intent_name, v_sid in intent_name_to_virtual_sid.items():
                categories_out.append({
                    "stable_id": v_sid,
                    "name": intent_name,
                    "description": "",
                    "category_nature": None,
                    "source_type": "意图",
                    "path": f"/{intent_name}",
                    "level": 1,
                    "parent_stable_id": None,
                })

            return {
                "post_count": len(result),
                "data": result,
                "categories": categories_out,
            }
        finally:
            session.close()

    except Exception:
        import traceback
        traceback.print_exc()
        return None
+
 
 def export_post_elements(
         post_ids: List[str],
@@ -77,10 +269,10 @@ def export_post_elements(
 
             # 4. 查询分类映射
             mappings = session.query(ElementClassificationMapping).filter(
-                ElementClassificationMapping.post_decode_topic_point_element_id.in_(elem_ids)
+                ElementClassificationMapping.source_element_id.in_(elem_ids)
             ).all() if elem_ids else []
 
-            mapping_map = {m.post_decode_topic_point_element_id: m for m in mappings}
+            mapping_map = {m.source_element_id: m for m in mappings}
 
             # 5. 查询涉及的分类(当前有效版本)
             direct_stable_ids = {m.global_category_stable_id for m in mappings if m.global_category_stable_id}

+ 8 - 8
examples/demand/piaoquan_prepare.py

@@ -7,7 +7,6 @@ from examples.demand.db_manager import DatabaseManager
 from examples.demand.models import TopicPatternElement, TopicPatternExecution
 from examples.demand.pattern_builds.pattern_service import run_mining
 
-
 db = DatabaseManager()
 
 
@@ -48,7 +47,7 @@ def _write_json(path, payload):
         json.dump(payload, f, ensure_ascii=False, indent=2)
 
 
-def prepare(execution_id):
+def prepare(merge_leve2_list, execution_id):
     session = db.get_session()
     try:
         execution = session.query(TopicPatternExecution).filter(
@@ -57,8 +56,6 @@ def prepare(execution_id):
         if not execution:
             raise ValueError(f"execution_id 不存在: {execution_id}")
 
-        merge_leve2 = execution.merge_leve2
-
         rows = session.query(TopicPatternElement).filter(
             TopicPatternElement.execution_id == execution_id
         ).all()
@@ -67,7 +64,9 @@ def prepare(execution_id):
 
         # 1) 去重 post_id 拉取 ROV
         all_post_ids = sorted({r.post_id for r in rows if r.post_id})
-        rov_by_post_id = get_rov_by_merge_leve2_and_video_ids(merge_leve2, all_post_ids) if all_post_ids else {}
+        rov_by_post_id = (
+            get_rov_by_merge_leve2_and_video_ids(merge_leve2_list, all_post_ids) if all_post_ids else {}
+        )
 
         # 2) 按 element_type 分组,计算 name 的平均 ROV 分
         grouped = {
@@ -101,7 +100,7 @@ def prepare(execution_id):
         output_dir = Path(__file__).parent / "data" / str(execution_id)
         output_dir.mkdir(parents=True, exist_ok=True)
 
-        summary = {"execution_id": execution_id, "merge_leve2": merge_leve2, "files": {}}
+        summary = {"execution_id": execution_id, "files": {}}
 
         for element_type, data in grouped.items():
             name_post_ids = data["name_post_ids"]
@@ -162,12 +161,14 @@ def prepare(execution_id):
     finally:
         session.close()
 
+
 def piaoquan_prepare(cluster_name):
     execution_id = run_mining(cluster_name=cluster_name, merge_leve2=cluster_name, platform='piaoquan')
     if execution_id:
-        prepare(execution_id)
+        prepare([cluster_name], execution_id)
         return execution_id
 
+
 if __name__ == '__main__':
     # cluster_name = '贪污腐败'
     #
@@ -175,4 +176,3 @@ if __name__ == '__main__':
     prepare(8)
     # execution_id = piaoquan_prepare(cluster_name=cluster_name)
     # print(execution_id)
-

+ 1 - 1
examples/demand/render_log_html.py

@@ -73,7 +73,7 @@ TOOL_DESCRIPTION_MAP: dict[str, str] = {
 # =========================
 # 运行配置(直接改变量即可)
 # =========================
-INPUT_LOG_PATH = "examples/demand/output/12/run_log_12_20260327_205003.txt"
+INPUT_LOG_PATH = "examples/demand/output/58/run_log_58_20260410_153916.txt"
 # 设为 None 则默认生成到输入文件同名 .html
 OUTPUT_HTML_PATH: str | None = None
 # 是否默认折叠所有 [FOLD] 块

+ 27 - 12
examples/demand/run.py

@@ -15,7 +15,11 @@ from sqlalchemy import desc, or_
 
 from examples.demand.changwen_prepare import changwen_prepare
 from examples.demand.config import LOG_LEVEL, ENABLED_TOOLS
-from examples.demand.db_manager import DatabaseManager, query_video_ids_by_names
+from examples.demand.db_manager import (
+    DatabaseManager,
+    query_category_level,
+    query_video_ids_by_names,
+)
 from examples.demand.models import TopicPatternExecution
 from examples.demand.piaoquan_prepare import prepare, piaoquan_prepare
 from examples.demand.demand_agent_context import TopicBuildAgentContext
@@ -292,6 +296,10 @@ def write_demand_items_to_mysql(execution_id: int, merge_level2: str) -> int:
         desc_value = di.get("desc")
         type = di.get("type")
         suggestion = desc_value
+        if str(type).strip() == "分类":
+            category_level = query_category_level(execution_id=execution_id, name=name)
+            if category_level:
+                type = type + f'L{category_level}'
         video_ids = _resolve_video_ids_by_name_and_execution_id(name=name, execution_id=execution_id)
         # 兼容旧字段:同时保留 ext_data(reason/desc)JSON,便于旧版消费逻辑迁移期继续使用。
         ext_data = {"reason": reason, "desc": desc_value, "type": type, "video_ids": video_ids}
@@ -312,12 +320,15 @@ def write_demand_items_to_mysql(execution_id: int, merge_level2: str) -> int:
         log("[mysql] 生成行为空,跳过写入")
         return 0
 
-    affected = mysql_db.insert_many("demand_content", rows)
-    log(f"[mysql] 写入 demand_content 完成,rows={len(rows)}, affected={affected}")
+    # affected = mysql_db.insert_many("demand_content", rows)
+    # log(f"[mysql] 写入 demand_content 完成,rows={len(rows)}, affected={affected}")
+    with open(f'/Users/shimeng/Desktop/py/Agent/examples/demand/result/{merge_level2}.json', 'w',
+              encoding='utf-8') as f:
+        json.dump(rows, f, ensure_ascii=False, indent=4)
     return len(rows)
 
 
-async def run_once(execution_id, merge_level2, count: int = 30, task_id: Optional[int] = None) -> str:
+async def run_once(execution_id, cluster_name, platform_type, count: int = 30, task_id: Optional[int] = None) -> str:
     task_log_text = ""
     task_status = 0
 
@@ -329,8 +340,10 @@ async def run_once(execution_id, merge_level2, count: int = 30, task_id: Optiona
 
     setup_logging(level=LOG_LEVEL, file=LOG_FILE)
     register_selected_tools(ENABLED_TOOLS)
-
-    prompt = SimplePrompt(base_dir / "demand.md")
+    if platform_type == 'piaoquan':
+        prompt = SimplePrompt(base_dir / "demand.md")
+    else:
+        prompt = SimplePrompt(base_dir / "demand1.md")
 
     model = resolve_model(prompt)
 
@@ -347,7 +360,7 @@ async def run_once(execution_id, merge_level2, count: int = 30, task_id: Optiona
     run_config.knowledge.enable_injection = False
     run_config.trace_id = None
 
-    initial_messages = prompt.build_messages(merge_level2=merge_level2, count=count)
+    initial_messages = prompt.build_messages(merge_level2=cluster_name, count=count)
 
     store = FileSystemTraceStore(base_path=TRACE_STORE_PATH)
     runner = AgentRunner(
@@ -392,7 +405,7 @@ async def run_once(execution_id, merge_level2, count: int = 30, task_id: Optiona
             # agent 执行完成后:把本地 result JSON 写入 MySQL 表 demand_content
             # element_names -> name(逗号分隔);reason -> demand_content.reason;desc -> demand_content.suggestion;dt -> demand_content.dt
             try:
-                write_demand_items_to_mysql(execution_id=execution_id, merge_level2=merge_level2)
+                write_demand_items_to_mysql(execution_id=execution_id, merge_level2=cluster_name)
             except Exception as e:
                 log(f"[mysql] 写入 demand_content 异常:{e}")
 
@@ -419,7 +432,7 @@ async def run_once(execution_id, merge_level2, count: int = 30, task_id: Optiona
             except Exception:
                 # 兜底:即使写文件失败,也要确保 MySQL 状态被更新
                 pass
-        _finish_demand_task(task_id=task_id, status=task_status, task_log=task_log_text)
+        # _finish_demand_task(task_id=task_id, status=task_status, task_log=task_log_text)
 
     return final_text
 
@@ -427,7 +440,7 @@ async def run_once(execution_id, merge_level2, count: int = 30, task_id: Optiona
 async def main(
         cluster_name: str,
         platform_type: str,
-        count,
+        count=30,
         execution_id: Optional[int] = None,
         task_id: Optional[int] = None,
 ) -> dict:
@@ -443,10 +456,12 @@ async def main(
     if not execution_id:
         return {"execution_id": None, "final_text": ""}
 
-    final_text = await run_once(execution_id, cluster_name, count=count, task_id=task_id)
+    final_text = await run_once(execution_id, cluster_name, platform_type, count=count, task_id=task_id)
     return {"execution_id": execution_id, "final_text": final_text}
 
 
 if __name__ == "__main__":
-    asyncio.run(main('贪污腐败', 'piaoquan'))
+    # asyncio.run(run_once(58, 'R_50*泛知识*生活科普', 'zengzhang'))
+    write_demand_items_to_mysql(58, 'R_50*泛知识*生活科普')
+    # asyncio.run(main('贪污腐败', 'piaoquan'))
     # write_demand_items_to_mysql(execution_id=8, merge_level2='贪污腐败')

+ 54 - 0
examples/demand/test.py

@@ -0,0 +1,54 @@
+
+from odps import ODPS
+from odps.errors import ODPSError
+
+
def get_odps_data(sql):
    """Run *sql* on MaxCompute (ODPS) and return the result rows.

    Returns:
        list of pyodps Record objects on success, or None when the query fails.
    """
    import os

    # SECURITY: the previous revision hard-coded an AccessKey id/secret here,
    # which leaks credentials through version control. Read them from the
    # environment instead and fail fast when they are absent.
    access_id = os.environ["ODPS_ACCESS_ID"]
    access_key = os.environ["ODPS_ACCESS_KEY"]
    project = os.environ.get("ODPS_PROJECT", "loghubods")
    endpoint = os.environ.get("ODPS_ENDPOINT", "http://service.odps.aliyun.com/api")

    # 1. Initialize the ODPS entry point.
    o = ODPS(access_id, access_key, project, endpoint=endpoint)

    try:
        # 2. execute_sql blocks until the job finishes; open_reader streams rows.
        with o.execute_sql(sql).open_reader() as reader:
            return list(reader)
    except ODPSError as e:
        print(f"ODPS 错误: {e}")
        return None
+
def get_data():
    """Fetch one sample row from dwd_topic_decode_result_di and print its fields."""
    sql = '''
    SELECT  type
                    ,channel
                    ,vid
                    ,cate1
                    ,cate2
                    ,title
                    ,url1
                    ,url2
                    ,SUBSTR(decode_result,1,20) AS decode_result
                    ,is_new
                    ,extend
                    ,dt
            FROM    loghubods.dwd_topic_decode_result_di
            WHERE   dt = '20260415'
            limit 1;
    '''

    records = get_odps_data(sql)
    if records:
        for rec in records:
            # Print the 12 selected columns space-separated, one record per line.
            print(*(rec[i] for i in range(12)))


if __name__ == '__main__':
    get_data()

+ 529 - 0
examples/demand/tree_html.py

@@ -0,0 +1,529 @@
+from __future__ import annotations
+
+import html as html_lib
+import json
+import math
+from dataclasses import dataclass, field
+from pathlib import Path
+
+from sqlalchemy import text
+
+from examples.demand.db_manager import DatabaseManager
+
+db = DatabaseManager()
+
+SOURCE_TYPES = ["实质", "形式", "意图"]
+
+# JSON field → 展示维度名(扩展新维度只需在此添加)
+SCORE_DIMS: dict[str, str] = {"score": "rov"}
+
+THEME = {
+    "实质": {
+        "root_bg": "#e8841a",
+        "line": "#d4a574",
+        "low": (253, 243, 228),
+        "high": (195, 80, 5),
+        "no_score": "#f5e6d3",
+    },
+    "形式": {
+        "root_bg": "#5b9bd5",
+        "line": "#8cb9dc",
+        "low": (232, 244, 253),
+        "high": (30, 90, 165),
+        "no_score": "#d6e9f8",
+    },
+    "意图": {
+        "root_bg": "#70ad47",
+        "line": "#8fc270",
+        "low": (235, 249, 225),
+        "high": (40, 120, 20),
+        "no_score": "#dbefd0",
+    },
+}
+
+
+# ---------------------------------------------------------------------------
+# Color helpers (Python 侧用于首次渲染;JS 侧有等价实现用于切换)
+# ---------------------------------------------------------------------------
+
+def _luminance(r: int, g: int, b: int) -> float:
+    return 0.299 * r / 255 + 0.587 * g / 255 + 0.114 * b / 255
+
+
+def _blend(low: tuple, high: tuple, t: float) -> tuple[int, int, int]:
+    t = max(0.0, min(1.0, t))
+    return tuple(int(lo + (hi - lo) * t) for lo, hi in zip(low, high))
+
+
+def _rgb_hex(rgb: tuple) -> str:
+    return f"#{rgb[0]:02x}{rgb[1]:02x}{rgb[2]:02x}"
+
+
+def _normalize_score(score: float, max_score: float) -> float:
+    if max_score <= 0 or score <= 0:
+        return 0.0
+    return math.log1p(score) / math.log1p(max_score)
+
+
+# ---------------------------------------------------------------------------
+# Data model
+# ---------------------------------------------------------------------------
+
@dataclass
class CatNode:
    """One node of the topic-pattern category tree (a topic_pattern_category row)."""

    id: int
    name: str
    source_stable_id: int | None
    source_type: str
    description: str | None
    level: int | None
    parent_id: int | None
    element_count: int
    children: list[CatNode] = field(default_factory=list)

    @property
    def subtree_element_count(self) -> int:
        """element_count of this node plus all of its descendants."""
        own = self.element_count or 0
        return own + sum(child.subtree_element_count for child in self.children)
+
+
+# ---------------------------------------------------------------------------
+# Data loading
+# ---------------------------------------------------------------------------
+
def fetch_categories(eid: int) -> list[dict]:
    """Load every topic_pattern_category row for one execution as a plain dict."""
    session = db.get_session()
    try:
        stmt = text(
            "SELECT id, source_stable_id, source_type, name, description, "
            "level, parent_id, element_count "
            "FROM topic_pattern_category WHERE execution_id = :eid "
            "ORDER BY source_type, level, id"
        )
        result = session.execute(stmt, {"eid": eid}).mappings().fetchall()
        return [dict(row) for row in result]
    finally:
        session.close()
+
+
def load_scores(eid: int) -> tuple[dict[str, dict[str, dict]], list[str]]:
    """Read data/{eid}/*_分类.json and return (scores_data, dim_names).

    scores_data: {source_type: {category_path: {dim_name: float, post_ids_count: int}}}
    Missing files are skipped; dim_names lists every dimension actually seen, sorted.
    """
    data_dir = Path(__file__).resolve().parent / "data" / str(eid)
    scores: dict[str, dict[str, dict]] = {}
    seen_dims: set[str] = set()
    for source_type in SOURCE_TYPES:
        json_path = data_dir / f"{source_type}_分类.json"
        if not json_path.exists():
            continue
        with open(json_path, encoding="utf-8") as fh:
            items = json.load(fh)
        by_path: dict[str, dict] = {}
        for item in items:
            path_key = item.get("category_path", "")
            if not path_key:
                # Rows without a category_path cannot be matched to the tree.
                continue
            entry: dict = {"post_ids_count": item.get("post_ids_count", 0)}
            for json_key, dim_name in SCORE_DIMS.items():
                if json_key in item:
                    entry[dim_name] = item[json_key]
                    seen_dims.add(dim_name)
            by_path[path_key] = entry
        scores[source_type] = by_path
    return scores, sorted(seen_dims)
+
+
def build_trees(categories: list[dict]) -> dict[str, list[CatNode]]:
    """Assemble CatNode forests from flat rows, grouped by source_type.

    Rows whose parent_id is falsy or unknown become roots; every node's
    children end up sorted by id.
    """
    by_id: dict[int, CatNode] = {}
    for row in categories:
        node = CatNode(
            id=row["id"],
            name=row["name"],
            source_stable_id=row.get("source_stable_id"),
            source_type=row["source_type"],
            description=row.get("description"),
            level=row.get("level"),
            parent_id=row.get("parent_id"),
            element_count=row.get("element_count") or 0,
        )
        by_id[node.id] = node

    forests: dict[str, list[CatNode]] = {}
    for node in by_id.values():
        parent = by_id.get(node.parent_id) if node.parent_id else None
        if parent is not None:
            parent.children.append(node)
        else:
            forests.setdefault(node.source_type, []).append(node)

    for node in by_id.values():
        node.children.sort(key=lambda child: child.id)

    return forests
+
+
+# ---------------------------------------------------------------------------
+# HTML rendering
+# ---------------------------------------------------------------------------
+
def _node_html(
    n: CatNode,
    th: dict,
    depth: int = 0,
    parent_path: str = "",
    scores: dict[str, dict] | None = None,
    max_score: float = 1.0,
    default_dim: str = "rov",
) -> str:
    """Render one category node (and, recursively, its subtree) as HTML.

    Args:
        n: node to render.
        th: theme dict providing "low"/"high"/"no_score"/"line" colors.
        depth: recursion depth, emitted as data-depth for expand/collapse.
        parent_path: ">"-joined ancestor names used to build the score key.
        scores: {category_path: {dim: value, "post_ids_count": int}} or None.
        max_score: per-dimension maximum used for color normalization.
        default_dim: score dimension rendered on first paint.

    Returns:
        An HTML fragment string for this node and its children.
    """
    has_ch = bool(n.children)
    ec = n.element_count or 0
    cc = len(n.children)

    # The score lookup key is the full ">"-joined path from root to this node.
    cur_path = n.name if not parent_path else f"{parent_path}>{n.name}"
    info = scores.get(cur_path) if scores else None
    score_val = info.get(default_dim) if info else None
    pcount = info.get("post_ids_count") if info else None

    if score_val is not None:
        # Map the log-normalized score onto the theme's low→high gradient,
        # then choose a text color with enough contrast against it.
        t = _normalize_score(score_val, max_score) if score_val > 0 else 0.0
        rgb = _blend(th["low"], th["high"], t)
        bg = _rgb_hex(rgb)
        lum = _luminance(*rgb)
        tc = "#fff" if lum < 0.55 else "#4a3520"
    else:
        bg = th["no_score"]
        tc = "#6b5240"

    parts: list[str] = []
    if has_ch:
        parts.append('<span class="ti">\u25BC</span>')
    parts.append(f'<span class="nn">{html_lib.escape(n.name)}</span>')
    if n.source_stable_id is not None:
        parts.append(f'<span class="si">{n.source_stable_id}</span>')
    # .sc is always present (the JS dimension switcher updates it);
    # it is hidden when this node has no score.
    if score_val is not None:
        parts.append(f'<span class="sc">{score_val:.2f}</span>')
    else:
        parts.append('<span class="sc" style="display:none"></span>')
    if ec:
        parts.append(f'<span class="ec">{ec}</span>')
    if pcount is not None:
        parts.append(f'<span class="pc">{pcount}p</span>')
    if cc:
        parts.append(f'<span class="cc">{cc}\u25B6</span>')

    onclick = ' onclick="tog(this)"' if has_ch else ""
    cls = "b e" if has_ch else "b"
    title_attr = f' title="{html_lib.escape(n.description)}"' if n.description else ""
    esc = html_lib.escape
    data_attr = f' data-path="{esc(cur_path)}" data-st="{esc(n.source_type)}"'

    h = f'<div class="t" data-depth="{depth}">'
    h += f'<div class="{cls}"{data_attr} style="background:{bg};color:{tc}"{onclick}{title_attr}>'
    h += "".join(parts)
    h += "</div>"

    if has_ch:
        # Children container; the --lc CSS variable drives connector-line color.
        h += f'<div class="ch" style="--lc:{th["line"]}">'
        for child in n.children:
            h += _node_html(child, th, depth + 1, cur_path, scores, max_score, default_dim)
        h += "</div>"

    h += "</div>"
    return h
+
+
def _section_html(
    source_type: str,
    roots: list[CatNode],
    scores: dict[str, dict] | None,
    max_score: float,
    default_dim: str = "rov",
) -> str:
    """Render one source_type section: a colored header followed by its root trees."""
    th = THEME.get(source_type, THEME["实质"])
    total = sum(r.subtree_element_count for r in roots)

    pieces = [
        '<div class="sec">',
        f'<div class="sh" style="background:{th["root_bg"]}">'
        f"\u25BC {html_lib.escape(source_type)} ({total})</div>",
        '<div class="sb">',
    ]
    for root in roots:
        pieces.append(
            _node_html(root, th, depth=0, parent_path="", scores=scores,
                       max_score=max_score, default_dim=default_dim)
        )
    pieces.append("</div></div>")
    return "".join(pieces)
+
+
def generate_tree_html(eid: int) -> str:
    """Build the complete standalone HTML page for one execution's category trees."""
    categories = fetch_categories(eid)
    trees = build_trees(categories)
    all_scores, dims = load_scores(eid)

    # Per-dimension, per-source_type maximum score (used for color normalization).
    max_scores: dict[str, dict[str, float]] = {}
    for dim in dims:
        per_type: dict[str, float] = {}
        for st in SOURCE_TYPES:
            values = (v.get(dim, 0) for v in all_scores.get(st, {}).values())
            per_type[st] = max(values, default=0.0)
        max_scores[dim] = per_type

    default_dim = dims[0] if dims else "rov"

    sections: list[str] = []
    for st in SOURCE_TYPES:
        if st not in trees:
            continue
        st_scores = all_scores.get(st, {})
        mx = max_scores.get(default_dim, {}).get(st, 1.0) or 1.0
        sections.append(_section_html(st, trees[st], st_scores, mx, default_dim))

    body = "\n".join(sections)

    # Dimension-switch buttons; the first dimension starts out active.
    buttons: list[str] = []
    for i, d in enumerate(dims):
        active = " active" if i == 0 else ""
        buttons.append(
            f'<button class="dim-btn{active}" data-dim="{html_lib.escape(d)}" '
            f"onclick=\"switchDim('{d}')\">{html_lib.escape(d)}</button>"
        )
    dim_btns = "".join(buttons)
    if not dim_btns:
        dim_btns = '<span style="color:#999;font-size:12px">无得分数据</span>'

    def _safe_json(obj: object) -> str:
        # Escape "</" so the embedded JSON cannot terminate the <script> element.
        return json.dumps(obj, ensure_ascii=False).replace("</", "<\\/")

    page = _PAGE_HTML
    page = page.replace("{{EID}}", str(eid))
    page = page.replace("{{BODY}}", body)
    page = page.replace("{{DIM_BTNS}}", dim_btns)
    page = page.replace("{{SCORES}}", _safe_json(all_scores))
    page = page.replace("{{DIMS}}", _safe_json(dims))
    page = page.replace("{{MAX}}", _safe_json(max_scores))
    return page
+
+
+# ---------------------------------------------------------------------------
+# Full-page HTML template
+# ---------------------------------------------------------------------------
+
+_PAGE_HTML = r"""<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+<title>分类树 · execution_id={{EID}}</title>
+<style>
+*{box-sizing:border-box;margin:0;padding:0}
+body{
+  font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Noto Sans SC",sans-serif;
+  background:#faf8f5;color:#333;padding:24px;
+}
+
+/* ===== header ===== */
+.hdr{
+  display:flex;align-items:center;gap:10px;
+  margin-bottom:28px;flex-wrap:wrap;
+}
+.hdr h1{font-size:20px;font-weight:700;color:#4a3520}
+.hdr .eid{color:#999;font-size:13px}
+.hdr button{
+  padding:5px 12px;border:1px solid #d0c8c0;border-radius:6px;
+  background:#fff;cursor:pointer;font-size:12px;color:#4a3520;
+}
+.hdr button:hover{background:#f5ebe0}
+
+/* dimension selector — 右侧 */
+.dim-sel{
+  margin-left:auto;
+  display:flex;align-items:center;gap:6px;
+  background:#fff;border:1px solid #d0c8c0;border-radius:8px;
+  padding:4px 10px;
+}
+.dim-label{font-size:12px;color:#888;white-space:nowrap}
+.dim-btn{
+  padding:4px 14px;border:1px solid #d0c8c0;border-radius:5px;
+  background:#fff;cursor:pointer;font-size:12px;color:#4a3520;
+  transition:all .15s;
+}
+.dim-btn:hover{background:#f5ebe0}
+.dim-btn.active{
+  background:#4a3520;color:#fff;border-color:#4a3520;
+}
+
+/* ===== section ===== */
+.sec{margin-bottom:36px}
+.sh{
+  display:inline-block;padding:8px 18px;border-radius:8px;color:#fff;
+  font-size:15px;font-weight:700;margin-bottom:14px;
+}
+.sb{overflow-x:auto;padding:12px 0 12px 4px}
+
+/* ===== horizontal tree ===== */
+.t{display:inline-flex;align-items:center}
+
+.b{
+  display:inline-flex;align-items:center;gap:5px;
+  padding:4px 10px;border-radius:5px;white-space:nowrap;font-size:13px;
+  box-shadow:0 1px 2px rgba(0,0,0,.08);user-select:none;
+  border:1px solid rgba(0,0,0,.06);
+  transition:background .2s,color .2s;
+}
+.b.e{cursor:pointer}
+.b.e:hover{filter:brightness(1.06);box-shadow:0 2px 6px rgba(0,0,0,.14)}
+.ti{font-size:10px;transition:transform .15s ease;display:inline-block}
+.nn{font-weight:600}
+.si{font-size:11px;opacity:.5}
+.sc{
+  font-size:11px;font-weight:700;
+  background:rgba(255,255,255,.5);padding:0 5px;border-radius:3px;
+  line-height:1.6;letter-spacing:.02em;
+}
+.ec,.cc,.pc{
+  font-size:11px;background:rgba(255,255,255,.35);
+  padding:0 4px;border-radius:3px;line-height:1.6;
+}
+.ec{font-weight:600}
+.pc{font-style:italic;opacity:.7}
+
+/* children container & connector lines */
+.ch{
+  display:flex;flex-direction:column;position:relative;
+  padding-left:28px;margin-left:10px;
+}
+.ch::before{
+  content:'';position:absolute;left:0;top:50%;width:14px;
+  border-top:1.5px solid var(--lc);
+}
+.ch>.t{position:relative;padding:3px 0}
+.ch>.t::before{
+  content:'';position:absolute;left:-14px;top:0;bottom:0;
+  border-left:1.5px solid var(--lc);
+}
+.ch>.t:first-child::before{top:50%}
+.ch>.t:last-child::before{bottom:50%}
+.ch>.t:only-child::before{display:none}
+.ch>.t::after{
+  content:'';position:absolute;left:-14px;top:50%;width:14px;
+  border-top:1.5px solid var(--lc);
+}
+.ch>.t:only-child::after{left:-28px;width:28px}
+
+.t.collapsed>.ch{display:none}
+.t.collapsed>.b>.ti{transform:rotate(-90deg)}
+
+/* legend */
+.legend{
+  display:inline-flex;align-items:center;gap:6px;
+  font-size:12px;color:#888;
+}
+.legend-bar{
+  width:100px;height:12px;border-radius:3px;border:1px solid rgba(0,0,0,.1);
+}
+</style>
+</head>
+<body>
+<div class="hdr">
+  <h1>选题模式分类树</h1>
+  <span class="eid">execution_id = {{EID}}</span>
+  <button onclick="ea()">全部展开</button>
+  <button onclick="ca()">全部收起</button>
+  <button onclick="lv(1)">展开1层</button>
+  <button onclick="lv(2)">展开2层</button>
+  <button onclick="lv(3)">展开3层</button>
+  <button onclick="lv(4)">展开4层</button>
+  <span class="legend">
+    <span>低分</span>
+    <span class="legend-bar" style="background:linear-gradient(to right,#fdf3e4,#c35005)"></span>
+    <span>高分</span>
+  </span>
+  <div class="dim-sel">
+    <span class="dim-label">得分维度</span>
+    {{DIM_BTNS}}
+  </div>
+</div>
+{{BODY}}
+<script>
+/* ===== 嵌入的分数数据 ===== */
+var S={{SCORES}};
+var DIMS={{DIMS}};
+var MX={{MAX}};
+var TH={
+  "\u5b9e\u8d28":{low:[253,243,228],high:[195,80,5],ns:"#f5e6d3"},
+  "\u5f62\u5f0f":{low:[232,244,253],high:[30,90,165],ns:"#d6e9f8"},
+  "\u610f\u56fe":{low:[235,249,225],high:[40,120,20],ns:"#dbefd0"}
+};
+var curDim=DIMS[0]||'rov';
+
+/* ===== 颜色计算(与 Python 侧等价) ===== */
+function norm(s,m){return(m<=0||s<=0)?0:Math.log1p(s)/Math.log1p(m)}
+function blend(lo,hi,t){
+  t=Math.max(0,Math.min(1,t));
+  return lo.map(function(l,i){return Math.round(l+(hi[i]-l)*t)});
+}
+function lum(r,g,b){return .299*r/255+.587*g/255+.114*b/255}
+function rgbHex(c){return'#'+c.map(function(v){return('0'+v.toString(16)).slice(-2)}).join('')}
+
+/* ===== 切换得分维度 ===== */
+function switchDim(dim){
+  curDim=dim;
+  document.querySelectorAll('.dim-btn').forEach(function(b){
+    b.classList.toggle('active',b.dataset.dim===dim);
+  });
+  document.querySelectorAll('.b[data-path]').forEach(function(b){
+    var st=b.dataset.st, path=b.dataset.path;
+    var info=(S[st]||{})[path];
+    var sc=(info!=null)?info[dim]:null;
+    var scEl=b.querySelector('.sc');
+    var th=TH[st];
+    if(!th)return;
+    if(sc!=null){
+      var mx=((MX[dim]||{})[st])||1;
+      var t=(sc>0)?norm(sc,mx):0;
+      var rgb=blend(th.low,th.high,t);
+      b.style.background=rgbHex(rgb);
+      var l=lum(rgb[0],rgb[1],rgb[2]);
+      b.style.color=l<.55?'#fff':'#4a3520';
+      if(scEl){scEl.textContent=sc.toFixed(2);scEl.style.display=''}
+    }else{
+      b.style.background=th.ns;
+      b.style.color='#6b5240';
+      if(scEl)scEl.style.display='none';
+    }
+  });
+}
+
+/* ===== 树操作 ===== */
+function tog(el){el.closest('.t').classList.toggle('collapsed')}
+function ea(){document.querySelectorAll('.t.collapsed').forEach(function(e){e.classList.remove('collapsed')})}
+function ca(){document.querySelectorAll('.t').forEach(function(e){if(e.querySelector(':scope>.ch'))e.classList.add('collapsed')})}
+function lv(n){document.querySelectorAll('.t').forEach(function(e){
+  var ch=e.querySelector(':scope>.ch');if(!ch)return;
+  var d=+(e.dataset.depth||0);
+  if(d<n)e.classList.remove('collapsed');else e.classList.add('collapsed');
+})}
+</script>
+</body>
+</html>
+"""
+
+# ---------------------------------------------------------------------------
+# Entry
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    execution_id = 58
+    html_content = generate_tree_html(execution_id)
+    out = Path(__file__).resolve().parent / "new_result" / f"topic_pattern_tree_{execution_id}.html"
+    out.parent.mkdir(parents=True, exist_ok=True)
+    out.write_text(html_content, encoding="utf-8")
+    print(f"已生成: {out}")

+ 279 - 0
examples/demand/tree_html_back.py

@@ -0,0 +1,279 @@
+from __future__ import annotations
+
+import html as html_lib
+from dataclasses import dataclass, field
+from pathlib import Path
+
+from sqlalchemy import text
+
+from examples.demand.db_manager import DatabaseManager
+
+db = DatabaseManager()
+
+SOURCE_TYPES = ["实质", "形式", "意图"]
+
+THEME = {
+    "实质": {"root": "#e8841a", "mid": "#f0c389", "leaf": "#fae6cc", "line": "#d4a574"},
+    "形式": {"root": "#5b9bd5", "mid": "#a3c9e8", "leaf": "#daeaf8", "line": "#8cb9dc"},
+    "意图": {"root": "#70ad47", "mid": "#a9d18e", "leaf": "#dbefd0", "line": "#8fc270"},
+}
+
+
@dataclass
class CatNode:
    """One node of the topic-pattern category tree (a topic_pattern_category row)."""

    id: int
    name: str
    source_stable_id: int | None
    source_type: str
    description: str | None
    level: int | None
    parent_id: int | None
    element_count: int
    children: list[CatNode] = field(default_factory=list)

    @property
    def subtree_element_count(self) -> int:
        """element_count of this node plus all of its descendants."""
        own = self.element_count or 0
        return own + sum(child.subtree_element_count for child in self.children)
+
+
def fetch_categories(eid: int) -> list[dict]:
    """Load every topic_pattern_category row for one execution as a plain dict."""
    session = db.get_session()
    try:
        stmt = text(
            "SELECT id, source_stable_id, source_type, name, description, "
            "level, parent_id, element_count "
            "FROM topic_pattern_category WHERE execution_id = :eid "
            "ORDER BY source_type, level, id"
        )
        result = session.execute(stmt, {"eid": eid}).mappings().fetchall()
        return [dict(row) for row in result]
    finally:
        session.close()
+
+
def build_trees(categories: list[dict]) -> dict[str, list[CatNode]]:
    """Assemble CatNode forests from flat rows, grouped by source_type.

    Rows whose parent_id is falsy or unknown become roots; every node's
    children end up sorted by id.
    """
    by_id: dict[int, CatNode] = {}
    for row in categories:
        node = CatNode(
            id=row["id"],
            name=row["name"],
            source_stable_id=row.get("source_stable_id"),
            source_type=row["source_type"],
            description=row.get("description"),
            level=row.get("level"),
            parent_id=row.get("parent_id"),
            element_count=row.get("element_count") or 0,
        )
        by_id[node.id] = node

    forests: dict[str, list[CatNode]] = {}
    for node in by_id.values():
        parent = by_id.get(node.parent_id) if node.parent_id else None
        if parent is not None:
            parent.children.append(node)
        else:
            forests.setdefault(node.source_type, []).append(node)

    for node in by_id.values():
        node.children.sort(key=lambda child: child.id)

    return forests
+
+
+# ---------------------------------------------------------------------------
+# HTML rendering
+# ---------------------------------------------------------------------------
+
def _node_html(n: CatNode, th: dict, depth: int = 0, is_root: bool = False) -> str:
    """Render one category node (and, recursively, its subtree) as HTML.

    Args:
        n: node to render.
        th: theme dict providing "root"/"mid"/"leaf"/"line" colors.
        depth: recursion depth, emitted as data-depth for expand/collapse.
        is_root: True for top-level nodes, which get the strong root color.

    Returns:
        An HTML fragment string for this node and its children.
    """
    has_ch = bool(n.children)
    ec = n.element_count or 0
    cc = len(n.children)

    # Badge background/text colors depend on the node's role in the tree.
    if is_root:
        bg, tc = th["root"], "#fff"
    elif has_ch:
        bg, tc = th["mid"], "#4a3520"
    else:
        bg, tc = th["leaf"], "#6b5240"

    badge_inner: list[str] = []
    if has_ch:
        badge_inner.append('<span class="ti">\u25BC</span>')
    badge_inner.append(f'<span class="nn">{html_lib.escape(n.name)}</span>')
    if n.source_stable_id is not None:
        badge_inner.append(f'<span class="si">{n.source_stable_id}</span>')
    if ec:
        badge_inner.append(f'<span class="ec">{ec}</span>')
    if cc:
        badge_inner.append(f'<span class="cc">{cc}\u25B6</span>')

    onclick = ' onclick="tog(this)"' if has_ch else ''
    cls = "b e" if has_ch else "b"
    title = f' title="{html_lib.escape(n.description)}"' if n.description else ""

    h = f'<div class="t" data-depth="{depth}">'
    h += f'<div class="{cls}" style="background:{bg};color:{tc}"{onclick}{title}>'
    h += "".join(badge_inner)
    h += "</div>"

    if has_ch:
        # Children container; the --lc CSS variable drives connector-line color.
        h += f'<div class="ch" style="--lc:{th["line"]}">'
        for child in n.children:
            h += _node_html(child, th, depth + 1)
        h += "</div>"

    h += "</div>"
    return h
+
+
def _section_html(source_type: str, roots: list[CatNode]) -> str:
    """Render one source_type section: a colored header followed by its root trees."""
    th = THEME.get(source_type, THEME["实质"])
    total = sum(r.subtree_element_count for r in roots)

    pieces = [
        '<div class="sec">',
        f'<div class="sh" style="background:{th["root"]}">'
        f"\u25BC {html_lib.escape(source_type)} ({total})</div>",
        '<div class="sb">',
    ]
    pieces.extend(_node_html(r, th, depth=0, is_root=True) for r in roots)
    pieces.append("</div></div>")
    return "".join(pieces)
+
+
def generate_tree_html(eid: int) -> str:
    """Build the complete standalone HTML page for one execution's category trees."""
    trees = build_trees(fetch_categories(eid))
    sections = [_section_html(st, trees[st]) for st in SOURCE_TYPES if st in trees]
    page = _PAGE_HTML.replace("{{EID}}", str(eid))
    return page.replace("{{BODY}}", "\n".join(sections))
+
+
+# ---------------------------------------------------------------------------
+# Full-page HTML template
+# ---------------------------------------------------------------------------
+
+_PAGE_HTML = r"""<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width,initial-scale=1">
+<title>分类树 · execution_id={{EID}}</title>
+<style>
+*{box-sizing:border-box;margin:0;padding:0}
+body{
+  font-family:-apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,"Noto Sans SC",sans-serif;
+  background:#faf8f5;color:#333;padding:24px;
+}
+/* header */
+.hdr{display:flex;align-items:center;gap:10px;margin-bottom:28px;flex-wrap:wrap}
+.hdr h1{font-size:20px;font-weight:700;color:#4a3520}
+.hdr .eid{color:#999;font-size:13px}
+.hdr button{
+  padding:5px 12px;border:1px solid #d0c8c0;border-radius:6px;
+  background:#fff;cursor:pointer;font-size:12px;color:#4a3520;
+}
+.hdr button:hover{background:#f5ebe0}
+
+/* section */
+.sec{margin-bottom:36px}
+.sh{
+  display:inline-block;padding:8px 18px;border-radius:8px;color:#fff;
+  font-size:15px;font-weight:700;margin-bottom:14px;
+}
+.sb{overflow-x:auto;padding:12px 0 12px 4px}
+
+/* ---- horizontal tree ---- */
+.t{display:inline-flex;align-items:center}
+
+/* badge */
+.b{
+  display:inline-flex;align-items:center;gap:5px;
+  padding:4px 10px;border-radius:5px;white-space:nowrap;font-size:13px;
+  box-shadow:0 1px 2px rgba(0,0,0,.08);user-select:none;
+}
+.b.e{cursor:pointer}
+.b.e:hover{filter:brightness(1.06);box-shadow:0 2px 5px rgba(0,0,0,.12)}
+.ti{font-size:10px;transition:transform .15s ease;display:inline-block}
+.nn{font-weight:600}
+.si{font-size:11px;opacity:.55}
+.ec,.cc{
+  font-size:11px;background:rgba(255,255,255,.45);
+  padding:0 4px;border-radius:3px;line-height:1.6;
+}
+.ec{font-weight:600}
+
+/* children container */
+.ch{
+  display:flex;flex-direction:column;position:relative;
+  padding-left:28px;margin-left:10px;
+}
+/* horizontal line from parent to junction */
+.ch::before{
+  content:'';position:absolute;left:0;top:50%;width:14px;
+  border-top:1.5px solid var(--lc);
+}
+/* each child row */
+.ch>.t{position:relative;padding:3px 0}
+/* vertical line between siblings */
+.ch>.t::before{
+  content:'';position:absolute;left:-14px;top:0;bottom:0;
+  border-left:1.5px solid var(--lc);
+}
+.ch>.t:first-child::before{top:50%}
+.ch>.t:last-child::before{bottom:50%}
+.ch>.t:only-child::before{display:none}
+/* horizontal connector to child */
+.ch>.t::after{
+  content:'';position:absolute;left:-14px;top:50%;width:14px;
+  border-top:1.5px solid var(--lc);
+}
+.ch>.t:only-child::after{left:-28px;width:28px}
+
+/* collapsed state */
+.t.collapsed>.ch{display:none}
+.t.collapsed>.b>.ti{transform:rotate(-90deg)}
+</style>
+</head>
+<body>
+<div class="hdr">
+  <h1>选题模式分类树</h1>
+  <span class="eid">execution_id = {{EID}}</span>
+  <button onclick="ea()">全部展开</button>
+  <button onclick="ca()">全部收起</button>
+  <button onclick="lv(1)">展开1层</button>
+  <button onclick="lv(2)">展开2层</button>
+  <button onclick="lv(3)">展开3层</button>
+  <button onclick="lv(4)">展开4层</button>
+</div>
+{{BODY}}
+<script>
+function tog(el){el.closest('.t').classList.toggle('collapsed')}
+function ea(){document.querySelectorAll('.t.collapsed').forEach(function(e){e.classList.remove('collapsed')})}
+function ca(){document.querySelectorAll('.t').forEach(function(e){if(e.querySelector(':scope>.ch'))e.classList.add('collapsed')})}
+function lv(n){document.querySelectorAll('.t').forEach(function(e){
+  var ch=e.querySelector(':scope>.ch');if(!ch)return;
+  var d=+(e.dataset.depth||0);
+  if(d<n)e.classList.remove('collapsed');else e.classList.add('collapsed');
+})}
+</script>
+</body>
+</html>
+"""
+
+# ---------------------------------------------------------------------------
+# Entry
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    execution_id = 58
+    html_content = generate_tree_html(execution_id)
+    out = Path(__file__).resolve().parent / "new_result" / f"topic_pattern_tree_{execution_id}.html"
+    out.parent.mkdir(parents=True, exist_ok=True)
+    out.write_text(html_content, encoding="utf-8")
+    print(f"已生成: {out}")

+ 137 - 0
examples/demand/tree_service.py

@@ -0,0 +1,137 @@
+from __future__ import annotations
+
+from collections.abc import Iterable
+from pathlib import Path
+
+from sqlalchemy import text
+
+from examples.demand.db_manager import DatabaseManager
+
+db = DatabaseManager()
+
+
+def _fmt_node(name: str, level: int | None) -> str:
+    if level is None:
+        return f"{name}[L?]"
+    return f"{name}[L{int(level)}]"
+
+
def _fetch_roots_by_name(session, execution_id: int, name: str):
    """Return all (id, name, level) category rows matching *name* in this run."""
    stmt = text(
        """
            SELECT id, name, level
            FROM topic_pattern_category
            WHERE execution_id = :execution_id AND name = :name
            ORDER BY id
            """
    )
    params = {"execution_id": execution_id, "name": name}
    return session.execute(stmt, params).fetchall()
+
+
def _fetch_children_by_parent(session, execution_id: int, parent_id: int):
    """Return all (id, name, level) rows whose parent is *parent_id* in this run."""
    stmt = text(
        """
            SELECT id, name, level
            FROM topic_pattern_category
            WHERE execution_id = :execution_id AND parent_id = :parent_id
            ORDER BY id
            """
    )
    params = {"execution_id": execution_id, "parent_id": parent_id}
    return session.execute(stmt, params).fetchall()
+
+
def _collect_subtree_lines(
        session,
        execution_id: int,
        node_id: int,
        name: str,
        level: int | None,
        depth: int,
) -> list[str]:
    """Depth-first render of the subtree rooted at *node_id*.

    Each line is the formatted node label indented two spaces per depth level.
    """
    out: list[str] = ["  " * depth + _fmt_node(name, level)]
    for child in _fetch_children_by_parent(session, execution_id, node_id):
        # child columns: (id, name, level) — recurse one level deeper.
        out += _collect_subtree_lines(
            session, execution_id, child[0], child[1], child[2], depth + 1
        )
    return out
+
+
def build_category_trees_text(execution_id: int, names: Iterable[str]) -> str:
    """Build a text rendering of the category trees rooted at each *name*.

    For every name, rows in ``topic_pattern_category`` with that name (and the
    given ``execution_id``) are taken as roots and their subtrees are walked via
    ``parent_id``, indenting two spaces per level. Each line is formatted as
    ``name[L<level>]`` (``L?`` when the level column is NULL).

    One block is emitted per input name; a name matching several rows yields
    several subtrees within its block, separated by a blank line. A name with
    no match yields a single ``name[L?]`` line. Blocks are joined by blank
    lines; an empty/blank input list yields an empty string.
    """
    wanted = [str(n).strip() for n in names if n is not None and str(n).strip()]
    if not wanted:
        return ""

    session = db.get_session()
    blocks: list[str] = []
    try:
        for name in wanted:
            root_rows = _fetch_roots_by_name(session, execution_id, name)
            if not root_rows:
                blocks.append(_fmt_node(name, None))
                continue
            subtree_texts = [
                "\n".join(
                    _collect_subtree_lines(
                        session, execution_id, row[0], row[1], row[2], 0
                    )
                )
                for row in root_rows
            ]
            blocks.append("\n\n".join(subtree_texts))
    finally:
        session.close()

    return "\n\n".join(blocks)
+
+
if __name__ == "__main__":
    # Ad-hoc driver: dump the category trees for one snapshot run to a file.
    execution_id = 58
    names = [
        "健康养生", "公共安全", "地标景观", "防诈反骗", "身体健康",
        "饮食调理", "地形水域", "建筑地标", "自然地标", "医疗卫生",
        "综合风光", "居家生活", "办事指南", "政策法规", "自然奇景",
        "中医养生", "烹饪技巧", "安全防护", "皮肤护理", "地质奇观",
        "社会保障", "心理认知", "办事规程", "养老规划", "金融理财",
        "农业农村", "补贴福利", "禁毒防毒", "国家安全", "设施安全",
    ]
    text_out = build_category_trees_text(execution_id, names)
    out_path = Path(__file__).resolve().parent / "new_result" / f"category_tree_{execution_id}.txt"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(text_out, encoding="utf-8")
    print(out_path)

+ 20 - 0
examples/demand/vids_prepare.py

@@ -0,0 +1,20 @@
+from collections import defaultdict
+
+from examples.demand.data_query_tools import (
+    get_channel_content_ids_by_merge_leve2_list,
+)
+from examples.demand.pattern_builds.pattern_service import run_category_element_snapshot
+from examples.demand.piaoquan_prepare import prepare
+
+
def merge_leve2_list_prepare(cluster_name, merge_leve2_list):
    """Prepare piaoquan data for a merged level-2 category list.

    Args:
        cluster_name: Name of the cluster the categories belong to.
        merge_leve2_list: Level-2 category names to prepare data for.

    Returns:
        The snapshot execution id when preparation ran, otherwise ``None``.
    """
    # NOTE(review): the snapshot build below is disabled and the run id is
    # hard-coded to reuse an existing snapshot — re-enable before a fresh run.
    # post_ids = get_channel_content_ids_by_merge_leve2_list(merge_leve2_list)
    # execution_id = run_category_element_snapshot(post_ids=post_ids, cluster_name=cluster_name)
    execution_id = 58
    if execution_id:
        prepare(merge_leve2_list, execution_id)
        return execution_id
    # Explicit instead of the original implicit fall-through return.
    return None
+
if __name__ == '__main__':
    # Ad-hoc driver for the "泛知识*生活科普" cluster.
    categories = ['知识科普', '健康知识', '饮食健康', '食品安全', '生活技巧科普',
                  '生活小妙招', '美食教程', '人财诈骗', '旅行攻略', '本地新闻',
                  '旅行记录', '本地生活']
    execution_id = merge_leve2_list_prepare('R_50*泛知识*生活科普', categories)
    print(execution_id)

+ 5 - 5
examples/demand/weight_score_query_tools.py

@@ -14,7 +14,7 @@ from examples.demand.demand_agent_context import TopicBuildAgentContext
 from examples.demand.demand_pattern_tools import _log_tool_input, _log_tool_output
 
 _VALID_LEVELS = {"元素", "分类"}
-_VALID_DIMENSIONS = {"实质", "形式", "意图"}
+_VALID_DIMENSIONS = {"实质", "形式"}
 
 
 def _get_weight_file_path(level: str, dimension: str) -> Path:
@@ -50,13 +50,13 @@ def _validate_params(level: str, dimension: str):
         raise ValueError(f"dimension 参数非法: {dimension},可选值: {sorted(_VALID_DIMENSIONS)}")
 
 
-@tool("查询元素或分类权重分排名区间。参数:level(元素/分类)、dimension(实质/形式/意图)、start(起始排名,含,从1开始)、end(结束排名,含,从1开始)。")
+@tool("查询元素或分类权重分排名区间。参数:level(元素/分类)、dimension(实质/形式)、start(起始排名,含,从1开始)、end(结束排名,含,从1开始)。")
 def get_weight_score_topn(level: str, dimension: str, start: int = 1, end: int = 10) -> str:
     """查询元素或分类权重分排名区间。
 
     Args:
         level: 查询层级,元素 或 分类。
-        dimension: 查询维度,实质 / 形式 / 意图
+        dimension: 查询维度,实质 / 形式。
         start: 起始排名(包含),从 1 开始。
         end: 结束排名(包含),从 1 开始。
 
@@ -105,13 +105,13 @@ def get_weight_score_topn(level: str, dimension: str, start: int = 1, end: int =
         return _log_tool_output("get_weight_score_topn", f"查询失败: {e}")
 
 
-@tool("批量查询指定名称的权重分。参数:level(元素/分类)、dimension(实质/形式/意图)、names(名称列表)。")
+@tool("批量查询指定名称的权重分。参数:level(元素/分类)、dimension(实质/形式)、names(名称列表)。")
 def get_weight_score_by_name(level: str, dimension: str, names: list[str]) -> str:
     """批量查询指定名称的权重分。
 
     Args:
         level: 查询层级,元素 或 分类。
-        dimension: 查询维度,实质 / 形式 / 意图
+        dimension: 查询维度,实质 / 形式。
         names: 要查询的名称列表(元素名或分类名),顺序与返回 results 一一对应。
 
     Returns:

+ 1 - 0
requirements.txt

@@ -28,3 +28,4 @@ pyfim==6.28
 loguru>=0.7.0
 pyodps>=0.12.0
 DBUtils>=3.1.0
+psycopg2-binary>=2.9.9