SHA1
--- a/script/data_processing/build_match_graph.py
+++ b/script/data_processing/build_match_graph.py
@@ -645,11 +645,30 @@ def process_filtered_result(
 
				     useful_category_edges = [e for e in category_edges
			
 
				                             if e["源节点ID"] in useful_expanded_ids and e["目标节点ID"] in useful_expanded_ids]
			
 
				 
			
 
				+    # 5. 获取直接匹配层（第2层）和扩展层（第3层）之间的所有边（不仅仅是属于边）
			
 
				+    # 这些边连接了直接匹配的人设节点和扩展的分类节点
			
 
				+    cross_layer_edges = []
			
 
				+    for edge in edges_data.get("边列表", []):
			
 
				+        src, tgt = edge["源节点ID"], edge["目标节点ID"]
			
 
				+        edge_type = edge["边类型"]
			
 
				+        # 跳过已经收集的属于边（避免重复）
			
 
				+        if edge_type == "属于":
			
 
				+            continue
			
 
				+        # 一端在直接匹配层，另一端在扩展层
			
 
				+        src_in_direct = src in persona_node_ids
			
 
				+        src_in_expanded = src in useful_expanded_ids
			
 
				+        tgt_in_direct = tgt in persona_node_ids
			
 
				+        tgt_in_expanded = tgt in useful_expanded_ids
			
 
				+        if (src_in_direct and tgt_in_expanded) or (src_in_expanded and tgt_in_direct):
			
 
				+            cross_layer_edges.append(edge)
			
 
				+
			
 
				     # 合并节点列表
			
 
				     all_nodes = post_nodes + persona_nodes + useful_expanded_nodes
			
 
				 
			
 
				     # 合并边列表（加入帖子内的属于边）
			
 
				-    all_edges = post_belong_edges + match_edges + persona_edges + post_edges + useful_expanded_edges + useful_category_edges + post_edges_via_expanded
			
 
				+    all_edges = (post_belong_edges + match_edges + persona_edges + post_edges +
			
 
				+                 useful_expanded_edges + useful_category_edges + cross_layer_edges +
			
 
				+                 post_edges_via_expanded)
			
 
				     # 去重边
			
 
				     seen_edges = set()
			
 
				     unique_edges = []
			
@@ -709,6 +728,7 @@ def process_filtered_result(
 
				                 "匹配边数": len(match_edges),
			
 
				                 "人设节点间边数": len(persona_edges),
			
 
				                 "扩展边数（有效）": len(useful_expanded_edges),
			
 
				+                "跨层边数": len(cross_layer_edges),
			
 
				                 "帖子镜像边数（直接）": len(post_edges),
			
 
				                 "帖子镜像边数（二阶）": len(post_edges_via_expanded),
			
 
				                 "总节点数": len(all_nodes),
			
@@ -724,6 +744,7 @@ def process_filtered_result(
 
				         "匹配边列表": match_edges,
			
 
				         "人设节点间边列表": persona_edges,
			
 
				         "扩展边列表": useful_expanded_edges,
			
 
				+        "跨层边列表": cross_layer_edges,
			
 
				         "帖子镜像边列表（直接）": post_edges,
			
 
				         "帖子镜像边列表（二阶）": post_edges_via_expanded,
			
 
				         "节点列表": all_nodes,
			
@@ -748,6 +769,7 @@ def process_filtered_result(
 
				         "匹配边数": len(match_edges),
			
 
				         "人设边数": len(persona_edges),
			
 
				         "扩展边数": len(useful_expanded_edges),
			
 
				+        "跨层边数": len(cross_layer_edges),
			
 
				         "帖子边数（直接）": len(post_edges),
			
 
				         "帖子边数（二阶）": len(post_edges_via_expanded),
			
 
				         "总节点数": len(all_nodes),
			
@@ -805,7 +827,7 @@ def main():
 
				         result = process_filtered_result(filtered_file, nodes_data, edges_data, output_dir)
			
 
				         results.append(result)
			
 
				         print(f"  帖子节点: {result['帖子节点数']}, 人设节点: {result['人设节点数']}, 扩展节点: {result['扩展节点数']}")
			
 
				-        print(f"  匹配边: {result['匹配边数']}, 人设边: {result['人设边数']}, 扩展边: {result['扩展边数']}")
			
 
				+        print(f"  匹配边: {result['匹配边数']}, 人设边: {result['人设边数']}, 扩展边: {result['扩展边数']}, 跨层边: {result['跨层边数']}")
			
 
				         print(f"  帖子边(直接): {result['帖子边数（直接）']}, 帖子边(二阶): {result['帖子边数（二阶）']}")
			
 
				 
			
 
				     # 汇总统计
			
@@ -819,6 +841,7 @@ def main():
 
				     total_match = sum(r['匹配边数'] for r in results)
			
 
				     total_persona_edges = sum(r['人设边数'] for r in results)
			
 
				     total_expanded_edges = sum(r['扩展边数'] for r in results)
			
 
				+    total_cross_layer_edges = sum(r['跨层边数'] for r in results)
			
 
				     total_post_edges_direct = sum(r['帖子边数（直接）'] for r in results)
			
 
				     total_post_edges_2hop = sum(r['帖子边数（二阶）'] for r in results)
			
 
				     print(f"  总帖子节点: {total_post}")
			
@@ -827,6 +850,7 @@ def main():
 
				     print(f"  总匹配边: {total_match}")
			
 
				     print(f"  总人设边: {total_persona_edges}")
			
 
				     print(f"  总扩展边: {total_expanded_edges}")
			
 
				+    print(f"  总跨层边: {total_cross_layer_edges}")
			
 
				     print(f"  总帖子边(直接): {total_post_edges_direct}")
			
 
				     print(f"  总帖子边(二阶): {total_post_edges_2hop}")
			
 
				     print(f"\n输出目录: {output_dir}")
			
--- a/script/data_processing/build_post_tree.py
+++ b/script/data_processing/build_post_tree.py
@@ -0,0 +1,185 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+构建帖子树的中间数据
			
 
				+
			
 
				+输入：match_graph/*.json, results/*.json
			
 
				+输出：match_graph/post_trees.json（包含所有帖子的树结构）
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+import sys
			
 
				+
			
 
				+# 添加项目根目录到路径
			
 
				+project_root = Path(__file__).parent.parent.parent
			
 
				+sys.path.insert(0, str(project_root))
			
 
				+
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				+
			
 
				+
			
 
				+def build_post_trees():
			
 
				+    """构建所有帖子的树数据"""
			
 
				+    config = PathConfig()
			
 
				+
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"输出版本: {config.output_version}")
			
 
				+    print()
			
 
				+
			
 
				+    match_graph_dir = config.intermediate_dir / "match_graph"
			
 
				+    results_dir = config.intermediate_dir.parent / "results"
			
 
				+    output_file = match_graph_dir / "post_trees.json"
			
 
				+
			
 
				+    # 读取所有匹配图谱文件
			
 
				+    graph_files = sorted(match_graph_dir.glob("*_match_graph.json"))
			
 
				+    print(f"找到 {len(graph_files)} 个匹配图谱文件")
			
 
				+
			
 
				+    all_post_trees = []
			
 
				+
			
 
				+    for i, graph_file in enumerate(graph_files, 1):
			
 
				+        print(f"\n[{i}/{len(graph_files)}] 处理: {graph_file.name}")
			
 
				+
			
 
				+        with open(graph_file, "r", encoding="utf-8") as f:
			
 
				+            match_graph_data = json.load(f)
			
 
				+
			
 
				+        post_id = match_graph_data["说明"]["帖子ID"]
			
 
				+        post_title = match_graph_data["说明"].get("帖子标题", "")
			
 
				+
			
 
				+        # 读取完整帖子详情
			
 
				+        post_detail = {
			
 
				+            "title": post_title,
			
 
				+            "post_id": post_id
			
 
				+        }
			
 
				+        how_file = results_dir / f"{post_id}_how.json"
			
 
				+        if how_file.exists():
			
 
				+            with open(how_file, "r", encoding="utf-8") as f:
			
 
				+                how_data = json.load(f)
			
 
				+                if "帖子详情" in how_data:
			
 
				+                    post_detail = how_data["帖子详情"]
			
 
				+                    post_detail["post_id"] = post_id
			
 
				+            print(f"  读取帖子详情: {how_file.name}")
			
 
				+
			
 
				+        # 获取帖子点和帖子标签
			
 
				+        post_points = match_graph_data.get("帖子点节点列表", [])
			
 
				+        post_tags = match_graph_data.get("帖子标签节点列表", [])
			
 
				+        belong_edges = match_graph_data.get("帖子属于边列表", [])
			
 
				+
			
 
				+        print(f"  帖子点: {len(post_points)}, 帖子标签: {len(post_tags)}, 属于边: {len(belong_edges)}")
			
 
				+
			
 
				+        # 构建树结构
			
 
				+        # 维度颜色
			
 
				+        dim_colors = {
			
 
				+            "灵感点": "#f39c12",
			
 
				+            "目的点": "#3498db",
			
 
				+            "关键点": "#9b59b6"
			
 
				+        }
			
 
				+
			
 
				+        # 构建节点映射
			
 
				+        point_map = {}
			
 
				+        for n in post_points:
			
 
				+            point_map[n["节点ID"]] = {
			
 
				+                "id": n["节点ID"],
			
 
				+                "name": n["节点名称"],
			
 
				+                "nodeType": "点",
			
 
				+                "level": n.get("节点层级", ""),
			
 
				+                "dimColor": dim_colors.get(n.get("节点层级", ""), "#888"),
			
 
				+                "description": n.get("描述", ""),
			
 
				+                "children": []
			
 
				+            }
			
 
				+
			
 
				+        tag_map = {}
			
 
				+        for n in post_tags:
			
 
				+            tag_map[n["节点ID"]] = {
			
 
				+                "id": n["节点ID"],
			
 
				+                "name": n["节点名称"],
			
 
				+                "nodeType": "标签",
			
 
				+                "level": n.get("节点层级", ""),
			
 
				+                "dimColor": dim_colors.get(n.get("节点层级", ""), "#888"),
			
 
				+                "weight": n.get("权重", 0),
			
 
				+                "children": []
			
 
				+            }
			
 
				+
			
 
				+        # 根据属于边，把标签挂到点下面
			
 
				+        for e in belong_edges:
			
 
				+            tag_node = tag_map.get(e["源节点ID"])
			
 
				+            point_node = point_map.get(e["目标节点ID"])
			
 
				+            if tag_node and point_node:
			
 
				+                point_node["children"].append(tag_node)
			
 
				+
			
 
				+        # 按维度分组点节点
			
 
				+        dimensions = ["灵感点", "目的点", "关键点"]
			
 
				+        dimension_children = []
			
 
				+
			
 
				+        for dim in dimensions:
			
 
				+            dim_points = [
			
 
				+                point_map[n["节点ID"]]
			
 
				+                for n in post_points
			
 
				+                if n.get("节点层级") == dim and n["节点ID"] in point_map
			
 
				+            ]
			
 
				+
			
 
				+            if dim_points:
			
 
				+                dim_node = {
			
 
				+                    "id": f"dim_{dim}",
			
 
				+                    "name": dim,
			
 
				+                    "nodeType": "维度",
			
 
				+                    "isDimension": True,
			
 
				+                    "dimColor": dim_colors[dim],
			
 
				+                    "children": dim_points
			
 
				+                }
			
 
				+                dimension_children.append(dim_node)
			
 
				+
			
 
				+        # 根节点（帖子）
			
 
				+        root_node = {
			
 
				+            "id": f"post_{post_id}",
			
 
				+            "name": post_title[:20] + "..." if len(post_title) > 20 else post_title,
			
 
				+            "nodeType": "帖子",
			
 
				+            "isRoot": True,
			
 
				+            "postDetail": post_detail,
			
 
				+            "children": dimension_children
			
 
				+        }
			
 
				+
			
 
				+        # 统计节点数
			
 
				+        total_nodes = 1 + len(dimension_children)  # 根节点 + 维度节点
			
 
				+        for dim_node in dimension_children:
			
 
				+            total_nodes += len(dim_node["children"])  # 点节点
			
 
				+            for point_node in dim_node["children"]:
			
 
				+                total_nodes += len(point_node["children"])  # 标签节点
			
 
				+
			
 
				+        post_tree = {
			
 
				+            "postId": post_id,
			
 
				+            "postTitle": post_title,
			
 
				+            "postDetail": post_detail,
			
 
				+            "root": root_node,
			
 
				+            "stats": {
			
 
				+                "totalNodes": total_nodes,
			
 
				+                "pointCount": len(post_points),
			
 
				+                "tagCount": len(post_tags)
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        all_post_trees.append(post_tree)
			
 
				+        print(f"  构建完成: {total_nodes} 个节点")
			
 
				+
			
 
				+    # 输出
			
 
				+    output_data = {
			
 
				+        "说明": {
			
 
				+            "描述": "帖子树结构数据（每个帖子一棵树）",
			
 
				+            "帖子数": len(all_post_trees)
			
 
				+        },
			
 
				+        "postTrees": all_post_trees
			
 
				+    }
			
 
				+
			
 
				+    with open(output_file, "w", encoding="utf-8") as f:
			
 
				+        json.dump(output_data, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    print()
			
 
				+    print("=" * 60)
			
 
				+    print(f"构建完成!")
			
 
				+    print(f"  帖子数: {len(all_post_trees)}")
			
 
				+    print(f"  输出文件: {output_file}")
			
 
				+
			
 
				+    return output_file
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    build_post_trees()
			
--- a/script/data_processing/run_graph_pipeline.sh
+++ b/script/data_processing/run_graph_pipeline.sh
@@ -0,0 +1,129 @@
 
				+#!/bin/bash
			
 
				+# 图谱构建与可视化流程（步骤5-9）
			
 
				+#
			
 
				+# 依赖前置步骤（1-4）已执行完成：
			
 
				+#   1. extract_feature_categories.py
			
 
				+#   2. extract_features_from_posts.py
			
 
				+#   3. extract_current_posts.py
			
 
				+#   4. match_inspiration_features.py
			
 
				+#
			
 
				+# 本脚本执行：
			
 
				+#   5. filter_how_results.py      - 过滤how解构结果
			
 
				+#   6. extract_nodes_and_edges.py - 提取节点和边
			
 
				+#   7. build_persona_tree.py      - 构建人设树
			
 
				+#   8. build_match_graph.py       - 构建匹配图谱
			
 
				+#   9. visualize_match_graph.py   - 生成可视化HTML
			
 
				+#
			
 
				+# 使用方式：
			
 
				+#   ./run_graph_pipeline.sh              # 使用默认账号
			
 
				+#   ./run_graph_pipeline.sh 阿里多多酱    # 指定账号
			
 
				+#   ACCOUNT_NAME=xxx ./run_graph_pipeline.sh
			
 
				+
			
 
				+set -e  # 遇到错误立即退出
			
 
				+
			
 
				+# 获取脚本所在目录的绝对路径
			
 
				+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
			
 
				+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
			
 
				+
			
 
				+cd "$PROJECT_ROOT"
			
 
				+
			
 
				+# 颜色定义
			
 
				+GREEN='\033[0;32m'
			
 
				+RED='\033[0;31m'
			
 
				+YELLOW='\033[1;33m'
			
 
				+BLUE='\033[0;34m'
			
 
				+NC='\033[0m'
			
 
				+
			
 
				+print_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
			
 
				+print_success() { echo -e "${GREEN}[SUCCESS]${NC} $1"; }
			
 
				+print_error() { echo -e "${RED}[ERROR]${NC} $1"; }
			
 
				+print_step() { echo -e "${YELLOW}[$1]${NC} $2"; }
			
 
				+
			
 
				+# 执行单个步骤
			
 
				+run_step() {
			
 
				+    local step_num=$1
			
 
				+    local step_name=$2
			
 
				+    local script_name=$3
			
 
				+
			
 
				+    print_step "$step_num/5" "$step_name"
			
 
				+
			
 
				+    if python "script/data_processing/$script_name"; then
			
 
				+        print_success "$step_name 完成"
			
 
				+        echo ""
			
 
				+        return 0
			
 
				+    else
			
 
				+        print_error "$step_name 失败"
			
 
				+        return 1
			
 
				+    fi
			
 
				+}
			
 
				+
			
 
				+# 主处理函数
			
 
				+process_account() {
			
 
				+    local account_name=$1
			
 
				+
			
 
				+    echo ""
			
 
				+    echo "=========================================="
			
 
				+    echo "图谱构建与可视化流程"
			
 
				+    echo "账号: $account_name"
			
 
				+    echo "项目: $PROJECT_ROOT"
			
 
				+    echo "=========================================="
			
 
				+    echo ""
			
 
				+
			
 
				+    # 设置环境变量
			
 
				+    export ACCOUNT_NAME="$account_name"
			
 
				+
			
 
				+    # 步骤5: 过滤how解构结果
			
 
				+    run_step 1 "过滤how解构结果" "filter_how_results.py" || return 1
			
 
				+
			
 
				+    # 步骤6: 提取节点和边
			
 
				+    run_step 2 "提取节点和边" "extract_nodes_and_edges.py" || return 1
			
 
				+
			
 
				+    # 步骤7: 构建人设树
			
 
				+    run_step 3 "构建人设树" "build_persona_tree.py" || return 1
			
 
				+
			
 
				+    # 步骤8: 构建匹配图谱
			
 
				+    run_step 4 "构建匹配图谱" "build_match_graph.py" || return 1
			
 
				+
			
 
				+    # 步骤9: 生成可视化HTML
			
 
				+    run_step 5 "生成可视化HTML" "visualize_match_graph.py" || return 1
			
 
				+
			
 
				+    echo "=========================================="
			
 
				+    print_success "图谱构建与可视化流程完成！"
			
 
				+    echo "=========================================="
			
 
				+}
			
 
				+
			
 
				+# 获取默认账号
			
 
				+get_default_account() {
			
 
				+    python -c "
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+config_file = Path('config/accounts.json')
			
 
				+with open(config_file) as f:
			
 
				+    config = json.load(f)
			
 
				+print(config.get('default_account', ''))
			
 
				+"
			
 
				+}
			
 
				+
			
 
				+# 主逻辑
			
 
				+main() {
			
 
				+    local account_name=""
			
 
				+
			
 
				+    # 解析参数
			
 
				+    if [ -n "$1" ]; then
			
 
				+        account_name="$1"
			
 
				+    elif [ -n "$ACCOUNT_NAME" ]; then
			
 
				+        account_name="$ACCOUNT_NAME"
			
 
				+    else
			
 
				+        account_name=$(get_default_account)
			
 
				+        if [ -z "$account_name" ]; then
			
 
				+            print_error "未指定账号，请通过参数或环境变量指定"
			
 
				+            echo "用法: $0 <账号名>"
			
 
				+            exit 1
			
 
				+        fi
			
 
				+        print_info "使用默认账号: $account_name"
			
 
				+    fi
			
 
				+
			
 
				+    process_account "$account_name"
			
 
				+}
			
 
				+
			
 
				+main "$@"
			
--- a/script/data_processing/visualize_match_graph.py
+++ b/script/data_processing/visualize_match_graph.py
Autor	SHA1 Nachricht	Datum
yangxiaohui	d3022c2454 Merge remote-tracking branch 'origin/how_1125_v2' into how_1125_v1	vor 5 Tagen
yangxiaohui	8d67e969d1 feat: 添加帖子树可视化，优化布局为三层结构	vor 5 Tagen
yangxiaohui	a70cd23ef9 feat: 添加跨层边提取和图谱流程一键脚本	vor 5 Tagen
yangxiaohui	81f6ab587f refactor: 重命名图层标签	vor 5 Tagen
yangxiaohui	c47cfcbbd9 feat: 关系图展示完整路径并支持点击交互	vor 5 Tagen
yangxiaohui	36e1132d87 fix: 统一使用预计算路径节点高亮帖子标签边	vor 5 Tagen