hace 1 semana · 64380d838c
--- a/config/accounts.json
+++ b/config/accounts.json
@@ -32,7 +32,7 @@
 
				       "description": "未启用的示例账号"
			
 
				     }
			
 
				   ],
			
 
				-  "default_account": "阿里多多酱",
			
 
				+  "default_account": "阿里多多酱_1125",
			
 
				   "comment": "数据根目录可通过 data_root 配置（支持绝对路径、~、环境变量），也可通过 DATA_ROOT 环境变量覆盖",
			
 
				   "filter_mode": "exclude_current_posts",
			
 
				   "filter_modes": {
			
--- a/script/data_processing/build_match_graph.py
+++ b/script/data_processing/build_match_graph.py
@@ -0,0 +1,488 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+从匹配结果中构建帖子与人设的节点边关系图
			
 
				+
			
 
				+输入：
			
 
				+1. filtered_results目录下的匹配结果文件
			
 
				+2. 节点列表.json
			
 
				+3. 边关系.json
			
 
				+
			
 
				+输出：
			
 
				+1. match_graph目录下的节点边关系文件
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Set, Any, Optional
			
 
				+import sys
			
 
				+
			
 
				+# 添加项目根目录到路径
			
 
				+project_root = Path(__file__).parent.parent.parent
			
 
				+sys.path.insert(0, str(project_root))
			
 
				+
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				+
			
 
				+
			
 
				+def build_post_node_id(dimension: str, node_type: str, name: str) -> str:
			
 
				+    """构建帖子节点ID"""
			
 
				+    return f"帖子_{dimension}_{node_type}_{name}"
			
 
				+
			
 
				+
			
 
				+def build_persona_node_id(dimension: str, node_type: str, name: str) -> str:
			
 
				+    """构建人设节点ID"""
			
 
				+    return f"{dimension}_{node_type}_{name}"
			
 
				+
			
 
				+
			
 
				+def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
			
 
				+    """
			
 
				+    从匹配结果中提取帖子节点、人设节点和匹配边
			
 
				+
			
 
				+    Args:
			
 
				+        filtered_data: 匹配结果数据
			
 
				+
			
 
				+    Returns:
			
 
				+        (帖子节点列表, 人设节点ID集合, 匹配边列表)
			
 
				+    """
			
 
				+    post_nodes = []
			
 
				+    persona_node_ids = set()
			
 
				+    match_edges = []
			
 
				+
			
 
				+    how_result = filtered_data.get("how解构结果", {})
			
 
				+
			
 
				+    # 维度映射
			
 
				+    dimension_mapping = {
			
 
				+        "灵感点列表": "灵感点",
			
 
				+        "目的点列表": "目的点",
			
 
				+        "关键点列表": "关键点"
			
 
				+    }
			
 
				+
			
 
				+    for list_key, dimension in dimension_mapping.items():
			
 
				+        points = how_result.get(list_key, [])
			
 
				+
			
 
				+        for point in points:
			
 
				+            # 遍历how步骤列表
			
 
				+            how_steps = point.get("how步骤列表", [])
			
 
				+
			
 
				+            for step in how_steps:
			
 
				+                features = step.get("特征列表", [])
			
 
				+
			
 
				+                for feature in features:
			
 
				+                    feature_name = feature.get("特征名称", "")
			
 
				+                    weight = feature.get("权重", 0)
			
 
				+                    match_results = feature.get("匹配结果", [])
			
 
				+
			
 
				+                    if not feature_name:
			
 
				+                        continue
			
 
				+
			
 
				+                    # 如果有匹配结果，创建帖子节点和匹配边
			
 
				+                    if match_results:
			
 
				+                        # 创建帖子节点（标签类型）
			
 
				+                        post_node_id = build_post_node_id(dimension, "标签", feature_name)
			
 
				+                        post_node = {
			
 
				+                            "节点ID": post_node_id,
			
 
				+                            "节点名称": feature_name,
			
 
				+                            "节点类型": "标签",
			
 
				+                            "节点层级": dimension,
			
 
				+                            "权重": weight
			
 
				+                        }
			
 
				+
			
 
				+                        # 避免重复添加
			
 
				+                        if not any(n["节点ID"] == post_node_id for n in post_nodes):
			
 
				+                            post_nodes.append(post_node)
			
 
				+
			
 
				+                        # 处理每个匹配结果
			
 
				+                        for match in match_results:
			
 
				+                            persona_name = match.get("人设特征名称", "")
			
 
				+                            persona_dimension = match.get("人设特征层级", "")
			
 
				+                            persona_type = match.get("特征类型", "标签")
			
 
				+                            match_detail = match.get("匹配结果", {})
			
 
				+
			
 
				+                            if not persona_name or not persona_dimension:
			
 
				+                                continue
			
 
				+
			
 
				+                            # 构建人设节点ID
			
 
				+                            persona_node_id = build_persona_node_id(
			
 
				+                                persona_dimension, persona_type, persona_name
			
 
				+                            )
			
 
				+                            persona_node_ids.add(persona_node_id)
			
 
				+
			
 
				+                            # 创建匹配边
			
 
				+                            match_edge = {
			
 
				+                                "源节点ID": post_node_id,
			
 
				+                                "目标节点ID": persona_node_id,
			
 
				+                                "边类型": "匹配",
			
 
				+                                "边详情": {
			
 
				+                                    "相似度": match_detail.get("相似度", 0),
			
 
				+                                    "说明": match_detail.get("说明", "")
			
 
				+                                }
			
 
				+                            }
			
 
				+                            match_edges.append(match_edge)
			
 
				+
			
 
				+    return post_nodes, persona_node_ids, match_edges
			
 
				+
			
 
				+
			
 
				+def get_persona_nodes_details(
			
 
				+    persona_node_ids: Set[str],
			
 
				+    nodes_data: Dict
			
 
				+) -> List[Dict]:
			
 
				+    """
			
 
				+    从节点列表中获取人设节点的详细信息
			
 
				+
			
 
				+    Args:
			
 
				+        persona_node_ids: 人设节点ID集合
			
 
				+        nodes_data: 节点列表数据
			
 
				+
			
 
				+    Returns:
			
 
				+        人设节点详情列表
			
 
				+    """
			
 
				+    persona_nodes = []
			
 
				+    all_nodes = nodes_data.get("节点列表", [])
			
 
				+
			
 
				+    for node in all_nodes:
			
 
				+        if node["节点ID"] in persona_node_ids:
			
 
				+            persona_nodes.append(node)
			
 
				+
			
 
				+    return persona_nodes
			
 
				+
			
 
				+
			
 
				+def get_edges_between_nodes(
			
 
				+    node_ids: Set[str],
			
 
				+    edges_data: Dict
			
 
				+) -> List[Dict]:
			
 
				+    """
			
 
				+    获取指定节点之间的边关系
			
 
				+
			
 
				+    Args:
			
 
				+        node_ids: 节点ID集合
			
 
				+        edges_data: 边关系数据
			
 
				+
			
 
				+    Returns:
			
 
				+        节点之间的边列表
			
 
				+    """
			
 
				+    edges_between = []
			
 
				+    all_edges = edges_data.get("边列表", [])
			
 
				+
			
 
				+    for edge in all_edges:
			
 
				+        source_id = edge["源节点ID"]
			
 
				+        target_id = edge["目标节点ID"]
			
 
				+
			
 
				+        # 两个节点都在集合中
			
 
				+        if source_id in node_ids and target_id in node_ids:
			
 
				+            edges_between.append(edge)
			
 
				+
			
 
				+    return edges_between
			
 
				+
			
 
				+
			
 
				+def create_mirrored_post_edges(
			
 
				+    match_edges: List[Dict],
			
 
				+    persona_edges: List[Dict]
			
 
				+) -> List[Dict]:
			
 
				+    """
			
 
				+    根据人设节点之间的边，创建帖子节点之间的镜像边
			
 
				+
			
 
				+    逻辑：如果人设节点A和B之间有边，且帖子节点X匹配A，帖子节点Y匹配B，
			
 
				+    则创建帖子节点X和Y之间的镜像边
			
 
				+
			
 
				+    Args:
			
 
				+        match_edges: 匹配边列表（帖子节点 -> 人设节点）
			
 
				+        persona_edges: 人设节点之间的边列表
			
 
				+
			
 
				+    Returns:
			
 
				+        帖子节点之间的镜像边列表
			
 
				+    """
			
 
				+    # 构建人设节点到帖子节点的反向映射
			
 
				+    # persona_id -> [post_id1, post_id2, ...]
			
 
				+    persona_to_posts = {}
			
 
				+    for edge in match_edges:
			
 
				+        post_id = edge["源节点ID"]
			
 
				+        persona_id = edge["目标节点ID"]
			
 
				+        if persona_id not in persona_to_posts:
			
 
				+            persona_to_posts[persona_id] = []
			
 
				+        if post_id not in persona_to_posts[persona_id]:
			
 
				+            persona_to_posts[persona_id].append(post_id)
			
 
				+
			
 
				+    # 根据人设边创建帖子镜像边
			
 
				+    post_edges = []
			
 
				+    seen_edges = set()
			
 
				+
			
 
				+    for persona_edge in persona_edges:
			
 
				+        source_persona = persona_edge["源节点ID"]
			
 
				+        target_persona = persona_edge["目标节点ID"]
			
 
				+        edge_type = persona_edge["边类型"]
			
 
				+
			
 
				+        # 获取匹配到这两个人设节点的帖子节点
			
 
				+        source_posts = persona_to_posts.get(source_persona, [])
			
 
				+        target_posts = persona_to_posts.get(target_persona, [])
			
 
				+
			
 
				+        # 为每对帖子节点创建镜像边
			
 
				+        for src_post in source_posts:
			
 
				+            for tgt_post in target_posts:
			
 
				+                if src_post == tgt_post:
			
 
				+                    continue
			
 
				+
			
 
				+                # 使用排序后的key避免重复（A-B 和 B-A 视为同一条边）
			
 
				+                edge_key = tuple(sorted([src_post, tgt_post])) + (edge_type,)
			
 
				+                if edge_key in seen_edges:
			
 
				+                    continue
			
 
				+                seen_edges.add(edge_key)
			
 
				+
			
 
				+                post_edge = {
			
 
				+                    "源节点ID": src_post,
			
 
				+                    "目标节点ID": tgt_post,
			
 
				+                    "边类型": f"镜像_{edge_type}",  # 标记为镜像边
			
 
				+                    "边详情": {
			
 
				+                        "原始边类型": edge_type,
			
 
				+                        "源人设节点": source_persona,
			
 
				+                        "目标人设节点": target_persona
			
 
				+                    }
			
 
				+                }
			
 
				+                post_edges.append(post_edge)
			
 
				+
			
 
				+    return post_edges
			
 
				+
			
 
				+
			
 
				+def expand_one_layer(
			
 
				+    node_ids: Set[str],
			
 
				+    edges_data: Dict,
			
 
				+    nodes_data: Dict,
			
 
				+    edge_types: List[str] = None,
			
 
				+    direction: str = "both"
			
 
				+) -> tuple:
			
 
				+    """
			
 
				+    从指定节点扩展一层，获取相邻节点和连接边
			
 
				+
			
 
				+    Args:
			
 
				+        node_ids: 起始节点ID集合
			
 
				+        edges_data: 边关系数据
			
 
				+        nodes_data: 节点列表数据
			
 
				+        edge_types: 要扩展的边类型列表，None表示所有类型
			
 
				+        direction: 扩展方向
			
 
				+            - "outgoing": 只沿出边扩展（源节点在集合中，扩展到目标节点）
			
 
				+            - "incoming": 只沿入边扩展（目标节点在集合中，扩展到源节点）
			
 
				+            - "both": 双向扩展
			
 
				+
			
 
				+    Returns:
			
 
				+        (扩展的节点列表, 扩展的边列表, 扩展的节点ID集合)
			
 
				+    """
			
 
				+    expanded_node_ids = set()
			
 
				+    expanded_edges = []
			
 
				+    all_edges = edges_data.get("边列表", [])
			
 
				+
			
 
				+    # 找出所有与起始节点相连的边和节点
			
 
				+    for edge in all_edges:
			
 
				+        # 过滤边类型
			
 
				+        if edge_types and edge["边类型"] not in edge_types:
			
 
				+            continue
			
 
				+
			
 
				+        source_id = edge["源节点ID"]
			
 
				+        target_id = edge["目标节点ID"]
			
 
				+
			
 
				+        # 沿出边扩展：源节点在集合中，扩展到目标节点
			
 
				+        if direction in ["outgoing", "both"]:
			
 
				+            if source_id in node_ids and target_id not in node_ids:
			
 
				+                expanded_node_ids.add(target_id)
			
 
				+                expanded_edges.append(edge)
			
 
				+
			
 
				+        # 沿入边扩展：目标节点在集合中，扩展到源节点
			
 
				+        if direction in ["incoming", "both"]:
			
 
				+            if target_id in node_ids and source_id not in node_ids:
			
 
				+                expanded_node_ids.add(source_id)
			
 
				+                expanded_edges.append(edge)
			
 
				+
			
 
				+    # 获取扩展节点的详情
			
 
				+    expanded_nodes = []
			
 
				+    all_nodes = nodes_data.get("节点列表", [])
			
 
				+    for node in all_nodes:
			
 
				+        if node["节点ID"] in expanded_node_ids:
			
 
				+            # 标记为扩展节点
			
 
				+            node_copy = node.copy()
			
 
				+            node_copy["是否扩展"] = True
			
 
				+            expanded_nodes.append(node_copy)
			
 
				+
			
 
				+    return expanded_nodes, expanded_edges, expanded_node_ids
			
 
				+
			
 
				+
			
 
				+def process_filtered_result(
			
 
				+    filtered_file: Path,
			
 
				+    nodes_data: Dict,
			
 
				+    edges_data: Dict,
			
 
				+    output_dir: Path
			
 
				+) -> Dict:
			
 
				+    """
			
 
				+    处理单个匹配结果文件
			
 
				+
			
 
				+    Args:
			
 
				+        filtered_file: 匹配结果文件路径
			
 
				+        nodes_data: 节点列表数据
			
 
				+        edges_data: 边关系数据
			
 
				+        output_dir: 输出目录
			
 
				+
			
 
				+    Returns:
			
 
				+        处理结果统计
			
 
				+    """
			
 
				+    # 读取匹配结果
			
 
				+    with open(filtered_file, "r", encoding="utf-8") as f:
			
 
				+        filtered_data = json.load(f)
			
 
				+
			
 
				+    post_id = filtered_data.get("帖子id", "")
			
 
				+    post_detail = filtered_data.get("帖子详情", {})
			
 
				+    post_title = post_detail.get("title", "")
			
 
				+
			
 
				+    # 提取节点和边
			
 
				+    post_nodes, persona_node_ids, match_edges = extract_matched_nodes_and_edges(filtered_data)
			
 
				+
			
 
				+    # 获取人设节点详情（直接匹配的，标记为非扩展）
			
 
				+    persona_nodes = get_persona_nodes_details(persona_node_ids, nodes_data)
			
 
				+    for node in persona_nodes:
			
 
				+        node["是否扩展"] = False
			
 
				+
			
 
				+    # 获取人设节点之间的边
			
 
				+    persona_edges = get_edges_between_nodes(persona_node_ids, edges_data)
			
 
				+
			
 
				+    # 创建帖子节点之间的镜像边（基于人设边的投影）
			
 
				+    post_edges = create_mirrored_post_edges(match_edges, persona_edges)
			
 
				+
			
 
				+    # 合并节点列表（不扩展，只保留直接匹配的节点）
			
 
				+    all_nodes = post_nodes + persona_nodes
			
 
				+
			
 
				+    # 合并边列表
			
 
				+    all_edges = match_edges + persona_edges + post_edges
			
 
				+    # 去重边
			
 
				+    seen_edges = set()
			
 
				+    unique_edges = []
			
 
				+    for edge in all_edges:
			
 
				+        edge_key = (edge["源节点ID"], edge["目标节点ID"], edge["边类型"])
			
 
				+        if edge_key not in seen_edges:
			
 
				+            seen_edges.add(edge_key)
			
 
				+            unique_edges.append(edge)
			
 
				+    all_edges = unique_edges
			
 
				+
			
 
				+    # 构建节点边索引
			
 
				+    edges_by_node = {}
			
 
				+    for edge in all_edges:
			
 
				+        source_id = edge["源节点ID"]
			
 
				+        target_id = edge["目标节点ID"]
			
 
				+        edge_type = edge["边类型"]
			
 
				+
			
 
				+        if source_id not in edges_by_node:
			
 
				+            edges_by_node[source_id] = {}
			
 
				+        if edge_type not in edges_by_node[source_id]:
			
 
				+            edges_by_node[source_id][edge_type] = {}
			
 
				+        edges_by_node[source_id][edge_type][target_id] = edge
			
 
				+
			
 
				+    # 构建输出数据
			
 
				+    output_data = {
			
 
				+        "说明": {
			
 
				+            "帖子ID": post_id,
			
 
				+            "帖子标题": post_title,
			
 
				+            "描述": "帖子与人设的节点匹配关系",
			
 
				+            "统计": {
			
 
				+                "帖子节点数": len(post_nodes),
			
 
				+                "人设节点数": len(persona_nodes),
			
 
				+                "匹配边数": len(match_edges),
			
 
				+                "人设节点间边数": len(persona_edges),
			
 
				+                "帖子节点间边数": len(post_edges),
			
 
				+                "总节点数": len(all_nodes),
			
 
				+                "总边数": len(all_edges)
			
 
				+            }
			
 
				+        },
			
 
				+        "帖子节点列表": post_nodes,
			
 
				+        "人设节点列表": persona_nodes,
			
 
				+        "匹配边列表": match_edges,
			
 
				+        "人设节点间边列表": persona_edges,
			
 
				+        "帖子节点间边列表": post_edges,
			
 
				+        "节点列表": all_nodes,
			
 
				+        "边列表": all_edges,
			
 
				+        "节点边索引": edges_by_node
			
 
				+    }
			
 
				+
			
 
				+    # 保存输出文件
			
 
				+    output_file = output_dir / f"{post_id}_match_graph.json"
			
 
				+    with open(output_file, "w", encoding="utf-8") as f:
			
 
				+        json.dump(output_data, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    return {
			
 
				+        "帖子ID": post_id,
			
 
				+        "帖子节点数": len(post_nodes),
			
 
				+        "人设节点数": len(persona_nodes),
			
 
				+        "匹配边数": len(match_edges),
			
 
				+        "人设节点间边数": len(persona_edges),
			
 
				+        "帖子节点间边数": len(post_edges),
			
 
				+        "总节点数": len(all_nodes),
			
 
				+        "总边数": len(all_edges),
			
 
				+        "输出文件": str(output_file)
			
 
				+    }
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    # 使用路径配置
			
 
				+    config = PathConfig()
			
 
				+    config.ensure_dirs()
			
 
				+
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"输出版本: {config.output_version}")
			
 
				+    print()
			
 
				+
			
 
				+    # 输入文件/目录
			
 
				+    filtered_results_dir = config.intermediate_dir / "filtered_results"
			
 
				+    nodes_file = config.intermediate_dir / "节点列表.json"
			
 
				+    edges_file = config.intermediate_dir / "边关系.json"
			
 
				+
			
 
				+    # 输出目录
			
 
				+    output_dir = config.intermediate_dir / "match_graph"
			
 
				+    output_dir.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    print(f"输入:")
			
 
				+    print(f"  匹配结果目录: {filtered_results_dir}")
			
 
				+    print(f"  节点列表: {nodes_file}")
			
 
				+    print(f"  边关系: {edges_file}")
			
 
				+    print(f"\n输出目录: {output_dir}")
			
 
				+    print()
			
 
				+
			
 
				+    # 读取节点和边数据
			
 
				+    print("正在读取节点列表...")
			
 
				+    with open(nodes_file, "r", encoding="utf-8") as f:
			
 
				+        nodes_data = json.load(f)
			
 
				+    print(f"  共 {len(nodes_data.get('节点列表', []))} 个节点")
			
 
				+
			
 
				+    print("正在读取边关系...")
			
 
				+    with open(edges_file, "r", encoding="utf-8") as f:
			
 
				+        edges_data = json.load(f)
			
 
				+    print(f"  共 {len(edges_data.get('边列表', []))} 条边")
			
 
				+
			
 
				+    # 处理所有匹配结果文件
			
 
				+    print("\n" + "="*60)
			
 
				+    print("处理匹配结果文件...")
			
 
				+
			
 
				+    filtered_files = list(filtered_results_dir.glob("*_filtered.json"))
			
 
				+    print(f"找到 {len(filtered_files)} 个匹配结果文件")
			
 
				+
			
 
				+    results = []
			
 
				+    for i, filtered_file in enumerate(filtered_files, 1):
			
 
				+        print(f"\n[{i}/{len(filtered_files)}] 处理: {filtered_file.name}")
			
 
				+        result = process_filtered_result(filtered_file, nodes_data, edges_data, output_dir)
			
 
				+        results.append(result)
			
 
				+        print(f"  帖子节点: {result['帖子节点数']}, 人设节点: {result['人设节点数']}")
			
 
				+        print(f"  匹配边: {result['匹配边数']}, 人设边: {result['人设节点间边数']}, 帖子边: {result['帖子节点间边数']}")
			
 
				+
			
 
				+    # 汇总统计
			
 
				+    print("\n" + "="*60)
			
 
				+    print("处理完成!")
			
 
				+    print(f"\n汇总:")
			
 
				+    print(f"  处理文件数: {len(results)}")
			
 
				+    total_post = sum(r['帖子节点数'] for r in results)
			
 
				+    total_persona = sum(r['人设节点数'] for r in results)
			
 
				+    total_match = sum(r['匹配边数'] for r in results)
			
 
				+    total_persona_edges = sum(r['人设节点间边数'] for r in results)
			
 
				+    total_post_edges = sum(r['帖子节点间边数'] for r in results)
			
 
				+    print(f"  总帖子节点: {total_post}")
			
 
				+    print(f"  总人设节点: {total_persona}")
			
 
				+    print(f"  总匹配边: {total_match}")
			
 
				+    print(f"  总人设边: {total_persona_edges}")
			
 
				+    print(f"  总帖子边: {total_post_edges}")
			
 
				+    print(f"\n输出目录: {output_dir}")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/script/data_processing/extract_category_edges.py
+++ b/script/data_processing/extract_category_edges.py
@@ -0,0 +1,166 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+从dimension_associations_analysis.json中提取分类之间的边关系
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Any
			
 
				+import argparse
			
 
				+
			
 
				+
			
 
				+def get_last_segment(path: str) -> str:
			
 
				+    """获取路径的最后一段"""
			
 
				+    return path.split("/")[-1]
			
 
				+
			
 
				+
			
 
				+def build_node_id(dimension: str, node_type: str, name: str) -> str:
			
 
				+    """
			
 
				+    构建节点ID
			
 
				+
			
 
				+    Args:
			
 
				+        dimension: 节点层级（灵感点、目的点、关键点）
			
 
				+        node_type: 节点类型（分类、标签）
			
 
				+        name: 节点名称（完整路径）
			
 
				+
			
 
				+    Returns:
			
 
				+        节点ID，格式: {层级}_{类型}_{名称最后一段}
			
 
				+    """
			
 
				+    last_segment = get_last_segment(name)
			
 
				+    return f"{dimension}_{node_type}_{last_segment}"
			
 
				+
			
 
				+
			
 
				+def extract_edges_from_single_dimension(data: Dict) -> List[Dict]:
			
 
				+    """
			
 
				+    从单维度关联分析中提取边
			
 
				+
			
 
				+    Args:
			
 
				+        data: 单维度关联分析数据
			
 
				+
			
 
				+    Returns:
			
 
				+        边列表
			
 
				+    """
			
 
				+    edges = []
			
 
				+
			
 
				+    if "单维度关联分析" not in data:
			
 
				+        return edges
			
 
				+
			
 
				+    single_dim = data["单维度关联分析"]
			
 
				+
			
 
				+    # 维度映射
			
 
				+    dimension_map = {
			
 
				+        "灵感点维度": "灵感点",
			
 
				+        "目的点维度": "目的点",
			
 
				+        "关键点维度": "关键点"
			
 
				+    }
			
 
				+
			
 
				+    for dim_key, dim_data in single_dim.items():
			
 
				+        if dim_key not in dimension_map:
			
 
				+            continue
			
 
				+
			
 
				+        source_dimension = dimension_map[dim_key]
			
 
				+
			
 
				+        # 遍历该维度下的所有关联方向
			
 
				+        for direction_key, direction_data in dim_data.items():
			
 
				+            if direction_key == "说明":
			
 
				+                continue
			
 
				+
			
 
				+            # 解析方向，如 "灵感点→目的点"
			
 
				+            if "→" not in direction_key:
			
 
				+                continue
			
 
				+
			
 
				+            # 遍历每个源分类
			
 
				+            for source_path, source_info in direction_data.items():
			
 
				+                source_node_id = build_node_id(source_dimension, "分类", source_path)
			
 
				+
			
 
				+                # 确定目标维度
			
 
				+                # 从关联字段名推断，如 "与目的点的关联"
			
 
				+                for field_name, associations in source_info.items():
			
 
				+                    if not field_name.startswith("与") or not field_name.endswith("的关联"):
			
 
				+                        continue
			
 
				+
			
 
				+                    # 提取目标维度名称
			
 
				+                    target_dimension = field_name[1:-3]  # 去掉"与"和"的关联"
			
 
				+
			
 
				+                    if not isinstance(associations, list):
			
 
				+                        continue
			
 
				+
			
 
				+                    for assoc in associations:
			
 
				+                        target_path = assoc.get("目标分类", "")
			
 
				+                        if not target_path:
			
 
				+                            continue
			
 
				+
			
 
				+                        target_node_id = build_node_id(target_dimension, "分类", target_path)
			
 
				+
			
 
				+                        edge = {
			
 
				+                            "源节点ID": source_node_id,
			
 
				+                            "目标节点ID": target_node_id,
			
 
				+                            "边类型": f"{source_dimension}_分类-{target_dimension}_分类",
			
 
				+                            "边详情": {
			
 
				+                                "Jaccard相似度": assoc.get("Jaccard相似度", 0),
			
 
				+                                "重叠系数": assoc.get("重叠系数", 0),
			
 
				+                                "共同帖子数": assoc.get("共同帖子数", 0),
			
 
				+                                "共同帖子ID": assoc.get("共同帖子ID", [])
			
 
				+                            }
			
 
				+                        }
			
 
				+                        edges.append(edge)
			
 
				+
			
 
				+    return edges
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    parser = argparse.ArgumentParser(description="从dimension_associations_analysis.json中提取分类边关系")
			
 
				+    parser.add_argument("--input", "-i", type=str, required=True, help="输入文件路径")
			
 
				+    parser.add_argument("--output", "-o", type=str, required=True, help="输出文件路径")
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    input_file = Path(args.input)
			
 
				+    output_file = Path(args.output)
			
 
				+
			
 
				+    print(f"输入文件: {input_file}")
			
 
				+    print(f"输出文件: {output_file}")
			
 
				+
			
 
				+    # 读取输入文件
			
 
				+    print(f"\n正在读取文件: {input_file}")
			
 
				+    with open(input_file, "r", encoding="utf-8") as f:
			
 
				+        data = json.load(f)
			
 
				+
			
 
				+    # 提取边
			
 
				+    print("\n正在提取边关系...")
			
 
				+    edges = extract_edges_from_single_dimension(data)
			
 
				+
			
 
				+    print(f"提取到 {len(edges)} 条边")
			
 
				+
			
 
				+    # 统计边类型
			
 
				+    edge_type_count = {}
			
 
				+    for edge in edges:
			
 
				+        edge_type = edge["边类型"]
			
 
				+        edge_type_count[edge_type] = edge_type_count.get(edge_type, 0) + 1
			
 
				+
			
 
				+    print("\n边类型统计:")
			
 
				+    for edge_type, count in sorted(edge_type_count.items()):
			
 
				+        print(f"  {edge_type}: {count} 条")
			
 
				+
			
 
				+    # 构建输出
			
 
				+    output = {
			
 
				+        "说明": {
			
 
				+            "描述": "分类之间的边关系",
			
 
				+            "数据来源": input_file.name
			
 
				+        },
			
 
				+        "边列表": edges
			
 
				+    }
			
 
				+
			
 
				+    # 确保输出目录存在
			
 
				+    output_file.parent.mkdir(parents=True, exist_ok=True)
			
 
				+
			
 
				+    # 保存结果
			
 
				+    print(f"\n正在保存结果到: {output_file}")
			
 
				+    with open(output_file, "w", encoding="utf-8") as f:
			
 
				+        json.dump(output, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    print("完成!")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/script/data_processing/extract_nodes_and_edges.py
+++ b/script/data_processing/extract_nodes_and_edges.py
@@ -0,0 +1,978 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+从源数据文件中提取节点列表和边关系
			
 
				+
			
 
				+输入：
			
 
				+1. 过去帖子_pattern聚合结果.json - 分类节点、标签-分类边
			
 
				+2. 过去帖子_what解构结果目录 - 标签节点来源
			
 
				+3. dimension_associations_analysis.json - 分类-分类边（共现）
			
 
				+
			
 
				+输出：
			
 
				+1. 节点列表.json
			
 
				+2. 边关系.json
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List, Any, Set, Optional
			
 
				+import sys
			
 
				+import re
			
 
				+
			
 
				+# 添加项目根目录到路径
			
 
				+project_root = Path(__file__).parent.parent.parent
			
 
				+sys.path.insert(0, str(project_root))
			
 
				+
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				+from script.detail import get_xiaohongshu_detail
			
 
				+
			
 
				+
			
 
				+def get_post_detail(post_id: str) -> Optional[Dict]:
			
 
				+    """获取帖子详情"""
			
 
				+    try:
			
 
				+        detail = get_xiaohongshu_detail(post_id)
			
 
				+        return detail
			
 
				+    except Exception as e:
			
 
				+        print(f"  警告: 获取帖子 {post_id} 详情失败: {e}")
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+def get_last_segment(path: str) -> str:
			
 
				+    """获取路径的最后一段"""
			
 
				+    return path.split("/")[-1]
			
 
				+
			
 
				+
			
 
				+def build_node_id(dimension: str, node_type: str, name: str) -> str:
			
 
				+    """
			
 
				+    构建节点ID
			
 
				+
			
 
				+    Args:
			
 
				+        dimension: 节点层级（灵感点、目的点、关键点）
			
 
				+        node_type: 节点类型（分类、标签）
			
 
				+        name: 节点名称
			
 
				+
			
 
				+    Returns:
			
 
				+        节点ID，格式: {层级}_{类型}_{名称}
			
 
				+    """
			
 
				+    return f"{dimension}_{node_type}_{name}"
			
 
				+
			
 
				+
			
 
				+def extract_post_id_from_filename(filename: str) -> str:
			
 
				+    """从文件名中提取帖子ID"""
			
 
				+    match = re.match(r'^([^_]+)_', filename)
			
 
				+    if match:
			
 
				+        return match.group(1)
			
 
				+    return ""
			
 
				+
			
 
				+
			
 
				+def get_current_post_ids(current_posts_dir: Path) -> Set[str]:
			
 
				+    """
			
 
				+    获取当前帖子目录中的所有帖子ID
			
 
				+
			
 
				+    Args:
			
 
				+        current_posts_dir: 当前帖子目录路径
			
 
				+
			
 
				+    Returns:
			
 
				+        当前帖子ID集合
			
 
				+    """
			
 
				+    if not current_posts_dir.exists():
			
 
				+        print(f"警告: 当前帖子目录不存在: {current_posts_dir}")
			
 
				+        return set()
			
 
				+
			
 
				+    json_files = list(current_posts_dir.glob("*.json"))
			
 
				+    if not json_files:
			
 
				+        print(f"警告: 当前帖子目录为空: {current_posts_dir}")
			
 
				+        return set()
			
 
				+
			
 
				+    print(f"找到 {len(json_files)} 个当前帖子")
			
 
				+
			
 
				+    post_ids = set()
			
 
				+    for file_path in json_files:
			
 
				+        post_id = extract_post_id_from_filename(file_path.name)
			
 
				+        if post_id:
			
 
				+            post_ids.add(post_id)
			
 
				+
			
 
				+    print(f"提取到 {len(post_ids)} 个帖子ID")
			
 
				+    return post_ids
			
 
				+
			
 
				+
			
 
				+def collect_all_post_ids_from_nodes(nodes: List[Dict]) -> Set[str]:
			
 
				+    """从节点列表中收集所有帖子ID"""
			
 
				+    post_ids = set()
			
 
				+    for node in nodes:
			
 
				+        for source in node.get("节点来源", []):
			
 
				+            post_id = source.get("帖子ID", "")
			
 
				+            if post_id:
			
 
				+                post_ids.add(post_id)
			
 
				+    return post_ids
			
 
				+
			
 
				+
			
 
				+def collect_all_post_ids_from_edges(edges: List[Dict]) -> Set[str]:
			
 
				+    """从边列表中收集所有帖子ID"""
			
 
				+    post_ids = set()
			
 
				+    for edge in edges:
			
 
				+        if edge.get("边类型") in ("分类共现（跨点）", "标签共现"):
			
 
				+            edge_details = edge.get("边详情", {})
			
 
				+            common_post_ids = edge_details.get("共同帖子ID", [])
			
 
				+            post_ids.update(common_post_ids)
			
 
				+        # 点内共现边不包含帖子ID
			
 
				+    return post_ids
			
 
				+
			
 
				+
			
 
				+def fetch_post_details(post_ids: Set[str]) -> Dict[str, Dict]:
			
 
				+    """
			
 
				+    批量获取帖子详情
			
 
				+
			
 
				+    Args:
			
 
				+        post_ids: 帖子ID集合
			
 
				+
			
 
				+    Returns:
			
 
				+        帖子ID -> 帖子详情 的映射
			
 
				+    """
			
 
				+    print(f"\n正在获取 {len(post_ids)} 个帖子的详情...")
			
 
				+    post_details = {}
			
 
				+    for i, post_id in enumerate(sorted(post_ids), 1):
			
 
				+        print(f"  [{i}/{len(post_ids)}] 获取帖子 {post_id} 的详情...")
			
 
				+        detail = get_post_detail(post_id)
			
 
				+        if detail:
			
 
				+            post_details[post_id] = detail
			
 
				+    print(f"成功获取 {len(post_details)} 个帖子详情")
			
 
				+    return post_details
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def filter_nodes_by_post_ids(nodes: List[Dict], exclude_post_ids: Set[str]) -> List[Dict]:
			
 
				+    """
			
 
				+    过滤节点，排除指定帖子ID的来源
			
 
				+
			
 
				+    Args:
			
 
				+        nodes: 节点列表
			
 
				+        exclude_post_ids: 要排除的帖子ID集合
			
 
				+
			
 
				+    Returns:
			
 
				+        过滤后的节点列表
			
 
				+    """
			
 
				+    filtered_nodes = []
			
 
				+    for node in nodes:
			
 
				+        # 过滤节点来源
			
 
				+        filtered_sources = [
			
 
				+            source for source in node.get("节点来源", [])
			
 
				+            if source.get("帖子ID", "") not in exclude_post_ids
			
 
				+        ]
			
 
				+
			
 
				+        # 只保留有来源的节点
			
 
				+        if filtered_sources:
			
 
				+            node_copy = node.copy()
			
 
				+            node_copy["节点来源"] = filtered_sources
			
 
				+            # 重新计算帖子数
			
 
				+            unique_post_ids = set(s.get("帖子ID", "") for s in filtered_sources if s.get("帖子ID"))
			
 
				+            node_copy["帖子数"] = len(unique_post_ids)
			
 
				+            filtered_nodes.append(node_copy)
			
 
				+
			
 
				+    return filtered_nodes
			
 
				+
			
 
				+
			
 
				+def filter_edges_by_post_ids(edges: List[Dict], exclude_post_ids: Set[str]) -> List[Dict]:
			
 
				+    """
			
 
				+    过滤边，排除指定帖子ID的共现边
			
 
				+
			
 
				+    Args:
			
 
				+        edges: 边列表
			
 
				+        exclude_post_ids: 要排除的帖子ID集合
			
 
				+
			
 
				+    Returns:
			
 
				+        过滤后的边列表
			
 
				+    """
			
 
				+    filtered_edges = []
			
 
				+    for edge in edges:
			
 
				+        edge_type = edge["边类型"]
			
 
				+        if edge_type in ("分类共现（跨点）", "标签共现"):
			
 
				+            # 过滤共同帖子ID
			
 
				+            edge_details = edge.get("边详情", {})
			
 
				+            common_post_ids = edge_details.get("共同帖子ID", [])
			
 
				+            filtered_post_ids = [pid for pid in common_post_ids if pid not in exclude_post_ids]
			
 
				+
			
 
				+            if filtered_post_ids:
			
 
				+                edge_copy = edge.copy()
			
 
				+                edge_copy["边详情"] = edge_details.copy()
			
 
				+                edge_copy["边详情"]["共同帖子ID"] = filtered_post_ids
			
 
				+                edge_copy["边详情"]["共同帖子数"] = len(filtered_post_ids)
			
 
				+                filtered_edges.append(edge_copy)
			
 
				+        elif edge_type == "分类共现（点内）":
			
 
				+            # 点内共现边不涉及帖子ID，直接保留
			
 
				+            filtered_edges.append(edge)
			
 
				+        else:
			
 
				+            # 属于/包含边不需要过滤
			
 
				+            filtered_edges.append(edge)
			
 
				+
			
 
				+    return filtered_edges
			
 
				+
			
 
				+
			
 
				+# ========== 分类节点提取 ==========
			
 
				+
			
 
				+def extract_category_nodes_from_pattern(
			
 
				+    pattern_data: Dict,
			
 
				+    dimension_key: str,
			
 
				+    dimension_name: str
			
 
				+) -> List[Dict]:
			
 
				+    """
			
 
				+    从pattern聚合结果中提取分类节点
			
 
				+
			
 
				+    Args:
			
 
				+        pattern_data: pattern聚合数据
			
 
				+        dimension_key: 维度键名（灵感点列表、目的点、关键点列表）
			
 
				+        dimension_name: 维度名称（灵感点、目的点、关键点）
			
 
				+
			
 
				+    Returns:
			
 
				+        分类节点列表
			
 
				+    """
			
 
				+    nodes = []
			
 
				+
			
 
				+    if dimension_key not in pattern_data:
			
 
				+        return nodes
			
 
				+
			
 
				+    def traverse_node(node: Dict, parent_categories: List[str]):
			
 
				+        """递归遍历节点"""
			
 
				+        for key, value in node.items():
			
 
				+            if key in ["特征列表", "_meta", "帖子数", "特征数", "帖子列表"]:
			
 
				+                continue
			
 
				+
			
 
				+            if isinstance(value, dict):
			
 
				+                # 当前节点是一个分类
			
 
				+                current_path = parent_categories + [key]
			
 
				+
			
 
				+                # 获取帖子列表
			
 
				+                post_ids = value.get("帖子列表", [])
			
 
				+
			
 
				+                # 构建节点来源（从特征列表中获取）
			
 
				+                node_sources = []
			
 
				+                if "特征列表" in value:
			
 
				+                    for feature in value["特征列表"]:
			
 
				+                        source = {
			
 
				+                            "点的名称": feature.get("所属点", ""),
			
 
				+                            "点的描述": feature.get("点描述", ""),
			
 
				+                            "帖子ID": feature.get("帖子id", "")
			
 
				+                        }
			
 
				+                        node_sources.append(source)
			
 
				+
			
 
				+                node_info = {
			
 
				+                    "节点ID": build_node_id(dimension_name, "分类", key),
			
 
				+                    "节点名称": key,
			
 
				+                    "节点类型": "分类",
			
 
				+                    "节点层级": dimension_name,
			
 
				+                    "所属分类": parent_categories.copy(),
			
 
				+                    "帖子数": len(post_ids),
			
 
				+                    "节点来源": node_sources
			
 
				+                }
			
 
				+                nodes.append(node_info)
			
 
				+
			
 
				+                # 递归处理子节点
			
 
				+                traverse_node(value, current_path)
			
 
				+
			
 
				+    traverse_node(pattern_data[dimension_key], [])
			
 
				+    return nodes
			
 
				+
			
 
				+
			
 
				+# ========== 标签节点提取 ==========
			
 
				+
			
 
				+def extract_tag_nodes_from_pattern(
			
 
				+    pattern_data: Dict,
			
 
				+    dimension_key: str,
			
 
				+    dimension_name: str
			
 
				+) -> List[Dict]:
			
 
				+    """
			
 
				+    从pattern聚合结果中提取标签节点
			
 
				+
			
 
				+    Args:
			
 
				+        pattern_data: pattern聚合数据
			
 
				+        dimension_key: 维度键名
			
 
				+        dimension_name: 维度名称
			
 
				+
			
 
				+    Returns:
			
 
				+        标签节点列表
			
 
				+    """
			
 
				+    nodes = []
			
 
				+    tag_map = {}  # 用于合并同名标签
			
 
				+
			
 
				+    if dimension_key not in pattern_data:
			
 
				+        return nodes
			
 
				+
			
 
				+    def traverse_node(node: Dict, parent_categories: List[str]):
			
 
				+        """递归遍历节点"""
			
 
				+        # 处理特征列表（标签）
			
 
				+        if "特征列表" in node:
			
 
				+            for feature in node["特征列表"]:
			
 
				+                tag_name = feature.get("特征名称", "")
			
 
				+                if not tag_name:
			
 
				+                    continue
			
 
				+
			
 
				+                source = {
			
 
				+                    "点的名称": feature.get("所属点", ""),
			
 
				+                    "点的描述": feature.get("点描述", ""),
			
 
				+                    "帖子ID": feature.get("帖子id", "")
			
 
				+                }
			
 
				+
			
 
				+                tag_id = build_node_id(dimension_name, "标签", tag_name)
			
 
				+
			
 
				+                if tag_id not in tag_map:
			
 
				+                    tag_map[tag_id] = {
			
 
				+                        "节点ID": tag_id,
			
 
				+                        "节点名称": tag_name,
			
 
				+                        "节点类型": "标签",
			
 
				+                        "节点层级": dimension_name,
			
 
				+                        "所属分类": parent_categories.copy(),
			
 
				+                        "帖子数": 0,
			
 
				+                        "节点来源": [],
			
 
				+                        "_post_ids": set()
			
 
				+                    }
			
 
				+
			
 
				+                tag_map[tag_id]["节点来源"].append(source)
			
 
				+                if source["帖子ID"]:
			
 
				+                    tag_map[tag_id]["_post_ids"].add(source["帖子ID"])
			
 
				+
			
 
				+        # 递归处理子节点
			
 
				+        for key, value in node.items():
			
 
				+            if key in ["特征列表", "_meta", "帖子数", "特征数", "帖子列表"]:
			
 
				+                continue
			
 
				+
			
 
				+            if isinstance(value, dict):
			
 
				+                current_path = parent_categories + [key]
			
 
				+                traverse_node(value, current_path)
			
 
				+
			
 
				+    traverse_node(pattern_data[dimension_key], [])
			
 
				+
			
 
				+    # 转换为列表，计算帖子数
			
 
				+    for tag_id, tag_info in tag_map.items():
			
 
				+        tag_info["帖子数"] = len(tag_info["_post_ids"])
			
 
				+        del tag_info["_post_ids"]
			
 
				+        nodes.append(tag_info)
			
 
				+
			
 
				+    return nodes
			
 
				+
			
 
				+
			
 
				+# ========== 标签-分类边提取 ==========
			
 
				+
			
 
				+def extract_tag_category_edges_from_pattern(
			
 
				+    pattern_data: Dict,
			
 
				+    dimension_key: str,
			
 
				+    dimension_name: str
			
 
				+) -> List[Dict]:
			
 
				+    """
			
 
				+    从pattern聚合结果中提取标签-分类边（属于/包含）
			
 
				+
			
 
				+    Args:
			
 
				+        pattern_data: pattern聚合数据
			
 
				+        dimension_key: 维度键名
			
 
				+        dimension_name: 维度名称
			
 
				+
			
 
				+    Returns:
			
 
				+        边列表
			
 
				+    """
			
 
				+    edges = []
			
 
				+    seen_edges = set()  # 避免重复边
			
 
				+
			
 
				+    if dimension_key not in pattern_data:
			
 
				+        return edges
			
 
				+
			
 
				+    def traverse_node(node: Dict, parent_categories: List[str]):
			
 
				+        """递归遍历节点"""
			
 
				+        current_category = parent_categories[-1] if parent_categories else None
			
 
				+
			
 
				+        # 处理特征列表（标签）
			
 
				+        if "特征列表" in node and current_category:
			
 
				+            for feature in node["特征列表"]:
			
 
				+                tag_name = feature.get("特征名称", "")
			
 
				+                if not tag_name:
			
 
				+                    continue
			
 
				+
			
 
				+                tag_id = build_node_id(dimension_name, "标签", tag_name)
			
 
				+                category_id = build_node_id(dimension_name, "分类", current_category)
			
 
				+
			
 
				+                # 属于边：标签 -> 分类
			
 
				+                edge_key_belong = (tag_id, category_id, "属于")
			
 
				+                if edge_key_belong not in seen_edges:
			
 
				+                    seen_edges.add(edge_key_belong)
			
 
				+                    edges.append({
			
 
				+                        "源节点ID": tag_id,
			
 
				+                        "目标节点ID": category_id,
			
 
				+                        "边类型": "属于",
			
 
				+                        "边详情": {}
			
 
				+                    })
			
 
				+
			
 
				+                # 包含边：分类 -> 标签
			
 
				+                edge_key_contain = (category_id, tag_id, "包含")
			
 
				+                if edge_key_contain not in seen_edges:
			
 
				+                    seen_edges.add(edge_key_contain)
			
 
				+                    edges.append({
			
 
				+                        "源节点ID": category_id,
			
 
				+                        "目标节点ID": tag_id,
			
 
				+                        "边类型": "包含",
			
 
				+                        "边详情": {}
			
 
				+                    })
			
 
				+
			
 
				+        # 递归处理子节点
			
 
				+        for key, value in node.items():
			
 
				+            if key in ["特征列表", "_meta", "帖子数", "特征数", "帖子列表"]:
			
 
				+                continue
			
 
				+
			
 
				+            if isinstance(value, dict):
			
 
				+                current_path = parent_categories + [key]
			
 
				+                traverse_node(value, current_path)
			
 
				+
			
 
				+    traverse_node(pattern_data[dimension_key], [])
			
 
				+    return edges
			
 
				+
			
 
				+
			
 
				+# ========== 标签-标签共现边提取 ==========
			
 
				+
			
 
				+def extract_tags_from_post(post_data: Dict) -> Dict[str, List[str]]:
			
 
				+    """
			
 
				+    从单个帖子的解构结果中提取所有标签（特征名称）
			
 
				+
			
 
				+    Args:
			
 
				+        post_data: 帖子解构数据
			
 
				+
			
 
				+    Returns:
			
 
				+        按维度分组的标签字典 {"灵感点": [...], "目的点": [...], "关键点": [...]}
			
 
				+    """
			
 
				+    tags_by_dimension = {
			
 
				+        "灵感点": [],
			
 
				+        "目的点": [],
			
 
				+        "关键点": []
			
 
				+    }
			
 
				+
			
 
				+    if "三点解构" not in post_data:
			
 
				+        return tags_by_dimension
			
 
				+
			
 
				+    three_points = post_data["三点解构"]
			
 
				+
			
 
				+    # 提取灵感点的特征
			
 
				+    if "灵感点" in three_points:
			
 
				+        inspiration = three_points["灵感点"]
			
 
				+        for section in ["全新内容", "共性差异", "共性内容"]:
			
 
				+            if section in inspiration and isinstance(inspiration[section], list):
			
 
				+                for item in inspiration[section]:
			
 
				+                    if "提取的特征" in item and isinstance(item["提取的特征"], list):
			
 
				+                        for feature in item["提取的特征"]:
			
 
				+                            tag_name = feature.get("特征名称", "")
			
 
				+                            if tag_name:
			
 
				+                                tags_by_dimension["灵感点"].append(tag_name)
			
 
				+
			
 
				+    # 提取目的点的特征
			
 
				+    if "目的点" in three_points:
			
 
				+        purpose = three_points["目的点"]
			
 
				+        if "purposes" in purpose and isinstance(purpose["purposes"], list):
			
 
				+            for item in purpose["purposes"]:
			
 
				+                if "提取的特征" in item and isinstance(item["提取的特征"], list):
			
 
				+                    for feature in item["提取的特征"]:
			
 
				+                        tag_name = feature.get("特征名称", "")
			
 
				+                        if tag_name:
			
 
				+                            tags_by_dimension["目的点"].append(tag_name)
			
 
				+
			
 
				+    # 提取关键点的特征
			
 
				+    if "关键点" in three_points:
			
 
				+        key_points = three_points["关键点"]
			
 
				+        if "key_points" in key_points and isinstance(key_points["key_points"], list):
			
 
				+            for item in key_points["key_points"]:
			
 
				+                if "提取的特征" in item and isinstance(item["提取的特征"], list):
			
 
				+                    for feature in item["提取的特征"]:
			
 
				+                        tag_name = feature.get("特征名称", "")
			
 
				+                        if tag_name:
			
 
				+                            tags_by_dimension["关键点"].append(tag_name)
			
 
				+
			
 
				+    return tags_by_dimension
			
 
				+
			
 
				+
			
 
				+def extract_tag_cooccurrence_edges(historical_posts_dir: Path, exclude_post_ids: Set[str] = None) -> List[Dict]:
			
 
				+    """
			
 
				+    从历史帖子解构结果中提取标签-标签共现边
			
 
				+
			
 
				+    Args:
			
 
				+        historical_posts_dir: 历史帖子解构结果目录
			
 
				+        exclude_post_ids: 要排除的帖子ID集合
			
 
				+
			
 
				+    Returns:
			
 
				+        标签共现边列表
			
 
				+    """
			
 
				+    if exclude_post_ids is None:
			
 
				+        exclude_post_ids = set()
			
 
				+
			
 
				+    # 存储每对标签的共现信息
			
 
				+    # key: (tag1_id, tag2_id), value: {"共同帖子ID": set()}
			
 
				+    cooccurrence_map = {}
			
 
				+
			
 
				+    if not historical_posts_dir.exists():
			
 
				+        print(f"警告: 历史帖子目录不存在: {historical_posts_dir}")
			
 
				+        return []
			
 
				+
			
 
				+    json_files = list(historical_posts_dir.glob("*.json"))
			
 
				+    print(f"找到 {len(json_files)} 个历史帖子文件")
			
 
				+
			
 
				+    for file_path in json_files:
			
 
				+        # 提取帖子ID
			
 
				+        post_id = extract_post_id_from_filename(file_path.name)
			
 
				+        if not post_id:
			
 
				+            continue
			
 
				+
			
 
				+        # 跳过排除的帖子
			
 
				+        if post_id in exclude_post_ids:
			
 
				+            continue
			
 
				+
			
 
				+        try:
			
 
				+            with open(file_path, "r", encoding="utf-8") as f:
			
 
				+                post_data = json.load(f)
			
 
				+
			
 
				+            # 提取该帖子的所有标签
			
 
				+            tags_by_dimension = extract_tags_from_post(post_data)
			
 
				+
			
 
				+            # 对每个维度内的标签两两组合，构建共现关系
			
 
				+            for dimension, tags in tags_by_dimension.items():
			
 
				+                unique_tags = list(set(tags))  # 去重
			
 
				+                for i in range(len(unique_tags)):
			
 
				+                    for j in range(i + 1, len(unique_tags)):
			
 
				+                        tag1 = unique_tags[i]
			
 
				+                        tag2 = unique_tags[j]
			
 
				+
			
 
				+                        # 构建节点ID
			
 
				+                        tag1_id = build_node_id(dimension, "标签", tag1)
			
 
				+                        tag2_id = build_node_id(dimension, "标签", tag2)
			
 
				+
			
 
				+                        # 确保顺序一致（按字典序）
			
 
				+                        if tag1_id > tag2_id:
			
 
				+                            tag1_id, tag2_id = tag2_id, tag1_id
			
 
				+
			
 
				+                        key = (tag1_id, tag2_id, dimension)
			
 
				+
			
 
				+                        if key not in cooccurrence_map:
			
 
				+                            cooccurrence_map[key] = {"共同帖子ID": set()}
			
 
				+
			
 
				+                        cooccurrence_map[key]["共同帖子ID"].add(post_id)
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f"  警告: 处理文件 {file_path.name} 时出错: {e}")
			
 
				+
			
 
				+    # 转换为边列表
			
 
				+    edges = []
			
 
				+    for (tag1_id, tag2_id, dimension), info in cooccurrence_map.items():
			
 
				+        common_post_ids = list(info["共同帖子ID"])
			
 
				+        edge = {
			
 
				+            "源节点ID": tag1_id,
			
 
				+            "目标节点ID": tag2_id,
			
 
				+            "边类型": "标签共现",
			
 
				+            "边详情": {
			
 
				+                "共同帖子数": len(common_post_ids),
			
 
				+                "共同帖子ID": common_post_ids
			
 
				+            }
			
 
				+        }
			
 
				+        edges.append(edge)
			
 
				+
			
 
				+    return edges
			
 
				+
			
 
				+
			
 
				+# ========== 分类-分类边提取 ==========
			
 
				+
			
 
				+def extract_category_edges_from_associations(associations_data: Dict) -> List[Dict]:
			
 
				+    """
			
 
				+    从dimension_associations_analysis.json中提取分类-分类边（共现）
			
 
				+
			
 
				+    Args:
			
 
				+        associations_data: 关联分析数据
			
 
				+
			
 
				+    Returns:
			
 
				+        边列表
			
 
				+    """
			
 
				+    edges = []
			
 
				+
			
 
				+    if "单维度关联分析" not in associations_data:
			
 
				+        return edges
			
 
				+
			
 
				+    single_dim = associations_data["单维度关联分析"]
			
 
				+
			
 
				+    # 维度映射
			
 
				+    dimension_map = {
			
 
				+        "灵感点维度": "灵感点",
			
 
				+        "目的点维度": "目的点",
			
 
				+        "关键点维度": "关键点"
			
 
				+    }
			
 
				+
			
 
				+    for dim_key, dim_data in single_dim.items():
			
 
				+        if dim_key not in dimension_map:
			
 
				+            continue
			
 
				+
			
 
				+        source_dimension = dimension_map[dim_key]
			
 
				+
			
 
				+        # 遍历该维度下的所有关联方向
			
 
				+        for direction_key, direction_data in dim_data.items():
			
 
				+            if direction_key == "说明":
			
 
				+                continue
			
 
				+
			
 
				+            if "→" not in direction_key:
			
 
				+                continue
			
 
				+
			
 
				+            # 遍历每个源分类
			
 
				+            for source_path, source_info in direction_data.items():
			
 
				+                source_name = get_last_segment(source_path)
			
 
				+                source_node_id = build_node_id(source_dimension, "分类", source_name)
			
 
				+
			
 
				+                # 确定目标维度
			
 
				+                for field_name, associations in source_info.items():
			
 
				+                    if not field_name.startswith("与") or not field_name.endswith("的关联"):
			
 
				+                        continue
			
 
				+
			
 
				+                    target_dimension = field_name[1:-3]
			
 
				+
			
 
				+                    if not isinstance(associations, list):
			
 
				+                        continue
			
 
				+
			
 
				+                    for assoc in associations:
			
 
				+                        target_path = assoc.get("目标分类", "")
			
 
				+                        if not target_path:
			
 
				+                            continue
			
 
				+
			
 
				+                        target_name = get_last_segment(target_path)
			
 
				+                        target_node_id = build_node_id(target_dimension, "分类", target_name)
			
 
				+
			
 
				+                        edge = {
			
 
				+                            "源节点ID": source_node_id,
			
 
				+                            "目标节点ID": target_node_id,
			
 
				+                            "边类型": "分类共现（跨点）",
			
 
				+                            "边详情": {
			
 
				+                                "Jaccard相似度": assoc.get("Jaccard相似度", 0),
			
 
				+                                "重叠系数": assoc.get("重叠系数", 0),
			
 
				+                                "共同帖子数": assoc.get("共同帖子数", 0),
			
 
				+                                "共同帖子ID": assoc.get("共同帖子ID", [])
			
 
				+                            }
			
 
				+                        }
			
 
				+                        edges.append(edge)
			
 
				+
			
 
				+    return edges
			
 
				+
			
 
				+
			
 
				+# ========== 点内分类共现边提取 ==========
			
 
				+
			
 
				+def extract_intra_category_edges(intra_associations_data: Dict) -> List[Dict]:
			
 
				+    """
			
 
				+    从intra_dimension_associations_analysis.json中提取点内分类共现边
			
 
				+
			
 
				+    Args:
			
 
				+        intra_associations_data: 点内关联分析数据
			
 
				+
			
 
				+    Returns:
			
 
				+        边列表
			
 
				+    """
			
 
				+    edges = []
			
 
				+    seen_edges = set()  # 避免重复边
			
 
				+
			
 
				+    if "叶子分类组合聚类" not in intra_associations_data:
			
 
				+        return edges
			
 
				+
			
 
				+    clusters_by_dim = intra_associations_data["叶子分类组合聚类"]
			
 
				+
			
 
				+    for dimension, clusters in clusters_by_dim.items():
			
 
				+        if dimension not in ("灵感点", "目的点", "关键点"):
			
 
				+            continue
			
 
				+
			
 
				+        for cluster_key, cluster_data in clusters.items():
			
 
				+            leaf_categories = cluster_data.get("叶子分类组合", [])
			
 
				+            point_count = cluster_data.get("点数", 0)
			
 
				+            point_details = cluster_data.get("点详情列表", [])
			
 
				+
			
 
				+            # 提取点名称列表
			
 
				+            point_names = [p.get("点名称", "") for p in point_details if p.get("点名称")]
			
 
				+
			
 
				+            # 两两组合生成共现边
			
 
				+            for i in range(len(leaf_categories)):
			
 
				+                for j in range(i + 1, len(leaf_categories)):
			
 
				+                    cat1 = leaf_categories[i]
			
 
				+                    cat2 = leaf_categories[j]
			
 
				+
			
 
				+                    # 构建节点ID
			
 
				+                    cat1_id = build_node_id(dimension, "分类", cat1)
			
 
				+                    cat2_id = build_node_id(dimension, "分类", cat2)
			
 
				+
			
 
				+                    # 确保顺序一致（按字典序）
			
 
				+                    if cat1_id > cat2_id:
			
 
				+                        cat1_id, cat2_id = cat2_id, cat1_id
			
 
				+
			
 
				+                    edge_key = (cat1_id, cat2_id, dimension)
			
 
				+
			
 
				+                    if edge_key in seen_edges:
			
 
				+                        # 已存在的边，累加点数和点名称
			
 
				+                        for edge in edges:
			
 
				+                            if (edge["源节点ID"] == cat1_id and
			
 
				+                                edge["目标节点ID"] == cat2_id and
			
 
				+                                edge["边类型"] == "分类共现（点内）"):
			
 
				+                                edge["边详情"]["点数"] += point_count
			
 
				+                                edge["边详情"]["关联点名称"].extend(point_names)
			
 
				+                                break
			
 
				+                    else:
			
 
				+                        seen_edges.add(edge_key)
			
 
				+                        edge = {
			
 
				+                            "源节点ID": cat1_id,
			
 
				+                            "目标节点ID": cat2_id,
			
 
				+                            "边类型": "分类共现（点内）",
			
 
				+                            "边详情": {
			
 
				+                                "点数": point_count,
			
 
				+                                "关联点名称": point_names.copy()
			
 
				+                            }
			
 
				+                        }
			
 
				+                        edges.append(edge)
			
 
				+
			
 
				+    return edges
			
 
				+
			
 
				+
			
 
				+# ========== 主函数 ==========
			
 
				+
			
 
				+def main():
			
 
				+    # 使用路径配置
			
 
				+    config = PathConfig()
			
 
				+    config.ensure_dirs()
			
 
				+
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"输出版本: {config.output_version}")
			
 
				+    print(f"过滤模式: {config.filter_mode}")
			
 
				+    print()
			
 
				+
			
 
				+    # 输入文件路径
			
 
				+    pattern_file = config.pattern_cluster_file
			
 
				+    associations_file = config.account_dir / "pattern相关文件/optimization/dimension_associations_analysis.json"
			
 
				+    intra_associations_file = config.account_dir / "pattern相关文件/optimization/intra_dimension_associations_analysis.json"
			
 
				+    current_posts_dir = config.current_posts_dir
			
 
				+
			
 
				+    # 输出文件路径
			
 
				+    nodes_output_file = config.intermediate_dir / "节点列表.json"
			
 
				+    edges_output_file = config.intermediate_dir / "边关系.json"
			
 
				+
			
 
				+    print(f"输入文件:")
			
 
				+    print(f"  pattern聚合文件: {pattern_file}")
			
 
				+    print(f"  跨点关联分析文件: {associations_file}")
			
 
				+    print(f"  点内关联分析文件: {intra_associations_file}")
			
 
				+    print(f"  当前帖子目录: {current_posts_dir}")
			
 
				+    print(f"\n输出文件:")
			
 
				+    print(f"  节点列表: {nodes_output_file}")
			
 
				+    print(f"  边关系: {edges_output_file}")
			
 
				+    print()
			
 
				+
			
 
				+    # 读取pattern聚合结果
			
 
				+    print("正在读取pattern聚合结果...")
			
 
				+    with open(pattern_file, "r", encoding="utf-8") as f:
			
 
				+        pattern_data = json.load(f)
			
 
				+
			
 
				+    # 读取跨点关联分析结果
			
 
				+    print("正在读取跨点关联分析结果...")
			
 
				+    with open(associations_file, "r", encoding="utf-8") as f:
			
 
				+        associations_data = json.load(f)
			
 
				+
			
 
				+    # 读取点内关联分析结果
			
 
				+    print("正在读取点内关联分析结果...")
			
 
				+    with open(intra_associations_file, "r", encoding="utf-8") as f:
			
 
				+        intra_associations_data = json.load(f)
			
 
				+
			
 
				+    # ===== 提取节点 =====
			
 
				+    print("\n" + "="*60)
			
 
				+    print("正在提取节点...")
			
 
				+
			
 
				+    all_nodes = []
			
 
				+
			
 
				+    # 维度映射
			
 
				+    dimension_mapping = {
			
 
				+        "灵感点列表": "灵感点",
			
 
				+        "目的点": "目的点",
			
 
				+        "关键点列表": "关键点"
			
 
				+    }
			
 
				+
			
 
				+    # 提取分类节点
			
 
				+    print("\n提取分类节点:")
			
 
				+    for dim_key, dim_name in dimension_mapping.items():
			
 
				+        category_nodes = extract_category_nodes_from_pattern(pattern_data, dim_key, dim_name)
			
 
				+        all_nodes.extend(category_nodes)
			
 
				+        print(f"  {dim_name}: {len(category_nodes)} 个分类节点")
			
 
				+
			
 
				+    # 提取标签节点
			
 
				+    print("\n提取标签节点:")
			
 
				+    for dim_key, dim_name in dimension_mapping.items():
			
 
				+        tag_nodes = extract_tag_nodes_from_pattern(pattern_data, dim_key, dim_name)
			
 
				+        all_nodes.extend(tag_nodes)
			
 
				+        print(f"  {dim_name}: {len(tag_nodes)} 个标签节点")
			
 
				+
			
 
				+    print(f"\n总计: {len(all_nodes)} 个节点")
			
 
				+
			
 
				+    # 统计节点类型
			
 
				+    category_count = sum(1 for n in all_nodes if n["节点类型"] == "分类")
			
 
				+    tag_count = sum(1 for n in all_nodes if n["节点类型"] == "标签")
			
 
				+    print(f"  分类节点: {category_count}")
			
 
				+    print(f"  标签节点: {tag_count}")
			
 
				+
			
 
				+    # ===== 提取边 =====
			
 
				+    print("\n" + "="*60)
			
 
				+    print("正在提取边...")
			
 
				+
			
 
				+    all_edges = []
			
 
				+
			
 
				+    # 提取分类-分类边（跨点共现）
			
 
				+    print("\n提取分类-分类边（跨点共现）:")
			
 
				+    category_edges = extract_category_edges_from_associations(associations_data)
			
 
				+    all_edges.extend(category_edges)
			
 
				+    print(f"  分类共现（跨点）边: {len(category_edges)} 条")
			
 
				+
			
 
				+    # 提取分类-分类边（点内共现）
			
 
				+    print("\n提取分类-分类边（点内共现）:")
			
 
				+    intra_category_edges = extract_intra_category_edges(intra_associations_data)
			
 
				+    all_edges.extend(intra_category_edges)
			
 
				+    print(f"  分类共现（点内）边: {len(intra_category_edges)} 条")
			
 
				+
			
 
				+    # 提取标签-分类边（属于/包含）
			
 
				+    print("\n提取标签-分类边（属于/包含）:")
			
 
				+    belong_count = 0
			
 
				+    contain_count = 0
			
 
				+    for dim_key, dim_name in dimension_mapping.items():
			
 
				+        tag_category_edges = extract_tag_category_edges_from_pattern(pattern_data, dim_key, dim_name)
			
 
				+        all_edges.extend(tag_category_edges)
			
 
				+        dim_belong = sum(1 for e in tag_category_edges if e["边类型"] == "属于")
			
 
				+        dim_contain = sum(1 for e in tag_category_edges if e["边类型"] == "包含")
			
 
				+        belong_count += dim_belong
			
 
				+        contain_count += dim_contain
			
 
				+        print(f"  {dim_name}: {dim_belong} 条属于边, {dim_contain} 条包含边")
			
 
				+
			
 
				+    # 提取标签-标签边（共现）- 需要在过滤之前先记录排除的帖子ID
			
 
				+    # 这里先占位，过滤后再处理
			
 
				+    tag_cooccurrence_edges_placeholder = True
			
 
				+
			
 
				+    print(f"\n边统计（标签共现待提取）:")
			
 
				+    print(f"  分类共现（跨点）边: {len(category_edges)}")
			
 
				+    print(f"  分类共现（点内）边: {len(intra_category_edges)}")
			
 
				+    print(f"  属于边: {belong_count}")
			
 
				+    print(f"  包含边: {contain_count}")
			
 
				+
			
 
				+    # ===== 应用过滤 =====
			
 
				+    exclude_post_ids = set()
			
 
				+    filter_mode = config.filter_mode
			
 
				+
			
 
				+    if filter_mode == "exclude_current_posts":
			
 
				+        print("\n" + "="*60)
			
 
				+        print("应用过滤规则: 排除当前帖子ID")
			
 
				+        exclude_post_ids = get_current_post_ids(current_posts_dir)
			
 
				+
			
 
				+        if exclude_post_ids:
			
 
				+            # 过滤节点
			
 
				+            nodes_before = len(all_nodes)
			
 
				+            all_nodes = filter_nodes_by_post_ids(all_nodes, exclude_post_ids)
			
 
				+            nodes_after = len(all_nodes)
			
 
				+            print(f"\n节点过滤: {nodes_before} -> {nodes_after} (移除 {nodes_before - nodes_after} 个)")
			
 
				+
			
 
				+            # 过滤边
			
 
				+            edges_before = len(all_edges)
			
 
				+            all_edges = filter_edges_by_post_ids(all_edges, exclude_post_ids)
			
 
				+            edges_after = len(all_edges)
			
 
				+            print(f"边过滤: {edges_before} -> {edges_after} (移除 {edges_before - edges_after} 条)")
			
 
				+    elif filter_mode == "none":
			
 
				+        print("\n过滤模式: none，不应用任何过滤")
			
 
				+    else:
			
 
				+        print(f"\n警告: 未知的过滤模式 '{filter_mode}'，不应用过滤")
			
 
				+
			
 
				+    # ===== 提取标签-标签共现边 =====
			
 
				+    print("\n" + "="*60)
			
 
				+    print("提取标签-标签共现边...")
			
 
				+    historical_posts_dir = config.historical_posts_dir
			
 
				+    print(f"历史帖子目录: {historical_posts_dir}")
			
 
				+    tag_cooccurrence_edges = extract_tag_cooccurrence_edges(historical_posts_dir, exclude_post_ids)
			
 
				+    all_edges.extend(tag_cooccurrence_edges)
			
 
				+    print(f"  标签-标签共现边: {len(tag_cooccurrence_edges)} 条")
			
 
				+
			
 
				+    # 更新总计
			
 
				+    print(f"\n总计: {len(all_edges)} 条边")
			
 
				+    print(f"  分类共现（跨点）边: {len(category_edges)}")
			
 
				+    print(f"  分类共现（点内）边: {len(intra_category_edges)}")
			
 
				+    print(f"  标签共现边: {len(tag_cooccurrence_edges)}")
			
 
				+    print(f"  属于边: {belong_count}")
			
 
				+    print(f"  包含边: {contain_count}")
			
 
				+
			
 
				+    # ===== 获取帖子详情 =====
			
 
				+    print("\n" + "="*60)
			
 
				+    print("获取帖子详情...")
			
 
				+
			
 
				+    # 收集所有需要获取详情的帖子ID（从节点和边）
			
 
				+    post_ids_from_nodes = collect_all_post_ids_from_nodes(all_nodes)
			
 
				+    post_ids_from_edges = collect_all_post_ids_from_edges(all_edges)
			
 
				+    all_post_ids = post_ids_from_nodes | post_ids_from_edges
			
 
				+    print(f"节点中的帖子: {len(post_ids_from_nodes)} 个")
			
 
				+    print(f"边中的帖子: {len(post_ids_from_edges)} 个")
			
 
				+    print(f"合计（去重）: {len(all_post_ids)} 个")
			
 
				+
			
 
				+    # 批量获取帖子详情
			
 
				+    post_details = fetch_post_details(all_post_ids)
			
 
				+
			
 
				+    # ===== 保存结果 =====
			
 
				+    print("\n" + "="*60)
			
 
				+
			
 
				+    # 输出文件路径
			
 
				+    post_details_output_file = config.intermediate_dir / "帖子详情映射.json"
			
 
				+
			
 
				+    # 保存节点列表
			
 
				+    nodes_output = {
			
 
				+        "说明": {
			
 
				+            "描述": "分类和标签节点列表",
			
 
				+            "数据来源": ["过去帖子_pattern聚合结果.json"],
			
 
				+            "过滤模式": filter_mode,
			
 
				+            "过滤帖子数": len(exclude_post_ids) if exclude_post_ids else 0
			
 
				+        },
			
 
				+        "节点列表": all_nodes
			
 
				+    }
			
 
				+
			
 
				+    print(f"正在保存节点列表到: {nodes_output_file}")
			
 
				+    with open(nodes_output_file, "w", encoding="utf-8") as f:
			
 
				+        json.dump(nodes_output, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    # 构建节点ID索引的边关系: 节点 -> 边类型 -> {目标节点: 完整边信息}
			
 
				+    edges_by_node = {}  # key: 节点ID, value: {边类型: {目标节点ID: 完整边信息}}
			
 
				+    for edge in all_edges:
			
 
				+        source_id = edge["源节点ID"]
			
 
				+        target_id = edge["目标节点ID"]
			
 
				+        edge_type = edge["边类型"]
			
 
				+
			
 
				+        # 源节点 -> 目标节点
			
 
				+        if source_id not in edges_by_node:
			
 
				+            edges_by_node[source_id] = {}
			
 
				+        if edge_type not in edges_by_node[source_id]:
			
 
				+            edges_by_node[source_id][edge_type] = {}
			
 
				+        edges_by_node[source_id][edge_type][target_id] = edge
			
 
				+
			
 
				+    # 保存边关系
			
 
				+    edges_output = {
			
 
				+        "说明": {
			
 
				+            "描述": "分类和标签之间的边关系",
			
 
				+            "数据来源": ["过去帖子_pattern聚合结果.json", "dimension_associations_analysis.json", "过去帖子_what解构结果目录"],
			
 
				+            "过滤模式": filter_mode,
			
 
				+            "过滤帖子数": len(exclude_post_ids) if exclude_post_ids else 0
			
 
				+        },
			
 
				+        "边列表": all_edges,
			
 
				+        "节点边索引": edges_by_node
			
 
				+    }
			
 
				+
			
 
				+    print(f"正在保存边关系到: {edges_output_file}")
			
 
				+    with open(edges_output_file, "w", encoding="utf-8") as f:
			
 
				+        json.dump(edges_output, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    # 保存帖子详情映射
			
 
				+    post_details_output = {
			
 
				+        "说明": {
			
 
				+            "描述": "帖子ID到帖子详情的映射",
			
 
				+            "帖子数": len(post_details)
			
 
				+        },
			
 
				+        "帖子详情": post_details
			
 
				+    }
			
 
				+
			
 
				+    print(f"正在保存帖子详情映射到: {post_details_output_file}")
			
 
				+    with open(post_details_output_file, "w", encoding="utf-8") as f:
			
 
				+        json.dump(post_details_output, f, ensure_ascii=False, indent=2)
			
 
				+
			
 
				+    print("\n完成!")
			
 
				+    print(f"\n输出文件:")
			
 
				+    print(f"  节点列表: {len(all_nodes)} 个节点")
			
 
				+    print(f"  边关系: {len(all_edges)} 条边")
			
 
				+    print(f"  帖子详情映射: {len(post_details)} 个帖子")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/script/data_processing/visualize_match_graph.py
+++ b/script/data_processing/visualize_match_graph.py
@@ -0,0 +1,1100 @@
 
				+#!/usr/bin/env python3
			
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+将匹配图谱数据可视化为交互式HTML文件
			
 
				+
			
 
				+输入：match_graph目录下的JSON文件
			
 
				+输出：单个HTML文件，包含所有帖子的图谱，可通过Tab切换
			
 
				+"""
			
 
				+
			
 
				+import json
			
 
				+from pathlib import Path
			
 
				+from typing import Dict, List
			
 
				+import sys
			
 
				+
			
 
				+# Add the project root (three .parent hops up from this file's path) to the front of sys.path; the script.data_processing import that follows presumably relies on it.
			
 
				+project_root = Path(__file__).parent.parent.parent
			
 
				+sys.path.insert(0, str(project_root))
			
 
				+
			
 
				+from script.data_processing.path_config import PathConfig
			
 
				+
			
 
				+
			
 
				+HTML_TEMPLATE = '''<!DOCTYPE html>
			
 
				+<html lang="zh-CN">
			
 
				+<head>
			
 
				+    <meta charset="UTF-8">
			
 
				+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
			
 
				+    <title>匹配图谱可视化</title>
			
 
				+    <script src="https://d3js.org/d3.v7.min.js"></script>
			
 
				+    <style>
			
 
				+        * {{
			
 
				+            margin: 0;
			
 
				+            padding: 0;
			
 
				+            box-sizing: border-box;
			
 
				+        }}
			
 
				+        body {{
			
 
				+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
			
 
				+            background: #1a1a2e;
			
 
				+            color: #eee;
			
 
				+            overflow: hidden;
			
 
				+        }}
			
 
				+        #container {{
			
 
				+            display: flex;
			
 
				+            height: 100vh;
			
 
				+            flex-direction: column;
			
 
				+        }}
			
 
				+
			
 
				+        /* Tab样式 */
			
 
				+        .tabs {{
			
 
				+            display: flex;
			
 
				+            background: #0f3460;
			
 
				+            padding: 0 20px;
			
 
				+            overflow-x: auto;
			
 
				+            flex-shrink: 0;
			
 
				+        }}
			
 
				+        .tab {{
			
 
				+            padding: 12px 20px;
			
 
				+            cursor: pointer;
			
 
				+            border-bottom: 3px solid transparent;
			
 
				+            white-space: nowrap;
			
 
				+            font-size: 13px;
			
 
				+            color: #888;
			
 
				+            transition: all 0.2s;
			
 
				+        }}
			
 
				+        .tab:hover {{
			
 
				+            color: #fff;
			
 
				+            background: rgba(255,255,255,0.05);
			
 
				+        }}
			
 
				+        .tab.active {{
			
 
				+            color: #e94560;
			
 
				+            border-bottom-color: #e94560;
			
 
				+            background: rgba(233, 69, 96, 0.1);
			
 
				+        }}
			
 
				+
			
 
				+        /* 主内容区 */
			
 
				+        .main-content {{
			
 
				+            display: flex;
			
 
				+            flex: 1;
			
 
				+            overflow: hidden;
			
 
				+        }}
			
 
				+        #graph {{
			
 
				+            flex: 1;
			
 
				+            position: relative;
			
 
				+        }}
			
 
				+        #sidebar {{
			
 
				+            width: 280px;
			
 
				+            background: #16213e;
			
 
				+            padding: 15px;
			
 
				+            overflow-y: auto;
			
 
				+            border-left: 1px solid #0f3460;
			
 
				+        }}
			
 
				+        h1 {{
			
 
				+            font-size: 15px;
			
 
				+            margin-bottom: 10px;
			
 
				+            color: #e94560;
			
 
				+        }}
			
 
				+        h2 {{
			
 
				+            font-size: 12px;
			
 
				+            margin: 10px 0 6px;
			
 
				+            color: #0f9b8e;
			
 
				+        }}
			
 
				+        .legend {{
			
 
				+            margin-top: 10px;
			
 
				+        }}
			
 
				+        .legend-grid {{
			
 
				+            display: grid;
			
 
				+            grid-template-columns: 1fr 1fr;
			
 
				+            gap: 4px 8px;
			
 
				+        }}
			
 
				+        .legend-item {{
			
 
				+            display: flex;
			
 
				+            align-items: center;
			
 
				+            font-size: 11px;
			
 
				+        }}
			
 
				+        .legend-color {{
			
 
				+            width: 12px;
			
 
				+            height: 12px;
			
 
				+            border-radius: 50%;
			
 
				+            margin-right: 6px;
			
 
				+            flex-shrink: 0;
			
 
				+        }}
			
 
				+        .legend-line {{
			
 
				+            width: 20px;
			
 
				+            height: 3px;
			
 
				+            margin-right: 6px;
			
 
				+            flex-shrink: 0;
			
 
				+        }}
			
 
				+        .detail-panel {{
			
 
				+            margin-top: 20px;
			
 
				+            padding: 15px;
			
 
				+            background: #0f3460;
			
 
				+            border-radius: 8px;
			
 
				+            display: none;
			
 
				+        }}
			
 
				+        .detail-panel.active {{
			
 
				+            display: block;
			
 
				+        }}
			
 
				+        .detail-panel h3 {{
			
 
				+            font-size: 14px;
			
 
				+            margin-bottom: 10px;
			
 
				+            color: #e94560;
			
 
				+        }}
			
 
				+        .detail-panel p {{
			
 
				+            font-size: 12px;
			
 
				+            line-height: 1.6;
			
 
				+            color: #ccc;
			
 
				+            margin: 5px 0;
			
 
				+        }}
			
 
				+        .detail-panel .label {{
			
 
				+            color: #888;
			
 
				+        }}
			
 
				+        .detail-panel .close-btn {{
			
 
				+            position: absolute;
			
 
				+            top: 10px;
			
 
				+            right: 10px;
			
 
				+            background: none;
			
 
				+            border: none;
			
 
				+            color: #888;
			
 
				+            cursor: pointer;
			
 
				+            font-size: 16px;
			
 
				+        }}
			
 
				+        .detail-panel .close-btn:hover {{
			
 
				+            color: #e94560;
			
 
				+        }}
			
 
				+        .detail-panel-wrapper {{
			
 
				+            position: relative;
			
 
				+        }}
			
 
				+        .similarity-score {{
			
 
				+            background: #e94560;
			
 
				+            color: #fff;
			
 
				+            padding: 2px 6px;
			
 
				+            border-radius: 4px;
			
 
				+            font-weight: bold;
			
 
				+        }}
			
 
				+        .edge-description {{
			
 
				+            background: #1a1a2e;
			
 
				+            padding: 10px;
			
 
				+            border-radius: 4px;
			
 
				+            margin-top: 8px;
			
 
				+            font-size: 11px;
			
 
				+            line-height: 1.5;
			
 
				+        }}
			
 
				+        svg {{
			
 
				+            width: 100%;
			
 
				+            height: 100%;
			
 
				+        }}
			
 
				+        .node {{
			
 
				+            cursor: pointer;
			
 
				+        }}
			
 
				+        .node circle, .node rect {{
			
 
				+            stroke-width: 3px;
			
 
				+        }}
			
 
				+        .node .post-node {{
			
 
				+            stroke: #fff;
			
 
				+            stroke-dasharray: 4,2;
			
 
				+        }}
			
 
				+        .node .persona-node {{
			
 
				+            stroke: #fff;
			
 
				+        }}
			
 
				+        .node text {{
			
 
				+            font-size: 11px;
			
 
				+            fill: #fff;
			
 
				+            pointer-events: none;
			
 
				+        }}
			
 
				+        .link {{
			
 
				+            stroke-opacity: 0.7;
			
 
				+        }}
			
 
				+        .link-hitarea {{
			
 
				+            stroke: transparent;
			
 
				+            stroke-width: 15px;
			
 
				+            cursor: pointer;
			
 
				+            fill: none;
			
 
				+        }}
			
 
				+        .link-hitarea:hover + .link {{
			
 
				+            stroke-opacity: 1;
			
 
				+            stroke-width: 3px;
			
 
				+        }}
			
 
				+        .edge-label {{
			
 
				+            font-size: 10px;
			
 
				+            fill: #fff;
			
 
				+            pointer-events: none;
			
 
				+            text-anchor: middle;
			
 
				+        }}
			
 
				+        .edge-label-bg {{
			
 
				+            fill: rgba(0,0,0,0.7);
			
 
				+        }}
			
 
				+        .link.match {{
			
 
				+            stroke: #e94560;
			
 
				+            stroke-dasharray: 5,5;
			
 
				+        }}
			
 
				+        .link.category-cross {{
			
 
				+            stroke: #2ecc71;
			
 
				+        }}
			
 
				+        .link.category-intra {{
			
 
				+            stroke: #27ae60;
			
 
				+            stroke-dasharray: 3,3;
			
 
				+        }}
			
 
				+        .link.tag-cooccur {{
			
 
				+            stroke: #f39c12;
			
 
				+        }}
			
 
				+        .link.belong {{
			
 
				+            stroke: #9b59b6;
			
 
				+        }}
			
 
				+        .link.contain {{
			
 
				+            stroke: #8e44ad;
			
 
				+            stroke-dasharray: 2,2;
			
 
				+        }}
			
 
				+        /* 镜像边样式（虚线，颜色与原边相同） */
			
 
				+        .link.mirror-category-cross {{
			
 
				+            stroke: #2ecc71;
			
 
				+            stroke-dasharray: 6,3;
			
 
				+        }}
			
 
				+        .link.mirror-category-intra {{
			
 
				+            stroke: #27ae60;
			
 
				+            stroke-dasharray: 6,3;
			
 
				+        }}
			
 
				+        .link.mirror-tag-cooccur {{
			
 
				+            stroke: #f39c12;
			
 
				+            stroke-dasharray: 6,3;
			
 
				+        }}
			
 
				+        .link.mirror-belong {{
			
 
				+            stroke: #9b59b6;
			
 
				+            stroke-dasharray: 6,3;
			
 
				+        }}
			
 
				+        .link.mirror-contain {{
			
 
				+            stroke: #8e44ad;
			
 
				+            stroke-dasharray: 6,3;
			
 
				+        }}
			
 
				+        .tooltip {{
			
 
				+            position: absolute;
			
 
				+            background: rgba(0,0,0,0.9);
			
 
				+            color: #fff;
			
 
				+            padding: 10px 15px;
			
 
				+            border-radius: 6px;
			
 
				+            font-size: 12px;
			
 
				+            pointer-events: none;
			
 
				+            max-width: 300px;
			
 
				+            z-index: 1000;
			
 
				+            display: none;
			
 
				+        }}
			
 
				+        .controls {{
			
 
				+            position: absolute;
			
 
				+            top: 20px;
			
 
				+            left: 20px;
			
 
				+            background: rgba(22, 33, 62, 0.9);
			
 
				+            padding: 15px;
			
 
				+            border-radius: 8px;
			
 
				+            z-index: 100;
			
 
				+        }}
			
 
				+        .controls button {{
			
 
				+            background: #0f3460;
			
 
				+            color: #fff;
			
 
				+            border: none;
			
 
				+            padding: 8px 15px;
			
 
				+            margin: 5px;
			
 
				+            border-radius: 4px;
			
 
				+            cursor: pointer;
			
 
				+            font-size: 12px;
			
 
				+        }}
			
 
				+        .controls button:hover {{
			
 
				+            background: #e94560;
			
 
				+        }}
			
 
				+    </style>
			
 
				+</head>
			
 
				+<body>
			
 
				+    <div id="container">
			
 
				+        <div class="tabs" id="tabs">
			
 
				+            {tabs_html}
			
 
				+        </div>
			
 
				+        <div class="main-content">
			
 
				+            <div id="graph">
			
 
				+                <div class="controls">
			
 
				+                    <button onclick="resetZoom()">重置视图</button>
			
 
				+                    <button onclick="toggleLabels()">切换标签</button>
			
 
				+                </div>
			
 
				+                <div class="tooltip" id="tooltip"></div>
			
 
				+            </div>
			
 
				+            <div id="sidebar">
			
 
				+                <h1>匹配图谱</h1>
			
 
				+
			
 
				+                <div class="detail-panel active" id="detailPanel">
			
 
				+                    <h3 id="detailTitle">点击节点或边查看详情</h3>
			
 
				+                    <div id="detailContent">
			
 
				+                        <p style="color: #888; font-size: 11px;">点击图中的节点或边，这里会显示详细信息</p>
			
 
				+                    </div>
			
 
				+                </div>
			
 
				+
			
 
				+                <div class="legend">
			
 
				+                    <h2>节点</h2>
			
 
				+                    <div class="legend-grid">
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #666; border: 2px dashed #fff;"></div>
			
 
				+                            <span>帖子(虚线)</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #666; border: 2px solid #fff;"></div>
			
 
				+                            <span>人设(实线)</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #666; border-radius: 50%;"></div>
			
 
				+                            <span>标签(圆)</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #666; border-radius: 2px;"></div>
			
 
				+                            <span>分类(方)</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #666; opacity: 1;"></div>
			
 
				+                            <span>直接匹配</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #666; opacity: 0.5;"></div>
			
 
				+                            <span>扩展节点</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #f39c12;"></div>
			
 
				+                            <span>灵感点</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #3498db;"></div>
			
 
				+                            <span>目的点</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-color" style="background: #9b59b6;"></div>
			
 
				+                            <span>关键点</span>
			
 
				+                        </div>
			
 
				+                    </div>
			
 
				+                    <h2>边(人设/实线)</h2>
			
 
				+                    <div class="legend-grid">
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: #e94560;"></div>
			
 
				+                            <span>匹配</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: #2ecc71;"></div>
			
 
				+                            <span>分类共现(跨)</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: #27ae60;"></div>
			
 
				+                            <span>分类共现(内)</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: #f39c12;"></div>
			
 
				+                            <span>标签共现</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: #9b59b6;"></div>
			
 
				+                            <span>属于</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: #8e44ad;"></div>
			
 
				+                            <span>包含</span>
			
 
				+                        </div>
			
 
				+                    </div>
			
 
				+                    <h2>镜像边(帖子/虚线)</h2>
			
 
				+                    <div class="legend-grid">
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: repeating-linear-gradient(90deg, #2ecc71, #2ecc71 6px, transparent 6px, transparent 9px);"></div>
			
 
				+                            <span>分类共现</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: repeating-linear-gradient(90deg, #f39c12, #f39c12 6px, transparent 6px, transparent 9px);"></div>
			
 
				+                            <span>标签共现</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: repeating-linear-gradient(90deg, #9b59b6, #9b59b6 6px, transparent 6px, transparent 9px);"></div>
			
 
				+                            <span>属于</span>
			
 
				+                        </div>
			
 
				+                        <div class="legend-item">
			
 
				+                            <div class="legend-line" style="background: repeating-linear-gradient(90deg, #8e44ad, #8e44ad 6px, transparent 6px, transparent 9px);"></div>
			
 
				+                            <span>包含</span>
			
 
				+                        </div>
			
 
				+                    </div>
			
 
				+                </div>
			
 
				+            </div>
			
 
				+        </div>
			
 
				+    </div>
			
 
				+
			
 
				+    <script>
			
 
				+        // 所有帖子的图谱数据
			
 
				+        const allGraphData = {all_graph_data};
			
 
				+
			
 
				+        // 当前选中的帖子索引
			
 
				+        let currentIndex = 0;
			
 
				+        let simulation = null;
			
 
				+        let svg = null;
			
 
				+        let g = null;
			
 
				+        let zoom = null;
			
 
				+        let showLabels = true;
			
 
				+
			
 
				+        // 初始化
			
 
				+        function init() {{
			
 
				+            const container = document.getElementById("graph");
			
 
				+            const width = container.clientWidth;
			
 
				+            const height = container.clientHeight;
			
 
				+
			
 
				+            svg = d3.select("#graph")
			
 
				+                .append("svg")
			
 
				+                .attr("width", width)
			
 
				+                .attr("height", height);
			
 
				+
			
 
				+            g = svg.append("g");
			
 
				+
			
 
				+            zoom = d3.zoom()
			
 
				+                .scaleExtent([0.1, 4])
			
 
				+                .on("zoom", (event) => {{
			
 
				+                    g.attr("transform", event.transform);
			
 
				+                }});
			
 
				+
			
 
				+            svg.call(zoom);
			
 
				+
			
 
				+            // 绑定Tab点击事件
			
 
				+            document.querySelectorAll(".tab").forEach((tab, index) => {{
			
 
				+                tab.addEventListener("click", () => switchTab(index));
			
 
				+            }});
			
 
				+
			
 
				+            // 显示第一个帖子
			
 
				+            switchTab(0);
			
 
				+        }}
			
 
				+
			
 
				+        // 切换Tab
			
 
				+        function switchTab(index) {{
			
 
				+            currentIndex = index;
			
 
				+
			
 
				+            // 更新Tab样式
			
 
				+            document.querySelectorAll(".tab").forEach((tab, i) => {{
			
 
				+                tab.classList.toggle("active", i === index);
			
 
				+            }});
			
 
				+
			
 
				+            // 更新图谱
			
 
				+            renderGraph(allGraphData[index]);
			
 
				+        }}
			
 
				+
			
 
				+        // 渲染图谱
			
 
				+        function renderGraph(data) {{
			
 
				+            // 清空现有图谱
			
 
				+            g.selectAll("*").remove();
			
 
				+            if (simulation) {{
			
 
				+                simulation.stop();
			
 
				+            }}
			
 
				+
			
 
				+            const container = document.getElementById("graph");
			
 
				+            const width = container.clientWidth;
			
 
				+            const height = container.clientHeight;
			
 
				+
			
 
				+            // 准备数据
			
 
				+            const nodes = data.nodes.map(n => ({{
			
 
				+                ...n,
			
 
				+                id: n.节点ID,
			
 
				+                source: n.节点ID.startsWith("帖子_") ? "帖子" : "人设",
			
 
				+                level: n.节点层级
			
 
				+            }}));
			
 
				+
			
 
				+            const links = data.edges.map(e => ({{
			
 
				+                ...e,
			
 
				+                source: e.源节点ID,
			
 
				+                target: e.目标节点ID,
			
 
				+                type: e.边类型
			
 
				+            }}));
			
 
				+
			
 
				+            // 分离帖子节点和人设节点
			
 
				+            const postNodes = nodes.filter(n => n.source === "帖子");
			
 
				+            const personaNodes = nodes.filter(n => n.source === "人设");
			
 
				+            const matchLinks = links.filter(l => l.type === "匹配");
			
 
				+
			
 
				+            // 构建帖子节点到人设节点的映射
			
 
				+            const postToPersona = {{}};
			
 
				+            const personaToPost = {{}};
			
 
				+            matchLinks.forEach(l => {{
			
 
				+                const sid = typeof l.source === "object" ? l.source.id : l.source;
			
 
				+                const tid = typeof l.target === "object" ? l.target.id : l.target;
			
 
				+                if (!postToPersona[sid]) postToPersona[sid] = [];
			
 
				+                postToPersona[sid].push(tid);
			
 
				+                if (!personaToPost[tid]) personaToPost[tid] = [];
			
 
				+                personaToPost[tid].push(sid);
			
 
				+            }});
			
 
				+
			
 
				+            // 找出所有连通分量
			
 
				+            function findConnectedComponents(nodes, links) {{
			
 
				+                const nodeIds = new Set(nodes.map(n => n.id));
			
 
				+                const adj = {{}};
			
 
				+                nodeIds.forEach(id => adj[id] = []);
			
 
				+
			
 
				+                links.forEach(l => {{
			
 
				+                    const sid = typeof l.source === "object" ? l.source.id : l.source;
			
 
				+                    const tid = typeof l.target === "object" ? l.target.id : l.target;
			
 
				+                    if (nodeIds.has(sid) && nodeIds.has(tid)) {{
			
 
				+                        adj[sid].push(tid);
			
 
				+                        adj[tid].push(sid);
			
 
				+                    }}
			
 
				+                }});
			
 
				+
			
 
				+                const visited = new Set();
			
 
				+                const components = [];
			
 
				+
			
 
				+                nodeIds.forEach(startId => {{
			
 
				+                    if (visited.has(startId)) return;
			
 
				+
			
 
				+                    const component = [];
			
 
				+                    const queue = [startId];
			
 
				+
			
 
				+                    while (queue.length > 0) {{
			
 
				+                        const id = queue.shift();
			
 
				+                        if (visited.has(id)) continue;
			
 
				+                        visited.add(id);
			
 
				+                        component.push(id);
			
 
				+                        adj[id].forEach(neighbor => {{
			
 
				+                            if (!visited.has(neighbor)) queue.push(neighbor);
			
 
				+                        }});
			
 
				+                    }}
			
 
				+
			
 
				+                    components.push(component);
			
 
				+                }});
			
 
				+
			
 
				+                return components;
			
 
				+            }}
			
 
				+
			
 
				+            // 按大小排序连通分量（大的在前）
			
 
				+            const components = findConnectedComponents(nodes, links)
			
 
				+                .sort((a, b) => b.length - a.length);
			
 
				+            console.log(`找到 ${{components.length}} 个连通分量`);
			
 
				+
			
 
				+            // 为每个节点分配连通分量ID和分量内的X范围
			
 
				+            const nodeToComponent = {{}};
			
 
				+            const componentCenters = {{}};
			
 
				+            const componentBounds = {{}};
			
 
				+            const padding = 50;  // 分量之间的间距
			
 
				+            const totalPadding = padding * (components.length - 1);
			
 
				+            const availableWidth = width - totalPadding - 100;  // 留边距
			
 
				+
			
 
				+            // 根据分量大小分配宽度
			
 
				+            const totalNodes = nodes.length;
			
 
				+            let currentX = 50;  // 起始边距
			
 
				+
			
 
				+            components.forEach((comp, i) => {{
			
 
				+                const compWidth = Math.max(150, (comp.length / totalNodes) * availableWidth);
			
 
				+                const centerX = currentX + compWidth / 2;
			
 
				+                componentCenters[i] = centerX;
			
 
				+                componentBounds[i] = {{ start: currentX, end: currentX + compWidth, width: compWidth }};
			
 
				+                comp.forEach(nodeId => {{
			
 
				+                    nodeToComponent[nodeId] = i;
			
 
				+                }});
			
 
				+                currentX += compWidth + padding;
			
 
				+            }});
			
 
				+
			
 
				+            // 使用重心法(Barycenter)减少边交叉
			
 
				+            // 迭代优化：交替调整两层节点的顺序
			
 
				+
			
 
				+            const nodeTargetX = {{}};
			
 
				+            const personaXMap = {{}};
			
 
				+
			
 
				+            // 对每个连通分量单独处理
			
 
				+            components.forEach((comp, compIdx) => {{
			
 
				+                const bounds = componentBounds[compIdx];
			
 
				+                const compPostNodes = postNodes.filter(n => nodeToComponent[n.id] === compIdx);
			
 
				+                const compPersonaNodes = personaNodes.filter(n => nodeToComponent[n.id] === compIdx);
			
 
				+
			
 
				+                if (compPostNodes.length === 0 || compPersonaNodes.length === 0) {{
			
 
				+                    // 没有匹配关系的分量，均匀分布
			
 
				+                    const spacing = bounds.width / (comp.length + 1);
			
 
				+                    comp.forEach((nodeId, i) => {{
			
 
				+                        const node = nodes.find(n => n.id === nodeId);
			
 
				+                        if (node) {{
			
 
				+                            node.x = bounds.start + spacing * (i + 1);
			
 
				+                            nodeTargetX[nodeId] = node.x;
			
 
				+                            if (node.source === "人设") personaXMap[nodeId] = node.x;
			
 
				+                        }}
			
 
				+                    }});
			
 
				+                    return;
			
 
				+                }}
			
 
				+
			
 
				+                // 初始化：给人设节点一个初始顺序
			
 
				+                let personaOrder = compPersonaNodes.map((n, i) => ({{ node: n, order: i }}));
			
 
				+
			
 
				+                // 迭代优化（3轮）
			
 
				+                for (let iter = 0; iter < 3; iter++) {{
			
 
				+                    // 1. 根据人设节点位置，计算帖子节点的重心
			
 
				+                    const postBarycenter = {{}};
			
 
				+                    compPostNodes.forEach(pn => {{
			
 
				+                        const matched = postToPersona[pn.id] || [];
			
 
				+                        if (matched.length > 0) {{
			
 
				+                            const avgOrder = matched.reduce((sum, pid) => {{
			
 
				+                                const po = personaOrder.find(p => p.node.id === pid);
			
 
				+                                return sum + (po ? po.order : 0);
			
 
				+                            }}, 0) / matched.length;
			
 
				+                            postBarycenter[pn.id] = avgOrder;
			
 
				+                        }} else {{
			
 
				+                            postBarycenter[pn.id] = 0;
			
 
				+                        }}
			
 
				+                    }});
			
 
				+
			
 
				+                    // 按重心排序帖子节点
			
 
				+                    const sortedPosts = [...compPostNodes].sort((a, b) =>
			
 
				+                        postBarycenter[a.id] - postBarycenter[b.id]
			
 
				+                    );
			
 
				+
			
 
				+                    // 2. 根据帖子节点位置，重新计算人设节点的重心
			
 
				+                    const personaBarycenter = {{}};
			
 
				+                    compPersonaNodes.forEach(pn => {{
			
 
				+                        const matched = personaToPost[pn.id] || [];
			
 
				+                        if (matched.length > 0) {{
			
 
				+                            const avgOrder = matched.reduce((sum, pid) => {{
			
 
				+                                const idx = sortedPosts.findIndex(p => p.id === pid);
			
 
				+                                return sum + (idx >= 0 ? idx : 0);
			
 
				+                            }}, 0) / matched.length;
			
 
				+                            personaBarycenter[pn.id] = avgOrder;
			
 
				+                        }} else {{
			
 
				+                            personaBarycenter[pn.id] = personaOrder.find(p => p.node.id === pn.id)?.order || 0;
			
 
				+                        }}
			
 
				+                    }});
			
 
				+
			
 
				+                    // 更新人设节点顺序
			
 
				+                    personaOrder = compPersonaNodes
			
 
				+                        .map(n => ({{ node: n, order: personaBarycenter[n.id] }}))
			
 
				+                        .sort((a, b) => a.order - b.order)
			
 
				+                        .map((item, i) => ({{ node: item.node, order: i }}));
			
 
				+                }}
			
 
				+
			
 
				+                // 最终排序
			
 
				+                const finalPersonaOrder = personaOrder.map(p => p.node);
			
 
				+                const postBarycenter = {{}};
			
 
				+                compPostNodes.forEach(pn => {{
			
 
				+                    const matched = postToPersona[pn.id] || [];
			
 
				+                    if (matched.length > 0) {{
			
 
				+                        const avgOrder = matched.reduce((sum, pid) => {{
			
 
				+                            const idx = finalPersonaOrder.findIndex(n => n.id === pid);
			
 
				+                            return sum + (idx >= 0 ? idx : 0);
			
 
				+                        }}, 0) / matched.length;
			
 
				+                        postBarycenter[pn.id] = avgOrder;
			
 
				+                    }} else {{
			
 
				+                        postBarycenter[pn.id] = 0;
			
 
				+                    }}
			
 
				+                }});
			
 
				+                const finalPostOrder = [...compPostNodes].sort((a, b) =>
			
 
				+                    postBarycenter[a.id] - postBarycenter[b.id]
			
 
				+                );
			
 
				+
			
 
				+                // 设置位置
			
 
				+                const personaSpacing = bounds.width / (finalPersonaOrder.length + 1);
			
 
				+                finalPersonaOrder.forEach((n, i) => {{
			
 
				+                    n.x = bounds.start + personaSpacing * (i + 1);
			
 
				+                    nodeTargetX[n.id] = n.x;
			
 
				+                    personaXMap[n.id] = n.x;
			
 
				+                }});
			
 
				+
			
 
				+                const postSpacing = bounds.width / (finalPostOrder.length + 1);
			
 
				+                finalPostOrder.forEach((n, i) => {{
			
 
				+                    // 帖子节点用重心位置（匹配人设的平均X）
			
 
				+                    const matched = postToPersona[n.id] || [];
			
 
				+                    if (matched.length > 0) {{
			
 
				+                        const avgX = matched.reduce((sum, pid) => sum + (personaXMap[pid] || bounds.start + bounds.width/2), 0) / matched.length;
			
 
				+                        n.x = avgX;
			
 
				+                    }} else {{
			
 
				+                        n.x = bounds.start + postSpacing * (i + 1);
			
 
				+                    }}
			
 
				+                    nodeTargetX[n.id] = n.x;
			
 
				+                }});
			
 
				+            }});
			
 
				+
			
 
				+            // 节点颜色
			
 
				+            const levelColors = {{
			
 
				+                "灵感点": "#f39c12",
			
 
				+                "目的点": "#3498db",
			
 
				+                "关键点": "#9b59b6"
			
 
				+            }};
			
 
				+
			
 
				+            // 两层Y坐标（带倾斜：右边高，左边低）
			
 
				+            const postBaseY = height * 0.25;      // 帖子节点基准Y
			
 
				+            const personaBaseY = height * 0.7;    // 人设节点基准Y
			
 
				+            const tiltAmount = height * 0.25;     // 倾斜幅度（约14度）
			
 
				+
			
 
				+            // 根据X位置计算Y（右边高，左边低）
			
 
				+            function getTiltedY(baseY, x) {{
			
 
				+                const tilt = tiltAmount * (0.5 - x / width);
			
 
				+                return baseY + tilt;
			
 
				+            }}
			
 
				+
			
 
				+            // 力导向模拟
			
 
				+            simulation = d3.forceSimulation(nodes)
			
 
				+                .force("link", d3.forceLink(links).id(d => d.id).distance(120).strength(0.1))
			
 
				+                .force("charge", d3.forceManyBody().strength(-400))  // 更强的互斥
			
 
				+                // X方向：拉向目标位置，但允许被推开
			
 
				+                .force("x", d3.forceX(d => nodeTargetX[d.id] || width / 2).strength(0.15))
			
 
				+                // Y方向力：带倾斜
			
 
				+                .force("y", d3.forceY(d => {{
			
 
				+                    const baseY = d.source === "帖子" ? postBaseY : personaBaseY;
			
 
				+                    return getTiltedY(baseY, d.x || width / 2);
			
 
				+                }}).strength(0.4))
			
 
				+                .force("collision", d3.forceCollide().radius(50));  // 更大的碰撞半径
			
 
				+
			
 
				+            // 边类型到CSS类的映射
			
 
				+            const edgeTypeClass = {{
			
 
				+                "匹配": "match",
			
 
				+                "分类共现（跨点）": "category-cross",
			
 
				+                "分类共现（点内）": "category-intra",
			
 
				+                "标签共现": "tag-cooccur",
			
 
				+                "属于": "belong",
			
 
				+                "包含": "contain",
			
 
				+                // 镜像边（帖子节点之间，虚线）
			
 
				+                "镜像_分类共现（跨点）": "mirror-category-cross",
			
 
				+                "镜像_分类共现（点内）": "mirror-category-intra",
			
 
				+                "镜像_标签共现": "mirror-tag-cooccur",
			
 
				+                "镜像_属于": "mirror-belong",
			
 
				+                "镜像_包含": "mirror-contain"
			
 
				+            }};
			
 
				+
			
 
				+            // 创建边的容器
			
 
				+            const linkGroup = g.append("g").attr("class", "links");
			
 
				+
			
 
				+            // 为每条边创建组
			
 
				+            const linkG = linkGroup.selectAll("g")
			
 
				+                .data(links)
			
 
				+                .join("g")
			
 
				+                .attr("class", "link-group");
			
 
				+
			
 
				+            // 绘制点击热区（透明宽线）
			
 
				+            const linkHitarea = linkG.append("line")
			
 
				+                .attr("class", "link-hitarea");
			
 
				+
			
 
				+            // 绘制可见的边
			
 
				+            const link = linkG.append("line")
			
 
				+                .attr("class", d => "link " + (edgeTypeClass[d.type] || "match"))
			
 
				+                .attr("stroke-width", d => d.type === "匹配" ? 2.5 : 1.5);
			
 
				+
			
 
				+            // 为匹配边添加分数标签
			
 
				+            const edgeLabels = linkG.filter(d => d.type === "匹配" && d.边详情 && d.边详情.相似度)
			
 
				+                .append("g")
			
 
				+                .attr("class", "edge-label-group");
			
 
				+
			
 
				+            edgeLabels.append("rect")
			
 
				+                .attr("class", "edge-label-bg")
			
 
				+                .attr("rx", 3)
			
 
				+                .attr("ry", 3);
			
 
				+
			
 
				+            edgeLabels.append("text")
			
 
				+                .attr("class", "edge-label")
			
 
				+                .text(d => {{
			
 
				+                    const score = d.边详情.相似度;
			
 
				+                    return typeof score === "number" ? score.toFixed(2) : score;
			
 
				+                }});
			
 
				+
			
 
				+            // 边的点击事件
			
 
				+            linkHitarea.on("click", (event, d) => {{
			
 
				+                event.stopPropagation();
			
 
				+                showEdgeInfo(d);
			
 
				+            }})
			
 
				+            .on("mouseover", function(event, d) {{
			
 
				+                d3.select(this.parentNode).select(".link")
			
 
				+                    .attr("stroke-opacity", 1)
			
 
				+                    .attr("stroke-width", 4);
			
 
				+            }})
			
 
				+            .on("mouseout", function(event, d) {{
			
 
				+                d3.select(this.parentNode).select(".link")
			
 
				+                    .attr("stroke-opacity", 0.7)
			
 
				+                    .attr("stroke-width", d.type === "匹配" ? 2.5 : 1.5);
			
 
				+            }});
			
 
				+
			
 
				+            // 绘制节点
			
 
				+            const node = g.append("g")
			
 
				+                .selectAll("g")
			
 
				+                .data(nodes)
			
 
				+                .join("g")
			
 
				+                .attr("class", "node")
			
 
				+                .call(d3.drag()
			
 
				+                    .on("start", dragstarted)
			
 
				+                    .on("drag", dragged)
			
 
				+                    .on("end", dragended));
			
 
				+
			
 
				+            // 根据节点类型绘制不同形状：标签用圆形，分类用方形
			
 
				+            // 扩展节点用较低透明度表示
			
 
				+            node.each(function(d) {{
			
 
				+                const el = d3.select(this);
			
 
				+                const isExpanded = d.是否扩展 === true;
			
 
				+                const size = d.source === "帖子" ? 12 : (isExpanded ? 8 : 10);
			
 
				+                const fill = levelColors[d.level] || "#666";
			
 
				+                const nodeClass = d.source === "帖子" ? "post-node" : "persona-node";
			
 
				+                const opacity = isExpanded ? 0.5 : 1;
			
 
				+
			
 
				+                if (d.节点类型 === "分类") {{
			
 
				+                    // 方形
			
 
				+                    el.append("rect")
			
 
				+                        .attr("width", size * 2)
			
 
				+                        .attr("height", size * 2)
			
 
				+                        .attr("x", -size)
			
 
				+                        .attr("y", -size)
			
 
				+                        .attr("fill", fill)
			
 
				+                        .attr("class", nodeClass)
			
 
				+                        .attr("rx", 3)
			
 
				+                        .attr("opacity", opacity);
			
 
				+                }} else {{
			
 
				+                    // 圆形（标签）
			
 
				+                    el.append("circle")
			
 
				+                        .attr("r", size)
			
 
				+                        .attr("fill", fill)
			
 
				+                        .attr("class", nodeClass)
			
 
				+                        .attr("opacity", opacity);
			
 
				+                }}
			
 
				+            }});
			
 
				+
			
 
				+            const labels = node.append("text")
			
 
				+                .attr("dx", 15)
			
 
				+                .attr("dy", 4)
			
 
				+                .text(d => d.节点名称)
			
 
				+                .style("display", showLabels ? "block" : "none");
			
 
				+
			
 
				+            // 工具提示
			
 
				+            const tooltip = d3.select("#tooltip");
			
 
				+
			
 
				+            node.on("mouseover", (event, d) => {{
			
 
				+                tooltip.style("display", "block")
			
 
				+                    .html(`<strong>${{d.节点名称}}</strong><br/>类型: ${{d.节点类型}}<br/>层级: ${{d.节点层级}}`);
			
 
				+            }})
			
 
				+            .on("mousemove", (event) => {{
			
 
				+                tooltip.style("left", (event.pageX + 15) + "px")
			
 
				+                    .style("top", (event.pageY - 10) + "px");
			
 
				+            }})
			
 
				+            .on("mouseout", () => {{
			
 
				+                tooltip.style("display", "none");
			
 
				+            }})
			
 
				+            .on("click", (event, d) => {{
			
 
				+                showNodeInfo(d);
			
 
				+            }});
			
 
				+
			
 
				+            // 更新位置
			
 
				+            simulation.on("tick", () => {{
			
 
				+                // 更新热区线
			
 
				+                linkHitarea
			
 
				+                    .attr("x1", d => d.source.x)
			
 
				+                    .attr("y1", d => d.source.y)
			
 
				+                    .attr("x2", d => d.target.x)
			
 
				+                    .attr("y2", d => d.target.y);
			
 
				+
			
 
				+                // 更新可见边
			
 
				+                link
			
 
				+                    .attr("x1", d => d.source.x)
			
 
				+                    .attr("y1", d => d.source.y)
			
 
				+                    .attr("x2", d => d.target.x)
			
 
				+                    .attr("y2", d => d.target.y);
			
 
				+
			
 
				+                // 更新边标签位置（放在边的中点）
			
 
				+                edgeLabels.attr("transform", d => {{
			
 
				+                    const midX = (d.source.x + d.target.x) / 2;
			
 
				+                    const midY = (d.source.y + d.target.y) / 2;
			
 
				+                    return `translate(${{midX}},${{midY}})`;
			
 
				+                }});
			
 
				+
			
 
				+                // 更新标签背景大小
			
 
				+                edgeLabels.each(function(d) {{
			
 
				+                    const textEl = d3.select(this).select("text").node();
			
 
				+                    if (textEl) {{
			
 
				+                        const bbox = textEl.getBBox();
			
 
				+                        d3.select(this).select("rect")
			
 
				+                            .attr("x", bbox.x - 3)
			
 
				+                            .attr("y", bbox.y - 1)
			
 
				+                            .attr("width", bbox.width + 6)
			
 
				+                            .attr("height", bbox.height + 2);
			
 
				+                    }}
			
 
				+                }});
			
 
				+
			
 
				+                node.attr("transform", d => `translate(${{d.x}},${{d.y}})`);
			
 
				+            }});
			
 
				+
			
 
				+            // 拖拽函数
			
 
				+            function dragstarted(event, d) {{
			
 
				+                if (!event.active) simulation.alphaTarget(0.3).restart();
			
 
				+                d.fx = d.x;
			
 
				+                d.fy = d.y;
			
 
				+            }}
			
 
				+
			
 
				+            function dragged(event, d) {{
			
 
				+                d.fx = event.x;
			
 
				+                d.fy = event.y;
			
 
				+            }}
			
 
				+
			
 
				+            function dragended(event, d) {{
			
 
				+                if (!event.active) simulation.alphaTarget(0);
			
 
				+                d.fx = null;
			
 
				+                d.fy = null;
			
 
				+            }}
			
 
				+        }}
			
 
				+
			
 
				+        // 控制函数
			
 
				+        function resetZoom() {{
			
 
				+            const container = document.getElementById("graph");
			
 
				+            const width = container.clientWidth;
			
 
				+            const height = container.clientHeight;
			
 
				+            svg.transition().duration(750).call(
			
 
				+                zoom.transform,
			
 
				+                d3.zoomIdentity.translate(width/2, height/2).scale(1).translate(-width/2, -height/2)
			
 
				+            );
			
 
				+        }}
			
 
				+
			
 
				+        function toggleLabels() {{
			
 
				+            showLabels = !showLabels;
			
 
				+            g.selectAll(".node text").style("display", showLabels ? "block" : "none");
			
 
				+        }}
			
 
				+
			
 
				+        function showNodeInfo(d) {{
			
 
				+            const panel = document.getElementById("detailPanel");
			
 
				+            panel.classList.add("active");
			
 
				+            document.getElementById("detailTitle").textContent = d.source === "帖子" ? "📌 帖子节点" : "👤 人设节点";
			
 
				+
			
 
				+            let html = `
			
 
				+                <p><span class="label">节点ID:</span> ${{d.节点ID}}</p>
			
 
				+                <p><span class="label">名称:</span> <strong>${{d.节点名称}}</strong></p>
			
 
				+                <p><span class="label">类型:</span> ${{d.节点类型}}</p>
			
 
				+                <p><span class="label">层级:</span> ${{d.节点层级}}</p>
			
 
				+            `;
			
 
				+
			
 
				+            if (d.权重) {{
			
 
				+                html += `<p><span class="label">权重:</span> ${{d.权重}}</p>`;
			
 
				+            }}
			
 
				+            if (d.所属分类 && d.所属分类.length > 0) {{
			
 
				+                html += `<p><span class="label">所属分类:</span> ${{d.所属分类.join(" > ")}}</p>`;
			
 
				+            }}
			
 
				+            if (d.帖子数) {{
			
 
				+                html += `<p><span class="label">帖子数:</span> ${{d.帖子数}}</p>`;
			
 
				+            }}
			
 
				+            document.getElementById("detailContent").innerHTML = html;
			
 
				+        }}
			
 
				+
			
 
				+        function showEdgeInfo(d) {{
			
 
				+            const panel = document.getElementById("detailPanel");
			
 
				+            panel.classList.add("active");
			
 
				+
			
 
				+            const sourceNode = typeof d.source === "object" ? d.source : {{ id: d.source }};
			
 
				+            const targetNode = typeof d.target === "object" ? d.target : {{ id: d.target }};
			
 
				+
			
 
				+            // 判断是否为镜像边
			
 
				+            const isMirror = d.type.startsWith("镜像_");
			
 
				+            document.getElementById("detailTitle").textContent = isMirror ? "🪞 镜像边详情" : "🔗 边详情";
			
 
				+
			
 
				+            let html = `
			
 
				+                <p><span class="label">边类型:</span> <strong>${{d.type}}</strong></p>
			
 
				+                <p><span class="label">源节点:</span> ${{sourceNode.节点名称 || sourceNode.id}}</p>
			
 
				+                <p><span class="label">目标节点:</span> ${{targetNode.节点名称 || targetNode.id}}</p>
			
 
				+            `;
			
 
				+
			
 
				+            if (d.边详情) {{
			
 
				+                if (d.边详情.相似度 !== undefined) {{
			
 
				+                    const score = typeof d.边详情.相似度 === "number" ? d.边详情.相似度.toFixed(2) : d.边详情.相似度;
			
 
				+                    html += `<p><span class="label">相似度:</span> <span class="similarity-score">${{score}}</span></p>`;
			
 
				+                }}
			
 
				+                if (d.边详情.说明) {{
			
 
				+                    html += `<p><span class="label">说明:</span></p><div class="edge-description">${{d.边详情.说明}}</div>`;
			
 
				+                }}
			
 
				+                if (d.边详情.共现次数 !== undefined) {{
			
 
				+                    html += `<p><span class="label">共现次数:</span> ${{d.边详情.共现次数}}</p>`;
			
 
				+                }}
			
 
				+                // 镜像边特有信息
			
 
				+                if (d.边详情.原始边类型) {{
			
 
				+                    html += `<p><span class="label">原始边类型:</span> ${{d.边详情.原始边类型}}</p>`;
			
 
				+                }}
			
 
				+                if (d.边详情.源人设节点) {{
			
 
				+                    html += `<p><span class="label">源人设节点:</span> ${{d.边详情.源人设节点}}</p>`;
			
 
				+                }}
			
 
				+                if (d.边详情.目标人设节点) {{
			
 
				+                    html += `<p><span class="label">目标人设节点:</span> ${{d.边详情.目标人设节点}}</p>`;
			
 
				+                }}
			
 
				+            }}
			
 
				+
			
 
				+            document.getElementById("detailContent").innerHTML = html;
			
 
				+        }}
			
 
				+
			
 
				+        function closeDetailPanel() {{
			
 
				+            document.getElementById("detailPanel").classList.remove("active");
			
 
				+        }}
			
 
				+
			
 
				+        // 页面加载完成后初始化
			
 
				+        window.addEventListener("load", init);
			
 
				+        window.addEventListener("resize", () => {{
			
 
				+            if (currentIndex >= 0) {{
			
 
				+                renderGraph(allGraphData[currentIndex]);
			
 
				+            }}
			
 
				+        }});
			
 
				+    </script>
			
 
				+</body>
			
 
				+</html>
			
 
				+'''
			
 
				+
			
 
				+
			
 
				+def generate_combined_html(all_graph_data: List[Dict], output_file: Path):
			
 
				+    """
			
 
				+    生成包含所有帖子图谱的HTML文件
			
 
				+
			
 
				+    Args:
			
 
				+        all_graph_data: 所有帖子的图谱数据列表
			
 
				+        output_file: 输出文件路径
			
 
				+    """
			
 
				+    # 生成Tab HTML
			
 
				+    tabs_html = ""
			
 
				+    for i, data in enumerate(all_graph_data):
			
 
				+        post_title = data.get("postTitle", "")
			
 
				+        # 使用帖子标题，如果太长则截断
			
 
				+        if post_title:
			
 
				+            tab_name = post_title[:15] + "..." if len(post_title) > 15 else post_title
			
 
				+        else:
			
 
				+            tab_name = f"帖子 {i+1}"
			
 
				+        active_class = "active" if i == 0 else ""
			
 
				+        tabs_html += f'<div class="tab {active_class}" data-index="{i}">{tab_name}</div>\n'
			
 
				+
			
 
				+    # 生成HTML
			
 
				+    html_content = HTML_TEMPLATE.format(
			
 
				+        tabs_html=tabs_html,
			
 
				+        all_graph_data=json.dumps(all_graph_data, ensure_ascii=False)
			
 
				+    )
			
 
				+
			
 
				+    with open(output_file, "w", encoding="utf-8") as f:
			
 
				+        f.write(html_content)
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    # 使用路径配置
			
 
				+    config = PathConfig()
			
 
				+
			
 
				+    print(f"账号: {config.account_name}")
			
 
				+    print(f"输出版本: {config.output_version}")
			
 
				+    print()
			
 
				+
			
 
				+    # 输入目录
			
 
				+    match_graph_dir = config.intermediate_dir / "match_graph"
			
 
				+
			
 
				+    # 输出文件
			
 
				+    output_file = config.intermediate_dir / "match_graph.html"
			
 
				+
			
 
				+    print(f"输入目录: {match_graph_dir}")
			
 
				+    print(f"输出文件: {output_file}")
			
 
				+    print()
			
 
				+
			
 
				+    # 读取所有匹配图谱文件
			
 
				+    graph_files = sorted(match_graph_dir.glob("*_match_graph.json"))
			
 
				+    print(f"找到 {len(graph_files)} 个匹配图谱文件")
			
 
				+
			
 
				+    all_graph_data = []
			
 
				+    for i, graph_file in enumerate(graph_files, 1):
			
 
				+        print(f"  [{i}/{len(graph_files)}] 读取: {graph_file.name}")
			
 
				+
			
 
				+        with open(graph_file, "r", encoding="utf-8") as f:
			
 
				+            match_graph_data = json.load(f)
			
 
				+
			
 
				+        # 提取需要的数据
			
 
				+        graph_data = {
			
 
				+            "postId": match_graph_data["说明"]["帖子ID"],
			
 
				+            "postTitle": match_graph_data["说明"].get("帖子标题", ""),
			
 
				+            "stats": match_graph_data["说明"]["统计"],
			
 
				+            "nodes": match_graph_data["节点列表"],
			
 
				+            "edges": match_graph_data["边列表"]
			
 
				+        }
			
 
				+        all_graph_data.append(graph_data)
			
 
				+
			
 
				+    # 生成HTML
			
 
				+    print("\n生成HTML文件...")
			
 
				+    generate_combined_html(all_graph_data, output_file)
			
 
				+
			
 
				+    print("\n" + "="*60)
			
 
				+    print("处理完成!")
			
 
				+    print(f"输出文件: {output_file}")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()