Bläddra i källkod

feat: 添加支撑边和关联边支持

适配新数据结构,从点的 支撑的ID 和 关联的ID 字段提取支撑边和关联边:
- build_post_graph.py: 新增支撑边和关联边提取逻辑,更新统计输出
- edgeStyle.js: 添加支撑(橙色)和关联(紫色)边类型的样式配置

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
yangxiaohui 1 dag sedan
förälder
incheckning
19a98bacec

+ 140 - 139
script/data_processing/build_post_graph.py

@@ -152,19 +152,26 @@ def create_edge(
 
 # ==================== 从帖子解构结果提取节点和匹配边 ====================
 
-def extract_points_tags_and_matches(filtered_data: Dict) -> tuple:
+def extract_tags_and_matches(filtered_data: Dict) -> tuple:
     """
-    从帖子解构结果中提取点节点、标签节点和匹配边
+    从帖子解构结果中提取标签节点、匹配边、支撑边和关联边(适配新结构)
+
+    新结构:解构结果 → 点列表 → 点 → 匹配人设结果
+    新结构的"点"对应旧结构的"标签"节点,直接挂在维度下
 
     Returns:
-        (点节点字典, 标签节点字典, 标签到点的映射, 匹配边字典)
+        (标签节点字典, 匹配边字典, 支撑边字典, 关联边字典)
     """
-    point_nodes = {}  # nodeId -> nodeData
     tag_nodes = {}    # nodeId -> nodeData
-    tag_to_point = {} # tagId -> [pointId, ...]
     match_edges = {}  # edgeId -> edgeData
+    support_edges = {}  # 支撑边
+    relation_edges = {}  # 关联边
+
+    # ID 到节点ID的映射(用于构建支撑边和关联边)
+    id_to_node_id = {}
 
-    how_result = filtered_data.get("how解构结果", {})
+    # 新结构使用 "解构结果"
+    result = filtered_data.get("解构结果", {})
 
     dimension_mapping = {
         "灵感点列表": "灵感点",
@@ -172,167 +179,158 @@ def extract_points_tags_and_matches(filtered_data: Dict) -> tuple:
         "关键点列表": "关键点"
     }
 
+    # 第一遍:创建节点并建立 ID 映射
     for list_key, dimension in dimension_mapping.items():
-        points = how_result.get(list_key, [])
+        points = result.get(list_key, [])
 
         for point in points:
-            point_name = point.get("名称", "")
-            point_desc = point.get("描述", "")
+            tag_name = point.get("名称", "")
+            tag_desc = point.get("描述", "")
+            point_id = point.get("ID", "")
 
-            if not point_name:
+            if not tag_name:
                 continue
 
-            # 创建点节点
-            point_id = build_node_id("帖子", dimension, "点", point_name)
-            point_nodes[point_id] = create_node(
+            # 新结构的"点"直接创建为"标签"节点
+            tag_id = build_node_id("帖子", dimension, "标签", tag_name)
+            tag_nodes[tag_id] = create_node(
                 domain="帖子",
                 dimension=dimension,
-                node_type="",
-                name=point_name,
+                node_type="标签",
+                name=tag_name,
                 detail={
-                    "description": point_desc
+                    "description": tag_desc,
+                    "pointId": point_id
                 }
             )
 
-            # 遍历how步骤列表,提取标签和匹配
-            how_steps = point.get("how步骤列表", [])
-
-            for step in how_steps:
-                step_name = step.get("步骤名称", "")
-                features = step.get("特征列表", [])
-
-                for feature in features:
-                    tag_name = feature.get("特征名称", "")
-                    weight = feature.get("权重", 1.0)
-
-                    if not tag_name:
-                        continue
-
-                    # 创建标签节点
-                    tag_id = build_node_id("帖子", dimension, "标签", tag_name)
-
-                    if tag_id not in tag_nodes:
-                        tag_nodes[tag_id] = create_node(
-                            domain="帖子",
-                            dimension=dimension,
-                            node_type="标签",
-                            name=tag_name,
-                            detail={
-                                "weight": weight,
-                                "stepName": step_name,
-                                "pointNames": [point_name]
-                            }
-                        )
-                    else:
-                        # 同一标签可能属于多个点
-                        if point_name not in tag_nodes[tag_id]["detail"]["pointNames"]:
-                            tag_nodes[tag_id]["detail"]["pointNames"].append(point_name)
-
-                    # 记录标签到点的映射
-                    if tag_id not in tag_to_point:
-                        tag_to_point[tag_id] = []
-                    if point_id not in tag_to_point[tag_id]:
-                        tag_to_point[tag_id].append(point_id)
-
-                    # 提取匹配边
-                    matches = feature.get("匹配结果", [])
-                    for match in matches:
-                        persona_name = match.get("人设特征名称", "")
-                        persona_dimension = match.get("人设特征层级", "")
-                        persona_type = match.get("特征类型", "标签")
-                        match_detail = match.get("匹配结果", {})
-                        similarity = match_detail.get("相似度", 0)
-
-                        if not persona_name or not persona_dimension:
-                            continue
-
-                        # 构建人设节点ID
-                        persona_id = build_node_id("人设", persona_dimension, persona_type, persona_name)
-
-                        # 创建双向匹配边
-                        # 帖子标签 -> 人设标签
-                        edge_id_1 = build_edge_id(tag_id, "匹配", persona_id)
-                        match_edges[edge_id_1] = create_edge(
-                            source=tag_id,
-                            target=persona_id,
-                            edge_type="匹配",
-                            score=similarity,
-                            detail={}
-                        )
+            # 建立 ID 映射
+            if point_id:
+                id_to_node_id[point_id] = tag_id
+
+            # 直接从点的 匹配人设结果 提取匹配边
+            matches = point.get("匹配人设结果", [])
+            for match in matches:
+                persona_name = match.get("人设特征名称", "")
+                persona_dimension = match.get("人设特征层级", "")
+                # 映射:源数据中 "点" → "标签"
+                persona_type = match.get("特征类型", "标签")
+                if persona_type == "点":
+                    persona_type = "标签"
+                similarity = match.get("相似度", 0)
+
+                if not persona_name or not persona_dimension:
+                    continue
+
+                # 构建人设节点ID
+                persona_id = build_node_id("人设", persona_dimension, persona_type, persona_name)
+
+                # 创建双向匹配边
+                # 帖子标签 -> 人设标签
+                edge_id_1 = build_edge_id(tag_id, "匹配", persona_id)
+                match_edges[edge_id_1] = create_edge(
+                    source=tag_id,
+                    target=persona_id,
+                    edge_type="匹配",
+                    score=similarity,
+                    detail={}
+                )
+
+                # 人设标签 -> 帖子标签
+                edge_id_2 = build_edge_id(persona_id, "匹配", tag_id)
+                match_edges[edge_id_2] = create_edge(
+                    source=persona_id,
+                    target=tag_id,
+                    edge_type="匹配",
+                    score=similarity,
+                    detail={}
+                )
+
+    # 第二遍:构建支撑边和关联边
+    for list_key, dimension in dimension_mapping.items():
+        points = result.get(list_key, [])
+
+        for point in points:
+            tag_name = point.get("名称", "")
+            point_id = point.get("ID", "")
 
-                        # 人设标签 -> 帖子标签
-                        edge_id_2 = build_edge_id(persona_id, "匹配", tag_id)
-                        match_edges[edge_id_2] = create_edge(
-                            source=persona_id,
-                            target=tag_id,
-                            edge_type="匹配",
-                            score=similarity,
+            if not tag_name or not point_id:
+                continue
+
+            tag_id = id_to_node_id.get(point_id)
+            if not tag_id:
+                continue
+
+            # 支撑边:当前点 -> 被支撑的点
+            support_ids = point.get("支撑的ID", [])
+            for target_point_id in support_ids:
+                target_node_id = id_to_node_id.get(target_point_id)
+                if target_node_id:
+                    edge_id = build_edge_id(tag_id, "支撑", target_node_id)
+                    support_edges[edge_id] = create_edge(
+                        source=tag_id,
+                        target=target_node_id,
+                        edge_type="支撑",
+                        score=1.0,
+                        detail={}
+                    )
+
+            # 关联边:当前点 <-> 关联的点(双向)
+            relation_ids = point.get("关联的ID", [])
+            for target_point_id in relation_ids:
+                target_node_id = id_to_node_id.get(target_point_id)
+                if target_node_id:
+                    # 去重:同一方向的关联边只创建一次(若双方互相声明关联,仍会生成两个方向的边)
+                    edge_id = build_edge_id(tag_id, "关联", target_node_id)
+                    if edge_id not in relation_edges:
+                        relation_edges[edge_id] = create_edge(
+                            source=tag_id,
+                            target=target_node_id,
+                            edge_type="关联",
+                            score=1.0,
                             detail={}
                         )
 
-    return point_nodes, tag_nodes, tag_to_point, match_edges
+    return tag_nodes, match_edges, support_edges, relation_edges
 
 
 # ==================== 构建边 ====================
 
 def build_belong_contain_edges(
-    point_nodes: Dict[str, Dict],
     tag_nodes: Dict[str, Dict],
-    tag_to_point: Dict[str, List[str]],
     dimension_node_ids: Dict[str, str]
 ) -> Dict[str, Dict]:
     """
-    构建属于/包含边
+    构建属于/包含边(新结构:标签直接挂维度下)
 
     Returns:
         边字典 { edgeId: edgeData }
     """
     edges = {}
 
-    # 1. 点 -> 维度(属于/包含)
-    for point_id, point_data in point_nodes.items():
-        dimension = point_data["dimension"]
+    # 标签 -> 维度(属于/包含)
+    for tag_id, tag_data in tag_nodes.items():
+        dimension = tag_data["dimension"]
         dim_node_id = dimension_node_ids[dimension]
 
-        # 属于边: -> 维度
-        edge_id = build_edge_id(point_id, "属于", dim_node_id)
+        # 属于边:标签 -> 维度
+        edge_id = build_edge_id(tag_id, "属于", dim_node_id)
         edges[edge_id] = create_edge(
-            source=point_id,
+            source=tag_id,
             target=dim_node_id,
             edge_type="属于",
             score=1.0
         )
 
-        # 包含边:维度 -> 
-        edge_id_contain = build_edge_id(dim_node_id, "包含", point_id)
+        # 包含边:维度 -> 标签
+        edge_id_contain = build_edge_id(dim_node_id, "包含", tag_id)
         edges[edge_id_contain] = create_edge(
             source=dim_node_id,
-            target=point_id,
+            target=tag_id,
             edge_type="包含",
             score=1.0
         )
 
-    # 2. 标签 -> 点(属于/包含)
-    for tag_id, point_ids in tag_to_point.items():
-        for point_id in point_ids:
-            # 属于边:标签 -> 点
-            edge_id = build_edge_id(tag_id, "属于", point_id)
-            edges[edge_id] = create_edge(
-                source=tag_id,
-                target=point_id,
-                edge_type="属于",
-                score=1.0
-            )
-
-            # 包含边:点 -> 标签
-            edge_id_contain = build_edge_id(point_id, "包含", tag_id)
-            edges[edge_id_contain] = create_edge(
-                source=point_id,
-                target=tag_id,
-                edge_type="包含",
-                score=1.0
-            )
-
     return edges
 
 
@@ -530,8 +528,8 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
     all_nodes = {}
     all_edges = {}
 
-    # 1. 提取点节点、标签节点和匹配边
-    point_nodes, tag_nodes, tag_to_point, match_edges = extract_points_tags_and_matches(filtered_data)
+    # 1. 提取标签节点、匹配边、支撑边和关联边(新结构:没有点层)
+    tag_nodes, match_edges, support_edges, relation_edges = extract_tags_and_matches(filtered_data)
 
     # 2. 添加根节点
     root_id = build_node_id("帖子", "帖子", "帖子", post_id)
@@ -580,36 +578,36 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
             score=1.0
         )
 
-    # 4. 添加点节点和标签节点
-    all_nodes.update(point_nodes)
+    # 4. 添加标签节点
     all_nodes.update(tag_nodes)
 
-    # 5. 构建属于/包含边
-    belong_contain_edges = build_belong_contain_edges(
-        point_nodes, tag_nodes, tag_to_point, dimension_node_ids
-    )
+    # 5. 构建属于/包含边(标签直接挂维度下)
+    belong_contain_edges = build_belong_contain_edges(tag_nodes, dimension_node_ids)
     all_edges.update(belong_contain_edges)
 
     # 6. 添加匹配边
     all_edges.update(match_edges)
 
-    # 7. 构建索引
+    # 7. 添加支撑边和关联边
+    all_edges.update(support_edges)
+    all_edges.update(relation_edges)
+
+    # 8. 构建索引
     index = build_index(all_edges)
 
-    # 8. 构建嵌套树
+    # 9. 构建嵌套树
     tree = build_nested_tree(all_nodes, all_edges, root_id)
 
     # 统计
-    point_count = len(point_nodes)
     tag_count = len(tag_nodes)
     match_count = len(match_edges) // 2  # 双向边,除以2得到实际匹配数
+    support_count = len(support_edges)
+    relation_count = len(relation_edges)
 
     dimension_stats = {}
     for dim in dimensions:
-        dim_points = sum(1 for n in point_nodes.values() if n["dimension"] == dim)
         dim_tags = sum(1 for n in tag_nodes.values() if n["dimension"] == dim)
         dimension_stats[dim] = {
-            "pointCount": dim_points,
             "tagCount": dim_tags
         }
 
@@ -623,9 +621,10 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
             "stats": {
                 "nodeCount": len(all_nodes),
                 "edgeCount": len(all_edges),
-                "pointCount": point_count,
                 "tagCount": tag_count,
                 "matchCount": match_count,
+                "supportCount": support_count,
+                "relationCount": relation_count,
                 "dimensions": dimension_stats
             }
         },
@@ -645,9 +644,10 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
         "postTitle": post_title,
         "nodeCount": len(all_nodes),
         "edgeCount": len(all_edges),
-        "pointCount": point_count,
         "tagCount": tag_count,
         "matchCount": match_count,
+        "supportCount": support_count,
+        "relationCount": relation_count,
         "outputFile": str(output_file)
     }
 
@@ -685,7 +685,7 @@ def main():
         result = process_single_post(filtered_file, output_dir)
         results.append(result)
         print(f"  节点: {result['nodeCount']}, 边: {result['edgeCount']}")
-        print(f"  点: {result['pointCount']}, 标签: {result['tagCount']}, 匹配: {result['matchCount']}")
+        print(f"  标签: {result['tagCount']}, 匹配: {result['matchCount']}, 支撑: {result['supportCount']}, 关联: {result['relationCount']}")
         print(f"  → {Path(result['outputFile']).name}")
         print()
 
@@ -695,9 +695,10 @@ def main():
     print(f"  帖子数: {len(results)}")
     print(f"  总节点数: {sum(r['nodeCount'] for r in results)}")
     print(f"  总边数: {sum(r['edgeCount'] for r in results)}")
-    print(f"  总点数: {sum(r['pointCount'] for r in results)}")
     print(f"  总标签数: {sum(r['tagCount'] for r in results)}")
     print(f"  总匹配数: {sum(r['matchCount'] for r in results)}")
+    print(f"  总支撑边: {sum(r['supportCount'] for r in results)}")
+    print(f"  总关联边: {sum(r['relationCount'] for r in results)}")
     print(f"\n输出目录: {output_dir}")
 
 

+ 62 - 86
script/data_processing/filter_how_results.py

@@ -3,10 +3,12 @@
 """
 How解构结果过滤脚本
 
-从 how 解构结果中过滤出高质量的匹配结果:
-1. 移除 what解构结果 字段
-2. 只保留相似度 >= 0.5 的 top1 匹配结果
-3. 保留特征即使其匹配结果为空
+从解构结果中过滤出高质量的匹配结果:
+1. 只保留相似度 >= 阈值的匹配结果
+2. 可选只保留 top N 匹配结果
+
+新数据结构:
+  解构结果 → 点列表 → 点 → 匹配人设结果[]
 """
 
 import json
@@ -23,94 +25,63 @@ sys.path.insert(0, str(project_root))
 from script.data_processing.path_config import PathConfig
 
 
-def filter_match_results(feature_list: List[Dict], threshold: float = 0.5) -> List[Dict]:
+def filter_point_matches(point: Dict, threshold: float = 0.5, top_n: int = None) -> Dict:
     """
-    过滤特征列表中的匹配结果
+    过滤单个点的匹配人设结果
 
     Args:
-        feature_list: 特征列表
+        point: 点数据
         threshold: 相似度阈值
+        top_n: 只保留前N个匹配(None表示不限制)
 
     Returns:
-        过滤后的特征列表
-    """
-    filtered_features = []
-
-    for feature in feature_list:
-        filtered_feature = {
-            "特征名称": feature.get("特征名称", ""),
-            "权重": feature.get("权重", 1.0),
-            "匹配结果": []
-        }
-
-        # 过滤匹配结果
-        match_results = feature.get("匹配结果", [])
-        for match in match_results:
-            similarity = match.get("匹配结果", {}).get("相似度", 0)
-            if similarity >= threshold:
-                filtered_feature["匹配结果"].append(match)
-
-        # 按相似度降序排序,只保留 top1
-        if filtered_feature["匹配结果"]:
-            filtered_feature["匹配结果"].sort(
-                key=lambda x: x.get("匹配结果", {}).get("相似度", 0),
-                reverse=True
-            )
-            # 只保留相似度最高的一个
-            filtered_feature["匹配结果"] = [filtered_feature["匹配结果"][0]]
-
-        # 保留特征即使匹配结果为空
-        filtered_features.append(filtered_feature)
-
-    return filtered_features
-
-
-def filter_how_steps(how_steps: List[Dict], threshold: float = 0.5) -> List[Dict]:
+        过滤后的点数据
     """
-    过滤 how 步骤列表
+    # 复制点的基本信息
+    filtered_point = {
+        "名称": point.get("名称", ""),
+        "描述": point.get("描述", ""),
+        "匹配人设结果": []
+    }
 
-    Args:
-        how_steps: how 步骤列表
-        threshold: 相似度阈值
+    # 保留其他字段(如ID、类型等)
+    for key in ["ID", "类型", "置信度", "支撑的ID", "关联的ID"]:
+        if key in point:
+            filtered_point[key] = point[key]
 
-    Returns:
-        过滤后的 how 步骤列表
-    """
-    filtered_steps = []
+    # 过滤匹配结果
+    matches = point.get("匹配人设结果", [])
+    filtered_matches = []
 
-    for step in how_steps:
-        filtered_step = {
-            "步骤名称": step.get("步骤名称", ""),
-            "特征列表": filter_match_results(step.get("特征列表", []), threshold)
-        }
-        filtered_steps.append(filtered_step)
+    for match in matches:
+        similarity = match.get("相似度", 0)
+        if similarity >= threshold:
+            filtered_matches.append(match)
 
-    return filtered_steps
+    # 按相似度降序排序
+    filtered_matches.sort(key=lambda x: x.get("相似度", 0), reverse=True)
 
+    # 只保留 top N
+    if top_n is not None and len(filtered_matches) > top_n:
+        filtered_matches = filtered_matches[:top_n]
 
-def filter_point_list(point_list: List[Dict], threshold: float = 0.5) -> List[Dict]:
+    filtered_point["匹配人设结果"] = filtered_matches
+    return filtered_point
+
+
+def filter_point_list(point_list: List[Dict], threshold: float = 0.5, top_n: int = None) -> List[Dict]:
     """
     过滤点列表(灵感点/关键点/目的点)
 
     Args:
         point_list: 点列表
         threshold: 相似度阈值
+        top_n: 只保留前N个匹配
 
     Returns:
         过滤后的点列表
     """
-    filtered_points = []
-
-    for point in point_list:
-        filtered_point = {
-            "名称": point.get("名称", ""),
-            "描述": point.get("描述", ""),
-            "特征列表": point.get("特征列表", []),
-            "how步骤列表": filter_how_steps(point.get("how步骤列表", []), threshold)
-        }
-        filtered_points.append(filtered_point)
-
-    return filtered_points
+    return [filter_point_matches(point, threshold, top_n) for point in point_list]
 
 
 def calculate_statistics(original_point_list: List[Dict], filtered_point_list: List[Dict]) -> Dict:
@@ -129,15 +100,11 @@ def calculate_statistics(original_point_list: List[Dict], filtered_point_list: L
 
     # 统计原始匹配数量
     for point in original_point_list:
-        for step in point.get("how步骤列表", []):
-            for feature in step.get("特征列表", []):
-                original_count += len(feature.get("匹配结果", []))
+        original_count += len(point.get("匹配人设结果", []))
 
     # 统计过滤后匹配数量
     for point in filtered_point_list:
-        for step in point.get("how步骤列表", []):
-            for feature in step.get("特征列表", []):
-                filtered_count += len(feature.get("匹配结果", []))
+        filtered_count += len(point.get("匹配人设结果", []))
 
     return {
         "原始匹配数": original_count,
@@ -148,7 +115,7 @@ def calculate_statistics(original_point_list: List[Dict], filtered_point_list: L
     }
 
 
-def process_single_file(input_file: Path, output_file: Path, threshold: float = 0.5) -> Dict:
+def process_single_file(input_file: Path, output_file: Path, threshold: float = 0.5, top_n: int = None) -> Dict:
     """
     处理单个文件
 
@@ -156,6 +123,7 @@ def process_single_file(input_file: Path, output_file: Path, threshold: float =
         input_file: 输入文件路径
         output_file: 输出文件路径
         threshold: 相似度阈值
+        top_n: 只保留前N个匹配
 
     Returns:
         统计信息
@@ -164,15 +132,15 @@ def process_single_file(input_file: Path, output_file: Path, threshold: float =
     with open(input_file, "r", encoding="utf-8") as f:
         data = json.load(f)
 
-    # 提取基本信息(移除 what解构结果)
+    # 提取基本信息
     filtered_data = {
         "帖子id": data.get("帖子id", ""),
         "帖子详情": data.get("帖子详情", {})
     }
 
-    # 处理 how解构结果
-    how_result = data.get("how解构结果", {})
-    filtered_how_result = {}
+    # 处理解构结果(新结构)
+    result = data.get("解构结果", {})
+    filtered_result = {}
 
     stats = {
         "灵感点": {"原始匹配数": 0, "过滤后匹配数": 0},
@@ -182,17 +150,17 @@ def process_single_file(input_file: Path, output_file: Path, threshold: float =
 
     for point_type in ["灵感点", "关键点", "目的点"]:
         point_list_key = f"{point_type}列表"
-        original_points = how_result.get(point_list_key, [])
+        original_points = result.get(point_list_key, [])
 
         if original_points:
-            filtered_points = filter_point_list(original_points, threshold)
-            filtered_how_result[point_list_key] = filtered_points
+            filtered_points = filter_point_list(original_points, threshold, top_n)
+            filtered_result[point_list_key] = filtered_points
 
             # 计算统计
             point_stats = calculate_statistics(original_points, filtered_points)
             stats[point_type] = point_stats
 
-    filtered_data["how解构结果"] = filtered_how_result
+    filtered_data["解构结果"] = filtered_result
 
     # 保存过滤后的文件
     output_file.parent.mkdir(parents=True, exist_ok=True)
@@ -211,13 +179,19 @@ def process_single_file(input_file: Path, output_file: Path, threshold: float =
 
 def main():
     """主函数"""
-    parser = argparse.ArgumentParser(description="过滤 how 解构结果,只保留高相似度的匹配")
+    parser = argparse.ArgumentParser(description="过滤解构结果,只保留高相似度的匹配")
     parser.add_argument(
         "--threshold",
         type=float,
         default=0.5,
         help="相似度阈值(默认 0.5)"
     )
+    parser.add_argument(
+        "--top-n",
+        type=int,
+        default=1,
+        help="每个点只保留前N个匹配(默认1)"
+    )
 
     args = parser.parse_args()
 
@@ -231,11 +205,13 @@ def main():
     input_dir = config.how_results_dir
     output_dir = config.intermediate_dir / "filtered_results"
     threshold = args.threshold
+    top_n = args.top_n
 
     print(f"账号: {config.account_name}")
     print(f"输入目录: {input_dir}")
     print(f"输出目录: {output_dir}")
     print(f"相似度阈值: {threshold}")
+    print(f"Top N: {top_n if top_n else '不限制'}")
     print()
 
     # 确保输出目录存在
@@ -260,7 +236,7 @@ def main():
         output_file = output_dir / f"{post_id}_filtered.json"
 
         # 处理文件
-        stats = process_single_file(input_file, output_file, threshold)
+        stats = process_single_file(input_file, output_file, threshold, top_n)
 
         total_original += stats["原始匹配数"]
         total_filtered += stats["过滤后匹配数"]

+ 18 - 21
script/data_processing/run_graph_pipeline.sh

@@ -8,13 +8,10 @@
 #   4. match_inspiration_features.py
 #
 # 本脚本执行:
-#   5. filter_how_results.py      - 过滤how解构结果
-#   6. extract_nodes_and_edges.py - 提取节点和边
-#   7. build_persona_graph.py     - 构建人设图谱
-#   8. build_match_graph.py       - 构建匹配图谱
-#   9. build_post_graph.py        - 构建帖子图谱
-#  10. visualize_match_graph.py   - 生成匹配图谱可视化HTML
-#  11. visualization/build.py     - 生成人设图谱可视化HTML
+#   1. filter_how_results.py      - 过滤how解构结果
+#   2. build_persona_graph.py     - 构建人设图谱
+#   3. build_post_graph.py        - 构建帖子图谱
+#   4. visualization/build.py     - 生成人设图谱可视化HTML
 #
 # 使用方式:
 #   ./run_graph_pipeline.sh              # 使用默认账号
@@ -47,7 +44,7 @@ run_step() {
     local step_name=$2
     local script_name=$3
 
-    print_step "$step_num/7" "$step_name"
+    print_step "$step_num/4" "$step_name"
 
     if python "script/data_processing/$script_name"; then
         print_success "$step_name 完成"
@@ -74,26 +71,26 @@ process_account() {
     # 设置环境变量
     export ACCOUNT_NAME="$account_name"
 
-    # 步骤5: 过滤how解构结果
+    # 步骤1: 过滤how解构结果
     run_step 1 "过滤how解构结果" "filter_how_results.py" || return 1
 
-    # 步骤6: 提取节点和边
-    run_step 2 "提取节点和边" "extract_nodes_and_edges.py" || return 1
+    # # 步骤2: 提取节点和边(不再需要)
+    # run_step 2 "提取节点和边" "extract_nodes_and_edges.py" || return 1
 
-    # 步骤7: 构建人设图谱
-    run_step 3 "构建人设图谱" "build_persona_graph.py" || return 1
+    # 步骤2: 构建人设图谱
+    run_step 2 "构建人设图谱" "build_persona_graph.py" || return 1
 
-    # 步骤8: 构建匹配图谱
-    run_step 4 "构建匹配图谱" "build_match_graph.py" || return 1
+    # # 步骤: 构建匹配图谱(不再需要)
+    # run_step 4 "构建匹配图谱" "build_match_graph.py" || return 1
 
-    # 步骤9: 构建帖子图谱
-    run_step 5 "构建帖子图谱" "build_post_graph.py" || return 1
+    # 步骤3: 构建帖子图谱
+    run_step 3 "构建帖子图谱" "build_post_graph.py" || return 1
 
-    # 步骤10: 生成匹配图谱可视化HTML
-    run_step 6 "生成匹配图谱可视化" "visualize_match_graph.py" || return 1
+    # # 步骤: 生成匹配图谱可视化HTML(不再需要)
+    # run_step 6 "生成匹配图谱可视化" "visualize_match_graph.py" || return 1
 
-    # 步骤11: 生成人设图谱可视化HTML
-    print_step "7/7" "生成人设图谱可视化"
+    # 步骤4: 生成人设图谱可视化HTML
+    print_step "4/4" "生成人设图谱可视化"
     if python "script/visualization/build.py"; then
         print_success "生成人设图谱可视化 完成"
         echo ""

+ 7 - 1
script/visualization/src/config/edgeStyle.js

@@ -8,7 +8,9 @@ export const edgeTypeColors = {
   '分类共现': '#f39c12',
   '匹配': '#e94560',
   '推导': '#00bcd4',  // 青色 - 推导关系
-  '组成': '#8bc34a'   // 浅绿色 - 组合成员
+  '组成': '#8bc34a',  // 浅绿色 - 组合成员
+  '支撑': '#ff9800',  // 橙色 - 支撑关系
+  '关联': '#9c27b0'   // 紫色 - 关联关系
 }
 
 // 获取边样式(统一入口)
@@ -35,6 +37,10 @@ export function getEdgeStyle(edge) {
     opacity = Math.max(0.4, score * 0.8)
   } else if (type === '组成') {
     opacity = 0.6
+  } else if (type === '支撑') {
+    opacity = 0.7
+  } else if (type === '关联') {
+    opacity = 0.6
   }
 
   return {