|
|
@@ -152,19 +152,26 @@ def create_edge(
|
|
|
|
|
|
# ==================== 从帖子解构结果提取节点和匹配边 ====================
|
|
|
|
|
|
def extract_tags_and_matches(filtered_data: Dict) -> tuple:
    """
    Extract tag nodes, match edges, support edges and relation edges from a
    post's deconstruction result (new data layout).

    New layout: "解构结果" -> point list -> point -> "匹配人设结果".
    A "point" of the new layout is emitted as a "标签" (tag) node, i.e. the
    node type the old layout used for tags, and hangs directly under its
    dimension.

    Args:
        filtered_data: Parsed post data; only its "解构结果" entry is read.

    Returns:
        (tag node dict, match edge dict, support edge dict, relation edge dict),
        each keyed by node ID / edge ID.
    """
    tag_nodes = {}       # nodeId -> nodeData
    match_edges = {}     # edgeId -> edgeData
    support_edges = {}   # edgeId -> edgeData ("支撑" edges)
    relation_edges = {}  # edgeId -> edgeData ("关联" edges)

    # Source-data point ID -> graph node ID; needed to resolve the ID lists
    # that support/relation edges are built from.
    id_to_node_id = {}

    # `or {}` / `or []` are used instead of .get() defaults throughout: a
    # default only covers a missing key, not a key explicitly set to None.
    result = filtered_data.get("解构结果") or {}

    dimension_mapping = {
        "灵感点列表": "灵感点",
        # NOTE(review): the diff this block was taken from elides exactly one
        # entry at this position (hunk boundary); restored from the naming
        # pattern of the neighbouring entries -- confirm against the full file.
        "目的点列表": "目的点",
        "关键点列表": "关键点"
    }

    # Pass 1: create the tag nodes, record the ID mapping, emit match edges.
    for list_key, dimension in dimension_mapping.items():
        for point in result.get(list_key) or []:
            tag_name = point.get("名称", "")
            if not tag_name:
                continue

            tag_desc = point.get("描述", "")
            point_id = point.get("ID", "")

            # A point of the new layout becomes a "标签" node directly.
            tag_id = build_node_id("帖子", dimension, "标签", tag_name)
            tag_nodes[tag_id] = create_node(
                domain="帖子",
                dimension=dimension,
                node_type="标签",
                name=tag_name,
                detail={
                    "description": tag_desc,
                    "pointId": point_id
                }
            )

            if point_id:
                id_to_node_id[point_id] = tag_id

            # Match edges come straight from the point's persona matches.
            for match in point.get("匹配人设结果") or []:
                persona_name = match.get("人设特征名称", "")
                persona_dimension = match.get("人设特征层级", "")
                if not persona_name or not persona_dimension:
                    continue

                # The source data may say "点"; the graph calls it "标签".
                persona_type = match.get("特征类型", "标签")
                if persona_type == "点":
                    persona_type = "标签"
                similarity = match.get("相似度", 0)

                persona_id = build_node_id(
                    "人设", persona_dimension, persona_type, persona_name
                )

                # Match edges are stored in both directions:
                # post tag -> persona tag ...
                edge_id_1 = build_edge_id(tag_id, "匹配", persona_id)
                match_edges[edge_id_1] = create_edge(
                    source=tag_id,
                    target=persona_id,
                    edge_type="匹配",
                    score=similarity,
                    detail={}
                )

                # ... and persona tag -> post tag.
                edge_id_2 = build_edge_id(persona_id, "匹配", tag_id)
                match_edges[edge_id_2] = create_edge(
                    source=persona_id,
                    target=tag_id,
                    edge_type="匹配",
                    score=similarity,
                    detail={}
                )

    # Pass 2: support and relation edges. This needs the complete ID mapping,
    # because a point may reference points that appear later or in another
    # dimension list.
    for list_key, dimension in dimension_mapping.items():
        for point in result.get(list_key) or []:
            tag_name = point.get("名称", "")
            point_id = point.get("ID", "")
            if not tag_name or not point_id:
                continue

            tag_id = id_to_node_id.get(point_id)
            if not tag_id:
                continue

            # Support edge: current point -> supported point (directed).
            for target_point_id in point.get("支撑的ID") or []:
                target_node_id = id_to_node_id.get(target_point_id)
                if not target_node_id:
                    continue  # reference to a point that produced no node
                edge_id = build_edge_id(tag_id, "支撑", target_node_id)
                support_edges[edge_id] = create_edge(
                    source=tag_id,
                    target=target_node_id,
                    edge_type="支撑",
                    score=1.0,
                    detail={}
                )

            # Relation edge: symmetric relation, stored once per pair.
            for target_point_id in point.get("关联的ID") or []:
                target_node_id = id_to_node_id.get(target_point_id)
                if not target_node_id:
                    continue
                edge_id = build_edge_id(tag_id, "关联", target_node_id)
                # The reverse ID must be checked too; otherwise two points
                # that list each other would still yield two edges.
                reverse_id = build_edge_id(target_node_id, "关联", tag_id)
                if edge_id in relation_edges or reverse_id in relation_edges:
                    continue
                relation_edges[edge_id] = create_edge(
                    source=tag_id,
                    target=target_node_id,
                    edge_type="关联",
                    score=1.0,
                    detail={}
                )

    return tag_nodes, match_edges, support_edges, relation_edges
|
|
|
|
|
|
|
|
|
# ==================== 构建边 ====================
|
|
|
|
|
|
def build_belong_contain_edges(
    tag_nodes: Dict[str, Dict],
    dimension_node_ids: Dict[str, str]
) -> Dict[str, Dict]:
    """
    Build the "属于" (belongs-to) / "包含" (contains) edges between every tag
    node and its dimension node (new layout: tags hang directly under the
    dimension).

    Args:
        tag_nodes: Tag nodes keyed by node ID; each node's "dimension" entry
            is read.
        dimension_node_ids: Dimension name -> dimension node ID. A dimension
            missing from this mapping raises KeyError.

    Returns:
        Edge dict { edgeId: edgeData }
    """
    collected = {}

    for node_id, node in tag_nodes.items():
        parent_id = dimension_node_ids[node["dimension"]]

        # Each tag gets one edge per direction:
        # tag -> dimension ("属于"), then dimension -> tag ("包含").
        directed_pairs = (
            (node_id, "属于", parent_id),
            (parent_id, "包含", node_id),
        )
        for src, relation, dst in directed_pairs:
            edge_key = build_edge_id(src, relation, dst)
            collected[edge_key] = create_edge(
                source=src,
                target=dst,
                edge_type=relation,
                score=1.0
            )

    return collected
|
|
|
|
|
|
|
|
|
@@ -530,8 +528,8 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
|
|
|
all_nodes = {}
|
|
|
all_edges = {}
|
|
|
|
|
|
- # 1. 提取点节点、标签节点和匹配边
|
|
|
- point_nodes, tag_nodes, tag_to_point, match_edges = extract_points_tags_and_matches(filtered_data)
|
|
|
+ # 1. 提取标签节点和匹配边(新结构:没有点层)
|
|
|
+ tag_nodes, match_edges, support_edges, relation_edges = extract_tags_and_matches(filtered_data)
|
|
|
|
|
|
# 2. 添加根节点
|
|
|
root_id = build_node_id("帖子", "帖子", "帖子", post_id)
|
|
|
@@ -580,36 +578,36 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
|
|
|
score=1.0
|
|
|
)
|
|
|
|
|
|
- # 4. 添加点节点和标签节点
|
|
|
- all_nodes.update(point_nodes)
|
|
|
+ # 4. 添加标签节点
|
|
|
all_nodes.update(tag_nodes)
|
|
|
|
|
|
- # 5. 构建属于/包含边
|
|
|
- belong_contain_edges = build_belong_contain_edges(
|
|
|
- point_nodes, tag_nodes, tag_to_point, dimension_node_ids
|
|
|
- )
|
|
|
+ # 5. 构建属于/包含边(标签直接挂维度下)
|
|
|
+ belong_contain_edges = build_belong_contain_edges(tag_nodes, dimension_node_ids)
|
|
|
all_edges.update(belong_contain_edges)
|
|
|
|
|
|
# 6. 添加匹配边
|
|
|
all_edges.update(match_edges)
|
|
|
|
|
|
- # 7. 构建索引
|
|
|
+ # 7. 添加支撑边和关联边
|
|
|
+ all_edges.update(support_edges)
|
|
|
+ all_edges.update(relation_edges)
|
|
|
+
|
|
|
+ # 8. 构建索引
|
|
|
index = build_index(all_edges)
|
|
|
|
|
|
- # 8. 构建嵌套树
|
|
|
+ # 9. 构建嵌套树
|
|
|
tree = build_nested_tree(all_nodes, all_edges, root_id)
|
|
|
|
|
|
# 统计
|
|
|
- point_count = len(point_nodes)
|
|
|
tag_count = len(tag_nodes)
|
|
|
match_count = len(match_edges) // 2 # 双向边,除以2得到实际匹配数
|
|
|
+ support_count = len(support_edges)
|
|
|
+ relation_count = len(relation_edges)
|
|
|
|
|
|
dimension_stats = {}
|
|
|
for dim in dimensions:
|
|
|
- dim_points = sum(1 for n in point_nodes.values() if n["dimension"] == dim)
|
|
|
dim_tags = sum(1 for n in tag_nodes.values() if n["dimension"] == dim)
|
|
|
dimension_stats[dim] = {
|
|
|
- "pointCount": dim_points,
|
|
|
"tagCount": dim_tags
|
|
|
}
|
|
|
|
|
|
@@ -623,9 +621,10 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
|
|
|
"stats": {
|
|
|
"nodeCount": len(all_nodes),
|
|
|
"edgeCount": len(all_edges),
|
|
|
- "pointCount": point_count,
|
|
|
"tagCount": tag_count,
|
|
|
"matchCount": match_count,
|
|
|
+ "supportCount": support_count,
|
|
|
+ "relationCount": relation_count,
|
|
|
"dimensions": dimension_stats
|
|
|
}
|
|
|
},
|
|
|
@@ -645,9 +644,10 @@ def process_single_post(filtered_file: Path, output_dir: Path) -> Dict:
|
|
|
"postTitle": post_title,
|
|
|
"nodeCount": len(all_nodes),
|
|
|
"edgeCount": len(all_edges),
|
|
|
- "pointCount": point_count,
|
|
|
"tagCount": tag_count,
|
|
|
"matchCount": match_count,
|
|
|
+ "supportCount": support_count,
|
|
|
+ "relationCount": relation_count,
|
|
|
"outputFile": str(output_file)
|
|
|
}
|
|
|
|
|
|
@@ -685,7 +685,7 @@ def main():
|
|
|
result = process_single_post(filtered_file, output_dir)
|
|
|
results.append(result)
|
|
|
print(f" 节点: {result['nodeCount']}, 边: {result['edgeCount']}")
|
|
|
- print(f" 点: {result['pointCount']}, 标签: {result['tagCount']}, 匹配: {result['matchCount']}")
|
|
|
+ print(f" 标签: {result['tagCount']}, 匹配: {result['matchCount']}, 支撑: {result['supportCount']}, 关联: {result['relationCount']}")
|
|
|
print(f" → {Path(result['outputFile']).name}")
|
|
|
print()
|
|
|
|
|
|
@@ -695,9 +695,10 @@ def main():
|
|
|
print(f" 帖子数: {len(results)}")
|
|
|
print(f" 总节点数: {sum(r['nodeCount'] for r in results)}")
|
|
|
print(f" 总边数: {sum(r['edgeCount'] for r in results)}")
|
|
|
- print(f" 总点数: {sum(r['pointCount'] for r in results)}")
|
|
|
print(f" 总标签数: {sum(r['tagCount'] for r in results)}")
|
|
|
print(f" 总匹配数: {sum(r['matchCount'] for r in results)}")
|
|
|
+ print(f" 总支撑边: {sum(r['supportCount'] for r in results)}")
|
|
|
+ print(f" 总关联边: {sum(r['relationCount'] for r in results)}")
|
|
|
print(f"\n输出目录: {output_dir}")
|
|
|
|
|
|
|