|
@@ -25,7 +25,13 @@ from script.data_processing.path_config import PathConfig
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_post_node_id(dimension: str, node_type: str, name: str) -> str:
|
|
def build_post_node_id(dimension: str, node_type: str, name: str) -> str:
|
|
|
- """构建帖子节点ID"""
|
|
|
|
|
|
|
+ """构建帖子节点ID
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ dimension: 维度(灵感点/关键点/目的点)
|
|
|
|
|
+ node_type: 节点类型(点/标签)
|
|
|
|
|
+ name: 节点名称
|
|
|
|
|
+ """
|
|
|
return f"帖子_{dimension}_{node_type}_{name}"
|
|
return f"帖子_{dimension}_{node_type}_{name}"
|
|
|
|
|
|
|
|
|
|
|
|
@@ -36,17 +42,19 @@ def build_persona_node_id(dimension: str, node_type: str, name: str) -> str:
|
|
|
|
|
|
|
|
def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
|
|
def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
|
|
|
"""
|
|
"""
|
|
|
- 从匹配结果中提取帖子节点、人设节点和匹配边
|
|
|
|
|
|
|
+ 从匹配结果中提取帖子节点(点+标签)、人设节点和边
|
|
|
|
|
|
|
|
Args:
|
|
Args:
|
|
|
filtered_data: 匹配结果数据
|
|
filtered_data: 匹配结果数据
|
|
|
|
|
|
|
|
Returns:
|
|
Returns:
|
|
|
- (帖子节点列表, 人设节点ID集合, 匹配边列表)
|
|
|
|
|
|
|
+ (帖子节点列表, 人设节点ID集合, 边列表)
|
|
|
|
|
+ 帖子节点包括:点节点(灵感点/关键点/目的点)和标签节点
|
|
|
|
|
+ 边包括:标签→点的属于边 + 标签→人设的匹配边
|
|
|
"""
|
|
"""
|
|
|
post_nodes = []
|
|
post_nodes = []
|
|
|
persona_node_ids = set()
|
|
persona_node_ids = set()
|
|
|
- match_edges = []
|
|
|
|
|
|
|
+ edges = [] # 包含属于边和匹配边
|
|
|
|
|
|
|
|
how_result = filtered_data.get("how解构结果", {})
|
|
how_result = filtered_data.get("how解构结果", {})
|
|
|
|
|
|
|
@@ -61,7 +69,28 @@ def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
|
|
|
points = how_result.get(list_key, [])
|
|
points = how_result.get(list_key, [])
|
|
|
|
|
|
|
|
for point in points:
|
|
for point in points:
|
|
|
- # 遍历how步骤列表
|
|
|
|
|
|
|
+ point_name = point.get("名称", "")
|
|
|
|
|
+ point_desc = point.get("描述", "")
|
|
|
|
|
+
|
|
|
|
|
+ if not point_name:
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ # 创建帖子点节点
|
|
|
|
|
+ point_node_id = build_post_node_id(dimension, "点", point_name)
|
|
|
|
|
+ point_node = {
|
|
|
|
|
+ "节点ID": point_node_id,
|
|
|
|
|
+ "节点名称": point_name,
|
|
|
|
|
+ "节点类型": "点",
|
|
|
|
|
+ "节点层级": dimension,
|
|
|
|
|
+ "描述": point_desc,
|
|
|
|
|
+ "source": "帖子"
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ # 避免重复添加点节点
|
|
|
|
|
+ if not any(n["节点ID"] == point_node_id for n in post_nodes):
|
|
|
|
|
+ post_nodes.append(point_node)
|
|
|
|
|
+
|
|
|
|
|
+ # 遍历how步骤列表,提取标签节点
|
|
|
how_steps = point.get("how步骤列表", [])
|
|
how_steps = point.get("how步骤列表", [])
|
|
|
|
|
|
|
|
for step in how_steps:
|
|
for step in how_steps:
|
|
@@ -75,10 +104,10 @@ def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
|
|
|
if not feature_name:
|
|
if not feature_name:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- # 创建帖子节点(无论是否有匹配结果)
|
|
|
|
|
- post_node_id = build_post_node_id(dimension, "标签", feature_name)
|
|
|
|
|
- post_node = {
|
|
|
|
|
- "节点ID": post_node_id,
|
|
|
|
|
|
|
+ # 创建帖子标签节点(无论是否有匹配结果)
|
|
|
|
|
+ tag_node_id = build_post_node_id(dimension, "标签", feature_name)
|
|
|
|
|
+ tag_node = {
|
|
|
|
|
+ "节点ID": tag_node_id,
|
|
|
"节点名称": feature_name,
|
|
"节点名称": feature_name,
|
|
|
"节点类型": "标签",
|
|
"节点类型": "标签",
|
|
|
"节点层级": dimension,
|
|
"节点层级": dimension,
|
|
@@ -87,9 +116,23 @@ def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
|
|
|
"已匹配": len(match_results) > 0 # 标记是否有匹配
|
|
"已匹配": len(match_results) > 0 # 标记是否有匹配
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- # 避免重复添加
|
|
|
|
|
- if not any(n["节点ID"] == post_node_id for n in post_nodes):
|
|
|
|
|
- post_nodes.append(post_node)
|
|
|
|
|
|
|
+ # 避免重复添加标签节点
|
|
|
|
|
+ if not any(n["节点ID"] == tag_node_id for n in post_nodes):
|
|
|
|
|
+ post_nodes.append(tag_node)
|
|
|
|
|
+
|
|
|
|
|
+ # 创建标签→点的属于边
|
|
|
|
|
+ belong_edge = {
|
|
|
|
|
+ "源节点ID": tag_node_id,
|
|
|
|
|
+ "目标节点ID": point_node_id,
|
|
|
|
|
+ "边类型": "属于",
|
|
|
|
|
+ "边详情": {
|
|
|
|
|
+ "说明": f"标签「{feature_name}」属于点「{point_name}」"
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ # 避免重复添加属于边
|
|
|
|
|
+ edge_key = (tag_node_id, point_node_id, "属于")
|
|
|
|
|
+ if not any((e["源节点ID"], e["目标节点ID"], e["边类型"]) == edge_key for e in edges):
|
|
|
|
|
+ edges.append(belong_edge)
|
|
|
|
|
|
|
|
# 如果有匹配结果,创建匹配边
|
|
# 如果有匹配结果,创建匹配边
|
|
|
if match_results:
|
|
if match_results:
|
|
@@ -110,7 +153,7 @@ def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
|
|
|
|
|
|
|
|
# 创建匹配边
|
|
# 创建匹配边
|
|
|
match_edge = {
|
|
match_edge = {
|
|
|
- "源节点ID": post_node_id,
|
|
|
|
|
|
|
+ "源节点ID": tag_node_id,
|
|
|
"目标节点ID": persona_node_id,
|
|
"目标节点ID": persona_node_id,
|
|
|
"边类型": "匹配",
|
|
"边类型": "匹配",
|
|
|
"边详情": {
|
|
"边详情": {
|
|
@@ -118,9 +161,9 @@ def extract_matched_nodes_and_edges(filtered_data: Dict) -> tuple:
|
|
|
"说明": match_detail.get("说明", "")
|
|
"说明": match_detail.get("说明", "")
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
- match_edges.append(match_edge)
|
|
|
|
|
|
|
+ edges.append(match_edge)
|
|
|
|
|
|
|
|
- return post_nodes, persona_node_ids, match_edges
|
|
|
|
|
|
|
+ return post_nodes, persona_node_ids, edges
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_persona_nodes_details(
|
|
def get_persona_nodes_details(
|
|
@@ -465,8 +508,16 @@ def process_filtered_result(
|
|
|
post_detail = filtered_data.get("帖子详情", {})
|
|
post_detail = filtered_data.get("帖子详情", {})
|
|
|
post_title = post_detail.get("title", "")
|
|
post_title = post_detail.get("title", "")
|
|
|
|
|
|
|
|
- # 提取节点和边
|
|
|
|
|
- post_nodes, persona_node_ids, match_edges = extract_matched_nodes_and_edges(filtered_data)
|
|
|
|
|
|
|
+ # 提取节点和边(包括帖子点节点、标签节点、属于边和匹配边)
|
|
|
|
|
+ post_nodes, persona_node_ids, post_edges_raw = extract_matched_nodes_and_edges(filtered_data)
|
|
|
|
|
+
|
|
|
|
|
+ # 分离帖子侧的边:属于边(标签→点)和匹配边(标签→人设)
|
|
|
|
|
+ post_belong_edges = [e for e in post_edges_raw if e["边类型"] == "属于"]
|
|
|
|
|
+ match_edges = [e for e in post_edges_raw if e["边类型"] == "匹配"]
|
|
|
|
|
+
|
|
|
|
|
+ # 统计帖子点节点和标签节点
|
|
|
|
|
+ post_point_nodes = [n for n in post_nodes if n["节点类型"] == "点"]
|
|
|
|
|
+ post_tag_nodes = [n for n in post_nodes if n["节点类型"] == "标签"]
|
|
|
|
|
|
|
|
# 获取人设节点详情(直接匹配的,标记为非扩展)
|
|
# 获取人设节点详情(直接匹配的,标记为非扩展)
|
|
|
persona_nodes = get_persona_nodes_details(persona_node_ids, nodes_data)
|
|
persona_nodes = get_persona_nodes_details(persona_node_ids, nodes_data)
|
|
@@ -584,8 +635,8 @@ def process_filtered_result(
|
|
|
# 合并节点列表
|
|
# 合并节点列表
|
|
|
all_nodes = post_nodes + persona_nodes + useful_expanded_nodes
|
|
all_nodes = post_nodes + persona_nodes + useful_expanded_nodes
|
|
|
|
|
|
|
|
- # 合并边列表
|
|
|
|
|
- all_edges = match_edges + persona_edges + post_edges + useful_expanded_edges + useful_category_edges + post_edges_via_expanded
|
|
|
|
|
|
|
+ # 合并边列表(加入帖子内的属于边)
|
|
|
|
|
+ all_edges = post_belong_edges + match_edges + persona_edges + post_edges + useful_expanded_edges + useful_category_edges + post_edges_via_expanded
|
|
|
# 去重边
|
|
# 去重边
|
|
|
seen_edges = set()
|
|
seen_edges = set()
|
|
|
unique_edges = []
|
|
unique_edges = []
|
|
@@ -616,9 +667,12 @@ def process_filtered_result(
|
|
|
"帖子标题": post_title,
|
|
"帖子标题": post_title,
|
|
|
"描述": "帖子与人设的节点匹配关系",
|
|
"描述": "帖子与人设的节点匹配关系",
|
|
|
"统计": {
|
|
"统计": {
|
|
|
- "帖子节点数": len(post_nodes),
|
|
|
|
|
|
|
+ "帖子点节点数": len(post_point_nodes),
|
|
|
|
|
+ "帖子标签节点数": len(post_tag_nodes),
|
|
|
|
|
+ "帖子节点总数": len(post_nodes),
|
|
|
"人设节点数(直接匹配)": len(persona_nodes),
|
|
"人设节点数(直接匹配)": len(persona_nodes),
|
|
|
"扩展节点数(有效)": len(useful_expanded_nodes),
|
|
"扩展节点数(有效)": len(useful_expanded_nodes),
|
|
|
|
|
+ "帖子属于边数": len(post_belong_edges),
|
|
|
"匹配边数": len(match_edges),
|
|
"匹配边数": len(match_edges),
|
|
|
"人设节点间边数": len(persona_edges),
|
|
"人设节点间边数": len(persona_edges),
|
|
|
"扩展边数(有效)": len(useful_expanded_edges),
|
|
"扩展边数(有效)": len(useful_expanded_edges),
|
|
@@ -628,9 +682,12 @@ def process_filtered_result(
|
|
|
"总边数": len(all_edges)
|
|
"总边数": len(all_edges)
|
|
|
}
|
|
}
|
|
|
},
|
|
},
|
|
|
|
|
+ "帖子点节点列表": post_point_nodes,
|
|
|
|
|
+ "帖子标签节点列表": post_tag_nodes,
|
|
|
"帖子节点列表": post_nodes,
|
|
"帖子节点列表": post_nodes,
|
|
|
"人设节点列表": persona_nodes,
|
|
"人设节点列表": persona_nodes,
|
|
|
"扩展节点列表": useful_expanded_nodes,
|
|
"扩展节点列表": useful_expanded_nodes,
|
|
|
|
|
+ "帖子属于边列表": post_belong_edges,
|
|
|
"匹配边列表": match_edges,
|
|
"匹配边列表": match_edges,
|
|
|
"人设节点间边列表": persona_edges,
|
|
"人设节点间边列表": persona_edges,
|
|
|
"扩展边列表": useful_expanded_edges,
|
|
"扩展边列表": useful_expanded_edges,
|
|
@@ -648,9 +705,12 @@ def process_filtered_result(
|
|
|
|
|
|
|
|
return {
|
|
return {
|
|
|
"帖子ID": post_id,
|
|
"帖子ID": post_id,
|
|
|
|
|
+ "帖子点节点数": len(post_point_nodes),
|
|
|
|
|
+ "帖子标签节点数": len(post_tag_nodes),
|
|
|
"帖子节点数": len(post_nodes),
|
|
"帖子节点数": len(post_nodes),
|
|
|
"人设节点数": len(persona_nodes),
|
|
"人设节点数": len(persona_nodes),
|
|
|
"扩展节点数": len(useful_expanded_nodes),
|
|
"扩展节点数": len(useful_expanded_nodes),
|
|
|
|
|
+ "帖子属于边数": len(post_belong_edges),
|
|
|
"匹配边数": len(match_edges),
|
|
"匹配边数": len(match_edges),
|
|
|
"人设边数": len(persona_edges),
|
|
"人设边数": len(persona_edges),
|
|
|
"扩展边数": len(useful_expanded_edges),
|
|
"扩展边数": len(useful_expanded_edges),
|