|
@@ -645,11 +645,30 @@ def process_filtered_result(
|
|
|
useful_category_edges = [e for e in category_edges
|
|
useful_category_edges = [e for e in category_edges
|
|
|
if e["源节点ID"] in useful_expanded_ids and e["目标节点ID"] in useful_expanded_ids]
|
|
if e["源节点ID"] in useful_expanded_ids and e["目标节点ID"] in useful_expanded_ids]
|
|
|
|
|
|
|
|
|
|
+ # 5. 获取直接匹配层(第2层)和扩展层(第3层)之间的所有边(不仅仅是属于边)
|
|
|
|
|
+ # 这些边连接了直接匹配的人设节点和扩展的分类节点
|
|
|
|
|
+ cross_layer_edges = []
|
|
|
|
|
+ for edge in edges_data.get("边列表", []):
|
|
|
|
|
+ src, tgt = edge["源节点ID"], edge["目标节点ID"]
|
|
|
|
|
+ edge_type = edge["边类型"]
|
|
|
|
|
+ # 跳过已经收集的属于边(避免重复)
|
|
|
|
|
+ if edge_type == "属于":
|
|
|
|
|
+ continue
|
|
|
|
|
+ # 一端在直接匹配层,另一端在扩展层
|
|
|
|
|
+ src_in_direct = src in persona_node_ids
|
|
|
|
|
+ src_in_expanded = src in useful_expanded_ids
|
|
|
|
|
+ tgt_in_direct = tgt in persona_node_ids
|
|
|
|
|
+ tgt_in_expanded = tgt in useful_expanded_ids
|
|
|
|
|
+ if (src_in_direct and tgt_in_expanded) or (src_in_expanded and tgt_in_direct):
|
|
|
|
|
+ cross_layer_edges.append(edge)
|
|
|
|
|
+
|
|
|
# 合并节点列表
|
|
# 合并节点列表
|
|
|
all_nodes = post_nodes + persona_nodes + useful_expanded_nodes
|
|
all_nodes = post_nodes + persona_nodes + useful_expanded_nodes
|
|
|
|
|
|
|
|
# 合并边列表(加入帖子内的属于边)
|
|
# 合并边列表(加入帖子内的属于边)
|
|
|
- all_edges = post_belong_edges + match_edges + persona_edges + post_edges + useful_expanded_edges + useful_category_edges + post_edges_via_expanded
|
|
|
|
|
|
|
+ all_edges = (post_belong_edges + match_edges + persona_edges + post_edges +
|
|
|
|
|
+ useful_expanded_edges + useful_category_edges + cross_layer_edges +
|
|
|
|
|
+ post_edges_via_expanded)
|
|
|
# 去重边
|
|
# 去重边
|
|
|
seen_edges = set()
|
|
seen_edges = set()
|
|
|
unique_edges = []
|
|
unique_edges = []
|
|
@@ -709,6 +728,7 @@ def process_filtered_result(
|
|
|
"匹配边数": len(match_edges),
|
|
"匹配边数": len(match_edges),
|
|
|
"人设节点间边数": len(persona_edges),
|
|
"人设节点间边数": len(persona_edges),
|
|
|
"扩展边数(有效)": len(useful_expanded_edges),
|
|
"扩展边数(有效)": len(useful_expanded_edges),
|
|
|
|
|
+ "跨层边数": len(cross_layer_edges),
|
|
|
"帖子镜像边数(直接)": len(post_edges),
|
|
"帖子镜像边数(直接)": len(post_edges),
|
|
|
"帖子镜像边数(二阶)": len(post_edges_via_expanded),
|
|
"帖子镜像边数(二阶)": len(post_edges_via_expanded),
|
|
|
"总节点数": len(all_nodes),
|
|
"总节点数": len(all_nodes),
|
|
@@ -724,6 +744,7 @@ def process_filtered_result(
|
|
|
"匹配边列表": match_edges,
|
|
"匹配边列表": match_edges,
|
|
|
"人设节点间边列表": persona_edges,
|
|
"人设节点间边列表": persona_edges,
|
|
|
"扩展边列表": useful_expanded_edges,
|
|
"扩展边列表": useful_expanded_edges,
|
|
|
|
|
+ "跨层边列表": cross_layer_edges,
|
|
|
"帖子镜像边列表(直接)": post_edges,
|
|
"帖子镜像边列表(直接)": post_edges,
|
|
|
"帖子镜像边列表(二阶)": post_edges_via_expanded,
|
|
"帖子镜像边列表(二阶)": post_edges_via_expanded,
|
|
|
"节点列表": all_nodes,
|
|
"节点列表": all_nodes,
|
|
@@ -748,6 +769,7 @@ def process_filtered_result(
|
|
|
"匹配边数": len(match_edges),
|
|
"匹配边数": len(match_edges),
|
|
|
"人设边数": len(persona_edges),
|
|
"人设边数": len(persona_edges),
|
|
|
"扩展边数": len(useful_expanded_edges),
|
|
"扩展边数": len(useful_expanded_edges),
|
|
|
|
|
+ "跨层边数": len(cross_layer_edges),
|
|
|
"帖子边数(直接)": len(post_edges),
|
|
"帖子边数(直接)": len(post_edges),
|
|
|
"帖子边数(二阶)": len(post_edges_via_expanded),
|
|
"帖子边数(二阶)": len(post_edges_via_expanded),
|
|
|
"总节点数": len(all_nodes),
|
|
"总节点数": len(all_nodes),
|
|
@@ -805,7 +827,7 @@ def main():
|
|
|
result = process_filtered_result(filtered_file, nodes_data, edges_data, output_dir)
|
|
result = process_filtered_result(filtered_file, nodes_data, edges_data, output_dir)
|
|
|
results.append(result)
|
|
results.append(result)
|
|
|
print(f" 帖子节点: {result['帖子节点数']}, 人设节点: {result['人设节点数']}, 扩展节点: {result['扩展节点数']}")
|
|
print(f" 帖子节点: {result['帖子节点数']}, 人设节点: {result['人设节点数']}, 扩展节点: {result['扩展节点数']}")
|
|
|
- print(f" 匹配边: {result['匹配边数']}, 人设边: {result['人设边数']}, 扩展边: {result['扩展边数']}")
|
|
|
|
|
|
|
+ print(f" 匹配边: {result['匹配边数']}, 人设边: {result['人设边数']}, 扩展边: {result['扩展边数']}, 跨层边: {result['跨层边数']}")
|
|
|
print(f" 帖子边(直接): {result['帖子边数(直接)']}, 帖子边(二阶): {result['帖子边数(二阶)']}")
|
|
print(f" 帖子边(直接): {result['帖子边数(直接)']}, 帖子边(二阶): {result['帖子边数(二阶)']}")
|
|
|
|
|
|
|
|
# 汇总统计
|
|
# 汇总统计
|
|
@@ -819,6 +841,7 @@ def main():
|
|
|
total_match = sum(r['匹配边数'] for r in results)
|
|
total_match = sum(r['匹配边数'] for r in results)
|
|
|
total_persona_edges = sum(r['人设边数'] for r in results)
|
|
total_persona_edges = sum(r['人设边数'] for r in results)
|
|
|
total_expanded_edges = sum(r['扩展边数'] for r in results)
|
|
total_expanded_edges = sum(r['扩展边数'] for r in results)
|
|
|
|
|
+ total_cross_layer_edges = sum(r['跨层边数'] for r in results)
|
|
|
total_post_edges_direct = sum(r['帖子边数(直接)'] for r in results)
|
|
total_post_edges_direct = sum(r['帖子边数(直接)'] for r in results)
|
|
|
total_post_edges_2hop = sum(r['帖子边数(二阶)'] for r in results)
|
|
total_post_edges_2hop = sum(r['帖子边数(二阶)'] for r in results)
|
|
|
print(f" 总帖子节点: {total_post}")
|
|
print(f" 总帖子节点: {total_post}")
|
|
@@ -827,6 +850,7 @@ def main():
|
|
|
print(f" 总匹配边: {total_match}")
|
|
print(f" 总匹配边: {total_match}")
|
|
|
print(f" 总人设边: {total_persona_edges}")
|
|
print(f" 总人设边: {total_persona_edges}")
|
|
|
print(f" 总扩展边: {total_expanded_edges}")
|
|
print(f" 总扩展边: {total_expanded_edges}")
|
|
|
|
|
+ print(f" 总跨层边: {total_cross_layer_edges}")
|
|
|
print(f" 总帖子边(直接): {total_post_edges_direct}")
|
|
print(f" 总帖子边(直接): {total_post_edges_direct}")
|
|
|
print(f" 总帖子边(二阶): {total_post_edges_2hop}")
|
|
print(f" 总帖子边(二阶): {total_post_edges_2hop}")
|
|
|
print(f"\n输出目录: {output_dir}")
|
|
print(f"\n输出目录: {output_dir}")
|