|
@@ -362,20 +362,10 @@ def build_visualize_edges(
|
|
|
|
|
|
|
|
edge_list = []
|
|
edge_list = []
|
|
|
round_output_seen: set[tuple[int, str]] = set() # (round_num, node_name) 本轮已作为某边的 output
|
|
round_output_seen: set[tuple[int, str]] = set() # (round_num, node_name) 本轮已作为某边的 output
|
|
|
- best_score_by_node: dict[str, float] = {} # node_name -> 已出现过的最高 matched_score
|
|
|
|
|
- fully_derived_nodes: set[str] = set()
|
|
|
|
|
- current_round: int | None = None
|
|
|
|
|
|
|
+ prev_best_by_node: dict[str, tuple[float, bool]] = {} # node_name -> (score, is_fully) of last included round
|
|
|
|
|
|
|
|
for round_idx, derivation in enumerate(derivations):
|
|
for round_idx, derivation in enumerate(derivations):
|
|
|
round_num = derivation.get("round", round_idx + 1)
|
|
round_num = derivation.get("round", round_idx + 1)
|
|
|
- if current_round is None:
|
|
|
|
|
- current_round = round_num
|
|
|
|
|
- elif round_num != current_round:
|
|
|
|
|
- # 一轮结束后,将本轮 is_fully_derived=true 的节点加入 fully_derived_nodes,用于后续轮次过滤
|
|
|
|
|
- for (rn, name), (score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
|
|
|
|
|
- if rn == current_round and is_fully:
|
|
|
|
|
- fully_derived_nodes.add(name)
|
|
|
|
|
- current_round = round_num
|
|
|
|
|
for dr in derivation.get("derivation_results") or []:
|
|
for dr in derivation.get("derivation_results") or []:
|
|
|
output_list = dr.get("output") or []
|
|
output_list = dr.get("output") or []
|
|
|
path_id = dr.get("id")
|
|
path_id = dr.get("id")
|
|
@@ -391,16 +381,19 @@ def build_visualize_edges(
|
|
|
if not matched:
|
|
if not matched:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- # 同一轮内 output 节点不重复;若前面轮次该节点匹配分更高则本轮不保留;
|
|
|
|
|
|
|
+ # 同一轮内 output 节点不重复;若前面轮次该节点已完全推导,或分数未提升且未从 false 变 true,则本轮跳过;
|
|
|
# 并且只保留与 node_list 中该轮该节点的最高分记录一致的边
|
|
# 并且只保留与 node_list 中该轮该节点的最高分记录一致的边
|
|
|
output_names_this_edge = []
|
|
output_names_this_edge = []
|
|
|
for mp, reason, score, is_fully, out_item in matched:
|
|
for mp, reason, score, is_fully, out_item in matched:
|
|
|
if (round_num, mp) in round_output_seen:
|
|
if (round_num, mp) in round_output_seen:
|
|
|
continue
|
|
continue
|
|
|
- if mp in fully_derived_nodes:
|
|
|
|
|
- continue
|
|
|
|
|
- if score <= best_score_by_node.get(mp, -1.0):
|
|
|
|
|
- continue
|
|
|
|
|
|
|
+ prev = prev_best_by_node.get(mp)
|
|
|
|
|
+ if prev is not None:
|
|
|
|
|
+ prev_score, prev_fully = prev
|
|
|
|
|
+ if prev_fully:
|
|
|
|
|
+ continue
|
|
|
|
|
+ if not is_fully and score <= prev_score:
|
|
|
|
|
+ continue
|
|
|
best_info = best_node_info_by_round_mp.get((round_num, mp))
|
|
best_info = best_node_info_by_round_mp.get((round_num, mp))
|
|
|
if not best_info or score < best_info[0]:
|
|
if not best_info or score < best_info[0]:
|
|
|
continue
|
|
continue
|
|
@@ -409,9 +402,11 @@ def build_visualize_edges(
|
|
|
if not output_names_this_edge:
|
|
if not output_names_this_edge:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- for mp, _r, score, _f, _o in output_names_this_edge:
|
|
|
|
|
|
|
+ for mp, _r, score, is_fully, _o in output_names_this_edge:
|
|
|
round_output_seen.add((round_num, mp))
|
|
round_output_seen.add((round_num, mp))
|
|
|
- best_score_by_node[mp] = max(best_score_by_node.get(mp, -1.0), score)
|
|
|
|
|
|
|
+ prev = prev_best_by_node.get(mp)
|
|
|
|
|
+ if prev is None or (not prev[1] and (is_fully or score > prev[0])):
|
|
|
|
|
+ prev_best_by_node[mp] = (score, is_fully)
|
|
|
|
|
|
|
|
input_data = dr.get("input") or {}
|
|
input_data = dr.get("input") or {}
|
|
|
derived_nodes = input_data.get("derived_nodes") or []
|
|
derived_nodes = input_data.get("derived_nodes") or []
|
|
@@ -454,28 +449,30 @@ def build_visualize_edges(
|
|
|
"detail": detail,
|
|
"detail": detail,
|
|
|
})
|
|
})
|
|
|
|
|
|
|
|
- # 处理最后一轮的 fully_derived_nodes
|
|
|
|
|
- if current_round is not None:
|
|
|
|
|
- for (rn, name), (score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
|
|
|
|
|
- if rn == current_round and is_fully:
|
|
|
|
|
- fully_derived_nodes.add(name)
|
|
|
|
|
-
|
|
|
|
|
# 根据按 (round, mp) 聚合后的最佳信息生成 node_list
|
|
# 根据按 (round, mp) 聚合后的最佳信息生成 node_list
|
|
|
- # 规则:若某节点在某轮已经 is_fully_derived=True,则之后轮次即便分数更高也不再保留该节点
|
|
|
|
|
- first_full_round_by_name: dict[str, int] = {}
|
|
|
|
|
- for (round_num, mp), (_score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
|
|
|
|
|
- if not is_fully:
|
|
|
|
|
- continue
|
|
|
|
|
- prev = first_full_round_by_name.get(mp)
|
|
|
|
|
- if prev is None or round_num < prev:
|
|
|
|
|
- first_full_round_by_name[mp] = round_num
|
|
|
|
|
-
|
|
|
|
|
|
|
+ # 规则:节点首次出现保留;is_fully_derived 从 false 变 true 时保留;
|
|
|
|
|
+ # is_fully_derived=false 且分数高于之前已保留轮次时保留;其余情况跳过
|
|
|
|
|
+ prev_node_best: dict[str, tuple[float, bool]] = {} # mp -> (score, is_fully) of last included round
|
|
|
node_list: list[dict] = []
|
|
node_list: list[dict] = []
|
|
|
- for (round_num, mp), (score, is_fully, out_item, method) in best_node_info_by_round_mp.items():
|
|
|
|
|
- full_round = first_full_round_by_name.get(mp)
|
|
|
|
|
- # 若存在更早的 fully 轮次,且当前轮次在其之后,则不再保留
|
|
|
|
|
- if full_round is not None and round_num > full_round:
|
|
|
|
|
|
|
+ for (round_num, mp), (score, is_fully, out_item, method) in sorted(
|
|
|
|
|
+ best_node_info_by_round_mp.items(), key=lambda x: (x[0][0], x[0][1])
|
|
|
|
|
+ ):
|
|
|
|
|
+ prev = prev_node_best.get(mp)
|
|
|
|
|
+ if prev is None:
|
|
|
|
|
+ should_include = True
|
|
|
|
|
+ else:
|
|
|
|
|
+ prev_score, prev_fully = prev
|
|
|
|
|
+ if prev_fully:
|
|
|
|
|
+ should_include = False
|
|
|
|
|
+ elif is_fully:
|
|
|
|
|
+ should_include = True
|
|
|
|
|
+ elif score > prev_score:
|
|
|
|
|
+ should_include = True
|
|
|
|
|
+ else:
|
|
|
|
|
+ should_include = False
|
|
|
|
|
+ if not should_include:
|
|
|
continue
|
|
continue
|
|
|
|
|
+ prev_node_best[mp] = (score, is_fully)
|
|
|
base = dict(topic_by_name.get(mp, {"name": mp, "point": "", "dimension": "", "root_source": "", "root_sources_desc": ""}))
|
|
base = dict(topic_by_name.get(mp, {"name": mp, "point": "", "dimension": "", "root_source": "", "root_sources_desc": ""}))
|
|
|
base["level"] = round_num
|
|
base["level"] = round_num
|
|
|
base.setdefault("original_word", base.get("name", mp))
|
|
base.setdefault("original_word", base.get("name", mp))
|
|
@@ -558,9 +555,9 @@ if __name__ == "__main__":
|
|
|
account_name="家有大志"
|
|
account_name="家有大志"
|
|
|
|
|
|
|
|
items = [
|
|
items = [
|
|
|
- {"post_id":"68fb6a5c000000000302e5de","log_id":"20260318220540"},
|
|
|
|
|
- {"post_id":"69185d49000000000d00f94e","log_id":"20260318221136"},
|
|
|
|
|
- {"post_id":"6921937a000000001b0278d1","log_id":"20260318221538"}
|
|
|
|
|
|
|
+ {"post_id":"68fb6a5c000000000302e5de","log_id":"20260319134630"},
|
|
|
|
|
+ {"post_id":"69185d49000000000d00f94e","log_id":"20260319140603"},
|
|
|
|
|
+ {"post_id":"6921937a000000001b0278d1","log_id":"20260319141843"}
|
|
|
]
|
|
]
|
|
|
for item in items:
|
|
for item in items:
|
|
|
post_id = item["post_id"]
|
|
post_id = item["post_id"]
|