|
|
@@ -146,7 +146,8 @@ def build_derivation_result(
|
|
|
result = []
|
|
|
derived_names_so_far: set[str] = set()
|
|
|
fully_derived_names_so_far: set[str] = set() # 已出现过 is_fully_derived=true 的选题点
|
|
|
- best_score_by_name: dict[str, tuple[float, bool]] = {} # name -> (matched_score, is_fully_derived),遇 is_fully=true 时更新
|
|
|
+ # name -> (matched_score, is_fully_derived),一旦 is_fully_derived=True,后续轮次不再更新 matched_score
|
|
|
+ best_score_by_name: dict[str, tuple[float, bool]] = {}
|
|
|
|
|
|
for i, (derivation, eval_data) in enumerate(zip(derivations, evals)):
|
|
|
round_num = derivation.get("round", i + 1)
|
|
|
@@ -175,7 +176,10 @@ def build_derivation_result(
|
|
|
elif name not in fully_derived_names_so_far and is_fully:
|
|
|
new_derived_names.add(name)
|
|
|
|
|
|
- # 更新推导集合与 best:首次出现或本轮 is_fully=true 时更新 best
|
|
|
+ # 更新推导集合与 best:
|
|
|
+ # - 首次出现时写入
|
|
|
+ # - 若尚未 fully 且本轮 fully,则更新为 fully,并锁定,不再被后续轮次覆盖
|
|
|
+ # - 若尚未 fully 且本轮仍为部分推导,可用更高分数更新
|
|
|
derived_names_so_far |= matched_post_points
|
|
|
for name in matched_post_points:
|
|
|
val = this_round_scores.get(name)
|
|
|
@@ -184,8 +188,18 @@ def build_derivation_result(
|
|
|
score, is_fully = val
|
|
|
if name not in best_score_by_name:
|
|
|
best_score_by_name[name] = (score, is_fully)
|
|
|
- elif is_fully:
|
|
|
- best_score_by_name[name] = (score, is_fully)
|
|
|
+ else:
|
|
|
+ prev_score, prev_fully = best_score_by_name[name]
|
|
|
+ # 已经 fully 的节点,后续轮次不再更新 matched_score
|
|
|
+ if prev_fully:
|
|
|
+ pass
|
|
|
+ else:
|
|
|
+ if is_fully:
|
|
|
+ best_score_by_name[name] = (score, True)
|
|
|
+ else:
|
|
|
+ # 都是部分推导时,可以用更高分覆盖
|
|
|
+ if score > prev_score:
|
|
|
+ best_score_by_name[name] = (score, False)
|
|
|
if is_fully:
|
|
|
fully_derived_names_so_far.add(name)
|
|
|
|
|
|
@@ -320,9 +334,6 @@ def build_visualize_edges(
|
|
|
if k not in match_by_round_output:
|
|
|
match_by_round_output[k] = val
|
|
|
|
|
|
- # 按 (round_num, mp) 收集节点候选,同轮同节点保留 matched_score 最高的一条
|
|
|
- node_candidates: dict[tuple[int, str], dict] = {} # (round_num, mp) -> node_dict (含 score, is_fully_derived)
|
|
|
-
|
|
|
def get_match(round_num: int, path_id: int | None, item_id: int | None, out_item: str) -> tuple[str, str, float, bool] | None:
|
|
|
if path_id is not None and item_id is not None:
|
|
|
v = match_by_path_item.get((round_num, path_id, item_id))
|
|
|
@@ -330,13 +341,41 @@ def build_visualize_edges(
|
|
|
return v
|
|
|
return match_by_round_output.get((round_num, out_item))
|
|
|
|
|
|
+ # 第一遍:按 (round_num, mp) 聚合节点最佳信息(不考虑边是否最终保留)
|
|
|
+ # (round_num, mp) -> (score, is_fully_derived, derivation_output_point, method)
|
|
|
+ best_node_info_by_round_mp: dict[tuple[int, str], tuple[float, bool, str, str]] = {}
|
|
|
+ for round_idx, derivation in enumerate(derivations):
|
|
|
+ round_num = derivation.get("round", round_idx + 1)
|
|
|
+ for dr in derivation.get("derivation_results") or []:
|
|
|
+ output_list = dr.get("output") or []
|
|
|
+ path_id = dr.get("id")
|
|
|
+ for i, out_item in enumerate(output_list):
|
|
|
+ item_id = i + 1
|
|
|
+ v = get_match(round_num, path_id, item_id, out_item)
|
|
|
+ if not v:
|
|
|
+ continue
|
|
|
+ mp, _reason, score, is_fully = v
|
|
|
+ key = (round_num, mp)
|
|
|
+ prev = best_node_info_by_round_mp.get(key)
|
|
|
+ if prev is None or score > prev[0]:
|
|
|
+ best_node_info_by_round_mp[key] = (score, bool(is_fully), out_item, dr.get("method", ""))
|
|
|
+
|
|
|
edge_list = []
|
|
|
round_output_seen: set[tuple[int, str]] = set() # (round_num, node_name) 本轮已作为某边的 output
|
|
|
best_score_by_node: dict[str, float] = {} # node_name -> 已出现过的最高 matched_score
|
|
|
fully_derived_nodes: set[str] = set()
|
|
|
+ current_round: int | None = None
|
|
|
|
|
|
for round_idx, derivation in enumerate(derivations):
|
|
|
round_num = derivation.get("round", round_idx + 1)
|
|
|
+ if current_round is None:
|
|
|
+ current_round = round_num
|
|
|
+ elif round_num != current_round:
|
|
|
+ # 一轮结束后,将本轮 is_fully_derived=true 的节点加入 fully_derived_nodes,用于后续轮次过滤
|
|
|
+ for (rn, name), (score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
|
|
|
+ if rn == current_round and is_fully:
|
|
|
+ fully_derived_nodes.add(name)
|
|
|
+ current_round = round_num
|
|
|
for dr in derivation.get("derivation_results") or []:
|
|
|
output_list = dr.get("output") or []
|
|
|
path_id = dr.get("id")
|
|
|
@@ -352,7 +391,8 @@ def build_visualize_edges(
|
|
|
if not matched:
|
|
|
continue
|
|
|
|
|
|
- # 同一轮内 output 节点不重复;若前面轮次该节点匹配分更高则本轮不保留
|
|
|
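+ # Keep each output node at most once per round; skip it when best_score_by_node already holds an equal or higher matched_score for that node;
|
|
|
+ # also skip it when its score is below the best score recorded for this (round, node) in best_node_info_by_round_mp (or no record exists)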
|
|
|
output_names_this_edge = []
|
|
|
for mp, reason, score, is_fully, out_item in matched:
|
|
|
if (round_num, mp) in round_output_seen:
|
|
|
@@ -361,6 +401,9 @@ def build_visualize_edges(
|
|
|
continue
|
|
|
if score <= best_score_by_node.get(mp, -1.0):
|
|
|
continue
|
|
|
+ best_info = best_node_info_by_round_mp.get((round_num, mp))
|
|
|
+ if not best_info or score < best_info[0]:
|
|
|
+ continue
|
|
|
output_names_this_edge.append((mp, reason, score, is_fully, out_item))
|
|
|
|
|
|
if not output_names_this_edge:
|
|
|
@@ -370,20 +413,6 @@ def build_visualize_edges(
|
|
|
round_output_seen.add((round_num, mp))
|
|
|
best_score_by_node[mp] = max(best_score_by_node.get(mp, -1.0), score)
|
|
|
|
|
|
- # 节点候选:同轮同节点保留匹配分更高的
|
|
|
- for mp, _reason, score, is_fully, out_item in output_names_this_edge:
|
|
|
- key = (round_num, mp)
|
|
|
- if key not in node_candidates or node_candidates[key].get("matched_score", 0) < score:
|
|
|
- node = dict(topic_by_name.get(mp, {"name": mp, "point": "", "dimension": "", "root_source": "", "root_sources_desc": ""}))
|
|
|
- node["level"] = round_num
|
|
|
- node.setdefault("original_word", node.get("name", mp))
|
|
|
- node["derivation_type"] = dr.get("method", "")
|
|
|
- node["matched_score"] = score
|
|
|
- node["is_fully_derived"] = is_fully
|
|
|
- # 对应评估中的 derivation_output_point
|
|
|
- node["derivation_output_point"] = out_item
|
|
|
- node_candidates[key] = node
|
|
|
-
|
|
|
input_data = dr.get("input") or {}
|
|
|
derived_nodes = input_data.get("derived_nodes") or []
|
|
|
tree_nodes = input_data.get("tree_nodes") or []
|
|
|
@@ -425,11 +454,38 @@ def build_visualize_edges(
|
|
|
"detail": detail,
|
|
|
})
|
|
|
|
|
|
- for (rn, name), nd in node_candidates.items():
|
|
|
- if rn == round_num and nd.get("is_fully_derived"):
|
|
|
+ # 处理最后一轮的 fully_derived_nodes
|
|
|
+ if current_round is not None:
|
|
|
+ for (rn, name), (score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
|
|
|
+ if rn == current_round and is_fully:
|
|
|
fully_derived_nodes.add(name)
|
|
|
|
|
|
- node_list = list(node_candidates.values())
|
|
|
+ # 根据按 (round, mp) 聚合后的最佳信息生成 node_list
|
|
|
+ # 规则:若某节点在某轮已经 is_fully_derived=True,则之后轮次即便分数更高也不再保留该节点
|
|
|
+ first_full_round_by_name: dict[str, int] = {}
|
|
|
+ for (round_num, mp), (_score, is_fully, _out_item, _method) in best_node_info_by_round_mp.items():
|
|
|
+ if not is_fully:
|
|
|
+ continue
|
|
|
+ prev = first_full_round_by_name.get(mp)
|
|
|
+ if prev is None or round_num < prev:
|
|
|
+ first_full_round_by_name[mp] = round_num
|
|
|
+
|
|
|
+ node_list: list[dict] = []
|
|
|
+ for (round_num, mp), (score, is_fully, out_item, method) in best_node_info_by_round_mp.items():
|
|
|
+ full_round = first_full_round_by_name.get(mp)
|
|
|
+ # 若存在更早的 fully 轮次,且当前轮次在其之后,则不再保留
|
|
|
+ if full_round is not None and round_num > full_round:
|
|
|
+ continue
|
|
|
+ base = dict(topic_by_name.get(mp, {"name": mp, "point": "", "dimension": "", "root_source": "", "root_sources_desc": ""}))
|
|
|
+ base["level"] = round_num
|
|
|
+ base.setdefault("original_word", base.get("name", mp))
|
|
|
+ base["derivation_type"] = method
|
|
|
+ base["matched_score"] = score
|
|
|
+ base["is_fully_derived"] = is_fully
|
|
|
+ base["derivation_output_point"] = out_item
|
|
|
+ node_list.append(base)
|
|
|
+
|
|
|
+ node_list.sort(key=lambda n: (n.get("level", 0), str(n.get("name", ""))))
|
|
|
return node_list, edge_list
|
|
|
|
|
|
|
|
|
@@ -502,9 +558,9 @@ if __name__ == "__main__":
|
|
|
account_name="家有大志"
|
|
|
|
|
|
items = [
|
|
|
- {"post_id":"68fb6a5c000000000302e5de","log_id":"20260317214307"},
|
|
|
- {"post_id":"69185d49000000000d00f94e","log_id":"20260317214841"},
|
|
|
- {"post_id":"6921937a000000001b0278d1","log_id":"20260317215616"}
|
|
|
+ {"post_id":"68fb6a5c000000000302e5de","log_id":"20260318220540"},
|
|
|
+ {"post_id":"69185d49000000000d00f94e","log_id":"20260318221136"},
|
|
|
+ {"post_id":"6921937a000000001b0278d1","log_id":"20260318221538"}
|
|
|
]
|
|
|
for item in items:
|
|
|
post_id = item["post_id"]
|