# -*- coding: utf-8 -*-
"""Move mis-indexed ``runs_full/qXXXX`` directories to their correct idx slot, in place.

Why directories are misplaced: the old batch_3forms.py defaulted to
high_priority_queries.json (468 entries) and used that file's idx when writing
into runs_full. The default was later switched to
high_priority_queries_full.json (2808 entries). The two idx tables differ, so
the original_q stored in e.g. runs_full/q0002 actually belongs to a different
idx in the new full table. This script reads each q directory's original_q
(from form_A.json, falling back to form_B/form_C), looks up the equal q in the
new full table, and moves the directory to q{correct idx:04d}/.

Rules:
- Already in the correct slot: skipped.
- Misplaced and the target slot is free: renamed with shutil.move
  (_manifest.json is rewritten afterwards).
- Misplaced but the target slot is occupied: left untouched and reported as a
  conflict for manual arbitration.
- original_q not found in the new full table: reported as unmatched (should
  not happen; build_runs_mapping is expected to have filtered these earlier).
"""
import json
import re
import shutil
from pathlib import Path
from typing import List, Optional, Tuple

HERE = Path(__file__).parent
SEARCH_EVAL = HERE.parent
RUNS_FULL = SEARCH_EVAL / "runs_full"
FULL = HERE / "high_priority_queries_full.json"
MANIFEST = RUNS_FULL / "_manifest.json"


def _qnum(name: str) -> int:
    """Return the first run of digits in *name* as an int, or -1 if there is none."""
    m = re.search(r"\d+", name)
    return int(m.group()) if m else -1


def _list_q_dirs(runs_full: Path) -> List[Path]:
    """Return the sub-directories of *runs_full* whose name starts with "q", ordered by number."""
    return sorted(
        (d for d in runs_full.iterdir() if d.is_dir() and d.name.startswith("q")),
        key=lambda p: _qnum(p.name),
    )


def load_original_q(qd: Path) -> Tuple[str, str]:
    """Read the original query stored in a q directory.

    Tries form_A.json, form_B.json and form_C.json in that order and returns
    the first non-empty ``original_q`` (or, failing that, ``query``) value.

    Args:
        qd: The q directory to inspect.

    Returns:
        ``(original_q, form_key)`` where form_key is "A", "B" or "C", or
        ``("", "")`` when no form file yields a usable value.
    """
    for fk in ("A", "B", "C"):
        f = qd / f"form_{fk}.json"
        if not f.exists():
            continue
        try:
            # Context manager so the handle is closed even when parsing fails.
            with open(f, encoding="utf-8") as fh:
                d = json.load(fh)
        except (OSError, ValueError, RecursionError):
            # Unreadable or malformed file (JSON / Unicode decode errors are
            # ValueError subclasses): best effort, try the next form.
            continue
        if not isinstance(d, dict):
            # A JSON array/scalar carries no original_q; try the next form.
            continue
        oq = d.get("original_q") or d.get("query") or ""
        if oq:
            return oq, fk
    return "", ""


def main(
    runs_full: Optional[Path] = None,
    full_path: Optional[Path] = None,
    manifest_path: Optional[Path] = None,
) -> None:
    """Re-home misplaced q directories, rewrite the manifest and print a report.

    Args:
        runs_full: Directory holding the qXXXX sub-directories. Defaults to
            the module-level RUNS_FULL.
        full_path: JSON file with a top-level "queries" list. Defaults to the
            module-level FULL.
        manifest_path: Where the manifest is written. Defaults to MANIFEST, or
            to ``<runs_full>/_manifest.json`` when *runs_full* is overridden.
    """
    if manifest_path is None:
        manifest_path = MANIFEST if runs_full is None else Path(runs_full) / MANIFEST.name
    else:
        manifest_path = Path(manifest_path)
    runs_full = RUNS_FULL if runs_full is None else Path(runs_full)
    full_path = FULL if full_path is None else Path(full_path)

    with open(full_path, encoding="utf-8") as fh:
        full = json.load(fh)["queries"]
    # With duplicate q values the last index wins here; the scan below makes
    # sure a directory already sitting on any matching index is not moved.
    q_to_idx = {item["q"]: i for i, item in enumerate(full)}

    already_ok, moved, conflicts, unmatched = [], [], [], []

    # Plan: scan and decide first, execute afterwards, so that moving
    # directories cannot confuse later decisions.
    plan = []
    for qd in _list_q_dirs(runs_full):
        cur_idx = _qnum(qd.name)
        oq, _ = load_original_q(qd)
        if not oq:
            unmatched.append({"name": qd.name,
                              "reason": "form_*.json 缺失或 original_q 空"})
            continue
        correct_idx = q_to_idx.get(oq)
        if correct_idx is None:
            unmatched.append({"name": qd.name, "original_q": oq,
                              "reason": "在新 full 表中找不到等值 q(可能是历史模态)"})
            continue
        sits_on_matching_idx = 0 <= cur_idx < len(full) and full[cur_idx]["q"] == oq
        if correct_idx == cur_idx or sits_on_matching_idx:
            already_ok.append((qd.name, oq))
            continue
        plan.append((qd, cur_idx, correct_idx, oq))

    # Repeated passes: move everything whose target is free, then look at the
    # blocked ones again (an earlier move may have freed their target). Stop
    # once a whole pass moves nothing.
    pending = list(plan)
    progress = True
    while pending and progress:
        progress = False
        next_pending = []
        for qd, cur_idx, correct_idx, oq in pending:
            dst = runs_full / f"q{correct_idx:04d}"
            if dst.exists():
                next_pending.append((qd, cur_idx, correct_idx, oq))
                continue
            shutil.move(str(qd), str(dst))
            moved.append({"from": qd.name, "to": dst.name, "original_q": oq})
            progress = True
        pending = next_pending

    # Whatever is still pending has a genuinely occupied target: leave it for
    # a human to look at.
    for qd, cur_idx, correct_idx, oq in pending:
        dst = runs_full / f"q{correct_idx:04d}"
        dst_oq, _ = load_original_q(dst) if dst.exists() else ("", "")
        conflicts.append({
            "src": qd.name,
            "src_idx": cur_idx,
            "src_original_q": oq,
            "dst": dst.name,
            "dst_idx": correct_idx,
            "dst_original_q": dst_oq,
        })

    # Rewrite the manifest from the state of runs_full after the moves.
    new_manifest_items = []
    for qd in _list_q_dirs(runs_full):
        oq, _ = load_original_q(qd)
        idx = _qnum(qd.name)
        if 0 <= idx < len(full):
            item = full[idx]
            new_manifest_items.append({
                "qdir": qd.name,
                "full_idx": idx,
                "original_q": oq,
                "cell_idx": idx // 18,
                "lens": item["lens"],
                "constraint_value": (item.get("constraint") or {}).get("value") or "无约束",
            })
    manifest_path.write_text(json.dumps({
        "_doc": "runs_full/qXXXX 的当前状态快照;XXXX = high_priority_queries_full.json idx 4 位 zero-pad",
        "items": new_manifest_items,
    }, ensure_ascii=False, indent=2), encoding="utf-8")

    print("=== runs_full 原地重映射结果 ===")
    print(f" 原本就对的: {len(already_ok)}")
    print(f" 移动到位: {len(moved)}")
    print(f" 冲突待人工: {len(conflicts)}")
    print(f" 未匹配: {len(unmatched)}")
    print(f"→ manifest 已重写: {manifest_path}")
    print()
    if moved:
        print("=== 已搬动(抽样)===")
        for m in moved[:8]:
            print(f" {m['from']:<7} → {m['to']:<7} [{m['original_q']}]")
    if conflicts:
        print()
        print("=== ⚠️ 冲突(目标位被别的 original_q 占住,未搬)===")
        for c in conflicts:
            print(f" {c['src']} (idx={c['src_idx']}, q={c['src_original_q']!r})")
            print(f" -> 想去 {c['dst']} (idx={c['dst_idx']}, 但被 {c['dst_original_q']!r} 占)")
    if unmatched:
        print()
        print("=== ⚠️ 在新 full 中找不到 ===")
        for u in unmatched:
            print(f" {u}")


if __name__ == "__main__":
    main()