import_history.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. # -*- coding: utf-8 -*-
  2. """一次性导入:fixed_query_eval/runs_full/*/form_A.json → 搜索表。
  3. 幂等(upsert),可反复执行。默认导入 search_process(工序方向)。
  4. 用法:
  5. python import_history.py
  6. python import_history.py --runs-dir /path/to/runs_full --table search_tools
  7. """
  8. import argparse
  9. import json
  10. import sys
  11. from pathlib import Path
  12. HERE = Path(__file__).resolve().parent
  13. sys.path.insert(0, str(HERE))
  14. import db
  15. DEFAULT_RUNS = (HERE.parent / "process_pipeline" / "script" / "search_eval"
  16. / "fixed_query_eval" / "runs_full")
  17. def main():
  18. p = argparse.ArgumentParser(description="历史搜索结果导入搜索表")
  19. p.add_argument("--runs-dir", default=str(DEFAULT_RUNS))
  20. p.add_argument("--table", default="search_process",
  21. choices=["search_process", "search_tools"])
  22. args = p.parse_args()
  23. runs = Path(args.runs_dir)
  24. files = sorted(runs.glob("q*/form_A.json"))
  25. if not files:
  26. print(f"❌ {runs} 下没有 q*/form_A.json"); return 1
  27. total = 0
  28. for f in files:
  29. data = json.loads(f.read_text(encoding="utf-8"))
  30. qid = f.parent.name
  31. results = data.get("results", [])
  32. n = db.upsert_search_posts(qid, data.get("query") or data.get("original_q"),
  33. results, table=args.table)
  34. print(f" {qid}: 文件 {len(results)} 条 → 入库 {n} 条")
  35. total += n
  36. print(f"✅ 共导入 {total} 条 → {args.table}")
  37. return 0
  38. if __name__ == "__main__":
  39. raise SystemExit(main())