| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748 |
# -*- coding: utf-8 -*-
"""One-off import: fixed_query_eval/runs_full/*/form_A.json → search table.

Idempotent (upsert), so it can be run repeatedly. By default it imports into
search_process (the process direction).

Usage:
    python import_history.py
    python import_history.py --runs-dir /path/to/runs_full --table search_tools
"""
- import argparse
- import json
- import sys
- from pathlib import Path
# Absolute directory containing this script. It is pushed to the front of
# sys.path so that modules living next to this file (the `import db` that
# follows) resolve regardless of the current working directory.
HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(HERE))
- import db
# Default value of --runs-dir, resolved relative to this script:
# <parent of HERE>/process_pipeline/script/search_eval/fixed_query_eval/runs_full
DEFAULT_RUNS = (
    HERE.parent
    / "process_pipeline"
    / "script"
    / "search_eval"
    / "fixed_query_eval"
    / "runs_full"
)
def main(argv=None):
    """Import every ``q*/form_A.json`` under the runs directory into a search table.

    For each matching file, the JSON document's ``results`` list is passed to
    ``db.upsert_search_posts`` together with the query id (the name of the
    file's parent directory) and the query text (``query``, falling back to
    ``original_q``). A per-file line and a final total are printed.

    Args:
        argv: Command-line arguments without the program name. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            script entry point relies on.

    Returns:
        Process exit status: 0 after a completed import, 1 when no
        ``q*/form_A.json`` file exists under the runs directory.
    """
    parser = argparse.ArgumentParser(description="历史搜索结果导入搜索表")
    parser.add_argument("--runs-dir", default=str(DEFAULT_RUNS))
    parser.add_argument("--table", default="search_process",
                        choices=["search_process", "search_tools"])
    args = parser.parse_args(argv)

    runs = Path(args.runs_dir)
    # Sorted so files are processed (and reported) in a stable order.
    files = sorted(runs.glob("q*/form_A.json"))
    if not files:
        print(f"❌ {runs} 下没有 q*/form_A.json")
        return 1

    total = 0
    for f in files:
        data = json.loads(f.read_text(encoding="utf-8"))
        qid = f.parent.name
        # `or []` also covers an explicit `"results": null`; a plain
        # `.get("results", [])` would return None there and break len().
        results = data.get("results") or []
        query_text = data.get("query") or data.get("original_q")
        # NOTE(review): the return value is treated as the number of rows
        # written, as the message below implies — confirm against db.
        n = db.upsert_search_posts(qid, query_text, results, table=args.table)
        print(f" {qid}: 文件 {len(results)} 条 → 入库 {n} 条")
        total += n

    print(f"✅ 共导入 {total} 条 → {args.table}")
    return 0
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|