| 123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
- # -*- coding: utf-8 -*-
"""One-off import: fixed_query_eval/runs_full/q*/form_A.json → search_data table.

Idempotent (upsert); safe to run repeatedly.

Usage:
    python import_history.py
    python import_history.py --runs-dir /path/to/runs_full
"""
- import argparse
- import json
- import sys
- from pathlib import Path
# Directory containing this script.
HERE = Path(__file__).resolve().parent

# Put the script's own directory first on the import path; this must happen
# before ``import db`` below so the sibling module is the one that is found.
sys.path.insert(0, str(HERE))

import db

# Default value for --runs-dir, resolved relative to the parent of HERE.
DEFAULT_RUNS = HERE.parent.joinpath(
    "process_pipeline",
    "script",
    "search_eval",
    "fixed_query_eval",
    "runs_full",
)
def main(argv=None):
    """Import every ``q*/form_A.json`` under the runs directory.

    Each matching file is parsed as JSON and handed to
    ``db.upsert_search_posts`` together with the query id (the name of the
    file's parent directory) and the query text (``query``, falling back to
    ``original_q``).

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) lets argparse read ``sys.argv[1:]``, which is what
            the script entry point relies on.

    Returns:
        Process exit code: 0 when every file was imported, 1 when no input
        file was found or at least one file had to be skipped.
    """
    parser = argparse.ArgumentParser(description="历史搜索结果导入 search_data")
    parser.add_argument("--runs-dir", default=str(DEFAULT_RUNS))
    args = parser.parse_args(argv)

    runs = Path(args.runs_dir)
    files = sorted(runs.glob("q*/form_A.json"))
    if not files:
        print(f"❌ {runs} 下没有 q*/form_A.json")
        return 1

    total = 0
    skipped = 0
    for f in files:
        # A single unreadable or malformed file should not abort the whole
        # import: report it, keep going, and signal failure via the exit code.
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        try:
            data = json.loads(f.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            print(f"⚠️ {f}: 读取失败,已跳过 ({exc})")
            skipped += 1
            continue
        if not isinstance(data, dict):
            print(f"⚠️ {f}: 顶层不是 JSON 对象,已跳过")
            skipped += 1
            continue

        qid = f.parent.name
        # ``or []`` also covers an explicit ``"results": null``, which
        # ``data.get("results", [])`` would pass through as None.
        results = data.get("results") or []
        query = data.get("query") or data.get("original_q")
        n = db.upsert_search_posts(qid, query, results)
        print(f" {qid}: 文件 {len(results)} 条 → 入库 {n} 条")
        total += n

    print(f"✅ 共导入 {total} 条 → search_data")
    if skipped:
        print(f"❌ {skipped} 个文件未能导入")
        return 1
    return 0
# Script entry point: run the import and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|