|
|
@@ -0,0 +1,45 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+"""一次性导入:fixed_query_eval/runs_full/q*/form_A.json → search_data 表。
|
|
|
+幂等(upsert),可反复执行。
|
|
|
+
|
|
|
+用法:
|
|
|
+ python import_history.py
|
|
|
+ python import_history.py --runs-dir /path/to/runs_full
|
|
|
+"""
|
|
|
+import argparse
|
|
|
+import json
|
|
|
+import sys
|
|
|
+from pathlib import Path
|
|
|
+
|
|
|
# Directory that contains this script. It is pushed to the front of sys.path
# so the sibling ``db`` module can be imported no matter what the current
# working directory is.
HERE = Path(__file__).resolve().parent
sys.path.insert(0, str(HERE))
import db  # noqa: E402  (must come after the sys.path tweak above)

# Default --runs-dir: the fixed-query evaluation output tree, located relative
# to the parent of this script's directory.
DEFAULT_RUNS = HERE.parent.joinpath(
    "process_pipeline",
    "script",
    "search_eval",
    "fixed_query_eval",
    "runs_full",
)
|
|
|
+
|
|
|
+
|
|
|
def main(argv=None):
    """Import every ``q*/form_A.json`` under the runs directory into ``search_data``.

    For each matching file the JSON document is loaded and its ``results``
    value is handed to ``db.upsert_search_posts`` together with the name of
    the file's parent directory (used as the query id) and the query text
    (``query``, falling back to ``original_q``).

    A file that cannot be read, is not valid JSON, or whose top-level value is
    not a JSON object is reported on stderr and skipped, so one bad run no
    longer aborts the remaining imports.

    Args:
        argv: Argument list to parse. ``None`` (the default) lets argparse
            read ``sys.argv[1:]``, which is what the command-line entry
            point relies on.

    Returns:
        ``0`` if every file was imported; ``1`` if no matching file was found
        or at least one file had to be skipped.
    """
    parser = argparse.ArgumentParser(description="历史搜索结果导入 search_data")
    parser.add_argument("--runs-dir", default=str(DEFAULT_RUNS))
    args = parser.parse_args(argv)

    runs = Path(args.runs_dir)
    # Sorted so the per-query log lines come out in a stable order.
    files = sorted(runs.glob("q*/form_A.json"))
    if not files:
        print(f"❌ {runs} 下没有 q*/form_A.json")
        return 1

    total = 0
    skipped = 0
    for f in files:
        qid = f.parent.name
        try:
            data = json.loads(f.read_text(encoding="utf-8"))
        except (OSError, ValueError) as exc:
            # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
            print(f" {qid}: ⚠️ 跳过,无法读取或解析 {f}: {exc}", file=sys.stderr)
            skipped += 1
            continue
        if not isinstance(data, dict):
            print(f" {qid}: ⚠️ 跳过,{f} 顶层不是 JSON 对象", file=sys.stderr)
            skipped += 1
            continue

        # ``or []`` also covers an explicit ``"results": null`` in the file,
        # which a plain ``.get("results", [])`` would pass through as None.
        results = data.get("results") or []
        query = data.get("query") or data.get("original_q")
        n = db.upsert_search_posts(qid, query, results)
        print(f" {qid}: 文件 {len(results)} 条 → 入库 {n} 条")
        total += n

    print(f"✅ 共导入 {total} 条 → search_data")
    if skipped:
        # Surface partial failure through the exit status; a re-run after the
        # bad files are fixed is safe because the import is an upsert.
        print(f"⚠️ 有 {skipped} 个文件被跳过", file=sys.stderr)
        return 1
    return 0
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Run the importer and hand its return value to the interpreter as the
    # process exit status.
    sys.exit(main())
|