|
@@ -95,8 +95,6 @@ async def run(args):
|
|
|
if n:
|
|
if n:
|
|
|
print(f"🎙️ 视频转写 {n} 条")
|
|
print(f"🎙️ 视频转写 {n} 条")
|
|
|
|
|
|
|
|
- table = "search_tools" if args.mode_type == "工具" else "search_process"
|
|
|
|
|
-
|
|
|
|
|
# ── 评估去重 ────────────────────────────────────────────────────────────────
|
|
# ── 评估去重 ────────────────────────────────────────────────────────────────
|
|
|
# 评估的相关性含两子项:「和内容制作知识相关」(与 query 无关)与「和 query 相关」
|
|
# 评估的相关性含两子项:「和内容制作知识相关」(与 query 无关)与「和 query 相关」
|
|
|
# (query 专属)。同帖在别的相似 query 下评过时,质量/通用相关/时效等 query 无关分
|
|
# (query 专属)。同帖在别的相似 query 下评过时,质量/通用相关/时效等 query 无关分
|
|
@@ -107,7 +105,7 @@ async def run(args):
|
|
|
prior = {}
|
|
prior = {}
|
|
|
if not args.force_eval:
|
|
if not args.force_eval:
|
|
|
for s in sources:
|
|
for s in sources:
|
|
|
- e = db.fetch_existing_eval(s["case_id"], table)
|
|
|
|
|
|
|
+ e = db.fetch_existing_eval_any(s["case_id"])
|
|
|
if e:
|
|
if e:
|
|
|
prior[s["case_id"]] = e
|
|
prior[s["case_id"]] = e
|
|
|
fresh = [s for s in sources if s["case_id"] not in prior]
|
|
fresh = [s for s in sources if s["case_id"] not in prior]
|
|
@@ -148,10 +146,22 @@ async def run(args):
|
|
|
for s in sources:
|
|
for s in sources:
|
|
|
s.pop("_image_data_urls", None)
|
|
s.pop("_image_data_urls", None)
|
|
|
|
|
|
|
|
- n = db.upsert_search_posts(args.query_id, args.query, sources, table=table)
|
|
|
|
|
- print(f"🗄️ {table} 入库 {n} 行 · 方向 {args.mode_type or '工序'} · 评估成本 ${cost:.4f}")
|
|
|
|
|
-
|
|
|
|
|
- out_dir = MW / "runs" / table
|
|
|
|
|
|
|
+ # 按评估标签路由:工序/能力→search_process,工具→search_tools,两者都含→两表都写同一 blob。
|
|
|
|
|
+ # 评估只做一次(统一一套),双表只是多写一行,不重复打分。
|
|
|
|
|
+ routed = {"search_process": [], "search_tools": []}
|
|
|
|
|
+ for s in sources:
|
|
|
|
|
+ kt = (s.get("llm_evaluation") or {}).get("知识类型") or []
|
|
|
|
|
+ for t in db.route_tables(kt):
|
|
|
|
|
+ routed[t].append(s)
|
|
|
|
|
+ total = 0
|
|
|
|
|
+ for t, group in routed.items():
|
|
|
|
|
+ if group:
|
|
|
|
|
+ n = db.upsert_search_posts(args.query_id, args.query, group, table=t)
|
|
|
|
|
+ total += n
|
|
|
|
|
+ print(f"🗄️ {t} 入库 {n} 行")
|
|
|
|
|
+ print(f"📊 评估成本 ${cost:.4f} · 共写 {total} 行(双表含同帖重复)")
|
|
|
|
|
+
|
|
|
|
|
+ out_dir = MW / "runs" / "search"
|
|
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
out_dir.mkdir(parents=True, exist_ok=True)
|
|
|
(out_dir / f"{args.query_id}.json").write_text(json.dumps({
|
|
(out_dir / f"{args.query_id}.json").write_text(json.dumps({
|
|
|
"query_id": args.query_id, "query": args.query, "phrasings": phrasings,
|
|
"query_id": args.query_id, "query": args.query, "phrasings": phrasings,
|
|
@@ -166,7 +176,7 @@ def main():
|
|
|
p.add_argument("--query", required=True, help="基准 query(评估锚点)")
|
|
p.add_argument("--query", required=True, help="基准 query(评估锚点)")
|
|
|
p.add_argument("--synonyms", default="", help="逗号分隔的同义措辞(可选)")
|
|
p.add_argument("--synonyms", default="", help="逗号分隔的同义措辞(可选)")
|
|
|
p.add_argument("--mode-type", default="", choices=["", "工序", "工具"],
|
|
p.add_argument("--mode-type", default="", choices=["", "工序", "工具"],
|
|
|
- help="解构方向,决定写哪张表(工具 → search_tools;其余 → search_process)")
|
|
|
|
|
|
|
+ help="(兼容保留,已不决定路由)落表现由评估的 知识类型 标签自动路由")
|
|
|
p.add_argument("--platforms", default="xhs,gzh")
|
|
p.add_argument("--platforms", default="xhs,gzh")
|
|
|
p.add_argument("--max-count", type=int, default=10)
|
|
p.add_argument("--max-count", type=int, default=10)
|
|
|
p.add_argument("--eval-model", default=DEFAULT_EVAL_MODEL, choices=list(EVAL_MODELS))
|
|
p.add_argument("--eval-model", default=DEFAULT_EVAL_MODEL, choices=list(EVAL_MODELS))
|