1 неделя назад · 76d0ff30bd
--- a/examples/mode_workflow/stages/search_eval.py
+++ b/examples/mode_workflow/stages/search_eval.py
@@ -95,8 +95,6 @@ async def run(args):
 
															         if n:
														
 
															             print(f"🎙️  视频转写 {n} 条")
														
 
															-    table = "search_tools" if args.mode_type == "工具" else "search_process"
														
 
															-
														
 
															     # ── 评估去重 ────────────────────────────────────────────────────────────────
														
 
															     # 评估的相关性含两子项:「和内容制作知识相关」(与 query 无关)与「和 query 相关」
														
 
															     # (query 专属)。同帖在别的相似 query 下评过时,质量/通用相关/时效等 query 无关分
														
@@ -107,7 +105,7 @@ async def run(args):
 
															         prior = {}
														
 
															         if not args.force_eval:
														
 
															             for s in sources:
														
 
															-                e = db.fetch_existing_eval(s["case_id"], table)
														
 
															+                e = db.fetch_existing_eval_any(s["case_id"])
														
 
															                 if e:
														
 
															                     prior[s["case_id"]] = e
														
 
															         fresh = [s for s in sources if s["case_id"] not in prior]
														
@@ -148,10 +146,22 @@ async def run(args):
 
															     for s in sources:
														
 
															         s.pop("_image_data_urls", None)
														
 
															-    n = db.upsert_search_posts(args.query_id, args.query, sources, table=table)
														
 
															-    print(f"🗄️  {table} 入库 {n} 行 · 方向 {args.mode_type or '工序'} · 评估成本 ${cost:.4f}")
														
 
															-
														
 
															-    out_dir = MW / "runs" / table
														
 
															+    # 按评估标签路由:工序/能力→search_process,工具→search_tools,两者都含→两表都写同一 blob。
														
 
															+    # 评估只做一次(统一一套),双表只是多写一行,不重复打分。
														
 
															+    routed = {"search_process": [], "search_tools": []}
														
 
															+    for s in sources:
														
 
															+        kt = (s.get("llm_evaluation") or {}).get("知识类型") or []
														
 
															+        for t in db.route_tables(kt):
														
 
															+            routed[t].append(s)
														
 
															+    total = 0
														
 
															+    for t, group in routed.items():
														
 
															+        if group:
														
 
															+            n = db.upsert_search_posts(args.query_id, args.query, group, table=t)
														
 
															+            total += n
														
 
															+            print(f"🗄️  {t} 入库 {n} 行")
														
 
															+    print(f"📊 评估成本 ${cost:.4f} · 共写 {total} 行(双表含同帖重复)")
														
 
															+
														
 
															+    out_dir = MW / "runs" / "search"
														
 
															     out_dir.mkdir(parents=True, exist_ok=True)
														
 
															     (out_dir / f"{args.query_id}.json").write_text(json.dumps({
														
 
															         "query_id": args.query_id, "query": args.query, "phrasings": phrasings,
														
@@ -166,7 +176,7 @@ def main():
 
															     p.add_argument("--query", required=True, help="基准 query(评估锚点)")
														
 
															     p.add_argument("--synonyms", default="", help="逗号分隔的同义措辞(可选)")
														
 
															     p.add_argument("--mode-type", default="", choices=["", "工序", "工具"],
														
 
															-                   help="解构方向,决定写哪张表(工具 → search_tools;其余 → search_process)")
														
 
															+                   help="(兼容保留,已不决定路由)落表现由评估的 知识类型 标签自动路由")
														
 
															     p.add_argument("--platforms", default="xhs,gzh")
														
 
															     p.add_argument("--max-count", type=int, default=10)
														
 
															     p.add_argument("--eval-model", default=DEFAULT_EVAL_MODEL, choices=list(EVAL_MODELS))