1 tydzień temu · 76d0ff30bd
--- a/examples/mode_workflow/stages/search_eval.py
+++ b/examples/mode_workflow/stages/search_eval.py
@@ -95,8 +95,6 @@ async def run(args):
 
				         if n:
			
 
				             print(f"🎙️  视频转写 {n} 条")
			
 
				 
			
 
				-    table = "search_tools" if args.mode_type == "工具" else "search_process"
			
 
				-
			
 
				     # ── 评估去重 ────────────────────────────────────────────────────────────────
			
 
				     # 评估的相关性含两子项:「和内容制作知识相关」(与 query 无关)与「和 query 相关」
			
 
				     # (query 专属)。同帖在别的相似 query 下评过时,质量/通用相关/时效等 query 无关分
			
@@ -107,7 +105,7 @@ async def run(args):
 
				         prior = {}
			
 
				         if not args.force_eval:
			
 
				             for s in sources:
			
 
				-                e = db.fetch_existing_eval(s["case_id"], table)
			
 
				+                e = db.fetch_existing_eval_any(s["case_id"])
			
 
				                 if e:
			
 
				                     prior[s["case_id"]] = e
			
 
				         fresh = [s for s in sources if s["case_id"] not in prior]
			
@@ -148,10 +146,22 @@ async def run(args):
 
				     for s in sources:
			
 
				         s.pop("_image_data_urls", None)
			
 
				 
			
 
				-    n = db.upsert_search_posts(args.query_id, args.query, sources, table=table)
			
 
				-    print(f"🗄️  {table} 入库 {n} 行 · 方向 {args.mode_type or '工序'} · 评估成本 ${cost:.4f}")
			
 
				-
			
 
				-    out_dir = MW / "runs" / table
			
 
				+    # 按评估标签路由:工序/能力→search_process,工具→search_tools,两者都含→两表都写同一 blob。
			
 
				+    # 评估只做一次(统一一套),双表只是多写一行,不重复打分。
			
 
				+    routed = {"search_process": [], "search_tools": []}
			
 
				+    for s in sources:
			
 
				+        kt = (s.get("llm_evaluation") or {}).get("知识类型") or []
			
 
				+        for t in db.route_tables(kt):
			
 
				+            routed[t].append(s)
			
 
				+    total = 0
			
 
				+    for t, group in routed.items():
			
 
				+        if group:
			
 
				+            n = db.upsert_search_posts(args.query_id, args.query, group, table=t)
			
 
				+            total += n
			
 
				+            print(f"🗄️  {t} 入库 {n} 行")
			
 
				+    print(f"📊 评估成本 ${cost:.4f} · 共写 {total} 行(双表含同帖重复)")
			
 
				+
			
 
				+    out_dir = MW / "runs" / "search"
			
 
				     out_dir.mkdir(parents=True, exist_ok=True)
			
 
				     (out_dir / f"{args.query_id}.json").write_text(json.dumps({
			
 
				         "query_id": args.query_id, "query": args.query, "phrasings": phrasings,
			
@@ -166,7 +176,7 @@ def main():
 
				     p.add_argument("--query", required=True, help="基准 query(评估锚点)")
			
 
				     p.add_argument("--synonyms", default="", help="逗号分隔的同义措辞(可选)")
			
 
				     p.add_argument("--mode-type", default="", choices=["", "工序", "工具"],
			
 
				-                   help="解构方向,决定写哪张表(工具 → search_tools;其余 → search_process)")
			
 
				+                   help="(兼容保留,已不决定路由)落表现由评估的 知识类型 标签自动路由")
			
 
				     p.add_argument("--platforms", default="xhs,gzh")
			
 
				     p.add_argument("--max-count", type=int, default=10)
			
 
				     p.add_argument("--eval-model", default=DEFAULT_EVAL_MODEL, choices=list(EVAL_MODELS))