Просмотр исходного кода

feat(mode_workflow): search_data 增加 mode_type 字段区分工序/工具方向

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
刘文武 4 дня назад
Родитель
Коммит
759f8b1433

+ 16 - 5
examples/mode_workflow/db.py

@@ -64,6 +64,7 @@ CREATE TABLE IF NOT EXISTS search_data (
   quality_grade VARCHAR(8)    NULL,
   quality_grade VARCHAR(8)    NULL,
   found_by      JSON          NULL COMMENT '命中的措辞数组',
   found_by      JSON          NULL COMMENT '命中的措辞数组',
   knowledge_type JSON         NULL COMMENT '["能力","工序","工具"] 子集',
   knowledge_type JSON         NULL COMMENT '["能力","工序","工具"] 子集',
+  mode_type     VARCHAR(16)   NULL COMMENT '该 query 的解构方向:工序/工具(空=通用)',
   overall_score FLOAT         NULL COMMENT '(相关均值+质量均值)/2',
   overall_score FLOAT         NULL COMMENT '(相关均值+质量均值)/2',
   llm_evaluation JSON         NULL COMMENT '评估全量 blob',
   llm_evaluation JSON         NULL COMMENT '评估全量 blob',
   created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
   created_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
@@ -137,6 +138,12 @@ def init_tables():
             cur.execute(DDL_SEARCH)
             cur.execute(DDL_SEARCH)
             cur.execute(DDL_PROCESS)
             cur.execute(DDL_PROCESS)
             cur.execute(DDL_TOOLS)
             cur.execute(DDL_TOOLS)
+            # 迁移:旧表补 mode_type 列(CREATE IF NOT EXISTS 不会改已有表)
+            cur.execute("SHOW COLUMNS FROM search_data LIKE 'mode_type'")
+            if not cur.fetchone():
+                cur.execute("ALTER TABLE search_data ADD COLUMN mode_type VARCHAR(16) NULL "
+                            "COMMENT '该 query 的解构方向:工序/工具(空=通用)' AFTER knowledge_type")
+                print("🔧 迁移:search_data 已补 mode_type 列")
         print("✅ 建表完成:search_data, mode_process, mode_tools")
         print("✅ 建表完成:search_data, mode_process, mode_tools")
     finally:
     finally:
         conn.close()
         conn.close()
@@ -188,8 +195,9 @@ def overall_score(e):
 
 
 # ── search_data ──────────────────────────────────────────────────────────────
 # ── search_data ──────────────────────────────────────────────────────────────
 
 
-def upsert_search_posts(query_id, query_text, results):
-    """一组搜索结果写入 search_data(按 (query_id, case_id) upsert)。返回写入条数。"""
+def upsert_search_posts(query_id, query_text, results, mode_type=None):
+    """一组搜索结果写入 search_data(按 (query_id, case_id) upsert)。返回写入条数。
+    mode_type:该 query 的解构方向(工序/工具),None 不覆盖已有值。"""
     if not results:
     if not results:
         return 0
         return 0
     rows = []
     rows = []
@@ -208,6 +216,7 @@ def upsert_search_posts(query_id, query_text, results):
             post.get("_quality_score"), post.get("_quality_grade"),
             post.get("_quality_score"), post.get("_quality_grade"),
             _j(r.get("found_by_queries") or []),
             _j(r.get("found_by_queries") or []),
             _j(e.get("知识类型") or []),
             _j(e.get("知识类型") or []),
+            mode_type,
             overall_score(e),
             overall_score(e),
             _j(e),
             _j(e),
         ))
         ))
@@ -215,8 +224,9 @@ def upsert_search_posts(query_id, query_text, results):
     INSERT INTO search_data
     INSERT INTO search_data
       (query_id, query_text, case_id, platform, channel_content_id, title, url,
       (query_id, query_text, case_id, platform, channel_content_id, title, url,
        content_type, body, images, videos, like_count, publish_time,
        content_type, body, images, videos, like_count, publish_time,
-       quality_score, quality_grade, found_by, knowledge_type, overall_score, llm_evaluation)
-    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
+       quality_score, quality_grade, found_by, knowledge_type, mode_type,
+       overall_score, llm_evaluation)
+    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
     ON DUPLICATE KEY UPDATE
     ON DUPLICATE KEY UPDATE
       query_text=VALUES(query_text), platform=VALUES(platform),
       query_text=VALUES(query_text), platform=VALUES(platform),
       channel_content_id=VALUES(channel_content_id), title=VALUES(title), url=VALUES(url),
       channel_content_id=VALUES(channel_content_id), title=VALUES(title), url=VALUES(url),
@@ -224,6 +234,7 @@ def upsert_search_posts(query_id, query_text, results):
       videos=VALUES(videos), like_count=VALUES(like_count), publish_time=VALUES(publish_time),
       videos=VALUES(videos), like_count=VALUES(like_count), publish_time=VALUES(publish_time),
       quality_score=VALUES(quality_score), quality_grade=VALUES(quality_grade),
       quality_score=VALUES(quality_score), quality_grade=VALUES(quality_grade),
       found_by=VALUES(found_by), knowledge_type=VALUES(knowledge_type),
       found_by=VALUES(found_by), knowledge_type=VALUES(knowledge_type),
+      mode_type=COALESCE(VALUES(mode_type), mode_type),
       overall_score=VALUES(overall_score), llm_evaluation=VALUES(llm_evaluation);
       overall_score=VALUES(overall_score), llm_evaluation=VALUES(llm_evaluation);
     """
     """
     conn = _conn()
     conn = _conn()
@@ -241,7 +252,7 @@ def fetch_queries():
     try:
     try:
         with conn.cursor() as cur:
         with conn.cursor() as cur:
             cur.execute("""SELECT query_id, MAX(query_text) AS query_text,
             cur.execute("""SELECT query_id, MAX(query_text) AS query_text,
-                                  COUNT(*) AS post_count
+                                  MAX(mode_type) AS mode_type, COUNT(*) AS post_count
                            FROM search_data GROUP BY query_id ORDER BY query_id""")
                            FROM search_data GROUP BY query_id ORDER BY query_id""")
             queries = cur.fetchall()
             queries = cur.fetchall()
             cur.execute("SELECT query_id, COUNT(DISTINCT case_id) AS n FROM mode_process GROUP BY query_id")
             cur.execute("SELECT query_id, COUNT(DISTINCT case_id) AS n FROM mode_process GROUP BY query_id")

+ 16 - 5
examples/mode_workflow/index.html

@@ -327,6 +327,7 @@ select:focus,input:focus{border-color:var(--navy)}
     <h2>新建搜索</h2>
     <h2>新建搜索</h2>
     <div class="mb">
     <div class="mb">
       <label>Query(评估锚点,必填)<input type="text" id="s-query" placeholder="如:AI 人像 图片 生成 怎么做"></label>
       <label>Query(评估锚点,必填)<input type="text" id="s-query" placeholder="如:AI 人像 图片 生成 怎么做"></label>
+      <label>解构方向<select id="s-mode"><option value="工序">工序</option><option value="工具">工具</option></select></label>
       <label>同义措辞(可选,逗号分隔)<input type="text" id="s-syn" placeholder="如:AI 人像生成 教程,AI 写真 怎么做"></label>
       <label>同义措辞(可选,逗号分隔)<input type="text" id="s-syn" placeholder="如:AI 人像生成 教程,AI 写真 怎么做"></label>
       <label>渠道<input type="text" id="s-plat" value="xhs,gzh"></label>
       <label>渠道<input type="text" id="s-plat" value="xhs,gzh"></label>
       <label>每措辞每渠道上限<input type="number" id="s-max" value="10" min="1" max="50"></label>
       <label>每措辞每渠道上限<input type="number" id="s-max" value="10" min="1" max="50"></label>
@@ -439,14 +440,20 @@ async function loadQueries(){
   renderQueries();
   renderQueries();
 }
 }
 function renderQueries(){
 function renderQueries(){
-  $('#q-count').textContent = state.queries.length ? state.queries.length + ' 组' : '';
-  if (!state.queries.length){
+  /* mode_type 过滤:工序 tab 显示「工序 + 通用(空)」,工具 tab 显示「工具 + 通用(空)」 */
+  const want = state.mode === 'process' ? '工序' : '工具';
+  const list = state.queries.filter(q => !q.mode_type || q.mode_type === want);
+  $('#q-count').textContent = list.length ? list.length + ' 组' : '';
+  if (!list.length){
     $('#query-list').innerHTML = '<div class="empty"><span class="glyph">空</span>暂无 query<br>点右上「新建搜索」开始</div>'; return;
     $('#query-list').innerHTML = '<div class="empty"><span class="glyph">空</span>暂无 query<br>点右上「新建搜索」开始</div>'; return;
   }
   }
-  $('#query-list').innerHTML = state.queries.map(q => {
+  $('#query-list').innerHTML = list.map(q => {
     const done = state.mode === 'process' ? q.process_done : q.tools_done;
     const done = state.mode === 'process' ? q.process_done : q.tools_done;
+    const mt = q.mode_type
+      ? `<span class="pill ${q.mode_type==='工序'?'navy':'teal'}">${esc(q.mode_type)}</span>`
+      : '<span class="pill">通用</span>';
     return `<div class="qitem ${q.query_id===state.queryId?'on':''}" onclick="selectQuery('${q.query_id}')">
     return `<div class="qitem ${q.query_id===state.queryId?'on':''}" onclick="selectQuery('${q.query_id}')">
-      <div class="qid">${q.query_id}</div>
+      <div class="qid">${q.query_id} ${mt}</div>
       <div class="qt">${esc(q.query_text || '(未命名)')}</div>
       <div class="qt">${esc(q.query_text || '(未命名)')}</div>
       <div class="qm"><span class="num">${q.post_count} 帖</span><span>已解构 <b class="num">${done}</b></span></div>
       <div class="qm"><span class="num">${q.post_count} 帖</span><span>已解构 <b class="num">${done}</b></span></div>
     </div>`;
     </div>`;
@@ -740,12 +747,16 @@ function showTask(title, taskId, onDone){
 function hideTask(){ $('#task-panel').hidden = true; clearTimeout(pollTimer); }
 function hideTask(){ $('#task-panel').hidden = true; clearTimeout(pollTimer); }
 
 
 /* ════ 新建搜索 ════ */
 /* ════ 新建搜索 ════ */
-$('#btn-new-search').onclick = () => { $('#search-modal').hidden = false; $('#s-query').focus(); };
+$('#btn-new-search').onclick = () => {
+  $('#s-mode').value = state.mode === 'process' ? '工序' : '工具';   // 默认跟随当前子模式
+  $('#search-modal').hidden = false; $('#s-query').focus();
+};
 $('#search-modal').onclick = e => { if (e.target === $('#search-modal')) $('#search-modal').hidden = true; };
 $('#search-modal').onclick = e => { if (e.target === $('#search-modal')) $('#search-modal').hidden = true; };
 $('#s-go').onclick = async () => {
 $('#s-go').onclick = async () => {
   const query = $('#s-query').value.trim();
   const query = $('#s-query').value.trim();
   if (!query) return alert('请填写 query');
   if (!query) return alert('请填写 query');
   const body = {query, synonyms: $('#s-syn').value.trim(),
   const body = {query, synonyms: $('#s-syn').value.trim(),
+                mode_type: $('#s-mode').value,
                 platforms: $('#s-plat').value.trim() || 'xhs,gzh',
                 platforms: $('#s-plat').value.trim() || 'xhs,gzh',
                 max_count: parseInt($('#s-max').value) || 10};
                 max_count: parseInt($('#s-max').value) || 10};
   try {
   try {

+ 5 - 2
examples/mode_workflow/pipeline/search_eval.py

@@ -74,8 +74,9 @@ async def run(args):
     for s in sources:
     for s in sources:
         s.pop("_image_data_urls", None)
         s.pop("_image_data_urls", None)
 
 
-    n = db.upsert_search_posts(args.query_id, args.query, sources)
-    print(f"🗄️  search_data 入库 {n} 行 · 评估成本 ${cost:.4f}")
+    n = db.upsert_search_posts(args.query_id, args.query, sources,
+                               mode_type=args.mode_type or None)
+    print(f"🗄️  search_data 入库 {n} 行 · 方向 {args.mode_type or '通用'} · 评估成本 ${cost:.4f}")
 
 
     out_dir = MW / "runs" / "search"
     out_dir = MW / "runs" / "search"
     out_dir.mkdir(parents=True, exist_ok=True)
     out_dir.mkdir(parents=True, exist_ok=True)
@@ -91,6 +92,8 @@ def main():
     p.add_argument("--query-id", required=True, help="如 q0004(server 自动分配)")
     p.add_argument("--query-id", required=True, help="如 q0004(server 自动分配)")
     p.add_argument("--query", required=True, help="基准 query(评估锚点)")
     p.add_argument("--query", required=True, help="基准 query(评估锚点)")
     p.add_argument("--synonyms", default="", help="逗号分隔的同义措辞(可选)")
     p.add_argument("--synonyms", default="", help="逗号分隔的同义措辞(可选)")
+    p.add_argument("--mode-type", default="", choices=["", "工序", "工具"],
+                   help="该 query 的解构方向(写入 search_data.mode_type;空=通用)")
     p.add_argument("--platforms", default="xhs,gzh")
     p.add_argument("--platforms", default="xhs,gzh")
     p.add_argument("--max-count", type=int, default=10)
     p.add_argument("--max-count", type=int, default=10)
     p.add_argument("--eval-model", default=DEFAULT_EVAL_MODEL, choices=list(EVAL_MODELS))
     p.add_argument("--eval-model", default=DEFAULT_EVAL_MODEL, choices=list(EVAL_MODELS))

+ 2 - 0
examples/mode_workflow/server.py

@@ -282,6 +282,8 @@ class Handler(BaseHTTPRequestHandler):
                        "--query-id", qid, "--query", query]
                        "--query-id", qid, "--query", query]
                 if payload.get("synonyms"):
                 if payload.get("synonyms"):
                     cmd += ["--synonyms", payload["synonyms"]]
                     cmd += ["--synonyms", payload["synonyms"]]
+                if payload.get("mode_type") in ("工序", "工具"):
+                    cmd += ["--mode-type", payload["mode_type"]]
                 if payload.get("platforms"):
                 if payload.get("platforms"):
                     cmd += ["--platforms", payload["platforms"]]
                     cmd += ["--platforms", payload["platforms"]]
                 if payload.get("max_count"):
                 if payload.get("max_count"):