Przeglądaj źródła

修复(mode_workflow): 修复分类匹配错位问题,优化分类检测与前端展示

详细修改:
1.  修复分类匹配逻辑,确保匹配结果与原始子项等长等序,未命中时使用"无"作为占位符防止前端下标错位,更新相关函数注释
2.  重构分类案例检测函数,修复空steps案例被误判为未分类的问题,新增has_category字段标记帖子分类状态
3.  更新前端页面:重新设计实质/形式匹配的展示方式,新增批量操作计数,优化批量分类确认流程并添加乐观更新逻辑
刘文武 2 dni temu
rodzic
commit
9ad97c5f96

+ 35 - 16
examples/mode_workflow/db.py

@@ -552,6 +552,17 @@ def fetch_posts(query_id, mode="process"):
             hp = {r["case_id"] for r in cur.fetchall()}
             cur.execute("SELECT DISTINCT case_id FROM mode_tools WHERE query_id=%s", (query_id,))
             ht = {r["case_id"] for r in cur.fetchall()}
+            # 已归类(工序):hp 中各 case 最新真实版的工序里有任一步骤含 substanceMatch(与归类回写
+            # 同口径)。库端 LIKE 算、只回 0/1,不拉 steps。聚合逻辑见 _categorized_from_rows
+            # (不能只看 id 最大行——空 steps 的 procedure 行永远不含,会误判)。
+            hc = set()
+            if hp:
+                ph = ",".join(["%s"] * len(hp))
+                cur.execute(f"""SELECT case_id, version, id,
+                                       (LEFT(version,5)='link_') AS islink, (steps LIKE %s) AS cat
+                                FROM mode_process WHERE case_id IN ({ph})""",
+                            ['%substanceMatch%'] + list(hp))
+                hc = _categorized_from_rows(cur.fetchall())
     finally:
         conn.close()
     for r in rows:
@@ -561,6 +572,7 @@ def fetch_posts(query_id, mode="process"):
                                       r["publish_time"], r.pop("repro", None))
         r["has_process"] = r["case_id"] in hp
         r["has_tools"] = r["case_id"] in ht
+        r["has_category"] = r["case_id"] in hc
     return rows
 
 
@@ -812,9 +824,25 @@ def update_process_steps(case_id, version, steps_in_order):
         conn.close()
 
 
+def _categorized_from_rows(rows):
+    """rows: [{case_id, version, id, islink(0/1), cat(0/1)}]. Return the set of categorized case_ids.
+    Rule: per case pick the latest real version (real versions first, then largest id); the case is categorized if **any row** of that version has cat=1.
+    Key point: do not look only at the single row with the largest id. A workflow may contain a procedure with empty steps (step_count=0)
+    whose row never contains substanceMatch; if that row happens to have the largest id, the whole case would be misjudged as uncategorized."""
+    best, has = {}, {}   # best: case_id -> (sort key, version); has: (case_id, version) -> True once any row had a truthy cat
+    for r in rows:
+        c, v = r["case_id"], r["version"]
+        sk = (1 if r["islink"] else 0, -r["id"])   # real version (islink=0) first, then larger id; the minimum wins
+        if c not in best or sk < best[c][0]:
+            best[c] = (sk, v)
+        k = (c, v)
+        has[k] = has.get(k, False) or bool(r["cat"])   # OR across every row of the same (case, version)
+    return {c for c, (sk, v) in best.items() if has.get((c, v))}
+
+
 def fetch_categorized_cases(case_ids, mode="process"):
-    """返回 case_ids 中「已归类」的子集:该 case 最新真实版(link_ 排后)的 steps 已含
-    substanceMatch 字段(归类跑过的工序行一定带此 key)。与归类回写/前端展示同口径。
+    """返回 case_ids 中「已归类」的子集:该 case 最新真实版(link_ 排后)的工序里有任一步骤
+    含 substanceMatch(归类跑过的非空 step 一定带此 key)。与归类回写/前端展示同口径。
     供前端判断「是否已全部归类 → 提示重新归类」。仅工序方向有意义(mode_process)。"""
     if not case_ids:
         return set()
@@ -823,24 +851,15 @@ def fetch_categorized_cases(case_ids, mode="process"):
     conn = _conn()
     try:
         with conn.cursor() as cur:
-            # 每 case 按「真实版优先、id 降序」排,取首行(最新真实版的代表工序行)的判断结果;
-            # steps LIKE 在库端算,只回传 0/1,不拉 steps 大字段。
-            cur.execute(f"""SELECT case_id, (steps LIKE %s) AS cat
-                            FROM {table} WHERE case_id IN ({ph})
-                            ORDER BY case_id, (LEFT(version,5)='link_') ASC, id DESC""",
+            # 拉各行的 (case_id, version, id, islink, cat);steps LIKE 在库端算,不拉 steps 大字段。
+            cur.execute(f"""SELECT case_id, version, id,
+                                   (LEFT(version,5)='link_') AS islink, (steps LIKE %s) AS cat
+                            FROM {table} WHERE case_id IN ({ph})""",
                         ['%substanceMatch%'] + list(case_ids))
             rows = cur.fetchall()
     finally:
         conn.close()
-    seen, out = set(), set()
-    for r in rows:
-        cid = r["case_id"]
-        if cid in seen:        # 每 case 只看首行(最新真实版)
-            continue
-        seen.add(cid)
-        if r["cat"]:
-            out.add(cid)
-    return out
+    return _categorized_from_rows(rows)
 
 
 def _proc_payload(case_id, version, rows):

+ 182 - 41
examples/mode_workflow/index.html

@@ -566,6 +566,15 @@
         flex-wrap: wrap;
         gap: 8px;
       }
+      /* 按钮内计数:未做数(灰) / 已采纳总数(绿) */
+      .ph-actions .cnt-rest {
+        color: #9aa0a6;
+        font-weight: 700;
+      }
+      .ph-actions .cnt-total {
+        color: #2e9e5b;
+        font-weight: 700;
+      }
 
       .qlist {
         flex: 1;
@@ -979,23 +988,38 @@
         font-size: 11.5px;
         word-break: break-all;
       }
-      /* 归类命中 tag(实质/形式单元格内,原值下方)──绿色胶囊,与原文本区分 */
-      .steps .match-tags {
-        margin-top: 5px;
+      /* 归类命中(实质/形式):原值 → 命中值 逐行配对 */
+      .steps .sf-map {
+        display: flex;
+        flex-direction: column;
+        gap: 4px;
+      }
+      .steps .sf-pair {
         display: flex;
+        align-items: center;
+        gap: 4px;
         flex-wrap: wrap;
-        gap: 3px;
+        line-height: 1.5;
+      }
+      .steps .sf-old {
+        color: #9aa0a6;
+        text-decoration: line-through;
+      }
+      .steps .sf-plain {
+        color: var(--ink);
       }
-      .steps .mtag {
+      .steps .sf-arrow {
+        color: #2e9e5b;
+        flex: none;
+      }
+      .steps .sf-new {
         display: inline-block;
         padding: 1px 7px;
         border-radius: 10px;
         background: #e3f3e8;
         color: #2e6b45;
         border: 1px solid #bfe3cb;
-        font-size: 10.5px;
         font-weight: 600;
-        line-height: 1.6;
         white-space: nowrap;
       }
       .inf {
@@ -2992,16 +3016,20 @@
         const b = $("#btn-batch");
         b.disabled = !state.selected.size;
         b.textContent = state.selected.size ? `批量解构(${state.selected.size})` : "批量解构";
-        // 解构全部已采纳:有采纳帖才显示,带数量(口径同帖列表的 采纳 标记)
-        const adoptedN = (state.posts || []).filter((p) => p.adopted).length;
+        // 解构全部(未解构/已采纳总数):未解构=已采纳但本方向未解构;A 灰 B 绿
+        const adopted = (state.posts || []).filter((p) => p.adopted);
+        const adoptedN = adopted.length;
+        const doneKey = state.mode === "process" ? "has_process" : "has_tools";
+        const undoneN = adopted.filter((p) => !p[doneKey]).length;
         const ea = $("#btn-extract-adopted");
         ea.hidden = !adoptedN;
-        ea.textContent = `解构全部已采纳(${adoptedN})`;
-        // 归类全部已采纳:仅工序方向 + 有「已采纳且已解构」的帖才显示(只有已解构能归类)
-        const catN = (state.posts || []).filter((p) => p.adopted && p.has_process).length;
+        ea.innerHTML = `解构全部(<span class="cnt-rest">${undoneN}</span>/<span class="cnt-total">${adoptedN}</span>)`;
+        // 归类全部(未归类/已采纳总数):仅工序方向 + 有已采纳已解构帖才显示;未归类=已采纳未归类
+        const hasCatTarget = adopted.some((p) => p.has_process);
+        const unCatN = adopted.filter((p) => !p.has_category).length;
         const ca = $("#btn-cat-adopted");
-        ca.hidden = state.mode !== "process" || !catN;
-        ca.textContent = `归类全部已采纳(${catN})`;
+        ca.hidden = state.mode !== "process" || !hasCatTarget;
+        ca.innerHTML = `归类全部(<span class="cnt-rest">${unCatN}</span>/<span class="cnt-total">${adoptedN}</span>)`;
       }
       $("#btn-batch").onclick = () => state.selected.size && startExtract([...state.selected]);
       $("#btn-extract-adopted").onclick = async () => {
@@ -3020,21 +3048,31 @@
         startExtract(cids, allDone ? { force: true } : {});
       };
       $("#btn-cat-adopted").onclick = async () => {
-        // 只归类「已采纳且已解构」的帖(只有已解构才有 steps 可归类)
-        const cids = (state.posts || []).filter((p) => p.adopted && p.has_process).map((p) => p.case_id);
+        // 只归类「已采纳且已解构」的帖(只有已解构才有 steps 可归类);has_category 来自 fetch_posts
+        const adoptedProc = (state.posts || []).filter((p) => p.adopted && p.has_process);
+        const cids = adoptedProc.map((p) => p.case_id);
         if (!cids.length) return toast("当前 query 下没有「已采纳且已解构」的帖子", "warn");
-        // 查这些帖已归类的数量,决定提示文案(已归类口径:steps 含 substanceMatch)
-        let catN = 0;
-        try {
-          const r = await api(`/api/categorize_status?mode=${state.mode}&case_ids=${encodeURIComponent(cids.join(","))}`);
-          catN = (r.categorized || []).length;
-        } catch (e) { /* 查不到归类状态不阻断,按未归类提示 */ }
-        const allCat = catN >= cids.length;   // 全部已归类
-        const msg = allCat
-          ? `这 ${cids.length} 个帖的工序都已归类。是否重新归类?\n重新归类会用最新分类结果覆盖原有实质/形式 tag。`
-          : `对该 query 下 ${cids.length} 个已采纳且已解构的帖做工序归类${catN ? `(其中 ${catN} 个已归类,将覆盖)` : ""}?\n将把命中的分类回写进各工序的实质/形式。`;
-        if (!(await uiConfirm(msg))) return;
-        startCategorize(cids);   // 归类即覆盖写,无需 force
+        const rest = adoptedProc.filter((p) => !p.has_category).map((p) => p.case_id);   // 未归类
+        const catN = cids.length - rest.length;
+
+        let action;   // "all"=归类全部(覆盖) / "rest"=只归类剩余 / null=取消
+        if (rest.length === 0) {
+          // 全部已归类:只能重新归类全部
+          action = (await uiConfirm(`这 ${cids.length} 个帖的工序都已归类。是否重新归类?\n重新归类会用最新分类结果覆盖原有实质/形式 tag。`)) ? "all" : null;
+        } else if (catN === 0) {
+          // 都没归类:归类全部 = 归类剩余,两按钮即可
+          action = (await uiConfirm(`对该 query 下 ${cids.length} 个已采纳且已解构的帖做工序归类?\n将把命中的分类回写进各工序的实质/形式。`)) ? "all" : null;
+        } else {
+          // 部分已归类:归类剩余(只处理未归类) / 归类全部(覆盖) / 取消
+          action = await uiChoose(
+            `该 query 下 ${cids.length} 个已采纳且已解构的帖,其中 ${catN} 个已归类、${rest.length} 个未归类。\n「归类剩余」只处理未归类的 ${rest.length} 个;「归类全部」会重跑并覆盖已归类的。`,
+            [
+              { key: "rest", text: `归类剩余(${rest.length})` },
+              { key: "all", text: "归类全部(覆盖)", primary: true },
+            ]);
+        }
+        if (!action) return;
+        startCategorize(action === "rest" ? rest : cids);   // 归类即覆盖写,无需 force
       };
 
       /* ════ 帖子详情弹层 ════ */
@@ -3543,8 +3581,8 @@
               rows += `<td rowspan="${n}" class="sid">${esc(s.id || "")}</td>
           <td rowspan="${n}"><div class="intent-text">${renderIntent(s.intent || s.directive || "")}</div></td>
           <td rowspan="${n}">${s.effect ? `<span class="pill navy">${esc(s.effect)}</span>` : ""}</td>
-          <td rowspan="${n}">${esc(fmtSF(s.substance))}${matchTag(s.substanceMatch)}</td>
-          <td rowspan="${n}">${esc(fmtSF(s.form))}${matchTag(s.formMatch)}</td>`;
+          <td rowspan="${n}">${renderSF(s.substance, s.substanceMatch)}</td>
+          <td rowspan="${n}">${renderSF(s.form, s.formMatch)}</td>`;
             }
             rows += ioCell(ins[i], "in");
             if (i === 0) {
@@ -3558,7 +3596,7 @@
         return `<div style="overflow-x:auto"><table class="steps">
     <colgroup>
       <col style="width:44px"><col style="width:200px"><col style="width:92px">
-      <col style="width:112px"><col style="width:100px">
+      <col style="width:180px"><col style="width:168px">
       <col style="width:112px"><col style="width:330px"><col style="width:92px">
       <col style="width:118px"><col style="width:130px">
       <col style="width:112px"><col style="width:360px"><col style="width:110px">
@@ -3576,16 +3614,80 @@
       function fmtSF(v) { // null/undefined -> "", array -> items joined with "、", anything else returned unchanged
         return v == null ? "" : Array.isArray(v) ? v.join("、") : v;
       }
-      /* 归类命中(substanceMatch/formMatch):多个用「、」拆,逐个出绿色 tag 放原值下方 */
-      function matchTag(v) {
-        if (v == null || v === "") return "";
-        const tags = String(v)
-          .split("、")
-          .map((x) => x.trim())
-          .filter(Boolean)
-          .map((x) => `<span class="mtag">${esc(x)}</span>`)
-          .join("");
-        return tags ? `<div class="match-tags">${tags}</div>` : "";
+      const SF_ARROW =
+        '<svg class="sf-arrow" viewBox="0 0 24 24" width="13" height="13" aria-hidden="true"><path d="M5 12h14M13 6l6 6-6 6" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"/></svg>';
+      const SF_NO_MATCH = "无"; // backend placeholder for a sub-item with no match (same value as NO_MATCH in category_match.py)
+      /* Split an original value. Meant to mirror the backend _split_values (not visible in this diff; confirm): a "、" inside
+         parentheses does not split, duplicates are dropped in first-seen order, so original sub-items and *Match sub-items can be paired by index */
+      function _splitParts(raw) {
+        if (raw == null) return [];
+        if (Array.isArray(raw)) { // array input: trim each item, drop empties and duplicates; items are not split further
+          const out = [],
+            seen = new Set();
+          for (const x of raw) {
+            const p = String(x).trim();
+            if (p && !seen.has(p)) {
+              seen.add(p);
+              out.push(p);
+            }
+          }
+          return out;
+        }
+        const parts = [];
+        let cur = "",
+          depth = 0; // current parenthesis nesting depth
+        for (const ch of String(raw)) {
+          if (ch === "(" || ch === "(") { // NOTE(review): both operands render as the same character here; the second was presumably the full-width "(" - confirm against the repository
+            depth++;
+            cur += ch;
+          } else if (ch === ")" || ch === ")") { // NOTE(review): same as above for the closing parenthesis - confirm
+            depth--; // no floor at 0: an unmatched closer leaves depth negative, which disables splitting for the rest of the string
+            cur += ch;
+          } else if (ch === "、" && depth === 0) {
+            const p = cur.trim();
+            if (p) parts.push(p);
+            cur = "";
+          } else {
+            cur += ch;
+          }
+        }
+        const last = cur.trim();
+        if (last) parts.push(last);
+        const out = [],
+          seen = new Set();
+        for (const p of parts)
+          if (!seen.has(p)) {
+            seen.add(p);
+            out.push(p);
+          }
+        return out;
+      }
+      /* Split a *Match value so it aligns by index: **no de-duplication, no dropping of empty items**; the "无" placeholder is kept */
+      function _matchParts(v) {
+        return v == null ? [] : String(v).split("、").map((x) => x.trim());
+      }
+      /* Substance/form cell: pair each sub-item as "original value -> matched value".
+         Matched -> original value (sf-old) + arrow + matched value (sf-new);
+         not matched (placeholder "无" / missing) -> original value only (sf-plain), with no arrow */
+      function renderSF(value, match) {
+        const olds = _splitParts(value);
+        const news = _matchParts(match);
+        if (!news.length) return esc(fmtSF(value)); // cell not categorized at all -> original value as plain escaped text
+        const n = Math.max(olds.length, news.length);
+        let rows = "";
+        for (let i = 0; i < n; i++) {
+          const a = olds[i],
+            b = news[i];
+          const matched = b != null && b !== "" && b !== SF_NO_MATCH;
+          if (matched) {
+            rows += `<div class="sf-pair">${
+              a != null ? `<span class="sf-old">${esc(a)}</span>${SF_ARROW}` : ""
+            }<span class="sf-new">${esc(b)}</span></div>`;
+          } else if (a != null) {
+            rows += `<div class="sf-pair"><span class="sf-plain">${esc(a)}</span></div>`;
+          }
+        }
+        return `<div class="sf-map">${rows}</div>`;
       }
       function ioCell(x, kind) {
         const cls = kind === "in" ? "c-in" : "c-out";
@@ -3797,6 +3899,10 @@
           if (!r.task_id) return toast(r.note || "无可归类帖", "info");
           showTask(`工序归类 · ${caseIds.length} 帖${opts.auto ? "(自动)" : ""}`, r.task_id, async () => {
             caseIds.forEach(invalidateExtractCache); // 归类改了 steps,清缓存才能拿到新 match
+            // 乐观更新:归类成功的帖标记已归类,刷新「归类全部(未归类/已采纳)」计数
+            const done = new Set(caseIds);
+            (state.posts || []).forEach((p) => { if (done.has(p.case_id)) p.has_category = true; });
+            updateBatchBtn();
             // 当前正看的帖在本批里 → 重载解构,实质/形式立即出 tag
             if (caseIds.includes(state.caseId)) {
               state.version = null;
@@ -3845,6 +3951,41 @@
           bg.querySelector('[data-act="ok"]').focus();
         });
       }
+      /* Multi-button chooser dialog (multi-choice variant of uiConfirm): buttons=[{key,text,primary?}].
+         Clicking an action button resolves with its key; Cancel / Esc / clicking the backdrop resolves with null; Enter picks the primary (or first) button. */
+      function uiChoose(message, buttons, opt = {}) {
+        const { cancelText = "取消" } = opt;
+        return new Promise((resolve) => {
+          const bg = document.createElement("div");
+          bg.className = "ui-confirm-bg";
+          const btns = buttons
+            .map((b) => `<button class="btn ${b.primary ? "seal" : ""}" data-key="${esc(b.key)}">${esc(b.text)}</button>`)
+            .join("");
+          bg.innerHTML =
+            '<div class="ui-confirm"><div class="ui-confirm-msg"></div>'
+            + '<div class="ui-confirm-acts">'
+            + `<button class="btn" data-key="">${esc(cancelText)}</button>`
+            + btns + "</div></div>";
+          bg.querySelector(".ui-confirm-msg").textContent = message; // set via textContent, so the message is never parsed as HTML
+          document.body.appendChild(bg);
+          const done = (v) => { bg.remove(); document.removeEventListener("keydown", onKey, true); resolve(v); };
+          bg.addEventListener("click", (e) => {
+            if (e.target === bg) return done(null);
+            const a = e.target.closest("[data-key]");
+            if (a) done(a.dataset.key || null);   // data-key="" -> cancel -> null
+          });
+          const onKey = (e) => {
+            if (e.key === "Escape") { e.preventDefault(); done(null); }
+            else if (e.key === "Enter") {
+              e.preventDefault();
+              const p = buttons.find((b) => b.primary) || buttons[0];
+              done(p ? p.key : null);
+            }
+          };
+          document.addEventListener("keydown", onKey, true); // capture-phase listener on document; removed again in done()
+          (bg.querySelector(".ui-confirm-acts .seal") || bg.querySelector(".ui-confirm-acts .btn:last-child")).focus();
+        });
+      }
       function showTask(title, taskId, onDone, onSettled) {
         hasTask = true;
         $("#task-panel").hidden = false;

+ 14 - 8
examples/mode_workflow/stages/category_match.py

@@ -159,19 +159,25 @@ def _build_match_lookup(resp: dict) -> dict:
     return {k: v[1] for k, v in best.items()}
 
 
+# Placeholder for a sub-item with no match: keeps substanceMatch/formMatch the same length and order as the original
+# sub-items so the frontend can pair them by index (dropping or de-duplicating entries causes mismatches such as 龙舟→人物).
+NO_MATCH = "无"
+
+
 def enrich_steps(procedures: List[dict], resp: dict) -> List[dict]:
     """逐子项匹配后拼接:对每个 step,把 substance/form 按「、」拆,逐子项查命中的
-    分类 name(按 source_type 过滤),多个用「、」拼接写入 substanceMatch/formMatch;
-    无命中写 None。原地修改 procedures 并返回。"""
+    分类 name(按 source_type 过滤),用「、」拼接写入 substanceMatch/formMatch。
+    关键:结果与原值子项 **等长等序** —— 未命中的子项填 NO_MATCH 占位、**不去重不丢项**,
+    以便前端按下标精确配对;整格全未命中(或无子项)写 None(前端按原值显示)。
+    原地修改 procedures 并返回。"""
     lookup = _build_match_lookup(resp)
 
     def match_for(raw, st):
-        names = []
-        for part in _split_values(_s(raw)):
-            name = lookup.get((part, st))
-            if name and name not in names:
-                names.append(name)
-        return "、".join(names) if names else None
+        parts = _split_values(_s(raw))
+        if not parts:
+            return None
+        names = [lookup.get((part, st)) or NO_MATCH for part in parts]
+        return "、".join(names) if any(n != NO_MATCH for n in names) else None
 
     for proc in (procedures or []):
         for step in (proc.get("steps") or []):