Explorar o código

feat(示例工作流): 新增占位查询支持与查询列表筛选功能

- 新增add_pending_process_queries函数,支持注册无需触发搜索的占位查询
- 添加前端查询列表筛选控件,支持按时间排序与已/未搜索状态过滤
- 更新所有数据库查询逻辑,排除占位哨兵行的统计与展示
- 为空查询的帖子列表添加一键快速搜索按钮
刘文武 hai 1 día
pai
achega
789391ec28
Modificáronse 2 ficheiros con 214 adicións e 15 borrados
  1. 54 6
      examples/mode_workflow/db.py
  2. 160 9
      examples/mode_workflow/index.html

+ 54 - 6
examples/mode_workflow/db.py

@@ -502,14 +502,61 @@ def upsert_search_posts(query_id, query_text, results, table="search_process"):
         conn.close()
 
 
+# Sentinel case_id for placeholder rows. The query list is derived by grouping search_process on
+# query_id (there is no separate query table), so a query needs at least one row to be listed.
+# To register a query without triggering a search, one sentinel row carrying only query_id and
+# query_text is written. It is not a real post, so every post view / statistic filters it out with
+# _REAL_POST (fetch_queries post_count, fetch_posts, fetch_all_posts, count_executed_queries,
+# fetch_dashboard_rows). Real searches never use this case_id.
+PENDING_CASE_ID = "__pending__"
+_REAL_POST = f"case_id <> '{PENDING_CASE_ID}'"
+
+
+def add_pending_process_queries(texts):
+    """Register query texts as placeholder queries in search_process without searching.
+
+    Each newly added query is stored as one sentinel row (case_id=PENDING_CASE_ID) that
+    carries only query_id and query_text; no search or deconstruction is triggered.
+
+    De-duplication:
+      1. Blank entries are dropped and repeats inside ``texts`` are removed, keeping
+         first-seen order.
+      2. A text whose query_text already exists in search_process (including earlier
+         placeholders) is skipped.
+
+    New query_ids continue the ``q<number>`` sequence shared by search_process and
+    search_tools, so the two directions never collide on an id.
+
+    Args:
+        texts: iterable of query strings; ``None`` or an empty iterable is accepted.
+
+    Returns:
+        ``(added, skipped)`` where ``added`` is the number of rows inserted and
+        ``skipped`` is the number of cleaned texts that already existed in the table.
+    """
+    # Order-preserving de-duplication of the stripped, non-empty inputs.
+    seen, cleaned = set(), []
+    for t in texts or ():
+        t = (t or "").strip()
+        if t and t not in seen:
+            seen.add(t)
+            cleaned.append(t)
+    if not cleaned:
+        return 0, 0   # nothing to register: no need to touch the database
+    conn = _conn()
+    try:
+        with conn.cursor() as cur:
+            cur.execute("SELECT DISTINCT query_text FROM search_process WHERE query_text IS NOT NULL")
+            existing = {r["query_text"] for r in cur.fetchall()}
+            # Highest numeric id across both tables; ids not shaped like q<digits> are ignored.
+            cur.execute("SELECT query_id FROM search_process "
+                        "UNION SELECT query_id FROM search_tools")
+            nums = [int(r["query_id"][1:]) for r in cur.fetchall()
+                    if r["query_id"] and r["query_id"].startswith("q") and r["query_id"][1:].isdigit()]
+            nxt = (max(nums) + 1) if nums else 0
+            rows = []
+            for t in cleaned:
+                if t in existing:
+                    continue
+                rows.append((f"q{nxt:04d}", t, PENDING_CASE_ID))
+                nxt += 1
+            if rows:
+                cur.executemany(
+                    "INSERT INTO search_process (query_id, query_text, case_id) "
+                    "VALUES (%s,%s,%s)", rows)
+        if rows:
+            # DB-API connections start with autocommit off, so persist the insert explicitly.
+            # NOTE(review): harmless no-op if _conn() already enables autocommit - confirm.
+            conn.commit()
+        return len(rows), len(cleaned) - len(rows)
+    finally:
+        conn.close()
+
+
 def fetch_queries(mode="process"):
     """某方向搜索表的 query 列表 + 帖子数 + 采纳/命中数 + 解构进度。"""
     table = _search_table(mode)
     conn = _conn()
     try:
         with conn.cursor() as cur:
+            # post_count 只数真实帖,占位哨兵行不计(占位 query 显示为 0 帖);
+            # GROUP BY 仍含占位 query_id,故无搜索的 query 也会出现在列表里。
             cur.execute(f"""SELECT query_id, MAX(query_text) AS query_text,
-                                   COUNT(*) AS post_count
+                                   COUNT(CASE WHEN {_REAL_POST} THEN 1 END) AS post_count
                             FROM {table} GROUP BY query_id ORDER BY query_id""")
             queries = cur.fetchall()
             # 采纳数:SQL 直取 rel/repro 标量算,**不拉整表 llm_evaluation**(旧版全表 blob,切 tab 巨慢)
@@ -545,7 +592,7 @@ def fetch_posts(query_id, mode="process"):
                                    title, url, content_type, images, like_count, publish_time,
                                    quality_score, quality_grade, found_by, knowledge_type, overall_score,
                                    {_REL_SQL} AS rel, {_REPRO_SQL} AS repro
-                            FROM {table} WHERE query_id=%s
+                            FROM {table} WHERE query_id=%s AND {_REAL_POST}
                             ORDER BY overall_score DESC, id""", (query_id,))
             rows = cur.fetchall()
             cur.execute("SELECT DISTINCT case_id FROM mode_process WHERE query_id=%s", (query_id,))
@@ -607,11 +654,12 @@ def fetch_all_posts(mode="process", *, query_ids=None, adopted_only=False, disti
       - limit/offset:分页(limit=None 不分页)。
     返回 (total, rows):total 为过滤(+去重)后的总条数,rows 为本页切片。"""
     table = _search_table(mode)
-    where, params = "", []
+    # 始终排除占位哨兵行(无搜索的 query 不在帖子视图里出现)
+    where, params = f" WHERE {_REAL_POST}", []
     if query_ids is not None:
         if not query_ids:
             return 0, []   # 显式空列表:直接空结果,不必查库
-        where = " WHERE query_id IN (" + ",".join(["%s"] * len(query_ids)) + ")"
+        where += " AND query_id IN (" + ",".join(["%s"] * len(query_ids)) + ")"
         params = list(query_ids)
     conn = _conn()
     try:
@@ -660,7 +708,7 @@ def count_executed_queries(mode="process"):
     conn = _conn()
     try:
         with conn.cursor() as cur:
-            cur.execute(f"SELECT COUNT(DISTINCT query_id) AS n FROM {table}")
+            cur.execute(f"SELECT COUNT(DISTINCT query_id) AS n FROM {table} WHERE {_REAL_POST}")
             return cur.fetchone()["n"]
     finally:
         conn.close()
@@ -1261,7 +1309,7 @@ def fetch_dashboard_rows():
             # 进度分母走「采纳」口径;mode 标方向(工序帖来自 search_process)。
             cols = (f"query_id, case_id, platform, overall_score, publish_time, "
                     f"{_REL_SQL} AS rel, {_REPRO_SQL} AS repro")
-            cur.execute(f"SELECT {cols} FROM search_process")
+            cur.execute(f"SELECT {cols} FROM search_process WHERE {_REAL_POST}")
             posts = cur.fetchall()
             for p in posts:
                 p["mode"] = "process"

+ 160 - 9
examples/mode_workflow/index.html

@@ -550,6 +550,58 @@
         color: var(--ink-faint);
         font-weight: 500;
       }
+      /* query 列表筛选按钮 + 下拉 */
+      .qf-btn {
+        font-size: 11px;
+        font-weight: 600;
+        color: var(--ink-faint);
+        background: #f3f4f6;
+        border: 1px solid var(--line);
+        border-radius: 6px;
+        padding: 2px 8px;
+        cursor: pointer;
+        letter-spacing: 0;
+      }
+      .qf-btn:hover { color: var(--ink); }
+      .qf-btn.active {
+        color: #2563eb;
+        border-color: #b6cdf7;
+        background: #eef3fe;
+      }
+      .qf-pop {
+        position: absolute;
+        top: 100%;
+        right: 10px;
+        z-index: 30;
+        margin-top: 4px;
+        background: #fff;
+        border: 1px solid var(--line);
+        border-radius: 8px;
+        box-shadow: 0 6px 24px rgba(0, 0, 0, 0.12);
+        padding: 10px 12px;
+        display: flex;
+        flex-direction: column;
+        gap: 10px;
+        min-width: 178px;
+      }
+      .qf-pop label {
+        display: flex;
+        align-items: center;
+        justify-content: space-between;
+        gap: 10px;
+        font-size: 12px;
+        font-weight: 500;
+        color: var(--ink);
+        letter-spacing: 0;
+      }
+      .qf-pop select {
+        font-size: 12px;
+        padding: 3px 6px;
+        border: 1px solid var(--line);
+        border-radius: 6px;
+        background: #fff;
+        cursor: pointer;
+      }
       /* 帖子列头:标题一行、操作按钮另起一行(按钮不和「帖子 N/M」挤一行) */
       .col-head.posts-head {
         flex-direction: column;
@@ -2246,12 +2298,30 @@
       </div>
       <div class="ds-grid">
         <div class="card">
-          <div class="col-head">
+          <div class="col-head" style="position:relative">
             QUERY
-            <span
-              class="n"
-              id="q-count"
-            ></span>
+            <div style="display:flex;align-items:center;gap:8px">
+              <span
+                class="n"
+                id="q-count"
+              ></span>
+              <button class="qf-btn" id="qf-btn" title="筛选 query 列表">筛选 ▾</button>
+            </div>
+            <div class="qf-pop" id="qf-pop" hidden>
+              <label>排序
+                <select id="qf-time">
+                  <option value="desc">最新在前</option>
+                  <option value="asc">最早在前</option>
+                </select>
+              </label>
+              <label>是否搜索
+                <select id="qf-searched">
+                  <option value="all">全部</option>
+                  <option value="yes">已搜索</option>
+                  <option value="no">未搜索</option>
+                </select>
+              </label>
+            </div>
           </div>
           <div
             class="qlist"
@@ -2859,13 +2929,35 @@
         /* 进入/切换子模式时默认选中第一个(即最新)query(进而联动第一帖与解构结果) */
         if (!state.queryId && state.queries.length) await selectQuery(state.queries[0].query_id);
       }
+      /* Filter/sort state for the query list: `time` is the sort direction (the numeric part of
+         query_id is used as a proxy for time); `searched` filters on whether the query has been
+         searched, i.e. post_count > 0.
+         applyQueryFilter returns a filtered and sorted copy of `queries` (the input array is not
+         mutated); ids whose numeric part cannot be parsed sort as -1. */
+      const qFilter = { time: "desc", searched: "all" };
+      function applyQueryFilter(queries) {
+        let list = queries.slice();
+        if (qFilter.searched === "yes") list = list.filter((q) => (q.post_count || 0) > 0);
+        else if (qFilter.searched === "no") list = list.filter((q) => !(q.post_count || 0));
+        const num = (q) => {
+          const n = parseInt(String(q.query_id || "").replace(/^q/, ""), 10);
+          return isNaN(n) ? -1 : n;
+        };
+        list.sort((a, b) => (qFilter.time === "asc" ? num(a) - num(b) : num(b) - num(a)));
+        return list;
+      }
       function renderQueries() {
-        const list = state.queries;
-        $("#q-count").textContent = list.length ? list.length + " 组" : "";
-        if (!list.length) {
+        const total = state.queries.length;
+        const list = applyQueryFilter(state.queries);
+        $("#q-count").textContent = total
+          ? list.length === total
+            ? total + " 组"
+            : list.length + " / " + total + " 组"
+          : "";
+        if (!total) {
           $("#query-list").innerHTML = '<div class="empty">暂无 query<br>点右上「新建搜索」开始</div>';
           return;
         }
+        if (!list.length) {
+          $("#query-list").innerHTML = '<div class="empty">该筛选下无 query</div>';
+          return;
+        }
         $("#query-list").innerHTML = list
           .map((q) => {
             const done = state.mode === "process" ? q.process_done : q.tools_done;
@@ -2903,6 +2995,38 @@
         }
       }
 
+      /* Empty-state "search this query" action: starts a search for the currently selected query
+         (Xiaohongshu + WeChat official accounts, 20 each, including evaluation) by reusing
+         run_search. The current query_id is passed so the results land under the same placeholder
+         query (the placeholder row is filtered out by every reader; once real posts arrive the
+         query naturally moves from "not searched" to "searched"). On completion the query counts
+         and the post list are refreshed.
+         `btn` is optional: when given it is disabled while the request starts and is restored
+         only if starting the search fails. */
+      async function searchCurrentQuery(btn) {
+        const q = state.queries.find((x) => x.query_id === state.queryId);
+        if (!q) return toast("请先选择一个 query", "warn");
+        const body = {
+          query: q.query_text,
+          query_id: q.query_id,
+          mode_type: state.mode === "process" ? "工序" : "工具",
+          platforms: "xhs,gzh",
+          max_count: 20,
+        };
+        if (btn) {
+          btn.disabled = true;
+          btn.textContent = "搜索启动中…";
+        }
+        try {
+          const r = await api("/api/run_search", { method: "POST", body: JSON.stringify(body) });
+          showTask(`搜索 · ${r.query_id} ${q.query_text}`, r.task_id, async () => {
+            await loadQueries();
+            await selectQuery(q.query_id);
+          });
+        } catch (e) {
+          toast("搜索启动失败:" + (e.body?.error || e.status), "error");
+          if (btn) {
+            btn.disabled = false;
+            btn.textContent = "🔍 搜索该 query(小红书·公众号 各20)";
+          }
+        }
+      }
+
       /* ════ Dataset:帖子列表(按渠道分 tab)════ */
       function setPlatFilter(p) {
         state.platFilter = p;
@@ -2971,7 +3095,11 @@
             : `${list.length} / ${all.length} 帖`
           : "";
         if (!all.length) {
-          $("#post-list").innerHTML = '<div class="empty">该 query 暂无帖子</div>';
+          $("#post-list").innerHTML =
+            `<div class="empty">该 query 暂无帖子<br>
+              <button class="btn seal" id="btn-search-query" style="margin-top:12px"
+                onclick="searchCurrentQuery(this)">🔍 搜索该 query(小红书·公众号 各20)</button>
+            </div>`;
           return;
         }
         if (!list.length) {
@@ -4397,6 +4525,29 @@
         }
       };
 
+      /* ════ Query list filter dropdown ════
+         markQfActive highlights the filter button whenever either setting differs from its
+         default (time "desc", searched "all"). */
+      function markQfActive() {
+        $("#qf-btn").classList.toggle("active", qFilter.time !== "desc" || qFilter.searched !== "all");
+      }
+      $("#qf-btn").onclick = (e) => {
+        e.stopPropagation();
+        $("#qf-pop").hidden = !$("#qf-pop").hidden;
+      };
+      $("#qf-time").onchange = (e) => {
+        qFilter.time = e.target.value;
+        markQfActive();
+        renderQueries();
+      };
+      $("#qf-searched").onchange = (e) => {
+        qFilter.searched = e.target.value;
+        markQfActive();
+        renderQueries();
+      };
+      /* Close the dropdown when clicking anywhere outside it */
+      document.addEventListener("click", (e) => {
+        if (!e.target.closest("#qf-pop") && !e.target.closest("#qf-btn")) $("#qf-pop").hidden = true;
+      });
+
       /* ════ 工序/工具子模式 ════ */
       $("#m-process").onclick = () => setMode("process");
       $("#m-tools").onclick = () => setMode("tools");