|
@@ -502,14 +502,61 @@ def upsert_search_posts(query_id, query_text, results, table="search_process"):
|
|
|
conn.close()
|
|
conn.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+# Sentinel-post case_id: the query list is derived by grouping search_process on query_id (there is no separate query master table),
|
|
|
|
|
+# so a query needs at least one row to appear in that list. To support "register a query without running a search", such a query gets
|
|
|
|
|
+# one sentinel row that carries only query_id + query_text. That row does not belong to any real post, so every "post view /
|
|
|
|
|
+# statistics" read site filters it out with _REAL_POST (post_count in fetch_queries, fetch_posts,
|
|
|
|
|
+# fetch_all_posts, count_executed_queries, fetch_dashboard_rows). Real searches never use this case_id.
|
|
|
|
|
+PENDING_CASE_ID = "__pending__"
|
|
|
|
|
+_REAL_POST = f"case_id <> '{PENDING_CASE_ID}'"  # SQL predicate fragment; interpolates only the constant above
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
def add_pending_process_queries(texts):
    """Register query strings as placeholder queries in ``search_process``.

    This function only inserts rows; it does not call any search or
    deconstruction step. Each newly added query gets one sentinel row whose
    ``case_id`` is ``PENDING_CASE_ID`` and which fills only ``query_id`` and
    ``query_text``.

    De-duplication:
      1. within ``texts``: entries are stripped, blank/None entries are
         dropped, and repeats are removed while keeping first-seen order;
      2. against the table: a text that already appears as ``query_text`` in
         ``search_process`` (earlier placeholders included) is skipped.

    New ids continue from the highest ``q<number>`` id found in either
    ``search_process`` or ``search_tools``, to avoid colliding with ids used
    by the tools direction. Ids are ``q`` plus a zero-padded 4-digit number,
    starting at ``q0000`` when no numeric id exists yet.

    Args:
        texts: iterable of query strings; items may be None or blank.

    Returns:
        ``(added, skipped)``: number of rows inserted, and number of cleaned
        texts that were not inserted because they already existed.
    """
    # dict.fromkeys gives an order-preserving de-dup of the cleaned inputs.
    cleaned = list(dict.fromkeys(
        s for s in ((t or "").strip() for t in texts) if s))
    if not cleaned:
        # Nothing to register: answer directly instead of opening a
        # connection and scanning both tables.
        return 0, 0

    conn = _conn()
    try:
        with conn.cursor() as cur:
            cur.execute("SELECT DISTINCT query_text FROM search_process WHERE query_text IS NOT NULL")
            existing = {r["query_text"] for r in cur.fetchall()}

            cur.execute("SELECT query_id FROM search_process "
                        "UNION SELECT query_id FROM search_tools")
            # isdecimal() (not isdigit()) so that every suffix passing the
            # guard is something int() can parse; isdigit() also accepts
            # characters such as superscript digits, which int() rejects.
            nums = [int(r["query_id"][1:]) for r in cur.fetchall()
                    if r["query_id"] and r["query_id"].startswith("q")
                    and r["query_id"][1:].isdecimal()]
            nxt = max(nums, default=-1) + 1

            # NOTE(review): ids are allocated by read-then-insert with no
            # lock visible here; confirm callers are single-writer or that
            # the table enforces uniqueness.
            fresh = [t for t in cleaned if t not in existing]
            rows = [(f"q{n:04d}", t, PENDING_CASE_ID)
                    for n, t in enumerate(fresh, start=nxt)]
            if rows:
                # NOTE(review): no explicit commit in this function;
                # presumably _conn() returns an autocommit connection - confirm.
                cur.executemany(
                    "INSERT INTO search_process (query_id, query_text, case_id) "
                    "VALUES (%s,%s,%s)", rows)
        return len(rows), len(cleaned) - len(rows)
    finally:
        conn.close()
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
def fetch_queries(mode="process"):
|
|
def fetch_queries(mode="process"):
|
|
|
"""某方向搜索表的 query 列表 + 帖子数 + 采纳/命中数 + 解构进度。"""
|
|
"""某方向搜索表的 query 列表 + 帖子数 + 采纳/命中数 + 解构进度。"""
|
|
|
table = _search_table(mode)
|
|
table = _search_table(mode)
|
|
|
conn = _conn()
|
|
conn = _conn()
|
|
|
try:
|
|
try:
|
|
|
with conn.cursor() as cur:
|
|
with conn.cursor() as cur:
|
|
|
|
|
+ # post_count 只数真实帖,占位哨兵行不计(占位 query 显示为 0 帖);
|
|
|
|
|
+ # GROUP BY 仍含占位 query_id,故无搜索的 query 也会出现在列表里。
|
|
|
cur.execute(f"""SELECT query_id, MAX(query_text) AS query_text,
|
|
cur.execute(f"""SELECT query_id, MAX(query_text) AS query_text,
|
|
|
- COUNT(*) AS post_count
|
|
|
|
|
|
|
+ COUNT(CASE WHEN {_REAL_POST} THEN 1 END) AS post_count
|
|
|
FROM {table} GROUP BY query_id ORDER BY query_id""")
|
|
FROM {table} GROUP BY query_id ORDER BY query_id""")
|
|
|
queries = cur.fetchall()
|
|
queries = cur.fetchall()
|
|
|
# 采纳数:SQL 直取 rel/repro 标量算,**不拉整表 llm_evaluation**(旧版全表 blob,切 tab 巨慢)
|
|
# 采纳数:SQL 直取 rel/repro 标量算,**不拉整表 llm_evaluation**(旧版全表 blob,切 tab 巨慢)
|
|
@@ -545,7 +592,7 @@ def fetch_posts(query_id, mode="process"):
|
|
|
title, url, content_type, images, like_count, publish_time,
|
|
title, url, content_type, images, like_count, publish_time,
|
|
|
quality_score, quality_grade, found_by, knowledge_type, overall_score,
|
|
quality_score, quality_grade, found_by, knowledge_type, overall_score,
|
|
|
{_REL_SQL} AS rel, {_REPRO_SQL} AS repro
|
|
{_REL_SQL} AS rel, {_REPRO_SQL} AS repro
|
|
|
- FROM {table} WHERE query_id=%s
|
|
|
|
|
|
|
+ FROM {table} WHERE query_id=%s AND {_REAL_POST}
|
|
|
ORDER BY overall_score DESC, id""", (query_id,))
|
|
ORDER BY overall_score DESC, id""", (query_id,))
|
|
|
rows = cur.fetchall()
|
|
rows = cur.fetchall()
|
|
|
cur.execute("SELECT DISTINCT case_id FROM mode_process WHERE query_id=%s", (query_id,))
|
|
cur.execute("SELECT DISTINCT case_id FROM mode_process WHERE query_id=%s", (query_id,))
|
|
@@ -607,11 +654,12 @@ def fetch_all_posts(mode="process", *, query_ids=None, adopted_only=False, disti
|
|
|
- limit/offset:分页(limit=None 不分页)。
|
|
- limit/offset:分页(limit=None 不分页)。
|
|
|
返回 (total, rows):total 为过滤(+去重)后的总条数,rows 为本页切片。"""
|
|
返回 (total, rows):total 为过滤(+去重)后的总条数,rows 为本页切片。"""
|
|
|
table = _search_table(mode)
|
|
table = _search_table(mode)
|
|
|
- where, params = "", []
|
|
|
|
|
|
|
+ # 始终排除占位哨兵行(无搜索的 query 不在帖子视图里出现)
|
|
|
|
|
+ where, params = f" WHERE {_REAL_POST}", []
|
|
|
if query_ids is not None:
|
|
if query_ids is not None:
|
|
|
if not query_ids:
|
|
if not query_ids:
|
|
|
return 0, [] # 显式空列表:直接空结果,不必查库
|
|
return 0, [] # 显式空列表:直接空结果,不必查库
|
|
|
- where = " WHERE query_id IN (" + ",".join(["%s"] * len(query_ids)) + ")"
|
|
|
|
|
|
|
+ where += " AND query_id IN (" + ",".join(["%s"] * len(query_ids)) + ")"
|
|
|
params = list(query_ids)
|
|
params = list(query_ids)
|
|
|
conn = _conn()
|
|
conn = _conn()
|
|
|
try:
|
|
try:
|
|
@@ -660,7 +708,7 @@ def count_executed_queries(mode="process"):
|
|
|
conn = _conn()
|
|
conn = _conn()
|
|
|
try:
|
|
try:
|
|
|
with conn.cursor() as cur:
|
|
with conn.cursor() as cur:
|
|
|
- cur.execute(f"SELECT COUNT(DISTINCT query_id) AS n FROM {table}")
|
|
|
|
|
|
|
+ cur.execute(f"SELECT COUNT(DISTINCT query_id) AS n FROM {table} WHERE {_REAL_POST}")
|
|
|
return cur.fetchone()["n"]
|
|
return cur.fetchone()["n"]
|
|
|
finally:
|
|
finally:
|
|
|
conn.close()
|
|
conn.close()
|
|
@@ -1261,7 +1309,7 @@ def fetch_dashboard_rows():
|
|
|
# 进度分母走「采纳」口径;mode 标方向(工序帖来自 search_process)。
|
|
# 进度分母走「采纳」口径;mode 标方向(工序帖来自 search_process)。
|
|
|
cols = (f"query_id, case_id, platform, overall_score, publish_time, "
|
|
cols = (f"query_id, case_id, platform, overall_score, publish_time, "
|
|
|
f"{_REL_SQL} AS rel, {_REPRO_SQL} AS repro")
|
|
f"{_REL_SQL} AS rel, {_REPRO_SQL} AS repro")
|
|
|
- cur.execute(f"SELECT {cols} FROM search_process")
|
|
|
|
|
|
|
+ cur.execute(f"SELECT {cols} FROM search_process WHERE {_REAL_POST}")
|
|
|
posts = cur.fetchall()
|
|
posts = cur.fetchall()
|
|
|
for p in posts:
|
|
for p in posts:
|
|
|
p["mode"] = "process"
|
|
p["mode"] = "process"
|