|
|
@@ -141,7 +141,9 @@ CREATE TABLE IF NOT EXISTS mode_process (
|
|
|
version VARCHAR(32) NULL COMMENT 'v_MMDDHHMM,保留历史;link_* 为跨 query 复制(cost=0)',
|
|
|
cost_usd DECIMAL(10,6) NULL COMMENT '本次解构调用成本(同版本各行相同,聚合需按 case+version 去重)',
|
|
|
duration_s FLOAT NULL,
|
|
|
+ seq SMALLINT NULL COMMENT '帖内序号(0-based);与 (query_id,case_id,version) 组唯一键防并发/重复写',
|
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
+ UNIQUE KEY uk_q_case_ver_seq (query_id, case_id, version, seq),
|
|
|
KEY idx_case_ver (case_id, version),
|
|
|
KEY idx_qid (query_id)
|
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='工序解构结果(每行一个工序)';
|
|
|
@@ -154,6 +156,7 @@ CREATE TABLE IF NOT EXISTS mode_tools (
|
|
|
case_id VARCHAR(128) NOT NULL,
|
|
|
platform VARCHAR(32) NULL,
|
|
|
post_title VARCHAR(512) NULL,
|
|
|
+ source JSON NULL COMMENT '解构时帖子来源块(tool_extract._row_to_source 产出)',
|
|
|
tool_name VARCHAR(255) NULL,
|
|
|
substance_scope JSON NULL COMMENT '实质作用域(数组)',
|
|
|
form_scope JSON NULL COMMENT '形式作用域(数组或null)',
|
|
|
@@ -169,7 +172,9 @@ CREATE TABLE IF NOT EXISTS mode_tools (
|
|
|
version VARCHAR(32) NULL COMMENT 'v_MMDDHHMM;link_* 为跨 query 复制(cost=0)',
|
|
|
cost_usd DECIMAL(10,6) NULL COMMENT '同 mode_process,聚合按 case+version 去重',
|
|
|
duration_s FLOAT NULL,
|
|
|
+ seq SMALLINT NULL COMMENT '帖内序号(0-based);与 (query_id,case_id,version) 组唯一键防并发/重复写',
|
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
+ UNIQUE KEY uk_q_case_ver_seq (query_id, case_id, version, seq),
|
|
|
KEY idx_case_ver (case_id, version),
|
|
|
KEY idx_qid (query_id),
|
|
|
KEY idx_tool_name (tool_name)
|
|
|
@@ -181,6 +186,8 @@ CREATE TABLE IF NOT EXISTS mode_tools (
|
|
|
# 每条知识 = 某 case 的某个工序(proc_index 1-based)。记录导入时的 mode_process 版本:
|
|
|
# 版本变了(重解构)说明内容已变,应重导;版本不变即视为「已传过」,跳过。
|
|
|
# 选 DB 台账而非本地文件,是为了换机器/换链接后也不会重复写知识库。
|
|
|
+# 注:工具知识用独立的 tools_ingest_log,不与本表混用(case_id 是帖子物理身份,
|
|
|
+# 同帖可能既被工序解构又被工具解构,共表会在 (case_id, index) 上撞键)。
|
|
|
DDL_INGEST_LOG = """
|
|
|
CREATE TABLE IF NOT EXISTS knowledge_ingest_log (
|
|
|
id BIGINT AUTO_INCREMENT PRIMARY KEY,
|
|
|
@@ -195,6 +202,43 @@ CREATE TABLE IF NOT EXISTS knowledge_ingest_log (
|
|
|
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='工序知识已导入台账(防重复上传)';
|
|
|
"""
|
|
|
|
|
|
# Ledger of tool knowledge already imported into the knowledge base. Same semantics as
# knowledge_ingest_log, but kept in a separate table for the tools direction
# (used by stages/import_tools_knowledge.py).
# One ledger row = one tool of one case (tool_index is 1-based). `version` records the
# mode_tools version at import time: a changed version (re-extraction) means the entry
# should be re-imported; an unchanged version means "already uploaded" and is skipped.
DDL_TOOLS_INGEST_LOG = """
CREATE TABLE IF NOT EXISTS tools_ingest_log (
    id            BIGINT AUTO_INCREMENT PRIMARY KEY,
    case_id       VARCHAR(128) NOT NULL,
    tool_index    INT NOT NULL COMMENT '工具序号(1-based),对齐导入脚本枚举',
    version       VARCHAR(32) NULL COMMENT '导入时 mode_tools 版本;变了应重导',
    knowledge_id  VARCHAR(128) NULL COMMENT '接口返回的 knowledge_id',
    api_url       VARCHAR(255) NULL,
    ingested_at   TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at    TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    UNIQUE KEY uk_case_tool (case_id, tool_index)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='工具知识已导入台账(防重复上传)';
"""
|
|
|
+
|
|
|
+
|
|
|
+def _ensure_column(cur, table, column, column_ddl):
|
|
|
+ """给已存在的表幂等补列:列已存在则跳过(MySQL ADD COLUMN 无 IF NOT EXISTS)。
|
|
|
+ column_ddl 为 ADD COLUMN 后的完整定义,如 \"source JSON NULL ... AFTER post_title\"。"""
|
|
|
+ cur.execute("""SELECT COUNT(*) AS n FROM information_schema.columns
|
|
|
+ WHERE table_schema=DATABASE() AND table_name=%s AND column_name=%s""",
|
|
|
+ (table, column))
|
|
|
+ if cur.fetchone()["n"] == 0:
|
|
|
+ cur.execute(f"ALTER TABLE {table} ADD COLUMN {column_ddl}")
|
|
|
+
|
|
|
+
|
|
|
+def _ensure_unique_index(cur, table, index_name, cols):
|
|
|
+ """幂等加唯一索引:已存在则跳过(MySQL ADD INDEX 无 IF NOT EXISTS)。
|
|
|
+ cols 为列表达式,如 "query_id, case_id, version, seq"。加之前需保证无冲突数据。"""
|
|
|
+ cur.execute("""SELECT COUNT(*) AS n FROM information_schema.statistics
|
|
|
+ WHERE table_schema=DATABASE() AND table_name=%s AND index_name=%s""",
|
|
|
+ (table, index_name))
|
|
|
+ if cur.fetchone()["n"] == 0:
|
|
|
+ cur.execute(f"ALTER TABLE {table} ADD UNIQUE KEY {index_name} ({cols})")
|
|
|
+
|
|
|
|
|
|
def init_tables():
|
|
|
conn = _conn()
|
|
|
@@ -205,11 +249,38 @@ def init_tables():
|
|
|
cur.execute(DDL_PROCESS)
|
|
|
cur.execute(DDL_TOOLS)
|
|
|
cur.execute(DDL_INGEST_LOG)
|
|
|
+ cur.execute(DDL_TOOLS_INGEST_LOG)
|
|
|
# 历史库迁移:version 由 VARCHAR(16) 放宽到 32,容纳 link_v_mopN_* 复制版本。
|
|
|
# MODIFY 幂等(已是 32 则 MySQL 元数据无操作),建表后表必存在,可安全执行。
|
|
|
for t in ("mode_process", "mode_tools"):
|
|
|
cur.execute(f"ALTER TABLE {t} MODIFY COLUMN version VARCHAR(32) NULL")
|
|
|
- print("✅ 建表完成:search_process, search_tools, mode_process, mode_tools, knowledge_ingest_log")
|
|
|
+ # 历史库迁移:给老 mode_tools 补 source 列(MySQL 的 ADD COLUMN 无 IF NOT EXISTS,
|
|
|
+ # 故先查 information_schema 判存在,缺了才 ADD,幂等)。
|
|
|
+ _ensure_column(cur, "mode_tools", "source",
|
|
|
+ "source JSON NULL COMMENT '解构时帖子来源块' AFTER post_title")
|
|
|
+ # 历史库迁移:加 seq(帖内序号)+ (query_id,case_id,version,seq) 唯一键,防并发/重复
|
|
|
+ # 写入产生重复行。顺序必须是 加列 → 回填 → 加唯一键。MySQL 5.7 无窗口函数,seq 在
|
|
|
+ # 应用层按 (query_id,case_id,version) 内 id 升序回填(现有数据该粒度已无重复)。
|
|
|
+ for t in ("mode_process", "mode_tools"):
|
|
|
+ _ensure_column(cur, t, "seq",
|
|
|
+ "seq SMALLINT NULL COMMENT '帖内序号(0-based)' AFTER duration_s")
|
|
|
+ for t in ("mode_process", "mode_tools"):
|
|
|
+ cur.execute(f"""SELECT id, query_id, case_id, version FROM {t}
|
|
|
+ WHERE seq IS NULL ORDER BY query_id, case_id, version, id""")
|
|
|
+ key, n, ups = None, 0, []
|
|
|
+ for r in cur.fetchall():
|
|
|
+ k = (r["query_id"], r["case_id"], r["version"])
|
|
|
+ if k != key:
|
|
|
+ key, n = k, 0
|
|
|
+ ups.append((n, r["id"])); n += 1
|
|
|
+ if ups:
|
|
|
+ cur.executemany(f"UPDATE {t} SET seq=%s WHERE id=%s", ups)
|
|
|
+ print(f" ↳ {t}: 回填 seq {len(ups)} 行")
|
|
|
+ for t in ("mode_process", "mode_tools"):
|
|
|
+ _ensure_unique_index(cur, t, "uk_q_case_ver_seq",
|
|
|
+ "query_id, case_id, version, seq")
|
|
|
+ print("✅ 建表完成:search_process, search_tools, mode_process, mode_tools, "
|
|
|
+ "knowledge_ingest_log, tools_ingest_log")
|
|
|
finally:
|
|
|
conn.close()
|
|
|
|
|
|
@@ -513,12 +584,13 @@ def replace_process(query_id, case_id, platform, post_title, payload,
|
|
|
procedures = payload.get("procedures") or []
|
|
|
conn = _conn()
|
|
|
try:
|
|
|
+ conn.begin() # DELETE+INSERT 原子化:配合 uk_q_case_ver_seq,并发/重复写入不会留下重复行
|
|
|
with conn.cursor() as cur:
|
|
|
cur.execute("DELETE FROM mode_process WHERE case_id=%s AND version=%s",
|
|
|
(case_id, version))
|
|
|
if procedures:
|
|
|
rows = []
|
|
|
- for p in procedures:
|
|
|
+ for i, p in enumerate(procedures):
|
|
|
steps = p.get("steps") or []
|
|
|
vias = []
|
|
|
for s in steps:
|
|
|
@@ -531,16 +603,20 @@ def replace_process(query_id, case_id, platform, post_title, payload,
|
|
|
p.get("purpose"), p.get("category"),
|
|
|
_j(p.get("declarations")), _j(p.get("type_registry")),
|
|
|
_j(steps), len(steps), _j(vias),
|
|
|
- model, version, cost_usd, duration_s,
|
|
|
+ model, version, cost_usd, duration_s, i,
|
|
|
))
|
|
|
cur.executemany("""
|
|
|
INSERT INTO mode_process
|
|
|
(query_id, case_id, platform, post_title, source, procedure_id, name,
|
|
|
purpose, category, declarations, type_registry, steps, step_count,
|
|
|
- tools_used, model, version, cost_usd, duration_s)
|
|
|
- VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
|
|
|
+ tools_used, model, version, cost_usd, duration_s, seq)
|
|
|
+ VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
|
|
|
""", rows)
|
|
|
+ conn.commit()
|
|
|
return len(procedures)
|
|
|
+ except Exception:
|
|
|
+ conn.rollback()
|
|
|
+ raise
|
|
|
finally:
|
|
|
conn.close()
|
|
|
|
|
|
@@ -551,7 +627,8 @@ def fetch_process_versions(case_id):
|
|
|
with conn.cursor() as cur:
|
|
|
cur.execute("""SELECT version, COUNT(*) AS n, MAX(model) AS model
|
|
|
FROM mode_process WHERE case_id=%s
|
|
|
- GROUP BY version ORDER BY version DESC""", (case_id,))
|
|
|
+ GROUP BY version
|
|
|
+ ORDER BY (LEFT(version,5)='link_') ASC, MAX(id) DESC""", (case_id,))
|
|
|
return cur.fetchall()
|
|
|
finally:
|
|
|
conn.close()
|
|
|
@@ -564,7 +641,7 @@ def fetch_process(case_id, version=None):
|
|
|
with conn.cursor() as cur:
|
|
|
if version is None:
|
|
|
cur.execute("""SELECT version FROM mode_process WHERE case_id=%s
|
|
|
- ORDER BY version DESC, id DESC LIMIT 1""", (case_id,))
|
|
|
+ ORDER BY (LEFT(version,5)='link_') ASC, id DESC LIMIT 1""", (case_id,))
|
|
|
row = cur.fetchone()
|
|
|
if not row:
|
|
|
return None
|
|
|
@@ -597,31 +674,38 @@ def _proc_payload(case_id, version, rows):
|
|
|
# ── mode_tools ───────────────────────────────────────────────────────────────
|
|
|
|
|
|
def replace_tools(query_id, case_id, platform, post_title, tools,
                  model, version, cost_usd, duration_s, source=None):
    """Replace the tool-extraction rows of one post (case) for one version.

    All ``mode_tools`` rows matching ``(case_id, version)`` are deleted, then one row per
    entry of ``tools`` is inserted. Both statements run in one explicit transaction that
    is committed on success and rolled back (with the exception re-raised) on failure.

    Args:
        query_id: query the extraction belongs to; stored on every row.
        case_id: identifier of the post; part of the delete filter.
        platform: platform value stored on every row.
        post_title: post title; truncated to 500 characters, ``None`` becomes ``""``.
        tools: sequence of dicts describing the extracted tools (keyed by the Chinese
            field names read below). ``None`` is treated as an empty list.
        model: model name stored on every row.
        version: extraction version; part of the delete filter.
        cost_usd: call cost, repeated on every row.
        duration_s: call duration, repeated on every row.
        source: source block of the post, passed through ``_j`` and repeated on every
            row (presumably JSON-serialised by ``_j`` -- confirm against the helper).

    Returns:
        Number of tools written (0 when ``tools`` is empty or ``None``).
    """
    # Normalise a missing tool list the same way replace_process does with `or []`;
    # otherwise `len(None)` would raise after the DELETE has already been committed.
    tools = tools or []
    src = _j(source)
    conn = _conn()
    try:
        # Make DELETE + INSERT atomic: together with the uk_q_case_ver_seq unique key,
        # concurrent or repeated writes cannot leave duplicate rows behind.
        conn.begin()
        with conn.cursor() as cur:
            cur.execute("DELETE FROM mode_tools WHERE case_id=%s AND version=%s",
                        (case_id, version))
            if tools:
                # `i` is the 0-based position of the tool within the post (seq column).
                rows = [(
                    query_id, case_id, platform, (post_title or "")[:500], src,
                    (t.get("工具名称") or "")[:250],
                    _j(t.get("实质作用域")), _j(t.get("形式作用域")),
                    t.get("创作层级"), t.get("来源链接"), t.get("输入"), t.get("输出"),
                    _j(t.get("用法")), _j(t.get("案例")), _j(t.get("缺点")),
                    t.get("最新更新时间"), model, version, cost_usd, duration_s, i,
                ) for i, t in enumerate(tools)]
                cur.executemany("""
                    INSERT INTO mode_tools
                    (query_id, case_id, platform, post_title, source, tool_name, substance_scope,
                     form_scope, creation_layer, source_link, input_desc, output_desc,
                     usage_json, cases_json, defects_json, updated_time, model, version,
                     cost_usd, duration_s, seq)
                    VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                """, rows)
        conn.commit()
        return len(tools)
    except Exception:
        conn.rollback()
        raise
    finally:
        conn.close()
|
|
|
|
|
|
@@ -632,7 +716,8 @@ def fetch_tools_versions(case_id):
|
|
|
with conn.cursor() as cur:
|
|
|
cur.execute("""SELECT version, COUNT(*) AS n, MAX(model) AS model
|
|
|
FROM mode_tools WHERE case_id=%s
|
|
|
- GROUP BY version ORDER BY version DESC""", (case_id,))
|
|
|
+ GROUP BY version
|
|
|
+ ORDER BY (LEFT(version,5)='link_') ASC, MAX(id) DESC""", (case_id,))
|
|
|
return cur.fetchall()
|
|
|
finally:
|
|
|
conn.close()
|
|
|
@@ -645,7 +730,7 @@ def fetch_tools(case_id, version=None):
|
|
|
with conn.cursor() as cur:
|
|
|
if version is None:
|
|
|
cur.execute("""SELECT version FROM mode_tools WHERE case_id=%s
|
|
|
- ORDER BY version DESC, id DESC LIMIT 1""", (case_id,))
|
|
|
+ ORDER BY (LEFT(version,5)='link_') ASC, id DESC LIMIT 1""", (case_id,))
|
|
|
row = cur.fetchone()
|
|
|
if not row:
|
|
|
return None
|
|
|
@@ -673,6 +758,7 @@ def _tools_payload(case_id, version, rows):
|
|
|
"title": rows[0]["post_title"], "model": rows[0]["model"],
|
|
|
"cost_usd": float(rows[0]["cost_usd"]) if rows[0]["cost_usd"] is not None else None,
|
|
|
"duration_s": rows[0]["duration_s"],
|
|
|
+ "source": _loads(rows[0].get("source")),
|
|
|
"tool_count": len(tools), "tools": tools}
|
|
|
|
|
|
|
|
|
@@ -688,7 +774,8 @@ def fetch_extract(mode, case_id, version=None):
|
|
|
with conn.cursor() as cur:
|
|
|
cur.execute(f"""SELECT version, COUNT(*) AS n, MAX(model) AS model
|
|
|
FROM {mtable} WHERE case_id=%s
|
|
|
- GROUP BY version ORDER BY version DESC""", (case_id,))
|
|
|
+ GROUP BY version
|
|
|
+ ORDER BY (LEFT(version,5)='link_') ASC, MAX(id) DESC""", (case_id,))
|
|
|
versions = cur.fetchall()
|
|
|
# 详情:把"取最新版本"折进同一条 SQL,版本指定时直接用;省一次往返。
|
|
|
target = version or (versions[0]["version"] if versions else None)
|
|
|
@@ -716,7 +803,7 @@ def latest_real_version(case_id, mode="process"):
|
|
|
with conn.cursor() as cur:
|
|
|
cur.execute(f"""SELECT version, query_id FROM {table}
|
|
|
WHERE case_id=%s AND LEFT(version,5) <> 'link_'
|
|
|
- ORDER BY version DESC, id DESC LIMIT 1""", (case_id,))
|
|
|
+ ORDER BY id DESC LIMIT 1""", (case_id,))
|
|
|
return cur.fetchone()
|
|
|
finally:
|
|
|
conn.close()
|
|
|
@@ -732,7 +819,7 @@ def link_process(query_id, case_id, mode="process"):
|
|
|
with conn.cursor() as cur:
|
|
|
cur.execute(f"""SELECT version FROM {table}
|
|
|
WHERE case_id=%s AND LEFT(version,5) <> 'link_'
|
|
|
- ORDER BY version DESC, id DESC LIMIT 1""", (case_id,))
|
|
|
+ ORDER BY id DESC LIMIT 1""", (case_id,))
|
|
|
r = cur.fetchone()
|
|
|
if not r:
|
|
|
return 0
|
|
|
@@ -905,6 +992,34 @@ def mark_ingested(case_id, proc_index, version, knowledge_id=None, api_url=None)
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
def fetch_tools_ingested_map(case_id):
    """Return ``{tool_index: version}`` for the tools of ``case_id`` already imported.

    Reads the tools-only ledger table ``tools_ingest_log``, which is separate from the
    process ledger ``knowledge_ingest_log``. An empty dict means nothing has been
    imported for this case yet.

    Args:
        case_id: identifier of the post whose ledger rows are read.

    Returns:
        dict mapping each recorded ``tool_index`` to the ``version`` stored with it.
    """
    conn = _conn()
    try:
        with conn.cursor() as cur:
            cur.execute("SELECT tool_index, version FROM tools_ingest_log WHERE case_id=%s",
                        (case_id,))
            ingested = {}
            for row in cur.fetchall():
                ingested[row["tool_index"]] = row["version"]
            return ingested
    finally:
        conn.close()
|
|
|
+
|
|
|
+
|
|
|
def mark_tools_ingested(case_id, tool_index, version, knowledge_id=None, api_url=None):
    """Record one tool as imported in the ``tools_ingest_log`` ledger.

    The statement is an upsert: when a row for the same key already exists
    (ON DUPLICATE KEY), its version, knowledge_id and api_url are overwritten with
    the new values instead of a second row being inserted.

    Args:
        case_id: identifier of the post the tool belongs to.
        tool_index: index of the tool within the case.
        version: version value to store for this ledger entry.
        knowledge_id: optional knowledge id to store.
        api_url: optional API URL to store.

    NOTE(review): no explicit commit is issued here; persistence presumably relies on
    the connection returned by ``_conn()`` running in autocommit mode -- confirm.
    """
    upsert_sql = """INSERT INTO tools_ingest_log
                    (case_id, tool_index, version, knowledge_id, api_url)
                    VALUES (%s,%s,%s,%s,%s)
                    ON DUPLICATE KEY UPDATE version=VALUES(version),
                    knowledge_id=VALUES(knowledge_id), api_url=VALUES(api_url)"""
    params = (case_id, tool_index, version, knowledge_id, api_url)
    conn = _conn()
    try:
        with conn.cursor() as cur:
            cur.execute(upsert_sql, params)
    finally:
        conn.close()
|
|
|
+
|
|
|
+
|
|
|
def fetch_dashboard_rows():
|
|
|
"""拉 Dashboard 计算所需的轻量行。数据量级:百~千行,Python 聚合足够。
|
|
|
优化:① 不传 llm_evaluation 整块,SQL 只取采纳判定要的相关性得分;
|
|
|
@@ -925,15 +1040,17 @@ def fetch_dashboard_rows():
|
|
|
p["mode"] = "tools"
|
|
|
posts += st
|
|
|
# 成本/耗时按全部版本计;steps 仅最新版需要 → 非最新版只回 NULL,省传输。
|
|
|
- cur.execute("""SELECT p.case_id, p.version, p.cost_usd, p.duration_s, p.created_at,
|
|
|
+ cur.execute("""SELECT p.id, p.case_id, p.version, p.cost_usd, p.duration_s, p.created_at,
|
|
|
CASE WHEN p.version = m.maxv THEN p.steps END AS steps
|
|
|
FROM mode_process p
|
|
|
- JOIN (SELECT case_id, MAX(version) AS maxv
|
|
|
- FROM mode_process GROUP BY case_id) m
|
|
|
+ JOIN (SELECT t.case_id, t.version AS maxv FROM mode_process t
|
|
|
+ JOIN (SELECT case_id, MAX(id) AS mid FROM mode_process
|
|
|
+ WHERE LEFT(version,5) <> 'link_' GROUP BY case_id) x
|
|
|
+ ON t.id = x.mid) m
|
|
|
ON p.case_id = m.case_id
|
|
|
ORDER BY p.id""")
|
|
|
procs = cur.fetchall()
|
|
|
- cur.execute("""SELECT case_id, version, tool_name, substance_scope,
|
|
|
+ cur.execute("""SELECT id, case_id, version, tool_name, substance_scope,
|
|
|
form_scope, cost_usd, duration_s, created_at
|
|
|
FROM mode_tools""")
|
|
|
tools = cur.fetchall()
|