|
|
@@ -64,6 +64,7 @@ CREATE TABLE IF NOT EXISTS search_data (
|
|
|
quality_grade VARCHAR(8) NULL,
|
|
|
found_by JSON NULL COMMENT '命中的措辞数组',
|
|
|
knowledge_type JSON NULL COMMENT '["能力","工序","工具"] 子集',
|
|
|
+ mode_type VARCHAR(16) NULL COMMENT '该 query 的解构方向:工序/工具(空=通用)',
|
|
|
overall_score FLOAT NULL COMMENT '(相关均值+质量均值)/2',
|
|
|
llm_evaluation JSON NULL COMMENT '评估全量 blob',
|
|
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
|
@@ -137,6 +138,12 @@ def init_tables():
|
|
|
cur.execute(DDL_SEARCH)
|
|
|
cur.execute(DDL_PROCESS)
|
|
|
cur.execute(DDL_TOOLS)
|
|
|
+ # 迁移:旧表补 mode_type 列(CREATE IF NOT EXISTS 不会改已有表)
|
|
|
+ cur.execute("SHOW COLUMNS FROM search_data LIKE 'mode_type'")
|
|
|
+ if not cur.fetchone():
|
|
|
+ cur.execute("ALTER TABLE search_data ADD COLUMN mode_type VARCHAR(16) NULL "
|
|
|
+ "COMMENT '该 query 的解构方向:工序/工具(空=通用)' AFTER knowledge_type")
|
|
|
+ print("🔧 迁移:search_data 已补 mode_type 列")
|
|
|
print("✅ 建表完成:search_data, mode_process, mode_tools")
|
|
|
finally:
|
|
|
conn.close()
|
|
|
@@ -188,8 +195,9 @@ def overall_score(e):
|
|
|
|
|
|
# ── search_data ──────────────────────────────────────────────────────────────
|
|
|
|
|
|
-def upsert_search_posts(query_id, query_text, results):
|
|
|
- """一组搜索结果写入 search_data(按 (query_id, case_id) upsert)。返回写入条数。"""
|
|
|
+def upsert_search_posts(query_id, query_text, results, mode_type=None):
|
|
|
+ """一组搜索结果写入 search_data(按 (query_id, case_id) upsert)。返回写入条数。
|
|
|
+ mode_type:该 query 的解构方向(工序/工具),None 不覆盖已有值。"""
|
|
|
if not results:
|
|
|
return 0
|
|
|
rows = []
|
|
|
@@ -208,6 +216,7 @@ def upsert_search_posts(query_id, query_text, results):
|
|
|
post.get("_quality_score"), post.get("_quality_grade"),
|
|
|
_j(r.get("found_by_queries") or []),
|
|
|
_j(e.get("知识类型") or []),
|
|
|
+ mode_type,
|
|
|
overall_score(e),
|
|
|
_j(e),
|
|
|
))
|
|
|
@@ -215,8 +224,9 @@ def upsert_search_posts(query_id, query_text, results):
|
|
|
INSERT INTO search_data
|
|
|
(query_id, query_text, case_id, platform, channel_content_id, title, url,
|
|
|
content_type, body, images, videos, like_count, publish_time,
|
|
|
- quality_score, quality_grade, found_by, knowledge_type, overall_score, llm_evaluation)
|
|
|
- VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
|
|
|
+ quality_score, quality_grade, found_by, knowledge_type, mode_type,
|
|
|
+ overall_score, llm_evaluation)
|
|
|
+ VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
|
|
|
ON DUPLICATE KEY UPDATE
|
|
|
query_text=VALUES(query_text), platform=VALUES(platform),
|
|
|
channel_content_id=VALUES(channel_content_id), title=VALUES(title), url=VALUES(url),
|
|
|
@@ -224,6 +234,7 @@ def upsert_search_posts(query_id, query_text, results):
|
|
|
videos=VALUES(videos), like_count=VALUES(like_count), publish_time=VALUES(publish_time),
|
|
|
quality_score=VALUES(quality_score), quality_grade=VALUES(quality_grade),
|
|
|
found_by=VALUES(found_by), knowledge_type=VALUES(knowledge_type),
|
|
|
+ mode_type=COALESCE(VALUES(mode_type), mode_type),
|
|
|
overall_score=VALUES(overall_score), llm_evaluation=VALUES(llm_evaluation);
|
|
|
"""
|
|
|
conn = _conn()
|
|
|
@@ -241,7 +252,7 @@ def fetch_queries():
|
|
|
try:
|
|
|
with conn.cursor() as cur:
|
|
|
cur.execute("""SELECT query_id, MAX(query_text) AS query_text,
|
|
|
- COUNT(*) AS post_count
|
|
|
+ MAX(mode_type) AS mode_type, COUNT(*) AS post_count
|
|
|
FROM search_data GROUP BY query_id ORDER BY query_id""")
|
|
|
queries = cur.fetchall()
|
|
|
cur.execute("SELECT query_id, COUNT(DISTINCT case_id) AS n FROM mode_process GROUP BY query_id")
|