|
|
@@ -0,0 +1,997 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+"""搜索评估案例查看 server。
|
|
|
+沿用 图文排版搜索评估.html 的版式(卡片 + dialog 详情 + rubric 评分条),
|
|
|
+数据实时扫描 runs_full/*/form_*.json —— runs_full 下每新增一个 q 文件夹,刷新即出现。
|
|
|
+分页:query → 三种形式(A/B/C) → 三个渠道 三行从上到下。
|
|
|
+
|
|
|
+用法:python server.py [port] 默认 8770,浏览器开 http://0.0.0.0:8770
|
|
|
+"""
|
|
|
+import json, re, glob, sys, pathlib, subprocess, threading
|
|
|
+from datetime import datetime
|
|
|
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
|
|
+from urllib.parse import urlparse, parse_qs
|
|
|
+
|
|
|
# The Windows console defaults to cp1252, where printing Chinese text crashes,
# so stdout is switched to UTF-8 up front. Best effort: any failure is ignored.
try:
    sys.stdout.reconfigure(encoding="utf-8")
except Exception:
    pass

# Directory containing this script; runs_full/, index.html and the helper
# scripts used below are all resolved relative to it.
HERE = pathlib.Path(__file__).parent
sys.path.insert(0, str(HERE))
sys.path.insert(0, str(HERE.parent))  # fixed_query_eval: makes the sibling module batch_extract_procedures importable
# Port: first command-line argument when given, otherwise 8770.
PORT = int(sys.argv[1]) if len(sys.argv) > 1 else 8770

# Platform key -> display name shown for each post.
PLAT = {"xhs": "小红书", "gzh": "公众号", "zhihu": "知乎", "x": "X", "bili": "B站", "douyin": "抖音",
        "sph": "视频号", "youtube": "YouTube", "github": "GitHub", "toutiao": "头条", "weibo": "微博"}
# Canonical knowledge-type key -> display label.
KT = {"procedure": "工序", "step": "步骤", "tool": "工具"}
|
|
|
+
|
|
|
# Load action leaves / type names from the taxonomy so that original_q can be
# parsed back into its original dimensions (action x type, orthogonal).
# Directory priority (first one that exists wins):
#   1. HERE/evaluation                      - primary source; per the original
#      note this is the copy edited in the IDE and read at runtime
#   2. HERE/../../test_script/evaluation    - historical copy, fallback
#   3. HERE/../evaluation                   - older fallback (also the value
#      kept when none of the three exists)
# When nothing can be loaded every table is left empty and the server still
# starts.
EVALDIR = HERE / "evaluation"
if not EVALDIR.exists():
    EVALDIR = HERE.parent.parent / "test_script" / "evaluation"
if not EVALDIR.exists():
    EVALDIR = HERE.parent / "evaluation"
try:
    # read_text() closes the file itself; json.load(open(...)) left the handle
    # open until garbage collection.
    _jm = json.loads((EVALDIR / "judged_matrix.json").read_text(encoding="utf-8"))
    ACT_L1 = {a["name"]: a["l1"] for a in _jm["actions"]}
    ACTION_SET = set(ACT_L1)
    TYPE_SET = {t["name"] for t in _jm["types"]}
    ACTIONS_TAX = [{"name": a["name"], "l1": a["l1"], "l2": a.get("l2", "")} for a in _jm["actions"]]
    TYPES_TAX = [{"name": t["name"], "l1": t["l1"]} for t in _jm["types"]]
    # Taxonomy order follows judged_matrix (strict version); the matrix values
    # come from type_action_scores (lenient version). Per the original note the
    # two files are independent gemini judgings of the same 27x50 cells: the
    # former has only 53 cells at tier 3, the latter 156 cells at score 3.
    _tas = json.loads((EVALDIR / "type_action_scores.json").read_text(encoding="utf-8"))["scores"]
    _MATRIX = []
    for a in _jm["actions"]:
        row = []
        for t in _jm["types"]:
            rec = _tas.get(t["name"], {}).get(a["name"])
            row.append({"tier": rec["score"], "r": rec.get("reason", "")} if rec else {})
        _MATRIX.append(row)
except Exception as _tax_err:
    # Deliberate best effort: a missing or malformed taxonomy must not stop the
    # server. Say why on stderr instead of failing silently, then fall back to
    # empty tables.
    print(f"[server] taxonomy not loaded from {EVALDIR}: {_tax_err}", file=sys.stderr)
    ACT_L1, ACTION_SET, TYPE_SET, ACTIONS_TAX, TYPES_TAX, _MATRIX = {}, set(), set(), [], [], []
|
|
|
+
|
|
|
# In-memory registry of procedure-extraction jobs, keyed by "<q>/<folder_name>".
# Entries are dicts; the code in this file reads/writes "status", "error", "pid".
ACTIVE_TASKS = {}
# In-memory registry of re-evaluation jobs, keyed by q; entries carry "status"
# and optionally "error".
ACTIVE_REEVALS = {}

# Project-local import placed after the sys.path inserts earlier in this file,
# which are what make batch_extract_procedures importable.
from batch_extract_procedures import _short_case, _source_to_dsl_input, _write_meta, _composite_score
|
|
|
+
|
|
|
def run_extraction_task(q, folder_name, src_path, out_dir, engine, model):
    """Run one procedure extraction in a child process and record the outcome.

    The child's stdout/stderr are streamed to ``out_dir/_extract.log``. Progress
    is published in ``ACTIVE_TASKS["<q>/<folder_name>"]`` through the keys
    ``pid``, ``status`` ("success" / "failed") and ``error``.

    Args:
        q: Run folder name under ``runs_full``.
        folder_name: Output folder name of the case.
        src_path: Input file handed to the runner script as its first argument.
        out_dir: ``pathlib.Path`` of the output directory (created if missing).
        engine: ``"cyber_runner"`` selects ``procedure-dsl/run_cyber.py``; any
            other value selects ``procedure-dsl/run_procedure_dsl.py`` and adds
            ``--max-retries 3``.
        model: Model identifier passed through as ``--model``.

    Returns:
        None. This function never raises; failures are stored in the task
        record and appended to the log file.
    """
    task_key = f"{q}/{folder_name}"
    log_path = out_dir / "_extract.log"

    def record(**fields):
        # Look the entry up on every write (as the original did) but create it
        # when absent: previously a missing registration raised KeyError, and
        # raised it again inside the except handler, so the worker died without
        # leaving any status behind.
        ACTIVE_TASKS.setdefault(task_key, {}).update(fields)

    def append_log(text):
        # Best effort only: a log that cannot be written must not replace the
        # real error already stored in the task record.
        try:
            with open(log_path, "a", encoding="utf-8") as fh:
                fh.write(text)
        except (OSError, ValueError):
            pass

    try:
        out_dir.mkdir(parents=True, exist_ok=True)
        runner = "run_cyber.py" if engine == "cyber_runner" else "run_procedure_dsl.py"
        cmd = [
            sys.executable, "-u", str(HERE / "procedure-dsl" / runner),
            str(src_path),
            "--out-dir", str(out_dir),
            "--model", model,
            "--max-turns", "300",
        ]
        if engine != "cyber_runner":
            cmd.extend(["--max-retries", "3"])

        # On Windows the child gets its own process group; elsewhere no flags.
        flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
        with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
            proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
                                    cwd=str(HERE), creationflags=flags)
            record(pid=proc.pid)
            proc.wait()
    except Exception as e:
        # Thread boundary: anything that goes wrong while launching or waiting
        # is reported through the task record rather than propagated.
        record(status="failed", error=str(e))
        append_log(f"\n[server error] Extraction failed: {e}\n")
        return

    if proc.returncode != 0:
        record(status="failed", error=f"Runner failed with exit code {proc.returncode}")
        return

    try:
        # Imported lazily, only after a successful extraction.
        import build_workflows
        build_workflows.write_one(q, folder_name, runs_dir=HERE / "runs_full")
    except Exception as ex:
        record(status="failed", error=f"Workflow compilation failed: {ex}")
        append_log(f"\n[server error] Workflow compilation failed: {ex}\n")
    else:
        record(status="success")
|
|
|
+
|
|
|
+
|
|
|
# ── Tool extraction (added for fixed_query_eval) ────────────────────────────────
# A single LLM call (gemini-3.1-flash-lite) is enough, far lighter than
# procedure extraction (a multi-turn agent).
# It still goes through a subprocess (tool_extract.py) so the heavy LLM
# dependencies stay in the child process and the server itself remains light.
TOOL_TASKS = {}  # {q: {"status", "case_ids", "error", "start_time"}}

def run_tool_extraction_task(q, case_ids, force):
    """Run ``tool_extract.py`` for one run folder and record the outcome.

    The child's stdout/stderr are streamed to ``runs_full/<q>/tools/_extract.log``.
    Progress is published in ``TOOL_TASKS[q]`` through the keys ``pid``,
    ``status`` ("success" / "failed") and ``error``.

    Args:
        q: Run folder name under ``runs_full``.
        case_ids: Iterable of case-id strings, passed comma-joined as ``--case-ids``.
        force: When truthy, ``--force`` is appended to the command line.

    Returns:
        None. This function never raises; failures are stored in the task record.
    """
    log_path = HERE / "runs_full" / q / "tools" / "_extract.log"

    def record(**fields):
        # Create the entry when the caller did not register one: previously a
        # missing TOOL_TASKS[q] raised KeyError here and again inside the
        # except handler, killing the worker without any recorded status.
        TOOL_TASKS.setdefault(q, {}).update(fields)

    try:
        log_path.parent.mkdir(parents=True, exist_ok=True)
        cmd = [sys.executable, "-u", str(HERE / "tool_extract.py"),
               "--q", q, "--case-ids", ",".join(case_ids)]
        if force:
            cmd.append("--force")
        # On Windows the child gets its own process group; elsewhere no flags.
        flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
        with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
            proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
                                    cwd=str(HERE), creationflags=flags)
            record(pid=proc.pid)
            proc.wait()
    except Exception as e:
        # Thread boundary: report through the task record instead of raising.
        record(status="failed", error=str(e))
        return

    if proc.returncode == 0:
        record(status="success")
    else:
        record(status="failed", error=f"tool_extract exit code {proc.returncode}")
|
|
|
+
|
|
|
+
|
|
|
# Leading query tokens that denote a modality constraint.
MODSET = {"文", "图", "视频", "音频"}
# Leading query tokens that denote a tool-type constraint -> normalized label.
TOOLQUAL = {
    "AI": "AI 模型",
    "软件": "桌面 APP",
    "电脑端": "桌面 APP",
    "在线": "云端 Web",
    "网页版": "云端 Web",
    "代码": "API·CLI",
    "命令行": "API·CLI",
    "插件": "插件扩展",
}

def parse_dims(oq):
    """Parse a composed query (e.g. '文 元素生成 提示词 教程') back into its dimensions.

    Returns a dict with:
        action: first whitespace-separated token found in ACTION_SET, else None.
        type: first token found in TYPE_SET, else None.
        action_l1: ACT_L1 entry for the action, "" when there is none.
        constraint: {"kind", "value"} derived from the FIRST token only, when it
            is a modality (MODSET) or tool qualifier (TOOLQUAL); otherwise None.
    """
    tokens = (oq or "").split()

    matched_action = None
    for tok in tokens:
        if tok in ACTION_SET:
            matched_action = tok
            break

    matched_type = None
    for tok in tokens:
        if tok in TYPE_SET:
            matched_type = tok
            break

    # Only the leading token can carry a constraint; modality wins over tool type.
    head = tokens[0] if tokens else None
    if head in MODSET:
        constraint = {"kind": "模态", "value": head}
    elif head in TOOLQUAL:
        constraint = {"kind": "工具类型", "value": TOOLQUAL[head]}
    else:
        constraint = None

    return {
        "action": matched_action,
        "type": matched_type,
        "action_l1": ACT_L1.get(matched_action, ""),
        "constraint": constraint,
    }
|
|
|
+
|
|
|
def flat_scores(sc):
    """Flatten a score mapping (at most one level of nesting) into ``{metric: int}``.

    Top-level values that are dicts contribute their own ``key: value`` pairs;
    every other top-level value is stored under its own key. Values are
    converted with ``int()``; decimal strings such as ``"4.0"`` are accepted via
    ``int(float(...))``. Values that cannot be converted are dropped.

    Args:
        sc: Mapping of scores, or None. Anything that is not a dict yields ``{}``.

    Returns:
        A new dict mapping metric name to an int score.
    """
    def to_int(value):
        # Plain int() first so large integers and integer strings stay exact.
        try:
            return int(value)
        except (TypeError, ValueError, OverflowError):
            pass
        # Fallback for decimal strings ("4.0"); NaN/inf and junk end up as None.
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return None

    flat = {}
    if not isinstance(sc, dict):
        return flat
    for key, value in sc.items():
        pairs = value.items() if isinstance(value, dict) else ((key, value),)
        for name, raw in pairs:
            score = to_int(raw)
            if score is not None:
                flat[name] = score
    return flat
|
|
|
+
|
|
|
def _recency_hard(date_str):
    """Compute a hard recency score from the first 10 characters (YYYY-MM-DD) of a timestamp.

    Within 180 days -> 3, within 730 days -> 2, older -> 1. Returns None when the
    value is empty, not sliceable, or does not parse as a date.

    Per the original note this replaces the LLM-judged recency dimension: the
    publish time is already available when the post is fetched, so computing it
    in code is more stable and costs no LLM tokens.
    """
    try:
        published = datetime.strptime((date_str or "")[:10], "%Y-%m-%d")
    except (ValueError, TypeError):
        return None

    age_days = (datetime.now() - published).days
    # Ordered thresholds: the first limit the age fits under decides the score.
    for limit, score in ((180, 3), (730, 2)):
        if age_days <= limit:
            return score
    return 1
|
|
|
+
|
|
|
+
|
|
|
# Knowledge-type labels emitted by the evaluator (Chinese or English) -> canonical key.
_KT_ALIASES = {
    "工序": "procedure", "procedure": "procedure",
    "能力": "step", "步骤": "step", "step": "step",
    "工具": "tool", "tool": "tool",
}

# 0-10 schema: (Chinese metric name, code key) tables, in output order.
_MOD_FIXED = (("时效性", "recency"), ("热度性", "popularity"), ("评论反馈", "feedback"))
_MOD_USECASE = (("真实感", "realism"), ("表现力", "expressiveness"))
_MOD_FIELDS = (("输入完整性", "input"), ("实现完整性", "implementation"), ("输出完整性", "output"))
_MOD_TOOL = (
    ("能力边界覆盖", "tool_boundary"),
    ("有效比较", "tool_comparison"),
    ("参数/接口具体性", "tool_specificity"),
    ("实操示例", "tool_example"),
    ("版本&限制", "tool_limits"),
)

# Older 1-5 schema: Chinese metric name -> English key.
_LEGACY_METRIC_KEYS = {
    "相关性": "relevance",
    "成品质量": "result_quality",
    "可信度": "credibility",
    "具体用例": "concrete_use_case",
    "完整性": "completeness",
    "步骤结构": "step_structure",
    "步骤可复现": "step_reproducibility",
    "步骤可复现性": "step_reproducibility",
    "能力定义": "capability_definition",
    "实现深度": "implementation_depth",
    "边界失败": "boundary_failure_eval",
    "通用性": "generality",
    "能力覆盖": "capability_coverage",
    "有效对比": "effective_comparison",
    "参数具体": "param_specificity",
    "实操示例": "worked_example",
    "实操用例": "worked_example",
    "示例完整": "worked_example",
    "版本限制": "version_limits",
    "版本说明": "version_limits",
    "限制说明": "version_limits",
}


def _as_dict(value):
    """Return value when it is a dict, otherwise an empty dict (null / wrong type)."""
    return value if isinstance(value, dict) else {}


def _store_score(fs, score_reasons, code, item):
    """Copy item's "得分" (as float) and optional "理由" under `code`; skip unusable entries."""
    if not isinstance(item, dict):
        return
    raw = item.get("得分")
    if raw is None:
        return
    try:
        fs[code] = float(raw)
    except (TypeError, ValueError, OverflowError):
        return
    reason = item.get("理由")
    if reason:
        score_reasons[code] = reason


def _parse_mod_scores(e):
    """Parse the 0-10 schema (relevance + quality blocks) into (scores, reasons)."""
    fs, reasons = {}, {}

    # 1. Relevance: sub-keys are matched by substring.
    for subkey, item in _as_dict(e.get("相关性")).items():
        if "内容制作" in subkey or "知识" in subkey:
            code = "relevance_production"
        elif "query" in subkey or "检索" in subkey:
            code = "relevance_query"
        else:
            continue
        _store_score(fs, reasons, code, item)

    # 2. Quality: fixed dimensions, then the use-case pair nested inside them.
    quality = _as_dict(e.get("质量"))
    fixed = _as_dict(quality.get("固定维度"))
    for cn, code in _MOD_FIXED:
        _store_score(fs, reasons, code, fixed.get(cn))
    usecase = _as_dict(fixed.get("用例"))
    for cn, code in _MOD_USECASE:
        _store_score(fs, reasons, code, usecase.get(cn))

    # 3. Quality: dynamic dimensions, one sub-block per knowledge type.
    dynamic = _as_dict(quality.get("动态维度"))

    proc = _as_dict(dynamic.get("工序"))
    if proc:
        _store_score(fs, reasons, "procedure_completeness", proc.get("流程完整性"))
        fields = _as_dict(proc.get("字段完整性"))
        for cn, suffix in _MOD_FIELDS:
            _store_score(fs, reasons, f"procedure_{suffix}", fields.get(cn))
        _store_score(fs, reasons, "procedure_generality", proc.get("泛化性"))

    cap = _as_dict(dynamic.get("能力") or dynamic.get("步骤"))
    if cap:
        fields = _as_dict(cap.get("字段完整性"))
        for cn, suffix in _MOD_FIELDS:
            _store_score(fs, reasons, f"step_{suffix}", fields.get(cn))
        _store_score(fs, reasons, "step_generality", cap.get("泛化性"))

    tool = _as_dict(dynamic.get("工具"))
    for cn, code in _MOD_TOOL:
        _store_score(fs, reasons, code, tool.get(cn))

    return fs, reasons


def _parse_legacy_scores(e):
    """Parse the older 1-5 schema stored under "评分" into (scores, reasons)."""
    fs, reasons = {}, {}
    for metrics in _as_dict(e.get("评分")).values():
        if not isinstance(metrics, dict):
            continue
        for metric, val in metrics.items():
            key = _LEGACY_METRIC_KEYS.get(metric, metric)
            if isinstance(val, dict):
                if "得分" in val:
                    try:
                        fs[key] = int(val["得分"])
                    except (TypeError, ValueError, OverflowError):
                        pass
                if "理由" in val:
                    reasons[key] = val["理由"]
            elif isinstance(val, (int, float)):
                try:
                    fs[key] = int(val)
                except (ValueError, OverflowError):  # NaN / infinity
                    pass
    return fs, reasons


def _overall_score(fs, is_mod_schema):
    """Average the scores; the 0-10 schema weighs relevance and quality groups equally."""
    if not is_mod_schema:
        return round(sum(fs.values()) / len(fs), 1) if fs else 0
    rel_keys = ("relevance_production", "relevance_query")
    groups = (
        [v for k, v in fs.items() if k in rel_keys],
        [v for k, v in fs.items() if k not in rel_keys],
    )
    averages = [sum(g) / len(g) for g in groups if g]
    return round(sum(averages) / len(averages), 1) if averages else 0.0


def _find_procedure_html(r, title, run, form_name):
    """Locate the procedure HTML of a result; return its runs_full-relative URL path or None.

    Tried in order: (1) any HTML inside procedures/{form}_{platform}_{id[:8]},
    (2) case-{case_id}.html / {case_id}.html, (3) a case-id meta tag or HTML
    comment inside a file, (4) the post title appearing inside a file.
    """
    run_dir = HERE / "runs_full" / run
    if not run_dir.is_dir():
        return None
    case_id = r.get("case_id", "")

    # 1. Per-case folder: {form}_{platform}_{channel_content_id[:8]}.
    content_id = str(r.get("channel_content_id") or "")
    if not content_id and case_id and "_" in case_id:
        content_id = case_id.split("_", 1)[1]
    plat_key = r.get("platform") or ""
    if form_name and plat_key and content_id:
        folder_name = f"{form_name}_{plat_key}_{content_id[:8]}"
        case_dir = run_dir / "procedures" / folder_name
        if case_dir.is_dir():
            # Sorted so the pick is stable when a folder holds several files.
            names = sorted(f.name for f in case_dir.glob("*.html"))
            if names:
                return f"runs_full/{run}/procedures/{folder_name}/{names[0]}"

    candidate_dirs = [d for d in (run_dir, run_dir / "procedures") if d.is_dir()]

    def rel(d_dir, name):
        if d_dir.name == "procedures":
            return f"runs_full/{run}/procedures/{name}"
        return f"runs_full/{run}/{name}"

    # 2. Standard file names.
    if case_id:
        for d_dir in candidate_dirs:
            for name in (f"case-{case_id}.html", f"{case_id}.html"):
                if (d_dir / name).is_file():
                    return rel(d_dir, name)

    # Steps 3 and 4 both inspect file contents; read each file at most once.
    cache = {}

    def text_of(path):
        if path not in cache:
            try:
                cache[path] = path.read_text(encoding="utf-8")
            except (OSError, UnicodeError):
                cache[path] = None
        return cache[path]

    def first_containing(needles):
        for d_dir in candidate_dirs:
            for html_path in sorted(d_dir.glob("*.html")):
                content = text_of(html_path)
                if content is not None and any(n in content for n in needles):
                    return rel(d_dir, html_path.name)
        return None

    # 3. Explicit declaration inside the HTML (meta tag or comment).
    if case_id:
        found = first_containing((
            f'name="case-id" content="{case_id}"',
            f'name="case_id" content="{case_id}"',
            f'<!-- case_id: {case_id} -->',
            f'<!-- case-id: {case_id} -->',
        ))
        if found:
            return found

    # 4. Last resort: fuzzy match on the title.
    if title:
        return first_containing((title,))
    return None


def adapt(r, run, form_name=None):
    """Convert one raw evaluation result into the flat record served to the front end.

    Three evaluation schemas are recognised:
      * 0-10 schema: "相关性" is a dict holding "和内容制作知识相关" or "和 query 相关";
      * older 1-5 schema: has "评分", "知识类型" or "制作相关性";
      * oldest flat schema: scores under "scores", flattened with flat_scores().

    A result is marked "discard" when production relevance is below 4 (0-10
    schema) or 2 (older schemas), when the hard recency score is below 2, or
    when the overall average is below 6 (0-10 schema) or 3 (older schemas).
    Otherwise it is "report".

    Args:
        r: Raw result dict ("post", "llm_evaluation", "case_id", "platform", ...).
            A null or non-dict "post" / "llm_evaluation" is treated as empty.
        run: Run folder name under runs_full.
        form_name: Form identifier used to locate the per-case procedure folder.

    Returns:
        dict with the display fields, parsed scores, decision and procedure_html.
    """
    p = _as_dict(r.get("post"))
    e = _as_dict(r.get("llm_evaluation"))

    # 1. Knowledge types; a bare string is treated as a one-element list.
    kt_raw = e.get("知识类型") or e.get("knowledge_type") or []
    if isinstance(kt_raw, str):
        kt_raw = [kt_raw]
    elif not isinstance(kt_raw, (list, tuple)):
        kt_raw = []
    kt = [_KT_ALIASES[k] for k in kt_raw if isinstance(k, str) and k in _KT_ALIASES]

    # 2. Scores, by schema.
    relevance = e.get("相关性")
    is_mod_schema = isinstance(relevance, dict) and (
        "和内容制作知识相关" in relevance or "和 query 相关" in relevance
    )
    is_new_schema = False
    if is_mod_schema:
        fs, score_reasons = _parse_mod_scores(e)
    else:
        is_new_schema = "评分" in e or "知识类型" in e or "制作相关性" in e
        if is_new_schema:
            fs, score_reasons = _parse_legacy_scores(e)
        else:
            fs, score_reasons = flat_scores(e.get("scores", {})), {}

    # 3. Overall average and bookkeeping fields.
    overall = _overall_score(fs, is_mod_schema)
    anomaly = bool(e.get("error")) or not fs
    grade = p.get("_quality_grade", "")
    fb = r.get("found_by_queries", [])

    # 4. Production relevance.
    if is_mod_schema:
        # The 0-10 schema reuses the "content-production" relevance score.
        production_relevance = fs.get("relevance_production")
    else:
        if is_new_schema:
            pr_block = e.get("制作相关性") or {}
            if isinstance(pr_block, dict):
                pr_raw = pr_block.get("得分")
                if "理由" in pr_block:
                    score_reasons["production_relevance"] = pr_block["理由"]
            else:
                pr_raw = pr_block
        else:
            pr_raw = e.get("production_relevance")
        try:
            production_relevance = int(float(pr_raw)) if pr_raw is not None else None
        except (TypeError, ValueError, OverflowError):
            production_relevance = None

    recency_hard = _recency_hard(p.get("publish_timestamp", ""))

    # 5. Decision: driven by the numeric filters only, never by text matching.
    reason = e.get("判定理由") or e.get("reason") or ""
    is_discard = False
    if production_relevance is not None and production_relevance < (4 if is_mod_schema else 2):
        is_discard = True
    if recency_hard is not None and recency_hard < 2:
        is_discard = True
    if overall < (6 if is_mod_schema else 3):
        is_discard = True
    decision = "discard" if is_discard else "report"

    procedure_html = _find_procedure_html(r, p.get("title", ""), run, form_name)

    return {
        "case_id": r.get("case_id", ""),
        "platform": PLAT.get(r.get("platform"), r.get("platform")), "platformKey": r.get("platform"),
        "title": p.get("title", "") or "(无标题)", "date": (p.get("publish_timestamp", "") or "")[:10],
        "url": r.get("source_url", ""), "engagement": f'{p.get("like_count", 0)} 赞',
        "knowledge_type": kt, "decision": decision,
        "tools": [KT.get(k, k) for k in kt] + ([f"质量 {grade}"] if grade else []), "found_by": fb,
        "images": (p.get("images") or [])[:6], "text": p.get("body_text", "") or "",
        "scores": fs, "overall": overall, "reason": reason, "score_reasons": score_reasons,
        "grade": grade, "qscore": p.get("_quality_score", 0), "anomaly": anomaly,
        "production_relevance": production_relevance, "recency_hard": recency_hard,
        "run": run, "procedure_html": procedure_html,
    }
|
|
|
+
|
|
|
def scan_runs():
    """Scan runs_full/*/form_*.json and build the payload served by /api/data.

    Unreadable, malformed or non-object JSON files are skipped. The database
    (module ``db``) is consulted only when no local run was found, so requests
    with local data never touch it.

    Returns:
        dict with "queries" (one entry per run, ordered by the number embedded
        in the run name), "actions", "types", "matrix" and "active_reevals".
    """
    def tally(results, decision):
        # Anomalous results count towards neither bucket.
        return sum(1 for item in results
                   if item.get("decision") == decision and not item.get("anomaly"))

    runs = {}
    for path in sorted(glob.glob(str(HERE / "runs_full" / "*" / "form_*.json"))):
        try:
            with open(path, encoding="utf-8") as fh:
                d = json.load(fh)
        except (OSError, ValueError):
            continue
        if not isinstance(d, dict):
            continue
        run = pathlib.Path(path).parent.name
        form_name = d.get("form") or ""
        results = [adapt(r, run, form_name) for r in (d.get("results") or [])]
        runs.setdefault(run, []).append({
            "form": d.get("form"), "query": d.get("query"), "original_q": d.get("original_q", ""),
            "requirement": d.get("requirement", ""),
            "platforms": d.get("platforms", []), "total": d.get("total"),
            "report": tally(results, "report"), "discard": tally(results, "discard"),
            "results": results,
        })

    # Database fallback: only when local runs_full yielded nothing at all (so
    # the UI still has data after local files are cleared). Any local data is
    # trusted as-is, avoiding a remote DB query on every /api/data call.
    if not runs:
        try:
            import db
            for q, g in db.fetch_posts_grouped().items():
                results = [adapt(r, q, "A") for r in g["results"]]
                runs[q] = [{
                    "form": "A", "query": g.get("query_text"), "original_q": g.get("query_text") or "",
                    "requirement": "", "platforms": [], "total": len(results),
                    "report": tally(results, "report"), "discard": tally(results, "discard"),
                    "results": results, "_from_db": True,
                }]
        except Exception as e:
            # Optional dependency: report and carry on with whatever we have.
            print(f"⚠️ DB 回退读取失败(仅用本地数据):{e}")

    for forms in runs.values():
        forms.sort(key=lambda x: x.get("form") or "")

    def _qnum(name):  # "q156" -> 156: numeric order, so "q156" does not sort before "q99"
        m = re.search(r"\d+", name)
        return (int(m.group()) if m else 0, name)

    out = []
    for key, forms in sorted(runs.items(), key=lambda kv: _qnum(kv[0])):
        oq = forms[0].get("original_q") or forms[0].get("query") or ""
        # Knowledge hits = adopted (report), non-anomalous posts across all
        # forms, de-duplicated by URL. Posts without a URL fall back to their
        # case_id so they are not all collapsed into a single hit.
        seen = set()
        for form in forms:
            for item in form.get("results", []):
                if item.get("decision") == "report" and not item.get("anomaly"):
                    seen.add(item.get("url") or item.get("case_id"))
        out.append({"key": key, "forms": forms, "dims": parse_dims(oq), "original_q": oq,
                    "hits": len(seen), "tot": sum((form.get("total") or 0) for form in forms)})

    # Snapshot first: worker threads may add entries while we iterate.
    active_reevals = {k: v["status"] for k, v in list(ACTIVE_REEVALS.items())}
    return {"queries": out, "actions": ACTIONS_TAX, "types": TYPES_TAX, "matrix": _MATRIX,
            "active_reevals": active_reevals}
|
|
|
+
|
|
|
+class H(BaseHTTPRequestHandler):
|
|
|
def _send(self, code, body, ctype):
    """Write a complete response: status, Content-Type, Content-Length, then the body.

    ``body`` may be str (encoded as UTF-8) or bytes (sent as-is). For ``text/*``,
    ``application/json`` and ``application/javascript`` a ``charset=utf-8``
    parameter is appended to the Content-Type header.
    """
    payload = body if not isinstance(body, str) else body.encode("utf-8")
    self.send_response(code)
    is_textual = ctype.startswith("text/") or ctype in ("application/json", "application/javascript")
    content_type = f"{ctype}; charset=utf-8" if is_textual else ctype
    self.send_header("Content-Type", content_type)
    self.send_header("Content-Length", str(len(payload)))
    self.end_headers()
    self.wfile.write(payload)
|
|
|
def do_GET(self):
    """Route GET requests.

    Routes:
        /, /index.html         -> index.html next to this script
        /api/data              -> scan_runs() as JSON
        /api/tools_status      -> {"done", "running", "error"} for one case
        /api/tools_data        -> stored tool-extraction result of one case
        /api/procedure_status  -> state of a procedure-extraction job
        /api/procedure_log     -> contents of the job's _extract.log
        /api/spec_content      -> one whitelisted file from procedure-dsl/spec
        /api/reeval_status     -> state of a re-evaluation job
        /runs_full/...         -> static files below runs_full/

    ``q``, ``form`` and ``case_id`` come straight from the query string and are
    joined into filesystem paths, so every such path is checked to resolve
    below runs_full/ before it is touched; otherwise ``..`` segments could
    reach files outside the data directory.
    """
    from urllib.parse import unquote  # only needed by the static-file route

    parsed = urlparse(self.path)
    path = parsed.path
    params = parse_qs(parsed.query)
    runs_root = (HERE / "runs_full").resolve()

    def arg(name):
        # First value of a query parameter, stripped; "" when absent.
        return (params.get(name) or [""])[0].strip()

    def send_json(status, obj):
        self._send(status, json.dumps(obj, ensure_ascii=False), "application/json")

    def inside_runs(candidate):
        # True when candidate resolves to a location strictly below runs_full/.
        try:
            return runs_root in candidate.resolve().parents
        except (OSError, RuntimeError, ValueError):
            return False

    def first_html(folder):
        # Name of the first (sorted) *.html file in folder, or None.
        if not folder.is_dir():
            return None
        names = sorted(f.name for f in folder.glob("*.html"))
        return names[0] if names else None

    if path in ("/", "/index.html"):
        try:
            page = (HERE / "index.html").read_text(encoding="utf-8")
        except (OSError, UnicodeError) as e:
            self._send(500, f"Error reading index.html: {e}", "text/plain")
            return
        self._send(200, page, "text/html")
        return

    if path == "/api/data":
        send_json(200, scan_runs())
        return

    if path == "/api/tools_status":
        # done = result file exists; running = the q's extraction job is still active.
        q, case_id = arg("q"), arg("case_id")
        if not q or not case_id:
            self._send(400, "missing q or case_id", "text/plain")
            return
        result_file = HERE / "runs_full" / q / "tools" / f"{case_id}.json"
        if not inside_runs(result_file):
            self._send(400, "invalid q or case_id", "text/plain")
            return
        done = result_file.is_file()
        if not done:
            # Not on disk -> ask the database (still counts as extracted after
            # local files were cleared). The DB is optional, hence best effort.
            try:
                import db
                done = db.has_tools(q, case_id)
            except Exception:
                pass
        task = TOOL_TASKS.get(q) or {}
        running = task.get("status") == "running" and case_id in (task.get("case_ids") or [])
        send_json(200, {"done": done, "running": running, "error": task.get("error")})
        return

    if path == "/api/tools_data":
        # Tool-extraction result of one post.
        q, case_id = arg("q"), arg("case_id")
        if not q or not case_id:
            self._send(400, "missing q or case_id", "text/plain")
            return
        result_file = HERE / "runs_full" / q / "tools" / f"{case_id}.json"
        if not inside_runs(result_file):
            self._send(400, "invalid q or case_id", "text/plain")
            return
        if not result_file.is_file():
            # Not on disk -> rebuild from the database when possible.
            try:
                import db
                dbdata = db.fetch_tools(q, case_id)
            except Exception:
                dbdata = None
            if isinstance(dbdata, dict) and dbdata:
                dbdata["exists"] = True
                send_json(200, dbdata)
            else:
                send_json(200, {"exists": False})
            return
        try:
            data = json.loads(result_file.read_text(encoding="utf-8"))
            if not isinstance(data, dict):
                raise ValueError("top-level JSON value is not an object")
        except (OSError, ValueError) as e:
            send_json(500, {"error": f"read failed: {e}"})
            return
        data["exists"] = True
        send_json(200, data)
        return

    if path == "/api/procedure_status":
        q, form, case_id = arg("q"), arg("form"), arg("case_id")
        if not q or not form or not case_id:
            self._send(400, "missing q, form, or case_id", "text/plain")
            return
        folder_name = f"{form}_{_short_case(case_id)}"
        out_dir = HERE / "runs_full" / q / "procedures" / folder_name
        if not inside_runs(out_dir):
            self._send(400, "invalid q, form, or case_id", "text/plain")
            return

        task = ACTIVE_TASKS.get(f"{q}/{folder_name}")
        if task is not None:
            # .get(): a record may not carry "error" (or "status") yet.
            res = {"status": task.get("status"), "error": task.get("error")}
            if res["status"] == "success":
                name = first_html(out_dir)
                if name:
                    res["procedure_html"] = f"runs_full/{q}/procedures/{folder_name}/{name}"
            send_json(200, res)
            return

        name = first_html(out_dir)
        if name:
            send_json(200, {
                "status": "success",
                "procedure_html": f"runs_full/{q}/procedures/{folder_name}/{name}",
            })
            return

        # A log without any HTML and without an active job means a past run
        # ended without producing output.
        if (out_dir / "_extract.log").is_file():
            send_json(200, {"status": "failed",
                            "error": "Not running, but no HTML output found (possibly crashed)."})
            return

        send_json(200, {"status": "not_started"})
        return

    if path == "/api/procedure_log":
        q, form, case_id = arg("q"), arg("form"), arg("case_id")
        if not q or not form or not case_id:
            self._send(400, "missing q, form, or case_id", "text/plain")
            return
        folder_name = f"{form}_{_short_case(case_id)}"
        log_path = HERE / "runs_full" / q / "procedures" / folder_name / "_extract.log"
        if not inside_runs(log_path):
            self._send(400, "invalid q, form, or case_id", "text/plain")
            return
        if not log_path.is_file():
            send_json(200, {"log": ""})
            return
        try:
            content = log_path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            send_json(500, {"error": str(e)})
            return
        send_json(200, {"log": content})
        return

    if path == "/api/spec_content":
        file_name = arg("file")
        # Exact-match whitelist, so the name is safe to join onto the spec dir.
        allowed = (
            "README.md",
            "tools.md",
            "extraction/phase1-skeleton.md",
            "extraction/phase2-normalize.md",
            "extraction/phase3-finalize.md",
            "taxonomy/type_suggestions.md",
        )
        if file_name not in allowed:
            self._send(400, "invalid file parameter", "text/plain")
            return
        target_path = HERE / "procedure-dsl" / "spec" / file_name
        if not target_path.is_file():
            self._send(404, "spec file not found", "text/plain")
            return
        try:
            content = target_path.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            send_json(500, {"error": str(e)})
            return
        send_json(200, {"content": content})
        return

    if path == "/api/reeval_status":
        q = arg("q")
        if not q:
            self._send(400, "missing q", "text/plain")
            return
        # Single lookup: the entry cannot vanish between a membership test and the read.
        entry = ACTIVE_REEVALS.get(q)
        if entry is None:
            send_json(200, {"status": "not_started"})
        else:
            send_json(200, {"status": entry.get("status"), "error": entry.get("error")})
        return

    if self.path.startswith("/runs_full/"):
        try:
            # Browsers percent-encode non-ASCII and special characters; decode
            # them so such file names resolve. Traversal introduced by decoding
            # is caught by the containment check below.
            clean_path = unquote(self.path.split("?")[0])
            target_file = HERE.joinpath(*clean_path.strip("/").split("/"))
            if inside_runs(target_file) and target_file.is_file():
                content = target_file.read_bytes()
            else:
                content = None
        except (OSError, ValueError) as e:
            self._send(500, f"Error: {e}", "text/plain")
            return
        if content is None:
            self._send(404, "not found", "text/plain")
            return
        content_types = {
            ".png": "image/png",
            ".webp": "image/webp",
            ".gif": "image/gif",
            ".svg": "image/svg+xml",
            ".jpg": "image/jpeg",
            ".jpeg": "image/jpeg",
            ".json": "application/json",
            ".js": "application/javascript",
            ".css": "text/css",
        }
        # Anything unlisted is served as HTML, as before.
        self._send(200, content, content_types.get(target_file.suffix.lower(), "text/html"))
        return

    self._send(404, "not found", "text/plain")
|
|
|
def do_POST(self):
    """Dispatch the JSON POST endpoints of the viewer.

    Routes (``self.path`` must match exactly):

    * ``/api/generate_procedure`` -- write ``_source.json`` and metadata for one
      case of one form, then run ``run_extraction_task`` on a daemon thread.
    * ``/api/extract_tools`` -- run ``run_tool_extraction_task`` on a daemon thread.
    * ``/api/reeval`` -- launch ``batch_3forms.py --reeval`` as a subprocess whose
      output goes to ``runs_full/<q>/_reeval.log``.
    * ``/api/save_spec`` -- overwrite one allow-listed file under
      ``procedure-dsl/spec``.

    Every response is a JSON document sent through ``self._send``. Malformed
    requests (unknown path, bad ``Content-Length``, undecodable or non-object
    JSON body, wrongly typed fields) are answered with a 4xx status rather
    than raising out of the handler.
    """

    def reply(status, body):
        # Single place that serialises a JSON response.
        self._send(status, json.dumps(body, ensure_ascii=False), "application/json")

    def read_payload():
        """Return the body parsed as a JSON object, or None after replying 400."""
        try:
            length = int(self.headers.get("Content-Length") or 0)
        except (TypeError, ValueError):
            reply(400, {"error": "bad Content-Length"})
            return None
        try:
            # A missing or empty body is treated as an empty object.
            raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
            payload = json.loads(raw)
        except Exception as e:
            reply(400, {"error": f"bad json: {e}"})
            return None
        if not isinstance(payload, dict):
            # Valid JSON, but every endpoint reads named fields from an object.
            reply(400, {"error": "bad json: expected an object"})
            return None
        return payload

    def text_field(payload, key, default=""):
        """Return ``payload[key]`` stripped; missing, empty or non-string -> ``default``."""
        value = payload.get(key)
        if not isinstance(value, str) or not value:
            return default
        return value.strip()

    def rejected_q(q):
        """Reply 400 and return True unless ``q`` looks like ``q<digits>``."""
        if re.match(r"^q\d+$", q):
            return False
        reply(400, {"error": f"bad q (expect 'qNN'): {q!r}"})
        return True

    def generate_procedure(payload):
        q = text_field(payload, "q")
        form = text_field(payload, "form")
        case_id = text_field(payload, "case_id")
        engine = text_field(payload, "engine", "cyber_runner")
        model = text_field(payload, "model", "google/gemini-3.1-flash-lite")

        if rejected_q(q):
            return
        if form not in ("A", "B", "C"):
            reply(400, {"error": f"bad form: {form!r}"})
            return
        if not case_id:
            reply(400, {"error": "missing case_id"})
            return

        q_dir = HERE / "runs_full" / q
        form_file = q_dir / f"form_{form}.json"
        if not form_file.is_file():
            reply(404, {"error": f"form file not found: {form_file.name}"})
            return

        try:
            with open(form_file, encoding="utf-8") as f:
                form_data = json.load(f)
        except Exception as e:
            reply(500, {"error": f"failed to read form: {e}"})
            return

        # First result whose case_id matches the request, if any.
        matching_result = next(
            (r for r in form_data.get("results", []) if r.get("case_id") == case_id),
            None,
        )
        if not matching_result:
            reply(404, {"error": f"case_id {case_id} not found in form {form}"})
            return

        folder_name = f"{form}_{_short_case(case_id)}"
        out_dir = q_dir / "procedures" / folder_name
        src_path = out_dir / "_source.json"
        try:
            # Directory creation is part of "writing inputs", so it reports the same way.
            out_dir.mkdir(parents=True, exist_ok=True)
            with open(src_path, "w", encoding="utf-8") as f:
                json.dump(_source_to_dsl_input(matching_result), f, ensure_ascii=False, indent=2)
            score = _composite_score(matching_result.get("llm_evaluation") or {})
            _write_meta(out_dir, case_id=case_id, from_q=q, form=form, score=score)
        except Exception as e:
            reply(500, {"error": f"failed to write inputs: {e}"})
            return

        task_key = f"{q}/{folder_name}"
        ACTIVE_TASKS[task_key] = {
            "status": "running",
            "start_time": datetime.now().isoformat(),
            "pid": None,
            "error": None,
        }
        threading.Thread(
            target=run_extraction_task,
            args=(q, folder_name, src_path, out_dir, engine, model),
            daemon=True,
        ).start()
        reply(200, {
            "status": "started",
            "task_key": task_key,
            "log": f"runs_full/{q}/procedures/{folder_name}/_extract.log",
        })

    def extract_tools(payload):
        # Tool extraction: body {q, case_ids: [...], force?}. Per the original note the
        # worker starts tool_extract.py as a subprocess (worker not visible here).
        q = text_field(payload, "q")
        case_ids = payload.get("case_ids") or []
        force = bool(payload.get("force"))
        if rejected_q(q):
            return
        if not isinstance(case_ids, list) or not case_ids:
            reply(400, {"error": "case_ids must be a non-empty list"})
            return
        if not (HERE / "runs_full" / q / "form_A.json").is_file():
            reply(404, {"error": f"runs_full/{q}/form_A.json not found"})
            return
        TOOL_TASKS[q] = {
            "status": "running", "case_ids": case_ids,
            "start_time": datetime.now().isoformat(), "pid": None, "error": None,
        }
        threading.Thread(
            target=run_tool_extraction_task, args=(q, case_ids, force), daemon=True
        ).start()
        reply(200, {
            "status": "started", "q": q, "count": len(case_ids),
            "log": f"runs_full/{q}/tools/_extract.log",
        })

    def reeval(payload):
        q = text_field(payload, "q")
        if rejected_q(q):
            return
        q_dir = HERE / "runs_full" / q
        if not q_dir.is_dir():
            reply(404, {"error": f"runs_full/{q} not found"})
            return
        log_path = q_dir / "_reeval.log"

        def wait_reeval(q_key, p_obj, fh):
            # Runs on a daemon thread: block until the subprocess exits, record the
            # outcome in ACTIVE_REEVALS, then release the log file handle.
            try:
                p_obj.wait()
                if p_obj.returncode == 0:
                    ACTIVE_REEVALS[q_key]["status"] = "success"
                else:
                    ACTIVE_REEVALS[q_key]["status"] = "failed"
                    ACTIVE_REEVALS[q_key]["error"] = f"Subprocess exited with code {p_obj.returncode}"
            except Exception as ex:
                ACTIVE_REEVALS[q_key]["status"] = "failed"
                ACTIVE_REEVALS[q_key]["error"] = str(ex)
            finally:
                try:
                    fh.close()
                except Exception:
                    # Best effort: a failed close must not mask the recorded status.
                    pass

        try:
            log_fh = open(log_path, "w", encoding="utf-8", buffering=1)
            try:
                cmd = [sys.executable, "-u", str(HERE / "batch_3forms.py"),
                       "--reeval", "--reeval-q", q, "--output-dir", str(HERE / "runs_full")]
                flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
                proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
                                        cwd=str(HERE), creationflags=flags)
            except Exception:
                # No waiter thread will ever own this handle, so close it here.
                log_fh.close()
                raise

            ACTIVE_REEVALS[q] = {
                "status": "running",
                "pid": proc.pid,
                "error": None,
            }
            threading.Thread(target=wait_reeval, args=(q, proc, log_fh), daemon=True).start()
            reply(200, {"status": "started", "pid": proc.pid, "q": q,
                        "log": str(log_path.relative_to(HERE))})
        except Exception as e:
            reply(500, {"error": f"failed to start: {e}"})

    def save_spec(payload):
        file_name = text_field(payload, "file")
        content = payload.get("content") or ""
        # Only these relative paths may be written; anything else is rejected.
        allowed = (
            "README.md",
            "tools.md",
            "extraction/phase1-skeleton.md",
            "extraction/phase2-normalize.md",
            "extraction/phase3-finalize.md",
            "taxonomy/type_suggestions.md",
        )
        if file_name not in allowed:
            reply(400, {"error": "invalid file parameter"})
            return
        if not isinstance(content, str):
            # write_text would raise TypeError; this is a client error, not a 500.
            reply(400, {"error": "content must be a string"})
            return
        target_path = HERE / "procedure-dsl" / "spec" / file_name
        try:
            target_path.parent.mkdir(parents=True, exist_ok=True)
            target_path.write_text(content, encoding="utf-8")
            reply(200, {"status": "ok"})
        except Exception as e:
            reply(500, {"error": str(e)})

    routes = {
        "/api/generate_procedure": generate_procedure,
        "/api/extract_tools": extract_tools,
        "/api/reeval": reeval,
        "/api/save_spec": save_spec,
    }
    endpoint = routes.get(self.path)
    if endpoint is None:
        # Unknown path: answer without touching the request body.
        reply(404, {"error": "not found"})
        return
    payload = read_payload()
    if payload is None:
        return  # read_payload already sent the 400 response
    endpoint(payload)
|
|
|
def log_message(self, *a):
    """Discard the handler's log output: accept any arguments and do nothing."""
    return None
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Scan once up front purely so the startup banner can report the query count.
    n = len(scan_runs()["queries"])
    banner = f"搜索评估查看 server:http://0.0.0.0:{PORT} (runs_full/ 下 {n} 个 query,实时扫描)"
    print(banner)
    # Bind to all interfaces on PORT and hand control to the serving loop.
    httpd = ThreadingHTTPServer(("0.0.0.0", PORT), H)
    httpd.serve_forever()
|