server.py 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233
  1. # -*- coding: utf-8 -*-
  2. """搜索评估案例查看 server。
  3. 沿用 图文排版搜索评估.html 的版式(卡片 + dialog 详情 + rubric 评分条),
  4. 数据实时扫描 runs_full/*/form_*.json —— runs_full 下每新增一个 q 文件夹,刷新即出现。
  5. 分页:query → 三种形式(A/B/C) → 三个渠道 三行从上到下。
  6. 用法:python server.py [port] 默认 8770,浏览器开 http://0.0.0.0:8770
  7. """
  8. import json, re, glob, sys, pathlib, subprocess, threading
  9. from datetime import datetime
  10. from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
  11. from urllib.parse import urlparse, parse_qs
try:  # The Windows console defaults to cp1252, where printing Chinese crashes; force utf-8.
    sys.stdout.reconfigure(encoding="utf-8")
except Exception:
    pass
# Directory containing this script; also put on sys.path so sibling modules import.
HERE = pathlib.Path(__file__).parent
sys.path.insert(0, str(HERE))
import argparse
# CLI: optional positional port plus --mode; unknown arguments are ignored
# (parse_known_args) rather than rejected.
_parser = argparse.ArgumentParser(description="搜索评估查看 server")
_parser.add_argument("port", type=int, nargs="?", default=8770, help="运行端口")
_parser.add_argument("--mode", choices=["legacy", "new"], default="legacy", help="运行模式:legacy / new")
_args, _ = _parser.parse_known_args()
PORT = _args.port
MODE = _args.mode
# The mode selects which runs directory (next to this script) is scanned.
RUNS_DIR_NAME = "runs_new" if MODE == "new" else "runs_full"
RUNS_DIR = HERE / RUNS_DIR_NAME
# Platform key -> display name.
PLAT = {"xhs": "小红书", "gzh": "公众号", "zhihu": "知乎", "x": "X", "bili": "B站", "douyin": "抖音",
        "sph": "视频号", "youtube": "YouTube", "github": "GitHub", "toutiao": "头条", "weibo": "微博"}
# Knowledge-type key -> display name.
KT = {"procedure": "工序", "step": "步骤", "tool": "工具"}
  30. # 从 taxonomy 取动作叶子/类型名,用于把 original_q 解析回原始维度(动作×类型 正交)
  31. # 路径优先级:search_eval/evaluation/(主源,IDE 编辑那份就是 runtime 实际读的)
  32. # → test_script/evaluation/(历史副本兜底)→ script/evaluation/(更老兜底)
  33. # 谁也找不到时整目录扫空,server 仍能起。
  34. EVALDIR = HERE / "evaluation"
  35. if not EVALDIR.exists():
  36. EVALDIR = HERE.parent.parent / "test_script" / "evaluation"
  37. if not EVALDIR.exists():
  38. EVALDIR = HERE.parent / "evaluation"
  39. try:
  40. _jm = json.load(open(EVALDIR / "judged_matrix.json", encoding="utf-8"))
  41. ACT_L1 = {a["name"]: a["l1"] for a in _jm["actions"]}
  42. ACTION_SET = set(ACT_L1)
  43. TYPE_SET = {t["name"] for t in _jm["types"]}
  44. ACTIONS_TAX = [{"name": a["name"], "l1": a["l1"], "l2": a.get("l2", "")} for a in _jm["actions"]]
  45. TYPES_TAX = [{"name": t["name"], "l1": t["l1"]} for t in _jm["types"]]
  46. # taxonomy 顺序沿用 judged_matrix(严格版);矩阵分值改用 type_action_scores(宽松版) —
  47. # 两份是同一组 27×50 cell 的独立 gemini judging,前者只 53 格到 tier3,后者 156 格到 score3
  48. _tas = json.load(open(EVALDIR / "type_action_scores.json", encoding="utf-8"))["scores"]
  49. _MATRIX = []
  50. for a in _jm["actions"]:
  51. row = []
  52. for t in _jm["types"]:
  53. rec = _tas.get(t["name"], {}).get(a["name"])
  54. row.append({"tier": rec["score"], "r": rec.get("reason", "")} if rec else {})
  55. _MATRIX.append(row)
  56. except Exception:
  57. ACT_L1, ACTION_SET, TYPE_SET, ACTIONS_TAX, TYPES_TAX, _MATRIX = {}, set(), set(), [], [], []
# In-memory registries of background jobs; each value is a dict that carries at
# least a "status" entry (and usually "error").
# ACTIVE_TASKS is keyed by "<q>/<folder_name>"; the other three are looked up by q.
ACTIVE_TASKS = {}
ACTIVE_REEVALS = {}
ACTIVE_SEARCH_EVALS = {}
ACTIVE_BATCH_TASKS = {}
  62. def backup_procedure_history(out_dir: pathlib.Path):
  63. if not out_dir.is_dir():
  64. return
  65. files_to_backup = [f for f in out_dir.iterdir() if f.is_file()]
  66. if not files_to_backup:
  67. return
  68. history_dir = out_dir / "history"
  69. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  70. version_dir = history_dir / timestamp
  71. version_dir.mkdir(parents=True, exist_ok=True)
  72. import shutil
  73. for f in files_to_backup:
  74. try:
  75. shutil.move(str(f), str(version_dir / f.name))
  76. except Exception as e:
  77. print(f"[backup] failed to move procedure file {f.name}: {e}")
  78. def backup_reeval_history(q_dir: pathlib.Path):
  79. if not q_dir.is_dir():
  80. return
  81. files_to_backup = list(q_dir.glob("form_*.json"))
  82. if not files_to_backup:
  83. return
  84. history_dir = q_dir / "history"
  85. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  86. version_dir = history_dir / timestamp
  87. version_dir.mkdir(parents=True, exist_ok=True)
  88. import shutil
  89. for f in files_to_backup:
  90. try:
  91. shutil.copy(str(f), str(version_dir / f.name))
  92. except Exception as e:
  93. print(f"[backup] failed to copy reeval file {f.name}: {e}")
  94. from batch_extract_procedures import _short_case, _source_to_dsl_input, _write_meta, _composite_score
  95. def run_extraction_task(q, folder_name, src_path, out_dir, engine, model):
  96. task_key = f"{q}/{folder_name}"
  97. log_path = out_dir / "_extract.log"
  98. try:
  99. out_dir.mkdir(parents=True, exist_ok=True)
  100. if engine == "cyber_runner":
  101. script_path = HERE / "procedure-dsl" / "run_cyber.py"
  102. else:
  103. script_path = HERE / "procedure-dsl" / "run_procedure_dsl.py"
  104. cmd = [
  105. sys.executable, "-u", str(script_path),
  106. str(src_path),
  107. "--out-dir", str(out_dir),
  108. "--model", model,
  109. "--max-turns", "300"
  110. ]
  111. if engine != "cyber_runner":
  112. cmd.extend(["--max-retries", "3"])
  113. flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
  114. with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
  115. proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
  116. cwd=str(HERE), creationflags=flags)
  117. ACTIVE_TASKS[task_key]["pid"] = proc.pid
  118. proc.wait()
  119. if proc.returncode == 0:
  120. try:
  121. import build_workflows
  122. build_workflows.write_one(q, folder_name, runs_dir=RUNS_DIR)
  123. ACTIVE_TASKS[task_key]["status"] = "success"
  124. except Exception as ex:
  125. ACTIVE_TASKS[task_key]["status"] = "failed"
  126. ACTIVE_TASKS[task_key]["error"] = f"Workflow compilation failed: {ex}"
  127. with open(log_path, "a", encoding="utf-8") as f_err:
  128. f_err.write(f"\n[server error] Workflow compilation failed: {ex}\n")
  129. else:
  130. ACTIVE_TASKS[task_key]["status"] = "failed"
  131. ACTIVE_TASKS[task_key]["error"] = f"Runner failed with exit code {proc.returncode}"
  132. except Exception as e:
  133. ACTIVE_TASKS[task_key]["status"] = "failed"
  134. ACTIVE_TASKS[task_key]["error"] = str(e)
  135. try:
  136. with open(log_path, "a", encoding="utf-8") as f_err:
  137. f_err.write(f"\n[server error] Extraction failed: {e}\n")
  138. except Exception:
  139. pass
# Leading query tokens that denote a modality constraint.
MODSET = {"文", "图", "视频", "音频"}
# Leading query tokens that denote a tool-type constraint -> normalized tool-type label.
TOOLQUAL = {"AI": "AI 模型", "软件": "桌面 APP", "电脑端": "桌面 APP", "在线": "云端 Web",
            "网页版": "云端 Web", "代码": "API·CLI", "命令行": "API·CLI", "插件": "插件扩展"}
  143. def parse_dims(oq):
  144. """把组合 query(如 '文 元素生成 提示词 教程')解析回 {动作, 类型, 动作L1, 约束}。"""
  145. toks = (oq or "").split()
  146. action = next((t for t in toks if t in ACTION_SET), None)
  147. type_ = next((t for t in toks if t in TYPE_SET), None)
  148. cons = None
  149. if toks:
  150. t0 = toks[0]
  151. if t0 in MODSET:
  152. cons = {"kind": "模态", "value": t0}
  153. elif t0 in TOOLQUAL:
  154. cons = {"kind": "工具类型", "value": TOOLQUAL[t0]}
  155. return {"action": action, "type": type_, "action_l1": ACT_L1.get(action, ""), "constraint": cons}
  156. def flat_scores(sc):
  157. f = {}
  158. for k, v in (sc or {}).items():
  159. if isinstance(v, dict):
  160. for kk, vv in v.items():
  161. try: f[kk] = int(vv)
  162. except Exception: pass
  163. else:
  164. try: f[k] = int(v)
  165. except Exception: pass
  166. return f
  167. def _recency_hard(date_str):
  168. """按 publish_timestamp 头 10 字符(YYYY-MM-DD)算硬时效:半年内=3 / 两年内=2 / 更早=1。
  169. 取代原 LLM 评的 recency 维度——脚本算更稳,发布时间在帖子抓取时就有,无需 LLM token。
  170. """
  171. try:
  172. d = datetime.strptime((date_str or "")[:10], "%Y-%m-%d")
  173. except (ValueError, TypeError):
  174. return None
  175. days = (datetime.now() - d).days
  176. if days <= 180: return 3
  177. if days <= 730: return 2
  178. return 1
  179. def adapt(r, run, form_name=None):
  180. p = r.get("post", {}); e = r.get("llm_evaluation", {})
  181. # 1. 解析 知识类型 (knowledge_type)
  182. kt = []
  183. kt_raw = e.get("知识类型") or e.get("knowledge_type") or []
  184. for k in kt_raw:
  185. if k in ("工序", "procedure"): kt.append("procedure")
  186. elif k in ("能力", "步骤", "step"): kt.append("step")
  187. elif k in ("工具", "tool"): kt.append("tool")
  188. fs = {}
  189. score_reasons = {}
  190. # 检测是否为 eval_prompt_sample-mod 里的新版 0-10 分数 schema
  191. is_mod_schema = "相关性" in e and isinstance(e["相关性"], dict) and ("和内容制作知识相关" in e["相关性"] or "和 query 相关" in e["相关性"])
  192. if is_mod_schema:
  193. # 新版 0-10 分数格式解析
  194. # 1. 相关性
  195. rel = e.get("相关性") or {}
  196. for subkey, item in rel.items():
  197. if isinstance(item, dict):
  198. score_val = item.get("得分")
  199. reason_val = item.get("理由")
  200. code_key = None
  201. if "内容制作" in subkey or "知识" in subkey:
  202. code_key = "relevance_production"
  203. elif "query" in subkey or "检索" in subkey:
  204. code_key = "relevance_query"
  205. if code_key and score_val is not None:
  206. try:
  207. fs[code_key] = float(score_val)
  208. if reason_val:
  209. score_reasons[code_key] = reason_val
  210. except Exception:
  211. pass
  212. # 2. 质量
  213. q_block = e.get("质量") or {}
  214. fixed = q_block.get("固定维度") or {}
  215. # 固定维度
  216. fixed_keys = {
  217. "时效性": "recency",
  218. "热度性": "popularity",
  219. "评论反馈": "feedback"
  220. }
  221. for cn, code in fixed_keys.items():
  222. item = fixed.get(cn)
  223. if isinstance(item, dict):
  224. score_val = item.get("得分")
  225. reason_val = item.get("理由")
  226. if score_val is not None:
  227. try:
  228. fs[code] = float(score_val)
  229. if reason_val:
  230. score_reasons[code] = reason_val
  231. except Exception:
  232. pass
  233. # 用例 (真实感, 表现力)
  234. usecase = fixed.get("用例") or {}
  235. usecase_keys = {
  236. "真实感": "realism",
  237. "表现力": "expressiveness"
  238. }
  239. for cn, code in usecase_keys.items():
  240. item = usecase.get(cn)
  241. if isinstance(item, dict):
  242. score_val = item.get("得分")
  243. reason_val = item.get("理由")
  244. if score_val is not None:
  245. try:
  246. fs[code] = float(score_val)
  247. if reason_val:
  248. score_reasons[code] = reason_val
  249. except Exception:
  250. pass
  251. # 动态维度
  252. dynamic = q_block.get("动态维度") or {}
  253. # 工序
  254. proc = dynamic.get("工序") or {}
  255. if proc:
  256. item = proc.get("流程完整性")
  257. if isinstance(item, dict):
  258. score_val = item.get("得分")
  259. reason_val = item.get("理由")
  260. if score_val is not None:
  261. try:
  262. fs["procedure_completeness"] = float(score_val)
  263. if reason_val:
  264. score_reasons["procedure_completeness"] = reason_val
  265. except Exception:
  266. pass
  267. field = proc.get("字段完整性") or {}
  268. field_keys = {
  269. "输入完整性": "procedure_input",
  270. "实现完整性": "procedure_implementation",
  271. "输出完整性": "procedure_output"
  272. }
  273. for cn, code in field_keys.items():
  274. item = field.get(cn)
  275. if isinstance(item, dict):
  276. score_val = item.get("得分")
  277. reason_val = item.get("理由")
  278. if score_val is not None:
  279. try:
  280. fs[code] = float(score_val)
  281. if reason_val:
  282. score_reasons[code] = reason_val
  283. except Exception:
  284. pass
  285. item = proc.get("泛化性")
  286. if isinstance(item, dict):
  287. score_val = item.get("得分")
  288. reason_val = item.get("理由")
  289. if score_val is not None:
  290. try:
  291. fs["procedure_generality"] = float(score_val)
  292. if reason_val:
  293. score_reasons["procedure_generality"] = reason_val
  294. except Exception:
  295. pass
  296. # 能力
  297. cap = dynamic.get("能力") or dynamic.get("步骤") or {}
  298. if cap:
  299. field = cap.get("字段完整性") or {}
  300. field_keys = {
  301. "输入完整性": "step_input",
  302. "实现完整性": "step_implementation",
  303. "输出完整性": "step_output"
  304. }
  305. for cn, code in field_keys.items():
  306. item = field.get(cn)
  307. if isinstance(item, dict):
  308. score_val = item.get("得分")
  309. reason_val = item.get("理由")
  310. if score_val is not None:
  311. try:
  312. fs[code] = float(score_val)
  313. if reason_val:
  314. score_reasons[code] = reason_val
  315. except Exception:
  316. pass
  317. item = cap.get("泛化性")
  318. if isinstance(item, dict):
  319. score_val = item.get("得分")
  320. reason_val = item.get("理由")
  321. if score_val is not None:
  322. try:
  323. fs["step_generality"] = float(score_val)
  324. if reason_val:
  325. score_reasons["step_generality"] = reason_val
  326. except Exception:
  327. pass
  328. # 工具
  329. tool = dynamic.get("工具") or {}
  330. if tool:
  331. tool_keys = {
  332. "能力边界覆盖": "tool_boundary",
  333. "有效比较": "tool_comparison",
  334. "参数/接口具体性": "tool_specificity",
  335. "实操示例": "tool_example",
  336. "版本&限制": "tool_limits"
  337. }
  338. for cn, code in tool_keys.items():
  339. item = tool.get(cn)
  340. if isinstance(item, dict):
  341. score_val = item.get("得分")
  342. reason_val = item.get("理由")
  343. if score_val is not None:
  344. try:
  345. fs[code] = float(score_val)
  346. if reason_val:
  347. score_reasons[code] = reason_val
  348. except Exception:
  349. pass
  350. else:
  351. # 兼容老版 1-5 分数 schema (带 "评分" 或 old-style flatness)
  352. is_new_schema = "评分" in e or "知识类型" in e or "制作相关性" in e
  353. CN_TO_EN = {
  354. "相关性": "relevance",
  355. "成品质量": "result_quality",
  356. "可信度": "credibility",
  357. "具体用例": "concrete_use_case",
  358. "完整性": "completeness",
  359. "步骤结构": "step_structure",
  360. "步骤可复现": "step_reproducibility",
  361. "步骤可复现性": "step_reproducibility",
  362. "能力定义": "capability_definition",
  363. "实现深度": "implementation_depth",
  364. "边界失败": "boundary_failure_eval",
  365. "通用性": "generality",
  366. "能力覆盖": "capability_coverage",
  367. "有效对比": "effective_comparison",
  368. "参数具体": "param_specificity",
  369. "实操示例": "worked_example",
  370. "实操用例": "worked_example",
  371. "示例完整": "worked_example",
  372. "版本限制": "version_limits",
  373. "版本说明": "version_limits",
  374. "限制说明": "version_limits",
  375. }
  376. if is_new_schema:
  377. pf = e.get("评分") or {}
  378. for cat, metrics in pf.items():
  379. if isinstance(metrics, dict):
  380. for metric, val in metrics.items():
  381. en_key = CN_TO_EN.get(metric, metric)
  382. if isinstance(val, dict) and "得分" in val:
  383. try: fs[en_key] = int(val["得分"])
  384. except Exception: pass
  385. elif isinstance(val, (int, float)):
  386. fs[en_key] = int(val)
  387. if isinstance(val, dict) and "理由" in val:
  388. score_reasons[en_key] = val["理由"]
  389. else:
  390. fs = flat_scores(e.get("scores", {}))
  391. # 计算均分 (overall)
  392. if is_mod_schema:
  393. rel_keys = {"relevance_production", "relevance_query"}
  394. rel_vals = [v for k, v in fs.items() if k in rel_keys]
  395. qual_vals = [v for k, v in fs.items() if k not in rel_keys]
  396. rel_avg = sum(rel_vals) / len(rel_vals) if rel_vals else None
  397. qual_avg = sum(qual_vals) / len(qual_vals) if qual_vals else None
  398. if rel_avg is not None and qual_avg is not None:
  399. overall = round((rel_avg + qual_avg) / 2, 1)
  400. elif rel_avg is not None:
  401. overall = round(rel_avg, 1)
  402. elif qual_avg is not None:
  403. overall = round(qual_avg, 1)
  404. else:
  405. overall = 0.0
  406. else:
  407. overall = round(sum(fs.values()) / len(fs), 1) if fs else 0
  408. anomaly = bool(e.get("error")) or not fs
  409. grade = p.get("_quality_grade", "")
  410. fb = r.get("found_by_queries", [])
  411. # 4. 解析 制作相关性 (production_relevance)
  412. if is_mod_schema:
  413. # 新版使用 "相关性" 中的 "和内容制作知识相关" 代表制作相关性
  414. production_relevance = fs.get("relevance_production")
  415. else:
  416. if is_new_schema:
  417. pr_block = e.get("制作相关性") or {}
  418. pr_raw = pr_block.get("得分") if isinstance(pr_block, dict) else pr_block
  419. if isinstance(pr_block, dict) and "理由" in pr_block:
  420. score_reasons["production_relevance"] = pr_block["理由"]
  421. else:
  422. pr_raw = e.get("production_relevance")
  423. try: production_relevance = int(float(pr_raw)) if pr_raw is not None else None
  424. except (TypeError, ValueError): production_relevance = None
  425. recency_hard = _recency_hard(p.get("publish_timestamp", ""))
  426. # 5. 解析 判定决策 (decision) 和 理由 (reason)
  427. reason = e.get("判定理由") or e.get("reason") or ""
  428. # 根据过滤指标决定是否保留 (过滤指标判定逻辑优先,不依赖文字匹配)
  429. is_discard = False
  430. # 制作相关性低于阈值则丢弃 (新版 0-10 满分,因此低于 4 丢弃;老版低于 2 丢弃)
  431. if production_relevance is not None:
  432. threshold = 4 if is_mod_schema else 2
  433. if production_relevance < threshold:
  434. is_discard = True
  435. # 时效性低于 2 被丢弃(发布时间超两年的老帖)
  436. if recency_hard is not None and recency_hard < 2:
  437. is_discard = True
  438. # 综合均分低于阈值被丢弃 (新版低于 6 丢弃;老版低于 3 丢弃)
  439. if overall is not None:
  440. threshold_ov = 6 if is_mod_schema else 3
  441. if overall < threshold_ov:
  442. is_discard = True
  443. decision = "discard" if is_discard else "report"
  444. # Find matching procedure html
  445. procedure_html = None
  446. case_id = r.get("case_id", "")
  447. title = p.get("title", "")
  448. run_dir = RUNS_DIR / run
  449. if run_dir.is_dir():
  450. # 1. 优先扫描该帖子对应的文件夹下的任何 HTML 文件 (不限名称)
  451. # 文件夹名格式: {form}_{platform}_{channel_content_id[:8]}
  452. content_id = r.get("channel_content_id") or ""
  453. if not content_id and case_id and "_" in case_id:
  454. content_id = case_id.split("_", 1)[1]
  455. plat_key = r.get("platform") or ""
  456. if form_name and plat_key and content_id:
  457. folder_name = f"{form_name}_{plat_key}_{content_id[:8]}"
  458. case_dir = run_dir / "procedures" / folder_name
  459. if case_dir.is_dir():
  460. html_files = list(case_dir.glob("*.html"))
  461. if html_files:
  462. procedure_html = f"runs_full/{run}/procedures/{folder_name}/{html_files[0].name}"
  463. # 1b. 兼容 fallback: 批量提取模式下直接以 case_id 命名的文件夹
  464. if not procedure_html and case_id:
  465. case_dir = run_dir / "procedures" / case_id
  466. if case_dir.is_dir():
  467. html_files = list(case_dir.glob("*.html"))
  468. if html_files:
  469. procedure_html = f"runs_full/{run}/procedures/{case_id}/{html_files[0].name}"
  470. # 2. 其次匹配标准文件名: case-{case_id}.html 或 {case_id}.html
  471. candidate_dirs = [run_dir, run_dir / "procedures"]
  472. if not procedure_html and case_id:
  473. named_files = [f"case-{case_id}.html", f"{case_id}.html"]
  474. for d_dir in candidate_dirs:
  475. if d_dir.is_dir():
  476. for name in named_files:
  477. if (d_dir / name).is_file():
  478. procedure_html = f"runs_full/{run}/procedures/{name}" if d_dir.name == "procedures" else f"runs_full/{run}/{name}"
  479. break
  480. if procedure_html:
  481. break
  482. # 3. 再次匹配 HTML 内部的标准声明 (meta 标签或 HTML 注释)
  483. if not procedure_html and case_id:
  484. for d_dir in candidate_dirs:
  485. if d_dir.is_dir():
  486. for html_path in d_dir.glob("*.html"):
  487. try:
  488. content = html_path.read_text(encoding="utf-8")
  489. if f'name="case-id" content="{case_id}"' in content or \
  490. f'name="case_id" content="{case_id}"' in content or \
  491. f'<!-- case_id: {case_id} -->' in content or \
  492. f'<!-- case-id: {case_id} -->' in content:
  493. procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
  494. break
  495. except Exception:
  496. continue
  497. if procedure_html:
  498. break
  499. # 4. 最后使用标题作为兜底模糊匹配
  500. if not procedure_html and title:
  501. for d_dir in candidate_dirs:
  502. if d_dir.is_dir():
  503. for html_path in d_dir.glob("*.html"):
  504. try:
  505. content = html_path.read_text(encoding="utf-8")
  506. if title in content:
  507. procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
  508. break
  509. except Exception:
  510. continue
  511. if procedure_html:
  512. break
  513. if procedure_html:
  514. procedure_html = procedure_html.replace("runs_full/", f"{RUNS_DIR_NAME}/")
  515. return {
  516. "case_id": r.get("case_id", ""),
  517. "platform": PLAT.get(r.get("platform"), r.get("platform")), "platformKey": r.get("platform"),
  518. "title": p.get("title", "") or "(无标题)", "date": (p.get("publish_timestamp", "") or "")[:10],
  519. "url": r.get("source_url", ""), "engagement": f'{p.get("like_count", 0)} 赞',
  520. "knowledge_type": kt, "decision": decision,
  521. "tools": [KT.get(k, k) for k in kt] + ([f"质量 {grade}"] if grade else []), "found_by": fb,
  522. "images": (p.get("images") or [])[:6], "text": p.get("body_text", "") or "",
  523. "scores": fs, "overall": overall, "reason": reason, "score_reasons": score_reasons,
  524. "grade": grade, "qscore": p.get("_quality_score", 0), "anomaly": anomaly,
  525. "production_relevance": production_relevance, "recency_hard": recency_hard,
  526. "run": run, "procedure_html": procedure_html,
  527. }
  528. def scan_runs():
  529. runs = {}
  530. for f in sorted(glob.glob(str(RUNS_DIR / "*" / "form_*.json"))):
  531. try:
  532. d = json.load(open(f, encoding="utf-8"))
  533. except Exception:
  534. continue
  535. run = pathlib.Path(f).parent.name
  536. form_name = d.get("form") or ""
  537. results = [adapt(r, run, form_name) for r in d.get("results", [])]
  538. report_val = sum(1 for r in results if r.get("decision") == "report" and not r.get("anomaly"))
  539. discard_val = sum(1 for r in results if r.get("decision") == "discard" and not r.get("anomaly"))
  540. runs.setdefault(run, []).append({
  541. "form": d.get("form"), "query": d.get("query"), "original_q": d.get("original_q", ""),
  542. "requirement": d.get("requirement", ""),
  543. "platforms": d.get("platforms", []), "total": d.get("total"),
  544. "report": report_val, "discard": discard_val,
  545. "results": results,
  546. })
  547. for v in runs.values():
  548. v.sort(key=lambda x: x.get("form") or "")
  549. if MODE == "new":
  550. def _mtime(name):
  551. try:
  552. return (RUNS_DIR / name).stat().st_mtime
  553. except Exception:
  554. return 0
  555. sorted_keys = sorted(runs.keys(), key=_mtime, reverse=True)
  556. else:
  557. def _qnum(name):
  558. m = re.search(r"\d+", name)
  559. return (int(m.group()) if m else 0, name)
  560. sorted_keys = [kv[0] for kv in sorted(runs.items(), key=lambda kv: _qnum(kv[0]))]
  561. out = []
  562. for k in sorted_keys:
  563. v = runs[k]
  564. oq = v[0].get("original_q") or v[0].get("query") or ""
  565. seen, hits = set(), 0 # 知识命中数 = 各形式采纳(report)且非异常、按 url 去重后的帖子数
  566. for f in v:
  567. for r in f.get("results", []):
  568. if r.get("decision") == "report" and not r.get("anomaly") and r.get("url") not in seen:
  569. seen.add(r.get("url")); hits += 1
  570. out.append({"key": k, "forms": v, "dims": parse_dims(oq), "original_q": oq,
  571. "hits": hits, "tot": sum((f.get("total") or 0) for f in v)})
  572. active_reevals = {k: v["status"] for k, v in ACTIVE_REEVALS.items()}
  573. active_batch_tasks = {k: v["status"] for k, v in ACTIVE_BATCH_TASKS.items()}
  574. return {"queries": out, "actions": ACTIONS_TAX, "types": TYPES_TAX, "matrix": _MATRIX, "active_reevals": active_reevals, "active_batch_tasks": active_batch_tasks}
  575. class H(BaseHTTPRequestHandler):
  576. def _send(self, code, body, ctype):
  577. b = body.encode("utf-8") if isinstance(body, str) else body
  578. self.send_response(code)
  579. if ctype.startswith("text/") or ctype == "application/json" or ctype == "application/javascript":
  580. self.send_header("Content-Type", ctype + "; charset=utf-8")
  581. else:
  582. self.send_header("Content-Type", ctype)
  583. self.send_header("Content-Length", str(len(b))); self.end_headers(); self.wfile.write(b)
  584. def do_GET(self):
  585. parsed = urlparse(self.path)
  586. path = parsed.path
  587. params = parse_qs(parsed.query)
  588. if path in ("/", "/index.html"):
  589. try:
  590. html_file = "new_query.html" if MODE == "new" else "index.html"
  591. page = (HERE / html_file).read_text(encoding="utf-8")
  592. self._send(200, page, "text/html")
  593. except Exception as e:
  594. self._send(500, f"Error reading page: {e}", "text/plain")
  595. elif path == "/api/data":
  596. self._send(200, json.dumps(scan_runs(), ensure_ascii=False), "application/json")
  597. elif path == "/api/procedure_status":
  598. q = (params.get("q") or [""])[0].strip()
  599. form = (params.get("form") or [""])[0].strip()
  600. case_id = (params.get("case_id") or [""])[0].strip()
  601. if not q or not form or not case_id:
  602. self._send(400, "missing q, form, or case_id", "text/plain")
  603. return
  604. folder_name = f"{form}_{_short_case(case_id)}"
  605. task_key = f"{q}/{folder_name}"
  606. if task_key in ACTIVE_TASKS:
  607. task = ACTIVE_TASKS[task_key]
  608. res = {
  609. "status": task["status"],
  610. "error": task["error"]
  611. }
  612. if task["status"] == "success":
  613. out_dir = RUNS_DIR / q / "procedures" / folder_name
  614. html_files = list(out_dir.glob("*.html")) if out_dir.is_dir() else []
  615. if html_files:
  616. res["procedure_html"] = f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}/{html_files[0].name}"
  617. self._send(200, json.dumps(res, ensure_ascii=False), "application/json")
  618. return
  619. out_dir = RUNS_DIR / q / "procedures" / folder_name
  620. html_files = list(out_dir.glob("*.html")) if out_dir.is_dir() else []
  621. if not html_files and case_id:
  622. fallback_dir = RUNS_DIR / q / "procedures" / case_id
  623. html_files = list(fallback_dir.glob("*.html")) if fallback_dir.is_dir() else []
  624. if html_files:
  625. self._send(200, json.dumps({
  626. "status": "success",
  627. "procedure_html": f"{RUNS_DIR_NAME}/{q}/procedures/{case_id}/{html_files[0].name}"
  628. }, ensure_ascii=False), "application/json")
  629. return
  630. else:
  631. if html_files:
  632. self._send(200, json.dumps({
  633. "status": "success",
  634. "procedure_html": f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}/{html_files[0].name}"
  635. }, ensure_ascii=False), "application/json")
  636. return
  637. log_path = out_dir / "_extract.log"
  638. if not log_path.is_file() and case_id:
  639. fallback_dir = RUNS_DIR / q / "procedures" / case_id
  640. log_path = fallback_dir / "_extract.log"
  641. if log_path.is_file():
  642. self._send(200, json.dumps({"status": "failed", "error": "Not running, but no HTML output found (possibly crashed)."}, ensure_ascii=False), "application/json")
  643. return
  644. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  645. elif path == "/api/procedure_log":
  646. q = (params.get("q") or [""])[0].strip()
  647. form = (params.get("form") or [""])[0].strip()
  648. case_id = (params.get("case_id") or [""])[0].strip()
  649. if not q or not form or not case_id:
  650. self._send(400, "missing q, form, or case_id", "text/plain")
  651. return
  652. folder_name = f"{form}_{_short_case(case_id)}"
  653. log_path = RUNS_DIR / q / "procedures" / folder_name / "_extract.log"
  654. if not log_path.is_file() and case_id:
  655. log_path = RUNS_DIR / q / "procedures" / case_id / "_extract.log"
  656. if not log_path.is_file():
  657. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  658. return
  659. try:
  660. content = log_path.read_text(encoding="utf-8", errors="replace")
  661. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  662. except Exception as e:
  663. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  664. elif path == "/api/spec_content":
  665. file_name = (params.get("file") or [""])[0].strip()
  666. allowed = [
  667. "README.md",
  668. "tools.md",
  669. "extraction/phase1-skeleton.md",
  670. "extraction/phase2-normalize.md",
  671. "extraction/phase3-finalize.md",
  672. "taxonomy/type_suggestions.md"
  673. ]
  674. if file_name not in allowed:
  675. self._send(400, "invalid file parameter", "text/plain")
  676. return
  677. target_path = HERE / "procedure-dsl" / "spec" / file_name
  678. if not target_path.is_file():
  679. self._send(404, "spec file not found", "text/plain")
  680. return
  681. try:
  682. content = target_path.read_text(encoding="utf-8", errors="replace")
  683. self._send(200, json.dumps({"content": content}, ensure_ascii=False), "application/json")
  684. except Exception as e:
  685. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  686. elif path == "/api/reeval_status":
  687. q = (params.get("q") or [""])[0].strip()
  688. if not q:
  689. self._send(400, "missing q", "text/plain")
  690. return
  691. if q in ACTIVE_REEVALS:
  692. self._send(200, json.dumps({
  693. "status": ACTIVE_REEVALS[q]["status"],
  694. "error": ACTIVE_REEVALS[q].get("error")
  695. }, ensure_ascii=False), "application/json")
  696. else:
  697. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  698. elif path == "/api/search_eval_status":
  699. q = (params.get("q") or [""])[0].strip()
  700. if not q:
  701. self._send(400, "missing q", "text/plain")
  702. return
  703. if q in ACTIVE_SEARCH_EVALS:
  704. self._send(200, json.dumps({
  705. "status": ACTIVE_SEARCH_EVALS[q]["status"],
  706. "error": ACTIVE_SEARCH_EVALS[q].get("error")
  707. }, ensure_ascii=False), "application/json")
  708. else:
  709. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  710. elif path == "/api/batch_generate_status":
  711. q = (params.get("q") or [""])[0].strip()
  712. if not q:
  713. self._send(400, "missing q", "text/plain")
  714. return
  715. if q in ACTIVE_BATCH_TASKS:
  716. self._send(200, json.dumps({
  717. "status": ACTIVE_BATCH_TASKS[q]["status"],
  718. "error": ACTIVE_BATCH_TASKS[q].get("error")
  719. }, ensure_ascii=False), "application/json")
  720. else:
  721. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  722. elif path == "/api/batch_generate_log":
  723. q = (params.get("q") or [""])[0].strip()
  724. if not q:
  725. self._send(400, "missing q", "text/plain")
  726. return
  727. safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
  728. log_path = RUNS_DIR / safe_q / "procedures" / "_batch_extract.log"
  729. if not log_path.is_file():
  730. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  731. return
  732. try:
  733. content = log_path.read_text(encoding="utf-8", errors="replace")
  734. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  735. except Exception as e:
  736. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  737. elif path == "/api/search_eval_log":
  738. q = (params.get("q") or [""])[0].strip()
  739. if not q:
  740. self._send(400, "missing q", "text/plain")
  741. return
  742. safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
  743. log_path = RUNS_DIR / safe_q / "_search_eval.log"
  744. if not log_path.is_file():
  745. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  746. return
  747. try:
  748. content = log_path.read_text(encoding="utf-8", errors="replace")
  749. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  750. except Exception as e:
  751. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  752. elif self.path.startswith("/runs_full/") or self.path.startswith("/runs_new/"):
  753. try:
  754. import urllib.parse
  755. clean_path = urllib.parse.unquote(self.path.split("?")[0])
  756. parts = clean_path.strip("/").split("/")
  757. target_file = HERE
  758. for part in parts:
  759. target_file = target_file / part
  760. runs_dir = HERE / parts[0]
  761. if runs_dir.resolve() in target_file.resolve().parents and target_file.is_file():
  762. content = target_file.read_bytes()
  763. ext = target_file.suffix.lower()
  764. ctype = "text/html"
  765. if ext in (".png", ".webp"):
  766. ctype = f"image/{ext[1:]}"
  767. elif ext in (".jpg", ".jpeg"):
  768. ctype = "image/jpeg"
  769. elif ext == ".json":
  770. ctype = "application/json"
  771. elif ext == ".js":
  772. ctype = "application/javascript"
  773. elif ext == ".css":
  774. ctype = "text/css"
  775. self._send(200, content, ctype)
  776. else:
  777. self._send(404, "not found", "text/plain")
  778. except Exception as e:
  779. self._send(500, f"Error: {e}", "text/plain")
  780. else:
  781. self._send(404, "not found", "text/plain")
  def do_POST(self):
      """Dispatch POST requests to the JSON API endpoints.

      Routes (matched against ``self.path`` exactly):

      * ``/api/run_search_eval`` - run ``search_and_evaluate.py`` for one
        query and, on success, copy ``evaluated.json`` to ``form_A.json``.
      * ``/api/generate_procedure`` - write the inputs for one case of a
        form and start ``run_extraction_task`` for it.
      * ``/api/reeval`` - run ``batch_3forms.py --reeval`` for one query.
      * ``/api/batch_generate_procedure`` - run ``run_cyber.py --batch`` over
        every case of a form that ``adapt`` marks as a clean "report".
      * ``/api/save_spec`` - overwrite one of the whitelisted spec files.

      Any other path is answered with a 404 JSON error.

      Long-running work is started on a daemon thread; the response is sent
      as soon as the job is launched and its state is tracked in the
      module-level ``ACTIVE_SEARCH_EVALS`` / ``ACTIVE_TASKS`` /
      ``ACTIVE_REEVALS`` / ``ACTIVE_BATCH_TASKS`` dicts.

      Request bodies must be JSON objects. A malformed ``Content-Length``,
      an undecodable or invalid body, or a non-object payload is answered
      with 400. Values that end up as path components under ``RUNS_DIR``
      (``q``, ``form``) are validated before any path is built from them.
      """

      def reply(status, obj, ascii_only=False):
          # Serialise ``obj`` and send it as the JSON response body.
          self._send(status, json.dumps(obj, ensure_ascii=ascii_only), "application/json")

      def read_payload():
          # Return the body parsed as a JSON object, or None once a 400
          # response has already been sent.
          try:
              length = int(self.headers.get("Content-Length") or 0)
          except ValueError:
              reply(400, {"error": "bad Content-Length header"}, ascii_only=True)
              return None
          try:
              raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
              payload = json.loads(raw)
          except Exception as e:
              reply(400, {"error": f"bad json: {e}"}, ascii_only=True)
              return None
          if not isinstance(payload, dict):
              # Every route calls payload.get(...); lists/scalars are rejected here.
              reply(400, {"error": "bad json: expected an object"}, ascii_only=True)
              return None
          return payload

      def text(payload, key, default=""):
          # Stripped string value of ``key``; falsy values fall back to
          # ``default`` and non-string values are stringified.
          value = payload.get(key) or default
          return (value if isinstance(value, str) else str(value)).strip()

      def is_safe_name(name):
          # True when ``name`` is usable as a single directory name: not
          # empty, not "." / "..", and free of separators and NUL.
          if name in ("", ".", ".."):
              return False
          return not any(ch in name for ch in ("/", "\\", "\x00"))

      def spawn(cmd, log_fh, env=None):
          # Start ``cmd`` from HERE with stdout and stderr written to ``log_fh``.
          flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
          return subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
                                  cwd=str(HERE), env=env, creationflags=flags)

      if self.path == "/api/run_search_eval":
          payload = read_payload()
          if payload is None:
              return
          q = text(payload, "query")
          platforms = text(payload, "platforms", "xhs,zhihu")
          if not q:
              reply(400, {"error": "missing query"})
              return
          # Drop characters that are not allowed in the directory name. "."
          # and ".." are refused too so the run directory stays inside RUNS_DIR.
          safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
          if safe_q in ("", ".", ".."):
              reply(400, {"error": "invalid query name"})
              return
          q_dir = RUNS_DIR / safe_q
          temp_queries_file = q_dir / "temp_queries.json"
          try:
              q_dir.mkdir(parents=True, exist_ok=True)
              with open(temp_queries_file, "w", encoding="utf-8") as f:
                  json.dump([q], f, ensure_ascii=False)
          except Exception as e:
              reply(500, {"error": f"failed to write temp query: {e}"}, ascii_only=True)
              return
          log_path = q_dir / "_search_eval.log"
          # State is keyed by the raw query text, not by the sanitised name.
          ACTIVE_SEARCH_EVALS[q] = {
              "status": "running",
              "pid": None,
              "error": None,
          }

          def run_search_eval_task(query_text, queries_file, out_dir, target_platforms, log_file):
              # Worker thread: run the search script, then publish form_A.json.
              try:
                  cmd = [
                      sys.executable, "-u", str(HERE / "search_and_evaluate.py"),
                      "--queries", str(queries_file),
                      "--platforms", target_platforms,
                      "--output-dir", str(out_dir),
                      "--max-count", "20",
                  ]
                  import os
                  env = os.environ.copy()
                  env["PYTHONIOENCODING"] = "utf-8"
                  env["PYTHONUTF8"] = "1"
                  with open(log_file, "w", encoding="utf-8", buffering=1) as log_fh:
                      proc = spawn(cmd, log_fh, env=env)
                      ACTIVE_SEARCH_EVALS[query_text]["pid"] = proc.pid
                      proc.wait()
                  if proc.returncode == 0:
                      evaluated_file = out_dir / "evaluated.json"
                      if evaluated_file.is_file():
                          with open(evaluated_file, "r", encoding="utf-8") as rf:
                              eval_data = json.load(rf)
                          eval_data["form"] = "A"
                          eval_data["query"] = query_text
                          eval_data["original_q"] = query_text
                          with open(out_dir / "form_A.json", "w", encoding="utf-8") as wf:
                              json.dump(eval_data, wf, ensure_ascii=False, indent=2)
                          ACTIVE_SEARCH_EVALS[query_text]["status"] = "success"
                      else:
                          ACTIVE_SEARCH_EVALS[query_text]["status"] = "failed"
                          ACTIVE_SEARCH_EVALS[query_text]["error"] = "evaluated.json not found after execution"
                          with open(log_file, "a", encoding="utf-8") as f_err:
                              f_err.write("\n[server error] evaluated.json not found after execution\n")
                  else:
                      ACTIVE_SEARCH_EVALS[query_text]["status"] = "failed"
                      ACTIVE_SEARCH_EVALS[query_text]["error"] = f"search_and_evaluate.py exited with code {proc.returncode}"
              except Exception as ex:
                  ACTIVE_SEARCH_EVALS[query_text]["status"] = "failed"
                  ACTIVE_SEARCH_EVALS[query_text]["error"] = str(ex)
                  try:
                      with open(log_file, "a", encoding="utf-8") as f_err:
                          f_err.write(f"\n[server error] Exception: {ex}\n")
                  except Exception:
                      # Best effort: the failure is already recorded in ACTIVE_SEARCH_EVALS.
                      pass
              finally:
                  # The temporary query list is only an input for the subprocess.
                  try:
                      if queries_file.is_file():
                          queries_file.unlink()
                  except Exception:
                      pass

          t = threading.Thread(target=run_search_eval_task,
                               args=(q, temp_queries_file, q_dir, platforms, log_path))
          t.daemon = True
          t.start()
          reply(200, {
              "status": "started",
              "query": q,
              "log": f"{RUNS_DIR_NAME}/{safe_q}/_search_eval.log",
          })
      elif self.path == "/api/generate_procedure":
          payload = read_payload()
          if payload is None:
              return
          q = text(payload, "q")
          form = text(payload, "form")
          case_id = text(payload, "case_id")
          engine = text(payload, "engine", "cyber_runner")
          model = text(payload, "model", "google/gemini-3.1-flash-lite")
          if MODE != "new" and not re.match(r"^q\d+$", q):
              reply(400, {"error": f"bad q (expect 'qNN'): {q!r}"})
              return
          if not is_safe_name(q):
              # In "new" mode q is free text, so make sure it cannot leave RUNS_DIR.
              reply(400, {"error": f"bad q: {q!r}"})
              return
          if form not in ("A", "B", "C"):
              reply(400, {"error": f"bad form: {form!r}"})
              return
          if not case_id:
              reply(400, {"error": "missing case_id"})
              return
          q_dir = RUNS_DIR / q
          form_file = q_dir / f"form_{form}.json"
          if not form_file.is_file():
              reply(404, {"error": f"form file not found: {form_file.name}"})
              return
          try:
              with open(form_file, encoding="utf-8") as f:
                  form_data = json.load(f)
          except Exception as e:
              reply(500, {"error": f"failed to read form: {e}"})
              return
          matching_result = next(
              (r for r in form_data.get("results", []) if r.get("case_id") == case_id),
              None,
          )
          if not matching_result:
              reply(404, {"error": f"case_id {case_id} not found in form {form}"})
              return
          folder_name = f"{form}_{_short_case(case_id)}"
          out_dir = q_dir / "procedures" / folder_name
          # The backup helper runs before the output directory is (re)created.
          backup_procedure_history(out_dir)
          out_dir.mkdir(parents=True, exist_ok=True)
          src_path = out_dir / "_source.json"
          try:
              with open(src_path, "w", encoding="utf-8") as f:
                  json.dump(_source_to_dsl_input(matching_result), f, ensure_ascii=False, indent=2)
              score = _composite_score(matching_result.get("llm_evaluation") or {})
              _write_meta(out_dir, case_id=case_id, from_q=q, form=form, score=score)
          except Exception as e:
              reply(500, {"error": f"failed to write inputs: {e}"})
              return
          task_key = f"{q}/{folder_name}"
          ACTIVE_TASKS[task_key] = {
              "status": "running",
              "start_time": datetime.now().isoformat(),
              "pid": None,
              "error": None,
          }
          t = threading.Thread(target=run_extraction_task,
                               args=(q, folder_name, src_path, out_dir, engine, model))
          t.daemon = True
          t.start()
          reply(200, {
              "status": "started",
              "task_key": task_key,
              "log": f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}/_extract.log",
          })
      elif self.path == "/api/reeval":
          payload = read_payload()
          if payload is None:
              return
          q = text(payload, "q")
          if MODE != "new" and not re.match(r"^q\d+$", q):
              reply(400, {"error": f"bad q (expect 'qNN'): {q!r}"})
              return
          if not is_safe_name(q):
              # An empty q would otherwise resolve to RUNS_DIR itself.
              reply(400, {"error": f"bad q: {q!r}"})
              return
          q_dir = RUNS_DIR / q
          if not q_dir.is_dir():
              reply(404, {"error": f"{RUNS_DIR_NAME}/{q} not found"})
              return
          backup_reeval_history(q_dir)
          log_path = q_dir / "_reeval.log"
          # Resolve the reported log location before launching anything, so a
          # path error cannot turn a started job into a "failed to start" reply.
          try:
              log_rel = str(log_path.relative_to(HERE))
          except ValueError:
              log_rel = f"{RUNS_DIR_NAME}/{q}/_reeval.log"
          try:
              log_fh = open(log_path, "w", encoding="utf-8", buffering=1)
              cmd = [sys.executable, "-u", str(HERE / "batch_3forms.py"),
                     "--reeval", "--reeval-q", q, "--output-dir", str(RUNS_DIR)]
              try:
                  proc = spawn(cmd, log_fh)
              except Exception:
                  # No waiter thread will ever own the handle, so close it here.
                  log_fh.close()
                  raise
              ACTIVE_REEVALS[q] = {
                  "status": "running",
                  "pid": proc.pid,
                  "error": None,
              }

              def wait_reeval(q_key, p_obj, fh):
                  # Worker thread: wait for the subprocess, record the outcome,
                  # then close the log handle it was writing to.
                  try:
                      p_obj.wait()
                      if p_obj.returncode == 0:
                          ACTIVE_REEVALS[q_key]["status"] = "success"
                      else:
                          ACTIVE_REEVALS[q_key]["status"] = "failed"
                          ACTIVE_REEVALS[q_key]["error"] = f"Subprocess exited with code {p_obj.returncode}"
                  except Exception as ex:
                      ACTIVE_REEVALS[q_key]["status"] = "failed"
                      ACTIVE_REEVALS[q_key]["error"] = str(ex)
                  finally:
                      try:
                          fh.close()
                      except Exception:
                          pass

              t = threading.Thread(target=wait_reeval, args=(q, proc, log_fh))
              t.daemon = True
              t.start()
              reply(200, {"status": "started", "pid": proc.pid, "q": q, "log": log_rel})
          except Exception as e:
              reply(500, {"error": f"failed to start: {e}"})
      elif self.path == "/api/batch_generate_procedure":
          payload = read_payload()
          if payload is None:
              return
          q = text(payload, "q")
          form = text(payload, "form", "A")
          # "engine" in the payload is not forwarded to the batch command below.
          model = text(payload, "model", "google/gemini-3.1-flash-lite")
          try:
              concurrency = int(payload.get("concurrency", 4))
          except (TypeError, ValueError, OverflowError):
              concurrency = 4
          concurrency = max(1, min(16, concurrency))
          if not q:
              reply(400, {"error": "missing q"})
              return
          if not is_safe_name(q):
              reply(400, {"error": f"bad q: {q!r}"})
              return
          if form not in ("A", "B", "C"):
              # Same whitelist as /api/generate_procedure; form is part of a file name.
              reply(400, {"error": f"bad form: {form!r}"})
              return
          q_dir = RUNS_DIR / q
          form_file = q_dir / f"form_{form}.json"
          if not form_file.is_file():
              reply(404, {"error": f"form file not found: {form_file.name}"})
              return
          try:
              with open(form_file, encoding="utf-8") as f:
                  form_data = json.load(f)
          except Exception as e:
              reply(500, {"error": f"failed to read form: {e}"})
              return
          valid_results = []
          import copy
          for r in form_data.get("results", []):
              adapted = adapt(r, q, form)
              if adapted.get("decision") == "report" and not adapted.get("anomaly"):
                  # Work on a copy so the loaded form data keeps its original case_id.
                  r_copy = copy.deepcopy(r)
                  original_case_id = r.get("case_id", "")
                  r_copy["case_id"] = f"{form}_{_short_case(original_case_id)}"
                  valid_results.append(r_copy)
          if not valid_results:
              reply(400, {"error": "没有可提取工序的帖子(所有帖子都已被过滤,或都存在解析异常)"})
              return
          procedures_dir = q_dir / "procedures"
          procedures_dir.mkdir(parents=True, exist_ok=True)
          batch_posts_path = procedures_dir / "temp_batch.json"
          try:
              with open(batch_posts_path, "w", encoding="utf-8") as f:
                  json.dump({"results": valid_results}, f, ensure_ascii=False, indent=2)
          except Exception as e:
              reply(500, {"error": f"failed to write temp_batch.json: {e}"})
              return
          ACTIVE_BATCH_TASKS[q] = {
              "status": "running",
              "start_time": datetime.now().isoformat(),
              "pid": None,
              "error": None,
          }

          def run_batch_extraction():
              # Worker thread: run the batch extractor, then rebuild the workflows.
              try:
                  log_path = procedures_dir / "_batch_extract.log"
                  cmd = [
                      sys.executable, "-u", str(HERE / "procedure-dsl" / "run_cyber.py"),
                      str(batch_posts_path),
                      "--out-dir", str(procedures_dir),
                      "--batch",
                      "--batch-workers", str(concurrency),
                      "--model", model,
                      "--max-turns", "300",
                  ]
                  with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
                      proc = spawn(cmd, log_fh)
                      ACTIVE_BATCH_TASKS[q]["pid"] = proc.pid
                      proc.wait()
                  if proc.returncode == 0:
                      try:
                          import build_workflows
                          build_workflows.write_run(q, runs_dir=RUNS_DIR)
                          ACTIVE_BATCH_TASKS[q]["status"] = "success"
                      except Exception as ex:
                          ACTIVE_BATCH_TASKS[q]["status"] = "failed"
                          ACTIVE_BATCH_TASKS[q]["error"] = f"Workflows compilation failed: {ex}"
                  else:
                      ACTIVE_BATCH_TASKS[q]["status"] = "failed"
                      ACTIVE_BATCH_TASKS[q]["error"] = f"Batch runner exited with code {proc.returncode}"
              except Exception as ex:
                  ACTIVE_BATCH_TASKS[q]["status"] = "failed"
                  ACTIVE_BATCH_TASKS[q]["error"] = str(ex)
              finally:
                  # The temporary batch input is only needed by the subprocess.
                  try:
                      if batch_posts_path.is_file():
                          batch_posts_path.unlink()
                  except Exception:
                      pass

          t = threading.Thread(target=run_batch_extraction)
          t.daemon = True
          t.start()
          reply(200, {
              "status": "started",
              "q": q,
              "log": f"{RUNS_DIR_NAME}/{q}/procedures/_batch_extract.log",
          })
      elif self.path == "/api/save_spec":
          payload = read_payload()
          if payload is None:
              return
          file_name = text(payload, "file")
          content = payload.get("content") or ""
          # Only these spec files may be overwritten; anything else is refused.
          allowed = (
              "README.md",
              "tools.md",
              "extraction/phase1-skeleton.md",
              "extraction/phase2-normalize.md",
              "extraction/phase3-finalize.md",
              "taxonomy/type_suggestions.md",
          )
          if file_name not in allowed:
              reply(400, {"error": "invalid file parameter"}, ascii_only=True)
              return
          target_path = HERE / "procedure-dsl" / "spec" / file_name
          try:
              target_path.parent.mkdir(parents=True, exist_ok=True)
              target_path.write_text(content, encoding="utf-8")
              reply(200, {"status": "ok"})
          except Exception as e:
              reply(500, {"error": str(e)})
      else:
          reply(404, {"error": "not found"}, ascii_only=True)
  def log_message(self, *a):
      """Override the handler's ``log_message`` hook with a no-op.

      All positional arguments are accepted and ignored; nothing is
      written anywhere and ``None`` is returned.
      """
      return None
  if __name__ == "__main__":
      # Script entry point: report how many queries the scan currently finds,
      # then serve requests with handler class H on all interfaces until the
      # process is stopped.
      n = len(scan_runs()["queries"])
      banner = f"搜索评估查看 server ({MODE} 模式):http://0.0.0.0:{PORT} ({RUNS_DIR_NAME}/ 下 {n} 个 query,实时扫描)"
      print(banner)
      bind_address = ("0.0.0.0", PORT)
      server = ThreadingHTTPServer(bind_address, H)
      server.serve_forever()