server.py 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232
  1. # -*- coding: utf-8 -*-
  2. """搜索评估案例查看 server。
  3. 沿用 图文排版搜索评估.html 的版式(卡片 + dialog 详情 + rubric 评分条),
  4. 数据实时扫描 runs_full/*/form_*.json —— runs_full 下每新增一个 q 文件夹,刷新即出现。
  5. 分页:query → 三种形式(A/B/C) → 三个渠道 三行从上到下。
  6. 用法:python server.py [port] 默认 8770,浏览器开 http://0.0.0.0:8770
  7. """
  8. import json, re, glob, sys, pathlib, subprocess, threading
  9. from datetime import datetime
  10. from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
  11. from urllib.parse import urlparse, parse_qs
# Module-level configuration. Everything below runs at import time, in this
# order: console encoding, script directory, CLI arguments, data directory,
# display-name tables, and finally the taxonomy directory lookup.
try:  # Windows consoles default to cp1252, where printing Chinese text crashes; switch stdout to UTF-8.
    sys.stdout.reconfigure(encoding="utf-8")
except Exception:
    # Best effort: stdout may not support reconfigure(); keep whatever encoding it has.
    pass
# Directory containing this script; also put on sys.path so sibling modules import.
HERE = pathlib.Path(__file__).parent
sys.path.insert(0, str(HERE))
import argparse
_parser = argparse.ArgumentParser(description="搜索评估查看 server")
_parser.add_argument("port", type=int, nargs="?", default=8770, help="运行端口")
_parser.add_argument("--mode", choices=["legacy", "new"], default="legacy", help="运行模式:legacy / new")
# parse_known_args() is used, so unrecognised command-line arguments are ignored.
_args, _ = _parser.parse_known_args()
PORT = _args.port
MODE = _args.mode
# The mode selects which folder under HERE holds the run data.
RUNS_DIR_NAME = "runs_new" if MODE == "new" else "runs_full"
RUNS_DIR = HERE / RUNS_DIR_NAME
# Platform key -> display name.
PLAT = {"xhs": "小红书", "gzh": "公众号", "zhihu": "知乎", "x": "X", "bili": "B站", "douyin": "抖音",
        "sph": "视频号", "youtube": "YouTube", "github": "GitHub", "toutiao": "头条", "weibo": "微博"}
# Knowledge-type code -> display name.
KT = {"procedure": "工序", "step": "步骤", "tool": "工具"}
# Action leaves / type names are taken from the taxonomy so that original_q can be
# parsed back into its source dimensions (action x type are orthogonal).
# Path priority: search_eval/evaluation/ (primary source; the copy edited in the IDE is the one read at runtime)
# -> test_script/evaluation/ (historical copy, fallback) -> script/evaluation/ (older fallback).
# If none of them exists the taxonomy simply loads empty and the server still starts.
EVALDIR = HERE / "evaluation"
if not EVALDIR.exists():
    EVALDIR = HERE.parent.parent / "test_script" / "evaluation"
if not EVALDIR.exists():
    EVALDIR = HERE.parent / "evaluation"
  39. try:
  40. _jm = json.load(open(EVALDIR / "judged_matrix.json", encoding="utf-8"))
  41. ACT_L1 = {a["name"]: a["l1"] for a in _jm["actions"]}
  42. ACTION_SET = set(ACT_L1)
  43. TYPE_SET = {t["name"] for t in _jm["types"]}
  44. ACTIONS_TAX = [{"name": a["name"], "l1": a["l1"], "l2": a.get("l2", "")} for a in _jm["actions"]]
  45. TYPES_TAX = [{"name": t["name"], "l1": t["l1"]} for t in _jm["types"]]
  46. # taxonomy 顺序沿用 judged_matrix(严格版);矩阵分值改用 type_action_scores(宽松版) —
  47. # 两份是同一组 27×50 cell 的独立 gemini judging,前者只 53 格到 tier3,后者 156 格到 score3
  48. _tas = json.load(open(EVALDIR / "type_action_scores.json", encoding="utf-8"))["scores"]
  49. _MATRIX = []
  50. for a in _jm["actions"]:
  51. row = []
  52. for t in _jm["types"]:
  53. rec = _tas.get(t["name"], {}).get(a["name"])
  54. row.append({"tier": rec["score"], "r": rec.get("reason", "")} if rec else {})
  55. _MATRIX.append(row)
  56. except Exception:
  57. ACT_L1, ACTION_SET, TYPE_SET, ACTIONS_TAX, TYPES_TAX, _MATRIX = {}, set(), set(), [], [], []
  58. ACTIVE_TASKS = {}
  59. ACTIVE_REEVALS = {}
  60. ACTIVE_SEARCH_EVALS = {}
  61. ACTIVE_BATCH_TASKS = {}
  62. def backup_procedure_history(out_dir: pathlib.Path):
  63. if not out_dir.is_dir():
  64. return
  65. files_to_backup = [f for f in out_dir.iterdir() if f.is_file()]
  66. if not files_to_backup:
  67. return
  68. history_dir = out_dir / "history"
  69. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  70. version_dir = history_dir / timestamp
  71. version_dir.mkdir(parents=True, exist_ok=True)
  72. import shutil
  73. for f in files_to_backup:
  74. try:
  75. shutil.move(str(f), str(version_dir / f.name))
  76. except Exception as e:
  77. print(f"[backup] failed to move procedure file {f.name}: {e}")
  78. def backup_reeval_history(q_dir: pathlib.Path):
  79. if not q_dir.is_dir():
  80. return
  81. files_to_backup = list(q_dir.glob("form_*.json"))
  82. if not files_to_backup:
  83. return
  84. history_dir = q_dir / "history"
  85. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  86. version_dir = history_dir / timestamp
  87. version_dir.mkdir(parents=True, exist_ok=True)
  88. import shutil
  89. for f in files_to_backup:
  90. try:
  91. shutil.copy(str(f), str(version_dir / f.name))
  92. except Exception as e:
  93. print(f"[backup] failed to copy reeval file {f.name}: {e}")
  94. from batch_extract_procedures import _short_case, _source_to_dsl_input, _write_meta, _composite_score
  95. def run_extraction_task(q, folder_name, src_path, out_dir, engine, model):
  96. task_key = f"{q}/{folder_name}"
  97. log_path = out_dir / "_extract.log"
  98. try:
  99. out_dir.mkdir(parents=True, exist_ok=True)
  100. if engine == "cyber_runner":
  101. script_path = HERE / "procedure-dsl" / "run_cyber.py"
  102. else:
  103. script_path = HERE / "procedure-dsl" / "run_procedure_dsl.py"
  104. cmd = [
  105. sys.executable, "-u", str(script_path),
  106. str(src_path),
  107. "--out-dir", str(out_dir),
  108. "--model", model,
  109. "--max-turns", "300"
  110. ]
  111. if engine != "cyber_runner":
  112. cmd.extend(["--max-retries", "3"])
  113. flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
  114. with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
  115. proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
  116. cwd=str(HERE), creationflags=flags)
  117. ACTIVE_TASKS[task_key]["pid"] = proc.pid
  118. proc.wait()
  119. if proc.returncode == 0:
  120. try:
  121. import build_workflows
  122. build_workflows.write_one(q, folder_name, runs_dir=RUNS_DIR)
  123. ACTIVE_TASKS[task_key]["status"] = "success"
  124. except Exception as ex:
  125. ACTIVE_TASKS[task_key]["status"] = "failed"
  126. ACTIVE_TASKS[task_key]["error"] = f"Workflow compilation failed: {ex}"
  127. with open(log_path, "a", encoding="utf-8") as f_err:
  128. f_err.write(f"\n[server error] Workflow compilation failed: {ex}\n")
  129. else:
  130. ACTIVE_TASKS[task_key]["status"] = "failed"
  131. ACTIVE_TASKS[task_key]["error"] = f"Runner failed with exit code {proc.returncode}"
  132. except Exception as e:
  133. ACTIVE_TASKS[task_key]["status"] = "failed"
  134. ACTIVE_TASKS[task_key]["error"] = str(e)
  135. try:
  136. with open(log_path, "a", encoding="utf-8") as f_err:
  137. f_err.write(f"\n[server error] Extraction failed: {e}\n")
  138. except Exception:
  139. pass
  140. MODSET = {"文", "图", "视频", "音频"}
  141. TOOLQUAL = {"AI": "AI 模型", "软件": "桌面 APP", "电脑端": "桌面 APP", "在线": "云端 Web",
  142. "网页版": "云端 Web", "代码": "API·CLI", "命令行": "API·CLI", "插件": "插件扩展"}
  143. def parse_dims(oq):
  144. """把组合 query(如 '文 元素生成 提示词 教程')解析回 {动作, 类型, 动作L1, 约束}。"""
  145. toks = (oq or "").split()
  146. action = next((t for t in toks if t in ACTION_SET), None)
  147. type_ = next((t for t in toks if t in TYPE_SET), None)
  148. cons = None
  149. if toks:
  150. t0 = toks[0]
  151. if t0 in MODSET:
  152. cons = {"kind": "模态", "value": t0}
  153. elif t0 in TOOLQUAL:
  154. cons = {"kind": "工具类型", "value": TOOLQUAL[t0]}
  155. return {"action": action, "type": type_, "action_l1": ACT_L1.get(action, ""), "constraint": cons}
  156. def flat_scores(sc):
  157. f = {}
  158. for k, v in (sc or {}).items():
  159. if isinstance(v, dict):
  160. for kk, vv in v.items():
  161. try: f[kk] = int(vv)
  162. except Exception: pass
  163. else:
  164. try: f[k] = int(v)
  165. except Exception: pass
  166. return f
  167. def _recency_hard(date_str):
  168. """按 publish_timestamp 头 10 字符(YYYY-MM-DD)算硬时效:半年内=3 / 两年内=2 / 更早=1。
  169. 取代原 LLM 评的 recency 维度——脚本算更稳,发布时间在帖子抓取时就有,无需 LLM token。
  170. """
  171. try:
  172. d = datetime.strptime((date_str or "")[:10], "%Y-%m-%d")
  173. except (ValueError, TypeError):
  174. return None
  175. days = (datetime.now() - d).days
  176. if days <= 180: return 3
  177. if days <= 730: return 2
  178. return 1
  179. def adapt(r, run, form_name=None):
  180. p = r.get("post", {}); e = r.get("llm_evaluation", {})
  181. # 1. 解析 知识类型 (knowledge_type)
  182. kt = []
  183. kt_raw = e.get("知识类型") or e.get("knowledge_type") or []
  184. for k in kt_raw:
  185. if k in ("工序", "procedure"): kt.append("procedure")
  186. elif k in ("能力", "步骤", "step"): kt.append("step")
  187. elif k in ("工具", "tool"): kt.append("tool")
  188. fs = {}
  189. score_reasons = {}
  190. # 检测是否为 eval_prompt_sample-mod 里的新版 0-10 分数 schema
  191. is_mod_schema = "相关性" in e and isinstance(e["相关性"], dict) and ("和内容制作知识相关" in e["相关性"] or "和 query 相关" in e["相关性"])
  192. if is_mod_schema:
  193. # 新版 0-10 分数格式解析
  194. # 1. 相关性
  195. rel = e.get("相关性") or {}
  196. for subkey, item in rel.items():
  197. if isinstance(item, dict):
  198. score_val = item.get("得分")
  199. reason_val = item.get("理由")
  200. code_key = None
  201. if "内容制作" in subkey or "知识" in subkey:
  202. code_key = "relevance_production"
  203. elif "query" in subkey or "检索" in subkey:
  204. code_key = "relevance_query"
  205. if code_key and score_val is not None:
  206. try:
  207. fs[code_key] = float(score_val)
  208. if reason_val:
  209. score_reasons[code_key] = reason_val
  210. except Exception:
  211. pass
  212. # 2. 质量
  213. q_block = e.get("质量") or {}
  214. fixed = q_block.get("固定维度") or {}
  215. # 固定维度
  216. fixed_keys = {
  217. "时效性": "recency",
  218. "热度性": "popularity",
  219. }
  220. for cn, code in fixed_keys.items():
  221. item = fixed.get(cn)
  222. if isinstance(item, dict):
  223. score_val = item.get("得分")
  224. reason_val = item.get("理由")
  225. if score_val is not None:
  226. try:
  227. fs[code] = float(score_val)
  228. if reason_val:
  229. score_reasons[code] = reason_val
  230. except Exception:
  231. pass
  232. # 用例 (真实感, 表现力)
  233. usecase = fixed.get("用例") or {}
  234. usecase_keys = {
  235. "真实感": "realism",
  236. "表现力": "expressiveness"
  237. }
  238. for cn, code in usecase_keys.items():
  239. item = usecase.get(cn)
  240. if isinstance(item, dict):
  241. score_val = item.get("得分")
  242. reason_val = item.get("理由")
  243. if score_val is not None:
  244. try:
  245. fs[code] = float(score_val)
  246. if reason_val:
  247. score_reasons[code] = reason_val
  248. except Exception:
  249. pass
  250. # 动态维度
  251. dynamic = q_block.get("动态维度") or {}
  252. # 工序
  253. proc = dynamic.get("工序") or {}
  254. if proc:
  255. item = proc.get("流程完整性")
  256. if isinstance(item, dict):
  257. score_val = item.get("得分")
  258. reason_val = item.get("理由")
  259. if score_val is not None:
  260. try:
  261. fs["procedure_completeness"] = float(score_val)
  262. if reason_val:
  263. score_reasons["procedure_completeness"] = reason_val
  264. except Exception:
  265. pass
  266. field = proc.get("字段完整性") or {}
  267. field_keys = {
  268. "输入完整性": "procedure_input",
  269. "实现完整性": "procedure_implementation",
  270. "输出完整性": "procedure_output"
  271. }
  272. for cn, code in field_keys.items():
  273. item = field.get(cn)
  274. if isinstance(item, dict):
  275. score_val = item.get("得分")
  276. reason_val = item.get("理由")
  277. if score_val is not None:
  278. try:
  279. fs[code] = float(score_val)
  280. if reason_val:
  281. score_reasons[code] = reason_val
  282. except Exception:
  283. pass
  284. item = proc.get("泛化性")
  285. if isinstance(item, dict):
  286. score_val = item.get("得分")
  287. reason_val = item.get("理由")
  288. if score_val is not None:
  289. try:
  290. fs["procedure_generality"] = float(score_val)
  291. if reason_val:
  292. score_reasons["procedure_generality"] = reason_val
  293. except Exception:
  294. pass
  295. # 能力
  296. cap = dynamic.get("能力") or dynamic.get("步骤") or {}
  297. if cap:
  298. field = cap.get("字段完整性") or {}
  299. field_keys = {
  300. "输入完整性": "step_input",
  301. "实现完整性": "step_implementation",
  302. "输出完整性": "step_output"
  303. }
  304. for cn, code in field_keys.items():
  305. item = field.get(cn)
  306. if isinstance(item, dict):
  307. score_val = item.get("得分")
  308. reason_val = item.get("理由")
  309. if score_val is not None:
  310. try:
  311. fs[code] = float(score_val)
  312. if reason_val:
  313. score_reasons[code] = reason_val
  314. except Exception:
  315. pass
  316. item = cap.get("泛化性")
  317. if isinstance(item, dict):
  318. score_val = item.get("得分")
  319. reason_val = item.get("理由")
  320. if score_val is not None:
  321. try:
  322. fs["step_generality"] = float(score_val)
  323. if reason_val:
  324. score_reasons["step_generality"] = reason_val
  325. except Exception:
  326. pass
  327. # 工具
  328. tool = dynamic.get("工具") or {}
  329. if tool:
  330. tool_keys = {
  331. "能力边界覆盖": "tool_boundary",
  332. "有效比较": "tool_comparison",
  333. "参数/接口具体性": "tool_specificity",
  334. "实操示例": "tool_example",
  335. "版本&限制": "tool_limits"
  336. }
  337. for cn, code in tool_keys.items():
  338. item = tool.get(cn)
  339. if isinstance(item, dict):
  340. score_val = item.get("得分")
  341. reason_val = item.get("理由")
  342. if score_val is not None:
  343. try:
  344. fs[code] = float(score_val)
  345. if reason_val:
  346. score_reasons[code] = reason_val
  347. except Exception:
  348. pass
  349. else:
  350. # 兼容老版 1-5 分数 schema (带 "评分" 或 old-style flatness)
  351. is_new_schema = "评分" in e or "知识类型" in e or "制作相关性" in e
  352. CN_TO_EN = {
  353. "相关性": "relevance",
  354. "成品质量": "result_quality",
  355. "可信度": "credibility",
  356. "具体用例": "concrete_use_case",
  357. "完整性": "completeness",
  358. "步骤结构": "step_structure",
  359. "步骤可复现": "step_reproducibility",
  360. "步骤可复现性": "step_reproducibility",
  361. "能力定义": "capability_definition",
  362. "实现深度": "implementation_depth",
  363. "边界失败": "boundary_failure_eval",
  364. "通用性": "generality",
  365. "能力覆盖": "capability_coverage",
  366. "有效对比": "effective_comparison",
  367. "参数具体": "param_specificity",
  368. "实操示例": "worked_example",
  369. "实操用例": "worked_example",
  370. "示例完整": "worked_example",
  371. "版本限制": "version_limits",
  372. "版本说明": "version_limits",
  373. "限制说明": "version_limits",
  374. }
  375. if is_new_schema:
  376. pf = e.get("评分") or {}
  377. for cat, metrics in pf.items():
  378. if isinstance(metrics, dict):
  379. for metric, val in metrics.items():
  380. en_key = CN_TO_EN.get(metric, metric)
  381. if isinstance(val, dict) and "得分" in val:
  382. try: fs[en_key] = int(val["得分"])
  383. except Exception: pass
  384. elif isinstance(val, (int, float)):
  385. fs[en_key] = int(val)
  386. if isinstance(val, dict) and "理由" in val:
  387. score_reasons[en_key] = val["理由"]
  388. else:
  389. fs = flat_scores(e.get("scores", {}))
  390. # 计算均分 (overall)
  391. if is_mod_schema:
  392. rel_keys = {"relevance_production", "relevance_query"}
  393. rel_vals = [v for k, v in fs.items() if k in rel_keys]
  394. qual_vals = [v for k, v in fs.items() if k not in rel_keys]
  395. rel_avg = sum(rel_vals) / len(rel_vals) if rel_vals else None
  396. qual_avg = sum(qual_vals) / len(qual_vals) if qual_vals else None
  397. if rel_avg is not None and qual_avg is not None:
  398. overall = round((rel_avg + qual_avg) / 2, 1)
  399. elif rel_avg is not None:
  400. overall = round(rel_avg, 1)
  401. elif qual_avg is not None:
  402. overall = round(qual_avg, 1)
  403. else:
  404. overall = 0.0
  405. else:
  406. overall = round(sum(fs.values()) / len(fs), 1) if fs else 0
  407. anomaly = bool(e.get("error")) or not fs
  408. grade = p.get("_quality_grade", "")
  409. fb = r.get("found_by_queries", [])
  410. # 4. 解析 制作相关性 (production_relevance)
  411. if is_mod_schema:
  412. # 新版使用 "相关性" 中的 "和内容制作知识相关" 代表制作相关性
  413. production_relevance = fs.get("relevance_production")
  414. else:
  415. if is_new_schema:
  416. pr_block = e.get("制作相关性") or {}
  417. pr_raw = pr_block.get("得分") if isinstance(pr_block, dict) else pr_block
  418. if isinstance(pr_block, dict) and "理由" in pr_block:
  419. score_reasons["production_relevance"] = pr_block["理由"]
  420. else:
  421. pr_raw = e.get("production_relevance")
  422. try: production_relevance = int(float(pr_raw)) if pr_raw is not None else None
  423. except (TypeError, ValueError): production_relevance = None
  424. recency_hard = _recency_hard(p.get("publish_timestamp", ""))
  425. # 5. 解析 判定决策 (decision) 和 理由 (reason)
  426. reason = e.get("判定理由") or e.get("reason") or ""
  427. # 根据过滤指标决定是否保留 (过滤指标判定逻辑优先,不依赖文字匹配)
  428. is_discard = False
  429. # 制作相关性低于阈值则丢弃 (新版 0-10 满分,因此低于 4 丢弃;老版低于 2 丢弃)
  430. if production_relevance is not None:
  431. threshold = 4 if is_mod_schema else 2
  432. if production_relevance < threshold:
  433. is_discard = True
  434. # 时效性低于 2 被丢弃(发布时间超两年的老帖)
  435. if recency_hard is not None and recency_hard < 2:
  436. is_discard = True
  437. # 综合均分低于阈值被丢弃 (新版低于 6 丢弃;老版低于 3 丢弃)
  438. if overall is not None:
  439. threshold_ov = 6 if is_mod_schema else 3
  440. if overall < threshold_ov:
  441. is_discard = True
  442. decision = "discard" if is_discard else "report"
  443. # Find matching procedure html
  444. procedure_html = None
  445. case_id = r.get("case_id", "")
  446. title = p.get("title", "")
  447. run_dir = RUNS_DIR / run
  448. if run_dir.is_dir():
  449. # 1. 优先扫描该帖子对应的文件夹下的任何 HTML 文件 (不限名称)
  450. # 文件夹名格式: {form}_{platform}_{channel_content_id[:8]}
  451. content_id = r.get("channel_content_id") or ""
  452. if not content_id and case_id and "_" in case_id:
  453. content_id = case_id.split("_", 1)[1]
  454. plat_key = r.get("platform") or ""
  455. if form_name and plat_key and content_id:
  456. folder_name = f"{form_name}_{plat_key}_{content_id[:8]}"
  457. case_dir = run_dir / "procedures" / folder_name
  458. if case_dir.is_dir():
  459. html_files = list(case_dir.glob("*.html"))
  460. if html_files:
  461. procedure_html = f"runs_full/{run}/procedures/{folder_name}/{html_files[0].name}"
  462. # 1b. 兼容 fallback: 批量提取模式下直接以 case_id 命名的文件夹
  463. if not procedure_html and case_id:
  464. case_dir = run_dir / "procedures" / case_id
  465. if case_dir.is_dir():
  466. html_files = list(case_dir.glob("*.html"))
  467. if html_files:
  468. procedure_html = f"runs_full/{run}/procedures/{case_id}/{html_files[0].name}"
  469. # 2. 其次匹配标准文件名: case-{case_id}.html 或 {case_id}.html
  470. candidate_dirs = [run_dir, run_dir / "procedures"]
  471. if not procedure_html and case_id:
  472. named_files = [f"case-{case_id}.html", f"{case_id}.html"]
  473. for d_dir in candidate_dirs:
  474. if d_dir.is_dir():
  475. for name in named_files:
  476. if (d_dir / name).is_file():
  477. procedure_html = f"runs_full/{run}/procedures/{name}" if d_dir.name == "procedures" else f"runs_full/{run}/{name}"
  478. break
  479. if procedure_html:
  480. break
  481. # 3. 再次匹配 HTML 内部的标准声明 (meta 标签或 HTML 注释)
  482. if not procedure_html and case_id:
  483. for d_dir in candidate_dirs:
  484. if d_dir.is_dir():
  485. for html_path in d_dir.glob("*.html"):
  486. try:
  487. content = html_path.read_text(encoding="utf-8")
  488. if f'name="case-id" content="{case_id}"' in content or \
  489. f'name="case_id" content="{case_id}"' in content or \
  490. f'<!-- case_id: {case_id} -->' in content or \
  491. f'<!-- case-id: {case_id} -->' in content:
  492. procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
  493. break
  494. except Exception:
  495. continue
  496. if procedure_html:
  497. break
  498. # 4. 最后使用标题作为兜底模糊匹配
  499. if not procedure_html and title:
  500. for d_dir in candidate_dirs:
  501. if d_dir.is_dir():
  502. for html_path in d_dir.glob("*.html"):
  503. try:
  504. content = html_path.read_text(encoding="utf-8")
  505. if title in content:
  506. procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
  507. break
  508. except Exception:
  509. continue
  510. if procedure_html:
  511. break
  512. if procedure_html:
  513. procedure_html = procedure_html.replace("runs_full/", f"{RUNS_DIR_NAME}/")
  514. return {
  515. "case_id": r.get("case_id", ""),
  516. "platform": PLAT.get(r.get("platform"), r.get("platform")), "platformKey": r.get("platform"),
  517. "title": p.get("title", "") or "(无标题)", "date": (p.get("publish_timestamp", "") or "")[:10],
  518. "url": r.get("source_url", ""), "engagement": f'{p.get("like_count", 0)} 赞',
  519. "knowledge_type": kt, "decision": decision,
  520. "tools": [KT.get(k, k) for k in kt] + ([f"质量 {grade}"] if grade else []), "found_by": fb,
  521. "images": (p.get("images") or [])[:6], "text": p.get("body_text", "") or "",
  522. "scores": fs, "overall": overall, "reason": reason, "score_reasons": score_reasons,
  523. "grade": grade, "qscore": p.get("_quality_score", 0), "anomaly": anomaly,
  524. "production_relevance": production_relevance, "recency_hard": recency_hard,
  525. "run": run, "procedure_html": procedure_html,
  526. }
  527. def scan_runs():
  528. runs = {}
  529. for f in sorted(glob.glob(str(RUNS_DIR / "*" / "form_*.json"))):
  530. try:
  531. d = json.load(open(f, encoding="utf-8"))
  532. except Exception:
  533. continue
  534. run = pathlib.Path(f).parent.name
  535. form_name = d.get("form") or ""
  536. results = [adapt(r, run, form_name) for r in d.get("results", [])]
  537. report_val = sum(1 for r in results if r.get("decision") == "report" and not r.get("anomaly"))
  538. discard_val = sum(1 for r in results if r.get("decision") == "discard" and not r.get("anomaly"))
  539. runs.setdefault(run, []).append({
  540. "form": d.get("form"), "query": d.get("query"), "original_q": d.get("original_q", ""),
  541. "requirement": d.get("requirement", ""),
  542. "platforms": d.get("platforms", []), "total": d.get("total"),
  543. "report": report_val, "discard": discard_val,
  544. "results": results,
  545. })
  546. for v in runs.values():
  547. v.sort(key=lambda x: x.get("form") or "")
  548. if MODE == "new":
  549. def _mtime(name):
  550. try:
  551. return (RUNS_DIR / name).stat().st_mtime
  552. except Exception:
  553. return 0
  554. sorted_keys = sorted(runs.keys(), key=_mtime, reverse=True)
  555. else:
  556. def _qnum(name):
  557. m = re.search(r"\d+", name)
  558. return (int(m.group()) if m else 0, name)
  559. sorted_keys = [kv[0] for kv in sorted(runs.items(), key=lambda kv: _qnum(kv[0]))]
  560. out = []
  561. for k in sorted_keys:
  562. v = runs[k]
  563. oq = v[0].get("original_q") or v[0].get("query") or ""
  564. seen, hits = set(), 0 # 知识命中数 = 各形式采纳(report)且非异常、按 url 去重后的帖子数
  565. for f in v:
  566. for r in f.get("results", []):
  567. if r.get("decision") == "report" and not r.get("anomaly") and r.get("url") not in seen:
  568. seen.add(r.get("url")); hits += 1
  569. out.append({"key": k, "forms": v, "dims": parse_dims(oq), "original_q": oq,
  570. "hits": hits, "tot": sum((f.get("total") or 0) for f in v)})
  571. active_reevals = {k: v["status"] for k, v in ACTIVE_REEVALS.items()}
  572. active_batch_tasks = {k: v["status"] for k, v in ACTIVE_BATCH_TASKS.items()}
  573. return {"queries": out, "actions": ACTIONS_TAX, "types": TYPES_TAX, "matrix": _MATRIX, "active_reevals": active_reevals, "active_batch_tasks": active_batch_tasks}
  574. class H(BaseHTTPRequestHandler):
  575. def _send(self, code, body, ctype):
  576. b = body.encode("utf-8") if isinstance(body, str) else body
  577. self.send_response(code)
  578. if ctype.startswith("text/") or ctype == "application/json" or ctype == "application/javascript":
  579. self.send_header("Content-Type", ctype + "; charset=utf-8")
  580. else:
  581. self.send_header("Content-Type", ctype)
  582. self.send_header("Content-Length", str(len(b))); self.end_headers(); self.wfile.write(b)
  583. def do_GET(self):
  584. parsed = urlparse(self.path)
  585. path = parsed.path
  586. params = parse_qs(parsed.query)
  587. if path in ("/", "/index.html"):
  588. try:
  589. html_file = "new_query.html" if MODE == "new" else "index.html"
  590. page = (HERE / html_file).read_text(encoding="utf-8")
  591. self._send(200, page, "text/html")
  592. except Exception as e:
  593. self._send(500, f"Error reading page: {e}", "text/plain")
  594. elif path == "/api/data":
  595. self._send(200, json.dumps(scan_runs(), ensure_ascii=False), "application/json")
  596. elif path == "/api/procedure_status":
  597. q = (params.get("q") or [""])[0].strip()
  598. form = (params.get("form") or [""])[0].strip()
  599. case_id = (params.get("case_id") or [""])[0].strip()
  600. if not q or not form or not case_id:
  601. self._send(400, "missing q, form, or case_id", "text/plain")
  602. return
  603. folder_name = f"{form}_{_short_case(case_id)}"
  604. task_key = f"{q}/{folder_name}"
  605. if task_key in ACTIVE_TASKS:
  606. task = ACTIVE_TASKS[task_key]
  607. res = {
  608. "status": task["status"],
  609. "error": task["error"]
  610. }
  611. if task["status"] == "success":
  612. out_dir = RUNS_DIR / q / "procedures" / folder_name
  613. html_files = list(out_dir.glob("*.html")) if out_dir.is_dir() else []
  614. if html_files:
  615. res["procedure_html"] = f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}/{html_files[0].name}"
  616. self._send(200, json.dumps(res, ensure_ascii=False), "application/json")
  617. return
  618. out_dir = RUNS_DIR / q / "procedures" / folder_name
  619. html_files = list(out_dir.glob("*.html")) if out_dir.is_dir() else []
  620. if not html_files and case_id:
  621. fallback_dir = RUNS_DIR / q / "procedures" / case_id
  622. html_files = list(fallback_dir.glob("*.html")) if fallback_dir.is_dir() else []
  623. if html_files:
  624. self._send(200, json.dumps({
  625. "status": "success",
  626. "procedure_html": f"{RUNS_DIR_NAME}/{q}/procedures/{case_id}/{html_files[0].name}"
  627. }, ensure_ascii=False), "application/json")
  628. return
  629. else:
  630. if html_files:
  631. self._send(200, json.dumps({
  632. "status": "success",
  633. "procedure_html": f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}/{html_files[0].name}"
  634. }, ensure_ascii=False), "application/json")
  635. return
  636. log_path = out_dir / "_extract.log"
  637. if not log_path.is_file() and case_id:
  638. fallback_dir = RUNS_DIR / q / "procedures" / case_id
  639. log_path = fallback_dir / "_extract.log"
  640. if log_path.is_file():
  641. self._send(200, json.dumps({"status": "failed", "error": "Not running, but no HTML output found (possibly crashed)."}, ensure_ascii=False), "application/json")
  642. return
  643. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  644. elif path == "/api/procedure_log":
  645. q = (params.get("q") or [""])[0].strip()
  646. form = (params.get("form") or [""])[0].strip()
  647. case_id = (params.get("case_id") or [""])[0].strip()
  648. if not q or not form or not case_id:
  649. self._send(400, "missing q, form, or case_id", "text/plain")
  650. return
  651. folder_name = f"{form}_{_short_case(case_id)}"
  652. log_path = RUNS_DIR / q / "procedures" / folder_name / "_extract.log"
  653. if not log_path.is_file() and case_id:
  654. log_path = RUNS_DIR / q / "procedures" / case_id / "_extract.log"
  655. if not log_path.is_file():
  656. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  657. return
  658. try:
  659. content = log_path.read_text(encoding="utf-8", errors="replace")
  660. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  661. except Exception as e:
  662. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  663. elif path == "/api/spec_content":
  664. file_name = (params.get("file") or [""])[0].strip()
  665. allowed = [
  666. "README.md",
  667. "tools.md",
  668. "extraction/phase1-skeleton.md",
  669. "extraction/phase2-normalize.md",
  670. "extraction/phase3-finalize.md",
  671. "taxonomy/type_suggestions.md"
  672. ]
  673. if file_name not in allowed:
  674. self._send(400, "invalid file parameter", "text/plain")
  675. return
  676. target_path = HERE / "procedure-dsl" / "spec" / file_name
  677. if not target_path.is_file():
  678. self._send(404, "spec file not found", "text/plain")
  679. return
  680. try:
  681. content = target_path.read_text(encoding="utf-8", errors="replace")
  682. self._send(200, json.dumps({"content": content}, ensure_ascii=False), "application/json")
  683. except Exception as e:
  684. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  685. elif path == "/api/reeval_status":
  686. q = (params.get("q") or [""])[0].strip()
  687. if not q:
  688. self._send(400, "missing q", "text/plain")
  689. return
  690. if q in ACTIVE_REEVALS:
  691. self._send(200, json.dumps({
  692. "status": ACTIVE_REEVALS[q]["status"],
  693. "error": ACTIVE_REEVALS[q].get("error")
  694. }, ensure_ascii=False), "application/json")
  695. else:
  696. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  697. elif path == "/api/search_eval_status":
  698. q = (params.get("q") or [""])[0].strip()
  699. if not q:
  700. self._send(400, "missing q", "text/plain")
  701. return
  702. if q in ACTIVE_SEARCH_EVALS:
  703. self._send(200, json.dumps({
  704. "status": ACTIVE_SEARCH_EVALS[q]["status"],
  705. "error": ACTIVE_SEARCH_EVALS[q].get("error")
  706. }, ensure_ascii=False), "application/json")
  707. else:
  708. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  709. elif path == "/api/batch_generate_status":
  710. q = (params.get("q") or [""])[0].strip()
  711. if not q:
  712. self._send(400, "missing q", "text/plain")
  713. return
  714. if q in ACTIVE_BATCH_TASKS:
  715. self._send(200, json.dumps({
  716. "status": ACTIVE_BATCH_TASKS[q]["status"],
  717. "error": ACTIVE_BATCH_TASKS[q].get("error")
  718. }, ensure_ascii=False), "application/json")
  719. else:
  720. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  721. elif path == "/api/batch_generate_log":
  722. q = (params.get("q") or [""])[0].strip()
  723. if not q:
  724. self._send(400, "missing q", "text/plain")
  725. return
  726. safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
  727. log_path = RUNS_DIR / safe_q / "procedures" / "_batch_extract.log"
  728. if not log_path.is_file():
  729. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  730. return
  731. try:
  732. content = log_path.read_text(encoding="utf-8", errors="replace")
  733. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  734. except Exception as e:
  735. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  736. elif path == "/api/search_eval_log":
  737. q = (params.get("q") or [""])[0].strip()
  738. if not q:
  739. self._send(400, "missing q", "text/plain")
  740. return
  741. safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
  742. log_path = RUNS_DIR / safe_q / "_search_eval.log"
  743. if not log_path.is_file():
  744. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  745. return
  746. try:
  747. content = log_path.read_text(encoding="utf-8", errors="replace")
  748. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  749. except Exception as e:
  750. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  751. elif self.path.startswith("/runs_full/") or self.path.startswith("/runs_new/"):
  752. try:
  753. import urllib.parse
  754. clean_path = urllib.parse.unquote(self.path.split("?")[0])
  755. parts = clean_path.strip("/").split("/")
  756. target_file = HERE
  757. for part in parts:
  758. target_file = target_file / part
  759. runs_dir = HERE / parts[0]
  760. if runs_dir.resolve() in target_file.resolve().parents and target_file.is_file():
  761. content = target_file.read_bytes()
  762. ext = target_file.suffix.lower()
  763. ctype = "text/html"
  764. if ext in (".png", ".webp"):
  765. ctype = f"image/{ext[1:]}"
  766. elif ext in (".jpg", ".jpeg"):
  767. ctype = "image/jpeg"
  768. elif ext == ".json":
  769. ctype = "application/json"
  770. elif ext == ".js":
  771. ctype = "application/javascript"
  772. elif ext == ".css":
  773. ctype = "text/css"
  774. self._send(200, content, ctype)
  775. else:
  776. self._send(404, "not found", "text/plain")
  777. except Exception as e:
  778. self._send(500, f"Error: {e}", "text/plain")
  779. else:
  780. self._send(404, "not found", "text/plain")
  def do_POST(self):
      """Route a JSON POST request to the handler registered for ``self.path``.

      Recognised paths are ``/api/run_search_eval``, ``/api/generate_procedure``,
      ``/api/reeval``, ``/api/batch_generate_procedure`` and ``/api/save_spec``.
      Any other path is answered with 404 without reading the request body.

      The body is read according to ``Content-Length`` (an absent or
      non-positive length is treated as ``{}``) and must decode to a JSON
      object; anything else is answered with 400 so the per-route handlers can
      rely on receiving a ``dict``.
      """
      handler_name = {
          "/api/run_search_eval": "_post_run_search_eval",
          "/api/generate_procedure": "_post_generate_procedure",
          "/api/reeval": "_post_reeval",
          "/api/batch_generate_procedure": "_post_batch_generate_procedure",
          "/api/save_spec": "_post_save_spec",
      }.get(self.path)
      if handler_name is None:
          self._send(404, json.dumps({"error": "not found"}), "application/json")
          return

      try:
          length = int(self.headers.get("Content-Length") or 0)
      except (TypeError, ValueError):
          self._send(400, json.dumps({"error": "bad Content-Length"}), "application/json")
          return

      try:
          # Decoding lives inside the try: invalid UTF-8 is a client error too.
          raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
          payload = json.loads(raw)
      except (ValueError, RecursionError) as e:
          self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json")
          return
      if not isinstance(payload, dict):
          # Handlers call payload.get(); a JSON list/number/string would crash them.
          self._send(400, json.dumps({"error": "bad json: expected an object"}), "application/json")
          return

      getattr(self, handler_name)(payload)

  def _post_reply(self, code, obj):
      """Send ``obj`` serialised as JSON with HTTP status ``code``."""
      self._send(code, json.dumps(obj, ensure_ascii=False), "application/json")

  def _post_text(self, payload, key, default=""):
      """Return ``payload[key]`` as a stripped string, or ``default`` when falsy.

      Non-string JSON values (numbers, booleans, ...) are converted with
      ``str()`` instead of failing on ``.strip()``.
      """
      value = payload.get(key) or default
      if not isinstance(value, str):
          value = str(value)
      return value.strip()

  def _post_stays_inside(self, base, candidate):
      """Return True when ``candidate`` resolves to ``base`` or a path below it.

      Used to reject request-supplied path components (for example ``..``)
      that would make the server read or write outside ``base``.
      """
      try:
          root = base.resolve()
          target = candidate.resolve()
      except (OSError, RuntimeError):
          return False
      return target == root or root in target.parents

  def _post_run_search_eval(self, payload):
      """Handle ``/api/run_search_eval``: start search_and_evaluate.py for one query.

      Reads ``query`` (required) and ``platforms`` (default ``"xhs,zhihu"``)
      from ``payload``. The query is written to a temporary queries file under
      ``RUNS_DIR/<sanitised query>/``, the script is started in a daemon thread
      with its output redirected to ``_search_eval.log``, and progress is
      recorded in ``ACTIVE_SEARCH_EVALS[query]``. Replies 200 as soon as the
      thread has been started.
      """
      q = self._post_text(payload, "query")
      platforms = self._post_text(payload, "platforms", "xhs,zhihu")
      if not q:
          self._post_reply(400, {"error": "missing query"})
          return

      safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
      q_dir = RUNS_DIR / safe_q
      # ".." survives the character filter above, so also check containment.
      if not safe_q or not self._post_stays_inside(RUNS_DIR, q_dir):
          self._post_reply(400, {"error": "invalid query name"})
          return

      temp_queries_file = q_dir / "temp_queries.json"
      try:
          q_dir.mkdir(parents=True, exist_ok=True)
          with open(temp_queries_file, "w", encoding="utf-8") as f:
              json.dump([q], f, ensure_ascii=False)
      except (OSError, ValueError) as e:
          self._post_reply(500, {"error": f"failed to write temp query: {e}"})
          return

      log_path = q_dir / "_search_eval.log"
      ACTIVE_SEARCH_EVALS[q] = {
          "status": "running",
          "pid": None,
          "error": None
      }

      def mark_failed(message):
          ACTIVE_SEARCH_EVALS[q]["status"] = "failed"
          ACTIVE_SEARCH_EVALS[q]["error"] = message

      def run_search_eval_task():
          """Run the script, then publish its evaluated.json as form_A.json."""
          try:
              import os
              env = os.environ.copy()
              env["PYTHONIOENCODING"] = "utf-8"
              env["PYTHONUTF8"] = "1"
              cmd = [
                  sys.executable, "-u", str(HERE / "search_and_evaluate.py"),
                  "--queries", str(temp_queries_file),
                  "--platforms", platforms,
                  "--output-dir", str(q_dir),
                  "--max-count", "20"
              ]
              flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
              with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
                  proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
                                          cwd=str(HERE), env=env, creationflags=flags)
                  ACTIVE_SEARCH_EVALS[q]["pid"] = proc.pid
                  proc.wait()

              if proc.returncode != 0:
                  mark_failed(f"search_and_evaluate.py exited with code {proc.returncode}")
                  return

              evaluated_file = q_dir / "evaluated.json"
              if not evaluated_file.is_file():
                  mark_failed("evaluated.json not found after execution")
                  with open(log_path, "a", encoding="utf-8") as f_err:
                      f_err.write("\n[server error] evaluated.json not found after execution\n")
                  return

              with open(evaluated_file, "r", encoding="utf-8") as rf:
                  eval_data = json.load(rf)
              eval_data["form"] = "A"
              eval_data["query"] = q
              eval_data["original_q"] = q
              with open(q_dir / "form_A.json", "w", encoding="utf-8") as wf:
                  json.dump(eval_data, wf, ensure_ascii=False, indent=2)
              ACTIVE_SEARCH_EVALS[q]["status"] = "success"
          except Exception as ex:
              # Thread boundary: record the failure so the status endpoint can report it.
              mark_failed(str(ex))
              try:
                  with open(log_path, "a", encoding="utf-8") as f_err:
                      f_err.write(f"\n[server error] Exception: {ex}\n")
              except OSError:
                  pass  # the log is best-effort; the status entry already holds the error
          finally:
              try:
                  if temp_queries_file.is_file():
                      temp_queries_file.unlink()
              except OSError:
                  pass  # best-effort cleanup of the temporary queries file

      t = threading.Thread(target=run_search_eval_task)
      t.daemon = True
      t.start()
      self._post_reply(200, {
          "status": "started",
          "query": q,
          "log": f"{RUNS_DIR_NAME}/{safe_q}/_search_eval.log"
      })

  def _post_generate_procedure(self, payload):
      """Handle ``/api/generate_procedure``: start extraction for one case.

      Reads ``q``, ``form`` (``A``/``B``/``C``), ``case_id``, ``engine`` and
      ``model`` from ``payload``, looks the case up in
      ``RUNS_DIR/<q>/form_<form>.json``, writes the extraction inputs into
      ``procedures/<form>_<short case>/`` and runs ``run_extraction_task`` in a
      daemon thread. Progress is recorded in ``ACTIVE_TASKS``.
      """
      q = self._post_text(payload, "q")
      form = self._post_text(payload, "form")
      case_id = self._post_text(payload, "case_id")
      engine = self._post_text(payload, "engine", "cyber_runner")
      model = self._post_text(payload, "model", "google/gemini-3.1-flash-lite")

      if MODE != "new" and not re.match(r"^q\d+$", q):
          self._post_reply(400, {"error": f"bad q (expect 'qNN'): {q!r}"})
          return
      if form not in ("A", "B", "C"):
          self._post_reply(400, {"error": f"bad form: {form!r}"})
          return
      if not case_id:
          self._post_reply(400, {"error": "missing case_id"})
          return

      q_dir = RUNS_DIR / q
      # In "new" mode q is free-form, so refuse anything that leaves RUNS_DIR.
      if not self._post_stays_inside(RUNS_DIR, q_dir):
          self._post_reply(400, {"error": f"bad q: {q!r}"})
          return

      form_file = q_dir / f"form_{form}.json"
      if not form_file.is_file():
          self._post_reply(404, {"error": f"form file not found: {form_file.name}"})
          return
      try:
          with open(form_file, encoding="utf-8") as f:
              form_data = json.load(f)
      except (OSError, ValueError) as e:
          self._post_reply(500, {"error": f"failed to read form: {e}"})
          return

      results = form_data.get("results", []) if isinstance(form_data, dict) else []
      matching_result = next(
          (r for r in results if isinstance(r, dict) and r.get("case_id") == case_id),
          None,
      )
      if not matching_result:
          self._post_reply(404, {"error": f"case_id {case_id} not found in form {form}"})
          return

      folder_name = f"{form}_{_short_case(case_id)}"
      out_dir = q_dir / "procedures" / folder_name
      backup_procedure_history(out_dir)
      src_path = out_dir / "_source.json"
      try:
          out_dir.mkdir(parents=True, exist_ok=True)
          with open(src_path, "w", encoding="utf-8") as f:
              json.dump(_source_to_dsl_input(matching_result), f, ensure_ascii=False, indent=2)
          score = _composite_score(matching_result.get("llm_evaluation") or {})
          _write_meta(out_dir, case_id=case_id, from_q=q, form=form, score=score)
      except Exception as e:
          self._post_reply(500, {"error": f"failed to write inputs: {e}"})
          return

      task_key = f"{q}/{folder_name}"
      ACTIVE_TASKS[task_key] = {
          "status": "running",
          "start_time": datetime.now().isoformat(),
          "pid": None,
          "error": None
      }
      t = threading.Thread(target=run_extraction_task,
                           args=(q, folder_name, src_path, out_dir, engine, model))
      t.daemon = True
      t.start()
      self._post_reply(200, {
          "status": "started",
          "task_key": task_key,
          "log": f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}/_extract.log"
      })

  def _post_reeval(self, payload):
      """Handle ``/api/reeval``: start ``batch_3forms.py --reeval`` for one query.

      The subprocess writes to ``RUNS_DIR/<q>/_reeval.log``; a daemon thread
      waits for it, records the outcome in ``ACTIVE_REEVALS[q]`` and closes the
      log handle. Replies 200 with the child's pid once it has been started.
      """
      q = self._post_text(payload, "q")
      if MODE != "new" and not re.match(r"^q\d+$", q):
          self._post_reply(400, {"error": f"bad q (expect 'qNN'): {q!r}"})
          return

      q_dir = RUNS_DIR / q
      # In "new" mode q is free-form, so refuse anything that leaves RUNS_DIR.
      if not self._post_stays_inside(RUNS_DIR, q_dir):
          self._post_reply(400, {"error": f"bad q: {q!r}"})
          return
      if not q_dir.is_dir():
          self._post_reply(404, {"error": f"{RUNS_DIR_NAME}/{q} not found"})
          return

      backup_reeval_history(q_dir)
      log_path = q_dir / "_reeval.log"
      log_fh = None
      try:
          log_fh = open(log_path, "w", encoding="utf-8", buffering=1)
          cmd = [sys.executable, "-u", str(HERE / "batch_3forms.py"),
                 "--reeval", "--reeval-q", q, "--output-dir", str(RUNS_DIR)]
          flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
          proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
                                  cwd=str(HERE), creationflags=flags)
      except Exception as e:
          # No waiter thread exists yet, so the handle must be closed here.
          if log_fh is not None:
              log_fh.close()
          self._post_reply(500, {"error": f"failed to start: {e}"})
          return

      ACTIVE_REEVALS[q] = {
          "status": "running",
          "pid": proc.pid,
          "error": None
      }

      def wait_reeval():
          """Wait for the child, record its outcome, then close the log handle."""
          try:
              proc.wait()
              if proc.returncode == 0:
                  ACTIVE_REEVALS[q]["status"] = "success"
              else:
                  ACTIVE_REEVALS[q]["status"] = "failed"
                  ACTIVE_REEVALS[q]["error"] = f"Subprocess exited with code {proc.returncode}"
          except Exception as ex:
              ACTIVE_REEVALS[q]["status"] = "failed"
              ACTIVE_REEVALS[q]["error"] = str(ex)
          finally:
              try:
                  log_fh.close()
              except OSError:
                  pass  # nothing useful can be done if closing the log fails

      t = threading.Thread(target=wait_reeval)
      t.daemon = True
      t.start()

      try:
          log_ref = str(log_path.relative_to(HERE))
      except ValueError:
          # RUNS_DIR is not under HERE; report the full path rather than
          # answering "failed to start" for a process that is running.
          log_ref = str(log_path)
      self._post_reply(200, {"status": "started", "pid": proc.pid, "q": q, "log": log_ref})

  def _post_batch_generate_procedure(self, payload):
      """Handle ``/api/batch_generate_procedure``: batch-extract one form's cases.

      Reads ``q`` (required), ``form`` (default ``"A"``), ``model`` and
      ``concurrency`` (clamped to 1..16, default 4). Results whose adapted
      record has ``decision == "report"`` and no ``anomaly`` are written to
      ``procedures/temp_batch.json`` and handed to ``run_cyber.py --batch`` in a
      daemon thread; on success ``build_workflows.write_run`` is called.
      Progress is recorded in ``ACTIVE_BATCH_TASKS[q]``.
      """
      q = self._post_text(payload, "q")
      form = self._post_text(payload, "form", "A")
      model = self._post_text(payload, "model", "google/gemini-3.1-flash-lite")
      try:
          concurrency = int(payload.get("concurrency", 4))
      except (TypeError, ValueError, OverflowError):
          concurrency = 4
      concurrency = max(1, min(16, concurrency))

      if not q:
          self._post_reply(400, {"error": "missing q"})
          return

      q_dir = RUNS_DIR / q
      form_file = q_dir / f"form_{form}.json"
      # q and form come straight from the request; keep both paths contained.
      if not (self._post_stays_inside(RUNS_DIR, q_dir)
              and self._post_stays_inside(q_dir, form_file)):
          self._post_reply(400, {"error": f"bad q or form: {q!r}, {form!r}"})
          return
      if not form_file.is_file():
          self._post_reply(404, {"error": f"form file not found: {form_file.name}"})
          return
      try:
          with open(form_file, encoding="utf-8") as f:
              form_data = json.load(f)
      except (OSError, ValueError) as e:
          self._post_reply(500, {"error": f"failed to read form: {e}"})
          return

      import copy
      valid_results = []
      for r in form_data.get("results", []):
          adapted = adapt(r, q, form)
          if adapted.get("decision") == "report" and not adapted.get("anomaly"):
              # Copy so the record sent to the batch runner can carry the
              # "<form>_<short case>" id without mutating the loaded form data.
              r_copy = copy.deepcopy(r)
              original_case_id = r.get("case_id", "")
              r_copy["case_id"] = f"{form}_{_short_case(original_case_id)}"
              valid_results.append(r_copy)
      if not valid_results:
          self._post_reply(400, {"error": "没有可提取工序的帖子(所有帖子都已被过滤,或都存在解析异常)"})
          return

      procedures_dir = q_dir / "procedures"
      batch_posts_path = procedures_dir / "temp_batch.json"
      try:
          procedures_dir.mkdir(parents=True, exist_ok=True)
          with open(batch_posts_path, "w", encoding="utf-8") as f:
              json.dump({"results": valid_results}, f, ensure_ascii=False, indent=2)
      except (OSError, TypeError, ValueError) as e:
          self._post_reply(500, {"error": f"failed to write temp_batch.json: {e}"})
          return

      ACTIVE_BATCH_TASKS[q] = {
          "status": "running",
          "start_time": datetime.now().isoformat(),
          "pid": None,
          "error": None
      }

      def run_batch_extraction():
          """Run the batch extractor, then rebuild the workflows for ``q``."""
          try:
              log_path = procedures_dir / "_batch_extract.log"
              cmd = [
                  sys.executable, "-u", str(HERE / "procedure-dsl" / "run_cyber.py"),
                  str(batch_posts_path),
                  "--out-dir", str(procedures_dir),
                  "--batch",
                  "--batch-workers", str(concurrency),
                  "--model", model,
                  "--max-turns", "300"
              ]
              flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
              with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
                  proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
                                          cwd=str(HERE), creationflags=flags)
                  ACTIVE_BATCH_TASKS[q]["pid"] = proc.pid
                  proc.wait()

              if proc.returncode != 0:
                  ACTIVE_BATCH_TASKS[q]["status"] = "failed"
                  ACTIVE_BATCH_TASKS[q]["error"] = f"Batch runner exited with code {proc.returncode}"
                  return
              try:
                  import build_workflows
                  build_workflows.write_run(q, runs_dir=RUNS_DIR)
                  ACTIVE_BATCH_TASKS[q]["status"] = "success"
              except Exception as ex:
                  ACTIVE_BATCH_TASKS[q]["status"] = "failed"
                  ACTIVE_BATCH_TASKS[q]["error"] = f"Workflows compilation failed: {ex}"
          except Exception as ex:
              # Thread boundary: record the failure so the status endpoint can report it.
              ACTIVE_BATCH_TASKS[q]["status"] = "failed"
              ACTIVE_BATCH_TASKS[q]["error"] = str(ex)
          finally:
              try:
                  if batch_posts_path.is_file():
                      batch_posts_path.unlink()
              except OSError:
                  pass  # best-effort cleanup of the temporary batch file

      t = threading.Thread(target=run_batch_extraction)
      t.daemon = True
      t.start()
      self._post_reply(200, {
          "status": "started",
          "q": q,
          "log": f"{RUNS_DIR_NAME}/{q}/procedures/_batch_extract.log"
      })

  def _post_save_spec(self, payload):
      """Handle ``/api/save_spec``: overwrite one whitelisted spec file.

      ``payload["file"]`` must be one of the names listed below; the text in
      ``payload["content"]`` (empty when missing) replaces that file under
      ``HERE/procedure-dsl/spec/``.
      """
      file_name = self._post_text(payload, "file")
      content = payload.get("content") or ""
      allowed = (
          "README.md",
          "tools.md",
          "extraction/phase1-skeleton.md",
          "extraction/phase2-normalize.md",
          "extraction/phase3-finalize.md",
          "taxonomy/type_suggestions.md",
      )
      if file_name not in allowed:
          self._post_reply(400, {"error": "invalid file parameter"})
          return
      if not isinstance(content, str):
          # write_text() only accepts str; report this as a client error.
          self._post_reply(400, {"error": "content must be a string"})
          return

      target_path = HERE / "procedure-dsl" / "spec" / file_name
      try:
          target_path.parent.mkdir(parents=True, exist_ok=True)
          target_path.write_text(content, encoding="utf-8")
      except (OSError, ValueError) as e:
          self._post_reply(500, {"error": str(e)})
          return
      self._post_reply(200, {"status": "ok"})
  def log_message(self, *a):
      """Accept any logging arguments and do nothing with them."""
      return None
  if __name__ == "__main__":
      # Count the queries found by scan_runs() once, for the startup banner.
      n = len(scan_runs()["queries"])
      banner = f"搜索评估查看 server ({MODE} 模式):http://0.0.0.0:{PORT} ({RUNS_DIR_NAME}/ 下 {n} 个 query,实时扫描)"
      print(banner)
      # Bind on all interfaces and serve until the process is stopped.
      httpd = ThreadingHTTPServer(("0.0.0.0", PORT), H)
      httpd.serve_forever()