server.py 71 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446
  1. # -*- coding: utf-8 -*-
  2. """搜索评估案例查看 server。
  3. 沿用 图文排版搜索评估.html 的版式(卡片 + dialog 详情 + rubric 评分条),
  4. 数据实时扫描 runs_full/*/form_*.json —— runs_full 下每新增一个 q 文件夹,刷新即出现。
  5. 分页:query → 三种形式(A/B/C) → 三个渠道 三行从上到下。
  6. 用法:python server.py [port] 默认 8770,浏览器开 http://0.0.0.0:8770
  7. """
  8. import json, re, glob, sys, pathlib, subprocess, threading
  9. from datetime import datetime
  10. from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
  11. from urllib.parse import urlparse, parse_qs
  12. try: # Windows 控制台默认 cp1252,中文 print 会崩,统一切 utf-8
  13. sys.stdout.reconfigure(encoding="utf-8")
  14. except Exception:
  15. pass
  16. HERE = pathlib.Path(__file__).parent
  17. SE = HERE.parent # search_eval:引擎脚本/procedure-dsl/evaluation 都在父目录
  18. sys.path.insert(0, str(HERE))
  19. sys.path.insert(0, str(SE)) # 复用兄弟模块 batch_extract_procedures / build_workflows
  20. import argparse
  21. _parser = argparse.ArgumentParser(description="工序解构 server (mode_procedure:写死 new 模式 + procedure 直写版)")
  22. _parser.add_argument("port", type=int, nargs="?", default=8771, help="运行端口(默认 8771)")
  23. _parser.add_argument("--mode", choices=["legacy", "new"], default="new", help="(mode_procedure 固定 new)")
  24. _args, _ = _parser.parse_known_args()
  25. PORT = _args.port
  26. MODE = "new" # mode_procedure 写死 new 模式
  27. RUNS_DIR_NAME = "runs_new"
  28. RUNS_DIR = HERE / RUNS_DIR_NAME # 产出目录在本文件夹 mode_procedure/runs_new
  29. PLAT = {"xhs": "小红书", "gzh": "公众号", "zhihu": "知乎", "x": "X", "bili": "B站", "douyin": "抖音",
  30. "sph": "视频号", "youtube": "YouTube", "github": "GitHub", "toutiao": "头条", "weibo": "微博"}
  31. KT = {"procedure": "工序", "step": "步骤", "tool": "工具"}
  32. # 从 taxonomy 取动作叶子/类型名,用于把 original_q 解析回原始维度(动作×类型 正交)
  33. # 路径优先级:search_eval/evaluation/(主源,IDE 编辑那份就是 runtime 实际读的)
  34. # → test_script/evaluation/(历史副本兜底)→ script/evaluation/(更老兜底)
  35. # 谁也找不到时整目录扫空,server 仍能起。
  36. EVALDIR = HERE / "evaluation"
  37. if not EVALDIR.exists():
  38. EVALDIR = HERE.parent.parent / "test_script" / "evaluation"
  39. if not EVALDIR.exists():
  40. EVALDIR = HERE.parent / "evaluation"
  41. try:
  42. _jm = json.load(open(EVALDIR / "judged_matrix.json", encoding="utf-8"))
  43. ACT_L1 = {a["name"]: a["l1"] for a in _jm["actions"]}
  44. ACTION_SET = set(ACT_L1)
  45. TYPE_SET = {t["name"] for t in _jm["types"]}
  46. ACTIONS_TAX = [{"name": a["name"], "l1": a["l1"], "l2": a.get("l2", "")} for a in _jm["actions"]]
  47. TYPES_TAX = [{"name": t["name"], "l1": t["l1"]} for t in _jm["types"]]
  48. # taxonomy 顺序沿用 judged_matrix(严格版);矩阵分值改用 type_action_scores(宽松版) —
  49. # 两份是同一组 27×50 cell 的独立 gemini judging,前者只 53 格到 tier3,后者 156 格到 score3
  50. _tas = json.load(open(EVALDIR / "type_action_scores.json", encoding="utf-8"))["scores"]
  51. _MATRIX = []
  52. for a in _jm["actions"]:
  53. row = []
  54. for t in _jm["types"]:
  55. rec = _tas.get(t["name"], {}).get(a["name"])
  56. row.append({"tier": rec["score"], "r": rec.get("reason", "")} if rec else {})
  57. _MATRIX.append(row)
  58. except Exception:
  59. ACT_L1, ACTION_SET, TYPE_SET, ACTIONS_TAX, TYPES_TAX, _MATRIX = {}, set(), set(), [], [], []
  60. ACTIVE_TASKS = {}
  61. ACTIVE_REEVALS = {}
  62. ACTIVE_SEARCH_EVALS = {}
  63. ACTIVE_BATCH_TASKS = {}
  64. def _gen_version() -> str:
  65. """生成版本号 v_月日时分。"""
  66. return "v_" + datetime.now().strftime("%m%d%H%M")
  67. def _read_meta_version(out_dir: pathlib.Path):
  68. """读 out_dir/_meta.json 里的版本号(用于把旧产物归档到 history/{旧版本}/)。"""
  69. try:
  70. meta = json.loads((out_dir / "_meta.json").read_text(encoding="utf-8"))
  71. return meta.get("version")
  72. except Exception:
  73. return None
  74. def _stamp_version(out_dir: pathlib.Path, version: str):
  75. """把版本号写进 out_dir/_meta.json(_write_meta 之后调用)。"""
  76. meta_path = out_dir / "_meta.json"
  77. try:
  78. meta = json.loads(meta_path.read_text(encoding="utf-8")) if meta_path.is_file() else {}
  79. except Exception:
  80. meta = {}
  81. meta["version"] = version
  82. try:
  83. meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
  84. except Exception:
  85. pass
  86. def _store_dsl(table: str, q: str, case_id: str, version: str, model: str, out_dir: pathlib.Path):
  87. """把刚生成的 workflow.json 写入数据库(agent_dsl / mode_dsl)。失败不阻断。"""
  88. try:
  89. import db
  90. wf = json.loads((out_dir / "workflow.json").read_text(encoding="utf-8"))
  91. src = None
  92. sp = out_dir / "_source.json"
  93. if sp.is_file():
  94. try:
  95. src = json.loads(sp.read_text(encoding="utf-8"))
  96. except Exception:
  97. src = None
  98. platform = case_id.split("_")[0] if "_" in case_id else None
  99. db.upsert_dsl(table, q, case_id, version, model, wf, source=src,
  100. platform=platform,
  101. post_title=(src or {}).get("title"),
  102. source_link=(src or {}).get("link"))
  103. except Exception as ex:
  104. print(f"[db] _store_dsl({table}) 失败(不阻断):{ex}")
  105. def backup_procedure_history(out_dir: pathlib.Path):
  106. if not out_dir.is_dir():
  107. return
  108. files_to_backup = [f for f in out_dir.iterdir() if f.is_file()]
  109. if not files_to_backup:
  110. return
  111. history_dir = out_dir / "history"
  112. # 按旧产物的版本号归档(v_MMDDHHMM);无版本号(历史遗留)回退时间戳
  113. label = _read_meta_version(out_dir) or datetime.now().strftime("%Y%m%d_%H%M%S")
  114. version_dir = history_dir / label
  115. version_dir.mkdir(parents=True, exist_ok=True)
  116. import shutil
  117. for f in files_to_backup:
  118. try:
  119. shutil.move(str(f), str(version_dir / f.name))
  120. except Exception as e:
  121. print(f"[backup] failed to move procedure file {f.name}: {e}")
  122. def backup_reeval_history(q_dir: pathlib.Path):
  123. if not q_dir.is_dir():
  124. return
  125. files_to_backup = list(q_dir.glob("form_*.json"))
  126. if not files_to_backup:
  127. return
  128. history_dir = q_dir / "history"
  129. timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
  130. version_dir = history_dir / timestamp
  131. version_dir.mkdir(parents=True, exist_ok=True)
  132. import shutil
  133. for f in files_to_backup:
  134. try:
  135. shutil.copy(str(f), str(version_dir / f.name))
  136. except Exception as e:
  137. print(f"[backup] failed to copy reeval file {f.name}: {e}")
  138. from batch_extract_procedures import _short_case, _source_to_dsl_input, _write_meta, _composite_score
  139. def _backend_note(model, engine="cyber_runner"):
  140. """根据引擎/模型判断计费后端,明确扣 OpenRouter 余额还是 Claude 订阅。"""
  141. if engine and engine != "cyber_runner":
  142. return "Claude SDK / OAuth(扣 Claude 订阅额度)"
  143. if "qwen" in (model or "").lower():
  144. return "Qwen 原生(阿里云,不走 OpenRouter)"
  145. return "OpenRouter(扣 OpenRouter 余额,非 Claude 订阅)"
  146. def _log_header(log_fh, title, model, engine="cyber_runner"):
  147. """在子进程日志开头写一行:本次用什么模型、走什么后端。"""
  148. line = f"[server] {title} · 模型 {model} · 后端 {_backend_note(model, engine)}\n"
  149. log_fh.write(line); log_fh.write("-" * 60 + "\n"); log_fh.flush()
  150. def run_model_extraction_task(q, folder_name, case_id, src_path, out_dir, model, version):
  151. """模型提取:跑 procedure_model_extract.py(单次大模型直出 workflow.json + 渲染)。
  152. 产物写 procedures/{folder}/_model/ 子目录,task_key 带 /_model 后缀,与 Agent 工序分开。"""
  153. task_key = f"{q}/{folder_name}/_model"
  154. log_path = out_dir / "_extract.log"
  155. try:
  156. out_dir.mkdir(parents=True, exist_ok=True)
  157. cmd = [sys.executable, "-u", str(HERE / "mode-dsl" / "procedure_model_extract.py"),
  158. str(src_path), "--out-dir", str(out_dir), "--model", model, "--case-id", folder_name]
  159. flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
  160. with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
  161. _log_header(log_fh, "🤖 工序提取(模型直出,无 agent/validate 循环)", model, "cyber_runner")
  162. proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
  163. cwd=str(HERE), creationflags=flags)
  164. ACTIVE_TASKS[task_key]["pid"] = proc.pid
  165. proc.wait()
  166. if proc.returncode == 0:
  167. # 模型工序独立展示在「大模型工序」子 tab(按 _model/*.html 检测),不进 gallery 合并
  168. _stamp_version(out_dir, version)
  169. _store_dsl("mode_dsl", q, case_id, version, model, out_dir) # 写库
  170. ACTIVE_TASKS[task_key]["status"] = "success"
  171. else:
  172. ACTIVE_TASKS[task_key]["status"] = "failed"
  173. ACTIVE_TASKS[task_key]["error"] = f"model extract exit code {proc.returncode}"
  174. except Exception as e:
  175. ACTIVE_TASKS[task_key]["status"] = "failed"
  176. ACTIVE_TASKS[task_key]["error"] = str(e)
  177. try:
  178. with open(log_path, "a", encoding="utf-8") as f_err:
  179. f_err.write(f"\n[server error] model extract failed: {e}\n")
  180. except Exception:
  181. pass
  182. def run_extraction_task(q, folder_name, case_id, src_path, out_dir, engine, model, version):
  183. task_key = f"{q}/{folder_name}"
  184. log_path = out_dir / "_extract.log"
  185. try:
  186. out_dir.mkdir(parents=True, exist_ok=True)
  187. if engine == "cyber_runner":
  188. script_path = HERE / "procedure-dsl" / "run_cyber.py"
  189. else:
  190. script_path = HERE / "procedure-dsl" / "run_procedure_dsl.py"
  191. cmd = [
  192. sys.executable, "-u", str(script_path),
  193. str(src_path),
  194. "--out-dir", str(out_dir),
  195. "--model", model,
  196. "--max-turns", "300"
  197. ]
  198. if engine != "cyber_runner":
  199. cmd.extend(["--max-retries", "3"])
  200. else:
  201. cmd.extend(["--skill", "procedure"]) # mode_procedure:run_cyber 用直写版 skill
  202. flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
  203. with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
  204. _log_header(log_fh, "🔧 工序提取(单帖)", model, engine)
  205. proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
  206. cwd=str(HERE / "procedure-dsl"), creationflags=flags) # cwd=本地 procedure-dsl(run_cyber 假设的工作目录,agent 相对路径才对)
  207. ACTIVE_TASKS[task_key]["pid"] = proc.pid
  208. proc.wait()
  209. if proc.returncode == 0:
  210. try:
  211. import build_workflows
  212. build_workflows.write_one(q, folder_name, runs_dir=RUNS_DIR)
  213. _stamp_version(out_dir, version)
  214. _store_dsl("agent_dsl", q, case_id, version, model, out_dir) # 写库
  215. ACTIVE_TASKS[task_key]["status"] = "success"
  216. except Exception as ex:
  217. ACTIVE_TASKS[task_key]["status"] = "failed"
  218. ACTIVE_TASKS[task_key]["error"] = f"Workflow compilation failed: {ex}"
  219. with open(log_path, "a", encoding="utf-8") as f_err:
  220. f_err.write(f"\n[server error] Workflow compilation failed: {ex}\n")
  221. else:
  222. ACTIVE_TASKS[task_key]["status"] = "failed"
  223. ACTIVE_TASKS[task_key]["error"] = f"Runner failed with exit code {proc.returncode}"
  224. except Exception as e:
  225. ACTIVE_TASKS[task_key]["status"] = "failed"
  226. ACTIVE_TASKS[task_key]["error"] = str(e)
  227. try:
  228. with open(log_path, "a", encoding="utf-8") as f_err:
  229. f_err.write(f"\n[server error] Extraction failed: {e}\n")
  230. except Exception:
  231. pass
  232. MODSET = {"文", "图", "视频", "音频"}
  233. TOOLQUAL = {"AI": "AI 模型", "软件": "桌面 APP", "电脑端": "桌面 APP", "在线": "云端 Web",
  234. "网页版": "云端 Web", "代码": "API·CLI", "命令行": "API·CLI", "插件": "插件扩展"}
  235. def parse_dims(oq):
  236. """把组合 query(如 '文 元素生成 提示词 教程')解析回 {动作, 类型, 动作L1, 约束}。"""
  237. toks = (oq or "").split()
  238. action = next((t for t in toks if t in ACTION_SET), None)
  239. type_ = next((t for t in toks if t in TYPE_SET), None)
  240. cons = None
  241. if toks:
  242. t0 = toks[0]
  243. if t0 in MODSET:
  244. cons = {"kind": "模态", "value": t0}
  245. elif t0 in TOOLQUAL:
  246. cons = {"kind": "工具类型", "value": TOOLQUAL[t0]}
  247. return {"action": action, "type": type_, "action_l1": ACT_L1.get(action, ""), "constraint": cons}
  248. def flat_scores(sc):
  249. f = {}
  250. for k, v in (sc or {}).items():
  251. if isinstance(v, dict):
  252. for kk, vv in v.items():
  253. try: f[kk] = int(vv)
  254. except Exception: pass
  255. else:
  256. try: f[k] = int(v)
  257. except Exception: pass
  258. return f
  259. def _recency_hard(date_str):
  260. """按 publish_timestamp 头 10 字符(YYYY-MM-DD)算硬时效:半年内=3 / 两年内=2 / 更早=1。
  261. 取代原 LLM 评的 recency 维度——脚本算更稳,发布时间在帖子抓取时就有,无需 LLM token。
  262. """
  263. try:
  264. d = datetime.strptime((date_str or "")[:10], "%Y-%m-%d")
  265. except (ValueError, TypeError):
  266. return None
  267. days = (datetime.now() - d).days
  268. if days <= 180: return 3
  269. if days <= 730: return 2
  270. return 1
  271. def adapt(r, run, form_name=None):
  272. p = r.get("post", {}); e = r.get("llm_evaluation", {})
  273. # 1. 解析 知识类型 (knowledge_type)
  274. kt = []
  275. kt_raw = e.get("知识类型") or e.get("knowledge_type") or []
  276. for k in kt_raw:
  277. if k in ("工序", "procedure"): kt.append("procedure")
  278. elif k in ("能力", "步骤", "step"): kt.append("step")
  279. elif k in ("工具", "tool"): kt.append("tool")
  280. fs = {}
  281. score_reasons = {}
  282. # 检测是否为 eval_prompt_sample-mod 里的新版 0-10 分数 schema
  283. is_mod_schema = "相关性" in e and isinstance(e["相关性"], dict) and ("和内容制作知识相关" in e["相关性"] or "和 query 相关" in e["相关性"])
  284. if is_mod_schema:
  285. # 新版 0-10 分数格式解析
  286. # 1. 相关性
  287. rel = e.get("相关性") or {}
  288. for subkey, item in rel.items():
  289. if isinstance(item, dict):
  290. score_val = item.get("得分")
  291. reason_val = item.get("理由")
  292. code_key = None
  293. if "内容制作" in subkey or "知识" in subkey:
  294. code_key = "relevance_production"
  295. elif "query" in subkey or "检索" in subkey:
  296. code_key = "relevance_query"
  297. if code_key and score_val is not None:
  298. try:
  299. fs[code_key] = float(score_val)
  300. if reason_val:
  301. score_reasons[code_key] = reason_val
  302. except Exception:
  303. pass
  304. # 2. 质量
  305. q_block = e.get("质量") or {}
  306. fixed = q_block.get("固定维度") or {}
  307. # 固定维度
  308. fixed_keys = {
  309. "时效性": "recency",
  310. "热度性": "popularity",
  311. }
  312. for cn, code in fixed_keys.items():
  313. item = fixed.get(cn)
  314. if isinstance(item, dict):
  315. score_val = item.get("得分")
  316. reason_val = item.get("理由")
  317. if score_val is not None:
  318. try:
  319. fs[code] = float(score_val)
  320. if reason_val:
  321. score_reasons[code] = reason_val
  322. except Exception:
  323. pass
  324. # 用例 (真实感, 表现力)
  325. usecase = fixed.get("用例") or {}
  326. usecase_keys = {
  327. "真实感": "realism",
  328. "表现力": "expressiveness"
  329. }
  330. for cn, code in usecase_keys.items():
  331. item = usecase.get(cn)
  332. if isinstance(item, dict):
  333. score_val = item.get("得分")
  334. reason_val = item.get("理由")
  335. if score_val is not None:
  336. try:
  337. fs[code] = float(score_val)
  338. if reason_val:
  339. score_reasons[code] = reason_val
  340. except Exception:
  341. pass
  342. # 动态维度
  343. dynamic = q_block.get("动态维度") or {}
  344. # 工序
  345. proc = dynamic.get("工序") or {}
  346. if proc:
  347. item = proc.get("流程完整性")
  348. if isinstance(item, dict):
  349. score_val = item.get("得分")
  350. reason_val = item.get("理由")
  351. if score_val is not None:
  352. try:
  353. fs["procedure_completeness"] = float(score_val)
  354. if reason_val:
  355. score_reasons["procedure_completeness"] = reason_val
  356. except Exception:
  357. pass
  358. field = proc.get("字段完整性") or {}
  359. field_keys = {
  360. "输入完整性": "procedure_input",
  361. "实现完整性": "procedure_implementation",
  362. "输出完整性": "procedure_output"
  363. }
  364. for cn, code in field_keys.items():
  365. item = field.get(cn)
  366. if isinstance(item, dict):
  367. score_val = item.get("得分")
  368. reason_val = item.get("理由")
  369. if score_val is not None:
  370. try:
  371. fs[code] = float(score_val)
  372. if reason_val:
  373. score_reasons[code] = reason_val
  374. except Exception:
  375. pass
  376. item = proc.get("泛化性")
  377. if isinstance(item, dict):
  378. score_val = item.get("得分")
  379. reason_val = item.get("理由")
  380. if score_val is not None:
  381. try:
  382. fs["procedure_generality"] = float(score_val)
  383. if reason_val:
  384. score_reasons["procedure_generality"] = reason_val
  385. except Exception:
  386. pass
  387. # 能力
  388. cap = dynamic.get("能力") or dynamic.get("步骤") or {}
  389. if cap:
  390. field = cap.get("字段完整性") or {}
  391. field_keys = {
  392. "输入完整性": "step_input",
  393. "实现完整性": "step_implementation",
  394. "输出完整性": "step_output"
  395. }
  396. for cn, code in field_keys.items():
  397. item = field.get(cn)
  398. if isinstance(item, dict):
  399. score_val = item.get("得分")
  400. reason_val = item.get("理由")
  401. if score_val is not None:
  402. try:
  403. fs[code] = float(score_val)
  404. if reason_val:
  405. score_reasons[code] = reason_val
  406. except Exception:
  407. pass
  408. item = cap.get("泛化性")
  409. if isinstance(item, dict):
  410. score_val = item.get("得分")
  411. reason_val = item.get("理由")
  412. if score_val is not None:
  413. try:
  414. fs["step_generality"] = float(score_val)
  415. if reason_val:
  416. score_reasons["step_generality"] = reason_val
  417. except Exception:
  418. pass
  419. # 工具
  420. tool = dynamic.get("工具") or {}
  421. if tool:
  422. tool_keys = {
  423. "能力边界覆盖": "tool_boundary",
  424. "有效比较": "tool_comparison",
  425. "参数/接口具体性": "tool_specificity",
  426. "实操示例": "tool_example",
  427. "版本&限制": "tool_limits"
  428. }
  429. for cn, code in tool_keys.items():
  430. item = tool.get(cn)
  431. if isinstance(item, dict):
  432. score_val = item.get("得分")
  433. reason_val = item.get("理由")
  434. if score_val is not None:
  435. try:
  436. fs[code] = float(score_val)
  437. if reason_val:
  438. score_reasons[code] = reason_val
  439. except Exception:
  440. pass
  441. else:
  442. # 兼容老版 1-5 分数 schema (带 "评分" 或 old-style flatness)
  443. is_new_schema = "评分" in e or "知识类型" in e or "制作相关性" in e
  444. CN_TO_EN = {
  445. "相关性": "relevance",
  446. "成品质量": "result_quality",
  447. "可信度": "credibility",
  448. "具体用例": "concrete_use_case",
  449. "完整性": "completeness",
  450. "步骤结构": "step_structure",
  451. "步骤可复现": "step_reproducibility",
  452. "步骤可复现性": "step_reproducibility",
  453. "能力定义": "capability_definition",
  454. "实现深度": "implementation_depth",
  455. "边界失败": "boundary_failure_eval",
  456. "通用性": "generality",
  457. "能力覆盖": "capability_coverage",
  458. "有效对比": "effective_comparison",
  459. "参数具体": "param_specificity",
  460. "实操示例": "worked_example",
  461. "实操用例": "worked_example",
  462. "示例完整": "worked_example",
  463. "版本限制": "version_limits",
  464. "版本说明": "version_limits",
  465. "限制说明": "version_limits",
  466. }
  467. if is_new_schema:
  468. pf = e.get("评分") or {}
  469. for cat, metrics in pf.items():
  470. if isinstance(metrics, dict):
  471. for metric, val in metrics.items():
  472. en_key = CN_TO_EN.get(metric, metric)
  473. if isinstance(val, dict) and "得分" in val:
  474. try: fs[en_key] = int(val["得分"])
  475. except Exception: pass
  476. elif isinstance(val, (int, float)):
  477. fs[en_key] = int(val)
  478. if isinstance(val, dict) and "理由" in val:
  479. score_reasons[en_key] = val["理由"]
  480. else:
  481. fs = flat_scores(e.get("scores", {}))
  482. # 计算均分 (overall)
  483. if is_mod_schema:
  484. rel_keys = {"relevance_production", "relevance_query"}
  485. rel_vals = [v for k, v in fs.items() if k in rel_keys]
  486. qual_vals = [v for k, v in fs.items() if k not in rel_keys]
  487. rel_avg = sum(rel_vals) / len(rel_vals) if rel_vals else None
  488. qual_avg = sum(qual_vals) / len(qual_vals) if qual_vals else None
  489. if rel_avg is not None and qual_avg is not None:
  490. overall = round((rel_avg + qual_avg) / 2, 1)
  491. elif rel_avg is not None:
  492. overall = round(rel_avg, 1)
  493. elif qual_avg is not None:
  494. overall = round(qual_avg, 1)
  495. else:
  496. overall = 0.0
  497. else:
  498. overall = round(sum(fs.values()) / len(fs), 1) if fs else 0
  499. anomaly = bool(e.get("error")) or not fs
  500. grade = p.get("_quality_grade", "")
  501. fb = r.get("found_by_queries", [])
  502. # 4. 解析 制作相关性 (production_relevance)
  503. if is_mod_schema:
  504. # 新版使用 "相关性" 中的 "和内容制作知识相关" 代表制作相关性
  505. production_relevance = fs.get("relevance_production")
  506. else:
  507. if is_new_schema:
  508. pr_block = e.get("制作相关性") or {}
  509. pr_raw = pr_block.get("得分") if isinstance(pr_block, dict) else pr_block
  510. if isinstance(pr_block, dict) and "理由" in pr_block:
  511. score_reasons["production_relevance"] = pr_block["理由"]
  512. else:
  513. pr_raw = e.get("production_relevance")
  514. try: production_relevance = int(float(pr_raw)) if pr_raw is not None else None
  515. except (TypeError, ValueError): production_relevance = None
  516. recency_hard = _recency_hard(p.get("publish_timestamp", ""))
  517. # 5. 解析 判定决策 (decision) 和 理由 (reason)
  518. reason = e.get("判定理由") or e.get("reason") or ""
  519. # 根据过滤指标决定是否保留 (过滤指标判定逻辑优先,不依赖文字匹配)
  520. is_discard = False
  521. # 制作相关性低于阈值则丢弃 (新版 0-10 满分,因此低于 4 丢弃;老版低于 2 丢弃)
  522. if production_relevance is not None:
  523. threshold = 4 if is_mod_schema else 2
  524. if production_relevance < threshold:
  525. is_discard = True
  526. # 时效性低于 2 被丢弃(发布时间超两年的老帖)
  527. if recency_hard is not None and recency_hard < 2:
  528. is_discard = True
  529. # 综合均分低于阈值被丢弃 (新版低于 6 丢弃;老版低于 3 丢弃)
  530. if overall is not None:
  531. threshold_ov = 6 if is_mod_schema else 3
  532. if overall < threshold_ov:
  533. is_discard = True
  534. decision = "discard" if is_discard else "report"
  535. # Find matching procedure html
  536. procedure_html = None
  537. case_id = r.get("case_id", "")
  538. title = p.get("title", "")
  539. run_dir = RUNS_DIR / run
  540. if run_dir.is_dir():
  541. # 1. 优先扫描该帖子对应的文件夹下的任何 HTML 文件 (不限名称)
  542. # 文件夹名格式: {form}_{platform}_{channel_content_id[:8]}
  543. content_id = r.get("channel_content_id") or ""
  544. if not content_id and case_id and "_" in case_id:
  545. content_id = case_id.split("_", 1)[1]
  546. plat_key = r.get("platform") or ""
  547. if form_name and plat_key and content_id:
  548. folder_name = f"{form_name}_{plat_key}_{content_id[:8]}"
  549. case_dir = run_dir / "procedures" / folder_name
  550. if case_dir.is_dir():
  551. html_files = list(case_dir.glob("*.html"))
  552. if html_files:
  553. procedure_html = f"runs_full/{run}/procedures/{folder_name}/{html_files[0].name}"
  554. # 1b. 兼容 fallback: 批量提取模式下直接以 case_id 命名的文件夹
  555. if not procedure_html and case_id:
  556. case_dir = run_dir / "procedures" / case_id
  557. if case_dir.is_dir():
  558. html_files = list(case_dir.glob("*.html"))
  559. if html_files:
  560. procedure_html = f"runs_full/{run}/procedures/{case_id}/{html_files[0].name}"
  561. # 2. 其次匹配标准文件名: case-{case_id}.html 或 {case_id}.html
  562. candidate_dirs = [run_dir, run_dir / "procedures"]
  563. if not procedure_html and case_id:
  564. named_files = [f"case-{case_id}.html", f"{case_id}.html"]
  565. for d_dir in candidate_dirs:
  566. if d_dir.is_dir():
  567. for name in named_files:
  568. if (d_dir / name).is_file():
  569. procedure_html = f"runs_full/{run}/procedures/{name}" if d_dir.name == "procedures" else f"runs_full/{run}/{name}"
  570. break
  571. if procedure_html:
  572. break
  573. # 3. 再次匹配 HTML 内部的标准声明 (meta 标签或 HTML 注释)
  574. if not procedure_html and case_id:
  575. for d_dir in candidate_dirs:
  576. if d_dir.is_dir():
  577. for html_path in d_dir.glob("*.html"):
  578. try:
  579. content = html_path.read_text(encoding="utf-8")
  580. if f'name="case-id" content="{case_id}"' in content or \
  581. f'name="case_id" content="{case_id}"' in content or \
  582. f'<!-- case_id: {case_id} -->' in content or \
  583. f'<!-- case-id: {case_id} -->' in content:
  584. procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
  585. break
  586. except Exception:
  587. continue
  588. if procedure_html:
  589. break
  590. # 4. 最后使用标题作为兜底模糊匹配
  591. if not procedure_html and title:
  592. for d_dir in candidate_dirs:
  593. if d_dir.is_dir():
  594. for html_path in d_dir.glob("*.html"):
  595. try:
  596. content = html_path.read_text(encoding="utf-8")
  597. if title in content:
  598. procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
  599. break
  600. except Exception:
  601. continue
  602. if procedure_html:
  603. break
  604. if procedure_html:
  605. procedure_html = procedure_html.replace("runs_full/", f"{RUNS_DIR_NAME}/")
  606. return {
  607. "case_id": r.get("case_id", ""),
  608. "platform": PLAT.get(r.get("platform"), r.get("platform")), "platformKey": r.get("platform"),
  609. "title": p.get("title", "") or "(无标题)", "date": (p.get("publish_timestamp", "") or "")[:10],
  610. "url": r.get("source_url", ""), "engagement": f'{p.get("like_count", 0)} 赞',
  611. "knowledge_type": kt, "decision": decision,
  612. "tools": [KT.get(k, k) for k in kt] + ([f"质量 {grade}"] if grade else []), "found_by": fb,
  613. "images": (p.get("images") or [])[:6], "text": p.get("body_text", "") or "",
  614. "scores": fs, "overall": overall, "reason": reason, "score_reasons": score_reasons,
  615. "grade": grade, "qscore": p.get("_quality_score", 0), "anomaly": anomaly,
  616. "production_relevance": production_relevance, "recency_hard": recency_hard,
  617. "run": run, "procedure_html": procedure_html,
  618. }
  619. def scan_runs():
  620. runs = {}
  621. for f in sorted(glob.glob(str(RUNS_DIR / "*" / "form_*.json"))):
  622. try:
  623. d = json.load(open(f, encoding="utf-8"))
  624. except Exception:
  625. continue
  626. run = pathlib.Path(f).parent.name
  627. form_name = d.get("form") or ""
  628. results = [adapt(r, run, form_name) for r in d.get("results", [])]
  629. report_val = sum(1 for r in results if r.get("decision") == "report" and not r.get("anomaly"))
  630. discard_val = sum(1 for r in results if r.get("decision") == "discard" and not r.get("anomaly"))
  631. runs.setdefault(run, []).append({
  632. "form": d.get("form"), "query": d.get("query"), "original_q": d.get("original_q", ""),
  633. "requirement": d.get("requirement", ""),
  634. "platforms": d.get("platforms", []), "total": d.get("total"),
  635. "report": report_val, "discard": discard_val,
  636. "results": results,
  637. })
  638. for v in runs.values():
  639. v.sort(key=lambda x: x.get("form") or "")
  640. if MODE == "new":
  641. def _mtime(name):
  642. try:
  643. return (RUNS_DIR / name).stat().st_mtime
  644. except Exception:
  645. return 0
  646. sorted_keys = sorted(runs.keys(), key=_mtime, reverse=True)
  647. else:
  648. def _qnum(name):
  649. m = re.search(r"\d+", name)
  650. return (int(m.group()) if m else 0, name)
  651. sorted_keys = [kv[0] for kv in sorted(runs.items(), key=lambda kv: _qnum(kv[0]))]
  652. out = []
  653. for k in sorted_keys:
  654. v = runs[k]
  655. oq = v[0].get("original_q") or v[0].get("query") or ""
  656. seen, hits = set(), 0 # 知识命中数 = 各形式采纳(report)且非异常、按 url 去重后的帖子数
  657. for f in v:
  658. for r in f.get("results", []):
  659. if r.get("decision") == "report" and not r.get("anomaly") and r.get("url") not in seen:
  660. seen.add(r.get("url")); hits += 1
  661. out.append({"key": k, "forms": v, "dims": parse_dims(oq), "original_q": oq,
  662. "hits": hits, "tot": sum((f.get("total") or 0) for f in v)})
  663. active_reevals = {k: v["status"] for k, v in ACTIVE_REEVALS.items()}
  664. active_batch_tasks = {k: v["status"] for k, v in ACTIVE_BATCH_TASKS.items()}
  665. return {"queries": out, "actions": ACTIONS_TAX, "types": TYPES_TAX, "matrix": _MATRIX, "active_reevals": active_reevals, "active_batch_tasks": active_batch_tasks}
  666. class H(BaseHTTPRequestHandler):
  667. def _send(self, code, body, ctype):
  668. b = body.encode("utf-8") if isinstance(body, str) else body
  669. self.send_response(code)
  670. if ctype.startswith("text/") or ctype == "application/json" or ctype == "application/javascript":
  671. self.send_header("Content-Type", ctype + "; charset=utf-8")
  672. else:
  673. self.send_header("Content-Type", ctype)
  674. self.send_header("Content-Length", str(len(b))); self.end_headers(); self.wfile.write(b)
  675. def do_GET(self):
  676. parsed = urlparse(self.path)
  677. path = parsed.path
  678. params = parse_qs(parsed.query)
  679. if path in ("/", "/index.html"):
  680. try:
  681. html_file = "new_query.html" if MODE == "new" else "index.html"
  682. page = (HERE / html_file).read_text(encoding="utf-8")
  683. self._send(200, page, "text/html")
  684. except Exception as e:
  685. self._send(500, f"Error reading page: {e}", "text/plain")
  686. elif path == "/api/data":
  687. self._send(200, json.dumps(scan_runs(), ensure_ascii=False), "application/json")
  688. elif path == "/api/procedure_status":
  689. q = (params.get("q") or [""])[0].strip()
  690. form = (params.get("form") or [""])[0].strip()
  691. case_id = (params.get("case_id") or [""])[0].strip()
  692. if not q or not form or not case_id:
  693. self._send(400, "missing q, form, or case_id", "text/plain")
  694. return
  695. variant = (params.get("variant") or ["agent"])[0].strip()
  696. req_version = (params.get("version") or [""])[0].strip()
  697. folder_name = f"{form}_{_short_case(case_id)}"
  698. out_dir = RUNS_DIR / q / "procedures" / folder_name
  699. rel_base = f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}"
  700. task_key = f"{q}/{folder_name}"
  701. if variant == "model": # 大模型工序在 _model/ 子目录
  702. out_dir = out_dir / "_model"
  703. rel_base = rel_base + "/_model"
  704. task_key = task_key + "/_model"
  705. # 指定了非当前版本 → 直接从 history/{version}/ 取该版本的 html
  706. if req_version and req_version != _read_meta_version(out_dir):
  707. hist = out_dir / "history" / req_version
  708. hf = list(hist.glob("*.html")) if hist.is_dir() else []
  709. if hf:
  710. self._send(200, json.dumps({"status": "success",
  711. "procedure_html": f"{rel_base}/history/{req_version}/{hf[0].name}"},
  712. ensure_ascii=False), "application/json")
  713. else:
  714. self._send(200, json.dumps({"status": "not_started",
  715. "error": f"版本 {req_version} 的页面文件已不存在"}, ensure_ascii=False), "application/json")
  716. return
  717. if task_key in ACTIVE_TASKS:
  718. task = ACTIVE_TASKS[task_key]
  719. res = {"status": task["status"], "error": task["error"]}
  720. if task["status"] == "success":
  721. html_files = list(out_dir.glob("*.html")) if out_dir.is_dir() else []
  722. if html_files:
  723. res["procedure_html"] = f"{rel_base}/{html_files[0].name}"
  724. self._send(200, json.dumps(res, ensure_ascii=False), "application/json")
  725. return
  726. html_files = list(out_dir.glob("*.html")) if out_dir.is_dir() else []
  727. if html_files:
  728. self._send(200, json.dumps({
  729. "status": "success",
  730. "procedure_html": f"{rel_base}/{html_files[0].name}"
  731. }, ensure_ascii=False), "application/json")
  732. return
  733. # 仅 agent 变体兼容旧的 case_id 命名目录
  734. if variant != "model" and case_id:
  735. fallback_dir = RUNS_DIR / q / "procedures" / case_id
  736. fb_html = list(fallback_dir.glob("*.html")) if fallback_dir.is_dir() else []
  737. if fb_html:
  738. self._send(200, json.dumps({
  739. "status": "success",
  740. "procedure_html": f"{RUNS_DIR_NAME}/{q}/procedures/{case_id}/{fb_html[0].name}"
  741. }, ensure_ascii=False), "application/json")
  742. return
  743. log_path = out_dir / "_extract.log"
  744. if not log_path.is_file() and variant != "model" and case_id:
  745. log_path = RUNS_DIR / q / "procedures" / case_id / "_extract.log"
  746. if log_path.is_file():
  747. self._send(200, json.dumps({"status": "failed", "error": "Not running, but no HTML output found (possibly crashed)."}, ensure_ascii=False), "application/json")
  748. return
  749. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  750. elif path == "/api/procedure_versions":
  751. # 列某帖某变体的所有版本(DB 为主,文件系统兜底)。latest = 当前 _meta 版本。
  752. q = (params.get("q") or [""])[0].strip()
  753. form = (params.get("form") or [""])[0].strip()
  754. case_id = (params.get("case_id") or [""])[0].strip()
  755. variant = (params.get("variant") or ["agent"])[0].strip()
  756. folder_name = f"{form}_{_short_case(case_id)}"
  757. out_dir = RUNS_DIR / q / "procedures" / folder_name
  758. if variant == "model":
  759. out_dir = out_dir / "_model"
  760. latest = _read_meta_version(out_dir)
  761. by_ver = {}
  762. # DB(带 model / 工序数 / 时间)
  763. try:
  764. import db
  765. for r in db.fetch_versions("mode_dsl" if variant == "model" else "agent_dsl", q, case_id):
  766. if r.get("version"):
  767. by_ver[r["version"]] = {"version": r["version"], "model": r.get("model"),
  768. "procedure_count": r.get("procedure_count"),
  769. "created_time": str(r.get("created_time") or "")}
  770. except Exception:
  771. pass
  772. # 文件系统兜底(当前 + history/v_*)
  773. for v in ([latest] if latest else []) + (
  774. [d.name for d in (out_dir / "history").iterdir()
  775. if d.is_dir() and d.name.startswith("v_")] if (out_dir / "history").is_dir() else []):
  776. by_ver.setdefault(v, {"version": v, "model": None, "procedure_count": None, "created_time": ""})
  777. versions = sorted(by_ver.values(), key=lambda x: x["version"], reverse=True)
  778. for v in versions:
  779. v["is_latest"] = (v["version"] == latest)
  780. self._send(200, json.dumps({"versions": versions, "latest": latest}, ensure_ascii=False), "application/json")
  781. elif path == "/api/procedure_log":
  782. q = (params.get("q") or [""])[0].strip()
  783. form = (params.get("form") or [""])[0].strip()
  784. case_id = (params.get("case_id") or [""])[0].strip()
  785. if not q or not form or not case_id:
  786. self._send(400, "missing q, form, or case_id", "text/plain")
  787. return
  788. variant = (params.get("variant") or ["agent"])[0].strip()
  789. folder_name = f"{form}_{_short_case(case_id)}"
  790. if variant == "model":
  791. log_path = RUNS_DIR / q / "procedures" / folder_name / "_model" / "_extract.log"
  792. else:
  793. log_path = RUNS_DIR / q / "procedures" / folder_name / "_extract.log"
  794. if not log_path.is_file() and case_id:
  795. log_path = RUNS_DIR / q / "procedures" / case_id / "_extract.log"
  796. if not log_path.is_file():
  797. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  798. return
  799. try:
  800. content = log_path.read_text(encoding="utf-8", errors="replace")
  801. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  802. except Exception as e:
  803. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  804. elif path == "/api/spec_content":
  805. file_name = (params.get("file") or [""])[0].strip()
  806. allowed = [
  807. "README.md",
  808. "tools.md",
  809. "extraction/phase1-skeleton.md",
  810. "extraction/phase2-normalize.md",
  811. "extraction/phase3-finalize.md",
  812. "taxonomy/type_suggestions.md"
  813. ]
  814. if file_name not in allowed:
  815. self._send(400, "invalid file parameter", "text/plain")
  816. return
  817. target_path = HERE / "procedure-dsl" / "spec" / file_name
  818. if not target_path.is_file():
  819. self._send(404, "spec file not found", "text/plain")
  820. return
  821. try:
  822. content = target_path.read_text(encoding="utf-8", errors="replace")
  823. self._send(200, json.dumps({"content": content}, ensure_ascii=False), "application/json")
  824. except Exception as e:
  825. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  826. elif path == "/api/reeval_status":
  827. q = (params.get("q") or [""])[0].strip()
  828. if not q:
  829. self._send(400, "missing q", "text/plain")
  830. return
  831. if q in ACTIVE_REEVALS:
  832. self._send(200, json.dumps({
  833. "status": ACTIVE_REEVALS[q]["status"],
  834. "error": ACTIVE_REEVALS[q].get("error")
  835. }, ensure_ascii=False), "application/json")
  836. else:
  837. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  838. elif path == "/api/search_eval_status":
  839. q = (params.get("q") or [""])[0].strip()
  840. if not q:
  841. self._send(400, "missing q", "text/plain")
  842. return
  843. if q in ACTIVE_SEARCH_EVALS:
  844. self._send(200, json.dumps({
  845. "status": ACTIVE_SEARCH_EVALS[q]["status"],
  846. "error": ACTIVE_SEARCH_EVALS[q].get("error")
  847. }, ensure_ascii=False), "application/json")
  848. else:
  849. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  850. elif path == "/api/batch_generate_status":
  851. q = (params.get("q") or [""])[0].strip()
  852. if not q:
  853. self._send(400, "missing q", "text/plain")
  854. return
  855. if q in ACTIVE_BATCH_TASKS:
  856. self._send(200, json.dumps({
  857. "status": ACTIVE_BATCH_TASKS[q]["status"],
  858. "error": ACTIVE_BATCH_TASKS[q].get("error")
  859. }, ensure_ascii=False), "application/json")
  860. else:
  861. self._send(200, json.dumps({"status": "not_started"}, ensure_ascii=False), "application/json")
  862. elif path == "/api/batch_generate_log":
  863. q = (params.get("q") or [""])[0].strip()
  864. if not q:
  865. self._send(400, "missing q", "text/plain")
  866. return
  867. safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
  868. log_path = RUNS_DIR / safe_q / "procedures" / "_batch_extract.log"
  869. if not log_path.is_file():
  870. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  871. return
  872. try:
  873. content = log_path.read_text(encoding="utf-8", errors="replace")
  874. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  875. except Exception as e:
  876. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  877. elif path == "/api/search_eval_log":
  878. q = (params.get("q") or [""])[0].strip()
  879. if not q:
  880. self._send(400, "missing q", "text/plain")
  881. return
  882. safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
  883. log_path = RUNS_DIR / safe_q / "_search_eval.log"
  884. if not log_path.is_file():
  885. self._send(200, json.dumps({"log": ""}, ensure_ascii=False), "application/json")
  886. return
  887. try:
  888. content = log_path.read_text(encoding="utf-8", errors="replace")
  889. self._send(200, json.dumps({"log": content}, ensure_ascii=False), "application/json")
  890. except Exception as e:
  891. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  892. elif self.path.startswith("/runs_full/") or self.path.startswith("/runs_new/"):
  893. try:
  894. import urllib.parse
  895. clean_path = urllib.parse.unquote(self.path.split("?")[0])
  896. parts = clean_path.strip("/").split("/")
  897. target_file = HERE
  898. for part in parts:
  899. target_file = target_file / part
  900. runs_dir = HERE / parts[0]
  901. if runs_dir.resolve() in target_file.resolve().parents and target_file.is_file():
  902. content = target_file.read_bytes()
  903. ext = target_file.suffix.lower()
  904. ctype = "text/html"
  905. if ext in (".png", ".webp"):
  906. ctype = f"image/{ext[1:]}"
  907. elif ext in (".jpg", ".jpeg"):
  908. ctype = "image/jpeg"
  909. elif ext == ".json":
  910. ctype = "application/json"
  911. elif ext == ".js":
  912. ctype = "application/javascript"
  913. elif ext == ".css":
  914. ctype = "text/css"
  915. self._send(200, content, ctype)
  916. else:
  917. self._send(404, "not found", "text/plain")
  918. except Exception as e:
  919. self._send(500, f"Error: {e}", "text/plain")
  920. else:
  921. self._send(404, "not found", "text/plain")
  922. def do_POST(self):
  923. if self.path == "/api/run_search_eval":
  924. length = int(self.headers.get("Content-Length") or 0)
  925. raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
  926. try:
  927. payload = json.loads(raw)
  928. except Exception as e:
  929. self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
  930. q = (payload.get("query") or "").strip()
  931. platforms = (payload.get("platforms") or "xhs,zhihu").strip()
  932. if not q:
  933. self._send(400, json.dumps({"error": "missing query"}, ensure_ascii=False), "application/json"); return
  934. safe_q = re.sub(r'[\x00-\x1f\\/*?:"<>|]', '', q).strip()
  935. if not safe_q:
  936. self._send(400, json.dumps({"error": "invalid query name"}, ensure_ascii=False), "application/json"); return
  937. q_dir = RUNS_DIR / safe_q
  938. q_dir.mkdir(parents=True, exist_ok=True)
  939. temp_queries_file = q_dir / "temp_queries.json"
  940. try:
  941. with open(temp_queries_file, "w", encoding="utf-8") as f:
  942. json.dump([q], f, ensure_ascii=False)
  943. except Exception as e:
  944. self._send(500, json.dumps({"error": f"failed to write temp query: {e}"}), "application/json"); return
  945. log_path = q_dir / "_search_eval.log"
  946. ACTIVE_SEARCH_EVALS[q] = {
  947. "status": "running",
  948. "pid": None,
  949. "error": None
  950. }
  951. def run_search_eval_task(query_text, safe_query_name, queries_file, out_dir, target_platforms, log_file):
  952. try:
  953. cmd = [
  954. sys.executable, "-u", str(SE / "search_and_evaluate.py"),
  955. "--queries", str(queries_file),
  956. "--platforms", target_platforms,
  957. "--output-dir", str(out_dir),
  958. "--max-count", "20"
  959. ]
  960. import os
  961. env = os.environ.copy()
  962. env["PYTHONIOENCODING"] = "utf-8"
  963. env["PYTHONUTF8"] = "1"
  964. # 搜索评估未传 --eval-model → 用 search_and_evaluate 默认 gemini-flash-lite(OpenRouter)
  965. _se_model = cmd[cmd.index("--eval-model") + 1] if "--eval-model" in cmd else "google/gemini-3.1-flash-lite (默认)"
  966. flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
  967. with open(log_file, "w", encoding="utf-8", buffering=1) as log_fh:
  968. _log_header(log_fh, "🔎 搜索评估", _se_model)
  969. proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
  970. cwd=str(SE), env=env, creationflags=flags)
  971. ACTIVE_SEARCH_EVALS[query_text]["pid"] = proc.pid
  972. proc.wait()
  973. if proc.returncode == 0:
  974. evaluated_file = out_dir / "evaluated.json"
  975. if evaluated_file.is_file():
  976. with open(evaluated_file, "r", encoding="utf-8") as rf:
  977. eval_data = json.load(rf)
  978. eval_data["form"] = "A"
  979. eval_data["query"] = query_text
  980. eval_data["original_q"] = query_text
  981. with open(out_dir / "form_A.json", "w", encoding="utf-8") as wf:
  982. json.dump(eval_data, wf, ensure_ascii=False, indent=2)
  983. ACTIVE_SEARCH_EVALS[query_text]["status"] = "success"
  984. else:
  985. ACTIVE_SEARCH_EVALS[query_text]["status"] = "failed"
  986. ACTIVE_SEARCH_EVALS[query_text]["error"] = "evaluated.json not found after execution"
  987. with open(log_file, "a", encoding="utf-8") as f_err:
  988. f_err.write("\n[server error] evaluated.json not found after execution\n")
  989. else:
  990. ACTIVE_SEARCH_EVALS[query_text]["status"] = "failed"
  991. ACTIVE_SEARCH_EVALS[query_text]["error"] = f"search_and_evaluate.py exited with code {proc.returncode}"
  992. except Exception as ex:
  993. ACTIVE_SEARCH_EVALS[query_text]["status"] = "failed"
  994. ACTIVE_SEARCH_EVALS[query_text]["error"] = str(ex)
  995. try:
  996. with open(log_file, "a", encoding="utf-8") as f_err:
  997. f_err.write(f"\n[server error] Exception: {ex}\n")
  998. except Exception:
  999. pass
  1000. finally:
  1001. try:
  1002. if queries_file.is_file():
  1003. queries_file.unlink()
  1004. except Exception:
  1005. pass
  1006. t = threading.Thread(target=run_search_eval_task, args=(q, safe_q, temp_queries_file, q_dir, platforms, log_path))
  1007. t.daemon = True
  1008. t.start()
  1009. self._send(200, json.dumps({
  1010. "status": "started",
  1011. "query": q,
  1012. "log": f"{RUNS_DIR_NAME}/{safe_q}/_search_eval.log"
  1013. }, ensure_ascii=False), "application/json")
  1014. elif self.path == "/api/generate_procedure":
  1015. length = int(self.headers.get("Content-Length") or 0)
  1016. raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
  1017. try:
  1018. payload = json.loads(raw)
  1019. except Exception as e:
  1020. self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
  1021. q = (payload.get("q") or "").strip()
  1022. form = (payload.get("form") or "").strip()
  1023. case_id = (payload.get("case_id") or "").strip()
  1024. engine = (payload.get("engine") or "cyber_runner").strip()
  1025. model = (payload.get("model") or "google/gemini-3.1-flash-lite").strip()
  1026. if MODE != "new" and not re.match(r"^q\d+$", q):
  1027. self._send(400, json.dumps({"error": f"bad q (expect 'qNN'): {q!r}"}, ensure_ascii=False), "application/json"); return
  1028. if form not in ("A", "B", "C"):
  1029. self._send(400, json.dumps({"error": f"bad form: {form!r}"}, ensure_ascii=False), "application/json"); return
  1030. if not case_id:
  1031. self._send(400, json.dumps({"error": "missing case_id"}, ensure_ascii=False), "application/json"); return
  1032. q_dir = RUNS_DIR / q
  1033. form_file = q_dir / f"form_{form}.json"
  1034. if not form_file.is_file():
  1035. self._send(404, json.dumps({"error": f"form file not found: {form_file.name}"}, ensure_ascii=False), "application/json"); return
  1036. try:
  1037. with open(form_file, encoding="utf-8") as f:
  1038. form_data = json.load(f)
  1039. except Exception as e:
  1040. self._send(500, json.dumps({"error": f"failed to read form: {e}"}, ensure_ascii=False), "application/json"); return
  1041. matching_result = None
  1042. for r in form_data.get("results", []):
  1043. if r.get("case_id") == case_id:
  1044. matching_result = r
  1045. break
  1046. if not matching_result:
  1047. self._send(404, json.dumps({"error": f"case_id {case_id} not found in form {form}"}, ensure_ascii=False), "application/json"); return
  1048. folder_name = f"{form}_{_short_case(case_id)}"
  1049. out_dir = q_dir / "procedures" / folder_name
  1050. backup_procedure_history(out_dir)
  1051. out_dir.mkdir(parents=True, exist_ok=True)
  1052. src_path = out_dir / "_source.json"
  1053. try:
  1054. with open(src_path, "w", encoding="utf-8") as f:
  1055. json.dump(_source_to_dsl_input(matching_result), f, ensure_ascii=False, indent=2)
  1056. score = _composite_score(matching_result.get("llm_evaluation") or {})
  1057. _write_meta(out_dir, case_id=case_id, from_q=q, form=form, score=score)
  1058. version = _gen_version()
  1059. _stamp_version(out_dir, version)
  1060. except Exception as e:
  1061. self._send(500, json.dumps({"error": f"failed to write inputs: {e}"}, ensure_ascii=False), "application/json"); return
  1062. task_key = f"{q}/{folder_name}"
  1063. ACTIVE_TASKS[task_key] = {
  1064. "status": "running",
  1065. "start_time": datetime.now().isoformat(),
  1066. "pid": None,
  1067. "error": None
  1068. }
  1069. t = threading.Thread(target=run_extraction_task, args=(q, folder_name, case_id, src_path, out_dir, engine, model, version))
  1070. t.daemon = True
  1071. t.start()
  1072. self._send(200, json.dumps({
  1073. "status": "started",
  1074. "task_key": task_key,
  1075. "log": f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}/_extract.log"
  1076. }, ensure_ascii=False), "application/json")
  1077. elif self.path == "/api/extract_procedure_model":
  1078. # 模型提取:单次大模型直出 workflow.json(无 agent / 无 validate 循环)。
  1079. # 写同一个 procedures/{form}_{short_case}/,复用 procedure_status/log/展示。
  1080. length = int(self.headers.get("Content-Length") or 0)
  1081. raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
  1082. try:
  1083. payload = json.loads(raw)
  1084. except Exception as e:
  1085. self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
  1086. q = (payload.get("q") or "").strip()
  1087. form = (payload.get("form") or "").strip()
  1088. case_id = (payload.get("case_id") or "").strip()
  1089. model = (payload.get("model") or "anthropic/claude-sonnet-4-6").strip() # 大模型工序默认 claude
  1090. if form not in ("A", "B", "C"):
  1091. self._send(400, json.dumps({"error": f"bad form: {form!r}"}, ensure_ascii=False), "application/json"); return
  1092. if not case_id:
  1093. self._send(400, json.dumps({"error": "missing case_id"}, ensure_ascii=False), "application/json"); return
  1094. q_dir = RUNS_DIR / q
  1095. form_file = q_dir / f"form_{form}.json"
  1096. if not form_file.is_file():
  1097. self._send(404, json.dumps({"error": f"form file not found: {form_file.name}"}, ensure_ascii=False), "application/json"); return
  1098. try:
  1099. with open(form_file, encoding="utf-8") as f:
  1100. form_data = json.load(f)
  1101. except Exception as e:
  1102. self._send(500, json.dumps({"error": f"failed to read form: {e}"}, ensure_ascii=False), "application/json"); return
  1103. matching_result = next((r for r in form_data.get("results", []) if r.get("case_id") == case_id), None)
  1104. if not matching_result:
  1105. self._send(404, json.dumps({"error": f"case_id {case_id} not found in form {form}"}, ensure_ascii=False), "application/json"); return
  1106. folder_name = f"{form}_{_short_case(case_id)}"
  1107. out_dir = q_dir / "procedures" / folder_name / "_model" # 大模型工序独立子目录,不覆盖 Agent 工序
  1108. backup_procedure_history(out_dir)
  1109. out_dir.mkdir(parents=True, exist_ok=True)
  1110. src_path = out_dir / "_source.json"
  1111. try:
  1112. with open(src_path, "w", encoding="utf-8") as f:
  1113. json.dump(_source_to_dsl_input(matching_result), f, ensure_ascii=False, indent=2)
  1114. _write_meta(out_dir, case_id=case_id, from_q=q, form=form,
  1115. score=_composite_score(matching_result.get("llm_evaluation") or {}))
  1116. version = _gen_version()
  1117. _stamp_version(out_dir, version)
  1118. except Exception as e:
  1119. self._send(500, json.dumps({"error": f"failed to write inputs: {e}"}, ensure_ascii=False), "application/json"); return
  1120. task_key = f"{q}/{folder_name}/_model"
  1121. ACTIVE_TASKS[task_key] = {"status": "running", "start_time": datetime.now().isoformat(), "pid": None, "error": None}
  1122. t = threading.Thread(target=run_model_extraction_task, args=(q, folder_name, case_id, src_path, out_dir, model, version))
  1123. t.daemon = True
  1124. t.start()
  1125. self._send(200, json.dumps({
  1126. "status": "started", "task_key": task_key,
  1127. "log": f"{RUNS_DIR_NAME}/{q}/procedures/{folder_name}/_model/_extract.log"
  1128. }, ensure_ascii=False), "application/json")
  1129. elif self.path == "/api/reeval":
  1130. length = int(self.headers.get("Content-Length") or 0)
  1131. raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
  1132. try:
  1133. payload = json.loads(raw)
  1134. except Exception as e:
  1135. self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
  1136. q = (payload.get("q") or "").strip()
  1137. if MODE != "new" and not re.match(r"^q\d+$", q):
  1138. self._send(400, json.dumps({"error": f"bad q (expect 'qNN'): {q!r}"},
  1139. ensure_ascii=False), "application/json"); return
  1140. q_dir = RUNS_DIR / q
  1141. if not q_dir.is_dir():
  1142. self._send(404, json.dumps({"error": f"{RUNS_DIR_NAME}/{q} not found"}, ensure_ascii=False),
  1143. "application/json"); return
  1144. backup_reeval_history(q_dir)
  1145. log_path = q_dir / "_reeval.log"
  1146. try:
  1147. log_fh = open(log_path, "w", encoding="utf-8", buffering=1)
  1148. cmd = [sys.executable, "-u", str(SE / "batch_3forms.py"),
  1149. "--reeval", "--reeval-q", q, "--output-dir", str(RUNS_DIR)]
  1150. flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
  1151. proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
  1152. cwd=str(SE), creationflags=flags)
  1153. ACTIVE_REEVALS[q] = {
  1154. "status": "running",
  1155. "pid": proc.pid,
  1156. "error": None
  1157. }
  1158. def wait_reeval(q_key, p_obj, fh):
  1159. try:
  1160. p_obj.wait()
  1161. if p_obj.returncode == 0:
  1162. ACTIVE_REEVALS[q_key]["status"] = "success"
  1163. else:
  1164. ACTIVE_REEVALS[q_key]["status"] = "failed"
  1165. ACTIVE_REEVALS[q_key]["error"] = f"Subprocess exited with code {p_obj.returncode}"
  1166. except Exception as ex:
  1167. ACTIVE_REEVALS[q_key]["status"] = "failed"
  1168. ACTIVE_REEVALS[q_key]["error"] = str(ex)
  1169. finally:
  1170. try:
  1171. fh.close()
  1172. except Exception:
  1173. pass
  1174. t = threading.Thread(target=wait_reeval, args=(q, proc, log_fh))
  1175. t.daemon = True
  1176. t.start()
  1177. self._send(200, json.dumps(
  1178. {"status": "started", "pid": proc.pid, "q": q,
  1179. "log": str(log_path.relative_to(HERE))},
  1180. ensure_ascii=False), "application/json")
  1181. except Exception as e:
  1182. self._send(500, json.dumps({"error": f"failed to start: {e}"},
  1183. ensure_ascii=False), "application/json")
  1184. elif self.path == "/api/batch_generate_procedure":
  1185. length = int(self.headers.get("Content-Length") or 0)
  1186. raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
  1187. try:
  1188. payload = json.loads(raw)
  1189. except Exception as e:
  1190. self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
  1191. q = (payload.get("q") or "").strip()
  1192. form = (payload.get("form") or "A").strip()
  1193. engine = (payload.get("engine") or "cyber_runner").strip()
  1194. model = (payload.get("model") or "google/gemini-3.1-flash-lite").strip()
  1195. try:
  1196. concurrency = int(payload.get("concurrency", 4))
  1197. except Exception:
  1198. concurrency = 4
  1199. concurrency = max(1, min(16, concurrency))
  1200. if not q:
  1201. self._send(400, json.dumps({"error": "missing q"}, ensure_ascii=False), "application/json"); return
  1202. q_dir = RUNS_DIR / q
  1203. form_file = q_dir / f"form_{form}.json"
  1204. if not form_file.is_file():
  1205. self._send(404, json.dumps({"error": f"form file not found: {form_file.name}"}, ensure_ascii=False), "application/json"); return
  1206. try:
  1207. with open(form_file, encoding="utf-8") as f:
  1208. form_data = json.load(f)
  1209. except Exception as e:
  1210. self._send(500, json.dumps({"error": f"failed to read form: {e}"}, ensure_ascii=False), "application/json"); return
  1211. valid_results = []
  1212. import copy
  1213. for r in form_data.get("results", []):
  1214. adapted = adapt(r, q, form)
  1215. if adapted.get("decision") == "report" and not adapted.get("anomaly"):
  1216. r_copy = copy.deepcopy(r)
  1217. original_case_id = r.get("case_id", "")
  1218. r_copy["case_id"] = f"{form}_{_short_case(original_case_id)}"
  1219. valid_results.append(r_copy)
  1220. if not valid_results:
  1221. self._send(400, json.dumps({"error": "没有可提取工序的帖子(所有帖子都已被过滤,或都存在解析异常)"}, ensure_ascii=False), "application/json"); return
  1222. procedures_dir = q_dir / "procedures"
  1223. procedures_dir.mkdir(parents=True, exist_ok=True)
  1224. batch_posts_path = procedures_dir / "temp_batch.json"
  1225. try:
  1226. with open(batch_posts_path, "w", encoding="utf-8") as f:
  1227. json.dump({"results": valid_results}, f, ensure_ascii=False, indent=2)
  1228. except Exception as e:
  1229. self._send(500, json.dumps({"error": f"failed to write temp_batch.json: {e}"}, ensure_ascii=False), "application/json"); return
  1230. ACTIVE_BATCH_TASKS[q] = {
  1231. "status": "running",
  1232. "start_time": datetime.now().isoformat(),
  1233. "pid": None,
  1234. "error": None
  1235. }
  1236. def run_batch_extraction():
  1237. try:
  1238. log_path = procedures_dir / "_batch_extract.log"
  1239. cmd = [
  1240. sys.executable, "-u", str(HERE / "procedure-dsl" / "run_cyber.py"),
  1241. str(batch_posts_path),
  1242. "--out-dir", str(procedures_dir),
  1243. "--batch",
  1244. "--batch-workers", str(concurrency),
  1245. "--model", model,
  1246. "--skill", "procedure", # mode_procedure:用直写版 skill
  1247. "--max-turns", "300"
  1248. ]
  1249. flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
  1250. with open(log_path, "w", encoding="utf-8", buffering=1) as log_fh:
  1251. _log_header(log_fh, "🔧 工序提取(批量 --skill procedure)", model, engine)
  1252. proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
  1253. cwd=str(HERE / "procedure-dsl"), creationflags=flags) # cwd=本地 procedure-dsl(run_cyber 假设的工作目录,agent 相对路径才对)
  1254. ACTIVE_BATCH_TASKS[q]["pid"] = proc.pid
  1255. proc.wait()
  1256. if proc.returncode == 0:
  1257. try:
  1258. import build_workflows
  1259. build_workflows.write_run(q, runs_dir=RUNS_DIR)
  1260. ACTIVE_BATCH_TASKS[q]["status"] = "success"
  1261. except Exception as ex:
  1262. ACTIVE_BATCH_TASKS[q]["status"] = "failed"
  1263. ACTIVE_BATCH_TASKS[q]["error"] = f"Workflows compilation failed: {ex}"
  1264. else:
  1265. ACTIVE_BATCH_TASKS[q]["status"] = "failed"
  1266. ACTIVE_BATCH_TASKS[q]["error"] = f"Batch runner exited with code {proc.returncode}"
  1267. except Exception as ex:
  1268. ACTIVE_BATCH_TASKS[q]["status"] = "failed"
  1269. ACTIVE_BATCH_TASKS[q]["error"] = str(ex)
  1270. finally:
  1271. try:
  1272. if batch_posts_path.is_file():
  1273. batch_posts_path.unlink()
  1274. except Exception:
  1275. pass
  1276. t = threading.Thread(target=run_batch_extraction)
  1277. t.daemon = True
  1278. t.start()
  1279. self._send(200, json.dumps({
  1280. "status": "started",
  1281. "q": q,
  1282. "log": f"{RUNS_DIR_NAME}/{q}/procedures/_batch_extract.log"
  1283. }, ensure_ascii=False), "application/json")
  1284. elif self.path == "/api/save_spec":
  1285. length = int(self.headers.get("Content-Length") or 0)
  1286. raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
  1287. try:
  1288. payload = json.loads(raw)
  1289. except Exception as e:
  1290. self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
  1291. file_name = (payload.get("file") or "").strip()
  1292. content = payload.get("content") or ""
  1293. allowed = [
  1294. "README.md",
  1295. "tools.md",
  1296. "extraction/phase1-skeleton.md",
  1297. "extraction/phase2-normalize.md",
  1298. "extraction/phase3-finalize.md",
  1299. "taxonomy/type_suggestions.md"
  1300. ]
  1301. if file_name not in allowed:
  1302. self._send(400, json.dumps({"error": "invalid file parameter"}), "application/json"); return
  1303. target_path = HERE / "procedure-dsl" / "spec" / file_name
  1304. try:
  1305. target_path.parent.mkdir(parents=True, exist_ok=True)
  1306. target_path.write_text(content, encoding="utf-8")
  1307. self._send(200, json.dumps({"status": "ok"}, ensure_ascii=False), "application/json")
  1308. except Exception as e:
  1309. self._send(500, json.dumps({"error": str(e)}, ensure_ascii=False), "application/json")
  1310. else:
  1311. self._send(404, json.dumps({"error": "not found"}), "application/json")
  1312. def log_message(self, *a): pass
  1313. if __name__ == "__main__":
  1314. n = len(scan_runs()["queries"])
  1315. print(f"搜索评估查看 server ({MODE} 模式):http://0.0.0.0:{PORT} ({RUNS_DIR_NAME}/ 下 {n} 个 query,实时扫描)")
  1316. ThreadingHTTPServer(("0.0.0.0", PORT), H).serve_forever()