# server.py (28 KB)
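# -*- coding: utf-8 -*-
"""Viewer server for search-evaluation cases.

Reuses the layout of 图文排版搜索评估.html (cards + dialog details + rubric score bars).
Data is scanned live from runs_full/*/form_*.json, so every new q folder added
under runs_full/ shows up after a page refresh.
Pagination: query -> three forms (A/B/C) -> three channels, laid out as three rows top to bottom.
Usage: python server.py [port]   (default 8770; open http://0.0.0.0:8770 in a browser)
"""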
import glob
import json
import pathlib
import re
import subprocess
import sys
from datetime import datetime
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from urllib.parse import unquote
  11. try: # Windows 控制台默认 cp1252,中文 print 会崩,统一切 utf-8
  12. sys.stdout.reconfigure(encoding="utf-8")
  13. except Exception:
  14. pass
  15. HERE = pathlib.Path(__file__).parent
  16. # argv[1] 仅在 __main__ 起服务时当端口用; isdigit 守卫让别的脚本 import 本模块时
  17. # (其自身 argv 多为 --flag) 不会在这里 int() 崩, evaluate_result 因此可被复用
  18. PORT = int(sys.argv[1]) if len(sys.argv) > 1 and sys.argv[1].isdigit() else 8770
  19. PLAT = {"xhs": "小红书", "gzh": "公众号", "zhihu": "知乎", "x": "X", "bili": "B站", "douyin": "抖音",
  20. "sph": "视频号", "youtube": "YouTube", "github": "GitHub", "toutiao": "头条", "weibo": "微博"}
  21. KT = {"procedure": "工序", "step": "步骤", "tool": "工具"}
  22. # 从 taxonomy 取动作叶子/类型名,用于把 original_q 解析回原始维度(动作×类型 正交)
  23. # 路径优先级:search_eval/evaluation/(主源,IDE 编辑那份就是 runtime 实际读的)
  24. # → test_script/evaluation/(历史副本兜底)→ script/evaluation/(更老兜底)
  25. # 谁也找不到时整目录扫空,server 仍能起。
  26. EVALDIR = HERE / "evaluation"
  27. if not EVALDIR.exists():
  28. EVALDIR = HERE.parent.parent / "test_script" / "evaluation"
  29. if not EVALDIR.exists():
  30. EVALDIR = HERE.parent / "evaluation"
  31. try:
  32. _jm = json.load(open(EVALDIR / "judged_matrix.json", encoding="utf-8"))
  33. ACT_L1 = {a["name"]: a["l1"] for a in _jm["actions"]}
  34. ACTION_SET = set(ACT_L1)
  35. TYPE_SET = {t["name"] for t in _jm["types"]}
  36. ACTIONS_TAX = [{"name": a["name"], "l1": a["l1"], "l2": a.get("l2", "")} for a in _jm["actions"]]
  37. TYPES_TAX = [{"name": t["name"], "l1": t["l1"]} for t in _jm["types"]]
  38. # taxonomy 顺序沿用 judged_matrix(严格版);矩阵分值改用 type_action_scores(宽松版) —
  39. # 两份是同一组 27×50 cell 的独立 gemini judging,前者只 53 格到 tier3,后者 156 格到 score3
  40. _tas = json.load(open(EVALDIR / "type_action_scores.json", encoding="utf-8"))["scores"]
  41. _MATRIX = []
  42. for a in _jm["actions"]:
  43. row = []
  44. for t in _jm["types"]:
  45. rec = _tas.get(t["name"], {}).get(a["name"])
  46. row.append({"tier": rec["score"], "r": rec.get("reason", "")} if rec else {})
  47. _MATRIX.append(row)
  48. except Exception:
  49. ACT_L1, ACTION_SET, TYPE_SET, ACTIONS_TAX, TYPES_TAX, _MATRIX = {}, set(), set(), [], [], []
  50. MODSET = {"文", "图", "视频", "音频"}
  51. TOOLQUAL = {"AI": "AI 模型", "软件": "桌面 APP", "电脑端": "桌面 APP", "在线": "云端 Web",
  52. "网页版": "云端 Web", "代码": "API·CLI", "命令行": "API·CLI", "插件": "插件扩展"}
  53. def parse_dims(oq):
  54. """把组合 query(如 '文 元素生成 提示词 教程')解析回 {动作, 类型, 动作L1, 约束}。"""
  55. toks = (oq or "").split()
  56. action = next((t for t in toks if t in ACTION_SET), None)
  57. type_ = next((t for t in toks if t in TYPE_SET), None)
  58. cons = None
  59. if toks:
  60. t0 = toks[0]
  61. if t0 in MODSET:
  62. cons = {"kind": "模态", "value": t0}
  63. elif t0 in TOOLQUAL:
  64. cons = {"kind": "工具类型", "value": TOOLQUAL[t0]}
  65. return {"action": action, "type": type_, "action_l1": ACT_L1.get(action, ""), "constraint": cons}
  66. def flat_scores(sc):
  67. f = {}
  68. for k, v in (sc or {}).items():
  69. if isinstance(v, dict):
  70. for kk, vv in v.items():
  71. try: f[kk] = int(vv)
  72. except Exception: pass
  73. else:
  74. try: f[k] = int(v)
  75. except Exception: pass
  76. return f
  77. def _recency_hard(date_str):
  78. """按 publish_timestamp 头 10 字符(YYYY-MM-DD)算硬时效:半年内=3 / 两年内=2 / 更早=1。
  79. 取代原 LLM 评的 recency 维度——脚本算更稳,发布时间在帖子抓取时就有,无需 LLM token。
  80. """
  81. try:
  82. d = datetime.strptime((date_str or "")[:10], "%Y-%m-%d")
  83. except (ValueError, TypeError):
  84. return None
  85. days = (datetime.now() - d).days
  86. if days <= 180: return 3
  87. if days <= 730: return 2
  88. return 1
  89. def evaluate_result(r):
  90. """纯评分 + 丢弃判定 (不碰文件系统)。server.adapt 与 batch 选片共用这一真源,
  91. 避免评分 schema 再变时两边逻辑漂移。返回 dict 含 overall / decision / knowledge_type 等。"""
  92. p = r.get("post", {}); e = r.get("llm_evaluation", {})
  93. # 1. 解析 知识类型 (knowledge_type)
  94. kt = []
  95. kt_raw = e.get("知识类型") or e.get("knowledge_type") or []
  96. for k in kt_raw:
  97. if k in ("工序", "procedure"): kt.append("procedure")
  98. elif k in ("能力", "步骤", "step"): kt.append("step")
  99. elif k in ("工具", "tool"): kt.append("tool")
  100. fs = {}
  101. score_reasons = {}
  102. # 检测是否为 eval_prompt_sample-mod 里的新版 0-10 分数 schema
  103. is_mod_schema = "相关性" in e and isinstance(e["相关性"], dict) and ("和内容制作知识相关" in e["相关性"] or "和 query 相关" in e["相关性"])
  104. if is_mod_schema:
  105. # 新版 0-10 分数格式解析
  106. # 1. 相关性
  107. rel = e.get("相关性") or {}
  108. for subkey, item in rel.items():
  109. if isinstance(item, dict):
  110. score_val = item.get("得分")
  111. reason_val = item.get("理由")
  112. code_key = None
  113. if "内容制作" in subkey or "知识" in subkey:
  114. code_key = "relevance_production"
  115. elif "query" in subkey or "检索" in subkey:
  116. code_key = "relevance_query"
  117. if code_key and score_val is not None:
  118. try:
  119. fs[code_key] = float(score_val)
  120. if reason_val:
  121. score_reasons[code_key] = reason_val
  122. except Exception:
  123. pass
  124. # 2. 质量
  125. q_block = e.get("质量") or {}
  126. fixed = q_block.get("固定维度") or {}
  127. # 固定维度
  128. fixed_keys = {
  129. "时效性": "recency",
  130. "热度性": "popularity",
  131. "评论反馈": "feedback"
  132. }
  133. for cn, code in fixed_keys.items():
  134. item = fixed.get(cn)
  135. if isinstance(item, dict):
  136. score_val = item.get("得分")
  137. reason_val = item.get("理由")
  138. if score_val is not None:
  139. try:
  140. fs[code] = float(score_val)
  141. if reason_val:
  142. score_reasons[code] = reason_val
  143. except Exception:
  144. pass
  145. # 用例 (真实感, 表现力)
  146. usecase = fixed.get("用例") or {}
  147. usecase_keys = {
  148. "真实感": "realism",
  149. "表现力": "expressiveness"
  150. }
  151. for cn, code in usecase_keys.items():
  152. item = usecase.get(cn)
  153. if isinstance(item, dict):
  154. score_val = item.get("得分")
  155. reason_val = item.get("理由")
  156. if score_val is not None:
  157. try:
  158. fs[code] = float(score_val)
  159. if reason_val:
  160. score_reasons[code] = reason_val
  161. except Exception:
  162. pass
  163. # 动态维度
  164. dynamic = q_block.get("动态维度") or {}
  165. # 工序
  166. proc = dynamic.get("工序") or {}
  167. if proc:
  168. item = proc.get("流程完整性")
  169. if isinstance(item, dict):
  170. score_val = item.get("得分")
  171. reason_val = item.get("理由")
  172. if score_val is not None:
  173. try:
  174. fs["procedure_completeness"] = float(score_val)
  175. if reason_val:
  176. score_reasons["procedure_completeness"] = reason_val
  177. except Exception:
  178. pass
  179. field = proc.get("字段完整性") or {}
  180. field_keys = {
  181. "输入完整性": "procedure_input",
  182. "实现完整性": "procedure_implementation",
  183. "输出完整性": "procedure_output"
  184. }
  185. for cn, code in field_keys.items():
  186. item = field.get(cn)
  187. if isinstance(item, dict):
  188. score_val = item.get("得分")
  189. reason_val = item.get("理由")
  190. if score_val is not None:
  191. try:
  192. fs[code] = float(score_val)
  193. if reason_val:
  194. score_reasons[code] = reason_val
  195. except Exception:
  196. pass
  197. item = proc.get("泛化性")
  198. if isinstance(item, dict):
  199. score_val = item.get("得分")
  200. reason_val = item.get("理由")
  201. if score_val is not None:
  202. try:
  203. fs["procedure_generality"] = float(score_val)
  204. if reason_val:
  205. score_reasons["procedure_generality"] = reason_val
  206. except Exception:
  207. pass
  208. # 能力
  209. cap = dynamic.get("能力") or dynamic.get("步骤") or {}
  210. if cap:
  211. field = cap.get("字段完整性") or {}
  212. field_keys = {
  213. "输入完整性": "step_input",
  214. "实现完整性": "step_implementation",
  215. "输出完整性": "step_output"
  216. }
  217. for cn, code in field_keys.items():
  218. item = field.get(cn)
  219. if isinstance(item, dict):
  220. score_val = item.get("得分")
  221. reason_val = item.get("理由")
  222. if score_val is not None:
  223. try:
  224. fs[code] = float(score_val)
  225. if reason_val:
  226. score_reasons[code] = reason_val
  227. except Exception:
  228. pass
  229. item = cap.get("泛化性")
  230. if isinstance(item, dict):
  231. score_val = item.get("得分")
  232. reason_val = item.get("理由")
  233. if score_val is not None:
  234. try:
  235. fs["step_generality"] = float(score_val)
  236. if reason_val:
  237. score_reasons["step_generality"] = reason_val
  238. except Exception:
  239. pass
  240. # 工具
  241. tool = dynamic.get("工具") or {}
  242. if tool:
  243. tool_keys = {
  244. "能力边界覆盖": "tool_boundary",
  245. "有效比较": "tool_comparison",
  246. "参数/接口具体性": "tool_specificity",
  247. "实操示例": "tool_example",
  248. "版本&限制": "tool_limits"
  249. }
  250. for cn, code in tool_keys.items():
  251. item = tool.get(cn)
  252. if isinstance(item, dict):
  253. score_val = item.get("得分")
  254. reason_val = item.get("理由")
  255. if score_val is not None:
  256. try:
  257. fs[code] = float(score_val)
  258. if reason_val:
  259. score_reasons[code] = reason_val
  260. except Exception:
  261. pass
  262. else:
  263. # 兼容老版 1-5 分数 schema (带 "评分" 或 old-style flatness)
  264. is_new_schema = "评分" in e or "知识类型" in e or "制作相关性" in e
  265. CN_TO_EN = {
  266. "相关性": "relevance",
  267. "成品质量": "result_quality",
  268. "可信度": "credibility",
  269. "具体用例": "concrete_use_case",
  270. "完整性": "completeness",
  271. "步骤结构": "step_structure",
  272. "步骤可复现": "step_reproducibility",
  273. "步骤可复现性": "step_reproducibility",
  274. "能力定义": "capability_definition",
  275. "实现深度": "implementation_depth",
  276. "边界失败": "boundary_failure_eval",
  277. "通用性": "generality",
  278. "能力覆盖": "capability_coverage",
  279. "有效对比": "effective_comparison",
  280. "参数具体": "param_specificity",
  281. "实操示例": "worked_example",
  282. "实操用例": "worked_example",
  283. "示例完整": "worked_example",
  284. "版本限制": "version_limits",
  285. "版本说明": "version_limits",
  286. "限制说明": "version_limits",
  287. }
  288. if is_new_schema:
  289. pf = e.get("评分") or {}
  290. for cat, metrics in pf.items():
  291. if isinstance(metrics, dict):
  292. for metric, val in metrics.items():
  293. en_key = CN_TO_EN.get(metric, metric)
  294. if isinstance(val, dict) and "得分" in val:
  295. try: fs[en_key] = int(val["得分"])
  296. except Exception: pass
  297. elif isinstance(val, (int, float)):
  298. fs[en_key] = int(val)
  299. if isinstance(val, dict) and "理由" in val:
  300. score_reasons[en_key] = val["理由"]
  301. else:
  302. fs = flat_scores(e.get("scores", {}))
  303. # 计算均分 (overall)
  304. if is_mod_schema:
  305. rel_keys = {"relevance_production", "relevance_query"}
  306. rel_vals = [v for k, v in fs.items() if k in rel_keys]
  307. qual_vals = [v for k, v in fs.items() if k not in rel_keys]
  308. rel_avg = sum(rel_vals) / len(rel_vals) if rel_vals else None
  309. qual_avg = sum(qual_vals) / len(qual_vals) if qual_vals else None
  310. if rel_avg is not None and qual_avg is not None:
  311. overall = round((rel_avg + qual_avg) / 2, 1)
  312. elif rel_avg is not None:
  313. overall = round(rel_avg, 1)
  314. elif qual_avg is not None:
  315. overall = round(qual_avg, 1)
  316. else:
  317. overall = 0.0
  318. else:
  319. overall = round(sum(fs.values()) / len(fs), 1) if fs else 0
  320. anomaly = bool(e.get("error")) or not fs
  321. grade = p.get("_quality_grade", "")
  322. fb = r.get("found_by_queries", [])
  323. # 4. 解析 制作相关性 (production_relevance)
  324. if is_mod_schema:
  325. # 新版使用 "相关性" 中的 "和内容制作知识相关" 代表制作相关性
  326. production_relevance = fs.get("relevance_production")
  327. else:
  328. if is_new_schema:
  329. pr_block = e.get("制作相关性") or {}
  330. pr_raw = pr_block.get("得分") if isinstance(pr_block, dict) else pr_block
  331. if isinstance(pr_block, dict) and "理由" in pr_block:
  332. score_reasons["production_relevance"] = pr_block["理由"]
  333. else:
  334. pr_raw = e.get("production_relevance")
  335. try: production_relevance = int(float(pr_raw)) if pr_raw is not None else None
  336. except (TypeError, ValueError): production_relevance = None
  337. recency_hard = _recency_hard(p.get("publish_timestamp", ""))
  338. # 5. 解析 判定决策 (decision) 和 理由 (reason)
  339. reason = e.get("判定理由") or e.get("reason") or ""
  340. # 根据过滤指标决定是否保留 (过滤指标判定逻辑优先,不依赖文字匹配)
  341. is_discard = False
  342. # 制作相关性低于阈值则丢弃 (新版 0-10 满分,因此低于 4 丢弃;老版低于 2 丢弃)
  343. if production_relevance is not None:
  344. threshold = 4 if is_mod_schema else 2
  345. if production_relevance < threshold:
  346. is_discard = True
  347. # 时效性低于 2 被丢弃(发布时间超两年的老帖)
  348. if recency_hard is not None and recency_hard < 2:
  349. is_discard = True
  350. # 综合均分低于阈值被丢弃 (新版低于 6 丢弃;老版低于 3 丢弃)
  351. if overall is not None:
  352. threshold_ov = 6 if is_mod_schema else 3
  353. if overall < threshold_ov:
  354. is_discard = True
  355. decision = "discard" if is_discard else "report"
  356. return {
  357. "p": p, "e": e, "knowledge_type": kt, "scores": fs, "score_reasons": score_reasons,
  358. "overall": overall, "scale": 10 if is_mod_schema else 5, # overall 满分, 跨 schema 归一化用
  359. "anomaly": anomaly, "grade": grade, "found_by": fb,
  360. "production_relevance": production_relevance, "recency_hard": recency_hard,
  361. "reason": reason, "decision": decision,
  362. }
  363. def adapt(r, run, form_name=None):
  364. ev = evaluate_result(r)
  365. p, e = ev["p"], ev["e"]
  366. kt, fs, score_reasons = ev["knowledge_type"], ev["scores"], ev["score_reasons"]
  367. overall, anomaly, grade, fb = ev["overall"], ev["anomaly"], ev["grade"], ev["found_by"]
  368. production_relevance, recency_hard = ev["production_relevance"], ev["recency_hard"]
  369. reason, decision = ev["reason"], ev["decision"]
  370. # Find matching procedure html
  371. procedure_html = None
  372. case_id = r.get("case_id", "")
  373. title = p.get("title", "")
  374. run_dir = HERE / "runs_full" / run
  375. if run_dir.is_dir():
  376. # 1. 优先扫描该帖子对应的文件夹下的任何 HTML 文件 (不限名称)
  377. # 文件夹名格式: {form}_{platform}_{channel_content_id[:8]}
  378. content_id = r.get("channel_content_id") or ""
  379. if not content_id and case_id and "_" in case_id:
  380. content_id = case_id.split("_", 1)[1]
  381. plat_key = r.get("platform") or ""
  382. if form_name and plat_key and content_id:
  383. folder_name = f"{form_name}_{plat_key}_{content_id[:8]}"
  384. case_dir = run_dir / "procedures" / folder_name
  385. if case_dir.is_dir():
  386. html_files = list(case_dir.glob("*.html"))
  387. if html_files:
  388. procedure_html = f"runs_full/{run}/procedures/{folder_name}/{html_files[0].name}"
  389. # 2. 其次匹配标准文件名: case-{case_id}.html 或 {case_id}.html
  390. candidate_dirs = [run_dir, run_dir / "procedures"]
  391. if not procedure_html and case_id:
  392. named_files = [f"case-{case_id}.html", f"{case_id}.html"]
  393. for d_dir in candidate_dirs:
  394. if d_dir.is_dir():
  395. for name in named_files:
  396. if (d_dir / name).is_file():
  397. procedure_html = f"runs_full/{run}/procedures/{name}" if d_dir.name == "procedures" else f"runs_full/{run}/{name}"
  398. break
  399. if procedure_html:
  400. break
  401. # 3. 再次匹配 HTML 内部的标准声明 (meta 标签或 HTML 注释)
  402. if not procedure_html and case_id:
  403. for d_dir in candidate_dirs:
  404. if d_dir.is_dir():
  405. for html_path in d_dir.glob("*.html"):
  406. try:
  407. content = html_path.read_text(encoding="utf-8")
  408. if f'name="case-id" content="{case_id}"' in content or \
  409. f'name="case_id" content="{case_id}"' in content or \
  410. f'<!-- case_id: {case_id} -->' in content or \
  411. f'<!-- case-id: {case_id} -->' in content:
  412. procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
  413. break
  414. except Exception:
  415. continue
  416. if procedure_html:
  417. break
  418. # 4. 最后使用标题作为兜底模糊匹配
  419. if not procedure_html and title:
  420. for d_dir in candidate_dirs:
  421. if d_dir.is_dir():
  422. for html_path in d_dir.glob("*.html"):
  423. try:
  424. content = html_path.read_text(encoding="utf-8")
  425. if title in content:
  426. procedure_html = f"runs_full/{run}/procedures/{html_path.name}" if d_dir.name == "procedures" else f"runs_full/{run}/{html_path.name}"
  427. break
  428. except Exception:
  429. continue
  430. if procedure_html:
  431. break
  432. return {
  433. "platform": PLAT.get(r.get("platform"), r.get("platform")), "platformKey": r.get("platform"),
  434. "title": p.get("title", "") or "(无标题)", "date": (p.get("publish_timestamp", "") or "")[:10],
  435. "url": r.get("source_url", ""), "engagement": f'{p.get("like_count", 0)} 赞',
  436. "knowledge_type": kt, "decision": decision,
  437. "tools": [KT.get(k, k) for k in kt] + ([f"质量 {grade}"] if grade else []), "found_by": fb,
  438. "images": (p.get("images") or [])[:6], "text": p.get("body_text", "") or "",
  439. "scores": fs, "overall": overall, "reason": reason, "score_reasons": score_reasons,
  440. "grade": grade, "qscore": p.get("_quality_score", 0), "anomaly": anomaly,
  441. "production_relevance": production_relevance, "recency_hard": recency_hard,
  442. "run": run, "procedure_html": procedure_html,
  443. }
  444. def scan_runs():
  445. runs = {}
  446. for f in sorted(glob.glob(str(HERE / "runs_full" / "*" / "form_*.json"))):
  447. try:
  448. d = json.load(open(f, encoding="utf-8"))
  449. except Exception:
  450. continue
  451. run = pathlib.Path(f).parent.name
  452. form_name = d.get("form") or ""
  453. results = [adapt(r, run, form_name) for r in d.get("results", [])]
  454. report_val = sum(1 for r in results if r.get("decision") == "report" and not r.get("anomaly"))
  455. discard_val = sum(1 for r in results if r.get("decision") == "discard" and not r.get("anomaly"))
  456. runs.setdefault(run, []).append({
  457. "form": d.get("form"), "query": d.get("query"), "original_q": d.get("original_q", ""),
  458. "requirement": d.get("requirement", ""),
  459. "platforms": d.get("platforms", []), "total": d.get("total"),
  460. "report": report_val, "discard": discard_val,
  461. "results": results,
  462. })
  463. for v in runs.values():
  464. v.sort(key=lambda x: x.get("form") or "")
  465. def _qnum(name): # "q156" → 156,按数字排,避免 "q156" < "q99" 的字符串误排
  466. m = re.search(r"\d+", name)
  467. return (int(m.group()) if m else 0, name)
  468. out = []
  469. for k, v in sorted(runs.items(), key=lambda kv: _qnum(kv[0])):
  470. oq = v[0].get("original_q") or v[0].get("query") or ""
  471. seen, hits = set(), 0 # 知识命中数 = 各形式采纳(report)且非异常、按 url 去重后的帖子数
  472. for f in v:
  473. for r in f.get("results", []):
  474. if r.get("decision") == "report" and not r.get("anomaly") and r.get("url") not in seen:
  475. seen.add(r.get("url")); hits += 1
  476. out.append({"key": k, "forms": v, "dims": parse_dims(oq), "original_q": oq,
  477. "hits": hits, "tot": sum((f.get("total") or 0) for f in v)})
  478. return {"queries": out, "actions": ACTIONS_TAX, "types": TYPES_TAX, "matrix": _MATRIX}
  479. class H(BaseHTTPRequestHandler):
  480. def _send(self, code, body, ctype):
  481. b = body.encode("utf-8") if isinstance(body, str) else body
  482. self.send_response(code)
  483. if ctype.startswith("text/") or ctype == "application/json" or ctype == "application/javascript":
  484. self.send_header("Content-Type", ctype + "; charset=utf-8")
  485. else:
  486. self.send_header("Content-Type", ctype)
  487. self.send_header("Content-Length", str(len(b))); self.end_headers(); self.wfile.write(b)
  488. def do_GET(self):
  489. if self.path in ("/", "/index.html"):
  490. try:
  491. page = (HERE / "index.html").read_text(encoding="utf-8")
  492. self._send(200, page, "text/html")
  493. except Exception as e:
  494. self._send(500, f"Error reading index.html: {e}", "text/plain")
  495. elif self.path.startswith("/api/data"):
  496. self._send(200, json.dumps(scan_runs(), ensure_ascii=False), "application/json")
  497. elif self.path.startswith("/runs_full/"):
  498. try:
  499. clean_path = self.path.split("?")[0]
  500. parts = clean_path.strip("/").split("/")
  501. target_file = HERE
  502. for part in parts:
  503. target_file = target_file / part
  504. runs_dir = HERE / "runs_full"
  505. if runs_dir.resolve() in target_file.resolve().parents and target_file.is_file():
  506. content = target_file.read_bytes()
  507. ext = target_file.suffix.lower()
  508. ctype = "text/html"
  509. if ext in (".png", ".webp"):
  510. ctype = f"image/{ext[1:]}"
  511. elif ext in (".jpg", ".jpeg"):
  512. ctype = "image/jpeg"
  513. elif ext == ".json":
  514. ctype = "application/json"
  515. elif ext == ".js":
  516. ctype = "application/javascript"
  517. elif ext == ".css":
  518. ctype = "text/css"
  519. self._send(200, content, ctype)
  520. else:
  521. self._send(404, "not found", "text/plain")
  522. except Exception as e:
  523. self._send(500, f"Error: {e}", "text/plain")
  524. else:
  525. self._send(404, "not found", "text/plain")
  526. def do_POST(self):
  527. # /api/reeval —— 后台启动 batch_3forms.py 只对指定 q 复评,立即返回(不等结果)
  528. # 复评是 LLM 调用、几十秒到几分钟;浏览器侧用 fetch 启动 + 提示用户稍后刷新,不阻塞
  529. if self.path != "/api/reeval":
  530. self._send(404, json.dumps({"error": "not found"}), "application/json"); return
  531. length = int(self.headers.get("Content-Length") or 0)
  532. raw = self.rfile.read(length).decode("utf-8") if length > 0 else "{}"
  533. try:
  534. payload = json.loads(raw)
  535. except Exception as e:
  536. self._send(400, json.dumps({"error": f"bad json: {e}"}), "application/json"); return
  537. q = (payload.get("q") or "").strip()
  538. # 限定 qNN 形式避免路径注入
  539. if not re.match(r"^q\d+$", q):
  540. self._send(400, json.dumps({"error": f"bad q (expect 'qNN'): {q!r}"},
  541. ensure_ascii=False), "application/json"); return
  542. q_dir = HERE / "runs_full" / q
  543. if not q_dir.is_dir():
  544. self._send(404, json.dumps({"error": f"runs_full/{q} not found"}, ensure_ascii=False),
  545. "application/json"); return
  546. # 后台跑 batch_3forms.py,stdout/stderr 合并写到 q_dir/_reeval.log(可 tail 看进度)
  547. log_path = q_dir / "_reeval.log"
  548. try:
  549. log_fh = open(log_path, "w", encoding="utf-8", buffering=1)
  550. cmd = [sys.executable, "-u", str(HERE / "batch_3forms.py"),
  551. "--reeval", "--reeval-q", q, "--output-dir", str(HERE / "runs_full")]
  552. flags = subprocess.CREATE_NEW_PROCESS_GROUP if sys.platform == "win32" else 0
  553. proc = subprocess.Popen(cmd, stdout=log_fh, stderr=subprocess.STDOUT,
  554. cwd=str(HERE), creationflags=flags)
  555. self._send(200, json.dumps(
  556. {"status": "started", "pid": proc.pid, "q": q,
  557. "log": str(log_path.relative_to(HERE))},
  558. ensure_ascii=False), "application/json")
  559. except Exception as e:
  560. self._send(500, json.dumps({"error": f"failed to start: {e}"},
  561. ensure_ascii=False), "application/json")
  562. def log_message(self, *a): pass
  563. if __name__ == "__main__":
  564. n = len(scan_runs()["queries"])
  565. print(f"搜索评估查看 server:http://0.0.0.0:{PORT} (runs_full/ 下 {n} 个 query,实时扫描)")
  566. ThreadingHTTPServer(("0.0.0.0", PORT), H).serve_forever()