run_procedure_dsl.py 50 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969
  1. #!/usr/bin/env python3
  2. """
  3. run_procedure_dsl.py — 拿 procedure-dsl/spec.md 当指令本, 让 Claude Agent SDK
  4. (走 ~/.claude OAuth Max 订阅额度) 对单个 post 跑完三阶段提取, 落 case-N.{md,html}.
  5. 设计:
  6. - 复用 Agent 仓 examples/process_pipeline/run_pipeline.py --use-claude-sdk 的 OAuth
  7. 思路: 子进程 env 把 ANTHROPIC_API_KEY / BASE_URL / AUTH_TOKEN 显式置空, 让
  8. claude CLI 回落到 OAuth 凭证.
  9. - 与 agent/llm/claude_code_oauth.py 的 one-shot wrapper 不同: 那个 max_turns=1
  10. + allowed_tools=[], 本脚本让 Agent 真正自主跑——开 Read/Write/Bash/Edit/Glob/Grep,
  11. multi-turn, 自己读 spec.md / source / 调 bin/taxonomy-lookup.py / 写产物.
  12. - 单 post 输入, 多终端 = 多并行 (用户自己开).
  13. 用法 (本脚本位于 procedure-dsl/, SDK cwd 默认设到 procedure-dsl/):
  14. # source 是 input/case-N-raw.json, 脚本自动从 image_url_list 拉图作多模态.
  15. # --out-dir 必填: 是 outputs/ 下的目录名, 产物全落这里; case_id 自动从它的 basename 推.
  16. python run_procedure_dsl.py input/case-2-raw.json --out-dir case-2
  17. python run_procedure_dsl.py input/case-2-raw.json --out-dir case-2 \\
  18. --extra-image /path/to/local-ref.png --model claude-sonnet-4-6
  19. # 跑实验版 spec-test/ 对比 (产物落 outputs/case-2_test/):
  20. python run_procedure_dsl.py input/case-2-raw.json --out-dir case-2_test --version test
  21. # 中断后恢复 (产物保留, agent 重读磁盘接着跑):
  22. python run_procedure_dsl.py input/case-2-raw.json --out-dir case-2 --resume
  23. source 文件 schema (procedure-dsl/input/*.json 约定):
  24. {
  25. "title": str, "link": str, "body_text": str,
  26. "image_url_list": [{"image_type": int, "image_url": str}, ...],
  27. "publish_timestamp": ..., "channel_account_name": str,
  28. }
  29. """
  30. import argparse
  31. import asyncio
  32. import base64
  33. import json
  34. import logging
  35. import sys
  36. import time
  37. from datetime import datetime
  38. from pathlib import Path
  39. from typing import Any, Dict, List
  40. # run_procedure_dsl.py → procedure-dsl/
  41. DSL_ROOT = Path(__file__).resolve().parent
  42. def _derive_case_id(out_dir: str) -> str:
  43. """从 --out-dir 自动派生 case_id (用于 prompt 文件名 + case_data.case_id + suggestions record).
  44. 规则: Path(out_dir).name 去掉 "case-" 前缀 (--out-dir 是 outputs/ 下的相对名).
  45. --out-dir case-5 → "5"
  46. --out-dir case-5-newflow → "5-newflow"
  47. --out-dir photo-album → "photo-album" (无 case- 前缀也行)
  48. --out-dir case-11 → "11"
  49. 这样用户传 --out-dir 控制目录名, case_id 自动跟随 — 不再撞历史 case 目录.
  50. """
  51. name = Path(out_dir).name
  52. if name.startswith("case-"):
  53. name = name[5:]
  54. return name or "?"
  55. def _resolve_out_dir(out_dir_arg: str, workdir: Path) -> Path:
  56. """把 --out-dir 解析成实际工作目录. --out-dir 是 `outputs/` 下的相对名:
  57. --out-dir newdir → <workdir>/outputs/newdir
  58. --out-dir case-5 → <workdir>/outputs/case-5
  59. --out-dir outputs/newdir → <workdir>/outputs/newdir (容忍已带 outputs/ 前缀, 不重复嵌套)
  60. 传绝对路径则原样用 (escape hatch).
  61. """
  62. p = Path(out_dir_arg)
  63. if p.is_absolute():
  64. return p.resolve()
  65. parts = p.parts
  66. if parts and parts[0] == "outputs": # 容忍历史习惯的 outputs/ 前缀
  67. parts = parts[1:]
  68. base = workdir / "outputs"
  69. return (base / Path(*parts)).resolve() if parts else base.resolve()
  70. # Sonnet 4.6 公开价目 (per 1M tokens, USD), 用于 estimated cost.
  71. # OAuth Max 模式 SDK 报的 total_cost_usd 通常是 None — 我们自己按 token 算个
  72. # "如果走 API 等价多少钱"作为 quota 消耗的直观代理.
  73. PRICE_SONNET_4_6 = {
  74. "input": 3.00,
  75. "output": 15.00,
  76. "cache_creation": 3.75,
  77. "cache_read": 0.30,
  78. }
  79. def _estimate_cost_usd(usage: Dict[str, Any]) -> float:
  80. if not usage:
  81. return 0.0
  82. return (
  83. usage.get("input_tokens", 0) / 1_000_000 * PRICE_SONNET_4_6["input"]
  84. + usage.get("output_tokens", 0) / 1_000_000 * PRICE_SONNET_4_6["output"]
  85. + usage.get("cache_creation_input_tokens", 0) / 1_000_000 * PRICE_SONNET_4_6["cache_creation"]
  86. + usage.get("cache_read_input_tokens", 0) / 1_000_000 * PRICE_SONNET_4_6["cache_read"]
  87. )
  88. # ──── 多模态: 图片 → Anthropic content block ──────────────────────────────────
  89. _MEDIA_TYPE = {
  90. ".png": "image/png",
  91. ".jpg": "image/jpeg",
  92. ".jpeg": "image/jpeg",
  93. ".gif": "image/gif",
  94. ".webp": "image/webp",
  95. }
  96. # client-side image cache: DSL_ROOT/.image_cache/<sha256>.<ext>
  97. # 同一 URL 多次跑 / resume 不重复下载. 不影响 SDK / OAuth, 纯本地 IO.
  98. _IMAGE_CACHE_DIR = DSL_ROOT / ".image_cache"
  99. # ── 多图压缩策略 (防止几十张图把单轮 output 撑破 32K/64K 上限) ──────────────────
  100. # 阶梯: 图少 → 单图只降分辨率; 图多 (>OVER) → 拼 3×3 九宫格把"图块数"压 9:1, 模型逐图
  101. # 铺陈的冲动小了, 骨架/output 也跟着小. (跳过极多图帖那一档在 batch 选片侧做, 见
  102. # batch_extract_procedures.py 的 _IMG_SKIP_CAP — 那边知道图数又控制 top-N 名额回填.)
  103. _IMG_MAX_EDGE = 1024 # 单图降分辨率: 长边上限 px
  104. _IMG_MONTAGE_OVER = 18 # 图数 > 此值 → 拼九宫格 (与 batch 选片阈值需各自维护, 语义不同)
  105. _IMG_GRID_CELL = 512 # 九宫格每格 px → 3×3 拼图长边 1536, 压在 Anthropic 1568 上限内
  106. # 浏览器 UA, 避免 mmbiz / 其他图床对裸 python-requests 默认 UA 直接 403.
  107. _DOWNLOAD_UA = (
  108. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
  109. "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
  110. )
  111. def _url_to_cached_path(url: str, timeout: float = 15.0) -> Path:
  112. """下载 url 到 _IMAGE_CACHE_DIR, 返回本地路径. 已缓存直接返回不重下.
  113. 用 URL 的 sha256 当文件名, 扩展名从 URL path 推断 (找不到默认 .png).
  114. 设计目的: 绕过 Anthropic 服务端对 image URL 的 robots.txt 检查
  115. (mmbiz.qpic.cn 等图床 robots disallow 会让 API 直接 400).
  116. """
  117. import hashlib
  118. from urllib.parse import urlparse
  119. from urllib.request import Request, urlopen
  120. _IMAGE_CACHE_DIR.mkdir(exist_ok=True)
  121. digest = hashlib.sha256(url.encode("utf-8")).hexdigest()[:24]
  122. ext = Path(urlparse(url).path).suffix.lower()
  123. if ext not in _MEDIA_TYPE:
  124. ext = ".png"
  125. local = _IMAGE_CACHE_DIR / f"{digest}{ext}"
  126. if local.exists() and local.stat().st_size > 0:
  127. return local
  128. req = Request(url, headers={"User-Agent": _DOWNLOAD_UA, "Accept": "image/*,*/*;q=0.8"})
  129. with urlopen(req, timeout=timeout) as resp:
  130. data = resp.read()
  131. local.write_bytes(data)
  132. return local
  133. def _resolve_local(ref: str) -> Path:
  134. """图 ref → 本地路径: URL 先下载进缓存 (绕 robots.txt), 本地路径校验存在。"""
  135. if ref.startswith(("http://", "https://")):
  136. return _url_to_cached_path(ref)
  137. p = Path(ref).expanduser().resolve()
  138. if not p.exists():
  139. raise FileNotFoundError(f"image not found: {ref}")
  140. return p
  141. def _pil_to_block(im: Any) -> Dict[str, Any]:
  142. """PIL.Image → Anthropic base64 content block (统一 JPEG q90, 体积可控)。"""
  143. import io
  144. buf = io.BytesIO()
  145. im.save(buf, format="JPEG", quality=90)
  146. return {
  147. "type": "image",
  148. "source": {"type": "base64", "media_type": "image/jpeg",
  149. "data": base64.standard_b64encode(buf.getvalue()).decode()},
  150. }
  151. _OCR_ENGINE: Any = None
  152. _OCR_UNAVAILABLE = False
  153. def _ocr_image(im: Any) -> str:
  154. """对一张 PIL.Image 做全分辨率 OCR, 返回拼接后的文字 (失败/无字 → 空串)。
  155. 九宫格会把图压到 512px 小字糊掉, 故在缩放前先 OCR 把文字走文本通道保住。
  156. RapidOCR 未装 → 整体禁用 OCR (只此一次, 之后不再尝试), 退化成纯拼图。"""
  157. global _OCR_ENGINE, _OCR_UNAVAILABLE
  158. if _OCR_UNAVAILABLE:
  159. return ""
  160. try:
  161. if _OCR_ENGINE is None:
  162. from rapidocr_onnxruntime import RapidOCR
  163. _OCR_ENGINE = RapidOCR()
  164. import numpy as np
  165. result, _ = _OCR_ENGINE(np.array(im))
  166. if not result:
  167. return ""
  168. # result: [[box, text, score], ...] 按检测顺序 (大致从上到下) → 直接拼
  169. return " ".join(seg[1] for seg in result if seg and len(seg) >= 2 and seg[1]).strip()
  170. except ImportError:
  171. _OCR_UNAVAILABLE = True
  172. print("[ocr] ⚠️ rapidocr 未装, 本次禁用 OCR (九宫格仅拼图无文字旁挂)", flush=True)
  173. return ""
  174. except Exception as e:
  175. print(f"[ocr] skip 1 张 ({type(e).__name__}: {e})", flush=True)
  176. return ""
  177. def _format_ocr_sidecar(ocr_texts: List[str]) -> str:
  178. """把每图 OCR 文字按"拼图#·格#"标注成一个文本块 (跟九宫格阅读顺序对齐)。
  179. 全空 → 返回空串 (不加块)。i 从 0 起: grid=i//9, cell=i%9 (逐行 左→右 上→下)。"""
  180. lines = []
  181. for i, t in enumerate(ocr_texts):
  182. if not t:
  183. continue
  184. lines.append(f"■ 拼图{i // 9 + 1}·格{i % 9 + 1}: {t}")
  185. if not lines:
  186. return ""
  187. return ("【配图 OCR 文字】图已压成九宫格, 小字可能糊; 以下是各格**全分辨率** OCR 原文, "
  188. "与拼图位置一一对应 (格号即拼图内 从左到右、从上到下 的序), 用它补全图里看不清的"
  189. "菜单/参数/prompt 文字:\n" + "\n".join(lines))
  190. def _montage(pil_imgs: List[Any], cell: int) -> List[Any]:
  191. """把图按每 9 张拼成一张 3×3 九宫格 (每格 cell px, 白底居中)。返回拼图列表。
  192. 最后一组不足 9 张, 余下格子留白。阅读顺序: 逐行 从左到右、从上到下 (封面=第一张拼图左上)。"""
  193. from PIL import Image
  194. grids: List[Any] = []
  195. for i in range(0, len(pil_imgs), 9):
  196. canvas = Image.new("RGB", (cell * 3, cell * 3), (255, 255, 255))
  197. for j, im in enumerate(pil_imgs[i:i + 9]):
  198. t = im.copy()
  199. t.thumbnail((cell, cell)) # 等比缩进格子
  200. row, col = divmod(j, 3)
  201. canvas.paste(t, (col * cell + (cell - t.width) // 2,
  202. row * cell + (cell - t.height) // 2))
  203. grids.append(canvas)
  204. return grids
  205. def _append_image_blocks_raw(blocks: List[Dict[str, Any]], images: List[str]) -> None:
  206. """回落路径 (PIL 不可用时): 逐图原样 base64, 不降分辨率/不拼图。"""
  207. n_ok = n_fail = 0
  208. for ref in images:
  209. try:
  210. local = _resolve_local(ref)
  211. blocks.append({
  212. "type": "image",
  213. "source": {"type": "base64",
  214. "media_type": _MEDIA_TYPE.get(local.suffix.lower(), "image/png"),
  215. "data": base64.standard_b64encode(local.read_bytes()).decode()},
  216. })
  217. n_ok += 1
  218. except Exception as e:
  219. n_fail += 1
  220. print(f"[image] skip {ref[:80]}... ({type(e).__name__}: {e})", flush=True)
  221. if images:
  222. print(f"[image] (无 PIL) {n_ok}/{len(images)} 原样 base64, {n_fail} 失败跳过", flush=True)
  223. def _append_image_blocks(blocks: List[Dict[str, Any]], images: List[str]) -> None:
  224. """把 images 转成 content block 加到 blocks,按阶梯压缩 (见 _IMG_* 常量):
  225. ≤OVER 张 → 逐图降分辨率单发; >OVER 张 → 拼 3×3 九宫格减块数。
  226. 单张下载/解码失败不阻塞整批。PIL 不可用 → 回落原样 base64。
  227. base / variant driver 共用 — 保证 image 容错策略一致。"""
  228. if not images:
  229. return
  230. try:
  231. from PIL import Image
  232. except Exception:
  233. _append_image_blocks_raw(blocks, images)
  234. return
  235. pil_imgs: List[Any] = []
  236. n_fail = 0
  237. for ref in images:
  238. try:
  239. pil_imgs.append(Image.open(_resolve_local(ref)).convert("RGB"))
  240. except Exception as e:
  241. n_fail += 1
  242. print(f"[image] skip {ref[:80]}... ({type(e).__name__}: {e})", flush=True)
  243. n = len(pil_imgs)
  244. if n == 0:
  245. print(f"[image] 0/{len(images)} 可用 ({n_fail} 失败)", flush=True)
  246. return
  247. if n > _IMG_MONTAGE_OVER:
  248. ocr_texts = [_ocr_image(im) for im in pil_imgs] # 缩放前全分辨率 OCR 保文字
  249. grids = _montage(pil_imgs, _IMG_GRID_CELL)
  250. for g in grids:
  251. blocks.append(_pil_to_block(g))
  252. sidecar = _format_ocr_sidecar(ocr_texts)
  253. if sidecar:
  254. blocks.append({"type": "text", "text": sidecar})
  255. n_ocr = sum(1 for t in ocr_texts if t)
  256. print(f"[image] {n} 张 > {_IMG_MONTAGE_OVER} → 拼九宫格 {len(grids)} 张 "
  257. f"(每张≤9图·格{_IMG_GRID_CELL}px) + {n_ocr} 格 OCR 文字旁挂, "
  258. f"{n_fail} 失败跳过", flush=True)
  259. else:
  260. for im in pil_imgs:
  261. im.thumbnail((_IMG_MAX_EDGE, _IMG_MAX_EDGE)) # 等比降分辨率 (小图不放大)
  262. blocks.append(_pil_to_block(im))
  263. print(f"[image] {n} 张单图 (降分辨率≤{_IMG_MAX_EDGE}px), {n_fail} 失败跳过", flush=True)
  264. def _image_prompt_note(n_images: int) -> str:
  265. """配图行文案: 跟 _append_image_blocks 的阶梯保持一致, 拼图时务必告诉模型
  266. 它收到的是九宫格 (否则会以为有 N 张独立图, 引用错位 / 逐图铺陈撑爆 output)。"""
  267. if n_images == 0:
  268. return "本消息未附图."
  269. if n_images > _IMG_MONTAGE_OVER:
  270. import math
  271. n_grid = math.ceil(n_images / 9)
  272. return (f"本消息附了 {n_grid} 张【九宫格拼图】—— 由原始 {n_images} 张配图按 3×3 网格拼接 "
  273. f"(每张最多含 9 个子图, 已统一降分辨率)。原图过多, 拼图是为压 token; "
  274. f"分析时把**每个格子当一张独立配图**看, 阅读顺序: 每张拼图内 从左到右、从上到下逐行; "
  275. f"封面图在第一张拼图的左上格 (老格式 image_type=2 / 新格式原序第一张)。"
  276. f"图后另附一个【配图 OCR 文字】文本块 (各格全分辨率 OCR), 拼图里小字糊掉时**以 OCR 文字为准**.")
  277. return (f"本消息附了 {n_images} 张图作多模态内容 (已降分辨率; URL 抽自老格式的 image_url_list "
  278. f"或新格式的 images; 老格式 image_type=2 封面排最前, 新格式按原序).")
  279. # ──── source JSON → 自动拉 image_url_list ────────────────────────────────────
  280. def _images_from_source(source_path: Path) -> List[str]:
  281. """从 procedure-dsl/input/*.json 抽 URL 列表, 兼容两套 schema:
  282. 1. 老 case-*-raw.json: `image_url_list: [{image_type, image_url}, ...]`,
  283. image_type=2 (封面) 排最前, 其余维持原序.
  284. 2. 新 eval_case-*.json: `images: [url_str, ...]`, 无封面信号, 原序返回.
  285. schema 错配时如果检测到疑似图字段但抽不到任何 URL, 会打 warning —
  286. multimodal pipeline 静默 0 张图比报错更难排查.
  287. """
  288. if source_path.suffix.lower() != ".json":
  289. return []
  290. try:
  291. with source_path.open(encoding="utf-8") as f:
  292. data = json.load(f)
  293. except (json.JSONDecodeError, OSError):
  294. return []
  295. if not isinstance(data, dict):
  296. return []
  297. # 新 schema (eval_case-*.json): 裸 URL 数组
  298. flat = data.get("images")
  299. if isinstance(flat, list):
  300. urls = [u for u in flat if isinstance(u, str) and u]
  301. if not urls and flat:
  302. print(f"[image] ⚠️ {source_path.name} 有 images 字段但抽不到 URL "
  303. f"(首项类型={type(flat[0]).__name__})", flush=True)
  304. return urls
  305. # 老 schema (case-*-raw.json): {image_type, image_url} dict 数组
  306. items = data.get("image_url_list")
  307. if not isinstance(items, list):
  308. return []
  309. covers, others = [], []
  310. for it in items:
  311. if not isinstance(it, dict):
  312. continue
  313. url = it.get("image_url")
  314. if not isinstance(url, str) or not url:
  315. continue
  316. (covers if it.get("image_type") == 2 else others).append(url)
  317. return covers + others
  318. # ──── 初始 user 消息 (text + 图片 blocks) ─────────────────────────────────────
  319. def _build_initial_blocks(
  320. source_ref: str, case_id: str, out_dir_arg: str, images: List[str], workdir: Path,
  321. spec_name: str = "spec",
  322. ) -> List[Dict[str, Any]]:
  323. # 全部路径用绝对路径喂给 Agent — 它训练习惯就吃绝对, 顺水推舟省心.
  324. # 用 .as_posix() 统一 forward slash, 跟 f-string 拼接的 "/..." 一致.
  325. # spec_name 由 --version 决定 (spec / spec-backup / spec-test ...), 默认 "spec".
  326. spec_dir = (workdir / spec_name).as_posix()
  327. spec_readme = (workdir / spec_name / "README.md").as_posix()
  328. spec_tools = (workdir / spec_name / "tools.md").as_posix()
  329. # --out-dir 是 outputs/ 下的相对名 (--out-dir newdir → outputs/newdir); 绝对路径原样用.
  330. _out_dir = _resolve_out_dir(out_dir_arg, workdir)
  331. case_dir = _out_dir.as_posix()
  332. scratch_dir = (_out_dir / "_scratch").as_posix()
  333. # source 路径直接用 source_ref (用户传的 positional arg). 不再从 case_id 推 —
  334. # 否则当 --out-dir 跟 source 文件名不对齐时 (e.g. --out-dir outputs/update + source=input/case-5.json),
  335. # Agent 会收到 "input/case-update-raw.json" 这种不存在路径, 浪费 10+ turn 自我探索.
  336. _src = Path(source_ref)
  337. case_input = _src.as_posix() if _src.is_absolute() else (workdir / _src).resolve().as_posix()
  338. text = f"""请按 {spec_name}/ 目录里的 SKILL 处理这个 post.
  339. ## 起手指令 (路径已给绝对值, 直接照搬, 不要改, 不要先 find 探查)
  340. 1. `Read(file_path="{spec_readme}")` — self-driven skill 的入口, 含完整 phase 加载指南 (累积式) + 自查清单 + 工具调用规则. 读完不要再 Read 它.
  341. 2. `Read(file_path="{spec_tools}")` — 外部脚本接口手册. 读完不要重读.
  342. 3. 按 README phase 加载指南**累积式**前进, 中间产物是**单个 `workflow.json` 文件**, 各 phase 都 in-place Edit 它 (不写多个中间快照):
  343. - Phase 1.1 心智模型 → `{case_dir}/understanding.md` (含 procedure 数量判断)
  344. - Phase 1.2 骨架 → **Write** `{case_dir}/workflow.json` (procedures 数组骨架)
  345. - Phase 1.3 IO 闭合 → **Edit** workflow.json 加 anchor
  346. - Phase 2A/2B/2C 归一化 → **Edit** workflow.json 加 effect/action/type/sub/form + procedures[i].type_registry
  347. - Phase 3 lint + 渲染 → 调 render-case.py + lint-case.py (不写 case_data.json, renderer 内存组装)
  348. - Phase 3 .md → Write `{case_dir}/case-{case_id}-<slug>.md`
  349. ## ❌ 重复读取禁令 (CRITICAL: ZERO REPEATED READS)
  350. 你拥有完美的长期记忆(由于 Context 累积,你读取过的所有文件内容会永远保留在你的 Context 中)。
  351. 请**绝对不要**重复读取任何文件!任何重复的 `Read` 动作都是对 Token 和回合数 Budget 的极大浪费。
  352. - **禁忌 1**:不要因为看到 spec 文档中写了 `详见 [tools.md §2]` 就去重新 `Read(file_path="{spec_tools}")`。你已经在 Turn 3 读过它了,直接检索你的记忆!
  353. - **禁忌 2**:不要因为进入了 Phase 2B,就去重新 `Read` 任何 Phase 2 的 spec 文件(如 `phase2-normalize.md`)。你已经在 Phase 2A 开始时读过它了,它就在你的记忆中,直接使用它!
  354. - 在发出任何 `Read` 指令前,必须自我核对:“我之前读过这个文件吗?”。如果读过,绝对不要再次 Read!
  355. ## 输入
  356. - case 原文: `{case_input}`
  357. schema (两种都可能):
  358. · 老格式 (case-*-raw.json): {{title, link, body_text, image_url_list:[{{image_type,image_url}}], publish_timestamp, channel_account_name}}
  359. · 新格式 (eval_case-*.json): {{title, link, body_text, images:[url_str], videos, channel, content_type, like_count, publish_timestamp, channel_content_id, ...}}
  360. 正文 (body_text) 和元数据从此文件 Read; 两种 schema 的 body_text 含义一致.
  361. - 配图: {_image_prompt_note(len(images))}
  362. ## 输出目录
  363. `{case_dir}/` (这一 case 的所有产物都放这里, 不要污染其他目录)
  364. ## 读取范围 (硬约束)
  365. 你**只能** Read 以下三处 (用绝对路径 + 子路径):
  366. 1. `{spec_dir}/` 及其所有子目录 (skill 全部内容: README.md / tools.md / 各 phase 规格文件 / output/ / taxonomy/ / templates/ / tools/ — 但 tools/*.py **不要 Read 源码**, 只通过 Bash 调用; 具体子目录名以 {spec_name}/README.md 的章节地图为准, **不要脑补 part1-/part2- 这类对称目录**)
  367. 2. `{case_input}` (当前 case 的原文, 只此一个)
  368. 3. `{case_dir}/` 目录 (你自己的工作产物, 含其下 `_scratch/`)
  369. ## 长输出 dump 区 (sanctioned scratch)
  370. 如果你需要 dump 大 Bash 输出做后续分析 (e.g. `taxonomy-lookup --subtree` 输出过大想分段读, 或 `find` 结果想保留), **只写到 `{scratch_dir}/`** (runner 已预创建):
  371. ✅ `python {spec_dir}/tools/taxonomy-lookup.py --dim 实质 --subtree /表象/视觉 > {scratch_dir}/subst_visual.txt`
  372. ✅ 然后 `Read(file_path="{scratch_dir}/subst_visual.txt")` 提取需要的段
  373. ✅ 一次性小探查 / smoke test (验证某条 tool 命令、看某段数据长啥样) 也可以放 `{scratch_dir}/`, 用完即弃
  374. ❌ **不要用 Python 脚本生成 / 批量改写 `workflow.json`** — 它由你**直接 Write 骨架 + 逐字段填** 演化 (见各 phase 规格). 写 build_workflow / add_anchors / normalize 这类脚本去拼 JSON, 反而踩转义 / 控制字符坑 (LLM 拼长 JSON 极易坏), 还把本该逐 step 的语义判断埋进一次性脚本里. 几十处字段要批量改 → 用 **`spec/tools/wf-patch.py`** (path=value 清单, 工具负责安全写 JSON + 写入即校验; 见 {spec_name}/tools.md §2), 零星单处用 Edit. `_scratch/` 的 Python **只用于「dump 大 tool 输出」和「smoke test」, 不是 workflow.json 的生产线**.
  375. ❌ **不要写到项目根 `scratch/`** (那是 dev/repo 维护用的临时区, 禁区)
  376. ❌ **不要 ls / Read 项目根 `scratch/`** (别人的 dev 临时文件, 跟你无关; 你的 scratch 在上面的 `_scratch/`)
  377. **禁读区** (不要 Read 也不要 ls 这些位置): 项目根的 spec.md / design.md (旧文件), examples/ 全目录, input/ 下其他 case-*-raw.json, outputs/case-OTHER/ (其他 case 目录), bin/ 全目录 (已废弃), **项目根 `scratch/`** (跟你的 `_scratch/` 不是一回事 — 上面 ✅ 那个 `_scratch/` 作用类似 outputs 目录下的 `_scratch/`).
  378. ## 阶段三流程 (脚本组装, 不写 case_data.json)
  379. 新工作流: Phase 2 已经把所有结构化数据 in-place Edit 进 `{case_dir}/workflow.json`. Phase 3 直接调 2 个脚本命令, **不需要写 case_data.json** (renderer 在内存把 workflow + source-input + page_title + case_id 组装成 case_data 喂给 build_html).
  380. ```bash
  381. # Step A: lint + 自动 record 新 type 到 type_suggestions.md
  382. python {spec_dir}/tools/lint-case.py --workflow {case_dir}/workflow.json --case-id {case_id}
  383. # stdout 报 "type 完整性: N 个提示" 时, 回 Phase 2 Edit workflow.json 补 procedures[i].type_registry, 重跑 lint
  384. # stdout 报 "已 record M 条新 type" 时, 表示 {spec_name}/taxonomy/type_suggestions.md 已自动同步
  385. # Step B: 渲染 HTML (--source-input 必带, --page-title --case-id 必带)
  386. python {spec_dir}/tools/render-case.py \
  387. --workflow {case_dir}/workflow.json \
  388. --source-input {case_input} \
  389. --page-title "Case {case_id} · <主题>" \
  390. --case-id {case_id} \
  391. --out {case_dir}/case-{case_id}-<slug>.html
  392. # Step C: Write {case_dir}/case-{case_id}-<slug>.md (DSL 文本版, 按 {spec_name}/output/md-structure.md §11 结构)
  393. ```
  394. **`--source-input` 行为**: renderer 自动从 raw 抽 body_text + 封面图 + 图集兜底填到 case_data.source (内存里). 微信公众号长文走 inline 图 + 封面; 小红书短文走 body + "--- 附图 ---" 末尾追加非封面图. 你**不必手工**复制 raw.body_text — 完全交给 renderer.
  395. `workflow.json` 的契约见 [{spec_name}/output/case-data.schema.json]({spec_name}/output/case-data.schema.json) 里 Procedure 的 definition (顶层 `{{procedures: [{{id, name, purpose, declarations, type_registry?, steps, return_row?}}]}}`). **不要参考其他 `outputs/case-*/` 下任何文件**.
  396. ## 其他约束
  397. - 推断补全用 `inferred: true` + `inferred_reason` 标在 IO item 上, 不要静默插入.
  398. - 完成后用一段话总结: 工序梗概 + 输出文件路径 + 你对 DSL 的关键发现.
  399. """
  400. blocks: List[Dict[str, Any]] = [{"type": "text", "text": text}]
  401. _append_image_blocks(blocks, images)
  402. return blocks
  403. # ──── Trace 写盘 (outputs/case-N/_trace.md) ───────────────────────────────────
  404. # 每次 run 把 turn / tool_use / text / result 写到一份 markdown 流水, 实时 append.
  405. # Ctrl-C 后还能离线 review Agent 走过哪些 turn / 读了哪些文件 / 调了哪些 Bash.
  406. def _fmt_tool_input(name: str, inp: Any) -> str:
  407. """tool_use input → 人读 1 行摘要."""
  408. if not isinstance(inp, dict):
  409. return f"`{repr(inp)[:200]}`"
  410. if name == "Read":
  411. suffix = ""
  412. if inp.get("offset") or inp.get("limit"):
  413. suffix = f" (offset={inp.get('offset', 0)}, limit={inp.get('limit', '-')})"
  414. return f"`{inp.get('file_path', '')}`{suffix}"
  415. if name == "Write":
  416. return f"`{inp.get('file_path', '')}` ({len(inp.get('content', '')):,} chars)"
  417. if name == "Edit":
  418. return f"`{inp.get('file_path', '')}`"
  419. if name == "Bash":
  420. cmd = inp.get("command", "")
  421. if len(cmd) > 200:
  422. cmd = cmd[:200] + "..."
  423. return f"`{cmd}`"
  424. if name == "Grep":
  425. return f"pattern=`{inp.get('pattern', '')}` path=`{inp.get('path', '')}`"
  426. if name == "Glob":
  427. return f"pattern=`{inp.get('pattern', '')}`"
  428. if name in ("Task", "Agent"):
  429. return f"`{inp.get('description', '')}` [subagent={inp.get('subagent_type', '?')}]"
  430. preview = str(inp)
  431. return f"`{preview[:200] + '...' if len(preview) > 200 else preview}`"
  432. def _trace_banner(trace_path: Path, args, resume_sid, source_path, n_images: int) -> None:
  433. """session 起手 banner. Append 模式 — 历次 run 累积在同一 _trace.md."""
  434. trace_path.parent.mkdir(parents=True, exist_ok=True)
  435. now = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
  436. if resume_sid:
  437. chunk = (
  438. f"\n\n---\n\n## ▶ Resume @ {now}\n\n"
  439. f"恢复 session `{resume_sid[:8]}` (case-{args.case_id})\n"
  440. )
  441. else:
  442. chunk = (
  443. f"\n\n---\n\n## ▶ Fresh @ {now}\n\n"
  444. f"- case: `{args.case_id}`\n"
  445. f"- source: `{source_path}`\n"
  446. f"- spec: `{getattr(args, 'spec_name', 'spec')}`\n"
  447. f"- images: `{n_images}`\n"
  448. f"- model: `{args.model}`\n"
  449. f"- max_turns: `{args.max_turns}`\n"
  450. )
  451. with trace_path.open("a", encoding="utf-8") as f:
  452. f.write(chunk)
  453. def _trace_init(trace_path: Path, sid: str, data: Dict[str, Any]) -> None:
  454. """SDK init → 写 session_id / model 到 trace."""
  455. line = (
  456. f"\n_session={sid[:8]} · model={data.get('model')!r} "
  457. f"· apiKeySource={data.get('apiKeySource')!r}_\n"
  458. )
  459. with trace_path.open("a", encoding="utf-8") as f:
  460. f.write(line)
  461. def _trace_turn(trace_path: Path, turn: int, msg: Any) -> None:
  462. """每个 AssistantMessage → text + tool_use 摘要追加到 trace."""
  463. now = datetime.now().strftime("%H:%M:%S")
  464. parts = [f"\n\n### Turn {turn} · {now}\n"]
  465. texts: List[str] = []
  466. tools: List[tuple] = []
  467. for block in msg.content:
  468. if hasattr(block, "text"):
  469. t = block.text.strip()
  470. if t:
  471. texts.append(t)
  472. elif hasattr(block, "name") and hasattr(block, "input"):
  473. tools.append((block.name, block.input))
  474. # thinking blocks 跳过
  475. if texts:
  476. for t in texts:
  477. for line in t.split("\n"):
  478. parts.append(f"> {line}\n" if line else ">\n")
  479. parts.append("\n")
  480. if tools:
  481. for name, inp in tools:
  482. parts.append(f"- `{name}` — {_fmt_tool_input(name, inp)}\n")
  483. with trace_path.open("a", encoding="utf-8") as f:
  484. f.write("".join(parts))
  485. def _trace_result(trace_path: Path, msg: Any, elapsed: float,
  486. usage: Dict[str, Any], est_cost: float) -> None:
  487. """ResultMessage → final summary 追加到 trace."""
  488. now = datetime.now().strftime("%H:%M:%S")
  489. sdk_cost = msg.total_cost_usd
  490. sdk_cost_str = f"${sdk_cost:.4f}" if sdk_cost is not None else "None (OAuth Max)"
  491. chunk = (
  492. f"\n\n### ◀ Result · {now}\n\n"
  493. f"- subtype: `{msg.subtype}` · is_error: `{msg.is_error}`\n"
  494. f"- num_turns: `{msg.num_turns}` · duration: `{msg.duration_ms}ms` · wall: `{elapsed:.1f}s`\n"
  495. f"- tokens: in={usage.get('input_tokens', 0):,} "
  496. f"out={usage.get('output_tokens', 0):,} "
  497. f"cache_w={usage.get('cache_creation_input_tokens', 0):,} "
  498. f"cache_r={usage.get('cache_read_input_tokens', 0):,}\n"
  499. f"- cost: sdk={sdk_cost_str}, est_if_api=${est_cost:.4f}\n"
  500. )
  501. with trace_path.open("a", encoding="utf-8") as f:
  502. f.write(chunk)
  503. # ──── 主流程: 跑 ClaudeSDKClient ──────────────────────────────────────────────
  504. async def run(args: argparse.Namespace) -> None:
  505. from claude_agent_sdk import (
  506. AssistantMessage,
  507. ClaudeAgentOptions,
  508. ClaudeSDKClient,
  509. ClaudeSDKError,
  510. RateLimitEvent,
  511. ResultMessage,
  512. TextBlock,
  513. AgentDefinition,
  514. )
  515. workdir = Path(args.workdir or DSL_ROOT).resolve()
  516. if not workdir.exists():
  517. sys.exit(f"❌ workdir not found: {workdir}")
  518. source_path = Path(args.source).expanduser().resolve()
  519. if not source_path.exists():
  520. sys.exit(f"❌ source not found: {source_path}")
  521. auto_imgs = _images_from_source(source_path)
  522. extra_imgs = args.extra_image or []
  523. images = auto_imgs + extra_imgs
  524. # 给 Agent 的路径优先用 workdir 相对路径 (它的 cwd 就是 workdir),
  525. # 不在 workdir 内才回落到绝对路径.
  526. try:
  527. source_for_agent = source_path.relative_to(workdir).as_posix()
  528. except ValueError:
  529. source_for_agent = str(source_path)
  530. # --out-dir 是 outputs/ 下的相对名 (--out-dir newdir → outputs/newdir); 绝对路径原样用.
  531. out_dir = _resolve_out_dir(args.out_dir, workdir)
  532. out_dir.mkdir(parents=True, exist_ok=True)
  533. sid_file = out_dir / ".session_id"
  534. # 预创建 sanctioned scratch 目录, 让 Agent 看到就在那, 不用犹豫能不能 mkdir
  535. (out_dir / "_scratch").mkdir(exist_ok=True)
  536. resume_sid = None
  537. if args.resume:
  538. if not sid_file.exists():
  539. sys.exit(f"❌ --resume 但未找到 {sid_file}; 先正常跑一次建立 session")
  540. resume_sid = sid_file.read_text(encoding="utf-8").strip() or None
  541. if not resume_sid:
  542. sys.exit(f"❌ {sid_file} 为空, 无法 resume")
  543. try:
  544. rel_out = out_dir.relative_to(workdir).as_posix()
  545. except ValueError:
  546. rel_out = out_dir.as_posix()
  547. if resume_sid:
  548. # resume 时不重发图和原始指令 (历史里都有); 只给一段 "接着做" 增量.
  549. # 关键: 提醒 Agent 用户可能改过中间产物, 必须 Read 当前磁盘版本.
  550. blocks = [{"type": "text", "text": (
  551. f"上次中断了, 接续做 case-{args.case_id} 的提取流程.\n\n"
  552. f"先 ls {rel_out}/ 看当前已落盘哪些产物;\n"
  553. f"用户可能在中断期间编辑过任何中间产物 (understanding.md / workflow.json) "
  554. f"或改过 spec/ 内任何文件 — 务必 Read 这些**当前磁盘版本**, "
  555. f"不要凭之前记忆继续. 如发现明显人工修订痕迹, 沿用用户改过的版本.\n\n"
  556. f"⚠️【重要禁令与强制要求】:如果流程进行到 Phase 2(归一化与分类匹配),主 Agent **绝对禁止**手动调用 taxonomy-lookup.py 查询或手动决策!你必须强制阅读最新的 `spec/extraction/phase2-normalize.md` 规范,通过运行 `prepare-subtask.py` 生成物理任务切片,然后调用 `Agent`(或 `Task`)工具将任务分别分发给预定义好的 `phase-2a-normalizer` 和 `phase-2b-matcher` 子 Agent 并行协作执行!"
  557. )}]
  558. else:
  559. blocks = _build_initial_blocks(source_for_agent, args.case_id, args.out_dir, images, workdir, args.spec_name)
  560. print(f"[setup] workdir = {workdir}")
  561. print(f"[setup] version = {args.version if args.version else '(default)'}")
  562. print(f"[setup] spec dir = {args.spec_name}/")
  563. print(f"[setup] source = {source_path}")
  564. print(f"[setup] → as agent = {source_for_agent}")
  565. print(f"[setup] case_id = {args.case_id}")
  566. print(f"[setup] output dir = {out_dir}")
  567. print(f"[setup] auto images = {len(auto_imgs)} (from image_url_list)"
  568. if not resume_sid else f"[setup] auto images = -- (resume 跳过)")
  569. print(f"[setup] extra images = {len(extra_imgs)} {extra_imgs if extra_imgs else ''}"
  570. if not resume_sid else f"[setup] extra images = -- (resume 跳过)")
  571. print(f"[setup] model = {args.model}")
  572. print(f"[setup] max_turns = {args.max_turns}")
  573. if resume_sid:
  574. print(f"[setup] resume = {resume_sid[:8]} (跳过初始 prompt + 图)")
  575. else:
  576. print(f"[setup] resume = no (fresh start)")
  577. print(flush=True)
  578. # 起手写 trace banner. 之后每 turn / result 都会 append 到这个 _trace.md.
  579. trace_path = out_dir / "_trace.md"
  580. _trace_banner(trace_path, args, resume_sid, source_path, len(images))
  581. print(f"[trace] -> {trace_path}", flush=True)
  582. stderr_buf: List[str] = []
  583. def _capture_stderr(line: str) -> None:
  584. if line:
  585. stderr_buf.append(line)
  586. print(f"[stderr] {line}", flush=True)
  587. agents = {
  588. "phase-2a-normalizer": AgentDefinition(
  589. description="Expert in Phase 2A (effect/action/type normalization). Use this agent to read task_2a.json, normalize action/effect/type against spec trees, manage procedure-level type_registry, and generate outputs/case-N/_scratch/patch_2a.json.",
  590. prompt="""You are a dedicated Phase 2A normalization sub-agent.
  591. Your goal is to process the inputs and outputs of a workflow for effect, action, and type normalization:
  592. 1. Read the outputs/case-N/_scratch/task_2a.json file to get the steps and IO variables.
  593. 2. Normalize every step's `effect` and `action` against the taxonomy specs in `spec/taxonomy/effect.json` and `spec/taxonomy/action.json`.
  594. 3. Normalize every IO variable's `type` against `spec/taxonomy/type.json`. If a custom type is used, register it in the procedure's `type_registry` with extends and description.
  595. 4. Output a standard `patch_2a.json` JSON file under outputs/case-N/_scratch/.
  596. IMPORTANT: The format of `patch_2a.json` MUST be a flat JSON array of objects, where each object has a "path" and a "value" key (exactly conforming to the `wf-patch.py` tool contract).
  597. Example format:
  598. [
  599. {"path": "p1.s1.effect", "value": "预处理"},
  600. {"path": "p1.s1.action", "value": "提取/化学提取/反推"},
  601. {"path": "p1.s1.inputs[0].type", "value": "工具选型标准"},
  602. {"path": "p1.type_registry.工具配置.extends", "value": "评语"},
  603. {"path": "p1.type_registry.工具配置.desc", "value": "工具选型依据..."}
  604. ]
  605. Do not output raw dictionary structure or any other nesting. Do not touch or modify other files.""",
  606. tools=["Read", "Grep", "Glob", "Write", "Edit", "Bash"],
  607. model="sonnet",
  608. ),
  609. "phase-2b-matcher": AgentDefinition(
  610. description="Expert in Phase 2B (substance/form taxonomy matching). Use this agent to read task_2b.json, invoke taxonomy-lookup.py to query substance and form paths for each variable, and generate outputs/case-N/_scratch/patch_2b.json.",
  611. prompt="""You are a dedicated Phase 2B taxonomy matching sub-agent.
  612. Your goal is to query and match the substance and form for each workflow IO variable:
  613. 1. Read the outputs/case-N/_scratch/task_2b.json file to get the variables to match.
  614. 2. For each variable, run `python spec/tools/taxonomy-lookup.py --dim 实质 --match "..."` and `--dim 形式 --match "..."` to search for the most precise taxonomy paths matching the variable's value, name, and related_images.
  615. 3. Output a standard `patch_2b.json` JSON file under outputs/case-N/_scratch/.
  616. IMPORTANT: The format of `patch_2b.json` MUST be a flat JSON array of objects, where each object has a "path" and a "value" key (exactly conforming to the `wf-patch.py` tool contract). Substance and form values can be single string paths, multiple paths separated by ' + ', or JSON arrays of strings for multi-path matching.
  617. Example format:
  618. [
  619. {"path": "p1.s1.inputs[0].substance", "value": "/理念/知识/思想/概念范畴/性质属性/功能效用"},
  620. {"path": "p1.s1.inputs[0].form", "value": "/呈现/视觉/视觉制作/构图编排/版面设计/版面结构"},
  621. {"path": "p1.s2.inputs[0].substance", "value": ["/理念/知识/商业/前沿技术/AI智能/AI应用", "/理念/知识/思想"]}
  622. ]
  623. Do not output raw dictionary structure or any other nesting. Do not touch or modify other files.""",
  624. tools=["Read", "Grep", "Glob", "Write", "Edit", "Bash"],
  625. model="sonnet",
  626. )
  627. }
  628. options = ClaudeAgentOptions(
  629. model=args.model,
  630. cwd=str(workdir),
  631. resume=resume_sid,
  632. # ⚠️ 支持旧版 "Task" 与新版 "Agent" 标识符以确保在所有 Claude Code / SDK 版本中均可激活子 agent
  633. allowed_tools=["Read", "Write", "Edit", "Bash", "Glob", "Grep", "Task", "Agent"],
  634. agents=agents,
  635. max_turns=args.max_turns,
  636. permission_mode="bypassPermissions",
  637. setting_sources=[],
  638. env={
  639. "ANTHROPIC_API_KEY": "",
  640. "ANTHROPIC_BASE_URL": "",
  641. "ANTHROPIC_AUTH_TOKEN": "",
  642. # 多图帖 (几十张图) 的提取响应容易顶破默认 32000 output token 上限 →
  643. # 调到 sonnet 4.x 的出参天花板 64000。外部已设同名环境变量则以这里为准。
  644. "CLAUDE_CODE_MAX_OUTPUT_TOKENS": "64000",
  645. },
  646. stderr=_capture_stderr,
  647. )
  648. turn = 0
  649. usage: Dict[str, Any] = {}
  650. t0 = time.time()
  651. try:
  652. async with ClaudeSDKClient(options=options) as client:
  653. async def _input_stream():
  654. yield {
  655. "type": "user",
  656. "message": {"role": "user", "content": blocks},
  657. "parent_tool_use_id": None,
  658. "session_id": "default",
  659. }
  660. await client.query(_input_stream())
  661. async for msg in client.receive_response():
  662. if isinstance(msg, AssistantMessage):
  663. turn += 1
  664. for block in msg.content:
  665. if isinstance(block, TextBlock):
  666. print(f"\n[turn {turn} · text]\n{block.text}\n", flush=True)
  667. elif hasattr(block, "name") and hasattr(block, "input"):
  668. preview = str(block.input)
  669. if len(preview) > 200:
  670. preview = preview[:200] + "..."
  671. print(f"[turn {turn} · tool_use] {block.name}({preview})", flush=True)
  672. elif hasattr(block, "thinking"):
  673. pass # thinking 太长, 跳过
  674. else:
  675. print(f"[turn {turn} · {type(block).__name__}] {block!r}", flush=True)
  676. _trace_turn(trace_path, turn, msg)
  677. elif isinstance(msg, ResultMessage):
  678. if msg.usage:
  679. usage = dict(msg.usage)
  680. elapsed = time.time() - t0
  681. sdk_cost = msg.total_cost_usd
  682. sdk_cost_str = (
  683. f"${sdk_cost:.4f}" if sdk_cost is not None
  684. else "None (OAuth Max — not metered as $)"
  685. )
  686. est_cost = _estimate_cost_usd(usage)
  687. print(
  688. f"\n[result] subtype={msg.subtype} is_error={msg.is_error} "
  689. f"turns={msg.num_turns} duration={msg.duration_ms}ms wall={elapsed:.1f}s\n"
  690. f" tokens: in={usage.get('input_tokens', 0):,} "
  691. f"out={usage.get('output_tokens', 0):,} "
  692. f"cache_w={usage.get('cache_creation_input_tokens', 0):,} "
  693. f"cache_r={usage.get('cache_read_input_tokens', 0):,}\n"
  694. f" cost : sdk={sdk_cost_str}, est_if_api=${est_cost:.4f} "
  695. f"(Sonnet 4.6 价目)",
  696. flush=True,
  697. )
  698. _trace_result(trace_path, msg, elapsed, usage, est_cost)
  699. if msg.is_error:
  700. print(f"❌ result is_error=True", file=sys.stderr)
  701. sys.exit(2)
  702. elif isinstance(msg, RateLimitEvent):
  703. info = getattr(msg, "rate_limit_info", None)
  704. info_status = getattr(info, "status", None) if info else None
  705. if info_status == "allowed_warning":
  706. print(f"⚠️ [rate_limit_warning] Max 订阅 5h 窗口余额较少, `claude /status` 看余量", file=sys.stderr)
  707. elif info_status and info_status not in ("allowed", "allowed_warning"):
  708. print(f"❌ [rate_limit_blocked] {info_status!r} — Max 订阅 5h 窗口已耗尽, `claude /status` 看余量", file=sys.stderr)
  709. sys.exit(3)
  710. else:
  711. name = type(msg).__name__
  712. if name == "SystemMessage":
  713. data = getattr(msg, "data", {}) or {}
  714. subtype = getattr(msg, "subtype", "?")
  715. if subtype == "init":
  716. sid = data.get('session_id', '') or ''
  717. if sid:
  718. # 早写: 一拿到 sid 就落盘, Ctrl-C 再早也能 resume
  719. sid_file.write_text(sid, encoding="utf-8")
  720. _trace_init(trace_path, sid, data)
  721. print(
  722. f"[init] model={data.get('model')!r} "
  723. f"apiKeySource={data.get('apiKeySource')!r} "
  724. f"session={sid[:8]}",
  725. flush=True,
  726. )
  727. except ClaudeSDKError as e:
  728. tail = "\n".join(stderr_buf[-20:])
  729. print(
  730. f"❌ SDK error: {type(e).__name__}: {e}\n"
  731. f"--- CLI stderr (last 20 lines) ---\n{tail}",
  732. file=sys.stderr,
  733. )
  734. sys.exit(1)
  735. # ──── CLI ─────────────────────────────────────────────────────────────────────
  736. def _parse_args() -> argparse.Namespace:
  737. p = argparse.ArgumentParser(
  738. description="跑 procedure-dsl 提取流程 (Claude Agent SDK, OAuth Max)",
  739. formatter_class=argparse.RawDescriptionHelpFormatter,
  740. epilog=__doc__,
  741. )
  742. p.add_argument("source", help="原始 post 文件 (input/case-N-raw.json; 也接受 .md/.txt)")
  743. p.add_argument("--extra-image", action="append", default=[],
  744. help="额外配图: 本地路径 or http(s) URL. 加在 image_url_list "
  745. "自动抽出的图后面. 可多次传.")
  746. p.add_argument("--out-dir", type=str, required=True,
  747. help="输出工作目录名, 落在 outputs/ 下. e.g. --out-dir case-5-newflow → 实际 outputs/case-5-newflow/ . "
  748. "Agent 所有产物 (understanding.md / workflow.json / case-X.html / .md) 都落在这. "
  749. "case_id 自动从 basename 推 (e.g. --out-dir case-5-newflow → case_id='5-newflow'). "
  750. "已带 outputs/ 前缀会被容忍 (不重复嵌套); 传绝对路径则原样用.")
  751. p.add_argument("--model", default="claude-sonnet-4-6",
  752. help="Claude 模型名 (default: claude-sonnet-4-6)")
  753. p.add_argument("--workdir", default=None,
  754. help=f"SDK cwd. 默认 {DSL_ROOT} (即本脚本所在 procedure-dsl/)")
  755. p.add_argument("--version", default=None, metavar="SUFFIX",
  756. help="spec 版本: 跑 workdir 下的 spec-<SUFFIX>/ 目录. "
  757. "e.g. --version backup → spec-backup/, --version test → spec-test/. "
  758. "不传则默认 spec/.")
  759. p.add_argument("--max-turns", type=int, default=300,
  760. help="Agent 最大回合数 (default: 300, 跑完整三阶段 + lint 留足余量)")
  761. p.add_argument("--resume", action="store_true",
  762. help="恢复中断的 session: 从 outputs/case-N/.session_id 读 sid, "
  763. "Agent 拿上次完整历史接着跑. 中断期间可手改任何中间产物, "
  764. "Agent 会重新 Read 磁盘版本而不是凭记忆.")
  765. p.add_argument("--max-retries", type=int, default=3,
  766. help="Stream idle / 临时错误自动 --resume 重试次数 (default: 3). "
  767. "退避 10s/20s/40s, 不重试 RateLimit (exit 3) 和 Ctrl-C.")
  768. return p.parse_args()
  769. def main() -> None:
  770. # Windows 默认控制台编码 (cp1252/gbk) 撑不住 spec.md / Agent 输出里的中文 +
  771. # emoji, 第一次打印就 UnicodeEncodeError. 在 main 入口统一切到 UTF-8.
  772. for stream in (sys.stdout, sys.stderr):
  773. if hasattr(stream, "reconfigure"):
  774. stream.reconfigure(encoding="utf-8", errors="replace")
  775. logging.basicConfig(level=logging.WARNING)
  776. args = _parse_args()
  777. # 注入派生属性: case_id 自动从 --out-dir basename 算 (去掉 case- 前缀).
  778. # 下游 prompt / banner / type_suggestions record 都用这个 args.case_id.
  779. args.case_id = _derive_case_id(args.out_dir)
  780. # spec 目录名: --version backup → "spec-backup", 不传 → "spec".
  781. args.spec_name = "spec" if not args.version else f"spec-{args.version}"
  782. # ──── 自动重试循环 ──────────────────────────────────────────────────────
  783. # exit code 约定:
  784. # 0 = 成功
  785. # 1 = ClaudeSDKError (含 setup 错, 一般非 transient, 不重试)
  786. # 2 = ResultMessage is_error=True (含 stream idle timeout, **可重试**)
  787. # 3 = RateLimitEvent (配额耗尽, 不重试 — 等 5h window 滑动)
  788. # 130 = KeyboardInterrupt (用户主动中断, 不重试)
  789. # 重试退避: 10s → 20s → 40s, 都在 prompt cache TTL (5min) 内
  790. max_retries = args.max_retries
  791. workdir_check = Path(args.workdir or DSL_ROOT).resolve()
  792. out_dir_check = Path(args.out_dir)
  793. sid_file_check = (out_dir_check if out_dir_check.is_absolute() else (workdir_check / out_dir_check)).resolve() / ".session_id"
  794. # --version 选定的 spec 目录必须存在. 在进重试循环前 fail-fast:
  795. # 注意 run() 里 sys.exit(字符串) 的消息会被下面 except SystemExit 吞掉,
  796. # 所以这里在 main 早校验, 直接 print 到 stderr 再 exit(1).
  797. spec_dir_check = workdir_check / args.spec_name
  798. if not spec_dir_check.is_dir():
  799. avail = sorted(p.name for p in workdir_check.glob("spec*") if p.is_dir())
  800. print(
  801. f"❌ spec dir not found: {spec_dir_check}\n"
  802. f" (--version {args.version!r} → 期望目录 '{args.spec_name}/')\n"
  803. f" 可用版本: {avail}",
  804. file=sys.stderr,
  805. )
  806. sys.exit(1)
  807. for attempt in range(max_retries + 1):
  808. try:
  809. asyncio.run(run(args))
  810. return # 成功完成
  811. except KeyboardInterrupt:
  812. # session_id 在 init 时就落盘了, 已写到磁盘的产物全部保留.
  813. # 加 --resume 重启即可接着跑.
  814. print(
  815. f"\n⚠️ 中断 (Ctrl-C). {args.out_dir}/ 产物已保留.\n"
  816. f" 恢复: python run_procedure_dsl.py {args.source} "
  817. f"--out-dir {args.out_dir} --resume",
  818. file=sys.stderr,
  819. )
  820. sys.exit(130)
  821. except SystemExit as e:
  822. code = e.code if isinstance(e.code, int) else (1 if e.code else 0)
  823. # exit 2 = is_error (stream timeout 之类 transient), 可重试
  824. if code == 2 and attempt < max_retries:
  825. wait_s = 10 * (2 ** attempt) # 10s / 20s / 40s
  826. print(
  827. f"\n⚠️ attempt {attempt+1}/{max_retries+1} exit={code} "
  828. f"(可能 stream idle timeout). 等 {wait_s}s 后 --resume 重试...",
  829. file=sys.stderr,
  830. )
  831. time.sleep(wait_s)
  832. # 强制 --resume (前提是 session_id 已落盘 — init 阶段就写了, 通常都在)
  833. if sid_file_check.exists():
  834. args.resume = True
  835. else:
  836. print(f"⚠️ {sid_file_check} 不存在, fresh restart 而非 resume",
  837. file=sys.stderr)
  838. continue
  839. # 不重试: exit 0 (成功) / exit 1 (SDK 错) / exit 3 (RateLimit) / 用尽 retries
  840. sys.exit(code)
  841. # 用尽 max_retries 后还到这里 (按 for-else 语义实际不会, 但保险)
  842. print(f"\n❌ 用尽 {max_retries} 次重试.", file=sys.stderr)
  843. sys.exit(2)
  844. if __name__ == "__main__":
  845. main()