wf-patch.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735
  1. #!/usr/bin/env python3
  2. # -*- coding: utf-8 -*-
  3. """
  4. wf-patch.py — workflow.json 的安全批量字段设置器.
  5. 为什么有这个工具:
  6. workflow.json 由各 phase **直接 Write 骨架 + 逐字段填充** 演化. 但「给几十个 IO
  7. 逐个加 anchor」「给每个 step 填 effect/action/type」这类批量字段赋值, 用 Edit
  8. 一处一处改太碎, 手写整段 JSON 又极易踩转义 / 控制字符坑 (把文件搞坏).
  9. wf-patch 卡在中间: **你只负责语义决策 (path=value), 工具负责安全落盘 + 合法性校验**.
  10. - 安全 IO: 工具自己 json.load -> 改 -> json.dump(ensure_ascii=False), 你永远不手写 JSON.
  11. - 写入即校验 (fail-fast): 每条赋值立刻对照字典树 / type_registry / anchor 格式校验,
  12. **任何一条非法 -> 报具体哪条错, 整批不写** (不产出悄悄错的文件). lint 仍做全局兜底.
  13. 用法:
  14. # 单条 / 多条 --set (path=value, 只在第一个 '=' 处切, value 可含 '=' 和空格)
  15. python spec/tools/wf-patch.py --workflow outputs/case-N/workflow.json \
  16. --set 'p1.s1.inputs[0].anchor=← s0.主角图' \
  17. --set 'p1.s2.effect=主体生成' \
  18. --set 'p1.s2.action=生成/图像生成/文生图'
  19. # 或一次性喂一份 patch 清单 (适合 1.3 加 anchor / 2A 填字段这种几十处批量)
  20. python spec/tools/wf-patch.py --workflow outputs/case-N/workflow.json --patch _scratch/anchors.json
  21. # anchors.json = [{"path": "p1.s1.inputs[0].anchor", "value": "← s0.x"}, ...]
  22. # 只校验不写
  23. python spec/tools/wf-patch.py --workflow ... --set '...' --dry-run
  24. # 删字段 (取代手 Edit 删; 字段不存在则幂等跳过)
  25. python spec/tools/wf-patch.py --workflow ... --unset 'p1.declarations.inputs[0].inferred'
  26. # 从外部文件读取内容注入指定字段
  27. python spec/tools/wf-patch.py --workflow ... --set-file 'p1.s1.outputs[0].value=_scratch/prompt.txt'
  28. 路径语法 (proc / step 按 id 寻址, 不是下标; 只有真列表才用 [i]; 嵌套步 id 带点 s2.1 也支持):
  29. p1.s2.effect step 标量字段 (effect/via/action/feature/control/kind/intent/group)
  30. p1.s1.inputs[0].anchor IO 字段 (anchor/type/substance/form/name/value)
  31. p1.s2.focus step 的 focus 数组 (逗号分隔: focus=via,action,out-type-0)
  32. p1.purpose procedure 头部字段 (name/purpose/category/platform/author)
  33. p1.declarations.inputs[0].desc declarations 内任意字段 (通用下钻)
  34. source.url case-level 原帖信息 (platform/author/date/url/title/excerpt)
  35. p1.type_registry.场景图.extends 注册 case-specific 类型 (会自动建 type_registry 段)
  36. value 特殊取值:
  37. __null__ -> JSON null (用于 substance/form/url 可空)
  38. 仍用 Write / Edit 的只剩 (尽量别碰生 JSON):
  39. - workflow.json 骨架的首次创建 (Phase 1.2 从 template Write)
  40. - instruction (列表套列表, 手动 Edit; 透传 directive 用 --resolve-passthrough)
  41. 改字段/删字段/改 source 现在都走本工具, 不要再 Read→Edit 改 workflow.json (会反复重读、烧 token).
  42. 自动修引号 (load 时兜底):
  43. workflow.json 由模型直 Write, 偶尔把中文引号写成未转义的 ASCII " → JSON 崩.
  44. 本工具 load 失败时会自动把这类误引号修成「」再 parse; 修成功则继续 patch, 并把
  45. 修复随本次写回落盘 (--dry-run 不写). 修不回才按 exit 2 报错. 不用再手写 _scratch 修复脚本.
  46. 退出码:
  47. 0 全部校验通过并写入 (--dry-run 时为校验通过)
  48. 1 有校验失败 (整批未写) / 路径解析失败
  49. 2 CLI 参数错误 / 文件不存在 / JSON 损坏 (且自动修引号也救不回)
  50. """
  51. from __future__ import annotations
  52. import argparse
  53. import json
  54. import re
  55. import subprocess
  56. import sys
  57. from pathlib import Path
# This file lives at spec/tools/wf-patch.py, so three .parent hops from the
# resolved file path land on the procedure-dsl/ root.
DSL_ROOT = Path(__file__).resolve().parent.parent.parent
TAX_DIR = DSL_ROOT / 'spec' / 'taxonomy'
LOOKUP = DSL_ROOT / 'spec' / 'tools' / 'taxonomy-lookup.py'

# Windows console: switch stdout/stderr to UTF-8 at import time. Best effort —
# a stream without reconfigure(), or one that refuses it, is left as it is.
for _s in (sys.stdout, sys.stderr):
    if hasattr(_s, 'reconfigure'):
        try:
            _s.reconfigure(encoding='utf-8', errors='replace')
        except Exception:
            pass

# Controlled vocabularies (kept aligned with syntax.md §3 / action.json $control).
FEATURE_VOCAB = {'随机', '幂等', '人工', '本地', '写外部', '读外部', '-'}
KIND_VOCAB = {'step', 'block', 'nested', 'atom'}

# "Reference placeholder" wording inside a value/directive. Pointing at another
# step is the anchor's job; a value should hold the data itself. A hit means the
# field has not really been back-filled yet (--resolve-passthrough tries to fill
# it, and lint warns about it).
# NOTE(review): the character class below lists the same parenthesis twice —
# possibly a full-width variant was lost when this file was copied; confirm
# against the repository copy.
META_REF = re.compile(r'[((]?\s*同\s*s[\d]|见\s*s[\d]|←\s*s[\d]|同上')
  75. # ===========================================================================
  76. # 自动修引号: 模型直 Write workflow.json 时常把中文引号写成 ASCII " (未转义) → JSON 崩.
  77. # 仅在 json.loads 失败时兜底调用 (合法文件零开销). 判别: 串内一个 ASCII " 之后第一个
  78. # 非空白字符 ∈ {,:}]} 或 EOF → 真·字符串定界符 (保留); 否则是误写的内容引号 → 换直角
  79. # 引号「」(串内交替 开「/闭」). 逻辑独立内置于本文件 (不 import 任何外部模块);
  80. # scratch/repair_workflow_quotes.py 是同款独立实现, 二者无依赖关系. 改完必须能 parse 才用.
  81. # ===========================================================================
  82. _STRUCT_AFTER = set(',:}]')
  83. def repair_ascii_quotes(raw: str):
  84. """→ (修后文本, 改动的内容引号数). 纯走字符, 不依赖能否 parse."""
  85. out, i, n = [], 0, len(raw)
  86. in_str = esc = False
  87. open_q = True
  88. changes = 0
  89. while i < n:
  90. c = raw[i]
  91. if not in_str:
  92. out.append(c)
  93. if c == '"':
  94. in_str, esc, open_q = True, False, True
  95. i += 1
  96. continue
  97. if esc:
  98. out.append(c); esc = False; i += 1; continue
  99. if c == '\\':
  100. out.append(c); esc = True; i += 1; continue
  101. if c == '"':
  102. j = i + 1
  103. while j < n and raw[j] in ' \t\r\n':
  104. j += 1
  105. nxt = raw[j] if j < n else ''
  106. if nxt == '' or nxt in _STRUCT_AFTER:
  107. out.append(c); in_str = False # 真·结束符
  108. else:
  109. out.append('「' if open_q else '」') # 误写的内容引号
  110. open_q = not open_q
  111. changes += 1
  112. i += 1
  113. continue
  114. out.append(c); i += 1
  115. return ''.join(out), changes
class PathError(Exception):
    """Raised when a path cannot be resolved to a target location in workflow.json."""
  118. # ===========================================================================
  119. # 字典树加载: leaf 集 + {leaf: 全路径} + 全叶路径集 (与 lint 同款叶子派生)
  120. # ===========================================================================
  121. def _load_tree(name: str):
  122. """读 spec/taxonomy/{name}.json. 返回 (leaves:set, leaf2path:dict, control:list)."""
  123. f = TAX_DIR / f'{name}.json'
  124. if not f.exists():
  125. return set(), {}, []
  126. d = json.loads(f.read_text(encoding='utf-8'))
  127. leaf2path: dict[str, str] = {}
  128. def walk(node: dict, prefix: list[str]):
  129. nm = node.get('分类名称')
  130. if not nm:
  131. return
  132. p = prefix + [nm]
  133. kids = node.get('子分类') or []
  134. if not kids: # 无子分类 = 叶子
  135. leaf2path[nm] = '/'.join(p)
  136. for c in kids:
  137. walk(c, p)
  138. for top in d.get('最终分类树') or []:
  139. walk(top, [])
  140. leaves = set(d.get('$leaves') or leaf2path.keys())
  141. return leaves, leaf2path, (d.get('$control') or [])
# Load the three taxonomy trees once at import time. Only the action tree's
# $control list is kept; the other two are discarded.
EFFECT_LEAVES, EFFECT_PATHS, _ = _load_tree('effect')
ACTION_LEAVES, ACTION_PATHS, ACTION_CONTROL = _load_tree('action')
TYPE_LEAVES, TYPE_PATHS, _ = _load_tree('type')
# Accepted control values: the action tree's $control entries plus '-'.
CONTROL_VOCAB = set(ACTION_CONTROL) | {'-'}
  146. # substance/form 校验结果缓存 (subprocess 较慢)
  147. _taxo_cache: dict[tuple[str, str], bool] = {}
  148. def _taxo_valid(dim: str, path: str) -> bool:
  149. """调 taxonomy-lookup.py --validate, exit 0 = 合法. 结果缓存."""
  150. key = (dim, path)
  151. if key in _taxo_cache:
  152. return _taxo_cache[key]
  153. try:
  154. import os
  155. env = os.environ.copy()
  156. env['PYTHONIOENCODING'] = 'utf-8'
  157. r = subprocess.run(
  158. [sys.executable, str(LOOKUP), '--dim', dim, '--validate', path],
  159. capture_output=True, text=True, encoding='utf-8', errors='replace', env=env,
  160. )
  161. ok = (r.returncode == 0)
  162. except Exception:
  163. ok = False # 校验器跑不起来时, 保守判非法
  164. _taxo_cache[key] = ok
  165. return ok
  166. def _closest(name: str, leaves) -> str:
  167. """给个最接近的叶子名做提示 (子串/前缀朴素匹配, 仅供报错文案)."""
  168. cands = [lf for lf in leaves if name and (name in lf or lf in name)]
  169. return (' 最接近: ' + '/'.join(cands[:3])) if cands else ''
  170. # ===========================================================================
  171. # 字段校验 -> (ok, normalized_value, err_msg)
  172. # ===========================================================================
  173. def validate_field(field: str, value, proc: dict, pending_types: set[str] = None):
  174. # null 哨兵 (substance/form/url 可空)
  175. if value == '__null__':
  176. if field in ('substance', 'form', 'url'):
  177. return True, None, ''
  178. return False, value, f'__null__ 只对 substance/form/url 有意义, {field} 不可为 null'
  179. # focus 是数组: 逗号分隔 → list ('via,action,out-type-0'); 空串 → []
  180. if field == 'focus':
  181. items = [t.strip() for t in str(value).split(',') if t.strip()]
  182. return True, items, ''
  183. if field == 'effect':
  184. if value in EFFECT_LEAVES:
  185. return True, value, ''
  186. # 给了全路径 -> 归一到叶名 (schema 存叶名)
  187. for leaf, path in EFFECT_PATHS.items():
  188. if value == path:
  189. return True, leaf, ''
  190. return False, value, f'effect={value!r} 不是 effect.json 叶子(存叶名).{_closest(value, EFFECT_LEAVES)}'
  191. if field == 'action':
  192. # action 存全路径; 给叶名自动展开, 给全叶路径原样接受
  193. if value in ACTION_PATHS: # 是叶名
  194. return True, ACTION_PATHS[value], ''
  195. if value in ACTION_PATHS.values(): # 是合法叶路径
  196. return True, value, ''
  197. return False, value, (f'action={value!r} 不是合法动作叶子/叶路径 '
  198. f'(形如 生成/图像生成/文生图).{_closest(value.split("/")[-1], ACTION_LEAVES)}')
  199. if field == 'type':
  200. if value in TYPE_LEAVES:
  201. return True, value, ''
  202. reg = proc.get('type_registry') or {}
  203. if value in reg:
  204. return True, value, ''
  205. if pending_types and value in pending_types:
  206. return True, value, ''
  207. return False, value, (f'type={value!r} 不是 type.json 叶子, 也没在本工序 type_registry 注册. '
  208. f'先 --set {proc.get("id")}.type_registry.{value}.extends=<叶子> 再用.{_closest(value, TYPE_LEAVES)}')
  209. if field == 'extends': # type_registry entry 的 extends 必须桥到 stdlib 叶子
  210. if value in TYPE_LEAVES:
  211. return True, value, ''
  212. return False, value, f'type_registry extends={value!r} 必须是 type.json 叶子.{_closest(value, TYPE_LEAVES)}'
  213. if field == 'substance':
  214. if isinstance(value, str):
  215. if '+' in value:
  216. paths = [p.strip() for p in value.split('+') if p.strip()]
  217. else:
  218. paths = [value.strip()]
  219. elif isinstance(value, list):
  220. paths = [str(p).strip() for p in value if str(p).strip()]
  221. else:
  222. return False, value, 'substance 必须是字符串或数组'
  223. invalid_paths = []
  224. for p in paths:
  225. if not _taxo_valid('实质', p):
  226. invalid_paths.append(p)
  227. if invalid_paths:
  228. return False, value, f'以下 substance 路径不在实质词表: {invalid_paths}'
  229. norm_val = paths if (isinstance(value, list) or (isinstance(value, str) and '+' in value)) else paths[0]
  230. return True, norm_val, ''
  231. if field == 'form':
  232. if isinstance(value, str):
  233. if '+' in value:
  234. paths = [p.strip() for p in value.split('+') if p.strip()]
  235. else:
  236. paths = [value.strip()]
  237. elif isinstance(value, list):
  238. paths = [str(p).strip() for p in value if str(p).strip()]
  239. else:
  240. return False, value, 'form 必须是字符串或数组'
  241. invalid_paths = []
  242. for p in paths:
  243. if not _taxo_valid('形式', p):
  244. invalid_paths.append(p)
  245. if invalid_paths:
  246. return False, value, f'以下 form 路径不在形式词表: {invalid_paths}'
  247. norm_val = paths if (isinstance(value, list) or (isinstance(value, str) and '+' in value)) else paths[0]
  248. return True, norm_val, ''
  249. if field == 'anchor':
  250. if re.match(r'^\s*(←|→)', str(value)):
  251. return True, value, ''
  252. return False, value, f'anchor={value!r} 须以 ← (输入引用) 或 → (输出去向) 开头'
  253. if field == 'feature':
  254. if value in FEATURE_VOCAB:
  255. return True, value, ''
  256. return False, value, f'feature={value!r} 不在受控词 {sorted(FEATURE_VOCAB)}'
  257. if field == 'control':
  258. if value in CONTROL_VOCAB:
  259. return True, value, ''
  260. return False, value, f'control={value!r} 不在受控词 {sorted(CONTROL_VOCAB)}'
  261. if field == 'kind':
  262. if value in KIND_VOCAB:
  263. return True, value, ''
  264. return False, value, f'kind={value!r} 不在 {sorted(KIND_VOCAB)}'
  265. # 自由文本字段 (name/value/intent/via/purpose/category/platform/author/desc/group...)
  266. return True, value, ''
  267. # ===========================================================================
  268. # 路径解析 -> (parent_container, key, proc, field_name)
  269. # ===========================================================================
  270. _SEG = re.compile(r'^([^\[]+)(?:\[(\d+)\])?$')
  271. def _split_seg(seg: str):
  272. m = _SEG.match(seg)
  273. if not m:
  274. raise PathError(f'非法路径段 {seg!r}')
  275. return m.group(1), (int(m.group(2)) if m.group(2) is not None else None)
  276. def _descend(container, segs):
  277. """沿 segs 走进 container, 返回 (parent, last_key). 中间节点必须已存在.
  278. segs 每段可带 [i] 下标. last_key 是 dict 键 (str) 或列表下标 (int);
  279. 设置即 parent[last_key]=value, 删除即 del parent[last_key].
  280. 用于 source.* / declarations.* 等通用路径 (proc/step 的 id 寻址不走这里).
  281. """
  282. cur = container
  283. for i, seg in enumerate(segs):
  284. name, idx = _split_seg(seg)
  285. last = (i == len(segs) - 1)
  286. if last and idx is None:
  287. if not isinstance(cur, dict):
  288. raise PathError(f'{name!r} 的父级不是对象')
  289. return cur, name
  290. if not isinstance(cur, dict) or name not in cur:
  291. raise PathError(f'路径段 {name!r} 不存在, 无法下钻')
  292. nxt = cur[name]
  293. if idx is not None:
  294. if not isinstance(nxt, list) or idx >= len(nxt):
  295. raise PathError(f'{name}[{idx}] 越界或非列表')
  296. if last:
  297. return nxt, idx
  298. cur = nxt[idx]
  299. else:
  300. cur = nxt
  301. raise PathError('路径为空')
  302. def locate(data: dict, path: str):
  303. """把 path 解析到目标. 返回 (parent, key, proc, field_name).
  304. 设置即 parent[key] = value. proc 给校验提供 type_registry 上下文.
  305. proc / step 按 id 寻址 (不是下标); inputs/outputs 用 [i] 下标.
  306. step id 可能带点 (嵌套步 s2.1) — 用最长前缀匹配消歧 (s2.1 优先于 s2).
  307. """
  308. if '.' not in path:
  309. raise PathError(f'路径太短 {path!r}, 至少 <proc>.<字段> 或 source.<字段>')
  310. proc_id, remainder = path.split('.', 1)
  311. # --- source.* 分支 (case-level 原帖信息, 无 proc 上下文) ---
  312. if proc_id == 'source':
  313. src = data.setdefault('source', {})
  314. parent, key = _descend(src, remainder.split('.'))
  315. return parent, key, None, (key if isinstance(key, str) else '')
  316. proc = next((p for p in (data.get('procedures') or []) if p.get('id') == proc_id), None)
  317. if proc is None:
  318. ids = [p.get('id') for p in (data.get('procedures') or [])]
  319. raise PathError(f'找不到 procedure id={proc_id!r} (现有: {ids})')
  320. # --- type_registry 分支 (允许自动建段/条目) ---
  321. if remainder == 'type_registry' or remainder.startswith('type_registry.'):
  322. parts = remainder.split('.')
  323. if len(parts) == 3:
  324. reg = proc.setdefault('type_registry', {})
  325. entry = reg.setdefault(parts[1], {})
  326. return entry, parts[2], proc, parts[2]
  327. raise PathError('type_registry 路径形如 p1.type_registry.<类型名>.<extends|desc>')
  328. # --- step 分支 (最长前缀匹配 step id, 兼容带点的嵌套步 id) ---
  329. matched = None
  330. for s in (proc.get('steps') or []):
  331. sid = s.get('id')
  332. if not sid:
  333. continue
  334. if remainder == sid:
  335. raise PathError(f'step 路径要带字段, 形如 {proc_id}.{sid}.effect')
  336. if remainder.startswith(sid + '.') and (matched is None or len(sid) > len(matched['id'])):
  337. matched = s
  338. if matched is not None:
  339. sid = matched['id']
  340. field_part = remainder[len(sid) + 1:] # 'sid.' 之后
  341. fsegs = field_part.split('.')
  342. name2, idx2 = _split_seg(fsegs[0])
  343. if name2 in ('inputs', 'outputs'):
  344. if idx2 is None:
  345. raise PathError(f'{name2} 要带下标, 形如 {name2}[0]')
  346. lst = matched.get(name2)
  347. if not isinstance(lst, list) or idx2 >= len(lst):
  348. raise PathError(f'{proc_id}.{sid}.{name2}[{idx2}] 越界 (该 step 有 {len(lst or [])} 个 {name2})')
  349. if len(fsegs) != 2:
  350. raise PathError(f'IO 路径形如 {proc_id}.{sid}.{name2}[{idx2}].anchor')
  351. return lst[idx2], fsegs[1], proc, fsegs[1]
  352. else:
  353. if len(fsegs) != 1:
  354. raise PathError(f'step 标量字段形如 {proc_id}.{sid}.{name2}')
  355. return matched, name2, proc, name2
  356. # --- proc 内其余路径: 头部字段 / declarations.* / return_row.* 等, 走通用下钻 ---
  357. parent, key = _descend(proc, remainder.split('.'))
  358. return parent, key, proc, (key if isinstance(key, str) else '')
  359. # ===========================================================================
  360. # 透传回填: anchor 为纯 ← sN.varname 的 IO, 从源 output 抄 value (逐字回填)
  361. # ===========================================================================
  362. def _is_fillable(value) -> bool:
  363. """该 value 算「还没真正回填」吗 — 空 / 占位符 / 引用文案."""
  364. if value in (None, '', '-'):
  365. return True
  366. return bool(META_REF.search(str(value)))
  367. def _parse_passthrough(anchor, step_ids: list[str]):
  368. """把 anchor 解析成纯透传源 (src_step, src_name); 非干净透传返回 None.
  369. 只认 `← sN.varname` 形式 (sN 按已知 step id 最长前缀匹配, 兼容 s2.1);
  370. `← 工序输入` / `← s6 (链, 上一张)` / 带容器索引等不算 (无法确定唯一源 value).
  371. varname 末尾的 [i] / (...) 注释会被剥掉再查.
  372. """
  373. m = re.match(r'^\s*←\s*(.+)$', str(anchor or ''))
  374. if not m:
  375. return None
  376. body = m.group(1).strip()
  377. for sid in sorted(step_ids, key=len, reverse=True):
  378. if body.startswith(sid + '.'):
  379. name = body[len(sid) + 1:].strip()
  380. name = re.sub(r'\s*[\[((].*$', '', name).strip() # 剥掉 [i] / (注释)
  381. return (sid, name) if name else None
  382. return None
  383. def _extract_ref(text, step_ids: list[str]):
  384. """从 directive/文案里抽 (src_step, src_name) 引用; 抽不出返回 None.
  385. 认「同 sN.name」「(同 sN.name 全文)」「见 sN.name」等. sN 按已知 step id
  386. 最长前缀匹配 (兼容 s2.1).
  387. """
  388. m = re.search(r'[同见]\s*([^\s)),,。]+)', str(text or ''))
  389. if not m:
  390. return None
  391. body = m.group(1)
  392. for sid in sorted(step_ids, key=len, reverse=True):
  393. if body.startswith(sid + '.'):
  394. name = re.sub(r'\s*[\[((].*$', '', body[len(sid) + 1:]).strip()
  395. return (sid, name) if name else None
  396. return None
  397. def resolve_passthrough(data: dict):
  398. """把 anchor 为纯透传、value/directive 仍空或占位的位置, 用源 output 的 value 逐字填上.
  399. 覆盖两类: (a) IO 的 value (anchor=← sN.varname); (b) instruction 的 directive
  400. (文案里「同 sN.varname」). 迭代到不动点 (处理链式透传). 返回 (filled_msgs, warn_msgs).
  401. """
  402. out_index = {} # (step_id, name) -> output item (读 value)
  403. step_ids: list[str] = []
  404. for p in data.get('procedures') or []:
  405. for s in p.get('steps') or []:
  406. sid = s.get('id')
  407. if sid:
  408. step_ids.append(sid)
  409. for o in s.get('outputs') or []:
  410. if isinstance(o, dict) and o.get('name'):
  411. out_index[(sid, o['name'])] = o
  412. def _src_value(ref):
  413. """源存在且自己已填好 → 返回其 value; 否则 None."""
  414. src = out_index.get(ref)
  415. if src is None or _is_fillable(src.get('value')):
  416. return None
  417. return src['value']
  418. filled: list[str] = []
  419. changed, rounds = True, 0
  420. while changed and rounds < 20:
  421. changed, rounds = False, rounds + 1
  422. for p in data.get('procedures') or []:
  423. for s in p.get('steps') or []:
  424. # (a) IO value
  425. for kind in ('inputs', 'outputs'):
  426. for idx, io in enumerate(s.get(kind) or []):
  427. if not isinstance(io, dict) or not _is_fillable(io.get('value')):
  428. continue
  429. pt = _parse_passthrough(io.get('anchor'), step_ids)
  430. val = _src_value(pt) if pt else None
  431. if val is None:
  432. continue
  433. io['value'] = val
  434. filled.append(
  435. f"{p.get('id')}.{s.get('id')}.{kind}[{idx}].value "
  436. f"← 复制自 {pt[0]}.{pt[1]} ({len(str(val))} 字)"
  437. )
  438. changed = True
  439. # (b) instruction directive (喂给工具的 prompt = 引用的 output 原文)
  440. for di, pair in enumerate(s.get('instruction') or []):
  441. if not (isinstance(pair, list) and len(pair) == 2 and pair[0] == 'directive'):
  442. continue
  443. if not _is_fillable(pair[1]):
  444. continue
  445. ref = _extract_ref(pair[1], step_ids)
  446. val = _src_value(ref) if ref else None
  447. if val is None:
  448. continue
  449. pair[1] = val
  450. filled.append(
  451. f"{p.get('id')}.{s.get('id')}.instruction[{di}](directive) "
  452. f"← 复制自 {ref[0]}.{ref[1]} ({len(str(val))} 字)"
  453. )
  454. changed = True
  455. # 仍填不动的透传 (源找不到) → warn
  456. warns: list[str] = []
  457. for p in data.get('procedures') or []:
  458. for s in p.get('steps') or []:
  459. for kind in ('inputs', 'outputs'):
  460. for idx, io in enumerate(s.get(kind) or []):
  461. if not isinstance(io, dict) or not _is_fillable(io.get('value')):
  462. continue
  463. pt = _parse_passthrough(io.get('anchor'), step_ids)
  464. if pt and out_index.get(pt) is None:
  465. warns.append(
  466. f"{p.get('id')}.{s.get('id')}.{kind}[{idx}] anchor 指向 "
  467. f"{pt[0]}.{pt[1]} 但找不到该 output (检查 anchor / 变量名)"
  468. )
  469. return filled, warns
  470. # ===========================================================================
  471. # 应用
  472. # ===========================================================================
  473. def load_patches(args) -> list[tuple[str, str]]:
  474. """汇总 --set、--patch 与 --set-file 成 [(path, value), ...]."""
  475. def _norm(v):
  476. if isinstance(v, str):
  477. # 将中文全角双角/单引号自动归一化为标准半角引号,更利于 AI 生图引擎和 Prompt 语法识别
  478. v = v.replace('“', '"').replace('”', '"').replace('‘', "'").replace('’', "'")
  479. return v
  480. out: list[tuple[str, str]] = []
  481. for s in args.set or []:
  482. if '=' not in s:
  483. raise SystemExit(f'wf-patch: --set 缺 "=" : {s!r} (形如 path=value)')
  484. path, value = s.split('=', 1) # 只切第一个 '='
  485. out.append((path.strip(), _norm(value)))
  486. # 🟢 新增:从外部文件读取值注入
  487. for sf in getattr(args, 'set_file', None) or []:
  488. if '=' not in sf:
  489. raise SystemExit(f'wf-patch: --set-file 缺 "=" : {sf!r} (形如 path=file_path)')
  490. path, fpath_str = sf.split('=', 1)
  491. fpath = Path(fpath_str.strip())
  492. if not fpath.exists():
  493. raise SystemExit(f'wf-patch: --set-file 指定的文件不存在: {fpath_str}')
  494. try:
  495. value = fpath.read_text(encoding='utf-8')
  496. except Exception as e:
  497. raise SystemExit(f'wf-patch: 无法读取 --set-file 指定的文件 {fpath_str}: {e}')
  498. out.append((path.strip(), _norm(value)))
  499. if args.patch:
  500. if not args.patch.exists():
  501. raise SystemExit(f'wf-patch: --patch 文件不存在 {args.patch}')
  502. try:
  503. items = json.loads(args.patch.read_text(encoding='utf-8'))
  504. except json.JSONDecodeError as e:
  505. raise SystemExit(f'wf-patch: --patch 不是合法 JSON: {e}')
  506. for it in items:
  507. out.append((it['path'], _norm(it['value'])))
  508. return out
  509. def main() -> None:
  510. ap = argparse.ArgumentParser(
  511. prog='wf-patch.py',
  512. description='workflow.json 安全批量字段设置器 (写入即校验, 任何一条非法整批不写)',
  513. )
  514. ap.add_argument('--workflow', type=Path, required=True, help='目标 workflow.json')
  515. ap.add_argument('--set', action='append', metavar='PATH=VALUE',
  516. help='单条赋值, 可重复. 只在第一个 = 处切; value 可含 = 和空格 (记得整体加引号)')
  517. ap.add_argument('--patch', type=Path, default=None,
  518. help='批量赋值清单 .json: [{"path":..,"value":..}, ...]')
  519. ap.add_argument('--set-file', action='append', metavar='PATH=FILE_PATH', default=None,
  520. help='从外部文件读取内容注入指定字段. e.g. p1.s1.outputs[0].value=_scratch/prompt.txt')
  521. ap.add_argument('--unset', action='append', metavar='PATH', default=None,
  522. help='删字段, 可重复. e.g. p1.declarations.inputs[0].inferred (字段不存在则跳过). 取代手 Edit 删字段')
  523. ap.add_argument('--resolve-passthrough', action='store_true',
  524. help='把 anchor 为纯透传 (← sN.varname)、value 仍空/占位的 IO, 顺 anchor 从源 output 逐字抄 value. 可单独跑, 也可跟在 --set/--patch 后 (先赋值再解析). 迭代处理链式透传')
  525. ap.add_argument('--dry-run', action='store_true', help='只校验/预演, 不写')
  526. args = ap.parse_args()
  527. wf = args.workflow
  528. if not wf.exists():
  529. print(f'wf-patch: 文件不存在 {wf}', file=sys.stderr)
  530. sys.exit(2)
  531. raw = wf.read_text(encoding='utf-8')
  532. repaired = 0
  533. try:
  534. data = json.loads(raw)
  535. except json.JSONDecodeError as e:
  536. # 兜底: 试着把误写成 ASCII 的中文引号修成「」再 parse (模型直 Write 常见崩因)
  537. fixed, repaired = repair_ascii_quotes(raw)
  538. try:
  539. data = json.loads(fixed)
  540. except json.JSONDecodeError:
  541. print(f'wf-patch: {wf} 不是合法 JSON (自动修引号也救不回): {e}', file=sys.stderr)
  542. sys.exit(2)
  543. print(f'[wf-patch] ⚠️ 原文件 JSON 非法 ({e.msg} @ line {e.lineno}); 已自动把 '
  544. f'{repaired} 处误写的 ASCII 引号修成「」→ 解析成功, 修复将随本次写回落盘',
  545. file=sys.stderr)
  546. patches = load_patches(args)
  547. unsets = args.unset or []
  548. if not patches and not unsets and not args.resolve_passthrough:
  549. print('wf-patch: 没有 --set / --patch / --unset / --resolve-passthrough, 啥也没干', file=sys.stderr)
  550. sys.exit(2)
  551. # 解析 + 校验; 任何一条失败 -> 整批不写
  552. pending_types = set()
  553. for path, _ in patches:
  554. m = re.match(r'^p\d+\.type_registry\.([^.]+)\.(extends|desc)$', path)
  555. if m:
  556. pending_types.add(m.group(1))
  557. plan = [] # set: (parent, key, normalized_value, path, display)
  558. del_plan = [] # unset: (parent, key, path)
  559. skipped = [] # unset 跳过 (字段本就不在)
  560. errors = [] # (path, msg)
  561. for path, value in patches:
  562. try:
  563. parent, key, proc, field = locate(data, path)
  564. except PathError as e:
  565. errors.append((path, str(e)))
  566. continue
  567. ok, norm, msg = validate_field(field, value, proc, pending_types)
  568. if not ok:
  569. errors.append((path, msg))
  570. continue
  571. plan.append((parent, key, norm, path, norm if norm is not None else 'null'))
  572. for path in unsets:
  573. try:
  574. parent, key, _proc, _field = locate(data, path)
  575. except PathError as e:
  576. errors.append((path, str(e)))
  577. continue
  578. present = (isinstance(parent, dict) and key in parent) or \
  579. (isinstance(parent, list) and isinstance(key, int) and key < len(parent))
  580. (del_plan if present else skipped).append((parent, key, path) if present else path)
  581. if patches or unsets:
  582. print(f'[wf-patch] {wf.name} — set {len(plan)}/{len(patches)} 通过, '
  583. f'unset {len(del_plan)} 删/{len(skipped)} 跳过, {len(errors)} 失败')
  584. for _p, _k, _n, path, disp in plan:
  585. print(f' ✓ set {path} = {disp}')
  586. for _p, _k, path in del_plan:
  587. print(f' ✓ unset {path}')
  588. for path in skipped:
  589. print(f' · skip {path} (字段本就不存在)')
  590. for path, msg in errors:
  591. print(f' ✗ {path} — {msg}')
  592. if errors:
  593. print(f'\n有 {len(errors)} 条失败, 整批未写入 (修正后重跑).', file=sys.stderr)
  594. sys.exit(1)
  595. # 应用到内存 data (set 先 unset 后; resolve 要看到这些改动). 是否持久化由 dry-run 决定.
  596. for parent, key, norm, _, _ in plan:
  597. parent[key] = norm
  598. for parent, key, _path in sorted(del_plan, key=lambda d: -d[1] if isinstance(d[1], int) else 0):
  599. if isinstance(parent, list):
  600. parent.pop(key)
  601. else:
  602. del parent[key]
  603. # 透传回填
  604. filled, warns = [], []
  605. if args.resolve_passthrough:
  606. filled, warns = resolve_passthrough(data)
  607. print(f'[resolve-passthrough] 回填 {len(filled)} 处透传 value, {len(warns)} 处填不动')
  608. for m in filled:
  609. print(f' ✓ {m}')
  610. for w in warns:
  611. print(f' ⚠ {w}')
  612. n_changes = len(plan) + len(del_plan) + len(filled)
  613. if args.dry_run:
  614. extra = f' (+ 自动修复 {repaired} 处引号, dry-run 同样不写)' if repaired else ''
  615. print(f'\n--dry-run: 预演 {n_changes} 处改动{extra}, 未写入.')
  616. sys.exit(0)
  617. # repaired>0 时即便无字段改动也要落盘 (否则修好的引号没存下来, 文件还是坏的)
  618. if n_changes == 0 and not repaired:
  619. print('\n没有改动 (透传 value 都已填好 / 无可赋值), 未写文件.')
  620. sys.exit(0)
  621. # 落盘 (安全序列化, 你从不手写 JSON)
  622. wf.write_text(json.dumps(data, ensure_ascii=False, indent=2) + '\n', encoding='utf-8')
  623. tail = f' (含自动修复 {repaired} 处引号→「」)' if repaired else ''
  624. print(f'\n已写入 {n_changes} 处到 {wf.name}{tail}.')
  625. sys.exit(0)
  626. if __name__ == '__main__':
  627. main()