пре 1 месец · d6c9c63ba8
--- a/examples/process_pipeline/prompts/extract_workflow.prompt
+++ b/examples/process_pipeline/prompts/extract_workflow.prompt
@@ -6,86 +6,120 @@ $system$
 
															 你是 AI 图片制作工序沉淀助手。本阶段是 Stage 1：只抽取语义，不做内容树映射。
														
 
															-# 工序提取规则
														
 
															+# 任务概述
														
 
															+
														
 
															+从帖子中同时完成两件事：
														
 
															+1. 识别 workflow steps（按"提交动作"边界划分）
														
 
															+2. 对每个 step，识别其中的 1+ 原子操作，每个原子操作输出为一个 fragment（带完整 capability 字段）
														
 
															+
														
 
															+# 工序提取规则（workflow steps）
														
 
															 - 将帖子内容总结为 AI 图片制作工序。
														
 
															-- 步骤粒度是“做了什么”，而非“怎么做”。
														
 
															-- 以“触发生成 / 处理的动作”为步骤边界，同一次提交前的所有配置（模型选择、参数调整、描述词输入等）合并为一步。
														
 
															-- 若本质上只有一步，也输出一步，不要返回 strategy=null。
														
 
															+- 步骤粒度是"做了什么"，而非"怎么做"。
														
 
															+- 以"触发生成 / 处理的动作"为步骤边界，同一次提交前的所有配置（模型选择、参数调整、描述词输入等）合并为一步。
														
 
															+- 若本质上只有一步，也输出一步，不要返回 workflow=null。
														
 
															 - 可选步骤也应提取。
														
 
															-- 所有字段均从帖子原文中提取，帖子未提及的信息一律填 null，不得自行推断或补全。
														
 
															+- step 是薄壳：只装结构性元数据（step_id、order、phase、relation、body），不含 capability 字段。
														
 
															 - 若原帖纯营销、信息密度太低或完全没怎么做，则 skip=true。
														
 
															-- 本阶段严禁生成 apply_to，只生成 apply_to_draft。每个 step 都必须有 apply_to_draft，描述该步骤操作的内容点（实质：操作的内容是什么；形式：以什么形式呈现）。
														
 
															 - 不要调用任何工具，不要查树。
														
 
															-本阶段只输出每一步的结构化 `inputs`、`outputs`、`action`。
														
 
															+# step 字段
														
 
															-# steps 字段
														
 
															+每个 step 包含：
														
 
															-每个步骤包含：
														
 
															+- step_id：格式为 "s{order}"，如 "s1"、"s2"
														
 
															+- order：步骤序号，整数
														
 
															+- phase：该步骤所属阶段，取值为「非制作」/「预处理」/「生成」/「编辑」之一
														
 
															+- relation：该步骤输出的去向，格式同 inputs/outputs 的 relation 字段，如 "[去向.最终成品]"、"[去向.2I]"
														
 
															+- body：具体做法，包含 prompt 写法、参数配置、操作细节等；从帖子原文中提取，未提及则为 null
														
 
															-- order：步骤序号，整数。
														
 
															-- phase: phase: 该步骤所属阶段，取值为「非制作」/「预处理」/「生成」/「编辑」之一；「非制作」指创作与运营层面的行为，如故事构思、选题策划、热点参考、发布节奏等，该阶段不含 action；「预处理」是产出物的目的，不面向最终成品；生成和编辑可复用 action 的定义。
														
 
															-- action: 该步骤的核心动作，格式为「一级动作：二级动作」；若二级动作与一级相同可省略；非制作阶段的 action 值固定为 null。
														
 
															-- body：具体做法，包含 prompt 写法、参数配置、操作细节等；从帖子原文中提取，未提及则为 null。
														
 
															-- inputs：该步骤的输入，包含来源，数组。
														
 
															-- outputs：该步骤的产出，包含去向，数组。
														
 
															-- tools：使用的工具或平台，数组；未提及则为 []。
														
 
															+# fragment 提取规则
														
 
															-# action 字段
														
 
															+- 每个 step 的 body 中识别 1+ 原子操作，每个原子操作输出为一个 fragment。
														
 
															+- 同一 step 内的不同方案（如"用 MJ 生成 / 用 SD 生成"）互为 alternative：
														
 
															+  - 每种方案单独输出一个 fragment，各自填写完整字段（inputs、action、outputs、tools 等均可不同）
														
 
															+  - 在 is_alternative_to 中互相标注对方的 fragment_id
														
 
															+- 帖子中脱离 workflow 上下文、单独提及的能力，也输出为 fragment（standalone fragment），并令 workflow_step_ref = null。
														
 
															+- 不跨 step 合并 fragments。
														
 
															+- fragment_id 格式：步内原子操作用 "f_{step_id}_{i}"（如 "f_s1_0"、"f_s1_1"），standalone 用 "f_standalone_{i}"；其中 i 从 0 开始依次递增。
														
 
															+
														
 
															+# fragment 字段
														
 
															+
														
 
															+每个 fragment 包含完整 capability 字段：
														
 
															-## 格式规定
														
 
															+- fragment_id：字符串，见上方规则
														
 
															+- action：{ main_action, mechanism }，见下方 action 字段规则
														
 
															+- inputs / outputs：结构化接口，见下方规则
														
 
															+- body：该原子操作在原帖中的描述（可能是 step body 的子片段）；未提及则为 null
														
 
															+- effects：该原子操作产生的可观测效果，数组，每项为结构体（见下方 effects 字段规则）
														
 
															+- control_target：该操作控制的对象，字符串数组，如 ["人物姿态", "背景风格"]；未提及则为 []
														
 
															+- artifact_type：该操作产出的工件类型，如 "正向提示词"、"蒙版"、"参考图"；未提及则为 null
														
 
															+- tools：使用的工具或平台，数组；未提及则为 []
														
 
															+- apply_to_draft：{ 实质: [...], 形式: [...] }，只写自然语言短语
														
 
															+- workflow_step_ref：{ workflow_id, step_id } 或 null（standalone fragment）
														
 
															+- is_alternative_to：同一 step 内互为可选方案的其他 fragment_id 数组，无则为 []
														
 
															-- 一级动作，即main_action，应从以下五个选项中选择：
														
 
															-  - 1.生成。此时output为新内容，input不延续到output，只作为约束或参考
														
 
															-  - 2.编辑。input延续到output，output只是input的修改
														
 
															-  - 3.提取。output是来源的子集。（来源可以是input或者外部库）
														
 
															-  - 4.组织。多项素材的结构化集合/索引/模板
														
 
															-  - 5.筛选。output是候选集的子集（基于评估）。
														
 
															-- 每个main_action应该有不同的二级动作，即mechanism：
														
 
															-  - 生成：直接生成/参考引导/一致性保持/动画化/多模态合成/多候选生成/......
														
 
															-  - 编辑：局部重绘/风格迁移/颜色调整/蒙板重绘/拼接组合/裁切扩展/......
														
 
															-  - 提取：提示词反推/关键帧提取/蒙板提取/知识库检索/特征向量话/......
														
 
															-  - 组织：分类入库/模板化/标签化/变量抽象/结构抽象/......
														
 
															-  - 筛选：抽卡选优/评分排序top k/人工挑选/阈值过滤/......
														
 
															+# action 字段
														
 
															-## action 写成对象：
														
 
															+action 写成对象：
														
 
															 ```json
														
 
															 { "main_action": "编辑", "mechanism": "局部重绘" }
														
 
															 ```
														
 
															-- main_action：
														
 
															-  - **生成**：以输入内容为条件，输出全新内容；相同模态的输入仅起约束作用，不直接延续到输出中。
														
 
															-  - **编辑**：以输入内容为主体，对其进行修改或变换后输出；输入内容直接延续到输出中。
														
 
															-- mechanism：mai_action 的细分，如编辑的细分是：局部重绘、风格迁移
														
 
															+- main_action 从以下选择：生成 / 编辑 / 提取 / 组织 / 筛选
														
 
															+- mechanism 是 main_action 的细分：
														
 
															+  - 生成：直接生成 / 参考引导 / 一致性保持 / 动画化 / 多模态合成 / 多候选生成
														
 
															+  - 编辑：局部重绘 / 风格迁移 / 颜色调整 / 蒙板重绘 / 拼接组合 / 裁切扩展
														
 
															+  - 提取：提示词反推 / 关键帧提取 / 蒙板提取 / 知识库检索 / 特征向量化
														
 
															+  - 组织：分类入库 / 模板化 / 标签化 / 变量抽象 / 结构抽象
														
 
															+  - 筛选：抽卡选优 / 评分排序top k / 人工挑选 / 阈值过滤
														
 
															-{interface_vocab}
														
 
															+# inputs / outputs
														
 
															+
														
 
															+```json
														
 
															+{
														
 
															+  "modality": "文本",
														
 
															+  "description": "该项在当前步骤中实际起到的作用，用简短名词短语表达",
														
 
															+  "relation": "来源或去向"
														
 
															+}
														
 
															+```
														
 
															-# body 字段
														
 
															-**重要**：在描述具体做法时，应注意结合已有的架构。遇到相关内容时，*必须*使用已有的术语和架构。已有架构如下：
														
 
															-- 制作表，是制作帖子的原始输入，代表了制作一个贴子所需要的全部信息。所有对帖子解构的最终结果都应该是**制作表**。
														
 
															-- 知识库，这是一个庞大的数据库系统，用于保存得到的数据，知识是对数据的抽象提炼，由Agent负责获取。
														
 
															-- 不能新建、生成知识库或其他数据库，**系统里只有一个知识库**。不同种类的数据都存入这一个知识库中，不额外构建新的数据库，但支持根据标签进行筛选，从而隔离不同知识。
														
 
															-- 业务Agent，是一个自建的智能体系统，可以从知识库中获取知识；也可以处理数据，将其变成知识存入知识库；处理制作表，完成任务。**所有需要的智能体系统都应该被业务Agent所替代**
														
 
															+- modality 是数据形态：文本 / 图片 / 视频 / 音频 / 特征点 / 参数 / 模型 / 向量
														
 
															+- 同一次提交给模型的所有文字描述统一合并为一个输入项
														
 
															+- relation 格式：[来源.1O]、[去向.2I]、[来源.原始输入]、[去向.最终成品]（含义分别为：来源是序号 1 的 output、去向是序号 2 的 input、从原帖得到的信息、最终的结果）；input 只写来源，output 只写去向，来源和去向可以有多个
														
 
															-# inputs / outputs
														
 
															+# effects 字段
														
 
															-每个输入 / 输出项写成：
														
 
															+每个 effect 写成结构体：
														
 
															 ```json
														
 
															 {
														
 
															-  "modality": "文本",
														
 
															-  "description": "该项在当前步骤中实际起到的作用，用简短名词短语表达，如：场景参考、角色参考、故事情节与镜头要求等"，不要写指令，中间产物，等没有信息量的词汇。
														
 
															-  "relation": "文本",该输入/输出的来源/去向。
														
 
															+  "statement": "实现XXX",
														
 
															+  "criteria": "判断该效果是否达成的具体标准，一句话描述",
														
 
															+  "judge_method": "vlm",
														
 
															+  "negative_examples": ["反例描述1"]
														
 
															 }
														
 
															 ```
														
 
															-要求：
														
 
															+- statement：以"实现"开头，描述该操作产生的可观测效果
														
 
															+- criteria：判断标准，具体、可操作，描述"什么情况下算达成"
														
 
															+- judge_method：判断方式，从以下选择：
														
 
															+  - `llm`：纯文本推理可判断
														
 
															+  - `vlm`：需要看图才能判断
														
 
															+  - `rule`：可用规则/代码判断（如分辨率、文件大小）
														
 
															+  - `human`：需要人工主观判断
														
 
															+- negative_examples：反例列表，描述"什么情况下算没达成"；无明显反例则为 []
														
 
															+
														
 
															+每个 fragment 必须有 effects，至少一项。
														
 
															+
														
 
															+# apply_to_draft 字段
														
 
															-- modality 是数据形态，如 文本 / 图片 / 视频 / 音频 / 特征点 / 参数 / 模型 / 向量。
														
 
															-- 同一次提交给模型的所有文字描述统一合并为一个输入项，不得按语义功能拆分
														
 
															-- relation 格式应为：[来源.1O]、[去向.2I]、[来源.原始输入]、[去向.最终成品]（含义分别为：来源是序号1的output、去向是序号2的input、从原贴得到的信息、最终的结果，不需要额外的文字描述或标点符号）。input只需要来源，output只需要去向，来源和去向可以有多个。
														
 
															+- 本阶段严禁生成 apply_to，只生成 apply_to_draft。
														
 
															+- apply_to_draft.实质 写内容关于什么：主体、题材、场景、情境等。
														
 
															+- apply_to_draft.形式 写内容怎么呈现：镜头、构图、光线、叙事、排版、质感等。
														
 
															+
														
 
															+{interface_vocab}
														
 
															 $user$
														
@@ -101,59 +135,59 @@ $user$
 
															 {
														
 
															   "skip": false,
														
 
															   "skip_reason": "",
														
 
															-  "strategy": {
														
 
															+  "workflow": {
														
 
															+    "workflow_id": null,
														
 
															     "steps": [
														
 
															       {
														
 
															+        "step_id": "s1",
														
 
															         "order": 1,
														
 
															-        "phase": "...",
														
 
															-        "action": { "main_action": "...", "mechanism": "..." },
														
 
															-        "body": "string | null",
														
 
															-        "inputs": [
														
 
															-          {
														
 
															-            "modality": "...",
														
 
															-            "description": "...",
														
 
															-            "relation": "..."
														
 
															-          }
														
 
															-        ],
														
 
															-        "outputs": [
														
 
															-          {
														
 
															-            "modality": "...",
														
 
															-            "description": "...",
														
 
															-            "relation": "..."
														
 
															-          }
														
 
															-        ],
														
 
															-        "tools": [],
														
 
															-        "apply_to_draft": { "实质": ["该步骤操作的内容点"], "形式": ["该步骤的呈现方式"] }
														
 
															+        "phase": "生成",
														
 
															+        "relation": "[去向.最终成品]",
														
 
															+        "body": "string | null"
														
 
															       }
														
 
															-    ],
														
 
															-    "effects": [
														
 
															-     {                                                               
														
 
															-          "statement": "实现XXX",  // 解决了什么具体需求
														
 
															-          "criteria": "...", // 结果是否成功的判定标准
														
 
															-          "judge_method": "...", // 可选: llm / vlm / rule / human                       
														
 
															-          "negative_examples": [                                                                      
														
 
															-              "...",                                                                                  
														
 
															-              "..."                                                                                     
														
 
															-          ]                                                             
														
 
															-       },
														
 
															-      {                                                               
														
 
															-          "statement": "实现YYY",
														
 
															-          "criteria": "...",
														
 
															-          "judge_method": "...",                 
														
 
															-          "negative_examples": [                                                                      
														
 
															-              "...",                                                                                                                                                                    
														
 
															-          ]                                                             
														
 
															-       }],
														
 
															-    "criterion": null,
														
 
															-    "unstructured_what": []
														
 
															-  }
														
 
															+    ]
														
 
															+  },
														
 
															+  "fragments": [
														
 
															+    {
														
 
															+      "fragment_id": "f_s1_0",
														
 
															+      "action": { "main_action": "生成", "mechanism": "直接生成" },
														
 
															+      "inputs": [
														
 
															+        {
														
 
															+          "modality": "文本",
														
 
															+          "description": "...",
														
 
															+          "relation": "[来源.原始输入]"
														
 
															+        }
														
 
															+      ],
														
 
															+      "outputs": [
														
 
															+        {
														
 
															+          "modality": "图片",
														
 
															+          "description": "...",
														
 
															+          "relation": "[去向.最终成品]"
														
 
															+        }
														
 
															+      ],
														
 
															+      "body": "string | null",
														
 
															+      "effects": [
														
 
															+        {
														
 
															+          "statement": "实现XXX",
														
 
															+          "criteria": "判断标准",
														
 
															+          "judge_method": "vlm",
														
 
															+          "negative_examples": []
														
 
															+        }
														
 
															+      ],
														
 
															+      "control_target": [],
														
 
															+      "artifact_type": null,
														
 
															+      "tools": [],
														
 
															+      "apply_to_draft": { "实质": ["..."], "形式": ["..."] },
														
 
															+      "workflow_step_ref": { "workflow_id": null, "step_id": "s1" },
														
 
															+      "is_alternative_to": []
														
 
															+    }
														
 
															+  ]
														
 
															 }
														
 
															 ```
														
 
															 # 输出硬规则
														
 
															 - 只输出最终严格 JSON，不要 Markdown 代码块。
														
 
															-- strategy 顶层不要输出 inputs / outputs / tools / stage。
														
 
															 - 不要任何前言、解释、标题。
														
 
															 - 字符串值内禁止出现 ASCII 双引号；需要引号请用中文书名号。
														
 
															-- **effects 的 statement 都必须以"实现"开头**，如 "实现快速生成"、"实现风格统一"。
														
 
															+- effects 每项的 statement 都必须以"实现"开头。
														
--- a/examples/process_pipeline/prompts/extract_workflow.schema.json
+++ b/examples/process_pipeline/prompts/extract_workflow.schema.json
@@ -1,225 +1,135 @@
 
															 {
														
 
															   "$schema": "http://json-schema.org/draft-07/schema#",
														
 
															-  "title": "extract_workflow_output_v5",
														
 
															+  "title": "extract_workflow_output_v6",
														
 
															   "type": "object",
														
 
															-  "required": [
														
 
															-    "skip",
														
 
															-    "skip_reason",
														
 
															-    "strategy"
														
 
															-  ],
														
 
															+  "required": ["skip", "skip_reason", "workflow", "fragments"],
														
 
															   "properties": {
														
 
															-    "skip": {
														
 
															-      "type": "boolean"
														
 
															-    },
														
 
															-    "skip_reason": {
														
 
															-      "type": "string"
														
 
															-    },
														
 
															-    "strategy": {
														
 
															+    "skip": { "type": "boolean" },
														
 
															+    "skip_reason": { "type": "string" },
														
 
															+    "workflow": {
														
 
															       "anyOf": [
														
 
															-        {
														
 
															-          "type": "null"
														
 
															-        },
														
 
															+        { "type": "null" },
														
 
															         {
														
 
															           "type": "object",
														
 
															-          "required": [
														
 
															-            "steps",
														
 
															-            "effects",
														
 
															-            "criterion",
														
 
															-            "unstructured_what"
														
 
															-          ],
														
 
															+          "required": ["steps"],
														
 
															           "properties": {
														
 
															+            "workflow_id": { "type": ["string", "null"] },
														
 
															             "steps": {
														
 
															               "type": "array",
														
 
															               "minItems": 1,
														
 
															               "items": {
														
 
															                 "type": "object",
														
 
															-                "required": [
														
 
															-                  "order",
														
 
															-                  "phase",
														
 
															-                  "action",
														
 
															-                  "body",
														
 
															-                  "inputs",
														
 
															-                  "outputs",
														
 
															-                  "tools",
														
 
															-                  "apply_to_draft"
														
 
															-                ],
														
 
															+                "required": ["step_id", "order", "phase", "relation", "body"],
														
 
															                 "properties": {
														
 
															-                  "order": {
														
 
															-                    "type": "integer",
														
 
															-                    "minimum": 1
														
 
															-                  },
														
 
															+                  "step_id": { "type": "string", "pattern": "^s[0-9]+$" },
														
 
															+                  "order": { "type": "integer", "minimum": 1 },
														
 
															                   "phase": {
														
 
															                     "type": "string",
														
 
															-                    "enum": [
														
 
															-                      "非制作",
														
 
															-                      "预处理",
														
 
															-                      "生成",
														
 
															-                      "编辑"
														
 
															-                    ]
														
 
															-                  },
														
 
															-                  "action": {
														
 
															-                    "type": "object",
														
 
															-                    "required": [
														
 
															-                      "main_action",
														
 
															-                      "mechanism"
														
 
															-                    ],
														
 
															-                    "properties": {
														
 
															-                      "main_action": {
														
 
															-                        "type": "string",
														
 
															-                        "minLength": 1,
														
 
															-                        "description": "主动作：生成、编辑、提取、组织、筛选"
														
 
															-                      },
														
 
															-                      "mechanism": {
														
 
															-                        "type": "string",
														
 
															-                        "minLength": 1,
														
 
															-                        "description": "动作方式：直接生成、局部重绘、提示词反推、分类入库、抽卡选优等"
														
 
															-                      }
														
 
															-                    }
														
 
															-                  },
														
 
															-                  "body": {
														
 
															-                    "type": [
														
 
															-                      "string",
														
 
															-                      "null"
														
 
															-                    ]
														
 
															-                  },
														
 
															-                  "inputs": {
														
 
															-                    "type": "array",
														
 
															-                    "items": {
														
 
															-                      "type": "object",
														
 
															-                      "required": [
														
 
															-                        "modality",
														
 
															-                        "description",
														
 
															-                        "relation"
														
 
															-                      ],
														
 
															-                      "properties": {
														
 
															-                        "modality": {
														
 
															-                          "type": "string",
														
 
															-                          "minLength": 1,
														
 
															-                          "description": "模态：文本、图片、视频、音频、特征点、参数、模型、向量、表格"
														
 
															-                        },
														
 
															-                        "description": {
														
 
															-                          "type": "string",
														
 
															-                          "minLength": 1,
														
 
															-                          "description": "功能性描述，不写具体内容what"
														
 
															-                        },
														
 
															-                        "relation": {
														
 
															-                          "type": "string",
														
 
															-                          "minLength": 1,
														
 
															-                          "description": "来源与去向，有特定格式"
														
 
															-                        }
														
 
															-                      }
														
 
															-                    }
														
 
															-                  },
														
 
															-                  "outputs": {
														
 
															-                    "type": "array",
														
 
															-                    "items": {
														
 
															-                      "type": "object",
														
 
															-                      "required": [
														
 
															-                        "modality",
														
 
															-                        "description",
														
 
															-                        "relation"
														
 
															-                      ],
														
 
															-                      "properties": {
														
 
															-                        "modality": {
														
 
															-                          "type": "string",
														
 
															-                          "minLength": 1
														
 
															-                        },
														
 
															-                        "description": {
														
 
															-                          "type": "string",
														
 
															-                          "minLength": 1
														
 
															-                        },
														
 
															-                        "relation": {
														
 
															-                          "type": "string",
														
 
															-                          "minLength": 1,
														
 
															-                          "description": "来源与去向，有特定格式"
														
 
															-                        }
														
 
															-                      }
														
 
															-                    }
														
 
															-                  },
														
 
															-                  "tools": {
														
 
															-                    "type": "array",
														
 
															-                    "items": {
														
 
															-                      "type": "string"
														
 
															-                    }
														
 
															+                    "enum": ["非制作", "预处理", "生成", "编辑"]
														
 
															                   },
														
 
															-                  "apply_to_draft": {
														
 
															-                    "type": "object",
														
 
															-                    "required": [
														
 
															-                      "实质",
														
 
															-                      "形式"
														
 
															-                    ],
														
 
															-                    "properties": {
														
 
															-                      "实质": {
														
 
															-                        "type": "array",
														
 
															-                        "items": {
														
 
															-                          "type": "string"
														
 
															-                        }
														
 
															-                      },
														
 
															-                      "形式": {
														
 
															-                        "type": "array",
														
 
															-                        "items": {
														
 
															-                          "type": "string"
														
 
															-                        }
														
 
															-                      }
														
 
															-                    }
														
 
															-                  }
														
 
															+                  "relation": { "type": "string", "minLength": 1 },
														
 
															+                  "body": { "type": ["string", "null"] }
														
 
															                 }
														
 
															               }
														
 
															-            },
														
 
															-            "effects": {
														
 
															-              "type": "array",
														
 
															-              "items": {
														
 
															+            }
														
 
															+          }
														
 
															+        }
														
 
															+      ]
														
 
															+    },
														
 
															+    "fragments": {
														
 
															+      "type": "array",
														
 
															+      "items": {
														
 
															+        "type": "object",
														
 
															+        "required": [
														
 
															+          "fragment_id", "action", "inputs", "outputs",
														
 
															+          "body", "effects", "control_target", "artifact_type",
														
 
															+          "tools", "apply_to_draft",
														
 
															+          "workflow_step_ref", "is_alternative_to"
														
 
															+        ],
														
 
															+        "properties": {
														
 
															+          "fragment_id": { "type": "string", "minLength": 1 },
														
 
															+          "action": {
														
 
															+            "type": "object",
														
 
															+            "required": ["main_action", "mechanism"],
														
 
															+            "properties": {
														
 
															+              "main_action": { "type": "string", "minLength": 1 },
														
 
															+              "mechanism": { "type": "string", "minLength": 1 }
														
 
															+            }
														
 
															+          },
														
 
															+          "inputs": {
														
 
															+            "type": "array",
														
 
															+            "items": {
														
 
															+              "type": "object",
														
 
															+              "required": ["modality", "description", "relation"],
														
 
															+              "properties": {
														
 
															+                "modality": { "type": "string", "minLength": 1 },
														
 
															+                "description": { "type": "string", "minLength": 1 },
														
 
															+                "relation": { "type": "string", "minLength": 1 }
														
 
															+              }
														
 
															+            }
														
 
															+          },
														
 
															+          "outputs": {
														
 
															+            "type": "array",
														
 
															+            "items": {
														
 
															+              "type": "object",
														
 
															+              "required": ["modality", "description", "relation"],
														
 
															+              "properties": {
														
 
															+                "modality": { "type": "string", "minLength": 1 },
														
 
															+                "description": { "type": "string", "minLength": 1 },
														
 
															+                "relation": { "type": "string", "minLength": 1 }
														
 
															+              }
														
 
															+            }
														
 
															+          },
														
 
															+          "body": { "type": ["string", "null"] },
														
 
															+          "effects": {
														
 
															+            "type": "array",
														
 
															+            "items": {
														
 
															+              "type": "object",
														
 
															+              "required": ["statement", "criteria", "judge_method", "negative_examples"],
														
 
															+              "properties": {
														
 
															+                "statement": { "type": "string", "pattern": "^实现" },
														
 
															+                "criteria": { "type": "string", "minLength": 1 },
														
 
															+                "judge_method": { "type": "string", "enum": ["llm", "vlm", "rule", "human"] },
														
 
															+                "negative_examples": { "type": "array", "items": { "type": "string", "minLength": 1 }, "default": [] }
														
 
															+              }
														
 
															+            }
														
 
															+          },
														
 
															+          "control_target": {
														
 
															+            "type": "array",
														
 
															+            "items": { "type": "string", "minLength": 1 }
														
 
															+          },
														
 
															+          "artifact_type": { "type": ["string", "null"] },
														
 
															+          "tools": {
														
 
															+            "type": "array",
														
 
															+            "items": { "type": "string" }
														
 
															+          },
														
 
															+          "apply_to_draft": {
														
 
															+            "type": "object",
														
 
															+            "required": ["实质", "形式"],
														
 
															+            "properties": {
														
 
															+              "实质": { "type": "array", "items": { "type": "string" } },
														
 
															+              "形式": { "type": "array", "items": { "type": "string" } }
														
 
															+            }
														
 
															+          },
														
 
															+          "workflow_step_ref": {
														
 
															+            "anyOf": [
														
 
															+              { "type": "null" },
														
 
															+              {
														
 
															                 "type": "object",
														
 
															-                "required": [
														
 
															-                  "statement",
														
 
															-                  "criteria",
														
 
															-                  "judge_method",
														
 
															-                  "negative_examples"
														
 
															-                ],
														
 
															+                "required": ["step_id"],
														
 
															                 "properties": {
														
 
															-                  "statement": {
														
 
															-                    "type": "string",
														
 
															-                    "pattern": "^实现"
														
 
															-                  },
														
 
															-                  "criteria": {
														
 
															-                    "type": "string",
														
 
															-                    "minLength": 1
														
 
															-                  },
														
 
															-                  "judge_method": {
														
 
															-                    "type": "string",
														
 
															-                    "enum": [
														
 
															-                      "llm",
														
 
															-                      "vlm",
														
 
															-                      "rule",
														
 
															-                      "human"
														
 
															-                    ]
														
 
															-                  },
														
 
															-                  "negative_examples": {
														
 
															-                    "type": "array",
														
 
															-                    "items": {
														
 
															-                      "type": "string",
														
 
															-                      "minLength": 1
														
 
															-                    },
														
 
															-                    "default": []
														
 
															-                  }
														
 
															+                  "workflow_id": { "type": "string" },
														
 
															+                  "step_id": { "type": "string", "pattern": "^s[0-9]+$" }
														
 
															                 }
														
 
															               }
														
 
															-            },
														
 
															-            "criterion": {
														
 
															-              "type": [
														
 
															-                "string",
														
 
															-                "null"
														
 
															-              ]
														
 
															-            },
														
 
															-            "unstructured_what": {
														
 
															-              "type": "array",
														
 
															-              "items": {
														
 
															-                "type": "string"
														
 
															-              }
														
 
															-            }
														
 
															+            ]
														
 
															+          },
														
 
															+          "is_alternative_to": {
														
 
															+            "type": "array",
														
 
															+            "items": { "type": "string" }
														
 
															           }
														
 
															         }
														
 
															-      ]
														
 
															+      }
														
 
															     }
														
 
															   }
														
 
															-}
														
 
															+}
														
--- a/examples/process_pipeline/script/extract_workflow.py
+++ b/examples/process_pipeline/script/extract_workflow.py
@@ -1,14 +1,15 @@
 
															 """
														
 
															-逐 case 提取 workflow (v5版本)
														
 
															+逐 case 提取 workflow + fragments (v6版本)
														
 
															 从 case.json 读取，按 index 遍历每个 case，
														
 
															-调用 LLM 提取 workflow，按 index 原位回填到 case.json
														
 
															-
														
 
															-v5 架构特性：
														
 
															-- 使用结构化 inputs/outputs（role, modality, artifact_type 等10个维度）
														
 
															-- action 对象化：{main_action, mechanism}（替代旧的 method 字符串）
														
 
															-- Stage 1 输出 apply_to_draft（自然语言），为 Stage 2 内容树映射做准备
														
 
															-- strategy 顶层字段（method, inputs, outputs, tools, stage）由脚本自动推导
														
 
															+调用 LLM 同时提取 workflow（薄壳 steps）和 fragments（原子操作，含完整 capability 字段），
														
 
															+按 index 原位回填到 case.json
														
 
															+
														
 
															+v6 架构特性：
														
 
															+- workflow.steps 是薄壳：step_id / order / phase / relation / body，不含 capability 字段
														
 
															+- fragments 是原子操作列表：每个 fragment 含完整 capability 字段 + workflow_step_ref + is_alternative_to
														
 
															+- 步内多原子操作 + 步内 alternative 都在 fragment 层表达
														
 
															+- standalone fragment（workflow_step_ref=null）用于无 workflow 上下文的能力提及
														
 
															 """
														
 
															 import asyncio
														
@@ -91,93 +92,20 @@ def render_method_vocab_block(vocab: Dict[str, list]) -> str:
 
															     return "\n".join(lines)
														
 
															-import re
														
 
															-
														
 
															-
														
 
															-def _infer_stage_from_action(action_obj: dict) -> str:
														
 
															-    """从 action 对象推断 stage（v5版本）"""
														
 
															-    main_action = action_obj.get("main_action", "")
														
 
															-    mechanism = action_obj.get("mechanism", "")
														
 
															-
														
 
															-    # 根据主动作和动作方式推断阶段
														
 
															-    if main_action in ["提取", "改写", "模板化", "训练", "评估"]:
														
 
															-        return "preprocess"
														
 
															-    elif main_action in ["编辑", "修复", "增强", "剪辑", "排版"]:
														
 
															-        return "refine"
														
 
															-    elif mechanism in ["局部重绘", "扩图", "换背景", "换主体", "换装", "擦除", "调色",
														
 
															-                       "前后景融合", "降噪", "补帧", "超分", "稳定化", "质感增强"]:
														
 
															-        return "refine"
														
 
															-    else:
														
 
															-        return "generate"
														
 
															-
														
 
															-
														
 
															-def derive_strategy_rollup(strategy: dict) -> None:
														
 
															-    """
														
 
															-    从 steps 自动推导 strategy 的顶层字段（v5版本）：
														
 
															-    method, inputs, outputs, tools, stage
														
 
															-
														
 
															-    v5 变化：
														
 
															-    - method 从 action.main_action 提取（不再从旧的 method 字符串解析）
														
 
															-    - stage 从 action 对象推断
														
 
															-    """
														
 
															-    steps = [s for s in (strategy.get("steps") or []) if isinstance(s, dict)]
														
 
															-    if not steps:
														
 
															-        return
														
 
															-
														
 
															-    steps.sort(key=lambda s: s.get("order") if isinstance(s.get("order"), int) else 9999)
														
 
															-
														
 
															-    # method = 所有步骤的 main_action 用 "-" 连接
														
 
															-    actions = []
														
 
															-    for s in steps:
														
 
															-        action_obj = s.get("action")
														
 
															-        if isinstance(action_obj, dict):
														
 
															-            main_action = action_obj.get("main_action", "")
														
 
															-            if main_action:
														
 
															-                actions.append(main_action)
														
 
															-
														
 
															-    if actions:
														
 
															-        strategy["method"] = "-".join(actions)
														
 
															-
														
 
															-    # inputs = 第一步的 inputs
														
 
															-    first_inputs = steps[0].get("inputs")
														
 
															-    strategy["inputs"] = first_inputs if isinstance(first_inputs, list) else []
														
 
															-
														
 
															-    # outputs = 最后一步的 outputs
														
 
															-    last_outputs = steps[-1].get("outputs")
														
 
															-    strategy["outputs"] = last_outputs if isinstance(last_outputs, list) else []
														
 
															-
														
 
															-    # tools = 所有步骤的 tools 去重合并
														
 
															-    tools = []
														
 
															-    for step in steps:
														
 
															-        for tool in step.get("tools") or []:
														
 
															-            if isinstance(tool, str) and tool and tool not in tools:
														
 
															-                tools.append(tool)
														
 
															-    strategy["tools"] = tools
														
 
															-
														
 
															-    # stage = 从 action 对象推断
														
 
															-    stages = []
														
 
															-    for step in steps:
														
 
															-        action_obj = step.get("action")
														
 
															-        if isinstance(action_obj, dict):
														
 
															-            stage = _infer_stage_from_action(action_obj)
														
 
															-            if stage not in stages:
														
 
															-                stages.append(stage)
														
 
															-    strategy["stage"] = stages or ["generate"]
														
 
															 async def extract_workflow_from_case(
														
 
															     case_item: Dict[str, Any],
														
 
															     llm_call: Any,
														
 
															     model: str = "anthropic/claude-sonnet-4-5"
														
 
															-) -> tuple[Optional[Dict[str, Any]], float]:
														
 
															+) -> tuple[Optional[Dict[str, Any]], Optional[List[Dict[str, Any]]], float]:
														
 
															     """
														
 
															-    从单个 case item 提取 workflow (v5版本)
														
 
															+    从单个 case item 同时提取 workflow（薄壳 steps）和 fragments（原子操作列表）。
														
 
															-    v5 特性：
														
 
															-    - 结构化 inputs/outputs（role, modality, artifact_type 等）
														
 
															-    - action 对象化：{main_action, mechanism}（替代旧的 method 字符串）
														
 
															-    - 输出 apply_to_draft（自然语言），为 Stage 2 内容树映射做准备
														
 
															-    - strategy 顶层字段由 derive_strategy_rollup 自动推导
														
 
															+    Returns:
														
 
															+        (workflow_dict, fragments_list, cost)
														
 
															+        workflow_dict 为 None 表示 skip 或提取失败
														
 
															+        fragments_list 为 None 表示 skip 或提取失败
														
 
															     """
														
 
															     images = case_item.get("images", [])
														
@@ -185,17 +113,17 @@ async def extract_workflow_from_case(
 
															     case_copy.pop("images", None)
														
 
															     case_copy.pop("_raw", None)
														
 
															     case_copy.pop("workflow", None)
														
 
															+    case_copy.pop("fragments", None)
														
 
															     case_copy.pop("capabilities", None)
														
 
															     if not case_copy and not images:
														
 
															-        return None, 0.0
														
 
															+        return None, None, 0.0
														
 
															     title = case_item.get("title", "")[:20] or "untitled"
														
 
															     context = json.dumps(case_copy, ensure_ascii=False, indent=2)
														
 
															     try:
														
 
															         prompt_template = load_prompt_template("extract_workflow")
														
 
															-        # 添加 v5 词库说明
														
 
															         method_vocab = load_method_vocab()
														
 
															         vocab_block = render_method_vocab_block(method_vocab)
														
@@ -204,57 +132,56 @@ async def extract_workflow_from_case(
 
															         else:
														
 
															             prompt = prompt_template + f"\n\n## 帖子内容\n{context}"
														
 
															-        # 如果 prompt 中有 {interface_vocab} 占位符，替换为词库说明
														
 
															         if "{interface_vocab}" in prompt:
														
 
															             prompt = prompt.replace("{interface_vocab}", vocab_block)
														
 
															         elif vocab_block not in prompt:
														
 
															-            # 如果 prompt 中没有词库说明，添加到末尾
														
 
															             prompt = prompt + "\n" + vocab_block
														
 
															     except Exception as e:
														
 
															         print(f"Warning: Failed to load prompt template: {e}, using fallback")
														
 
															         method_vocab = load_method_vocab()
														
 
															         vocab_block = render_method_vocab_block(method_vocab)
														
 
															-        prompt = f"""将以下帖子内容总结为AI图片生成的工序，以JSON格式输出。
														
 
															+        prompt = f"""将以下帖子内容总结为AI图片生成的工序和原子操作，以JSON格式输出。
														
 
															-# 工序提取规则（v5）
														
 
															-- 步骤粒度是"做了什么"，而非"怎么做"
														
 
															-- 以"触发生成 / 处理的动作"为步骤边界
														
 
															-- 若本质上只有一步，也输出一步，不要返回 strategy=null
														
 
															-- 本阶段严禁生成 apply_to，只生成 apply_to_draft
														
 
															-
														
 
															-# 输出格式（v5）
														
 
															+# 输出格式（v6）
														
 
															 {{
														
 
															   "skip": false,
														
 
															   "skip_reason": "",
														
 
															-  "strategy": {{
														
 
															+  "workflow": {{
														
 
															+    "workflow_id": null,
														
 
															     "steps": [
														
 
															       {{
														
 
															+        "step_id": "s1",
														
 
															         "order": 1,
														
 
															-        "action": {{"main_action": "生成", "mechanism": "直接生成"}},
														
 
															-        "body": "string | null",
														
 
															-        "inputs": [
														
 
															-          {{
														
 
															-            "role": "生成指令",
														
 
															-            "modality": "文本",
														
 
															-            "artifact_type": "正向提示词",
														
 
															-            "control_target": ["主体", "场景"],
														
 
															-            "target_scope": ["整图"],
														
 
															-            "constraint_strength": "硬约束",
														
 
															-            "source": "原帖文本",
														
 
															-            "lifecycle": "原始输入",
														
 
															-            "description": "用于触发图片生成的完整提示词"
														
 
															-          }}
														
 
															-        ],
														
 
															-        "outputs": [...],
														
 
															-        "tools": []
														
 
															+        "phase": "生成",
														
 
															+        "relation": "[去向.最终成品]",
														
 
															+        "body": "string | null"
														
 
															       }}
														
 
															-    ],
														
 
															-    "effects": ["实现 XX 效果"],
														
 
															-    "criterion": null,
														
 
															-    "apply_to_draft": {{"实质": ["相关 what"], "形式": ["相关呈现方式"]}},
														
 
															-    "unstructured_what": []
														
 
															-  }}
														
 
															+    ]
														
 
															+  }},
														
 
															+  "fragments": [
														
 
															+    {{
														
 
															+      "fragment_id": "f_s1_0",
														
 
															+      "action": {{"main_action": "生成", "mechanism": "直接生成"}},
														
 
															+      "inputs": [{{"modality": "文本", "description": "...", "relation": "[来源.原始输入]"}}],
														
 
															+      "outputs": [{{"modality": "图片", "description": "...", "relation": "[去向.最终成品]"}}],
														
 
															+      "body": "string | null",
														
 
															+      "effects": [
														
 
															+        {{
														
 
															+          "statement": "实现XXX",
														
 
															+          "criteria": "判断标准",
														
 
															+          "judge_method": "vlm",
														
 
															+          "negative_examples": []
														
 
															+        }}
														
 
															+      ],
														
 
															+      "control_target": [],
														
 
															+      "artifact_type": null,
														
 
															+      "tools": [],
														
 
															+      "apply_to_draft": {{"实质": ["..."], "形式": ["..."]}},
														
 
															+      "workflow_step_ref": {{"workflow_id": null, "step_id": "s1"}},
														
 
															+      "is_alternative_to": []
														
 
															+    }}
														
 
															+  ]
														
 
															 }}
														
 
															 {vocab_block}
														
@@ -281,27 +208,22 @@ async def extract_workflow_from_case(
 
															         messages=messages,
														
 
															         model=model,
														
 
															         temperature=0.1,
														
 
															-        max_tokens=8000,  # 从2000增加到4000，处理更长的输出
														
 
															-        max_retries=3,    # 从3增加到5，增加重试机会
														
 
															+        max_tokens=10000,
														
 
															+        max_retries=3,
														
 
															         schema_name="extract_workflow",
														
 
															         task_name=f"Workflow_{title}",
														
 
															     )
														
 
															-    # Stage 1 格式：{"skip": bool, "skip_reason": str, "strategy": {...}}
														
 
															-    # 如果 skip=true 或 strategy=null，返回 None
														
 
															     if not result_data:
														
 
															-        return None, cost
														
 
															+        return None, None, cost
														
 
															     if result_data.get("skip"):
														
 
															-        return None, cost
														
 
															-
														
 
															-    workflow_data = result_data.get("strategy")
														
 
															+        return None, None, cost
														
 
															-    # 从 steps 自动推导顶层字段（v5版本）
														
 
															-    if workflow_data and isinstance(workflow_data, dict):
														
 
															-        derive_strategy_rollup(workflow_data)
														
 
															+    workflow_data = result_data.get("workflow")
														
 
															+    fragments_data = result_data.get("fragments", [])
														
 
															-    return workflow_data, cost
														
 
															+    return workflow_data, fragments_data, cost
														
 
															 async def extract_workflow(
														
@@ -345,13 +267,15 @@ async def extract_workflow(
 
															             print(f"  -> [{index}] [{case_id}] extracting workflow: {title[:60]}")
														
 
															-            workflow, cost = await extract_workflow_from_case(case_item, llm_call, model)
														
 
															+            workflow, fragments, cost = await extract_workflow_from_case(case_item, llm_call, model)
														
 
															-            status = "ok" if workflow else "null"
														
 
															+            frag_count = len(fragments) if fragments else 0
														
 
															+            status = f"ok ({frag_count} fragments)" if workflow else "null"
														
 
															             print(f"  <- [{index}] [{case_id}] workflow {status}")
														
 
															             result = dict(case_item)
														
 
															             result["workflow"] = workflow
														
 
															+            result["fragments"] = fragments if fragments is not None else []
														
 
															             return result, cost
														
 
															     tasks = [process_with_semaphore(case) for case in cases_to_process]
														
@@ -361,7 +285,7 @@ async def extract_workflow(
 
															     costs = [r[1] for r in results_with_costs]
														
 
															     total_cost = sum(costs)
														
 
															-    success_count = sum(1 for r in results if r.get("workflow"))
														
 
															+    success_count = sum(1 for r in results if r.get("workflow") and r.get("fragments"))
														
 
															     failed_count = len(results) - success_count
														
 
															     # 如果是部分更新，需要合并回原始 cases 列表
														
@@ -381,10 +305,13 @@ async def extract_workflow(
 
															     with open(case_file, "w", encoding="utf-8") as f:
														
 
															         json.dump(case_data, f, ensure_ascii=False, indent=2)
														
 
															+    fragments_count = sum(len(r.get("fragments") or []) for r in results)
														
 
															+
														
 
															     return {
														
 
															         "total": len(results),
														
 
															         "success": success_count,
														
 
															         "failed": failed_count,
														
 
															+        "fragments_total": fragments_count,
														
 
															         "total_cost": total_cost,
														
 
															         "output_file": str(case_file),
														
 
															     }