pipeline.json 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523
  1. {
  2. "pipelines": [
  3. {
  4. "strategy": {
  5. "name": "混合策略:底图 + 智能文字层",
  6. "description": "使用即梦 AI 4.5 生成带核心文字(核心英文单词、一级标题)和基础框架(笔记本、回形针、纸张纹理)的底图,然后在后期工具中叠加复杂文字内容(正文段落、表格、语义化高亮)和手绘装饰元素",
  7. "reasoning": "analysis.json 中的核心需求是:文字精准度(critical)+ 跨图一致性(critical)+ 材质真实感(high)。S3 混合策略(底图 + 智能文字层,即本方案)能够同时满足这三个核心需求:1) 文字精准度:复杂文字内容(正文、表格数值、专业术语)由后期工具精准控制,避免 AI 生成错误;2) 跨图一致性:底图(笔记本框架、回形针、纸张纹理)可复用或基于同一 seed 生成,保证 4 张图的视觉统一;3) 材质真实感:核心视觉元素由 AI 生成,光影、透视、材质纹理自然统一",
  8. "vs_alternatives": [
  9. {
  10. "alternative": "S1 底图 + 分层叠加",
  11. "why_not": "纯分层叠加需要手动处理所有文字,工作量大,且文字与背景的融合度可能不足(缺少纸张纹理渗透感)。S3 让 AI 生成核心文字,减少后期工作量,同时保证光影自然",
  12. "could_switch_if": "如果即梦 AI 4.5 的实际文字生成精度不足以处理核心英文单词和一级标题,则回退到 S1(所有文字后期叠加)"
  13. },
  14. {
  15. "alternative": "S2 端到端文生图",
  16. "why_not": "端到端生成在复杂表格(img_2)和长文本场景下文字错误风险高,且修改成本高(需要重新生成整图)。S3 将复杂内容分离到后期处理,降低风险",
  17. "could_switch_if": "如果发现后期合成文字的光影调整工作量超过预期,且即梦 AI 4.5 的长文本生成精度足够,则可尝试 S2(端到端生成)"
  18. }
  19. ],
  20. "risks_found_during_instantiation": [
  21. {
  22. "stage_id": "stage_1_base_generation",
  23. "risk": "即梦 AI 4.5 生成核心英文单词时可能出现拼写错误",
  24. "severity": "high",
  25. "mitigation": "准备备用方案:AI 生成不带核心文字的底图,核心文字由后期叠加;或多次生成选择最佳结果"
  26. },
  27. {
  28. "stage_id": "stage_2b_img2_text",
  29. "risk": "img_2 的 Tokenization-Embedding 表格结构复杂,AI 难以精准还原行列和数值",
  30. "severity": "high",
  31. "mitigation": "表格完全由后期工具绘制,AI 仅生成表格区域的空白背景"
  32. },
  33. {
  34. "stage_id": "stage_3_decoration",
  35. "risk": "手绘装饰元素(星星、花朵、卡通图标)的跨图一致性难以保证",
  36. "severity": "medium",
  37. "mitigation": "从 AI 生成的底图中提取装饰元素,建立素材库后复用到其他图"
  38. }
  39. ]
  40. },
  41. "goal_tree": {
  42. "stage_id": "root",
  43. "stage_name": "还原目标",
  44. "description": "还原 4 张 AI 知识科普笔记风格海报(img_1-4),核心是拟物化笔记本场景 + 精准文字内容 + 跨图一致性",
  45. "required_spec": [
  46. "核心英文单词(Embedding)精准还原:内容、字号(约 90-100pt)、颜色(img_1 蓝色 RGB:60,140,220,img_2/3/4 黑色)、字体(粗体无衬线)",
  47. "笔记本/活页夹框架跨图一致:img_1 为米白色信纸背景,img_2/3/4 为蓝色活页夹边缘 + 白色纸张 + 边缘虚线和圆孔",
  48. "纸张纹理真实感:米白色(RGB:245,245,235)或纯白色,带横向细线/虚线,表面有不规则褶皱和轻微脏污感",
  49. "回形针装饰立体感:img_2/3/4 顶部有黄色回形针,具有金属质感和阴影",
  50. "结构化排版:居中堆叠(img_1)、左对齐层级列表(img_2/3/4)、虚线分隔",
  51. "手绘装饰元素自然度:星星、花朵、卡通图标等具有手绘的不规则感和笔触感",
  52. "语义化色彩高亮:荧光笔触半透明叠加,能看到底层文字",
  53. "知识可视化图表(img_2):Tokenization-Embedding 对应表格,表头浅绿色(#90EE90),第一列浅紫色(#E6E6FA),数值精准"
  54. ],
  55. "output_spec": [
  56. "4 张成品图(img_1_final.png, img_2_final.png, img_3_final.png, img_4_final.png)",
  57. "所有 required_spec 中的特征 100% 满足"
  58. ],
  59. "children": [
  60. {
  61. "stage_id": "stage_1_base_generation",
  62. "stage_name": "底图生成",
  63. "description": "使用即梦 AI 4.5 生成 4 张图的底图,包含笔记本框架、纸张纹理、回形针装饰、核心英文单词(可选)",
  64. "required_spec": [
  65. "img_1:米白色信纸背景(RGB:245,245,235),带横向浅灰色细线,表面有不规则褶皱和轻微脏污感",
  66. "img_2/3/4:左侧蓝色活页夹边缘(#87CEEB),右侧白色纸张(#FFFFFF),纸张边缘有虚线和三个圆形打孔,顶部黄色回形针",
  67. "核心英文单词'Embedding'(可选由 AI 生成):img_1 蓝色(RGB:60,140,220),img_2/3/4 黑色(#000000),粗体无衬线字体,字号约 90-100pt,居中排版"
  68. ],
  69. "output_spec": [
  70. "4 张底图(img_1_base.png, img_2_base.png, img_3_base.png, img_4_base.png)",
  71. "笔记本框架结构一致(img_2/3/4 使用同一模板或固定 seed)",
  72. "纸张纹理真实(褶皱、脏污感自然分布)",
  73. "回形针具有金属质感和阴影(img_2/3/4)",
  74. "可选:核心英文单词拼写准确、位置正确"
  75. ],
  76. "spec_satisfaction": {
  77. "status": "partial",
  78. "gap": "AI 生成的核心英文单词可能存在拼写错误风险;复杂表格区域(img_2)需要留白供后期绘制",
  79. "mitigation": "准备无文字版本的底图作为备选;img_2 的表格区域在 prompt 中明确留白"
  80. },
  81. "target_images": ["img_1", "img_2", "img_3", "img_4"],
  82. "stage_output": "4 张底图(含框架、纹理、回形针、可选核心文字)",
  83. "input_from": ["即梦 AI 4.5 模型", "制作表中的背景描述(段落 1.1、2.1、3.1、4.1)"],
  84. "covers_requirements": [
  85. "上限点:拟真纸张与笔记本实体",
  86. "上限点:核心英文单词(如果 AI 生成)",
  87. "下限点:跨图元素一致性",
  88. "下限点:材质纹理真实感"
  89. ],
  90. "importance": "下限",
  91. "reasoning": {
  92. "why_needed": "底图是整个还原工作的基础,决定了跨图一致性和材质真实感。如果底图框架不一致或纹理假,后续叠加文字和装饰也无法挽救",
  93. "why_here": "作为依赖树的第一个阶段,底图为后续所有阶段提供基础画布。文字叠加、装饰合成、质感增强都依赖底图的存在"
  94. },
  95. "children": [
  96. {
  97. "stage_id": "stage_1_1_img1_base",
  98. "stage_name": "img_1 信纸底图生成",
  99. "description": "生成 img_1 的米白色信纸背景,带横向细线和褶皱纹理",
  100. "required_spec": [
  101. "背景颜色:米白色(RGB:245,245,235)",
  102. "背景纹理:横向浅灰色细线,表面有不规则褶皱和轻微脏污感",
  103. "尺寸:1200x1700 像素(假设标准尺寸)"
  104. ],
  105. "output_spec": [
  106. "img_1_base.png:米白色信纸背景,纹理自然",
  107. "无文字、无装饰的纯净背景"
  108. ],
  109. "spec_satisfaction": {
  110. "status": "satisfied",
  111. "gap": "",
  112. "mitigation": ""
  113. },
  114. "target_images": ["img_1"],
  115. "stage_output": "img_1_base.png",
  116. "input_from": ["制作表段落 1.1 背景描述"],
  117. "covers_requirements": ["上限点:拟真纸张与笔记本实体", "下限点:材质纹理真实感"],
  118. "importance": "基础",
  119. "reasoning": {
  120. "why_needed": "img_1 使用信纸背景而非 img_2/3/4 的活页夹笔记本框架,需要单独生成",
  121. "why_here": "作为 img_1 的底图生成子阶段,为后续文字叠加提供基础"
  122. },
  123. "children": []
  124. },
  125. {
  126. "stage_id": "stage_1_2_img234_base",
  127. "stage_name": "img_2/3/4 笔记本底图生成",
  128. "description": "生成 img_2/3/4 共享的活页夹笔记本框架底图(蓝色活页夹边缘 + 白色打孔纸张 + 回形针)",
  129. "required_spec": [
  130. "左侧蓝色活页夹边缘(#87CEEB),波浪形模拟活页夹环形结构",
  131. "右侧白色纸张(#FFFFFF),边缘有虚线和三个圆形打孔",
  132. "顶部黄色回形针,具有金属质感和阴影",
  133. "纸张表面有横向虚线横格和左侧红色竖向虚线边缘(img_4)"
  134. ],
  135. "output_spec": [
  136. "img_2_base.png, img_3_base.png, img_4_base.png:共享相同框架结构",
  137. "回形针位置、形状、光影一致",
  138. "打孔位置、数量(3 个)一致"
  139. ],
  140. "spec_satisfaction": {
  141. "status": "satisfied",
  142. "gap": "",
  143. "mitigation": ""
  144. },
  145. "target_images": ["img_2", "img_3", "img_4"],
  146. "stage_output": "img_2_base.png, img_3_base.png, img_4_base.png",
  147. "input_from": ["制作表段落 2.1、3.1、4.1 背景描述", "即梦 AI 4.5 固定 seed"],
  148. "covers_requirements": ["上限点:拟真纸张与笔记本实体", "下限点:跨图元素一致性", "下限点:材质纹理真实感"],
  149. "importance": "下限",
  150. "reasoning": {
  151. "why_needed": "img_2/3/4 共享相同的活页夹笔记本框架,必须保证跨图一致性。使用同一模板或固定 seed 生成可确保框架结构、回形针位置、打孔位置完全一致",
  152. "why_here": "作为 img_2/3/4 的底图生成子阶段,为后续文字叠加和装饰合成提供统一基础"
  153. },
  154. "children": []
  155. }
  156. ]
  157. },
  158. {
  159. "stage_id": "stage_2_text_overlay",
  160. "stage_name": "文字叠加",
  161. "description": "在底图上叠加文字内容,包括核心英文单词(如果底图未生成)、标题、正文段落、表格(img_2)",
  162. "required_spec": [
  163. "核心英文单词'Embedding':内容准确,字号约 90-100pt,颜色正确(img_1 蓝色,img_2/3/4 黑色),粗体无衬线字体,居中排版",
  164. "一级标题(如'每天掌握一个'、'AI 知识点'、'什么是 Embedding?'等):字号约 100pt(主标题)或 30pt(一级标题),手写风格粗体中文字体或常规无衬线字体",
  165. "正文段落:字号约 18-20pt,常规无衬线字体,左对齐,段落间距合理",
  166. "表格(img_2):Tokenization-Embedding 对应表,表头浅绿色(#90EE90),第一列浅紫色(#E6E6FA),数值精准,行列对齐"
  167. ],
  168. "output_spec": [
  169. "4 张带文字的图片(img_1_text.png, img_2_text.png, img_3_text.png, img_4_text.png)",
  170. "所有文字内容准确无误",
  171. "字号层级正确(主标题>一级标题>正文)",
  172. "排版对齐方式正确(居中/左对齐)",
  173. "表格结构精准(img_2)"
  174. ],
  175. "spec_satisfaction": {
  176. "status": "satisfied",
  177. "gap": "",
  178. "mitigation": ""
  179. },
  180. "target_images": ["img_1", "img_2", "img_3", "img_4"],
  181. "stage_output": "4 张带文字的图片",
  182. "input_from": ["stage_1_base_generation 的底图", "制作表中的文字内容、颜色、字号、字体、排版描述"],
  183. "covers_requirements": [
  184. "上限点:核心英文单词",
  185. "上限点:结构化排版与导视",
  186. "上限点:知识可视化图表(img_2 表格)",
  187. "下限点:文字内容精准度",
  188. "下限点:排版空间透视关系"
  189. ],
  190. "importance": "上限",
  191. "reasoning": {
  192. "why_needed": "文字内容是知识科普海报的核心信息载体,必须 100% 准确。尤其是专业术语(Embedding)、表格数值、长段正文,AI 生成容易出错,必须由后期工具精准控制",
  193. "why_here": "文字叠加依赖底图的存在(需要知道文字写在哪里),同时为后续的装饰合成和质感增强提供基础。文字层位于装饰层下方,后续装饰须避开文字区域、高亮须保持半透明,以免遮挡文字"
  194. },
  195. "children": [
  196. {
  197. "stage_id": "stage_2a_img1_text",
  198. "stage_name": "img_1 文字叠加",
  199. "description": "在 img_1 底图上叠加标题、副标题、核心英文单词、励志语录、日期等文字",
  200. "required_spec": [
  201. "小葱白工:左上角,深灰色(RGB:60,60,60),手写风格中文字体,约 25pt",
  202. "Growth/Hope/Future:右上角,浅灰色(RGB:150,150,150),手写风格英文衬线字体,约 25pt,椭圆形边框包围",
  203. "每天掌握一个/AI 知识点:中央偏上,深灰色,手写风格粗体中文字体,约 100pt,居中",
  204. "Embedding:中央,蓝色(RGB:60,140,220),粗体无衬线字体,约 90pt,居中",
  205. "你走过的路,都会成为你的底气。:中下部,深灰色,手写风格中文字体,约 30pt,居中",
  206. "The paths you've walked will build your strength.:中文语录下方,深灰色,手写风格英文衬线字体,约 30pt,居中",
  207. "Date: 04.28:左下角,深灰色,手写风格英文无衬线字体,约 25pt"
  208. ],
  209. "output_spec": [
  210. "img_1_text.png:所有文字叠加完成",
  211. "文字内容准确,字号层级正确,排版居中"
  212. ],
  213. "spec_satisfaction": {
  214. "status": "satisfied",
  215. "gap": "",
  216. "mitigation": ""
  217. },
  218. "target_images": ["img_1"],
  219. "stage_output": "img_1_text.png",
  220. "input_from": ["stage_1_1_img1_base 的底图", "制作表段落 1.2、1.3、1.4、1.5 文字描述"],
  221. "covers_requirements": ["上限点:核心英文单词", "上限点:结构化排版与导视", "下限点:文字内容精准度"],
  222. "importance": "上限",
  223. "reasoning": {
  224. "why_needed": "img_1 的文字内容较多,包括中英文标题、励志语录、日期等,需要精准叠加",
  225. "why_here": "作为 img_1 的文字叠加子阶段,为后续装饰合成提供基础"
  226. },
  227. "children": []
  228. },
  229. {
  230. "stage_id": "stage_2b_img2_text",
  231. "stage_name": "img_2 文字叠加",
  232. "description": "在 img_2 底图上叠加标题、问题标题、正文段落、表格",
  233. "required_spec": [
  234. "Embedding:顶部中央,黑色(#000000),粗体无衬线字体,约 100pt,居中",
  235. "1 什么是'Embedding'?:左对齐,数字序号蓝色圆圈包裹,问号黑色,卡通表情右侧",
  236. "正文(Embedding 定义):左对齐,黑色,约 18pt,'语义信息'和'语义相关性'高亮蓝色(#4682B4)",
  237. "2 从文本到'Embedding'的流程:左对齐,带序号和卡通表情",
  238. "正文(流程描述):左对齐,列表形式,项目符号实心圆点",
  239. "表格:Tokenization-Embedding 对应表,表头浅绿色(#90EE90),第一列浅紫色(#E6E6FA),数值精准,居中对齐"
  240. ],
  241. "output_spec": [
  242. "img_2_text.png:所有文字和表格叠加完成",
  243. "表格结构精准,数值无误",
  244. "高亮位置正确"
  245. ],
  246. "spec_satisfaction": {
  247. "status": "satisfied",
  248. "gap": "",
  249. "mitigation": ""
  250. },
  251. "target_images": ["img_2"],
  252. "stage_output": "img_2_text.png",
  253. "input_from": ["stage_1_2_img234_base 的底图", "制作表段落 2.2、2.3 文字和表格描述"],
  254. "covers_requirements": ["上限点:核心英文单词", "上限点:结构化排版与导视", "上限点:知识可视化图表", "下限点:文字内容精准度"],
  255. "importance": "上限",
  256. "reasoning": {
  257. "why_needed": "img_2 包含复杂的表格结构,是还原难度最高的部分。表格必须完全由后期工具绘制,保证数值和行列结构精准",
  258. "why_here": "作为 img_2 的文字叠加子阶段,表格绘制是核心任务"
  259. },
  260. "children": []
  261. },
  262. {
  263. "stage_id": "stage_2c_img3_text",
  264. "stage_name": "img_3 文字叠加",
  265. "description": "在 img_3 底图上叠加标题、问题标题、正文段落",
  266. "required_spec": [
  267. "Embedding:顶部中央,黑色,粗体无衬线字体,约 100pt,居中",
  268. "2) 独立使用的 Embedding (句子/文档级):左对齐",
  269. "正文(独立使用 Embedding 解释):左对齐,黑色,约 18pt",
  270. "3 'Embedding'的本质:语义可'被数字表示':左对齐,数字'3'蓝色,关键词黑色粗体",
  271. "正文(本质解释):左对齐,列表形式,项目符号实心圆点,'Embedding 模型'、'压缩'高亮紫色",
  272. "绿色对勾符号:正文中"
  273. ],
  274. "output_spec": [
  275. "img_3_text.png:所有文字叠加完成",
  276. "高亮位置正确,颜色准确"
  277. ],
  278. "spec_satisfaction": {
  279. "status": "satisfied",
  280. "gap": "",
  281. "mitigation": ""
  282. },
  283. "target_images": ["img_3"],
  284. "stage_output": "img_3_text.png",
  285. "input_from": ["stage_1_2_img234_base 的底图", "制作表段落 3.2、3.3 文字描述"],
  286. "covers_requirements": ["上限点:核心英文单词", "上限点:结构化排版与导视", "上限点:语义化色彩高亮", "下限点:文字内容精准度"],
  287. "importance": "上限",
  288. "reasoning": {
  289. "why_needed": "img_3 的文字内容包含多处语义化高亮,需要精准控制高亮位置和颜色",
  290. "why_here": "作为 img_3 的文字叠加子阶段,为后续装饰合成提供基础"
  291. },
  292. "children": []
  293. },
  294. {
  295. "stage_id": "stage_2d_img4_text",
  296. "stage_name": "img_4 文字叠加",
  297. "description": "在 img_4 底图上叠加标题、应用场景、常见问题答疑",
  298. "required_spec": [
  299. "Embedding:顶部中央,黑色,粗体无衬线字体,约 100pt,居中",
  300. "④ 'Embedding'的应用场景:左对齐,带序号和扳手图标",
  301. "正文(应用场景):左对齐,列表形式,'编码为语义向量'、'检索增强生成 (RAG)'、'相似性判断'、'文本聚类与分析'高亮紫色",
  302. "⑤ 常见问题答疑:左对齐,带序号",
  303. "正文(问答):左对齐,'Q:'和'A:'绿色,'编号 (离散的 ID)'、'承载语义的连续向量'等高亮紫色"
  304. ],
  305. "output_spec": [
  306. "img_4_text.png:所有文字叠加完成",
  307. "高亮位置正确,颜色准确"
  308. ],
  309. "spec_satisfaction": {
  310. "status": "satisfied",
  311. "gap": "",
  312. "mitigation": ""
  313. },
  314. "target_images": ["img_4"],
  315. "stage_output": "img_4_text.png",
  316. "input_from": ["stage_1_2_img234_base 的底图", "制作表段落 4.2、4.3 文字描述"],
  317. "covers_requirements": ["上限点:核心英文单词", "上限点:结构化排版与导视", "上限点:语义化色彩高亮", "下限点:文字内容精准度"],
  318. "importance": "上限",
  319. "reasoning": {
  320. "why_needed": "img_4 的文字内容包含多处语义化高亮和问答格式,需要精准控制",
  321. "why_here": "作为 img_4 的文字叠加子阶段,为后续装饰合成提供基础"
  322. },
  323. "children": []
  324. }
  325. ]
  326. },
  327. {
  328. "stage_id": "stage_3_decoration",
  329. "stage_name": "装饰元素合成",
  330. "description": "在文字层上叠加手绘装饰元素(星星、花朵、卡通图标)、语义化色彩高亮(荧光笔效果)",
  331. "required_spec": [
  332. "img_1:左上角粉色五角星和黄色四角星,左下角三朵黄色花朵(四圆形花瓣 + 圆形花蕊 + 绿色花茎),右下角四颗黄色四角星,副标题下方粉色涂抹区域(RGB:255,180,200,边缘模糊)",
  333. "img_2/3/4:卡通表情(问号、太阳、对勾等),语义化高亮(紫色、蓝色、黄色背景高亮,半透明叠加)",
  334. "所有手绘元素具有手绘的不规则感和笔触感,避免过于规整"
  335. ],
  336. "output_spec": [
  337. "4 张带装饰的图片(img_1_decorated.png, img_2_decorated.png, img_3_decorated.png, img_4_decorated.png)",
  338. "手绘装饰元素位置正确,形状自然",
  339. "语义化高亮半透明,能看到底层文字"
  340. ],
  341. "spec_satisfaction": {
  342. "status": "satisfied",
  343. "gap": "",
  344. "mitigation": ""
  345. },
  346. "target_images": ["img_1", "img_2", "img_3", "img_4"],
  347. "stage_output": "4 张带装饰的图片",
  348. "input_from": ["stage_2_text_overlay 的带文字图片", "制作表中的图案形状、涂抹形状、色彩高亮描述"],
  349. "covers_requirements": [
  350. "上限点:手绘装饰与卡通图标",
  351. "上限点:语义化色彩高亮",
  352. "下限点:手绘元素的自然度",
  353. "下限点:色彩高亮的物理合理性"
  354. ],
  355. "importance": "上限",
  356. "reasoning": {
  357. "why_needed": "手绘装饰元素和语义化高亮是笔记风格的核心特征,中和了科技主题的枯燥感,建立信息层级。它们叠加在文字层上方,因此高亮必须保持半透明、装饰必须避开文字区域,以免遮挡文字",
  358. "why_here": "装饰合成依赖文字层的存在(需要知道装饰加在哪里、高亮哪些文字),同时为后续的质感增强提供基础"
  359. },
  360. "children": [
  361. {
  362. "stage_id": "stage_3a_img1_decoration",
  363. "stage_name": "img_1 装饰合成",
  364. "description": "在 img_1 上叠加星星、花朵、粉色涂抹背景",
  365. "required_spec": [
  366. "左上角:粉色五角星 + 黄色四角星",
  367. "左下角:三朵黄色花朵(四圆形花瓣 + 圆形花蕊 + 绿色花茎)",
  368. "右下角:四颗黄色四角星",
  369. "副标题下方:粉色涂抹区域(RGB:255,180,200,边缘模糊,模拟手绘涂抹效果)"
  370. ],
  371. "output_spec": [
  372. "img_1_decorated.png:所有装饰叠加完成",
  373. "手绘元素自然,涂抹边缘模糊"
  374. ],
  375. "spec_satisfaction": {
  376. "status": "satisfied",
  377. "gap": "",
  378. "mitigation": ""
  379. },
  380. "target_images": ["img_1"],
  381. "stage_output": "img_1_decorated.png",
  382. "input_from": ["stage_2a_img1_text 的带文字图片", "制作表段落 1.2.1.1、1.3.4、1.5.2 图案和涂抹描述"],
  383. "covers_requirements": ["上限点:手绘装饰与卡通图标", "下限点:手绘元素的自然度"],
  384. "importance": "上限",
  385. "reasoning": {
  386. "why_needed": "img_1 的手绘装饰元素最多(星星、花朵、涂抹),是笔记风格的关键特征",
  387. "why_here": "作为 img_1 的装饰合成子阶段"
  388. },
  389. "children": []
  390. },
  391. {
  392. "stage_id": "stage_3b_img234_decoration",
  393. "stage_name": "img_2/3/4 装饰合成",
  394. "description": "在 img_2/3/4 上叠加卡通表情、语义化高亮",
  395. "required_spec": [
  396. "img_2:问题标题右侧卡通问号表情,正文中'语义信息'和'语义相关性'蓝色高亮",
  397. "img_3:标题右侧卡通太阳表情,正文中'Embedding 模型'、'压缩'紫色高亮,绿色对勾符号",
  398. "img_4:标题扳手图标,正文中多处紫色高亮,问答'Q:'和'A:'绿色",
  399. "所有高亮半透明,能看到底层文字"
  400. ],
  401. "output_spec": [
  402. "img_2_decorated.png, img_3_decorated.png, img_4_decorated.png:装饰叠加完成",
  403. "高亮半透明,颜色准确"
  404. ],
  405. "spec_satisfaction": {
  406. "status": "satisfied",
  407. "gap": "",
  408. "mitigation": ""
  409. },
  410. "target_images": ["img_2", "img_3", "img_4"],
  411. "stage_output": "img_2_decorated.png, img_3_decorated.png, img_4_decorated.png",
  412. "input_from": ["stage_2b_img2_text、stage_2c_img3_text、stage_2d_img4_text 的带文字图片", "制作表中的色彩高亮描述"],
  413. "covers_requirements": ["上限点:手绘装饰与卡通图标", "上限点:语义化色彩高亮", "下限点:色彩高亮的物理合理性"],
  414. "importance": "上限",
  415. "reasoning": {
  416. "why_needed": "img_2/3/4 的语义化高亮是信息层级的关键,卡通表情增加趣味性",
  417. "why_here": "作为 img_2/3/4 的装饰合成子阶段"
  418. },
  419. "children": []
  420. }
  421. ]
  422. },
  423. {
  424. "stage_id": "stage_4_quality_enhancement",
  425. "stage_name": "质感增强",
  426. "description": "最后添加质感层(噪点、纹理叠加)增强真实感,使用 Overlay 混合模式,不透明度 3-5%",
  427. "required_spec": [
  428. "纸张纹理增强:叠加轻微噪点层,模拟纸张粗糙感",
  429. "光影统一:检查文字、装饰与底图的光影一致性,添加轻微阴影或高光",
  430. "整体色调统一:确保 4 张图的色调一致"
  431. ],
  432. "output_spec": [
  433. "4 张最终成品图(img_1_final.png, img_2_final.png, img_3_final.png, img_4_final.png)",
  434. "纸张质感真实,光影自然",
  435. "4 张图色调统一"
  436. ],
  437. "spec_satisfaction": {
  438. "status": "satisfied",
  439. "gap": "",
  440. "mitigation": ""
  441. },
  442. "target_images": ["img_1", "img_2", "img_3", "img_4"],
  443. "stage_output": "4 张最终成品图",
  444. "input_from": ["stage_3_decoration 的带装饰图片"],
  445. "covers_requirements": [
  446. "上限点:拟真纸张与笔记本实体",
  447. "下限点:材质纹理真实感",
  448. "下限点:跨图元素一致性"
  449. ],
  450. "importance": "基础",
  451. "reasoning": {
  452. "why_needed": "质感增强是最后一步,确保纸张纹理真实、光影自然、色调统一。虽然不改变内容,但决定了整体'真实感'",
  453. "why_here": "作为依赖树的最后一个阶段,质感增强在所有内容叠加完成后进行,避免影响文字和装饰"
  454. },
  455. "children": []
  456. }
  457. ]
  458. },
  459. "requirement_coverage": {
  460. "拟真纸张与笔记本实体": {
  461. "covered_by": ["stage_1_base_generation", "stage_1_1_img1_base", "stage_1_2_img234_base", "stage_4_quality_enhancement"],
  462. "coverage_confidence": "high",
  463. "gap_note": ""
  464. },
  465. "核心英文单词": {
  466. "covered_by": ["stage_1_base_generation", "stage_2_text_overlay", "stage_2a_img1_text", "stage_2b_img2_text", "stage_2c_img3_text", "stage_2d_img4_text"],
  467. "coverage_confidence": "high",
  468. "gap_note": "如果 AI 生成核心文字精度不足,则完全由 stage_2 后期叠加"
  469. },
  470. "结构化排版与导视": {
  471. "covered_by": ["stage_2_text_overlay", "stage_2a_img1_text", "stage_2b_img2_text", "stage_2c_img3_text", "stage_2d_img4_text"],
  472. "coverage_confidence": "high",
  473. "gap_note": ""
  474. },
  475. "知识可视化图表": {
  476. "covered_by": ["stage_2_text_overlay", "stage_2b_img2_text"],
  477. "coverage_confidence": "high",
  478. "gap_note": "表格完全由后期工具绘制,保证精准"
  479. },
  480. "语义化色彩高亮": {
  481. "covered_by": ["stage_2_text_overlay", "stage_2c_img3_text", "stage_2d_img4_text", "stage_3_decoration", "stage_3b_img234_decoration"],
  482. "coverage_confidence": "high",
  483. "gap_note": ""
  484. },
  485. "手绘装饰与卡通图标": {
  486. "covered_by": ["stage_3_decoration", "stage_3a_img1_decoration", "stage_3b_img234_decoration"],
  487. "coverage_confidence": "high",
  488. "gap_note": ""
  489. },
  490. "跨图元素一致性": {
  491. "covered_by": ["stage_1_base_generation", "stage_1_2_img234_base", "stage_4_quality_enhancement"],
  492. "coverage_confidence": "high",
  493. "gap_note": "img_2/3/4 使用同一底图模板或固定 seed 生成"
  494. },
  495. "文字内容精准度": {
  496. "covered_by": ["stage_2_text_overlay", "stage_2a_img1_text", "stage_2b_img2_text", "stage_2c_img3_text", "stage_2d_img4_text"],
  497. "coverage_confidence": "high",
  498. "gap_note": "所有复杂文字由后期工具精准控制"
  499. },
  500. "排版空间透视关系": {
  501. "covered_by": ["stage_1_base_generation", "stage_2_text_overlay"],
  502. "coverage_confidence": "high",
  503. "gap_note": "AI 生成底图时处理核心透视,后期叠加简单文字"
  504. },
  505. "材质纹理真实感": {
  506. "covered_by": ["stage_1_base_generation", "stage_1_1_img1_base", "stage_1_2_img234_base", "stage_4_quality_enhancement"],
  507. "coverage_confidence": "high",
  508. "gap_note": ""
  509. },
  510. "手绘元素的自然度": {
  511. "covered_by": ["stage_3_decoration", "stage_3a_img1_decoration", "stage_3b_img234_decoration"],
  512. "coverage_confidence": "medium",
  513. "gap_note": "手绘元素自然度取决于素材来源,可能需要从 AI 生成结果中提取或手动绘制"
  514. },
  515. "色彩高亮的物理合理性": {
  516. "covered_by": ["stage_3_decoration", "stage_3b_img234_decoration"],
  517. "coverage_confidence": "high",
  518. "gap_note": "使用半透明叠加模式(Overlay/Soft Light)模拟荧光笔效果"
  519. }
  520. }
  521. }
  522. ]
  523. }