pipeline.json 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. {
  2. "pipelines": [
  3. {
  4. "strategy": {
  5. "name": "垫图法 + 局部重绘组合策略",
  6. "description": "使用 Midjourney 的--cref 参数保持角色一致性生成基础猫咪图,然后用 ComfyUI/Photoshop 进行局部重绘精准控制表情(张嘴闭眼 vs 鼓腮睁眼),最后分层合成所有元素(猫咪、手势、食物、手、文字)",
  7. "reasoning": "analysis.json 中的核心下限点是'猫咪跨图一致性'和'表情精准度',垫图法 (--cref) 可保证角色一致性,局部重绘可精准控制表情,完美匹配需求;制作表中明确了每个元素的布局坐标和视觉特征,支持分层合成策略",
  8. "vs_alternatives": [
  9. {
  10. "alternative": "Seed 值固定 + 描述词微调",
  11. "why_not": "seed 值法不能 100% 保证一致性,对于'猫咪跨图一致性'这一下限点覆盖不足;表情控制依赖描述词精准度,对于'表情精准度'这一下限点覆盖不足",
  12. "could_switch_if": "如果局部重绘工具不可用或效果不佳,可回退到此策略,接受一定的一致性损失"
  13. },
  14. {
  15. "alternative": "Panels 多格生成 + 裁剪",
  16. "why_not": "Panels 法虽然一致性好,但表情精准度不够,难以保证生成特定的'张嘴闭眼'和'鼓腮睁眼'表情;且多格图分辨率较低,影响最终输出质量",
  17. "could_switch_if": "如果时间紧迫且对表情精准度要求降低,可考虑此策略快速出图"
  18. }
  19. ],
  20. "risks_found_during_instantiation": [
  21. {
  22. "stage_id": "stage_2",
  23. "risk": "局部重绘可能导致猫咪面部边缘融合不自然,尤其是嘴巴和眼睛区域",
  24. "severity": "medium",
  25. "mitigation": "使用 Inpaint 专用模型(如 FLUX.1 Fill),配合遮罩模糊和边缘羽化;在 Photoshop 中用匹配颜色工具校正色差"
  26. },
  27. {
  28. "stage_id": "stage_2",
  29. "risk": "三张图(芒果/榴莲/菠萝)的猫咪可能因分批生成而产生细微差异",
  30. "severity": "medium",
  31. "mitigation": "固定 MJ 的--s 风格参数和--v 版本;使用同一张参考图进行--cref;优先生成一套猫咪后复用到三张图"
  32. },
  33. {
  34. "stage_id": "stage_5",
  35. "risk": "手势与猫咪的空间关系可能不自然,看起来像悬浮元素",
  36. "severity": "low",
  37. "mitigation": "制作表中已有明确坐标(如 img_1 拒绝手势位于 600,300,750,400),按坐标精确定位;添加轻微阴影增强融合感"
  38. }
  39. ]
  40. },
  41. "goal_tree": {
  42. "stage_id": "root",
  43. "stage_name": "还原目标",
  44. "description": "还原一组对比叙事型猫咪表情包(img_1 芒果主题、img_2 榴莲主题、img_3 菠萝主题),每张图采用上下分栏结构,上半部分表示拒绝(张嘴闭眼猫咪 + 完整食物 + 拒绝手势),下半部分表示喜爱(鼓腮睁眼猫咪 + 切好食物 + 喜爱手势),所有元素拼贴在纯白背景上",
  45. "required_spec": [
  46. "纵向二分构图(上下分栏 1200x400 + 1200x400)",
  47. "猫咪跨图一致性(同一只白橘猫的两种表情)",
  48. "表情精准度(拒绝:张嘴闭眼;喜爱:鼓腮睁眼)",
  49. "食物形态对比(上:完整带皮/带壳;下:去皮切好)",
  50. "卡通手势拼贴(黄色 Emoji 手势,替代猫咪前肢)",
  51. "纯白背景 + 透明抠图(所有元素独立抠图后合成)",
  52. "文字内容准确(拒绝:'我不爱吃 XX,谢谢';喜爱:'XX!我爱吃!')",
  53. "元素布局符合制作表坐标"
  54. ],
  55. "output_spec": [
  56. "img_1.png(芒果主题,1200x800)",
  57. "img_2.png(榴莲主题,1200x800)",
  58. "img_3.png(菠萝主题,1200x800)"
  59. ],
  60. "children": [
  61. {
  62. "stage_id": "stage_1",
  63. "stage_name": "基础猫咪素材生成",
  64. "description": "生成一只白橘相间的猫咪基础图,作为后续表情编辑的底图。使用 Midjourney --cref 参数保持角色一致性,生成正面/侧面视角的猫咪头像,背景为纯白色以便抠图",
  65. "required_spec": [
  66. "白橘相间的猫咪毛色",
  67. "正面或侧面视角,头部清晰可见",
  68. "纯白背景或透明背景",
  69. "高分辨率(至少 1024x1024)以便裁剪和编辑"
  70. ],
  71. "output_spec": [
  72. "base_cat.png(基础猫咪图,白橘相间,纯白背景)"
  73. ],
  74. "spec_satisfaction": {
  75. "status": "satisfied",
  76. "gap": "",
  77. "mitigation": ""
  78. },
  79. "target_images": ["img_1", "img_2", "img_3"],
  80. "stage_output": "base_cat.png",
  81. "input_from": [],
  82. "covers_requirements": ["猫咪跨图一致性"],
  83. "importance": "下限",
  84. "reasoning": {
  85. "why_needed": "这是整个还原流程的基础,如果基础猫咪图不一致或质量差,后续所有表情编辑和合成都会失败。此阶段解决'猫咪跨图一致性'这一下限点",
  86. "why_here": "这是依赖树的叶节点,不依赖任何其他阶段的产物,是所有猫咪相关阶段的共同输入"
  87. },
  88. "children": []
  89. },
  90. {
  91. "stage_id": "stage_2",
  92. "stage_name": "表情精准编辑",
  93. "description": "基于基础猫咪图,使用局部重绘(Inpaint)技术分别生成两种表情:拒绝表情(张嘴闭眼)和喜爱表情(鼓腮睁眼)。对猫咪面部区域进行精准遮罩和重绘,保持其他部分不变",
  94. "required_spec": [
  95. "拒绝表情:嘴巴张大、眼睛紧闭、表情夸张",
  96. "喜爱表情:脸颊鼓起、眼睛睁开、嘴角上扬",
  97. "保持猫咪毛色、头部轮廓与 base_cat.png 一致",
  98. "边缘融合自然,无明显重绘痕迹"
  99. ],
  100. "output_spec": [
  101. "cat_reject.png(拒绝表情猫咪,透明背景)",
  102. "cat_accept.png(喜爱表情猫咪,透明背景)"
  103. ],
  104. "spec_satisfaction": {
  105. "status": "partial",
  106. "gap": "局部重绘可能导致面部边缘融合不自然,尤其是嘴巴和眼睛区域;多次生成可能产生风格细微差异",
  107. "mitigation": "使用 Inpaint 专用模型(如 FLUX.1 Fill),配合遮罩模糊和边缘羽化;固定 MJ 的--s 风格参数和--v 版本;使用同一张参考图进行--cref"
  108. },
  109. "target_images": ["img_1", "img_2", "img_3"],
  110. "stage_output": "cat_reject.png, cat_accept.png",
  111. "input_from": ["stage_1"],
  112. "covers_requirements": ["表情精准度", "猫咪跨图一致性", "极具反差的猫咪神态"],
  113. "importance": "下限",
  114. "reasoning": {
  115. "why_needed": "这是实现'表情精准度'下限点的关键阶段。制作表中明确要求拒绝猫咪'张大嘴巴,闭着眼睛',喜爱猫咪'脸颊鼓起,眼睛睁开',必须通过局部重绘精准控制",
  116. "why_here": "依赖 stage_1 的基础猫咪图,是后续合成阶段的必要输入。表情编辑必须在合成前完成,因为合成后无法单独修改猫咪表情"
  117. },
  118. "children": []
  119. },
  120. {
  121. "stage_id": "stage_3",
  122. "stage_name": "食物素材准备",
  123. "description": "为三张图分别准备上下两种形态的食物素材。img_1:完整芒果 vs 切好芒果块;img_2:完整带刺榴莲 vs 榴莲果肉;img_3:完整带冠芽菠萝 vs 切好菠萝块。所有食物素材需要透明背景以便合成",
  124. "required_spec": [
  125. "img_1_upper:完整带皮芒果,鲜亮黄色,表面有水珠",
  126. "img_1_lower:切好芒果块,不规则立方体状,盛放在白色方形碗中",
  127. "img_2_upper:完整带刺榴莲,椭圆形,表面布满尖刺",
  128. "img_2_lower:剥开的榴莲果肉,鲜亮黄色,部分区域有白色纤维",
  129. "img_3_upper:完整菠萝,金黄色果实 + 深绿色冠芽,表面有网格状纹理",
  130. "img_3_lower:切好菠萝块,金黄色,盛放在透明玻璃碗中",
  131. "所有食物素材透明背景,边缘清晰"
  132. ],
  133. "output_spec": [
  134. "mango_whole.png, mango_cubed.png",
  135. "durian_whole.png, durian_flesh.png",
  136. "pineapple_whole.png, pineapple_cubed.png"
  137. ],
  138. "spec_satisfaction": {
  139. "status": "satisfied",
  140. "gap": "",
  141. "mitigation": ""
  142. },
  143. "target_images": ["img_1", "img_2", "img_3"],
  144. "stage_output": "6 种食物素材图",
  145. "input_from": [],
  146. "covers_requirements": ["加工前后的水果形态对比", "食物形态的准确性"],
  147. "importance": "上限",
  148. "reasoning": {
  149. "why_needed": "这是实现'食物形态对比'上限点的关键阶段。制作表中明确描述了每种食物的形态特征(如'完整带皮'vs'切好块状'),这是猫咪态度转变的诱因,形态错误会导致逻辑不通",
  150. "why_here": "这是依赖树的叶节点,不依赖其他阶段。食物素材是合成阶段的必要输入,可独立于猫咪素材并行准备"
  151. },
  152. "children": []
  153. },
  154. {
  155. "stage_id": "stage_4",
  156. "stage_name": "手势和手素材准备",
  157. "description": "准备黄色卡通手势素材(拒绝手势:拇指食指伸出;喜爱手势:掌心向上双手合拢)和肤色手素材(掌心向上托举姿态)。所有素材需要透明背景",
  158. "required_spec": [
  159. "拒绝手势:黄色 Emoji 风格,拇指和食指伸出,其余手指握拳",
  160. "喜爱手势:黄色 Emoji 风格,掌心向上,手指微曲",
  161. "手:肤色白皙,掌心向上,呈托举状,手腕部分可见或被截断",
  162. "所有素材透明背景,边缘清晰"
  163. ],
  164. "output_spec": [
  165. "gesture_reject.png(拒绝手势)",
  166. "gesture_accept.png(喜爱手势)",
  167. "hand_hold.png(托举的手)"
  168. ],
  169. "spec_satisfaction": {
  170. "status": "satisfied",
  171. "gap": "",
  172. "mitigation": ""
  173. },
  174. "target_images": ["img_1", "img_2", "img_3"],
  175. "stage_output": "gesture_reject.png, gesture_accept.png, hand_hold.png",
  176. "input_from": [],
  177. "covers_requirements": ["拟人化的卡通手势贴纸"],
  178. "importance": "上限",
  179. "reasoning": {
  180. "why_needed": "这是实现'拟人化的卡通手势贴纸'上限点的关键阶段。制作表中明确描述了手势的视觉特征(黄色卡通手势,表示拒绝/接受),这是表情包的标志性元素",
  181. "why_here": "这是依赖树的叶节点,不依赖其他阶段。手势和手素材是合成阶段的必要输入,可独立于猫咪和食物素材并行准备"
  182. },
  183. "children": []
  184. },
  185. {
  186. "stage_id": "stage_5",
  187. "stage_name": "单图分层合成",
  188. "description": "将猫咪、手势、食物、手等元素按制作表中的坐标进行分层合成。每张图分为上下两个场景(拒绝场景/喜爱场景),所有元素放置在纯白背景上。此阶段对 img_1/2/3 分别执行",
  189. "required_spec": [
  190. "画布尺寸:1200x800(上下各 1200x400)",
  191. "背景:纯白色(RGB 255,255,255)",
  192. "元素布局符合制作表坐标(如 img_1 拒绝猫咪位于 600,100,1200,400)",
  193. "手势位于猫咪下方,模拟替代猫咪前肢的空间位置",
  194. "手位于左下角,呈托举食物姿态",
  195. "食物位于手上方",
  196. "所有元素边缘清晰,无明显拼贴痕迹"
  197. ],
  198. "output_spec": [
  199. "img_1_composed.png(芒果主题合成图,不含文字)",
  200. "img_2_composed.png(榴莲主题合成图,不含文字)",
  201. "img_3_composed.png(菠萝主题合成图,不含文字)"
  202. ],
  203. "spec_satisfaction": {
  204. "status": "partial",
  205. "gap": "手势与猫咪的空间关系可能不自然,看起来像悬浮元素;元素间可能存在轻微色差",
  206. "mitigation": "按制作表坐标精确定位;添加轻微阴影增强融合感;使用匹配颜色工具校正色差"
  207. },
  208. "target_images": ["img_1", "img_2", "img_3"],
  209. "stage_output": "img_1_composed.png, img_2_composed.png, img_3_composed.png",
  210. "input_from": ["stage_2", "stage_3", "stage_4"],
  211. "covers_requirements": ["纵向二分的对比叙事构图", "跨次元的拼贴视觉风格", "手势与猫咪的空间关系", "纯白背景与透明抠图"],
  212. "importance": "上限",
  213. "reasoning": {
  214. "why_needed": "这是实现'纵向二分构图'和'拼贴视觉风格'上限点的关键阶段。制作表中详细描述了每个元素的布局坐标和拼接关系,必须通过分层合成精确还原",
  215. "why_here": "此阶段依赖 stage_2(表情猫咪)、stage_3(食物)、stage_4(手势和手)的产物,是将所有独立元素整合为完整场景的中间节点"
  216. },
  217. "children": [
  218. {
  219. "stage_id": "stage_5_1",
  220. "stage_name": "img_1 芒果主题合成",
  221. "description": "合成 img_1 的上下两个场景:上半部分(拒绝文字 + 拒绝猫咪 + 拒绝手势 + 手 + 完整芒果),下半部分(喜爱文字 + 喜爱猫咪 + 喜爱手势 + 手 + 切好芒果块)",
  222. "required_spec": [
  223. "上半部分:文字 (0,0,600,100),拒绝猫咪 (600,100,1200,400),拒绝手势 (600,300,750,400),手 (0,200,400,400),完整芒果 (100,100,400,300)",
  224. "下半部分:文字 (0,400,600,500),喜爱猫咪 (600,500,1200,800),喜爱手势 (600,700,750,800),手 (0,600,400,800),切好芒果块 (100,500,400,700)"
  225. ],
  226. "output_spec": ["img_1_composed.png"],
  227. "spec_satisfaction": {
  228. "status": "satisfied",
  229. "gap": "",
  230. "mitigation": ""
  231. },
  232. "target_images": ["img_1"],
  233. "stage_output": "img_1_composed.png",
  234. "input_from": ["stage_2", "stage_3", "stage_4"],
  235. "covers_requirements": ["纵向二分的对比叙事构图", "跨次元的拼贴视觉风格"],
  236. "importance": "上限",
  237. "reasoning": {
  238. "why_needed": "这是 stage_5 在 img_1 上的具体实例化,芒果主题的食物形态对比是核心亮点",
  239. "why_here": "作为 stage_5 的子节点,继承父阶段的依赖关系"
  240. },
  241. "children": []
  242. },
  243. {
  244. "stage_id": "stage_5_2",
  245. "stage_name": "img_2 榴莲主题合成",
  246. "description": "合成 img_2 的上下两个场景:上半部分(拒绝文字 + 拒绝猫咪 + 拒绝手势 + 手 + 完整榴莲),下半部分(喜爱文字 + 喜爱猫咪 + 喜爱手势 + 手 + 榴莲果肉)",
  247. "required_spec": [
  248. "上半部分:文字 (0,0,600,100),拒绝猫咪 (600,100,1200,400),拒绝手势 (600,300,750,400),手 (0,200,400,400),完整榴莲 (100,100,400,300)",
  249. "下半部分:文字 (0,400,600,500),喜爱猫咪 (600,500,1200,800),喜爱手势 (600,700,750,800),手 (0,600,400,800),榴莲果肉 (100,500,400,700)"
  250. ],
  251. "output_spec": ["img_2_composed.png"],
  252. "spec_satisfaction": {
  253. "status": "satisfied",
  254. "gap": "",
  255. "mitigation": ""
  256. },
  257. "target_images": ["img_2"],
  258. "stage_output": "img_2_composed.png",
  259. "input_from": ["stage_2", "stage_3", "stage_4"],
  260. "covers_requirements": ["纵向二分的对比叙事构图", "跨次元的拼贴视觉风格"],
  261. "importance": "上限",
  262. "reasoning": {
  263. "why_needed": "这是 stage_5 在 img_2 上的具体实例化,榴莲主题的带刺纹理和果肉形态是核心特征",
  264. "why_here": "作为 stage_5 的子节点,继承父阶段的依赖关系"
  265. },
  266. "children": []
  267. },
  268. {
  269. "stage_id": "stage_5_3",
  270. "stage_name": "img_3 菠萝主题合成",
  271. "description": "合成 img_3 的上下两个场景:上半部分(拒绝文字 + 拒绝猫咪 + 拒绝手势 + 手 + 完整菠萝),下半部分(喜爱文字 + 喜爱猫咪 + 喜爱手势 + 手 + 切好菠萝块)",
  272. "required_spec": [
  273. "上半部分:文字 (0,0,600,100),拒绝猫咪 (600,100,1200,400),拒绝手势 (600,300,750,400),手 (0,200,400,400),完整菠萝 (100,100,400,300)",
  274. "下半部分:文字 (0,400,600,500),喜爱猫咪 (600,500,1200,800),喜爱手势 (600,700,750,800),手 (0,600,400,800),切好菠萝块 (100,500,400,700)"
  275. ],
  276. "output_spec": ["img_3_composed.png"],
  277. "spec_satisfaction": {
  278. "status": "satisfied",
  279. "gap": "",
  280. "mitigation": ""
  281. },
  282. "target_images": ["img_3"],
  283. "stage_output": "img_3_composed.png",
  284. "input_from": ["stage_2", "stage_3", "stage_4"],
  285. "covers_requirements": ["纵向二分的对比叙事构图", "跨次元的拼贴视觉风格"],
  286. "importance": "上限",
  287. "reasoning": {
  288. "why_needed": "这是 stage_5 在 img_3 上的具体实例化,菠萝主题的冠芽和网格纹理是核心特征",
  289. "why_here": "作为 stage_5 的子节点,继承父阶段的依赖关系"
  290. },
  291. "children": []
  292. }
  293. ]
  294. },
  295. {
  296. "stage_id": "stage_6",
  297. "stage_name": "文字添加与最终输出",
  298. "description": "为每张合成图添加文字内容。拒绝场景文字:'我不爱吃 XX,谢谢';喜爱场景文字:'XX!我爱吃!'。文字位于各分栏左上角,黑色字体,大小约 600x100 像素",
  299. "required_spec": [
  300. "img_1 文字:上半部分'我不爱吃芒果,谢谢',下半部分'芒果!我爱吃!'",
  301. "img_2 文字:上半部分'我不爱吃榴莲,谢谢',下半部分'榴莲!我爱吃!'",
  302. "img_3 文字:上半部分'我不爱吃菠萝,谢谢',下半部分'菠萝!我爱吃!'",
  303. "文字位置:左上角(如 0,0,600,100)",
  304. "文字样式:黑色,清晰可读,无衬线字体"
  305. ],
  306. "output_spec": [
  307. "img_1_final.png(1200x800,含文字)",
  308. "img_2_final.png(1200x800,含文字)",
  309. "img_3_final.png(1200x800,含文字)"
  310. ],
  311. "spec_satisfaction": {
  312. "status": "satisfied",
  313. "gap": "",
  314. "mitigation": ""
  315. },
  316. "target_images": ["img_1", "img_2", "img_3"],
  317. "stage_output": "img_1_final.png, img_2_final.png, img_3_final.png",
  318. "input_from": ["stage_5"],
  319. "covers_requirements": ["文字内容与位置"],
  320. "importance": "下限",
  321. "reasoning": {
  322. "why_needed": "这是实现'文字内容与位置'下限点的阶段。文字是叙事的重要组成部分,与图像共同完成语义表达,文字错误会导致语义混乱",
  323. "why_here": "这是依赖树的根节点附近的最后阶段,依赖 stage_5 的合成图,产出最终成品"
  324. },
  325. "children": []
  326. }
  327. ]
  328. },
  329. "requirement_coverage": {
  330. "极具反差的猫咪神态": {
  331. "covered_by": ["stage_2"],
  332. "coverage_confidence": "high",
  333. "gap_note": ""
  334. },
  335. "纵向二分的对比叙事构图": {
  336. "covered_by": ["stage_5", "stage_5_1", "stage_5_2", "stage_5_3"],
  337. "coverage_confidence": "high",
  338. "gap_note": ""
  339. },
  340. "加工前后的水果形态对比": {
  341. "covered_by": ["stage_3"],
  342. "coverage_confidence": "high",
  343. "gap_note": ""
  344. },
  345. "拟人化的卡通手势贴纸": {
  346. "covered_by": ["stage_4"],
  347. "coverage_confidence": "high",
  348. "gap_note": ""
  349. },
  350. "跨次元的拼贴视觉风格": {
  351. "covered_by": ["stage_5", "stage_5_1", "stage_5_2", "stage_5_3"],
  352. "coverage_confidence": "high",
  353. "gap_note": ""
  354. },
  355. "猫咪跨图一致性": {
  356. "covered_by": ["stage_1", "stage_2"],
  357. "coverage_confidence": "high",
  358. "gap_note": "风险:三张图的猫咪可能因分批生成而产生细微差异,需通过固定参数和复用素材缓解"
  359. },
  360. "表情精准度": {
  361. "covered_by": ["stage_2"],
  362. "coverage_confidence": "medium",
  363. "gap_note": "风险:局部重绘可能导致面部边缘融合不自然,需使用专用模型和后期处理"
  364. },
  365. "手势与猫咪的空间关系": {
  366. "covered_by": ["stage_5"],
  367. "coverage_confidence": "medium",
  368. "gap_note": "风险:手势可能看起来像悬浮元素,需添加阴影增强融合感"
  369. },
  370. "食物形态的准确性": {
  371. "covered_by": ["stage_3"],
  372. "coverage_confidence": "high",
  373. "gap_note": ""
  374. },
  375. "纯白背景与透明抠图": {
  376. "covered_by": ["stage_1", "stage_2", "stage_3", "stage_4", "stage_5"],
  377. "coverage_confidence": "high",
  378. "gap_note": ""
  379. },
  380. "文字内容与位置": {
  381. "covered_by": ["stage_6"],
  382. "coverage_confidence": "high",
  383. "gap_note": ""
  384. }
  385. }
  386. }
  387. ]
  388. }