1 ヶ月前 · 579d70bdfc
--- a/agent/core/runner.py
+++ b/agent/core/runner.py
@@ -2534,8 +2534,8 @@ class AgentRunner:
 
				         # 统计优化情况
			
 
				         stats = {"kept": 0, "downscaled": 0, "described": 0, "cache_hit": 0}
			
 
				 
			
 
				-        # 收集需要降分辨率的图片（用于并发处理）
			
 
				-        downscale_jobs = []  # [(msg_idx, block_idx, image_url, cache_key)]
			
 
				+        # 收集需要降分辨率或尺寸补齐的图片（用于并发处理）
			
 
				+        process_jobs = []  # [(msg_idx, block_idx, image_url, cache_key, max_size, cache_field)]
			
 
				 
			
 
				         # 第一遍：扫描并收集需要处理的图片
			
 
				         for i in range(last_assistant_idx):
			
@@ -2559,21 +2559,24 @@ class AgentRunner:
 
				                     else:
			
 
				                         cache_key = hashlib.md5(image_url.encode()).hexdigest()
			
 
				 
			
 
				-                    # 3-5 轮需要降分辨率
			
 
				-                    if 2 < rounds_ago <= 5:
			
 
				+                    # 1-5 轮都需要检查尺寸
			
 
				+                    if rounds_ago <= 5:
			
 
				                         cached = self._image_opt_cache.get(cache_key, {})
			
 
				-                        if "downscaled" not in cached and image_url.startswith("data:"):
			
 
				-                            downscale_jobs.append((i, block_idx, image_url, cache_key))
			
 
				-
			
 
				-        # 并发处理所有降分辨率任务
			
 
				-        if downscale_jobs:
			
 
				-            downscale_results = await asyncio.gather(
			
 
				-                *[self._downscale_image(url) for _, _, url, _ in downscale_jobs],
			
 
				+                        cache_field = "pad_only" if rounds_ago <= 2 else "downscaled"
			
 
				+                        
			
 
				+                        if cache_field not in cached and image_url.startswith("data:"):
			
 
				+                            max_size = None if rounds_ago <= 2 else 512
			
 
				+                            process_jobs.append((i, block_idx, image_url, cache_key, max_size, cache_field))
			
 
				+
			
 
				+        # 并发处理所有尺寸任务
			
 
				+        if process_jobs:
			
 
				+            process_results = await asyncio.gather(
			
 
				+                *[self._process_image_size(url, max_size=ms) for _, _, url, _, ms, _ in process_jobs],
			
 
				                 return_exceptions=True
			
 
				             )
			
 
				-            for (_, _, _, cache_key), result in zip(downscale_jobs, downscale_results):
			
 
				+            for (_, _, _, cache_key, _, cache_field), result in zip(process_jobs, process_results):
			
 
				                 if not isinstance(result, Exception) and result is not None:
			
 
				-                    self._image_opt_cache.setdefault(cache_key, {})["downscaled"] = result
			
 
				+                    self._image_opt_cache.setdefault(cache_key, {})[cache_field] = result
			
 
				 
			
 
				         # 第二遍：应用处理结果
			
 
				         for i in range(last_assistant_idx):
			
@@ -2603,9 +2606,29 @@ class AgentRunner:
 
				 
			
 
				                     # 根据距离决定处理策略
			
 
				                     if rounds_ago <= 2:
			
 
				-                        # 最近 1-2 轮：保留原图
			
 
				-                        new_content.append(block)
			
 
				-                        stats["kept"] += 1
			
 
				+                        # 最近 1-2 轮：只补齐过小图片，保留原分辨率
			
 
				+                        cached = self._image_opt_cache.get(cache_key, {})
			
 
				+                        if "pad_only" in cached:
			
 
				+                            new_content.append({
			
 
				+                                "type": "image_url",
			
 
				+                                "image_url": {"url": cached["pad_only"]}
			
 
				+                            })
			
 
				+                            stats["kept"] += 1
			
 
				+                            stats["cache_hit"] += 1
			
 
				+                        elif image_url.startswith("data:"):
			
 
				+                            processed = await self._process_image_size(image_url, max_size=None)
			
 
				+                            if processed:
			
 
				+                                self._image_opt_cache.setdefault(cache_key, {})["pad_only"] = processed
			
 
				+                                new_content.append({
			
 
				+                                    "type": "image_url",
			
 
				+                                    "image_url": {"url": processed}
			
 
				+                                })
			
 
				+                            else:
			
 
				+                                new_content.append(block)
			
 
				+                            stats["kept"] += 1
			
 
				+                        else:
			
 
				+                            new_content.append(block)
			
 
				+                            stats["kept"] += 1
			
 
				 
			
 
				                     elif rounds_ago <= 5:
			
 
				                         # 3-5 轮：降低分辨率（优先从缓存取）
			
@@ -2618,13 +2641,13 @@ class AgentRunner:
 
				                             stats["downscaled"] += 1
			
 
				                             stats["cache_hit"] += 1
			
 
				                         elif image_url.startswith("data:"):
			
 
				-                            downscaled = await self._downscale_image(image_url)
			
 
				-                            if downscaled:
			
 
				+                            processed = await self._process_image_size(image_url, max_size=512)
			
 
				+                            if processed:
			
 
				                                 # 缓存结果
			
 
				-                                self._image_opt_cache.setdefault(cache_key, {})["downscaled"] = downscaled
			
 
				+                                self._image_opt_cache.setdefault(cache_key, {})["downscaled"] = processed
			
 
				                                 new_content.append({
			
 
				                                     "type": "image_url",
			
 
				-                                    "image_url": {"url": downscaled}
			
 
				+                                    "image_url": {"url": processed}
			
 
				                                 })
			
 
				                                 stats["downscaled"] += 1
			
 
				                             else:
			
@@ -2668,16 +2691,11 @@ class AgentRunner:
 
				 
			
 
				         return messages
			
 
				 
			
 
				-    async def _downscale_image(self, base64_url: str, max_size: int = 512) -> Optional[str]:
			
 
				+    async def _process_image_size(self, base64_url: str, max_size: Optional[int] = 512, min_size: int = 11) -> Optional[str]:
			
 
				         """
			
 
				-        降低 base64 图片的分辨率
			
 
				-
			
 
				-        Args:
			
 
				-            base64_url: data:image/xxx;base64,... 格式的 URL
			
 
				-            max_size: 最大边长（像素）
			
 
				-
			
 
				-        Returns:
			
 
				-            降分辨率后的 base64 URL，失败返回 None
			
 
				+        处理 base64 图片的尺寸：
			
 
				+        - 若 max_size 不为 None 且图片任意一边大于该值，则等比例缩小，使最长边等于 max_size
			
 
				+        - 若任意一边小于 min_size，则补充白边 (Padding)
			
 
				         """
			
 
				         try:
			
 
				             from PIL import Image
			
@@ -2695,20 +2713,42 @@ class AgentRunner:
 
				             img_data = base64.b64decode(data)
			
 
				             img = Image.open(io.BytesIO(img_data))
			
 
				 
			
 
				-            # 计算新尺寸（保持宽高比）
			
 
				             width, height = img.size
			
 
				-            if width <= max_size and height <= max_size:
			
 
				-                return base64_url  # 已经够小，不需要缩放
			
 
				 
			
 
				-            if width > height:
			
 
				-                new_width = max_size
			
 
				-                new_height = int(height * max_size / width)
			
 
				+            needs_downscale = max_size is not None and (width > max_size or height > max_size)
			
 
				+            needs_pad = width < min_size or height < min_size
			
 
				+
			
 
				+            # 尺寸正常，无需处理
			
 
				+            if not needs_downscale and not needs_pad:
			
 
				+                return base64_url
			
 
				+
			
 
				+            new_width, new_height = width, height
			
 
				+
			
 
				+            # 1. 降分辨率
			
 
				+            if needs_downscale:
			
 
				+                if width > height:
			
 
				+                    new_width = max_size
			
 
				+                    new_height = int(height * max_size / width)
			
 
				+                else:
			
 
				+                    new_height = max_size
			
 
				+                    new_width = int(width * max_size / height)
			
 
				+            
			
 
				+            if (new_width, new_height) != (width, height):
			
 
				+                img_resized = img.resize((new_width, new_height), Image.Resampling.BILINEAR)
			
 
				             else:
			
 
				-                new_height = max_size
			
 
				-                new_width = int(width * max_size / height)
			
 
				+                img_resized = img
			
 
				+
			
 
				+            # 2. 补齐白边 (Padding)
			
 
				+            pad_width = max(new_width, min_size)
			
 
				+            pad_height = max(new_height, min_size)
			
 
				 
			
 
				-            # 缩放图片（使用更快的 BILINEAR 算法）
			
 
				-            img_resized = img.resize((new_width, new_height), Image.Resampling.BILINEAR)
			
 
				+            if pad_width > new_width or pad_height > new_height:
			
 
				+                # 创建白色背景
			
 
				+                padded_img = Image.new("RGBA" if img_resized.mode in ("RGBA", "P") else "RGB", (pad_width, pad_height), (255, 255, 255, 255))
			
 
				+                offset_x = (pad_width - new_width) // 2
			
 
				+                offset_y = (pad_height - new_height) // 2
			
 
				+                padded_img.paste(img_resized, (offset_x, offset_y))
			
 
				+                img_resized = padded_img
			
 
				 
			
 
				             # 转换为 RGB（JPEG不支持 RGBA, P 等具有透明度或索引的模式）
			
 
				             if img_resized.mode != "RGB":
			
@@ -2722,15 +2762,16 @@ class AgentRunner:
 
				                         img_resized = background
			
 
				                 img_resized = img_resized.convert("RGB")
			
 
				 
			
 
				-            # 重新编码为 JPEG（降低质量以加快速度）
			
 
				+            # 重新编码为 JPEG（如果只补齐、未缩放，则使用稍高的质量 85，否则为 60）
			
 
				             buffer = io.BytesIO()
			
 
				-            img_resized.save(buffer, format="JPEG", quality=60, optimize=False)
			
 
				+            quality = 60 if needs_downscale else 85
			
 
				+            img_resized.save(buffer, format="JPEG", quality=quality, optimize=False)
			
 
				             new_data = base64.b64encode(buffer.getvalue()).decode("utf-8")
			
 
				 
			
 
				             return f"data:image/jpeg;base64,{new_data}"
			
 
				 
			
 
				         except Exception as e:
			
 
				-            self.log.warning(f"[Image Downscale] 降分辨率失败: {e}")
			
 
				+            self.log.warning(f"[Image Process] 处理图片尺寸失败: {e}")
			
 
				             return None
			
 
				 
			
 
				     async def _generate_image_description(self, image_url: str, current_model: str) -> str:
			
--- a/examples/process_pipeline/prompts/apply_to_grounding.prompt
+++ b/examples/process_pipeline/prompts/apply_to_grounding.prompt
@@ -8,7 +8,6 @@
 
				   - body 是该 capability 在原帖中的做法描述；当需要填写 body_excerpt 时，body 是唯一来源。
			
 
				   - apply_to_draft 是上一阶段抽取的自然语言适用范围线索，用来辅助选择路径候选。
			
 
				 - 路径候选：紧凑 JSON 数组，每个节点包含 id、path、source_type、description，以及可选 elements。
			
 
				-  - apply_to 只能从这些候选节点中选择。
			
 
				   - category_id 对应候选节点 id，category_path 对应候选节点 path。
			
 
				 - 邻近路径参考：只用于 suggest_apply_to 的命名风格参考，不能作为 apply_to 的 category_id/category_path 来源。
			
 
				 
			
@@ -16,7 +15,7 @@
 
				 
			
 
				 输出必须是严格 JSON，顶层只包含 capability 数组。每个输出 capability 只包含：
			
 
				 - capability_id：逐字照抄输入 capability_id。
			
 
				-- apply_to：真实内容树节点映射，按 实质 / 形式 分组；每条必须来自路径候选。
			
 
				+- apply_to：真实内容树节点映射，按 实质 / 形式 分组；每条必须来自路径候选。如果没有足够匹配的候选节点，可以为空。
			
 
				 - suggest_apply_to：建议补充或调整的理想路径数组；最多 3 条。
			
 
				 
			
 
				 # 绝对规则
			
@@ -25,6 +24,7 @@
 
				 - 处理 capability 数组中的每一条 capability。
			
 
				 - 只输出 apply_to 和 suggest_apply_to 字段；不要回显 inputs、outputs、action、body、effects、stage、tools、criterion、unstructured_what 等字段。
			
 
				 - apply_to.实质 只能选择 source_type=实质 的节点；apply_to.形式 只能选择 source_type=形式 的节点。
			
 
				+- 如果路径候选中没有足够匹配的节点，apply_to 可以为空。
			
 
				 - element 只有在该节点 elements 中存在时才能填写；否则省略 element 或填 null。
			
 
				 - 只选择你有信心认为与 apply_to_draft 和 capability.body 直接相关的节点；一般每侧不超过 3 项。
			
 
				 - 不确定时选较粗分类；如果仍然没有信心或 body 中没有直接证据，可以置空数组，不要编造。
			
@@ -41,7 +41,7 @@ suggest_apply_to 用来指出：当前候选路径或已有内容树无法很好
 
				 
			
 
				 - 每个 capability 最多输出 3 个 suggest_apply_to 条目。
			
 
				 - suggest_apply_to 的 body_excerpt 和 body_excerpt_note 字段可以为空；如果填写 body_excerpt，必须来自某段 capability.body 的直接证据。
			
 
				-- suggest_apply_to.path 可以基于真实 category_path 续写，也可以提出树上不存在的新路径。
			
 
				+- suggest_apply_to.path 可以基于真实 category_path 续写，也可以提出树上不存在的新路径。注意：路径中每个层级的节点名称都必须是名词。
			
 
				 - suggest_apply_to.source_type 必须是 实质 或 形式。
			
 
				 - suggest_apply_to 的所有层级必须保持同一种内容类型，不能在一条路径里混入另一套语义体系。
			
 
				 - suggest_apply_to 必须满足 source_type 分类：实质路径中不能出现形式类词汇，形式路径中不能出现实质类词汇。
			
--- a/examples/process_pipeline/prompts/extract_workflow.prompt
+++ b/examples/process_pipeline/prompts/extract_workflow.prompt
@@ -19,7 +19,7 @@
 
				 - step 是 workflow 内的最小步骤,但可以比较抽象。
			
 
				 - capability 是 step 的实现实例:
			
 
				   - 如果一个 step 只有一种实现方法,该 step 对应一个 capability。
			
 
				-  - 如果一个 step 有多种实现方法,该 step 对应多个 capability,这些 capability 是并列替代方案。
			
 
				+  - 如果一个 step 有多种实现方法,该 step 对应多个 capability,这些 capability 是并列替代方案；如果做法相同、只是使用的工具不同，则仍算同一个 capability，不要拆成多个。
			
 
				   - 同一 step 下的多个 capability 不是更细分的小步骤,不是递进关系,不要把一个连续流程拆到同一 step 的多个 capability 里。
			
 
				 - 若原帖纯营销、信息密度太低或完全没怎么做,则 skip=true。
			
 
				 - skip=true 时 workflow_groups 输出 []。
			
@@ -44,7 +44,7 @@
 
				 - capability_id:字符串,见上方规则
			
 
				 - action:{ description, reasoning },见下方 action 字段规则
			
 
				 - inputs / outputs:结构化接口,见下方规则
			
 
				-- body:该原子操作在原帖中的描述(可能是对应 step 内容里的子片段);该描述需尽可能细致，做到应有尽有，把每一个细节都记录下来，包括但不限于具体的prompt。未提及则为 null
			
 
				+- body:该原子操作在原帖中的描述(可能是对应 step 内容里的子片段);该描述需尽可能细致，做到应有尽有，把每一个细节都记录下来，包括但不限于作者使用的 prompt；操作产生的结果不需要记录。未提及则为 null
			
 
				 - effects:该原子操作产生的可观测效果,数组,每项为结构体(见下方 effects 字段规则)
			
 
				 - control_target:该操作控制的对象,字符串数组,如 ["人物姿态", "背景风格"];未提及则为 []
			
 
				 - artifact_type:该操作产出的工件类型,如 "正向提示词"、"蒙版"、"参考图";未提及则为 null
			
--- a/examples/process_pipeline/ui/app.js
+++ b/examples/process_pipeline/ui/app.js
@@ -1457,8 +1457,8 @@ async function fetchRequirementData(index) {
 
				         if (jsonCapability) {
			
 
				             const reqStr = (index + 1).toString().padStart(3, '0');
			
 
				             jsonCapability.innerHTML = `
			
 
				-                <div id="container-capability" style="height: calc(100vh - 120px); width: 100%;">
			
 
				-                    <iframe src="/static/viz_fragment.html?req=${reqStr}&v=17" style="width: 100%; height: 100%; border: none; border-radius: 8px; background: var(--bg-primary);"></iframe>
			
 
				+                <div id="container-capability" style="height: 100%; width: 100%;">
			
 
				+                    <iframe src="/static/viz_fragment.html?req=${reqStr}&v=19" style="width: 100%; height: 100%; border: none; background: var(--bg-primary);"></iframe>
			
 
				                 </div>
			
 
				             `;
			
 
				         }
			
--- a/examples/process_pipeline/ui/index.html
+++ b/examples/process_pipeline/ui/index.html
@@ -7,7 +7,7 @@
 
				     <meta name="referrer" content="no-referrer">
			
 
				     <title>流水线控制台</title>
			
 
				     <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
			
 
				-    <link rel="stylesheet" href="/static/style.css">
			
 
				+    <link rel="stylesheet" href="/static/style.css?v=19">
			
 
				     <style>
			
 
				         .dag-container {
			
 
				             display: flex;
			
@@ -472,7 +472,7 @@
 
				         </div>
			
 
				     </div>
			
 
				 
			
 
				-    <script src="/static/app.js?v=17"></script>
			
 
				+    <script src="/static/app.js?v=19"></script>
			
 
				 </body>
			
 
				 
			
 
				 </html>
			
--- a/examples/process_pipeline/ui/style.css
+++ b/examples/process_pipeline/ui/style.css
@@ -316,15 +316,30 @@ body {
 
				 .tab-content-container {

			
 
				     flex: 1;

			
 
				     overflow: auto;

			
 
				-    padding: 1.5rem;

			
 
				+    padding: 0;

			
 
				     background: #ffffff;

			
 
				+    display: flex;

			
 
				+    flex-direction: column;

			
 
				 }

			
 
				 

			
 
				 .tab-content {

			
 
				     display: none;

			
 
				+    flex: 1;

			
 
				+    height: 100%;

			
 
				+}

			
 
				+.content-viewer {

			
 
				+    flex: 1;

			
 
				+    height: 100%;

			
 
				+    width: 100%;

			
 
				+    display: flex;

			
 
				+    flex-direction: column;

			
 
				+}

			
 
				+#tab-raw, #tab-blueprint {

			
 
				+    padding: 1.5rem;

			
 
				 }

			
 
				 .tab-content.active {

			
 
				-    display: block;

			
 
				+    display: flex;

			
 
				+    flex-direction: column;

			
 
				 }

			
 
				 

			
 
				 .json-viewer {