1 день назад · 38e0e10982
--- a/agent/llm/claude_code_oauth.py
+++ b/agent/llm/claude_code_oauth.py
@@ -23,21 +23,21 @@ from typing import Any, Dict, List, Optional, Tuple
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				 
			
 
				-def _flatten_messages_to_string(
			
 
				+def _convert_messages(
			
 
				     messages: List[Dict[str, Any]],
			
 
				-) -> Tuple[Optional[str], str]:
			
 
				+) -> Tuple[Optional[str], List[Dict[str, Any]], bool]:
			
 
				     """
			
 
				-    把 OpenAI 风格 messages 折叠成 (system_prompt, user_text)。
			
 
				+    把 OpenAI 风格 messages 拆为 (system_prompt, anthropic_content_blocks, has_image)。
			
 
				 
			
 
				     - role=system 拼接为 system_prompt
			
 
				-    - role=user/assistant 的 content 全部拍平为字符串
			
 
				-    - image_url 类型块降级为 `[图片URL: ...]` 文本占位（模型看到 URL 字符串而非画面）
			
 
				-
			
 
				-    使用 string 模式而非 AsyncIterable[dict]，是为了走 SDK 中被生产验证的稳定路径。
			
 
				-    多模态真图传输需要切到 AsyncIterable + Anthropic content block 协议，单独迭代。
			
 
				+    - role=user/assistant 的 content 转为 Anthropic content blocks (text/image)
			
 
				+    - OpenAI {"type":"image_url","image_url":{"url":...}} 转为
			
 
				+      Anthropic {"type":"image","source":{"type":"url","url":...}}
			
 
				+    - has_image：是否包含图片块，用于决定走 string 还是 AsyncIterable 模式
			
 
				     """
			
 
				     system_parts: List[str] = []
			
 
				-    user_parts: List[str] = []
			
 
				+    blocks: List[Dict[str, Any]] = []
			
 
				+    has_image = False
			
 
				 
			
 
				     for msg in messages:
			
 
				         role = msg.get("role")
			
@@ -49,29 +49,44 @@ def _flatten_messages_to_string(
 
				             continue
			
 
				 
			
 
				         if isinstance(content, str):
			
 
				-            user_parts.append(content)
			
 
				+            blocks.append({"type": "text", "text": content})
			
 
				             continue
			
 
				 
			
 
				         if isinstance(content, list):
			
 
				             for block in content:
			
 
				                 if not isinstance(block, dict):
			
 
				-                    user_parts.append(str(block))
			
 
				+                    blocks.append({"type": "text", "text": str(block)})
			
 
				                     continue
			
 
				                 btype = block.get("type")
			
 
				                 if btype == "text":
			
 
				-                    user_parts.append(block.get("text", ""))
			
 
				+                    blocks.append({"type": "text", "text": block.get("text", "")})
			
 
				                 elif btype == "image_url":
			
 
				                     url = (block.get("image_url") or {}).get("url", "")
			
 
				                     if url:
			
 
				-                        user_parts.append(f"[图片URL: {url}]")
			
 
				+                        blocks.append(
			
 
				+                            {"type": "image", "source": {"type": "url", "url": url}}
			
 
				+                        )
			
 
				+                        has_image = True
			
 
				                 elif btype == "image":
			
 
				-                    src = block.get("source") or {}
			
 
				-                    url = src.get("url") or src.get("data", "")[:60]
			
 
				-                    user_parts.append(f"[图片: {url}]")
			
 
				+                    blocks.append(block)
			
 
				+                    has_image = True
			
 
				 
			
 
				     system_prompt = "\n\n".join(system_parts).strip() or None
			
 
				-    user_text = "\n\n".join(p for p in user_parts if p).strip()
			
 
				-    return system_prompt, user_text
			
 
				+    return system_prompt, blocks, has_image
			
 
				+
			
 
				+
			
 
				+def _blocks_to_string(blocks: List[Dict[str, Any]]) -> str:
			
 
				+    """把 content blocks 拍平成字符串（图片降级为 [图片URL: ...] 占位）— string 模式用"""
			
 
				+    parts: List[str] = []
			
 
				+    for block in blocks:
			
 
				+        btype = block.get("type")
			
 
				+        if btype == "text":
			
 
				+            parts.append(block.get("text", ""))
			
 
				+        elif btype == "image":
			
 
				+            src = block.get("source") or {}
			
 
				+            url = src.get("url") or src.get("data", "")[:60]
			
 
				+            parts.append(f"[图片URL: {url}]")
			
 
				+    return "\n\n".join(p for p in parts if p).strip()
			
 
				 
			
 
				 
			
 
				 def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
			
@@ -94,17 +109,19 @@ def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
 
				         TextBlock,
			
 
				     )
			
 
				 
			
 
				-    # 从子进程 env 中剥离 API key 相关变量，让 CLI 回落到 OAuth；
			
 
				-    # 父进程 os.environ 不变（其他 LLM provider 仍可用 API key）。
			
 
				-    _stripped_env = {
			
 
				-        k: v
			
 
				-        for k, v in os.environ.items()
			
 
				-        if k not in ("ANTHROPIC_API_KEY", "ANTHROPIC_BASE_URL", "ANTHROPIC_AUTH_TOKEN")
			
 
				+    # 让 SDK 子进程看不到 API key 相关变量，回落到 OAuth。
			
 
				+    # SDK 内部把 options.env 当作"覆盖层"叠在父进程 os.environ 之上，
			
 
				+    # 所以从 dict 里"移除"这些 key 没用 — 必须显式以空串覆盖父值。
			
 
				+    # 父进程 os.environ 不变（其他 LLM provider 继续可用 API key）。
			
 
				+    _override_env: Dict[str, str] = {
			
 
				+        "ANTHROPIC_API_KEY": "",
			
 
				+        "ANTHROPIC_BASE_URL": "",
			
 
				+        "ANTHROPIC_AUTH_TOKEN": "",
			
 
				     }
			
 
				     if "ANTHROPIC_API_KEY" in os.environ or "ANTHROPIC_BASE_URL" in os.environ:
			
 
				         logger.info(
			
 
				-            "[claude_code_oauth] Stripping ANTHROPIC_API_KEY/ANTHROPIC_BASE_URL "
			
 
				-            "from SDK subprocess env so CLI falls back to OAuth credentials."
			
 
				+            "[claude_code_oauth] Overriding ANTHROPIC_API_KEY/ANTHROPIC_BASE_URL "
			
 
				+            "with empty values in SDK subprocess env so CLI falls back to OAuth."
			
 
				         )
			
 
				 
			
 
				     default_model = model
			
@@ -117,9 +134,9 @@ def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
 
				     ) -> Dict[str, Any]:
			
 
				         actual_model = (model or default_model).split("/")[-1]
			
 
				 
			
 
				-        system_prompt, user_text = _flatten_messages_to_string(messages)
			
 
				-        if not user_text:
			
 
				-            user_text = " "
			
 
				+        system_prompt, content_blocks, has_image = _convert_messages(messages)
			
 
				+        if not content_blocks:
			
 
				+            content_blocks = [{"type": "text", "text": " "}]
			
 
				 
			
 
				         stderr_lines: List[str] = []
			
 
				 
			
@@ -132,7 +149,7 @@ def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
 
				             system_prompt=system_prompt,
			
 
				             allowed_tools=[],
			
 
				             max_turns=1,
			
 
				-            env=_stripped_env,
			
 
				+            env=_override_env,
			
 
				             stderr=_capture_stderr,
			
 
				             # 关键：屏蔽 CLI 加载用户级 ~/.claude/ 配置（output_style/skills/plugins 等）
			
 
				             # 否则这些会被注入 system prompt，浪费 token + 影响输出格式
			
@@ -152,7 +169,20 @@ def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
 
				 
			
 
				         try:
			
 
				             async with ClaudeSDKClient(options=options) as client:
			
 
				-                await client.query(user_text)
			
 
				+                if has_image:
			
 
				+                    # 多模态：用 AsyncIterable[dict] 模式发送 Anthropic content blocks
			
 
				+                    async def _input_stream():
			
 
				+                        yield {
			
 
				+                            "type": "user",
			
 
				+                            "message": {"role": "user", "content": content_blocks},
			
 
				+                            "parent_tool_use_id": None,
			
 
				+                            "session_id": "default",
			
 
				+                        }
			
 
				+                    await client.query(_input_stream())
			
 
				+                else:
			
 
				+                    # 纯文本：走 SDK string 模式（已验证稳定路径）
			
 
				+                    await client.query(_blocks_to_string(content_blocks))
			
 
				+
			
 
				                 async for msg in client.receive_response():
			
 
				                     msg_type = type(msg).__name__
			
 
				 
			
--- a/examples/process_pipeline/prompts/extract_workflow.prompt
+++ b/examples/process_pipeline/prompts/extract_workflow.prompt
@@ -69,7 +69,7 @@ action 写成对象:
 
				 ```
			
 
				 - modality 是数据形态:文本 / 图片 / 视频 / 音频 / 特征点 / 参数 / 模型 / 向量
			
 
				 - 同一次提交给模型的所有文字描述统一合并为一个输入项
			
 
				-- relation 格式:[来源.1O]、[去向.2I]、[来源.原始输入]、[去向.最终成品]
			
 
				+- relation 格式:[来源.1O]、[去向.2I]、[来源.原始输入]、[去向.最终成品]（1O和2I含义分别是：step1的output、step2的input）
			
 
				 # effects 字段
			
 
				 每个 effect 写成结构体:
			
 
				 ```json
			
@@ -145,6 +145,9 @@ $user$
 
				       "control_target": [],
			
 
				       "artifact_type": null,
			
 
				       "tools": [],
			
 
				+      "apply_to_draft": { "实质": ["..."], "形式": ["..."] },
			
 
				+      "workflow_step_ref": { "workflow_id": null, "step_id": "s1" },
			
 
				+      "is_alternative_to": []
			
 
				     }
			
 
				   ]
			
 
				 }
			
--- a/examples/process_pipeline/prompts/extract_workflow.schema.json
+++ b/examples/process_pipeline/prompts/extract_workflow.schema.json
@@ -1,13 +1,8 @@
 
				 {
			
 
				   "$schema": "http://json-schema.org/draft-07/schema#",
			
 
				-  "title": "extract_workflow_output_v8",
			
 
				+  "title": "extract_workflow_output_v6",
			
 
				   "type": "object",
			
 
				-  "required": [
			
 
				-    "skip",
			
 
				-    "skip_reason",
			
 
				-    "workflow",
			
 
				-    "capability"
			
 
				-  ],
			
 
				+  "required": ["skip", "skip_reason", "workflow", "capability"],
			
 
				   "properties": {
			
 
				     "skip": {
			
 
				       "type": "boolean"
			
@@ -22,26 +17,17 @@
 
				         },
			
 
				         {
			
 
				           "type": "object",
			
 
				-          "required": [
			
 
				-            "steps"
			
 
				-          ],
			
 
				+          "required": ["steps"],
			
 
				           "properties": {
			
 
				             "workflow_id": {
			
 
				-              "type": [
			
 
				-                "string",
			
 
				-                "null"
			
 
				-              ]
			
 
				+              "type": ["string", "null"]
			
 
				             },
			
 
				             "steps": {
			
 
				               "type": "array",
			
 
				               "minItems": 1,
			
 
				               "items": {
			
 
				                 "type": "object",
			
 
				-                "required": [
			
 
				-                  "step_id",
			
 
				-                  "order",
			
 
				-                  "phase"
			
 
				-                ],
			
 
				+                "required": ["step_id", "order", "phase"],
			
 
				                 "properties": {
			
 
				                   "step_id": {
			
 
				                     "type": "string",
			
@@ -53,21 +39,14 @@
 
				                   },
			
 
				                   "phase": {
			
 
				                     "type": "string",
			
 
				-                    "enum": [
			
 
				-                      "非制作",
			
 
				-                      "预处理",
			
 
				-                      "生成",
			
 
				-                      "编辑"
			
 
				-                    ]
			
 
				+                    "enum": ["非制作", "预处理", "生成", "编辑"]
			
 
				                   }
			
 
				                 }
			
 
				               }
			
 
				-            },
			
 
				-            "additionalProperties": false
			
 
				+            }
			
 
				           }
			
 
				         }
			
 
				-      },
			
 
				-      "additionalProperties": false
			
 
				+      ]
			
 
				     },
			
 
				     "capability": {
			
 
				       "type": "array",
			
@@ -94,10 +73,7 @@
 
				           },
			
 
				           "action": {
			
 
				             "type": "object",
			
 
				-            "required": [
			
 
				-              "description",
			
 
				-              "reasoning"
			
 
				-            ],
			
 
				+            "required": ["description", "reasoning"],
			
 
				             "properties": {
			
 
				               "description": {
			
 
				                 "type": "string",
			
@@ -109,19 +85,11 @@
 
				               }
			
 
				             }
			
 
				           },
			
 
				-          "reasoning": {
			
 
				-            "type": "string",
			
 
				-            "minLength": 1
			
 
				-          },
			
 
				-          "inputs-boundary": {
			
 
				+          "inputs": {
			
 
				             "type": "array",
			
 
				             "items": {
			
 
				               "type": "object",
			
 
				-              "required": [
			
 
				-                "modality",
			
 
				-                "description",
			
 
				-                "relation"
			
 
				-              ],
			
 
				+              "required": ["modality", "description", "relation"],
			
 
				               "properties": {
			
 
				                 "modality": {
			
 
				                   "type": "string",
			
@@ -147,15 +115,11 @@
 
				               }
			
 
				             }
			
 
				           },
			
 
				-          "outputs-boundary": {
			
 
				+          "outputs": {
			
 
				             "type": "array",
			
 
				             "items": {
			
 
				               "type": "object",
			
 
				-              "required": [
			
 
				-                "modality",
			
 
				-                "description",
			
 
				-                "relation"
			
 
				-              ],
			
 
				+              "required": ["modality", "description", "relation"],
			
 
				               "properties": {
			
 
				                 "modality": {
			
 
				                   "type": "string",
			
@@ -182,10 +146,7 @@
 
				             }
			
 
				           },
			
 
				           "body": {
			
 
				-            "type": [
			
 
				-              "string",
			
 
				-              "null"
			
 
				-            ]
			
 
				+            "type": ["string", "null"]
			
 
				           },
			
 
				           "effects": {
			
 
				             "type": "array",
			
@@ -209,12 +170,7 @@
 
				                 },
			
 
				                 "judge_method": {
			
 
				                   "type": "string",
			
 
				-                  "enum": [
			
 
				-                    "llm",
			
 
				-                    "vlm",
			
 
				-                    "rule",
			
 
				-                    "human"
			
 
				-                  ]
			
 
				+                  "enum": ["llm", "vlm", "rule", "human"]
			
 
				                 },
			
 
				                 "negative_examples": {
			
 
				                   "type": "array",
			
@@ -235,10 +191,7 @@
 
				             }
			
 
				           },
			
 
				           "artifact_type": {
			
 
				-            "type": [
			
 
				-              "string",
			
 
				-              "null"
			
 
				-            ]
			
 
				+            "type": ["string", "null"]
			
 
				           },
			
 
				           "tools": {
			
 
				             "type": "array",
			
@@ -248,10 +201,7 @@
 
				           },
			
 
				           "apply_to_draft": {
			
 
				             "type": "object",
			
 
				-            "required": [
			
 
				-              "实质",
			
 
				-              "形式"
			
 
				-            ],
			
 
				+            "required": ["实质", "形式"],
			
 
				             "properties": {
			
 
				               "实质": {
			
 
				                 "type": "array",
			
@@ -274,16 +224,10 @@
 
				               },
			
 
				               {
			
 
				                 "type": "object",
			
 
				-                "required": [
			
 
				-                  "workflow_id",
			
 
				-                  "step_id"
			
 
				-                ],
			
 
				+                "required": ["workflow_id", "step_id"],
			
 
				                 "properties": {
			
 
				                   "workflow_id": {
			
 
				-                    "type": [
			
 
				-                      "string",
			
 
				-                      "null"
			
 
				-                    ]
			
 
				+                    "type": ["string", "null"]
			
 
				                   },
			
 
				                   "step_id": {
			
 
				                     "type": "string",
			
@@ -293,17 +237,15 @@
 
				               }
			
 
				             ]
			
 
				           },
			
 
				-          "tools-boundary": {
			
 
				+          "is_alternative_to": {
			
 
				             "type": "array",
			
 
				             "items": {
			
 
				               "type": "string",
			
 
				               "pattern": "^c_(s[0-9]+_[0-9]+|standalone_[0-9]+)$"
			
 
				             }
			
 
				           }
			
 
				-        },
			
 
				-        "additionalProperties": false
			
 
				+        }
			
 
				       }
			
 
				     }
			
 
				-  },
			
 
				-  "additionalProperties": false
			
 
				-}
			
 
				+  }
			
 
				+}
			
--- a/examples/process_pipeline/ui/app.js
+++ b/examples/process_pipeline/ui/app.js
@@ -178,13 +178,15 @@ function renderRawCases(rawCasesObj) {
 
				     if (detailedCaseObj) {
			
 
				         const cd = detailedCaseObj;
			
 
				         let uniqueCases = new Set();
			
 
				-        let uniqueWorkflow = new Set();
			
 
				-        let uniqueCapabilities = new Set();
			
 
				+        let calcWorkflow = 0;
			
 
				+        let calcCapabilities = 0;
			
 
				 
			
 
				         if (cd.cases) {
			
 
				             cd.cases.forEach(c => {
			
 
				                 const cId = c.case_id || (c._raw && c._raw.case_id);
			
 
				                 const cUrl = c.source_url || c.url;
			
 
				+                const uniqueKey = cId || cUrl || Math.random().toString();
			
 
				+                uniqueCases.add(uniqueKey);
			
 
				                 if (c.workflow) calcWorkflow++;
			
 
				                 if (c.capability && c.capability.length > 0) calcCapabilities++;
			
 
				 
			
@@ -200,8 +202,8 @@ function renderRawCases(rawCasesObj) {
 
				         }
			
 
				 
			
 
				         const displayTotal = uniqueCases.size > 0 ? uniqueCases.size : (cd.total !== undefined ? cd.total : 0);
			
 
				-        const displayWorkflowSuccess = uniqueCases.size > 0 ? uniqueWorkflow.size : (cd.workflow_success !== undefined && cd.workflow_success !== null ? cd.workflow_success : (cd.success !== undefined && cd.success !== null ? cd.success : 0));
			
 
				-        const displayCapabilitiesSuccess = uniqueCases.size > 0 ? uniqueCapabilities.size : (cd.capabilities_success !== undefined && cd.capabilities_success !== null ? cd.capabilities_success : 0);
			
 
				+        const displayWorkflowSuccess = calcWorkflow > 0 ? calcWorkflow : (cd.workflow_success !== undefined && cd.workflow_success !== null ? cd.workflow_success : (cd.success !== undefined && cd.success !== null ? cd.success : 0));
			
 
				+        const displayCapabilitiesSuccess = calcCapabilities > 0 ? calcCapabilities : (cd.capabilities_success !== undefined && cd.capabilities_success !== null ? cd.capabilities_success : 0);
			
 
				 
			
 
				         if (cd.total !== undefined || uniqueCases.size > 0) {
			
 
				             totalStatsHtml = `<div style="display:flex; gap:1rem; margin-bottom:1rem; padding:0.5rem 1rem; background:rgba(0, 0, 0, 0.03); border-radius:8px; align-items:center;">
			
@@ -2882,8 +2884,8 @@ window.renderStructuredData = function (items, type, parentItem = null) {
 
				                         <span class="row-expand-icon">▶</span>
			
 
				                         ${capability && capability.capability_id ? `<span style="display:inline-block; color:#94a3b8; font-size:0.85em; font-weight:400;">${escapeHtml(capability.capability_id)}</span>` : '-'}
			
 
				                     </td>
			
 
				-                    <td class="capability-cell">${renderAction(capability)}</td>
			
 
				                     <td class="capability-cell">${capability && capability.inputs && capability.inputs.length > 0 ? renderDataObjList(capability.inputs) : '-'}</td>
			
 
				+                    <td class="capability-cell">${renderAction(capability)}</td>
			
 
				                     <td class="capability-cell">${capability && capability.outputs && capability.outputs.length > 0 ? renderDataObjList(capability.outputs) : '-'}</td>
			
 
				                     <td class="capability-cell"><div class="capability-clamp">${capability ? renderEffects(capability.effects) : '-'}</div></td>
			
 
				                     <td class="capability-cell" style="font-size:0.9em;"><div class="capability-clamp">${applyTo ? renderApplyToVal(applyTo, suggestApplyTo) : '-'}</div></td>
			
@@ -2924,12 +2926,12 @@ window.renderStructuredData = function (items, type, parentItem = null) {
 
				                                 <th style="padding: 12px 10px; width: 90px;">阶段</th>
			
 
				                                 <th style="padding: 12px 8px; width: 90px;"></th>
			
 
				                                 <th style="padding: 12px 10px; width: 180px;">输入</th>
			
 
				-                                <th style="padding: 12px 10px; width: 140px;">动作/做法</th>
			
 
				+                                <th style="padding: 12px 10px; width: 140px;">动作</th>
			
 
				                                 <th style="padding: 12px 10px; width: 180px;">输出</th>
			
 
				                                 <th style="padding: 12px 10px; width: 360px;">效果</th>
			
 
				-                                <th style="padding: 12px 10px; width: 180px;">用法</th>
			
 
				                                 <th style="padding: 12px 10px; width: 260px;">作用域</th>
			
 
				                                 <th style="padding: 12px 10px; width: 130px;">工具</th>
			
 
				+                                <th style="padding: 12px 10px; width: 300px;">做法</th>
			
 
				                             </tr>
			
 
				                         </thead>
			
 
				                         <tbody>`;
			
--- a/scratch/check_case35.py
+++ b/scratch/check_case35.py
@@ -8,7 +8,7 @@ if not case_35:
 
				     print("case 35 NOT FOUND")
			
 
				 else:
			
 
				     wf = case_35.get("workflow")
			
 
				-    fr = case_35.get("fragments") or []
			
 
				+    fr = case_35.get("capability") or case_35.get("fragments") or []
			
 
				     print("title:", (case_35.get("title") or "")[:50])
			
 
				     print("workflow exists:", wf is not None)
			
 
				     if wf: