guantao 1 день назад
Родитель
Commit
38e0e10982

+ 61 - 31
agent/llm/claude_code_oauth.py

@@ -23,21 +23,21 @@ from typing import Any, Dict, List, Optional, Tuple
 logger = logging.getLogger(__name__)
 
 
-def _flatten_messages_to_string(
+def _convert_messages(
     messages: List[Dict[str, Any]],
-) -> Tuple[Optional[str], str]:
+) -> Tuple[Optional[str], List[Dict[str, Any]], bool]:
     """
-    把 OpenAI 风格 messages 折叠成 (system_prompt, user_text)。
+    把 OpenAI 风格 messages 拆为 (system_prompt, anthropic_content_blocks, has_image)。
 
     - role=system 拼接为 system_prompt
-    - role=user/assistant 的 content 全部拍平为字符串
-    - image_url 类型块降级为 `[图片URL: ...]` 文本占位(模型看到 URL 字符串而非画面)
-
-    使用 string 模式而非 AsyncIterable[dict],是为了走 SDK 中被生产验证的稳定路径。
-    多模态真图传输需要切到 AsyncIterable + Anthropic content block 协议,单独迭代。
+    - role=user/assistant 的 content 转为 Anthropic content blocks (text/image)
+    - OpenAI {"type":"image_url","image_url":{"url":...}} 转为
+      Anthropic {"type":"image","source":{"type":"url","url":...}}
+    - has_image:是否包含图片块,用于决定走 string 还是 AsyncIterable 模式
     """
     system_parts: List[str] = []
-    user_parts: List[str] = []
+    blocks: List[Dict[str, Any]] = []
+    has_image = False
 
     for msg in messages:
         role = msg.get("role")
@@ -49,29 +49,44 @@ def _flatten_messages_to_string(
             continue
 
         if isinstance(content, str):
-            user_parts.append(content)
+            blocks.append({"type": "text", "text": content})
             continue
 
         if isinstance(content, list):
             for block in content:
                 if not isinstance(block, dict):
-                    user_parts.append(str(block))
+                    blocks.append({"type": "text", "text": str(block)})
                     continue
                 btype = block.get("type")
                 if btype == "text":
-                    user_parts.append(block.get("text", ""))
+                    blocks.append({"type": "text", "text": block.get("text", "")})
                 elif btype == "image_url":
                     url = (block.get("image_url") or {}).get("url", "")
                     if url:
-                        user_parts.append(f"[图片URL: {url}]")
+                        blocks.append(
+                            {"type": "image", "source": {"type": "url", "url": url}}
+                        )
+                        has_image = True
                 elif btype == "image":
-                    src = block.get("source") or {}
-                    url = src.get("url") or src.get("data", "")[:60]
-                    user_parts.append(f"[图片: {url}]")
+                    blocks.append(block)
+                    has_image = True
 
     system_prompt = "\n\n".join(system_parts).strip() or None
-    user_text = "\n\n".join(p for p in user_parts if p).strip()
-    return system_prompt, user_text
+    return system_prompt, blocks, has_image
+
+
+def _blocks_to_string(blocks: List[Dict[str, Any]]) -> str:
+    """把 content blocks 拍平成字符串(图片降级为 [图片URL: ...] 占位)— string 模式用"""
+    parts: List[str] = []
+    for block in blocks:
+        btype = block.get("type")
+        if btype == "text":
+            parts.append(block.get("text", ""))
+        elif btype == "image":
+            src = block.get("source") or {}
+            url = src.get("url") or src.get("data", "")[:60]
+            parts.append(f"[图片URL: {url}]")
+    return "\n\n".join(p for p in parts if p).strip()
 
 
 def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
@@ -94,17 +109,19 @@ def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
         TextBlock,
     )
 
-    # 从子进程 env 中剥离 API key 相关变量,让 CLI 回落到 OAuth;
-    # 父进程 os.environ 不变(其他 LLM provider 仍可用 API key)。
-    _stripped_env = {
-        k: v
-        for k, v in os.environ.items()
-        if k not in ("ANTHROPIC_API_KEY", "ANTHROPIC_BASE_URL", "ANTHROPIC_AUTH_TOKEN")
+    # 让 SDK 子进程看不到 API key 相关变量,回落到 OAuth。
+    # SDK 内部把 options.env 当作"覆盖层"叠在父进程 os.environ 之上,
+    # 所以从 dict 里"移除"这些 key 没用 — 必须显式以空串覆盖父值。
+    # 父进程 os.environ 不变(其他 LLM provider 继续可用 API key)。
+    _override_env: Dict[str, str] = {
+        "ANTHROPIC_API_KEY": "",
+        "ANTHROPIC_BASE_URL": "",
+        "ANTHROPIC_AUTH_TOKEN": "",
     }
     if "ANTHROPIC_API_KEY" in os.environ or "ANTHROPIC_BASE_URL" in os.environ:
         logger.info(
-            "[claude_code_oauth] Stripping ANTHROPIC_API_KEY/ANTHROPIC_BASE_URL "
-            "from SDK subprocess env so CLI falls back to OAuth credentials."
+            "[claude_code_oauth] Overriding ANTHROPIC_API_KEY/ANTHROPIC_BASE_URL "
+            "with empty values in SDK subprocess env so CLI falls back to OAuth."
         )
 
     default_model = model
@@ -117,9 +134,9 @@ def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
     ) -> Dict[str, Any]:
         actual_model = (model or default_model).split("/")[-1]
 
-        system_prompt, user_text = _flatten_messages_to_string(messages)
-        if not user_text:
-            user_text = " "
+        system_prompt, content_blocks, has_image = _convert_messages(messages)
+        if not content_blocks:
+            content_blocks = [{"type": "text", "text": " "}]
 
         stderr_lines: List[str] = []
 
@@ -132,7 +149,7 @@ def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
             system_prompt=system_prompt,
             allowed_tools=[],
             max_turns=1,
-            env=_stripped_env,
+            env=_override_env,
             stderr=_capture_stderr,
             # 关键:屏蔽 CLI 加载用户级 ~/.claude/ 配置(output_style/skills/plugins 等)
             # 否则这些会被注入 system prompt,浪费 token + 影响输出格式
@@ -152,7 +169,20 @@ def create_claude_code_oauth_llm_call(model: str = "claude-sonnet-4-5"):
 
         try:
             async with ClaudeSDKClient(options=options) as client:
-                await client.query(user_text)
+                if has_image:
+                    # 多模态:用 AsyncIterable[dict] 模式发送 Anthropic content blocks
+                    async def _input_stream():
+                        yield {
+                            "type": "user",
+                            "message": {"role": "user", "content": content_blocks},
+                            "parent_tool_use_id": None,
+                            "session_id": "default",
+                        }
+                    await client.query(_input_stream())
+                else:
+                    # 纯文本:走 SDK string 模式(已验证稳定路径)
+                    await client.query(_blocks_to_string(content_blocks))
+
                 async for msg in client.receive_response():
                     msg_type = type(msg).__name__
 

+ 4 - 1
examples/process_pipeline/prompts/extract_workflow.prompt

@@ -69,7 +69,7 @@ action 写成对象:
 ```
 - modality 是数据形态:文本 / 图片 / 视频 / 音频 / 特征点 / 参数 / 模型 / 向量
 - 同一次提交给模型的所有文字描述统一合并为一个输入项
-- relation 格式:[来源.1O]、[去向.2I]、[来源.原始输入]、[去向.最终成品]
+- relation 格式:[来源.1O]、[去向.2I]、[来源.原始输入]、[去向.最终成品](1O和2I含义分别是:step1的output、step2的input)
 # effects 字段
 每个 effect 写成结构体:
 ```json
@@ -145,6 +145,9 @@ $user$
       "control_target": [],
       "artifact_type": null,
       "tools": [],
+      "apply_to_draft": { "实质": ["..."], "形式": ["..."] },
+      "workflow_step_ref": { "workflow_id": null, "step_id": "s1" },
+      "is_alternative_to": []
     }
   ]
 }

+ 23 - 81
examples/process_pipeline/prompts/extract_workflow.schema.json

@@ -1,13 +1,8 @@
 {
   "$schema": "http://json-schema.org/draft-07/schema#",
-  "title": "extract_workflow_output_v8",
+  "title": "extract_workflow_output_v6",
   "type": "object",
-  "required": [
-    "skip",
-    "skip_reason",
-    "workflow",
-    "capability"
-  ],
+  "required": ["skip", "skip_reason", "workflow", "capability"],
   "properties": {
     "skip": {
       "type": "boolean"
@@ -22,26 +17,17 @@
         },
         {
           "type": "object",
-          "required": [
-            "steps"
-          ],
+          "required": ["steps"],
           "properties": {
             "workflow_id": {
-              "type": [
-                "string",
-                "null"
-              ]
+              "type": ["string", "null"]
             },
             "steps": {
               "type": "array",
               "minItems": 1,
               "items": {
                 "type": "object",
-                "required": [
-                  "step_id",
-                  "order",
-                  "phase"
-                ],
+                "required": ["step_id", "order", "phase"],
                 "properties": {
                   "step_id": {
                     "type": "string",
@@ -53,21 +39,14 @@
                   },
                   "phase": {
                     "type": "string",
-                    "enum": [
-                      "非制作",
-                      "预处理",
-                      "生成",
-                      "编辑"
-                    ]
+                    "enum": ["非制作", "预处理", "生成", "编辑"]
                   }
                 }
               }
-            },
-            "additionalProperties": false
+            }
           }
         }
-      },
-      "additionalProperties": false
+      ]
     },
     "capability": {
       "type": "array",
@@ -94,10 +73,7 @@
           },
           "action": {
             "type": "object",
-            "required": [
-              "description",
-              "reasoning"
-            ],
+            "required": ["description", "reasoning"],
             "properties": {
               "description": {
                 "type": "string",
@@ -109,19 +85,11 @@
               }
             }
           },
-          "reasoning": {
-            "type": "string",
-            "minLength": 1
-          },
-          "inputs-boundary": {
+          "inputs": {
             "type": "array",
             "items": {
               "type": "object",
-              "required": [
-                "modality",
-                "description",
-                "relation"
-              ],
+              "required": ["modality", "description", "relation"],
               "properties": {
                 "modality": {
                   "type": "string",
@@ -147,15 +115,11 @@
               }
             }
           },
-          "outputs-boundary": {
+          "outputs": {
             "type": "array",
             "items": {
               "type": "object",
-              "required": [
-                "modality",
-                "description",
-                "relation"
-              ],
+              "required": ["modality", "description", "relation"],
               "properties": {
                 "modality": {
                   "type": "string",
@@ -182,10 +146,7 @@
             }
           },
           "body": {
-            "type": [
-              "string",
-              "null"
-            ]
+            "type": ["string", "null"]
           },
           "effects": {
             "type": "array",
@@ -209,12 +170,7 @@
                 },
                 "judge_method": {
                   "type": "string",
-                  "enum": [
-                    "llm",
-                    "vlm",
-                    "rule",
-                    "human"
-                  ]
+                  "enum": ["llm", "vlm", "rule", "human"]
                 },
                 "negative_examples": {
                   "type": "array",
@@ -235,10 +191,7 @@
             }
           },
           "artifact_type": {
-            "type": [
-              "string",
-              "null"
-            ]
+            "type": ["string", "null"]
           },
           "tools": {
             "type": "array",
@@ -248,10 +201,7 @@
           },
           "apply_to_draft": {
             "type": "object",
-            "required": [
-              "实质",
-              "形式"
-            ],
+            "required": ["实质", "形式"],
             "properties": {
               "实质": {
                 "type": "array",
@@ -274,16 +224,10 @@
               },
               {
                 "type": "object",
-                "required": [
-                  "workflow_id",
-                  "step_id"
-                ],
+                "required": ["workflow_id", "step_id"],
                 "properties": {
                   "workflow_id": {
-                    "type": [
-                      "string",
-                      "null"
-                    ]
+                    "type": ["string", "null"]
                   },
                   "step_id": {
                     "type": "string",
@@ -293,17 +237,15 @@
               }
             ]
           },
-          "tools-boundary": {
+          "is_alternative_to": {
             "type": "array",
             "items": {
               "type": "string",
               "pattern": "^c_(s[0-9]+_[0-9]+|standalone_[0-9]+)$"
             }
           }
-        },
-        "additionalProperties": false
+        }
       }
     }
-  },
-  "additionalProperties": false
-}
+  }
+}

+ 9 - 7
examples/process_pipeline/ui/app.js

@@ -178,13 +178,15 @@ function renderRawCases(rawCasesObj) {
     if (detailedCaseObj) {
         const cd = detailedCaseObj;
         let uniqueCases = new Set();
-        let uniqueWorkflow = new Set();
-        let uniqueCapabilities = new Set();
+        let calcWorkflow = 0;
+        let calcCapabilities = 0;
 
         if (cd.cases) {
             cd.cases.forEach(c => {
                 const cId = c.case_id || (c._raw && c._raw.case_id);
                 const cUrl = c.source_url || c.url;
+                const uniqueKey = cId || cUrl || Math.random().toString();
+                uniqueCases.add(uniqueKey);
                 if (c.workflow) calcWorkflow++;
                 if (c.capability && c.capability.length > 0) calcCapabilities++;
 
@@ -200,8 +202,8 @@ function renderRawCases(rawCasesObj) {
         }
 
         const displayTotal = uniqueCases.size > 0 ? uniqueCases.size : (cd.total !== undefined ? cd.total : 0);
-        const displayWorkflowSuccess = uniqueCases.size > 0 ? uniqueWorkflow.size : (cd.workflow_success !== undefined && cd.workflow_success !== null ? cd.workflow_success : (cd.success !== undefined && cd.success !== null ? cd.success : 0));
-        const displayCapabilitiesSuccess = uniqueCases.size > 0 ? uniqueCapabilities.size : (cd.capabilities_success !== undefined && cd.capabilities_success !== null ? cd.capabilities_success : 0);
+        const displayWorkflowSuccess = calcWorkflow > 0 ? calcWorkflow : (cd.workflow_success !== undefined && cd.workflow_success !== null ? cd.workflow_success : (cd.success !== undefined && cd.success !== null ? cd.success : 0));
+        const displayCapabilitiesSuccess = calcCapabilities > 0 ? calcCapabilities : (cd.capabilities_success !== undefined && cd.capabilities_success !== null ? cd.capabilities_success : 0);
 
         if (cd.total !== undefined || uniqueCases.size > 0) {
             totalStatsHtml = `<div style="display:flex; gap:1rem; margin-bottom:1rem; padding:0.5rem 1rem; background:rgba(0, 0, 0, 0.03); border-radius:8px; align-items:center;">
@@ -2882,8 +2884,8 @@ window.renderStructuredData = function (items, type, parentItem = null) {
                         <span class="row-expand-icon">▶</span>
                         ${capability && capability.capability_id ? `<span style="display:inline-block; color:#94a3b8; font-size:0.85em; font-weight:400;">${escapeHtml(capability.capability_id)}</span>` : '-'}
                     </td>
-                    <td class="capability-cell">${renderAction(capability)}</td>
                     <td class="capability-cell">${capability && capability.inputs && capability.inputs.length > 0 ? renderDataObjList(capability.inputs) : '-'}</td>
+                    <td class="capability-cell">${renderAction(capability)}</td>
                     <td class="capability-cell">${capability && capability.outputs && capability.outputs.length > 0 ? renderDataObjList(capability.outputs) : '-'}</td>
                     <td class="capability-cell"><div class="capability-clamp">${capability ? renderEffects(capability.effects) : '-'}</div></td>
                     <td class="capability-cell" style="font-size:0.9em;"><div class="capability-clamp">${applyTo ? renderApplyToVal(applyTo, suggestApplyTo) : '-'}</div></td>
@@ -2924,12 +2926,12 @@ window.renderStructuredData = function (items, type, parentItem = null) {
                                 <th style="padding: 12px 10px; width: 90px;">阶段</th>
                                 <th style="padding: 12px 8px; width: 90px;"></th>
                                 <th style="padding: 12px 10px; width: 180px;">输入</th>
-                                <th style="padding: 12px 10px; width: 140px;">动作/做法</th>
+                                <th style="padding: 12px 10px; width: 140px;">动作</th>
                                 <th style="padding: 12px 10px; width: 180px;">输出</th>
                                 <th style="padding: 12px 10px; width: 360px;">效果</th>
-                                <th style="padding: 12px 10px; width: 180px;">用法</th>
                                 <th style="padding: 12px 10px; width: 260px;">作用域</th>
                                 <th style="padding: 12px 10px; width: 130px;">工具</th>
+                                <th style="padding: 12px 10px; width: 300px;">做法</th>
                             </tr>
                         </thead>
                         <tbody>`;

+ 1 - 1
scratch/check_case35.py

@@ -8,7 +8,7 @@ if not case_35:
     print("case 35 NOT FOUND")
 else:
     wf = case_35.get("workflow")
-    fr = case_35.get("fragments") or []
+    fr = case_35.get("capability") or case_35.get("fragments") or []
     print("title:", (case_35.get("title") or "")[:50])
     print("workflow exists:", wf is not None)
     if wf: