hai 2 meses · 2fad15af2d
--- a/agent/tools/builtin/file/read.py
+++ b/agent/tools/builtin/file/read.py
@@ -15,6 +15,9 @@ import base64
 
															 import mimetypes
														
 
															 from pathlib import Path
														
 
															 from typing import Optional
														
 
															+from urllib.parse import urlparse
														
 
															+
														
 
															+import httpx
														
 
															 from agent.tools import tool, ToolResult, ToolContext
														
@@ -24,7 +27,7 @@ MAX_LINE_LENGTH = 2000
 
															 MAX_BYTES = 50 * 1024  # 50KB
														
 
															-@tool(description="读取文件内容，支持文本文件、图片、PDF 等多种格式")
														
 
															+@tool(description="读取文件内容，支持文本文件、图片、PDF 等多种格式，也支持 HTTP/HTTPS URL")
														
 
															 async def read_file(
														
 
															     file_path: str,
														
 
															     offset: int = 0,
														
@@ -37,7 +40,7 @@ async def read_file(
 
															     参考 OpenCode 实现
														
 
															     Args:
														
 
															-        file_path: 文件路径（绝对路径或相对路径）
														
 
															+        file_path: 文件路径（绝对路径、相对路径或 HTTP/HTTPS URL）
														
 
															         offset: 起始行号（从 0 开始）
														
 
															         limit: 读取行数（默认 2000 行）
														
 
															         context: 工具上下文
														
@@ -45,6 +48,11 @@ async def read_file(
 
															     Returns:
														
 
															         ToolResult: 文件内容
														
 
															     """
														
 
															+    # 检测是否为 HTTP/HTTPS URL
														
 
															+    parsed = urlparse(file_path)
														
 
															+    if parsed.scheme in ("http", "https"):
														
 
															+        return await _read_from_url(file_path)
														
 
															+
														
 
															     # 解析路径
														
 
															     path = Path(file_path)
														
 
															     if not path.is_absolute():
														
@@ -238,3 +246,74 @@ def _is_binary_file(path: Path) -> bool:
 
															     except Exception:
														
 
															         return False
														
 
															+
														
 
															+
														
 
															+async def _read_from_url(url: str) -> ToolResult:
														
 
															+    """
														
 
															+    从 HTTP/HTTPS URL 读取文件内容。
														
 
															+
														
 
															+    主要用于图片等多媒体资源，自动转换为 base64。
														
 
															+    """
														
 
															+    try:
														
 
															+        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
														
 
															+            response = await client.get(url)
														
 
															+            response.raise_for_status()
														
 
															+
														
 
															+            content_type = response.headers.get("content-type", "")
														
 
															+            raw = response.content
														
 
															+
														
 
															+            # 从 URL 提取文件名
														
 
															+            from urllib.parse import urlparse
														
 
															+            parsed = urlparse(url)
														
 
															+            filename = Path(parsed.path).name or "downloaded_file"
														
 
															+
														
 
															+            # 图片文件
														
 
															+            if content_type.startswith("image/") or any(url.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"]):
														
 
															+                mime_type = content_type.split(";")[0] if content_type else "image/jpeg"
														
 
															+                b64_data = base64.b64encode(raw).decode("ascii")
														
 
															+                return ToolResult(
														
 
															+                    title=filename,
														
 
															+                    output=f"图片文件: {filename} (URL: {url}, MIME: {mime_type}, {len(raw)} bytes)",
														
 
															+                    metadata={"mime_type": mime_type, "url": url, "truncated": False},
														
 
															+                    images=[{
														
 
															+                        "type": "base64",
														
 
															+                        "media_type": mime_type,
														
 
															+                        "data": b64_data,
														
 
															+                    }],
														
 
															+                )
														
 
															+
														
 
															+            # 文本文件
														
 
															+            if content_type.startswith("text/") or content_type == "application/json":
														
 
															+                text = raw.decode("utf-8", errors="replace")
														
 
															+                lines = text.split("\n")
														
 
															+                preview = "\n".join(lines[:20])
														
 
															+                return ToolResult(
														
 
															+                    title=filename,
														
 
															+                    output=f"<file>\n{text}\n</file>",
														
 
															+                    metadata={
														
 
															+                        "preview": preview,
														
 
															+                        "url": url,
														
 
															+                        "mime_type": content_type,
														
 
															+                        "total_lines": len(lines),
														
 
															+                    }
														
 
															+                )
														
 
															+
														
 
															+            # 其他二进制文件
														
 
															+            return ToolResult(
														
 
															+                title=filename,
														
 
															+                output=f"二进制文件: {filename} (URL: {url}, {len(raw)} bytes)",
														
 
															+                metadata={"url": url, "mime_type": content_type, "size": len(raw)}
														
 
															+            )
														
 
															+
														
 
															+    except httpx.HTTPStatusError as e:
														
 
															+        return ToolResult(
														
 
															+            title="HTTP 错误",
														
 
															+            output=f"无法下载文件: {url}\nHTTP {e.response.status_code}: {e.response.reason_phrase}",
														
 
															+            error=str(e)
														
 
															+        )
														
 
															+    except Exception as e:
														
 
															+        return ToolResult(
														
 
															+            title="下载失败",
														
 
															+            output=f"无法从 URL 读取文件: {url}\n错误: {str(e)}",
														
 
															+            error=str(e)
														
 
															+        )