2 månader sedan · 2fad15af2d
--- a/agent/tools/builtin/file/read.py
+++ b/agent/tools/builtin/file/read.py
@@ -15,6 +15,9 @@ import base64
 
				 import mimetypes
			
 
				 from pathlib import Path
			
 
				 from typing import Optional
			
 
				+from urllib.parse import urlparse
			
 
				+
			
 
				+import httpx
			
 
				 
			
 
				 from agent.tools import tool, ToolResult, ToolContext
			
 
				 
			
@@ -24,7 +27,7 @@ MAX_LINE_LENGTH = 2000
 
				 MAX_BYTES = 50 * 1024  # 50KB
			
 
				 
			
 
				 
			
 
				-@tool(description="读取文件内容，支持文本文件、图片、PDF 等多种格式")
			
 
				+@tool(description="读取文件内容，支持文本文件、图片、PDF 等多种格式，也支持 HTTP/HTTPS URL")
			
 
				 async def read_file(
			
 
				     file_path: str,
			
 
				     offset: int = 0,
			
@@ -37,7 +40,7 @@ async def read_file(
 
				     参考 OpenCode 实现
			
 
				 
			
 
				     Args:
			
 
				-        file_path: 文件路径（绝对路径或相对路径）
			
 
				+        file_path: 文件路径（绝对路径、相对路径或 HTTP/HTTPS URL）
			
 
				         offset: 起始行号（从 0 开始）
			
 
				         limit: 读取行数（默认 2000 行）
			
 
				         context: 工具上下文
			
@@ -45,6 +48,11 @@ async def read_file(
 
				     Returns:
			
 
				         ToolResult: 文件内容
			
 
				     """
			
 
				+    # 检测是否为 HTTP/HTTPS URL
			
 
				+    parsed = urlparse(file_path)
			
 
				+    if parsed.scheme in ("http", "https"):
			
 
				+        return await _read_from_url(file_path)
			
 
				+
			
 
				     # 解析路径
			
 
				     path = Path(file_path)
			
 
				     if not path.is_absolute():
			
@@ -238,3 +246,74 @@ def _is_binary_file(path: Path) -> bool:
 
				 
			
 
				     except Exception:
			
 
				         return False
			
 
				+
			
 
				+
			
 
				+async def _read_from_url(url: str) -> ToolResult:
			
 
				+    """
			
 
				+    从 HTTP/HTTPS URL 读取文件内容。
			
 
				+
			
 
				+    主要用于图片等多媒体资源，自动转换为 base64。
			
 
				+    """
			
 
				+    try:
			
 
				+        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
			
 
				+            response = await client.get(url)
			
 
				+            response.raise_for_status()
			
 
				+
			
 
				+            content_type = response.headers.get("content-type", "")
			
 
				+            raw = response.content
			
 
				+
			
 
				+            # 从 URL 提取文件名
			
 
				+            from urllib.parse import urlparse
			
 
				+            parsed = urlparse(url)
			
 
				+            filename = Path(parsed.path).name or "downloaded_file"
			
 
				+
			
 
				+            # 图片文件
			
 
				+            if content_type.startswith("image/") or any(url.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp"]):
			
 
				+                mime_type = content_type.split(";")[0] if content_type else "image/jpeg"
			
 
				+                b64_data = base64.b64encode(raw).decode("ascii")
			
 
				+                return ToolResult(
			
 
				+                    title=filename,
			
 
				+                    output=f"图片文件: {filename} (URL: {url}, MIME: {mime_type}, {len(raw)} bytes)",
			
 
				+                    metadata={"mime_type": mime_type, "url": url, "truncated": False},
			
 
				+                    images=[{
			
 
				+                        "type": "base64",
			
 
				+                        "media_type": mime_type,
			
 
				+                        "data": b64_data,
			
 
				+                    }],
			
 
				+                )
			
 
				+
			
 
				+            # 文本文件
			
 
				+            if content_type.startswith("text/") or content_type == "application/json":
			
 
				+                text = raw.decode("utf-8", errors="replace")
			
 
				+                lines = text.split("\n")
			
 
				+                preview = "\n".join(lines[:20])
			
 
				+                return ToolResult(
			
 
				+                    title=filename,
			
 
				+                    output=f"<file>\n{text}\n</file>",
			
 
				+                    metadata={
			
 
				+                        "preview": preview,
			
 
				+                        "url": url,
			
 
				+                        "mime_type": content_type,
			
 
				+                        "total_lines": len(lines),
			
 
				+                    }
			
 
				+                )
			
 
				+
			
 
				+            # 其他二进制文件
			
 
				+            return ToolResult(
			
 
				+                title=filename,
			
 
				+                output=f"二进制文件: {filename} (URL: {url}, {len(raw)} bytes)",
			
 
				+                metadata={"url": url, "mime_type": content_type, "size": len(raw)}
			
 
				+            )
			
 
				+
			
 
				+    except httpx.HTTPStatusError as e:
			
 
				+        return ToolResult(
			
 
				+            title="HTTP 错误",
			
 
				+            output=f"无法下载文件: {url}\nHTTP {e.response.status_code}: {e.response.reason_phrase}",
			
 
				+            error=str(e)
			
 
				+        )
			
 
				+    except Exception as e:
			
 
				+        return ToolResult(
			
 
				+            title="下载失败",
			
 
				+            output=f"无法从 URL 读取文件: {url}\n错误: {str(e)}",
			
 
				+            error=str(e)
			
 
				+        )