guantao 1 неделя назад
Родитель
Commit
2fad15af2d
1 измененных файлов с 81 добавлено и 2 удалено
  1. 81 2
      agent/tools/builtin/file/read.py

+ 81 - 2
agent/tools/builtin/file/read.py

@@ -15,6 +15,9 @@ import base64
 import mimetypes
 import mimetypes
 from pathlib import Path
 from pathlib import Path
 from typing import Optional
 from typing import Optional
+from urllib.parse import urlparse
+
+import httpx
 
 
 from agent.tools import tool, ToolResult, ToolContext
 from agent.tools import tool, ToolResult, ToolContext
 
 
@@ -24,7 +27,7 @@ MAX_LINE_LENGTH = 2000
 MAX_BYTES = 50 * 1024  # 50KB
 MAX_BYTES = 50 * 1024  # 50KB
 
 
 
 
-@tool(description="读取文件内容,支持文本文件、图片、PDF 等多种格式")
+@tool(description="读取文件内容,支持文本文件、图片、PDF 等多种格式,也支持 HTTP/HTTPS URL")
 async def read_file(
 async def read_file(
     file_path: str,
     file_path: str,
     offset: int = 0,
     offset: int = 0,
@@ -37,7 +40,7 @@ async def read_file(
     参考 OpenCode 实现
     参考 OpenCode 实现
 
 
     Args:
     Args:
-        file_path: 文件路径(绝对路径或相对路径
+        file_path: 文件路径(绝对路径、相对路径或 HTTP/HTTPS URL)
         offset: 起始行号(从 0 开始)
         offset: 起始行号(从 0 开始)
         limit: 读取行数(默认 2000 行)
         limit: 读取行数(默认 2000 行)
         context: 工具上下文
         context: 工具上下文
@@ -45,6 +48,11 @@ async def read_file(
     Returns:
     Returns:
         ToolResult: 文件内容
         ToolResult: 文件内容
     """
     """
+    # 检测是否为 HTTP/HTTPS URL
+    parsed = urlparse(file_path)
+    if parsed.scheme in ("http", "https"):
+        return await _read_from_url(file_path)
+
     # 解析路径
     # 解析路径
     path = Path(file_path)
     path = Path(file_path)
     if not path.is_absolute():
     if not path.is_absolute():
@@ -238,3 +246,74 @@ def _is_binary_file(path: Path) -> bool:
 
 
     except Exception:
     except Exception:
         return False
         return False
+
+
+async def _read_from_url(url: str) -> ToolResult:
+    """
+    Download a resource from an HTTP/HTTPS URL and wrap it in a ToolResult.
+
+    Images are returned base64-encoded in ``images``; ``text/*`` and JSON
+    bodies are returned as text inside ``<file>`` tags; any other content is
+    only summarised by file name and byte size.
+
+    Args:
+        url: HTTP/HTTPS URL to fetch. Redirects are followed and the request
+            times out after 30 seconds.
+
+    Returns:
+        ToolResult: The fetched content. Failures (HTTP error status, network
+        errors, anything else raised while handling the response) are reported
+        through a ToolResult with ``error`` set rather than by raising.
+    """
+    # Fallback MIME types, keyed by the URL path's file suffix, for responses
+    # whose Content-Type header is missing or is not an image type.
+    image_mime_by_suffix = {
+        ".jpg": "image/jpeg",
+        ".jpeg": "image/jpeg",
+        ".png": "image/png",
+        ".gif": "image/gif",
+        ".webp": "image/webp",
+        ".bmp": "image/bmp",
+    }
+
+    # NOTE(review): the URL is fetched exactly as supplied, and the whole body
+    # is buffered in memory and returned untruncated. Confirm that callers are
+    # trusted, or add host and size restrictions.
+    try:
+        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
+            response = await client.get(url)
+            response.raise_for_status()
+
+            content_type = response.headers.get("content-type", "")
+            # Compare against the bare media type: the header may carry
+            # parameters ("application/json; charset=utf-8") and any casing.
+            media_type = content_type.split(";", 1)[0].strip().lower()
+            raw = response.content
+
+            # Use only the URL path for the file name and suffix, so a query
+            # string or fragment cannot hide or fake the extension.
+            url_path = urlparse(url).path
+            filename = Path(url_path).name or "downloaded_file"
+            suffix_mime = image_mime_by_suffix.get(Path(url_path).suffix.lower())
+
+            # Image: trust an image/* header, otherwise fall back to the suffix.
+            header_is_image = media_type.startswith("image/")
+            if header_is_image or suffix_mime is not None:
+                mime_type = media_type if header_is_image else suffix_mime
+                b64_data = base64.b64encode(raw).decode("ascii")
+                return ToolResult(
+                    title=filename,
+                    output=f"图片文件: {filename} (URL: {url}, MIME: {mime_type}, {len(raw)} bytes)",
+                    metadata={"mime_type": mime_type, "url": url, "truncated": False},
+                    images=[{
+                        "type": "base64",
+                        "media_type": mime_type,
+                        "data": b64_data,
+                    }],
+                )
+
+            # Text: decoded as UTF-8, undecodable bytes are replaced.
+            if media_type.startswith("text/") or media_type == "application/json":
+                text = raw.decode("utf-8", errors="replace")
+                lines = text.split("\n")
+                preview = "\n".join(lines[:20])
+                return ToolResult(
+                    title=filename,
+                    output=f"<file>\n{text}\n</file>",
+                    metadata={
+                        "preview": preview,
+                        "url": url,
+                        "mime_type": content_type,
+                        "total_lines": len(lines),
+                    }
+                )
+
+            # Any other content: report name and size only, not the bytes.
+            return ToolResult(
+                title=filename,
+                output=f"二进制文件: {filename} (URL: {url}, {len(raw)} bytes)",
+                metadata={"url": url, "mime_type": content_type, "size": len(raw)}
+            )
+
+    except httpx.HTTPStatusError as e:
+        # Raised by raise_for_status() for 4xx/5xx responses.
+        return ToolResult(
+            title="HTTP 错误",
+            output=f"无法下载文件: {url}\nHTTP {e.response.status_code}: {e.response.reason_phrase}",
+            error=str(e)
+        )
+    except Exception as e:
+        # Tool boundary: turn every other failure into an error result.
+        return ToolResult(
+            title="下载失败",
+            output=f"无法从 URL 读取文件: {url}\n错误: {str(e)}",
+            error=str(e)
+        )