|
|
@@ -15,6 +15,9 @@ import base64
|
|
|
import mimetypes
|
|
|
from pathlib import Path
|
|
|
from typing import Optional
|
|
|
+from urllib.parse import urlparse
|
|
|
+
|
|
|
+import httpx
|
|
|
|
|
|
from agent.tools import tool, ToolResult, ToolContext
|
|
|
|
|
|
@@ -24,7 +27,7 @@ MAX_LINE_LENGTH = 2000
|
|
|
MAX_BYTES = 50 * 1024 # 50KB
|
|
|
|
|
|
|
|
|
-@tool(description="读取文件内容,支持文本文件、图片、PDF 等多种格式")
|
|
|
+@tool(description="读取文件内容,支持文本文件、图片、PDF 等多种格式,也支持 HTTP/HTTPS URL")
|
|
|
async def read_file(
|
|
|
file_path: str,
|
|
|
offset: int = 0,
|
|
|
@@ -37,7 +40,7 @@ async def read_file(
|
|
|
参考 OpenCode 实现
|
|
|
|
|
|
Args:
|
|
|
- file_path: 文件路径(绝对路径或相对路径)
|
|
|
+ file_path: 文件路径(绝对路径、相对路径或 HTTP/HTTPS URL)
|
|
|
offset: 起始行号(从 0 开始)
|
|
|
limit: 读取行数(默认 2000 行)
|
|
|
context: 工具上下文
|
|
|
@@ -45,6 +48,11 @@ async def read_file(
|
|
|
Returns:
|
|
|
ToolResult: 文件内容
|
|
|
"""
|
|
|
+ # 检测是否为 HTTP/HTTPS URL
|
|
|
+ parsed = urlparse(file_path)
|
|
|
+ if parsed.scheme in ("http", "https"):
|
|
|
+ return await _read_from_url(file_path)
|
|
|
+
|
|
|
# 解析路径
|
|
|
path = Path(file_path)
|
|
|
if not path.is_absolute():
|
|
|
@@ -238,3 +246,74 @@ def _is_binary_file(path: Path) -> bool:
|
|
|
|
|
|
except Exception:
|
|
|
return False
|
|
|
+
|
|
|
+
|
|
|
# Image extensions recognised from the URL path, with the media type to report
# when the server does not send a usable image/* Content-Type.
_URL_IMAGE_MIME_BY_EXT = {
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".png": "image/png",
    ".gif": "image/gif",
    ".webp": "image/webp",
    ".bmp": "image/bmp",
}


async def _read_from_url(url: str) -> ToolResult:
    """
    Download a resource over HTTP/HTTPS and wrap it in a ToolResult.

    The response is classified by its Content-Type header (parameters such as
    ``; charset=...`` are ignored for matching) and, for images, also by the
    extension of the URL *path*:

    * images  -> base64-encoded payload in ``images``
    * text / JSON -> decoded body wrapped in ``<file>...</file>``
    * anything else -> a one-line summary with the byte size

    Args:
        url: The HTTP or HTTPS URL to fetch. Redirects are followed.

    Returns:
        ToolResult: the content on success. On an HTTP error status or any
        other failure, a ToolResult with ``error`` set; this function does not
        raise.
    """
    try:
        async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
            response = await client.get(url)
            response.raise_for_status()

            # NOTE(review): the whole body is buffered in memory with no size
            # cap; consider enforcing a limit consistent with local reads.
            content_type = response.headers.get("content-type", "")
            raw = response.content

            # Bare media type without parameters: "Text/HTML; charset=x" -> "text/html".
            media_type = content_type.split(";", 1)[0].strip().lower()

            # Derive the file name from the URL path only, so a query string or
            # fragment never leaks into the name or the extension check.
            url_path = Path(urlparse(url).path)
            filename = url_path.name or "downloaded_file"
            ext_mime = _URL_IMAGE_MIME_BY_EXT.get(url_path.suffix.lower())

            # Image
            if media_type.startswith("image/") or ext_mime is not None:
                # Prefer the server's image/* type. When the image was detected
                # only by extension (e.g. served as application/octet-stream),
                # report the type implied by the extension instead.
                mime_type = media_type if media_type.startswith("image/") else ext_mime
                b64_data = base64.b64encode(raw).decode("ascii")
                return ToolResult(
                    title=filename,
                    output=f"图片文件: {filename} (URL: {url}, MIME: {mime_type}, {len(raw)} bytes)",
                    metadata={"mime_type": mime_type, "url": url, "truncated": False},
                    images=[{
                        "type": "base64",
                        "media_type": mime_type,
                        "data": b64_data,
                    }],
                )

            # Text (including JSON and structured "+json" types)
            if (
                media_type.startswith("text/")
                or media_type == "application/json"
                or media_type.endswith("+json")
            ):
                # Honour the charset declared by the server; fall back to
                # UTF-8 when it is missing or names an unknown codec.
                encoding = response.encoding or "utf-8"
                try:
                    text = raw.decode(encoding, errors="replace")
                except LookupError:
                    text = raw.decode("utf-8", errors="replace")

                lines = text.split("\n")
                preview = "\n".join(lines[:20])
                return ToolResult(
                    title=filename,
                    output=f"<file>\n{text}\n</file>",
                    metadata={
                        "preview": preview,
                        "url": url,
                        "mime_type": content_type,
                        "total_lines": len(lines),
                    },
                )

            # Any other binary payload: report size only, never dump the bytes.
            return ToolResult(
                title=filename,
                output=f"二进制文件: {filename} (URL: {url}, {len(raw)} bytes)",
                metadata={"url": url, "mime_type": content_type, "size": len(raw)},
            )

    except httpx.HTTPStatusError as e:
        # Raised by raise_for_status() for 4xx/5xx responses.
        return ToolResult(
            title="HTTP 错误",
            output=f"无法下载文件: {url}\nHTTP {e.response.status_code}: {e.response.reason_phrase}",
            error=str(e),
        )
    except Exception as e:
        # Tool boundary: report every other failure (DNS, timeout, TLS, ...)
        # through the result instead of raising.
        return ToolResult(
            title="下载失败",
            output=f"无法从 URL 读取文件: {url}\n错误: {str(e)}",
            error=str(e),
        )
|