瀏覽代碼

update: support pdf reading

guantao 3 周之前
父節點
當前提交
0c3672b5de
共有 1 個文件被更改,包括 118 次插入 和 14 次删除
  1. 118 14
      agent/tools/builtin/browser/baseClass.py

+ 118 - 14
agent/tools/builtin/browser/baseClass.py

@@ -1242,19 +1242,118 @@ async def browser_extract_content(query: str, extract_links: bool = False,
             long_term_memory=f"提取内容失败: {query}"
             long_term_memory=f"提取内容失败: {query}"
         )
         )
 
 
+async def _detect_and_download_pdf_via_cdp(browser) -> Optional[str]:
+    """
+    Detect whether the current page is a PDF and, if so, download it locally via CDP (in-browser fetch).
+    Advantage: the browser's cookies/session are sent automatically, so login-protected PDFs are reachable.
+    Returns the local file path; returns None for non-PDF pages or when detection/download fails.
+    """
+    try:
+        current_url = await browser.get_current_page_url()
+        if not current_url:
+            return None
+
+        parsed = urlparse(current_url)
+        is_pdf = parsed.path.lower().endswith('.pdf')
+
+        # When the URL is not obviously a PDF, check the content-type via CDP
+        if not is_pdf:
+            try:
+                cdp = await browser.get_or_create_cdp_session()
+                ct_result = await cdp.cdp_client.send.Runtime.evaluate(
+                    params={'expression': 'document.contentType'},
+                    session_id=cdp.session_id
+                )
+                content_type = ct_result.get('result', {}).get('value', '')
+                is_pdf = 'pdf' in content_type.lower()
+            except Exception:  # best-effort probe: on failure is_pdf simply stays False
+                pass
+
+        if not is_pdf:
+            return None
+
+        # Download the PDF with the in-browser fetch API (cookies are sent automatically)
+        cdp = await browser.get_or_create_cdp_session()
+        js_code = """
+        (async () => {
+            try {
+                const resp = await fetch(window.location.href);
+                if (!resp.ok) return JSON.stringify({error: 'HTTP ' + resp.status});
+                const blob = await resp.blob();
+                return new Promise((resolve, reject) => {
+                    const reader = new FileReader();
+                    reader.onloadend = () => resolve(JSON.stringify({data: reader.result}));
+                    reader.onerror = () => resolve(JSON.stringify({error: 'FileReader failed'}));
+                    reader.readAsDataURL(blob);
+                });
+            } catch(e) {
+                return JSON.stringify({error: e.message});
+            }
+        })()
+        """
+        result = await cdp.cdp_client.send.Runtime.evaluate(
+            params={
+                'expression': js_code,
+                'awaitPromise': True,  # the expression is an async IIFE, so wait for it to settle
+                'returnByValue': True,
+                'timeout': 60000  # presumably milliseconds, per CDP Runtime.evaluate -- confirm
+            },
+            session_id=cdp.session_id
+        )
+
+        value = result.get('result', {}).get('value', '')
+        if not value:
+            print("⚠️ CDP fetch PDF: 无返回值")
+            return None
+
+        data = json.loads(value)  # the JS snippet returns a JSON string holding either 'data' or 'error'
+        if 'error' in data:
+            print(f"⚠️ CDP fetch PDF 失败: {data['error']}")
+            return None
+
+        # Extract the base64 payload from the data URL and decode it
+        data_url = data['data']  # data:application/pdf;base64,JVBERi0...
+        base64_data = data_url.split(',', 1)[1]
+        pdf_bytes = base64.b64decode(base64_data)
+
+        # Save to local disk
+        save_dir = Path.cwd() / ".browser_use_files"  # relative to the process's current working directory
+        save_dir.mkdir(parents=True, exist_ok=True)
+
+        filename = Path(parsed.path).name if parsed.path else ""
+        if not filename or not filename.lower().endswith('.pdf'):  # no usable name in the URL: use a timestamped one
+            import time
+            filename = f"downloaded_{int(time.time())}.pdf"
+        save_path = str(save_dir / filename)
+
+        with open(save_path, 'wb') as f:
+            f.write(pdf_bytes)
+
+        print(f"📄 PDF 已通过 CDP 下载到: {save_path} ({len(pdf_bytes)} bytes)")
+        return save_path
+
+    except Exception as e:  # any failure above is reported and mapped to None
+        print(f"⚠️ PDF 检测/下载异常: {e}")
+        return None
+
+
 @tool()
 @tool()
 async def browser_read_long_content(
 async def browser_read_long_content(
-    goal: Any, 
-    source: str = "page", 
-    context: Any = "",  
-    **kwargs            
+    goal: Any,
+    source: str = "page",
+    context: Any = "",
+    **kwargs
 ) -> ToolResult:
 ) -> ToolResult:
     """
     """
-    智能读取长内容。已修复参数嵌套导致的 Field Missing 报错。
+    智能读取长内容。支持自动检测并读取网页上的 PDF 文件。
+
+    当 source="page" 且当前页面是 PDF 时,会通过 CDP 下载 PDF 并用 pypdf 解析,
+    而非使用 DOM 提取(DOM 无法读取浏览器内置 PDF Viewer 的内容)。
+    通过 CDP 下载可自动携带浏览器的 cookies/session,支持需要登录的 PDF。
     """
     """
     try:
     try:
         browser, tools = await get_browser_session()
         browser, tools = await get_browser_session()
-        
+
         # 1. 提取目标文本 (针对 GoalTree 字典结构)
         # 1. 提取目标文本 (针对 GoalTree 字典结构)
         final_goal_text = ""
         final_goal_text = ""
         if isinstance(goal, dict):
         if isinstance(goal, dict):
@@ -1265,27 +1364,32 @@ async def browser_read_long_content(
         # 2. 清洗业务背景 (过滤框架注入的 dict 类型 context)
         # 2. 清洗业务背景 (过滤框架注入的 dict 类型 context)
         business_context = context if isinstance(context, str) else ""
         business_context = context if isinstance(context, str) else ""
 
 
-        # 3. 验证并实例化
+        # 3. PDF 自动检测:当 source="page" 时检查是否为 PDF 页面
+        available_files = []
+        if source.lower() == "page":
+            pdf_path = await _detect_and_download_pdf_via_cdp(browser)
+            if pdf_path:
+                source = pdf_path
+                available_files.append(pdf_path)
+
+        # 4. 验证并实例化
         action_params = ReadContentAction(
         action_params = ReadContentAction(
-            goal=final_goal_text, 
+            goal=final_goal_text,
             source=source,
             source=source,
             context=business_context
             context=business_context
         )
         )
 
 
-        # --- 4. 核心修复:解包参数 (Unpacking) ---
-        # 使用 ** 将字典解包为平铺参数:goal='...', source='...', context='...'
-        # 这样底层函数就能直接识别到 goal 字段了
+        # 5. 解包参数调用底层方法
         result = await tools.read_long_content(
         result = await tools.read_long_content(
-            **action_params.model_dump(),  
+            **action_params.model_dump(),
             browser_session=browser,
             browser_session=browser,
             page_extraction_llm=RunnableLambda(extraction_adapter),
             page_extraction_llm=RunnableLambda(extraction_adapter),
-            available_file_paths=[] 
+            available_file_paths=available_files
         )
         )
 
 
         return action_result_to_tool_result(result, f"深度读取: {source}")
         return action_result_to_tool_result(result, f"深度读取: {source}")
 
 
     except Exception as e:
     except Exception as e:
-        # 补全 output 参数确保报错链路不崩溃
         return ToolResult(
         return ToolResult(
             title="深度读取失败",
             title="深度读取失败",
             output="",
             output="",