4 weeks ago · 46dd6c5d7c
--- a/pqai_agent/toolkit/web_extractor.py
+++ b/pqai_agent/toolkit/web_extractor.py
@@ -0,0 +1,29 @@
 
															+from typing import Any, Dict, List
														
 
															+import requests
														
 
															+from bs4 import BeautifulSoup
														
 
															+
														
 
															+from pqai_agent.toolkit.base import BaseToolkit
														
 
															+from pqai_agent.toolkit.function_tool import FunctionTool
														
 
															+
														
 
															+class WebExtractor(BaseToolkit):
														
 
															+    def extract_web_content(self, url: str) -> str:
														
 
															+        """
														
 
															+        Extracts the content of a webpage given its URL.
														
 
															+        Args:
														
 
															+            url (str): The URL of the webpage to extract content from.
														
 
															+        Returns:
														
 
															+            str: The extracted content of the webpage.
														
 
															+        """
														
 
															+
														
 
															+        try:
														
 
															+            response = requests.get(url)
														
 
															+            response.raise_for_status()  # Raise an error for bad responses
														
 
															+            soup = BeautifulSoup(response.text, 'html.parser')
														
 
															+
														
 
															+            content = soup.get_text(separator='\n', strip=True)
														
 
															+            return content
														
 
															+        except Exception as e:
														
 
															+            return f"Error occurred while extracting content: {str(e)}"
														
 
															+
														
 
															+    def get_tools(self) -> List[FunctionTool]:
														
 
															+        return [FunctionTool(self.extract_web_content)]