4 weeks ago · 46dd6c5d7c
--- a/pqai_agent/toolkit/web_extractor.py
+++ b/pqai_agent/toolkit/web_extractor.py
@@ -0,0 +1,29 @@
 
				+from typing import Any, Dict, List
			
 
				+import requests
			
 
				+from bs4 import BeautifulSoup
			
 
				+
			
 
				+from pqai_agent.toolkit.base import BaseToolkit
			
 
				+from pqai_agent.toolkit.function_tool import FunctionTool
			
 
				+
			
 
				class WebExtractor(BaseToolkit):
				    """Toolkit exposing a single tool that downloads a web page and returns its text."""

				    # Upper bound (seconds) for connecting to / reading from the remote host.
				    # Without an explicit timeout `requests` waits indefinitely, which would
				    # stall the calling agent on an unresponsive server.
				    _REQUEST_TIMEOUT_SECONDS = 10

				    def extract_web_content(self, url: str) -> str:
				        """
				        Extracts the content of a webpage given its URL.

				        Args:
				            url (str): The URL of the webpage to extract content from.

				        Returns:
				            str: The extracted content of the webpage, or an error message
				                starting with "Error occurred while extracting content:" if
				                the page could not be fetched or parsed.
				        """
				        try:
				            response = requests.get(url, timeout=self._REQUEST_TIMEOUT_SECONDS)
				            response.raise_for_status()  # Raise an error for bad responses

				            # Only trust the header encoding when the server actually sent a
				            # charset; otherwise `requests` falls back to ISO-8859-1 for text
				            # responses, which garbles UTF-8 pages. Handing the raw bytes to
				            # BeautifulSoup lets it detect the encoding (e.g. from <meta> tags).
				            content_type = response.headers.get("Content-Type", "")
				            declared_encoding = (
				                response.encoding if "charset" in content_type.lower() else None
				            )
				            soup = BeautifulSoup(
				                response.content, 'html.parser', from_encoding=declared_encoding
				            )

				            return soup.get_text(separator='\n', strip=True)
				        except Exception as e:
				            # Deliberately broad: this method is exposed as a tool, so failures
				            # are reported back as text instead of propagating to the caller.
				            return f"Error occurred while extracting content: {str(e)}"

				    def get_tools(self) -> List[FunctionTool]:
				        """Return the tools provided by this toolkit: `extract_web_content` wrapped in a FunctionTool."""
				        return [FunctionTool(self.extract_web_content)]