from typing import Any, Dict, List

import requests
from bs4 import BeautifulSoup

from pqai_agent.toolkit.base import BaseToolkit
from pqai_agent.toolkit.function_tool import FunctionTool

# Upper bound (in seconds) for the HTTP request. Without a timeout,
# requests waits indefinitely on an unresponsive server.
_REQUEST_TIMEOUT_SECONDS = 30


class WebExtractor(BaseToolkit):
    """Toolkit exposing a single tool that fetches a web page as plain text."""

    def extract_web_content(self, url: str) -> str:
        """
        Extracts the content of a webpage given its URL.

        The page is fetched with an HTTP GET, parsed with BeautifulSoup's
        'html.parser', and reduced to its text with one text fragment per
        line. Failures are not raised: any exception is converted into an
        error message that is returned in place of the content.

        Args:
            url (str): The URL of the webpage to extract content from.

        Returns:
            str: The extracted content of the webpage, or a message starting
                with "Error occurred while extracting content:" on failure.
        """
        try:
            # Bounded wait so a stalled server cannot hang the caller.
            response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()  # Raise an error for bad responses
            soup = BeautifulSoup(response.text, 'html.parser')
            return soup.get_text(separator='\n', strip=True)
        except Exception as e:
            # Deliberately broad: this method reports every failure
            # (network, HTTP status, parsing) as text instead of raising.
            return f"Error occurred while extracting content: {str(e)}"

    def get_tools(self) -> List[FunctionTool]:
        """Return the toolkit's tools: `extract_web_content` wrapped in a FunctionTool."""
        return [FunctionTool(self.extract_web_content)]