1234567891011121314151617181920212223242526272829 |
- from typing import Any, Dict, List
- import requests
- from bs4 import BeautifulSoup
- from pqai_agent.toolkit.base import BaseToolkit
- from pqai_agent.toolkit.function_tool import FunctionTool
class WebExtractor(BaseToolkit):
    """Toolkit exposing a single tool that fetches a web page and returns its text."""

    # Upper bound, in seconds, applied to the HTTP request. Without it
    # requests waits indefinitely on a server that stops responding, which
    # would stall the caller.
    REQUEST_TIMEOUT_SECONDS = 10

    def extract_web_content(self, url: str) -> str:
        """
        Extracts the content of a webpage given its URL.

        The page is downloaded with an HTTP GET, parsed as HTML, and reduced
        to its visible text with one text fragment per line.

        Args:
            url (str): The URL of the webpage to extract content from.

        Returns:
            str: The extracted content of the webpage. If the request or the
                parsing fails (including a timeout or a non-2xx response),
                a message starting with "Error occurred while extracting
                content:" is returned instead of raising.
        """
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
            response.raise_for_status()  # Raise an error for bad responses
            soup = BeautifulSoup(response.text, 'html.parser')
            # strip=True trims each text fragment and drops empty ones;
            # the remaining fragments are joined with newlines.
            return soup.get_text(separator='\n', strip=True)
        except Exception as e:
            # Deliberately broad: the failure is reported back as text
            # rather than propagated to the caller.
            return f"Error occurred while extracting content: {str(e)}"

    def get_tools(self) -> List[FunctionTool]:
        """Return the tools provided by this toolkit.

        Returns:
            List[FunctionTool]: A one-element list wrapping
                ``extract_web_content``.
        """
        return [FunctionTool(self.extract_web_content)]
|