diff --git a/cognee/tasks/web_scraper/utils.py b/cognee/tasks/web_scraper/utils.py
index bcf12ac32..7c90a87a1 100644
--- a/cognee/tasks/web_scraper/utils.py
+++ b/cognee/tasks/web_scraper/utils.py
@@ -3,6 +3,25 @@ from bs4 import BeautifulSoup
 import os
 import requests
 from typing import Dict, Any, List, Union
+from cognee.shared.logging_utils import get_logger
+
+logger = get_logger(__name__)
+
+# ImportError is logged rather than raised; AsyncTavilyClient stays undefined in that case.
+try:
+    from tavily import AsyncTavilyClient
+except ImportError:
+    logger.error(
+        "Failed to import tavily, make sure to install using pip install 'tavily-python>=0.7.0'"
+    )
+
+# NOTE(review): bs4 is also imported unguarded earlier in this file - confirm this guard.
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    logger.error(
+        "Failed to import bs4, make sure to install using pip install 'beautifulsoup4>=4.13.1'"
+    )
 
 
 async def fetch_page_content(urls: Union[str, List[str]], extraction_rules: Dict[str, Any]) -> str:
@@ -21,9 +40,9 @@ async def fetch_with_tavily(urls: Union[str, List[str]]) -> Dict[str, str]:
     return result_dict
 
 
-async def fetch_with_bs4(urls: Union[str,List[str]], extraction_rules: Dict) -> Dict[str]:
+async def fetch_with_bs4(urls: Union[str, List[str]], extraction_rules: Dict) -> Dict[str]:
     result_dict = {}
-    if isinstance(urls,str):
+    if isinstance(urls, str):
         urls = [urls]
     for url in urls:
         response = requests.get(url, headers={"User-Agent": "Cognee-Scraper"})