From 60499c439cb5c0460bf597b405c24051451e9af2 Mon Sep 17 00:00:00 2001 From: Geoff-Robin Date: Thu, 2 Oct 2025 01:54:56 +0530 Subject: [PATCH] Added logging --- cognee/tasks/web_scraper/utils.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/cognee/tasks/web_scraper/utils.py b/cognee/tasks/web_scraper/utils.py index bcf12ac32..7c90a87a1 100644 --- a/cognee/tasks/web_scraper/utils.py +++ b/cognee/tasks/web_scraper/utils.py @@ -3,6 +3,23 @@ from bs4 import BeautifulSoup import os import requests from typing import Dict, Any, List, Union +from cognee.shared.logging_utils import get_logger + +logger = get_logger(__name__) + +try: + from tavily import AsyncTavilyClient +except ImportError: + logger.error( + "Failed to import tavily, make sure to install using pip install tavily-python>=0.7.0" + ) + +try: + from bs4 import BeautifulSoup +except ImportError: + logger.error( + "Failed to import bs4, make sure to install using pip install beautifulsoup4>=4.13.1" + ) async def fetch_page_content(urls: Union[str, List[str]], extraction_rules: Dict[str, Any]) -> str: @@ -21,9 +38,9 @@ async def fetch_with_tavily(urls: Union[str, List[str]]) -> Dict[str, str]: return result_dict -async def fetch_with_bs4(urls: Union[str,List[str]], extraction_rules: Dict) -> Dict[str]: +async def fetch_with_bs4(urls: Union[str, List[str]], extraction_rules: Dict) -> Dict[str]: result_dict = {} - if isinstance(urls,str): + if isinstance(urls, str): urls = [urls] for url in urls: response = requests.get(url, headers={"User-Agent": "Cognee-Scraper"})