diff --git a/cognee/tasks/web_scraper/utils.py b/cognee/tasks/web_scraper/utils.py
index 62a614e6e..d5e1301bc 100644
--- a/cognee/tasks/web_scraper/utils.py
+++ b/cognee/tasks/web_scraper/utils.py
@@ -57,7 +57,10 @@ async def fetch_page_content(
         logger.error(
             "Failed to import bs4, make sure to install using pip install beautifulsoup4>=4.13.1"
         )
-        raise
+        raise ImportError
+    if not soup_crawler_config or soup_crawler_config.extraction_rules is None:
+        raise ValueError("extraction_rules must be provided when not using Tavily")
+    extraction_rules = soup_crawler_config.extraction_rules
     crawler = BeautifulSoupCrawler(
         concurrency=soup_crawler_config.concurrency,
         crawl_delay=soup_crawler_config.crawl_delay,
@@ -66,9 +69,6 @@ async def fetch_page_content(
         retry_delay_factor=soup_crawler_config.retry_delay_factor,
         headers=soup_crawler_config.headers,
     )
-    if not soup_crawler_config or soup_crawler_config.extraction_rules is None:
-        raise ValueError("extraction_rules must be provided when not using Tavily")
-    extraction_rules = soup_crawler_config.extraction_rules
     try:
         results = await crawler.fetch_with_bs4(
             urls,
diff --git a/cognee/tests/tasks/web_scraping/web_scraping_test.py b/cognee/tests/tasks/web_scraping/web_scraping_test.py
index c598ef536..1c2ba09a8 100644
--- a/cognee/tests/tasks/web_scraping/web_scraping_test.py
+++ b/cognee/tests/tasks/web_scraping/web_scraping_test.py
@@ -28,7 +28,6 @@ async def test_web_scraping_using_bs4():
         retry_delay_factor=0.5,
         extraction_rules=rules,
         use_playwright=False,
-        structured=True,
     )
 
     await cognee.add(
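
A minimal sketch of the ordering change in the first hunk, assuming only the fields the diff itself references; SoupCrawlerConfig and build_crawler below are hypothetical names for illustration, not the project's API. It shows why the extraction_rules check must run before the crawler is constructed: with the old ordering, a missing config raised AttributeError on soup_crawler_config.concurrency before the intended ValueError could ever be reached.

from dataclasses import dataclass
from typing import Optional


@dataclass
class SoupCrawlerConfig:
    # Hypothetical stand-in for the real config; only fields named in the diff.
    extraction_rules: Optional[dict] = None
    concurrency: int = 1


def build_crawler(soup_crawler_config: Optional[SoupCrawlerConfig]) -> dict:
    # Validate before touching any attribute, mirroring the fixed ordering:
    # a None config now yields the clear ValueError instead of an AttributeError.
    if not soup_crawler_config or soup_crawler_config.extraction_rules is None:
        raise ValueError("extraction_rules must be provided when not using Tavily")
    extraction_rules = soup_crawler_config.extraction_rules
    return {
        "concurrency": soup_crawler_config.concurrency,
        "extraction_rules": extraction_rules,
    }

Validating the config up front keeps the crawler construction free of None-handling and makes the "not using Tavily" error message actually reachable.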