From a7ff18801866def587d028c7258749d4d9e6d80f Mon Sep 17 00:00:00 2001
From: Daulet Amirkhanov
Date: Tue, 21 Oct 2025 15:22:40 +0100
Subject: [PATCH] add crawler tests

---
Review notes (free-form text between the three-dash line and the first
"diff --git" header; not part of the commit message):

* Both tests fetch https://en.wikipedia.org/wiki/Large_language_model and
  assert that the result is a dict with exactly one entry whose value,
  looked up by that URL, is a str.
* NOTE(review): both tests presumably need outbound network access, and
  the Tavily test presumably needs Tavily credentials - confirm how CI
  provides these.
* NOTE(review): test_tavily_crawler.py imports `os` but never uses it.
* NOTE(review): both tests call len(results) before asserting
  isinstance(results, dict); checking the type first would give a clearer
  failure if the return type ever changes.

 .../web_url_crawler/test_bs4_crawler.py            | 13 +++++++++++++
 .../web_url_crawler/test_tavily_crawler.py         | 15 +++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 cognee/tests/integration/web_url_crawler/test_bs4_crawler.py
 create mode 100644 cognee/tests/integration/web_url_crawler/test_tavily_crawler.py

diff --git a/cognee/tests/integration/web_url_crawler/test_bs4_crawler.py b/cognee/tests/integration/web_url_crawler/test_bs4_crawler.py
new file mode 100644
index 000000000..0e7637d86
--- /dev/null
+++ b/cognee/tests/integration/web_url_crawler/test_bs4_crawler.py
@@ -0,0 +1,13 @@
+import pytest
+from cognee.tasks.web_scraper import BeautifulSoupCrawler
+
+
+@pytest.mark.asyncio
+async def test_fetch():
+    crawler = BeautifulSoupCrawler()
+    url = "https://en.wikipedia.org/wiki/Large_language_model"
+    results = await crawler.fetch_urls(url)
+    assert len(results) == 1
+    assert isinstance(results, dict)
+    html = results[url]
+    assert isinstance(html, str)
diff --git a/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py b/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py
new file mode 100644
index 000000000..7edb9b8d3
--- /dev/null
+++ b/cognee/tests/integration/web_url_crawler/test_tavily_crawler.py
@@ -0,0 +1,15 @@
+import os
+import pytest
+from cognee.tasks.web_scraper.config import TavilyConfig
+from cognee.tasks.web_scraper.utils import fetch_with_tavily
+
+
+@pytest.mark.asyncio
+async def test_fetch():
+    url = "https://en.wikipedia.org/wiki/Large_language_model"
+    tavily_config = TavilyConfig()
+    results = await fetch_with_tavily(url, tavily_config)
+    assert len(results) == 1
+    assert isinstance(results, dict)
+    html = results[url]
+    assert isinstance(html, str)