From 5288ab4ab49333843813c53c39be4f9a883c2f89 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Wed, 22 Oct 2025 13:01:06 +0100 Subject: [PATCH] tests: fix failing tests --- .../loaders/external/beautiful_soup_loader.py | 6 ------ .../web_url_crawler/test_url_adding_e2e.py | 11 +++++------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/cognee/infrastructure/loaders/external/beautiful_soup_loader.py b/cognee/infrastructure/loaders/external/beautiful_soup_loader.py index 5ed0b16c0..8bea8fb6e 100644 --- a/cognee/infrastructure/loaders/external/beautiful_soup_loader.py +++ b/cognee/infrastructure/loaders/external/beautiful_soup_loader.py @@ -148,12 +148,6 @@ class BeautifulSoupLoader(LoaderInterface): "all": True, "join_with": "\n\n", }, - # Spans with data (fallback for inline content) - "data_spans": { - "selector": "span[data-*]", - "all": True, - "join_with": " ", - }, } async def load( diff --git a/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py b/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py index afe2dce7f..c098f5928 100644 --- a/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py +++ b/cognee/tests/integration/web_url_crawler/test_url_adding_e2e.py @@ -185,7 +185,7 @@ async def test_beautiful_soup_loader_is_selected_loader_if_preferred_loader_prov @pytest.mark.asyncio -async def test_beautiful_soup_loader_raises_if_required_args_are_missing(): +async def test_beautiful_soup_loader_works_with_and_without_arguments(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) @@ -203,11 +203,10 @@ async def test_beautiful_soup_loader_raises_if_required_args_are_missing(): bs_loader = BeautifulSoupLoader() loader_engine.register_loader(bs_loader) preferred_loaders = {"beautiful_soup_loader": {}} - with pytest.raises(ValueError): - await loader_engine.load_file( - file_path, - preferred_loaders=preferred_loaders, - ) + await loader_engine.load_file( + file_path, + preferred_loaders=preferred_loaders, + ) extraction_rules = { "title": {"selector": "title"}, "headings": {"selector": "h1, h2, h3", "all": True},