diff --git a/cognee/tests/integration/web_url_crawler/test_add.py b/cognee/tests/integration/web_url_crawler/test_add.py index 0c4332c6d..e0dda94a9 100644 --- a/cognee/tests/integration/web_url_crawler/test_add.py +++ b/cognee/tests/integration/web_url_crawler/test_add.py @@ -18,11 +18,19 @@ async def test_add_succesfully_adds_url_when_preferred_loader_specified(): await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) + extraction_rules = { + "title": {"selector": "title", "attr": "text"}, + "headings": {"selector": "h1, h2, h3", "attr": "text", "all": True}, + "links": {"selector": "a", "attr": "href", "all": True}, + "paragraphs": {"selector": "p", "attr": "text", "all": True}, + } + loaders_config = { "web_url_loader": { "soup_config": { "max_depth": 1, "follow_links": False, + "extraction_rules": extraction_rules, } } }