chore: changes url for crawler tests (#1816)

<!-- .github/pull_request_template.md -->

Updates the crawler test URL to avoid blocked or unavailable sites in CI.
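For context, the previous target was being blocked or going down in CI, while example.com is an IANA-reserved page that serves static HTML. A quick, hypothetical pre-flight check (not part of this PR, stdlib only) of the replacement target could look like this:

```python
# Hypothetical pre-flight check (not part of this PR): confirm the new test
# target is reachable and actually serves HTML before pointing the tests at it.
from urllib.request import urlopen

with urlopen("http://example.com/", timeout=10) as resp:
    body = resp.read().decode("utf-8", errors="replace")
    assert resp.status == 200, f"unexpected status {resp.status}"
    assert "<html" in body.lower(), "response does not look like HTML"

print("http://example.com/ is reachable and serves HTML")
```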

## Description
<!--
Please provide a clear, human-generated description of the changes in
this PR.
DO NOT use AI-generated descriptions. We want to understand your thought
process and reasoning.
-->

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
Commit 2176ec16b8 (parent fe55071849), authored by hajdul88 on 2025-11-20 17:03:36 +01:00, committed by GitHub.
3 changed files with 15 additions and 15 deletions

First changed file (default URL crawler fetch test):

```diff
@@ -5,7 +5,7 @@ from cognee.tasks.web_scraper import DefaultUrlCrawler
 @pytest.mark.asyncio
 async def test_fetch():
     crawler = DefaultUrlCrawler()
-    url = "https://httpbin.org/html"
+    url = "http://example.com/"
     results = await crawler.fetch_urls(url)
     assert len(results) == 1
     assert isinstance(results, dict)
```
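Read as a whole, the updated test looks roughly like the sketch below; the import path, decorator, and assertions are taken from the hunk above, so only the surrounding module layout is assumed.

```python
# Sketch of the crawler fetch test after this change, assembled from the diff
# context above (import path and assertions as shown in the hunk).
import pytest

from cognee.tasks.web_scraper import DefaultUrlCrawler


@pytest.mark.asyncio
async def test_fetch():
    crawler = DefaultUrlCrawler()
    url = "http://example.com/"
    # fetch_urls is expected to return a dict with one entry for the single URL.
    results = await crawler.fetch_urls(url)
    assert len(results) == 1
    assert isinstance(results, dict)
```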

Second changed file (Tavily fetch test):

```diff
@@ -11,7 +11,7 @@ skip_in_ci = pytest.mark.skipif(
 @skip_in_ci
 @pytest.mark.asyncio
 async def test_fetch():
-    url = "https://httpbin.org/html"
+    url = "http://example.com/"
     results = await fetch_with_tavily(url)
     assert isinstance(results, dict)
     assert len(results) == 1
```
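The Tavily test is guarded by the `skip_in_ci` marker visible in the hunk header. The actual condition lives outside this diff; a common way to define such a marker, assuming CI is detected via an environment variable, is:

```python
# Hedged sketch: how a skip_in_ci marker is commonly defined. The real
# condition and reason used by the repository are not shown in this diff.
import os

import pytest

skip_in_ci = pytest.mark.skipif(
    os.getenv("CI") == "true",
    reason="Tavily fetch needs external API access that is not available in CI",
)
```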

Third changed file (URL ingestion and loader tests):

```diff
@@ -14,7 +14,7 @@ async def test_url_saves_as_html_file():
     await cognee.prune.prune_system(metadata=True)
     try:
-        original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
+        original_file_path = await save_data_item_to_storage("http://example.com/")
         file_path = get_data_file_path(original_file_path)
         assert file_path.endswith(".html")
         file = Path(file_path)
@@ -42,7 +42,7 @@ async def test_saved_html_is_valid():
     await cognee.prune.prune_system(metadata=True)
     try:
-        original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
+        original_file_path = await save_data_item_to_storage("http://example.com/")
         file_path = get_data_file_path(original_file_path)
         content = Path(file_path).read_text()
@@ -68,7 +68,7 @@ async def test_add_url():
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)
-    await cognee.add("https://httpbin.org/html")
+    await cognee.add("http://example.com/")
 
 skip_in_ci = pytest.mark.skipif(
@@ -84,7 +84,7 @@ async def test_add_url_with_tavily():
     await cognee.prune.prune_data()
     await cognee.prune.prune_system(metadata=True)
-    await cognee.add("https://httpbin.org/html")
+    await cognee.add("http://example.com/")
 
 @pytest.mark.asyncio
@@ -94,7 +94,7 @@ async def test_add_url_without_incremental_loading():
     try:
         await cognee.add(
-            "https://httpbin.org/html",
+            "http://example.com/",
             incremental_loading=False,
         )
     except Exception as e:
@@ -108,7 +108,7 @@ async def test_add_url_with_incremental_loading():
     try:
         await cognee.add(
-            "https://httpbin.org/html",
+            "http://example.com/",
             incremental_loading=True,
         )
     except Exception as e:
@@ -121,7 +121,7 @@ async def test_add_url_can_define_preferred_loader_as_list_of_str():
     await cognee.prune.prune_system(metadata=True)
     await cognee.add(
-        "https://httpbin.org/html",
+        "http://example.com/",
         preferred_loaders=["beautiful_soup_loader"],
     )
@@ -140,7 +140,7 @@ async def test_add_url_with_extraction_rules():
     try:
         await cognee.add(
-            "https://httpbin.org/html",
+            "http://example.com/",
             preferred_loaders={"beautiful_soup_loader": {"extraction_rules": extraction_rules}},
         )
     except Exception as e:
@@ -159,7 +159,7 @@ async def test_loader_is_none_by_default():
     }
     try:
-        original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
+        original_file_path = await save_data_item_to_storage("http://example.com/")
         file_path = get_data_file_path(original_file_path)
         assert file_path.endswith(".html")
         file = Path(file_path)
@@ -190,7 +190,7 @@ async def test_beautiful_soup_loader_is_selected_loader_if_preferred_loader_prov
     }
     try:
-        original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
+        original_file_path = await save_data_item_to_storage("http://example.com/")
         file_path = get_data_file_path(original_file_path)
         assert file_path.endswith(".html")
         file = Path(file_path)
@@ -217,7 +217,7 @@ async def test_beautiful_soup_loader_works_with_and_without_arguments():
     await cognee.prune.prune_system(metadata=True)
     try:
-        original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
+        original_file_path = await save_data_item_to_storage("http://example.com/")
         file_path = get_data_file_path(original_file_path)
         assert file_path.endswith(".html")
         file = Path(file_path)
@@ -253,7 +253,7 @@ async def test_beautiful_soup_loader_successfully_loads_file_if_required_args_pr
     await cognee.prune.prune_system(metadata=True)
     try:
-        original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
+        original_file_path = await save_data_item_to_storage("http://example.com/")
         file_path = get_data_file_path(original_file_path)
         assert file_path.endswith(".html")
         file = Path(file_path)
@@ -290,7 +290,7 @@ async def test_beautiful_soup_loads_file_successfully():
     }
     try:
-        original_file_path = await save_data_item_to_storage("https://httpbin.org/html")
+        original_file_path = await save_data_item_to_storage("http://example.com/")
         file_path = get_data_file_path(original_file_path)
         assert file_path.endswith(".html")
         original_file = Path(file_path)
```
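Since the same literal now appears across three test files, a possible follow-up (purely illustrative, not part of this PR) would be to centralize the target in a shared fixture so the next URL swap is a one-line change; the fixture and environment variable names below are hypothetical.

```python
# conftest.py sketch (hypothetical): single source of truth for the crawler
# test target, overridable locally via an environment variable.
import os

import pytest


@pytest.fixture
def crawler_test_url() -> str:
    return os.getenv("COGNEE_CRAWLER_TEST_URL", "http://example.com/")
```

Tests would then accept `crawler_test_url` as a parameter instead of hard-coding the URL.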