From 9b802f651bc642f318ecec07af5fb3e1f46a5146 Mon Sep 17 00:00:00 2001 From: Daulet Amirkhanov Date: Fri, 17 Oct 2025 12:34:50 +0100 Subject: [PATCH] fix: web_url_loader load_data should yield stored_path --- cognee/infrastructure/loaders/external/web_url_loader.py | 6 ++++-- cognee/tasks/ingestion/ingest_data.py | 4 ++-- cognee/tests/integration/web_url_crawler/test_add.py | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/loaders/external/web_url_loader.py b/cognee/infrastructure/loaders/external/web_url_loader.py index f9fce47a9..491428c82 100644 --- a/cognee/infrastructure/loaders/external/web_url_loader.py +++ b/cognee/infrastructure/loaders/external/web_url_loader.py @@ -63,6 +63,8 @@ class WebUrlLoader(LoaderInterface): file_stream: If file stream is provided it will be used to process file instead **kwargs: Additional loader-specific configuration + Returns: + file path to the stored file Raises: Exception: If file cannot be processed """ @@ -107,9 +109,9 @@ class WebUrlLoader(LoaderInterface): content = "" for key, value in data.items(): content += f"{key}:\n{value}\n\n" - await save_data_to_file(content) + stored_path = await save_data_to_file(content) - return content + return stored_path except IngestionError: raise except Exception as e: diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index b742e474e..233bb5f1c 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -116,9 +116,9 @@ async def ingest_data( data_id = ingestion.identify(classified_data, user) original_file_metadata = classified_data.get_metadata() # Override file_path to be the actual data_item (e.g., URL) ? - # original_file_metadata["file_path"] = actual_file_path + original_file_metadata["file_path"] = actual_file_path # Storage metadata is the same as original - # storage_file_metadata = original_file_metadata.copy() + storage_file_metadata = original_file_metadata.copy() from sqlalchemy import select diff --git a/cognee/tests/integration/web_url_crawler/test_add.py b/cognee/tests/integration/web_url_crawler/test_add.py index e0dda94a9..2a75b5054 100644 --- a/cognee/tests/integration/web_url_crawler/test_add.py +++ b/cognee/tests/integration/web_url_crawler/test_add.py @@ -4,6 +4,9 @@ import cognee @pytest.mark.asyncio async def test_add_fails_when_preferred_loader_not_specified(): + from cognee.shared.logging_utils import setup_logging, ERROR + + setup_logging(log_level=ERROR) await cognee.prune.prune_data() await cognee.prune.prune_system(metadata=True) with pytest.raises(ValueError):