fix: web_url_loader load_data should return stored_path

This commit is contained in:
Daulet Amirkhanov 2025-10-17 12:34:50 +01:00
parent d0f3e224cb
commit 9b802f651b
3 changed files with 9 additions and 4 deletions

View file

@@ -63,6 +63,8 @@ class WebUrlLoader(LoaderInterface):
file_stream: If file stream is provided it will be used to process file instead
**kwargs: Additional loader-specific configuration
Returns:
file path to the stored file
Raises:
Exception: If file cannot be processed
"""
@@ -107,9 +109,9 @@ class WebUrlLoader(LoaderInterface):
content = ""
for key, value in data.items():
content += f"{key}:\n{value}\n\n"
- await save_data_to_file(content)
+ stored_path = await save_data_to_file(content)
- return content
+ return stored_path
except IngestionError:
raise
except Exception as e:

View file

@@ -116,9 +116,9 @@ async def ingest_data(
data_id = ingestion.identify(classified_data, user)
original_file_metadata = classified_data.get_metadata()
# Override file_path to be the actual data_item (e.g., URL) ?
- # original_file_metadata["file_path"] = actual_file_path
+ original_file_metadata["file_path"] = actual_file_path
# Storage metadata is the same as original
- # storage_file_metadata = original_file_metadata.copy()
+ storage_file_metadata = original_file_metadata.copy()
from sqlalchemy import select

View file

@@ -4,6 +4,9 @@ import cognee
@pytest.mark.asyncio
async def test_add_fails_when_preferred_loader_not_specified():
from cognee.shared.logging_utils import setup_logging, ERROR
setup_logging(log_level=ERROR)
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
with pytest.raises(ValueError):