fix: web_url_loader load_data should return stored_path
This commit is contained in:
parent
d0f3e224cb
commit
9b802f651b
3 changed files with 9 additions and 4 deletions
|
|
@ -63,6 +63,8 @@ class WebUrlLoader(LoaderInterface):
|
|||
file_stream: If file stream is provided it will be used to process file instead
|
||||
**kwargs: Additional loader-specific configuration
|
||||
|
||||
Returns:
|
||||
file path to the stored file
|
||||
Raises:
|
||||
Exception: If file cannot be processed
|
||||
"""
|
||||
|
|
@ -107,9 +109,9 @@ class WebUrlLoader(LoaderInterface):
|
|||
content = ""
|
||||
for key, value in data.items():
|
||||
content += f"{key}:\n{value}\n\n"
|
||||
await save_data_to_file(content)
|
||||
stored_path = await save_data_to_file(content)
|
||||
|
||||
return content
|
||||
return stored_path
|
||||
except IngestionError:
|
||||
raise
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -116,9 +116,9 @@ async def ingest_data(
|
|||
data_id = ingestion.identify(classified_data, user)
|
||||
original_file_metadata = classified_data.get_metadata()
|
||||
# Override file_path to be the actual data_item (e.g., URL) ?
|
||||
# original_file_metadata["file_path"] = actual_file_path
|
||||
original_file_metadata["file_path"] = actual_file_path
|
||||
# Storage metadata is the same as original
|
||||
# storage_file_metadata = original_file_metadata.copy()
|
||||
storage_file_metadata = original_file_metadata.copy()
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@ import cognee
|
|||
|
||||
@pytest.mark.asyncio
|
||||
async def test_add_fails_when_preferred_loader_not_specified():
|
||||
from cognee.shared.logging_utils import setup_logging, ERROR
|
||||
|
||||
setup_logging(log_level=ERROR)
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
with pytest.raises(ValueError):
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue