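tests: fix failing URL-loader tests (skip filetype detection for URLs, fix LoaderInterface import, pass loaders_config by keyword)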

This commit is contained in:
Daulet Amirkhanov 2025-10-16 15:53:49 +01:00
parent 9a9f9f6836
commit 36364285b2
4 changed files with 37 additions and 17 deletions

View file

@ -76,8 +76,15 @@ class LoaderEngine:
Returns:
LoaderInterface that can handle the file, or None if not found
"""
is_url = data_item_path.startswith(("http://", "https://"))
file_info = filetype.guess(data_item_path)
if is_url:
extension = None
mime_type = None
else:
file_info = filetype.guess(data_item_path)
extension = file_info.extension if file_info else None
mime_type = file_info.mime if file_info else None
# Try preferred loaders first
if preferred_loaders:
@ -85,8 +92,8 @@ class LoaderEngine:
if loader_name in self._loaders:
loader = self._loaders[loader_name]
if loader.can_handle(
extension=file_info.extension,
mime_type=file_info.mime,
extension=extension,
mime_type=mime_type,
data_item_path=data_item_path,
): # TODO: I'd like to refactor this to be just one argument and let loaders get file_info inside, but I'll keep that until review time
return loader
@ -99,7 +106,7 @@ class LoaderEngine:
): # TODO: I'm in favor of adding WebUrlLoader to defaults, but keeping it for review
if loader_name in self._loaders:
loader = self._loaders[loader_name]
if loader.can_handle(extension=file_info.extension, mime_type=file_info.mime):
if loader.can_handle(extension=extension, mime_type=mime_type):
return loader
else:
logger.info(

View file

@ -1,4 +1,4 @@
from cognee.infrastructure.loaders import LoaderInterface
from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
from typing import List
from cognee.modules.ingestion.exceptions.exceptions import IngestionError

View file

@ -50,17 +50,17 @@ async def data_item_to_text_file(
await pull_from_s3(data_item_path, temp_file)
temp_file.flush() # Data needs to be saved to local storage
loader = get_loader_engine()
return await loader.load_file(temp_file.name, preferred_loaders), loader.get_loader(
temp_file.name, preferred_loaders
)
return await loader.load_file(
temp_file.name, None, preferred_loaders
), loader.get_loader(temp_file.name, preferred_loaders)
# data is local file path
elif parsed_url.scheme == "file":
if settings.accept_local_file_path:
loader = get_loader_engine()
return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
data_item_path, preferred_loaders
)
return await loader.load_file(
data_item_path, None, preferred_loaders
), loader.get_loader(data_item_path, preferred_loaders)
else:
raise IngestionError(message="Local files are not accepted.")
@ -71,9 +71,9 @@ async def data_item_to_text_file(
# Handle both Unix absolute paths (/path) and Windows absolute paths (C:\path)
if settings.accept_local_file_path:
loader = get_loader_engine()
return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
data_item_path, preferred_loaders
)
return await loader.load_file(
data_item_path, None, preferred_loaders
), loader.get_loader(data_item_path, preferred_loaders)
else:
raise IngestionError(message="Local files are not accepted.")
@ -82,8 +82,9 @@ async def data_item_to_text_file(
return (
await loader.load_file(
data_item_path,
None,
preferred_loaders,
loaders_config, # TODO: right now loaders_config is only needed for web_url_loader, so keeping changes minimal
loaders_config=loaders_config, # TODO: right now loaders_config is only needed for web_url_loader, so keeping changes minimal
),
loader.get_loader(data_item_path, preferred_loaders),
)

View file

@ -6,10 +6,10 @@ import cognee
async def test_add_fails_when_preferred_loader_not_specified():
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
with pytest.raises:
with pytest.raises(ValueError):
await cognee.add(
"https://en.wikipedia.org/wiki/Large_language_model",
preferred_loaders=["web_url_loader"],
incremental_loading=False, # TODO: incremental loading bypasses regular data ingestion, which breaks. Will fix
)
@ -17,10 +17,22 @@ async def test_add_fails_when_preferred_loader_not_specified():
async def test_add_succesfully_adds_url_when_preferred_loader_specified():
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
loaders_config = {
"web_url_loader": {
"soup_config": {
"max_depth": 1,
"follow_links": False,
}
}
}
try:
await cognee.add(
"https://en.wikipedia.org/wiki/Large_language_model",
preferred_loaders=["web_url_loader"],
incremental_loading=False, # TODO: incremental loading bypasses regular data ingestion, which breaks. Will fix
loaders_config=loaders_config,
)
except Exception as e:
pytest.fail(f"Failed to add url: {e}")