tests: fix failing tests
This commit is contained in:
parent
9a9f9f6836
commit
36364285b2
4 changed files with 37 additions and 17 deletions
|
|
@ -76,8 +76,15 @@ class LoaderEngine:
|
|||
Returns:
|
||||
LoaderInterface that can handle the file, or None if not found
|
||||
"""
|
||||
is_url = data_item_path.startswith(("http://", "https://"))
|
||||
|
||||
file_info = filetype.guess(data_item_path)
|
||||
if is_url:
|
||||
extension = None
|
||||
mime_type = None
|
||||
else:
|
||||
file_info = filetype.guess(data_item_path)
|
||||
extension = file_info.extension if file_info else None
|
||||
mime_type = file_info.mime if file_info else None
|
||||
|
||||
# Try preferred loaders first
|
||||
if preferred_loaders:
|
||||
|
|
@ -85,8 +92,8 @@ class LoaderEngine:
|
|||
if loader_name in self._loaders:
|
||||
loader = self._loaders[loader_name]
|
||||
if loader.can_handle(
|
||||
extension=file_info.extension,
|
||||
mime_type=file_info.mime,
|
||||
extension=extension,
|
||||
mime_type=mime_type,
|
||||
data_item_path=data_item_path,
|
||||
): # TODO: consider refactoring can_handle to take a single argument and let each loader obtain file_info itself; deferred until review
|
||||
return loader
|
||||
|
|
@ -99,7 +106,7 @@ class LoaderEngine:
|
|||
): # TODO: I'm in favor of adding WebUrlLoader to defaults, but keeping it for review
|
||||
if loader_name in self._loaders:
|
||||
loader = self._loaders[loader_name]
|
||||
if loader.can_handle(extension=file_info.extension, mime_type=file_info.mime):
|
||||
if loader.can_handle(extension=extension, mime_type=mime_type):
|
||||
return loader
|
||||
else:
|
||||
logger.info(
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
from cognee.infrastructure.loaders import LoaderInterface
|
||||
from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
|
||||
from typing import List
|
||||
|
||||
from cognee.modules.ingestion.exceptions.exceptions import IngestionError
|
||||
|
|
|
|||
|
|
@ -50,17 +50,17 @@ async def data_item_to_text_file(
|
|||
await pull_from_s3(data_item_path, temp_file)
|
||||
temp_file.flush() # Data needs to be saved to local storage
|
||||
loader = get_loader_engine()
|
||||
return await loader.load_file(temp_file.name, preferred_loaders), loader.get_loader(
|
||||
temp_file.name, preferred_loaders
|
||||
)
|
||||
return await loader.load_file(
|
||||
temp_file.name, None, preferred_loaders
|
||||
), loader.get_loader(temp_file.name, preferred_loaders)
|
||||
|
||||
# data is local file path
|
||||
elif parsed_url.scheme == "file":
|
||||
if settings.accept_local_file_path:
|
||||
loader = get_loader_engine()
|
||||
return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
|
||||
data_item_path, preferred_loaders
|
||||
)
|
||||
return await loader.load_file(
|
||||
data_item_path, None, preferred_loaders
|
||||
), loader.get_loader(data_item_path, preferred_loaders)
|
||||
else:
|
||||
raise IngestionError(message="Local files are not accepted.")
|
||||
|
||||
|
|
@ -71,9 +71,9 @@ async def data_item_to_text_file(
|
|||
# Handle both Unix absolute paths (/path) and Windows absolute paths (C:\path)
|
||||
if settings.accept_local_file_path:
|
||||
loader = get_loader_engine()
|
||||
return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
|
||||
data_item_path, preferred_loaders
|
||||
)
|
||||
return await loader.load_file(
|
||||
data_item_path, None, preferred_loaders
|
||||
), loader.get_loader(data_item_path, preferred_loaders)
|
||||
else:
|
||||
raise IngestionError(message="Local files are not accepted.")
|
||||
|
||||
|
|
@ -82,8 +82,9 @@ async def data_item_to_text_file(
|
|||
return (
|
||||
await loader.load_file(
|
||||
data_item_path,
|
||||
None,
|
||||
preferred_loaders,
|
||||
loaders_config, # TODO: right now loaders_config is only needed for web_url_loader, so keeping changes minimal
|
||||
loaders_config=loaders_config, # TODO: right now loaders_config is only needed for web_url_loader, so keeping changes minimal
|
||||
),
|
||||
loader.get_loader(data_item_path, preferred_loaders),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -6,10 +6,10 @@ import cognee
|
|||
async def test_add_fails_when_preferred_loader_not_specified():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
with pytest.raises:
|
||||
with pytest.raises(ValueError):
|
||||
await cognee.add(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model",
|
||||
preferred_loaders=["web_url_loader"],
|
||||
incremental_loading=False, # TODO: incremental loading bypasses the regular data ingestion path, which currently breaks; fix pending
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -17,10 +17,22 @@ async def test_add_fails_when_preferred_loader_not_specified():
|
|||
async def test_add_succesfully_adds_url_when_preferred_loader_specified():
|
||||
await cognee.prune.prune_data()
|
||||
await cognee.prune.prune_system(metadata=True)
|
||||
|
||||
loaders_config = {
|
||||
"web_url_loader": {
|
||||
"soup_config": {
|
||||
"max_depth": 1,
|
||||
"follow_links": False,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
try:
|
||||
await cognee.add(
|
||||
"https://en.wikipedia.org/wiki/Large_language_model",
|
||||
preferred_loaders=["web_url_loader"],
|
||||
incremental_loading=False, # TODO: incremental loading bypasses the regular data ingestion path, which currently breaks; fix pending
|
||||
loaders_config=loaders_config,
|
||||
)
|
||||
except Exception as e:
|
||||
pytest.fail(f"Failed to add url: {e}")
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue