tests: fix failing tests
This commit is contained in:
parent
9a9f9f6836
commit
36364285b2
4 changed files with 37 additions and 17 deletions
|
|
@ -76,8 +76,15 @@ class LoaderEngine:
|
||||||
Returns:
|
Returns:
|
||||||
LoaderInterface that can handle the file, or None if not found
|
LoaderInterface that can handle the file, or None if not found
|
||||||
"""
|
"""
|
||||||
|
is_url = data_item_path.startswith(("http://", "https://"))
|
||||||
|
|
||||||
file_info = filetype.guess(data_item_path)
|
if is_url:
|
||||||
|
extension = None
|
||||||
|
mime_type = None
|
||||||
|
else:
|
||||||
|
file_info = filetype.guess(data_item_path)
|
||||||
|
extension = file_info.extension if file_info else None
|
||||||
|
mime_type = file_info.mime if file_info else None
|
||||||
|
|
||||||
# Try preferred loaders first
|
# Try preferred loaders first
|
||||||
if preferred_loaders:
|
if preferred_loaders:
|
||||||
|
|
@ -85,8 +92,8 @@ class LoaderEngine:
|
||||||
if loader_name in self._loaders:
|
if loader_name in self._loaders:
|
||||||
loader = self._loaders[loader_name]
|
loader = self._loaders[loader_name]
|
||||||
if loader.can_handle(
|
if loader.can_handle(
|
||||||
extension=file_info.extension,
|
extension=extension,
|
||||||
mime_type=file_info.mime,
|
mime_type=mime_type,
|
||||||
data_item_path=data_item_path,
|
data_item_path=data_item_path,
|
||||||
): # TODO: I'd like to refactor this to be just one argument and let loaders get file_info inside, but I'll keep that until review time
|
): # TODO: I'd like to refactor this to be just one argument and let loaders get file_info inside, but I'll keep that until review time
|
||||||
return loader
|
return loader
|
||||||
|
|
@ -99,7 +106,7 @@ class LoaderEngine:
|
||||||
): # TODO: I'm in favor of adding WebUrlLoader to defaults, but keeping it for review
|
): # TODO: I'm in favor of adding WebUrlLoader to defaults, but keeping it for review
|
||||||
if loader_name in self._loaders:
|
if loader_name in self._loaders:
|
||||||
loader = self._loaders[loader_name]
|
loader = self._loaders[loader_name]
|
||||||
if loader.can_handle(extension=file_info.extension, mime_type=file_info.mime):
|
if loader.can_handle(extension=extension, mime_type=mime_type):
|
||||||
return loader
|
return loader
|
||||||
else:
|
else:
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
from cognee.infrastructure.loaders import LoaderInterface
|
from cognee.infrastructure.loaders.LoaderInterface import LoaderInterface
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from cognee.modules.ingestion.exceptions.exceptions import IngestionError
|
from cognee.modules.ingestion.exceptions.exceptions import IngestionError
|
||||||
|
|
|
||||||
|
|
@ -50,17 +50,17 @@ async def data_item_to_text_file(
|
||||||
await pull_from_s3(data_item_path, temp_file)
|
await pull_from_s3(data_item_path, temp_file)
|
||||||
temp_file.flush() # Data needs to be saved to local storage
|
temp_file.flush() # Data needs to be saved to local storage
|
||||||
loader = get_loader_engine()
|
loader = get_loader_engine()
|
||||||
return await loader.load_file(temp_file.name, preferred_loaders), loader.get_loader(
|
return await loader.load_file(
|
||||||
temp_file.name, preferred_loaders
|
temp_file.name, None, preferred_loaders
|
||||||
)
|
), loader.get_loader(temp_file.name, preferred_loaders)
|
||||||
|
|
||||||
# data is local file path
|
# data is local file path
|
||||||
elif parsed_url.scheme == "file":
|
elif parsed_url.scheme == "file":
|
||||||
if settings.accept_local_file_path:
|
if settings.accept_local_file_path:
|
||||||
loader = get_loader_engine()
|
loader = get_loader_engine()
|
||||||
return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
|
return await loader.load_file(
|
||||||
data_item_path, preferred_loaders
|
data_item_path, None, preferred_loaders
|
||||||
)
|
), loader.get_loader(data_item_path, preferred_loaders)
|
||||||
else:
|
else:
|
||||||
raise IngestionError(message="Local files are not accepted.")
|
raise IngestionError(message="Local files are not accepted.")
|
||||||
|
|
||||||
|
|
@ -71,9 +71,9 @@ async def data_item_to_text_file(
|
||||||
# Handle both Unix absolute paths (/path) and Windows absolute paths (C:\path)
|
# Handle both Unix absolute paths (/path) and Windows absolute paths (C:\path)
|
||||||
if settings.accept_local_file_path:
|
if settings.accept_local_file_path:
|
||||||
loader = get_loader_engine()
|
loader = get_loader_engine()
|
||||||
return await loader.load_file(data_item_path, preferred_loaders), loader.get_loader(
|
return await loader.load_file(
|
||||||
data_item_path, preferred_loaders
|
data_item_path, None, preferred_loaders
|
||||||
)
|
), loader.get_loader(data_item_path, preferred_loaders)
|
||||||
else:
|
else:
|
||||||
raise IngestionError(message="Local files are not accepted.")
|
raise IngestionError(message="Local files are not accepted.")
|
||||||
|
|
||||||
|
|
@ -82,8 +82,9 @@ async def data_item_to_text_file(
|
||||||
return (
|
return (
|
||||||
await loader.load_file(
|
await loader.load_file(
|
||||||
data_item_path,
|
data_item_path,
|
||||||
|
None,
|
||||||
preferred_loaders,
|
preferred_loaders,
|
||||||
loaders_config, # TODO: right now loaders_config is only needed for web_url_loader, so keeping changes minimal
|
loaders_config=loaders_config, # TODO: right now loaders_config is only needed for web_url_loader, so keeping changes minimal
|
||||||
),
|
),
|
||||||
loader.get_loader(data_item_path, preferred_loaders),
|
loader.get_loader(data_item_path, preferred_loaders),
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -6,10 +6,10 @@ import cognee
|
||||||
async def test_add_fails_when_preferred_loader_not_specified():
|
async def test_add_fails_when_preferred_loader_not_specified():
|
||||||
await cognee.prune.prune_data()
|
await cognee.prune.prune_data()
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
with pytest.raises:
|
with pytest.raises(ValueError):
|
||||||
await cognee.add(
|
await cognee.add(
|
||||||
"https://en.wikipedia.org/wiki/Large_language_model",
|
"https://en.wikipedia.org/wiki/Large_language_model",
|
||||||
preferred_loaders=["web_url_loader"],
|
incremental_loading=False, # TODO: incremental loading bypasses regular data ingestion, which breaks. Will fix
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -17,10 +17,22 @@ async def test_add_fails_when_preferred_loader_not_specified():
|
||||||
async def test_add_succesfully_adds_url_when_preferred_loader_specified():
|
async def test_add_succesfully_adds_url_when_preferred_loader_specified():
|
||||||
await cognee.prune.prune_data()
|
await cognee.prune.prune_data()
|
||||||
await cognee.prune.prune_system(metadata=True)
|
await cognee.prune.prune_system(metadata=True)
|
||||||
|
|
||||||
|
loaders_config = {
|
||||||
|
"web_url_loader": {
|
||||||
|
"soup_config": {
|
||||||
|
"max_depth": 1,
|
||||||
|
"follow_links": False,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await cognee.add(
|
await cognee.add(
|
||||||
"https://en.wikipedia.org/wiki/Large_language_model",
|
"https://en.wikipedia.org/wiki/Large_language_model",
|
||||||
preferred_loaders=["web_url_loader"],
|
preferred_loaders=["web_url_loader"],
|
||||||
|
incremental_loading=False, # TODO: incremental loading bypasses regular data ingestion, which breaks. Will fix
|
||||||
|
loaders_config=loaders_config,
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Failed to add url: {e}")
|
pytest.fail(f"Failed to add url: {e}")
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue