129 lines
4.8 KiB
Python
129 lines
4.8 KiB
Python
import pytest
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from cognee.tasks.ingestion.save_data_item_to_storage import save_data_item_to_storage
|
|
from cognee.tasks.ingestion.resolve_data_directories import resolve_data_directories
|
|
|
|
|
|
class TestPathSupport:
|
|
"""Test Path type support in ingestion functions."""
|
|
|
|
@pytest.fixture
|
|
def temp_text_file(self):
|
|
"""Create a temporary text file for testing."""
|
|
with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False) as f:
|
|
f.write("This is a test file for Path support.\n")
|
|
temp_path = f.name
|
|
|
|
yield temp_path
|
|
|
|
# Cleanup
|
|
if os.path.exists(temp_path):
|
|
os.unlink(temp_path)
|
|
|
|
@pytest.fixture
|
|
def temp_directory(self):
|
|
"""Create a temporary directory with test files."""
|
|
import tempfile
|
|
|
|
temp_dir = tempfile.mkdtemp()
|
|
|
|
# Create some test files
|
|
for i in range(3):
|
|
with open(os.path.join(temp_dir, f"test_{i}.txt"), "w") as f:
|
|
f.write(f"Test file {i} content.\n")
|
|
|
|
yield temp_dir
|
|
|
|
# Cleanup
|
|
import shutil
|
|
|
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_save_data_item_path_object(self, temp_text_file):
|
|
"""Test save_data_item_to_storage with Path object."""
|
|
path_obj = Path(temp_text_file)
|
|
result = await save_data_item_to_storage(path_obj)
|
|
|
|
# Should return a file:// URL
|
|
assert result.startswith("file://")
|
|
assert str(path_obj.resolve()) in result
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_save_data_item_string_vs_path(self, temp_text_file):
|
|
"""Test that Path object vs string path are handled consistently."""
|
|
path_obj = Path(temp_text_file)
|
|
string_path = str(path_obj.resolve())
|
|
|
|
# Both should work and produce similar results
|
|
result_path = await save_data_item_to_storage(path_obj)
|
|
result_string = await save_data_item_to_storage(string_path)
|
|
|
|
# Both should be file:// URLs pointing to the same file
|
|
assert result_path.startswith("file://")
|
|
assert result_string.startswith("file://")
|
|
|
|
# Extract the actual file paths from the URLs
|
|
path_from_path_obj = result_path.replace("file://", "")
|
|
path_from_string = result_string.replace("file://", "")
|
|
|
|
# They should resolve to the same absolute path
|
|
assert os.path.normpath(path_from_path_obj) == os.path.normpath(path_from_string)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_save_data_item_text_content(self):
|
|
"""Test that plain text strings are handled as content, not paths."""
|
|
text_content = "This is plain text content, not a file path."
|
|
result = await save_data_item_to_storage(text_content)
|
|
|
|
# Should create a file and return file:// URL since this is text content
|
|
assert result.startswith("file://")
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_data_directories_path_object(self, temp_directory):
|
|
"""Test resolve_data_directories with Path object."""
|
|
path_obj = Path(temp_directory)
|
|
result = await resolve_data_directories([path_obj])
|
|
|
|
# Should return a list of Path objects for the files in the directory
|
|
assert len(result) == 3 # We created 3 test files
|
|
assert all(isinstance(item, Path) for item in result)
|
|
assert all(item.suffix == ".txt" for item in result)
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_data_directories_mixed_types(self, temp_directory, temp_text_file):
|
|
"""Test resolve_data_directories with mixed Path and string types."""
|
|
path_obj = Path(temp_text_file)
|
|
string_path = str(temp_text_file)
|
|
directory_path = Path(temp_directory)
|
|
|
|
# Mix of types
|
|
mixed_data = [path_obj, string_path, directory_path]
|
|
result = await resolve_data_directories(mixed_data)
|
|
|
|
# Should have:
|
|
# - 1 Path object (original file as Path)
|
|
# - 1 string (original file as string)
|
|
# - 3 Path objects (from directory expansion)
|
|
assert len(result) == 5
|
|
|
|
# Count types
|
|
path_objects = [item for item in result if isinstance(item, Path)]
|
|
string_objects = [item for item in result if isinstance(item, str)]
|
|
|
|
assert len(path_objects) == 4 # 1 original + 3 from directory
|
|
assert len(string_objects) == 1 # 1 original string
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_data_directories_path_single_file(self, temp_text_file):
|
|
"""Test resolve_data_directories with a single Path file."""
|
|
path_obj = Path(temp_text_file)
|
|
result = await resolve_data_directories([path_obj])
|
|
|
|
# Should return the same Path object
|
|
assert len(result) == 1
|
|
assert isinstance(result[0], Path)
|
|
assert str(result[0]) == str(path_obj)
|