cognee/tests/unit/tasks/ingestion/test_path_support.py
2025-08-02 20:37:13 +02:00

129 lines
4.8 KiB
Python

import os
import shutil
import tempfile
from pathlib import Path

import pytest

from cognee.tasks.ingestion.resolve_data_directories import resolve_data_directories
from cognee.tasks.ingestion.save_data_item_to_storage import save_data_item_to_storage
class TestPathSupport:
    """Test Path type support in ingestion functions.

    Exercises ``save_data_item_to_storage`` and ``resolve_data_directories``
    with ``pathlib.Path`` inputs, plain string paths, and mixtures of both.
    """

    @pytest.fixture
    def temp_text_file(self):
        """Create a temporary text file and yield its path as a string.

        The file is created with ``delete=False`` so it survives the ``with``
        block; it is removed after the test finishes if it still exists.
        """
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".txt", delete=False, encoding="utf-8"
        ) as f:
            f.write("This is a test file for Path support.\n")
            temp_path = f.name

        yield temp_path

        # Cleanup: the test may already have removed the file.
        if os.path.exists(temp_path):
            os.unlink(temp_path)

    @pytest.fixture
    def temp_directory(self):
        """Create a temporary directory holding three ``.txt`` files.

        Yields the directory path as a string. The directory tree is removed
        after the test on a best-effort basis (removal errors are ignored).
        """
        temp_dir = tempfile.mkdtemp()

        # Create some test files: test_0.txt, test_1.txt, test_2.txt.
        for i in range(3):
            file_path = os.path.join(temp_dir, f"test_{i}.txt")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(f"Test file {i} content.\n")

        yield temp_dir

        # Cleanup: never fail the test run because of leftover temp files.
        shutil.rmtree(temp_dir, ignore_errors=True)

    @pytest.mark.asyncio
    async def test_save_data_item_path_object(self, temp_text_file):
        """Test save_data_item_to_storage with a Path object."""
        path_obj = Path(temp_text_file)

        result = await save_data_item_to_storage(path_obj)

        # Should return a file:// URL that contains the resolved path.
        assert result.startswith("file://")
        assert str(path_obj.resolve()) in result

    @pytest.mark.asyncio
    async def test_save_data_item_string_vs_path(self, temp_text_file):
        """Test that a Path object and its string form are handled consistently."""
        scheme = "file://"
        path_obj = Path(temp_text_file)
        string_path = str(path_obj.resolve())

        # Both should work and produce similar results.
        result_path = await save_data_item_to_storage(path_obj)
        result_string = await save_data_item_to_storage(string_path)

        # Both should be file:// URLs pointing to the same file.
        assert result_path.startswith(scheme)
        assert result_string.startswith(scheme)

        # Strip only the leading scheme; the prefix was asserted above, so
        # slicing is exact and cannot touch a "file://" later in the string.
        path_from_path_obj = result_path[len(scheme):]
        path_from_string = result_string[len(scheme):]

        # They should resolve to the same absolute path.
        assert os.path.normpath(path_from_path_obj) == os.path.normpath(path_from_string)

    @pytest.mark.asyncio
    async def test_save_data_item_text_content(self):
        """Test that plain text strings are handled as content, not paths."""
        text_content = "This is plain text content, not a file path."

        result = await save_data_item_to_storage(text_content)

        # Should create a file and return file:// URL since this is text content.
        assert result.startswith("file://")

    @pytest.mark.asyncio
    async def test_resolve_data_directories_path_object(self, temp_directory):
        """Test resolve_data_directories with a Path object for a directory."""
        path_obj = Path(temp_directory)

        result = await resolve_data_directories([path_obj])

        # Should return a list of Path objects for the files in the directory.
        assert len(result) == 3  # We created 3 test files
        assert all(isinstance(item, Path) for item in result)
        assert all(item.suffix == ".txt" for item in result)

    @pytest.mark.asyncio
    async def test_resolve_data_directories_mixed_types(self, temp_directory, temp_text_file):
        """Test resolve_data_directories with mixed Path and string types."""
        path_obj = Path(temp_text_file)
        string_path = str(temp_text_file)
        directory_path = Path(temp_directory)

        # Mix of types: a file as Path, the same file as str, a directory as Path.
        mixed_data = [path_obj, string_path, directory_path]
        result = await resolve_data_directories(mixed_data)

        # Should have:
        # - 1 Path object (original file as Path)
        # - 1 string (original file as string)
        # - 3 Path objects (from directory expansion)
        assert len(result) == 5

        # Count types.
        path_objects = [item for item in result if isinstance(item, Path)]
        string_objects = [item for item in result if isinstance(item, str)]
        assert len(path_objects) == 4  # 1 original + 3 from directory
        assert len(string_objects) == 1  # 1 original string

    @pytest.mark.asyncio
    async def test_resolve_data_directories_path_single_file(self, temp_text_file):
        """Test resolve_data_directories with a single Path pointing at a file."""
        path_obj = Path(temp_text_file)

        result = await resolve_data_directories([path_obj])

        # Should return the same Path object, unexpanded.
        assert len(result) == 1
        assert isinstance(result[0], Path)
        assert str(result[0]) == str(path_obj)