From cb6651a6e9925e48e6f270b610e05f6a0298eae6 Mon Sep 17 00:00:00 2001 From: gneeraj2001 Date: Fri, 29 Aug 2025 02:06:43 -0700 Subject: [PATCH] Fix path handling consistency Signed-off-by: gneeraj2001 --- cognee/base_config.py | 15 ++- .../infrastructure/databases/graph/config.py | 16 ++- .../infrastructure/databases/vector/config.py | 21 ++-- cognee/root_dir.py | 28 +++++ cognee/tests/test_path_config.py | 114 ++++++++++++++++++ 5 files changed, 182 insertions(+), 12 deletions(-) create mode 100644 cognee/tests/test_path_config.py diff --git a/cognee/base_config.py b/cognee/base_config.py index aa0b14008..d80e6197f 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -1,15 +1,28 @@ import os from typing import Optional from functools import lru_cache -from cognee.root_dir import get_absolute_path +from cognee.root_dir import get_absolute_path, ensure_absolute_path from cognee.modules.observability.observers import Observer from pydantic_settings import BaseSettings, SettingsConfigDict +import pydantic class BaseConfig(BaseSettings): data_root_directory: str = get_absolute_path(".data_storage") system_root_directory: str = get_absolute_path(".cognee_system") monitoring_tool: object = Observer.LANGFUSE + + @pydantic.model_validator(mode="after") + def validate_paths(cls, values): + # Require absolute paths for root directories + values.data_root_directory = ensure_absolute_path( + values.data_root_directory, allow_relative=False + ) + values.system_root_directory = ensure_absolute_path( + values.system_root_directory, allow_relative=False + ) + return values + langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY") langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY") langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST") diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index cdc001863..60c193d91 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -6,6 +6,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict import pydantic from pydantic import Field from cognee.base_config import get_base_config +from cognee.root_dir import ensure_absolute_path from cognee.shared.data_models import KnowledgeGraph @@ -51,15 +52,22 @@ class GraphConfig(BaseSettings): @pydantic.model_validator(mode="after") def fill_derived(cls, values): provider = values.graph_database_provider.lower() + base_config = get_base_config() # Set default filename if no filename is provided if not values.graph_filename: values.graph_filename = f"cognee_graph_{provider}" - # Set file path based on graph database provider if no file path is provided - if not values.graph_file_path: - base_config = get_base_config() - + # Handle graph file path + if values.graph_file_path: + # Convert relative paths to absolute using system_root_directory as base + values.graph_file_path = ensure_absolute_path( + values.graph_file_path, + base_path=base_config.system_root_directory, + allow_relative=True + ) + else: + # Default path databases_directory_path = os.path.join(base_config.system_root_directory, "databases") values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename) diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index 07a3d1e05..ed846a54b 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -4,6 +4,7 @@ from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.base_config import get_base_config +from cognee.root_dir import ensure_absolute_path class VectorConfig(BaseSettings): @@ -11,12 +12,10 @@ class VectorConfig(BaseSettings): Manage the configuration settings for the vector database. Public methods: - - to_dict: Convert the configuration to a dictionary. Instance variables: - - - vector_db_url: The URL of the vector database. + - vector_db_url: The URL of the vector database. Can be relative to system_root_directory. - vector_db_port: The port for the vector database. - vector_db_key: The key for accessing the vector database. - vector_db_provider: The provider for the vector database. @@ -30,10 +29,18 @@ class VectorConfig(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="allow") @pydantic.model_validator(mode="after") - def fill_derived(cls, values): - # Set file path based on graph database provider if no file path is provided - if not values.vector_db_url: - base_config = get_base_config() + def validate_paths(cls, values): + base_config = get_base_config() + + if values.vector_db_url: + # Convert relative paths to absolute using system_root_directory as base + values.vector_db_url = ensure_absolute_path( + values.vector_db_url, + base_path=base_config.system_root_directory, + allow_relative=True, + ) + else: + # Default path databases_directory_path = os.path.join(base_config.system_root_directory, "databases") values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb") diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 2e21d5ce3..73afd0c12 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import Optional ROOT_DIR = Path(__file__).resolve().parent @@ -6,3 +7,30 @@ ROOT_DIR = Path(__file__).resolve().parent def get_absolute_path(path_from_root: str) -> str: absolute_path = ROOT_DIR / path_from_root return str(absolute_path.resolve()) + + +def ensure_absolute_path( + path: str, base_path: Optional[str] = None, allow_relative: bool = False +) -> str: + """Ensures a path is absolute, optionally converting relative paths. + + Args: + path: The path to validate/convert + base_path: Optional base path for relative paths. If None, uses ROOT_DIR + allow_relative: If False, raises error for relative paths instead of converting + + Returns: + Absolute path as string + + Raises: + ValueError: If path is relative and allow_relative is False + """ + path_obj = Path(path) + if path_obj.is_absolute(): + return str(path_obj.resolve()) + + if not allow_relative: + raise ValueError(f"Path must be absolute. Got relative path: {path}") + + base = Path(base_path) if base_path else ROOT_DIR + return str((base / path).resolve()) diff --git a/cognee/tests/test_path_config.py b/cognee/tests/test_path_config.py new file mode 100644 index 000000000..ff1905c5e --- /dev/null +++ b/cognee/tests/test_path_config.py @@ -0,0 +1,114 @@ +import os +from pathlib import Path + +def ensure_absolute_path(path: str, base_path: str = None, allow_relative: bool = False) -> str: + """Ensures a path is absolute, optionally converting relative paths.""" + if path is None: + raise ValueError("Path cannot be None") + + path_obj = Path(path) + if path_obj.is_absolute(): + return str(path_obj.resolve()) + + if not allow_relative: + raise ValueError(f"Path must be absolute. Got relative path: {path}") + + if base_path is None: + raise ValueError("base_path must be provided when converting relative paths") + + base = Path(base_path) + if not base.is_absolute(): + raise ValueError("base_path must be absolute when converting relative paths") + + return str((base / path).resolve()) + +def test_root_dir_absolute_paths(): + """Test absolute path handling in root_dir.py""" + # Test with absolute path + abs_path = "C:/absolute/path" if os.name == 'nt' else "/absolute/path" + result = ensure_absolute_path(abs_path, allow_relative=False) + assert result == str(Path(abs_path).resolve()) + + # Test with relative path (should fail) + rel_path = "relative/path" + try: + ensure_absolute_path(rel_path, allow_relative=False) + assert False, "Should fail with relative path when allow_relative=False" + except ValueError as e: + assert "must be absolute" in str(e) + + # Test with None path + try: + ensure_absolute_path(None) + assert False, "Should fail with None path" + except ValueError as e: + assert "cannot be None" in str(e) + +def test_database_relative_paths(): + """Test relative path handling for vector and graph databases""" + system_root = "C:/system/root" if os.name == 'nt' else "/system/root" + + # Test with absolute path + abs_path = "C:/data/vector.db" if os.name == 'nt' else "/data/vector.db" + result = ensure_absolute_path(abs_path, base_path=system_root, allow_relative=True) + assert result == str(Path(abs_path).resolve()) + + # Test with relative path (should convert to absolute) + rel_path = "data/vector.db" + result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) + expected = str((Path(system_root) / rel_path).resolve()) + assert result == expected + + # Test with relative base_path (should fail) + try: + ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True) + assert False, "Should fail when base_path is relative" + except ValueError as e: + assert "base_path must be absolute" in str(e) + + # Test without base_path for relative path + try: + ensure_absolute_path(rel_path, allow_relative=True) + assert False, "Should fail when base_path is not provided for relative path" + except ValueError as e: + assert "base_path must be provided" in str(e) + +def test_path_consistency(): + """Test that paths are handled consistently across configurations""" + system_root = "C:/system/root" if os.name == 'nt' else "/system/root" + + # Root directories must be absolute + data_root = "C:/data/root" if os.name == 'nt' else "/data/root" + assert ensure_absolute_path(data_root, allow_relative=False) == str(Path(data_root).resolve()) + + # Database paths can be relative but must resolve against system_root + db_paths = [ + # Vector DB paths + "vector.db", # Simple relative + "data/vector.db", # Nested relative + "../vector.db", # Parent relative + "./vector.db", # Current dir relative + # Graph DB paths + "graph.db", # Simple relative + "data/graph/db", # Nested relative + "../graph.db", # Parent relative + "./graph.db", # Current dir relative + # With different extensions + "data/vector.lancedb", # Vector DB with extension + "data/graph/kuzu", # Graph DB with extension + ] + + for rel_path in db_paths: + result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True) + expected = str((Path(system_root) / rel_path).resolve()) + assert result == expected, f"Failed to resolve {rel_path} correctly" + +if __name__ == "__main__": + print("Running path configuration tests...") + test_root_dir_absolute_paths() + print("✓ Root directory absolute path tests passed") + test_database_relative_paths() + print("✓ Database relative path tests passed") + test_path_consistency() + print("✓ Path consistency tests passed") + print("All tests passed successfully!")