diff --git a/README.md b/README.md index a7e7f1e05..e618d5bf9 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github ## Get Started -Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo +Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo + + ## Contributing diff --git a/cognee/base_config.py b/cognee/base_config.py index aa0b14008..940846128 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -1,15 +1,24 @@ import os from typing import Optional from functools import lru_cache -from cognee.root_dir import get_absolute_path +from cognee.root_dir import get_absolute_path, ensure_absolute_path from cognee.modules.observability.observers import Observer from pydantic_settings import BaseSettings, SettingsConfigDict +import pydantic class BaseConfig(BaseSettings): data_root_directory: str = get_absolute_path(".data_storage") system_root_directory: str = get_absolute_path(".cognee_system") monitoring_tool: object = Observer.LANGFUSE + + @pydantic.model_validator(mode="after") + def validate_paths(self): + # Require absolute paths for root directories + self.data_root_directory = ensure_absolute_path(self.data_root_directory) + self.system_root_directory = ensure_absolute_path(self.system_root_directory) + return self + langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY") langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY") langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST") diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index cdc001863..d96de4520 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -6,6 +6,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict import pydantic from pydantic import Field from cognee.base_config import get_base_config +from cognee.root_dir import ensure_absolute_path from cognee.shared.data_models import KnowledgeGraph @@ -51,15 +52,20 @@ class GraphConfig(BaseSettings): @pydantic.model_validator(mode="after") def fill_derived(cls, values): provider = values.graph_database_provider.lower() + base_config = get_base_config() # Set default filename if no filename is provided if not values.graph_filename: values.graph_filename = f"cognee_graph_{provider}" - # Set file path based on graph database provider if no file path is provided - if not values.graph_file_path: - base_config = get_base_config() - + # Handle graph file path + if values.graph_file_path: + # Check if absolute path is provided + values.graph_file_path = ensure_absolute_path( + os.path.join(values.graph_file_path, values.graph_filename) + ) + else: + # Default path databases_directory_path = os.path.join(base_config.system_root_directory, "databases") values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename) diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index 07a3d1e05..f8fad473e 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -1,9 +1,11 @@ import os import pydantic +from pathlib import Path from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.base_config import get_base_config +from cognee.root_dir import ensure_absolute_path class VectorConfig(BaseSettings): @@ -11,11 +13,9 @@ class VectorConfig(BaseSettings): Manage the configuration settings for the vector database. Public methods: - - to_dict: Convert the configuration to a dictionary. Instance variables: - - vector_db_url: The URL of the vector database. - vector_db_port: The port for the vector database. - vector_db_key: The key for accessing the vector database. @@ -30,10 +30,17 @@ class VectorConfig(BaseSettings): model_config = SettingsConfigDict(env_file=".env", extra="allow") @pydantic.model_validator(mode="after") - def fill_derived(cls, values): - # Set file path based on graph database provider if no file path is provided - if not values.vector_db_url: - base_config = get_base_config() + def validate_paths(cls, values): + base_config = get_base_config() + + # If vector_db_url is provided and is not a path skip checking if path is absolute (as it can also be a url) + if values.vector_db_url and Path(values.vector_db_url).exists(): + # Relative path to absolute + values.vector_db_url = ensure_absolute_path( + values.vector_db_url, + ) + else: + # Default path databases_directory_path = os.path.join(base_config.system_root_directory, "databases") values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb") diff --git a/cognee/root_dir.py b/cognee/root_dir.py index 2e21d5ce3..46d8fcb69 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import Optional ROOT_DIR = Path(__file__).resolve().parent @@ -6,3 +7,21 @@ ROOT_DIR = Path(__file__).resolve().parent def get_absolute_path(path_from_root: str) -> str: absolute_path = ROOT_DIR / path_from_root return str(absolute_path.resolve()) + + +def ensure_absolute_path(path: str) -> str: + """Ensures a path is absolute. + + Args: + path: The path to validate. + + Returns: + Absolute path as string + """ + if path is None: + raise ValueError("Path cannot be None") + path_obj = Path(path).expanduser() + if path_obj.is_absolute(): + return str(path_obj.resolve()) + + raise ValueError(f"Path must be absolute. Got relative path: {path}") diff --git a/cognee/tests/unit/processing/utils/utils_test.py b/cognee/tests/unit/processing/utils/utils_test.py index a684df8ed..ca9f8f065 100644 --- a/cognee/tests/unit/processing/utils/utils_test.py +++ b/cognee/tests/unit/processing/utils/utils_test.py @@ -4,8 +4,9 @@ import pytest from unittest.mock import patch, mock_open from io import BytesIO from uuid import uuid4 +from pathlib import Path - +from cognee.root_dir import ensure_absolute_path from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash from cognee.shared.utils import get_anonymous_id @@ -52,3 +53,21 @@ async def test_get_file_content_hash_stream(): expected_hash = hashlib.md5(b"test_data").hexdigest() result = await get_file_content_hash(stream) assert result == expected_hash + + +@pytest.mark.asyncio +async def test_root_dir_absolute_paths(): + """Test absolute path handling in root_dir.py""" + # Test with absolute path + abs_path = "C:/absolute/path" if os.name == "nt" else "/absolute/path" + result = ensure_absolute_path(abs_path) + assert result == str(Path(abs_path).resolve()) + + # Test with relative path (should fail) + rel_path = "relative/path" + with pytest.raises(ValueError, match="must be absolute"): + ensure_absolute_path(rel_path) + + # Test with None path + with pytest.raises(ValueError, match="cannot be None"): + ensure_absolute_path(None)