diff --git a/README.md b/README.md
index a7e7f1e05..e618d5bf9 100644
--- a/README.md
+++ b/README.md
@@ -79,7 +79,9 @@ More on [use-cases](https://docs.cognee.ai/use-cases) and [evals](https://github
## Get Started
-Get started quickly with a Google Colab notebook , Deepnote notebook or starter repo
+Get started quickly with a Google Colab notebook, Deepnote notebook, or starter repo
+
+
## Contributing
diff --git a/cognee/base_config.py b/cognee/base_config.py
index aa0b14008..940846128 100644
--- a/cognee/base_config.py
+++ b/cognee/base_config.py
@@ -1,15 +1,24 @@
import os
from typing import Optional
from functools import lru_cache
-from cognee.root_dir import get_absolute_path
+from cognee.root_dir import get_absolute_path, ensure_absolute_path
from cognee.modules.observability.observers import Observer
from pydantic_settings import BaseSettings, SettingsConfigDict
+import pydantic
class BaseConfig(BaseSettings):
data_root_directory: str = get_absolute_path(".data_storage")
system_root_directory: str = get_absolute_path(".cognee_system")
monitoring_tool: object = Observer.LANGFUSE
+
+ @pydantic.model_validator(mode="after")
+ def validate_paths(self):
+ # Require absolute paths for root directories
+ self.data_root_directory = ensure_absolute_path(self.data_root_directory)
+ self.system_root_directory = ensure_absolute_path(self.system_root_directory)
+ return self
+
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY")
langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST")
diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py
index cdc001863..d96de4520 100644
--- a/cognee/infrastructure/databases/graph/config.py
+++ b/cognee/infrastructure/databases/graph/config.py
@@ -6,6 +6,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
import pydantic
from pydantic import Field
from cognee.base_config import get_base_config
+from cognee.root_dir import ensure_absolute_path
from cognee.shared.data_models import KnowledgeGraph
@@ -51,15 +52,20 @@ class GraphConfig(BaseSettings):
@pydantic.model_validator(mode="after")
def fill_derived(cls, values):
provider = values.graph_database_provider.lower()
+ base_config = get_base_config()
# Set default filename if no filename is provided
if not values.graph_filename:
values.graph_filename = f"cognee_graph_{provider}"
- # Set file path based on graph database provider if no file path is provided
- if not values.graph_file_path:
- base_config = get_base_config()
-
+ # Handle graph file path
+ if values.graph_file_path:
+ # A custom directory was provided: append the filename and require the resulting path to be absolute
+ values.graph_file_path = ensure_absolute_path(
+ os.path.join(values.graph_file_path, values.graph_filename)
+ )
+ else:
+ # Default path
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename)
diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py
index 07a3d1e05..f8fad473e 100644
--- a/cognee/infrastructure/databases/vector/config.py
+++ b/cognee/infrastructure/databases/vector/config.py
@@ -1,9 +1,11 @@
import os
import pydantic
+from pathlib import Path
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.base_config import get_base_config
+from cognee.root_dir import ensure_absolute_path
class VectorConfig(BaseSettings):
@@ -11,11 +13,9 @@ class VectorConfig(BaseSettings):
Manage the configuration settings for the vector database.
Public methods:
-
- to_dict: Convert the configuration to a dictionary.
Instance variables:
-
- vector_db_url: The URL of the vector database.
- vector_db_port: The port for the vector database.
- vector_db_key: The key for accessing the vector database.
@@ -30,10 +30,17 @@ class VectorConfig(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", extra="allow")
@pydantic.model_validator(mode="after")
- def fill_derived(cls, values):
- # Set file path based on graph database provider if no file path is provided
- if not values.vector_db_url:
- base_config = get_base_config()
+ def validate_paths(cls, values):
+ base_config = get_base_config()
+
+ # vector_db_url may be a remote URL or a local path; only a value that exists on the filesystem is validated as an absolute path
+ if values.vector_db_url and Path(values.vector_db_url).exists():
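+ # Existing local path: it must already be absolute (ensure_absolute_path raises ValueError for relative paths, it does not convert them)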
+ values.vector_db_url = ensure_absolute_path(
+ values.vector_db_url,
+ )
+ else:
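+ # Default path. NOTE(review): this branch also runs when vector_db_url is set but does not exist as a local path (e.g. a remote URL), overwriting the provided value - confirm this is intended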
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
diff --git a/cognee/root_dir.py b/cognee/root_dir.py
index 2e21d5ce3..46d8fcb69 100644
--- a/cognee/root_dir.py
+++ b/cognee/root_dir.py
@@ -1,4 +1,5 @@
from pathlib import Path
+from typing import Optional
ROOT_DIR = Path(__file__).resolve().parent
@@ -6,3 +7,21 @@ ROOT_DIR = Path(__file__).resolve().parent
def get_absolute_path(path_from_root: str) -> str:
absolute_path = ROOT_DIR / path_from_root
return str(absolute_path.resolve())
+
+
+def ensure_absolute_path(path: str) -> str:
+ """Validate that a path is absolute and return it resolved; raises ValueError if it is None or relative.
+
+ Args:
+ path: The path to validate.
+
+ Returns:
+ Absolute path as string
+ """
+ if path is None:
+ raise ValueError("Path cannot be None")
+ path_obj = Path(path).expanduser()
+ if path_obj.is_absolute():
+ return str(path_obj.resolve())
+
+ raise ValueError(f"Path must be absolute. Got relative path: {path}")
diff --git a/cognee/tests/unit/processing/utils/utils_test.py b/cognee/tests/unit/processing/utils/utils_test.py
index a684df8ed..ca9f8f065 100644
--- a/cognee/tests/unit/processing/utils/utils_test.py
+++ b/cognee/tests/unit/processing/utils/utils_test.py
@@ -4,8 +4,9 @@ import pytest
from unittest.mock import patch, mock_open
from io import BytesIO
from uuid import uuid4
+from pathlib import Path
-
+from cognee.root_dir import ensure_absolute_path
from cognee.infrastructure.files.utils.get_file_content_hash import get_file_content_hash
from cognee.shared.utils import get_anonymous_id
@@ -52,3 +53,21 @@ async def test_get_file_content_hash_stream():
expected_hash = hashlib.md5(b"test_data").hexdigest()
result = await get_file_content_hash(stream)
assert result == expected_hash
+
+
+@pytest.mark.asyncio
+async def test_root_dir_absolute_paths():
+ """Test absolute path handling in root_dir.py"""
+ # Test with absolute path
+ abs_path = "C:/absolute/path" if os.name == "nt" else "/absolute/path"
+ result = ensure_absolute_path(abs_path)
+ assert result == str(Path(abs_path).resolve())
+
+ # Test with relative path (should fail)
+ rel_path = "relative/path"
+ with pytest.raises(ValueError, match="must be absolute"):
+ ensure_absolute_path(rel_path)
+
+ # Test with None path
+ with pytest.raises(ValueError, match="cannot be None"):
+ ensure_absolute_path(None)