Fix path handling consistency

Signed-off-by: gneeraj2001 <gneeraj2001@gmail.com>
This commit is contained in:
gneeraj2001 2025-08-29 02:06:43 -07:00
parent f1438e33cd
commit b06fe395b3
5 changed files with 182 additions and 12 deletions

View file

@ -1,15 +1,28 @@
import os
from typing import Optional
from functools import lru_cache
from cognee.root_dir import get_absolute_path
from cognee.root_dir import get_absolute_path, ensure_absolute_path
from cognee.modules.observability.observers import Observer
from pydantic_settings import BaseSettings, SettingsConfigDict
import pydantic
class BaseConfig(BaseSettings):
data_root_directory: str = get_absolute_path(".data_storage")
system_root_directory: str = get_absolute_path(".cognee_system")
monitoring_tool: object = Observer.LANGFUSE
@pydantic.model_validator(mode="after")
def validate_paths(cls, values):
# Require absolute paths for root directories
values.data_root_directory = ensure_absolute_path(
values.data_root_directory, allow_relative=False
)
values.system_root_directory = ensure_absolute_path(
values.system_root_directory, allow_relative=False
)
return values
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY")
langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST")

View file

@ -6,6 +6,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
import pydantic
from pydantic import Field
from cognee.base_config import get_base_config
from cognee.root_dir import ensure_absolute_path
from cognee.shared.data_models import KnowledgeGraph
@ -51,15 +52,22 @@ class GraphConfig(BaseSettings):
@pydantic.model_validator(mode="after")
def fill_derived(cls, values):
provider = values.graph_database_provider.lower()
base_config = get_base_config()
# Set default filename if no filename is provided
if not values.graph_filename:
values.graph_filename = f"cognee_graph_{provider}"
# Set file path based on graph database provider if no file path is provided
if not values.graph_file_path:
base_config = get_base_config()
# Handle graph file path
if values.graph_file_path:
# Convert relative paths to absolute using system_root_directory as base
values.graph_file_path = ensure_absolute_path(
values.graph_file_path,
base_path=base_config.system_root_directory,
allow_relative=True
)
else:
# Default path
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename)

View file

@ -4,6 +4,7 @@ from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.base_config import get_base_config
from cognee.root_dir import ensure_absolute_path
class VectorConfig(BaseSettings):
@ -11,12 +12,10 @@ class VectorConfig(BaseSettings):
Manage the configuration settings for the vector database.
Public methods:
- to_dict: Convert the configuration to a dictionary.
Instance variables:
- vector_db_url: The URL of the vector database.
- vector_db_url: The URL of the vector database. Can be relative to system_root_directory.
- vector_db_port: The port for the vector database.
- vector_db_key: The key for accessing the vector database.
- vector_db_provider: The provider for the vector database.
@ -30,10 +29,18 @@ class VectorConfig(BaseSettings):
model_config = SettingsConfigDict(env_file=".env", extra="allow")
@pydantic.model_validator(mode="after")
def fill_derived(cls, values):
# Set file path based on graph database provider if no file path is provided
if not values.vector_db_url:
base_config = get_base_config()
def validate_paths(cls, values):
base_config = get_base_config()
if values.vector_db_url:
# Convert relative paths to absolute using system_root_directory as base
values.vector_db_url = ensure_absolute_path(
values.vector_db_url,
base_path=base_config.system_root_directory,
allow_relative=True,
)
else:
# Default path
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")

View file

@ -1,4 +1,5 @@
from pathlib import Path
from typing import Optional
ROOT_DIR = Path(__file__).resolve().parent
@ -6,3 +7,30 @@ ROOT_DIR = Path(__file__).resolve().parent
def get_absolute_path(path_from_root: str) -> str:
absolute_path = ROOT_DIR / path_from_root
return str(absolute_path.resolve())
def ensure_absolute_path(
path: str, base_path: Optional[str] = None, allow_relative: bool = False
) -> str:
"""Ensures a path is absolute, optionally converting relative paths.
Args:
path: The path to validate/convert
base_path: Optional base path for relative paths. If None, uses ROOT_DIR
allow_relative: If False, raises error for relative paths instead of converting
Returns:
Absolute path as string
Raises:
ValueError: If path is relative and allow_relative is False
"""
path_obj = Path(path)
if path_obj.is_absolute():
return str(path_obj.resolve())
if not allow_relative:
raise ValueError(f"Path must be absolute. Got relative path: {path}")
base = Path(base_path) if base_path else ROOT_DIR
return str((base / path).resolve())

View file

@ -0,0 +1,114 @@
import os
from pathlib import Path
def ensure_absolute_path(path: str, base_path: str = None, allow_relative: bool = False) -> str:
"""Ensures a path is absolute, optionally converting relative paths."""
if path is None:
raise ValueError("Path cannot be None")
path_obj = Path(path)
if path_obj.is_absolute():
return str(path_obj.resolve())
if not allow_relative:
raise ValueError(f"Path must be absolute. Got relative path: {path}")
if base_path is None:
raise ValueError("base_path must be provided when converting relative paths")
base = Path(base_path)
if not base.is_absolute():
raise ValueError("base_path must be absolute when converting relative paths")
return str((base / path).resolve())
def test_root_dir_absolute_paths():
"""Test absolute path handling in root_dir.py"""
# Test with absolute path
abs_path = "C:/absolute/path" if os.name == 'nt' else "/absolute/path"
result = ensure_absolute_path(abs_path, allow_relative=False)
assert result == str(Path(abs_path).resolve())
# Test with relative path (should fail)
rel_path = "relative/path"
try:
ensure_absolute_path(rel_path, allow_relative=False)
assert False, "Should fail with relative path when allow_relative=False"
except ValueError as e:
assert "must be absolute" in str(e)
# Test with None path
try:
ensure_absolute_path(None)
assert False, "Should fail with None path"
except ValueError as e:
assert "cannot be None" in str(e)
def test_database_relative_paths():
"""Test relative path handling for vector and graph databases"""
system_root = "C:/system/root" if os.name == 'nt' else "/system/root"
# Test with absolute path
abs_path = "C:/data/vector.db" if os.name == 'nt' else "/data/vector.db"
result = ensure_absolute_path(abs_path, base_path=system_root, allow_relative=True)
assert result == str(Path(abs_path).resolve())
# Test with relative path (should convert to absolute)
rel_path = "data/vector.db"
result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True)
expected = str((Path(system_root) / rel_path).resolve())
assert result == expected
# Test with relative base_path (should fail)
try:
ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True)
assert False, "Should fail when base_path is relative"
except ValueError as e:
assert "base_path must be absolute" in str(e)
# Test without base_path for relative path
try:
ensure_absolute_path(rel_path, allow_relative=True)
assert False, "Should fail when base_path is not provided for relative path"
except ValueError as e:
assert "base_path must be provided" in str(e)
def test_path_consistency():
"""Test that paths are handled consistently across configurations"""
system_root = "C:/system/root" if os.name == 'nt' else "/system/root"
# Root directories must be absolute
data_root = "C:/data/root" if os.name == 'nt' else "/data/root"
assert ensure_absolute_path(data_root, allow_relative=False) == str(Path(data_root).resolve())
# Database paths can be relative but must resolve against system_root
db_paths = [
# Vector DB paths
"vector.db", # Simple relative
"data/vector.db", # Nested relative
"../vector.db", # Parent relative
"./vector.db", # Current dir relative
# Graph DB paths
"graph.db", # Simple relative
"data/graph/db", # Nested relative
"../graph.db", # Parent relative
"./graph.db", # Current dir relative
# With different extensions
"data/vector.lancedb", # Vector DB with extension
"data/graph/kuzu", # Graph DB with extension
]
for rel_path in db_paths:
result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True)
expected = str((Path(system_root) / rel_path).resolve())
assert result == expected, f"Failed to resolve {rel_path} correctly"
if __name__ == "__main__":
print("Running path configuration tests...")
test_root_dir_absolute_paths()
print("✓ Root directory absolute path tests passed")
test_database_relative_paths()
print("✓ Database relative path tests passed")
test_path_consistency()
print("✓ Path consistency tests passed")
print("All tests passed successfully!")