Fix path handling consistency
Signed-off-by: gneeraj2001 <gneeraj2001@gmail.com>
This commit is contained in:
parent
f1438e33cd
commit
b06fe395b3
5 changed files with 182 additions and 12 deletions
|
|
@ -1,15 +1,28 @@
|
||||||
import os
|
import os
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from cognee.root_dir import get_absolute_path
|
from cognee.root_dir import get_absolute_path, ensure_absolute_path
|
||||||
from cognee.modules.observability.observers import Observer
|
from cognee.modules.observability.observers import Observer
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
import pydantic
|
||||||
|
|
||||||
|
|
||||||
class BaseConfig(BaseSettings):
|
class BaseConfig(BaseSettings):
|
||||||
data_root_directory: str = get_absolute_path(".data_storage")
|
data_root_directory: str = get_absolute_path(".data_storage")
|
||||||
system_root_directory: str = get_absolute_path(".cognee_system")
|
system_root_directory: str = get_absolute_path(".cognee_system")
|
||||||
monitoring_tool: object = Observer.LANGFUSE
|
monitoring_tool: object = Observer.LANGFUSE
|
||||||
|
|
||||||
|
@pydantic.model_validator(mode="after")
|
||||||
|
def validate_paths(cls, values):
|
||||||
|
# Require absolute paths for root directories
|
||||||
|
values.data_root_directory = ensure_absolute_path(
|
||||||
|
values.data_root_directory, allow_relative=False
|
||||||
|
)
|
||||||
|
values.system_root_directory = ensure_absolute_path(
|
||||||
|
values.system_root_directory, allow_relative=False
|
||||||
|
)
|
||||||
|
return values
|
||||||
|
|
||||||
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
|
langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
|
||||||
langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY")
|
langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY")
|
||||||
langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST")
|
langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST")
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
import pydantic
|
import pydantic
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
|
from cognee.root_dir import ensure_absolute_path
|
||||||
from cognee.shared.data_models import KnowledgeGraph
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -51,15 +52,22 @@ class GraphConfig(BaseSettings):
|
||||||
@pydantic.model_validator(mode="after")
|
@pydantic.model_validator(mode="after")
|
||||||
def fill_derived(cls, values):
|
def fill_derived(cls, values):
|
||||||
provider = values.graph_database_provider.lower()
|
provider = values.graph_database_provider.lower()
|
||||||
|
base_config = get_base_config()
|
||||||
|
|
||||||
# Set default filename if no filename is provided
|
# Set default filename if no filename is provided
|
||||||
if not values.graph_filename:
|
if not values.graph_filename:
|
||||||
values.graph_filename = f"cognee_graph_{provider}"
|
values.graph_filename = f"cognee_graph_{provider}"
|
||||||
|
|
||||||
# Set file path based on graph database provider if no file path is provided
|
# Handle graph file path
|
||||||
if not values.graph_file_path:
|
if values.graph_file_path:
|
||||||
base_config = get_base_config()
|
# Convert relative paths to absolute using system_root_directory as base
|
||||||
|
values.graph_file_path = ensure_absolute_path(
|
||||||
|
values.graph_file_path,
|
||||||
|
base_path=base_config.system_root_directory,
|
||||||
|
allow_relative=True
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Default path
|
||||||
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
|
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
|
||||||
values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename)
|
values.graph_file_path = os.path.join(databases_directory_path, values.graph_filename)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ from functools import lru_cache
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
|
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
|
from cognee.root_dir import ensure_absolute_path
|
||||||
|
|
||||||
|
|
||||||
class VectorConfig(BaseSettings):
|
class VectorConfig(BaseSettings):
|
||||||
|
|
@ -11,12 +12,10 @@ class VectorConfig(BaseSettings):
|
||||||
Manage the configuration settings for the vector database.
|
Manage the configuration settings for the vector database.
|
||||||
|
|
||||||
Public methods:
|
Public methods:
|
||||||
|
|
||||||
- to_dict: Convert the configuration to a dictionary.
|
- to_dict: Convert the configuration to a dictionary.
|
||||||
|
|
||||||
Instance variables:
|
Instance variables:
|
||||||
|
- vector_db_url: The URL of the vector database. Can be relative to system_root_directory.
|
||||||
- vector_db_url: The URL of the vector database.
|
|
||||||
- vector_db_port: The port for the vector database.
|
- vector_db_port: The port for the vector database.
|
||||||
- vector_db_key: The key for accessing the vector database.
|
- vector_db_key: The key for accessing the vector database.
|
||||||
- vector_db_provider: The provider for the vector database.
|
- vector_db_provider: The provider for the vector database.
|
||||||
|
|
@ -30,10 +29,18 @@ class VectorConfig(BaseSettings):
|
||||||
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
model_config = SettingsConfigDict(env_file=".env", extra="allow")
|
||||||
|
|
||||||
@pydantic.model_validator(mode="after")
|
@pydantic.model_validator(mode="after")
|
||||||
def fill_derived(cls, values):
|
def validate_paths(cls, values):
|
||||||
# Set file path based on graph database provider if no file path is provided
|
base_config = get_base_config()
|
||||||
if not values.vector_db_url:
|
|
||||||
base_config = get_base_config()
|
if values.vector_db_url:
|
||||||
|
# Convert relative paths to absolute using system_root_directory as base
|
||||||
|
values.vector_db_url = ensure_absolute_path(
|
||||||
|
values.vector_db_url,
|
||||||
|
base_path=base_config.system_root_directory,
|
||||||
|
allow_relative=True,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Default path
|
||||||
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
|
databases_directory_path = os.path.join(base_config.system_root_directory, "databases")
|
||||||
values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
|
values.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb")
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
ROOT_DIR = Path(__file__).resolve().parent
|
ROOT_DIR = Path(__file__).resolve().parent
|
||||||
|
|
||||||
|
|
@ -6,3 +7,30 @@ ROOT_DIR = Path(__file__).resolve().parent
|
||||||
def get_absolute_path(path_from_root: str) -> str:
|
def get_absolute_path(path_from_root: str) -> str:
|
||||||
absolute_path = ROOT_DIR / path_from_root
|
absolute_path = ROOT_DIR / path_from_root
|
||||||
return str(absolute_path.resolve())
|
return str(absolute_path.resolve())
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_absolute_path(
|
||||||
|
path: str, base_path: Optional[str] = None, allow_relative: bool = False
|
||||||
|
) -> str:
|
||||||
|
"""Ensures a path is absolute, optionally converting relative paths.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
path: The path to validate/convert
|
||||||
|
base_path: Optional base path for relative paths. If None, uses ROOT_DIR
|
||||||
|
allow_relative: If False, raises error for relative paths instead of converting
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Absolute path as string
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
ValueError: If path is relative and allow_relative is False
|
||||||
|
"""
|
||||||
|
path_obj = Path(path)
|
||||||
|
if path_obj.is_absolute():
|
||||||
|
return str(path_obj.resolve())
|
||||||
|
|
||||||
|
if not allow_relative:
|
||||||
|
raise ValueError(f"Path must be absolute. Got relative path: {path}")
|
||||||
|
|
||||||
|
base = Path(base_path) if base_path else ROOT_DIR
|
||||||
|
return str((base / path).resolve())
|
||||||
|
|
|
||||||
114
cognee/tests/test_path_config.py
Normal file
114
cognee/tests/test_path_config.py
Normal file
|
|
@ -0,0 +1,114 @@
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
def ensure_absolute_path(path: str, base_path: str = None, allow_relative: bool = False) -> str:
|
||||||
|
"""Ensures a path is absolute, optionally converting relative paths."""
|
||||||
|
if path is None:
|
||||||
|
raise ValueError("Path cannot be None")
|
||||||
|
|
||||||
|
path_obj = Path(path)
|
||||||
|
if path_obj.is_absolute():
|
||||||
|
return str(path_obj.resolve())
|
||||||
|
|
||||||
|
if not allow_relative:
|
||||||
|
raise ValueError(f"Path must be absolute. Got relative path: {path}")
|
||||||
|
|
||||||
|
if base_path is None:
|
||||||
|
raise ValueError("base_path must be provided when converting relative paths")
|
||||||
|
|
||||||
|
base = Path(base_path)
|
||||||
|
if not base.is_absolute():
|
||||||
|
raise ValueError("base_path must be absolute when converting relative paths")
|
||||||
|
|
||||||
|
return str((base / path).resolve())
|
||||||
|
|
||||||
|
def test_root_dir_absolute_paths():
|
||||||
|
"""Test absolute path handling in root_dir.py"""
|
||||||
|
# Test with absolute path
|
||||||
|
abs_path = "C:/absolute/path" if os.name == 'nt' else "/absolute/path"
|
||||||
|
result = ensure_absolute_path(abs_path, allow_relative=False)
|
||||||
|
assert result == str(Path(abs_path).resolve())
|
||||||
|
|
||||||
|
# Test with relative path (should fail)
|
||||||
|
rel_path = "relative/path"
|
||||||
|
try:
|
||||||
|
ensure_absolute_path(rel_path, allow_relative=False)
|
||||||
|
assert False, "Should fail with relative path when allow_relative=False"
|
||||||
|
except ValueError as e:
|
||||||
|
assert "must be absolute" in str(e)
|
||||||
|
|
||||||
|
# Test with None path
|
||||||
|
try:
|
||||||
|
ensure_absolute_path(None)
|
||||||
|
assert False, "Should fail with None path"
|
||||||
|
except ValueError as e:
|
||||||
|
assert "cannot be None" in str(e)
|
||||||
|
|
||||||
|
def test_database_relative_paths():
|
||||||
|
"""Test relative path handling for vector and graph databases"""
|
||||||
|
system_root = "C:/system/root" if os.name == 'nt' else "/system/root"
|
||||||
|
|
||||||
|
# Test with absolute path
|
||||||
|
abs_path = "C:/data/vector.db" if os.name == 'nt' else "/data/vector.db"
|
||||||
|
result = ensure_absolute_path(abs_path, base_path=system_root, allow_relative=True)
|
||||||
|
assert result == str(Path(abs_path).resolve())
|
||||||
|
|
||||||
|
# Test with relative path (should convert to absolute)
|
||||||
|
rel_path = "data/vector.db"
|
||||||
|
result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True)
|
||||||
|
expected = str((Path(system_root) / rel_path).resolve())
|
||||||
|
assert result == expected
|
||||||
|
|
||||||
|
# Test with relative base_path (should fail)
|
||||||
|
try:
|
||||||
|
ensure_absolute_path(rel_path, base_path="relative/base", allow_relative=True)
|
||||||
|
assert False, "Should fail when base_path is relative"
|
||||||
|
except ValueError as e:
|
||||||
|
assert "base_path must be absolute" in str(e)
|
||||||
|
|
||||||
|
# Test without base_path for relative path
|
||||||
|
try:
|
||||||
|
ensure_absolute_path(rel_path, allow_relative=True)
|
||||||
|
assert False, "Should fail when base_path is not provided for relative path"
|
||||||
|
except ValueError as e:
|
||||||
|
assert "base_path must be provided" in str(e)
|
||||||
|
|
||||||
|
def test_path_consistency():
|
||||||
|
"""Test that paths are handled consistently across configurations"""
|
||||||
|
system_root = "C:/system/root" if os.name == 'nt' else "/system/root"
|
||||||
|
|
||||||
|
# Root directories must be absolute
|
||||||
|
data_root = "C:/data/root" if os.name == 'nt' else "/data/root"
|
||||||
|
assert ensure_absolute_path(data_root, allow_relative=False) == str(Path(data_root).resolve())
|
||||||
|
|
||||||
|
# Database paths can be relative but must resolve against system_root
|
||||||
|
db_paths = [
|
||||||
|
# Vector DB paths
|
||||||
|
"vector.db", # Simple relative
|
||||||
|
"data/vector.db", # Nested relative
|
||||||
|
"../vector.db", # Parent relative
|
||||||
|
"./vector.db", # Current dir relative
|
||||||
|
# Graph DB paths
|
||||||
|
"graph.db", # Simple relative
|
||||||
|
"data/graph/db", # Nested relative
|
||||||
|
"../graph.db", # Parent relative
|
||||||
|
"./graph.db", # Current dir relative
|
||||||
|
# With different extensions
|
||||||
|
"data/vector.lancedb", # Vector DB with extension
|
||||||
|
"data/graph/kuzu", # Graph DB with extension
|
||||||
|
]
|
||||||
|
|
||||||
|
for rel_path in db_paths:
|
||||||
|
result = ensure_absolute_path(rel_path, base_path=system_root, allow_relative=True)
|
||||||
|
expected = str((Path(system_root) / rel_path).resolve())
|
||||||
|
assert result == expected, f"Failed to resolve {rel_path} correctly"
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print("Running path configuration tests...")
|
||||||
|
test_root_dir_absolute_paths()
|
||||||
|
print("✓ Root directory absolute path tests passed")
|
||||||
|
test_database_relative_paths()
|
||||||
|
print("✓ Database relative path tests passed")
|
||||||
|
test_path_consistency()
|
||||||
|
print("✓ Path consistency tests passed")
|
||||||
|
print("All tests passed successfully!")
|
||||||
Loading…
Add table
Reference in a new issue