diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py
index bcf97ebfa..3ffb5eb32 100644
--- a/cognee/infrastructure/databases/graph/config.py
+++ b/cognee/infrastructure/databases/graph/config.py
@@ -116,13 +116,13 @@ class GraphConfig(BaseSettings):
         """
         return {
             "graph_database_provider": self.graph_database_provider,
+            "graph_file_path": self.graph_file_path,
             "graph_database_url": self.graph_database_url,
             "graph_database_name": self.graph_database_name,
             "graph_database_username": self.graph_database_username,
             "graph_database_password": self.graph_database_password,
             "graph_database_port": self.graph_database_port,
             "graph_database_key": self.graph_database_key,
-            "graph_file_path": self.graph_file_path,
             "graph_dataset_database_handler": self.graph_dataset_database_handler,
         }
 
@@ -146,10 +146,18 @@ def get_graph_config():
 def get_graph_context_config():
     """This function will get the appropriate graph db config based on async context.
 
-    This allows the use of multiple graph databases for different threads, async tasks and parallelization
+    This allows the use of multiple graph databases for different threads, async tasks and parallelization.
+    Always returns a canonical mapping aligned with create_graph_engine(...) signature
+    so that caching keys are stable across the codebase.
     """
     from cognee.context_global_variables import graph_db_config
+    from cognee.infrastructure.databases.graph.get_graph_engine import create_graph_engine
+    from cognee.infrastructure.databases.utils.canonicalize import (
+        canonicalize_kwargs_for_signature,
+    )
 
-    if graph_db_config.get():
-        return graph_db_config.get()
-    return get_graph_config().to_hashable_dict()
+    context_cfg = graph_db_config.get() or {}
+    base_cfg = get_graph_config().to_dict()
+    return canonicalize_kwargs_for_signature(
+        raw_params=context_cfg, target_func=create_graph_engine, defaults=base_cfg
+    )
diff --git a/cognee/infrastructure/databases/utils/canonicalize.py b/cognee/infrastructure/databases/utils/canonicalize.py
new file mode 100644
index 000000000..724a35fe9
--- /dev/null
+++ b/cognee/infrastructure/databases/utils/canonicalize.py
@@ -0,0 +1,36 @@
+import inspect
+from typing import Mapping, Callable, Any, Dict
+
+
+def canonicalize_kwargs_for_signature(
+    raw_params: Mapping[str, Any],
+    target_func: Callable[..., Any],
+    defaults: Mapping[str, Any] | None = None,
+) -> Dict[str, Any]:
+    """
+    Build a canonical, ordered kwargs dict aligned with the target function's signature.
+
+    - Merges provided raw_params over defaults
+    - Keeps only parameters that the target function accepts
+    - Orders keys to match the function signature to produce stable cache keys
+
+    Parameters accepted by target_func but absent from both mappings fall back to the
+    signature default, or to None when the parameter declares no default.
+    *args / **kwargs / positional-only parameters are skipped.
+    """
+    base: Dict[str, Any] = dict(defaults or {})
+    merged: Dict[str, Any] = {**base, **(raw_params or {})}
+
+    sig = inspect.signature(target_func)
+    ordered: Dict[str, Any] = {}
+    for name, param in sig.parameters.items():
+        if param.kind in (param.POSITIONAL_OR_KEYWORD, param.KEYWORD_ONLY):
+            if name in merged:
+                ordered[name] = merged[name]
+            elif param.default is not inspect.Parameter.empty:
+                # Parameter.empty is the public sentinel for "no default declared"
+                ordered[name] = param.default
+            else:
+                # Ensure the key exists; None will surface missing-required issues downstream if needed
+                ordered[name] = None
+    return ordered
diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py
index 86b2a0fce..dad836bd2 100644
--- a/cognee/infrastructure/databases/vector/config.py
+++ b/cognee/infrastructure/databases/vector/config.py
@@ -67,6 +67,22 @@ class VectorConfig(BaseSettings):
             "vector_dataset_database_handler": self.vector_dataset_database_handler,
         }
 
+    def to_hashable_dict(self) -> dict:
+        """
+        Return a canonical, hashable configuration dictionary for vector DB engines.
+        The keys and their order are aligned with create_vector_engine(...) signature to
+        ensure consistent lru_cache keys regardless of how the config was originally set.
+        """
+        # NOTE: Order here mirrors create_vector_engine signature
+        return {
+            "vector_db_provider": self.vector_db_provider,
+            "vector_db_url": self.vector_db_url,
+            "vector_db_name": self.vector_db_name,
+            "vector_db_port": self.vector_db_port,
+            "vector_db_key": self.vector_db_key,
+            "vector_dataset_database_handler": self.vector_dataset_database_handler,
+        }
+
 
 @lru_cache
 def get_vectordb_config():
@@ -87,9 +103,19 @@ def get_vectordb_config():
 
 
 def get_vectordb_context_config():
-    """This function will get the appropriate vector db config based on async context."""
-    from cognee.context_global_variables import vector_db_config
+    """This function will get the appropriate vector db config based on async context.
 
-    if vector_db_config.get():
-        return vector_db_config.get()
-    return get_vectordb_config().to_dict()
+    Always returns a canonical mapping aligned with create_vector_engine(...) signature
+    so that caching keys are stable across the codebase.
+    """
+    from cognee.context_global_variables import vector_db_config
+    from cognee.infrastructure.databases.vector.create_vector_engine import create_vector_engine
+    from cognee.infrastructure.databases.utils.canonicalize import (
+        canonicalize_kwargs_for_signature,
+    )
+
+    context_cfg = vector_db_config.get() or {}
+    base_cfg = get_vectordb_config().to_dict()
+    return canonicalize_kwargs_for_signature(
+        raw_params=context_cfg, target_func=create_vector_engine, defaults=base_cfg
+    )