Compare commits
4 commits: main...refactor/r

| Author | SHA1 | Date |
|---|---|---|
| | a8378a429d | |
| | 7b0879f88a | |
| | 0696d4c340 | |
| | d9761118b2 | |

46 changed files with 232 additions and 541 deletions
@@ -7,13 +7,13 @@ from cognee.shared.logging_utils import get_logger
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.infrastructure.llm import get_max_chunk_tokens

-from cognee.modules.pipelines import cognee_pipeline
-from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.users.models import User
 from cognee.modules.chunking.TextChunker import TextChunker
 from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver
+from cognee.modules.pipelines.tasks.task import Task
+from cognee.modules.pipelines import cognee_pipeline
 from cognee.modules.pipelines.models.PipelineRunInfo import PipelineRunCompleted, PipelineRunErrored
 from cognee.modules.pipelines.queues.pipeline_run_info_queues import push_to_queue
-from cognee.modules.users.models import User

 from cognee.tasks.documents import (
     check_permissions_on_dataset,
@@ -30,14 +30,14 @@ update_status_lock = asyncio.Lock()


 async def cognify(
-    datasets: Union[str, list[str], list[UUID]] = None,
-    user: User = None,
-    graph_model: BaseModel = KnowledgeGraph,
+    datasets: Optional[Union[str, list[str], list[UUID]]] = None,
+    user: Optional[User] = None,
+    graph_model: Optional[BaseModel] = KnowledgeGraph,
     chunker=TextChunker,
-    chunk_size: int = None,
+    chunk_size: Optional[int] = None,
     ontology_file_path: Optional[str] = None,
-    vector_db_config: dict = None,
-    graph_db_config: dict = None,
+    vector_db_config: Optional[dict] = None,
+    graph_db_config: Optional[dict] = None,
     run_in_background: bool = False,
     incremental_loading: bool = True,
 ):
@@ -212,8 +212,8 @@ async def run_cognify_blocking(
     tasks,
     user,
     datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
+    graph_db_config=None,
+    vector_db_config=None,
     incremental_loading: bool = True,
 ):
     total_run_info = {}
@@ -239,8 +239,8 @@ async def run_cognify_as_background_process(
     tasks,
     user,
     datasets,
-    graph_db_config: dict = None,
-    vector_db_config: dict = False,
+    graph_db_config=None,
+    vector_db_config=None,
     incremental_loading: bool = True,
 ):
     # Convert dataset to list if it's a string
@@ -2,7 +2,7 @@

 import os
 from cognee.base_config import get_base_config
-from cognee.exceptions import InvalidValueError, InvalidAttributeError
+from cognee.exceptions import InvalidAttributeError
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.infrastructure.databases.vector import get_vectordb_config
@@ -107,12 +107,12 @@ class config:
         chunk_config.chunk_engine = chunk_engine

     @staticmethod
-    def set_chunk_overlap(chunk_overlap: object):
+    def set_chunk_overlap(chunk_overlap: int):
         chunk_config = get_chunk_config()
         chunk_config.chunk_overlap = chunk_overlap

     @staticmethod
-    def set_chunk_size(chunk_size: object):
+    def set_chunk_size(chunk_size: int):
         chunk_config = get_chunk_config()
         chunk_config.chunk_size = chunk_size

@@ -10,19 +10,10 @@ class BaseConfig(BaseSettings):
     data_root_directory: str = get_absolute_path(".data_storage")
     system_root_directory: str = get_absolute_path(".cognee_system")
     monitoring_tool: object = Observer.LANGFUSE
-    langfuse_public_key: Optional[str] = os.getenv("LANGFUSE_PUBLIC_KEY")
-    langfuse_secret_key: Optional[str] = os.getenv("LANGFUSE_SECRET_KEY")
-    langfuse_host: Optional[str] = os.getenv("LANGFUSE_HOST")
     default_user_email: Optional[str] = os.getenv("DEFAULT_USER_EMAIL")
     default_user_password: Optional[str] = os.getenv("DEFAULT_USER_PASSWORD")
     model_config = SettingsConfigDict(env_file=".env", extra="allow")

-    def to_dict(self) -> dict:
-        return {
-            "data_root_directory": self.data_root_directory,
-            "system_root_directory": self.system_root_directory,
-            "monitoring_tool": self.monitoring_tool,
-        }
-

 @lru_cache
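The removal of `BaseConfig.to_dict()` above is part of a pattern repeated across this branch: hand-written `to_dict()` methods are dropped in favor of Pydantic's built-in serialization or direct attribute access. A minimal sketch of the replacement pattern, using a stand-in settings class rather than the real `BaseConfig`:

```python
from pydantic_settings import BaseSettings


class DemoConfig(BaseSettings):
    # Stand-in for BaseConfig; fields reduced for illustration.
    data_root_directory: str = ".data_storage"
    system_root_directory: str = ".cognee_system"


config = DemoConfig()

# model_dump() is Pydantic's generated serializer, so the resulting dict
# cannot drift out of sync with the declared fields the way a
# hand-maintained to_dict() could.
print(config.model_dump())
# {'data_root_directory': '.data_storage', 'system_root_directory': '.cognee_system'}
```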
@@ -1,17 +1,19 @@
 import os
 from contextvars import ContextVar
-from typing import Union
+from typing import Optional, Union
 from uuid import UUID

 from cognee.base_config import get_base_config
+from cognee.infrastructure.databases.graph.config import GraphConfig
+from cognee.infrastructure.databases.vector.config import VectorConfig
 from cognee.infrastructure.databases.utils import get_or_create_dataset_database
-from cognee.infrastructure.files.storage.config import file_storage_config
+from cognee.infrastructure.files.storage.config import StorageConfig, file_storage_config
 from cognee.modules.users.methods import get_user

 # Note: ContextVar allows us to use different graph db configurations in Cognee
 # for different async tasks, threads and processes
-vector_db_config = ContextVar("vector_db_config", default=None)
-graph_db_config = ContextVar("graph_db_config", default=None)
+vector_db_config = ContextVar[Optional[VectorConfig]]("vector_db_config", default=None)
+graph_db_config = ContextVar[Optional[GraphConfig]]("graph_db_config", default=None)


 async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
@@ -51,24 +53,24 @@ async def set_database_global_context_variables(dataset: Union[str, UUID], user_
     )

     # Set vector and graph database configuration based on dataset database information
-    vector_config = {
-        "vector_db_url": os.path.join(
-            databases_directory_path, dataset_database.vector_database_name
+    vector_config = VectorConfig(
+        vector_db_url=os.path.join(
+            databases_directory_path, str(dataset_database.vector_database_name)
         ),
-        "vector_db_key": "",
-        "vector_db_provider": "lancedb",
-    }
+        vector_db_key="",
+        vector_db_provider="lancedb",
+    )

-    graph_config = {
-        "graph_database_provider": "kuzu",
-        "graph_file_path": os.path.join(
-            databases_directory_path, dataset_database.graph_database_name
+    graph_config = GraphConfig(
+        graph_database_provider="kuzu",
+        graph_file_path=os.path.join(
+            databases_directory_path, str(dataset_database.graph_database_name)
         ),
-    }
+    )

-    storage_config = {
-        "data_root_directory": data_root_directory,
-    }
+    storage_config = StorageConfig(
+        data_root_directory=data_root_directory,
+    )

     # Use ContextVar to use these graph and vector configurations are used
     # in the current async context across Cognee
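Subscripting `ContextVar` as in `ContextVar[Optional[VectorConfig]]` adds static type information only; runtime behavior is unchanged. A minimal sketch of how a typed context variable isolates configuration per async task (`VectorConfig` is simplified here to two fields for illustration):

```python
import asyncio
from contextvars import ContextVar
from typing import Optional

from pydantic import BaseModel


class VectorConfig(BaseModel):
    # Simplified stand-in for cognee's VectorConfig.
    vector_db_provider: str = "lancedb"
    vector_db_url: str = ""


vector_db_config = ContextVar[Optional[VectorConfig]]("vector_db_config", default=None)


async def read_config(name: str, config: Optional[VectorConfig]) -> str:
    if config is not None:
        # Each asyncio task runs in its own copy of the context, so this
        # set() is invisible to the sibling task.
        vector_db_config.set(config)
    current = vector_db_config.get()
    return f"{name}: {current.vector_db_url if current else 'default'}"


async def main() -> None:
    print(
        await asyncio.gather(
            read_config("a", VectorConfig(vector_db_url="/tmp/a.lance")),
            read_config("b", None),
        )
    )  # ['a: /tmp/a.lance', 'b: default']


asyncio.run(main())
```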
@@ -1,4 +1,5 @@
 from functools import lru_cache
+from deprecated import deprecated
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from typing import List, Optional

@@ -45,6 +46,7 @@ class EvalConfig(BaseSettings):

     model_config = SettingsConfigDict(env_file=".env", extra="allow")

+    @deprecated(reason="Call model_dump() instead of calling to_dict() method.")
     def to_dict(self) -> dict:
         return {
             "building_corpus_from_scratch": self.building_corpus_from_scratch,
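Here `to_dict()` is kept but flagged with `@deprecated` from the third-party `Deprecated` package: the call still works, but it emits a `DeprecationWarning` steering callers toward `model_dump()`. A small sketch of that behavior with a reduced stand-in class:

```python
import warnings

from deprecated import deprecated


class EvalConfigDemo:
    # Reduced stand-in for EvalConfig.
    building_corpus_from_scratch: bool = True

    @deprecated(reason="Call model_dump() instead of calling to_dict() method.")
    def to_dict(self) -> dict:
        return {"building_corpus_from_scratch": self.building_corpus_from_scratch}


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    result = EvalConfigDemo().to_dict()   # still returns the dict
    print(result)
    print(caught[0].category.__name__)    # DeprecationWarning
```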
@@ -1,7 +1,6 @@
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict

-from cognee.infrastructure.data.chunking.DefaultChunkEngine import DefaultChunkEngine
 from cognee.shared.data_models import ChunkStrategy, ChunkEngine


@@ -12,27 +11,11 @@ class ChunkConfig(BaseSettings):

     chunk_size: int = 1500
     chunk_overlap: int = 10
-    chunk_strategy: object = ChunkStrategy.PARAGRAPH
-    chunk_engine: object = ChunkEngine.DEFAULT_ENGINE
+    chunk_strategy: ChunkStrategy = ChunkStrategy.PARAGRAPH
+    chunk_engine: ChunkEngine = ChunkEngine.DEFAULT_ENGINE

     model_config = SettingsConfigDict(env_file=".env", extra="allow")

-    def to_dict(self) -> dict:
-        """
-        Convert the chunk settings to a dictionary format.
-
-        Returns:
-        --------
-
-        - dict: A dictionary representation of the chunk configuration settings.
-        """
-        return {
-            "chunk_size": self.chunk_size,
-            "chunk_overlap": self.chunk_overlap,
-            "chunk_strategy": self.chunk_strategy,
-            "chunk_engine": self.chunk_engine,
-        }
-

 @lru_cache
 def get_chunk_config():
@@ -1,25 +1,12 @@
-from typing import Dict
+from cognee.shared.data_models import ChunkEngine, ChunkStrategy


-from cognee.shared.data_models import ChunkEngine
-
-
-class ChunkingConfig(Dict):
-    """
-    Represent configuration settings for chunking operations, inheriting from the built-in
-    Dict class. The class contains the following public attributes:
-
-    - vector_db_url: A string representing the URL of the vector database.
-    - vector_db_key: A string representing the key for accessing the vector database.
-    - vector_db_provider: A string representing the provider of the vector database.
-    """
-
-    vector_db_url: str
-    vector_db_key: str
-    vector_db_provider: str
-
-
-def create_chunking_engine(config: ChunkingConfig):
+def create_chunking_engine(
+    chunk_size: int,
+    chunk_overlap: int,
+    chunk_engine: ChunkEngine,
+    chunk_strategy: ChunkStrategy,
+):
     """
     Create a chunking engine based on the provided configuration.

@@ -30,7 +17,7 @@ def create_chunking_engine(config: ChunkingConfig):
     Parameters:
     -----------

-    - config (ChunkingConfig): Configuration object containing the settings for the
+    - config (ChunkConfig): Configuration object containing the settings for the
     chunking engine, including the engine type, chunk size, chunk overlap, and chunk
     strategy.

@@ -40,27 +27,27 @@ def create_chunking_engine(config: ChunkingConfig):
     An instance of the selected chunking engine class (LangchainChunkEngine,
     DefaultChunkEngine, or HaystackChunkEngine).
     """
-    if config["chunk_engine"] == ChunkEngine.LANGCHAIN_ENGINE:
+    if chunk_engine == ChunkEngine.LANGCHAIN_ENGINE:
         from cognee.infrastructure.data.chunking.LangchainChunkingEngine import LangchainChunkEngine

         return LangchainChunkEngine(
-            chunk_size=config["chunk_size"],
-            chunk_overlap=config["chunk_overlap"],
-            chunk_strategy=config["chunk_strategy"],
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+            chunk_strategy=chunk_strategy,
         )
-    elif config["chunk_engine"] == ChunkEngine.DEFAULT_ENGINE:
+    elif chunk_engine == ChunkEngine.DEFAULT_ENGINE:
         from cognee.infrastructure.data.chunking.DefaultChunkEngine import DefaultChunkEngine

         return DefaultChunkEngine(
-            chunk_size=config["chunk_size"],
-            chunk_overlap=config["chunk_overlap"],
-            chunk_strategy=config["chunk_strategy"],
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+            chunk_strategy=chunk_strategy,
         )
-    elif config["chunk_engine"] == ChunkEngine.HAYSTACK_ENGINE:
+    elif chunk_engine == ChunkEngine.HAYSTACK_ENGINE:
         from cognee.infrastructure.data.chunking.HaystackChunkEngine import HaystackChunkEngine

         return HaystackChunkEngine(
-            chunk_size=config["chunk_size"],
-            chunk_overlap=config["chunk_overlap"],
-            chunk_strategy=config["chunk_strategy"],
+            chunk_size=chunk_size,
+            chunk_overlap=chunk_overlap,
+            chunk_strategy=chunk_strategy,
         )
@@ -13,4 +13,10 @@ def get_chunk_engine():
     Returns an instance of the chunking engine created based on the configuration
     settings.
     """
-    return create_chunking_engine(get_chunk_config().to_dict())
+    chunk_config = get_chunk_config()
+
+    return create_chunking_engine(
+        chunk_engine=chunk_config.chunk_engine,
+        chunk_size=chunk_config.chunk_size,
+        chunk_overlap=chunk_config.chunk_overlap,
+        chunk_strategy=chunk_config.chunk_strategy,
+    )
@@ -1,10 +1,11 @@
 """This module contains the configuration for the graph database."""

 import os
+import pydantic
+from typing import Optional
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
-import pydantic
-from pydantic import Field
 from cognee.base_config import get_base_config
 from cognee.shared.data_models import KnowledgeGraph

@@ -14,10 +15,6 @@ class GraphConfig(BaseSettings):
     Represents the configuration for a graph system, including parameters for graph file
     storage and database connections.

-    Public methods:
-    - to_dict
-    - to_hashable_dict
-
     Instance variables:
     - graph_filename
     - graph_database_provider
@@ -31,10 +28,7 @@ class GraphConfig(BaseSettings):
     - model_config
     """

-    # Using Field we are able to dynamically load current GRAPH_DATABASE_PROVIDER value in the model validator part
-    # and determine default graph db file and path based on this parameter if no values are provided
-    graph_database_provider: str = Field("kuzu", env="GRAPH_DATABASE_PROVIDER")
-
+    graph_database_provider: str = "kuzu"
     graph_database_url: str = ""
     graph_database_name: str = ""
     graph_database_username: str = ""
@@ -65,54 +59,6 @@ class GraphConfig(BaseSettings):

         return values

-    def to_dict(self) -> dict:
-        """
-        Return the configuration as a dictionary.
-
-        This dictionary contains all the configurations related to the graph, which includes
-        details for file storage and database connectivity.
-
-        Returns:
-        --------
-
-        - dict: A dictionary representation of the configuration settings.
-        """
-        return {
-            "graph_filename": self.graph_filename,
-            "graph_database_provider": self.graph_database_provider,
-            "graph_database_url": self.graph_database_url,
-            "graph_database_username": self.graph_database_username,
-            "graph_database_password": self.graph_database_password,
-            "graph_database_port": self.graph_database_port,
-            "graph_file_path": self.graph_file_path,
-            "graph_model": self.graph_model,
-            "graph_topology": self.graph_topology,
-            "model_config": self.model_config,
-        }
-
-    def to_hashable_dict(self) -> dict:
-        """
-        Return a hashable dictionary with essential database configuration parameters.
-
-        This dictionary excludes certain non-hashable objects and focuses on unique identifiers
-        for database configurations.
-
-        Returns:
-        --------
-
-        - dict: A dictionary representation of the essential database configuration
-        settings.
-        """
-        return {
-            "graph_database_provider": self.graph_database_provider,
-            "graph_database_url": self.graph_database_url,
-            "graph_database_name": self.graph_database_name,
-            "graph_database_username": self.graph_database_username,
-            "graph_database_password": self.graph_database_password,
-            "graph_database_port": self.graph_database_port,
-            "graph_file_path": self.graph_file_path,
-        }
-

 @lru_cache
 def get_graph_config():
@@ -128,15 +74,18 @@ def get_graph_config():

     - GraphConfig: A GraphConfig instance containing the graph configuration settings.
     """
+    context_config = get_graph_context_config()
+
+    if context_config:
+        return context_config
+
     return GraphConfig()


-def get_graph_context_config():
+def get_graph_context_config() -> Optional[GraphConfig]:
     """This function will get the appropriate graph db config based on async context.
     This allows the use of multiple graph databases for different threads, async tasks and parallelization
     """
     from cognee.context_global_variables import graph_db_config

-    if graph_db_config.get():
-        return graph_db_config.get()
-
-    return get_graph_config().to_hashable_dict()
+    return graph_db_config.get()
@@ -2,7 +2,7 @@

 from functools import lru_cache

-from .config import get_graph_context_config
+from .config import get_graph_config
 from .graph_db_interface import GraphDBInterface
 from .supported_databases import supported_databases

@@ -10,20 +10,16 @@ from .supported_databases import supported_databases
 async def get_graph_engine() -> GraphDBInterface:
     """Factory function to get the appropriate graph client based on the graph type."""
     # Get appropriate graph configuration based on current async context
-    config = get_graph_context_config()
+    config = get_graph_config()

-    graph_client = create_graph_engine(**config)
-
-    # Async functions can't be cached. After creating and caching the graph engine
-    # handle all necessary async operations for different graph types bellow.
-
-    # Run any adapter-specific async initialization
-    if hasattr(graph_client, "initialize"):
-        await graph_client.initialize()
-
-    # Handle loading of graph for NetworkX
-    if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None:
-        await graph_client.load_graph_from_file()
+    graph_client = create_graph_engine(
+        graph_database_provider=config.graph_database_provider,
+        graph_file_path=config.graph_file_path,
+        graph_database_url=config.graph_database_url,
+        graph_database_username=config.graph_database_username,
+        graph_database_password=config.graph_database_password,
+        graph_database_port=config.graph_database_port,
+    )

     return graph_client
@@ -36,7 +32,7 @@ def create_graph_engine(
     graph_database_name="",
     graph_database_username="",
     graph_database_password="",
-    graph_database_port="",
+    graph_database_port=None,
 ):
     """
     Create a graph engine based on the specified provider type.
@@ -8,9 +8,10 @@ from neo4j import AsyncSession
 from neo4j import AsyncGraphDatabase
 from neo4j.exceptions import Neo4jError
 from contextlib import asynccontextmanager
-from typing import Optional, Any, List, Dict, Type, Tuple
+from typing import AsyncGenerator, Optional, Any, List, Dict, Type, Tuple

 from cognee.infrastructure.engine import DataPoint
+from cognee.infrastructure.utils.run_sync import run_sync
 from cognee.shared.logging_utils import get_logger, ERROR
 from cognee.infrastructure.databases.graph.graph_db_interface import (
     GraphDBInterface,
@@ -68,16 +69,17 @@ class Neo4jAdapter(GraphDBInterface):
             notifications_min_severity="OFF",
         )

-    async def initialize(self) -> None:
         """
         Initializes the database: adds uniqueness constraint on id and performs indexing
         """
-        await self.query(
-            (f"CREATE CONSTRAINT IF NOT EXISTS FOR (n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;")
+        run_sync(
+            self.query(
+                (f"CREATE CONSTRAINT IF NOT EXISTS FOR (n:`{BASE_LABEL}`) REQUIRE n.id IS UNIQUE;")
+            )
         )

     @asynccontextmanager
-    async def get_session(self) -> AsyncSession:
+    async def get_session(self) -> AsyncGenerator[AsyncSession]:
         """
         Get a session for database operations.
         """
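`run_sync` is imported from `cognee.infrastructure.utils.run_sync`, but its implementation is not part of this diff; from the call sites it evidently drives a coroutine to completion from synchronous code such as `__init__`. A hedged sketch of what such a helper commonly looks like — an assumption, not the actual cognee implementation:

```python
import asyncio
import threading
from typing import Any, Coroutine


def run_sync(coro: Coroutine[Any, Any, Any]) -> Any:
    """Assumed shape of a run_sync helper: block until the coroutine
    finishes, even when called from synchronous code. The real cognee
    implementation is not shown in this diff and may differ."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop in this thread: block on a fresh one.
        return asyncio.run(coro)

    # A loop is already running here; blocking on it would deadlock, so
    # run the coroutine on a private loop in a worker thread instead.
    result: list = []
    worker = threading.Thread(target=lambda: result.append(asyncio.run(coro)))
    worker.start()
    worker.join()
    return result[0]
```

With a helper of this shape, the Neo4j constraint creation above and the NetworkX `load_graph_from_file()` call in the next hunks can run eagerly from synchronous constructors.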
@@ -11,6 +11,7 @@ from typing import Dict, Any, List, Union, Type, Tuple

 from cognee.infrastructure.databases.exceptions.exceptions import NodesetFilterNotSupportedError
 from cognee.infrastructure.files.storage import get_file_storage
+from cognee.infrastructure.utils.run_sync import run_sync
 from cognee.shared.logging_utils import get_logger
 from cognee.infrastructure.databases.graph.graph_db_interface import (
     GraphDBInterface,
@@ -42,6 +43,8 @@ class NetworkXAdapter(GraphDBInterface):
     def __init__(self, filename="cognee_graph.pkl"):
         self.filename = filename

+        run_sync(self.load_graph_from_file())
+
     async def get_graph_data(self):
         """
         Retrieve graph data including nodes and edges.
@@ -576,7 +579,7 @@ class NetworkXAdapter(GraphDBInterface):

         await file_storage.store(file_path, json_data, overwrite=True)

-    async def load_graph_from_file(self, file_path: str = None):
+    async def load_graph_from_file(self):
         """
         Load graph data asynchronously from a specified file in JSON format.

@@ -586,8 +589,8 @@ class NetworkXAdapter(GraphDBInterface):
         - file_path (str): The file path from which to load the graph data; if None, loads
         from the default filename. (default None)
         """
-        if not file_path:
-            file_path = self.filename
+        file_path = self.filename
+
         try:
             file_dir_path = os.path.dirname(file_path)
             file_name = os.path.basename(file_path)
@@ -17,7 +17,7 @@ from cognee.infrastructure.databases.graph.graph_db_interface import (
     EdgeData,
     Node,
 )
-from cognee.infrastructure.databases.vector.embeddings import EmbeddingEngine
+from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
 from cognee.infrastructure.databases.vector.vector_db_interface import VectorDBInterface
 from cognee.infrastructure.engine import DataPoint

@@ -80,7 +80,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
         self,
         database_url: str,
         database_port: int,
-        embedding_engine=EmbeddingEngine,
+        embedding_engine: EmbeddingEngine,
     ):
         self.driver = FalkorDB(
             host=database_url,
@@ -213,7 +213,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
         A tuple containing the query string and parameters dictionary.
         """
         node_label = type(data_point).__name__
-        property_names = DataPoint.get_embeddable_property_names(data_point)
+        property_names = data_point.get_embeddable_property_names()

         properties = {
             **data_point.model_dump(),
@@ -357,7 +357,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
         vector_map = {}

         for data_point in data_points:
-            property_names = DataPoint.get_embeddable_property_names(data_point)
+            property_names = data_point.get_embeddable_property_names()
             key = str(data_point.id)
             vector_map[key] = {}

@@ -377,7 +377,7 @@ class FalkorDBAdapter(VectorDBInterface, GraphDBInterface):
                     vectorized_values[vector_map[str(data_point.id)][property_name]]
                     if vector_map[str(data_point.id)][property_name] is not None
                     else None
-                    for property_name in DataPoint.get_embeddable_property_names(data_point)
+                    for property_name in data_point.get_embeddable_property_names()
                 ]

                 query, params = await self.create_data_point_query(data_point, vectorized_data)
@@ -32,26 +32,6 @@ class RelationalConfig(BaseSettings):

         return values

-    def to_dict(self) -> dict:
-        """
-        Return the database configuration as a dictionary.
-
-        Returns:
-        --------
-
-        - dict: A dictionary containing database configuration settings including db_path,
-        db_name, db_host, db_port, db_username, db_password, and db_provider.
-        """
-        return {
-            "db_path": self.db_path,
-            "db_name": self.db_name,
-            "db_host": self.db_host,
-            "db_port": self.db_port,
-            "db_username": self.db_username,
-            "db_password": self.db_password,
-            "db_provider": self.db_provider,
-        }
-

 @lru_cache
 def get_relational_config():
@@ -75,9 +55,6 @@ class MigrationConfig(BaseSettings):
     """
     Manage and configure migration settings for a database, inheriting from BaseSettings.

-    Public methods:
-    - to_dict: Convert the migration configuration to a dictionary format.
-
     Instance variables:
     - migration_db_path: Path to the migration database.
     - migration_db_name: Name of the migration database.
@@ -98,25 +75,6 @@ class MigrationConfig(BaseSettings):

     model_config = SettingsConfigDict(env_file=".env", extra="allow")

-    def to_dict(self) -> dict:
-        """
-        Convert migration configuration to dictionary format.
-
-        Returns:
-        --------
-
-        - dict: A dictionary containing the migration configuration details.
-        """
-        return {
-            "migration_db_path": self.migration_db_path,
-            "migration_db_name": self.migration_db_name,
-            "migration_db_host": self.migration_db_host,
-            "migration_db_port": self.migration_db_port,
-            "migration_db_username": self.migration_db_username,
-            "migration_db_password": self.migration_db_password,
-            "migration_db_provider": self.migration_db_provider,
-        }
-

 @lru_cache
 def get_migration_config():
@@ -18,4 +18,12 @@ def get_relational_engine():
     """
     relational_config = get_relational_config()

-    return create_relational_engine(**relational_config.to_dict())
+    return create_relational_engine(
+        db_path=relational_config.db_path,
+        db_name=relational_config.db_name,
+        db_host=relational_config.db_host,
+        db_port=relational_config.db_port,
+        db_username=relational_config.db_username,
+        db_password=relational_config.db_password,
+        db_provider=relational_config.db_provider,
+    )
@@ -295,10 +295,10 @@ class SQLAlchemyAdapter:
         storage_config = get_storage_config()

         if (
-            storage_config["data_root_directory"]
+            storage_config.data_root_directory
             in raw_data_location_entities[0].raw_data_location
         ):
-            file_storage = get_file_storage(storage_config["data_root_directory"])
+            file_storage = get_file_storage(storage_config.data_root_directory)

             file_path = os.path.basename(raw_data_location_entities[0].raw_data_location)

@@ -1,4 +1,5 @@
 import os
+from typing import Optional
 import pydantic
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -10,10 +11,6 @@ class VectorConfig(BaseSettings):
     """
     Manage the configuration settings for the vector database.

-    Public methods:
-
-    - to_dict: Convert the configuration to a dictionary.
-
     Instance variables:

     - vector_db_url: The URL of the vector database.
@@ -39,22 +36,6 @@ class VectorConfig(BaseSettings):

         return values

-    def to_dict(self) -> dict:
-        """
-        Convert the configuration settings to a dictionary.
-
-        Returns:
-        --------
-
-        - dict: A dictionary containing the vector database configuration settings.
-        """
-        return {
-            "vector_db_url": self.vector_db_url,
-            "vector_db_port": self.vector_db_port,
-            "vector_db_key": self.vector_db_key,
-            "vector_db_provider": self.vector_db_provider,
-        }
-

 @lru_cache
 def get_vectordb_config():
@@ -71,13 +52,16 @@ def get_vectordb_config():
    - VectorConfig: An instance of `VectorConfig` containing the vector database
    configuration.
    """
+    context_config = get_vectordb_context_config()
+
+    if context_config:
+        return context_config
+
    return VectorConfig()


-def get_vectordb_context_config():
+def get_vectordb_context_config() -> Optional[VectorConfig]:
    """This function will get the appropriate vector db config based on async context."""
    from cognee.context_global_variables import vector_db_config

-    if vector_db_config.get():
-        return vector_db_config.get()
-
-    return get_vectordb_config().to_dict()
+    return vector_db_config.get()
@@ -1,3 +1,4 @@
+from typing import Optional
 from .supported_databases import supported_databases
 from .embeddings import get_embedding_engine

@@ -8,7 +9,7 @@ from functools import lru_cache
 def create_vector_engine(
     vector_db_provider: str,
     vector_db_url: str,
-    vector_db_port: str = "",
+    vector_db_port: Optional[int] = None,
     vector_db_key: str = "",
 ):
     """
@@ -26,7 +27,7 @@ def create_vector_engine(
     -----------

     - vector_db_url (str): The URL for the vector database instance.
-    - vector_db_port (str): The port for the vector database instance. Required for some
+    - vector_db_port (int): The port for the vector database instance. Required for some
     providers.
     - vector_db_key (str): The API key or access token for the vector database instance.
     - vector_db_provider (str): The name of the vector database provider to use (e.g.,
@@ -7,9 +7,6 @@ class EmbeddingConfig(BaseSettings):
     """
     Manage configuration settings for embedding operations, including provider, model
     details, API configuration, and tokenizer settings.
-
-    Public methods:
-    - to_dict: Serialize the configuration settings to a dictionary.
     """

     embedding_provider: Optional[str] = "openai"
@@ -22,26 +19,6 @@ class EmbeddingConfig(BaseSettings):
     huggingface_tokenizer: Optional[str] = None
     model_config = SettingsConfigDict(env_file=".env", extra="allow")

-    def to_dict(self) -> dict:
-        """
-        Serialize all embedding configuration settings to a dictionary.
-
-        Returns:
-        --------
-
-        - dict: A dictionary containing the embedding configuration settings.
-        """
-        return {
-            "embedding_provider": self.embedding_provider,
-            "embedding_model": self.embedding_model,
-            "embedding_dimensions": self.embedding_dimensions,
-            "embedding_endpoint": self.embedding_endpoint,
-            "embedding_api_key": self.embedding_api_key,
-            "embedding_api_version": self.embedding_api_version,
-            "embedding_max_tokens": self.embedding_max_tokens,
-            "huggingface_tokenizer": self.huggingface_tokenizer,
-        }
-

 @lru_cache
 def get_embedding_config():
@@ -1,7 +1,14 @@
-from .config import get_vectordb_context_config
+from .config import get_vectordb_config
 from .create_vector_engine import create_vector_engine


 def get_vector_engine():
     # Get appropriate vector db configuration based on current async context
-    return create_vector_engine(**get_vectordb_context_config())
+    vector_config = get_vectordb_config()
+
+    return create_vector_engine(
+        vector_db_provider=vector_config.vector_db_provider,
+        vector_db_url=vector_config.vector_db_url,
+        vector_db_port=vector_config.vector_db_port,
+        vector_db_key=vector_config.vector_db_key,
+    )
@@ -1,12 +1,14 @@
 from sqlalchemy import text
-from ..get_vector_engine import get_vector_engine, get_vectordb_context_config
+from ..config import get_vectordb_config
+from ..get_vector_engine import get_vector_engine


 async def create_db_and_tables():
     # Get appropriate vector db configuration based on current async context
-    vector_config = get_vectordb_context_config()
+    vector_config = get_vectordb_config()
     vector_engine = get_vector_engine()

-    if vector_config["vector_db_provider"] == "pgvector":
+    if vector_config.vector_db_provider == "pgvector":
         async with vector_engine.engine.begin() as connection:
             await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
@@ -1,12 +1,10 @@
-import pickle
 from uuid import UUID, uuid4
-from pydantic import BaseModel, Field, ConfigDict
-from datetime import datetime, timezone
+from typing import Optional, List
 from typing_extensions import TypedDict
-from typing import Optional, Any, Dict, List
+from datetime import datetime, timezone
+from pydantic import BaseModel, Field, ConfigDict


-# Define metadata type
 class MetaData(TypedDict):
     """
     Represent a metadata structure with type and index fields.
@@ -16,7 +14,6 @@ class MetaData(TypedDict):
     index_fields: list[str]


-# Updated DataPoint model with versioning and new fields
 class DataPoint(BaseModel):
     """
     Model representing a data point with versioning and metadata support.
@@ -26,12 +23,6 @@ class DataPoint(BaseModel):
     - get_embeddable_properties
     - get_embeddable_property_names
     - update_version
-    - to_json
-    - from_json
-    - to_pickle
-    - from_pickle
-    - to_dict
-    - from_dict
     """

     model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -85,52 +76,39 @@ class DataPoint(BaseModel):
             return attribute.strip()
         return attribute

-    @classmethod
-    def get_embeddable_properties(self, data_point: "DataPoint"):
+    def get_embeddable_properties(self):
         """
         Retrieve a list of embeddable properties from the data point.

         This method returns a list of attribute values based on the index fields defined in the
         data point's metadata. If there are no index fields, it returns an empty list.

-        Parameters:
-        -----------
-
-        - data_point ('DataPoint'): The DataPoint instance from which to retrieve embeddable
-        properties.
-
         Returns:
         --------

         A list of embeddable property values, or an empty list if none exist.
         """
-        if data_point.metadata and len(data_point.metadata["index_fields"]) > 0:
-            return [
-                getattr(data_point, field, None) for field in data_point.metadata["index_fields"]
-            ]
+        if self.metadata and len(self.metadata["index_fields"]) > 0:
+            return [getattr(self, field, None) for field in self.metadata["index_fields"]]

         return []

-    @classmethod
-    def get_embeddable_property_names(self, data_point: "DataPoint"):
+    def get_embeddable_property_names(self):
         """
         Retrieve the names of embeddable properties defined in the metadata.

         If no index fields are defined in the metadata, this method will return an empty list.

-        Parameters:
-        -----------
-
-        - data_point ('DataPoint'): The DataPoint instance from which to retrieve embeddable
-        property names.
-
         Returns:
         --------

         A list of property names corresponding to the index fields, or an empty list if none
         exist.
         """
-        return data_point.metadata["index_fields"] or []
+        if self.metadata:
+            return self.metadata["index_fields"] or []
+
+        return []

     def update_version(self):
         """
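`get_embeddable_properties` and `get_embeddable_property_names` change from classmethods taking an explicit `data_point` argument into plain instance methods, which is why the FalkorDB call sites above shrink from `DataPoint.get_embeddable_property_names(data_point)` to `data_point.get_embeddable_property_names()`. A small sketch of the new call shape on a reduced stand-in model:

```python
from typing import Optional

from pydantic import BaseModel


class Document(BaseModel):
    # Reduced stand-in for a DataPoint subclass.
    text: str
    metadata: Optional[dict] = {"index_fields": ["text"]}

    def get_embeddable_property_names(self):
        # Instance method: reads self.metadata instead of a passed-in data_point.
        if self.metadata:
            return self.metadata["index_fields"] or []
        return []

    def get_embeddable_properties(self):
        if self.metadata and len(self.metadata["index_fields"]) > 0:
            return [getattr(self, field, None) for field in self.metadata["index_fields"]]
        return []


doc = Document(text="hello graph")
print(doc.get_embeddable_property_names())  # ['text']
print(doc.get_embeddable_properties())      # ['hello graph']
```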
@@ -141,80 +119,3 @@ class DataPoint(BaseModel):
         """
         self.version += 1
         self.updated_at = int(datetime.now(timezone.utc).timestamp() * 1000)
-
-    # JSON Serialization
-    def to_json(self) -> str:
-        """
-        Serialize the DataPoint instance to a JSON string format.
-
-        This method uses the model's built-in serialization functionality to convert the
-        instance into a JSON-compatible string.
-
-        Returns:
-        --------
-
-        - str: The JSON string representation of the DataPoint instance.
-        """
-        return self.json()
-
-    @classmethod
-    def from_json(self, json_str: str):
-        """
-        Deserialize a DataPoint instance from a JSON string.
-
-        The method transforms the input JSON string back into a DataPoint instance using model
-        validation.
-
-        Parameters:
-        -----------
-
-        - json_str (str): The JSON string representation of a DataPoint instance to be
-        deserialized.
-
-        Returns:
-        --------
-
-        A new DataPoint instance created from the JSON data.
-        """
-        return self.model_validate_json(json_str)
-
-    def to_dict(self, **kwargs) -> Dict[str, Any]:
-        """
-        Convert the DataPoint instance to a dictionary representation.
-
-        This method uses the model's built-in functionality to serialize the instance attributes
-        to a dictionary, which can optionally include additional arguments.
-
-        Parameters:
-        -----------
-
-        - **kwargs: Additional keyword arguments for serialization options.
-
-        Returns:
-        --------
-
-        - Dict[str, Any]: A dictionary representation of the DataPoint instance.
-        """
-        return self.model_dump(**kwargs)
-
-    @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> "DataPoint":
-        """
-        Instantiate a DataPoint from a dictionary of attribute values.
-
-        The method validates the incoming dictionary data against the model's schema and
-        constructs a new DataPoint instance accordingly.
-
-        Parameters:
-        -----------
-
-        - data (Dict[str, Any]): A dictionary containing the attributes of a DataPoint
-        instance.
-
-        Returns:
-        --------
-
-        - 'DataPoint': A new DataPoint instance constructed from the provided dictionary
-        data.
-        """
-        return cls.model_validate(data)
@@ -1,4 +1,16 @@
+from typing import Optional
 from contextvars import ContextVar
+from pydantic_settings import BaseSettings, SettingsConfigDict


-file_storage_config = ContextVar("file_storage_config", default=None)
+class StorageConfig(BaseSettings):
+    """
+    Manage configuration settings for file storage.
+    """
+
+    data_root_directory: str = ""
+
+    model_config = SettingsConfigDict(env_file=".env", extra="allow")
+
+
+file_storage_config = ContextVar[Optional[StorageConfig]]("file_storage_config", default=None)
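With `StorageConfig` defined as a `BaseSettings` model, the storage context variable now carries a typed object instead of a bare dict, and readers switch from `storage_config["data_root_directory"]` to attribute access (see the `SQLAlchemyAdapter` hunk earlier and the `prune_data` hunk below). A short sketch of the set-and-read round trip, mirroring the class added above:

```python
from contextvars import ContextVar
from typing import Optional

from pydantic_settings import BaseSettings, SettingsConfigDict


class StorageConfig(BaseSettings):
    """Mirrors the StorageConfig added in this diff."""

    data_root_directory: str = ""

    model_config = SettingsConfigDict(env_file=".env", extra="allow")


file_storage_config = ContextVar[Optional[StorageConfig]]("file_storage_config", default=None)

# Writer side: publish a typed config for the current context.
file_storage_config.set(StorageConfig(data_root_directory="/tmp/.data_storage"))

# Reader side: attribute access replaces dict-key lookups.
config = file_storage_config.get()
if config is not None:
    print(config.data_root_directory)  # /tmp/.data_storage
```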
@@ -1,17 +1,18 @@
 from cognee.base_config import get_base_config
-from .config import file_storage_config
+from .config import file_storage_config, StorageConfig


 def get_global_storage_config():
     base_config = get_base_config()

-    return {
-        "data_root_directory": base_config.data_root_directory,
-    }
+    return StorageConfig(
+        data_root_directory=base_config.data_root_directory,
+    )


 def get_storage_config():
     context_config = file_storage_config.get()

     if context_config:
         return context_config
@@ -30,7 +30,6 @@ class LLMConfig(BaseSettings):

     Public methods include:
     - ensure_env_vars_for_ollama
-    - to_dict
     """

     structured_output_framework: str = "instructor"
@@ -153,38 +152,6 @@ class LLMConfig(BaseSettings):

         return self

-    def to_dict(self) -> dict:
-        """
-        Convert the LLMConfig instance into a dictionary representation.
-
-        Returns:
-        --------
-
-        - dict: A dictionary containing the configuration settings of the LLMConfig
-        instance.
-        """
-        return {
-            "provider": self.llm_provider,
-            "model": self.llm_model,
-            "endpoint": self.llm_endpoint,
-            "api_key": self.llm_api_key,
-            "api_version": self.llm_api_version,
-            "temperature": self.llm_temperature,
-            "streaming": self.llm_streaming,
-            "max_tokens": self.llm_max_tokens,
-            "transcription_model": self.transcription_model,
-            "graph_prompt_path": self.graph_prompt_path,
-            "rate_limit_enabled": self.llm_rate_limit_enabled,
-            "rate_limit_requests": self.llm_rate_limit_requests,
-            "rate_limit_interval": self.llm_rate_limit_interval,
-            "embedding_rate_limit_enabled": self.embedding_rate_limit_enabled,
-            "embedding_rate_limit_requests": self.embedding_rate_limit_requests,
-            "embedding_rate_limit_interval": self.embedding_rate_limit_interval,
-            "fallback_api_key": self.fallback_api_key,
-            "fallback_endpoint": self.fallback_endpoint,
-            "fallback_model": self.fallback_model,
-        }
-

 @lru_cache
 def get_llm_config():
@@ -1,8 +1,6 @@
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from cognee.shared.data_models import DefaultContentPrediction, SummarizedContent
-from typing import Optional
-import os


 class CognifyConfig(BaseSettings):
@@ -10,12 +8,6 @@ class CognifyConfig(BaseSettings):
     summarization_model: object = SummarizedContent
     model_config = SettingsConfigDict(env_file=".env", extra="allow")

-    def to_dict(self) -> dict:
-        return {
-            "classification_model": self.classification_model,
-            "summarization_model": self.summarization_model,
-        }
-

 @lru_cache
 def get_cognify_config():
@@ -3,5 +3,5 @@ from cognee.infrastructure.files.storage import get_file_storage, get_storage_config

 async def prune_data():
     storage_config = get_storage_config()
-    data_root_directory = storage_config["data_root_directory"]
+    data_root_directory = storage_config.data_root_directory
     await get_file_storage(data_root_directory).remove_all()
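The storage_config["data_root_directory"] to storage_config.data_root_directory change repeats across the models, tasks, and tests below; the gain is that the field name is now checked statically against StorageConfig and autocompleted by editors, rather than being a stringly-typed dict key. For example:

storage_config = get_storage_config()

# Before: a dict lookup; a typo only fails at runtime with KeyError.
# data_root_directory = storage_config["data_root_directory"]

# After: a model attribute; typos are flagged by the type checker.
data_root_directory = storage_config.data_root_directory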
@@ -39,16 +39,3 @@ class Data(Base):
         lazy="noload",
         cascade="all, delete",
     )
-
-    def to_json(self) -> dict:
-        return {
-            "id": str(self.id),
-            "name": self.name,
-            "extension": self.extension,
-            "mimeType": self.mime_type,
-            "rawDataLocation": self.raw_data_location,
-            "createdAt": self.created_at.isoformat(),
-            "updatedAt": self.updated_at.isoformat() if self.updated_at else None,
-            "nodeSet": self.node_set,
-            # "datasets": [dataset.to_json() for dataset in self.datasets]
-        }
@@ -28,13 +28,3 @@ class Dataset(Base):
         lazy="noload",
         cascade="all, delete",
     )
-
-    def to_json(self) -> dict:
-        return {
-            "id": str(self.id),
-            "name": self.name,
-            "createdAt": self.created_at.isoformat(),
-            "updatedAt": self.updated_at.isoformat() if self.updated_at else None,
-            "ownerId": str(self.owner_id),
-            "data": [data.to_json() for data in self.data],
-        }
@@ -27,14 +27,3 @@ class GraphRelationshipLedger(Base):
         Index("idx_graph_relationship_ledger_source_node_id", "source_node_id"),
         Index("idx_graph_relationship_ledger_destination_node_id", "destination_node_id"),
     )
-
-    def to_json(self) -> dict:
-        return {
-            "id": str(self.id),
-            "source_node_id": str(self.parent_id),
-            "destination_node_id": str(self.child_id),
-            "creator_function": self.creator_function,
-            "created_at": self.created_at.isoformat(),
-            "deleted_at": self.deleted_at.isoformat() if self.deleted_at else None,
-            "user_id": str(self.user_id),
-        }
@@ -7,7 +7,7 @@ from .classify import classify

 async def save_data_to_file(data: Union[str, BinaryIO], filename: str = None):
     storage_config = get_storage_config()

-    data_root_directory = storage_config["data_root_directory"]
+    data_root_directory = storage_config.data_root_directory

     classified_data = classify(data, filename)
cognee/modules/logging/log_cognee_configuration.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+def log_cognee_configuration(logger):
+    """Log the current database configuration for all database types"""
+    # NOTE: Has to be imported at runtime to avoid circular import
+    from cognee.infrastructure.databases.relational.config import get_relational_config
+    from cognee.infrastructure.databases.vector.config import get_vectordb_config
+    from cognee.infrastructure.databases.graph.config import get_graph_config
+
+    try:
+        # Log relational database configuration
+        relational_config = get_relational_config()
+        logger.info(f"Relational database: {relational_config.db_provider}")
+        if relational_config.db_provider == "postgres":
+            logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
+            logger.info(f"Postgres database: {relational_config.db_name}")
+        elif relational_config.db_provider == "sqlite":
+            logger.info(f"SQLite path: {relational_config.db_path}/{relational_config.db_name}")
+            logger.info(f"SQLite database: {relational_config.db_name}")
+
+        # Log vector database configuration
+        vector_config = get_vectordb_config()
+        logger.info(f"Vector database: {vector_config.vector_db_provider}")
+        if vector_config.vector_db_provider == "lancedb":
+            logger.info(f"Vector database path: {vector_config.vector_db_url}")
+        else:
+            logger.info(f"Vector database URL: {vector_config.vector_db_url}")
+
+        # Log graph database configuration
+        graph_config = get_graph_config()
+        logger.info(f"Graph database: {graph_config.graph_database_provider}")
+        if graph_config.graph_database_provider == "kuzu":
+            logger.info(f"Graph database path: {graph_config.graph_file_path}")
+        else:
+            logger.info(f"Graph database URL: {graph_config.graph_database_url}")
+
+    except Exception as e:
+        logger.warning(f"Could not retrieve database configuration: {str(e)}")
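Note how the new module defers the three config imports into the function body; importing them at module level would pull the database packages into the logging module at load time, recreating the circular import the NOTE warns about. A minimal usage sketch, assuming the logger comes from get_logger as elsewhere in this diff:

from cognee.shared.logging_utils import get_logger
from cognee.modules.logging.log_cognee_configuration import log_cognee_configuration

logger = get_logger(__name__)

# Emits one info line per configured store (relational, vector, graph);
# any failure to read configuration collapses into a single warning.
log_cognee_configuration(logger)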
@@ -1,7 +1,9 @@
 import asyncio
 from uuid import UUID
-from typing import Union
+from typing import Optional, Union

+from cognee.infrastructure.databases.graph.config import GraphConfig
+from cognee.infrastructure.databases.vector.config import VectorConfig
 from cognee.shared.logging_utils import get_logger
 from cognee.modules.data.methods.get_dataset_data import get_dataset_data
 from cognee.modules.data.models import Data, Dataset
@@ -38,7 +38,7 @@ def get_current_settings() -> SettingsDict:
     vector_config = get_vectordb_config()
     relational_config = get_relational_config()

-    return dict(
+    return SettingsDict(
         llm={
             "provider": llm_config.llm_provider,
             "model": llm_config.llm_model,
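Returning SettingsDict(...) instead of dict(...) makes the constructor agree with the function's annotated return type. This diff does not show SettingsDict's definition; assuming it is a TypedDict (the keyword-argument construction works for both TypedDicts and pydantic models), the change is purely about static checking, as in this hypothetical sketch:

from typing import TypedDict


class LLMSettings(TypedDict):  # hypothetical stand-in for the real llm section
    provider: str
    model: str


class SettingsDict(TypedDict):  # hypothetical; the real class lives in cognee
    llm: LLMSettings


def get_current_settings() -> SettingsDict:
    # dict(...) also runs, but SettingsDict(...) lets the type checker
    # validate keys and value types against the declared structure.
    return SettingsDict(llm=LLMSettings(provider="openai", model="gpt-4o-mini"))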
@@ -4,12 +4,12 @@ import logging
 import structlog
 import traceback
 import platform
-from datetime import datetime
+from typing import Protocol
 from pathlib import Path
-import importlib.metadata
+from datetime import datetime

 from cognee import __version__ as cognee_version
-from typing import Protocol
+from cognee.modules.logging.log_cognee_configuration import log_cognee_configuration


 # Configure external library logging
@@ -165,44 +165,6 @@ def get_logger(name=None, level=None) -> LoggerInterface:
     return logger


-def log_database_configuration(logger):
-    """Log the current database configuration for all database types"""
-    # NOTE: Has to be imporated at runtime to avoid circular import
-    from cognee.infrastructure.databases.relational.config import get_relational_config
-    from cognee.infrastructure.databases.vector.config import get_vectordb_config
-    from cognee.infrastructure.databases.graph.config import get_graph_config
-
-    try:
-        # Log relational database configuration
-        relational_config = get_relational_config()
-        logger.info(f"Relational database: {relational_config.db_provider}")
-        if relational_config.db_provider == "postgres":
-            logger.info(f"Postgres host: {relational_config.db_host}:{relational_config.db_port}")
-            logger.info(f"Postgres database: {relational_config.db_name}")
-        elif relational_config.db_provider == "sqlite":
-            logger.info(f"SQLite path: {relational_config.db_path}")
-            logger.info(f"SQLite database: {relational_config.db_name}")
-
-        # Log vector database configuration
-        vector_config = get_vectordb_config()
-        logger.info(f"Vector database: {vector_config.vector_db_provider}")
-        if vector_config.vector_db_provider == "lancedb":
-            logger.info(f"Vector database path: {vector_config.vector_db_url}")
-        else:
-            logger.info(f"Vector database URL: {vector_config.vector_db_url}")
-
-        # Log graph database configuration
-        graph_config = get_graph_config()
-        logger.info(f"Graph database: {graph_config.graph_database_provider}")
-        if graph_config.graph_database_provider == "kuzu":
-            logger.info(f"Graph database path: {graph_config.graph_file_path}")
-        else:
-            logger.info(f"Graph database URL: {graph_config.graph_database_url}")
-
-    except Exception as e:
-        logger.warning(f"Could not retrieve database configuration: {str(e)}")
-
-
 def cleanup_old_logs(logs_dir, max_files):
     """
     Removes old log files, keeping only the most recent ones.
@@ -396,9 +358,8 @@ def setup_logging(log_level=None, name=None):
     )

     logger.info("Want to learn more? Visit the Cognee documentation: https://docs.cognee.ai")

     # Log database configuration
-    log_database_configuration(logger)
+    log_cognee_configuration(logger)

     # Return the configured logger
     return logger
@@ -1,18 +1,13 @@
 """This module contains utility functions for the cognee."""

 import os
+import pathlib
 import requests
-from datetime import datetime, timezone
-import networkx as nx
-import matplotlib.pyplot as plt
 import http.server
 import socketserver
-from threading import Thread
-import pathlib
 from uuid import uuid4
-
-from cognee.base_config import get_base_config
-from cognee.infrastructure.databases.graph import get_graph_engine
+from threading import Thread
+from datetime import datetime, timezone


 # Analytics Proxy Url, currently hosted by Vercel
@@ -158,7 +158,7 @@ async def main():
     assert len(history) == 8, "Search history is not correct."

     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     await cognee.prune.prune_system(metadata=True)
@@ -121,7 +121,7 @@ async def main():

     # Assert local data files are cleaned properly
     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     # Assert relational, vector and graph databases have been cleaned properly
@@ -113,7 +113,7 @@ async def main():
     )

     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     await cognee.prune.prune_system(metadata=True)
@@ -79,7 +79,7 @@ async def main():

     # Assert local data files are cleaned properly
     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     # Assert relational, vector and graph databases have been cleaned properly
@@ -92,7 +92,7 @@ async def main():
     assert len(history) == 8, "Search history is not correct."

     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     await cognee.prune.prune_system(metadata=True)
@@ -117,7 +117,7 @@ async def main():
     )

     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     await cognee.prune.prune_system(metadata=True)
@@ -167,7 +167,7 @@ async def main():
     await test_local_file_deletion(text, explanation_file_path)

     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     await cognee.prune.prune_system(metadata=True)
@@ -84,7 +84,7 @@ async def main():
     assert len(history) == 6, "Search history is not correct."

     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     await cognee.prune.prune_system(metadata=True)
@@ -93,7 +93,7 @@ async def main():
     assert len(history) == 6, "Search history is not correct."

     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     await cognee.prune.prune_system(metadata=True)
@@ -75,7 +75,7 @@ async def main():

     # Assert local data files are cleaned properly
     await cognee.prune.prune_data()
-    data_root_directory = get_storage_config()["data_root_directory"]
+    data_root_directory = get_storage_config().data_root_directory
     assert not os.path.isdir(data_root_directory), "Local data files are not deleted"

     # Assert relational, vector and graph databases have been cleaned properly