From 1a135bfdf8ad1a026ad028053846a4aaa64e7460 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sun, 26 May 2024 09:36:40 +0200 Subject: [PATCH] Updates to the configs --- cognee/api/v1/config/config.py | 68 ++++++++++--------- .../infrastructure/databases/graph/config.py | 7 +- .../infrastructure/databases/vector/config.py | 13 +++- .../databases/vector/embeddings/config.py | 4 ++ cognee/modules/cognify/config.py | 38 +++++++++++ .../modules/cognify/graph/add_data_chunks.py | 8 ++- 6 files changed, 99 insertions(+), 39 deletions(-) create mode 100644 cognee/modules/cognify/config.py diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py index 832d75705..bf8adff35 100644 --- a/cognee/api/v1/config/config.py +++ b/cognee/api/v1/config/config.py @@ -1,54 +1,59 @@ """ This module is used to set the configuration of the system.""" from cognee.infrastructure import infrastructure_config +from cognee.base_config import get_base_config +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.data.chunking.config import get_chunk_config +from cognee.modules.cognify.config import get_cognify_config + +cognify_config = get_cognify_config() +chunk_config = get_chunk_config() +graph_config = get_graph_config() +base_config = get_base_config() + class config(): @staticmethod def system_root_directory(system_root_directory: str): - infrastructure_config.set_config({ - "system_root_directory": system_root_directory - }) + base_config.base_config = system_root_directory @staticmethod def data_root_directory(data_root_directory: str): - infrastructure_config.set_config({ - "data_root_directory": data_root_directory - }) + base_config.data_root_directory = data_root_directory + + @staticmethod + def monitoring_tool(monitoring_tool: object): + base_config.monitoring_tool = monitoring_tool + + + @staticmethod def set_classification_model(classification_model: object): - infrastructure_config.set_config({ - "classification_model": classification_model - }) + cognify_config.classification_model = classification_model @staticmethod def set_summarization_model(summarization_model: object): - infrastructure_config.set_config({ - "summarization_model": summarization_model - }) + cognify_config.summarization_model=summarization_model + @staticmethod def set_labeling_model(labeling_model: object): - infrastructure_config.set_config({ - "labeling_model": labeling_model - }) + cognify_config.labeling_model =labeling_model + @staticmethod def set_graph_model(graph_model: object): - infrastructure_config.set_config({ - "graph_model": graph_model - }) + graph_config.graph_model =graph_model + @staticmethod def set_cognitive_layer_model(cognitive_layer_model: object): - infrastructure_config.set_config({ - "cognitive_layer_model": cognitive_layer_model - }) + cognify_config.cognitive_layer_model =cognitive_layer_model + @staticmethod def set_graph_engine(graph_engine: object): - infrastructure_config.set_config({ - "graph_engine": graph_engine - }) + graph_config.graph_engine =graph_engine @staticmethod def llm_provider(llm_provider: str): @@ -58,20 +63,17 @@ class config(): @staticmethod def intra_layer_score_treshold(intra_layer_score_treshold: str): - infrastructure_config.set_config({ - "intra_layer_score_treshold": intra_layer_score_treshold - }) + cognify_config.intra_layer_score_treshold =intra_layer_score_treshold + @staticmethod def connect_documents(connect_documents: bool): - infrastructure_config.set_config({ - "connect_documents": connect_documents - }) + cognify_config.connect_documents = connect_documents + @staticmethod def set_chunk_strategy(chunk_strategy: object): - infrastructure_config.set_config({ - "chunk_strategy": chunk_strategy - }) + chunk_config.chunk_strategy = chunk_strategy + @staticmethod def set_graph_topology(graph_topology: object): diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index 7b48e65c4..839bf4759 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -3,7 +3,7 @@ import os from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.base_config import get_base_config -from cognee.shared.data_models import DefaultGraphModel +from cognee.shared.data_models import DefaultGraphModel, GraphDBType base_config = get_base_config() @@ -15,7 +15,9 @@ class GraphConfig(BaseSettings): graph_database_username: str = "" graph_database_password: str = "" graph_database_port: int = "" - graph_file_path = os.path.join(base_config.database_directory_path,graph_filename) + graph_file_path: str = os.path.join(base_config.database_directory_path,graph_filename) + graph_engine: object = GraphDBType.NETWORKX + graph_model: object = DefaultGraphModel model_config = SettingsConfigDict(env_file = ".env", extra = "allow") @@ -29,6 +31,7 @@ class GraphConfig(BaseSettings): "graph_database_username": self.graph_database_username, "graph_database_password": self.graph_database_password, "graph_database_port": self.graph_database_port, + "graph_engine": self.graph_engine } diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index 29b017254..a304b5bc5 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -2,13 +2,21 @@ import os from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.base_config import get_base_config +from cognee.infrastructure.databases.vector.lancedb.LanceDBAdapter import LanceDBAdapter +from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config +embeddings_config = get_embedding_config() base_config = get_base_config() class VectorConfig(BaseSettings): vector_db_url: str = "" vector_db_key: str = "" vector_db_path: str = os.path.join(base_config.database_directory_path + "cognee.lancedb") - vector_db_engine: object = "" + vector_engine: object = LanceDBAdapter( + url = vector_db_path, + api_key = None, + embedding_engine = embeddings_config.embedding_engine, + ) + vector_engine_choice:str = "lancedb" model_config = SettingsConfigDict(env_file = ".env", extra = "allow") @@ -17,7 +25,8 @@ class VectorConfig(BaseSettings): "vector_db_url": self.vector_db_url, "vector_db_key": self.vector_db_key, "vector_db_path": self.vector_db_path, - "vector_db_engine": self.vector_db_engine, + "vector_engine": self.vector_engine, + "vector_engine_choice": self.vector_engine_choice, } @lru_cache diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py index 8bdadd587..7a953847b 100644 --- a/cognee/infrastructure/databases/vector/embeddings/config.py +++ b/cognee/infrastructure/databases/vector/embeddings/config.py @@ -1,11 +1,15 @@ from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict +from cognee.infrastructure.databases.vector.embeddings.DefaultEmbeddingEngine import DefaultEmbeddingEngine + + class EmbeddingConfig(BaseSettings): openai_embedding_model: str = "text-embedding-3-large" openai_embedding_dimensions: int = 3072 litellm_embedding_model: str = "text-embedding-3-large" litellm_embedding_dimensions: int = 3072 + embedding_engine:object = DefaultEmbeddingEngine() model_config = SettingsConfigDict(env_file = ".env", extra = "allow") diff --git a/cognee/modules/cognify/config.py b/cognee/modules/cognify/config.py new file mode 100644 index 000000000..852703046 --- /dev/null +++ b/cognee/modules/cognify/config.py @@ -0,0 +1,38 @@ +from functools import lru_cache +from pydantic_settings import BaseSettings, SettingsConfigDict + +from cognee.root_dir import get_absolute_path + +from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \ + DefaultCognitiveLayer + + +# Monitoring tool + + + +class CognifyConfig(BaseSettings): + classification_model: object = DefaultContentPrediction + summarization_model: object = SummarizedContent + labeling_model: object = LabeledContent + cognitive_layer_model: object = DefaultCognitiveLayer + intra_layer_score_treshold: int = 0.98 + connect_documents: bool = False + + + + model_config = SettingsConfigDict(env_file = ".env", extra = "allow") + + def to_dict(self) -> dict: + return { + "classification_model": self.classification_model, + "summarization_model": self.summarization_model, + "labeling_model": self.labeling_model, + "cognitive_layer_model": self.cognitive_layer_model, + "intra_layer_score_treshold": self.intra_layer_score_treshold, + "connect_documents": self.connect_documents, + } + +@lru_cache +def get_cognify_config(): + return CognifyConfig() \ No newline at end of file diff --git a/cognee/modules/cognify/graph/add_data_chunks.py b/cognee/modules/cognify/graph/add_data_chunks.py index fee585ec5..2c01db6b5 100644 --- a/cognee/modules/cognify/graph/add_data_chunks.py +++ b/cognee/modules/cognify/graph/add_data_chunks.py @@ -2,15 +2,19 @@ from typing import TypedDict from pydantic import BaseModel, Field from cognee.infrastructure import infrastructure_config +from cognee.infrastructure.databases.vector.config import get_vectordb_config + from cognee.infrastructure.databases.vector import DataPoint +config = get_vectordb_config() + class TextChunk(TypedDict): text: str chunk_id: str file_metadata: dict async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]): - vector_client = infrastructure_config.get_config("vector_engine") + vector_client = config.vector_engine identified_chunks = [] @@ -52,7 +56,7 @@ async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]): async def add_data_chunks_basic_rag(dataset_data_chunks: dict[str, list[TextChunk]]): - vector_client = infrastructure_config.get_config("vector_engine") + vector_client = config.vector_engine identified_chunks = []