Updates to the configs

This commit is contained in:
Vasilije 2024-05-26 09:36:40 +02:00
parent 59feaa3e4e
commit 1a135bfdf8
6 changed files with 99 additions and 39 deletions

View file

@ -1,54 +1,59 @@
""" This module is used to set the configuration of the system.""" """ This module is used to set the configuration of the system."""
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.modules.cognify.config import get_cognify_config
cognify_config = get_cognify_config()
chunk_config = get_chunk_config()
graph_config = get_graph_config()
base_config = get_base_config()
class config(): class config():
@staticmethod @staticmethod
def system_root_directory(system_root_directory: str): def system_root_directory(system_root_directory: str):
infrastructure_config.set_config({ base_config.base_config = system_root_directory
"system_root_directory": system_root_directory
})
@staticmethod @staticmethod
def data_root_directory(data_root_directory: str): def data_root_directory(data_root_directory: str):
infrastructure_config.set_config({ base_config.data_root_directory = data_root_directory
"data_root_directory": data_root_directory
}) @staticmethod
def monitoring_tool(monitoring_tool: object):
base_config.monitoring_tool = monitoring_tool
@staticmethod @staticmethod
def set_classification_model(classification_model: object): def set_classification_model(classification_model: object):
infrastructure_config.set_config({ cognify_config.classification_model = classification_model
"classification_model": classification_model
})
@staticmethod @staticmethod
def set_summarization_model(summarization_model: object): def set_summarization_model(summarization_model: object):
infrastructure_config.set_config({ cognify_config.summarization_model=summarization_model
"summarization_model": summarization_model
})
@staticmethod @staticmethod
def set_labeling_model(labeling_model: object): def set_labeling_model(labeling_model: object):
infrastructure_config.set_config({ cognify_config.labeling_model =labeling_model
"labeling_model": labeling_model
})
@staticmethod @staticmethod
def set_graph_model(graph_model: object): def set_graph_model(graph_model: object):
infrastructure_config.set_config({ graph_config.graph_model =graph_model
"graph_model": graph_model
})
@staticmethod @staticmethod
def set_cognitive_layer_model(cognitive_layer_model: object): def set_cognitive_layer_model(cognitive_layer_model: object):
infrastructure_config.set_config({ cognify_config.cognitive_layer_model =cognitive_layer_model
"cognitive_layer_model": cognitive_layer_model
})
@staticmethod @staticmethod
def set_graph_engine(graph_engine: object): def set_graph_engine(graph_engine: object):
infrastructure_config.set_config({ graph_config.graph_engine =graph_engine
"graph_engine": graph_engine
})
@staticmethod @staticmethod
def llm_provider(llm_provider: str): def llm_provider(llm_provider: str):
@ -58,20 +63,17 @@ class config():
@staticmethod @staticmethod
def intra_layer_score_treshold(intra_layer_score_treshold: str): def intra_layer_score_treshold(intra_layer_score_treshold: str):
infrastructure_config.set_config({ cognify_config.intra_layer_score_treshold =intra_layer_score_treshold
"intra_layer_score_treshold": intra_layer_score_treshold
})
@staticmethod @staticmethod
def connect_documents(connect_documents: bool): def connect_documents(connect_documents: bool):
infrastructure_config.set_config({ cognify_config.connect_documents = connect_documents
"connect_documents": connect_documents
})
@staticmethod @staticmethod
def set_chunk_strategy(chunk_strategy: object): def set_chunk_strategy(chunk_strategy: object):
infrastructure_config.set_config({ chunk_config.chunk_strategy = chunk_strategy
"chunk_strategy": chunk_strategy
})
@staticmethod @staticmethod
def set_graph_topology(graph_topology: object): def set_graph_topology(graph_topology: object):

View file

@ -3,7 +3,7 @@ import os
from functools import lru_cache from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.base_config import get_base_config from cognee.base_config import get_base_config
from cognee.shared.data_models import DefaultGraphModel from cognee.shared.data_models import DefaultGraphModel, GraphDBType
base_config = get_base_config() base_config = get_base_config()
@ -15,7 +15,9 @@ class GraphConfig(BaseSettings):
graph_database_username: str = "" graph_database_username: str = ""
graph_database_password: str = "" graph_database_password: str = ""
graph_database_port: int = "" graph_database_port: int = ""
graph_file_path = os.path.join(base_config.database_directory_path,graph_filename) graph_file_path: str = os.path.join(base_config.database_directory_path,graph_filename)
graph_engine: object = GraphDBType.NETWORKX
graph_model: object = DefaultGraphModel
model_config = SettingsConfigDict(env_file = ".env", extra = "allow") model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
@ -29,6 +31,7 @@ class GraphConfig(BaseSettings):
"graph_database_username": self.graph_database_username, "graph_database_username": self.graph_database_username,
"graph_database_password": self.graph_database_password, "graph_database_password": self.graph_database_password,
"graph_database_port": self.graph_database_port, "graph_database_port": self.graph_database_port,
"graph_engine": self.graph_engine
} }

View file

@ -2,13 +2,21 @@ import os
from functools import lru_cache from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.base_config import get_base_config from cognee.base_config import get_base_config
from cognee.infrastructure.databases.vector.lancedb.LanceDBAdapter import LanceDBAdapter
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
embeddings_config = get_embedding_config()
base_config = get_base_config() base_config = get_base_config()
class VectorConfig(BaseSettings): class VectorConfig(BaseSettings):
vector_db_url: str = "" vector_db_url: str = ""
vector_db_key: str = "" vector_db_key: str = ""
vector_db_path: str = os.path.join(base_config.database_directory_path + "cognee.lancedb") vector_db_path: str = os.path.join(base_config.database_directory_path + "cognee.lancedb")
vector_db_engine: object = "" vector_engine: object = LanceDBAdapter(
url = vector_db_path,
api_key = None,
embedding_engine = embeddings_config.embedding_engine,
)
vector_engine_choice:str = "lancedb"
model_config = SettingsConfigDict(env_file = ".env", extra = "allow") model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
@ -17,7 +25,8 @@ class VectorConfig(BaseSettings):
"vector_db_url": self.vector_db_url, "vector_db_url": self.vector_db_url,
"vector_db_key": self.vector_db_key, "vector_db_key": self.vector_db_key,
"vector_db_path": self.vector_db_path, "vector_db_path": self.vector_db_path,
"vector_db_engine": self.vector_db_engine, "vector_engine": self.vector_engine,
"vector_engine_choice": self.vector_engine_choice,
} }
@lru_cache @lru_cache

View file

@ -1,11 +1,15 @@
from functools import lru_cache from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.infrastructure.databases.vector.embeddings.DefaultEmbeddingEngine import DefaultEmbeddingEngine
class EmbeddingConfig(BaseSettings): class EmbeddingConfig(BaseSettings):
openai_embedding_model: str = "text-embedding-3-large" openai_embedding_model: str = "text-embedding-3-large"
openai_embedding_dimensions: int = 3072 openai_embedding_dimensions: int = 3072
litellm_embedding_model: str = "text-embedding-3-large" litellm_embedding_model: str = "text-embedding-3-large"
litellm_embedding_dimensions: int = 3072 litellm_embedding_dimensions: int = 3072
embedding_engine:object = DefaultEmbeddingEngine()
model_config = SettingsConfigDict(env_file = ".env", extra = "allow") model_config = SettingsConfigDict(env_file = ".env", extra = "allow")

View file

@ -0,0 +1,38 @@
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.root_dir import get_absolute_path
from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \
DefaultCognitiveLayer
# Configuration for the cognify step
class CognifyConfig(BaseSettings):
    """Settings consumed by the cognify step.

    Holds the model classes used for classification, summarization,
    labeling and cognitive layers, plus the intra-layer score threshold
    and the document-connection flag. The settings config points at a
    ".env" file and allows extra keys.
    """

    # Model classes; each defaults to the implementation imported from
    # cognee.shared.data_models.
    classification_model: object = DefaultContentPrediction
    summarization_model: object = SummarizedContent
    labeling_model: object = LabeledContent
    cognitive_layer_model: object = DefaultCognitiveLayer

    # The default is fractional, so the field is a float. Annotating it as
    # int contradicted the default and would reject fractional values
    # supplied through the environment or the .env file.
    # (The "treshold" spelling is kept: it is part of the public name.)
    intra_layer_score_treshold: float = 0.98
    connect_documents: bool = False

    model_config = SettingsConfigDict(env_file = ".env", extra = "allow")

    def to_dict(self) -> dict:
        """Return the declared settings as a plain dictionary.

        Returns:
            dict: one entry per declared field, keyed by the field name.
        """
        return {
            "classification_model": self.classification_model,
            "summarization_model": self.summarization_model,
            "labeling_model": self.labeling_model,
            "cognitive_layer_model": self.cognitive_layer_model,
            "intra_layer_score_treshold": self.intra_layer_score_treshold,
            "connect_documents": self.connect_documents,
        }
@lru_cache
def get_cognify_config():
    """Return the CognifyConfig instance, building it on the first call.

    The function takes no arguments, so lru_cache hands back the same
    object on every later call.
    """
    settings = CognifyConfig()
    return settings

View file

@ -2,15 +2,19 @@
from typing import TypedDict from typing import TypedDict
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
from cognee.infrastructure.databases.vector import DataPoint from cognee.infrastructure.databases.vector import DataPoint
config = get_vectordb_config()
class TextChunk(TypedDict): class TextChunk(TypedDict):
text: str text: str
chunk_id: str chunk_id: str
file_metadata: dict file_metadata: dict
async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]): async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]):
vector_client = infrastructure_config.get_config("vector_engine") vector_client = config.vector_engine
identified_chunks = [] identified_chunks = []
@ -52,7 +56,7 @@ async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]):
async def add_data_chunks_basic_rag(dataset_data_chunks: dict[str, list[TextChunk]]): async def add_data_chunks_basic_rag(dataset_data_chunks: dict[str, list[TextChunk]]):
vector_client = infrastructure_config.get_config("vector_engine") vector_client = config.vector_engine
identified_chunks = [] identified_chunks = []