fix db engine config

This commit is contained in:
Vasilije 2024-05-25 19:49:24 +02:00
parent 6918ff86be
commit 95e3d16f86
3 changed files with 43 additions and 14 deletions

View file

@ -2,6 +2,7 @@ import logging
import os
from cognee.config import Config
from .data.chunking.config import get_chunk_config
from .databases.relational import DuckDBAdapter, DatabaseEngine
from .databases.vector.vector_db_interface import VectorDBInterface
from .databases.vector.embeddings.DefaultEmbeddingEngine import DefaultEmbeddingEngine
@ -18,6 +19,7 @@ config.load()
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational = get_relationaldb_config()
chunk_config = get_chunk_config()
class InfrastructureConfig():
system_root_directory: str = config.system_root_directory
@ -38,7 +40,7 @@ class InfrastructureConfig():
connect_documents = config.connect_documents
database_directory_path: str = None
database_file_path: str = None
chunk_strategy = config.chunk_strategy
chunk_strategy = chunk_config.chunk_strategy
chunk_engine = None
graph_topology = config.graph_topology
monitoring_tool = config.monitoring_tool
@ -86,10 +88,10 @@ class InfrastructureConfig():
self.connect_documents = config.connect_documents
if self.chunk_strategy is None:
self.chunk_strategy = config.chunk_strategy
self.chunk_strategy = chunk_config.chunk_strategy
if self.chunk_engine is None:
self.chunk_engine = DefaultChunkEngine()
self.chunk_engine = chunk_config.chunk_engine
if self.graph_topology is None:
self.graph_topology = config.graph_topology

View file

@ -0,0 +1,27 @@
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.infrastructure.data.chunking.DefaultChunkEngine import DefaultChunkEngine
from cognee.shared.data_models import ChunkStrategy
class ChunkConfig(BaseSettings):
    """Settings that control how documents are split into chunks.

    As a ``BaseSettings`` subclass configured with ``env_file=".env"``,
    values may be supplied through the environment or a ``.env`` file;
    the defaults below apply otherwise. ``extra="allow"`` lets unrelated
    settings be present without raising a validation error.
    """

    chunk_size: int = 1500
    chunk_overlap: int = 0
    chunk_strategy: object = ChunkStrategy.PARAGRAPH
    chunk_engine: object = DefaultChunkEngine()

    model_config = SettingsConfigDict(env_file=".env", extra="allow")

    def to_dict(self) -> dict:
        """Return the chunk size, overlap and strategy as a plain dict.

        ``chunk_engine`` is not part of the returned mapping.
        """
        exported = ("chunk_size", "chunk_overlap", "chunk_strategy")
        return {field_name: getattr(self, field_name) for field_name in exported}
@lru_cache
def get_chunk_config() -> "ChunkConfig":
    """Return the chunking settings object.

    The function takes no arguments and is wrapped in ``lru_cache``, so the
    ``ChunkConfig`` built on the first call is returned on every later call.
    """
    settings = ChunkConfig()
    return settings

View file

@ -24,9 +24,9 @@ async def main():
dataset_name = "cs_explanations"
# explanation_file_path = "test_data/Natural_language_processing.txt"
explanation_file_path = os.path.join(os.getcwd(), "test_data/Natural_language_processing.txt")
#
# await cognee.add([explanation_file_path], dataset_name)
await cognee.add([explanation_file_path], dataset_name)
# dataset_name = "short_stories"
# # data_directory_path is defined above
@ -46,15 +46,15 @@ async def main():
Some notable LLMs are OpenAI's GPT series of models (e.g., GPT-3.5 and GPT-4, used in ChatGPT and Microsoft Copilot), Google's PaLM and Gemini (the latter of which is currently used in the chatbot of the same name), xAI's Grok, Meta's LLaMA family of open-source models, Anthropic's Claude models, Mistral AI's open source models, and Databricks' open source DBRX.
"""
dataset_name = "cs_explanations"
await cognee.add(
[
text_1,
text_2
],
dataset_name
)
#
# dataset_name = "cs_explanations"
# await cognee.add(
# [
# text_1,
# text_2
# ],
# dataset_name
# )
await cognee.cognify([ "cs_explanations"])