From a23fc40f6e8773f5fab6ceaae1ee8b731669d35a Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 10 Jun 2024 08:06:32 +0200 Subject: [PATCH] Updates to the configs --- cognee/api/v1/cognify/cognify.py | 4 ++-- cognee/infrastructure/data/chunking/config.py | 7 ++++--- .../data/chunking/create_chunking_engine.py | 15 ++++++++------- cognee/shared/data_models.py | 6 ++++++ 4 files changed, 20 insertions(+), 12 deletions(-) diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 2390e60a6..8d11e9ad9 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -27,7 +27,7 @@ from cognee.modules.data.get_content_categories import get_content_categories from cognee.modules.data.get_content_summary import get_content_summary from cognee.modules.data.get_cognitive_layers import get_cognitive_layers from cognee.modules.data.get_layer_graphs import get_layer_graphs -from cognee.shared.data_models import KnowledgeGraph, ChunkStrategy +from cognee.shared.data_models import KnowledgeGraph, ChunkStrategy, ChunkEngine from cognee.shared.utils import send_telemetry from cognee.modules.tasks import create_task_status_table, update_task_status from cognee.shared.SourceCodeGraph import SourceCodeGraph @@ -281,7 +281,7 @@ if __name__ == "__main__": await add([text], "example_dataset") from cognee.api.v1.config.config import config - config.set_chunk_engine(LangchainChunkEngine() ) + config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE ) config.set_chunk_strategy(ChunkStrategy.LANGCHAIN_CHARACTER) config.embedding_engine = LiteLLMEmbeddingEngine() diff --git a/cognee/infrastructure/data/chunking/config.py b/cognee/infrastructure/data/chunking/config.py index ebac4375f..b917b8ddd 100644 --- a/cognee/infrastructure/data/chunking/config.py +++ b/cognee/infrastructure/data/chunking/config.py @@ -2,14 +2,14 @@ from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.infrastructure.data.chunking.DefaultChunkEngine import DefaultChunkEngine -from cognee.shared.data_models import ChunkStrategy +from cognee.shared.data_models import ChunkStrategy, ChunkEngine class ChunkConfig(BaseSettings): chunk_size: int = 1500 chunk_overlap: int = 10 chunk_strategy: object = ChunkStrategy.PARAGRAPH - chunk_engine = DefaultChunkEngine + chunk_engine: object = ChunkEngine.DEFAULT_ENGINE model_config = SettingsConfigDict(env_file = ".env", extra = "allow") @@ -18,7 +18,8 @@ class ChunkConfig(BaseSettings): return { "chunk_size": self.chunk_size, "chunk_overlap": self.chunk_overlap, - "chunk_strategy": self.chunk_strategy + "chunk_strategy": self.chunk_strategy, + "chunk_engine": self.chunk_engine, } diff --git a/cognee/infrastructure/data/chunking/create_chunking_engine.py b/cognee/infrastructure/data/chunking/create_chunking_engine.py index 55b582577..27fe5d237 100644 --- a/cognee/infrastructure/data/chunking/create_chunking_engine.py +++ b/cognee/infrastructure/data/chunking/create_chunking_engine.py @@ -1,6 +1,7 @@ from typing import Dict -from cognee.infrastructure.data.chunking.LangchainChunkingEngine import LangchainChunkEngine + +from cognee.shared.data_models import ChunkEngine class ChunkingConfig(Dict): @@ -9,8 +10,8 @@ class ChunkingConfig(Dict): vector_db_provider: str def create_chunking_engine(config: ChunkingConfig): - if config["chunk_engine"] == "langchainchunkengine": - + if config["chunk_engine"] == ChunkEngine.LANGCHAIN_ENGINE: + from cognee.infrastructure.data.chunking.LangchainChunkingEngine import LangchainChunkEngine return LangchainChunkEngine( chunk_size=config["chunk_size"], chunk_overlap=config["chunk_overlap"], @@ -18,16 +19,16 @@ def create_chunking_engine(config: ChunkingConfig): ) - elif config["chunk_engine"] == "defaultchunkengine": - from .DefaultChunkEngine import DefaultChunkEngine + elif config["chunk_engine"] == ChunkEngine.DEFAULT_ENGINE: + from cognee.infrastructure.data.chunking.DefaultChunkEngine import DefaultChunkEngine return DefaultChunkEngine( chunk_size=config["chunk_size"], chunk_overlap=config["chunk_overlap"], chunk_strategy=config["chunk_strategy"], ) - elif config["chunk_engine"] == "haystackchunkengine": - from .HaystackChunkEngine import HaystackChunkEngine + elif config["chunk_engine"] == ChunkEngine.HAYSTACK_ENGINE: + from cognee.infrastructure.data.chunking.HaystackChunkEngine import HaystackChunkEngine return HaystackChunkEngine( chunk_size=config["chunk_size"], diff --git a/cognee/shared/data_models.py b/cognee/shared/data_models.py index 906cd537d..582469d6c 100644 --- a/cognee/shared/data_models.py +++ b/cognee/shared/data_models.py @@ -37,6 +37,12 @@ class ChunkStrategy(Enum): CODE = "code" LANGCHAIN_CHARACTER = "langchain_character" + +class ChunkEngine(Enum): + LANGCHAIN_ENGINE = "langchain" + DEFAULT_ENGINE = "default" + HAYSTACK_ENGINE = "haystack" + class MemorySummary(BaseModel): """ Memory summary. """ nodes: List[Node] = Field(..., default_factory=list)