Update the chunking configs to select the chunk engine via the new ChunkEngine enum

This commit is contained in:
Vasilije 2024-06-10 08:06:32 +02:00
parent 11231b7ada
commit a23fc40f6e
4 changed files with 20 additions and 12 deletions

View file

@ -27,7 +27,7 @@ from cognee.modules.data.get_content_categories import get_content_categories
from cognee.modules.data.get_content_summary import get_content_summary
from cognee.modules.data.get_cognitive_layers import get_cognitive_layers
from cognee.modules.data.get_layer_graphs import get_layer_graphs
from cognee.shared.data_models import KnowledgeGraph, ChunkStrategy
from cognee.shared.data_models import KnowledgeGraph, ChunkStrategy, ChunkEngine
from cognee.shared.utils import send_telemetry
from cognee.modules.tasks import create_task_status_table, update_task_status
from cognee.shared.SourceCodeGraph import SourceCodeGraph
@ -281,7 +281,7 @@ if __name__ == "__main__":
await add([text], "example_dataset")
from cognee.api.v1.config.config import config
config.set_chunk_engine(LangchainChunkEngine() )
config.set_chunk_engine(ChunkEngine.LANGCHAIN_ENGINE )
config.set_chunk_strategy(ChunkStrategy.LANGCHAIN_CHARACTER)
config.embedding_engine = LiteLLMEmbeddingEngine()

View file

@ -2,14 +2,14 @@ from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.infrastructure.data.chunking.DefaultChunkEngine import DefaultChunkEngine
from cognee.shared.data_models import ChunkStrategy
from cognee.shared.data_models import ChunkStrategy, ChunkEngine
class ChunkConfig(BaseSettings):
chunk_size: int = 1500
chunk_overlap: int = 10
chunk_strategy: object = ChunkStrategy.PARAGRAPH
chunk_engine = DefaultChunkEngine
chunk_engine: object = ChunkEngine.DEFAULT_ENGINE
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
@ -18,7 +18,8 @@ class ChunkConfig(BaseSettings):
return {
"chunk_size": self.chunk_size,
"chunk_overlap": self.chunk_overlap,
"chunk_strategy": self.chunk_strategy
"chunk_strategy": self.chunk_strategy,
"chunk_engine": self.chunk_engine,
}

View file

@ -1,6 +1,7 @@
from typing import Dict
from cognee.infrastructure.data.chunking.LangchainChunkingEngine import LangchainChunkEngine
from cognee.shared.data_models import ChunkEngine
class ChunkingConfig(Dict):
@ -9,8 +10,8 @@ class ChunkingConfig(Dict):
vector_db_provider: str
def create_chunking_engine(config: ChunkingConfig):
if config["chunk_engine"] == "langchainchunkengine":
if config["chunk_engine"] == ChunkEngine.LANGCHAIN_ENGINE:
from cognee.infrastructure.data.chunking.LangchainChunkingEngine import LangchainChunkEngine
return LangchainChunkEngine(
chunk_size=config["chunk_size"],
chunk_overlap=config["chunk_overlap"],
@ -18,16 +19,16 @@ def create_chunking_engine(config: ChunkingConfig):
)
elif config["chunk_engine"] == "defaultchunkengine":
from .DefaultChunkEngine import DefaultChunkEngine
elif config["chunk_engine"] == ChunkEngine.DEFAULT_ENGINE:
from cognee.infrastructure.data.chunking.DefaultChunkEngine import DefaultChunkEngine
return DefaultChunkEngine(
chunk_size=config["chunk_size"],
chunk_overlap=config["chunk_overlap"],
chunk_strategy=config["chunk_strategy"],
)
elif config["chunk_engine"] == "haystackchunkengine":
from .HaystackChunkEngine import HaystackChunkEngine
elif config["chunk_engine"] == ChunkEngine.HAYSTACK_ENGINE:
from cognee.infrastructure.data.chunking.HaystackChunkEngine import HaystackChunkEngine
return HaystackChunkEngine(
chunk_size=config["chunk_size"],

View file

@ -37,6 +37,12 @@ class ChunkStrategy(Enum):
CODE = "code"
LANGCHAIN_CHARACTER = "langchain_character"
class ChunkEngine(str, Enum):
    """Identifiers for the available chunking engine implementations.

    The ``str`` mix-in makes every member compare equal to its plain string
    value (``ChunkEngine.DEFAULT_ENGINE == "default"``). Engine selection is
    done with ``==`` against these members, so a configuration value that
    arrives as raw text (for example from an environment file) still matches
    the intended member instead of silently matching nothing.
    """

    # Values are the short names accepted in configuration.
    LANGCHAIN_ENGINE = "langchain"
    DEFAULT_ENGINE = "default"
    HAYSTACK_ENGINE = "haystack"
class MemorySummary(BaseModel):
""" Memory summary. """
nodes: List[Node] = Field(..., default_factory=list)