From 4fb3dc31a4c918427ba87730bf2e383e0389f841 Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Mon, 3 Jun 2024 14:03:24 +0200 Subject: [PATCH] fix: enable sdk and fix config --- cognee/__init__.py | 12 +- cognee/api/client.py | 18 ++- cognee/api/v1/add/add.py | 44 ++----- cognee/api/v1/add/remember.py | 21 ---- cognee/api/v1/cognify/cognify.py | 17 ++- cognee/api/v1/config/config.py | 31 ++--- cognee/api/v1/datasets/datasets.py | 2 +- cognee/api/v1/search/search.py | 2 +- cognee/api/v1/topology/add_topology.py | 63 +++------- cognee/base_config.py | 4 +- cognee/config.py | 116 ------------------ cognee/infrastructure/InfrastructureConfig.py | 113 ----------------- cognee/infrastructure/__init__.py | 1 - .../data/utils/extract_keywords.py | 2 +- .../infrastructure/databases/graph/config.py | 3 +- .../databases/relational/__init__.py | 1 + .../databases/relational/config.py | 7 +- .../infrastructure/databases/vector/config.py | 9 +- .../llm/generic_llm_api/adapter.py | 27 ++-- cognee/infrastructure/llm/get_llm_client.py | 1 - cognee/infrastructure/llm/openai/adapter.py | 27 ++-- cognee/modules/cognify/config.py | 14 +-- cognee/modules/cognify/evaluate.py | 10 +- .../graph/add_cognitive_layer_graphs.py | 4 +- .../cognify/graph/add_node_connections.py | 3 +- cognee/modules/cognify/test.py | 8 +- cognee/modules/cognify/train.py | 8 +- .../extract_knowledge_graph_module.py | 2 +- cognee/modules/data/get_cognitive_layers.py | 3 +- cognee/modules/data/get_content_summary.py | 3 +- cognee/modules/data/get_layer_graphs.py | 8 +- cognee/modules/discovery/__init__.py | 1 - cognee/modules/ingestion/__init__.py | 3 + .../discover_directory_datasets.py | 0 .../modules/ingestion/get_matched_datasets.py | 12 ++ cognee/modules/ingestion/save_data_to_file.py | 19 +++ .../modules/search/graph/search_neighbour.py | 1 - cognee/modules/search/graph/search_summary.py | 1 - .../search/llm/get_relevant_summary.py | 7 +- cognee/modules/settings/get_settings.py | 3 - cognee/modules/settings/save_llm_config.py | 6 - .../topology/extraction/extract_topology.py | 6 +- .../modules/topology/infer_data_topology.py | 13 +- cognee/modules/topology/topology.py | 3 - cognee/root_dir.py | 19 +-- cognee/{ => shared}/utils.py | 8 +- evals/simple_rag_vs_cognee_eval.py | 17 ++- notebooks/cognee - Get Started.ipynb | 2 +- notebooks/full_run.ipynb | 7 +- 49 files changed, 194 insertions(+), 518 deletions(-) delete mode 100644 cognee/api/v1/add/remember.py delete mode 100644 cognee/config.py delete mode 100644 cognee/infrastructure/InfrastructureConfig.py delete mode 100644 cognee/modules/discovery/__init__.py rename cognee/modules/{discovery => ingestion}/discover_directory_datasets.py (100%) create mode 100644 cognee/modules/ingestion/get_matched_datasets.py create mode 100644 cognee/modules/ingestion/save_data_to_file.py rename cognee/{ => shared}/utils.py (99%) diff --git a/cognee/__init__.py b/cognee/__init__.py index a4f4a9870..081b3d7f0 100644 --- a/cognee/__init__.py +++ b/cognee/__init__.py @@ -1,6 +1,6 @@ -# from .api.v1.config.config import config -# from .api.v1.add.add import add -# from .api.v1.cognify.cognify import cognify -# from .api.v1.datasets.datasets import datasets -# from .api.v1.search.search import search, SearchType -# from .api.v1.prune import prune +from .api.v1.config.config import config +from .api.v1.add.add import add +from .api.v1.cognify.cognify import cognify +from .api.v1.datasets.datasets import datasets +from .api.v1.search.search import search, SearchType +from .api.v1.prune import prune diff --git a/cognee/api/client.py b/cognee/api/client.py index a24b8aa6a..29116c5d9 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -68,7 +68,7 @@ async def delete_dataset(dataset_id: str): @app.get("/datasets/{dataset_id}/graph", response_model=list) async def get_dataset_graph(dataset_id: str): - from cognee.utils import render_graph + from cognee.shared.utils import render_graph from cognee.infrastructure.databases.graph import get_graph_config from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client @@ -253,14 +253,24 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000): logger.info("Starting server at %s:%s", host, port) from cognee.base_config import get_base_config + from cognee.infrastructure.databases.relational import get_relationaldb_config + from cognee.infrastructure.databases.vector import get_vectordb_config + + cognee_directory_path = os.path.abspath(".cognee_system") + databases_directory_path = os.path.join(cognee_directory_path, "databases") + + relational_config = get_relationaldb_config() + relational_config.db_path = databases_directory_path + relational_config.create_engine() + + vector_config = get_vectordb_config() + vector_config.vector_db_path = databases_directory_path + vector_config.create_engine() base_config = get_base_config() data_directory_path = os.path.abspath(".data_storage") base_config.data_root_directory = data_directory_path - cognee_directory_path = os.path.abspath(".cognee_system") - base_config.system_root_directory = cognee_directory_path - from cognee.modules.data.deletion import prune_system asyncio.run(prune_system()) diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index 0900309e4..66e831dde 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -5,21 +5,23 @@ import dlt import duckdb import cognee.modules.ingestion as ingestion from cognee.infrastructure.files.storage import LocalStorage -from cognee.modules.discovery import discover_directory_datasets -from cognee.utils import send_telemetry +from cognee.modules.ingestion import get_matched_datasets, save_data_to_file +from cognee.shared.utils import send_telemetry from cognee.base_config import get_base_config -base_config = get_base_config() from cognee.infrastructure.databases.relational.config import get_relationaldb_config async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None): if isinstance(data, str): - # data is a data directory path if "data://" in data: - return await add_data_directory(data.replace("data://", ""), dataset_name) - # data is a file path + # data is a data directory path + datasets = get_matched_datasets(data.replace("data://", ""), dataset_name) + return await asyncio.gather(*[add(file_paths, dataset_name) for [dataset_name, file_paths] in datasets]) + if "file://" in data: + # data is a file path return await add([data], dataset_name) - # data is a text + + # data is text else: file_path = save_data_to_file(data, dataset_name) return await add([file_path], dataset_name) @@ -47,7 +49,7 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_nam return [] async def add_files(file_paths: List[str], dataset_name: str): - # infra_config = infrastructure_config.get_config() + base_config = get_base_config() data_directory_path = base_config.data_root_directory processed_file_paths = [] @@ -107,29 +109,3 @@ async def add_files(file_paths: List[str], dataset_name: str): send_telemetry("cognee.add") return run_info - -async def add_data_directory(data_path: str, dataset_name: str = None): - datasets = discover_directory_datasets(data_path) - - results = [] - - for key in datasets.keys(): - if dataset_name is None or key.startswith(dataset_name): - results.append(add(datasets[key], dataset_name = key)) - - return await asyncio.gather(*results) - -def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None): - data_directory_path = base_config.data_root_directory - - classified_data = ingestion.classify(data, filename) - # data_id = ingestion.identify(classified_data) - - storage_path = data_directory_path + "/" + dataset_name.replace(".", "/") - LocalStorage.ensure_directory_exists(storage_path) - - file_metadata = classified_data.get_metadata() - file_name = file_metadata["name"] - LocalStorage(storage_path).store(file_name, classified_data.get_data()) - - return "file://" + storage_path + "/" + file_name diff --git a/cognee/api/v1/add/remember.py b/cognee/api/v1/add/remember.py deleted file mode 100644 index de11aa71b..000000000 --- a/cognee/api/v1/add/remember.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import List -from enum import Enum -from cognee.modules.users.memory import create_information_points, is_existing_memory - -class MemoryType(Enum): - GRAPH = "GRAPH" - VECTOR = "VECTOR" - RELATIONAL = "RELATIONAL" - -class MemoryException(Exception): - message: str - - def __init__(self, message: str): - self.message = message - - -async def remember(user_id: str, memory_name: str, payload: List[str]): - if await is_existing_memory(memory_name) is False: - raise MemoryException(f"Memory with the name \"{memory_name}\" doesn't exist.") - - await create_information_points(memory_name, payload) \ No newline at end of file diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index eea3d5cd1..7c4122eb8 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -3,8 +3,8 @@ from uuid import uuid4 from typing import List, Union import logging import nltk +from asyncio import Lock from nltk.corpus import stopwords -from cognee.config import Config from cognee.infrastructure.databases.graph.config import get_graph_config from cognee.modules.cognify.graph.add_node_connections import group_nodes_by_layer, \ graph_ready_output, connect_nodes_in_graph @@ -24,18 +24,14 @@ from cognee.modules.data.get_content_summary import get_content_summary from cognee.modules.data.get_cognitive_layers import get_cognitive_layers from cognee.modules.data.get_layer_graphs import get_layer_graphs from cognee.shared.data_models import KnowledgeGraph -from cognee.utils import send_telemetry +from cognee.shared.utils import send_telemetry from cognee.modules.tasks import create_task_status_table, update_task_status from cognee.shared.SourceCodeGraph import SourceCodeGraph -from asyncio import Lock from cognee.modules.tasks import get_task_status from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.modules.cognify.config import get_cognify_config from cognee.infrastructure.databases.relational.config import get_relationaldb_config -config = Config() -config.load() - USER_ID = "default_user" logger = logging.getLogger("cognify") @@ -66,7 +62,7 @@ async def cognify(datasets: Union[str, List[str]] = None): task_status = get_task_status([dataset_name]) if task_status == "DATASET_PROCESSING_STARTED": - logger.error(f"Dataset {dataset_name} is already being processed.") + logger.info(f"Dataset {dataset_name} is being processed.") return update_task_status(dataset_name, "DATASET_PROCESSING_STARTED") @@ -176,8 +172,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi graph_config = get_graph_config() graph_client = await get_graph_client(graph_config.graph_engine) - cognify_config = get_cognify_config() - graph_topology = cognify_config.graph_model + graph_topology = graph_config.graph_model if graph_topology == SourceCodeGraph: classified_categories = [{"data_type": "text", "category_name": "Code and functions"}] @@ -199,6 +194,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi await add_summary_nodes(graph_client, document_id, content_summary) print(f"Chunk ({chunk_id}) summarized.") + cognify_config = get_cognify_config() + cognitive_layers = await get_cognitive_layers(input_text, classified_categories) cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit] @@ -286,7 +283,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi # print("results", out) # # -# # from cognee.utils import render_graph +# # from cognee.shared.utils import render_graph # # # # await render_graph(graph, include_color=True, include_nodes=False, include_size=False) diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py index c44ec1dd0..18fc0155d 100644 --- a/cognee/api/v1/config/config.py +++ b/cognee/api/v1/config/config.py @@ -1,58 +1,60 @@ """ This module is used to set the configuration of the system.""" +import os from cognee.base_config import get_base_config -from cognee.infrastructure.databases.graph.config import get_graph_config -from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.modules.cognify.config import get_cognify_config +from cognee.infrastructure.data.chunking.config import get_chunk_config +from cognee.infrastructure.databases.vector import get_vectordb_config +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.databases.relational import get_relationaldb_config class config(): @staticmethod def system_root_directory(system_root_directory: str): - base_config = get_base_config() - base_config.system_root_directory = system_root_directory + databases_directory_path = os.path.join(system_root_directory, "databases") + relational_config = get_relationaldb_config() + relational_config.db_path = databases_directory_path + relational_config.create_engine() + + vector_config = get_vectordb_config() + vector_config.vector_db_path = databases_directory_path + vector_config.create_engine() @staticmethod def data_root_directory(data_root_directory: str): base_config = get_base_config() base_config.data_root_directory = data_root_directory - @staticmethod def monitoring_tool(monitoring_tool: object): base_config = get_base_config() base_config.monitoring_tool = monitoring_tool - @staticmethod def set_classification_model(classification_model: object): cognify_config = get_cognify_config() cognify_config.classification_model = classification_model - @staticmethod def set_summarization_model(summarization_model: object): cognify_config = get_cognify_config() cognify_config.summarization_model=summarization_model - @staticmethod def set_labeling_model(labeling_model: object): cognify_config = get_cognify_config() cognify_config.labeling_model =labeling_model - @staticmethod def set_graph_model(graph_model: object): graph_config = get_graph_config() graph_config.graph_model = graph_model - @staticmethod def set_cognitive_layer_model(cognitive_layer_model: object): cognify_config = get_cognify_config() cognify_config.cognitive_layer_model = cognitive_layer_model - @staticmethod def set_graph_engine(graph_engine: object): graph_config = get_graph_config() @@ -78,7 +80,6 @@ class config(): cognify_config = get_cognify_config() cognify_config.intra_layer_score_treshold = intra_layer_score_treshold - @staticmethod def connect_documents(connect_documents: bool): cognify_config = get_cognify_config() @@ -88,9 +89,3 @@ class config(): def set_chunk_strategy(chunk_strategy: object): chunk_config = get_chunk_config() chunk_config.chunk_strategy = chunk_strategy - - - @staticmethod - def set_graph_topology(graph_topology: object): - cognify_config = get_cognify_config() - cognify_config.graph_topology = graph_topology diff --git a/cognee/api/v1/datasets/datasets.py b/cognee/api/v1/datasets/datasets.py index f52e6e937..92ae14ce9 100644 --- a/cognee/api/v1/datasets/datasets.py +++ b/cognee/api/v1/datasets/datasets.py @@ -1,5 +1,5 @@ from duckdb import CatalogException -from cognee.modules.discovery import discover_directory_datasets +from cognee.modules.ingestion import discover_directory_datasets from cognee.modules.tasks import get_task_status from cognee.infrastructure.databases.relational.config import get_relationaldb_config diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 36a5d10df..4273767f3 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -11,7 +11,7 @@ from cognee.modules.search.graph.search_categories import search_categories from cognee.modules.search.graph.search_neighbour import search_neighbour from cognee.modules.search.graph.search_summary import search_summary from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client -from cognee.utils import send_telemetry +from cognee.shared.utils import send_telemetry from cognee.infrastructure.databases.graph.config import get_graph_config class SearchType(Enum): diff --git a/cognee/api/v1/topology/add_topology.py b/cognee/api/v1/topology/add_topology.py index 812a7de03..deb074b25 100644 --- a/cognee/api/v1/topology/add_topology.py +++ b/cognee/api/v1/topology/add_topology.py @@ -1,33 +1,29 @@ -from typing import List, Dict, Any, Union, Optional - -from cognee.infrastructure import infrastructure_config -from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client - -from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel import pandas as pd from pydantic import BaseModel - +from typing import List, Dict, Any, Union, Optional +from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client +from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel +from cognee.infrastructure.databases.graph.config import get_graph_config USER_ID = "default_user" async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any: - graph_db_type = infrastructure_config.get_config()["graph_engine"] + graph_config = get_graph_config() + graph_db_type = graph_config.graph_database_provider graph_client = await get_graph_client(graph_db_type) - graph_topology = infrastructure_config.get_config()["graph_topology"] - engine = TopologyEngine() topology = await engine.infer_from_directory_structure(node_id=USER_ID, repository=directory, model=model) def flatten_model(model: BaseModel, parent_id: Optional[str] = None) -> Dict[str, Any]: """Flatten a single Pydantic model to a dictionary handling nested structures.""" - result = {**model.dict(), 'parent_id': parent_id} - if hasattr(model, 'default_relationship') and model.default_relationship: + result = {**model.dict(), "parent_id": parent_id} + if hasattr(model, "default_relationship") and model.default_relationship: result.update({ - 'relationship_type': model.default_relationship.type, - 'relationship_source': model.default_relationship.source, - 'relationship_target': model.default_relationship.target + "relationship_type": model.default_relationship.type, + "relationship_source": model.default_relationship.source, + "relationship_target": model.default_relationship.target }) return result @@ -39,7 +35,7 @@ async def add_topology(directory: str = "example", model: BaseModel = GitHubRepo flat = [flatten_model(items, parent_id)] for field, value in items: if isinstance(value, (BaseModel, list)): - flat.extend(recursive_flatten(value, items.dict().get('node_id', None))) + flat.extend(recursive_flatten(value, items.dict().get("node_id", None))) return flat else: return [] @@ -56,38 +52,11 @@ async def add_topology(directory: str = "example", model: BaseModel = GitHubRepo for _, row in df.iterrows(): node_data = row.to_dict() - node_id = node_data.pop('node_id') + node_id = node_data.pop("node_id") - # Remove 'node_id' and get its value + # Remove "node_id" and get its value await graph_client.add_node(node_id, node_data) - if pd.notna(row['relationship_source']) and pd.notna(row['relationship_target']): - await graph_client.add_edge(row['relationship_source'], row['relationship_target'], relationship_name=row['relationship_type']) + if pd.notna(row["relationship_source"]) and pd.notna(row["relationship_target"]): + await graph_client.add_edge(row["relationship_source"], row["relationship_target"], relationship_name=row["relationship_type"]) return graph_client.graph - -if __name__ == "__main__": - async def test() -> None: - # Uncomment and modify the following lines as needed - # await prune.prune_system() - # - # from cognee.api.v1.add import add - # data_directory_path = os.path.abspath("../../../.data") - # # print(data_directory_path) - # # config.data_root_directory(data_directory_path) - # # cognee_directory_path = os.path.abspath("../.cognee_system") - # # config.system_root_directory(cognee_directory_path) - # - # await add("data://" + data_directory_path, "example") - - # graph = await add_topology() - - graph_db_type = infrastructure_config.get_config()["graph_engine"] - - graph_client = await get_graph_client(graph_db_type) - # - from cognee.utils import render_graph - - await render_graph(graph_client.graph, include_color=True, include_nodes=False, include_size=False) - - import asyncio - asyncio.run(test()) diff --git a/cognee/base_config.py b/cognee/base_config.py index 656e70046..1cc79b428 100644 --- a/cognee/base_config.py +++ b/cognee/base_config.py @@ -4,15 +4,15 @@ from cognee.root_dir import get_absolute_path from cognee.shared.data_models import MonitoringTool class BaseConfig(BaseSettings): - system_root_directory: str = get_absolute_path(".cognee_system") data_root_directory: str = get_absolute_path(".data") monitoring_tool: object = MonitoringTool.LANGFUSE + graphistry_username: str + graphistry_password: str model_config = SettingsConfigDict(env_file = ".env", extra = "allow") def to_dict(self) -> dict: return { - "system_root_directory": self.system_root_directory, "data_root_directory": self.data_root_directory, "monitoring_tool": self.monitoring_tool, } diff --git a/cognee/config.py b/cognee/config.py deleted file mode 100644 index 29d885dc6..000000000 --- a/cognee/config.py +++ /dev/null @@ -1,116 +0,0 @@ -"""Configuration for cognee - cognitive architecture framework.""" -import logging -import os -import configparser -import uuid -from typing import Optional, Dict, Any -from dataclasses import dataclass, field -from pathlib import Path -from dotenv import load_dotenv - -logging.basicConfig(level=logging.DEBUG) - -def load_dontenv(): - base_dir = Path(__file__).resolve().parent.parent - # Load the .env file from the base directory - dotenv_path = base_dir / ".env" - load_dotenv(dotenv_path=dotenv_path, override = True) - -try: - load_dontenv() -except: - pass - -@dataclass -class Config: - """ Configuration for cognee - cognitive architecture framework. """ - cognee_dir: str = field( - default_factory=lambda: os.getenv("COG_ARCH_DIR", "cognee") - ) - config_path: str = field( - default_factory=lambda: os.path.join( - os.getenv("COG_ARCH_DIR", "cognee"), "config" - ) - ) - - # custom_model: str = os.getenv("CUSTOM_LLM_MODEL", "llama3-70b-8192") #"mistralai/Mixtral-8x7B-Instruct-v0.1" - # custom_endpoint: str = os.getenv("CUSTOM_ENDPOINT", "https://api.endpoints.anyscale.com/v1") #"https://api.endpoints.anyscale.com/v1" # pass claude endpoint - # custom_key: Optional[str] = os.getenv("CUSTOM_LLM_API_KEY") - # ollama_endpoint: str = os.getenv("CUSTOM_OLLAMA_ENDPOINT", "http://localhost:11434/v1") #"http://localhost:11434/v1" - # ollama_key: Optional[str] = "ollama" - # ollama_model: str = os.getenv("CUSTOM_OLLAMA_MODEL", "mistral:instruct") #"mistral:instruct" - # openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4o" ) #"gpt-4o" - # model_endpoint: str = "openai" - # llm_api_key: Optional[str] = os.getenv("OPENAI_API_KEY") - # openai_embedding_model = "text-embedding-3-large" - # openai_embedding_dimensions = 3072 - # litellm_embedding_model = "text-embedding-3-large" - # litellm_embedding_dimensions = 3072 - - graphistry_username = os.getenv("GRAPHISTRY_USERNAME") - graphistry_password = os.getenv("GRAPHISTRY_PASSWORD") - - # Embedding parameters - embedding_model: str = "BAAI/bge-large-en-v1.5" - embedding_dimensions: int = 1024 - connect_documents: bool = False - - # Model parameters and configuration for interlayer scoring - intra_layer_score_treshold: float = 0.98 - - # Client ID - anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex) - - #Chunking parameters - # chunk_size: int = 1500 - # chunk_overlap: int = 0 - # chunk_strategy: str = ChunkStrategy.PARAGRAPH - - def load(self): - """Loads the configuration from a file or environment variables.""" - try: - load_dontenv() - except: - pass - config = configparser.ConfigParser() - config.read(self.config_path) - - # Override with environment variables if they exist - for attr in self.__annotations__: - env_value = os.getenv(attr.upper()) - if env_value is not None: - setattr(self, attr, env_value) - - # Load from config file - if config.sections(): - for section in config.sections(): - for key, value in config.items(section): - if hasattr(self, key): - setattr(self, key, value) - - def save(self): - """Saves the current configuration to a file.""" - config = configparser.ConfigParser() - - # Save the current settings to the config file - for attr, value in self.__dict__.items(): - section, option = attr.split("_", 1) - if not config.has_section(section): - config.add_section(section) - config.set(section, option, str(value)) - - with open(self.config_path, "w") as configfile: - config.write(configfile) - - def to_dict(self) -> Dict[str, Any]: - """Returns a dictionary representation of the configuration.""" - return {attr: getattr(self, attr) for attr in self.__annotations__} - - @classmethod - def from_dict(cls, config_dict: Dict[str, Any]) -> "Config": - """Creates a Config instance from a dictionary.""" - config = cls() - for attr, value in config_dict.items(): - if hasattr(config, attr): - setattr(config, attr, value) - return config \ No newline at end of file diff --git a/cognee/infrastructure/InfrastructureConfig.py b/cognee/infrastructure/InfrastructureConfig.py deleted file mode 100644 index 8711c2e14..000000000 --- a/cognee/infrastructure/InfrastructureConfig.py +++ /dev/null @@ -1,113 +0,0 @@ -import logging -from cognee.config import Config -from .data.chunking.config import get_chunk_config -from .llm.llm_interface import LLMInterface -from .llm.get_llm_client import get_llm_client -from ..shared.data_models import GraphDBType, DefaultContentPrediction, KnowledgeGraph, SummarizedContent, \ - LabeledContent, DefaultCognitiveLayer - -logging.basicConfig(level=logging.DEBUG) - -config = Config() -config.load() - -chunk_config = get_chunk_config() -class InfrastructureConfig(): - graph_engine: GraphDBType = None - llm_engine: LLMInterface = None - classification_model = None - summarization_model = None - labeling_model = None - graph_model = None - cognitive_layer_model = None - intra_layer_score_treshold = None - embedding_engine = None - connect_documents = config.connect_documents - chunk_strategy = chunk_config.chunk_strategy - chunk_engine = None - llm_provider: str = None - llm_model: str = None - llm_endpoint: str = None - llm_api_key: str = None - - def get_config(self, config_entity: str = None) -> dict: - if self.graph_engine is None: - self.graph_engine = GraphDBType.NETWORKX - - if self.classification_model is None: - self.classification_model = DefaultContentPrediction - - if self.summarization_model is None: - self.summarization_model = SummarizedContent - - if self.labeling_model is None: - self.labeling_model = LabeledContent - - if self.graph_model is None: - self.graph_model = KnowledgeGraph - - if self.cognitive_layer_model is None: - self.cognitive_layer_model = DefaultCognitiveLayer - - if self.intra_layer_score_treshold is None: - self.intra_layer_score_treshold = config.intra_layer_score_treshold - - if self.connect_documents is None: - self.connect_documents = config.connect_documents - - if self.chunk_strategy is None: - self.chunk_strategy = chunk_config.chunk_strategy - - if self.chunk_engine is None: - self.chunk_engine = chunk_config.chunk_engine - - if (config_entity is None or config_entity == "llm_engine") and self.llm_engine is None: - self.llm_engine = get_llm_client() - - if config_entity is not None: - return getattr(self, config_entity) - - return { - "llm_engine": self.llm_engine, - "classification_model": self.classification_model, - "summarization_model": self.summarization_model, - "labeling_model": self.labeling_model, - "graph_model": self.graph_model, - "cognitive_layer_model": self.cognitive_layer_model, - "llm_provider": self.llm_provider, - "intra_layer_score_treshold": self.intra_layer_score_treshold, - "embedding_engine": self.embedding_engine, - "connect_documents": self.connect_documents, - "chunk_strategy": self.chunk_strategy, - "chunk_engine": self.chunk_engine, - } - - def set_config(self, new_config: dict): - if "classification_model" in new_config: - self.classification_model = new_config["classification_model"] - - if "summarization_model" in new_config: - self.summarization_model = new_config["summarization_model"] - - if "labeling_model" in new_config: - self.labeling_model = new_config["labeling_model"] - - if "cognitive_layer_model" in new_config: - self.cognitive_layer_model = new_config["cognitive_layer_model"] - - if "intra_layer_score_treshold" in new_config: - self.intra_layer_score_treshold = new_config["intra_layer_score_treshold"] - - if "embedding_engine" in new_config: - self.embedding_engine = new_config["embedding_engine"] - - if "connect_documents" in new_config: - self.connect_documents = new_config["connect_documents"] - - if "chunk_strategy" in new_config: - self.chunk_strategy = new_config["chunk_strategy"] - - if "chunk_engine" in new_config: - self.chunk_engine = new_config["chunk_engine"] - -infrastructure_config = InfrastructureConfig() diff --git a/cognee/infrastructure/__init__.py b/cognee/infrastructure/__init__.py index abdd5daab..e69de29bb 100644 --- a/cognee/infrastructure/__init__.py +++ b/cognee/infrastructure/__init__.py @@ -1 +0,0 @@ -from .InfrastructureConfig import infrastructure_config diff --git a/cognee/infrastructure/data/utils/extract_keywords.py b/cognee/infrastructure/data/utils/extract_keywords.py index 76940fdfc..ab32ddefb 100644 --- a/cognee/infrastructure/data/utils/extract_keywords.py +++ b/cognee/infrastructure/data/utils/extract_keywords.py @@ -1,5 +1,5 @@ from sklearn.feature_extraction.text import TfidfVectorizer -from cognee.utils import extract_pos_tags +from cognee.shared.utils import extract_pos_tags def extract_keywords(text: str) -> list[str]: if len(text) == 0: diff --git a/cognee/infrastructure/databases/graph/config.py b/cognee/infrastructure/databases/graph/config.py index bdb122823..cc9870d4f 100644 --- a/cognee/infrastructure/databases/graph/config.py +++ b/cognee/infrastructure/databases/graph/config.py @@ -4,7 +4,7 @@ import os from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.infrastructure.databases.relational.config import get_relationaldb_config -from cognee.shared.data_models import DefaultGraphModel, GraphDBType, KnowledgeGraph +from cognee.shared.data_models import GraphDBType, KnowledgeGraph class GraphConfig(BaseSettings): @@ -26,7 +26,6 @@ class GraphConfig(BaseSettings): return { "graph_filename": self.graph_filename, "graph_database_provider": self.graph_database_provider, - "graph_topology": self.graph_topology, "graph_file_path": self.graph_file_path, "graph_database_url": self.graph_database_url, "graph_database_username": self.graph_database_username, diff --git a/cognee/infrastructure/databases/relational/__init__.py b/cognee/infrastructure/databases/relational/__init__.py index 4acf63654..277553c82 100644 --- a/cognee/infrastructure/databases/relational/__init__.py +++ b/cognee/infrastructure/databases/relational/__init__.py @@ -2,3 +2,4 @@ from .ModelBase import ModelBase from .DatabaseEngine import DatabaseEngine from .sqlite.SqliteEngine import SqliteEngine from .duckdb.DuckDBAdapter import DuckDBAdapter +from .config import get_relationaldb_config diff --git a/cognee/infrastructure/databases/relational/config.py b/cognee/infrastructure/databases/relational/config.py index 3b259ef26..f0ddfc01d 100644 --- a/cognee/infrastructure/databases/relational/config.py +++ b/cognee/infrastructure/databases/relational/config.py @@ -1,11 +1,11 @@ import os from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict -from cognee.base_config import get_base_config +from cognee.root_dir import get_absolute_path from .create_relational_engine import create_relational_engine class RelationalConfig(BaseSettings): - db_path: str = os.path.join(get_base_config().system_root_directory, "databases") + db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases") db_name: str = "cognee.db" db_host: str = "localhost" db_port: str = "5432" @@ -17,7 +17,8 @@ class RelationalConfig(BaseSettings): model_config = SettingsConfigDict(env_file = ".env", extra = "allow") def create_engine(self): - return create_relational_engine(self.db_path, self.db_name) + self.db_file_path = os.path.join(self.db_path, self.db_name) + self.database_engine = create_relational_engine(self.db_path, self.db_name) def to_dict(self) -> dict: return { diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index a8dfc7e96..de994696d 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -1,12 +1,13 @@ import os from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict -from cognee.infrastructure.databases.relational.config import get_relationaldb_config from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config +from cognee.root_dir import get_absolute_path from .create_vector_engine import create_vector_engine class VectorConfig(BaseSettings): - vector_db_url: str = os.path.join(get_relationaldb_config().db_path, "cognee.lancedb") + vector_db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases") + vector_db_url: str = os.path.join(vector_db_path, "cognee.lancedb") vector_db_key: str = "" vector_engine_provider: str = "lancedb" vector_engine: object = create_vector_engine( @@ -22,7 +23,9 @@ class VectorConfig(BaseSettings): def create_engine(self): if self.vector_engine_provider == "lancedb": - self.vector_db_url = os.path.join(get_relationaldb_config().db_path, "cognee.lancedb") + self.vector_db_url = os.path.join(self.vector_db_path, "cognee.lancedb") + else: + self.vector_db_path = None self.vector_engine = create_vector_engine( get_vectordb_config().to_dict(), diff --git a/cognee/infrastructure/llm/generic_llm_api/adapter.py b/cognee/infrastructure/llm/generic_llm_api/adapter.py index 86fa172ba..f65d559d5 100644 --- a/cognee/infrastructure/llm/generic_llm_api/adapter.py +++ b/cognee/infrastructure/llm/generic_llm_api/adapter.py @@ -5,26 +5,12 @@ from pydantic import BaseModel import instructor from tenacity import retry, stop_after_attempt import openai - - -from cognee.infrastructure import infrastructure_config from cognee.infrastructure.llm.llm_interface import LLMInterface from cognee.infrastructure.llm.prompts import read_query_prompt from cognee.shared.data_models import MonitoringTool from cognee.base_config import get_base_config from cognee.infrastructure.llm.config import get_llm_config -llm_config = get_llm_config() -base_config = get_base_config() - -if base_config.monitoring_tool == MonitoringTool.LANGFUSE: - from langfuse.openai import AsyncOpenAI, OpenAI -elif base_config.monitoring_tool == MonitoringTool.LANGSMITH: - from langsmith import wrappers - from openai import AsyncOpenAI - AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI()) -else: - from openai import AsyncOpenAI, OpenAI class GenericAPIAdapter(LLMInterface): """Adapter for Generic API LLM provider API """ @@ -37,6 +23,8 @@ class GenericAPIAdapter(LLMInterface): self.model = model self.api_key = api_key + llm_config = get_llm_config() + if llm_config.llm_provider == "groq": from groq import groq self.aclient = instructor.from_openai( @@ -46,6 +34,17 @@ class GenericAPIAdapter(LLMInterface): mode = instructor.Mode.MD_JSON ) else: + base_config = get_base_config() + + if base_config.monitoring_tool == MonitoringTool.LANGFUSE: + from langfuse.openai import AsyncOpenAI + elif base_config.monitoring_tool == MonitoringTool.LANGSMITH: + from langsmith import wrappers + from openai import AsyncOpenAI + AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI()) + else: + from openai import AsyncOpenAI + self.aclient = instructor.patch( AsyncOpenAI( base_url = api_endpoint, diff --git a/cognee/infrastructure/llm/get_llm_client.py b/cognee/infrastructure/llm/get_llm_client.py index 8840df309..b824bd588 100644 --- a/cognee/infrastructure/llm/get_llm_client.py +++ b/cognee/infrastructure/llm/get_llm_client.py @@ -9,7 +9,6 @@ class LLMProvider(Enum): ANTHROPIC = "anthropic" CUSTOM = "custom" -llm_config = get_llm_config() def get_llm_client(): """Get the LLM client based on the configuration using Enums.""" llm_config = get_llm_config() diff --git a/cognee/infrastructure/llm/openai/adapter.py b/cognee/infrastructure/llm/openai/adapter.py index c936b8684..cc11c4910 100644 --- a/cognee/infrastructure/llm/openai/adapter.py +++ b/cognee/infrastructure/llm/openai/adapter.py @@ -6,26 +6,10 @@ from pydantic import BaseModel from tenacity import retry, stop_after_attempt from cognee.base_config import get_base_config -from cognee.config import Config -from cognee.infrastructure.llm import get_llm_config from cognee.infrastructure.llm.llm_interface import LLMInterface from cognee.infrastructure.llm.prompts import read_query_prompt from cognee.shared.data_models import MonitoringTool -config = Config() -config.load() -llm_config = get_llm_config() -base_config = get_base_config() - -if base_config.monitoring_tool == MonitoringTool.LANGFUSE: - from langfuse.openai import AsyncOpenAI, OpenAI -elif base_config.monitoring_tool == MonitoringTool.LANGSMITH: - from langsmith import wrappers - from openai import AsyncOpenAI - AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI()) -else: - from openai import AsyncOpenAI, OpenAI - class OpenAIAdapter(LLMInterface): name = "OpenAI" model: str @@ -33,6 +17,17 @@ class OpenAIAdapter(LLMInterface): """Adapter for OpenAI's GPT-3, GPT=4 API""" def __init__(self, api_key: str, model:str): + base_config = get_base_config() + + if base_config.monitoring_tool == MonitoringTool.LANGFUSE: + from langfuse.openai import AsyncOpenAI, OpenAI + elif base_config.monitoring_tool == MonitoringTool.LANGSMITH: + from langsmith import wrappers + from openai import AsyncOpenAI + AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI()) + else: + from openai import AsyncOpenAI, OpenAI + self.aclient = instructor.from_openai(AsyncOpenAI(api_key = api_key)) self.client = instructor.from_openai(OpenAI(api_key = api_key)) self.model = model diff --git a/cognee/modules/cognify/config.py b/cognee/modules/cognify/config.py index cc4ccf089..624b00ba4 100644 --- a/cognee/modules/cognify/config.py +++ b/cognee/modules/cognify/config.py @@ -1,12 +1,7 @@ from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.shared.data_models import DefaultContentPrediction, LabeledContent, SummarizedContent, \ - DefaultCognitiveLayer, DefaultGraphModel, KnowledgeGraph - - -# Monitoring tool - - + DefaultCognitiveLayer class CognifyConfig(BaseSettings): classification_model: object = DefaultContentPrediction @@ -15,10 +10,7 @@ class CognifyConfig(BaseSettings): cognitive_layer_model: object = DefaultCognitiveLayer intra_layer_score_treshold: float = 0.98 connect_documents: bool = False - graph_topology: object = DefaultGraphModel cognitive_layers_limit: int = 2 - graph_model:object = KnowledgeGraph - model_config = SettingsConfigDict(env_file = ".env", extra = "allow") @@ -30,11 +22,9 @@ class CognifyConfig(BaseSettings): "cognitive_layer_model": self.cognitive_layer_model, "intra_layer_score_treshold": self.intra_layer_score_treshold, "connect_documents": self.connect_documents, - "graph_topology": self.graph_topology, "cognitive_layers_limit": self.cognitive_layers_limit, - "graph_model": self.graph_model } @lru_cache def get_cognify_config(): - return CognifyConfig() \ No newline at end of file + return CognifyConfig() diff --git a/cognee/modules/cognify/evaluate.py b/cognee/modules/cognify/evaluate.py index 254c0e7ff..626392963 100644 --- a/cognee/modules/cognify/evaluate.py +++ b/cognee/modules/cognify/evaluate.py @@ -4,14 +4,11 @@ from dspy.evaluate.evaluate import Evaluate from dspy.primitives.example import Example from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph from cognee.root_dir import get_absolute_path -from cognee.config import Config from cognee.shared.data_models import Answer +from cognee.infrastructure.llm import get_llm_config from cognee.infrastructure.llm.get_llm_client import get_llm_client from cognee.modules.cognify.dataset import HotPotQA -config = Config() -config.load() - def evaluate(): dataset = HotPotQA( train_seed = 1, @@ -36,7 +33,8 @@ def evaluate(): evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096) - gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096) + llm_config = get_llm_config() + gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096) compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4) compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json")) @@ -58,7 +56,7 @@ def evaluate(): return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \ dsp.passage_match([example.answer], [answer_prediction.answer]) - gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096) + gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096) dspy.settings.configure(lm = gpt4) evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer) diff --git a/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py b/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py index 8d2e3fb7c..354d76f59 100644 --- a/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py +++ b/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py @@ -4,7 +4,7 @@ from typing import List, Tuple, TypedDict from pydantic import BaseModel from cognee.infrastructure.databases.vector import DataPoint -# from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader +# from cognee.shared.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader from cognee.infrastructure.databases.graph.config import get_graph_config from cognee.infrastructure.databases.vector.config import get_vectordb_config @@ -69,8 +69,6 @@ async def add_cognitive_layer_graphs( id, type, name, description, *node_properties = node - print("Node properties: ", node_properties) - node_properties = dict(node_properties) graph_nodes.append(( diff --git a/cognee/modules/cognify/graph/add_node_connections.py b/cognee/modules/cognify/graph/add_node_connections.py index aa2cb9cb7..758d538dc 100644 --- a/cognee/modules/cognify/graph/add_node_connections.py +++ b/cognee/modules/cognify/graph/add_node_connections.py @@ -1,6 +1,5 @@ import uuid -# from cognee.infrastructure import infrastructure_config from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.shared.data_models import GraphDBType from cognee.infrastructure.databases.graph.config import get_graph_config @@ -105,7 +104,7 @@ if __name__ == "__main__": # # connect_nodes_in_graph(graph, relationships) - from cognee.utils import render_graph + from cognee.shared.utils import render_graph graph_url = await render_graph(graph) diff --git a/cognee/modules/cognify/test.py b/cognee/modules/cognify/test.py index 210ec7062..c952ae0d5 100644 --- a/cognee/modules/cognify/test.py +++ b/cognee/modules/cognify/test.py @@ -1,13 +1,11 @@ import dspy from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph from cognee.root_dir import get_absolute_path -from cognee.config import Config - -config = Config() -config.load() +from cognee.infrastructure.llm import get_llm_config def run(): - gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096) + llm_config = get_llm_config() + gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096) compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4) compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json")) diff --git a/cognee/modules/cognify/train.py b/cognee/modules/cognify/train.py index 62319aa9d..b1d96c32c 100644 --- a/cognee/modules/cognify/train.py +++ b/cognee/modules/cognify/train.py @@ -2,16 +2,13 @@ import dsp import dspy from dspy.teleprompt import BootstrapFewShot from dspy.primitives.example import Example -from cognee.config import Config from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph from cognee.root_dir import get_absolute_path from cognee.infrastructure.files.storage import LocalStorage from cognee.shared.data_models import Answer from cognee.infrastructure.llm.get_llm_client import get_llm_client from cognee.modules.cognify.dataset import HotPotQA - -config = Config() -config.load() +from cognee.infrastructure.llm import get_llm_config def train(): colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts") @@ -59,7 +56,8 @@ def train(): trainset = [example.with_inputs("context", "question") for example in train_examples] - gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096) + llm_config = get_llm_config() + gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096) compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset) diff --git a/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph_module.py b/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph_module.py index 7bce5194e..9e4b38496 100644 --- a/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph_module.py +++ b/cognee/modules/data/extraction/knowledge_graph/extract_knowledge_graph_module.py @@ -5,7 +5,7 @@ from nltk.corpus import stopwords from nltk.tokenize import word_tokenize from cognee.infrastructure.llm import get_llm_config from cognee.shared.data_models import KnowledgeGraph, Node, Edge -from cognee.utils import trim_text_to_max_tokens +from cognee.shared.utils import trim_text_to_max_tokens # """Instructions: # You are a top-tier algorithm designed for extracting information from text in structured formats to build a knowledge graph. diff --git a/cognee/modules/data/get_cognitive_layers.py b/cognee/modules/data/get_cognitive_layers.py index 53cf6c17b..357e70ee4 100644 --- a/cognee/modules/data/get_cognitive_layers.py +++ b/cognee/modules/data/get_cognitive_layers.py @@ -1,8 +1,7 @@ import logging from typing import List, Dict -from cognee.infrastructure import infrastructure_config -from.extraction.extract_cognitive_layers import extract_cognitive_layers from cognee.modules.cognify.config import get_cognify_config +from .extraction.extract_cognitive_layers import extract_cognitive_layers config = get_cognify_config() diff --git a/cognee/modules/data/get_content_summary.py b/cognee/modules/data/get_content_summary.py index e0ad2f179..392f1fe5a 100644 --- a/cognee/modules/data/get_content_summary.py +++ b/cognee/modules/data/get_content_summary.py @@ -1,7 +1,6 @@ import logging -from cognee.infrastructure import infrastructure_config -from.extraction.extract_summary import extract_summary from cognee.modules.cognify.config import get_cognify_config +from .extraction.extract_summary import extract_summary config = get_cognify_config() logger = logging.getLogger(__name__) diff --git a/cognee/modules/data/get_layer_graphs.py b/cognee/modules/data/get_layer_graphs.py index f84404044..1ab491c4b 100644 --- a/cognee/modules/data/get_layer_graphs.py +++ b/cognee/modules/data/get_layer_graphs.py @@ -1,13 +1,13 @@ import logging import asyncio -from cognee.infrastructure import infrastructure_config +from cognee.infrastructure.databases.graph import get_graph_config from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph -from.extraction.extract_summary import extract_summary -from cognee.modules.cognify.config import get_cognify_config -config = get_cognify_config() + logger = logging.getLogger(__name__) async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]): + config = get_graph_config() + try: graph_awaitables = [ extract_knowledge_graph( diff --git a/cognee/modules/discovery/__init__.py b/cognee/modules/discovery/__init__.py deleted file mode 100644 index cd98490cb..000000000 --- a/cognee/modules/discovery/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .discover_directory_datasets import discover_directory_datasets diff --git a/cognee/modules/ingestion/__init__.py b/cognee/modules/ingestion/__init__.py index 6b0049b52..8f1f7fc58 100644 --- a/cognee/modules/ingestion/__init__.py +++ b/cognee/modules/ingestion/__init__.py @@ -1,2 +1,5 @@ from .classify import classify from .identify import identify +from .save_data_to_file import save_data_to_file +from .get_matched_datasets import get_matched_datasets +from .discover_directory_datasets import discover_directory_datasets diff --git a/cognee/modules/discovery/discover_directory_datasets.py b/cognee/modules/ingestion/discover_directory_datasets.py similarity index 100% rename from cognee/modules/discovery/discover_directory_datasets.py rename to cognee/modules/ingestion/discover_directory_datasets.py diff --git a/cognee/modules/ingestion/get_matched_datasets.py b/cognee/modules/ingestion/get_matched_datasets.py new file mode 100644 index 000000000..563d8016c --- /dev/null +++ b/cognee/modules/ingestion/get_matched_datasets.py @@ -0,0 +1,12 @@ +from .discover_directory_datasets import discover_directory_datasets + +def get_matched_datasets(data_path: str, dataset_name_to_match: str = None): + datasets = discover_directory_datasets(data_path) + + matched_datasets = [] + + for dataset_name, dataset_files in datasets.items(): + if dataset_name_to_match is None or dataset_name.startswith(dataset_name_to_match): + matched_datasets.append([dataset_name, dataset_files]) + + return matched_datasets diff --git a/cognee/modules/ingestion/save_data_to_file.py b/cognee/modules/ingestion/save_data_to_file.py new file mode 100644 index 000000000..5b81f8ced --- /dev/null +++ b/cognee/modules/ingestion/save_data_to_file.py @@ -0,0 +1,19 @@ +from typing import BinaryIO, Union +from cognee.base_config import get_base_config +from cognee.infrastructure.files.storage import LocalStorage +from .classify import classify + +def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None): + base_config = get_base_config() + data_directory_path = base_config.data_root_directory + + classified_data = classify(data, filename) + + storage_path = data_directory_path + "/" + dataset_name.replace(".", "/") + LocalStorage.ensure_directory_exists(storage_path) + + file_metadata = classified_data.get_metadata() + file_name = file_metadata["name"] + LocalStorage(storage_path).store(file_name, classified_data.get_data()) + + return "file://" + storage_path + "/" + file_name diff --git a/cognee/modules/search/graph/search_neighbour.py b/cognee/modules/search/graph/search_neighbour.py index 66364ce5d..1af7c0d9f 100644 --- a/cognee/modules/search/graph/search_neighbour.py +++ b/cognee/modules/search/graph/search_neighbour.py @@ -14,7 +14,6 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str, Parameters: - graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session. - id (str): The identifier of the node to match against. - - infrastructure_config (Dict): Configuration that includes the graph engine type. - other_param (dict, optional): A dictionary that may contain 'node_id' to specify the node. Returns: diff --git a/cognee/modules/search/graph/search_summary.py b/cognee/modules/search/graph/search_summary.py index ed8119411..cd732018b 100644 --- a/cognee/modules/search/graph/search_summary.py +++ b/cognee/modules/search/graph/search_summary.py @@ -19,7 +19,6 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str, Parameters: - graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session. - query (str): The query string to filter nodes by, e.g., 'SUMMARY'. - - infrastructure_config (Dict): Configuration that includes the graph engine type. - other_param (str, optional): An additional parameter, unused in this implementation but could be for future enhancements. Returns: diff --git a/cognee/modules/search/llm/get_relevant_summary.py b/cognee/modules/search/llm/get_relevant_summary.py index a4af4753b..4de0a971f 100644 --- a/cognee/modules/search/llm/get_relevant_summary.py +++ b/cognee/modules/search/llm/get_relevant_summary.py @@ -1,16 +1,17 @@ import logging from typing import List, Dict -from cognee.infrastructure import infrastructure_config -from.extraction.categorize_relevant_summary import categorize_relevant_summary +from cognee.modules.cognify.config import get_cognify_config +from .extraction.categorize_relevant_summary import categorize_relevant_summary logger = logging.getLogger(__name__) async def get_cognitive_layers(content: str, categories: List[Dict]): try: + cognify_config = get_cognify_config() return (await categorize_relevant_summary( content, categories[0], - infrastructure_config.get_config()["categorize_summary_model"] + cognify_config.summarization_model, )).cognitive_layers except Exception as error: logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True) diff --git a/cognee/modules/settings/get_settings.py b/cognee/modules/settings/get_settings.py index f1bd81360..fc043a4e2 100644 --- a/cognee/modules/settings/get_settings.py +++ b/cognee/modules/settings/get_settings.py @@ -1,10 +1,7 @@ -from cognee.config import Config from cognee.infrastructure.databases.vector import get_vectordb_config from cognee.infrastructure.llm import get_llm_config def get_settings(): - config = Config() - config.load() llm_config = get_llm_config() vector_dbs = [{ diff --git a/cognee/modules/settings/save_llm_config.py b/cognee/modules/settings/save_llm_config.py index 0c1f6ccc1..c066c60e3 100644 --- a/cognee/modules/settings/save_llm_config.py +++ b/cognee/modules/settings/save_llm_config.py @@ -1,8 +1,5 @@ -import json -import logging from pydantic import BaseModel from cognee.infrastructure.llm import get_llm_config -from cognee.infrastructure import infrastructure_config class LLMConfig(BaseModel): apiKey: str @@ -17,6 +14,3 @@ async def save_llm_config(new_llm_config: LLMConfig): if "*****" not in new_llm_config.apiKey and len(new_llm_config.apiKey.strip()) > 0: llm_config.llm_api_key = new_llm_config.apiKey - - logging.error(json.dumps(llm_config.to_dict())) - infrastructure_config.llm_engine = None diff --git a/cognee/modules/topology/extraction/extract_topology.py b/cognee/modules/topology/extraction/extract_topology.py index 7fd6fc8ac..6f68cb031 100644 --- a/cognee/modules/topology/extraction/extract_topology.py +++ b/cognee/modules/topology/extraction/extract_topology.py @@ -1,14 +1,14 @@ -from typing import Type, List +from typing import Type from pydantic import BaseModel from cognee.infrastructure.llm.prompts import read_query_prompt from cognee.infrastructure.llm.get_llm_client import get_llm_client -async def extract_categories(content: str, response_model: Type[BaseModel]): +async def extract_topology(content: str, response_model: Type[BaseModel]): llm_client = get_llm_client() system_prompt = read_query_prompt("extract_topology.txt") llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model) - return llm_output.model_dump() \ No newline at end of file + return llm_output.model_dump() diff --git a/cognee/modules/topology/infer_data_topology.py b/cognee/modules/topology/infer_data_topology.py index 6bcb9f86c..5b4683a40 100644 --- a/cognee/modules/topology/infer_data_topology.py +++ b/cognee/modules/topology/infer_data_topology.py @@ -1,18 +1,15 @@ import logging -from typing import List, Dict -from cognee.infrastructure import infrastructure_config -from cognee.modules.topology.extraction.extract_topology import extract_categories -from cognee.modules.cognify.config import get_cognify_config - -cognify_config = get_cognify_config() +from cognee.modules.topology.extraction.extract_topology import extract_topology +from cognee.infrastructure.databases.graph.config import get_graph_config logger = logging.getLogger(__name__) async def infer_data_topology(content: str, graph_topology=None): if graph_topology is None: - graph_topology = cognify_config.graph_topology + graph_config = get_graph_config() + graph_topology = graph_config.graph_model try: - return (await extract_categories( + return (await extract_topology( content, graph_topology )) diff --git a/cognee/modules/topology/topology.py b/cognee/modules/topology/topology.py index 9395b6116..bdd1ccad7 100644 --- a/cognee/modules/topology/topology.py +++ b/cognee/modules/topology/topology.py @@ -2,13 +2,10 @@ import os import glob from pydantic import BaseModel, Field from typing import Dict, List, Optional, Union, Type, Any, Tuple -from datetime import datetime from cognee import config from cognee.base_config import get_base_config -from cognee.infrastructure import infrastructure_config from cognee.modules.cognify.config import get_cognify_config -from cognee.modules.topology.infer_data_topology import infer_data_topology cognify_config = get_cognify_config() base_config = get_base_config() diff --git a/cognee/root_dir.py b/cognee/root_dir.py index d74ba5cf7..0f96b8065 100644 --- a/cognee/root_dir.py +++ b/cognee/root_dir.py @@ -1,22 +1,7 @@ -from os import path -import logging from pathlib import Path -logging.basicConfig(level=logging.DEBUG) -# ROOT_DIR = path.dirname(path.abspath(__file__)) -# -# logging.debug("ROOT_DIR: ", ROOT_DIR) -# -# def get_absolute_path(path_from_root: str) -> str: -# logging.debug("abspath: ", path.abspath(path.join(ROOT_DIR, path_from_root))) -# -# -# return path.abspath(path.join(ROOT_DIR, path_from_root)) -ROOT_DIR = Path(__file__).resolve().parent -logging.basicConfig(level=logging.DEBUG) -logging.debug("ROOT_DIR: %s", ROOT_DIR) +ROOT_DIR = Path(__file__).resolve().parent def get_absolute_path(path_from_root: str) -> str: absolute_path = ROOT_DIR / path_from_root - logging.debug("abspath: %s", absolute_path.resolve()) - return str(absolute_path.resolve()) \ No newline at end of file + return str(absolute_path.resolve()) diff --git a/cognee/utils.py b/cognee/shared/utils.py similarity index 99% rename from cognee/utils.py rename to cognee/shared/utils.py index 4f9d728bd..4385577ea 100644 --- a/cognee/utils.py +++ b/cognee/shared/utils.py @@ -11,12 +11,7 @@ import matplotlib.pyplot as plt import tiktoken import nltk from posthog import Posthog - -from cognee.config import Config - -config = Config() -config.load() - +from cognee.base_config import get_base_config def send_telemetry(event_name: str): if os.getenv("TELEMETRY_DISABLED"): @@ -153,6 +148,7 @@ def generate_color_palette(unique_layers): async def register_graphistry(): + config = get_base_config() graphistry.register(api = 3, username = config.graphistry_username, password = config.graphistry_password) diff --git a/evals/simple_rag_vs_cognee_eval.py b/evals/simple_rag_vs_cognee_eval.py index 29d27eb22..95f69bc1e 100644 --- a/evals/simple_rag_vs_cognee_eval.py +++ b/evals/simple_rag_vs_cognee_eval.py @@ -2,7 +2,7 @@ from deepeval.dataset import EvaluationDataset from pydantic import BaseModel -from typing import List, Type, Dict +from typing import List, Type from deepeval.test_case import LLMTestCase import dotenv dotenv.load_dotenv() @@ -41,7 +41,6 @@ print(dataset) import logging -from cognee.infrastructure import infrastructure_config logger = logging.getLogger(__name__) @@ -81,10 +80,18 @@ async def run_cognify_base_rag(): pass -async def cognify_search_base_rag(content:str, context:str): - infrastructure_config.set_config({"database_directory_path": "/Users/vasa/Projects/cognee/cognee/.cognee_system/databases/cognee.lancedb"}) +import os +from cognee.base_config import get_base_config +from cognee.infrastructure.databases.vector import get_vectordb_config - vector_client = infrastructure_config.get_config("vector_engine") +async def cognify_search_base_rag(content:str, context:str): + base_config = get_base_config() + + cognee_directory_path = os.path.abspath(".cognee_system") + base_config.system_root_directory = cognee_directory_path + + vector_config = get_vectordb_config() + vector_client = vector_config.vector_engine return_ = await vector_client.search(collection_name="basic_rag", query_text=content, limit=10) diff --git a/notebooks/cognee - Get Started.ipynb b/notebooks/cognee - Get Started.ipynb index f31cc718b..d88626ea3 100644 --- a/notebooks/cognee - Get Started.ipynb +++ b/notebooks/cognee - Get Started.ipynb @@ -283,7 +283,7 @@ "outputs": [], "source": [ "import cognee\n", - "from cognee.utils import render_graph\n", + "from cognee.shared.utils import render_graph\n", "\n", "graph = await cognee.cognify()\n", "\n", diff --git a/notebooks/full_run.ipynb b/notebooks/full_run.ipynb index f58a32051..08d217ea5 100644 --- a/notebooks/full_run.ipynb +++ b/notebooks/full_run.ipynb @@ -112,12 +112,11 @@ "outputs": [], "source": [ "import graphistry\n", - "from cognee.config import Config\n", - "from cognee.utils import render_graph\n", + "from cognee.shared.utils import render_graph\n", "from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n", + "from cognee.base_config import get_base_config\n", "\n", - "config = Config()\n", - "config.load()\n", + "config = get_base_config()\n", "\n", "graphistry.register(\n", " api = 3,\n",