From cddf836fce382d15fa97bde3957591411842cf76 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Sun, 26 May 2024 11:46:49 +0200 Subject: [PATCH] Updates to the configs --- cognee/api/v1/add/add.py | 15 +++++--- cognee/api/v1/add/add_standalone.py | 4 ++- cognee/api/v1/cognify/cognify.py | 35 +++++++++++++------ cognee/api/v1/config/config.py | 13 +++---- cognee/api/v1/datasets/datasets.py | 11 +++--- cognee/api/v1/prune/prune.py | 12 +++++-- cognee/api/v1/search/search.py | 4 ++- cognee/api/v1/topology/add_topology.py | 1 + cognee/modules/cognify/config.py | 6 +++- .../graph/add_cognitive_layer_graphs.py | 10 +++--- .../modules/cognify/graph/add_label_nodes.py | 9 +++-- .../cognify/graph/add_node_connections.py | 11 +++--- cognee/modules/cognify/graph/create.py | 11 +++--- .../llm/resolve_cross_graph_references.py | 7 +++- cognee/modules/data/get_cognitive_layers.py | 5 ++- cognee/modules/data/get_content_categories.py | 5 +-- cognee/modules/data/get_content_summary.py | 4 ++- cognee/modules/data/get_layer_graphs.py | 6 ++-- .../modules/ingestion/add_data_to_dataset.py | 4 ++- .../modules/search/graph/search_adjacent.py | 6 ++-- .../modules/search/graph/search_categories.py | 9 +++-- cognee/modules/search/graph/search_cypher.py | 7 ++-- .../modules/search/graph/search_neighbour.py | 9 +++-- cognee/modules/search/graph/search_summary.py | 9 +++-- .../search/vector/search_similarity.py | 9 +++-- .../modules/tasks/create_task_status_table.py | 5 ++- cognee/modules/tasks/update_task_status.py | 5 ++- .../modules/topology/infer_data_topology.py | 4 ++- cognee/modules/topology/topology.py | 9 +++-- 29 files changed, 167 insertions(+), 78 deletions(-) diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index ce460a34b..e7180e437 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -8,6 +8,11 @@ from cognee.infrastructure import infrastructure_config from cognee.infrastructure.files.storage import 
LocalStorage from cognee.modules.discovery import discover_directory_datasets from cognee.utils import send_telemetry +from cognee.base_config import get_base_config +base_config = get_base_config() +from cognee.infrastructure.databases.relational.config import get_relationaldb_config + +relational_config = get_relationaldb_config() async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None): @@ -46,10 +51,10 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_nam return [] async def add_files(file_paths: List[str], dataset_name: str): - infra_config = infrastructure_config.get_config() - data_directory_path = infra_config["data_root_directory"] + # infra_config = infrastructure_config.get_config() + data_directory_path = base_config.data_root_directory - LocalStorage.ensure_directory_exists(infra_config["database_directory_path"]) + LocalStorage.ensure_directory_exists(relational_config.database_directory_path) processed_file_paths = [] @@ -68,7 +73,7 @@ async def add_files(file_paths: List[str], dataset_name: str): else: processed_file_paths.append(file_path) - db = duckdb.connect(infra_config["database_path"]) + db = duckdb.connect(relational_config.database_path) destination = dlt.destinations.duckdb( credentials = db, @@ -120,7 +125,7 @@ async def add_data_directory(data_path: str, dataset_name: str = None): return await asyncio.gather(*results) def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None): - data_directory_path = infrastructure_config.get_config()["data_root_directory"] + data_directory_path = base_config.data_root_directory classified_data = ingestion.classify(data, filename) # data_id = ingestion.identify(classified_data) diff --git a/cognee/api/v1/add/add_standalone.py b/cognee/api/v1/add/add_standalone.py index ef7be455b..392508bd2 100644 --- a/cognee/api/v1/add/add_standalone.py +++ b/cognee/api/v1/add/add_standalone.py @@ -3,7 +3,9 @@ from uuid 
import UUID, uuid4 from typing import Union, BinaryIO, List import cognee.modules.ingestion as ingestion from cognee.infrastructure import infrastructure_config +from cognee.infrastructure.databases.relational.config import get_relationaldb_config +relational_config = get_relationaldb_config() class DatasetException(Exception): message: str @@ -16,7 +18,7 @@ async def add_standalone( dataset_id: UUID = uuid4(), dataset_name: str = None ): - db_engine = infrastructure_config.get_config()["database_engine"] + db_engine = relational_config.database_engine if db_engine.is_db_done is not True: await db_engine.ensure_tables() diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 066849aef..a44b1a6cd 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -34,6 +34,21 @@ graph_config = get_graph_config() config = Config() config.load() +from cognee.base_config import get_base_config +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.data.chunking.config import get_chunk_config +from cognee.modules.cognify.config import get_cognify_config +from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config +from cognee.infrastructure.databases.relational.config import get_relationaldb_config + +relational_config = get_relationaldb_config() + + +cognify_config = get_cognify_config() +chunk_config = get_chunk_config() +base_config = get_base_config() +embedding_config = get_embedding_config() + # aclient = instructor.patch(OpenAI()) USER_ID = "default_user" @@ -47,11 +62,11 @@ async def cognify(datasets: Union[str, List[str]] = None): stopwords.ensure_loaded() create_task_status_table() - graph_db_type = infrastructure_config.get_config()["graph_engine"] + graph_db_type = graph_config.graph_engine graph_client = await get_graph_client(graph_db_type) - db_engine = infrastructure_config.get_config()["database_engine"] + db_engine = 
relational_config.database_engine if datasets is None or len(datasets) == 0: datasets = db_engine.get_datasets() @@ -77,8 +92,8 @@ async def cognify(datasets: Union[str, List[str]] = None): dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset))) - chunk_engine = infrastructure_config.get_config()["chunk_engine"] - chunk_strategy = infrastructure_config.get_config()["chunk_strategy"] + chunk_engine = chunk_config.chunk_engine + chunk_strategy = chunk_config.chunk_strategy async def process_batch(files_batch): data_chunks = {} @@ -129,7 +144,7 @@ async def cognify(datasets: Union[str, List[str]] = None): for (dataset_name, files) in dataset_files: for file_metadata in files: - graph_topology = infrastructure_config.get_config()["graph_model"] + graph_topology = graph_config.graph_model if graph_topology == SourceCodeGraph: parent_node_id = f"{file_metadata['name']}.{file_metadata['extension']}" @@ -164,7 +179,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi graph_client = await get_graph_client(graph_config.graph_engine) - graph_topology = infrastructure_config.get_config()["graph_model"] + graph_topology = cognify_config.graph_model if graph_topology == SourceCodeGraph: classified_categories = [{"data_type": "text", "category_name": "Code and functions"}] elif graph_topology == KnowledgeGraph: @@ -186,7 +201,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi print(f"Chunk ({chunk_id}) summarized.") cognitive_layers = await get_cognitive_layers(input_text, classified_categories) - cognitive_layers = cognitive_layers[:config.cognitive_layers_limit] + cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit] try: cognitive_layers = (await add_cognitive_layers(graph_client, document_id, cognitive_layers))[:2] @@ -197,8 +212,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi pass - if 
infrastructure_config.get_config()["connect_documents"] is True: - db_engine = infrastructure_config.get_config()["database_engine"] + if cognify_config.connect_documents is True: + db_engine = relational_config.database_engine relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id) list_of_nodes = [] @@ -220,7 +235,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi await connect_nodes_in_graph( graph_client, relationships, - score_threshold = infrastructure_config.get_config()["intra_layer_score_treshold"] + score_threshold = cognify_config.intra_layer_score_treshold ) send_telemetry("cognee.cognify") diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py index bf8adff35..e32e755cd 100644 --- a/cognee/api/v1/config/config.py +++ b/cognee/api/v1/config/config.py @@ -6,6 +6,7 @@ from cognee.infrastructure.databases.graph.config import get_graph_config from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.modules.cognify.config import get_cognify_config + cognify_config = get_cognify_config() chunk_config = get_chunk_config() graph_config = get_graph_config() @@ -24,9 +25,6 @@ class config(): def monitoring_tool(monitoring_tool: object): base_config.monitoring_tool = monitoring_tool - - - @staticmethod def set_classification_model(classification_model: object): cognify_config.classification_model = classification_model @@ -57,9 +55,7 @@ class config(): @staticmethod def llm_provider(llm_provider: str): - infrastructure_config.set_config({ - "llm_provider": llm_provider - }) + graph_config.llm_provider = llm_provider @staticmethod def intra_layer_score_treshold(intra_layer_score_treshold: str): @@ -77,6 +73,5 @@ class config(): @staticmethod def set_graph_topology(graph_topology: object): - infrastructure_config.set_config({ - "graph_topology": graph_topology - }) + cognify_config.graph_topology = graph_topology + diff --git 
a/cognee/api/v1/datasets/datasets.py b/cognee/api/v1/datasets/datasets.py index 9b0402585..54a0aeb8c 100644 --- a/cognee/api/v1/datasets/datasets.py +++ b/cognee/api/v1/datasets/datasets.py @@ -1,11 +1,14 @@ from duckdb import CatalogException from cognee.modules.discovery import discover_directory_datasets from cognee.infrastructure import infrastructure_config +from cognee.infrastructure.databases.relational.config import get_relationaldb_config + +relational_config = get_relationaldb_config() class datasets(): @staticmethod def list_datasets(): - db = infrastructure_config.get_config("database_engine") + db = relational_config.database_engine return db.get_datasets() @staticmethod @@ -14,7 +17,7 @@ class datasets(): @staticmethod def list_data(dataset_name: str): - db = infrastructure_config.get_config("database_engine") + db = relational_config.database_engine try: return db.get_files_metadata(dataset_name) except CatalogException: @@ -22,7 +25,7 @@ class datasets(): @staticmethod def get_status(dataset_ids: list[str]) -> dict: - db = infrastructure_config.get_config("database_engine") + db = relational_config.database_engine try: return db.get_data("cognee_task_status", { "data_id": dataset_ids @@ -32,7 +35,7 @@ class datasets(): @staticmethod def delete_dataset(dataset_id: str): - db = infrastructure_config.get_config("database_engine") + db = relational_config.database_engine try: return db.delete_table(dataset_id) except CatalogException: diff --git a/cognee/api/v1/prune/prune.py b/cognee/api/v1/prune/prune.py index 6423f5bd2..de52497a3 100644 --- a/cognee/api/v1/prune/prune.py +++ b/cognee/api/v1/prune/prune.py @@ -1,11 +1,17 @@ +from cognee.base_config import get_base_config from cognee.infrastructure.files.storage import LocalStorage from cognee.infrastructure import infrastructure_config from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client +base_config = get_base_config() +from cognee.infrastructure.databases.graph.config import get_graph_config 
+from cognee.infrastructure.databases.vector.config import get_vectordb_config +graph_config = get_graph_config() +vector_config = get_vectordb_config() class prune(): @staticmethod async def prune_data(): - data_root_directory = infrastructure_config.get_config()["data_root_directory"] + data_root_directory = base_config.data_root_directory LocalStorage.remove_all(data_root_directory) @staticmethod @@ -13,11 +19,11 @@ class prune(): infra_config = infrastructure_config.get_config() if graph: - graph_client = await get_graph_client(infra_config["graph_engine"]) + graph_client = await get_graph_client(graph_config.graph_engine) await graph_client.delete_graph() if vector: - vector_client = infra_config["vector_engine"] + vector_client = vector_config.vector_engine await vector_client.prune() diff --git a/cognee/api/v1/search/search.py b/cognee/api/v1/search/search.py index 4fd78e5a0..dd959b251 100644 --- a/cognee/api/v1/search/search.py +++ b/cognee/api/v1/search/search.py @@ -13,6 +13,8 @@ from cognee.modules.search.graph.search_summary import search_summary from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure import infrastructure_config from cognee.utils import send_telemetry +from cognee.infrastructure.databases.graph.config import get_graph_config +graph_config = get_graph_config() class SearchType(Enum): ADJACENT = 'ADJACENT' @@ -49,7 +51,7 @@ async def search(search_type: str, params: Dict[str, Any]) -> List: async def specific_search(query_params: List[SearchParameters]) -> List: - graph_client = await get_graph_client(infrastructure_config.get_config()["graph_engine"]) + graph_client = await get_graph_client(graph_config.graph_engine) graph = graph_client.graph search_functions: Dict[SearchType, Callable] = { diff --git a/cognee/api/v1/topology/add_topology.py b/cognee/api/v1/topology/add_topology.py index c6a043c8a..812a7de03 100644 --- a/cognee/api/v1/topology/add_topology.py +++ 
b/cognee/api/v1/topology/add_topology.py @@ -7,6 +7,7 @@ from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryMod import pandas as pd from pydantic import BaseModel + USER_ID = "default_user" async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any: diff --git a/cognee/modules/cognify/config.py b/cognee/modules/cognify/config.py index 879c11775..d0470cdd3 100644 --- a/cognee/modules/cognify/config.py +++ b/cognee/modules/cognify/config.py @@ -4,7 +4,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict from cognee.root_dir import get_absolute_path from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \ - DefaultCognitiveLayer + DefaultCognitiveLayer, DefaultGraphModel # Monitoring tool @@ -18,6 +18,8 @@ class CognifyConfig(BaseSettings): cognitive_layer_model: object = DefaultCognitiveLayer intra_layer_score_treshold: float = 0.98 connect_documents: bool = False + graph_topology: object = DefaultGraphModel + cognitive_layers_limit: int = 2 @@ -31,6 +33,8 @@ class CognifyConfig(BaseSettings): "cognitive_layer_model": self.cognitive_layer_model, "intra_layer_score_treshold": self.intra_layer_score_treshold, "connect_documents": self.connect_documents, + "graph_topology": self.graph_topology, + "cognitive_layers_limit": self.cognitive_layers_limit } @lru_cache diff --git a/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py b/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py index f1801a549..8522eae4f 100644 --- a/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py +++ b/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py @@ -2,10 +2,12 @@ from datetime import datetime from uuid import uuid4 from typing import List, Tuple, TypedDict from pydantic import BaseModel -from cognee.infrastructure import infrastructure_config from cognee.infrastructure.databases.vector import DataPoint from cognee.utils 
import extract_pos_tags, extract_named_entities, extract_sentiment_vader - +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.databases.vector.config import get_vectordb_config +graph_config = get_graph_config() +vectordb_config = get_vectordb_config() class GraphLike(TypedDict): nodes: List edges: List @@ -17,8 +19,8 @@ async def add_cognitive_layer_graphs( chunk_id: str, layer_graphs: List[Tuple[str, GraphLike]], ): - vector_client = infrastructure_config.get_config("vector_engine") - graph_model = infrastructure_config.get_config("graph_model") + vector_client = vectordb_config.vector_engine + graph_model = graph_config.graph_model for (layer_id, layer_graph) in layer_graphs: graph_nodes = [] diff --git a/cognee/modules/cognify/graph/add_label_nodes.py b/cognee/modules/cognify/graph/add_label_nodes.py index 574b19f6c..7993f8ddd 100644 --- a/cognee/modules/cognify/graph/add_label_nodes.py +++ b/cognee/modules/cognify/graph/add_label_nodes.py @@ -2,11 +2,14 @@ from uuid import uuid4 from typing import List from datetime import datetime from pydantic import BaseModel -from cognee.infrastructure import infrastructure_config -from cognee.infrastructure.databases.vector import DataPoint +from cognee.infrastructure.databases.vector import DataPoint +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.databases.vector.config import get_vectordb_config +graph_config = get_graph_config() +vectordb_config = get_vectordb_config() async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str]) -> None: - vector_client = infrastructure_config.get_config("vector_engine") + vector_client = vectordb_config.vector_engine keyword_nodes = [] diff --git a/cognee/modules/cognify/graph/add_node_connections.py b/cognee/modules/cognify/graph/add_node_connections.py index cbd043fe3..b5debd8b1 100644 --- a/cognee/modules/cognify/graph/add_node_connections.py +++ 
b/cognee/modules/cognify/graph/add_node_connections.py @@ -1,9 +1,12 @@ import uuid -from cognee.infrastructure import infrastructure_config +# from cognee.infrastructure import infrastructure_config from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.shared.data_models import GraphDBType - +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.databases.vector.config import get_vectordb_config +graph_config = get_graph_config() +vectordb_config = get_vectordb_config() async def group_nodes_by_layer(node_descriptions): @@ -41,7 +44,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9): if relationship['score'] > score_threshold: # For NetworkX - if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: + if graph_config.graph_engine == GraphDBType.NETWORKX: searched_node_id_found = await get_node_by_unique_id(graph.graph, relationship['searched_node_id']) original_id_for_search_found = await get_node_by_unique_id(graph.graph, relationship['original_id_for_search']) if searched_node_id_found and original_id_for_search_found: @@ -54,7 +57,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9): ) # For Neo4j - elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: + elif graph_config.graph_engine == GraphDBType.NEO4J: # Neo4j specific logic to add an edge # This is just a placeholder, replace it with actual Neo4j logic print("query is ", f"""MATCH (a), (b) WHERE a.unique_id = '{relationship['searched_node_id']}' AND b.unique_id = '{relationship['original_id_for_search']}' CREATE (a)-[:CONNECTED {{weight:{relationship['score']}}}]->(b)""") diff --git a/cognee/modules/cognify/graph/create.py b/cognee/modules/cognify/graph/create.py index dc1c03181..354552d0d 100644 --- a/cognee/modules/cognify/graph/create.py +++ b/cognee/modules/cognify/graph/create.py @@ -2,10 +2,13 @@ from 
typing import Optional, Any from pydantic import BaseModel -from cognee.infrastructure import infrastructure_config +# from cognee.infrastructure import infrastructure_config from cognee.shared.data_models import GraphDBType - +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.databases.vector.config import get_vectordb_config +graph_config = get_graph_config() +vectordb_config = get_vectordb_config() async def generate_node_id(instance: BaseModel) -> str: for field in ["id", "doc_id", "location_id", "type_id", "node_id"]: if hasattr(instance, field): @@ -30,7 +33,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di - Exception: If there is an error during the node or edge addition process, it logs the error and continues without interrupting the execution flow. Note: - - The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the global `infrastructure_config`. + - The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the graph configuration. 
""" # Initialize result to None to ensure a clear return path @@ -46,7 +49,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di print("added node", result) # Add an edge if a parent ID is provided and the graph engine is NETWORKX - if parent_id and "default_relationship" in node_data and infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: + if parent_id and "default_relationship" in node_data and graph_config.graph_engine == GraphDBType.NETWORKX: try: await client.add_edge(parent_id, node_id, relationship_name = node_data["default_relationship"]["type"], edge_properties = node_data) diff --git a/cognee/modules/cognify/llm/resolve_cross_graph_references.py b/cognee/modules/cognify/llm/resolve_cross_graph_references.py index b13b0c8c7..0b4101979 100644 --- a/cognee/modules/cognify/llm/resolve_cross_graph_references.py +++ b/cognee/modules/cognify/llm/resolve_cross_graph_references.py @@ -1,6 +1,11 @@ from typing import Dict, List from cognee.infrastructure import infrastructure_config +from cognee.infrastructure.databases.graph.config import get_graph_config +from cognee.infrastructure.databases.vector.config import get_vectordb_config +graph_config = get_graph_config() +vectordb_config = get_vectordb_config() + async def resolve_cross_graph_references(nodes_by_layer: Dict): results = [] @@ -16,7 +21,7 @@ async def resolve_cross_graph_references(nodes_by_layer: Dict): return results async def get_nodes_by_layer(layer_id: str, layer_nodes: List): - vector_engine = infrastructure_config.get_config()["vector_engine"] + vector_engine = vectordb_config.vector_engine score_points = await vector_engine.batch_search( layer_id, diff --git a/cognee/modules/data/get_cognitive_layers.py b/cognee/modules/data/get_cognitive_layers.py index d5021a919..53cf6c17b 100644 --- a/cognee/modules/data/get_cognitive_layers.py +++ b/cognee/modules/data/get_cognitive_layers.py @@ -2,6 +2,9 @@ import logging from typing import List, Dict 
from cognee.infrastructure import infrastructure_config from.extraction.extract_cognitive_layers import extract_cognitive_layers +from cognee.modules.cognify.config import get_cognify_config + +config = get_cognify_config() logger = logging.getLogger(__name__) @@ -10,7 +13,7 @@ async def get_cognitive_layers(content: str, categories: List[Dict]): return (await extract_cognitive_layers( content, categories[0], - infrastructure_config.get_config()["cognitive_layer_model"] + config.cognitive_layer_model )).cognitive_layers except Exception as error: logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True) diff --git a/cognee/modules/data/get_content_categories.py b/cognee/modules/data/get_content_categories.py index d387eedae..8c16ba13f 100644 --- a/cognee/modules/data/get_content_categories.py +++ b/cognee/modules/data/get_content_categories.py @@ -1,14 +1,15 @@ import logging -from cognee.infrastructure import infrastructure_config from .extraction.extract_categories import extract_categories +from cognee.modules.cognify.config import get_cognify_config +config = get_cognify_config() logger = logging.getLogger(__name__) async def get_content_categories(content: str): try: return await extract_categories( content, - infrastructure_config.get_config()["classification_model"] + config.classification_model ) except Exception as error: logger.error("Error extracting categories from content: %s", error, exc_info = True) diff --git a/cognee/modules/data/get_content_summary.py b/cognee/modules/data/get_content_summary.py index e1c7b2fe3..e0ad2f179 100644 --- a/cognee/modules/data/get_content_summary.py +++ b/cognee/modules/data/get_content_summary.py @@ -1,14 +1,16 @@ import logging from cognee.infrastructure import infrastructure_config from.extraction.extract_summary import extract_summary +from cognee.modules.cognify.config import get_cognify_config +config = get_cognify_config() logger = logging.getLogger(__name__) async def 
get_content_summary(content: str): try: return await extract_summary( content, - infrastructure_config.get_config()["summarization_model"] + config.summarization_model ) except Exception as error: logger.error("Error extracting summary from content: %s", error, exc_info = True) diff --git a/cognee/modules/data/get_layer_graphs.py b/cognee/modules/data/get_layer_graphs.py index 89387e2ff..f84404044 100644 --- a/cognee/modules/data/get_layer_graphs.py +++ b/cognee/modules/data/get_layer_graphs.py @@ -2,7 +2,9 @@ import logging import asyncio from cognee.infrastructure import infrastructure_config from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph - +from.extraction.extract_summary import extract_summary +from cognee.modules.cognify.config import get_cognify_config +config = get_cognify_config() logger = logging.getLogger(__name__) async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]): @@ -11,7 +13,7 @@ async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict] extract_knowledge_graph( content, cognitive_layer_data["name"], - infrastructure_config.get_config()["graph_model"] + config.graph_model ) for (_, cognitive_layer_data) in cognitive_layers ] diff --git a/cognee/modules/ingestion/add_data_to_dataset.py b/cognee/modules/ingestion/add_data_to_dataset.py index 9dbb5e511..c0b0856ff 100644 --- a/cognee/modules/ingestion/add_data_to_dataset.py +++ b/cognee/modules/ingestion/add_data_to_dataset.py @@ -3,11 +3,13 @@ from cognee.infrastructure import infrastructure_config from cognee.infrastructure.data import Dataset, Data from cognee.infrastructure.files import remove_file_from_storage from cognee.infrastructure.databases.relational import DatabaseEngine +from cognee.infrastructure.databases.relational.config import get_relationaldb_config +config = get_relationaldb_config() logger = logging.getLogger(__name__) async def add_data_to_dataset(dataset: Dataset, data: Data): - 
db_engine: DatabaseEngine = infrastructure_config.get_config()["database_engine"] + db_engine: DatabaseEngine = config.database_engine existing_dataset = (await db_engine.query_entity(dataset)).scalar() existing_data = (await db_engine.query_entity(data)).scalar() diff --git a/cognee/modules/search/graph/search_adjacent.py b/cognee/modules/search/graph/search_adjacent.py index 7b66b9df5..8a6bea0e5 100644 --- a/cognee/modules/search/graph/search_adjacent.py +++ b/cognee/modules/search/graph/search_adjacent.py @@ -4,6 +4,8 @@ from typing import Union, Dict import networkx as nx from cognee.shared.data_models import GraphDBType +from cognee.infrastructure.databases.graph.config import get_graph_config +graph_config = get_graph_config() async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: dict = None) -> Dict[str, str]: """ Find the neighbours of a given node in the graph and return their descriptions. @@ -22,7 +24,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: if node_id is None: return {} from cognee.infrastructure import infrastructure_config - if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: + if graph_config.graph_engine == GraphDBType.NETWORKX: if node_id not in graph: return {} @@ -30,7 +32,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: neighbor_descriptions = {neighbor: graph.nodes[neighbor].get('description') for neighbor in neighbors} return neighbor_descriptions - elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: + elif graph_config.graph_engine == GraphDBType.NEO4J: cypher_query = """ MATCH (node {id: $node_id})-[:CONNECTED_TO]->(neighbor) RETURN neighbor.id AS neighbor_id, neighbor.description AS description diff --git a/cognee/modules/search/graph/search_categories.py b/cognee/modules/search/graph/search_categories.py index 7ff07554e..6ce87f2b8 100644 --- 
a/cognee/modules/search/graph/search_categories.py +++ b/cognee/modules/search/graph/search_categories.py @@ -10,6 +10,11 @@ from cognee.modules.search.llm.extraction.categorize_relevant_category import ca from cognee.shared.data_models import GraphDBType, DefaultContentPrediction import networkx as nx +from cognee.infrastructure.databases.graph.config import get_graph_config +graph_config = get_graph_config() +from cognee.infrastructure.databases.vector.config import get_vectordb_config +vector_config = get_vectordb_config() + def strip_exact_regex(s, substring): # Escaping substring to be used in a regex pattern pattern = re.escape(substring) @@ -37,7 +42,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label: """ # Determine which client is in use based on the configuration from cognee.infrastructure import infrastructure_config - if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: + if graph_config.graph_engine == GraphDBType.NETWORKX: categories_and_ids = [ {'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']} @@ -53,7 +58,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label: descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes} return descriptions - elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: + elif graph_config.graph_engine == GraphDBType.NEO4J: # Logic for Neo4j cypher_query = """ MATCH (n) diff --git a/cognee/modules/search/graph/search_cypher.py b/cognee/modules/search/graph/search_cypher.py index 1022004c7..705640dcf 100644 --- a/cognee/modules/search/graph/search_cypher.py +++ b/cognee/modules/search/graph/search_cypher.py @@ -7,7 +7,10 @@ from pydantic import BaseModel from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category from cognee.shared.data_models import GraphDBType - +from 
cognee.infrastructure.databases.graph.config import get_graph_config +graph_config = get_graph_config() +from cognee.infrastructure.databases.vector.config import get_vectordb_config +vector_config = get_vectordb_config() async def search_cypher(query:str, graph: Union[nx.Graph, any]): """ @@ -16,7 +19,7 @@ async def search_cypher(query:str, graph: Union[nx.Graph, any]): from cognee.infrastructure import infrastructure_config - if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: + if graph_config.graph_engine == GraphDBType.NEO4J: result = await graph.run(query) return result diff --git a/cognee/modules/search/graph/search_neighbour.py b/cognee/modules/search/graph/search_neighbour.py index 5deba22d9..f6269f848 100644 --- a/cognee/modules/search/graph/search_neighbour.py +++ b/cognee/modules/search/graph/search_neighbour.py @@ -6,7 +6,10 @@ from neo4j import AsyncSession from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client import networkx as nx from cognee.shared.data_models import GraphDBType - +from cognee.infrastructure.databases.graph.config import get_graph_config +graph_config = get_graph_config() +from cognee.infrastructure.databases.vector.config import get_vectordb_config +vector_config = get_vectordb_config() async def search_neighbour(graph: Union[nx.Graph, any], query: str, other_param: dict = None): """ @@ -28,7 +31,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str, if node_id is None: return [] - if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: + if graph_config.graph_engine == GraphDBType.NETWORKX: relevant_context = [] target_layer_uuid = graph.nodes[node_id].get('layer_uuid') @@ -39,7 +42,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str, return relevant_context - elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: + elif graph_config.graph_engine == GraphDBType.NEO4J: if isinstance(graph, 
AsyncSession): cypher_query = """ MATCH (target {id: $node_id}) diff --git a/cognee/modules/search/graph/search_summary.py b/cognee/modules/search/graph/search_summary.py index 4618fc04e..d9906f60e 100644 --- a/cognee/modules/search/graph/search_summary.py +++ b/cognee/modules/search/graph/search_summary.py @@ -7,7 +7,10 @@ from cognee.infrastructure import infrastructure_config from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary from cognee.shared.data_models import GraphDBType, ResponseSummaryModel - +from cognee.infrastructure.databases.graph.config import get_graph_config +graph_config = get_graph_config() +from cognee.infrastructure.databases.vector.config import get_vectordb_config +vector_config = get_vectordb_config() import re def strip_exact_regex(s, substring): @@ -30,7 +33,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str, - Dict[str, str]: A dictionary where keys are node identifiers containing the query string, and values are their 'summary' attributes. 
""" - if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: + if graph_config.graph_engine == GraphDBType.NETWORKX: print("graph", graph) summaries_and_ids = [ {'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']} @@ -48,7 +51,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str, return descriptions - elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: + elif graph_config.graph_engine == GraphDBType.NEO4J: cypher_query = f""" MATCH (n) WHERE n.id CONTAINS $query AND EXISTS(n.summary) diff --git a/cognee/modules/search/vector/search_similarity.py b/cognee/modules/search/vector/search_similarity.py index 309d98575..f3d10d0ee 100644 --- a/cognee/modules/search/vector/search_similarity.py +++ b/cognee/modules/search/vector/search_similarity.py @@ -1,10 +1,13 @@ from dsp.utils import deduplicate from cognee.infrastructure import infrastructure_config from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client - +from cognee.infrastructure.databases.graph.config import get_graph_config +graph_config = get_graph_config() +from cognee.infrastructure.databases.vector.config import get_vectordb_config +vector_config = get_vectordb_config() async def search_similarity(query: str, graph): - graph_db_type = infrastructure_config.get_config()["graph_engine"] + graph_db_type = graph_config.graph_engine graph_client = await get_graph_client(graph_db_type) @@ -17,7 +20,7 @@ async def search_similarity(query: str, graph): graph_nodes = [] for layer_id in unique_layer_uuids: - vector_engine = infrastructure_config.get_config()["vector_engine"] + vector_engine = vector_config.vector_engine results = await vector_engine.search(layer_id, query_text = query, limit = 10) print("results", results) diff --git a/cognee/modules/tasks/create_task_status_table.py b/cognee/modules/tasks/create_task_status_table.py index 26beceb1c..4f23b19b1 100644 --- 
a/cognee/modules/tasks/create_task_status_table.py +++ b/cognee/modules/tasks/create_task_status_table.py @@ -1,7 +1,10 @@ from cognee.infrastructure.InfrastructureConfig import infrastructure_config +from cognee.infrastructure.databases.relational.config import get_relationaldb_config + +config = get_relationaldb_config() def create_task_status_table(): - db_engine = infrastructure_config.get_config("database_engine") + db_engine = config.db_engine db_engine.create_table("cognee_task_status", [ dict(name = "data_id", type = "STRING"), diff --git a/cognee/modules/tasks/update_task_status.py b/cognee/modules/tasks/update_task_status.py index a59bc4310..653f72d26 100644 --- a/cognee/modules/tasks/update_task_status.py +++ b/cognee/modules/tasks/update_task_status.py @@ -1,5 +1,8 @@ from cognee.infrastructure.InfrastructureConfig import infrastructure_config +from cognee.infrastructure.databases.relational.config import get_relationaldb_config + +config = get_relationaldb_config() def update_task_status(data_id: str, status: str): - db_engine = infrastructure_config.get_config("database_engine") + db_engine = config.db_engine db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)]) diff --git a/cognee/modules/topology/infer_data_topology.py b/cognee/modules/topology/infer_data_topology.py index 935cc702e..6bcb9f86c 100644 --- a/cognee/modules/topology/infer_data_topology.py +++ b/cognee/modules/topology/infer_data_topology.py @@ -2,13 +2,15 @@ import logging from typing import List, Dict from cognee.infrastructure import infrastructure_config from cognee.modules.topology.extraction.extract_topology import extract_categories +from cognee.modules.cognify.config import get_cognify_config +cognify_config = get_cognify_config() logger = logging.getLogger(__name__) async def infer_data_topology(content: str, graph_topology=None): if graph_topology is None: - graph_topology = infrastructure_config.get_config()["graph_topology"] + graph_topology 
= cognify_config.graph_topology try: return (await extract_categories( content, diff --git a/cognee/modules/topology/topology.py b/cognee/modules/topology/topology.py index e27336c44..9395b6116 100644 --- a/cognee/modules/topology/topology.py +++ b/cognee/modules/topology/topology.py @@ -1,13 +1,16 @@ import os import glob from pydantic import BaseModel, Field -from typing import Dict, List, Optional, Union, Type, Any +from typing import Dict, List, Optional, Union, Type, Any, Tuple from datetime import datetime from cognee import config +from cognee.base_config import get_base_config from cognee.infrastructure import infrastructure_config +from cognee.modules.cognify.config import get_cognify_config from cognee.modules.topology.infer_data_topology import infer_data_topology - +cognify_config = get_cognify_config() +base_config = get_base_config() class Relationship(BaseModel): type: str = Field(..., description="The type of relationship, e.g., 'belongs_to'.") @@ -84,7 +87,7 @@ class TopologyEngine: async def infer_from_directory_structure(self, node_id: str, repository: str, model: Type[BaseModel]) -> GitHubRepositoryModel: """ Infer the topology of a repository from its file structure """ - path = infrastructure_config.get_config()["data_root_directory"] + path = base_config.data_root_directory path = path + "/" + str(repository) print(path)