Updates to the configs

This commit is contained in:
Vasilije 2024-05-26 11:46:49 +02:00
parent 18e47094fd
commit cddf836fce
29 changed files with 167 additions and 78 deletions

View file

@ -8,6 +8,11 @@ from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.files.storage import LocalStorage from cognee.infrastructure.files.storage import LocalStorage
from cognee.modules.discovery import discover_directory_datasets from cognee.modules.discovery import discover_directory_datasets
from cognee.utils import send_telemetry from cognee.utils import send_telemetry
from cognee.base_config import get_base_config
base_config = get_base_config()
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational_config = get_relationaldb_config()
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None): async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
@ -46,10 +51,10 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_nam
return [] return []
async def add_files(file_paths: List[str], dataset_name: str): async def add_files(file_paths: List[str], dataset_name: str):
infra_config = infrastructure_config.get_config() # infra_config = infrastructure_config.get_config()
data_directory_path = infra_config["data_root_directory"] data_directory_path = base_config.data_root_directory
LocalStorage.ensure_directory_exists(infra_config["database_directory_path"]) LocalStorage.ensure_directory_exists(relational_config.database_directory_path)
processed_file_paths = [] processed_file_paths = []
@ -68,7 +73,7 @@ async def add_files(file_paths: List[str], dataset_name: str):
else: else:
processed_file_paths.append(file_path) processed_file_paths.append(file_path)
db = duckdb.connect(infra_config["database_path"]) db = duckdb.connect(relational_config.database_path)
destination = dlt.destinations.duckdb( destination = dlt.destinations.duckdb(
credentials = db, credentials = db,
@ -120,7 +125,7 @@ async def add_data_directory(data_path: str, dataset_name: str = None):
return await asyncio.gather(*results) return await asyncio.gather(*results)
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None): def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
data_directory_path = infrastructure_config.get_config()["data_root_directory"] data_directory_path = base_config.data_root_directory
classified_data = ingestion.classify(data, filename) classified_data = ingestion.classify(data, filename)
# data_id = ingestion.identify(classified_data) # data_id = ingestion.identify(classified_data)

View file

@ -3,7 +3,9 @@ from uuid import UUID, uuid4
from typing import Union, BinaryIO, List from typing import Union, BinaryIO, List
import cognee.modules.ingestion as ingestion import cognee.modules.ingestion as ingestion
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational_config = get_relationaldb_config()
class DatasetException(Exception): class DatasetException(Exception):
message: str message: str
@ -16,7 +18,7 @@ async def add_standalone(
dataset_id: UUID = uuid4(), dataset_id: UUID = uuid4(),
dataset_name: str = None dataset_name: str = None
): ):
db_engine = infrastructure_config.get_config()["database_engine"] db_engine = relational_config.database_engine
if db_engine.is_db_done is not True: if db_engine.is_db_done is not True:
await db_engine.ensure_tables() await db_engine.ensure_tables()

View file

@ -34,6 +34,21 @@ graph_config = get_graph_config()
config = Config() config = Config()
config.load() config.load()
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational_config = get_relationaldb_config()
cognify_config = get_cognify_config()
chunk_config = get_chunk_config()
base_config = get_base_config()
embedding_config = get_embedding_config()
# aclient = instructor.patch(OpenAI()) # aclient = instructor.patch(OpenAI())
USER_ID = "default_user" USER_ID = "default_user"
@ -47,11 +62,11 @@ async def cognify(datasets: Union[str, List[str]] = None):
stopwords.ensure_loaded() stopwords.ensure_loaded()
create_task_status_table() create_task_status_table()
graph_db_type = infrastructure_config.get_config()["graph_engine"] graph_db_type = graph_config.graph_engine
graph_client = await get_graph_client(graph_db_type) graph_client = await get_graph_client(graph_db_type)
db_engine = infrastructure_config.get_config()["database_engine"] db_engine = relational_config.database_engine
if datasets is None or len(datasets) == 0: if datasets is None or len(datasets) == 0:
datasets = db_engine.get_datasets() datasets = db_engine.get_datasets()
@ -77,8 +92,8 @@ async def cognify(datasets: Union[str, List[str]] = None):
dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset))) dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset)))
chunk_engine = infrastructure_config.get_config()["chunk_engine"] chunk_engine = chunk_config.chunk_engine
chunk_strategy = infrastructure_config.get_config()["chunk_strategy"] chunk_strategy = chunk_config.chunk_strategy
async def process_batch(files_batch): async def process_batch(files_batch):
data_chunks = {} data_chunks = {}
@ -129,7 +144,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
for (dataset_name, files) in dataset_files: for (dataset_name, files) in dataset_files:
for file_metadata in files: for file_metadata in files:
graph_topology = infrastructure_config.get_config()["graph_model"] graph_topology = graph_config.graph_model
if graph_topology == SourceCodeGraph: if graph_topology == SourceCodeGraph:
parent_node_id = f"{file_metadata['name']}.{file_metadata['extension']}" parent_node_id = f"{file_metadata['name']}.{file_metadata['extension']}"
@ -164,7 +179,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
graph_client = await get_graph_client(graph_config.graph_engine) graph_client = await get_graph_client(graph_config.graph_engine)
graph_topology = infrastructure_config.get_config()["graph_model"] graph_topology = cognify_config.graph_model
if graph_topology == SourceCodeGraph: if graph_topology == SourceCodeGraph:
classified_categories = [{"data_type": "text", "category_name": "Code and functions"}] classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]
elif graph_topology == KnowledgeGraph: elif graph_topology == KnowledgeGraph:
@ -186,7 +201,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
print(f"Chunk ({chunk_id}) summarized.") print(f"Chunk ({chunk_id}) summarized.")
cognitive_layers = await get_cognitive_layers(input_text, classified_categories) cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
cognitive_layers = cognitive_layers[:config.cognitive_layers_limit] cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit]
try: try:
cognitive_layers = (await add_cognitive_layers(graph_client, document_id, cognitive_layers))[:2] cognitive_layers = (await add_cognitive_layers(graph_client, document_id, cognitive_layers))[:2]
@ -197,8 +212,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
pass pass
if infrastructure_config.get_config()["connect_documents"] is True: if cognify_config.connect_documents is True:
db_engine = infrastructure_config.get_config()["database_engine"] db_engine = relational_config.database_engine
relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id) relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id)
list_of_nodes = [] list_of_nodes = []
@ -220,7 +235,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
await connect_nodes_in_graph( await connect_nodes_in_graph(
graph_client, graph_client,
relationships, relationships,
score_threshold = infrastructure_config.get_config()["intra_layer_score_treshold"] score_threshold = cognify_config.intra_layer_score_treshold
) )
send_telemetry("cognee.cognify") send_telemetry("cognee.cognify")

View file

@ -6,6 +6,7 @@ from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.data.chunking.config import get_chunk_config from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.modules.cognify.config import get_cognify_config from cognee.modules.cognify.config import get_cognify_config
cognify_config = get_cognify_config() cognify_config = get_cognify_config()
chunk_config = get_chunk_config() chunk_config = get_chunk_config()
graph_config = get_graph_config() graph_config = get_graph_config()
@ -24,9 +25,6 @@ class config():
def monitoring_tool(monitoring_tool: object): def monitoring_tool(monitoring_tool: object):
base_config.monitoring_tool = monitoring_tool base_config.monitoring_tool = monitoring_tool
@staticmethod @staticmethod
def set_classification_model(classification_model: object): def set_classification_model(classification_model: object):
cognify_config.classification_model = classification_model cognify_config.classification_model = classification_model
@ -57,9 +55,7 @@ class config():
@staticmethod @staticmethod
def llm_provider(llm_provider: str): def llm_provider(llm_provider: str):
infrastructure_config.set_config({ graph_config.llm_provider = llm_provider
"llm_provider": llm_provider
})
@staticmethod @staticmethod
def intra_layer_score_treshold(intra_layer_score_treshold: str): def intra_layer_score_treshold(intra_layer_score_treshold: str):
@ -77,6 +73,5 @@ class config():
@staticmethod
def set_graph_topology(graph_topology: object):
    """Record the graph topology model on the shared cognify configuration.

    Args:
        graph_topology: Model to store as ``cognify_config.graph_topology``.
    """
    # Assign to the module-level config instance. Setting the attribute on
    # the `get_cognify_config` factory function would succeed silently but
    # never reach the config object that the rest of the code reads.
    cognify_config.graph_topology = graph_topology

View file

@ -1,11 +1,14 @@
from duckdb import CatalogException from duckdb import CatalogException
from cognee.modules.discovery import discover_directory_datasets from cognee.modules.discovery import discover_directory_datasets
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational_config = get_relationaldb_config()
class datasets(): class datasets():
@staticmethod @staticmethod
def list_datasets(): def list_datasets():
db = infrastructure_config.get_config("database_engine") db = relational_config.db_engine
return db.get_datasets() return db.get_datasets()
@staticmethod @staticmethod
@ -14,7 +17,7 @@ class datasets():
@staticmethod @staticmethod
def list_data(dataset_name: str): def list_data(dataset_name: str):
db = infrastructure_config.get_config("database_engine") db = relational_config.db_engine
try: try:
return db.get_files_metadata(dataset_name) return db.get_files_metadata(dataset_name)
except CatalogException: except CatalogException:
@ -22,7 +25,7 @@ class datasets():
@staticmethod @staticmethod
def get_status(dataset_ids: list[str]) -> dict: def get_status(dataset_ids: list[str]) -> dict:
db = infrastructure_config.get_config("database_engine") db = relational_config.db_engine
try: try:
return db.get_data("cognee_task_status", { return db.get_data("cognee_task_status", {
"data_id": dataset_ids "data_id": dataset_ids
@ -32,7 +35,7 @@ class datasets():
@staticmethod @staticmethod
def delete_dataset(dataset_id: str): def delete_dataset(dataset_id: str):
db = infrastructure_config.get_config("database_engine") db = relational_config.db_engine
try: try:
return db.delete_table(dataset_id) return db.delete_table(dataset_id)
except CatalogException: except CatalogException:

View file

@ -1,11 +1,17 @@
from cognee.base_config import get_base_config
from cognee.infrastructure.files.storage import LocalStorage from cognee.infrastructure.files.storage import LocalStorage
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
base_config =get_base_config()
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vector_config = get_vectordb_config()
class prune(): class prune():
@staticmethod @staticmethod
async def prune_data(): async def prune_data():
data_root_directory = infrastructure_config.get_config()["data_root_directory"] data_root_directory = base_config.data_root_directory
LocalStorage.remove_all(data_root_directory) LocalStorage.remove_all(data_root_directory)
@staticmethod @staticmethod
@ -13,11 +19,11 @@ class prune():
infra_config = infrastructure_config.get_config() infra_config = infrastructure_config.get_config()
if graph: if graph:
graph_client = await get_graph_client(infra_config["graph_engine"]) graph_client = await get_graph_client(graph_config.graph_engine)
await graph_client.delete_graph() await graph_client.delete_graph()
if vector: if vector:
vector_client = infra_config["vector_engine"] vector_client = vector_config.vector_engine
await vector_client.prune() await vector_client.prune()

View file

@ -13,6 +13,8 @@ from cognee.modules.search.graph.search_summary import search_summary
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.utils import send_telemetry from cognee.utils import send_telemetry
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
class SearchType(Enum): class SearchType(Enum):
ADJACENT = 'ADJACENT' ADJACENT = 'ADJACENT'
@ -49,7 +51,7 @@ async def search(search_type: str, params: Dict[str, Any]) -> List:
async def specific_search(query_params: List[SearchParameters]) -> List: async def specific_search(query_params: List[SearchParameters]) -> List:
graph_client = await get_graph_client(infrastructure_config.get_config()["graph_engine"]) graph_client = await get_graph_client(graph_config.graph_engine)
graph = graph_client.graph graph = graph_client.graph
search_functions: Dict[SearchType, Callable] = { search_functions: Dict[SearchType, Callable] = {

View file

@ -7,6 +7,7 @@ from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryMod
import pandas as pd import pandas as pd
from pydantic import BaseModel from pydantic import BaseModel
USER_ID = "default_user" USER_ID = "default_user"
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any: async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:

View file

@ -4,7 +4,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.root_dir import get_absolute_path from cognee.root_dir import get_absolute_path
from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \ from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \
DefaultCognitiveLayer DefaultCognitiveLayer, DefaultGraphModel
# Monitoring tool # Monitoring tool
@ -18,6 +18,8 @@ class CognifyConfig(BaseSettings):
cognitive_layer_model: object = DefaultCognitiveLayer cognitive_layer_model: object = DefaultCognitiveLayer
intra_layer_score_treshold: float = 0.98 intra_layer_score_treshold: float = 0.98
connect_documents: bool = False connect_documents: bool = False
graph_topology: object = DefaultGraphModel
cognitive_layers_limit: int = 2
@ -31,6 +33,8 @@ class CognifyConfig(BaseSettings):
"cognitive_layer_model": self.cognitive_layer_model, "cognitive_layer_model": self.cognitive_layer_model,
"intra_layer_score_treshold": self.intra_layer_score_treshold, "intra_layer_score_treshold": self.intra_layer_score_treshold,
"connect_documents": self.connect_documents, "connect_documents": self.connect_documents,
"graph_topology": self.graph_topology,
"cognitive_layers_limit": self.cognitive_layers_limit
} }
@lru_cache @lru_cache

View file

@ -2,10 +2,12 @@ from datetime import datetime
from uuid import uuid4 from uuid import uuid4
from typing import List, Tuple, TypedDict from typing import List, Tuple, TypedDict
from pydantic import BaseModel from pydantic import BaseModel
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.vector import DataPoint from cognee.infrastructure.databases.vector import DataPoint
from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
class GraphLike(TypedDict): class GraphLike(TypedDict):
nodes: List nodes: List
edges: List edges: List
@ -17,8 +19,8 @@ async def add_cognitive_layer_graphs(
chunk_id: str, chunk_id: str,
layer_graphs: List[Tuple[str, GraphLike]], layer_graphs: List[Tuple[str, GraphLike]],
): ):
vector_client = infrastructure_config.get_config("vector_engine") vector_client = vectordb_config.vector_engine
graph_model = infrastructure_config.get_config("graph_model") graph_model = graph_config.graph_model
for (layer_id, layer_graph) in layer_graphs: for (layer_id, layer_graph) in layer_graphs:
graph_nodes = [] graph_nodes = []

View file

@ -2,11 +2,14 @@ from uuid import uuid4
from typing import List from typing import List
from datetime import datetime from datetime import datetime
from pydantic import BaseModel from pydantic import BaseModel
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.vector import DataPoint
from cognee.infrastructure.databases.vector import DataPoint
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str]) -> None: async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str]) -> None:
vector_client = infrastructure_config.get_config("vector_engine") vector_client = vectordb_config.vector_engine
keyword_nodes = [] keyword_nodes = []

View file

@ -1,9 +1,12 @@
import uuid import uuid
from cognee.infrastructure import infrastructure_config # from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.shared.data_models import GraphDBType from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
async def group_nodes_by_layer(node_descriptions): async def group_nodes_by_layer(node_descriptions):
@ -41,7 +44,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9):
if relationship['score'] > score_threshold: if relationship['score'] > score_threshold:
# For NetworkX # For NetworkX
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: if graph_config.graph_engine == GraphDBType.NETWORKX:
searched_node_id_found = await get_node_by_unique_id(graph.graph, relationship['searched_node_id']) searched_node_id_found = await get_node_by_unique_id(graph.graph, relationship['searched_node_id'])
original_id_for_search_found = await get_node_by_unique_id(graph.graph, relationship['original_id_for_search']) original_id_for_search_found = await get_node_by_unique_id(graph.graph, relationship['original_id_for_search'])
if searched_node_id_found and original_id_for_search_found: if searched_node_id_found and original_id_for_search_found:
@ -54,7 +57,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9):
) )
# For Neo4j # For Neo4j
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: elif graph_config.graph_engine == GraphDBType.NEO4J:
# Neo4j specific logic to add an edge # Neo4j specific logic to add an edge
# This is just a placeholder, replace it with actual Neo4j logic # This is just a placeholder, replace it with actual Neo4j logic
print("query is ", f"""MATCH (a), (b) WHERE a.unique_id = '{relationship['searched_node_id']}' AND b.unique_id = '{relationship['original_id_for_search']}' CREATE (a)-[:CONNECTED {{weight:{relationship['score']}}}]->(b)""") print("query is ", f"""MATCH (a), (b) WHERE a.unique_id = '{relationship['searched_node_id']}' AND b.unique_id = '{relationship['original_id_for_search']}' CREATE (a)-[:CONNECTED {{weight:{relationship['score']}}}]->(b)""")

View file

@ -2,10 +2,13 @@
from typing import Optional, Any from typing import Optional, Any
from pydantic import BaseModel from pydantic import BaseModel
from cognee.infrastructure import infrastructure_config # from cognee.infrastructure import infrastructure_config
from cognee.shared.data_models import GraphDBType from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
async def generate_node_id(instance: BaseModel) -> str: async def generate_node_id(instance: BaseModel) -> str:
for field in ["id", "doc_id", "location_id", "type_id", "node_id"]: for field in ["id", "doc_id", "location_id", "type_id", "node_id"]:
if hasattr(instance, field): if hasattr(instance, field):
@ -30,7 +33,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di
- Exception: If there is an error during the node or edge addition process, it logs the error and continues without interrupting the execution flow. - Exception: If there is an error during the node or edge addition process, it logs the error and continues without interrupting the execution flow.
Note: Note:
- The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the global `infrastructure_config`. - The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the graph configuration.
""" """
# Initialize result to None to ensure a clear return path # Initialize result to None to ensure a clear return path
@ -46,7 +49,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di
print("added node", result) print("added node", result)
# Add an edge if a parent ID is provided and the graph engine is NETWORKX # Add an edge if a parent ID is provided and the graph engine is NETWORKX
if parent_id and "default_relationship" in node_data and infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: if parent_id and "default_relationship" in node_data and graph_config.graph_engine == GraphDBType.NETWORKX:
try: try:
await client.add_edge(parent_id, node_id, relationship_name = node_data["default_relationship"]["type"], edge_properties = node_data) await client.add_edge(parent_id, node_id, relationship_name = node_data["default_relationship"]["type"], edge_properties = node_data)

View file

@ -1,6 +1,11 @@
from typing import Dict, List from typing import Dict, List
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
async def resolve_cross_graph_references(nodes_by_layer: Dict): async def resolve_cross_graph_references(nodes_by_layer: Dict):
results = [] results = []
@ -16,7 +21,7 @@ async def resolve_cross_graph_references(nodes_by_layer: Dict):
return results return results
async def get_nodes_by_layer(layer_id: str, layer_nodes: List): async def get_nodes_by_layer(layer_id: str, layer_nodes: List):
vector_engine = infrastructure_config.get_config()["vector_engine"] vector_engine = vectordb_config.vector_engine
score_points = await vector_engine.batch_search( score_points = await vector_engine.batch_search(
layer_id, layer_id,

View file

@ -2,6 +2,9 @@ import logging
from typing import List, Dict from typing import List, Dict
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from.extraction.extract_cognitive_layers import extract_cognitive_layers from.extraction.extract_cognitive_layers import extract_cognitive_layers
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -10,7 +13,7 @@ async def get_cognitive_layers(content: str, categories: List[Dict]):
return (await extract_cognitive_layers( return (await extract_cognitive_layers(
content, content,
categories[0], categories[0],
infrastructure_config.get_config()["cognitive_layer_model"] config.cognitive_layer_model
)).cognitive_layers )).cognitive_layers
except Exception as error: except Exception as error:
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True) logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)

View file

@ -1,14 +1,15 @@
import logging import logging
from cognee.infrastructure import infrastructure_config
from .extraction.extract_categories import extract_categories from .extraction.extract_categories import extract_categories
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
async def get_content_categories(content: str): async def get_content_categories(content: str):
try: try:
return await extract_categories( return await extract_categories(
content, content,
infrastructure_config.get_config()["classification_model"] config.classification_model
) )
except Exception as error: except Exception as error:
logger.error("Error extracting categories from content: %s", error, exc_info = True) logger.error("Error extracting categories from content: %s", error, exc_info = True)

View file

@ -1,14 +1,16 @@
import logging import logging
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from.extraction.extract_summary import extract_summary from.extraction.extract_summary import extract_summary
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
async def get_content_summary(content: str): async def get_content_summary(content: str):
try: try:
return await extract_summary( return await extract_summary(
content, content,
infrastructure_config.get_config()["summarization_model"] config.summarization_model
) )
except Exception as error: except Exception as error:
logger.error("Error extracting summary from content: %s", error, exc_info = True) logger.error("Error extracting summary from content: %s", error, exc_info = True)

View file

@ -2,7 +2,9 @@ import logging
import asyncio import asyncio
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
from.extraction.extract_summary import extract_summary
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]): async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
@ -11,7 +13,7 @@ async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]
extract_knowledge_graph( extract_knowledge_graph(
content, content,
cognitive_layer_data["name"], cognitive_layer_data["name"],
infrastructure_config.get_config()["graph_model"] config.graph_model
) for (_, cognitive_layer_data) in cognitive_layers ) for (_, cognitive_layer_data) in cognitive_layers
] ]

View file

@ -3,11 +3,13 @@ from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.data import Dataset, Data from cognee.infrastructure.data import Dataset, Data
from cognee.infrastructure.files import remove_file_from_storage from cognee.infrastructure.files import remove_file_from_storage
from cognee.infrastructure.databases.relational import DatabaseEngine from cognee.infrastructure.databases.relational import DatabaseEngine
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
config = get_relationaldb_config()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
async def add_data_to_dataset(dataset: Dataset, data: Data): async def add_data_to_dataset(dataset: Dataset, data: Data):
db_engine: DatabaseEngine = infrastructure_config.get_config()["database_engine"] db_engine: DatabaseEngine = config.database_engine
existing_dataset = (await db_engine.query_entity(dataset)).scalar() existing_dataset = (await db_engine.query_entity(dataset)).scalar()
existing_data = (await db_engine.query_entity(data)).scalar() existing_data = (await db_engine.query_entity(data)).scalar()

View file

@ -4,6 +4,8 @@
from typing import Union, Dict from typing import Union, Dict
import networkx as nx import networkx as nx
from cognee.shared.data_models import GraphDBType from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: dict = None) -> Dict[str, str]: async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: dict = None) -> Dict[str, str]:
""" """
Find the neighbours of a given node in the graph and return their descriptions. Find the neighbours of a given node in the graph and return their descriptions.
@ -22,7 +24,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param:
if node_id is None: if node_id is None:
return {} return {}
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: if graph_config.graph_engine == GraphDBType.NETWORKX:
if node_id not in graph: if node_id not in graph:
return {} return {}
@ -30,7 +32,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param:
neighbor_descriptions = {neighbor: graph.nodes[neighbor].get('description') for neighbor in neighbors} neighbor_descriptions = {neighbor: graph.nodes[neighbor].get('description') for neighbor in neighbors}
return neighbor_descriptions return neighbor_descriptions
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: elif graph_config.graph_engine == GraphDBType.NEO4J:
cypher_query = """ cypher_query = """
MATCH (node {id: $node_id})-[:CONNECTED_TO]->(neighbor) MATCH (node {id: $node_id})-[:CONNECTED_TO]->(neighbor)
RETURN neighbor.id AS neighbor_id, neighbor.description AS description RETURN neighbor.id AS neighbor_id, neighbor.description AS description

View file

@ -10,6 +10,11 @@ from cognee.modules.search.llm.extraction.categorize_relevant_category import ca
from cognee.shared.data_models import GraphDBType, DefaultContentPrediction from cognee.shared.data_models import GraphDBType, DefaultContentPrediction
import networkx as nx import networkx as nx
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
def strip_exact_regex(s, substring): def strip_exact_regex(s, substring):
# Escaping substring to be used in a regex pattern # Escaping substring to be used in a regex pattern
pattern = re.escape(substring) pattern = re.escape(substring)
@ -37,7 +42,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label:
""" """
# Determine which client is in use based on the configuration # Determine which client is in use based on the configuration
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: if graph_config.graph_engine == GraphDBType.NETWORKX:
categories_and_ids = [ categories_and_ids = [
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']} {'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
@ -53,7 +58,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label:
descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes} descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes}
return descriptions return descriptions
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: elif graph_config.graph_engine == GraphDBType.NEO4J:
# Logic for Neo4j # Logic for Neo4j
cypher_query = """ cypher_query = """
MATCH (n) MATCH (n)

View file

@ -7,7 +7,10 @@ from pydantic import BaseModel
from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category
from cognee.shared.data_models import GraphDBType from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
async def search_cypher(query:str, graph: Union[nx.Graph, any]): async def search_cypher(query:str, graph: Union[nx.Graph, any]):
""" """
@ -16,7 +19,7 @@ async def search_cypher(query:str, graph: Union[nx.Graph, any]):
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: if graph_config.graph_engine == GraphDBType.NEO4J:
result = await graph.run(query) result = await graph.run(query)
return result return result

View file

@ -6,7 +6,10 @@ from neo4j import AsyncSession
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
import networkx as nx import networkx as nx
from cognee.shared.data_models import GraphDBType from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
async def search_neighbour(graph: Union[nx.Graph, any], query: str, async def search_neighbour(graph: Union[nx.Graph, any], query: str,
other_param: dict = None): other_param: dict = None):
""" """
@ -28,7 +31,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
if node_id is None: if node_id is None:
return [] return []
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: if graph_config.graph_engine == GraphDBType.NETWORKX:
relevant_context = [] relevant_context = []
target_layer_uuid = graph.nodes[node_id].get('layer_uuid') target_layer_uuid = graph.nodes[node_id].get('layer_uuid')
@ -39,7 +42,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
return relevant_context return relevant_context
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: elif graph_config.graph_engine == GraphDBType.NEO4J:
if isinstance(graph, AsyncSession): if isinstance(graph, AsyncSession):
cypher_query = """ cypher_query = """
MATCH (target {id: $node_id}) MATCH (target {id: $node_id})

View file

@ -7,7 +7,10 @@ from cognee.infrastructure import infrastructure_config
from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary
from cognee.shared.data_models import GraphDBType, ResponseSummaryModel from cognee.shared.data_models import GraphDBType, ResponseSummaryModel
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
import re import re
def strip_exact_regex(s, substring): def strip_exact_regex(s, substring):
@ -30,7 +33,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
- Dict[str, str]: A dictionary where keys are node identifiers containing the query string, and values are their 'summary' attributes. - Dict[str, str]: A dictionary where keys are node identifiers containing the query string, and values are their 'summary' attributes.
""" """
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX: if graph_config.graph_engine == GraphDBType.NETWORKX:
print("graph", graph) print("graph", graph)
summaries_and_ids = [ summaries_and_ids = [
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']} {'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
@ -48,7 +51,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
return descriptions return descriptions
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J: elif graph_config.graph_engine == GraphDBType.NEO4J:
cypher_query = f""" cypher_query = f"""
MATCH (n) MATCH (n)
WHERE n.id CONTAINS $query AND EXISTS(n.summary) WHERE n.id CONTAINS $query AND EXISTS(n.summary)

View file

@ -1,10 +1,13 @@
from dsp.utils import deduplicate from dsp.utils import deduplicate
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
async def search_similarity(query: str, graph): async def search_similarity(query: str, graph):
graph_db_type = infrastructure_config.get_config()["graph_engine"] graph_db_type = graph_config.graph_engine
graph_client = await get_graph_client(graph_db_type) graph_client = await get_graph_client(graph_db_type)
@ -17,7 +20,7 @@ async def search_similarity(query: str, graph):
graph_nodes = [] graph_nodes = []
for layer_id in unique_layer_uuids: for layer_id in unique_layer_uuids:
vector_engine = infrastructure_config.get_config()["vector_engine"] vector_engine = vector_config.vector_engine
results = await vector_engine.search(layer_id, query_text = query, limit = 10) results = await vector_engine.search(layer_id, query_text = query, limit = 10)
print("results", results) print("results", results)

View file

@ -1,7 +1,10 @@
from cognee.infrastructure.InfrastructureConfig import infrastructure_config from cognee.infrastructure.InfrastructureConfig import infrastructure_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
config = get_relationaldb_config()
def create_task_status_table(): def create_task_status_table():
db_engine = infrastructure_config.get_config("database_engine") db_engine = config.db_engine
db_engine.create_table("cognee_task_status", [ db_engine.create_table("cognee_task_status", [
dict(name = "data_id", type = "STRING"), dict(name = "data_id", type = "STRING"),

View file

@ -1,5 +1,8 @@
from cognee.infrastructure.InfrastructureConfig import infrastructure_config from cognee.infrastructure.InfrastructureConfig import infrastructure_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
config = get_relationaldb_config()
def update_task_status(data_id: str, status: str): def update_task_status(data_id: str, status: str):
db_engine = infrastructure_config.get_config("database_engine") db_engine = config.db_engine
db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)]) db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)])

View file

@ -2,13 +2,15 @@ import logging
from typing import List, Dict from typing import List, Dict
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.modules.topology.extraction.extract_topology import extract_categories from cognee.modules.topology.extraction.extract_topology import extract_categories
from cognee.modules.cognify.config import get_cognify_config
cognify_config = get_cognify_config()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
async def infer_data_topology(content: str, graph_topology=None): async def infer_data_topology(content: str, graph_topology=None):
if graph_topology is None: if graph_topology is None:
graph_topology = infrastructure_config.get_config()["graph_topology"] graph_topology = cognify_config.graph_topology
try: try:
return (await extract_categories( return (await extract_categories(
content, content,

View file

@ -1,13 +1,16 @@
import os import os
import glob import glob
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from typing import Dict, List, Optional, Union, Type, Any from typing import Dict, List, Optional, Union, Type, Any, Tuple
from datetime import datetime from datetime import datetime
from cognee import config from cognee import config
from cognee.base_config import get_base_config
from cognee.infrastructure import infrastructure_config from cognee.infrastructure import infrastructure_config
from cognee.modules.cognify.config import get_cognify_config
from cognee.modules.topology.infer_data_topology import infer_data_topology from cognee.modules.topology.infer_data_topology import infer_data_topology
cognify_config = get_cognify_config()
base_config = get_base_config()
class Relationship(BaseModel): class Relationship(BaseModel):
type: str = Field(..., description="The type of relationship, e.g., 'belongs_to'.") type: str = Field(..., description="The type of relationship, e.g., 'belongs_to'.")
@ -84,7 +87,7 @@ class TopologyEngine:
async def infer_from_directory_structure(self, node_id: str, repository: str, model: Type[BaseModel]) -> GitHubRepositoryModel: async def infer_from_directory_structure(self, node_id: str, repository: str, model: Type[BaseModel]) -> GitHubRepositoryModel:
""" Infer the topology of a repository from its file structure """ """ Infer the topology of a repository from its file structure """
path = infrastructure_config.get_config()["data_root_directory"] path = base_config.data_root_directory
path = path + "/" + str(repository) path = path + "/" + str(repository)
print(path) print(path)