Updates to the configs

Vasilije 2024-05-26 11:46:49 +02:00
parent 18e47094fd
commit cddf836fce
29 changed files with 167 additions and 78 deletions

View file

@@ -8,6 +8,11 @@ from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.files.storage import LocalStorage
from cognee.modules.discovery import discover_directory_datasets
from cognee.utils import send_telemetry
from cognee.base_config import get_base_config
base_config = get_base_config()
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational_config = get_relationaldb_config()
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
@@ -46,10 +51,10 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_nam
return []
async def add_files(file_paths: List[str], dataset_name: str):
infra_config = infrastructure_config.get_config()
data_directory_path = infra_config["data_root_directory"]
# infra_config = infrastructure_config.get_config()
data_directory_path = base_config.data_root_directory
LocalStorage.ensure_directory_exists(infra_config["database_directory_path"])
LocalStorage.ensure_directory_exists(relational_config.database_directory_path)
processed_file_paths = []
@@ -68,7 +73,7 @@ async def add_files(file_paths: List[str], dataset_name: str):
else:
processed_file_paths.append(file_path)
db = duckdb.connect(infra_config["database_path"])
db = duckdb.connect(relational_config.database_path)
destination = dlt.destinations.duckdb(
credentials = db,
@@ -120,7 +125,7 @@ async def add_data_directory(data_path: str, dataset_name: str = None):
return await asyncio.gather(*results)
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
data_directory_path = infrastructure_config.get_config()["data_root_directory"]
data_directory_path = base_config.data_root_directory
classified_data = ingestion.classify(data, filename)
# data_id = ingestion.identify(classified_data)
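For reference, the pattern this commit migrates to is a small pydantic-settings class per domain, exposed through a cached getter. A minimal sketch of what get_base_config presumably looks like (the field default and env handling are assumptions, not taken from this diff):

from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.root_dir import get_absolute_path

class BaseConfig(BaseSettings):
    # Default path is an assumption for illustration
    data_root_directory: str = get_absolute_path(".data_storage")
    model_config = SettingsConfigDict(env_file = ".env", extra = "allow")

@lru_cache
def get_base_config():
    # lru_cache makes this a process-wide singleton, matching the @lru_cache on get_cognify_config below
    return BaseConfig()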

View file

@@ -3,7 +3,9 @@ from uuid import UUID, uuid4
from typing import Union, BinaryIO, List
import cognee.modules.ingestion as ingestion
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational_config = get_relationaldb_config()
class DatasetException(Exception):
message: str
@@ -16,7 +18,7 @@ async def add_standalone(
dataset_id: UUID = uuid4(),
dataset_name: str = None
):
db_engine = infrastructure_config.get_config()["database_engine"]
db_engine = relational_config.database_engine
if db_engine.is_db_done is not True:
await db_engine.ensure_tables()

View file

@@ -34,6 +34,21 @@ graph_config = get_graph_config()
config = Config()
config.load()
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational_config = get_relationaldb_config()
cognify_config = get_cognify_config()
chunk_config = get_chunk_config()
base_config = get_base_config()
embedding_config = get_embedding_config()
# aclient = instructor.patch(OpenAI())
USER_ID = "default_user"
@@ -47,11 +62,11 @@ async def cognify(datasets: Union[str, List[str]] = None):
stopwords.ensure_loaded()
create_task_status_table()
graph_db_type = infrastructure_config.get_config()["graph_engine"]
graph_db_type = graph_config.graph_engine
graph_client = await get_graph_client(graph_db_type)
db_engine = infrastructure_config.get_config()["database_engine"]
db_engine = relational_config.database_engine
if datasets is None or len(datasets) == 0:
datasets = db_engine.get_datasets()
@@ -77,8 +92,8 @@ async def cognify(datasets: Union[str, List[str]] = None):
dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset)))
chunk_engine = infrastructure_config.get_config()["chunk_engine"]
chunk_strategy = infrastructure_config.get_config()["chunk_strategy"]
chunk_engine = chunk_config.chunk_engine
chunk_strategy = chunk_config.chunk_strategy
async def process_batch(files_batch):
data_chunks = {}
@@ -129,7 +144,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
for (dataset_name, files) in dataset_files:
for file_metadata in files:
graph_topology = infrastructure_config.get_config()["graph_model"]
graph_topology = graph_config.graph_model
if graph_topology == SourceCodeGraph:
parent_node_id = f"{file_metadata['name']}.{file_metadata['extension']}"
@@ -164,7 +179,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
graph_client = await get_graph_client(graph_config.graph_engine)
graph_topology = infrastructure_config.get_config()["graph_model"]
graph_topology = cognify_config.graph_model
if graph_topology == SourceCodeGraph:
classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]
elif graph_topology == KnowledgeGraph:
@@ -186,7 +201,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
print(f"Chunk ({chunk_id}) summarized.")
cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
cognitive_layers = cognitive_layers[:config.cognitive_layers_limit]
cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit]
try:
cognitive_layers = (await add_cognitive_layers(graph_client, document_id, cognitive_layers))[:2]
@@ -197,8 +212,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
pass
if infrastructure_config.get_config()["connect_documents"] is True:
db_engine = infrastructure_config.get_config()["database_engine"]
if cognify_config.connect_documents is True:
db_engine = relational_config.database_engine
relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id)
list_of_nodes = []
@@ -220,7 +235,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
await connect_nodes_in_graph(
graph_client,
relationships,
score_threshold = infrastructure_config.get_config()["intra_layer_score_treshold"]
score_threshold = cognify_config.intra_layer_score_treshold
)
send_telemetry("cognee.cognify")
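Taken together, the cognify changes replace one dict-shaped global with typed, domain-scoped settings objects. A before/after sketch of the access pattern being applied throughout this file:

# Before: every setting lived in one monolithic dict
graph_db_type = infrastructure_config.get_config()["graph_engine"]

# After: each domain resolves its own settings object once at module load
graph_config = get_graph_config()
graph_db_type = graph_config.graph_engine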

View file

@@ -6,6 +6,7 @@ from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.modules.cognify.config import get_cognify_config
cognify_config = get_cognify_config()
chunk_config = get_chunk_config()
graph_config = get_graph_config()
@@ -24,9 +25,6 @@ class config():
def monitoring_tool(monitoring_tool: object):
base_config.monitoring_tool = monitoring_tool
@staticmethod
def set_classification_model(classification_model: object):
cognify_config.classification_model = classification_model
@@ -57,9 +55,7 @@ class config():
@staticmethod
def llm_provider(llm_provider: str):
infrastructure_config.set_config({
"llm_provider": llm_provider
})
graph_config.llm_provider = llm_provider
@staticmethod
def intra_layer_score_treshold(intra_layer_score_treshold: str):
@@ -77,6 +73,5 @@ class config():
@staticmethod
def set_graph_topology(graph_topology: object):
infrastructure_config.set_config({
"graph_topology": graph_topology
})
cognify_config.graph_topology = graph_topology
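Assuming the class is reached as cognee.config (that import appears in the topology module below), the rewritten setters would be used like this sketch; KnowledgeGraph stands in for any topology model:

from cognee import config
from cognee.shared.data_models import KnowledgeGraph

config.llm_provider("openai")              # now writes through to graph_config
config.set_graph_topology(KnowledgeGraph)  # now writes through to cognify_config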

View file

@@ -1,11 +1,14 @@
from duckdb import CatalogException
from cognee.modules.discovery import discover_directory_datasets
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
relational_config = get_relationaldb_config()
class datasets():
@staticmethod
def list_datasets():
db = infrastructure_config.get_config("database_engine")
db = relational_config.database_engine
return db.get_datasets()
@staticmethod
@@ -14,7 +17,7 @@ class datasets():
@staticmethod
def list_data(dataset_name: str):
db = infrastructure_config.get_config("database_engine")
db = relational_config.database_engine
try:
return db.get_files_metadata(dataset_name)
except CatalogException:
@@ -22,7 +25,7 @@ class datasets():
@staticmethod
def get_status(dataset_ids: list[str]) -> dict:
db = infrastructure_config.get_config("database_engine")
db = relational_config.database_engine
try:
return db.get_data("cognee_task_status", {
"data_id": dataset_ids
@@ -32,7 +35,7 @@ class datasets():
@staticmethod
def delete_dataset(dataset_id: str):
db = infrastructure_config.get_config("database_engine")
db = relational_config.database_engine
try:
return db.delete_table(dataset_id)
except CatalogException:
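With every accessor resolving the engine from relational_config, usage stays unchanged for callers; a hedged sketch (the import path is assumed):

from cognee.api.v1.datasets.datasets import datasets  # path assumed

print(datasets.list_datasets())
print(datasets.list_data("my_dataset"))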

View file

@@ -1,11 +1,17 @@
from cognee.base_config import get_base_config
from cognee.infrastructure.files.storage import LocalStorage
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
base_config = get_base_config()
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vector_config = get_vectordb_config()
class prune():
@staticmethod
async def prune_data():
data_root_directory = infrastructure_config.get_config()["data_root_directory"]
data_root_directory = base_config.data_root_directory
LocalStorage.remove_all(data_root_directory)
@staticmethod
@@ -13,11 +19,11 @@ class prune():
infra_config = infrastructure_config.get_config()
if graph:
graph_client = await get_graph_client(infra_config["graph_engine"])
graph_client = await get_graph_client(graph_config.graph_engine)
await graph_client.delete_graph()
if vector:
vector_client = infra_config["vector_engine"]
vector_client = vector_config.vector_engine
await vector_client.prune()
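A usage sketch; prune_system's keyword names are inferred from the if graph: / if vector: branches above:

import asyncio
from cognee.api.v1.prune.prune import prune  # import path assumed

async def reset_cognee():
    await prune.prune_data()                       # wipes base_config.data_root_directory
    await prune.prune_system(graph = True, vector = True)

asyncio.run(reset_cognee())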

View file

@@ -13,6 +13,8 @@ from cognee.modules.search.graph.search_summary import search_summary
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.infrastructure import infrastructure_config
from cognee.utils import send_telemetry
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
class SearchType(Enum):
ADJACENT = 'ADJACENT'
@@ -49,7 +51,7 @@ async def search(search_type: str, params: Dict[str, Any]) -> List:
async def specific_search(query_params: List[SearchParameters]) -> List:
graph_client = await get_graph_client(infrastructure_config.get_config()["graph_engine"])
graph_client = await get_graph_client(graph_config.graph_engine)
graph = graph_client.graph
search_functions: Dict[SearchType, Callable] = {

View file

@@ -7,6 +7,7 @@ from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryMod
import pandas as pd
from pydantic import BaseModel
USER_ID = "default_user"
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:

View file

@@ -4,7 +4,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.root_dir import get_absolute_path
from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \
DefaultCognitiveLayer
DefaultCognitiveLayer, DefaultGraphModel
# Monitoring tool
@@ -18,6 +18,8 @@ class CognifyConfig(BaseSettings):
cognitive_layer_model: object = DefaultCognitiveLayer
intra_layer_score_treshold: float = 0.98
connect_documents: bool = False
graph_topology: object = DefaultGraphModel
cognitive_layers_limit: int = 2
@@ -31,6 +33,8 @@ class CognifyConfig(BaseSettings):
"cognitive_layer_model": self.cognitive_layer_model,
"intra_layer_score_treshold": self.intra_layer_score_treshold,
"connect_documents": self.connect_documents,
"graph_topology": self.graph_topology,
"cognitive_layers_limit": self.cognitive_layers_limit
}
@lru_cache
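The hunk cuts off at the decorator; given how get_cognify_config is imported across this commit, the continuation is presumably the standard cached-getter idiom:

@lru_cache
def get_cognify_config():
    return CognifyConfig()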

View file

@@ -2,10 +2,12 @@ from datetime import datetime
from uuid import uuid4
from typing import List, Tuple, TypedDict
from pydantic import BaseModel
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.vector import DataPoint
from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
class GraphLike(TypedDict):
nodes: List
edges: List
@@ -17,8 +19,8 @@ async def add_cognitive_layer_graphs(
chunk_id: str,
layer_graphs: List[Tuple[str, GraphLike]],
):
vector_client = infrastructure_config.get_config("vector_engine")
graph_model = infrastructure_config.get_config("graph_model")
vector_client = vectordb_config.vector_engine
graph_model = graph_config.graph_model
for (layer_id, layer_graph) in layer_graphs:
graph_nodes = []

View file

@@ -2,11 +2,14 @@ from uuid import uuid4
from typing import List
from datetime import datetime
from pydantic import BaseModel
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.vector import DataPoint
from cognee.infrastructure.databases.vector import DataPoint
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str]) -> None:
vector_client = infrastructure_config.get_config("vector_engine")
vector_client = vectordb_config.vector_engine
keyword_nodes = []

View file

@@ -1,9 +1,12 @@
import uuid
from cognee.infrastructure import infrastructure_config
# from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
async def group_nodes_by_layer(node_descriptions):
@@ -41,7 +44,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9):
if relationship['score'] > score_threshold:
# For NetworkX
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
if graph_config.graph_engine == GraphDBType.NETWORKX:
searched_node_id_found = await get_node_by_unique_id(graph.graph, relationship['searched_node_id'])
original_id_for_search_found = await get_node_by_unique_id(graph.graph, relationship['original_id_for_search'])
if searched_node_id_found and original_id_for_search_found:
@@ -54,7 +57,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9):
)
# For Neo4j
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
elif graph_config.graph_engine == GraphDBType.NEO4J:
# Neo4j specific logic to add an edge
# This is just a placeholder, replace it with actual Neo4j logic
print("query is ", f"""MATCH (a), (b) WHERE a.unique_id = '{relationship['searched_node_id']}' AND b.unique_id = '{relationship['original_id_for_search']}' CREATE (a)-[:CONNECTED {{weight:{relationship['score']}}}]->(b)""")

View file

@@ -2,10 +2,13 @@
from typing import Optional, Any
from pydantic import BaseModel
from cognee.infrastructure import infrastructure_config
# from cognee.infrastructure import infrastructure_config
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
async def generate_node_id(instance: BaseModel) -> str:
for field in ["id", "doc_id", "location_id", "type_id", "node_id"]:
if hasattr(instance, field):
@@ -30,7 +33,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di
- Exception: If there is an error during the node or edge addition process, it logs the error and continues without interrupting the execution flow.
Note:
- The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the global `infrastructure_config`.
- The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the graph configuration.
"""
# Initialize result to None to ensure a clear return path
@@ -46,7 +49,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di
print("added node", result)
# Add an edge if a parent ID is provided and the graph engine is NETWORKX
if parent_id and "default_relationship" in node_data and infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
if parent_id and "default_relationship" in node_data and graph_config.graph_engine == GraphDBType.NETWORKX:
try:
await client.add_edge(parent_id, node_id, relationship_name = node_data["default_relationship"]["type"], edge_properties = node_data)

View file

@@ -1,6 +1,11 @@
from typing import Dict, List
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
graph_config = get_graph_config()
vectordb_config = get_vectordb_config()
async def resolve_cross_graph_references(nodes_by_layer: Dict):
results = []
@@ -16,7 +21,7 @@ async def resolve_cross_graph_references(nodes_by_layer: Dict):
return results
async def get_nodes_by_layer(layer_id: str, layer_nodes: List):
vector_engine = infrastructure_config.get_config()["vector_engine"]
vector_engine = vectordb_config.vector_engine
score_points = await vector_engine.batch_search(
layer_id,

View file

@@ -2,6 +2,9 @@ import logging
from typing import List, Dict
from cognee.infrastructure import infrastructure_config
from .extraction.extract_cognitive_layers import extract_cognitive_layers
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__)
@@ -10,7 +13,7 @@ async def get_cognitive_layers(content: str, categories: List[Dict]):
return (await extract_cognitive_layers(
content,
categories[0],
infrastructure_config.get_config()["cognitive_layer_model"]
config.cognitive_layer_model
)).cognitive_layers
except Exception as error:
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)

View file

@@ -1,14 +1,15 @@
import logging
from cognee.infrastructure import infrastructure_config
from .extraction.extract_categories import extract_categories
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__)
async def get_content_categories(content: str):
try:
return await extract_categories(
content,
infrastructure_config.get_config()["classification_model"]
config.classification_model
)
except Exception as error:
logger.error("Error extracting categories from content: %s", error, exc_info = True)

View file

@@ -1,14 +1,16 @@
import logging
from cognee.infrastructure import infrastructure_config
from .extraction.extract_summary import extract_summary
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__)
async def get_content_summary(content: str):
try:
return await extract_summary(
content,
infrastructure_config.get_config()["summarization_model"]
config.summarization_model
)
except Exception as error:
logger.error("Error extracting summary from content: %s", error, exc_info = True)

View file

@@ -2,7 +2,9 @@ import logging
import asyncio
from cognee.infrastructure import infrastructure_config
from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
from .extraction.extract_summary import extract_summary
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__)
async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
@@ -11,7 +13,7 @@ async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]
extract_knowledge_graph(
content,
cognitive_layer_data["name"],
infrastructure_config.get_config()["graph_model"]
config.graph_model
) for (_, cognitive_layer_data) in cognitive_layers
]
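The comprehension builds one extraction coroutine per layer; presumably the full statement feeds it to asyncio.gather (imported at the top of this file), along these lines:

# Presumed shape of the complete statement (sketch):
return await asyncio.gather(*[
    extract_knowledge_graph(
        content,
        cognitive_layer_data["name"],
        config.graph_model
    ) for (_, cognitive_layer_data) in cognitive_layers
])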

View file

@@ -3,11 +3,13 @@ from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.data import Dataset, Data
from cognee.infrastructure.files import remove_file_from_storage
from cognee.infrastructure.databases.relational import DatabaseEngine
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
config = get_relationaldb_config()
logger = logging.getLogger(__name__)
async def add_data_to_dataset(dataset: Dataset, data: Data):
db_engine: DatabaseEngine = infrastructure_config.get_config()["database_engine"]
db_engine: DatabaseEngine = config.database_engine
existing_dataset = (await db_engine.query_entity(dataset)).scalar()
existing_data = (await db_engine.query_entity(data)).scalar()

View file

@@ -4,6 +4,8 @@
from typing import Union, Dict
import networkx as nx
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: dict = None) -> Dict[str, str]:
"""
Find the neighbours of a given node in the graph and return their descriptions.
@@ -22,7 +24,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param:
if node_id is None:
return {}
from cognee.infrastructure import infrastructure_config
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
if graph_config.graph_engine == GraphDBType.NETWORKX:
if node_id not in graph:
return {}
@@ -30,7 +32,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param:
neighbor_descriptions = {neighbor: graph.nodes[neighbor].get('description') for neighbor in neighbors}
return neighbor_descriptions
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
elif graph_config.graph_engine == GraphDBType.NEO4J:
cypher_query = """
MATCH (node {id: $node_id})-[:CONNECTED_TO]->(neighbor)
RETURN neighbor.id AS neighbor_id, neighbor.description AS description

View file

@@ -10,6 +10,11 @@ from cognee.modules.search.llm.extraction.categorize_relevant_category import ca
from cognee.shared.data_models import GraphDBType, DefaultContentPrediction
import networkx as nx
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
def strip_exact_regex(s, substring):
# Escaping substring to be used in a regex pattern
pattern = re.escape(substring)
@@ -37,7 +42,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label:
"""
# Determine which client is in use based on the configuration
from cognee.infrastructure import infrastructure_config
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
if graph_config.graph_engine == GraphDBType.NETWORKX:
categories_and_ids = [
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
@@ -53,7 +58,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label:
descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes}
return descriptions
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
elif graph_config.graph_engine == GraphDBType.NEO4J:
# Logic for Neo4j
cypher_query = """
MATCH (n)

View file

@@ -7,7 +7,10 @@ from pydantic import BaseModel
from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
async def search_cypher(query:str, graph: Union[nx.Graph, any]):
"""
@@ -16,7 +19,7 @@ async def search_cypher(query:str, graph: Union[nx.Graph, any]):
from cognee.infrastructure import infrastructure_config
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
if graph_config.graph_engine == GraphDBType.NEO4J:
result = await graph.run(query)
return result

View file

@@ -6,7 +6,10 @@ from neo4j import AsyncSession
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
import networkx as nx
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
async def search_neighbour(graph: Union[nx.Graph, any], query: str,
other_param: dict = None):
"""
@@ -28,7 +31,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
if node_id is None:
return []
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
if graph_config.graph_engine == GraphDBType.NETWORKX:
relevant_context = []
target_layer_uuid = graph.nodes[node_id].get('layer_uuid')
@@ -39,7 +42,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
return relevant_context
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
elif graph_config.graph_engine == GraphDBType.NEO4J:
if isinstance(graph, AsyncSession):
cypher_query = """
MATCH (target {id: $node_id})

View file

@@ -7,7 +7,10 @@ from cognee.infrastructure import infrastructure_config
from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary
from cognee.shared.data_models import GraphDBType, ResponseSummaryModel
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
import re
def strip_exact_regex(s, substring):
@@ -30,7 +33,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
- Dict[str, str]: A dictionary where keys are node identifiers containing the query string, and values are their 'summary' attributes.
"""
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
if graph_config.graph_engine == GraphDBType.NETWORKX:
print("graph", graph)
summaries_and_ids = [
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
@@ -48,7 +51,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
return descriptions
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
elif graph_config.graph_engine == GraphDBType.NEO4J:
cypher_query = f"""
MATCH (n)
WHERE n.id CONTAINS $query AND EXISTS(n.summary)

View file

@@ -1,10 +1,13 @@
from dsp.utils import deduplicate
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.infrastructure.databases.graph.config import get_graph_config
graph_config = get_graph_config()
from cognee.infrastructure.databases.vector.config import get_vectordb_config
vector_config = get_vectordb_config()
async def search_similarity(query: str, graph):
graph_db_type = infrastructure_config.get_config()["graph_engine"]
graph_db_type = graph_config.graph_engine
graph_client = await get_graph_client(graph_db_type)
@@ -17,7 +20,7 @@ async def search_similarity(query: str, graph):
graph_nodes = []
for layer_id in unique_layer_uuids:
vector_engine = infrastructure_config.get_config()["vector_engine"]
vector_engine = vector_config.vector_engine
results = await vector_engine.search(layer_id, query_text = query, limit = 10)
print("results", results)

View file

@@ -1,7 +1,10 @@
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
config = get_relationaldb_config()
def create_task_status_table():
db_engine = infrastructure_config.get_config("database_engine")
db_engine = config.database_engine
db_engine.create_table("cognee_task_status", [
dict(name = "data_id", type = "STRING"),

View file

@@ -1,5 +1,8 @@
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
config = get_relationaldb_config()
def update_task_status(data_id: str, status: str):
db_engine = infrastructure_config.get_config("database_engine")
db_engine = config.database_engine
db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)])
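A sketch tying the two task-status helpers together; the status string is illustrative, not a value from this diff:

create_task_status_table()
update_task_status("my_dataset", "DATASET_PROCESSING_STARTED")  # status value assumed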

View file

@@ -2,13 +2,15 @@ import logging
from typing import List, Dict
from cognee.infrastructure import infrastructure_config
from cognee.modules.topology.extraction.extract_topology import extract_categories
from cognee.modules.cognify.config import get_cognify_config
cognify_config = get_cognify_config()
logger = logging.getLogger(__name__)
async def infer_data_topology(content: str, graph_topology=None):
if graph_topology is None:
graph_topology = infrastructure_config.get_config()["graph_topology"]
graph_topology = cognify_config.graph_topology
try:
return (await extract_categories(
content,

View file

@@ -1,13 +1,16 @@
import os
import glob
from pydantic import BaseModel, Field
from typing import Dict, List, Optional, Union, Type, Any
from typing import Dict, List, Optional, Union, Type, Any, Tuple
from datetime import datetime
from cognee import config
from cognee.base_config import get_base_config
from cognee.infrastructure import infrastructure_config
from cognee.modules.cognify.config import get_cognify_config
from cognee.modules.topology.infer_data_topology import infer_data_topology
cognify_config = get_cognify_config()
base_config = get_base_config()
class Relationship(BaseModel):
type: str = Field(..., description="The type of relationship, e.g., 'belongs_to'.")
@@ -84,7 +87,7 @@ class TopologyEngine:
async def infer_from_directory_structure(self, node_id: str, repository: str, model: Type[BaseModel]) -> GitHubRepositoryModel:
""" Infer the topology of a repository from its file structure """
path = infrastructure_config.get_config()["data_root_directory"]
path = base_config.data_root_directory
path = path + "/" + str(repository)
print(path)
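Since the root now comes from base_config, the concatenation above could also be written with os.path.join; a sketch:

import os

path = os.path.join(base_config.data_root_directory, str(repository))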