Updates to the configs
This commit is contained in:
parent
18e47094fd
commit
cddf836fce
29 changed files with 167 additions and 78 deletions
|
|
@ -8,6 +8,11 @@ from cognee.infrastructure import infrastructure_config
|
|||
from cognee.infrastructure.files.storage import LocalStorage
|
||||
from cognee.modules.discovery import discover_directory_datasets
|
||||
from cognee.utils import send_telemetry
|
||||
from cognee.base_config import get_base_config
|
||||
base_config = get_base_config()
|
||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||
|
||||
relational_config = get_relationaldb_config()
|
||||
|
||||
|
||||
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
|
||||
|
|
@ -46,10 +51,10 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_nam
|
|||
return []
|
||||
|
||||
async def add_files(file_paths: List[str], dataset_name: str):
|
||||
infra_config = infrastructure_config.get_config()
|
||||
data_directory_path = infra_config["data_root_directory"]
|
||||
# infra_config = infrastructure_config.get_config()
|
||||
data_directory_path = base_config.data_root_directory
|
||||
|
||||
LocalStorage.ensure_directory_exists(infra_config["database_directory_path"])
|
||||
LocalStorage.ensure_directory_exists(relational_config.database_directory_path)
|
||||
|
||||
processed_file_paths = []
|
||||
|
||||
|
|
@ -68,7 +73,7 @@ async def add_files(file_paths: List[str], dataset_name: str):
|
|||
else:
|
||||
processed_file_paths.append(file_path)
|
||||
|
||||
db = duckdb.connect(infra_config["database_path"])
|
||||
db = duckdb.connect(relational_config.database_path)
|
||||
|
||||
destination = dlt.destinations.duckdb(
|
||||
credentials = db,
|
||||
|
|
@ -120,7 +125,7 @@ async def add_data_directory(data_path: str, dataset_name: str = None):
|
|||
return await asyncio.gather(*results)
|
||||
|
||||
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
|
||||
data_directory_path = infrastructure_config.get_config()["data_root_directory"]
|
||||
data_directory_path = base_config.data_root_directory
|
||||
|
||||
classified_data = ingestion.classify(data, filename)
|
||||
# data_id = ingestion.identify(classified_data)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,9 @@ from uuid import UUID, uuid4
|
|||
from typing import Union, BinaryIO, List
|
||||
import cognee.modules.ingestion as ingestion
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||
|
||||
relational_config = get_relationaldb_config()
|
||||
class DatasetException(Exception):
|
||||
message: str
|
||||
|
||||
|
|
@ -16,7 +18,7 @@ async def add_standalone(
|
|||
dataset_id: UUID = uuid4(),
|
||||
dataset_name: str = None
|
||||
):
|
||||
db_engine = infrastructure_config.get_config()["database_engine"]
|
||||
db_engine = relational_config.database_engine
|
||||
if db_engine.is_db_done is not True:
|
||||
await db_engine.ensure_tables()
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,21 @@ graph_config = get_graph_config()
|
|||
config = Config()
|
||||
config.load()
|
||||
|
||||
from cognee.base_config import get_base_config
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
||||
from cognee.modules.cognify.config import get_cognify_config
|
||||
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
|
||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||
|
||||
relational_config = get_relationaldb_config()
|
||||
|
||||
|
||||
cognify_config = get_cognify_config()
|
||||
chunk_config = get_chunk_config()
|
||||
base_config = get_base_config()
|
||||
embedding_config = get_embedding_config()
|
||||
|
||||
# aclient = instructor.patch(OpenAI())
|
||||
|
||||
USER_ID = "default_user"
|
||||
|
|
@ -47,11 +62,11 @@ async def cognify(datasets: Union[str, List[str]] = None):
|
|||
stopwords.ensure_loaded()
|
||||
create_task_status_table()
|
||||
|
||||
graph_db_type = infrastructure_config.get_config()["graph_engine"]
|
||||
graph_db_type = graph_config.graph_engine
|
||||
|
||||
graph_client = await get_graph_client(graph_db_type)
|
||||
|
||||
db_engine = infrastructure_config.get_config()["database_engine"]
|
||||
db_engine = relational_config.database_engine
|
||||
|
||||
if datasets is None or len(datasets) == 0:
|
||||
datasets = db_engine.get_datasets()
|
||||
|
|
@ -77,8 +92,8 @@ async def cognify(datasets: Union[str, List[str]] = None):
|
|||
dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset)))
|
||||
|
||||
|
||||
chunk_engine = infrastructure_config.get_config()["chunk_engine"]
|
||||
chunk_strategy = infrastructure_config.get_config()["chunk_strategy"]
|
||||
chunk_engine = chunk_config.chunk_engine
|
||||
chunk_strategy = chunk_config.chunk_strategy
|
||||
|
||||
async def process_batch(files_batch):
|
||||
data_chunks = {}
|
||||
|
|
@ -129,7 +144,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
|
|||
|
||||
for (dataset_name, files) in dataset_files:
|
||||
for file_metadata in files:
|
||||
graph_topology = infrastructure_config.get_config()["graph_model"]
|
||||
graph_topology = graph_config.graph_model
|
||||
|
||||
if graph_topology == SourceCodeGraph:
|
||||
parent_node_id = f"{file_metadata['name']}.{file_metadata['extension']}"
|
||||
|
|
@ -164,7 +179,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
|||
|
||||
graph_client = await get_graph_client(graph_config.graph_engine)
|
||||
|
||||
graph_topology = infrastructure_config.get_config()["graph_model"]
|
||||
graph_topology = cognify_config.graph_model
|
||||
if graph_topology == SourceCodeGraph:
|
||||
classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]
|
||||
elif graph_topology == KnowledgeGraph:
|
||||
|
|
@ -186,7 +201,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
|||
print(f"Chunk ({chunk_id}) summarized.")
|
||||
|
||||
cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
|
||||
cognitive_layers = cognitive_layers[:config.cognitive_layers_limit]
|
||||
cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit]
|
||||
|
||||
try:
|
||||
cognitive_layers = (await add_cognitive_layers(graph_client, document_id, cognitive_layers))[:2]
|
||||
|
|
@ -197,8 +212,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
|||
pass
|
||||
|
||||
|
||||
if infrastructure_config.get_config()["connect_documents"] is True:
|
||||
db_engine = infrastructure_config.get_config()["database_engine"]
|
||||
if cognify_config.connect_documents is True:
|
||||
db_engine = relational_config.database_engine
|
||||
relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id)
|
||||
|
||||
list_of_nodes = []
|
||||
|
|
@ -220,7 +235,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
|||
await connect_nodes_in_graph(
|
||||
graph_client,
|
||||
relationships,
|
||||
score_threshold = infrastructure_config.get_config()["intra_layer_score_treshold"]
|
||||
score_threshold = cognify_config.intra_layer_score_treshold
|
||||
)
|
||||
|
||||
send_telemetry("cognee.cognify")
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from cognee.infrastructure.databases.graph.config import get_graph_config
|
|||
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
||||
from cognee.modules.cognify.config import get_cognify_config
|
||||
|
||||
|
||||
cognify_config = get_cognify_config()
|
||||
chunk_config = get_chunk_config()
|
||||
graph_config = get_graph_config()
|
||||
|
|
@ -24,9 +25,6 @@ class config():
|
|||
def monitoring_tool(monitoring_tool: object):
|
||||
base_config.monitoring_tool = monitoring_tool
|
||||
|
||||
|
||||
|
||||
|
||||
@staticmethod
|
||||
def set_classification_model(classification_model: object):
|
||||
cognify_config.classification_model = classification_model
|
||||
|
|
@ -57,9 +55,7 @@ class config():
|
|||
|
||||
@staticmethod
|
||||
def llm_provider(llm_provider: str):
|
||||
infrastructure_config.set_config({
|
||||
"llm_provider": llm_provider
|
||||
})
|
||||
graph_config.llm_provider = llm_provider
|
||||
|
||||
@staticmethod
|
||||
def intra_layer_score_treshold(intra_layer_score_treshold: str):
|
||||
|
|
@ -77,6 +73,5 @@ class config():
|
|||
|
||||
@staticmethod
|
||||
def set_graph_topology(graph_topology: object):
|
||||
infrastructure_config.set_config({
|
||||
"graph_topology": graph_topology
|
||||
})
|
||||
get_cognify_config.graph_topology =graph_topology
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
from duckdb import CatalogException
|
||||
from cognee.modules.discovery import discover_directory_datasets
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||
|
||||
relational_config = get_relationaldb_config()
|
||||
|
||||
class datasets():
|
||||
@staticmethod
|
||||
def list_datasets():
|
||||
db = infrastructure_config.get_config("database_engine")
|
||||
db = relational_config.db_engine
|
||||
return db.get_datasets()
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -14,7 +17,7 @@ class datasets():
|
|||
|
||||
@staticmethod
|
||||
def list_data(dataset_name: str):
|
||||
db = infrastructure_config.get_config("database_engine")
|
||||
db = relational_config.db_engine
|
||||
try:
|
||||
return db.get_files_metadata(dataset_name)
|
||||
except CatalogException:
|
||||
|
|
@ -22,7 +25,7 @@ class datasets():
|
|||
|
||||
@staticmethod
|
||||
def get_status(dataset_ids: list[str]) -> dict:
|
||||
db = infrastructure_config.get_config("database_engine")
|
||||
db = relational_config.db_engine
|
||||
try:
|
||||
return db.get_data("cognee_task_status", {
|
||||
"data_id": dataset_ids
|
||||
|
|
@ -32,7 +35,7 @@ class datasets():
|
|||
|
||||
@staticmethod
|
||||
def delete_dataset(dataset_id: str):
|
||||
db = infrastructure_config.get_config("database_engine")
|
||||
db = relational_config.db_engine
|
||||
try:
|
||||
return db.delete_table(dataset_id)
|
||||
except CatalogException:
|
||||
|
|
|
|||
|
|
@ -1,11 +1,17 @@
|
|||
from cognee.base_config import get_base_config
|
||||
from cognee.infrastructure.files.storage import LocalStorage
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
base_config =get_base_config()
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
graph_config = get_graph_config()
|
||||
vector_config = get_vectordb_config()
|
||||
|
||||
class prune():
|
||||
@staticmethod
|
||||
async def prune_data():
|
||||
data_root_directory = infrastructure_config.get_config()["data_root_directory"]
|
||||
data_root_directory = base_config.data_root_directory
|
||||
LocalStorage.remove_all(data_root_directory)
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -13,11 +19,11 @@ class prune():
|
|||
infra_config = infrastructure_config.get_config()
|
||||
|
||||
if graph:
|
||||
graph_client = await get_graph_client(infra_config["graph_engine"])
|
||||
graph_client = await get_graph_client(graph_config.graph_engine)
|
||||
await graph_client.delete_graph()
|
||||
|
||||
if vector:
|
||||
vector_client = infra_config["vector_engine"]
|
||||
vector_client = vector_config.vector_engine
|
||||
await vector_client.prune()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,8 @@ from cognee.modules.search.graph.search_summary import search_summary
|
|||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.utils import send_telemetry
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
graph_config = get_graph_config()
|
||||
|
||||
class SearchType(Enum):
|
||||
ADJACENT = 'ADJACENT'
|
||||
|
|
@ -49,7 +51,7 @@ async def search(search_type: str, params: Dict[str, Any]) -> List:
|
|||
|
||||
|
||||
async def specific_search(query_params: List[SearchParameters]) -> List:
|
||||
graph_client = await get_graph_client(infrastructure_config.get_config()["graph_engine"])
|
||||
graph_client = await get_graph_client(graph_config.graph_engine)
|
||||
graph = graph_client.graph
|
||||
|
||||
search_functions: Dict[SearchType, Callable] = {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryMod
|
|||
import pandas as pd
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
USER_ID = "default_user"
|
||||
|
||||
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
|||
from cognee.root_dir import get_absolute_path
|
||||
|
||||
from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \
|
||||
DefaultCognitiveLayer
|
||||
DefaultCognitiveLayer, DefaultGraphModel
|
||||
|
||||
|
||||
# Monitoring tool
|
||||
|
|
@ -18,6 +18,8 @@ class CognifyConfig(BaseSettings):
|
|||
cognitive_layer_model: object = DefaultCognitiveLayer
|
||||
intra_layer_score_treshold: float = 0.98
|
||||
connect_documents: bool = False
|
||||
graph_topology: object = DefaultGraphModel
|
||||
cognitive_layers_limit: int = 2
|
||||
|
||||
|
||||
|
||||
|
|
@ -31,6 +33,8 @@ class CognifyConfig(BaseSettings):
|
|||
"cognitive_layer_model": self.cognitive_layer_model,
|
||||
"intra_layer_score_treshold": self.intra_layer_score_treshold,
|
||||
"connect_documents": self.connect_documents,
|
||||
"graph_topology": self.graph_topology,
|
||||
"cognitive_layers_limit": self.cognitive_layers_limit
|
||||
}
|
||||
|
||||
@lru_cache
|
||||
|
|
|
|||
|
|
@ -2,10 +2,12 @@ from datetime import datetime
|
|||
from uuid import uuid4
|
||||
from typing import List, Tuple, TypedDict
|
||||
from pydantic import BaseModel
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.infrastructure.databases.vector import DataPoint
|
||||
from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
graph_config = get_graph_config()
|
||||
vectordb_config = get_vectordb_config()
|
||||
class GraphLike(TypedDict):
|
||||
nodes: List
|
||||
edges: List
|
||||
|
|
@ -17,8 +19,8 @@ async def add_cognitive_layer_graphs(
|
|||
chunk_id: str,
|
||||
layer_graphs: List[Tuple[str, GraphLike]],
|
||||
):
|
||||
vector_client = infrastructure_config.get_config("vector_engine")
|
||||
graph_model = infrastructure_config.get_config("graph_model")
|
||||
vector_client = vectordb_config.vector_engine
|
||||
graph_model = graph_config.graph_model
|
||||
|
||||
for (layer_id, layer_graph) in layer_graphs:
|
||||
graph_nodes = []
|
||||
|
|
|
|||
|
|
@ -2,11 +2,14 @@ from uuid import uuid4
|
|||
from typing import List
|
||||
from datetime import datetime
|
||||
from pydantic import BaseModel
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.infrastructure.databases.vector import DataPoint
|
||||
|
||||
from cognee.infrastructure.databases.vector import DataPoint
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
graph_config = get_graph_config()
|
||||
vectordb_config = get_vectordb_config()
|
||||
async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str]) -> None:
|
||||
vector_client = infrastructure_config.get_config("vector_engine")
|
||||
vector_client = vectordb_config.vector_engine
|
||||
|
||||
keyword_nodes = []
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
import uuid
|
||||
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
# from cognee.infrastructure import infrastructure_config
|
||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
from cognee.shared.data_models import GraphDBType
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
graph_config = get_graph_config()
|
||||
vectordb_config = get_vectordb_config()
|
||||
|
||||
|
||||
async def group_nodes_by_layer(node_descriptions):
|
||||
|
|
@ -41,7 +44,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9):
|
|||
if relationship['score'] > score_threshold:
|
||||
|
||||
# For NetworkX
|
||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
||||
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||
searched_node_id_found = await get_node_by_unique_id(graph.graph, relationship['searched_node_id'])
|
||||
original_id_for_search_found = await get_node_by_unique_id(graph.graph, relationship['original_id_for_search'])
|
||||
if searched_node_id_found and original_id_for_search_found:
|
||||
|
|
@ -54,7 +57,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9):
|
|||
)
|
||||
|
||||
# For Neo4j
|
||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
||||
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||
# Neo4j specific logic to add an edge
|
||||
# This is just a placeholder, replace it with actual Neo4j logic
|
||||
print("query is ", f"""MATCH (a), (b) WHERE a.unique_id = '{relationship['searched_node_id']}' AND b.unique_id = '{relationship['original_id_for_search']}' CREATE (a)-[:CONNECTED {{weight:{relationship['score']}}}]->(b)""")
|
||||
|
|
|
|||
|
|
@ -2,10 +2,13 @@
|
|||
from typing import Optional, Any
|
||||
from pydantic import BaseModel
|
||||
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
# from cognee.infrastructure import infrastructure_config
|
||||
from cognee.shared.data_models import GraphDBType
|
||||
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
graph_config = get_graph_config()
|
||||
vectordb_config = get_vectordb_config()
|
||||
async def generate_node_id(instance: BaseModel) -> str:
|
||||
for field in ["id", "doc_id", "location_id", "type_id", "node_id"]:
|
||||
if hasattr(instance, field):
|
||||
|
|
@ -30,7 +33,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di
|
|||
- Exception: If there is an error during the node or edge addition process, it logs the error and continues without interrupting the execution flow.
|
||||
|
||||
Note:
|
||||
- The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the global `infrastructure_config`.
|
||||
- The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the graph configuration.
|
||||
"""
|
||||
|
||||
# Initialize result to None to ensure a clear return path
|
||||
|
|
@ -46,7 +49,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di
|
|||
print("added node", result)
|
||||
|
||||
# Add an edge if a parent ID is provided and the graph engine is NETWORKX
|
||||
if parent_id and "default_relationship" in node_data and infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
||||
if parent_id and "default_relationship" in node_data and graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||
|
||||
try:
|
||||
await client.add_edge(parent_id, node_id, relationship_name = node_data["default_relationship"]["type"], edge_properties = node_data)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,11 @@
|
|||
from typing import Dict, List
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
graph_config = get_graph_config()
|
||||
vectordb_config = get_vectordb_config()
|
||||
|
||||
async def resolve_cross_graph_references(nodes_by_layer: Dict):
|
||||
results = []
|
||||
|
||||
|
|
@ -16,7 +21,7 @@ async def resolve_cross_graph_references(nodes_by_layer: Dict):
|
|||
return results
|
||||
|
||||
async def get_nodes_by_layer(layer_id: str, layer_nodes: List):
|
||||
vector_engine = infrastructure_config.get_config()["vector_engine"]
|
||||
vector_engine = vectordb_config.vector_engine
|
||||
|
||||
score_points = await vector_engine.batch_search(
|
||||
layer_id,
|
||||
|
|
|
|||
|
|
@ -2,6 +2,9 @@ import logging
|
|||
from typing import List, Dict
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from.extraction.extract_cognitive_layers import extract_cognitive_layers
|
||||
from cognee.modules.cognify.config import get_cognify_config
|
||||
|
||||
config = get_cognify_config()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -10,7 +13,7 @@ async def get_cognitive_layers(content: str, categories: List[Dict]):
|
|||
return (await extract_cognitive_layers(
|
||||
content,
|
||||
categories[0],
|
||||
infrastructure_config.get_config()["cognitive_layer_model"]
|
||||
config.cognitive_layer_model
|
||||
)).cognitive_layers
|
||||
except Exception as error:
|
||||
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
|
||||
|
|
|
|||
|
|
@ -1,14 +1,15 @@
|
|||
import logging
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from .extraction.extract_categories import extract_categories
|
||||
from cognee.modules.cognify.config import get_cognify_config
|
||||
|
||||
config = get_cognify_config()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def get_content_categories(content: str):
|
||||
try:
|
||||
return await extract_categories(
|
||||
content,
|
||||
infrastructure_config.get_config()["classification_model"]
|
||||
config.classification_model
|
||||
)
|
||||
except Exception as error:
|
||||
logger.error("Error extracting categories from content: %s", error, exc_info = True)
|
||||
|
|
|
|||
|
|
@ -1,14 +1,16 @@
|
|||
import logging
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from.extraction.extract_summary import extract_summary
|
||||
from cognee.modules.cognify.config import get_cognify_config
|
||||
|
||||
config = get_cognify_config()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def get_content_summary(content: str):
|
||||
try:
|
||||
return await extract_summary(
|
||||
content,
|
||||
infrastructure_config.get_config()["summarization_model"]
|
||||
config.summarization_model
|
||||
)
|
||||
except Exception as error:
|
||||
logger.error("Error extracting summary from content: %s", error, exc_info = True)
|
||||
|
|
|
|||
|
|
@ -2,7 +2,9 @@ import logging
|
|||
import asyncio
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
|
||||
|
||||
from.extraction.extract_summary import extract_summary
|
||||
from cognee.modules.cognify.config import get_cognify_config
|
||||
config = get_cognify_config()
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
|
||||
|
|
@ -11,7 +13,7 @@ async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]
|
|||
extract_knowledge_graph(
|
||||
content,
|
||||
cognitive_layer_data["name"],
|
||||
infrastructure_config.get_config()["graph_model"]
|
||||
config.graph_model
|
||||
) for (_, cognitive_layer_data) in cognitive_layers
|
||||
]
|
||||
|
||||
|
|
|
|||
|
|
@ -3,11 +3,13 @@ from cognee.infrastructure import infrastructure_config
|
|||
from cognee.infrastructure.data import Dataset, Data
|
||||
from cognee.infrastructure.files import remove_file_from_storage
|
||||
from cognee.infrastructure.databases.relational import DatabaseEngine
|
||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||
config = get_relationaldb_config()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def add_data_to_dataset(dataset: Dataset, data: Data):
|
||||
db_engine: DatabaseEngine = infrastructure_config.get_config()["database_engine"]
|
||||
db_engine: DatabaseEngine = config.database_engine
|
||||
|
||||
existing_dataset = (await db_engine.query_entity(dataset)).scalar()
|
||||
existing_data = (await db_engine.query_entity(data)).scalar()
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
from typing import Union, Dict
|
||||
import networkx as nx
|
||||
from cognee.shared.data_models import GraphDBType
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
graph_config = get_graph_config()
|
||||
async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: dict = None) -> Dict[str, str]:
|
||||
"""
|
||||
Find the neighbours of a given node in the graph and return their descriptions.
|
||||
|
|
@ -22,7 +24,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param:
|
|||
if node_id is None:
|
||||
return {}
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
||||
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||
if node_id not in graph:
|
||||
return {}
|
||||
|
||||
|
|
@ -30,7 +32,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param:
|
|||
neighbor_descriptions = {neighbor: graph.nodes[neighbor].get('description') for neighbor in neighbors}
|
||||
return neighbor_descriptions
|
||||
|
||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
||||
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||
cypher_query = """
|
||||
MATCH (node {id: $node_id})-[:CONNECTED_TO]->(neighbor)
|
||||
RETURN neighbor.id AS neighbor_id, neighbor.description AS description
|
||||
|
|
|
|||
|
|
@ -10,6 +10,11 @@ from cognee.modules.search.llm.extraction.categorize_relevant_category import ca
|
|||
from cognee.shared.data_models import GraphDBType, DefaultContentPrediction
|
||||
import networkx as nx
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
graph_config = get_graph_config()
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
vector_config = get_vectordb_config()
|
||||
|
||||
def strip_exact_regex(s, substring):
|
||||
# Escaping substring to be used in a regex pattern
|
||||
pattern = re.escape(substring)
|
||||
|
|
@ -37,7 +42,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label:
|
|||
"""
|
||||
# Determine which client is in use based on the configuration
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
||||
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||
|
||||
categories_and_ids = [
|
||||
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
|
||||
|
|
@ -53,7 +58,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label:
|
|||
descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes}
|
||||
return descriptions
|
||||
|
||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
||||
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||
# Logic for Neo4j
|
||||
cypher_query = """
|
||||
MATCH (n)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,10 @@ from pydantic import BaseModel
|
|||
|
||||
from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category
|
||||
from cognee.shared.data_models import GraphDBType
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
graph_config = get_graph_config()
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
vector_config = get_vectordb_config()
|
||||
|
||||
async def search_cypher(query:str, graph: Union[nx.Graph, any]):
|
||||
"""
|
||||
|
|
@ -16,7 +19,7 @@ async def search_cypher(query:str, graph: Union[nx.Graph, any]):
|
|||
|
||||
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
||||
if graph_config.graph_engine == GraphDBType.NEO4J:
|
||||
result = await graph.run(query)
|
||||
return result
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,10 @@ from neo4j import AsyncSession
|
|||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
import networkx as nx
|
||||
from cognee.shared.data_models import GraphDBType
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
graph_config = get_graph_config()
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
vector_config = get_vectordb_config()
|
||||
async def search_neighbour(graph: Union[nx.Graph, any], query: str,
|
||||
other_param: dict = None):
|
||||
"""
|
||||
|
|
@ -28,7 +31,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
|
|||
if node_id is None:
|
||||
return []
|
||||
|
||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
||||
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||
relevant_context = []
|
||||
target_layer_uuid = graph.nodes[node_id].get('layer_uuid')
|
||||
|
||||
|
|
@ -39,7 +42,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
|
|||
return relevant_context
|
||||
|
||||
|
||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
||||
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||
if isinstance(graph, AsyncSession):
|
||||
cypher_query = """
|
||||
MATCH (target {id: $node_id})
|
||||
|
|
|
|||
|
|
@ -7,7 +7,10 @@ from cognee.infrastructure import infrastructure_config
|
|||
|
||||
from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary
|
||||
from cognee.shared.data_models import GraphDBType, ResponseSummaryModel
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
graph_config = get_graph_config()
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
vector_config = get_vectordb_config()
|
||||
import re
|
||||
|
||||
def strip_exact_regex(s, substring):
|
||||
|
|
@ -30,7 +33,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
|
|||
- Dict[str, str]: A dictionary where keys are node identifiers containing the query string, and values are their 'summary' attributes.
|
||||
"""
|
||||
|
||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
||||
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||
print("graph", graph)
|
||||
summaries_and_ids = [
|
||||
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
|
||||
|
|
@ -48,7 +51,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
|
|||
return descriptions
|
||||
|
||||
|
||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
||||
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||
cypher_query = f"""
|
||||
MATCH (n)
|
||||
WHERE n.id CONTAINS $query AND EXISTS(n.summary)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,13 @@
|
|||
from dsp.utils import deduplicate
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||
|
||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||
graph_config = get_graph_config()
|
||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||
vector_config = get_vectordb_config()
|
||||
|
||||
async def search_similarity(query: str, graph):
|
||||
graph_db_type = infrastructure_config.get_config()["graph_engine"]
|
||||
graph_db_type = graph_config.graph_engine
|
||||
|
||||
graph_client = await get_graph_client(graph_db_type)
|
||||
|
||||
|
|
@ -17,7 +20,7 @@ async def search_similarity(query: str, graph):
|
|||
graph_nodes = []
|
||||
|
||||
for layer_id in unique_layer_uuids:
|
||||
vector_engine = infrastructure_config.get_config()["vector_engine"]
|
||||
vector_engine = vector_config.vector_engine
|
||||
|
||||
results = await vector_engine.search(layer_id, query_text = query, limit = 10)
|
||||
print("results", results)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
|
||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||
|
||||
config = get_relationaldb_config()
|
||||
|
||||
def create_task_status_table():
|
||||
db_engine = infrastructure_config.get_config("database_engine")
|
||||
db_engine = config.db_engine
|
||||
|
||||
db_engine.create_table("cognee_task_status", [
|
||||
dict(name = "data_id", type = "STRING"),
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
|
||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||
|
||||
config = get_relationaldb_config()
|
||||
|
||||
def update_task_status(data_id: str, status: str):
|
||||
db_engine = infrastructure_config.get_config("database_engine")
|
||||
db_engine = config.db_engine
|
||||
db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)])
|
||||
|
|
|
|||
|
|
@ -2,13 +2,15 @@ import logging
|
|||
from typing import List, Dict
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.modules.topology.extraction.extract_topology import extract_categories
|
||||
from cognee.modules.cognify.config import get_cognify_config
|
||||
|
||||
cognify_config = get_cognify_config()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def infer_data_topology(content: str, graph_topology=None):
|
||||
if graph_topology is None:
|
||||
graph_topology = infrastructure_config.get_config()["graph_topology"]
|
||||
graph_topology = cognify_config.graph_topology
|
||||
try:
|
||||
return (await extract_categories(
|
||||
content,
|
||||
|
|
|
|||
|
|
@ -1,13 +1,16 @@
|
|||
import os
|
||||
import glob
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Dict, List, Optional, Union, Type, Any
|
||||
from typing import Dict, List, Optional, Union, Type, Any, Tuple
|
||||
from datetime import datetime
|
||||
|
||||
from cognee import config
|
||||
from cognee.base_config import get_base_config
|
||||
from cognee.infrastructure import infrastructure_config
|
||||
from cognee.modules.cognify.config import get_cognify_config
|
||||
from cognee.modules.topology.infer_data_topology import infer_data_topology
|
||||
|
||||
cognify_config = get_cognify_config()
|
||||
base_config = get_base_config()
|
||||
|
||||
class Relationship(BaseModel):
|
||||
type: str = Field(..., description="The type of relationship, e.g., 'belongs_to'.")
|
||||
|
|
@ -84,7 +87,7 @@ class TopologyEngine:
|
|||
async def infer_from_directory_structure(self, node_id: str, repository: str, model: Type[BaseModel]) -> GitHubRepositoryModel:
|
||||
""" Infer the topology of a repository from its file structure """
|
||||
|
||||
path = infrastructure_config.get_config()["data_root_directory"]
|
||||
path = base_config.data_root_directory
|
||||
path = path + "/" + str(repository)
|
||||
print(path)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue