Updates to the configs
This commit is contained in:
parent
18e47094fd
commit
cddf836fce
29 changed files with 167 additions and 78 deletions
|
|
@ -8,6 +8,11 @@ from cognee.infrastructure import infrastructure_config
|
||||||
from cognee.infrastructure.files.storage import LocalStorage
|
from cognee.infrastructure.files.storage import LocalStorage
|
||||||
from cognee.modules.discovery import discover_directory_datasets
|
from cognee.modules.discovery import discover_directory_datasets
|
||||||
from cognee.utils import send_telemetry
|
from cognee.utils import send_telemetry
|
||||||
|
from cognee.base_config import get_base_config
|
||||||
|
base_config = get_base_config()
|
||||||
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
|
relational_config = get_relationaldb_config()
|
||||||
|
|
||||||
|
|
||||||
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
|
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
|
||||||
|
|
@ -46,10 +51,10 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_nam
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def add_files(file_paths: List[str], dataset_name: str):
|
async def add_files(file_paths: List[str], dataset_name: str):
|
||||||
infra_config = infrastructure_config.get_config()
|
# infra_config = infrastructure_config.get_config()
|
||||||
data_directory_path = infra_config["data_root_directory"]
|
data_directory_path = base_config.data_root_directory
|
||||||
|
|
||||||
LocalStorage.ensure_directory_exists(infra_config["database_directory_path"])
|
LocalStorage.ensure_directory_exists(relational_config.database_directory_path)
|
||||||
|
|
||||||
processed_file_paths = []
|
processed_file_paths = []
|
||||||
|
|
||||||
|
|
@ -68,7 +73,7 @@ async def add_files(file_paths: List[str], dataset_name: str):
|
||||||
else:
|
else:
|
||||||
processed_file_paths.append(file_path)
|
processed_file_paths.append(file_path)
|
||||||
|
|
||||||
db = duckdb.connect(infra_config["database_path"])
|
db = duckdb.connect(relational_config.database_path)
|
||||||
|
|
||||||
destination = dlt.destinations.duckdb(
|
destination = dlt.destinations.duckdb(
|
||||||
credentials = db,
|
credentials = db,
|
||||||
|
|
@ -120,7 +125,7 @@ async def add_data_directory(data_path: str, dataset_name: str = None):
|
||||||
return await asyncio.gather(*results)
|
return await asyncio.gather(*results)
|
||||||
|
|
||||||
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
|
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
|
||||||
data_directory_path = infrastructure_config.get_config()["data_root_directory"]
|
data_directory_path = base_config.data_root_directory
|
||||||
|
|
||||||
classified_data = ingestion.classify(data, filename)
|
classified_data = ingestion.classify(data, filename)
|
||||||
# data_id = ingestion.identify(classified_data)
|
# data_id = ingestion.identify(classified_data)
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,9 @@ from uuid import UUID, uuid4
|
||||||
from typing import Union, BinaryIO, List
|
from typing import Union, BinaryIO, List
|
||||||
import cognee.modules.ingestion as ingestion
|
import cognee.modules.ingestion as ingestion
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
|
relational_config = get_relationaldb_config()
|
||||||
class DatasetException(Exception):
|
class DatasetException(Exception):
|
||||||
message: str
|
message: str
|
||||||
|
|
||||||
|
|
@ -16,7 +18,7 @@ async def add_standalone(
|
||||||
dataset_id: UUID = uuid4(),
|
dataset_id: UUID = uuid4(),
|
||||||
dataset_name: str = None
|
dataset_name: str = None
|
||||||
):
|
):
|
||||||
db_engine = infrastructure_config.get_config()["database_engine"]
|
db_engine = relational_config.database_engine
|
||||||
if db_engine.is_db_done is not True:
|
if db_engine.is_db_done is not True:
|
||||||
await db_engine.ensure_tables()
|
await db_engine.ensure_tables()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,21 @@ graph_config = get_graph_config()
|
||||||
config = Config()
|
config = Config()
|
||||||
config.load()
|
config.load()
|
||||||
|
|
||||||
|
from cognee.base_config import get_base_config
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
||||||
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
|
||||||
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
|
relational_config = get_relationaldb_config()
|
||||||
|
|
||||||
|
|
||||||
|
cognify_config = get_cognify_config()
|
||||||
|
chunk_config = get_chunk_config()
|
||||||
|
base_config = get_base_config()
|
||||||
|
embedding_config = get_embedding_config()
|
||||||
|
|
||||||
# aclient = instructor.patch(OpenAI())
|
# aclient = instructor.patch(OpenAI())
|
||||||
|
|
||||||
USER_ID = "default_user"
|
USER_ID = "default_user"
|
||||||
|
|
@ -47,11 +62,11 @@ async def cognify(datasets: Union[str, List[str]] = None):
|
||||||
stopwords.ensure_loaded()
|
stopwords.ensure_loaded()
|
||||||
create_task_status_table()
|
create_task_status_table()
|
||||||
|
|
||||||
graph_db_type = infrastructure_config.get_config()["graph_engine"]
|
graph_db_type = graph_config.graph_engine
|
||||||
|
|
||||||
graph_client = await get_graph_client(graph_db_type)
|
graph_client = await get_graph_client(graph_db_type)
|
||||||
|
|
||||||
db_engine = infrastructure_config.get_config()["database_engine"]
|
db_engine = relational_config.database_engine
|
||||||
|
|
||||||
if datasets is None or len(datasets) == 0:
|
if datasets is None or len(datasets) == 0:
|
||||||
datasets = db_engine.get_datasets()
|
datasets = db_engine.get_datasets()
|
||||||
|
|
@ -77,8 +92,8 @@ async def cognify(datasets: Union[str, List[str]] = None):
|
||||||
dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset)))
|
dataset_files.append((added_dataset, db_engine.get_files_metadata(added_dataset)))
|
||||||
|
|
||||||
|
|
||||||
chunk_engine = infrastructure_config.get_config()["chunk_engine"]
|
chunk_engine = chunk_config.chunk_engine
|
||||||
chunk_strategy = infrastructure_config.get_config()["chunk_strategy"]
|
chunk_strategy = chunk_config.chunk_strategy
|
||||||
|
|
||||||
async def process_batch(files_batch):
|
async def process_batch(files_batch):
|
||||||
data_chunks = {}
|
data_chunks = {}
|
||||||
|
|
@ -129,7 +144,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
|
||||||
|
|
||||||
for (dataset_name, files) in dataset_files:
|
for (dataset_name, files) in dataset_files:
|
||||||
for file_metadata in files:
|
for file_metadata in files:
|
||||||
graph_topology = infrastructure_config.get_config()["graph_model"]
|
graph_topology = graph_config.graph_model
|
||||||
|
|
||||||
if graph_topology == SourceCodeGraph:
|
if graph_topology == SourceCodeGraph:
|
||||||
parent_node_id = f"{file_metadata['name']}.{file_metadata['extension']}"
|
parent_node_id = f"{file_metadata['name']}.{file_metadata['extension']}"
|
||||||
|
|
@ -164,7 +179,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
||||||
|
|
||||||
graph_client = await get_graph_client(graph_config.graph_engine)
|
graph_client = await get_graph_client(graph_config.graph_engine)
|
||||||
|
|
||||||
graph_topology = infrastructure_config.get_config()["graph_model"]
|
# Read the graph model from the graph configuration, consistent with the other
# lookups of the former "graph_model" setting in this module.
graph_topology = graph_config.graph_model
|
||||||
if graph_topology == SourceCodeGraph:
|
if graph_topology == SourceCodeGraph:
|
||||||
classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]
|
classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]
|
||||||
elif graph_topology == KnowledgeGraph:
|
elif graph_topology == KnowledgeGraph:
|
||||||
|
|
@ -186,7 +201,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
||||||
print(f"Chunk ({chunk_id}) summarized.")
|
print(f"Chunk ({chunk_id}) summarized.")
|
||||||
|
|
||||||
cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
|
cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
|
||||||
cognitive_layers = cognitive_layers[:config.cognitive_layers_limit]
|
cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
cognitive_layers = (await add_cognitive_layers(graph_client, document_id, cognitive_layers))[:2]
|
cognitive_layers = (await add_cognitive_layers(graph_client, document_id, cognitive_layers))[:2]
|
||||||
|
|
@ -197,8 +212,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
if infrastructure_config.get_config()["connect_documents"] is True:
|
if cognify_config.connect_documents is True:
|
||||||
db_engine = infrastructure_config.get_config()["database_engine"]
|
db_engine = relational_config.database_engine
|
||||||
relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id)
|
relevant_documents_to_connect = db_engine.fetch_cognify_data(excluded_document_id = document_id)
|
||||||
|
|
||||||
list_of_nodes = []
|
list_of_nodes = []
|
||||||
|
|
@ -220,7 +235,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
||||||
await connect_nodes_in_graph(
|
await connect_nodes_in_graph(
|
||||||
graph_client,
|
graph_client,
|
||||||
relationships,
|
relationships,
|
||||||
score_threshold = infrastructure_config.get_config()["intra_layer_score_treshold"]
|
score_threshold = cognify_config.intra_layer_score_treshold
|
||||||
)
|
)
|
||||||
|
|
||||||
send_telemetry("cognee.cognify")
|
send_telemetry("cognee.cognify")
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
|
||||||
|
|
||||||
cognify_config = get_cognify_config()
|
cognify_config = get_cognify_config()
|
||||||
chunk_config = get_chunk_config()
|
chunk_config = get_chunk_config()
|
||||||
graph_config = get_graph_config()
|
graph_config = get_graph_config()
|
||||||
|
|
@ -24,9 +25,6 @@ class config():
|
||||||
def monitoring_tool(monitoring_tool: object):
|
def monitoring_tool(monitoring_tool: object):
|
||||||
base_config.monitoring_tool = monitoring_tool
|
base_config.monitoring_tool = monitoring_tool
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_classification_model(classification_model: object):
|
def set_classification_model(classification_model: object):
|
||||||
cognify_config.classification_model = classification_model
|
cognify_config.classification_model = classification_model
|
||||||
|
|
@ -57,9 +55,7 @@ class config():
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def llm_provider(llm_provider: str):
|
def llm_provider(llm_provider: str):
|
||||||
infrastructure_config.set_config({
|
graph_config.llm_provider = llm_provider
|
||||||
"llm_provider": llm_provider
|
|
||||||
})
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def intra_layer_score_treshold(intra_layer_score_treshold: str):
|
def intra_layer_score_treshold(intra_layer_score_treshold: str):
|
||||||
|
|
@ -77,6 +73,5 @@ class config():
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_graph_topology(graph_topology: object):
|
def set_graph_topology(graph_topology: object):
|
||||||
infrastructure_config.set_config({
|
# Assign on the shared cognify_config instance; setting the attribute on the
# get_cognify_config factory function would leave the actual settings unchanged.
cognify_config.graph_topology = graph_topology
|
||||||
"graph_topology": graph_topology
|
|
||||||
})
|
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,14 @@
|
||||||
from duckdb import CatalogException
|
from duckdb import CatalogException
|
||||||
from cognee.modules.discovery import discover_directory_datasets
|
from cognee.modules.discovery import discover_directory_datasets
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
|
relational_config = get_relationaldb_config()
|
||||||
|
|
||||||
class datasets():
|
class datasets():
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def list_datasets():
|
def list_datasets():
|
||||||
db = infrastructure_config.get_config("database_engine")
|
db = relational_config.db_engine
|
||||||
return db.get_datasets()
|
return db.get_datasets()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -14,7 +17,7 @@ class datasets():
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def list_data(dataset_name: str):
|
def list_data(dataset_name: str):
|
||||||
db = infrastructure_config.get_config("database_engine")
|
db = relational_config.db_engine
|
||||||
try:
|
try:
|
||||||
return db.get_files_metadata(dataset_name)
|
return db.get_files_metadata(dataset_name)
|
||||||
except CatalogException:
|
except CatalogException:
|
||||||
|
|
@ -22,7 +25,7 @@ class datasets():
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_status(dataset_ids: list[str]) -> dict:
|
def get_status(dataset_ids: list[str]) -> dict:
|
||||||
db = infrastructure_config.get_config("database_engine")
|
db = relational_config.db_engine
|
||||||
try:
|
try:
|
||||||
return db.get_data("cognee_task_status", {
|
return db.get_data("cognee_task_status", {
|
||||||
"data_id": dataset_ids
|
"data_id": dataset_ids
|
||||||
|
|
@ -32,7 +35,7 @@ class datasets():
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def delete_dataset(dataset_id: str):
|
def delete_dataset(dataset_id: str):
|
||||||
db = infrastructure_config.get_config("database_engine")
|
db = relational_config.db_engine
|
||||||
try:
|
try:
|
||||||
return db.delete_table(dataset_id)
|
return db.delete_table(dataset_id)
|
||||||
except CatalogException:
|
except CatalogException:
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,17 @@
|
||||||
|
from cognee.base_config import get_base_config
|
||||||
from cognee.infrastructure.files.storage import LocalStorage
|
from cognee.infrastructure.files.storage import LocalStorage
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
|
base_config =get_base_config()
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
|
|
||||||
class prune():
|
class prune():
|
||||||
@staticmethod
|
@staticmethod
|
||||||
async def prune_data():
|
async def prune_data():
|
||||||
data_root_directory = infrastructure_config.get_config()["data_root_directory"]
|
data_root_directory = base_config.data_root_directory
|
||||||
LocalStorage.remove_all(data_root_directory)
|
LocalStorage.remove_all(data_root_directory)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
@ -13,11 +19,11 @@ class prune():
|
||||||
infra_config = infrastructure_config.get_config()
|
infra_config = infrastructure_config.get_config()
|
||||||
|
|
||||||
if graph:
|
if graph:
|
||||||
graph_client = await get_graph_client(infra_config["graph_engine"])
|
graph_client = await get_graph_client(graph_config.graph_engine)
|
||||||
await graph_client.delete_graph()
|
await graph_client.delete_graph()
|
||||||
|
|
||||||
if vector:
|
if vector:
|
||||||
vector_client = infra_config["vector_engine"]
|
vector_client = vector_config.vector_engine
|
||||||
await vector_client.prune()
|
await vector_client.prune()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,8 @@ from cognee.modules.search.graph.search_summary import search_summary
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
from cognee.utils import send_telemetry
|
from cognee.utils import send_telemetry
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
|
||||||
class SearchType(Enum):
|
class SearchType(Enum):
|
||||||
ADJACENT = 'ADJACENT'
|
ADJACENT = 'ADJACENT'
|
||||||
|
|
@ -49,7 +51,7 @@ async def search(search_type: str, params: Dict[str, Any]) -> List:
|
||||||
|
|
||||||
|
|
||||||
async def specific_search(query_params: List[SearchParameters]) -> List:
|
async def specific_search(query_params: List[SearchParameters]) -> List:
|
||||||
graph_client = await get_graph_client(infrastructure_config.get_config()["graph_engine"])
|
graph_client = await get_graph_client(graph_config.graph_engine)
|
||||||
graph = graph_client.graph
|
graph = graph_client.graph
|
||||||
|
|
||||||
search_functions: Dict[SearchType, Callable] = {
|
search_functions: Dict[SearchType, Callable] = {
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryMod
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
USER_ID = "default_user"
|
USER_ID = "default_user"
|
||||||
|
|
||||||
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
|
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
from cognee.root_dir import get_absolute_path
|
from cognee.root_dir import get_absolute_path
|
||||||
|
|
||||||
from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \
|
from cognee.shared.data_models import MonitoringTool, DefaultContentPrediction, LabeledContent, SummarizedContent, \
|
||||||
DefaultCognitiveLayer
|
DefaultCognitiveLayer, DefaultGraphModel
|
||||||
|
|
||||||
|
|
||||||
# Monitoring tool
|
# Monitoring tool
|
||||||
|
|
@ -18,6 +18,8 @@ class CognifyConfig(BaseSettings):
|
||||||
cognitive_layer_model: object = DefaultCognitiveLayer
|
cognitive_layer_model: object = DefaultCognitiveLayer
|
||||||
intra_layer_score_treshold: float = 0.98
|
intra_layer_score_treshold: float = 0.98
|
||||||
connect_documents: bool = False
|
connect_documents: bool = False
|
||||||
|
graph_topology: object = DefaultGraphModel
|
||||||
|
cognitive_layers_limit: int = 2
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -31,6 +33,8 @@ class CognifyConfig(BaseSettings):
|
||||||
"cognitive_layer_model": self.cognitive_layer_model,
|
"cognitive_layer_model": self.cognitive_layer_model,
|
||||||
"intra_layer_score_treshold": self.intra_layer_score_treshold,
|
"intra_layer_score_treshold": self.intra_layer_score_treshold,
|
||||||
"connect_documents": self.connect_documents,
|
"connect_documents": self.connect_documents,
|
||||||
|
"graph_topology": self.graph_topology,
|
||||||
|
"cognitive_layers_limit": self.cognitive_layers_limit
|
||||||
}
|
}
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,12 @@ from datetime import datetime
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
from typing import List, Tuple, TypedDict
|
from typing import List, Tuple, TypedDict
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
from cognee.infrastructure.databases.vector import DataPoint
|
from cognee.infrastructure.databases.vector import DataPoint
|
||||||
from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
|
from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
vectordb_config = get_vectordb_config()
|
||||||
class GraphLike(TypedDict):
|
class GraphLike(TypedDict):
|
||||||
nodes: List
|
nodes: List
|
||||||
edges: List
|
edges: List
|
||||||
|
|
@ -17,8 +19,8 @@ async def add_cognitive_layer_graphs(
|
||||||
chunk_id: str,
|
chunk_id: str,
|
||||||
layer_graphs: List[Tuple[str, GraphLike]],
|
layer_graphs: List[Tuple[str, GraphLike]],
|
||||||
):
|
):
|
||||||
vector_client = infrastructure_config.get_config("vector_engine")
|
vector_client = vectordb_config.vector_engine
|
||||||
graph_model = infrastructure_config.get_config("graph_model")
|
graph_model = graph_config.graph_model
|
||||||
|
|
||||||
for (layer_id, layer_graph) in layer_graphs:
|
for (layer_id, layer_graph) in layer_graphs:
|
||||||
graph_nodes = []
|
graph_nodes = []
|
||||||
|
|
|
||||||
|
|
@ -2,11 +2,14 @@ from uuid import uuid4
|
||||||
from typing import List
|
from typing import List
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
from cognee.infrastructure.databases.vector import DataPoint
|
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.vector import DataPoint
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
vectordb_config = get_vectordb_config()
|
||||||
async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str]) -> None:
|
async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str]) -> None:
|
||||||
vector_client = infrastructure_config.get_config("vector_engine")
|
vector_client = vectordb_config.vector_engine
|
||||||
|
|
||||||
keyword_nodes = []
|
keyword_nodes = []
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,12 @@
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from cognee.infrastructure import infrastructure_config
|
# from cognee.infrastructure import infrastructure_config
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
from cognee.shared.data_models import GraphDBType
|
from cognee.shared.data_models import GraphDBType
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
vectordb_config = get_vectordb_config()
|
||||||
|
|
||||||
|
|
||||||
async def group_nodes_by_layer(node_descriptions):
|
async def group_nodes_by_layer(node_descriptions):
|
||||||
|
|
@ -41,7 +44,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9):
|
||||||
if relationship['score'] > score_threshold:
|
if relationship['score'] > score_threshold:
|
||||||
|
|
||||||
# For NetworkX
|
# For NetworkX
|
||||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||||
searched_node_id_found = await get_node_by_unique_id(graph.graph, relationship['searched_node_id'])
|
searched_node_id_found = await get_node_by_unique_id(graph.graph, relationship['searched_node_id'])
|
||||||
original_id_for_search_found = await get_node_by_unique_id(graph.graph, relationship['original_id_for_search'])
|
original_id_for_search_found = await get_node_by_unique_id(graph.graph, relationship['original_id_for_search'])
|
||||||
if searched_node_id_found and original_id_for_search_found:
|
if searched_node_id_found and original_id_for_search_found:
|
||||||
|
|
@ -54,7 +57,7 @@ async def connect_nodes_in_graph(graph, relationship_dict, score_threshold=0.9):
|
||||||
)
|
)
|
||||||
|
|
||||||
# For Neo4j
|
# For Neo4j
|
||||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||||
# Neo4j specific logic to add an edge
|
# Neo4j specific logic to add an edge
|
||||||
# This is just a placeholder, replace it with actual Neo4j logic
|
# This is just a placeholder, replace it with actual Neo4j logic
|
||||||
print("query is ", f"""MATCH (a), (b) WHERE a.unique_id = '{relationship['searched_node_id']}' AND b.unique_id = '{relationship['original_id_for_search']}' CREATE (a)-[:CONNECTED {{weight:{relationship['score']}}}]->(b)""")
|
print("query is ", f"""MATCH (a), (b) WHERE a.unique_id = '{relationship['searched_node_id']}' AND b.unique_id = '{relationship['original_id_for_search']}' CREATE (a)-[:CONNECTED {{weight:{relationship['score']}}}]->(b)""")
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,13 @@
|
||||||
from typing import Optional, Any
|
from typing import Optional, Any
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from cognee.infrastructure import infrastructure_config
|
# from cognee.infrastructure import infrastructure_config
|
||||||
from cognee.shared.data_models import GraphDBType
|
from cognee.shared.data_models import GraphDBType
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
vectordb_config = get_vectordb_config()
|
||||||
async def generate_node_id(instance: BaseModel) -> str:
|
async def generate_node_id(instance: BaseModel) -> str:
|
||||||
for field in ["id", "doc_id", "location_id", "type_id", "node_id"]:
|
for field in ["id", "doc_id", "location_id", "type_id", "node_id"]:
|
||||||
if hasattr(instance, field):
|
if hasattr(instance, field):
|
||||||
|
|
@ -30,7 +33,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di
|
||||||
- Exception: If there is an error during the node or edge addition process, it logs the error and continues without interrupting the execution flow.
|
- Exception: If there is an error during the node or edge addition process, it logs the error and continues without interrupting the execution flow.
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
- The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the global `infrastructure_config`.
|
- The function currently supports adding edges only if the graph database engine is NETWORKX, as specified in the graph configuration.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Initialize result to None to ensure a clear return path
|
# Initialize result to None to ensure a clear return path
|
||||||
|
|
@ -46,7 +49,7 @@ async def add_node(client, parent_id: Optional[str], node_id: str, node_data: di
|
||||||
print("added node", result)
|
print("added node", result)
|
||||||
|
|
||||||
# Add an edge if a parent ID is provided and the graph engine is NETWORKX
|
# Add an edge if a parent ID is provided and the graph engine is NETWORKX
|
||||||
if parent_id and "default_relationship" in node_data and infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
if parent_id and "default_relationship" in node_data and graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await client.add_edge(parent_id, node_id, relationship_name = node_data["default_relationship"]["type"], edge_properties = node_data)
|
await client.add_edge(parent_id, node_id, relationship_name = node_data["default_relationship"]["type"], edge_properties = node_data)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,11 @@
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
vectordb_config = get_vectordb_config()
|
||||||
|
|
||||||
async def resolve_cross_graph_references(nodes_by_layer: Dict):
|
async def resolve_cross_graph_references(nodes_by_layer: Dict):
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
|
|
@ -16,7 +21,7 @@ async def resolve_cross_graph_references(nodes_by_layer: Dict):
|
||||||
return results
|
return results
|
||||||
|
|
||||||
async def get_nodes_by_layer(layer_id: str, layer_nodes: List):
|
async def get_nodes_by_layer(layer_id: str, layer_nodes: List):
|
||||||
vector_engine = infrastructure_config.get_config()["vector_engine"]
|
vector_engine = vectordb_config.vector_engine
|
||||||
|
|
||||||
score_points = await vector_engine.batch_search(
|
score_points = await vector_engine.batch_search(
|
||||||
layer_id,
|
layer_id,
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,9 @@ import logging
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
from.extraction.extract_cognitive_layers import extract_cognitive_layers
|
from.extraction.extract_cognitive_layers import extract_cognitive_layers
|
||||||
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
|
||||||
|
config = get_cognify_config()
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -10,7 +13,7 @@ async def get_cognitive_layers(content: str, categories: List[Dict]):
|
||||||
return (await extract_cognitive_layers(
|
return (await extract_cognitive_layers(
|
||||||
content,
|
content,
|
||||||
categories[0],
|
categories[0],
|
||||||
infrastructure_config.get_config()["cognitive_layer_model"]
|
config.cognitive_layer_model
|
||||||
)).cognitive_layers
|
)).cognitive_layers
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
|
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,15 @@
|
||||||
import logging
|
import logging
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
from .extraction.extract_categories import extract_categories
|
from .extraction.extract_categories import extract_categories
|
||||||
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
|
||||||
|
config = get_cognify_config()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
async def get_content_categories(content: str):
|
async def get_content_categories(content: str):
|
||||||
try:
|
try:
|
||||||
return await extract_categories(
|
return await extract_categories(
|
||||||
content,
|
content,
|
||||||
infrastructure_config.get_config()["classification_model"]
|
config.classification_model
|
||||||
)
|
)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
logger.error("Error extracting categories from content: %s", error, exc_info = True)
|
logger.error("Error extracting categories from content: %s", error, exc_info = True)
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,16 @@
|
||||||
import logging
|
import logging
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
from.extraction.extract_summary import extract_summary
|
from.extraction.extract_summary import extract_summary
|
||||||
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
|
||||||
|
config = get_cognify_config()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
async def get_content_summary(content: str):
|
async def get_content_summary(content: str):
|
||||||
try:
|
try:
|
||||||
return await extract_summary(
|
return await extract_summary(
|
||||||
content,
|
content,
|
||||||
infrastructure_config.get_config()["summarization_model"]
|
config.summarization_model
|
||||||
)
|
)
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
logger.error("Error extracting summary from content: %s", error, exc_info = True)
|
logger.error("Error extracting summary from content: %s", error, exc_info = True)
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,9 @@ import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
|
from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
|
||||||
|
from.extraction.extract_summary import extract_summary
|
||||||
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
config = get_cognify_config()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
|
async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
|
||||||
|
|
@ -11,7 +13,7 @@ async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]
|
||||||
extract_knowledge_graph(
|
extract_knowledge_graph(
|
||||||
content,
|
content,
|
||||||
cognitive_layer_data["name"],
|
cognitive_layer_data["name"],
|
||||||
infrastructure_config.get_config()["graph_model"]
|
config.graph_model
|
||||||
) for (_, cognitive_layer_data) in cognitive_layers
|
) for (_, cognitive_layer_data) in cognitive_layers
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,11 +3,13 @@ from cognee.infrastructure import infrastructure_config
|
||||||
from cognee.infrastructure.data import Dataset, Data
|
from cognee.infrastructure.data import Dataset, Data
|
||||||
from cognee.infrastructure.files import remove_file_from_storage
|
from cognee.infrastructure.files import remove_file_from_storage
|
||||||
from cognee.infrastructure.databases.relational import DatabaseEngine
|
from cognee.infrastructure.databases.relational import DatabaseEngine
|
||||||
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
config = get_relationaldb_config()
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
async def add_data_to_dataset(dataset: Dataset, data: Data):
|
async def add_data_to_dataset(dataset: Dataset, data: Data):
|
||||||
db_engine: DatabaseEngine = infrastructure_config.get_config()["database_engine"]
|
db_engine: DatabaseEngine = config.database_engine
|
||||||
|
|
||||||
existing_dataset = (await db_engine.query_entity(dataset)).scalar()
|
existing_dataset = (await db_engine.query_entity(dataset)).scalar()
|
||||||
existing_data = (await db_engine.query_entity(data)).scalar()
|
existing_data = (await db_engine.query_entity(data)).scalar()
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
from typing import Union, Dict
|
from typing import Union, Dict
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
from cognee.shared.data_models import GraphDBType
|
from cognee.shared.data_models import GraphDBType
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: dict = None) -> Dict[str, str]:
|
async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param: dict = None) -> Dict[str, str]:
|
||||||
"""
|
"""
|
||||||
Find the neighbours of a given node in the graph and return their descriptions.
|
Find the neighbours of a given node in the graph and return their descriptions.
|
||||||
|
|
@ -22,7 +24,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param:
|
||||||
if node_id is None:
|
if node_id is None:
|
||||||
return {}
|
return {}
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||||
if node_id not in graph:
|
if node_id not in graph:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
@ -30,7 +32,7 @@ async def search_adjacent(graph: Union[nx.Graph, any], query: str, other_param:
|
||||||
neighbor_descriptions = {neighbor: graph.nodes[neighbor].get('description') for neighbor in neighbors}
|
neighbor_descriptions = {neighbor: graph.nodes[neighbor].get('description') for neighbor in neighbors}
|
||||||
return neighbor_descriptions
|
return neighbor_descriptions
|
||||||
|
|
||||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||||
cypher_query = """
|
cypher_query = """
|
||||||
MATCH (node {id: $node_id})-[:CONNECTED_TO]->(neighbor)
|
MATCH (node {id: $node_id})-[:CONNECTED_TO]->(neighbor)
|
||||||
RETURN neighbor.id AS neighbor_id, neighbor.description AS description
|
RETURN neighbor.id AS neighbor_id, neighbor.description AS description
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,11 @@ from cognee.modules.search.llm.extraction.categorize_relevant_category import ca
|
||||||
from cognee.shared.data_models import GraphDBType, DefaultContentPrediction
|
from cognee.shared.data_models import GraphDBType, DefaultContentPrediction
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
|
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
|
|
||||||
def strip_exact_regex(s, substring):
|
def strip_exact_regex(s, substring):
|
||||||
# Escaping substring to be used in a regex pattern
|
# Escaping substring to be used in a regex pattern
|
||||||
pattern = re.escape(substring)
|
pattern = re.escape(substring)
|
||||||
|
|
@ -37,7 +42,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label:
|
||||||
"""
|
"""
|
||||||
# Determine which client is in use based on the configuration
|
# Determine which client is in use based on the configuration
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||||
|
|
||||||
categories_and_ids = [
|
categories_and_ids = [
|
||||||
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
|
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
|
||||||
|
|
@ -53,7 +58,7 @@ async def search_categories(query:str, graph: Union[nx.Graph, any], query_label:
|
||||||
descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes}
|
descriptions = {node: graph.nodes[node].get('description', 'No desc available') for node in connected_nodes}
|
||||||
return descriptions
|
return descriptions
|
||||||
|
|
||||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||||
# Logic for Neo4j
|
# Logic for Neo4j
|
||||||
cypher_query = """
|
cypher_query = """
|
||||||
MATCH (n)
|
MATCH (n)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,10 @@ from pydantic import BaseModel
|
||||||
|
|
||||||
from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category
|
from cognee.modules.search.llm.extraction.categorize_relevant_category import categorize_relevant_category
|
||||||
from cognee.shared.data_models import GraphDBType
|
from cognee.shared.data_models import GraphDBType
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
|
|
||||||
async def search_cypher(query:str, graph: Union[nx.Graph, any]):
|
async def search_cypher(query:str, graph: Union[nx.Graph, any]):
|
||||||
"""
|
"""
|
||||||
|
|
@ -16,7 +19,7 @@ async def search_cypher(query:str, graph: Union[nx.Graph, any]):
|
||||||
|
|
||||||
|
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
if graph_config.graph_engine == GraphDBType.NEO4J:
|
||||||
result = await graph.run(query)
|
result = await graph.run(query)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,10 @@ from neo4j import AsyncSession
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
import networkx as nx
|
import networkx as nx
|
||||||
from cognee.shared.data_models import GraphDBType
|
from cognee.shared.data_models import GraphDBType
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
async def search_neighbour(graph: Union[nx.Graph, any], query: str,
|
async def search_neighbour(graph: Union[nx.Graph, any], query: str,
|
||||||
other_param: dict = None):
|
other_param: dict = None):
|
||||||
"""
|
"""
|
||||||
|
|
@ -28,7 +31,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
|
||||||
if node_id is None:
|
if node_id is None:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||||
relevant_context = []
|
relevant_context = []
|
||||||
target_layer_uuid = graph.nodes[node_id].get('layer_uuid')
|
target_layer_uuid = graph.nodes[node_id].get('layer_uuid')
|
||||||
|
|
||||||
|
|
@ -39,7 +42,7 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
|
||||||
return relevant_context
|
return relevant_context
|
||||||
|
|
||||||
|
|
||||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||||
if isinstance(graph, AsyncSession):
|
if isinstance(graph, AsyncSession):
|
||||||
cypher_query = """
|
cypher_query = """
|
||||||
MATCH (target {id: $node_id})
|
MATCH (target {id: $node_id})
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,10 @@ from cognee.infrastructure import infrastructure_config
|
||||||
|
|
||||||
from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary
|
from cognee.modules.search.llm.extraction.categorize_relevant_summary import categorize_relevant_summary
|
||||||
from cognee.shared.data_models import GraphDBType, ResponseSummaryModel
|
from cognee.shared.data_models import GraphDBType, ResponseSummaryModel
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
import re
|
import re
|
||||||
|
|
||||||
def strip_exact_regex(s, substring):
|
def strip_exact_regex(s, substring):
|
||||||
|
|
@ -30,7 +33,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
|
||||||
- Dict[str, str]: A dictionary where keys are node identifiers containing the query string, and values are their 'summary' attributes.
|
- Dict[str, str]: A dictionary where keys are node identifiers containing the query string, and values are their 'summary' attributes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if infrastructure_config.get_config()["graph_engine"] == GraphDBType.NETWORKX:
|
if graph_config.graph_engine == GraphDBType.NETWORKX:
|
||||||
print("graph", graph)
|
print("graph", graph)
|
||||||
summaries_and_ids = [
|
summaries_and_ids = [
|
||||||
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
|
{'document_id': strip_exact_regex(_, "DATA_SUMMARY__"), 'Summary': data['summary']}
|
||||||
|
|
@ -48,7 +51,7 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
|
||||||
return descriptions
|
return descriptions
|
||||||
|
|
||||||
|
|
||||||
elif infrastructure_config.get_config()["graph_engine"] == GraphDBType.NEO4J:
|
elif graph_config.graph_engine == GraphDBType.NEO4J:
|
||||||
cypher_query = f"""
|
cypher_query = f"""
|
||||||
MATCH (n)
|
MATCH (n)
|
||||||
WHERE n.id CONTAINS $query AND EXISTS(n.summary)
|
WHERE n.id CONTAINS $query AND EXISTS(n.summary)
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,13 @@
|
||||||
from dsp.utils import deduplicate
|
from dsp.utils import deduplicate
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
graph_config = get_graph_config()
|
||||||
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
|
|
||||||
async def search_similarity(query: str, graph):
|
async def search_similarity(query: str, graph):
|
||||||
graph_db_type = infrastructure_config.get_config()["graph_engine"]
|
graph_db_type = graph_config.graph_engine
|
||||||
|
|
||||||
graph_client = await get_graph_client(graph_db_type)
|
graph_client = await get_graph_client(graph_db_type)
|
||||||
|
|
||||||
|
|
@ -17,7 +20,7 @@ async def search_similarity(query: str, graph):
|
||||||
graph_nodes = []
|
graph_nodes = []
|
||||||
|
|
||||||
for layer_id in unique_layer_uuids:
|
for layer_id in unique_layer_uuids:
|
||||||
vector_engine = infrastructure_config.get_config()["vector_engine"]
|
vector_engine = vector_config.vector_engine
|
||||||
|
|
||||||
results = await vector_engine.search(layer_id, query_text = query, limit = 10)
|
results = await vector_engine.search(layer_id, query_text = query, limit = 10)
|
||||||
print("results", results)
|
print("results", results)
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
|
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
|
||||||
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
|
config = get_relationaldb_config()
|
||||||
|
|
||||||
def create_task_status_table():
|
def create_task_status_table():
|
||||||
db_engine = infrastructure_config.get_config("database_engine")
|
db_engine = config.db_engine
|
||||||
|
|
||||||
db_engine.create_table("cognee_task_status", [
|
db_engine.create_table("cognee_task_status", [
|
||||||
dict(name = "data_id", type = "STRING"),
|
dict(name = "data_id", type = "STRING"),
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,8 @@
|
||||||
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
|
from cognee.infrastructure.InfrastructureConfig import infrastructure_config
|
||||||
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
|
config = get_relationaldb_config()
|
||||||
|
|
||||||
def update_task_status(data_id: str, status: str):
|
def update_task_status(data_id: str, status: str):
|
||||||
db_engine = infrastructure_config.get_config("database_engine")
|
db_engine = config.db_engine
|
||||||
db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)])
|
db_engine.insert_data("cognee_task_status", [dict(data_id = data_id, status = status)])
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,15 @@ import logging
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
from cognee.modules.topology.extraction.extract_topology import extract_categories
|
from cognee.modules.topology.extraction.extract_topology import extract_categories
|
||||||
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
|
||||||
|
cognify_config = get_cognify_config()
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
async def infer_data_topology(content: str, graph_topology=None):
|
async def infer_data_topology(content: str, graph_topology=None):
|
||||||
if graph_topology is None:
|
if graph_topology is None:
|
||||||
graph_topology = infrastructure_config.get_config()["graph_topology"]
|
graph_topology = cognify_config.graph_topology
|
||||||
try:
|
try:
|
||||||
return (await extract_categories(
|
return (await extract_categories(
|
||||||
content,
|
content,
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,16 @@
|
||||||
import os
|
import os
|
||||||
import glob
|
import glob
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import Dict, List, Optional, Union, Type, Any
|
from typing import Dict, List, Optional, Union, Type, Any, Tuple
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from cognee import config
|
from cognee import config
|
||||||
|
from cognee.base_config import get_base_config
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure import infrastructure_config
|
||||||
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
from cognee.modules.topology.infer_data_topology import infer_data_topology
|
from cognee.modules.topology.infer_data_topology import infer_data_topology
|
||||||
|
cognify_config = get_cognify_config()
|
||||||
|
base_config = get_base_config()
|
||||||
|
|
||||||
class Relationship(BaseModel):
|
class Relationship(BaseModel):
|
||||||
type: str = Field(..., description="The type of relationship, e.g., 'belongs_to'.")
|
type: str = Field(..., description="The type of relationship, e.g., 'belongs_to'.")
|
||||||
|
|
@ -84,7 +87,7 @@ class TopologyEngine:
|
||||||
async def infer_from_directory_structure(self, node_id: str, repository: str, model: Type[BaseModel]) -> GitHubRepositoryModel:
|
async def infer_from_directory_structure(self, node_id: str, repository: str, model: Type[BaseModel]) -> GitHubRepositoryModel:
|
||||||
""" Infer the topology of a repository from its file structure """
|
""" Infer the topology of a repository from its file structure """
|
||||||
|
|
||||||
path = infrastructure_config.get_config()["data_root_directory"]
|
path = base_config.data_root_directory
|
||||||
path = path + "/" + str(repository)
|
path = path + "/" + str(repository)
|
||||||
print(path)
|
print(path)
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue