fix: enable sdk and fix config

Boris Arzentar 2024-06-03 14:03:24 +02:00
parent ddf528993c
commit 4fb3dc31a4
49 changed files with 194 additions and 518 deletions

View file

@@ -1,6 +1,6 @@
# from .api.v1.config.config import config
# from .api.v1.add.add import add
# from .api.v1.cognify.cognify import cognify
# from .api.v1.datasets.datasets import datasets
# from .api.v1.search.search import search, SearchType
# from .api.v1.prune import prune
from .api.v1.config.config import config
from .api.v1.add.add import add
from .api.v1.cognify.cognify import cognify
from .api.v1.datasets.datasets import datasets
from .api.v1.search.search import search, SearchType
from .api.v1.prune import prune
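
With these re-exports enabled, the package-level SDK is importable again. A minimal usage sketch, assuming only the signatures visible in the diffs below (add(data, dataset_name) and cognify(datasets)); the path is illustrative:

import asyncio
import cognee

async def main():
    # "file://" routes to the single-file branch of add(); the path is hypothetical.
    await cognee.add("file:///tmp/example.txt", dataset_name="example")
    await cognee.cognify(datasets=["example"])

asyncio.run(main())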

View file

@@ -68,7 +68,7 @@ async def delete_dataset(dataset_id: str):
@app.get("/datasets/{dataset_id}/graph", response_model=list)
async def get_dataset_graph(dataset_id: str):
from cognee.utils import render_graph
from cognee.shared.utils import render_graph
from cognee.infrastructure.databases.graph import get_graph_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
@@ -253,14 +253,24 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000):
logger.info("Starting server at %s:%s", host, port)
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.relational import get_relationaldb_config
from cognee.infrastructure.databases.vector import get_vectordb_config
cognee_directory_path = os.path.abspath(".cognee_system")
databases_directory_path = os.path.join(cognee_directory_path, "databases")
relational_config = get_relationaldb_config()
relational_config.db_path = databases_directory_path
relational_config.create_engine()
vector_config = get_vectordb_config()
vector_config.vector_db_path = databases_directory_path
vector_config.create_engine()
base_config = get_base_config()
data_directory_path = os.path.abspath(".data_storage")
base_config.data_root_directory = data_directory_path
cognee_directory_path = os.path.abspath(".cognee_system")
base_config.system_root_directory = cognee_directory_path
from cognee.modules.data.deletion import prune_system
asyncio.run(prune_system())
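
The server bootstrap now rewires the storage roots through the per-component configs. A sketch of the equivalent setup via the SDK-facing config class from this commit (directories mirror the ones above):

import os
from cognee.api.v1.config.config import config

# system_root_directory() also points the relational and vector engines
# at <root>/databases, per the config.py diff below.
config.system_root_directory(os.path.abspath(".cognee_system"))
config.data_root_directory(os.path.abspath(".data_storage"))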

View file

@@ -5,21 +5,23 @@ import dlt
import duckdb
import cognee.modules.ingestion as ingestion
from cognee.infrastructure.files.storage import LocalStorage
from cognee.modules.discovery import discover_directory_datasets
from cognee.utils import send_telemetry
from cognee.modules.ingestion import get_matched_datasets, save_data_to_file
from cognee.shared.utils import send_telemetry
from cognee.base_config import get_base_config
base_config = get_base_config()
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
if isinstance(data, str):
# data is a data directory path
if "data://" in data:
return await add_data_directory(data.replace("data://", ""), dataset_name)
# data is a file path
# data is a data directory path
datasets = get_matched_datasets(data.replace("data://", ""), dataset_name)
return await asyncio.gather(*[add(file_paths, dataset_name) for [dataset_name, file_paths] in datasets])
if "file://" in data:
# data is a file path
return await add([data], dataset_name)
# data is a text
# data is text
else:
file_path = save_data_to_file(data, dataset_name)
return await add([file_path], dataset_name)
@@ -47,7 +49,7 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
return []
async def add_files(file_paths: List[str], dataset_name: str):
# infra_config = infrastructure_config.get_config()
base_config = get_base_config()
data_directory_path = base_config.data_root_directory
processed_file_paths = []
@@ -107,29 +109,3 @@ async def add_files(file_paths: List[str], dataset_name: str):
send_telemetry("cognee.add")
return run_info
async def add_data_directory(data_path: str, dataset_name: str = None):
datasets = discover_directory_datasets(data_path)
results = []
for key in datasets.keys():
if dataset_name is None or key.startswith(dataset_name):
results.append(add(datasets[key], dataset_name = key))
return await asyncio.gather(*results)
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
data_directory_path = base_config.data_root_directory
classified_data = ingestion.classify(data, filename)
# data_id = ingestion.identify(classified_data)
storage_path = data_directory_path + "/" + dataset_name.replace(".", "/")
LocalStorage.ensure_directory_exists(storage_path)
file_metadata = classified_data.get_metadata()
file_name = file_metadata["name"]
LocalStorage(storage_path).store(file_name, classified_data.get_data())
return "file://" + storage_path + "/" + file_name

View file

@@ -1,21 +0,0 @@
from typing import List
from enum import Enum
from cognee.modules.users.memory import create_information_points, is_existing_memory
class MemoryType(Enum):
GRAPH = "GRAPH"
VECTOR = "VECTOR"
RELATIONAL = "RELATIONAL"
class MemoryException(Exception):
message: str
def __init__(self, message: str):
self.message = message
async def remember(user_id: str, memory_name: str, payload: List[str]):
if await is_existing_memory(memory_name) is False:
raise MemoryException(f"Memory with the name \"{memory_name}\" doesn't exist.")
await create_information_points(memory_name, payload)

View file

@@ -3,8 +3,8 @@ from uuid import uuid4
from typing import List, Union
import logging
import nltk
from asyncio import Lock
from nltk.corpus import stopwords
from cognee.config import Config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.modules.cognify.graph.add_node_connections import group_nodes_by_layer, \
graph_ready_output, connect_nodes_in_graph
@@ -24,18 +24,14 @@ from cognee.modules.data.get_content_summary import get_content_summary
from cognee.modules.data.get_cognitive_layers import get_cognitive_layers
from cognee.modules.data.get_layer_graphs import get_layer_graphs
from cognee.shared.data_models import KnowledgeGraph
from cognee.utils import send_telemetry
from cognee.shared.utils import send_telemetry
from cognee.modules.tasks import create_task_status_table, update_task_status
from cognee.shared.SourceCodeGraph import SourceCodeGraph
from asyncio import Lock
from cognee.modules.tasks import get_task_status
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
config = Config()
config.load()
USER_ID = "default_user"
logger = logging.getLogger("cognify")
@@ -66,7 +62,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
task_status = get_task_status([dataset_name])
if task_status == "DATASET_PROCESSING_STARTED":
logger.error(f"Dataset {dataset_name} is already being processed.")
logger.info(f"Dataset {dataset_name} is being processed.")
return
update_task_status(dataset_name, "DATASET_PROCESSING_STARTED")
@@ -176,8 +172,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
graph_config = get_graph_config()
graph_client = await get_graph_client(graph_config.graph_engine)
cognify_config = get_cognify_config()
graph_topology = cognify_config.graph_model
graph_topology = graph_config.graph_model
if graph_topology == SourceCodeGraph:
classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]
@@ -199,6 +194,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
await add_summary_nodes(graph_client, document_id, content_summary)
print(f"Chunk ({chunk_id}) summarized.")
cognify_config = get_cognify_config()
cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit]
@@ -286,7 +283,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
# print("results", out)
# #
# # from cognee.utils import render_graph
# # from cognee.shared.utils import render_graph
# #
# # await render_graph(graph, include_color=True, include_nodes=False, include_size=False)

View file

@@ -1,58 +1,60 @@
""" This module is used to set the configuration of the system."""
import os
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.modules.cognify.config import get_cognify_config
from cognee.infrastructure.data.chunking.config import get_chunk_config
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.relational import get_relationaldb_config
class config():
@staticmethod
def system_root_directory(system_root_directory: str):
base_config = get_base_config()
base_config.system_root_directory = system_root_directory
databases_directory_path = os.path.join(system_root_directory, "databases")
relational_config = get_relationaldb_config()
relational_config.db_path = databases_directory_path
relational_config.create_engine()
vector_config = get_vectordb_config()
vector_config.vector_db_path = databases_directory_path
vector_config.create_engine()
@staticmethod
def data_root_directory(data_root_directory: str):
base_config = get_base_config()
base_config.data_root_directory = data_root_directory
@staticmethod
def monitoring_tool(monitoring_tool: object):
base_config = get_base_config()
base_config.monitoring_tool = monitoring_tool
@staticmethod
def set_classification_model(classification_model: object):
cognify_config = get_cognify_config()
cognify_config.classification_model = classification_model
@staticmethod
def set_summarization_model(summarization_model: object):
cognify_config = get_cognify_config()
cognify_config.summarization_model=summarization_model
@staticmethod
def set_labeling_model(labeling_model: object):
cognify_config = get_cognify_config()
cognify_config.labeling_model =labeling_model
@staticmethod
def set_graph_model(graph_model: object):
graph_config = get_graph_config()
graph_config.graph_model = graph_model
@staticmethod
def set_cognitive_layer_model(cognitive_layer_model: object):
cognify_config = get_cognify_config()
cognify_config.cognitive_layer_model = cognitive_layer_model
@staticmethod
def set_graph_engine(graph_engine: object):
graph_config = get_graph_config()
@@ -78,7 +80,6 @@ class config():
cognify_config = get_cognify_config()
cognify_config.intra_layer_score_treshold = intra_layer_score_treshold
@staticmethod
def connect_documents(connect_documents: bool):
cognify_config = get_cognify_config()
@@ -88,9 +89,3 @@ class config():
def set_chunk_strategy(chunk_strategy: object):
chunk_config = get_chunk_config()
chunk_config.chunk_strategy = chunk_strategy
@staticmethod
def set_graph_topology(graph_topology: object):
cognify_config = get_cognify_config()
cognify_config.graph_topology = graph_topology
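
Taken together, the class now writes straight into the cached per-component configs instead of the removed global infrastructure_config. A hedged usage sketch (KnowledgeGraph is the default graph model elsewhere in this commit; directories are illustrative):

from cognee.api.v1.config.config import config
from cognee.shared.data_models import KnowledgeGraph

config.system_root_directory("/tmp/cognee_system")  # also recreates the relational and vector engines
config.data_root_directory("/tmp/cognee_data")
config.set_graph_model(KnowledgeGraph)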

View file

@@ -1,5 +1,5 @@
from duckdb import CatalogException
from cognee.modules.discovery import discover_directory_datasets
from cognee.modules.ingestion import discover_directory_datasets
from cognee.modules.tasks import get_task_status
from cognee.infrastructure.databases.relational.config import get_relationaldb_config

View file

@@ -11,7 +11,7 @@ from cognee.modules.search.graph.search_categories import search_categories
from cognee.modules.search.graph.search_neighbour import search_neighbour
from cognee.modules.search.graph.search_summary import search_summary
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.utils import send_telemetry
from cognee.shared.utils import send_telemetry
from cognee.infrastructure.databases.graph.config import get_graph_config
class SearchType(Enum):

View file

@@ -1,33 +1,29 @@
from typing import List, Dict, Any, Union, Optional
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel
import pandas as pd
from pydantic import BaseModel
from typing import List, Dict, Any, Union, Optional
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel
from cognee.infrastructure.databases.graph.config import get_graph_config
USER_ID = "default_user"
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
graph_db_type = infrastructure_config.get_config()["graph_engine"]
graph_config = get_graph_config()
graph_db_type = graph_config.graph_database_provider
graph_client = await get_graph_client(graph_db_type)
graph_topology = infrastructure_config.get_config()["graph_topology"]
engine = TopologyEngine()
topology = await engine.infer_from_directory_structure(node_id=USER_ID, repository=directory, model=model)
def flatten_model(model: BaseModel, parent_id: Optional[str] = None) -> Dict[str, Any]:
"""Flatten a single Pydantic model to a dictionary handling nested structures."""
result = {**model.dict(), 'parent_id': parent_id}
if hasattr(model, 'default_relationship') and model.default_relationship:
result = {**model.dict(), "parent_id": parent_id}
if hasattr(model, "default_relationship") and model.default_relationship:
result.update({
'relationship_type': model.default_relationship.type,
'relationship_source': model.default_relationship.source,
'relationship_target': model.default_relationship.target
"relationship_type": model.default_relationship.type,
"relationship_source": model.default_relationship.source,
"relationship_target": model.default_relationship.target
})
return result
@@ -39,7 +35,7 @@ async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
flat = [flatten_model(items, parent_id)]
for field, value in items:
if isinstance(value, (BaseModel, list)):
flat.extend(recursive_flatten(value, items.dict().get('node_id', None)))
flat.extend(recursive_flatten(value, items.dict().get("node_id", None)))
return flat
else:
return []
@@ -56,38 +52,11 @@ async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
for _, row in df.iterrows():
node_data = row.to_dict()
node_id = node_data.pop('node_id')
node_id = node_data.pop("node_id")
# Remove 'node_id' and get its value
# Remove "node_id" and get its value
await graph_client.add_node(node_id, node_data)
if pd.notna(row['relationship_source']) and pd.notna(row['relationship_target']):
await graph_client.add_edge(row['relationship_source'], row['relationship_target'], relationship_name=row['relationship_type'])
if pd.notna(row["relationship_source"]) and pd.notna(row["relationship_target"]):
await graph_client.add_edge(row["relationship_source"], row["relationship_target"], relationship_name=row["relationship_type"])
return graph_client.graph
if __name__ == "__main__":
async def test() -> None:
# Uncomment and modify the following lines as needed
# await prune.prune_system()
#
# from cognee.api.v1.add import add
# data_directory_path = os.path.abspath("../../../.data")
# # print(data_directory_path)
# # config.data_root_directory(data_directory_path)
# # cognee_directory_path = os.path.abspath("../.cognee_system")
# # config.system_root_directory(cognee_directory_path)
#
# await add("data://" + data_directory_path, "example")
# graph = await add_topology()
graph_db_type = infrastructure_config.get_config()["graph_engine"]
graph_client = await get_graph_client(graph_db_type)
#
from cognee.utils import render_graph
await render_graph(graph_client.graph, include_color=True, include_nodes=False, include_size=False)
import asyncio
asyncio.run(test())

View file

@@ -4,15 +4,15 @@ from cognee.root_dir import get_absolute_path
from cognee.shared.data_models import MonitoringTool
class BaseConfig(BaseSettings):
system_root_directory: str = get_absolute_path(".cognee_system")
data_root_directory: str = get_absolute_path(".data")
monitoring_tool: object = MonitoringTool.LANGFUSE
graphistry_username: str
graphistry_password: str
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
def to_dict(self) -> dict:
return {
"system_root_directory": self.system_root_directory,
"data_root_directory": self.data_root_directory,
"monitoring_tool": self.monitoring_tool,
}

View file

@@ -1,116 +0,0 @@
"""Configuration for cognee - cognitive architecture framework."""
import logging
import os
import configparser
import uuid
from typing import Optional, Dict, Any
from dataclasses import dataclass, field
from pathlib import Path
from dotenv import load_dotenv
logging.basicConfig(level=logging.DEBUG)
def load_dontenv():
base_dir = Path(__file__).resolve().parent.parent
# Load the .env file from the base directory
dotenv_path = base_dir / ".env"
load_dotenv(dotenv_path=dotenv_path, override = True)
try:
load_dontenv()
except:
pass
@dataclass
class Config:
""" Configuration for cognee - cognitive architecture framework. """
cognee_dir: str = field(
default_factory=lambda: os.getenv("COG_ARCH_DIR", "cognee")
)
config_path: str = field(
default_factory=lambda: os.path.join(
os.getenv("COG_ARCH_DIR", "cognee"), "config"
)
)
# custom_model: str = os.getenv("CUSTOM_LLM_MODEL", "llama3-70b-8192") #"mistralai/Mixtral-8x7B-Instruct-v0.1"
# custom_endpoint: str = os.getenv("CUSTOM_ENDPOINT", "https://api.endpoints.anyscale.com/v1") #"https://api.endpoints.anyscale.com/v1" # pass claude endpoint
# custom_key: Optional[str] = os.getenv("CUSTOM_LLM_API_KEY")
# ollama_endpoint: str = os.getenv("CUSTOM_OLLAMA_ENDPOINT", "http://localhost:11434/v1") #"http://localhost:11434/v1"
# ollama_key: Optional[str] = "ollama"
# ollama_model: str = os.getenv("CUSTOM_OLLAMA_MODEL", "mistral:instruct") #"mistral:instruct"
# openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4o" ) #"gpt-4o"
# model_endpoint: str = "openai"
# llm_api_key: Optional[str] = os.getenv("OPENAI_API_KEY")
# openai_embedding_model = "text-embedding-3-large"
# openai_embedding_dimensions = 3072
# litellm_embedding_model = "text-embedding-3-large"
# litellm_embedding_dimensions = 3072
graphistry_username = os.getenv("GRAPHISTRY_USERNAME")
graphistry_password = os.getenv("GRAPHISTRY_PASSWORD")
# Embedding parameters
embedding_model: str = "BAAI/bge-large-en-v1.5"
embedding_dimensions: int = 1024
connect_documents: bool = False
# Model parameters and configuration for interlayer scoring
intra_layer_score_treshold: float = 0.98
# Client ID
anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)
#Chunking parameters
# chunk_size: int = 1500
# chunk_overlap: int = 0
# chunk_strategy: str = ChunkStrategy.PARAGRAPH
def load(self):
"""Loads the configuration from a file or environment variables."""
try:
load_dontenv()
except:
pass
config = configparser.ConfigParser()
config.read(self.config_path)
# Override with environment variables if they exist
for attr in self.__annotations__:
env_value = os.getenv(attr.upper())
if env_value is not None:
setattr(self, attr, env_value)
# Load from config file
if config.sections():
for section in config.sections():
for key, value in config.items(section):
if hasattr(self, key):
setattr(self, key, value)
def save(self):
"""Saves the current configuration to a file."""
config = configparser.ConfigParser()
# Save the current settings to the config file
for attr, value in self.__dict__.items():
section, option = attr.split("_", 1)
if not config.has_section(section):
config.add_section(section)
config.set(section, option, str(value))
with open(self.config_path, "w") as configfile:
config.write(configfile)
def to_dict(self) -> Dict[str, Any]:
"""Returns a dictionary representation of the configuration."""
return {attr: getattr(self, attr) for attr in self.__annotations__}
@classmethod
def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
"""Creates a Config instance from a dictionary."""
config = cls()
for attr, value in config_dict.items():
if hasattr(config, attr):
setattr(config, attr, value)
return config

View file

@@ -1,113 +0,0 @@
import logging
from cognee.config import Config
from .data.chunking.config import get_chunk_config
from .llm.llm_interface import LLMInterface
from .llm.get_llm_client import get_llm_client
from ..shared.data_models import GraphDBType, DefaultContentPrediction, KnowledgeGraph, SummarizedContent, \
LabeledContent, DefaultCognitiveLayer
logging.basicConfig(level=logging.DEBUG)
config = Config()
config.load()
chunk_config = get_chunk_config()
class InfrastructureConfig():
graph_engine: GraphDBType = None
llm_engine: LLMInterface = None
classification_model = None
summarization_model = None
labeling_model = None
graph_model = None
cognitive_layer_model = None
intra_layer_score_treshold = None
embedding_engine = None
connect_documents = config.connect_documents
chunk_strategy = chunk_config.chunk_strategy
chunk_engine = None
llm_provider: str = None
llm_model: str = None
llm_endpoint: str = None
llm_api_key: str = None
def get_config(self, config_entity: str = None) -> dict:
if self.graph_engine is None:
self.graph_engine = GraphDBType.NETWORKX
if self.classification_model is None:
self.classification_model = DefaultContentPrediction
if self.summarization_model is None:
self.summarization_model = SummarizedContent
if self.labeling_model is None:
self.labeling_model = LabeledContent
if self.graph_model is None:
self.graph_model = KnowledgeGraph
if self.cognitive_layer_model is None:
self.cognitive_layer_model = DefaultCognitiveLayer
if self.intra_layer_score_treshold is None:
self.intra_layer_score_treshold = config.intra_layer_score_treshold
if self.connect_documents is None:
self.connect_documents = config.connect_documents
if self.chunk_strategy is None:
self.chunk_strategy = chunk_config.chunk_strategy
if self.chunk_engine is None:
self.chunk_engine = chunk_config.chunk_engine
if (config_entity is None or config_entity == "llm_engine") and self.llm_engine is None:
self.llm_engine = get_llm_client()
if config_entity is not None:
return getattr(self, config_entity)
return {
"llm_engine": self.llm_engine,
"classification_model": self.classification_model,
"summarization_model": self.summarization_model,
"labeling_model": self.labeling_model,
"graph_model": self.graph_model,
"cognitive_layer_model": self.cognitive_layer_model,
"llm_provider": self.llm_provider,
"intra_layer_score_treshold": self.intra_layer_score_treshold,
"embedding_engine": self.embedding_engine,
"connect_documents": self.connect_documents,
"chunk_strategy": self.chunk_strategy,
"chunk_engine": self.chunk_engine,
}
def set_config(self, new_config: dict):
if "classification_model" in new_config:
self.classification_model = new_config["classification_model"]
if "summarization_model" in new_config:
self.summarization_model = new_config["summarization_model"]
if "labeling_model" in new_config:
self.labeling_model = new_config["labeling_model"]
if "cognitive_layer_model" in new_config:
self.cognitive_layer_model = new_config["cognitive_layer_model"]
if "intra_layer_score_treshold" in new_config:
self.intra_layer_score_treshold = new_config["intra_layer_score_treshold"]
if "embedding_engine" in new_config:
self.embedding_engine = new_config["embedding_engine"]
if "connect_documents" in new_config:
self.connect_documents = new_config["connect_documents"]
if "chunk_strategy" in new_config:
self.chunk_strategy = new_config["chunk_strategy"]
if "chunk_engine" in new_config:
self.chunk_engine = new_config["chunk_engine"]
infrastructure_config = InfrastructureConfig()

View file

@@ -1 +0,0 @@
from .InfrastructureConfig import infrastructure_config

View file

@@ -1,5 +1,5 @@
from sklearn.feature_extraction.text import TfidfVectorizer
from cognee.utils import extract_pos_tags
from cognee.shared.utils import extract_pos_tags
def extract_keywords(text: str) -> list[str]:
if len(text) == 0:

View file

@@ -4,7 +4,7 @@ import os
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
from cognee.shared.data_models import DefaultGraphModel, GraphDBType, KnowledgeGraph
from cognee.shared.data_models import GraphDBType, KnowledgeGraph
class GraphConfig(BaseSettings):
@@ -26,7 +26,6 @@ class GraphConfig(BaseSettings):
return {
"graph_filename": self.graph_filename,
"graph_database_provider": self.graph_database_provider,
"graph_topology": self.graph_topology,
"graph_file_path": self.graph_file_path,
"graph_database_url": self.graph_database_url,
"graph_database_username": self.graph_database_username,

View file

@@ -2,3 +2,4 @@ from .ModelBase import ModelBase
from .DatabaseEngine import DatabaseEngine
from .sqlite.SqliteEngine import SqliteEngine
from .duckdb.DuckDBAdapter import DuckDBAdapter
from .config import get_relationaldb_config

View file

@@ -1,11 +1,11 @@
import os
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.base_config import get_base_config
from cognee.root_dir import get_absolute_path
from .create_relational_engine import create_relational_engine
class RelationalConfig(BaseSettings):
db_path: str = os.path.join(get_base_config().system_root_directory, "databases")
db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases")
db_name: str = "cognee.db"
db_host: str = "localhost"
db_port: str = "5432"
@@ -17,7 +17,8 @@ class RelationalConfig(BaseSettings):
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
def create_engine(self):
return create_relational_engine(self.db_path, self.db_name)
self.db_file_path = os.path.join(self.db_path, self.db_name)
self.database_engine = create_relational_engine(self.db_path, self.db_name)
def to_dict(self) -> dict:
return {
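
Note the changed contract: create_engine() no longer returns the engine but stores it on the config. A sketch of the new call pattern, matching start_api_server above (path illustrative):

from cognee.infrastructure.databases.relational import get_relationaldb_config

relational_config = get_relationaldb_config()
relational_config.db_path = "/tmp/cognee_system/databases"
relational_config.create_engine()            # sets db_file_path and database_engine as side effects
engine = relational_config.database_engine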

View file

@@ -1,12 +1,13 @@
import os
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
from cognee.root_dir import get_absolute_path
from .create_vector_engine import create_vector_engine
class VectorConfig(BaseSettings):
vector_db_url: str = os.path.join(get_relationaldb_config().db_path, "cognee.lancedb")
vector_db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases")
vector_db_url: str = os.path.join(vector_db_path, "cognee.lancedb")
vector_db_key: str = ""
vector_engine_provider: str = "lancedb"
vector_engine: object = create_vector_engine(
@@ -22,7 +23,9 @@
def create_engine(self):
if self.vector_engine_provider == "lancedb":
self.vector_db_url = os.path.join(get_relationaldb_config().db_path, "cognee.lancedb")
self.vector_db_url = os.path.join(self.vector_db_path, "cognee.lancedb")
else:
self.vector_db_path = None
self.vector_engine = create_vector_engine(
get_vectordb_config().to_dict(),
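
The vector config follows the same pattern, with the new vector_db_path so LanceDB no longer reads the relational config. A matching sketch (path illustrative):

from cognee.infrastructure.databases.vector import get_vectordb_config

vector_config = get_vectordb_config()
vector_config.vector_db_path = "/tmp/cognee_system/databases"
vector_config.create_engine()   # for lancedb, rebuilds vector_db_url under vector_db_path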

View file

@@ -5,26 +5,12 @@ from pydantic import BaseModel
import instructor
from tenacity import retry, stop_after_attempt
import openai
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.shared.data_models import MonitoringTool
from cognee.base_config import get_base_config
from cognee.infrastructure.llm.config import get_llm_config
llm_config = get_llm_config()
base_config = get_base_config()
if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
from langfuse.openai import AsyncOpenAI, OpenAI
elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
from langsmith import wrappers
from openai import AsyncOpenAI
AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
else:
from openai import AsyncOpenAI, OpenAI
class GenericAPIAdapter(LLMInterface):
"""Adapter for Generic API LLM provider API """
@@ -37,6 +23,8 @@ class GenericAPIAdapter(LLMInterface):
self.model = model
self.api_key = api_key
llm_config = get_llm_config()
if llm_config.llm_provider == "groq":
from groq import groq
self.aclient = instructor.from_openai(
@@ -46,6 +34,17 @@
mode = instructor.Mode.MD_JSON
)
else:
base_config = get_base_config()
if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
from langfuse.openai import AsyncOpenAI
elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
from langsmith import wrappers
from openai import AsyncOpenAI
AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
else:
from openai import AsyncOpenAI
self.aclient = instructor.patch(
AsyncOpenAI(
base_url = api_endpoint,

View file

@@ -9,7 +9,6 @@ class LLMProvider(Enum):
ANTHROPIC = "anthropic"
CUSTOM = "custom"
llm_config = get_llm_config()
def get_llm_client():
"""Get the LLM client based on the configuration using Enums."""
llm_config = get_llm_config()

View file

@@ -6,26 +6,10 @@ from pydantic import BaseModel
from tenacity import retry, stop_after_attempt
from cognee.base_config import get_base_config
from cognee.config import Config
from cognee.infrastructure.llm import get_llm_config
from cognee.infrastructure.llm.llm_interface import LLMInterface
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.shared.data_models import MonitoringTool
config = Config()
config.load()
llm_config = get_llm_config()
base_config = get_base_config()
if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
from langfuse.openai import AsyncOpenAI, OpenAI
elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
from langsmith import wrappers
from openai import AsyncOpenAI
AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
else:
from openai import AsyncOpenAI, OpenAI
class OpenAIAdapter(LLMInterface):
name = "OpenAI"
model: str
@@ -33,6 +17,17 @@ class OpenAIAdapter(LLMInterface):
"""Adapter for OpenAI's GPT-3, GPT=4 API"""
def __init__(self, api_key: str, model:str):
base_config = get_base_config()
if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
from langfuse.openai import AsyncOpenAI, OpenAI
elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
from langsmith import wrappers
from openai import AsyncOpenAI
AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
else:
from openai import AsyncOpenAI, OpenAI
self.aclient = instructor.from_openai(AsyncOpenAI(api_key = api_key))
self.client = instructor.from_openai(OpenAI(api_key = api_key))
self.model = model

View file

@@ -1,12 +1,7 @@
from functools import lru_cache
from pydantic_settings import BaseSettings, SettingsConfigDict
from cognee.shared.data_models import DefaultContentPrediction, LabeledContent, SummarizedContent, \
DefaultCognitiveLayer, DefaultGraphModel, KnowledgeGraph
# Monitoring tool
DefaultCognitiveLayer
class CognifyConfig(BaseSettings):
classification_model: object = DefaultContentPrediction
@@ -15,10 +10,7 @@ class CognifyConfig(BaseSettings):
cognitive_layer_model: object = DefaultCognitiveLayer
intra_layer_score_treshold: float = 0.98
connect_documents: bool = False
graph_topology: object = DefaultGraphModel
cognitive_layers_limit: int = 2
graph_model:object = KnowledgeGraph
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
@@ -30,11 +22,9 @@
"cognitive_layer_model": self.cognitive_layer_model,
"intra_layer_score_treshold": self.intra_layer_score_treshold,
"connect_documents": self.connect_documents,
"graph_topology": self.graph_topology,
"cognitive_layers_limit": self.cognitive_layers_limit,
"graph_model": self.graph_model
}
@lru_cache
def get_cognify_config():
return CognifyConfig()
return CognifyConfig()

View file

@@ -4,14 +4,11 @@ from dspy.evaluate.evaluate import Evaluate
from dspy.primitives.example import Example
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.config import Config
from cognee.shared.data_models import Answer
from cognee.infrastructure.llm import get_llm_config
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.dataset import HotPotQA
config = Config()
config.load()
def evaluate():
dataset = HotPotQA(
train_seed = 1,
@@ -36,7 +33,8 @@
evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096)
gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
llm_config = get_llm_config()
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
@@ -58,7 +56,7 @@ def evaluate():
return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
dsp.passage_match([example.answer], [answer_prediction.answer])
gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
dspy.settings.configure(lm = gpt4)
evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer)

View file

@@ -4,7 +4,7 @@ from typing import List, Tuple, TypedDict
from pydantic import BaseModel
from cognee.infrastructure.databases.vector import DataPoint
# from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
# from cognee.shared.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
from cognee.infrastructure.databases.graph.config import get_graph_config
from cognee.infrastructure.databases.vector.config import get_vectordb_config
@@ -69,8 +69,6 @@ async def add_cognitive_layer_graphs(
id, type, name, description, *node_properties = node
print("Node properties: ", node_properties)
node_properties = dict(node_properties)
graph_nodes.append((

View file

@@ -1,6 +1,5 @@
import uuid
# from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
from cognee.shared.data_models import GraphDBType
from cognee.infrastructure.databases.graph.config import get_graph_config
@@ -105,7 +104,7 @@ if __name__ == "__main__":
#
# connect_nodes_in_graph(graph, relationships)
from cognee.utils import render_graph
from cognee.shared.utils import render_graph
graph_url = await render_graph(graph)

View file

@@ -1,13 +1,11 @@
import dspy
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.config import Config
config = Config()
config.load()
from cognee.infrastructure.llm import get_llm_config
def run():
gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
llm_config = get_llm_config()
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))

View file

@@ -2,16 +2,13 @@ import dsp
import dspy
from dspy.teleprompt import BootstrapFewShot
from dspy.primitives.example import Example
from cognee.config import Config
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph
from cognee.root_dir import get_absolute_path
from cognee.infrastructure.files.storage import LocalStorage
from cognee.shared.data_models import Answer
from cognee.infrastructure.llm.get_llm_client import get_llm_client
from cognee.modules.cognify.dataset import HotPotQA
config = Config()
config.load()
from cognee.infrastructure.llm import get_llm_config
def train():
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts")
@@ -59,7 +56,8 @@ def train():
trainset = [example.with_inputs("context", "question") for example in train_examples]
gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
llm_config = get_llm_config()
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset)

View file

@@ -5,7 +5,7 @@ from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from cognee.infrastructure.llm import get_llm_config
from cognee.shared.data_models import KnowledgeGraph, Node, Edge
from cognee.utils import trim_text_to_max_tokens
from cognee.shared.utils import trim_text_to_max_tokens
# """Instructions:
# You are a top-tier algorithm designed for extracting information from text in structured formats to build a knowledge graph.

View file

@@ -1,8 +1,7 @@
import logging
from typing import List, Dict
from cognee.infrastructure import infrastructure_config
from.extraction.extract_cognitive_layers import extract_cognitive_layers
from cognee.modules.cognify.config import get_cognify_config
from .extraction.extract_cognitive_layers import extract_cognitive_layers
config = get_cognify_config()

View file

@@ -1,7 +1,6 @@
import logging
from cognee.infrastructure import infrastructure_config
from.extraction.extract_summary import extract_summary
from cognee.modules.cognify.config import get_cognify_config
from .extraction.extract_summary import extract_summary
config = get_cognify_config()
logger = logging.getLogger(__name__)

View file

@@ -1,13 +1,13 @@
import logging
import asyncio
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph import get_graph_config
from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
from.extraction.extract_summary import extract_summary
from cognee.modules.cognify.config import get_cognify_config
config = get_cognify_config()
logger = logging.getLogger(__name__)
async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
config = get_graph_config()
try:
graph_awaitables = [
extract_knowledge_graph(

View file

@@ -1 +0,0 @@
from .discover_directory_datasets import discover_directory_datasets

View file

@@ -1,2 +1,5 @@
from .classify import classify
from .identify import identify
from .save_data_to_file import save_data_to_file
from .get_matched_datasets import get_matched_datasets
from .discover_directory_datasets import discover_directory_datasets

View file

@@ -0,0 +1,12 @@
from .discover_directory_datasets import discover_directory_datasets
def get_matched_datasets(data_path: str, dataset_name_to_match: str = None):
datasets = discover_directory_datasets(data_path)
matched_datasets = []
for dataset_name, dataset_files in datasets.items():
if dataset_name_to_match is None or dataset_name.startswith(dataset_name_to_match):
matched_datasets.append([dataset_name, dataset_files])
return matched_datasets
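
A usage sketch; the directory layout and names are hypothetical, and the exact naming comes from discover_directory_datasets (not shown in this diff):

from cognee.modules.ingestion import get_matched_datasets

# Given .data/movies/*.txt and .data/books/*.txt, the "movies" prefix
# would match only the first dataset.
for dataset_name, file_paths in get_matched_datasets("/abs/path/.data", "movies"):
    print(dataset_name, len(file_paths))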

View file

@@ -0,0 +1,19 @@
from typing import BinaryIO, Union
from cognee.base_config import get_base_config
from cognee.infrastructure.files.storage import LocalStorage
from .classify import classify
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
base_config = get_base_config()
data_directory_path = base_config.data_root_directory
classified_data = classify(data, filename)
storage_path = data_directory_path + "/" + dataset_name.replace(".", "/")
LocalStorage.ensure_directory_exists(storage_path)
file_metadata = classified_data.get_metadata()
file_name = file_metadata["name"]
LocalStorage(storage_path).store(file_name, classified_data.get_data())
return "file://" + storage_path + "/" + file_name

View file

@@ -14,7 +14,6 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
Parameters:
- graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session.
- id (str): The identifier of the node to match against.
- infrastructure_config (Dict): Configuration that includes the graph engine type.
- other_param (dict, optional): A dictionary that may contain 'node_id' to specify the node.
Returns:

View file

@@ -19,7 +19,6 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
Parameters:
- graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session.
- query (str): The query string to filter nodes by, e.g., 'SUMMARY'.
- infrastructure_config (Dict): Configuration that includes the graph engine type.
- other_param (str, optional): An additional parameter, unused in this implementation but could be for future enhancements.
Returns:

View file

@@ -1,16 +1,17 @@
import logging
from typing import List, Dict
from cognee.infrastructure import infrastructure_config
from.extraction.categorize_relevant_summary import categorize_relevant_summary
from cognee.modules.cognify.config import get_cognify_config
from .extraction.categorize_relevant_summary import categorize_relevant_summary
logger = logging.getLogger(__name__)
async def get_cognitive_layers(content: str, categories: List[Dict]):
try:
cognify_config = get_cognify_config()
return (await categorize_relevant_summary(
content,
categories[0],
infrastructure_config.get_config()["categorize_summary_model"]
cognify_config.summarization_model,
)).cognitive_layers
except Exception as error:
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)

View file

@@ -1,10 +1,7 @@
from cognee.config import Config
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.llm import get_llm_config
def get_settings():
config = Config()
config.load()
llm_config = get_llm_config()
vector_dbs = [{

View file

@@ -1,8 +1,5 @@
import json
import logging
from pydantic import BaseModel
from cognee.infrastructure.llm import get_llm_config
from cognee.infrastructure import infrastructure_config
class LLMConfig(BaseModel):
apiKey: str
@@ -17,6 +14,3 @@ async def save_llm_config(new_llm_config: LLMConfig):
if "*****" not in new_llm_config.apiKey and len(new_llm_config.apiKey.strip()) > 0:
llm_config.llm_api_key = new_llm_config.apiKey
logging.error(json.dumps(llm_config.to_dict()))
infrastructure_config.llm_engine = None

View file

@@ -1,14 +1,14 @@
from typing import Type, List
from typing import Type
from pydantic import BaseModel
from cognee.infrastructure.llm.prompts import read_query_prompt
from cognee.infrastructure.llm.get_llm_client import get_llm_client
async def extract_categories(content: str, response_model: Type[BaseModel]):
async def extract_topology(content: str, response_model: Type[BaseModel]):
llm_client = get_llm_client()
system_prompt = read_query_prompt("extract_topology.txt")
llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model)
return llm_output.model_dump()
return llm_output.model_dump()

View file

@@ -1,18 +1,15 @@
import logging
from typing import List, Dict
from cognee.infrastructure import infrastructure_config
from cognee.modules.topology.extraction.extract_topology import extract_categories
from cognee.modules.cognify.config import get_cognify_config
cognify_config = get_cognify_config()
from cognee.modules.topology.extraction.extract_topology import extract_topology
from cognee.infrastructure.databases.graph.config import get_graph_config
logger = logging.getLogger(__name__)
async def infer_data_topology(content: str, graph_topology=None):
if graph_topology is None:
graph_topology = cognify_config.graph_topology
graph_config = get_graph_config()
graph_topology = graph_config.graph_model
try:
return (await extract_categories(
return (await extract_topology(
content,
graph_topology
))

View file

@@ -2,13 +2,10 @@ import os
import glob
from pydantic import BaseModel, Field
from typing import Dict, List, Optional, Union, Type, Any, Tuple
from datetime import datetime
from cognee import config
from cognee.base_config import get_base_config
from cognee.infrastructure import infrastructure_config
from cognee.modules.cognify.config import get_cognify_config
from cognee.modules.topology.infer_data_topology import infer_data_topology
cognify_config = get_cognify_config()
base_config = get_base_config()

View file

@@ -1,22 +1,7 @@
from os import path
import logging
from pathlib import Path
logging.basicConfig(level=logging.DEBUG)
# ROOT_DIR = path.dirname(path.abspath(__file__))
#
# logging.debug("ROOT_DIR: ", ROOT_DIR)
#
# def get_absolute_path(path_from_root: str) -> str:
# logging.debug("abspath: ", path.abspath(path.join(ROOT_DIR, path_from_root)))
#
#
# return path.abspath(path.join(ROOT_DIR, path_from_root))
ROOT_DIR = Path(__file__).resolve().parent
logging.basicConfig(level=logging.DEBUG)
logging.debug("ROOT_DIR: %s", ROOT_DIR)
ROOT_DIR = Path(__file__).resolve().parent
def get_absolute_path(path_from_root: str) -> str:
absolute_path = ROOT_DIR / path_from_root
logging.debug("abspath: %s", absolute_path.resolve())
return str(absolute_path.resolve())
return str(absolute_path.resolve())

View file

@@ -11,12 +11,7 @@ import matplotlib.pyplot as plt
import tiktoken
import nltk
from posthog import Posthog
from cognee.config import Config
config = Config()
config.load()
from cognee.base_config import get_base_config
def send_telemetry(event_name: str):
if os.getenv("TELEMETRY_DISABLED"):
@@ -153,6 +148,7 @@ def generate_color_palette(unique_layers):
async def register_graphistry():
config = get_base_config()
graphistry.register(api = 3, username = config.graphistry_username, password = config.graphistry_password)

View file

@@ -2,7 +2,7 @@ from deepeval.dataset import EvaluationDataset
from pydantic import BaseModel
from typing import List, Type, Dict
from typing import List, Type
from deepeval.test_case import LLMTestCase
import dotenv
dotenv.load_dotenv()
@@ -41,7 +41,6 @@ print(dataset)
import logging
from cognee.infrastructure import infrastructure_config
logger = logging.getLogger(__name__)
@@ -81,10 +80,18 @@ async def run_cognify_base_rag():
pass
async def cognify_search_base_rag(content:str, context:str):
infrastructure_config.set_config({"database_directory_path": "/Users/vasa/Projects/cognee/cognee/.cognee_system/databases/cognee.lancedb"})
import os
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.vector import get_vectordb_config
vector_client = infrastructure_config.get_config("vector_engine")
async def cognify_search_base_rag(content:str, context:str):
base_config = get_base_config()
cognee_directory_path = os.path.abspath(".cognee_system")
base_config.system_root_directory = cognee_directory_path
vector_config = get_vectordb_config()
vector_client = vector_config.vector_engine
return_ = await vector_client.search(collection_name="basic_rag", query_text=content, limit=10)

View file

@@ -283,7 +283,7 @@
"outputs": [],
"source": [
"import cognee\n",
"from cognee.utils import render_graph\n",
"from cognee.shared.utils import render_graph\n",
"\n",
"graph = await cognee.cognify()\n",
"\n",

View file

@@ -112,12 +112,11 @@
"outputs": [],
"source": [
"import graphistry\n",
"from cognee.config import Config\n",
"from cognee.utils import render_graph\n",
"from cognee.shared.utils import render_graph\n",
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
"from cognee.base_config import get_base_config\n",
"\n",
"config = Config()\n",
"config.load()\n",
"config = get_base_config()\n",
"\n",
"graphistry.register(\n",
" api = 3,\n",