fix: enable sdk and fix config
commit 4fb3dc31a4 (parent ddf528993c)
49 changed files with 194 additions and 518 deletions
@@ -1,6 +1,6 @@
-# from .api.v1.config.config import config
-# from .api.v1.add.add import add
-# from .api.v1.cognify.cognify import cognify
-# from .api.v1.datasets.datasets import datasets
-# from .api.v1.search.search import search, SearchType
-# from .api.v1.prune import prune
+from .api.v1.config.config import config
+from .api.v1.add.add import add
+from .api.v1.cognify.cognify import cognify
+from .api.v1.datasets.datasets import datasets
+from .api.v1.search.search import search, SearchType
+from .api.v1.prune import prune
@@ -68,7 +68,7 @@ async def delete_dataset(dataset_id: str):

 @app.get("/datasets/{dataset_id}/graph", response_model=list)
 async def get_dataset_graph(dataset_id: str):
-    from cognee.utils import render_graph
+    from cognee.shared.utils import render_graph
     from cognee.infrastructure.databases.graph import get_graph_config
     from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client

@@ -253,14 +253,24 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000):
     logger.info("Starting server at %s:%s", host, port)

+    from cognee.base_config import get_base_config
+    from cognee.infrastructure.databases.relational import get_relationaldb_config
+    from cognee.infrastructure.databases.vector import get_vectordb_config
+
+    cognee_directory_path = os.path.abspath(".cognee_system")
+    databases_directory_path = os.path.join(cognee_directory_path, "databases")
+
+    relational_config = get_relationaldb_config()
+    relational_config.db_path = databases_directory_path
+    relational_config.create_engine()
+
+    vector_config = get_vectordb_config()
+    vector_config.vector_db_path = databases_directory_path
+    vector_config.create_engine()
+
     base_config = get_base_config()
     data_directory_path = os.path.abspath(".data_storage")
     base_config.data_root_directory = data_directory_path

-    cognee_directory_path = os.path.abspath(".cognee_system")
     base_config.system_root_directory = cognee_directory_path

     from cognee.modules.data.deletion import prune_system
     asyncio.run(prune_system())
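The added lines make the API server provision its own storage at startup: databases are pinned under .cognee_system/databases, data under .data_storage, and stale system state is pruned before serving. A minimal sketch of invoking that bootstrap from caller code; the import path is an assumption, since the diff does not show which module defines start_api_server:

# Hypothetical import path -- start_api_server's module is not shown in this diff.
from cognee.api.client import start_api_server

# Runs the directory/engine setup shown above, prunes state, then serves HTTP.
start_api_server(host="127.0.0.1", port=8000)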
@@ -5,21 +5,23 @@ import dlt
 import duckdb
 import cognee.modules.ingestion as ingestion
 from cognee.infrastructure.files.storage import LocalStorage
-from cognee.modules.discovery import discover_directory_datasets
-from cognee.utils import send_telemetry
+from cognee.modules.ingestion import get_matched_datasets, save_data_to_file
+from cognee.shared.utils import send_telemetry
 from cognee.base_config import get_base_config
+
+base_config = get_base_config()
 from cognee.infrastructure.databases.relational.config import get_relationaldb_config

 async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
     if isinstance(data, str):
         # data is a data directory path
         if "data://" in data:
-            return await add_data_directory(data.replace("data://", ""), dataset_name)
+            datasets = get_matched_datasets(data.replace("data://", ""), dataset_name)
+            return await asyncio.gather(*[add(file_paths, dataset_name) for [dataset_name, file_paths] in datasets])

         # data is a file path
         if "file://" in data:
             return await add([data], dataset_name)

-        # data is a text
+        # data is text
         else:
             file_path = save_data_to_file(data, dataset_name)
             return await add([file_path], dataset_name)
@@ -47,7 +49,7 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
     return []

 async def add_files(file_paths: List[str], dataset_name: str):
-    # infra_config = infrastructure_config.get_config()
+    base_config = get_base_config()
     data_directory_path = base_config.data_root_directory

     processed_file_paths = []
@@ -107,29 +109,3 @@ async def add_files(file_paths: List[str], dataset_name: str):
     send_telemetry("cognee.add")

     return run_info
-
-async def add_data_directory(data_path: str, dataset_name: str = None):
-    datasets = discover_directory_datasets(data_path)
-
-    results = []
-
-    for key in datasets.keys():
-        if dataset_name is None or key.startswith(dataset_name):
-            results.append(add(datasets[key], dataset_name = key))
-
-    return await asyncio.gather(*results)
-
-def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
-    data_directory_path = base_config.data_root_directory
-
-    classified_data = ingestion.classify(data, filename)
-    # data_id = ingestion.identify(classified_data)
-
-    storage_path = data_directory_path + "/" + dataset_name.replace(".", "/")
-    LocalStorage.ensure_directory_exists(storage_path)
-
-    file_metadata = classified_data.get_metadata()
-    file_name = file_metadata["name"]
-    LocalStorage(storage_path).store(file_name, classified_data.get_data())
-
-    return "file://" + storage_path + "/" + file_name
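With add_data_directory and the local save_data_to_file gone, add() is now the single ingestion entry point: "data://" paths fan out through get_matched_datasets, "file://" paths are ingested directly, and bare strings are saved to a file first. A usage sketch (the directory path and dataset name are illustrative):

import asyncio
import cognee

async def main():
    # Directory ingestion: each matched dataset is added concurrently.
    await cognee.add("data://" + "/absolute/path/to/.data", "example")

    # Raw text: saved via save_data_to_file, then re-added as a "file://" path.
    await cognee.add("Raw text is written to storage before ingestion.", "example")

asyncio.run(main())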
@@ -1,21 +0,0 @@
-from typing import List
-from enum import Enum
-from cognee.modules.users.memory import create_information_points, is_existing_memory
-
-class MemoryType(Enum):
-    GRAPH = "GRAPH"
-    VECTOR = "VECTOR"
-    RELATIONAL = "RELATIONAL"
-
-class MemoryException(Exception):
-    message: str
-
-    def __init__(self, message: str):
-        self.message = message
-
-
-async def remember(user_id: str, memory_name: str, payload: List[str]):
-    if await is_existing_memory(memory_name) is False:
-        raise MemoryException(f"Memory with the name \"{memory_name}\" doesn't exist.")
-
-    await create_information_points(memory_name, payload)
@@ -3,8 +3,8 @@ from uuid import uuid4
 from typing import List, Union
 import logging
 import nltk
 from asyncio import Lock
 from nltk.corpus import stopwords
-from cognee.config import Config
+from cognee.infrastructure.databases.graph.config import get_graph_config
 from cognee.modules.cognify.graph.add_node_connections import group_nodes_by_layer, \
     graph_ready_output, connect_nodes_in_graph

@@ -24,18 +24,14 @@ from cognee.modules.data.get_content_summary import get_content_summary
 from cognee.modules.data.get_cognitive_layers import get_cognitive_layers
 from cognee.modules.data.get_layer_graphs import get_layer_graphs
 from cognee.shared.data_models import KnowledgeGraph
-from cognee.utils import send_telemetry
+from cognee.shared.utils import send_telemetry
 from cognee.modules.tasks import create_task_status_table, update_task_status
 from cognee.shared.SourceCodeGraph import SourceCodeGraph
-from asyncio import Lock
 from cognee.modules.tasks import get_task_status
 from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.infrastructure.databases.relational.config import get_relationaldb_config

-config = Config()
-config.load()
-
 USER_ID = "default_user"

 logger = logging.getLogger("cognify")

@@ -66,7 +62,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
     task_status = get_task_status([dataset_name])

     if task_status == "DATASET_PROCESSING_STARTED":
-        logger.error(f"Dataset {dataset_name} is already being processed.")
+        logger.info(f"Dataset {dataset_name} is being processed.")
         return

     update_task_status(dataset_name, "DATASET_PROCESSING_STARTED")

@@ -176,8 +172,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi

     graph_config = get_graph_config()
     graph_client = await get_graph_client(graph_config.graph_engine)
-    cognify_config = get_cognify_config()
-    graph_topology = cognify_config.graph_model
+    graph_topology = graph_config.graph_model

     if graph_topology == SourceCodeGraph:
         classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]

@@ -199,6 +194,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
     await add_summary_nodes(graph_client, document_id, content_summary)
     print(f"Chunk ({chunk_id}) summarized.")

+    cognify_config = get_cognify_config()
+
     cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
     cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit]

@@ -286,7 +283,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi

     # print("results", out)
     # #
-    # # from cognee.utils import render_graph
+    # # from cognee.shared.utils import render_graph
     # #
     # # await render_graph(graph, include_color=True, include_nodes=False, include_size=False)
@@ -1,58 +1,60 @@
 """ This module is used to set the configuration of the system."""
 import os
 from cognee.base_config import get_base_config
-from cognee.infrastructure.databases.graph.config import get_graph_config
-from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.modules.cognify.config import get_cognify_config
+from cognee.infrastructure.data.chunking.config import get_chunk_config
+from cognee.infrastructure.databases.vector import get_vectordb_config
+from cognee.infrastructure.databases.graph.config import get_graph_config
+from cognee.infrastructure.databases.relational import get_relationaldb_config

 class config():
     @staticmethod
     def system_root_directory(system_root_directory: str):
         base_config = get_base_config()
         base_config.system_root_directory = system_root_directory
         databases_directory_path = os.path.join(system_root_directory, "databases")

         relational_config = get_relationaldb_config()
         relational_config.db_path = databases_directory_path
         relational_config.create_engine()

         vector_config = get_vectordb_config()
         vector_config.vector_db_path = databases_directory_path
         vector_config.create_engine()

     @staticmethod
     def data_root_directory(data_root_directory: str):
         base_config = get_base_config()
         base_config.data_root_directory = data_root_directory

     @staticmethod
     def monitoring_tool(monitoring_tool: object):
         base_config = get_base_config()
         base_config.monitoring_tool = monitoring_tool

     @staticmethod
     def set_classification_model(classification_model: object):
         cognify_config = get_cognify_config()
         cognify_config.classification_model = classification_model

     @staticmethod
     def set_summarization_model(summarization_model: object):
         cognify_config = get_cognify_config()
         cognify_config.summarization_model = summarization_model

     @staticmethod
     def set_labeling_model(labeling_model: object):
         cognify_config = get_cognify_config()
         cognify_config.labeling_model = labeling_model

     @staticmethod
     def set_graph_model(graph_model: object):
         graph_config = get_graph_config()
         graph_config.graph_model = graph_model

     @staticmethod
     def set_cognitive_layer_model(cognitive_layer_model: object):
         cognify_config = get_cognify_config()
         cognify_config.cognitive_layer_model = cognitive_layer_model

     @staticmethod
     def set_graph_engine(graph_engine: object):
         graph_config = get_graph_config()
@@ -78,7 +80,6 @@ class config():
         cognify_config = get_cognify_config()
         cognify_config.intra_layer_score_treshold = intra_layer_score_treshold

-
     @staticmethod
     def connect_documents(connect_documents: bool):
         cognify_config = get_cognify_config()
@@ -88,9 +89,3 @@ class config():
     def set_chunk_strategy(chunk_strategy: object):
         chunk_config = get_chunk_config()
         chunk_config.chunk_strategy = chunk_strategy
-
-
-    @staticmethod
-    def set_graph_topology(graph_topology: object):
-        cognify_config = get_cognify_config()
-        cognify_config.graph_topology = graph_topology
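Taken together, this rewritten config class is the SDK surface named in the commit title: each static method mutates a pydantic-settings singleton, and system_root_directory additionally rebuilds the relational and vector engines under <root>/databases. A short sketch of the intended call order before ingesting data (paths are illustrative):

import os
import cognee

# Setting the system root re-creates the relational and vector engines
# with their paths pointed at <root>/databases.
cognee.config.system_root_directory(os.path.abspath(".cognee_system"))
cognee.config.data_root_directory(os.path.abspath(".data_storage"))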
@@ -1,5 +1,5 @@
 from duckdb import CatalogException
-from cognee.modules.discovery import discover_directory_datasets
+from cognee.modules.ingestion import discover_directory_datasets
 from cognee.modules.tasks import get_task_status
 from cognee.infrastructure.databases.relational.config import get_relationaldb_config

@@ -11,7 +11,7 @@ from cognee.modules.search.graph.search_categories import search_categories
 from cognee.modules.search.graph.search_neighbour import search_neighbour
 from cognee.modules.search.graph.search_summary import search_summary
 from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
-from cognee.utils import send_telemetry
+from cognee.shared.utils import send_telemetry
 from cognee.infrastructure.databases.graph.config import get_graph_config

 class SearchType(Enum):
@@ -1,33 +1,29 @@
-from typing import List, Dict, Any, Union, Optional
-
-from cognee.infrastructure import infrastructure_config
-from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
-
-from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel
 import pandas as pd
 from pydantic import BaseModel

+from typing import List, Dict, Any, Union, Optional
+from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
+from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel
+from cognee.infrastructure.databases.graph.config import get_graph_config

 USER_ID = "default_user"

 async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
-    graph_db_type = infrastructure_config.get_config()["graph_engine"]
+    graph_config = get_graph_config()
+    graph_db_type = graph_config.graph_database_provider

     graph_client = await get_graph_client(graph_db_type)

-    graph_topology = infrastructure_config.get_config()["graph_topology"]
-
     engine = TopologyEngine()
     topology = await engine.infer_from_directory_structure(node_id=USER_ID, repository=directory, model=model)

     def flatten_model(model: BaseModel, parent_id: Optional[str] = None) -> Dict[str, Any]:
         """Flatten a single Pydantic model to a dictionary handling nested structures."""
-        result = {**model.dict(), 'parent_id': parent_id}
-        if hasattr(model, 'default_relationship') and model.default_relationship:
+        result = {**model.dict(), "parent_id": parent_id}
+        if hasattr(model, "default_relationship") and model.default_relationship:
             result.update({
-                'relationship_type': model.default_relationship.type,
-                'relationship_source': model.default_relationship.source,
-                'relationship_target': model.default_relationship.target
+                "relationship_type": model.default_relationship.type,
+                "relationship_source": model.default_relationship.source,
+                "relationship_target": model.default_relationship.target
             })
         return result
@@ -39,7 +35,7 @@ async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
         flat = [flatten_model(items, parent_id)]
         for field, value in items:
             if isinstance(value, (BaseModel, list)):
-                flat.extend(recursive_flatten(value, items.dict().get('node_id', None)))
+                flat.extend(recursive_flatten(value, items.dict().get("node_id", None)))
         return flat
     else:
         return []
@@ -56,38 +52,11 @@ async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:

     for _, row in df.iterrows():
         node_data = row.to_dict()
-        node_id = node_data.pop('node_id')
-
-        # Remove 'node_id' and get its value
+        node_id = node_data.pop("node_id")
+
+        # Remove "node_id" and get its value
         await graph_client.add_node(node_id, node_data)
-        if pd.notna(row['relationship_source']) and pd.notna(row['relationship_target']):
-            await graph_client.add_edge(row['relationship_source'], row['relationship_target'], relationship_name=row['relationship_type'])
+        if pd.notna(row["relationship_source"]) and pd.notna(row["relationship_target"]):
+            await graph_client.add_edge(row["relationship_source"], row["relationship_target"], relationship_name=row["relationship_type"])

     return graph_client.graph
-
-if __name__ == "__main__":
-    async def test() -> None:
-        # Uncomment and modify the following lines as needed
-        # await prune.prune_system()
-        #
-        # from cognee.api.v1.add import add
-        # data_directory_path = os.path.abspath("../../../.data")
-        # # print(data_directory_path)
-        # # config.data_root_directory(data_directory_path)
-        # # cognee_directory_path = os.path.abspath("../.cognee_system")
-        # # config.system_root_directory(cognee_directory_path)
-        #
-        # await add("data://" + data_directory_path, "example")
-
-        # graph = await add_topology()
-
-        graph_db_type = infrastructure_config.get_config()["graph_engine"]
-
-        graph_client = await get_graph_client(graph_db_type)
-        #
-        from cognee.utils import render_graph
-
-        await render_graph(graph_client.graph, include_color=True, include_nodes=False, include_size=False)
-
-    import asyncio
-    asyncio.run(test())
@@ -4,15 +4,15 @@ from cognee.root_dir import get_absolute_path
 from cognee.shared.data_models import MonitoringTool

 class BaseConfig(BaseSettings):
     system_root_directory: str = get_absolute_path(".cognee_system")
     data_root_directory: str = get_absolute_path(".data")
     monitoring_tool: object = MonitoringTool.LANGFUSE
+    graphistry_username: str
+    graphistry_password: str

     model_config = SettingsConfigDict(env_file = ".env", extra = "allow")

     def to_dict(self) -> dict:
         return {
             "system_root_directory": self.system_root_directory,
             "data_root_directory": self.data_root_directory,
             "monitoring_tool": self.monitoring_tool,
         }
cognee/config.py (deleted, 116 lines)
@@ -1,116 +0,0 @@
-"""Configuration for cognee - cognitive architecture framework."""
-import logging
-import os
-import configparser
-import uuid
-from typing import Optional, Dict, Any
-from dataclasses import dataclass, field
-from pathlib import Path
-from dotenv import load_dotenv
-
-logging.basicConfig(level=logging.DEBUG)
-
-def load_dontenv():
-    base_dir = Path(__file__).resolve().parent.parent
-    # Load the .env file from the base directory
-    dotenv_path = base_dir / ".env"
-    load_dotenv(dotenv_path=dotenv_path, override = True)
-
-try:
-    load_dontenv()
-except:
-    pass
-
-@dataclass
-class Config:
-    """ Configuration for cognee - cognitive architecture framework. """
-    cognee_dir: str = field(
-        default_factory=lambda: os.getenv("COG_ARCH_DIR", "cognee")
-    )
-    config_path: str = field(
-        default_factory=lambda: os.path.join(
-            os.getenv("COG_ARCH_DIR", "cognee"), "config"
-        )
-    )
-
-    # custom_model: str = os.getenv("CUSTOM_LLM_MODEL", "llama3-70b-8192") #"mistralai/Mixtral-8x7B-Instruct-v0.1"
-    # custom_endpoint: str = os.getenv("CUSTOM_ENDPOINT", "https://api.endpoints.anyscale.com/v1") #"https://api.endpoints.anyscale.com/v1" # pass claude endpoint
-    # custom_key: Optional[str] = os.getenv("CUSTOM_LLM_API_KEY")
-    # ollama_endpoint: str = os.getenv("CUSTOM_OLLAMA_ENDPOINT", "http://localhost:11434/v1") #"http://localhost:11434/v1"
-    # ollama_key: Optional[str] = "ollama"
-    # ollama_model: str = os.getenv("CUSTOM_OLLAMA_MODEL", "mistral:instruct") #"mistral:instruct"
-    # openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4o" ) #"gpt-4o"
-    # model_endpoint: str = "openai"
-    # llm_api_key: Optional[str] = os.getenv("OPENAI_API_KEY")
-    # openai_embedding_model = "text-embedding-3-large"
-    # openai_embedding_dimensions = 3072
-    # litellm_embedding_model = "text-embedding-3-large"
-    # litellm_embedding_dimensions = 3072
-
-    graphistry_username = os.getenv("GRAPHISTRY_USERNAME")
-    graphistry_password = os.getenv("GRAPHISTRY_PASSWORD")
-
-    # Embedding parameters
-    embedding_model: str = "BAAI/bge-large-en-v1.5"
-    embedding_dimensions: int = 1024
-    connect_documents: bool = False
-
-    # Model parameters and configuration for interlayer scoring
-    intra_layer_score_treshold: float = 0.98
-
-    # Client ID
-    anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)
-
-    # Chunking parameters
-    # chunk_size: int = 1500
-    # chunk_overlap: int = 0
-    # chunk_strategy: str = ChunkStrategy.PARAGRAPH
-
-    def load(self):
-        """Loads the configuration from a file or environment variables."""
-        try:
-            load_dontenv()
-        except:
-            pass
-        config = configparser.ConfigParser()
-        config.read(self.config_path)
-
-        # Override with environment variables if they exist
-        for attr in self.__annotations__:
-            env_value = os.getenv(attr.upper())
-            if env_value is not None:
-                setattr(self, attr, env_value)
-
-        # Load from config file
-        if config.sections():
-            for section in config.sections():
-                for key, value in config.items(section):
-                    if hasattr(self, key):
-                        setattr(self, key, value)
-
-    def save(self):
-        """Saves the current configuration to a file."""
-        config = configparser.ConfigParser()
-
-        # Save the current settings to the config file
-        for attr, value in self.__dict__.items():
-            section, option = attr.split("_", 1)
-            if not config.has_section(section):
-                config.add_section(section)
-            config.set(section, option, str(value))
-
-        with open(self.config_path, "w") as configfile:
-            config.write(configfile)
-
-    def to_dict(self) -> Dict[str, Any]:
-        """Returns a dictionary representation of the configuration."""
-        return {attr: getattr(self, attr) for attr in self.__annotations__}
-
-    @classmethod
-    def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
-        """Creates a Config instance from a dictionary."""
-        config = cls()
-        for attr, value in config_dict.items():
-            if hasattr(config, attr):
-                setattr(config, attr, value)
-        return config
@@ -1,113 +0,0 @@
-import logging
-from cognee.config import Config
-from .data.chunking.config import get_chunk_config
-from .llm.llm_interface import LLMInterface
-from .llm.get_llm_client import get_llm_client
-from ..shared.data_models import GraphDBType, DefaultContentPrediction, KnowledgeGraph, SummarizedContent, \
-    LabeledContent, DefaultCognitiveLayer
-
-logging.basicConfig(level=logging.DEBUG)
-
-config = Config()
-config.load()
-
-chunk_config = get_chunk_config()
-
-class InfrastructureConfig():
-    graph_engine: GraphDBType = None
-    llm_engine: LLMInterface = None
-    classification_model = None
-    summarization_model = None
-    labeling_model = None
-    graph_model = None
-    cognitive_layer_model = None
-    intra_layer_score_treshold = None
-    embedding_engine = None
-    connect_documents = config.connect_documents
-    chunk_strategy = chunk_config.chunk_strategy
-    chunk_engine = None
-    llm_provider: str = None
-    llm_model: str = None
-    llm_endpoint: str = None
-    llm_api_key: str = None
-
-    def get_config(self, config_entity: str = None) -> dict:
-        if self.graph_engine is None:
-            self.graph_engine = GraphDBType.NETWORKX
-
-        if self.classification_model is None:
-            self.classification_model = DefaultContentPrediction
-
-        if self.summarization_model is None:
-            self.summarization_model = SummarizedContent
-
-        if self.labeling_model is None:
-            self.labeling_model = LabeledContent
-
-        if self.graph_model is None:
-            self.graph_model = KnowledgeGraph
-
-        if self.cognitive_layer_model is None:
-            self.cognitive_layer_model = DefaultCognitiveLayer
-
-        if self.intra_layer_score_treshold is None:
-            self.intra_layer_score_treshold = config.intra_layer_score_treshold
-
-        if self.connect_documents is None:
-            self.connect_documents = config.connect_documents
-
-        if self.chunk_strategy is None:
-            self.chunk_strategy = chunk_config.chunk_strategy
-
-        if self.chunk_engine is None:
-            self.chunk_engine = chunk_config.chunk_engine
-
-        if (config_entity is None or config_entity == "llm_engine") and self.llm_engine is None:
-            self.llm_engine = get_llm_client()
-
-        if config_entity is not None:
-            return getattr(self, config_entity)
-
-        return {
-            "llm_engine": self.llm_engine,
-            "classification_model": self.classification_model,
-            "summarization_model": self.summarization_model,
-            "labeling_model": self.labeling_model,
-            "graph_model": self.graph_model,
-            "cognitive_layer_model": self.cognitive_layer_model,
-            "llm_provider": self.llm_provider,
-            "intra_layer_score_treshold": self.intra_layer_score_treshold,
-            "embedding_engine": self.embedding_engine,
-            "connect_documents": self.connect_documents,
-            "chunk_strategy": self.chunk_strategy,
-            "chunk_engine": self.chunk_engine,
-        }
-
-    def set_config(self, new_config: dict):
-        if "classification_model" in new_config:
-            self.classification_model = new_config["classification_model"]
-
-        if "summarization_model" in new_config:
-            self.summarization_model = new_config["summarization_model"]
-
-        if "labeling_model" in new_config:
-            self.labeling_model = new_config["labeling_model"]
-
-        if "cognitive_layer_model" in new_config:
-            self.cognitive_layer_model = new_config["cognitive_layer_model"]
-
-        if "intra_layer_score_treshold" in new_config:
-            self.intra_layer_score_treshold = new_config["intra_layer_score_treshold"]
-
-        if "embedding_engine" in new_config:
-            self.embedding_engine = new_config["embedding_engine"]
-
-        if "connect_documents" in new_config:
-            self.connect_documents = new_config["connect_documents"]
-
-        if "chunk_strategy" in new_config:
-            self.chunk_strategy = new_config["chunk_strategy"]
-
-        if "chunk_engine" in new_config:
-            self.chunk_engine = new_config["chunk_engine"]
-
-infrastructure_config = InfrastructureConfig()
@@ -1 +0,0 @@
-from .InfrastructureConfig import infrastructure_config
@@ -1,5 +1,5 @@
 from sklearn.feature_extraction.text import TfidfVectorizer
-from cognee.utils import extract_pos_tags
+from cognee.shared.utils import extract_pos_tags

 def extract_keywords(text: str) -> list[str]:
     if len(text) == 0:
@@ -4,7 +4,7 @@ import os
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from cognee.infrastructure.databases.relational.config import get_relationaldb_config
-from cognee.shared.data_models import DefaultGraphModel, GraphDBType, KnowledgeGraph
+from cognee.shared.data_models import GraphDBType, KnowledgeGraph


 class GraphConfig(BaseSettings):
@@ -26,7 +26,6 @@ class GraphConfig(BaseSettings):
         return {
             "graph_filename": self.graph_filename,
             "graph_database_provider": self.graph_database_provider,
-            "graph_topology": self.graph_topology,
             "graph_file_path": self.graph_file_path,
             "graph_database_url": self.graph_database_url,
             "graph_database_username": self.graph_database_username,
@@ -2,3 +2,4 @@ from .ModelBase import ModelBase
 from .DatabaseEngine import DatabaseEngine
 from .sqlite.SqliteEngine import SqliteEngine
 from .duckdb.DuckDBAdapter import DuckDBAdapter
+from .config import get_relationaldb_config
@@ -1,11 +1,11 @@
 import os
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
-from cognee.base_config import get_base_config
+from cognee.root_dir import get_absolute_path
 from .create_relational_engine import create_relational_engine

 class RelationalConfig(BaseSettings):
-    db_path: str = os.path.join(get_base_config().system_root_directory, "databases")
+    db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases")
     db_name: str = "cognee.db"
     db_host: str = "localhost"
     db_port: str = "5432"
@@ -17,7 +17,8 @@ class RelationalConfig(BaseSettings):
     model_config = SettingsConfigDict(env_file = ".env", extra = "allow")

     def create_engine(self):
-        return create_relational_engine(self.db_path, self.db_name)
+        self.db_file_path = os.path.join(self.db_path, self.db_name)
+        self.database_engine = create_relational_engine(self.db_path, self.db_name)

     def to_dict(self) -> dict:
         return {
@@ -1,12 +1,13 @@
 import os
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
-from cognee.infrastructure.databases.relational.config import get_relationaldb_config
 from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
+from cognee.root_dir import get_absolute_path
 from .create_vector_engine import create_vector_engine

 class VectorConfig(BaseSettings):
-    vector_db_url: str = os.path.join(get_relationaldb_config().db_path, "cognee.lancedb")
+    vector_db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases")
+    vector_db_url: str = os.path.join(vector_db_path, "cognee.lancedb")
     vector_db_key: str = ""
     vector_engine_provider: str = "lancedb"
     vector_engine: object = create_vector_engine(
@@ -22,7 +23,9 @@ class VectorConfig(BaseSettings):

     def create_engine(self):
         if self.vector_engine_provider == "lancedb":
-            self.vector_db_url = os.path.join(get_relationaldb_config().db_path, "cognee.lancedb")
+            self.vector_db_url = os.path.join(self.vector_db_path, "cognee.lancedb")
+        else:
+            self.vector_db_path = None

         self.vector_engine = create_vector_engine(
             get_vectordb_config().to_dict(),
@@ -5,26 +5,12 @@ from pydantic import BaseModel
 import instructor
 from tenacity import retry, stop_after_attempt
 import openai

-
-from cognee.infrastructure import infrastructure_config
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.prompts import read_query_prompt
 from cognee.shared.data_models import MonitoringTool
 from cognee.base_config import get_base_config
 from cognee.infrastructure.llm.config import get_llm_config

-llm_config = get_llm_config()
-base_config = get_base_config()
-
-if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
-    from langfuse.openai import AsyncOpenAI, OpenAI
-elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
-    from langsmith import wrappers
-    from openai import AsyncOpenAI
-    AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
-else:
-    from openai import AsyncOpenAI, OpenAI

 class GenericAPIAdapter(LLMInterface):
     """Adapter for Generic API LLM provider API """

@@ -37,6 +23,8 @@ class GenericAPIAdapter(LLMInterface):
         self.model = model
         self.api_key = api_key

+        llm_config = get_llm_config()
+
         if llm_config.llm_provider == "groq":
             from groq import groq
             self.aclient = instructor.from_openai(

@@ -46,6 +34,17 @@ class GenericAPIAdapter(LLMInterface):
                 mode = instructor.Mode.MD_JSON
             )
         else:
+            base_config = get_base_config()
+
+            if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
+                from langfuse.openai import AsyncOpenAI
+            elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
+                from langsmith import wrappers
+                from openai import AsyncOpenAI
+                AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
+            else:
+                from openai import AsyncOpenAI
+
             self.aclient = instructor.patch(
                 AsyncOpenAI(
                     base_url = api_endpoint,
@@ -9,7 +9,6 @@ class LLMProvider(Enum):
     ANTHROPIC = "anthropic"
     CUSTOM = "custom"

-llm_config = get_llm_config()
 def get_llm_client():
     """Get the LLM client based on the configuration using Enums."""
     llm_config = get_llm_config()
@@ -6,26 +6,10 @@ from pydantic import BaseModel
 from tenacity import retry, stop_after_attempt

 from cognee.base_config import get_base_config
-from cognee.config import Config
 from cognee.infrastructure.llm import get_llm_config
 from cognee.infrastructure.llm.llm_interface import LLMInterface
 from cognee.infrastructure.llm.prompts import read_query_prompt
 from cognee.shared.data_models import MonitoringTool

-config = Config()
-config.load()
-llm_config = get_llm_config()
-base_config = get_base_config()
-
-if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
-    from langfuse.openai import AsyncOpenAI, OpenAI
-elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
-    from langsmith import wrappers
-    from openai import AsyncOpenAI
-    AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
-else:
-    from openai import AsyncOpenAI, OpenAI

 class OpenAIAdapter(LLMInterface):
     name = "OpenAI"
     model: str

@@ -33,6 +17,17 @@ class OpenAIAdapter(LLMInterface):

     """Adapter for OpenAI's GPT-3, GPT-4 API"""
     def __init__(self, api_key: str, model: str):
+        base_config = get_base_config()
+
+        if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
+            from langfuse.openai import AsyncOpenAI, OpenAI
+        elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
+            from langsmith import wrappers
+            from openai import AsyncOpenAI
+            AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
+        else:
+            from openai import AsyncOpenAI, OpenAI
+
         self.aclient = instructor.from_openai(AsyncOpenAI(api_key = api_key))
         self.client = instructor.from_openai(OpenAI(api_key = api_key))
         self.model = model
@@ -1,12 +1,7 @@
 from functools import lru_cache
 from pydantic_settings import BaseSettings, SettingsConfigDict
 from cognee.shared.data_models import DefaultContentPrediction, LabeledContent, SummarizedContent, \
     DefaultCognitiveLayer, DefaultGraphModel, KnowledgeGraph
-
-
-# Monitoring tool
-
-
-DefaultCognitiveLayer

 class CognifyConfig(BaseSettings):
     classification_model: object = DefaultContentPrediction

@@ -15,10 +10,7 @@ class CognifyConfig(BaseSettings):
     cognitive_layer_model: object = DefaultCognitiveLayer
     intra_layer_score_treshold: float = 0.98
     connect_documents: bool = False
-    graph_topology: object = DefaultGraphModel
     cognitive_layers_limit: int = 2
-    graph_model:object = KnowledgeGraph
-

     model_config = SettingsConfigDict(env_file = ".env", extra = "allow")

@@ -30,11 +22,9 @@ class CognifyConfig(BaseSettings):
             "cognitive_layer_model": self.cognitive_layer_model,
             "intra_layer_score_treshold": self.intra_layer_score_treshold,
             "connect_documents": self.connect_documents,
-            "graph_topology": self.graph_topology,
             "cognitive_layers_limit": self.cognitive_layers_limit,
-            "graph_model": self.graph_model
         }

 @lru_cache
 def get_cognify_config():
-    return CognifyConfig()
+    return CognifyConfig()
@@ -4,14 +4,11 @@ from dspy.evaluate.evaluate import Evaluate
 from dspy.primitives.example import Example
 from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
 from cognee.root_dir import get_absolute_path
-from cognee.config import Config
 from cognee.shared.data_models import Answer
+from cognee.infrastructure.llm import get_llm_config
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.modules.cognify.dataset import HotPotQA

-config = Config()
-config.load()
-
 def evaluate():
     dataset = HotPotQA(
         train_seed = 1,

@@ -36,7 +33,8 @@ def evaluate():

     evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096)

-    gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
+    llm_config = get_llm_config()
+    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
     compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
     compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))

@@ -58,7 +56,7 @@ def evaluate():
         return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
             dsp.passage_match([example.answer], [answer_prediction.answer])

-    gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
+    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
     dspy.settings.configure(lm = gpt4)

     evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer)
@@ -4,7 +4,7 @@ from typing import List, Tuple, TypedDict
 from pydantic import BaseModel
 from cognee.infrastructure.databases.vector import DataPoint

-# from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
+# from cognee.shared.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
 from cognee.infrastructure.databases.graph.config import get_graph_config
 from cognee.infrastructure.databases.vector.config import get_vectordb_config

@@ -69,8 +69,6 @@ async def add_cognitive_layer_graphs(

         id, type, name, description, *node_properties = node

-        print("Node properties: ", node_properties)
-
         node_properties = dict(node_properties)

         graph_nodes.append((
@@ -1,6 +1,5 @@
 import uuid

-# from cognee.infrastructure import infrastructure_config
 from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
 from cognee.shared.data_models import GraphDBType
 from cognee.infrastructure.databases.graph.config import get_graph_config
@@ -105,7 +104,7 @@ if __name__ == "__main__":
     #
     # connect_nodes_in_graph(graph, relationships)

-    from cognee.utils import render_graph
+    from cognee.shared.utils import render_graph

     graph_url = await render_graph(graph)

@@ -1,13 +1,11 @@
 import dspy
 from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
 from cognee.root_dir import get_absolute_path
-from cognee.config import Config
-
-config = Config()
-config.load()
+from cognee.infrastructure.llm import get_llm_config

 def run():
-    gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
+    llm_config = get_llm_config()
+    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
     compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
     compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))

@@ -2,16 +2,13 @@ import dsp
 import dspy
 from dspy.teleprompt import BootstrapFewShot
 from dspy.primitives.example import Example
-from cognee.config import Config
 from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph
 from cognee.root_dir import get_absolute_path
 from cognee.infrastructure.files.storage import LocalStorage
 from cognee.shared.data_models import Answer
 from cognee.infrastructure.llm.get_llm_client import get_llm_client
 from cognee.modules.cognify.dataset import HotPotQA
-
-config = Config()
-config.load()
+from cognee.infrastructure.llm import get_llm_config

 def train():
     colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts")

@@ -59,7 +56,7 @@ def train():

     trainset = [example.with_inputs("context", "question") for example in train_examples]

-    gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
+    llm_config = get_llm_config()
+    gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)

     compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset)

@@ -5,7 +5,7 @@ from nltk.corpus import stopwords
 from nltk.tokenize import word_tokenize
 from cognee.infrastructure.llm import get_llm_config
 from cognee.shared.data_models import KnowledgeGraph, Node, Edge
-from cognee.utils import trim_text_to_max_tokens
+from cognee.shared.utils import trim_text_to_max_tokens

 # """Instructions:
 # You are a top-tier algorithm designed for extracting information from text in structured formats to build a knowledge graph.
@@ -1,8 +1,7 @@
 import logging
 from typing import List, Dict
-from cognee.infrastructure import infrastructure_config
-from.extraction.extract_cognitive_layers import extract_cognitive_layers
+from cognee.modules.cognify.config import get_cognify_config
+from .extraction.extract_cognitive_layers import extract_cognitive_layers

 config = get_cognify_config()

@@ -1,7 +1,6 @@
 import logging
-from cognee.infrastructure import infrastructure_config
-from.extraction.extract_summary import extract_summary
+from cognee.modules.cognify.config import get_cognify_config
+from .extraction.extract_summary import extract_summary

 config = get_cognify_config()
 logger = logging.getLogger(__name__)
@@ -1,13 +1,13 @@
 import logging
 import asyncio
-from cognee.infrastructure import infrastructure_config
+from cognee.infrastructure.databases.graph import get_graph_config
 from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
-from.extraction.extract_summary import extract_summary
+from cognee.modules.cognify.config import get_cognify_config
+config = get_cognify_config()

 logger = logging.getLogger(__name__)

 async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
+    config = get_graph_config()
+
     try:
         graph_awaitables = [
             extract_knowledge_graph(
@@ -1 +0,0 @@
-from .discover_directory_datasets import discover_directory_datasets
@@ -1,2 +1,5 @@
 from .classify import classify
 from .identify import identify
+from .save_data_to_file import save_data_to_file
+from .get_matched_datasets import get_matched_datasets
+from .discover_directory_datasets import discover_directory_datasets
cognee/modules/ingestion/get_matched_datasets.py (new file, 12 lines)
@@ -0,0 +1,12 @@
+from .discover_directory_datasets import discover_directory_datasets
+
+def get_matched_datasets(data_path: str, dataset_name_to_match: str = None):
+    datasets = discover_directory_datasets(data_path)
+
+    matched_datasets = []
+
+    for dataset_name, dataset_files in datasets.items():
+        if dataset_name_to_match is None or dataset_name.startswith(dataset_name_to_match):
+            matched_datasets.append([dataset_name, dataset_files])
+
+    return matched_datasets
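A quick sketch of the matching rule, assuming discovery returns dataset names keyed by dot-separated directory paths (the names below are hypothetical):

from cognee.modules.ingestion import get_matched_datasets

# With datasets {"example.train": [...], "example.test": [...], "other": [...]},
# the prefix "example" keeps the first two as [dataset_name, file_paths] pairs;
# passing None for the prefix matches everything.
matched = get_matched_datasets("/absolute/path/to/.data", "example")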
cognee/modules/ingestion/save_data_to_file.py (new file, 19 lines)
@@ -0,0 +1,19 @@
+from typing import BinaryIO, Union
+from cognee.base_config import get_base_config
+from cognee.infrastructure.files.storage import LocalStorage
+from .classify import classify
+
+def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
+    base_config = get_base_config()
+    data_directory_path = base_config.data_root_directory
+
+    classified_data = classify(data, filename)
+
+    storage_path = data_directory_path + "/" + dataset_name.replace(".", "/")
+    LocalStorage.ensure_directory_exists(storage_path)
+
+    file_metadata = classified_data.get_metadata()
+    file_name = file_metadata["name"]
+    LocalStorage(storage_path).store(file_name, classified_data.get_data())
+
+    return "file://" + storage_path + "/" + file_name
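This helper replaces the copy deleted from add.py, reading the data root from BaseConfig instead of a module-level global. A sketch of the round trip for text input (the dataset name is illustrative):

from cognee.modules.ingestion import save_data_to_file

# Classifies the payload, writes it under <data_root>/my/dataset,
# and returns a "file://" URL that add() can ingest directly.
file_url = save_data_to_file("some raw text", "my.dataset")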
@@ -14,7 +14,6 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
     Parameters:
     - graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session.
     - id (str): The identifier of the node to match against.
-    - infrastructure_config (Dict): Configuration that includes the graph engine type.
     - other_param (dict, optional): A dictionary that may contain 'node_id' to specify the node.

     Returns:
@@ -19,7 +19,6 @@ async def search_summary(query: str, graph: Union[nx.Graph, any]) -> Dict[str, Any]:
     Parameters:
     - graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session.
     - query (str): The query string to filter nodes by, e.g., 'SUMMARY'.
-    - infrastructure_config (Dict): Configuration that includes the graph engine type.
     - other_param (str, optional): An additional parameter, unused in this implementation but could be for future enhancements.

     Returns:
@@ -1,16 +1,17 @@
 import logging
 from typing import List, Dict
-from cognee.infrastructure import infrastructure_config
-from.extraction.categorize_relevant_summary import categorize_relevant_summary
+from cognee.modules.cognify.config import get_cognify_config
+from .extraction.categorize_relevant_summary import categorize_relevant_summary

 logger = logging.getLogger(__name__)

 async def get_cognitive_layers(content: str, categories: List[Dict]):
     try:
+        cognify_config = get_cognify_config()
         return (await categorize_relevant_summary(
             content,
             categories[0],
-            infrastructure_config.get_config()["categorize_summary_model"]
+            cognify_config.summarization_model,
         )).cognitive_layers
     except Exception as error:
         logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
@@ -1,10 +1,7 @@
-from cognee.config import Config
 from cognee.infrastructure.databases.vector import get_vectordb_config
 from cognee.infrastructure.llm import get_llm_config

 def get_settings():
-    config = Config()
-    config.load()
+    llm_config = get_llm_config()

     vector_dbs = [{
@@ -1,8 +1,5 @@
-import json
-import logging
 from pydantic import BaseModel
 from cognee.infrastructure.llm import get_llm_config
-from cognee.infrastructure import infrastructure_config

 class LLMConfig(BaseModel):
     apiKey: str

@@ -17,6 +14,3 @@ async def save_llm_config(new_llm_config: LLMConfig):

     if "*****" not in new_llm_config.apiKey and len(new_llm_config.apiKey.strip()) > 0:
         llm_config.llm_api_key = new_llm_config.apiKey
-
-    logging.error(json.dumps(llm_config.to_dict()))
-    infrastructure_config.llm_engine = None
@@ -1,14 +1,14 @@
-from typing import Type, List
+from typing import Type
 from pydantic import BaseModel
 from cognee.infrastructure.llm.prompts import read_query_prompt
 from cognee.infrastructure.llm.get_llm_client import get_llm_client


-async def extract_categories(content: str, response_model: Type[BaseModel]):
+async def extract_topology(content: str, response_model: Type[BaseModel]):
     llm_client = get_llm_client()

     system_prompt = read_query_prompt("extract_topology.txt")

     llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model)

-    return llm_output.model_dump()
+    return llm_output.model_dump()
@@ -1,18 +1,15 @@
 import logging
-from typing import List, Dict
-from cognee.infrastructure import infrastructure_config
-from cognee.modules.topology.extraction.extract_topology import extract_categories
-from cognee.modules.cognify.config import get_cognify_config
-
-cognify_config = get_cognify_config()
+from cognee.modules.topology.extraction.extract_topology import extract_topology
+from cognee.infrastructure.databases.graph.config import get_graph_config

 logger = logging.getLogger(__name__)

 async def infer_data_topology(content: str, graph_topology=None):
     if graph_topology is None:
-        graph_topology = cognify_config.graph_topology
+        graph_config = get_graph_config()
+        graph_topology = graph_config.graph_model
     try:
-        return (await extract_categories(
+        return (await extract_topology(
             content,
             graph_topology
         ))
@@ -2,13 +2,10 @@ import os
 import glob
 from pydantic import BaseModel, Field
 from typing import Dict, List, Optional, Union, Type, Any, Tuple
 from datetime import datetime

-from cognee import config
 from cognee.base_config import get_base_config
-from cognee.infrastructure import infrastructure_config
 from cognee.modules.cognify.config import get_cognify_config
 from cognee.modules.topology.infer_data_topology import infer_data_topology
+
+cognify_config = get_cognify_config()
+base_config = get_base_config()
@@ -1,22 +1,7 @@
-from os import path
-import logging
 from pathlib import Path
-
-logging.basicConfig(level=logging.DEBUG)
-# ROOT_DIR = path.dirname(path.abspath(__file__))
-#
-# logging.debug("ROOT_DIR: ", ROOT_DIR)
-#
-# def get_absolute_path(path_from_root: str) -> str:
-#     logging.debug("abspath: ", path.abspath(path.join(ROOT_DIR, path_from_root)))
-#
-#
-#     return path.abspath(path.join(ROOT_DIR, path_from_root))
-ROOT_DIR = Path(__file__).resolve().parent
-
-logging.basicConfig(level=logging.DEBUG)
-logging.debug("ROOT_DIR: %s", ROOT_DIR)

+ROOT_DIR = Path(__file__).resolve().parent
+
 def get_absolute_path(path_from_root: str) -> str:
     absolute_path = ROOT_DIR / path_from_root
-    logging.debug("abspath: %s", absolute_path.resolve())
-    return str(absolute_path.resolve())
+    return str(absolute_path.resolve())
@@ -11,12 +11,7 @@ import matplotlib.pyplot as plt
 import tiktoken
 import nltk
 from posthog import Posthog
-
-from cognee.config import Config
-
-config = Config()
-config.load()
-
+from cognee.base_config import get_base_config

 def send_telemetry(event_name: str):
     if os.getenv("TELEMETRY_DISABLED"):

@@ -153,6 +148,7 @@ def generate_color_palette(unique_layers):


 async def register_graphistry():
+    config = get_base_config()
     graphistry.register(api = 3, username = config.graphistry_username, password = config.graphistry_password)

@@ -2,7 +2,7 @@ from deepeval.dataset import EvaluationDataset
 from pydantic import BaseModel


-from typing import List, Type, Dict
+from typing import List, Type
 from deepeval.test_case import LLMTestCase
 import dotenv
 dotenv.load_dotenv()

@@ -41,7 +41,6 @@ print(dataset)


 import logging
-from cognee.infrastructure import infrastructure_config

 logger = logging.getLogger(__name__)

@@ -81,10 +80,18 @@ async def run_cognify_base_rag():
         pass


-async def cognify_search_base_rag(content:str, context:str):
-    infrastructure_config.set_config({"database_directory_path": "/Users/vasa/Projects/cognee/cognee/.cognee_system/databases/cognee.lancedb"})
-
-    vector_client = infrastructure_config.get_config("vector_engine")
+import os
+from cognee.base_config import get_base_config
+from cognee.infrastructure.databases.vector import get_vectordb_config
+
+async def cognify_search_base_rag(content:str, context:str):
+    base_config = get_base_config()
+
+    cognee_directory_path = os.path.abspath(".cognee_system")
+    base_config.system_root_directory = cognee_directory_path
+
+    vector_config = get_vectordb_config()
+    vector_client = vector_config.vector_engine

     return_ = await vector_client.search(collection_name="basic_rag", query_text=content, limit=10)
@@ -283,7 +283,7 @@
    "outputs": [],
    "source": [
     "import cognee\n",
-    "from cognee.utils import render_graph\n",
+    "from cognee.shared.utils import render_graph\n",
     "\n",
     "graph = await cognee.cognify()\n",
     "\n",
@@ -112,12 +112,11 @@
    "outputs": [],
    "source": [
     "import graphistry\n",
-    "from cognee.config import Config\n",
-    "from cognee.utils import render_graph\n",
+    "from cognee.shared.utils import render_graph\n",
     "from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
+    "from cognee.base_config import get_base_config\n",
     "\n",
-    "config = Config()\n",
-    "config.load()\n",
+    "config = get_base_config()\n",
     "\n",
     "graphistry.register(\n",
     "    api = 3,\n",