fix: enable sdk and fix config
This commit is contained in:
parent
ddf528993c
commit
4fb3dc31a4
49 changed files with 194 additions and 518 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
# from .api.v1.config.config import config
|
from .api.v1.config.config import config
|
||||||
# from .api.v1.add.add import add
|
from .api.v1.add.add import add
|
||||||
# from .api.v1.cognify.cognify import cognify
|
from .api.v1.cognify.cognify import cognify
|
||||||
# from .api.v1.datasets.datasets import datasets
|
from .api.v1.datasets.datasets import datasets
|
||||||
# from .api.v1.search.search import search, SearchType
|
from .api.v1.search.search import search, SearchType
|
||||||
# from .api.v1.prune import prune
|
from .api.v1.prune import prune
|
||||||
|
|
|
||||||
|
|
@ -68,7 +68,7 @@ async def delete_dataset(dataset_id: str):
|
||||||
|
|
||||||
@app.get("/datasets/{dataset_id}/graph", response_model=list)
|
@app.get("/datasets/{dataset_id}/graph", response_model=list)
|
||||||
async def get_dataset_graph(dataset_id: str):
|
async def get_dataset_graph(dataset_id: str):
|
||||||
from cognee.utils import render_graph
|
from cognee.shared.utils import render_graph
|
||||||
from cognee.infrastructure.databases.graph import get_graph_config
|
from cognee.infrastructure.databases.graph import get_graph_config
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
|
|
||||||
|
|
@ -253,14 +253,24 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000):
|
||||||
logger.info("Starting server at %s:%s", host, port)
|
logger.info("Starting server at %s:%s", host, port)
|
||||||
|
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
|
from cognee.infrastructure.databases.relational import get_relationaldb_config
|
||||||
|
from cognee.infrastructure.databases.vector import get_vectordb_config
|
||||||
|
|
||||||
|
cognee_directory_path = os.path.abspath(".cognee_system")
|
||||||
|
databases_directory_path = os.path.join(cognee_directory_path, "databases")
|
||||||
|
|
||||||
|
relational_config = get_relationaldb_config()
|
||||||
|
relational_config.db_path = databases_directory_path
|
||||||
|
relational_config.create_engine()
|
||||||
|
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
|
vector_config.vector_db_path = databases_directory_path
|
||||||
|
vector_config.create_engine()
|
||||||
|
|
||||||
base_config = get_base_config()
|
base_config = get_base_config()
|
||||||
data_directory_path = os.path.abspath(".data_storage")
|
data_directory_path = os.path.abspath(".data_storage")
|
||||||
base_config.data_root_directory = data_directory_path
|
base_config.data_root_directory = data_directory_path
|
||||||
|
|
||||||
cognee_directory_path = os.path.abspath(".cognee_system")
|
|
||||||
base_config.system_root_directory = cognee_directory_path
|
|
||||||
|
|
||||||
from cognee.modules.data.deletion import prune_system
|
from cognee.modules.data.deletion import prune_system
|
||||||
asyncio.run(prune_system())
|
asyncio.run(prune_system())
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,21 +5,23 @@ import dlt
|
||||||
import duckdb
|
import duckdb
|
||||||
import cognee.modules.ingestion as ingestion
|
import cognee.modules.ingestion as ingestion
|
||||||
from cognee.infrastructure.files.storage import LocalStorage
|
from cognee.infrastructure.files.storage import LocalStorage
|
||||||
from cognee.modules.discovery import discover_directory_datasets
|
from cognee.modules.ingestion import get_matched_datasets, save_data_to_file
|
||||||
from cognee.utils import send_telemetry
|
from cognee.shared.utils import send_telemetry
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
base_config = get_base_config()
|
|
||||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
|
async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = None):
|
||||||
if isinstance(data, str):
|
if isinstance(data, str):
|
||||||
# data is a data directory path
|
|
||||||
if "data://" in data:
|
if "data://" in data:
|
||||||
return await add_data_directory(data.replace("data://", ""), dataset_name)
|
# data is a data directory path
|
||||||
# data is a file path
|
datasets = get_matched_datasets(data.replace("data://", ""), dataset_name)
|
||||||
|
return await asyncio.gather(*[add(file_paths, dataset_name) for [dataset_name, file_paths] in datasets])
|
||||||
|
|
||||||
if "file://" in data:
|
if "file://" in data:
|
||||||
|
# data is a file path
|
||||||
return await add([data], dataset_name)
|
return await add([data], dataset_name)
|
||||||
# data is a text
|
|
||||||
|
# data is text
|
||||||
else:
|
else:
|
||||||
file_path = save_data_to_file(data, dataset_name)
|
file_path = save_data_to_file(data, dataset_name)
|
||||||
return await add([file_path], dataset_name)
|
return await add([file_path], dataset_name)
|
||||||
|
|
@ -47,7 +49,7 @@ async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_nam
|
||||||
return []
|
return []
|
||||||
|
|
||||||
async def add_files(file_paths: List[str], dataset_name: str):
|
async def add_files(file_paths: List[str], dataset_name: str):
|
||||||
# infra_config = infrastructure_config.get_config()
|
base_config = get_base_config()
|
||||||
data_directory_path = base_config.data_root_directory
|
data_directory_path = base_config.data_root_directory
|
||||||
|
|
||||||
processed_file_paths = []
|
processed_file_paths = []
|
||||||
|
|
@ -107,29 +109,3 @@ async def add_files(file_paths: List[str], dataset_name: str):
|
||||||
send_telemetry("cognee.add")
|
send_telemetry("cognee.add")
|
||||||
|
|
||||||
return run_info
|
return run_info
|
||||||
|
|
||||||
async def add_data_directory(data_path: str, dataset_name: str = None):
|
|
||||||
datasets = discover_directory_datasets(data_path)
|
|
||||||
|
|
||||||
results = []
|
|
||||||
|
|
||||||
for key in datasets.keys():
|
|
||||||
if dataset_name is None or key.startswith(dataset_name):
|
|
||||||
results.append(add(datasets[key], dataset_name = key))
|
|
||||||
|
|
||||||
return await asyncio.gather(*results)
|
|
||||||
|
|
||||||
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
|
|
||||||
data_directory_path = base_config.data_root_directory
|
|
||||||
|
|
||||||
classified_data = ingestion.classify(data, filename)
|
|
||||||
# data_id = ingestion.identify(classified_data)
|
|
||||||
|
|
||||||
storage_path = data_directory_path + "/" + dataset_name.replace(".", "/")
|
|
||||||
LocalStorage.ensure_directory_exists(storage_path)
|
|
||||||
|
|
||||||
file_metadata = classified_data.get_metadata()
|
|
||||||
file_name = file_metadata["name"]
|
|
||||||
LocalStorage(storage_path).store(file_name, classified_data.get_data())
|
|
||||||
|
|
||||||
return "file://" + storage_path + "/" + file_name
|
|
||||||
|
|
|
||||||
|
|
@ -1,21 +0,0 @@
|
||||||
from typing import List
|
|
||||||
from enum import Enum
|
|
||||||
from cognee.modules.users.memory import create_information_points, is_existing_memory
|
|
||||||
|
|
||||||
class MemoryType(Enum):
|
|
||||||
GRAPH = "GRAPH"
|
|
||||||
VECTOR = "VECTOR"
|
|
||||||
RELATIONAL = "RELATIONAL"
|
|
||||||
|
|
||||||
class MemoryException(Exception):
|
|
||||||
message: str
|
|
||||||
|
|
||||||
def __init__(self, message: str):
|
|
||||||
self.message = message
|
|
||||||
|
|
||||||
|
|
||||||
async def remember(user_id: str, memory_name: str, payload: List[str]):
|
|
||||||
if await is_existing_memory(memory_name) is False:
|
|
||||||
raise MemoryException(f"Memory with the name \"{memory_name}\" doesn't exist.")
|
|
||||||
|
|
||||||
await create_information_points(memory_name, payload)
|
|
||||||
|
|
@ -3,8 +3,8 @@ from uuid import uuid4
|
||||||
from typing import List, Union
|
from typing import List, Union
|
||||||
import logging
|
import logging
|
||||||
import nltk
|
import nltk
|
||||||
|
from asyncio import Lock
|
||||||
from nltk.corpus import stopwords
|
from nltk.corpus import stopwords
|
||||||
from cognee.config import Config
|
|
||||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
from cognee.modules.cognify.graph.add_node_connections import group_nodes_by_layer, \
|
from cognee.modules.cognify.graph.add_node_connections import group_nodes_by_layer, \
|
||||||
graph_ready_output, connect_nodes_in_graph
|
graph_ready_output, connect_nodes_in_graph
|
||||||
|
|
@ -24,18 +24,14 @@ from cognee.modules.data.get_content_summary import get_content_summary
|
||||||
from cognee.modules.data.get_cognitive_layers import get_cognitive_layers
|
from cognee.modules.data.get_cognitive_layers import get_cognitive_layers
|
||||||
from cognee.modules.data.get_layer_graphs import get_layer_graphs
|
from cognee.modules.data.get_layer_graphs import get_layer_graphs
|
||||||
from cognee.shared.data_models import KnowledgeGraph
|
from cognee.shared.data_models import KnowledgeGraph
|
||||||
from cognee.utils import send_telemetry
|
from cognee.shared.utils import send_telemetry
|
||||||
from cognee.modules.tasks import create_task_status_table, update_task_status
|
from cognee.modules.tasks import create_task_status_table, update_task_status
|
||||||
from cognee.shared.SourceCodeGraph import SourceCodeGraph
|
from cognee.shared.SourceCodeGraph import SourceCodeGraph
|
||||||
from asyncio import Lock
|
|
||||||
from cognee.modules.tasks import get_task_status
|
from cognee.modules.tasks import get_task_status
|
||||||
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
config = Config()
|
|
||||||
config.load()
|
|
||||||
|
|
||||||
USER_ID = "default_user"
|
USER_ID = "default_user"
|
||||||
|
|
||||||
logger = logging.getLogger("cognify")
|
logger = logging.getLogger("cognify")
|
||||||
|
|
@ -66,7 +62,7 @@ async def cognify(datasets: Union[str, List[str]] = None):
|
||||||
task_status = get_task_status([dataset_name])
|
task_status = get_task_status([dataset_name])
|
||||||
|
|
||||||
if task_status == "DATASET_PROCESSING_STARTED":
|
if task_status == "DATASET_PROCESSING_STARTED":
|
||||||
logger.error(f"Dataset {dataset_name} is already being processed.")
|
logger.info(f"Dataset {dataset_name} is being processed.")
|
||||||
return
|
return
|
||||||
|
|
||||||
update_task_status(dataset_name, "DATASET_PROCESSING_STARTED")
|
update_task_status(dataset_name, "DATASET_PROCESSING_STARTED")
|
||||||
|
|
@ -176,8 +172,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
||||||
|
|
||||||
graph_config = get_graph_config()
|
graph_config = get_graph_config()
|
||||||
graph_client = await get_graph_client(graph_config.graph_engine)
|
graph_client = await get_graph_client(graph_config.graph_engine)
|
||||||
cognify_config = get_cognify_config()
|
graph_topology = graph_config.graph_model
|
||||||
graph_topology = cognify_config.graph_model
|
|
||||||
|
|
||||||
if graph_topology == SourceCodeGraph:
|
if graph_topology == SourceCodeGraph:
|
||||||
classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]
|
classified_categories = [{"data_type": "text", "category_name": "Code and functions"}]
|
||||||
|
|
@ -199,6 +194,8 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
||||||
await add_summary_nodes(graph_client, document_id, content_summary)
|
await add_summary_nodes(graph_client, document_id, content_summary)
|
||||||
print(f"Chunk ({chunk_id}) summarized.")
|
print(f"Chunk ({chunk_id}) summarized.")
|
||||||
|
|
||||||
|
cognify_config = get_cognify_config()
|
||||||
|
|
||||||
cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
|
cognitive_layers = await get_cognitive_layers(input_text, classified_categories)
|
||||||
cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit]
|
cognitive_layers = cognitive_layers[:cognify_config.cognitive_layers_limit]
|
||||||
|
|
||||||
|
|
@ -286,7 +283,7 @@ async def process_text(chunk_collection: str, chunk_id: str, input_text: str, fi
|
||||||
|
|
||||||
# print("results", out)
|
# print("results", out)
|
||||||
# #
|
# #
|
||||||
# # from cognee.utils import render_graph
|
# # from cognee.shared.utils import render_graph
|
||||||
# #
|
# #
|
||||||
# # await render_graph(graph, include_color=True, include_nodes=False, include_size=False)
|
# # await render_graph(graph, include_color=True, include_nodes=False, include_size=False)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,58 +1,60 @@
|
||||||
""" This module is used to set the configuration of the system."""
|
""" This module is used to set the configuration of the system."""
|
||||||
|
import os
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
|
||||||
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
from cognee.infrastructure.data.chunking.config import get_chunk_config
|
||||||
|
from cognee.infrastructure.databases.vector import get_vectordb_config
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
from cognee.infrastructure.databases.relational import get_relationaldb_config
|
||||||
|
|
||||||
class config():
|
class config():
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def system_root_directory(system_root_directory: str):
|
def system_root_directory(system_root_directory: str):
|
||||||
base_config = get_base_config()
|
databases_directory_path = os.path.join(system_root_directory, "databases")
|
||||||
base_config.system_root_directory = system_root_directory
|
|
||||||
|
|
||||||
|
relational_config = get_relationaldb_config()
|
||||||
|
relational_config.db_path = databases_directory_path
|
||||||
|
relational_config.create_engine()
|
||||||
|
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
|
vector_config.vector_db_path = databases_directory_path
|
||||||
|
vector_config.create_engine()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def data_root_directory(data_root_directory: str):
|
def data_root_directory(data_root_directory: str):
|
||||||
base_config = get_base_config()
|
base_config = get_base_config()
|
||||||
base_config.data_root_directory = data_root_directory
|
base_config.data_root_directory = data_root_directory
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def monitoring_tool(monitoring_tool: object):
|
def monitoring_tool(monitoring_tool: object):
|
||||||
base_config = get_base_config()
|
base_config = get_base_config()
|
||||||
base_config.monitoring_tool = monitoring_tool
|
base_config.monitoring_tool = monitoring_tool
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_classification_model(classification_model: object):
|
def set_classification_model(classification_model: object):
|
||||||
cognify_config = get_cognify_config()
|
cognify_config = get_cognify_config()
|
||||||
cognify_config.classification_model = classification_model
|
cognify_config.classification_model = classification_model
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_summarization_model(summarization_model: object):
|
def set_summarization_model(summarization_model: object):
|
||||||
cognify_config = get_cognify_config()
|
cognify_config = get_cognify_config()
|
||||||
cognify_config.summarization_model=summarization_model
|
cognify_config.summarization_model=summarization_model
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_labeling_model(labeling_model: object):
|
def set_labeling_model(labeling_model: object):
|
||||||
cognify_config = get_cognify_config()
|
cognify_config = get_cognify_config()
|
||||||
cognify_config.labeling_model =labeling_model
|
cognify_config.labeling_model =labeling_model
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_graph_model(graph_model: object):
|
def set_graph_model(graph_model: object):
|
||||||
graph_config = get_graph_config()
|
graph_config = get_graph_config()
|
||||||
graph_config.graph_model = graph_model
|
graph_config.graph_model = graph_model
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_cognitive_layer_model(cognitive_layer_model: object):
|
def set_cognitive_layer_model(cognitive_layer_model: object):
|
||||||
cognify_config = get_cognify_config()
|
cognify_config = get_cognify_config()
|
||||||
cognify_config.cognitive_layer_model = cognitive_layer_model
|
cognify_config.cognitive_layer_model = cognitive_layer_model
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def set_graph_engine(graph_engine: object):
|
def set_graph_engine(graph_engine: object):
|
||||||
graph_config = get_graph_config()
|
graph_config = get_graph_config()
|
||||||
|
|
@ -78,7 +80,6 @@ class config():
|
||||||
cognify_config = get_cognify_config()
|
cognify_config = get_cognify_config()
|
||||||
cognify_config.intra_layer_score_treshold = intra_layer_score_treshold
|
cognify_config.intra_layer_score_treshold = intra_layer_score_treshold
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def connect_documents(connect_documents: bool):
|
def connect_documents(connect_documents: bool):
|
||||||
cognify_config = get_cognify_config()
|
cognify_config = get_cognify_config()
|
||||||
|
|
@ -88,9 +89,3 @@ class config():
|
||||||
def set_chunk_strategy(chunk_strategy: object):
|
def set_chunk_strategy(chunk_strategy: object):
|
||||||
chunk_config = get_chunk_config()
|
chunk_config = get_chunk_config()
|
||||||
chunk_config.chunk_strategy = chunk_strategy
|
chunk_config.chunk_strategy = chunk_strategy
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def set_graph_topology(graph_topology: object):
|
|
||||||
cognify_config = get_cognify_config()
|
|
||||||
cognify_config.graph_topology = graph_topology
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from duckdb import CatalogException
|
from duckdb import CatalogException
|
||||||
from cognee.modules.discovery import discover_directory_datasets
|
from cognee.modules.ingestion import discover_directory_datasets
|
||||||
from cognee.modules.tasks import get_task_status
|
from cognee.modules.tasks import get_task_status
|
||||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from cognee.modules.search.graph.search_categories import search_categories
|
||||||
from cognee.modules.search.graph.search_neighbour import search_neighbour
|
from cognee.modules.search.graph.search_neighbour import search_neighbour
|
||||||
from cognee.modules.search.graph.search_summary import search_summary
|
from cognee.modules.search.graph.search_summary import search_summary
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
from cognee.utils import send_telemetry
|
from cognee.shared.utils import send_telemetry
|
||||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
|
||||||
class SearchType(Enum):
|
class SearchType(Enum):
|
||||||
|
|
|
||||||
|
|
@ -1,33 +1,29 @@
|
||||||
from typing import List, Dict, Any, Union, Optional
|
|
||||||
|
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
|
||||||
|
|
||||||
from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from typing import List, Dict, Any, Union, Optional
|
||||||
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
|
from cognee.modules.topology.topology import TopologyEngine, GitHubRepositoryModel
|
||||||
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
|
||||||
USER_ID = "default_user"
|
USER_ID = "default_user"
|
||||||
|
|
||||||
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
|
async def add_topology(directory: str = "example", model: BaseModel = GitHubRepositoryModel) -> Any:
|
||||||
graph_db_type = infrastructure_config.get_config()["graph_engine"]
|
graph_config = get_graph_config()
|
||||||
|
graph_db_type = graph_config.graph_database_provider
|
||||||
|
|
||||||
graph_client = await get_graph_client(graph_db_type)
|
graph_client = await get_graph_client(graph_db_type)
|
||||||
|
|
||||||
graph_topology = infrastructure_config.get_config()["graph_topology"]
|
|
||||||
|
|
||||||
engine = TopologyEngine()
|
engine = TopologyEngine()
|
||||||
topology = await engine.infer_from_directory_structure(node_id=USER_ID, repository=directory, model=model)
|
topology = await engine.infer_from_directory_structure(node_id=USER_ID, repository=directory, model=model)
|
||||||
|
|
||||||
def flatten_model(model: BaseModel, parent_id: Optional[str] = None) -> Dict[str, Any]:
|
def flatten_model(model: BaseModel, parent_id: Optional[str] = None) -> Dict[str, Any]:
|
||||||
"""Flatten a single Pydantic model to a dictionary handling nested structures."""
|
"""Flatten a single Pydantic model to a dictionary handling nested structures."""
|
||||||
result = {**model.dict(), 'parent_id': parent_id}
|
result = {**model.dict(), "parent_id": parent_id}
|
||||||
if hasattr(model, 'default_relationship') and model.default_relationship:
|
if hasattr(model, "default_relationship") and model.default_relationship:
|
||||||
result.update({
|
result.update({
|
||||||
'relationship_type': model.default_relationship.type,
|
"relationship_type": model.default_relationship.type,
|
||||||
'relationship_source': model.default_relationship.source,
|
"relationship_source": model.default_relationship.source,
|
||||||
'relationship_target': model.default_relationship.target
|
"relationship_target": model.default_relationship.target
|
||||||
})
|
})
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
@ -39,7 +35,7 @@ async def add_topology(directory: str = "example", model: BaseModel = GitHubRepo
|
||||||
flat = [flatten_model(items, parent_id)]
|
flat = [flatten_model(items, parent_id)]
|
||||||
for field, value in items:
|
for field, value in items:
|
||||||
if isinstance(value, (BaseModel, list)):
|
if isinstance(value, (BaseModel, list)):
|
||||||
flat.extend(recursive_flatten(value, items.dict().get('node_id', None)))
|
flat.extend(recursive_flatten(value, items.dict().get("node_id", None)))
|
||||||
return flat
|
return flat
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
@ -56,38 +52,11 @@ async def add_topology(directory: str = "example", model: BaseModel = GitHubRepo
|
||||||
|
|
||||||
for _, row in df.iterrows():
|
for _, row in df.iterrows():
|
||||||
node_data = row.to_dict()
|
node_data = row.to_dict()
|
||||||
node_id = node_data.pop('node_id')
|
node_id = node_data.pop("node_id")
|
||||||
|
|
||||||
# Remove 'node_id' and get its value
|
# Remove "node_id" and get its value
|
||||||
await graph_client.add_node(node_id, node_data)
|
await graph_client.add_node(node_id, node_data)
|
||||||
if pd.notna(row['relationship_source']) and pd.notna(row['relationship_target']):
|
if pd.notna(row["relationship_source"]) and pd.notna(row["relationship_target"]):
|
||||||
await graph_client.add_edge(row['relationship_source'], row['relationship_target'], relationship_name=row['relationship_type'])
|
await graph_client.add_edge(row["relationship_source"], row["relationship_target"], relationship_name=row["relationship_type"])
|
||||||
|
|
||||||
return graph_client.graph
|
return graph_client.graph
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
async def test() -> None:
|
|
||||||
# Uncomment and modify the following lines as needed
|
|
||||||
# await prune.prune_system()
|
|
||||||
#
|
|
||||||
# from cognee.api.v1.add import add
|
|
||||||
# data_directory_path = os.path.abspath("../../../.data")
|
|
||||||
# # print(data_directory_path)
|
|
||||||
# # config.data_root_directory(data_directory_path)
|
|
||||||
# # cognee_directory_path = os.path.abspath("../.cognee_system")
|
|
||||||
# # config.system_root_directory(cognee_directory_path)
|
|
||||||
#
|
|
||||||
# await add("data://" + data_directory_path, "example")
|
|
||||||
|
|
||||||
# graph = await add_topology()
|
|
||||||
|
|
||||||
graph_db_type = infrastructure_config.get_config()["graph_engine"]
|
|
||||||
|
|
||||||
graph_client = await get_graph_client(graph_db_type)
|
|
||||||
#
|
|
||||||
from cognee.utils import render_graph
|
|
||||||
|
|
||||||
await render_graph(graph_client.graph, include_color=True, include_nodes=False, include_size=False)
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
asyncio.run(test())
|
|
||||||
|
|
|
||||||
|
|
@ -4,15 +4,15 @@ from cognee.root_dir import get_absolute_path
|
||||||
from cognee.shared.data_models import MonitoringTool
|
from cognee.shared.data_models import MonitoringTool
|
||||||
|
|
||||||
class BaseConfig(BaseSettings):
|
class BaseConfig(BaseSettings):
|
||||||
system_root_directory: str = get_absolute_path(".cognee_system")
|
|
||||||
data_root_directory: str = get_absolute_path(".data")
|
data_root_directory: str = get_absolute_path(".data")
|
||||||
monitoring_tool: object = MonitoringTool.LANGFUSE
|
monitoring_tool: object = MonitoringTool.LANGFUSE
|
||||||
|
graphistry_username: str
|
||||||
|
graphistry_password: str
|
||||||
|
|
||||||
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
|
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
def to_dict(self) -> dict:
|
||||||
return {
|
return {
|
||||||
"system_root_directory": self.system_root_directory,
|
|
||||||
"data_root_directory": self.data_root_directory,
|
"data_root_directory": self.data_root_directory,
|
||||||
"monitoring_tool": self.monitoring_tool,
|
"monitoring_tool": self.monitoring_tool,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
116
cognee/config.py
116
cognee/config.py
|
|
@ -1,116 +0,0 @@
|
||||||
"""Configuration for cognee - cognitive architecture framework."""
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import configparser
|
|
||||||
import uuid
|
|
||||||
from typing import Optional, Dict, Any
|
|
||||||
from dataclasses import dataclass, field
|
|
||||||
from pathlib import Path
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
|
||||||
|
|
||||||
def load_dontenv():
|
|
||||||
base_dir = Path(__file__).resolve().parent.parent
|
|
||||||
# Load the .env file from the base directory
|
|
||||||
dotenv_path = base_dir / ".env"
|
|
||||||
load_dotenv(dotenv_path=dotenv_path, override = True)
|
|
||||||
|
|
||||||
try:
|
|
||||||
load_dontenv()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Config:
|
|
||||||
""" Configuration for cognee - cognitive architecture framework. """
|
|
||||||
cognee_dir: str = field(
|
|
||||||
default_factory=lambda: os.getenv("COG_ARCH_DIR", "cognee")
|
|
||||||
)
|
|
||||||
config_path: str = field(
|
|
||||||
default_factory=lambda: os.path.join(
|
|
||||||
os.getenv("COG_ARCH_DIR", "cognee"), "config"
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
# custom_model: str = os.getenv("CUSTOM_LLM_MODEL", "llama3-70b-8192") #"mistralai/Mixtral-8x7B-Instruct-v0.1"
|
|
||||||
# custom_endpoint: str = os.getenv("CUSTOM_ENDPOINT", "https://api.endpoints.anyscale.com/v1") #"https://api.endpoints.anyscale.com/v1" # pass claude endpoint
|
|
||||||
# custom_key: Optional[str] = os.getenv("CUSTOM_LLM_API_KEY")
|
|
||||||
# ollama_endpoint: str = os.getenv("CUSTOM_OLLAMA_ENDPOINT", "http://localhost:11434/v1") #"http://localhost:11434/v1"
|
|
||||||
# ollama_key: Optional[str] = "ollama"
|
|
||||||
# ollama_model: str = os.getenv("CUSTOM_OLLAMA_MODEL", "mistral:instruct") #"mistral:instruct"
|
|
||||||
# openai_model: str = os.getenv("OPENAI_MODEL", "gpt-4o" ) #"gpt-4o"
|
|
||||||
# model_endpoint: str = "openai"
|
|
||||||
# llm_api_key: Optional[str] = os.getenv("OPENAI_API_KEY")
|
|
||||||
# openai_embedding_model = "text-embedding-3-large"
|
|
||||||
# openai_embedding_dimensions = 3072
|
|
||||||
# litellm_embedding_model = "text-embedding-3-large"
|
|
||||||
# litellm_embedding_dimensions = 3072
|
|
||||||
|
|
||||||
graphistry_username = os.getenv("GRAPHISTRY_USERNAME")
|
|
||||||
graphistry_password = os.getenv("GRAPHISTRY_PASSWORD")
|
|
||||||
|
|
||||||
# Embedding parameters
|
|
||||||
embedding_model: str = "BAAI/bge-large-en-v1.5"
|
|
||||||
embedding_dimensions: int = 1024
|
|
||||||
connect_documents: bool = False
|
|
||||||
|
|
||||||
# Model parameters and configuration for interlayer scoring
|
|
||||||
intra_layer_score_treshold: float = 0.98
|
|
||||||
|
|
||||||
# Client ID
|
|
||||||
anon_clientid: Optional[str] = field(default_factory=lambda: uuid.uuid4().hex)
|
|
||||||
|
|
||||||
#Chunking parameters
|
|
||||||
# chunk_size: int = 1500
|
|
||||||
# chunk_overlap: int = 0
|
|
||||||
# chunk_strategy: str = ChunkStrategy.PARAGRAPH
|
|
||||||
|
|
||||||
def load(self):
|
|
||||||
"""Loads the configuration from a file or environment variables."""
|
|
||||||
try:
|
|
||||||
load_dontenv()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
config = configparser.ConfigParser()
|
|
||||||
config.read(self.config_path)
|
|
||||||
|
|
||||||
# Override with environment variables if they exist
|
|
||||||
for attr in self.__annotations__:
|
|
||||||
env_value = os.getenv(attr.upper())
|
|
||||||
if env_value is not None:
|
|
||||||
setattr(self, attr, env_value)
|
|
||||||
|
|
||||||
# Load from config file
|
|
||||||
if config.sections():
|
|
||||||
for section in config.sections():
|
|
||||||
for key, value in config.items(section):
|
|
||||||
if hasattr(self, key):
|
|
||||||
setattr(self, key, value)
|
|
||||||
|
|
||||||
def save(self):
|
|
||||||
"""Saves the current configuration to a file."""
|
|
||||||
config = configparser.ConfigParser()
|
|
||||||
|
|
||||||
# Save the current settings to the config file
|
|
||||||
for attr, value in self.__dict__.items():
|
|
||||||
section, option = attr.split("_", 1)
|
|
||||||
if not config.has_section(section):
|
|
||||||
config.add_section(section)
|
|
||||||
config.set(section, option, str(value))
|
|
||||||
|
|
||||||
with open(self.config_path, "w") as configfile:
|
|
||||||
config.write(configfile)
|
|
||||||
|
|
||||||
def to_dict(self) -> Dict[str, Any]:
    """Return the annotated settings of this instance as a plain dictionary.

    Keys are the names found in ``__annotations__``; values are the current
    attribute values.
    """
    snapshot: Dict[str, Any] = {}
    for field_name in self.__annotations__:
        snapshot[field_name] = getattr(self, field_name)
    return snapshot
|
|
||||||
|
|
||||||
@classmethod
def from_dict(cls, config_dict: Dict[str, Any]) -> "Config":
    """Build an instance from a mapping of attribute names to values.

    A fresh instance is created with ``cls()``; entries whose key is not an
    existing attribute of that instance are ignored.
    """
    instance = cls()
    for name, new_value in config_dict.items():
        if not hasattr(instance, name):
            # Unknown keys are skipped rather than added as new attributes.
            continue
        setattr(instance, name, new_value)
    return instance
|
|
||||||
|
|
@ -1,113 +0,0 @@
|
||||||
import logging
|
|
||||||
from cognee.config import Config
|
|
||||||
from .data.chunking.config import get_chunk_config
|
|
||||||
from .llm.llm_interface import LLMInterface
|
|
||||||
from .llm.get_llm_client import get_llm_client
|
|
||||||
from ..shared.data_models import GraphDBType, DefaultContentPrediction, KnowledgeGraph, SummarizedContent, \
|
|
||||||
LabeledContent, DefaultCognitiveLayer
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
|
||||||
|
|
||||||
config = Config()
|
|
||||||
config.load()
|
|
||||||
|
|
||||||
chunk_config = get_chunk_config()
|
|
||||||
class InfrastructureConfig():
    """Mutable holder for infrastructure-level settings.

    Most attributes start as ``None`` and are filled with defaults the first
    time ``get_config`` is called; ``set_config`` overrides a fixed subset of
    them.
    """

    graph_engine: GraphDBType = None
    llm_engine: LLMInterface = None
    classification_model = None
    summarization_model = None
    labeling_model = None
    graph_model = None
    cognitive_layer_model = None
    intra_layer_score_treshold = None
    embedding_engine = None
    # These two are read from the module-level config objects when the class
    # body is executed.
    connect_documents = config.connect_documents
    chunk_strategy = chunk_config.chunk_strategy
    chunk_engine = None
    llm_provider: str = None
    llm_model: str = None
    llm_endpoint: str = None
    llm_api_key: str = None

    def get_config(self, config_entity: str = None) -> dict:
        """Fill in missing defaults and return the configuration.

        Args:
            config_entity: Optional attribute name. When given, only that
                attribute's value is returned instead of the full mapping.

        Returns:
            The value of ``config_entity`` when it is given, otherwise a
            dictionary of the exported settings.
        """
        # Each default is wrapped in a callable so it is only looked up when
        # the corresponding attribute is still None.
        lazy_defaults = (
            ("graph_engine", lambda: GraphDBType.NETWORKX),
            ("classification_model", lambda: DefaultContentPrediction),
            ("summarization_model", lambda: SummarizedContent),
            ("labeling_model", lambda: LabeledContent),
            ("graph_model", lambda: KnowledgeGraph),
            ("cognitive_layer_model", lambda: DefaultCognitiveLayer),
            ("intra_layer_score_treshold", lambda: config.intra_layer_score_treshold),
            ("connect_documents", lambda: config.connect_documents),
            ("chunk_strategy", lambda: chunk_config.chunk_strategy),
            ("chunk_engine", lambda: chunk_config.chunk_engine),
        )
        for attribute, make_default in lazy_defaults:
            if getattr(self, attribute) is None:
                setattr(self, attribute, make_default())

        # The LLM client is only created for a full dump or when it is the
        # entity explicitly asked for.
        llm_requested = config_entity is None or config_entity == "llm_engine"
        if llm_requested and self.llm_engine is None:
            self.llm_engine = get_llm_client()

        if config_entity is not None:
            return getattr(self, config_entity)

        exported = (
            "llm_engine",
            "classification_model",
            "summarization_model",
            "labeling_model",
            "graph_model",
            "cognitive_layer_model",
            "llm_provider",
            "intra_layer_score_treshold",
            "embedding_engine",
            "connect_documents",
            "chunk_strategy",
            "chunk_engine",
        )
        return {key: getattr(self, key) for key in exported}

    def set_config(self, new_config: dict):
        """Override settings from ``new_config``.

        Only the keys listed below are honoured; any other key in
        ``new_config`` is ignored.
        """
        overridable = (
            "classification_model",
            "summarization_model",
            "labeling_model",
            "cognitive_layer_model",
            "intra_layer_score_treshold",
            "embedding_engine",
            "connect_documents",
            "chunk_strategy",
            "chunk_engine",
        )
        for key in overridable:
            if key in new_config:
                setattr(self, key, new_config[key])
|
|
||||||
|
|
||||||
infrastructure_config = InfrastructureConfig()
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
from .InfrastructureConfig import infrastructure_config
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
from cognee.utils import extract_pos_tags
|
from cognee.shared.utils import extract_pos_tags
|
||||||
|
|
||||||
def extract_keywords(text: str) -> list[str]:
|
def extract_keywords(text: str) -> list[str]:
|
||||||
if len(text) == 0:
|
if len(text) == 0:
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ import os
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
||||||
from cognee.shared.data_models import DefaultGraphModel, GraphDBType, KnowledgeGraph
|
from cognee.shared.data_models import GraphDBType, KnowledgeGraph
|
||||||
|
|
||||||
|
|
||||||
class GraphConfig(BaseSettings):
|
class GraphConfig(BaseSettings):
|
||||||
|
|
@ -26,7 +26,6 @@ class GraphConfig(BaseSettings):
|
||||||
return {
|
return {
|
||||||
"graph_filename": self.graph_filename,
|
"graph_filename": self.graph_filename,
|
||||||
"graph_database_provider": self.graph_database_provider,
|
"graph_database_provider": self.graph_database_provider,
|
||||||
"graph_topology": self.graph_topology,
|
|
||||||
"graph_file_path": self.graph_file_path,
|
"graph_file_path": self.graph_file_path,
|
||||||
"graph_database_url": self.graph_database_url,
|
"graph_database_url": self.graph_database_url,
|
||||||
"graph_database_username": self.graph_database_username,
|
"graph_database_username": self.graph_database_username,
|
||||||
|
|
|
||||||
|
|
@ -2,3 +2,4 @@ from .ModelBase import ModelBase
|
||||||
from .DatabaseEngine import DatabaseEngine
|
from .DatabaseEngine import DatabaseEngine
|
||||||
from .sqlite.SqliteEngine import SqliteEngine
|
from .sqlite.SqliteEngine import SqliteEngine
|
||||||
from .duckdb.DuckDBAdapter import DuckDBAdapter
|
from .duckdb.DuckDBAdapter import DuckDBAdapter
|
||||||
|
from .config import get_relationaldb_config
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,11 @@
|
||||||
import os
|
import os
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
from cognee.base_config import get_base_config
|
from cognee.root_dir import get_absolute_path
|
||||||
from .create_relational_engine import create_relational_engine
|
from .create_relational_engine import create_relational_engine
|
||||||
|
|
||||||
class RelationalConfig(BaseSettings):
|
class RelationalConfig(BaseSettings):
|
||||||
db_path: str = os.path.join(get_base_config().system_root_directory, "databases")
|
db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases")
|
||||||
db_name: str = "cognee.db"
|
db_name: str = "cognee.db"
|
||||||
db_host: str = "localhost"
|
db_host: str = "localhost"
|
||||||
db_port: str = "5432"
|
db_port: str = "5432"
|
||||||
|
|
@ -17,7 +17,8 @@ class RelationalConfig(BaseSettings):
|
||||||
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
|
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
|
||||||
|
|
||||||
def create_engine(self):
|
def create_engine(self):
|
||||||
return create_relational_engine(self.db_path, self.db_name)
|
self.db_file_path = os.path.join(self.db_path, self.db_name)
|
||||||
|
self.database_engine = create_relational_engine(self.db_path, self.db_name)
|
||||||
|
|
||||||
def to_dict(self) -> dict:
|
def to_dict(self) -> dict:
|
||||||
return {
|
return {
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,13 @@
|
||||||
import os
|
import os
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
from cognee.infrastructure.databases.relational.config import get_relationaldb_config
|
|
||||||
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
|
from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config
|
||||||
|
from cognee.root_dir import get_absolute_path
|
||||||
from .create_vector_engine import create_vector_engine
|
from .create_vector_engine import create_vector_engine
|
||||||
|
|
||||||
class VectorConfig(BaseSettings):
|
class VectorConfig(BaseSettings):
|
||||||
vector_db_url: str = os.path.join(get_relationaldb_config().db_path, "cognee.lancedb")
|
vector_db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases")
|
||||||
|
vector_db_url: str = os.path.join(vector_db_path, "cognee.lancedb")
|
||||||
vector_db_key: str = ""
|
vector_db_key: str = ""
|
||||||
vector_engine_provider: str = "lancedb"
|
vector_engine_provider: str = "lancedb"
|
||||||
vector_engine: object = create_vector_engine(
|
vector_engine: object = create_vector_engine(
|
||||||
|
|
@ -22,7 +23,9 @@ class VectorConfig(BaseSettings):
|
||||||
|
|
||||||
def create_engine(self):
|
def create_engine(self):
|
||||||
if self.vector_engine_provider == "lancedb":
|
if self.vector_engine_provider == "lancedb":
|
||||||
self.vector_db_url = os.path.join(get_relationaldb_config().db_path, "cognee.lancedb")
|
self.vector_db_url = os.path.join(self.vector_db_path, "cognee.lancedb")
|
||||||
|
else:
|
||||||
|
self.vector_db_path = None
|
||||||
|
|
||||||
self.vector_engine = create_vector_engine(
|
self.vector_engine = create_vector_engine(
|
||||||
get_vectordb_config().to_dict(),
|
get_vectordb_config().to_dict(),
|
||||||
|
|
|
||||||
|
|
@ -5,26 +5,12 @@ from pydantic import BaseModel
|
||||||
import instructor
|
import instructor
|
||||||
from tenacity import retry, stop_after_attempt
|
from tenacity import retry, stop_after_attempt
|
||||||
import openai
|
import openai
|
||||||
|
|
||||||
|
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
from cognee.infrastructure.llm.llm_interface import LLMInterface
|
from cognee.infrastructure.llm.llm_interface import LLMInterface
|
||||||
from cognee.infrastructure.llm.prompts import read_query_prompt
|
from cognee.infrastructure.llm.prompts import read_query_prompt
|
||||||
from cognee.shared.data_models import MonitoringTool
|
from cognee.shared.data_models import MonitoringTool
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
from cognee.infrastructure.llm.config import get_llm_config
|
from cognee.infrastructure.llm.config import get_llm_config
|
||||||
|
|
||||||
llm_config = get_llm_config()
|
|
||||||
base_config = get_base_config()
|
|
||||||
|
|
||||||
if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
|
|
||||||
from langfuse.openai import AsyncOpenAI, OpenAI
|
|
||||||
elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
|
|
||||||
from langsmith import wrappers
|
|
||||||
from openai import AsyncOpenAI
|
|
||||||
AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
|
|
||||||
else:
|
|
||||||
from openai import AsyncOpenAI, OpenAI
|
|
||||||
|
|
||||||
class GenericAPIAdapter(LLMInterface):
|
class GenericAPIAdapter(LLMInterface):
|
||||||
"""Adapter for Generic API LLM provider API """
|
"""Adapter for Generic API LLM provider API """
|
||||||
|
|
@ -37,6 +23,8 @@ class GenericAPIAdapter(LLMInterface):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
|
|
||||||
|
llm_config = get_llm_config()
|
||||||
|
|
||||||
if llm_config.llm_provider == "groq":
|
if llm_config.llm_provider == "groq":
|
||||||
from groq import groq
|
from groq import groq
|
||||||
self.aclient = instructor.from_openai(
|
self.aclient = instructor.from_openai(
|
||||||
|
|
@ -46,6 +34,17 @@ class GenericAPIAdapter(LLMInterface):
|
||||||
mode = instructor.Mode.MD_JSON
|
mode = instructor.Mode.MD_JSON
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
base_config = get_base_config()
|
||||||
|
|
||||||
|
if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
|
||||||
|
from langfuse.openai import AsyncOpenAI
|
||||||
|
elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
|
||||||
|
from langsmith import wrappers
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
|
||||||
|
else:
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
|
||||||
self.aclient = instructor.patch(
|
self.aclient = instructor.patch(
|
||||||
AsyncOpenAI(
|
AsyncOpenAI(
|
||||||
base_url = api_endpoint,
|
base_url = api_endpoint,
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ class LLMProvider(Enum):
|
||||||
ANTHROPIC = "anthropic"
|
ANTHROPIC = "anthropic"
|
||||||
CUSTOM = "custom"
|
CUSTOM = "custom"
|
||||||
|
|
||||||
llm_config = get_llm_config()
|
|
||||||
def get_llm_client():
|
def get_llm_client():
|
||||||
"""Get the LLM client based on the configuration using Enums."""
|
"""Get the LLM client based on the configuration using Enums."""
|
||||||
llm_config = get_llm_config()
|
llm_config = get_llm_config()
|
||||||
|
|
|
||||||
|
|
@ -6,26 +6,10 @@ from pydantic import BaseModel
|
||||||
from tenacity import retry, stop_after_attempt
|
from tenacity import retry, stop_after_attempt
|
||||||
|
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
from cognee.config import Config
|
|
||||||
from cognee.infrastructure.llm import get_llm_config
|
|
||||||
from cognee.infrastructure.llm.llm_interface import LLMInterface
|
from cognee.infrastructure.llm.llm_interface import LLMInterface
|
||||||
from cognee.infrastructure.llm.prompts import read_query_prompt
|
from cognee.infrastructure.llm.prompts import read_query_prompt
|
||||||
from cognee.shared.data_models import MonitoringTool
|
from cognee.shared.data_models import MonitoringTool
|
||||||
|
|
||||||
config = Config()
|
|
||||||
config.load()
|
|
||||||
llm_config = get_llm_config()
|
|
||||||
base_config = get_base_config()
|
|
||||||
|
|
||||||
if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
|
|
||||||
from langfuse.openai import AsyncOpenAI, OpenAI
|
|
||||||
elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
|
|
||||||
from langsmith import wrappers
|
|
||||||
from openai import AsyncOpenAI
|
|
||||||
AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
|
|
||||||
else:
|
|
||||||
from openai import AsyncOpenAI, OpenAI
|
|
||||||
|
|
||||||
class OpenAIAdapter(LLMInterface):
|
class OpenAIAdapter(LLMInterface):
|
||||||
name = "OpenAI"
|
name = "OpenAI"
|
||||||
model: str
|
model: str
|
||||||
|
|
@ -33,6 +17,17 @@ class OpenAIAdapter(LLMInterface):
|
||||||
|
|
||||||
"""Adapter for OpenAI's GPT-3, GPT=4 API"""
|
"""Adapter for OpenAI's GPT-3, GPT=4 API"""
|
||||||
def __init__(self, api_key: str, model:str):
|
def __init__(self, api_key: str, model:str):
|
||||||
|
base_config = get_base_config()
|
||||||
|
|
||||||
|
if base_config.monitoring_tool == MonitoringTool.LANGFUSE:
|
||||||
|
from langfuse.openai import AsyncOpenAI, OpenAI
|
||||||
|
elif base_config.monitoring_tool == MonitoringTool.LANGSMITH:
|
||||||
|
from langsmith import wrappers
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
AsyncOpenAI = wrappers.wrap_openai(AsyncOpenAI())
|
||||||
|
else:
|
||||||
|
from openai import AsyncOpenAI, OpenAI
|
||||||
|
|
||||||
self.aclient = instructor.from_openai(AsyncOpenAI(api_key = api_key))
|
self.aclient = instructor.from_openai(AsyncOpenAI(api_key = api_key))
|
||||||
self.client = instructor.from_openai(OpenAI(api_key = api_key))
|
self.client = instructor.from_openai(OpenAI(api_key = api_key))
|
||||||
self.model = model
|
self.model = model
|
||||||
|
|
|
||||||
|
|
@ -1,12 +1,7 @@
|
||||||
from functools import lru_cache
|
from functools import lru_cache
|
||||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||||
from cognee.shared.data_models import DefaultContentPrediction, LabeledContent, SummarizedContent, \
|
from cognee.shared.data_models import DefaultContentPrediction, LabeledContent, SummarizedContent, \
|
||||||
DefaultCognitiveLayer, DefaultGraphModel, KnowledgeGraph
|
DefaultCognitiveLayer
|
||||||
|
|
||||||
|
|
||||||
# Monitoring tool
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class CognifyConfig(BaseSettings):
|
class CognifyConfig(BaseSettings):
|
||||||
classification_model: object = DefaultContentPrediction
|
classification_model: object = DefaultContentPrediction
|
||||||
|
|
@ -15,10 +10,7 @@ class CognifyConfig(BaseSettings):
|
||||||
cognitive_layer_model: object = DefaultCognitiveLayer
|
cognitive_layer_model: object = DefaultCognitiveLayer
|
||||||
intra_layer_score_treshold: float = 0.98
|
intra_layer_score_treshold: float = 0.98
|
||||||
connect_documents: bool = False
|
connect_documents: bool = False
|
||||||
graph_topology: object = DefaultGraphModel
|
|
||||||
cognitive_layers_limit: int = 2
|
cognitive_layers_limit: int = 2
|
||||||
graph_model:object = KnowledgeGraph
|
|
||||||
|
|
||||||
|
|
||||||
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
|
model_config = SettingsConfigDict(env_file = ".env", extra = "allow")
|
||||||
|
|
||||||
|
|
@ -30,11 +22,9 @@ class CognifyConfig(BaseSettings):
|
||||||
"cognitive_layer_model": self.cognitive_layer_model,
|
"cognitive_layer_model": self.cognitive_layer_model,
|
||||||
"intra_layer_score_treshold": self.intra_layer_score_treshold,
|
"intra_layer_score_treshold": self.intra_layer_score_treshold,
|
||||||
"connect_documents": self.connect_documents,
|
"connect_documents": self.connect_documents,
|
||||||
"graph_topology": self.graph_topology,
|
|
||||||
"cognitive_layers_limit": self.cognitive_layers_limit,
|
"cognitive_layers_limit": self.cognitive_layers_limit,
|
||||||
"graph_model": self.graph_model
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@lru_cache
|
@lru_cache
|
||||||
def get_cognify_config():
|
def get_cognify_config():
|
||||||
return CognifyConfig()
|
return CognifyConfig()
|
||||||
|
|
|
||||||
|
|
@ -4,14 +4,11 @@ from dspy.evaluate.evaluate import Evaluate
|
||||||
from dspy.primitives.example import Example
|
from dspy.primitives.example import Example
|
||||||
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
|
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
|
||||||
from cognee.root_dir import get_absolute_path
|
from cognee.root_dir import get_absolute_path
|
||||||
from cognee.config import Config
|
|
||||||
from cognee.shared.data_models import Answer
|
from cognee.shared.data_models import Answer
|
||||||
|
from cognee.infrastructure.llm import get_llm_config
|
||||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
from cognee.modules.cognify.dataset import HotPotQA
|
from cognee.modules.cognify.dataset import HotPotQA
|
||||||
|
|
||||||
config = Config()
|
|
||||||
config.load()
|
|
||||||
|
|
||||||
def evaluate():
|
def evaluate():
|
||||||
dataset = HotPotQA(
|
dataset = HotPotQA(
|
||||||
train_seed = 1,
|
train_seed = 1,
|
||||||
|
|
@ -36,7 +33,8 @@ def evaluate():
|
||||||
|
|
||||||
evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096)
|
evaluate_on_hotpotqa = Evaluate(devset = devset, num_threads = 1, display_progress = True, display_table = 5, max_tokens = 4096)
|
||||||
|
|
||||||
gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
|
llm_config = get_llm_config()
|
||||||
|
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
|
||||||
compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
|
compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
|
||||||
compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
|
compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
|
||||||
|
|
||||||
|
|
@ -58,7 +56,7 @@ def evaluate():
|
||||||
return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
|
return dsp.answer_match(example.answer, [answer_prediction.answer], frac = 0.8) or \
|
||||||
dsp.passage_match([example.answer], [answer_prediction.answer])
|
dsp.passage_match([example.answer], [answer_prediction.answer])
|
||||||
|
|
||||||
gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
|
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
|
||||||
dspy.settings.configure(lm = gpt4)
|
dspy.settings.configure(lm = gpt4)
|
||||||
|
|
||||||
evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer)
|
evaluate_on_hotpotqa(compiled_extract_knowledge_graph, metric = evaluate_answer)
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ from typing import List, Tuple, TypedDict
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from cognee.infrastructure.databases.vector import DataPoint
|
from cognee.infrastructure.databases.vector import DataPoint
|
||||||
|
|
||||||
# from cognee.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
|
# from cognee.shared.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader
|
||||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
from cognee.infrastructure.databases.vector.config import get_vectordb_config
|
||||||
|
|
||||||
|
|
@ -69,8 +69,6 @@ async def add_cognitive_layer_graphs(
|
||||||
|
|
||||||
id, type, name, description, *node_properties = node
|
id, type, name, description, *node_properties = node
|
||||||
|
|
||||||
print("Node properties: ", node_properties)
|
|
||||||
|
|
||||||
node_properties = dict(node_properties)
|
node_properties = dict(node_properties)
|
||||||
|
|
||||||
graph_nodes.append((
|
graph_nodes.append((
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
# from cognee.infrastructure import infrastructure_config
|
|
||||||
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
|
||||||
from cognee.shared.data_models import GraphDBType
|
from cognee.shared.data_models import GraphDBType
|
||||||
from cognee.infrastructure.databases.graph.config import get_graph_config
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
|
|
@ -105,7 +104,7 @@ if __name__ == "__main__":
|
||||||
#
|
#
|
||||||
# connect_nodes_in_graph(graph, relationships)
|
# connect_nodes_in_graph(graph, relationships)
|
||||||
|
|
||||||
from cognee.utils import render_graph
|
from cognee.shared.utils import render_graph
|
||||||
|
|
||||||
graph_url = await render_graph(graph)
|
graph_url = await render_graph(graph)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,11 @@
|
||||||
import dspy
|
import dspy
|
||||||
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
|
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph_module import ExtractKnowledgeGraph
|
||||||
from cognee.root_dir import get_absolute_path
|
from cognee.root_dir import get_absolute_path
|
||||||
from cognee.config import Config
|
from cognee.infrastructure.llm import get_llm_config
|
||||||
|
|
||||||
config = Config()
|
|
||||||
config.load()
|
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
|
llm_config = get_llm_config()
|
||||||
|
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
|
||||||
compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
|
compiled_extract_knowledge_graph = ExtractKnowledgeGraph(lm = gpt4)
|
||||||
compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
|
compiled_extract_knowledge_graph.load(get_absolute_path("./programs/extract_knowledge_graph/extract_knowledge_graph.json"))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,16 +2,13 @@ import dsp
|
||||||
import dspy
|
import dspy
|
||||||
from dspy.teleprompt import BootstrapFewShot
|
from dspy.teleprompt import BootstrapFewShot
|
||||||
from dspy.primitives.example import Example
|
from dspy.primitives.example import Example
|
||||||
from cognee.config import Config
|
|
||||||
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph
|
from cognee.modules.data.extraction.knowledge_graph.extract_knowledge_graph import ExtractKnowledgeGraph
|
||||||
from cognee.root_dir import get_absolute_path
|
from cognee.root_dir import get_absolute_path
|
||||||
from cognee.infrastructure.files.storage import LocalStorage
|
from cognee.infrastructure.files.storage import LocalStorage
|
||||||
from cognee.shared.data_models import Answer
|
from cognee.shared.data_models import Answer
|
||||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
from cognee.modules.cognify.dataset import HotPotQA
|
from cognee.modules.cognify.dataset import HotPotQA
|
||||||
|
from cognee.infrastructure.llm import get_llm_config
|
||||||
config = Config()
|
|
||||||
config.load()
|
|
||||||
|
|
||||||
def train():
|
def train():
|
||||||
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts")
|
colbertv2_wiki17_abstracts = dspy.ColBERTv2(url = "http://20.102.90.50:2017/wiki17_abstracts")
|
||||||
|
|
@ -59,7 +56,8 @@ def train():
|
||||||
|
|
||||||
trainset = [example.with_inputs("context", "question") for example in train_examples]
|
trainset = [example.with_inputs("context", "question") for example in train_examples]
|
||||||
|
|
||||||
gpt4 = dspy.OpenAI(model = config.llm_model, api_key = config.llm_api_key, model_type = "chat", max_tokens = 4096)
|
llm_config = get_llm_config()
|
||||||
|
gpt4 = dspy.OpenAI(model = llm_config.llm_model, api_key = llm_config.llm_api_key, model_type = "chat", max_tokens = 4096)
|
||||||
|
|
||||||
compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset)
|
compiled_extract_knowledge_graph = optimizer.compile(ExtractKnowledgeGraph(lm = gpt4), trainset = trainset)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,7 @@ from nltk.corpus import stopwords
|
||||||
from nltk.tokenize import word_tokenize
|
from nltk.tokenize import word_tokenize
|
||||||
from cognee.infrastructure.llm import get_llm_config
|
from cognee.infrastructure.llm import get_llm_config
|
||||||
from cognee.shared.data_models import KnowledgeGraph, Node, Edge
|
from cognee.shared.data_models import KnowledgeGraph, Node, Edge
|
||||||
from cognee.utils import trim_text_to_max_tokens
|
from cognee.shared.utils import trim_text_to_max_tokens
|
||||||
|
|
||||||
# """Instructions:
|
# """Instructions:
|
||||||
# You are a top-tier algorithm designed for extracting information from text in structured formats to build a knowledge graph.
|
# You are a top-tier algorithm designed for extracting information from text in structured formats to build a knowledge graph.
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,7 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
from.extraction.extract_cognitive_layers import extract_cognitive_layers
|
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
from .extraction.extract_cognitive_layers import extract_cognitive_layers
|
||||||
|
|
||||||
config = get_cognify_config()
|
config = get_cognify_config()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
from.extraction.extract_summary import extract_summary
|
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
|
from .extraction.extract_summary import extract_summary
|
||||||
|
|
||||||
config = get_cognify_config()
|
config = get_cognify_config()
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,13 @@
|
||||||
import logging
|
import logging
|
||||||
import asyncio
|
import asyncio
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure.databases.graph import get_graph_config
|
||||||
from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
|
from .extraction.knowledge_graph.extract_knowledge_graph import extract_knowledge_graph
|
||||||
from.extraction.extract_summary import extract_summary
|
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
|
||||||
config = get_cognify_config()
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
|
async def get_layer_graphs(content: str, cognitive_layers: list[tuple[str, dict]]):
|
||||||
|
config = get_graph_config()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
graph_awaitables = [
|
graph_awaitables = [
|
||||||
extract_knowledge_graph(
|
extract_knowledge_graph(
|
||||||
|
|
|
||||||
|
|
@ -1 +0,0 @@
|
||||||
from .discover_directory_datasets import discover_directory_datasets
|
|
||||||
|
|
@ -1,2 +1,5 @@
|
||||||
from .classify import classify
|
from .classify import classify
|
||||||
from .identify import identify
|
from .identify import identify
|
||||||
|
from .save_data_to_file import save_data_to_file
|
||||||
|
from .get_matched_datasets import get_matched_datasets
|
||||||
|
from .discover_directory_datasets import discover_directory_datasets
|
||||||
|
|
|
||||||
12
cognee/modules/ingestion/get_matched_datasets.py
Normal file
12
cognee/modules/ingestion/get_matched_datasets.py
Normal file
|
|
@ -0,0 +1,12 @@
|
||||||
|
from .discover_directory_datasets import discover_directory_datasets
|
||||||
|
|
||||||
|
def get_matched_datasets(data_path: str, dataset_name_to_match: str = None):
|
||||||
|
datasets = discover_directory_datasets(data_path)
|
||||||
|
|
||||||
|
matched_datasets = []
|
||||||
|
|
||||||
|
for dataset_name, dataset_files in datasets.items():
|
||||||
|
if dataset_name_to_match is None or dataset_name.startswith(dataset_name_to_match):
|
||||||
|
matched_datasets.append([dataset_name, dataset_files])
|
||||||
|
|
||||||
|
return matched_datasets
|
||||||
19
cognee/modules/ingestion/save_data_to_file.py
Normal file
19
cognee/modules/ingestion/save_data_to_file.py
Normal file
|
|
@ -0,0 +1,19 @@
|
||||||
|
from typing import BinaryIO, Union
|
||||||
|
from cognee.base_config import get_base_config
|
||||||
|
from cognee.infrastructure.files.storage import LocalStorage
|
||||||
|
from .classify import classify
|
||||||
|
|
||||||
|
def save_data_to_file(data: Union[str, BinaryIO], dataset_name: str, filename: str = None):
|
||||||
|
base_config = get_base_config()
|
||||||
|
data_directory_path = base_config.data_root_directory
|
||||||
|
|
||||||
|
classified_data = classify(data, filename)
|
||||||
|
|
||||||
|
storage_path = data_directory_path + "/" + dataset_name.replace(".", "/")
|
||||||
|
LocalStorage.ensure_directory_exists(storage_path)
|
||||||
|
|
||||||
|
file_metadata = classified_data.get_metadata()
|
||||||
|
file_name = file_metadata["name"]
|
||||||
|
LocalStorage(storage_path).store(file_name, classified_data.get_data())
|
||||||
|
|
||||||
|
return "file://" + storage_path + "/" + file_name
|
||||||
|
|
@ -14,7 +14,6 @@ async def search_neighbour(graph: Union[nx.Graph, any], query: str,
|
||||||
Parameters:
|
Parameters:
|
||||||
- graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session.
|
- graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session.
|
||||||
- id (str): The identifier of the node to match against.
|
- id (str): The identifier of the node to match against.
|
||||||
- infrastructure_config (Dict): Configuration that includes the graph engine type.
|
|
||||||
- other_param (dict, optional): A dictionary that may contain 'node_id' to specify the node.
|
- other_param (dict, optional): A dictionary that may contain 'node_id' to specify the node.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,6 @@ async def search_summary( query: str, graph: Union[nx.Graph, any]) -> Dict[str,
|
||||||
Parameters:
|
Parameters:
|
||||||
- graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session.
|
- graph (Union[nx.Graph, AsyncSession]): The graph object or Neo4j session.
|
||||||
- query (str): The query string to filter nodes by, e.g., 'SUMMARY'.
|
- query (str): The query string to filter nodes by, e.g., 'SUMMARY'.
|
||||||
- infrastructure_config (Dict): Configuration that includes the graph engine type.
|
|
||||||
- other_param (str, optional): An additional parameter, unused in this implementation but could be for future enhancements.
|
- other_param (str, optional): An additional parameter, unused in this implementation but could be for future enhancements.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,17 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
from.extraction.categorize_relevant_summary import categorize_relevant_summary
|
from .extraction.categorize_relevant_summary import categorize_relevant_summary
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
async def get_cognitive_layers(content: str, categories: List[Dict]):
|
async def get_cognitive_layers(content: str, categories: List[Dict]):
|
||||||
try:
|
try:
|
||||||
|
cognify_config = get_cognify_config()
|
||||||
return (await categorize_relevant_summary(
|
return (await categorize_relevant_summary(
|
||||||
content,
|
content,
|
||||||
categories[0],
|
categories[0],
|
||||||
infrastructure_config.get_config()["categorize_summary_model"]
|
cognify_config.summarization_model,
|
||||||
)).cognitive_layers
|
)).cognitive_layers
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
|
logger.error("Error extracting cognitive layers from content: %s", error, exc_info = True)
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,7 @@
|
||||||
from cognee.config import Config
|
|
||||||
from cognee.infrastructure.databases.vector import get_vectordb_config
|
from cognee.infrastructure.databases.vector import get_vectordb_config
|
||||||
from cognee.infrastructure.llm import get_llm_config
|
from cognee.infrastructure.llm import get_llm_config
|
||||||
|
|
||||||
def get_settings():
|
def get_settings():
|
||||||
config = Config()
|
|
||||||
config.load()
|
|
||||||
llm_config = get_llm_config()
|
llm_config = get_llm_config()
|
||||||
|
|
||||||
vector_dbs = [{
|
vector_dbs = [{
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,5 @@
|
||||||
import json
|
|
||||||
import logging
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from cognee.infrastructure.llm import get_llm_config
|
from cognee.infrastructure.llm import get_llm_config
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
|
|
||||||
class LLMConfig(BaseModel):
|
class LLMConfig(BaseModel):
|
||||||
apiKey: str
|
apiKey: str
|
||||||
|
|
@ -17,6 +14,3 @@ async def save_llm_config(new_llm_config: LLMConfig):
|
||||||
|
|
||||||
if "*****" not in new_llm_config.apiKey and len(new_llm_config.apiKey.strip()) > 0:
|
if "*****" not in new_llm_config.apiKey and len(new_llm_config.apiKey.strip()) > 0:
|
||||||
llm_config.llm_api_key = new_llm_config.apiKey
|
llm_config.llm_api_key = new_llm_config.apiKey
|
||||||
|
|
||||||
logging.error(json.dumps(llm_config.to_dict()))
|
|
||||||
infrastructure_config.llm_engine = None
|
|
||||||
|
|
|
||||||
|
|
@ -1,14 +1,14 @@
|
||||||
from typing import Type, List
|
from typing import Type
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from cognee.infrastructure.llm.prompts import read_query_prompt
|
from cognee.infrastructure.llm.prompts import read_query_prompt
|
||||||
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
from cognee.infrastructure.llm.get_llm_client import get_llm_client
|
||||||
|
|
||||||
|
|
||||||
async def extract_categories(content: str, response_model: Type[BaseModel]):
|
async def extract_topology(content: str, response_model: Type[BaseModel]):
|
||||||
llm_client = get_llm_client()
|
llm_client = get_llm_client()
|
||||||
|
|
||||||
system_prompt = read_query_prompt("extract_topology.txt")
|
system_prompt = read_query_prompt("extract_topology.txt")
|
||||||
|
|
||||||
llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model)
|
llm_output = await llm_client.acreate_structured_output(content, system_prompt, response_model)
|
||||||
|
|
||||||
return llm_output.model_dump()
|
return llm_output.model_dump()
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,15 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import List, Dict
|
from cognee.modules.topology.extraction.extract_topology import extract_topology
|
||||||
from cognee.infrastructure import infrastructure_config
|
from cognee.infrastructure.databases.graph.config import get_graph_config
|
||||||
from cognee.modules.topology.extraction.extract_topology import extract_categories
|
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
|
||||||
|
|
||||||
cognify_config = get_cognify_config()
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
async def infer_data_topology(content: str, graph_topology=None):
|
async def infer_data_topology(content: str, graph_topology=None):
|
||||||
if graph_topology is None:
|
if graph_topology is None:
|
||||||
graph_topology = cognify_config.graph_topology
|
graph_config = get_graph_config()
|
||||||
|
graph_topology = graph_config.graph_model
|
||||||
try:
|
try:
|
||||||
return (await extract_categories(
|
return (await extract_topology(
|
||||||
content,
|
content,
|
||||||
graph_topology
|
graph_topology
|
||||||
))
|
))
|
||||||
|
|
|
||||||
|
|
@ -2,13 +2,10 @@ import os
|
||||||
import glob
|
import glob
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from typing import Dict, List, Optional, Union, Type, Any, Tuple
|
from typing import Dict, List, Optional, Union, Type, Any, Tuple
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from cognee import config
|
from cognee import config
|
||||||
from cognee.base_config import get_base_config
|
from cognee.base_config import get_base_config
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
from cognee.modules.cognify.config import get_cognify_config
|
from cognee.modules.cognify.config import get_cognify_config
|
||||||
from cognee.modules.topology.infer_data_topology import infer_data_topology
|
|
||||||
cognify_config = get_cognify_config()
|
cognify_config = get_cognify_config()
|
||||||
base_config = get_base_config()
|
base_config = get_base_config()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,7 @@
|
||||||
from os import path
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
|
||||||
# ROOT_DIR = path.dirname(path.abspath(__file__))
|
|
||||||
#
|
|
||||||
# logging.debug("ROOT_DIR: ", ROOT_DIR)
|
|
||||||
#
|
|
||||||
# def get_absolute_path(path_from_root: str) -> str:
|
|
||||||
# logging.debug("abspath: ", path.abspath(path.join(ROOT_DIR, path_from_root)))
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# return path.abspath(path.join(ROOT_DIR, path_from_root))
|
|
||||||
ROOT_DIR = Path(__file__).resolve().parent
|
|
||||||
|
|
||||||
logging.basicConfig(level=logging.DEBUG)
|
ROOT_DIR = Path(__file__).resolve().parent
|
||||||
logging.debug("ROOT_DIR: %s", ROOT_DIR)
|
|
||||||
|
|
||||||
def get_absolute_path(path_from_root: str) -> str:
|
def get_absolute_path(path_from_root: str) -> str:
|
||||||
absolute_path = ROOT_DIR / path_from_root
|
absolute_path = ROOT_DIR / path_from_root
|
||||||
logging.debug("abspath: %s", absolute_path.resolve())
|
return str(absolute_path.resolve())
|
||||||
return str(absolute_path.resolve())
|
|
||||||
|
|
|
||||||
|
|
@ -11,12 +11,7 @@ import matplotlib.pyplot as plt
|
||||||
import tiktoken
|
import tiktoken
|
||||||
import nltk
|
import nltk
|
||||||
from posthog import Posthog
|
from posthog import Posthog
|
||||||
|
from cognee.base_config import get_base_config
|
||||||
from cognee.config import Config
|
|
||||||
|
|
||||||
config = Config()
|
|
||||||
config.load()
|
|
||||||
|
|
||||||
|
|
||||||
def send_telemetry(event_name: str):
|
def send_telemetry(event_name: str):
|
||||||
if os.getenv("TELEMETRY_DISABLED"):
|
if os.getenv("TELEMETRY_DISABLED"):
|
||||||
|
|
@ -153,6 +148,7 @@ def generate_color_palette(unique_layers):
|
||||||
|
|
||||||
|
|
||||||
async def register_graphistry():
|
async def register_graphistry():
|
||||||
|
config = get_base_config()
|
||||||
graphistry.register(api = 3, username = config.graphistry_username, password = config.graphistry_password)
|
graphistry.register(api = 3, username = config.graphistry_username, password = config.graphistry_password)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2,7 +2,7 @@ from deepeval.dataset import EvaluationDataset
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
from typing import List, Type, Dict
|
from typing import List, Type
|
||||||
from deepeval.test_case import LLMTestCase
|
from deepeval.test_case import LLMTestCase
|
||||||
import dotenv
|
import dotenv
|
||||||
dotenv.load_dotenv()
|
dotenv.load_dotenv()
|
||||||
|
|
@ -41,7 +41,6 @@ print(dataset)
|
||||||
|
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
from cognee.infrastructure import infrastructure_config
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -81,10 +80,18 @@ async def run_cognify_base_rag():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
async def cognify_search_base_rag(content:str, context:str):
|
import os
|
||||||
infrastructure_config.set_config({"database_directory_path": "/Users/vasa/Projects/cognee/cognee/.cognee_system/databases/cognee.lancedb"})
|
from cognee.base_config import get_base_config
|
||||||
|
from cognee.infrastructure.databases.vector import get_vectordb_config
|
||||||
|
|
||||||
vector_client = infrastructure_config.get_config("vector_engine")
|
async def cognify_search_base_rag(content:str, context:str):
|
||||||
|
base_config = get_base_config()
|
||||||
|
|
||||||
|
cognee_directory_path = os.path.abspath(".cognee_system")
|
||||||
|
base_config.system_root_directory = cognee_directory_path
|
||||||
|
|
||||||
|
vector_config = get_vectordb_config()
|
||||||
|
vector_client = vector_config.vector_engine
|
||||||
|
|
||||||
return_ = await vector_client.search(collection_name="basic_rag", query_text=content, limit=10)
|
return_ = await vector_client.search(collection_name="basic_rag", query_text=content, limit=10)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -283,7 +283,7 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import cognee\n",
|
"import cognee\n",
|
||||||
"from cognee.utils import render_graph\n",
|
"from cognee.shared.utils import render_graph\n",
|
||||||
"\n",
|
"\n",
|
||||||
"graph = await cognee.cognify()\n",
|
"graph = await cognee.cognify()\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
|
||||||
|
|
@ -112,12 +112,11 @@
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"import graphistry\n",
|
"import graphistry\n",
|
||||||
"from cognee.config import Config\n",
|
"from cognee.shared.utils import render_graph\n",
|
||||||
"from cognee.utils import render_graph\n",
|
|
||||||
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
|
"from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client, GraphDBType\n",
|
||||||
|
"from cognee.base_config import get_base_config\n",
|
||||||
"\n",
|
"\n",
|
||||||
"config = Config()\n",
|
"config = get_base_config()\n",
|
||||||
"config.load()\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"graphistry.register(\n",
|
"graphistry.register(\n",
|
||||||
" api = 3,\n",
|
" api = 3,\n",
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue