From f79631d5da094a3b26895055584017ad3e07e9b0 Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Thu, 6 Jun 2024 12:31:55 +0200 Subject: [PATCH] fix: allow alternative vector db engine to be used --- cognee-frontend/src/app/page.tsx | 2 +- cognee/api/client.py | 27 +++++--- cognee/api/v1/cognify/cognify.py | 11 ++- cognee/api/v1/config/config.py | 4 +- .../databases/vector/__init__.py | 3 +- .../infrastructure/databases/vector/config.py | 27 ++------ .../databases/vector/create_vector_engine.py | 3 - .../embeddings/DefaultEmbeddingEngine.py | 69 ------------------- .../embeddings/FastembedEmbeddingEngine.py | 25 +++++++ .../embeddings/LiteLLMEmbeddingEngine.py | 39 +++++++++++ .../databases/vector/embeddings/__init__.py | 1 + .../databases/vector/embeddings/config.py | 5 +- .../vector/embeddings/get_embedding_engine.py | 7 ++ .../databases/vector/get_vector_engine.py | 6 ++ .../graph/add_cognitive_layer_graphs.py | 15 ++-- .../modules/cognify/graph/add_data_chunks.py | 17 ++--- .../modules/cognify/graph/add_label_nodes.py | 11 ++- .../llm/resolve_cross_graph_references.py | 5 +- cognee/modules/data/deletion/prune_system.py | 7 +- .../search/vector/search_similarity.py | 6 +- cognee/modules/settings/get_settings.py | 2 +- .../modules/settings/save_vector_db_config.py | 1 - cognee/modules/tasks/get_task_status.py | 4 +- evals/simple_rag_vs_cognee_eval.py | 9 ++- 24 files changed, 143 insertions(+), 163 deletions(-) delete mode 100644 cognee/infrastructure/databases/vector/embeddings/DefaultEmbeddingEngine.py create mode 100644 cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py create mode 100644 cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py create mode 100644 cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py create mode 100644 cognee/infrastructure/databases/vector/get_vector_engine.py diff --git a/cognee-frontend/src/app/page.tsx b/cognee-frontend/src/app/page.tsx index c6a1ce5c7..20676c15e 100644 --- a/cognee-frontend/src/app/page.tsx +++ b/cognee-frontend/src/app/page.tsx @@ -112,7 +112,7 @@ export default function Home() { expireIn={notification.expireIn} onClose={notification.delete} > - {notification.message} + {notification.message} ))} diff --git a/cognee/api/client.py b/cognee/api/client.py index 29116c5d9..b3bde178e 100644 --- a/cognee/api/client.py +++ b/cognee/api/client.py @@ -72,15 +72,21 @@ async def get_dataset_graph(dataset_id: str): from cognee.infrastructure.databases.graph import get_graph_config from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client - graph_config = get_graph_config() - graph_engine = graph_config.graph_engine - graph_client = await get_graph_client(graph_engine) - graph_url = await render_graph(graph_client.graph) + try: + graph_config = get_graph_config() + graph_engine = graph_config.graph_engine + graph_client = await get_graph_client(graph_engine) + graph_url = await render_graph(graph_client.graph) - return JSONResponse( - status_code = 200, - content = str(graph_url), - ) + return JSONResponse( + status_code = 200, + content = str(graph_url), + ) + except: + return JSONResponse( + status_code = 409, + content = "Graphistry credentials are not set. Please set them in your .env file.", + ) @app.get("/datasets/{dataset_id}/data", response_model=list) async def get_dataset_data(dataset_id: str): @@ -106,7 +112,7 @@ async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset return JSONResponse( status_code = 200, - content = { dataset["data_id"]: dataset["status"] for dataset in datasets_statuses }, + content = datasets_statuses, ) @app.get("/datasets/{dataset_id}/data/{data_id}/raw", response_class=FileResponse) @@ -264,8 +270,7 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000): relational_config.create_engine() vector_config = get_vectordb_config() - vector_config.vector_db_path = databases_directory_path - vector_config.create_engine() + vector_config.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb") base_config = get_base_config() data_directory_path = os.path.abspath(".data_storage") diff --git a/cognee/api/v1/cognify/cognify.py b/cognee/api/v1/cognify/cognify.py index 7c4122eb8..b60d8f432 100644 --- a/cognee/api/v1/cognify/cognify.py +++ b/cognee/api/v1/cognify/cognify.py @@ -61,14 +61,19 @@ async def cognify(datasets: Union[str, List[str]] = None): async with update_status_lock: task_status = get_task_status([dataset_name]) - if task_status == "DATASET_PROCESSING_STARTED": + if dataset_name in task_status and task_status[dataset_name] == "DATASET_PROCESSING_STARTED": logger.info(f"Dataset {dataset_name} is being processed.") return update_task_status(dataset_name, "DATASET_PROCESSING_STARTED") - await cognify(dataset_name) - update_task_status(dataset_name, "DATASET_PROCESSING_FINISHED") + try: + await cognify(dataset_name) + update_task_status(dataset_name, "DATASET_PROCESSING_FINISHED") + except Exception as error: + update_task_status(dataset_name, "DATASET_PROCESSING_ERROR") + raise error + # datasets is a list of dataset names if isinstance(datasets, list): diff --git a/cognee/api/v1/config/config.py b/cognee/api/v1/config/config.py index 18fc0155d..bc4742ae3 100644 --- a/cognee/api/v1/config/config.py +++ b/cognee/api/v1/config/config.py @@ -17,8 +17,8 @@ class config(): relational_config.create_engine() vector_config = get_vectordb_config() - vector_config.vector_db_path = databases_directory_path - vector_config.create_engine() + if vector_config.vector_engine_provider == "lancedb": + vector_config.vector_db_url = os.path.join(databases_directory_path, "cognee.lancedb") @staticmethod def data_root_directory(data_root_directory: str): diff --git a/cognee/infrastructure/databases/vector/__init__.py b/cognee/infrastructure/databases/vector/__init__.py index 58af4b5fc..604170f1d 100644 --- a/cognee/infrastructure/databases/vector/__init__.py +++ b/cognee/infrastructure/databases/vector/__init__.py @@ -2,4 +2,5 @@ from .models.DataPoint import DataPoint from .models.VectorConfig import VectorConfig from .models.CollectionConfig import CollectionConfig from .vector_db_interface import VectorDBInterface -from .config import get_vectordb_config \ No newline at end of file +from .config import get_vectordb_config +from .get_vector_engine import get_vector_engine diff --git a/cognee/infrastructure/databases/vector/config.py b/cognee/infrastructure/databases/vector/config.py index de994696d..8137a067c 100644 --- a/cognee/infrastructure/databases/vector/config.py +++ b/cognee/infrastructure/databases/vector/config.py @@ -1,37 +1,18 @@ import os from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict -from cognee.infrastructure.databases.vector.embeddings.config import get_embedding_config from cognee.root_dir import get_absolute_path -from .create_vector_engine import create_vector_engine class VectorConfig(BaseSettings): - vector_db_path: str = os.path.join(get_absolute_path(".cognee_system"), "databases") - vector_db_url: str = os.path.join(vector_db_path, "cognee.lancedb") + vector_db_url: str = os.path.join( + os.path.join(get_absolute_path(".cognee_system"), "databases"), + "cognee.lancedb" + ) vector_db_key: str = "" vector_engine_provider: str = "lancedb" - vector_engine: object = create_vector_engine( - { - "vector_db_key": None, - "vector_db_url": vector_db_url, - "vector_db_provider": "lancedb", - }, - get_embedding_config().embedding_engine, - ) model_config = SettingsConfigDict(env_file = ".env", extra = "allow") - def create_engine(self): - if self.vector_engine_provider == "lancedb": - self.vector_db_url = os.path.join(self.vector_db_path, "cognee.lancedb") - else: - self.vector_db_path = None - - self.vector_engine = create_vector_engine( - get_vectordb_config().to_dict(), - get_embedding_config().embedding_engine, - ) - def to_dict(self) -> dict: return { "vector_db_url": self.vector_db_url, diff --git a/cognee/infrastructure/databases/vector/create_vector_engine.py b/cognee/infrastructure/databases/vector/create_vector_engine.py index 6788ae818..62323c14a 100644 --- a/cognee/infrastructure/databases/vector/create_vector_engine.py +++ b/cognee/infrastructure/databases/vector/create_vector_engine.py @@ -28,9 +28,6 @@ def create_vector_engine(config: VectorConfig, embedding_engine): ) else: from .lancedb.LanceDBAdapter import LanceDBAdapter - # from cognee.infrastructure.files.storage import LocalStorage - - # LocalStorage.ensure_directory_exists(config["vector_db_url"]) return LanceDBAdapter( url = config["vector_db_url"], diff --git a/cognee/infrastructure/databases/vector/embeddings/DefaultEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/DefaultEmbeddingEngine.py deleted file mode 100644 index 75fe0e18c..000000000 --- a/cognee/infrastructure/databases/vector/embeddings/DefaultEmbeddingEngine.py +++ /dev/null @@ -1,69 +0,0 @@ -import asyncio -from typing import List, Optional -from fastembed import TextEmbedding -import litellm -from litellm import aembedding -from cognee.root_dir import get_absolute_path -from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine - -litellm.set_verbose = True - -class DefaultEmbeddingEngine(EmbeddingEngine): - embedding_model: str - embedding_dimensions: int - - def __init__( - self, - embedding_model: Optional[str], - embedding_dimensions: Optional[int], - ): - self.embedding_model = embedding_model - self.embedding_dimensions = embedding_dimensions - - async def embed_text(self, text: List[str]) -> List[float]: - embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings")) - embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text))) - - return embeddings_list - - def get_vector_size(self) -> int: - return self.embedding_dimensions - - -class LiteLLMEmbeddingEngine(EmbeddingEngine): - embedding_model: str - embedding_dimensions: int - - def __init__( - self, - embedding_model: Optional[str], - embedding_dimensions: Optional[int], - ): - self.embedding_model = embedding_model - self.embedding_dimensions = embedding_dimensions - - async def embed_text(self, text: List[str]) -> List[List[float]]: - async def get_embedding(text_): - response = await aembedding(self.embedding_model, input=text_) - return response.data[0]['embedding'] - - tasks = [get_embedding(text_) for text_ in text] - result = await asyncio.gather(*tasks) - return result - - def get_vector_size(self) -> int: - return self.embedding_dimensions - - -# if __name__ == "__main__": -# async def gg(): -# openai_embedding_engine = LiteLLMEmbeddingEngine() -# # print(openai_embedding_engine.embed_text(["Hello, how are you?"])) -# # print(openai_embedding_engine.get_vector_size()) -# # default_embedding_engine = DefaultEmbeddingEngine() -# sds = await openai_embedding_engine.embed_text(["Hello, sadasdas are you?"]) -# print(sds) -# # print(default_embedding_engine.get_vector_size()) - -# asyncio.run(gg()) - diff --git a/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py new file mode 100644 index 000000000..3645ae28d --- /dev/null +++ b/cognee/infrastructure/databases/vector/embeddings/FastembedEmbeddingEngine.py @@ -0,0 +1,25 @@ +from typing import List, Optional +from fastembed import TextEmbedding +from cognee.root_dir import get_absolute_path +from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine + +class FastembedEmbeddingEngine(EmbeddingEngine): + embedding_model: str + embedding_dimensions: int + + def __init__( + self, + embedding_model: Optional[str] = "BAAI/bge-large-en-v1.5", + embedding_dimensions: Optional[int] = 1024, + ): + self.embedding_model = embedding_model + self.embedding_dimensions = embedding_dimensions + + async def embed_text(self, text: List[str]) -> List[float]: + embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings")) + embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text))) + + return embeddings_list + + def get_vector_size(self) -> int: + return self.embedding_dimensions diff --git a/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py new file mode 100644 index 000000000..0f0e7bd1a --- /dev/null +++ b/cognee/infrastructure/databases/vector/embeddings/LiteLLMEmbeddingEngine.py @@ -0,0 +1,39 @@ +import asyncio +from typing import List, Optional +import litellm +from litellm import aembedding +from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine + +litellm.set_verbose = True + +class LiteLLMEmbeddingEngine(EmbeddingEngine): + api_key: str + embedding_model: str + embedding_dimensions: int + + def __init__( + self, + embedding_model: Optional[str] = "text-embedding-3-large", + embedding_dimensions: Optional[int] = 3072, + api_key: str = None, + ): + self.api_key = api_key + self.embedding_model = embedding_model + self.embedding_dimensions = embedding_dimensions + + async def embed_text(self, text: List[str]) -> List[List[float]]: + async def get_embedding(text_): + response = await aembedding( + self.embedding_model, + input = text_, + api_key = self.api_key + ) + + return response.data[0]["embedding"] + + tasks = [get_embedding(text_) for text_ in text] + result = await asyncio.gather(*tasks) + return result + + def get_vector_size(self) -> int: + return self.embedding_dimensions diff --git a/cognee/infrastructure/databases/vector/embeddings/__init__.py b/cognee/infrastructure/databases/vector/embeddings/__init__.py index e69de29bb..5ab672743 100644 --- a/cognee/infrastructure/databases/vector/embeddings/__init__.py +++ b/cognee/infrastructure/databases/vector/embeddings/__init__.py @@ -0,0 +1 @@ +from .get_embedding_engine import get_embedding_engine diff --git a/cognee/infrastructure/databases/vector/embeddings/config.py b/cognee/infrastructure/databases/vector/embeddings/config.py index 9837fdd03..8c03d389b 100644 --- a/cognee/infrastructure/databases/vector/embeddings/config.py +++ b/cognee/infrastructure/databases/vector/embeddings/config.py @@ -1,15 +1,12 @@ from functools import lru_cache from pydantic_settings import BaseSettings, SettingsConfigDict -from cognee.infrastructure.databases.vector.embeddings.DefaultEmbeddingEngine import DefaultEmbeddingEngine - - class EmbeddingConfig(BaseSettings): openai_embedding_model: str = "text-embedding-3-large" openai_embedding_dimensions: int = 3072 litellm_embedding_model: str = "BAAI/bge-large-en-v1.5" litellm_embedding_dimensions: int = 1024 - embedding_engine:object = DefaultEmbeddingEngine(embedding_model=litellm_embedding_model, embedding_dimensions=litellm_embedding_dimensions) + # embedding_engine:object = DefaultEmbeddingEngine(embedding_model=litellm_embedding_model, embedding_dimensions=litellm_embedding_dimensions) model_config = SettingsConfigDict(env_file = ".env", extra = "allow") diff --git a/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py new file mode 100644 index 000000000..a82876ef8 --- /dev/null +++ b/cognee/infrastructure/databases/vector/embeddings/get_embedding_engine.py @@ -0,0 +1,7 @@ +from cognee.infrastructure.llm import get_llm_config +from .EmbeddingEngine import EmbeddingEngine +from .LiteLLMEmbeddingEngine import LiteLLMEmbeddingEngine + +def get_embedding_engine() -> EmbeddingEngine: + llm_config = get_llm_config() + return LiteLLMEmbeddingEngine(api_key = llm_config.llm_api_key) diff --git a/cognee/infrastructure/databases/vector/get_vector_engine.py b/cognee/infrastructure/databases/vector/get_vector_engine.py new file mode 100644 index 000000000..8056c9e36 --- /dev/null +++ b/cognee/infrastructure/databases/vector/get_vector_engine.py @@ -0,0 +1,6 @@ +from .config import get_vectordb_config +from .embeddings import get_embedding_engine +from .create_vector_engine import create_vector_engine + +def get_vector_engine(): + return create_vector_engine(get_vectordb_config().to_dict(), get_embedding_engine()) diff --git a/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py b/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py index 354d76f59..d5affc65a 100644 --- a/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py +++ b/cognee/modules/cognify/graph/add_cognitive_layer_graphs.py @@ -2,11 +2,9 @@ from datetime import datetime from uuid import uuid4 from typing import List, Tuple, TypedDict from pydantic import BaseModel -from cognee.infrastructure.databases.vector import DataPoint - -# from cognee.shared.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader +from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine from cognee.infrastructure.databases.graph.config import get_graph_config -from cognee.infrastructure.databases.vector.config import get_vectordb_config +# from cognee.shared.utils import extract_pos_tags, extract_named_entities, extract_sentiment_vader class GraphLike(TypedDict): @@ -20,8 +18,7 @@ async def add_cognitive_layer_graphs( chunk_id: str, layer_graphs: List[Tuple[str, GraphLike]], ): - vectordb_config = get_vectordb_config() - vector_client = vectordb_config.vector_engine + vector_engine = get_vector_engine() graph_config = get_graph_config() graph_model = graph_config.graph_model @@ -127,7 +124,7 @@ async def add_cognitive_layer_graphs( references: References try: - await vector_client.create_collection(layer_id, payload_schema = PayloadSchema) + await vector_engine.create_collection(layer_id, payload_schema = PayloadSchema) except Exception: # It's ok if the collection already exists. pass @@ -146,8 +143,8 @@ async def add_cognitive_layer_graphs( ) for (node_id, node_data) in graph_nodes ] - await vector_client.create_data_points(layer_id, data_points) + await vector_engine.create_data_points(layer_id, data_points) def generate_node_id(node_id: str) -> str: - return node_id.upper().replace(' ', '_').replace("'", "") + return node_id.upper().replace(" ", "_").replace("'", "") diff --git a/cognee/modules/cognify/graph/add_data_chunks.py b/cognee/modules/cognify/graph/add_data_chunks.py index ead109a9f..48f06d907 100644 --- a/cognee/modules/cognify/graph/add_data_chunks.py +++ b/cognee/modules/cognify/graph/add_data_chunks.py @@ -1,8 +1,7 @@ from typing import TypedDict from pydantic import BaseModel, Field -from cognee.infrastructure.databases.vector.config import get_vectordb_config -from cognee.infrastructure.databases.vector import DataPoint +from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine class TextChunk(TypedDict): text: str @@ -10,8 +9,7 @@ class TextChunk(TypedDict): file_metadata: dict async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]): - config = get_vectordb_config() - vector_client = config.vector_engine + vector_engine = get_vector_engine() identified_chunks = [] @@ -21,7 +19,7 @@ async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]): for (dataset_name, chunks) in dataset_data_chunks.items(): try: - await vector_client.create_collection(dataset_name, payload_schema = PayloadSchema) + await vector_engine.create_collection(dataset_name, payload_schema = PayloadSchema) except Exception as error: print(error) pass @@ -38,7 +36,7 @@ async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]): identified_chunks.extend(dataset_chunks) - await vector_client.create_data_points( + await vector_engine.create_data_points( dataset_name, [ DataPoint[PayloadSchema]( @@ -53,8 +51,7 @@ async def add_data_chunks(dataset_data_chunks: dict[str, list[TextChunk]]): async def add_data_chunks_basic_rag(dataset_data_chunks: dict[str, list[TextChunk]]): - config = get_vectordb_config() - vector_client = config.vector_engine + vector_engine = get_vector_engine() identified_chunks = [] @@ -64,7 +61,7 @@ async def add_data_chunks_basic_rag(dataset_data_chunks: dict[str, list[TextChun for (dataset_name, chunks) in dataset_data_chunks.items(): try: - await vector_client.create_collection("basic_rag", payload_schema = PayloadSchema) + await vector_engine.create_collection("basic_rag", payload_schema = PayloadSchema) except Exception as error: print(error) @@ -80,7 +77,7 @@ async def add_data_chunks_basic_rag(dataset_data_chunks: dict[str, list[TextChun identified_chunks.extend(dataset_chunks) - await vector_client.create_data_points( + await vector_engine.create_data_points( "basic_rag", [ DataPoint[PayloadSchema]( diff --git a/cognee/modules/cognify/graph/add_label_nodes.py b/cognee/modules/cognify/graph/add_label_nodes.py index 754516874..4739e47e6 100644 --- a/cognee/modules/cognify/graph/add_label_nodes.py +++ b/cognee/modules/cognify/graph/add_label_nodes.py @@ -2,13 +2,10 @@ from uuid import uuid4 from typing import List from datetime import datetime from pydantic import BaseModel - -from cognee.infrastructure.databases.vector import DataPoint -from cognee.infrastructure.databases.vector.config import get_vectordb_config +from cognee.infrastructure.databases.vector import DataPoint, get_vector_engine async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str]) -> None: - vectordb_config = get_vectordb_config() - vector_client = vectordb_config.vector_engine + vector_engine = get_vector_engine() keyword_nodes = [] @@ -62,9 +59,9 @@ async def add_label_nodes(graph_client, parent_node_id: str, keywords: List[str] ] try: - await vector_client.create_collection(parent_node_id, payload_schema = PayloadSchema) + await vector_engine.create_collection(parent_node_id, payload_schema = PayloadSchema) except Exception as e: # It's ok if the collection already exists. print(e) - await vector_client.create_data_points(parent_node_id, keyword_data_points) \ No newline at end of file + await vector_engine.create_data_points(parent_node_id, keyword_data_points) diff --git a/cognee/modules/cognify/llm/resolve_cross_graph_references.py b/cognee/modules/cognify/llm/resolve_cross_graph_references.py index 44644bd2a..b7ac3bb6d 100644 --- a/cognee/modules/cognify/llm/resolve_cross_graph_references.py +++ b/cognee/modules/cognify/llm/resolve_cross_graph_references.py @@ -1,5 +1,5 @@ from typing import Dict, List -from cognee.infrastructure.databases.vector.config import get_vectordb_config +from cognee.infrastructure.databases.vector import get_vector_engine async def resolve_cross_graph_references(nodes_by_layer: Dict): results = [] @@ -16,8 +16,7 @@ async def resolve_cross_graph_references(nodes_by_layer: Dict): return results async def get_nodes_by_layer(layer_id: str, layer_nodes: List): - vectordb_config = get_vectordb_config() - vector_engine = vectordb_config.vector_engine + vector_engine = get_vector_engine() score_points = await vector_engine.batch_search( layer_id, diff --git a/cognee/modules/data/deletion/prune_system.py b/cognee/modules/data/deletion/prune_system.py index 09ee2b3ea..3fbce112c 100644 --- a/cognee/modules/data/deletion/prune_system.py +++ b/cognee/modules/data/deletion/prune_system.py @@ -1,5 +1,5 @@ +from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.graph.config import get_graph_config -from cognee.infrastructure.databases.vector.config import get_vectordb_config from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client async def prune_system(graph = True, vector = True): @@ -9,6 +9,5 @@ async def prune_system(graph = True, vector = True): await graph_client.delete_graph() if vector: - vector_config = get_vectordb_config() - vector_client = vector_config.vector_engine - await vector_client.prune() + vector_engine = get_vector_engine() + await vector_engine.prune() diff --git a/cognee/modules/search/vector/search_similarity.py b/cognee/modules/search/vector/search_similarity.py index 57164dc8e..17c0e81e4 100644 --- a/cognee/modules/search/vector/search_similarity.py +++ b/cognee/modules/search/vector/search_similarity.py @@ -1,6 +1,6 @@ from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client from cognee.infrastructure.databases.graph.config import get_graph_config -from cognee.infrastructure.databases.vector.config import get_vectordb_config +from cognee.infrastructure.databases.vector import get_vector_engine async def search_similarity(query: str, graph): graph_config = get_graph_config() @@ -17,10 +17,8 @@ async def search_similarity(query: str, graph): graph_nodes = [] - vector_config = get_vectordb_config() - for layer_id in unique_layer_uuids: - vector_engine = vector_config.vector_engine + vector_engine = get_vector_engine() results = await vector_engine.search(layer_id, query_text = query, limit = 10) print("results", results) diff --git a/cognee/modules/settings/get_settings.py b/cognee/modules/settings/get_settings.py index fc043a4e2..2cc4650ac 100644 --- a/cognee/modules/settings/get_settings.py +++ b/cognee/modules/settings/get_settings.py @@ -40,7 +40,7 @@ def get_settings(): "value": llm_config.llm_model, "label": llm_config.llm_model, } if llm_config.llm_model else None, - "apiKey": llm_config.llm_api_key[:-10] + "**********" if llm_config.llm_api_key else None, + "apiKey": (llm_config.llm_api_key[:-10] + "**********") if llm_config.llm_api_key else None, "providers": llm_providers, "models": { "openai": [{ diff --git a/cognee/modules/settings/save_vector_db_config.py b/cognee/modules/settings/save_vector_db_config.py index 884bfe136..bef5f4701 100644 --- a/cognee/modules/settings/save_vector_db_config.py +++ b/cognee/modules/settings/save_vector_db_config.py @@ -13,4 +13,3 @@ async def save_vector_db_config(vector_db_config: VectorDBConfig): vector_config.vector_db_url = vector_db_config.url vector_config.vector_db_key = vector_db_config.apiKey vector_config.vector_engine_provider = vector_db_config.provider - vector_config.create_engine() diff --git a/cognee/modules/tasks/get_task_status.py b/cognee/modules/tasks/get_task_status.py index d2917687a..863ada164 100644 --- a/cognee/modules/tasks/get_task_status.py +++ b/cognee/modules/tasks/get_task_status.py @@ -6,7 +6,7 @@ def get_task_status(data_ids: [str]): formatted_data_ids = ", ".join([f"'{data_id}'" for data_id in data_ids]) - results = db_engine.execute_query( + datasets_statuses = db_engine.execute_query( f"""SELECT data_id, status FROM ( SELECT data_id, status, ROW_NUMBER() OVER (PARTITION BY data_id ORDER BY created_at DESC) as rn @@ -16,4 +16,4 @@ def get_task_status(data_ids: [str]): WHERE rn = 1;""" ) - return results[0] if len(results) > 0 else None + return { dataset["data_id"]: dataset["status"] for dataset in datasets_statuses } diff --git a/evals/simple_rag_vs_cognee_eval.py b/evals/simple_rag_vs_cognee_eval.py index 95f69bc1e..d476aaef5 100644 --- a/evals/simple_rag_vs_cognee_eval.py +++ b/evals/simple_rag_vs_cognee_eval.py @@ -82,18 +82,17 @@ async def run_cognify_base_rag(): import os from cognee.base_config import get_base_config -from cognee.infrastructure.databases.vector import get_vectordb_config +from cognee.infrastructure.databases.vector import get_vector_engine async def cognify_search_base_rag(content:str, context:str): base_config = get_base_config() - + cognee_directory_path = os.path.abspath(".cognee_system") base_config.system_root_directory = cognee_directory_path - vector_config = get_vectordb_config() - vector_client = vector_config.vector_engine + vector_engine = get_vector_engine() - return_ = await vector_client.search(collection_name="basic_rag", query_text=content, limit=10) + return_ = await vector_engine.search(collection_name="basic_rag", query_text=content, limit=10) print("results", return_) return return_