From d9e558e8857fcc260433fdeff5964a1ab9364462 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 16 Dec 2024 11:02:50 +0100 Subject: [PATCH 1/3] fix: Resolve reflection issue when running cognee a second time after pruning data When running cognee a second time after pruning data, some metadata doesn't get pruned. This makes cognee believe that tables which have been deleted still exist. Fix --- .../vector/pgvector/PGVectorAdapter.py | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index 8faf1cd6d..a6b458cbd 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -2,7 +2,7 @@ import asyncio from uuid import UUID from typing import List, Optional, get_type_hints from sqlalchemy.orm import Mapped, mapped_column -from sqlalchemy import JSON, Column, Table, select, delete +from sqlalchemy import JSON, Column, Table, select, delete, MetaData from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker from cognee.exceptions import InvalidValueError @@ -48,10 +48,12 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): async def has_collection(self, collection_name: str) -> bool: async with self.engine.begin() as connection: - # Load the schema information into the MetaData object - await connection.run_sync(Base.metadata.reflect) + # Create a MetaData instance to load table information + metadata = MetaData() + # Load table information from schema into MetaData + await connection.run_sync(metadata.reflect) - if collection_name in Base.metadata.tables: + if collection_name in metadata.tables: return True else: return False @@ -145,10 +147,12 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): with an async engine. 
""" async with self.engine.begin() as connection: - # Load the schema information into the MetaData object - await connection.run_sync(Base.metadata.reflect) - if collection_name in Base.metadata.tables: - return Base.metadata.tables[collection_name] + # Create a MetaData instance to load table information + metadata = MetaData() + # Load table information from schema into MetaData + await connection.run_sync(metadata.reflect) + if collection_name in metadata.tables: + return metadata.tables[collection_name] else: raise EntityNotFoundError(message=f"Table '{collection_name}' not found.") From 394a0b2dfb9645e58ed31835e8eaec7c90970358 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 16 Dec 2024 11:26:33 +0100 Subject: [PATCH 2/3] fix: Add metadata reflection fix to sqlite as well Added fix when reflecting metadata to sqlite as well Fix --- .../relational/sqlalchemy/SqlAlchemyAdapter.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index 8041aeaea..b1e608059 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -113,10 +113,12 @@ class SQLAlchemyAdapter(): """ async with self.engine.begin() as connection: if self.engine.dialect.name == "sqlite": - # Load the schema information into the MetaData object - await connection.run_sync(Base.metadata.reflect) - if table_name in Base.metadata.tables: - return Base.metadata.tables[table_name] + # Create a MetaData instance to load table information + metadata = MetaData() + # Load table information from schema into MetaData + await connection.run_sync(metadata.reflect) + if table_name in metadata.tables: + return metadata.tables[table_name] else: raise EntityNotFoundError(message=f"Table '{table_name}' not found.") else: 
@@ -138,8 +140,11 @@ class SQLAlchemyAdapter(): table_names = [] async with self.engine.begin() as connection: if self.engine.dialect.name == "sqlite": - await connection.run_sync(Base.metadata.reflect) - for table in Base.metadata.tables: + # Create a MetaData instance to load table information + metadata = MetaData() + # Load table information from schema into MetaData + await connection.run_sync(metadata.reflect) + for table in metadata.tables: table_names.append(str(table)) else: schema_list = await self.get_schema_list() From 34b139af2665ab5274de6484980e77cfda2985c5 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 16 Dec 2024 13:19:21 +0100 Subject: [PATCH 3/3] Revert "fix: Add metadata reflection fix to sqlite as well" This reverts commit 394a0b2dfb9645e58ed31835e8eaec7c90970358. --- .../relational/sqlalchemy/SqlAlchemyAdapter.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index b1e608059..8041aeaea 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -113,12 +113,10 @@ class SQLAlchemyAdapter(): """ async with self.engine.begin() as connection: if self.engine.dialect.name == "sqlite": - # Create a MetaData instance to load table information - metadata = MetaData() - # Load table information from schema into MetaData - await connection.run_sync(metadata.reflect) - if table_name in metadata.tables: - return metadata.tables[table_name] + # Load the schema information into the MetaData object + await connection.run_sync(Base.metadata.reflect) + if table_name in Base.metadata.tables: + return Base.metadata.tables[table_name] else: raise EntityNotFoundError(message=f"Table '{table_name}' not found.") else: @@ -140,11 +138,8 @@ class SQLAlchemyAdapter(): 
table_names = [] async with self.engine.begin() as connection: if self.engine.dialect.name == "sqlite": - # Create a MetaData instance to load table information - metadata = MetaData() - # Load table information from schema into MetaData - await connection.run_sync(metadata.reflect) - for table in metadata.tables: + await connection.run_sync(Base.metadata.reflect) + for table in Base.metadata.tables: table_names.append(str(table)) else: schema_list = await self.get_schema_list()