fix: Resolve reflection issue when running cognee a second time after pruning data

When cognee is run a second time after pruning data, some metadata does not get pruned: the shared `Base.metadata` object still holds the tables it reflected earlier.
This makes cognee believe that tables which have since been deleted still exist.

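Fix: reflect the schema into a fresh `MetaData` instance on each call instead of the shared `Base.metadata`.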
This commit is contained in:
Igor Ilic 2024-12-16 11:02:50 +01:00
parent 2f2aa81194
commit d9e558e885

View file

@ -2,7 +2,7 @@ import asyncio
from uuid import UUID from uuid import UUID
from typing import List, Optional, get_type_hints from typing import List, Optional, get_type_hints
from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import JSON, Column, Table, select, delete from sqlalchemy import JSON, Column, Table, select, delete, MetaData
from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker
from cognee.exceptions import InvalidValueError from cognee.exceptions import InvalidValueError
@ -48,10 +48,12 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
async def has_collection(self, collection_name: str) -> bool: async def has_collection(self, collection_name: str) -> bool:
async with self.engine.begin() as connection: async with self.engine.begin() as connection:
# Load the schema information into the MetaData object # Create a MetaData instance to load table information
await connection.run_sync(Base.metadata.reflect) metadata = MetaData()
# Load table information from schema into MetaData
await connection.run_sync(metadata.reflect)
if collection_name in Base.metadata.tables: if collection_name in metadata.tables:
return True return True
else: else:
return False return False
@ -145,10 +147,12 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
with an async engine. with an async engine.
""" """
async with self.engine.begin() as connection: async with self.engine.begin() as connection:
# Load the schema information into the MetaData object # Create a MetaData instance to load table information
await connection.run_sync(Base.metadata.reflect) metadata = MetaData()
if collection_name in Base.metadata.tables: # Load table information from schema into MetaData
return Base.metadata.tables[collection_name] await connection.run_sync(metadata.reflect)
if collection_name in metadata.tables:
return metadata.tables[collection_name]
else: else:
raise EntityNotFoundError(message=f"Table '{collection_name}' not found.") raise EntityNotFoundError(message=f"Table '{collection_name}' not found.")