fix: avoid pgvector deadlock by checking collection existence before locking

2025-10-07 19:19:32 +02:00 · 2025-10-07 19:19:32 +02:00 · b288e5d7a0
commit b288e5d7a0
parent 7ec1c75bee
1 changed file with 29 additions and 28 deletions
--- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
+++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
@@ -125,41 +125,42 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
        data_point_types = get_type_hints(DataPoint)
        vector_size = self.embedding_engine.get_vector_size()

-        async with self.VECTOR_DB_LOCK:
-            if not await self.has_collection(collection_name):
+        if not await self.has_collection(collection_name):
+            async with self.VECTOR_DB_LOCK:
+                if not await self.has_collection(collection_name):

-                class PGVectorDataPoint(Base):
-                    """
-                    Represent a point in a vector data space with associated data and vector representation.
+                    class PGVectorDataPoint(Base):
+                        """
+                        Represent a point in a vector data space with associated data and vector representation.

-                    This class inherits from Base and is associated with a database table defined by
-                    __tablename__. It maintains the following public methods and instance variables:
+                        This class inherits from Base and is associated with a database table defined by
+                        __tablename__. It maintains the following public methods and instance variables:

-                    - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.
+                        - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.

-                    Instance variables:
-                    - id: Identifier for the data point, defined by data_point_types.
-                    - payload: JSON data associated with the data point.
-                    - vector: Vector representation of the data point, with size defined by vector_size.
-                    """
+                        Instance variables:
+                        - id: Identifier for the data point, defined by data_point_types.
+                        - payload: JSON data associated with the data point.
+                        - vector: Vector representation of the data point, with size defined by vector_size.
+                        """

-                    __tablename__ = collection_name
-                    __table_args__ = {"extend_existing": True}
-                    # PGVector requires one column to be the primary key
-                    id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
-                    payload = Column(JSON)
-                    vector = Column(self.Vector(vector_size))
+                        __tablename__ = collection_name
+                        __table_args__ = {"extend_existing": True}
+                        # PGVector requires one column to be the primary key
+                        id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
+                        payload = Column(JSON)
+                        vector = Column(self.Vector(vector_size))

-                    def __init__(self, id, payload, vector):
-                        self.id = id
-                        self.payload = payload
-                        self.vector = vector
+                        def __init__(self, id, payload, vector):
+                            self.id = id
+                            self.payload = payload
+                            self.vector = vector

-                async with self.engine.begin() as connection:
-                    if len(Base.metadata.tables.keys()) > 0:
-                        await connection.run_sync(
-                            Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
-                        )
+                    async with self.engine.begin() as connection:
+                        if len(Base.metadata.tables.keys()) > 0:
+                            await connection.run_sync(
+                                Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
+                            )

    @retry(
        retry=retry_if_exception_type(DeadlockDetectedError),