refactor: Don't use async lock if not needed (#1510)

<!-- .github/pull_request_template.md -->

## Description
Reduce the time PGVector holds the async lock by checking whether the
collection exists before acquiring the lock.

## Type of Change
<!-- Please check the relevant option -->
- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)
<!-- Add screenshots or videos to help explain your changes -->

## Pre-submission Checklist
<!-- Please check all boxes that apply before submitting your PR -->
- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the
issue/feature**
- [ ] My code follows the project's coding standards and style
guidelines
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been
submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.
Commit 840991c36f (parent e7709f7bd0) by Igor Ilic, 2025-10-07 20:44:10 +02:00, committed via GitHub.


```diff
@@ -125,41 +125,42 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         data_point_types = get_type_hints(DataPoint)
         vector_size = self.embedding_engine.get_vector_size()

-        async with self.VECTOR_DB_LOCK:
-            if not await self.has_collection(collection_name):
+        if not await self.has_collection(collection_name):
+            async with self.VECTOR_DB_LOCK:
+                if not await self.has_collection(collection_name):

-                class PGVectorDataPoint(Base):
-                    """
-                    Represent a point in a vector data space with associated data and vector representation.
-
-                    This class inherits from Base and is associated with a database table defined by
-                    __tablename__. It maintains the following public methods and instance variables:
-
-                    - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.
-
-                    Instance variables:
-                    - id: Identifier for the data point, defined by data_point_types.
-                    - payload: JSON data associated with the data point.
-                    - vector: Vector representation of the data point, with size defined by vector_size.
-                    """
-
-                    __tablename__ = collection_name
-                    __table_args__ = {"extend_existing": True}
-                    # PGVector requires one column to be the primary key
-                    id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
-                    payload = Column(JSON)
-                    vector = Column(self.Vector(vector_size))
-
-                    def __init__(self, id, payload, vector):
-                        self.id = id
-                        self.payload = payload
-                        self.vector = vector
-
-                async with self.engine.begin() as connection:
-                    if len(Base.metadata.tables.keys()) > 0:
-                        await connection.run_sync(
-                            Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
-                        )
+                    class PGVectorDataPoint(Base):
+                        """
+                        Represent a point in a vector data space with associated data and vector representation.
+
+                        This class inherits from Base and is associated with a database table defined by
+                        __tablename__. It maintains the following public methods and instance variables:
+
+                        - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.
+
+                        Instance variables:
+                        - id: Identifier for the data point, defined by data_point_types.
+                        - payload: JSON data associated with the data point.
+                        - vector: Vector representation of the data point, with size defined by vector_size.
+                        """
+
+                        __tablename__ = collection_name
+                        __table_args__ = {"extend_existing": True}
+                        # PGVector requires one column to be the primary key
+                        id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
+                        payload = Column(JSON)
+                        vector = Column(self.Vector(vector_size))
+
+                        def __init__(self, id, payload, vector):
+                            self.id = id
+                            self.payload = payload
+                            self.vector = vector
+
+                    async with self.engine.begin() as connection:
+                        if len(Base.metadata.tables.keys()) > 0:
+                            await connection.run_sync(
+                                Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
+                            )
```
@retry(
retry=retry_if_exception_type(DeadlockDetectedError),