From 840991c36f189170ba28174327b9dfaff35eec2a Mon Sep 17 00:00:00 2001
From: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Date: Tue, 7 Oct 2025 20:44:10 +0200
Subject: [PATCH] refactor: Don't use async lock if not needed (#1510)

## Description

Reduce the time PGVector holds the async lock by checking whether the collection exists before taking the lock.

## Type of Change

- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)

## Pre-submission Checklist

- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the issue/feature**
- [ ] My code follows the project's coding standards and style guidelines
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
---
 .../vector/pgvector/PGVectorAdapter.py | 57 ++++++++++---------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
index 5ddb656a3..1986fae48 100644
--- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
+++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
@@ -125,41 +125,42 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         data_point_types = get_type_hints(DataPoint)
         vector_size = self.embedding_engine.get_vector_size()

-        async with self.VECTOR_DB_LOCK:
-            if not await self.has_collection(collection_name):
+        if not await self.has_collection(collection_name):
+            async with self.VECTOR_DB_LOCK:
+                if not await self.has_collection(collection_name):

-                class PGVectorDataPoint(Base):
-                    """
-                    Represent a point in a vector data space with associated data and vector representation.
+                    class PGVectorDataPoint(Base):
+                        """
+                        Represent a point in a vector data space with associated data and vector representation.

-                    This class inherits from Base and is associated with a database table defined by
-                    __tablename__. It maintains the following public methods and instance variables:
+                        This class inherits from Base and is associated with a database table defined by
+                        __tablename__. It maintains the following public methods and instance variables:

-                    - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.
+                        - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.

-                    Instance variables:
-                    - id: Identifier for the data point, defined by data_point_types.
-                    - payload: JSON data associated with the data point.
-                    - vector: Vector representation of the data point, with size defined by vector_size.
-                    """
+                        Instance variables:
+                        - id: Identifier for the data point, defined by data_point_types.
+                        - payload: JSON data associated with the data point.
+                        - vector: Vector representation of the data point, with size defined by vector_size.
+                        """

-                    __tablename__ = collection_name
-                    __table_args__ = {"extend_existing": True}
-                    # PGVector requires one column to be the primary key
-                    id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
-                    payload = Column(JSON)
-                    vector = Column(self.Vector(vector_size))
+                        __tablename__ = collection_name
+                        __table_args__ = {"extend_existing": True}
+                        # PGVector requires one column to be the primary key
+                        id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
+                        payload = Column(JSON)
+                        vector = Column(self.Vector(vector_size))

-                    def __init__(self, id, payload, vector):
-                        self.id = id
-                        self.payload = payload
-                        self.vector = vector
+                        def __init__(self, id, payload, vector):
+                            self.id = id
+                            self.payload = payload
+                            self.vector = vector

-                async with self.engine.begin() as connection:
-                    if len(Base.metadata.tables.keys()) > 0:
-                        await connection.run_sync(
-                            Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
-                        )
+                    async with self.engine.begin() as connection:
+                        if len(Base.metadata.tables.keys()) > 0:
+                            await connection.run_sync(
+                                Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
+                            )

     @retry(
         retry=retry_if_exception_type(DeadlockDetectedError),
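
The change follows the check, then lock, then re-check (double-checked) pattern: the existence check is repeated after acquiring the lock because another coroutine may have created the collection between the unlocked check and lock acquisition. Below is a minimal, self-contained sketch of that pattern only; the class and names (`CollectionRegistry`, an in-memory set) are hypothetical and are not part of the cognee codebase.

```python
import asyncio


class CollectionRegistry:
    """Hypothetical stand-in for a vector store; tracks collection names in memory."""

    def __init__(self):
        self._collections: set[str] = set()
        self._lock = asyncio.Lock()

    async def has_collection(self, name: str) -> bool:
        return name in self._collections

    async def create_collection(self, name: str) -> None:
        # Fast path: callers skip the lock entirely when the collection already exists.
        if await self.has_collection(name):
            return
        async with self._lock:
            # Re-check under the lock: another coroutine may have created the
            # collection between the unlocked check and lock acquisition.
            if not await self.has_collection(name):
                self._collections.add(name)


async def main():
    registry = CollectionRegistry()
    # Five concurrent callers race on the same name; the collection is created once,
    # and later callers never touch the lock at all.
    await asyncio.gather(*(registry.create_collection("documents") for _ in range(5)))
    print(await registry.has_collection("documents"))  # True


if __name__ == "__main__":
    asyncio.run(main())
```

The re-check is what keeps the fast path safe: dropping it would allow two coroutines that both passed the unlocked check to create the same collection one after the other.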