From 840991c36f189170ba28174327b9dfaff35eec2a Mon Sep 17 00:00:00 2001
From: Igor Ilic <30923996+dexters1@users.noreply.github.com>
Date: Tue, 7 Oct 2025 20:44:10 +0200
Subject: [PATCH] refactor: Don't use async lock if not needed (#1510)

## Description

Reduce the time PGVector holds the async lock by checking whether the collection exists before taking the lock.

## Type of Change

- [ ] Bug fix (non-breaking change that fixes an issue)
- [ ] New feature (non-breaking change that adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)
- [ ] Documentation update
- [ ] Code refactoring
- [ ] Performance improvement
- [ ] Other (please specify):

## Screenshots/Videos (if applicable)

## Pre-submission Checklist

- [ ] **I have tested my changes thoroughly before submitting this PR**
- [ ] **This PR contains minimal changes necessary to address the issue/feature**
- [ ] My code follows the project's coding standards and style guidelines
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] I have added necessary documentation (if applicable)
- [ ] All new and existing tests pass
- [ ] I have searched existing PRs to ensure this change hasn't been submitted already
- [ ] I have linked any relevant issues in the description
- [ ] My commits have clear and descriptive messages

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
---
 .../vector/pgvector/PGVectorAdapter.py | 57 ++++++++++---------
 1 file changed, 29 insertions(+), 28 deletions(-)

diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
index 5ddb656a3..1986fae48 100644
--- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
+++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py
@@ -125,41 +125,42 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         data_point_types = get_type_hints(DataPoint)
         vector_size = self.embedding_engine.get_vector_size()

-        async with self.VECTOR_DB_LOCK:
-            if not await self.has_collection(collection_name):
+        if not await self.has_collection(collection_name):
+            async with self.VECTOR_DB_LOCK:
+                if not await self.has_collection(collection_name):

-                class PGVectorDataPoint(Base):
-                    """
-                    Represent a point in a vector data space with associated data and vector representation.
+                    class PGVectorDataPoint(Base):
+                        """
+                        Represent a point in a vector data space with associated data and vector representation.

-                    This class inherits from Base and is associated with a database table defined by
-                    __tablename__. It maintains the following public methods and instance variables:
+                        This class inherits from Base and is associated with a database table defined by
+                        __tablename__. It maintains the following public methods and instance variables:

-                    - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.
+                        - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.

-                    Instance variables:
-                    - id: Identifier for the data point, defined by data_point_types.
-                    - payload: JSON data associated with the data point.
-                    - vector: Vector representation of the data point, with size defined by vector_size.
-                    """
+                        Instance variables:
+                        - id: Identifier for the data point, defined by data_point_types.
+                        - payload: JSON data associated with the data point.
+                        - vector: Vector representation of the data point, with size defined by vector_size.
+                        """

-                    __tablename__ = collection_name
-                    __table_args__ = {"extend_existing": True}
-                    # PGVector requires one column to be the primary key
-                    id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
-                    payload = Column(JSON)
-                    vector = Column(self.Vector(vector_size))
+                        __tablename__ = collection_name
+                        __table_args__ = {"extend_existing": True}
+                        # PGVector requires one column to be the primary key
+                        id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
+                        payload = Column(JSON)
+                        vector = Column(self.Vector(vector_size))

-                    def __init__(self, id, payload, vector):
-                        self.id = id
-                        self.payload = payload
-                        self.vector = vector
+                        def __init__(self, id, payload, vector):
+                            self.id = id
+                            self.payload = payload
+                            self.vector = vector

-                async with self.engine.begin() as connection:
-                    if len(Base.metadata.tables.keys()) > 0:
-                        await connection.run_sync(
-                            Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
-                        )
+                    async with self.engine.begin() as connection:
+                        if len(Base.metadata.tables.keys()) > 0:
+                            await connection.run_sync(
+                                Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
+                            )

     @retry(
         retry=retry_if_exception_type(DeadlockDetectedError),
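
The change follows the check, then lock, then re-check (double-checked) pattern: the existence check is repeated after acquiring the lock because another coroutine may have created the collection between the unlocked check and lock acquisition. Below is a minimal, self-contained sketch of that pattern only; the class and names (`CollectionRegistry`, an in-memory set) are hypothetical and are not part of the cognee codebase.

```python
import asyncio


class CollectionRegistry:
    """Hypothetical stand-in for a vector store; tracks collection names in memory."""

    def __init__(self):
        self._collections: set[str] = set()
        self._lock = asyncio.Lock()

    async def has_collection(self, name: str) -> bool:
        return name in self._collections

    async def create_collection(self, name: str) -> None:
        # Fast path: callers skip the lock entirely when the collection already exists.
        if await self.has_collection(name):
            return
        async with self._lock:
            # Re-check under the lock: another coroutine may have created the
            # collection between the unlocked check and lock acquisition.
            if not await self.has_collection(name):
                self._collections.add(name)


async def main():
    registry = CollectionRegistry()
    # Five concurrent callers race on the same name; the collection is created once,
    # and later callers never touch the lock at all.
    await asyncio.gather(*(registry.create_collection("documents") for _ in range(5)))
    print(await registry.has_collection("documents"))  # True


if __name__ == "__main__":
    asyncio.run(main())
```

The re-check is what keeps the fast path safe: dropping it would allow two coroutines that both passed the unlocked check to create the same collection one after the other.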