fix: fixes pgvector deadlock

This commit is contained in:
hajdul88 2025-10-07 19:19:32 +02:00
parent 7ec1c75bee
commit b288e5d7a0

View file

@@ -125,41 +125,42 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
data_point_types = get_type_hints(DataPoint) data_point_types = get_type_hints(DataPoint)
vector_size = self.embedding_engine.get_vector_size() vector_size = self.embedding_engine.get_vector_size()
async with self.VECTOR_DB_LOCK: if not await self.has_collection(collection_name):
if not await self.has_collection(collection_name): async with self.VECTOR_DB_LOCK:
if not await self.has_collection(collection_name):
class PGVectorDataPoint(Base): class PGVectorDataPoint(Base):
""" """
Represent a point in a vector data space with associated data and vector representation. Represent a point in a vector data space with associated data and vector representation.
This class inherits from Base and is associated with a database table defined by This class inherits from Base and is associated with a database table defined by
__tablename__. It maintains the following public methods and instance variables: __tablename__. It maintains the following public methods and instance variables:
- __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance. - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance.
Instance variables: Instance variables:
- id: Identifier for the data point, defined by data_point_types. - id: Identifier for the data point, defined by data_point_types.
- payload: JSON data associated with the data point. - payload: JSON data associated with the data point.
- vector: Vector representation of the data point, with size defined by vector_size. - vector: Vector representation of the data point, with size defined by vector_size.
""" """
__tablename__ = collection_name __tablename__ = collection_name
__table_args__ = {"extend_existing": True} __table_args__ = {"extend_existing": True}
# PGVector requires one column to be the primary key # PGVector requires one column to be the primary key
id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True) id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True)
payload = Column(JSON) payload = Column(JSON)
vector = Column(self.Vector(vector_size)) vector = Column(self.Vector(vector_size))
def __init__(self, id, payload, vector): def __init__(self, id, payload, vector):
self.id = id self.id = id
self.payload = payload self.payload = payload
self.vector = vector self.vector = vector
async with self.engine.begin() as connection: async with self.engine.begin() as connection:
if len(Base.metadata.tables.keys()) > 0: if len(Base.metadata.tables.keys()) > 0:
await connection.run_sync( await connection.run_sync(
Base.metadata.create_all, tables=[PGVectorDataPoint.__table__] Base.metadata.create_all, tables=[PGVectorDataPoint.__table__]
) )
@retry( @retry(
retry=retry_if_exception_type(DeadlockDetectedError), retry=retry_if_exception_type(DeadlockDetectedError),