fix: Resolve duplicate chunk issue for PGVector [COG-895] (#705)
<!-- .github/pull_request_template.md -->

## Description

Resolve issues with duplicate chunks for PGVector.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
This commit is contained in:
parent
cd0d321eda
commit
c4a6c94675
1 changed file with 25 additions and 9 deletions
|
|
@@ -124,16 +124,32 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
|
||||||
self.payload = payload
|
self.payload = payload
|
||||||
self.vector = vector
|
self.vector = vector
|
||||||
|
|
||||||
pgvector_data_points = [
|
|
||||||
PGVectorDataPoint(
|
|
||||||
id=data_point.id,
|
|
||||||
vector=data_vectors[data_index],
|
|
||||||
payload=serialize_data(data_point.model_dump()),
|
|
||||||
)
|
|
||||||
for (data_index, data_point) in enumerate(data_points)
|
|
||||||
]
|
|
||||||
|
|
||||||
async with self.get_async_session() as session:
|
async with self.get_async_session() as session:
|
||||||
|
pgvector_data_points = []
|
||||||
|
|
||||||
|
for data_index, data_point in enumerate(data_points):
|
||||||
|
# Check to see if data should be updated or a new data item should be created
|
||||||
|
data_point_db = (
|
||||||
|
await session.execute(
|
||||||
|
select(PGVectorDataPoint).filter(PGVectorDataPoint.id == data_point.id)
|
||||||
|
)
|
||||||
|
).scalar_one_or_none()
|
||||||
|
|
||||||
|
# If data point exists update it, if not create a new one
|
||||||
|
if data_point_db:
|
||||||
|
data_point_db.id = data_point.id
|
||||||
|
data_point_db.vector = data_vectors[data_index]
|
||||||
|
data_point_db.payload = serialize_data(data_point.model_dump())
|
||||||
|
pgvector_data_points.append(data_point_db)
|
||||||
|
else:
|
||||||
|
pgvector_data_points.append(
|
||||||
|
PGVectorDataPoint(
|
||||||
|
id=data_point.id,
|
||||||
|
vector=data_vectors[data_index],
|
||||||
|
payload=serialize_data(data_point.model_dump()),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
session.add_all(pgvector_data_points)
|
session.add_all(pgvector_data_points)
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue