refactor: Update LanceDB and change delete to work async (#770)

<!-- .github/pull_request_template.md -->

## Description
Update LanceDB and rewrite data point deletion (`delete_data_points`) to run asynchronously.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Boris <boris@topoteretes.com>
Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
This commit is contained in:
Igor Ilic 2025-05-12 11:35:24 -04:00 committed by GitHub
parent a78fec3a91
commit 9c131f0d14
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 4424 additions and 4513 deletions

View file

@@ -204,29 +204,12 @@ class LanceDBAdapter(VectorDBInterface):
]
)
def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def _delete_data_points():
collection = await self.get_collection(collection_name)
async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
collection = await self.get_collection(collection_name)
# Delete one at a time to avoid commit conflicts
for data_point_id in data_point_ids:
await collection.delete(f"id = '{data_point_id}'")
return True
# Check if we're in an event loop
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = None
if loop and loop.is_running():
# If we're in a running event loop, create a new task
return loop.create_task(_delete_data_points())
else:
# If we're not in an event loop, run it synchronously
return asyncio.run(_delete_data_points())
# Delete one at a time to avoid commit conflicts
for data_point_id in data_point_ids:
await collection.delete(f"id = '{data_point_id}'")
async def create_vector_index(self, index_name: str, index_property_name: str):
await self.create_collection(

1386
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@@ -44,8 +44,8 @@ dependencies = [
"pypdf>=4.1.0,<6.0.0",
"jinja2>=3.1.3,<4",
"matplotlib>=3.8.3,<4",
"networkx>=3.2.1,<4",
"lancedb==0.16.0",
"networkx>=3.4.2,<4",
"lancedb==0.21.0",
"alembic>=1.13.3,<2",
"pre-commit>=4.0.1,<5",
"scikit-learn>=1.6.1,<2",
@@ -56,6 +56,7 @@ dependencies = [
"dlt[sqlalchemy]>=1.9.0,<2",
"sentry-sdk[fastapi]>=2.9.0,<3",
"structlog>=25.2.0,<26",
"onnxruntime==1.21.1",
]
[project.optional-dependencies]
@@ -121,7 +122,7 @@ dev = [
"ruff>=0.9.2,<1.0.0",
"tweepy==4.14.0",
"gitpython>=3.1.43,<4",
"pylance==0.19.2",
"pylance==0.22.0",
"mkdocs-material>=9.5.42,<10",
"mkdocs-minify-plugin>=0.8.0,<0.9",
"mkdocstrings[python]>=0.26.2,<0.27",

7517
uv.lock generated

File diff suppressed because it is too large Load diff