refactor: Add batch search support for leaving out payload

This commit is contained in:
Igor Ilic 2026-01-16 13:21:42 +01:00
parent e51149c3a2
commit 5d412ed19b
5 changed files with 24 additions and 2 deletions

View file

@ -320,7 +320,12 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
self._na_exception_handler(e, query_string)
async def batch_search(
self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False
self,
collection_name: str,
query_texts: List[str],
limit: int,
with_vectors: bool = False,
include_payload: bool = False,
):
"""
Perform a batch search using multiple text queries against a collection.
@ -343,7 +348,14 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
data_vectors = await self.embedding_engine.embed_text(query_texts)
return await asyncio.gather(
*[
self.search(collection_name, None, vector, limit, with_vectors)
self.search(
collection_name,
None,
vector,
limit,
with_vectors,
include_payload=include_payload,
)
for vector in data_vectors
]
)

View file

@ -442,6 +442,7 @@ class ChromaDBAdapter(VectorDBInterface):
query_texts: List[str],
limit: int = 5,
with_vectors: bool = False,
include_payload: bool = False,
):
"""
Perform multiple searches in a single request for efficiency, returning results for each

View file

@ -260,6 +260,7 @@ class LanceDBAdapter(VectorDBInterface):
.limit(limit)
.to_list()
)
if not result_values:
return []
normalized_values = normalize_distances(result_values)
@ -279,6 +280,7 @@ class LanceDBAdapter(VectorDBInterface):
query_texts: List[str],
limit: Optional[int] = None,
with_vectors: bool = False,
include_payload: bool = False,
):
query_vectors = await self.embedding_engine.embed_text(query_texts)
@ -289,6 +291,7 @@ class LanceDBAdapter(VectorDBInterface):
query_vector=query_vector,
limit=limit,
with_vector=with_vectors,
include_payload=include_payload,
)
for query_vector in query_vectors
]

View file

@ -380,6 +380,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
query_texts: List[str],
limit: int = None,
with_vectors: bool = False,
include_payload: bool = False,
):
query_vectors = await self.embedding_engine.embed_text(query_texts)
@ -390,6 +391,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
query_vector=query_vector,
limit=limit,
with_vector=with_vectors,
include_payload=include_payload,
)
for query_vector in query_vectors
]

View file

@ -117,6 +117,7 @@ class VectorDBInterface(Protocol):
query_texts: List[str],
limit: Optional[int],
with_vectors: bool = False,
include_payload: bool = False,
):
"""
Perform a batch search using multiple text queries against a collection.
@ -129,6 +130,9 @@ class VectorDBInterface(Protocol):
- limit (Optional[int]): The maximum number of results to return for each query.
- with_vectors (bool): Whether to include vector representations with search
results. (default False)
- include_payload (bool): Whether to include payload data with the search results. Searching is faster when set to False. (default False)
The payload contains metadata about each data point; it is useful for searches based only on embedding distances,
such as the RAG_COMPLETION search type, but is not needed when the search also contains graph data.
"""
raise NotImplementedError