refactor: Add batch search support for leaving out payload

This commit is contained in:
Igor Ilic 2026-01-16 13:21:42 +01:00
parent e51149c3a2
commit 5d412ed19b
5 changed files with 24 additions and 2 deletions

View file

@ -320,7 +320,12 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
self._na_exception_handler(e, query_string) self._na_exception_handler(e, query_string)
async def batch_search( async def batch_search(
self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False self,
collection_name: str,
query_texts: List[str],
limit: int,
with_vectors: bool = False,
include_payload: bool = False,
): ):
""" """
Perform a batch search using multiple text queries against a collection. Perform a batch search using multiple text queries against a collection.
@ -343,7 +348,14 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
data_vectors = await self.embedding_engine.embed_text(query_texts) data_vectors = await self.embedding_engine.embed_text(query_texts)
return await asyncio.gather( return await asyncio.gather(
*[ *[
self.search(collection_name, None, vector, limit, with_vectors) self.search(
collection_name,
None,
vector,
limit,
with_vectors,
include_payload=include_payload,
)
for vector in data_vectors for vector in data_vectors
] ]
) )

View file

@ -442,6 +442,7 @@ class ChromaDBAdapter(VectorDBInterface):
query_texts: List[str], query_texts: List[str],
limit: int = 5, limit: int = 5,
with_vectors: bool = False, with_vectors: bool = False,
include_payload: bool = False,
): ):
""" """
Perform multiple searches in a single request for efficiency, returning results for each Perform multiple searches in a single request for efficiency, returning results for each

View file

@ -260,6 +260,7 @@ class LanceDBAdapter(VectorDBInterface):
.limit(limit) .limit(limit)
.to_list() .to_list()
) )
if not result_values: if not result_values:
return [] return []
normalized_values = normalize_distances(result_values) normalized_values = normalize_distances(result_values)
@ -279,6 +280,7 @@ class LanceDBAdapter(VectorDBInterface):
query_texts: List[str], query_texts: List[str],
limit: Optional[int] = None, limit: Optional[int] = None,
with_vectors: bool = False, with_vectors: bool = False,
include_payload: bool = False,
): ):
query_vectors = await self.embedding_engine.embed_text(query_texts) query_vectors = await self.embedding_engine.embed_text(query_texts)
@ -289,6 +291,7 @@ class LanceDBAdapter(VectorDBInterface):
query_vector=query_vector, query_vector=query_vector,
limit=limit, limit=limit,
with_vector=with_vectors, with_vector=with_vectors,
include_payload=include_payload,
) )
for query_vector in query_vectors for query_vector in query_vectors
] ]

View file

@ -380,6 +380,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
query_texts: List[str], query_texts: List[str],
limit: int = None, limit: int = None,
with_vectors: bool = False, with_vectors: bool = False,
include_payload: bool = False,
): ):
query_vectors = await self.embedding_engine.embed_text(query_texts) query_vectors = await self.embedding_engine.embed_text(query_texts)
@ -390,6 +391,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
query_vector=query_vector, query_vector=query_vector,
limit=limit, limit=limit,
with_vector=with_vectors, with_vector=with_vectors,
include_payload=include_payload,
) )
for query_vector in query_vectors for query_vector in query_vectors
] ]

View file

@ -117,6 +117,7 @@ class VectorDBInterface(Protocol):
query_texts: List[str], query_texts: List[str],
limit: Optional[int], limit: Optional[int],
with_vectors: bool = False, with_vectors: bool = False,
include_payload: bool = False,
): ):
""" """
Perform a batch search using multiple text queries against a collection. Perform a batch search using multiple text queries against a collection.
@ -129,6 +130,9 @@ class VectorDBInterface(Protocol):
- limit (Optional[int]): The maximum number of results to return for each query. - limit (Optional[int]): The maximum number of results to return for each query.
- with_vectors (bool): Whether to include vector representations with search - with_vectors (bool): Whether to include vector representations with search
results. (default False) results. (default False)
- include_payload (bool): Whether to include payload data in the search results. Searches are faster when this is False. (default False)
The payload contains metadata about each data point. It is useful for searches that are based only on embedding distances,
such as the RAG_COMPLETION search type, but it is not needed when the search also contains graph data.
""" """
raise NotImplementedError raise NotImplementedError