refactor: Add batch search support for leaving out payload
parent e51149c3a2
commit 5d412ed19b
5 changed files with 24 additions and 2 deletions

@@ -320,7 +320,12 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
             self._na_exception_handler(e, query_string)
 
     async def batch_search(
-        self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False
+        self,
+        collection_name: str,
+        query_texts: List[str],
+        limit: int,
+        with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         """
         Perform a batch search using multiple text queries against a collection.

@@ -343,7 +348,14 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
         data_vectors = await self.embedding_engine.embed_text(query_texts)
         return await asyncio.gather(
             *[
-                self.search(collection_name, None, vector, limit, with_vectors)
+                self.search(
+                    collection_name,
+                    None,
+                    vector,
+                    limit,
+                    with_vectors,
+                    include_payload=include_payload,
+                )
                 for vector in data_vectors
             ]
         )
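
For context, a minimal usage sketch of the new flag against this adapter; the adapter instance, the collection name, and the helper function are hypothetical and not part of this commit:

    # Hypothetical sketch (not part of this commit): assumes an already configured
    # adapter exposing the batch_search shown above and an existing collection.
    async def retrieve_without_payload(adapter, query_texts):
        # One result list comes back per query text, in the same order,
        # because the adapter fans the queries out with asyncio.gather.
        return await adapter.batch_search(
            collection_name="Entity_name",  # illustrative collection name
            query_texts=query_texts,
            limit=10,
            with_vectors=False,
            include_payload=False,  # leave out the payload for a faster search
        )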

@@ -442,6 +442,7 @@ class ChromaDBAdapter(VectorDBInterface):
         query_texts: List[str],
         limit: int = 5,
         with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         """
         Perform multiple searches in a single request for efficiency, returning results for each

@@ -260,6 +260,7 @@ class LanceDBAdapter(VectorDBInterface):
             .limit(limit)
             .to_list()
         )
+
         if not result_values:
             return []
         normalized_values = normalize_distances(result_values)

@@ -279,6 +280,7 @@ class LanceDBAdapter(VectorDBInterface):
         query_texts: List[str],
         limit: Optional[int] = None,
         with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         query_vectors = await self.embedding_engine.embed_text(query_texts)
 
@@ -289,6 +291,7 @@ class LanceDBAdapter(VectorDBInterface):
                 query_vector=query_vector,
                 limit=limit,
                 with_vector=with_vectors,
+                include_payload=include_payload,
             )
             for query_vector in query_vectors
         ]

@@ -380,6 +380,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         query_texts: List[str],
         limit: int = None,
         with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         query_vectors = await self.embedding_engine.embed_text(query_texts)
 
@@ -390,6 +391,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
                 query_vector=query_vector,
                 limit=limit,
                 with_vector=with_vectors,
+                include_payload=include_payload,
             )
             for query_vector in query_vectors
         ]

@@ -117,6 +117,7 @@ class VectorDBInterface(Protocol):
         query_texts: List[str],
         limit: Optional[int],
         with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         """
         Perform a batch search using multiple text queries against a collection.

@@ -129,6 +130,9 @@ class VectorDBInterface(Protocol):
             - limit (Optional[int]): The maximum number of results to return for each query.
             - with_vectors (bool): Whether to include vector representations with search
               results. (default False)
+            - include_payload (bool): Whether to include the payload data with search. Search is faster when set to False.
+              Payload contains metadata about the data point, useful for searches that are only based on embedding distances
+              like the RAG_COMPLETION search type, but not needed when search also contains graph data.
         """
         raise NotImplementedError
 
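
Because every adapter above now accepts the same keyword, code written against the protocol can forward it uniformly. A minimal sketch, assuming a hypothetical batch_retrieve helper and a uses_graph_data flag supplied by the caller (neither is part of this commit); it follows the docstring's guidance that the payload matters for embedding-only searches such as RAG_COMPLETION but can be left out when graph data is fetched separately:

    from typing import List

    # Hypothetical helper (not part of this commit): works with any
    # VectorDBInterface implementation, since they all share the same
    # batch_search signature after this change.
    async def batch_retrieve(
        vector_db,                # any VectorDBInterface implementation
        collection_name: str,
        query_texts: List[str],
        uses_graph_data: bool,
        limit: int = 10,
    ):
        return await vector_db.batch_search(
            collection_name,
            query_texts,
            limit=limit,
            with_vectors=False,
            # The payload is only needed when the search relies on it for
            # metadata (embedding-only retrieval); skip it for speed when
            # graph data is fetched separately.
            include_payload=not uses_graph_data,
        )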