refactor: Add batch search support for leaving out payload
parent e51149c3a2
commit 5d412ed19b
5 changed files with 24 additions and 2 deletions

@@ -320,7 +320,12 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
             self._na_exception_handler(e, query_string)
 
     async def batch_search(
-        self, collection_name: str, query_texts: List[str], limit: int, with_vectors: bool = False
+        self,
+        collection_name: str,
+        query_texts: List[str],
+        limit: int,
+        with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         """
         Perform a batch search using multiple text queries against a collection.

@@ -343,7 +348,14 @@ class NeptuneAnalyticsAdapter(NeptuneGraphDB, VectorDBInterface):
         data_vectors = await self.embedding_engine.embed_text(query_texts)
         return await asyncio.gather(
             *[
-                self.search(collection_name, None, vector, limit, with_vectors)
+                self.search(
+                    collection_name,
+                    None,
+                    vector,
+                    limit,
+                    with_vectors,
+                    include_payload=include_payload,
+                )
                 for vector in data_vectors
             ]
         )
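
For context, a minimal usage sketch of the new flag against this adapter; the adapter instance, the collection name, and the helper function are hypothetical and not part of this commit:

    # Hypothetical sketch (not part of this commit): assumes an already configured
    # adapter exposing the batch_search shown above and an existing collection.
    async def retrieve_without_payload(adapter, query_texts):
        # One result list comes back per query text, in the same order,
        # because the adapter fans the queries out with asyncio.gather.
        return await adapter.batch_search(
            collection_name="Entity_name",  # illustrative collection name
            query_texts=query_texts,
            limit=10,
            with_vectors=False,
            include_payload=False,  # leave out the payload for a faster search
        )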

@@ -442,6 +442,7 @@ class ChromaDBAdapter(VectorDBInterface):
         query_texts: List[str],
         limit: int = 5,
         with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         """
         Perform multiple searches in a single request for efficiency, returning results for each

@@ -260,6 +260,7 @@ class LanceDBAdapter(VectorDBInterface):
             .limit(limit)
             .to_list()
         )
+
         if not result_values:
             return []
         normalized_values = normalize_distances(result_values)

@@ -279,6 +280,7 @@ class LanceDBAdapter(VectorDBInterface):
         query_texts: List[str],
         limit: Optional[int] = None,
         with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         query_vectors = await self.embedding_engine.embed_text(query_texts)
 
@@ -289,6 +291,7 @@ class LanceDBAdapter(VectorDBInterface):
                 query_vector=query_vector,
                 limit=limit,
                 with_vector=with_vectors,
+                include_payload=include_payload,
             )
             for query_vector in query_vectors
         ]

@@ -380,6 +380,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         query_texts: List[str],
         limit: int = None,
         with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         query_vectors = await self.embedding_engine.embed_text(query_texts)
 
@@ -390,6 +391,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
                 query_vector=query_vector,
                 limit=limit,
                 with_vector=with_vectors,
+                include_payload=include_payload,
             )
             for query_vector in query_vectors
         ]

@@ -117,6 +117,7 @@ class VectorDBInterface(Protocol):
         query_texts: List[str],
         limit: Optional[int],
         with_vectors: bool = False,
+        include_payload: bool = False,
     ):
         """
         Perform a batch search using multiple text queries against a collection.

@@ -129,6 +130,9 @@ class VectorDBInterface(Protocol):
             - limit (Optional[int]): The maximum number of results to return for each query.
             - with_vectors (bool): Whether to include vector representations with search
               results. (default False)
+            - include_payload (bool): Whether to include the payload data with search. Search is faster when set to False.
+              Payload contains metadata about the data point, useful for searches that are only based on embedding distances
+              like the RAG_COMPLETION search type, but not needed when search also contains graph data.
         """
         raise NotImplementedError
 
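
Because every adapter above now accepts the same keyword, code written against the protocol can forward it uniformly. A minimal sketch, assuming a hypothetical batch_retrieve helper and a uses_graph_data flag supplied by the caller (neither is part of this commit); it follows the docstring's guidance that the payload matters for embedding-only searches such as RAG_COMPLETION but can be left out when graph data is fetched separately:

    from typing import List

    # Hypothetical helper (not part of this commit): works with any
    # VectorDBInterface implementation, since they all share the same
    # batch_search signature after this change.
    async def batch_retrieve(
        vector_db,                # any VectorDBInterface implementation
        collection_name: str,
        query_texts: List[str],
        uses_graph_data: bool,
        limit: int = 10,
    ):
        return await vector_db.batch_search(
            collection_name,
            query_texts,
            limit=limit,
            with_vectors=False,
            # The payload is only needed when the search relies on it for
            # metadata (embedding-only retrieval); skip it for speed when
            # graph data is fetched separately.
            include_payload=not uses_graph_data,
        )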