Add get_vectors_by_ids method and filter out vector data from query results
This commit is contained in:
parent
6cab68bb47
commit
1e2d5252d7
2 changed files with 38 additions and 5 deletions
|
|
@ -210,9 +210,11 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|||
continue
|
||||
|
||||
meta = self._id_to_meta.get(idx, {})
|
||||
# Filter out __vector__ from query results to avoid returning large vector data
|
||||
filtered_meta = {k: v for k, v in meta.items() if k != "__vector__"}
|
||||
results.append(
|
||||
{
|
||||
**meta,
|
||||
**filtered_meta,
|
||||
"id": meta.get("__id__"),
|
||||
"distance": float(dist),
|
||||
"created_at": meta.get("__created_at__"),
|
||||
|
|
@ -424,8 +426,10 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|||
if not metadata:
|
||||
return None
|
||||
|
||||
# Filter out __vector__ from metadata to avoid returning large vector data
|
||||
filtered_metadata = {k: v for k, v in metadata.items() if k != "__vector__"}
|
||||
return {
|
||||
**metadata,
|
||||
**filtered_metadata,
|
||||
"id": metadata.get("__id__"),
|
||||
"created_at": metadata.get("__created_at__"),
|
||||
}
|
||||
|
|
@ -448,9 +452,13 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|||
if fid is not None:
|
||||
metadata = self._id_to_meta.get(fid, {})
|
||||
if metadata:
|
||||
# Filter out __vector__ from metadata to avoid returning large vector data
|
||||
filtered_metadata = {
|
||||
k: v for k, v in metadata.items() if k != "__vector__"
|
||||
}
|
||||
results.append(
|
||||
{
|
||||
**metadata,
|
||||
**filtered_metadata,
|
||||
"id": metadata.get("__id__"),
|
||||
"created_at": metadata.get("__created_at__"),
|
||||
}
|
||||
|
|
@ -458,6 +466,31 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|||
|
||||
return results
|
||||
|
||||
async def get_vectors_by_ids(self, ids: list[str]) -> dict[str, list[float]]:
    """Get vectors by their IDs, returning only ID and vector data for efficiency.

    Args:
        ids: List of unique identifiers

    Returns:
        Dictionary mapping IDs to their vector embeddings
        Format: {id: [vector_values], ...}

        An ID is left out of the result when it has no Faiss internal ID,
        when that internal ID has no metadata entry, or when the metadata
        carries no "__vector__" key.
    """
    if not ids:
        return {}

    vectors_dict = {}
    for custom_id in ids:
        # Find the Faiss internal ID for the custom ID
        fid = self._find_faiss_id_by_custom_id(custom_id)
        if fid is None:
            continue

        # One lookup covers both "unknown fid" and "empty metadata".
        metadata = self._id_to_meta.get(fid)
        if not metadata or "__vector__" not in metadata:
            continue

        # Get the stored vector from metadata
        vectors_dict[custom_id] = metadata["__vector__"]

    return vectors_dict
|
||||
|
||||
async def drop(self) -> dict[str, str]:
|
||||
"""Drop all vector data from storage and clean up resources
|
||||
|
||||
|
|
|
|||
|
|
@ -2782,8 +2782,8 @@ async def _find_related_text_unit_from_entities(
|
|||
selected_chunk_ids = [] # Initialize to avoid UnboundLocalError
|
||||
|
||||
# Step 4: Apply the selected chunk selection algorithm
|
||||
# Pick by vector similarity:
|
||||
# The order of text chunks aligns with the naive retrieval's destination.
|
||||
# Pick by vector similarity:
|
||||
# The order of text chunks aligns with the naive retrieval's destination.
|
||||
# When reranking is disabled, the text chunks delivered to the LLM tend to favor naive retrieval.
|
||||
if kg_chunk_pick_method == "VECTOR" and query and chunks_vdb:
|
||||
num_of_chunks = int(max_related_chunks * len(entities_with_chunks) / 2)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue