cognee/cognee/modules/search/vector/search_similarity.py
Boris 219afbce68
feat: add lancedb vector storage [COG-176] (#90)
* feat: integrate lancedb

* fix: use futures in weaviate adapter to enable async behaviour
2024-05-03 10:35:41 +02:00

49 lines
1.6 KiB
Python

from dsp.utils import deduplicate
from cognee.infrastructure import infrastructure_config
from cognee.infrastructure.databases.graph.get_graph_client import get_graph_client
async def search_similarity(query: str, graph):
graph_db_type = infrastructure_config.get_config()["graph_engine"]
graph_client = await get_graph_client(graph_db_type)
layer_nodes = await graph_client.get_layer_nodes()
unique_layer_uuids = set(node["layer_id"] for node in layer_nodes)
graph_nodes = []
for layer_id in unique_layer_uuids:
vector_engine = infrastructure_config.get_config()["vector_engine"]
results = await vector_engine.search(layer_id, query_text = query, limit = 10)
if len(results) > 0:
graph_nodes.extend([
dict(
layer_id = result.payload["references"]["cognitive_layer"],
node_id = result.payload["references"]["node_id"],
score = result.score,
) for result in results if result.score > 0.8
])
if len(graph_nodes) == 0:
return []
relevant_context = []
for graph_node_data in graph_nodes:
graph_node = await graph_client.extract_node(graph_node_data["node_id"])
if "chunk_collection" not in graph_node and "chunk_id" not in graph_node:
continue
vector_point = await vector_engine.retrieve(
graph_node["chunk_collection"],
graph_node["chunk_id"],
)
relevant_context.append(vector_point.payload["text"])
return deduplicate(relevant_context)