From 25468447871ea63287b4d4924576efa38a27aaaf Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Mon, 20 Jan 2025 13:42:39 +0100 Subject: [PATCH 1/3] feat: Add normalization to PGVector search Add normalization to PGVector search results --- .../vector/pgvector/PGVectorAdapter.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index df22e8f18..3700fd0fa 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -14,9 +14,9 @@ from ...relational.ModelBase import Base from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter from ..embeddings.EmbeddingEngine import EmbeddingEngine from ..models.ScoredResult import ScoredResult -from ..utils import normalize_distances from ..vector_db_interface import VectorDBInterface from .serialize_data import serialize_data +from ..utils import normalize_distances class IndexSchema(DataPoint): @@ -247,12 +247,22 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): # Extract distances and find min/max for normalization for vector in closest_items: - # TODO: Add normalization of similarity score - vector_list.append(vector) + vector_list.append( + { + "id": UUID(str(vector.id)), + "payload": vector.payload, + "_distance": vector.similarity, + } + ) + + # Normalize vector distance and add this as score information to vector_list + normalized_values = normalize_distances(vector_list) + for i in range(0, len(normalized_values)): + vector_list[i]["score"] = normalized_values[i] # Create and return ScoredResult objects return [ - ScoredResult(id=UUID(str(row.id)), payload=row.payload, score=row.similarity) + ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("score")) for row in vector_list ] From b0cec3fcaa9d298e63f4f394640fe55c53cb7f25 Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 24 Jan 2025 19:03:57 +0100 Subject: [PATCH 2/3] refactor: Remove conversion to string --- .../databases/vector/pgvector/PGVectorAdapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index 3700fd0fa..6e7ea6318 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -208,7 +208,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): # Create and return ScoredResult objects return [ - ScoredResult(id=UUID(str(row.id)), payload=row.payload, score=row.similarity) + ScoredResult(id=UUID(row.id), payload=row.payload, score=row.similarity) for row in vector_list ] @@ -249,7 +249,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): for vector in closest_items: vector_list.append( { - "id": UUID(str(vector.id)), + "id": UUID(vector.id), "payload": vector.payload, "_distance": vector.similarity, } From 23ecf245edb6ee1a1a0912002535d09c02563aed Mon Sep 17 00:00:00 2001 From: Igor Ilic Date: Fri, 24 Jan 2025 19:20:55 +0100 Subject: [PATCH 3/3] fix: Return string conversion to resolve traceback --- .../databases/vector/pgvector/PGVectorAdapter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index 6e7ea6318..3700fd0fa 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -208,7 +208,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): # Create and return ScoredResult objects return [ - ScoredResult(id=UUID(row.id), payload=row.payload, score=row.similarity) + ScoredResult(id=UUID(str(row.id)), payload=row.payload, score=row.similarity) for row in vector_list ] @@ -249,7 +249,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): for vector in closest_items: vector_list.append( { - "id": UUID(vector.id), + "id": UUID(str(vector.id)), "payload": vector.payload, "_distance": vector.similarity, }