From 3567e0d7e76847b0640c4f66c24d3a41cdcd8404 Mon Sep 17 00:00:00 2001
From: Igor Ilic <igorilic03@gmail.com>
Date: Thu, 31 Oct 2024 00:42:18 +0100
Subject: [PATCH] fix: Fix chunk naive llm classifier

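Fix UUID handling in the chunk naive LLM classifier: drop the duplicated
classification-type ID and pass the IDs to the vector engine as strings.

Fix LanceDB data point deletion for a single ID: a one-element tuple is
formatted with a trailing comma, which is not a valid `IN` list, so use
an equality filter in that case.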

Fix #COG-472
---
 .../databases/vector/lancedb/LanceDBAdapter.py               | 5 ++++-
 .../chunk_naive_llm_classifier/chunk_naive_llm_classifier.py | 3 +--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py
index 404634489..3a3080248 100644
--- a/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py
+++ b/cognee/infrastructure/databases/vector/lancedb/LanceDBAdapter.py
@@ -164,7 +164,10 @@ class LanceDBAdapter(VectorDBInterface):
     async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
         connection = await self.get_connection()
         collection = await connection.open_table(collection_name)
-        results = await collection.delete(f"id IN {tuple(data_point_ids)}")
+        if len(data_point_ids) == 1:
+            results = await collection.delete(f"id = '{data_point_ids[0]}'")
+        else:
+            results = await collection.delete(f"id IN {tuple(data_point_ids)}")
         return results
 
     async def prune(self):
diff --git a/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py b/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py
index 83b495450..b4bcb9467 100644
--- a/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py
+++ b/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py
@@ -21,7 +21,6 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
     for chunk_index, chunk in enumerate(data_chunks):
         chunk_classification = chunk_classifications[chunk_index]
         classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
-        classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
 
         for classification_subclass in chunk_classification.label.subclass:
             classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))
@@ -39,7 +38,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
     if await vector_engine.has_collection(collection_name):
         existing_data_points = await vector_engine.retrieve(
             collection_name,
-            list(set(classification_data_points)),
+            [str(classification_data) for classification_data in list(set(classification_data_points))],
         ) if len(classification_data_points) > 0 else []
 
         existing_points_map = {point.id: True for point in existing_data_points}