From 1e40ad37b8b3cd7062a649481c8460d2b173e42c Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Mon, 23 Oct 2023 13:16:48 +0200
Subject: [PATCH] Improve classifier, add turn output data to json

---
 level_3/models/testoutput.py |  4 ++--
 level_3/rag_test_manager.py  | 21 +++++++++++++--------
 level_3/vectordb/vectordb.py | 28 ++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/level_3/models/testoutput.py b/level_3/models/testoutput.py
index af44dfc58..21512b538 100644
--- a/level_3/models/testoutput.py
+++ b/level_3/models/testoutput.py
@@ -9,7 +9,7 @@ from database.database import Base
 
 
 from datetime import datetime
-from sqlalchemy import Column, String, DateTime, ForeignKey
+from sqlalchemy import Column, String, DateTime, ForeignKey, JSON
 from sqlalchemy.orm import relationship
 import os
 import sys
@@ -27,7 +27,7 @@ class TestOutput(Base):
     user_id = Column(String, ForeignKey('users.id'), index=True)  # Added user_id field
     test_set_id = Column(String, ForeignKey('test_sets.id'), index=True)
     operation_id = Column(String, ForeignKey('operations.id'), index=True)
-    test_results = Column(String, nullable=True)
+    test_results = Column(JSON, nullable=True)
     created_at = Column(DateTime, default=datetime.utcnow)
     updated_at = Column(DateTime, onupdate=datetime.utcnow)
 
diff --git a/level_3/rag_test_manager.py b/level_3/rag_test_manager.py
index 40f0ff6a2..ddd6c1dca 100644
--- a/level_3/rag_test_manager.py
+++ b/level_3/rag_test_manager.py
@@ -241,7 +241,7 @@ def data_format_route( data_string: str):
 def data_location_route(data_string: str):
     @ai_classifier
     class LocationRoute(Enum):
-        """Represents classifier for the data location"""
+        """Represents classifier for the data location, if it is device, or database connections string or URL """
 
         DEVICE = "file_path_starting_with_.data_or_containing_it"
         # URL = "url starting with http or https"
@@ -273,6 +273,9 @@ async def start_test(data, test_set=None, user_id=None, params=None, job_id=None
 
         job_id = await fetch_job_id(session, user_id=user_id, job_id=job_id)
         test_set_id = await fetch_test_set_id(session, user_id=user_id, id=job_id)
+        memory = await Memory.create_memory(user_id, session, namespace="SEMANTICMEMORY")
+        await memory.add_memory_instance("ExampleMemory")
+        existing_user = await Memory.check_existing_user(user_id, session)
 
         if job_id is None:
             job_id = str(uuid.uuid4())
@@ -303,9 +306,6 @@ async def start_test(data, test_set=None, user_id=None, params=None, job_id=None
 
             if test_id is None:
                 test_id = str(generate_letter_uuid()) + "_" +"SEMANTICMEMORY"
-            memory = await Memory.create_memory(user_id, session, namespace="SEMANTICMEMORY")
-            await memory.add_memory_instance("ExampleMemory")
-            existing_user = await Memory.check_existing_user(user_id, session)
             await memory.manage_memory_attributes(existing_user)
             test_class = test_id + "_class"
             await memory.add_dynamic_memory_class(test_id.lower(), test_id)
@@ -378,16 +378,21 @@ async def start_test(data, test_set=None, user_id=None, params=None, job_id=None
             await memory.dynamic_method_call(dynamic_memory_class, 'delete_memories',
                                                            namespace=test_id)
 
-            return test_eval_pipeline
+            return test_id, test_eval_pipeline
 
         results = []
+
         if only_llm_context:
-            result = await run_test(test= None, loader_settings=loader_settings, metadata=metadata, only_llm_context=only_llm_context)
+            test_id, result = await run_test(test=None, loader_settings=loader_settings, metadata=metadata,
+                                             only_llm_context=only_llm_context)
             results.append(result)
+
         for param in test_params:
-            result = await run_test(param, loader_settings, metadata,only_llm_context=only_llm_context)
+            test_id, result = await run_test(param, loader_settings, metadata, only_llm_context=only_llm_context)
             results.append(result)
 
+        await add_entity(session, TestOutput(id=test_id, user_id=user_id, test_results=str(json.dumps(results))))
+
         print(results)
 
         return results
@@ -432,7 +437,7 @@ async def main():
     ]
     # "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf"
     #http://public-library.uk/ebooks/59/83.pdf
-    result = await start_test(".data/3ZCCCW.pdf", test_set=test_set, user_id="676", params=None, metadata=metadata)
+    result = await start_test(".data/3ZCCCW.pdf", test_set=test_set, user_id="677", params=None, metadata=metadata)
     #
     # parser = argparse.ArgumentParser(description="Run tests against a document.")
     # parser.add_argument("--url", required=True, help="URL of the document to test.")
diff --git a/level_3/vectordb/vectordb.py b/level_3/vectordb/vectordb.py
index bdad4299b..2382e9d04 100644
--- a/level_3/vectordb/vectordb.py
+++ b/level_3/vectordb/vectordb.py
@@ -95,6 +95,34 @@ class WeaviateVectorDB(VectorDB):
         )
         return client
 
+    from marshmallow import Schema, fields
+
+    def create_document_structure(observation, params, metadata_schema_class=None):
+        """
+        Create and validate a document structure with optional custom fields.
+
+        :param observation: Content of the document.
+        :param params: Metadata information.
+        :param metadata_schema_class: Custom metadata schema class (optional).
+        :return: A list containing the validated document data.
+        """
+        document_data = {
+            "metadata": params,
+            "page_content": observation
+        }
+
+        def get_document_schema():
+            class DynamicDocumentSchema(Schema):
+                metadata = fields.Nested(metadata_schema_class, required=True)
+                page_content = fields.Str(required=True)
+
+            return DynamicDocumentSchema
+
+        # Validate and deserialize, defaulting to "1.0" if not provided
+        CurrentDocumentSchema = get_document_schema()
+        loaded_document = CurrentDocumentSchema().load(document_data)
+        return [loaded_document]
+
     def _stuct(self, observation, params, metadata_schema_class =None):
         """Utility function to create the document structure with optional custom fields."""