From 8c177fbc72e1b41d6335624703b1694ae011f9ba Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Fri, 27 Oct 2023 15:17:43 +0200
Subject: [PATCH] fixes to chunking logic and optimizations

---
 level_3/docker-compose.yml  | 48 ++++++++++-----------
 level_3/rag_test_manager.py | 83 +++++++++++++++++++++----------------
 2 files changed, 71 insertions(+), 60 deletions(-)

diff --git a/level_3/docker-compose.yml b/level_3/docker-compose.yml
index be58cf30c..7444aca82 100644
--- a/level_3/docker-compose.yml
+++ b/level_3/docker-compose.yml
@@ -1,31 +1,31 @@
 version: "3.9"
 
 services:
-  neo4j:
-    image: neo4j:latest
-    container_name: neo4j
-    ports:
-      - "7474:7474"
-      - "7687:7687"
-    environment:
-      - NEO4J_AUTH=neo4j/pleaseletmein
-      - NEO4J_PLUGINS=["apoc"]
-    networks:
-      - promethai_mem_backend
+#  neo4j:
+#    image: neo4j:latest
+#    container_name: neo4j
+#    ports:
+#      - "7474:7474"
+#      - "7687:7687"
+#    environment:
+#      - NEO4J_AUTH=neo4j/pleaseletmein
+#      - NEO4J_PLUGINS=["apoc"]
+#    networks:
+#      - promethai_mem_backend
 
-  promethai_mem:
-    networks:
-      - promethai_mem_backend
-    build:
-      context: ./
-    volumes:
-      - "./:/app"
-    environment:
-      - HOST=0.0.0.0
-    profiles: ["exclude-from-up"]
-    ports:
-      - 8000:8000
-      - 443:443
+#  promethai_mem:
+#    networks:
+#      - promethai_mem_backend
+#    build:
+#      context: ./
+#    volumes:
+#      - "./:/app"
+#    environment:
+#      - HOST=0.0.0.0
+#    profiles: ["exclude-from-up"]
+#    ports:
+#      - 8000:8000
+#      - 443:443
 
   postgres:
     image: postgres
diff --git a/level_3/rag_test_manager.py b/level_3/rag_test_manager.py
index 1b395764e..1dbe0fcaa 100644
--- a/level_3/rag_test_manager.py
+++ b/level_3/rag_test_manager.py
@@ -110,15 +110,13 @@ async def fetch_job_id(session, user_id=None, memory_id=None, job_id=None):
         return None
 
 
-async def fetch_test_set_id(session, user_id, id):
+async def fetch_test_set_id(session, user_id, content):
     try:
         # Await the execution of the query and fetching of the result
-        result = await session.execute(
-            session.query(TestSet.id)
-            .filter_by(user_id=user_id, id=id)
+        result = await session.execute(select(TestSet.id)
+            .filter_by(user_id=user_id, content=content)
             .order_by(TestSet.created_at)
-            .desc()
-            .first()
+
         )
         return (
             result.scalar_one_or_none()
@@ -221,13 +219,11 @@ def generate_param_variants(
         dict(zip(keys, combination)) for combination in itertools.product(*values)
     ]
 
+    logging.info("Param combinations for testing %s", str(param_variants))
+
     return param_variants
 
 
-# Generate parameter variants and display a sample of the generated combinations
-
-
-
 
 async def generate_chatgpt_output(query: str, context: str = None, api_key=None, model_name="gpt-3.5-turbo"):
     """
@@ -372,7 +368,7 @@ async def start_test(
 
     async with session_scope(session=AsyncSessionLocal()) as session:
         job_id = await fetch_job_id(session, user_id=user_id, job_id=job_id)
-        test_set_id = await fetch_test_set_id(session, user_id=user_id, id=job_id)
+        test_set_id = await fetch_test_set_id(session, user_id=user_id, content=str(test_set))
         memory = await Memory.create_memory(
             user_id, session, namespace="SEMANTICMEMORY"
         )
@@ -395,6 +391,16 @@ async def start_test(
                 "Data location is %s", data_location
             )  # Assume data_location_route is predefined
             test_params = generate_param_variants(included_params=["chunk_size"])
+        if params:
+            data_format = data_format_route(
+                data
+            )  # Assume data_format_route is predefined
+            logging.info("Data format is %s", data_format)
+            data_location = data_location_route(data)
+            logging.info(
+                "Data location is %s", data_location
+            )
+            test_params = generate_param_variants(included_params=params)
 
         print("Here are the test params", str(test_params))
 
@@ -499,6 +505,7 @@ async def start_test(
                     context = ""
                     logging.info("Loading and evaluating test set for LLM context")
                     test_result = await run_eval(test_qa, context)
+
                     test_eval_pipeline.append(test_result)
             elif retriever_type == "single_document_context":
                 if test_set:
@@ -511,6 +518,7 @@ async def start_test(
                     for test_qa in test_set:
                         result = await run_search_element(test_qa, test_id)
                         test_result = await run_eval(test_qa, result)
+                        test_result.append(test)
                         test_eval_pipeline.append(test_result)
                     await memory.dynamic_method_call(
                         dynamic_memory_class, "delete_memories", namespace=test_id
@@ -537,34 +545,37 @@ async def start_test(
 
         elif retriever_type == "single_document_context":
             for param in test_params:
+                logging.info("Running for chunk size %s", param["chunk_size"])
                 test_id, result = await run_test(
                     param, loader_settings, metadata, retriever_type=retriever_type
                 )  # Add the params to the result
-                results.append([result, param])
+                # result.append(param)
+                results.append(result)
 
-        for b, r in results:
-            for result_list in b:
-                for result in result_list:
-                    await add_entity(
-                        session,
-                        TestOutput(
-                            id=test_id,
-                            test_set_id=test_set_id,
-                            operation_id=job_id,
-                            set_id=str(uuid.uuid4()),
-                            user_id=user_id,
-                            test_results=result["success"],
-                            test_score=str(result["score"]),
-                            test_metric_name=result["metric_name"],
-                            test_query=result["query"],
-                            test_output=result["output"],
-                            test_expected_output=str(["expected_output"]),
-                            test_context=result["context"][0],
-                            test_params=str(r),  # Add params to the database table
-                        ),
-                    )
+        for b in results:
+            logging.info("Loading  %s", str(b))
+            for result, chunk in b:
+                logging.info("Loading  %s", str(result))
+                await add_entity(
+                    session,
+                    TestOutput(
+                        id=test_id,
+                        test_set_id=test_set_id,
+                        operation_id=job_id,
+                        set_id=str(uuid.uuid4()),
+                        user_id=user_id,
+                        test_results=result["success"],
+                        test_score=str(result["score"]),
+                        test_metric_name=result["metric_name"],
+                        test_query=result["query"],
+                        test_output=result["output"],
+                        test_expected_output=str(result.get("expected_output")),
+                        test_context=result["context"][0],
+                        test_params=str(chunk),  # Add params to the database table
+                    ),
+                )
 
-            return results
+        return results
 
 
 async def main():
@@ -607,9 +618,9 @@ async def main():
         ".data/3ZCCCW.pdf",
         test_set=test_set,
         user_id="677",
-        params=None,
+        params=["chunk_size", "search_type"],
         metadata=metadata,
-        retriever_type="llm_context",
+        retriever_type="single_document_context",
     )
     #
     # parser = argparse.ArgumentParser(description="Run tests against a document.")