From 8c177fbc72e1b41d6335624703b1694ae011f9ba Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Fri, 27 Oct 2023 15:17:43 +0200 Subject: [PATCH] fixes to chunking logic and optimizations --- level_3/docker-compose.yml | 48 ++++++++++----------- level_3/rag_test_manager.py | 83 +++++++++++++++++++++---------------- 2 files changed, 71 insertions(+), 60 deletions(-) diff --git a/level_3/docker-compose.yml b/level_3/docker-compose.yml index be58cf30c..7444aca82 100644 --- a/level_3/docker-compose.yml +++ b/level_3/docker-compose.yml @@ -1,31 +1,31 @@ version: "3.9" services: - neo4j: - image: neo4j:latest - container_name: neo4j - ports: - - "7474:7474" - - "7687:7687" - environment: - - NEO4J_AUTH=neo4j/pleaseletmein - - NEO4J_PLUGINS=["apoc"] - networks: - - promethai_mem_backend +# neo4j: +# image: neo4j:latest +# container_name: neo4j +# ports: +# - "7474:7474" +# - "7687:7687" +# environment: +# - NEO4J_AUTH=neo4j/pleaseletmein +# - NEO4J_PLUGINS=["apoc"] +# networks: +# - promethai_mem_backend - promethai_mem: - networks: - - promethai_mem_backend - build: - context: ./ - volumes: - - "./:/app" - environment: - - HOST=0.0.0.0 - profiles: ["exclude-from-up"] - ports: - - 8000:8000 - - 443:443 +# promethai_mem: +# networks: +# - promethai_mem_backend +# build: +# context: ./ +# volumes: +# - "./:/app" +# environment: +# - HOST=0.0.0.0 +# profiles: ["exclude-from-up"] +# ports: +# - 8000:8000 +# - 443:443 postgres: image: postgres diff --git a/level_3/rag_test_manager.py b/level_3/rag_test_manager.py index 1b395764e..1dbe0fcaa 100644 --- a/level_3/rag_test_manager.py +++ b/level_3/rag_test_manager.py @@ -110,15 +110,13 @@ async def fetch_job_id(session, user_id=None, memory_id=None, job_id=None): return None -async def fetch_test_set_id(session, user_id, id): +async def fetch_test_set_id(session, user_id, content): try: # Await the execution of the query and fetching of the result - result = await session.execute( - session.query(TestSet.id) - .filter_by(user_id=user_id, id=id) + result = await session.execute(select(TestSet.id) + .filter_by(user_id=user_id, content=content) .order_by(TestSet.created_at) - .desc() - .first() + ) return ( result.scalar_one_or_none() @@ -221,13 +219,11 @@ def generate_param_variants( dict(zip(keys, combination)) for combination in itertools.product(*values) ] + logging.info("Param combinations for testing", str(param_variants)) + return param_variants -# Generate parameter variants and display a sample of the generated combinations - - - async def generate_chatgpt_output(query: str, context: str = None, api_key=None, model_name="gpt-3.5-turbo"): """ @@ -372,7 +368,7 @@ async def start_test( async with session_scope(session=AsyncSessionLocal()) as session: job_id = await fetch_job_id(session, user_id=user_id, job_id=job_id) - test_set_id = await fetch_test_set_id(session, user_id=user_id, id=job_id) + test_set_id = await fetch_test_set_id(session, user_id=user_id, content=str(test_set)) memory = await Memory.create_memory( user_id, session, namespace="SEMANTICMEMORY" ) @@ -395,6 +391,16 @@ async def start_test( "Data location is %s", data_location ) # Assume data_location_route is predefined test_params = generate_param_variants(included_params=["chunk_size"]) + if params: + data_format = data_format_route( + data + ) # Assume data_format_route is predefined + logging.info("Data format is %s", data_format) + data_location = data_location_route(data) + logging.info( + "Data location is %s", data_location + ) + test_params = generate_param_variants(included_params=params) print("Here are the test params", str(test_params)) @@ -499,6 +505,7 @@ async def start_test( context = "" logging.info("Loading and evaluating test set for LLM context") test_result = await run_eval(test_qa, context) + test_eval_pipeline.append(test_result) elif retriever_type == "single_document_context": if test_set: @@ -511,6 +518,7 @@ async def start_test( for test_qa in test_set: result = await run_search_element(test_qa, test_id) test_result = await run_eval(test_qa, result) + test_result.append(test) test_eval_pipeline.append(test_result) await memory.dynamic_method_call( dynamic_memory_class, "delete_memories", namespace=test_id @@ -537,34 +545,37 @@ async def start_test( elif retriever_type == "single_document_context": for param in test_params: + logging.info("Running for chunk size %s", param["chunk_size"]) test_id, result = await run_test( param, loader_settings, metadata, retriever_type=retriever_type ) # Add the params to the result - results.append([result, param]) + # result.append(param) + results.append(result) - for b, r in results: - for result_list in b: - for result in result_list: - await add_entity( - session, - TestOutput( - id=test_id, - test_set_id=test_set_id, - operation_id=job_id, - set_id=str(uuid.uuid4()), - user_id=user_id, - test_results=result["success"], - test_score=str(result["score"]), - test_metric_name=result["metric_name"], - test_query=result["query"], - test_output=result["output"], - test_expected_output=str(["expected_output"]), - test_context=result["context"][0], - test_params=str(r), # Add params to the database table - ), - ) + for b in results: + logging.info("Loading %s", str(b)) + for result, chunk in b: + logging.info("Loading %s", str(result)) + await add_entity( + session, + TestOutput( + id=test_id, + test_set_id=test_set_id, + operation_id=job_id, + set_id=str(uuid.uuid4()), + user_id=user_id, + test_results=result["success"], + test_score=str(result["score"]), + test_metric_name=result["metric_name"], + test_query=result["query"], + test_output=result["output"], + test_expected_output=str(["expected_output"]), + test_context=result["context"][0], + test_params=str(chunk), # Add params to the database table + ), + ) - return results + return results async def main(): @@ -607,9 +618,9 @@ async def main(): ".data/3ZCCCW.pdf", test_set=test_set, user_id="677", - params=None, + params=["chunk_size", "search_type"], metadata=metadata, - retriever_type="llm_context", + retriever_type="single_document_context", ) # # parser = argparse.ArgumentParser(description="Run tests against a document.")