From 30678539e7c9337937a95308b93b49805c528365 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 9 Oct 2023 21:12:54 +0200 Subject: [PATCH] Fixes and added command line tool to run RAG --- level_3/Readme.md | 4 +++- level_3/example_data/metadata.json | 2 +- level_3/rag_test_manager.py | 28 ++++++++++++++++++++++----- level_3/vectordb/chunkers/chunkers.py | 2 +- level_3/vectordb/loaders/loaders.py | 6 ++++-- 5 files changed, 32 insertions(+), 10 deletions(-) diff --git a/level_3/Readme.md b/level_3/Readme.md index 3e13744a5..3d66b64f1 100644 --- a/level_3/Readme.md +++ b/level_3/Readme.md @@ -29,13 +29,15 @@ After that, you can run: ```docker compose up promethai_mem ``` +``` poetry shell ``` + Make sure to run ``` python scripts/create_database.py ``` After that, you can run: -``` python test_runner.py \ +``` python rag_test_manager.py \ --url "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf" \ --test_set "example_data/test_set.json" \ --user_id "666" \ diff --git a/level_3/example_data/metadata.json b/level_3/example_data/metadata.json index 1ad43755e..8df9036cc 100644 --- a/level_3/example_data/metadata.json +++ b/level_3/example_data/metadata.json @@ -9,5 +9,5 @@ "owner": "John Doe", "license": "MIT", "validity_start": "2023-08-01", - "validity_end": "2024-07-31", + "validity_end": "2024-07-31" } \ No newline at end of file diff --git a/level_3/rag_test_manager.py b/level_3/rag_test_manager.py index 168aba073..204b5f918 100644 --- a/level_3/rag_test_manager.py +++ b/level_3/rag_test_manager.py @@ -436,14 +436,32 @@ async def main(): args = parser.parse_args() - with open(args.test_set, "r") as file: - test_set = json.load(file) + try: + with open(args.test_set, "r") as file: + test_set = json.load(file) + if not isinstance(test_set, list): # Expecting a list + raise TypeError("Parsed test_set JSON is not a list.") + except Exception as e: + print(f"Error loading test_set: {str(e)}") + return - with open(args.metadata, "r") as file: - metadata = json.load(file) + try: + with open(args.metadata, "r") as file: + metadata = json.load(file) + if not isinstance(metadata, dict): + raise TypeError("Parsed metadata JSON is not a dictionary.") + except Exception as e: + print(f"Error loading metadata: {str(e)}") + return if args.params: - params = json.loads(args.params) + try: + params = json.loads(args.params) + if not isinstance(params, dict): + raise TypeError("Parsed params JSON is not a dictionary.") + except json.JSONDecodeError as e: + print(f"Error parsing params: {str(e)}") + return else: params = None diff --git a/level_3/vectordb/chunkers/chunkers.py b/level_3/vectordb/chunkers/chunkers.py index a02433a77..3019b2478 100644 --- a/level_3/vectordb/chunkers/chunkers.py +++ b/level_3/vectordb/chunkers/chunkers.py @@ -1,7 +1,7 @@ from langchain.document_loaders import PyPDFLoader import sys, os sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from level_3.shared.chunk_strategy import ChunkStrategy +from shared.chunk_strategy import ChunkStrategy import re def chunk_data(chunk_strategy=None, source_data=None, chunk_size=None, chunk_overlap=None): diff --git a/level_3/vectordb/loaders/loaders.py b/level_3/vectordb/loaders/loaders.py index 9ecf2e40a..ead2605be 100644 --- a/level_3/vectordb/loaders/loaders.py +++ b/level_3/vectordb/loaders/loaders.py @@ -1,8 +1,10 @@ from io import BytesIO import fitz -# sys.path.append(os.path.dirname(os.path.abspath(__file__))) +import os +import sys +sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from level_3.vectordb.chunkers.chunkers import chunk_data +from vectordb.chunkers.chunkers import chunk_data from llama_hub.file.base import SimpleDirectoryReader import requests