From d2e0a29d429df28bcb576e599bf2120ac4b02fd1 Mon Sep 17 00:00:00 2001 From: Vasilije <8619304+Vasilije1990@users.noreply.github.com> Date: Mon, 9 Oct 2023 20:44:25 +0200 Subject: [PATCH] Added embeddings as an option, added different types of text search --- level_3/Readme.md | 31 ++++++++-- level_3/example_data/metadata.json | 13 +++++ level_3/example_data/test_set.json | 22 +++++++ level_3/rag_test_manager.py | 94 ++++++++++++++++++------------ 4 files changed, 120 insertions(+), 40 deletions(-) create mode 100644 level_3/example_data/metadata.json create mode 100644 level_3/example_data/test_set.json diff --git a/level_3/Readme.md b/level_3/Readme.md index c7f4eadae..3e13744a5 100644 --- a/level_3/Readme.md +++ b/level_3/Readme.md @@ -5,14 +5,21 @@ ### Description -RAG test manager can be used via API (inprogress) or via the CLI +RAG test manager can be used via API (in progress) or via the CLI Make sure to run scripts/create_database.py +After that, you can run: + +``` python test_runner.py \ + --url "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf" \ + --test_set "path/to/test_set.json" \ + --user_id "666" \ + --metadata "path/to/metadata.json" +``` - -#How to use +# How to start ## Installation @@ -22,6 +29,22 @@ Make sure to run scripts/create_database.py ```docker compose up promethai_mem ``` +Make sure to run + +``` python scripts/create_database.py ``` + +After that, you can run: + +``` python test_runner.py \ + --url "https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf" \ + --test_set "example_data/test_set.json" \ + --user_id "666" \ + --metadata "example_data/metadata.json" + +``` + +To see examples of test_set.json and metadata.json, check the files in the "example_data" folder. + ## Clean database @@ -30,7 +53,7 @@ Make sure to run scripts/create_database.py ```docker volume prune ``` -docker compose up --force-recreate --build promethai_mem +``` docker compose up --force-recreate --build promethai_mem ``` ## Usage diff --git 
a/level_3/example_data/metadata.json b/level_3/example_data/metadata.json new file mode 100644 index 000000000..1ad43755e --- /dev/null +++ b/level_3/example_data/metadata.json @@ -0,0 +1,13 @@ +{ + "version": "1.0", + "agreement_id": "AG123456", + "privacy_policy": "https://example.com/privacy", + "terms_of_service": "https://example.com/terms", + "format": "json", + "schema_version": "1.1", + "checksum": "a1b2c3d4e5f6", + "owner": "John Doe", + "license": "MIT", + "validity_start": "2023-08-01", + "validity_end": "2024-07-31" + } \ No newline at end of file diff --git a/level_3/example_data/test_set.json b/level_3/example_data/test_set.json new file mode 100644 index 000000000..caf0e2e9f --- /dev/null +++ b/level_3/example_data/test_set.json @@ -0,0 +1,22 @@ + [ + { + "question": "Who is the main character in 'The Call of the Wild'?", + "answer": "Buck" + }, + { + "question": "Who wrote 'The Call of the Wild'?", + "answer": "Jack London" + }, + { + "question": "Where does Buck live at the start of the book?", + "answer": "In the Santa Clara Valley, at Judge Miller’s place." + }, + { + "question": "Why is Buck kidnapped?", + "answer": "He is kidnapped to be sold as a sled dog in the Yukon during the Klondike Gold Rush." + }, + { + "question": "How does Buck become the leader of the sled dog team?", + "answer": "Buck becomes the leader after defeating the original leader, Spitz, in a fight." 
+ } + ] \ No newline at end of file diff --git a/level_3/rag_test_manager.py b/level_3/rag_test_manager.py index 410c8c8d4..168aba073 100644 --- a/level_3/rag_test_manager.py +++ b/level_3/rag_test_manager.py @@ -1,3 +1,5 @@ +import argparse +import json from enum import Enum import sys import os @@ -386,46 +388,66 @@ async def start_test(data, test_set=None, user_id=None, params=None, job_id=None add_entity(session, TestOutput(id=test_id, user_id=user_id, test_results=str(test_result_collection))) async def main(): + # + # params = { + # "version": "1.0", + # "agreement_id": "AG123456", + # "privacy_policy": "https://example.com/privacy", + # "terms_of_service": "https://example.com/terms", + # "format": "json", + # "schema_version": "1.1", + # "checksum": "a1b2c3d4e5f6", + # "owner": "John Doe", + # "license": "MIT", + # "validity_start": "2023-08-01", + # "validity_end": "2024-07-31", + # } + # + # test_set = [ + # { + # "question": "Who is the main character in 'The Call of the Wild'?", + # "answer": "Buck" + # }, + # { + # "question": "Who wrote 'The Call of the Wild'?", + # "answer": "Jack London" + # }, + # { + # "question": "Where does Buck live at the start of the book?", + # "answer": "In the Santa Clara Valley, at Judge Miller’s place." + # }, + # { + # "question": "Why is Buck kidnapped?", + # "answer": "He is kidnapped to be sold as a sled dog in the Yukon during the Klondike Gold Rush." + # }, + # { + # "question": "How does Buck become the leader of the sled dog team?", + # "answer": "Buck becomes the leader after defeating the original leader, Spitz, in a fight." 
+ # } + # ] + # result = await start_test("https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf", test_set=test_set, user_id="666", params=None, metadata=params) + # + parser = argparse.ArgumentParser(description="Run tests against a document.") + parser.add_argument("--url", required=True, help="URL of the document to test.") + parser.add_argument("--test_set", required=True, help="Path to JSON file containing the test set.") + parser.add_argument("--user_id", required=True, help="User ID.") + parser.add_argument("--params", help="Additional parameters in JSON format.") + parser.add_argument("--metadata", required=True, help="Path to JSON file containing metadata.") - params = { - "version": "1.0", - "agreement_id": "AG123456", - "privacy_policy": "https://example.com/privacy", - "terms_of_service": "https://example.com/terms", - "format": "json", - "schema_version": "1.1", - "checksum": "a1b2c3d4e5f6", - "owner": "John Doe", - "license": "MIT", - "validity_start": "2023-08-01", - "validity_end": "2024-07-31", - } + args = parser.parse_args() - test_set = [ - { - "question": "Who is the main character in 'The Call of the Wild'?", - "answer": "Buck" - }, - { - "question": "Who wrote 'The Call of the Wild'?", - "answer": "Jack London" - }, - { - "question": "Where does Buck live at the start of the book?", - "answer": "In the Santa Clara Valley, at Judge Miller’s place." - }, - { - "question": "Why is Buck kidnapped?", - "answer": "He is kidnapped to be sold as a sled dog in the Yukon during the Klondike Gold Rush." - }, - { - "question": "How does Buck become the leader of the sled dog team?", - "answer": "Buck becomes the leader after defeating the original leader, Spitz, in a fight." 
- } - ] - result = await start_test("https://www.ibiblio.org/ebooks/London/Call%20of%20Wild.pdf", test_set=test_set, user_id="666", params=None, metadata=params) + with open(args.test_set, "r", encoding="utf-8") as file: + test_set = json.load(file) + with open(args.metadata, "r", encoding="utf-8") as file: + metadata = json.load(file) + if args.params: + params = json.loads(args.params) + else: + params = None + # Pass by keyword: start_test's fifth positional parameter is job_id, not metadata. + await start_test(args.url, test_set=test_set, user_id=args.user_id, params=params, metadata=metadata) if __name__ == "__main__": import asyncio