diff --git a/.github/workflows/cd.yaml b/.github/workflows/cd.yaml index 19b3d6cb3..ec4b56300 100644 --- a/.github/workflows/cd.yaml +++ b/.github/workflows/cd.yaml @@ -17,7 +17,7 @@ jobs: publish_docker_to_ecr: name: Publish Cognee Docker image - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 permissions: id-token: write contents: read diff --git a/.github/workflows/cd_prd.yaml b/.github/workflows/cd_prd.yaml index fc40d8884..15c046215 100644 --- a/.github/workflows/cd_prd.yaml +++ b/.github/workflows/cd_prd.yaml @@ -17,7 +17,7 @@ jobs: publish_docker_to_ecr: name: Publish Docker PromethAI image - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 permissions: id-token: write contents: read diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f9acd1be0..7d63aa88d 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -9,7 +9,7 @@ jobs: build_docker: name: Build Cognee Backend Docker App Image - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Check out Cognee code uses: actions/checkout@v3 diff --git a/.github/workflows/community_greetings.yml b/.github/workflows/community_greetings.yml index b480ed111..a43f7f549 100644 --- a/.github/workflows/community_greetings.yml +++ b/.github/workflows/community_greetings.yml @@ -4,7 +4,7 @@ on: [pull_request, issues] jobs: greeting: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/first-interaction@v1 with: diff --git a/.github/workflows/docker_compose.yml b/.github/workflows/docker_compose.yml index 657f0c8c6..321cbd045 100644 --- a/.github/workflows/docker_compose.yml +++ b/.github/workflows/docker_compose.yml @@ -12,7 +12,7 @@ on: jobs: docker-compose-test: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Checkout repository diff --git a/.github/workflows/profiling.yaml b/.github/workflows/profiling.yaml index 1ce3e975a..2408a8f70 100644 --- a/.github/workflows/profiling.yaml +++ b/.github/workflows/profiling.yaml @@ -7,7 +7,7 @@ on: jobs: profiler: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: # Checkout the code from the repository with full history diff --git a/.github/workflows/release_discord_action.yml b/.github/workflows/release_discord_action.yml index 7b3ae845d..f3113ccb7 100644 --- a/.github/workflows/release_discord_action.yml +++ b/.github/workflows/release_discord_action.yml @@ -6,7 +6,7 @@ on: jobs: github-releases-to-discord: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - name: Checkout uses: actions/checkout@v3 diff --git a/.github/workflows/reusable_notebook.yml b/.github/workflows/reusable_notebook.yml index 6380466b7..9bc09c3a6 100644 --- a/.github/workflows/reusable_notebook.yml +++ b/.github/workflows/reusable_notebook.yml @@ -22,7 +22,7 @@ jobs: run_notebook_test: name: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 defaults: run: shell: bash diff --git a/.github/workflows/reusable_python_example.yml b/.github/workflows/reusable_python_example.yml index ab28b76af..4aa4aaba6 100644 --- a/.github/workflows/reusable_python_example.yml +++ b/.github/workflows/reusable_python_example.yml @@ -22,7 +22,7 @@ jobs: run_notebook_test: name: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 defaults: run: shell: bash diff --git a/.github/workflows/test_milvus.yml b/.github/workflows/test_milvus.yml index de38b2f98..51e5f0982 100644 --- a/.github/workflows/test_milvus.yml +++ b/.github/workflows/test_milvus.yml @@ -17,7 +17,7 @@ jobs: run_milvus: name: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: fail-fast: false defaults: diff --git a/.github/workflows/test_neo4j.yml b/.github/workflows/test_neo4j.yml index 1cbfcffd3..e1d71dcfd 100644 --- a/.github/workflows/test_neo4j.yml +++ b/.github/workflows/test_neo4j.yml @@ -15,7 +15,7 @@ env: jobs: run_neo4j_integration_test: name: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 defaults: run: diff --git a/.github/workflows/test_pgvector.yml b/.github/workflows/test_pgvector.yml index 083ca8706..d5356d603 100644 --- a/.github/workflows/test_pgvector.yml +++ b/.github/workflows/test_pgvector.yml @@ -17,7 +17,7 @@ jobs: run_pgvector_integration_test: name: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 defaults: run: shell: bash diff --git a/.github/workflows/test_python_3_10.yml b/.github/workflows/test_python_3_10.yml index 585a58d59..2cf620a17 100644 --- a/.github/workflows/test_python_3_10.yml +++ b/.github/workflows/test_python_3_10.yml @@ -16,7 +16,7 @@ env: jobs: run_common: name: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: fail-fast: false defaults: diff --git a/.github/workflows/test_python_3_11.yml b/.github/workflows/test_python_3_11.yml index 979a86937..b119dbcb2 100644 --- a/.github/workflows/test_python_3_11.yml +++ b/.github/workflows/test_python_3_11.yml @@ -17,7 +17,7 @@ jobs: run_common: name: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: fail-fast: false defaults: diff --git a/.github/workflows/test_python_3_12.yml b/.github/workflows/test_python_3_12.yml index 2a1631411..5a032144a 100644 --- a/.github/workflows/test_python_3_12.yml +++ b/.github/workflows/test_python_3_12.yml @@ -17,7 +17,7 @@ jobs: run_common: name: test - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 strategy: fail-fast: false defaults: diff --git a/evals/eval_on_hotpot.py b/evals/eval_on_hotpot.py index 5a95ea05d..ee2435e6b 100644 --- a/evals/eval_on_hotpot.py +++ b/evals/eval_on_hotpot.py @@ -1,11 +1,7 @@ import argparse import asyncio -import json import statistics -from pathlib import Path - import deepeval.metrics -import wget from deepeval.dataset import EvaluationDataset from deepeval.test_case import LLMTestCase from tqdm import tqdm @@ -13,9 +9,9 @@ from tqdm import tqdm import cognee import evals.deepeval_metrics from cognee.api.v1.search import SearchType -from cognee.base_config import get_base_config from cognee.infrastructure.llm.get_llm_client import get_llm_client from cognee.infrastructure.llm.prompts import read_query_prompt, render_prompt +from evals.qa_dataset_utils import load_qa_dataset async def answer_without_cognee(instance): @@ -40,12 +36,8 @@ async def answer_with_cognee(instance): await cognee.prune.prune_system(metadata=True) for title, sentences in instance["context"]: - await cognee.add("\n".join(sentences), dataset_name="HotPotQA") - - for n in range(1, 4): - print(n) - - await cognee.cognify("HotPotQA") + await cognee.add("\n".join(sentences), dataset_name="QA") + await cognee.cognify("QA") search_results = await cognee.search(SearchType.INSIGHTS, query_text=instance["question"]) search_results_second = await cognee.search( @@ -85,20 +77,10 @@ async def eval_answers(instances, answers, eval_metric): return eval_results -async def eval_on_hotpotQA(answer_provider, num_samples, eval_metric): - base_config = get_base_config() - data_root_dir = base_config.data_root_directory - - if not Path(data_root_dir).exists(): - Path(data_root_dir).mkdir() - - filepath = data_root_dir / Path("hotpot_dev_fullwiki_v1.json") - if not filepath.exists(): - url = "http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json" - wget.download(url, out=data_root_dir) - - with open(filepath, "r") as file: - dataset = json.load(file) +async def eval_on_QA_dataset( + dataset_name_or_filename: str, answer_provider, num_samples, eval_metric +): + dataset = load_qa_dataset(dataset_name_or_filename) instances = dataset if not num_samples else dataset[:num_samples] answers = [] @@ -117,6 +99,7 @@ async def eval_on_hotpotQA(answer_provider, num_samples, eval_metric): if __name__ == "__main__": parser = argparse.ArgumentParser() + parser.add_argument("--dataset", type=str, help="Which dataset to evaluate on") parser.add_argument("--with_cognee", action="store_true") parser.add_argument("--num_samples", type=int, default=500) parser.add_argument( @@ -142,5 +125,7 @@ if __name__ == "__main__": else: answer_provider = answer_without_cognee - avg_score = asyncio.run(eval_on_hotpotQA(answer_provider, args.num_samples, metric)) + avg_score = asyncio.run( + eval_on_QA_dataset(args.dataset, answer_provider, args.num_samples, metric) + ) print(f"Average {args.metric}: {avg_score}") diff --git a/evals/qa_dataset_utils.py b/evals/qa_dataset_utils.py new file mode 100644 index 000000000..c570455c4 --- /dev/null +++ b/evals/qa_dataset_utils.py @@ -0,0 +1,82 @@ +from cognee.root_dir import get_absolute_path +import json +import requests +from jsonschema import ValidationError, validate +from pathlib import Path + + +qa_datasets = { + "hotpotqa": { + "filename": "hotpot_dev_fullwiki_v1.json", + "URL": "http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json", + }, + "2wikimultihop": { + "filename": "data/dev.json", + "URL": "https://www.dropbox.com/scl/fi/heid2pkiswhfaqr5g0piw/data.zip?rlkey=ira57daau8lxfj022xvk1irju&e=1", + }, +} + +qa_json_schema = { + "type": "array", + "items": { + "type": "object", + "properties": { + "answer": {"type": "string"}, + "question": {"type": "string"}, + "context": {"type": "array"}, + }, + "required": ["answer", "question", "context"], + "additionalProperties": True, + }, +} + + +def download_qa_dataset(dataset_name: str, filepath: Path): + if dataset_name not in qa_datasets: + raise ValueError(f"{dataset_name} is not a supported dataset.") + + url = qa_datasets[dataset_name]["URL"] + + if dataset_name == "2wikimultihop": + raise Exception( + "Please download 2wikimultihop dataset (data.zip) manually from \ + https://www.dropbox.com/scl/fi/heid2pkiswhfaqr5g0piw/data.zip?rlkey=ira57daau8lxfj022xvk1irju&e=1 \ + and unzip it." + ) + + response = requests.get(url, stream=True) + + if response.status_code == 200: + with open(filepath, "wb") as file: + for chunk in response.iter_content(chunk_size=8192): + file.write(chunk) + print(f"Dataset {dataset_name} downloaded and saved to {filepath}") + else: + print(f"Failed to download {dataset_name}. Status code: {response.status_code}") + + +def load_qa_dataset(dataset_name_or_filename: str): + if dataset_name_or_filename in qa_datasets: + dataset_name = dataset_name_or_filename + filename = qa_datasets[dataset_name]["filename"] + + data_root_dir = get_absolute_path("../.data") + if not Path(data_root_dir).exists(): + Path(data_root_dir).mkdir() + + filepath = data_root_dir / Path(filename) + if not filepath.exists(): + download_qa_dataset(dataset_name, filepath) + else: + filename = dataset_name_or_filename + filepath = Path(filename) + + with open(filepath, "r") as file: + dataset = json.load(file) + + try: + validate(instance=dataset, schema=qa_json_schema) + except ValidationError as e: + print("File is not a valid QA dataset:", e.message) + + return dataset diff --git a/pyproject.toml b/pyproject.toml index 5a0e83057..446e807de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cognee" -version = "0.1.21" +version = "0.1.22" description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning." authors = ["Vasilije Markovic", "Boris Arzentar"] readme = "README.md"