Merge remote-tracking branch 'origin/dev' into feat/COG-1060-code-pipeline-endpoints

This commit is contained in:
Boris Arzentar 2025-01-28 10:10:38 +01:00
commit f811ab44e0
7 changed files with 1791 additions and 757 deletions

24
.github/workflows/clean_stale_pr.yaml vendored Normal file
View file

@ -0,0 +1,24 @@
# Workflow: automatically mark long-inactive pull requests as stale and
# close them after a grace period, using the official actions/stale action.
name: clean | remove stale PRs
on:
  # Run this action periodically (daily at 0:00 UTC in this example).
  schedule:
    - cron: "0 0 * * *"
  # Optionally, also run when pull requests are labeled, unlabeled, synchronized, or reopened
  # to update the stale timer as needed. Uncomment if desired.
  # pull_request:
  #   types: [labeled, unlabeled, synchronize, reopened]
jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - name: Mark and Close Stale
        uses: actions/stale@v6
        with:
          # Number of days of inactivity before the pull request is marked stale
          days-before-stale: 60
          # Number of days of inactivity after being marked stale before the pull request is closed
          days-before-close: 7
          # Comment to post when marking as stale; message matches the 60-day stale
          # and 7-day close windows configured above.
          stale-pr-message: "This pull request has been automatically marked as stale because it has had no activity for 60 days. It will be closed in 7 days if no further activity occurs."

View file

@ -12,7 +12,11 @@ We build for developers who need a reliable, production-ready data layer for AI
## What is cognee?
Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
Cognee merges graph and vector databases to uncover hidden relationships and new patterns in your data. You can automatically model, load and retrieve entities and objects representing your business domain and analyze their relationships, uncovering insights that neither vector stores nor graph stores alone can provide. Learn more about use-cases [here](https://docs.cognee.ai/use_cases)
Try it in a Google Colab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://docs.cognee.ai">documentation</a>
If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community

View file

@ -14,9 +14,9 @@ from ...relational.ModelBase import Base
from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
from ..embeddings.EmbeddingEngine import EmbeddingEngine
from ..models.ScoredResult import ScoredResult
from ..utils import normalize_distances
from ..vector_db_interface import VectorDBInterface
from .serialize_data import serialize_data
from ..utils import normalize_distances
class IndexSchema(DataPoint):
@ -247,12 +247,22 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
# Extract distances and find min/max for normalization
for vector in closest_items:
# TODO: Add normalization of similarity score
vector_list.append(vector)
vector_list.append(
{
"id": UUID(str(vector.id)),
"payload": vector.payload,
"_distance": vector.similarity,
}
)
# Normalize vector distance and add this as score information to vector_list
normalized_values = normalize_distances(vector_list)
for i in range(0, len(normalized_values)):
vector_list[i]["score"] = normalized_values[i]
# Create and return ScoredResult objects
return [
ScoredResult(id=UUID(str(row.id)), payload=row.payload, score=row.similarity)
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("score"))
for row in vector_list
]

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

@ -0,0 +1 @@
Subproject commit 130b84db9270734756d16918e5c86034777140fc

View file

@ -618,76 +618,339 @@
"cell_type": "markdown",
"id": "e519e30c0423c2a",
"metadata": {},
"source": "## Let's add evals"
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b22ae3d868fa5606",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-19T18:01:11.387716Z",
"start_time": "2024-12-19T18:01:11.278042Z"
}
},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'deepeval'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m eval_on_hotpotQA\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m answer_with_cognee\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m answer_without_cognee\n",
"File \u001b[0;32m~/cognee/evals/eval_on_hotpot.py:7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mstatistics\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpathlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdeepeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwget\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdeepeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EvaluationDataset\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'deepeval'"
]
}
],
"source": [
"from evals.eval_on_hotpot import eval_on_hotpotQA\n",
"from evals.eval_on_hotpot import answer_with_cognee\n",
"from evals.eval_on_hotpot import answer_without_cognee\n",
"from evals.eval_on_hotpot import eval_answers\n",
"from cognee.base_config import get_base_config\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import wget\n",
"import json\n",
"import statistics"
"## Let's add evals"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "728355d390e3a01b",
"id": "3845443e",
"metadata": {},
"outputs": [],
"source": [
"!pip install \"cognee[deepeval]\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2c3c70",
"metadata": {},
"outputs": [],
"source": [
"from evals.eval_on_hotpot import deepeval_answers, answer_qa_instance\n",
"from evals.qa_dataset_utils import load_qa_dataset\n",
"from evals.qa_metrics_utils import get_metrics\n",
"from evals.qa_context_provider_utils import qa_context_providers\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import statistics\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53a609d8",
"metadata": {},
"outputs": [],
"source": [
"answer_provider = answer_with_cognee # For native LLM answers use answer_without_cognee\n",
"num_samples = 10 # With cognee, it takes ~1m10s per sample\n",
"dataset_name_or_filename = \"hotpotqa\"\n",
"dataset = load_qa_dataset(dataset_name_or_filename)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7351ab8f",
"metadata": {},
"outputs": [],
"source": [
"context_provider_name = \"cognee\"\n",
"context_provider = qa_context_providers[context_provider_name]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9346115b",
"metadata": {},
"outputs": [],
"source": [
"random.seed(42)\n",
"instances = dataset if not num_samples else random.sample(dataset, num_samples)\n",
"\n",
"base_config = get_base_config()\n",
"data_root_dir = base_config.data_root_directory\n",
"out_path = \"out\" \n",
"if not Path(out_path).exists():\n",
" Path(out_path).mkdir()\n",
"contexts_filename = out_path / Path(\n",
" f\"contexts_{dataset_name_or_filename.split('.')[0]}_{context_provider_name}.json\"\n",
" )\n",
"\n",
"if not Path(data_root_dir).exists():\n",
" Path(data_root_dir).mkdir()\n",
"\n",
"filepath = data_root_dir / Path(\"hotpot_dev_fullwiki_v1.json\")\n",
"if not filepath.exists():\n",
" url = \"http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json\"\n",
" wget.download(url, out=data_root_dir)\n",
"\n",
"with open(filepath, \"r\") as file:\n",
" dataset = json.load(file)\n",
"\n",
"instances = dataset if not num_samples else dataset[:num_samples]\n",
"answers = []\n",
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
" answer = answer_provider(instance)\n",
" answer = await answer_qa_instance(instance, context_provider, contexts_filename)\n",
" answers.append(answer)"
]
},
{
"cell_type": "markdown",
"id": "1e7d872d",
"metadata": {},
"source": [
"#### Define Metrics for Evaluation and Calculate Score\n",
"**Options**: \n",
"- **Correctness**: Is the actual output factually correct based on the expected output?\n",
"- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?\n",
"- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?\n",
"- **Empowerment**: How well does the answer help the reader understand and make informed judgements about the topic?\n",
"- **Directness**: How specifically and clearly does the answer address the question?\n",
"- **F1 Score**: the harmonic mean of the precision and recall, using word-level Exact Match\n",
"- **EM Score**: the rate at which the predicted strings exactly match their references, ignoring white spaces and capitalization."
]
},
{
"cell_type": "markdown",
"id": "c81e2b46",
"metadata": {},
"source": [
"##### Calculate `\"Correctness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae728344",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Correctness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "764aac6d",
"metadata": {},
"outputs": [],
"source": [
"Correctness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Correctness)"
]
},
{
"cell_type": "markdown",
"id": "6d3bbdc5",
"metadata": {},
"source": [
"##### Calculating `\"Comprehensiveness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9793ef78",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Comprehensiveness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9add448a",
"metadata": {},
"outputs": [],
"source": [
"Comprehensiveness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Comprehensiveness)"
]
},
{
"cell_type": "markdown",
"id": "bce2fa25",
"metadata": {},
"source": [
"##### Calculating `\"Diversity\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f60a179e",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Diversity\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ccbd0ab",
"metadata": {},
"outputs": [],
"source": [
"Diversity = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Diversity)"
]
},
{
"cell_type": "markdown",
"id": "191cab63",
"metadata": {},
"source": [
"##### Calculating `\"Empowerment\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66bec0bf",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Empowerment\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b043a8f",
"metadata": {},
"outputs": [],
"source": [
"Empowerment = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Empowerment)"
]
},
{
"cell_type": "markdown",
"id": "2cac3be9",
"metadata": {},
"source": [
"##### Calculating `\"Directness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "adaa17c0",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Directness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a8f97c9",
"metadata": {},
"outputs": [],
"source": [
"Directness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Directness)"
]
},
{
"cell_type": "markdown",
"id": "1ad6feb8",
"metadata": {},
"source": [
"##### Calculating `\"F1\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdc48259",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"F1\"]\n",
"eval_metrics = get_metrics(metric_name_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c43c17c8",
"metadata": {},
"outputs": [],
"source": [
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bfcc46d",
"metadata": {},
"outputs": [],
"source": [
"F1_score = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(F1_score)"
]
},
{
"cell_type": "markdown",
"id": "2583f948",
"metadata": {},
"source": [
"##### Calculating `\"EM\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90a8f630",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"EM\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d1b1ea1",
"metadata": {},
"outputs": [],
"source": [
"EM = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(EM)"
]
},
{
"cell_type": "markdown",
"id": "288ab570",
@ -700,7 +963,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "cognee-c83GrcRT-py3.11",
"language": "python",
"name": "python3"
},
@ -714,7 +977,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.10"
}
},
"nbformat": 4,