Merge remote-tracking branch 'origin/dev' into feat/COG-1060-code-pipeline-endpoints

This commit is contained in:
Boris Arzentar 2025-01-28 10:10:38 +01:00
commit f811ab44e0
7 changed files with 1791 additions and 757 deletions

24
.github/workflows/clean_stale_pr.yaml vendored Normal file
View file

@ -0,0 +1,24 @@
# Workflow: automatically mark long-inactive pull requests as stale and
# close them after a grace period, using the official actions/stale action.
name: clean | remove stale PRs
on:
  # Run this action periodically (daily at 0:00 UTC in this example).
  schedule:
    - cron: "0 0 * * *"
  # Optionally, also run when pull requests are labeled, unlabeled, synchronized, or reopened
  # to update the stale timer as needed. Uncomment if desired.
  # pull_request:
  #   types: [labeled, unlabeled, synchronize, reopened]
jobs:
  stale:
    runs-on: ubuntu-latest
    steps:
      - name: Mark and Close Stale
        uses: actions/stale@v6
        with:
          # Number of days of inactivity before the pull request is marked stale
          days-before-stale: 60
          # Number of days of inactivity after being marked stale before the pull request is closed
          days-before-close: 7
          # Comment to post when marking as stale; message matches the 60-day stale
          # and 7-day close windows configured above.
          stale-pr-message: "This pull request has been automatically marked as stale because it has had no activity for 60 days. It will be closed in 7 days if no further activity occurs."

View file

@ -12,7 +12,11 @@ We build for developers who need a reliable, production-ready data layer for AI
## What is cognee?
Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
Cognee merges graph and vector databases to uncover hidden relationships and new patterns in your data. You can automatically model, load and retrieve entities and objects representing your business domain and analyze their relationships, uncovering insights that neither vector stores nor graph stores alone can provide. Learn more about use-cases [here](https://docs.cognee.ai/use_cases)
Try it in a Google Colab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://docs.cognee.ai">documentation</a>
If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community

View file

@ -14,9 +14,9 @@ from ...relational.ModelBase import Base
from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
from ..embeddings.EmbeddingEngine import EmbeddingEngine
from ..models.ScoredResult import ScoredResult
from ..utils import normalize_distances
from ..vector_db_interface import VectorDBInterface
from .serialize_data import serialize_data
from ..utils import normalize_distances
class IndexSchema(DataPoint):
@ -247,12 +247,22 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
# Extract distances and find min/max for normalization
for vector in closest_items:
# TODO: Add normalization of similarity score
vector_list.append(vector)
vector_list.append(
{
"id": UUID(str(vector.id)),
"payload": vector.payload,
"_distance": vector.similarity,
}
)
# Normalize vector distance and add this as score information to vector_list
normalized_values = normalize_distances(vector_list)
for i in range(0, len(normalized_values)):
vector_list[i]["score"] = normalized_values[i]
# Create and return ScoredResult objects
return [
ScoredResult(id=UUID(str(row.id)), payload=row.payload, score=row.similarity)
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("score"))
for row in vector_list
]

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load diff

@ -0,0 +1 @@
Subproject commit 130b84db9270734756d16918e5c86034777140fc

View file

@ -618,76 +618,339 @@
"cell_type": "markdown",
"id": "e519e30c0423c2a",
"metadata": {},
"source": "## Let's add evals"
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b22ae3d868fa5606",
"metadata": {
"ExecuteTime": {
"end_time": "2024-12-19T18:01:11.387716Z",
"start_time": "2024-12-19T18:01:11.278042Z"
}
},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'deepeval'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m eval_on_hotpotQA\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m answer_with_cognee\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m answer_without_cognee\n",
"File \u001b[0;32m~/cognee/evals/eval_on_hotpot.py:7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mstatistics\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpathlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdeepeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwget\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdeepeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EvaluationDataset\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'deepeval'"
]
}
],
"source": [
"from evals.eval_on_hotpot import eval_on_hotpotQA\n",
"from evals.eval_on_hotpot import answer_with_cognee\n",
"from evals.eval_on_hotpot import answer_without_cognee\n",
"from evals.eval_on_hotpot import eval_answers\n",
"from cognee.base_config import get_base_config\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import wget\n",
"import json\n",
"import statistics"
"## Let's add evals"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "728355d390e3a01b",
"id": "3845443e",
"metadata": {},
"outputs": [],
"source": [
"!pip install \"cognee[deepeval]\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a2c3c70",
"metadata": {},
"outputs": [],
"source": [
"from evals.eval_on_hotpot import deepeval_answers, answer_qa_instance\n",
"from evals.qa_dataset_utils import load_qa_dataset\n",
"from evals.qa_metrics_utils import get_metrics\n",
"from evals.qa_context_provider_utils import qa_context_providers\n",
"from pathlib import Path\n",
"from tqdm import tqdm\n",
"import statistics\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "53a609d8",
"metadata": {},
"outputs": [],
"source": [
"answer_provider = answer_with_cognee # For native LLM answers use answer_without_cognee\n",
"num_samples = 10 # With cognee, it takes ~1m10s per sample\n",
"dataset_name_or_filename = \"hotpotqa\"\n",
"dataset = load_qa_dataset(dataset_name_or_filename)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7351ab8f",
"metadata": {},
"outputs": [],
"source": [
"context_provider_name = \"cognee\"\n",
"context_provider = qa_context_providers[context_provider_name]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9346115b",
"metadata": {},
"outputs": [],
"source": [
"random.seed(42)\n",
"instances = dataset if not num_samples else random.sample(dataset, num_samples)\n",
"\n",
"base_config = get_base_config()\n",
"data_root_dir = base_config.data_root_directory\n",
"out_path = \"out\" \n",
"if not Path(out_path).exists():\n",
" Path(out_path).mkdir()\n",
"contexts_filename = out_path / Path(\n",
" f\"contexts_{dataset_name_or_filename.split('.')[0]}_{context_provider_name}.json\"\n",
" )\n",
"\n",
"if not Path(data_root_dir).exists():\n",
" Path(data_root_dir).mkdir()\n",
"\n",
"filepath = data_root_dir / Path(\"hotpot_dev_fullwiki_v1.json\")\n",
"if not filepath.exists():\n",
" url = \"http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json\"\n",
" wget.download(url, out=data_root_dir)\n",
"\n",
"with open(filepath, \"r\") as file:\n",
" dataset = json.load(file)\n",
"\n",
"instances = dataset if not num_samples else dataset[:num_samples]\n",
"answers = []\n",
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
" answer = answer_provider(instance)\n",
" answer = await answer_qa_instance(instance, context_provider, contexts_filename)\n",
" answers.append(answer)"
]
},
{
"cell_type": "markdown",
"id": "1e7d872d",
"metadata": {},
"source": [
"#### Define Metrics for Evaluation and Calculate Score\n",
"**Options**: \n",
"- **Correctness**: Is the actual output factually correct based on the expected output?\n",
"- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?\n",
"- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?\n",
"- **Empowerment**: How well does the answer help the reader understand and make informed judgements about the topic?\n",
"- **Directness**: How specifically and clearly does the answer address the question?\n",
"- **F1 Score**: the harmonic mean of the precision and recall, using word-level Exact Match\n",
"- **EM Score**: the rate at which the predicted strings exactly match their references, ignoring white spaces and capitalization."
]
},
{
"cell_type": "markdown",
"id": "c81e2b46",
"metadata": {},
"source": [
"##### Calculate `\"Correctness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ae728344",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Correctness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "764aac6d",
"metadata": {},
"outputs": [],
"source": [
"Correctness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Correctness)"
]
},
{
"cell_type": "markdown",
"id": "6d3bbdc5",
"metadata": {},
"source": [
"##### Calculating `\"Comprehensiveness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9793ef78",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Comprehensiveness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9add448a",
"metadata": {},
"outputs": [],
"source": [
"Comprehensiveness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Comprehensiveness)"
]
},
{
"cell_type": "markdown",
"id": "bce2fa25",
"metadata": {},
"source": [
"##### Calculating `\"Diversity\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f60a179e",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Diversity\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7ccbd0ab",
"metadata": {},
"outputs": [],
"source": [
"Diversity = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Diversity)"
]
},
{
"cell_type": "markdown",
"id": "191cab63",
"metadata": {},
"source": [
"##### Calculating `\"Empowerment\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66bec0bf",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Empowerment\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1b043a8f",
"metadata": {},
"outputs": [],
"source": [
"Empowerment = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Empowerment)"
]
},
{
"cell_type": "markdown",
"id": "2cac3be9",
"metadata": {},
"source": [
"##### Calculating `\"Directness\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "adaa17c0",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"Directness\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a8f97c9",
"metadata": {},
"outputs": [],
"source": [
"Directness = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(Directness)"
]
},
{
"cell_type": "markdown",
"id": "1ad6feb8",
"metadata": {},
"source": [
"##### Calculating `\"F1\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bdc48259",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"F1\"]\n",
"eval_metrics = get_metrics(metric_name_list)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c43c17c8",
"metadata": {},
"outputs": [],
"source": [
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bfcc46d",
"metadata": {},
"outputs": [],
"source": [
"F1_score = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(F1_score)"
]
},
{
"cell_type": "markdown",
"id": "2583f948",
"metadata": {},
"source": [
"##### Calculating `\"EM\"`"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90a8f630",
"metadata": {},
"outputs": [],
"source": [
"metric_name_list = [\"EM\"]\n",
"eval_metrics = get_metrics(metric_name_list)\n",
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d1b1ea1",
"metadata": {},
"outputs": [],
"source": [
"EM = statistics.mean(\n",
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
")\n",
"print(EM)"
]
},
{
"cell_type": "markdown",
"id": "288ab570",
@ -700,7 +963,7 @@
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"display_name": "cognee-c83GrcRT-py3.11",
"language": "python",
"name": "python3"
},
@ -714,7 +977,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.10"
}
},
"nbformat": 4,