Merge remote-tracking branch 'origin/dev' into feat/COG-1060-code-pipeline-endpoints
This commit is contained in:
commit
f811ab44e0
7 changed files with 1791 additions and 757 deletions
24
.github/workflows/clean_stale_pr.yaml
vendored
Normal file
24
.github/workflows/clean_stale_pr.yaml
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
name: clean | remove stale PRs
|
||||
|
||||
on:
|
||||
# Run this action periodically (daily at 0:00 UTC in this example).
|
||||
schedule:
|
||||
- cron: "0 0 * * *"
|
||||
# Optionally, also run when pull requests are labeled, unlabeled, synchronized, or reopened
|
||||
# to update the stale timer as needed. Uncomment if desired.
|
||||
# pull_request:
|
||||
# types: [labeled, unlabeled, synchronize, reopened]
|
||||
|
||||
jobs:
|
||||
stale:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Mark and Close Stale
|
||||
uses: actions/stale@v6
|
||||
with:
|
||||
# Number of days of inactivity before the pull request is marked stale
|
||||
days-before-stale: 60
|
||||
# Number of days of inactivity after being marked stale before the pull request is closed
|
||||
days-before-close: 7
|
||||
# Comment to post when marking as stale
|
||||
stale-pr-message: "This pull request has been automatically marke
|
||||
|
|
@ -12,7 +12,11 @@ We build for developers who need a reliable, production-ready data layer for AI
|
|||
|
||||
## What is cognee?
|
||||
|
||||
Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
|
||||
Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
|
||||
|
||||
Cognee merges graph and vector databases to uncover hidden relationships and new patterns in your data. You can automatically model, load, and retrieve entities and objects representing your business domain and analyze their relationships, uncovering insights that neither vector stores nor graph stores alone can provide. Learn more about use cases [here](https://docs.cognee.ai/use_cases).
|
||||
|
||||
|
||||
Try it in a Google Colab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://docs.cognee.ai">documentation</a>.
|
||||
|
||||
If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community.
|
||||
|
|
|
|||
|
|
@ -14,9 +14,9 @@ from ...relational.ModelBase import Base
|
|||
from ...relational.sqlalchemy.SqlAlchemyAdapter import SQLAlchemyAdapter
|
||||
from ..embeddings.EmbeddingEngine import EmbeddingEngine
|
||||
from ..models.ScoredResult import ScoredResult
|
||||
from ..utils import normalize_distances
|
||||
from ..vector_db_interface import VectorDBInterface
|
||||
from .serialize_data import serialize_data
|
||||
from ..utils import normalize_distances
|
||||
|
||||
|
||||
class IndexSchema(DataPoint):
|
||||
|
|
@ -247,12 +247,22 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
|
|||
|
||||
# Collect each match's id, payload and raw distance; scores are normalized below
|
||||
for vector in closest_items:
|
||||
# TODO: Add normalization of similarity score
|
||||
vector_list.append(vector)
|
||||
vector_list.append(
|
||||
{
|
||||
"id": UUID(str(vector.id)),
|
||||
"payload": vector.payload,
|
||||
"_distance": vector.similarity,
|
||||
}
|
||||
)
|
||||
|
||||
# Normalize vector distance and add this as score information to vector_list
|
||||
normalized_values = normalize_distances(vector_list)
|
||||
for i in range(0, len(normalized_values)):
|
||||
vector_list[i]["score"] = normalized_values[i]
|
||||
|
||||
# Create and return ScoredResult objects
|
||||
return [
|
||||
ScoredResult(id=UUID(str(row.id)), payload=row.payload, score=row.similarity)
|
||||
ScoredResult(id=row.get("id"), payload=row.get("payload"), score=row.get("score"))
|
||||
for row in vector_list
|
||||
]
|
||||
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load diff
1
notebooks/data/graphrag
Submodule
1
notebooks/data/graphrag
Submodule
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 130b84db9270734756d16918e5c86034777140fc
|
||||
|
|
@ -618,76 +618,339 @@
|
|||
"cell_type": "markdown",
|
||||
"id": "e519e30c0423c2a",
|
||||
"metadata": {},
|
||||
"source": "## Let's add evals"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "b22ae3d868fa5606",
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2024-12-19T18:01:11.387716Z",
|
||||
"start_time": "2024-12-19T18:01:11.278042Z"
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "ModuleNotFoundError",
|
||||
"evalue": "No module named 'deepeval'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[3], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m eval_on_hotpotQA\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m answer_with_cognee\n\u001b[1;32m 3\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mevals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01meval_on_hotpot\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m answer_without_cognee\n",
|
||||
"File \u001b[0;32m~/cognee/evals/eval_on_hotpot.py:7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mstatistics\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpathlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Path\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mdeepeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmetrics\u001b[39;00m\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwget\u001b[39;00m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mdeepeval\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdataset\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m EvaluationDataset\n",
|
||||
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'deepeval'"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from evals.eval_on_hotpot import eval_on_hotpotQA\n",
|
||||
"from evals.eval_on_hotpot import answer_with_cognee\n",
|
||||
"from evals.eval_on_hotpot import answer_without_cognee\n",
|
||||
"from evals.eval_on_hotpot import eval_answers\n",
|
||||
"from cognee.base_config import get_base_config\n",
|
||||
"from pathlib import Path\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import wget\n",
|
||||
"import json\n",
|
||||
"import statistics"
|
||||
"## Let's add evals"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "728355d390e3a01b",
|
||||
"id": "3845443e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install \"cognee[deepeval]\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7a2c3c70",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from evals.eval_on_hotpot import deepeval_answers, answer_qa_instance\n",
|
||||
"from evals.qa_dataset_utils import load_qa_dataset\n",
|
||||
"from evals.qa_metrics_utils import get_metrics\n",
|
||||
"from evals.qa_context_provider_utils import qa_context_providers\n",
|
||||
"from pathlib import Path\n",
|
||||
"from tqdm import tqdm\n",
|
||||
"import statistics\n",
|
||||
"import random"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "53a609d8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"answer_provider = answer_with_cognee # For native LLM answers use answer_without_cognee\n",
|
||||
"num_samples = 10 # With cognee, it takes ~1m10s per sample\n",
|
||||
"dataset_name_or_filename = \"hotpotqa\"\n",
|
||||
"dataset = load_qa_dataset(dataset_name_or_filename)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7351ab8f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"context_provider_name = \"cognee\"\n",
|
||||
"context_provider = qa_context_providers[context_provider_name]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9346115b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"random.seed(42)\n",
|
||||
"instances = dataset if not num_samples else random.sample(dataset, num_samples)\n",
|
||||
"\n",
|
||||
"base_config = get_base_config()\n",
|
||||
"data_root_dir = base_config.data_root_directory\n",
|
||||
"out_path = \"out\" \n",
|
||||
"if not Path(out_path).exists():\n",
|
||||
" Path(out_path).mkdir()\n",
|
||||
"contexts_filename = out_path / Path(\n",
|
||||
" f\"contexts_{dataset_name_or_filename.split('.')[0]}_{context_provider_name}.json\"\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"if not Path(data_root_dir).exists():\n",
|
||||
" Path(data_root_dir).mkdir()\n",
|
||||
"\n",
|
||||
"filepath = data_root_dir / Path(\"hotpot_dev_fullwiki_v1.json\")\n",
|
||||
"if not filepath.exists():\n",
|
||||
" url = \"http://curtis.ml.cmu.edu/datasets/hotpot/hotpot_dev_fullwiki_v1.json\"\n",
|
||||
" wget.download(url, out=data_root_dir)\n",
|
||||
"\n",
|
||||
"with open(filepath, \"r\") as file:\n",
|
||||
" dataset = json.load(file)\n",
|
||||
"\n",
|
||||
"instances = dataset if not num_samples else dataset[:num_samples]\n",
|
||||
"answers = []\n",
|
||||
"for instance in tqdm(instances, desc=\"Getting answers\"):\n",
|
||||
" answer = answer_provider(instance)\n",
|
||||
" answer = await answer_qa_instance(instance, context_provider, contexts_filename)\n",
|
||||
" answers.append(answer)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1e7d872d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Define Metrics for Evaluation and Calculate Score\n",
|
||||
"**Options**: \n",
|
||||
"- **Correctness**: Is the actual output factually correct based on the expected output?\n",
|
||||
"- **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?\n",
|
||||
"- **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?\n",
|
||||
"- **Empowerment**: How well does the answer help the reader understand and make informed judgements about the topic?\n",
|
||||
"- **Directness**: How specifically and clearly does the answer address the question?\n",
|
||||
"- **F1 Score**: the harmonic mean of the precision and recall, using word-level Exact Match\n",
|
||||
"- **EM Score**: the rate at which the predicted strings exactly match their references, ignoring white spaces and capitalization."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c81e2b46",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Calculating `\"Correctness\"`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ae728344",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric_name_list = [\"Correctness\"]\n",
|
||||
"eval_metrics = get_metrics(metric_name_list)\n",
|
||||
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "764aac6d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Correctness = statistics.mean(\n",
|
||||
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
|
||||
")\n",
|
||||
"print(Correctness)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6d3bbdc5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Calculating `\"Comprehensiveness\"`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9793ef78",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric_name_list = [\"Comprehensiveness\"]\n",
|
||||
"eval_metrics = get_metrics(metric_name_list)\n",
|
||||
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "9add448a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Comprehensiveness = statistics.mean(\n",
|
||||
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
|
||||
")\n",
|
||||
"print(Comprehensiveness)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bce2fa25",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Calculating `\"Diversity\"`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "f60a179e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric_name_list = [\"Diversity\"]\n",
|
||||
"eval_metrics = get_metrics(metric_name_list)\n",
|
||||
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7ccbd0ab",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Diversity = statistics.mean(\n",
|
||||
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
|
||||
")\n",
|
||||
"print(Diversity)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "191cab63",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Calculating `\"Empowerment\"`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "66bec0bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric_name_list = [\"Empowerment\"]\n",
|
||||
"eval_metrics = get_metrics(metric_name_list)\n",
|
||||
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "1b043a8f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Empowerment = statistics.mean(\n",
|
||||
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
|
||||
")\n",
|
||||
"print(Empowerment)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2cac3be9",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Calculating `\"Directness\"`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "adaa17c0",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric_name_list = [\"Directness\"]\n",
|
||||
"eval_metrics = get_metrics(metric_name_list)\n",
|
||||
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3a8f97c9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"Directness = statistics.mean(\n",
|
||||
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
|
||||
")\n",
|
||||
"print(Directness)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1ad6feb8",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Calculating `\"F1\"`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "bdc48259",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric_name_list = [\"F1\"]\n",
|
||||
"eval_metrics = get_metrics(metric_name_list)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c43c17c8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8bfcc46d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"F1_score = statistics.mean(\n",
|
||||
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
|
||||
")\n",
|
||||
"print(F1_score)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2583f948",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"##### Calculating `\"EM\"`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "90a8f630",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"metric_name_list = [\"EM\"]\n",
|
||||
"eval_metrics = get_metrics(metric_name_list)\n",
|
||||
"eval_results = await deepeval_answers(instances, answers, eval_metrics[\"deepeval_metrics\"]) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8d1b1ea1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"EM = statistics.mean(\n",
|
||||
" [result.metrics_data[0].score for result in eval_results.test_results]\n",
|
||||
")\n",
|
||||
"print(EM)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "288ab570",
|
||||
|
|
@ -700,7 +963,7 @@
|
|||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": ".venv",
|
||||
"display_name": "cognee-c83GrcRT-py3.11",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
|
@ -714,7 +977,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.8"
|
||||
"version": "3.11.10"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue