From 62c84dde5e18591c9a4623eaa0729dfc60a19615 Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Sat, 8 Mar 2025 08:51:57 -0800
Subject: [PATCH 01/14] feat: added helm clean push (#606)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Introduced a Helm-based deployment package that streamlines setup for
the backend application and PostgreSQL database on Kubernetes.
- Added orchestration support via Docker Compose for managing
multi-container deployments.
- Added new Kubernetes resources including Deployments, Services, and
PersistentVolumeClaims for both the backend and PostgreSQL.

- **Documentation**
- Provided comprehensive infrastructure and deployment instructions for
Kubernetes environments.

- **Chores**
- Established a standardized container build process for the Python
application.
- Introduced configuration settings for service ports, resource limits,
and environment variables.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Co-authored-by: Daniel Molnar <soobrosa@gmail.com>
Co-authored-by: Boris <boris@topoteretes.com>
---
 helm/Chart.yaml                         | 24 ++++++++++
 helm/Dockerfile                         | 59 +++++++++++++++++++++++++
 helm/README.md                          | 25 +++++++++++
 helm/docker-compose-helm.yml            | 46 +++++++++++++++++++
 helm/templates/cognee_deployment.yaml   | 32 ++++++++++++++
 helm/templates/cognee_service.yaml      | 13 ++++++
 helm/templates/postgres_deployment.yaml | 35 +++++++++++++++
 helm/templates/postgres_pvc.yaml        | 10 +++++
 helm/templates/postgres_service.yaml    | 14 ++++++
 helm/values.yaml                        | 22 +++++++++
 10 files changed, 280 insertions(+)
 create mode 100644 helm/Chart.yaml
 create mode 100644 helm/Dockerfile
 create mode 100644 helm/README.md
 create mode 100644 helm/docker-compose-helm.yml
 create mode 100644 helm/templates/cognee_deployment.yaml
 create mode 100644 helm/templates/cognee_service.yaml
 create mode 100644 helm/templates/postgres_deployment.yaml
 create mode 100644 helm/templates/postgres_pvc.yaml
 create mode 100644 helm/templates/postgres_service.yaml
 create mode 100644 helm/values.yaml

diff --git a/helm/Chart.yaml b/helm/Chart.yaml
new file mode 100644
index 000000000..ab9e087df
--- /dev/null
+++ b/helm/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: cognee-chart
+description: A Helm chart for deploying the cognee backend on Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
diff --git a/helm/Dockerfile b/helm/Dockerfile
new file mode 100644
index 000000000..153834cd6
--- /dev/null
+++ b/helm/Dockerfile
@@ -0,0 +1,59 @@
+FROM python:3.11-slim
+
+# Define Poetry extras to install
+ARG POETRY_EXTRAS="\
+# Storage & Databases \
+filesystem postgres weaviate qdrant neo4j falkordb milvus \
+# Notebooks & Interactive Environments \
+notebook \
+# LLM & AI Frameworks \
+langchain llama-index gemini huggingface ollama mistral groq \
+# Evaluation & Monitoring \
+deepeval evals posthog \
+# Graph Processing & Code Analysis \
+codegraph graphiti \
+# Document Processing \
+docs"
+
+# Set build argument
+ARG DEBUG
+
+# Set environment variable based on the build argument
+ENV DEBUG=${DEBUG}
+ENV PIP_NO_CACHE_DIR=true
+ENV PATH="${PATH}:/root/.poetry/bin"
+
+# Slim base images ship without apt package lists: refresh, install and clean up in one layer.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+  gcc \
+  libpq-dev \
+  && rm -rf /var/lib/apt/lists/*
+
+WORKDIR /app
+COPY pyproject.toml poetry.lock /app/
+
+
+RUN pip install poetry
+
+# Don't create virtualenv since docker is already isolated
+RUN poetry config virtualenvs.create false
+
+# Install the dependencies
+RUN poetry install --extras "${POETRY_EXTRAS}" --no-root --without dev
+
+
+# Set the PYTHONPATH environment variable to include the /app directory
+ENV PYTHONPATH=/app
+
+COPY cognee/ /app/cognee
+
+# Copy Alembic configuration
+COPY alembic.ini /app/alembic.ini
+COPY alembic/ /app/alembic
+
+COPY entrypoint.sh /app/entrypoint.sh
+# Strip Windows (CRLF) line endings and mark the script executable in a single layer.
+RUN sed -i 's/\r$//' /app/entrypoint.sh \
+  && chmod +x /app/entrypoint.sh
+
+ENTRYPOINT ["/app/entrypoint.sh"]
diff --git a/helm/README.md b/helm/README.md
new file mode 100644
index 000000000..b7aaa6325
--- /dev/null
+++ b/helm/README.md
@@ -0,0 +1,25 @@
+
+# cognee-infra-helm
+General infrastructure setup for Cognee on Kubernetes using a Helm chart.
+
+## Prerequisites
+Before deploying the Helm chart, ensure the following prerequisites are met: 
+
+**Kubernetes Cluster**: A running Kubernetes cluster (e.g., Minikube, GKE, EKS).
+
+**Helm**: Installed and configured for your Kubernetes cluster. You can install Helm by following the [official guide](https://helm.sh/docs/intro/install/). 
+
+**kubectl**: Installed and configured to interact with your cluster. Follow the instructions [here](https://kubernetes.io/docs/tasks/tools/install-kubectl/).
+
+**Clone the Repository**: Clone this repository to your local machine and navigate to the directory.
+
+## Deploy Helm Chart:
+
+   ```bash
+   helm install cognee ./helm
+   ```
+
+**Uninstall Helm Release**:  
+   ```bash
+   helm uninstall cognee
+   ```
diff --git a/helm/docker-compose-helm.yml b/helm/docker-compose-helm.yml
new file mode 100644
index 000000000..8aaa63816
--- /dev/null
+++ b/helm/docker-compose-helm.yml
@@ -0,0 +1,46 @@
+services:
+  cognee:
+    image: cognee-backend:latest
+    container_name: cognee-backend
+    networks:
+      - cognee-network
+    build:
+      context: ..  # repo root: helm/Dockerfile copies pyproject.toml, cognee/, alembic/ and entrypoint.sh from it
+      dockerfile: helm/Dockerfile
+    volumes:
+      - ..:/app  # mount the repo root (not helm/) so /app still contains the code and entrypoint.sh
+      - /app/cognee-frontend/ # Ignore frontend code
+    environment:
+      - HOST=0.0.0.0
+      - ENVIRONMENT=local
+      - PYTHONPATH=.
+    ports:
+      - "8000:8000"
+      # - 5678:5678 # Debugging
+    deploy:
+      resources:
+        limits:
+          cpus: '4.0'
+          memory: 8GB
+
+  postgres:
+    image: pgvector/pgvector:pg17
+    container_name: postgres
+    environment:  # NOTE(review): credentials are hard-coded here — presumably for local use only; confirm
+      POSTGRES_USER: cognee
+      POSTGRES_PASSWORD: cognee
+      POSTGRES_DB: cognee_db
+    volumes:
+      - postgres_data:/var/lib/postgresql/data  # named volume declared under the top-level `volumes` key
+    ports:
+      - 5432:5432  # published on the host in addition to cognee-network
+    networks:
+      - cognee-network
+
+networks:
+  cognee-network:
+    name: cognee-network
+
+volumes:
+  postgres_data:
+
diff --git a/helm/templates/cognee_deployment.yaml b/helm/templates/cognee_deployment.yaml
new file mode 100644
index 000000000..f16a475ec
--- /dev/null
+++ b/helm/templates/cognee_deployment.yaml
@@ -0,0 +1,32 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ .Release.Name }}-cognee
+  labels:
+    app: {{ .Release.Name }}-cognee
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: {{ .Release.Name }}-cognee
+  template:
+    metadata:
+      labels:
+        app: {{ .Release.Name }}-cognee
+    spec:
+      containers:
+        - name: cognee
+          image: {{ .Values.cognee.image | quote }}
+          ports:
+            - containerPort: {{ .Values.cognee.port }}
+          env:  # values go through `quote`: env values must be strings even when they look numeric or boolean
+            - name: HOST
+              value: {{ .Values.cognee.env.HOST | quote }}
+            - name: ENVIRONMENT
+              value: {{ .Values.cognee.env.ENVIRONMENT | quote }}
+            - name: PYTHONPATH
+              value: {{ .Values.cognee.env.PYTHONPATH | quote }}
+          resources:
+            limits:
+              cpu: {{ .Values.cognee.resources.cpu | quote }}
+              memory: {{ .Values.cognee.resources.memory | quote }}
diff --git a/helm/templates/cognee_service.yaml b/helm/templates/cognee_service.yaml
new file mode 100644
index 000000000..21e9e470e
--- /dev/null
+++ b/helm/templates/cognee_service.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Release.Name }}-cognee
+  labels:
+    app: {{ .Release.Name }}-cognee
+spec:
+  type: NodePort  # no nodePort is pinned below, so the node port number is left to Kubernetes
+  ports:
+    - port: {{ .Values.cognee.port }}
+      targetPort: {{ .Values.cognee.port }}
+  selector:  # must match the pod label set in cognee_deployment.yaml
+    app: {{ .Release.Name }}-cognee
diff --git a/helm/templates/postgres_deployment.yaml b/helm/templates/postgres_deployment.yaml
new file mode 100644
index 000000000..fc47647a2
--- /dev/null
+++ b/helm/templates/postgres_deployment.yaml
@@ -0,0 +1,35 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ .Release.Name }}-postgres
+  labels:
+    app: {{ .Release.Name }}-postgres
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: {{ .Release.Name }}-postgres
+  template:
+    metadata:
+      labels:
+        app: {{ .Release.Name }}-postgres
+    spec:
+      containers:
+        - name: postgres
+          image: {{ .Values.postgres.image | quote }}
+          ports:
+            - containerPort: {{ .Values.postgres.port }}
+          env:  # values go through `quote` so e.g. an all-digit password still renders as a string
+            - name: POSTGRES_USER
+              value: {{ .Values.postgres.env.POSTGRES_USER | quote }}
+            - name: POSTGRES_PASSWORD
+              value: {{ .Values.postgres.env.POSTGRES_PASSWORD | quote }}
+            - name: POSTGRES_DB
+              value: {{ .Values.postgres.env.POSTGRES_DB | quote }}
+          volumeMounts:
+            - name: postgres-storage
+              mountPath: /var/lib/postgresql/data
+      volumes:
+        - name: postgres-storage
+          persistentVolumeClaim:
+            claimName: {{ .Release.Name }}-postgres-pvc  # must match metadata.name in postgres_pvc.yaml
diff --git a/helm/templates/postgres_pvc.yaml b/helm/templates/postgres_pvc.yaml
new file mode 100644
index 000000000..7d7661b16
--- /dev/null
+++ b/helm/templates/postgres_pvc.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ .Release.Name }}-postgres-pvc  # referenced as claimName by postgres_deployment.yaml
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.postgres.storage }}  # size comes from values.yaml (postgres.storage)
diff --git a/helm/templates/postgres_service.yaml b/helm/templates/postgres_service.yaml
new file mode 100644
index 000000000..7a944a128
--- /dev/null
+++ b/helm/templates/postgres_service.yaml
@@ -0,0 +1,14 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Release.Name }}-postgres
+  labels:
+    app: {{ .Release.Name }}-postgres
+spec:
+  type: ClusterIP  # cluster-internal address only, unlike the NodePort service used for cognee
+  ports:
+    - port: {{ .Values.postgres.port }}
+      targetPort: {{ .Values.postgres.port }}
+  selector:  # must match the pod label set in postgres_deployment.yaml
+    app: {{ .Release.Name }}-postgres
+
diff --git a/helm/values.yaml b/helm/values.yaml
new file mode 100644
index 000000000..fb2d3f7e3
--- /dev/null
+++ b/helm/values.yaml
@@ -0,0 +1,22 @@
+# Configuration for the 'cognee' application service
+cognee:
+  # Image name. NOTE(review): this references a registry image, not an image built locally in Minikube — confirm which is intended.
+  image: "hajdul1988/cognee-backend:latest"
+  port: 8000
+  env:
+    HOST: "0.0.0.0"
+    ENVIRONMENT: "local"
+    PYTHONPATH: "."
+  resources:
+    cpu: "4.0"
+    memory: "8Gi"
+
+# Configuration for the 'postgres' database service
+postgres:
+  image: "pgvector/pgvector:pg17"
+  port: 5432
+  env:
+    POSTGRES_USER: "cognee"
+    POSTGRES_PASSWORD: "cognee"
+    POSTGRES_DB: "cognee_db"
+  storage: "8Gi"

From 56427f287ef85e1548e77d650e648a378c8410ab Mon Sep 17 00:00:00 2001
From: hibajamal <35984866+hibajamal@users.noreply.github.com>
Date: Sat, 8 Mar 2025 20:33:42 +0100
Subject: [PATCH 02/14] Demo for relational db with cognee (#620)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

<!-- .github/pull_request_template.md -->

## Description
This demo uses pydantic models and dlt to pull data from the Pokémon API
and structure it into a relational format. By feeding this structured
data into cognee, it makes searching across multiple tables easier and
more intuitive, thanks to the relational model.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **New Features**
- Introduced a comprehensive Pokémon data processing pipeline, available
as both a Python script and an interactive Jupyter Notebook.
- Enabled asynchronous operations for efficient data collection and
querying, including an integrated search functionality.
- Improved error handling and data validation during the data fetching
and processing stages for a smoother user experience.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

Co-authored-by: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
---
 examples/python/pokemon_datapoints_example.py | 190 +++++++
 notebooks/pokemon_datapoints_notebook.ipynb   | 536 ++++++++++++++++++
 2 files changed, 726 insertions(+)
 create mode 100644 examples/python/pokemon_datapoints_example.py
 create mode 100644 notebooks/pokemon_datapoints_notebook.ipynb

diff --git a/examples/python/pokemon_datapoints_example.py b/examples/python/pokemon_datapoints_example.py
new file mode 100644
index 000000000..058492e63
--- /dev/null
+++ b/examples/python/pokemon_datapoints_example.py
@@ -0,0 +1,190 @@
+# Standard library imports
+import os
+import json
+import asyncio
+import pathlib
+from uuid import uuid5, NAMESPACE_OID
+from typing import List, Optional
+from pathlib import Path
+
+import dlt
+import requests
+import cognee
+from cognee.low_level import DataPoint, setup as cognee_setup
+from cognee.api.v1.search import SearchType
+from cognee.tasks.storage import add_data_points
+from cognee.modules.pipelines.tasks.Task import Task
+from cognee.modules.pipelines import run_tasks
+
+
+BASE_URL = "https://pokeapi.co/api/v2/"
+os.environ["BUCKET_URL"] = "./.data_storage"
+os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "true"
+
+# Data Models
+class Abilities(DataPoint):  # shared node type: PokemonAbility.is_type and Pokemons.have both point at it
+    name: str = "Abilities"
+    metadata: dict = {"index_fields": ["name"]}  # presumably the fields cognee indexes — confirm
+
+class PokemonAbility(DataPoint):  # one ability row; instantiated from the dicts load_abilities_data builds
+    name: str  # load_abilities_data copies ability__name into this field
+    ability__name: str
+    ability__url: str
+    is_hidden: bool
+    slot: int
+    _dlt_load_id: str  # NOTE(review): if DataPoint is a pydantic model, leading-underscore names may be private attributes, not fields — confirm
+    _dlt_id: str  # load_abilities_data derives the data point id from this value
+    _dlt_parent_id: str  # compared against a Pokemon row's _dlt_id in load_pokemon_data
+    _dlt_list_idx: str
+    is_type: Abilities  # the shared Abilities root instance
+    metadata: dict = {"index_fields": ["ability__name"]}
+
+class Pokemons(DataPoint):  # shared node type that every Pokemon references through `is_type`
+    name: str = "Pokemons"
+    have: Abilities  # link to the Abilities root, passed in by setup_and_process_data
+    metadata: dict = {"index_fields": ["name"]}
+
+class Pokemon(DataPoint):  # one Pokémon row; instantiated from JSONL rows in load_pokemon_data
+    name: str
+    base_experience: int
+    height: int
+    weight: int
+    is_default: bool
+    order: int
+    location_area_encounters: str
+    species__name: str
+    species__url: str
+    cries__latest: str
+    cries__legacy: str
+    sprites__front_default: str
+    sprites__front_shiny: str
+    sprites__back_default: Optional[str]
+    sprites__back_shiny: Optional[str]
+    _dlt_load_id: str  # NOTE(review): if DataPoint is a pydantic model, leading-underscore names may be private attributes, not fields — confirm
+    _dlt_id: str  # ability rows are matched to this value through their _dlt_parent_id
+    is_type: Pokemons  # the shared Pokemons root instance
+    abilities: List[PokemonAbility]  # filled by load_pokemon_data from the matching ability rows
+    metadata: dict = {"index_fields": ["name"]}
+
+# Data Collection Functions
+@dlt.resource(write_disposition="replace")
+def pokemon_list(limit: int = 50):  # dlt resource: yields the listing page requested with `limit` as one list
+    response = requests.get(f"{BASE_URL}pokemon", params={"limit": limit}, timeout=30)  # never hang on a stalled connection
+    response.raise_for_status()
+    yield response.json()["results"]
+
+@dlt.transformer(data_from=pokemon_list)
+def pokemon_details(pokemons):
+    """Yield the detail record fetched from each listing entry's `url`; raises on HTTP errors."""
+    for pokemon in pokemons:
+        response = requests.get(pokemon["url"], timeout=30)  # never hang on a stalled connection
+        response.raise_for_status()
+        yield response.json()
+
+# Data Loading Functions
+def load_abilities_data(jsonl_abilities):
+    """Read ability rows from the given JSONL files; return (shared Abilities root, list of row dicts)."""
+    abilities_root = Abilities()
+    pokemon_abilities = []
+    for jsonl_ability in jsonl_abilities:
+        # JSON is UTF-8 by spec; don't depend on the platform's default encoding.
+        with open(jsonl_ability, "r", encoding="utf-8") as f:
+            for line in f:
+                ability = json.loads(line)
+                ability["id"] = uuid5(NAMESPACE_OID, ability["_dlt_id"])
+                ability["name"] = ability["ability__name"]
+                ability["is_type"] = abilities_root
+                pokemon_abilities.append(ability)
+    return abilities_root, pokemon_abilities
+
+def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):
+    """Build Pokemon data points from JSONL files, attaching each row's abilities and the shared root."""
+    abilities_by_parent = {}  # ability rows grouped by parent row id, built once instead of rescanned per row
+    for ability in pokemon_abilities:
+        abilities_by_parent.setdefault(ability["_dlt_parent_id"], []).append(ability)
+    pokemons = []
+    for jsonl_pokemon in jsonl_pokemons:
+        # JSON is UTF-8 by spec; don't depend on the platform's default encoding.
+        with open(jsonl_pokemon, "r", encoding="utf-8") as f:
+            for line in f:
+                pokemon_data = json.loads(line)
+                abilities = abilities_by_parent.get(pokemon_data["_dlt_id"], [])
+                pokemon_data["external_id"] = pokemon_data["id"]
+                pokemon_data["id"] = uuid5(NAMESPACE_OID, str(pokemon_data["id"]))
+                pokemon_data["abilities"] = [PokemonAbility(**ability) for ability in abilities]
+                pokemon_data["is_type"] = pokemon_root
+                pokemons.append(Pokemon(**pokemon_data))
+    return pokemons
+
+# Main Application Logic
+async def setup_and_process_data():
+    """Configure cognee, run the dlt pipeline, and return the loaded Pokemon data points.
+
+    Raises FileNotFoundError when either expected directory contains no JSONL files.
+    """
+    # Keep cognee's data and system directories next to this script.
+    script_dir = Path(__file__).parent
+    cognee.config.data_root_directory(str((script_dir / ".data_storage").resolve()))
+    cognee.config.system_root_directory(str((script_dir / ".cognee_system").resolve()))
+
+    # Initialize pipeline and collect data
+    pipeline = dlt.pipeline(
+        pipeline_name="pokemon_pipeline",
+        destination="filesystem",
+        dataset_name="pokemon_data",
+    )
+    info = pipeline.run([pokemon_list, pokemon_details])
+    print(info)
+
+    # Locate the JSONL files to load (these paths are relative to the working directory).
+    STORAGE_PATH = Path(".data_storage/pokemon_data/pokemon_details")
+    jsonl_pokemons = sorted(STORAGE_PATH.glob("*.jsonl"))
+    if not jsonl_pokemons:
+        raise FileNotFoundError(f"No JSONL files found in {STORAGE_PATH}.")
+
+    ABILITIES_PATH = Path(".data_storage/pokemon_data/pokemon_details__abilities")
+    jsonl_abilities = sorted(ABILITIES_PATH.glob("*.jsonl"))
+    if not jsonl_abilities:
+        raise FileNotFoundError(f"No JSONL files found in {ABILITIES_PATH}.")
+
+    # Build the data points: abilities first, then the Pokemon rows that reference them.
+    abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)
+    pokemon_root = Pokemons(have=abilities_root)
+    pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)
+    return pokemons
+
+async def pokemon_cognify(pokemons):
+    """Store the given Pokemon data points through cognee, then print the results of a sample search."""
+    # Reset cognee before loading: prune_data, prune_system(metadata=True), then setup.
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    await cognee_setup()
+
+    # Single-task pipeline that hands the data points to add_data_points.
+    storage_task = Task(add_data_points, task_config={"batch_size": 50})
+    run_updates = run_tasks(
+        tasks=[storage_task],
+        data=pokemons,
+        dataset_id=uuid5(NAMESPACE_OID, "Pokemon"),
+        pipeline_name="pokemon_pipeline",
+    )
+    async for update in run_updates:
+        print(update)
+    print("Done")
+
+    # Run one search and echo every item it returns.
+    answers = await cognee.search(
+        query_type=SearchType.GRAPH_COMPLETION,
+        query_text="pokemons?",
+    )
+
+    print("Search results:")
+    for answer in answers:
+        print(answer)
+
+async def main():
+    """Load the Pokemon data points, then process and search them with cognee."""
+    await pokemon_cognify(await setup_and_process_data())
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/notebooks/pokemon_datapoints_notebook.ipynb b/notebooks/pokemon_datapoints_notebook.ipynb
new file mode 100644
index 000000000..9fbc34bc1
--- /dev/null
+++ b/notebooks/pokemon_datapoints_notebook.ipynb
@@ -0,0 +1,536 @@
+{
+ "cells": [
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:58:00.193158Z",
+     "start_time": "2025-03-04T11:58:00.190238Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import nest_asyncio\n",
+    "nest_asyncio.apply()"
+   ],
+   "id": "2efba278d106bb5f",
+   "outputs": [],
+   "execution_count": 2
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "### Environment Configuration\n",
+    "#### Setup required directories and environment variables.\n"
+   ],
+   "id": "ccbb2bc23aa456ee"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:59:33.879188Z",
+     "start_time": "2025-03-04T11:59:33.873682Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import pathlib\n",
+    "import os\n",
+    "import cognee\n",
+    "\n",
+    "notebook_dir = pathlib.Path().resolve()\n",
+    "data_directory_path = str(notebook_dir / \".data_storage\")\n",
+    "cognee_directory_path = str(notebook_dir / \".cognee_system\")\n",
+    "\n",
+    "cognee.config.data_root_directory(data_directory_path)\n",
+    "cognee.config.system_root_directory(cognee_directory_path)\n",
+    "\n",
+    "BASE_URL = \"https://pokeapi.co/api/v2/\"\n",
+    "os.environ[\"BUCKET_URL\"] = data_directory_path\n",
+    "os.environ[\"DATA_WRITER__DISABLE_COMPRESSION\"] = \"true\"\n"
+   ],
+   "id": "662d554f96f211d9",
+   "outputs": [],
+   "execution_count": 8
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "## Initialize DLT Pipeline\n",
+    "### Create the DLT pipeline to fetch Pokémon data.\n"
+   ],
+   "id": "36ae0be71f6e9167"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:58:03.982939Z",
+     "start_time": "2025-03-04T11:58:03.819676Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import dlt\n",
+    "from pathlib import Path\n",
+    "\n",
+    "pipeline = dlt.pipeline(\n",
+    "    pipeline_name=\"pokemon_pipeline\",\n",
+    "    destination=\"filesystem\",\n",
+    "    dataset_name=\"pokemon_data\",\n",
+    ")\n"
+   ],
+   "id": "25101ae5f016ce0c",
+   "outputs": [],
+   "execution_count": 4
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Fetch Pokémon List\n",
+    "### Retrieve a list of Pokémon from the API.\n"
+   ],
+   "id": "9a87ce05a072c48b"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:58:03.990076Z",
+     "start_time": "2025-03-04T11:58:03.987199Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "@dlt.resource(write_disposition=\"replace\")\n",
+    "def pokemon_list(limit: int = 50):\n",
+    "    import requests\n",
+    "    response = requests.get(f\"{BASE_URL}pokemon\", params={\"limit\": limit})\n",
+    "    response.raise_for_status()\n",
+    "    yield response.json()[\"results\"]\n"
+   ],
+   "id": "3b6e60778c61e24a",
+   "outputs": [],
+   "execution_count": 5
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Fetch Pokémon Details\n",
+    "### Fetch detailed information about each Pokémon.\n"
+   ],
+   "id": "9952767846194e97"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:58:03.996394Z",
+     "start_time": "2025-03-04T11:58:03.994122Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "@dlt.transformer(data_from=pokemon_list)\n",
+    "def pokemon_details(pokemons):\n",
+    "    \"\"\"Fetches detailed info for each Pokémon\"\"\"\n",
+    "    import requests\n",
+    "    for pokemon in pokemons:\n",
+    "        response = requests.get(pokemon[\"url\"])\n",
+    "        response.raise_for_status()\n",
+    "        yield response.json()\n"
+   ],
+   "id": "79ec9fef12267485",
+   "outputs": [],
+   "execution_count": 6
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Run Data Pipeline\n",
+    "### Execute the pipeline and store Pokémon data.\n"
+   ],
+   "id": "41e05f660bf9e9d2"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:59:41.571015Z",
+     "start_time": "2025-03-04T11:59:36.840744Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "info = pipeline.run([pokemon_list, pokemon_details])\n",
+    "print(info)\n"
+   ],
+   "id": "20a3b2c7f404677f",
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Pipeline pokemon_pipeline load step completed in 0.06 seconds\n",
+      "1 load package(s) were loaded to destination filesystem and into dataset pokemon_data\n",
+      "The filesystem destination used file:///Users/lazar/PycharmProjects/cognee/.data_storage location to store data\n",
+      "Load package 1741089576.860229 is LOADED and contains no failed jobs\n"
+     ]
+    }
+   ],
+   "execution_count": 9
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Load Pokémon Abilities\n",
+    "### Load Pokémon ability data from stored files.\n"
+   ],
+   "id": "937f10b8d1037743"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:59:44.377719Z",
+     "start_time": "2025-03-04T11:59:44.363718Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import json\n",
+    "from cognee.low_level import DataPoint\n",
+    "from uuid import uuid5, NAMESPACE_OID\n",
+    "\n",
+    "class Abilities(DataPoint):\n",
+    "    name: str = \"Abilities\"\n",
+    "    metadata: dict = {\"index_fields\": [\"name\"]}\n",
+    "\n",
+    "def load_abilities_data(jsonl_abilities):\n",
+    "    abilities_root = Abilities()\n",
+    "    pokemon_abilities = []\n",
+    "\n",
+    "    for jsonl_ability in jsonl_abilities:\n",
+    "        with open(jsonl_ability, \"r\") as f:\n",
+    "            for line in f:\n",
+    "                ability = json.loads(line)\n",
+    "                ability[\"id\"] = uuid5(NAMESPACE_OID, ability[\"_dlt_id\"])\n",
+    "                ability[\"name\"] = ability[\"ability__name\"]\n",
+    "                ability[\"is_type\"] = abilities_root\n",
+    "                pokemon_abilities.append(ability)\n",
+    "\n",
+    "    return abilities_root, pokemon_abilities\n"
+   ],
+   "id": "be73050036439ea1",
+   "outputs": [],
+   "execution_count": 10
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Load Pokémon Data\n",
+    "### Load Pokémon details and associate them with abilities.\n"
+   ],
+   "id": "98c97f799f73df77"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:59:46.251306Z",
+     "start_time": "2025-03-04T11:59:46.238283Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from typing import List, Optional\n",
+    "\n",
+    "class Pokemons(DataPoint):\n",
+    "    name: str = \"Pokemons\"\n",
+    "    have: Abilities\n",
+    "    metadata: dict = {\"index_fields\": [\"name\"]}\n",
+    "\n",
+    "class PokemonAbility(DataPoint):\n",
+    "    name: str\n",
+    "    ability__name: str\n",
+    "    ability__url: str\n",
+    "    is_hidden: bool\n",
+    "    slot: int\n",
+    "    _dlt_load_id: str\n",
+    "    _dlt_id: str\n",
+    "    _dlt_parent_id: str\n",
+    "    _dlt_list_idx: str\n",
+    "    is_type: Abilities\n",
+    "    metadata: dict = {\"index_fields\": [\"ability__name\"]}\n",
+    "\n",
+    "class Pokemon(DataPoint):\n",
+    "    name: str\n",
+    "    base_experience: int\n",
+    "    height: int\n",
+    "    weight: int\n",
+    "    is_default: bool\n",
+    "    order: int\n",
+    "    location_area_encounters: str\n",
+    "    species__name: str\n",
+    "    species__url: str\n",
+    "    cries__latest: str\n",
+    "    cries__legacy: str\n",
+    "    sprites__front_default: str\n",
+    "    sprites__front_shiny: str\n",
+    "    sprites__back_default: Optional[str]\n",
+    "    sprites__back_shiny: Optional[str]\n",
+    "    _dlt_load_id: str\n",
+    "    _dlt_id: str\n",
+    "    is_type: Pokemons\n",
+    "    abilities: List[PokemonAbility]\n",
+    "    metadata: dict = {\"index_fields\": [\"name\"]}\n",
+    "\n",
+    "def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):\n",
+    "    pokemons = []\n",
+    "\n",
+    "    for jsonl_pokemon in jsonl_pokemons:\n",
+    "        with open(jsonl_pokemon, \"r\") as f:\n",
+    "            for line in f:\n",
+    "                pokemon_data = json.loads(line)\n",
+    "                abilities = [\n",
+    "                    ability for ability in pokemon_abilities\n",
+    "                    if ability[\"_dlt_parent_id\"] == pokemon_data[\"_dlt_id\"]\n",
+    "                ]\n",
+    "                pokemon_data[\"external_id\"] = pokemon_data[\"id\"]\n",
+    "                pokemon_data[\"id\"] = uuid5(NAMESPACE_OID, str(pokemon_data[\"id\"]))\n",
+    "                pokemon_data[\"abilities\"] = [PokemonAbility(**ability) for ability in abilities]\n",
+    "                pokemon_data[\"is_type\"] = pokemon_root\n",
+    "                pokemons.append(Pokemon(**pokemon_data))\n",
+    "\n",
+    "    return pokemons\n"
+   ],
+   "id": "7862951248df0bf5",
+   "outputs": [],
+   "execution_count": 11
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Process Pokémon Data\n",
+    "### Load and associate Pokémon abilities.\n"
+   ],
+   "id": "676fa5a2b61c2107"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:59:47.365226Z",
+     "start_time": "2025-03-04T11:59:47.356722Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "STORAGE_PATH = Path(\".data_storage/pokemon_data/pokemon_details\")\n",
+    "jsonl_pokemons = sorted(STORAGE_PATH.glob(\"*.jsonl\"))\n",
+    "\n",
+    "ABILITIES_PATH = Path(\".data_storage/pokemon_data/pokemon_details__abilities\")\n",
+    "jsonl_abilities = sorted(ABILITIES_PATH.glob(\"*.jsonl\"))\n",
+    "\n",
+    "abilities_root, pokemon_abilities = load_abilities_data(jsonl_abilities)\n",
+    "pokemon_root = Pokemons(have=abilities_root)\n",
+    "pokemons = load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root)\n"
+   ],
+   "id": "ad14cdecdccd71bb",
+   "outputs": [],
+   "execution_count": 12
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Initialize Cognee\n",
+    "### Setup Cognee for data processing.\n"
+   ],
+   "id": "59dec67b2ae50f0f"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:59:49.244577Z",
+     "start_time": "2025-03-04T11:59:48.618261Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "import asyncio\n",
+    "from cognee.low_level import setup as cognee_setup\n",
+    "\n",
+    "async def initialize_cognee():\n",
+    "    await cognee.prune.prune_data()\n",
+    "    await cognee.prune.prune_system(metadata=True)\n",
+    "    await cognee_setup()\n",
+    "\n",
+    "await initialize_cognee()\n"
+   ],
+   "id": "d2e095ae576a02c1",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully.INFO:cognee.infrastructure.databases.relational.sqlalchemy.SqlAlchemyAdapter:Database deleted successfully."
+     ]
+    }
+   ],
+   "execution_count": 13
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Process Pokémon Data\n",
+    "### Add Pokémon data points to Cognee.\n"
+   ],
+   "id": "5f0b8090bc7b1fe6"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T11:59:57.744035Z",
+     "start_time": "2025-03-04T11:59:50.574033Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from cognee.modules.pipelines.tasks.Task import Task\n",
+    "from cognee.tasks.storage import add_data_points\n",
+    "from cognee.modules.pipelines import run_tasks\n",
+    "\n",
+    "tasks = [Task(add_data_points, task_config={\"batch_size\": 50})]\n",
+    "results = run_tasks(\n",
+    "    tasks=tasks,\n",
+    "    data=pokemons,\n",
+    "    dataset_id=uuid5(NAMESPACE_OID, \"Pokemon\"),\n",
+    "    pipeline_name='pokemon_pipeline',\n",
+    ")\n",
+    "\n",
+    "async for result in results:\n",
+    "    print(result)\n",
+    "print(\"Done\")\n"
+   ],
+   "id": "ffa12fc1f5350d95",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:run_tasks(tasks: [Task], data):Pipeline run started: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`INFO:run_tasks(tasks: [Task], data):Coroutine task started: `add_data_points`"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x300bb3950>\n",
+      "User d347ea85-e512-4cae-b9d7-496fe1745424 has registered.\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:79: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n",
+      "  class PGVectorDataPoint(Base):\n",
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"/Users/lazar/PycharmProjects/cognee/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py:113: SAWarning: This declarative base already contains a class with the same class name and module name as cognee.infrastructure.databases.vector.pgvector.PGVectorAdapter.PGVectorDataPoint, and will be replaced in the string-lookup table.\n",
+      "  class PGVectorDataPoint(Base):\n",
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 8, column: 16, offset: 335} for query: '\\n        UNWIND $nodes AS node\\n        MERGE (n {id: node.node_id})\\n        ON CREATE SET n += node.properties, n.updated_at = timestamp()\\n        ON MATCH SET n += node.properties, n.updated_at = timestamp()\\n        WITH n, node.node_id AS label\\n        CALL apoc.create.addLabels(n, [label]) YIELD node AS labeledNode\\n        RETURN ID(labeledNode) AS internal_id, labeledNode.id AS nodeId\\n        'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n        MATCH (n)-[r]->(m)\\n        RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n        'WARNING:neo4j.notifications:Received notification from DBMS server: 
{severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n        MATCH (n)-[r]->(m)\\n        RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n        'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:run_tasks(tasks: [Task], data):Coroutine task completed: `add_data_points`INFO:run_tasks(tasks: [Task], data):Pipeline run completed: `fd2ed59d-b550-5b05-bbe6-7b708fe12483`"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<cognee.modules.pipelines.models.PipelineRun.PipelineRun object at 0x30016fd40>\n",
+      "Done\n"
+     ]
+    }
+   ],
+   "execution_count": 14
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "##  Search Pokémon Data\n",
+    "### Execute a search query using Cognee.\n"
+   ],
+   "id": "e0d98d9832a2797a"
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2025-03-04T12:00:02.878871Z",
+     "start_time": "2025-03-04T11:59:59.571965Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "from cognee.api.v1.search import SearchType\n",
+    "\n",
+    "search_results = await cognee.search(\n",
+    "    query_type=SearchType.GRAPH_COMPLETION,\n",
+    "    query_text=\"pokemons?\"\n",
+    ")\n",
+    "\n",
+    "print(\"Search results:\")\n",
+    "for result_text in search_results:\n",
+    "    print(result_text)"
+   ],
+   "id": "bb2476b6b0c2aff",
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} 
{title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 1, column: 18, offset: 17} for query: 'MATCH (n) RETURN ID(n) AS id, labels(n) AS labels, properties(n) AS properties'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 16, offset: 43} for query: '\\n        MATCH (n)-[r]->(m)\\n        RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n        'WARNING:neo4j.notifications:Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated function: `id`.} {position: line: 3, column: 33, offset: 60} for query: '\\n        MATCH (n)-[r]->(m)\\n        RETURN ID(n) AS source, ID(m) AS target, TYPE(r) AS type, properties(r) AS properties\\n        'INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings \"HTTP/1.1 200 OK\"\u001B[92m13:00:02 - LiteLLM:INFO\u001B[0m: utils.py:2784 - \n",
+      "LiteLLM completion() model= gpt-4o-mini; provider = openaiINFO:LiteLLM:\n",
+      "LiteLLM completion() model= gpt-4o-mini; provider = openai"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Search results:\n",
+      "The Pokemons mentioned are: golbat, jigglypuff, raichu, vulpix, and pikachu.\n"
+     ]
+    }
+   ],
+   "execution_count": 15
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "",
+   "id": "a4c2d3e9c15b017"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

From ac0156514d6dbc237559ac44a937fef330a679c3 Mon Sep 17 00:00:00 2001
From: lxobr <122801072+lxobr@users.noreply.github.com>
Date: Mon, 10 Mar 2025 10:55:31 +0100
Subject: [PATCH 03/14] feat: COG-1523 add top_k in run_question_answering
 (#625)

<!-- .github/pull_request_template.md -->

## Description
<!-- Provide a clear description of the changes in this PR -->
- Expose top_k as an optional argument of run_question_answering
- Update retrievers to accept and apply the new top_k parameter

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Enhanced answer generation and document retrieval capabilities by
introducing an optional parameter that allows users to specify the
number of top results. This improvement adds flexibility when retrieving
question responses and associated context, adapting the output based on
user preference.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---
 .../answer_generation/run_question_answering_module.py    | 8 +++++---
 cognee/modules/retrieval/completion_retriever.py          | 4 +++-
 cognee/modules/retrieval/graph_completion_retriever.py    | 4 ++--
 .../retrieval/graph_summary_completion_retriever.py       | 2 +-
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/cognee/eval_framework/answer_generation/run_question_answering_module.py b/cognee/eval_framework/answer_generation/run_question_answering_module.py
index 1d3686efb..70938a451 100644
--- a/cognee/eval_framework/answer_generation/run_question_answering_module.py
+++ b/cognee/eval_framework/answer_generation/run_question_answering_module.py
@@ -1,6 +1,6 @@
 import logging
 import json
-from typing import List
+from typing import List, Optional
 from cognee.eval_framework.answer_generation.answer_generation_executor import (
     AnswerGeneratorExecutor,
     retriever_options,
@@ -32,7 +32,7 @@ async def create_and_insert_answers_table(questions_payload):
 
 
 async def run_question_answering(
-    params: dict, system_prompt="answer_simple_question.txt"
+    params: dict, system_prompt="answer_simple_question.txt", top_k: Optional[int] = None
 ) -> List[dict]:
     if params.get("answering_questions"):
         logging.info("Question answering started...")
@@ -48,7 +48,9 @@ async def run_question_answering(
         answer_generator = AnswerGeneratorExecutor()
         answers = await answer_generator.question_answering_non_parallel(
             questions=questions,
-            retriever=retriever_options[params["qa_engine"]](system_prompt_path=system_prompt),
+            retriever=retriever_options[params["qa_engine"]](
+                system_prompt_path=system_prompt, top_k=top_k
+            ),
         )
         with open(params["answers_path"], "w", encoding="utf-8") as f:
             json.dump(answers, f, ensure_ascii=False, indent=4)
diff --git a/cognee/modules/retrieval/completion_retriever.py b/cognee/modules/retrieval/completion_retriever.py
index f2427f062..cf8600f27 100644
--- a/cognee/modules/retrieval/completion_retriever.py
+++ b/cognee/modules/retrieval/completion_retriever.py
@@ -13,15 +13,17 @@ class CompletionRetriever(BaseRetriever):
         self,
         user_prompt_path: str = "context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
+        top_k: Optional[int] = 1,
     ):
         """Initialize retriever with optional custom prompt paths."""
         self.user_prompt_path = user_prompt_path
         self.system_prompt_path = system_prompt_path
+        self.top_k = top_k if top_k is not None else 1
 
     async def get_context(self, query: str) -> Any:
         """Retrieves relevant document chunks as context."""
         vector_engine = get_vector_engine()
-        found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=1)
+        found_chunks = await vector_engine.search("DocumentChunk_text", query, limit=self.top_k)
         if len(found_chunks) == 0:
             raise NoRelevantDataFound
         return found_chunks[0].payload["text"]
diff --git a/cognee/modules/retrieval/graph_completion_retriever.py b/cognee/modules/retrieval/graph_completion_retriever.py
index 709415fa7..80b8855d1 100644
--- a/cognee/modules/retrieval/graph_completion_retriever.py
+++ b/cognee/modules/retrieval/graph_completion_retriever.py
@@ -15,12 +15,12 @@ class GraphCompletionRetriever(BaseRetriever):
         self,
         user_prompt_path: str = "graph_context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
-        top_k: int = 5,
+        top_k: Optional[int] = 5,
     ):
         """Initialize retriever with prompt paths and search parameters."""
         self.user_prompt_path = user_prompt_path
         self.system_prompt_path = system_prompt_path
-        self.top_k = top_k
+        self.top_k = top_k if top_k is not None else 5
 
     async def resolve_edges_to_text(self, retrieved_edges: list) -> str:
         """Converts retrieved graph edges into a human-readable string format."""
diff --git a/cognee/modules/retrieval/graph_summary_completion_retriever.py b/cognee/modules/retrieval/graph_summary_completion_retriever.py
index 536bafe5d..76ed5f5d4 100644
--- a/cognee/modules/retrieval/graph_summary_completion_retriever.py
+++ b/cognee/modules/retrieval/graph_summary_completion_retriever.py
@@ -12,7 +12,7 @@ class GraphSummaryCompletionRetriever(GraphCompletionRetriever):
         user_prompt_path: str = "graph_context_for_question.txt",
         system_prompt_path: str = "answer_simple_question.txt",
         summarize_prompt_path: str = "summarize_search_results.txt",
-        top_k: int = 5,
+        top_k: Optional[int] = 5,
     ):
         """Initialize retriever with default prompt paths and search parameters."""
         super().__init__(

From 7b5bd7897f23d657fe670e802f44ba35664ab4ad Mon Sep 17 00:00:00 2001
From: alekszievr <44192193+alekszievr@users.noreply.github.com>
Date: Mon, 10 Mar 2025 15:27:48 +0100
Subject: [PATCH 04/14] Feat: evaluate retrieved context against golden context
 [cog-1481] (#619)

<!-- .github/pull_request_template.md -->

## Description
- Compare retrieved context to golden context using deepeval's
summarization metric
- Display the fields relevant to each metric on the metrics dashboard

Example output:

![image](https://github.com/user-attachments/assets/9facf716-b2ab-4573-bfdf-7b343d2a57c5)


## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit

- **New Features**
- Enhanced context handling in answer generation and corpus building to
include extended details.
- Introduced a new context coverage metric for deeper evaluation
insights.
- Upgraded the evaluation dashboard with dynamic presentation of metric
details.
- Added a new parameter to support loading golden context in corpus
loading methods.

- **Bug Fixes**
- Improved clarity in how answers are structured and appended in the
answer generation process.
<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---
 .../answer_generation_executor.py             | 19 ++++---
 .../benchmark_adapters/dummy_adapter.py       | 19 ++++---
 .../corpus_builder/corpus_builder_executor.py | 16 ++++--
 .../corpus_builder/run_corpus_builder.py      |  5 +-
 .../evaluation/deep_eval_adapter.py           |  3 ++
 .../evaluation/evaluation_executor.py         |  1 +
 .../evaluation/metrics/context_coverage.py    | 50 +++++++++++++++++++
 cognee/eval_framework/metrics_dashboard.py    | 36 +++++++------
 .../eval_framework/deepeval_adapter_test.py   |  7 ++-
 9 files changed, 115 insertions(+), 41 deletions(-)
 create mode 100644 cognee/eval_framework/evaluation/metrics/context_coverage.py

diff --git a/cognee/eval_framework/answer_generation/answer_generation_executor.py b/cognee/eval_framework/answer_generation/answer_generation_executor.py
index 1b984d465..67eb02578 100644
--- a/cognee/eval_framework/answer_generation/answer_generation_executor.py
+++ b/cognee/eval_framework/answer_generation/answer_generation_executor.py
@@ -29,13 +29,16 @@ class AnswerGeneratorExecutor:
             retrieval_context = await retriever.get_context(query_text)
             search_results = await retriever.get_completion(query_text, retrieval_context)
 
-            answers.append(
-                {
-                    "question": query_text,
-                    "answer": search_results[0],
-                    "golden_answer": correct_answer,
-                    "retrieval_context": retrieval_context,
-                }
-            )
+            answer = {
+                "question": query_text,
+                "answer": search_results[0],
+                "golden_answer": correct_answer,
+                "retrieval_context": retrieval_context,
+            }
+
+            if "golden_context" in instance:
+                answer["golden_context"] = instance["golden_context"]
+
+            answers.append(answer)
 
         return answers
diff --git a/cognee/eval_framework/benchmark_adapters/dummy_adapter.py b/cognee/eval_framework/benchmark_adapters/dummy_adapter.py
index 69cc6e518..9bf945d06 100644
--- a/cognee/eval_framework/benchmark_adapters/dummy_adapter.py
+++ b/cognee/eval_framework/benchmark_adapters/dummy_adapter.py
@@ -5,18 +5,21 @@ from cognee.eval_framework.benchmark_adapters.base_benchmark_adapter import Base
 
 class DummyAdapter(BaseBenchmarkAdapter):
     def load_corpus(
-        self, limit: Optional[int] = None, seed: int = 42
+        self, limit: Optional[int] = None, seed: int = 42, load_golden_context: bool = False
     ) -> tuple[list[str], list[dict[str, Any]]]:
         corpus_list = [
             "The cognee is an AI memory engine that supports different vector and graph databases",
             "Neo4j is a graph database supported by cognee",
         ]
-        question_answer_pairs = [
-            {
-                "answer": "Yes",
-                "question": "Is Neo4j supported by cognee?",
-                "type": "dummy",
-            }
-        ]
+        qa_pair = {
+            "answer": "Yes",
+            "question": "Is Neo4j supported by cognee?",
+            "type": "dummy",
+        }
+
+        if load_golden_context:
+            qa_pair["golden_context"] = "Cognee supports Neo4j and NetworkX"
+
+        question_answer_pairs = [qa_pair]
 
         return corpus_list, question_answer_pairs
diff --git a/cognee/eval_framework/corpus_builder/corpus_builder_executor.py b/cognee/eval_framework/corpus_builder/corpus_builder_executor.py
index 2e4a7fd3d..1d2b31e41 100644
--- a/cognee/eval_framework/corpus_builder/corpus_builder_executor.py
+++ b/cognee/eval_framework/corpus_builder/corpus_builder_executor.py
@@ -28,14 +28,22 @@ class CorpusBuilderExecutor:
         self.questions = None
         self.task_getter = task_getter
 
-    def load_corpus(self, limit: Optional[int] = None) -> Tuple[List[Dict], List[str]]:
-        self.raw_corpus, self.questions = self.adapter.load_corpus(limit=limit)
+    def load_corpus(
+        self, limit: Optional[int] = None, load_golden_context: bool = False
+    ) -> Tuple[List[Dict], List[str]]:
+        self.raw_corpus, self.questions = self.adapter.load_corpus(
+            limit=limit, load_golden_context=load_golden_context
+        )
         return self.raw_corpus, self.questions
 
     async def build_corpus(
-        self, limit: Optional[int] = None, chunk_size=1024, chunker=TextChunker
+        self,
+        limit: Optional[int] = None,
+        chunk_size=1024,
+        chunker=TextChunker,
+        load_golden_context: bool = False,
     ) -> List[str]:
-        self.load_corpus(limit=limit)
+        self.load_corpus(limit=limit, load_golden_context=load_golden_context)
         await self.run_cognee(chunk_size=chunk_size, chunker=chunker)
         return self.questions
 
diff --git a/cognee/eval_framework/corpus_builder/run_corpus_builder.py b/cognee/eval_framework/corpus_builder/run_corpus_builder.py
index 2aff21249..6054688d2 100644
--- a/cognee/eval_framework/corpus_builder/run_corpus_builder.py
+++ b/cognee/eval_framework/corpus_builder/run_corpus_builder.py
@@ -47,7 +47,10 @@ async def run_corpus_builder(params: dict, chunk_size=1024, chunker=TextChunker)
             task_getter=task_getter,
         )
         questions = await corpus_builder.build_corpus(
-            limit=params.get("number_of_samples_in_corpus"), chunk_size=chunk_size, chunker=chunker
+            limit=params.get("number_of_samples_in_corpus"),
+            chunk_size=chunk_size,
+            chunker=chunker,
+            load_golden_context=params.get("evaluating_contexts"),
         )
         with open(params["questions_path"], "w", encoding="utf-8") as f:
             json.dump(questions, f, ensure_ascii=False, indent=4)
diff --git a/cognee/eval_framework/evaluation/deep_eval_adapter.py b/cognee/eval_framework/evaluation/deep_eval_adapter.py
index 11f33571b..761d66e05 100644
--- a/cognee/eval_framework/evaluation/deep_eval_adapter.py
+++ b/cognee/eval_framework/evaluation/deep_eval_adapter.py
@@ -4,6 +4,7 @@ from cognee.eval_framework.eval_config import EvalConfig
 from cognee.eval_framework.evaluation.base_eval_adapter import BaseEvalAdapter
 from cognee.eval_framework.evaluation.metrics.exact_match import ExactMatchMetric
 from cognee.eval_framework.evaluation.metrics.f1 import F1ScoreMetric
+from cognee.eval_framework.evaluation.metrics.context_coverage import ContextCoverageMetric
 from typing import Any, Dict, List
 from deepeval.metrics import ContextualRelevancyMetric
 
@@ -15,6 +16,7 @@ class DeepEvalAdapter(BaseEvalAdapter):
             "EM": ExactMatchMetric(),
             "f1": F1ScoreMetric(),
             "contextual_relevancy": ContextualRelevancyMetric(),
+            "context_coverage": ContextCoverageMetric(),
         }
 
     async def evaluate_answers(
@@ -32,6 +34,7 @@ class DeepEvalAdapter(BaseEvalAdapter):
                 actual_output=answer["answer"],
                 expected_output=answer["golden_answer"],
                 retrieval_context=[answer["retrieval_context"]],
+                context=[answer["golden_context"]] if "golden_context" in answer else None,
             )
             metric_results = {}
             for metric in evaluator_metrics:
diff --git a/cognee/eval_framework/evaluation/evaluation_executor.py b/cognee/eval_framework/evaluation/evaluation_executor.py
index 5e56b50c7..1de01f101 100644
--- a/cognee/eval_framework/evaluation/evaluation_executor.py
+++ b/cognee/eval_framework/evaluation/evaluation_executor.py
@@ -23,5 +23,6 @@ class EvaluationExecutor:
     async def execute(self, answers: List[Dict[str, str]], evaluator_metrics: Any) -> Any:
         if self.evaluate_contexts:
             evaluator_metrics.append("contextual_relevancy")
+            evaluator_metrics.append("context_coverage")
         metrics = await self.eval_adapter.evaluate_answers(answers, evaluator_metrics)
         return metrics
diff --git a/cognee/eval_framework/evaluation/metrics/context_coverage.py b/cognee/eval_framework/evaluation/metrics/context_coverage.py
new file mode 100644
index 000000000..9fdd5e14e
--- /dev/null
+++ b/cognee/eval_framework/evaluation/metrics/context_coverage.py
@@ -0,0 +1,50 @@
+from deepeval.metrics import SummarizationMetric
+from deepeval.test_case import LLMTestCase
+from deepeval.metrics.summarization.schema import ScoreType
+from deepeval.metrics.indicator import metric_progress_indicator
+from deepeval.utils import get_or_create_event_loop
+
+
+class ContextCoverageMetric(SummarizationMetric):
+    def measure(
+        self,
+        test_case,
+        _show_indicator: bool = True,
+    ) -> float:
+        mapped_test_case = LLMTestCase(
+            input=test_case.context[0],
+            actual_output=test_case.retrieval_context[0],
+        )
+        self.assessment_questions = None
+        self.evaluation_cost = 0 if self.using_native_model else None
+        with metric_progress_indicator(self, _show_indicator=_show_indicator):
+            if self.async_mode:
+                loop = get_or_create_event_loop()
+                return loop.run_until_complete(
+                    self.a_measure(mapped_test_case, _show_indicator=False)
+                )
+            else:
+                self.coverage_verdicts = self._generate_coverage_verdicts(mapped_test_case)
+                self.alignment_verdicts = []
+                self.score = self._calculate_score(ScoreType.COVERAGE)
+                self.reason = self._generate_reason()
+                self.success = self.score >= self.threshold
+                return self.score
+
+    async def a_measure(
+        self,
+        test_case,
+        _show_indicator: bool = True,
+    ) -> float:
+        self.evaluation_cost = 0 if self.using_native_model else None
+        with metric_progress_indicator(
+            self,
+            async_mode=True,
+            _show_indicator=_show_indicator,
+        ):
+            self.coverage_verdicts = await self._a_generate_coverage_verdicts(test_case)
+            self.alignment_verdicts = []
+            self.score = self._calculate_score(ScoreType.COVERAGE)
+            self.reason = await self._a_generate_reason()
+            self.success = self.score >= self.threshold
+            return self.score
diff --git a/cognee/eval_framework/metrics_dashboard.py b/cognee/eval_framework/metrics_dashboard.py
index 2c917740a..eb4d2ed8e 100644
--- a/cognee/eval_framework/metrics_dashboard.py
+++ b/cognee/eval_framework/metrics_dashboard.py
@@ -3,6 +3,12 @@ import plotly.graph_objects as go
 from typing import Dict, List, Tuple
 from collections import defaultdict
 
+metrics_fields = {
+    "contextual_relevancy": ["question", "retrieval_context"],
+    "context_coverage": ["question", "retrieval_context", "golden_context"],
+}
+default_metrics_fields = ["question", "answer", "golden_answer"]
+
 
 def create_distribution_plots(metrics_data: Dict[str, List[float]]) -> List[str]:
     """Create distribution histogram plots for each metric."""
@@ -59,38 +65,30 @@ def generate_details_html(metrics_data: List[Dict]) -> List[str]:
         for metric, values in entry["metrics"].items():
             if metric not in metric_details:
                 metric_details[metric] = []
+            current_metrics_fields = metrics_fields.get(metric, default_metrics_fields)
             metric_details[metric].append(
-                {
-                    "question": entry["question"],
-                    "answer": entry["answer"],
-                    "golden_answer": entry["golden_answer"],
+                {key: entry[key] for key in current_metrics_fields}
+                | {
                     "reason": values.get("reason", ""),
                     "score": values["score"],
                 }
             )
 
     for metric, details in metric_details.items():
+        formatted_column_names = [key.replace("_", " ").title() for key in details[0].keys()]
         details_html.append(f"<h3>{metric} Details</h3>")
-        details_html.append("""
+        details_html.append(f"""
             <table class="metric-table">
                 <tr>
-                    <th>Question</th>
-                    <th>Answer</th>
-                    <th>Golden Answer</th>
-                    <th>Reason</th>
-                    <th>Score</th>
+                    {"".join(f"<th>{col}</th>" for col in formatted_column_names)}
                 </tr>
         """)
         for item in details:
-            details_html.append(
-                f"<tr>"
-                f"<td>{item['question']}</td>"
-                f"<td>{item['answer']}</td>"
-                f"<td>{item['golden_answer']}</td>"
-                f"<td>{item['reason']}</td>"
-                f"<td>{item['score']}</td>"
-                f"</tr>"
-            )
+            details_html.append(f"""
+                <tr>
+                    {"".join(f"<td>{value}</td>" for value in item.values())}
+                </tr>
+            """)
         details_html.append("</table>")
     return details_html
 
diff --git a/cognee/tests/unit/eval_framework/deepeval_adapter_test.py b/cognee/tests/unit/eval_framework/deepeval_adapter_test.py
index eda9f0b66..48375221c 100644
--- a/cognee/tests/unit/eval_framework/deepeval_adapter_test.py
+++ b/cognee/tests/unit/eval_framework/deepeval_adapter_test.py
@@ -5,7 +5,12 @@ import sys
 
 with patch.dict(
     sys.modules,
-    {"deepeval": MagicMock(), "deepeval.metrics": MagicMock(), "deepeval.test_case": MagicMock()},
+    {
+        "deepeval": MagicMock(),
+        "deepeval.metrics": MagicMock(),
+        "deepeval.test_case": MagicMock(),
+        "cognee.eval_framework.evaluation.metrics.context_coverage": MagicMock(),
+    },
 ):
     from cognee.eval_framework.evaluation.deep_eval_adapter import DeepEvalAdapter
 

From 819e411149248e7ab4ddc839e3a619d6a2e8d59a Mon Sep 17 00:00:00 2001
From: Daniel Molnar <soobrosa@gmail.com>
Date: Mon, 10 Mar 2025 16:07:36 +0100
Subject: [PATCH 05/14] Small clarifications. (#624)

<!-- .github/pull_request_template.md -->

## Description
Small clarifications in README.md.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **Documentation**
- Updated documentation to feature a single, centrally positioned demo
link for clearer navigation.
- Clarified setup instructions to indicate that default configurations
are applied when custom environment variables are not provided.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ef37fd157..7af33c120 100644
--- a/README.md
+++ b/README.md
@@ -8,11 +8,11 @@
   cognee - memory layer for AI apps and Agents
 
   <p align="center">
+  <a href="https://www.youtube.com/watch?v=1bezuvLwJmw&t=2s">Demo</a>
+  ·
   <a href="https://cognee.ai">Learn more</a>
   ·
   <a href="https://discord.gg/NQPKmU5CCg">Join Discord</a>
-  ·
-  <a href="https://www.youtube.com/watch?v=1bezuvLwJmw&t=2s">Demo</a>
   </p>
 
 
@@ -89,7 +89,7 @@ Add LLM_API_KEY to .env using the command bellow.
 ```
 echo "LLM_API_KEY=YOUR_OPENAI_API_KEY" > .env
 ```
-You can see available env variables in the repository `.env.template` file.
+You can see available env variables in the repository `.env.template` file. Unless you specify otherwise, as in this example, SQLite (relational database), LanceDB (vector database) and NetworkX (graph store) are used as the default components.
 
 This script will run the default pipeline:
 

From 1d4d54c1f58af0a223bc8ee1ae6a5ccd1a1c8af7 Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Tue, 11 Mar 2025 03:01:06 +0100
Subject: [PATCH 06/14] Update CONTRIBUTING.md

---
 CONTRIBUTING.md | 111 +++++++++++++++++++++++-------------------------
 1 file changed, 52 insertions(+), 59 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index ba9c3df25..734ae62fe 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,97 +1,90 @@
-# 🚀 How to Contribute to **cognee**
+# 🎉 Welcome to **cognee**! 
 
-Thank you for investing time in contributing to our project! Here's a guide to get you started.
+We're excited that you're interested in contributing to our project! This guide will help you get started and ensure your contributions can be efficiently integrated into the project.
 
-## 1. 🚀 Getting Started
+## 🌟 Quick Links
 
-### 🍴 Fork the Repository
+- [Code of Conduct](CODE_OF_CONDUCT.md)
+- [Discord Community](https://discord.gg/bcy8xFAtfd)  
+- [Issue Tracker](https://github.com/topoteretes/cognee/issues)
 
-To start your journey, you'll need your very own copy of **cognee**. Think of it as your own innovation lab. 🧪
+## 1. 🚀 Ways to Contribute
 
-1. Navigate to the [**cognee**](https://github.com/topoteretes/cognee) repository on GitHub.
-2. In the upper-right corner, click the **'Fork'** button.
+You can contribute to **cognee** in many ways:
 
-### 🚀 Clone the Repository
+- 📝 Submitting bug reports or feature requests
+- 💡 Improving documentation
+- 🔍 Reviewing pull requests
+- 🛠️ Contributing code or tests
+- 🌐 Helping other users
 
-Next, let's bring your newly forked repository to your local machine.
+## 2. 🛠️ Development Setup
 
+### Fork and Clone
+
+1. Fork the [**cognee**](https://github.com/topoteretes/cognee) repository
+2. Clone your fork:
 ```shell
 git clone https://github.com/<your-github-username>/cognee.git
+cd cognee
 ```
 
-## 2. 🛠️ Making Changes
-
-### 🌟 Create a Branch
-
-Get ready to channel your creativity. Begin by creating a new branch for your incredible features. 🧞‍♂️
+### Create a Branch
 
+Create a new branch for your work:
 ```shell
 git checkout -b feature/your-feature-name
 ```
 
-### ✏️ Make Your Changes
+## 3. 🎯 Making Changes
 
-Now's your chance to shine! Dive in and make your contributions. 🌠
-
-## 3. 🚀 Submitting Changes
-
-After making your changes, follow these steps:
-
-### ✅ Run the Tests
-
-Ensure your changes do not break the existing codebase:
+1. **Code Style**: Follow the project's coding standards
+2. **Documentation**: Update relevant documentation
+3. **Tests**: Add tests for new features
+4. **Commits**: Write clear commit messages
 
+### Running Tests
 ```shell
 python cognee/cognee/tests/test_library.py
 ```
 
-### 🚢 Push Your Feature Branch
+## 4. 📤 Submitting Changes
 
+1. Push your changes:
 ```shell
-# Add your changes to the staging area:
 git add .
-
-# Commit changes with an adequate description:
-git commit -m "Describe your changes here"
-
-# Push your feature branch to your forked repository:
+git commit -s -m "Description of your changes"
 git push origin feature/your-feature-name
 ```
 
-### 🚀 Create a Pull Request
+2. Create a Pull Request:
+   - Go to the [**cognee** repository](https://github.com/topoteretes/cognee)
+   - Click "Compare & Pull Request"
+   - Fill in the PR template with details about your changes
 
-You're on the verge of completion! It's time to showcase your hard work. 🌐
+## 5. 📜 Developer Certificate of Origin (DCO)
 
-1. Go to [**cognee**](https://github.com/topoteretes/cognee) on GitHub.
-2. Hit the **"Compare & Pull Request"** button.
-3. Select the base branch (main) and the compare branch (the one with your features).
-4. Craft a **compelling title** and provide a **detailed description** of your contributions. 🎩
+All contributions must be signed-off to indicate agreement with our DCO:
 
-## 4. 🔍 Review and Approval
+```shell
+git config alias.cos "commit -s"  # Create alias for signed-off commits
+```
 
-The project maintainers will review your work, possibly suggest improvements, or request further details. Once you receive approval, your contributions will become part of **cognee**!
+When your PR is ready, please include:
+> "I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin"
 
+## 6. 🤝 Community Guidelines
 
-## 5. Developer Certificate of Origin
-All contributions to the topoteretes codebase must be signed-off to indicate you have read and agreed to the Developer Certificate of Origin (DCO), which is in the root directory under name DCO. To sign the DCO, simply add -s after all commits that you make, to do this easily you can make a git alias from the command line, for example:
+- Be respectful and inclusive
+- Help others learn and grow
+- Follow our [Code of Conduct](CODE_OF_CONDUCT.md)
+- Provide constructive feedback
+- Ask questions when unsure
 
-$ git config alias.cos "commit -s"
+## 7. 📫 Getting Help
 
-Will allow you to write git cos which will automatically sign-off your commit. By signing a commit you are agreeing to the DCO and agree that you will be banned from the topoteretes GitHub organisation and Discord server if you violate the DCO.
+- Open an [issue](https://github.com/topoteretes/cognee/issues)
+- Join our Discord community
+- Check existing documentation
 
-"When a commit is ready to be merged please use the following template to agree to our developer certificate of origin:
-  'I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin'
-
-We consider the following as violations to the DCO:
-
-Signing the DCO with a fake name or pseudonym, if you are registered on GitHub or another platform with a fake name then you will not be able to contribute to topoteretes before updating your name;
-Submitting a contribution that you did not have the right to submit whether due to licensing, copyright, or any other restrictions.
-
-## 6. 📜 Code of Conduct
-Ensure you adhere to the project's [Code of Conduct](https://github.com/topoteretes/cognee/blob/main/CODE_OF_CONDUCT.md) throughout your participation.
-
-## 7. 📫 Contact
-
-If you need assistance or simply wish to connect, we're here for you. Contact us by filing an issue on the GitHub repository or by messaging us on our Discord server.
-
-Thanks for helping to evolve **cognee**!
+Thank you for contributing to **cognee**! 🌟

From a74c96609f31886c24b8b92e8716aec2a1ad9913 Mon Sep 17 00:00:00 2001
From: Vasilije <8619304+Vasilije1990@users.noreply.github.com>
Date: Tue, 11 Mar 2025 03:07:25 +0100
Subject: [PATCH 07/14] Update CONTRIBUTING.md

---
 CONTRIBUTING.md | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 734ae62fe..9e97d0d23 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,6 +1,8 @@
 # 🎉 Welcome to **cognee**! 
 
-We're excited that you're interested in contributing to our project! This guide will help you get started and ensure your contributions can be efficiently integrated into the project.
+We're excited that you're interested in contributing to our project! 
+We want to ensure that every user and contributor feels welcome, included and supported to participate in the cognee community. 
+This guide will help you get started and ensure your contributions can be efficiently integrated into the project.
 
 ## 🌟 Quick Links
 
@@ -18,6 +20,42 @@ You can contribute to **cognee** in many ways:
 - 🛠️ Contributing code or tests
 - 🌐 Helping other users
 
+## 📫 Get in Touch
+
+There are several ways to connect with the **cognee** team and community:
+
+### GitHub Collaboration
+- [Open an issue](https://github.com/topoteretes/cognee/issues) for bug reports, feature requests, or discussions
+- Submit pull requests to contribute code or documentation
+- Join ongoing discussions in existing issues and PRs
+
+### Community Channels
+- Join our [Discord community](https://discord.gg/bcy8xFAtfd) for real-time discussions
+- Participate in community events and discussions
+- Get help from other community members
+
+### Direct Contact
+- Email: vasilije@cognee.ai
+- For business inquiries or sensitive matters, please reach out via email
+- For general questions, prefer public channels like GitHub issues or Discord
+
+We aim to respond to all communications within 2 business days. For faster responses, consider using our Discord channel where the whole community can help!
+
+## Issue Labels
+
+To help you find the most appropriate issues to work on, we use the following labels:
+
+- `good first issue` - Perfect for newcomers to the project
+- `bug` - Something isn't working as expected
+- `documentation` - Improvements or additions to documentation
+- `enhancement` - New features or improvements
+- `help wanted` - Extra attention or assistance needed
+- `question` - Further information is requested
+- `wontfix` - This will not be worked on
+
+Looking for a place to start? Try filtering for [good first issues](https://github.com/topoteretes/cognee/labels/good%20first%20issue)!
+
+
 ## 2. 🛠️ Development Setup
 
 ### Fork and Clone

From 3f69234776d5f814d0ec41d2b9d857c73d6a7cbb Mon Sep 17 00:00:00 2001
From: Boris Arzentar <borisarzentar@gmail.com>
Date: Tue, 11 Mar 2025 16:41:12 +0100
Subject: [PATCH 08/14] fix: remove double install step from Dockerfile

---
 cognee-mcp/Dockerfile     | 11 ++++++-----
 cognee-mcp/README.md      |  2 +-
 cognee-mcp/pyproject.toml |  2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/cognee-mcp/Dockerfile b/cognee-mcp/Dockerfile
index 45e601e0f..e229fb08a 100644
--- a/cognee-mcp/Dockerfile
+++ b/cognee-mcp/Dockerfile
@@ -20,15 +20,16 @@ RUN apt-get update && apt-get install -y \
     gcc \
     libpq-dev
 
-RUN apt-get install -y \
-    gcc \
-    libpq-dev
-
 COPY . /app
 
 RUN uv sync --reinstall
 
 # Place executables in the environment at the front of the path
-ENV PATH="/app/:/app/.venv/bin:$PATH"
+ENV PATH="/app:/app/.venv/bin:$PATH"
+
+# Set environment variables for MCP server
+ENV PYTHONUNBUFFERED=1
+ENV MCP_LOG_LEVEL=DEBUG
+ENV PYTHONPATH=/app
 
 ENTRYPOINT ["cognee"]
diff --git a/cognee-mcp/README.md b/cognee-mcp/README.md
index c9926270c..fa8888f29 100644
--- a/cognee-mcp/README.md
+++ b/cognee-mcp/README.md
@@ -82,5 +82,5 @@ http://localhost:5173?timeout=120000
 To apply new changes while developing cognee you need to do:
 
 1. `poetry lock` in cognee folder
-2. `uv sync --dev --all-extras --reinstall `
+2. `uv sync --dev --all-extras --reinstall`
 3. `mcp dev src/server.py`
diff --git a/cognee-mcp/pyproject.toml b/cognee-mcp/pyproject.toml
index 972b44b05..1f9bae195 100644
--- a/cognee-mcp/pyproject.toml
+++ b/cognee-mcp/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "cognee-mcp"
-version = "0.1.0"
+version = "0.2.0"
 description = "A MCP server project"
 readme = "README.md"
 requires-python = ">=3.10"

From deb3e0cce146e950c052b7030b7d9bbd362b0e18 Mon Sep 17 00:00:00 2001
From: Boris Arzentar <borisarzentar@gmail.com>
Date: Tue, 11 Mar 2025 16:41:38 +0100
Subject: [PATCH 09/14] version: v0.1.33

---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 147ffe798..7aac688db 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cognee"
-version = "0.1.32"
+version = "0.1.33"
 description = "Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning."
 authors = ["Vasilije Markovic", "Boris Arzentar"]
 readme = "README.md"

From 40c0015f0d339e75fd0a7ae146455e9feffa7f1f Mon Sep 17 00:00:00 2001
From: Boris Arzentar <borisarzentar@gmail.com>
Date: Tue, 11 Mar 2025 16:43:22 +0100
Subject: [PATCH 10/14] chore: update uv.lock

---
 cognee-mcp/uv.lock | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cognee-mcp/uv.lock b/cognee-mcp/uv.lock
index 182f35103..88f66e626 100644
--- a/cognee-mcp/uv.lock
+++ b/cognee-mcp/uv.lock
@@ -547,7 +547,7 @@ huggingface = [
 
 [[package]]
 name = "cognee-mcp"
-version = "0.1.0"
+version = "0.2.0"
 source = { editable = "." }
 dependencies = [
     { name = "cognee", extra = ["codegraph", "gemini", "huggingface"] },

From 2e4aab9a9ad53cc76e97e9a786627f079922ec77 Mon Sep 17 00:00:00 2001
From: Boris Arzentar <borisarzentar@gmail.com>
Date: Tue, 11 Mar 2025 16:44:00 +0100
Subject: [PATCH 11/14] fix: example ruff errors

---
 examples/python/pokemon_datapoints_example.py | 28 +++++++++++++++----
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/examples/python/pokemon_datapoints_example.py b/examples/python/pokemon_datapoints_example.py
index 058492e63..83179cf9f 100644
--- a/examples/python/pokemon_datapoints_example.py
+++ b/examples/python/pokemon_datapoints_example.py
@@ -21,11 +21,13 @@ BASE_URL = "https://pokeapi.co/api/v2/"
 os.environ["BUCKET_URL"] = "./.data_storage"
 os.environ["DATA_WRITER__DISABLE_COMPRESSION"] = "true"
 
+
 # Data Models
 class Abilities(DataPoint):
     name: str = "Abilities"
     metadata: dict = {"index_fields": ["name"]}
 
+
 class PokemonAbility(DataPoint):
     name: str
     ability__name: str
@@ -39,11 +41,13 @@ class PokemonAbility(DataPoint):
     is_type: Abilities
     metadata: dict = {"index_fields": ["ability__name"]}
 
+
 class Pokemons(DataPoint):
     name: str = "Pokemons"
     have: Abilities
     metadata: dict = {"index_fields": ["name"]}
 
+
 class Pokemon(DataPoint):
     name: str
     base_experience: int
@@ -66,6 +70,7 @@ class Pokemon(DataPoint):
     abilities: List[PokemonAbility]
     metadata: dict = {"index_fields": ["name"]}
 
+
 # Data Collection Functions
 @dlt.resource(write_disposition="replace")
 def pokemon_list(limit: int = 50):
@@ -73,6 +78,7 @@ def pokemon_list(limit: int = 50):
     response.raise_for_status()
     yield response.json()["results"]
 
+
 @dlt.transformer(data_from=pokemon_list)
 def pokemon_details(pokemons):
     """Fetches detailed info for each Pokémon"""
@@ -81,6 +87,7 @@ def pokemon_details(pokemons):
         response.raise_for_status()
         yield response.json()
 
+
 # Data Loading Functions
 def load_abilities_data(jsonl_abilities):
     abilities_root = Abilities()
@@ -97,6 +104,7 @@ def load_abilities_data(jsonl_abilities):
 
     return abilities_root, pokemon_abilities
 
+
 def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):
     pokemons = []
 
@@ -105,7 +113,8 @@ def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):
             for line in f:
                 pokemon_data = json.loads(line)
                 abilities = [
-                    ability for ability in pokemon_abilities
+                    ability
+                    for ability in pokemon_abilities
                     if ability["_dlt_parent_id"] == pokemon_data["_dlt_id"]
                 ]
                 pokemon_data["external_id"] = pokemon_data["id"]
@@ -116,12 +125,17 @@ def load_pokemon_data(jsonl_pokemons, pokemon_abilities, pokemon_root):
 
     return pokemons
 
+
 # Main Application Logic
 async def setup_and_process_data():
     """Setup configuration and process Pokemon data"""
     # Setup configuration
-    data_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage")).resolve())
-    cognee_directory_path = str(pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system")).resolve())
+    data_directory_path = str(
+        pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".data_storage")).resolve()
+    )
+    cognee_directory_path = str(
+        pathlib.Path(os.path.join(pathlib.Path(__file__).parent, ".cognee_system")).resolve()
+    )
 
     cognee.config.data_root_directory(data_directory_path)
     cognee.config.system_root_directory(cognee_directory_path)
@@ -153,6 +167,7 @@ async def setup_and_process_data():
 
     return pokemons
 
+
 async def pokemon_cognify(pokemons):
     """Process Pokemon data with Cognee and perform search"""
     # Setup and run Cognee tasks
@@ -165,7 +180,7 @@ async def pokemon_cognify(pokemons):
         tasks=tasks,
         data=pokemons,
         dataset_id=uuid5(NAMESPACE_OID, "Pokemon"),
-        pipeline_name='pokemon_pipeline',
+        pipeline_name="pokemon_pipeline",
     )
 
     async for result in results:
@@ -174,17 +189,18 @@ async def pokemon_cognify(pokemons):
 
     # Perform search
     search_results = await cognee.search(
-        query_type=SearchType.GRAPH_COMPLETION,
-        query_text="pokemons?"
+        query_type=SearchType.GRAPH_COMPLETION, query_text="pokemons?"
     )
 
     print("Search results:")
     for result_text in search_results:
         print(result_text)
 
+
 async def main():
     pokemons = await setup_and_process_data()
     await pokemon_cognify(pokemons)
 
+
 if __name__ == "__main__":
     asyncio.run(main())

From d5d01109a2df9b077fc6a496ec82e304681267af Mon Sep 17 00:00:00 2001
From: Boris Arzentar <borisarzentar@gmail.com>
Date: Tue, 11 Mar 2025 18:02:43 +0100
Subject: [PATCH 12/14] fix: use new Dockerfile for mcp server

---
 cognee-mcp/Dockerfile | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/cognee-mcp/Dockerfile b/cognee-mcp/Dockerfile
index e229fb08a..4d7d455ee 100644
--- a/cognee-mcp/Dockerfile
+++ b/cognee-mcp/Dockerfile
@@ -1,13 +1,7 @@
 # Use a Python image with uv pre-installed
 FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS uv
 
-# Set build argument
-ARG DEBUG
-
-# Set environment variable based on the build argument
-ENV DEBUG=${DEBUG}
-ENV PIP_NO_CACHE_DIR=true
-
+# Install the project into `/app`
 WORKDIR /app
 
 # Enable bytecode compilation
@@ -16,20 +10,27 @@ ENV UV_COMPILE_BYTECODE=1
 # Copy from the cache instead of linking since it's a mounted volume
 ENV UV_LINK_MODE=copy
 
-RUN apt-get update && apt-get install -y \
-    gcc \
-    libpq-dev
+# Install the project's dependencies using the lockfile and settings
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --frozen --no-install-project --no-dev --no-editable
 
-COPY . /app
+# Then, copy the rest of the project source code and install it
+# Installing separately from its dependencies allows optimal layer caching
+COPY . /app
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-dev --no-editable
 
-RUN uv sync --reinstall
+FROM python:3.12-slim-bookworm
+
+WORKDIR /app
+ 
+COPY --from=uv /root/.local /root/.local
+COPY --from=uv --chown=app:app /app/.venv /app/.venv
+COPY --from=uv --chown=app:app /app/src /app/src
 
 # Place executables in the environment at the front of the path
-ENV PATH="/app:/app/.venv/bin:$PATH"
-
-# Set environment variables for MCP server
-ENV PYTHONUNBUFFERED=1
-ENV MCP_LOG_LEVEL=DEBUG
-ENV PYTHONPATH=/app
+ENV PATH="/app/.venv/bin:$PATH"
 
 ENTRYPOINT ["cognee"]

From 4719b82c562ab0a95421b9b53d40655092609a5d Mon Sep 17 00:00:00 2001
From: Boris Arzentar <borisarzentar@gmail.com>
Date: Tue, 11 Mar 2025 18:34:35 +0100
Subject: [PATCH 13/14] fix: don't compile python to bytecode in Dockerfile

---
 cognee-mcp/Dockerfile | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/cognee-mcp/Dockerfile b/cognee-mcp/Dockerfile
index 4d7d455ee..8efb7c46e 100644
--- a/cognee-mcp/Dockerfile
+++ b/cognee-mcp/Dockerfile
@@ -5,7 +5,7 @@ FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS uv
 WORKDIR /app
 
 # Enable bytecode compilation
-ENV UV_COMPILE_BYTECODE=1
+# ENV UV_COMPILE_BYTECODE=1
 
 # Copy from the cache instead of linking since it's a mounted volume
 ENV UV_LINK_MODE=copy
@@ -27,8 +27,7 @@ FROM python:3.12-slim-bookworm
 WORKDIR /app
  
 COPY --from=uv /root/.local /root/.local
-COPY --from=uv --chown=app:app /app/.venv /app/.venv
-COPY --from=uv --chown=app:app /app/src /app/src
+COPY --from=uv --chown=app:app /app /app
 
 # Place executables in the environment at the front of the path
 ENV PATH="/app/.venv/bin:$PATH"

From 68b337f0b60492d982005944fb1f32a4676fc86a Mon Sep 17 00:00:00 2001
From: Daniel Molnar <soobrosa@gmail.com>
Date: Tue, 11 Mar 2025 18:44:56 +0100
Subject: [PATCH 14/14] Cline for VSCode demo runs. (#631)

<!-- .github/pull_request_template.md -->

## Description
Missing dependency.

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin


<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->

## Summary by CodeRabbit

- **New Features**
- Enabled PostgreSQL integration, expanding support for additional
database options and enhancing overall functionality.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->
---
 cognee-mcp/pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cognee-mcp/pyproject.toml b/cognee-mcp/pyproject.toml
index 1f9bae195..7ccb826f4 100644
--- a/cognee-mcp/pyproject.toml
+++ b/cognee-mcp/pyproject.toml
@@ -6,7 +6,7 @@ readme = "README.md"
 requires-python = ">=3.10"
 
 dependencies = [
-    "cognee[codegraph,gemini,huggingface]",
+    "cognee[postgres,codegraph,gemini,huggingface]",
     "mcp==1.2.1",
     "uv>=0.6.3",
 ]