fix: cognee docker image (#820)
## Description

Fixes the cognee Docker image. The Dockerfile moves from a single-stage Poetry install on `python:3.11-slim` to a multi-stage build: dependencies are resolved with `uv sync` against `uv.lock` in a `ghcr.io/astral-sh/uv:python3.12-bookworm-slim` stage and copied into a `python:3.12-slim-bookworm` runtime stage with the virtualenv on `PATH`, and the entrypoint script drops its Poetry calls. Alongside the image fix, `debugpy` becomes a dedicated `debug` extra, pipeline status is tracked per `pipeline_name` (with a new `DATASET_PROCESSING_INITIATED` status and a `log_pipeline_run_initiated` operation), and the frontend identifies datasets by name and gains a Cognify button in the data view.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
parent: 91f3cd9ef7
commit: 0f3522eea6

26 changed files with 3923 additions and 3816 deletions
#### Dockerfile (79 changed lines)

```diff
@@ -1,60 +1,61 @@
-FROM python:3.11-slim
+# Use a Python image with uv pre-installed
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS uv

-# Define Poetry extras to install
-ARG POETRY_EXTRAS="\
-    # API \
-    api \
-    # Storage & Databases \
-    postgres weaviate qdrant neo4j falkordb milvus kuzu chromadb \
-    # Notebooks & Interactive Environments \
-    notebook \
-    # LLM & AI Frameworks \
-    langchain llama-index gemini huggingface ollama mistral groq anthropic \
-    # Evaluation & Monitoring \
-    deepeval evals posthog \
-    # Graph Processing & Code Analysis \
-    codegraph graphiti \
-    # Document Processing \
-    docs"
+# Install the project into `/app`
+WORKDIR /app
+
+# Enable bytecode compilation
+# ENV UV_COMPILE_BYTECODE=1
+
+# Copy from the cache instead of linking since it's a mounted volume
+ENV UV_LINK_MODE=copy

 # Set build argument
 ARG DEBUG

 # Set environment variable based on the build argument
 ENV DEBUG=${DEBUG}
-ENV PIP_NO_CACHE_DIR=true
-ENV PATH="${PATH}:/root/.poetry/bin"

-RUN apt-get update
-
-RUN apt-get install -y \
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
     gcc \
     libpq-dev \
     git \
     curl \
+    clang \
+    build-essential \
+    libpq-dev \
+    && rm -rf /var/lib/apt/lists/*

-WORKDIR /app
-COPY pyproject.toml poetry.lock /app/
+# Copy pyproject.toml and lockfile first for better caching
+COPY README.md pyproject.toml uv.lock entrypoint.sh ./

-RUN pip install poetry
-
-# Don't create virtualenv since Docker is already isolated
-RUN poetry config virtualenvs.create false
-
-# Install the dependencies using the defined extras
-RUN poetry install --extras "${POETRY_EXTRAS}" --no-root
-
-# Set the PYTHONPATH environment variable to include the /app directory
-ENV PYTHONPATH=/app
-
-COPY cognee/ /app/cognee
+# Install the project's dependencies using the lockfile and settings
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra kuzu --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable

 # Copy Alembic configuration
 COPY alembic.ini /app/alembic.ini
 COPY alembic/ /app/alembic

-COPY entrypoint.sh /app/entrypoint.sh
+# Then, add the rest of the project source code and install it
+# Installing separately from its dependencies allows optimal layer caching
+COPY ./cognee /app/cognee
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra kuzu --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
+
+FROM python:3.12-slim-bookworm
+
+WORKDIR /app
+
+COPY --from=uv /app /app
+# COPY --from=uv /app/.venv /app/.venv
+# COPY --from=uv /root/.local /root/.local

 RUN chmod +x /app/entrypoint.sh

 RUN sed -i 's/\r$//' /app/entrypoint.sh
+
+# Place executables in the environment at the front of the path
+ENV PATH="/app/.venv/bin:$PATH"
+
+ENV PYTHONPATH=/app

 ENTRYPOINT ["/app/entrypoint.sh"]
```
```diff
@@ -43,7 +43,7 @@ export default function Home() {
   const onDataAdd = useCallback((dataset: { id: string }, files: File[]) => {
     return addData(dataset, files)
       .then(() => {
-        showNotification("Data added successfully.", 5000);
+        showNotification("Data added successfully. Please run \"Cognify\" when ready.", 5000);
         openDatasetData(dataset);
       });
   }, [showNotification])
@@ -60,6 +60,14 @@ export default function Home() {
     });
   }, [showNotification]);

+  const onCognify = useCallback(() => {
+    const dataset = datasets.find((dataset) => dataset.id === selectedDataset);
+    return onDatasetCognify({
+      id: dataset!.id,
+      name: dataset!.name,
+    });
+  }, [datasets, onDatasetCognify, selectedDataset]);
+
   const {
     value: isSettingsModalOpen,
     setTrue: openSettingsModal,
@@ -95,6 +103,7 @@ export default function Home() {
             datasetId={selectedDataset}
             onClose={closeDatasetData}
             onDataAdd={onDataAdd}
+            onCognify={onCognify}
           />
         </div>
       )}
```
```diff
@@ -7,7 +7,7 @@ import cognifyDataset from '@/modules/datasets/cognifyDataset';

 interface ConfigStepProps {
   onNext: () => void;
-  dataset: { id: string }
+  dataset: { name: string }
 }

 export default function CognifyStep({ onNext, dataset }: ConfigStepProps) {
```
```diff
@@ -2,13 +2,13 @@ import { Explorer } from '@/ui/Partials';
 import { Spacer } from 'ohmy-ui';

 interface ExploreStepProps {
-  dataset: { id: string };
+  dataset: { name: string };
 }

 export default function ExploreStep({ dataset }: ExploreStepProps) {
   return (
     <Spacer horizontal="3">
-      <Explorer dataset={dataset!} />
+      <Explorer dataset={dataset} />
     </Spacer>
   )
 }
```
```diff
@@ -29,7 +29,7 @@ export default function WizardPage({
     setFalse: closeSettingsModal,
   } = useBoolean(false);

-  const dataset = { id: 'main' };
+  const dataset = { name: 'main' };

   return (
     <main className={styles.main}>
```
```diff
@@ -1,13 +1,13 @@
 import { fetch } from '@/utils';

-export default function cognifyDataset(dataset: { id: string, name: string }) {
+export default function cognifyDataset(dataset: { id?: string, name?: string }) {
   return fetch('/v1/cognify', {
     method: 'POST',
     headers: {
       'Content-Type': 'application/json',
     },
     body: JSON.stringify({
-      datasets: [dataset.id],
+      datasets: [dataset.id || dataset.name],
     }),
   }).then((response) => response.json());
 }
```
```diff
@@ -1,6 +1,6 @@
 import { fetch } from '@/utils';

-export default function getExplorationGraphUrl(dataset: { id: string }) {
+export default function getExplorationGraphUrl(dataset: { name: string }) {
   return fetch('/v1/visualize')
     .then(async (response) => {
       if (response.status !== 200) {
```
```diff
@@ -6,6 +6,8 @@ import {
   Text,
   UploadInput,
   CloseIcon,
+  CTAButton,
+  useBoolean,
 } from "ohmy-ui";
 import { fetch } from '@/utils';
 import RawDataPreview from './RawDataPreview';
@@ -28,9 +30,10 @@ interface DataViewProps {
   datasetId: string;
   onClose: () => void;
   onDataAdd: (dataset: DatasetLike, files: File[]) => void;
+  onCognify: () => Promise<any>;
 }

-export default function DataView({ datasetId, data, onClose, onDataAdd }: DataViewProps) {
+export default function DataView({ datasetId, data, onClose, onDataAdd, onCognify }: DataViewProps) {
   // const handleDataDelete = () => {};
   const [rawData, setRawData] = useState<ArrayBuffer | null>(null);
   const [selectedData, setSelectedData] = useState<Data | null>(null);
@@ -52,7 +55,19 @@ export default function DataView({ datasetId, data, onClose, onDataAdd }: DataVi

   const handleDataAdd = (files: File[]) => {
     onDataAdd({ id: datasetId }, files);
-  }
+  };
+
+  const {
+    value: isCognifyButtonDisabled,
+    setTrue: disableCognifyButton,
+    setFalse: enableCognifyButton,
+  } = useBoolean(false);
+
+  const handleCognify = () => {
+    disableCognifyButton();
+    onCognify()
+      .finally(() => enableCognifyButton());
+  };

   return (
     <Stack orientation="vertical" gap="4">
@@ -62,6 +77,11 @@ export default function DataView({ datasetId, data, onClose, onDataAdd }: DataVi
             <Text>Add data</Text>
           </UploadInput>
         </div>
+        <div>
+          <CTAButton disabled={isCognifyButtonDisabled} onClick={handleCognify}>
+            <Text>Cognify</Text>
+          </CTAButton>
+        </div>
         <GhostButton hugContent onClick={onClose}>
           <CloseIcon />
         </GhostButton>
```
```diff
@@ -7,7 +7,7 @@ import { getExplorationGraphUrl } from '@/modules/exploration';
 import styles from './Explorer.module.css';

 interface ExplorerProps {
-  dataset: { id: string };
+  dataset: { name: string };
   className?: string;
   style?: React.CSSProperties;
 }
```
```diff
@@ -28,9 +28,6 @@ export default function SearchView() {
   }, []);

   const searchOptions = [{
-    value: 'INSIGHTS',
-    label: 'Query insights from documents',
-  }, {
     value: 'GRAPH_COMPLETION',
     label: 'Completion using Cognee\'s graph based memory',
   }, {
@@ -81,6 +78,8 @@ export default function SearchView() {

     scrollToBottom();

+    setInputValue('');
+
     const searchTypeValue = searchType.value;

     fetch('/v1/search', {
@@ -103,10 +102,12 @@ export default function SearchView() {
           text: convertToSearchTypeOutput(systemMessage, searchTypeValue),
         },
       ]);
-      setInputValue('');

       scrollToBottom();
     })
+    .catch(() => {
+      setInputValue(inputValue);
+    });
   }, [inputValue, scrollToBottom, searchType.value]);

   const {
```
```diff
@@ -1,7 +1,7 @@
 import handleServerErrors from './handleServerErrors';

 export default function fetch(url: string, options: RequestInit = {}): Promise<Response> {
-  return global.fetch('http://127.0.0.1:8000/api' + url, {
+  return global.fetch('http://localhost:8000/api' + url, {
     ...options,
     headers: {
       ...options.headers,
```
```diff
@@ -144,7 +144,9 @@ async def cognify_status():
     """Get status of cognify pipeline"""
     with redirect_stdout(sys.stderr):
         user = await get_default_user()
-        status = await get_pipeline_status([await get_unique_dataset_id("main_dataset", user)])
+        status = await get_pipeline_status(
+            [await get_unique_dataset_id("main_dataset", user)], "cognify_pipeline"
+        )
         return [types.TextContent(type="text", text=str(status))]
@@ -153,7 +155,9 @@ async def codify_status():
     """Get status of codify pipeline"""
     with redirect_stdout(sys.stderr):
         user = await get_default_user()
-        status = await get_pipeline_status([await get_unique_dataset_id("codebase", user)])
+        status = await get_pipeline_status(
+            [await get_unique_dataset_id("codebase", user)], "cognify_code_pipeline"
+        )
         return [types.TextContent(type="text", text=str(status))]
```
```diff
@@ -1,8 +1,9 @@
 from typing import Union, BinaryIO, List, Optional
-from cognee.modules.users.models import User

-from cognee.modules.pipelines import Task
-from cognee.tasks.ingestion import ingest_data, resolve_data_directories
+from cognee.modules.users.models import User
+from cognee.modules.pipelines import cognee_pipeline
+from cognee.tasks.ingestion import ingest_data, resolve_data_directories


 async def add(
```
```diff
@@ -34,7 +34,9 @@ async def cognify(
 ):
     tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)

-    return await cognee_pipeline(tasks=tasks, datasets=datasets, user=user)
+    return await cognee_pipeline(
+        tasks=tasks, datasets=datasets, user=user, pipeline_name="cognify_pipeline"
+    )


 async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's comment)
```
```diff
@@ -28,7 +28,7 @@ class datasets:

     @staticmethod
     async def get_status(dataset_ids: list[UUID]) -> dict:
-        return await get_pipeline_status(dataset_ids)
+        return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")

     @staticmethod
     async def delete_dataset(dataset_id: str):
```
```diff
@@ -1,8 +1,8 @@
 import asyncio
-from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints
 import lancedb
-from lancedb.pydantic import LanceModel, Vector
 from pydantic import BaseModel
+from lancedb.pydantic import LanceModel, Vector
+from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints

 from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.engine import DataPoint
@@ -16,8 +16,6 @@ from ..models.ScoredResult import ScoredResult
 from ..utils import normalize_distances
 from ..vector_db_interface import VectorDBInterface

-from tenacity import retry, stop_after_attempt, wait_exponential
-

 class IndexSchema(DataPoint):
     id: str
```
```diff
@@ -6,6 +6,7 @@ from cognee.infrastructure.databases.relational import Base


 class PipelineRunStatus(enum.Enum):
+    DATASET_PROCESSING_INITIATED = "DATASET_PROCESSING_INITIATED"
     DATASET_PROCESSING_STARTED = "DATASET_PROCESSING_STARTED"
     DATASET_PROCESSING_COMPLETED = "DATASET_PROCESSING_COMPLETED"
     DATASET_PROCESSING_ERRORED = "DATASET_PROCESSING_ERRORED"
```
```diff
@@ -1,3 +1,4 @@
+from .log_pipeline_run_initiated import log_pipeline_run_initiated
 from .log_pipeline_run_start import log_pipeline_run_start
 from .log_pipeline_run_complete import log_pipeline_run_complete
 from .log_pipeline_run_error import log_pipeline_run_error
```
```diff
@@ -5,7 +5,7 @@ from ..models import PipelineRun
 from sqlalchemy.orm import aliased


-async def get_pipeline_status(dataset_ids: list[UUID]):
+async def get_pipeline_status(dataset_ids: list[UUID], pipeline_name: str):
     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
@@ -20,6 +20,7 @@ async def get_pipeline_status(dataset_ids: list[UUID]):
                 .label("rn"),
             )
             .filter(PipelineRun.dataset_id.in_(dataset_ids))
+            .filter(PipelineRun.pipeline_name == pipeline_name)
             .subquery()
         )
```
```diff
@@ -0,0 +1,22 @@
+from uuid import UUID, uuid4
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.pipelines.models import PipelineRun, PipelineRunStatus
+
+
+async def log_pipeline_run_initiated(pipeline_id: str, pipeline_name: str, dataset_id: UUID):
+    pipeline_run = PipelineRun(
+        pipeline_run_id=uuid4(),
+        pipeline_name=pipeline_name,
+        pipeline_id=pipeline_id,
+        status=PipelineRunStatus.DATASET_PROCESSING_INITIATED,
+        dataset_id=dataset_id,
+        run_info={},
+    )
+
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        session.add(pipeline_run)
+        await session.commit()
+
+    return pipeline_run
```
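This new operation is what lets the add flow reset a dataset's pipeline state: a `DATASET_PROCESSING_INITIATED` row becomes the latest run, so a previously `DATASET_PROCESSING_COMPLETED` cognify run no longer short-circuits execution. A small usage sketch mirroring the call made in `run_pipeline` further down; the dataset-id derivation is taken from that hunk, while the wrapper function itself is illustrative:

```python
from uuid import NAMESPACE_OID, uuid5

from cognee.modules.pipelines.operations import log_pipeline_run_initiated


async def reset_cognify_status(dataset_name: str, user) -> None:
    # Illustrative wrapper; run_pipeline below derives dataset_id the same way.
    dataset_id = uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")

    await log_pipeline_run_initiated(
        pipeline_id=uuid5(NAMESPACE_OID, "cognify_pipeline"),
        pipeline_name="cognify_pipeline",
        dataset_id=dataset_id,
    )
```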
```diff
@@ -1,9 +1,9 @@
 import asyncio
-from cognee.shared.logging_utils import get_logger
 from typing import Union
-from uuid import uuid5, NAMESPACE_OID
+from uuid import NAMESPACE_OID, uuid5

-from cognee.modules.data.methods import get_datasets, get_datasets_by_name
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.data.methods import get_datasets
 from cognee.modules.data.methods.get_dataset_data import get_dataset_data
 from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
 from cognee.modules.data.models import Data, Dataset
@@ -13,6 +13,7 @@ from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.models import User
+from cognee.modules.pipelines.operations import log_pipeline_run_initiated

 from cognee.infrastructure.databases.relational import (
     create_db_and_tables as create_relational_db_and_tables,
@@ -59,15 +60,36 @@ async def cognee_pipeline(

     # If no datasets are provided, work with all existing datasets.
     existing_datasets = await get_datasets(user.id)
-    if datasets is None or len(datasets) == 0:
+
+    if not datasets:
         # Get datasets from database if none sent.
         datasets = existing_datasets
-    if isinstance(datasets[0], str):
-        datasets = await get_datasets_by_name(datasets, user.id)
-    else:
-        # Try to get datasets objects from database, if they don't exist use dataset name
-        datasets_names = await get_datasets_by_name(datasets, user.id)
-        if datasets_names:
-            datasets = datasets_names
+
+    # If dataset is already in database, use it, otherwise create a new instance.
+    dataset_instances = []
+
+    for dataset_name in datasets:
+        is_dataset_found = False
+
+        for existing_dataset in existing_datasets:
+            if (
+                existing_dataset.name == dataset_name
+                or str(existing_dataset.id) == dataset_name
+            ):
+                dataset_instances.append(existing_dataset)
+                is_dataset_found = True
+                break
+
+        if not is_dataset_found:
+            dataset_instances.append(
+                Dataset(
+                    id=await get_unique_dataset_id(dataset_name=dataset_name, user=user),
+                    name=dataset_name,
+                    owner_id=user.id,
+                )
+            )
+
+    datasets = dataset_instances

     awaitables = []
```
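The resolution rule introduced above: every requested dataset is matched against the user's existing datasets by name or by stringified id, and anything unmatched becomes a fresh `Dataset` with a deterministic id. A standalone sketch of that matching rule, using an illustrative stand-in type rather than the real `cognee.modules.data.models.Dataset` model:

```python
from dataclasses import dataclass
from typing import Optional
from uuid import UUID


@dataclass
class DatasetStub:
    # Illustrative stand-in for the Dataset model used in the diff.
    id: UUID
    name: str


def match_existing_dataset(requested: str, existing: list[DatasetStub]) -> Optional[DatasetStub]:
    # A dataset is reused when the request matches its name or its stringified id;
    # otherwise the pipeline creates a new Dataset with a uuid5-derived id.
    for dataset in existing:
        if dataset.name == requested or str(dataset.id) == requested:
            return dataset
    return None
```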
```diff
@@ -88,31 +110,48 @@ async def run_pipeline(
     data=None,
     pipeline_name: str = "custom_pipeline",
 ):
     if isinstance(dataset, Dataset):
         check_dataset_name(dataset.name)
+
+        # Ugly hack, but no easier way to do this.
+        if pipeline_name == "add_pipeline":
+            # Refresh the add pipeline status so data is added to a dataset.
+            # Without this the app_pipeline status will be DATASET_PROCESSING_COMPLETED and will skip the execution.
+            dataset_id = uuid5(NAMESPACE_OID, f"{dataset.name}{str(user.id)}")
+
+            await log_pipeline_run_initiated(
+                pipeline_id=uuid5(NAMESPACE_OID, "add_pipeline"),
+                pipeline_name="add_pipeline",
+                dataset_id=dataset_id,
+            )
+
+            # Refresh the cognify pipeline status after we add new files.
+            # Without this the cognify_pipeline status will be DATASET_PROCESSING_COMPLETED and will skip the execution.
+            await log_pipeline_run_initiated(
+                pipeline_id=uuid5(NAMESPACE_OID, "cognify_pipeline"),
+                pipeline_name="cognify_pipeline",
+                dataset_id=dataset_id,
+            )
+
         dataset_id = dataset.id
     elif isinstance(dataset, str):
         check_dataset_name(dataset)
         # Generate id based on unique dataset_id formula
         dataset_id = await get_unique_dataset_id(dataset_name=dataset, user=user)

     if not data:
         data: list[Data] = await get_dataset_data(dataset_id=dataset_id)

     # async with update_status_lock: TODO: Add UI lock to prevent multiple backend requests
     if isinstance(dataset, Dataset):
-        task_status = await get_pipeline_status([dataset_id])
+        task_status = await get_pipeline_status([dataset_id], pipeline_name)
     else:
         task_status = [
             PipelineRunStatus.DATASET_PROCESSING_COMPLETED
         ]  # TODO: this is a random assignment, find permanent solution

-    if (
-        str(dataset_id) in task_status
-        and task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED
-    ):
+    if str(dataset_id) in task_status:
+        if task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED:
             logger.info("Dataset %s is already being processed.", dataset_id)
             return
+        if task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_COMPLETED:
+            logger.info("Dataset %s is already processed.", dataset_id)
+            return

     if not isinstance(tasks, list):
         raise ValueError("Tasks must be a list")
```
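Both ids in the hunk above are `uuid5` values, so they are stable across processes and restarts: the same dataset name and user id always map to the same `dataset_id`, and a pipeline name always maps to the same `pipeline_id`. A runnable, stdlib-only sketch of the formula used in `run_pipeline` (the user id and dataset name here are made up for illustration):

```python
from uuid import NAMESPACE_OID, UUID, uuid5

# Made-up inputs for illustration only.
user_id = UUID("00000000-0000-0000-0000-000000000001")
dataset_name = "main_dataset"

# Same formula as run_pipeline: dataset name concatenated with the user id.
dataset_id = uuid5(NAMESPACE_OID, f"{dataset_name}{str(user_id)}")
pipeline_id = uuid5(NAMESPACE_OID, "cognify_pipeline")

print(dataset_id, pipeline_id)
```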
```diff
@@ -1,4 +1,5 @@
 from enum import Enum
+from typing import Optional
 from pydantic import BaseModel
 from cognee.infrastructure.databases.vector import get_vectordb_config
 from cognee.infrastructure.llm import get_llm_config
@@ -20,8 +21,8 @@ class LLMConfig(BaseModel):
     api_key: str
     model: str
     provider: str
-    endpoint: str
-    api_version: str
+    endpoint: Optional[str]
+    api_version: Optional[str]
     models: dict[str, list[ConfigChoice]]
     providers: list[ConfigChoice]
```
```diff
@@ -13,7 +13,7 @@ echo "Environment: $ENVIRONMENT"
 # inconsistencies and should cause the startup to fail. This check allows for
 # smooth redeployments and container restarts while maintaining data integrity.
 echo "Running database migrations..."
-MIGRATION_OUTPUT=$(poetry run alembic upgrade head 2>&1) || {
+MIGRATION_OUTPUT=$(alembic upgrade head 2>&1) || {
   if [[ $MIGRATION_OUTPUT == *"UserAlreadyExists"* ]] || [[ $MIGRATION_OUTPUT == *"User default_user@example.com already exists"* ]]; then
     echo "Warning: Default user already exists, continuing startup..."
   else
@@ -22,8 +22,9 @@ MIGRATION_OUTPUT=$(poetry run alembic upgrade head 2>&1) || {
     exit 1
   fi
 }
+echo "Database migrations done."

-echo "Starting Gunicorn"
+echo "Starting server..."

 # Add startup delay to ensure DB is ready
 sleep 2
@@ -32,10 +33,10 @@ sleep 2
 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
   if [ "$DEBUG" = "true" ]; then
     echo "Waiting for the debugger to attach..."
-    exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
+    debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
   else
-    exec gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
+    gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
   fi
 else
-  exec gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
+  gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
 fi
```
#### poetry.lock (generated, 7 changed lines)
```diff
@@ -1538,7 +1538,7 @@ description = "An implementation of the Debug Adapter Protocol for Python"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "extra == \"notebook\" or extra == \"dev\""
+markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"debug\""
 files = [
     {file = "debugpy-1.8.9-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:cfe1e6c6ad7178265f74981edf1154ffce97b69005212fbc90ca22ddfe3d017e"},
     {file = "debugpy-1.8.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada7fb65102a4d2c9ab62e8908e9e9f12aed9d76ef44880367bc9308ebe49a0f"},
@@ -11961,8 +11961,9 @@ anthropic = ["anthropic"]
 api = ["gunicorn", "uvicorn"]
 chromadb = ["chromadb", "pypika"]
 codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
+debug = ["debugpy"]
 deepeval = ["deepeval"]
-dev = ["coverage", "debugpy", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
+dev = ["coverage", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
 docs = ["unstructured"]
 evals = ["gdown", "plotly"]
 falkordb = ["falkordb"]
@@ -11987,4 +11988,4 @@ weaviate = ["weaviate-client"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<=3.13"
-content-hash = "9d2a3beda2b6c329e69319e51c28c90d2806d8a257cd971e990600e99c1d96bd"
+content-hash = "15b319ff8dbe5bd88e41ead93f4e9140b2b7d86d57a707682dd3a308e78ef245"
```
```diff
@@ -118,7 +118,6 @@ dev = [
     "mypy>=1.7.1,<2",
     "notebook>=7.1.0,<8",
     "deptry>=0.20.0,<0.21",
-    "debugpy==1.8.9",
     "pylint>=3.0.3,<4",
     "ruff>=0.9.2,<1.0.0",
     "tweepy==4.14.0",
@@ -127,6 +126,7 @@ dev = [
     "mkdocs-minify-plugin>=0.8.0,<0.9",
     "mkdocstrings[python]>=0.26.2,<0.27",
 ]
+debug = ["debugpy==1.8.9"]

 [project.urls]
 Homepage = "https://www.cognee.ai"
```