Merge branch 'dev' into add-column-value-db-migration

Igor Ilic 2025-05-15 05:41:33 -04:00 committed by GitHub
commit 6f78462f3c
28 changed files with 3924 additions and 3817 deletions


@@ -97,7 +97,7 @@ git push origin feature/your-feature-name
 2. Create a Pull Request:
    - Go to the [**cognee** repository](https://github.com/topoteretes/cognee)
-   - Click "Compare & Pull Request" and make sure to open PR against dev branch
+   - Click "Compare & Pull Request"
    - Fill in the PR template with details about your changes
 ## 5. 📜 Developer Certificate of Origin (DCO)


@@ -1,60 +1,61 @@
-FROM python:3.11-slim
-
-# Define Poetry extras to install
-ARG POETRY_EXTRAS="\
-    # API \
-    api \
-    # Storage & Databases \
-    postgres weaviate qdrant neo4j falkordb milvus kuzu chromadb \
-    # Notebooks & Interactive Environments \
-    notebook \
-    # LLM & AI Frameworks \
-    langchain llama-index gemini huggingface ollama mistral groq anthropic \
-    # Evaluation & Monitoring \
-    deepeval evals posthog \
-    # Graph Processing & Code Analysis \
-    codegraph graphiti \
-    # Document Processing \
-    docs"
+# Use a Python image with uv pre-installed
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS uv
+
+# Install the project into `/app`
+WORKDIR /app
+
+# Enable bytecode compilation
+# ENV UV_COMPILE_BYTECODE=1
+
+# Copy from the cache instead of linking since it's a mounted volume
+ENV UV_LINK_MODE=copy

 # Set build argument
 ARG DEBUG

 # Set environment variable based on the build argument
 ENV DEBUG=${DEBUG}
-ENV PIP_NO_CACHE_DIR=true
-ENV PATH="${PATH}:/root/.poetry/bin"
-
-RUN apt-get update
-
-RUN apt-get install -y \
-    gcc \
-    build-essential \
-    libpq-dev
-
-WORKDIR /app
-COPY pyproject.toml poetry.lock /app/
-
-RUN pip install poetry
-
-# Don't create virtualenv since Docker is already isolated
-RUN poetry config virtualenvs.create false
-
-# Install the dependencies using the defined extras
-RUN poetry install --extras "${POETRY_EXTRAS}" --no-root
-
-# Set the PYTHONPATH environment variable to include the /app directory
-ENV PYTHONPATH=/app
-
-COPY cognee/ /app/cognee
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    gcc \
+    libpq-dev \
+    git \
+    curl \
+    clang \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy pyproject.toml and lockfile first for better caching
+COPY README.md pyproject.toml uv.lock entrypoint.sh ./
+
+# Install the project's dependencies using the lockfile and settings
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra kuzu --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable

 # Copy Alembic configuration
 COPY alembic.ini /app/alembic.ini
 COPY alembic/ /app/alembic
-COPY entrypoint.sh /app/entrypoint.sh
+
+# Then, add the rest of the project source code and install it
+# Installing separately from its dependencies allows optimal layer caching
+COPY ./cognee /app/cognee
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra kuzu --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
+
+FROM python:3.12-slim-bookworm
+WORKDIR /app
+
+COPY --from=uv /app /app
+# COPY --from=uv /app/.venv /app/.venv
+# COPY --from=uv /root/.local /root/.local

 RUN chmod +x /app/entrypoint.sh
-RUN sed -i 's/\r$//' /app/entrypoint.sh
+
+# Place executables in the environment at the front of the path
+ENV PATH="/app/.venv/bin:$PATH"
+ENV PYTHONPATH=/app

 ENTRYPOINT ["/app/entrypoint.sh"]

Binary image file changed (contents not shown): 2.3 MiB before, 353 KiB after.


@@ -43,7 +43,7 @@ export default function Home() {
   const onDataAdd = useCallback((dataset: { id: string }, files: File[]) => {
     return addData(dataset, files)
       .then(() => {
-        showNotification("Data added successfully.", 5000);
+        showNotification("Data added successfully. Please run \"Cognify\" when ready.", 5000);
         openDatasetData(dataset);
       });
   }, [showNotification])
@@ -60,6 +60,14 @@ export default function Home() {
     });
   }, [showNotification]);

+  const onCognify = useCallback(() => {
+    const dataset = datasets.find((dataset) => dataset.id === selectedDataset);
+    return onDatasetCognify({
+      id: dataset!.id,
+      name: dataset!.name,
+    });
+  }, [datasets, onDatasetCognify, selectedDataset]);
+
   const {
     value: isSettingsModalOpen,
     setTrue: openSettingsModal,
@@ -95,6 +103,7 @@ export default function Home() {
             datasetId={selectedDataset}
             onClose={closeDatasetData}
             onDataAdd={onDataAdd}
+            onCognify={onCognify}
           />
         </div>
       )}


@@ -7,7 +7,7 @@ import cognifyDataset from '@/modules/datasets/cognifyDataset';

 interface ConfigStepProps {
   onNext: () => void;
-  dataset: { id: string }
+  dataset: { name: string }
 }

 export default function CognifyStep({ onNext, dataset }: ConfigStepProps) {


@@ -2,13 +2,13 @@ import { Explorer } from '@/ui/Partials';
 import { Spacer } from 'ohmy-ui';

 interface ExploreStepProps {
-  dataset: { id: string };
+  dataset: { name: string };
 }

 export default function ExploreStep({ dataset }: ExploreStepProps) {
   return (
     <Spacer horizontal="3">
-      <Explorer dataset={dataset!} />
+      <Explorer dataset={dataset} />
     </Spacer>
   )
 }


@@ -29,7 +29,7 @@ export default function WizardPage({
     setFalse: closeSettingsModal,
   } = useBoolean(false);

-  const dataset = { id: 'main' };
+  const dataset = { name: 'main' };

   return (
     <main className={styles.main}>


@@ -1,13 +1,13 @@
 import { fetch } from '@/utils';

-export default function cognifyDataset(dataset: { id: string, name: string }) {
+export default function cognifyDataset(dataset: { id?: string, name?: string }) {
   return fetch('/v1/cognify', {
     method: 'POST',
     headers: {
       'Content-Type': 'application/json',
     },
     body: JSON.stringify({
-      datasets: [dataset.id],
+      datasets: [dataset.id || dataset.name],
     }),
   }).then((response) => response.json());
 }


@@ -1,6 +1,6 @@
 import { fetch } from '@/utils';

-export default function getExplorationGraphUrl(dataset: { id: string }) {
+export default function getExplorationGraphUrl(dataset: { name: string }) {
   return fetch('/v1/visualize')
     .then(async (response) => {
       if (response.status !== 200) {


@@ -6,6 +6,8 @@ import {
   Text,
   UploadInput,
   CloseIcon,
+  CTAButton,
+  useBoolean,
 } from "ohmy-ui";
 import { fetch } from '@/utils';
 import RawDataPreview from './RawDataPreview';
@@ -28,9 +30,10 @@ interface DataViewProps {
   datasetId: string;
   onClose: () => void;
   onDataAdd: (dataset: DatasetLike, files: File[]) => void;
+  onCognify: () => Promise<any>;
 }

-export default function DataView({ datasetId, data, onClose, onDataAdd }: DataViewProps) {
+export default function DataView({ datasetId, data, onClose, onDataAdd, onCognify }: DataViewProps) {
   // const handleDataDelete = () => {};
   const [rawData, setRawData] = useState<ArrayBuffer | null>(null);
   const [selectedData, setSelectedData] = useState<Data | null>(null);
@@ -52,7 +55,19 @@ export default function DataView({ datasetId, data, onClose, onDataAdd }: DataVi
   const handleDataAdd = (files: File[]) => {
     onDataAdd({ id: datasetId }, files);
-  }
+  };
+
+  const {
+    value: isCognifyButtonDisabled,
+    setTrue: disableCognifyButton,
+    setFalse: enableCognifyButton,
+  } = useBoolean(false);
+
+  const handleCognify = () => {
+    disableCognifyButton();
+    onCognify()
+      .finally(() => enableCognifyButton());
+  };

   return (
     <Stack orientation="vertical" gap="4">
@@ -62,6 +77,11 @@ export default function DataView({ datasetId, data, onClose, onDataAdd }: DataVi
           <Text>Add data</Text>
         </UploadInput>
       </div>
+      <div>
+        <CTAButton disabled={isCognifyButtonDisabled} onClick={handleCognify}>
+          <Text>Cognify</Text>
+        </CTAButton>
+      </div>
       <GhostButton hugContent onClick={onClose}>
         <CloseIcon />
       </GhostButton>


@@ -7,7 +7,7 @@ import { getExplorationGraphUrl } from '@/modules/exploration';
 import styles from './Explorer.module.css';

 interface ExplorerProps {
-  dataset: { id: string };
+  dataset: { name: string };
   className?: string;
   style?: React.CSSProperties;
 }


@@ -28,9 +28,6 @@ export default function SearchView() {
   }, []);

   const searchOptions = [{
-    value: 'INSIGHTS',
-    label: 'Query insights from documents',
-  }, {
     value: 'GRAPH_COMPLETION',
     label: 'Completion using Cognee\'s graph based memory',
   }, {
@@ -81,6 +78,8 @@ export default function SearchView() {
     scrollToBottom();

+    setInputValue('');
+
     const searchTypeValue = searchType.value;

     fetch('/v1/search', {
@@ -103,10 +102,12 @@ export default function SearchView() {
             text: convertToSearchTypeOutput(systemMessage, searchTypeValue),
           },
         ]);
-        setInputValue('');
         scrollToBottom();
       })
+      .catch(() => {
+        setInputValue(inputValue);
+      });
   }, [inputValue, scrollToBottom, searchType.value]);

   const {


@@ -1,7 +1,7 @@
 import handleServerErrors from './handleServerErrors';

 export default function fetch(url: string, options: RequestInit = {}): Promise<Response> {
-  return global.fetch('http://127.0.0.1:8000/api' + url, {
+  return global.fetch('http://localhost:8000/api' + url, {
     ...options,
     headers: {
       ...options.headers,


@@ -144,7 +144,9 @@ async def cognify_status():
     """Get status of cognify pipeline"""
     with redirect_stdout(sys.stderr):
         user = await get_default_user()
-        status = await get_pipeline_status([await get_unique_dataset_id("main_dataset", user)])
+        status = await get_pipeline_status(
+            [await get_unique_dataset_id("main_dataset", user)], "cognify_pipeline"
+        )

         return [types.TextContent(type="text", text=str(status))]
@@ -153,7 +155,9 @@ async def codify_status():
     """Get status of codify pipeline"""
     with redirect_stdout(sys.stderr):
         user = await get_default_user()
-        status = await get_pipeline_status([await get_unique_dataset_id("codebase", user)])
+        status = await get_pipeline_status(
+            [await get_unique_dataset_id("codebase", user)], "cognify_code_pipeline"
+        )

         return [types.TextContent(type="text", text=str(status))]


@@ -1,8 +1,9 @@
 from typing import Union, BinaryIO, List, Optional

-from cognee.modules.users.models import User
 from cognee.modules.pipelines import Task
-from cognee.tasks.ingestion import ingest_data, resolve_data_directories
+from cognee.modules.users.models import User
 from cognee.modules.pipelines import cognee_pipeline
+from cognee.tasks.ingestion import ingest_data, resolve_data_directories


 async def add(


@@ -34,7 +34,9 @@ async def cognify(
 ):
     tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)

-    return await cognee_pipeline(tasks=tasks, datasets=datasets, user=user)
+    return await cognee_pipeline(
+        tasks=tasks, datasets=datasets, user=user, pipeline_name="cognify_pipeline"
+    )


 async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's comment)


@@ -28,7 +28,7 @@ class datasets:
     @staticmethod
     async def get_status(dataset_ids: list[UUID]) -> dict:
-        return await get_pipeline_status(dataset_ids)
+        return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")

     @staticmethod
     async def delete_dataset(dataset_id: str):


@@ -1,8 +1,8 @@
 import asyncio
+from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints

 import lancedb
+from lancedb.pydantic import LanceModel, Vector
 from pydantic import BaseModel
-from lancedb.pydantic import LanceModel, Vector
-from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints

 from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.engine import DataPoint
@@ -16,8 +16,6 @@ from ..models.ScoredResult import ScoredResult
 from ..utils import normalize_distances
 from ..vector_db_interface import VectorDBInterface
-from tenacity import retry, stop_after_attempt, wait_exponential
-

 class IndexSchema(DataPoint):
     id: str


@@ -6,6 +6,7 @@ from cognee.infrastructure.databases.relational import Base

 class PipelineRunStatus(enum.Enum):
+    DATASET_PROCESSING_INITIATED = "DATASET_PROCESSING_INITIATED"
     DATASET_PROCESSING_STARTED = "DATASET_PROCESSING_STARTED"
     DATASET_PROCESSING_COMPLETED = "DATASET_PROCESSING_COMPLETED"
     DATASET_PROCESSING_ERRORED = "DATASET_PROCESSING_ERRORED"


@@ -1,3 +1,4 @@
+from .log_pipeline_run_initiated import log_pipeline_run_initiated
 from .log_pipeline_run_start import log_pipeline_run_start
 from .log_pipeline_run_complete import log_pipeline_run_complete
 from .log_pipeline_run_error import log_pipeline_run_error


@@ -5,7 +5,7 @@ from ..models import PipelineRun
 from sqlalchemy.orm import aliased


-async def get_pipeline_status(dataset_ids: list[UUID]):
+async def get_pipeline_status(dataset_ids: list[UUID], pipeline_name: str):
     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
@@ -20,6 +20,7 @@ async def get_pipeline_status(dataset_ids: list[UUID]):
                 .label("rn"),
             )
             .filter(PipelineRun.dataset_id.in_(dataset_ids))
+            .filter(PipelineRun.pipeline_name == pipeline_name)
             .subquery()
         )
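Note for reviewers: pipeline status lookups are now scoped by pipeline name as well as dataset id, so the same dataset can report independent states for different pipelines. A minimal caller sketch follows; the wrapper function below is hypothetical, while the import path, argument shape, and pipeline names mirror the hunks in this commit.

    from uuid import UUID

    from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status


    async def print_pipeline_statuses(dataset_id: UUID) -> None:
        # Hypothetical helper: each call filters PipelineRun rows by dataset_id AND pipeline_name.
        cognify_status = await get_pipeline_status([dataset_id], "cognify_pipeline")
        codify_status = await get_pipeline_status([dataset_id], "cognify_code_pipeline")
        print(cognify_status, codify_status)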


@@ -0,0 +1,22 @@
+from uuid import UUID, uuid4
+
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.pipelines.models import PipelineRun, PipelineRunStatus
+
+
+async def log_pipeline_run_initiated(pipeline_id: str, pipeline_name: str, dataset_id: UUID):
+    pipeline_run = PipelineRun(
+        pipeline_run_id=uuid4(),
+        pipeline_name=pipeline_name,
+        pipeline_id=pipeline_id,
+        status=PipelineRunStatus.DATASET_PROCESSING_INITIATED,
+        dataset_id=dataset_id,
+        run_info={},
+    )
+
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        session.add(pipeline_run)
+        await session.commit()
+
+    return pipeline_run
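A hedged usage sketch of the new helper, mirroring the run_pipeline change later in this diff: once data lands in a dataset, logging an INITIATED run for "cognify_pipeline" prevents a stale DATASET_PROCESSING_COMPLETED status from short-circuiting the next cognify run. The wrapper function name here is hypothetical; the call itself matches the diff.

    from uuid import NAMESPACE_OID, uuid5

    from cognee.modules.pipelines.operations import log_pipeline_run_initiated


    async def reset_cognify_status(dataset_name: str, user_id) -> None:
        # Hypothetical helper: dataset_id uses the same uuid5 formula as run_pipeline.
        dataset_id = uuid5(NAMESPACE_OID, f"{dataset_name}{str(user_id)}")

        await log_pipeline_run_initiated(
            pipeline_id=uuid5(NAMESPACE_OID, "cognify_pipeline"),
            pipeline_name="cognify_pipeline",
            dataset_id=dataset_id,
        )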


@@ -1,9 +1,9 @@
 import asyncio
-from cognee.shared.logging_utils import get_logger
 from typing import Union
-from uuid import uuid5, NAMESPACE_OID
-from cognee.modules.data.methods import get_datasets, get_datasets_by_name
+from uuid import NAMESPACE_OID, uuid5

+from cognee.shared.logging_utils import get_logger
+from cognee.modules.data.methods import get_datasets
 from cognee.modules.data.methods.get_dataset_data import get_dataset_data
 from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
 from cognee.modules.data.models import Data, Dataset
@@ -13,6 +13,7 @@ from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.models import User
+from cognee.modules.pipelines.operations import log_pipeline_run_initiated

 from cognee.infrastructure.databases.relational import (
     create_db_and_tables as create_relational_db_and_tables,
@@ -59,15 +60,36 @@ async def cognee_pipeline(
     # If no datasets are provided, work with all existing datasets.
     existing_datasets = await get_datasets(user.id)

-    if datasets is None or len(datasets) == 0:
+    if not datasets:
+        # Get datasets from database if none sent.
         datasets = existing_datasets
-
-    if isinstance(datasets[0], str):
-        datasets = await get_datasets_by_name(datasets, user.id)
     else:
-        # Try to get datasets objects from database, if they don't exist use dataset name
-        datasets_names = await get_datasets_by_name(datasets, user.id)
-        if datasets_names:
-            datasets = datasets_names
+        # If dataset is already in database, use it, otherwise create a new instance.
+        dataset_instances = []
+
+        for dataset_name in datasets:
+            is_dataset_found = False
+
+            for existing_dataset in existing_datasets:
+                if (
+                    existing_dataset.name == dataset_name
+                    or str(existing_dataset.id) == dataset_name
+                ):
+                    dataset_instances.append(existing_dataset)
+                    is_dataset_found = True
+                    break
+
+            if not is_dataset_found:
+                dataset_instances.append(
+                    Dataset(
+                        id=await get_unique_dataset_id(dataset_name=dataset_name, user=user),
+                        name=dataset_name,
+                        owner_id=user.id,
+                    )
+                )
+
+        datasets = dataset_instances

     awaitables = []
@@ -88,31 +110,48 @@ async def run_pipeline(
     data=None,
     pipeline_name: str = "custom_pipeline",
 ):
-    if isinstance(dataset, Dataset):
-        check_dataset_name(dataset.name)
-        dataset_id = dataset.id
-    elif isinstance(dataset, str):
-        check_dataset_name(dataset)
-        # Generate id based on unique dataset_id formula
-        dataset_id = await get_unique_dataset_id(dataset_name=dataset, user=user)
+    check_dataset_name(dataset.name)
+
+    # Ugly hack, but no easier way to do this.
+    if pipeline_name == "add_pipeline":
+        # Refresh the add pipeline status so data is added to a dataset.
+        # Without this the app_pipeline status will be DATASET_PROCESSING_COMPLETED and will skip the execution.
+        dataset_id = uuid5(NAMESPACE_OID, f"{dataset.name}{str(user.id)}")
+
+        await log_pipeline_run_initiated(
+            pipeline_id=uuid5(NAMESPACE_OID, "add_pipeline"),
+            pipeline_name="add_pipeline",
+            dataset_id=dataset_id,
+        )
+
+        # Refresh the cognify pipeline status after we add new files.
+        # Without this the cognify_pipeline status will be DATASET_PROCESSING_COMPLETED and will skip the execution.
+        await log_pipeline_run_initiated(
+            pipeline_id=uuid5(NAMESPACE_OID, "cognify_pipeline"),
+            pipeline_name="cognify_pipeline",
+            dataset_id=dataset_id,
+        )
+
+    dataset_id = dataset.id

     if not data:
         data: list[Data] = await get_dataset_data(dataset_id=dataset_id)

     # async with update_status_lock: TODO: Add UI lock to prevent multiple backend requests
     if isinstance(dataset, Dataset):
-        task_status = await get_pipeline_status([dataset_id])
+        task_status = await get_pipeline_status([dataset_id], pipeline_name)
     else:
         task_status = [
             PipelineRunStatus.DATASET_PROCESSING_COMPLETED
         ]  # TODO: this is a random assignment, find permanent solution

-    if (
-        str(dataset_id) in task_status
-        and task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED
-    ):
-        logger.info("Dataset %s is already being processed.", dataset_id)
-        return
+    if str(dataset_id) in task_status:
+        if task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED:
+            logger.info("Dataset %s is already being processed.", dataset_id)
+            return
+
+        if task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_COMPLETED:
+            logger.info("Dataset %s is already processed.", dataset_id)
+            return

     if not isinstance(tasks, list):
         raise ValueError("Tasks must be a list")


@@ -1,4 +1,5 @@
 from enum import Enum
+from typing import Optional
 from pydantic import BaseModel
 from cognee.infrastructure.databases.vector import get_vectordb_config
 from cognee.infrastructure.llm import get_llm_config
@@ -20,8 +21,8 @@ class LLMConfig(BaseModel):
     api_key: str
     model: str
     provider: str
-    endpoint: str
-    api_version: str
+    endpoint: Optional[str]
+    api_version: Optional[str]
     models: dict[str, list[ConfigChoice]]
     providers: list[ConfigChoice]
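A small sketch of what the Optional change allows, using a stand-in pydantic model (the class and field values below are illustrative; only the field typing mirrors the diff): endpoint and api_version may now be None for providers that do not need them, instead of being required strings.

    from typing import Optional

    from pydantic import BaseModel


    class LLMConfigSketch(BaseModel):
        # Stand-in for the LLMConfig fields changed above.
        api_key: str
        model: str
        provider: str
        endpoint: Optional[str]
        api_version: Optional[str]


    config = LLMConfigSketch(
        api_key="sk-example",
        model="example-model",
        provider="openai",
        endpoint=None,      # allowed now; previously a str was required
        api_version=None,
    )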


@@ -13,7 +13,7 @@ echo "Environment: $ENVIRONMENT"
 # inconsistencies and should cause the startup to fail. This check allows for
 # smooth redeployments and container restarts while maintaining data integrity.
 echo "Running database migrations..."
-MIGRATION_OUTPUT=$(poetry run alembic upgrade head 2>&1) || {
+MIGRATION_OUTPUT=$(alembic upgrade head 2>&1) || {
   if [[ $MIGRATION_OUTPUT == *"UserAlreadyExists"* ]] || [[ $MIGRATION_OUTPUT == *"User default_user@example.com already exists"* ]]; then
     echo "Warning: Default user already exists, continuing startup..."
   else
@@ -22,8 +22,9 @@ MIGRATION_OUTPUT=$(poetry run alembic upgrade head 2>&1) || {
     exit 1
   fi
 }
+echo "Database migrations done."

-echo "Starting Gunicorn"
+echo "Starting server..."

 # Add startup delay to ensure DB is ready
 sleep 2
@@ -32,10 +33,10 @@ sleep 2
 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
     if [ "$DEBUG" = "true" ]; then
         echo "Waiting for the debugger to attach..."
-        exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
+        debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
     else
-        exec gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
+        gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
     fi
 else
-    exec gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
+    gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
 fi

poetry.lock (generated, 7 changed lines)

@@ -1538,7 +1538,7 @@ description = "An implementation of the Debug Adapter Protocol for Python"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "extra == \"notebook\" or extra == \"dev\""
+markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"debug\""
 files = [
     {file = "debugpy-1.8.9-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:cfe1e6c6ad7178265f74981edf1154ffce97b69005212fbc90ca22ddfe3d017e"},
     {file = "debugpy-1.8.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada7fb65102a4d2c9ab62e8908e9e9f12aed9d76ef44880367bc9308ebe49a0f"},
@@ -11961,8 +11961,9 @@ anthropic = ["anthropic"]
 api = ["gunicorn", "uvicorn"]
 chromadb = ["chromadb", "pypika"]
 codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
+debug = ["debugpy"]
 deepeval = ["deepeval"]
-dev = ["coverage", "debugpy", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
+dev = ["coverage", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
 docs = ["unstructured"]
 evals = ["gdown", "plotly"]
 falkordb = ["falkordb"]
@@ -11987,4 +11988,4 @@ weaviate = ["weaviate-client"]

 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<=3.13"
-content-hash = "9d2a3beda2b6c329e69319e51c28c90d2806d8a257cd971e990600e99c1d96bd"
+content-hash = "15b319ff8dbe5bd88e41ead93f4e9140b2b7d86d57a707682dd3a308e78ef245"


@@ -118,7 +118,6 @@ dev = [
     "mypy>=1.7.1,<2",
     "notebook>=7.1.0,<8",
     "deptry>=0.20.0,<0.21",
-    "debugpy==1.8.9",
     "pylint>=3.0.3,<4",
     "ruff>=0.9.2,<1.0.0",
     "tweepy==4.14.0",
@@ -127,6 +126,7 @@ dev = [
     "mkdocs-minify-plugin>=0.8.0,<0.9",
     "mkdocstrings[python]>=0.26.2,<0.27",
 ]
+debug = ["debugpy==1.8.9"]

 [project.urls]
 Homepage = "https://www.cognee.ai"

uv.lock (generated, 7430 changed lines): file diff suppressed because it is too large.