fix: cognee docker image (#820)
## Description

Fixes the cognee Docker image. The Dockerfile moves from a single-stage Poetry install on `python:3.11-slim` to a multi-stage build: dependencies are resolved with `uv sync` against `uv.lock` in a `ghcr.io/astral-sh/uv:python3.12-bookworm-slim` stage and copied into a `python:3.12-slim-bookworm` runtime stage with the virtualenv on `PATH`, and the entrypoint script drops its Poetry calls. Alongside the image fix, `debugpy` becomes a dedicated `debug` extra, pipeline status is tracked per `pipeline_name` (with a new `DATASET_PROCESSING_INITIATED` status and a `log_pipeline_run_initiated` operation), and the frontend identifies datasets by name and gains a Cognify button in the data view.

## DCO Affirmation

I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.
parent: 91f3cd9ef7
commit: 0f3522eea6

26 changed files with 3923 additions and 3816 deletions
#### Dockerfile (79 changed lines)

```diff
@@ -1,60 +1,61 @@
-FROM python:3.11-slim
+# Use a Python image with uv pre-installed
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS uv

-# Define Poetry extras to install
-ARG POETRY_EXTRAS="\
-    # API \
-    api \
-    # Storage & Databases \
-    postgres weaviate qdrant neo4j falkordb milvus kuzu chromadb \
-    # Notebooks & Interactive Environments \
-    notebook \
-    # LLM & AI Frameworks \
-    langchain llama-index gemini huggingface ollama mistral groq anthropic \
-    # Evaluation & Monitoring \
-    deepeval evals posthog \
-    # Graph Processing & Code Analysis \
-    codegraph graphiti \
-    # Document Processing \
-    docs"
+# Install the project into `/app`
+WORKDIR /app
+
+# Enable bytecode compilation
+# ENV UV_COMPILE_BYTECODE=1
+
+# Copy from the cache instead of linking since it's a mounted volume
+ENV UV_LINK_MODE=copy

 # Set build argument
 ARG DEBUG

 # Set environment variable based on the build argument
 ENV DEBUG=${DEBUG}
-ENV PIP_NO_CACHE_DIR=true
-ENV PATH="${PATH}:/root/.poetry/bin"

-RUN apt-get update
-
-RUN apt-get install -y \
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
     gcc \
     libpq-dev \
     git \
     curl \
+    clang \
+    build-essential \
+    libpq-dev \
+    && rm -rf /var/lib/apt/lists/*

-WORKDIR /app
-COPY pyproject.toml poetry.lock /app/
+# Copy pyproject.toml and lockfile first for better caching
+COPY README.md pyproject.toml uv.lock entrypoint.sh ./

-RUN pip install poetry
-
-# Don't create virtualenv since Docker is already isolated
-RUN poetry config virtualenvs.create false
-
-# Install the dependencies using the defined extras
-RUN poetry install --extras "${POETRY_EXTRAS}" --no-root
-
-# Set the PYTHONPATH environment variable to include the /app directory
-ENV PYTHONPATH=/app
-
-COPY cognee/ /app/cognee
+# Install the project's dependencies using the lockfile and settings
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra kuzu --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-install-project --no-dev --no-editable

 # Copy Alembic configuration
 COPY alembic.ini /app/alembic.ini
 COPY alembic/ /app/alembic

-COPY entrypoint.sh /app/entrypoint.sh
+# Then, add the rest of the project source code and install it
+# Installing separately from its dependencies allows optimal layer caching
+COPY ./cognee /app/cognee
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --extra debug --extra api --extra postgres --extra weaviate --extra qdrant --extra neo4j --extra kuzu --extra llama-index --extra gemini --extra ollama --extra mistral --extra groq --extra anthropic --frozen --no-dev --no-editable
+
+FROM python:3.12-slim-bookworm
+
+WORKDIR /app
+
+COPY --from=uv /app /app
+# COPY --from=uv /app/.venv /app/.venv
+# COPY --from=uv /root/.local /root/.local

 RUN chmod +x /app/entrypoint.sh

 RUN sed -i 's/\r$//' /app/entrypoint.sh
+
+# Place executables in the environment at the front of the path
+ENV PATH="/app/.venv/bin:$PATH"
+
+ENV PYTHONPATH=/app

 ENTRYPOINT ["/app/entrypoint.sh"]
```
```diff
@@ -43,7 +43,7 @@ export default function Home() {
   const onDataAdd = useCallback((dataset: { id: string }, files: File[]) => {
     return addData(dataset, files)
       .then(() => {
-        showNotification("Data added successfully.", 5000);
+        showNotification("Data added successfully. Please run \"Cognify\" when ready.", 5000);
         openDatasetData(dataset);
       });
   }, [showNotification])
@@ -60,6 +60,14 @@ export default function Home() {
     });
   }, [showNotification]);

+  const onCognify = useCallback(() => {
+    const dataset = datasets.find((dataset) => dataset.id === selectedDataset);
+    return onDatasetCognify({
+      id: dataset!.id,
+      name: dataset!.name,
+    });
+  }, [datasets, onDatasetCognify, selectedDataset]);
+
   const {
     value: isSettingsModalOpen,
     setTrue: openSettingsModal,
@@ -95,6 +103,7 @@ export default function Home() {
             datasetId={selectedDataset}
             onClose={closeDatasetData}
             onDataAdd={onDataAdd}
+            onCognify={onCognify}
           />
         </div>
       )}
```
```diff
@@ -7,7 +7,7 @@ import cognifyDataset from '@/modules/datasets/cognifyDataset';

 interface ConfigStepProps {
   onNext: () => void;
-  dataset: { id: string }
+  dataset: { name: string }
 }

 export default function CognifyStep({ onNext, dataset }: ConfigStepProps) {
```
```diff
@@ -2,13 +2,13 @@ import { Explorer } from '@/ui/Partials';
 import { Spacer } from 'ohmy-ui';

 interface ExploreStepProps {
-  dataset: { id: string };
+  dataset: { name: string };
 }

 export default function ExploreStep({ dataset }: ExploreStepProps) {
   return (
     <Spacer horizontal="3">
-      <Explorer dataset={dataset!} />
+      <Explorer dataset={dataset} />
     </Spacer>
   )
 }
```
```diff
@@ -29,7 +29,7 @@ export default function WizardPage({
     setFalse: closeSettingsModal,
   } = useBoolean(false);

-  const dataset = { id: 'main' };
+  const dataset = { name: 'main' };

   return (
     <main className={styles.main}>
```
```diff
@@ -1,13 +1,13 @@
 import { fetch } from '@/utils';

-export default function cognifyDataset(dataset: { id: string, name: string }) {
+export default function cognifyDataset(dataset: { id?: string, name?: string }) {
   return fetch('/v1/cognify', {
     method: 'POST',
     headers: {
       'Content-Type': 'application/json',
     },
     body: JSON.stringify({
-      datasets: [dataset.id],
+      datasets: [dataset.id || dataset.name],
     }),
   }).then((response) => response.json());
 }
```
```diff
@@ -1,6 +1,6 @@
 import { fetch } from '@/utils';

-export default function getExplorationGraphUrl(dataset: { id: string }) {
+export default function getExplorationGraphUrl(dataset: { name: string }) {
   return fetch('/v1/visualize')
     .then(async (response) => {
       if (response.status !== 200) {
```
```diff
@@ -6,6 +6,8 @@ import {
   Text,
   UploadInput,
   CloseIcon,
+  CTAButton,
+  useBoolean,
 } from "ohmy-ui";
 import { fetch } from '@/utils';
 import RawDataPreview from './RawDataPreview';
@@ -28,9 +30,10 @@ interface DataViewProps {
   datasetId: string;
   onClose: () => void;
   onDataAdd: (dataset: DatasetLike, files: File[]) => void;
+  onCognify: () => Promise<any>;
 }

-export default function DataView({ datasetId, data, onClose, onDataAdd }: DataViewProps) {
+export default function DataView({ datasetId, data, onClose, onDataAdd, onCognify }: DataViewProps) {
   // const handleDataDelete = () => {};
   const [rawData, setRawData] = useState<ArrayBuffer | null>(null);
   const [selectedData, setSelectedData] = useState<Data | null>(null);
@@ -52,7 +55,19 @@ export default function DataView({ datasetId, data, onClose, onDataAdd }: DataVi

   const handleDataAdd = (files: File[]) => {
     onDataAdd({ id: datasetId }, files);
-  }
+  };
+
+  const {
+    value: isCognifyButtonDisabled,
+    setTrue: disableCognifyButton,
+    setFalse: enableCognifyButton,
+  } = useBoolean(false);
+
+  const handleCognify = () => {
+    disableCognifyButton();
+    onCognify()
+      .finally(() => enableCognifyButton());
+  };

   return (
     <Stack orientation="vertical" gap="4">
@@ -62,6 +77,11 @@ export default function DataView({ datasetId, data, onClose, onDataAdd }: DataVi
             <Text>Add data</Text>
           </UploadInput>
         </div>
+        <div>
+          <CTAButton disabled={isCognifyButtonDisabled} onClick={handleCognify}>
+            <Text>Cognify</Text>
+          </CTAButton>
+        </div>
         <GhostButton hugContent onClick={onClose}>
           <CloseIcon />
         </GhostButton>
```
```diff
@@ -7,7 +7,7 @@ import { getExplorationGraphUrl } from '@/modules/exploration';
 import styles from './Explorer.module.css';

 interface ExplorerProps {
-  dataset: { id: string };
+  dataset: { name: string };
   className?: string;
   style?: React.CSSProperties;
 }
```
```diff
@@ -28,9 +28,6 @@ export default function SearchView() {
   }, []);

   const searchOptions = [{
-    value: 'INSIGHTS',
-    label: 'Query insights from documents',
-  }, {
     value: 'GRAPH_COMPLETION',
     label: 'Completion using Cognee\'s graph based memory',
   }, {
@@ -81,6 +78,8 @@ export default function SearchView() {

     scrollToBottom();

+    setInputValue('');
+
     const searchTypeValue = searchType.value;

     fetch('/v1/search', {
@@ -103,10 +102,12 @@ export default function SearchView() {
           text: convertToSearchTypeOutput(systemMessage, searchTypeValue),
         },
       ]);
-      setInputValue('');

       scrollToBottom();
     })
+    .catch(() => {
+      setInputValue(inputValue);
+    });
   }, [inputValue, scrollToBottom, searchType.value]);

   const {
```
```diff
@@ -1,7 +1,7 @@
 import handleServerErrors from './handleServerErrors';

 export default function fetch(url: string, options: RequestInit = {}): Promise<Response> {
-  return global.fetch('http://127.0.0.1:8000/api' + url, {
+  return global.fetch('http://localhost:8000/api' + url, {
     ...options,
     headers: {
       ...options.headers,
```
```diff
@@ -144,7 +144,9 @@ async def cognify_status():
     """Get status of cognify pipeline"""
     with redirect_stdout(sys.stderr):
         user = await get_default_user()
-        status = await get_pipeline_status([await get_unique_dataset_id("main_dataset", user)])
+        status = await get_pipeline_status(
+            [await get_unique_dataset_id("main_dataset", user)], "cognify_pipeline"
+        )
         return [types.TextContent(type="text", text=str(status))]
@@ -153,7 +155,9 @@ async def codify_status():
     """Get status of codify pipeline"""
     with redirect_stdout(sys.stderr):
         user = await get_default_user()
-        status = await get_pipeline_status([await get_unique_dataset_id("codebase", user)])
+        status = await get_pipeline_status(
+            [await get_unique_dataset_id("codebase", user)], "cognify_code_pipeline"
+        )
         return [types.TextContent(type="text", text=str(status))]
```
```diff
@@ -1,8 +1,9 @@
 from typing import Union, BinaryIO, List, Optional
-from cognee.modules.users.models import User

-from cognee.modules.pipelines import Task
-from cognee.tasks.ingestion import ingest_data, resolve_data_directories
+from cognee.modules.users.models import User
+from cognee.modules.pipelines import cognee_pipeline
+from cognee.tasks.ingestion import ingest_data, resolve_data_directories


 async def add(
```
```diff
@@ -34,7 +34,9 @@ async def cognify(
 ):
     tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)

-    return await cognee_pipeline(tasks=tasks, datasets=datasets, user=user)
+    return await cognee_pipeline(
+        tasks=tasks, datasets=datasets, user=user, pipeline_name="cognify_pipeline"
+    )


 async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's comment)
```
```diff
@@ -28,7 +28,7 @@ class datasets:

     @staticmethod
     async def get_status(dataset_ids: list[UUID]) -> dict:
-        return await get_pipeline_status(dataset_ids)
+        return await get_pipeline_status(dataset_ids, pipeline_name="cognify_pipeline")

     @staticmethod
     async def delete_dataset(dataset_id: str):
```
```diff
@@ -1,8 +1,8 @@
 import asyncio
-from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints
 import lancedb
-from lancedb.pydantic import LanceModel, Vector
 from pydantic import BaseModel
+from lancedb.pydantic import LanceModel, Vector
+from typing import Generic, List, Optional, TypeVar, Union, get_args, get_origin, get_type_hints

 from cognee.exceptions import InvalidValueError
 from cognee.infrastructure.engine import DataPoint
@@ -16,8 +16,6 @@ from ..models.ScoredResult import ScoredResult
 from ..utils import normalize_distances
 from ..vector_db_interface import VectorDBInterface

-from tenacity import retry, stop_after_attempt, wait_exponential
-

 class IndexSchema(DataPoint):
     id: str
```
```diff
@@ -6,6 +6,7 @@ from cognee.infrastructure.databases.relational import Base


 class PipelineRunStatus(enum.Enum):
+    DATASET_PROCESSING_INITIATED = "DATASET_PROCESSING_INITIATED"
     DATASET_PROCESSING_STARTED = "DATASET_PROCESSING_STARTED"
     DATASET_PROCESSING_COMPLETED = "DATASET_PROCESSING_COMPLETED"
     DATASET_PROCESSING_ERRORED = "DATASET_PROCESSING_ERRORED"
```
```diff
@@ -1,3 +1,4 @@
+from .log_pipeline_run_initiated import log_pipeline_run_initiated
 from .log_pipeline_run_start import log_pipeline_run_start
 from .log_pipeline_run_complete import log_pipeline_run_complete
 from .log_pipeline_run_error import log_pipeline_run_error
```
```diff
@@ -5,7 +5,7 @@ from ..models import PipelineRun
 from sqlalchemy.orm import aliased


-async def get_pipeline_status(dataset_ids: list[UUID]):
+async def get_pipeline_status(dataset_ids: list[UUID], pipeline_name: str):
     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
@@ -20,6 +20,7 @@ async def get_pipeline_status(dataset_ids: list[UUID]):
                 .label("rn"),
             )
             .filter(PipelineRun.dataset_id.in_(dataset_ids))
+            .filter(PipelineRun.pipeline_name == pipeline_name)
             .subquery()
         )
```
```diff
@@ -0,0 +1,22 @@
+from uuid import UUID, uuid4
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.pipelines.models import PipelineRun, PipelineRunStatus
+
+
+async def log_pipeline_run_initiated(pipeline_id: str, pipeline_name: str, dataset_id: UUID):
+    pipeline_run = PipelineRun(
+        pipeline_run_id=uuid4(),
+        pipeline_name=pipeline_name,
+        pipeline_id=pipeline_id,
+        status=PipelineRunStatus.DATASET_PROCESSING_INITIATED,
+        dataset_id=dataset_id,
+        run_info={},
+    )
+
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        session.add(pipeline_run)
+        await session.commit()
+
+    return pipeline_run
```
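This new operation is what lets the add flow reset a dataset's pipeline state: a `DATASET_PROCESSING_INITIATED` row becomes the latest run, so a previously `DATASET_PROCESSING_COMPLETED` cognify run no longer short-circuits execution. A small usage sketch mirroring the call made in `run_pipeline` further down; the dataset-id derivation is taken from that hunk, while the wrapper function itself is illustrative:

```python
from uuid import NAMESPACE_OID, uuid5

from cognee.modules.pipelines.operations import log_pipeline_run_initiated


async def reset_cognify_status(dataset_name: str, user) -> None:
    # Illustrative wrapper; run_pipeline below derives dataset_id the same way.
    dataset_id = uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")

    await log_pipeline_run_initiated(
        pipeline_id=uuid5(NAMESPACE_OID, "cognify_pipeline"),
        pipeline_name="cognify_pipeline",
        dataset_id=dataset_id,
    )
```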
```diff
@@ -1,9 +1,9 @@
 import asyncio
-from cognee.shared.logging_utils import get_logger
 from typing import Union
-from uuid import uuid5, NAMESPACE_OID
+from uuid import NAMESPACE_OID, uuid5

-from cognee.modules.data.methods import get_datasets, get_datasets_by_name
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.data.methods import get_datasets
 from cognee.modules.data.methods.get_dataset_data import get_dataset_data
 from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
 from cognee.modules.data.models import Data, Dataset
@@ -13,6 +13,7 @@ from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline
 from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.models import User
+from cognee.modules.pipelines.operations import log_pipeline_run_initiated

 from cognee.infrastructure.databases.relational import (
     create_db_and_tables as create_relational_db_and_tables,
@@ -59,15 +60,36 @@ async def cognee_pipeline(

     # If no datasets are provided, work with all existing datasets.
     existing_datasets = await get_datasets(user.id)
-    if datasets is None or len(datasets) == 0:
+
+    if not datasets:
         # Get datasets from database if none sent.
         datasets = existing_datasets
-    if isinstance(datasets[0], str):
-        datasets = await get_datasets_by_name(datasets, user.id)
-    else:
-        # Try to get datasets objects from database, if they don't exist use dataset name
-        datasets_names = await get_datasets_by_name(datasets, user.id)
-        if datasets_names:
-            datasets = datasets_names
+
+    # If dataset is already in database, use it, otherwise create a new instance.
+    dataset_instances = []
+
+    for dataset_name in datasets:
+        is_dataset_found = False
+
+        for existing_dataset in existing_datasets:
+            if (
+                existing_dataset.name == dataset_name
+                or str(existing_dataset.id) == dataset_name
+            ):
+                dataset_instances.append(existing_dataset)
+                is_dataset_found = True
+                break
+
+        if not is_dataset_found:
+            dataset_instances.append(
+                Dataset(
+                    id=await get_unique_dataset_id(dataset_name=dataset_name, user=user),
+                    name=dataset_name,
+                    owner_id=user.id,
+                )
+            )
+
+    datasets = dataset_instances

     awaitables = []
```
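The resolution rule introduced above: every requested dataset is matched against the user's existing datasets by name or by stringified id, and anything unmatched becomes a fresh `Dataset` with a deterministic id. A standalone sketch of that matching rule, using an illustrative stand-in type rather than the real `cognee.modules.data.models.Dataset` model:

```python
from dataclasses import dataclass
from typing import Optional
from uuid import UUID


@dataclass
class DatasetStub:
    # Illustrative stand-in for the Dataset model used in the diff.
    id: UUID
    name: str


def match_existing_dataset(requested: str, existing: list[DatasetStub]) -> Optional[DatasetStub]:
    # A dataset is reused when the request matches its name or its stringified id;
    # otherwise the pipeline creates a new Dataset with a uuid5-derived id.
    for dataset in existing:
        if dataset.name == requested or str(dataset.id) == requested:
            return dataset
    return None
```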
```diff
@@ -88,31 +110,48 @@ async def run_pipeline(
     data=None,
     pipeline_name: str = "custom_pipeline",
 ):
     if isinstance(dataset, Dataset):
         check_dataset_name(dataset.name)
+
+        # Ugly hack, but no easier way to do this.
+        if pipeline_name == "add_pipeline":
+            # Refresh the add pipeline status so data is added to a dataset.
+            # Without this the app_pipeline status will be DATASET_PROCESSING_COMPLETED and will skip the execution.
+            dataset_id = uuid5(NAMESPACE_OID, f"{dataset.name}{str(user.id)}")
+
+            await log_pipeline_run_initiated(
+                pipeline_id=uuid5(NAMESPACE_OID, "add_pipeline"),
+                pipeline_name="add_pipeline",
+                dataset_id=dataset_id,
+            )
+
+            # Refresh the cognify pipeline status after we add new files.
+            # Without this the cognify_pipeline status will be DATASET_PROCESSING_COMPLETED and will skip the execution.
+            await log_pipeline_run_initiated(
+                pipeline_id=uuid5(NAMESPACE_OID, "cognify_pipeline"),
+                pipeline_name="cognify_pipeline",
+                dataset_id=dataset_id,
+            )
+
         dataset_id = dataset.id
     elif isinstance(dataset, str):
         check_dataset_name(dataset)
         # Generate id based on unique dataset_id formula
         dataset_id = await get_unique_dataset_id(dataset_name=dataset, user=user)

     if not data:
         data: list[Data] = await get_dataset_data(dataset_id=dataset_id)

     # async with update_status_lock: TODO: Add UI lock to prevent multiple backend requests
     if isinstance(dataset, Dataset):
-        task_status = await get_pipeline_status([dataset_id])
+        task_status = await get_pipeline_status([dataset_id], pipeline_name)
     else:
         task_status = [
             PipelineRunStatus.DATASET_PROCESSING_COMPLETED
         ]  # TODO: this is a random assignment, find permanent solution

-    if (
-        str(dataset_id) in task_status
-        and task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED
-    ):
+    if str(dataset_id) in task_status:
+        if task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_STARTED:
             logger.info("Dataset %s is already being processed.", dataset_id)
             return
+        if task_status[str(dataset_id)] == PipelineRunStatus.DATASET_PROCESSING_COMPLETED:
+            logger.info("Dataset %s is already processed.", dataset_id)
+            return

     if not isinstance(tasks, list):
         raise ValueError("Tasks must be a list")
```
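Both ids in the hunk above are `uuid5` values, so they are stable across processes and restarts: the same dataset name and user id always map to the same `dataset_id`, and a pipeline name always maps to the same `pipeline_id`. A runnable, stdlib-only sketch of the formula used in `run_pipeline` (the user id and dataset name here are made up for illustration):

```python
from uuid import NAMESPACE_OID, UUID, uuid5

# Made-up inputs for illustration only.
user_id = UUID("00000000-0000-0000-0000-000000000001")
dataset_name = "main_dataset"

# Same formula as run_pipeline: dataset name concatenated with the user id.
dataset_id = uuid5(NAMESPACE_OID, f"{dataset_name}{str(user_id)}")
pipeline_id = uuid5(NAMESPACE_OID, "cognify_pipeline")

print(dataset_id, pipeline_id)
```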
```diff
@@ -1,4 +1,5 @@
 from enum import Enum
+from typing import Optional
 from pydantic import BaseModel
 from cognee.infrastructure.databases.vector import get_vectordb_config
 from cognee.infrastructure.llm import get_llm_config
@@ -20,8 +21,8 @@ class LLMConfig(BaseModel):
     api_key: str
     model: str
     provider: str
-    endpoint: str
-    api_version: str
+    endpoint: Optional[str]
+    api_version: Optional[str]
     models: dict[str, list[ConfigChoice]]
     providers: list[ConfigChoice]
```
```diff
@@ -13,7 +13,7 @@ echo "Environment: $ENVIRONMENT"
 # inconsistencies and should cause the startup to fail. This check allows for
 # smooth redeployments and container restarts while maintaining data integrity.
 echo "Running database migrations..."
-MIGRATION_OUTPUT=$(poetry run alembic upgrade head 2>&1) || {
+MIGRATION_OUTPUT=$(alembic upgrade head 2>&1) || {
   if [[ $MIGRATION_OUTPUT == *"UserAlreadyExists"* ]] || [[ $MIGRATION_OUTPUT == *"User default_user@example.com already exists"* ]]; then
     echo "Warning: Default user already exists, continuing startup..."
   else
@@ -22,8 +22,9 @@ MIGRATION_OUTPUT=$(poetry run alembic upgrade head 2>&1) || {
     exit 1
   fi
 }
+echo "Database migrations done."

-echo "Starting Gunicorn"
+echo "Starting server..."

 # Add startup delay to ensure DB is ready
 sleep 2
@@ -32,10 +33,10 @@ sleep 2
 if [ "$ENVIRONMENT" = "dev" ] || [ "$ENVIRONMENT" = "local" ]; then
   if [ "$DEBUG" = "true" ]; then
     echo "Waiting for the debugger to attach..."
-    exec python -m debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
+    debugpy --wait-for-client --listen 0.0.0.0:5678 -m gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
   else
-    exec gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
+    gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level debug --reload cognee.api.client:app
   fi
 else
-  exec gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
+  gunicorn -w 3 -k uvicorn.workers.UvicornWorker -t 30000 --bind=0.0.0.0:8000 --log-level error cognee.api.client:app
 fi
```
#### poetry.lock (generated, 7 changed lines)
```diff
@@ -1538,7 +1538,7 @@ description = "An implementation of the Debug Adapter Protocol for Python"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "extra == \"notebook\" or extra == \"dev\""
+markers = "extra == \"notebook\" or extra == \"dev\" or extra == \"debug\""
 files = [
     {file = "debugpy-1.8.9-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:cfe1e6c6ad7178265f74981edf1154ffce97b69005212fbc90ca22ddfe3d017e"},
     {file = "debugpy-1.8.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ada7fb65102a4d2c9ab62e8908e9e9f12aed9d76ef44880367bc9308ebe49a0f"},
@@ -11961,8 +11961,9 @@ anthropic = ["anthropic"]
 api = ["gunicorn", "uvicorn"]
 chromadb = ["chromadb", "pypika"]
 codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
+debug = ["debugpy"]
 deepeval = ["deepeval"]
-dev = ["coverage", "debugpy", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
+dev = ["coverage", "deptry", "gitpython", "mkdocs-material", "mkdocs-minify-plugin", "mkdocstrings", "mypy", "notebook", "pylint", "pytest", "pytest-asyncio", "pytest-cov", "ruff", "tweepy"]
 docs = ["unstructured"]
 evals = ["gdown", "plotly"]
 falkordb = ["falkordb"]
@@ -11987,4 +11988,4 @@ weaviate = ["weaviate-client"]
 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<=3.13"
-content-hash = "9d2a3beda2b6c329e69319e51c28c90d2806d8a257cd971e990600e99c1d96bd"
+content-hash = "15b319ff8dbe5bd88e41ead93f4e9140b2b7d86d57a707682dd3a308e78ef245"
```
```diff
@@ -118,7 +118,6 @@ dev = [
     "mypy>=1.7.1,<2",
     "notebook>=7.1.0,<8",
     "deptry>=0.20.0,<0.21",
-    "debugpy==1.8.9",
     "pylint>=3.0.3,<4",
     "ruff>=0.9.2,<1.0.0",
     "tweepy==4.14.0",
@@ -127,6 +126,7 @@ dev = [
     "mkdocs-minify-plugin>=0.8.0,<0.9",
     "mkdocstrings[python]>=0.26.2,<0.27",
 ]
+debug = ["debugpy==1.8.9"]

 [project.urls]
 Homepage = "https://www.cognee.ai"
```