Merge branch 'main' of github.com:topoteretes/cognee into COG-170-PGvector-adapter

This commit is contained in:
Igor Ilic 2024-10-18 12:05:06 +02:00
commit 58e5854943
25 changed files with 585 additions and 368 deletions

View file

@ -7,6 +7,7 @@ on:
- feature/* - feature/*
paths-ignore: paths-ignore:
- '**.md' - '**.md'
- 'examples/**'
env: env:
AWS_ROLE_DEV_CICD: "arn:aws:iam::463722570299:role/cognee-dev-base-role-github-ci-cd" AWS_ROLE_DEV_CICD: "arn:aws:iam::463722570299:role/cognee-dev-base-role-github-ci-cd"
@ -76,7 +77,7 @@ jobs:
script: | script: |
await github.rest.actions.createWorkflowDispatch({ await github.rest.actions.createWorkflowDispatch({
owner: 'topoteretes', owner: 'topoteretes',
repo: 'PromethAI-Infra', repo: 'cognee-infra',
workflow_id: 'terraform.apply.yml', workflow_id: 'terraform.apply.yml',
ref: 'main' ref: 'main'
}) })

View file

@ -22,8 +22,9 @@ jobs:
id-token: write id-token: write
contents: read contents: read
steps: steps:
- name: Take code from repo - name: Checkout code from repo
uses: actions/checkout@v3 uses: actions/checkout@v4
- name: Set environment variable for stage - name: Set environment variable for stage
id: set-env id: set-env
run: | run: |
@ -34,68 +35,72 @@ jobs:
echo "STAGE=dev" >> $GITHUB_ENV echo "STAGE=dev" >> $GITHUB_ENV
echo "::set-output name=stage::dev" echo "::set-output name=stage::dev"
fi fi
# - name: Use output
# run: echo "The stage is ${{ steps.set-env.outputs.stage }}" - name: Use output
# - name: Configure AWS credentials run: echo "The stage is ${{ steps.set-env.outputs.stage }}"
# uses: aws-actions/configure-aws-credentials@v1
# with: - name: Configure AWS credentials
# role-to-assume: ${{ env.AWS_ROLE_DEV_CICD }} uses: aws-actions/configure-aws-credentials@v4
# aws-region: eu-west-1 with:
# - name: Create Docker image and push to ECR role-to-assume: ${{ env.AWS_ROLE_DEV_CICD }}
# uses: ./.github/actions/image_builder aws-region: eu-west-1
# id: generate-promethai-docker
# with: - name: Build Docker image and push to ECR
# stage: prd uses: ./.github/actions/image_builder
# aws_account_id: ${{ env.AWS_ACCOUNT_ID_DEV }} id: generate-promethai-docker
# should_publish: true with:
# ecr_image_repo_name: promethai-prd-backend-promethai-backend stage: prd
# # ecr_image_repo_name: cognee-prd-backend-cognee-ecr aws_account_id: ${{ env.AWS_ACCOUNT_ID_DEV }}
# dockerfile_location: ./ should_publish: true
# - name: Export Docker image tag ecr_image_repo_name: cognee-prd-backend-cognee-ecr
# id: export-promethai-docker-tag dockerfile_location: ./
# run: |
# export DOCKER_TAG=$(cat /tmp/.DOCKER_IMAGE_VERSION) - name: Export Docker image tag
# echo "Docker tag is: $DOCKER_TAG" id: export-cognee-docker-tag
# echo "promethai_docker_tag_backend=$DOCKER_TAG" >> $GITHUB_OUTPUT run: |
# export DOCKER_TAG=$(cat /tmp/.DOCKER_IMAGE_VERSION)
## - name: Create Tag and Release echo "Docker tag is: $DOCKER_TAG"
## runs-on: ubuntu-latest echo "cognee_image_tag=$DOCKER_TAG" >> $GITHUB_OUTPUT
## uses: actions/checkout@v3
## needs: publish_docker_to_ecr # ensure this job runs after Docker image is pushed # - name: Create Tag and Release
## steps: # runs-on: ubuntu-latest
## - name: Check out code # uses: actions/checkout@v3
## uses: actions/checkout@v3 # needs: publish_docker_to_ecr # ensure this job runs after Docker image is pushed
## - name: Bump version and push tag # steps:
## id: bump_version_and_push_tag # - name: Check out code
## uses: anothrNick/github-tag-action@1.34.0 # uses: actions/checkout@v3
## env: # - name: Bump version and push tag
## GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }} # id: bump_version_and_push_tag
## WITH_V: true # uses: anothrNick/github-tag-action@1.34.0
## DEFAULT_BUMP: 'minor' # or 'minor' or 'major' # env:
## - name: Create Release # GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
## id: create_release # WITH_V: true
## uses: actions/create-release@v1 # DEFAULT_BUMP: 'minor' # or 'minor' or 'major'
## env: # - name: Create Release
## GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }} # id: create_release
## with: # uses: actions/create-release@v1
## tag_name: ${{ steps.bump_version_and_push_tag.outputs.tag }} # env:
## release_name: Release ${{ steps.bump_version_and_push_tag.outputs.tag }} # GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
# outputs: # with:
# promethai_docker_tag_backend: ${{ steps.export-promethai-docker-tag.outputs.promethai_docker_tag_backend }} # tag_name: ${{ steps.bump_version_and_push_tag.outputs.tag }}
# # release_name: Release ${{ steps.bump_version_and_push_tag.outputs.tag }}
# apply_tf:
# name: Trigger terraform apply workflow outputs:
# runs-on: ubuntu-latest cognee_image_tag: ${{ steps.export-promethai-docker-tag.outputs.cognee_image_tag }}
# needs: publish_docker_to_ecr
# steps: trigger_deployment:
# - name: TF apply workflow triggers step name: Trigger deployment
# uses: actions/github-script@v6 runs-on: ubuntu-latest
# with: needs: publish_docker_to_ecr
# github-token: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }} steps:
# script: | - name: TF apply workflow triggers step
# await github.rest.actions.createWorkflowDispatch({ uses: actions/github-script@v7
# owner: 'topoteretes', with:
# repo: 'PromethAI-Infra', github-token: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
# workflow_id: 'terraform.apply.yml', script: |
# ref: 'main' await github.rest.actions.createWorkflowDispatch({
# }) owner: 'topoteretes',
repo: 'cognee-infra',
workflow_id: 'terraform.apply.yml',
ref: 'main'
})

View file

@ -1,38 +0,0 @@
#name: analytics | Send Twitter Followers to Segment
#
#on: pull_request
#
##on:
## schedule:
## - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed.
## workflow_dispatch: # Allows manual triggering of the workflow
#
#jobs:
# send-followers:
# runs-on: ubuntu-latest
#
# steps:
# - name: Checkout repository
# uses: actions/checkout@v3
#
# - name: Set up Python
# uses: actions/setup-python@v4
# with:
# python-version: '3.x'
#
# - name: Install dependencies
# run: |
# pip install tweepy requests
#
# - name: Send Twitter Followers to Segment
# env:
# TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }}
# TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }}
# TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
# TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
# SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
# TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
# run: |
# cd tools
# python daily_twitter_stats.py
#

View file

@ -13,7 +13,7 @@ COPY pyproject.toml poetry.lock /app/
RUN pip install poetry RUN pip install poetry
# Create virtualenv # Don't create virtualenv since docker is already isolated
RUN poetry config virtualenvs.create false RUN poetry config virtualenvs.create false
# Install the dependencies # Install the dependencies
@ -22,7 +22,11 @@ RUN poetry install --no-root --no-dev
# Set the PYTHONPATH environment variable to include the /app directory # Set the PYTHONPATH environment variable to include the /app directory
ENV PYTHONPATH=/app ENV PYTHONPATH=/app
COPY cognee/ /app/cognee COPY cognee/ cognee/
# Copy Alembic configuration
COPY alembic.ini ./
COPY alembic/ alembic/
COPY entrypoint.sh /app/entrypoint.sh COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh RUN chmod +x /app/entrypoint.sh

View file

@ -1,49 +0,0 @@
FROM python:3.11

# Build-time debug switch, re-exported so the value is also visible at runtime.
ARG DEBUG
ENV DEBUG=${DEBUG}

# Tell pip not to keep its download cache.
ENV PIP_NO_CACHE_DIR=true
# NOTE(review): poetry is installed through pip below, so this legacy
# installer path is presumably unused; kept so PATH stays unchanged.
ENV PATH="${PATH}:/root/.poetry/bin"

RUN pip install poetry

WORKDIR /app
COPY pyproject.toml poetry.lock /app/

# Install the dependencies straight into the image's interpreter
# (virtualenv creation is switched off first).
# NOTE(review): `--no-dev` is deprecated in newer Poetry releases and poetry
# is not pinned above; confirm the flag still works with the installed version.
RUN poetry config virtualenvs.create false && \
    poetry install --no-root --no-dev

# OS-level tools plus AWS CLI v2, done in one layer so the cleanup at the end
# actually shrinks the image. poetry is already installed above, so it is not
# installed a second time here. The unpacked `aws/` installer directory is
# removed together with the zip: the installer copies what it needs elsewhere,
# and leaving it behind would bake it into /app.
RUN apt-get update -q && \
    apt-get install -y -q \
        gcc \
        python3-dev \
        curl \
        zip \
        jq \
        netcat-traditional && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip -qq awscliv2.zip && \
    ./aws/install && \
    apt-get clean && \
    rm -rf \
        awscliv2.zip \
        aws \
        /var/lib/apt/lists/* \
        /tmp/* \
        /var/tmp/*

# Set the PYTHONPATH environment variable to include the /app directory
ENV PYTHONPATH=/app

# Application code and the container entrypoint (WORKDIR is still /app).
COPY cognee/ /app/cognee
COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh

ENTRYPOINT ["/app/entrypoint.sh"]

View file

@ -12,8 +12,8 @@ We build for developers who need a reliable, production-ready data layer for AI
## What is cognee? ## What is cognee?
cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you ability to interconnect and retrieve past conversations, documents, audio transcriptions, while also reducing hallucinations, developer effort and cost. Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
Try it in a Google collab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://topoteretes.github.io/cognee">documentation</a> Try it in a Google Colab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://topoteretes.github.io/cognee">documentation</a>
If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community
@ -57,10 +57,10 @@ To use different LLM providers, for more info check out our <a href="https://top
If you are using Networkx, create an account on Graphistry to visualize results: If you are using Networkx, create an account on Graphistry to visualize results:
``` ```
cognee.config.set_graphistry_config({ cognee.config.set_graphistry_config({
"username": "YOUR_USERNAME", "username": "YOUR_USERNAME",
"password": "YOUR_PASSWORD" "password": "YOUR_PASSWORD"
}) })
``` ```
(Optional) To run the UI, go to cognee-frontend directory and run: (Optional) To run the UI, go to cognee-frontend directory and run:
@ -207,12 +207,6 @@ Check out our demo notebook [here](https://github.com/topoteretes/cognee/blob/ma
[<img src="https://i3.ytimg.com/vi/-ARUfIzhzC4/maxresdefault.jpg" width="100%">](https://www.youtube.com/watch?v=BDFt4xVPmro "Learn about cognee: 55") [<img src="https://i3.ytimg.com/vi/-ARUfIzhzC4/maxresdefault.jpg" width="100%">](https://www.youtube.com/watch?v=BDFt4xVPmro "Learn about cognee: 55")
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=topoteretes/cognee&type=Date)](https://star-history.com/#topoteretes/cognee&Date)
## Get Started ## Get Started
### Install Server ### Install Server
@ -224,7 +218,6 @@ docker compose up
``` ```
### Install SDK ### Install SDK
Please see the cognee [Development Guide](https://topoteretes.github.io/cognee/quickstart/) for important beta information and usage instructions. Please see the cognee [Development Guide](https://topoteretes.github.io/cognee/quickstart/) for important beta information and usage instructions.
@ -232,3 +225,16 @@ Please see the cognee [Development Guide](https://topoteretes.github.io/cognee/q
```bash ```bash
pip install cognee pip install cognee
``` ```
## Star History
[![Star History Chart](https://api.star-history.com/svg?repos=topoteretes/cognee&type=Date)](https://star-history.com/#topoteretes/cognee&Date)
## 💫 Contributors
<a href="https://github.com/topoteretes/cognee/graphs/contributors">
<img alt="contributors" src="https://contrib.rocks/image?repo=topoteretes/cognee"/>
</a>

View file

@ -0,0 +1,27 @@
"""Add default user
Revision ID: 482cd6517ce4
Revises: 8057ae7329c2
Create Date: 2024-10-16 22:17:18.634638
"""
from typing import Sequence, Union
from sqlalchemy.util import await_only
from cognee.modules.users.methods import create_default_user, delete_user
# revision identifiers, used by Alembic.
revision: str = '482cd6517ce4'
down_revision: Union[str, None] = '8057ae7329c2'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Apply this revision by creating the default user.

    Calls ``create_default_user()`` and passes the awaitable it returns to
    SQLAlchemy's ``await_only`` so it is awaited from this synchronous hook.

    NOTE(review): ``await_only`` presumably has to run inside SQLAlchemy's
    greenlet context -- confirm the Alembic env invokes migrations that way.
    """
    pending_user_creation = create_default_user()
    await_only(pending_user_creation)
def downgrade() -> None:
    """Revert this revision by deleting the user with the default e-mail.

    Calls ``delete_user`` with the hard-coded address below and awaits the
    result through SQLAlchemy's ``await_only``.

    NOTE(review): assumes this address is the one ``create_default_user``
    registers -- verify against that helper.
    """
    pending_user_removal = delete_user("default_user@example.com")
    await_only(pending_user_removal)

View file

@ -82,7 +82,7 @@ export default function Settings({ onDone = () => {}, submitButtonText = 'Save'
}, },
body: JSON.stringify({ body: JSON.stringify({
llm: newLLMConfig, llm: newLLMConfig,
vectorDB: newVectorConfig, vectorDb: newVectorConfig,
}), }),
}) })
.then(() => { .then(() => {
@ -145,7 +145,7 @@ export default function Settings({ onDone = () => {}, submitButtonText = 'Save'
settings.llm.model = settings.llm.models[settings.llm.provider.value][0]; settings.llm.model = settings.llm.models[settings.llm.provider.value][0];
} }
setLLMConfig(settings.llm); setLLMConfig(settings.llm);
setVectorDBConfig(settings.vectorDB); setVectorDBConfig(settings.vectorDb);
}; };
fetchConfig(); fetchConfig();
}, []); }, []);

15
cognee/api/DTO.py Normal file
View file

@ -0,0 +1,15 @@
from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel, to_snake
class OutDTO(BaseModel):
    # Shared base for DTOs the API sends back (the response models in the
    # FastAPI module derive from it). Aliases are generated with `to_camel`,
    # and `populate_by_name` keeps the snake_case field names usable as well.
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)
class InDTO(BaseModel):
    # Shared base for DTOs the API receives (the request payload models in the
    # FastAPI module derive from it). Aliases are generated with `to_camel`,
    # and `populate_by_name` lets input use either the alias or the field name.
    model_config = ConfigDict(alias_generator=to_camel, populate_by_name=True)

View file

@ -1,18 +1,23 @@
""" FastAPI server for the Cognee API. """ """ FastAPI server for the Cognee API. """
from datetime import datetime
import os import os
from uuid import UUID
import aiohttp import aiohttp
import uvicorn import uvicorn
import logging import logging
import sentry_sdk import sentry_sdk
from typing import Dict, Any, List, Union, Optional, Literal from typing import List, Union, Optional, Literal
from typing_extensions import Annotated from typing_extensions import Annotated
from fastapi import FastAPI, HTTPException, Form, UploadFile, Query, Depends from fastapi import FastAPI, HTTPException, Form, UploadFile, Query, Depends
from fastapi.responses import JSONResponse, FileResponse, Response from fastapi.responses import JSONResponse, FileResponse, Response
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel from pydantic import BaseModel
from cognee.api.DTO import InDTO, OutDTO
from cognee.api.v1.search import SearchType from cognee.api.v1.search import SearchType
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.pipelines.models import PipelineRunStatus
# Set up logging # Set up logging
@ -124,6 +129,7 @@ async def root():
""" """
return {"message": "Hello, World, I am alive!"} return {"message": "Hello, World, I am alive!"}
@app.get("/health") @app.get("/health")
def health_check(): def health_check():
""" """
@ -131,41 +137,46 @@ def health_check():
""" """
return Response(status_code = 200) return Response(status_code = 200)
@app.get("/api/v1/datasets", response_model = list)
class ErrorResponseDTO(BaseModel):
    # Schema advertised for 404 responses on the dataset routes below.
    # NOTE(review): those routes signal 404 through HTTPException, whose body
    # uses a `detail` key rather than `message` -- confirm the documented
    # schema matches what clients actually receive.
    message: str
class DatasetDTO(OutDTO):
    # Response item for GET /api/v1/datasets (used there as
    # `response_model = list[DatasetDTO]`).
    id: UUID
    name: str
    created_at: datetime
    # No default is given, so the field must be present but may be null.
    updated_at: Optional[datetime]
    owner_id: UUID
@app.get("/api/v1/datasets", response_model = list[DatasetDTO])
async def get_datasets(user: User = Depends(get_authenticated_user)): async def get_datasets(user: User = Depends(get_authenticated_user)):
try: try:
from cognee.modules.data.methods import get_datasets from cognee.modules.data.methods import get_datasets
datasets = await get_datasets(user.id) datasets = await get_datasets(user.id)
return JSONResponse( return datasets
status_code = 200,
content = [dataset.to_json() for dataset in datasets],
)
except Exception as error: except Exception as error:
logger.error(f"Error retrieving datasets: {str(error)}")
raise HTTPException(status_code = 500, detail = f"Error retrieving datasets: {str(error)}") from error raise HTTPException(status_code = 500, detail = f"Error retrieving datasets: {str(error)}") from error
@app.delete("/api/v1/datasets/{dataset_id}", response_model = dict)
@app.delete("/api/v1/datasets/{dataset_id}", response_model = None, responses = { 404: { "model": ErrorResponseDTO }})
async def delete_dataset(dataset_id: str, user: User = Depends(get_authenticated_user)): async def delete_dataset(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset, delete_dataset from cognee.modules.data.methods import get_dataset, delete_dataset
dataset = get_dataset(user.id, dataset_id) dataset = await get_dataset(user.id, dataset_id)
if dataset is None: if dataset is None:
return JSONResponse( raise HTTPException(
status_code = 404, status_code = 404,
content = { detail = f"Dataset ({dataset_id}) not found."
"detail": f"Dataset ({dataset_id}) not found."
}
) )
await delete_dataset(dataset) await delete_dataset(dataset)
return JSONResponse(
status_code = 200,
content = "OK",
)
@app.get("/api/v1/datasets/{dataset_id}/graph", response_model=list) @app.get("/api/v1/datasets/{dataset_id}/graph", response_model = str)
async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authenticated_user)): async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.shared.utils import render_graph from cognee.shared.utils import render_graph
from cognee.infrastructure.databases.graph import get_graph_engine from cognee.infrastructure.databases.graph import get_graph_engine
@ -184,7 +195,17 @@ async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authentica
content = "Graphistry credentials are not set. Please set them in your .env file.", content = "Graphistry credentials are not set. Please set them in your .env file.",
) )
@app.get("/api/v1/datasets/{dataset_id}/data", response_model=list)
class DataDTO(OutDTO):
    # Response item for GET /api/v1/datasets/{dataset_id}/data (used there as
    # `response_model = list[DataDTO]`).
    id: UUID
    name: str
    created_at: datetime
    # No default is given, so the field must be present but may be null.
    updated_at: Optional[datetime]
    extension: str
    mime_type: str
    raw_data_location: str
@app.get("/api/v1/datasets/{dataset_id}/data", response_model = list[DataDTO], responses = { 404: { "model": ErrorResponseDTO }})
async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)): async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset_data, get_dataset from cognee.modules.data.methods import get_dataset_data, get_dataset
@ -193,38 +214,33 @@ async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticat
if dataset is None: if dataset is None:
return JSONResponse( return JSONResponse(
status_code = 404, status_code = 404,
content = { content = ErrorResponseDTO(f"Dataset ({dataset_id}) not found."),
"detail": f"Dataset ({dataset_id}) not found."
}
) )
dataset_data = await get_dataset_data(dataset_id = dataset.id) dataset_data = await get_dataset_data(dataset_id = dataset.id)
if dataset_data is None: if dataset_data is None:
raise HTTPException(status_code = 404, detail = f"Dataset ({dataset.id}) not found.") return []
return [ return dataset_data
data.to_json() for data in dataset_data
]
@app.get("/api/v1/datasets/status", response_model=dict)
@app.get("/api/v1/datasets/status", response_model = dict[str, PipelineRunStatus])
async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None, user: User = Depends(get_authenticated_user)): async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None, user: User = Depends(get_authenticated_user)):
from cognee.api.v1.datasets.datasets import datasets as cognee_datasets from cognee.api.v1.datasets.datasets import datasets as cognee_datasets
try: try:
datasets_statuses = await cognee_datasets.get_status(datasets) datasets_statuses = await cognee_datasets.get_status(datasets)
return JSONResponse( return datasets_statuses
status_code = 200,
content = datasets_statuses,
)
except Exception as error: except Exception as error:
return JSONResponse( return JSONResponse(
status_code = 409, status_code = 409,
content = {"error": str(error)} content = {"error": str(error)}
) )
@app.get("/api/v1/datasets/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
@app.get("/api/v1/datasets/{dataset_id}/data/{data_id}/raw", response_class = FileResponse)
async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)): async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.data.methods import get_dataset, get_dataset_data from cognee.modules.data.methods import get_dataset, get_dataset_data
@ -255,13 +271,8 @@ async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_a
return data.raw_data_location return data.raw_data_location
class AddPayload(BaseModel):
data: Union[str, UploadFile, List[Union[str, UploadFile]]]
dataset_id: str
class Config:
arbitrary_types_allowed = True
@app.post("/api/v1/add", response_model=dict) @app.post("/api/v1/add", response_model = None)
async def add( async def add(
data: List[UploadFile], data: List[UploadFile],
datasetId: str = Form(...), datasetId: str = Form(...),
@ -297,90 +308,89 @@ async def add(
datasetId, datasetId,
user = user, user = user,
) )
return JSONResponse(
status_code = 200,
content = {
"message": "OK"
}
)
except Exception as error: except Exception as error:
return JSONResponse( return JSONResponse(
status_code = 409, status_code = 409,
content = {"error": str(error)} content = {"error": str(error)}
) )
class CognifyPayload(BaseModel):
class CognifyPayloadDTO(BaseModel):
datasets: List[str] datasets: List[str]
@app.post("/api/v1/cognify", response_model=dict) @app.post("/api/v1/cognify", response_model = None)
async def cognify(payload: CognifyPayload, user: User = Depends(get_authenticated_user)): async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
""" This endpoint is responsible for the cognitive processing of the content.""" """ This endpoint is responsible for the cognitive processing of the content."""
from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
try: try:
await cognee_cognify(payload.datasets, user) await cognee_cognify(payload.datasets, user)
return JSONResponse(
status_code = 200,
content = {
"message": "OK"
}
)
except Exception as error: except Exception as error:
return JSONResponse( return JSONResponse(
status_code = 409, status_code = 409,
content = {"error": str(error)} content = {"error": str(error)}
) )
class SearchPayload(BaseModel):
searchType: SearchType class SearchPayloadDTO(InDTO):
search_type: SearchType
query: str query: str
@app.post("/api/v1/search", response_model=list) @app.post("/api/v1/search", response_model = list)
async def search(payload: SearchPayload, user: User = Depends(get_authenticated_user)): async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
""" This endpoint is responsible for searching for nodes in the graph.""" """ This endpoint is responsible for searching for nodes in the graph."""
from cognee.api.v1.search import search as cognee_search from cognee.api.v1.search import search as cognee_search
try:
results = await cognee_search(payload.searchType, payload.query, user)
return JSONResponse( try:
status_code = 200, results = await cognee_search(payload.search_type, payload.query, user)
content = results,
) return results
except Exception as error: except Exception as error:
return JSONResponse( return JSONResponse(
status_code = 409, status_code = 409,
content = {"error": str(error)} content = {"error": str(error)}
) )
@app.get("/api/v1/settings", response_model=dict) from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig
class LLMConfigDTO(OutDTO, LLMConfig):
pass
class VectorDBConfigDTO(OutDTO, VectorDBConfig):
pass
class SettingsDTO(OutDTO):
llm: LLMConfigDTO
vector_db: VectorDBConfigDTO
@app.get("/api/v1/settings", response_model = SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)): async def get_settings(user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import get_settings as get_cognee_settings from cognee.modules.settings import get_settings as get_cognee_settings
return get_cognee_settings() return get_cognee_settings()
class LLMConfig(BaseModel):
class LLMConfigDTO(InDTO):
provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]] provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
model: str model: str
apiKey: str api_key: str
class VectorDBConfig(BaseModel): class VectorDBConfigDTO(InDTO):
provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]] provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
url: str url: str
apiKey: str api_key: str
class SettingsPayload(BaseModel): class SettingsPayloadDTO(InDTO):
llm: Optional[LLMConfig] = None llm: Optional[LLMConfigDTO] = None
vectorDB: Optional[VectorDBConfig] = None vector_db: Optional[VectorDBConfigDTO] = None
@app.post("/api/v1/settings", response_model=dict) @app.post("/api/v1/settings", response_model = None)
async def save_config(new_settings: SettingsPayload, user: User = Depends(get_authenticated_user)): async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)):
from cognee.modules.settings import save_llm_config, save_vector_db_config from cognee.modules.settings import save_llm_config, save_vector_db_config
if new_settings.llm is not None: if new_settings.llm is not None:
await save_llm_config(new_settings.llm) await save_llm_config(new_settings.llm)
if new_settings.vectorDB is not None:
await save_vector_db_config(new_settings.vectorDB) if new_settings.vector_db is not None:
return JSONResponse( await save_vector_db_config(new_settings.vector_db)
status_code=200,
content="OK",
)
def start_api_server(host: str = "0.0.0.0", port: int = 8000): def start_api_server(host: str = "0.0.0.0", port: int = 8000):

View file

@ -12,6 +12,7 @@ from cognee.modules.pipelines.tasks.Task import Task
from cognee.modules.pipelines import run_tasks, run_tasks_parallel from cognee.modules.pipelines import run_tasks, run_tasks_parallel
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.modules.users.methods import get_default_user from cognee.modules.users.methods import get_default_user
from cognee.modules.pipelines.models import PipelineRunStatus
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status
from cognee.tasks import chunk_naive_llm_classifier, \ from cognee.tasks import chunk_naive_llm_classifier, \
@ -75,11 +76,11 @@ async def run_cognify_pipeline(dataset: Dataset, user: User):
async with update_status_lock: async with update_status_lock:
task_status = await get_pipeline_status([dataset_id]) task_status = await get_pipeline_status([dataset_id])
if dataset_id in task_status and task_status[dataset_id] == "DATASET_PROCESSING_STARTED": if dataset_id in task_status and task_status[dataset_id] == PipelineRunStatus.DATASET_PROCESSING_STARTED:
logger.info("Dataset %s is already being processed.", dataset_name) logger.info("Dataset %s is already being processed.", dataset_name)
return return
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_STARTED", { await log_pipeline_status(dataset_id, PipelineRunStatus.DATASET_PROCESSING_STARTED, {
"dataset_name": dataset_name, "dataset_name": dataset_name,
"files": document_ids_str, "files": document_ids_str,
}) })
@ -120,14 +121,14 @@ async def run_cognify_pipeline(dataset: Dataset, user: User):
send_telemetry("cognee.cognify EXECUTION COMPLETED", user.id) send_telemetry("cognee.cognify EXECUTION COMPLETED", user.id)
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_COMPLETED", { await log_pipeline_status(dataset_id, PipelineRunStatus.DATASET_PROCESSING_COMPLETED, {
"dataset_name": dataset_name, "dataset_name": dataset_name,
"files": document_ids_str, "files": document_ids_str,
}) })
except Exception as error: except Exception as error:
send_telemetry("cognee.cognify EXECUTION ERRORED", user.id) send_telemetry("cognee.cognify EXECUTION ERRORED", user.id)
await log_pipeline_status(dataset_id, "DATASET_PROCESSING_ERRORED", { await log_pipeline_status(dataset_id, PipelineRunStatus.DATASET_PROCESSING_ERRORED, {
"dataset_name": dataset_name, "dataset_name": dataset_name,
"files": document_ids_str, "files": document_ids_str,
}) })

View file

@ -18,10 +18,10 @@ class LLMConfig(BaseSettings):
"provider": self.llm_provider, "provider": self.llm_provider,
"model": self.llm_model, "model": self.llm_model,
"endpoint": self.llm_endpoint, "endpoint": self.llm_endpoint,
"apiKey": self.llm_api_key, "api_key": self.llm_api_key,
"temperature": self.llm_temperature, "temperature": self.llm_temperature,
"streaming": self.llm_stream, "streaming": self.llm_streaming,
"transcriptionModel": self.transcription_model "transcription_model": self.transcription_model
} }
@lru_cache @lru_cache

View file

@ -1,8 +1,14 @@
import enum
from uuid import uuid4 from uuid import uuid4
from datetime import datetime, timezone from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, String, JSON from sqlalchemy import Column, DateTime, JSON, Enum
from cognee.infrastructure.databases.relational import Base, UUID from cognee.infrastructure.databases.relational import Base, UUID
class PipelineRunStatus(enum.Enum):
    # Allowed values for PipelineRun.status; every member's value is the same
    # string as its name.
    DATASET_PROCESSING_STARTED = "DATASET_PROCESSING_STARTED"
    DATASET_PROCESSING_COMPLETED = "DATASET_PROCESSING_COMPLETED"
    DATASET_PROCESSING_ERRORED = "DATASET_PROCESSING_ERRORED"
class PipelineRun(Base): class PipelineRun(Base):
__tablename__ = "pipeline_runs" __tablename__ = "pipeline_runs"
@ -10,7 +16,7 @@ class PipelineRun(Base):
created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc)) created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))
status = Column(String) status = Column(Enum(PipelineRunStatus))
run_id = Column(UUID, index = True) run_id = Column(UUID, index = True)
run_info = Column(JSON) run_info = Column(JSON)

View file

@ -1 +1 @@
from .PipelineRun import PipelineRun from .PipelineRun import PipelineRun, PipelineRunStatus

View file

@ -1,3 +1,3 @@
from .get_settings import get_settings from .get_settings import get_settings, SettingsDict
from .save_llm_config import save_llm_config from .save_llm_config import save_llm_config
from .save_vector_db_config import save_vector_db_config from .save_vector_db_config import save_vector_db_config

View file

@ -1,7 +1,35 @@
from enum import Enum
from typing import Optional
from pydantic import BaseModel
from cognee.infrastructure.databases.vector import get_vectordb_config from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.llm import get_llm_config from cognee.infrastructure.llm import get_llm_config
def get_settings(): class ConfigChoice(BaseModel):
value: str
label: str
class ModelName(Enum):
    # Each member's value equals its name.
    # NOTE(review): despite the class name these look like LLM provider
    # identifiers rather than model names; no use of this enum is visible
    # here -- confirm intent.
    openai = "openai"
    ollama = "ollama"
    anthropic = "anthropic"
class LLMConfig(BaseModel):
    # LLM section of the settings returned by get_settings().
    #
    # get_settings() passes None for `api_key` when no key is configured and
    # None for `model` when no model is configured. Both fields therefore
    # accept None; as required non-null fields they would make
    # SettingsDict.model_validate() raise for an unconfigured installation.
    api_key: Optional[str] = None
    model: Optional[ConfigChoice] = None
    # Currently selected provider as a value/label pair.
    provider: ConfigChoice
    # Selectable models, keyed by provider value.
    models: dict[str, list[ConfigChoice]]
    # Every selectable provider.
    providers: list[ConfigChoice]
class VectorDBConfig(BaseModel):
    """Vector database section of the settings payload built by ``get_settings``."""

    # NOTE(review): ``api_key`` and ``url`` are required strings; confirm the
    # vector DB config can never yield None for them, otherwise validation of
    # the settings payload will fail.
    api_key: str
    url: str

    # Currently selected vector database provider.
    provider: ConfigChoice

    # Selectable vector database providers.
    providers: list[ConfigChoice]
class SettingsDict(BaseModel):
    """Return type of ``get_settings``: the LLM and vector database settings."""

    # LLM provider/model settings.
    llm: LLMConfig

    # Vector database settings.
    vector_db: VectorDBConfig
def get_settings() -> SettingsDict:
llm_config = get_llm_config() llm_config = get_llm_config()
vector_dbs = [{ vector_dbs = [{
@ -31,9 +59,7 @@ def get_settings():
"label": "Anthropic", "label": "Anthropic",
}] }]
llm_config = get_llm_config() return SettingsDict.model_validate(dict(
return dict(
llm = { llm = {
"provider": { "provider": {
"label": llm_config.llm_provider, "label": llm_config.llm_provider,
@ -43,7 +69,7 @@ def get_settings():
"value": llm_config.llm_model, "value": llm_config.llm_model,
"label": llm_config.llm_model, "label": llm_config.llm_model,
} if llm_config.llm_model else None, } if llm_config.llm_model else None,
"apiKey": (llm_config.llm_api_key[:-10] + "**********") if llm_config.llm_api_key else None, "api_key": (llm_config.llm_api_key[:-10] + "**********") if llm_config.llm_api_key else None,
"providers": llm_providers, "providers": llm_providers,
"models": { "models": {
"openai": [{ "openai": [{
@ -75,13 +101,13 @@ def get_settings():
}] }]
}, },
}, },
vectorDB = { vector_db = {
"provider": { "provider": {
"label": vector_config.vector_engine_provider, "label": vector_config.vector_engine_provider,
"value": vector_config.vector_engine_provider.lower(), "value": vector_config.vector_engine_provider.lower(),
}, },
"url": vector_config.vector_db_url, "url": vector_config.vector_db_url,
"apiKey": vector_config.vector_db_key, "api_key": vector_config.vector_db_key,
"options": vector_dbs, "providers": vector_dbs,
}, },
) ))

View file

@ -2,7 +2,7 @@ from pydantic import BaseModel
from cognee.infrastructure.llm import get_llm_config from cognee.infrastructure.llm import get_llm_config
class LLMConfig(BaseModel): class LLMConfig(BaseModel):
apiKey: str api_key: str
model: str model: str
provider: str provider: str
@ -12,5 +12,5 @@ async def save_llm_config(new_llm_config: LLMConfig):
llm_config.llm_provider = new_llm_config.provider llm_config.llm_provider = new_llm_config.provider
llm_config.llm_model = new_llm_config.model llm_config.llm_model = new_llm_config.model
if "*****" not in new_llm_config.apiKey and len(new_llm_config.apiKey.strip()) > 0: if "*****" not in new_llm_config.api_key and len(new_llm_config.api_key.strip()) > 0:
llm_config.llm_api_key = new_llm_config.apiKey llm_config.llm_api_key = new_llm_config.api_key

View file

@ -4,12 +4,12 @@ from cognee.infrastructure.databases.vector import get_vectordb_config
class VectorDBConfig(BaseModel): class VectorDBConfig(BaseModel):
url: str url: str
apiKey: str api_key: str
provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]] provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
async def save_vector_db_config(vector_db_config: VectorDBConfig): async def save_vector_db_config(vector_db_config: VectorDBConfig):
vector_config = get_vectordb_config() vector_config = get_vectordb_config()
vector_config.vector_db_url = vector_db_config.url vector_config.vector_db_url = vector_db_config.url
vector_config.vector_db_key = vector_db_config.apiKey vector_config.vector_db_key = vector_db_config.api_key
vector_config.vector_engine_provider = vector_db_config.provider vector_config.vector_engine_provider = vector_db_config.provider

View file

@ -1,5 +1,6 @@
from .get_user import get_user from .get_user import get_user
from .create_user import create_user from .create_user import create_user
from .delete_user import delete_user
from .get_default_user import get_default_user from .get_default_user import get_default_user
from .create_default_user import create_default_user from .create_default_user import create_default_user
from .get_authenticated_user import get_authenticated_user from .get_authenticated_user import get_authenticated_user

View file

@ -32,7 +32,6 @@ async def create_user(
await session.refresh(user) await session.refresh(user)
return user return user
print(f"User created: {user.email}")
except UserAlreadyExists as error: except UserAlreadyExists as error:
print(f"User {email} already exists") print(f"User {email} already exists")
raise error raise error

View file

@ -0,0 +1,17 @@
from fastapi_users.exceptions import UserNotExists
from cognee.infrastructure.databases.relational import get_relational_engine
from ..get_user_manager import get_user_manager_context
from ..get_user_db import get_user_db_context
async def delete_user(email: str):
    """Delete the user registered under ``email``.

    Opens a relational session, looks the user up by e-mail through the user
    manager and deletes the returned user.

    Args:
        email: E-mail address of the user to delete.

    Raises:
        UserNotExists: Re-raised, after a message is printed, when it is
            raised while looking up or deleting the user.
    """
    try:
        engine = get_relational_engine()

        # One statement with three context managers; entered left to right and
        # exited in reverse, exactly like the equivalent nested blocks.
        async with engine.get_async_session() as db_session, \
                get_user_db_context(db_session) as users_db, \
                get_user_manager_context(users_db) as manager:
            target = await manager.get_by_email(email)
            await manager.delete(target)
    except UserNotExists as not_found:
        print(f"User {email} doesn't exist")
        raise not_found

View file

@ -23,7 +23,10 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
{ {
"message": "Hello, World, I am alive!" "status": 200,
"body": {
"message": "Hello, World, I am alive!"
}
} }
``` ```
@ -37,7 +40,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
{ {
"status": "OK" "status": 200
} }
``` ```
@ -50,15 +53,18 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
[ {
{ "status": 200,
"id": "dataset_id_1", "body": [
"name": "Dataset Name 1", {
"description": "Description of Dataset 1", "id": "dataset_id_1",
"name": "Dataset Name 1",
"description": "Description of Dataset 1",
...
},
... ...
}, ]
... }
]
``` ```
### 4. Delete Dataset ### 4. Delete Dataset
@ -74,7 +80,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
{ {
"status": "OK" "status": 200
} }
``` ```
@ -105,14 +111,17 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
[ {
{ "status": 200,
"data_id": "data_id_1", "body": [
"content": "Data content here", {
"data_id": "data_id_1",
"content": "Data content here",
...
},
... ...
}, ]
... }
]
``` ```
### 7. Get Dataset Status ### 7. Get Dataset Status
@ -128,9 +137,12 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
{ {
"dataset_id_1": "Status 1", "status": 200,
"dataset_id_2": "Status 2", "body": {
... "dataset_id_1": "Status 1",
"dataset_id_2": "Status 2",
...
}
} }
``` ```
@ -169,7 +181,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
{ {
"message": "OK" "status": 200
} }
``` ```
@ -190,7 +202,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
{ {
"message": "OK" "status": 200
} }
``` ```
@ -204,7 +216,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Request Body**: **Request Body**:
```json ```json
{ {
"searchType": "INSIGHTS", # Or "SUMMARIES" or "CHUNKS" "searchType": "INSIGHTS", // Or "SUMMARIES" or "CHUNKS"
"query": "QUERY_TO_MATCH_DATA" "query": "QUERY_TO_MATCH_DATA"
} }
``` ```
@ -213,31 +225,40 @@ The base URL for all API requests is determined by the server's deployment envir
For "INSIGHTS" search type: For "INSIGHTS" search type:
```json ```json
[[ {
{ "name" "source_node_name" }, "status": 200,
{ "relationship_name" "between_nodes_relationship_name" }, "body": [[
{ "name" "target_node_name" }, { "name": "source_node_name" },
]] { "relationship_name": "between_nodes_relationship_name" },
{ "name": "target_node_name" }
]]
}
``` ```
For "SUMMARIES" search type: For "SUMMARIES" search type:
```json ```json
[ {
{ "text" "summary_text" }, "status": 200,
{ "text" "summary_text" }, "body": [
{ "text" "summary_text" }, { "text": "summary_text" },
... { "text": "summary_text" },
] { "text": "summary_text" },
...
]
}
``` ```
For "CHUNKS" search type: For "CHUNKS" search type:
```json ```json
[ {
{ "text" "chunk_text" }, "status": 200,
{ "text" "chunk_text" }, "body": [
{ "text" "chunk_text" }, { "text": "chunk_text" },
... { "text": "chunk_text" },
] { "text": "chunk_text" },
...
]
}
``` ```
### 12. Get Settings ### 12. Get Settings
@ -250,9 +271,12 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
{ {
"llm": {...}, "status": 200,
"vectorDB": {...}, "body": {
... "llm": {...},
"vectorDB": {...},
...
}
} }
``` ```
@ -270,6 +294,6 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**: **Response**:
```json ```json
{ {
"status": "OK" "status": 200
} }
``` ```

View file

@ -3,10 +3,11 @@
!!! tip "cognee uses tasks grouped into pipelines to populate graph and vector stores" !!! tip "cognee uses tasks grouped into pipelines to populate graph and vector stores"
Cognee uses tasks grouped into pipelines to populate graph and vector stores. These tasks are designed to analyze and enrich your data, improving the answers generated by Large Language Models (LLMs). Cognee organizes tasks into pipelines that populate graph and vector stores. These tasks analyze and enrich data, enhancing the quality of answers produced by Large Language Models (LLMs).
This section provides a template to help you structure your data and build pipelines. \
These tasks serve as a starting point for using Cognee to create reliable LLM pipelines.
In this section, you'll find a template that you can use to structure your data and build pipelines.
These tasks are designed to help you get started with cognee and build reliable LLM pipelines
@ -15,7 +16,7 @@ These tasks are designed to help you get started with cognee and build reliable
## Task 1: Category Extraction ## Task 1: Category Extraction
Data enrichment is the process of enhancing raw data with additional information to make it more valuable. This template is a sample task that extract categories from a document and populates a graph with the extracted categories. Data enrichment is the process of enhancing raw data with additional information to make it more valuable. This template is a sample task that extracts categories from a document and populates a graph with the extracted categories.
Let's go over the steps to use this template [full code provided here](https://github.com/topoteretes/cognee/blob/main/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py): Let's go over the steps to use this template [full code provided here](https://github.com/topoteretes/cognee/blob/main/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py):
@ -239,4 +240,4 @@ for dataset in datasets:
if dataset_name in existing_datasets: if dataset_name in existing_datasets:
awaitables.append(run_cognify_pipeline(dataset)) awaitables.append(run_cognify_pipeline(dataset))
return await asyncio.gather(*awaitables) return await asyncio.gather(*awaitables)
``` ```

View file

@ -3,6 +3,9 @@
echo "Debug mode: $DEBUG" echo "Debug mode: $DEBUG"
echo "Environment: $ENVIRONMENT" echo "Environment: $ENVIRONMENT"
# Run migrations
poetry run alembic upgrade head
echo "Starting Gunicorn" echo "Starting Gunicorn"
if [ "$ENVIRONMENT" = "dev" ]; then if [ "$ENVIRONMENT" = "dev" ]; then

File diff suppressed because one or more lines are too long