Merge branch 'main' of github.com:topoteretes/cognee into COG-170-PGvector-adapter

Commit 58e5854943
25 changed files with 585 additions and 368 deletions
.github/workflows/cd.yaml (3 changes)

@@ -7,6 +7,7 @@ on:
      - feature/*
  paths-ignore:
    - '**.md'
    - 'examples/**'

env:
  AWS_ROLE_DEV_CICD: "arn:aws:iam::463722570299:role/cognee-dev-base-role-github-ci-cd"

@@ -76,7 +77,7 @@ jobs:
        script: |
          await github.rest.actions.createWorkflowDispatch({
            owner: 'topoteretes',
            repo: 'PromethAI-Infra',
            repo: 'cognee-infra',
            workflow_id: 'terraform.apply.yml',
            ref: 'main'
          })
.github/workflows/cd_prd.yaml (139 changes)

@@ -22,8 +22,9 @@ jobs:
      id-token: write
      contents: read
    steps:
      - name: Take code from repo
        uses: actions/checkout@v3
      - name: Checkout code from repo
        uses: actions/checkout@v4

      - name: Set environment variable for stage
        id: set-env
        run: |

@@ -34,68 +35,72 @@ jobs:
            echo "STAGE=dev" >> $GITHUB_ENV
            echo "::set-output name=stage::dev"
          fi
#      - name: Use output
#        run: echo "The stage is ${{ steps.set-env.outputs.stage }}"
#      - name: Configure AWS credentials
#        uses: aws-actions/configure-aws-credentials@v1
#        with:
#          role-to-assume: ${{ env.AWS_ROLE_DEV_CICD }}
#          aws-region: eu-west-1
#      - name: Create Docker image and push to ECR
#        uses: ./.github/actions/image_builder
#        id: generate-promethai-docker
#        with:
#          stage: prd
#          aws_account_id: ${{ env.AWS_ACCOUNT_ID_DEV }}
#          should_publish: true
#          ecr_image_repo_name: promethai-prd-backend-promethai-backend
#          # ecr_image_repo_name: cognee-prd-backend-cognee-ecr
#          dockerfile_location: ./
#      - name: Export Docker image tag
#        id: export-promethai-docker-tag
#        run: |
#          export DOCKER_TAG=$(cat /tmp/.DOCKER_IMAGE_VERSION)
#          echo "Docker tag is: $DOCKER_TAG"
#          echo "promethai_docker_tag_backend=$DOCKER_TAG" >> $GITHUB_OUTPUT
#
##      - name: Create Tag and Release
##        runs-on: ubuntu-latest
##        uses: actions/checkout@v3
##        needs: publish_docker_to_ecr # ensure this job runs after Docker image is pushed
##        steps:
##          - name: Check out code
##            uses: actions/checkout@v3
##          - name: Bump version and push tag
##            id: bump_version_and_push_tag
##            uses: anothrNick/github-tag-action@1.34.0
##            env:
##              GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
##              WITH_V: true
##              DEFAULT_BUMP: 'minor' # or 'minor' or 'major'
##          - name: Create Release
##            id: create_release
##            uses: actions/create-release@v1
##            env:
##              GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
##            with:
##              tag_name: ${{ steps.bump_version_and_push_tag.outputs.tag }}
##              release_name: Release ${{ steps.bump_version_and_push_tag.outputs.tag }}
#    outputs:
#      promethai_docker_tag_backend: ${{ steps.export-promethai-docker-tag.outputs.promethai_docker_tag_backend }}
#
#  apply_tf:
#    name: Trigger terraform apply workflow
#    runs-on: ubuntu-latest
#    needs: publish_docker_to_ecr
#    steps:
#      - name: TF apply workflow triggers step
#        uses: actions/github-script@v6
#        with:
#          github-token: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
#          script: |
#            await github.rest.actions.createWorkflowDispatch({
#              owner: 'topoteretes',
#              repo: 'PromethAI-Infra',
#              workflow_id: 'terraform.apply.yml',
#              ref: 'main'
#            })

      - name: Use output
        run: echo "The stage is ${{ steps.set-env.outputs.stage }}"

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ env.AWS_ROLE_DEV_CICD }}
          aws-region: eu-west-1

      - name: Build Docker image and push to ECR
        uses: ./.github/actions/image_builder
        id: generate-promethai-docker
        with:
          stage: prd
          aws_account_id: ${{ env.AWS_ACCOUNT_ID_DEV }}
          should_publish: true
          ecr_image_repo_name: cognee-prd-backend-cognee-ecr
          dockerfile_location: ./

      - name: Export Docker image tag
        id: export-cognee-docker-tag
        run: |
          export DOCKER_TAG=$(cat /tmp/.DOCKER_IMAGE_VERSION)
          echo "Docker tag is: $DOCKER_TAG"
          echo "cognee_image_tag=$DOCKER_TAG" >> $GITHUB_OUTPUT

#      - name: Create Tag and Release
#        runs-on: ubuntu-latest
#        uses: actions/checkout@v3
#        needs: publish_docker_to_ecr # ensure this job runs after Docker image is pushed
#        steps:
#          - name: Check out code
#            uses: actions/checkout@v3
#          - name: Bump version and push tag
#            id: bump_version_and_push_tag
#            uses: anothrNick/github-tag-action@1.34.0
#            env:
#              GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
#              WITH_V: true
#              DEFAULT_BUMP: 'minor' # or 'minor' or 'major'
#          - name: Create Release
#            id: create_release
#            uses: actions/create-release@v1
#            env:
#              GITHUB_TOKEN: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
#            with:
#              tag_name: ${{ steps.bump_version_and_push_tag.outputs.tag }}
#              release_name: Release ${{ steps.bump_version_and_push_tag.outputs.tag }}

    outputs:
      cognee_image_tag: ${{ steps.export-promethai-docker-tag.outputs.cognee_image_tag }}

  trigger_deployment:
    name: Trigger deployment
    runs-on: ubuntu-latest
    needs: publish_docker_to_ecr
    steps:
      - name: TF apply workflow triggers step
        uses: actions/github-script@v7
        with:
          github-token: ${{ secrets.PAT_FOR_CROSS_REPOS_CICD_TRIGGERING }}
          script: |
            await github.rest.actions.createWorkflowDispatch({
              owner: 'topoteretes',
              repo: 'cognee-infra',
              workflow_id: 'terraform.apply.yml',
              ref: 'main'
            })
.github/workflows/daily_twitter_stats.yaml (deleted, 38 lines)

@@ -1,38 +0,0 @@
#name: analytics | Send Twitter Followers to Segment
#
#on: pull_request
#
##on:
##  schedule:
##    - cron: '0 0 * * *' # Runs daily at midnight UTC. Adjust as needed.
##  workflow_dispatch: # Allows manual triggering of the workflow
#
#jobs:
#  send-followers:
#    runs-on: ubuntu-latest
#
#    steps:
#      - name: Checkout repository
#        uses: actions/checkout@v3
#
#      - name: Set up Python
#        uses: actions/setup-python@v4
#        with:
#          python-version: '3.x'
#
#      - name: Install dependencies
#        run: |
#          pip install tweepy requests
#
#      - name: Send Twitter Followers to Segment
#        env:
#          TWITTER_API_KEY: ${{ secrets.TWITTER_API_KEY }}
#          TWITTER_API_SECRET: ${{ secrets.TWITTER_API_SECRET }}
#          TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }}
#          TWITTER_ACCESS_SECRET: ${{ secrets.TWITTER_ACCESS_SECRET }}
#          SEGMENT_WRITE_KEY: ${{ secrets.SEGMENT_WRITE_KEY }}
#          TWITTER_USERNAME: ${{ secrets.TWITTER_USERNAME }}
#        run: |
#          cd tools
#          python daily_twitter_stats.py
#
@@ -13,7 +13,7 @@ COPY pyproject.toml poetry.lock /app/

RUN pip install poetry

# Create virtualenv
# Don't create virtualenv since docker is already isolated
RUN poetry config virtualenvs.create false

# Install the dependencies

@@ -22,7 +22,11 @@ RUN poetry install --no-root --no-dev
# Set the PYTHONPATH environment variable to include the /app directory
ENV PYTHONPATH=/app

COPY cognee/ /app/cognee
COPY cognee/ cognee/

# Copy Alembic configuration
COPY alembic.ini ./
COPY alembic/ alembic/

COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh
@@ -1,49 +0,0 @@
FROM python:3.11

# Set build argument
ARG DEBUG

# Set environment variable based on the build argument
ENV DEBUG=${DEBUG}
ENV PIP_NO_CACHE_DIR=true
ENV PATH="${PATH}:/root/.poetry/bin"

RUN pip install poetry

WORKDIR /app
COPY pyproject.toml poetry.lock /app/

# Install the dependencies
RUN poetry config virtualenvs.create false && \
    poetry install --no-root --no-dev

RUN apt-get update -q && \
    apt-get install -y -q \
    gcc \
    python3-dev \
    curl \
    zip \
    jq \
    netcat-traditional && \
    pip install poetry && \
    curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip -qq awscliv2.zip && \
    ./aws/install && \
    apt-get clean && \
    rm -rf \
    awscliv2.zip \
    /var/lib/apt/lists/* \
    /tmp/* \
    /var/tmp/*

WORKDIR /app
# Set the PYTHONPATH environment variable to include the /app directory
ENV PYTHONPATH=/app

COPY cognee/ /app/cognee

COPY entrypoint.sh /app/entrypoint.sh
RUN chmod +x /app/entrypoint.sh

ENTRYPOINT ["/app/entrypoint.sh"]
README.md (32 changes)

@@ -12,8 +12,8 @@ We build for developers who need a reliable, production-ready data layer for AI

## What is cognee?

cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you ability to interconnect and retrieve past conversations, documents, audio transcriptions, while also reducing hallucinations, developer effort and cost.
Try it in a Google collab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://topoteretes.github.io/cognee">documentation</a>
Cognee implements scalable, modular ECL (Extract, Cognify, Load) pipelines that allow you to interconnect and retrieve past conversations, documents, and audio transcriptions while reducing hallucinations, developer effort, and cost.
Try it in a Google Colab <a href="https://colab.research.google.com/drive/1g-Qnx6l_ecHZi0IOw23rg0qC4TYvEvWZ?usp=sharing">notebook</a> or have a look at our <a href="https://topoteretes.github.io/cognee">documentation</a>

If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord</a> community

@@ -57,10 +57,10 @@ To use different LLM providers, for more info check out our <a href="https://top

If you are using Networkx, create an account on Graphistry to visualize results:
```
cognee.config.set_graphistry_config({
  "username": "YOUR_USERNAME",
  "password": "YOUR_PASSWORD"
})
cognee.config.set_graphistry_config({
  "username": "YOUR_USERNAME",
  "password": "YOUR_PASSWORD"
})
```

(Optional) To run the UI, go to cognee-frontend directory and run:

@@ -207,12 +207,6 @@ Check out our demo notebook [here](https://github.com/topoteretes/cognee/blob/ma
[<img src="https://i3.ytimg.com/vi/-ARUfIzhzC4/maxresdefault.jpg" width="100%">](https://www.youtube.com/watch?v=BDFt4xVPmro "Learn about cognee: 55")


## Star History

[](https://star-history.com/#topoteretes/cognee&Date)

## Get Started

### Install Server

@@ -224,7 +218,6 @@ docker compose up
```


### Install SDK

Please see the cognee [Development Guide](https://topoteretes.github.io/cognee/quickstart/) for important beta information and usage instructions.

@@ -232,3 +225,16 @@ Please see the cognee [Development Guide](https://topoteretes.github.io/cognee/q
```bash
pip install cognee
```

## Star History

[](https://star-history.com/#topoteretes/cognee&Date)


## 💫 Contributors

<a href="https://github.com/topoteretes/cognee/graphs/contributors">
  <img alt="contributors" src="https://contrib.rocks/image?repo=topoteretes/cognee"/>
</a>
alembic/versions/482cd6517ce4_add_default_user.py (new file, 27 lines)

@@ -0,0 +1,27 @@
"""Add default user

Revision ID: 482cd6517ce4
Revises: 8057ae7329c2
Create Date: 2024-10-16 22:17:18.634638

"""
from typing import Sequence, Union

from sqlalchemy.util import await_only

from cognee.modules.users.methods import create_default_user, delete_user


# revision identifiers, used by Alembic.
revision: str = '482cd6517ce4'
down_revision: Union[str, None] = '8057ae7329c2'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    await_only(create_default_user())


def downgrade() -> None:
    await_only(delete_user("default_user@example.com"))
@@ -82,7 +82,7 @@ export default function Settings({ onDone = () => {}, submitButtonText = 'Save'
      },
      body: JSON.stringify({
        llm: newLLMConfig,
        vectorDB: newVectorConfig,
        vectorDb: newVectorConfig,
      }),
    })
      .then(() => {

@@ -145,7 +145,7 @@ export default function Settings({ onDone = () => {}, submitButtonText = 'Save'
        settings.llm.model = settings.llm.models[settings.llm.provider.value][0];
      }
      setLLMConfig(settings.llm);
      setVectorDBConfig(settings.vectorDB);
      setVectorDBConfig(settings.vectorDb);
    };
    fetchConfig();
  }, []);
cognee/api/DTO.py (new file, 15 lines)

@@ -0,0 +1,15 @@
from pydantic import BaseModel, ConfigDict
from pydantic.alias_generators import to_camel, to_snake


class OutDTO(BaseModel):
    model_config = ConfigDict(
        alias_generator = to_camel,
        populate_by_name = True,
    )

class InDTO(BaseModel):
    model_config = ConfigDict(
        alias_generator = to_camel,
        populate_by_name = True,
    )
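The new OutDTO/InDTO base classes only differ in the direction they are used: both generate camelCase aliases for snake_case fields, so response models serialize to camelCase while request models accept the camelCase payloads the frontend sends. A minimal sketch of that behaviour, assuming pydantic v2; the example models are hypothetical and not part of the commit:

```python
from datetime import datetime, timezone

from cognee.api.DTO import InDTO, OutDTO


class DatasetExampleDTO(OutDTO):
    # hypothetical output model, for illustration only
    owner_id: str
    created_at: datetime


class SearchExampleDTO(InDTO):
    # hypothetical input model, mirrors the shape of SearchPayloadDTO later in this diff
    search_type: str
    query: str


dto = DatasetExampleDTO(owner_id = "user-1", created_at = datetime.now(timezone.utc))

# to_camel turns snake_case field names into camelCase serialization aliases.
print(dto.model_dump(by_alias = True))  # {'ownerId': 'user-1', 'createdAt': datetime(...)}

# populate_by_name = True means both the alias and the field name are accepted on input.
SearchExampleDTO.model_validate({"searchType": "INSIGHTS", "query": "test"})
SearchExampleDTO.model_validate({"search_type": "INSIGHTS", "query": "test"})
```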
@@ -1,18 +1,23 @@
""" FastAPI server for the Cognee API. """
from datetime import datetime
import os
from uuid import UUID
import aiohttp
import uvicorn
import logging
import sentry_sdk
from typing import Dict, Any, List, Union, Optional, Literal
from typing import List, Union, Optional, Literal
from typing_extensions import Annotated
from fastapi import FastAPI, HTTPException, Form, UploadFile, Query, Depends
from fastapi.responses import JSONResponse, FileResponse, Response
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

from cognee.api.DTO import InDTO, OutDTO
from cognee.api.v1.search import SearchType
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.pipelines.models import PipelineRunStatus


# Set up logging

@@ -124,6 +129,7 @@ async def root():
    """
    return {"message": "Hello, World, I am alive!"}


@app.get("/health")
def health_check():
    """

@@ -131,41 +137,46 @@ def health_check():
    """
    return Response(status_code = 200)

@app.get("/api/v1/datasets", response_model = list)

class ErrorResponseDTO(BaseModel):
    message: str


class DatasetDTO(OutDTO):
    id: UUID
    name: str
    created_at: datetime
    updated_at: Optional[datetime]
    owner_id: UUID

@app.get("/api/v1/datasets", response_model = list[DatasetDTO])
async def get_datasets(user: User = Depends(get_authenticated_user)):
    try:
        from cognee.modules.data.methods import get_datasets
        datasets = await get_datasets(user.id)

        return JSONResponse(
            status_code = 200,
            content = [dataset.to_json() for dataset in datasets],
        )
        return datasets
    except Exception as error:
        logger.error(f"Error retrieving datasets: {str(error)}")
        raise HTTPException(status_code = 500, detail = f"Error retrieving datasets: {str(error)}") from error

@app.delete("/api/v1/datasets/{dataset_id}", response_model = dict)

@app.delete("/api/v1/datasets/{dataset_id}", response_model = None, responses = { 404: { "model": ErrorResponseDTO }})
async def delete_dataset(dataset_id: str, user: User = Depends(get_authenticated_user)):
    from cognee.modules.data.methods import get_dataset, delete_dataset

    dataset = get_dataset(user.id, dataset_id)
    dataset = await get_dataset(user.id, dataset_id)

    if dataset is None:
        return JSONResponse(
        raise HTTPException(
            status_code = 404,
            content = {
                "detail": f"Dataset ({dataset_id}) not found."
            }
            detail = f"Dataset ({dataset_id}) not found."
        )

    await delete_dataset(dataset)

    return JSONResponse(
        status_code = 200,
        content = "OK",
    )

@app.get("/api/v1/datasets/{dataset_id}/graph", response_model=list)
@app.get("/api/v1/datasets/{dataset_id}/graph", response_model = str)
async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authenticated_user)):
    from cognee.shared.utils import render_graph
    from cognee.infrastructure.databases.graph import get_graph_engine

@@ -184,7 +195,17 @@ async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authentica
        content = "Graphistry credentials are not set. Please set them in your .env file.",
    )

@app.get("/api/v1/datasets/{dataset_id}/data", response_model=list)

class DataDTO(OutDTO):
    id: UUID
    name: str
    created_at: datetime
    updated_at: Optional[datetime]
    extension: str
    mime_type: str
    raw_data_location: str

@app.get("/api/v1/datasets/{dataset_id}/data", response_model = list[DataDTO], responses = { 404: { "model": ErrorResponseDTO }})
async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)):
    from cognee.modules.data.methods import get_dataset_data, get_dataset

@@ -193,38 +214,33 @@ async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticat
    if dataset is None:
        return JSONResponse(
            status_code = 404,
            content = {
                "detail": f"Dataset ({dataset_id}) not found."
            }
            content = ErrorResponseDTO(f"Dataset ({dataset_id}) not found."),
        )

    dataset_data = await get_dataset_data(dataset_id = dataset.id)

    if dataset_data is None:
        raise HTTPException(status_code = 404, detail = f"Dataset ({dataset.id}) not found.")
        return []

    return [
        data.to_json() for data in dataset_data
    ]
    return dataset_data

@app.get("/api/v1/datasets/status", response_model=dict)

@app.get("/api/v1/datasets/status", response_model = dict[str, PipelineRunStatus])
async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None, user: User = Depends(get_authenticated_user)):
    from cognee.api.v1.datasets.datasets import datasets as cognee_datasets

    try:
        datasets_statuses = await cognee_datasets.get_status(datasets)

        return JSONResponse(
            status_code = 200,
            content = datasets_statuses,
        )
        return datasets_statuses
    except Exception as error:
        return JSONResponse(
            status_code = 409,
            content = {"error": str(error)}
        )

@app.get("/api/v1/datasets/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)

@app.get("/api/v1/datasets/{dataset_id}/data/{data_id}/raw", response_class = FileResponse)
async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
    from cognee.modules.data.methods import get_dataset, get_dataset_data

@@ -255,13 +271,8 @@ async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_a

    return data.raw_data_location

class AddPayload(BaseModel):
    data: Union[str, UploadFile, List[Union[str, UploadFile]]]
    dataset_id: str
    class Config:
        arbitrary_types_allowed = True

@app.post("/api/v1/add", response_model=dict)
@app.post("/api/v1/add", response_model = None)
async def add(
    data: List[UploadFile],
    datasetId: str = Form(...),

@@ -297,90 +308,89 @@ async def add(
            datasetId,
            user = user,
        )
        return JSONResponse(
            status_code = 200,
            content = {
                "message": "OK"
            }
        )
    except Exception as error:
        return JSONResponse(
            status_code = 409,
            content = {"error": str(error)}
        )

class CognifyPayload(BaseModel):

class CognifyPayloadDTO(BaseModel):
    datasets: List[str]

@app.post("/api/v1/cognify", response_model=dict)
async def cognify(payload: CognifyPayload, user: User = Depends(get_authenticated_user)):
@app.post("/api/v1/cognify", response_model = None)
async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
    """ This endpoint is responsible for the cognitive processing of the content."""
    from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
    try:
        await cognee_cognify(payload.datasets, user)
        return JSONResponse(
            status_code = 200,
            content = {
                "message": "OK"
            }
        )
    except Exception as error:
        return JSONResponse(
            status_code = 409,
            content = {"error": str(error)}
        )

class SearchPayload(BaseModel):
    searchType: SearchType

class SearchPayloadDTO(InDTO):
    search_type: SearchType
    query: str

@app.post("/api/v1/search", response_model=list)
async def search(payload: SearchPayload, user: User = Depends(get_authenticated_user)):
@app.post("/api/v1/search", response_model = list)
async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
    """ This endpoint is responsible for searching for nodes in the graph."""
    from cognee.api.v1.search import search as cognee_search
    try:
        results = await cognee_search(payload.searchType, payload.query, user)

        return JSONResponse(
            status_code = 200,
            content = results,
        )
    try:
        results = await cognee_search(payload.search_type, payload.query, user)

        return results
    except Exception as error:
        return JSONResponse(
            status_code = 409,
            content = {"error": str(error)}
        )

@app.get("/api/v1/settings", response_model=dict)
from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig

class LLMConfigDTO(OutDTO, LLMConfig):
    pass

class VectorDBConfigDTO(OutDTO, VectorDBConfig):
    pass

class SettingsDTO(OutDTO):
    llm: LLMConfigDTO
    vector_db: VectorDBConfigDTO

@app.get("/api/v1/settings", response_model = SettingsDTO)
async def get_settings(user: User = Depends(get_authenticated_user)):
    from cognee.modules.settings import get_settings as get_cognee_settings
    return get_cognee_settings()

class LLMConfig(BaseModel):

class LLMConfigDTO(InDTO):
    provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
    model: str
    apiKey: str
    api_key: str

class VectorDBConfig(BaseModel):
class VectorDBConfigDTO(InDTO):
    provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
    url: str
    apiKey: str
    api_key: str

class SettingsPayload(BaseModel):
    llm: Optional[LLMConfig] = None
    vectorDB: Optional[VectorDBConfig] = None
class SettingsPayloadDTO(InDTO):
    llm: Optional[LLMConfigDTO] = None
    vector_db: Optional[VectorDBConfigDTO] = None

@app.post("/api/v1/settings", response_model=dict)
async def save_config(new_settings: SettingsPayload, user: User = Depends(get_authenticated_user)):
@app.post("/api/v1/settings", response_model = None)
async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)):
    from cognee.modules.settings import save_llm_config, save_vector_db_config

    if new_settings.llm is not None:
        await save_llm_config(new_settings.llm)
    if new_settings.vectorDB is not None:
        await save_vector_db_config(new_settings.vectorDB)
    return JSONResponse(
        status_code=200,
        content="OK",
    )

    if new_settings.vector_db is not None:
        await save_vector_db_config(new_settings.vector_db)


def start_api_server(host: str = "0.0.0.0", port: int = 8000):
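Because SearchPayloadDTO is now an InDTO, the /api/v1/search endpoint keeps accepting the camelCase body the frontend already sends while the handler works with snake_case fields. A rough sketch of calling the reworked endpoint against a locally running server; the base URL, the requests dependency, and the way the session is authenticated are assumptions for illustration, not part of the commit:

```python
import requests  # any HTTP client works; requests is assumed here for brevity

BASE_URL = "http://localhost:8000"  # default host/port of start_api_server()

session = requests.Session()
# Every route depends on get_authenticated_user; how this session is
# authenticated (cookie, bearer token, ...) depends on the fastapi-users
# setup and is deliberately left out of this sketch.

response = session.post(
    f"{BASE_URL}/api/v1/search",
    json = {"searchType": "INSIGHTS", "query": "What is cognee?"},
)
response.raise_for_status()
print(response.json())  # list of results, as returned by the search handler
```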
@@ -12,6 +12,7 @@ from cognee.modules.pipelines.tasks.Task import Task
from cognee.modules.pipelines import run_tasks, run_tasks_parallel
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_default_user
from cognee.modules.pipelines.models import PipelineRunStatus
from cognee.modules.pipelines.operations.get_pipeline_status import get_pipeline_status
from cognee.modules.pipelines.operations.log_pipeline_status import log_pipeline_status
from cognee.tasks import chunk_naive_llm_classifier, \

@@ -75,11 +76,11 @@ async def run_cognify_pipeline(dataset: Dataset, user: User):
    async with update_status_lock:
        task_status = await get_pipeline_status([dataset_id])

        if dataset_id in task_status and task_status[dataset_id] == "DATASET_PROCESSING_STARTED":
        if dataset_id in task_status and task_status[dataset_id] == PipelineRunStatus.DATASET_PROCESSING_STARTED:
            logger.info("Dataset %s is already being processed.", dataset_name)
            return

    await log_pipeline_status(dataset_id, "DATASET_PROCESSING_STARTED", {
    await log_pipeline_status(dataset_id, PipelineRunStatus.DATASET_PROCESSING_STARTED, {
        "dataset_name": dataset_name,
        "files": document_ids_str,
    })

@@ -120,14 +121,14 @@ async def run_cognify_pipeline(dataset: Dataset, user: User):

        send_telemetry("cognee.cognify EXECUTION COMPLETED", user.id)

        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_COMPLETED", {
        await log_pipeline_status(dataset_id, PipelineRunStatus.DATASET_PROCESSING_COMPLETED, {
            "dataset_name": dataset_name,
            "files": document_ids_str,
        })
    except Exception as error:
        send_telemetry("cognee.cognify EXECUTION ERRORED", user.id)

        await log_pipeline_status(dataset_id, "DATASET_PROCESSING_ERRORED", {
        await log_pipeline_status(dataset_id, PipelineRunStatus.DATASET_PROCESSING_ERRORED, {
            "dataset_name": dataset_name,
            "files": document_ids_str,
        })
@@ -18,10 +18,10 @@ class LLMConfig(BaseSettings):
            "provider": self.llm_provider,
            "model": self.llm_model,
            "endpoint": self.llm_endpoint,
            "apiKey": self.llm_api_key,
            "api_key": self.llm_api_key,
            "temperature": self.llm_temperature,
            "streaming": self.llm_stream,
            "transcriptionModel": self.transcription_model
            "streaming": self.llm_streaming,
            "transcription_model": self.transcription_model
        }

    @lru_cache
@@ -1,8 +1,14 @@
import enum
from uuid import uuid4
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, String, JSON
from sqlalchemy import Column, DateTime, JSON, Enum
from cognee.infrastructure.databases.relational import Base, UUID

class PipelineRunStatus(enum.Enum):
    DATASET_PROCESSING_STARTED = "DATASET_PROCESSING_STARTED"
    DATASET_PROCESSING_COMPLETED = "DATASET_PROCESSING_COMPLETED"
    DATASET_PROCESSING_ERRORED = "DATASET_PROCESSING_ERRORED"

class PipelineRun(Base):
    __tablename__ = "pipeline_runs"

@@ -10,7 +16,7 @@ class PipelineRun(Base):

    created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))

    status = Column(String)
    status = Column(Enum(PipelineRunStatus))

    run_id = Column(UUID, index = True)
    run_info = Column(JSON)
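Switching the status column from a free-form String to Enum(PipelineRunStatus) means invalid values are rejected at the database layer and status checks compare enum members instead of raw strings, as the cognify pipeline above now does. A small sketch of how such a column is typically queried with SQLAlchemy; the session handling is assumed, not taken from the commit:

```python
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from cognee.modules.pipelines.models import PipelineRun, PipelineRunStatus


async def get_started_runs(session: AsyncSession) -> list[PipelineRun]:
    # Hypothetical helper: find runs still marked as started, comparing
    # against the enum member rather than the string "DATASET_PROCESSING_STARTED".
    result = await session.execute(
        select(PipelineRun).where(
            PipelineRun.status == PipelineRunStatus.DATASET_PROCESSING_STARTED
        )
    )
    return list(result.scalars().all())
```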
@@ -1 +1 @@
from .PipelineRun import PipelineRun
from .PipelineRun import PipelineRun, PipelineRunStatus
@@ -1,3 +1,3 @@
from .get_settings import get_settings
from .get_settings import get_settings, SettingsDict
from .save_llm_config import save_llm_config
from .save_vector_db_config import save_vector_db_config
@@ -1,7 +1,35 @@
from enum import Enum
from pydantic import BaseModel
from cognee.infrastructure.databases.vector import get_vectordb_config
from cognee.infrastructure.llm import get_llm_config

def get_settings():
class ConfigChoice(BaseModel):
    value: str
    label: str

class ModelName(Enum):
    openai = "openai"
    ollama = "ollama"
    anthropic = "anthropic"

class LLMConfig(BaseModel):
    api_key: str
    model: ConfigChoice
    provider: ConfigChoice
    models: dict[str, list[ConfigChoice]]
    providers: list[ConfigChoice]

class VectorDBConfig(BaseModel):
    api_key: str
    url: str
    provider: ConfigChoice
    providers: list[ConfigChoice]

class SettingsDict(BaseModel):
    llm: LLMConfig
    vector_db: VectorDBConfig

def get_settings() -> SettingsDict:
    llm_config = get_llm_config()

    vector_dbs = [{

@@ -31,9 +59,7 @@ def get_settings():
        "label": "Anthropic",
    }]

    llm_config = get_llm_config()

    return dict(
    return SettingsDict.model_validate(dict(
        llm = {
            "provider": {
                "label": llm_config.llm_provider,

@@ -43,7 +69,7 @@ def get_settings():
                "value": llm_config.llm_model,
                "label": llm_config.llm_model,
            } if llm_config.llm_model else None,
            "apiKey": (llm_config.llm_api_key[:-10] + "**********") if llm_config.llm_api_key else None,
            "api_key": (llm_config.llm_api_key[:-10] + "**********") if llm_config.llm_api_key else None,
            "providers": llm_providers,
            "models": {
                "openai": [{

@@ -75,13 +101,13 @@ def get_settings():
            }]
        },
        },
        vectorDB = {
        vector_db = {
            "provider": {
                "label": vector_config.vector_engine_provider,
                "value": vector_config.vector_engine_provider.lower(),
            },
            "url": vector_config.vector_db_url,
            "apiKey": vector_config.vector_db_key,
            "options": vector_dbs,
            "api_key": vector_config.vector_db_key,
            "providers": vector_dbs,
        },
    )
    ))
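get_settings() now returns a typed SettingsDict instead of a plain dict, and the /api/v1/settings route wraps it in SettingsDTO (an OutDTO) before serializing. A brief sketch of what that buys at the call site, purely illustrative:

```python
from cognee.modules.settings import get_settings, SettingsDict

settings: SettingsDict = get_settings()

# Typed, snake_case access instead of string keys into a nested dict.
print(settings.llm.provider.label)
print(settings.vector_db.provider.label)

# The API layer re-validates this into SettingsDTO, so the JSON response
# exposes the same data under camelCase keys such as "vectorDb" and "apiKey".
```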
@@ -2,7 +2,7 @@ from pydantic import BaseModel
from cognee.infrastructure.llm import get_llm_config

class LLMConfig(BaseModel):
    apiKey: str
    api_key: str
    model: str
    provider: str

@@ -12,5 +12,5 @@ async def save_llm_config(new_llm_config: LLMConfig):
    llm_config.llm_provider = new_llm_config.provider
    llm_config.llm_model = new_llm_config.model

    if "*****" not in new_llm_config.apiKey and len(new_llm_config.apiKey.strip()) > 0:
        llm_config.llm_api_key = new_llm_config.apiKey
    if "*****" not in new_llm_config.api_key and len(new_llm_config.api_key.strip()) > 0:
        llm_config.llm_api_key = new_llm_config.api_key
@@ -4,12 +4,12 @@ from cognee.infrastructure.databases.vector import get_vectordb_config

class VectorDBConfig(BaseModel):
    url: str
    apiKey: str
    api_key: str
    provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]

async def save_vector_db_config(vector_db_config: VectorDBConfig):
    vector_config = get_vectordb_config()

    vector_config.vector_db_url = vector_db_config.url
    vector_config.vector_db_key = vector_db_config.apiKey
    vector_config.vector_db_key = vector_db_config.api_key
    vector_config.vector_engine_provider = vector_db_config.provider
@@ -1,5 +1,6 @@
from .get_user import get_user
from .create_user import create_user
from .delete_user import delete_user
from .get_default_user import get_default_user
from .create_default_user import create_default_user
from .get_authenticated_user import get_authenticated_user
@@ -32,7 +32,6 @@ async def create_user(
        await session.refresh(user)

        return user
        print(f"User created: {user.email}")
    except UserAlreadyExists as error:
        print(f"User {email} already exists")
        raise error
cognee/modules/users/methods/delete_user.py (new file, 17 lines)

@@ -0,0 +1,17 @@
from fastapi_users.exceptions import UserNotExists
from cognee.infrastructure.databases.relational import get_relational_engine
from ..get_user_manager import get_user_manager_context
from ..get_user_db import get_user_db_context

async def delete_user(email: str):
    try:
        relational_engine = get_relational_engine()

        async with relational_engine.get_async_session() as session:
            async with get_user_db_context(session) as user_db:
                async with get_user_manager_context(user_db) as user_manager:
                    user = await user_manager.get_by_email(email)
                    await user_manager.delete(user)
    except UserNotExists as error:
        print(f"User {email} doesn't exist")
        raise error
@@ -23,7 +23,10 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**:
```json
{
    "message": "Hello, World, I am alive!"
    "status": 200,
    "body": {
        "message": "Hello, World, I am alive!"
    }
}
```

@@ -37,7 +40,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**:
```json
{
    "status": "OK"
    "status": 200
}
```

@@ -50,15 +53,18 @@ The base URL for all API requests is determined by the server's deployment envir

**Response**:
```json
[
    {
        "id": "dataset_id_1",
        "name": "Dataset Name 1",
        "description": "Description of Dataset 1",
{
    "status": 200,
    "body": [
        {
            "id": "dataset_id_1",
            "name": "Dataset Name 1",
            "description": "Description of Dataset 1",
            ...
        },
        ...
        },
        ...
    ]
    ]
}
```

### 4. Delete Dataset

@@ -74,7 +80,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**:
```json
{
    "status": "OK"
    "status": 200
}
```

@@ -105,14 +111,17 @@ The base URL for all API requests is determined by the server's deployment envir

**Response**:
```json
[
    {
        "data_id": "data_id_1",
        "content": "Data content here",
{
    "status": 200,
    "body": [
        {
            "data_id": "data_id_1",
            "content": "Data content here",
            ...
        },
        ...
        },
        ...
    ]
    ]
}
```

### 7. Get Dataset Status

@@ -128,9 +137,12 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**:
```json
{
    "dataset_id_1": "Status 1",
    "dataset_id_2": "Status 2",
    ...
    "status": 200,
    "body": {
        "dataset_id_1": "Status 1",
        "dataset_id_2": "Status 2",
        ...
    }
}
```

@@ -169,7 +181,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**:
```json
{
    "message": "OK"
    "status": 200
}
```

@@ -190,7 +202,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**:
```json
{
    "message": "OK"
    "status": 200
}
```

@@ -204,7 +216,7 @@ The base URL for all API requests is determined by the server's deployment envir
**Request Body**:
```json
{
    "searchType": "INSIGHTS", # Or "SUMMARIES" or "CHUNKS"
    "searchType": "INSIGHTS", // Or "SUMMARIES" or "CHUNKS"
    "query": "QUERY_TO_MATCH_DATA"
}
```

@@ -213,31 +225,40 @@ The base URL for all API requests is determined by the server's deployment envir

For "INSIGHTS" search type:
```json
[[
    { "name" "source_node_name" },
    { "relationship_name" "between_nodes_relationship_name" },
    { "name" "target_node_name" },
]]
{
    "status": 200,
    "body": [[
        { "name" "source_node_name" },
        { "relationship_name" "between_nodes_relationship_name" },
        { "name" "target_node_name" },
    ]]
}
```

For "SUMMARIES" search type:
```json
[
    { "text" "summary_text" },
    { "text" "summary_text" },
    { "text" "summary_text" },
    ...
]
{
    "status": 200,
    "body": [
        { "text" "summary_text" },
        { "text" "summary_text" },
        { "text" "summary_text" },
        ...
    ]
}
```

For "CHUNKS" search type:
```json
[
    { "text" "chunk_text" },
    { "text" "chunk_text" },
    { "text" "chunk_text" },
    ...
]
{
    "status": 200,
    "body": [
        { "text" "chunk_text" },
        { "text" "chunk_text" },
        { "text" "chunk_text" },
        ...
    ]
}
```

### 12. Get Settings

@@ -250,9 +271,12 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**:
```json
{
    "llm": {...},
    "vectorDB": {...},
    ...
    "status": 200,
    "body": {
        "llm": {...},
        "vectorDB": {...},
        ...
    }
}
```

@@ -270,6 +294,6 @@ The base URL for all API requests is determined by the server's deployment envir
**Response**:
```json
{
    "status": "OK"
    "status": 200
}
```
@@ -3,10 +3,11 @@
!!! tip "cognee uses tasks grouped into pipelines to populate graph and vector stores"


Cognee uses tasks grouped into pipelines to populate graph and vector stores. These tasks are designed to analyze and enrich your data, improving the answers generated by Large Language Models (LLMs).
Cognee organizes tasks into pipelines that populate graph and vector stores. These tasks analyze and enrich data, enhancing the quality of answers produced by Large Language Models (LLMs).

This section provides a template to help you structure your data and build pipelines. \
These tasks serve as a starting point for using Cognee to create reliable LLM pipelines.

In this section, you'll find a template that you can use to structure your data and build pipelines.
These tasks are designed to help you get started with cognee and build reliable LLM pipelines


@@ -15,7 +16,7 @@ These tasks are designed to help you get started with cognee and build reliable

## Task 1: Category Extraction

Data enrichment is the process of enhancing raw data with additional information to make it more valuable. This template is a sample task that extract categories from a document and populates a graph with the extracted categories.
Data enrichment is the process of enhancing raw data with additional information to make it more valuable. This template is a sample task that extracts categories from a document and populates a graph with the extracted categories.

Let's go over the steps to use this template [full code provided here](https://github.com/topoteretes/cognee/blob/main/cognee/tasks/chunk_naive_llm_classifier/chunk_naive_llm_classifier.py):

@@ -239,4 +240,4 @@ for dataset in datasets:
    if dataset_name in existing_datasets:
        awaitables.append(run_cognify_pipeline(dataset))
return await asyncio.gather(*awaitables)
```
```
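The template page above walks through tasks that are composed into a cognify-style pipeline. A rough sketch of that overall shape, reusing the Task and run_tasks imports that appear earlier in this diff; the task body and the exact run_tasks signature are assumptions for illustration rather than the precise cognee API:

```python
from cognee.modules.pipelines import run_tasks
from cognee.modules.pipelines.tasks.Task import Task


async def extract_categories(documents):
    # Hypothetical task body: enrich each document chunk with categories,
    # e.g. by calling an LLM-backed classifier, then pass the data along.
    return documents


async def run_example_pipeline(documents):
    tasks = [
        Task(extract_categories),  # tasks wrap plain (async) callables
    ]
    # run_tasks is assumed to take the task list plus the initial payload and
    # to yield intermediate results as the pipeline progresses.
    async for result in run_tasks(tasks, documents):
        print(result)
```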
@@ -3,6 +3,9 @@
echo "Debug mode: $DEBUG"
echo "Environment: $ENVIRONMENT"

# Run migrations
poetry run alembic upgrade head

echo "Starting Gunicorn"

if [ "$ENVIRONMENT" = "dev" ]; then
File diff suppressed because one or more lines are too long