Merge remote-tracking branch 'origin/main' into feat/COG-184-add-falkordb

Commit 758698a35b: 49 changed files with 2109 additions and 1674 deletions
.github/workflows/auto-comment.yml (vendored, new file, 81 lines)
@@ -0,0 +1,81 @@
name: Issue and PR Auto Comments
on:
  issues:
    types:
      - opened
      - closed
      - assigned
  pull_request_target:
    types:
      - opened
      - closed

permissions:
  contents: read

jobs:
  auto-comment:
    permissions:
      issues: write
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      # configuration for auto-comment actions
      - name: Configure Auto Comments
        uses: wow-actions/auto-comment@v1
        with:
          GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
          issuesOpened: |
            👋 @{{ author }}

            Thank you for raising an issue. We will investigate the matter and get back to you as soon as possible.

            To help us address your issue efficiently, please ensure you have provided:
            - A clear description of the problem
            - Steps to reproduce (if applicable)
            - Expected vs actual behavior
            - Any relevant screenshots or error messages

            Our team typically responds within 2-3 business days.

          issuesClosed: |
            ✅ @{{ author }}

            This issue has been closed. If you have any further questions or if the issue resurfaces,
            please feel free to:
            - Add a comment to this thread
            - Open a new issue with reference to this one

            Thank you for helping us improve!

          pullRequestOpened: |
            👍 @{{ author }}

            Thank you for your pull request and contributing to our community!

            Please ensure you have:
            - [ ] Followed our contributing guidelines
            - [ ] Added/updated tests (if applicable)
            - [ ] Updated documentation (if applicable)
            - [ ] Added a descriptive PR title

            Our team will review your contribution as soon as possible. Feel free to reach out if you need any assistance.

      # Separate action for merged PRs
      - name: Handle Merged Pull Requests
        if: github.event.pull_request.merged == true
        uses: actions-cool/pr-welcome@v1.2.1
        with:
          token: ${{ secrets.GH_TOKEN }}
          comment: |
            🎉 Fantastic work @${{ github.event.pull_request.user.login }}! 🎉

            Your pull request has been merged successfully. Thank you for your valuable contribution!

            We appreciate the time and effort you've put into improving our project.
            Your changes will be included in our next release.

            Keep up the great work! 💪
          emoji: 'rocket'
          pr-emoji: '+1, heart, rocket'
.github/workflows/docker_compose.yml (vendored, 6 changed lines)
@@ -20,10 +20,16 @@ jobs:
       uses: docker/setup-buildx-action@v3

     - name: Build Docker images
+      env:
+        ENVIRONMENT: dev
+        ENV: dev
       run: |
         docker compose -f docker-compose.yml build

     - name: Run Docker Compose
+      env:
+        ENVIRONMENT: dev
+        ENV: dev
       run: |
         docker compose -f docker-compose.yml up -d
@@ -25,7 +25,7 @@ RUN pip install poetry
 RUN poetry config virtualenvs.create false

 # Install the dependencies
-RUN poetry install --no-root --no-dev
+RUN poetry install --all-extras --no-root --no-dev

 # Set the PYTHONPATH environment variable to include the /app directory
README.md (59 changed lines)
@@ -29,6 +29,10 @@ If you have questions, join our <a href="https://discord.gg/NQPKmU5CCg">Discord
 pip install cognee
 ```

+### With pip with PostgreSQL support
+
+```bash
+pip install cognee[postgres]
+```
+
 ### With poetry
@@ -36,6 +40,11 @@ pip install cognee
 poetry add cognee
 ```

+### With poetry with PostgreSQL support
+
+```bash
+poetry add cognee -E postgres
+```
+
 ## 💻 Basic Usage
@@ -50,7 +59,7 @@ os.environ["LLM_API_KEY"] = "YOUR OPENAI_API_KEY"
 or
 ```
 import cognee
-cognee.config.llm_api_key = "YOUR_OPENAI_API_KEY"
+cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")
 ```
 You can also set the variables by creating .env file, here is our <a href="https://github.com/topoteretes/cognee/blob/main/.env.template">template.</a>
 To use different LLM providers, for more info check out our <a href="https://topoteretes.github.io/cognee">documentation</a>
@@ -73,26 +82,54 @@ docker-compose up
 ```
 Then navigate to localhost:3000

+If you want to use the UI with PostgreSQL through docker-compose make sure to set the following values in the .env file:
+```
+DB_PROVIDER=postgres
+
+DB_HOST=postgres
+DB_PORT=5432
+
+DB_NAME=cognee_db
+DB_USERNAME=cognee
+DB_PASSWORD=cognee
+```
+
 ### Simple example

-Run the default cognee pipeline:
+First, copy `.env.template` to `.env` and add your OpenAI API key to the LLM_API_KEY field.
+
+Optionally, set `VECTOR_DB_PROVIDER="lancedb"` in `.env` to simplify setup.
+
+This script will run the default pipeline:

-```
+```python
 import cognee
+import asyncio
+from cognee.api.v1.search import SearchType

-text = """Natural language processing (NLP) is an interdisciplinary
-subfield of computer science and information retrieval"""
+async def main():
+    await cognee.prune.prune_data()  # Reset cognee data
+    await cognee.prune.prune_system(metadata=True)  # Reset cognee system state

-await cognee.add(text) # Add a new piece of information
+    text = """
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """

-await cognee.cognify() # Use LLMs and cognee to create a knowledge graph
+    await cognee.add(text)  # Add text to cognee
+    await cognee.cognify()  # Use LLMs and cognee to create knowledge graph

-search_results = await cognee.search("INSIGHTS", {'query': 'NLP'}) # Query cognee for the insights
+    search_results = await cognee.search(  # Search cognee for insights
+        SearchType.INSIGHTS,
+        {'query': 'Tell me about NLP'}
+    )

-for result in search_results:
-    do_something_with_result(result)
+    for result_text in search_results:  # Display results
+        print(result_text)
+
+asyncio.run(main())
 ```
+
+A version of this example is here: `examples/python/simple_example.py`

 ### Create your own memory store
@@ -1,24 +1,11 @@
 """ FastAPI server for the Cognee API. """
-from datetime import datetime
 import os
-from uuid import UUID
-import aiohttp
 import uvicorn
 import logging
 import sentry_sdk
-from typing import List, Union, Optional, Literal
-from typing_extensions import Annotated
-from fastapi import FastAPI, HTTPException, Form, UploadFile, Query, Depends
-from fastapi.responses import JSONResponse, FileResponse, Response
+from fastapi import FastAPI
+from fastapi.responses import JSONResponse, Response
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-
-from cognee.api.DTO import InDTO, OutDTO
-from cognee.api.v1.search import SearchType
-from cognee.modules.users.models import User
-from cognee.modules.users.methods import get_authenticated_user
-from cognee.modules.pipelines.models import PipelineRunStatus

 # Set up logging
 logging.basicConfig(
@@ -65,9 +52,12 @@ app.add_middleware(

 from cognee.api.v1.users.routers import get_auth_router, get_register_router,\
     get_reset_password_router, get_verify_router, get_users_router
-from cognee.api.v1.permissions.get_permissions_router import get_permissions_router
+from cognee.api.v1.permissions.routers import get_permissions_router
+from cognee.api.v1.settings.routers import get_settings_router
+from cognee.api.v1.datasets.routers import get_datasets_router
+from cognee.api.v1.cognify.routers import get_cognify_router
+from cognee.api.v1.search.routers import get_search_router
+from cognee.api.v1.add.routers import get_add_router

 from fastapi import Request
 from fastapi.encoders import jsonable_encoder
@@ -137,261 +127,35 @@ def health_check():
     """
     return Response(status_code = 200)

+app.include_router(
+    get_datasets_router(),
+    prefix="/api/v1/datasets",
+    tags=["datasets"]
+)

-class ErrorResponseDTO(BaseModel):
-    message: str
+app.include_router(
+    get_add_router(),
+    prefix="/api/v1/add",
+    tags=["add"]
+)

+app.include_router(
+    get_cognify_router(),
+    prefix="/api/v1/cognify",
+    tags=["cognify"]
+)

-class DatasetDTO(OutDTO):
-    id: UUID
-    name: str
-    created_at: datetime
-    updated_at: Optional[datetime]
-    owner_id: UUID
+app.include_router(
+    get_search_router(),
+    prefix="/api/v1/search",
+    tags=["search"]
+)

-@app.get("/api/v1/datasets", response_model = list[DatasetDTO])
-async def get_datasets(user: User = Depends(get_authenticated_user)):
-    try:
-        from cognee.modules.data.methods import get_datasets
-        datasets = await get_datasets(user.id)
-
-        return datasets
-    except Exception as error:
-        logger.error(f"Error retrieving datasets: {str(error)}")
-        raise HTTPException(status_code = 500, detail = f"Error retrieving datasets: {str(error)}") from error
-
-
-@app.delete("/api/v1/datasets/{dataset_id}", response_model = None, responses = { 404: { "model": ErrorResponseDTO }})
-async def delete_dataset(dataset_id: str, user: User = Depends(get_authenticated_user)):
-    from cognee.modules.data.methods import get_dataset, delete_dataset
-
-    dataset = await get_dataset(user.id, dataset_id)
-
-    if dataset is None:
-        raise HTTPException(
-            status_code = 404,
-            detail = f"Dataset ({dataset_id}) not found."
-        )
-
-    await delete_dataset(dataset)
-
-
-@app.get("/api/v1/datasets/{dataset_id}/graph", response_model = str)
-async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authenticated_user)):
-    from cognee.shared.utils import render_graph
-    from cognee.infrastructure.databases.graph import get_graph_engine
-
-    try:
-        graph_client = await get_graph_engine()
-        graph_url = await render_graph(graph_client.graph)
-
-        return JSONResponse(
-            status_code = 200,
-            content = str(graph_url),
-        )
-    except:
-        return JSONResponse(
-            status_code = 409,
-            content = "Graphistry credentials are not set. Please set them in your .env file.",
-        )
-
-
-class DataDTO(OutDTO):
-    id: UUID
-    name: str
-    created_at: datetime
-    updated_at: Optional[datetime]
-    extension: str
-    mime_type: str
-    raw_data_location: str
-
-@app.get("/api/v1/datasets/{dataset_id}/data", response_model = list[DataDTO], responses = { 404: { "model": ErrorResponseDTO }})
-async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)):
-    from cognee.modules.data.methods import get_dataset_data, get_dataset
-
-    dataset = await get_dataset(user.id, dataset_id)
-
-    if dataset is None:
-        return JSONResponse(
-            status_code = 404,
-            content = ErrorResponseDTO(f"Dataset ({dataset_id}) not found."),
-        )
-
-    dataset_data = await get_dataset_data(dataset_id = dataset.id)
-
-    if dataset_data is None:
-        return []
-
-    return dataset_data
-
-
-@app.get("/api/v1/datasets/status", response_model = dict[str, PipelineRunStatus])
-async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None, user: User = Depends(get_authenticated_user)):
-    from cognee.api.v1.datasets.datasets import datasets as cognee_datasets
-
-    try:
-        datasets_statuses = await cognee_datasets.get_status(datasets)
-
-        return datasets_statuses
-    except Exception as error:
-        return JSONResponse(
-            status_code = 409,
-            content = {"error": str(error)}
-        )
-
-
-@app.get("/api/v1/datasets/{dataset_id}/data/{data_id}/raw", response_class = FileResponse)
-async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
-    from cognee.modules.data.methods import get_dataset, get_dataset_data
-
-    dataset = await get_dataset(user.id, dataset_id)
-
-    if dataset is None:
-        return JSONResponse(
-            status_code = 404,
-            content = {
-                "detail": f"Dataset ({dataset_id}) not found."
-            }
-        )
-
-    dataset_data = await get_dataset_data(dataset.id)
-
-    if dataset_data is None:
-        raise HTTPException(status_code = 404, detail = f"Dataset ({dataset_id}) not found.")
-
-    data = [data for data in dataset_data if str(data.id) == data_id][0]
-
-    if data is None:
-        return JSONResponse(
-            status_code = 404,
-            content = {
-                "detail": f"Data ({data_id}) not found in dataset ({dataset_id})."
-            }
-        )
-
-    return data.raw_data_location
-
-
-@app.post("/api/v1/add", response_model = None)
-async def add(
-    data: List[UploadFile],
-    datasetId: str = Form(...),
-    user: User = Depends(get_authenticated_user),
-):
-    """ This endpoint is responsible for adding data to the graph."""
-    from cognee.api.v1.add import add as cognee_add
-    try:
-        if isinstance(data, str) and data.startswith("http"):
-            if "github" in data:
-                # Perform git clone if the URL is from GitHub
-                repo_name = data.split("/")[-1].replace(".git", "")
-                os.system(f"git clone {data} .data/{repo_name}")
-                await cognee_add(
-                    "data://.data/",
-                    f"{repo_name}",
-                )
-            else:
-                # Fetch and store the data from other types of URL using curl
-                async with aiohttp.ClientSession() as session:
-                    async with session.get(data) as resp:
-                        if resp.status == 200:
-                            file_data = await resp.read()
-                            with open(f".data/{data.split('/')[-1]}", "wb") as f:
-                                f.write(file_data)
-                            await cognee_add(
-                                "data://.data/",
-                                f"{data.split('/')[-1]}",
-                            )
-        else:
-            await cognee_add(
-                data,
-                datasetId,
-                user = user,
-            )
-    except Exception as error:
-        return JSONResponse(
-            status_code = 409,
-            content = {"error": str(error)}
-        )
-
-
-class CognifyPayloadDTO(BaseModel):
-    datasets: List[str]
-
-@app.post("/api/v1/cognify", response_model = None)
-async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
-    """ This endpoint is responsible for the cognitive processing of the content."""
-    from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
-    try:
-        await cognee_cognify(payload.datasets, user)
-    except Exception as error:
-        return JSONResponse(
-            status_code = 409,
-            content = {"error": str(error)}
-        )
-
-
-class SearchPayloadDTO(InDTO):
-    search_type: SearchType
-    query: str
-
-@app.post("/api/v1/search", response_model = list)
-async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
-    """ This endpoint is responsible for searching for nodes in the graph."""
-    from cognee.api.v1.search import search as cognee_search
-
-    try:
-        results = await cognee_search(payload.search_type, payload.query, user)
-
-        return results
-    except Exception as error:
-        return JSONResponse(
-            status_code = 409,
-            content = {"error": str(error)}
-        )
-
-from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig
-
-class LLMConfigDTO(OutDTO, LLMConfig):
-    pass
-
-class VectorDBConfigDTO(OutDTO, VectorDBConfig):
-    pass
-
-class SettingsDTO(OutDTO):
-    llm: LLMConfigDTO
-    vector_db: VectorDBConfigDTO
-
-@app.get("/api/v1/settings", response_model = SettingsDTO)
-async def get_settings(user: User = Depends(get_authenticated_user)):
-    from cognee.modules.settings import get_settings as get_cognee_settings
-    return get_cognee_settings()
-
-
-class LLMConfigDTO(InDTO):
-    provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
-    model: str
-    api_key: str
-
-class VectorDBConfigDTO(InDTO):
-    provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
-    url: str
-    api_key: str
-
-class SettingsPayloadDTO(InDTO):
-    llm: Optional[LLMConfigDTO] = None
-    vector_db: Optional[VectorDBConfigDTO] = None
-
-@app.post("/api/v1/settings", response_model = None)
-async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)):
-    from cognee.modules.settings import save_llm_config, save_vector_db_config
-
-    if new_settings.llm is not None:
-        await save_llm_config(new_settings.llm)
-
-    if new_settings.vector_db is not None:
-        await save_vector_db_config(new_settings.vector_db)
-
+app.include_router(
+    get_settings_router(),
+    prefix="/api/v1/settings",
+    tags=["settings"]
+)

 def start_api_server(host: str = "0.0.0.0", port: int = 8000):
     """
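The hunk above removes the inline endpoint definitions and replaces them with `app.include_router(...)` calls; each feature now lives behind a small router factory (the new files follow below). A minimal sketch of the pattern; `get_example_router` and its route are illustrative names, not part of cognee:

```python
from fastapi import APIRouter, FastAPI

def get_example_router() -> APIRouter:
    # The factory builds a self-contained router for one feature area.
    router = APIRouter()

    @router.get("/")
    async def read_root():
        return {"status": "ok"}

    return router

app = FastAPI()
# The app composes features by mounting each factory's router under a prefix.
app.include_router(get_example_router(), prefix="/api/v1/example", tags=["example"])
```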
cognee/api/v1/add/routers/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .get_add_router import get_add_router
cognee/api/v1/add/routers/get_add_router.py (new file, 60 lines)
@@ -0,0 +1,60 @@
from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
from typing import List
import aiohttp
import subprocess
import logging
import os
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user

logger = logging.getLogger(__name__)

def get_add_router() -> APIRouter:
    router = APIRouter()

    @router.post("/", response_model=None)
    async def add(
        data: List[UploadFile],
        datasetId: str = Form(...),
        user: User = Depends(get_authenticated_user),
    ):
        """ This endpoint is responsible for adding data to the graph."""
        from cognee.api.v1.add import add as cognee_add
        try:
            if isinstance(data, str) and data.startswith("http"):
                if "github" in data:
                    # Perform git clone if the URL is from GitHub
                    repo_name = data.split("/")[-1].replace(".git", "")
                    subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
                    await cognee_add(
                        "data://.data/",
                        f"{repo_name}",
                    )
                else:
                    # Fetch and store the data from other types of URL using curl
                    async with aiohttp.ClientSession() as session:
                        async with session.get(data) as resp:
                            if resp.status == 200:
                                file_data = await resp.read()
                                filename = os.path.basename(data)
                                with open(f".data/{filename}", "wb") as f:
                                    f.write(file_data)
                                await cognee_add(
                                    "data://.data/",
                                    f"{data.split('/')[-1]}",
                                )
            else:
                await cognee_add(
                    data,
                    datasetId,
                    user=user,
                )
        except Exception as error:
            return JSONResponse(
                status_code=409,
                content={"error": str(error)}
            )

    return router
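Worth noting in the router above: the GitHub branch now shells out with `subprocess.run` and an argument list instead of the old `os.system(f"git clone {data} ...")`. A sketch of the difference; the URL and destination are example values:

```python
import subprocess

# os.system interpolates the URL into a shell command line, so a crafted value
# such as "x; rm -rf ." would be executed by the shell. An argument list passed
# to subprocess.run never goes through a shell, and check=True raises
# CalledProcessError if git exits with a non-zero status.
url = "https://github.com/topoteretes/cognee.git"  # example URL
subprocess.run(["git", "clone", url, ".data/cognee"], check=True)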
cognee/api/v1/cognify/routers/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .get_cognify_router import get_cognify_router
cognee/api/v1/cognify/routers/get_cognify_router.py (new file, 27 lines)
@@ -0,0 +1,27 @@
from fastapi import APIRouter
from typing import List
from pydantic import BaseModel
from cognee.modules.users.models import User
from fastapi.responses import JSONResponse
from cognee.modules.users.methods import get_authenticated_user
from fastapi import Depends

class CognifyPayloadDTO(BaseModel):
    datasets: List[str]

def get_cognify_router() -> APIRouter:
    router = APIRouter()

    @router.post("/", response_model=None)
    async def cognify(payload: CognifyPayloadDTO, user: User = Depends(get_authenticated_user)):
        """ This endpoint is responsible for the cognitive processing of the content."""
        from cognee.api.v1.cognify.cognify_v2 import cognify as cognee_cognify
        try:
            await cognee_cognify(payload.datasets, user)
        except Exception as error:
            return JSONResponse(
                status_code=409,
                content={"error": str(error)}
            )

    return router
@@ -5,6 +5,7 @@ from cognee.modules.cognify.config import get_cognify_config
 from cognee.infrastructure.data.chunking.config import get_chunk_config
 from cognee.infrastructure.databases.vector import get_vectordb_config
 from cognee.infrastructure.databases.graph.config import get_graph_config
+from cognee.infrastructure.llm.config import get_llm_config
 from cognee.infrastructure.databases.relational import get_relational_config
 from cognee.infrastructure.files.storage import LocalStorage
@@ -55,19 +56,36 @@ class config():
         graph_config.graph_database_provider = graph_database_provider

     @staticmethod
-    def llm_provider(llm_provider: str):
-        graph_config = get_graph_config()
-        graph_config.llm_provider = llm_provider
+    def set_llm_provider(llm_provider: str):
+        llm_config = get_llm_config()
+        llm_config.llm_provider = llm_provider

     @staticmethod
-    def llm_endpoint(llm_endpoint: str):
-        graph_config = get_graph_config()
-        graph_config.llm_endpoint = llm_endpoint
+    def set_llm_endpoint(llm_endpoint: str):
+        llm_config = get_llm_config()
+        llm_config.llm_endpoint = llm_endpoint

     @staticmethod
-    def llm_model(llm_model: str):
-        graph_config = get_graph_config()
-        graph_config.llm_model = llm_model
+    def set_llm_model(llm_model: str):
+        llm_config = get_llm_config()
+        llm_config.llm_model = llm_model
+
+    @staticmethod
+    def set_llm_api_key(llm_api_key: str):
+        llm_config = get_llm_config()
+        llm_config.llm_api_key = llm_api_key
+
+    @staticmethod
+    def set_llm_config(config_dict: dict):
+        """
+        Updates the llm config with values from config_dict.
+        """
+        llm_config = get_llm_config()
+        for key, value in config_dict.items():
+            if hasattr(llm_config, key):
+                object.__setattr__(llm_config, key, value)
+            else:
+                raise AttributeError(f"'{key}' is not a valid attribute of the config.")

     @staticmethod
     def set_chunk_strategy(chunk_strategy: object):
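A quick sketch of the renamed setters in use. The key names follow the fields the methods above assign, and the model string is only an example:

```python
import cognee

cognee.config.set_llm_provider("openai")
cognee.config.set_llm_api_key("YOUR_OPENAI_API_KEY")

# set_llm_config checks each key against the config object with hasattr,
# so unknown keys fail loudly instead of being silently ignored.
cognee.config.set_llm_config({
    "llm_model": "gpt-4o-mini",  # example model name
})
# cognee.config.set_llm_config({"not_a_field": 1})  # raises AttributeError
```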
@@ -137,5 +155,5 @@ class config():
         if "username" not in graphistry_config or "password" not in graphistry_config:
             raise ValueError("graphistry_config dictionary must contain 'username' and 'password' keys.")

-        base_config.graphistry_username = graphistry_config.username
-        base_config.graphistry_password = graphistry_config.password
+        base_config.graphistry_username = graphistry_config.get("username")
+        base_config.graphistry_password = graphistry_config.get("password")
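The graphistry fix above replaces attribute access on a plain dict with `dict.get`. A two-line illustration with example values:

```python
graphistry_config = {"username": "alice", "password": "secret"}  # example values

# graphistry_config.username  # AttributeError: dicts expose keys, not attributes
print(graphistry_config.get("username"))  # "alice" -- the corrected lookup
print(graphistry_config.get("missing"))   # None instead of a crash
```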
cognee/api/v1/datasets/routers/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .get_datasets_router import get_datasets_router
cognee/api/v1/datasets/routers/get_datasets_router.py (new file, 178 lines)
@@ -0,0 +1,178 @@
import logging
from fastapi import APIRouter
from datetime import datetime
from uuid import UUID
from typing import List, Optional
from typing_extensions import Annotated
from fastapi import HTTPException, Query, Depends
from fastapi.responses import JSONResponse, FileResponse
from pydantic import BaseModel

from cognee.api.DTO import OutDTO
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
from cognee.modules.pipelines.models import PipelineRunStatus

logger = logging.getLogger(__name__)

class ErrorResponseDTO(BaseModel):
    message: str

class DatasetDTO(OutDTO):
    id: UUID
    name: str
    created_at: datetime
    updated_at: Optional[datetime] = None
    owner_id: UUID

class DataDTO(OutDTO):
    id: UUID
    name: str
    created_at: datetime
    updated_at: Optional[datetime] = None
    extension: str
    mime_type: str
    raw_data_location: str

def get_datasets_router() -> APIRouter:
    router = APIRouter()

    @router.get("/", response_model=list[DatasetDTO])
    async def get_datasets(user: User = Depends(get_authenticated_user)):
        try:
            from cognee.modules.data.methods import get_datasets
            datasets = await get_datasets(user.id)

            return datasets
        except Exception as error:
            logger.error(f"Error retrieving datasets: {str(error)}")
            raise HTTPException(status_code=500, detail=f"Error retrieving datasets: {str(error)}") from error

    @router.delete("/{dataset_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}})
    async def delete_dataset(dataset_id: str, user: User = Depends(get_authenticated_user)):
        from cognee.modules.data.methods import get_dataset, delete_dataset

        dataset = await get_dataset(user.id, dataset_id)

        if dataset is None:
            raise HTTPException(
                status_code=404,
                detail=f"Dataset ({dataset_id}) not found."
            )

        await delete_dataset(dataset)

    @router.delete("/{dataset_id}/data/{data_id}", response_model=None, responses={404: {"model": ErrorResponseDTO}})
    async def delete_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
        from cognee.modules.data.methods import get_data, delete_data
        from cognee.modules.data.methods import get_dataset

        # Check if user has permission to access dataset and data by trying to get the dataset
        dataset = await get_dataset(user.id, dataset_id)

        # TODO: Handle situation differently if user doesn't have permission to access data?
        if dataset is None:
            raise HTTPException(
                status_code=404,
                detail=f"Dataset ({dataset_id}) not found."
            )

        data = await get_data(data_id)

        if data is None:
            raise HTTPException(
                status_code=404,
                detail=f"Data ({data_id}) not found."
            )

        await delete_data(data)

    @router.get("/{dataset_id}/graph", response_model=str)
    async def get_dataset_graph(dataset_id: str, user: User = Depends(get_authenticated_user)):
        from cognee.shared.utils import render_graph
        from cognee.infrastructure.databases.graph import get_graph_engine

        try:
            graph_client = await get_graph_engine()
            graph_url = await render_graph(graph_client.graph)

            return JSONResponse(
                status_code=200,
                content=str(graph_url),
            )
        except:
            return JSONResponse(
                status_code=409,
                content="Graphistry credentials are not set. Please set them in your .env file.",
            )

    @router.get("/{dataset_id}/data", response_model=list[DataDTO],
                responses={404: {"model": ErrorResponseDTO}})
    async def get_dataset_data(dataset_id: str, user: User = Depends(get_authenticated_user)):
        from cognee.modules.data.methods import get_dataset_data, get_dataset

        dataset = await get_dataset(user.id, dataset_id)

        if dataset is None:
            return JSONResponse(
                status_code=404,
                content=ErrorResponseDTO(f"Dataset ({dataset_id}) not found."),
            )

        dataset_data = await get_dataset_data(dataset_id=dataset.id)

        if dataset_data is None:
            return []

        return dataset_data

    @router.get("/status", response_model=dict[str, PipelineRunStatus])
    async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None,
                                 user: User = Depends(get_authenticated_user)):
        from cognee.api.v1.datasets.datasets import datasets as cognee_datasets

        try:
            datasets_statuses = await cognee_datasets.get_status(datasets)

            return datasets_statuses
        except Exception as error:
            return JSONResponse(
                status_code=409,
                content={"error": str(error)}
            )

    @router.get("/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
    async def get_raw_data(dataset_id: str, data_id: str, user: User = Depends(get_authenticated_user)):
        from cognee.modules.data.methods import get_dataset, get_dataset_data

        dataset = await get_dataset(user.id, dataset_id)

        if dataset is None:
            return JSONResponse(
                status_code=404,
                content={
                    "detail": f"Dataset ({dataset_id}) not found."
                }
            )

        dataset_data = await get_dataset_data(dataset.id)

        if dataset_data is None:
            raise HTTPException(status_code=404, detail=f"No data found in dataset ({dataset_id}).")

        matching_data = [data for data in dataset_data if str(data.id) == data_id]

        # Check if matching_data contains an element
        if len(matching_data) == 0:
            return JSONResponse(
                status_code=404,
                content={
                    "detail": f"Data ({data_id}) not found in dataset ({dataset_id})."
                }
            )

        data = matching_data[0]

        return data.raw_data_location

    return router
cognee/api/v1/permissions/routers/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .get_permissions_router import get_permissions_router
cognee/api/v1/search/routers/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .get_search_router import get_search_router
cognee/api/v1/search/routers/get_search_router.py (new file, 31 lines)
@@ -0,0 +1,31 @@
from cognee.api.v1.search import SearchType
from fastapi.responses import JSONResponse
from cognee.modules.users.models import User
from fastapi import Depends, APIRouter
from cognee.api.DTO import InDTO
from cognee.modules.users.methods import get_authenticated_user


class SearchPayloadDTO(InDTO):
    search_type: SearchType
    query: str

def get_search_router() -> APIRouter:
    router = APIRouter()

    @router.post("/", response_model = list)
    async def search(payload: SearchPayloadDTO, user: User = Depends(get_authenticated_user)):
        """ This endpoint is responsible for searching for nodes in the graph."""
        from cognee.api.v1.search import search as cognee_search

        try:
            results = await cognee_search(payload.search_type, payload.query, user)

            return results
        except Exception as error:
            return JSONResponse(
                status_code = 409,
                content = {"error": str(error)}
            )

    return router
cognee/api/v1/settings/routers/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
from .get_settings_router import get_settings_router
cognee/api/v1/settings/routers/get_settings_router.py (new file, 51 lines)
@@ -0,0 +1,51 @@
from fastapi import APIRouter
from cognee.api.DTO import InDTO, OutDTO
from typing import Union, Optional, Literal
from cognee.modules.users.methods import get_authenticated_user
from fastapi import Depends
from cognee.modules.users.models import User
from cognee.modules.settings.get_settings import LLMConfig, VectorDBConfig

class LLMConfigOutputDTO(OutDTO, LLMConfig):
    pass

class VectorDBConfigOutputDTO(OutDTO, VectorDBConfig):
    pass

class SettingsDTO(OutDTO):
    llm: LLMConfigOutputDTO
    vector_db: VectorDBConfigOutputDTO

class LLMConfigInputDTO(InDTO):
    provider: Union[Literal["openai"], Literal["ollama"], Literal["anthropic"]]
    model: str
    api_key: str

class VectorDBConfigInputDTO(InDTO):
    provider: Union[Literal["lancedb"], Literal["qdrant"], Literal["weaviate"], Literal["pgvector"]]
    url: str
    api_key: str

class SettingsPayloadDTO(InDTO):
    llm: Optional[LLMConfigInputDTO] = None
    vector_db: Optional[VectorDBConfigInputDTO] = None

def get_settings_router() -> APIRouter:
    router = APIRouter()

    @router.get("/", response_model=SettingsDTO)
    async def get_settings(user: User = Depends(get_authenticated_user)):
        from cognee.modules.settings import get_settings as get_cognee_settings
        return get_cognee_settings()

    @router.post("/", response_model=None)
    async def save_settings(new_settings: SettingsPayloadDTO, user: User = Depends(get_authenticated_user)):
        from cognee.modules.settings import save_llm_config, save_vector_db_config

        if new_settings.llm is not None:
            await save_llm_config(new_settings.llm)

        if new_settings.vector_db is not None:
            await save_vector_db_config(new_settings.vector_db)

    return router
@@ -89,10 +89,22 @@ class SQLAlchemyAdapter():
         """
         Delete data in given table based on id. Table must have an id Column.
         """
-        async with self.get_async_session() as session:
-            TableModel = await self.get_table(table_name, schema_name)
-            await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
-            await session.commit()
+        if self.engine.dialect.name == "sqlite":
+            async with self.get_async_session() as session:
+                TableModel = await self.get_table(table_name, schema_name)
+
+                # Foreign key constraints are disabled by default in SQLite (for backwards compatibility),
+                # so must be enabled for each database connection/session separately.
+                await session.execute(text("PRAGMA foreign_keys = ON;"))
+
+                await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
+                await session.commit()
+        else:
+            async with self.get_async_session() as session:
+                TableModel = await self.get_table(table_name, schema_name)
+                await session.execute(TableModel.delete().where(TableModel.c.id == data_id))
+                await session.commit()

     async def get_table(self, table_name: str, schema_name: Optional[str] = "public") -> Table:
         """
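Context for the SQLite branch above: SQLite ships with foreign key enforcement switched off, and the setting is per-connection, so the PRAGMA must be re-issued for every new session. A minimal standalone sketch, assuming the aiosqlite driver is installed:

```python
import asyncio
from sqlalchemy import text
from sqlalchemy.ext.asyncio import create_async_engine

async def demo():
    engine = create_async_engine("sqlite+aiosqlite:///:memory:")
    async with engine.connect() as connection:
        # Off by default for backwards compatibility; enable per connection.
        await connection.execute(text("PRAGMA foreign_keys = ON;"))
        enabled = await connection.execute(text("PRAGMA foreign_keys;"))
        print(enabled.scalar())  # 1 -> ON DELETE CASCADE is now honored here

asyncio.run(demo())
```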
@@ -1,25 +0,0 @@
-from typing import List, Optional
-from fastembed import TextEmbedding
-from cognee.root_dir import get_absolute_path
-from cognee.infrastructure.databases.vector.embeddings.EmbeddingEngine import EmbeddingEngine
-
-class FastembedEmbeddingEngine(EmbeddingEngine):
-    embedding_model: str
-    embedding_dimensions: int
-
-    def __init__(
-        self,
-        embedding_model: Optional[str] = "BAAI/bge-large-en-v1.5",
-        embedding_dimensions: Optional[int] = 1024,
-    ):
-        self.embedding_model = embedding_model
-        self.embedding_dimensions = embedding_dimensions
-
-    async def embed_text(self, text: List[str]) -> List[float]:
-        embedding_model = TextEmbedding(model_name = self.embedding_model, cache_dir = get_absolute_path("cache/embeddings"))
-        embeddings_list = list(map(lambda embedding: embedding.tolist(), embedding_model.embed(text)))
-
-        return embeddings_list
-
-    def get_vector_size(self) -> int:
-        return self.embedding_dimensions
@@ -193,7 +193,10 @@ class LanceDBAdapter(VectorDBInterface):
     async def delete_data_points(self, collection_name: str, data_point_ids: list[str]):
         connection = await self.get_connection()
         collection = await connection.open_table(collection_name)
-        results = await collection.delete(f"id IN {tuple(data_point_ids)}")
+        if len(data_point_ids) == 1:
+            results = await collection.delete(f"id = '{data_point_ids[0]}'")
+        else:
+            results = await collection.delete(f"id IN {tuple(data_point_ids)}")
         return results

     async def create_vector_index(self, index_name: str, index_property_name: str):
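The single-id special case above exists because Python renders a one-element tuple with a trailing comma, which is not valid in the SQL-style filter string LanceDB parses:

```python
ids = ["abc"]
print(f"id IN {tuple(ids)}")  # id IN ('abc',)  <- trailing comma breaks the filter
print(f"id = '{ids[0]}'")     # id = 'abc'      <- the single-id form used instead

ids = ["abc", "def"]
print(f"id IN {tuple(ids)}")  # id IN ('abc', 'def')  <- fine for two or more ids
```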
@@ -33,7 +33,6 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface):
         self.api_key = api_key
         self.embedding_engine = embedding_engine
         self.db_uri: str = connection_string
-
         self.engine = create_async_engine(self.db_uri)
         self.sessionmaker = async_sessionmaker(bind=self.engine, expire_on_commit=False)
@@ -1,7 +1,8 @@
 from typing import BinaryIO
 from pypdf import PdfReader
+import filetype

-def extract_text_from_file(file: BinaryIO, file_type) -> str:
+def extract_text_from_file(file: BinaryIO, file_type: filetype.Type) -> str:
     """Extract text from a file"""
     if file_type.extension == "pdf":
         reader = PdfReader(stream = file)
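For reference, the `filetype.Type` annotation added above matches what the third-party filetype package's `guess` function returns: an object carrying the detected extension and MIME type, or None when detection fails. A small sketch with a hypothetical path:

```python
import filetype

kind = filetype.guess("document.pdf")  # hypothetical file path
if kind is not None and kind.extension == "pdf":
    print(kind.mime)  # application/pdf
```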
@@ -1,5 +0,0 @@
-import os
-
-def get_file_size(file_path: str):
-    """Get the size of a file"""
-    return os.path.getsize(file_path)
@@ -1,4 +1,3 @@
-import dsp
 import dspy
 from dspy.evaluate.evaluate import Evaluate
 from dspy.primitives.example import Example
@@ -1,4 +1,3 @@
-import dsp
 import dspy
 from dspy.teleprompt import BootstrapFewShot
 from dspy.primitives.example import Example
@@ -6,6 +6,8 @@ from .get_dataset import get_dataset
 from .get_datasets import get_datasets
 from .get_datasets_by_name import get_datasets_by_name
 from .get_dataset_data import get_dataset_data
+from .get_data import get_data

 # Delete
 from .delete_dataset import delete_dataset
+from .delete_data import delete_data
cognee/modules/data/methods/delete_data.py (new file, 19 lines)
@@ -0,0 +1,19 @@
from cognee.modules.data.models import Data
from cognee.infrastructure.databases.relational import get_relational_engine


async def delete_data(data: Data):
    """Delete a data record from the database.

    Args:
        data (Data): The data object to be deleted.

    Raises:
        ValueError: If the data object is invalid.
    """
    if not hasattr(data, '__tablename__'):
        raise ValueError("The provided data object is missing the required '__tablename__' attribute.")

    db_engine = get_relational_engine()

    return await db_engine.delete_data_by_id(data.__tablename__, data.id)
@@ -4,4 +4,4 @@ from cognee.infrastructure.databases.relational import get_relational_engine
 async def delete_dataset(dataset: Dataset):
     db_engine = get_relational_engine()

-    return await db_engine.delete_table(dataset.id)
+    return await db_engine.delete_data_by_id(dataset.__tablename__, dataset.id)
cognee/modules/data/methods/get_data.py (new file, 20 lines)
@@ -0,0 +1,20 @@
from uuid import UUID
from typing import Optional
from cognee.infrastructure.databases.relational import get_relational_engine
from ..models import Data

async def get_data(data_id: UUID) -> Optional[Data]:
    """Retrieve data by ID.

    Args:
        data_id (UUID): ID of the data to retrieve

    Returns:
        Optional[Data]: The requested data object if found, None otherwise
    """
    db_engine = get_relational_engine()

    async with db_engine.get_async_session() as session:
        data = await session.get(Data, data_id)

        return data
@@ -1,8 +1,9 @@
+from typing import Optional
 from uuid import UUID
 from cognee.infrastructure.databases.relational import get_relational_engine
 from ..models import Dataset

-async def get_dataset(user_id: UUID, dataset_id: UUID) -> Dataset:
+async def get_dataset(user_id: UUID, dataset_id: UUID) -> Optional[Dataset]:
     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
@@ -20,9 +20,11 @@ class Data(Base):
     updated_at = Column(DateTime(timezone = True), onupdate = lambda: datetime.now(timezone.utc))

     datasets: Mapped[List["Dataset"]] = relationship(
+        "Dataset",
         secondary = DatasetData.__tablename__,
         back_populates = "data",
         lazy = "noload",
+        cascade="all, delete"
     )

     def to_json(self) -> dict:
@@ -19,9 +19,11 @@ class Dataset(Base):
     owner_id = Column(UUID, index = True)

     data: Mapped[List["Data"]] = relationship(
+        "Data",
         secondary = DatasetData.__tablename__,
         back_populates = "datasets",
         lazy = "noload",
+        cascade="all, delete"
     )

     def to_json(self) -> dict:
@@ -7,5 +7,5 @@ class DatasetData(Base):
     created_at = Column(DateTime(timezone = True), default = lambda: datetime.now(timezone.utc))

-    dataset_id = Column(UUID, ForeignKey("datasets.id"), primary_key = True)
-    data_id = Column(UUID, ForeignKey("data.id"), primary_key = True)
+    dataset_id = Column(UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key = True)
+    data_id = Column(UUID, ForeignKey("data.id", ondelete="CASCADE"), primary_key = True)
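The two model hunks and the association-table hunk above combine ORM-level and database-level cascading. A compact sketch of the same idea with throwaway models:

```python
from sqlalchemy import Column, ForeignKey, Integer
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class Parent(Base):
    __tablename__ = "parents"
    id = Column(Integer, primary_key=True)
    # ORM-level: deleting a Parent through the session also deletes its children.
    children = relationship("Child", cascade="all, delete")

class Child(Base):
    __tablename__ = "children"
    id = Column(Integer, primary_key=True)
    # DB-level: the database removes child rows even for deletes issued outside the ORM.
    parent_id = Column(Integer, ForeignKey("parents.id", ondelete="CASCADE"))
```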
@@ -5,7 +5,7 @@ from .models.Task import Task

 class PipelineConfig(BaseModel):
     batch_count: int = 10
-    description: Optional[str]
+    description: Optional[str] = None

 class Pipeline():
     id: UUID = uuid4()
@@ -1,8 +1,8 @@
-from typing import Any, Callable, Generator
+from typing import Any, Callable, Generator, List
 import asyncio
 from ..tasks.Task import Task

-def run_tasks_parallel(tasks: [Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
+def run_tasks_parallel(tasks: List[Task]) -> Callable[[Any], Generator[Any, Any, Any]]:
     async def parallel_run(*args, **kwargs):
         parallel_tasks = [asyncio.create_task(task.run(*args, **kwargs)) for task in tasks]
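The annotation fix above matters because `[Task]` is a list literal containing the class, not a type. A tiny illustration with a stand-in class:

```python
from typing import List

class Task:  # stand-in for cognee's Task
    pass

# def run(tasks: [Task]): ...        # evaluates to a list object; type checkers reject it
def run(tasks: List[Task]) -> int:   # the correct generic annotation
    return len(tasks)

print(run([Task(), Task()]))  # 2
```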
@@ -18,7 +18,7 @@ class Directory(BaseModel):
     directories: List['Directory'] = []

 # Allows recursive Directory Model
-Directory.update_forward_refs()
+Directory.model_rebuild()

 class RepositoryProperties(BaseModel):
     custom_properties: Optional[Dict[str, Any]] = None
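`model_rebuild()` is the pydantic v2 replacement for v1's `update_forward_refs()`; it resolves the forward reference a recursive model makes to itself. A sketch with a throwaway model:

```python
from typing import List
from pydantic import BaseModel

class TreeNode(BaseModel):  # throwaway recursive model
    name: str
    children: List["TreeNode"] = []

TreeNode.model_rebuild()  # resolves the "TreeNode" forward reference

root = TreeNode(name="root", children=[{"name": "leaf", "children": []}])
print(root.children[0].name)  # leaf
```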
@@ -6,15 +6,15 @@ class BaseClass(BaseModel):
     name: str
     type: Literal["Class"] = "Class"
     description: str
-    constructor_parameters: Optional[List[str]]
+    constructor_parameters: Optional[List[str]] = None

 class Class(BaseModel):
     id: str
     name: str
     type: Literal["Class"] = "Class"
     description: str
-    constructor_parameters: Optional[List[str]]
-    from_class: Optional[BaseClass]
+    constructor_parameters: Optional[List[str]] = None
+    from_class: Optional[BaseClass] = None

 class ClassInstance(BaseModel):
     id: str
@@ -28,7 +28,7 @@ class Function(BaseModel):
     name: str
     type: Literal["Function"] = "Function"
     description: str
-    parameters: Optional[List[str]]
+    parameters: Optional[List[str]] = None
     return_type: str
     is_static: Optional[bool] = False
@@ -38,7 +38,7 @@ class Variable(BaseModel):
     type: Literal["Variable"] = "Variable"
     description: str
     is_static: Optional[bool] = False
-    default_value: Optional[str]
+    default_value: Optional[str] = None

 class Operator(BaseModel):
     id: str
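The recurring `= None` additions in these hunks track a pydantic v2 behavior change: `Optional[X]` alone no longer implies a default, so such fields become required unless a default is spelled out. Illustration with a throwaway model:

```python
from typing import Optional
from pydantic import BaseModel

class Profile(BaseModel):  # throwaway model
    name: str
    nickname: Optional[str] = None  # without "= None", pydantic v2 treats this as required

print(Profile(name="Ada").nickname)  # None
```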
|
|
@ -21,7 +21,6 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
|
||||||
for chunk_index, chunk in enumerate(data_chunks):
|
for chunk_index, chunk in enumerate(data_chunks):
|
||||||
chunk_classification = chunk_classifications[chunk_index]
|
chunk_classification = chunk_classifications[chunk_index]
|
||||||
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
|
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
|
||||||
classification_data_points.append(uuid5(NAMESPACE_OID, chunk_classification.label.type))
|
|
||||||
|
|
||||||
for classification_subclass in chunk_classification.label.subclass:
|
for classification_subclass in chunk_classification.label.subclass:
|
||||||
classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))
|
classification_data_points.append(uuid5(NAMESPACE_OID, classification_subclass.value))
|
||||||
|
|
@@ -39,7 +38,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
     if await vector_engine.has_collection(collection_name):
         existing_data_points = await vector_engine.retrieve(
             collection_name,
-            list(set(classification_data_points)),
+            [str(classification_data) for classification_data in list(set(classification_data_points))],
         ) if len(classification_data_points) > 0 else []

         existing_points_map = {point.id: True for point in existing_data_points}
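The change converts the deduplicated `UUID` objects to strings before passing them to the vector engine, which expects string ids. A minimal sketch of the dedupe-and-stringify step:

```python
from uuid import NAMESPACE_OID, uuid5

labels = ["news", "sports", "news"]
classification_data_points = [uuid5(NAMESPACE_OID, label) for label in labels]

# uuid5 is deterministic per name, so set() drops the duplicate,
# and str() yields the canonical hex form vector stores expect as an id.
point_ids = [str(point_id) for point_id in set(classification_data_points)]
print(point_ids)  # two unique string ids
```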
@@ -60,7 +59,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
             data_points.append(
                 DataPoint[Keyword](
                     id=str(classification_type_id),
-                    payload=Keyword.parse_obj({
+                    payload=Keyword.model_validate({
                         "uuid": str(classification_type_id),
                         "text": classification_type_label,
                         "chunk_id": str(data_chunk.chunk_id),
@@ -99,7 +98,7 @@ async def chunk_naive_llm_classifier(data_chunks: list[DocumentChunk], classific
             data_points.append(
                 DataPoint[Keyword](
                     id=str(classification_subtype_id),
-                    payload=Keyword.parse_obj({
+                    payload=Keyword.model_validate({
                         "uuid": str(classification_subtype_id),
                         "text": classification_subtype_label,
                         "chunk_id": str(data_chunk.chunk_id),
@@ -56,7 +56,7 @@ class OntologyEngine:
             for item in items:
                 flat_list.extend(await self.recursive_flatten(item, parent_id))
         elif isinstance(items, dict):
-            model = NodeModel.parse_obj(items)
+            model = NodeModel.model_validate(items)
             flat_list.append(await self.flatten_model(model, parent_id))
             for child in model.children:
                 flat_list.extend(await self.recursive_flatten(child, model.node_id))
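Across these hunks, Pydantic v1's `parse_obj()` is swapped for its v2 equivalent, `model_validate()` (the old name still works in v2 but emits a deprecation warning). A minimal before/after:

```python
from pydantic import BaseModel

class Node(BaseModel):
    node_id: str
    name: str

raw = {"node_id": "n1", "name": "root"}

node = Node.model_validate(raw)  # v2 spelling of Node.parse_obj(raw)
print(node.name)  # -> root
```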
cognee/tasks/infer_data_ontology/models/models.py (new file, 31 lines)
@@ -0,0 +1,31 @@
+from typing import Any, Dict, List, Optional, Union
+from pydantic import BaseModel, Field
+
+class RelationshipModel(BaseModel):
+    type: str
+    source: str
+    target: str
+
+class NodeModel(BaseModel):
+    node_id: str
+    name: str
+    default_relationship: Optional[RelationshipModel] = None
+    children: List[Union[Dict[str, Any], "NodeModel"]] = Field(default_factory=list)
+
+NodeModel.model_rebuild()
+
+class OntologyNode(BaseModel):
+    id: str = Field(..., description = "Unique identifier made from node name.")
+    name: str
+    description: str
+
+class OntologyEdge(BaseModel):
+    id: str
+    source_id: str
+    target_id: str
+    relationship_type: str
+
+class GraphOntology(BaseModel):
+    nodes: list[OntologyNode]
+    edges: list[OntologyEdge]
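A sketch of how this recursive `NodeModel` behaves when built from nested dicts (trimmed-down copy of the model above): because `children` is typed `Union[Dict[str, Any], "NodeModel"]`, child dicts are left as dicts, which is why `OntologyEngine.recursive_flatten` validates each child separately on the way down.

```python
from typing import Any, Dict, List, Union
from pydantic import BaseModel, Field

class NodeModel(BaseModel):
    node_id: str
    name: str
    children: List[Union[Dict[str, Any], "NodeModel"]] = Field(default_factory=list)

NodeModel.model_rebuild()

raw_tree = {
    "node_id": "root",
    "name": "Repository",
    "children": [{"node_id": "child-1", "name": "Docs", "children": []}],
}

tree = NodeModel.model_validate(raw_tree)
# The Union keeps dict children as dicts rather than coercing to NodeModel,
# so a recursive walk must validate them itself.
print(type(tree.children[0]))  # <class 'dict'>
```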
@@ -49,7 +49,7 @@ async def main():
     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."

-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")

     for result in search_results:
         print(f"{result}\n")
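This same typo fix repeats across the next three test files. It matters because `"\E"` is not a recognized escape sequence, so Python keeps the backslash literally (and newer versions raise a warning about it):

```python
print("\n\Extracted summaries are:\n")  # prints: \Extracted summaries are:
print("\nExtracted summaries are:\n")   # prints: Extracted summaries are:
```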
@@ -53,7 +53,7 @@ async def main():
     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."

-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")

     for result in search_results:
         print(f"{result}\n")
@@ -54,7 +54,7 @@ async def main():
     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."

-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")

     for result in search_results:
         print(f"{result}\n")
@@ -52,7 +52,7 @@ async def main():
     search_results = await cognee.search(SearchType.SUMMARIES, query = random_node_name)
     assert len(search_results) != 0, "Query related summaries don't exist."

-    print("\n\Extracted summaries are:\n")
+    print("\nExtracted summaries are:\n")

     for result in search_results:
         print(f"{result}\n")
@@ -14,9 +14,11 @@ Check available configuration options:
 from cognee.infrastructure.databases.vector import get_vectordb_config
 from cognee.infrastructure.databases.graph.config import get_graph_config
 from cognee.infrastructure.databases.relational import get_relational_config
+from cognee.infrastructure.llm.config import get_llm_config

 print(get_vectordb_config().to_dict())
 print(get_graph_config().to_dict())
 print(get_relational_config().to_dict())
+print(get_llm_config().to_dict())
 ```
@@ -29,8 +31,7 @@ GRAPH_DATABASE_PROVIDER = 'lancedb'
 Otherwise, you can set the configuration yourself:

 ```python
-cognee.config.llm_provider = 'ollama'
+cognee.config.set_llm_provider('ollama')
 ```

 ## 🚀 Getting Started with Local Models
@@ -52,15 +53,14 @@ LLM_PROVIDER = 'ollama'
 Otherwise, you can set the configuration for the model:

 ```bash
-cognee.config.llm_provider = 'ollama'
+cognee.config.set_llm_provider('ollama')
 ```

 You can also set the HOST and model name:

 ```bash
-cognee.config.llm_endpoint = "http://localhost:11434/v1"
-cognee.config.llm_model = "mistral:instruct"
+cognee.config.set_llm_endpoint("http://localhost:11434/v1")
+cognee.config.set_llm_model("mistral:instruct")
 ```
@@ -73,7 +73,7 @@ LLM_PROVIDER = 'custom'
 Otherwise, you can set the configuration for the model:

 ```bash
-cognee.config.llm_provider = 'custom'
+cognee.config.set_llm_provider('custom')
 ```

 You can also set the HOST and model name:
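Putting the three setters from these docs changes together, a sketch of configuring cognee against a local Ollama server (endpoint and model values are the ones shown in the docs; adjust for your setup):

```python
import cognee

# Setter-based configuration replaces the old attribute assignments
cognee.config.set_llm_provider('ollama')
cognee.config.set_llm_endpoint("http://localhost:11434/v1")
cognee.config.set_llm_model("mistral:instruct")
```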
examples/python/simple_example.py (new file, 39 lines)
@@ -0,0 +1,39 @@
+import cognee
+import asyncio
+from cognee.api.v1.search import SearchType
+
+# Prerequisites:
+# 1. Copy `.env.template` and rename it to `.env`.
+# 2. Add your OpenAI API key to the `.env` file in the `LLM_API_KEY` field:
+#    LLM_API_KEY = "your_key_here"
+# 3. (Optional) To minimize setup effort, set `VECTOR_DB_PROVIDER="lancedb"` in `.env`.
+
+async def main():
+    # Create a clean slate for cognee -- reset data and system state
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    # cognee knowledge graph will be created based on this text
+    text = """
+    Natural language processing (NLP) is an interdisciplinary
+    subfield of computer science and information retrieval.
+    """
+
+    # Add the text, and make it available for cognify
+    await cognee.add(text)
+
+    # Use LLMs and cognee to create knowledge graph
+    await cognee.cognify()
+
+    # Query cognee for insights on the added text
+    search_results = await cognee.search(
+        SearchType.INSIGHTS,
+        {'query': 'Tell me about NLP'}
+    )
+
+    # Display search results
+    for result_text in search_results:
+        print(result_text)
+
+if __name__ == '__main__':
+    asyncio.run(main())
poetry.lock (generated, 2664 lines changed): diff suppressed because it is too large
@@ -19,60 +19,57 @@ classifiers = [

 [tool.poetry.dependencies]
 python = ">=3.9.0,<3.12"
-openai = "1.27.0"
+openai = "1.52.0"
 pydantic = "2.8.2"
 python-dotenv = "1.0.1"
 fastapi = "^0.109.2"
 uvicorn = "0.22.0"
+requests = "2.32.3"
+aiohttp = "3.10.10"
+typing_extensions = "4.12.2"
+dspy = "2.5.25"
+nest_asyncio = "1.6.0"
+numpy = "1.26.4"
+datasets = "3.1.0"
+falkordb = "1.0.9"
 boto3 = "^1.26.125"
+botocore="^1.35.54"
 gunicorn = "^20.1.0"
 sqlalchemy = "2.0.35"
-instructor = "1.3.5"
+instructor = "1.6.3"
 networkx = "^3.2.1"
-debugpy = "1.8.2"
-pyarrow = "15.0.0"
-pylint = "^3.0.3"
 aiosqlite = "^0.20.0"
 pandas = "2.0.3"
-greenlet = "^3.0.3"
-ruff = "^0.2.2"
 filetype = "^1.2.0"
 nltk = "^3.8.1"
 dlt = {extras = ["sqlalchemy"], version = "^1.2.0"}
-overrides = "^7.7.0"
 aiofiles = "^23.2.1"
 qdrant-client = "^1.9.0"
 graphistry = "^0.33.5"
-tenacity = "^8.2.3"
+tenacity = "^9.0.0"
 weaviate-client = "4.6.7"
 scikit-learn = "^1.5.0"
-fastembed = "0.2.7"
 pypdf = "^4.1.0"
 neo4j = "^5.20.0"
 jinja2 = "^3.1.3"
 matplotlib = "^3.8.3"
-structlog = "^24.1.0"
 tiktoken = "0.7.0"
+langchain_text_splitters = "0.3.2"
+langsmith = "0.1.139"
+langdetect = "1.0.9"
 posthog = "^3.5.0"
 lancedb = "0.15.0"
-litellm = "1.38.10"
+litellm = "1.49.1"
 groq = "0.8.0"
-tantivy = "^0.22.0"
-tokenizers ="0.15.2"
-transformers ="4.39.0"
-python-multipart = "^0.0.9"
 langfuse = "^2.32.0"
-protobuf = "<5.0.0"
 pydantic-settings = "^2.2.1"
 anthropic = "^0.26.1"
-pdfplumber = "^0.11.1"
 sentry-sdk = {extras = ["fastapi"], version = "^2.9.0"}
-fastapi-users = { version = "*", extras = ["sqlalchemy"] }
+fastapi-users = {version = "*", extras = ["sqlalchemy"]}
 asyncpg = "^0.29.0"
 alembic = "^1.13.3"
 pgvector = "^0.3.5"
 psycopg2 = {version = "^2.9.10", optional = true}
-falkordb = "^1.0.9"

 [tool.poetry.extras]
 filesystem = ["s3fs", "botocore"]
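To sanity-check that an environment actually picked up the exact pins bumped above (`openai` 1.52.0, `instructor` 1.6.3, `litellm` 1.49.1), a small sketch using the standard library; the package names come straight from this pyproject.toml:

```python
from importlib.metadata import version, PackageNotFoundError

expected = {"openai": "1.52.0", "instructor": "1.6.3", "litellm": "1.49.1"}

for package, pinned in expected.items():
    try:
        installed = version(package)
        status = "OK" if installed == pinned else f"MISMATCH (installed {installed})"
    except PackageNotFoundError:
        status = "not installed"
    print(f"{package} {pinned}: {status}")
```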
@@ -89,6 +86,11 @@ pytest-asyncio = "^0.21.1"
 coverage = "^7.3.2"
 mypy = "^1.7.1"
 notebook = "^7.1.1"
+deptry = "^0.20.0"
+debugpy = "1.8.2"
+pylint = "^3.0.3"
+ruff = "^0.2.2"
+tweepy = "4.14.0"

 [tool.poetry.group.docs.dependencies]
 mkdocs-material = "^9.5.42"