feat: new Dataset permissions (#869)
## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to the terms of the Topoteretes Developer Certificate of Origin.

Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
Co-authored-by: Boris <boris@topoteretes.com>
parent ebebbb8958, commit 1ed6cfd918
76 changed files with 5322 additions and 4154 deletions
@@ -69,3 +69,11 @@ LITELLM_LOG="ERROR"
 # Set this environment variable to disable sending telemetry data
 # TELEMETRY_DISABLED=1
+
+# Set this variable to True to enforce usage of backend access control for Cognee
+# Note: This is only currently supported by the following databases:
+# Relational: SQLite, Postgres
+# Vector: LanceDB
+# Graph: KuzuDB
+#
+# It enforces LanceDB and KuzuDB use and uses them to create databases per Cognee user + dataset
+ENABLE_BACKEND_ACCESS_CONTROL=False

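For context, the flag is consumed at runtime by the new context_global_variables module further down in this diff. A minimal sketch (the helper name is hypothetical; the check itself mirrors the committed code):

import os

def backend_access_control_enabled() -> bool:
    # Unset, empty, or anything other than "true" (case-insensitive) leaves
    # backend access control disabled, matching the check performed in
    # set_database_global_context_variables below.
    return os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"
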
.github/workflows/e2e_tests.yml (vendored, 31 changes)

@@ -215,3 +215,34 @@ jobs:
           AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
         run: poetry run python ./cognee/tests/test_s3.py
+
+  test-parallel-databases:
+    name: Test using different async databases in parallel in Cognee
+    runs-on: ubuntu-22.04
+    steps:
+      - name: Check out repository
+        uses: actions/checkout@v4
+
+      - name: Cognee Setup
+        uses: ./.github/actions/cognee_setup
+        with:
+          python-version: '3.11.x'
+
+      - name: Install specific graph db dependency
+        run: |
+          poetry install -E kuzu
+
+      - name: Run parallel databases test
+        env:
+          ENV: 'dev'
+          LLM_MODEL: ${{ secrets.LLM_MODEL }}
+          LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
+          LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
+          LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
+          EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
+          EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
+          EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
+          EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+        run: poetry run python ./cognee/tests/test_parallel_databases.py

.github/workflows/test_suites.yml (vendored, 4 changes)

@@ -47,7 +47,7 @@ jobs:
   relational-db-migration-tests:
     name: Relational DB Migration Tests
-    needs: [ basic-tests, e2e-tests ]
+    needs: [ basic-tests, e2e-tests, graph-db-tests]
     uses: ./.github/workflows/relational_db_migration_tests.yml
     secrets: inherit

@@ -79,7 +79,7 @@ jobs:
   db-examples-tests:
     name: DB Examples Tests
-    needs: [vector-db-tests]
+    needs: [vector-db-tests, graph-db-tests, relational-db-migration-tests]
     uses: ./.github/workflows/db_examples_tests.yml
     secrets: inherit

.github/workflows/vector_db_tests.yml (vendored, 27 changes)

@@ -135,6 +135,16 @@ jobs:
     run:
       shell: bash

+    services:
+      qdrant:
+        image: qdrant/qdrant:v1.14.1
+        env:
+          QDRANT__LOG_LEVEL: ERROR
+          QDRANT__SERVICE__API_KEY: qdrant_api_key
+          QDRANT__SERVICE__ENABLE_TLS: 0
+        ports:
+          - 6333:6333
+
     steps:
       - name: Check out
         uses: actions/checkout@master

@@ -148,6 +158,19 @@ jobs:
         run: |
          poetry install -E qdrant

+      - name: Wait for Qdrant to be healthy
+        run: |
+          for i in {1..10}; do
+            if curl -f http://127.0.0.1:6333/healthz; then
+              echo "Qdrant is healthy!"
+              exit 0
+            fi
+            echo "Waiting for Qdrant to be healthy..."
+            sleep 3
+          done
+          echo "Qdrant failed to become healthy in time"
+          exit 1
+
       - name: Run default Qdrant
         env:
           ENV: 'dev'

@@ -159,8 +182,8 @@ jobs:
           EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
           EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
           EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
-          VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
-          VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
+          VECTOR_DB_URL: 127.0.0.1
+          VECTOR_DB_KEY: qdrant_api_key
         run: poetry run python ./cognee/tests/test_qdrant.py

   run-postgres-tests:

@@ -1,6 +1,7 @@
 """FastAPI server for the Cognee API."""

 import os
+
 import uvicorn
 from cognee.shared.logging_utils import get_logger
 import sentry_sdk

@@ -63,6 +64,7 @@ async def lifespan(app: FastAPI):
 app = FastAPI(debug=app_environment != "prod", lifespan=lifespan)

+
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],

@@ -1,3 +1,4 @@
+from uuid import UUID
 from typing import Union, BinaryIO, List, Optional

 from cognee.modules.pipelines import Task

@@ -11,9 +12,21 @@ async def add(
     dataset_name: str = "main_dataset",
     user: User = None,
     node_set: Optional[List[str]] = None,
+    vector_db_config: dict = None,
+    graph_db_config: dict = None,
+    dataset_id: UUID = None,
 ):
-    tasks = [Task(resolve_data_directories), Task(ingest_data, dataset_name, user, node_set)]
+    tasks = [
+        Task(resolve_data_directories),
+        Task(ingest_data, dataset_name, user, node_set, dataset_id),
+    ]

     await cognee_pipeline(
-        tasks=tasks, datasets=dataset_name, data=data, user=user, pipeline_name="add_pipeline"
+        tasks=tasks,
+        datasets=dataset_id if dataset_id else dataset_name,
+        data=data,
+        user=user,
+        pipeline_name="add_pipeline",
+        vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
     )

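A hedged usage sketch of the extended signature (the dataset UUID is a placeholder, and the top-level cognee.add re-export is assumed from prior releases):

import asyncio
from uuid import UUID

import cognee

async def main():
    # Passing dataset_id (e.g. a dataset shared with this user) targets that
    # dataset directly instead of resolving dataset_name against datasets
    # owned by the caller.
    await cognee.add(
        "Some text to ingest.",
        dataset_id=UUID("00000000-0000-0000-0000-000000000000"),  # placeholder
    )

asyncio.run(main())
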
@@ -1,4 +1,5 @@
 from uuid import UUID
+
 from fastapi import Form, UploadFile, Depends
 from fastapi.responses import JSONResponse
 from fastapi import APIRouter

@@ -20,8 +21,8 @@ def get_add_router() -> APIRouter:
     @router.post("/", response_model=None)
     async def add(
         data: List[UploadFile],
-        datasetName: str,
         datasetId: Optional[UUID] = Form(default=None),
+        datasetName: Optional[str] = Form(default=None),
         user: User = Depends(get_authenticated_user),
     ):
         """This endpoint is responsible for adding data to the graph."""

@@ -30,19 +31,13 @@ def get_add_router() -> APIRouter:
         if not datasetId and not datasetName:
             raise ValueError("Either datasetId or datasetName must be provided.")

-        if datasetId and not datasetName:
-            dataset = await get_dataset(user_id=user.id, dataset_id=datasetId)
-            try:
-                datasetName = dataset.name
-            except IndexError:
-                raise ValueError("No dataset found with the provided datasetName.")
-
         try:
             if isinstance(data, str) and data.startswith("http"):
                 if "github" in data:
                     # Perform git clone if the URL is from GitHub
                     repo_name = data.split("/")[-1].replace(".git", "")
                     subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
+                    # TODO: Update add call with dataset info
                     await cognee_add(
                         "data://.data/",
                         f"{repo_name}",

@@ -53,10 +48,10 @@ def get_add_router() -> APIRouter:
                     response.raise_for_status()

                     file_data = await response.content()
+                    # TODO: Update add call with dataset info
                     return await cognee_add(file_data)
             else:
-                await cognee_add(data, datasetName, user=user)
+                await cognee_add(data, dataset_name=datasetName, user=user, dataset_id=datasetId)
         except Exception as error:
             return JSONResponse(status_code=409, content={"error": str(error)})

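Client-side, the reworked form contract looks roughly like this; the base URL and auth header are assumptions, and exactly one of datasetId / datasetName must be supplied:

import requests

resp = requests.post(
    "http://localhost:8000/api/v1/add",  # assumed mount point
    headers={"Authorization": "Bearer <token>"},
    files=[("data", open("notes.txt", "rb"))],
    data={"datasetId": "00000000-0000-0000-0000-000000000000"},  # placeholder UUID
)
print(resp.status_code)
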
@@ -9,7 +9,7 @@ from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.tasks.documents import (
-    check_permissions_on_documents,
+    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )

@@ -31,11 +31,18 @@ async def cognify(
     chunker=TextChunker,
     chunk_size: int = None,
     ontology_file_path: Optional[str] = None,
+    vector_db_config: dict = None,
+    graph_db_config: dict = None,
 ):
     tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)

     return await cognee_pipeline(
-        tasks=tasks, datasets=datasets, user=user, pipeline_name="cognify_pipeline"
+        tasks=tasks,
+        datasets=datasets,
+        user=user,
+        pipeline_name="cognify_pipeline",
+        vector_db_config=vector_db_config,
+        graph_db_config=graph_db_config,
     )

@@ -48,7 +55,7 @@ async def get_default_tasks(  # TODO: Find out a better way to do this (Boris's)
 ) -> list[Task]:
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_documents, user=user, permissions=["write"]),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents,
             max_chunk_size=chunk_size or get_max_chunk_tokens(),

@@ -1,3 +1,4 @@
+from uuid import UUID
 from typing import List, Optional
 from pydantic import BaseModel
 from fastapi import Depends

@@ -10,6 +11,7 @@ from cognee.shared.data_models import KnowledgeGraph
 class CognifyPayloadDTO(BaseModel):
     datasets: List[str]
+    dataset_ids: Optional[List[UUID]]
     graph_model: Optional[BaseModel] = KnowledgeGraph

@@ -22,7 +24,9 @@ def get_cognify_router() -> APIRouter:
     from cognee.api.v1.cognify import cognify as cognee_cognify

     try:
-        await cognee_cognify(payload.datasets, user, payload.graph_model)
+        # Send dataset UUIDs if they are given, if not send dataset names
+        datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
+        await cognee_cognify(datasets, user, payload.graph_model)
     except Exception as error:
         return JSONResponse(status_code=409, content={"error": str(error)})

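A hedged request sketch (route and auth are assumptions): when dataset_ids is present and non-empty, the router forwards the UUIDs and the name list is ignored.

import requests

payload = {
    "datasets": ["main_dataset"],
    # Takes precedence over "datasets" when non-empty:
    "dataset_ids": ["00000000-0000-0000-0000-000000000000"],  # placeholder
}
resp = requests.post(
    "http://localhost:8000/api/v1/cognify",  # assumed mount point
    json=payload,
    headers={"Authorization": "Bearer <token>"},
)
print(resp.status_code)
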
@@ -1,66 +1,69 @@
 from uuid import UUID
+from typing import List

-from fastapi import APIRouter
+from fastapi import APIRouter, Depends
 from fastapi.responses import JSONResponse

+from cognee.modules.users.models import User
+from cognee.modules.users.methods import get_authenticated_user
+

 def get_permissions_router() -> APIRouter:
     permissions_router = APIRouter()

-    @permissions_router.post("/roles/{role_id}/permissions")
-    async def give_default_permission_to_role(role_id: UUID, permission_name: str):
-        from cognee.modules.users.permissions.methods import (
-            give_default_permission_to_role as set_default_permission_to_role,
-        )
-
-        await set_default_permission_to_role(role_id, permission_name)
-
-        return JSONResponse(status_code=200, content={"message": "Permission assigned to role"})
-
-    @permissions_router.post("/tenants/{tenant_id}/permissions")
-    async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
-        from cognee.modules.users.permissions.methods import (
-            give_default_permission_to_tenant as set_tenant_default_permissions,
-        )
-
-        await set_tenant_default_permissions(tenant_id, permission_name)
-
-        return JSONResponse(status_code=200, content={"message": "Permission assigned to tenant"})
-
-    @permissions_router.post("/users/{user_id}/permissions")
-    async def give_default_permission_to_user(user_id: UUID, permission_name: str):
-        from cognee.modules.users.permissions.methods import (
-            give_default_permission_to_user as set_default_permission_to_user,
-        )
-
-        await set_default_permission_to_user(user_id, permission_name)
-
-        return JSONResponse(status_code=200, content={"message": "Permission assigned to user"})
+    @permissions_router.post("/datasets/{principal_id}/")
+    async def give_datasets_permission_to_principal(
+        permission_name: str,
+        dataset_ids: List[UUID],
+        principal_id: UUID,
+        user: User = Depends(get_authenticated_user),
+    ):
+        from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
+
+        await authorized_give_permission_on_datasets(
+            principal_id,
+            [dataset_id for dataset_id in dataset_ids],
+            permission_name,
+            user.id,
+        )
+
+        return JSONResponse(
+            status_code=200, content={"message": "Permission assigned to principal"}
+        )

     @permissions_router.post("/roles")
-    async def create_role(
-        role_name: str,
-        tenant_id: UUID,
-    ):
+    async def create_role(role_name: str, user: User = Depends(get_authenticated_user)):
         from cognee.modules.users.roles.methods import create_role as create_role_method

-        await create_role_method(role_name=role_name, tenant_id=tenant_id)
+        await create_role_method(role_name=role_name, owner_id=user.id)

         return JSONResponse(status_code=200, content={"message": "Role created for tenant"})

     @permissions_router.post("/users/{user_id}/roles")
-    async def add_user_to_role(user_id: UUID, role_id: UUID):
+    async def add_user_to_role(
+        user_id: UUID, role_id: UUID, user: User = Depends(get_authenticated_user)
+    ):
         from cognee.modules.users.roles.methods import add_user_to_role as add_user_to_role_method

-        await add_user_to_role_method(user_id=user_id, role_id=role_id)
+        await add_user_to_role_method(user_id=user_id, role_id=role_id, owner_id=user.id)

         return JSONResponse(status_code=200, content={"message": "User added to role"})

+    @permissions_router.post("/users/{user_id}/tenants")
+    async def add_user_to_tenant(
+        user_id: UUID, tenant_id: UUID, user: User = Depends(get_authenticated_user)
+    ):
+        from cognee.modules.users.tenants.methods import add_user_to_tenant
+
+        await add_user_to_tenant(user_id=user_id, tenant_id=tenant_id, owner_id=user.id)
+
+        return JSONResponse(status_code=200, content={"message": "User added to tenant"})
+
     @permissions_router.post("/tenants")
-    async def create_tenant(tenant_name: str):
+    async def create_tenant(tenant_name: str, user: User = Depends(get_authenticated_user)):
         from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method

-        await create_tenant_method(tenant_name=tenant_name)
+        await create_tenant_method(tenant_name=tenant_name, user_id=user.id)

         return JSONResponse(status_code=200, content={"message": "Tenant created."})

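For example, granting read access on two datasets to a principal (a user, role, or tenant id) might look like the following; the mount point, token, and all ids are placeholders, and the query/body split follows FastAPI's defaults for these parameter types:

import requests

principal_id = "11111111-1111-1111-1111-111111111111"  # placeholder principal
resp = requests.post(
    f"http://localhost:8000/api/v1/permissions/datasets/{principal_id}/",  # assumed mount point
    params={"permission_name": "read"},
    json=[
        "00000000-0000-0000-0000-000000000000",  # placeholder dataset ids
        "00000000-0000-0000-0000-000000000001",
    ],
    headers={"Authorization": "Bearer <token>"},
)
print(resp.status_code)
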
@@ -1,4 +1,5 @@
 from uuid import UUID
+from typing import Optional, Union
 from datetime import datetime
 from fastapi import Depends, APIRouter
 from fastapi.responses import JSONResponse

@@ -9,8 +10,12 @@ from cognee.modules.search.operations import get_history
 from cognee.modules.users.methods import get_authenticated_user


+# Note: Datasets sent by name will only map to datasets owned by the request sender
+# To search for datasets not owned by the request sender dataset UUID is needed
 class SearchPayloadDTO(InDTO):
     search_type: SearchType
+    datasets: Optional[list[str]] = None
+    dataset_ids: Optional[list[UUID]] = None
     query: str

@@ -39,7 +44,11 @@ def get_search_router() -> APIRouter:

     try:
         results = await cognee_search(
-            query_text=payload.query, query_type=payload.search_type, user=user
+            query_text=payload.query,
+            query_type=payload.search_type,
+            user=user,
+            datasets=payload.datasets,
+            dataset_ids=payload.dataset_ids,
         )

         return results

@@ -1,32 +1,43 @@
+from uuid import UUID
 from typing import Union, Optional, List, Type

 from cognee.modules.users.models import User
 from cognee.modules.search.types import SearchType
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.search.methods import search as search_function
+from cognee.modules.data.methods import get_authorized_existing_datasets
+from cognee.modules.data.exceptions import DatasetNotFoundError


 async def search(
     query_text: str,
     query_type: SearchType = SearchType.GRAPH_COMPLETION,
     user: User = None,
-    datasets: Union[list[str], str, None] = None,
+    datasets: Optional[Union[list[str], str]] = None,
+    dataset_ids: Optional[Union[list[UUID], UUID]] = None,
     system_prompt_path: str = "answer_simple_question.txt",
     top_k: int = 10,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
 ) -> list:
     # We use lists from now on for datasets
-    if isinstance(datasets, str):
+    if isinstance(datasets, UUID) or isinstance(datasets, str):
         datasets = [datasets]

     if user is None:
         user = await get_default_user()

+    # Transform string based datasets to UUID - String based datasets can only be found for current user
+    if datasets is not None and [all(isinstance(dataset, str) for dataset in datasets)]:
+        datasets = await get_authorized_existing_datasets(datasets, "read", user)
+        datasets = [dataset.id for dataset in datasets]
+        if not datasets:
+            raise DatasetNotFoundError(message="No datasets found.")
+
     filtered_search_results = await search_function(
         query_text=query_text,
         query_type=query_type,
-        datasets=datasets,
+        dataset_ids=dataset_ids if dataset_ids else datasets,
         user=user,
         system_prompt_path=system_prompt_path,
         top_k=top_k,

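A hedged sketch of the two ways a search can now be scoped (the top-level re-export and the UUID are assumptions): names resolve only against the caller's own datasets, while UUIDs can reference datasets another user has shared.

import asyncio
from uuid import UUID

from cognee import search  # assumed top-level re-export
from cognee.modules.search.types import SearchType

async def main():
    # By name: only datasets owned by the current user can match.
    await search("What changed?", SearchType.GRAPH_COMPLETION, datasets=["main_dataset"])

    # By UUID: can reference a dataset the caller was granted "read" on.
    await search(
        "What changed?",
        SearchType.GRAPH_COMPLETION,
        dataset_ids=[UUID("00000000-0000-0000-0000-000000000000")],  # placeholder
    )

asyncio.run(main())
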
cognee/context_global_variables.py (new file, 67 lines)

@@ -0,0 +1,67 @@
+import os
+import pathlib
+from contextvars import ContextVar
+from typing import Union
+from uuid import UUID
+
+from cognee.infrastructure.databases.utils import get_or_create_dataset_database
+from cognee.modules.users.methods import get_user
+
+# Note: ContextVar allows us to use different graph db configurations in Cognee
+# for different async tasks, threads and processes
+vector_db_config = ContextVar("vector_db_config", default=None)
+graph_db_config = ContextVar("graph_db_config", default=None)
+
+
+async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
+    """
+    If backend access control is enabled this function will ensure all datasets have their own databases,
+    access to which will be enforced by given permissions.
+    Database name will be determined by dataset_id and LanceDB and KuzuDB use will be enforced.
+
+    Note: This is only currently supported by the following databases:
+    Relational: SQLite, Postgres
+    Vector: LanceDB
+    Graph: KuzuDB
+
+    Args:
+        dataset: Cognee dataset name or id
+        user_id: UUID of the owner of the dataset
+    """
+    if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
+        return
+
+    user = await get_user(user_id)
+
+    # To ensure permissions are enforced properly all datasets will have their own databases
+    dataset_database = await get_or_create_dataset_database(dataset, user)
+
+    # TODO: Find better location for database files
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, f".cognee_system/databases/{user.id}")
+        ).resolve()
+    )
+
+    # Set vector and graph database configuration based on dataset database information
+    vector_config = {
+        "vector_db_url": os.path.join(cognee_directory_path, dataset_database.vector_database_name),
+        "vector_db_key": "",
+        "vector_db_provider": "lancedb",
+    }
+
+    graph_config = {
+        "graph_database_provider": "kuzu",
+        "graph_file_path": os.path.join(
+            cognee_directory_path, dataset_database.graph_database_name
+        ),
+    }
+
+    # Use ContextVar so these graph and vector configurations are used
+    # in the current async context across Cognee
+    graph_db_config.set(graph_config)
+    vector_db_config.set(vector_config)

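The ContextVar choice is what makes per-dataset databases safe under concurrency. A self-contained sketch (names illustrative, not cognee's) of the isolation property this relies on:

import asyncio
from contextvars import ContextVar

# Illustrative stand-in for graph_db_config above: each asyncio task created
# by gather() gets its own copy of the context, so concurrent pipelines can
# point at different database files without interfering.
db_config: ContextVar = ContextVar("db_config", default=None)

async def pipeline(name: str) -> None:
    db_config.set({"graph_file_path": f"/tmp/{name}.graph"})  # per-task value
    await asyncio.sleep(0)  # yield so the other task runs and sets its own value
    assert db_config.get()["graph_file_path"] == f"/tmp/{name}.graph"

async def main() -> None:
    await asyncio.gather(pipeline("dataset_a"), pipeline("dataset_b"))

asyncio.run(main())
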
@@ -8,7 +8,7 @@ from cognee.modules.users.models import User
 from cognee.shared.data_models import KnowledgeGraph
 from cognee.shared.utils import send_telemetry
 from cognee.tasks.documents import (
-    check_permissions_on_documents,
+    check_permissions_on_dataset,
     classify_documents,
     extract_chunks_from_documents,
 )

@@ -31,7 +31,7 @@ async def get_cascade_graph_tasks(
     cognee_config = get_cognify_config()
     default_tasks = [
         Task(classify_documents),
-        Task(check_permissions_on_documents, user=user, permissions=["write"]),
+        Task(check_permissions_on_dataset, user=user, permissions=["write"]),
         Task(
             extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
         ),  # Extract text chunks based on the document type.

@@ -31,6 +31,9 @@ class CogneeApiError(Exception):
         super().__init__(self.message, self.name)

+    def __str__(self):
+        return f"{self.name}: {self.message} (Status code: {self.status_code})"
+

 class ServiceError(CogneeApiError):
     """Failures in external services or APIs, like a database or a third-party service"""

@@ -105,3 +105,14 @@ def get_graph_config():
     - GraphConfig: A GraphConfig instance containing the graph configuration settings.
     """
     return GraphConfig()
+
+
+def get_graph_context_config():
+    """This function will get the appropriate graph db config based on async context.
+    This allows the use of multiple graph databases for different threads, async tasks and parallelization
+    """
+    from cognee.context_global_variables import graph_db_config
+
+    if graph_db_config.get():
+        return graph_db_config.get()
+    return get_graph_config().to_hashable_dict()

@@ -2,36 +2,22 @@

 from functools import lru_cache

-from .config import get_graph_config
+from .config import get_graph_context_config
 from .graph_db_interface import GraphDBInterface
 from .supported_databases import supported_databases


 async def get_graph_engine() -> GraphDBInterface:
-    """
-    Factory function to get the appropriate graph client based on the graph type.
-
-    This function retrieves the graph configuration and creates a graph engine by calling
-    the `create_graph_engine` function. If the configured graph database provider is
-    'networkx', it ensures that the graph is loaded from a file asynchronously if it hasn't
-    been loaded yet. It raises an `EnvironmentError` if the necessary configurations for the
-    selected graph provider are missing.
-
-    Returns:
-    --------
-
-    - GraphDBInterface: Returns an instance of GraphDBInterface which represents the
-      selected graph client.
-    """
-    config = get_graph_config()
-
-    graph_client = create_graph_engine(**get_graph_config().to_hashable_dict())
+    """Factory function to get the appropriate graph client based on the graph type."""
+    # Get appropriate graph configuration based on current async context
+    config = get_graph_context_config()
+
+    graph_client = create_graph_engine(**config)

     # Async functions can't be cached. After creating and caching the graph engine
     # handle all necessary async operations for different graph types bellow.
     # Handle loading of graph for NetworkX
-    if config.graph_database_provider.lower() == "networkx" and graph_client.graph is None:
+    if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None:
         await graph_client.load_graph_from_file()

     return graph_client

@@ -40,11 +26,11 @@ async def get_graph_engine() -> GraphDBInterface:
 @lru_cache
 def create_graph_engine(
     graph_database_provider,
-    graph_database_url,
-    graph_database_username,
-    graph_database_password,
-    graph_database_port,
     graph_file_path,
+    graph_database_url="",
+    graph_database_username="",
+    graph_database_password="",
+    graph_database_port="",
 ):
     """
     Create a graph engine based on the specified provider type.

cognee/infrastructure/databases/utils/__init__.py (new file, 1 line)

@@ -0,0 +1 @@
+from .get_or_create_dataset_database import get_or_create_dataset_database

cognee/infrastructure/databases/utils/get_or_create_dataset_database.py (new file, 68 lines)

@@ -0,0 +1,68 @@
+from uuid import UUID
+from typing import Union
+
+from sqlalchemy import select
+from sqlalchemy.exc import IntegrityError
+from cognee.modules.data.methods import create_dataset
+
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.data.methods import get_unique_dataset_id
+from cognee.modules.users.models import DatasetDatabase
+from cognee.modules.users.models import User
+
+
+async def get_or_create_dataset_database(
+    dataset: Union[str, UUID],
+    user: User,
+) -> DatasetDatabase:
+    """
+    Return the `DatasetDatabase` row for the given owner + dataset.
+
+    • If the row already exists, it is fetched and returned.
+    • Otherwise a new one is created atomically and returned.
+
+    Parameters
+    ----------
+    user : User
+        Principal that owns this dataset.
+    dataset : Union[str, UUID]
+        Dataset being linked.
+    """
+    db_engine = get_relational_engine()
+
+    dataset_id = await get_unique_dataset_id(dataset, user)
+
+    vector_db_name = f"{dataset_id}.lance.db"
+    graph_db_name = f"{dataset_id}.pkl"
+
+    async with db_engine.get_async_session() as session:
+        # Create dataset if it doesn't exist
+        if isinstance(dataset, str):
+            dataset = await create_dataset(dataset, user, session)
+
+        # Try to fetch an existing row first
+        stmt = select(DatasetDatabase).where(
+            DatasetDatabase.owner_id == user.id,
+            DatasetDatabase.dataset_id == dataset_id,
+        )
+        existing: DatasetDatabase = await session.scalar(stmt)
+        if existing:
+            return existing
+
+        # If there are no existing rows build a new row
+        record = DatasetDatabase(
+            owner_id=user.id,
+            dataset_id=dataset_id,
+            vector_database_name=vector_db_name,
+            graph_database_name=graph_db_name,
+        )
+
+        try:
+            session.add(record)
+            await session.commit()
+            await session.refresh(record)
+            return record
+
+        except IntegrityError:
+            await session.rollback()
+            raise

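The per-dataset database names above are deterministic: combined with get_unique_dataset_id (also changed in this diff), the same owner and dataset name always map to the same files. A sketch, with a made-up owner id:

from uuid import NAMESPACE_OID, uuid5

owner_id = "22222222-2222-2222-2222-222222222222"  # placeholder user id
# Same derivation as get_unique_dataset_id: uuid5 over name + owner id.
dataset_id = uuid5(NAMESPACE_OID, f"my_dataset{owner_id}")

print(f"{dataset_id}.lance.db")  # LanceDB database name for this dataset
print(f"{dataset_id}.pkl")       # Kuzu graph database name for this dataset
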
@@ -62,3 +62,12 @@ def get_vectordb_config():
     configuration.
     """
     return VectorConfig()
+
+
+def get_vectordb_context_config():
+    """This function will get the appropriate vector db config based on async context."""
+    from cognee.context_global_variables import vector_db_config
+
+    if vector_db_config.get():
+        return vector_db_config.get()
+    return get_vectordb_config().to_dict()

@@ -6,10 +6,10 @@ from functools import lru_cache

 @lru_cache
 def create_vector_engine(
-    vector_db_url: str,
-    vector_db_port: str,
-    vector_db_key: str,
     vector_db_provider: str,
+    vector_db_url: str,
+    vector_db_port: str = "",
+    vector_db_key: str = "",
 ):
     """
     Create a vector database engine based on the specified provider.

@@ -1,14 +1,7 @@
-from .config import get_vectordb_config
+from .config import get_vectordb_context_config
 from .create_vector_engine import create_vector_engine


 def get_vector_engine():
-    """
-    Create and return a vector engine instance.
-
-    Returns:
-    --------
-
-    A vector engine instance created from the vector database configuration.
-    """
-    return create_vector_engine(**get_vectordb_config().to_dict())
+    # Get appropriate vector db configuration based on current async context
+    return create_vector_engine(**get_vectordb_context_config())

@@ -1,18 +1,13 @@
-from ..get_vector_engine import get_vector_engine, get_vectordb_config
+from ..get_vector_engine import get_vector_engine, get_vectordb_context_config
 from sqlalchemy import text
+from cognee.context_global_variables import vector_db_config as context_vector_db_config


 async def create_db_and_tables():
-    """
-    Create the database and its associated tables if necessary.
-
-    This function checks the vector database provider configuration and, if it is set to
-    "pgvector", creates the necessary vector extension in the PostgreSQL database using an
-    asynchronous context manager.
-    """
-    vector_config = get_vectordb_config()
+    # Get appropriate vector db configuration based on current async context
+    vector_config = get_vectordb_context_config()
     vector_engine = get_vector_engine()

-    if vector_config.vector_db_provider == "pgvector":
+    if vector_config["vector_db_provider"] == "pgvector":
         async with vector_engine.engine.begin() as connection:
             await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))

@@ -1,3 +1,4 @@
+import os
 from typing import Dict, List, Optional
 from qdrant_client import AsyncQdrantClient, models

@@ -147,14 +148,15 @@ class QDrantAdapter(VectorDBInterface):

         Returns:
         --------

             - AsyncQdrantClient: An instance of AsyncQdrantClient configured for database
               operations.
         """
+        is_prod = os.getenv("ENV").lower() == "prod"
+
         if self.qdrant_path is not None:
-            return AsyncQdrantClient(path=self.qdrant_path, port=6333)
+            return AsyncQdrantClient(path=self.qdrant_path, port=6333, https=is_prod)
         elif self.url is not None:
-            return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333)
+            return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333, https=is_prod)

         return AsyncQdrantClient(location=":memory:")

@@ -7,4 +7,6 @@ This module defines a set of exceptions for handling various data errors
 from .exceptions import (
     UnstructuredLibraryImportError,
     UnauthorizedDataAccessError,
+    DatasetNotFoundError,
+    DatasetTypeError,
 )

@@ -20,3 +20,23 @@ class UnauthorizedDataAccessError(CogneeApiError):
         status_code=status.HTTP_401_UNAUTHORIZED,
     ):
         super().__init__(message, name, status_code)
+
+
+class DatasetNotFoundError(CogneeApiError):
+    def __init__(
+        self,
+        message: str = "Dataset not found.",
+        name: str = "DatasetNotFoundError",
+        status_code=status.HTTP_404_NOT_FOUND,
+    ):
+        super().__init__(message, name, status_code)
+
+
+class DatasetTypeError(CogneeApiError):
+    def __init__(
+        self,
+        message: str = "Dataset type not supported.",
+        name: str = "DatasetTypeError",
+        status_code=status.HTTP_400_BAD_REQUEST,
+    ):
+        super().__init__(message, name, status_code)

@@ -8,7 +8,15 @@ from .get_datasets_by_name import get_datasets_by_name
 from .get_dataset_data import get_dataset_data
 from .get_data import get_data
 from .get_unique_dataset_id import get_unique_dataset_id
+from .get_authorized_existing_datasets import get_authorized_existing_datasets
+from .get_dataset_ids import get_dataset_ids

 # Delete
 from .delete_dataset import delete_dataset
 from .delete_data import delete_data
+
+# Create
+from .load_or_create_datasets import load_or_create_datasets
+
+# Check
+from .check_dataset_name import check_dataset_name

cognee/modules/data/methods/check_dataset_name.py (new file, 3 lines)

@@ -0,0 +1,3 @@
+def check_dataset_name(dataset_name: str):
+    if "." in dataset_name or " " in dataset_name:
+        raise ValueError("Dataset name cannot contain spaces or underscores")

@@ -1,4 +1,3 @@
-from uuid import UUID, uuid5, NAMESPACE_OID
 from sqlalchemy.ext.asyncio import AsyncSession
 from sqlalchemy import select
 from sqlalchemy.orm import joinedload

cognee/modules/data/methods/get_authorized_existing_datasets.py (new file, 39 lines)

@@ -0,0 +1,39 @@
+from typing import Union
+from uuid import UUID
+
+from cognee.modules.data.models import Dataset
+from cognee.modules.users.models import User
+from cognee.modules.data.methods.get_dataset_ids import get_dataset_ids
+from cognee.modules.users.permissions.methods import get_all_user_permission_datasets
+from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
+
+
+async def get_authorized_existing_datasets(
+    datasets: Union[list[str], list[UUID]], permission_type: str, user: User
+) -> list[Dataset]:
+    """
+    Returns a list of existing dataset objects the user has access to, based on the datasets input.
+
+    Args:
+        datasets: dataset names or ids to filter on
+        permission_type: permission to check (e.g. "read" or "write")
+        user: user whose permissions are checked
+
+    Returns:
+        list of Dataset objects
+    """
+    if datasets:
+        # Function handles transforming dataset input to dataset IDs (if possible)
+        dataset_ids = await get_dataset_ids(datasets, user)
+        # If dataset_ids are provided filter these datasets based on what user has permission for.
+        if dataset_ids:
+            existing_datasets = await get_specific_user_permission_datasets(
+                user.id, permission_type, dataset_ids
+            )
+        else:
+            existing_datasets = []
+    else:
+        # If no datasets are provided, work with all existing datasets user has permission for.
+        existing_datasets = await get_all_user_permission_datasets(user, permission_type)
+
+    return existing_datasets

cognee/modules/data/methods/get_dataset_ids.py (new file, 36 lines)

@@ -0,0 +1,36 @@
+from typing import Union
+from uuid import UUID
+
+from cognee.modules.data.exceptions import DatasetTypeError
+from cognee.modules.data.methods import get_datasets
+
+
+async def get_dataset_ids(datasets: Union[list[str], list[UUID]], user):
+    """
+    Returns the dataset IDs needed, based on the provided input.
+    It transforms raw strings into real dataset ids while keeping write permissions in mind.
+    If a user wants to write to a dataset they do not own, it must be provided through UUID.
+
+    Args:
+        datasets: dataset names or ids
+        user: user requesting the datasets
+
+    Returns: a list of write access dataset_ids if they exist
+    """
+    if all(isinstance(dataset, UUID) for dataset in datasets):
+        # Return list of dataset UUIDs
+        dataset_ids = datasets
+    else:
+        # Convert list of dataset names to dataset UUID
+        if all(isinstance(dataset, str) for dataset in datasets):
+            # Get all user owned dataset objects (a dataset the user does not own must be provided through UUID)
+            user_datasets = await get_datasets(user.id)
+            # Keep only the datasets mentioned by name
+            dataset_ids = [dataset.id for dataset in user_datasets if dataset.name in datasets]
+        else:
+            raise DatasetTypeError(
+                f"One or more of the provided dataset types is not handled: f{datasets}"
+            )
+
+    return dataset_ids

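The name-to-id mapping above silently drops names the caller does not own; a quick illustration of the resulting contract (stubbed, not using cognee models):

from uuid import uuid4

owned = {"main_dataset": uuid4(), "notes": uuid4()}  # name -> id, placeholder ids
requested = ["main_dataset", "someone_elses_dataset"]

# Only the caller-owned name survives; the foreign dataset would have to be
# requested by UUID to reach the permission check at all.
resolved = [owned[name] for name in requested if name in owned]
assert resolved == [owned["main_dataset"]]
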
@@ -1,6 +1,9 @@
 from uuid import UUID, uuid5, NAMESPACE_OID
 from cognee.modules.users.models import User
+from typing import Union


-async def get_unique_dataset_id(dataset_name: str, user: User) -> UUID:
+async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID:
+    if isinstance(dataset_name, UUID):
+        return dataset_name
     return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")

cognee/modules/data/methods/load_or_create_datasets.py (new file, 42 lines)

@@ -0,0 +1,42 @@
+from typing import List, Union
+from uuid import UUID
+
+from cognee.modules.data.models import Dataset
+from cognee.modules.data.methods import get_unique_dataset_id
+from cognee.modules.data.exceptions import DatasetNotFoundError
+
+
+async def load_or_create_datasets(
+    dataset_names: List[Union[str, UUID]], existing_datasets: List[Dataset], user
+) -> List[Dataset]:
+    """
+    Given a list of dataset identifiers (names or UUIDs), return Dataset instances:
+    - If an identifier matches an existing Dataset (by name or id), reuse it.
+    - Otherwise, create a new Dataset with a unique id. Note: The created dataset is not stored to the database.
+    """
+    result: List[Dataset] = []
+
+    for identifier in dataset_names:
+        # Try to find a matching dataset in the existing list
+        # If no matching dataset is found return None
+        match = next(
+            (ds for ds in existing_datasets if ds.name == identifier or ds.id == identifier), None
+        )
+
+        if match:
+            result.append(match)
+            continue
+
+        # If the identifier is a UUID but nothing matched, that's an error
+        if isinstance(identifier, UUID):
+            raise DatasetNotFoundError(f"Dataset with given UUID does not exist: {identifier}")
+
+        # Otherwise, create a new Dataset instance
+        new_dataset = Dataset(
+            id=await get_unique_dataset_id(dataset_name=identifier, user=user),
+            name=identifier,
+            owner_id=user.id,
+        )
+        result.append(new_dataset)
+
+    return result

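The contract in practice, sketched with a stand-in model (not cognee's Dataset): names fall through to in-memory creation, while unknown UUIDs fail fast.

from dataclasses import dataclass
from uuid import UUID, uuid4

@dataclass
class FakeDataset:  # stand-in for cognee.modules.data.models.Dataset
    id: UUID
    name: str

existing = [FakeDataset(id=uuid4(), name="main_dataset")]

def resolve(identifier, existing):
    match = next((d for d in existing if d.name == identifier or d.id == identifier), None)
    if match:
        return match                                    # reuse existing dataset
    if isinstance(identifier, UUID):
        raise LookupError(f"Dataset with given UUID does not exist: {identifier}")
    return FakeDataset(id=uuid4(), name=identifier)     # new, not yet persisted

print(resolve("main_dataset", existing).name)  # reused
print(resolve("brand_new", existing).name)     # created in memory
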
@@ -33,9 +33,6 @@ class Data(Base):
         cascade="all, delete",
     )

-    # New relationship for ACLs with cascade deletion
-    acls = relationship("ACL", back_populates="data", cascade="all, delete-orphan")
-
     def to_json(self) -> dict:
         return {
             "id": str(self.id),

@@ -19,6 +19,8 @@ class Dataset(Base):

     owner_id = Column(UUID, index=True)

+    acls = relationship("ACL", back_populates="dataset", cascade="all, delete-orphan")
+
     data: Mapped[List["Data"]] = relationship(
         "Data",
         secondary=DatasetData.__tablename__,

@@ -1,11 +1,9 @@
 import asyncio
 from typing import Union
-from uuid import NAMESPACE_OID, uuid5
+from uuid import NAMESPACE_OID, uuid5, UUID

 from cognee.shared.logging_utils import get_logger
-from cognee.modules.data.methods import get_datasets
 from cognee.modules.data.methods.get_dataset_data import get_dataset_data
-from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
 from cognee.modules.data.models import Data, Dataset
 from cognee.modules.pipelines.operations.run_tasks import run_tasks
 from cognee.modules.pipelines.models import PipelineRunStatus

@@ -14,6 +12,13 @@ from cognee.modules.pipelines.tasks.task import Task
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.users.models import User
 from cognee.modules.pipelines.operations import log_pipeline_run_initiated
+from cognee.context_global_variables import set_database_global_context_variables
+from cognee.modules.data.exceptions import DatasetNotFoundError
+from cognee.modules.data.methods import (
+    get_authorized_existing_datasets,
+    load_or_create_datasets,
+    check_dataset_name,
+)

 from cognee.infrastructure.databases.relational import (
     create_db_and_tables as create_relational_db_and_tables,

@@ -21,6 +26,10 @@ from cognee.infrastructure.databases.relational import (
 from cognee.infrastructure.databases.vector.pgvector import (
     create_db_and_tables as create_pgvector_db_and_tables,
 )
+from cognee.context_global_variables import (
+    graph_db_config as context_graph_db_config,
+    vector_db_config as context_vector_db_config,
+)

 logger = get_logger("cognee.pipeline")

@@ -30,10 +39,19 @@ update_status_lock = asyncio.Lock()
 async def cognee_pipeline(
     tasks: list[Task],
     data=None,
-    datasets: Union[str, list[str]] = None,
+    datasets: Union[str, list[str], list[UUID]] = None,
     user: User = None,
     pipeline_name: str = "custom_pipeline",
+    vector_db_config: dict = None,
+    graph_db_config: dict = None,
 ):
+    # Note: These context variables allow different value assignment for databases in Cognee
+    # per async task, thread, process and etc.
+    if vector_db_config:
+        context_vector_db_config.set(vector_db_config)
+    if graph_db_config:
+        context_graph_db_config.set(graph_db_config)
+
     # Create tables for databases
     await create_relational_db_and_tables()
     await create_pgvector_db_and_tables()

@@ -54,49 +72,35 @@ async def cognee_pipeline(
     if user is None:
         user = await get_default_user()

-    # Convert datasets to list in case it's a string
-    if isinstance(datasets, str):
+    # Convert datasets to list
+    if isinstance(datasets, str) or isinstance(datasets, UUID):
         datasets = [datasets]

-    # If no datasets are provided, work with all existing datasets.
-    existing_datasets = await get_datasets(user.id)
+    # Get datasets user wants write permissions for (verify user has permissions if datasets are provided as well)
+    # NOTE: If a user wants to write to a dataset they do not own, it must be provided through UUID
+    existing_datasets = await get_authorized_existing_datasets(datasets, "write", user)

     if not datasets:
         # Get datasets from database if none sent.
         datasets = existing_datasets
     else:
-        # If dataset is already in database, use it, otherwise create a new instance.
-        dataset_instances = []
-
-        for dataset_name in datasets:
-            is_dataset_found = False
-
-            for existing_dataset in existing_datasets:
-                if (
-                    existing_dataset.name == dataset_name
-                    or str(existing_dataset.id) == dataset_name
-                ):
-                    dataset_instances.append(existing_dataset)
-                    is_dataset_found = True
-                    break
-
-            if not is_dataset_found:
-                dataset_instances.append(
-                    Dataset(
-                        id=await get_unique_dataset_id(dataset_name=dataset_name, user=user),
-                        name=dataset_name,
-                        owner_id=user.id,
-                    )
-                )
-
-        datasets = dataset_instances
+        # If dataset matches an existing Dataset (by name or id), reuse it. Otherwise, create a new Dataset.
+        datasets = await load_or_create_datasets(datasets, existing_datasets, user)
+
+    if not datasets:
+        raise DatasetNotFoundError("There are no datasets to work with.")

     awaitables = []

     for dataset in datasets:
         awaitables.append(
             run_pipeline(
-                dataset=dataset, user=user, tasks=tasks, data=data, pipeline_name=pipeline_name
+                dataset=dataset,
+                user=user,
+                tasks=tasks,
+                data=data,
+                pipeline_name=pipeline_name,
+                context={"dataset": dataset},
             )
         )

@@ -109,9 +113,13 @@ async def run_pipeline(
     tasks: list[Task],
     data=None,
     pipeline_name: str = "custom_pipeline",
+    context: dict = None,
 ):
     check_dataset_name(dataset.name)

+    # Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
+    await set_database_global_context_variables(dataset.name, user.id)
+
     # Ugly hack, but no easier way to do this.
     if pipeline_name == "add_pipeline":
         # Refresh the add pipeline status so data is added to a dataset.

@@ -160,15 +168,10 @@ async def run_pipeline(
     if not isinstance(task, Task):
         raise ValueError(f"Task {task} is not an instance of Task")

-    pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name)
+    pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name, context=context)
     pipeline_run_status = None

     async for run_status in pipeline_run:
         pipeline_run_status = run_status

     return pipeline_run_status


-def check_dataset_name(dataset_name: str) -> str:

||||||
if "." in dataset_name or " " in dataset_name:
|
|
||||||
raise ValueError("Dataset name cannot contain spaces or underscores")
|
|
||||||
|
|
|
||||||
|
|
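Illustration (not part of the diff): the vector_db_config/graph_db_config values above are stored in contextvars, so each asyncio task sees only its own database settings. A minimal, self-contained sketch of that isolation property, with illustrative names only:

    import asyncio
    from contextvars import ContextVar

    vector_db_config: ContextVar = ContextVar("vector_db_config", default=None)

    async def pipeline(config: dict):
        vector_db_config.set(config)  # visible only inside this task's context
        await asyncio.sleep(0)        # yield so the other task runs in between
        assert vector_db_config.get() == config

    async def main():
        # each task created by gather gets its own copy of the context
        await asyncio.gather(pipeline({"url": "a.test"}), pipeline({"url": "b.test"}))

    asyncio.run(main())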
@@ -1,8 +1,11 @@
+import os
 import json
-from typing import Callable, Optional, List, Type
+import asyncio
+from uuid import UUID
+from typing import Callable, List, Optional, Type, Union

+from cognee.context_global_variables import set_database_global_context_variables
 from cognee.exceptions import InvalidValueError
-from cognee.infrastructure.engine.utils import parse_id
 from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
 from cognee.modules.retrieval.insights_retriever import InsightsRetriever
 from cognee.modules.retrieval.summaries_retriever import SummariesRetriever

@@ -21,24 +24,45 @@ from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageR
 from cognee.modules.search.types import SearchType
 from cognee.modules.storage.utils import JSONEncoder
 from cognee.modules.users.models import User
-from cognee.modules.users.permissions.methods import get_document_ids_for_user
+from cognee.modules.data.models import Dataset
 from cognee.shared.utils import send_telemetry
+from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
 from cognee.modules.search.operations import log_query, log_result


 async def search(
     query_text: str,
     query_type: SearchType,
-    datasets: list[str],
+    dataset_ids: Union[list[UUID], None],
     user: User,
     system_prompt_path="answer_simple_question.txt",
     top_k: int = 10,
     node_type: Optional[Type] = None,
     node_name: Optional[List[str]] = None,
 ):
+    """
+
+    Args:
+        query_text:
+        query_type:
+        datasets:
+        user:
+        system_prompt_path:
+        top_k:
+
+    Returns:
+
+    Notes:
+        Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
+    """
+    # Use search function filtered by permissions if access control is enabled
+    if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
+        return await permissions_search(
+            query_text, query_type, user, dataset_ids, system_prompt_path, top_k
+        )
+
     query = await log_query(query_text, query_type.value, user.id)

-    own_document_ids = await get_document_ids_for_user(user.id, datasets)
     search_results = await specific_search(
         query_type,
         query_text,

@@ -49,18 +73,9 @@ async def search(
         node_name=node_name,
     )

-    filtered_search_results = []
-
-    for search_result in search_results:
-        document_id = search_result["document_id"] if "document_id" in search_result else None
-        document_id = parse_id(document_id)
-
-        if document_id is None or document_id in own_document_ids:
-            filtered_search_results.append(search_result)
-
-    await log_result(query.id, json.dumps(filtered_search_results, cls=JSONEncoder), user.id)
-
-    return filtered_search_results
+    await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
+
+    return search_results


 async def specific_search(

@@ -120,3 +135,62 @@ async def specific_search(
     send_telemetry("cognee.search EXECUTION COMPLETED", user.id)

     return results
+
+
+async def permissions_search(
+    query_text: str,
+    query_type: SearchType,
+    user: User = None,
+    dataset_ids: Optional[list[UUID]] = None,
+    system_prompt_path: str = "answer_simple_question.txt",
+    top_k: int = 10,
+) -> list:
+    """
+    Verifies access for provided datasets or uses all datasets user has read access for and performs search per dataset.
+    Not to be used outside of active access control mode.
+    """
+    query = await log_query(query_text, query_type.value, user.id)
+
+    # Find datasets user has read access for (if datasets are provided only return them. Provided user has read access)
+    search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
+
+    # Searches all provided datasets and handles setting up of appropriate database context based on permissions
+    search_results = await specific_search_by_context(
+        search_datasets, query_text, query_type, user, system_prompt_path, top_k
+    )
+
+    await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
+
+    return search_results
+
+
+async def specific_search_by_context(
+    search_datasets: list[Dataset],
+    query_text: str,
+    query_type: SearchType,
+    user: User,
+    system_prompt_path: str,
+    top_k: int,
+):
+    """
+    Searches all provided datasets and handles setting up of appropriate database context based on permissions.
+    Not to be used outside of active access control mode.
+    """
+
+    async def _search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k):
+        # Set database configuration in async context for each dataset user has access for
+        await set_database_global_context_variables(dataset.id, dataset.owner_id)
+        search_results = await specific_search(
+            query_type, query_text, user, system_prompt_path=system_prompt_path, top_k=top_k
+        )
+        return {dataset.name: search_results}
+
+    # Search every dataset async based on query and appropriate database configuration
+    tasks = []
+    for dataset in search_datasets:
+        tasks.append(
+            _search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k)
+        )
+
+    return await asyncio.gather(*tasks)
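Illustration (not part of the diff): how a caller might exercise the dataset-scoped search, assuming ENABLE_BACKEND_ACCESS_CONTROL=true in the environment and `search` imported from the module changed above. In that mode each element of the result is a {dataset_name: results} mapping produced by specific_search_by_context.

    from uuid import UUID
    from cognee.modules.search.types import SearchType

    async def search_readable_datasets(user, dataset_ids: list[UUID]):
        # Only datasets the user holds "read" on are searched;
        # PermissionDeniedError is raised if none qualify.
        return await search(  # `search` as defined in the diff above
            query_text="What do these documents say?",
            query_type=SearchType.GRAPH_COMPLETION,
            dataset_ids=dataset_ids,
            user=user,
        )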
@@ -19,17 +19,14 @@ class CustomJWTStrategy(JWTStrategy):
         # JoinLoad tenant and role information to user object
         user = await get_user(user.id)

-        if user.tenant:
-            data = {"user_id": str(user.id), "tenant_id": str(user.tenant.id), "roles": user.roles}
-        else:
-            # The default tenant is None
-            data = {"user_id": str(user.id), "tenant_id": None, "roles": user.roles}
+        data = {"user_id": str(user.id)}
         return generate_jwt(data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm)


 @lru_cache
 def get_auth_backend():
-    bearer_transport = BearerTransport(tokenUrl="auth/jwt/login")
+    bearer_transport = BearerTransport(tokenUrl="api/v1/auth/login")

     def get_jwt_strategy() -> JWTStrategy[models.UP, models.ID]:
         secret = os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret")
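Illustration (not part of the diff): the token now carries only user_id; tenant and role data are re-read from the database on each request. A round-trip sketch with PyJWT, mirroring the defaults in the diff (HS256, FASTAPI_USERS_JWT_SECRET falling back to "super_secret"):

    import jwt
    from uuid import uuid4

    secret = "super_secret"  # stand-in for os.getenv("FASTAPI_USERS_JWT_SECRET", ...)
    token = jwt.encode({"user_id": str(uuid4())}, secret, algorithm="HS256")

    payload = jwt.decode(token, secret, algorithms=["HS256"])
    assert "tenant_id" not in payload  # slimmed-down claim set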
@@ -9,4 +9,5 @@ from .exceptions import (
     UserNotFoundError,
     PermissionDeniedError,
     TenantNotFoundError,
+    PermissionNotFoundError,
 )

@@ -46,3 +46,13 @@ class PermissionDeniedError(CogneeApiError):
         status_code=status.HTTP_403_FORBIDDEN,
     ):
         super().__init__(message, name, status_code)
+
+
+class PermissionNotFoundError(CogneeApiError):
+    def __init__(
+        self,
+        message: str = "Permission type does not exist.",
+        name: str = "PermissionNotFoundError",
+        status_code=status.HTTP_403_FORBIDDEN,
+    ):
+        super().__init__(message, name, status_code)
@@ -1,7 +1,8 @@
 from types import SimpleNamespace

 from ..get_fastapi_users import get_fastapi_users
-from fastapi import HTTPException, Header
+from fastapi import HTTPException, Security
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
 import os
 import jwt

@@ -9,28 +10,29 @@ from uuid import UUID

 fastapi_users = get_fastapi_users()

+# Allows Swagger to understand authorization type and allow single sign on for the Swagger docs to test backend
+bearer_scheme = HTTPBearer(scheme_name="BearerAuth", description="Paste **Bearer <JWT>**")

-async def get_authenticated_user(authorization: str = Header(...)) -> SimpleNamespace:
-    """Extract and validate JWT from Authorization header."""
+
+async def get_authenticated_user(
+    creds: HTTPAuthorizationCredentials = Security(bearer_scheme),
+) -> SimpleNamespace:
+    """
+    Extract and validate the JWT presented in the Authorization header.
+    """
+    if creds is None:  # header missing
+        raise HTTPException(status_code=401, detail="Not authenticated")
+
+    if creds.scheme.lower() != "bearer":  # shouldn't happen, extra guard
+        raise HTTPException(status_code=401, detail="Invalid authentication scheme")
+
+    token = creds.credentials
     try:
-        scheme, token = authorization.split()
-        if scheme.lower() != "bearer":
-            raise HTTPException(status_code=401, detail="Invalid authentication scheme")
-
         payload = jwt.decode(
             token, os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret"), algorithms=["HS256"]
         )

-        if payload.get("tenant_id"):
-            # SimpleNamespace lets us access dictionary elements like attributes
-            auth_data = SimpleNamespace(
-                id=UUID(payload["user_id"]),
-                tenant_id=UUID(payload["tenant_id"]),
-                roles=payload["roles"],
-            )
-        else:
-            auth_data = SimpleNamespace(id=UUID(payload["user_id"]), tenant_id=None, roles=[])
+        auth_data = SimpleNamespace(id=UUID(payload["user_id"]))

         return auth_data

     except jwt.ExpiredSignatureError:
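Illustration (not part of the diff): a hypothetical route consuming the new Security-based dependency; because HTTPBearer is declared via Security, FastAPI advertises the BearerAuth scheme in the OpenAPI docs automatically.

    from fastapi import Depends, FastAPI

    app = FastAPI()

    @app.get("/me")
    async def me(user=Depends(get_authenticated_user)):  # dependency from the diff above
        return {"id": str(user.id)}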
@@ -1,5 +1,6 @@
 from types import SimpleNamespace
 from sqlalchemy.orm import selectinload
+from sqlalchemy.exc import NoResultFound
 from sqlalchemy.future import select
 from cognee.modules.users.models import User
 from cognee.base_config import get_base_config

@@ -33,5 +34,6 @@ async def get_default_user() -> SimpleNamespace:
     except Exception as error:
         if "principals" in str(error.args):
             raise DatabaseNotCreatedError() from error
+        if isinstance(error, NoResultFound):
             raise UserNotFoundError(f"Failed to retrieve default user: {default_email}") from error
+        raise
@@ -1,7 +1,9 @@
 from uuid import UUID
 from sqlalchemy import select
-from sqlalchemy.orm import joinedload
+from sqlalchemy.orm import selectinload
+import sqlalchemy.exc
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.infrastructure.databases.exceptions import EntityNotFoundError
 from ..models import User

@@ -12,9 +14,12 @@ async def get_user(user_id: UUID):
         user = (
             await session.execute(
                 select(User)
-                .options(joinedload(User.roles), joinedload(User.tenant))
+                .options(selectinload(User.roles), selectinload(User.tenant))
                 .where(User.id == user_id)
             )
         ).scalar()

+        if not user:
+            raise EntityNotFoundError(message=f"Could not find user: {user_id}")
+
         return user
@@ -15,8 +15,8 @@ class ACL(Base):

     principal_id = Column(UUID, ForeignKey("principals.id"))
     permission_id = Column(UUID, ForeignKey("permissions.id"))
-    data_id = Column(UUID, ForeignKey("data.id", ondelete="CASCADE"))
+    dataset_id = Column(UUID, ForeignKey("datasets.id", ondelete="CASCADE"))

     principal = relationship("Principal")
     permission = relationship("Permission")
-    data = relationship("Data", back_populates="acls")
+    dataset = relationship("Dataset", back_populates="acls")
cognee/modules/users/models/DatasetDatabase.py (new file, 19 lines)
@@ -0,0 +1,19 @@
+from datetime import datetime, timezone
+
+from sqlalchemy import Column, DateTime, String, UUID, ForeignKey
+from cognee.infrastructure.databases.relational import Base
+
+
+class DatasetDatabase(Base):
+    __tablename__ = "dataset_database"
+
+    owner_id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), index=True)
+    dataset_id = Column(
+        UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
+    )
+
+    vector_database_name = Column(String, unique=True, nullable=False)
+    graph_database_name = Column(String, unique=True, nullable=False)
+
+    created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
+    updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
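Illustration (not part of the diff): a sketch of how the per-dataset database names recorded in DatasetDatabase might be resolved when routing a request; the session argument is assumed to be an AsyncSession from the relational engine.

    from uuid import UUID
    from sqlalchemy import select

    async def get_database_names(session, dataset_id: UUID):
        row = (
            await session.execute(
                select(DatasetDatabase).where(DatasetDatabase.dataset_id == dataset_id)
            )
        ).scalar_one_or_none()
        # One LanceDB database and one Kuzu database exist per Cognee user + dataset
        return (row.vector_database_name, row.graph_database_name) if row else None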
@@ -11,6 +11,8 @@ class Tenant(Principal):
     id = Column(UUID, ForeignKey("principals.id"), primary_key=True)
     name = Column(String, unique=True, nullable=False, index=True)

+    owner_id = Column(UUID, index=True)
+
     # One-to-Many relationship with User; specify the join via User.tenant_id
     users = relationship(
         "User",
@@ -1,6 +1,7 @@
 from .User import User
 from .Role import Role
 from .UserRole import UserRole
+from .DatasetDatabase import DatasetDatabase
 from .RoleDefaultPermissions import RoleDefaultPermissions
 from .UserDefaultPermissions import UserDefaultPermissions
 from .TenantDefaultPermissions import TenantDefaultPermissions
cognee/modules/users/permissions/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
+from .permission_types import PERMISSION_TYPES
@@ -1,6 +1,13 @@
-from .check_permission_on_documents import check_permission_on_documents
-from .give_permission_on_document import give_permission_on_document
+from .get_role import get_role
+from .get_tenant import get_tenant
+from .get_principal import get_principal
+from .get_principal_datasets import get_principal_datasets
+from .get_all_user_permission_datasets import get_all_user_permission_datasets
+from .get_specific_user_permission_datasets import get_specific_user_permission_datasets
+from .check_permission_on_dataset import check_permission_on_dataset
+from .give_permission_on_dataset import give_permission_on_dataset
 from .get_document_ids_for_user import get_document_ids_for_user
+from .authorized_give_permission_on_datasets import authorized_give_permission_on_datasets
 from .give_default_permission_to_tenant import give_default_permission_to_tenant
 from .give_default_permission_to_role import give_default_permission_to_role
 from .give_default_permission_to_user import give_default_permission_to_user
@@ -0,0 +1,23 @@
+from typing import Union, List
+
+from cognee.modules.users.permissions.methods import get_principal
+from cognee.modules.users.permissions.methods import give_permission_on_dataset
+from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
+from uuid import UUID
+
+
+async def authorized_give_permission_on_datasets(
+    principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
+):
+    # If only a single dataset UUID is provided transform it to a list
+    if not isinstance(dataset_ids, list):
+        dataset_ids = [dataset_ids]
+
+    principal = await get_principal(principal_id)
+
+    # Check if request owner has permission to share dataset access
+    datasets = await get_specific_user_permission_datasets(owner_id, "share", dataset_ids)
+
+    # TODO: Do we want to enforce sharing of datasets to only be between users of the same tenant?
+    for dataset in datasets:
+        await give_permission_on_dataset(principal, dataset.id, permission_name)
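Illustration (not part of the diff): a hypothetical sharing call; the request owner must hold "share" on the dataset or get_specific_user_permission_datasets raises PermissionDeniedError.

    from uuid import UUID

    async def share_dataset(owner_id: UUID, grantee_id: UUID, dataset_id: UUID):
        await authorized_give_permission_on_datasets(
            principal_id=grantee_id,
            dataset_ids=dataset_id,  # a single UUID is normalized to a list
            permission_name="read",
            owner_id=owner_id,
        )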
@@ -13,29 +13,29 @@ from ...models.ACL import ACL
 logger = get_logger()


-async def check_permission_on_documents(user: User, permission_type: str, document_ids: list[UUID]):
+async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
     if user is None:
         user = await get_default_user()

-    # TODO: Enable user role permissions again. Temporarily disabled during rework.
+    # # TODO: Enable user role permissions again. Temporarily disabled during rework.
     # user_roles_ids = [role.id for role in user.roles]
     user_roles_ids = []

     db_engine = get_relational_engine()

     async with db_engine.get_async_session() as session:
+        # If dataset id was returned it means the user has permission to access it
         result = await session.execute(
             select(ACL)
             .join(ACL.permission)
-            .options(joinedload(ACL.data))
+            .options(joinedload(ACL.dataset))
             .where(ACL.principal_id.in_([user.id, *user_roles_ids]))
             .where(ACL.permission.has(name=permission_type))
         )
         acls = result.unique().scalars().all()
-        data_ids = [acl.data.id for acl in acls]
-        has_permissions = all(document_id in data_ids for document_id in document_ids)
+        has_permission = dataset_id in [acl.dataset.id for acl in acls]

-        if not has_permissions:
+        if not has_permission:
             raise PermissionDeniedError(
                 message=f"User {user.id} does not have {permission_type} permission on documents"
             )
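Illustration (not part of the diff): the guard now takes one dataset id instead of a list of document ids; a hypothetical call site:

    async def ensure_write(user, dataset_id):
        # Raises PermissionDeniedError unless `user` holds "write" on the dataset
        await check_permission_on_dataset(user, "write", dataset_id)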
|
@ -0,0 +1,31 @@
|
||||||
|
from cognee.shared.logging_utils import get_logger
|
||||||
|
|
||||||
|
from ...models.User import User
|
||||||
|
from cognee.modules.data.models.Dataset import Dataset
|
||||||
|
from cognee.modules.users.permissions.methods import get_principal_datasets
|
||||||
|
from cognee.modules.users.permissions.methods import get_role, get_tenant
|
||||||
|
|
||||||
|
logger = get_logger()
|
||||||
|
|
||||||
|
|
||||||
|
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
|
||||||
|
datasets = list()
|
||||||
|
# Get all datasets User has explicit access to
|
||||||
|
datasets.extend(await get_principal_datasets(user, permission_type))
|
||||||
|
|
||||||
|
if user.tenant_id:
|
||||||
|
# Get all datasets all tenants have access to
|
||||||
|
tenant = await get_tenant(user.tenant_id)
|
||||||
|
datasets.extend(await get_principal_datasets(tenant, permission_type))
|
||||||
|
# Get all datasets Users roles have access to
|
||||||
|
for role_name in user.roles:
|
||||||
|
role = await get_role(user.tenant_id, role_name)
|
||||||
|
datasets.extend(await get_principal_datasets(role, permission_type))
|
||||||
|
|
||||||
|
# Deduplicate datasets with same ID
|
||||||
|
unique = {}
|
||||||
|
for dataset in datasets:
|
||||||
|
# If the dataset id key already exists, leave the dictionary unchanged.
|
||||||
|
unique.setdefault(dataset.id, dataset)
|
||||||
|
|
||||||
|
return list(unique.values())
|
||||||
|
|
@@ -1,7 +1,9 @@
 from uuid import UUID

+from cognee.modules.data.methods import get_dataset_data
 from sqlalchemy import select
 from cognee.infrastructure.databases.relational import get_relational_engine
-from cognee.modules.data.models import Dataset, DatasetData, Data
+from cognee.modules.data.models import Dataset, DatasetData
 from ...models import ACL, Permission

@@ -10,10 +12,10 @@ async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -

     async with db_engine.get_async_session() as session:
         async with session.begin():
-            document_ids = (
+            dataset_ids = (
                 await session.scalars(
-                    select(Data.id)
-                    .join(ACL.data)
+                    select(Dataset.id)
+                    .join(ACL.dataset)
                     .join(ACL.permission)
                     .where(
                         ACL.principal_id == user_id,

@@ -22,9 +24,15 @@ async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -
                 )
             ).all()

+    # Get documents from datasets user has read access for
+    document_ids = []
+    for dataset_id in dataset_ids:
+        data_list = await get_dataset_data(dataset_id)
+        document_ids.extend([data.id for data in data_list])
+
     if datasets:
-        documents_ids_in_dataset = set()
         # If datasets are specified filter out documents that aren't part of the specified datasets
+        documents_ids_in_dataset = set()
         for dataset in datasets:
             # Find dataset id for dataset element
             dataset_id = (
cognee/modules/users/permissions/methods/get_principal.py (new file, 14 lines)
@@ -0,0 +1,14 @@
+from sqlalchemy import select
+from uuid import UUID
+
+from cognee.infrastructure.databases.relational import get_relational_engine
+from ...models.Principal import Principal
+
+
+async def get_principal(principal_id: UUID):
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        result = await session.execute(select(Principal).where(Principal.id == principal_id))
+        principal = result.unique().scalar_one()
+        return principal
@@ -0,0 +1,24 @@
+from sqlalchemy import select
+from sqlalchemy.orm import joinedload
+
+from cognee.infrastructure.databases.relational import get_relational_engine
+
+from ...models.Principal import Principal
+from cognee.modules.data.models.Dataset import Dataset
+from ...models.ACL import ACL
+
+
+async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        # If dataset id was returned it means the principal has permission to access it
+        result = await session.execute(
+            select(ACL)
+            .join(ACL.permission)
+            .options(joinedload(ACL.dataset))
+            .where(ACL.principal_id == principal.id)
+            .where(ACL.permission.has(name=permission_type))
+        )
+        acls = result.unique().scalars().all()
+        return [acl.dataset for acl in acls]
cognee/modules/users/permissions/methods/get_role.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+import sqlalchemy.exc
+from sqlalchemy import select
+from uuid import UUID
+
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.users.exceptions import RoleNotFoundError
+
+from ...models.Role import Role
+
+
+async def get_role(tenant_id: UUID, role_name: str):
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        try:
+            result = await session.execute(
+                select(Role).where(Role.name == role_name).where(Role.tenant_id == tenant_id)
+            )
+            role = result.unique().scalar_one()
+            if not role:
+                raise RoleNotFoundError(message=f"Could not find {role_name} for given tenant")
+            return role
+        except sqlalchemy.exc.NoResultFound:
+            raise RoleNotFoundError(message=f"Could not find {role_name} for given tenant")
@@ -0,0 +1,46 @@
+from uuid import UUID
+from cognee.modules.data.models.Dataset import Dataset
+from cognee.modules.users.permissions.methods.get_all_user_permission_datasets import (
+    get_all_user_permission_datasets,
+)
+from cognee.modules.users.exceptions import PermissionDeniedError
+from cognee.modules.users.methods import get_user
+
+
+async def get_specific_user_permission_datasets(
+    user_id: UUID, permission_type: str, dataset_ids: list[UUID] = None
+) -> list[Dataset]:
+    """
+    Return a list of datasets user has given permission for. If a list of datasets is provided,
+    verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
+    Args:
+        user_id:
+        permission_type:
+        dataset_ids:
+
+    Returns:
+        list[Dataset]: List of datasets user has permission for
+    """
+    user = await get_user(user_id)
+    # Find all datasets user has permission for
+    user_permission_access_datasets = await get_all_user_permission_datasets(user, permission_type)
+
+    # if specific datasets are provided filter out non provided datasets
+    if dataset_ids:
+        search_datasets = [
+            dataset for dataset in user_permission_access_datasets if dataset.id in dataset_ids
+        ]
+        # If there are requested datasets that user does not have access to raise error
+        if len(search_datasets) != len(dataset_ids):
+            raise PermissionDeniedError(
+                f"Request owner does not have necessary permission: [{permission_type}] for all datasets requested."
+            )
+    else:
+        search_datasets = user_permission_access_datasets
+
+    if len(search_datasets) == 0:
+        raise PermissionDeniedError(
+            f"Request owner does not have permission: [{permission_type}] for any dataset."
+        )
+
+    return search_datasets
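Illustration (not part of the diff): the filtering behaviour of the helper above; passing explicit ids is all-or-nothing, while omitting them returns every dataset the user can act on.

    async def readable_datasets(user_id, requested_ids=None):
        # With requested_ids: returns them all or raises PermissionDeniedError.
        # Without: returns every dataset the user holds "read" on.
        return await get_specific_user_permission_datasets(user_id, "read", requested_ids)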
cognee/modules/users/permissions/methods/get_tenant.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+import sqlalchemy.exc
+from sqlalchemy import select
+from uuid import UUID
+
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.users.exceptions import TenantNotFoundError
+from ...models.Tenant import Tenant
+
+
+async def get_tenant(tenant_id: UUID):
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        try:
+            result = await session.execute(select(Tenant).where(Tenant.id == tenant_id))
+            tenant = result.unique().scalar_one()
+            if not tenant:
+                raise TenantNotFoundError
+            return tenant
+        except sqlalchemy.exc.NoResultFound:
+            raise TenantNotFoundError(message=f"Could not find tenant: {tenant_id}")
@@ -0,0 +1,46 @@
+from sqlalchemy.future import select
+from cognee.infrastructure.databases.relational import get_relational_engine
+from ...models import Principal, ACL, Permission
+from uuid import UUID
+from cognee.modules.users.permissions import PERMISSION_TYPES
+from cognee.modules.users.exceptions import PermissionNotFoundError
+
+
+async def give_permission_on_dataset(
+    principal: Principal,
+    dataset_id: UUID,
+    permission_name: str,
+):
+    db_engine = get_relational_engine()
+
+    async with db_engine.get_async_session() as session:
+        permission = (
+            (await session.execute(select(Permission).filter(Permission.name == permission_name)))
+            .scalars()
+            .first()
+        )
+
+        if permission_name not in PERMISSION_TYPES:
+            # If permission is not in allowed permission types
+            raise PermissionNotFoundError(
+                message=f"{permission_name} not found or not in allowed permission types"
+            )
+        elif permission is None:
+            permission = Permission(name=permission_name)
+            existing_acl = None
+        else:
+            # Check if the ACL entry already exists to avoid duplicates
+            existing_acl = await session.execute(
+                select(ACL).filter(
+                    ACL.principal_id == principal.id,
+                    ACL.dataset_id == dataset_id,
+                    ACL.permission_id == permission.id,
+                )
+            )
+            existing_acl = existing_acl.scalars().first()
+
+        # If no existing ACL entry is found, proceed to add a new one
+        if existing_acl is None:
+            acl = ACL(principal_id=principal.id, dataset_id=dataset_id, permission=permission)
+            session.add(acl)
+            await session.commit()
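Illustration (not part of the diff): granting a new dataset owner the four default permissions, mirroring what ingest_data does later in this commit; PERMISSION_TYPES comes from the new permission_types module.

    async def grant_owner_defaults(user, dataset_id):
        for permission_name in PERMISSION_TYPES:  # ["read", "write", "delete", "share"]
            await give_permission_on_dataset(user, dataset_id, permission_name)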
@@ -1,27 +0,0 @@
-from sqlalchemy.future import select
-from cognee.infrastructure.databases.relational import get_relational_engine
-from ...models import User, ACL, Permission
-
-
-async def give_permission_on_document(
-    user: User,
-    document_id: str,
-    permission_name: str,
-):
-    db_engine = get_relational_engine()
-
-    async with db_engine.get_async_session() as session:
-        permission = (
-            (await session.execute(select(Permission).filter(Permission.name == permission_name)))
-            .scalars()
-            .first()
-        )
-
-        if permission is None:
-            permission = Permission(name=permission_name)
-
-        acl = ACL(principal_id=user.id, data_id=document_id, permission=permission)
-
-        session.add(acl)
-
-        await session.commit()
cognee/modules/users/permissions/permission_types.py (new file, 1 line)
@@ -0,0 +1 @@
+PERMISSION_TYPES = ["read", "write", "delete", "share"]
@@ -9,24 +9,40 @@ from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.users.exceptions import (
     UserNotFoundError,
     RoleNotFoundError,
+    TenantNotFoundError,
+    PermissionDeniedError,
 )
 from cognee.modules.users.models import (
     User,
     Role,
+    Tenant,
     UserRole,
 )


-async def add_user_to_role(user_id: UUID, role_id: UUID):
+async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
     db_engine = get_relational_engine()
     async with db_engine.get_async_session() as session:
         user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
         role = (await session.execute(select(Role).where(Role.id == role_id))).scalars().first()
+        tenant = (
+            (await session.execute(select(Tenant).where(Tenant.id == role.tenant_id)))
+            .scalars()
+            .first()
+        )

         if not user:
             raise UserNotFoundError
         elif not role:
             raise RoleNotFoundError
+        elif user.tenant_id != role.tenant_id:
+            raise TenantNotFoundError(
+                message="User tenant does not match role tenant. User cannot be added to role."
+            )
+        elif tenant.owner_id != owner_id:
+            raise PermissionDeniedError(
+                message="User submitting request does not have permission to add user to role."
+            )

         try:
             # Add association directly to the association table
@@ -4,6 +4,9 @@ from sqlalchemy.exc import IntegrityError

 from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
 from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.users.methods import get_user
+from cognee.modules.users.permissions.methods import get_tenant
+from cognee.modules.users.exceptions import PermissionDeniedError
 from cognee.modules.users.models import (
     Role,
 )

@@ -11,13 +14,21 @@ from cognee.modules.users.models import (

 async def create_role(
     role_name: str,
-    tenant_id: UUID,
+    owner_id: UUID,
 ):
     db_engine = get_relational_engine()
     async with db_engine.get_async_session() as session:
+        user = await get_user(owner_id)
+        tenant = await get_tenant(user.tenant_id)
+
+        if owner_id != tenant.owner_id:
+            raise PermissionDeniedError(
+                "User submitting request does not have permission to create role for tenant."
+            )
+
         try:
             # Add association directly to the association table
-            role = Role(name=role_name, tenant_id=tenant_id)
+            role = Role(name=role_name, tenant_id=tenant.id)
             session.add(role)
         except IntegrityError:
             raise EntityAlreadyExistsError(message="Role already exists for tenant.")
@@ -1 +1,2 @@
 from .create_tenant import create_tenant
+from .add_user_to_tenant import add_user_to_tenant
cognee/modules/users/tenants/methods/add_user_to_tenant.py (new file, 44 lines)
@@ -0,0 +1,44 @@
+from uuid import UUID
+from sqlalchemy.exc import IntegrityError
+
+from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
+from cognee.infrastructure.databases.relational import get_relational_engine
+from cognee.modules.users.methods import get_user
+from cognee.modules.users.permissions.methods import get_tenant
+from cognee.modules.users.exceptions import (
+    UserNotFoundError,
+    TenantNotFoundError,
+    PermissionDeniedError,
+)
+
+
+async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
+    db_engine = get_relational_engine()
+    async with db_engine.get_async_session() as session:
+        user = await get_user(user_id)
+        tenant = await get_tenant(tenant_id)
+
+        if not user:
+            raise UserNotFoundError
+        elif not tenant:
+            raise TenantNotFoundError
+
+        if tenant.owner_id != owner_id:
+            raise PermissionDeniedError(
+                message="Only tenant owner can add other users to organization."
+            )
+
+        try:
+            if user.tenant_id is None:
+                user.tenant_id = tenant_id
+            elif user.tenant_id == tenant_id:
+                return
+            else:
+                raise IntegrityError
+
+            await session.merge(user)
+            await session.commit()
+        except IntegrityError:
+            raise EntityAlreadyExistsError(
+                message="User is already part of a tenant. Only one tenant can be assigned to user."
+            )
@@ -1,19 +1,28 @@
+from uuid import UUID
 from sqlalchemy.exc import IntegrityError

 from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
 from cognee.infrastructure.databases.relational import get_relational_engine
 from cognee.modules.users.models import Tenant
+from cognee.modules.users.methods import get_user


-async def create_tenant(tenant_name: str):
+async def create_tenant(tenant_name: str, user_id: UUID):
     db_engine = get_relational_engine()
     async with db_engine.get_async_session() as session:
         try:
-            # Add association directly to the association table
-            tenant = Tenant(name=tenant_name)
+            user = await get_user(user_id)
+            if user.tenant_id:
+                raise EntityAlreadyExistsError(
+                    message="User already has a tenant. New tenant cannot be created."
+                )
+
+            tenant = Tenant(name=tenant_name, owner_id=user_id)
             session.add(tenant)
+            await session.flush()
+
+            user.tenant_id = tenant.id
+            await session.merge(user)
+            await session.commit()
         except IntegrityError:
             raise EntityAlreadyExistsError(message="Tenant already exists.")
-
-        await session.commit()
-        await session.refresh(tenant)
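Illustration (not part of the diff): a hypothetical multi-tenant setup flow combining the two new tenant methods; tenant_id_lookup is a hypothetical helper, since create_tenant does not return the Tenant instance.

    async def set_up_organization(owner, member, tenant_id_lookup):
        await create_tenant("acme", owner.id)       # owner becomes tenant owner
        tenant_id = await tenant_id_lookup("acme")  # hypothetical name-to-id lookup
        await add_user_to_tenant(member.id, tenant_id, owner.id)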
@@ -2,4 +2,4 @@ from .translate_text import translate_text
 from .detect_language import detect_language
 from .classify_documents import classify_documents
 from .extract_chunks_from_documents import extract_chunks_from_documents
-from .check_permissions_on_documents import check_permissions_on_documents
+from .check_permissions_on_dataset import check_permissions_on_dataset
@@ -1,10 +1,10 @@
 from cognee.modules.data.processing.document_types import Document
-from cognee.modules.users.permissions.methods import check_permission_on_documents
+from cognee.modules.users.permissions.methods import check_permission_on_dataset
 from typing import List


-async def check_permissions_on_documents(
-    documents: list[Document], user, permissions
+async def check_permissions_on_dataset(
+    documents: List[Document], context: dict, user, permissions
 ) -> List[Document]:
     """
     Validates a user's permissions on a list of documents.

@@ -14,13 +14,12 @@ async def check_permissions_on_documents(
     - It is designed to validate multiple permissions in a sequential manner for the same set of documents.
     - Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
     """
-    document_ids = [document.id for document in documents]
-
     for permission in permissions:
-        await check_permission_on_documents(
+        await check_permission_on_dataset(
             user,
             permission,
-            document_ids,
+            context["dataset"].id,
         )

     return documents
@@ -2,6 +2,7 @@ import dlt
 import s3fs
 import json
 import inspect
+from uuid import UUID
 from typing import Union, BinaryIO, Any, List, Optional
 import cognee.modules.ingestion as ingestion
 from cognee.infrastructure.databases.relational import get_relational_engine

@@ -9,7 +10,8 @@ from cognee.modules.data.methods import create_dataset, get_dataset_data, get_da
 from cognee.modules.users.methods import get_default_user
 from cognee.modules.data.models.DatasetData import DatasetData
 from cognee.modules.users.models import User
-from cognee.modules.users.permissions.methods import give_permission_on_document
+from cognee.modules.users.permissions.methods import give_permission_on_dataset
+from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
 from .get_dlt_destination import get_dlt_destination
 from .save_data_item_to_storage import save_data_item_to_storage

@@ -18,7 +20,11 @@ from cognee.api.v1.add.config import get_s3_config


 async def ingest_data(
-    data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None
+    data: Any,
+    dataset_name: str,
+    user: User,
+    node_set: Optional[List[str]] = None,
+    dataset_id: UUID = None,
 ):
     destination = get_dlt_destination()

@@ -73,7 +79,11 @@ async def ingest_data(
     }

     async def store_data_to_dataset(
-        data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None
+        data: Any,
+        dataset_name: str,
+        user: User,
+        node_set: Optional[List[str]] = None,
+        dataset_id: UUID = None,
     ):
         if not isinstance(data, list):
             # Convert data to a list as we work with lists further down.

@@ -104,7 +114,17 @@ async def ingest_data(
         db_engine = get_relational_engine()

         async with db_engine.get_async_session() as session:
-            dataset = await create_dataset(dataset_name, user, session)
+            if dataset_id:
+                # Retrieve existing dataset
+                dataset = await get_specific_user_permission_datasets(
+                    user.id, "write", [dataset_id]
+                )
+                # Convert from list to Dataset element
+                if isinstance(dataset, list):
+                    dataset = dataset[0]
+            else:
+                # Create new one
+                dataset = await create_dataset(dataset_name, user, session)

             # Check to see if data should be updated
             data_point = (

@@ -138,6 +158,7 @@ async def ingest_data(
                     node_set=json.dumps(node_set) if node_set else None,
                     token_count=-1,
                 )
+                session.add(data_point)

                 # Check if data is already in dataset
                 dataset_data = (

@@ -150,17 +171,20 @@ async def ingest_data(
                 # If data is not present in dataset add it
                 if dataset_data is None:
                     dataset.data.append(data_point)
+                    await session.merge(dataset)

             await session.commit()

-            await give_permission_on_document(user, data_id, "read")
-            await give_permission_on_document(user, data_id, "write")
+            await give_permission_on_dataset(user, dataset.id, "read")
+            await give_permission_on_dataset(user, dataset.id, "write")
+            await give_permission_on_dataset(user, dataset.id, "delete")
+            await give_permission_on_dataset(user, dataset.id, "share")

         return file_paths

     db_engine = get_relational_engine()

-    file_paths = await store_data_to_dataset(data, dataset_name, user, node_set)
+    file_paths = await store_data_to_dataset(data, dataset_name, user, node_set, dataset_id)

     # Note: DLT pipeline has its own event loop, therefore objects created in another event loop
     # can't be used inside the pipeline
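Illustration (not part of the diff): once a dataset UUID is known, ingestion can target it directly; this assumes the public cognee.add API forwards dataset_id to ingest_data the way this commit wires store_data_to_dataset. Write permission on the dataset is verified before anything is stored.

    import cognee

    async def append_to_shared_dataset(dataset_id):
        # dataset_id: UUID of a dataset the caller holds "write" on
        await cognee.add(["New document text"], dataset_id=dataset_id)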
71
cognee/tests/test_parallel_databases.py
Executable file
71
cognee/tests/test_parallel_databases.py
Executable file
|
|
@@ -0,0 +1,71 @@
+import os
+import pathlib
+import cognee
+from cognee.modules.search.operations import get_history
+from cognee.modules.users.methods import get_default_user
+from cognee.shared.logging_utils import get_logger
+from cognee.modules.search.types import SearchType
+
+logger = get_logger()
+
+
+async def main():
+    data_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_library")
+        ).resolve()
+    )
+    cognee.config.data_root_directory(data_directory_path)
+    cognee_directory_path = str(
+        pathlib.Path(
+            os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_library")
+        ).resolve()
+    )
+    cognee.config.system_root_directory(cognee_directory_path)
+
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+
+    await cognee.add(["TEST1"], "test1")
+    await cognee.add(["TEST2"], "test2")
+
+    task_1_config = {
+        "vector_db_url": "cognee1.test",
+        "vector_db_key": "",
+        "vector_db_provider": "lancedb",
+    }
+    task_2_config = {
+        "vector_db_url": "cognee2.test",
+        "vector_db_key": "",
+        "vector_db_provider": "lancedb",
+    }
+
+    task_1_graph_config = {
+        "graph_database_provider": "kuzu",
+        "graph_file_path": "kuzu1.db",
+    }
+    task_2_graph_config = {
+        "graph_database_provider": "kuzu",
+        "graph_file_path": "kuzu2.db",
+    }
+
+    # schedule both cognify calls concurrently
+    task1 = asyncio.create_task(
+        cognee.cognify(
+            ["test1"], vector_db_config=task_1_config, graph_db_config=task_1_graph_config
+        )
+    )
+    task2 = asyncio.create_task(
+        cognee.cognify(
+            ["test2"], vector_db_config=task_2_config, graph_db_config=task_2_graph_config
+        )
+    )
+
+    # wait until both are done (raises first error if any)
+    await asyncio.gather(task1, task2)
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    asyncio.run(main(), debug=True)
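The new test above drives two cognify pipelines in one event loop, each with its own LanceDB url and its own Kuzu file, so the concurrent runs never touch the same database. A sketch generalizing the same pattern to any number of datasets, assuming cognify keeps the vector_db_config and graph_db_config keywords used above (the url and file-name patterns are illustrative, not taken from this PR):

import asyncio

import cognee

async def cognify_isolated(dataset_name: str, index: int):
    # One vector store and one graph file per task keeps pipelines independent.
    vector_db_config = {
        "vector_db_url": f"cognee{index}.test",  # illustrative value
        "vector_db_key": "",
        "vector_db_provider": "lancedb",
    }
    graph_db_config = {
        "graph_database_provider": "kuzu",
        "graph_file_path": f"kuzu{index}.db",  # illustrative value
    }
    await cognee.cognify(
        [dataset_name], vector_db_config=vector_db_config, graph_db_config=graph_db_config
    )

async def run_all(dataset_names: list[str]):
    # Like the test, gather surfaces the first failure from any pipeline.
    await asyncio.gather(
        *(cognify_isolated(name, i + 1) for i, name in enumerate(dataset_names))
    )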
@@ -144,7 +144,6 @@ async def main():
     graph_completion = await cognee.search(
         query_type=SearchType.GRAPH_COMPLETION,
         query_text=random_node_name,
-        datasets=[dataset_name_2],
     )
     assert len(graph_completion) != 0, "Completion result is empty."
     print("Completion result is:")
@@ -49,7 +49,11 @@ async def main():
     from cognee.infrastructure.databases.vector import get_vector_engine

     vector_engine = get_vector_engine()
-    random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
+    search_results = await vector_engine.search("Entity_name", "Quantum computer")
+
+    assert len(search_results) != 0, "The search results list is empty."
+
+    random_node = search_results[0]
     random_node_name = random_node.payload["text"]

     search_results = await cognee.search(
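The change above replaces bare [0] indexing with an explicit emptiness assertion, so an empty vector search fails with a readable message instead of an IndexError. The same guard written as a generic helper — purely illustrative, not part of this PR:

def first_result(results: list, label: str = "search"):
    # Fail with a descriptive message rather than IndexError on an empty list.
    assert len(results) != 0, f"The {label} results list is empty."
    return results[0]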
@@ -24,13 +24,9 @@ def mock_user():
 @pytest.mark.asyncio
 @patch.object(search_module, "log_query")
 @patch.object(search_module, "log_result")
-@patch.object(search_module, "get_document_ids_for_user")
 @patch.object(search_module, "specific_search")
-@patch.object(search_module, "parse_id")
 async def test_search(
-    mock_parse_id,
     mock_specific_search,
-    mock_get_document_ids,
     mock_log_result,
     mock_log_query,
     mock_user,
@@ -48,26 +44,19 @@ async def test_search(
     # Mock document IDs
     doc_id1 = uuid.uuid4()
     doc_id2 = uuid.uuid4()
-    doc_id3 = uuid.uuid4() # This one will be filtered out
-    mock_get_document_ids.return_value = [doc_id1, doc_id2]

     # Mock search results
     search_results = [
         {"document_id": str(doc_id1), "content": "Result 1"},
         {"document_id": str(doc_id2), "content": "Result 2"},
-        {"document_id": str(doc_id3), "content": "Result 3"}, # Should be filtered out
     ]
     mock_specific_search.return_value = search_results

-    # Mock parse_id to return the same UUID
-    mock_parse_id.side_effect = lambda x: uuid.UUID(x) if x else None
-
     # Execute
-    results = await search(query_text, query_type, datasets, mock_user)
+    await search(query_text, query_type, datasets, mock_user)

     # Verify
     mock_log_query.assert_called_once_with(query_text, query_type.value, mock_user.id)
-    mock_get_document_ids.assert_called_once_with(mock_user.id, datasets)
     mock_specific_search.assert_called_once_with(
         query_type,
         query_text,
@@ -78,11 +67,6 @@ async def test_search(
         node_name=None,
     )

-    # Only the first two results should be included (doc_id3 is filtered out)
-    assert len(results) == 2
-    assert results[0]["document_id"] == str(doc_id1)
-    assert results[1]["document_id"] == str(doc_id2)
-
     # Verify result logging
     mock_log_result.assert_called_once()
     # Check that the first argument is the query ID
282  poetry.lock  generated
@@ -435,7 +435,7 @@ description = "Timeout context manager for asyncio programs"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
-markers = "python_version < \"3.11\""
+markers = "python_version == \"3.10\""
 files = [
     {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
     {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
@@ -448,7 +448,7 @@ description = "Timeout context manager for asyncio programs"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "extra == \"falkordb\" and python_full_version < \"3.11.3\" and python_version == \"3.11\""
+markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\" and extra == \"falkordb\""
 files = [
     {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
     {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
@@ -593,7 +593,7 @@ description = "Backport of CPython tarfile module"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "extra == \"deepeval\" and python_version <= \"3.11\""
+markers = "(python_version == \"3.10\" or python_version == \"3.11\") and extra == \"deepeval\""
 files = [
     {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"},
     {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"},
@@ -1226,7 +1226,7 @@ description = "Cross-platform colored terminal text."
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 groups = ["main"]
-markers = "(sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (python_version < \"3.13\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\")"
+markers = "(platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or platform_system == \"Windows\")"
 files = [
     {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
     {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
@@ -2098,7 +2098,7 @@ description = "Backport of PEP 654 (exception groups)"
 optional = false
 python-versions = ">=3.7"
 groups = ["main"]
-markers = "python_version < \"3.11\""
+markers = "python_version == \"3.10\""
 files = [
     {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
     {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@@ -2225,7 +2225,7 @@ description = "Fast, light, accurate library built for retrieval embedding gener
 optional = true
 python-versions = ">=3.9.0"
 groups = ["main"]
-markers = "python_version < \"3.13\" and extra == \"codegraph\""
+markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
 files = [
     {file = "fastembed-0.6.0-py3-none-any.whl", hash = "sha256:a08385e9388adea0529a586004f2d588c9787880a510e4e5d167127a11e75328"},
     {file = "fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733"},
@@ -2974,7 +2974,7 @@ description = "HTTP/2-based RPC framework"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "extra == \"gemini\" or extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"milvus\" or python_version < \"3.11\" and (extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"milvus\")"
+markers = "python_version == \"3.10\" and (extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"milvus\") or extra == \"gemini\" or extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"milvus\""
 files = [
     {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
     {file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"},
@@ -3078,7 +3078,7 @@ description = "Protobuf code generator for gRPC"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "extra == \"weaviate\" or python_version >= \"3.13\" and (extra == \"weaviate\" or extra == \"qdrant\")"
+markers = "extra == \"weaviate\""
 files = [
     {file = "grpcio_tools-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:c701aaa51fde1f2644bd94941aa94c337adb86f25cd03cf05e37387aaea25800"},
     {file = "grpcio_tools-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:6a722bba714392de2386569c40942566b83725fa5c5450b8910e3832a5379469"},
@@ -3631,7 +3631,7 @@ description = "IPython: Productive Interactive Computing"
 optional = true
 python-versions = ">=3.10"
 groups = ["main"]
-markers = "python_version < \"3.11\" and (extra == \"notebook\" or extra == \"dev\")"
+markers = "python_version == \"3.10\" and (extra == \"notebook\" or extra == \"dev\")"
 files = [
     {file = "ipython-8.35.0-py3-none-any.whl", hash = "sha256:e6b7470468ba6f1f0a7b116bb688a3ece2f13e2f94138e508201fad677a788ba"},
     {file = "ipython-8.35.0.tar.gz", hash = "sha256:d200b7d93c3f5883fc36ab9ce28a18249c7706e51347681f80a0aef9895f2520"},
@@ -4454,50 +4454,50 @@ files = [

 [[package]]
 name = "kuzu"
-version = "0.8.2"
+version = "0.9.0"
 description = "Highly scalable, extremely fast, easy-to-use embeddable graph database"
 optional = true
 python-versions = "*"
 groups = ["main"]
-markers = "extra == \"kuzu\""
+markers = "extra == \"api\" or extra == \"kuzu\""
 files = [
-    {file = "kuzu-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:78bcdf6cc7b130bce8b307709e8d7bddd2e9104b2b696a9dc52574556e754570"},
+    {file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec9f216d67c092ea52086c99cf4b1deabe0f8daaf47c80cf1892b3b41c57d58a"},
-    {file = "kuzu-0.8.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b42e3e9b1eacf830700287b05e96f9455b89dd4140085053e6c86b32c61e8d5c"},
+    {file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:bda6d845bf1c7da204ffa7730573118f2d43fe6b14b1a5d0d2845ec3d3481362"},
-    {file = "kuzu-0.8.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf06c602dc0231268d9cfa56a62afef15f8fca3be1ccd2cad22047a14bff4ae0"},
+    {file = "kuzu-0.9.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab5b28f101c93899fc15668b6cb25f6db3d4a9844fcc4affed293caaaafaa4b7"},
-    {file = "kuzu-0.8.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50a873e7cd0c2e8e3093e9af14cffb14e49f1f67eceb32df3d0454ce101402d3"},
+    {file = "kuzu-0.9.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:183bb1de19ffec1c3b07c0b4d5eecf02eb4eeafc1d50aea409bc91e1fad4d6d2"},
-    {file = "kuzu-0.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:4d36261444d31432606f3f3ed00624f1a3a8edcf7d830564c72b76ffbdf4d318"},
+    {file = "kuzu-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:2e36ce7da1bbebb538082656de18a717895d9352a33c8bcac170ef2fc22a4902"},
-    {file = "kuzu-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6c1694c6d1b19c46ad5d416cac429ccf1fe91aca4d367664e3aa0afa59800f93"},
+    {file = "kuzu-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82dd690d823df816e7826945e5243a4ae65e3e948ef512709a59205b84b9f6dd"},
-    {file = "kuzu-0.8.2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:00156c64523a1377ffced998bdb031709336f90543da69544c0ab4b40d533692"},
+    {file = "kuzu-0.9.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:c394e019a14e9c5636228cf1acd333997c31e5da3d9a60a1df2c03b828438432"},
-    {file = "kuzu-0.8.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc75f26afe8815b046cfb0d931303da6c36ce3afb49d4ae18a3899f23e62020f"},
+    {file = "kuzu-0.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7d493f88ed31eada4b88a92b115bc6085c60498c47336ab06a489e75a727bab"},
-    {file = "kuzu-0.8.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f0de6910724a74cc492354e903cf76db78b6353eef1e2edfa0b79d600c3c572"},
+    {file = "kuzu-0.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:171b47cf2b3923c813f1ed88fb9d3964a9355129b5d3ebca54eba3450bfc1f97"},
-    {file = "kuzu-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:56e99c39a725943aa7ad96ada8f29706da3d53cc98385f2c663b8ea026f0dce3"},
+    {file = "kuzu-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:3c8a8a611f599801c8db6aeffb978cd1badcfa3ec8f79c15b701810fee71765f"},
-    {file = "kuzu-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adcc250b34963a6eea62b59d47a091018d83e61fb2e95552795ab61f103052be"},
+    {file = "kuzu-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:509af4029f9dcb9c3e843a825df44ec30009a70fad891cbcfb611c3b8cdfefd6"},
-    {file = "kuzu-0.8.2-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:f72036924466143675980baed02a26c0fca15b6254c11de9a9c18d28fe66247e"},
+    {file = "kuzu-0.9.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:885f17f6e46c15ecef121fc57a941f8b60f0a5c1d3995813bb7a4c7437fb2259"},
-    {file = "kuzu-0.8.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2fd7895fdfd9df880091d32bfb79c148f849659c67e2b9e185f952a6bde9139"},
+    {file = "kuzu-0.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f2e35aa345b543a4a21de0e82b70eac4c753987cfa4ded75ae7f9f23edbf11"},
-    {file = "kuzu-0.8.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:68486e291aa8a61264be7e31233ec34eeb6da2402f4b980c3f2b67f9ccbbea3a"},
+    {file = "kuzu-0.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:67430c9813607a3b901c4a1e6bfb3b93538af230bc821e675c552a162818f589"},
-    {file = "kuzu-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:7cce7d06e6f09cd488c62be7cafe78752b037ed9e6585ed3da9df029104b1987"},
+    {file = "kuzu-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:549f4a72f815554fb998582876c5875cb0917a192e6a58d196e8247fd8902701"},
-    {file = "kuzu-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa0495f856f2e5f5067e281dab3fbc170aba0721d1f56156a8cd9fa50e706f91"},
+    {file = "kuzu-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ec2e709599b4015d0a179a191dd7850e7bf076f83b37b70d0dc2e4ee59ce7725"},
-    {file = "kuzu-0.8.2-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:823577b472ba63c3b36e5ff81e2b744736f9eaf0b71585c247f3defc9d268f53"},
+    {file = "kuzu-0.9.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:8aad4fbd74b283ffb0b115138dfc62d9775c8f19ba62ab243e55e3cd648652b6"},
-    {file = "kuzu-0.8.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bde76f38d293f49ad283a4831bd32d41f185b93a75d388d67f9b8996678203e9"},
+    {file = "kuzu-0.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba9dd4f412e31d34345b6461fc9489955ae9566abf426e56af478b6e791b735a"},
-    {file = "kuzu-0.8.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cdb189012613ecd26630096796e3817c260deea85782e764309cd36b2c39dac5"},
+    {file = "kuzu-0.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:340502cbce54f21a5b2440a75c28d61ddfd26d6d6848e9daa6140798bdd5b367"},
-    {file = "kuzu-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:71fb98721f9c46f960a5c3baea6b083026485c4b9a3e74ab01418243e29e3753"},
+    {file = "kuzu-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:e1ddb189dfa2aee0123dcd1a5ccc5b831a7f297233a09fccfd76294fc2f9e6bd"},
-    {file = "kuzu-0.8.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e12726af2cb552ab7b60e2b4312469359bb3b4b45ddbcfb75220def4be6f566"},
+    {file = "kuzu-0.9.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fae68db87ba48268228c89e70ed1fde2f43843d8ed6b2debaafd314c45e8542"},
-    {file = "kuzu-0.8.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055f2cd9741bf39161f9ccff80428f8fb80b1910b2450b05bbe848487ba694f5"},
+    {file = "kuzu-0.9.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0279ba37c639d96f303eb6ad4481e634495be31210991d8008c385ee50b4e0a"},
-    {file = "kuzu-0.8.2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:18cb3da3a650f8dfde3639fbd6319a5ad6f98f60689c5dd96d20d8d1fc184d4c"},
+    {file = "kuzu-0.9.0-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:3ca7424fe3831df687552b89903aa57fb88efff9c25df15c5d678fae7c933199"},
-    {file = "kuzu-0.8.2-cp37-cp37m-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e55a8fddc21ac3e27b3cf2815d93264dd3c89e9ad8c7f3960d51bdfe48a02709"},
+    {file = "kuzu-0.9.0-cp37-cp37m-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bce9284913434661f47cecfc763f8997a61ebd2bb7bfe993970c1403924708fa"},
-    {file = "kuzu-0.8.2-cp37-cp37m-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d93600aceacdd7903aa39f016cb641811f96e4825b027a135aaaa1d82e23d24"},
+    {file = "kuzu-0.9.0-cp37-cp37m-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:66040cdf9a59a5423b49c3d2bc01a089114b573ee1345d5a7c912276fbca0135"},
-    {file = "kuzu-0.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:68601d9e741c7815c3d3f46a9c6884853388bcc6920945f069d5dc4f9492c9c5"},
+    {file = "kuzu-0.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8e195774364123845df071eddb18873ce8c78244dd6f854badfe65053b058088"},
-    {file = "kuzu-0.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32d7ff56d793df27f76129b8b15bd85c940e59bcb67acd189b6a5ed1af5e8b44"},
+    {file = "kuzu-0.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2906f29ee36f9f642bdb8f5222c94f667092e38bde7dc53ebb252f9eb524ab6a"},
-    {file = "kuzu-0.8.2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:5e639f24be2fca78bf3890774f273aa1a6b149bfdbeb5c7e966e03b8f610be98"},
+    {file = "kuzu-0.9.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:4c3218e266766080fe1b31325d0156d1b334f62ae23dac854c3e4919115ef8c6"},
-    {file = "kuzu-0.8.2-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1caf46e2721dabed94b65cdcf3990551af2f3913c3f2dcd39f3e5397f0134243"},
+    {file = "kuzu-0.9.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a26214c1600c21f5e4aa96585706953a8792ad77e14788710d78f8af0d6b74ec"},
-    {file = "kuzu-0.8.2-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5333c9e4557ccbfef7b822793ec382848411c8d11fdee063064b41bd1828404"},
+    {file = "kuzu-0.9.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b153fb28db9336757346eabb24b8c179b4ed48578a0ef158210fbc935df2184"},
-    {file = "kuzu-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:765a8bd4c5b9d24583eb8aaa20ecd753d78220138a82bf643ec592ffb8128298"},
+    {file = "kuzu-0.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:b6ee075e2571b11a434efb004cb0b3a2fbd7aa416ae680816869f1388e5fc734"},
-    {file = "kuzu-0.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a215ff235d17a41c50d1cf2bd8e67a196eff32f23e59d989b1a40e6192f2008"},
+    {file = "kuzu-0.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:56874ae750ff99b15c959d884b175adf24ac912ab08e084c42784902b2bce2fb"},
-    {file = "kuzu-0.8.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:074b5440186e4214b653d46f8d5a15d4b4cae1185d4656eaf598fe9b840fcdca"},
+    {file = "kuzu-0.9.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:6e0265b1ad445500397dc0df3cc4e7faddfd67fcd3d0952d9a4cdab6b77b47e9"},
-    {file = "kuzu-0.8.2-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32303a9533674a35e52d429f1446a82e2fc97c423618bc86aaafef1d4d2621e4"},
+    {file = "kuzu-0.9.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d66e69a3e135ea123cc7c9c2e507bbb614ffdbfe7be835782c6a588ae63ff900"},
-    {file = "kuzu-0.8.2-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0baea115bc55c8ed710f2beae8f02e46cf2bac42326b4e2c3acd25a76031f59d"},
+    {file = "kuzu-0.9.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e11c8b7186798ad95563e1d7ebf84495d817c406bd28c21af7170467e37e35e"},
-    {file = "kuzu-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:70e031131c5b8e327edd63993b05fb04196b74d0ade1baf0f4005968610310ed"},
+    {file = "kuzu-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:4fb80eb6c71b02c4e57e3570b079c494082f7ff819d4c06ac482914f29211294"},
-    {file = "kuzu-0.8.2.tar.gz", hash = "sha256:68ad72b3ef6a32a41ecfa955fa4ca9ca0c8a36d3a1bc13e34cc70c971b2b8ca7"},
+    {file = "kuzu-0.9.0.tar.gz", hash = "sha256:2e59f3d4d1fc385e9e90d7ae09f072ec2f4cfeff508582523a0034ceb076f6eb"},
 ]

 [[package]]
@@ -5087,7 +5087,7 @@ description = "Python logging made (stupidly) simple"
 optional = true
 python-versions = "<4.0,>=3.5"
 groups = ["main"]
-markers = "python_version < \"3.13\" and extra == \"codegraph\""
+markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
 files = [
     {file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"},
     {file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"},
@@ -5827,7 +5827,7 @@ description = "Python extension for MurmurHash (MurmurHash3), a set of fast and
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "python_version < \"3.13\" and extra == \"codegraph\""
+markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
 files = [
     {file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec"},
     {file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a"},
@@ -6437,6 +6437,7 @@ description = "Fundamental package for array computing in Python"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
+markers = "python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\""
 files = [
     {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
     {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
@@ -6476,6 +6477,69 @@ files = [
     {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
 ]

+[[package]]
+name = "numpy"
+version = "2.1.0"
+description = "Fundamental package for array computing in Python"
+optional = false
+python-versions = ">=3.10"
+groups = ["main"]
+markers = "python_version >= \"3.13\""
+files = [
+    {file = "numpy-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6326ab99b52fafdcdeccf602d6286191a79fe2fda0ae90573c5814cd2b0bc1b8"},
+    {file = "numpy-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0937e54c09f7a9a68da6889362ddd2ff584c02d015ec92672c099b61555f8911"},
+    {file = "numpy-2.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:30014b234f07b5fec20f4146f69e13cfb1e33ee9a18a1879a0142fbb00d47673"},
+    {file = "numpy-2.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:899da829b362ade41e1e7eccad2cf274035e1cb36ba73034946fccd4afd8606b"},
+    {file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08801848a40aea24ce16c2ecde3b756f9ad756586fb2d13210939eb69b023f5b"},
+    {file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:398049e237d1aae53d82a416dade04defed1a47f87d18d5bd615b6e7d7e41d1f"},
+    {file = "numpy-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0abb3916a35d9090088a748636b2c06dc9a6542f99cd476979fb156a18192b84"},
+    {file = "numpy-2.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10e2350aea18d04832319aac0f887d5fcec1b36abd485d14f173e3e900b83e33"},
+    {file = "numpy-2.1.0-cp310-cp310-win32.whl", hash = "sha256:f6b26e6c3b98adb648243670fddc8cab6ae17473f9dc58c51574af3e64d61211"},
+    {file = "numpy-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f505264735ee074250a9c78247ee8618292091d9d1fcc023290e9ac67e8f1afa"},
+    {file = "numpy-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:76368c788ccb4f4782cf9c842b316140142b4cbf22ff8db82724e82fe1205dce"},
+    {file = "numpy-2.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f8e93a01a35be08d31ae33021e5268f157a2d60ebd643cfc15de6ab8e4722eb1"},
+    {file = "numpy-2.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9523f8b46485db6939bd069b28b642fec86c30909cea90ef550373787f79530e"},
+    {file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54139e0eb219f52f60656d163cbe67c31ede51d13236c950145473504fa208cb"},
+    {file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ebbf9fbdabed208d4ecd2e1dfd2c0741af2f876e7ae522c2537d404ca895c3"},
+    {file = "numpy-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:378cb4f24c7d93066ee4103204f73ed046eb88f9ad5bb2275bb9fa0f6a02bd36"},
+    {file = "numpy-2.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8f699a709120b220dfe173f79c73cb2a2cab2c0b88dd59d7b49407d032b8ebd"},
+    {file = "numpy-2.1.0-cp311-cp311-win32.whl", hash = "sha256:ffbd6faeb190aaf2b5e9024bac9622d2ee549b7ec89ef3a9373fa35313d44e0e"},
+    {file = "numpy-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0af3a5987f59d9c529c022c8c2a64805b339b7ef506509fba7d0556649b9714b"},
+    {file = "numpy-2.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fe76d75b345dc045acdbc006adcb197cc680754afd6c259de60d358d60c93736"},
+    {file = "numpy-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f358ea9e47eb3c2d6eba121ab512dfff38a88db719c38d1e67349af210bc7529"},
+    {file = "numpy-2.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:dd94ce596bda40a9618324547cfaaf6650b1a24f5390350142499aa4e34e53d1"},
+    {file = "numpy-2.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b47c551c6724960479cefd7353656498b86e7232429e3a41ab83be4da1b109e8"},
+    {file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0756a179afa766ad7cb6f036de622e8a8f16ffdd55aa31f296c870b5679d745"},
+    {file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24003ba8ff22ea29a8c306e61d316ac74111cebf942afbf692df65509a05f111"},
+    {file = "numpy-2.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b34fa5e3b5d6dc7e0a4243fa0f81367027cb6f4a7215a17852979634b5544ee0"},
+    {file = "numpy-2.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4f982715e65036c34897eb598d64aef15150c447be2cfc6643ec7a11af06574"},
+    {file = "numpy-2.1.0-cp312-cp312-win32.whl", hash = "sha256:c4cd94dfefbefec3f8b544f61286584292d740e6e9d4677769bc76b8f41deb02"},
+    {file = "numpy-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0cdef204199278f5c461a0bed6ed2e052998276e6d8ab2963d5b5c39a0500bc"},
+    {file = "numpy-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ab81ccd753859ab89e67199b9da62c543850f819993761c1e94a75a814ed667"},
+    {file = "numpy-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442596f01913656d579309edcd179a2a2f9977d9a14ff41d042475280fc7f34e"},
+    {file = "numpy-2.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:848c6b5cad9898e4b9ef251b6f934fa34630371f2e916261070a4eb9092ffd33"},
+    {file = "numpy-2.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:54c6a63e9d81efe64bfb7bcb0ec64332a87d0b87575f6009c8ba67ea6374770b"},
+    {file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652e92fc409e278abdd61e9505649e3938f6d04ce7ef1953f2ec598a50e7c195"},
+    {file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab32eb9170bf8ffcbb14f11613f4a0b108d3ffee0832457c5d4808233ba8977"},
+    {file = "numpy-2.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:8fb49a0ba4d8f41198ae2d52118b050fd34dace4b8f3fb0ee34e23eb4ae775b1"},
+    {file = "numpy-2.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44e44973262dc3ae79e9063a1284a73e09d01b894b534a769732ccd46c28cc62"},
+    {file = "numpy-2.1.0-cp313-cp313-win32.whl", hash = "sha256:ab83adc099ec62e044b1fbb3a05499fa1e99f6d53a1dde102b2d85eff66ed324"},
+    {file = "numpy-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:de844aaa4815b78f6023832590d77da0e3b6805c644c33ce94a1e449f16d6ab5"},
+    {file = "numpy-2.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:343e3e152bf5a087511cd325e3b7ecfd5b92d369e80e74c12cd87826e263ec06"},
+    {file = "numpy-2.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f07fa2f15dabe91259828ce7d71b5ca9e2eb7c8c26baa822c825ce43552f4883"},
+    {file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5474dad8c86ee9ba9bb776f4b99ef2d41b3b8f4e0d199d4f7304728ed34d0300"},
+    {file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1f817c71683fd1bb5cff1529a1d085a57f02ccd2ebc5cd2c566f9a01118e3b7d"},
+    {file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a3336fbfa0d38d3deacd3fe7f3d07e13597f29c13abf4d15c3b6dc2291cbbdd"},
+    {file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a894c51fd8c4e834f00ac742abad73fc485df1062f1b875661a3c1e1fb1c2f6"},
+    {file = "numpy-2.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:9156ca1f79fc4acc226696e95bfcc2b486f165a6a59ebe22b2c1f82ab190384a"},
+    {file = "numpy-2.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:624884b572dff8ca8f60fab591413f077471de64e376b17d291b19f56504b2bb"},
+    {file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15ef8b2177eeb7e37dd5ef4016f30b7659c57c2c0b57a779f1d537ff33a72c7b"},
+    {file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:e5f0642cdf4636198a4990de7a71b693d824c56a757862230454629cf62e323d"},
+    {file = "numpy-2.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15976718c004466406342789f31b6673776360f3b1e3c575f25302d7e789575"},
+    {file = "numpy-2.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6c1de77ded79fef664d5098a66810d4d27ca0224e9051906e634b3f7ead134c2"},
+    {file = "numpy-2.1.0.tar.gz", hash = "sha256:7dc90da0081f7e1da49ec4e398ede6a8e9cc4f5ebe5f9e06b443ed889ee9aaa2"},
+]
+
 [[package]]
 name = "oauthlib"
 version = "3.2.2"
@@ -6929,8 +6993,8 @@ files = [
 [package.dependencies]
 numpy = [
     {version = ">=1.22.4", markers = "python_version < \"3.11\""},
-    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
     {version = ">=1.23.2", markers = "python_version == \"3.11\""},
+    {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
@@ -7028,7 +7092,7 @@ description = "Python datetimes made easy"
 optional = false
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "python_version < \"3.13\""
+markers = "python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\""
 files = [
     {file = "pendulum-3.1.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:aa545a59e6517cf43597455a6fb44daa4a6e08473d67a7ad34e4fa951efb9620"},
     {file = "pendulum-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:299df2da6c490ede86bb8d58c65e33d7a2a42479d21475a54b467b03ccb88531"},
@@ -7713,7 +7777,7 @@ description = "Fast and parallel snowball stemmer"
 optional = true
 python-versions = "*"
 groups = ["main"]
-markers = "python_version < \"3.13\" and extra == \"codegraph\""
+markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
 files = [
     {file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bfbd9034ae00419ff2154e33b8f5b4c4d99d1f9271f31ed059e5c7e9fa005844"},
     {file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7162ae66df2bb0fc39b350c24a049f5f5151c03c046092ba095c2141ec223a2"},
@@ -8117,8 +8181,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0"
 colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
 dill = [
     {version = ">=0.2", markers = "python_version < \"3.11\""},
+    {version = ">=0.3.6", markers = "python_version >= \"3.11\""},
     {version = ">=0.3.7", markers = "python_version >= \"3.12\""},
-    {version = ">=0.3.6", markers = "python_version == \"3.11\""},
 ]
 isort = ">=4.2.5,<5.13 || >5.13,<7"
 mccabe = ">=0.6,<0.8"
@@ -8861,41 +8925,15 @@ files = [

 [[package]]
 name = "qdrant-client"
-version = "1.12.1"
+version = "1.14.2"
-description = "Client library for the Qdrant vector search engine"
-optional = true
-python-versions = ">=3.8"
-groups = ["main"]
-markers = "python_version >= \"3.13\" and extra == \"qdrant\""
-files = [
-    {file = "qdrant_client-1.12.1-py3-none-any.whl", hash = "sha256:b2d17ce18e9e767471368380dd3bbc4a0e3a0e2061fedc9af3542084b48451e0"},
-    {file = "qdrant_client-1.12.1.tar.gz", hash = "sha256:35e8e646f75b7b883b3d2d0ee4c69c5301000bba41c82aa546e985db0f1aeb72"},
-]
-
-[package.dependencies]
-grpcio = ">=1.41.0"
-grpcio-tools = ">=1.41.0"
-httpx = {version = ">=0.20.0", extras = ["http2"]}
-numpy = {version = ">=1.26", markers = "python_version >= \"3.12\""}
-portalocker = ">=2.7.0,<3.0.0"
-pydantic = ">=1.10.8"
-urllib3 = ">=1.26.14,<3"
-
-[package.extras]
-fastembed = ["fastembed (==0.3.6) ; python_version < \"3.13\""]
-fastembed-gpu = ["fastembed-gpu (==0.3.6) ; python_version < \"3.13\""]
-
-[[package]]
-name = "qdrant-client"
-version = "1.14.1"
 description = "Client library for the Qdrant vector search engine"
 optional = true
 python-versions = ">=3.9"
 groups = ["main"]
-markers = "python_version < \"3.13\" and extra == \"qdrant\""
+markers = "extra == \"qdrant\""
 files = [
-    {file = "qdrant_client-1.14.1-py3-none-any.whl", hash = "sha256:1c4d5ed791873698da8b5df68df16bb203ec1b0cd6cec0fd6002572a06291a1b"},
+    {file = "qdrant_client-1.14.2-py3-none-any.whl", hash = "sha256:7c283b1f0e71db9c21b85d898fb395791caca2a6d56ee751da96d797b001410c"},
-    {file = "qdrant_client-1.14.1.tar.gz", hash = "sha256:75352057ea59fdd7987313dc9cef4d83953591d083028d94eac99cd0e5e2f607"},
+    {file = "qdrant_client-1.14.2.tar.gz", hash = "sha256:da5cab4d367d099d1330b6f30d45aefc8bd76f8b8f9d8fa5d4f813501b93af0d"},
 ]

 [package.dependencies]
@@ -8904,6 +8942,7 @@ httpx = {version = ">=0.20.0", extras = ["http2"]}
 numpy = [
     {version = ">=1.21", markers = "python_version >= \"3.10\" and python_version < \"3.12\""},
     {version = ">=1.26", markers = "python_version == \"3.12\""},
+    {version = ">=2.1.0", markers = "python_version >= \"3.13\""},
 ]
 portalocker = ">=2.7.0,<3.0.0"
 protobuf = ">=3.20.0"
@@ -10485,7 +10524,7 @@ description = "A lil' TOML parser"
 optional = true
 python-versions = ">=3.8"
 groups = ["main"]
-markers = "python_version < \"3.11\" and (extra == \"dev\" or extra == \"notebook\" or extra == \"deepeval\")"
+markers = "python_version == \"3.10\" and (extra == \"dev\" or extra == \"notebook\" or extra == \"deepeval\")"
 files = [
     {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
     {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@@ -10981,7 +11020,7 @@ description = "A library that prepares raw documents for downstream ML tasks."
 optional = true
 python-versions = ">=3.9.0"
 groups = ["main"]
-markers = "extra == \"docs\""
+markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"docs\""
 files = [
     {file = "unstructured-0.16.25-py3-none-any.whl", hash = "sha256:14719ccef2830216cf1c5bf654f75e2bf07b17ca5dcee9da5ac74618130fd337"},
     {file = "unstructured-0.16.25.tar.gz", hash = "sha256:73b9b0f51dbb687af572ecdb849a6811710b9cac797ddeab8ee80fa07d8aa5e6"},
@@ -11039,6 +11078,71 @@ rtf = ["pypandoc"]
 tsv = ["pandas"]
 xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]

+[[package]]
+name = "unstructured"
+version = "0.17.2"
+description = "A library that prepares raw documents for downstream ML tasks."
+optional = true
+python-versions = ">=3.9.0"
+groups = ["main"]
+markers = "python_version >= \"3.13\" and extra == \"docs\""
+files = [
+    {file = "unstructured-0.17.2-py3-none-any.whl", hash = "sha256:527dd26a4b273aebef2f9119c9d4f0d0ce17640038d92296d23abe89be123840"},
+    {file = "unstructured-0.17.2.tar.gz", hash = "sha256:af18c3caef0a6c562cf77e34ee8b6ff522b605031d2336ffe565df66f126aa46"},
+]
+
+[package.dependencies]
+backoff = "*"
+beautifulsoup4 = "*"
+chardet = "*"
+dataclasses-json = "*"
+emoji = "*"
+filetype = "*"
+html5lib = "*"
+langdetect = "*"
+lxml = "*"
+markdown = {version = "*", optional = true, markers = "extra == \"md\""}
+networkx = {version = "*", optional = true, markers = "extra == \"xlsx\""}
+nltk = "*"
+numpy = "*"
+openpyxl = {version = "*", optional = true, markers = "extra == \"xlsx\""}
+pandas = {version = "*", optional = true, markers = "extra == \"csv\" or extra == \"tsv\" or extra == \"xlsx\""}
+psutil = "*"
+pypandoc = {version = "*", optional = true, markers = "extra == \"epub\" or extra == \"odt\" or extra == \"org\" or extra == \"rst\" or extra == \"rtf\""}
+python-docx = {version = ">=1.1.2", optional = true, markers = "extra == \"doc\" or extra == \"docx\" or extra == \"odt\""}
+python-iso639 = "*"
+python-magic = "*"
+python-oxmsg = "*"
+python-pptx = {version = ">=1.0.1", optional = true, markers = "extra == \"ppt\" or extra == \"pptx\""}
+rapidfuzz = "*"
+requests = "*"
+tqdm = "*"
+typing-extensions = "*"
+unstructured-client = "*"
+wrapt = "*"
+xlrd = {version = "*", optional = true, markers = "extra == \"xlsx\""}
+
+[package.extras]
+all-docs = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
+csv = ["pandas"]
+doc = ["python-docx (>=1.1.2)"]
+docx = ["python-docx (>=1.1.2)"]
+epub = ["pypandoc"]
+huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"]
+image = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)"]
+local-inference = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
+md = ["markdown"]
+odt = ["pypandoc", "python-docx (>=1.1.2)"]
+org = ["pypandoc"]
+paddleocr = ["paddlepaddle (>=3.0.0b1)", "unstructured.paddleocr (==2.10.0)"]
+pdf = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)"]
+ppt = ["python-pptx (>=1.0.1)"]
+pptx = ["python-pptx (>=1.0.1)"]
+rst = ["pypandoc"]
+rtf = ["pypandoc"]
+tsv = ["pandas"]
+xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
+
 [[package]]
 name = "unstructured-client"
 version = "0.25.9"
@@ -11578,7 +11682,7 @@ description = "A small Python utility to set file creation time on Windows"
 optional = true
 python-versions = ">=3.5"
 groups = ["main"]
-markers = "extra == \"codegraph\" and sys_platform == \"win32\" and python_version < \"3.13\""
+markers = "extra == \"codegraph\" and sys_platform == \"win32\" and (python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\")"
 files = [
     {file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"},
     {file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"},
@@ -11962,7 +12066,7 @@ cffi = ["cffi (>=1.11)"]

 [extras]
 anthropic = ["anthropic"]
-api = ["gunicorn", "uvicorn"]
+api = ["gunicorn", "kuzu", "uvicorn"]
 chromadb = ["chromadb", "pypika"]
 codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
 debug = ["debugpy"]
@@ -11992,4 +12096,4 @@ weaviate = ["weaviate-client"]

 [metadata]
 lock-version = "2.1"
 python-versions = ">=3.10,<=3.13"
-content-hash = "15b319ff8dbe5bd88e41ead93f4e9140b2b7d86d57a707682dd3a308e78ef245"
+content-hash = "5bd213f69d6dada714e632097121394992b46bd6d322afa024396847cb945f95"
pyproject.toml

@@ -64,9 +64,10 @@ dependencies = [
 api = [
     "uvicorn==0.34.0",
     "gunicorn>=20.1.0,<21",
+    "kuzu==0.9.0",
 ]
 weaviate = ["weaviate-client==4.9.6"]
-qdrant = ["qdrant-client>=1.9.0,<2"]
+qdrant = ["qdrant-client>=1.14.2,<2"]
 neo4j = ["neo4j>=5.20.0,<6"]
 postgres = [
     "psycopg2>=2.9.10,<3",
@@ -87,14 +88,14 @@ anthropic = ["anthropic>=0.26.1,<0.27"]
 deepeval = ["deepeval>=2.0.1,<3"]
 posthog = ["posthog>=3.5.0,<4"]
 falkordb = ["falkordb==1.0.9"]
-kuzu = ["kuzu==0.8.2"]
+kuzu = ["kuzu==0.9.0"]
 groq = ["groq==0.8.0"]
 milvus = ["pymilvus>=2.5.0,<3"]
 chromadb = [
     "chromadb>=0.3.0,<0.7",
     "pypika==0.48.8",
 ]
-docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.16.13,<0.17"]
+docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.16.13,<18"]
 codegraph = [
     "fastembed<=0.6.0 ; python_version < '3.13'",
     "transformers>=4.46.3,<5",
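The kuzu pin moves to 0.9.0 in both the kuzu extra and the new api extra (matching the lock file's extra == "api" or extra == "kuzu" marker), and the qdrant-client floor rises to 1.14.2. A quick post-install sanity check for these pins — assuming the relevant extras are installed, e.g. via something like poetry install -E api (the exact command is an assumption, not shown here):

from importlib.metadata import PackageNotFoundError, version

# Verify the pins introduced by this change in the active environment.
assert version("kuzu") == "0.9.0"
try:
    # The qdrant extra is optional; its floor only applies when installed.
    assert tuple(int(p) for p in version("qdrant-client").split(".")[:3]) >= (1, 14, 2)
except PackageNotFoundError:
    pass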