feat: new Dataset permissions (#869)


## Description
This PR introduces dataset-level permissions and optional backend access control. When `ENABLE_BACKEND_ACCESS_CONTROL` is enabled, every dataset gets its own LanceDB vector database and KuzuDB graph database, and read/write/share permissions are enforced per dataset. The `add`, `cognify`, and `search` APIs now accept dataset UUIDs in addition to dataset names (names only resolve to datasets owned by the request sender), and new permission endpoints allow granting dataset access to principals (users, roles, and tenants).

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
Co-authored-by: Boris <boris@topoteretes.com>
Igor Ilic 2025-06-06 14:20:57 +02:00 committed by GitHub
parent ebebbb8958
commit 1ed6cfd918
76 changed files with 5322 additions and 4154 deletions

View file

@ -69,3 +69,11 @@ LITELLM_LOG="ERROR"
# Set this environment variable to disable sending telemetry data
# TELEMETRY_DISABLED=1
# Set this variable to True to enforce usage of backend access control for Cognee
# Note: This is currently only supported by the following databases:
# Relational: SQLite, Postgres
# Vector: LanceDB
# Graph: KuzuDB
#
# It enforces the use of LanceDB and KuzuDB and creates a separate database per Cognee user + dataset
ENABLE_BACKEND_ACCESS_CONTROL=False
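For reference, the rest of this PR reads the flag with a plain, case-insensitive string comparison. A minimal sketch of that check (the helper name is hypothetical):

```python
import os

def backend_access_control_enabled() -> bool:
    # Hypothetical helper mirroring the check used throughout this PR:
    # anything other than the string "true" (case-insensitive) means disabled.
    return os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true"
```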

View file

@ -215,3 +215,34 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: poetry run python ./cognee/tests/test_s3.py
test-parallel-databases:
name: Test using different async databases in parallel in Cognee
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
- name: Install specific graph db dependency
run: |
poetry install -E kuzu
- name: Run parallel databases test
env:
ENV: 'dev'
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: poetry run python ./cognee/tests/test_parallel_databases.py

View file

@ -47,7 +47,7 @@ jobs:
relational-db-migration-tests:
name: Relational DB Migration Tests
needs: [ basic-tests, e2e-tests ]
needs: [ basic-tests, e2e-tests, graph-db-tests]
uses: ./.github/workflows/relational_db_migration_tests.yml
secrets: inherit
@ -79,7 +79,7 @@ jobs:
db-examples-tests:
name: DB Examples Tests
needs: [vector-db-tests]
needs: [vector-db-tests, graph-db-tests, relational-db-migration-tests]
uses: ./.github/workflows/db_examples_tests.yml
secrets: inherit

View file

@ -135,6 +135,16 @@ jobs:
run:
shell: bash
services:
qdrant:
image: qdrant/qdrant:v1.14.1
env:
QDRANT__LOG_LEVEL: ERROR
QDRANT__SERVICE__API_KEY: qdrant_api_key
QDRANT__SERVICE__ENABLE_TLS: 0
ports:
- 6333:6333
steps:
- name: Check out
uses: actions/checkout@master
@ -148,6 +158,19 @@ jobs:
run: |
poetry install -E qdrant
- name: Wait for Qdrant to be healthy
run: |
for i in {1..10}; do
if curl -f http://127.0.0.1:6333/healthz; then
echo "Qdrant is healthy!"
exit 0
fi
echo "Waiting for Qdrant to be healthy..."
sleep 3
done
echo "Qdrant failed to become healthy in time"
exit 1
- name: Run default Qdrant
env:
ENV: 'dev'
@ -159,8 +182,8 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
VECTOR_DB_URL: 127.0.0.1
VECTOR_DB_KEY: qdrant_api_key
run: poetry run python ./cognee/tests/test_qdrant.py
run-postgres-tests:

View file

@ -1,6 +1,7 @@
"""FastAPI server for the Cognee API."""
import os
import uvicorn
from cognee.shared.logging_utils import get_logger
import sentry_sdk
@ -63,6 +64,7 @@ async def lifespan(app: FastAPI):
app = FastAPI(debug=app_environment != "prod", lifespan=lifespan)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],

View file

@ -1,3 +1,4 @@
from uuid import UUID
from typing import Union, BinaryIO, List, Optional
from cognee.modules.pipelines import Task
@ -11,9 +12,21 @@ async def add(
dataset_name: str = "main_dataset",
user: User = None,
node_set: Optional[List[str]] = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
dataset_id: UUID = None,
):
tasks = [Task(resolve_data_directories), Task(ingest_data, dataset_name, user, node_set)]
tasks = [
Task(resolve_data_directories),
Task(ingest_data, dataset_name, user, node_set, dataset_id),
]
await cognee_pipeline(
tasks=tasks, datasets=dataset_name, data=data, user=user, pipeline_name="add_pipeline"
tasks=tasks,
datasets=dataset_id if dataset_id else dataset_name,
data=data,
user=user,
pipeline_name="add_pipeline",
vector_db_config=vector_db_config,
graph_db_config=graph_db_config,
)
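A minimal usage sketch of the extended `add` signature, assuming `cognee.add` forwards to the function above; the UUID is a placeholder for an existing dataset:

```python
import asyncio
from uuid import UUID

import cognee

async def main():
    existing_dataset_id = UUID("00000000-0000-0000-0000-000000000000")  # placeholder UUID

    # When dataset_id is provided, the pipeline targets that dataset directly;
    # otherwise the data goes to the dataset identified by dataset_name.
    await cognee.add(
        "Natural language processing is a subfield of computer science.",
        dataset_name="main_dataset",
        dataset_id=existing_dataset_id,
    )

asyncio.run(main())
```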

View file

@ -1,4 +1,5 @@
from uuid import UUID
from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
@ -20,8 +21,8 @@ def get_add_router() -> APIRouter:
@router.post("/", response_model=None)
async def add(
data: List[UploadFile],
datasetName: str,
datasetId: Optional[UUID] = Form(default=None),
datasetName: Optional[str] = Form(default=None),
user: User = Depends(get_authenticated_user),
):
"""This endpoint is responsible for adding data to the graph."""
@ -30,19 +31,13 @@ def get_add_router() -> APIRouter:
if not datasetId and not datasetName:
raise ValueError("Either datasetId or datasetName must be provided.")
if datasetId and not datasetName:
dataset = await get_dataset(user_id=user.id, dataset_id=datasetId)
try:
datasetName = dataset.name
except IndexError:
raise ValueError("No dataset found with the provided datasetName.")
try:
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
# Perform git clone if the URL is from GitHub
repo_name = data.split("/")[-1].replace(".git", "")
subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
# TODO: Update add call with dataset info
await cognee_add(
"data://.data/",
f"{repo_name}",
@ -53,10 +48,10 @@ def get_add_router() -> APIRouter:
response.raise_for_status()
file_data = await response.content()
# TODO: Update add call with dataset info
return await cognee_add(file_data)
else:
await cognee_add(data, datasetName, user=user)
await cognee_add(data, dataset_name=datasetName, user=user, dataset_id=datasetId)
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})

View file

@ -9,7 +9,7 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.tasks.documents import (
check_permissions_on_documents,
check_permissions_on_dataset,
classify_documents,
extract_chunks_from_documents,
)
@ -31,11 +31,18 @@ async def cognify(
chunker=TextChunker,
chunk_size: int = None,
ontology_file_path: Optional[str] = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
):
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
return await cognee_pipeline(
tasks=tasks, datasets=datasets, user=user, pipeline_name="cognify_pipeline"
tasks=tasks,
datasets=datasets,
user=user,
pipeline_name="cognify_pipeline",
vector_db_config=vector_db_config,
graph_db_config=graph_db_config,
)
@ -48,7 +55,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
) -> list[Task]:
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_documents, user=user, permissions=["write"]),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents,
max_chunk_size=chunk_size or get_max_chunk_tokens(),
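The new `vector_db_config` / `graph_db_config` parameters let a caller pin a single `cognify` run to dedicated databases. A hedged sketch that reuses the config keys from `set_database_global_context_variables` (added later in this PR); the file paths are placeholders:

```python
import asyncio

import cognee

async def main():
    vector_db_config = {
        "vector_db_provider": "lancedb",
        "vector_db_url": "/tmp/example.lance.db",  # placeholder path
        "vector_db_key": "",
    }
    graph_db_config = {
        "graph_database_provider": "kuzu",
        "graph_file_path": "/tmp/example.pkl",  # placeholder path
    }
    await cognee.cognify(
        datasets=["main_dataset"],
        vector_db_config=vector_db_config,
        graph_db_config=graph_db_config,
    )

asyncio.run(main())
```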

View file

@ -1,3 +1,4 @@
from uuid import UUID
from typing import List, Optional
from pydantic import BaseModel
from fastapi import Depends
@ -10,6 +11,7 @@ from cognee.shared.data_models import KnowledgeGraph
class CognifyPayloadDTO(BaseModel):
datasets: List[str]
dataset_ids: Optional[List[UUID]]
graph_model: Optional[BaseModel] = KnowledgeGraph
@ -22,7 +24,9 @@ def get_cognify_router() -> APIRouter:
from cognee.api.v1.cognify import cognify as cognee_cognify
try:
await cognee_cognify(payload.datasets, user, payload.graph_model)
# Send dataset UUIDs if they are given; otherwise send dataset names
datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
await cognee_cognify(datasets, user, payload.graph_model)
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})
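A client-side sketch of the updated payload; the server URL and route prefix are assumptions. When `dataset_ids` is present it takes precedence over the dataset names:

```python
import requests  # client-side sketch only

payload = {
    "datasets": ["main_dataset"],
    "dataset_ids": ["00000000-0000-0000-0000-000000000000"],  # placeholder UUID, optional
}
response = requests.post(
    "http://localhost:8000/api/v1/cognify",  # assumed mount point
    json=payload,
    headers={"Authorization": "Bearer <JWT>"},
)
print(response.status_code)
```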

View file

@ -1,66 +1,69 @@
from uuid import UUID
from typing import List
from fastapi import APIRouter
from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
def get_permissions_router() -> APIRouter:
permissions_router = APIRouter()
@permissions_router.post("/roles/{role_id}/permissions")
async def give_default_permission_to_role(role_id: UUID, permission_name: str):
from cognee.modules.users.permissions.methods import (
give_default_permission_to_role as set_default_permission_to_role,
@permissions_router.post("/datasets/{principal_id}/")
async def give_datasets_permission_to_principal(
permission_name: str,
dataset_ids: List[UUID],
principal_id: UUID,
user: User = Depends(get_authenticated_user),
):
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
await authorized_give_permission_on_datasets(
principal_id,
[dataset_id for dataset_id in dataset_ids],
permission_name,
user.id,
)
await set_default_permission_to_role(role_id, permission_name)
return JSONResponse(status_code=200, content={"message": "Permission assigned to role"})
@permissions_router.post("/tenants/{tenant_id}/permissions")
async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
from cognee.modules.users.permissions.methods import (
give_default_permission_to_tenant as set_tenant_default_permissions,
return JSONResponse(
status_code=200, content={"message": "Permission assigned to principal"}
)
await set_tenant_default_permissions(tenant_id, permission_name)
return JSONResponse(status_code=200, content={"message": "Permission assigned to tenant"})
@permissions_router.post("/users/{user_id}/permissions")
async def give_default_permission_to_user(user_id: UUID, permission_name: str):
from cognee.modules.users.permissions.methods import (
give_default_permission_to_user as set_default_permission_to_user,
)
await set_default_permission_to_user(user_id, permission_name)
return JSONResponse(status_code=200, content={"message": "Permission assigned to user"})
@permissions_router.post("/roles")
async def create_role(
role_name: str,
tenant_id: UUID,
):
async def create_role(role_name: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.users.roles.methods import create_role as create_role_method
await create_role_method(role_name=role_name, tenant_id=tenant_id)
await create_role_method(role_name=role_name, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "Role created for tenant"})
@permissions_router.post("/users/{user_id}/roles")
async def add_user_to_role(user_id: UUID, role_id: UUID):
async def add_user_to_role(
user_id: UUID, role_id: UUID, user: User = Depends(get_authenticated_user)
):
from cognee.modules.users.roles.methods import add_user_to_role as add_user_to_role_method
await add_user_to_role_method(user_id=user_id, role_id=role_id)
await add_user_to_role_method(user_id=user_id, role_id=role_id, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "User added to role"})
@permissions_router.post("/users/{user_id}/tenants")
async def add_user_to_tenant(
user_id: UUID, tenant_id: UUID, user: User = Depends(get_authenticated_user)
):
from cognee.modules.users.tenants.methods import add_user_to_tenant
await add_user_to_tenant(user_id=user_id, tenant_id=tenant_id, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "User added to tenant"})
@permissions_router.post("/tenants")
async def create_tenant(tenant_name: str):
async def create_tenant(tenant_name: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method
await create_tenant_method(tenant_name=tenant_name)
await create_tenant_method(tenant_name=tenant_name, user_id=user.id)
return JSONResponse(status_code=200, content={"message": "Tenant created."})

View file

@ -1,4 +1,5 @@
from uuid import UUID
from typing import Optional, Union
from datetime import datetime
from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
@ -9,8 +10,12 @@ from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_authenticated_user
# Note: Datasets sent by name will only map to datasets owned by the request sender
# To search for datasets not owned by the request sender dataset UUID is needed
class SearchPayloadDTO(InDTO):
search_type: SearchType
datasets: Optional[list[str]] = None
dataset_ids: Optional[list[UUID]] = None
query: str
@ -39,7 +44,11 @@ def get_search_router() -> APIRouter:
try:
results = await cognee_search(
query_text=payload.query, query_type=payload.search_type, user=user
query_text=payload.query,
query_type=payload.search_type,
user=user,
datasets=payload.datasets,
dataset_ids=payload.dataset_ids,
)
return results

View file

@ -1,32 +1,43 @@
from uuid import UUID
from typing import Union, Optional, List, Type
from cognee.modules.users.models import User
from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user
from cognee.modules.search.methods import search as search_function
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.exceptions import DatasetNotFoundError
async def search(
query_text: str,
query_type: SearchType = SearchType.GRAPH_COMPLETION,
user: User = None,
datasets: Union[list[str], str, None] = None,
datasets: Optional[Union[list[str], str]] = None,
dataset_ids: Optional[Union[list[UUID], UUID]] = None,
system_prompt_path: str = "answer_simple_question.txt",
top_k: int = 10,
node_type: Optional[Type] = None,
node_name: Optional[List[str]] = None,
) -> list:
# We use lists from now on for datasets
if isinstance(datasets, str):
if isinstance(datasets, UUID) or isinstance(datasets, str):
datasets = [datasets]
if user is None:
user = await get_default_user()
# Transform string-based datasets to UUIDs - string-based datasets can only be found for the current user
if datasets is not None and all(isinstance(dataset, str) for dataset in datasets):
datasets = await get_authorized_existing_datasets(datasets, "read", user)
datasets = [dataset.id for dataset in datasets]
if not datasets:
raise DatasetNotFoundError(message="No datasets found.")
filtered_search_results = await search_function(
query_text=query_text,
query_type=query_type,
datasets=datasets,
dataset_ids=dataset_ids if dataset_ids else datasets,
user=user,
system_prompt_path=system_prompt_path,
top_k=top_k,
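A usage sketch of the updated search entry point, assuming `cognee.search` maps to the function above; the UUID is a placeholder for a dataset shared by another user:

```python
import asyncio
from uuid import UUID

import cognee

async def main():
    # Dataset names only resolve to datasets owned by the caller ...
    own_results = await cognee.search(
        query_text="What is in my notes?",
        datasets=["main_dataset"],
    )
    # ... while a dataset shared by another user has to be addressed by its UUID.
    shared_results = await cognee.search(
        query_text="What is in the shared dataset?",
        datasets=[UUID("00000000-0000-0000-0000-000000000003")],  # placeholder UUID
    )
    print(own_results, shared_results)

asyncio.run(main())
```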

View file

@ -0,0 +1,67 @@
import os
import pathlib
from contextvars import ContextVar
from typing import Union
from uuid import UUID
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.modules.users.methods import get_user
# Note: ContextVar allows us to use different graph db configurations in Cognee
# for different async tasks, threads and processes
vector_db_config = ContextVar("vector_db_config", default=None)
graph_db_config = ContextVar("graph_db_config", default=None)
async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
"""
If backend access control is enabled, this function ensures all datasets have their own databases,
access to which is enforced through the granted permissions.
Database names are derived from the dataset_id, and the use of LanceDB and KuzuDB is enforced.
Note: This is only currently supported by the following databases:
Relational: SQLite, Postgres
Vector: LanceDB
Graph: KuzuDB
Args:
dataset: Cognee dataset name or id
user_id: UUID of the owner of the dataset
Returns:
"""
if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
return
user = await get_user(user_id)
# To ensure permissions are enforced properly all datasets will have their own databases
dataset_database = await get_or_create_dataset_database(dataset, user)
# TODO: Find better location for database files
cognee_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, f".cognee_system/databases/{user.id}")
).resolve()
)
# Set vector and graph database configuration based on dataset database information
vector_config = {
"vector_db_url": os.path.join(cognee_directory_path, dataset_database.vector_database_name),
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": os.path.join(
cognee_directory_path, dataset_database.graph_database_name
),
}
# Use ContextVar to ensure these graph and vector configurations are used
# in the current async context across Cognee
graph_db_config.set(graph_config)
vector_db_config.set(vector_config)
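The comment above relies on `contextvars` semantics: every asyncio task runs in a copy of the current context, so a configuration set inside one task is invisible to its siblings. A self-contained sketch of that behaviour (names and paths are illustrative only):

```python
import asyncio
from contextvars import ContextVar

db_config: ContextVar = ContextVar("db_config", default=None)

async def run_for_dataset(name: str) -> str:
    # Each task sets its own configuration without affecting the other task.
    db_config.set({"graph_file_path": f"/tmp/{name}.pkl"})
    await asyncio.sleep(0)  # yield so the tasks interleave
    return f"{name} -> {db_config.get()['graph_file_path']}"

async def main():
    print(await asyncio.gather(run_for_dataset("dataset_a"), run_for_dataset("dataset_b")))

asyncio.run(main())
```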

View file

@ -8,7 +8,7 @@ from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.shared.utils import send_telemetry
from cognee.tasks.documents import (
check_permissions_on_documents,
check_permissions_on_dataset,
classify_documents,
extract_chunks_from_documents,
)
@ -31,7 +31,7 @@ async def get_cascade_graph_tasks(
cognee_config = get_cognify_config()
default_tasks = [
Task(classify_documents),
Task(check_permissions_on_documents, user=user, permissions=["write"]),
Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
), # Extract text chunks based on the document type.

View file

@ -31,6 +31,9 @@ class CogneeApiError(Exception):
super().__init__(self.message, self.name)
def __str__(self):
return f"{self.name}: {self.message} (Status code: {self.status_code})"
class ServiceError(CogneeApiError):
"""Failures in external services or APIs, like a database or a third-party service"""

View file

@ -105,3 +105,14 @@ def get_graph_config():
- GraphConfig: A GraphConfig instance containing the graph configuration settings.
"""
return GraphConfig()
def get_graph_context_config():
"""This function will get the appropriate graph db config based on async context.
This allows the use of multiple graph databases for different threads, async tasks and parallelization
"""
from cognee.context_global_variables import graph_db_config
if graph_db_config.get():
return graph_db_config.get()
return get_graph_config().to_hashable_dict()

View file

@ -2,36 +2,22 @@
from functools import lru_cache
from .config import get_graph_config
from .config import get_graph_context_config
from .graph_db_interface import GraphDBInterface
from .supported_databases import supported_databases
async def get_graph_engine() -> GraphDBInterface:
"""
Factory function to get the appropriate graph client based on the graph type.
"""Factory function to get the appropriate graph client based on the graph type."""
# Get appropriate graph configuration based on current async context
config = get_graph_context_config()
This function retrieves the graph configuration and creates a graph engine by calling
the `create_graph_engine` function. If the configured graph database provider is
'networkx', it ensures that the graph is loaded from a file asynchronously if it hasn't
been loaded yet. It raises an `EnvironmentError` if the necessary configurations for the
selected graph provider are missing.
Returns:
--------
- GraphDBInterface: Returns an instance of GraphDBInterface which represents the
selected graph client.
"""
config = get_graph_config()
graph_client = create_graph_engine(**get_graph_config().to_hashable_dict())
graph_client = create_graph_engine(**config)
# Async functions can't be cached. After creating and caching the graph engine
# handle all necessary async operations for different graph types below.
# Handle loading of graph for NetworkX
if config.graph_database_provider.lower() == "networkx" and graph_client.graph is None:
if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None:
await graph_client.load_graph_from_file()
return graph_client
@ -40,11 +26,11 @@ async def get_graph_engine() -> GraphDBInterface:
@lru_cache
def create_graph_engine(
graph_database_provider,
graph_database_url,
graph_database_username,
graph_database_password,
graph_database_port,
graph_file_path,
graph_database_url="",
graph_database_username="",
graph_database_password="",
graph_database_port="",
):
"""
Create a graph engine based on the specified provider type.

View file

@ -0,0 +1 @@
from .get_or_create_dataset_database import get_or_create_dataset_database

View file

@ -0,0 +1,68 @@
from uuid import UUID
from typing import Union
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from cognee.modules.data.methods import create_dataset
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.methods import get_unique_dataset_id
from cognee.modules.users.models import DatasetDatabase
from cognee.modules.users.models import User
async def get_or_create_dataset_database(
dataset: Union[str, UUID],
user: User,
) -> DatasetDatabase:
"""
Return the `DatasetDatabase` row for the given owner + dataset.
If the row already exists, it is fetched and returned.
Otherwise a new one is created atomically and returned.
Parameters
----------
user : User
Principal that owns this dataset.
dataset : Union[str, UUID]
Dataset being linked.
"""
db_engine = get_relational_engine()
dataset_id = await get_unique_dataset_id(dataset, user)
vector_db_name = f"{dataset_id}.lance.db"
graph_db_name = f"{dataset_id}.pkl"
async with db_engine.get_async_session() as session:
# Create dataset if it doesn't exist
if isinstance(dataset, str):
dataset = await create_dataset(dataset, user, session)
# Try to fetch an existing row first
stmt = select(DatasetDatabase).where(
DatasetDatabase.owner_id == user.id,
DatasetDatabase.dataset_id == dataset_id,
)
existing: DatasetDatabase = await session.scalar(stmt)
if existing:
return existing
# If there are no existing rows build a new row
record = DatasetDatabase(
owner_id=user.id,
dataset_id=dataset_id,
vector_database_name=vector_db_name,
graph_database_name=graph_db_name,
)
try:
session.add(record)
await session.commit()
await session.refresh(record)
return record
except IntegrityError:
await session.rollback()
raise
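A usage sketch, assuming the relational tables and a default user already exist (for example after a prior pipeline run); the helper is idempotent per owner + dataset:

```python
import asyncio

from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.modules.users.methods import get_default_user

async def main():
    user = await get_default_user()
    first = await get_or_create_dataset_database("main_dataset", user)
    second = await get_or_create_dataset_database("main_dataset", user)
    # The second call fetches the same row instead of creating a new one.
    assert first.vector_database_name == second.vector_database_name
    print(first.vector_database_name, first.graph_database_name)

asyncio.run(main())
```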

View file

@ -62,3 +62,12 @@ def get_vectordb_config():
configuration.
"""
return VectorConfig()
def get_vectordb_context_config():
"""This function will get the appropriate vector db config based on async context."""
from cognee.context_global_variables import vector_db_config
if vector_db_config.get():
return vector_db_config.get()
return get_vectordb_config().to_dict()

View file

@ -6,10 +6,10 @@ from functools import lru_cache
@lru_cache
def create_vector_engine(
vector_db_url: str,
vector_db_port: str,
vector_db_key: str,
vector_db_provider: str,
vector_db_url: str,
vector_db_port: str = "",
vector_db_key: str = "",
):
"""
Create a vector database engine based on the specified provider.

View file

@ -1,14 +1,7 @@
from .config import get_vectordb_config
from .config import get_vectordb_context_config
from .create_vector_engine import create_vector_engine
def get_vector_engine():
"""
Create and return a vector engine instance.
Returns:
--------
A vector engine instance created from the vector database configuration.
"""
return create_vector_engine(**get_vectordb_config().to_dict())
# Get appropriate vector db configuration based on current async context
return create_vector_engine(**get_vectordb_context_config())

View file

@ -1,18 +1,13 @@
from ..get_vector_engine import get_vector_engine, get_vectordb_config
from ..get_vector_engine import get_vector_engine, get_vectordb_context_config
from sqlalchemy import text
from cognee.context_global_variables import vector_db_config as context_vector_db_config
async def create_db_and_tables():
"""
Create the database and its associated tables if necessary.
This function checks the vector database provider configuration and, if it is set to
"pgvector", creates the necessary vector extension in the PostgreSQL database using an
asynchronous context manager.
"""
vector_config = get_vectordb_config()
# Get appropriate vector db configuration based on current async context
vector_config = get_vectordb_context_config()
vector_engine = get_vector_engine()
if vector_config.vector_db_provider == "pgvector":
if vector_config["vector_db_provider"] == "pgvector":
async with vector_engine.engine.begin() as connection:
await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))

View file

@ -1,3 +1,4 @@
import os
from typing import Dict, List, Optional
from qdrant_client import AsyncQdrantClient, models
@ -147,14 +148,15 @@ class QDrantAdapter(VectorDBInterface):
Returns:
--------
- AsyncQdrantClient: An instance of AsyncQdrantClient configured for database
operations.
"""
is_prod = os.getenv("ENV", "").lower() == "prod"
if self.qdrant_path is not None:
return AsyncQdrantClient(path=self.qdrant_path, port=6333)
return AsyncQdrantClient(path=self.qdrant_path, port=6333, https=is_prod)
elif self.url is not None:
return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333)
return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333, https=is_prod)
return AsyncQdrantClient(location=":memory:")

View file

@ -7,4 +7,6 @@ This module defines a set of exceptions for handling various data errors
from .exceptions import (
UnstructuredLibraryImportError,
UnauthorizedDataAccessError,
DatasetNotFoundError,
DatasetTypeError,
)

View file

@ -20,3 +20,23 @@ class UnauthorizedDataAccessError(CogneeApiError):
status_code=status.HTTP_401_UNAUTHORIZED,
):
super().__init__(message, name, status_code)
class DatasetNotFoundError(CogneeApiError):
def __init__(
self,
message: str = "Dataset not found.",
name: str = "DatasetNotFoundError",
status_code=status.HTTP_404_NOT_FOUND,
):
super().__init__(message, name, status_code)
class DatasetTypeError(CogneeApiError):
def __init__(
self,
message: str = "Dataset type not supported.",
name: str = "DatasetTypeError",
status_code=status.HTTP_400_BAD_REQUEST,
):
super().__init__(message, name, status_code)

View file

@ -8,7 +8,15 @@ from .get_datasets_by_name import get_datasets_by_name
from .get_dataset_data import get_dataset_data
from .get_data import get_data
from .get_unique_dataset_id import get_unique_dataset_id
from .get_authorized_existing_datasets import get_authorized_existing_datasets
from .get_dataset_ids import get_dataset_ids
# Delete
from .delete_dataset import delete_dataset
from .delete_data import delete_data
# Create
from .load_or_create_datasets import load_or_create_datasets
# Check
from .check_dataset_name import check_dataset_name

View file

@ -0,0 +1,3 @@
def check_dataset_name(dataset_name: str):
if "." in dataset_name or " " in dataset_name:
raise ValueError("Dataset name cannot contain spaces or underscores")

View file

@ -1,4 +1,3 @@
from uuid import UUID, uuid5, NAMESPACE_OID
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from sqlalchemy.orm import joinedload

View file

@ -0,0 +1,39 @@
from typing import Union
from uuid import UUID
from cognee.modules.data.models import Dataset
from cognee.modules.users.models import User
from cognee.modules.data.methods.get_dataset_ids import get_dataset_ids
from cognee.modules.users.permissions.methods import get_all_user_permission_datasets
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
async def get_authorized_existing_datasets(
datasets: Union[list[str], list[UUID]], permission_type: str, user: User
) -> list[Dataset]:
"""
Function returns a list of existing dataset objects user has access for based on datasets input.
Args:
datasets:
user:
Returns:
list of Dataset objects
"""
if datasets:
# Function handles transforming dataset input to dataset IDs (if possible)
dataset_ids = await get_dataset_ids(datasets, user)
# If dataset_ids are provided filter these datasets based on what user has permission for.
if dataset_ids:
existing_datasets = await get_specific_user_permission_datasets(
user.id, permission_type, dataset_ids
)
else:
existing_datasets = []
else:
# If no datasets are provided, work with all existing datasets user has permission for.
existing_datasets = await get_all_user_permission_datasets(user, permission_type)
return existing_datasets

View file

@ -0,0 +1,36 @@
from typing import Union
from uuid import UUID
from cognee.modules.data.exceptions import DatasetTypeError
from cognee.modules.data.methods import get_datasets
async def get_dataset_ids(datasets: Union[list[str], list[UUID]], user):
"""
Function returns dataset IDs necessary based on provided input.
It transforms raw strings into real dataset_ids with keeping write permissions in mind.
If a user wants to write to a dataset he is not the owner of it must be provided through UUID.
Args:
datasets:
pipeline_name:
user:
Returns: a list of write access dataset_ids if they exist
"""
if all(isinstance(dataset, UUID) for dataset in datasets):
# Return list of dataset UUIDs
dataset_ids = datasets
else:
# Convert list of dataset names to dataset UUID
if all(isinstance(dataset, str) for dataset in datasets):
# Get all datasets owned by the user (a dataset the user does not own must be provided as a UUID)
user_datasets = await get_datasets(user.id)
# Keep only the datasets whose names were mentioned
dataset_ids = [dataset.id for dataset in user_datasets if dataset.name in datasets]
else:
raise DatasetTypeError(
f"One or more of the provided dataset types is not handled: f{datasets}"
)
return dataset_ids

View file

@ -1,6 +1,9 @@
from uuid import UUID, uuid5, NAMESPACE_OID
from cognee.modules.users.models import User
from typing import Union
async def get_unique_dataset_id(dataset_name: str, user: User) -> UUID:
async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID:
if isinstance(dataset_name, UUID):
return dataset_name
return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
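Because the dataset UUID is a `uuid5` over the dataset name plus the owner's id, it can be recomputed without a database lookup; a small sketch with a placeholder user id:

```python
from uuid import NAMESPACE_OID, UUID, uuid5

user_id = UUID("11111111-1111-1111-1111-111111111111")  # placeholder user id

# The same dataset name and owner always yield the same dataset UUID.
first = uuid5(NAMESPACE_OID, f"main_dataset{user_id}")
second = uuid5(NAMESPACE_OID, f"main_dataset{user_id}")
assert first == second
```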

View file

@ -0,0 +1,42 @@
from typing import List, Union
from uuid import UUID
from cognee.modules.data.models import Dataset
from cognee.modules.data.methods import get_unique_dataset_id
from cognee.modules.data.exceptions import DatasetNotFoundError
async def load_or_create_datasets(
dataset_names: List[Union[str, UUID]], existing_datasets: List[Dataset], user
) -> List[Dataset]:
"""
Given a list of dataset identifiers (names or UUIDs), return Dataset instances:
- If an identifier matches an existing Dataset (by name or id), reuse it.
- Otherwise, create a new Dataset with a unique id. Note: the created Dataset is not persisted to the database.
"""
result: List[Dataset] = []
for identifier in dataset_names:
# Try to find a matching dataset in the existing list
# If no matching dataset is found return None
match = next(
(ds for ds in existing_datasets if ds.name == identifier or ds.id == identifier), None
)
if match:
result.append(match)
continue
# If the identifier is a UUID but nothing matched, that's an error
if isinstance(identifier, UUID):
raise DatasetNotFoundError(f"Dataset with given UUID does not exist: {identifier}")
# Otherwise, create a new Dataset instance
new_dataset = Dataset(
id=await get_unique_dataset_id(dataset_name=identifier, user=user),
name=identifier,
owner_id=user.id,
)
result.append(new_dataset)
return result

View file

@ -33,9 +33,6 @@ class Data(Base):
cascade="all, delete",
)
# New relationship for ACLs with cascade deletion
acls = relationship("ACL", back_populates="data", cascade="all, delete-orphan")
def to_json(self) -> dict:
return {
"id": str(self.id),

View file

@ -19,6 +19,8 @@ class Dataset(Base):
owner_id = Column(UUID, index=True)
acls = relationship("ACL", back_populates="dataset", cascade="all, delete-orphan")
data: Mapped[List["Data"]] = relationship(
"Data",
secondary=DatasetData.__tablename__,

View file

@ -1,11 +1,9 @@
import asyncio
from typing import Union
from uuid import NAMESPACE_OID, uuid5
from uuid import NAMESPACE_OID, uuid5, UUID
from cognee.shared.logging_utils import get_logger
from cognee.modules.data.methods import get_datasets
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.data.models import Data, Dataset
from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.models import PipelineRunStatus
@ -14,6 +12,13 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User
from cognee.modules.pipelines.operations import log_pipeline_run_initiated
from cognee.context_global_variables import set_database_global_context_variables
from cognee.modules.data.exceptions import DatasetNotFoundError
from cognee.modules.data.methods import (
get_authorized_existing_datasets,
load_or_create_datasets,
check_dataset_name,
)
from cognee.infrastructure.databases.relational import (
create_db_and_tables as create_relational_db_and_tables,
@ -21,6 +26,10 @@ from cognee.infrastructure.databases.relational import (
from cognee.infrastructure.databases.vector.pgvector import (
create_db_and_tables as create_pgvector_db_and_tables,
)
from cognee.context_global_variables import (
graph_db_config as context_graph_db_config,
vector_db_config as context_vector_db_config,
)
logger = get_logger("cognee.pipeline")
@ -30,10 +39,19 @@ update_status_lock = asyncio.Lock()
async def cognee_pipeline(
tasks: list[Task],
data=None,
datasets: Union[str, list[str]] = None,
datasets: Union[str, list[str], list[UUID]] = None,
user: User = None,
pipeline_name: str = "custom_pipeline",
vector_db_config: dict = None,
graph_db_config: dict = None,
):
# Note: These context variables allow different database configurations in Cognee
# per async task, thread, process, etc.
if vector_db_config:
context_vector_db_config.set(vector_db_config)
if graph_db_config:
context_graph_db_config.set(graph_db_config)
# Create tables for databases
await create_relational_db_and_tables()
await create_pgvector_db_and_tables()
@ -54,49 +72,35 @@ async def cognee_pipeline(
if user is None:
user = await get_default_user()
# Convert datasets to list in case it's a string
if isinstance(datasets, str):
# Convert datasets to list
if isinstance(datasets, str) or isinstance(datasets, UUID):
datasets = [datasets]
# If no datasets are provided, work with all existing datasets.
existing_datasets = await get_datasets(user.id)
# Get datasets user wants write permissions for (verify user has permissions if datasets are provided as well)
# NOTE: If a user wants to write to a dataset he does not own it must be provided through UUID
existing_datasets = await get_authorized_existing_datasets(datasets, "write", user)
if not datasets:
# Get datasets from database if none sent.
datasets = existing_datasets
else:
# If dataset is already in database, use it, otherwise create a new instance.
dataset_instances = []
# If dataset matches an existing Dataset (by name or id), reuse it. Otherwise, create a new Dataset.
datasets = await load_or_create_datasets(datasets, existing_datasets, user)
for dataset_name in datasets:
is_dataset_found = False
for existing_dataset in existing_datasets:
if (
existing_dataset.name == dataset_name
or str(existing_dataset.id) == dataset_name
):
dataset_instances.append(existing_dataset)
is_dataset_found = True
break
if not is_dataset_found:
dataset_instances.append(
Dataset(
id=await get_unique_dataset_id(dataset_name=dataset_name, user=user),
name=dataset_name,
owner_id=user.id,
)
)
datasets = dataset_instances
if not datasets:
raise DatasetNotFoundError("There are no datasets to work with.")
awaitables = []
for dataset in datasets:
awaitables.append(
run_pipeline(
dataset=dataset, user=user, tasks=tasks, data=data, pipeline_name=pipeline_name
dataset=dataset,
user=user,
tasks=tasks,
data=data,
pipeline_name=pipeline_name,
context={"dataset": dataset},
)
)
@ -109,9 +113,13 @@ async def run_pipeline(
tasks: list[Task],
data=None,
pipeline_name: str = "custom_pipeline",
context: dict = None,
):
check_dataset_name(dataset.name)
# Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
await set_database_global_context_variables(dataset.name, user.id)
# Ugly hack, but no easier way to do this.
if pipeline_name == "add_pipeline":
# Refresh the add pipeline status so data is added to a dataset.
@ -160,15 +168,10 @@ async def run_pipeline(
if not isinstance(task, Task):
raise ValueError(f"Task {task} is not an instance of Task")
pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name)
pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name, context=context)
pipeline_run_status = None
async for run_status in pipeline_run:
pipeline_run_status = run_status
return pipeline_run_status
def check_dataset_name(dataset_name: str) -> str:
if "." in dataset_name or " " in dataset_name:
raise ValueError("Dataset name cannot contain spaces or underscores")

View file

@ -1,8 +1,11 @@
import os
import json
from typing import Callable, Optional, List, Type
import asyncio
from uuid import UUID
from typing import Callable, List, Optional, Type, Union
from cognee.context_global_variables import set_database_global_context_variables
from cognee.exceptions import InvalidValueError
from cognee.infrastructure.engine.utils import parse_id
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
from cognee.modules.retrieval.insights_retriever import InsightsRetriever
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
@ -21,24 +24,45 @@ from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageR
from cognee.modules.search.types import SearchType
from cognee.modules.storage.utils import JSONEncoder
from cognee.modules.users.models import User
from cognee.modules.users.permissions.methods import get_document_ids_for_user
from cognee.modules.data.models import Dataset
from cognee.shared.utils import send_telemetry
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from cognee.modules.search.operations import log_query, log_result
async def search(
query_text: str,
query_type: SearchType,
datasets: list[str],
dataset_ids: Union[list[UUID], None],
user: User,
system_prompt_path="answer_simple_question.txt",
top_k: int = 10,
node_type: Optional[Type] = None,
node_name: Optional[List[str]] = None,
):
"""
Args:
query_text:
query_type:
datasets:
user:
system_prompt_path:
top_k:
Returns:
Notes:
Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
"""
# Use search function filtered by permissions if access control is enabled
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
return await permissions_search(
query_text, query_type, user, dataset_ids, system_prompt_path, top_k
)
query = await log_query(query_text, query_type.value, user.id)
own_document_ids = await get_document_ids_for_user(user.id, datasets)
search_results = await specific_search(
query_type,
query_text,
@ -49,18 +73,9 @@ async def search(
node_name=node_name,
)
filtered_search_results = []
await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
for search_result in search_results:
document_id = search_result["document_id"] if "document_id" in search_result else None
document_id = parse_id(document_id)
if document_id is None or document_id in own_document_ids:
filtered_search_results.append(search_result)
await log_result(query.id, json.dumps(filtered_search_results, cls=JSONEncoder), user.id)
return filtered_search_results
return search_results
async def specific_search(
@ -120,3 +135,62 @@ async def specific_search(
send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
return results
async def permissions_search(
query_text: str,
query_type: SearchType,
user: User = None,
dataset_ids: Optional[list[UUID]] = None,
system_prompt_path: str = "answer_simple_question.txt",
top_k: int = 10,
) -> list:
"""
Verify access to the provided datasets (or fall back to all datasets the user has read access to) and perform the search per dataset.
Not to be used outside of active access control mode.
"""
query = await log_query(query_text, query_type.value, user.id)
# Find datasets the user has read access to (if dataset_ids are provided, only those are returned, provided the user can read them)
search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
# Searches all provided datasets and handles setting up of appropriate database context based on permissions
search_results = await specific_search_by_context(
search_datasets, query_text, query_type, user, system_prompt_path, top_k
)
await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
return search_results
async def specific_search_by_context(
search_datasets: list[Dataset],
query_text: str,
query_type: SearchType,
user: User,
system_prompt_path: str,
top_k: int,
):
"""
Searches all provided datasets and handles setting up of appropriate database context based on permissions.
Not to be used outside of active access control mode.
"""
async def _search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k):
# Set database configuration in async context for each dataset user has access for
await set_database_global_context_variables(dataset.id, dataset.owner_id)
search_results = await specific_search(
query_type, query_text, user, system_prompt_path=system_prompt_path, top_k=top_k
)
return {dataset.name: search_results}
# Search every dataset async based on query and appropriate database configuration
tasks = []
for dataset in search_datasets:
tasks.append(
_search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k)
)
return await asyncio.gather(*tasks)
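In access-control mode the gather above yields one `{dataset_name: results}` mapping per searched dataset. A hypothetical post-processing sketch showing how a caller could merge them:

```python
# Shape returned by permissions_search (illustrative values only).
per_dataset_results = [
    {"dataset_a": ["result 1", "result 2"]},
    {"dataset_b": ["result 3"]},
]

merged = {name: results for entry in per_dataset_results for name, results in entry.items()}
print(merged)  # {'dataset_a': ['result 1', 'result 2'], 'dataset_b': ['result 3']}
```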

View file

@ -19,17 +19,14 @@ class CustomJWTStrategy(JWTStrategy):
# JoinLoad tenant and role information to user object
user = await get_user(user.id)
if user.tenant:
data = {"user_id": str(user.id), "tenant_id": str(user.tenant.id), "roles": user.roles}
else:
# The default tenant is None
data = {"user_id": str(user.id), "tenant_id": None, "roles": user.roles}
data = {"user_id": str(user.id)}
return generate_jwt(data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm)
@lru_cache
def get_auth_backend():
bearer_transport = BearerTransport(tokenUrl="auth/jwt/login")
bearer_transport = BearerTransport(tokenUrl="api/v1/auth/login")
def get_jwt_strategy() -> JWTStrategy[models.UP, models.ID]:
secret = os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret")

View file

@ -9,4 +9,5 @@ from .exceptions import (
UserNotFoundError,
PermissionDeniedError,
TenantNotFoundError,
PermissionNotFoundError,
)

View file

@ -46,3 +46,13 @@ class PermissionDeniedError(CogneeApiError):
status_code=status.HTTP_403_FORBIDDEN,
):
super().__init__(message, name, status_code)
class PermissionNotFoundError(CogneeApiError):
def __init__(
self,
message: str = "Permission type does not exist.",
name: str = "PermissionNotFoundError",
status_code=status.HTTP_403_FORBIDDEN,
):
super().__init__(message, name, status_code)

View file

@ -1,7 +1,8 @@
from types import SimpleNamespace
from ..get_fastapi_users import get_fastapi_users
from fastapi import HTTPException, Header
from fastapi import HTTPException, Security
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import os
import jwt
@ -9,28 +10,29 @@ from uuid import UUID
fastapi_users = get_fastapi_users()
# Lets Swagger understand the authorization type and enables signing in from the Swagger docs to test the backend
bearer_scheme = HTTPBearer(scheme_name="BearerAuth", description="Paste **Bearer &lt;JWT&gt;**")
async def get_authenticated_user(authorization: str = Header(...)) -> SimpleNamespace:
"""Extract and validate JWT from Authorization header."""
async def get_authenticated_user(
creds: HTTPAuthorizationCredentials = Security(bearer_scheme),
) -> SimpleNamespace:
"""
Extract and validate the JWT presented in the Authorization header.
"""
if creds is None: # header missing
raise HTTPException(status_code=401, detail="Not authenticated")
if creds.scheme.lower() != "bearer": # shouldn't happen extra guard
raise HTTPException(status_code=401, detail="Invalid authentication scheme")
token = creds.credentials
try:
scheme, token = authorization.split()
if scheme.lower() != "bearer":
raise HTTPException(status_code=401, detail="Invalid authentication scheme")
payload = jwt.decode(
token, os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret"), algorithms=["HS256"]
)
if payload.get("tenant_id"):
# SimpleNamespace lets us access dictionary elements like attributes
auth_data = SimpleNamespace(
id=UUID(payload["user_id"]),
tenant_id=UUID(payload["tenant_id"]),
roles=payload["roles"],
)
else:
auth_data = SimpleNamespace(id=UUID(payload["user_id"]), tenant_id=None, roles=[])
auth_data = SimpleNamespace(id=UUID(payload["user_id"]))
return auth_data
except jwt.ExpiredSignatureError:
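A round-trip sketch of the slimmed-down token contents using PyJWT: only `user_id` is embedded now, and tenant/role details are resolved server-side via `get_user`. The secret default mirrors the code above; use a real secret in practice:

```python
import os
from uuid import uuid4

import jwt  # PyJWT

secret = os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret")
token = jwt.encode({"user_id": str(uuid4())}, secret, algorithm="HS256")
print(jwt.decode(token, secret, algorithms=["HS256"]))  # {'user_id': '...'}
```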

View file

@ -1,5 +1,6 @@
from types import SimpleNamespace
from sqlalchemy.orm import selectinload
from sqlalchemy.exc import NoResultFound
from sqlalchemy.future import select
from cognee.modules.users.models import User
from cognee.base_config import get_base_config
@ -33,5 +34,6 @@ async def get_default_user() -> SimpleNamespace:
except Exception as error:
if "principals" in str(error.args):
raise DatabaseNotCreatedError() from error
raise UserNotFoundError(f"Failed to retrieve default user: {default_email}") from error
if isinstance(error, NoResultFound):
raise UserNotFoundError(f"Failed to retrieve default user: {default_email}") from error
raise

View file

@ -1,7 +1,9 @@
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from sqlalchemy.orm import selectinload
import sqlalchemy.exc
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.infrastructure.databases.exceptions import EntityNotFoundError
from ..models import User
@ -12,9 +14,12 @@ async def get_user(user_id: UUID):
user = (
await session.execute(
select(User)
.options(joinedload(User.roles), joinedload(User.tenant))
.options(selectinload(User.roles), selectinload(User.tenant))
.where(User.id == user_id)
)
).scalar()
if not user:
raise EntityNotFoundError(message=f"Could not find user: {user_id}")
return user

View file

@ -15,8 +15,8 @@ class ACL(Base):
principal_id = Column(UUID, ForeignKey("principals.id"))
permission_id = Column(UUID, ForeignKey("permissions.id"))
data_id = Column(UUID, ForeignKey("data.id", ondelete="CASCADE"))
dataset_id = Column(UUID, ForeignKey("datasets.id", ondelete="CASCADE"))
principal = relationship("Principal")
permission = relationship("Permission")
data = relationship("Data", back_populates="acls")
dataset = relationship("Dataset", back_populates="acls")

View file

@ -0,0 +1,19 @@
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, String, UUID, ForeignKey
from cognee.infrastructure.databases.relational import Base
class DatasetDatabase(Base):
__tablename__ = "dataset_database"
owner_id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), index=True)
dataset_id = Column(
UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
)
vector_database_name = Column(String, unique=True, nullable=False)
graph_database_name = Column(String, unique=True, nullable=False)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))

View file

@ -11,6 +11,8 @@ class Tenant(Principal):
id = Column(UUID, ForeignKey("principals.id"), primary_key=True)
name = Column(String, unique=True, nullable=False, index=True)
owner_id = Column(UUID, index=True)
# One-to-Many relationship with User; specify the join via User.tenant_id
users = relationship(
"User",

View file

@ -1,6 +1,7 @@
from .User import User
from .Role import Role
from .UserRole import UserRole
from .DatasetDatabase import DatasetDatabase
from .RoleDefaultPermissions import RoleDefaultPermissions
from .UserDefaultPermissions import UserDefaultPermissions
from .TenantDefaultPermissions import TenantDefaultPermissions

View file

@ -0,0 +1 @@
from .permission_types import PERMISSION_TYPES

View file

@ -1,6 +1,13 @@
from .check_permission_on_documents import check_permission_on_documents
from .give_permission_on_document import give_permission_on_document
from .get_role import get_role
from .get_tenant import get_tenant
from .get_principal import get_principal
from .get_principal_datasets import get_principal_datasets
from .get_all_user_permission_datasets import get_all_user_permission_datasets
from .get_specific_user_permission_datasets import get_specific_user_permission_datasets
from .check_permission_on_dataset import check_permission_on_dataset
from .give_permission_on_dataset import give_permission_on_dataset
from .get_document_ids_for_user import get_document_ids_for_user
from .authorized_give_permission_on_datasets import authorized_give_permission_on_datasets
from .give_default_permission_to_tenant import give_default_permission_to_tenant
from .give_default_permission_to_role import give_default_permission_to_role
from .give_default_permission_to_user import give_default_permission_to_user

View file

@ -0,0 +1,23 @@
from typing import Union, List
from cognee.modules.users.permissions.methods import get_principal
from cognee.modules.users.permissions.methods import give_permission_on_dataset
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from uuid import UUID
async def authorized_give_permission_on_datasets(
principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
):
# If only a single dataset UUID is provided transform it to a list
if not isinstance(dataset_ids, list):
dataset_ids = [dataset_ids]
principal = await get_principal(principal_id)
# Check if request owner has permission to share dataset access
datasets = await get_specific_user_permission_datasets(owner_id, "share", dataset_ids)
# TODO: Do we want to enforce sharing of datasets to only be between users of the same tenant?
for dataset in datasets:
await give_permission_on_dataset(principal, dataset.id, permission_name)

View file

@ -13,29 +13,29 @@ from ...models.ACL import ACL
logger = get_logger()
async def check_permission_on_documents(user: User, permission_type: str, document_ids: list[UUID]):
async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
if user is None:
user = await get_default_user()
# TODO: Enable user role permissions again. Temporarily disabled during rework.
# # TODO: Enable user role permissions again. Temporarily disabled during rework.
# user_roles_ids = [role.id for role in user.roles]
user_roles_ids = []
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
# If dataset id was returned it means the user has permission to access it
result = await session.execute(
select(ACL)
.join(ACL.permission)
.options(joinedload(ACL.data))
.options(joinedload(ACL.dataset))
.where(ACL.principal_id.in_([user.id, *user_roles_ids]))
.where(ACL.permission.has(name=permission_type))
)
acls = result.unique().scalars().all()
data_ids = [acl.data.id for acl in acls]
has_permissions = all(document_id in data_ids for document_id in document_ids)
has_permission = dataset_id in [acl.dataset.id for acl in acls]
if not has_permissions:
if not has_permission:
raise PermissionDeniedError(
message=f"User {user.id} does not have {permission_type} permission on documents"
)

View file

@ -0,0 +1,31 @@
from cognee.shared.logging_utils import get_logger
from ...models.User import User
from cognee.modules.data.models.Dataset import Dataset
from cognee.modules.users.permissions.methods import get_principal_datasets
from cognee.modules.users.permissions.methods import get_role, get_tenant
logger = get_logger()
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
datasets = list()
# Get all datasets User has explicit access to
datasets.extend(await get_principal_datasets(user, permission_type))
if user.tenant_id:
# Get all datasets all tenants have access to
tenant = await get_tenant(user.tenant_id)
datasets.extend(await get_principal_datasets(tenant, permission_type))
# Get all datasets Users roles have access to
for role_name in user.roles:
role = await get_role(user.tenant_id, role_name)
datasets.extend(await get_principal_datasets(role, permission_type))
# Deduplicate datasets with same ID
unique = {}
for dataset in datasets:
# If the dataset id key already exists, leave the dictionary unchanged.
unique.setdefault(dataset.id, dataset)
return list(unique.values())

View file

@ -1,7 +1,9 @@
from uuid import UUID
from cognee.modules.data.methods import get_dataset_data
from sqlalchemy import select
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.models import Dataset, DatasetData, Data
from cognee.modules.data.models import Dataset, DatasetData
from ...models import ACL, Permission
@ -10,10 +12,10 @@ async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -
async with db_engine.get_async_session() as session:
async with session.begin():
document_ids = (
dataset_ids = (
await session.scalars(
select(Data.id)
.join(ACL.data)
select(Dataset.id)
.join(ACL.dataset)
.join(ACL.permission)
.where(
ACL.principal_id == user_id,
@ -22,9 +24,15 @@ async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -
)
).all()
# Get documents from datasets user has read access for
document_ids = []
for dataset_id in dataset_ids:
data_list = await get_dataset_data(dataset_id)
document_ids.extend([data.id for data in data_list])
if datasets:
documents_ids_in_dataset = set()
# If datasets are specified filter out documents that aren't part of the specified datasets
documents_ids_in_dataset = set()
for dataset in datasets:
# Find dataset id for dataset element
dataset_id = (

View file

@ -0,0 +1,14 @@
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models.Principal import Principal
async def get_principal(principal_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
result = await session.execute(select(Principal).where(Principal.id == principal_id))
principal = result.unique().scalar_one()
return principal

View file

@ -0,0 +1,24 @@
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models.Principal import Principal
from cognee.modules.data.models.Dataset import Dataset
from ...models.ACL import ACL
async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
# If dataset id was returned it means the principal has permission to access it
result = await session.execute(
select(ACL)
.join(ACL.permission)
.options(joinedload(ACL.dataset))
.where(ACL.principal_id == principal.id)
.where(ACL.permission.has(name=permission_type))
)
acls = result.unique().scalars().all()
return [acl.dataset for acl in acls]

View file

@ -0,0 +1,24 @@
import sqlalchemy.exc
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import RoleNotFoundError
from ...models.Role import Role
async def get_role(tenant_id: UUID, role_name: str):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
result = await session.execute(
select(Role).where(Role.name == role_name).where(Role.tenant_id == tenant_id)
)
role = result.unique().scalar_one()
if not role:
raise RoleNotFoundError(message=f"Could not find {role_name} for given tenant")
return role
except sqlalchemy.exc.NoResultFound:
raise RoleNotFoundError(message=f"Could not find {role_name} for given tenant")

View file

@ -0,0 +1,46 @@
from uuid import UUID
from cognee.modules.data.models.Dataset import Dataset
from cognee.modules.users.permissions.methods.get_all_user_permission_datasets import (
get_all_user_permission_datasets,
)
from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.modules.users.methods import get_user
async def get_specific_user_permission_datasets(
user_id: UUID, permission_type: str, dataset_ids: list[UUID] = None
) -> list[Dataset]:
"""
Return a list of datasets the user has the given permission for. If a list of dataset ids is
provided, verify which of those datasets the user has the appropriate permission for and return
only those.
Args:
user_id: UUID of the user whose permissions are checked.
permission_type: Name of the permission to check (e.g. "read" or "write").
dataset_ids: Optional list of dataset UUIDs to restrict the check to.
Returns:
list[Dataset]: List of datasets the user has the given permission for.
"""
user = await get_user(user_id)
# Find all datasets user has permission for
user_permission_access_datasets = await get_all_user_permission_datasets(user, permission_type)
# If specific datasets are provided, keep only the requested ones
if dataset_ids:
search_datasets = [
dataset for dataset in user_permission_access_datasets if dataset.id in dataset_ids
]
# If the user lacks the permission on any of the requested datasets, raise an error
if len(search_datasets) != len(dataset_ids):
raise PermissionDeniedError(
f"Request owner does not have necessary permission: [{permission_type}] for all datasets requested."
)
else:
search_datasets = user_permission_access_datasets
if len(search_datasets) == 0:
raise PermissionDeniedError(
f"Request owner does not have permission: [{permission_type}] for any dataset."
)
return search_datasets
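A hedged usage sketch for this helper; the user id and dataset ids are placeholders, and PermissionDeniedError is the exception imported above:

# Hypothetical sketch (inside an async context): verify "write" access to two
# specific datasets before running a pipeline against them.
try:
    writable = await get_specific_user_permission_datasets(
        user_id, "write", [dataset_id_1, dataset_id_2]
    )
except PermissionDeniedError:
    # Raised when the user lacks "write" on at least one requested dataset,
    # or on every dataset when no ids are passed.
    writable = []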

View file

@ -0,0 +1,21 @@
import sqlalchemy.exc
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import TenantNotFoundError
from ...models.Tenant import Tenant
async def get_tenant(tenant_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
result = await session.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.unique().scalar_one()
if not tenant:
raise TenantNotFoundError
return tenant
except sqlalchemy.exc.NoResultFound:
raise TenantNotFoundError(message=f"Could not find tenant: {tenant_id}")

View file

@ -0,0 +1,46 @@
from sqlalchemy.future import select
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models import Principal, ACL, Permission
from uuid import UUID
from cognee.modules.users.permissions import PERMISSION_TYPES
from cognee.modules.users.exceptions import PermissionNotFoundError
async def give_permission_on_dataset(
principal: Principal,
dataset_id: UUID,
permission_name: str,
):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
permission = (
(await session.execute(select(Permission).filter(Permission.name == permission_name)))
.scalars()
.first()
)
if permission_name not in PERMISSION_TYPES:
# If permission is not in allowed permission types
raise PermissionNotFoundError(
message=f"{permission_name} not found or not in allowed permission types"
)
elif permission is None:
permission = Permission(name=permission_name)
existing_acl = None
else:
# Check if the ACL entry already exists to avoid duplicates
existing_acl = await session.execute(
select(ACL).filter(
ACL.principal_id == principal.id,
ACL.dataset_id == dataset_id,
ACL.permission_id == permission.id,
)
)
existing_acl = existing_acl.scalars().first()
# If no existing ACL entry is found, proceed to add a new one
if existing_acl is None:
acl = ACL(principal_id=principal.id, dataset_id=dataset_id, permission=permission)
session.add(acl)
await session.commit()
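For reference, a small sketch of issuing grants with this helper; the `user` and `dataset` objects are assumed to already exist, and the same pattern is used by ingest_data later in this PR:

# Hypothetical sketch: give the dataset creator the full set of permissions.
for permission_name in ("read", "write", "delete", "share"):
    await give_permission_on_dataset(user, dataset.id, permission_name)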

View file

@ -1,27 +0,0 @@
from sqlalchemy.future import select
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models import User, ACL, Permission
async def give_permission_on_document(
user: User,
document_id: str,
permission_name: str,
):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
permission = (
(await session.execute(select(Permission).filter(Permission.name == permission_name)))
.scalars()
.first()
)
if permission is None:
permission = Permission(name=permission_name)
acl = ACL(principal_id=user.id, data_id=document_id, permission=permission)
session.add(acl)
await session.commit()

View file

@ -0,0 +1 @@
PERMISSION_TYPES = ["read", "write", "delete", "share"]

View file

@ -9,24 +9,40 @@ from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import (
UserNotFoundError,
RoleNotFoundError,
TenantNotFoundError,
PermissionDeniedError,
)
from cognee.modules.users.models import (
User,
Role,
Tenant,
UserRole,
)
async def add_user_to_role(user_id: UUID, role_id: UUID):
async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
role = (await session.execute(select(Role).where(Role.id == role_id))).scalars().first()
if not user:
raise UserNotFoundError
elif not role:
raise RoleNotFoundError
# Only look up the tenant once the role is known to exist
tenant = (
(await session.execute(select(Tenant).where(Tenant.id == role.tenant_id)))
.scalars()
.first()
)
if user.tenant_id != role.tenant_id:
raise TenantNotFoundError(
message="User tenant does not match role tenant. User cannot be added to role."
)
elif tenant.owner_id != owner_id:
raise PermissionDeniedError(
message="User submitting request does not have permission to add user to role."
)
try:
# Add association directly to the association table

View file

@ -4,6 +4,9 @@ from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.methods import get_user
from cognee.modules.users.permissions.methods import get_tenant
from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.modules.users.models import (
Role,
)
@ -11,13 +14,21 @@ from cognee.modules.users.models import (
async def create_role(
role_name: str,
tenant_id: UUID,
owner_id: UUID,
):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(owner_id)
tenant = await get_tenant(user.tenant_id)
if owner_id != tenant.owner_id:
raise PermissionDeniedError(
"User submitting request does not have permission to create role for tenant."
)
try:
# Add association directly to the association table
role = Role(name=role_name, tenant_id=tenant_id)
role = Role(name=role_name, tenant_id=tenant.id)
session.add(role)
except IntegrityError:
raise EntityAlreadyExistsError(message="Role already exists for tenant.")

View file

@ -1 +1,2 @@
from .create_tenant import create_tenant
from .add_user_to_tenant import add_user_to_tenant

View file

@ -0,0 +1,44 @@
from uuid import UUID
from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.methods import get_user
from cognee.modules.users.permissions.methods import get_tenant
from cognee.modules.users.exceptions import (
UserNotFoundError,
TenantNotFoundError,
PermissionDeniedError,
)
async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(user_id)
tenant = await get_tenant(tenant_id)
if not user:
raise UserNotFoundError
elif not tenant:
raise TenantNotFoundError
if tenant.owner_id != owner_id:
raise PermissionDeniedError(
message="Only tenant owner can add other users to organization."
)
try:
if user.tenant_id is None:
user.tenant_id = tenant_id
elif user.tenant_id == tenant_id:
return
else:
# sqlalchemy's IntegrityError can't be raised without arguments here, so signal
# the conflict with the domain error directly.
raise EntityAlreadyExistsError(
message="User is already part of a tenant. Only one tenant can be assigned to user."
)
await session.merge(user)
await session.commit()
except IntegrityError:
raise EntityAlreadyExistsError(
message="User is already part of a tenant. Only one tenant can be assigned to user."
)

View file

@ -1,19 +1,28 @@
from uuid import UUID
from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.models import Tenant
from cognee.modules.users.methods import get_user
async def create_tenant(tenant_name: str):
async def create_tenant(tenant_name: str, user_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
# Add association directly to the association table
tenant = Tenant(name=tenant_name)
user = await get_user(user_id)
if user.tenant_id:
raise EntityAlreadyExistsError(
message="User already has a tenant. New tenant cannot be created."
)
tenant = Tenant(name=tenant_name, owner_id=user_id)
session.add(tenant)
await session.flush()
user.tenant_id = tenant.id
await session.merge(user)
await session.commit()
except IntegrityError:
raise EntityAlreadyExistsError(message="Tenant already exists.")
await session.commit()
await session.refresh(tenant)
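Taken together, the tenant and role helpers support a flow along these lines; this is a sketch that assumes `owner` and `member` are existing User instances and re-reads objects where the in-memory copies may be stale:

# Hypothetical end-to-end sketch of the tenant/role flow (inside an async context).
await create_tenant("acme", owner.id)  # owner becomes the tenant owner
owner = await get_user(owner.id)  # refresh to pick up the new tenant_id
tenant = await get_tenant(owner.tenant_id)
await add_user_to_tenant(member.id, tenant.id, owner.id)  # only the tenant owner may do this
await create_role("engineering", tenant.id, owner.id)
role = await get_role(tenant.id, "engineering")
await add_user_to_role(member.id, role.id, owner.id)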

View file

@ -2,4 +2,4 @@ from .translate_text import translate_text
from .detect_language import detect_language
from .classify_documents import classify_documents
from .extract_chunks_from_documents import extract_chunks_from_documents
from .check_permissions_on_documents import check_permissions_on_documents
from .check_permissions_on_dataset import check_permissions_on_dataset

View file

@ -1,10 +1,10 @@
from cognee.modules.data.processing.document_types import Document
from cognee.modules.users.permissions.methods import check_permission_on_documents
from cognee.modules.users.permissions.methods import check_permission_on_dataset
from typing import List
async def check_permissions_on_documents(
documents: list[Document], user, permissions
async def check_permissions_on_dataset(
documents: List[Document], context: dict, user, permissions
) -> List[Document]:
"""
Validates a user's permissions on a list of documents.
@ -14,13 +14,12 @@ async def check_permissions_on_documents(
- It is designed to validate multiple permissions in a sequential manner for the same set of documents.
- Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
"""
document_ids = [document.id for document in documents]
for permission in permissions:
await check_permission_on_documents(
await check_permission_on_dataset(
user,
permission,
document_ids,
context["dataset"].id,
)
return documents
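A hedged sketch of how this task might be invoked; the documents, dataset, and user are placeholders, and the context dict mirrors the `context["dataset"]` access above:

# Hypothetical sketch: enforce read permission on the dataset a batch of
# documents belongs to before continuing the pipeline.
documents = await check_permissions_on_dataset(
    documents=documents,
    context={"dataset": dataset},
    user=user,
    permissions=["read"],
)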

View file

@ -2,6 +2,7 @@ import dlt
import s3fs
import json
import inspect
from uuid import UUID
from typing import Union, BinaryIO, Any, List, Optional
import cognee.modules.ingestion as ingestion
from cognee.infrastructure.databases.relational import get_relational_engine
@ -9,7 +10,8 @@ from cognee.modules.data.methods import create_dataset, get_dataset_data, get_da
from cognee.modules.users.methods import get_default_user
from cognee.modules.data.models.DatasetData import DatasetData
from cognee.modules.users.models import User
from cognee.modules.users.permissions.methods import give_permission_on_document
from cognee.modules.users.permissions.methods import give_permission_on_dataset
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from .get_dlt_destination import get_dlt_destination
from .save_data_item_to_storage import save_data_item_to_storage
@ -18,7 +20,11 @@ from cognee.api.v1.add.config import get_s3_config
async def ingest_data(
data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None
data: Any,
dataset_name: str,
user: User,
node_set: Optional[List[str]] = None,
dataset_id: UUID = None,
):
destination = get_dlt_destination()
@ -73,7 +79,11 @@ async def ingest_data(
}
async def store_data_to_dataset(
data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None
data: Any,
dataset_name: str,
user: User,
node_set: Optional[List[str]] = None,
dataset_id: UUID = None,
):
if not isinstance(data, list):
# Convert data to a list as we work with lists further down.
@ -104,7 +114,17 @@ async def ingest_data(
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
dataset = await create_dataset(dataset_name, user, session)
if dataset_id:
# Retrieve existing dataset
dataset = await get_specific_user_permission_datasets(
user.id, "write", [dataset_id]
)
# get_specific_user_permission_datasets returns a list; unwrap the single Dataset
if isinstance(dataset, list):
dataset = dataset[0]
else:
# Create new one
dataset = await create_dataset(dataset_name, user, session)
# Check to see if data should be updated
data_point = (
@ -138,6 +158,7 @@ async def ingest_data(
node_set=json.dumps(node_set) if node_set else None,
token_count=-1,
)
session.add(data_point)
# Check if data is already in dataset
dataset_data = (
@ -150,17 +171,20 @@ async def ingest_data(
# If data is not present in dataset add it
if dataset_data is None:
dataset.data.append(data_point)
await session.merge(dataset)
await session.commit()
await give_permission_on_document(user, data_id, "read")
await give_permission_on_document(user, data_id, "write")
await give_permission_on_dataset(user, dataset.id, "read")
await give_permission_on_dataset(user, dataset.id, "write")
await give_permission_on_dataset(user, dataset.id, "delete")
await give_permission_on_dataset(user, dataset.id, "share")
return file_paths
db_engine = get_relational_engine()
file_paths = await store_data_to_dataset(data, dataset_name, user, node_set)
file_paths = await store_data_to_dataset(data, dataset_name, user, node_set, dataset_id)
# Note: DLT pipeline has its own event loop, therefore objects created in another event loop
# can't be used inside the pipeline
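As a usage note, a hedged sketch of calling the updated entry point with the new dataset_id parameter; `existing_dataset_id` is a placeholder UUID for a dataset the user already holds "write" permission on:

# Hypothetical sketch: append new data to an existing dataset by id. When dataset_id
# is provided, the dataset is resolved through the user's "write" permission instead
# of being created from dataset_name.
await ingest_data(
    data=["example text"],
    dataset_name="my_dataset",
    user=user,
    dataset_id=existing_dataset_id,
)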

View file

@ -0,0 +1,71 @@
import asyncio
import os
import pathlib
import cognee
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
from cognee.modules.search.types import SearchType
logger = get_logger()
async def main():
data_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_library")
).resolve()
)
cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_library")
).resolve()
)
cognee.config.system_root_directory(cognee_directory_path)
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
await cognee.add(["TEST1"], "test1")
await cognee.add(["TEST2"], "test2")
task_1_config = {
"vector_db_url": "cognee1.test",
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
task_2_config = {
"vector_db_url": "cognee2.test",
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
task_1_graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": "kuzu1.db",
}
task_2_graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": "kuzu2.db",
}
# schedule both cognify calls concurrently
task1 = asyncio.create_task(
cognee.cognify(
["test1"], vector_db_config=task_1_config, graph_db_config=task_1_graph_config
)
)
task2 = asyncio.create_task(
cognee.cognify(
["test2"], vector_db_config=task_2_config, graph_db_config=task_2_graph_config
)
)
# wait until both are done (raises first error if any)
await asyncio.gather(task1, task2)
if __name__ == "__main__":
asyncio.run(main(), debug=True)

View file

@ -144,7 +144,6 @@ async def main():
graph_completion = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION,
query_text=random_node_name,
datasets=[dataset_name_2],
)
assert len(graph_completion) != 0, "Completion result is empty."
print("Completion result is:")

View file

@ -49,7 +49,11 @@ async def main():
from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine()
random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0]
search_results = await vector_engine.search("Entity_name", "Quantum computer")
assert len(search_results) != 0, "The search results list is empty."
random_node = search_results[0]
random_node_name = random_node.payload["text"]
search_results = await cognee.search(

View file

@ -24,13 +24,9 @@ def mock_user():
@pytest.mark.asyncio
@patch.object(search_module, "log_query")
@patch.object(search_module, "log_result")
@patch.object(search_module, "get_document_ids_for_user")
@patch.object(search_module, "specific_search")
@patch.object(search_module, "parse_id")
async def test_search(
mock_parse_id,
mock_specific_search,
mock_get_document_ids,
mock_log_result,
mock_log_query,
mock_user,
@ -48,26 +44,19 @@ async def test_search(
# Mock document IDs
doc_id1 = uuid.uuid4()
doc_id2 = uuid.uuid4()
doc_id3 = uuid.uuid4() # This one will be filtered out
mock_get_document_ids.return_value = [doc_id1, doc_id2]
# Mock search results
search_results = [
{"document_id": str(doc_id1), "content": "Result 1"},
{"document_id": str(doc_id2), "content": "Result 2"},
{"document_id": str(doc_id3), "content": "Result 3"}, # Should be filtered out
]
mock_specific_search.return_value = search_results
# Mock parse_id to return the same UUID
mock_parse_id.side_effect = lambda x: uuid.UUID(x) if x else None
# Execute
results = await search(query_text, query_type, datasets, mock_user)
await search(query_text, query_type, datasets, mock_user)
# Verify
mock_log_query.assert_called_once_with(query_text, query_type.value, mock_user.id)
mock_get_document_ids.assert_called_once_with(mock_user.id, datasets)
mock_specific_search.assert_called_once_with(
query_type,
query_text,
@ -78,11 +67,6 @@ async def test_search(
node_name=None,
)
# Only the first two results should be included (doc_id3 is filtered out)
assert len(results) == 2
assert results[0]["document_id"] == str(doc_id1)
assert results[1]["document_id"] == str(doc_id2)
# Verify result logging
mock_log_result.assert_called_once()
# Check that the first argument is the query ID

282
poetry.lock generated
View file

@ -435,7 +435,7 @@ description = "Timeout context manager for asyncio programs"
optional = false
python-versions = ">=3.7"
groups = ["main"]
markers = "python_version < \"3.11\""
markers = "python_version == \"3.10\""
files = [
{file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
@ -448,7 +448,7 @@ description = "Timeout context manager for asyncio programs"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"falkordb\" and python_full_version < \"3.11.3\" and python_version == \"3.11\""
markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\" and extra == \"falkordb\""
files = [
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
@ -593,7 +593,7 @@ description = "Backport of CPython tarfile module"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"deepeval\" and python_version <= \"3.11\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\") and extra == \"deepeval\""
files = [
{file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"},
{file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"},
@ -1226,7 +1226,7 @@ description = "Cross-platform colored terminal text."
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
groups = ["main"]
markers = "(sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (python_version < \"3.13\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\")"
markers = "(platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or platform_system == \"Windows\")"
files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
@ -2098,7 +2098,7 @@ description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
groups = ["main"]
markers = "python_version < \"3.11\""
markers = "python_version == \"3.10\""
files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@ -2225,7 +2225,7 @@ description = "Fast, light, accurate library built for retrieval embedding gener
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [
{file = "fastembed-0.6.0-py3-none-any.whl", hash = "sha256:a08385e9388adea0529a586004f2d588c9787880a510e4e5d167127a11e75328"},
{file = "fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733"},
@ -2974,7 +2974,7 @@ description = "HTTP/2-based RPC framework"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"gemini\" or extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"milvus\" or python_version < \"3.11\" and (extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"milvus\")"
markers = "python_version == \"3.10\" and (extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"milvus\") or extra == \"gemini\" or extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"milvus\""
files = [
{file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
{file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"},
@ -3078,7 +3078,7 @@ description = "Protobuf code generator for gRPC"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "extra == \"weaviate\" or python_version >= \"3.13\" and (extra == \"weaviate\" or extra == \"qdrant\")"
markers = "extra == \"weaviate\""
files = [
{file = "grpcio_tools-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:c701aaa51fde1f2644bd94941aa94c337adb86f25cd03cf05e37387aaea25800"},
{file = "grpcio_tools-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:6a722bba714392de2386569c40942566b83725fa5c5450b8910e3832a5379469"},
@ -3631,7 +3631,7 @@ description = "IPython: Productive Interactive Computing"
optional = true
python-versions = ">=3.10"
groups = ["main"]
markers = "python_version < \"3.11\" and (extra == \"notebook\" or extra == \"dev\")"
markers = "python_version == \"3.10\" and (extra == \"notebook\" or extra == \"dev\")"
files = [
{file = "ipython-8.35.0-py3-none-any.whl", hash = "sha256:e6b7470468ba6f1f0a7b116bb688a3ece2f13e2f94138e508201fad677a788ba"},
{file = "ipython-8.35.0.tar.gz", hash = "sha256:d200b7d93c3f5883fc36ab9ce28a18249c7706e51347681f80a0aef9895f2520"},
@ -4454,50 +4454,50 @@ files = [
[[package]]
name = "kuzu"
version = "0.8.2"
version = "0.9.0"
description = "Highly scalable, extremely fast, easy-to-use embeddable graph database"
optional = true
python-versions = "*"
groups = ["main"]
markers = "extra == \"kuzu\""
markers = "extra == \"api\" or extra == \"kuzu\""
files = [
{file = "kuzu-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:78bcdf6cc7b130bce8b307709e8d7bddd2e9104b2b696a9dc52574556e754570"},
{file = "kuzu-0.8.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b42e3e9b1eacf830700287b05e96f9455b89dd4140085053e6c86b32c61e8d5c"},
{file = "kuzu-0.8.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf06c602dc0231268d9cfa56a62afef15f8fca3be1ccd2cad22047a14bff4ae0"},
{file = "kuzu-0.8.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50a873e7cd0c2e8e3093e9af14cffb14e49f1f67eceb32df3d0454ce101402d3"},
{file = "kuzu-0.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:4d36261444d31432606f3f3ed00624f1a3a8edcf7d830564c72b76ffbdf4d318"},
{file = "kuzu-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6c1694c6d1b19c46ad5d416cac429ccf1fe91aca4d367664e3aa0afa59800f93"},
{file = "kuzu-0.8.2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:00156c64523a1377ffced998bdb031709336f90543da69544c0ab4b40d533692"},
{file = "kuzu-0.8.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc75f26afe8815b046cfb0d931303da6c36ce3afb49d4ae18a3899f23e62020f"},
{file = "kuzu-0.8.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f0de6910724a74cc492354e903cf76db78b6353eef1e2edfa0b79d600c3c572"},
{file = "kuzu-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:56e99c39a725943aa7ad96ada8f29706da3d53cc98385f2c663b8ea026f0dce3"},
{file = "kuzu-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adcc250b34963a6eea62b59d47a091018d83e61fb2e95552795ab61f103052be"},
{file = "kuzu-0.8.2-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:f72036924466143675980baed02a26c0fca15b6254c11de9a9c18d28fe66247e"},
{file = "kuzu-0.8.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2fd7895fdfd9df880091d32bfb79c148f849659c67e2b9e185f952a6bde9139"},
{file = "kuzu-0.8.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:68486e291aa8a61264be7e31233ec34eeb6da2402f4b980c3f2b67f9ccbbea3a"},
{file = "kuzu-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:7cce7d06e6f09cd488c62be7cafe78752b037ed9e6585ed3da9df029104b1987"},
{file = "kuzu-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa0495f856f2e5f5067e281dab3fbc170aba0721d1f56156a8cd9fa50e706f91"},
{file = "kuzu-0.8.2-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:823577b472ba63c3b36e5ff81e2b744736f9eaf0b71585c247f3defc9d268f53"},
{file = "kuzu-0.8.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bde76f38d293f49ad283a4831bd32d41f185b93a75d388d67f9b8996678203e9"},
{file = "kuzu-0.8.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cdb189012613ecd26630096796e3817c260deea85782e764309cd36b2c39dac5"},
{file = "kuzu-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:71fb98721f9c46f960a5c3baea6b083026485c4b9a3e74ab01418243e29e3753"},
{file = "kuzu-0.8.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e12726af2cb552ab7b60e2b4312469359bb3b4b45ddbcfb75220def4be6f566"},
{file = "kuzu-0.8.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055f2cd9741bf39161f9ccff80428f8fb80b1910b2450b05bbe848487ba694f5"},
{file = "kuzu-0.8.2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:18cb3da3a650f8dfde3639fbd6319a5ad6f98f60689c5dd96d20d8d1fc184d4c"},
{file = "kuzu-0.8.2-cp37-cp37m-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e55a8fddc21ac3e27b3cf2815d93264dd3c89e9ad8c7f3960d51bdfe48a02709"},
{file = "kuzu-0.8.2-cp37-cp37m-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d93600aceacdd7903aa39f016cb641811f96e4825b027a135aaaa1d82e23d24"},
{file = "kuzu-0.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:68601d9e741c7815c3d3f46a9c6884853388bcc6920945f069d5dc4f9492c9c5"},
{file = "kuzu-0.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32d7ff56d793df27f76129b8b15bd85c940e59bcb67acd189b6a5ed1af5e8b44"},
{file = "kuzu-0.8.2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:5e639f24be2fca78bf3890774f273aa1a6b149bfdbeb5c7e966e03b8f610be98"},
{file = "kuzu-0.8.2-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1caf46e2721dabed94b65cdcf3990551af2f3913c3f2dcd39f3e5397f0134243"},
{file = "kuzu-0.8.2-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5333c9e4557ccbfef7b822793ec382848411c8d11fdee063064b41bd1828404"},
{file = "kuzu-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:765a8bd4c5b9d24583eb8aaa20ecd753d78220138a82bf643ec592ffb8128298"},
{file = "kuzu-0.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a215ff235d17a41c50d1cf2bd8e67a196eff32f23e59d989b1a40e6192f2008"},
{file = "kuzu-0.8.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:074b5440186e4214b653d46f8d5a15d4b4cae1185d4656eaf598fe9b840fcdca"},
{file = "kuzu-0.8.2-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32303a9533674a35e52d429f1446a82e2fc97c423618bc86aaafef1d4d2621e4"},
{file = "kuzu-0.8.2-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0baea115bc55c8ed710f2beae8f02e46cf2bac42326b4e2c3acd25a76031f59d"},
{file = "kuzu-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:70e031131c5b8e327edd63993b05fb04196b74d0ade1baf0f4005968610310ed"},
{file = "kuzu-0.8.2.tar.gz", hash = "sha256:68ad72b3ef6a32a41ecfa955fa4ca9ca0c8a36d3a1bc13e34cc70c971b2b8ca7"},
{file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec9f216d67c092ea52086c99cf4b1deabe0f8daaf47c80cf1892b3b41c57d58a"},
{file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:bda6d845bf1c7da204ffa7730573118f2d43fe6b14b1a5d0d2845ec3d3481362"},
{file = "kuzu-0.9.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab5b28f101c93899fc15668b6cb25f6db3d4a9844fcc4affed293caaaafaa4b7"},
{file = "kuzu-0.9.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:183bb1de19ffec1c3b07c0b4d5eecf02eb4eeafc1d50aea409bc91e1fad4d6d2"},
{file = "kuzu-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:2e36ce7da1bbebb538082656de18a717895d9352a33c8bcac170ef2fc22a4902"},
{file = "kuzu-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82dd690d823df816e7826945e5243a4ae65e3e948ef512709a59205b84b9f6dd"},
{file = "kuzu-0.9.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:c394e019a14e9c5636228cf1acd333997c31e5da3d9a60a1df2c03b828438432"},
{file = "kuzu-0.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7d493f88ed31eada4b88a92b115bc6085c60498c47336ab06a489e75a727bab"},
{file = "kuzu-0.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:171b47cf2b3923c813f1ed88fb9d3964a9355129b5d3ebca54eba3450bfc1f97"},
{file = "kuzu-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:3c8a8a611f599801c8db6aeffb978cd1badcfa3ec8f79c15b701810fee71765f"},
{file = "kuzu-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:509af4029f9dcb9c3e843a825df44ec30009a70fad891cbcfb611c3b8cdfefd6"},
{file = "kuzu-0.9.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:885f17f6e46c15ecef121fc57a941f8b60f0a5c1d3995813bb7a4c7437fb2259"},
{file = "kuzu-0.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f2e35aa345b543a4a21de0e82b70eac4c753987cfa4ded75ae7f9f23edbf11"},
{file = "kuzu-0.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:67430c9813607a3b901c4a1e6bfb3b93538af230bc821e675c552a162818f589"},
{file = "kuzu-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:549f4a72f815554fb998582876c5875cb0917a192e6a58d196e8247fd8902701"},
{file = "kuzu-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ec2e709599b4015d0a179a191dd7850e7bf076f83b37b70d0dc2e4ee59ce7725"},
{file = "kuzu-0.9.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:8aad4fbd74b283ffb0b115138dfc62d9775c8f19ba62ab243e55e3cd648652b6"},
{file = "kuzu-0.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba9dd4f412e31d34345b6461fc9489955ae9566abf426e56af478b6e791b735a"},
{file = "kuzu-0.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:340502cbce54f21a5b2440a75c28d61ddfd26d6d6848e9daa6140798bdd5b367"},
{file = "kuzu-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:e1ddb189dfa2aee0123dcd1a5ccc5b831a7f297233a09fccfd76294fc2f9e6bd"},
{file = "kuzu-0.9.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fae68db87ba48268228c89e70ed1fde2f43843d8ed6b2debaafd314c45e8542"},
{file = "kuzu-0.9.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0279ba37c639d96f303eb6ad4481e634495be31210991d8008c385ee50b4e0a"},
{file = "kuzu-0.9.0-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:3ca7424fe3831df687552b89903aa57fb88efff9c25df15c5d678fae7c933199"},
{file = "kuzu-0.9.0-cp37-cp37m-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bce9284913434661f47cecfc763f8997a61ebd2bb7bfe993970c1403924708fa"},
{file = "kuzu-0.9.0-cp37-cp37m-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:66040cdf9a59a5423b49c3d2bc01a089114b573ee1345d5a7c912276fbca0135"},
{file = "kuzu-0.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8e195774364123845df071eddb18873ce8c78244dd6f854badfe65053b058088"},
{file = "kuzu-0.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2906f29ee36f9f642bdb8f5222c94f667092e38bde7dc53ebb252f9eb524ab6a"},
{file = "kuzu-0.9.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:4c3218e266766080fe1b31325d0156d1b334f62ae23dac854c3e4919115ef8c6"},
{file = "kuzu-0.9.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a26214c1600c21f5e4aa96585706953a8792ad77e14788710d78f8af0d6b74ec"},
{file = "kuzu-0.9.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b153fb28db9336757346eabb24b8c179b4ed48578a0ef158210fbc935df2184"},
{file = "kuzu-0.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:b6ee075e2571b11a434efb004cb0b3a2fbd7aa416ae680816869f1388e5fc734"},
{file = "kuzu-0.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:56874ae750ff99b15c959d884b175adf24ac912ab08e084c42784902b2bce2fb"},
{file = "kuzu-0.9.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:6e0265b1ad445500397dc0df3cc4e7faddfd67fcd3d0952d9a4cdab6b77b47e9"},
{file = "kuzu-0.9.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d66e69a3e135ea123cc7c9c2e507bbb614ffdbfe7be835782c6a588ae63ff900"},
{file = "kuzu-0.9.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e11c8b7186798ad95563e1d7ebf84495d817c406bd28c21af7170467e37e35e"},
{file = "kuzu-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:4fb80eb6c71b02c4e57e3570b079c494082f7ff819d4c06ac482914f29211294"},
{file = "kuzu-0.9.0.tar.gz", hash = "sha256:2e59f3d4d1fc385e9e90d7ae09f072ec2f4cfeff508582523a0034ceb076f6eb"},
]
[[package]]
@ -5087,7 +5087,7 @@ description = "Python logging made (stupidly) simple"
optional = true
python-versions = "<4.0,>=3.5"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [
{file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"},
{file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"},
@ -5827,7 +5827,7 @@ description = "Python extension for MurmurHash (MurmurHash3), a set of fast and
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [
{file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec"},
{file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a"},
@ -6437,6 +6437,7 @@ description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\""
files = [
{file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
{file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
@ -6476,6 +6477,69 @@ files = [
{file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
]
[[package]]
name = "numpy"
version = "2.1.0"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.10"
groups = ["main"]
markers = "python_version >= \"3.13\""
files = [
{file = "numpy-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6326ab99b52fafdcdeccf602d6286191a79fe2fda0ae90573c5814cd2b0bc1b8"},
{file = "numpy-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0937e54c09f7a9a68da6889362ddd2ff584c02d015ec92672c099b61555f8911"},
{file = "numpy-2.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:30014b234f07b5fec20f4146f69e13cfb1e33ee9a18a1879a0142fbb00d47673"},
{file = "numpy-2.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:899da829b362ade41e1e7eccad2cf274035e1cb36ba73034946fccd4afd8606b"},
{file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08801848a40aea24ce16c2ecde3b756f9ad756586fb2d13210939eb69b023f5b"},
{file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:398049e237d1aae53d82a416dade04defed1a47f87d18d5bd615b6e7d7e41d1f"},
{file = "numpy-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0abb3916a35d9090088a748636b2c06dc9a6542f99cd476979fb156a18192b84"},
{file = "numpy-2.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10e2350aea18d04832319aac0f887d5fcec1b36abd485d14f173e3e900b83e33"},
{file = "numpy-2.1.0-cp310-cp310-win32.whl", hash = "sha256:f6b26e6c3b98adb648243670fddc8cab6ae17473f9dc58c51574af3e64d61211"},
{file = "numpy-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f505264735ee074250a9c78247ee8618292091d9d1fcc023290e9ac67e8f1afa"},
{file = "numpy-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:76368c788ccb4f4782cf9c842b316140142b4cbf22ff8db82724e82fe1205dce"},
{file = "numpy-2.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f8e93a01a35be08d31ae33021e5268f157a2d60ebd643cfc15de6ab8e4722eb1"},
{file = "numpy-2.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9523f8b46485db6939bd069b28b642fec86c30909cea90ef550373787f79530e"},
{file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54139e0eb219f52f60656d163cbe67c31ede51d13236c950145473504fa208cb"},
{file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ebbf9fbdabed208d4ecd2e1dfd2c0741af2f876e7ae522c2537d404ca895c3"},
{file = "numpy-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:378cb4f24c7d93066ee4103204f73ed046eb88f9ad5bb2275bb9fa0f6a02bd36"},
{file = "numpy-2.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8f699a709120b220dfe173f79c73cb2a2cab2c0b88dd59d7b49407d032b8ebd"},
{file = "numpy-2.1.0-cp311-cp311-win32.whl", hash = "sha256:ffbd6faeb190aaf2b5e9024bac9622d2ee549b7ec89ef3a9373fa35313d44e0e"},
{file = "numpy-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0af3a5987f59d9c529c022c8c2a64805b339b7ef506509fba7d0556649b9714b"},
{file = "numpy-2.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fe76d75b345dc045acdbc006adcb197cc680754afd6c259de60d358d60c93736"},
{file = "numpy-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f358ea9e47eb3c2d6eba121ab512dfff38a88db719c38d1e67349af210bc7529"},
{file = "numpy-2.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:dd94ce596bda40a9618324547cfaaf6650b1a24f5390350142499aa4e34e53d1"},
{file = "numpy-2.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b47c551c6724960479cefd7353656498b86e7232429e3a41ab83be4da1b109e8"},
{file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0756a179afa766ad7cb6f036de622e8a8f16ffdd55aa31f296c870b5679d745"},
{file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24003ba8ff22ea29a8c306e61d316ac74111cebf942afbf692df65509a05f111"},
{file = "numpy-2.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b34fa5e3b5d6dc7e0a4243fa0f81367027cb6f4a7215a17852979634b5544ee0"},
{file = "numpy-2.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4f982715e65036c34897eb598d64aef15150c447be2cfc6643ec7a11af06574"},
{file = "numpy-2.1.0-cp312-cp312-win32.whl", hash = "sha256:c4cd94dfefbefec3f8b544f61286584292d740e6e9d4677769bc76b8f41deb02"},
{file = "numpy-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0cdef204199278f5c461a0bed6ed2e052998276e6d8ab2963d5b5c39a0500bc"},
{file = "numpy-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ab81ccd753859ab89e67199b9da62c543850f819993761c1e94a75a814ed667"},
{file = "numpy-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442596f01913656d579309edcd179a2a2f9977d9a14ff41d042475280fc7f34e"},
{file = "numpy-2.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:848c6b5cad9898e4b9ef251b6f934fa34630371f2e916261070a4eb9092ffd33"},
{file = "numpy-2.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:54c6a63e9d81efe64bfb7bcb0ec64332a87d0b87575f6009c8ba67ea6374770b"},
{file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652e92fc409e278abdd61e9505649e3938f6d04ce7ef1953f2ec598a50e7c195"},
{file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab32eb9170bf8ffcbb14f11613f4a0b108d3ffee0832457c5d4808233ba8977"},
{file = "numpy-2.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:8fb49a0ba4d8f41198ae2d52118b050fd34dace4b8f3fb0ee34e23eb4ae775b1"},
{file = "numpy-2.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44e44973262dc3ae79e9063a1284a73e09d01b894b534a769732ccd46c28cc62"},
{file = "numpy-2.1.0-cp313-cp313-win32.whl", hash = "sha256:ab83adc099ec62e044b1fbb3a05499fa1e99f6d53a1dde102b2d85eff66ed324"},
{file = "numpy-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:de844aaa4815b78f6023832590d77da0e3b6805c644c33ce94a1e449f16d6ab5"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:343e3e152bf5a087511cd325e3b7ecfd5b92d369e80e74c12cd87826e263ec06"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f07fa2f15dabe91259828ce7d71b5ca9e2eb7c8c26baa822c825ce43552f4883"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5474dad8c86ee9ba9bb776f4b99ef2d41b3b8f4e0d199d4f7304728ed34d0300"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1f817c71683fd1bb5cff1529a1d085a57f02ccd2ebc5cd2c566f9a01118e3b7d"},
{file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a3336fbfa0d38d3deacd3fe7f3d07e13597f29c13abf4d15c3b6dc2291cbbdd"},
{file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a894c51fd8c4e834f00ac742abad73fc485df1062f1b875661a3c1e1fb1c2f6"},
{file = "numpy-2.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:9156ca1f79fc4acc226696e95bfcc2b486f165a6a59ebe22b2c1f82ab190384a"},
{file = "numpy-2.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:624884b572dff8ca8f60fab591413f077471de64e376b17d291b19f56504b2bb"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15ef8b2177eeb7e37dd5ef4016f30b7659c57c2c0b57a779f1d537ff33a72c7b"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:e5f0642cdf4636198a4990de7a71b693d824c56a757862230454629cf62e323d"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15976718c004466406342789f31b6673776360f3b1e3c575f25302d7e789575"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6c1de77ded79fef664d5098a66810d4d27ca0224e9051906e634b3f7ead134c2"},
{file = "numpy-2.1.0.tar.gz", hash = "sha256:7dc90da0081f7e1da49ec4e398ede6a8e9cc4f5ebe5f9e06b443ed889ee9aaa2"},
]
[[package]]
name = "oauthlib"
version = "3.2.2"
@ -6929,8 +6993,8 @@ files = [
[package.dependencies]
numpy = [
{version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
]
python-dateutil = ">=2.8.2"
pytz = ">=2020.1"
@ -7028,7 +7092,7 @@ description = "Python datetimes made easy"
optional = false
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version < \"3.13\""
markers = "python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\""
files = [
{file = "pendulum-3.1.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:aa545a59e6517cf43597455a6fb44daa4a6e08473d67a7ad34e4fa951efb9620"},
{file = "pendulum-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:299df2da6c490ede86bb8d58c65e33d7a2a42479d21475a54b467b03ccb88531"},
@ -7713,7 +7777,7 @@ description = "Fast and parallel snowball stemmer"
optional = true
python-versions = "*"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [
{file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bfbd9034ae00419ff2154e33b8f5b4c4d99d1f9271f31ed059e5c7e9fa005844"},
{file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7162ae66df2bb0fc39b350c24a049f5f5151c03c046092ba095c2141ec223a2"},
@ -8117,8 +8181,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [
{version = ">=0.2", markers = "python_version < \"3.11\""},
{version = ">=0.3.6", markers = "python_version >= \"3.11\""},
{version = ">=0.3.7", markers = "python_version >= \"3.12\""},
{version = ">=0.3.6", markers = "python_version == \"3.11\""},
]
isort = ">=4.2.5,<5.13 || >5.13,<7"
mccabe = ">=0.6,<0.8"
@ -8861,41 +8925,15 @@ files = [
[[package]]
name = "qdrant-client"
version = "1.12.1"
description = "Client library for the Qdrant vector search engine"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version >= \"3.13\" and extra == \"qdrant\""
files = [
{file = "qdrant_client-1.12.1-py3-none-any.whl", hash = "sha256:b2d17ce18e9e767471368380dd3bbc4a0e3a0e2061fedc9af3542084b48451e0"},
{file = "qdrant_client-1.12.1.tar.gz", hash = "sha256:35e8e646f75b7b883b3d2d0ee4c69c5301000bba41c82aa546e985db0f1aeb72"},
]
[package.dependencies]
grpcio = ">=1.41.0"
grpcio-tools = ">=1.41.0"
httpx = {version = ">=0.20.0", extras = ["http2"]}
numpy = {version = ">=1.26", markers = "python_version >= \"3.12\""}
portalocker = ">=2.7.0,<3.0.0"
pydantic = ">=1.10.8"
urllib3 = ">=1.26.14,<3"
[package.extras]
fastembed = ["fastembed (==0.3.6) ; python_version < \"3.13\""]
fastembed-gpu = ["fastembed-gpu (==0.3.6) ; python_version < \"3.13\""]
[[package]]
name = "qdrant-client"
version = "1.14.1"
version = "1.14.2"
description = "Client library for the Qdrant vector search engine"
optional = true
python-versions = ">=3.9"
groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"qdrant\""
markers = "extra == \"qdrant\""
files = [
{file = "qdrant_client-1.14.1-py3-none-any.whl", hash = "sha256:1c4d5ed791873698da8b5df68df16bb203ec1b0cd6cec0fd6002572a06291a1b"},
{file = "qdrant_client-1.14.1.tar.gz", hash = "sha256:75352057ea59fdd7987313dc9cef4d83953591d083028d94eac99cd0e5e2f607"},
{file = "qdrant_client-1.14.2-py3-none-any.whl", hash = "sha256:7c283b1f0e71db9c21b85d898fb395791caca2a6d56ee751da96d797b001410c"},
{file = "qdrant_client-1.14.2.tar.gz", hash = "sha256:da5cab4d367d099d1330b6f30d45aefc8bd76f8b8f9d8fa5d4f813501b93af0d"},
]
[package.dependencies]
@ -8904,6 +8942,7 @@ httpx = {version = ">=0.20.0", extras = ["http2"]}
numpy = [
{version = ">=1.21", markers = "python_version >= \"3.10\" and python_version < \"3.12\""},
{version = ">=1.26", markers = "python_version == \"3.12\""},
{version = ">=2.1.0", markers = "python_version >= \"3.13\""},
]
portalocker = ">=2.7.0,<3.0.0"
protobuf = ">=3.20.0"
@ -10485,7 +10524,7 @@ description = "A lil' TOML parser"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version < \"3.11\" and (extra == \"dev\" or extra == \"notebook\" or extra == \"deepeval\")"
markers = "python_version == \"3.10\" and (extra == \"dev\" or extra == \"notebook\" or extra == \"deepeval\")"
files = [
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@ -10981,7 +11020,7 @@ description = "A library that prepares raw documents for downstream ML tasks."
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "extra == \"docs\""
markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"docs\""
files = [
{file = "unstructured-0.16.25-py3-none-any.whl", hash = "sha256:14719ccef2830216cf1c5bf654f75e2bf07b17ca5dcee9da5ac74618130fd337"},
{file = "unstructured-0.16.25.tar.gz", hash = "sha256:73b9b0f51dbb687af572ecdb849a6811710b9cac797ddeab8ee80fa07d8aa5e6"},
@ -11039,6 +11078,71 @@ rtf = ["pypandoc"]
tsv = ["pandas"]
xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
[[package]]
name = "unstructured"
version = "0.17.2"
description = "A library that prepares raw documents for downstream ML tasks."
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "python_version >= \"3.13\" and extra == \"docs\""
files = [
{file = "unstructured-0.17.2-py3-none-any.whl", hash = "sha256:527dd26a4b273aebef2f9119c9d4f0d0ce17640038d92296d23abe89be123840"},
{file = "unstructured-0.17.2.tar.gz", hash = "sha256:af18c3caef0a6c562cf77e34ee8b6ff522b605031d2336ffe565df66f126aa46"},
]
[package.dependencies]
backoff = "*"
beautifulsoup4 = "*"
chardet = "*"
dataclasses-json = "*"
emoji = "*"
filetype = "*"
html5lib = "*"
langdetect = "*"
lxml = "*"
markdown = {version = "*", optional = true, markers = "extra == \"md\""}
networkx = {version = "*", optional = true, markers = "extra == \"xlsx\""}
nltk = "*"
numpy = "*"
openpyxl = {version = "*", optional = true, markers = "extra == \"xlsx\""}
pandas = {version = "*", optional = true, markers = "extra == \"csv\" or extra == \"tsv\" or extra == \"xlsx\""}
psutil = "*"
pypandoc = {version = "*", optional = true, markers = "extra == \"epub\" or extra == \"odt\" or extra == \"org\" or extra == \"rst\" or extra == \"rtf\""}
python-docx = {version = ">=1.1.2", optional = true, markers = "extra == \"doc\" or extra == \"docx\" or extra == \"odt\""}
python-iso639 = "*"
python-magic = "*"
python-oxmsg = "*"
python-pptx = {version = ">=1.0.1", optional = true, markers = "extra == \"ppt\" or extra == \"pptx\""}
rapidfuzz = "*"
requests = "*"
tqdm = "*"
typing-extensions = "*"
unstructured-client = "*"
wrapt = "*"
xlrd = {version = "*", optional = true, markers = "extra == \"xlsx\""}
[package.extras]
all-docs = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
csv = ["pandas"]
doc = ["python-docx (>=1.1.2)"]
docx = ["python-docx (>=1.1.2)"]
epub = ["pypandoc"]
huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"]
image = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)"]
local-inference = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
md = ["markdown"]
odt = ["pypandoc", "python-docx (>=1.1.2)"]
org = ["pypandoc"]
paddleocr = ["paddlepaddle (>=3.0.0b1)", "unstructured.paddleocr (==2.10.0)"]
pdf = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)"]
ppt = ["python-pptx (>=1.0.1)"]
pptx = ["python-pptx (>=1.0.1)"]
rst = ["pypandoc"]
rtf = ["pypandoc"]
tsv = ["pandas"]
xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
[[package]]
name = "unstructured-client"
version = "0.25.9"
@ -11578,7 +11682,7 @@ description = "A small Python utility to set file creation time on Windows"
optional = true
python-versions = ">=3.5"
groups = ["main"]
markers = "extra == \"codegraph\" and sys_platform == \"win32\" and python_version < \"3.13\""
markers = "extra == \"codegraph\" and sys_platform == \"win32\" and (python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\")"
files = [
{file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"},
{file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"},
@ -11962,7 +12066,7 @@ cffi = ["cffi (>=1.11)"]
[extras]
anthropic = ["anthropic"]
api = ["gunicorn", "uvicorn"]
api = ["gunicorn", "kuzu", "uvicorn"]
chromadb = ["chromadb", "pypika"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
debug = ["debugpy"]
@ -11992,4 +12096,4 @@ weaviate = ["weaviate-client"]
[metadata]
lock-version = "2.1"
python-versions = ">=3.10,<=3.13"
content-hash = "15b319ff8dbe5bd88e41ead93f4e9140b2b7d86d57a707682dd3a308e78ef245"
content-hash = "5bd213f69d6dada714e632097121394992b46bd6d322afa024396847cb945f95"

View file

@ -64,9 +64,10 @@ dependencies = [
api = [
"uvicorn==0.34.0",
"gunicorn>=20.1.0,<21",
"kuzu==0.9.0",
]
weaviate = ["weaviate-client==4.9.6"]
qdrant = ["qdrant-client>=1.9.0,<2"]
qdrant = ["qdrant-client>=1.14.2,<2"]
neo4j = ["neo4j>=5.20.0,<6"]
postgres = [
"psycopg2>=2.9.10,<3",
@ -87,14 +88,14 @@ anthropic = ["anthropic>=0.26.1,<0.27"]
deepeval = ["deepeval>=2.0.1,<3"]
posthog = ["posthog>=3.5.0,<4"]
falkordb = ["falkordb==1.0.9"]
kuzu = ["kuzu==0.8.2"]
kuzu = ["kuzu==0.9.0"]
groq = ["groq==0.8.0"]
milvus = ["pymilvus>=2.5.0,<3"]
chromadb = [
"chromadb>=0.3.0,<0.7",
"pypika==0.48.8",
]
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.16.13,<0.17"]
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.16.13,<18"]
codegraph = [
"fastembed<=0.6.0 ; python_version < '3.13'",
"transformers>=4.46.3,<5",

7732
uv.lock generated

File diff suppressed because it is too large