feat: new Dataset permissions (#869)


## Description

## DCO Affirmation
I affirm that all code in every commit of this pull request conforms to
the terms of the Topoteretes Developer Certificate of Origin.

---------

Co-authored-by: Boris Arzentar <borisarzentar@gmail.com>
Co-authored-by: Boris <boris@topoteretes.com>
Igor Ilic 2025-06-06 14:20:57 +02:00 committed by GitHub
parent ebebbb8958
commit 1ed6cfd918
76 changed files with 5322 additions and 4154 deletions

View file

@ -69,3 +69,11 @@ LITELLM_LOG="ERROR"
# Set this environment variable to disable sending telemetry data
# TELEMETRY_DISABLED=1
# Set this variable to True to enforce usage of backend access control for Cognee
# Note: This is only currently supported by the following databases:
# Relational: SQLite, Postgres
# Vector: LanceDB
# Graph: KuzuDB
#
# It enforces the use of LanceDB and KuzuDB and creates a separate database per Cognee user + dataset
ENABLE_BACKEND_ACCESS_CONTROL=False
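
For orientation, a minimal sketch of how this flag is meant to be used from the Python API. The dataset name and query are illustrative, and the top-level `cognee.add` / `cognee.cognify` / `cognee.search` entry points are assumed from the signatures changed elsewhere in this PR.

```python
import os
import asyncio
import cognee

# Per-dataset LanceDB + Kuzu databases are created when this is enabled
os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"

async def main():
    await cognee.add("Some text to ingest", dataset_name="my_dataset")
    await cognee.cognify(["my_dataset"])
    # With access control on, search is routed through permission-filtered datasets
    results = await cognee.search(query_text="What is in my_dataset?", datasets=["my_dataset"])
    print(results)

asyncio.run(main())
```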

View file

@ -215,3 +215,34 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: poetry run python ./cognee/tests/test_s3.py
test-parallel-databases:
name: Test using different async databases in parallel in Cognee
runs-on: ubuntu-22.04
steps:
- name: Check out repository
uses: actions/checkout@v4
- name: Cognee Setup
uses: ./.github/actions/cognee_setup
with:
python-version: '3.11.x'
- name: Install specific graph db dependency
run: |
poetry install -E kuzu
- name: Run parallel databases test
env:
ENV: 'dev'
LLM_MODEL: ${{ secrets.LLM_MODEL }}
LLM_ENDPOINT: ${{ secrets.LLM_ENDPOINT }}
LLM_API_KEY: ${{ secrets.LLM_API_KEY }}
LLM_API_VERSION: ${{ secrets.LLM_API_VERSION }}
EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
run: poetry run python ./cognee/tests/test_parallel_databases.py

View file

@ -47,7 +47,7 @@ jobs:
relational-db-migration-tests:
name: Relational DB Migration Tests
- needs: [ basic-tests, e2e-tests ]
+ needs: [ basic-tests, e2e-tests, graph-db-tests]
uses: ./.github/workflows/relational_db_migration_tests.yml
secrets: inherit
@ -79,7 +79,7 @@ jobs:
db-examples-tests:
name: DB Examples Tests
- needs: [vector-db-tests]
+ needs: [vector-db-tests, graph-db-tests, relational-db-migration-tests]
uses: ./.github/workflows/db_examples_tests.yml
secrets: inherit

View file

@ -135,6 +135,16 @@ jobs:
run:
shell: bash
services:
qdrant:
image: qdrant/qdrant:v1.14.1
env:
QDRANT__LOG_LEVEL: ERROR
QDRANT__SERVICE__API_KEY: qdrant_api_key
QDRANT__SERVICE__ENABLE_TLS: 0
ports:
- 6333:6333
steps:
- name: Check out
uses: actions/checkout@master
@ -148,6 +158,19 @@ jobs:
run: |
poetry install -E qdrant
- name: Wait for Qdrant to be healthy
run: |
for i in {1..10}; do
if curl -f http://127.0.0.1:6333/healthz; then
echo "Qdrant is healthy!"
exit 0
fi
echo "Waiting for Qdrant to be healthy..."
sleep 3
done
echo "Qdrant failed to become healthy in time"
exit 1
- name: Run default Qdrant
env:
ENV: 'dev'
@ -159,8 +182,8 @@ jobs:
EMBEDDING_ENDPOINT: ${{ secrets.EMBEDDING_ENDPOINT }}
EMBEDDING_API_KEY: ${{ secrets.EMBEDDING_API_KEY }}
EMBEDDING_API_VERSION: ${{ secrets.EMBEDDING_API_VERSION }}
- VECTOR_DB_URL: ${{ secrets.QDRANT_API_URL }}
+ VECTOR_DB_URL: 127.0.0.1
- VECTOR_DB_KEY: ${{ secrets.QDRANT_API_KEY }}
+ VECTOR_DB_KEY: qdrant_api_key
run: poetry run python ./cognee/tests/test_qdrant.py
run-postgres-tests:

View file

@ -1,6 +1,7 @@
"""FastAPI server for the Cognee API.""" """FastAPI server for the Cognee API."""
import os import os
import uvicorn import uvicorn
from cognee.shared.logging_utils import get_logger from cognee.shared.logging_utils import get_logger
import sentry_sdk import sentry_sdk
@ -63,6 +64,7 @@ async def lifespan(app: FastAPI):
app = FastAPI(debug=app_environment != "prod", lifespan=lifespan) app = FastAPI(debug=app_environment != "prod", lifespan=lifespan)
app.add_middleware( app.add_middleware(
CORSMiddleware, CORSMiddleware,
allow_origins=["*"], allow_origins=["*"],

View file

@ -1,3 +1,4 @@
from uuid import UUID
from typing import Union, BinaryIO, List, Optional
from cognee.modules.pipelines import Task
@ -11,9 +12,21 @@ async def add(
dataset_name: str = "main_dataset",
user: User = None,
node_set: Optional[List[str]] = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
dataset_id: UUID = None,
):
- tasks = [Task(resolve_data_directories), Task(ingest_data, dataset_name, user, node_set)]
+ tasks = [
Task(resolve_data_directories),
Task(ingest_data, dataset_name, user, node_set, dataset_id),
]
await cognee_pipeline(
- tasks=tasks, datasets=dataset_name, data=data, user=user, pipeline_name="add_pipeline"
+ tasks=tasks,
datasets=dataset_id if dataset_id else dataset_name,
data=data,
user=user,
pipeline_name="add_pipeline",
vector_db_config=vector_db_config,
graph_db_config=graph_db_config,
)
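
A hedged usage sketch of the extended `add()` signature above; the UUID is a placeholder, and the config keys mirror the ones assembled in `context_global_variables.py` later in this diff.

```python
import asyncio
from uuid import UUID
import cognee

async def main():
    dataset_id = UUID("00000000-0000-0000-0000-000000000000")  # placeholder for an existing dataset id
    await cognee.add(
        "Some new document text",
        dataset_id=dataset_id,  # takes precedence over dataset_name when both are set
        vector_db_config={      # keys as used by the context configuration added in this PR
            "vector_db_provider": "lancedb",
            "vector_db_url": "/tmp/my_dataset.lance.db",
            "vector_db_key": "",
        },
        graph_db_config={
            "graph_database_provider": "kuzu",
            "graph_file_path": "/tmp/my_dataset_graph",
        },
    )

asyncio.run(main())
```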

View file

@ -1,4 +1,5 @@
from uuid import UUID
from fastapi import Form, UploadFile, Depends
from fastapi.responses import JSONResponse
from fastapi import APIRouter
@ -20,8 +21,8 @@ def get_add_router() -> APIRouter:
@router.post("/", response_model=None)
async def add(
data: List[UploadFile],
- datasetName: str,
datasetId: Optional[UUID] = Form(default=None),
datasetName: Optional[str] = Form(default=None),
user: User = Depends(get_authenticated_user),
):
"""This endpoint is responsible for adding data to the graph."""
@ -30,19 +31,13 @@ def get_add_router() -> APIRouter:
if not datasetId and not datasetName:
raise ValueError("Either datasetId or datasetName must be provided.")
- if datasetId and not datasetName:
- dataset = await get_dataset(user_id=user.id, dataset_id=datasetId)
- try:
- datasetName = dataset.name
- except IndexError:
- raise ValueError("No dataset found with the provided datasetName.")
try:
if isinstance(data, str) and data.startswith("http"):
if "github" in data:
# Perform git clone if the URL is from GitHub
repo_name = data.split("/")[-1].replace(".git", "")
subprocess.run(["git", "clone", data, f".data/{repo_name}"], check=True)
# TODO: Update add call with dataset info
await cognee_add(
"data://.data/",
f"{repo_name}",
@ -53,10 +48,10 @@ def get_add_router() -> APIRouter:
response.raise_for_status()
file_data = await response.content()
# TODO: Update add call with dataset info
return await cognee_add(file_data)
else:
- await cognee_add(data, datasetName, user=user)
+ await cognee_add(data, dataset_name=datasetName, user=user, dataset_id=datasetId)
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})
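
A hedged example of calling the updated endpoint; the base URL, route prefix and token are assumptions, only the `datasetId` / `datasetName` form fields come from the handler above.

```python
import requests

token = "..."  # JWT obtained from the login endpoint
response = requests.post(
    "http://localhost:8000/api/v1/add",  # assumed mount point for get_add_router()
    headers={"Authorization": f"Bearer {token}"},
    files=[("data", ("notes.txt", b"some text to ingest", "text/plain"))],
    data={"datasetId": "00000000-0000-0000-0000-000000000000"},  # or {"datasetName": "my_dataset"}
)
print(response.status_code, response.json())
```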

View file

@ -9,7 +9,7 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.tasks.documents import (
- check_permissions_on_documents,
+ check_permissions_on_dataset,
classify_documents,
extract_chunks_from_documents,
)
@ -31,11 +31,18 @@ async def cognify(
chunker=TextChunker,
chunk_size: int = None,
ontology_file_path: Optional[str] = None,
vector_db_config: dict = None,
graph_db_config: dict = None,
):
tasks = await get_default_tasks(user, graph_model, chunker, chunk_size, ontology_file_path)
return await cognee_pipeline(
- tasks=tasks, datasets=datasets, user=user, pipeline_name="cognify_pipeline"
+ tasks=tasks,
datasets=datasets,
user=user,
pipeline_name="cognify_pipeline",
vector_db_config=vector_db_config,
graph_db_config=graph_db_config,
)
@ -48,7 +55,7 @@ async def get_default_tasks( # TODO: Find out a better way to do this (Boris's
) -> list[Task]:
default_tasks = [
Task(classify_documents),
- Task(check_permissions_on_documents, user=user, permissions=["write"]),
+ Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents,
max_chunk_size=chunk_size or get_max_chunk_tokens(),

View file

@ -1,3 +1,4 @@
from uuid import UUID
from typing import List, Optional
from pydantic import BaseModel
from fastapi import Depends
@ -10,6 +11,7 @@ from cognee.shared.data_models import KnowledgeGraph
class CognifyPayloadDTO(BaseModel):
datasets: List[str]
dataset_ids: Optional[List[UUID]]
graph_model: Optional[BaseModel] = KnowledgeGraph
@ -22,7 +24,9 @@ def get_cognify_router() -> APIRouter:
from cognee.api.v1.cognify import cognify as cognee_cognify
try:
- await cognee_cognify(payload.datasets, user, payload.graph_model)
+ # Send dataset UUIDs if they are given, if not send dataset names
datasets = payload.dataset_ids if payload.dataset_ids else payload.datasets
await cognee_cognify(datasets, user, payload.graph_model)
except Exception as error:
return JSONResponse(status_code=409, content={"error": str(error)})
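
A hedged example payload for the cognify endpoint; the mount point and token are assumptions, the field names come from CognifyPayloadDTO above.

```python
import requests

token = "..."  # JWT obtained from the login endpoint
payload = {
    "datasets": ["my_dataset"],                               # names resolve only to the caller's own datasets
    "dataset_ids": ["00000000-0000-0000-0000-000000000000"],  # UUIDs win when both are provided
}
response = requests.post(
    "http://localhost:8000/api/v1/cognify",  # assumed mount point for get_cognify_router()
    json=payload,
    headers={"Authorization": f"Bearer {token}"},
)
print(response.status_code)
```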

View file

@ -1,66 +1,69 @@
from uuid import UUID
from typing import List
- from fastapi import APIRouter
+ from fastapi import APIRouter, Depends
from fastapi.responses import JSONResponse
from cognee.modules.users.models import User
from cognee.modules.users.methods import get_authenticated_user
def get_permissions_router() -> APIRouter:
permissions_router = APIRouter()
- @permissions_router.post("/roles/{role_id}/permissions")
+ @permissions_router.post("/datasets/{principal_id}/")
- async def give_default_permission_to_role(role_id: UUID, permission_name: str):
+ async def give_datasets_permission_to_principal(
- from cognee.modules.users.permissions.methods import (
+ permission_name: str,
- give_default_permission_to_role as set_default_permission_to_role,
+ dataset_ids: List[UUID],
principal_id: UUID,
user: User = Depends(get_authenticated_user),
):
from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
await authorized_give_permission_on_datasets(
principal_id,
[dataset_id for dataset_id in dataset_ids],
permission_name,
user.id,
)
- await set_default_permission_to_role(role_id, permission_name)
+ return JSONResponse(
status_code=200, content={"message": "Permission assigned to principal"}
- return JSONResponse(status_code=200, content={"message": "Permission assigned to role"})
- @permissions_router.post("/tenants/{tenant_id}/permissions")
- async def give_default_permission_to_tenant(tenant_id: UUID, permission_name: str):
- from cognee.modules.users.permissions.methods import (
- give_default_permission_to_tenant as set_tenant_default_permissions,
)
- await set_tenant_default_permissions(tenant_id, permission_name)
- return JSONResponse(status_code=200, content={"message": "Permission assigned to tenant"})
- @permissions_router.post("/users/{user_id}/permissions")
- async def give_default_permission_to_user(user_id: UUID, permission_name: str):
- from cognee.modules.users.permissions.methods import (
- give_default_permission_to_user as set_default_permission_to_user,
- )
- await set_default_permission_to_user(user_id, permission_name)
- return JSONResponse(status_code=200, content={"message": "Permission assigned to user"})
@permissions_router.post("/roles")
- async def create_role(
+ async def create_role(role_name: str, user: User = Depends(get_authenticated_user)):
- role_name: str,
- tenant_id: UUID,
- ):
from cognee.modules.users.roles.methods import create_role as create_role_method
- await create_role_method(role_name=role_name, tenant_id=tenant_id)
+ await create_role_method(role_name=role_name, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "Role created for tenant"})
@permissions_router.post("/users/{user_id}/roles")
- async def add_user_to_role(user_id: UUID, role_id: UUID):
+ async def add_user_to_role(
user_id: UUID, role_id: UUID, user: User = Depends(get_authenticated_user)
):
from cognee.modules.users.roles.methods import add_user_to_role as add_user_to_role_method
- await add_user_to_role_method(user_id=user_id, role_id=role_id)
+ await add_user_to_role_method(user_id=user_id, role_id=role_id, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "User added to role"})
@permissions_router.post("/users/{user_id}/tenants")
async def add_user_to_tenant(
user_id: UUID, tenant_id: UUID, user: User = Depends(get_authenticated_user)
):
from cognee.modules.users.tenants.methods import add_user_to_tenant
await add_user_to_tenant(user_id=user_id, tenant_id=tenant_id, owner_id=user.id)
return JSONResponse(status_code=200, content={"message": "User added to tenant"})
@permissions_router.post("/tenants")
- async def create_tenant(tenant_name: str):
+ async def create_tenant(tenant_name: str, user: User = Depends(get_authenticated_user)):
from cognee.modules.users.tenants.methods import create_tenant as create_tenant_method
- await create_tenant_method(tenant_name=tenant_name)
+ await create_tenant_method(tenant_name=tenant_name, user_id=user.id)
return JSONResponse(status_code=200, content={"message": "Tenant created."})
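
A hedged example of granting dataset permissions to a principal (a user or a role); the route prefix, token and IDs are placeholders, the query/body split follows the handler signature above.

```python
import requests

token = "..."  # JWT of the dataset owner
principal_id = "11111111-1111-1111-1111-111111111111"  # placeholder user or role id
response = requests.post(
    f"http://localhost:8000/api/v1/permissions/datasets/{principal_id}/",  # assumed prefix
    params={"permission_name": "read"},                                    # query parameter
    json=["00000000-0000-0000-0000-000000000000"],                         # dataset_ids request body
    headers={"Authorization": f"Bearer {token}"},
)
print(response.json())
```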

View file

@ -1,4 +1,5 @@
from uuid import UUID
from typing import Optional, Union
from datetime import datetime
from fastapi import Depends, APIRouter
from fastapi.responses import JSONResponse
@ -9,8 +10,12 @@ from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_authenticated_user
# Note: Datasets sent by name will only map to datasets owned by the request sender
# To search for datasets not owned by the request sender dataset UUID is needed
class SearchPayloadDTO(InDTO):
search_type: SearchType
datasets: Optional[list[str]] = None
dataset_ids: Optional[list[UUID]] = None
query: str
@ -39,7 +44,11 @@ def get_search_router() -> APIRouter:
try:
results = await cognee_search(
- query_text=payload.query, query_type=payload.search_type, user=user
+ query_text=payload.query,
query_type=payload.search_type,
user=user,
datasets=payload.datasets,
dataset_ids=payload.dataset_ids,
)
return results
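
A hedged example payload for the search endpoint; values are placeholders and the exact key casing depends on how InDTO aliases its fields.

```python
import requests

token = "..."  # JWT obtained from the login endpoint
payload = {
    "searchType": "GRAPH_COMPLETION",
    "query": "What changed in the permissions model?",
    "datasets": ["my_dataset"],                              # names map only to the caller's own datasets
    "datasetIds": ["00000000-0000-0000-0000-000000000000"],  # UUIDs reach datasets shared with the caller
}
response = requests.post(
    "http://localhost:8000/api/v1/search",  # assumed mount point for get_search_router()
    json=payload,
    headers={"Authorization": f"Bearer {token}"},
)
print(response.json())
```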

View file

@ -1,32 +1,43 @@
from uuid import UUID
from typing import Union, Optional, List, Type
from cognee.modules.users.models import User
from cognee.modules.search.types import SearchType
from cognee.modules.users.methods import get_default_user
from cognee.modules.search.methods import search as search_function
from cognee.modules.data.methods import get_authorized_existing_datasets
from cognee.modules.data.exceptions import DatasetNotFoundError
async def search(
query_text: str,
query_type: SearchType = SearchType.GRAPH_COMPLETION,
user: User = None,
- datasets: Union[list[str], str, None] = None,
+ datasets: Optional[Union[list[str], str]] = None,
dataset_ids: Optional[Union[list[UUID], UUID]] = None,
system_prompt_path: str = "answer_simple_question.txt",
top_k: int = 10,
node_type: Optional[Type] = None,
node_name: Optional[List[str]] = None,
) -> list:
# We use lists from now on for datasets
- if isinstance(datasets, str):
+ if isinstance(datasets, UUID) or isinstance(datasets, str):
datasets = [datasets]
if user is None:
user = await get_default_user()
# Transform string-based datasets to UUIDs - string-based datasets can only be resolved for the current user
if datasets is not None and all(isinstance(dataset, str) for dataset in datasets):
datasets = await get_authorized_existing_datasets(datasets, "read", user)
datasets = [dataset.id for dataset in datasets]
if not datasets:
raise DatasetNotFoundError(message="No datasets found.")
filtered_search_results = await search_function(
query_text=query_text,
query_type=query_type,
- datasets=datasets,
+ dataset_ids=dataset_ids if dataset_ids else datasets,
user=user,
system_prompt_path=system_prompt_path,
top_k=top_k,

View file

@ -0,0 +1,67 @@
import os
import pathlib
from contextvars import ContextVar
from typing import Union
from uuid import UUID
from cognee.infrastructure.databases.utils import get_or_create_dataset_database
from cognee.modules.users.methods import get_user
# Note: ContextVar allows us to use different graph db configurations in Cognee
# for different async tasks, threads and processes
vector_db_config = ContextVar("vector_db_config", default=None)
graph_db_config = ContextVar("graph_db_config", default=None)
async def set_database_global_context_variables(dataset: Union[str, UUID], user_id: UUID):
"""
If backend access control is enabled this function will ensure all datasets have their own databases,
access to which will be enforced by given permissions.
Database name will be determined by dataset_id and LanceDB and KuzuDB use will be enforced.
Note: This is only currently supported by the following databases:
Relational: SQLite, Postgres
Vector: LanceDB
Graph: KuzuDB
Args:
dataset: Cognee dataset name or id
user_id: UUID of the owner of the dataset
Returns:
"""
if not os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
return
user = await get_user(user_id)
# To ensure permissions are enforced properly all datasets will have their own databases
dataset_database = await get_or_create_dataset_database(dataset, user)
# TODO: Find better location for database files
cognee_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, f".cognee_system/databases/{user.id}")
).resolve()
)
# Set vector and graph database configuration based on dataset database information
vector_config = {
"vector_db_url": os.path.join(cognee_directory_path, dataset_database.vector_database_name),
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": os.path.join(
cognee_directory_path, dataset_database.graph_database_name
),
}
# Use ContextVar so these graph and vector configurations are used
# in the current async context across Cognee
graph_db_config.set(graph_config)
vector_db_config.set(vector_config)
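
A standalone sketch (not cognee code) of why ContextVar fits here: each asyncio task sees the value set inside it, so concurrent pipelines can point at different databases without interfering.

```python
import asyncio
from contextvars import ContextVar

db_config: ContextVar[dict] = ContextVar("db_config", default=None)

async def run_for_dataset(name: str):
    # Each asyncio task runs in its own copy of the context, so this set() is task-local
    db_config.set({"graph_file_path": f"/tmp/{name}.kuzu"})
    await asyncio.sleep(0)  # yield to the other task
    return name, db_config.get()["graph_file_path"]

async def main():
    print(await asyncio.gather(run_for_dataset("sales"), run_for_dataset("support")))
    # [('sales', '/tmp/sales.kuzu'), ('support', '/tmp/support.kuzu')] - no cross-talk

asyncio.run(main())
```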

View file

@ -8,7 +8,7 @@ from cognee.modules.users.models import User
from cognee.shared.data_models import KnowledgeGraph
from cognee.shared.utils import send_telemetry
from cognee.tasks.documents import (
- check_permissions_on_documents,
+ check_permissions_on_dataset,
classify_documents,
extract_chunks_from_documents,
)
@ -31,7 +31,7 @@ async def get_cascade_graph_tasks(
cognee_config = get_cognify_config()
default_tasks = [
Task(classify_documents),
- Task(check_permissions_on_documents, user=user, permissions=["write"]),
+ Task(check_permissions_on_dataset, user=user, permissions=["write"]),
Task(
extract_chunks_from_documents, max_chunk_tokens=get_max_chunk_tokens()
), # Extract text chunks based on the document type.

View file

@ -31,6 +31,9 @@ class CogneeApiError(Exception):
super().__init__(self.message, self.name)
def __str__(self):
return f"{self.name}: {self.message} (Status code: {self.status_code})"
class ServiceError(CogneeApiError):
"""Failures in external services or APIs, like a database or a third-party service"""

View file

@ -105,3 +105,14 @@ def get_graph_config():
- GraphConfig: A GraphConfig instance containing the graph configuration settings.
"""
return GraphConfig()
def get_graph_context_config():
"""This function will get the appropriate graph db config based on async context.
This allows the use of multiple graph databases for different threads, async tasks and parallelization
"""
from cognee.context_global_variables import graph_db_config
if graph_db_config.get():
return graph_db_config.get()
return get_graph_config().to_hashable_dict()

View file

@ -2,36 +2,22 @@
from functools import lru_cache
from .config import get_graph_context_config
- from .config import get_graph_config
from .graph_db_interface import GraphDBInterface
from .supported_databases import supported_databases
async def get_graph_engine() -> GraphDBInterface:
- """
+ """Factory function to get the appropriate graph client based on the graph type."""
- Factory function to get the appropriate graph client based on the graph type.
+ # Get appropriate graph configuration based on current async context
config = get_graph_context_config()
- This function retrieves the graph configuration and creates a graph engine by calling
+ graph_client = create_graph_engine(**config)
- the `create_graph_engine` function. If the configured graph database provider is
- 'networkx', it ensures that the graph is loaded from a file asynchronously if it hasn't
- been loaded yet. It raises an `EnvironmentError` if the necessary configurations for the
- selected graph provider are missing.
- Returns:
- --------
- - GraphDBInterface: Returns an instance of GraphDBInterface which represents the
- selected graph client.
- """
- config = get_graph_config()
- graph_client = create_graph_engine(**get_graph_config().to_hashable_dict())
# Async functions can't be cached. After creating and caching the graph engine
# handle all necessary async operations for different graph types below.
# Handle loading of graph for NetworkX
- if config.graph_database_provider.lower() == "networkx" and graph_client.graph is None:
+ if config["graph_database_provider"].lower() == "networkx" and graph_client.graph is None:
await graph_client.load_graph_from_file()
return graph_client
@ -40,11 +26,11 @@ async def get_graph_engine() -> GraphDBInterface:
@lru_cache
def create_graph_engine(
graph_database_provider,
- graph_database_url,
- graph_database_username,
- graph_database_password,
- graph_database_port,
graph_file_path,
graph_database_url="",
graph_database_username="",
graph_database_password="",
graph_database_port="",
):
"""
Create a graph engine based on the specified provider type.

View file

@ -0,0 +1 @@
from .get_or_create_dataset_database import get_or_create_dataset_database

View file

@ -0,0 +1,68 @@
from uuid import UUID
from typing import Union
from sqlalchemy import select
from sqlalchemy.exc import IntegrityError
from cognee.modules.data.methods import create_dataset
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.methods import get_unique_dataset_id
from cognee.modules.users.models import DatasetDatabase
from cognee.modules.users.models import User
async def get_or_create_dataset_database(
dataset: Union[str, UUID],
user: User,
) -> DatasetDatabase:
"""
Return the `DatasetDatabase` row for the given owner + dataset.
If the row already exists, it is fetched and returned.
Otherwise a new one is created atomically and returned.
Parameters
----------
user : User
Principal that owns this dataset.
dataset : Union[str, UUID]
Dataset being linked.
"""
db_engine = get_relational_engine()
dataset_id = await get_unique_dataset_id(dataset, user)
vector_db_name = f"{dataset_id}.lance.db"
graph_db_name = f"{dataset_id}.pkl"
async with db_engine.get_async_session() as session:
# Create dataset if it doesn't exist
if isinstance(dataset, str):
dataset = await create_dataset(dataset, user, session)
# Try to fetch an existing row first
stmt = select(DatasetDatabase).where(
DatasetDatabase.owner_id == user.id,
DatasetDatabase.dataset_id == dataset_id,
)
existing: DatasetDatabase = await session.scalar(stmt)
if existing:
return existing
# If there are no existing rows build a new row
record = DatasetDatabase(
owner_id=user.id,
dataset_id=dataset_id,
vector_database_name=vector_db_name,
graph_database_name=graph_db_name,
)
try:
session.add(record)
await session.commit()
await session.refresh(record)
return record
except IntegrityError:
await session.rollback()
raise

View file

@ -62,3 +62,12 @@ def get_vectordb_config():
configuration.
"""
return VectorConfig()
def get_vectordb_context_config():
"""This function will get the appropriate vector db config based on async context."""
from cognee.context_global_variables import vector_db_config
if vector_db_config.get():
return vector_db_config.get()
return get_vectordb_config().to_dict()

View file

@ -6,10 +6,10 @@ from functools import lru_cache
@lru_cache
def create_vector_engine(
- vector_db_url: str,
- vector_db_port: str,
- vector_db_key: str,
vector_db_provider: str,
vector_db_url: str,
vector_db_port: str = "",
vector_db_key: str = "",
):
"""
Create a vector database engine based on the specified provider.

View file

@ -1,14 +1,7 @@
- from .config import get_vectordb_config
+ from .config import get_vectordb_context_config
from .create_vector_engine import create_vector_engine
def get_vector_engine():
- """
+ # Get appropriate vector db configuration based on current async context
- Create and return a vector engine instance.
+ return create_vector_engine(**get_vectordb_context_config())
- Returns:
- --------
- A vector engine instance created from the vector database configuration.
- """
- return create_vector_engine(**get_vectordb_config().to_dict())

View file

@ -1,18 +1,13 @@
- from ..get_vector_engine import get_vector_engine, get_vectordb_config
+ from ..get_vector_engine import get_vector_engine, get_vectordb_context_config
from sqlalchemy import text
from cognee.context_global_variables import vector_db_config as context_vector_db_config
async def create_db_and_tables():
- """
+ # Get appropriate vector db configuration based on current async context
- Create the database and its associated tables if necessary.
+ vector_config = get_vectordb_context_config()
- This function checks the vector database provider configuration and, if it is set to
- "pgvector", creates the necessary vector extension in the PostgreSQL database using an
- asynchronous context manager.
- """
- vector_config = get_vectordb_config()
vector_engine = get_vector_engine()
- if vector_config.vector_db_provider == "pgvector":
+ if vector_config["vector_db_provider"] == "pgvector":
async with vector_engine.engine.begin() as connection:
await connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))

View file

@ -1,3 +1,4 @@
import os
from typing import Dict, List, Optional
from qdrant_client import AsyncQdrantClient, models
@ -147,14 +148,15 @@ class QDrantAdapter(VectorDBInterface):
Returns:
--------
- AsyncQdrantClient: An instance of AsyncQdrantClient configured for database
operations.
"""
is_prod = os.getenv("ENV").lower() == "prod"
if self.qdrant_path is not None:
- return AsyncQdrantClient(path=self.qdrant_path, port=6333)
+ return AsyncQdrantClient(path=self.qdrant_path, port=6333, https=is_prod)
elif self.url is not None:
- return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333)
+ return AsyncQdrantClient(url=self.url, api_key=self.api_key, port=6333, https=is_prod)
return AsyncQdrantClient(location=":memory:")

View file

@ -7,4 +7,6 @@ This module defines a set of exceptions for handling various data errors
from .exceptions import (
UnstructuredLibraryImportError,
UnauthorizedDataAccessError,
DatasetNotFoundError,
DatasetTypeError,
)

View file

@ -20,3 +20,23 @@ class UnauthorizedDataAccessError(CogneeApiError):
status_code=status.HTTP_401_UNAUTHORIZED,
):
super().__init__(message, name, status_code)
class DatasetNotFoundError(CogneeApiError):
def __init__(
self,
message: str = "Dataset not found.",
name: str = "DatasetNotFoundError",
status_code=status.HTTP_404_NOT_FOUND,
):
super().__init__(message, name, status_code)
class DatasetTypeError(CogneeApiError):
def __init__(
self,
message: str = "Dataset type not supported.",
name: str = "DatasetTypeError",
status_code=status.HTTP_400_BAD_REQUEST,
):
super().__init__(message, name, status_code)

View file

@ -8,7 +8,15 @@ from .get_datasets_by_name import get_datasets_by_name
from .get_dataset_data import get_dataset_data
from .get_data import get_data
from .get_unique_dataset_id import get_unique_dataset_id
from .get_authorized_existing_datasets import get_authorized_existing_datasets
from .get_dataset_ids import get_dataset_ids
# Delete
from .delete_dataset import delete_dataset
from .delete_data import delete_data
# Create
from .load_or_create_datasets import load_or_create_datasets
# Check
from .check_dataset_name import check_dataset_name

View file

@ -0,0 +1,3 @@
def check_dataset_name(dataset_name: str):
if "." in dataset_name or " " in dataset_name:
raise ValueError("Dataset name cannot contain spaces or underscores")

View file

@ -1,4 +1,3 @@
- from uuid import UUID, uuid5, NAMESPACE_OID
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy import select
from sqlalchemy.orm import joinedload

View file

@ -0,0 +1,39 @@
from typing import Union
from uuid import UUID
from cognee.modules.data.models import Dataset
from cognee.modules.users.models import User
from cognee.modules.data.methods.get_dataset_ids import get_dataset_ids
from cognee.modules.users.permissions.methods import get_all_user_permission_datasets
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
async def get_authorized_existing_datasets(
datasets: Union[list[str], list[UUID]], permission_type: str, user: User
) -> list[Dataset]:
"""
Function returns a list of existing dataset objects user has access for based on datasets input.
Args:
datasets:
user:
Returns:
list of Dataset objects
"""
if datasets:
# Function handles transforming dataset input to dataset IDs (if possible)
dataset_ids = await get_dataset_ids(datasets, user)
# If dataset_ids are provided filter these datasets based on what user has permission for.
if dataset_ids:
existing_datasets = await get_specific_user_permission_datasets(
user.id, permission_type, dataset_ids
)
else:
existing_datasets = []
else:
# If no datasets are provided, work with all existing datasets user has permission for.
existing_datasets = await get_all_user_permission_datasets(user, permission_type)
return existing_datasets

View file

@ -0,0 +1,36 @@
from typing import Union
from uuid import UUID
from cognee.modules.data.exceptions import DatasetTypeError
from cognee.modules.data.methods import get_datasets
async def get_dataset_ids(datasets: Union[list[str], list[UUID]], user):
"""
Function returns dataset IDs necessary based on provided input.
It transforms raw strings into real dataset_ids with keeping write permissions in mind.
If a user wants to write to a dataset he is not the owner of it must be provided through UUID.
Args:
datasets:
pipeline_name:
user:
Returns: a list of write access dataset_ids if they exist
"""
if all(isinstance(dataset, UUID) for dataset in datasets):
# Return list of dataset UUIDs
dataset_ids = datasets
else:
# Convert list of dataset names to dataset UUID
if all(isinstance(dataset, str) for dataset in datasets):
# Get all user owned dataset objects (If a user wants to write to a dataset he is not the owner of it must be provided through UUID.)
user_datasets = await get_datasets(user.id)
# Filter out non name mentioned datasets
dataset_ids = [dataset.id for dataset in user_datasets if dataset.name in datasets]
else:
raise DatasetTypeError(
f"One or more of the provided dataset types is not handled: f{datasets}"
)
return dataset_ids

View file

@ -1,6 +1,9 @@
from uuid import UUID, uuid5, NAMESPACE_OID
from cognee.modules.users.models import User
from typing import Union
- async def get_unique_dataset_id(dataset_name: str, user: User) -> UUID:
+ async def get_unique_dataset_id(dataset_name: Union[str, UUID], user: User) -> UUID:
if isinstance(dataset_name, UUID):
return dataset_name
return uuid5(NAMESPACE_OID, f"{dataset_name}{str(user.id)}")
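
A quick illustration of the deterministic id: the same (dataset_name, owner) pair always maps to the same UUID, so names can be converted to ids without a database lookup (the user id below is a placeholder).

```python
from uuid import NAMESPACE_OID, UUID, uuid5

user_id = UUID("22222222-2222-2222-2222-222222222222")  # placeholder owner id
first = uuid5(NAMESPACE_OID, f"my_dataset{str(user_id)}")
second = uuid5(NAMESPACE_OID, f"my_dataset{str(user_id)}")
assert first == second  # stable across calls and processes
```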

View file

@ -0,0 +1,42 @@
from typing import List, Union
from uuid import UUID
from cognee.modules.data.models import Dataset
from cognee.modules.data.methods import get_unique_dataset_id
from cognee.modules.data.exceptions import DatasetNotFoundError
async def load_or_create_datasets(
dataset_names: List[Union[str, UUID]], existing_datasets: List[Dataset], user
) -> List[Dataset]:
"""
Given a list of dataset identifiers (names or UUIDs), return Dataset instances:
- If an identifier matches an existing Dataset (by name or id), reuse it.
- Otherwise, create a new Dataset with a unique id. Note: Created dataset is not stored to database.
"""
result: List[Dataset] = []
for identifier in dataset_names:
# Try to find a matching dataset in the existing list
# If no matching dataset is found return None
match = next(
(ds for ds in existing_datasets if ds.name == identifier or ds.id == identifier), None
)
if match:
result.append(match)
continue
# If the identifier is a UUID but nothing matched, that's an error
if isinstance(identifier, UUID):
raise DatasetNotFoundError(f"Dataset with given UUID does not exist: {identifier}")
# Otherwise, create a new Dataset instance
new_dataset = Dataset(
id=await get_unique_dataset_id(dataset_name=identifier, user=user),
name=identifier,
owner_id=user.id,
)
result.append(new_dataset)
return result

View file

@ -33,9 +33,6 @@ class Data(Base):
cascade="all, delete", cascade="all, delete",
) )
# New relationship for ACLs with cascade deletion
acls = relationship("ACL", back_populates="data", cascade="all, delete-orphan")
def to_json(self) -> dict: def to_json(self) -> dict:
return { return {
"id": str(self.id), "id": str(self.id),

View file

@ -19,6 +19,8 @@ class Dataset(Base):
owner_id = Column(UUID, index=True)
acls = relationship("ACL", back_populates="dataset", cascade="all, delete-orphan")
data: Mapped[List["Data"]] = relationship(
"Data",
secondary=DatasetData.__tablename__,

View file

@ -1,11 +1,9 @@
import asyncio
from typing import Union
- from uuid import NAMESPACE_OID, uuid5
+ from uuid import NAMESPACE_OID, uuid5, UUID
from cognee.shared.logging_utils import get_logger
- from cognee.modules.data.methods import get_datasets
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
- from cognee.modules.data.methods.get_unique_dataset_id import get_unique_dataset_id
from cognee.modules.data.models import Data, Dataset
from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.models import PipelineRunStatus
@ -14,6 +12,13 @@ from cognee.modules.pipelines.tasks.task import Task
from cognee.modules.users.methods import get_default_user
from cognee.modules.users.models import User
from cognee.modules.pipelines.operations import log_pipeline_run_initiated
from cognee.context_global_variables import set_database_global_context_variables
from cognee.modules.data.exceptions import DatasetNotFoundError
from cognee.modules.data.methods import (
get_authorized_existing_datasets,
load_or_create_datasets,
check_dataset_name,
)
from cognee.infrastructure.databases.relational import (
create_db_and_tables as create_relational_db_and_tables,
@ -21,6 +26,10 @@ from cognee.infrastructure.databases.relational import (
from cognee.infrastructure.databases.vector.pgvector import (
create_db_and_tables as create_pgvector_db_and_tables,
)
from cognee.context_global_variables import (
graph_db_config as context_graph_db_config,
vector_db_config as context_vector_db_config,
)
logger = get_logger("cognee.pipeline")
@ -30,10 +39,19 @@ update_status_lock = asyncio.Lock()
async def cognee_pipeline(
tasks: list[Task],
data=None,
- datasets: Union[str, list[str]] = None,
+ datasets: Union[str, list[str], list[UUID]] = None,
user: User = None,
pipeline_name: str = "custom_pipeline",
vector_db_config: dict = None,
graph_db_config: dict = None,
):
# Note: These context variables allow different value assignment for databases in Cognee
# per async task, thread, process, etc.
if vector_db_config:
context_vector_db_config.set(vector_db_config)
if graph_db_config:
context_graph_db_config.set(graph_db_config)
# Create tables for databases
await create_relational_db_and_tables()
await create_pgvector_db_and_tables()
@ -54,49 +72,35 @@ async def cognee_pipeline(
if user is None:
user = await get_default_user()
- # Convert datasets to list in case it's a string
+ # Convert datasets to list
- if isinstance(datasets, str):
+ if isinstance(datasets, str) or isinstance(datasets, UUID):
datasets = [datasets]
- # If no datasets are provided, work with all existing datasets.
+ # Get datasets user wants write permissions for (verify user has permissions if datasets are provided as well)
- existing_datasets = await get_datasets(user.id)
+ # NOTE: If a user wants to write to a dataset they do not own, it must be provided through UUID
existing_datasets = await get_authorized_existing_datasets(datasets, "write", user)
if not datasets:
# Get datasets from database if none sent.
datasets = existing_datasets
else:
- # If dataset is already in database, use it, otherwise create a new instance.
+ # If dataset matches an existing Dataset (by name or id), reuse it. Otherwise, create a new Dataset.
- dataset_instances = []
+ datasets = await load_or_create_datasets(datasets, existing_datasets, user)
- for dataset_name in datasets:
+ if not datasets:
- is_dataset_found = False
+ raise DatasetNotFoundError("There are no datasets to work with.")
- for existing_dataset in existing_datasets:
- if (
- existing_dataset.name == dataset_name
- or str(existing_dataset.id) == dataset_name
- ):
- dataset_instances.append(existing_dataset)
- is_dataset_found = True
- break
- if not is_dataset_found:
- dataset_instances.append(
- Dataset(
- id=await get_unique_dataset_id(dataset_name=dataset_name, user=user),
- name=dataset_name,
- owner_id=user.id,
- )
- )
- datasets = dataset_instances
awaitables = []
for dataset in datasets:
awaitables.append(
run_pipeline(
- dataset=dataset, user=user, tasks=tasks, data=data, pipeline_name=pipeline_name
+ dataset=dataset,
user=user,
tasks=tasks,
data=data,
pipeline_name=pipeline_name,
context={"dataset": dataset},
)
)
@ -109,9 +113,13 @@ async def run_pipeline(
tasks: list[Task],
data=None,
pipeline_name: str = "custom_pipeline",
context: dict = None,
):
check_dataset_name(dataset.name)
# Will only be used if ENABLE_BACKEND_ACCESS_CONTROL is set to True
await set_database_global_context_variables(dataset.name, user.id)
# Ugly hack, but no easier way to do this.
if pipeline_name == "add_pipeline":
# Refresh the add pipeline status so data is added to a dataset.
@ -160,15 +168,10 @@ async def run_pipeline(
if not isinstance(task, Task):
raise ValueError(f"Task {task} is not an instance of Task")
- pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name)
+ pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name, context=context)
pipeline_run_status = None
async for run_status in pipeline_run:
pipeline_run_status = run_status
return pipeline_run_status
- def check_dataset_name(dataset_name: str) -> str:
- if "." in dataset_name or " " in dataset_name:
- raise ValueError("Dataset name cannot contain spaces or underscores")

View file

@ -1,8 +1,11 @@
import os
import json
- from typing import Callable, Optional, List, Type
+ import asyncio
from uuid import UUID
from typing import Callable, List, Optional, Type, Union
from cognee.context_global_variables import set_database_global_context_variables
from cognee.exceptions import InvalidValueError
- from cognee.infrastructure.engine.utils import parse_id
from cognee.modules.retrieval.chunks_retriever import ChunksRetriever
from cognee.modules.retrieval.insights_retriever import InsightsRetriever
from cognee.modules.retrieval.summaries_retriever import SummariesRetriever
@ -21,24 +24,45 @@ from cognee.modules.retrieval.natural_language_retriever import NaturalLanguageR
from cognee.modules.search.types import SearchType
from cognee.modules.storage.utils import JSONEncoder
from cognee.modules.users.models import User
- from cognee.modules.users.permissions.methods import get_document_ids_for_user
+ from cognee.modules.data.models import Dataset
from cognee.shared.utils import send_telemetry
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from cognee.modules.search.operations import log_query, log_result
async def search(
query_text: str,
query_type: SearchType,
- datasets: list[str],
+ dataset_ids: Union[list[UUID], None],
user: User,
system_prompt_path="answer_simple_question.txt",
top_k: int = 10,
node_type: Optional[Type] = None,
node_name: Optional[List[str]] = None,
):
"""
Args:
query_text:
query_type:
datasets:
user:
system_prompt_path:
top_k:
Returns:
Notes:
Searching by dataset is only available in ENABLE_BACKEND_ACCESS_CONTROL mode
"""
# Use search function filtered by permissions if access control is enabled
if os.getenv("ENABLE_BACKEND_ACCESS_CONTROL", "false").lower() == "true":
return await permissions_search(
query_text, query_type, user, dataset_ids, system_prompt_path, top_k
)
query = await log_query(query_text, query_type.value, user.id)
- own_document_ids = await get_document_ids_for_user(user.id, datasets)
search_results = await specific_search(
query_type,
query_text,
@ -49,18 +73,9 @@ async def search(
node_name=node_name,
)
- filtered_search_results = []
+ await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
- for search_result in search_results:
+ return search_results
- document_id = search_result["document_id"] if "document_id" in search_result else None
- document_id = parse_id(document_id)
- if document_id is None or document_id in own_document_ids:
- filtered_search_results.append(search_result)
- await log_result(query.id, json.dumps(filtered_search_results, cls=JSONEncoder), user.id)
- return filtered_search_results
async def specific_search(
@ -120,3 +135,62 @@ async def specific_search(
send_telemetry("cognee.search EXECUTION COMPLETED", user.id)
return results
async def permissions_search(
query_text: str,
query_type: SearchType,
user: User = None,
dataset_ids: Optional[list[UUID]] = None,
system_prompt_path: str = "answer_simple_question.txt",
top_k: int = 10,
) -> list:
"""
Verifies access to the provided datasets (or falls back to all datasets the user can read) and performs the search per dataset.
Not to be used outside of active access control mode.
"""
query = await log_query(query_text, query_type.value, user.id)
# Find datasets user has read access for (if datasets are provided only return them. Provided user has read access)
search_datasets = await get_specific_user_permission_datasets(user.id, "read", dataset_ids)
# Searches all provided datasets and handles setting up of appropriate database context based on permissions
search_results = await specific_search_by_context(
search_datasets, query_text, query_type, user, system_prompt_path, top_k
)
await log_result(query.id, json.dumps(search_results, cls=JSONEncoder), user.id)
return search_results
async def specific_search_by_context(
search_datasets: list[Dataset],
query_text: str,
query_type: SearchType,
user: User,
system_prompt_path: str,
top_k: int,
):
"""
Searches all provided datasets and handles setting up of appropriate database context based on permissions.
Not to be used outside of active access control mode.
"""
async def _search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k):
# Set database configuration in async context for each dataset user has access for
await set_database_global_context_variables(dataset.id, dataset.owner_id)
search_results = await specific_search(
query_type, query_text, user, system_prompt_path=system_prompt_path, top_k=top_k
)
return {dataset.name: search_results}
# Search every dataset async based on query and appropriate database configuration
tasks = []
for dataset in search_datasets:
tasks.append(
_search_by_context(dataset, user, query_type, query_text, system_prompt_path, top_k)
)
return await asyncio.gather(*tasks)
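
For reference, an illustrative sketch of the value permissions_search returns: asyncio.gather yields one {dataset_name: results} mapping per readable dataset (names and values are made up).

```python
# Names and values below are made up; the structure follows specific_search_by_context above.
results = [
    {"marketing_docs": ["...search results scoped to this dataset..."]},
    {"engineering_docs": ["...search results scoped to this dataset..."]},
]
```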

View file

@ -19,17 +19,14 @@ class CustomJWTStrategy(JWTStrategy):
# JoinLoad tenant and role information to user object
user = await get_user(user.id)
- if user.tenant:
+ data = {"user_id": str(user.id)}
- data = {"user_id": str(user.id), "tenant_id": str(user.tenant.id), "roles": user.roles}
- else:
- # The default tenant is None
- data = {"user_id": str(user.id), "tenant_id": None, "roles": user.roles}
return generate_jwt(data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm)
@lru_cache
def get_auth_backend():
- bearer_transport = BearerTransport(tokenUrl="auth/jwt/login")
+ bearer_transport = BearerTransport(tokenUrl="api/v1/auth/login")
def get_jwt_strategy() -> JWTStrategy[models.UP, models.ID]:
secret = os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret")

View file

@ -9,4 +9,5 @@ from .exceptions import (
UserNotFoundError,
PermissionDeniedError,
TenantNotFoundError,
PermissionNotFoundError,
)

View file

@ -46,3 +46,13 @@ class PermissionDeniedError(CogneeApiError):
status_code=status.HTTP_403_FORBIDDEN,
):
super().__init__(message, name, status_code)
class PermissionNotFoundError(CogneeApiError):
def __init__(
self,
message: str = "Permission type does not exist.",
name: str = "PermissionNotFoundError",
status_code=status.HTTP_403_FORBIDDEN,
):
super().__init__(message, name, status_code)

View file

@ -1,7 +1,8 @@
from types import SimpleNamespace from types import SimpleNamespace
from ..get_fastapi_users import get_fastapi_users from ..get_fastapi_users import get_fastapi_users
from fastapi import HTTPException, Header from fastapi import HTTPException, Security
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import os import os
import jwt import jwt
@ -9,28 +10,29 @@ from uuid import UUID
fastapi_users = get_fastapi_users() fastapi_users = get_fastapi_users()
# Allows Swagger to understand authorization type and allow single sign on for the Swagger docs to test backend
bearer_scheme = HTTPBearer(scheme_name="BearerAuth", description="Paste **Bearer &lt;JWT&gt;**")
async def get_authenticated_user(authorization: str = Header(...)) -> SimpleNamespace:
"""Extract and validate JWT from Authorization header.""" async def get_authenticated_user(
creds: HTTPAuthorizationCredentials = Security(bearer_scheme),
) -> SimpleNamespace:
"""
Extract and validate the JWT presented in the Authorization header.
"""
if creds is None: # header missing
raise HTTPException(status_code=401, detail="Not authenticated")
if creds.scheme.lower() != "bearer": # shouldn't happen extra guard
raise HTTPException(status_code=401, detail="Invalid authentication scheme")
token = creds.credentials
try: try:
scheme, token = authorization.split()
if scheme.lower() != "bearer":
raise HTTPException(status_code=401, detail="Invalid authentication scheme")
payload = jwt.decode( payload = jwt.decode(
token, os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret"), algorithms=["HS256"] token, os.getenv("FASTAPI_USERS_JWT_SECRET", "super_secret"), algorithms=["HS256"]
) )
if payload.get("tenant_id"): auth_data = SimpleNamespace(id=UUID(payload["user_id"]))
# SimpleNamespace lets us access dictionary elements like attributes
auth_data = SimpleNamespace(
id=UUID(payload["user_id"]),
tenant_id=UUID(payload["tenant_id"]),
roles=payload["roles"],
)
else:
auth_data = SimpleNamespace(id=UUID(payload["user_id"]), tenant_id=None, roles=[])
return auth_data return auth_data
except jwt.ExpiredSignatureError: except jwt.ExpiredSignatureError:
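
A short sketch of how the reworked Security-based dependency might be wired into a route so Swagger picks up the BearerAuth scheme. The route and the import path are illustrative assumptions, not part of this commit.

```python
from fastapi import Depends, FastAPI

# NOTE: assumed import path; adjust to wherever get_authenticated_user lives in the tree.
from cognee.api.v1.permissions.get_authenticated_user import get_authenticated_user

app = FastAPI()

@app.get("/api/v1/whoami")  # illustrative route
async def whoami(user=Depends(get_authenticated_user)):
    # `user` is the SimpleNamespace built from the JWT payload above.
    return {"user_id": str(user.id)}
```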

View file

@ -1,5 +1,6 @@
from types import SimpleNamespace from types import SimpleNamespace
from sqlalchemy.orm import selectinload from sqlalchemy.orm import selectinload
from sqlalchemy.exc import NoResultFound
from sqlalchemy.future import select from sqlalchemy.future import select
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.base_config import get_base_config from cognee.base_config import get_base_config
@ -33,5 +34,6 @@ async def get_default_user() -> SimpleNamespace:
except Exception as error: except Exception as error:
if "principals" in str(error.args): if "principals" in str(error.args):
raise DatabaseNotCreatedError() from error raise DatabaseNotCreatedError() from error
if isinstance(error, NoResultFound):
raise UserNotFoundError(f"Failed to retrieve default user: {default_email}") from error raise UserNotFoundError(f"Failed to retrieve default user: {default_email}") from error
raise

View file

@ -1,7 +1,9 @@
from uuid import UUID from uuid import UUID
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.orm import joinedload from sqlalchemy.orm import selectinload
import sqlalchemy.exc
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.infrastructure.databases.exceptions import EntityNotFoundError
from ..models import User from ..models import User
@ -12,9 +14,12 @@ async def get_user(user_id: UUID):
user = ( user = (
await session.execute( await session.execute(
select(User) select(User)
.options(joinedload(User.roles), joinedload(User.tenant)) .options(selectinload(User.roles), selectinload(User.tenant))
.where(User.id == user_id) .where(User.id == user_id)
) )
).scalar() ).scalar()
if not user:
raise EntityNotFoundError(message=f"Could not find user: {user_id}")
return user return user

View file

@ -15,8 +15,8 @@ class ACL(Base):
principal_id = Column(UUID, ForeignKey("principals.id")) principal_id = Column(UUID, ForeignKey("principals.id"))
permission_id = Column(UUID, ForeignKey("permissions.id")) permission_id = Column(UUID, ForeignKey("permissions.id"))
data_id = Column(UUID, ForeignKey("data.id", ondelete="CASCADE")) dataset_id = Column(UUID, ForeignKey("datasets.id", ondelete="CASCADE"))
principal = relationship("Principal") principal = relationship("Principal")
permission = relationship("Permission") permission = relationship("Permission")
data = relationship("Data", back_populates="acls") dataset = relationship("Dataset", back_populates="acls")

View file

@ -0,0 +1,19 @@
from datetime import datetime, timezone
from sqlalchemy import Column, DateTime, String, UUID, ForeignKey
from cognee.infrastructure.databases.relational import Base
class DatasetDatabase(Base):
__tablename__ = "dataset_database"
owner_id = Column(UUID, ForeignKey("principals.id", ondelete="CASCADE"), index=True)
dataset_id = Column(
UUID, ForeignKey("datasets.id", ondelete="CASCADE"), primary_key=True, index=True
)
vector_database_name = Column(String, unique=True, nullable=False)
graph_database_name = Column(String, unique=True, nullable=False)
created_at = Column(DateTime(timezone=True), default=lambda: datetime.now(timezone.utc))
updated_at = Column(DateTime(timezone=True), onupdate=lambda: datetime.now(timezone.utc))
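
The new dataset_database table maps each owner and dataset to the vector and graph database names created for them. A rough sketch of inserting such a row is shown below; the naming convention for the database names is an illustrative assumption, not the scheme Cognee itself applies.

```python
from uuid import UUID

from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.models import DatasetDatabase

async def register_dataset_databases(owner_id: UUID, dataset_id: UUID) -> None:
    engine = get_relational_engine()
    async with engine.get_async_session() as session:
        session.add(
            DatasetDatabase(
                owner_id=owner_id,
                dataset_id=dataset_id,
                # Illustrative naming only: one LanceDB / KuzuDB database per user + dataset.
                vector_database_name=f"lancedb_{owner_id}_{dataset_id}",
                graph_database_name=f"kuzu_{owner_id}_{dataset_id}",
            )
        )
        await session.commit()
```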

View file

@ -11,6 +11,8 @@ class Tenant(Principal):
id = Column(UUID, ForeignKey("principals.id"), primary_key=True) id = Column(UUID, ForeignKey("principals.id"), primary_key=True)
name = Column(String, unique=True, nullable=False, index=True) name = Column(String, unique=True, nullable=False, index=True)
owner_id = Column(UUID, index=True)
# One-to-Many relationship with User; specify the join via User.tenant_id # One-to-Many relationship with User; specify the join via User.tenant_id
users = relationship( users = relationship(
"User", "User",

View file

@ -1,6 +1,7 @@
from .User import User from .User import User
from .Role import Role from .Role import Role
from .UserRole import UserRole from .UserRole import UserRole
from .DatasetDatabase import DatasetDatabase
from .RoleDefaultPermissions import RoleDefaultPermissions from .RoleDefaultPermissions import RoleDefaultPermissions
from .UserDefaultPermissions import UserDefaultPermissions from .UserDefaultPermissions import UserDefaultPermissions
from .TenantDefaultPermissions import TenantDefaultPermissions from .TenantDefaultPermissions import TenantDefaultPermissions

View file

@ -0,0 +1 @@
from .permission_types import PERMISSION_TYPES

View file

@ -1,6 +1,13 @@
from .check_permission_on_documents import check_permission_on_documents from .get_role import get_role
from .give_permission_on_document import give_permission_on_document from .get_tenant import get_tenant
from .get_principal import get_principal
from .get_principal_datasets import get_principal_datasets
from .get_all_user_permission_datasets import get_all_user_permission_datasets
from .get_specific_user_permission_datasets import get_specific_user_permission_datasets
from .check_permission_on_dataset import check_permission_on_dataset
from .give_permission_on_dataset import give_permission_on_dataset
from .get_document_ids_for_user import get_document_ids_for_user from .get_document_ids_for_user import get_document_ids_for_user
from .authorized_give_permission_on_datasets import authorized_give_permission_on_datasets
from .give_default_permission_to_tenant import give_default_permission_to_tenant from .give_default_permission_to_tenant import give_default_permission_to_tenant
from .give_default_permission_to_role import give_default_permission_to_role from .give_default_permission_to_role import give_default_permission_to_role
from .give_default_permission_to_user import give_default_permission_to_user from .give_default_permission_to_user import give_default_permission_to_user

View file

@ -0,0 +1,23 @@
from typing import Union, List
from cognee.modules.users.permissions.methods import get_principal
from cognee.modules.users.permissions.methods import give_permission_on_dataset
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from uuid import UUID
async def authorized_give_permission_on_datasets(
principal_id: UUID, dataset_ids: Union[List[UUID], UUID], permission_name: str, owner_id: UUID
):
# If only a single dataset UUID is provided transform it to a list
if not isinstance(dataset_ids, list):
dataset_ids = [dataset_ids]
principal = await get_principal(principal_id)
# Check if request owner has permission to share dataset access
datasets = await get_specific_user_permission_datasets(owner_id, "share", dataset_ids)
# TODO: Do we want to enforce sharing of datasets to only be between users of the same tenant?
for dataset in datasets:
await give_permission_on_dataset(principal, dataset.id, permission_name)
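
A brief usage sketch of the helper above: the request owner must hold the "share" permission on the dataset, otherwise get_specific_user_permission_datasets raises PermissionDeniedError. The ids and the wrapper function are placeholders.

```python
from uuid import UUID

from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets

async def share_read_access(owner_id: UUID, target_principal_id: UUID, dataset_id: UUID) -> None:
    # A single UUID is accepted for dataset_ids and wrapped into a list internally.
    await authorized_give_permission_on_datasets(
        principal_id=target_principal_id,
        dataset_ids=dataset_id,
        permission_name="read",
        owner_id=owner_id,
    )
```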

View file

@ -13,29 +13,29 @@ from ...models.ACL import ACL
logger = get_logger() logger = get_logger()
async def check_permission_on_documents(user: User, permission_type: str, document_ids: list[UUID]): async def check_permission_on_dataset(user: User, permission_type: str, dataset_id: UUID):
if user is None: if user is None:
user = await get_default_user() user = await get_default_user()
# TODO: Enable user role permissions again. Temporarily disabled during rework. # # TODO: Enable user role permissions again. Temporarily disabled during rework.
# user_roles_ids = [role.id for role in user.roles] # user_roles_ids = [role.id for role in user.roles]
user_roles_ids = [] user_roles_ids = []
db_engine = get_relational_engine() db_engine = get_relational_engine()
async with db_engine.get_async_session() as session: async with db_engine.get_async_session() as session:
# If dataset id was returned it means the user has permission to access it
result = await session.execute( result = await session.execute(
select(ACL) select(ACL)
.join(ACL.permission) .join(ACL.permission)
.options(joinedload(ACL.data)) .options(joinedload(ACL.dataset))
.where(ACL.principal_id.in_([user.id, *user_roles_ids])) .where(ACL.principal_id.in_([user.id, *user_roles_ids]))
.where(ACL.permission.has(name=permission_type)) .where(ACL.permission.has(name=permission_type))
) )
acls = result.unique().scalars().all() acls = result.unique().scalars().all()
data_ids = [acl.data.id for acl in acls] has_permission = dataset_id in [acl.dataset.id for acl in acls]
has_permissions = all(document_id in data_ids for document_id in document_ids)
if not has_permissions: if not has_permission:
raise PermissionDeniedError( raise PermissionDeniedError(
message=f"User {user.id} does not have {permission_type} permission on documents" message=f"User {user.id} does not have {permission_type} permission on documents"
) )
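
A minimal sketch of guarding an operation with the dataset-level check; the surrounding function is illustrative and simply maps the denial back to the caller. When user is None, the check falls back to the default user, as shown in the hunk above.

```python
from uuid import UUID

from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.modules.users.permissions.methods import check_permission_on_dataset

async def can_read_dataset(user, dataset_id: UUID) -> bool:
    try:
        await check_permission_on_dataset(user, "read", dataset_id)
        return True
    except PermissionDeniedError:
        # The API layer would typically translate this into an HTTP 403.
        return False
```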

View file

@ -0,0 +1,31 @@
from cognee.shared.logging_utils import get_logger
from ...models.User import User
from cognee.modules.data.models.Dataset import Dataset
from cognee.modules.users.permissions.methods import get_principal_datasets
from cognee.modules.users.permissions.methods import get_role, get_tenant
logger = get_logger()
async def get_all_user_permission_datasets(user: User, permission_type: str) -> list[Dataset]:
datasets = list()
# Get all datasets User has explicit access to
datasets.extend(await get_principal_datasets(user, permission_type))
if user.tenant_id:
# Get all datasets all tenants have access to
tenant = await get_tenant(user.tenant_id)
datasets.extend(await get_principal_datasets(tenant, permission_type))
# Get all datasets Users roles have access to
for role_name in user.roles:
role = await get_role(user.tenant_id, role_name)
datasets.extend(await get_principal_datasets(role, permission_type))
# Deduplicate datasets with same ID
unique = {}
for dataset in datasets:
# If the dataset id key already exists, leave the dictionary unchanged.
unique.setdefault(dataset.id, dataset)
return list(unique.values())

View file

@ -1,7 +1,9 @@
from uuid import UUID from uuid import UUID
from cognee.modules.data.methods import get_dataset_data
from sqlalchemy import select from sqlalchemy import select
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.models import Dataset, DatasetData, Data from cognee.modules.data.models import Dataset, DatasetData
from ...models import ACL, Permission from ...models import ACL, Permission
@ -10,10 +12,10 @@ async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -
async with db_engine.get_async_session() as session: async with db_engine.get_async_session() as session:
async with session.begin(): async with session.begin():
document_ids = ( dataset_ids = (
await session.scalars( await session.scalars(
select(Data.id) select(Dataset.id)
.join(ACL.data) .join(ACL.dataset)
.join(ACL.permission) .join(ACL.permission)
.where( .where(
ACL.principal_id == user_id, ACL.principal_id == user_id,
@ -22,9 +24,15 @@ async def get_document_ids_for_user(user_id: UUID, datasets: list[str] = None) -
) )
).all() ).all()
# Get documents from datasets the user has read access to
document_ids = []
for dataset_id in dataset_ids:
data_list = await get_dataset_data(dataset_id)
document_ids.extend([data.id for data in data_list])
if datasets: if datasets:
documents_ids_in_dataset = set()
# If datasets are specified filter out documents that aren't part of the specified datasets # If datasets are specified filter out documents that aren't part of the specified datasets
documents_ids_in_dataset = set()
for dataset in datasets: for dataset in datasets:
# Find dataset id for dataset element # Find dataset id for dataset element
dataset_id = ( dataset_id = (

View file

@ -0,0 +1,14 @@
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models.Principal import Principal
async def get_principal(principal_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
result = await session.execute(select(Principal).where(Principal.id == principal_id))
principal = result.unique().scalar_one()
return principal

View file

@ -0,0 +1,24 @@
from sqlalchemy import select
from sqlalchemy.orm import joinedload
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models.Principal import Principal
from cognee.modules.data.models.Dataset import Dataset
from ...models.ACL import ACL
async def get_principal_datasets(principal: Principal, permission_type: str) -> list[Dataset]:
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
# If dataset id was returned it means the principal has permission to access it
result = await session.execute(
select(ACL)
.join(ACL.permission)
.options(joinedload(ACL.dataset))
.where(ACL.principal_id == principal.id)
.where(ACL.permission.has(name=permission_type))
)
acls = result.unique().scalars().all()
return [acl.dataset for acl in acls]

View file

@ -0,0 +1,24 @@
import sqlalchemy.exc
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import RoleNotFoundError
from ...models.Role import Role
async def get_role(tenant_id: UUID, role_name: str):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
result = await session.execute(
select(Role).where(Role.name == role_name).where(Role.tenant_id == tenant_id)
)
role = result.unique().scalar_one()
if not role:
raise RoleNotFoundError(message=f"Could not find {role_name} for given tenant")
return role
except sqlalchemy.exc.NoResultFound:
raise RoleNotFoundError(message=f"Could not find {role_name} for given tenant")

View file

@ -0,0 +1,46 @@
from uuid import UUID
from cognee.modules.data.models.Dataset import Dataset
from cognee.modules.users.permissions.methods.get_all_user_permission_datasets import (
get_all_user_permission_datasets,
)
from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.modules.users.methods import get_user
async def get_specific_user_permission_datasets(
user_id: UUID, permission_type: str, dataset_ids: list[UUID] = None
) -> list[Dataset]:
"""
Return a list of datasets user has given permission for. If a list of datasets is provided,
verify for which datasets user has appropriate permission for and return list of datasets he has permission for.
Args:
user_id:
permission_type:
dataset_ids:
Returns:
list[Dataset]: List of datasets user has permission for
"""
user = await get_user(user_id)
# Find all datasets user has permission for
user_permission_access_datasets = await get_all_user_permission_datasets(user, permission_type)
# If specific datasets are requested, keep only those and drop the rest
if dataset_ids:
search_datasets = [
dataset for dataset in user_permission_access_datasets if dataset.id in dataset_ids
]
# If there are requested datasets that user does not have access to raise error
if len(search_datasets) != len(dataset_ids):
raise PermissionDeniedError(
f"Request owner does not have necessary permission: [{permission_type}] for all datasets requested."
)
else:
search_datasets = user_permission_access_datasets
if len(search_datasets) == 0:
raise PermissionDeniedError(
f"Request owner does not have permission: [{permission_type}] for any dataset."
)
return search_datasets
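
A short sketch of how a caller might resolve which of the requested datasets can actually be searched; when no ids are passed, every dataset the user can read is returned. The wrapper function name is a placeholder.

```python
from typing import List, Optional
from uuid import UUID

from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets

async def resolve_readable_dataset_ids(
    user_id: UUID, requested_ids: Optional[List[UUID]] = None
) -> List[UUID]:
    datasets = await get_specific_user_permission_datasets(user_id, "read", requested_ids)
    return [dataset.id for dataset in datasets]
```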

View file

@ -0,0 +1,21 @@
import sqlalchemy.exc
from sqlalchemy import select
from uuid import UUID
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import TenantNotFoundError
from ...models.Tenant import Tenant
async def get_tenant(tenant_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
try:
result = await session.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.unique().scalar_one()
if not tenant:
raise TenantNotFoundError
return tenant
except sqlalchemy.exc.NoResultFound:
raise TenantNotFoundError(message=f"Could not find tenant: {tenant_id}")

View file

@ -0,0 +1,46 @@
from sqlalchemy.future import select
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models import Principal, ACL, Permission
from uuid import UUID
from cognee.modules.users.permissions import PERMISSION_TYPES
from cognee.modules.users.exceptions import PermissionNotFoundError
async def give_permission_on_dataset(
principal: Principal,
dataset_id: UUID,
permission_name: str,
):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
permission = (
(await session.execute(select(Permission).filter(Permission.name == permission_name)))
.scalars()
.first()
)
if permission_name not in PERMISSION_TYPES:
# If permission is not in allowed permission types
raise PermissionNotFoundError(
message=f"{permission_name} not found or not in allowed permission types"
)
elif permission is None:
permission = Permission(name=permission_name)
existing_acl = None
else:
# Check if the ACL entry already exists to avoid duplicates
existing_acl = await session.execute(
select(ACL).filter(
ACL.principal_id == principal.id,
ACL.dataset_id == dataset_id,
ACL.permission_id == permission.id,
)
)
existing_acl = existing_acl.scalars().first()
# If no existing ACL entry is found, proceed to add a new one
if existing_acl is None:
acl = ACL(principal_id=principal.id, dataset_id=dataset_id, permission=permission)
session.add(acl)
await session.commit()
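
As a usage note, the ingestion path further down in this commit grants the dataset owner the full default permission set after creating a dataset. A condensed sketch of that pattern, with the principal and dataset id as placeholders:

```python
from uuid import UUID

from cognee.modules.users.permissions.methods import give_permission_on_dataset

async def grant_owner_defaults(owner, dataset_id: UUID) -> None:
    # Mirrors the read/write/delete/share grants performed in ingest_data below.
    for permission_name in ("read", "write", "delete", "share"):
        await give_permission_on_dataset(owner, dataset_id, permission_name)
```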

View file

@ -1,27 +0,0 @@
from sqlalchemy.future import select
from cognee.infrastructure.databases.relational import get_relational_engine
from ...models import User, ACL, Permission
async def give_permission_on_document(
user: User,
document_id: str,
permission_name: str,
):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
permission = (
(await session.execute(select(Permission).filter(Permission.name == permission_name)))
.scalars()
.first()
)
if permission is None:
permission = Permission(name=permission_name)
acl = ACL(principal_id=user.id, data_id=document_id, permission=permission)
session.add(acl)
await session.commit()

View file

@ -0,0 +1 @@
PERMISSION_TYPES = ["read", "write", "delete", "share"]

View file

@ -9,24 +9,40 @@ from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.exceptions import ( from cognee.modules.users.exceptions import (
UserNotFoundError, UserNotFoundError,
RoleNotFoundError, RoleNotFoundError,
TenantNotFoundError,
PermissionDeniedError,
) )
from cognee.modules.users.models import ( from cognee.modules.users.models import (
User, User,
Role, Role,
Tenant,
UserRole, UserRole,
) )
async def add_user_to_role(user_id: UUID, role_id: UUID): async def add_user_to_role(user_id: UUID, role_id: UUID, owner_id: UUID):
db_engine = get_relational_engine() db_engine = get_relational_engine()
async with db_engine.get_async_session() as session: async with db_engine.get_async_session() as session:
user = (await session.execute(select(User).where(User.id == user_id))).scalars().first() user = (await session.execute(select(User).where(User.id == user_id))).scalars().first()
role = (await session.execute(select(Role).where(Role.id == role_id))).scalars().first() role = (await session.execute(select(Role).where(Role.id == role_id))).scalars().first()
tenant = (
(await session.execute(select(Tenant).where(Tenant.id == role.tenant_id)))
.scalars()
.first()
)
if not user: if not user:
raise UserNotFoundError raise UserNotFoundError
elif not role: elif not role:
raise RoleNotFoundError raise RoleNotFoundError
elif user.tenant_id != role.tenant_id:
raise TenantNotFoundError(
message="User tenant does not match role tenant. User cannot be added to role."
)
elif tenant.owner_id != owner_id:
raise PermissionDeniedError(
message="User submitting request does not have permission to add user to role."
)
try: try:
# Add association directly to the association table # Add association directly to the association table

View file

@ -4,6 +4,9 @@ from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.methods import get_user
from cognee.modules.users.permissions.methods import get_tenant
from cognee.modules.users.exceptions import PermissionDeniedError
from cognee.modules.users.models import ( from cognee.modules.users.models import (
Role, Role,
) )
@ -11,13 +14,21 @@ from cognee.modules.users.models import (
async def create_role( async def create_role(
role_name: str, role_name: str,
tenant_id: UUID, owner_id: UUID,
): ):
db_engine = get_relational_engine() db_engine = get_relational_engine()
async with db_engine.get_async_session() as session: async with db_engine.get_async_session() as session:
user = await get_user(owner_id)
tenant = await get_tenant(user.tenant_id)
if owner_id != tenant.owner_id:
raise PermissionDeniedError(
"User submitting request does not have permission to create role for tenant."
)
try: try:
# Add association directly to the association table # Add association directly to the association table
role = Role(name=role_name, tenant_id=tenant_id) role = Role(name=role_name, tenant_id=tenant.id)
session.add(role) session.add(role)
except IntegrityError: except IntegrityError:
raise EntityAlreadyExistsError(message="Role already exists for tenant.") raise EntityAlreadyExistsError(message="Role already exists for tenant.")

View file

@ -1 +1,2 @@
from .create_tenant import create_tenant from .create_tenant import create_tenant
from .add_user_to_tenant import add_user_to_tenant

View file

@ -0,0 +1,44 @@
from uuid import UUID
from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.methods import get_user
from cognee.modules.users.permissions.methods import get_tenant
from cognee.modules.users.exceptions import (
UserNotFoundError,
TenantNotFoundError,
PermissionDeniedError,
)
async def add_user_to_tenant(user_id: UUID, tenant_id: UUID, owner_id: UUID):
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
user = await get_user(user_id)
tenant = await get_tenant(tenant_id)
if not user:
raise UserNotFoundError
elif not tenant:
raise TenantNotFoundError
if tenant.owner_id != owner_id:
raise PermissionDeniedError(
message="Only tenant owner can add other users to organization."
)
try:
if user.tenant_id is None:
user.tenant_id = tenant_id
elif user.tenant_id == tenant_id:
return
else:
raise IntegrityError
await session.merge(user)
await session.commit()
except IntegrityError:
raise EntityAlreadyExistsError(
message="User is already part of a tenant. Only one tenant can be assigned to user."
)

View file

@ -1,19 +1,28 @@
from uuid import UUID
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError from cognee.infrastructure.databases.exceptions import EntityAlreadyExistsError
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.users.models import Tenant from cognee.modules.users.models import Tenant
from cognee.modules.users.methods import get_user
async def create_tenant(tenant_name: str): async def create_tenant(tenant_name: str, user_id: UUID):
db_engine = get_relational_engine() db_engine = get_relational_engine()
async with db_engine.get_async_session() as session: async with db_engine.get_async_session() as session:
try: try:
# Add association directly to the association table user = await get_user(user_id)
tenant = Tenant(name=tenant_name) if user.tenant_id:
raise EntityAlreadyExistsError(
message="User already has a tenant. New tenant cannot be created."
)
tenant = Tenant(name=tenant_name, owner_id=user_id)
session.add(tenant) session.add(tenant)
await session.flush()
user.tenant_id = tenant.id
await session.merge(user)
await session.commit()
except IntegrityError: except IntegrityError:
raise EntityAlreadyExistsError(message="Tenant already exists.") raise EntityAlreadyExistsError(message="Tenant already exists.")
await session.commit()
await session.refresh(tenant)
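
A rough sketch of the intended flow: the creating user becomes the tenant owner and can then invite other users. The import path is assumed from the tenants methods __init__ shown above, and the tenant id is taken as a parameter because this hunk does not show create_tenant returning it.

```python
from uuid import UUID

# NOTE: assumed import path, based on the __init__ above.
from cognee.modules.users.tenants.methods import create_tenant, add_user_to_tenant

async def onboard_tenant(owner_id: UUID, member_id: UUID, tenant_id: UUID) -> None:
    await create_tenant("acme-research", owner_id)  # fails if the owner already belongs to a tenant
    # tenant_id would be looked up separately; only the tenant owner may add members.
    await add_user_to_tenant(member_id, tenant_id, owner_id)
```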

View file

@ -2,4 +2,4 @@ from .translate_text import translate_text
from .detect_language import detect_language from .detect_language import detect_language
from .classify_documents import classify_documents from .classify_documents import classify_documents
from .extract_chunks_from_documents import extract_chunks_from_documents from .extract_chunks_from_documents import extract_chunks_from_documents
from .check_permissions_on_documents import check_permissions_on_documents from .check_permissions_on_dataset import check_permissions_on_dataset

View file

@ -1,10 +1,10 @@
from cognee.modules.data.processing.document_types import Document from cognee.modules.data.processing.document_types import Document
from cognee.modules.users.permissions.methods import check_permission_on_documents from cognee.modules.users.permissions.methods import check_permission_on_dataset
from typing import List from typing import List
async def check_permissions_on_documents( async def check_permissions_on_dataset(
documents: list[Document], user, permissions documents: List[Document], context: dict, user, permissions
) -> List[Document]: ) -> List[Document]:
""" """
Validates a user's permissions on a list of documents. Validates a user's permissions on a list of documents.
@ -14,13 +14,12 @@ async def check_permissions_on_documents(
- It is designed to validate multiple permissions in a sequential manner for the same set of documents. - It is designed to validate multiple permissions in a sequential manner for the same set of documents.
- Ensure that the `Document` and `user` objects conform to the expected structure and interfaces. - Ensure that the `Document` and `user` objects conform to the expected structure and interfaces.
""" """
document_ids = [document.id for document in documents]
for permission in permissions: for permission in permissions:
await check_permission_on_documents( await check_permission_on_dataset(
user, user,
permission, permission,
document_ids, context["dataset"].id,
) )
return documents return documents

View file

@ -2,6 +2,7 @@ import dlt
import s3fs import s3fs
import json import json
import inspect import inspect
from uuid import UUID
from typing import Union, BinaryIO, Any, List, Optional from typing import Union, BinaryIO, Any, List, Optional
import cognee.modules.ingestion as ingestion import cognee.modules.ingestion as ingestion
from cognee.infrastructure.databases.relational import get_relational_engine from cognee.infrastructure.databases.relational import get_relational_engine
@ -9,7 +10,8 @@ from cognee.modules.data.methods import create_dataset, get_dataset_data, get_da
from cognee.modules.users.methods import get_default_user from cognee.modules.users.methods import get_default_user
from cognee.modules.data.models.DatasetData import DatasetData from cognee.modules.data.models.DatasetData import DatasetData
from cognee.modules.users.models import User from cognee.modules.users.models import User
from cognee.modules.users.permissions.methods import give_permission_on_document from cognee.modules.users.permissions.methods import give_permission_on_dataset
from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets
from .get_dlt_destination import get_dlt_destination from .get_dlt_destination import get_dlt_destination
from .save_data_item_to_storage import save_data_item_to_storage from .save_data_item_to_storage import save_data_item_to_storage
@ -18,7 +20,11 @@ from cognee.api.v1.add.config import get_s3_config
async def ingest_data( async def ingest_data(
data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None data: Any,
dataset_name: str,
user: User,
node_set: Optional[List[str]] = None,
dataset_id: UUID = None,
): ):
destination = get_dlt_destination() destination = get_dlt_destination()
@ -73,7 +79,11 @@ async def ingest_data(
} }
async def store_data_to_dataset( async def store_data_to_dataset(
data: Any, dataset_name: str, user: User, node_set: Optional[List[str]] = None data: Any,
dataset_name: str,
user: User,
node_set: Optional[List[str]] = None,
dataset_id: UUID = None,
): ):
if not isinstance(data, list): if not isinstance(data, list):
# Convert data to a list as we work with lists further down. # Convert data to a list as we work with lists further down.
@ -104,7 +114,17 @@ async def ingest_data(
db_engine = get_relational_engine() db_engine = get_relational_engine()
async with db_engine.get_async_session() as session: async with db_engine.get_async_session() as session:
dataset = await create_dataset(dataset_name, user, session) if dataset_id:
# Retrieve existing dataset
dataset = await get_specific_user_permission_datasets(
user.id, "write", [dataset_id]
)
# Convert from list to Dataset element
if isinstance(dataset, list):
dataset = dataset[0]
else:
# Create new one
dataset = await create_dataset(dataset_name, user, session)
# Check to see if data should be updated # Check to see if data should be updated
data_point = ( data_point = (
@ -138,6 +158,7 @@ async def ingest_data(
node_set=json.dumps(node_set) if node_set else None, node_set=json.dumps(node_set) if node_set else None,
token_count=-1, token_count=-1,
) )
session.add(data_point)
# Check if data is already in dataset # Check if data is already in dataset
dataset_data = ( dataset_data = (
@ -150,17 +171,20 @@ async def ingest_data(
# If data is not present in dataset add it # If data is not present in dataset add it
if dataset_data is None: if dataset_data is None:
dataset.data.append(data_point) dataset.data.append(data_point)
await session.merge(dataset)
await session.commit() await session.commit()
await give_permission_on_document(user, data_id, "read") await give_permission_on_dataset(user, dataset.id, "read")
await give_permission_on_document(user, data_id, "write") await give_permission_on_dataset(user, dataset.id, "write")
await give_permission_on_dataset(user, dataset.id, "delete")
await give_permission_on_dataset(user, dataset.id, "share")
return file_paths return file_paths
db_engine = get_relational_engine() db_engine = get_relational_engine()
file_paths = await store_data_to_dataset(data, dataset_name, user, node_set) file_paths = await store_data_to_dataset(data, dataset_name, user, node_set, dataset_id)
# Note: DLT pipeline has its own event loop, therefore objects created in another event loop # Note: DLT pipeline has its own event loop, therefore objects created in another event loop
# can't be used inside the pipeline # can't be used inside the pipeline
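
The hunk above threads an optional dataset_id down to store_data_to_dataset, so callers can append to an existing dataset they hold "write" permission on instead of creating one by name. A minimal sketch of the two call shapes; the import path is an assumption, since the file's location is not shown in this hunk.

```python
from uuid import UUID

# NOTE: assumed import path for the ingestion task shown above.
from cognee.tasks.ingestion.ingest_data import ingest_data

async def ingest_examples(user, existing_dataset_id: UUID) -> None:
    # Create (or reuse) a dataset by name, owned by the user.
    await ingest_data("some text", dataset_name="my_dataset", user=user)
    # Append to an existing dataset by id; requires "write" permission on it.
    await ingest_data(
        "more text", dataset_name="my_dataset", user=user, dataset_id=existing_dataset_id
    )
```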

View file

@ -0,0 +1,71 @@
import asyncio
import os
import pathlib
import cognee
from cognee.modules.search.operations import get_history
from cognee.modules.users.methods import get_default_user
from cognee.shared.logging_utils import get_logger
from cognee.modules.search.types import SearchType
logger = get_logger()
async def main():
data_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".data_storage/test_library")
).resolve()
)
cognee.config.data_root_directory(data_directory_path)
cognee_directory_path = str(
pathlib.Path(
os.path.join(pathlib.Path(__file__).parent, ".cognee_system/test_library")
).resolve()
)
cognee.config.system_root_directory(cognee_directory_path)
await cognee.prune.prune_data()
await cognee.prune.prune_system(metadata=True)
await cognee.add(["TEST1"], "test1")
await cognee.add(["TEST2"], "test2")
task_1_config = {
"vector_db_url": "cognee1.test",
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
task_2_config = {
"vector_db_url": "cognee2.test",
"vector_db_key": "",
"vector_db_provider": "lancedb",
}
task_1_graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": "kuzu1.db",
}
task_2_graph_config = {
"graph_database_provider": "kuzu",
"graph_file_path": "kuzu2.db",
}
# schedule both cognify calls concurrently
task1 = asyncio.create_task(
cognee.cognify(
["test1"], vector_db_config=task_1_config, graph_db_config=task_1_graph_config
)
)
task2 = asyncio.create_task(
cognee.cognify(
["test2"], vector_db_config=task_2_config, graph_db_config=task_2_graph_config
)
)
# wait until both are done (raises first error if any)
await asyncio.gather(task1, task2)
if __name__ == "__main__":
asyncio.run(main(), debug=True)

View file

@ -144,7 +144,6 @@ async def main():
graph_completion = await cognee.search( graph_completion = await cognee.search(
query_type=SearchType.GRAPH_COMPLETION, query_type=SearchType.GRAPH_COMPLETION,
query_text=random_node_name, query_text=random_node_name,
datasets=[dataset_name_2],
) )
assert len(graph_completion) != 0, "Completion result is empty." assert len(graph_completion) != 0, "Completion result is empty."
print("Completion result is:") print("Completion result is:")

View file

@ -49,7 +49,11 @@ async def main():
from cognee.infrastructure.databases.vector import get_vector_engine from cognee.infrastructure.databases.vector import get_vector_engine
vector_engine = get_vector_engine() vector_engine = get_vector_engine()
random_node = (await vector_engine.search("Entity_name", "Quantum computer"))[0] search_results = await vector_engine.search("Entity_name", "Quantum computer")
assert len(search_results) != 0, "The search results list is empty."
random_node = search_results[0]
random_node_name = random_node.payload["text"] random_node_name = random_node.payload["text"]
search_results = await cognee.search( search_results = await cognee.search(

View file

@ -24,13 +24,9 @@ def mock_user():
@pytest.mark.asyncio @pytest.mark.asyncio
@patch.object(search_module, "log_query") @patch.object(search_module, "log_query")
@patch.object(search_module, "log_result") @patch.object(search_module, "log_result")
@patch.object(search_module, "get_document_ids_for_user")
@patch.object(search_module, "specific_search") @patch.object(search_module, "specific_search")
@patch.object(search_module, "parse_id")
async def test_search( async def test_search(
mock_parse_id,
mock_specific_search, mock_specific_search,
mock_get_document_ids,
mock_log_result, mock_log_result,
mock_log_query, mock_log_query,
mock_user, mock_user,
@ -48,26 +44,19 @@ async def test_search(
# Mock document IDs # Mock document IDs
doc_id1 = uuid.uuid4() doc_id1 = uuid.uuid4()
doc_id2 = uuid.uuid4() doc_id2 = uuid.uuid4()
doc_id3 = uuid.uuid4() # This one will be filtered out
mock_get_document_ids.return_value = [doc_id1, doc_id2]
# Mock search results # Mock search results
search_results = [ search_results = [
{"document_id": str(doc_id1), "content": "Result 1"}, {"document_id": str(doc_id1), "content": "Result 1"},
{"document_id": str(doc_id2), "content": "Result 2"}, {"document_id": str(doc_id2), "content": "Result 2"},
{"document_id": str(doc_id3), "content": "Result 3"}, # Should be filtered out
] ]
mock_specific_search.return_value = search_results mock_specific_search.return_value = search_results
# Mock parse_id to return the same UUID
mock_parse_id.side_effect = lambda x: uuid.UUID(x) if x else None
# Execute # Execute
results = await search(query_text, query_type, datasets, mock_user) await search(query_text, query_type, datasets, mock_user)
# Verify # Verify
mock_log_query.assert_called_once_with(query_text, query_type.value, mock_user.id) mock_log_query.assert_called_once_with(query_text, query_type.value, mock_user.id)
mock_get_document_ids.assert_called_once_with(mock_user.id, datasets)
mock_specific_search.assert_called_once_with( mock_specific_search.assert_called_once_with(
query_type, query_type,
query_text, query_text,
@ -78,11 +67,6 @@ async def test_search(
node_name=None, node_name=None,
) )
# Only the first two results should be included (doc_id3 is filtered out)
assert len(results) == 2
assert results[0]["document_id"] == str(doc_id1)
assert results[1]["document_id"] == str(doc_id2)
# Verify result logging # Verify result logging
mock_log_result.assert_called_once() mock_log_result.assert_called_once()
# Check that the first argument is the query ID # Check that the first argument is the query ID

poetry.lock (generated, 282 changed lines)
View file

@ -435,7 +435,7 @@ description = "Timeout context manager for asyncio programs"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.11\"" markers = "python_version == \"3.10\""
files = [ files = [
{file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
@ -448,7 +448,7 @@ description = "Timeout context manager for asyncio programs"
optional = true optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"falkordb\" and python_full_version < \"3.11.3\" and python_version == \"3.11\"" markers = "python_version == \"3.11\" and python_full_version < \"3.11.3\" and extra == \"falkordb\""
files = [ files = [
{file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"},
{file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"},
@ -593,7 +593,7 @@ description = "Backport of CPython tarfile module"
optional = true optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"deepeval\" and python_version <= \"3.11\"" markers = "(python_version == \"3.10\" or python_version == \"3.11\") and extra == \"deepeval\""
files = [ files = [
{file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"}, {file = "backports.tarfile-1.2.0-py3-none-any.whl", hash = "sha256:77e284d754527b01fb1e6fa8a1afe577858ebe4e9dad8919e34c862cb399bc34"},
{file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"}, {file = "backports_tarfile-1.2.0.tar.gz", hash = "sha256:d75e02c268746e1b8144c278978b6e98e85de6ad16f8e4b0844a154557eca991"},
@ -1226,7 +1226,7 @@ description = "Cross-platform colored terminal text."
optional = false optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
groups = ["main"] groups = ["main"]
markers = "(sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (sys_platform == \"win32\" or platform_system == \"Windows\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (python_version < \"3.13\" or platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\")" markers = "(platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\") and (platform_system == \"Windows\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or extra == \"codegraph\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\") and (platform_system == \"Windows\" or sys_platform == \"win32\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"dev\" or extra == \"chromadb\" or extra == \"codegraph\") and (python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\" or extra == \"notebook\" or extra == \"dev\" or extra == \"llama-index\" or extra == \"deepeval\" or extra == \"chromadb\" or platform_system == \"Windows\")"
files = [ files = [
{file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"},
{file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"},
@ -2098,7 +2098,7 @@ description = "Backport of PEP 654 (exception groups)"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.11\"" markers = "python_version == \"3.10\""
files = [ files = [
{file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"},
{file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"},
@ -2225,7 +2225,7 @@ description = "Fast, light, accurate library built for retrieval embedding gener
optional = true optional = true
python-versions = ">=3.9.0" python-versions = ">=3.9.0"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\"" markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [ files = [
{file = "fastembed-0.6.0-py3-none-any.whl", hash = "sha256:a08385e9388adea0529a586004f2d588c9787880a510e4e5d167127a11e75328"}, {file = "fastembed-0.6.0-py3-none-any.whl", hash = "sha256:a08385e9388adea0529a586004f2d588c9787880a510e4e5d167127a11e75328"},
{file = "fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733"}, {file = "fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733"},
@ -2974,7 +2974,7 @@ description = "HTTP/2-based RPC framework"
optional = true optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"gemini\" or extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"milvus\" or python_version < \"3.11\" and (extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"milvus\")" markers = "python_version == \"3.10\" and (extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"gemini\" or extra == \"milvus\") or extra == \"gemini\" or extra == \"deepeval\" or extra == \"weaviate\" or extra == \"qdrant\" or extra == \"milvus\""
files = [ files = [
{file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"},
{file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"}, {file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"},
@ -3078,7 +3078,7 @@ description = "Protobuf code generator for gRPC"
optional = true optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "extra == \"weaviate\" or python_version >= \"3.13\" and (extra == \"weaviate\" or extra == \"qdrant\")" markers = "extra == \"weaviate\""
files = [ files = [
{file = "grpcio_tools-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:c701aaa51fde1f2644bd94941aa94c337adb86f25cd03cf05e37387aaea25800"}, {file = "grpcio_tools-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:c701aaa51fde1f2644bd94941aa94c337adb86f25cd03cf05e37387aaea25800"},
{file = "grpcio_tools-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:6a722bba714392de2386569c40942566b83725fa5c5450b8910e3832a5379469"}, {file = "grpcio_tools-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:6a722bba714392de2386569c40942566b83725fa5c5450b8910e3832a5379469"},
@ -3631,7 +3631,7 @@ description = "IPython: Productive Interactive Computing"
optional = true optional = true
python-versions = ">=3.10" python-versions = ">=3.10"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.11\" and (extra == \"notebook\" or extra == \"dev\")" markers = "python_version == \"3.10\" and (extra == \"notebook\" or extra == \"dev\")"
files = [ files = [
{file = "ipython-8.35.0-py3-none-any.whl", hash = "sha256:e6b7470468ba6f1f0a7b116bb688a3ece2f13e2f94138e508201fad677a788ba"}, {file = "ipython-8.35.0-py3-none-any.whl", hash = "sha256:e6b7470468ba6f1f0a7b116bb688a3ece2f13e2f94138e508201fad677a788ba"},
{file = "ipython-8.35.0.tar.gz", hash = "sha256:d200b7d93c3f5883fc36ab9ce28a18249c7706e51347681f80a0aef9895f2520"}, {file = "ipython-8.35.0.tar.gz", hash = "sha256:d200b7d93c3f5883fc36ab9ce28a18249c7706e51347681f80a0aef9895f2520"},
@ -4454,50 +4454,50 @@ files = [
[[package]] [[package]]
name = "kuzu" name = "kuzu"
version = "0.8.2" version = "0.9.0"
description = "Highly scalable, extremely fast, easy-to-use embeddable graph database" description = "Highly scalable, extremely fast, easy-to-use embeddable graph database"
optional = true optional = true
python-versions = "*" python-versions = "*"
groups = ["main"] groups = ["main"]
markers = "extra == \"kuzu\"" markers = "extra == \"api\" or extra == \"kuzu\""
files = [ files = [
{file = "kuzu-0.8.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:78bcdf6cc7b130bce8b307709e8d7bddd2e9104b2b696a9dc52574556e754570"}, {file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:ec9f216d67c092ea52086c99cf4b1deabe0f8daaf47c80cf1892b3b41c57d58a"},
{file = "kuzu-0.8.2-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:b42e3e9b1eacf830700287b05e96f9455b89dd4140085053e6c86b32c61e8d5c"}, {file = "kuzu-0.9.0-cp310-cp310-macosx_11_0_x86_64.whl", hash = "sha256:bda6d845bf1c7da204ffa7730573118f2d43fe6b14b1a5d0d2845ec3d3481362"},
{file = "kuzu-0.8.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf06c602dc0231268d9cfa56a62afef15f8fca3be1ccd2cad22047a14bff4ae0"}, {file = "kuzu-0.9.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab5b28f101c93899fc15668b6cb25f6db3d4a9844fcc4affed293caaaafaa4b7"},
{file = "kuzu-0.8.2-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50a873e7cd0c2e8e3093e9af14cffb14e49f1f67eceb32df3d0454ce101402d3"}, {file = "kuzu-0.9.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:183bb1de19ffec1c3b07c0b4d5eecf02eb4eeafc1d50aea409bc91e1fad4d6d2"},
{file = "kuzu-0.8.2-cp310-cp310-win_amd64.whl", hash = "sha256:4d36261444d31432606f3f3ed00624f1a3a8edcf7d830564c72b76ffbdf4d318"}, {file = "kuzu-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:2e36ce7da1bbebb538082656de18a717895d9352a33c8bcac170ef2fc22a4902"},
{file = "kuzu-0.8.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6c1694c6d1b19c46ad5d416cac429ccf1fe91aca4d367664e3aa0afa59800f93"}, {file = "kuzu-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:82dd690d823df816e7826945e5243a4ae65e3e948ef512709a59205b84b9f6dd"},
{file = "kuzu-0.8.2-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:00156c64523a1377ffced998bdb031709336f90543da69544c0ab4b40d533692"}, {file = "kuzu-0.9.0-cp311-cp311-macosx_11_0_x86_64.whl", hash = "sha256:c394e019a14e9c5636228cf1acd333997c31e5da3d9a60a1df2c03b828438432"},
{file = "kuzu-0.8.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc75f26afe8815b046cfb0d931303da6c36ce3afb49d4ae18a3899f23e62020f"}, {file = "kuzu-0.9.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f7d493f88ed31eada4b88a92b115bc6085c60498c47336ab06a489e75a727bab"},
{file = "kuzu-0.8.2-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5f0de6910724a74cc492354e903cf76db78b6353eef1e2edfa0b79d600c3c572"}, {file = "kuzu-0.9.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:171b47cf2b3923c813f1ed88fb9d3964a9355129b5d3ebca54eba3450bfc1f97"},
{file = "kuzu-0.8.2-cp311-cp311-win_amd64.whl", hash = "sha256:56e99c39a725943aa7ad96ada8f29706da3d53cc98385f2c663b8ea026f0dce3"}, {file = "kuzu-0.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:3c8a8a611f599801c8db6aeffb978cd1badcfa3ec8f79c15b701810fee71765f"},
{file = "kuzu-0.8.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:adcc250b34963a6eea62b59d47a091018d83e61fb2e95552795ab61f103052be"}, {file = "kuzu-0.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:509af4029f9dcb9c3e843a825df44ec30009a70fad891cbcfb611c3b8cdfefd6"},
{file = "kuzu-0.8.2-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:f72036924466143675980baed02a26c0fca15b6254c11de9a9c18d28fe66247e"}, {file = "kuzu-0.9.0-cp312-cp312-macosx_11_0_x86_64.whl", hash = "sha256:885f17f6e46c15ecef121fc57a941f8b60f0a5c1d3995813bb7a4c7437fb2259"},
{file = "kuzu-0.8.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a2fd7895fdfd9df880091d32bfb79c148f849659c67e2b9e185f952a6bde9139"}, {file = "kuzu-0.9.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94f2e35aa345b543a4a21de0e82b70eac4c753987cfa4ded75ae7f9f23edbf11"},
{file = "kuzu-0.8.2-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:68486e291aa8a61264be7e31233ec34eeb6da2402f4b980c3f2b67f9ccbbea3a"}, {file = "kuzu-0.9.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:67430c9813607a3b901c4a1e6bfb3b93538af230bc821e675c552a162818f589"},
{file = "kuzu-0.8.2-cp312-cp312-win_amd64.whl", hash = "sha256:7cce7d06e6f09cd488c62be7cafe78752b037ed9e6585ed3da9df029104b1987"}, {file = "kuzu-0.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:549f4a72f815554fb998582876c5875cb0917a192e6a58d196e8247fd8902701"},
{file = "kuzu-0.8.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:aa0495f856f2e5f5067e281dab3fbc170aba0721d1f56156a8cd9fa50e706f91"}, {file = "kuzu-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ec2e709599b4015d0a179a191dd7850e7bf076f83b37b70d0dc2e4ee59ce7725"},
{file = "kuzu-0.8.2-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:823577b472ba63c3b36e5ff81e2b744736f9eaf0b71585c247f3defc9d268f53"}, {file = "kuzu-0.9.0-cp313-cp313-macosx_11_0_x86_64.whl", hash = "sha256:8aad4fbd74b283ffb0b115138dfc62d9775c8f19ba62ab243e55e3cd648652b6"},
{file = "kuzu-0.8.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bde76f38d293f49ad283a4831bd32d41f185b93a75d388d67f9b8996678203e9"}, {file = "kuzu-0.9.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba9dd4f412e31d34345b6461fc9489955ae9566abf426e56af478b6e791b735a"},
{file = "kuzu-0.8.2-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:cdb189012613ecd26630096796e3817c260deea85782e764309cd36b2c39dac5"}, {file = "kuzu-0.9.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:340502cbce54f21a5b2440a75c28d61ddfd26d6d6848e9daa6140798bdd5b367"},
{file = "kuzu-0.8.2-cp313-cp313-win_amd64.whl", hash = "sha256:71fb98721f9c46f960a5c3baea6b083026485c4b9a3e74ab01418243e29e3753"}, {file = "kuzu-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:e1ddb189dfa2aee0123dcd1a5ccc5b831a7f297233a09fccfd76294fc2f9e6bd"},
{file = "kuzu-0.8.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e12726af2cb552ab7b60e2b4312469359bb3b4b45ddbcfb75220def4be6f566"}, {file = "kuzu-0.9.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1fae68db87ba48268228c89e70ed1fde2f43843d8ed6b2debaafd314c45e8542"},
{file = "kuzu-0.8.2-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055f2cd9741bf39161f9ccff80428f8fb80b1910b2450b05bbe848487ba694f5"}, {file = "kuzu-0.9.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a0279ba37c639d96f303eb6ad4481e634495be31210991d8008c385ee50b4e0a"},
{file = "kuzu-0.8.2-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:18cb3da3a650f8dfde3639fbd6319a5ad6f98f60689c5dd96d20d8d1fc184d4c"}, {file = "kuzu-0.9.0-cp37-cp37m-macosx_11_0_x86_64.whl", hash = "sha256:3ca7424fe3831df687552b89903aa57fb88efff9c25df15c5d678fae7c933199"},
{file = "kuzu-0.8.2-cp37-cp37m-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e55a8fddc21ac3e27b3cf2815d93264dd3c89e9ad8c7f3960d51bdfe48a02709"}, {file = "kuzu-0.9.0-cp37-cp37m-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bce9284913434661f47cecfc763f8997a61ebd2bb7bfe993970c1403924708fa"},
{file = "kuzu-0.8.2-cp37-cp37m-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d93600aceacdd7903aa39f016cb641811f96e4825b027a135aaaa1d82e23d24"}, {file = "kuzu-0.9.0-cp37-cp37m-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:66040cdf9a59a5423b49c3d2bc01a089114b573ee1345d5a7c912276fbca0135"},
{file = "kuzu-0.8.2-cp37-cp37m-win_amd64.whl", hash = "sha256:68601d9e741c7815c3d3f46a9c6884853388bcc6920945f069d5dc4f9492c9c5"}, {file = "kuzu-0.9.0-cp37-cp37m-win_amd64.whl", hash = "sha256:8e195774364123845df071eddb18873ce8c78244dd6f854badfe65053b058088"},
{file = "kuzu-0.8.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:32d7ff56d793df27f76129b8b15bd85c940e59bcb67acd189b6a5ed1af5e8b44"}, {file = "kuzu-0.9.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:2906f29ee36f9f642bdb8f5222c94f667092e38bde7dc53ebb252f9eb524ab6a"},
{file = "kuzu-0.8.2-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:5e639f24be2fca78bf3890774f273aa1a6b149bfdbeb5c7e966e03b8f610be98"}, {file = "kuzu-0.9.0-cp38-cp38-macosx_11_0_x86_64.whl", hash = "sha256:4c3218e266766080fe1b31325d0156d1b334f62ae23dac854c3e4919115ef8c6"},
{file = "kuzu-0.8.2-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1caf46e2721dabed94b65cdcf3990551af2f3913c3f2dcd39f3e5397f0134243"}, {file = "kuzu-0.9.0-cp38-cp38-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a26214c1600c21f5e4aa96585706953a8792ad77e14788710d78f8af0d6b74ec"},
{file = "kuzu-0.8.2-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a5333c9e4557ccbfef7b822793ec382848411c8d11fdee063064b41bd1828404"}, {file = "kuzu-0.9.0-cp38-cp38-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1b153fb28db9336757346eabb24b8c179b4ed48578a0ef158210fbc935df2184"},
{file = "kuzu-0.8.2-cp38-cp38-win_amd64.whl", hash = "sha256:765a8bd4c5b9d24583eb8aaa20ecd753d78220138a82bf643ec592ffb8128298"}, {file = "kuzu-0.9.0-cp38-cp38-win_amd64.whl", hash = "sha256:b6ee075e2571b11a434efb004cb0b3a2fbd7aa416ae680816869f1388e5fc734"},
{file = "kuzu-0.8.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3a215ff235d17a41c50d1cf2bd8e67a196eff32f23e59d989b1a40e6192f2008"}, {file = "kuzu-0.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:56874ae750ff99b15c959d884b175adf24ac912ab08e084c42784902b2bce2fb"},
{file = "kuzu-0.8.2-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:074b5440186e4214b653d46f8d5a15d4b4cae1185d4656eaf598fe9b840fcdca"}, {file = "kuzu-0.9.0-cp39-cp39-macosx_11_0_x86_64.whl", hash = "sha256:6e0265b1ad445500397dc0df3cc4e7faddfd67fcd3d0952d9a4cdab6b77b47e9"},
{file = "kuzu-0.8.2-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:32303a9533674a35e52d429f1446a82e2fc97c423618bc86aaafef1d4d2621e4"}, {file = "kuzu-0.9.0-cp39-cp39-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d66e69a3e135ea123cc7c9c2e507bbb614ffdbfe7be835782c6a588ae63ff900"},
{file = "kuzu-0.8.2-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0baea115bc55c8ed710f2beae8f02e46cf2bac42326b4e2c3acd25a76031f59d"}, {file = "kuzu-0.9.0-cp39-cp39-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e11c8b7186798ad95563e1d7ebf84495d817c406bd28c21af7170467e37e35e"},
{file = "kuzu-0.8.2-cp39-cp39-win_amd64.whl", hash = "sha256:70e031131c5b8e327edd63993b05fb04196b74d0ade1baf0f4005968610310ed"}, {file = "kuzu-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:4fb80eb6c71b02c4e57e3570b079c494082f7ff819d4c06ac482914f29211294"},
{file = "kuzu-0.8.2.tar.gz", hash = "sha256:68ad72b3ef6a32a41ecfa955fa4ca9ca0c8a36d3a1bc13e34cc70c971b2b8ca7"}, {file = "kuzu-0.9.0.tar.gz", hash = "sha256:2e59f3d4d1fc385e9e90d7ae09f072ec2f4cfeff508582523a0034ceb076f6eb"},
] ]
[[package]] [[package]]
@ -5087,7 +5087,7 @@ description = "Python logging made (stupidly) simple"
optional = true optional = true
python-versions = "<4.0,>=3.5" python-versions = "<4.0,>=3.5"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\"" markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [ files = [
{file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"}, {file = "loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c"},
{file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"}, {file = "loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6"},
@ -5827,7 +5827,7 @@ description = "Python extension for MurmurHash (MurmurHash3), a set of fast and
optional = true optional = true
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\"" markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [ files = [
{file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec"}, {file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec"},
{file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a"}, {file = "mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a"},
@ -6437,6 +6437,7 @@ description = "Fundamental package for array computing in Python"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
markers = "python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\""
files = [ files = [
{file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"},
{file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"},
@ -6476,6 +6477,69 @@ files = [
{file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"}, {file = "numpy-1.26.4.tar.gz", hash = "sha256:2a02aba9ed12e4ac4eb3ea9421c420301a0c6460d9830d74a9df87efa4912010"},
] ]
[[package]]
name = "numpy"
version = "2.1.0"
description = "Fundamental package for array computing in Python"
optional = false
python-versions = ">=3.10"
groups = ["main"]
markers = "python_version >= \"3.13\""
files = [
{file = "numpy-2.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6326ab99b52fafdcdeccf602d6286191a79fe2fda0ae90573c5814cd2b0bc1b8"},
{file = "numpy-2.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0937e54c09f7a9a68da6889362ddd2ff584c02d015ec92672c099b61555f8911"},
{file = "numpy-2.1.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:30014b234f07b5fec20f4146f69e13cfb1e33ee9a18a1879a0142fbb00d47673"},
{file = "numpy-2.1.0-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:899da829b362ade41e1e7eccad2cf274035e1cb36ba73034946fccd4afd8606b"},
{file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:08801848a40aea24ce16c2ecde3b756f9ad756586fb2d13210939eb69b023f5b"},
{file = "numpy-2.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:398049e237d1aae53d82a416dade04defed1a47f87d18d5bd615b6e7d7e41d1f"},
{file = "numpy-2.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0abb3916a35d9090088a748636b2c06dc9a6542f99cd476979fb156a18192b84"},
{file = "numpy-2.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10e2350aea18d04832319aac0f887d5fcec1b36abd485d14f173e3e900b83e33"},
{file = "numpy-2.1.0-cp310-cp310-win32.whl", hash = "sha256:f6b26e6c3b98adb648243670fddc8cab6ae17473f9dc58c51574af3e64d61211"},
{file = "numpy-2.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:f505264735ee074250a9c78247ee8618292091d9d1fcc023290e9ac67e8f1afa"},
{file = "numpy-2.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:76368c788ccb4f4782cf9c842b316140142b4cbf22ff8db82724e82fe1205dce"},
{file = "numpy-2.1.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:f8e93a01a35be08d31ae33021e5268f157a2d60ebd643cfc15de6ab8e4722eb1"},
{file = "numpy-2.1.0-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:9523f8b46485db6939bd069b28b642fec86c30909cea90ef550373787f79530e"},
{file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:54139e0eb219f52f60656d163cbe67c31ede51d13236c950145473504fa208cb"},
{file = "numpy-2.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ebbf9fbdabed208d4ecd2e1dfd2c0741af2f876e7ae522c2537d404ca895c3"},
{file = "numpy-2.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:378cb4f24c7d93066ee4103204f73ed046eb88f9ad5bb2275bb9fa0f6a02bd36"},
{file = "numpy-2.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8f699a709120b220dfe173f79c73cb2a2cab2c0b88dd59d7b49407d032b8ebd"},
{file = "numpy-2.1.0-cp311-cp311-win32.whl", hash = "sha256:ffbd6faeb190aaf2b5e9024bac9622d2ee549b7ec89ef3a9373fa35313d44e0e"},
{file = "numpy-2.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0af3a5987f59d9c529c022c8c2a64805b339b7ef506509fba7d0556649b9714b"},
{file = "numpy-2.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:fe76d75b345dc045acdbc006adcb197cc680754afd6c259de60d358d60c93736"},
{file = "numpy-2.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f358ea9e47eb3c2d6eba121ab512dfff38a88db719c38d1e67349af210bc7529"},
{file = "numpy-2.1.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:dd94ce596bda40a9618324547cfaaf6650b1a24f5390350142499aa4e34e53d1"},
{file = "numpy-2.1.0-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:b47c551c6724960479cefd7353656498b86e7232429e3a41ab83be4da1b109e8"},
{file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0756a179afa766ad7cb6f036de622e8a8f16ffdd55aa31f296c870b5679d745"},
{file = "numpy-2.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24003ba8ff22ea29a8c306e61d316ac74111cebf942afbf692df65509a05f111"},
{file = "numpy-2.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b34fa5e3b5d6dc7e0a4243fa0f81367027cb6f4a7215a17852979634b5544ee0"},
{file = "numpy-2.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c4f982715e65036c34897eb598d64aef15150c447be2cfc6643ec7a11af06574"},
{file = "numpy-2.1.0-cp312-cp312-win32.whl", hash = "sha256:c4cd94dfefbefec3f8b544f61286584292d740e6e9d4677769bc76b8f41deb02"},
{file = "numpy-2.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0cdef204199278f5c461a0bed6ed2e052998276e6d8ab2963d5b5c39a0500bc"},
{file = "numpy-2.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8ab81ccd753859ab89e67199b9da62c543850f819993761c1e94a75a814ed667"},
{file = "numpy-2.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:442596f01913656d579309edcd179a2a2f9977d9a14ff41d042475280fc7f34e"},
{file = "numpy-2.1.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:848c6b5cad9898e4b9ef251b6f934fa34630371f2e916261070a4eb9092ffd33"},
{file = "numpy-2.1.0-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:54c6a63e9d81efe64bfb7bcb0ec64332a87d0b87575f6009c8ba67ea6374770b"},
{file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:652e92fc409e278abdd61e9505649e3938f6d04ce7ef1953f2ec598a50e7c195"},
{file = "numpy-2.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ab32eb9170bf8ffcbb14f11613f4a0b108d3ffee0832457c5d4808233ba8977"},
{file = "numpy-2.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:8fb49a0ba4d8f41198ae2d52118b050fd34dace4b8f3fb0ee34e23eb4ae775b1"},
{file = "numpy-2.1.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:44e44973262dc3ae79e9063a1284a73e09d01b894b534a769732ccd46c28cc62"},
{file = "numpy-2.1.0-cp313-cp313-win32.whl", hash = "sha256:ab83adc099ec62e044b1fbb3a05499fa1e99f6d53a1dde102b2d85eff66ed324"},
{file = "numpy-2.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:de844aaa4815b78f6023832590d77da0e3b6805c644c33ce94a1e449f16d6ab5"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:343e3e152bf5a087511cd325e3b7ecfd5b92d369e80e74c12cd87826e263ec06"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f07fa2f15dabe91259828ce7d71b5ca9e2eb7c8c26baa822c825ce43552f4883"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5474dad8c86ee9ba9bb776f4b99ef2d41b3b8f4e0d199d4f7304728ed34d0300"},
{file = "numpy-2.1.0-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:1f817c71683fd1bb5cff1529a1d085a57f02ccd2ebc5cd2c566f9a01118e3b7d"},
{file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a3336fbfa0d38d3deacd3fe7f3d07e13597f29c13abf4d15c3b6dc2291cbbdd"},
{file = "numpy-2.1.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a894c51fd8c4e834f00ac742abad73fc485df1062f1b875661a3c1e1fb1c2f6"},
{file = "numpy-2.1.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:9156ca1f79fc4acc226696e95bfcc2b486f165a6a59ebe22b2c1f82ab190384a"},
{file = "numpy-2.1.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:624884b572dff8ca8f60fab591413f077471de64e376b17d291b19f56504b2bb"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:15ef8b2177eeb7e37dd5ef4016f30b7659c57c2c0b57a779f1d537ff33a72c7b"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:e5f0642cdf4636198a4990de7a71b693d824c56a757862230454629cf62e323d"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15976718c004466406342789f31b6673776360f3b1e3c575f25302d7e789575"},
{file = "numpy-2.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6c1de77ded79fef664d5098a66810d4d27ca0224e9051906e634b3f7ead134c2"},
{file = "numpy-2.1.0.tar.gz", hash = "sha256:7dc90da0081f7e1da49ec4e398ede6a8e9cc4f5ebe5f9e06b443ed889ee9aaa2"},
]
[[package]] [[package]]
name = "oauthlib" name = "oauthlib"
version = "3.2.2" version = "3.2.2"
@ -6929,8 +6993,8 @@ files = [
[package.dependencies] [package.dependencies]
numpy = [ numpy = [
{version = ">=1.22.4", markers = "python_version < \"3.11\""}, {version = ">=1.22.4", markers = "python_version < \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
{version = ">=1.23.2", markers = "python_version == \"3.11\""}, {version = ">=1.23.2", markers = "python_version == \"3.11\""},
{version = ">=1.26.0", markers = "python_version >= \"3.12\""},
] ]
python-dateutil = ">=2.8.2" python-dateutil = ">=2.8.2"
pytz = ">=2020.1" pytz = ">=2020.1"
@ -7028,7 +7092,7 @@ description = "Python datetimes made easy"
optional = false optional = false
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.13\"" markers = "python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\""
files = [ files = [
{file = "pendulum-3.1.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:aa545a59e6517cf43597455a6fb44daa4a6e08473d67a7ad34e4fa951efb9620"}, {file = "pendulum-3.1.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:aa545a59e6517cf43597455a6fb44daa4a6e08473d67a7ad34e4fa951efb9620"},
{file = "pendulum-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:299df2da6c490ede86bb8d58c65e33d7a2a42479d21475a54b467b03ccb88531"}, {file = "pendulum-3.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:299df2da6c490ede86bb8d58c65e33d7a2a42479d21475a54b467b03ccb88531"},
@ -7713,7 +7777,7 @@ description = "Fast and parallel snowball stemmer"
optional = true optional = true
python-versions = "*" python-versions = "*"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"codegraph\"" markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"codegraph\""
files = [ files = [
{file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bfbd9034ae00419ff2154e33b8f5b4c4d99d1f9271f31ed059e5c7e9fa005844"}, {file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:bfbd9034ae00419ff2154e33b8f5b4c4d99d1f9271f31ed059e5c7e9fa005844"},
{file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7162ae66df2bb0fc39b350c24a049f5f5151c03c046092ba095c2141ec223a2"}, {file = "py_rust_stemmers-0.1.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7162ae66df2bb0fc39b350c24a049f5f5151c03c046092ba095c2141ec223a2"},
@ -8117,8 +8181,8 @@ astroid = ">=3.3.8,<=3.4.0.dev0"
colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""} colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
dill = [ dill = [
{version = ">=0.2", markers = "python_version < \"3.11\""}, {version = ">=0.2", markers = "python_version < \"3.11\""},
{version = ">=0.3.6", markers = "python_version >= \"3.11\""},
{version = ">=0.3.7", markers = "python_version >= \"3.12\""}, {version = ">=0.3.7", markers = "python_version >= \"3.12\""},
{version = ">=0.3.6", markers = "python_version == \"3.11\""},
] ]
isort = ">=4.2.5,<5.13 || >5.13,<7" isort = ">=4.2.5,<5.13 || >5.13,<7"
mccabe = ">=0.6,<0.8" mccabe = ">=0.6,<0.8"
@ -8861,41 +8925,15 @@ files = [
[[package]] [[package]]
name = "qdrant-client" name = "qdrant-client"
version = "1.12.1" version = "1.14.2"
description = "Client library for the Qdrant vector search engine"
optional = true
python-versions = ">=3.8"
groups = ["main"]
markers = "python_version >= \"3.13\" and extra == \"qdrant\""
files = [
{file = "qdrant_client-1.12.1-py3-none-any.whl", hash = "sha256:b2d17ce18e9e767471368380dd3bbc4a0e3a0e2061fedc9af3542084b48451e0"},
{file = "qdrant_client-1.12.1.tar.gz", hash = "sha256:35e8e646f75b7b883b3d2d0ee4c69c5301000bba41c82aa546e985db0f1aeb72"},
]
[package.dependencies]
grpcio = ">=1.41.0"
grpcio-tools = ">=1.41.0"
httpx = {version = ">=0.20.0", extras = ["http2"]}
numpy = {version = ">=1.26", markers = "python_version >= \"3.12\""}
portalocker = ">=2.7.0,<3.0.0"
pydantic = ">=1.10.8"
urllib3 = ">=1.26.14,<3"
[package.extras]
fastembed = ["fastembed (==0.3.6) ; python_version < \"3.13\""]
fastembed-gpu = ["fastembed-gpu (==0.3.6) ; python_version < \"3.13\""]
[[package]]
name = "qdrant-client"
version = "1.14.1"
description = "Client library for the Qdrant vector search engine" description = "Client library for the Qdrant vector search engine"
optional = true optional = true
python-versions = ">=3.9" python-versions = ">=3.9"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.13\" and extra == \"qdrant\"" markers = "extra == \"qdrant\""
files = [ files = [
{file = "qdrant_client-1.14.1-py3-none-any.whl", hash = "sha256:1c4d5ed791873698da8b5df68df16bb203ec1b0cd6cec0fd6002572a06291a1b"}, {file = "qdrant_client-1.14.2-py3-none-any.whl", hash = "sha256:7c283b1f0e71db9c21b85d898fb395791caca2a6d56ee751da96d797b001410c"},
{file = "qdrant_client-1.14.1.tar.gz", hash = "sha256:75352057ea59fdd7987313dc9cef4d83953591d083028d94eac99cd0e5e2f607"}, {file = "qdrant_client-1.14.2.tar.gz", hash = "sha256:da5cab4d367d099d1330b6f30d45aefc8bd76f8b8f9d8fa5d4f813501b93af0d"},
] ]
[package.dependencies] [package.dependencies]
@ -8904,6 +8942,7 @@ httpx = {version = ">=0.20.0", extras = ["http2"]}
numpy = [ numpy = [
{version = ">=1.21", markers = "python_version >= \"3.10\" and python_version < \"3.12\""}, {version = ">=1.21", markers = "python_version >= \"3.10\" and python_version < \"3.12\""},
{version = ">=1.26", markers = "python_version == \"3.12\""}, {version = ">=1.26", markers = "python_version == \"3.12\""},
{version = ">=2.1.0", markers = "python_version >= \"3.13\""},
] ]
portalocker = ">=2.7.0,<3.0.0" portalocker = ">=2.7.0,<3.0.0"
protobuf = ">=3.20.0" protobuf = ">=3.20.0"
@ -10485,7 +10524,7 @@ description = "A lil' TOML parser"
optional = true optional = true
python-versions = ">=3.8" python-versions = ">=3.8"
groups = ["main"] groups = ["main"]
markers = "python_version < \"3.11\" and (extra == \"dev\" or extra == \"notebook\" or extra == \"deepeval\")" markers = "python_version == \"3.10\" and (extra == \"dev\" or extra == \"notebook\" or extra == \"deepeval\")"
files = [ files = [
{file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"},
{file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"},
@ -10981,7 +11020,7 @@ description = "A library that prepares raw documents for downstream ML tasks."
optional = true optional = true
python-versions = ">=3.9.0" python-versions = ">=3.9.0"
groups = ["main"] groups = ["main"]
markers = "extra == \"docs\"" markers = "(python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\") and extra == \"docs\""
files = [ files = [
{file = "unstructured-0.16.25-py3-none-any.whl", hash = "sha256:14719ccef2830216cf1c5bf654f75e2bf07b17ca5dcee9da5ac74618130fd337"}, {file = "unstructured-0.16.25-py3-none-any.whl", hash = "sha256:14719ccef2830216cf1c5bf654f75e2bf07b17ca5dcee9da5ac74618130fd337"},
{file = "unstructured-0.16.25.tar.gz", hash = "sha256:73b9b0f51dbb687af572ecdb849a6811710b9cac797ddeab8ee80fa07d8aa5e6"}, {file = "unstructured-0.16.25.tar.gz", hash = "sha256:73b9b0f51dbb687af572ecdb849a6811710b9cac797ddeab8ee80fa07d8aa5e6"},
@ -11039,6 +11078,71 @@ rtf = ["pypandoc"]
tsv = ["pandas"] tsv = ["pandas"]
xlsx = ["networkx", "openpyxl", "pandas", "xlrd"] xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
[[package]]
name = "unstructured"
version = "0.17.2"
description = "A library that prepares raw documents for downstream ML tasks."
optional = true
python-versions = ">=3.9.0"
groups = ["main"]
markers = "python_version >= \"3.13\" and extra == \"docs\""
files = [
{file = "unstructured-0.17.2-py3-none-any.whl", hash = "sha256:527dd26a4b273aebef2f9119c9d4f0d0ce17640038d92296d23abe89be123840"},
{file = "unstructured-0.17.2.tar.gz", hash = "sha256:af18c3caef0a6c562cf77e34ee8b6ff522b605031d2336ffe565df66f126aa46"},
]
[package.dependencies]
backoff = "*"
beautifulsoup4 = "*"
chardet = "*"
dataclasses-json = "*"
emoji = "*"
filetype = "*"
html5lib = "*"
langdetect = "*"
lxml = "*"
markdown = {version = "*", optional = true, markers = "extra == \"md\""}
networkx = {version = "*", optional = true, markers = "extra == \"xlsx\""}
nltk = "*"
numpy = "*"
openpyxl = {version = "*", optional = true, markers = "extra == \"xlsx\""}
pandas = {version = "*", optional = true, markers = "extra == \"csv\" or extra == \"tsv\" or extra == \"xlsx\""}
psutil = "*"
pypandoc = {version = "*", optional = true, markers = "extra == \"epub\" or extra == \"odt\" or extra == \"org\" or extra == \"rst\" or extra == \"rtf\""}
python-docx = {version = ">=1.1.2", optional = true, markers = "extra == \"doc\" or extra == \"docx\" or extra == \"odt\""}
python-iso639 = "*"
python-magic = "*"
python-oxmsg = "*"
python-pptx = {version = ">=1.0.1", optional = true, markers = "extra == \"ppt\" or extra == \"pptx\""}
rapidfuzz = "*"
requests = "*"
tqdm = "*"
typing-extensions = "*"
unstructured-client = "*"
wrapt = "*"
xlrd = {version = "*", optional = true, markers = "extra == \"xlsx\""}
[package.extras]
all-docs = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
csv = ["pandas"]
doc = ["python-docx (>=1.1.2)"]
docx = ["python-docx (>=1.1.2)"]
epub = ["pypandoc"]
huggingface = ["langdetect", "sacremoses", "sentencepiece", "torch", "transformers"]
image = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)"]
local-inference = ["effdet", "google-cloud-vision", "markdown", "networkx", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "openpyxl", "pandas", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypandoc", "pypdf", "python-docx (>=1.1.2)", "python-pptx (>=1.0.1)", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)", "xlrd"]
md = ["markdown"]
odt = ["pypandoc", "python-docx (>=1.1.2)"]
org = ["pypandoc"]
paddleocr = ["paddlepaddle (>=3.0.0b1)", "unstructured.paddleocr (==2.10.0)"]
pdf = ["effdet", "google-cloud-vision", "onnx (>=1.17.0)", "onnxruntime (>=1.19.0)", "pdf2image", "pdfminer.six", "pi-heif", "pikepdf", "pypdf", "unstructured-inference (>=0.8.10)", "unstructured.pytesseract (>=0.3.12)"]
ppt = ["python-pptx (>=1.0.1)"]
pptx = ["python-pptx (>=1.0.1)"]
rst = ["pypandoc"]
rtf = ["pypandoc"]
tsv = ["pandas"]
xlsx = ["networkx", "openpyxl", "pandas", "xlrd"]
[[package]] [[package]]
name = "unstructured-client" name = "unstructured-client"
version = "0.25.9" version = "0.25.9"
@ -11578,7 +11682,7 @@ description = "A small Python utility to set file creation time on Windows"
optional = true optional = true
python-versions = ">=3.5" python-versions = ">=3.5"
groups = ["main"] groups = ["main"]
markers = "extra == \"codegraph\" and sys_platform == \"win32\" and python_version < \"3.13\"" markers = "extra == \"codegraph\" and sys_platform == \"win32\" and (python_version == \"3.10\" or python_version == \"3.11\" or python_version == \"3.12\")"
files = [ files = [
{file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"}, {file = "win32_setctime-1.2.0-py3-none-any.whl", hash = "sha256:95d644c4e708aba81dc3704a116d8cbc974d70b3bdb8be1d150e36be6e9d1390"},
{file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"}, {file = "win32_setctime-1.2.0.tar.gz", hash = "sha256:ae1fdf948f5640aae05c511ade119313fb6a30d7eabe25fef9764dca5873c4c0"},
@ -11962,7 +12066,7 @@ cffi = ["cffi (>=1.11)"]
[extras] [extras]
anthropic = ["anthropic"] anthropic = ["anthropic"]
api = ["gunicorn", "uvicorn"] api = ["gunicorn", "kuzu", "uvicorn"]
chromadb = ["chromadb", "pypika"] chromadb = ["chromadb", "pypika"]
codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"] codegraph = ["fastembed", "transformers", "tree-sitter", "tree-sitter-python"]
debug = ["debugpy"] debug = ["debugpy"]
@ -11992,4 +12096,4 @@ weaviate = ["weaviate-client"]
[metadata] [metadata]
lock-version = "2.1" lock-version = "2.1"
python-versions = ">=3.10,<=3.13" python-versions = ">=3.10,<=3.13"
content-hash = "15b319ff8dbe5bd88e41ead93f4e9140b2b7d86d57a707682dd3a308e78ef245" content-hash = "5bd213f69d6dada714e632097121394992b46bd6d322afa024396847cb945f95"

View file

@ -64,9 +64,10 @@ dependencies = [
api = [ api = [
"uvicorn==0.34.0", "uvicorn==0.34.0",
"gunicorn>=20.1.0,<21", "gunicorn>=20.1.0,<21",
"kuzu==0.9.0",
] ]
weaviate = ["weaviate-client==4.9.6"] weaviate = ["weaviate-client==4.9.6"]
qdrant = ["qdrant-client>=1.9.0,<2"] qdrant = ["qdrant-client>=1.14.2,<2"]
neo4j = ["neo4j>=5.20.0,<6"] neo4j = ["neo4j>=5.20.0,<6"]
postgres = [ postgres = [
"psycopg2>=2.9.10,<3", "psycopg2>=2.9.10,<3",
@ -87,14 +88,14 @@ anthropic = ["anthropic>=0.26.1,<0.27"]
deepeval = ["deepeval>=2.0.1,<3"] deepeval = ["deepeval>=2.0.1,<3"]
posthog = ["posthog>=3.5.0,<4"] posthog = ["posthog>=3.5.0,<4"]
falkordb = ["falkordb==1.0.9"] falkordb = ["falkordb==1.0.9"]
kuzu = ["kuzu==0.8.2"] kuzu = ["kuzu==0.9.0"]
groq = ["groq==0.8.0"] groq = ["groq==0.8.0"]
milvus = ["pymilvus>=2.5.0,<3"] milvus = ["pymilvus>=2.5.0,<3"]
chromadb = [ chromadb = [
"chromadb>=0.3.0,<0.7", "chromadb>=0.3.0,<0.7",
"pypika==0.48.8", "pypika==0.48.8",
] ]
docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.16.13,<0.17"] docs = ["unstructured[csv, doc, docx, epub, md, odt, org, ppt, pptx, rst, rtf, tsv, xlsx]>=0.16.13,<18"]
codegraph = [ codegraph = [
"fastembed<=0.6.0 ; python_version < '3.13'", "fastembed<=0.6.0 ; python_version < '3.13'",
"transformers>=4.46.3,<5", "transformers>=4.46.3,<5",

uv.lock generated — 7732 changed lines (file diff suppressed because it is too large)
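For reference, a minimal sketch of installing the updated optional extras in a Poetry-managed checkout. The extra names api, qdrant and kuzu come from the pyproject.toml hunk above; the exact invocations are an assumption, mirroring the existing CI setup:

# hedged example: install the api extra (which now bundles kuzu==0.9.0) together with the qdrant extra (qdrant-client>=1.14.2)
poetry install -E api -E qdrant

# or just the graph database extra on its own, as the workflows do
poetry install -E kuzu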