From 00dd3b8d974dac4d03396332fe7dd6cf3ad34238 Mon Sep 17 00:00:00 2001 From: Boris Arzentar Date: Fri, 4 Jul 2025 15:28:05 +0200 Subject: [PATCH] fix: run cognee distributed with modal --- cognee/api/v1/add/add.py | 2 +- .../databases/graph/neo4j_driver/adapter.py | 16 +- .../infrastructure/databases/graph/utils.py | 19 +++ .../vector/pgvector/PGVectorAdapter.py | 39 +++-- .../files/utils/get_file_metadata.py | 12 +- .../files/utils/guess_file_type.py | 2 +- cognee/infrastructure/utils/run_sync.py | 25 +++ cognee/modules/ingestion/classify.py | 10 -- .../modules/pipelines/operations/pipeline.py | 2 +- .../modules/pipelines/operations/run_tasks.py | 73 +++------ .../operations/run_tasks_distributed.py | 119 +++++++++++++++ .../operations/run_tasks_with_telemetry.py | 60 ++++++++ .../methods/give_permission_on_dataset.py | 15 +- cognee/tasks/ingestion/ingest_data.py | 54 +------ distributed/Dockerfile | 5 +- distributed/entrypoint.py | 144 +++--------------- distributed/models/TextDocument.py | 15 -- distributed/tasks/extract_graph_from_data.py | 41 ----- distributed/tasks/queued_add_edges.py | 12 ++ distributed/tasks/queued_add_nodes.py | 12 ++ distributed/tasks/save_data_points.py | 116 -------------- distributed/tasks/summarize_text.py | 88 ----------- .../workers/data_point_saver_worker.py | 25 ++- .../workers/graph_extraction_worker.py | 42 ----- poetry.lock | 31 ++-- pyproject.toml | 2 +- uv.lock | 59 ++++--- 27 files changed, 431 insertions(+), 609 deletions(-) create mode 100644 cognee/infrastructure/databases/graph/utils.py create mode 100644 cognee/infrastructure/utils/run_sync.py create mode 100644 cognee/modules/pipelines/operations/run_tasks_distributed.py create mode 100644 cognee/modules/pipelines/operations/run_tasks_with_telemetry.py delete mode 100644 distributed/models/TextDocument.py delete mode 100644 distributed/tasks/extract_graph_from_data.py create mode 100644 distributed/tasks/queued_add_edges.py create mode 100644 distributed/tasks/queued_add_nodes.py delete mode 100644 distributed/tasks/save_data_points.py delete mode 100644 distributed/tasks/summarize_text.py delete mode 100644 distributed/workers/graph_extraction_worker.py diff --git a/cognee/api/v1/add/add.py b/cognee/api/v1/add/add.py index e1dafce5f..8f987743f 100644 --- a/cognee/api/v1/add/add.py +++ b/cognee/api/v1/add/add.py @@ -17,7 +17,7 @@ async def add( dataset_id: UUID = None, ): tasks = [ - Task(resolve_data_directories), + Task(resolve_data_directories, include_subdirectories=True), Task(ingest_data, dataset_name, user, node_set, dataset_id), ] diff --git a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py index e8bf6373f..74cae62eb 100644 --- a/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py +++ b/cognee/infrastructure/databases/graph/neo4j_driver/adapter.py @@ -1,21 +1,27 @@ """Neo4j Adapter for Graph Database""" import json -from cognee.shared.logging_utils import get_logger, ERROR import asyncio -from textwrap import dedent -from typing import Optional, Any, List, Dict, Type, Tuple -from contextlib import asynccontextmanager from uuid import UUID +from textwrap import dedent from neo4j import AsyncSession from neo4j import AsyncGraphDatabase from neo4j.exceptions import Neo4jError +from contextlib import asynccontextmanager +from typing import Optional, Any, List, Dict, Type, Tuple + from cognee.infrastructure.engine import DataPoint +from cognee.shared.logging_utils import get_logger, ERROR +from 
cognee.infrastructure.databases.graph.utils import override_distributed from cognee.infrastructure.databases.graph.graph_db_interface import ( GraphDBInterface, record_graph_changes, ) from cognee.modules.storage.utils import JSONEncoder + +from distributed.tasks.queued_add_nodes import queued_add_nodes +from distributed.tasks.queued_add_edges import queued_add_edges + from .neo4j_metrics_utils import ( get_avg_clustering, get_edge_density, @@ -166,6 +172,7 @@ class Neo4jAdapter(GraphDBInterface): return await self.query(query, params) @record_graph_changes + @override_distributed(queued_add_nodes) async def add_nodes(self, nodes: list[DataPoint]) -> None: """ Add multiple nodes to the database in a single query. @@ -404,6 +411,7 @@ class Neo4jAdapter(GraphDBInterface): return await self.query(query, params) @record_graph_changes + @override_distributed(queued_add_edges) async def add_edges(self, edges: list[tuple[str, str, str, dict[str, Any]]]) -> None: """ Add multiple edges between nodes in a single query. diff --git a/cognee/infrastructure/databases/graph/utils.py b/cognee/infrastructure/databases/graph/utils.py new file mode 100644 index 000000000..2a6872655 --- /dev/null +++ b/cognee/infrastructure/databases/graph/utils.py @@ -0,0 +1,19 @@ +import os +from functools import wraps + + +def override_distributed(new_func): + def decorator(func): + @wraps(func) + async def wrapper(self, *args, distributed=None, **kwargs): + default_distributed_value = os.getenv("COGNEE_DISTRIBUTED", "False").lower() == "true" + distributed = default_distributed_value if distributed is None else distributed + + if distributed: + return await new_func(*args, **kwargs) + else: + return await func(self, *args, **kwargs) + + return wrapper + + return decorator diff --git a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py index c512b6ccc..f22e287c2 100644 --- a/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py +++ b/cognee/infrastructure/databases/vector/pgvector/PGVectorAdapter.py @@ -1,10 +1,14 @@ import asyncio from uuid import UUID, uuid4 -from typing import List, Optional, get_type_hints +from sqlalchemy.inspection import inspect +from typing import List, Optional, Union, get_type_hints from sqlalchemy.orm import Mapped, mapped_column -from sqlalchemy import JSON, Column, Table, select, delete, MetaData, text +from sqlalchemy.dialects.postgresql import insert +from asyncpg import DeadlockDetectedError, DuplicateTableError, UniqueViolationError +from sqlalchemy import JSON, Column, Table, select, delete, MetaData from sqlalchemy.ext.asyncio import create_async_engine, async_sessionmaker +from tenacity import retry, retry_if_exception_type, stop_after_attempt from cognee.exceptions import InvalidValueError from cognee.shared.logging_utils import get_logger @@ -107,6 +111,11 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): else: return False + @retry( + retry=retry_if_exception_type(Union[DuplicateTableError, UniqueViolationError]), + stop=stop_after_attempt(3), + sleep=1, + ) async def create_collection(self, collection_name: str, payload_schema=None): data_point_types = get_type_hints(DataPoint) vector_size = self.embedding_engine.get_vector_size() @@ -123,7 +132,6 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): - __init__(self, id, payload, vector): Initializes a new PGVectorDataPoint instance. 
Instance variables: - - primary_key: Unique identifier for the data point, generated by uuid4. - id: Identifier for the data point, defined by data_point_types. - payload: JSON data associated with the data point. - vector: Vector representation of the data point, with size defined by vector_size. @@ -132,8 +140,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): __tablename__ = collection_name __table_args__ = {"extend_existing": True} # PGVector requires one column to be the primary key - primary_key: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4) - id: Mapped[data_point_types["id"]] + id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True) payload = Column(JSON) vector = Column(self.Vector(vector_size)) @@ -148,6 +155,11 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): Base.metadata.create_all, tables=[PGVectorDataPoint.__table__] ) + @retry( + retry=retry_if_exception_type(DeadlockDetectedError), + stop=stop_after_attempt(3), + sleep=1, + ) async def create_data_points(self, collection_name: str, data_points: List[DataPoint]): data_point_types = get_type_hints(DataPoint) if not await self.has_collection(collection_name): @@ -168,7 +180,6 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): the SQLAlchemy ORM. It contains the following public instance variables: - - primary_key: The primary key of the data point, generated automatically. - id: An identifier for the data point. - payload: A JSON object containing additional data related to the data point. - vector: A vector representation of the data point, configured to the specified size. @@ -177,8 +188,7 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): __tablename__ = collection_name __table_args__ = {"extend_existing": True} # PGVector requires one column to be the primary key - primary_key: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4) - id: Mapped[data_point_types["id"]] + id: Mapped[data_point_types["id"]] = mapped_column(primary_key=True) payload = Column(JSON) vector = Column(self.Vector(vector_size)) @@ -213,7 +223,18 @@ class PGVectorAdapter(SQLAlchemyAdapter, VectorDBInterface): ) ) - session.add_all(pgvector_data_points) + def to_dict(obj): + return { + column.key: getattr(obj, column.key) + for column in inspect(obj).mapper.column_attrs + } + + # session.add_all(pgvector_data_points) + insert_statement = insert(PGVectorDataPoint).values( + [to_dict(data_point) for data_point in pgvector_data_points] + ) + insert_statement = insert_statement.on_conflict_do_nothing(index_elements=["id"]) + await session.execute(insert_statement) await session.commit() async def create_vector_index(self, index_name: str, index_property_name: str): diff --git a/cognee/infrastructure/files/utils/get_file_metadata.py b/cognee/infrastructure/files/utils/get_file_metadata.py index e683fbee3..2742e3454 100644 --- a/cognee/infrastructure/files/utils/get_file_metadata.py +++ b/cognee/infrastructure/files/utils/get_file_metadata.py @@ -1,3 +1,4 @@ +import io from typing import BinaryIO, TypedDict from .guess_file_type import guess_file_type from cognee.shared.utils import get_file_content_hash @@ -38,9 +39,14 @@ def get_file_metadata(file: BinaryIO) -> FileMetadata: - FileMetadata: A dictionary containing the file's name, path, MIME type, file extension, and content hash. 
""" - file.seek(0) - content_hash = get_file_content_hash(file) - file.seek(0) + try: + file.seek(0) + content_hash = get_file_content_hash(file) + file.seek(0) + except io.UnsupportedOperation as error: + raise Exception( + f"Error retrieving metadata from file: {file.name} \n{str(error)}\n\n" + ) from error file_type = guess_file_type(file) diff --git a/cognee/infrastructure/files/utils/guess_file_type.py b/cognee/infrastructure/files/utils/guess_file_type.py index 7daa954ea..e7fa2a80f 100644 --- a/cognee/infrastructure/files/utils/guess_file_type.py +++ b/cognee/infrastructure/files/utils/guess_file_type.py @@ -125,6 +125,6 @@ def guess_file_type(file: BinaryIO) -> filetype.Type: file_type = filetype.guess(file) if file_type is None: - raise FileTypeException("Unknown file detected.") + raise FileTypeException(f"Unknown file detected: {file.name}.") return file_type diff --git a/cognee/infrastructure/utils/run_sync.py b/cognee/infrastructure/utils/run_sync.py new file mode 100644 index 000000000..4945b0b98 --- /dev/null +++ b/cognee/infrastructure/utils/run_sync.py @@ -0,0 +1,25 @@ +import asyncio +import threading + + +def run_sync(coro, timeout=None): + result = None + exception = None + + def runner(): + nonlocal result, exception + try: + result = asyncio.run(coro) + except Exception as e: + exception = e + + thread = threading.Thread(target=runner) + thread.start() + thread.join(timeout) + + if thread.is_alive(): + raise asyncio.TimeoutError("Coroutine execution timed out.") + if exception: + raise exception + + return result diff --git a/cognee/modules/ingestion/classify.py b/cognee/modules/ingestion/classify.py index d1650e5dc..44f1cf013 100644 --- a/cognee/modules/ingestion/classify.py +++ b/cognee/modules/ingestion/classify.py @@ -20,17 +20,7 @@ def classify(data: Union[str, BinaryIO], filename: str = None, s3fs: Optional[An if isinstance(data, BufferedReader) or isinstance(data, SpooledTemporaryFile): return BinaryData(data, str(data.name).split("/")[-1] if data.name else filename) - try: - from importlib import import_module - - s3core = import_module("s3fs.core") - S3File = s3core.S3File - except ImportError: - S3File = None - if S3File is not None: - from cognee.modules.ingestion.data_types.S3BinaryData import S3BinaryData - if isinstance(data, S3File): derived_filename = str(data.full_name).split("/")[-1] if data.full_name else filename return S3BinaryData(s3_path=data.full_name, name=derived_filename, s3=s3fs) diff --git a/cognee/modules/pipelines/operations/pipeline.py b/cognee/modules/pipelines/operations/pipeline.py index ab2ccef67..8ffe33a8a 100644 --- a/cognee/modules/pipelines/operations/pipeline.py +++ b/cognee/modules/pipelines/operations/pipeline.py @@ -184,7 +184,7 @@ async def run_pipeline( if not isinstance(task, Task): raise ValueError(f"Task {task} is not an instance of Task") - pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name, context=context) + pipeline_run = run_tasks(tasks, dataset_id, data, user, pipeline_name, context) async for pipeline_run_info in pipeline_run: yield pipeline_run_info diff --git a/cognee/modules/pipelines/operations/run_tasks.py b/cognee/modules/pipelines/operations/run_tasks.py index 93171a556..45f5a75d8 100644 --- a/cognee/modules/pipelines/operations/run_tasks.py +++ b/cognee/modules/pipelines/operations/run_tasks.py @@ -1,8 +1,11 @@ -import json +import os +from uuid import UUID from typing import Any -from uuid import UUID, uuid4 +from functools import wraps from cognee.infrastructure.databases.relational 
import get_relational_engine +from cognee.modules.pipelines.operations.run_tasks_distributed import run_tasks_distributed +from cognee.modules.users.models import User from cognee.shared.logging_utils import get_logger from cognee.modules.users.methods import get_default_user from cognee.modules.pipelines.utils import generate_pipeline_id @@ -18,64 +21,34 @@ from cognee.modules.pipelines.operations import ( log_pipeline_run_complete, log_pipeline_run_error, ) -from cognee.modules.settings import get_current_settings -from cognee.modules.users.models import User -from cognee.shared.utils import send_telemetry - -from .run_tasks_base import run_tasks_base +from .run_tasks_with_telemetry import run_tasks_with_telemetry from ..tasks.task import Task + logger = get_logger("run_tasks(tasks: [Task], data)") -async def run_tasks_with_telemetry( - tasks: list[Task], data, user: User, pipeline_name: str, context: dict = None -): - config = get_current_settings() +def override_run_tasks(new_gen): + def decorator(original_gen): + @wraps(original_gen) + async def wrapper(*args, distributed=None, **kwargs): + default_distributed_value = os.getenv("COGNEE_DISTRIBUTED", "False").lower() == "true" + distributed = default_distributed_value if distributed is None else distributed - logger.debug("\nRunning pipeline with configuration:\n%s\n", json.dumps(config, indent=1)) + print(f"run_tasks_distributed: {distributed}") + if distributed: + async for run_info in new_gen(*args, **kwargs): + yield run_info + else: + async for run_info in original_gen(*args, **kwargs): + yield run_info - try: - logger.info("Pipeline run started: `%s`", pipeline_name) - send_telemetry( - "Pipeline Run Started", - user.id, - additional_properties={ - "pipeline_name": str(pipeline_name), - } - | config, - ) + return wrapper - async for result in run_tasks_base(tasks, data, user, context): - yield result - - logger.info("Pipeline run completed: `%s`", pipeline_name) - send_telemetry( - "Pipeline Run Completed", - user.id, - additional_properties={ - "pipeline_name": str(pipeline_name), - }, - ) - except Exception as error: - logger.error( - "Pipeline run errored: `%s`\n%s\n", - pipeline_name, - str(error), - exc_info=True, - ) - send_telemetry( - "Pipeline Run Errored", - user.id, - additional_properties={ - "pipeline_name": str(pipeline_name), - } - | config, - ) - - raise error + return decorator +@override_run_tasks(run_tasks_distributed) async def run_tasks( tasks: list[Task], dataset_id: UUID, diff --git a/cognee/modules/pipelines/operations/run_tasks_distributed.py b/cognee/modules/pipelines/operations/run_tasks_distributed.py new file mode 100644 index 000000000..012f52233 --- /dev/null +++ b/cognee/modules/pipelines/operations/run_tasks_distributed.py @@ -0,0 +1,119 @@ +try: + import modal +except ModuleNotFoundError: + modal = None + +from cognee.infrastructure.databases.relational import get_relational_engine +from cognee.modules.pipelines.models import ( + PipelineRunStarted, + PipelineRunYield, + PipelineRunCompleted, +) +from cognee.modules.pipelines.operations import log_pipeline_run_start, log_pipeline_run_complete +from cognee.modules.pipelines.utils.generate_pipeline_id import generate_pipeline_id +from cognee.modules.users.methods import get_default_user +from cognee.shared.logging_utils import get_logger + +from .run_tasks_with_telemetry import run_tasks_with_telemetry + + +logger = get_logger("run_tasks_distributed()") + + +if modal: + from distributed.app import app + from distributed.modal_image import image + + 
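Note: the new override_distributed decorator (cognee/infrastructure/databases/graph/utils.py) and the override_run_tasks decorator above follow the same pattern: the COGNEE_DISTRIBUTED environment variable selects the default code path, and an explicit distributed= keyword overrides it (the Modal saver worker passes distributed=False so writes happen locally). A minimal illustrative sketch of that behavior, with hypothetical stand-in names (queued_add_nodes_stub, LocalGraph) that are not part of this change:

import asyncio
import os
from functools import wraps


def override_distributed(new_func):
    def decorator(func):
        @wraps(func)
        async def wrapper(self, *args, distributed=None, **kwargs):
            # The environment variable supplies the default; an explicit keyword wins.
            env_default = os.getenv("COGNEE_DISTRIBUTED", "False").lower() == "true"
            distributed = env_default if distributed is None else distributed
            if distributed:
                return await new_func(*args, **kwargs)  # push work to the queue
            return await func(self, *args, **kwargs)  # run locally

        return wrapper

    return decorator


async def queued_add_nodes_stub(nodes):
    # Stand-in for distributed.tasks.queued_add_nodes
    print(f"queued {len(nodes)} nodes for a saver worker")


class LocalGraph:
    @override_distributed(queued_add_nodes_stub)
    async def add_nodes(self, nodes):
        print(f"wrote {len(nodes)} nodes directly")


async def demo():
    graph = LocalGraph()
    os.environ["COGNEE_DISTRIBUTED"] = "true"
    await graph.add_nodes(["a", "b"])  # routed to the queue stub
    await graph.add_nodes(["a", "b"], distributed=False)  # worker-side: forced local write


asyncio.run(demo())
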
@app.function( + image=image, + timeout=86400, + max_containers=100, + secrets=[modal.Secret.from_name("distributed_cognee")], + ) + async def run_tasks_on_modal(tasks, data_item, user, pipeline_name, context): + pipeline_run = run_tasks_with_telemetry(tasks, data_item, user, pipeline_name, context) + + run_info = None + + async for pipeline_run_info in pipeline_run: + run_info = pipeline_run_info + + return run_info + + +async def run_tasks_distributed(tasks, dataset_id, data, user, pipeline_name, context): + if not user: + user = get_default_user() + + db_engine = get_relational_engine() + async with db_engine.get_async_session() as session: + from cognee.modules.data.models import Dataset + + dataset = await session.get(Dataset, dataset_id) + + pipeline_id = generate_pipeline_id(user.id, dataset.id, pipeline_name) + + pipeline_run = await log_pipeline_run_start(pipeline_id, pipeline_name, dataset_id, data) + + pipeline_run_id = pipeline_run.pipeline_run_id + + yield PipelineRunStarted( + pipeline_run_id=pipeline_run_id, + dataset_id=dataset.id, + dataset_name=dataset.name, + payload=data, + ) + + data_count = len(data) if isinstance(data, list) else 1 + + print(f"Data count: {data_count}") + print(f"Data item to process: {type(data)} - {data}") + + arguments = [ + [tasks] * data_count, + [[data_item] for data_item in data[:data_count]] if data_count > 1 else [data], + [user] * data_count, + [pipeline_name] * data_count, + [context] * data_count, + ] + + async for result in run_tasks_on_modal.map.aio(*arguments): + logger.info(f"Received result: {result}") + + yield PipelineRunYield( + pipeline_run_id=pipeline_run_id, + dataset_id=dataset.id, + dataset_name=dataset.name, + payload=result, + ) + + # producer_futures = [] + + # for data_item in data[:5]: + # producer_future = run_tasks_distributed( + # run_tasks_with_telemetry, tasks, [data_item], user, pipeline_name, context + # ) + # producer_futures.append(producer_future) + + # batch_results = [] + # for producer_future in producer_futures: + # try: + # result = producer_future.get() + # except Exception as e: + # result = e + # batch_results.append(result) + + # yield PipelineRunYield( + # pipeline_run_id=pipeline_run_id, + # dataset_id=dataset.id, + # dataset_name=dataset.name, + # payload=result, + # ) + + await log_pipeline_run_complete(pipeline_run_id, pipeline_id, pipeline_name, dataset_id, data) + + yield PipelineRunCompleted( + pipeline_run_id=pipeline_run_id, + dataset_id=dataset.id, + dataset_name=dataset.name, + ) diff --git a/cognee/modules/pipelines/operations/run_tasks_with_telemetry.py b/cognee/modules/pipelines/operations/run_tasks_with_telemetry.py new file mode 100644 index 000000000..a2af18be6 --- /dev/null +++ b/cognee/modules/pipelines/operations/run_tasks_with_telemetry.py @@ -0,0 +1,60 @@ +import json + +from cognee.modules.settings import get_current_settings +from cognee.modules.users.models import User +from cognee.shared.logging_utils import get_logger +from cognee.shared.utils import send_telemetry + +from .run_tasks_base import run_tasks_base +from ..tasks.task import Task + + +logger = get_logger("run_tasks_with_telemetry()") + + +async def run_tasks_with_telemetry( + tasks: list[Task], data, user: User, pipeline_name: str, context: dict = None +): + config = get_current_settings() + + logger.debug("\nRunning pipeline with configuration:\n%s\n", json.dumps(config, indent=1)) + + try: + logger.info("Pipeline run started: `%s`", pipeline_name) + send_telemetry( + "Pipeline Run Started", + user.id, + 
additional_properties={ + "pipeline_name": str(pipeline_name), + } + | config, + ) + + async for result in run_tasks_base(tasks, data, user, context): + yield result + + logger.info("Pipeline run completed: `%s`", pipeline_name) + send_telemetry( + "Pipeline Run Completed", + user.id, + additional_properties={ + "pipeline_name": str(pipeline_name), + }, + ) + except Exception as error: + logger.error( + "Pipeline run errored: `%s`\n%s\n", + pipeline_name, + str(error), + exc_info=True, + ) + send_telemetry( + "Pipeline Run Errored", + user.id, + additional_properties={ + "pipeline_name": str(pipeline_name), + } + | config, + ) + + raise error diff --git a/cognee/modules/users/permissions/methods/give_permission_on_dataset.py b/cognee/modules/users/permissions/methods/give_permission_on_dataset.py index afa8ef5b5..daa6aae6c 100644 --- a/cognee/modules/users/permissions/methods/give_permission_on_dataset.py +++ b/cognee/modules/users/permissions/methods/give_permission_on_dataset.py @@ -1,11 +1,20 @@ -from sqlalchemy.future import select -from cognee.infrastructure.databases.relational import get_relational_engine -from ...models import Principal, ACL, Permission from uuid import UUID +from sqlalchemy.future import select +from asyncpg import UniqueViolationError +from tenacity import retry, retry_if_exception_type, stop_after_attempt + +from cognee.infrastructure.databases.relational import get_relational_engine from cognee.modules.users.permissions import PERMISSION_TYPES from cognee.modules.users.exceptions import PermissionNotFoundError +from ...models import Principal, ACL, Permission + +@retry( + retry=retry_if_exception_type(UniqueViolationError), + stop=stop_after_attempt(3), + sleep=1, +) async def give_permission_on_dataset( principal: Principal, dataset_id: UUID, diff --git a/cognee/tasks/ingestion/ingest_data.py b/cognee/tasks/ingestion/ingest_data.py index 1e6181df2..9fd45434a 100644 --- a/cognee/tasks/ingestion/ingest_data.py +++ b/cognee/tasks/ingestion/ingest_data.py @@ -1,4 +1,3 @@ -import dlt import json import inspect from uuid import UUID @@ -11,7 +10,6 @@ from cognee.modules.data.models.DatasetData import DatasetData from cognee.modules.users.models import User from cognee.modules.users.permissions.methods import give_permission_on_dataset from cognee.modules.users.permissions.methods import get_specific_user_permission_datasets -from .get_dlt_destination import get_dlt_destination from .save_data_item_to_storage import save_data_item_to_storage @@ -25,16 +23,9 @@ async def ingest_data( node_set: Optional[List[str]] = None, dataset_id: UUID = None, ): - destination = get_dlt_destination() - if not user: user = await get_default_user() - pipeline = dlt.pipeline( - pipeline_name="metadata_extraction_pipeline", - destination=destination, - ) - s3_config = get_s3_config() fs = None @@ -58,27 +49,6 @@ async def ingest_data( else: return {} - @dlt.resource(standalone=True, primary_key="id", merge_key="id") - async def data_resources(file_paths: List[str], user: User): - for file_path in file_paths: - with open_data_file(file_path) as file: - if file_path.startswith("s3://"): - classified_data = ingestion.classify(file, s3fs=fs) - else: - classified_data = ingestion.classify(file) - data_id = ingestion.identify(classified_data, user) - file_metadata = classified_data.get_metadata() - yield { - "id": data_id, - "name": file_metadata["name"], - "file_path": file_metadata["file_path"], - "extension": file_metadata["extension"], - "mime_type": file_metadata["mime_type"], - 
"content_hash": file_metadata["content_hash"], - "owner_id": str(user.id), - "node_set": json.dumps(node_set) if node_set else None, - } - async def store_data_to_dataset( data: Any, dataset_name: str, @@ -187,29 +157,7 @@ async def ingest_data( return file_paths - db_engine = get_relational_engine() - - file_paths = await store_data_to_dataset(data, dataset_name, user, node_set, dataset_id) - - # Note: DLT pipeline has its own event loop, therefore objects created in another event loop - # can't be used inside the pipeline - if db_engine.engine.dialect.name == "sqlite": - # To use sqlite with dlt dataset_name must be set to "main". - # Sqlite doesn't support schemas - pipeline.run( - data_resources(file_paths, user), - table_name="file_metadata", - dataset_name="main", - write_disposition="merge", - ) - else: - # Data should be stored in the same schema to allow deduplication - pipeline.run( - data_resources(file_paths, user), - table_name="file_metadata", - dataset_name="public", - write_disposition="merge", - ) + await store_data_to_dataset(data, dataset_name, user, node_set, dataset_id) datasets = await get_datasets_by_name(dataset_name, user.id) diff --git a/distributed/Dockerfile b/distributed/Dockerfile index efa3197e0..6ac818d45 100644 --- a/distributed/Dockerfile +++ b/distributed/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.12-bookworm-slim +FROM python:3.11-slim # Set environment variables ENV PIP_NO_CACHE_DIR=true @@ -6,6 +6,7 @@ ENV PATH="${PATH}:/root/.poetry/bin" ENV PYTHONPATH=/app ENV RUN_MODE=modal ENV SKIP_MIGRATIONS=true +ENV COGNEE_DISTRIBUTED=true # System dependencies RUN apt-get update && apt-get install -y \ @@ -24,7 +25,7 @@ RUN pip install poetry RUN poetry config virtualenvs.create false -RUN poetry install --extras neo4j --extras postgres --no-root +RUN poetry install --extras neo4j --extras postgres --extras aws --extras distributed --no-root COPY cognee/ /app/cognee COPY distributed/ /app/distributed diff --git a/distributed/entrypoint.py b/distributed/entrypoint.py index 7124923f6..301867ea5 100644 --- a/distributed/entrypoint.py +++ b/distributed/entrypoint.py @@ -1,162 +1,50 @@ -import pathlib -from os import path +import asyncio +import os -# from cognee.api.v1.add import add +import cognee from cognee.api.v1.prune import prune -from cognee.infrastructure.databases.relational import get_relational_engine -from cognee.infrastructure.llm.utils import get_max_chunk_tokens -from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.chunking.models.DocumentChunk import DocumentChunk -from cognee.modules.data.models import Data -from cognee.modules.data.processing.document_types import Document -from cognee.modules.engine.operations.setup import setup -from cognee.modules.ingestion.get_text_content_hash import get_text_content_hash -from cognee.modules.pipelines.operations.run_tasks import run_tasks -from cognee.modules.pipelines.tasks.task import Task -from cognee.modules.users.methods.get_default_user import get_default_user -# from cognee.modules.data.methods.get_dataset_data import get_dataset_data -# from cognee.modules.data.methods.get_datasets_by_name import get_datasets_by_name - from cognee.shared.logging_utils import get_logger -from cognee.tasks.documents.extract_chunks_from_documents import extract_chunks_from_documents +from cognee.modules.engine.operations.setup import setup from distributed.app import app -from distributed.models.TextDocument import TextDocument from distributed.queues import save_data_points_queue from 
distributed.workers.data_point_saver_worker import data_point_saver_worker -from distributed.workers.graph_extraction_worker import graph_extraction_worker logger = get_logger() +os.environ["COGNEE_DISTRIBUTED"] = "True" + + @app.local_entrypoint() async def main(): # Clear queues - save_data_points_queue.clear() + await save_data_points_queue.clear.aio() - # dataset_name = "main" - data_directory_name = ".data" - data_directory_path = path.join(pathlib.Path(__file__).parent, data_directory_name) - - number_of_data_saving_workers = 1 # Total number of graph_extraction_worker functions to spawn - document_batch_size = 50 # Batch size for producers + number_of_data_saving_workers = 1 # Total number of data_point_saver_worker functions to spawn results = [] consumer_futures = [] + # await prune.prune_data() # We don't want to delete files on s3 # Delete DBs and saved files from metastore - await prune.prune_data() await prune.prune_system(metadata=True) await setup() - # Add files to the metastore - # await add(data=data_directory_path, dataset_name=dataset_name) - - user = await get_default_user() - # datasets = await get_datasets_by_name(dataset_name, user.id) - # documents = await get_dataset_data(dataset_id=datasets[0].id) - - import duckdb - - connection = duckdb.connect() - dataset_file_name = "de-00000-of-00003-f8e581c008ccc7f2.parquet" - dataset_file_path = path.join(data_directory_path, dataset_file_name) - df = connection.execute(f"SELECT * FROM '{dataset_file_path}'").fetchdf() - - documents = [] - - for _, row in df.iterrows(): - file_id = str(row["id"]) - content = row["text"] - - documents.append( - TextDocument( - name=file_id, - content=content, - raw_data_location=f"{dataset_file_name}_{file_id}", - external_metadata="", - ) - ) - - documents: list[TextDocument] = documents[0:100] - print(f"We have {len(documents)} documents in the dataset.") - - data_documents = [ - Data( - id=document.id, - name=document.name, - raw_data_location=document.raw_data_location, - extension="txt", - mime_type=document.mime_type, - owner_id=user.id, - content_hash=get_text_content_hash(document.content), - external_metadata=document.external_metadata, - node_set=None, - token_count=-1, - ) - for document in documents - ] - - db_engine = get_relational_engine() - - async with db_engine.get_async_session() as session: - session.add_all(data_documents) - await session.commit() - # Start data_point_saver_worker functions for _ in range(number_of_data_saving_workers): worker_future = data_point_saver_worker.spawn() consumer_futures.append(worker_future) - producer_futures = [] + s3_bucket_name = "s3://s3-test-laszlo/Database for KG v1" - def process_chunks_remotely(document_chunks: list[DocumentChunk], document: Document): - producer_future = graph_extraction_worker.spawn( - user=user, document_name=document.name, document_chunks=document_chunks - ) - producer_futures.append(producer_future) - return producer_future + await cognee.add(s3_bucket_name, dataset_name="s3-files") - # Produce chunks and spawn a graph_extraction_worker job for each batch of chunks - for i in range(0, len(documents), document_batch_size): - batch = documents[i : i + document_batch_size] - - for item in batch: - async for worker_feature in run_tasks( - [ - Task( - extract_chunks_from_documents, - max_chunk_size=2000, - chunker=TextChunker, - ), - Task( - process_chunks_remotely, - document=item, - task_config={"batch_size": 10}, - ), - ], - data=[item], - user=user, - pipeline_name="chunk_processing", - ): - pass - - 
batch_results = [] - - for producer_future in producer_futures: - try: - result = producer_future.get() - except Exception as e: - result = e - - batch_results.append(result) - - print(f"Number of documents processed: {len(results)}") - results.extend(batch_results) + await cognee.cognify(datasets=["s3-files"]) # Push empty tuple into the queue to signal the end of data. - save_data_points_queue.put(()) + await save_data_points_queue.put.aio(()) for consumer_future in consumer_futures: try: @@ -167,3 +55,7 @@ async def main(): logger.error(e) print(results) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/distributed/models/TextDocument.py b/distributed/models/TextDocument.py deleted file mode 100644 index 1603c70ef..000000000 --- a/distributed/models/TextDocument.py +++ /dev/null @@ -1,15 +0,0 @@ -from cognee.modules.chunking.Chunker import Chunker -from cognee.modules.data.processing.document_types import Document - - -class TextDocument(Document): - type: str = "text" - mime_type: str = "text/plain" - content: str - - def read(self, chunker_cls: Chunker, max_chunk_size: int): - def get_text(): - yield self.content - - chunker: Chunker = chunker_cls(self, max_chunk_size=max_chunk_size, get_text=get_text) - yield from chunker.read() diff --git a/distributed/tasks/extract_graph_from_data.py b/distributed/tasks/extract_graph_from_data.py deleted file mode 100644 index 6e4186cf1..000000000 --- a/distributed/tasks/extract_graph_from_data.py +++ /dev/null @@ -1,41 +0,0 @@ -import asyncio -from typing import Type, List - -from pydantic import BaseModel - -from cognee.modules.graph.utils import ( - expand_with_nodes_and_edges, - retrieve_existing_edges, -) -from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.chunking.models.DocumentChunk import DocumentChunk -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver -from cognee.modules.data.extraction.knowledge_graph import extract_content_graph - - -async def extract_graph_from_data( - data_chunks: list[DocumentChunk], - graph_model: Type[BaseModel], - ontology_adapter: OntologyResolver = OntologyResolver(), -) -> List[DocumentChunk]: - """Extracts and integrates a knowledge graph from the text content of document chunks using a specified graph model.""" - chunk_graphs = await asyncio.gather( - *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] - ) - - if graph_model is not KnowledgeGraph: - for chunk_index, chunk_graph in enumerate(chunk_graphs): - data_chunks[chunk_index].contains = chunk_graph - - return data_chunks - - existing_edges_map = await retrieve_existing_edges( - data_chunks, - chunk_graphs, - ) - - graph_nodes, graph_edges = expand_with_nodes_and_edges( - data_chunks, chunk_graphs, ontology_adapter, existing_edges_map - ) - - return data_chunks, graph_nodes, graph_edges diff --git a/distributed/tasks/queued_add_edges.py b/distributed/tasks/queued_add_edges.py new file mode 100644 index 000000000..44a0c4530 --- /dev/null +++ b/distributed/tasks/queued_add_edges.py @@ -0,0 +1,12 @@ +async def queued_add_edges(edge_batch): + from ..queues import save_data_points_queue + + try: + await save_data_points_queue.put.aio(([], edge_batch)) + except Exception: + first_half, second_half = ( + edge_batch[: len(edge_batch) // 2], + edge_batch[len(edge_batch) // 2 :], + ) + await save_data_points_queue.put.aio(([], first_half)) + await save_data_points_queue.put.aio(([], second_half)) diff --git a/distributed/tasks/queued_add_nodes.py 
b/distributed/tasks/queued_add_nodes.py new file mode 100644 index 000000000..44278c734 --- /dev/null +++ b/distributed/tasks/queued_add_nodes.py @@ -0,0 +1,12 @@ +async def queued_add_nodes(node_batch): + from ..queues import save_data_points_queue + + try: + await save_data_points_queue.put.aio((node_batch, [])) + except Exception: + first_half, second_half = ( + node_batch[: len(node_batch) // 2], + node_batch[len(node_batch) // 2 :], + ) + await save_data_points_queue.put.aio((first_half, [])) + await save_data_points_queue.put.aio((second_half, [])) diff --git a/distributed/tasks/save_data_points.py b/distributed/tasks/save_data_points.py deleted file mode 100644 index 23039f6b6..000000000 --- a/distributed/tasks/save_data_points.py +++ /dev/null @@ -1,116 +0,0 @@ -# import json -# import asyncio -from pympler import asizeof - -# from cognee.modules.storage.utils import JSONEncoder -from distributed.queues import save_data_points_queue -# from cognee.modules.graph.utils import get_graph_from_model - - -async def save_data_points(data_points_and_relationships: tuple[list, list]): - # data_points = data_points_and_relationships[0] - # data_point_connections = data_points_and_relationships[1] - - # added_nodes = {} - # added_edges = {} - # visited_properties = {} - - # nodes_and_edges: list[tuple] = await asyncio.gather( - # *[ - # get_graph_from_model( - # data_point, - # added_nodes=added_nodes, - # added_edges=added_edges, - # visited_properties=visited_properties, - # ) - # for data_point in data_points - # ] - # ) - - # graph_data_deduplication = GraphDataDeduplication() - # deduplicated_nodes_and_edges = [graph_data_deduplication.deduplicate_nodes_and_edges(nodes, edges + data_point_connections) for nodes, edges in nodes_and_edges] - - node_batch = [] - edge_batch = [] - - for nodes, edges in data_points_and_relationships: - for node in nodes: - if asizeof.asizeof(node) >= 500000: - try_pushing_nodes_to_queue([node]) - continue - # print(f"Node too large:\n{node.id}\n") - - node_batch.append(node) - - if asizeof.asizeof(node_batch) >= 500000: - try_pushing_nodes_to_queue(node_batch) - node_batch = [] - - if len(node_batch) > 0: - try_pushing_nodes_to_queue(node_batch) - node_batch = [] - - for edge in edges: - edge_batch.append(edge) - - if asizeof.asizeof(edge_batch) >= 500000: - try_pushing_edges_to_queue(edge_batch) - edge_batch = [] - - if len(edge_batch) > 0: - try_pushing_edges_to_queue(edge_batch) - edge_batch = [] - - # graph_data_deduplication.reset() - - -class GraphDataDeduplication: - nodes_and_edges_map: dict - - def __init__(self): - self.reset() - - def reset(self): - self.nodes_and_edges_map = {} - - def deduplicate_nodes_and_edges(self, nodes: list, edges: list): - final_nodes = [] - final_edges = [] - - for node in nodes: - node_key = str(node.id) - if node_key not in self.nodes_and_edges_map: - self.nodes_and_edges_map[node_key] = True - final_nodes.append(node) - - for edge in edges: - edge_key = str(edge[0]) + str(edge[2]) + str(edge[1]) - if edge_key not in self.nodes_and_edges_map: - self.nodes_and_edges_map[edge_key] = True - final_edges.append(edge) - - return final_nodes, final_edges - - -def try_pushing_nodes_to_queue(node_batch): - try: - save_data_points_queue.put((node_batch, [])) - except Exception: - first_half, second_half = ( - node_batch[: len(node_batch) // 2], - node_batch[len(node_batch) // 2 :], - ) - save_data_points_queue.put((first_half, [])) - save_data_points_queue.put((second_half, [])) - - -def try_pushing_edges_to_queue(edge_batch): - 
try: - save_data_points_queue.put(([], edge_batch)) - except Exception: - first_half, second_half = ( - edge_batch[: len(edge_batch) // 2], - edge_batch[len(edge_batch) // 2 :], - ) - save_data_points_queue.put(([], first_half)) - save_data_points_queue.put(([], second_half)) diff --git a/distributed/tasks/summarize_text.py b/distributed/tasks/summarize_text.py deleted file mode 100644 index 72ad36e49..000000000 --- a/distributed/tasks/summarize_text.py +++ /dev/null @@ -1,88 +0,0 @@ -import asyncio -from typing import Type -from uuid import uuid5 -from pydantic import BaseModel -from cognee.modules.graph.utils import get_graph_from_model -from cognee.tasks.summarization.models import TextSummary -from cognee.infrastructure.engine.models.DataPoint import DataPoint -from cognee.modules.chunking.models.DocumentChunk import DocumentChunk -from cognee.modules.data.extraction.extract_summary import extract_summary - - -async def summarize_text( - data_points_and_relationships: tuple[list[DocumentChunk], list[DataPoint], list], - summarization_model: Type[BaseModel], -): - document_chunks = data_points_and_relationships[0] - nodes = data_points_and_relationships[1] - relationships = data_points_and_relationships[2] - - if len(document_chunks) == 0: - return document_chunks - - chunk_summaries = await asyncio.gather( - *[extract_summary(chunk.text, summarization_model) for chunk in document_chunks] - ) - - summaries = [ - TextSummary( - id=uuid5(chunk.id, "TextSummary"), - made_from=chunk, - text=chunk_summaries[chunk_index].summary, - ) - for (chunk_index, chunk) in enumerate(document_chunks) - ] - - data_points = summaries + nodes - - added_nodes = {} - added_edges = {} - visited_properties = {} - - nodes_and_edges: list[tuple] = await asyncio.gather( - *[ - get_graph_from_model( - data_point, - added_nodes=added_nodes, - added_edges=added_edges, - visited_properties=visited_properties, - ) - for data_point in data_points - ] - ) - - graph_data_deduplication = GraphDataDeduplication() - deduplicated_nodes_and_edges = [ - graph_data_deduplication.deduplicate_nodes_and_edges(nodes, edges + relationships) - for nodes, edges in nodes_and_edges - ] - - return deduplicated_nodes_and_edges - - -class GraphDataDeduplication: - nodes_and_edges_map: dict - - def __init__(self): - self.reset() - - def reset(self): - self.nodes_and_edges_map = {} - - def deduplicate_nodes_and_edges(self, nodes: list, edges: list): - final_nodes = [] - final_edges = [] - - for node in nodes: - node_key = str(node.id) - if node_key not in self.nodes_and_edges_map: - self.nodes_and_edges_map[node_key] = True - final_nodes.append(node) - - for edge in edges: - edge_key = str(edge[0]) + str(edge[2]) + str(edge[1]) - if edge_key not in self.nodes_and_edges_map: - self.nodes_and_edges_map[edge_key] = True - final_edges.append(edge) - - return final_nodes, final_edges diff --git a/distributed/workers/data_point_saver_worker.py b/distributed/workers/data_point_saver_worker.py index bf0035c2a..8db8f32ee 100644 --- a/distributed/workers/data_point_saver_worker.py +++ b/distributed/workers/data_point_saver_worker.py @@ -1,20 +1,35 @@ +import modal import asyncio from distributed.app import app from distributed.modal_image import image from distributed.queues import save_data_points_queue + +from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.graph import get_graph_engine -@app.function(image=image, timeout=86400, max_containers=100) +logger = get_logger("data_point_saver_worker") + + +@app.function( 
+ image=image, + timeout=86400, + max_containers=100, + secrets=[modal.Secret.from_name("distributed_cognee")], +) async def data_point_saver_worker(): print("Started processing of nodes and edges; starting graph engine queue.") graph_engine = await get_graph_engine() while True: - if save_data_points_queue.len() != 0: - nodes_and_edges = save_data_points_queue.get(block=False) + if await save_data_points_queue.len.aio() != 0: + try: + nodes_and_edges = await save_data_points_queue.get.aio(block=False) + except modal.exception.DeserializationError as error: + logger.error(f"Deserialization error: {str(error)}") + continue if len(nodes_and_edges) == 0: print("Finished processing all nodes and edges; stopping graph engine queue.") @@ -28,10 +43,10 @@ async def data_point_saver_worker(): edges = nodes_and_edges[1] if nodes: - await graph_engine.add_nodes(nodes) + await graph_engine.add_nodes(nodes, distributed=False) if edges: - await graph_engine.add_edges(edges) + await graph_engine.add_edges(edges, distributed=False) print("Finished processing nodes and edges.") else: diff --git a/distributed/workers/graph_extraction_worker.py b/distributed/workers/graph_extraction_worker.py deleted file mode 100644 index 698521405..000000000 --- a/distributed/workers/graph_extraction_worker.py +++ /dev/null @@ -1,42 +0,0 @@ -# ------------------------------------------------------------------------------ -# Producer function that produces data points from documents and pushes them into the queue. -# ------------------------------------------------------------------------------ -from cognee.modules.cognify.config import get_cognify_config -from cognee.modules.pipelines.operations.run_tasks import run_tasks -from cognee.modules.pipelines.tasks.task import Task -from cognee.shared.data_models import KnowledgeGraph - -from distributed.app import app -from distributed.modal_image import image -from distributed.tasks.summarize_text import summarize_text -from distributed.tasks.extract_graph_from_data import extract_graph_from_data -from distributed.tasks.save_data_points import save_data_points - - -@app.function(image=image, timeout=86400, max_containers=100) -async def graph_extraction_worker(user, document_name: str, document_chunks: list): - cognee_config = get_cognify_config() - - tasks = [ - Task( - extract_graph_from_data, - graph_model=KnowledgeGraph, - ), - Task( - summarize_text, - summarization_model=cognee_config.summarization_model, - ), - Task(save_data_points), - ] - - async for _ in run_tasks( - tasks, - data=document_chunks, - pipeline_name=f"modal_execution_file_{document_name}", - user=user, - ): - pass - - print(f"File execution finished: {document_name}") - - return document_name diff --git a/poetry.lock b/poetry.lock index 344a0ba2d..2d3eb60c2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1114,14 +1114,14 @@ uvicorn = {version = ">=0.18.3", extras = ["standard"]} [[package]] name = "click" -version = "8.2.1" +version = "8.1.8" description = "Composable command line interface toolkit" optional = false -python-versions = ">=3.10" +python-versions = ">=3.7" groups = ["main"] files = [ - {file = "click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b"}, - {file = "click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202"}, + {file = "click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2"}, + {file = "click-8.1.8.tar.gz", hash = 
"sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a"}, ] [package.dependencies] @@ -5841,26 +5841,25 @@ type = ["mypy (==1.14.1)"] [[package]] name = "modal" -version = "0.74.15" +version = "1.0.5" description = "Python client library for Modal" optional = true python-versions = ">=3.9" groups = ["main"] markers = "extra == \"distributed\"" files = [ - {file = "modal-0.74.15-py3-none-any.whl", hash = "sha256:084e898ab202ccd698fd277d9dc9e9cec8d4b0954a1c09d4ba529f0446ab3526"}, - {file = "modal-0.74.15.tar.gz", hash = "sha256:95512811ebd42a52fa03724f60d0d1c32259788351e798d0d695974d94b2e49c"}, + {file = "modal-1.0.5-py3-none-any.whl", hash = "sha256:e575414b7e10c5ad97ae0a77149456f021e9f2c457559bb4a5af70771f481c09"}, + {file = "modal-1.0.5.tar.gz", hash = "sha256:1c6927dd5eb9bd8d86d83ee499b87d64366525f1dd3e5b31d1ac1ed62090f72a"}, ] [package.dependencies] aiohttp = "*" certifi = "*" -click = ">=8.1.0" -fastapi = "*" -grpclib = "0.4.7" -protobuf = ">=3.19,<4.24.0 || >4.24.0,<6.0" +click = ">=8.1.0,<8.2.0" +grpclib = ">=0.4.7,<0.4.9" +protobuf = ">=3.19,<4.24.0 || >4.24.0,<7.0" rich = ">=12.0.0" -synchronicity = ">=0.9.10,<0.10.0" +synchronicity = ">=0.9.15,<0.10.0" toml = "*" typer = ">=0.9" types-certifi = "*" @@ -10492,15 +10491,15 @@ dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] [[package]] name = "synchronicity" -version = "0.9.12" +version = "0.9.16" description = "Export blocking and async library versions from a single async implementation" optional = true python-versions = ">=3.8" groups = ["main"] markers = "extra == \"distributed\"" files = [ - {file = "synchronicity-0.9.12-py3-none-any.whl", hash = "sha256:b006f57bd216d55e578316096a11b6dc16016d6b48e2766bcffabe40c88f9793"}, - {file = "synchronicity-0.9.12.tar.gz", hash = "sha256:977f3ed8f6e35de4d1a3f0aeee4937143ba8d913f531d33e8df7c539b2792fb8"}, + {file = "synchronicity-0.9.16-py3-none-any.whl", hash = "sha256:79dd3451c063a06489d466b0585808e1d5d257fc3c2304f5f733a142ef269e97"}, + {file = "synchronicity-0.9.16.tar.gz", hash = "sha256:bc174b756d12330b1e6027f8293c3b944cf3684dec241af009cb7bb169a0072f"}, ] [package.dependencies] @@ -12237,4 +12236,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "963b6d6541a74c8d063e8acc529489f3876c38cab3616a12a17cbe4e48fa40b6" +content-hash = "ca2a3e8260092933419793efe202d50ae7b1c6ce738750876fd5f64a31718790" diff --git a/pyproject.toml b/pyproject.toml index 7b72095d3..a0f74e5aa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ api = [ "websockets>=15.0.1" ] distributed = [ - "modal==0.74.15", + "modal==1.0.5", ] weaviate = ["weaviate-client==4.9.6"] qdrant = ["qdrant-client>=1.14.2,<2"] diff --git a/uv.lock b/uv.lock index b2cf6d573..29243545d 100644 --- a/uv.lock +++ b/uv.lock @@ -58,7 +58,6 @@ dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, { name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "async-timeout", version = "5.0.1", source = { registry = "https://pypi.org/simple" }, marker = "python_version < '0'" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -346,7 +345,6 @@ version = "0.30.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "async-timeout", version = "4.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "async-timeout", version = "5.0.1", source = 
{ registry = "https://pypi.org/simple" }, marker = "python_version < '0'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/2f/4c/7c991e080e106d854809030d8584e15b2e996e26f16aee6d757e387bc17d/asyncpg-0.30.0.tar.gz", hash = "sha256:c551e9928ab6707602f44811817f82ba3c446e018bfe1d3abecc8ba5f3eac851", size = 957746, upload-time = "2024-10-20T00:30:41.127Z" } wheels = [ @@ -795,14 +793,14 @@ wheels = [ [[package]] name = "click" -version = "8.2.1" +version = "8.1.8" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/60/6c/8ca2efa64cf75a977a0d7fac081354553ebe483345c734fb6b6515d96bbc/click-8.2.1.tar.gz", hash = "sha256:27c491cc05d968d271d5a1db13e3b5a184636d9d930f148c50b038f0d0646202", size = 286342, upload-time = "2025-05-20T23:19:49.832Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b9/2e/0090cbf739cee7d23781ad4b89a9894a41538e4fcf4c31dcdd705b78eb8b/click-8.1.8.tar.gz", hash = "sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a", size = 226593, upload-time = "2024-12-21T18:38:44.339Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" }, + { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188, upload-time = "2024-12-21T18:38:41.666Z" }, ] [[package]] @@ -1072,7 +1070,7 @@ requires-dist = [ { name = "mkdocs-material", marker = "extra == 'dev'", specifier = ">=9.5.42,<10" }, { name = "mkdocs-minify-plugin", marker = "extra == 'dev'", specifier = ">=0.8.0,<0.9" }, { name = "mkdocstrings", extras = ["python"], marker = "extra == 'dev'", specifier = ">=0.26.2,<0.27" }, - { name = "modal", marker = "extra == 'distributed'", specifier = "==0.74.15" }, + { name = "modal", marker = "extra == 'distributed'", specifier = "==1.0.5" }, { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.7.1,<2" }, { name = "neo4j", marker = "extra == 'neo4j'", specifier = ">=5.20.0,<6" }, { name = "networkx", specifier = ">=3.4.2,<4" }, @@ -1791,17 +1789,17 @@ name = "fastembed" version = "0.6.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "huggingface-hub" }, - { name = "loguru" }, - { name = "mmh3" }, + { name = "huggingface-hub", marker = "python_full_version < '3.13'" }, + { name = "loguru", marker = "python_full_version < '3.13'" }, + { name = "mmh3", marker = "python_full_version < '3.13'" }, { name = "numpy", version = "1.26.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" }, - { name = "numpy", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" }, - { name = "onnxruntime" }, - { name = "pillow" }, - { name = "py-rust-stemmers" }, - { name = "requests" }, - { name = "tokenizers" }, - { name = "tqdm" }, + { name = "numpy", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.12.*'" }, + { name = "onnxruntime", marker = "python_full_version < '3.13'" }, + { name = "pillow", marker = "python_full_version < '3.13'" }, 
+ { name = "py-rust-stemmers", marker = "python_full_version < '3.13'" }, + { name = "requests", marker = "python_full_version < '3.13'" }, + { name = "tokenizers", marker = "python_full_version < '3.13'" }, + { name = "tqdm", marker = "python_full_version < '3.13'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/c6/f4/036a656c605f63dc25f11284f60f69900a54a19c513e1ae60d21d6977e75/fastembed-0.6.0.tar.gz", hash = "sha256:5c9ead25f23449535b07243bbe1f370b820dcc77ec2931e61674e3fe7ff24733", size = 50731, upload-time = "2025-02-26T13:50:33.031Z" } wheels = [ @@ -3794,8 +3792,8 @@ name = "loguru" version = "0.7.3" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "win32-setctime", marker = "sys_platform == 'win32'" }, + { name = "colorama", marker = "python_full_version < '3.13' and sys_platform == 'win32'" }, + { name = "win32-setctime", marker = "python_full_version < '3.13' and sys_platform == 'win32'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/3a/05/a1dae3dffd1116099471c643b8924f5aa6524411dc6c63fdae648c4f1aca/loguru-0.7.3.tar.gz", hash = "sha256:19480589e77d47b8d85b2c827ad95d49bf31b0dcde16593892eb51dd18706eb6", size = 63559, upload-time = "2024-12-06T11:20:56.608Z" } wheels = [ @@ -3842,10 +3840,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9a/55/2cb24ea48aa30c99f805921c1c7860c1f45c0e811e44ee4e6a155668de06/lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563", size = 4952289, upload-time = "2025-06-28T18:47:25.602Z" }, { url = "https://files.pythonhosted.org/packages/31/c0/b25d9528df296b9a3306ba21ff982fc5b698c45ab78b94d18c2d6ae71fd9/lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7", size = 5111310, upload-time = "2025-06-28T18:47:28.136Z" }, { url = "https://files.pythonhosted.org/packages/e9/af/681a8b3e4f668bea6e6514cbcb297beb6de2b641e70f09d3d78655f4f44c/lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7", size = 5025457, upload-time = "2025-06-26T16:26:15.068Z" }, + { url = "https://files.pythonhosted.org/packages/99/b6/3a7971aa05b7be7dfebc7ab57262ec527775c2c3c5b2f43675cac0458cad/lxml-6.0.0-cp312-cp312-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d6e200909a119626744dd81bae409fc44134389e03fbf1d68ed2a55a2fb10991", size = 5657016, upload-time = "2025-07-03T19:19:06.008Z" }, { url = "https://files.pythonhosted.org/packages/69/f8/693b1a10a891197143c0673fcce5b75fc69132afa81a36e4568c12c8faba/lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da", size = 5257565, upload-time = "2025-06-26T16:26:17.906Z" }, { url = "https://files.pythonhosted.org/packages/a8/96/e08ff98f2c6426c98c8964513c5dab8d6eb81dadcd0af6f0c538ada78d33/lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e", size = 4713390, upload-time = "2025-06-26T16:26:20.292Z" }, { url = "https://files.pythonhosted.org/packages/a8/83/6184aba6cc94d7413959f6f8f54807dc318fdcd4985c347fe3ea6937f772/lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741", size = 5066103, upload-time = "2025-06-26T16:26:22.765Z" }, { url = "https://files.pythonhosted.org/packages/ee/01/8bf1f4035852d0ff2e36a4d9aacdbcc57e93a6cd35a54e05fa984cdf73ab/lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3", size = 4791428, upload-time = "2025-06-26T16:26:26.461Z" }, + { url = "https://files.pythonhosted.org/packages/29/31/c0267d03b16954a85ed6b065116b621d37f559553d9339c7dcc4943a76f1/lxml-6.0.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c907516d49f77f6cd8ead1322198bdfd902003c3c330c77a1c5f3cc32a0e4d16", size = 5678523, upload-time = "2025-07-03T19:19:09.837Z" }, { url = "https://files.pythonhosted.org/packages/5c/f7/5495829a864bc5f8b0798d2b52a807c89966523140f3d6fa3a58ab6720ea/lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0", size = 5281290, upload-time = "2025-06-26T16:26:29.406Z" }, { url = "https://files.pythonhosted.org/packages/79/56/6b8edb79d9ed294ccc4e881f4db1023af56ba451909b9ce79f2a2cd7c532/lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a", size = 3613495, upload-time = "2025-06-26T16:26:31.588Z" }, { url = "https://files.pythonhosted.org/packages/0b/1e/cc32034b40ad6af80b6fd9b66301fc0f180f300002e5c3eb5a6110a93317/lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3", size = 4014711, upload-time = "2025-06-26T16:26:33.723Z" }, @@ -3856,10 +3856,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/46/3572761efc1bd45fcafb44a63b3b0feeb5b3f0066886821e94b0254f9253/lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81", size = 4947559, upload-time = "2025-06-28T18:47:31.091Z" }, { url = "https://files.pythonhosted.org/packages/94/8a/5e40de920e67c4f2eef9151097deb9b52d86c95762d8ee238134aff2125d/lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1", size = 5102143, upload-time = "2025-06-28T18:47:33.612Z" }, { url = "https://files.pythonhosted.org/packages/7c/4b/20555bdd75d57945bdabfbc45fdb1a36a1a0ff9eae4653e951b2b79c9209/lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24", size = 5021931, upload-time = "2025-06-26T16:26:47.503Z" }, + { url = "https://files.pythonhosted.org/packages/b6/6e/cf03b412f3763d4ca23b25e70c96a74cfece64cec3addf1c4ec639586b13/lxml-6.0.0-cp313-cp313-manylinux_2_27_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a78d6c9168f5bcb20971bf3329c2b83078611fbe1f807baadc64afc70523b3a", size = 5645469, upload-time = "2025-07-03T19:19:13.32Z" }, { url = "https://files.pythonhosted.org/packages/d4/dd/39c8507c16db6031f8c1ddf70ed95dbb0a6d466a40002a3522c128aba472/lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29", size = 5247467, upload-time = "2025-06-26T16:26:49.998Z" }, { url = "https://files.pythonhosted.org/packages/4d/56/732d49def0631ad633844cfb2664563c830173a98d5efd9b172e89a4800d/lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = 
"sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4", size = 4720601, upload-time = "2025-06-26T16:26:52.564Z" }, { url = "https://files.pythonhosted.org/packages/8f/7f/6b956fab95fa73462bca25d1ea7fc8274ddf68fb8e60b78d56c03b65278e/lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca", size = 5060227, upload-time = "2025-06-26T16:26:55.054Z" }, { url = "https://files.pythonhosted.org/packages/97/06/e851ac2924447e8b15a294855caf3d543424364a143c001014d22c8ca94c/lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf", size = 4790637, upload-time = "2025-06-26T16:26:57.384Z" }, + { url = "https://files.pythonhosted.org/packages/06/d4/fd216f3cd6625022c25b336c7570d11f4a43adbaf0a56106d3d496f727a7/lxml-6.0.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4d6036c3a296707357efb375cfc24bb64cd955b9ec731abf11ebb1e40063949f", size = 5662049, upload-time = "2025-07-03T19:19:16.409Z" }, { url = "https://files.pythonhosted.org/packages/52/03/0e764ce00b95e008d76b99d432f1807f3574fb2945b496a17807a1645dbd/lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef", size = 5272430, upload-time = "2025-06-26T16:27:00.031Z" }, { url = "https://files.pythonhosted.org/packages/5f/01/d48cc141bc47bc1644d20fe97bbd5e8afb30415ec94f146f2f76d0d9d098/lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181", size = 3612896, upload-time = "2025-06-26T16:27:04.251Z" }, { url = "https://files.pythonhosted.org/packages/f4/87/6456b9541d186ee7d4cb53bf1b9a0d7f3b1068532676940fdd594ac90865/lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e", size = 4013132, upload-time = "2025-06-26T16:27:06.415Z" }, @@ -4372,13 +4374,12 @@ wheels = [ [[package]] name = "modal" -version = "0.74.15" +version = "1.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohttp" }, { name = "certifi" }, { name = "click" }, - { name = "fastapi" }, { name = "grpclib" }, { name = "protobuf" }, { name = "rich" }, @@ -4390,9 +4391,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "watchfiles" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/be/0e/c9fcd1e891a78eb0ea48414323c9340fbded091f8a0fdaa4284a926c26d4/modal-0.74.15.tar.gz", hash = "sha256:95512811ebd42a52fa03724f60d0d1c32259788351e798d0d695974d94b2e49c", size = 496513, upload-time = "2025-04-21T22:31:38.229Z" } +sdist = { url = "https://files.pythonhosted.org/packages/91/27/6225210eedbc469c76c5ef499d75a5eceaa2222e7622c0bed21934b8b5ec/modal-1.0.5.tar.gz", hash = "sha256:1c6927dd5eb9bd8d86d83ee499b87d64366525f1dd3e5b31d1ac1ed62090f72a", size = 540527, upload-time = "2025-06-27T23:14:37.897Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/9f/b5/1ac0408456b506a47781bdbb5330830a68233b9e6b13972bdb49ef409eb8/modal-0.74.15-py3-none-any.whl", hash = "sha256:084e898ab202ccd698fd277d9dc9e9cec8d4b0954a1c09d4ba529f0446ab3526", size = 563317, upload-time = "2025-04-21T22:31:34.97Z" }, + { url = "https://files.pythonhosted.org/packages/a0/c8/676d95080841ca2438717d9c52b02669d3221fae6eff0a67562e29030a2e/modal-1.0.5-py3-none-any.whl", hash = "sha256:e575414b7e10c5ad97ae0a77149456f021e9f2c457559bb4a5af70771f481c09", size = 626045, upload-time = "2025-06-27T23:14:35.742Z" }, ] 
[[package]] @@ -5324,6 +5325,8 @@ sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/d0d6dea55cd152ce3 wheels = [ { url = "https://files.pythonhosted.org/packages/4c/5d/45a3553a253ac8763f3561371432a90bdbe6000fbdcf1397ffe502aa206c/pillow-11.3.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1b9c17fd4ace828b3003dfd1e30bff24863e0eb59b535e8f80194d9cc7ecf860", size = 5316554, upload-time = "2025-07-01T09:13:39.342Z" }, { url = "https://files.pythonhosted.org/packages/7c/c8/67c12ab069ef586a25a4a79ced553586748fad100c77c0ce59bb4983ac98/pillow-11.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:65dc69160114cdd0ca0f35cb434633c75e8e7fad4cf855177a05bf38678f73ad", size = 4686548, upload-time = "2025-07-01T09:13:41.835Z" }, + { url = "https://files.pythonhosted.org/packages/2f/bd/6741ebd56263390b382ae4c5de02979af7f8bd9807346d068700dd6d5cf9/pillow-11.3.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7107195ddc914f656c7fc8e4a5e1c25f32e9236ea3ea860f257b0436011fddd0", size = 5859742, upload-time = "2025-07-03T13:09:47.439Z" }, + { url = "https://files.pythonhosted.org/packages/ca/0b/c412a9e27e1e6a829e6ab6c2dca52dd563efbedf4c9c6aa453d9a9b77359/pillow-11.3.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc3e831b563b3114baac7ec2ee86819eb03caa1a2cef0b481a5675b59c4fe23b", size = 7633087, upload-time = "2025-07-03T13:09:51.796Z" }, { url = "https://files.pythonhosted.org/packages/59/9d/9b7076aaf30f5dd17e5e5589b2d2f5a5d7e30ff67a171eb686e4eecc2adf/pillow-11.3.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1f182ebd2303acf8c380a54f615ec883322593320a9b00438eb842c1f37ae50", size = 5963350, upload-time = "2025-07-01T09:13:43.865Z" }, { url = "https://files.pythonhosted.org/packages/f0/16/1a6bf01fb622fb9cf5c91683823f073f053005c849b1f52ed613afcf8dae/pillow-11.3.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4445fa62e15936a028672fd48c4c11a66d641d2c05726c7ec1f8ba6a572036ae", size = 6631840, upload-time = "2025-07-01T09:13:46.161Z" }, { url = "https://files.pythonhosted.org/packages/7b/e6/6ff7077077eb47fde78739e7d570bdcd7c10495666b6afcd23ab56b19a43/pillow-11.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:71f511f6b3b91dd543282477be45a033e4845a40278fa8dcdbfdb07109bf18f9", size = 6074005, upload-time = "2025-07-01T09:13:47.829Z" }, @@ -5333,6 +5336,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9f/28/4f4a0203165eefb3763939c6789ba31013a2e90adffb456610f30f613850/pillow-11.3.0-cp310-cp310-win_arm64.whl", hash = "sha256:819931d25e57b513242859ce1876c58c59dc31587847bf74cfe06b2e0cb22d2f", size = 2422899, upload-time = "2025-07-01T09:13:57.497Z" }, { url = "https://files.pythonhosted.org/packages/db/26/77f8ed17ca4ffd60e1dcd220a6ec6d71210ba398cfa33a13a1cd614c5613/pillow-11.3.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1cd110edf822773368b396281a2293aeb91c90a2db00d78ea43e7e861631b722", size = 5316531, upload-time = "2025-07-01T09:13:59.203Z" }, { url = "https://files.pythonhosted.org/packages/cb/39/ee475903197ce709322a17a866892efb560f57900d9af2e55f86db51b0a5/pillow-11.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9c412fddd1b77a75aa904615ebaa6001f169b26fd467b4be93aded278266b288", size = 4686560, upload-time = "2025-07-01T09:14:01.101Z" }, + { url = "https://files.pythonhosted.org/packages/d5/90/442068a160fd179938ba55ec8c97050a612426fae5ec0a764e345839f76d/pillow-11.3.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:7d1aa4de119a0ecac0a34a9c8bde33f34022e2e8f99104e47a3ca392fd60e37d", size = 5870978, upload-time = "2025-07-03T13:09:55.638Z" }, + { url = "https://files.pythonhosted.org/packages/13/92/dcdd147ab02daf405387f0218dcf792dc6dd5b14d2573d40b4caeef01059/pillow-11.3.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:91da1d88226663594e3f6b4b8c3c8d85bd504117d043740a8e0ec449087cc494", size = 7641168, upload-time = "2025-07-03T13:10:00.37Z" }, { url = "https://files.pythonhosted.org/packages/6e/db/839d6ba7fd38b51af641aa904e2960e7a5644d60ec754c046b7d2aee00e5/pillow-11.3.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:643f189248837533073c405ec2f0bb250ba54598cf80e8c1e043381a60632f58", size = 5973053, upload-time = "2025-07-01T09:14:04.491Z" }, { url = "https://files.pythonhosted.org/packages/f2/2f/d7675ecae6c43e9f12aa8d58b6012683b20b6edfbdac7abcb4e6af7a3784/pillow-11.3.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:106064daa23a745510dabce1d84f29137a37224831d88eb4ce94bb187b1d7e5f", size = 6640273, upload-time = "2025-07-01T09:14:06.235Z" }, { url = "https://files.pythonhosted.org/packages/45/ad/931694675ede172e15b2ff03c8144a0ddaea1d87adb72bb07655eaffb654/pillow-11.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cd8ff254faf15591e724dc7c4ddb6bf4793efcbe13802a4ae3e863cd300b493e", size = 6082043, upload-time = "2025-07-01T09:14:07.978Z" }, @@ -5342,6 +5347,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/df/90bd886fabd544c25addd63e5ca6932c86f2b701d5da6c7839387a076b4a/pillow-11.3.0-cp311-cp311-win_arm64.whl", hash = "sha256:30807c931ff7c095620fe04448e2c2fc673fcbb1ffe2a7da3fb39613489b1ddd", size = 2423079, upload-time = "2025-07-01T09:14:15.268Z" }, { url = "https://files.pythonhosted.org/packages/40/fe/1bc9b3ee13f68487a99ac9529968035cca2f0a51ec36892060edcc51d06a/pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4", size = 5278800, upload-time = "2025-07-01T09:14:17.648Z" }, { url = "https://files.pythonhosted.org/packages/2c/32/7e2ac19b5713657384cec55f89065fb306b06af008cfd87e572035b27119/pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69", size = 4686296, upload-time = "2025-07-01T09:14:19.828Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1e/b9e12bbe6e4c2220effebc09ea0923a07a6da1e1f1bfbc8d7d29a01ce32b/pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d", size = 5871726, upload-time = "2025-07-03T13:10:04.448Z" }, + { url = "https://files.pythonhosted.org/packages/8d/33/e9200d2bd7ba00dc3ddb78df1198a6e80d7669cce6c2bdbeb2530a74ec58/pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6", size = 7644652, upload-time = "2025-07-03T13:10:10.391Z" }, { url = "https://files.pythonhosted.org/packages/41/f1/6f2427a26fc683e00d985bc391bdd76d8dd4e92fac33d841127eb8fb2313/pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7", size = 5977787, upload-time = "2025-07-01T09:14:21.63Z" }, { url = 
"https://files.pythonhosted.org/packages/e4/c9/06dd4a38974e24f932ff5f98ea3c546ce3f8c995d3f0985f8e5ba48bba19/pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024", size = 6645236, upload-time = "2025-07-01T09:14:23.321Z" }, { url = "https://files.pythonhosted.org/packages/40/e7/848f69fb79843b3d91241bad658e9c14f39a32f71a301bcd1d139416d1be/pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809", size = 6086950, upload-time = "2025-07-01T09:14:25.237Z" }, @@ -5354,6 +5361,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/86/3f758a28a6e381758545f7cdb4942e1cb79abd271bea932998fc0db93cb6/pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f", size = 2227443, upload-time = "2025-07-01T09:14:39.344Z" }, { url = "https://files.pythonhosted.org/packages/01/f4/91d5b3ffa718df2f53b0dc109877993e511f4fd055d7e9508682e8aba092/pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c", size = 5278474, upload-time = "2025-07-01T09:14:41.843Z" }, { url = "https://files.pythonhosted.org/packages/f9/0e/37d7d3eca6c879fbd9dba21268427dffda1ab00d4eb05b32923d4fbe3b12/pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd", size = 4686038, upload-time = "2025-07-01T09:14:44.008Z" }, + { url = "https://files.pythonhosted.org/packages/ff/b0/3426e5c7f6565e752d81221af9d3676fdbb4f352317ceafd42899aaf5d8a/pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e", size = 5864407, upload-time = "2025-07-03T13:10:15.628Z" }, + { url = "https://files.pythonhosted.org/packages/fc/c1/c6c423134229f2a221ee53f838d4be9d82bab86f7e2f8e75e47b6bf6cd77/pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1", size = 7639094, upload-time = "2025-07-03T13:10:21.857Z" }, { url = "https://files.pythonhosted.org/packages/ba/c9/09e6746630fe6372c67c648ff9deae52a2bc20897d51fa293571977ceb5d/pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805", size = 5973503, upload-time = "2025-07-01T09:14:45.698Z" }, { url = "https://files.pythonhosted.org/packages/d5/1c/a2a29649c0b1983d3ef57ee87a66487fdeb45132df66ab30dd37f7dbe162/pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8", size = 6642574, upload-time = "2025-07-01T09:14:47.415Z" }, { url = "https://files.pythonhosted.org/packages/36/de/d5cc31cc4b055b6c6fd990e3e7f0f8aaf36229a2698501bcb0cdf67c7146/pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2", size = 6084060, upload-time = "2025-07-01T09:14:49.636Z" }, @@ -5363,6 +5372,8 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/17/d2/622f4547f69cd173955194b78e4d19ca4935a1b0f03a302d655c9f6aae65/pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580", size = 2423055, 
upload-time = "2025-07-01T09:14:58.072Z" }, { url = "https://files.pythonhosted.org/packages/dd/80/a8a2ac21dda2e82480852978416cfacd439a4b490a501a288ecf4fe2532d/pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e", size = 5281110, upload-time = "2025-07-01T09:14:59.79Z" }, { url = "https://files.pythonhosted.org/packages/44/d6/b79754ca790f315918732e18f82a8146d33bcd7f4494380457ea89eb883d/pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d", size = 4689547, upload-time = "2025-07-01T09:15:01.648Z" }, + { url = "https://files.pythonhosted.org/packages/49/20/716b8717d331150cb00f7fdd78169c01e8e0c219732a78b0e59b6bdb2fd6/pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced", size = 5901554, upload-time = "2025-07-03T13:10:27.018Z" }, + { url = "https://files.pythonhosted.org/packages/74/cf/a9f3a2514a65bb071075063a96f0a5cf949c2f2fce683c15ccc83b1c1cab/pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c", size = 7669132, upload-time = "2025-07-03T13:10:33.01Z" }, { url = "https://files.pythonhosted.org/packages/98/3c/da78805cbdbee9cb43efe8261dd7cc0b4b93f2ac79b676c03159e9db2187/pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8", size = 6005001, upload-time = "2025-07-01T09:15:03.365Z" }, { url = "https://files.pythonhosted.org/packages/6c/fa/ce044b91faecf30e635321351bba32bab5a7e034c60187fe9698191aef4f/pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59", size = 6668814, upload-time = "2025-07-01T09:15:05.655Z" }, { url = "https://files.pythonhosted.org/packages/7b/51/90f9291406d09bf93686434f9183aba27b831c10c87746ff49f127ee80cb/pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe", size = 6113124, upload-time = "2025-07-01T09:15:07.358Z" }, @@ -5372,11 +5383,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f3/7e/b623008460c09a0cb38263c93b828c666493caee2eb34ff67f778b87e58c/pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e", size = 2424803, upload-time = "2025-07-01T09:15:15.695Z" }, { url = "https://files.pythonhosted.org/packages/6f/8b/209bd6b62ce8367f47e68a218bffac88888fdf2c9fcf1ecadc6c3ec1ebc7/pillow-11.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:3cee80663f29e3843b68199b9d6f4f54bd1d4a6b59bdd91bceefc51238bcb967", size = 5270556, upload-time = "2025-07-01T09:16:09.961Z" }, { url = "https://files.pythonhosted.org/packages/2e/e6/231a0b76070c2cfd9e260a7a5b504fb72da0a95279410fa7afd99d9751d6/pillow-11.3.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b5f56c3f344f2ccaf0dd875d3e180f631dc60a51b314295a3e681fe8cf851fbe", size = 4654625, upload-time = "2025-07-01T09:16:11.913Z" }, + { url = "https://files.pythonhosted.org/packages/13/f4/10cf94fda33cb12765f2397fc285fa6d8eb9c29de7f3185165b702fc7386/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = 
"sha256:e67d793d180c9df62f1f40aee3accca4829d3794c95098887edc18af4b8b780c", size = 4874207, upload-time = "2025-07-03T13:11:10.201Z" }, + { url = "https://files.pythonhosted.org/packages/72/c9/583821097dc691880c92892e8e2d41fe0a5a3d6021f4963371d2f6d57250/pillow-11.3.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d000f46e2917c705e9fb93a3606ee4a819d1e3aa7a9b442f6444f07e77cf5e25", size = 6583939, upload-time = "2025-07-03T13:11:15.68Z" }, { url = "https://files.pythonhosted.org/packages/3b/8e/5c9d410f9217b12320efc7c413e72693f48468979a013ad17fd690397b9a/pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:527b37216b6ac3a12d7838dc3bd75208ec57c1c6d11ef01902266a5a0c14fc27", size = 4957166, upload-time = "2025-07-01T09:16:13.74Z" }, { url = "https://files.pythonhosted.org/packages/62/bb/78347dbe13219991877ffb3a91bf09da8317fbfcd4b5f9140aeae020ad71/pillow-11.3.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:be5463ac478b623b9dd3937afd7fb7ab3d79dd290a28e2b6df292dc75063eb8a", size = 5581482, upload-time = "2025-07-01T09:16:16.107Z" }, { url = "https://files.pythonhosted.org/packages/d9/28/1000353d5e61498aaeaaf7f1e4b49ddb05f2c6575f9d4f9f914a3538b6e1/pillow-11.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:8dc70ca24c110503e16918a658b869019126ecfe03109b754c402daff12b3d9f", size = 6984596, upload-time = "2025-07-01T09:16:18.07Z" }, { url = "https://files.pythonhosted.org/packages/9e/e3/6fa84033758276fb31da12e5fb66ad747ae83b93c67af17f8c6ff4cc8f34/pillow-11.3.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7c8ec7a017ad1bd562f93dbd8505763e688d388cde6e4a010ae1486916e713e6", size = 5270566, upload-time = "2025-07-01T09:16:19.801Z" }, { url = "https://files.pythonhosted.org/packages/5b/ee/e8d2e1ab4892970b561e1ba96cbd59c0d28cf66737fc44abb2aec3795a4e/pillow-11.3.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:9ab6ae226de48019caa8074894544af5b53a117ccb9d3b3dcb2871464c829438", size = 4654618, upload-time = "2025-07-01T09:16:21.818Z" }, + { url = "https://files.pythonhosted.org/packages/f2/6d/17f80f4e1f0761f02160fc433abd4109fa1548dcfdca46cfdadaf9efa565/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fe27fb049cdcca11f11a7bfda64043c37b30e6b91f10cb5bab275806c32f6ab3", size = 4874248, upload-time = "2025-07-03T13:11:20.738Z" }, + { url = "https://files.pythonhosted.org/packages/de/5f/c22340acd61cef960130585bbe2120e2fd8434c214802f07e8c03596b17e/pillow-11.3.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:465b9e8844e3c3519a983d58b80be3f668e2a7a5db97f2784e7079fbc9f9822c", size = 6583963, upload-time = "2025-07-03T13:11:26.283Z" }, { url = "https://files.pythonhosted.org/packages/31/5e/03966aedfbfcbb4d5f8aa042452d3361f325b963ebbadddac05b122e47dd/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5418b53c0d59b3824d05e029669efa023bbef0f3e92e75ec8428f3799487f361", size = 4957170, upload-time = "2025-07-01T09:16:23.762Z" }, { url = "https://files.pythonhosted.org/packages/cc/2d/e082982aacc927fc2cab48e1e731bdb1643a1406acace8bed0900a61464e/pillow-11.3.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:504b6f59505f08ae014f724b6207ff6222662aab5cc9542577fb084ed0676ac7", size = 5581505, upload-time = "2025-07-01T09:16:25.593Z" }, { url = 
"https://files.pythonhosted.org/packages/34/e7/ae39f538fd6844e982063c3a5e4598b8ced43b9633baa3a85ef33af8c05c/pillow-11.3.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:c84d689db21a1c397d001aa08241044aa2069e7587b398c8cc63020390b1c1b8", size = 6984598, upload-time = "2025-07-01T09:16:27.732Z" },