diff --git a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py index f20904ba7..c163d4dd4 100644 --- a/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py +++ b/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py @@ -1,6 +1,5 @@ import os from os import path -from cognee.shared.logging_utils import get_logger from uuid import UUID from typing import Optional from typing import AsyncGenerator, List @@ -10,6 +9,7 @@ from sqlalchemy.orm import joinedload from sqlalchemy.exc import NoResultFound from sqlalchemy.ext.asyncio import AsyncSession, create_async_engine, async_sessionmaker +from cognee.shared.logging_utils import get_logger from cognee.infrastructure.databases.exceptions import EntityNotFoundError from cognee.modules.data.models.Data import Data diff --git a/Dockerfile_modal b/distributed/Dockerfile similarity index 58% rename from Dockerfile_modal rename to distributed/Dockerfile index f8ca663a8..ff2aa0cfd 100644 --- a/Dockerfile_modal +++ b/distributed/Dockerfile @@ -18,15 +18,16 @@ RUN apt-get update && apt-get install -y \ WORKDIR /app - -ENV PYTHONPATH=/app -WORKDIR /app -COPY pyproject.toml poetry.lock /app/ - +COPY pyproject.toml poetry.lock README.md /app/ RUN pip install poetry -RUN poetry install --all-extras --no-root --without dev +RUN poetry config virtualenvs.create false + +RUN poetry install --extras neo4j --extras qdrant --no-root --without dev COPY cognee/ /app/cognee -COPY README.md /app/README.md +COPY distributed/ /app/distributed +RUN chmod +x /app/distributed/entrypoint.sh + +ENTRYPOINT ["/app/distributed/entrypoint.sh"] diff --git a/distributed/__init__.py b/distributed/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/distributed/app.py b/distributed/app.py new file mode 100644 index 000000000..fee97e7f8 --- /dev/null +++ b/distributed/app.py @@ -0,0 +1,4 @@ +from 
modal import App + + +app = App("cognee_modal_distributed") diff --git a/distributed/entrypoint.py b/distributed/entrypoint.py new file mode 100644 index 000000000..29c009f33 --- /dev/null +++ b/distributed/entrypoint.py @@ -0,0 +1,113 @@ +import pathlib +from os import path + +from cognee.api.v1.add import add +from cognee.api.v1.prune import prune +from cognee.infrastructure.llm.utils import get_max_chunk_tokens +from cognee.modules.chunking.TextChunker import TextChunker +from cognee.modules.chunking.models.DocumentChunk import DocumentChunk +from cognee.modules.data.processing.document_types import Document +from cognee.modules.pipelines.operations.run_tasks import run_tasks +from cognee.modules.pipelines.tasks.task import Task +from cognee.modules.users.methods.get_default_user import get_default_user +from cognee.modules.data.methods.get_dataset_data import get_dataset_data +from cognee.modules.data.methods.get_datasets_by_name import get_datasets_by_name + +from cognee.shared.logging_utils import get_logger +from cognee.tasks.documents.classify_documents import classify_documents +from cognee.tasks.documents.extract_chunks_from_documents import extract_chunks_from_documents + +from distributed.app import app +from distributed.queues import finished_jobs_queue, save_data_points_queue +from distributed.workers.data_point_saver_worker import data_point_saver_worker +from distributed.workers.graph_extraction_worker import graph_extraction_worker + +logger = get_logger() + + +@app.local_entrypoint() +async def main(): + # Clear queues + finished_jobs_queue.clear() + save_data_points_queue.clear() + + dataset_name = "main" + data_directory_name = ".data" + data_directory_path = path.join(pathlib.Path(__file__).parent, data_directory_name) + + number_of_data_saving_workers = 1 # Total number of data_point_saver_worker functions to spawn + document_batch_size = 50 # Batch size for producers + + results = [] + consumer_futures = [] + + # Delete DBs and saved files 
from metastore + await prune.prune_data() + await prune.prune_system(metadata=True) + + # Add files to the metastore + await add(data=data_directory_path, dataset_name=dataset_name) + + user = await get_default_user() + datasets = await get_datasets_by_name(dataset_name, user.id) + documents = await get_dataset_data(dataset_id=datasets[0].id) + + print(f"We have {len(documents)} documents in the dataset.") + + # Start data_point_saver_worker functions + for _ in range(number_of_data_saving_workers): + worker_future = data_point_saver_worker.spawn(total_number_of_workers=len(documents)) + consumer_futures.append(worker_future) + + def process_chunks_remotely(document_chunks: list[DocumentChunk], document: Document): + return graph_extraction_worker.spawn( + user=user, document_name=document.name, document_chunks=document_chunks + ) + + # Produce chunks and spawn a graph_extraction_worker job for each batch of chunks + for i in range(0, len(documents), document_batch_size): + batch = documents[i : i + document_batch_size] + + producer_futures = [] + + for item in batch: + async for run_info in run_tasks( + [ + Task(classify_documents), + Task( + extract_chunks_from_documents, + max_chunk_size=get_max_chunk_tokens(), + chunker=TextChunker, + ), + Task( + process_chunks_remotely, + document=item, + task_config={"batch_size": 50}, + ), + ], + data=[item], + user=user, + pipeline_name="chunk_processing", + ): + producer_futures.append(run_info) + + batch_results = [] + for producer_future in producer_futures: + try: + result = producer_future.get() + except Exception as e: + result = e + batch_results.append(result) + + results.extend(batch_results) + finished_jobs_queue.put(len(results)) + + for consumer_future in consumer_futures: + try: + print("Finished but waiting") + consumer_final = consumer_future.get() + print(f"We got all futures {consumer_final}") + except Exception as e: + logger.error(e) + + print(results) diff --git a/distributed/entrypoint.sh 
b/distributed/entrypoint.sh new file mode 100644 index 000000000..eea16d892 --- /dev/null +++ b/distributed/entrypoint.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash + +echo "$@" + +exec "$@" # Runs the command passed to the entrypoint script. diff --git a/distributed/modal_distributed.py b/distributed/modal_distributed.py deleted file mode 100644 index 214d255b9..000000000 --- a/distributed/modal_distributed.py +++ /dev/null @@ -1,282 +0,0 @@ -import os -import json -import pathlib -import asyncio -from typing import Optional -from pydantic import BaseModel -from dotenv import dotenv_values -from cognee.modules.chunking.models import DocumentChunk -from modal import App, Queue, Image - -import cognee -from cognee.shared.logging_utils import get_logger -from cognee.shared.data_models import KnowledgeGraph -from cognee.modules.pipelines.operations import run_tasks -from cognee.modules.users.methods import get_default_user -from cognee.infrastructure.llm import get_max_chunk_tokens -from cognee.modules.chunking.TextChunker import TextChunker -from cognee.modules.data.methods import get_datasets_by_name -from cognee.modules.cognify.config import get_cognify_config -from cognee.infrastructure.databases.graph import get_graph_engine -from cognee.modules.data.methods.get_dataset_data import get_dataset_data -from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver -from cognee.modules.graph.utils import deduplicate_nodes_and_edges, get_graph_from_model -from cognee.modules.pipelines.tasks import Task - - -# Global tasks -from cognee.tasks.documents import ( - classify_documents, - extract_chunks_from_documents, -) -from cognee.tasks.storage.index_data_points import index_data_points - -# Local tasks -from .tasks.extract_graph_from_data import extract_graph_from_data -from .tasks.summarize_text import summarize_text - - -# ------------------------------------------------------------------------------ -# App and Queue Initialization -# 
------------------------------------------------------------------------------ - -# Initialize the Modal application -app = App("cognee_modal_distributed") -logger = get_logger("cognee_modal_distributed") - -local_env_vars = dict(dotenv_values(".env")) -logger.info("Modal deployment started with the following environmental variables:") -logger.info(json.dumps(local_env_vars, indent=4)) - -image = ( - Image.from_dockerfile(path="Dockerfile_modal", force_build=False) - .add_local_file("pyproject.toml", remote_path="/root/pyproject.toml", copy=True) - .add_local_file("poetry.lock", remote_path="/root/poetry.lock", copy=True) - .env(local_env_vars) - .poetry_install_from_file(poetry_pyproject_toml="pyproject.toml") - # .pip_install("protobuf", "h2", "neo4j", "asyncpg", "pgvector") - .add_local_python_source("../cognee") -) - - -# Create (or get) two queues: -# - graph_nodes_and_edges: Stores messages produced by the producer functions. -# - finished_producers: Keeps track of the number of finished producer jobs. -graph_nodes_and_edges = Queue.from_name("graph_nodes_and_edges", create_if_missing=True) - -finished_producers = Queue.from_name("finished_producers", create_if_missing=True) - -# ------------------------------------------------------------------------------ -# Cognee pipeline steps -# ------------------------------------------------------------------------------ - - -def add_data_to_save_queue(document_chunks: list[DocumentChunk]): - future = producer.spawn(file_name=document_name, chunk_list=event.result) - futures.append(future) - -# Preprocessing steps. This gets called in the entrypoint -async def get_preprocessing_steps(chunker=TextChunker) -> list[Task]: - preprocessing_tasks = [ - Task(classify_documents), - Task( # Extract text chunks based on the document type. 
- extract_chunks_from_documents, - max_chunk_size=None or get_max_chunk_tokens(), - chunker=chunker, - ), - Task( - add_data_to_save_queue, - task_config={"batch_size": 50}, - ), - ] - - return preprocessing_tasks - - -# This is the last step of the pipeline that gets executed on modal executors (functions) -async def save_data_points(data_points: list = None, data_point_connections: list = None): - data_point_connections = data_point_connections or [] - - nodes = [] - edges = [] - - added_nodes = {} - added_edges = {} - visited_properties = {} - - results = await asyncio.gather( - *[ - get_graph_from_model( - data_point, - added_nodes=added_nodes, - added_edges=added_edges, - visited_properties=visited_properties, - ) - for data_point in data_points - ] - ) - - for result_nodes, result_edges in results: - nodes.extend(result_nodes) - edges.extend(result_edges) - - nodes, edges = deduplicate_nodes_and_edges(nodes, edges) - - await index_data_points(nodes) - - graph_nodes_and_edges.put((nodes, edges + data_point_connections)) - - -# This is the pipeline for the modal executors -async def get_graph_tasks( - graph_model: BaseModel = KnowledgeGraph, - ontology_file_path: Optional[str] = None, -) -> list[Task]: - cognee_config = get_cognify_config() - - ontology_adapter = OntologyResolver(ontology_file=ontology_file_path) - - step_two_tasks = [ - Task( - extract_graph_from_data, - graph_model=graph_model, - ontology_adapter=ontology_adapter, - ), - Task( - summarize_text, - summarization_model=cognee_config.summarization_model, - ), - Task(save_data_points), - ] - - return step_two_tasks - - -# ------------------------------------------------------------------------------ -# Producer Function -# ------------------------------------------------------------------------------ - - -@app.function(image=image, timeout=86400, max_containers=100) -async def producer(file_name: str, chunk_list: list): - modal_tasks = await get_graph_tasks() - async for _ in run_tasks( - 
modal_tasks, data=chunk_list, pipeline_name=f"modal_execution_file_{file_name}" - ): - pass - - print(f"File execution finished: {file_name}") - - return file_name - - -# ------------------------------------------------------------------------------ -# Consumer Function -# ------------------------------------------------------------------------------ - - -@app.function(image=image, timeout=86400, max_containers=100) -async def consumer(number_of_files: int): - graph_engine = await get_graph_engine() - - while True: - if graph_nodes_and_edges.len() != 0: - nodes_and_edges = graph_nodes_and_edges.get(block=False) - if nodes_and_edges is not None: - if nodes_and_edges[0] is not None: - await graph_engine.add_nodes(nodes_and_edges[0]) - if nodes_and_edges[1] is not None: - await graph_engine.add_edges(nodes_and_edges[1]) - else: - print(f"Nodes and edges are: {nodes_and_edges}") - else: - await asyncio.sleep(5) - - number_of_finished_jobs = finished_producers.get(block=False) - - if number_of_finished_jobs == number_of_files: - # We put it back for the other consumers to see that we finished - finished_producers.put(number_of_finished_jobs) - - print("Finished processing all nodes and edges; stopping graph engine queue.") - return True - - -# ------------------------------------------------------------------------------ -# Entrypoint -# ------------------------------------------------------------------------------ - - -@app.local_entrypoint() -async def main(): - # Clear queues - graph_nodes_and_edges.clear() - finished_producers.clear() - - dataset_name = "main" - data_directory_name = ".data" - data_directory_path = os.path.join(pathlib.Path(__file__).parent, data_directory_name) - - number_of_consumers = 1 # Total number of consumer functions to spawn - batch_size = 50 # Batch size for producers - - results = [] - consumer_futures = [] - - # Delete DBs and saved files from metastore - await cognee.prune.prune_data() - await cognee.prune.prune_system(metadata=True) - 
- # Add files to the metastore - await cognee.add(data=data_directory_path, dataset_name=dataset_name) - - user = await get_default_user() - datasets = await get_datasets_by_name(dataset_name, user.id) - documents = await get_dataset_data(dataset_id=datasets[0].id) - - print(f"We have {len(documents)} documents in the dataset.") - - preprocessing_tasks = await get_preprocessing_steps(user) - - # Start consumer functions - for _ in range(number_of_consumers): - consumer_future = consumer.spawn(number_of_files=len(documents)) - consumer_futures.append(consumer_future) - - # Process producer jobs in batches - for i in range(0, len(documents), batch_size): - batch = documents[i : i + batch_size] - futures = [] - for item in batch: - document_name = item.name - async for event in run_tasks( - preprocessing_tasks, data=[item], pipeline_name="preprocessing_steps" - ): - if ( - isinstance(event, TaskExecutionCompleted) - and event.task is extract_chunks_from_documents - ): - future = producer.spawn(file_name=document_name, chunk_list=event.result) - futures.append(future) - - batch_results = [] - for future in futures: - try: - result = future.get() - except Exception as e: - result = e - batch_results.append(result) - - results.extend(batch_results) - finished_producers.put(len(results)) - - for consumer_future in consumer_futures: - try: - print("Finished but waiting") - consumer_final = consumer_future.get() - print(f"We got all futures{consumer_final}") - except Exception as e: - print(e) - pass - - print(results) diff --git a/distributed/modal_image.py b/distributed/modal_image.py new file mode 100644 index 000000000..0176e9f64 --- /dev/null +++ b/distributed/modal_image.py @@ -0,0 +1,24 @@ +import json +import pathlib +from os import path +from modal import Image +from logging import getLogger +from dotenv import dotenv_values + +logger = getLogger("modal_image_creation") + +local_env_vars = dict(dotenv_values(".env")) + +logger.debug("Modal deployment started with 
the following environmental variables:") +logger.debug(json.dumps(local_env_vars, indent=4)) + +image = ( + Image.from_dockerfile( + path=pathlib.Path(path.join(path.dirname(__file__), "Dockerfile")).resolve(), + force_build=False, + ).env(local_env_vars) + # .pip_install_from_pyproject(pyproject_toml=pathlib.Path(path.join(path.dirname(__file__), "../pyproject.toml")).resolve()) + # .poetry_install_from_file(poetry_pyproject_toml=pathlib.Path(path.join(path.dirname(__file__), "../pyproject.toml")).resolve()) + # .add_local_dir(pathlib.Path("./venv/bin").resolve(), remote_path="/app/.venv") + # .add_local_python_source(pathlib.Path("./cognee").resolve()) +) diff --git a/distributed/queues.py b/distributed/queues.py new file mode 100644 index 000000000..98f2e7b66 --- /dev/null +++ b/distributed/queues.py @@ -0,0 +1,10 @@ +from modal import Queue + + +# Create (or get) two queues: +# - save_data_points_queue: Stores messages produced by the producer functions. +# - finished_jobs_queue: Keeps track of the number of finished producer jobs. 
+ +save_data_points_queue = Queue.from_name("save_data_points_queue", create_if_missing=True) + +finished_jobs_queue = Queue.from_name("finished_jobs_queue", create_if_missing=True) diff --git a/distributed/tasks/__init__.py b/distributed/tasks/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/distributed/tasks/extract_graph_from_data.py b/distributed/tasks/extract_graph_from_data.py index eab51a726..4f39dbd56 100644 --- a/distributed/tasks/extract_graph_from_data.py +++ b/distributed/tasks/extract_graph_from_data.py @@ -8,7 +8,6 @@ from cognee.modules.graph.utils import ( retrieve_existing_edges, ) from cognee.shared.data_models import KnowledgeGraph -from cognee.infrastructure.databases.graph import get_graph_engine from cognee.modules.chunking.models.DocumentChunk import DocumentChunk from cognee.modules.ontology.rdf_xml.OntologyResolver import OntologyResolver from cognee.modules.data.extraction.knowledge_graph import extract_content_graph @@ -23,7 +22,6 @@ async def extract_graph_from_data( chunk_graphs = await asyncio.gather( *[extract_content_graph(chunk.text, graph_model) for chunk in data_chunks] ) - graph_engine = await get_graph_engine() if graph_model is not KnowledgeGraph: for chunk_index, chunk_graph in enumerate(chunk_graphs): @@ -34,7 +32,6 @@ async def extract_graph_from_data( existing_edges_map = await retrieve_existing_edges( data_chunks, chunk_graphs, - graph_engine, ) graph_nodes, graph_edges = expand_with_nodes_and_edges( diff --git a/distributed/tasks/save_data_points.py b/distributed/tasks/save_data_points.py new file mode 100644 index 000000000..794cb36e1 --- /dev/null +++ b/distributed/tasks/save_data_points.py @@ -0,0 +1,40 @@ +import asyncio + +from cognee.modules.graph.utils import deduplicate_nodes_and_edges, get_graph_from_model +from distributed.queues import save_data_points_queue + + +async def save_data_points( + data_points_and_relationships: tuple[list, list] +): + data_points = 
data_points_and_relationships[0] + data_point_connections = data_points_and_relationships[1] + + nodes = [] + edges = [] + + added_nodes = {} + added_edges = {} + visited_properties = {} + + results = await asyncio.gather( + *[ + get_graph_from_model( + data_point, + added_nodes=added_nodes, + added_edges=added_edges, + visited_properties=visited_properties, + ) + for data_point in data_points + ] + ) + + for result_nodes, result_edges in results: + nodes.extend(result_nodes) + edges.extend(result_edges) + + nodes, edges = deduplicate_nodes_and_edges(nodes, edges + data_point_connections) + + # await index_data_points(nodes) + + save_data_points_queue.put((nodes, edges)) diff --git a/distributed/tasks/summarize_text.py b/distributed/tasks/summarize_text.py index d48fc484a..fde4b19b1 100644 --- a/distributed/tasks/summarize_text.py +++ b/distributed/tasks/summarize_text.py @@ -7,7 +7,12 @@ from cognee.modules.data.extraction.extract_summary import extract_summary from cognee.modules.chunking.models.DocumentChunk import DocumentChunk -async def summarize_text(data_chunks: list[DocumentChunk], edges: list, summarization_model: Type[BaseModel]): +async def summarize_text( + data_points_and_relationships: tuple[list[DocumentChunk], list], summarization_model: Type[BaseModel] +): + data_chunks = data_points_and_relationships[0] + edges = data_points_and_relationships[1] + if len(data_chunks) == 0: return data_chunks diff --git a/distributed/test.py b/distributed/test.py new file mode 100644 index 000000000..d2b35a285 --- /dev/null +++ b/distributed/test.py @@ -0,0 +1,28 @@ +from modal import App + +app = App("cognee_distributed_test") + + +@app.function() +def sum_distributed(numbers: list): + result = sum(numbers) + + return result + + +@app.local_entrypoint() +def main(): + sum = 0 + numbers = range(100) + batch_size = 10 + + local_sum = sum_distributed.local(numbers=numbers) + + print(f"Local sum: {local_sum}") + + batches = [list(numbers[i : i + batch_size]) for i in 
range(0, len(numbers), batch_size)] + + for result in sum_distributed.map(batches): + sum += result + + print(f"Distributed sum: {sum}") diff --git a/distributed/workers/data_point_saver_worker.py b/distributed/workers/data_point_saver_worker.py new file mode 100644 index 000000000..1318dfe8d --- /dev/null +++ b/distributed/workers/data_point_saver_worker.py @@ -0,0 +1,32 @@ +import asyncio + +from cognee.infrastructure.databases.graph.get_graph_engine import get_graph_engine + +from distributed.app import app +from distributed.modal_image import image +from distributed.queues import finished_jobs_queue, save_data_points_queue + + +@app.function(image=image, timeout=86400, max_containers=100) +async def data_point_saver_worker(total_number_of_workers: int): + graph_engine = await get_graph_engine() + + while True: + if save_data_points_queue.len() != 0: + nodes_and_edges = save_data_points_queue.get(block=False) + if nodes_and_edges and len(nodes_and_edges) == 2: + await graph_engine.add_nodes(nodes_and_edges[0]) + await graph_engine.add_edges(nodes_and_edges[1]) + else: + print(f"Nodes and edges are: {nodes_and_edges}") + else: + await asyncio.sleep(5) + + number_of_finished_jobs = finished_jobs_queue.get(block=False) + + if number_of_finished_jobs == total_number_of_workers: + # We put it back for the other consumers to see that we finished + finished_jobs_queue.put(number_of_finished_jobs) + + print("Finished processing all nodes and edges; stopping graph engine queue.") + return True diff --git a/distributed/workers/graph_extraction_worker.py b/distributed/workers/graph_extraction_worker.py new file mode 100644 index 000000000..698521405 --- /dev/null +++ b/distributed/workers/graph_extraction_worker.py @@ -0,0 +1,42 @@ +# ------------------------------------------------------------------------------ +# Producer function that produces data points from documents and pushes them into the queue. 
+# ------------------------------------------------------------------------------ +from cognee.modules.cognify.config import get_cognify_config +from cognee.modules.pipelines.operations.run_tasks import run_tasks +from cognee.modules.pipelines.tasks.task import Task +from cognee.shared.data_models import KnowledgeGraph + +from distributed.app import app +from distributed.modal_image import image +from distributed.tasks.summarize_text import summarize_text +from distributed.tasks.extract_graph_from_data import extract_graph_from_data +from distributed.tasks.save_data_points import save_data_points + + +@app.function(image=image, timeout=86400, max_containers=100) +async def graph_extraction_worker(user, document_name: str, document_chunks: list): + cognee_config = get_cognify_config() + + tasks = [ + Task( + extract_graph_from_data, + graph_model=KnowledgeGraph, + ), + Task( + summarize_text, + summarization_model=cognee_config.summarization_model, + ), + Task(save_data_points), + ] + + async for _ in run_tasks( + tasks, + data=document_chunks, + pipeline_name=f"modal_execution_file_{document_name}", + user=user, + ): + pass + + print(f"File execution finished: {document_name}") + + return document_name diff --git a/poetry.lock b/poetry.lock index 65c4818c8..01be5e0ca 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3075,6 +3075,24 @@ grpcio = ">=1.67.1" protobuf = ">=5.26.1,<6.0dev" setuptools = "*" +[[package]] +name = "grpclib" +version = "0.4.7" +description = "Pure-Python gRPC implementation for asyncio" +optional = false +python-versions = ">=3.7" +groups = ["main"] +files = [ + {file = "grpclib-0.4.7.tar.gz", hash = "sha256:2988ef57c02b22b7a2e8e961792c41ccf97efc2ace91ae7a5b0de03c363823c3"}, +] + +[package.dependencies] +h2 = ">=3.1.0,<5" +multidict = "*" + +[package.extras] +protobuf = ["protobuf (>=3.20.0)"] + [[package]] name = "gunicorn" version = "20.1.0" @@ -3113,10 +3131,9 @@ files = [ name = "h2" version = "4.2.0" description = "Pure-Python HTTP/2 
protocol implementation" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"qdrant\"" files = [ {file = "h2-4.2.0-py3-none-any.whl", hash = "sha256:479a53ad425bb29af087f3458a61d30780bc818e4ebcf01f0b536ba916462ed0"}, {file = "h2-4.2.0.tar.gz", hash = "sha256:c8a52129695e88b1a0578d8d2cc6842bbd79128ac685463b887ee278126ad01f"}, @@ -3147,10 +3164,9 @@ test = ["eth_utils (>=2.0.0)", "hypothesis (>=3.44.24,<=6.31.6)", "pytest (>=7.0 name = "hpack" version = "4.1.0" description = "Pure-Python HPACK header encoding" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"qdrant\"" files = [ {file = "hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496"}, {file = "hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca"}, @@ -3395,10 +3411,9 @@ tests = ["freezegun", "pytest", "pytest-cov"] name = "hyperframe" version = "6.1.0" description = "Pure-Python HTTP/2 framing" -optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"qdrant\"" files = [ {file = "hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5"}, {file = "hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08"}, @@ -5811,6 +5826,34 @@ plot = ["matplotlib (==3.10.0)", "pandas (==2.2.3)"] test = ["pytest (==8.3.4)", "pytest-sugar (==1.0.0)"] type = ["mypy (==1.14.1)"] +[[package]] +name = "modal" +version = "0.74.15" +description = "Python client library for Modal" +optional = false +python-versions = ">=3.9" +groups = ["main"] +files = [ + {file = "modal-0.74.15-py3-none-any.whl", hash = "sha256:084e898ab202ccd698fd277d9dc9e9cec8d4b0954a1c09d4ba529f0446ab3526"}, + {file = "modal-0.74.15.tar.gz", hash = 
"sha256:95512811ebd42a52fa03724f60d0d1c32259788351e798d0d695974d94b2e49c"}, +] + +[package.dependencies] +aiohttp = "*" +certifi = "*" +click = ">=8.1.0" +fastapi = "*" +grpclib = "0.4.7" +protobuf = ">=3.19,<4.24.0 || >4.24.0,<6.0" +rich = ">=12.0.0" +synchronicity = ">=0.9.10,<0.10.0" +toml = "*" +typer = ">=0.9" +types-certifi = "*" +types-toml = "*" +typing_extensions = ">=4.6,<5.0" +watchfiles = "*" + [[package]] name = "monotonic" version = "1.6" @@ -7524,10 +7567,9 @@ testing = ["google-api-core (>=1.31.5)"] name = "protobuf" version = "5.29.4" description = "" -optional = true +optional = false python-versions = ">=3.8" groups = ["main"] -markers = "python_version == \"3.10\" and extra == \"codegraph\" or (extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"gemini\" or extra == \"milvus\") and python_version < \"3.11\" or (python_version == \"3.12\" or extra == \"gemini\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"chromadb\" or extra == \"deepeval\" or extra == \"milvus\") and (extra == \"codegraph\" or extra == \"gemini\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"milvus\") and python_version >= \"3.12\" or python_version == \"3.11\" and (extra == \"codegraph\" or extra == \"gemini\" or extra == \"chromadb\" or extra == \"qdrant\" or extra == \"weaviate\" or extra == \"deepeval\" or extra == \"milvus\")" files = [ {file = "protobuf-5.29.4-cp310-abi3-win32.whl", hash = "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7"}, {file = "protobuf-5.29.4-cp310-abi3-win_amd64.whl", hash = "sha256:bcefcdf3976233f8a502d265eb65ea740c989bacc6c30a58290ed0e519eb4b8d"}, @@ -9832,6 +9874,25 @@ files = [ {file = "shiboken6-6.9.0-cp39-abi3-win_arm64.whl", hash = "sha256:24f53857458881b54798d7e35704611d07f6b6885bcdf80f13a4c8bb485b8df2"}, ] +[[package]] +name = "sigtools" +version = "4.0.1" +description = 
"Utilities for working with inspect.Signature objects." +optional = false +python-versions = ">=3.6" +groups = ["main"] +files = [ + {file = "sigtools-4.0.1-py2.py3-none-any.whl", hash = "sha256:d216b4cf920bbab0fce636ddc429ed8463a5b533d9e1492acb45a2a1bc36ac6c"}, + {file = "sigtools-4.0.1.tar.gz", hash = "sha256:4b8e135a9cd4d2ea00da670c093372d74e672ba3abb87f4c98d8e73dea54445c"}, +] + +[package.dependencies] +attrs = "*" + +[package.extras] +test = ["coverage", "mock", "repeated-test (>=2.2.1)", "sphinx"] +tests = ["coverage", "mock", "repeated-test (>=2.2.1)", "sphinx"] + [[package]] name = "simplejson" version = "3.20.1" @@ -10215,6 +10276,22 @@ mpmath = ">=1.1.0,<1.4" [package.extras] dev = ["hypothesis (>=6.70.0)", "pytest (>=7.1.0)"] +[[package]] +name = "synchronicity" +version = "0.9.11" +description = "Export blocking and async library versions from a single async implementation" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "synchronicity-0.9.11-py3-none-any.whl", hash = "sha256:231129654d2f56b1aa148e85ebd8545231be135771f6d2196d414175b1594ef6"}, + {file = "synchronicity-0.9.11.tar.gz", hash = "sha256:cb5dbbcb43d637e516ae50db05a776da51a705d1e1a9c0e301f6049afc3c2cae"}, +] + +[package.dependencies] +sigtools = ">=4.0.1" +typing-extensions = ">=4.12.2" + [[package]] name = "tabulate" version = "0.9.0" @@ -10381,6 +10458,18 @@ dev = ["tokenizers[testing]"] docs = ["setuptools-rust", "sphinx", "sphinx-rtd-theme"] testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"] +[[package]] +name = "toml" +version = "0.10.2" +description = "Python Library for Tom's Obvious, Minimal Language" +optional = false +python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main"] +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = 
"sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + [[package]] name = "tomli" version = "2.2.1" @@ -10701,6 +10790,18 @@ rich = ">=10.11.0" shellingham = ">=1.3.0" typing-extensions = ">=3.7.4.3" +[[package]] +name = "types-certifi" +version = "2021.10.8.3" +description = "Typing stubs for certifi" +optional = false +python-versions = "*" +groups = ["main"] +files = [ + {file = "types-certifi-2021.10.8.3.tar.gz", hash = "sha256:72cf7798d165bc0b76e1c10dd1ea3097c7063c42c21d664523b928e88b554a4f"}, + {file = "types_certifi-2021.10.8.3-py3-none-any.whl", hash = "sha256:b2d1e325e69f71f7c78e5943d410e650b4707bb0ef32e4ddf3da37f54176e88a"}, +] + [[package]] name = "types-python-dateutil" version = "2.9.0.20241206" @@ -10728,6 +10829,18 @@ files = [ [package.dependencies] setuptools = "*" +[[package]] +name = "types-toml" +version = "0.10.8.20240310" +description = "Typing stubs for toml" +optional = false +python-versions = ">=3.8" +groups = ["main"] +files = [ + {file = "types-toml-0.10.8.20240310.tar.gz", hash = "sha256:3d41501302972436a6b8b239c850b26689657e25281b48ff0ec06345b8830331"}, + {file = "types_toml-0.10.8.20240310-py3-none-any.whl", hash = "sha256:627b47775d25fa29977d9c70dc0cbab3f314f32c8d8d0c012f2ef5de7aaec05d"}, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -11172,10 +11285,9 @@ watchmedo = ["PyYAML (>=3.10)"] name = "watchfiles" version = "1.0.5" description = "Simple, modern and high performance file watching and code reload in python." 
-optional = true +optional = false python-versions = ">=3.9" groups = ["main"] -markers = "extra == \"chromadb\"" files = [ {file = "watchfiles-1.0.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:5c40fe7dd9e5f81e0847b1ea64e1f5dd79dd61afbedb57759df06767ac719b40"}, {file = "watchfiles-1.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8c0db396e6003d99bb2d7232c957b5f0b5634bbd1b24e381a5afcc880f7373fb"}, @@ -11737,4 +11849,4 @@ weaviate = ["weaviate-client"] [metadata] lock-version = "2.1" python-versions = ">=3.10,<=3.13" -content-hash = "15fe7b2b02efa88fb8070dcde58f32cdc1577df7966e4cd438cbb4f197935e1f" +content-hash = "795d545668c7ef4e056cc33ca5ee5b39ca6f61de4a42eb6f514270358e85e507" diff --git a/pyproject.toml b/pyproject.toml index 5bfde2dc8..0daa5de67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -88,6 +88,7 @@ pyside6 = {version = "^6.8.3", optional = true} google-generativeai = {version = "^0.8.4", optional = true} notebook = {version = "^7.1.0", optional = true} s3fs = "^2025.3.2" +modal = "^0.74.15" [tool.poetry.extras]