fix: track producer futures and stop the app when done

This commit is contained in:
Boris Arzentar 2025-04-23 13:13:29 +02:00
parent 631f816323
commit bf9678065c

View file

@@ -59,19 +59,21 @@ async def main():
worker_future = data_point_saver_worker.spawn(total_number_of_workers=len(documents)) worker_future = data_point_saver_worker.spawn(total_number_of_workers=len(documents))
consumer_futures.append(worker_future) consumer_futures.append(worker_future)
producer_futures = []
def process_chunks_remotely(document_chunks: list[DocumentChunk], document: Document): def process_chunks_remotely(document_chunks: list[DocumentChunk], document: Document):
return graph_extraction_worker.spawn( producer_future = graph_extraction_worker.spawn(
user=user, document_name=document.name, document_chunks=document_chunks user=user, document_name=document.name, document_chunks=document_chunks
) )
producer_futures.append(producer_future)
return producer_future
# Produce chunks and spawn a graph_extraction_worker job for each batch of chunks # Produce chunks and spawn a graph_extraction_worker job for each batch of chunks
for i in range(0, len(documents), document_batch_size): for i in range(0, len(documents), document_batch_size):
batch = documents[i : i + document_batch_size] batch = documents[i : i + document_batch_size]
producer_futures = []
for item in batch: for item in batch:
async for run_info in run_tasks( async for worker_feature in run_tasks(
[ [
Task(classify_documents), Task(classify_documents),
Task( Task(
@@ -89,7 +91,7 @@ async def main():
user=user, user=user,
pipeline_name="chunk_processing", pipeline_name="chunk_processing",
): ):
producer_futures.append(run_info) pass
batch_results = [] batch_results = []
for producer_future in producer_futures: for producer_future in producer_futures: