fix: decrease batch size for chunks
parent 6c42346359
commit dda3460715
5 changed files with 8 additions and 5 deletions
@@ -24,7 +24,7 @@ RUN pip install poetry
 
 RUN poetry config virtualenvs.create false
 
-RUN poetry install --extras neo4j --extras qdrant --no-root
+RUN poetry install --extras neo4j --extras postgres --no-root
 
 COPY cognee/ /app/cognee
 COPY distributed/ /app/distributed
@@ -133,7 +133,7 @@ async def main():
                 Task(
                     process_chunks_remotely,
                     document=item,
-                    task_config={"batch_size": 50},
+                    task_config={"batch_size": 10},
                 ),
             ],
             data=[item],
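For context, task_config={"batch_size": N} presumably caps how many chunks each call to process_chunks_remotely receives, so lowering 50 to 10 trades more remote calls for smaller per-call payloads. A minimal sketch of the batching semantics (an illustration under that assumption, not cognee's actual Task internals):

    def batch(items, batch_size):
        """Yield successive lists of at most batch_size items."""
        bucket = []
        for item in items:
            bucket.append(item)
            if len(bucket) == batch_size:
                yield bucket
                bucket = []
        if bucket:
            yield bucket  # final partial batch

    # A 500-chunk document is 10 remote calls at batch_size=50,
    # but 50 smaller calls at batch_size=10.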
@@ -155,6 +155,7 @@ async def main():
         print(f"Number of documents processed: {len(results)}")
+        results.extend(batch_results)
 
     # Push empty tuple into the queue to signal the end of data.
     save_data_points_queue.put(())
 
     for consumer_future in consumer_futures:
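The surrounding lines show the shutdown convention: producers put (nodes, edges) tuples on save_data_points_queue, and main() pushes an empty tuple () as a sentinel so consumers know the stream has ended. A sketch of the matching consumer side, assuming a queue with a blocking get() (handle is a hypothetical placeholder, not the repo's function):

    def handle(nodes, edges):
        # Placeholder for the real persistence step (hypothetical).
        print(f"saving {len(nodes)} nodes, {len(edges)} edges")

    def consume(queue):
        while True:
            item = queue.get()  # blocks until a producer pushes something
            if item == ():      # empty-tuple sentinel pushed by main()
                break
            nodes, edges = item
            handle(nodes, edges)

Note that with several consumer_futures, each consumer needs its own sentinel (or must re-queue the sentinel it reads) for every worker to exit cleanly.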
@@ -35,7 +35,9 @@ async def save_data_points(data_points_and_relationships: tuple[list, list]):
     for nodes, edges in data_points_and_relationships:
         for node in nodes:
             if asizeof.asizeof(node) >= 500000:
-                print(f"Node too large:\n{node.id}\n")
+                try_pushing_nodes_to_queue([node])
+                continue
+                # print(f"Node too large:\n{node.id}\n")
 
             node_batch.append(node)
 
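asizeof here is presumably Pympler's deep size estimator, which counts an object plus everything it references, so large text payloads on a node push it over the 500000-byte threshold. A minimal sketch of the size gate under that assumption:

    from pympler import asizeof  # deep (recursive) size measurement

    MAX_NODE_BYTES = 500_000  # same threshold as the diff (~0.5 MB)

    def is_oversized(node) -> bool:
        # Unlike sys.getsizeof, asizeof.asizeof follows references,
        # so nested attributes count toward the total.
        return asizeof.asizeof(node) >= MAX_NODE_BYTES

Oversized nodes are now handed to try_pushing_nodes_to_queue one at a time and skipped, replacing the old behavior of merely logging the node and appending it to the batch anyway.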
@@ -7,7 +7,7 @@ from distributed.queues import save_data_points_queue
 from cognee.infrastructure.databases.graph import get_graph_engine
 
 
-@app.function(image=image, timeout=7200, max_containers=100)
+@app.function(image=image, timeout=86400, max_containers=100)
 async def data_point_saver_worker():
     print("Started processing of nodes and edges; starting graph engine queue.")
     graph_engine = await get_graph_engine()
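Both worker decorators look like Modal function definitions; the change lifts the per-invocation timeout from 7200 s (2 hours) to 86400 s (24 hours) while keeping the fan-out at 100 containers. A self-contained sketch of the decorator usage, assuming Modal, with the app name and image invented for illustration:

    import modal

    app = modal.App("cognee-distributed")  # hypothetical app name
    image = modal.Image.debian_slim().pip_install("cognee")  # illustrative image

    @app.function(image=image, timeout=86400, max_containers=100)
    async def data_point_saver_worker():
        # Long-running consumer: timeout bounds one invocation's wall-clock
        # time, while max_containers caps concurrent container fan-out.
        ...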
@@ -13,7 +13,7 @@ from distributed.tasks.extract_graph_from_data import extract_graph_from_data
 from distributed.tasks.save_data_points import save_data_points
 
 
-@app.function(image=image, timeout=7200, max_containers=100)
+@app.function(image=image, timeout=86400, max_containers=100)
 async def graph_extraction_worker(user, document_name: str, document_chunks: list):
     cognee_config = get_cognify_config()
 