From c51079335eacd9daa92ed949385f8076d50daae7 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 26 Jun 2025 14:14:52 +0800
Subject: [PATCH] Optimize node label retrieval with aggregation

- Enable allowDiskUse for large datasets
---
 lightrag/kg/mongo_impl.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py
index f5a87cbe..45f6f4de 100644
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -697,7 +697,12 @@ class MongoGraphStorage(BaseGraphStorage):
             [id1, id2, ...] # Alphabetically sorted id list
         """
 
-        cursor = self.collection.find({}, projection={"_id": 1}, sort=[("_id", 1)])
+        # Use aggregation with allowDiskUse for large datasets
+        pipeline = [
+            {"$project": {"_id": 1}},
+            {"$sort": {"_id": 1}}
+        ]
+        cursor = await self.collection.aggregate(pipeline, allowDiskUse=True)
         labels = []
         async for doc in cursor:
             labels.append(doc["_id"])