From c51079335eacd9daa92ed949385f8076d50daae7 Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Thu, 26 Jun 2025 14:14:52 +0800
Subject: [PATCH] Optimize node label retrieval with aggregation

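- Replace find() with an aggregation pipeline ($project + $sort on _id)
- Enable allowDiskUse so the sort can spill to disk on large datasets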
---
 lightrag/kg/mongo_impl.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py
index f5a87cbe..45f6f4de 100644
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -697,7 +697,12 @@ class MongoGraphStorage(BaseGraphStorage):
             [id1, id2, ...]  # Alphabetically sorted id list
         """
 
-        cursor = self.collection.find({}, projection={"_id": 1}, sort=[("_id", 1)])
+        # Use aggregation with allowDiskUse for large datasets
+        pipeline = [
+            {"$project": {"_id": 1}},
+            {"$sort": {"_id": 1}}
+        ]
+        cursor = await self.collection.aggregate(pipeline, allowDiskUse=True)
         labels = []
         async for doc in cursor:
             labels.append(doc["_id"])