Merge pull request #1718 from danielaskdd/fix-mongo-llm-cache

Fix LLM cache handling for MongoKVStorage to address document deletion scenarios
2025-06-29 15:17:30 +08:00 · 2025-06-29 15:17:30 +08:00 · a2d821a31c
commit a2d821a31c
parent dd12b08708 e2824b721e
2 changed files with 21 additions and 3 deletions
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -98,7 +98,17 @@ class MongoKVStorage(BaseKVStorage):
            self._data = None

    async def get_by_id(self, id: str) -> dict[str, Any] | None:
-        return await self._data.find_one({"_id": id})
+        if id == "default":
+            # Find all documents with _id starting with "default_"
+            cursor = self._data.find({"_id": {"$regex": "^default_"}})
+            result = {}
+            async for doc in cursor:
+                # Use the complete _id as key
+                result[doc["_id"]] = doc
+            return result if result else None
+        else:
+            # Original behavior for non-"default" ids
+            return await self._data.find_one({"_id": id})

    async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
        cursor = self._data.find({"_id": {"$in": ids}})
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -504,14 +504,22 @@ class PGKVStorage(BaseKVStorage):
    async def get_by_id(self, id: str) -> dict[str, Any] | None:
        """Get doc_full data by id."""
        sql = SQL_TEMPLATES["get_by_id_" + self.namespace]
-        params = {"workspace": self.db.workspace, "id": id}
        if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
+            # For LLM cache, the id parameter actually represents the mode
+            params = {"workspace": self.db.workspace, "mode": id}
            array_res = await self.db.query(sql, params, multirows=True)
            res = {}
            for row in array_res:
-                res[row["id"]] = row
+                # Dynamically add cache_type field based on mode
+                row_with_cache_type = dict(row)
+                if id == "default":
+                    row_with_cache_type["cache_type"] = "extract"
+                else:
+                    row_with_cache_type["cache_type"] = "unknown"
+                res[row["id"]] = row_with_cache_type
            return res if res else None
        else:
+            params = {"workspace": self.db.workspace, "id": id}
            response = await self.db.query(sql, params)
            return response if response else None