Merge pull request #1718 from danielaskdd/fix-mongo-llm-cache

Fix LLM cache handling for MongoKVStorage to address document deletion scenarios
This commit is contained in:
Daniel.y 2025-06-29 15:17:30 +08:00 committed by GitHub
commit a2d821a31c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 21 additions and 3 deletions

View file

@ -98,7 +98,17 @@ class MongoKVStorage(BaseKVStorage):
self._data = None
async def get_by_id(self, id: str) -> dict[str, Any] | None:
return await self._data.find_one({"_id": id})
if id == "default":
# Find all documents with _id starting with "default_"
cursor = self._data.find({"_id": {"$regex": "^default_"}})
result = {}
async for doc in cursor:
# Use the complete _id as key
result[doc["_id"]] = doc
return result if result else None
else:
# Original behavior for non-"default" ids
return await self._data.find_one({"_id": id})
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
cursor = self._data.find({"_id": {"$in": ids}})

View file

@ -504,14 +504,22 @@ class PGKVStorage(BaseKVStorage):
async def get_by_id(self, id: str) -> dict[str, Any] | None:
    """Look up data for this storage's namespace by id (or by cache mode).

    The SQL statement is chosen from ``SQL_TEMPLATES`` using the key
    ``"get_by_id_" + self.namespace``.

    Args:
        id: For the LLM response cache namespace this value is passed to the
            query as the ``mode`` parameter; for every other namespace it is
            passed as the ``id`` parameter.

    Returns:
        LLM response cache namespace: a dict mapping each returned row's
        ``"id"`` to a copy of that row with an added ``"cache_type"`` field,
        or ``None`` when no rows come back.
        Other namespaces: the query response, or ``None`` when it is falsy.
    """
    sql = SQL_TEMPLATES["get_by_id_" + self.namespace]

    if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
        # For the LLM cache, the id parameter actually represents the mode.
        params = {"workspace": self.db.workspace, "mode": id}
        array_res = await self.db.query(sql, params, multirows=True)

        # cache_type depends only on the requested mode, so decide it once
        # instead of once per row.
        cache_type = "extract" if id == "default" else "unknown"

        res = {}
        for row in array_res:
            # Copy the row before adding the field so the query result
            # itself is left untouched.
            row_with_cache_type = dict(row)
            row_with_cache_type["cache_type"] = cache_type
            res[row["id"]] = row_with_cache_type
        return res if res else None

    params = {"workspace": self.db.workspace, "id": id}
    response = await self.db.query(sql, params)
    return response if response else None