Fix LLM cache handling for Redis to address document deletion scenarios.

- Implements bulk scan for "extract" cache entries
- Maintains backward compatibility for normal IDs
This commit is contained in:
yangdx 2025-06-29 15:13:42 +08:00
parent e2824b721e
commit 14cda93988

View file

@@ -79,6 +79,45 @@ class RedisKVStorage(BaseKVStorage):
await self.close()

async def get_by_id(self, id: str) -> dict[str, Any] | None:
if id == "default":
# Find all cache entries with cache_type == "extract"
async with self._get_redis_connection() as redis:
try:
result = {}
pattern = f"{self.namespace}:*"
cursor = 0
while True:
cursor, keys = await redis.scan(cursor, match=pattern, count=100)
if keys:
# Batch get values for these keys
pipe = redis.pipeline()
for key in keys:
pipe.get(key)
values = await pipe.execute()
# Check each value for cache_type == "extract"
for key, value in zip(keys, values):
if value:
try:
data = json.loads(value)
if isinstance(data, dict) and data.get("cache_type") == "extract":
# Extract cache key (remove namespace prefix)
cache_key = key.replace(f"{self.namespace}:", "")
result[cache_key] = data
except json.JSONDecodeError:
continue
if cursor == 0:
break
return result if result else None
except Exception as e:
logger.error(f"Error scanning Redis for extract cache entries: {e}")
return None
else:
# Original behavior for non-"default" ids
    async with self._get_redis_connection() as redis:
        try:
            data = await redis.get(f"{self.namespace}:{id}")