Fix LLM cache handling for Redis to address document deletion scenarios.

- Implements bulk scan for "extract" cache entries
- Maintains backward compatibility for normal IDs
This commit is contained in:
yangdx 2025-06-29 15:13:42 +08:00
parent e2824b721e
commit 14cda93988

View file

@@ -79,6 +79,45 @@ class RedisKVStorage(BaseKVStorage):
await self.close()

async def get_by_id(self, id: str) -> dict[str, Any] | None:
if id == "default":
# Find all cache entries with cache_type == "extract"
async with self._get_redis_connection() as redis:
try:
result = {}
pattern = f"{self.namespace}:*"
cursor = 0
while True:
cursor, keys = await redis.scan(cursor, match=pattern, count=100)
if keys:
# Batch get values for these keys
pipe = redis.pipeline()
for key in keys:
pipe.get(key)
values = await pipe.execute()
# Check each value for cache_type == "extract"
for key, value in zip(keys, values):
if value:
try:
data = json.loads(value)
if isinstance(data, dict) and data.get("cache_type") == "extract":
# Extract cache key (remove namespace prefix)
cache_key = key.replace(f"{self.namespace}:", "")
result[cache_key] = data
except json.JSONDecodeError:
continue
if cursor == 0:
break
return result if result else None
except Exception as e:
logger.error(f"Error scanning Redis for extract cache entries: {e}")
return None
else:
# Original behavior for non-"default" ids
    async with self._get_redis_connection() as redis:
        try:
            data = await redis.get(f"{self.namespace}:{id}")