LightRAG/tests/test_fts_cache.py
clssck 59e89772de refactor: consolidate to PostgreSQL-only backend and modernize stack
Remove legacy storage implementations and deprecated examples:
- Delete FAISS, JSON, Memgraph, Milvus, MongoDB, Nano Vector DB, Neo4j, NetworkX, Qdrant, Redis storage backends
- Remove Kubernetes deployment manifests and installation scripts
- Delete unofficial examples for deprecated backends and offline deployment docs
Streamline core infrastructure:
- Consolidate storage layer to PostgreSQL-only implementation
- Add full-text search caching with FTS cache module
- Implement metrics collection and monitoring pipeline
- Add explain and metrics API routes
Modernize frontend and tooling:
- Switch web UI to Bun with bun.lock, remove npm and pnpm lockfiles
- Update Dockerfile for PostgreSQL-only deployment
- Add Makefile for common development tasks
- Update environment and configuration examples
Enhance evaluation and testing capabilities:
- Add prompt optimization with DSPy and auto-tuning
- Implement ground truth regeneration and variant testing
- Add prompt debugging and response comparison utilities
- Expand test coverage with new integration scenarios
Simplify dependencies and configuration:
- Remove offline-specific requirement files
- Update pyproject.toml with streamlined dependencies
- Add Python version pinning with .python-version
- Create project guidelines in CLAUDE.md and AGENTS.md
2025-12-12 16:28:49 +01:00

159 lines
5.6 KiB
Python

"""
Unit tests for Full-Text Search caching.
"""
import pytest
from lightrag.cache.fts_cache import (
_compute_cache_key,
_fts_cache,
get_cached_fts_results,
get_fts_cache_stats,
invalidate_fts_cache_for_workspace,
store_fts_results,
)
class TestFTSCacheKey:
    """Checks on ``_compute_cache_key``.

    Each test calls the helper with four positional arguments which, judging
    by the test names, are (query, workspace, limit, language).
    """

    def test_cache_key_deterministic(self):
        """Hashing the same argument tuple twice yields equal keys."""
        args = ('test query', 'default', 10, 'english')
        assert _compute_cache_key(*args) == _compute_cache_key(*args)

    def test_cache_key_differs_by_query(self):
        """Changing only the query text changes the key."""
        key_a = _compute_cache_key('query A', 'default', 10, 'english')
        key_b = _compute_cache_key('query B', 'default', 10, 'english')
        assert key_a != key_b

    def test_cache_key_differs_by_workspace(self):
        """Changing only the workspace changes the key."""
        first_ws = _compute_cache_key('test', 'workspace1', 10, 'english')
        second_ws = _compute_cache_key('test', 'workspace2', 10, 'english')
        assert first_ws != second_ws

    def test_cache_key_differs_by_limit(self):
        """Changing only the result limit changes the key."""
        small_limit = _compute_cache_key('test', 'default', 10, 'english')
        large_limit = _compute_cache_key('test', 'default', 20, 'english')
        assert small_limit != large_limit

    def test_cache_key_differs_by_language(self):
        """Changing only the language changes the key."""
        english_key = _compute_cache_key('test', 'default', 10, 'english')
        french_key = _compute_cache_key('test', 'default', 10, 'french')
        assert english_key != french_key

    def test_cache_key_length(self):
        """The key is exactly 16 characters long.

        The original test described this as ``SHA256[:16]``; the hashing
        itself lives in the cache module and is not verified here.
        """
        assert len(_compute_cache_key('test', 'default', 10, 'english')) == 16
class TestFTSCacheOperations:
    """Store, lookup, expiry and invalidation tests against ``_fts_cache``."""

    @pytest.fixture(autouse=True)
    def clear_cache(self):
        """Empty the shared cache both before and after every test.

        Clearing on both sides keeps entries from leaking into or out of
        this class, since ``_fts_cache`` is module-level state.
        """
        _fts_cache.clear()
        yield
        _fts_cache.clear()

    @pytest.mark.asyncio
    async def test_cache_miss_returns_none(self):
        """A lookup for a key that was never stored returns ``None``."""
        result = await get_cached_fts_results('new query', 'default', 10, 'english')
        assert result is None

    @pytest.mark.asyncio
    async def test_store_and_retrieve(self):
        """Results stored under a key come back unchanged on lookup."""
        test_results = [{'id': 'chunk1', 'content': 'test', 'score': 0.9}]
        await store_fts_results('test query', 'default', 10, 'english', test_results)

        cached = await get_cached_fts_results('test query', 'default', 10, 'english')
        assert cached == test_results

    @pytest.mark.asyncio
    async def test_cache_expiration(self):
        """An entry whose timestamp is too old is treated as a miss."""
        test_results = [{'id': 'chunk1', 'content': 'test', 'score': 0.9}]
        await store_fts_results('test query', 'default', 10, 'english', test_results)

        # Overwrite the stored entry with timestamp 0 to force expiry. This
        # relies on entries being (results, timestamp) tuples keyed by
        # _compute_cache_key -- an internal layout of the cache module.
        cache_key = _compute_cache_key('test query', 'default', 10, 'english')
        _fts_cache[cache_key] = (test_results, 0)

        cached = await get_cached_fts_results('test query', 'default', 10, 'english')
        assert cached is None

    @pytest.mark.asyncio
    async def test_invalidation_clears_cache(self):
        """Invalidating the workspace leaves the cache empty."""
        test_results = [{'id': 'chunk1', 'content': 'test', 'score': 0.9}]
        await store_fts_results('test query', 'default', 10, 'english', test_results)
        # Guard: make sure there is something to invalidate.
        assert len(_fts_cache) > 0

        await invalidate_fts_cache_for_workspace('default')
        assert len(_fts_cache) == 0

    @pytest.mark.asyncio
    async def test_empty_results_not_cached(self):
        """An empty result list IS stored and returned as ``[]``.

        Despite the method name (kept so existing test selectors still
        work), this verifies negative caching: ``store_fts_results`` accepts
        an empty list, and a later lookup returns ``[]`` rather than the
        ``None`` that signals a miss.
        """
        # The original note says the production caller only stores non-empty
        # results; that caller is not visible from this test module. The
        # store function itself does not filter empty lists.
        await store_fts_results('test query', 'default', 10, 'english', [])

        cached = await get_cached_fts_results('test query', 'default', 10, 'english')
        assert cached == []
class TestFTSCacheStats:
    """Checks on the counters reported by ``get_fts_cache_stats``."""

    @pytest.fixture(autouse=True)
    def clear_cache(self):
        """Wipe the shared cache around each test (setup and teardown)."""
        _fts_cache.clear()
        yield
        _fts_cache.clear()

    @pytest.mark.asyncio
    async def test_stats_empty_cache(self):
        """With nothing stored, both counters read zero."""
        snapshot = get_fts_cache_stats()
        assert snapshot['total_entries'] == 0
        assert snapshot['valid_entries'] == 0

    @pytest.mark.asyncio
    async def test_stats_with_entries(self):
        """Two freshly stored entries are both counted as total and valid."""
        await store_fts_results('q1', 'ws', 10, 'en', [{'id': '1'}])
        await store_fts_results('q2', 'ws', 10, 'en', [{'id': '2'}])

        snapshot = get_fts_cache_stats()
        assert snapshot['total_entries'] == 2
        assert snapshot['valid_entries'] == 2

    @pytest.mark.asyncio
    async def test_stats_expired_entries(self):
        """An expired entry still counts toward the total but not as valid."""
        await store_fts_results('q1', 'ws', 10, 'en', [{'id': '1'}])

        # Replace q1's entry with one stamped 0 so it reads as expired.
        expired_key = _compute_cache_key('q1', 'ws', 10, 'en')
        _fts_cache[expired_key] = ([{'id': '1'}], 0)

        # q2 is stored after q1 has been expired, same as the original order.
        await store_fts_results('q2', 'ws', 10, 'en', [{'id': '2'}])

        snapshot = get_fts_cache_stats()
        assert snapshot['total_entries'] == 2
        assert snapshot['valid_entries'] == 1  # q2 is the only live entry