LightRAG/tests/test_tenant_kb_document_count.py
Raphaël MANSUY 3905d40b37 chore: clean PR for upstream integration
- Remove debug artifacts from git tracking:
  - .playwright-mcp/ (screenshot artifacts)
  - logs/ (development task logs)
  - test_rag_env/ (test data fixtures)
  - docs/archives/ (internal planning docs)
  - docs/diff_hku/ (merge planning docs)

- Remove unnecessary root-level scripts:
  - check_import.py, debug_env.py, simple_debug_env.py
  - debug_postgres_graph.py, write_script.py
  - test_multitenant.sh, build-and-push.sh

- Update .gitignore with proper exclusions

- Fix documentation accuracy:
  - 0001-quick-start.md: correct LightRAG initialization examples
  - 0002-architecture-overview.md: fix version and links
  - 0003-api-reference.md: fix /documents/upload route name
  - Update version to 1.4.9.2 across all docs

- Fix lint issues:
  - Add noqa for E402 in test file with sys.path manipulation
  - Remove unused imports

All 245 tests pass, ruff check clean
2025-12-05 15:28:34 +08:00

275 lines
9.7 KiB
Python

"""
Test suite for Knowledge Base document count functionality.
Tests the document count calculation for knowledge bases in multi-tenant mode.
This was a bug fix where the document count was showing 0 even when documents existed.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock
from dataclasses import dataclass
from datetime import datetime, timezone
@dataclass
class MockKBRow:
"""Mock database row for knowledge base."""
kb_id: str
tenant_id: str
name: str
description: str
created_at: datetime
updated_at: datetime
document_count: int = 0
entity_count: int = 0
relationship_count: int = 0
def __getitem__(self, key):
return getattr(self, key)
def get(self, key, default=None):
return getattr(self, key, default)
class TestKBDocumentCount:
"""Test document count is correctly computed for knowledge bases."""
@pytest.fixture
def mock_db(self):
"""Create a mock database connection."""
db = AsyncMock()
return db
@pytest.fixture
def mock_kv_storage(self, mock_db):
"""Create a mock KV storage with database."""
storage = MagicMock()
storage.db = mock_db
return storage
@pytest.mark.asyncio
async def test_list_kbs_returns_document_count(self, mock_kv_storage):
"""Test that list_knowledge_bases returns document count from database."""
from lightrag.services.tenant_service import TenantService
# Setup mock response with document counts
mock_row = {
'kb_id': 'kb-main',
'tenant_id': 'techstart',
'name': 'Main KB',
'description': 'Test KB',
'created_at': datetime.now(timezone.utc),
'updated_at': datetime.now(timezone.utc),
'document_count': 5,
'entity_count': 100,
'relationship_count': 50,
}
mock_kv_storage.db.query = AsyncMock(return_value=[mock_row])
# Create service and call list_knowledge_bases
service = TenantService(kv_storage=mock_kv_storage)
result = await service.list_knowledge_bases(tenant_id='techstart')
# Verify document count is returned
assert len(result['items']) == 1
kb = result['items'][0]
assert kb.document_count == 5
assert kb.entity_count == 100
assert kb.relationship_count == 50
@pytest.mark.asyncio
async def test_list_kbs_with_zero_documents(self, mock_kv_storage):
"""Test that list_knowledge_bases correctly shows 0 documents when empty."""
from lightrag.services.tenant_service import TenantService
mock_row = {
'kb_id': 'kb-empty',
'tenant_id': 'techstart',
'name': 'Empty KB',
'description': 'Empty KB',
'created_at': datetime.now(timezone.utc),
'updated_at': datetime.now(timezone.utc),
'document_count': 0,
'entity_count': 0,
'relationship_count': 0,
}
mock_kv_storage.db.query = AsyncMock(return_value=[mock_row])
service = TenantService(kv_storage=mock_kv_storage)
result = await service.list_knowledge_bases(tenant_id='techstart')
assert len(result['items']) == 1
kb = result['items'][0]
assert kb.document_count == 0
@pytest.mark.asyncio
async def test_get_kb_returns_document_count(self, mock_kv_storage):
"""Test that get_knowledge_base returns document count from database."""
from lightrag.services.tenant_service import TenantService
mock_row = {
'kb_id': 'kb-main',
'tenant_id': 'techstart',
'name': 'Main KB',
'description': 'Test KB',
'created_at': datetime.now(timezone.utc),
'updated_at': datetime.now(timezone.utc),
'document_count': 10,
'entity_count': 200,
'relationship_count': 100,
}
mock_kv_storage.db.query = AsyncMock(return_value=mock_row)
service = TenantService(kv_storage=mock_kv_storage)
kb = await service.get_knowledge_base(tenant_id='techstart', kb_id='kb-main')
assert kb is not None
assert kb.document_count == 10
assert kb.entity_count == 200
assert kb.relationship_count == 100
@pytest.mark.asyncio
async def test_list_kbs_multiple_kbs_different_counts(self, mock_kv_storage):
"""Test document counts are correct for multiple KBs."""
from lightrag.services.tenant_service import TenantService
mock_rows = [
{
'kb_id': 'kb-main',
'tenant_id': 'techstart',
'name': 'Main KB',
'description': 'Main',
'created_at': datetime.now(timezone.utc),
'updated_at': datetime.now(timezone.utc),
'document_count': 5,
'entity_count': 50,
'relationship_count': 25,
},
{
'kb_id': 'kb-backup',
'tenant_id': 'techstart',
'name': 'Backup KB',
'description': 'Backup',
'created_at': datetime.now(timezone.utc),
'updated_at': datetime.now(timezone.utc),
'document_count': 10,
'entity_count': 100,
'relationship_count': 50,
},
]
mock_kv_storage.db.query = AsyncMock(return_value=mock_rows)
service = TenantService(kv_storage=mock_kv_storage)
result = await service.list_knowledge_bases(tenant_id='techstart')
assert len(result['items']) == 2
# Find KBs by ID
kb_main = next((kb for kb in result['items'] if kb.kb_id == 'kb-main'), None)
kb_backup = next((kb for kb in result['items'] if kb.kb_id == 'kb-backup'), None)
assert kb_main is not None
assert kb_main.document_count == 5
assert kb_backup is not None
assert kb_backup.document_count == 10
@pytest.mark.asyncio
async def test_db_query_failure_fallback(self, mock_kv_storage):
"""Test that when DB query fails, we fallback gracefully."""
from lightrag.services.tenant_service import TenantService
# Make db query raise an exception
mock_kv_storage.db.query = AsyncMock(side_effect=Exception("DB Error"))
mock_kv_storage.get_by_id = AsyncMock(return_value=None)
service = TenantService(kv_storage=mock_kv_storage)
result = await service.list_knowledge_bases(tenant_id='techstart')
# Should return empty list gracefully
assert result['items'] == []
assert result['total'] == 0
class TestDocumentCountIsolation:
"""Test document counts are properly isolated between tenants."""
@pytest.mark.asyncio
async def test_different_tenants_independent_counts(self):
"""Test that document counts are independent per tenant."""
from lightrag.services.tenant_service import TenantService
# Mock for tenant A
mock_storage_a = MagicMock()
mock_storage_a.db = AsyncMock()
mock_storage_a.db.query = AsyncMock(return_value=[{
'kb_id': 'kb-main',
'tenant_id': 'tenant-a',
'name': 'KB A',
'description': '',
'created_at': datetime.now(timezone.utc),
'updated_at': datetime.now(timezone.utc),
'document_count': 100,
'entity_count': 0,
'relationship_count': 0,
}])
service_a = TenantService(kv_storage=mock_storage_a)
result_a = await service_a.list_knowledge_bases(tenant_id='tenant-a')
# Mock for tenant B
mock_storage_b = MagicMock()
mock_storage_b.db = AsyncMock()
mock_storage_b.db.query = AsyncMock(return_value=[{
'kb_id': 'kb-main',
'tenant_id': 'tenant-b',
'name': 'KB B',
'description': '',
'created_at': datetime.now(timezone.utc),
'updated_at': datetime.now(timezone.utc),
'document_count': 50,
'entity_count': 0,
'relationship_count': 0,
}])
service_b = TenantService(kv_storage=mock_storage_b)
result_b = await service_b.list_knowledge_bases(tenant_id='tenant-b')
# Verify counts are independent
assert result_a['items'][0].document_count == 100
assert result_b['items'][0].document_count == 50
class TestNullHandling:
"""Test handling of NULL values from database."""
@pytest.mark.asyncio
async def test_null_counts_default_to_zero(self):
"""Test that NULL counts default to 0."""
from lightrag.services.tenant_service import TenantService
mock_storage = MagicMock()
mock_storage.db = AsyncMock()
mock_storage.db.query = AsyncMock(return_value=[{
'kb_id': 'kb-main',
'tenant_id': 'techstart',
'name': 'KB',
'description': '',
'created_at': datetime.now(timezone.utc),
'updated_at': datetime.now(timezone.utc),
'document_count': None, # NULL from DB
'entity_count': None,
'relationship_count': None,
}])
service = TenantService(kv_storage=mock_storage)
result = await service.list_knowledge_bases(tenant_id='techstart')
kb = result['items'][0]
assert kb.document_count == 0 # NULL should become 0
assert kb.entity_count == 0
assert kb.relationship_count == 0