# LightRAG/tests/test_empty_model_suffix.py

"""
Tests for handling empty model suffix in PostgreSQL and Qdrant storage.

This test module verifies that both storage backends gracefully handle
the case when _generate_collection_suffix() returns an empty string.
"""

import pytest
from unittest.mock import Mock, patch, MagicMock
from lightrag.base import BaseVectorStorage
from lightrag.utils import EmbeddingFunc


def dummy_embedding_func(*args, **kwargs):
    """Placeholder embedding callable for tests.

    Accepts any positional and keyword arguments, ignores them, and
    returns ``None``. It exists only so an ``EmbeddingFunc`` can be
    constructed in the tests below.
    """
    return None


class TestEmptyModelSuffix:
    """Test suite for handling empty model suffix scenarios.

    Covers table/collection name generation in the PostgreSQL and Qdrant
    vector storages, both when ``_generate_collection_suffix()`` yields an
    empty string and when it yields a real model identifier, plus the
    fallback chain of ``_generate_collection_suffix()`` itself.
    """

    def test_postgres_table_name_with_empty_suffix(self):
        """
        Test PostgreSQL table name generation when model_suffix is empty.

        Bug Fix Verification:
        - Before: table_name = "LIGHTRAG_VDB_CHUNKS_" (trailing underscore)
        - After: table_name = "LIGHTRAG_VDB_CHUNKS" (fallback to base name)
        """
        from lightrag.kg.postgres_impl import PostgresVectorDBStorage
        from lightrag.kg.shared_storage import namespace_to_table_name

        # Mock restricted to `embedding_dim`: accessing `get_model_identifier`
        # on it raises AttributeError, so no model identifier is available.
        mock_embedding_func = Mock(spec=["embedding_dim"])
        mock_embedding_func.embedding_dim = 1536

        # global_config deliberately has no "embedding_func" entry, so there
        # is no second source for a model identifier either.
        global_config = {
            "embedding_batch_num": 100,
            "pgvector_precision": "hybrid",
            "pg_host": "localhost",
            "pg_port": 5432,
            "pg_user": "user",
            "pg_password": "password",
            "pg_database": "lightrag",
        }

        # Create PostgreSQL storage instance
        storage = PostgresVectorDBStorage(
            namespace="chunks",
            workspace="test",
            global_config=global_config,
            embedding_func=mock_embedding_func,
        )

        # Verify that:
        # 1. model_suffix is empty
        # 2. table_name doesn't have trailing underscore
        # 3. table_name equals the base table name
        assert storage.model_suffix == "", "model_suffix should be empty"
        # NOTE: the message must follow the condition after a comma; wrapping
        # both in one pair of parentheses would assert a non-empty tuple,
        # which is always true and would silently disable this check.
        assert not storage.table_name.endswith("_"), (
            f"table_name should not have trailing underscore: {storage.table_name}"
        )

        # Expected base table name
        expected_base = namespace_to_table_name("chunks")
        assert storage.table_name == expected_base, (
            f"table_name should fallback to base name when model_suffix is empty. "
            f"Expected: {expected_base}, Got: {storage.table_name}"
        )

    def test_qdrant_collection_name_with_empty_suffix(self):
        """
        Test Qdrant collection name generation when model_suffix is empty.

        Bug Fix Verification:
        - Before: final_namespace = "lightrag_vdb_chunks_" (trailing underscore)
        - After: final_namespace = "lightrag_vdb_chunks" (fallback to legacy name)
        """
        from lightrag.kg.qdrant_impl import QdrantVectorDBStorage

        # Mock restricted to `embedding_dim`: accessing `get_model_identifier`
        # on it raises AttributeError, so no model identifier is available.
        mock_embedding_func = Mock(spec=["embedding_dim"])
        mock_embedding_func.embedding_dim = 1536

        # global_config deliberately has no "embedding_func" entry.
        global_config = {
            "embedding_batch_num": 100,
            "qdrant_url": "http://localhost:6333",
        }

        # Create Qdrant storage instance
        storage = QdrantVectorDBStorage(
            namespace="chunks",
            workspace="test",
            global_config=global_config,
            embedding_func=mock_embedding_func,
        )

        # Verify that:
        # 1. model_suffix is empty
        # 2. final_namespace doesn't have trailing underscore
        # 3. final_namespace equals the legacy namespace
        assert storage._generate_collection_suffix() == "", (
            "model_suffix should be empty"
        )
        # NOTE: condition and message are separated by a comma outside the
        # parentheses so the condition is really evaluated (a parenthesized
        # `(cond, msg)` tuple is always truthy).
        assert not storage.final_namespace.endswith("_"), (
            f"final_namespace should not have trailing underscore: {storage.final_namespace}"
        )
        assert storage.final_namespace == storage.legacy_namespace, (
            f"final_namespace should fallback to legacy_namespace when model_suffix is empty. "
            f"Expected: {storage.legacy_namespace}, Got: {storage.final_namespace}"
        )

    def test_postgres_table_name_with_valid_suffix(self):
        """
        Test PostgreSQL table name generation with valid model suffix.

        Verification:
        - When embedding_func has get_model_identifier, use it
        - table_name has proper format: base_table_model_suffix
        """
        from lightrag.kg.postgres_impl import PostgresVectorDBStorage
        from lightrag.kg.shared_storage import namespace_to_table_name

        # Create a proper embedding function with model_name
        embedding_func = EmbeddingFunc(
            embedding_dim=1536, func=dummy_embedding_func, model_name="text-embedding-ada-002"
        )

        # Setup global_config
        global_config = {
            "embedding_batch_num": 100,
            "pgvector_precision": "hybrid",
            "pg_host": "localhost",
            "pg_port": 5432,
            "pg_user": "user",
            "pg_password": "password",
            "pg_database": "lightrag",
            "embedding_func": embedding_func,
        }

        # Create PostgreSQL storage instance
        storage = PostgresVectorDBStorage(
            namespace="chunks",
            workspace="test",
            global_config=global_config,
            embedding_func=embedding_func,
        )

        # Verify that:
        # 1. model_suffix is not empty
        # 2. table_name has correct format
        assert storage.model_suffix != "", "model_suffix should not be empty"
        assert "_" in storage.table_name, "table_name should contain underscore as separator"

        # Expected format: base_table_model_suffix
        expected_base = namespace_to_table_name("chunks")
        expected_model_id = embedding_func.get_model_identifier()
        expected_table_name = f"{expected_base}_{expected_model_id}"

        assert storage.table_name == expected_table_name, (
            f"table_name format incorrect. Expected: {expected_table_name}, Got: {storage.table_name}"
        )

    def test_qdrant_collection_name_with_valid_suffix(self):
        """
        Test Qdrant collection name generation with valid model suffix.

        Verification:
        - When embedding_func has get_model_identifier, use it
        - final_namespace has proper format: lightrag_vdb_namespace_model_suffix
        """
        from lightrag.kg.qdrant_impl import QdrantVectorDBStorage

        # Create a proper embedding function with model_name
        embedding_func = EmbeddingFunc(
            embedding_dim=1536, func=dummy_embedding_func, model_name="text-embedding-ada-002"
        )

        # Setup global_config
        global_config = {
            "embedding_batch_num": 100,
            "qdrant_url": "http://localhost:6333",
            "embedding_func": embedding_func,
        }

        # Create Qdrant storage instance
        storage = QdrantVectorDBStorage(
            namespace="chunks",
            workspace="test",
            global_config=global_config,
            embedding_func=embedding_func,
        )

        # Verify that:
        # 1. model_suffix is not empty
        # 2. final_namespace has correct format
        model_suffix = storage._generate_collection_suffix()
        assert model_suffix != "", "model_suffix should not be empty"
        assert "_" in storage.final_namespace, (
            "final_namespace should contain underscore as separator"
        )

        # Expected format: lightrag_vdb_namespace_model_suffix
        expected_model_id = embedding_func.get_model_identifier()
        expected_collection_name = f"lightrag_vdb_chunks_{expected_model_id}"

        assert storage.final_namespace == expected_collection_name, (
            f"final_namespace format incorrect. Expected: {expected_collection_name}, Got: {storage.final_namespace}"
        )

    def test_suffix_generation_fallback_chain(self):
        """
        Test the fallback chain in _generate_collection_suffix.

        Verification:
        1. Direct method: embedding_func.get_model_identifier()
        2. Global config fallback: global_config["embedding_func"].get_model_identifier()
        3. Final fallback: return empty string
        """
        from lightrag.base import BaseVectorStorage

        # Minimal concrete subclass: every storage operation is stubbed out as
        # a no-op so the class can be instantiated; only the inherited
        # _generate_collection_suffix() is exercised below.
        class TestStorage(BaseVectorStorage):
            async def query(self, *args, **kwargs):
                pass

            async def upsert(self, *args, **kwargs):
                pass

            async def delete_entity(self, *args, **kwargs):
                pass

            async def delete_entity_relation(self, *args, **kwargs):
                pass

            async def get_by_id(self, *args, **kwargs):
                pass

            async def get_by_ids(self, *args, **kwargs):
                pass

            async def delete(self, *args, **kwargs):
                pass

            async def get_vectors_by_ids(self, *args, **kwargs):
                pass

            async def index_done_callback(self):
                pass

            async def drop(self):
                pass

        # Case 1: Direct method available
        embedding_func = EmbeddingFunc(
            embedding_dim=1536, func=dummy_embedding_func, model_name="test-model"
        )
        storage = TestStorage(
            namespace="test",
            workspace="test",
            global_config={},
            embedding_func=embedding_func,
        )
        assert (
            storage._generate_collection_suffix() == "test_model_1536d"
        ), "Should use direct method when available"

        # Case 2: Global config fallback
        mock_embedding_func = Mock(spec=[])  # No get_model_identifier
        storage = TestStorage(
            namespace="test",
            workspace="test",
            global_config={"embedding_func": embedding_func},
            embedding_func=mock_embedding_func,
        )
        assert (
            storage._generate_collection_suffix() == "test_model_1536d"
        ), "Should fallback to global_config embedding_func"

        # Case 3: Final fallback (no model identifier available from either
        # the direct embedding_func or global_config)
        storage = TestStorage(
            namespace="test",
            workspace="test",
            global_config={},
            embedding_func=mock_embedding_func,
        )
        assert storage._generate_collection_suffix() == "", (
            "Should return empty string when no model_identifier available"
        )