From 71a185112b96400b3ffa86c47bdc7dddb14295e3 Mon Sep 17 00:00:00 2001 From: Yasiru Rangana Date: Fri, 7 Nov 2025 12:20:01 +1100 Subject: [PATCH 1/2] Add collection suffix support to QdrantVectorDBStorage --- lightrag/kg/qdrant_impl.py | 57 ++++++++++++++++++++++++++++++++++---- 1 file changed, 52 insertions(+), 5 deletions(-) diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py index d51d8898..6ca51232 100644 --- a/lightrag/kg/qdrant_impl.py +++ b/lightrag/kg/qdrant_impl.py @@ -69,6 +69,34 @@ def workspace_filter_condition(workspace: str) -> models.FieldCondition: @final @dataclass class QdrantVectorDBStorage(BaseVectorStorage): + """ + Qdrant vector database storage implementation. + + This class provides a storage backend for vector embeddings using Qdrant. + It supports multi-tenant isolation through workspace-based filtering and + optional collection suffixes for different embedding dimensions or other purposes. + + Configuration: + - Standard parameters: namespace, workspace, embedding_func, etc. + - Qdrant-specific parameters in vector_db_storage_cls_kwargs: + - cosine_better_than_threshold: Required similarity threshold + - collection_suffix: Optional suffix for collection names + + Note on collection_suffix: + If specified, this suffix will be appended to all collection names. + This allows creating separate sets of collections for different purposes. + To access this data later, you must use the same suffix in all LightRAG + instances that need to access this data. + + Examples of collection_suffix usage: + - Embedding dimensions: "768d", "1536d", "3072d" + - Environments: "dev", "staging", "prod" + - Testing: "test", "benchmark", "experiment1" + - Versions: "v1", "v2", "2023q4" + - Models: "ada002", "e5large", "bge" + - Special purposes: "filtered", "augmented", "synthetic" + """ + def __init__( self, namespace, global_config, embedding_func, workspace=None, meta_fields=None ): @@ -287,23 +315,42 @@ class QdrantVectorDBStorage(BaseVectorStorage): f"Using passed workspace parameter: '{effective_workspace}'" ) + # Extract Qdrant-specific settings + kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) + collection_suffix = kwargs.get("collection_suffix", "") + cosine_threshold = kwargs.get("cosine_better_than_threshold") + # Get legacy namespace for data migration from old version if effective_workspace: - self.legacy_namespace = f"{effective_workspace}_{self.namespace}" + if collection_suffix: + self.legacy_namespace = f"{effective_workspace}_{self.namespace}_{collection_suffix}" + else: + self.legacy_namespace = f"{effective_workspace}_{self.namespace}" else: - self.legacy_namespace = self.namespace + if collection_suffix: + self.legacy_namespace = f"{self.namespace}_{collection_suffix}" + else: + self.legacy_namespace = self.namespace self.effective_workspace = effective_workspace or DEFAULT_WORKSPACE # Use a shared collection with payload-based partitioning (Qdrant's recommended approach) # Ref: https://qdrant.tech/documentation/guides/multiple-partitions/ - self.final_namespace = f"lightrag_vdb_{self.namespace}" + if collection_suffix: + self.final_namespace = f"lightrag_vdb_{self.namespace}_{collection_suffix}" + logger.info( + f"Using collection suffix '{collection_suffix}' for {self.namespace}. " + f"Collection name: '{self.final_namespace}'. " + f"Note: To access this data later, you must use the same suffix." + ) + else: + self.final_namespace = f"lightrag_vdb_{self.namespace}" + logger.debug( f"Using shared collection '{self.final_namespace}' with workspace '{self.effective_workspace}' for payload-based partitioning" ) - kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) - cosine_threshold = kwargs.get("cosine_better_than_threshold") + # Check for required cosine threshold parameter if cosine_threshold is None: raise ValueError( "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs" From 598de90d939c8d7c6eac5b550c1d4ff86ce42019 Mon Sep 17 00:00:00 2001 From: Yasiru Rangana Date: Fri, 7 Nov 2025 12:25:07 +1100 Subject: [PATCH 2/2] Format code with ruff --- lightrag/kg/qdrant_impl.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py index 6ca51232..34d7620a 100644 --- a/lightrag/kg/qdrant_impl.py +++ b/lightrag/kg/qdrant_impl.py @@ -71,23 +71,23 @@ def workspace_filter_condition(workspace: str) -> models.FieldCondition: class QdrantVectorDBStorage(BaseVectorStorage): """ Qdrant vector database storage implementation. - + This class provides a storage backend for vector embeddings using Qdrant. It supports multi-tenant isolation through workspace-based filtering and optional collection suffixes for different embedding dimensions or other purposes. - + Configuration: - Standard parameters: namespace, workspace, embedding_func, etc. - Qdrant-specific parameters in vector_db_storage_cls_kwargs: - cosine_better_than_threshold: Required similarity threshold - collection_suffix: Optional suffix for collection names - + Note on collection_suffix: If specified, this suffix will be appended to all collection names. This allows creating separate sets of collections for different purposes. To access this data later, you must use the same suffix in all LightRAG instances that need to access this data. - + Examples of collection_suffix usage: - Embedding dimensions: "768d", "1536d", "3072d" - Environments: "dev", "staging", "prod" @@ -96,7 +96,7 @@ class QdrantVectorDBStorage(BaseVectorStorage): - Models: "ada002", "e5large", "bge" - Special purposes: "filtered", "augmented", "synthetic" """ - + def __init__( self, namespace, global_config, embedding_func, workspace=None, meta_fields=None ): @@ -319,11 +319,13 @@ class QdrantVectorDBStorage(BaseVectorStorage): kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) collection_suffix = kwargs.get("collection_suffix", "") cosine_threshold = kwargs.get("cosine_better_than_threshold") - + # Get legacy namespace for data migration from old version if effective_workspace: if collection_suffix: - self.legacy_namespace = f"{effective_workspace}_{self.namespace}_{collection_suffix}" + self.legacy_namespace = ( + f"{effective_workspace}_{self.namespace}_{collection_suffix}" + ) else: self.legacy_namespace = f"{effective_workspace}_{self.namespace}" else: @@ -345,7 +347,7 @@ class QdrantVectorDBStorage(BaseVectorStorage): ) else: self.final_namespace = f"lightrag_vdb_{self.namespace}" - + logger.debug( f"Using shared collection '{self.final_namespace}' with workspace '{self.effective_workspace}' for payload-based partitioning" )