Merge 598de90d93 into 9562a974d2
This commit is contained in:
commit
a68466581e
1 changed file with 54 additions and 5 deletions
|
|
@ -69,6 +69,34 @@ def workspace_filter_condition(workspace: str) -> models.FieldCondition:
|
||||||
@final
|
@final
|
||||||
@dataclass
|
@dataclass
|
||||||
class QdrantVectorDBStorage(BaseVectorStorage):
|
class QdrantVectorDBStorage(BaseVectorStorage):
|
||||||
|
"""
|
||||||
|
Qdrant vector database storage implementation.
|
||||||
|
|
||||||
|
This class provides a storage backend for vector embeddings using Qdrant.
|
||||||
|
It supports multi-tenant isolation through workspace-based filtering and
|
||||||
|
optional collection suffixes for different embedding dimensions or other purposes.
|
||||||
|
|
||||||
|
Configuration:
|
||||||
|
- Standard parameters: namespace, workspace, embedding_func, etc.
|
||||||
|
- Qdrant-specific parameters in vector_db_storage_cls_kwargs:
|
||||||
|
- cosine_better_than_threshold: Required similarity threshold
|
||||||
|
- collection_suffix: Optional suffix for collection names
|
||||||
|
|
||||||
|
Note on collection_suffix:
|
||||||
|
If specified, this suffix will be appended to all collection names.
|
||||||
|
This allows creating separate sets of collections for different purposes.
|
||||||
|
To access this data later, you must use the same suffix in all LightRAG
|
||||||
|
instances that need to access this data.
|
||||||
|
|
||||||
|
Examples of collection_suffix usage:
|
||||||
|
- Embedding dimensions: "768d", "1536d", "3072d"
|
||||||
|
- Environments: "dev", "staging", "prod"
|
||||||
|
- Testing: "test", "benchmark", "experiment1"
|
||||||
|
- Versions: "v1", "v2", "2023q4"
|
||||||
|
- Models: "ada002", "e5large", "bge"
|
||||||
|
- Special purposes: "filtered", "augmented", "synthetic"
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, namespace, global_config, embedding_func, workspace=None, meta_fields=None
|
self, namespace, global_config, embedding_func, workspace=None, meta_fields=None
|
||||||
):
|
):
|
||||||
|
|
@ -287,23 +315,44 @@ class QdrantVectorDBStorage(BaseVectorStorage):
|
||||||
f"Using passed workspace parameter: '{effective_workspace}'"
|
f"Using passed workspace parameter: '{effective_workspace}'"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Extract Qdrant-specific settings
|
||||||
|
kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
||||||
|
collection_suffix = kwargs.get("collection_suffix", "")
|
||||||
|
cosine_threshold = kwargs.get("cosine_better_than_threshold")
|
||||||
|
|
||||||
# Get legacy namespace for data migration from old version
|
# Get legacy namespace for data migration from old version
|
||||||
if effective_workspace:
|
if effective_workspace:
|
||||||
self.legacy_namespace = f"{effective_workspace}_{self.namespace}"
|
if collection_suffix:
|
||||||
|
self.legacy_namespace = (
|
||||||
|
f"{effective_workspace}_{self.namespace}_{collection_suffix}"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.legacy_namespace = f"{effective_workspace}_{self.namespace}"
|
||||||
else:
|
else:
|
||||||
self.legacy_namespace = self.namespace
|
if collection_suffix:
|
||||||
|
self.legacy_namespace = f"{self.namespace}_{collection_suffix}"
|
||||||
|
else:
|
||||||
|
self.legacy_namespace = self.namespace
|
||||||
|
|
||||||
self.effective_workspace = effective_workspace or DEFAULT_WORKSPACE
|
self.effective_workspace = effective_workspace or DEFAULT_WORKSPACE
|
||||||
|
|
||||||
# Use a shared collection with payload-based partitioning (Qdrant's recommended approach)
|
# Use a shared collection with payload-based partitioning (Qdrant's recommended approach)
|
||||||
# Ref: https://qdrant.tech/documentation/guides/multiple-partitions/
|
# Ref: https://qdrant.tech/documentation/guides/multiple-partitions/
|
||||||
self.final_namespace = f"lightrag_vdb_{self.namespace}"
|
if collection_suffix:
|
||||||
|
self.final_namespace = f"lightrag_vdb_{self.namespace}_{collection_suffix}"
|
||||||
|
logger.info(
|
||||||
|
f"Using collection suffix '{collection_suffix}' for {self.namespace}. "
|
||||||
|
f"Collection name: '{self.final_namespace}'. "
|
||||||
|
f"Note: To access this data later, you must use the same suffix."
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.final_namespace = f"lightrag_vdb_{self.namespace}"
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Using shared collection '{self.final_namespace}' with workspace '{self.effective_workspace}' for payload-based partitioning"
|
f"Using shared collection '{self.final_namespace}' with workspace '{self.effective_workspace}' for payload-based partitioning"
|
||||||
)
|
)
|
||||||
|
|
||||||
kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
|
# Check for required cosine threshold parameter
|
||||||
cosine_threshold = kwargs.get("cosine_better_than_threshold")
|
|
||||||
if cosine_threshold is None:
|
if cosine_threshold is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
|
"cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue