From e24b2ed4fa7451faf5c089f9741f22f148bfbb04 Mon Sep 17 00:00:00 2001
From: BukeLy
Date: Thu, 20 Nov 2025 02:34:55 +0800
Subject: [PATCH] fix: Prioritize workspace-specific legacy collections in Qdrant migration

Why this change is needed:
The E2E test test_backward_compat_old_workspace_naming_qdrant was failing
because _find_legacy_collection() searched for generic
"lightrag_vdb_{namespace}" before workspace-specific
"{workspace}_{namespace}" collections. When both existed, it would always
find the generic one first (which might be empty), ignoring the workspace
collection that actually contained the data to migrate.

How it solves it:
Reordered the candidates list in _find_legacy_collection() to prioritize
more specific naming patterns over generic ones:
1. {workspace}_{namespace} (most specific, old workspace format)
2. lightrag_vdb_{namespace} (generic legacy format)
3. {namespace} (most generic, oldest format)

This ensures the migration finds the correct source collection with
actual data.

Impact:
- Fixes test_backward_compat_old_workspace_naming_qdrant which creates a
  "prod_chunks" collection with 10 points
- Migration will now correctly find and migrate from workspace-specific
  legacy collections before falling back to generic collections
- Maintains backward compatibility with all legacy naming patterns

Testing:
Run: pytest tests/test_e2e_multi_instance.py::test_backward_compat_old_workspace_naming_qdrant -v
---
 lightrag/kg/qdrant_impl.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py
index b1e24de0..4e85db81 100644
--- a/lightrag/kg/qdrant_impl.py
+++ b/lightrag/kg/qdrant_impl.py
@@ -75,8 +75,8 @@ def _find_legacy_collection(
     This function tries multiple naming patterns to locate legacy collections
     created by older versions of LightRAG:
 
-    1. lightrag_vdb_{namespace} - Current legacy format
-    2. {workspace}_{namespace} - Old format with workspace (pre-model-isolation)
+    1. {workspace}_{namespace} - Old format with workspace (pre-model-isolation) - HIGHEST PRIORITY
+    2. lightrag_vdb_{namespace} - Current legacy format
     3. {namespace} - Old format without workspace (pre-model-isolation)
 
     Args:
@@ -88,10 +88,11 @@ def _find_legacy_collection(
         Collection name if found, None otherwise
     """
     # Try multiple naming patterns for backward compatibility
+    # More specific names (with workspace) have higher priority
     candidates = [
+        f"{workspace}_{namespace}" if workspace else None,  # Old format with workspace - most specific
         f"lightrag_vdb_{namespace}",  # New legacy format
-        f"{workspace}_{namespace}" if workspace else None,  # Old format with workspace
-        namespace,  # Old format without workspace
+        namespace,  # Old format without workspace - most generic
     ]
 
     for candidate in candidates: