fix: prevent data loss in PostgreSQL migration and add doc_status table creation
This commit fixes two critical issues in PostgreSQL storage:

BUG 1: Legacy table cleanup causing data loss across workspaces
---------------------------------------------------------------
PROBLEM:
- After migrating workspace_a's data from the legacy table, the ENTIRE legacy table was deleted
- This caused workspace_b's data (still in the legacy table) to be lost
- Multi-tenant data isolation was violated

FIX:
- Implement workspace-aware cleanup: only delete the migrated workspace's data
- Check whether other workspaces still have data before dropping the table
- Only drop the legacy table when it becomes completely empty
- If other workspaces' data exists, preserve the legacy table with the remaining records

Location: postgres_impl.py PGVectorStorage.setup_table() lines 2510-2567

Test verification:
- test_workspace_migration_isolation_e2e_postgres validates this fix

BUG 2: PGDocStatusStorage missing table initialization
-------------------------------------------------------
PROBLEM:
- PGDocStatusStorage.initialize() only set the workspace and never created the table
- This caused "relation 'lightrag_doc_status' does not exist" errors
- Document insertion (ainsert) failed immediately

FIX:
- Add table creation to the initialize() method using _pg_create_table()
- Consistent with other storage implementations:
  * MongoDocStatusStorage creates collections
  * JsonDocStatusStorage creates directories
  * PGDocStatusStorage now creates tables ✓

Location: postgres_impl.py PGDocStatusStorage.initialize() lines 2965-2971

Test Results:
- Unit tests: 13/13 passed (test_unified_lock_safety, test_workspace_migration_isolation, test_dimension_mismatch)
- E2E tests require a PostgreSQL server

Related: PR #2391 (Vector Storage Model Isolation)
This commit is contained in:
parent
204a2535c8
commit
16fff353d9
1 changed files with 60 additions and 14 deletions
|
|
@ -2507,23 +2507,63 @@ class PGVectorStorage(BaseVectorStorage):
|
|||
# Create vector index after successful migration
|
||||
await db._create_vector_index(table_name, embedding_dim)
|
||||
|
||||
# Delete legacy table after successful migration
|
||||
# Data has been verified to match, so legacy table is no longer needed
|
||||
# and keeping it would cause Case 1 warnings on next startup
|
||||
# Clean up migrated data from legacy table
|
||||
# CRITICAL: Only delete current workspace's data, not the entire table!
|
||||
# Other workspaces may still have data in the legacy table.
|
||||
try:
|
||||
logger.info(
|
||||
f"PostgreSQL: Deleting legacy table '{legacy_table_name}'..."
|
||||
)
|
||||
drop_query = f"DROP TABLE {legacy_table_name}"
|
||||
await db.execute(drop_query, None)
|
||||
logger.info(
|
||||
f"PostgreSQL: Legacy table '{legacy_table_name}' deleted successfully"
|
||||
)
|
||||
if workspace:
|
||||
# Delete only current workspace's migrated data
|
||||
logger.info(
|
||||
f"PostgreSQL: Deleting migrated workspace '{workspace}' data from legacy table '{legacy_table_name}'..."
|
||||
)
|
||||
delete_query = (
|
||||
f"DELETE FROM {legacy_table_name} WHERE workspace = $1"
|
||||
)
|
||||
await db.execute(delete_query, [workspace])
|
||||
logger.info(
|
||||
f"PostgreSQL: Deleted workspace '{workspace}' data from legacy table"
|
||||
)
|
||||
|
||||
# Check if legacy table still has data from other workspaces
|
||||
remaining_query = (
|
||||
f"SELECT COUNT(*) as count FROM {legacy_table_name}"
|
||||
)
|
||||
remaining_result = await db.query(remaining_query, [])
|
||||
remaining_count = (
|
||||
remaining_result.get("count", 0) if remaining_result else 0
|
||||
)
|
||||
|
||||
if remaining_count == 0:
|
||||
# Table is now empty, safe to drop
|
||||
logger.info(
|
||||
f"PostgreSQL: Legacy table '{legacy_table_name}' is empty, deleting..."
|
||||
)
|
||||
drop_query = f"DROP TABLE {legacy_table_name}"
|
||||
await db.execute(drop_query, None)
|
||||
logger.info(
|
||||
f"PostgreSQL: Legacy table '{legacy_table_name}' deleted successfully"
|
||||
)
|
||||
else:
|
||||
# Table still has data from other workspaces, preserve it
|
||||
logger.info(
|
||||
f"PostgreSQL: Legacy table '{legacy_table_name}' preserved ({remaining_count} records from other workspaces remain)"
|
||||
)
|
||||
else:
|
||||
# No workspace specified - delete entire table (legacy behavior for backward compatibility)
|
||||
logger.warning(
|
||||
f"PostgreSQL: No workspace specified, deleting entire legacy table '{legacy_table_name}'..."
|
||||
)
|
||||
drop_query = f"DROP TABLE {legacy_table_name}"
|
||||
await db.execute(drop_query, None)
|
||||
logger.info(
|
||||
f"PostgreSQL: Legacy table '{legacy_table_name}' deleted"
|
||||
)
|
||||
|
||||
except Exception as delete_error:
|
||||
# If deletion fails, user will see Case 1 warning on next startup
|
||||
# If cleanup fails, log warning but don't fail migration
|
||||
logger.warning(
|
||||
f"PostgreSQL: Failed to delete legacy table '{legacy_table_name}': {delete_error}. "
|
||||
"You may need to delete it manually."
|
||||
f"PostgreSQL: Failed to clean up legacy table '{legacy_table_name}': {delete_error}. "
|
||||
"Migration succeeded, but manual cleanup may be needed."
|
||||
)
|
||||
|
||||
except PostgreSQLMigrationError:
|
||||
|
|
@ -2922,6 +2962,12 @@ class PGDocStatusStorage(DocStatusStorage):
|
|||
# Use "default" for compatibility (lowest priority)
|
||||
self.workspace = "default"
|
||||
|
||||
# Create table if not exists
|
||||
table_name = namespace_to_table_name(self.namespace)
|
||||
table_exists = await _pg_table_exists(self.db, table_name)
|
||||
if not table_exists:
|
||||
await _pg_create_table(self.db, table_name, table_name)
|
||||
|
||||
async def finalize(self):
|
||||
if self.db is not None:
|
||||
await ClientManager.release_client(self.db)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue