From fa7a43a6d24a47ff089b83398718bb1b4084ddc4 Mon Sep 17 00:00:00 2001
From: BukeLy
Date: Thu, 20 Nov 2025 00:55:06 +0800
Subject: [PATCH] fix: preserve EmbeddingFunc object in global_config

Why this change is needed:
asdict() converts nested dataclasses to dicts. When LightRAG creates
global_config with asdict(self), the embedding_func field (which is an
EmbeddingFunc dataclass) gets converted to a plain dict, losing its
get_model_identifier() method.

How it solves it:
1. Save original EmbeddingFunc object before asdict() call
2. Restore it in global_config after asdict()
3. Add null check and debug logging in _generate_collection_suffix

Impact:
- E2E tests with full LightRAG initialization now work correctly
- Vector storage model isolation features function properly
- Maintains backward compatibility

Testing:
All unit tests pass (12/12 in migration tests)
---
 lightrag/base.py     |  6 +++++-
 lightrag/lightrag.py | 18 +++++++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/lightrag/base.py b/lightrag/base.py
index 9671f1b7..b89e114d 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -233,8 +233,12 @@ class BaseVectorStorage(StorageNameSpace, ABC):
             return self.embedding_func.get_model_identifier()
         elif 'embedding_func' in self.global_config:
             original_embedding_func = self.global_config['embedding_func']
-            if hasattr(original_embedding_func, 'get_model_identifier'):
+            if original_embedding_func is not None and hasattr(original_embedding_func, 'get_model_identifier'):
                 return original_embedding_func.get_model_identifier()
+            else:
+                # Debug: log why we couldn't get model identifier
+                from lightrag.utils import logger
+                logger.debug(f"Could not get model_identifier: embedding_func is {type(original_embedding_func)}, has method={hasattr(original_embedding_func, 'get_model_identifier') if original_embedding_func else False}")
 
         # Fallback: no model identifier available
         return ""
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 8a638759..9fd5a4b3 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -518,14 +518,10 @@ class LightRAG:
                 f"max_total_tokens({self.summary_max_tokens}) should greater than summary_length_recommended({self.summary_length_recommended})"
             )
 
-        # Fix global_config now
-        global_config = asdict(self)
-
-        _print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()])
-        logger.debug(f"LightRAG init with param:\n {_print_config}\n")
-
         # Init Embedding
-        # Step 1: Capture max_token_size before applying decorator (decorator strips dataclass attributes)
+        # Step 1: Capture embedding_func and max_token_size before applying decorator
+        # (decorator strips dataclass attributes, and asdict() converts EmbeddingFunc to dict)
+        original_embedding_func = self.embedding_func
         embedding_max_token_size = None
         if self.embedding_func and hasattr(self.embedding_func, "max_token_size"):
             embedding_max_token_size = self.embedding_func.max_token_size
@@ -534,6 +530,14 @@
             )
         self.embedding_token_limit = embedding_max_token_size
 
+        # Fix global_config now
+        global_config = asdict(self)
+        # Restore original EmbeddingFunc object (asdict converts it to dict)
+        global_config['embedding_func'] = original_embedding_func
+
+        _print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()])
+        logger.debug(f"LightRAG init with param:\n {_print_config}\n")
+
         # Step 2: Apply priority wrapper decorator
         self.embedding_func = priority_limit_async_func_call(
             self.embedding_func_max_async,
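
Note (editor's illustration, not part of the patch): the sketch below reproduces the asdict() behavior the commit message describes, using simplified stand-in dataclasses rather than LightRAG's actual EmbeddingFunc and LightRAG definitions; the class names and the get_model_identifier() body here are hypothetical.

    # Editor's sketch with hypothetical stand-ins, not LightRAG source: why asdict()
    # loses get_model_identifier(), and how restoring the original object fixes it.
    from dataclasses import dataclass, asdict

    @dataclass
    class EmbeddingFuncLike:
        # simplified stand-in for the EmbeddingFunc dataclass
        model_name: str
        embedding_dim: int

        def get_model_identifier(self) -> str:
            return f"{self.model_name}_{self.embedding_dim}"

    @dataclass
    class RAGLike:
        # simplified stand-in for the LightRAG dataclass
        working_dir: str
        embedding_func: EmbeddingFuncLike

    rag = RAGLike("./rag_storage", EmbeddingFuncLike("text-embedding-3-small", 1536))

    # asdict() recurses into nested dataclasses: embedding_func becomes a plain dict
    global_config = asdict(rag)
    print(type(global_config["embedding_func"]))                              # <class 'dict'>
    print(hasattr(global_config["embedding_func"], "get_model_identifier"))   # False

    # The patch keeps a reference taken before asdict() and puts it back afterwards
    global_config["embedding_func"] = rag.embedding_func
    print(global_config["embedding_func"].get_model_identifier())             # text-embedding-3-small_1536

With the object restored in global_config, the lookup in base.py can call get_model_identifier() again instead of falling through to the empty-string fallback.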