fix: preserve EmbeddingFunc object in global_config
Why this change is needed:
asdict() recursively converts nested dataclasses to dicts. When LightRAG creates global_config with asdict(self), the embedding_func field (an EmbeddingFunc dataclass) is converted to a plain dict and loses its get_model_identifier() method.

How it solves it:
1. Save the original EmbeddingFunc object before the asdict() call.
2. Restore it in global_config after asdict().
3. Add a None check and debug logging in _generate_collection_suffix.

Impact:
- E2E tests with full LightRAG initialization now work correctly.
- Vector storage model isolation features function properly.
- Backward compatibility is maintained.

Testing:
All unit tests pass (12/12 in migration tests).
This commit is contained in:
parent
519f7f61c4
commit
fa7a43a6d2
2 changed files with 16 additions and 8 deletions
|
|
@@ -233,8 +233,12 @@ class BaseVectorStorage(StorageNameSpace, ABC):
|
|||
return self.embedding_func.get_model_identifier()
|
||||
elif 'embedding_func' in self.global_config:
|
||||
original_embedding_func = self.global_config['embedding_func']
|
||||
if hasattr(original_embedding_func, 'get_model_identifier'):
|
||||
if original_embedding_func is not None and hasattr(original_embedding_func, 'get_model_identifier'):
|
||||
return original_embedding_func.get_model_identifier()
|
||||
else:
|
||||
# Debug: log why we couldn't get model identifier
|
||||
from lightrag.utils import logger
|
||||
logger.debug(f"Could not get model_identifier: embedding_func is {type(original_embedding_func)}, has method={hasattr(original_embedding_func, 'get_model_identifier') if original_embedding_func else False}")
|
||||
|
||||
# Fallback: no model identifier available
|
||||
return ""
|
||||
|
|
|
|||
|
|
@@ -518,14 +518,10 @@ class LightRAG:
|
|||
f"max_total_tokens({self.summary_max_tokens}) should greater than summary_length_recommended({self.summary_length_recommended})"
|
||||
)
|
||||
|
||||
# Fix global_config now
|
||||
global_config = asdict(self)
|
||||
|
||||
_print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()])
|
||||
logger.debug(f"LightRAG init with param:\n {_print_config}\n")
|
||||
|
||||
# Init Embedding
|
||||
# Step 1: Capture max_token_size before applying decorator (decorator strips dataclass attributes)
|
||||
# Step 1: Capture embedding_func and max_token_size before applying decorator
|
||||
# (decorator strips dataclass attributes, and asdict() converts EmbeddingFunc to dict)
|
||||
original_embedding_func = self.embedding_func
|
||||
embedding_max_token_size = None
|
||||
if self.embedding_func and hasattr(self.embedding_func, "max_token_size"):
|
||||
embedding_max_token_size = self.embedding_func.max_token_size
|
||||
|
|
@@ -534,6 +530,14 @@ class LightRAG:
|
|||
)
|
||||
self.embedding_token_limit = embedding_max_token_size
|
||||
|
||||
# Fix global_config now
|
||||
global_config = asdict(self)
|
||||
# Restore original EmbeddingFunc object (asdict converts it to dict)
|
||||
global_config['embedding_func'] = original_embedding_func
|
||||
|
||||
_print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()])
|
||||
logger.debug(f"LightRAG init with param:\n {_print_config}\n")
|
||||
|
||||
# Step 2: Apply priority wrapper decorator
|
||||
self.embedding_func = priority_limit_async_func_call(
|
||||
self.embedding_func_max_async,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue