From fa7a43a6d24a47ff089b83398718bb1b4084ddc4 Mon Sep 17 00:00:00 2001
From: BukeLy
Date: Thu, 20 Nov 2025 00:55:06 +0800
Subject: [PATCH] fix: preserve EmbeddingFunc object in global_config

Why this change is needed:
asdict() converts nested dataclasses to dicts. When LightRAG creates
global_config with asdict(self), the embedding_func field (which is an
EmbeddingFunc dataclass) gets converted to a plain dict, losing its
get_model_identifier() method.

How it solves it:
1. Save original EmbeddingFunc object before asdict() call
2. Restore it in global_config after asdict()
3. Add null check and debug logging in _generate_collection_suffix

Impact:
- E2E tests with full LightRAG initialization now work correctly
- Vector storage model isolation features function properly
- Maintains backward compatibility

Testing:
All unit tests pass (12/12 in migration tests)
---
 lightrag/base.py     |  6 +++++-
 lightrag/lightrag.py | 18 +++++++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/lightrag/base.py b/lightrag/base.py
index 9671f1b7..b89e114d 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -233,8 +233,12 @@ class BaseVectorStorage(StorageNameSpace, ABC):
             return self.embedding_func.get_model_identifier()
         elif 'embedding_func' in self.global_config:
             original_embedding_func = self.global_config['embedding_func']
-            if hasattr(original_embedding_func, 'get_model_identifier'):
+            if original_embedding_func is not None and hasattr(original_embedding_func, 'get_model_identifier'):
                 return original_embedding_func.get_model_identifier()
+            else:
+                # Debug: log why we couldn't get model identifier
+                from lightrag.utils import logger
+                logger.debug(f"Could not get model_identifier: embedding_func is {type(original_embedding_func)}, has method={hasattr(original_embedding_func, 'get_model_identifier') if original_embedding_func else False}")
 
         # Fallback: no model identifier available
         return ""
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 8a638759..9fd5a4b3 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -518,14 +518,10 @@ class LightRAG:
                 f"max_total_tokens({self.summary_max_tokens}) should greater than summary_length_recommended({self.summary_length_recommended})"
             )
 
-        # Fix global_config now
-        global_config = asdict(self)
-
-        _print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()])
-        logger.debug(f"LightRAG init with param:\n {_print_config}\n")
-
         # Init Embedding
-        # Step 1: Capture max_token_size before applying decorator (decorator strips dataclass attributes)
+        # Step 1: Capture embedding_func and max_token_size before applying decorator
+        # (decorator strips dataclass attributes, and asdict() converts EmbeddingFunc to dict)
+        original_embedding_func = self.embedding_func
         embedding_max_token_size = None
         if self.embedding_func and hasattr(self.embedding_func, "max_token_size"):
             embedding_max_token_size = self.embedding_func.max_token_size
@@ -534,6 +530,14 @@
             )
         self.embedding_token_limit = embedding_max_token_size
 
+        # Fix global_config now
+        global_config = asdict(self)
+        # Restore original EmbeddingFunc object (asdict converts it to dict)
+        global_config['embedding_func'] = original_embedding_func
+
+        _print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()])
+        logger.debug(f"LightRAG init with param:\n {_print_config}\n")
+
         # Step 2: Apply priority wrapper decorator
         self.embedding_func = priority_limit_async_func_call(
             self.embedding_func_max_async,
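
Note (editor's illustration, not part of the patch): the sketch below reproduces the asdict() behavior the commit message describes, using simplified stand-in dataclasses rather than LightRAG's actual EmbeddingFunc and LightRAG definitions; the class names and the get_model_identifier() body here are hypothetical.

    # Editor's sketch with hypothetical stand-ins, not LightRAG source: why asdict()
    # loses get_model_identifier(), and how restoring the original object fixes it.
    from dataclasses import dataclass, asdict

    @dataclass
    class EmbeddingFuncLike:
        # simplified stand-in for the EmbeddingFunc dataclass
        model_name: str
        embedding_dim: int

        def get_model_identifier(self) -> str:
            return f"{self.model_name}_{self.embedding_dim}"

    @dataclass
    class RAGLike:
        # simplified stand-in for the LightRAG dataclass
        working_dir: str
        embedding_func: EmbeddingFuncLike

    rag = RAGLike("./rag_storage", EmbeddingFuncLike("text-embedding-3-small", 1536))

    # asdict() recurses into nested dataclasses: embedding_func becomes a plain dict
    global_config = asdict(rag)
    print(type(global_config["embedding_func"]))                              # <class 'dict'>
    print(hasattr(global_config["embedding_func"], "get_model_identifier"))   # False

    # The patch keeps a reference taken before asdict() and puts it back afterwards
    global_config["embedding_func"] = rag.embedding_func
    print(global_config["embedding_func"].get_model_identifier())             # text-embedding-3-small_1536

With the object restored in global_config, the lookup in base.py can call get_model_identifier() again instead of falling through to the empty-string fallback.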