diff --git a/env.example b/env.example
index 534bd22a..a95ff9bf 100644
--- a/env.example
+++ b/env.example
@@ -29,7 +29,7 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
# OLLAMA_EMULATING_MODEL_NAME=lightrag
OLLAMA_EMULATING_MODEL_TAG=latest
-### Max nodes return from graph retrieval in webui
+### Max nodes for graph retrieval (also update the WebUI local setting, which is capped at this value)
# MAX_GRAPH_NODES=1000
### Logging level
@@ -172,6 +172,8 @@ MAX_PARALLEL_INSERT=2
### LLM Configuration
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock, gemini
### LLM_BINDING_HOST: host only for Ollama, endpoint for other LLM service
+### If LightRAG is deployed in Docker:
+### use host.docker.internal instead of localhost in LLM_BINDING_HOST
###########################################################################
### LLM request timeout setting for all llm (0 means no timeout for Ollma)
# LLM_TIMEOUT=180
@@ -181,7 +183,7 @@ LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
-### Optional for Azure
+### Env vars for Azure OpenAI
# AZURE_OPENAI_API_VERSION=2024-08-01-preview
# AZURE_OPENAI_DEPLOYMENT=gpt-4o
@@ -196,22 +198,16 @@ LLM_BINDING_API_KEY=your_api_key
# LLM_MODEL=gemini-flash-latest
# LLM_BINDING_API_KEY=your_gemini_api_key
# LLM_BINDING_HOST=https://generativelanguage.googleapis.com
-GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
+
+### use the following command to see all supported options for Gemini
+### lightrag-server --llm-binding gemini --help
+### Gemini Specific Parameters
# GEMINI_LLM_MAX_OUTPUT_TOKENS=9000
# GEMINI_LLM_TEMPERATURE=0.7
-
-### OpenAI Compatible API Specific Parameters
-### Increased temperature values may mitigate infinite inference loops in certain LLM, such as Qwen3-30B.
-# OPENAI_LLM_TEMPERATURE=0.9
-### Set the max_tokens to mitigate endless output of some LLM (less than LLM_TIMEOUT * llm_output_tokens/second, i.e. 9000 = 180s * 50 tokens/s)
-### Typically, max_tokens does not include prompt content, though some models, such as Gemini Models, are exceptions
-### For vLLM/SGLang deployed models, or most of OpenAI compatible API provider
-# OPENAI_LLM_MAX_TOKENS=9000
-### For OpenAI o1-mini or newer modles
-OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
-
-#### OpenAI's new API utilizes max_completion_tokens instead of max_tokens
-# OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+### Enable Thinking
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": -1, "include_thoughts": true}'
+### Disable Thinking
+# GEMINI_LLM_THINKING_CONFIG='{"thinking_budget": 0, "include_thoughts": false}'
### use the following command to see all support options for OpenAI, azure_openai or OpenRouter
### lightrag-server --llm-binding openai --help
@@ -222,8 +218,17 @@ OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
### Qwen3 Specific Parameters deploy by vLLM
# OPENAI_LLM_EXTRA_BODY='{"chat_template_kwargs": {"enable_thinking": false}}'
+### OpenAI Compatible API Specific Parameters
+### Increased temperature values may mitigate infinite inference loops in certain LLMs, such as Qwen3-30B.
+# OPENAI_LLM_TEMPERATURE=0.9
+### Set max_tokens to mitigate endless output from some LLMs (keep it below LLM_TIMEOUT * llm_output_tokens/second, e.g. 9000 = 180s * 50 tokens/s)
+### Typically, max_tokens does not include prompt content
+### For vLLM/SGLang-deployed models and most OpenAI-compatible API providers
+# OPENAI_LLM_MAX_TOKENS=9000
+### OpenAI o1-mini and newer models use max_completion_tokens instead of max_tokens
+OPENAI_LLM_MAX_COMPLETION_TOKENS=9000
+
### use the following command to see all support options for Ollama LLM
-### If LightRAG deployed in Docker uses host.docker.internal instead of localhost in LLM_BINDING_HOST
### lightrag-server --llm-binding ollama --help
### Ollama Server Specific Parameters
### OLLAMA_LLM_NUM_CTX must be provided, and should at least larger than MAX_TOTAL_TOKENS + 2000
@@ -240,6 +245,8 @@ OLLAMA_LLM_NUM_CTX=32768
### Embedding Configuration (Should not be changed after the first file processed)
### EMBEDDING_BINDING: ollama, openai, azure_openai, jina, lollms, aws_bedrock
### EMBEDDING_BINDING_HOST: host only for Ollama, endpoint for other Embedding service
+### If LightRAG is deployed in Docker:
+### use host.docker.internal instead of localhost in EMBEDDING_BINDING_HOST
#######################################################################################
# EMBEDDING_TIMEOUT=30
diff --git a/lightrag/api/__init__.py b/lightrag/api/__init__.py
index 02b01dcc..8fdc9fae 100644
--- a/lightrag/api/__init__.py
+++ b/lightrag/api/__init__.py
@@ -1 +1 @@
-__api_version__ = "0252"
+__api_version__ = "0254"
diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py
index 3e479a53..f86bcea8 100644
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@@ -1081,11 +1081,11 @@ async def pipeline_enqueue_file(
result = converter.convert(file_path)
content = result.document.export_to_markdown()
else:
- if not pm.is_installed("pypdf2"): # type: ignore
- pm.install("pypdf2")
+ if not pm.is_installed("pypdf"): # type: ignore
+ pm.install("pypdf")
if not pm.is_installed("pycryptodome"): # type: ignore
pm.install("pycryptodome")
- from PyPDF2 import PdfReader # type: ignore
+ from pypdf import PdfReader # type: ignore
from io import BytesIO
pdf_file = BytesIO(file)
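(Editorial note, not part of the diff) pypdf's PdfReader is effectively a drop-in replacement for PyPDF2.PdfReader in this code path, so downstream text extraction is unchanged. A minimal sketch of the assumed usage for in-memory PDF bytes; the helper name is illustrative only:

    from io import BytesIO
    from pypdf import PdfReader

    def pdf_bytes_to_text(file: bytes) -> str:
        reader = PdfReader(BytesIO(file))
        # extract_text() can return None for image-only pages
        return "\n".join(page.extract_text() or "" for page in reader.pages)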
diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py
index 014499f2..bf6e7b17 100644
--- a/lightrag/kg/json_doc_status_impl.py
+++ b/lightrag/kg/json_doc_status_impl.py
@@ -161,7 +161,20 @@ class JsonDocStatusStorage(DocStatusStorage):
logger.debug(
f"[{self.workspace}] Process {os.getpid()} doc status writting {len(data_dict)} records to {self.namespace}"
)
- write_json(data_dict, self._file_name)
+
+ # Write JSON and check if sanitization was applied
+ needs_reload = write_json(data_dict, self._file_name)
+
+ # If data was sanitized, reload cleaned data to update shared memory
+ if needs_reload:
+ logger.info(
+ f"[{self.workspace}] Reloading sanitized data into shared memory for {self.namespace}"
+ )
+ cleaned_data = load_json(self._file_name)
+ if cleaned_data is not None:
+ self._data.clear()
+ self._data.update(cleaned_data)
+
await clear_all_update_flags(self.final_namespace)
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
diff --git a/lightrag/kg/json_kv_impl.py b/lightrag/kg/json_kv_impl.py
index fd016b14..f9adb20f 100644
--- a/lightrag/kg/json_kv_impl.py
+++ b/lightrag/kg/json_kv_impl.py
@@ -81,7 +81,20 @@ class JsonKVStorage(BaseKVStorage):
logger.debug(
f"[{self.workspace}] Process {os.getpid()} KV writting {data_count} records to {self.namespace}"
)
- write_json(data_dict, self._file_name)
+
+ # Write JSON and check if sanitization was applied
+ needs_reload = write_json(data_dict, self._file_name)
+
+ # If data was sanitized, reload cleaned data to update shared memory
+ if needs_reload:
+ logger.info(
+ f"[{self.workspace}] Reloading sanitized data into shared memory for {self.namespace}"
+ )
+ cleaned_data = load_json(self._file_name)
+ if cleaned_data is not None:
+ self._data.clear()
+ self._data.update(cleaned_data)
+
await clear_all_update_flags(self.final_namespace)
async def get_by_id(self, id: str) -> dict[str, Any] | None:
@@ -224,7 +237,7 @@ class JsonKVStorage(BaseKVStorage):
data: Original data dictionary that may contain legacy structure
Returns:
- Migrated data dictionary with flattened cache keys
+ Migrated data dictionary with flattened cache keys (sanitized if needed)
"""
from lightrag.utils import generate_cache_key
@@ -261,8 +274,17 @@ class JsonKVStorage(BaseKVStorage):
logger.info(
f"[{self.workspace}] Migrated {migration_count} legacy cache entries to flattened structure"
)
- # Persist migrated data immediately
- write_json(migrated_data, self._file_name)
+ # Persist migrated data immediately and check if sanitization was applied
+ needs_reload = write_json(migrated_data, self._file_name)
+
+ # If data was sanitized during write, reload cleaned data
+ if needs_reload:
+ logger.info(
+ f"[{self.workspace}] Reloading sanitized migration data for {self.namespace}"
+ )
+ cleaned_data = load_json(self._file_name)
+ if cleaned_data is not None:
+ return cleaned_data # Return cleaned data to update shared memory
return migrated_data
diff --git a/lightrag/tools/clean_llm_query_cache.py b/lightrag/tools/clean_llm_query_cache.py
index 1b688c29..eca658c7 100644
--- a/lightrag/tools/clean_llm_query_cache.py
+++ b/lightrag/tools/clean_llm_query_cache.py
@@ -873,6 +873,31 @@ class CleanupTool:
storage_name = STORAGE_TYPES[choice]
+ # Special warning for JsonKVStorage about concurrent access
+ if storage_name == "JsonKVStorage":
+ print("\n" + "=" * 60)
+ print(f"{BOLD_RED}⚠️ IMPORTANT WARNING - JsonKVStorage Concurrency{RESET}")
+ print("=" * 60)
+ print("\nJsonKVStorage is an in-memory database that does NOT support")
+ print("concurrent access to the same file by multiple programs.")
+ print("\nBefore proceeding, please ensure that:")
+ print(" • LightRAG Server is completely shut down")
+ print(" • No other programs are accessing the storage files")
+ print("\n" + "=" * 60)
+
+ confirm = (
+ input("\nHas LightRAG Server been shut down? (yes/no): ")
+ .strip()
+ .lower()
+ )
+ if confirm != "yes":
+ print(
+ "\n✓ Operation cancelled - Please shut down LightRAG Server first"
+ )
+ return None, None, None
+
+ print("✓ Proceeding with JsonKVStorage cleanup...")
+
# Check configuration (warnings only, doesn't block)
print("\nChecking configuration...")
self.check_env_vars(storage_name)
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 460ede3c..b78b7523 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -56,6 +56,9 @@ if not logger.handlers:
# Set httpx logging level to WARNING
logging.getLogger("httpx").setLevel(logging.WARNING)
+# Precompile regex pattern for JSON sanitization (module-level, compiled once)
+_SURROGATE_PATTERN = re.compile(r"[\uD800-\uDFFF\uFFFE\uFFFF]")
+
# Global import for pypinyin with startup-time logging
try:
import pypinyin
@@ -927,9 +930,123 @@ def load_json(file_name):
return json.load(f)
+def _sanitize_string_for_json(text: str) -> str:
+ """Remove characters that cannot be encoded in UTF-8 for JSON serialization.
+
+ Uses regex for optimal performance with zero-copy optimization for clean strings.
+ Fast detection path for clean strings (99% of cases) with efficient removal for dirty strings.
+
+ Args:
+ text: String to sanitize
+
+ Returns:
+ Original string if clean (zero-copy), sanitized string if dirty
+ """
+ if not text:
+ return text
+
+ # Fast path: Check if sanitization is needed using C-level regex search
+ if not _SURROGATE_PATTERN.search(text):
+ return text # Zero-copy for clean strings - most common case
+
+ # Slow path: Remove problematic characters using C-level regex substitution
+ return _SURROGATE_PATTERN.sub("", text)
+
+
+class SanitizingJSONEncoder(json.JSONEncoder):
+ """
+ Custom JSON encoder that sanitizes data during serialization.
+
+ This encoder cleans strings during the encoding process without creating
+ a full copy of the data structure, making it memory-efficient for large datasets.
+ """
+
+ def encode(self, o):
+ """Override encode method to handle simple string cases"""
+ if isinstance(o, str):
+ return json.encoder.encode_basestring(_sanitize_string_for_json(o))
+ return super().encode(o)
+
+ def iterencode(self, o, _one_shot=False):
+ """
+ Override iterencode to sanitize strings during serialization.
+ This is the core method that handles complex nested structures.
+ """
+ # Preprocess: sanitize all strings in the object
+ sanitized = self._sanitize_for_encoding(o)
+
+ # Call parent's iterencode with sanitized data
+ for chunk in super().iterencode(sanitized, _one_shot):
+ yield chunk
+
+ def _sanitize_for_encoding(self, obj):
+ """
+ Recursively sanitize strings in an object.
+ Creates new objects only when necessary to avoid deep copies.
+
+ Args:
+ obj: Object to sanitize
+
+ Returns:
+ Sanitized object with cleaned strings
+ """
+ if isinstance(obj, str):
+ return _sanitize_string_for_json(obj)
+
+ elif isinstance(obj, dict):
+ # Create new dict with sanitized keys and values
+ new_dict = {}
+ for k, v in obj.items():
+ clean_k = _sanitize_string_for_json(k) if isinstance(k, str) else k
+ clean_v = self._sanitize_for_encoding(v)
+ new_dict[clean_k] = clean_v
+ return new_dict
+
+ elif isinstance(obj, (list, tuple)):
+ # Sanitize list/tuple elements
+ cleaned = [self._sanitize_for_encoding(item) for item in obj]
+ return type(obj)(cleaned) if isinstance(obj, tuple) else cleaned
+
+ else:
+ # Numbers, booleans, None, etc. remain unchanged
+ return obj
+
+
def write_json(json_obj, file_name):
+ """
+ Write JSON data to file with optimized sanitization strategy.
+
+ This function uses a two-stage approach:
+ 1. Fast path: Try direct serialization (works for clean data ~99% of time)
+ 2. Slow path: Use custom encoder that sanitizes during serialization
+
+ The custom encoder approach avoids creating a deep copy of the data,
+ making it memory-efficient. When sanitization occurs, the caller should
+ reload the cleaned data from the file to update shared memory.
+
+ Args:
+ json_obj: Object to serialize (may be a shallow copy from shared memory)
+ file_name: Output file path
+
+ Returns:
+ bool: True if sanitization was applied (caller should reload data),
+ False if direct write succeeded (no reload needed)
+ """
+ try:
+ # Strategy 1: Fast path - try direct serialization
+ with open(file_name, "w", encoding="utf-8") as f:
+ json.dump(json_obj, f, indent=2, ensure_ascii=False)
+ return False # No sanitization needed, no reload required
+
+ except (UnicodeEncodeError, UnicodeDecodeError) as e:
+ logger.debug(f"Direct JSON write failed, using sanitizing encoder: {e}")
+
+ # Strategy 2: Use custom encoder (sanitizes during serialization, zero memory copy)
with open(file_name, "w", encoding="utf-8") as f:
- json.dump(json_obj, f, indent=2, ensure_ascii=False)
+ json.dump(json_obj, f, indent=2, ensure_ascii=False, cls=SanitizingJSONEncoder)
+
+ logger.info(f"JSON sanitization applied during write: {file_name}")
+ return True # Sanitization applied, reload recommended
class TokenizerInterface(Protocol):
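(Editorial note, not part of the diff) The write-then-reload pattern that the JSON storage backends now apply against write_json()'s new return value looks roughly like the sketch below. The sample data is hypothetical and only exists to trigger the sanitizing fallback: a lone surrogate cannot be encoded to UTF-8, so the fast path raises UnicodeEncodeError and the sanitizing encoder takes over.

    from lightrag.utils import write_json, load_json

    data = {"doc-1": {"content": "ok"}, "doc-2": {"content": "bad \ud800 char"}}

    # write_json() returns True only when the sanitizing encoder had to strip
    # characters; in that case the cleaned file is the source of truth, so reload it.
    if write_json(data, "kv_store_sample.json"):
        cleaned = load_json("kv_store_sample.json")
        if cleaned is not None:
            data.clear()
            data.update(cleaned)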
diff --git a/lightrag_webui/src/components/retrieval/QuerySettings.tsx b/lightrag_webui/src/components/retrieval/QuerySettings.tsx
index 4ffebbb1..0b0096c0 100644
--- a/lightrag_webui/src/components/retrieval/QuerySettings.tsx
+++ b/lightrag_webui/src/components/retrieval/QuerySettings.tsx
@@ -40,7 +40,6 @@ export default function QuerySettings() {
// Default values for reset functionality
const defaultValues = useMemo(() => ({
mode: 'mix' as QueryMode,
- response_type: 'Multiple Paragraphs',
top_k: 40,
chunk_top_k: 20,
max_entity_tokens: 6000,
@@ -153,46 +152,6 @@ export default function QuerySettings() {
>
- {/* Response Format */}
- <>
- {t('retrievePanel.querySettings.responseFormatTooltip')}