From ff0a18e08c720f7ac63a36bf6c3f40dd2897d26e Mon Sep 17 00:00:00 2001
From: yangdx <gzdaniel@me.com>
Date: Wed, 27 Aug 2025 12:23:22 +0800
Subject: [PATCH] Unify SUMMARY_LANGUAGE and ENTITY_TYPES implementation
 method

---
 env.example                     | 11 +++++++----
 lightrag/api/config.py          |  2 +-
 lightrag/api/lightrag_server.py | 10 ++++++++--
 lightrag/constants.py           | 12 ++++++++++--
 lightrag/lightrag.py            |  7 +++++--
 lightrag/operate.py             |  7 +++----
 lightrag/prompt.py              |  1 -
 7 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/env.example b/env.example
index a00b8f88..6cb0dce7 100644
--- a/env.example
+++ b/env.example
@@ -119,9 +119,14 @@ RERANK_BINDING=null
 ########################################
 ### Document processing configuration
 ########################################
-### Language: English, Chinese, French, German ...
-SUMMARY_LANGUAGE=English
 ENABLE_LLM_CACHE_FOR_EXTRACT=true
+
+### Document processing output language: English, Chinese, French, German ...
+SUMMARY_LANGUAGE=English
+
+### Entity types that the LLM will attempt to recognize
+# ENTITY_TYPES=["person", "organization", "location", "event", "concept"]
+
 ### Chunk size for document splitting, 500~1500 is recommended
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -134,8 +139,6 @@ ENABLE_LLM_CACHE_FOR_EXTRACT=true
 # SUMMARY_LENGTH_RECOMMENDED_=600
 ### Maximum context size sent to LLM for description summary
 # SUMMARY_CONTEXT_SIZE=12000
-### Customize the entities that the LLM will attempt to recognize
-# ENTITY_TYPES=["person", "organization", "location", "event", "concept"]
 
 ###############################
 ### Concurrency Configuration
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 70b855f2..eae2f45b 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -38,7 +38,7 @@ from lightrag.constants import (
     DEFAULT_OLLAMA_MODEL_NAME,
     DEFAULT_OLLAMA_MODEL_TAG,
     DEFAULT_RERANK_BINDING,
-    DEFAULT_ENTITY_TYPES
+    DEFAULT_ENTITY_TYPES,
 )
 
 # use the .env that is inside the current folder
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 26c99961..a2a4d848 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -499,7 +499,10 @@ def create_app(args):
             rerank_model_func=rerank_model_func,
             max_parallel_insert=args.max_parallel_insert,
             max_graph_nodes=args.max_graph_nodes,
-            addon_params={"language": args.summary_language, "entity_types": args.entity_types},
+            addon_params={
+                "language": args.summary_language,
+                "entity_types": args.entity_types,
+            },
             ollama_server_infos=ollama_server_infos,
         )
     else:  # azure_openai
@@ -526,7 +529,10 @@ def create_app(args):
             rerank_model_func=rerank_model_func,
             max_parallel_insert=args.max_parallel_insert,
             max_graph_nodes=args.max_graph_nodes,
-            addon_params={"language": args.summary_language, "entity_types": args.entity_types},
+            addon_params={
+                "language": args.summary_language,
+                "entity_types": args.entity_types,
+            },
             ollama_server_infos=ollama_server_infos,
         )
 
diff --git a/lightrag/constants.py b/lightrag/constants.py
index d0271be4..4e85325b 100644
--- a/lightrag/constants.py
+++ b/lightrag/constants.py
@@ -11,7 +11,7 @@ DEFAULT_WOKERS = 2
 DEFAULT_MAX_GRAPH_NODES = 1000
 
 # Default values for extraction settings
-DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for summaries
+DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for document processing
 DEFAULT_MAX_GLEANING = 1
 
 # Number of description fragments to trigger LLM summary
@@ -23,7 +23,15 @@ DEFAULT_SUMMARY_LENGTH_RECOMMENDED = 600
 # Maximum token size sent to LLM for summary
 DEFAULT_SUMMARY_CONTEXT_SIZE = 12000
 # Default entities to extract if ENTITY_TYPES is not specified in .env
-DEFAULT_ENTITY_TYPES = ["organization", "person", "geo", "event", "category"]
+DEFAULT_ENTITY_TYPES = [
+    "organization",
+    "person",
+    "geo",
+    "event",
+    "category",
+    "Equipment",
+    "Location",
+]
 
 # Separator for graph fields
 GRAPH_FIELD_SEP = "<SEP>"
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 235345e8..34ff87e6 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -39,7 +39,8 @@ from lightrag.constants import (
     DEFAULT_MAX_ASYNC,
     DEFAULT_MAX_PARALLEL_INSERT,
     DEFAULT_MAX_GRAPH_NODES,
-    DEFAULT_ENTITY_TYPES
+    DEFAULT_ENTITY_TYPES,
+    DEFAULT_SUMMARY_LANGUAGE,
 )
 from lightrag.utils import get_env_value
 
@@ -348,7 +349,9 @@ class LightRAG:
 
     addon_params: dict[str, Any] = field(
         default_factory=lambda: {
-            "language": get_env_value("SUMMARY_LANGUAGE", "English", str),
+            "language": get_env_value(
+                "SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE, str
+            ),
             "entity_types": get_env_value("ENTITY_TYPES", DEFAULT_ENTITY_TYPES, list),
         }
     )
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 76a0b2c1..38771f7b 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -47,7 +47,8 @@ from .constants import (
     DEFAULT_MAX_TOTAL_TOKENS,
     DEFAULT_RELATED_CHUNK_NUMBER,
     DEFAULT_KG_CHUNK_PICK_METHOD,
-    DEFAULT_ENTITY_TYPES
+    DEFAULT_ENTITY_TYPES,
+    DEFAULT_SUMMARY_LANGUAGE,
 )
 from .kg.shared_storage import get_storage_keyed_lock
 import time
@@ -1651,9 +1652,7 @@ async def extract_entities(
 
     ordered_chunks = list(chunks.items())
     # add language and example number params to prompt
-    language = global_config["addon_params"].get(
-        "language", PROMPTS["DEFAULT_LANGUAGE"]
-    )
+    language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE)
     entity_types = global_config["addon_params"].get(
         "entity_types", DEFAULT_ENTITY_TYPES
     )
diff --git a/lightrag/prompt.py b/lightrag/prompt.py
index 69fb2ef3..f8ea6589 100644
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -4,7 +4,6 @@ from typing import Any
 
 PROMPTS: dict[str, Any] = {}
 
-PROMPTS["DEFAULT_LANGUAGE"] = "English"
 PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
 PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
 PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"