Merge branch 'entityTypesServerSupport'

This commit is contained in:
yangdx 2025-08-27 12:23:58 +08:00
commit 0be4f0144b
8 changed files with 40 additions and 13 deletions

View file

@ -119,9 +119,14 @@ RERANK_BINDING=null
########################################
### Document processing configuration
########################################
### Language: English, Chinese, French, German ...
SUMMARY_LANGUAGE=English
ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Document processing output language: English, Chinese, French, German ...
SUMMARY_LANGUAGE=English
### Entity types that the LLM will attempt to recognize
# ENTITY_TYPES=["person", "organization", "location", "event", "concept"]
### Chunk size for document splitting, 500~1500 is recommended
# CHUNK_SIZE=1200
# CHUNK_OVERLAP_SIZE=100

View file

@ -38,6 +38,7 @@ from lightrag.constants import (
DEFAULT_OLLAMA_MODEL_NAME,
DEFAULT_OLLAMA_MODEL_TAG,
DEFAULT_RERANK_BINDING,
DEFAULT_ENTITY_TYPES,
)
# use the .env that is inside the current folder
@ -351,6 +352,7 @@ def parse_args() -> argparse.Namespace:
# Add environment variables that were previously read directly
args.cors_origins = get_env_value("CORS_ORIGINS", "*")
args.summary_language = get_env_value("SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE)
args.entity_types = get_env_value("ENTITY_TYPES", DEFAULT_ENTITY_TYPES)
args.whitelist_paths = get_env_value("WHITELIST_PATHS", "/health,/api/*")
# For JWT Auth

View file

@ -499,7 +499,10 @@ def create_app(args):
rerank_model_func=rerank_model_func,
max_parallel_insert=args.max_parallel_insert,
max_graph_nodes=args.max_graph_nodes,
addon_params={"language": args.summary_language},
addon_params={
"language": args.summary_language,
"entity_types": args.entity_types,
},
ollama_server_infos=ollama_server_infos,
)
else: # azure_openai
@ -526,7 +529,10 @@ def create_app(args):
rerank_model_func=rerank_model_func,
max_parallel_insert=args.max_parallel_insert,
max_graph_nodes=args.max_graph_nodes,
addon_params={"language": args.summary_language},
addon_params={
"language": args.summary_language,
"entity_types": args.entity_types,
},
ollama_server_infos=ollama_server_infos,
)

View file

@ -264,6 +264,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.magenta("\n⚙️ RAG Configuration:")
ASCIIColors.white(" ├─ Summary Language: ", end="")
ASCIIColors.yellow(f"{args.summary_language}")
ASCIIColors.white(" ├─ Entity Types: ", end="")
ASCIIColors.yellow(f"{args.entity_types}")
ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
ASCIIColors.yellow(f"{args.max_parallel_insert}")
ASCIIColors.white(" ├─ Chunk Size: ", end="")

View file

@ -11,7 +11,7 @@ DEFAULT_WOKERS = 2
DEFAULT_MAX_GRAPH_NODES = 1000
# Default values for extraction settings
DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for summaries
DEFAULT_SUMMARY_LANGUAGE = "English" # Default language for document processing
DEFAULT_MAX_GLEANING = 1
# Number of description fragments to trigger LLM summary
@ -22,6 +22,16 @@ DEFAULT_SUMMARY_MAX_TOKENS = 1200
DEFAULT_SUMMARY_LENGTH_RECOMMENDED = 600
# Maximum token size sent to LLM for summary
DEFAULT_SUMMARY_CONTEXT_SIZE = 12000
# Default entities to extract if ENTITY_TYPES is not specified in .env
# NOTE(review): entries mix lowercase ("organization", "person", "geo", ...)
# with capitalized names ("Equipment", "Location"), and "geo" appears to
# overlap with "Location" — confirm this is intentional before relying on
# exact string matches against these values.
DEFAULT_ENTITY_TYPES = [
"organization",
"person",
"geo",
"event",
"category",
"Equipment",
"Location",
]
# Separator for graph fields
GRAPH_FIELD_SEP = "<SEP>"

View file

@ -39,6 +39,8 @@ from lightrag.constants import (
DEFAULT_MAX_ASYNC,
DEFAULT_MAX_PARALLEL_INSERT,
DEFAULT_MAX_GRAPH_NODES,
DEFAULT_ENTITY_TYPES,
DEFAULT_SUMMARY_LANGUAGE,
)
from lightrag.utils import get_env_value
@ -347,7 +349,10 @@ class LightRAG:
addon_params: dict[str, Any] = field(
default_factory=lambda: {
"language": get_env_value("SUMMARY_LANGUAGE", "English", str)
"language": get_env_value(
"SUMMARY_LANGUAGE", DEFAULT_SUMMARY_LANGUAGE, str
),
"entity_types": get_env_value("ENTITY_TYPES", DEFAULT_ENTITY_TYPES, list),
}
)

View file

@ -47,6 +47,8 @@ from .constants import (
DEFAULT_MAX_TOTAL_TOKENS,
DEFAULT_RELATED_CHUNK_NUMBER,
DEFAULT_KG_CHUNK_PICK_METHOD,
DEFAULT_ENTITY_TYPES,
DEFAULT_SUMMARY_LANGUAGE,
)
from .kg.shared_storage import get_storage_keyed_lock
import time
@ -1650,11 +1652,9 @@ async def extract_entities(
ordered_chunks = list(chunks.items())
# add language and example number params to prompt
language = global_config["addon_params"].get(
"language", PROMPTS["DEFAULT_LANGUAGE"]
)
language = global_config["addon_params"].get("language", DEFAULT_SUMMARY_LANGUAGE)
entity_types = global_config["addon_params"].get(
"entity_types", PROMPTS["DEFAULT_ENTITY_TYPES"]
"entity_types", DEFAULT_ENTITY_TYPES
)
example_number = global_config["addon_params"].get("example_number", None)
if example_number and example_number < len(PROMPTS["entity_extraction_examples"]):

View file

@ -4,13 +4,10 @@ from typing import Any
PROMPTS: dict[str, Any] = {}
PROMPTS["DEFAULT_LANGUAGE"] = "English"
PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"]
PROMPTS["DEFAULT_USER_PROMPT"] = "n/a"
PROMPTS["entity_extraction"] = """---Goal---