Read MAX_CHUNK_IDS_PER_ENTITY from env, falling back to the default, and remove unused import

2025-10-14 16:14:03 +05:00 · 2025-10-14 16:14:03 +05:00 · 4e740af79b
commit 4e740af79b
parent 7871600d8a
3 changed files with 7 additions and 4 deletions
--- a/env.example
+++ b/env.example
@@ -73,6 +73,8 @@ ENABLE_LLM_CACHE=true
 # MAX_RELATION_TOKENS=8000
 ### control the maximum tokens send to LLM (include entities, relations and chunks)
 # MAX_TOTAL_TOKENS=30000
+### control the maximum number of chunk_ids stored per entity in the vector db
+# MAX_CHUNK_IDS_PER_ENTITY=500

 ### maximum number of related chunks per source entity or relation
 ###     The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -53,7 +53,6 @@ from .constants import (
    DEFAULT_KG_CHUNK_PICK_METHOD,
    DEFAULT_ENTITY_TYPES,
    DEFAULT_SUMMARY_LANGUAGE,
-    DEFAULT_MAX_CHUNK_IDS_PER_ENTITY,
 )
 from .kg.shared_storage import get_storage_keyed_lock
 import time
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -2469,13 +2469,15 @@ def truncate_entity_source_id(chunk_ids: set, entity_name: str) -> set:
    """Limit chunk_ids, for entities that appear a HUGE no of times (To not break VDB hard upper limits)"""
    already_len: int = len(chunk_ids)

-    if already_len >= DEFAULT_MAX_CHUNK_IDS_PER_ENTITY:
+    max_chunk_ids_per_entity = get_env_value("MAX_CHUNK_IDS_PER_ENTITY", DEFAULT_MAX_CHUNK_IDS_PER_ENTITY, int)
+
+    if already_len >= max_chunk_ids_per_entity:
        logger.warning(
-            f"Chunk Ids already exceeds {DEFAULT_MAX_CHUNK_IDS_PER_ENTITY} for {entity_name}, "
+            f"Chunk Ids already exceeds {max_chunk_ids_per_entity } for {entity_name}, "
            f"current size: {already_len} entries."
        )
    
-    truncated_chunk_ids = set(list(chunk_ids)[0:DEFAULT_MAX_CHUNK_IDS_PER_ENTITY])
+    truncated_chunk_ids = set(list(chunk_ids)[0:max_chunk_ids_per_entity ])

    return truncated_chunk_ids