Read MAX_CHUNK_IDS_PER_ENTITY from the environment, falling back to the default when it is unset, and remove an unused import
This commit is contained in:
parent
7871600d8a
commit
4e740af79b
3 changed files with 7 additions and 4 deletions
|
|
@ -73,6 +73,8 @@ ENABLE_LLM_CACHE=true
|
||||||
# MAX_RELATION_TOKENS=8000
|
# MAX_RELATION_TOKENS=8000
|
||||||
### control the maximum tokens sent to the LLM (including entities, relations and chunks)
|
### control the maximum tokens sent to the LLM (including entities, relations and chunks)
|
||||||
# MAX_TOTAL_TOKENS=30000
|
# MAX_TOTAL_TOKENS=30000
|
||||||
|
### control the maximum chunk_ids stored in vector db
|
||||||
|
# MAX_CHUNK_IDS_PER_ENTITY=500
|
||||||
|
|
||||||
### maximum number of related chunks per source entity or relation
|
### maximum number of related chunks per source entity or relation
|
||||||
### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
|
### The chunk picker uses this value to determine the total number of chunks selected from KG(knowledge graph)
|
||||||
|
|
|
||||||
|
|
@ -53,7 +53,6 @@ from .constants import (
|
||||||
DEFAULT_KG_CHUNK_PICK_METHOD,
|
DEFAULT_KG_CHUNK_PICK_METHOD,
|
||||||
DEFAULT_ENTITY_TYPES,
|
DEFAULT_ENTITY_TYPES,
|
||||||
DEFAULT_SUMMARY_LANGUAGE,
|
DEFAULT_SUMMARY_LANGUAGE,
|
||||||
DEFAULT_MAX_CHUNK_IDS_PER_ENTITY,
|
|
||||||
)
|
)
|
||||||
from .kg.shared_storage import get_storage_keyed_lock
|
from .kg.shared_storage import get_storage_keyed_lock
|
||||||
import time
|
import time
|
||||||
|
|
|
||||||
|
|
@ -2469,13 +2469,15 @@ def truncate_entity_source_id(chunk_ids: set, entity_name: str) -> set:
|
||||||
"""Limit chunk_ids, for entities that appear a HUGE no of times (To not break VDB hard upper limits)"""
|
"""Limit chunk_ids, for entities that appear a HUGE no of times (To not break VDB hard upper limits)"""
|
||||||
already_len: int = len(chunk_ids)
|
already_len: int = len(chunk_ids)
|
||||||
|
|
||||||
if already_len >= DEFAULT_MAX_CHUNK_IDS_PER_ENTITY:
|
max_chunk_ids_per_entity = get_env_value("MAX_CHUNK_IDS_PER_ENTITY", DEFAULT_MAX_CHUNK_IDS_PER_ENTITY, int)
|
||||||
|
|
||||||
|
if already_len >= max_chunk_ids_per_entity:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Chunk Ids already exceeds {DEFAULT_MAX_CHUNK_IDS_PER_ENTITY} for {entity_name}, "
|
f"Chunk Ids already exceeds {max_chunk_ids_per_entity } for {entity_name}, "
|
||||||
f"current size: {already_len} entries."
|
f"current size: {already_len} entries."
|
||||||
)
|
)
|
||||||
|
|
||||||
truncated_chunk_ids = set(list(chunk_ids)[0:DEFAULT_MAX_CHUNK_IDS_PER_ENTITY])
|
truncated_chunk_ids = set(list(chunk_ids)[0:max_chunk_ids_per_entity ])
|
||||||
|
|
||||||
return truncated_chunk_ids
|
return truncated_chunk_ids
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue