Add DEFAULT_RELATED_CHUNK_NUMBER

This commit is contained in:
zrguo 2025-07-15 21:35:12 +08:00
parent 42f1fd60f4
commit 1541034816
3 changed files with 32 additions and 28 deletions

View file

@ -28,3 +28,6 @@ GRAPH_FIELD_SEP = "<SEP>"
DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB
DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups
DEFAULT_LOG_FILENAME = "lightrag.log" # Default log filename DEFAULT_LOG_FILENAME = "lightrag.log" # Default log filename
# Maximum number of related text chunks kept for each single entity or relation
DEFAULT_RELATED_CHUNK_NUMBER = 5

View file

@ -41,6 +41,7 @@ from .constants import (
DEFAULT_MAX_ENTITY_TOKENS, DEFAULT_MAX_ENTITY_TOKENS,
DEFAULT_MAX_RELATION_TOKENS, DEFAULT_MAX_RELATION_TOKENS,
DEFAULT_MAX_TOTAL_TOKENS, DEFAULT_MAX_TOTAL_TOKENS,
DEFAULT_RELATED_CHUNK_NUMBER,
) )
from .kg.shared_storage import get_storage_keyed_lock from .kg.shared_storage import get_storage_keyed_lock
import time import time
@ -2052,34 +2053,28 @@ async def _build_query_context(
# Create filtered data based on truncated context # Create filtered data based on truncated context
final_node_datas = [] final_node_datas = []
final_edge_datas = []
if entities_context and original_node_datas: if entities_context and original_node_datas:
# Create a set of entity names from final truncated context final_entity_names = {e["entity"] for e in entities_context}
final_entity_names = {entity["entity"] for entity in entities_context} seen_nodes = set()
# Filter original node data based on final entities for node in original_node_datas:
final_node_datas = [ name = node.get("entity_name")
node if name in final_entity_names and name not in seen_nodes:
for node in original_node_datas final_node_datas.append(node)
if node.get("entity_name") in final_entity_names seen_nodes.add(name)
]
final_edge_datas = []
if relations_context and original_edge_datas: if relations_context and original_edge_datas:
# Create a set of relation pairs from final truncated context final_relation_pairs = {(r["entity1"], r["entity2"]) for r in relations_context}
final_relation_pairs = { seen_edges = set()
(rel["entity1"], rel["entity2"]) for rel in relations_context for edge in original_edge_datas:
} src, tgt = edge.get("src_id"), edge.get("tgt_id")
# Filter original edge data based on final relations if src is None or tgt is None:
final_edge_datas = [ src, tgt = edge.get("src_tgt", (None, None))
edge
for edge in original_edge_datas pair = (src, tgt)
if (edge.get("src_id"), edge.get("tgt_id")) in final_relation_pairs if pair in final_relation_pairs and pair not in seen_edges:
or ( final_edge_datas.append(edge)
edge.get("src_tgt", (None, None))[0], seen_edges.add(pair)
edge.get("src_tgt", (None, None))[1],
)
in final_relation_pairs
]
# Get text chunks based on final filtered data # Get text chunks based on final filtered data
text_chunk_tasks = [] text_chunk_tasks = []
@ -2370,7 +2365,9 @@ async def _find_most_related_text_unit_from_entities(
logger.debug(f"Searching text chunks for {len(node_datas)} entities") logger.debug(f"Searching text chunks for {len(node_datas)} entities")
text_units = [ text_units = [
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP]) split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[
:DEFAULT_RELATED_CHUNK_NUMBER
]
for dp in node_datas for dp in node_datas
if dp["source_id"] is not None if dp["source_id"] is not None
] ]
@ -2684,7 +2681,9 @@ async def _find_related_text_unit_from_relationships(
logger.debug(f"Searching text chunks for {len(edge_datas)} relationships") logger.debug(f"Searching text chunks for {len(edge_datas)} relationships")
text_units = [ text_units = [
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP]) split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[
:DEFAULT_RELATED_CHUNK_NUMBER
]
for dp in edge_datas for dp in edge_datas
if dp["source_id"] is not None if dp["source_id"] is not None
] ]

View file

@ -795,7 +795,9 @@ def process_combine_contexts(*context_lists):
if not context_list: # Skip empty lists if not context_list: # Skip empty lists
continue continue
for item in context_list: for item in context_list:
content_dict = {k: v for k, v in item.items() if k != "id"} content_dict = {
k: v for k, v in item.items() if k != "id" and k != "created_at"
}
content_key = tuple(sorted(content_dict.items())) content_key = tuple(sorted(content_dict.items()))
if content_key not in seen_content: if content_key not in seen_content:
seen_content[content_key] = item seen_content[content_key] = item