Add DEFAULT_RELATED_CHUNK_NUMBER
This commit is contained in:
parent
42f1fd60f4
commit
1541034816
3 changed files with 32 additions and 28 deletions
|
|
@ -28,3 +28,6 @@ GRAPH_FIELD_SEP = "<SEP>"
|
|||
DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB
|
||||
DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups
|
||||
DEFAULT_LOG_FILENAME = "lightrag.log" # Default log filename
|
||||
|
||||
# Maximum number of source chunks taken from each single entity or relation
|
||||
DEFAULT_RELATED_CHUNK_NUMBER = 5
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ from .constants import (
|
|||
DEFAULT_MAX_ENTITY_TOKENS,
|
||||
DEFAULT_MAX_RELATION_TOKENS,
|
||||
DEFAULT_MAX_TOTAL_TOKENS,
|
||||
DEFAULT_RELATED_CHUNK_NUMBER,
|
||||
)
|
||||
from .kg.shared_storage import get_storage_keyed_lock
|
||||
import time
|
||||
|
|
@ -2052,34 +2053,28 @@ async def _build_query_context(
|
|||
|
||||
# Create filtered data based on truncated context
|
||||
final_node_datas = []
|
||||
final_edge_datas = []
|
||||
|
||||
if entities_context and original_node_datas:
|
||||
# Create a set of entity names from final truncated context
|
||||
final_entity_names = {entity["entity"] for entity in entities_context}
|
||||
# Filter original node data based on final entities
|
||||
final_node_datas = [
|
||||
node
|
||||
for node in original_node_datas
|
||||
if node.get("entity_name") in final_entity_names
|
||||
]
|
||||
final_entity_names = {e["entity"] for e in entities_context}
|
||||
seen_nodes = set()
|
||||
for node in original_node_datas:
|
||||
name = node.get("entity_name")
|
||||
if name in final_entity_names and name not in seen_nodes:
|
||||
final_node_datas.append(node)
|
||||
seen_nodes.add(name)
|
||||
|
||||
final_edge_datas = []
|
||||
if relations_context and original_edge_datas:
|
||||
# Create a set of relation pairs from final truncated context
|
||||
final_relation_pairs = {
|
||||
(rel["entity1"], rel["entity2"]) for rel in relations_context
|
||||
}
|
||||
# Filter original edge data based on final relations
|
||||
final_edge_datas = [
|
||||
edge
|
||||
for edge in original_edge_datas
|
||||
if (edge.get("src_id"), edge.get("tgt_id")) in final_relation_pairs
|
||||
or (
|
||||
edge.get("src_tgt", (None, None))[0],
|
||||
edge.get("src_tgt", (None, None))[1],
|
||||
)
|
||||
in final_relation_pairs
|
||||
]
|
||||
final_relation_pairs = {(r["entity1"], r["entity2"]) for r in relations_context}
|
||||
seen_edges = set()
|
||||
for edge in original_edge_datas:
|
||||
src, tgt = edge.get("src_id"), edge.get("tgt_id")
|
||||
if src is None or tgt is None:
|
||||
src, tgt = edge.get("src_tgt", (None, None))
|
||||
|
||||
pair = (src, tgt)
|
||||
if pair in final_relation_pairs and pair not in seen_edges:
|
||||
final_edge_datas.append(edge)
|
||||
seen_edges.add(pair)
|
||||
|
||||
# Get text chunks based on final filtered data
|
||||
text_chunk_tasks = []
|
||||
|
|
@ -2370,7 +2365,9 @@ async def _find_most_related_text_unit_from_entities(
|
|||
logger.debug(f"Searching text chunks for {len(node_datas)} entities")
|
||||
|
||||
text_units = [
|
||||
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
|
||||
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[
|
||||
:DEFAULT_RELATED_CHUNK_NUMBER
|
||||
]
|
||||
for dp in node_datas
|
||||
if dp["source_id"] is not None
|
||||
]
|
||||
|
|
@ -2684,7 +2681,9 @@ async def _find_related_text_unit_from_relationships(
|
|||
logger.debug(f"Searching text chunks for {len(edge_datas)} relationships")
|
||||
|
||||
text_units = [
|
||||
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
|
||||
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[
|
||||
:DEFAULT_RELATED_CHUNK_NUMBER
|
||||
]
|
||||
for dp in edge_datas
|
||||
if dp["source_id"] is not None
|
||||
]
|
||||
|
|
|
|||
|
|
@ -795,7 +795,9 @@ def process_combine_contexts(*context_lists):
|
|||
if not context_list: # Skip empty lists
|
||||
continue
|
||||
for item in context_list:
|
||||
content_dict = {k: v for k, v in item.items() if k != "id"}
|
||||
content_dict = {
|
||||
k: v for k, v in item.items() if k != "id" and k != "created_at"
|
||||
}
|
||||
content_key = tuple(sorted(content_dict.items()))
|
||||
if content_key not in seen_content:
|
||||
seen_content[content_key] = item
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue