Add DEFAULT_RELATED_CHUNK_NUMBER
This commit is contained in:
parent
42f1fd60f4
commit
1541034816
3 changed files with 32 additions and 28 deletions
|
|
@ -28,3 +28,6 @@ GRAPH_FIELD_SEP = "<SEP>"
|
||||||
DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB
|
DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB
|
||||||
DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups
|
DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups
|
||||||
DEFAULT_LOG_FILENAME = "lightrag.log" # Default log filename
|
DEFAULT_LOG_FILENAME = "lightrag.log" # Default log filename
|
||||||
|
|
||||||
|
# Related Chunk Number for Single Entity or Relation
|
||||||
|
DEFAULT_RELATED_CHUNK_NUMBER = 5
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ from .constants import (
|
||||||
DEFAULT_MAX_ENTITY_TOKENS,
|
DEFAULT_MAX_ENTITY_TOKENS,
|
||||||
DEFAULT_MAX_RELATION_TOKENS,
|
DEFAULT_MAX_RELATION_TOKENS,
|
||||||
DEFAULT_MAX_TOTAL_TOKENS,
|
DEFAULT_MAX_TOTAL_TOKENS,
|
||||||
|
DEFAULT_RELATED_CHUNK_NUMBER,
|
||||||
)
|
)
|
||||||
from .kg.shared_storage import get_storage_keyed_lock
|
from .kg.shared_storage import get_storage_keyed_lock
|
||||||
import time
|
import time
|
||||||
|
|
@ -2052,34 +2053,28 @@ async def _build_query_context(
|
||||||
|
|
||||||
# Create filtered data based on truncated context
|
# Create filtered data based on truncated context
|
||||||
final_node_datas = []
|
final_node_datas = []
|
||||||
final_edge_datas = []
|
|
||||||
|
|
||||||
if entities_context and original_node_datas:
|
if entities_context and original_node_datas:
|
||||||
# Create a set of entity names from final truncated context
|
final_entity_names = {e["entity"] for e in entities_context}
|
||||||
final_entity_names = {entity["entity"] for entity in entities_context}
|
seen_nodes = set()
|
||||||
# Filter original node data based on final entities
|
for node in original_node_datas:
|
||||||
final_node_datas = [
|
name = node.get("entity_name")
|
||||||
node
|
if name in final_entity_names and name not in seen_nodes:
|
||||||
for node in original_node_datas
|
final_node_datas.append(node)
|
||||||
if node.get("entity_name") in final_entity_names
|
seen_nodes.add(name)
|
||||||
]
|
|
||||||
|
|
||||||
|
final_edge_datas = []
|
||||||
if relations_context and original_edge_datas:
|
if relations_context and original_edge_datas:
|
||||||
# Create a set of relation pairs from final truncated context
|
final_relation_pairs = {(r["entity1"], r["entity2"]) for r in relations_context}
|
||||||
final_relation_pairs = {
|
seen_edges = set()
|
||||||
(rel["entity1"], rel["entity2"]) for rel in relations_context
|
for edge in original_edge_datas:
|
||||||
}
|
src, tgt = edge.get("src_id"), edge.get("tgt_id")
|
||||||
# Filter original edge data based on final relations
|
if src is None or tgt is None:
|
||||||
final_edge_datas = [
|
src, tgt = edge.get("src_tgt", (None, None))
|
||||||
edge
|
|
||||||
for edge in original_edge_datas
|
pair = (src, tgt)
|
||||||
if (edge.get("src_id"), edge.get("tgt_id")) in final_relation_pairs
|
if pair in final_relation_pairs and pair not in seen_edges:
|
||||||
or (
|
final_edge_datas.append(edge)
|
||||||
edge.get("src_tgt", (None, None))[0],
|
seen_edges.add(pair)
|
||||||
edge.get("src_tgt", (None, None))[1],
|
|
||||||
)
|
|
||||||
in final_relation_pairs
|
|
||||||
]
|
|
||||||
|
|
||||||
# Get text chunks based on final filtered data
|
# Get text chunks based on final filtered data
|
||||||
text_chunk_tasks = []
|
text_chunk_tasks = []
|
||||||
|
|
@ -2370,7 +2365,9 @@ async def _find_most_related_text_unit_from_entities(
|
||||||
logger.debug(f"Searching text chunks for {len(node_datas)} entities")
|
logger.debug(f"Searching text chunks for {len(node_datas)} entities")
|
||||||
|
|
||||||
text_units = [
|
text_units = [
|
||||||
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
|
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[
|
||||||
|
:DEFAULT_RELATED_CHUNK_NUMBER
|
||||||
|
]
|
||||||
for dp in node_datas
|
for dp in node_datas
|
||||||
if dp["source_id"] is not None
|
if dp["source_id"] is not None
|
||||||
]
|
]
|
||||||
|
|
@ -2684,7 +2681,9 @@ async def _find_related_text_unit_from_relationships(
|
||||||
logger.debug(f"Searching text chunks for {len(edge_datas)} relationships")
|
logger.debug(f"Searching text chunks for {len(edge_datas)} relationships")
|
||||||
|
|
||||||
text_units = [
|
text_units = [
|
||||||
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
|
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])[
|
||||||
|
:DEFAULT_RELATED_CHUNK_NUMBER
|
||||||
|
]
|
||||||
for dp in edge_datas
|
for dp in edge_datas
|
||||||
if dp["source_id"] is not None
|
if dp["source_id"] is not None
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -795,7 +795,9 @@ def process_combine_contexts(*context_lists):
|
||||||
if not context_list: # Skip empty lists
|
if not context_list: # Skip empty lists
|
||||||
continue
|
continue
|
||||||
for item in context_list:
|
for item in context_list:
|
||||||
content_dict = {k: v for k, v in item.items() if k != "id"}
|
content_dict = {
|
||||||
|
k: v for k, v in item.items() if k != "id" and k != "created_at"
|
||||||
|
}
|
||||||
content_key = tuple(sorted(content_dict.items()))
|
content_key = tuple(sorted(content_dict.items()))
|
||||||
if content_key not in seen_content:
|
if content_key not in seen_content:
|
||||||
seen_content[content_key] = item
|
seen_content[content_key] = item
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue