From d3fde60938c7e14f5e5dff07d4fbd4196e86cbce Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 18 Aug 2025 18:30:09 +0800
Subject: [PATCH] refactor: remove file_path and created_at from context,
 improve token truncation

- Remove the file_path and created_at fields from entity and relationship
  contexts
- Update token truncation to count tokens over the full JSON serialization
  of each item instead of its content field only
---
 lightrag/operate.py | 18 ++++++++++++------
 lightrag/utils.py   |  2 +-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index acb75f0f..d1ee8233 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -2297,8 +2297,11 @@ async def _build_query_context(
     if entities_context:
         # Process entities context to replace GRAPH_FIELD_SEP with : in file_path fields
         for entity in entities_context:
-            if "file_path" in entity and entity["file_path"]:
-                entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
+            # remove file_path and created_at
+            entity.pop("file_path", None)
+            entity.pop("created_at", None)
+            # if "file_path" in entity and entity["file_path"]:
+            #     entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
 
     entities_context = truncate_list_by_token_size(
         entities_context,
@@ -2311,10 +2314,13 @@ async def _build_query_context(
     if relations_context:
         # Process relations context to replace GRAPH_FIELD_SEP with : in file_path fields
         for relation in relations_context:
-            if "file_path" in relation and relation["file_path"]:
-                relation["file_path"] = relation["file_path"].replace(
-                    GRAPH_FIELD_SEP, ";"
-                )
+            # remove file_path and created_at
+            relation.pop("file_path", None)
+            relation.pop("created_at", None)
+            # if "file_path" in relation and relation["file_path"]:
+            #     relation["file_path"] = relation["file_path"].replace(
+            #         GRAPH_FIELD_SEP, ";"
+            #     )
 
     relations_context = truncate_list_by_token_size(
         relations_context,
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 5052897b..a9950f9b 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -1994,7 +1994,7 @@ async def process_chunks_unified(
 
     unique_chunks = truncate_list_by_token_size(
         unique_chunks,
-        key=lambda x: x.get("content", ""),
+        key=lambda x: json.dumps(x, ensure_ascii=False),
        max_token_size=chunk_token_limit,
         tokenizer=tokenizer,
     )
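
Note on the operate.py hunks: both loops now strip metadata in place with
dict.pop before the contexts are truncated and serialized. A minimal sketch
of the pattern, using an illustrative entity record (the field names other
than file_path/created_at and all values here are hypothetical, not taken
from LightRAG):

    import json

    entity = {
        "entity": "Alan Turing",                          # hypothetical payload fields
        "description": "Mathematician and logician.",
        "file_path": "papers/on-computable-numbers.txt",  # removed by the patch
        "created_at": 1724000000,                         # removed by the patch
    }

    before = len(json.dumps(entity, ensure_ascii=False))
    # pop(key, None) deletes in place and is a no-op when the key is absent,
    # so records that never carried these fields pass through safely
    entity.pop("file_path", None)
    entity.pop("created_at", None)
    after = len(json.dumps(entity, ensure_ascii=False))

    print(before, "->", after)  # the serialized record shrinks

Because the fields are gone before serialization, they no longer reach the
LLM prompt, and any truncation keyed on the full record (as in the utils.py
hunk) measures the smaller payload.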
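
Note on the utils.py hunk: the token budget is now measured against the
whole serialized record rather than its content field alone. A standalone
sketch below, assuming a simplified stand-in for truncate_list_by_token_size
and a naive whitespace tokenizer (both hypothetical; the real helpers in
lightrag/utils.py differ in detail):

    import json
    from typing import Any, Callable

    def truncate_by_tokens(
        items: list[dict[str, Any]],
        key: Callable[[dict[str, Any]], str],
        max_token_size: int,
        encode: Callable[[str], list[str]],
    ) -> list[dict[str, Any]]:
        # Keep the leading items whose cumulative token count fits the budget.
        total = 0
        for i, item in enumerate(items):
            total += len(encode(key(item)))
            if total > max_token_size:
                return items[:i]
        return items

    encode = str.split  # stand-in tokenizer: one token per whitespace-separated word

    chunks = [
        # tiny content, but a bulky metadata field rides along
        {"content": "short text", "file_path": " ".join(f"doc-{i}.md" for i in range(40))},
        {"content": "another chunk", "file_path": "notes.md"},
    ]

    old = truncate_by_tokens(chunks, lambda x: x.get("content", ""), 10, encode)
    new = truncate_by_tokens(chunks, lambda x: json.dumps(x, ensure_ascii=False), 10, encode)
    print(len(old), len(new))  # 2 0: the old key never counts metadata, while under
                               # the new key the first record alone blows the budget

The new key makes the budget track what is actually emitted, at the cost of
earlier truncation for metadata-heavy records. One thing worth verifying in
review: json must already be imported in lightrag/utils.py, since the hunk
introduces json.dumps without adding an import.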