From d3fde60938c7e14f5e5dff07d4fbd4196e86cbce Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 18 Aug 2025 18:30:09 +0800
Subject: [PATCH] refactor: remove file_path and created_at from context,
 improve token truncation

- Remove the file_path and created_at fields from entity and relationship
  contexts
- Update token truncation to count tokens over the full JSON serialization
  of each item instead of its content field only
---
 lightrag/operate.py | 18 ++++++++++++------
 lightrag/utils.py   |  2 +-
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index acb75f0f..d1ee8233 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -2297,8 +2297,11 @@ async def _build_query_context(
     if entities_context:
         # Process entities context to replace GRAPH_FIELD_SEP with : in file_path fields
         for entity in entities_context:
-            if "file_path" in entity and entity["file_path"]:
-                entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
+            # remove file_path and created_at
+            entity.pop("file_path", None)
+            entity.pop("created_at", None)
+            # if "file_path" in entity and entity["file_path"]:
+            #     entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
 
     entities_context = truncate_list_by_token_size(
         entities_context,
@@ -2311,10 +2314,13 @@ async def _build_query_context(
     if relations_context:
         # Process relations context to replace GRAPH_FIELD_SEP with : in file_path fields
         for relation in relations_context:
-            if "file_path" in relation and relation["file_path"]:
-                relation["file_path"] = relation["file_path"].replace(
-                    GRAPH_FIELD_SEP, ";"
-                )
+            # remove file_path and created_at
+            relation.pop("file_path", None)
+            relation.pop("created_at", None)
+            # if "file_path" in relation and relation["file_path"]:
+            #     relation["file_path"] = relation["file_path"].replace(
+            #         GRAPH_FIELD_SEP, ";"
+            #     )
 
     relations_context = truncate_list_by_token_size(
         relations_context,
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 5052897b..a9950f9b 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -1994,7 +1994,7 @@ async def process_chunks_unified(
 
     unique_chunks = truncate_list_by_token_size(
         unique_chunks,
-        key=lambda x: x.get("content", ""),
+        key=lambda x: json.dumps(x, ensure_ascii=False),
        max_token_size=chunk_token_limit,
         tokenizer=tokenizer,
     )
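
Note on the operate.py hunks: both loops now strip metadata in place with
dict.pop before the contexts are truncated and serialized. A minimal sketch
of the pattern, using an illustrative entity record (the field names other
than file_path/created_at and all values here are hypothetical, not taken
from LightRAG):

    import json

    entity = {
        "entity": "Alan Turing",                          # hypothetical payload fields
        "description": "Mathematician and logician.",
        "file_path": "papers/on-computable-numbers.txt",  # removed by the patch
        "created_at": 1724000000,                         # removed by the patch
    }

    before = len(json.dumps(entity, ensure_ascii=False))
    # pop(key, None) deletes in place and is a no-op when the key is absent,
    # so records that never carried these fields pass through safely
    entity.pop("file_path", None)
    entity.pop("created_at", None)
    after = len(json.dumps(entity, ensure_ascii=False))

    print(before, "->", after)  # the serialized record shrinks

Because the fields are gone before serialization, they no longer reach the
LLM prompt, and any truncation keyed on the full record (as in the utils.py
hunk) measures the smaller payload.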
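
Note on the utils.py hunk: the token budget is now measured against the
whole serialized record rather than its content field alone. A standalone
sketch below, assuming a simplified stand-in for truncate_list_by_token_size
and a naive whitespace tokenizer (both hypothetical; the real helpers in
lightrag/utils.py differ in detail):

    import json
    from typing import Any, Callable

    def truncate_by_tokens(
        items: list[dict[str, Any]],
        key: Callable[[dict[str, Any]], str],
        max_token_size: int,
        encode: Callable[[str], list[str]],
    ) -> list[dict[str, Any]]:
        # Keep the leading items whose cumulative token count fits the budget.
        total = 0
        for i, item in enumerate(items):
            total += len(encode(key(item)))
            if total > max_token_size:
                return items[:i]
        return items

    encode = str.split  # stand-in tokenizer: one token per whitespace-separated word

    chunks = [
        # tiny content, but a bulky metadata field rides along
        {"content": "short text", "file_path": " ".join(f"doc-{i}.md" for i in range(40))},
        {"content": "another chunk", "file_path": "notes.md"},
    ]

    old = truncate_by_tokens(chunks, lambda x: x.get("content", ""), 10, encode)
    new = truncate_by_tokens(chunks, lambda x: json.dumps(x, ensure_ascii=False), 10, encode)
    print(len(old), len(new))  # 2 0: the old key never counts metadata, while under
                               # the new key the first record alone blows the budget

The new key makes the budget track what is actually emitted, at the cost of
earlier truncation for metadata-heavy records. One thing worth verifying in
review: json must already be imported in lightrag/utils.py, since the hunk
introduces json.dumps without adding an import.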