refactor: remove file_path and created_at from context, improve token truncation

- Remove file_path and created_at fields from entity and relationship contexts
- Update chunk token truncation to measure the full JSON serialization of each item instead of its content field only
yangdx 2025-08-18 18:30:09 +08:00
parent 1484c4adfa
commit d3fde60938
2 changed files with 13 additions and 7 deletions

@@ -2297,8 +2297,11 @@ async def _build_query_context(
     if entities_context:
         # Process entities context to replace GRAPH_FIELD_SEP with : in file_path fields
         for entity in entities_context:
-            if "file_path" in entity and entity["file_path"]:
-                entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
+            # remove file_path and created_at
+            entity.pop("file_path", None)
+            entity.pop("created_at", None)
+            # if "file_path" in entity and entity["file_path"]:
+            #     entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
 
     entities_context = truncate_list_by_token_size(
         entities_context,
@@ -2311,10 +2314,13 @@ async def _build_query_context(
     if relations_context:
         # Process relations context to replace GRAPH_FIELD_SEP with : in file_path fields
         for relation in relations_context:
-            if "file_path" in relation and relation["file_path"]:
-                relation["file_path"] = relation["file_path"].replace(
-                    GRAPH_FIELD_SEP, ";"
-                )
+            # remove file_path and created_at
+            relation.pop("file_path", None)
+            relation.pop("created_at", None)
+            # if "file_path" in relation and relation["file_path"]:
+            #     relation["file_path"] = relation["file_path"].replace(
+            #         GRAPH_FIELD_SEP, ";"
+            #     )
 
     relations_context = truncate_list_by_token_size(
         relations_context,

@@ -1994,7 +1994,7 @@ async def process_chunks_unified(
     unique_chunks = truncate_list_by_token_size(
         unique_chunks,
-        key=lambda x: x.get("content", ""),
+        key=lambda x: json.dumps(x, ensure_ascii=False),
         max_token_size=chunk_token_limit,
         tokenizer=tokenizer,
     )
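
The key change in process_chunks_unified means each chunk's token cost is now measured over its full JSON serialization rather than only its content string, so metadata fields also count toward the chunk token limit. A minimal sketch of that effect, assuming a helper shaped like the truncate_list_by_token_size call in the diff and using tiktoken as a stand-in tokenizer (the repo's actual helper and Tokenizer class may differ in details, and the sample chunk fields are illustrative):

    import json
    import tiktoken  # stand-in tokenizer with an encode() method

    def truncate_list_by_token_size(items, key, max_token_size, tokenizer):
        # Keep leading items while the cumulative token count of key(item)
        # stays within max_token_size. Sketch only; the real helper may
        # handle the overflowing item or empty input differently.
        kept, used = [], 0
        for item in items:
            used += len(tokenizer.encode(key(item)))
            if used > max_token_size:
                break
            kept.append(item)
        return kept

    chunks = [
        {"content": "short text", "full_doc_id": "doc-1", "file_path": "a.txt"},
        {"content": "another chunk", "full_doc_id": "doc-2", "file_path": "b.txt"},
    ]
    enc = tiktoken.get_encoding("cl100k_base")

    # Old key: only the content string counts toward the budget.
    by_content = truncate_list_by_token_size(
        chunks, key=lambda x: x.get("content", ""), max_token_size=8, tokenizer=enc
    )
    # New key: the whole serialized record counts, so the same budget
    # admits fewer chunks and truncation kicks in earlier.
    by_json = truncate_list_by_token_size(
        chunks, key=lambda x: json.dumps(x, ensure_ascii=False), max_token_size=8, tokenizer=enc
    )
    print(len(by_content), len(by_json))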