refactor: remove file_path and created_at from context, improve token truncation

- Remove file_path and created_at fields from entity and relationship contexts
- Update chunk token truncation to measure each chunk's full JSON serialization instead of its content field only
This commit is contained in:
yangdx 2025-08-18 18:30:09 +08:00
parent 1484c4adfa
commit d3fde60938
2 changed files with 13 additions and 7 deletions

View file

@ -2297,8 +2297,11 @@ async def _build_query_context(
if entities_context:
# Process entities context: drop file_path and created_at (formerly replaced GRAPH_FIELD_SEP with ; in file_path fields)
for entity in entities_context:
if "file_path" in entity and entity["file_path"]:
entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
# remove file_path and created_at
entity.pop("file_path", None)
entity.pop("created_at", None)
# if "file_path" in entity and entity["file_path"]:
# entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
entities_context = truncate_list_by_token_size(
entities_context,
@ -2311,10 +2314,13 @@ async def _build_query_context(
if relations_context:
# Process relations context: drop file_path and created_at (formerly replaced GRAPH_FIELD_SEP with ; in file_path fields)
for relation in relations_context:
if "file_path" in relation and relation["file_path"]:
relation["file_path"] = relation["file_path"].replace(
GRAPH_FIELD_SEP, ";"
)
# remove file_path and created_at
relation.pop("file_path", None)
relation.pop("created_at", None)
# if "file_path" in relation and relation["file_path"]:
# relation["file_path"] = relation["file_path"].replace(
# GRAPH_FIELD_SEP, ";"
# )
relations_context = truncate_list_by_token_size(
relations_context,

View file

@ -1994,7 +1994,7 @@ async def process_chunks_unified(
unique_chunks = truncate_list_by_token_size(
unique_chunks,
key=lambda x: x.get("content", ""),
key=lambda x: json.dumps(x, ensure_ascii=False),
max_token_size=chunk_token_limit,
tokenizer=tokenizer,
)