refactor: remove file_path and created_at from context, improve token truncation

- Remove file_path and created_at fields from entity and relationship contexts
- Update chunk token truncation to measure the full JSON serialization of each item instead of its content field only
yangdx 2025-08-18 18:30:09 +08:00
parent 1484c4adfa
commit d3fde60938
2 changed files with 13 additions and 7 deletions

@@ -2297,8 +2297,11 @@ async def _build_query_context(
     if entities_context:
         # Process entities context to replace GRAPH_FIELD_SEP with : in file_path fields
         for entity in entities_context:
-            if "file_path" in entity and entity["file_path"]:
-                entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
+            # remove file_path and created_at
+            entity.pop("file_path", None)
+            entity.pop("created_at", None)
+            # if "file_path" in entity and entity["file_path"]:
+            #     entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
 
     entities_context = truncate_list_by_token_size(
         entities_context,
@@ -2311,10 +2314,13 @@ async def _build_query_context(
     if relations_context:
         # Process relations context to replace GRAPH_FIELD_SEP with : in file_path fields
         for relation in relations_context:
-            if "file_path" in relation and relation["file_path"]:
-                relation["file_path"] = relation["file_path"].replace(
-                    GRAPH_FIELD_SEP, ";"
-                )
+            # remove file_path and created_at
+            relation.pop("file_path", None)
+            relation.pop("created_at", None)
+            # if "file_path" in relation and relation["file_path"]:
+            #     relation["file_path"] = relation["file_path"].replace(
+            #         GRAPH_FIELD_SEP, ";"
+            #     )
 
     relations_context = truncate_list_by_token_size(
         relations_context,

@@ -1994,7 +1994,7 @@ async def process_chunks_unified(
     unique_chunks = truncate_list_by_token_size(
         unique_chunks,
-        key=lambda x: x.get("content", ""),
+        key=lambda x: json.dumps(x, ensure_ascii=False),
         max_token_size=chunk_token_limit,
         tokenizer=tokenizer,
     )
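
The key change in process_chunks_unified means each chunk's token cost is now measured over its full JSON serialization rather than only its content string, so metadata fields also count toward the chunk token limit. A minimal sketch of that effect, assuming a helper shaped like the truncate_list_by_token_size call in the diff and using tiktoken as a stand-in tokenizer (the repo's actual helper and Tokenizer class may differ in details, and the sample chunk fields are illustrative):

    import json
    import tiktoken  # stand-in tokenizer with an encode() method

    def truncate_list_by_token_size(items, key, max_token_size, tokenizer):
        # Keep leading items while the cumulative token count of key(item)
        # stays within max_token_size. Sketch only; the real helper may
        # handle the overflowing item or empty input differently.
        kept, used = [], 0
        for item in items:
            used += len(tokenizer.encode(key(item)))
            if used > max_token_size:
                break
            kept.append(item)
        return kept

    chunks = [
        {"content": "short text", "full_doc_id": "doc-1", "file_path": "a.txt"},
        {"content": "another chunk", "full_doc_id": "doc-2", "file_path": "b.txt"},
    ]
    enc = tiktoken.get_encoding("cl100k_base")

    # Old key: only the content string counts toward the budget.
    by_content = truncate_list_by_token_size(
        chunks, key=lambda x: x.get("content", ""), max_token_size=8, tokenizer=enc
    )
    # New key: the whole serialized record counts, so the same budget
    # admits fewer chunks and truncation kicks in earlier.
    by_json = truncate_list_by_token_size(
        chunks, key=lambda x: json.dumps(x, ensure_ascii=False), max_token_size=8, tokenizer=enc
    )
    print(len(by_content), len(by_json))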