refactor: remove file_path and created_at from context, improve token truncation
- Remove file_path and created_at fields from entity and relationship contexts. - Update token truncation to use the full JSON serialization instead of the content only.
This commit is contained in:
parent
1484c4adfa
commit
d3fde60938
2 changed files with 13 additions and 7 deletions
|
|
@ -2297,8 +2297,11 @@ async def _build_query_context(
|
|||
if entities_context:
|
||||
# Process entities context to replace GRAPH_FIELD_SEP with ; in file_path fields
|
||||
for entity in entities_context:
|
||||
if "file_path" in entity and entity["file_path"]:
|
||||
entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
|
||||
# remove file_path and created_at
|
||||
entity.pop("file_path", None)
|
||||
entity.pop("created_at", None)
|
||||
# if "file_path" in entity and entity["file_path"]:
|
||||
# entity["file_path"] = entity["file_path"].replace(GRAPH_FIELD_SEP, ";")
|
||||
|
||||
entities_context = truncate_list_by_token_size(
|
||||
entities_context,
|
||||
|
|
@ -2311,10 +2314,13 @@ async def _build_query_context(
|
|||
if relations_context:
|
||||
# Process relations context to replace GRAPH_FIELD_SEP with ; in file_path fields
|
||||
for relation in relations_context:
|
||||
if "file_path" in relation and relation["file_path"]:
|
||||
relation["file_path"] = relation["file_path"].replace(
|
||||
GRAPH_FIELD_SEP, ";"
|
||||
)
|
||||
# remove file_path and created_at
|
||||
relation.pop("file_path", None)
|
||||
relation.pop("created_at", None)
|
||||
# if "file_path" in relation and relation["file_path"]:
|
||||
# relation["file_path"] = relation["file_path"].replace(
|
||||
# GRAPH_FIELD_SEP, ";"
|
||||
# )
|
||||
|
||||
relations_context = truncate_list_by_token_size(
|
||||
relations_context,
|
||||
|
|
|
|||
|
|
@ -1994,7 +1994,7 @@ async def process_chunks_unified(
|
|||
|
||||
unique_chunks = truncate_list_by_token_size(
|
||||
unique_chunks,
|
||||
key=lambda x: x.get("content", ""),
|
||||
key=lambda x: json.dumps(x, ensure_ascii=False),
|
||||
max_token_size=chunk_token_limit,
|
||||
tokenizer=tokenizer,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue