Code formatting improvements and style consistency fixes

* Remove trailing whitespace
* Fix function signature ellipsis style
This commit is contained in:
yangdx 2025-09-14 17:49:02 +08:00
parent 87bb8a023b
commit 82a67354d0
3 changed files with 82 additions and 72 deletions

View file

@@ -2122,7 +2122,7 @@ class LightRAG:
) -> dict[str, Any]: ) -> dict[str, Any]:
""" """
Asynchronous data retrieval API: returns structured retrieval results without LLM generation. Asynchronous data retrieval API: returns structured retrieval results without LLM generation.
This function reuses the same logic as aquery but stops before LLM generation, This function reuses the same logic as aquery but stops before LLM generation,
returning the final processed entities, relationships, and chunks data that would be sent to LLM. returning the final processed entities, relationships, and chunks data that would be sent to LLM.
@@ -2167,12 +2167,12 @@ class LightRAG:
"chunks": [], "chunks": [],
"metadata": { "metadata": {
"query_mode": "bypass", "query_mode": "bypass",
"keywords": {"high_level": [], "low_level": []} "keywords": {"high_level": [], "low_level": []},
} },
} }
else: else:
raise ValueError(f"Unknown mode {param.mode}") raise ValueError(f"Unknown mode {param.mode}")
await self._query_done() await self._query_done()
return final_data return final_data

View file

@@ -2241,8 +2241,7 @@ async def kg_query(
system_prompt: str | None = None, system_prompt: str | None = None,
chunks_vdb: BaseVectorStorage = None, chunks_vdb: BaseVectorStorage = None,
return_raw_data: Literal[True] = False, return_raw_data: Literal[True] = False,
) -> dict[str, Any]: ) -> dict[str, Any]: ...
...
@overload @overload
@@ -2258,8 +2257,7 @@ async def kg_query(
system_prompt: str | None = None, system_prompt: str | None = None,
chunks_vdb: BaseVectorStorage = None, chunks_vdb: BaseVectorStorage = None,
return_raw_data: Literal[False] = False, return_raw_data: Literal[False] = False,
) -> str | AsyncIterator[str]: ) -> str | AsyncIterator[str]: ...
...
async def kg_query( async def kg_query(
@@ -3281,14 +3279,11 @@ async def _build_llm_context(
``` ```
""" """
# If final data is requested, return both context and complete data structure # If final data is requested, return both context and complete data structure
if return_final_data: if return_final_data:
final_data = _convert_to_user_format( final_data = _convert_to_user_format(
entities_context, entities_context, relations_context, truncated_chunks, query_param.mode
relations_context,
truncated_chunks,
query_param.mode
) )
return result, final_data return result, final_data
else: else:
@@ -3378,34 +3373,42 @@ async def _build_query_context(
chunk_tracking=search_result["chunk_tracking"], chunk_tracking=search_result["chunk_tracking"],
return_final_data=True, return_final_data=True,
) )
if isinstance(context_result, tuple): if isinstance(context_result, tuple):
context, final_chunks = context_result context, final_chunks = context_result
else: else:
# Handle case where no final chunks are returned # Handle case where no final chunks are returned
context = context_result context = context_result
final_chunks = merged_chunks final_chunks = merged_chunks
# Build raw data structure with the same data that goes to LLM # Build raw data structure with the same data that goes to LLM
raw_data = { raw_data = {
"entities": truncation_result["filtered_entities"], # Use filtered entities (same as LLM) "entities": truncation_result[
"relationships": truncation_result["filtered_relations"], # Use filtered relations (same as LLM) "filtered_entities"
], # Use filtered entities (same as LLM)
"relationships": truncation_result[
"filtered_relations"
], # Use filtered relations (same as LLM)
"chunks": final_chunks, # Use final processed chunks (same as LLM) "chunks": final_chunks, # Use final processed chunks (same as LLM)
"metadata": { "metadata": {
"query_mode": query_param.mode, "query_mode": query_param.mode,
"keywords": { "keywords": {
"high_level": hl_keywords.split(", ") if hl_keywords else [], "high_level": hl_keywords.split(", ") if hl_keywords else [],
"low_level": ll_keywords.split(", ") if ll_keywords else [] "low_level": ll_keywords.split(", ") if ll_keywords else [],
}, },
"processing_info": { "processing_info": {
"total_entities_found": len(search_result["final_entities"]), "total_entities_found": len(search_result["final_entities"]),
"total_relations_found": len(search_result["final_relations"]), "total_relations_found": len(search_result["final_relations"]),
"entities_after_truncation": len(truncation_result["filtered_entities"]), "entities_after_truncation": len(
"relations_after_truncation": len(truncation_result["filtered_relations"]), truncation_result["filtered_entities"]
),
"relations_after_truncation": len(
truncation_result["filtered_relations"]
),
"merged_chunks_count": len(merged_chunks), "merged_chunks_count": len(merged_chunks),
"final_chunks_count": len(final_chunks) "final_chunks_count": len(final_chunks),
} },
} },
} }
return context, raw_data return context, raw_data
else: else:
@@ -4003,8 +4006,8 @@ async def naive_query(
hashing_kv: BaseKVStorage | None = None, hashing_kv: BaseKVStorage | None = None,
system_prompt: str | None = None, system_prompt: str | None = None,
return_raw_data: Literal[True] = True, return_raw_data: Literal[True] = True,
) -> dict[str, Any]: ) -> dict[str, Any]: ...
...
@overload @overload
async def naive_query( async def naive_query(
@@ -4015,8 +4018,8 @@ async def naive_query(
hashing_kv: BaseKVStorage | None = None, hashing_kv: BaseKVStorage | None = None,
system_prompt: str | None = None, system_prompt: str | None = None,
return_raw_data: Literal[False] = False, return_raw_data: Literal[False] = False,
) -> str | AsyncIterator[str]: ) -> str | AsyncIterator[str]: ...
...
async def naive_query( async def naive_query(
query: str, query: str,
@@ -4069,14 +4072,14 @@ async def naive_query(
"chunks": [], "chunks": [],
"metadata": { "metadata": {
"query_mode": "naive", "query_mode": "naive",
"keywords": {"high_level": [], "low_level": []} "keywords": {"high_level": [], "low_level": []},
} },
} }
# If only raw data is requested, return it directly # If only raw data is requested, return it directly
if return_raw_data: if return_raw_data:
return empty_raw_data return empty_raw_data
return PROMPTS["fail_response"] return PROMPTS["fail_response"]
# Calculate dynamic token limit for chunks # Calculate dynamic token limit for chunks
@@ -4143,8 +4146,8 @@ async def naive_query(
"chunks": processed_chunks, # Use processed chunks (same as LLM) "chunks": processed_chunks, # Use processed chunks (same as LLM)
"metadata": { "metadata": {
"query_mode": "naive", "query_mode": "naive",
"keywords": {"high_level": [], "low_level": []} "keywords": {"high_level": [], "low_level": []},
} },
} }
return raw_data return raw_data

View file

@@ -2626,7 +2626,7 @@ def fix_tuple_delimiter_corruption(
) )
# Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >) # Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >)
#
record = re.sub( record = re.sub(
rf"<\|{escaped_delimiter_core}\|+(?!>)", rf"<\|{escaped_delimiter_core}\|+(?!>)",
tuple_delimiter, tuple_delimiter,
@@ -2715,7 +2715,7 @@ def create_prefixed_exception(original_exception: Exception, prefix: str) -> Exc
def _convert_to_user_format( def _convert_to_user_format(
entities_context: list[dict], entities_context: list[dict],
relations_context: list[dict], relations_context: list[dict],
final_chunks: list[dict], final_chunks: list[dict],
query_mode: str, query_mode: str,
hl_keywords: list[str] = None, hl_keywords: list[str] = None,
@@ -2725,65 +2725,72 @@ def _convert_to_user_format(
merged_chunks: list[dict] = None, merged_chunks: list[dict] = None,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Convert internal data format to user-friendly format""" """Convert internal data format to user-friendly format"""
# Convert entities format # Convert entities format
formatted_entities = [] formatted_entities = []
for entity in entities_context: for entity in entities_context:
formatted_entities.append({ formatted_entities.append(
"entity_name": entity.get("entity", ""), {
"entity_type": entity.get("type", "UNKNOWN"), "entity_name": entity.get("entity", ""),
"description": entity.get("description", ""), "entity_type": entity.get("type", "UNKNOWN"),
"source_id": entity.get("source_id", ""), "description": entity.get("description", ""),
"file_path": entity.get("file_path", "unknown_source"), "source_id": entity.get("source_id", ""),
"created_at": entity.get("created_at", ""), "file_path": entity.get("file_path", "unknown_source"),
}) "created_at": entity.get("created_at", ""),
}
)
# Convert relationships format # Convert relationships format
formatted_relationships = [] formatted_relationships = []
for relation in relations_context: for relation in relations_context:
formatted_relationships.append({ formatted_relationships.append(
"src_id": relation.get("entity1", ""), {
"tgt_id": relation.get("entity2", ""), "src_id": relation.get("entity1", ""),
"description": relation.get("description", ""), "tgt_id": relation.get("entity2", ""),
"keywords": relation.get("keywords", ""), "description": relation.get("description", ""),
"weight": relation.get("weight", 1.0), "keywords": relation.get("keywords", ""),
"source_id": relation.get("source_id", ""), "weight": relation.get("weight", 1.0),
"file_path": relation.get("file_path", "unknown_source"), "source_id": relation.get("source_id", ""),
"created_at": relation.get("created_at", ""), "file_path": relation.get("file_path", "unknown_source"),
}) "created_at": relation.get("created_at", ""),
}
)
# Convert chunks format # Convert chunks format
formatted_chunks = [] formatted_chunks = []
for chunk in final_chunks: for chunk in final_chunks:
formatted_chunks.append({ formatted_chunks.append(
"content": chunk.get("content", ""), {
"file_path": chunk.get("file_path", "unknown_source"), "content": chunk.get("content", ""),
"chunk_id": chunk.get("chunk_id", ""), "file_path": chunk.get("file_path", "unknown_source"),
}) "chunk_id": chunk.get("chunk_id", ""),
}
)
# Build metadata with processing info # Build metadata with processing info
metadata = { metadata = {
"query_mode": query_mode, "query_mode": query_mode,
"keywords": { "keywords": {"high_level": hl_keywords or [], "low_level": ll_keywords or []},
"high_level": hl_keywords or [],
"low_level": ll_keywords or []
}
} }
# Add processing info if available # Add processing info if available
if search_result and truncation_result and merged_chunks is not None: if search_result and truncation_result and merged_chunks is not None:
metadata["processing_info"] = { metadata["processing_info"] = {
"total_entities_found": len(search_result.get("final_entities", [])), "total_entities_found": len(search_result.get("final_entities", [])),
"total_relations_found": len(search_result.get("final_relations", [])), "total_relations_found": len(search_result.get("final_relations", [])),
"entities_after_truncation": len(truncation_result.get("filtered_entities", [])), "entities_after_truncation": len(
"relations_after_truncation": len(truncation_result.get("filtered_relations", [])), truncation_result.get("filtered_entities", [])
),
"relations_after_truncation": len(
truncation_result.get("filtered_relations", [])
),
"merged_chunks_count": len(merged_chunks), "merged_chunks_count": len(merged_chunks),
"final_chunks_count": len(final_chunks) "final_chunks_count": len(final_chunks),
} }
return { return {
"entities": formatted_entities, "entities": formatted_entities,
"relationships": formatted_relationships, "relationships": formatted_relationships,
"chunks": formatted_chunks, "chunks": formatted_chunks,
"metadata": metadata "metadata": metadata,
} }