Code formatting improvements and style consistency fixes
* Remove trailing whitespace
* Fix function signature ellipsis style
This commit is contained in:
parent
87bb8a023b
commit
82a67354d0
3 changed files with 82 additions and 72 deletions
|
|
@@ -2122,7 +2122,7 @@ class LightRAG:
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Asynchronous data retrieval API: returns structured retrieval results without LLM generation.
|
Asynchronous data retrieval API: returns structured retrieval results without LLM generation.
|
||||||
|
|
||||||
This function reuses the same logic as aquery but stops before LLM generation,
|
This function reuses the same logic as aquery but stops before LLM generation,
|
||||||
returning the final processed entities, relationships, and chunks data that would be sent to LLM.
|
returning the final processed entities, relationships, and chunks data that would be sent to LLM.
|
||||||
|
|
||||||
|
|
@@ -2167,12 +2167,12 @@ class LightRAG:
|
||||||
"chunks": [],
|
"chunks": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"query_mode": "bypass",
|
"query_mode": "bypass",
|
||||||
"keywords": {"high_level": [], "low_level": []}
|
"keywords": {"high_level": [], "low_level": []},
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown mode {param.mode}")
|
raise ValueError(f"Unknown mode {param.mode}")
|
||||||
|
|
||||||
await self._query_done()
|
await self._query_done()
|
||||||
return final_data
|
return final_data
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -2241,8 +2241,7 @@ async def kg_query(
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
chunks_vdb: BaseVectorStorage = None,
|
chunks_vdb: BaseVectorStorage = None,
|
||||||
return_raw_data: Literal[True] = False,
|
return_raw_data: Literal[True] = False,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]: ...
|
||||||
...
|
|
||||||
|
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
|
|
@@ -2258,8 +2257,7 @@ async def kg_query(
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
chunks_vdb: BaseVectorStorage = None,
|
chunks_vdb: BaseVectorStorage = None,
|
||||||
return_raw_data: Literal[False] = False,
|
return_raw_data: Literal[False] = False,
|
||||||
) -> str | AsyncIterator[str]:
|
) -> str | AsyncIterator[str]: ...
|
||||||
...
|
|
||||||
|
|
||||||
|
|
||||||
async def kg_query(
|
async def kg_query(
|
||||||
|
|
@@ -3281,14 +3279,11 @@ async def _build_llm_context(
|
||||||
```
|
```
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# If final data is requested, return both context and complete data structure
|
# If final data is requested, return both context and complete data structure
|
||||||
if return_final_data:
|
if return_final_data:
|
||||||
final_data = _convert_to_user_format(
|
final_data = _convert_to_user_format(
|
||||||
entities_context,
|
entities_context, relations_context, truncated_chunks, query_param.mode
|
||||||
relations_context,
|
|
||||||
truncated_chunks,
|
|
||||||
query_param.mode
|
|
||||||
)
|
)
|
||||||
return result, final_data
|
return result, final_data
|
||||||
else:
|
else:
|
||||||
|
|
@@ -3378,34 +3373,42 @@ async def _build_query_context(
|
||||||
chunk_tracking=search_result["chunk_tracking"],
|
chunk_tracking=search_result["chunk_tracking"],
|
||||||
return_final_data=True,
|
return_final_data=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
if isinstance(context_result, tuple):
|
if isinstance(context_result, tuple):
|
||||||
context, final_chunks = context_result
|
context, final_chunks = context_result
|
||||||
else:
|
else:
|
||||||
# Handle case where no final chunks are returned
|
# Handle case where no final chunks are returned
|
||||||
context = context_result
|
context = context_result
|
||||||
final_chunks = merged_chunks
|
final_chunks = merged_chunks
|
||||||
|
|
||||||
# Build raw data structure with the same data that goes to LLM
|
# Build raw data structure with the same data that goes to LLM
|
||||||
raw_data = {
|
raw_data = {
|
||||||
"entities": truncation_result["filtered_entities"], # Use filtered entities (same as LLM)
|
"entities": truncation_result[
|
||||||
"relationships": truncation_result["filtered_relations"], # Use filtered relations (same as LLM)
|
"filtered_entities"
|
||||||
|
], # Use filtered entities (same as LLM)
|
||||||
|
"relationships": truncation_result[
|
||||||
|
"filtered_relations"
|
||||||
|
], # Use filtered relations (same as LLM)
|
||||||
"chunks": final_chunks, # Use final processed chunks (same as LLM)
|
"chunks": final_chunks, # Use final processed chunks (same as LLM)
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"query_mode": query_param.mode,
|
"query_mode": query_param.mode,
|
||||||
"keywords": {
|
"keywords": {
|
||||||
"high_level": hl_keywords.split(", ") if hl_keywords else [],
|
"high_level": hl_keywords.split(", ") if hl_keywords else [],
|
||||||
"low_level": ll_keywords.split(", ") if ll_keywords else []
|
"low_level": ll_keywords.split(", ") if ll_keywords else [],
|
||||||
},
|
},
|
||||||
"processing_info": {
|
"processing_info": {
|
||||||
"total_entities_found": len(search_result["final_entities"]),
|
"total_entities_found": len(search_result["final_entities"]),
|
||||||
"total_relations_found": len(search_result["final_relations"]),
|
"total_relations_found": len(search_result["final_relations"]),
|
||||||
"entities_after_truncation": len(truncation_result["filtered_entities"]),
|
"entities_after_truncation": len(
|
||||||
"relations_after_truncation": len(truncation_result["filtered_relations"]),
|
truncation_result["filtered_entities"]
|
||||||
|
),
|
||||||
|
"relations_after_truncation": len(
|
||||||
|
truncation_result["filtered_relations"]
|
||||||
|
),
|
||||||
"merged_chunks_count": len(merged_chunks),
|
"merged_chunks_count": len(merged_chunks),
|
||||||
"final_chunks_count": len(final_chunks)
|
"final_chunks_count": len(final_chunks),
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
return context, raw_data
|
return context, raw_data
|
||||||
else:
|
else:
|
||||||
|
|
@@ -4003,8 +4006,8 @@ async def naive_query(
|
||||||
hashing_kv: BaseKVStorage | None = None,
|
hashing_kv: BaseKVStorage | None = None,
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
return_raw_data: Literal[True] = True,
|
return_raw_data: Literal[True] = True,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]: ...
|
||||||
...
|
|
||||||
|
|
||||||
@overload
|
@overload
|
||||||
async def naive_query(
|
async def naive_query(
|
||||||
|
|
@@ -4015,8 +4018,8 @@ async def naive_query(
|
||||||
hashing_kv: BaseKVStorage | None = None,
|
hashing_kv: BaseKVStorage | None = None,
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
return_raw_data: Literal[False] = False,
|
return_raw_data: Literal[False] = False,
|
||||||
) -> str | AsyncIterator[str]:
|
) -> str | AsyncIterator[str]: ...
|
||||||
...
|
|
||||||
|
|
||||||
async def naive_query(
|
async def naive_query(
|
||||||
query: str,
|
query: str,
|
||||||
|
|
@@ -4069,14 +4072,14 @@ async def naive_query(
|
||||||
"chunks": [],
|
"chunks": [],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"query_mode": "naive",
|
"query_mode": "naive",
|
||||||
"keywords": {"high_level": [], "low_level": []}
|
"keywords": {"high_level": [], "low_level": []},
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
# If only raw data is requested, return it directly
|
# If only raw data is requested, return it directly
|
||||||
if return_raw_data:
|
if return_raw_data:
|
||||||
return empty_raw_data
|
return empty_raw_data
|
||||||
|
|
||||||
return PROMPTS["fail_response"]
|
return PROMPTS["fail_response"]
|
||||||
|
|
||||||
# Calculate dynamic token limit for chunks
|
# Calculate dynamic token limit for chunks
|
||||||
|
|
@@ -4143,8 +4146,8 @@ async def naive_query(
|
||||||
"chunks": processed_chunks, # Use processed chunks (same as LLM)
|
"chunks": processed_chunks, # Use processed chunks (same as LLM)
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"query_mode": "naive",
|
"query_mode": "naive",
|
||||||
"keywords": {"high_level": [], "low_level": []}
|
"keywords": {"high_level": [], "low_level": []},
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
return raw_data
|
return raw_data
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -2626,7 +2626,7 @@ def fix_tuple_delimiter_corruption(
|
||||||
)
|
)
|
||||||
|
|
||||||
# Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >)
|
# Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >)
|
||||||
#
|
|
||||||
record = re.sub(
|
record = re.sub(
|
||||||
rf"<\|{escaped_delimiter_core}\|+(?!>)",
|
rf"<\|{escaped_delimiter_core}\|+(?!>)",
|
||||||
tuple_delimiter,
|
tuple_delimiter,
|
||||||
|
|
@@ -2715,7 +2715,7 @@ def create_prefixed_exception(original_exception: Exception, prefix: str) -> Exc
|
||||||
|
|
||||||
def _convert_to_user_format(
|
def _convert_to_user_format(
|
||||||
entities_context: list[dict],
|
entities_context: list[dict],
|
||||||
relations_context: list[dict],
|
relations_context: list[dict],
|
||||||
final_chunks: list[dict],
|
final_chunks: list[dict],
|
||||||
query_mode: str,
|
query_mode: str,
|
||||||
hl_keywords: list[str] = None,
|
hl_keywords: list[str] = None,
|
||||||
|
|
@@ -2725,65 +2725,72 @@ def _convert_to_user_format(
|
||||||
merged_chunks: list[dict] = None,
|
merged_chunks: list[dict] = None,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Convert internal data format to user-friendly format"""
|
"""Convert internal data format to user-friendly format"""
|
||||||
|
|
||||||
# Convert entities format
|
# Convert entities format
|
||||||
formatted_entities = []
|
formatted_entities = []
|
||||||
for entity in entities_context:
|
for entity in entities_context:
|
||||||
formatted_entities.append({
|
formatted_entities.append(
|
||||||
"entity_name": entity.get("entity", ""),
|
{
|
||||||
"entity_type": entity.get("type", "UNKNOWN"),
|
"entity_name": entity.get("entity", ""),
|
||||||
"description": entity.get("description", ""),
|
"entity_type": entity.get("type", "UNKNOWN"),
|
||||||
"source_id": entity.get("source_id", ""),
|
"description": entity.get("description", ""),
|
||||||
"file_path": entity.get("file_path", "unknown_source"),
|
"source_id": entity.get("source_id", ""),
|
||||||
"created_at": entity.get("created_at", ""),
|
"file_path": entity.get("file_path", "unknown_source"),
|
||||||
})
|
"created_at": entity.get("created_at", ""),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Convert relationships format
|
# Convert relationships format
|
||||||
formatted_relationships = []
|
formatted_relationships = []
|
||||||
for relation in relations_context:
|
for relation in relations_context:
|
||||||
formatted_relationships.append({
|
formatted_relationships.append(
|
||||||
"src_id": relation.get("entity1", ""),
|
{
|
||||||
"tgt_id": relation.get("entity2", ""),
|
"src_id": relation.get("entity1", ""),
|
||||||
"description": relation.get("description", ""),
|
"tgt_id": relation.get("entity2", ""),
|
||||||
"keywords": relation.get("keywords", ""),
|
"description": relation.get("description", ""),
|
||||||
"weight": relation.get("weight", 1.0),
|
"keywords": relation.get("keywords", ""),
|
||||||
"source_id": relation.get("source_id", ""),
|
"weight": relation.get("weight", 1.0),
|
||||||
"file_path": relation.get("file_path", "unknown_source"),
|
"source_id": relation.get("source_id", ""),
|
||||||
"created_at": relation.get("created_at", ""),
|
"file_path": relation.get("file_path", "unknown_source"),
|
||||||
})
|
"created_at": relation.get("created_at", ""),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Convert chunks format
|
# Convert chunks format
|
||||||
formatted_chunks = []
|
formatted_chunks = []
|
||||||
for chunk in final_chunks:
|
for chunk in final_chunks:
|
||||||
formatted_chunks.append({
|
formatted_chunks.append(
|
||||||
"content": chunk.get("content", ""),
|
{
|
||||||
"file_path": chunk.get("file_path", "unknown_source"),
|
"content": chunk.get("content", ""),
|
||||||
"chunk_id": chunk.get("chunk_id", ""),
|
"file_path": chunk.get("file_path", "unknown_source"),
|
||||||
})
|
"chunk_id": chunk.get("chunk_id", ""),
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# Build metadata with processing info
|
# Build metadata with processing info
|
||||||
metadata = {
|
metadata = {
|
||||||
"query_mode": query_mode,
|
"query_mode": query_mode,
|
||||||
"keywords": {
|
"keywords": {"high_level": hl_keywords or [], "low_level": ll_keywords or []},
|
||||||
"high_level": hl_keywords or [],
|
|
||||||
"low_level": ll_keywords or []
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Add processing info if available
|
# Add processing info if available
|
||||||
if search_result and truncation_result and merged_chunks is not None:
|
if search_result and truncation_result and merged_chunks is not None:
|
||||||
metadata["processing_info"] = {
|
metadata["processing_info"] = {
|
||||||
"total_entities_found": len(search_result.get("final_entities", [])),
|
"total_entities_found": len(search_result.get("final_entities", [])),
|
||||||
"total_relations_found": len(search_result.get("final_relations", [])),
|
"total_relations_found": len(search_result.get("final_relations", [])),
|
||||||
"entities_after_truncation": len(truncation_result.get("filtered_entities", [])),
|
"entities_after_truncation": len(
|
||||||
"relations_after_truncation": len(truncation_result.get("filtered_relations", [])),
|
truncation_result.get("filtered_entities", [])
|
||||||
|
),
|
||||||
|
"relations_after_truncation": len(
|
||||||
|
truncation_result.get("filtered_relations", [])
|
||||||
|
),
|
||||||
"merged_chunks_count": len(merged_chunks),
|
"merged_chunks_count": len(merged_chunks),
|
||||||
"final_chunks_count": len(final_chunks)
|
"final_chunks_count": len(final_chunks),
|
||||||
}
|
}
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"entities": formatted_entities,
|
"entities": formatted_entities,
|
||||||
"relationships": formatted_relationships,
|
"relationships": formatted_relationships,
|
||||||
"chunks": formatted_chunks,
|
"chunks": formatted_chunks,
|
||||||
"metadata": metadata
|
"metadata": metadata,
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue