diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 1ea4e4b9..c1a48d62 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -2122,7 +2122,7 @@ class LightRAG: ) -> dict[str, Any]: """ Asynchronous data retrieval API: returns structured retrieval results without LLM generation. - + This function reuses the same logic as aquery but stops before LLM generation, returning the final processed entities, relationships, and chunks data that would be sent to LLM. @@ -2167,12 +2167,12 @@ class LightRAG: "chunks": [], "metadata": { "query_mode": "bypass", - "keywords": {"high_level": [], "low_level": []} - } + "keywords": {"high_level": [], "low_level": []}, + }, } else: raise ValueError(f"Unknown mode {param.mode}") - + await self._query_done() return final_data diff --git a/lightrag/operate.py b/lightrag/operate.py index 7821ae38..02f2c0c0 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -2241,8 +2241,7 @@ async def kg_query( system_prompt: str | None = None, chunks_vdb: BaseVectorStorage = None, return_raw_data: Literal[True] = False, -) -> dict[str, Any]: - ... +) -> dict[str, Any]: ... @overload @@ -2258,8 +2257,7 @@ async def kg_query( system_prompt: str | None = None, chunks_vdb: BaseVectorStorage = None, return_raw_data: Literal[False] = False, -) -> str | AsyncIterator[str]: - ... +) -> str | AsyncIterator[str]: ... async def kg_query( @@ -3281,14 +3279,11 @@ async def _build_llm_context( ``` """ - + # If final data is requested, return both context and complete data structure if return_final_data: final_data = _convert_to_user_format( - entities_context, - relations_context, - truncated_chunks, - query_param.mode + entities_context, relations_context, truncated_chunks, query_param.mode ) return result, final_data else: @@ -3378,34 +3373,42 @@ async def _build_query_context( chunk_tracking=search_result["chunk_tracking"], return_final_data=True, ) - + if isinstance(context_result, tuple): context, final_chunks = context_result else: # Handle case where no final chunks are returned context = context_result final_chunks = merged_chunks - + # Build raw data structure with the same data that goes to LLM raw_data = { - "entities": truncation_result["filtered_entities"], # Use filtered entities (same as LLM) - "relationships": truncation_result["filtered_relations"], # Use filtered relations (same as LLM) + "entities": truncation_result[ + "filtered_entities" + ], # Use filtered entities (same as LLM) + "relationships": truncation_result[ + "filtered_relations" + ], # Use filtered relations (same as LLM) "chunks": final_chunks, # Use final processed chunks (same as LLM) "metadata": { "query_mode": query_param.mode, "keywords": { "high_level": hl_keywords.split(", ") if hl_keywords else [], - "low_level": ll_keywords.split(", ") if ll_keywords else [] + "low_level": ll_keywords.split(", ") if ll_keywords else [], }, "processing_info": { "total_entities_found": len(search_result["final_entities"]), "total_relations_found": len(search_result["final_relations"]), - "entities_after_truncation": len(truncation_result["filtered_entities"]), - "relations_after_truncation": len(truncation_result["filtered_relations"]), + "entities_after_truncation": len( + truncation_result["filtered_entities"] + ), + "relations_after_truncation": len( + truncation_result["filtered_relations"] + ), "merged_chunks_count": len(merged_chunks), - "final_chunks_count": len(final_chunks) - } - } + "final_chunks_count": len(final_chunks), + }, + }, } return context, raw_data else: @@ -4003,8 +4006,8 @@ async def naive_query( hashing_kv: BaseKVStorage | None = None, system_prompt: str | None = None, return_raw_data: Literal[True] = True, -) -> dict[str, Any]: - ... +) -> dict[str, Any]: ... + @overload async def naive_query( @@ -4015,8 +4018,8 @@ async def naive_query( hashing_kv: BaseKVStorage | None = None, system_prompt: str | None = None, return_raw_data: Literal[False] = False, -) -> str | AsyncIterator[str]: - ... +) -> str | AsyncIterator[str]: ... + async def naive_query( query: str, @@ -4069,14 +4072,14 @@ async def naive_query( "chunks": [], "metadata": { "query_mode": "naive", - "keywords": {"high_level": [], "low_level": []} - } + "keywords": {"high_level": [], "low_level": []}, + }, } - + # If only raw data is requested, return it directly if return_raw_data: return empty_raw_data - + return PROMPTS["fail_response"] # Calculate dynamic token limit for chunks @@ -4143,8 +4146,8 @@ async def naive_query( "chunks": processed_chunks, # Use processed chunks (same as LLM) "metadata": { "query_mode": "naive", - "keywords": {"high_level": [], "low_level": []} - } + "keywords": {"high_level": [], "low_level": []}, + }, } return raw_data diff --git a/lightrag/utils.py b/lightrag/utils.py index 77bd9c47..1944a64c 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -2626,7 +2626,7 @@ def fix_tuple_delimiter_corruption( ) # Fix: <|#| -> <|#|>, <|#|| -> <|#|> (missing closing >) - # + record = re.sub( rf"<\|{escaped_delimiter_core}\|+(?!>)", tuple_delimiter, @@ -2715,7 +2715,7 @@ def create_prefixed_exception(original_exception: Exception, prefix: str) -> Exc def _convert_to_user_format( entities_context: list[dict], - relations_context: list[dict], + relations_context: list[dict], final_chunks: list[dict], query_mode: str, hl_keywords: list[str] = None, @@ -2725,65 +2725,72 @@ def _convert_to_user_format( merged_chunks: list[dict] = None, ) -> dict[str, Any]: """Convert internal data format to user-friendly format""" - + # Convert entities format formatted_entities = [] for entity in entities_context: - formatted_entities.append({ - "entity_name": entity.get("entity", ""), - "entity_type": entity.get("type", "UNKNOWN"), - "description": entity.get("description", ""), - "source_id": entity.get("source_id", ""), - "file_path": entity.get("file_path", "unknown_source"), - "created_at": entity.get("created_at", ""), - }) - + formatted_entities.append( + { + "entity_name": entity.get("entity", ""), + "entity_type": entity.get("type", "UNKNOWN"), + "description": entity.get("description", ""), + "source_id": entity.get("source_id", ""), + "file_path": entity.get("file_path", "unknown_source"), + "created_at": entity.get("created_at", ""), + } + ) + # Convert relationships format formatted_relationships = [] for relation in relations_context: - formatted_relationships.append({ - "src_id": relation.get("entity1", ""), - "tgt_id": relation.get("entity2", ""), - "description": relation.get("description", ""), - "keywords": relation.get("keywords", ""), - "weight": relation.get("weight", 1.0), - "source_id": relation.get("source_id", ""), - "file_path": relation.get("file_path", "unknown_source"), - "created_at": relation.get("created_at", ""), - }) - + formatted_relationships.append( + { + "src_id": relation.get("entity1", ""), + "tgt_id": relation.get("entity2", ""), + "description": relation.get("description", ""), + "keywords": relation.get("keywords", ""), + "weight": relation.get("weight", 1.0), + "source_id": relation.get("source_id", ""), + "file_path": relation.get("file_path", "unknown_source"), + "created_at": relation.get("created_at", ""), + } + ) + # Convert chunks format formatted_chunks = [] for chunk in final_chunks: - formatted_chunks.append({ - "content": chunk.get("content", ""), - "file_path": chunk.get("file_path", "unknown_source"), - "chunk_id": chunk.get("chunk_id", ""), - }) - + formatted_chunks.append( + { + "content": chunk.get("content", ""), + "file_path": chunk.get("file_path", "unknown_source"), + "chunk_id": chunk.get("chunk_id", ""), + } + ) + # Build metadata with processing info metadata = { "query_mode": query_mode, - "keywords": { - "high_level": hl_keywords or [], - "low_level": ll_keywords or [] - } + "keywords": {"high_level": hl_keywords or [], "low_level": ll_keywords or []}, } - + # Add processing info if available if search_result and truncation_result and merged_chunks is not None: metadata["processing_info"] = { "total_entities_found": len(search_result.get("final_entities", [])), "total_relations_found": len(search_result.get("final_relations", [])), - "entities_after_truncation": len(truncation_result.get("filtered_entities", [])), - "relations_after_truncation": len(truncation_result.get("filtered_relations", [])), + "entities_after_truncation": len( + truncation_result.get("filtered_entities", []) + ), + "relations_after_truncation": len( + truncation_result.get("filtered_relations", []) + ), "merged_chunks_count": len(merged_chunks), - "final_chunks_count": len(final_chunks) + "final_chunks_count": len(final_chunks), } - + return { "entities": formatted_entities, "relationships": formatted_relationships, "chunks": formatted_chunks, - "metadata": metadata + "metadata": metadata, }