Enhance query API with streaming control and comprehensive documentation

- Add stream parameter to QueryRequest
- Support non-streaming in /query/stream
- Add detailed OpenAPI response schemas
- Expand endpoint documentation
- Include usage examples and error handling
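
A minimal client sketch of the new behavior (illustrative only; assumes `httpx` and a hypothetical local server URL):

```python
# Illustrative sketch, not part of this commit: calling /query/stream with the
# new stream parameter set to False to receive one complete NDJSON object.
import json

import httpx  # assumed HTTP client; any library that reads lines works

payload = {"query": "What is machine learning?", "mode": "mix", "stream": False}
# "http://localhost:9621" is a hypothetical base URL; use your server's address.
with httpx.stream("POST", "http://localhost:9621/query/stream", json=payload) as resp:
    for line in resp.iter_lines():
        if line:  # NDJSON: each non-empty line is a self-contained JSON object
            print(json.loads(line))
```

With `stream=True` the same loop would print one object per chunk instead of a single combined object.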
Author: yangdx
Date:   2025-09-27 11:53:31 +08:00
Parent: 4772148901
Commit: 81caee3498


@@ -93,6 +93,11 @@ class QueryRequest(BaseModel):
description="If True, includes reference list in responses. Affects /query and /query/stream endpoints. /query/data always includes references.",
)
stream: Optional[bool] = Field(
default=True,
description="If True, enables streaming output for real-time responses. Only affects /query/stream endpoint.",
)
@field_validator("query", mode="after")
@classmethod
def query_strip_after(cls, query: str) -> str:
@@ -129,7 +134,7 @@ class QueryResponse(BaseModel):
)
references: Optional[List[Dict[str, str]]] = Field(
default=None,
description="Reference list (only included when include_references=True, /query/data always includes references.)",
description="Reference list (Disabled when include_references=False, /query/data always includes references.)",
)
@@ -144,25 +149,196 @@ class QueryDataResponse(BaseModel):
)
class StreamChunkResponse(BaseModel):
"""Response model for streaming chunks in NDJSON format"""
references: Optional[List[Dict[str, str]]] = Field(
default=None,
description="Reference list (only in first chunk when include_references=True)"
)
response: Optional[str] = Field(
default=None,
description="Response content chunk or complete response"
)
error: Optional[str] = Field(
default=None,
description="Error message if processing fails"
)
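# Illustrative usage sketch, not part of this commit: validating one NDJSON
# line against the model above. Assumes Pydantic v2 (model_validate_json);
# on Pydantic v1, parse_raw would be the equivalent.
_example_chunk = StreamChunkResponse.model_validate_json('{"response": "AI is"}')
assert _example_chunk.response == "AI is"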
def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
combined_auth = get_combined_auth_dependency(api_key)
@router.post(
"/query", response_model=QueryResponse, dependencies=[Depends(combined_auth)]
"/query",
response_model=QueryResponse,
dependencies=[Depends(combined_auth)],
responses={
200: {
"description": "Successful RAG query response",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"response": {
"type": "string",
"description": "The generated response from the RAG system"
},
"references": {
"type": "array",
"items": {
"type": "object",
"properties": {
"reference_id": {"type": "string"},
"file_path": {"type": "string"}
}
},
"description": "Reference list (only included when include_references=True)"
}
},
"required": ["response"]
},
"examples": {
"with_references": {
"summary": "Response with references",
"description": "Example response when include_references=True",
"value": {
"response": "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines capable of performing tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.",
"references": [
{"reference_id": "1", "file_path": "/documents/ai_overview.pdf"},
{"reference_id": "2", "file_path": "/documents/machine_learning.txt"}
]
}
},
"without_references": {
"summary": "Response without references",
"description": "Example response when include_references=False",
"value": {
"response": "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines capable of performing tasks that typically require human intelligence, such as learning, reasoning, and problem-solving."
}
},
"different_modes": {
"summary": "Different query modes",
"description": "Examples of responses from different query modes",
"value": {
"local_mode": "Focuses on specific entities and their relationships",
"global_mode": "Provides broader context from relationship patterns",
"hybrid_mode": "Combines local and global approaches",
"naive_mode": "Simple vector similarity search",
"mix_mode": "Integrates knowledge graph and vector retrieval"
}
}
}
}
}
},
400: {
"description": "Bad Request - Invalid input parameters",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"detail": {"type": "string"}
}
},
"example": {
"detail": "Query text must be at least 3 characters long"
}
}
}
},
500: {
"description": "Internal Server Error - Query processing failed",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"detail": {"type": "string"}
}
},
"example": {
"detail": "Failed to process query: LLM service unavailable"
}
}
}
}
}
)
async def query_text(request: QueryRequest):
"""
This endpoint performs a RAG query with non-streaming response.
Comprehensive RAG query endpoint with non-streaming response.
This endpoint performs Retrieval-Augmented Generation (RAG) queries using various modes
to provide intelligent responses based on your knowledge base.
**Query Modes:**
- **local**: Focuses on specific entities and their direct relationships
- **global**: Analyzes broader patterns and relationships across the knowledge graph
- **hybrid**: Combines local and global approaches for comprehensive results
- **naive**: Simple vector similarity search without knowledge graph
- **mix**: Integrates knowledge graph retrieval with vector search (recommended)
- **bypass**: Direct LLM query without knowledge retrieval
**Key Features:**
- Non-streaming response for simple integration
- Optional reference citations for source attribution
- Flexible token control for response length management
- Conversation history support for multi-turn dialogues
- Multiple response format options
**Usage Examples:**
Basic query:
```json
{
"query": "What is machine learning?",
"mode": "mix"
}
```
Advanced query with references:
```json
{
"query": "Explain neural networks",
"mode": "hybrid",
"include_references": true,
"response_type": "Multiple Paragraphs",
"top_k": 10
}
```
Conversation with history:
```json
{
"query": "Can you give me more details?",
"conversation_history": [
{"role": "user", "content": "What is AI?"},
{"role": "assistant", "content": "AI is artificial intelligence..."}
]
}
```
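Client-side call (illustrative sketch; assumes `httpx` and a hypothetical local URL):
```python
import httpx

payload = {"query": "What is machine learning?", "mode": "mix", "include_references": True}
resp = httpx.post("http://localhost:9621/query", json=payload, timeout=60.0)
resp.raise_for_status()
body = resp.json()
print(body["response"])                # generated answer
print(body.get("references", []))      # only present when include_references=True
```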
Args:
request (QueryRequest): The request object containing query parameters:
- **query**: The question or prompt to process (min 3 characters)
- **mode**: Query strategy - "mix" recommended for best results
- **include_references**: Whether to include source citations
- **response_type**: Format preference (e.g., "Multiple Paragraphs")
- **top_k**: Number of top entities/relations to retrieve
- **conversation_history**: Previous dialogue context
- **max_total_tokens**: Token budget for the entire response
Parameters:
request (QueryRequest): The request object containing the query parameters.
Returns:
QueryResponse: A Pydantic model containing the result of the query processing.
If include_references=True, also includes reference list.
QueryResponse: JSON response containing:
- **response**: The generated answer to your query
- **references**: Source citations (if include_references=True)
Raises:
HTTPException: Raised when an error occurs during the request handling process,
with status code 500 and detail containing the exception message.
HTTPException:
- 400: Invalid input parameters (e.g., query too short)
- 500: Internal processing error (e.g., LLM service unavailable)
"""
try:
param = request.to_query_params(
@@ -192,29 +368,228 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
trace_exception(e)
raise HTTPException(status_code=500, detail=str(e))
@router.post("/query/stream", dependencies=[Depends(combined_auth)])
@router.post(
"/query/stream",
dependencies=[Depends(combined_auth)],
responses={
200: {
"description": "Flexible RAG query response - format depends on stream parameter",
"content": {
"application/x-ndjson": {
"schema": {
"type": "string",
"format": "ndjson",
"description": "Newline-delimited JSON (NDJSON) format used for both streaming and non-streaming responses. For streaming: multiple lines with separate JSON objects. For non-streaming: single line with complete JSON object.",
"example": '{"references": [{"reference_id": "1", "file_path": "/documents/ai.pdf"}]}\n{"response": "Artificial Intelligence is"}\n{"response": " a field of computer science"}\n{"response": " that focuses on creating intelligent machines."}'
},
"examples": {
"streaming_with_references": {
"summary": "Streaming mode with references (stream=true)",
"description": "Multiple NDJSON lines when stream=True and include_references=True. First line contains references, subsequent lines contain response chunks.",
"value": '{"references": [{"reference_id": "1", "file_path": "/documents/ai_overview.pdf"}, {"reference_id": "2", "file_path": "/documents/ml_basics.txt"}]}\n{"response": "Artificial Intelligence (AI) is a branch of computer science"}\n{"response": " that aims to create intelligent machines capable of performing"}\n{"response": " tasks that typically require human intelligence, such as learning,"}\n{"response": " reasoning, and problem-solving."}'
},
"streaming_without_references": {
"summary": "Streaming mode without references (stream=true)",
"description": "Multiple NDJSON lines when stream=True and include_references=False. Only response chunks are sent.",
"value": '{"response": "Machine learning is a subset of artificial intelligence"}\n{"response": " that enables computers to learn and improve from experience"}\n{"response": " without being explicitly programmed for every task."}'
},
"non_streaming_with_references": {
"summary": "Non-streaming mode with references (stream=false)",
"description": "Single NDJSON line when stream=False and include_references=True. Complete response with references in one message.",
"value": '{"references": [{"reference_id": "1", "file_path": "/documents/neural_networks.pdf"}], "response": "Neural networks are computational models inspired by biological neural networks that consist of interconnected nodes (neurons) organized in layers. They are fundamental to deep learning and can learn complex patterns from data through training processes."}'
},
"non_streaming_without_references": {
"summary": "Non-streaming mode without references (stream=false)",
"description": "Single NDJSON line when stream=False and include_references=False. Complete response only.",
"value": '{"response": "Deep learning is a subset of machine learning that uses neural networks with multiple layers (hence deep) to model and understand complex patterns in data. It has revolutionized fields like computer vision, natural language processing, and speech recognition."}'
},
"error_response": {
"summary": "Error during streaming",
"description": "Error handling in NDJSON format when an error occurs during processing.",
"value": '{"references": [{"reference_id": "1", "file_path": "/documents/ai.pdf"}]}\n{"response": "Artificial Intelligence is"}\n{"error": "LLM service temporarily unavailable"}'
}
}
}
}
},
400: {
"description": "Bad Request - Invalid input parameters",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"detail": {"type": "string"}
}
},
"example": {
"detail": "Query text must be at least 3 characters long"
}
}
}
},
500: {
"description": "Internal Server Error - Query processing failed",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"detail": {"type": "string"}
}
},
"example": {
"detail": "Failed to process streaming query: Knowledge graph unavailable"
}
}
}
}
}
)
async def query_text_stream(request: QueryRequest):
"""
This endpoint performs RAG query with streaming response.
Streaming can be turned off by setting stream=False in QueryRequest.
The streaming response includes:
1. Reference list (sent first as a single message, if include_references=True)
2. LLM response content (streamed as multiple chunks)
Advanced RAG query endpoint with flexible streaming and non-streaming response modes.
This endpoint provides the most flexible querying experience, supporting both real-time streaming
and complete response delivery based on your integration needs.
**Response Modes:**
**Streaming Mode (stream=True, default):**
- Real-time response delivery as content is generated
- NDJSON format: each line is a separate JSON object
- First line: `{"references": [...]}` (if include_references=True)
- Subsequent lines: `{"response": "content chunk"}`
- Error handling: `{"error": "error message"}`
- Perfect for chat interfaces and real-time applications
**Non-Streaming Mode (stream=False):**
- Complete response delivered in a single message
- NDJSON format: single line with complete content
- Format: `{"references": [...], "response": "complete content"}`
- Ideal for batch processing and simple integrations
**Response Format Details:**
- **Content-Type**: `application/x-ndjson` (Newline-Delimited JSON)
- **Structure**: Each line is an independent, valid JSON object
- **Parsing**: Process line by line; each line is self-contained
- **Headers**: Includes cache control and connection management
**Query Modes (same as /query endpoint):**
- **local**: Entity-focused retrieval with direct relationships
- **global**: Pattern analysis across the knowledge graph
- **hybrid**: Combined local and global strategies
- **naive**: Vector similarity search only
- **mix**: Integrated knowledge graph + vector retrieval (recommended)
- **bypass**: Direct LLM query without knowledge retrieval
**Key Features:**
- Dual-mode operation (streaming/non-streaming) in single endpoint
- Real-time response delivery for interactive applications
- Complete response option for batch processing
- Optional reference citations with source attribution
- Conversation history support for multi-turn dialogues
- Comprehensive error handling with graceful degradation
- Token control for response length management
**Usage Examples:**
Real-time streaming query:
```json
{
"query": "Explain machine learning algorithms",
"mode": "mix",
"stream": true,
"include_references": true
}
```
Complete response query:
```json
{
"query": "What is deep learning?",
"mode": "hybrid",
"stream": false,
"response_type": "Multiple Paragraphs"
}
```
Conversation with context:
```json
{
"query": "Can you elaborate on that?",
"stream": true,
"conversation_history": [
{"role": "user", "content": "What is neural network?"},
{"role": "assistant", "content": "A neural network is..."}
]
}
```
**Response Processing:**
For streaming responses, process each line:
```python
async for line in response.iter_lines():
data = json.loads(line)
if "references" in data:
# Handle references (first message)
references = data["references"]
elif "response" in data:
# Handle content chunk
content_chunk = data["response"]
elif "error" in data:
# Handle error
error_message = data["error"]
```
For non-streaming responses:
```python
line = await response.text()
data = json.loads(line.strip())
complete_response = data["response"]
references = data.get("references", [])
```
**Error Handling:**
- Streaming errors are delivered as `{"error": "message"}` lines
- Non-streaming errors raise HTTP exceptions
- Partial responses may be delivered before errors in streaming mode
- Always check for error objects when processing streaming responses (see the consumer sketch below)
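A self-contained consumer sketch tying the above together (illustrative; assumes `httpx.AsyncClient` and a hypothetical local URL):
```python
import asyncio
import json

import httpx

async def consume(query: str) -> None:
    payload = {"query": query, "mode": "mix", "stream": True, "include_references": True}
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST", "http://localhost:9621/query/stream", json=payload
        ) as resp:
            async for line in resp.aiter_lines():
                if not line:
                    continue
                data = json.loads(line)
                if "error" in data:       # check errors first, as noted above
                    raise RuntimeError(data["error"])
                if "references" in data:  # independent ifs: one non-streaming
                    print(data["references"])
                if "response" in data:    # line may carry both keys
                    print(data["response"], end="", flush=True)

asyncio.run(consume("What is deep learning?"))
```
Because the key checks are independent `if`s rather than `elif`s, the same loop also handles the single combined object produced when stream=False.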
Args:
request (QueryRequest): The request object containing the query parameters.
request (QueryRequest): The request object containing query parameters:
- **query**: The question or prompt to process (min 3 characters)
- **mode**: Query strategy - "mix" recommended for best results
- **stream**: Enable streaming (True) or complete response (False)
- **include_references**: Whether to include source citations
- **response_type**: Format preference (e.g., "Multiple Paragraphs")
- **top_k**: Number of top entities/relations to retrieve
- **conversation_history**: Previous dialogue context for multi-turn conversations
- **max_total_tokens**: Token budget for the entire response
Returns:
StreamingResponse: A streaming response containing:
- First message: {"references": [...]} - Complete reference list (if requested)
- Subsequent messages: {"response": "..."} - LLM response chunks
- Error messages: {"error": "..."} - If any errors occur
StreamingResponse: NDJSON streaming response containing:
- **Streaming mode**: Multiple JSON objects, one per line
- References object (if requested): `{"references": [...]}`
- Content chunks: `{"response": "chunk content"}`
- Error objects: `{"error": "error message"}`
- **Non-streaming mode**: Single JSON object
- Complete response: `{"references": [...], "response": "complete content"}`
Raises:
HTTPException:
- 400: Invalid input parameters (e.g., query too short, invalid mode)
- 500: Internal processing error (e.g., LLM service unavailable)
Note:
This endpoint is ideal for applications requiring flexible response delivery.
Use streaming mode for real-time interfaces and non-streaming for batch processing.
"""
try:
param = request.to_query_params(
True
) # Ensure stream=True for streaming endpoint
# Use the stream parameter from the request, defaulting to True if not specified
stream_mode = request.stream if request.stream is not None else True
param = request.to_query_params(stream_mode)
from fastapi.responses import StreamingResponse
@@ -226,12 +601,11 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
references = result.get("data", {}).get("references", [])
llm_response = result.get("llm_response", {})
# Send reference list first if requested
if request.include_references:
yield f"{json.dumps({'references': references})}\n"
# Then stream the LLM response content
if llm_response.get("is_streaming"):
# Streaming mode: send references first, then stream response chunks
if request.include_references:
yield f"{json.dumps({'references': references})}\n"
response_stream = llm_response.get("response_iterator")
if response_stream:
try:
@@ -242,12 +616,17 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
logging.error(f"Streaming error: {str(e)}")
yield f"{json.dumps({'error': str(e)})}\n"
else:
# Non-streaming response (fallback)
# Non-streaming mode: send complete response in one message
response_content = llm_response.get("content", "")
if response_content:
yield f"{json.dumps({'response': response_content})}\n"
else:
yield f"{json.dumps({'response': 'No relevant context found for the query.'})}\n"
if not response_content:
response_content = "No relevant context found for the query."
# Create complete response object
complete_response = {"response": response_content}
if request.include_references:
complete_response["references"] = references
yield f"{json.dumps(complete_response)}\n"
return StreamingResponse(
stream_generator(),
@@ -267,26 +646,364 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
"/query/data",
response_model=QueryDataResponse,
dependencies=[Depends(combined_auth)],
responses={
200: {
"description": "Successful data retrieval response with structured RAG data",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"status": {
"type": "string",
"enum": ["success", "failure"],
"description": "Query execution status"
},
"message": {
"type": "string",
"description": "Status message describing the result"
},
"data": {
"type": "object",
"properties": {
"entities": {
"type": "array",
"items": {
"type": "object",
"properties": {
"entity_name": {"type": "string"},
"entity_type": {"type": "string"},
"description": {"type": "string"},
"source_id": {"type": "string"},
"file_path": {"type": "string"},
"reference_id": {"type": "string"}
}
},
"description": "Retrieved entities from knowledge graph"
},
"relationships": {
"type": "array",
"items": {
"type": "object",
"properties": {
"src_id": {"type": "string"},
"tgt_id": {"type": "string"},
"description": {"type": "string"},
"keywords": {"type": "string"},
"weight": {"type": "number"},
"source_id": {"type": "string"},
"file_path": {"type": "string"},
"reference_id": {"type": "string"}
}
},
"description": "Retrieved relationships from knowledge graph"
},
"chunks": {
"type": "array",
"items": {
"type": "object",
"properties": {
"content": {"type": "string"},
"file_path": {"type": "string"},
"chunk_id": {"type": "string"},
"reference_id": {"type": "string"}
}
},
"description": "Retrieved text chunks from vector database"
},
"references": {
"type": "array",
"items": {
"type": "object",
"properties": {
"reference_id": {"type": "string"},
"file_path": {"type": "string"}
}
},
"description": "Reference list for citation purposes"
}
},
"description": "Structured retrieval data containing entities, relationships, chunks, and references"
},
"metadata": {
"type": "object",
"properties": {
"query_mode": {"type": "string"},
"keywords": {
"type": "object",
"properties": {
"high_level": {"type": "array", "items": {"type": "string"}},
"low_level": {"type": "array", "items": {"type": "string"}}
}
},
"processing_info": {
"type": "object",
"properties": {
"total_entities_found": {"type": "integer"},
"total_relations_found": {"type": "integer"},
"entities_after_truncation": {"type": "integer"},
"relations_after_truncation": {"type": "integer"},
"final_chunks_count": {"type": "integer"}
}
}
},
"description": "Query metadata including mode, keywords, and processing information"
}
},
"required": ["status", "message", "data", "metadata"]
},
"examples": {
"successful_local_mode": {
"summary": "Local mode data retrieval",
"description": "Example of structured data from local mode query focusing on specific entities",
"value": {
"status": "success",
"message": "Query executed successfully",
"data": {
"entities": [
{
"entity_name": "Neural Networks",
"entity_type": "CONCEPT",
"description": "Computational models inspired by biological neural networks",
"source_id": "chunk-123",
"file_path": "/documents/ai_basics.pdf",
"reference_id": "1"
}
],
"relationships": [
{
"src_id": "Neural Networks",
"tgt_id": "Machine Learning",
"description": "Neural networks are a subset of machine learning algorithms",
"keywords": "subset, algorithm, learning",
"weight": 0.85,
"source_id": "chunk-123",
"file_path": "/documents/ai_basics.pdf",
"reference_id": "1"
}
],
"chunks": [
{
"content": "Neural networks are computational models that mimic the way biological neural networks work...",
"file_path": "/documents/ai_basics.pdf",
"chunk_id": "chunk-123",
"reference_id": "1"
}
],
"references": [
{"reference_id": "1", "file_path": "/documents/ai_basics.pdf"}
]
},
"metadata": {
"query_mode": "local",
"keywords": {
"high_level": ["neural", "networks"],
"low_level": ["computation", "model", "algorithm"]
},
"processing_info": {
"total_entities_found": 5,
"total_relations_found": 3,
"entities_after_truncation": 1,
"relations_after_truncation": 1,
"final_chunks_count": 1
}
}
}
},
"global_mode": {
"summary": "Global mode data retrieval",
"description": "Example of structured data from global mode query analyzing broader patterns",
"value": {
"status": "success",
"message": "Query executed successfully",
"data": {
"entities": [],
"relationships": [
{
"src_id": "Artificial Intelligence",
"tgt_id": "Machine Learning",
"description": "AI encompasses machine learning as a core component",
"keywords": "encompasses, component, field",
"weight": 0.92,
"source_id": "chunk-456",
"file_path": "/documents/ai_overview.pdf",
"reference_id": "2"
}
],
"chunks": [],
"references": [
{"reference_id": "2", "file_path": "/documents/ai_overview.pdf"}
]
},
"metadata": {
"query_mode": "global",
"keywords": {
"high_level": ["artificial", "intelligence", "overview"],
"low_level": []
}
}
}
},
"naive_mode": {
"summary": "Naive mode data retrieval",
"description": "Example of structured data from naive mode using only vector search",
"value": {
"status": "success",
"message": "Query executed successfully",
"data": {
"entities": [],
"relationships": [],
"chunks": [
{
"content": "Deep learning is a subset of machine learning that uses neural networks with multiple layers...",
"file_path": "/documents/deep_learning.pdf",
"chunk_id": "chunk-789",
"reference_id": "3"
}
],
"references": [
{"reference_id": "3", "file_path": "/documents/deep_learning.pdf"}
]
},
"metadata": {
"query_mode": "naive",
"keywords": {
"high_level": [],
"low_level": []
}
}
}
}
}
}
}
},
400: {
"description": "Bad Request - Invalid input parameters",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"detail": {"type": "string"}
}
},
"example": {
"detail": "Query text must be at least 3 characters long"
}
}
}
},
500: {
"description": "Internal Server Error - Data retrieval failed",
"content": {
"application/json": {
"schema": {
"type": "object",
"properties": {
"detail": {"type": "string"}
}
},
"example": {
"detail": "Failed to retrieve data: Knowledge graph unavailable"
}
}
}
}
}
)
async def query_data(request: QueryRequest):
"""
Retrieve structured data without LLM generation.
This endpoint returns raw retrieval results including entities, relationships,
and text chunks that would be used for RAG, but without generating a final response.
All parameters are compatible with the regular /query endpoint.
Parameters:
request (QueryRequest): The request object containing the query parameters.
Advanced data retrieval endpoint for structured RAG analysis.
This endpoint provides raw retrieval results without LLM generation, perfect for:
- **Data Analysis**: Examine what information would be used for RAG
- **System Integration**: Get structured data for custom processing
- **Debugging**: Understand retrieval behavior and quality
- **Research**: Analyze knowledge graph structure and relationships
**Key Features:**
- No LLM generation - pure data retrieval
- Complete structured output with entities, relationships, and chunks
- Always includes references for citation
- Detailed metadata about processing and keywords
- Compatible with all query modes and parameters
**Query Mode Behaviors:**
- **local**: Returns entities and their direct relationships + related chunks
- **global**: Returns relationship patterns across the knowledge graph
- **hybrid**: Combines local and global retrieval strategies
- **naive**: Returns only vector-retrieved text chunks (no knowledge graph)
- **mix**: Integrates knowledge graph data with vector-retrieved chunks
- **bypass**: Returns empty data arrays (used for direct LLM queries)
**Data Structure:**
- **entities**: Knowledge graph entities with descriptions and metadata
- **relationships**: Connections between entities with weights and descriptions
- **chunks**: Text segments from documents with source information
- **references**: Citation information mapping reference IDs to file paths
- **metadata**: Processing information, keywords, and query statistics
**Usage Examples:**
Analyze entity relationships:
```json
{
"query": "machine learning algorithms",
"mode": "local",
"top_k": 10
}
```
Explore global patterns:
```json
{
"query": "artificial intelligence trends",
"mode": "global",
"max_relation_tokens": 2000
}
```
Vector similarity search:
```json
{
"query": "neural network architectures",
"mode": "naive",
"chunk_top_k": 5
}
```
**Response Analysis:**
- **Empty arrays**: Normal for certain modes (e.g., naive mode has no entities/relationships)
- **Processing info**: Shows retrieval statistics and token usage
- **Keywords**: High-level and low-level keywords extracted from query
- **Reference mapping**: Links all data back to source documents (see the client sketch below)
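Client-side analysis sketch (illustrative; assumes `httpx` and a hypothetical local URL):
```python
import httpx

payload = {"query": "machine learning algorithms", "mode": "local", "top_k": 10}
resp = httpx.post("http://localhost:9621/query/data", json=payload, timeout=60.0)
resp.raise_for_status()
result = resp.json()
data = result["data"]
print(result["status"], "-", result["message"])
print(len(data["entities"]), "entities /",
      len(data["relationships"]), "relationships /",
      len(data["chunks"]), "chunks")
for ref in data["references"]:          # references are always included here
    print(ref["reference_id"], "->", ref["file_path"])
```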
Args:
request (QueryRequest): The request object containing query parameters:
- **query**: The search query to analyze (min 3 characters)
- **mode**: Retrieval strategy affecting data types returned
- **top_k**: Number of top entities/relationships to retrieve
- **chunk_top_k**: Number of text chunks to retrieve
- **max_entity_tokens**: Token limit for entity context
- **max_relation_tokens**: Token limit for relationship context
- **max_total_tokens**: Overall token budget for retrieval
Returns:
QueryDataResponse: A Pydantic model containing structured data with status,
message, data (entities, relationships, chunks, references),
and metadata.
QueryDataResponse: Structured JSON response containing:
- **status**: "success" or "failure"
- **message**: Human-readable status description
- **data**: Complete retrieval results with entities, relationships, chunks, references
- **metadata**: Query processing information and statistics
Raises:
HTTPException: Raised when an error occurs during the request handling process,
with status code 500 and detail containing the exception message.
HTTPException:
- 400: Invalid input parameters (e.g., query too short, invalid mode)
- 500: Internal processing error (e.g., knowledge graph unavailable)
Note:
This endpoint always includes references regardless of the include_references parameter,
as structured data analysis typically requires source attribution.
"""
try:
param = request.to_query_params(False) # No streaming for data endpoint