From 963ad4c63789f3d9325481dc3f65ff064776857c Mon Sep 17 00:00:00 2001 From: anouarbm Date: Sun, 2 Nov 2025 17:53:05 +0100 Subject: [PATCH] docs: Add documentation and examples for include_chunk_content parameter Added comprehensive documentation for the new include_chunk_content parameter that enables retrieval of actual chunk text content in API responses. Documentation Updates: - Added "Include Chunk Content in References" section to API README - Explained use cases: RAG evaluation, debugging, citations, transparency - Provided JSON request/response examples - Clarified parameter interaction with include_references OpenAPI/Swagger Examples: - Added "Response with chunk content" example to /query endpoint - Shows complete reference structure with content field - Demonstrates realistic chunk text content This makes the feature discoverable through: 1. API documentation (README.md) 2. Interactive Swagger UI (http://localhost:9621/docs) 3. Code examples for developers --- lightrag/api/README.md | 44 ++++++++++++++++++++++++++++ lightrag/api/routers/query_routes.py | 19 ++++++++++++ 2 files changed, 63 insertions(+) diff --git a/lightrag/api/README.md b/lightrag/api/README.md index 339500da..d4df8106 100644 --- a/lightrag/api/README.md +++ b/lightrag/api/README.md @@ -463,6 +463,50 @@ The `/query` and `/query/stream` API endpoints include an `enable_rerank` parame RERANK_BY_DEFAULT=False ``` +### Include Chunk Content in References + +By default, the `/query` and `/query/stream` endpoints return references with only `reference_id` and `file_path`. For evaluation, debugging, or citation purposes, you can request the actual retrieved chunk content to be included in references. + +The `include_chunk_content` parameter (default: `false`) controls whether the actual text content of retrieved chunks is included in the response references. 
This is particularly useful for: + +- **RAG Evaluation**: Supplying the retrieved contexts to evaluation frameworks such as RAGAS +- **Debugging**: Verifying what content was actually used to generate the answer +- **Citation Display**: Showing users the exact text passages that support the response +- **Transparency**: Providing full visibility into the RAG retrieval process + +**Example API Request:** + +```json +{ + "query": "What is LightRAG?", + "mode": "mix", + "include_references": true, + "include_chunk_content": true +} +``` + +**Example Response (with chunk content):** + +```json +{ + "response": "LightRAG is a graph-based RAG system...", + "references": [ + { + "reference_id": "1", + "file_path": "/documents/intro.md", + "content": "LightRAG is a retrieval-augmented generation system that combines knowledge graphs with vector similarity search..." + }, + { + "reference_id": "2", + "file_path": "/documents/features.md", + "content": "The system provides multiple query modes including local, global, hybrid, and mix modes..." + } + ] +} +``` + +**Note**: This parameter only works when `include_references=true`. Setting `include_chunk_content=true` without including references has no effect. 
+ ### .env Examples ```bash diff --git a/lightrag/api/routers/query_routes.py b/lightrag/api/routers/query_routes.py index b8f95a8f..b83d5106 100644 --- a/lightrag/api/routers/query_routes.py +++ b/lightrag/api/routers/query_routes.py @@ -233,6 +233,25 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60): ], }, }, + "with_chunk_content": { + "summary": "Response with chunk content", + "description": "Example response when include_references=True and include_chunk_content=True", + "value": { + "response": "Artificial Intelligence (AI) is a branch of computer science that aims to create intelligent machines capable of performing tasks that typically require human intelligence, such as learning, reasoning, and problem-solving.", + "references": [ + { + "reference_id": "1", + "file_path": "/documents/ai_overview.pdf", + "content": "Artificial Intelligence (AI) represents a transformative field in computer science focused on creating systems that can perform tasks requiring human-like intelligence. These tasks include learning from experience, understanding natural language, recognizing patterns, and making decisions.", + }, + { + "reference_id": "2", + "file_path": "/documents/machine_learning.txt", + "content": "Machine learning is a subset of AI that enables computers to learn and improve from experience without being explicitly programmed. It focuses on the development of algorithms that can access data and use it to learn for themselves.", + }, + ], + }, + }, "without_references": { "summary": "Response without references", "description": "Example response when include_references=False",