chore: add citation system and enhance RAG UI components

Add citation tracking and display system across backend and frontend components.
Backend changes include citation.py for document attribution, enhanced query routes
with citation metadata, improved prompt templates, and PostgreSQL schema updates.
Frontend changes include a CitationMarker component, HoverCard UI, QuerySettings refinements,
and ChatMessage enhancements for displaying document sources. Update dependencies
and docker-compose test configuration for improved development workflow.
Author: clssck, 2025-12-01 17:50:00 +01:00
Parent: 77df910525
Commit: 663ada943a
23 changed files with 12102 additions and 260 deletions
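For context, a minimal sketch of how a client might exercise the new citation parameters; the base URL, the /query path, and the use of the requests library are assumptions, and field names follow the QueryRequest model in the query routes below.

import requests

# Hypothetical request against a local server (port 9621 as in the test compose file).
resp = requests.post(
    "http://localhost:9621/query",
    json={
        "query": "What does the citation system add?",
        "mode": "mix",
        "include_references": True,
        "include_chunk_content": True,
        "citation_mode": "footnotes",    # "none" | "inline" | "footnotes"
        "citation_threshold": 0.7,       # stricter matching as this approaches 1.0
    },
    timeout=120,
)
print(resp.json().get("response", ""))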


@ -54,13 +54,14 @@ services:
volumes:
- ./data/rag_storage_test:/app/data/rag_storage
- ./data/inputs_test:/app/data/inputs
- ./lightrag:/app/lightrag # Mount source for live reload
environment:
# Server
- HOST=0.0.0.0
- PORT=9621
- LOG_LEVEL=DEBUG
# LLM (OpenAI)
# LLM (OpenAI - gpt-4o-mini for reliable fast extraction)
- LLM_BINDING=openai
- LLM_MODEL=gpt-4o-mini
- LLM_BINDING_HOST=https://api.openai.com/v1
@ -85,8 +86,8 @@ services:
- POSTGRES_PASSWORD=lightrag_pass
- POSTGRES_DATABASE=lightrag
# Entity Resolution - ENABLED!
- ENTITY_RESOLUTION_ENABLED=true
# Entity Resolution - DISABLED for faster ingestion (testing Context Precision changes)
- ENTITY_RESOLUTION_ENABLED=false
- ENTITY_RESOLUTION_FUZZY_THRESHOLD=0.85
- ENTITY_RESOLUTION_VECTOR_THRESHOLD=0.5
- ENTITY_RESOLUTION_MAX_CANDIDATES=3
@ -94,20 +95,19 @@ services:
# Orphan Connection - MANUAL (use UI button instead of auto)
- AUTO_CONNECT_ORPHANS=false
# Processing - Aggressive settings from agent-sdk
# Processing - Matching agent-sdk working settings
- MAX_ASYNC=96
- MAX_PARALLEL_INSERT=10
- EMBEDDING_FUNC_MAX_ASYNC=16
- EMBEDDING_FUNC_MAX_ASYNC=2 # Match llamacpp parallel slots (prevent queue backlog)
- EMBEDDING_BATCH_NUM=48
# Gunicorn - 8 workers x 4 threads = 32 concurrent handlers
- GUNICORN_CMD_ARGS=--workers=8 --worker-class=gthread --threads=4 --worker-connections=1000 --timeout=120 --keep-alive=5 --graceful-timeout=30
# Extraction Optimization - Reduce Orphan Nodes
- CHUNK_SIZE=800 # Smaller chunks for focused extraction
- CHUNK_OVERLAP_SIZE=400 # 50% overlap captures cross-boundary relationships
- MAX_GLEANING=1 # Enable gleaning refinement pass
- FORCE_LLM_SUMMARY_ON_MERGE=4 # More aggressive entity consolidation
# Extraction - Using agent-sdk defaults for reliable ingestion
- CHUNK_SIZE=1200 # Default chunk size (agent-sdk default)
- CHUNK_OVERLAP_SIZE=100 # Default overlap
# MAX_GLEANING defaults to 1 (removed override of 2)
# Orphan Connection - Use UI button for manual triggering
# AUTO_CONNECT_ORPHANS is set to false above (manual mode)


@ -9,6 +9,7 @@ from enum import Enum
from fastapi.responses import StreamingResponse
import asyncio
from lightrag import LightRAG, QueryParam
from lightrag.constants import DEFAULT_TOP_K
from lightrag.utils import TiktokenTokenizer
from lightrag.api.utils_api import get_combined_auth_dependency
from fastapi import Depends
@ -218,7 +219,7 @@ def parse_query_mode(query: str) -> tuple[str, SearchMode, bool, Optional[str]]:
class OllamaAPI:
def __init__(self, rag: LightRAG, top_k: int = 60, api_key: Optional[str] = None):
def __init__(self, rag: LightRAG, top_k: int = DEFAULT_TOP_K, api_key: Optional[str] = None):
self.rag = rag
self.ollama_server_infos = rag.ollama_server_infos
self.top_k = top_k


@ -3,15 +3,76 @@ This module contains all query-related routes for the LightRAG API.
"""
import json
import re
from typing import Any, Dict, List, Literal, Optional
from fastapi import APIRouter, Depends, HTTPException
from lightrag.base import QueryParam
from lightrag.constants import DEFAULT_TOP_K
from lightrag.api.utils_api import get_combined_auth_dependency
from lightrag.utils import logger
from pydantic import BaseModel, Field, field_validator
router = APIRouter(tags=["query"])
# Pattern to match reasoning tags like <think>...</think>
REASONING_TAG_PATTERN = re.compile(r"<think>.*?</think>", re.DOTALL)
def strip_reasoning_tags(text: str) -> str:
"""Strip LLM reasoning tags like <think>...</think> from response text."""
if not text:
return text
return REASONING_TAG_PATTERN.sub("", text).strip()
async def filter_reasoning_stream(response_stream):
"""Filter <think>...</think> blocks from streaming response in real-time.
This is a state machine that buffers chunks and filters out reasoning blocks
as they stream in, preventing <think> tags from appearing to the user.
"""
buffer = ""
in_think_block = False
async for chunk in response_stream:
buffer += chunk
while buffer:
if in_think_block:
# Look for </think> to exit reasoning block
end_idx = buffer.find("</think>")
if end_idx != -1:
buffer = buffer[end_idx + 8:] # Skip past </think>
in_think_block = False
else:
break # Need more data to find closing tag
else:
# Look for <think> to enter reasoning block
start_idx = buffer.find("<think>")
if start_idx != -1:
# Emit everything before <think>
if start_idx > 0:
yield buffer[:start_idx]
buffer = buffer[start_idx + 7:] # Skip past <think>
in_think_block = True
else:
# Check for partial "<think>" match at buffer end
# This prevents emitting incomplete tags
for i in range(min(7, len(buffer)), 0, -1):
if "<think>"[:i] == buffer[-i:]:
if len(buffer) > i:
yield buffer[:-i]
buffer = buffer[-i:]
break
else:
yield buffer
buffer = ""
break
# Emit any remaining buffer (only if not inside a think block)
if buffer and not in_think_block:
yield buffer
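# Minimal illustration of the filter above (a sketch, not part of this module's API):
# feeding a <think> block split across chunks yields only the visible text.
#
#   async def _demo_chunks():
#       for piece in ["Hello <thi", "nk>hidden reasoning</think> wor", "ld"]:
#           yield piece
#
#   async def _demo():
#       out = []
#       async for visible in filter_reasoning_stream(_demo_chunks()):
#           out.append(visible)
#       assert "".join(out) == "Hello  world"  # double space where the tag block sat
#
#   asyncio.run(_demo())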
class QueryRequest(BaseModel):
query: str = Field(
@ -110,6 +171,18 @@ class QueryRequest(BaseModel):
description="If True, enables streaming output for real-time responses. Only affects /query/stream endpoint.",
)
citation_mode: Optional[Literal["none", "inline", "footnotes"]] = Field(
default="none",
description="Citation extraction mode: 'none' (no post-processing), 'inline' (add [n] markers in text), 'footnotes' (add markers and formatted footnotes). When enabled, citations are computed asynchronously after response completes.",
)
citation_threshold: Optional[float] = Field(
default=0.7,
ge=0.0,
le=1.0,
description="Minimum similarity threshold for citation matching (0.0-1.0). Higher values mean stricter matching.",
)
@field_validator("query", mode="after")
@classmethod
def query_strip_after(cls, query: str) -> str:
@ -134,7 +207,14 @@ class QueryRequest(BaseModel):
# Use Pydantic's `.model_dump(exclude_none=True)` to remove None values automatically
# Exclude API-level parameters that don't belong in QueryParam
request_data = self.model_dump(
exclude_none=True, exclude={"query", "include_chunk_content"}
exclude_none=True,
exclude={
"query",
"include_chunk_content",
"include_references",
"citation_mode",
"citation_threshold",
},
)
# Ensure `mode` and `stream` are set explicitly
@ -190,7 +270,118 @@ class StreamChunkResponse(BaseModel):
)
def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
class CitationSpanModel(BaseModel):
"""A span in the response with citation attribution."""
start_char: int = Field(description="Start character position in response")
end_char: int = Field(description="End character position in response")
text: str = Field(description="The text span being cited")
reference_ids: List[str] = Field(description="Reference IDs supporting this span")
confidence: float = Field(description="Citation confidence score (0.0-1.0)")
class EnhancedReferenceItem(BaseModel):
"""Enhanced reference with full metadata for footnotes."""
reference_id: str = Field(description="Unique reference identifier")
file_path: str = Field(description="Path to the source file")
document_title: Optional[str] = Field(
default=None, description="Human-readable document title"
)
section_title: Optional[str] = Field(
default=None, description="Section or chapter title"
)
page_range: Optional[str] = Field(default=None, description="Page range (e.g., pp. 45-67)")
excerpt: Optional[str] = Field(
default=None, description="Brief excerpt from the source"
)
async def _extract_and_stream_citations(
response: str,
chunks: List[Dict[str, Any]],
references: List[Dict[str, str]],
rag,
min_similarity: float,
citation_mode: str,
):
"""Extract citations from response and yield NDJSON lines.
NEW PROTOCOL (eliminates duplicate payload):
- Does NOT send full annotated_response (that would duplicate the streamed response)
- Instead sends citation positions + metadata for frontend marker insertion
- Frontend uses character positions to insert [n] markers client-side
Args:
response: The full LLM response text
chunks: List of chunk dictionaries from retrieval
references: List of reference dicts
rag: The RAG instance (for embedding function)
min_similarity: Minimum similarity threshold
citation_mode: 'inline' or 'footnotes'
Yields:
NDJSON lines for citation metadata (no duplicate text)
"""
try:
from lightrag.citation import extract_citations_from_response
# Extract citations using the citation module
citation_result = await extract_citations_from_response(
response=response,
chunks=chunks,
references=references,
embedding_func=rag.embedding_func,
min_similarity=min_similarity,
)
# Build citation markers with positions for frontend insertion
# Each marker tells frontend where to insert [n] without sending full text
citation_markers = []
for citation in citation_result.citations:
citation_markers.append({
"marker": "[" + ",".join(citation.reference_ids) + "]",
"insert_position": citation.end_char, # Insert after sentence
"reference_ids": citation.reference_ids,
"confidence": citation.confidence,
"text_preview": citation.text[:50] + "..." if len(citation.text) > 50 else citation.text,
})
# Build enhanced sources with metadata
sources = []
for ref in citation_result.references:
sources.append({
"reference_id": ref.reference_id,
"file_path": ref.file_path,
"document_title": ref.document_title,
"section_title": ref.section_title,
"page_range": ref.page_range,
"excerpt": ref.excerpt,
})
# Format footnotes if requested
footnotes = citation_result.footnotes if citation_mode == "footnotes" else []
# Send single consolidated citations_metadata object
# Frontend uses this to insert markers without needing the full text again
yield json.dumps({
"citations_metadata": {
"markers": citation_markers, # Position-based markers for insertion
"sources": sources, # Enhanced reference metadata
"footnotes": footnotes, # Pre-formatted footnote strings
"uncited_count": len(citation_result.uncited_claims),
}
}) + "\n"
except ImportError:
logger.warning("Citation module not available. Skipping citation extraction.")
yield json.dumps({"citation_error": "Citation module not available"}) + "\n"
except Exception as e:
logger.error(f"Citation extraction error: {str(e)}")
yield json.dumps({"citation_error": str(e)}) + "\n"
def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = DEFAULT_TOP_K):
combined_auth = get_combined_auth_dependency(api_key)
@router.post(
@ -421,6 +612,9 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
if not response_content:
response_content = "No relevant context found for the query."
# Strip reasoning tags like <think>...</think>
response_content = strip_reasoning_tags(response_content)
# Enrich references with chunk content if requested
if request.include_references and request.include_chunk_content:
chunks = data.get("chunks", [])
@ -672,12 +866,11 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
async def stream_generator():
# Extract references and LLM response from unified result
references = result.get("data", {}).get("references", [])
chunks = result.get("data", {}).get("chunks", [])
llm_response = result.get("llm_response", {})
# Enrich references with chunk content if requested
if request.include_references and request.include_chunk_content:
data = result.get("data", {})
chunks = data.get("chunks", [])
# Create a mapping from reference_id to chunk content
ref_id_to_content = {}
for chunk in chunks:
@ -698,6 +891,10 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
enriched_references.append(ref_copy)
references = enriched_references
# Track collected response for citation extraction
collected_response = []
citation_mode = request.citation_mode or "none"
if llm_response.get("is_streaming"):
# Streaming mode: send references first, then stream response chunks
if request.include_references:
@ -706,18 +903,36 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
response_stream = llm_response.get("response_iterator")
if response_stream:
try:
async for chunk in response_stream:
# Filter <think>...</think> blocks in real-time
async for chunk in filter_reasoning_stream(response_stream):
if chunk: # Only send non-empty content
yield f"{json.dumps({'response': chunk})}\n"
collected_response.append(chunk)
except Exception as e:
logger.error(f"Streaming error: {str(e)}")
yield f"{json.dumps({'error': str(e)})}\n"
# After streaming completes, extract citations if enabled
if citation_mode in ["inline", "footnotes"] and collected_response:
full_response = strip_reasoning_tags("".join(collected_response))
async for line in _extract_and_stream_citations(
full_response,
chunks,
references,
rag,
request.citation_threshold or 0.7,
citation_mode,
):
yield line
else:
# Non-streaming mode: send complete response in one message
response_content = llm_response.get("content", "")
if not response_content:
response_content = "No relevant context found for the query."
# Strip reasoning tags like <think>...</think>
response_content = strip_reasoning_tags(response_content)
# Create complete response object
complete_response = {"response": response_content}
if request.include_references:
@ -725,6 +940,18 @@ def create_query_routes(rag, api_key: Optional[str] = None, top_k: int = 60):
yield f"{json.dumps(complete_response)}\n"
# Extract citations for non-streaming mode too
if citation_mode in ["inline", "footnotes"] and response_content:
async for line in _extract_and_stream_citations(
response_content,
chunks,
references,
rag,
request.citation_threshold or 0.7,
citation_mode,
):
yield line
return StreamingResponse(
stream_generator(),
media_type="application/x-ndjson",


@ -18,7 +18,7 @@ import numpy as np
logger = logging.getLogger(__name__)
# Configuration
CITATION_MIN_SIMILARITY = float(os.getenv("CITATION_MIN_SIMILARITY", "0.7"))
CITATION_MIN_SIMILARITY = float(os.getenv("CITATION_MIN_SIMILARITY", "0.5"))
CITATION_MAX_PER_SENTENCE = int(os.getenv("CITATION_MAX_PER_SENTENCE", "3"))


@ -47,8 +47,8 @@ DEFAULT_CHUNK_TOP_K = 20
DEFAULT_MAX_ENTITY_TOKENS = 6000
DEFAULT_MAX_RELATION_TOKENS = 8000
DEFAULT_MAX_TOTAL_TOKENS = 30000
DEFAULT_COSINE_THRESHOLD = 0.2
DEFAULT_RELATED_CHUNK_NUMBER = 5
DEFAULT_COSINE_THRESHOLD = 0.40 # Balanced: 0.35 too permissive, 0.45 breaks local mode
DEFAULT_RELATED_CHUNK_NUMBER = 8 # Increased from 5 for better context coverage
DEFAULT_KG_CHUNK_PICK_METHOD = "VECTOR"
# TODO: Deprecated. All conversation_history messages are sent to the LLM.


@ -115,7 +115,7 @@ def _is_nan(value: Any) -> bool:
class RAGEvaluator:
"""Evaluate RAG system quality using RAGAS metrics"""
def __init__(self, test_dataset_path: str = None, rag_api_url: str = None):
def __init__(self, test_dataset_path: str = None, rag_api_url: str = None, query_mode: str = "mix"):
"""
Initialize evaluator with test dataset
@ -123,6 +123,7 @@ class RAGEvaluator:
test_dataset_path: Path to test dataset JSON file
rag_api_url: Base URL of LightRAG API (e.g., http://localhost:9621)
If None, will try to read from environment or use default
query_mode: Query mode for retrieval (local, global, hybrid, mix, naive)
Environment Variables:
EVAL_LLM_MODEL: LLM model for evaluation (default: gpt-4o-mini)
@ -219,6 +220,7 @@ class RAGEvaluator:
self.test_dataset_path = Path(test_dataset_path)
self.rag_api_url = rag_api_url.rstrip("/")
self.query_mode = query_mode
self.results_dir = Path(__file__).parent / "results"
self.results_dir.mkdir(exist_ok=True)
@ -275,6 +277,7 @@ class RAGEvaluator:
logger.info(" • Total Test Cases: %s", len(self.test_cases))
logger.info(" • Test Dataset: %s", self.test_dataset_path.name)
logger.info(" • LightRAG API: %s", self.rag_api_url)
logger.info(" • Query Mode: %s", self.query_mode)
logger.info(" • Results Directory: %s", self.results_dir.name)
def _load_test_dataset(self) -> List[Dict[str, str]]:
@ -309,7 +312,7 @@ class RAGEvaluator:
try:
payload = {
"query": question,
"mode": "mix",
"mode": self.query_mode,
"include_references": True,
"include_chunk_content": True, # NEW: Request chunk content in references
"response_type": "Multiple Paragraphs",
@ -997,6 +1000,15 @@ Examples:
help="LightRAG API endpoint URL (default: http://localhost:9621 or $LIGHTRAG_API_URL environment variable)",
)
parser.add_argument(
"--mode",
"-m",
type=str,
default="mix",
choices=["local", "global", "hybrid", "mix", "naive"],
help="Query mode for retrieval (default: mix). 'local' for entity-specific questions, 'mix' for comprehensive retrieval.",
)
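# Example invocation with the new flag (script name and dataset path are
# illustrative; flag spellings inferred from the parser attributes used below):
#   python evaluate_rag.py --dataset tests/eval_dataset.json \
#       --ragendpoint http://localhost:9621 --mode local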
args = parser.parse_args()
logger.info("%s", "=" * 70)
@ -1004,7 +1016,7 @@ Examples:
logger.info("%s", "=" * 70)
evaluator = RAGEvaluator(
test_dataset_path=args.dataset, rag_api_url=args.ragendpoint
test_dataset_path=args.dataset, rag_api_url=args.ragendpoint, query_mode=args.mode
)
await evaluator.run()
except Exception as e:


@ -62,6 +62,7 @@ class PostgreSQLDB:
self.database = config["database"]
self.workspace = config["workspace"]
self.max = int(config["max_connections"])
self.min = int(config.get("min_connections", 5))
self.increment = 1
self.pool: Pool | None = None
@ -200,7 +201,7 @@ class PostgreSQLDB:
"database": self.database,
"host": self.host,
"port": self.port,
"min_size": 1,
"min_size": self.min, # Configurable via POSTGRES_MIN_CONNECTIONS
"max_size": self.max,
}
@ -1184,6 +1185,28 @@ class PostgreSQLDB:
("idx_lightrag_doc_status_workspace_path", "LIGHTRAG_DOC_STATUS", "(workspace, file_path)"),
]
# GIN indexes for array membership queries (chunk_ids lookups)
gin_indexes = [
("idx_lightrag_vdb_entity_chunk_ids_gin", "LIGHTRAG_VDB_ENTITY", "USING gin (chunk_ids)"),
("idx_lightrag_vdb_relation_chunk_ids_gin", "LIGHTRAG_VDB_RELATION", "USING gin (chunk_ids)"),
]
# Create GIN indexes separately (different syntax)
for index_name, table_name, index_type in gin_indexes:
if index_name not in existing_indexes:
try:
create_gin_sql = (
f"CREATE INDEX IF NOT EXISTS {index_name} ON {table_name} {index_type}"
)
logger.info(
f"PostgreSQL, Creating GIN index {index_name} on {table_name}"
)
await self.execute(create_gin_sql)
except Exception as e:
logger.warning(
f"PostgreSQL, Failed to create GIN index {index_name}: {e}"
)
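# Illustration: these GIN indexes speed up array-membership lookups such as
#   SELECT id FROM LIGHTRAG_VDB_ENTITY WHERE chunk_ids @> ARRAY['chunk-abc123'];
# (the chunk id is made up; @> tests array containment)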
for index_name, table_name, columns in performance_indexes:
if index_name not in existing_indexes:
try:
@ -1679,6 +1702,39 @@ class PostgreSQLDB:
logger.error(f"PostgreSQL database,\nsql:{sql},\ndata:{data},\nerror:{e}")
raise
async def executemany(
self,
sql: str,
data_list: list[tuple],
batch_size: int = 500,
) -> None:
"""Execute SQL with multiple parameter sets using asyncpg's executemany.
This is significantly faster than calling execute() in a loop because it
reduces database round-trips by batching multiple rows in a single operation.
Args:
sql: The SQL statement with positional parameters ($1, $2, etc.)
data_list: List of tuples, each containing parameters for one row
batch_size: Number of rows to process per batch (default 500)
"""
if not data_list:
return
async def _operation(connection: asyncpg.Connection) -> None:
for i in range(0, len(data_list), batch_size):
batch = data_list[i : i + batch_size]
await connection.executemany(sql, batch)
try:
await self._run_with_retry(_operation)
logger.debug(
f"PostgreSQL executemany: inserted {len(data_list)} rows in batches of {batch_size}"
)
except Exception as e:
logger.error(f"PostgreSQL executemany error: {e}, sql: {sql[:100]}...")
raise
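# Sketch of intended usage (table name and columns are illustrative only):
#   rows = [("ws1", "id-1", "content A"), ("ws1", "id-2", "content B")]
#   await db.executemany(
#       "INSERT INTO my_table (workspace, id, content) VALUES ($1, $2, $3) "
#       "ON CONFLICT (workspace, id) DO UPDATE SET content = EXCLUDED.content",
#       rows,
#   )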
class ClientManager:
_instances: dict[str, Any] = {"db": None, "ref_count": 0}
@ -1712,9 +1768,17 @@ class ClientManager:
"POSTGRES_WORKSPACE",
config.get("postgres", "workspace", fallback=None),
),
"max_connections": os.environ.get(
"POSTGRES_MAX_CONNECTIONS",
config.get("postgres", "max_connections", fallback=50),
"max_connections": int(
os.environ.get(
"POSTGRES_MAX_CONNECTIONS",
config.get("postgres", "max_connections", fallback=50),
)
),
"min_connections": int(
os.environ.get(
"POSTGRES_MIN_CONNECTIONS",
config.get("postgres", "min_connections", fallback=5),
)
),
# SSL configuration
"ssl_mode": os.environ.get(
@ -2161,108 +2225,117 @@ class PGKVStorage(BaseKVStorage):
if not data:
return
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
# Get current UTC time and convert to naive datetime for database storage
current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_text_chunk"]
_data = {
"workspace": self.workspace,
"id": k,
"tokens": v["tokens"],
"chunk_order_index": v["chunk_order_index"],
"full_doc_id": v["full_doc_id"],
"content": v["content"],
"file_path": v["file_path"],
"llm_cache_list": json.dumps(v.get("llm_cache_list", [])),
"create_time": current_time,
"update_time": current_time,
}
await self.db.execute(upsert_sql, _data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_DOCS):
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_doc_full"]
_data = {
"id": k,
"content": v["content"],
"doc_name": v.get("file_path", ""), # Map file_path to doc_name
"workspace": self.workspace,
}
await self.db.execute(upsert_sql, _data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_llm_response_cache"]
_data = {
"workspace": self.workspace,
"id": k, # Use flattened key as id
"original_prompt": v["original_prompt"],
"return_value": v["return"],
"chunk_id": v.get("chunk_id"),
"cache_type": v.get(
"cache_type", "extract"
), # Get cache_type from data
"queryparam": json.dumps(v.get("queryparam"))
if v.get("queryparam")
else None,
}
# Get current UTC time and convert to naive datetime for database storage
current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
upsert_sql = SQL_TEMPLATES["upsert_text_chunk"]
# Collect all rows as tuples for batch insert
batch_data = [
(
self.workspace,
k,
v["tokens"],
v["chunk_order_index"],
v["full_doc_id"],
v["content"],
v["file_path"],
json.dumps(v.get("llm_cache_list", [])),
current_time,
current_time,
)
for k, v in data.items()
]
await self.db.executemany(upsert_sql, batch_data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_DOCS):
upsert_sql = SQL_TEMPLATES["upsert_doc_full"]
batch_data = [
(
k,
v["content"],
v.get("file_path", ""), # Map file_path to doc_name
self.workspace,
)
for k, v in data.items()
]
await self.db.executemany(upsert_sql, batch_data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
upsert_sql = SQL_TEMPLATES["upsert_llm_response_cache"]
batch_data = [
(
self.workspace,
k, # Use flattened key as id
v["original_prompt"],
v["return"],
v.get("chunk_id"),
v.get("cache_type", "extract"),
json.dumps(v.get("queryparam")) if v.get("queryparam") else None,
)
for k, v in data.items()
]
await self.db.executemany(upsert_sql, batch_data)
await self.db.execute(upsert_sql, _data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_ENTITIES):
# Get current UTC time and convert to naive datetime for database storage
current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_full_entities"]
_data = {
"workspace": self.workspace,
"id": k,
"entity_names": json.dumps(v["entity_names"]),
"count": v["count"],
"create_time": current_time,
"update_time": current_time,
}
await self.db.execute(upsert_sql, _data)
upsert_sql = SQL_TEMPLATES["upsert_full_entities"]
batch_data = [
(
self.workspace,
k,
json.dumps(v["entity_names"]),
v["count"],
current_time,
current_time,
)
for k, v in data.items()
]
await self.db.executemany(upsert_sql, batch_data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_RELATIONS):
# Get current UTC time and convert to naive datetime for database storage
current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_full_relations"]
_data = {
"workspace": self.workspace,
"id": k,
"relation_pairs": json.dumps(v["relation_pairs"]),
"count": v["count"],
"create_time": current_time,
"update_time": current_time,
}
await self.db.execute(upsert_sql, _data)
upsert_sql = SQL_TEMPLATES["upsert_full_relations"]
batch_data = [
(
self.workspace,
k,
json.dumps(v["relation_pairs"]),
v["count"],
current_time,
current_time,
)
for k, v in data.items()
]
await self.db.executemany(upsert_sql, batch_data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_ENTITY_CHUNKS):
# Get current UTC time and convert to naive datetime for database storage
current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_entity_chunks"]
_data = {
"workspace": self.workspace,
"id": k,
"chunk_ids": json.dumps(v["chunk_ids"]),
"count": v["count"],
"create_time": current_time,
"update_time": current_time,
}
await self.db.execute(upsert_sql, _data)
upsert_sql = SQL_TEMPLATES["upsert_entity_chunks"]
batch_data = [
(
self.workspace,
k,
json.dumps(v["chunk_ids"]),
v["count"],
current_time,
current_time,
)
for k, v in data.items()
]
await self.db.executemany(upsert_sql, batch_data)
elif is_namespace(self.namespace, NameSpace.KV_STORE_RELATION_CHUNKS):
# Get current UTC time and convert to naive datetime for database storage
current_time = datetime.datetime.now(timezone.utc).replace(tzinfo=None)
for k, v in data.items():
upsert_sql = SQL_TEMPLATES["upsert_relation_chunks"]
_data = {
"workspace": self.workspace,
"id": k,
"chunk_ids": json.dumps(v["chunk_ids"]),
"count": v["count"],
"create_time": current_time,
"update_time": current_time,
}
await self.db.execute(upsert_sql, _data)
upsert_sql = SQL_TEMPLATES["upsert_relation_chunks"]
batch_data = [
(
self.workspace,
k,
json.dumps(v["chunk_ids"]),
v["count"],
current_time,
current_time,
)
for k, v in data.items()
]
await self.db.executemany(upsert_sql, batch_data)
async def index_done_callback(self) -> None:
# PG handles persistence automatically
@ -2376,77 +2449,73 @@ class PGVectorStorage(BaseVectorStorage):
await ClientManager.release_client(self.db)
self.db = None
def _upsert_chunks(
def _prepare_chunk_tuple(
self, item: dict[str, Any], current_time: datetime.datetime
) -> tuple[str, dict[str, Any]]:
) -> tuple:
"""Prepare a tuple for batch chunk upsert."""
try:
upsert_sql = SQL_TEMPLATES["upsert_chunk"]
data: dict[str, Any] = {
"workspace": self.workspace,
"id": item["__id__"],
"tokens": item["tokens"],
"chunk_order_index": item["chunk_order_index"],
"full_doc_id": item["full_doc_id"],
"content": item["content"],
"content_vector": json.dumps(item["__vector__"].tolist()),
"file_path": item["file_path"],
"create_time": current_time,
"update_time": current_time,
}
return (
self.workspace,
item["__id__"],
item["tokens"],
item["chunk_order_index"],
item["full_doc_id"],
item["content"],
json.dumps(item["__vector__"].tolist()),
item["file_path"],
current_time,
current_time,
)
except Exception as e:
logger.error(
f"[{self.workspace}] Error to prepare upsert,\nsql: {e}\nitem: {item}"
f"[{self.workspace}] Error to prepare upsert,\nerror: {e}\nitem: {item}"
)
raise
return upsert_sql, data
def _upsert_entities(
def _prepare_entity_tuple(
self, item: dict[str, Any], current_time: datetime.datetime
) -> tuple[str, dict[str, Any]]:
upsert_sql = SQL_TEMPLATES["upsert_entity"]
) -> tuple:
"""Prepare a tuple for batch entity upsert."""
source_id = item["source_id"]
if isinstance(source_id, str) and "<SEP>" in source_id:
chunk_ids = source_id.split("<SEP>")
else:
chunk_ids = [source_id]
data: dict[str, Any] = {
"workspace": self.workspace,
"id": item["__id__"],
"entity_name": item["entity_name"],
"content": item["content"],
"content_vector": json.dumps(item["__vector__"].tolist()),
"chunk_ids": chunk_ids,
"file_path": item.get("file_path", None),
"create_time": current_time,
"update_time": current_time,
}
return upsert_sql, data
return (
self.workspace,
item["__id__"],
item["entity_name"],
item["content"],
json.dumps(item["__vector__"].tolist()),
chunk_ids,
item.get("file_path", None),
current_time,
current_time,
)
def _upsert_relationships(
def _prepare_relationship_tuple(
self, item: dict[str, Any], current_time: datetime.datetime
) -> tuple[str, dict[str, Any]]:
upsert_sql = SQL_TEMPLATES["upsert_relationship"]
) -> tuple:
"""Prepare a tuple for batch relationship upsert."""
source_id = item["source_id"]
if isinstance(source_id, str) and "<SEP>" in source_id:
chunk_ids = source_id.split("<SEP>")
else:
chunk_ids = [source_id]
data: dict[str, Any] = {
"workspace": self.workspace,
"id": item["__id__"],
"source_id": item["src_id"],
"target_id": item["tgt_id"],
"content": item["content"],
"content_vector": json.dumps(item["__vector__"].tolist()),
"chunk_ids": chunk_ids,
"file_path": item.get("file_path", None),
"create_time": current_time,
"update_time": current_time,
}
return upsert_sql, data
return (
self.workspace,
item["__id__"],
item["src_id"],
item["tgt_id"],
item["content"],
json.dumps(item["__vector__"].tolist()),
chunk_ids,
item.get("file_path", None),
current_time,
current_time,
)
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.debug(f"[{self.workspace}] Inserting {len(data)} to {self.namespace}")
@ -2462,29 +2531,42 @@ class PGVectorStorage(BaseVectorStorage):
}
for k, v in data.items()
]
# Batch compute embeddings (already optimized)
contents = [v["content"] for v in data.values()]
batches = [
contents[i : i + self._max_batch_size]
for i in range(0, len(contents), self._max_batch_size)
]
embedding_tasks = [self.embedding_func(batch) for batch in batches]
embeddings_list = await asyncio.gather(*embedding_tasks)
embeddings = np.concatenate(embeddings_list)
# Assign embeddings to items
for i, d in enumerate(list_data):
d["__vector__"] = embeddings[i]
for item in list_data:
if is_namespace(self.namespace, NameSpace.VECTOR_STORE_CHUNKS):
upsert_sql, data = self._upsert_chunks(item, current_time)
elif is_namespace(self.namespace, NameSpace.VECTOR_STORE_ENTITIES):
upsert_sql, data = self._upsert_entities(item, current_time)
elif is_namespace(self.namespace, NameSpace.VECTOR_STORE_RELATIONSHIPS):
upsert_sql, data = self._upsert_relationships(item, current_time)
else:
raise ValueError(f"{self.namespace} is not supported")
await self.db.execute(upsert_sql, data)
# Prepare batch data based on namespace and execute in single batch
if is_namespace(self.namespace, NameSpace.VECTOR_STORE_CHUNKS):
upsert_sql = SQL_TEMPLATES["upsert_chunk"]
batch_data = [
self._prepare_chunk_tuple(item, current_time) for item in list_data
]
elif is_namespace(self.namespace, NameSpace.VECTOR_STORE_ENTITIES):
upsert_sql = SQL_TEMPLATES["upsert_entity"]
batch_data = [
self._prepare_entity_tuple(item, current_time) for item in list_data
]
elif is_namespace(self.namespace, NameSpace.VECTOR_STORE_RELATIONSHIPS):
upsert_sql = SQL_TEMPLATES["upsert_relationship"]
batch_data = [
self._prepare_relationship_tuple(item, current_time)
for item in list_data
]
else:
raise ValueError(f"{self.namespace} is not supported")
await self.db.executemany(upsert_sql, batch_data)
#################### query method ###############
async def query(


@ -218,9 +218,11 @@ class LightRAG:
)
)
entity_resolution_config: EntityResolutionConfig | None = field(default=None)
entity_resolution_config: EntityResolutionConfig = field(
default_factory=EntityResolutionConfig
)
"""Configuration for entity resolution (deduplication).
Set to EntityResolutionConfig() to enable, or None to disable.
Now enabled by default. Set to None to disable.
Resolves entities like 'FDA' → 'US Food and Drug Administration'."""
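# Example: opt out of the new default explicitly (sketch; other constructor
# arguments omitted):
#   rag = LightRAG(working_dir="./rag_storage", entity_resolution_config=None)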
# Orphan connection


@ -365,7 +365,16 @@ async def openai_complete_if_cache(
delta = chunk.choices[0].delta
content = getattr(delta, "content", None)
reasoning_content = getattr(delta, "reasoning_content", "")
# Support both OpenAI's reasoning_content and OpenRouter's reasoning field
reasoning_content = getattr(delta, "reasoning_content", "") or getattr(delta, "reasoning", "")
# Also handle OpenRouter's reasoning_details array format
if not reasoning_content:
reasoning_details = getattr(delta, "reasoning_details", None)
if reasoning_details and isinstance(reasoning_details, list):
for detail in reasoning_details:
if isinstance(detail, dict) and detail.get("text"):
reasoning_content = detail.get("text", "")
break
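# Illustrative delta shapes handled above (layouts assumed, values made up):
#   delta.reasoning_content = "step-by-step thoughts"              # OpenAI-style field
#   delta.reasoning = "step-by-step thoughts"                      # OpenRouter simple field
#   delta.reasoning_details = [{"text": "step-by-step thoughts"}]  # OpenRouter detail list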
# Handle COT logic for streaming (only if enabled)
if enable_cot:
@ -527,7 +536,18 @@ async def openai_complete_if_cache(
else:
# Handle regular content responses
content = getattr(message, "content", None)
reasoning_content = getattr(message, "reasoning_content", "")
# Support both OpenAI's reasoning_content and OpenRouter's reasoning field
reasoning_content = getattr(message, "reasoning_content", "") or getattr(message, "reasoning", "")
# Also handle OpenRouter's reasoning_details array format
if not reasoning_content:
reasoning_details = getattr(message, "reasoning_details", None)
if reasoning_details and isinstance(reasoning_details, list):
# Concatenate all reasoning text for non-streaming
reasoning_parts = []
for detail in reasoning_details:
if isinstance(detail, dict) and detail.get("text"):
reasoning_parts.append(detail.get("text", ""))
reasoning_content = "".join(reasoning_parts)
# Handle COT logic for non-streaming responses (only if enabled)
final_content = ""


@ -5,6 +5,7 @@ from pathlib import Path
import asyncio
import json
import json_repair
import os
import re
from typing import Any, AsyncIterator, overload, Literal
from collections import Counter, defaultdict
@ -78,18 +79,48 @@ import time
import hashlib
from dotenv import load_dotenv
# Query embedding cache for avoiding redundant API calls
# Query embedding cache configuration (configurable via environment variables)
QUERY_EMBEDDING_CACHE_TTL = int(os.getenv("QUERY_EMBEDDING_CACHE_TTL", "3600")) # 1 hour
QUERY_EMBEDDING_CACHE_MAX_SIZE = int(os.getenv("QUERY_EMBEDDING_CACHE_SIZE", "10000"))
# Redis cache configuration
REDIS_EMBEDDING_CACHE_ENABLED = os.getenv("REDIS_EMBEDDING_CACHE", "false").lower() == "true"
REDIS_URI = os.getenv("REDIS_URI", "redis://localhost:6379")
# Local in-memory cache with LRU eviction
# Structure: {query_hash: (embedding, timestamp)}
_query_embedding_cache: dict[str, tuple[list[float], float]] = {}
QUERY_EMBEDDING_CACHE_TTL = 3600 # 1 hour TTL
QUERY_EMBEDDING_CACHE_MAX_SIZE = 1000 # Maximum cache entries
# Global Redis client (lazy initialized)
_redis_client = None
async def get_cached_query_embedding(
query: str, embedding_func
) -> list[float] | None:
async def _get_redis_client():
"""Lazy initialize Redis client."""
global _redis_client
if _redis_client is None and REDIS_EMBEDDING_CACHE_ENABLED:
try:
import redis.asyncio as redis
_redis_client = redis.from_url(REDIS_URI, decode_responses=True)
# Test connection
await _redis_client.ping()
logger.info(f"Redis embedding cache connected: {REDIS_URI}")
except ImportError:
logger.warning("Redis package not installed. Install with: pip install redis")
return None
except Exception as e:
logger.warning(f"Failed to connect to Redis: {e}. Falling back to local cache.")
return None
return _redis_client
async def get_cached_query_embedding(query: str, embedding_func) -> list[float] | None:
"""Get query embedding with caching to avoid redundant API calls.
Supports both local in-memory cache and Redis for cross-worker sharing.
Redis is used when REDIS_EMBEDDING_CACHE=true environment variable is set.
Args:
query: The query string to embed
embedding_func: The embedding function to call on cache miss
@ -99,11 +130,27 @@ async def get_cached_query_embedding(
"""
query_hash = hashlib.sha256(query.encode()).hexdigest()[:16]
current_time = time.time()
redis_key = f"lightrag:emb:{query_hash}"
# Check cache
# Try Redis cache first (if enabled)
if REDIS_EMBEDDING_CACHE_ENABLED:
try:
redis_client = await _get_redis_client()
if redis_client:
cached_json = await redis_client.get(redis_key)
if cached_json:
embedding = json.loads(cached_json)
logger.debug(f"Redis embedding cache hit for hash {query_hash[:8]}")
# Also update local cache
_query_embedding_cache[query_hash] = (embedding, current_time)
return embedding
except Exception as e:
logger.debug(f"Redis cache read error: {e}")
# Check local cache
cached = _query_embedding_cache.get(query_hash)
if cached and (current_time - cached[1]) < QUERY_EMBEDDING_CACHE_TTL:
logger.debug(f"Query embedding cache hit for hash {query_hash[:8]}")
logger.debug(f"Local embedding cache hit for hash {query_hash[:8]}")
return cached[0]
# Cache miss - compute embedding
@ -111,7 +158,7 @@ async def get_cached_query_embedding(
embedding = await embedding_func([query])
embedding_result = embedding[0] # Extract first from batch
# Manage cache size - simple eviction of oldest entries
# Manage local cache size - LRU eviction of oldest entries
if len(_query_embedding_cache) >= QUERY_EMBEDDING_CACHE_MAX_SIZE:
# Remove oldest 10% of entries
sorted_entries = sorted(
@ -120,13 +167,30 @@ async def get_cached_query_embedding(
for old_key, _ in sorted_entries[: QUERY_EMBEDDING_CACHE_MAX_SIZE // 10]:
del _query_embedding_cache[old_key]
# Store in local cache
_query_embedding_cache[query_hash] = (embedding_result, current_time)
logger.debug(f"Query embedding cached for hash {query_hash[:8]}")
# Store in Redis (if enabled)
if REDIS_EMBEDDING_CACHE_ENABLED:
try:
redis_client = await _get_redis_client()
if redis_client:
await redis_client.setex(
redis_key,
QUERY_EMBEDDING_CACHE_TTL,
json.dumps(embedding_result),
)
logger.debug(f"Embedding cached in Redis for hash {query_hash[:8]}")
except Exception as e:
logger.debug(f"Redis cache write error: {e}")
logger.debug(f"Query embedding computed and cached for hash {query_hash[:8]}")
return embedding_result
except Exception as e:
logger.warning(f"Failed to compute query embedding: {e}")
return None
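# Enabling the shared cache is an environment-level switch (values illustrative):
#   REDIS_EMBEDDING_CACHE=true
#   REDIS_URI=redis://localhost:6379
#   QUERY_EMBEDDING_CACHE_TTL=3600
# With these set, get_cached_query_embedding() checks Redis first, then the local
# in-memory cache, and only then calls embedding_func([query]).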
# use the .env that is inside the current folder
# allows to use different .env file for each lightrag instance
# the OS environment variables take precedence over the .env file
@ -1843,9 +1907,14 @@ async def _merge_nodes_then_upsert(
llm_response_cache: BaseKVStorage | None = None,
entity_chunks_storage: BaseKVStorage | None = None,
pre_resolution_map: dict[str, str] | None = None,
prefetched_nodes: dict[str, dict] | None = None,
) -> tuple[dict, str | None]:
"""Get existing nodes from knowledge graph use name,if exists, merge data, else create, then upsert.
Args:
prefetched_nodes: Optional dict mapping entity names to their existing node data.
If provided, avoids individual get_node() calls for better performance.
Returns:
Tuple of (node_data, original_entity_name). original_entity_name is set if
entity resolution changed the name (e.g., "Dupixant" → "Dupixent"),
@ -1969,8 +2038,12 @@ async def _merge_nodes_then_upsert(
already_description = []
already_file_paths = []
# 1. Get existing node data from knowledge graph
already_node = await knowledge_graph_inst.get_node(entity_name)
# 1. Get existing node data from knowledge graph (use prefetched if available)
if prefetched_nodes is not None and entity_name in prefetched_nodes:
already_node = prefetched_nodes[entity_name]
else:
# Fallback to individual fetch if not prefetched (e.g., after VDB resolution)
already_node = await knowledge_graph_inst.get_node(entity_name)
if already_node:
already_entity_types.append(already_node["entity_type"])
already_source_ids.extend(already_node["source_id"].split(GRAPH_FIELD_SEP))
@ -2922,6 +2995,28 @@ async def merge_nodes_and_edges(
pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message)
# ===== Batch Prefetch: Load existing entity data in single query =====
# Build list of entity names to prefetch (apply pre-resolution where applicable)
prefetch_entity_names = []
for entity_name in all_nodes.keys():
resolved_name = pre_resolution_map.get(entity_name, entity_name)
prefetch_entity_names.append(resolved_name)
# Batch fetch existing nodes to avoid N+1 query pattern during parallel processing
prefetched_nodes: dict[str, dict] = {}
if prefetch_entity_names:
try:
prefetched_nodes = await knowledge_graph_inst.get_nodes_batch(
prefetch_entity_names
)
logger.debug(
f"Prefetched {len(prefetched_nodes)}/{len(prefetch_entity_names)} "
f"existing entities for merge"
)
except Exception as e:
logger.warning(f"Batch entity prefetch failed: {e}. Falling back to individual fetches.")
prefetched_nodes = {}
# Resolution map to track original→resolved entity names (e.g., "Dupixant"→"Dupixent")
# This will be used to remap edge endpoints in Phase 2
entity_resolution_map: dict[str, str] = {}
@ -2955,6 +3050,7 @@ async def merge_nodes_and_edges(
llm_response_cache,
entity_chunks_storage,
pre_resolution_map,
prefetched_nodes,
)
# Track resolution mapping for edge remapping in Phase 2
@ -3941,7 +4037,7 @@ async def _perform_kg_search(
query_embedding = await get_cached_query_embedding(
query, actual_embedding_func
)
if query_embedding:
if query_embedding is not None:
logger.debug("Pre-computed query embedding for all vector operations")
# Handle local and global modes


@ -41,6 +41,7 @@ You are a Knowledge Graph Specialist responsible for extracting entities and rel
* Look for implicit categorical or thematic connections to other entities.
* Consider whether the entity belongs to a broader group or domain represented by other entities.
* Extract comparative relationships if the entity is mentioned alongside others.
* **Attribution Verification:** When extracting relationships, ensure the source and target entities are correctly identified from the text. Do not conflate similar entities or transfer attributes from one entity to another.
3. **Delimiter Usage Protocol:**
* The `{tuple_delimiter}` is a complete, atomic marker and **must not be filled with content**. It serves strictly as a field separator.
@ -227,7 +228,43 @@ PROMPTS["fail_response"] = (
"Sorry, I'm not able to provide an answer to that question.[no-context]"
)
PROMPTS["rag_response"] = """---Role---
# Default RAG response prompt - cite-ready (no LLM-generated citations)
# Citations are added by post-processing. This gives cleaner, more accurate results.
PROMPTS["rag_response"] = """You're helping someone understand a topic. Write naturally, like explaining to a curious friend.
STYLE RULES:
- Flowing paragraphs, NOT bullets or numbered lists
- Connect sentences with transitions (however, this means, for example)
- Combine related facts into sentences rather than listing separately
- Vary sentence length - mix short and long
GOOD EXAMPLE:
"Machine learning is a branch of AI that enables computers to learn from data without explicit programming. The field includes several approaches: supervised learning uses labeled data, while unsupervised learning finds hidden patterns. Deep learning, using multi-layer neural networks, has proven especially effective for image recognition and language processing."
BAD EXAMPLE:
"- Machine learning: branch of AI
- Learns from data
- Types: supervised, unsupervised
- Deep learning uses neural networks"
Answer using ONLY the context below. Do NOT include [1], [2] citations - they're added automatically.
{user_prompt}
Context:
{context_data}
"""
# Strict mode suffix - append when response_type="strict"
PROMPTS["rag_response_strict_suffix"] = """
STRICT GROUNDING:
- NEVER state specific numbers/dates unless they appear EXACTLY in context
- If information isn't in context, say "not specified in available information"
- Entity summaries for overview, Source Excerpts for precision
"""
# Legacy prompt with LLM-generated citations (for backward compatibility)
PROMPTS["rag_response_with_llm_citations"] = """---Role---
You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided **Context**.
@ -250,6 +287,8 @@ Consider the conversation history if provided to maintain conversational flow an
2. Content & Grounding:
- Strictly adhere to the provided context from the **Context**; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the **Context**, state that you do not have enough information to answer. Do not attempt to guess.
- CRITICAL FOR FACTS: When stating specific facts (dates, numbers, names, statistics), you MUST verify each fact appears EXACTLY in the provided context. If a specific date or number is not explicitly stated in the context, say "the exact [year/number/date] is not specified in the available information" rather than guessing.
- When the question asks "which" or "who" or "how many", provide ONLY the direct answer with facts from context. Do not elaborate with information not explicitly in the context.
3. Formatting & Language:
- The response MUST be in the same language as the user query.
@ -281,8 +320,45 @@ Consider the conversation history if provided to maintain conversational flow an
{context_data}
"""
# Default naive RAG response prompt - cite-ready (no LLM-generated citations)
PROMPTS["naive_rag_response"] = """---Role---
You are an expert AI assistant synthesizing information from a knowledge base.
---Goal---
Generate a comprehensive, well-structured answer to the user query using ONLY information from the provided Document Chunks.
---Instructions---
1. **Cite-Ready Writing Style**:
- Write each factual claim as a distinct, complete sentence
- DO NOT include citation markers like [1], [2], or footnote references
- DO NOT add a References section - citations will be added automatically by the system
- Each sentence should be traceable to specific information in the context
2. **Content & Grounding**:
- Use ONLY information from the provided context
- DO NOT invent, assume, or infer any information not explicitly stated
- If the answer cannot be found in the context, state that clearly
- CRITICAL: Verify each fact appears EXACTLY in the provided context before stating it
3. **Formatting**:
- The response MUST be in the same language as the user query
- Use Markdown formatting for clarity (headings, bullet points, bold)
- The response should be presented in {response_type}
4. Additional Instructions: {user_prompt}
---Context---
{content_data}
"""
# Legacy naive RAG prompt with LLM-generated citations (for backward compatibility)
PROMPTS["naive_rag_response_with_llm_citations"] = """---Role---
You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided **Context**.
---Goal---
@ -304,6 +380,8 @@ Consider the conversation history if provided to maintain conversational flow an
2. Content & Grounding:
- Strictly adhere to the provided context from the **Context**; DO NOT invent, assume, or infer any information not explicitly stated.
- If the answer cannot be found in the **Context**, state that you do not have enough information to answer. Do not attempt to guess.
- CRITICAL FOR FACTS: When stating specific facts (dates, numbers, names, statistics), you MUST verify each fact appears EXACTLY in the provided context. If a specific date or number is not explicitly stated in the context, say "the exact [year/number/date] is not specified in the available information" rather than guessing.
- When the question asks "which" or "who" or "how many", provide ONLY the direct answer with facts from context. Do not elaborate with information not explicitly in the context.
3. Formatting & Language:
- The response MUST be in the same language as the user query.
@ -335,30 +413,31 @@ Consider the conversation history if provided to maintain conversational flow an
{content_data}
"""
# Backward compatibility aliases - the default prompts are now cite-ready
PROMPTS["cite_ready_rag_response"] = PROMPTS["rag_response"]
PROMPTS["cite_ready_naive_rag_response"] = PROMPTS["naive_rag_response"]
PROMPTS["kg_query_context"] = """
Knowledge Graph Data (Entity):
## Entity Summaries (use for definitions and general facts)
```json
{entities_str}
```
Knowledge Graph Data (Relationship):
## Relationships (use to explain connections between concepts)
```json
{relations_str}
```
Document Chunks (Each entry has a reference_id refer to the `Reference Document List`):
## Source Excerpts (use for specific facts, numbers, quotes)
```json
{text_chunks_str}
```
Reference Document List (Each entry starts with a [reference_id] that corresponds to entries in the Document Chunks):
```
## References
{reference_list_str}
```
"""


@ -8,6 +8,7 @@
"@radix-ui/react-alert-dialog": "^1.1.15",
"@radix-ui/react-checkbox": "^1.3.3",
"@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-hover-card": "^1.1.15",
"@radix-ui/react-popover": "^1.1.15",
"@radix-ui/react-progress": "^1.1.7",
"@radix-ui/react-scroll-area": "^1.2.10",
@ -318,6 +319,8 @@
"@radix-ui/react-focus-scope": ["@radix-ui/react-focus-scope@1.1.7", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-use-callback-ref": "1.1.1" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw=="],
"@radix-ui/react-hover-card": ["@radix-ui/react-hover-card@1.1.15", "", { "dependencies": { "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", "@radix-ui/react-dismissable-layer": "1.1.11", "@radix-ui/react-popper": "1.2.8", "@radix-ui/react-portal": "1.1.9", "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-use-controllable-state": "1.2.2" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-qgTkjNT1CfKMoP0rcasmlH2r1DAiYicWsDsufxl940sT2wHNEWWv6FMWIQXWhVdmC1d/HYfbhQx60KYyAtKxjg=="],
"@radix-ui/react-id": ["@radix-ui/react-id@1.1.1", "", { "dependencies": { "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg=="],
"@radix-ui/react-popover": ["@radix-ui/react-popover@1.1.15", "", { "dependencies": { "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", "@radix-ui/react-dismissable-layer": "1.1.11", "@radix-ui/react-focus-guards": "1.1.3", "@radix-ui/react-focus-scope": "1.1.7", "@radix-ui/react-id": "1.1.1", "@radix-ui/react-popper": "1.2.8", "@radix-ui/react-portal": "1.1.9", "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-slot": "1.2.3", "@radix-ui/react-use-controllable-state": "1.2.2", "aria-hidden": "^1.2.4", "react-remove-scroll": "^2.6.3" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA=="],

lightrag_webui/package-lock.json (generated, 10832 lines): diff suppressed because it is too large.


@ -20,6 +20,7 @@
"@radix-ui/react-alert-dialog": "^1.1.15",
"@radix-ui/react-checkbox": "^1.3.3",
"@radix-ui/react-dialog": "^1.1.15",
"@radix-ui/react-hover-card": "^1.1.15",
"@radix-ui/react-popover": "^1.1.15",
"@radix-ui/react-progress": "^1.1.7",
"@radix-ui/react-scroll-area": "^1.2.10",
@ -82,14 +83,14 @@
"zustand": "^5.0.8"
},
"devDependencies": {
"@biomejs/biome": "^1.9.3",
"@eslint/js": "^9.37.0",
"@stylistic/eslint-plugin-js": "^3.1.0",
"@tailwindcss/typography": "^0.5.15",
"@tailwindcss/vite": "^4.1.14",
"@types/bun": "^1.2.23",
"@types/katex": "^0.16.7",
"@types/node": "^22.18.9",
"@biomejs/biome": "^1.9.3",
"@tailwindcss/typography": "^0.5.15",
"@types/react": "^19.2.2",
"@types/react-dom": "^19.2.1",
"@types/react-i18next": "^8.1.0",


@ -101,12 +101,47 @@ export type LightragDocumentsScanProgress = {
*/
export type QueryMode = 'naive' | 'local' | 'global' | 'hybrid' | 'mix' | 'bypass'
/**
* Citation marker with position data for frontend insertion
*/
export type CitationMarker = {
marker: string // e.g., "[1]" or "[1,2]"
insert_position: number // Character position to insert marker
reference_ids: string[] // Reference IDs this marker cites
confidence: number // Match confidence (0.0-1.0)
text_preview: string // Preview of the cited text
}
/**
* Enhanced source metadata for hover cards
*/
export type CitationSource = {
reference_id: string
file_path: string
document_title: string | null
section_title: string | null
page_range: string | null
excerpt: string | null
}
/**
* Consolidated citation metadata from backend
*/
export type CitationsMetadata = {
markers: CitationMarker[] // Position-based markers for insertion
sources: CitationSource[] // Enhanced reference metadata
footnotes: string[] // Pre-formatted footnote strings
uncited_count: number // Number of claims without citations
}
export type Message = {
role: 'user' | 'assistant' | 'system'
content: string
thinkingContent?: string
displayContent?: string
thinkingTime?: number | null
citationsProcessed?: boolean
citationsMetadata?: CitationsMetadata // New consolidated citation data
}
export type QueryRequest = {
@ -142,6 +177,10 @@ export type QueryRequest = {
user_prompt?: string
/** Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True. */
enable_rerank?: boolean
/** Citation mode for post-processing citations. 'none' = no citations, 'inline' = [n] markers only, 'footnotes' = full footnotes with document titles */
citation_mode?: 'none' | 'inline' | 'footnotes'
/** Minimum similarity threshold (0.0-1.0) for matching response sentences to source chunks. Higher = stricter matching. Default is 0.7 */
citation_threshold?: number
}
export type QueryResponse = {
@ -409,7 +448,8 @@ export const queryText = async (request: QueryRequest): Promise<QueryResponse> =
export const queryTextStream = async (
request: QueryRequest,
onChunk: (chunk: string) => void,
onError?: (error: string) => void
onError?: (error: string) => void,
onCitations?: (metadata: CitationsMetadata) => void
) => {
const apiKey = useSettingsStore.getState().apiKey
const token = localStorage.getItem('LIGHTRAG-API-TOKEN')
@ -486,7 +526,11 @@ export const queryTextStream = async (
onChunk(parsed.response)
} else if (parsed.error && onError) {
onError(parsed.error)
} else if (parsed.citations_metadata && onCitations) {
// NEW: Handle consolidated citations_metadata object
onCitations(parsed.citations_metadata as CitationsMetadata)
}
// Silently ignore references and other events
} catch (error) {
console.error('Error parsing stream chunk:', line, error)
if (onError) onError(`Error parsing server response: ${line}`)
@ -503,6 +547,8 @@ export const queryTextStream = async (
onChunk(parsed.response)
} else if (parsed.error && onError) {
onError(parsed.error)
} else if (parsed.citations_metadata && onCitations) {
onCitations(parsed.citations_metadata as CitationsMetadata)
}
} catch (error) {
console.error('Error parsing final chunk:', buffer, error)


@ -1,7 +1,7 @@
import type { Message } from '@/api/lightrag'
import type { CitationsMetadata, Message } from '@/api/lightrag'
import useTheme from '@/hooks/useTheme'
import { cn } from '@/lib/utils'
import { type ReactNode, memo, useEffect, useMemo, useRef, useState } from 'react' // Import useMemo
import { type ReactNode, memo, useCallback, useEffect, useMemo, useRef, useState } from 'react'
import { remarkFootnotes } from '@/utils/remarkFootnotes'
import mermaid from 'mermaid'
@ -14,8 +14,9 @@ import remarkMath from 'remark-math'
import { Prism as SyntaxHighlighter } from 'react-syntax-highlighter'
import { oneDark, oneLight } from 'react-syntax-highlighter/dist/cjs/styles/prism'
import { ChevronDownIcon, LoaderIcon } from 'lucide-react'
import { BrainIcon, ChevronDownIcon, LoaderIcon } from 'lucide-react'
import { useTranslation } from 'react-i18next'
import { CitationMarker } from './CitationMarker'
// KaTeX configuration options interface
interface KaTeXOptions {
@ -43,6 +44,71 @@ export type MessageWithError = Message & {
latexRendered?: boolean
}
/**
* Helper component to render text with citation markers as interactive HoverCards.
* Parses [n] and [n,m] patterns and replaces them with CitationMarker components.
*/
function TextWithCitations({
children,
citationsMetadata,
}: {
children: ReactNode
citationsMetadata?: CitationsMetadata
}) {
// If no citation metadata or children is not a string, render as-is
if (!citationsMetadata || typeof children !== 'string') {
return <>{children}</>
}
const text = children
// Match citation patterns like [1], [2], [1,2], etc.
const citationPattern = /\[(\d+(?:,\d+)*)\]/g
const parts: ReactNode[] = []
let lastIndex = 0
let match: RegExpExecArray | null
let keyIndex = 0
while ((match = citationPattern.exec(text)) !== null) {
// Add text before the citation
if (match.index > lastIndex) {
parts.push(text.slice(lastIndex, match.index))
}
// Parse reference IDs from the marker
const markerText = match[0]
const refIds = match[1].split(',').map((id) => id.trim())
// Find matching marker data for confidence
const markerData = citationsMetadata.markers?.find((m) => m.marker === markerText)
const confidence = markerData?.confidence ?? 0.5
// Add the citation marker component
parts.push(
<CitationMarker
key={`citation-${keyIndex++}`}
marker={markerText}
referenceIds={refIds}
confidence={confidence}
sources={citationsMetadata.sources || []}
/>
)
lastIndex = match.index + match[0].length
}
// Add remaining text
if (lastIndex < text.length) {
parts.push(text.slice(lastIndex))
}
// Fallback: nothing was collected (empty text), return children unchanged
if (parts.length === 0) {
return <>{children}</>
}
return <>{parts}</>
}
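The citation regex on its own behaves as follows (a small runnable sketch using the same pattern):

// Same pattern as above, shown in isolation
const pattern = /\[(\d+(?:,\d+)*)\]/g
const sample = 'Chunk overlap helps cross-boundary extraction [1], and reranking tightens context [2,3].'
for (const m of sample.matchAll(pattern)) {
  console.log(m[0], '->', m[1].split(','))  // "[1]" -> ["1"], then "[2,3]" -> ["2","3"]
}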
// Restore original component definition and export
export const ChatMessage = ({
message,
@ -94,6 +160,9 @@ export const ChatMessage = ({
loadKaTeX()
}, [])
// Get citationsMetadata from message for use in markdown components
const citationsMetadata = message.citationsMetadata
const mainMarkdownComponents = useMemo(
() => ({
code: (props: any) => {
@ -132,6 +201,11 @@ export const ChatMessage = ({
</CodeHighlight>
)
},
// Custom text renderer that handles citation markers [n]
// Transforms plain text [1], [2], [1,2] into interactive CitationMarker components
text: ({ children }: { children?: ReactNode }) => (
<TextWithCitations citationsMetadata={citationsMetadata}>{children}</TextWithCitations>
),
p: ({ children }: { children?: ReactNode }) => <div className="my-2">{children}</div>,
h1: ({ children }: { children?: ReactNode }) => (
<h1 className="text-xl font-bold mt-4 mb-2">{children}</h1>
@ -153,7 +227,7 @@ export const ChatMessage = ({
),
li: ({ children }: { children?: ReactNode }) => <li className="my-1">{children}</li>,
}),
[message.mermaidRendered, message.role]
[message.mermaidRendered, message.role, citationsMetadata]
)
const thinkingMarkdownComponents = useMemo(
@ -179,48 +253,67 @@ export const ChatMessage = ({
: 'w-[95%] bg-muted'
} rounded-lg px-4 py-2`}
>
{/* Thinking process display - only for assistant messages */}
{/* Always render to prevent layout shift when switching tabs */}
{/* Thinking Pill - collapsible bubble UI */}
{message.role === 'assistant' && (isThinking || thinkingTime !== null) && (
<div
className={cn(
'mb-2',
// Reduce visual priority in inactive tabs while maintaining layout
!isTabActive && 'opacity-50'
)}
>
<div
className="flex items-center text-gray-500 dark:text-gray-400 hover:text-gray-700 dark:hover:text-gray-200 transition-colors duration-200 text-sm cursor-pointer select-none"
<div className={cn('mb-3', !isTabActive && 'opacity-50')}>
{/* Pill Header - always visible */}
<button
type="button"
onClick={() => {
// Allow expansion when there's thinking content, even during thinking process
if (finalThinkingContent && finalThinkingContent.trim() !== '') {
setIsThinkingExpanded(!isThinkingExpanded)
}
}}
className={cn(
'inline-flex items-center gap-2 px-3 py-1.5 rounded-full text-xs font-medium transition-all',
'border shadow-sm select-none',
isThinking
? 'bg-amber-50 border-amber-200 text-amber-700 dark:bg-amber-950/50 dark:border-amber-800 dark:text-amber-300'
: 'bg-slate-100 border-slate-200 text-slate-600 dark:bg-slate-800 dark:border-slate-700 dark:text-slate-300',
finalThinkingContent?.trim() && 'cursor-pointer hover:shadow-md'
)}
>
{isThinking ? (
<>
{/* Only show spinner animation in active tab to save resources */}
{isTabActive && <LoaderIcon className="mr-2 size-4 animate-spin" />}
{isTabActive && (
<div className="w-2 h-2 bg-amber-500 rounded-full animate-pulse" />
)}
<span>{t('retrievePanel.chatMessage.thinking')}</span>
</>
) : (
typeof thinkingTime === 'number' && (
<span>{t('retrievePanel.chatMessage.thinkingTime', { time: thinkingTime })}</span>
<>
<BrainIcon className="w-3.5 h-3.5" />
<span>{t('retrievePanel.chatMessage.thinkingTime', { time: thinkingTime })}</span>
</>
)
)}
{/* Show chevron when there's thinking content, even during thinking process */}
{finalThinkingContent && finalThinkingContent.trim() !== '' && (
<ChevronDownIcon
className={`ml-2 size-4 shrink-0 transition-transform ${isThinkingExpanded ? 'rotate-180' : ''}`}
className={cn(
'w-3.5 h-3.5 transition-transform',
isThinkingExpanded && 'rotate-180'
)}
/>
)}
</div>
{/* Show thinking content when expanded and content exists, even during thinking process */}
</button>
{/* Expandable Content */}
{isThinkingExpanded && finalThinkingContent && finalThinkingContent.trim() !== '' && (
<div className="mt-2 pl-4 border-l-2 border-primary/20 dark:border-primary/40 text-sm prose dark:prose-invert max-w-none break-words prose-p:my-1 prose-headings:my-2 [&_sup]:text-[0.75em] [&_sup]:align-[0.1em] [&_sup]:leading-[0] [&_sub]:text-[0.75em] [&_sub]:align-[-0.2em] [&_sub]:leading-[0] [&_mark]:bg-yellow-200 [&_mark]:dark:bg-yellow-800 [&_u]:underline [&_del]:line-through [&_ins]:underline [&_ins]:decoration-green-500 [&_.footnotes]:mt-6 [&_.footnotes]:pt-3 [&_.footnotes]:border-t [&_.footnotes]:border-border [&_.footnotes_ol]:text-xs [&_.footnotes_li]:my-0.5 [&_a[href^='#fn']]:text-primary [&_a[href^='#fn']]:no-underline [&_a[href^='#fn']]:hover:underline [&_a[href^='#fnref']]:text-primary [&_a[href^='#fnref']]:no-underline [&_a[href^='#fnref']]:hover:underline text-foreground">
<div
className={cn(
'mt-2 ml-2 p-3 rounded-lg text-sm',
'bg-slate-50 border border-slate-200 dark:bg-slate-900 dark:border-slate-700',
'max-h-[400px] overflow-y-auto',
'prose dark:prose-invert max-w-none break-words prose-p:my-1 prose-headings:my-2',
'[&_sup]:text-[0.75em] [&_sup]:align-[0.1em] [&_sup]:leading-[0]',
'[&_sub]:text-[0.75em] [&_sub]:align-[-0.2em] [&_sub]:leading-[0]',
'[&_mark]:bg-yellow-200 [&_mark]:dark:bg-yellow-800',
'text-foreground'
)}
>
{isThinking && (
<div className="mb-2 text-xs text-gray-400 dark:text-gray-300 italic">
<div className="mb-2 text-xs text-amber-600 dark:text-amber-400 italic">
{t('retrievePanel.chatMessage.thinkingInProgress', 'Thinking in progress...')}
</div>
)}
@ -238,16 +331,9 @@ export const ChatMessage = ({
displayMode: false,
strict: false,
trust: true,
// Add silent error handling to avoid console noise
errorCallback: (error: string, latex: string) => {
// Only show detailed errors in development environment
if (process.env.NODE_ENV === 'development') {
console.warn(
'KaTeX rendering error in thinking content:',
error,
'for LaTeX:',
latex
)
console.warn('KaTeX error in thinking:', error, latex)
}
},
},

View file

@ -0,0 +1,161 @@
/**
* CitationMarker Component
*
* Renders citation markers (e.g., [1]) as interactive hover cards
* showing source metadata like document title, section, page, and excerpt.
*/
import type { CitationSource } from '@/api/lightrag'
import Badge from '@/components/ui/Badge'
import { HoverCard, HoverCardContent, HoverCardTrigger } from '@/components/ui/HoverCard'
import { FileTextIcon } from 'lucide-react'
import type * as React from 'react'
interface CitationMarkerProps {
/** The citation marker text, e.g., "[1]" or "[1,2]" */
marker: string
/** Reference IDs this marker cites */
referenceIds: string[]
/** Confidence score (0-1) */
confidence: number
/** Source metadata for hover card */
sources: CitationSource[]
}
/**
* Interactive citation marker with hover card showing source metadata
*/
export function CitationMarker({
marker,
referenceIds,
confidence,
sources,
}: CitationMarkerProps) {
// Find sources matching our reference IDs
const matchingSources = sources.filter((s) => referenceIds.includes(s.reference_id))
// Confidence badge color based on score
const confidenceColor =
confidence >= 0.8
? 'bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200'
: confidence >= 0.6
? 'bg-yellow-100 text-yellow-800 dark:bg-yellow-900 dark:text-yellow-200'
: 'bg-red-100 text-red-800 dark:bg-red-900 dark:text-red-200'
return (
<HoverCard openDelay={200} closeDelay={100}>
<HoverCardTrigger asChild>
<button
type="button"
className="inline-flex items-center text-primary hover:text-primary/80 hover:underline cursor-pointer font-medium text-sm mx-0.5 focus:outline-none focus:ring-2 focus:ring-primary/20 rounded"
>
{marker}
</button>
</HoverCardTrigger>
<HoverCardContent className="w-80" side="top" align="center">
<div className="space-y-3">
{matchingSources.map((source) => (
<div key={source.reference_id} className="space-y-2">
{/* Document title */}
<div className="flex items-start gap-2">
<FileTextIcon className="w-4 h-4 mt-0.5 text-muted-foreground shrink-0" />
<h4 className="font-semibold text-sm leading-tight">
{source.document_title || 'Untitled Document'}
</h4>
</div>
{/* Section title */}
{source.section_title && (
<p className="text-xs text-muted-foreground pl-6">
Section: {source.section_title}
</p>
)}
{/* Page range */}
{source.page_range && (
<p className="text-xs text-muted-foreground pl-6">
Pages: {source.page_range}
</p>
)}
{/* Excerpt */}
{source.excerpt && (
<blockquote className="pl-6 border-l-2 border-muted text-xs italic text-muted-foreground line-clamp-3">
"{source.excerpt}"
</blockquote>
)}
{/* File path */}
<p className="text-xs text-muted-foreground/70 pl-6 truncate" title={source.file_path}>
{source.file_path}
</p>
</div>
))}
{/* Confidence badge */}
<div className="flex items-center justify-between pt-2 border-t">
<span className="text-xs text-muted-foreground">Match confidence</span>
<Badge variant="outline" className={confidenceColor}>
{(confidence * 100).toFixed(0)}%
</Badge>
</div>
</div>
</HoverCardContent>
</HoverCard>
)
}
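A standalone usage sketch of the component (props mirror CitationMarkerProps; the source object is invented):

// Illustrative standalone usage (data is made up)
const example = (
  <CitationMarker
    marker="[1]"
    referenceIds={['1']}
    confidence={0.86}
    sources={[{
      reference_id: '1',
      file_path: 'data/inputs/spec.pdf',
      document_title: 'Process Specification',
      section_title: 'Cleaning validation',
      page_range: '12-14',
      excerpt: 'Rinse samples are collected after the final wash cycle.',
    }]}
  />
)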
/**
* Parses text containing citation markers and returns React elements
* with interactive CitationMarker components.
*
* @param text - Text that may contain [n] or [n,m] patterns
* @param sources - Array of citation sources for hover card metadata
* @param markers - Array of citation markers with position and confidence data
* @returns Array of React elements (strings and CitationMarker components)
*/
export function renderTextWithCitations(
text: string,
sources: CitationSource[],
markers: Array<{ marker: string; reference_ids: string[]; confidence: number }>
): React.ReactNode[] {
// Match citation patterns like [1], [2], [1,2], etc.
const citationPattern = /\[(\d+(?:,\d+)*)\]/g
const parts: React.ReactNode[] = []
let lastIndex = 0
let match: RegExpExecArray | null
while ((match = citationPattern.exec(text)) !== null) {
// Add text before the citation
if (match.index > lastIndex) {
parts.push(text.slice(lastIndex, match.index))
}
// Parse reference IDs from the marker
const markerText = match[0]
const refIds = match[1].split(',').map((id) => id.trim())
// Find matching marker data for confidence
const markerData = markers.find((m) => m.marker === markerText)
const confidence = markerData?.confidence ?? 0.5
// Add the citation marker component
parts.push(
<CitationMarker
key={`citation-${match.index}`}
marker={markerText}
referenceIds={refIds}
confidence={confidence}
sources={sources}
/>
)
lastIndex = match.index + match[0].length
}
// Add remaining text
if (lastIndex < text.length) {
parts.push(text.slice(lastIndex))
}
return parts
}
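Calling the helper directly looks roughly like this (citationsMetadata is assumed to be a CitationsMetadata value already in scope):

// Sketch: turning a plain string into text plus interactive markers
const nodes = renderTextWithCitations(
  'Reranking improves context precision [1].',
  citationsMetadata.sources,
  citationsMetadata.markers
)
// e.g. return <p>{nodes}</p> inside a component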

View file

@ -52,6 +52,8 @@ export default function QuerySettings() {
max_entity_tokens: 6000,
max_relation_tokens: 8000,
max_total_tokens: 30000,
citation_mode: 'none' as 'none' | 'inline' | 'footnotes',
citation_threshold: 0.7,
}),
[]
)
@ -474,6 +476,87 @@ export default function QuerySettings() {
/>
</div>
</>
{/* Citation Settings */}
<>
<div className="pt-2 mt-2 border-t border-gray-200 dark:border-gray-700">
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="citation_mode_select" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.citationMode')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.citationModeTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
</div>
<div className="flex items-center gap-1">
<Select
value={querySettings.citation_mode || 'none'}
onValueChange={(v) => handleChange('citation_mode', v as 'none' | 'inline' | 'footnotes')}
>
<SelectTrigger
id="citation_mode_select"
className="hover:bg-primary/5 h-9 cursor-pointer focus:ring-0 focus:ring-offset-0 focus:outline-0 active:right-0 flex-1 text-left [&>span]:break-all [&>span]:line-clamp-1"
>
<SelectValue />
</SelectTrigger>
<SelectContent>
<SelectGroup>
<SelectItem value="none">
{t('retrievePanel.querySettings.citationModeOptions.none')}
</SelectItem>
<SelectItem value="inline">
{t('retrievePanel.querySettings.citationModeOptions.inline')}
</SelectItem>
<SelectItem value="footnotes">
{t('retrievePanel.querySettings.citationModeOptions.footnotes')}
</SelectItem>
</SelectGroup>
</SelectContent>
</Select>
<ResetButton onClick={() => handleReset('citation_mode')} title="Reset to default (None)" />
</div>
{/* Citation Threshold - only show when citation mode is not 'none' */}
{querySettings.citation_mode && querySettings.citation_mode !== 'none' && (
<>
<TooltipProvider>
<Tooltip>
<TooltipTrigger asChild>
<label htmlFor="citation_threshold" className="ml-1 cursor-help">
{t('retrievePanel.querySettings.citationThreshold')}
</label>
</TooltipTrigger>
<TooltipContent side="left">
<p>{t('retrievePanel.querySettings.citationThresholdTooltip')}</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
<div className="flex items-center gap-1">
<Input
id="citation_threshold"
type="number"
step="0.05"
min="0"
max="1"
value={querySettings.citation_threshold ?? 0.7}
onChange={(e) => {
const value = parseFloat(e.target.value)
if (!isNaN(value) && value >= 0 && value <= 1) {
handleChange('citation_threshold', value)
}
}}
className="h-9 flex-1 pr-2 [&::-webkit-outer-spin-button]:appearance-none [&::-webkit-inner-spin-button]:appearance-none [-moz-appearance:textfield]"
/>
<ResetButton onClick={() => handleReset('citation_threshold')} title="Reset to default (0.7)" />
</div>
</>
)}
</>
</div>
</div>
</CardContent>
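For intuition, the threshold gates sentence-to-source matching on the backend (citation.py, not shown in this diff); a hedged sketch of the idea, with invented names:

// Hedged sketch only: how a citation_threshold could gate matches server-side
type ScoredSource = { reference_id: string; similarity: number }

function pickCitation(candidates: ScoredSource[], threshold = 0.7): ScoredSource | null {
  const best = candidates.reduce(
    (a, b) => (b.similarity > a.similarity ? b : a),
    { reference_id: '', similarity: -1 }
  )
  // Sentences whose best match falls below the threshold stay uncited
  // and are tallied in citations_metadata.uncited_count.
  return best.similarity >= threshold ? best : null
}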

View file

@ -0,0 +1,33 @@
import { cn } from '@/lib/utils'
import * as HoverCardPrimitive from '@radix-ui/react-hover-card'
import * as React from 'react'
const HoverCard = HoverCardPrimitive.Root
const HoverCardTrigger = HoverCardPrimitive.Trigger
const HoverCardContent = React.forwardRef<
React.ComponentRef<typeof HoverCardPrimitive.Content>,
React.ComponentPropsWithoutRef<typeof HoverCardPrimitive.Content>
>(({ className, align = 'center', sideOffset = 4, ...props }, ref) => (
<HoverCardPrimitive.Content
ref={ref}
align={align}
sideOffset={sideOffset}
className={cn(
'z-50 w-64 rounded-md border bg-popover p-4 text-popover-foreground shadow-md outline-none',
'data-[state=open]:animate-in data-[state=closed]:animate-out',
'data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0',
'data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95',
'data-[side=bottom]:slide-in-from-top-2',
'data-[side=left]:slide-in-from-right-2',
'data-[side=right]:slide-in-from-left-2',
'data-[side=top]:slide-in-from-bottom-2',
className
)}
{...props}
/>
))
HoverCardContent.displayName = HoverCardPrimitive.Content.displayName
export { HoverCard, HoverCardTrigger, HoverCardContent }
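Minimal usage of the wrapper (same API surface as Radix HoverCard):

// Sketch: basic HoverCard composition
const card = (
  <HoverCard openDelay={200}>
    <HoverCardTrigger asChild>
      <button type="button">[1]</button>
    </HoverCardTrigger>
    <HoverCardContent side="top">Source details go here</HoverCardContent>
  </HoverCard>
)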

View file

@ -1,5 +1,5 @@
import { queryText, queryTextStream } from '@/api/lightrag'
import type { QueryMode } from '@/api/lightrag'
import type { CitationsMetadata, QueryMode } from '@/api/lightrag'
import { ChatMessage, type MessageWithError } from '@/components/retrieval/ChatMessage'
import QuerySettings from '@/components/retrieval/QuerySettings'
import Button from '@/components/ui/Button'
@ -231,6 +231,7 @@ export default function RetrievalTesting() {
thinkingContent: undefined, // Explicitly initialize to undefined
displayContent: undefined, // Explicitly initialize to undefined
isThinking: false, // Explicitly initialize to false
citationsProcessed: false, // Prevent finally block from overwriting citation content
}
const prevMessages = [...messages]
@ -373,9 +374,66 @@ export default function RetrievalTesting() {
// Run query
if (state.querySettings.stream) {
let errorMessage = ''
await queryTextStream(queryParams, updateAssistantMessage, (error) => {
errorMessage += error
})
await queryTextStream(
queryParams,
updateAssistantMessage,
(error) => {
errorMessage += error
},
// Citation callback - use position markers to insert citations client-side
// NEW: No longer receives annotated_response (which duplicated payload)
// Instead receives position metadata for client-side marker insertion
(() => {
let citationsApplied = false
return (metadata: CitationsMetadata) => {
// Guard against multiple invocations
if (citationsApplied || !metadata.markers || metadata.markers.length === 0) return
citationsApplied = true
// Insert markers into the accumulated response using position data
// Sort by position descending so we can insert from end to start (preserves positions)
const sortedMarkers = [...metadata.markers].sort(
(a, b) => b.insert_position - a.insert_position
)
let annotatedContent = assistantMessage.content
for (const marker of sortedMarkers) {
// Insert marker at the specified position
if (marker.insert_position <= annotatedContent.length) {
annotatedContent =
annotatedContent.slice(0, marker.insert_position) +
marker.marker +
annotatedContent.slice(marker.insert_position)
}
}
// Append footnotes if provided
let finalContent = annotatedContent
if (metadata.footnotes && metadata.footnotes.length > 0) {
finalContent += '\n\n---\n\n**References:**\n' + metadata.footnotes.join('\n')
}
// Update message with annotated content and store citation metadata for HoverCards
setMessages((prev) =>
prev.map((msg) =>
msg.id === assistantMessage.id
? {
...msg,
content: finalContent,
displayContent: finalContent,
citationsProcessed: true,
citationsMetadata: metadata, // Store for HoverCard rendering
}
: msg
)
)
// Also update the local reference for final cleanup operations
assistantMessage.content = finalContent
assistantMessage.displayContent = finalContent
assistantMessage.citationsProcessed = true
}
})()
)
if (errorMessage) {
if (assistantMessage.content) {
errorMessage = assistantMessage.content + '\n' + errorMessage
@ -413,7 +471,8 @@ export default function RetrievalTesting() {
}
// Ensure display content is correctly set based on final parsing
if (finalCotResult.displayContent !== undefined) {
// BUT skip if citations were processed (they already set displayContent)
if (!assistantMessage.citationsProcessed && finalCotResult.displayContent !== undefined) {
assistantMessage.displayContent = finalCotResult.displayContent
}
} catch (error) {

View file

@ -5,18 +5,19 @@ export const webuiPrefix = '/webui/'
export const controlButtonVariant: ButtonVariantType = 'ghost'
export const labelColorDarkTheme = '#FFFFFF'
export const LabelColorHighlightedDarkTheme = '#000000'
// Dark theme graph palette tuned for contrast on charcoal backgrounds
export const labelColorDarkTheme = '#E5ECFF'
export const LabelColorHighlightedDarkTheme = '#0F172A'
export const labelColorLightTheme = '#000'
export const nodeColorDisabled = '#E2E2E2'
export const nodeBorderColor = '#EEEEEE'
export const nodeBorderColorSelected = '#F57F17'
export const nodeColorDisabled = '#9CA3AF'
export const nodeBorderColor = '#CBD5E1'
export const nodeBorderColorSelected = '#F97316'
export const nodeBorderColorHiddenConnections = '#F59E0B' // Amber color for nodes with hidden connections
export const edgeColorDarkTheme = '#888888'
export const edgeColorSelected = '#F57F17'
export const edgeColorHighlightedDarkTheme = '#F57F17'
export const edgeColorDarkTheme = '#4B5563'
export const edgeColorSelected = '#F97316'
export const edgeColorHighlightedDarkTheme = '#F59E0B'
export const edgeColorHighlightedLightTheme = '#F57F17'
export const searchResultLimit = 50

View file

@ -494,7 +494,16 @@
"userPromptTooltip": "Provide additional response requirements to the LLM (unrelated to query content, only for output processing).",
"userPromptPlaceholder": "Enter custom prompt (optional)",
"enableRerank": "Enable Rerank",
"enableRerankTooltip": "Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True."
"enableRerankTooltip": "Enable reranking for retrieved text chunks. If True but no rerank model is configured, a warning will be issued. Default is True.",
"citationMode": "Citation Mode",
"citationModeTooltip": "Add source citations to responses:\n• None: No citations (fastest)\n• Inline: [n] markers in text only\n• Footnotes: Full footnotes with document titles",
"citationModeOptions": {
"none": "None",
"inline": "Inline [n]",
"footnotes": "Footnotes"
},
"citationThreshold": "Citation Threshold",
"citationThresholdTooltip": "Minimum similarity score (0-1) for matching sentences to sources. Higher = stricter matching, fewer citations. Default: 0.7"
}
},
"apiSite": {

View file

@ -161,6 +161,8 @@ const useSettingsStoreBase = create<SettingsState>()(
history_turns: 0,
user_prompt: '',
enable_rerank: true,
citation_mode: 'none',
citation_threshold: 0.7,
},
setTheme: (theme: Theme) => set({ theme }),
@ -303,7 +305,7 @@ const useSettingsStoreBase = create<SettingsState>()(
{
name: 'settings-storage',
storage: createJSONStorage(() => localStorage),
version: 23,
version: 24,
migrate: (state: any, version: number) => {
if (version < 2) {
state.showEdgeLabel = false
@ -428,6 +430,13 @@ const useSettingsStoreBase = create<SettingsState>()(
// Add expand depth setting for Load Connections
state.graphExpandDepth = 1
}
if (version < 24) {
// Add citation settings for post-processing citations
if (state.querySettings) {
state.querySettings.citation_mode = 'none'
state.querySettings.citation_threshold = 0.7
}
}
return state
},
}
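The version-24 step follows the same pattern as the earlier migrations; as a before/after sketch (the persisted shape is illustrative, not the full settings object):

// Sketch: effect of the v23 -> v24 migration on a persisted settings object
const beforeV24 = { querySettings: { enable_rerank: true } }  // no citation fields yet
const afterV24 = {
  querySettings: { ...beforeV24.querySettings, citation_mode: 'none', citation_threshold: 0.7 },
}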