diff --git a/lightrag/api/README.md b/lightrag/api/README.md index d9d9edb9..2ed3e113 100644 --- a/lightrag/api/README.md +++ b/lightrag/api/README.md @@ -588,6 +588,69 @@ You can test the API endpoints using the provided curl commands or through the S 4. Query the system using the query endpoints 5. Trigger document scan if new files are put into the inputs directory +### Graph Manipulation Endpoints + +LightRAG provides REST API endpoints for direct knowledge graph manipulation: + +#### Create Entity + +Create a new entity in the knowledge graph: + +```bash +curl -X POST "http://localhost:9621/graph/entity/create" \ + -H "Content-Type: application/json" \ + -d '{ + "entity_name": "Tesla", + "entity_data": { + "description": "Electric vehicle manufacturer", + "entity_type": "ORGANIZATION" + } + }' +``` + +#### Create Relationship + +Create a new relationship between two existing entities: + +```bash +curl -X POST "http://localhost:9621/graph/relation/create" \ + -H "Content-Type: application/json" \ + -d '{ + "source_entity": "Elon Musk", + "target_entity": "Tesla", + "relation_data": { + "description": "Elon Musk is the CEO of Tesla", + "keywords": "CEO, founder", + "weight": 1.0 + } + }' +``` + +#### Merge Entities + +Consolidate duplicate or misspelled entities while preserving all relationships: + +```bash +curl -X POST "http://localhost:9621/graph/entities/merge" \ + -H "Content-Type: application/json" \ + -d '{ + "entities_to_change": ["Elon Msk", "Ellon Musk"], + "entity_to_change_into": "Elon Musk" + }' +``` + +**What the merge operation does:** +- Deletes the specified source entities +- Transfers all relationships from source entities to the target entity +- Intelligently merges duplicate relationships +- Updates vector embeddings for accurate retrieval +- Preserves the entire graph structure + +This is particularly useful for: +- Fixing spelling errors in entity names +- Consolidating duplicate entities discovered after document processing +- Cleaning up the knowledge graph for better query performance + ## Asynchronous Document Indexing with Progress Tracking LightRAG implements asynchronous document indexing to enable frontend monitoring and querying of document processing progress. Upon uploading files or inserting text through designated endpoints, a unique Track ID is returned to facilitate real-time progress monitoring. diff --git a/lightrag/api/routers/graph_routes.py b/lightrag/api/routers/graph_routes.py index 89bc74db..078fd452 100644 --- a/lightrag/api/routers/graph_routes.py +++ b/lightrag/api/routers/graph_routes.py @@ -7,12 +7,8 @@ import traceback from fastapi import APIRouter, Depends, Query, HTTPException from pydantic import BaseModel -from lightrag import LightRAG from lightrag.utils import logger -from lightrag.models.tenant import TenantContext -from lightrag.tenant_rag_manager import TenantRAGManager from ..utils_api import get_combined_auth_dependency -from ..dependencies import get_tenant_context_optional router = APIRouter(tags=["graph"]) @@ -29,29 +25,35 @@ class RelationUpdateRequest(BaseModel): updated_data: Dict[str, Any] -def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optional[TenantRAGManager] = None): +class EntityMergeRequest(BaseModel): + entities_to_change: list[str] + entity_to_change_into: str + + +class EntityCreateRequest(BaseModel): + entity_name: str + entity_data: Dict[str, Any] + + +class RelationCreateRequest(BaseModel): + source_entity: str + target_entity: str + relation_data: Dict[str, Any] + + +def create_graph_routes(rag, api_key: Optional[str] = None): combined_auth = get_combined_auth_dependency(api_key) - - async def get_tenant_rag(tenant_context: Optional[TenantContext] = Depends(get_tenant_context_optional)) -> LightRAG: - """Dependency to get tenant-specific RAG instance for graph operations""" - if rag_manager and tenant_context and tenant_context.tenant_id and tenant_context.kb_id: - return await rag_manager.get_rag_instance( - tenant_context.tenant_id, - tenant_context.kb_id, - tenant_context.user_id # Pass user_id for security validation - ) - return rag @router.get("/graph/label/list", dependencies=[Depends(combined_auth)]) - async def get_graph_labels(tenant_rag: LightRAG = Depends(get_tenant_rag)): + async def get_graph_labels(): """ - Get all graph labels (tenant-scoped) + Get all graph labels Returns: - List[str]: List of graph labels for the selected tenant/KB + List[str]: List of graph labels """ try: - return await tenant_rag.get_graph_labels() + return await rag.get_graph_labels() except Exception as e: logger.error(f"Error getting graph labels: {str(e)}") logger.error(traceback.format_exc()) @@ -64,19 +66,18 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona limit: int = Query( 300, description="Maximum number of popular labels to return", ge=1, le=1000 ), - tenant_rag: LightRAG = Depends(get_tenant_rag), ): """ - Get popular labels by node degree (tenant-scoped) + Get popular labels by node degree (most connected entities) Args: limit (int): Maximum number of labels to return (default: 300, max: 1000) Returns: - List[str]: List of popular labels sorted by degree (highest first) for the selected tenant/KB + List[str]: List of popular labels sorted by degree (highest first) """ try: - return await tenant_rag.chunk_entity_relation_graph.get_popular_labels(limit) + return await rag.chunk_entity_relation_graph.get_popular_labels(limit) except Exception as e: logger.error(f"Error getting popular labels: {str(e)}") logger.error(traceback.format_exc()) @@ -90,20 +91,19 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona limit: int = Query( 50, description="Maximum number of search results to return", ge=1, le=100 ), - tenant_rag: LightRAG = Depends(get_tenant_rag), ): """ - Search labels with fuzzy matching (tenant-scoped) + Search labels with fuzzy matching Args: q (str): Search query string limit (int): Maximum number of results to return (default: 50, max: 100) Returns: - List[str]: List of matching labels sorted by relevance for the selected tenant/KB + List[str]: List of matching labels sorted by relevance """ try: - return await tenant_rag.chunk_entity_relation_graph.search_labels(q, limit) + return await rag.chunk_entity_relation_graph.search_labels(q, limit) except Exception as e: logger.error(f"Error searching labels with query '{q}': {str(e)}") logger.error(traceback.format_exc()) @@ -116,10 +116,9 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona label: str = Query(..., description="Label to get knowledge graph for"), max_depth: int = Query(3, description="Maximum depth of graph", ge=1), max_nodes: int = Query(1000, description="Maximum nodes to return", ge=1), - tenant_rag: LightRAG = Depends(get_tenant_rag), ): """ - Retrieve a connected subgraph of nodes (tenant-scoped). + Retrieve a connected subgraph of nodes where the label includes the specified label. When reducing the number of nodes, the prioritization criteria are as follows: 1. Hops(path) to the staring node take precedence 2. Followed by the degree of the nodes @@ -130,7 +129,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona max_nodes: Maxiumu nodes to return Returns: - Dict[str, List[str]]: Knowledge graph for label from the selected tenant/KB + Dict[str, List[str]]: Knowledge graph for label """ try: # Log the label parameter to check for leading spaces @@ -138,7 +137,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona f"get_knowledge_graph called with label: '{label}' (length: {len(label)}, repr: {repr(label)})" ) - return await tenant_rag.get_knowledge_graph( + return await rag.get_knowledge_graph( node_label=label, max_depth=max_depth, max_nodes=max_nodes, @@ -153,19 +152,18 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona @router.get("/graph/entity/exists", dependencies=[Depends(combined_auth)]) async def check_entity_exists( name: str = Query(..., description="Entity name to check"), - tenant_rag: LightRAG = Depends(get_tenant_rag), ): """ - Check if an entity with the given name exists in the knowledge graph (tenant-scoped) + Check if an entity with the given name exists in the knowledge graph Args: name (str): Name of the entity to check Returns: - Dict[str, bool]: Dictionary with 'exists' key indicating if entity exists in the selected tenant/KB + Dict[str, bool]: Dictionary with 'exists' key indicating if entity exists """ try: - exists = await tenant_rag.chunk_entity_relation_graph.has_node(name) + exists = await rag.chunk_entity_relation_graph.has_node(name) return {"exists": exists} except Exception as e: logger.error(f"Error checking entity existence for '{name}': {str(e)}") @@ -175,12 +173,9 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona ) @router.post("/graph/entity/edit", dependencies=[Depends(combined_auth)]) - async def update_entity( - request: EntityUpdateRequest, - tenant_rag: LightRAG = Depends(get_tenant_rag), - ): + async def update_entity(request: EntityUpdateRequest): """ - Update an entity's properties in the knowledge graph (tenant-scoped) + Update an entity's properties in the knowledge graph Args: request (EntityUpdateRequest): Request containing entity name, updated data, and rename flag @@ -189,7 +184,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona Dict: Updated entity information """ try: - result = await tenant_rag.aedit_entity( + result = await rag.aedit_entity( entity_name=request.entity_name, updated_data=request.updated_data, allow_rename=request.allow_rename, @@ -212,11 +207,8 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona ) @router.post("/graph/relation/edit", dependencies=[Depends(combined_auth)]) - async def update_relation( - request: RelationUpdateRequest, - tenant_rag: LightRAG = Depends(get_tenant_rag), - ): - """Update a relation's properties in the knowledge graph (tenant-scoped) + async def update_relation(request: RelationUpdateRequest): + """Update a relation's properties in the knowledge graph Args: request (RelationUpdateRequest): Request containing source ID, target ID and updated data @@ -225,7 +217,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona Dict: Updated relation information """ try: - result = await tenant_rag.aedit_relation( + result = await rag.aedit_relation( source_entity=request.source_id, target_entity=request.target_id, updated_data=request.updated_data, @@ -249,4 +241,169 @@ def create_graph_routes(rag, api_key: Optional[str] = None, rag_manager: Optiona status_code=500, detail=f"Error updating relation: {str(e)}" ) + @router.post("/graph/entity/create", dependencies=[Depends(combined_auth)]) + async def create_entity(request: EntityCreateRequest): + """ + Create a new entity in the knowledge graph + + Args: + request (EntityCreateRequest): Request containing: + - entity_name: Name of the entity + - entity_data: Dictionary of entity properties (e.g., description, entity_type) + + Returns: + Dict: Created entity information + + Example: + { + "entity_name": "Tesla", + "entity_data": { + "description": "Electric vehicle manufacturer", + "entity_type": "ORGANIZATION" + } + } + """ + try: + # Check if entity already exists + exists = await rag.chunk_entity_relation_graph.has_node(request.entity_name) + if exists: + raise ValueError(f"Entity '{request.entity_name}' already exists") + + # Prepare entity data + entity_data = request.entity_data.copy() + entity_data["entity_id"] = request.entity_name + + # Create the entity + await rag.chunk_entity_relation_graph.upsert_node( + request.entity_name, entity_data + ) + + return { + "status": "success", + "message": f"Entity '{request.entity_name}' created successfully", + "data": entity_data, + } + except ValueError as ve: + logger.error(f"Validation error creating entity '{request.entity_name}': {str(ve)}") + raise HTTPException(status_code=400, detail=str(ve)) + except Exception as e: + logger.error(f"Error creating entity '{request.entity_name}': {str(e)}") + logger.error(traceback.format_exc()) + raise HTTPException( + status_code=500, detail=f"Error creating entity: {str(e)}" + ) + + @router.post("/graph/relation/create", dependencies=[Depends(combined_auth)]) + async def create_relation(request: RelationCreateRequest): + """ + Create a new relationship between two entities in the knowledge graph + + Args: + request (RelationCreateRequest): Request containing: + - source_entity: Source entity name + - target_entity: Target entity name + - relation_data: Dictionary of relation properties (e.g., description, keywords, weight) + + Returns: + Dict: Created relation information + + Example: + { + "source_entity": "Elon Musk", + "target_entity": "Tesla", + "relation_data": { + "description": "Elon Musk is the CEO of Tesla", + "keywords": "CEO, founder", + "weight": 1.0 + } + } + """ + try: + # Check if both entities exist + source_exists = await rag.chunk_entity_relation_graph.has_node( + request.source_entity + ) + target_exists = await rag.chunk_entity_relation_graph.has_node( + request.target_entity + ) + + if not source_exists: + raise ValueError(f"Source entity '{request.source_entity}' does not exist") + if not target_exists: + raise ValueError(f"Target entity '{request.target_entity}' does not exist") + + # Create the relationship + await rag.chunk_entity_relation_graph.upsert_edge( + request.source_entity, request.target_entity, request.relation_data + ) + + return { + "status": "success", + "message": f"Relation created successfully between '{request.source_entity}' and '{request.target_entity}'", + "data": { + "source": request.source_entity, + "target": request.target_entity, + **request.relation_data, + }, + } + except ValueError as ve: + logger.error( + f"Validation error creating relation between '{request.source_entity}' and '{request.target_entity}': {str(ve)}" + ) + raise HTTPException(status_code=400, detail=str(ve)) + except Exception as e: + logger.error( + f"Error creating relation between '{request.source_entity}' and '{request.target_entity}': {str(e)}" + ) + logger.error(traceback.format_exc()) + raise HTTPException( + status_code=500, detail=f"Error creating relation: {str(e)}" + ) + + @router.post("/graph/entities/merge", dependencies=[Depends(combined_auth)]) + async def merge_entities(request: EntityMergeRequest): + """ + Merge multiple entities into a single entity, preserving all relationships. + + This endpoint is useful for consolidating duplicate or misspelled entities. + All relationships from the source entities will be transferred to the target entity. + + Args: + request (EntityMergeRequest): Request containing: + - entities_to_change: List of entity names to be removed + - entity_to_change_into: Name of the target entity to merge into + + Returns: + Dict: Result of the merge operation with merged entity information + + Example: + { + "entities_to_change": ["Elon Msk", "Ellon Musk"], + "entity_to_change_into": "Elon Musk" + } + """ + try: + result = await rag.amerge_entities( + source_entities=request.entities_to_change, + target_entity=request.entity_to_change_into, + ) + return { + "status": "success", + "message": f"Successfully merged {len(request.entities_to_change)} entities into '{request.entity_to_change_into}'", + "data": result, + } + except ValueError as ve: + logger.error( + f"Validation error merging entities {request.entities_to_change} into '{request.entity_to_change_into}': {str(ve)}" + ) + raise HTTPException(status_code=400, detail=str(ve)) + except Exception as e: + logger.error( + f"Error merging entities {request.entities_to_change} into '{request.entity_to_change_into}': {str(e)}" + ) + logger.error(traceback.format_exc()) + raise HTTPException( + status_code=500, detail=f"Error merging entities: {str(e)}" + ) + return router