Merge branch 'adminunblinded/main'

This commit is contained in:
yangdx 2025-10-10 12:31:47 +08:00
commit 85d1a563b3
5 changed files with 236 additions and 5 deletions

View file

@ -543,6 +543,69 @@ You can test the API endpoints using the provided curl commands or through the S
4. Query the system using the query endpoints
5. Trigger a document scan when new files are placed into the inputs directory
### Graph Manipulation Endpoints
LightRAG provides REST API endpoints for direct knowledge graph manipulation:
#### Create Entity
Create a new entity in the knowledge graph:
```bash
curl -X POST "http://localhost:9621/graph/entity/create" \
-H "Content-Type: application/json" \
-d '{
"entity_name": "Tesla",
"entity_data": {
"description": "Electric vehicle manufacturer",
"entity_type": "ORGANIZATION"
}
}'
```
#### Create Relationship
Create a new relationship between two existing entities:
```bash
curl -X POST "http://localhost:9621/graph/relation/create" \
-H "Content-Type: application/json" \
-d '{
"source_entity": "Elon Musk",
"target_entity": "Tesla",
"relation_data": {
"description": "Elon Musk is the CEO of Tesla",
"keywords": "CEO, founder",
"weight": 1.0
}
}'
```
#### Merge Entities
Consolidate duplicate or misspelled entities while preserving all relationships:
```bash
curl -X POST "http://localhost:9621/graph/entities/merge" \
-H "Content-Type: application/json" \
-d '{
"entities_to_change": ["Elon Msk", "Ellon Musk"],
"entity_to_change_into": "Elon Musk"
}'
```
**What the merge operation does:**
- Deletes the specified source entities
- Transfers all relationships from source entities to the target entity
- Intelligently merges duplicate relationships
- Updates vector embeddings for accurate retrieval
- Preserves the entire graph structure
This is particularly useful for:
- Fixing spelling errors in entity names
- Consolidating duplicate entities discovered after document processing
- Cleaning up the knowledge graph for better query performance
## Asynchronous Document Indexing with Progress Tracking
LightRAG implements asynchronous document indexing to enable frontend monitoring and querying of document processing progress. Upon uploading files or inserting text through designated endpoints, a unique Track ID is returned to facilitate real-time progress monitoring.

View file

@ -25,6 +25,22 @@ class RelationUpdateRequest(BaseModel):
updated_data: Dict[str, Any]
class EntityMergeRequest(BaseModel):
    """Request body for merging duplicate entities into a single target entity."""

    # Names of the source entities to be removed; their relationships are
    # transferred to the target entity by the /graph/entities/merge endpoint.
    entities_to_change: list[str]
    # Name of the entity that survives the merge.
    entity_to_change_into: str
class EntityCreateRequest(BaseModel):
    """Request body for creating a single entity in the knowledge graph."""

    # Name of the new entity.
    entity_name: str
    # Arbitrary entity properties, e.g. {"description": ..., "entity_type": ...}.
    entity_data: Dict[str, Any]
class RelationCreateRequest(BaseModel):
    """Request body for creating a relationship between two existing entities."""

    # Name of the source entity of the relation.
    source_entity: str
    # Name of the target entity of the relation.
    target_entity: str
    # Relation properties, e.g. {"description": ..., "keywords": ..., "weight": ...}.
    relation_data: Dict[str, Any]
def create_graph_routes(rag, api_key: Optional[str] = None):
combined_auth = get_combined_auth_dependency(api_key)
@ -225,4 +241,157 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
status_code=500, detail=f"Error updating relation: {str(e)}"
)
@router.post("/graph/entity/create", dependencies=[Depends(combined_auth)])
async def create_entity(request: EntityCreateRequest):
    """
    Create a new entity in the knowledge graph.

    Args:
        request (EntityCreateRequest): Request containing:
            - entity_name: Name of the entity
            - entity_data: Dictionary of entity properties
              (e.g. description, entity_type)

    Returns:
        Dict: Status payload with the created entity information.

    Raises:
        HTTPException: 400 on validation failure, 500 on any other error.

    Example request body:
        {
            "entity_name": "Tesla",
            "entity_data": {
                "description": "Electric vehicle manufacturer",
                "entity_type": "ORGANIZATION"
            }
        }
    """
    # rag.acreate_entity handles the graph lock, vector-embedding creation in
    # entities_vdb, metadata defaults, and index consistency via
    # _edit_entity_done — no extra bookkeeping is needed here.
    try:
        created = await rag.acreate_entity(
            entity_name=request.entity_name,
            entity_data=request.entity_data,
        )
    except ValueError as err:
        # Client-side problem with the payload — surface as 400.
        logger.error(
            f"Validation error creating entity '{request.entity_name}': {str(err)}"
        )
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(f"Error creating entity '{request.entity_name}': {str(err)}")
        logger.error(traceback.format_exc())
        raise HTTPException(
            status_code=500, detail=f"Error creating entity: {str(err)}"
        )
    else:
        return {
            "status": "success",
            "message": f"Entity '{request.entity_name}' created successfully",
            "data": created,
        }
@router.post("/graph/relation/create", dependencies=[Depends(combined_auth)])
async def create_relation(request: RelationCreateRequest):
    """
    Create a new relationship between two entities in the knowledge graph.

    Args:
        request (RelationCreateRequest): Request containing:
            - source_entity: Source entity name
            - target_entity: Target entity name
            - relation_data: Dictionary of relation properties
              (e.g. description, keywords, weight)

    Returns:
        Dict: Status payload with the created relation information.

    Raises:
        HTTPException: 400 on validation failure, 500 on any other error.

    Example request body:
        {
            "source_entity": "Elon Musk",
            "target_entity": "Tesla",
            "relation_data": {
                "description": "Elon Musk is the CEO of Tesla",
                "keywords": "CEO, founder",
                "weight": 1.0
            }
        }
    """
    src, dst = request.source_entity, request.target_entity
    # rag.acreate_relation handles the graph lock, entity-existence
    # validation, duplicate-relation checks, vector-embedding creation in
    # relationships_vdb, and index consistency via _edit_relation_done.
    try:
        created = await rag.acreate_relation(
            source_entity=src,
            target_entity=dst,
            relation_data=request.relation_data,
        )
    except ValueError as err:
        # Client-side problem (e.g. missing entity or duplicate relation).
        logger.error(
            f"Validation error creating relation between '{src}' and '{dst}': {str(err)}"
        )
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(
            f"Error creating relation between '{src}' and '{dst}': {str(err)}"
        )
        logger.error(traceback.format_exc())
        raise HTTPException(
            status_code=500, detail=f"Error creating relation: {str(err)}"
        )
    else:
        return {
            "status": "success",
            "message": f"Relation created successfully between '{src}' and '{dst}'",
            "data": created,
        }
@router.post("/graph/entities/merge", dependencies=[Depends(combined_auth)])
async def merge_entities(request: EntityMergeRequest):
    """
    Merge multiple entities into a single entity, preserving all relationships.

    Useful for consolidating duplicate or misspelled entities: every
    relationship attached to a source entity is transferred to the target
    entity before the sources are removed.

    Args:
        request (EntityMergeRequest): Request containing:
            - entities_to_change: List of entity names to be removed
            - entity_to_change_into: Name of the target entity to merge into

    Returns:
        Dict: Status payload with the merged entity information.

    Raises:
        HTTPException: 400 on validation failure, 500 on any other error.

    Example request body:
        {
            "entities_to_change": ["Elon Msk", "Ellon Musk"],
            "entity_to_change_into": "Elon Musk"
        }
    """
    sources = request.entities_to_change
    target = request.entity_to_change_into
    try:
        merged = await rag.amerge_entities(
            source_entities=sources,
            target_entity=target,
        )
    except ValueError as err:
        # Client-side problem with the merge request — surface as 400.
        logger.error(
            f"Validation error merging entities {sources} into '{target}': {str(err)}"
        )
        raise HTTPException(status_code=400, detail=str(err))
    except Exception as err:
        logger.error(
            f"Error merging entities {sources} into '{target}': {str(err)}"
        )
        logger.error(traceback.format_exc())
        raise HTTPException(
            status_code=500, detail=f"Error merging entities: {str(err)}"
        )
    else:
        return {
            "status": "success",
            "message": f"Successfully merged {len(sources)} entities into '{target}'",
            "data": merged,
        }
return router

View file

@ -401,7 +401,7 @@ async def _handle_single_relationship_extraction(
): # treat "relationship" and "relation" interchangeable
if len(record_attributes) > 1 and "relation" in record_attributes[0]:
logger.warning(
f"{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`~`{record_attributes[2] if len(record_attributes) >2 else 'N/A'}`"
f"{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`~`{record_attributes[2] if len(record_attributes) > 2 else 'N/A'}`"
)
logger.debug(record_attributes)
return None
@ -2225,7 +2225,7 @@ async def extract_entities(
await asyncio.wait(pending)
# Add progress prefix to the exception message
progress_prefix = f"C[{processed_chunks+1}/{total_chunks}]"
progress_prefix = f"C[{processed_chunks + 1}/{total_chunks}]"
# Re-raise the original exception with a prefix
prefixed_exception = create_prefixed_exception(first_exception, progress_prefix)

View file

@ -1472,8 +1472,7 @@ async def aexport_data(
else:
raise ValueError(
f"Unsupported file format: {file_format}. "
f"Choose from: csv, excel, md, txt"
f"Unsupported file format: {file_format}. Choose from: csv, excel, md, txt"
)
if file_format is not None:
print(f"Data exported to: {output_path} with format: {file_format}")

View file

@ -73,7 +73,7 @@ def batch_eval(query_file, result1_file, result2_file, output_file_path):
"""
request_data = {
"custom_id": f"request-{i+1}",
"custom_id": f"request-{i + 1}",
"method": "POST",
"url": "/v1/chat/completions",
"body": {