Merge branch 'adminunblinded/main'

This commit is contained in:
yangdx 2025-10-10 12:31:47 +08:00
commit 85d1a563b3
5 changed files with 236 additions and 5 deletions

View file

@ -543,6 +543,69 @@ You can test the API endpoints using the provided curl commands or through the S
4. Query the system using the query endpoints 4. Query the system using the query endpoints
5. Trigger document scan if new files are put into the inputs directory 5. Trigger document scan if new files are put into the inputs directory
### Graph Manipulation Endpoints
LightRAG provides REST API endpoints for direct knowledge graph manipulation:
#### Create Entity
Create a new entity in the knowledge graph:
```bash
curl -X POST "http://localhost:9621/graph/entity/create" \
-H "Content-Type: application/json" \
-d '{
"entity_name": "Tesla",
"entity_data": {
"description": "Electric vehicle manufacturer",
"entity_type": "ORGANIZATION"
}
}'
```
#### Create Relationship
Create a new relationship between two existing entities:
```bash
curl -X POST "http://localhost:9621/graph/relation/create" \
-H "Content-Type: application/json" \
-d '{
"source_entity": "Elon Musk",
"target_entity": "Tesla",
"relation_data": {
"description": "Elon Musk is the CEO of Tesla",
"keywords": "CEO, founder",
"weight": 1.0
}
}'
```
#### Merge Entities
Consolidate duplicate or misspelled entities while preserving all relationships:
```bash
curl -X POST "http://localhost:9621/graph/entities/merge" \
-H "Content-Type: application/json" \
-d '{
"entities_to_change": ["Elon Msk", "Ellon Musk"],
"entity_to_change_into": "Elon Musk"
}'
```
**What the merge operation does:**
- Deletes the specified source entities
- Transfers all relationships from source entities to the target entity
- Intelligently merges duplicate relationships
- Updates vector embeddings for accurate retrieval
- Preserves the entire graph structure
This is particularly useful for:
- Fixing spelling errors in entity names
- Consolidating duplicate entities discovered after document processing
- Cleaning up the knowledge graph for better query performance
## Asynchronous Document Indexing with Progress Tracking ## Asynchronous Document Indexing with Progress Tracking
LightRAG implements asynchronous document indexing to enable frontend monitoring and querying of document processing progress. Upon uploading files or inserting text through designated endpoints, a unique Track ID is returned to facilitate real-time progress monitoring. LightRAG implements asynchronous document indexing to enable frontend monitoring and querying of document processing progress. Upon uploading files or inserting text through designated endpoints, a unique Track ID is returned to facilitate real-time progress monitoring.

View file

@ -25,6 +25,22 @@ class RelationUpdateRequest(BaseModel):
updated_data: Dict[str, Any] updated_data: Dict[str, Any]
class EntityMergeRequest(BaseModel):
entities_to_change: list[str]
entity_to_change_into: str
class EntityCreateRequest(BaseModel):
entity_name: str
entity_data: Dict[str, Any]
class RelationCreateRequest(BaseModel):
source_entity: str
target_entity: str
relation_data: Dict[str, Any]
def create_graph_routes(rag, api_key: Optional[str] = None): def create_graph_routes(rag, api_key: Optional[str] = None):
combined_auth = get_combined_auth_dependency(api_key) combined_auth = get_combined_auth_dependency(api_key)
@ -225,4 +241,157 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
status_code=500, detail=f"Error updating relation: {str(e)}" status_code=500, detail=f"Error updating relation: {str(e)}"
) )
@router.post("/graph/entity/create", dependencies=[Depends(combined_auth)])
async def create_entity(request: EntityCreateRequest):
"""
Create a new entity in the knowledge graph
Args:
request (EntityCreateRequest): Request containing:
- entity_name: Name of the entity
- entity_data: Dictionary of entity properties (e.g., description, entity_type)
Returns:
Dict: Created entity information
Example:
{
"entity_name": "Tesla",
"entity_data": {
"description": "Electric vehicle manufacturer",
"entity_type": "ORGANIZATION"
}
}
"""
try:
# Use the proper acreate_entity method which handles:
# - Graph lock for concurrency
# - Vector embedding creation in entities_vdb
# - Metadata population and defaults
# - Index consistency via _edit_entity_done
result = await rag.acreate_entity(
entity_name=request.entity_name,
entity_data=request.entity_data,
)
return {
"status": "success",
"message": f"Entity '{request.entity_name}' created successfully",
"data": result,
}
except ValueError as ve:
logger.error(
f"Validation error creating entity '{request.entity_name}': {str(ve)}"
)
raise HTTPException(status_code=400, detail=str(ve))
except Exception as e:
logger.error(f"Error creating entity '{request.entity_name}': {str(e)}")
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error creating entity: {str(e)}"
)
@router.post("/graph/relation/create", dependencies=[Depends(combined_auth)])
async def create_relation(request: RelationCreateRequest):
"""
Create a new relationship between two entities in the knowledge graph
Args:
request (RelationCreateRequest): Request containing:
- source_entity: Source entity name
- target_entity: Target entity name
- relation_data: Dictionary of relation properties (e.g., description, keywords, weight)
Returns:
Dict: Created relation information
Example:
{
"source_entity": "Elon Musk",
"target_entity": "Tesla",
"relation_data": {
"description": "Elon Musk is the CEO of Tesla",
"keywords": "CEO, founder",
"weight": 1.0
}
}
"""
try:
# Use the proper acreate_relation method which handles:
# - Graph lock for concurrency
# - Entity existence validation
# - Duplicate relation checks
# - Vector embedding creation in relationships_vdb
# - Index consistency via _edit_relation_done
result = await rag.acreate_relation(
source_entity=request.source_entity,
target_entity=request.target_entity,
relation_data=request.relation_data,
)
return {
"status": "success",
"message": f"Relation created successfully between '{request.source_entity}' and '{request.target_entity}'",
"data": result,
}
except ValueError as ve:
logger.error(
f"Validation error creating relation between '{request.source_entity}' and '{request.target_entity}': {str(ve)}"
)
raise HTTPException(status_code=400, detail=str(ve))
except Exception as e:
logger.error(
f"Error creating relation between '{request.source_entity}' and '{request.target_entity}': {str(e)}"
)
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error creating relation: {str(e)}"
)
@router.post("/graph/entities/merge", dependencies=[Depends(combined_auth)])
async def merge_entities(request: EntityMergeRequest):
"""
Merge multiple entities into a single entity, preserving all relationships.
This endpoint is useful for consolidating duplicate or misspelled entities.
All relationships from the source entities will be transferred to the target entity.
Args:
request (EntityMergeRequest): Request containing:
- entities_to_change: List of entity names to be removed
- entity_to_change_into: Name of the target entity to merge into
Returns:
Dict: Result of the merge operation with merged entity information
Example:
{
"entities_to_change": ["Elon Msk", "Ellon Musk"],
"entity_to_change_into": "Elon Musk"
}
"""
try:
result = await rag.amerge_entities(
source_entities=request.entities_to_change,
target_entity=request.entity_to_change_into,
)
return {
"status": "success",
"message": f"Successfully merged {len(request.entities_to_change)} entities into '{request.entity_to_change_into}'",
"data": result,
}
except ValueError as ve:
logger.error(
f"Validation error merging entities {request.entities_to_change} into '{request.entity_to_change_into}': {str(ve)}"
)
raise HTTPException(status_code=400, detail=str(ve))
except Exception as e:
logger.error(
f"Error merging entities {request.entities_to_change} into '{request.entity_to_change_into}': {str(e)}"
)
logger.error(traceback.format_exc())
raise HTTPException(
status_code=500, detail=f"Error merging entities: {str(e)}"
)
return router return router

View file

@ -401,7 +401,7 @@ async def _handle_single_relationship_extraction(
): # treat "relationship" and "relation" interchangeable ): # treat "relationship" and "relation" interchangeable
if len(record_attributes) > 1 and "relation" in record_attributes[0]: if len(record_attributes) > 1 and "relation" in record_attributes[0]:
logger.warning( logger.warning(
f"{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`~`{record_attributes[2] if len(record_attributes) >2 else 'N/A'}`" f"{chunk_key}: LLM output format error; found {len(record_attributes)}/5 fields on REALTION `{record_attributes[1]}`~`{record_attributes[2] if len(record_attributes) > 2 else 'N/A'}`"
) )
logger.debug(record_attributes) logger.debug(record_attributes)
return None return None
@ -2225,7 +2225,7 @@ async def extract_entities(
await asyncio.wait(pending) await asyncio.wait(pending)
# Add progress prefix to the exception message # Add progress prefix to the exception message
progress_prefix = f"C[{processed_chunks+1}/{total_chunks}]" progress_prefix = f"C[{processed_chunks + 1}/{total_chunks}]"
# Re-raise the original exception with a prefix # Re-raise the original exception with a prefix
prefixed_exception = create_prefixed_exception(first_exception, progress_prefix) prefixed_exception = create_prefixed_exception(first_exception, progress_prefix)

View file

@ -1472,8 +1472,7 @@ async def aexport_data(
else: else:
raise ValueError( raise ValueError(
f"Unsupported file format: {file_format}. " f"Unsupported file format: {file_format}. Choose from: csv, excel, md, txt"
f"Choose from: csv, excel, md, txt"
) )
if file_format is not None: if file_format is not None:
print(f"Data exported to: {output_path} with format: {file_format}") print(f"Data exported to: {output_path} with format: {file_format}")

View file

@ -73,7 +73,7 @@ def batch_eval(query_file, result1_file, result2_file, output_file_path):
""" """
request_data = { request_data = {
"custom_id": f"request-{i+1}", "custom_id": f"request-{i + 1}",
"method": "POST", "method": "POST",
"url": "/v1/chat/completions", "url": "/v1/chat/completions",
"body": { "body": {