Normalize entity order for undirected graph consistency

• Normalize entity pairs for storage
• Update API docs for undirected edges
This commit is contained in:
yangdx 2025-10-26 15:53:31 +08:00
parent 3fbd704bf9
commit bf1897a67e
2 changed files with 41 additions and 9 deletions

View file

@ -299,6 +299,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
entity_data (dict): Entity properties including:
- description (str): Textual description of the entity
- entity_type (str): Category/type of the entity (e.g., PERSON, ORGANIZATION, LOCATION)
- source_id (str): Related chunk_id from which the description originates
- Additional custom properties as needed
Response Schema:
@ -309,6 +310,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
"entity_name": "Tesla",
"description": "Electric vehicle manufacturer",
"entity_type": "ORGANIZATION",
"source_id": "chunk-123<SEP>chunk-456",
... (other entity properties)
}
}
@ -361,10 +363,11 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
"""
Create a new relationship between two entities in the knowledge graph
This endpoint establishes a directed relationship between two existing entities.
Both the source and target entities must already exist in the knowledge graph.
The system automatically generates vector embeddings for the relationship to
enable semantic search and graph traversal.
This endpoint establishes an undirected relationship between two existing entities.
The provided source/target order is accepted for convenience, but the backend
stored edge is undirected and may be returned with the entities swapped.
Both entities must already exist in the knowledge graph. The system automatically
generates vector embeddings for the relationship to enable semantic search and graph traversal.
Prerequisites:
- Both source_entity and target_entity must exist in the knowledge graph
@ -376,6 +379,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
relation_data (dict): Relationship properties including:
- description (str): Textual description of the relationship
- keywords (str): Comma-separated keywords describing the relationship type
- source_id (str): Related chunk_id from which the description originates
- weight (float): Relationship strength/importance (default: 1.0)
- Additional custom properties as needed
@ -388,6 +392,7 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
"tgt_id": "Tesla",
"description": "Elon Musk is the CEO of Tesla",
"keywords": "CEO, founder",
"source_id": "chunk-123<SEP>chunk-456",
"weight": 1.0,
... (other relationship properties)
}

View file

@ -100,6 +100,10 @@ async def adelete_by_relation(
# Use graph database lock to ensure atomic graph and vector db operations
async with graph_db_lock:
try:
# Normalize entity order for undirected graph (ensures consistent key generation)
if source_entity > target_entity:
source_entity, target_entity = target_entity, source_entity
# Check if the relation exists
edge_exists = await chunk_entity_relation_graph.has_edge(
source_entity, target_entity
@ -878,6 +882,10 @@ async def acreate_relation(
source_entity, target_entity, edge_data
)
# Normalize entity order for undirected relation vector (ensures consistent key generation)
if source_entity > target_entity:
source_entity, target_entity = target_entity, source_entity
# Prepare content for embedding
description = edge_data.get("description", "")
keywords = edge_data.get("keywords", "")
@ -1121,19 +1129,27 @@ async def amerge_entities(
tgt = rel_data["tgt"]
edge_data = rel_data["data"]
# Normalize entity order for consistent vector storage
normalized_src, normalized_tgt = sorted([src, tgt])
description = edge_data.get("description", "")
keywords = edge_data.get("keywords", "")
source_id = edge_data.get("source_id", "")
weight = float(edge_data.get("weight", 1.0))
content = f"{keywords}\t{src}\n{tgt}\n{description}"
relation_id = compute_mdhash_id(src + tgt, prefix="rel-")
# Use normalized order for content and relation ID
content = (
f"{keywords}\t{normalized_src}\n{normalized_tgt}\n{description}"
)
relation_id = compute_mdhash_id(
normalized_src + normalized_tgt, prefix="rel-"
)
relation_data_for_vdb = {
relation_id: {
"content": content,
"src_id": src,
"tgt_id": tgt,
"src_id": normalized_src,
"tgt_id": normalized_tgt,
"source_id": source_id,
"description": description,
"keywords": keywords,
@ -1340,7 +1356,18 @@ async def get_relation_info(
tgt_entity: str,
include_vector_data: bool = False,
) -> dict[str, str | None | dict[str, str]]:
"""Get detailed information of a relationship"""
"""
Get detailed information of a relationship between two entities.
The relationship is undirected; swapping src_entity and tgt_entity does not change the relationship.
Args:
src_entity: Source entity name
tgt_entity: Target entity name
include_vector_data: Whether to include vector database information
Returns:
Dictionary containing relationship information
"""
# Get information from the graph
edge_data = await chunk_entity_relation_graph.get_edge(src_entity, tgt_entity)