LightRAG/examples/graph_visual_with_falkordb.py
2025-08-26 13:03:42 +03:00

279 lines
9 KiB
Python

import os
import xml.etree.ElementTree as ET
import falkordb
# Constants
# Directory that is expected to contain the exported GraphML file.
WORKING_DIR = "./dickens"
# Number of nodes / edges sent to FalkorDB in a single UNWIND query.
BATCH_SIZE_NODES = 500
BATCH_SIZE_EDGES = 100

# FalkorDB connection settings. Each value may be overridden through an
# environment variable of the same name; the defaults are the values that
# used to be hard-coded, so nothing changes when the variables are unset.
FALKORDB_HOST = os.environ.get("FALKORDB_HOST", "localhost")
FALKORDB_PORT = int(os.environ.get("FALKORDB_PORT", "6379"))
FALKORDB_GRAPH_NAME = os.environ.get("FALKORDB_GRAPH_NAME", "dickens_graph")
def xml_to_json(xml_file):
    """Parse a GraphML file into a ``{"nodes": [...], "edges": [...]}`` dict.

    Node fields are read from the ``<data>`` children keyed ``d1``
    (entity_type), ``d2`` (description) and ``d3`` (source_id); edge fields
    from ``d5`` (weight), ``d6`` (description), ``d7`` (keywords) and ``d8``
    (source_id). A ``<data>`` element that is missing *or empty* yields the
    default value (``""`` for text fields, ``1.0`` for weight) instead of
    ``None`` or an exception. Surrounding double quotes are stripped from
    node ids, edge endpoints and the entity type.

    Args:
        xml_file: Path or file object accepted by ``ET.parse``.

    Returns:
        The parsed dict, or ``None`` when the file cannot be read or parsed
        (the error is printed to stdout).
    """
    # Map the empty prefix to the GraphML namespace so un-prefixed tag names
    # in the search paths resolve to namespaced elements.
    namespace = {"": "http://graphml.graphdrawing.org/xmlns"}

    def data_text(element, key):
        """Return the text of ``<data key=KEY>`` or None if absent/empty."""
        found = element.find(f"./data[@key='{key}']", namespace)
        return None if found is None else found.text

    def text_or_empty(element, key):
        value = data_text(element, key)
        return "" if value is None else value

    try:
        tree = ET.parse(xml_file)
        root = tree.getroot()

        # Print the root element's tag and attributes to confirm the file
        # has been correctly loaded
        print(f"Root element: {root.tag}")
        print(f"Root attributes: {root.attrib}")

        data = {"nodes": [], "edges": []}

        for node in root.findall(".//node", namespace):
            data["nodes"].append(
                {
                    "id": node.get("id").strip('"'),
                    "entity_type": text_or_empty(node, "d1").strip('"'),
                    "description": text_or_empty(node, "d2"),
                    "source_id": text_or_empty(node, "d3"),
                }
            )

        for edge in root.findall(".//edge", namespace):
            raw_weight = data_text(edge, "d5")
            data["edges"].append(
                {
                    "source": edge.get("source").strip('"'),
                    "target": edge.get("target").strip('"'),
                    # An absent or empty weight falls back to 1.0; text that
                    # is present but not numeric still raises and is
                    # reported by the handler below.
                    "weight": 1.0 if raw_weight is None else float(raw_weight),
                    "description": text_or_empty(edge, "d6"),
                    "keywords": text_or_empty(edge, "d7"),
                    "source_id": text_or_empty(edge, "d8"),
                }
            )

        return data
    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")
        return None
    except Exception as e:
        # Callers treat None as "parsing failed", so report and return None
        # rather than propagate (e.g. unreadable file, missing id attribute).
        print(f"Unexpected error: {e}")
        return None
def insert_nodes_and_edges_to_falkordb(data):
    """Insert graph data into FalkorDB.

    Nodes are created first as ``Entity`` nodes; afterwards a ``DIRECTED``
    relationship is created from the node whose ``entity_id`` equals each
    edge's ``source`` to the node whose ``entity_id`` equals its ``target``.
    Both steps run in batches of ``BATCH_SIZE_NODES`` / ``BATCH_SIZE_EDGES``
    items per ``UNWIND`` query. Finally the node and relationship counts are
    read back and printed.

    Nodes are written with ``CREATE`` (not ``MERGE``), so calling this twice
    without clearing the graph in between inserts duplicates.

    Args:
        data: Dict with a ``"nodes"`` list (items carrying ``id``,
            ``entity_type``, ``description``, ``source_id``) and an
            ``"edges"`` list (items carrying ``source``, ``target``,
            ``weight``, ``description``, ``keywords``, ``source_id``).

    Returns:
        None. Any exception is caught and reported on stdout.
    """
    # The queries do not depend on the batch, so build them once.
    node_query = """
    UNWIND $nodes AS node
    CREATE (n:Entity {
        entity_id: node.id,
        entity_type: node.entity_type,
        description: node.description,
        source_id: node.source_id
    })
    """
    # CREATE needs an explicit relationship direction; an undirected
    # pattern (`-[r]-`) is rejected, so the edge goes source -> target.
    edge_query = """
    UNWIND $edges AS edge
    MATCH (source:Entity {entity_id: edge.source})
    MATCH (target:Entity {entity_id: edge.target})
    CREATE (source)-[r:DIRECTED {
        weight: edge.weight,
        description: edge.description,
        keywords: edge.keywords,
        source_id: edge.source_id
    }]->(target)
    """

    try:
        # Connect to FalkorDB
        db = falkordb.FalkorDB(host=FALKORDB_HOST, port=FALKORDB_PORT)
        graph = db.select_graph(FALKORDB_GRAPH_NAME)

        print(f"Connected to FalkorDB at {FALKORDB_HOST}:{FALKORDB_PORT}")
        print(f"Using graph: {FALKORDB_GRAPH_NAME}")

        nodes = data["nodes"]
        edges = data["edges"]

        print(f"Total nodes to insert: {len(nodes)}")
        print(f"Total edges to insert: {len(edges)}")

        # Insert nodes in batches
        for i in range(0, len(nodes), BATCH_SIZE_NODES):
            batch_nodes = nodes[i : i + BATCH_SIZE_NODES]
            graph.query(node_query, {"nodes": batch_nodes})
            print(f"Inserted nodes {i+1} to {min(i + BATCH_SIZE_NODES, len(nodes))}")

        # Insert edges in batches
        for i in range(0, len(edges), BATCH_SIZE_EDGES):
            batch_edges = edges[i : i + BATCH_SIZE_EDGES]
            graph.query(edge_query, {"edges": batch_edges})
            print(f"Inserted edges {i+1} to {min(i + BATCH_SIZE_EDGES, len(edges))}")

        print("Data insertion completed successfully!")

        # Print some statistics
        node_count_result = graph.query("MATCH (n:Entity) RETURN count(n) AS count")
        # Directed pattern: an undirected match would visit every
        # relationship from both ends and report twice the real count.
        edge_count_result = graph.query(
            "MATCH ()-[r:DIRECTED]->() RETURN count(r) AS count"
        )

        node_count = (
            node_count_result.result_set[0][0] if node_count_result.result_set else 0
        )
        edge_count = (
            edge_count_result.result_set[0][0] if edge_count_result.result_set else 0
        )

        print("Final statistics:")
        print(f"- Nodes in database: {node_count}")
        print(f"- Edges in database: {edge_count}")
    except Exception as e:
        print(f"Error inserting data into FalkorDB: {e}")
def query_graph_data():
    """Query and display some sample data from FalkorDB.

    Prints up to five ``Entity`` nodes, up to five ``DIRECTED``
    relationships, and the minimum / maximum / average node degree.
    Descriptions are truncated to 100 characters; a null description is
    shown as empty text and null degree aggregates are shown as ``N/A``
    instead of aborting the report.

    Returns:
        None. Any exception is caught and reported on stdout.
    """

    def preview(text):
        # Stored descriptions may be null; slicing None would raise.
        return (text or "")[:100]

    try:
        # Connect to FalkorDB
        db = falkordb.FalkorDB(host=FALKORDB_HOST, port=FALKORDB_PORT)
        graph = db.select_graph(FALKORDB_GRAPH_NAME)

        print("\n=== Sample Graph Data ===")

        # Get some sample nodes
        query = (
            "MATCH (n:Entity) RETURN n.entity_id, n.entity_type, n.description LIMIT 5"
        )
        result = graph.query(query)

        print("\nSample Nodes:")
        if result.result_set:
            for record in result.result_set:
                print(f"- {record[0]} ({record[1]}): {preview(record[2])}...")

        # Get some sample edges. The pattern is directed so each relationship
        # is listed once, in the same orientation as the printed arrow; an
        # undirected match can return every edge twice (a-b and b-a).
        query = """
        MATCH (a:Entity)-[r:DIRECTED]->(b:Entity)
        RETURN a.entity_id, b.entity_id, r.weight, r.description
        LIMIT 5
        """
        result = graph.query(query)

        print("\nSample Edges:")
        if result.result_set:
            for record in result.result_set:
                print(
                    f"- {record[0]} -> {record[1]} (weight: {record[2]}): {preview(record[3])}..."
                )

        # Get node degree statistics (undirected on purpose: degree counts
        # relationships in either direction)
        query = """
        MATCH (n:Entity)
        OPTIONAL MATCH (n)-[r]-()
        WITH n, count(r) AS degree
        RETURN min(degree) AS min_degree, max(degree) AS max_degree, avg(degree) AS avg_degree
        """
        result = graph.query(query)

        print("\nNode Degree Statistics:")
        if result.result_set:
            record = result.result_set[0]
            # NOTE(review): on a graph without Entity nodes the aggregates
            # presumably come back as null; guard the float formatting.
            avg_degree = "N/A" if record[2] is None else f"{record[2]:.2f}"
            print(f"- Min degree: {'N/A' if record[0] is None else record[0]}")
            print(f"- Max degree: {'N/A' if record[1] is None else record[1]}")
            print(f"- Avg degree: {avg_degree}")
    except Exception as e:
        print(f"Error querying FalkorDB: {e}")
def clear_graph():
    """Delete every node, together with its relationships, from the graph.

    Connects to the FalkorDB instance configured by ``FALKORDB_HOST`` /
    ``FALKORDB_PORT``, selects ``FALKORDB_GRAPH_NAME`` and runs a single
    ``DETACH DELETE`` over all nodes. Failures are printed, not raised.
    """
    # DETACH DELETE drops each node along with the relationships attached to it.
    wipe_all = "MATCH (n) DETACH DELETE n"
    try:
        client = falkordb.FalkorDB(host=FALKORDB_HOST, port=FALKORDB_PORT)
        client.select_graph(FALKORDB_GRAPH_NAME).query(wipe_all)
        print("Graph cleared successfully!")
    except Exception as err:
        print(f"Error clearing graph: {err}")
def main():
    """Parse the exported GraphML file and run an interactive FalkorDB menu.

    Looks for ``graph_chunk_entity_relation.graphml`` inside ``WORKING_DIR``,
    parses it with ``xml_to_json`` and then repeatedly offers to clear the
    graph, insert the parsed data, query sample data, or exit. Returns early
    (after printing a message) when the file is missing or cannot be parsed.
    The menu also ends cleanly when stdin is closed or the user presses
    Ctrl-C at the prompt.
    """
    xml_file = os.path.join(WORKING_DIR, "graph_chunk_entity_relation.graphml")

    if not os.path.exists(xml_file):
        print(
            f"Error: File {xml_file} not found. Please ensure the GraphML file exists."
        )
        print(
            "This file is typically generated by LightRAG after processing documents."
        )
        return

    print("FalkorDB Graph Visualization Example")
    print("====================================")
    print(f"Processing file: {xml_file}")
    print(f"FalkorDB connection: {FALKORDB_HOST}:{FALKORDB_PORT}")
    print(f"Graph name: {FALKORDB_GRAPH_NAME}")
    print()

    # Parse XML to JSON
    print("1. Parsing GraphML file...")
    data = xml_to_json(xml_file)
    if data is None:
        print("Failed to parse XML file.")
        return

    print(f"   Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")

    # Ask user what to do
    while True:
        print("\nOptions:")
        print("1. Clear existing graph data")
        print("2. Insert data into FalkorDB")
        print("3. Query sample data")
        print("4. Exit")

        try:
            choice = input("\nSelect an option (1-4): ").strip()
        except (EOFError, KeyboardInterrupt):
            # stdin was closed (e.g. piped input ran out) or the user hit
            # Ctrl-C: treat it like choosing "Exit" instead of a traceback.
            print("\nGoodbye!")
            break

        if choice == "1":
            print("\n2. Clearing existing graph data...")
            clear_graph()
        elif choice == "2":
            print("\n2. Inserting data into FalkorDB...")
            insert_nodes_and_edges_to_falkordb(data)
        elif choice == "3":
            print("\n3. Querying sample data...")
            query_graph_data()
        elif choice == "4":
            print("Goodbye!")
            break
        else:
            print("Invalid choice. Please try again.")


if __name__ == "__main__":
    main()