Fix graph truncation detection for depth-limited BFS

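- Track unexplored neighbors at max depth (nodes at the depth limit that still have unvisited neighbors)
- Improve truncation flag accuracy: mark the result as truncated when either the max_nodes limit or the max_depth limit cuts the search short, with a distinct log message for the depth-limit case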
This commit is contained in:
yangdx 2025-09-20 13:12:25 +08:00
parent b897eedaef
commit 1dd164a122

View file

@ -353,6 +353,9 @@ class NetworkXStorage(BaseGraphStorage):
# Store (node, depth, degree) in the queue # Store (node, depth, degree) in the queue
queue = [(node_label, 0, graph.degree(node_label))] queue = [(node_label, 0, graph.degree(node_label))]
# Flag to track if there are unexplored neighbors due to depth limit
has_unexplored_neighbors = False
# Modified breadth-first search with degree-based prioritization # Modified breadth-first search with degree-based prioritization
while queue and len(bfs_nodes) < max_nodes: while queue and len(bfs_nodes) < max_nodes:
# Get the current depth from the first node in queue # Get the current depth from the first node in queue
@ -384,18 +387,32 @@ class NetworkXStorage(BaseGraphStorage):
for neighbor in unvisited_neighbors: for neighbor in unvisited_neighbors:
neighbor_degree = graph.degree(neighbor) neighbor_degree = graph.degree(neighbor)
queue.append((neighbor, depth + 1, neighbor_degree)) queue.append((neighbor, depth + 1, neighbor_degree))
else:
# Check if there are unexplored neighbors (skipped due to depth limit)
neighbors = list(graph.neighbors(current_node))
unvisited_neighbors = [
n for n in neighbors if n not in visited
]
if unvisited_neighbors:
has_unexplored_neighbors = True
# Check if we've reached max_nodes # Check if we've reached max_nodes
if len(bfs_nodes) >= max_nodes: if len(bfs_nodes) >= max_nodes:
break break
# Check if graph is truncated - if we still have nodes in the queue # Check if graph is truncated - either due to max_nodes limit or depth limit
# and we've reached max_nodes, then the graph is truncated if (queue and len(bfs_nodes) >= max_nodes) or has_unexplored_neighbors:
if queue and len(bfs_nodes) >= max_nodes:
result.is_truncated = True result.is_truncated = True
logger.info( if has_unexplored_neighbors and not (
f"[{self.workspace}] Graph truncated: breadth-first search limited to {max_nodes} nodes" queue and len(bfs_nodes) >= max_nodes
) ):
logger.info(
f"[{self.workspace}] Graph truncated: reached max_depth {max_depth}, unexplored neighbors exist"
)
else:
logger.info(
f"[{self.workspace}] Graph truncated: breadth-first search limited to {max_nodes} nodes"
)
# Create subgraph with BFS discovered nodes # Create subgraph with BFS discovered nodes
subgraph = graph.subgraph(bfs_nodes) subgraph = graph.subgraph(bfs_nodes)