From c81a56a113ec8fb13ec7e0517289291d17ad31af Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 28 Oct 2025 16:02:35 +0800 Subject: [PATCH 1/6] Fix entity and relationship deletion when no chunk references remain --- lightrag/lightrag.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 45f7afd5..ff7ed253 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -3147,6 +3147,9 @@ class LightRAG: ] if not existing_sources: + # No chunk references means this entity should be deleted + entities_to_delete.add(node_label) + entity_chunk_updates[node_label] = [] continue remaining_sources = subtract_source_ids(existing_sources, chunk_ids) @@ -3168,6 +3171,7 @@ class LightRAG: # Process relationships for edge_data in affected_edges: + # source/target are not stored in normalized order in the graph DB properties src = edge_data.get("source") tgt = edge_data.get("target") @@ -3204,6 +3208,9 @@ class LightRAG: ] if not existing_sources: + # No chunk references means this relationship should be deleted + relationships_to_delete.add(edge_tuple) + relation_chunk_updates[edge_tuple] = [] continue remaining_sources = subtract_source_ids(existing_sources, chunk_ids) From 29c4a91dc39b4fef49132e1a9849225d1cf4b81d Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 28 Oct 2025 19:13:48 +0800 Subject: [PATCH 2/6] Move relationship ID sorting to before vector DB operations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Remove verbose entity rebuild logging • Sort IDs before vector DB updates • Keep graph storage with original order --- lightrag/operate.py | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 7363ab7a..9a66f309 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -677,14 +677,6 @@ async def rebuild_knowledge_from_chunks( entity_chunks_storage=entity_chunks_storage, ) rebuilt_entities_count 
+= 1 - status_message = ( - f"Rebuild `{entity_name}` from {len(chunk_ids)} chunks" - ) - logger.info(status_message) - if pipeline_status is not None and pipeline_status_lock is not None: - async with pipeline_status_lock: - pipeline_status["latest_message"] = status_message - pipeline_status["history_messages"].append(status_message) except Exception as e: failed_entities_count += 1 status_message = f"Failed to rebuild `{entity_name}`: {e}" @@ -1432,10 +1424,6 @@ async def _rebuild_single_relationship( else: truncation_info = "" - # Sort src and tgt to ensure consistent ordering (smaller string first) - if src > tgt: - src, tgt = tgt, src - # Update relationship in graph storage updated_relationship_data = { **current_relationship, @@ -1510,6 +1498,9 @@ async def _rebuild_single_relationship( await knowledge_graph_inst.upsert_edge(src, tgt, updated_relationship_data) # Update relationship in vector database + # Sort src and tgt to ensure consistent ordering (smaller string first) + if src > tgt: + src, tgt = tgt, src try: rel_vdb_id = compute_mdhash_id(src + tgt, prefix="rel-") rel_vdb_id_reverse = compute_mdhash_id(tgt + src, prefix="rel-") @@ -2145,10 +2136,6 @@ async def _merge_edges_then_upsert( else: logger.debug(status_message) - # Sort src_id and tgt_id to ensure consistent ordering (smaller string first) - if src_id > tgt_id: - src_id, tgt_id = tgt_id, src_id - # 11. 
Update both graph and vector db for need_insert_id in [src_id, tgt_id]: if not (await knowledge_graph_inst.has_node(need_insert_id)): @@ -2236,6 +2223,10 @@ async def _merge_edges_then_upsert( weight=weight, ) + # Sort src_id and tgt_id to ensure consistent ordering (smaller string first) + if src_id > tgt_id: + src_id, tgt_id = tgt_id, src_id + if relationships_vdb is not None: rel_vdb_id = compute_mdhash_id(src_id + tgt_id, prefix="rel-") rel_vdb_id_reverse = compute_mdhash_id(tgt_id + src_id, prefix="rel-") From 3fa79026e0c3083a8b4e46df6d5cbdbea767cb8d Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Oct 2025 01:19:55 +0800 Subject: [PATCH 3/6] Fix Entity Source IDs Tracking Problem - Handle existing node updates properly in edge merging stage - Fix source_ids merging logic - Reorder entity deletion and optimize node operations - Delete relationships before entities - Add edge existence debugging logs --- lightrag/lightrag.py | 91 ++++++++++++++++++++++++------------ lightrag/operate.py | 109 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 169 insertions(+), 31 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index ff7ed253..f0acb72d 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -3294,36 +3294,7 @@ class LightRAG: logger.error(f"Failed to delete chunks: {e}") raise Exception(f"Failed to delete document chunks: {e}") from e - # 6. 
Delete entities that have no remaining sources - if entities_to_delete: - try: - # Delete from vector database - entity_vdb_ids = [ - compute_mdhash_id(entity, prefix="ent-") - for entity in entities_to_delete - ] - await self.entities_vdb.delete(entity_vdb_ids) - - # Delete from graph - await self.chunk_entity_relation_graph.remove_nodes( - list(entities_to_delete) - ) - - # Delete from entity_chunks storage - if self.entity_chunks: - await self.entity_chunks.delete(list(entities_to_delete)) - - async with pipeline_status_lock: - log_message = f"Successfully deleted {len(entities_to_delete)} entities" - logger.info(log_message) - pipeline_status["latest_message"] = log_message - pipeline_status["history_messages"].append(log_message) - - except Exception as e: - logger.error(f"Failed to delete entities: {e}") - raise Exception(f"Failed to delete entities: {e}") from e - - # 7. Delete relationships that have no remaining sources + # 6. Delete relationships that have no remaining sources if relationships_to_delete: try: # Delete from vector database @@ -3360,6 +3331,66 @@ class LightRAG: logger.error(f"Failed to delete relationships: {e}") raise Exception(f"Failed to delete relationships: {e}") from e + # 7. 
Delete entities that have no remaining sources + if entities_to_delete: + try: + # Debug: Check and log all edges before deleting nodes + edges_still_exist = 0 + for entity in entities_to_delete: + edges = ( + await self.chunk_entity_relation_graph.get_node_edges( + entity + ) + ) + if edges: + for src, tgt in edges: + if ( + src in entities_to_delete + and tgt in entities_to_delete + ): + logger.warning( + f"Edge still exists: {src} <-> {tgt}" + ) + elif src in entities_to_delete: + logger.warning( + f"Edge still exists: {src} --> {tgt}" + ) + else: + logger.warning( + f"Edge still exists: {tgt} --> {src}" + ) + edges_still_exist += 1 + if edges_still_exist: + logger.warning( + f"⚠️ {edges_still_exist} entities still has edges before deletion" + ) + + # Delete from graph + await self.chunk_entity_relation_graph.remove_nodes( + list(entities_to_delete) + ) + + # Delete from vector database + entity_vdb_ids = [ + compute_mdhash_id(entity, prefix="ent-") + for entity in entities_to_delete + ] + await self.entities_vdb.delete(entity_vdb_ids) + + # Delete from entity_chunks storage + if self.entity_chunks: + await self.entity_chunks.delete(list(entities_to_delete)) + + async with pipeline_status_lock: + log_message = f"Successfully deleted {len(entities_to_delete)} entities" + logger.info(log_message) + pipeline_status["latest_message"] = log_message + pipeline_status["history_messages"].append(log_message) + + except Exception as e: + logger.error(f"Failed to delete entities: {e}") + raise Exception(f"Failed to delete entities: {e}") from e + # Persist changes to graph database before releasing graph database lock await self._insert_done() diff --git a/lightrag/operate.py b/lightrag/operate.py index 9a66f309..3354a2ae 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -2138,7 +2138,11 @@ async def _merge_edges_then_upsert( # 11. 
Update both graph and vector db for need_insert_id in [src_id, tgt_id]: - if not (await knowledge_graph_inst.has_node(need_insert_id)): + # Optimization: Use get_node instead of has_node + get_node + existing_node = await knowledge_graph_inst.get_node(need_insert_id) + + if existing_node is None: + # Node doesn't exist - create new node node_created_at = int(time.time()) node_data = { "entity_id": need_insert_id, @@ -2195,6 +2199,109 @@ async def _merge_edges_then_upsert( "created_at": node_created_at, } added_entities.append(entity_data) + else: + # Node exists - update its source_ids by merging with new source_ids + updated = False # Track if any update occurred + + # 1. Get existing full source_ids from entity_chunks_storage + existing_full_source_ids = [] + if entity_chunks_storage is not None: + stored_chunks = await entity_chunks_storage.get_by_id(need_insert_id) + if stored_chunks and isinstance(stored_chunks, dict): + existing_full_source_ids = [ + chunk_id + for chunk_id in stored_chunks.get("chunk_ids", []) + if chunk_id + ] + + # If not in entity_chunks_storage, get from graph database + if not existing_full_source_ids: + if existing_node.get("source_id"): + existing_full_source_ids = existing_node["source_id"].split( + GRAPH_FIELD_SEP + ) + + # 2. Merge with new source_ids from this relationship + new_source_ids_from_relation = [ + chunk_id for chunk_id in source_ids if chunk_id + ] + merged_full_source_ids = merge_source_ids( + existing_full_source_ids, new_source_ids_from_relation + ) + + # 3. Save merged full list to entity_chunks_storage (conditional) + if ( + entity_chunks_storage is not None + and merged_full_source_ids != existing_full_source_ids + ): + updated = True + await entity_chunks_storage.upsert( + { + need_insert_id: { + "chunk_ids": merged_full_source_ids, + "count": len(merged_full_source_ids), + } + } + ) + + # 4. 
Apply source_ids limit for graph and vector db + limit_method = global_config.get( + "source_ids_limit_method", SOURCE_IDS_LIMIT_METHOD_KEEP + ) + max_source_limit = global_config.get("max_source_ids_per_entity") + limited_source_ids = apply_source_ids_limit( + merged_full_source_ids, + max_source_limit, + limit_method, + identifier=f"`{need_insert_id}`", + ) + + # 5. Update graph database and vector database with limited source_ids (conditional) + limited_source_id_str = GRAPH_FIELD_SEP.join(limited_source_ids) + + if limited_source_id_str != existing_node.get("source_id", ""): + updated = True + updated_node_data = { + **existing_node, + "source_id": limited_source_id_str, + } + await knowledge_graph_inst.upsert_node( + need_insert_id, node_data=updated_node_data + ) + + # Update vector database + if entity_vdb is not None: + entity_vdb_id = compute_mdhash_id(need_insert_id, prefix="ent-") + entity_content = ( + f"{need_insert_id}\n{existing_node.get('description', '')}" + ) + vdb_data = { + entity_vdb_id: { + "content": entity_content, + "entity_name": need_insert_id, + "source_id": limited_source_id_str, + "entity_type": existing_node.get("entity_type", "UNKNOWN"), + "file_path": existing_node.get( + "file_path", "unknown_source" + ), + } + } + await safe_vdb_operation_with_exception( + operation=lambda payload=vdb_data: entity_vdb.upsert(payload), + operation_name="existing_entity_update", + entity_name=need_insert_id, + max_retries=3, + retry_delay=0.1, + ) + + # 6. 
Log once at the end if any update occurred + if updated: + status_message = f"Chunks appended from relation: `{need_insert_id}`" + logger.info(status_message) + if pipeline_status is not None and pipeline_status_lock is not None: + async with pipeline_status_lock: + pipeline_status["latest_message"] = status_message + pipeline_status["history_messages"].append(status_message) edge_created_at = int(time.time()) await knowledge_graph_inst.upsert_edge( From da2e9efd11ec172b3e8d5f9679cb599c036fe60c Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Oct 2025 01:39:55 +0800 Subject: [PATCH 4/6] Bump API version to 0247 --- lightrag/api/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/api/__init__.py b/lightrag/api/__init__.py index b5db555e..36d6dc0a 100644 --- a/lightrag/api/__init__.py +++ b/lightrag/api/__init__.py @@ -1 +1 @@ -__api_version__ = "0246" +__api_version__ = "0247" From 54c48dce3873100222904c0b1fb184d2fb847973 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Oct 2025 02:09:13 +0800 Subject: [PATCH 5/6] Fix z-index layering for GraphViewer UI panels --- lightrag_webui/src/features/GraphViewer.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag_webui/src/features/GraphViewer.tsx b/lightrag_webui/src/features/GraphViewer.tsx index ec7678d9..6f52e6dc 100644 --- a/lightrag_webui/src/features/GraphViewer.tsx +++ b/lightrag_webui/src/features/GraphViewer.tsx @@ -226,13 +226,13 @@ const GraphViewer = () => { {showPropertyPanel && ( -
+
)} {showLegend && ( -
+
)} From ee7c683fa7268e91e3a8070e363bd7482ff195ba Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Oct 2025 02:31:52 +0800 Subject: [PATCH 6/6] Fix swagger docs page problem in dev mode - Add /static to VITE_API_ENDPOINTS - Update proxy rewrite rules - Include static file serving - Sync sample env file --- lightrag_webui/.env.development | 2 +- lightrag_webui/env.development.smaple | 2 +- lightrag_webui/vite.config.ts | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lightrag_webui/.env.development b/lightrag_webui/.env.development index 5a6ac93f..501be53c 100644 --- a/lightrag_webui/.env.development +++ b/lightrag_webui/.env.development @@ -1,4 +1,4 @@ # Development environment configuration VITE_BACKEND_URL=http://localhost:9621 VITE_API_PROXY=true -VITE_API_ENDPOINTS=/api,/documents,/graphs,/graph,/health,/query,/docs,/redoc,/openapi.json,/login,/auth-status +VITE_API_ENDPOINTS=/api,/documents,/graphs,/graph,/health,/query,/docs,/redoc,/openapi.json,/login,/auth-status,/static diff --git a/lightrag_webui/env.development.smaple b/lightrag_webui/env.development.smaple index 5a6ac93f..501be53c 100644 --- a/lightrag_webui/env.development.smaple +++ b/lightrag_webui/env.development.smaple @@ -1,4 +1,4 @@ # Development environment configuration VITE_BACKEND_URL=http://localhost:9621 VITE_API_PROXY=true -VITE_API_ENDPOINTS=/api,/documents,/graphs,/graph,/health,/query,/docs,/redoc,/openapi.json,/login,/auth-status +VITE_API_ENDPOINTS=/api,/documents,/graphs,/graph,/health,/query,/docs,/redoc,/openapi.json,/login,/auth-status,/static diff --git a/lightrag_webui/vite.config.ts b/lightrag_webui/vite.config.ts index 5cb52992..f39969c6 100644 --- a/lightrag_webui/vite.config.ts +++ b/lightrag_webui/vite.config.ts @@ -40,7 +40,7 @@ export default defineConfig({ changeOrigin: true, rewrite: endpoint === '/api' ? (path) => path.replace(/^\/api/, '') : - endpoint === '/docs' || endpoint === '/redoc' || endpoint === '/openapi.json' ? 
+ endpoint === '/docs' || endpoint === '/redoc' || endpoint === '/openapi.json' || endpoint === '/static' ? (path) => path : undefined } ])