Move merging stage back to being controlled by max parallel insert semaphore

This commit is contained in:
yangdx 2025-07-12 03:32:08 +08:00
parent 7490a18481
commit 39965d7ded
2 changed files with 82 additions and 79 deletions

View file

@ -57,9 +57,9 @@ _lock_registry_count: Optional[Dict[str, int]] = None
_lock_cleanup_data: Optional[Dict[str, time.time]] = None _lock_cleanup_data: Optional[Dict[str, time.time]] = None
_registry_guard = None _registry_guard = None
# Timeout for keyed locks in seconds # Timeout for keyed locks in seconds
CLEANUP_KEYED_LOCKS_AFTER_SECONDS = 150 CLEANUP_KEYED_LOCKS_AFTER_SECONDS = 300
# Threshold for triggering cleanup - only clean when pending list exceeds this size # Threshold for triggering cleanup - only clean when pending list exceeds this size
CLEANUP_THRESHOLD = 200 CLEANUP_THRESHOLD = 500
_initialized = None _initialized = None

View file

@ -1094,7 +1094,6 @@ class LightRAG:
} }
) )
# Semphore is released here
# Concurrency is controlled by graph db lock for individual entities and relationships # Concurrency is controlled by graph db lock for individual entities and relationships
if file_extraction_stage_ok: if file_extraction_stage_ok:
try: try:
@ -1141,7 +1140,9 @@ class LightRAG:
log_message = f"Completed processing file {current_file_number}/{total_files}: {file_path}" log_message = f"Completed processing file {current_file_number}/{total_files}: {file_path}"
logger.info(log_message) logger.info(log_message)
pipeline_status["latest_message"] = log_message pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message) pipeline_status["history_messages"].append(
log_message
)
except Exception as e: except Exception as e:
# Log error and update pipeline status # Log error and update pipeline status
@ -1153,7 +1154,9 @@ class LightRAG:
pipeline_status["history_messages"].append( pipeline_status["history_messages"].append(
traceback.format_exc() traceback.format_exc()
) )
pipeline_status["history_messages"].append(error_msg) pipeline_status["history_messages"].append(
error_msg
)
# Persistent llm cache # Persistent llm cache
if self.llm_response_cache: if self.llm_response_cache: