import os
from multiprocessing.synchronize import Lock as ProcessLock
from threading import Lock as ThreadLock
from multiprocessing import Manager
from typing import Any, Dict, Optional, Union
from lightrag.utils import logger

LockType = Union[ProcessLock, ThreadLock]

_manager = None
_initialized = None
_is_multiprocess = None
is_multiprocess = None

# shared data for storage across processes
_shared_dicts: Optional[Dict[str, Any]] = None
_share_objects: Optional[Dict[str, Any]] = None
_init_flags: Optional[Dict[str, bool]] = None  # namespace -> initialized
_global_lock: Optional[LockType] = None

_NOT_INITIALIZED_MSG = (
    "Shared storage not initialized. Call initialize_share_data() first."
)


def _ensure_initialized() -> None:
    """Raise RuntimeError unless initialize_share_data() has populated the module state."""
    if (
        _is_multiprocess is None
        or _global_lock is None
        or _shared_dicts is None
        or _share_objects is None
    ):
        raise RuntimeError(_NOT_INITIALIZED_MSG)


def initialize_share_data(workers: int = 1):
    """Initialize the module-level shared storage.

    With ``workers > 1`` the lock and the three containers are created through
    a ``multiprocessing.Manager``; otherwise a ``threading.Lock`` and plain
    dicts are used. Once initialization has completed, further calls only
    refresh ``is_multiprocess`` and return, so existing shared data and the
    lock are never replaced.

    Args:
        workers: Number of worker processes. Values of 1 or less select
            single-process mode.
    """
    global _manager, _is_multiprocess, is_multiprocess, _global_lock
    global _shared_dicts, _share_objects, _init_flags, _initialized

    if _initialized is not None and _initialized.value:
        is_multiprocess = _is_multiprocess.value
        logger.info(f"Process {os.getpid()} storage data already initialized!")
        return

    _manager = Manager()
    _initialized = _manager.Value("b", False)
    _is_multiprocess = _manager.Value("b", False)

    if workers > 1:
        _is_multiprocess.value = True
        _global_lock = _manager.Lock()
        # Create shared dictionaries with manager
        _shared_dicts = _manager.dict()
        _share_objects = _manager.dict()
        _init_flags = _manager.dict()  # shared dict holding the per-namespace init flags
        logger.info(f"Process {os.getpid()} storage data created for Multiple Process")
    else:
        _is_multiprocess.value = False
        _global_lock = ThreadLock()
        _shared_dicts = {}
        _share_objects = {}
        _init_flags = {}
        logger.info(f"Process {os.getpid()} storage data created for Single Process")

    is_multiprocess = _is_multiprocess.value
    # Mark completion last so the guard above only short-circuits once every
    # global has been populated.
    _initialized.value = True


def try_initialize_namespace(namespace: str) -> bool:
    """Try to claim the right to initialize ``namespace``.

    The check-and-set on the init-flag dict is performed while holding the
    global lock, so only the first caller for a given namespace gets ``True``.

    Args:
        namespace: Name of the namespace to claim.

    Returns:
        True if this call recorded the namespace as initialized (the caller
        should perform the initialization); False if it was already recorded.

    Raises:
        RuntimeError: If initialize_share_data() has not been called.
    """
    global _init_flags

    if _is_multiprocess is None or _global_lock is None:
        raise RuntimeError(_NOT_INITIALIZED_MSG)

    if _init_flags is None:
        if _is_multiprocess.value:
            raise RuntimeError(_NOT_INITIALIZED_MSG)
        # Single-process mode can recreate the flag dict locally.
        _init_flags = {}

    logger.info(f"Process {os.getpid()} trying to initialize namespace {namespace}")

    with _global_lock:
        if namespace not in _init_flags:
            _init_flags[namespace] = True
            logger.info(
                f"Process {os.getpid()} ready to initialize namespace {namespace}"
            )
            return True

    logger.info(
        f"Process {os.getpid()} found namespace {namespace} already initialized"
    )
    return False


def _get_global_lock() -> LockType:
    """Return the module-wide lock (None until initialize_share_data() has run)."""
    return _global_lock


def get_storage_lock() -> LockType:
    """return storage lock for data consistency"""
    return _get_global_lock()


def get_scan_lock() -> LockType:
    """return scan_progress lock for data consistency"""
    return get_storage_lock()


def get_namespace_object(namespace: str) -> Any:
    """Get the object slot registered for ``namespace``, creating it on first use.

    In multiprocess mode the slot is a manager ``Value`` created with an
    initial value of None; in single-process mode the slot is a plain None
    entry.

    Args:
        namespace: Name of the namespace.

    Returns:
        The entry stored in the shared-object dict for ``namespace``.

    Raises:
        RuntimeError: If initialize_share_data() has not been called.
    """
    _ensure_initialized()

    if namespace not in _share_objects:
        with _get_global_lock():
            # Re-check under the lock: another caller may have created the
            # slot between the first check and lock acquisition.
            if namespace not in _share_objects:
                if _is_multiprocess.value:
                    _share_objects[namespace] = _manager.Value("O", None)
                else:
                    _share_objects[namespace] = None

    return _share_objects[namespace]


def get_namespace_data(namespace: str) -> Dict[str, Any]:
    """Get the storage dict for a specific storage type (namespace).

    The dict is created on first use. In multiprocess mode it is a manager
    dict so that updates made through the returned object are visible to the
    other processes; in single-process mode it is a plain dict.

    Args:
        namespace: Name of the namespace.

    Returns:
        The dict stored for ``namespace``.

    Raises:
        RuntimeError: If initialize_share_data() has not been called.
    """
    _ensure_initialized()

    if namespace not in _shared_dicts:
        with _get_global_lock():
            # Re-check under the lock to avoid replacing a dict another
            # caller has just created.
            if namespace not in _shared_dicts:
                if _is_multiprocess.value:
                    # A plain dict stored inside a manager dict is copied on
                    # every read, so writes to it would be lost; nest a
                    # manager dict instead.
                    _shared_dicts[namespace] = _manager.dict()
                else:
                    _shared_dicts[namespace] = {}

    return _shared_dicts[namespace]


def get_scan_progress() -> Dict[str, Any]:
    """get storage space for document scanning progress data"""
    return get_namespace_data("scan_progress")