Add queue_name parameter to priority_limit_async_func_call for better logging
Add a `queue_name` parameter to the decorator, update all log messages to include the queue name, and pass specific queue names at the LLM and embedding call sites.
This commit is contained in:
parent
57fe1403c3
commit
1a015a7015
2 changed files with 25 additions and 19 deletions
|
|
@ -469,6 +469,7 @@ class LightRAG:
|
||||||
self.embedding_func = priority_limit_async_func_call(
|
self.embedding_func = priority_limit_async_func_call(
|
||||||
self.embedding_func_max_async,
|
self.embedding_func_max_async,
|
||||||
llm_timeout=self.default_embedding_timeout,
|
llm_timeout=self.default_embedding_timeout,
|
||||||
|
queue_name="Embedding func:",
|
||||||
)(self.embedding_func)
|
)(self.embedding_func)
|
||||||
|
|
||||||
# Initialize all storages
|
# Initialize all storages
|
||||||
|
|
@ -565,6 +566,7 @@ class LightRAG:
|
||||||
self.llm_model_func = priority_limit_async_func_call(
|
self.llm_model_func = priority_limit_async_func_call(
|
||||||
self.llm_model_max_async,
|
self.llm_model_max_async,
|
||||||
llm_timeout=self.default_llm_timeout,
|
llm_timeout=self.default_llm_timeout,
|
||||||
|
queue_name="LLM func:",
|
||||||
)(
|
)(
|
||||||
partial(
|
partial(
|
||||||
self.llm_model_func, # type: ignore
|
self.llm_model_func, # type: ignore
|
||||||
|
|
|
||||||
|
|
@ -374,6 +374,7 @@ def priority_limit_async_func_call(
|
||||||
max_task_duration: float = None,
|
max_task_duration: float = None,
|
||||||
max_queue_size: int = 1000,
|
max_queue_size: int = 1000,
|
||||||
cleanup_timeout: float = 2.0,
|
cleanup_timeout: float = 2.0,
|
||||||
|
queue_name: str = "limit_async",
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Enhanced priority-limited asynchronous function call decorator with robust timeout handling
|
Enhanced priority-limited asynchronous function call decorator with robust timeout handling
|
||||||
|
|
@ -391,6 +392,7 @@ def priority_limit_async_func_call(
|
||||||
max_execution_timeout: Maximum time for worker to execute function (defaults to llm_timeout + 30s)
|
max_execution_timeout: Maximum time for worker to execute function (defaults to llm_timeout + 30s)
|
||||||
max_task_duration: Maximum time before health check intervenes (defaults to llm_timeout + 60s)
|
max_task_duration: Maximum time before health check intervenes (defaults to llm_timeout + 60s)
|
||||||
cleanup_timeout: Maximum time to wait for cleanup operations (defaults to 2.0s)
|
cleanup_timeout: Maximum time to wait for cleanup operations (defaults to 2.0s)
|
||||||
|
queue_name: Optional queue name for logging identification (defaults to "limit_async")
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Decorator function
|
Decorator function
|
||||||
|
|
@ -482,7 +484,7 @@ def priority_limit_async_func_call(
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
# Worker-level timeout (max_execution_timeout exceeded)
|
# Worker-level timeout (max_execution_timeout exceeded)
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"limit_async: Worker timeout for task {task_id} after {max_execution_timeout}s"
|
f"{queue_name}: Worker timeout for task {task_id} after {max_execution_timeout}s"
|
||||||
)
|
)
|
||||||
if not task_state.future.done():
|
if not task_state.future.done():
|
||||||
task_state.future.set_exception(
|
task_state.future.set_exception(
|
||||||
|
|
@ -495,12 +497,12 @@ def priority_limit_async_func_call(
|
||||||
if not task_state.future.done():
|
if not task_state.future.done():
|
||||||
task_state.future.cancel()
|
task_state.future.cancel()
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"limit_async: Task {task_id} cancelled during execution"
|
f"{queue_name}: Task {task_id} cancelled during execution"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Function execution error
|
# Function execution error
|
||||||
logger.error(
|
logger.error(
|
||||||
f"limit_async: Error in decorated function for task {task_id}: {str(e)}"
|
f"{queue_name}: Error in decorated function for task {task_id}: {str(e)}"
|
||||||
)
|
)
|
||||||
if not task_state.future.done():
|
if not task_state.future.done():
|
||||||
task_state.future.set_exception(e)
|
task_state.future.set_exception(e)
|
||||||
|
|
@ -512,10 +514,12 @@ def priority_limit_async_func_call(
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Critical error in worker loop
|
# Critical error in worker loop
|
||||||
logger.error(f"limit_async: Critical error in worker: {str(e)}")
|
logger.error(
|
||||||
|
f"{queue_name}: Critical error in worker: {str(e)}"
|
||||||
|
)
|
||||||
await asyncio.sleep(0.1)
|
await asyncio.sleep(0.1)
|
||||||
finally:
|
finally:
|
||||||
logger.debug("limit_async: Worker exiting")
|
logger.debug(f"{queue_name}: Worker exiting")
|
||||||
|
|
||||||
async def enhanced_health_check():
|
async def enhanced_health_check():
|
||||||
"""Enhanced health check with stuck task detection and recovery"""
|
"""Enhanced health check with stuck task detection and recovery"""
|
||||||
|
|
@ -549,7 +553,7 @@ def priority_limit_async_func_call(
|
||||||
# Force cleanup of stuck tasks
|
# Force cleanup of stuck tasks
|
||||||
for task_id, execution_duration in stuck_tasks:
|
for task_id, execution_duration in stuck_tasks:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"limit_async: Detected stuck task {task_id} (execution time: {execution_duration:.1f}s), forcing cleanup"
|
f"{queue_name}: Detected stuck task {task_id} (execution time: {execution_duration:.1f}s), forcing cleanup"
|
||||||
)
|
)
|
||||||
async with task_states_lock:
|
async with task_states_lock:
|
||||||
if task_id in task_states:
|
if task_id in task_states:
|
||||||
|
|
@ -572,7 +576,7 @@ def priority_limit_async_func_call(
|
||||||
|
|
||||||
if workers_needed > 0:
|
if workers_needed > 0:
|
||||||
logger.info(
|
logger.info(
|
||||||
f"limit_async: Creating {workers_needed} new workers"
|
f"{queue_name}: Creating {workers_needed} new workers"
|
||||||
)
|
)
|
||||||
new_tasks = set()
|
new_tasks = set()
|
||||||
for _ in range(workers_needed):
|
for _ in range(workers_needed):
|
||||||
|
|
@ -582,9 +586,9 @@ def priority_limit_async_func_call(
|
||||||
tasks.update(new_tasks)
|
tasks.update(new_tasks)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"limit_async: Error in enhanced health check: {str(e)}")
|
logger.error(f"{queue_name}: Error in enhanced health check: {str(e)}")
|
||||||
finally:
|
finally:
|
||||||
logger.debug("limit_async: Enhanced health check task exiting")
|
logger.debug(f"{queue_name}: Enhanced health check task exiting")
|
||||||
initialized = False
|
initialized = False
|
||||||
|
|
||||||
async def ensure_workers():
|
async def ensure_workers():
|
||||||
|
|
@ -601,7 +605,7 @@ def priority_limit_async_func_call(
|
||||||
if reinit_count > 0:
|
if reinit_count > 0:
|
||||||
reinit_count += 1
|
reinit_count += 1
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"limit_async: Reinitializing system (count: {reinit_count})"
|
f"{queue_name}: Reinitializing system (count: {reinit_count})"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
reinit_count = 1
|
reinit_count = 1
|
||||||
|
|
@ -614,7 +618,7 @@ def priority_limit_async_func_call(
|
||||||
active_tasks_count = len(tasks)
|
active_tasks_count = len(tasks)
|
||||||
if active_tasks_count > 0 and reinit_count > 1:
|
if active_tasks_count > 0 and reinit_count > 1:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"limit_async: {active_tasks_count} tasks still running during reinitialization"
|
f"{queue_name}: {active_tasks_count} tasks still running during reinitialization"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create worker tasks
|
# Create worker tasks
|
||||||
|
|
@ -641,12 +645,12 @@ def priority_limit_async_func_call(
|
||||||
f" (Timeouts: {', '.join(timeout_info)})" if timeout_info else ""
|
f" (Timeouts: {', '.join(timeout_info)})" if timeout_info else ""
|
||||||
)
|
)
|
||||||
logger.info(
|
logger.info(
|
||||||
f"limit_async: {workers_needed} new workers initialized {timeout_str}"
|
f"{queue_name}: {workers_needed} new workers initialized {timeout_str}"
|
||||||
)
|
)
|
||||||
|
|
||||||
async def shutdown():
|
async def shutdown():
|
||||||
"""Gracefully shut down all workers and cleanup resources"""
|
"""Gracefully shut down all workers and cleanup resources"""
|
||||||
logger.info("limit_async: Shutting down priority queue workers")
|
logger.info(f"{queue_name}: Shutting down priority queue workers")
|
||||||
|
|
||||||
shutdown_event.set()
|
shutdown_event.set()
|
||||||
|
|
||||||
|
|
@ -667,7 +671,7 @@ def priority_limit_async_func_call(
|
||||||
await asyncio.wait_for(queue.join(), timeout=5.0)
|
await asyncio.wait_for(queue.join(), timeout=5.0)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"limit_async: Timeout waiting for queue to empty during shutdown"
|
f"{queue_name}: Timeout waiting for queue to empty during shutdown"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Cancel worker tasks
|
# Cancel worker tasks
|
||||||
|
|
@ -687,7 +691,7 @@ def priority_limit_async_func_call(
|
||||||
except asyncio.CancelledError:
|
except asyncio.CancelledError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
logger.info("limit_async: Priority queue workers shutdown complete")
|
logger.info(f"{queue_name}: Priority queue workers shutdown complete")
|
||||||
|
|
||||||
@wraps(func)
|
@wraps(func)
|
||||||
async def wait_func(
|
async def wait_func(
|
||||||
|
|
@ -750,7 +754,7 @@ def priority_limit_async_func_call(
|
||||||
)
|
)
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
raise QueueFullError(
|
raise QueueFullError(
|
||||||
f"Queue full, timeout after {_queue_timeout} seconds"
|
f"{queue_name}: Queue full, timeout after {_queue_timeout} seconds"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Clean up on queue error
|
# Clean up on queue error
|
||||||
|
|
@ -785,14 +789,14 @@ def priority_limit_async_func_call(
|
||||||
await asyncio.sleep(0.1)
|
await asyncio.sleep(0.1)
|
||||||
|
|
||||||
raise TimeoutError(
|
raise TimeoutError(
|
||||||
f"limit_async: User timeout after {_timeout} seconds"
|
f"{queue_name}: User timeout after {_timeout} seconds"
|
||||||
)
|
)
|
||||||
except WorkerTimeoutError as e:
|
except WorkerTimeoutError as e:
|
||||||
# This is Worker-level timeout, directly propagate exception information
|
# This is Worker-level timeout, directly propagate exception information
|
||||||
raise TimeoutError(f"limit_async: {str(e)}")
|
raise TimeoutError(f"{queue_name}: {str(e)}")
|
||||||
except HealthCheckTimeoutError as e:
|
except HealthCheckTimeoutError as e:
|
||||||
# This is Health Check-level timeout, directly propagate exception information
|
# This is Health Check-level timeout, directly propagate exception information
|
||||||
raise TimeoutError(f"limit_async: {str(e)}")
|
raise TimeoutError(f"{queue_name}: {str(e)}")
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
# Ensure cleanup
|
# Ensure cleanup
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue