ragflow/personal_analyze/04-AGENT-SYSTEM/canvas_execution_engine.md
Claude a6ee18476d
docs: Add detailed backend module analysis documentation
Add comprehensive documentation covering 6 modules:
- 01-API-LAYER: Authentication, routing, SSE streaming
- 02-SERVICE-LAYER: Dialog, Task, LLM service analysis
- 03-RAG-ENGINE: Hybrid search, embedding, reranking
- 04-AGENT-SYSTEM: Canvas engine, components, tools
- 05-DOCUMENT-PROCESSING: Task executor, PDF parsing
- 06-ALGORITHMS: BM25, fusion, RAPTOR

Total 28 documentation files with code analysis, diagrams, and formulas.
2025-11-26 11:10:54 +00:00

423 lines
13 KiB
Markdown

# Canvas Execution Engine
## Overview
Canvas engine orchestrates workflow execution, managing component lifecycle, state, and event streaming.
## File Location
```
/agent/canvas.py
```
## Architecture
```
┌─────────────────────────────────────────────────────────────────┐
│ CANVAS EXECUTION ENGINE │
└─────────────────────────────────────────────────────────────────┘
┌─────────────────────────────────────────────────────────────────┐
│ Canvas Class │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ DSL Parsing │ │
│ │ - components: Dict[id → component_config] │ │
│ │ - globals: System + custom variables │ │
│ │ - path: Execution order list │ │
│ │ - history: Conversation history │ │
│ │ - memory: Agent memory │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ State Management │ │
│ │ - Component outputs │ │
│ │ - Variable resolution │ │
│ │ - Memory accumulation │ │
│ └─────────────────────────────────────────────────────────┘ │
│ │
│ ┌─────────────────────────────────────────────────────────┐ │
│ │ Execution Control │ │
│ │ - Path-based sequential execution │ │
│ │ - Parallel component batching (5 workers) │ │
│ │ - Exception routing │ │
│ │ - Cancellation support │ │
│ └─────────────────────────────────────────────────────────┘ │
└─────────────────────────────────────────────────────────────────┘
```
## Canvas Class
```python
class Canvas(Graph):
    def __init__(self, dsl: str, tenant_id=None, task_id=None):
        """
        Initialize a Canvas from a DSL JSON string.

        Args:
            dsl: JSON string defining the workflow (components, path,
                history, memory).
            tenant_id: Tenant ID for model access; also exposed to the
                workflow as the "sys.user_id" global.
            task_id: Task ID used for cooperative cancellation.
        """
        # Built-in workflow variables, addressable as e.g. {sys.query}.
        self.globals = {
            "sys.query": "",
            "sys.user_id": tenant_id,
            "sys.conversation_turns": 0,
            "sys.files": []
        }

        # Parse the DSL into its top-level sections.
        dsl_dict = json.loads(dsl)
        self.components = dsl_dict.get("components", {})
        self.path = dsl_dict.get("path", [])
        self.history = dsl_dict.get("history", [])
        self.memory = dsl_dict.get("memory", [])

        # Instantiate a component object for every configured component.
        for cpn_id, config in self.components.items():
            self._initialize_component(cpn_id, config)
```
## Main Execution Loop
```python
async def run(self, **kwargs):
    """
    Main execution engine.

    Yields:
        SSE event dicts describing workflow progress
        (workflow_started, node_started, message, node_finished,
        workflow_finished).
    """
    # 1. Reset per-run state but keep accumulated agent memory.
    self.reset(mem=False)
    yield decorate("workflow_started", {"inputs": kwargs.get("inputs")})

    # 2. Batch execution: self.path grows as routing appends new
    #    components, so each pass runs the fresh slice [idx, to).
    idx = len(self.path) - 1
    while idx < len(self.path):
        to = len(self.path)

        # 3. Execute the current batch of components in parallel.
        self._run_batch(idx, to, kwargs)

        # 4. Post-processing for each component in the batch.
        for i in range(idx, to):
            cpn = self.get_component(self.path[i])
            cpn_obj = cpn["obj"]

            # Node lifecycle event (emitted once the batch has run).
            yield decorate("node_started", {
                "component_id": self.path[i],
                "component_name": cpn_obj.component_name
            })

            # Streaming outputs: a `partial` marks a lazily produced
            # generator of message chunks.
            if isinstance(cpn_obj.output("content"), partial):
                for m in cpn_obj.output("content")():
                    yield decorate("message", {"content": m})

            yield decorate("node_finished", {
                "component_id": self.path[i],
                "outputs": cpn_obj.output(),
                "elapsed_time": cpn_obj.output("_elapsed_time")
            })

            # Error handling: follow the component's exception route.
            if cpn_obj.error():
                ex = cpn_obj.exception_handler()
                if ex and ex["goto"]:
                    self.path.extend(ex["goto"])

            # Schedule downstream components onto the path.
            self._route_downstream(cpn, cpn_obj)

        idx = to

    # 5. Workflow complete.
    yield decorate("workflow_finished", {
        "outputs": self._get_final_outputs(),
        "elapsed_time": self._total_elapsed_time()
    })
```
## Batch Execution
```python
def _run_batch(self, f, t, kwargs):
    """
    Execute path slice [f, t) in parallel (max 5 workers).

    Blocks until every component in the batch has finished;
    Future.result() re-raises any exception from a worker thread.
    """
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = []
        for i in range(f, t):
            cpn = self.get_component_obj(self.path[i])
            if cpn.component_name.lower() in ["begin", "userfillup"]:
                # Begin/UserFillUp take their inputs from the caller.
                futures.append(executor.submit(
                    cpn.invoke,
                    inputs=kwargs.get("inputs", {})
                ))
            else:
                # Other components resolve inputs from upstream outputs.
                futures.append(executor.submit(
                    cpn.invoke,
                    **cpn.get_input()
                ))

        # Wait for completion. NOTE: renamed the loop variable — the
        # original reused `t`, shadowing the range-end parameter.
        for fut in futures:
            fut.result()
```
## Routing Logic
```python
def _route_downstream(self, cpn, cpn_obj):
    """
    Append the next component(s) to self.path based on the
    just-finished component's type and outputs.
    """
    component_name = cpn_obj.component_name.lower()

    # 1. Iteration exit: the loop item finished its final round,
    #    so continue with the iteration parent's downstream.
    if component_name == "iterationitem" and cpn_obj.end():
        parent = self.get_component(cpn["parent_id"])
        self._extend_path(parent["downstream"])
    # 2. Dynamic routing: Categorize/Switch publish their chosen
    #    targets in the "_next" output.
    elif component_name in ["categorize", "switch"]:
        next_ids = cpn_obj.output("_next")
        self._extend_path(next_ids)
    # 3. Entering an iteration: jump to the loop's start component.
    elif component_name == "iteration":
        start_id = cpn_obj.get_start()
        self._append_path(start_id)
    # 4. No downstream while inside a sub-iteration: loop back to the
    #    parent iteration's start for the next round.
    elif not cpn["downstream"] and cpn_obj.get_parent():
        parent_start = cpn_obj.get_parent().get_start()
        self._append_path(parent_start)
    # 5. Standard case: follow the static downstream edges.
    else:
        self._extend_path(cpn["downstream"])
def _extend_path(self, component_ids):
    """Append multiple component IDs to the execution path (no-op on empty/None)."""
    if component_ids:
        self.path.extend(component_ids)
def _append_path(self, component_id):
    """Append a single component ID to the execution path (no-op on empty/None)."""
    if component_id:
        self.path.append(component_id)
```
## Variable Resolution
```python
def get_variable_value(self, exp: str) -> Any:
    """
    Resolve a variable expression to its current value.

    Examples:
        sys.query         -> self.globals["sys.query"]
        LLM:0@content     -> component "LLM:0" output "content"
        data@items.0.name -> nested property of a component output
                             (dot-separated; numeric segments index lists)
    """
    # Strip the surrounding {braces} and whitespace.
    exp = exp.strip("{").strip("}").strip(" ")

    # Case 1: system/environment variable (no component reference).
    if "@" not in exp:
        return self.globals.get(exp)

    # Case 2: component output reference "<component_id>@<var>".
    cpn_id, var_nm = exp.split("@")
    cpn = self.get_component(cpn_id)

    # Split the root output key off from any nested path.
    parts = var_nm.split(".", 1)
    root_key = parts[0]
    root_val = cpn["obj"].output(root_key)

    # Case 3: nested property access (e.g. items.0.name).
    if len(parts) > 1:
        return self._get_nested_value(root_val, parts[1])
    return root_val
def _get_nested_value(self, obj: Any, path: str) -> Any:
    """
    Deep property access supporting dicts, sequences, objects, and
    JSON-encoded strings.

    Args:
        obj: Root value to descend into.
        path: Dot-separated path; numeric segments index lists/tuples.

    Returns:
        The resolved value, or None when any step is missing or
        unparseable (consistent with the dict-miss behavior).
    """
    cur = obj
    for key in path.split('.'):
        if cur is None:
            return None
        # A string at this level is expected to hold embedded JSON.
        if isinstance(cur, str):
            try:
                cur = json.loads(cur)
            except (ValueError, TypeError):  # was bare except — narrowed
                return None
        # Dict access.
        if isinstance(cur, dict):
            cur = cur.get(key)
        # Array indexing; non-numeric or out-of-range segments resolve
        # to None instead of raising (was an unguarded cur[int(key)]).
        elif isinstance(cur, (list, tuple)):
            try:
                cur = cur[int(key)]
            except (ValueError, IndexError):
                return None
        # Object attribute.
        else:
            cur = getattr(cur, key, None)
    return cur
```
## State Management
```python
class Canvas:
    """State-management surface of the Canvas engine (excerpt)."""

    def reset(self, mem=True):
        """Reset canvas state for a new execution.

        Args:
            mem: When True, also clear accumulated agent memory.
        """
        # Restart execution from the entry component.
        self.path = ["begin"]
        # Clear every component's cached outputs.
        for cpn_id in self.components:
            self.get_component_obj(cpn_id).reset()
        # Optionally reset memory.
        if mem:
            self.memory = []

    def set_global_variable(self, key: str, value: Any):
        """Set a global (sys.* or custom) workflow variable."""
        self.globals[key] = value

    def add_memory(self, user: str, assist: str, summ: str):
        """Store one conversation turn as (user, assistant, summary)."""
        self.memory.append((user, assist, summ))

    def get_history(self, window_size: int = 5) -> list:
        """Return the most recent `window_size` history entries."""
        return self.history[-window_size:]

    def to_json(self) -> str:
        """Serialize canvas state to a JSON string."""
        return json.dumps({
            "components": {
                cpn_id: {
                    "obj": cpn["obj"].to_dict(),
                    "downstream": cpn["downstream"],
                    "upstream": cpn["upstream"]
                }
                for cpn_id, cpn in self.components.items()
            },
            "globals": self.globals,
            "path": self.path,
            "history": self.history,
            "memory": self.memory
        })
```
## Cancellation Support
```python
def has_canceled(self) -> bool:
    """Check whether this task was canceled (Redis flag set by the API)."""
    if not self.task_id:
        # No task ID means cancellation is not tracked for this run.
        return False
    # NOTE(review): depending on the redis client's decode_responses
    # setting, get() may return bytes — confirm the "1" comparison holds.
    return REDIS_CONN.get(f"cancel:{self.task_id}") == "1"


# Usage in run(): check cancellation before starting each batch.
async def run(self, **kwargs):
    ...
    while idx < len(self.path):
        if self.has_canceled():
            yield decorate("workflow_finished", {
                "canceled": True
            })
            return
        ...
```
## Event Decoration
```python
def decorate(event_type: str, data: dict) -> dict:
    """
    Wrap an event payload in the standard SSE envelope.

    Args:
        event_type: Event name, e.g. "workflow_started" or "message".
        data: Event payload dict.

    Returns:
        {
            "event": event_type,
            "message_id": <uuid4 string>,
            "created_at": <unix timestamp, seconds>,
            "data": data
        }
    """
    return {
        "event": event_type,
        "message_id": str(uuid.uuid4()),
        "created_at": int(time.time()),
        "data": data
    }
```
## Error Handling
```python
async def run(self, **kwargs):
    try:
        ...  # execution logic
    except ComponentExecutionError as e:
        # Component-level failure: report which node failed.
        yield decorate("error", {
            "component_id": e.component_id,
            "message": str(e)
        })
    except TimeoutError:
        yield decorate("error", {
            "message": "Workflow execution timed out"
        })
    except Exception as e:
        # Last-resort boundary: log the full traceback, surface message.
        logging.exception(e)
        yield decorate("error", {
            "message": str(e)
        })
    finally:
        # Always persist canvas state, even on failure or cancellation.
        self._save_state()
```
## Performance Optimizations
```python
# Thread pool for parallel component execution (batch size = 5 workers).
ThreadPoolExecutor(max_workers=5)

# Capacity limiting for concurrent LLM calls (trio limiter).
thread_limiter = trio.CapacityLimiter(
    int(os.environ.get('MAX_CONCURRENT_CHATS', 10))
)

# Per-component execution timeout (default 10 minutes).
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 10*60)))
def _invoke(self, **kwargs):
    ...
```
## Related Files
- `/agent/canvas.py` - Canvas implementation
- `/agent/component/base.py` - Component base class
- `/api/apps/canvas_app.py` - Canvas API