Add real-time agent health monitoring and response utilities

Health Monitoring:
- Add /controller/agents/health endpoint for efficient bulk health checks
- Check all agents in one call with response time tracking
- Refresh agent status periodically (polled every 30 seconds)
- Show latency next to agent status in UI
- Add collapsible "All Agents Health" panel in sidebar
- Log online/offline transitions to the console and show a warning notification when an agent goes offline

Response Utilities:
- Add unwrapAgentResponse() to consistently handle controller proxy format
- Add isAgentMode() and getCurrentAgentName() helpers
- Standardize error handling for agent responses

UI Improvements:
- Show response latency (ms) in agent selector dropdown
- Health panel shows status + running modes for each agent
- Better visual feedback for agent status changes
This commit is contained in:
cemaxecuter
2026-01-26 12:19:20 -05:00
parent 3372daca84
commit d775ba5b3e
3 changed files with 299 additions and 5 deletions

View File

@@ -267,6 +267,68 @@ def get_agent_status(agent_id: int):
}), 503
@controller_bp.route('/agents/health', methods=['GET'])
def check_all_agents_health():
    """
    Check the health of every active registered agent in one call.

    For each agent returned by ``list_agents(active_only=True)`` this runs
    ``client.health_check()``, times the call and, when the agent reports
    healthy, refreshes its ``last_seen`` record and fetches its running modes.
    A failure on one agent is recorded in that agent's ``error`` field and
    does not abort the checks for the remaining agents.

    Returns:
        A JSON response containing ``status``, a UTC ISO-8601 ``timestamp``,
        the per-agent ``agents`` list, the ``total`` number of agents checked
        and ``healthy_count``. Every agent entry has the keys ``id``,
        ``name``, ``healthy``, ``response_time_ms``, ``running_modes``,
        ``running_modes_detail`` and ``error``.
    """
    results = []

    for agent in list_agents(active_only=True):
        result = {
            'id': agent['id'],
            'name': agent['name'],
            'healthy': False,
            'response_time_ms': None,
            'running_modes': [],
            # Always present so every entry has the same shape for clients.
            'running_modes_detail': {},
            'error': None,
        }

        try:
            client = create_client_from_agent(agent)

            # perf_counter() is monotonic, so the measured latency cannot be
            # skewed (or go negative) if the system clock is adjusted mid-call.
            start_time = time.perf_counter()
            is_healthy = client.health_check()
            elapsed_ms = (time.perf_counter() - start_time) * 1000

            result['healthy'] = is_healthy
            result['response_time_ms'] = round(elapsed_ms, 1)

            if is_healthy:
                # Update last_seen in database
                update_agent(agent['id'], update_last_seen=True)

                # Running modes are a best-effort extra: a failure here must
                # not turn a successful health check into an error.
                try:
                    status = client.get_status()
                    result['running_modes'] = status.get('running_modes', [])
                    result['running_modes_detail'] = status.get('running_modes_detail', {})
                except Exception:
                    pass  # Status fetch is optional

        except AgentConnectionError as e:
            result['error'] = f'Connection failed: {str(e)}'
        except AgentHTTPError as e:
            result['error'] = f'HTTP error: {str(e)}'
        except Exception as e:
            # Boundary handler: report any other failure for this agent and
            # keep checking the rest.
            result['error'] = str(e)

        results.append(result)

    return jsonify({
        'status': 'success',
        'timestamp': datetime.now(timezone.utc).isoformat(),
        'agents': results,
        'total': len(results),
        'healthy_count': sum(1 for r in results if r['healthy'])
    })
# =============================================================================
# Proxy Operations - Forward requests to agents
# =============================================================================

View File

@@ -12,6 +12,150 @@ let multiAgentMode = false; // Show combined results from all agents
let multiAgentPollInterval = null; // NOTE(review): presumably the timer handle for multi-agent polling; usage is outside this view
let agentRunningModes = []; // Running modes of the selected agent, used for conflict detection; refreshed from health results
let agentRunningModesDetail = {}; // Device info per running mode (for multi-SDR agents); refreshed from health results
let healthCheckInterval = null; // setInterval handle for periodic health checks; null while monitoring is stopped
let agentHealthStatus = {}; // Latest health entry from /controller/agents/health, keyed by agent ID
// ============== AGENT HEALTH MONITORING ==============
/**
 * Begin polling agent health.
 *
 * Triggers one health check immediately and then schedules a repeat every
 * 30 seconds. Calling this while monitoring is already active does nothing,
 * so only one timer ever exists.
 */
function startHealthMonitoring() {
    const alreadyRunning = Boolean(healthCheckInterval);

    if (!alreadyRunning) {
        // Fire once up front so the UI does not wait a full interval.
        checkAllAgentsHealth();

        healthCheckInterval = setInterval(checkAllAgentsHealth, 30000);
        console.log('[AgentManager] Health monitoring started (30s interval)');
    }
}
/**
 * Cancel the periodic health check, if one is scheduled.
 *
 * Clears the timer and resets the handle to null so that monitoring can be
 * started again later. Does nothing when monitoring is not running.
 */
function stopHealthMonitoring() {
    if (!healthCheckInterval) {
        return;
    }

    clearInterval(healthCheckInterval);
    healthCheckInterval = null;
    console.log('[AgentManager] Health monitoring stopped');
}
/**
 * Check health of all registered agents in one efficient call.
 *
 * Fetches /controller/agents/health, stores each returned entry in
 * `agentHealthStatus`, copies health, latency and running modes onto the
 * matching entry of the local `agents` list, and refreshes the UI. When an
 * agent's health flips, the change is logged; a warning notification is
 * shown only for the transition to offline. If a remote agent is currently
 * selected, its running-mode warnings are re-synced as well.
 *
 * Does nothing when no agents are loaded. Network, HTTP and parse failures
 * are logged to the console and never thrown to the caller.
 *
 * @returns {Promise<void>}
 */
async function checkAllAgentsHealth() {
    if (agents.length === 0) return;

    try {
        const response = await fetch('/controller/agents/health');
        // Surface HTTP failures explicitly instead of relying on the JSON
        // parser to choke on an error page.
        if (!response.ok) {
            throw new Error(`HTTP ${response.status}`);
        }

        const data = await response.json();
        if (data.status !== 'success' || !data.agents) return;

        // Update health status cache and the local agent list
        data.agents.forEach(agentHealth => {
            agentHealthStatus[agentHealth.id] = agentHealth;

            // Loose equality on purpose: the other agent lookups in this
            // file match IDs with ==, so string and numeric IDs both match.
            const agent = agents.find(a => a.id == agentHealth.id);
            if (!agent) return;

            // An agent that has never been checked counts as healthy.
            const wasHealthy = agent.healthy !== false;

            agent.healthy = agentHealth.healthy;
            agent.response_time_ms = agentHealth.response_time_ms;
            agent.running_modes = agentHealth.running_modes || [];
            agent.running_modes_detail = agentHealth.running_modes_detail || {};

            // Log status change
            if (wasHealthy !== agentHealth.healthy) {
                console.log(`[AgentManager] ${agent.name} is now ${agentHealth.healthy ? 'ONLINE' : 'OFFLINE'}`);

                // Only the transition to offline warrants a user-facing alert
                if (!agentHealth.healthy && typeof showNotification === 'function') {
                    showNotification(`Agent "${agent.name}" went offline`, 'warning');
                }
            }
        });

        // Update UI
        updateAgentHealthUI();

        // If a remote agent is selected, sync mode warnings
        if (currentAgent !== 'local') {
            const currentHealth = agentHealthStatus[currentAgent];
            if (currentHealth) {
                agentRunningModes = currentHealth.running_modes || [];
                agentRunningModesDetail = currentHealth.running_modes_detail || {};
                showAgentModeWarnings(agentRunningModes, agentRunningModesDetail);
            }
        }
    } catch (error) {
        console.error('[AgentManager] Health check failed:', error);
    }
}
/**
 * Push the cached health data into the agent selector and related widgets.
 *
 * Each agent's <option> gets a filled or hollow dot, the agent name and,
 * when a latency is cached, the response time in milliseconds. Afterwards
 * the current-agent status line and the health panel are refreshed. Returns
 * early when the selector element is not on the page.
 */
function updateAgentHealthUI() {
    const selector = document.getElementById('agentSelect');
    if (!selector) return;

    agents.forEach(agent => {
        const option = selector.querySelector(`option[value="${agent.id}"]`);
        if (!option) return;

        const health = agentHealthStatus[agent.id];
        // Fall back to the agent record until a health entry is cached.
        const isHealthy = health ? health.healthy : agent.healthy !== false;

        let label = `${isHealthy ? '●' : '○'} ${agent.name}`;
        if (health?.response_time_ms) {
            label += ` (${health.response_time_ms}ms)`;
        }

        option.textContent = label;
        option.dataset.healthy = isHealthy;
    });

    // Status line for the selected agent, then the all-agents panel
    updateAgentStatus();
    updateHealthPanel();
}
/**
 * Update the optional health panel showing all agents.
 *
 * Renders one row per agent into #agentHealthPanel: a coloured status dot
 * with the agent name on the left, and the cached latency plus the number
 * of running modes on the right. Shows a placeholder when no agents are
 * loaded and does nothing when the panel element is absent.
 */
function updateHealthPanel() {
    const panel = document.getElementById('agentHealthPanel');
    if (!panel) return;

    if (agents.length === 0) {
        panel.innerHTML = '<div style="color: var(--text-muted); font-size: 11px;">No agents registered</div>';
        return;
    }

    // Agent names come from the agent registry and are not guaranteed to be
    // HTML-safe; escape them before they are assigned through innerHTML.
    const htmlEntities = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
    const escapeHtml = value => String(value).replace(/[&<>"']/g, ch => htmlEntities[ch]);

    const html = agents.map(agent => {
        const health = agentHealthStatus[agent.id];
        // Fall back to the agent record until a health entry is cached.
        const isHealthy = health ? health.healthy : agent.healthy !== false;
        const latency = health?.response_time_ms ? `${health.response_time_ms}ms` : '--';
        const modes = health?.running_modes?.length || 0;
        const statusColor = isHealthy ? 'var(--accent-green)' : 'var(--accent-red)';
        const statusIcon = isHealthy ? '●' : '○';

        return `<div style="display: flex; justify-content: space-between; align-items: center; padding: 4px 0; border-bottom: 1px solid var(--border-color);">
<span style="color: ${statusColor}; font-size: 12px;">${statusIcon} ${escapeHtml(agent.name)}</span>
<span style="font-size: 10px; color: var(--text-muted);">
${latency} ${modes > 0 ? `| ${modes} mode${modes > 1 ? 's' : ''}` : ''}
</span>
</div>`;
    }).join('');

    panel.innerHTML = html;
}
// ============== AGENT LOADING ==============
@@ -84,22 +228,91 @@ function updateAgentStatus() {
const selector = document.getElementById('agentSelect');
const statusDot = document.getElementById('agentStatusDot');
const statusText = document.getElementById('agentStatusText');
const latencyText = document.getElementById('agentLatencyText');
if (!selector || !statusDot) return;
if (currentAgent === 'local') {
statusDot.className = 'agent-status-dot online';
if (statusText) statusText.textContent = 'Local';
if (latencyText) latencyText.textContent = '';
} else {
const agent = agents.find(a => a.id == currentAgent);
if (agent) {
const isOnline = agent.healthy !== false;
const health = agentHealthStatus[agent.id];
const isOnline = health ? health.healthy : agent.healthy !== false;
statusDot.className = `agent-status-dot ${isOnline ? 'online' : 'offline'}`;
if (statusText) statusText.textContent = isOnline ? 'Connected' : 'Offline';
if (statusText) {
statusText.textContent = isOnline ? 'Connected' : 'Offline';
}
// Show latency if available
if (latencyText) {
if (health?.response_time_ms) {
latencyText.textContent = `${health.response_time_ms}ms`;
} else {
latencyText.textContent = '';
}
}
}
}
}
// ============== RESPONSE UTILITIES ==============
/**
 * Unwrap agent response from controller proxy format.
 * Controller returns: {status: 'success', result: {...agent response...}}
 * This extracts the actual agent response.
 *
 * A falsy `response` yields null. In agent mode a successful envelope is
 * replaced by its `result` (which may itself be null); in local mode, or
 * when there is no `result` field, the response is returned unchanged.
 *
 * @param {Object} response - Parsed response body
 * @param {boolean} isAgentMode - Whether this is an agent (vs local) request
 * @returns {Object} - Unwrapped response
 * @throws {Error} - If the envelope or the nested result has status 'error'
 */
function unwrapAgentResponse(response, isAgentMode = false) {
    if (!response) return null;

    // Check for error status first
    if (response.status === 'error') {
        throw new Error(response.message || response.error || 'Unknown error');
    }

    // If agent mode and has nested result, unwrap it
    if (isAgentMode && response.status === 'success' && response.result !== undefined) {
        const result = response.result;

        // `result` may legitimately be null (it only has to be defined), so
        // read its status with optional chaining to avoid a TypeError.
        if (result?.status === 'error') {
            throw new Error(result.message || result.error || 'Agent operation failed');
        }
        return result;
    }

    // Return as-is for local mode or already-unwrapped responses
    return response;
}
/**
 * Report whether a remote agent, rather than the local device, is selected.
 * @returns {boolean} true unless `currentAgent` is the string 'local'
 */
function isAgentMode() {
    const usingLocal = currentAgent === 'local';
    return !usingLocal;
}
/**
 * Resolve a display name for the current selection.
 * @returns {string} 'Local' for the local device, the matching agent's name
 *     for a remote agent, or 'Unknown' when no loaded agent has that ID
 */
function getCurrentAgentName() {
    if (currentAgent === 'local') {
        return 'Local';
    }

    // Loose equality: the selected ID may be a string while agent IDs are numeric.
    const match = agents.find(candidate => candidate.id == currentAgent);
    if (!match) {
        return 'Unknown';
    }
    return match.name;
}
// ============== AGENT SELECTION ==============
function selectAgent(agentId) {
@@ -625,7 +838,12 @@ function disconnectAgentStream() {
function initAgentManager() {
// Load agents on page load
loadAgents();
loadAgents().then(() => {
// Start health monitoring after agents are loaded
if (agents.length > 0) {
startHealthMonitoring();
}
});
// Set up agent selector change handler
const selector = document.getElementById('agentSelect');
@@ -635,8 +853,14 @@ function initAgentManager() {
});
}
// Refresh agents periodically
setInterval(loadAgents, 30000);
// Refresh agent list periodically (less often since health monitor is active)
setInterval(async () => {
await loadAgents();
// Start health monitoring if we now have agents
if (agents.length > 0 && !healthCheckInterval) {
startHealthMonitoring();
}
}, 60000); // Refresh list every 60s (health checks every 30s)
}
// ============== MULTI-AGENT MODE ==============

View File

@@ -375,7 +375,15 @@
</div>
<div id="agentInfo" class="info-text" style="font-size: 10px; color: #666; margin-top: 4px;">
<span id="agentStatusText">Local</span>
<span id="agentLatencyText" style="margin-left: 6px; color: var(--accent-cyan);"></span>
</div>
<!-- Agent health panel (shows all agents when expanded) -->
<details style="margin-top: 8px;">
<summary style="font-size: 10px; color: #888; cursor: pointer;">All Agents Health</summary>
<div id="agentHealthPanel" style="margin-top: 6px; padding: 6px; background: rgba(0,0,0,0.2); border-radius: 4px; max-height: 120px; overflow-y: auto;">
<div style="color: var(--text-muted); font-size: 11px;">Loading...</div>
</div>
</details>
<!-- Multi-agent mode toggle -->
<div class="form-group" style="margin-top: 10px;">
<label class="inline-checkbox" style="display: flex; align-items: center; gap: 8px;">