From d775ba5b3e73fcbcbc6badab24505670d32487f5 Mon Sep 17 00:00:00 2001
From: cemaxecuter <cemaxecuter@gmail.com>
Date: Mon, 26 Jan 2026 12:19:20 -0500
Subject: [PATCH] Add real-time agent health monitoring and response utilities

Health Monitoring:
- Add /controller/agents/health endpoint for efficient bulk health checks
- Check all agents in one call with response time tracking
- Update agent status in real-time (30s interval)
- Show latency next to agent status in UI
- Add collapsible "All Agents Health" panel in sidebar
- Log to console when agents go online/offline; show a UI notification when one goes offline

Response Utilities:
- Add unwrapAgentResponse() to consistently handle controller proxy format
- Add isAgentMode() and getCurrentAgentName() helpers
- Standardize error handling for agent responses

UI Improvements:
- Show response latency (ms) in agent selector dropdown
- Health panel shows status + running modes for each agent
- Better visual feedback for agent status changes
---
 routes/controller.py     |  62 +++++++++++
 static/js/core/agents.js | 234 ++++++++++++++++++++++++++++++++++++++-
 templates/index.html     |   8 ++
 3 files changed, 299 insertions(+), 5 deletions(-)

diff --git a/routes/controller.py b/routes/controller.py
index 9428bbd..80e74b0 100644
--- a/routes/controller.py
+++ b/routes/controller.py
@@ -267,6 +267,68 @@ def get_agent_status(agent_id: int):
         }), 503
 
 
+@controller_bp.route('/agents/health', methods=['GET'])
+def check_all_agents_health():
+    """
+    Check health of all active registered agents in one call.
+
+    Agents are probed one after another. Each entry reports health, probe
+    latency in milliseconds, running modes, and an error string on failure.
+    """
+    agents_list = list_agents(active_only=True)
+    results = []
+
+    for agent in agents_list:
+        result = {
+            'id': agent['id'],
+            'name': agent['name'],
+            'healthy': False,
+            'response_time_ms': None,
+            'running_modes': [],
+            'running_modes_detail': {},
+            'error': None
+        }
+
+        try:
+            client = create_client_from_agent(agent)
+            # perf_counter is monotonic, so clock adjustments cannot skew latency
+            start_time = time.perf_counter()
+            is_healthy = client.health_check()
+            response_time = (time.perf_counter() - start_time) * 1000
+
+            result['healthy'] = is_healthy
+            result['response_time_ms'] = round(response_time, 1)
+
+            if is_healthy:
+                # Update last_seen in database
+                update_agent(agent['id'], update_last_seen=True)
+
+                # Also fetch running modes
+                try:
+                    status = client.get_status()
+                    result['running_modes'] = status.get('running_modes', [])
+                    result['running_modes_detail'] = status.get('running_modes_detail', {})
+                except Exception:
+                    pass  # Status fetch is optional; keep the empty defaults
+
+        except AgentConnectionError as e:
+            result['error'] = f'Connection failed: {str(e)}'
+        except AgentHTTPError as e:
+            result['error'] = f'HTTP error: {str(e)}'
+        except Exception as e:
+            result['error'] = str(e)
+
+        results.append(result)
+
+    return jsonify({
+        'status': 'success',
+        'timestamp': datetime.now(timezone.utc).isoformat(),
+        'agents': results,
+        'total': len(results),
+        'healthy_count': sum(1 for r in results if r['healthy'])
+    })
+
+
 # =============================================================================
 # Proxy Operations - Forward requests to agents
 # =============================================================================
diff --git a/static/js/core/agents.js b/static/js/core/agents.js
index e2458ed..15e9a8a 100644
--- a/static/js/core/agents.js
+++ b/static/js/core/agents.js
@@ -12,6 +12,150 @@ let multiAgentMode = false;  // Show combined results from all agents
 let multiAgentPollInterval = null;
 let agentRunningModes = [];  // Track agent's running modes for conflict detection
 let agentRunningModesDetail = {};  // Track device info per mode (for multi-SDR agents)
+let healthCheckInterval = null;  // Health monitoring interval
+let agentHealthStatus = {};  // Cache of health status per agent ID
+
+// ============== AGENT HEALTH MONITORING ==============
+
+/**
+ * Begin polling agent health: one check immediately, then one every 30 seconds.
+ * Does nothing when a polling timer is already set.
+ */
+function startHealthMonitoring() {
+    if (!healthCheckInterval) {
+        // Immediate first check
+        checkAllAgentsHealth();
+
+        // Repeat on a 30-second timer
+        const periodMs = 30000;
+        healthCheckInterval = setInterval(checkAllAgentsHealth, periodMs);
+        console.log('[AgentManager] Health monitoring started (30s interval)');
+    }
+}
+
+/**
+ * Cancel the periodic health check timer, if one is set.
+ */
+function stopHealthMonitoring() {
+    if (!healthCheckInterval) return;
+
+    clearInterval(healthCheckInterval);
+    healthCheckInterval = null;
+    console.log('[AgentManager] Health monitoring stopped');
+}
+
+/**
+ * Fetch health for all agents in one request, then refresh caches and UI.
+ */
+async function checkAllAgentsHealth() {
+    if (agents.length === 0) return;
+
+    try {
+        const response = await fetch('/controller/agents/health');
+        if (!response.ok) throw new Error(`HTTP ${response.status}`);
+        const data = await response.json();
+
+        if (data.status === 'success' && data.agents) {
+            // Update health status cache and UI
+            data.agents.forEach(agentHealth => {
+                const previousHealth = agentHealthStatus[agentHealth.id];
+                agentHealthStatus[agentHealth.id] = agentHealth;
+
+                // Update agent in local list
+                const agent = agents.find(a => a.id === agentHealth.id);
+                if (agent) {
+                    // Previous state: last cached check if any, else the list entry's flag
+                    const wasHealthy = previousHealth ? previousHealth.healthy : agent.healthy !== false;
+                    agent.healthy = agentHealth.healthy;
+                    agent.response_time_ms = agentHealth.response_time_ms;
+                    agent.running_modes = agentHealth.running_modes || [];
+                    agent.running_modes_detail = agentHealth.running_modes_detail || {};
+
+                    // Log status change
+                    if (wasHealthy !== agentHealth.healthy) {
+                        console.log(`[AgentManager] ${agent.name} is now ${agentHealth.healthy ? 'ONLINE' : 'OFFLINE'}`);
+                        // Show notification for status change
+                        if (!agentHealth.healthy && typeof showNotification === 'function') {
+                            showNotification(`Agent "${agent.name}" went offline`, 'warning');
+                        }
+                    }
+                }
+            });
+
+            // Update UI
+            updateAgentHealthUI();
+            // If current agent is selected, sync mode warnings
+            if (currentAgent !== 'local') {
+                const currentHealth = agentHealthStatus[currentAgent];
+                if (currentHealth) {
+                    agentRunningModes = currentHealth.running_modes || [];
+                    agentRunningModesDetail = currentHealth.running_modes_detail || {};
+                    showAgentModeWarnings(agentRunningModes, agentRunningModesDetail);
+                }
+            }
+        }
+    } catch (error) {
+        console.error('[AgentManager] Health check failed:', error);
+    }
+}
+
+/**
+ * Refresh the selector labels, the current-agent status line and the health panel.
+ */
+function updateAgentHealthUI() {
+    const agentSelect = document.getElementById('agentSelect');
+    if (!agentSelect) return;
+
+    // Relabel every agent option with its status glyph and latency
+    for (const entry of agents) {
+        const optionEl = agentSelect.querySelector(`option[value="${entry.id}"]`);
+        if (!optionEl) continue;
+
+        const cached = agentHealthStatus[entry.id];
+        const online = cached ? cached.healthy : entry.healthy !== false;
+        const glyph = online ? '●' : '○';
+        const suffix = cached?.response_time_ms ? ` (${cached.response_time_ms}ms)` : '';
+        optionEl.textContent = `${glyph} ${entry.name}${suffix}`;
+        optionEl.dataset.healthy = online;
+    }
+
+    // Status line for the currently selected agent
+    updateAgentStatus();
+
+    // Per-agent panel (no-op inside the callee when the element is absent)
+    updateHealthPanel();
+}
+
+/**
+ * Render one row per agent into #agentHealthPanel; names are HTML-escaped for innerHTML.
+ */
+function updateHealthPanel() {
+    const panel = document.getElementById('agentHealthPanel');
+    if (!panel) return;
+
+    if (agents.length === 0) {
+        panel.innerHTML = '<div style="color: var(--text-muted); font-size: 11px;">No agents registered</div>';
+        return;
+    }
+
+    const html = agents.map(agent => {
+        const health = agentHealthStatus[agent.id];
+        const isHealthy = health ? health.healthy : agent.healthy !== false;
+        const latency = health?.response_time_ms ? `${health.response_time_ms}ms` : '--';
+        const modes = health?.running_modes?.length || 0;
+        const statusColor = isHealthy ? 'var(--accent-green)' : 'var(--accent-red)';
+        const statusIcon = isHealthy ? '●' : '○';
+        const safeName = String(agent.name).replace(/[&<>"']/g, ch => `&#${ch.charCodeAt(0)};`);
+        return `<div style="display: flex; justify-content: space-between; align-items: center; padding: 4px 0; border-bottom: 1px solid var(--border-color);">
+            <span style="color: ${statusColor}; font-size: 12px;">${statusIcon} ${safeName}</span>
+            <span style="font-size: 10px; color: var(--text-muted);">
+                ${latency} ${modes > 0 ? `| ${modes} mode${modes > 1 ? 's' : ''}` : ''}
+            </span>
+        </div>`;
+    }).join('');
+
+    panel.innerHTML = html;
+}
 
 // ============== AGENT LOADING ==============
 
@@ -84,22 +228,91 @@ function updateAgentStatus() {
     const selector = document.getElementById('agentSelect');
     const statusDot = document.getElementById('agentStatusDot');
     const statusText = document.getElementById('agentStatusText');
+    const latencyText = document.getElementById('agentLatencyText');
 
     if (!selector || !statusDot) return;
 
     if (currentAgent === 'local') {
         statusDot.className = 'agent-status-dot online';
         if (statusText) statusText.textContent = 'Local';
+        if (latencyText) latencyText.textContent = '';
     } else {
         const agent = agents.find(a => a.id == currentAgent);
         if (agent) {
-            const isOnline = agent.healthy !== false;
+            const health = agentHealthStatus[agent.id];
+            const isOnline = health ? health.healthy : agent.healthy !== false;
             statusDot.className = `agent-status-dot ${isOnline ? 'online' : 'offline'}`;
-            if (statusText) statusText.textContent = isOnline ? 'Connected' : 'Offline';
+
+            if (statusText) {
+                statusText.textContent = isOnline ? 'Connected' : 'Offline';
+            }
+
+            // Show latency if available
+            if (latencyText) {
+                if (health?.response_time_ms) {
+                    latencyText.textContent = `${health.response_time_ms}ms`;
+                } else {
+                    latencyText.textContent = '';
+                }
+            }
         }
     }
 }
 
+// ============== RESPONSE UTILITIES ==============
+
+/**
+ * Unwrap agent response from controller proxy format.
+ * Controller returns: {status: 'success', result: {...agent response...}}
+ * This extracts the actual agent response.
+ *
+ * @param {Object} response - Parsed response body to inspect
+ * @param {boolean} isAgentMode - Whether this is an agent (vs local) request
+ * @returns {Object|null} - Unwrapped response, or null for a falsy input
+ * @throws {Error} - If the envelope or the nested result has status 'error'
+ */
+function unwrapAgentResponse(response, isAgentMode = false) {
+    if (!response) return null;
+
+    // Check for error status first
+    if (response.status === 'error') {
+        throw new Error(response.message || response.error || 'Unknown error');
+    }
+
+    // If agent mode and has nested result, unwrap it
+    if (isAgentMode && response.status === 'success' && response.result !== undefined) {
+        const result = response.result;
+
+        // A null result passes the undefined check above; guard before reading .status
+        if (result && result.status === 'error') {
+            throw new Error(result.message || result.error || 'Agent operation failed');
+        }
+
+        return result;
+    }
+
+    // Return as-is for local mode or already-unwrapped responses
+    return response;
+}
+
+/**
+ * Report whether the current selection is anything other than 'local'.
+ * @returns {boolean}
+ */
+function isAgentMode() {
+    return !(currentAgent === 'local');
+}
+
+/**
+ * Display name for the current selection: 'Local', the agent's name, or 'Unknown'.
+ * @returns {string}
+ */
+function getCurrentAgentName() {
+    if (currentAgent === 'local') return 'Local';
+    const selected = agents.find(entry => entry.id == currentAgent);
+    return selected ? selected.name : 'Unknown';
+}
+
 // ============== AGENT SELECTION ==============
 
 function selectAgent(agentId) {
@@ -625,7 +838,12 @@ function disconnectAgentStream() {
 
 function initAgentManager() {
     // Load agents on page load
-    loadAgents();
+    loadAgents().then(() => {
+        // Start health monitoring after agents are loaded
+        if (agents.length > 0) {
+            startHealthMonitoring();
+        }
+    });
 
     // Set up agent selector change handler
     const selector = document.getElementById('agentSelect');
@@ -635,8 +853,14 @@ function initAgentManager() {
         });
     }
 
-    // Refresh agents periodically
-    setInterval(loadAgents, 30000);
+    // Refresh agent list periodically (less often since health monitor is active)
+    setInterval(async () => {
+        await loadAgents();
+        // Start health monitoring if we now have agents
+        if (agents.length > 0 && !healthCheckInterval) {
+            startHealthMonitoring();
+        }
+    }, 60000);  // Refresh list every 60s (health checks every 30s)
 }
 
 // ============== MULTI-AGENT MODE ==============
diff --git a/templates/index.html b/templates/index.html
index 0856ba7..ba890e3 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -375,7 +375,15 @@
                     </div>
                     <div id="agentInfo" class="info-text" style="font-size: 10px; color: #666; margin-top: 4px;">
                         <span id="agentStatusText">Local</span>
+                        <span id="agentLatencyText" style="margin-left: 6px; color: var(--accent-cyan);"></span>
                     </div>
+                    <!-- Agent health panel (shows all agents when expanded) -->
+                    <details style="margin-top: 8px;">
+                        <summary style="font-size: 10px; color: #888; cursor: pointer;">All Agents Health</summary>
+                        <div id="agentHealthPanel" style="margin-top: 6px; padding: 6px; background: rgba(0,0,0,0.2); border-radius: 4px; max-height: 120px; overflow-y: auto;">
+                            <div style="color: var(--text-muted); font-size: 11px;">Loading...</div>
+                        </div>
+                    </details>
                     <!-- Multi-agent mode toggle -->
                     <div class="form-group" style="margin-top: 10px;">
                         <label class="inline-checkbox" style="display: flex; align-items: center; gap: 8px;">