From fb8b6a01e815cf17e412c8e7ee579e7f19865b2f Mon Sep 17 00:00:00 2001 From: James Smith Date: Thu, 19 Mar 2026 08:09:07 +0000 Subject: [PATCH] Shorten agent health checks on load --- routes/controller.py | 79 ++++++++++++++++++++++++---------------- static/js/core/agents.js | 54 ++++++++++++++++----------- 2 files changed, 80 insertions(+), 53 deletions(-) diff --git a/routes/controller.py b/routes/controller.py index 166e312..3f04970 100644 --- a/routes/controller.py +++ b/routes/controller.py @@ -40,14 +40,16 @@ from utils.trilateration import ( estimate_location_from_observations, ) -logger = logging.getLogger('intercept.controller') - -controller_bp = Blueprint('controller', __name__, url_prefix='/controller') - -# Multi-agent SSE fanout state (per-client queues). -_agent_stream_subscribers: set[queue.Queue] = set() -_agent_stream_subscribers_lock = threading.Lock() -_AGENT_STREAM_CLIENT_QUEUE_SIZE = 500 +logger = logging.getLogger('intercept.controller') + +controller_bp = Blueprint('controller', __name__, url_prefix='/controller') +AGENT_HEALTH_TIMEOUT_SECONDS = 2.0 +AGENT_STATUS_TIMEOUT_SECONDS = 2.5 + +# Multi-agent SSE fanout state (per-client queues). +_agent_stream_subscribers: set[queue.Queue] = set() +_agent_stream_subscribers_lock = threading.Lock() +_AGENT_STREAM_CLIENT_QUEUE_SIZE = 500 def _broadcast_agent_data(payload: dict) -> None: @@ -77,14 +79,18 @@ def get_agents(): agents = list_agents(active_only=active_only) # Optionally refresh status for each agent - refresh = request.args.get('refresh', 'false').lower() == 'true' - if refresh: - for agent in agents: - try: - client = create_client_from_agent(agent) - agent['healthy'] = client.health_check() - except Exception: - agent['healthy'] = False + refresh = request.args.get('refresh', 'false').lower() == 'true' + if refresh: + for agent in agents: + try: + client = AgentClient( + agent['base_url'], + api_key=agent.get('api_key'), + timeout=AGENT_HEALTH_TIMEOUT_SECONDS, + ) + agent['healthy'] = client.health_check() + except Exception: + agent['healthy'] = False return jsonify({ 'status': 'success', @@ -327,27 +333,36 @@ def check_all_agents_health(): 'error': None } - try: - client = create_client_from_agent(agent) - - # Time the health check - start_time = time.time() - is_healthy = client.health_check() - response_time = (time.time() - start_time) * 1000 + try: + client = AgentClient( + agent['base_url'], + api_key=agent.get('api_key'), + timeout=AGENT_HEALTH_TIMEOUT_SECONDS, + ) + + # Time the health check + start_time = time.time() + is_healthy = client.health_check() + response_time = (time.time() - start_time) * 1000 result['healthy'] = is_healthy result['response_time_ms'] = round(response_time, 1) if is_healthy: - # Update last_seen in database - update_agent(agent['id'], update_last_seen=True) - - # Also fetch running modes - try: - status = client.get_status() - result['running_modes'] = status.get('running_modes', []) - result['running_modes_detail'] = status.get('running_modes_detail', {}) - except Exception: + # Update last_seen in database + update_agent(agent['id'], update_last_seen=True) + + # Also fetch running modes + try: + status_client = AgentClient( + agent['base_url'], + api_key=agent.get('api_key'), + timeout=AGENT_STATUS_TIMEOUT_SECONDS, + ) + status = status_client.get_status() + result['running_modes'] = status.get('running_modes', []) + result['running_modes_detail'] = status.get('running_modes_detail', {}) + except Exception: pass # Status fetch is optional except AgentConnectionError as e: diff --git a/static/js/core/agents.js b/static/js/core/agents.js index 79f0f9c..6300816 100644 --- a/static/js/core/agents.js +++ b/static/js/core/agents.js @@ -10,10 +10,11 @@ let currentAgent = 'local'; let agentEventSource = null; let multiAgentMode = false; // Show combined results from all agents let multiAgentPollInterval = null; -let agentRunningModes = []; // Track agent's running modes for conflict detection -let agentRunningModesDetail = {}; // Track device info per mode (for multi-SDR agents) -let healthCheckInterval = null; // Health monitoring interval -let agentHealthStatus = {}; // Cache of health status per agent ID +let agentRunningModes = []; // Track agent's running modes for conflict detection +let agentRunningModesDetail = {}; // Track device info per mode (for multi-SDR agents) +let healthCheckInterval = null; // Health monitoring interval +let agentHealthStatus = {}; // Cache of health status per agent ID +let healthCheckKickoffTimer = null; // ============== AGENT HEALTH MONITORING ============== @@ -21,27 +22,38 @@ let agentHealthStatus = {}; // Cache of health status per agent ID * Start periodic health monitoring for all agents. * Runs every 30 seconds to check agent health status. */ -function startHealthMonitoring() { - // Don't start if already running - if (healthCheckInterval) return; - - // Initial check - checkAllAgentsHealth(); - - // Start periodic checks every 30 seconds - healthCheckInterval = setInterval(checkAllAgentsHealth, 30000); - console.log('[AgentManager] Health monitoring started (30s interval)'); -} +function startHealthMonitoring() { + // Don't start if already running + if (healthCheckInterval) return; + + // Defer the first probe so heavy dashboards can finish initial render + // before we start contacting remote agents. + if (healthCheckKickoffTimer) { + clearTimeout(healthCheckKickoffTimer); + } + healthCheckKickoffTimer = setTimeout(() => { + healthCheckKickoffTimer = null; + checkAllAgentsHealth(); + }, 5000); + + // Start periodic checks every 30 seconds + healthCheckInterval = setInterval(checkAllAgentsHealth, 30000); + console.log('[AgentManager] Health monitoring started (30s interval)'); +} /** * Stop health monitoring. */ -function stopHealthMonitoring() { - if (healthCheckInterval) { - clearInterval(healthCheckInterval); - healthCheckInterval = null; - console.log('[AgentManager] Health monitoring stopped'); - } +function stopHealthMonitoring() { + if (healthCheckKickoffTimer) { + clearTimeout(healthCheckKickoffTimer); + healthCheckKickoffTimer = null; + } + if (healthCheckInterval) { + clearInterval(healthCheckInterval); + healthCheckInterval = null; + console.log('[AgentManager] Health monitoring stopped'); + } } /**