Shorten agent health-check timeouts and defer the first check on load

This commit is contained in:
James Smith
2026-03-19 08:09:07 +00:00
parent db0a26cd64
commit fb8b6a01e8
2 changed files with 80 additions and 53 deletions

View File

@@ -43,6 +43,8 @@ from utils.trilateration import (
logger = logging.getLogger('intercept.controller')
controller_bp = Blueprint('controller', __name__, url_prefix='/controller')
AGENT_HEALTH_TIMEOUT_SECONDS = 2.0
AGENT_STATUS_TIMEOUT_SECONDS = 2.5
# Multi-agent SSE fanout state (per-client queues).
_agent_stream_subscribers: set[queue.Queue] = set()
@@ -81,7 +83,11 @@ def get_agents():
if refresh:
for agent in agents:
try:
client = create_client_from_agent(agent)
client = AgentClient(
agent['base_url'],
api_key=agent.get('api_key'),
timeout=AGENT_HEALTH_TIMEOUT_SECONDS,
)
agent['healthy'] = client.health_check()
except Exception:
agent['healthy'] = False
@@ -328,7 +334,11 @@ def check_all_agents_health():
}
try:
client = create_client_from_agent(agent)
client = AgentClient(
agent['base_url'],
api_key=agent.get('api_key'),
timeout=AGENT_HEALTH_TIMEOUT_SECONDS,
)
# Time the health check
start_time = time.time()
@@ -344,7 +354,12 @@ def check_all_agents_health():
# Also fetch running modes
try:
status = client.get_status()
status_client = AgentClient(
agent['base_url'],
api_key=agent.get('api_key'),
timeout=AGENT_STATUS_TIMEOUT_SECONDS,
)
status = status_client.get_status()
result['running_modes'] = status.get('running_modes', [])
result['running_modes_detail'] = status.get('running_modes_detail', {})
except Exception:

View File

@@ -14,6 +14,7 @@ let agentRunningModes = []; // Track agent's running modes for conflict detection
let agentRunningModesDetail = {}; // Track device info per mode (for multi-SDR agents)
let healthCheckInterval = null; // Health monitoring interval
let agentHealthStatus = {}; // Cache of health status per agent ID
let healthCheckKickoffTimer = null;
// ============== AGENT HEALTH MONITORING ==============
@@ -25,8 +26,15 @@ function startHealthMonitoring() {
// Don't start if already running
if (healthCheckInterval) return;
// Initial check
// Defer the first probe so heavy dashboards can finish initial render
// before we start contacting remote agents.
if (healthCheckKickoffTimer) {
clearTimeout(healthCheckKickoffTimer);
}
healthCheckKickoffTimer = setTimeout(() => {
healthCheckKickoffTimer = null;
checkAllAgentsHealth();
}, 5000);
// Start periodic checks every 30 seconds
healthCheckInterval = setInterval(checkAllAgentsHealth, 30000);
@@ -37,6 +45,10 @@ function startHealthMonitoring() {
* Stop health monitoring.
*/
function stopHealthMonitoring() {
if (healthCheckKickoffTimer) {
clearTimeout(healthCheckKickoffTimer);
healthCheckKickoffTimer = null;
}
if (healthCheckInterval) {
clearInterval(healthCheckInterval);
healthCheckInterval = null;