mirror of
https://github.com/smittix/intercept.git
synced 2026-04-24 06:40:00 -07:00
Shorten agent health checks on load
This commit is contained in:
@@ -40,14 +40,16 @@ from utils.trilateration import (
|
|||||||
estimate_location_from_observations,
|
estimate_location_from_observations,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger('intercept.controller')
|
logger = logging.getLogger('intercept.controller')
|
||||||
|
|
||||||
controller_bp = Blueprint('controller', __name__, url_prefix='/controller')
|
controller_bp = Blueprint('controller', __name__, url_prefix='/controller')
|
||||||
|
AGENT_HEALTH_TIMEOUT_SECONDS = 2.0
|
||||||
# Multi-agent SSE fanout state (per-client queues).
|
AGENT_STATUS_TIMEOUT_SECONDS = 2.5
|
||||||
_agent_stream_subscribers: set[queue.Queue] = set()
|
|
||||||
_agent_stream_subscribers_lock = threading.Lock()
|
# Multi-agent SSE fanout state (per-client queues).
|
||||||
_AGENT_STREAM_CLIENT_QUEUE_SIZE = 500
|
_agent_stream_subscribers: set[queue.Queue] = set()
|
||||||
|
_agent_stream_subscribers_lock = threading.Lock()
|
||||||
|
_AGENT_STREAM_CLIENT_QUEUE_SIZE = 500
|
||||||
|
|
||||||
|
|
||||||
def _broadcast_agent_data(payload: dict) -> None:
|
def _broadcast_agent_data(payload: dict) -> None:
|
||||||
@@ -77,14 +79,18 @@ def get_agents():
|
|||||||
agents = list_agents(active_only=active_only)
|
agents = list_agents(active_only=active_only)
|
||||||
|
|
||||||
# Optionally refresh status for each agent
|
# Optionally refresh status for each agent
|
||||||
refresh = request.args.get('refresh', 'false').lower() == 'true'
|
refresh = request.args.get('refresh', 'false').lower() == 'true'
|
||||||
if refresh:
|
if refresh:
|
||||||
for agent in agents:
|
for agent in agents:
|
||||||
try:
|
try:
|
||||||
client = create_client_from_agent(agent)
|
client = AgentClient(
|
||||||
agent['healthy'] = client.health_check()
|
agent['base_url'],
|
||||||
except Exception:
|
api_key=agent.get('api_key'),
|
||||||
agent['healthy'] = False
|
timeout=AGENT_HEALTH_TIMEOUT_SECONDS,
|
||||||
|
)
|
||||||
|
agent['healthy'] = client.health_check()
|
||||||
|
except Exception:
|
||||||
|
agent['healthy'] = False
|
||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'status': 'success',
|
'status': 'success',
|
||||||
@@ -327,27 +333,36 @@ def check_all_agents_health():
|
|||||||
'error': None
|
'error': None
|
||||||
}
|
}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
client = create_client_from_agent(agent)
|
client = AgentClient(
|
||||||
|
agent['base_url'],
|
||||||
# Time the health check
|
api_key=agent.get('api_key'),
|
||||||
start_time = time.time()
|
timeout=AGENT_HEALTH_TIMEOUT_SECONDS,
|
||||||
is_healthy = client.health_check()
|
)
|
||||||
response_time = (time.time() - start_time) * 1000
|
|
||||||
|
# Time the health check
|
||||||
|
start_time = time.time()
|
||||||
|
is_healthy = client.health_check()
|
||||||
|
response_time = (time.time() - start_time) * 1000
|
||||||
|
|
||||||
result['healthy'] = is_healthy
|
result['healthy'] = is_healthy
|
||||||
result['response_time_ms'] = round(response_time, 1)
|
result['response_time_ms'] = round(response_time, 1)
|
||||||
|
|
||||||
if is_healthy:
|
if is_healthy:
|
||||||
# Update last_seen in database
|
# Update last_seen in database
|
||||||
update_agent(agent['id'], update_last_seen=True)
|
update_agent(agent['id'], update_last_seen=True)
|
||||||
|
|
||||||
# Also fetch running modes
|
# Also fetch running modes
|
||||||
try:
|
try:
|
||||||
status = client.get_status()
|
status_client = AgentClient(
|
||||||
result['running_modes'] = status.get('running_modes', [])
|
agent['base_url'],
|
||||||
result['running_modes_detail'] = status.get('running_modes_detail', {})
|
api_key=agent.get('api_key'),
|
||||||
except Exception:
|
timeout=AGENT_STATUS_TIMEOUT_SECONDS,
|
||||||
|
)
|
||||||
|
status = status_client.get_status()
|
||||||
|
result['running_modes'] = status.get('running_modes', [])
|
||||||
|
result['running_modes_detail'] = status.get('running_modes_detail', {})
|
||||||
|
except Exception:
|
||||||
pass # Status fetch is optional
|
pass # Status fetch is optional
|
||||||
|
|
||||||
except AgentConnectionError as e:
|
except AgentConnectionError as e:
|
||||||
|
|||||||
@@ -10,10 +10,11 @@ let currentAgent = 'local';
|
|||||||
let agentEventSource = null;
|
let agentEventSource = null;
|
||||||
let multiAgentMode = false; // Show combined results from all agents
|
let multiAgentMode = false; // Show combined results from all agents
|
||||||
let multiAgentPollInterval = null;
|
let multiAgentPollInterval = null;
|
||||||
let agentRunningModes = []; // Track agent's running modes for conflict detection
|
let agentRunningModes = []; // Track agent's running modes for conflict detection
|
||||||
let agentRunningModesDetail = {}; // Track device info per mode (for multi-SDR agents)
|
let agentRunningModesDetail = {}; // Track device info per mode (for multi-SDR agents)
|
||||||
let healthCheckInterval = null; // Health monitoring interval
|
let healthCheckInterval = null; // Health monitoring interval
|
||||||
let agentHealthStatus = {}; // Cache of health status per agent ID
|
let agentHealthStatus = {}; // Cache of health status per agent ID
|
||||||
|
let healthCheckKickoffTimer = null;
|
||||||
|
|
||||||
// ============== AGENT HEALTH MONITORING ==============
|
// ============== AGENT HEALTH MONITORING ==============
|
||||||
|
|
||||||
@@ -21,27 +22,38 @@ let agentHealthStatus = {}; // Cache of health status per agent ID
|
|||||||
* Start periodic health monitoring for all agents.
|
* Start periodic health monitoring for all agents.
|
||||||
* Runs every 30 seconds to check agent health status.
|
* Runs every 30 seconds to check agent health status.
|
||||||
*/
|
*/
|
||||||
function startHealthMonitoring() {
|
function startHealthMonitoring() {
|
||||||
// Don't start if already running
|
// Don't start if already running
|
||||||
if (healthCheckInterval) return;
|
if (healthCheckInterval) return;
|
||||||
|
|
||||||
// Initial check
|
// Defer the first probe so heavy dashboards can finish initial render
|
||||||
checkAllAgentsHealth();
|
// before we start contacting remote agents.
|
||||||
|
if (healthCheckKickoffTimer) {
|
||||||
// Start periodic checks every 30 seconds
|
clearTimeout(healthCheckKickoffTimer);
|
||||||
healthCheckInterval = setInterval(checkAllAgentsHealth, 30000);
|
}
|
||||||
console.log('[AgentManager] Health monitoring started (30s interval)');
|
healthCheckKickoffTimer = setTimeout(() => {
|
||||||
}
|
healthCheckKickoffTimer = null;
|
||||||
|
checkAllAgentsHealth();
|
||||||
|
}, 5000);
|
||||||
|
|
||||||
|
// Start periodic checks every 30 seconds
|
||||||
|
healthCheckInterval = setInterval(checkAllAgentsHealth, 30000);
|
||||||
|
console.log('[AgentManager] Health monitoring started (30s interval)');
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stop health monitoring.
|
* Stop health monitoring.
|
||||||
*/
|
*/
|
||||||
function stopHealthMonitoring() {
|
function stopHealthMonitoring() {
|
||||||
if (healthCheckInterval) {
|
if (healthCheckKickoffTimer) {
|
||||||
clearInterval(healthCheckInterval);
|
clearTimeout(healthCheckKickoffTimer);
|
||||||
healthCheckInterval = null;
|
healthCheckKickoffTimer = null;
|
||||||
console.log('[AgentManager] Health monitoring stopped');
|
}
|
||||||
}
|
if (healthCheckInterval) {
|
||||||
|
clearInterval(healthCheckInterval);
|
||||||
|
healthCheckInterval = null;
|
||||||
|
console.log('[AgentManager] Health monitoring stopped');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
Reference in New Issue
Block a user