Shorten agent health checks on load

This commit is contained in:
James Smith
2026-03-19 08:09:07 +00:00
parent db0a26cd64
commit fb8b6a01e8
2 changed files with 80 additions and 53 deletions

View File

@@ -40,14 +40,16 @@ from utils.trilateration import (
estimate_location_from_observations,
)
logger = logging.getLogger('intercept.controller')
controller_bp = Blueprint('controller', __name__, url_prefix='/controller')
# Multi-agent SSE fanout state (per-client queues).
_agent_stream_subscribers: set[queue.Queue] = set()
_agent_stream_subscribers_lock = threading.Lock()
_AGENT_STREAM_CLIENT_QUEUE_SIZE = 500
logger = logging.getLogger('intercept.controller')
controller_bp = Blueprint('controller', __name__, url_prefix='/controller')
AGENT_HEALTH_TIMEOUT_SECONDS = 2.0
AGENT_STATUS_TIMEOUT_SECONDS = 2.5
# Multi-agent SSE fanout state (per-client queues).
_agent_stream_subscribers: set[queue.Queue] = set()
_agent_stream_subscribers_lock = threading.Lock()
_AGENT_STREAM_CLIENT_QUEUE_SIZE = 500
def _broadcast_agent_data(payload: dict) -> None:
@@ -77,14 +79,18 @@ def get_agents():
agents = list_agents(active_only=active_only)
# Optionally refresh status for each agent
refresh = request.args.get('refresh', 'false').lower() == 'true'
if refresh:
for agent in agents:
try:
client = create_client_from_agent(agent)
agent['healthy'] = client.health_check()
except Exception:
agent['healthy'] = False
refresh = request.args.get('refresh', 'false').lower() == 'true'
if refresh:
for agent in agents:
try:
client = AgentClient(
agent['base_url'],
api_key=agent.get('api_key'),
timeout=AGENT_HEALTH_TIMEOUT_SECONDS,
)
agent['healthy'] = client.health_check()
except Exception:
agent['healthy'] = False
return jsonify({
'status': 'success',
@@ -327,27 +333,36 @@ def check_all_agents_health():
'error': None
}
try:
client = create_client_from_agent(agent)
# Time the health check
start_time = time.time()
is_healthy = client.health_check()
response_time = (time.time() - start_time) * 1000
try:
client = AgentClient(
agent['base_url'],
api_key=agent.get('api_key'),
timeout=AGENT_HEALTH_TIMEOUT_SECONDS,
)
# Time the health check
start_time = time.time()
is_healthy = client.health_check()
response_time = (time.time() - start_time) * 1000
result['healthy'] = is_healthy
result['response_time_ms'] = round(response_time, 1)
if is_healthy:
# Update last_seen in database
update_agent(agent['id'], update_last_seen=True)
# Also fetch running modes
try:
status = client.get_status()
result['running_modes'] = status.get('running_modes', [])
result['running_modes_detail'] = status.get('running_modes_detail', {})
except Exception:
# Update last_seen in database
update_agent(agent['id'], update_last_seen=True)
# Also fetch running modes
try:
status_client = AgentClient(
agent['base_url'],
api_key=agent.get('api_key'),
timeout=AGENT_STATUS_TIMEOUT_SECONDS,
)
status = status_client.get_status()
result['running_modes'] = status.get('running_modes', [])
result['running_modes_detail'] = status.get('running_modes_detail', {})
except Exception:
pass # Status fetch is optional
except AgentConnectionError as e: