Files
intercept/utils/bluetooth/aggregator.py
Smittix 99d52eafe7 chore: Bump version to v2.18.0
Bluetooth enhancements (service data inspector, appearance codes, MAC
cluster tracking, behavioral flags, IRK badges, distance estimation),
ACARS SoapySDR multi-backend support, dump1090 stale process cleanup,
GPS error state, and proximity radar/signal card UI improvements.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 15:12:10 +00:00

618 lines
23 KiB
Python

"""
Device aggregator for Bluetooth observations.
Handles RSSI statistics, range band estimation, and device state management.
"""
from __future__ import annotations
import statistics
import threading
from datetime import datetime, timedelta
from typing import Optional
from .constants import (
MAX_RSSI_SAMPLES,
DEVICE_STALE_TIMEOUT,
RSSI_VERY_CLOSE,
RSSI_CLOSE,
RSSI_NEARBY,
RSSI_FAR,
CONFIDENCE_VERY_CLOSE,
CONFIDENCE_CLOSE,
CONFIDENCE_NEARBY,
CONFIDENCE_FAR,
RANGE_VERY_CLOSE,
RANGE_CLOSE,
RANGE_NEARBY,
RANGE_FAR,
RANGE_UNKNOWN,
ADDRESS_TYPE_RANDOM,
ADDRESS_TYPE_RANDOM_STATIC,
ADDRESS_TYPE_RPA,
ADDRESS_TYPE_NRPA,
MANUFACTURER_NAMES,
PROTOCOL_BLE,
PROTOCOL_CLASSIC,
)
from .models import BTObservation, BTDeviceAggregate
from .device_key import generate_device_key, is_randomized_mac
from .distance import DistanceEstimator, get_distance_estimator
from .ring_buffer import RingBuffer, get_ring_buffer
from .tracker_signatures import (
TrackerSignatureEngine,
get_tracker_engine,
TrackerDetectionResult,
)
class DeviceAggregator:
"""
Aggregates Bluetooth observations into unified device records.
Maintains RSSI statistics, estimates range bands, and tracks device state
across multiple observations.
"""
def __init__(self, max_rssi_samples: int = MAX_RSSI_SAMPLES):
self._devices: dict[str, BTDeviceAggregate] = {}
self._lock = threading.Lock()
self._max_rssi_samples = max_rssi_samples
self._baseline_device_ids: set[str] = set()
self._baseline_set_time: Optional[datetime] = None
# Proximity estimation components
self._distance_estimator = get_distance_estimator()
self._ring_buffer = get_ring_buffer()
# Tracker detection engine
self._tracker_engine = get_tracker_engine()
# Device key mapping (device_id -> device_key)
self._device_keys: dict[str, str] = {}
# Fingerprint mapping for cross-MAC tracking
self._fingerprint_to_devices: dict[str, set[str]] = {}
def ingest(self, observation: BTObservation) -> BTDeviceAggregate:
"""
Ingest a new observation and update the device aggregate.
Args:
observation: The BTObservation to process.
Returns:
The updated BTDeviceAggregate for this device.
"""
device_id = observation.device_id
with self._lock:
if device_id not in self._devices:
# Create new device aggregate
device = BTDeviceAggregate(
device_id=device_id,
address=observation.address,
address_type=observation.address_type,
first_seen=observation.timestamp,
last_seen=observation.timestamp,
protocol=self._infer_protocol(observation),
)
self._devices[device_id] = device
else:
device = self._devices[device_id]
# Update timestamps and counts
device.last_seen = observation.timestamp
device.seen_count += 1
# Calculate seen rate (observations per minute)
duration = device.duration_seconds
if duration > 0:
device.seen_rate = (device.seen_count / duration) * 60
else:
device.seen_rate = 0
# Update RSSI samples
if observation.rssi is not None:
device.rssi_samples.append((observation.timestamp, observation.rssi))
# Prune old samples
if len(device.rssi_samples) > self._max_rssi_samples:
device.rssi_samples = device.rssi_samples[-self._max_rssi_samples:]
# Recalculate RSSI statistics
self._update_rssi_stats(device)
# Merge device info (prefer non-None values)
self._merge_device_info(device, observation)
# Update range band
self._update_range_band(device)
# Check if address is random
device.has_random_address = observation.address_type in (
ADDRESS_TYPE_RANDOM,
ADDRESS_TYPE_RANDOM_STATIC,
ADDRESS_TYPE_RPA,
ADDRESS_TYPE_NRPA,
)
# Check baseline status
device.in_baseline = device_id in self._baseline_device_ids
device.is_new = not device.in_baseline and self._baseline_set_time is not None
# Generate stable device key
device_key = generate_device_key(
address=observation.address,
address_type=observation.address_type,
name=device.name,
manufacturer_id=device.manufacturer_id,
service_uuids=device.service_uuids if device.service_uuids else None,
)
device.device_key = device_key
self._device_keys[device_id] = device_key
# Check if randomized MAC
device.is_randomized_mac = is_randomized_mac(observation.address_type)
# Apply EMA smoothing to RSSI
if observation.rssi is not None:
device.rssi_ema = self._distance_estimator.apply_ema_smoothing(
current=observation.rssi,
prev_ema=device.rssi_ema,
)
# Get 60-second min/max
device.rssi_60s_min, device.rssi_60s_max = self._distance_estimator.get_rssi_60s_window(
device.rssi_samples,
window_seconds=60,
)
# Store in ring buffer for heatmap
self._ring_buffer.ingest(
device_key=device_key,
rssi=observation.rssi,
timestamp=observation.timestamp,
)
# Estimate distance and proximity band
self._update_proximity(device)
# Run tracker detection
self._update_tracker_detection(device, observation)
# Evaluate suspicious presence heuristics
self._update_risk_analysis(device)
return device
def _infer_protocol(self, observation: BTObservation) -> str:
"""Infer the Bluetooth protocol from observation data."""
# If Class of Device is set, it's Classic BT
if observation.class_of_device is not None:
return PROTOCOL_CLASSIC
# If address type is anything other than public, likely BLE
if observation.address_type != 'public':
return PROTOCOL_BLE
# If service UUIDs are present with 16-bit format, likely BLE
if observation.service_uuids:
for uuid in observation.service_uuids:
if len(uuid) == 4 or len(uuid) == 8: # 16-bit or 32-bit
return PROTOCOL_BLE
# Default to BLE as it's more common in modern scanning
return PROTOCOL_BLE
def _update_rssi_stats(self, device: BTDeviceAggregate) -> None:
"""Update RSSI statistics for a device."""
if not device.rssi_samples:
return
rssi_values = [rssi for _, rssi in device.rssi_samples]
# Current is most recent
device.rssi_current = rssi_values[-1]
# Basic statistics
device.rssi_min = min(rssi_values)
device.rssi_max = max(rssi_values)
# Median
device.rssi_median = statistics.median(rssi_values)
# Variance (need at least 2 samples)
if len(rssi_values) >= 2:
device.rssi_variance = statistics.variance(rssi_values)
else:
device.rssi_variance = 0.0
# Confidence based on sample count and variance
device.rssi_confidence = self._calculate_confidence(rssi_values)
def _calculate_confidence(self, rssi_values: list[int]) -> float:
"""
Calculate confidence score for RSSI measurements.
Factors:
- Sample count (more samples = higher confidence)
- Low variance (less variance = higher confidence)
"""
if not rssi_values:
return 0.0
# Sample count factor (logarithmic scaling, max out at ~50 samples)
sample_factor = min(1.0, len(rssi_values) / 20)
# Variance factor (lower variance = higher confidence)
if len(rssi_values) >= 2:
variance = statistics.variance(rssi_values)
# Normalize: 0 variance = 1.0, 100 variance = 0.0
variance_factor = max(0.0, 1.0 - (variance / 100))
else:
variance_factor = 0.5 # Unknown variance
# Combined confidence (weighted average)
confidence = (sample_factor * 0.4) + (variance_factor * 0.6)
return min(1.0, max(0.0, confidence))
def _update_range_band(self, device: BTDeviceAggregate) -> None:
"""Estimate range band from RSSI median and confidence."""
if device.rssi_median is None:
device.range_band = RANGE_UNKNOWN
device.range_confidence = 0.0
return
rssi = device.rssi_median
confidence = device.rssi_confidence
# Determine range band based on RSSI thresholds
if rssi >= RSSI_VERY_CLOSE and confidence >= CONFIDENCE_VERY_CLOSE:
device.range_band = RANGE_VERY_CLOSE
device.range_confidence = confidence
elif rssi >= RSSI_CLOSE and confidence >= CONFIDENCE_CLOSE:
device.range_band = RANGE_CLOSE
device.range_confidence = confidence
elif rssi >= RSSI_NEARBY and confidence >= CONFIDENCE_NEARBY:
device.range_band = RANGE_NEARBY
device.range_confidence = confidence
elif rssi >= RSSI_FAR and confidence >= CONFIDENCE_FAR:
device.range_band = RANGE_FAR
device.range_confidence = confidence
else:
device.range_band = RANGE_UNKNOWN
device.range_confidence = confidence * 0.5 # Reduced confidence for unknown
def _update_proximity(self, device: BTDeviceAggregate) -> None:
"""Update proximity estimation for a device."""
if device.rssi_ema is None:
device.proximity_band = 'unknown'
device.estimated_distance_m = None
device.distance_confidence = 0.0
return
# Estimate distance
distance, confidence = self._distance_estimator.estimate_distance(
rssi=device.rssi_ema,
tx_power=device.tx_power,
variance=device.rssi_variance,
)
device.estimated_distance_m = distance
device.distance_confidence = confidence
# Classify proximity band
band = self._distance_estimator.classify_proximity_band(
distance_m=distance,
rssi_ema=device.rssi_ema,
)
device.proximity_band = str(band)
def _update_tracker_detection(
self,
device: BTDeviceAggregate,
observation: BTObservation,
) -> None:
"""Run tracker signature detection on a device."""
# Prepare service data from observation if available
service_data = observation.service_data if observation.service_data else {}
# Store service data on device for investigation
for uuid, data in service_data.items():
device.service_data[uuid] = data
# Run tracker detection
result = self._tracker_engine.detect_tracker(
address=device.address,
address_type=device.address_type,
name=device.name,
manufacturer_id=device.manufacturer_id,
manufacturer_data=device.manufacturer_bytes,
service_uuids=device.service_uuids,
service_data=service_data,
tx_power=device.tx_power,
)
# Update device with detection results
device.is_tracker = result.is_tracker
device.tracker_type = result.tracker_type.value if result.tracker_type else None
device.tracker_name = result.tracker_name
device.tracker_confidence = result.confidence.value if result.confidence else None
device.tracker_confidence_score = result.confidence_score
device.tracker_evidence = result.evidence
# Generate and store payload fingerprint
fingerprint = self._tracker_engine.generate_device_fingerprint(
manufacturer_id=device.manufacturer_id,
manufacturer_data=device.manufacturer_bytes,
service_uuids=device.service_uuids,
service_data=service_data,
tx_power=device.tx_power,
name=device.name,
)
device.payload_fingerprint_id = fingerprint.fingerprint_id
device.payload_fingerprint_stability = fingerprint.stability_confidence
# Track fingerprint to device mapping
if fingerprint.fingerprint_id not in self._fingerprint_to_devices:
self._fingerprint_to_devices[fingerprint.fingerprint_id] = set()
self._fingerprint_to_devices[fingerprint.fingerprint_id].add(device.device_id)
# Record sighting for persistence tracking
self._tracker_engine.record_sighting(fingerprint.fingerprint_id)
def _update_risk_analysis(self, device: BTDeviceAggregate) -> None:
"""Evaluate suspicious presence heuristics for a device."""
if not device.payload_fingerprint_id:
return
risk_score, risk_factors = self._tracker_engine.evaluate_suspicious_presence(
fingerprint_id=device.payload_fingerprint_id,
is_tracker=device.is_tracker,
seen_count=device.seen_count,
duration_seconds=device.duration_seconds,
seen_rate=device.seen_rate,
rssi_variance=device.rssi_variance,
is_new=device.is_new,
)
device.risk_score = risk_score
device.risk_factors = risk_factors
def _merge_device_info(self, device: BTDeviceAggregate, observation: BTObservation) -> None:
"""Merge observation data into device aggregate (prefer non-None values)."""
# Name (prefer longer names as they're usually more complete)
if observation.name:
if not device.name or len(observation.name) > len(device.name):
device.name = observation.name
# Manufacturer
if observation.manufacturer_id is not None:
device.manufacturer_id = observation.manufacturer_id
device.manufacturer_name = MANUFACTURER_NAMES.get(
observation.manufacturer_id,
f"Unknown (0x{observation.manufacturer_id:04X})"
)
if observation.manufacturer_data:
device.manufacturer_bytes = observation.manufacturer_data
# Service UUIDs (merge, don't replace)
for uuid in observation.service_uuids:
if uuid not in device.service_uuids:
device.service_uuids.append(uuid)
# Other fields
if observation.tx_power is not None:
device.tx_power = observation.tx_power
if observation.appearance is not None:
device.appearance = observation.appearance
if observation.class_of_device is not None:
device.class_of_device = observation.class_of_device
device.major_class = observation.major_class
device.minor_class = observation.minor_class
# Connection state (use most recent)
device.is_connectable = observation.is_connectable
device.is_paired = observation.is_paired
device.is_connected = observation.is_connected
def get_device(self, device_id: str) -> Optional[BTDeviceAggregate]:
"""Get a device by ID."""
with self._lock:
return self._devices.get(device_id)
def get_all_devices(self) -> list[BTDeviceAggregate]:
"""Get all tracked devices."""
with self._lock:
return list(self._devices.values())
def get_active_devices(self, max_age_seconds: float = DEVICE_STALE_TIMEOUT) -> list[BTDeviceAggregate]:
"""Get devices seen within the specified time window."""
cutoff = datetime.now() - timedelta(seconds=max_age_seconds)
with self._lock:
return [d for d in self._devices.values() if d.last_seen >= cutoff]
def prune_stale_devices(self, max_age_seconds: float = DEVICE_STALE_TIMEOUT) -> int:
"""
Remove devices not seen within the specified time window.
Returns:
Number of devices removed.
"""
cutoff = datetime.now() - timedelta(seconds=max_age_seconds)
with self._lock:
stale_ids = [
device_id for device_id, device in self._devices.items()
if device.last_seen < cutoff
]
for device_id in stale_ids:
del self._devices[device_id]
return len(stale_ids)
def clear(self) -> None:
"""Clear all tracked devices."""
with self._lock:
self._devices.clear()
def set_baseline(self) -> int:
"""
Set the current devices as the baseline.
Returns:
Number of devices in baseline.
"""
with self._lock:
self._baseline_device_ids = set(self._devices.keys())
self._baseline_set_time = datetime.now()
# Mark all current devices as in baseline
for device in self._devices.values():
device.in_baseline = True
device.is_new = False
return len(self._baseline_device_ids)
def clear_baseline(self) -> None:
"""Clear the baseline."""
with self._lock:
self._baseline_device_ids.clear()
self._baseline_set_time = None
for device in self._devices.values():
device.in_baseline = False
device.is_new = False
def load_baseline(self, device_ids: set[str], set_time: datetime) -> None:
"""Load a baseline from storage."""
with self._lock:
self._baseline_device_ids = device_ids
self._baseline_set_time = set_time
# Update existing devices
for device_id, device in self._devices.items():
device.in_baseline = device_id in self._baseline_device_ids
device.is_new = not device.in_baseline
@property
def device_count(self) -> int:
"""Number of tracked devices."""
with self._lock:
return len(self._devices)
@property
def baseline_device_count(self) -> int:
"""Number of devices in baseline."""
with self._lock:
return len(self._baseline_device_ids)
@property
def has_baseline(self) -> bool:
"""Whether a baseline is set."""
return self._baseline_set_time is not None
@property
def ring_buffer(self) -> RingBuffer:
"""Access the ring buffer for timeseries data."""
return self._ring_buffer
def get_device_by_key(self, device_key: str) -> Optional[BTDeviceAggregate]:
"""Get a device by its stable device key."""
with self._lock:
# Find device_id from device_key
for device_id, key in self._device_keys.items():
if key == device_key:
return self._devices.get(device_id)
return None
def get_timeseries(
self,
device_key: str,
window_minutes: int = 30,
downsample_seconds: int = 10,
) -> list[dict]:
"""
Get timeseries data for a device.
Args:
device_key: Stable device identifier.
window_minutes: Time window in minutes.
downsample_seconds: Bucket size for downsampling.
Returns:
List of {timestamp, rssi} dicts.
"""
return self._ring_buffer.get_timeseries(
device_key=device_key,
window_minutes=window_minutes,
downsample_seconds=downsample_seconds,
)
def get_heatmap_data(
self,
top_n: int = 20,
window_minutes: int = 10,
bucket_seconds: int = 10,
sort_by: str = 'recency',
) -> dict:
"""
Get heatmap data for visualization.
Args:
top_n: Number of devices to include.
window_minutes: Time window.
bucket_seconds: Bucket size for downsampling.
sort_by: Sort method ('recency', 'strength', 'activity').
Returns:
Dict with device timeseries and metadata.
"""
# Get timeseries data from ring buffer
timeseries = self._ring_buffer.get_all_timeseries(
window_minutes=window_minutes,
downsample_seconds=bucket_seconds,
top_n=top_n,
sort_by=sort_by,
)
# Enrich with device metadata
result = {
'window_minutes': window_minutes,
'bucket_seconds': bucket_seconds,
'devices': [],
}
with self._lock:
for device_key, ts_data in timeseries.items():
device = self.get_device_by_key(device_key)
device_info = {
'device_key': device_key,
'timeseries': ts_data,
}
if device:
device_info.update({
'name': device.name,
'address': device.address,
'rssi_current': device.rssi_current,
'rssi_ema': round(device.rssi_ema, 1) if device.rssi_ema else None,
'proximity_band': device.proximity_band,
})
else:
device_info.update({
'name': None,
'address': None,
'rssi_current': None,
'rssi_ema': None,
'proximity_band': 'unknown',
})
result['devices'].append(device_info)
return result
def get_fingerprint_mac_count(self, fingerprint_id: str) -> int:
"""Return how many distinct device_ids share a fingerprint."""
with self._lock:
device_ids = self._fingerprint_to_devices.get(fingerprint_id)
return len(device_ids) if device_ids else 0
def prune_ring_buffer(self) -> int:
"""Prune old observations from ring buffer."""
return self._ring_buffer.prune_old()