mirror of
https://github.com/smittix/intercept.git
synced 2026-04-26 07:40:01 -07:00
Add comprehensive BLE tracker detection with signature engine
Implement reliable tracker detection for AirTag, Tile, Samsung SmartTag, and other BLE trackers based on manufacturer data patterns, service UUIDs, and advertising payload analysis. Key changes: - Add TrackerSignatureEngine with signatures for major tracker brands - Device fingerprinting to track devices across MAC randomization - Suspicious presence heuristics (persistence, following patterns) - New API endpoints: /api/bluetooth/trackers, /diagnostics - UI updates with tracker badges, confidence, and evidence display - TSCM integration updated to use v2 tracker detection data - Unit tests and smoke test scripts for validation Detection is heuristic-based with confidence scoring (high/medium/low) and evidence transparency. Backwards compatible with existing APIs. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -36,6 +36,15 @@ from .heuristics import HeuristicsEngine, evaluate_device_heuristics, evaluate_a
|
||||
from .models import BTDeviceAggregate, BTObservation, ScanStatus, SystemCapabilities
|
||||
from .ring_buffer import RingBuffer, get_ring_buffer, reset_ring_buffer
|
||||
from .scanner import BluetoothScanner, get_bluetooth_scanner, reset_bluetooth_scanner
|
||||
from .tracker_signatures import (
|
||||
TrackerSignatureEngine,
|
||||
TrackerDetectionResult,
|
||||
TrackerType,
|
||||
TrackerConfidence,
|
||||
DeviceFingerprint,
|
||||
detect_tracker,
|
||||
get_tracker_engine,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# Main scanner
|
||||
@@ -100,4 +109,13 @@ __all__ = [
|
||||
'ADDRESS_TYPE_RANDOM_STATIC',
|
||||
'ADDRESS_TYPE_RPA',
|
||||
'ADDRESS_TYPE_NRPA',
|
||||
|
||||
# Tracker detection
|
||||
'TrackerSignatureEngine',
|
||||
'TrackerDetectionResult',
|
||||
'TrackerType',
|
||||
'TrackerConfidence',
|
||||
'DeviceFingerprint',
|
||||
'detect_tracker',
|
||||
'get_tracker_engine',
|
||||
]
|
||||
|
||||
@@ -39,6 +39,11 @@ from .models import BTObservation, BTDeviceAggregate
|
||||
from .device_key import generate_device_key, is_randomized_mac
|
||||
from .distance import DistanceEstimator, get_distance_estimator
|
||||
from .ring_buffer import RingBuffer, get_ring_buffer
|
||||
from .tracker_signatures import (
|
||||
TrackerSignatureEngine,
|
||||
get_tracker_engine,
|
||||
TrackerDetectionResult,
|
||||
)
|
||||
|
||||
|
||||
class DeviceAggregator:
|
||||
@@ -60,9 +65,15 @@ class DeviceAggregator:
|
||||
self._distance_estimator = get_distance_estimator()
|
||||
self._ring_buffer = get_ring_buffer()
|
||||
|
||||
# Tracker detection engine
|
||||
self._tracker_engine = get_tracker_engine()
|
||||
|
||||
# Device key mapping (device_id -> device_key)
|
||||
self._device_keys: dict[str, str] = {}
|
||||
|
||||
# Fingerprint mapping for cross-MAC tracking
|
||||
self._fingerprint_to_devices: dict[str, set[str]] = {}
|
||||
|
||||
def ingest(self, observation: BTObservation) -> BTDeviceAggregate:
|
||||
"""
|
||||
Ingest a new observation and update the device aggregate.
|
||||
@@ -166,6 +177,12 @@ class DeviceAggregator:
|
||||
# Estimate distance and proximity band
|
||||
self._update_proximity(device)
|
||||
|
||||
# Run tracker detection
|
||||
self._update_tracker_detection(device, observation)
|
||||
|
||||
# Evaluate suspicious presence heuristics
|
||||
self._update_risk_analysis(device)
|
||||
|
||||
return device
|
||||
|
||||
def _infer_protocol(self, observation: BTObservation) -> str:
|
||||
@@ -291,6 +308,77 @@ class DeviceAggregator:
|
||||
)
|
||||
device.proximity_band = str(band)
|
||||
|
||||
def _update_tracker_detection(
|
||||
self,
|
||||
device: BTDeviceAggregate,
|
||||
observation: BTObservation,
|
||||
) -> None:
|
||||
"""Run tracker signature detection on a device."""
|
||||
# Prepare service data from observation if available
|
||||
service_data = observation.service_data if observation.service_data else {}
|
||||
|
||||
# Store service data on device for investigation
|
||||
for uuid, data in service_data.items():
|
||||
device.service_data[uuid] = data
|
||||
|
||||
# Run tracker detection
|
||||
result = self._tracker_engine.detect_tracker(
|
||||
address=device.address,
|
||||
address_type=device.address_type,
|
||||
name=device.name,
|
||||
manufacturer_id=device.manufacturer_id,
|
||||
manufacturer_data=device.manufacturer_bytes,
|
||||
service_uuids=device.service_uuids,
|
||||
service_data=service_data,
|
||||
tx_power=device.tx_power,
|
||||
)
|
||||
|
||||
# Update device with detection results
|
||||
device.is_tracker = result.is_tracker
|
||||
device.tracker_type = result.tracker_type.value if result.tracker_type else None
|
||||
device.tracker_name = result.tracker_name
|
||||
device.tracker_confidence = result.confidence.value if result.confidence else None
|
||||
device.tracker_confidence_score = result.confidence_score
|
||||
device.tracker_evidence = result.evidence
|
||||
|
||||
# Generate and store payload fingerprint
|
||||
fingerprint = self._tracker_engine.generate_device_fingerprint(
|
||||
manufacturer_id=device.manufacturer_id,
|
||||
manufacturer_data=device.manufacturer_bytes,
|
||||
service_uuids=device.service_uuids,
|
||||
service_data=service_data,
|
||||
tx_power=device.tx_power,
|
||||
name=device.name,
|
||||
)
|
||||
device.payload_fingerprint_id = fingerprint.fingerprint_id
|
||||
device.payload_fingerprint_stability = fingerprint.stability_confidence
|
||||
|
||||
# Track fingerprint to device mapping
|
||||
if fingerprint.fingerprint_id not in self._fingerprint_to_devices:
|
||||
self._fingerprint_to_devices[fingerprint.fingerprint_id] = set()
|
||||
self._fingerprint_to_devices[fingerprint.fingerprint_id].add(device.device_id)
|
||||
|
||||
# Record sighting for persistence tracking
|
||||
self._tracker_engine.record_sighting(fingerprint.fingerprint_id)
|
||||
|
||||
def _update_risk_analysis(self, device: BTDeviceAggregate) -> None:
|
||||
"""Evaluate suspicious presence heuristics for a device."""
|
||||
if not device.payload_fingerprint_id:
|
||||
return
|
||||
|
||||
risk_score, risk_factors = self._tracker_engine.evaluate_suspicious_presence(
|
||||
fingerprint_id=device.payload_fingerprint_id,
|
||||
is_tracker=device.is_tracker,
|
||||
seen_count=device.seen_count,
|
||||
duration_seconds=device.duration_seconds,
|
||||
seen_rate=device.seen_rate,
|
||||
rssi_variance=device.rssi_variance,
|
||||
is_new=device.is_new,
|
||||
)
|
||||
|
||||
device.risk_score = risk_score
|
||||
device.risk_factors = risk_factors
|
||||
|
||||
def _merge_device_info(self, device: BTDeviceAggregate, observation: BTObservation) -> None:
|
||||
"""Merge observation data into device aggregate (prefer non-None values)."""
|
||||
# Name (prefer longer names as they're usually more complete)
|
||||
|
||||
@@ -20,6 +20,12 @@ from .constants import (
|
||||
PROXIMITY_UNKNOWN,
|
||||
)
|
||||
|
||||
# Import tracker types (will be available after tracker_signatures module loads)
|
||||
# Use string type hints to avoid circular imports
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from .tracker_signatures import TrackerDetectionResult, DeviceFingerprint
|
||||
|
||||
|
||||
@dataclass
|
||||
class BTObservation:
|
||||
@@ -146,6 +152,25 @@ class BTDeviceAggregate:
|
||||
in_baseline: bool = False
|
||||
baseline_id: Optional[int] = None
|
||||
|
||||
# Tracker detection fields
|
||||
is_tracker: bool = False
|
||||
tracker_type: Optional[str] = None # 'airtag', 'tile', 'samsung_smarttag', etc.
|
||||
tracker_name: Optional[str] = None
|
||||
tracker_confidence: Optional[str] = None # 'high', 'medium', 'low', 'none'
|
||||
tracker_confidence_score: float = 0.0 # 0.0 to 1.0
|
||||
tracker_evidence: list[str] = field(default_factory=list)
|
||||
|
||||
# Suspicious presence / following heuristics
|
||||
risk_score: float = 0.0 # 0.0 to 1.0
|
||||
risk_factors: list[str] = field(default_factory=list)
|
||||
|
||||
# Payload fingerprint (survives MAC randomization)
|
||||
payload_fingerprint_id: Optional[str] = None
|
||||
payload_fingerprint_stability: float = 0.0
|
||||
|
||||
# Service data (for tracker analysis)
|
||||
service_data: dict[str, bytes] = field(default_factory=dict)
|
||||
|
||||
def get_rssi_history(self, max_points: int = 50) -> list[dict]:
|
||||
"""Get RSSI history for sparkline visualization."""
|
||||
if not self.rssi_samples:
|
||||
@@ -252,6 +277,31 @@ class BTDeviceAggregate:
|
||||
# Baseline
|
||||
'in_baseline': self.in_baseline,
|
||||
'baseline_id': self.baseline_id,
|
||||
|
||||
# Tracker detection
|
||||
'tracker': {
|
||||
'is_tracker': self.is_tracker,
|
||||
'type': self.tracker_type,
|
||||
'name': self.tracker_name,
|
||||
'confidence': self.tracker_confidence,
|
||||
'confidence_score': round(self.tracker_confidence_score, 2),
|
||||
'evidence': self.tracker_evidence,
|
||||
},
|
||||
|
||||
# Suspicious presence analysis
|
||||
'risk_analysis': {
|
||||
'risk_score': round(self.risk_score, 2),
|
||||
'risk_factors': self.risk_factors,
|
||||
},
|
||||
|
||||
# Fingerprint
|
||||
'fingerprint': {
|
||||
'id': self.payload_fingerprint_id,
|
||||
'stability': round(self.payload_fingerprint_stability, 2),
|
||||
},
|
||||
|
||||
# Raw service data for investigation
|
||||
'service_data': {k: v.hex() for k, v in self.service_data.items()},
|
||||
}
|
||||
|
||||
def to_summary_dict(self) -> dict:
|
||||
@@ -277,6 +327,14 @@ class BTDeviceAggregate:
|
||||
'seen_count': self.seen_count,
|
||||
'heuristic_flags': self.heuristic_flags,
|
||||
'in_baseline': self.in_baseline,
|
||||
# Tracker info for list view
|
||||
'is_tracker': self.is_tracker,
|
||||
'tracker_type': self.tracker_type,
|
||||
'tracker_name': self.tracker_name,
|
||||
'tracker_confidence': self.tracker_confidence,
|
||||
'tracker_confidence_score': round(self.tracker_confidence_score, 2),
|
||||
'risk_score': round(self.risk_score, 2),
|
||||
'fingerprint_id': self.payload_fingerprint_id,
|
||||
}
|
||||
|
||||
|
||||
|
||||
804
utils/bluetooth/tracker_signatures.py
Normal file
804
utils/bluetooth/tracker_signatures.py
Normal file
@@ -0,0 +1,804 @@
|
||||
"""
|
||||
Tracker Signature Engine for BLE device classification.
|
||||
|
||||
Detects Apple AirTag, Find My accessories, Tile trackers, Samsung SmartTag,
|
||||
and other known BLE trackers based on manufacturer data patterns, service UUIDs,
|
||||
and advertising payload analysis.
|
||||
|
||||
This module provides reliable tracker detection that:
|
||||
1. Works with MAC randomization (uses payload fingerprinting)
|
||||
2. Provides confidence scores and evidence for each match
|
||||
3. Does NOT claim certainty - provides "indicators" not proof
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger('intercept.bluetooth.tracker_signatures')
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TRACKER TYPES
|
||||
# =============================================================================
|
||||
|
||||
class TrackerType(str, Enum):
|
||||
"""Known tracker device types."""
|
||||
AIRTAG = 'airtag'
|
||||
FINDMY_ACCESSORY = 'findmy_accessory'
|
||||
TILE = 'tile'
|
||||
SAMSUNG_SMARTTAG = 'samsung_smarttag'
|
||||
CHIPOLO = 'chipolo'
|
||||
PEBBLEBEE = 'pebblebee'
|
||||
NUTFIND = 'nutfind'
|
||||
ORBIT = 'orbit'
|
||||
EUFY = 'eufy'
|
||||
CUBE = 'cube'
|
||||
UNKNOWN_TRACKER = 'unknown_tracker'
|
||||
NOT_A_TRACKER = 'not_a_tracker'
|
||||
|
||||
|
||||
class TrackerConfidence(str, Enum):
|
||||
"""Confidence level for tracker detection."""
|
||||
HIGH = 'high' # Multiple strong indicators match
|
||||
MEDIUM = 'medium' # Some indicators match
|
||||
LOW = 'low' # Weak indicators, needs investigation
|
||||
NONE = 'none' # Not detected as tracker
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TRACKER SIGNATURES DATABASE
|
||||
# =============================================================================
|
||||
|
||||
# Apple Manufacturer ID
|
||||
APPLE_COMPANY_ID = 0x004C
|
||||
|
||||
# Apple Find My / AirTag advertisement types (first byte of manufacturer data after company ID)
|
||||
APPLE_FINDMY_ADV_TYPE = 0x12 # Find My network advertisement
|
||||
APPLE_NEARBY_ADV_TYPE = 0x10 # Nearby action
|
||||
APPLE_AIRTAG_ADV_PATTERN = bytes([0x12, 0x19]) # AirTag specific
|
||||
APPLE_FINDMY_PREFIX_SHORT = bytes([0x12]) # Find My prefix (short)
|
||||
APPLE_FINDMY_PREFIX_ALT = bytes([0x07, 0x19]) # Alternative Find My pattern
|
||||
|
||||
# Find My service UUID (Apple's offline finding service)
|
||||
APPLE_FINDMY_SERVICE_UUID = 'fd6f' # 16-bit UUID
|
||||
APPLE_CONTINUITY_SERVICE_UUID = 'd0611e78-bbb4-4591-a5f8-487910ae4366'
|
||||
|
||||
# Tile
|
||||
TILE_COMPANY_ID = 0x00ED # Tile Inc
|
||||
TILE_ALT_COMPANY_ID = 0x038F # Alternative Tile ID
|
||||
TILE_SERVICE_UUID = 'feed' # Tile service UUID (16-bit)
|
||||
TILE_MAC_PREFIXES = ['C4:E7', 'DC:54', 'E4:B0', 'F8:8A', 'E6:43', '90:32', 'D0:72']
|
||||
|
||||
# Samsung SmartTag
|
||||
SAMSUNG_COMPANY_ID = 0x0075
|
||||
SMARTTAG_SERVICE_UUID = 'fd5a' # SmartThings Find service
|
||||
SMARTTAG_MAC_PREFIXES = ['58:4D', 'A0:75', 'B8:D7', '50:32']
|
||||
|
||||
# Chipolo
|
||||
CHIPOLO_COMPANY_ID = 0x0A09
|
||||
CHIPOLO_SERVICE_UUID = 'feaa' # Eddystone beacon (used by some Chipolo)
|
||||
CHIPOLO_ALT_SERVICE = 'feb1'
|
||||
|
||||
# PebbleBee
|
||||
PEBBLEBEE_SERVICE_UUID = 'feab'
|
||||
PEBBLEBEE_MAC_PREFIXES = ['D4:3D', 'E0:E5']
|
||||
|
||||
# Other known trackers
|
||||
NUTFIND_COMPANY_ID = 0x0A09
|
||||
EUFY_COMPANY_ID = 0x0590
|
||||
|
||||
# Generic beacon patterns that may indicate a tracker
|
||||
BEACON_SERVICE_UUIDS = [
|
||||
'feaa', # Eddystone
|
||||
'feab', # Nokia beacon
|
||||
'feb1', # Dialog Semiconductor
|
||||
'febe', # Bose
|
||||
]
|
||||
|
||||
|
||||
@dataclass
|
||||
class TrackerSignature:
|
||||
"""Defines a tracker signature pattern."""
|
||||
tracker_type: TrackerType
|
||||
name: str
|
||||
description: str
|
||||
company_id: Optional[int] = None
|
||||
company_ids: list[int] = field(default_factory=list)
|
||||
manufacturer_data_prefixes: list[bytes] = field(default_factory=list)
|
||||
service_uuids: list[str] = field(default_factory=list)
|
||||
service_data_prefixes: dict[str, bytes] = field(default_factory=dict)
|
||||
mac_prefixes: list[str] = field(default_factory=list)
|
||||
name_patterns: list[str] = field(default_factory=list)
|
||||
min_manufacturer_data_len: int = 0
|
||||
confidence_boost: float = 0.0 # Extra confidence for specific patterns
|
||||
|
||||
|
||||
# Tracker signatures database
|
||||
TRACKER_SIGNATURES: list[TrackerSignature] = [
|
||||
# Apple AirTag
|
||||
TrackerSignature(
|
||||
tracker_type=TrackerType.AIRTAG,
|
||||
name='Apple AirTag',
|
||||
description='Apple AirTag tracking device using Find My network',
|
||||
company_id=APPLE_COMPANY_ID,
|
||||
manufacturer_data_prefixes=[
|
||||
APPLE_AIRTAG_ADV_PATTERN,
|
||||
APPLE_FINDMY_PREFIX_SHORT,
|
||||
],
|
||||
service_uuids=[APPLE_FINDMY_SERVICE_UUID],
|
||||
name_patterns=['airtag'],
|
||||
min_manufacturer_data_len=22, # AirTags have 22+ byte payloads
|
||||
confidence_boost=0.2,
|
||||
),
|
||||
|
||||
# Apple Find My Accessory (non-AirTag)
|
||||
TrackerSignature(
|
||||
tracker_type=TrackerType.FINDMY_ACCESSORY,
|
||||
name='Find My Accessory',
|
||||
description='Third-party Apple Find My network accessory',
|
||||
company_id=APPLE_COMPANY_ID,
|
||||
manufacturer_data_prefixes=[
|
||||
APPLE_FINDMY_PREFIX_SHORT,
|
||||
APPLE_FINDMY_PREFIX_ALT,
|
||||
],
|
||||
service_uuids=[APPLE_FINDMY_SERVICE_UUID],
|
||||
name_patterns=['findmy', 'find my', 'chipolo one spot', 'belkin'],
|
||||
),
|
||||
|
||||
# Tile
|
||||
TrackerSignature(
|
||||
tracker_type=TrackerType.TILE,
|
||||
name='Tile Tracker',
|
||||
description='Tile Bluetooth tracker',
|
||||
company_ids=[TILE_COMPANY_ID, TILE_ALT_COMPANY_ID],
|
||||
service_uuids=[TILE_SERVICE_UUID],
|
||||
mac_prefixes=TILE_MAC_PREFIXES,
|
||||
name_patterns=['tile'],
|
||||
),
|
||||
|
||||
# Samsung SmartTag
|
||||
TrackerSignature(
|
||||
tracker_type=TrackerType.SAMSUNG_SMARTTAG,
|
||||
name='Samsung SmartTag',
|
||||
description='Samsung SmartThings tracker',
|
||||
company_id=SAMSUNG_COMPANY_ID,
|
||||
service_uuids=[SMARTTAG_SERVICE_UUID],
|
||||
mac_prefixes=SMARTTAG_MAC_PREFIXES,
|
||||
name_patterns=['smarttag', 'smart tag', 'galaxy tag'],
|
||||
),
|
||||
|
||||
# Chipolo
|
||||
TrackerSignature(
|
||||
tracker_type=TrackerType.CHIPOLO,
|
||||
name='Chipolo',
|
||||
description='Chipolo Bluetooth tracker',
|
||||
company_id=CHIPOLO_COMPANY_ID,
|
||||
service_uuids=[CHIPOLO_SERVICE_UUID, CHIPOLO_ALT_SERVICE],
|
||||
name_patterns=['chipolo'],
|
||||
),
|
||||
|
||||
# PebbleBee
|
||||
TrackerSignature(
|
||||
tracker_type=TrackerType.PEBBLEBEE,
|
||||
name='PebbleBee',
|
||||
description='PebbleBee Bluetooth tracker',
|
||||
service_uuids=[PEBBLEBEE_SERVICE_UUID],
|
||||
mac_prefixes=PEBBLEBEE_MAC_PREFIXES,
|
||||
name_patterns=['pebblebee', 'pebble bee', 'honey'],
|
||||
),
|
||||
|
||||
# Eufy
|
||||
TrackerSignature(
|
||||
tracker_type=TrackerType.EUFY,
|
||||
name='Eufy SmartTrack',
|
||||
description='Eufy/Anker smart tracker',
|
||||
company_id=EUFY_COMPANY_ID,
|
||||
name_patterns=['eufy', 'smarttrack'],
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TRACKER DETECTION RESULT
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class TrackerDetectionResult:
|
||||
"""Result of tracker detection analysis."""
|
||||
|
||||
is_tracker: bool = False
|
||||
tracker_type: TrackerType = TrackerType.NOT_A_TRACKER
|
||||
tracker_name: str = ''
|
||||
confidence: TrackerConfidence = TrackerConfidence.NONE
|
||||
confidence_score: float = 0.0 # 0.0 to 1.0
|
||||
evidence: list[str] = field(default_factory=list)
|
||||
matched_signature: Optional[str] = None
|
||||
|
||||
# For suspicious presence heuristics
|
||||
risk_factors: list[str] = field(default_factory=list)
|
||||
risk_score: float = 0.0 # 0.0 to 1.0
|
||||
|
||||
# Raw data used for detection
|
||||
manufacturer_id: Optional[int] = None
|
||||
manufacturer_data_hex: Optional[str] = None
|
||||
service_uuids_found: list[str] = field(default_factory=list)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for JSON serialization."""
|
||||
return {
|
||||
'is_tracker': self.is_tracker,
|
||||
'tracker_type': self.tracker_type.value if self.tracker_type else None,
|
||||
'tracker_name': self.tracker_name,
|
||||
'confidence': self.confidence.value if self.confidence else None,
|
||||
'confidence_score': round(self.confidence_score, 2),
|
||||
'evidence': self.evidence,
|
||||
'matched_signature': self.matched_signature,
|
||||
'risk_factors': self.risk_factors,
|
||||
'risk_score': round(self.risk_score, 2),
|
||||
'manufacturer_id': self.manufacturer_id,
|
||||
'manufacturer_data_hex': self.manufacturer_data_hex,
|
||||
'service_uuids_found': self.service_uuids_found,
|
||||
}
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# DEVICE FINGERPRINT (survives MAC randomization)
|
||||
# =============================================================================
|
||||
|
||||
@dataclass
|
||||
class DeviceFingerprint:
|
||||
"""
|
||||
Stable fingerprint for a BLE device that can survive MAC randomization.
|
||||
|
||||
Uses stable parts of the advertising payload to create a probabilistic
|
||||
identity. This is NOT perfect - randomized devices may produce different
|
||||
fingerprints over time. Document this as a limitation.
|
||||
"""
|
||||
|
||||
fingerprint_id: str # SHA256 hash of stable features
|
||||
|
||||
# Features used for fingerprinting
|
||||
manufacturer_id: Optional[int] = None
|
||||
manufacturer_data_prefix: Optional[bytes] = None # First 4 bytes (stable across MACs)
|
||||
manufacturer_data_length: int = 0
|
||||
service_uuids: list[str] = field(default_factory=list)
|
||||
service_data_keys: list[str] = field(default_factory=list)
|
||||
tx_power_bucket: Optional[str] = None # "high"/"medium"/"low"
|
||||
name_hint: Optional[str] = None
|
||||
|
||||
# Confidence in this fingerprint's stability
|
||||
stability_confidence: float = 0.5 # 0.0-1.0
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
"""Convert to dictionary for JSON serialization."""
|
||||
return {
|
||||
'fingerprint_id': self.fingerprint_id,
|
||||
'manufacturer_id': self.manufacturer_id,
|
||||
'manufacturer_data_prefix': self.manufacturer_data_prefix.hex() if self.manufacturer_data_prefix else None,
|
||||
'manufacturer_data_length': self.manufacturer_data_length,
|
||||
'service_uuids': self.service_uuids,
|
||||
'service_data_keys': self.service_data_keys,
|
||||
'tx_power_bucket': self.tx_power_bucket,
|
||||
'name_hint': self.name_hint,
|
||||
'stability_confidence': round(self.stability_confidence, 2),
|
||||
}
|
||||
|
||||
|
||||
def generate_fingerprint(
|
||||
manufacturer_id: Optional[int],
|
||||
manufacturer_data: Optional[bytes],
|
||||
service_uuids: list[str],
|
||||
service_data: dict[str, bytes],
|
||||
tx_power: Optional[int],
|
||||
name: Optional[str],
|
||||
) -> DeviceFingerprint:
|
||||
"""
|
||||
Generate a stable fingerprint for a BLE device.
|
||||
|
||||
Fingerprint is based on stable parts of the advertising payload that
|
||||
typically persist across MAC address rotations.
|
||||
|
||||
Limitations:
|
||||
- Devices that fully randomize their payload will not be consistently tracked
|
||||
- Some devices change manufacturer data patterns periodically
|
||||
- Best for trackers which have consistent advertising patterns
|
||||
"""
|
||||
# Build fingerprint features
|
||||
features = []
|
||||
stability_score = 0.0
|
||||
|
||||
mfr_prefix = None
|
||||
mfr_length = 0
|
||||
|
||||
if manufacturer_id is not None:
|
||||
features.append(f'mfr:{manufacturer_id:04x}')
|
||||
stability_score += 0.2
|
||||
|
||||
if manufacturer_data:
|
||||
mfr_length = len(manufacturer_data)
|
||||
features.append(f'mfr_len:{mfr_length}')
|
||||
stability_score += 0.1
|
||||
|
||||
# First 4 bytes of manufacturer data are often stable
|
||||
mfr_prefix = manufacturer_data[:min(4, len(manufacturer_data))]
|
||||
features.append(f'mfr_pfx:{mfr_prefix.hex()}')
|
||||
stability_score += 0.2
|
||||
|
||||
sorted_uuids = sorted(service_uuids)
|
||||
if sorted_uuids:
|
||||
features.append(f'uuids:{",".join(sorted_uuids)}')
|
||||
stability_score += 0.2
|
||||
|
||||
sd_keys = sorted(service_data.keys())
|
||||
if sd_keys:
|
||||
features.append(f'sd_keys:{",".join(sd_keys)}')
|
||||
stability_score += 0.1
|
||||
|
||||
# TX power bucket
|
||||
tx_bucket = None
|
||||
if tx_power is not None:
|
||||
if tx_power >= 0:
|
||||
tx_bucket = 'high'
|
||||
elif tx_power >= -10:
|
||||
tx_bucket = 'medium'
|
||||
else:
|
||||
tx_bucket = 'low'
|
||||
features.append(f'tx:{tx_bucket}')
|
||||
stability_score += 0.05
|
||||
|
||||
# Name hint (for devices that advertise names)
|
||||
name_hint = None
|
||||
if name:
|
||||
# Only use first word of name (often stable)
|
||||
name_hint = name.split()[0].lower() if name else None
|
||||
if name_hint:
|
||||
features.append(f'name:{name_hint}')
|
||||
stability_score += 0.15
|
||||
|
||||
# Generate fingerprint ID
|
||||
feature_str = '|'.join(features)
|
||||
fingerprint_id = hashlib.sha256(feature_str.encode()).hexdigest()[:16]
|
||||
|
||||
return DeviceFingerprint(
|
||||
fingerprint_id=fingerprint_id,
|
||||
manufacturer_id=manufacturer_id,
|
||||
manufacturer_data_prefix=mfr_prefix,
|
||||
manufacturer_data_length=mfr_length,
|
||||
service_uuids=sorted_uuids,
|
||||
service_data_keys=sd_keys,
|
||||
tx_power_bucket=tx_bucket,
|
||||
name_hint=name_hint,
|
||||
stability_confidence=min(1.0, stability_score),
|
||||
)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# TRACKER DETECTION ENGINE
|
||||
# =============================================================================
|
||||
|
||||
class TrackerSignatureEngine:
|
||||
"""
|
||||
Engine for detecting known BLE trackers from advertising data.
|
||||
|
||||
Detection is based on multiple indicators:
|
||||
1. Manufacturer ID matching known tracker companies
|
||||
2. Manufacturer data patterns specific to tracker types
|
||||
3. Service UUID matching known tracker services
|
||||
4. MAC address prefix matching known tracker OUIs
|
||||
5. Device name pattern matching
|
||||
|
||||
Confidence is cumulative - more matching indicators = higher confidence.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self.signatures = TRACKER_SIGNATURES
|
||||
|
||||
# Tracking for suspicious presence detection
|
||||
self._sighting_history: dict[str, list[datetime]] = {}
|
||||
self._fingerprint_cache: dict[str, DeviceFingerprint] = {}
|
||||
|
||||
def detect_tracker(
|
||||
self,
|
||||
address: str,
|
||||
address_type: str,
|
||||
name: Optional[str] = None,
|
||||
manufacturer_id: Optional[int] = None,
|
||||
manufacturer_data: Optional[bytes] = None,
|
||||
service_uuids: Optional[list[str]] = None,
|
||||
service_data: Optional[dict[str, bytes]] = None,
|
||||
tx_power: Optional[int] = None,
|
||||
) -> TrackerDetectionResult:
|
||||
"""
|
||||
Analyze a BLE device for tracker indicators.
|
||||
|
||||
Returns a TrackerDetectionResult with:
|
||||
- is_tracker: True if any tracker indicators match
|
||||
- tracker_type: The most likely tracker type
|
||||
- confidence: HIGH/MEDIUM/LOW based on indicator strength
|
||||
- evidence: List of matching indicators for transparency
|
||||
|
||||
IMPORTANT: This is heuristic detection. A match indicates
|
||||
the device RESEMBLES a known tracker, not proof it IS one.
|
||||
"""
|
||||
result = TrackerDetectionResult()
|
||||
service_uuids = service_uuids or []
|
||||
service_data = service_data or {}
|
||||
|
||||
# Store raw data in result for transparency
|
||||
result.manufacturer_id = manufacturer_id
|
||||
if manufacturer_data:
|
||||
result.manufacturer_data_hex = manufacturer_data.hex()
|
||||
result.service_uuids_found = service_uuids
|
||||
|
||||
# Normalize service UUIDs to lowercase 16-bit format where possible
|
||||
normalized_uuids = self._normalize_service_uuids(service_uuids)
|
||||
|
||||
# Score each signature
|
||||
best_match = None
|
||||
best_score = 0.0
|
||||
best_evidence = []
|
||||
|
||||
for signature in self.signatures:
|
||||
score, evidence = self._score_signature(
|
||||
signature=signature,
|
||||
address=address,
|
||||
name=name,
|
||||
manufacturer_id=manufacturer_id,
|
||||
manufacturer_data=manufacturer_data,
|
||||
normalized_uuids=normalized_uuids,
|
||||
service_data=service_data,
|
||||
)
|
||||
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best_match = signature
|
||||
best_evidence = evidence
|
||||
|
||||
# Check for generic tracker indicators if no specific match
|
||||
if best_score < 0.3:
|
||||
generic_score, generic_evidence = self._check_generic_tracker_indicators(
|
||||
address=address,
|
||||
address_type=address_type,
|
||||
manufacturer_id=manufacturer_id,
|
||||
manufacturer_data=manufacturer_data,
|
||||
normalized_uuids=normalized_uuids,
|
||||
)
|
||||
if generic_score > best_score:
|
||||
best_score = generic_score
|
||||
best_match = None
|
||||
best_evidence = generic_evidence
|
||||
|
||||
# Build result
|
||||
if best_score >= 0.3: # Minimum threshold for tracker detection
|
||||
result.is_tracker = True
|
||||
result.confidence_score = min(1.0, best_score)
|
||||
result.evidence = best_evidence
|
||||
|
||||
if best_match:
|
||||
result.tracker_type = best_match.tracker_type
|
||||
result.tracker_name = best_match.name
|
||||
result.matched_signature = best_match.name
|
||||
else:
|
||||
result.tracker_type = TrackerType.UNKNOWN_TRACKER
|
||||
result.tracker_name = 'Unknown Tracker'
|
||||
|
||||
# Determine confidence level
|
||||
if best_score >= 0.7:
|
||||
result.confidence = TrackerConfidence.HIGH
|
||||
elif best_score >= 0.5:
|
||||
result.confidence = TrackerConfidence.MEDIUM
|
||||
else:
|
||||
result.confidence = TrackerConfidence.LOW
|
||||
|
||||
return result
|
||||
|
||||
def _score_signature(
|
||||
self,
|
||||
signature: TrackerSignature,
|
||||
address: str,
|
||||
name: Optional[str],
|
||||
manufacturer_id: Optional[int],
|
||||
manufacturer_data: Optional[bytes],
|
||||
normalized_uuids: list[str],
|
||||
service_data: dict[str, bytes],
|
||||
) -> tuple[float, list[str]]:
|
||||
"""Score how well a device matches a tracker signature."""
|
||||
score = 0.0
|
||||
evidence = []
|
||||
|
||||
# Check company ID
|
||||
# For Apple, company ID alone is NOT enough - require additional indicators
|
||||
# Many Apple devices (AirPods, Watch, etc.) share the same manufacturer ID
|
||||
company_id_matches = False
|
||||
if manufacturer_id is not None:
|
||||
if signature.company_id == manufacturer_id:
|
||||
company_id_matches = True
|
||||
elif manufacturer_id in signature.company_ids:
|
||||
company_id_matches = True
|
||||
|
||||
# For Apple devices, only add company ID score if we also have Find My indicators
|
||||
if company_id_matches:
|
||||
if manufacturer_id == APPLE_COMPANY_ID:
|
||||
# Apple devices need additional proof - just the company ID isn't enough
|
||||
# Only give full score if we have the manufacturer data pattern or service UUID
|
||||
has_findmy_pattern = False
|
||||
if manufacturer_data and len(manufacturer_data) >= 1:
|
||||
adv_type = manufacturer_data[0]
|
||||
if adv_type == APPLE_FINDMY_ADV_TYPE: # 0x12 = Find My
|
||||
has_findmy_pattern = True
|
||||
|
||||
has_findmy_service = APPLE_FINDMY_SERVICE_UUID in normalized_uuids
|
||||
|
||||
if has_findmy_pattern or has_findmy_service:
|
||||
score += 0.35
|
||||
evidence.append(f'Manufacturer ID 0x{manufacturer_id:04X} matches {signature.name}')
|
||||
# Don't add score for Apple manufacturer ID without Find My indicators
|
||||
else:
|
||||
# Non-Apple trackers - company ID is strong evidence
|
||||
score += 0.35
|
||||
evidence.append(f'Manufacturer ID 0x{manufacturer_id:04X} matches {signature.name}')
|
||||
|
||||
# Check manufacturer data prefix (high weight for specific patterns)
|
||||
if manufacturer_data and signature.manufacturer_data_prefixes:
|
||||
for prefix in signature.manufacturer_data_prefixes:
|
||||
if manufacturer_data.startswith(prefix):
|
||||
score += 0.30
|
||||
evidence.append(f'Manufacturer data pattern matches {signature.name}')
|
||||
break
|
||||
|
||||
# Check manufacturer data length
|
||||
if manufacturer_data and signature.min_manufacturer_data_len > 0:
|
||||
if len(manufacturer_data) >= signature.min_manufacturer_data_len:
|
||||
score += 0.10
|
||||
evidence.append(f'Manufacturer data length ({len(manufacturer_data)} bytes) consistent with {signature.name}')
|
||||
|
||||
# Check service UUIDs (medium weight)
|
||||
for sig_uuid in signature.service_uuids:
|
||||
if sig_uuid.lower() in normalized_uuids:
|
||||
score += 0.25
|
||||
evidence.append(f'Service UUID {sig_uuid} matches {signature.name}')
|
||||
break
|
||||
|
||||
# Check MAC prefix (medium weight)
|
||||
if signature.mac_prefixes:
|
||||
mac_upper = address.upper()
|
||||
for prefix in signature.mac_prefixes:
|
||||
if mac_upper.startswith(prefix):
|
||||
score += 0.20
|
||||
evidence.append(f'MAC prefix {prefix} matches known {signature.name} range')
|
||||
break
|
||||
|
||||
# Check name patterns (lower weight - can be spoofed)
|
||||
if name and signature.name_patterns:
|
||||
name_lower = name.lower()
|
||||
for pattern in signature.name_patterns:
|
||||
if pattern.lower() in name_lower:
|
||||
score += 0.15
|
||||
evidence.append(f'Device name "{name}" contains pattern "{pattern}"')
|
||||
break
|
||||
|
||||
# Apply confidence boost for specific signatures
|
||||
score += signature.confidence_boost
|
||||
|
||||
return score, evidence
|
||||
|
||||
def _check_generic_tracker_indicators(
|
||||
self,
|
||||
address: str,
|
||||
address_type: str,
|
||||
manufacturer_id: Optional[int],
|
||||
manufacturer_data: Optional[bytes],
|
||||
normalized_uuids: list[str],
|
||||
) -> tuple[float, list[str]]:
|
||||
"""Check for generic tracker-like indicators."""
|
||||
score = 0.0
|
||||
evidence = []
|
||||
|
||||
# Apple Find My service UUID without specific AirTag pattern
|
||||
if APPLE_FINDMY_SERVICE_UUID in normalized_uuids:
|
||||
score += 0.4
|
||||
evidence.append('Uses Apple Find My network service (fd6f)')
|
||||
|
||||
# Apple manufacturer with Find My advertisement type
|
||||
if manufacturer_id == APPLE_COMPANY_ID and manufacturer_data:
|
||||
if len(manufacturer_data) >= 2:
|
||||
adv_type = manufacturer_data[0]
|
||||
if adv_type == APPLE_FINDMY_ADV_TYPE:
|
||||
score += 0.35
|
||||
evidence.append('Apple Find My network advertisement detected')
|
||||
|
||||
# Check for beacon-like service UUIDs
|
||||
for beacon_uuid in BEACON_SERVICE_UUIDS:
|
||||
if beacon_uuid in normalized_uuids:
|
||||
score += 0.15
|
||||
evidence.append(f'Uses beacon service UUID ({beacon_uuid})')
|
||||
break
|
||||
|
||||
# Random address (most trackers use random addresses)
|
||||
if address_type in ('random', 'rpa', 'nrpa'):
|
||||
# This is a weak indicator - many devices use random addresses
|
||||
if score > 0: # Only add if other indicators present
|
||||
score += 0.05
|
||||
evidence.append('Uses randomized MAC address')
|
||||
|
||||
# Small manufacturer data payload typical of beacons
|
||||
if manufacturer_data and 20 <= len(manufacturer_data) <= 30:
|
||||
if score > 0:
|
||||
score += 0.05
|
||||
evidence.append(f'Manufacturer data length ({len(manufacturer_data)} bytes) typical of beacon')
|
||||
|
||||
return score, evidence
|
||||
|
||||
def _normalize_service_uuids(self, uuids: list[str]) -> list[str]:
|
||||
"""Normalize service UUIDs to lowercase, extracting 16-bit UUIDs where possible."""
|
||||
normalized = []
|
||||
for uuid in uuids:
|
||||
uuid_lower = uuid.lower()
|
||||
# Extract 16-bit UUID from full 128-bit Bluetooth Base UUID
|
||||
# Format: 0000XXXX-0000-1000-8000-00805f9b34fb
|
||||
if len(uuid_lower) == 36 and uuid_lower.endswith('-0000-1000-8000-00805f9b34fb'):
|
||||
short_uuid = uuid_lower[4:8]
|
||||
normalized.append(short_uuid)
|
||||
else:
|
||||
normalized.append(uuid_lower)
|
||||
return normalized
|
||||
|
||||
def generate_device_fingerprint(
|
||||
self,
|
||||
manufacturer_id: Optional[int],
|
||||
manufacturer_data: Optional[bytes],
|
||||
service_uuids: list[str],
|
||||
service_data: dict[str, bytes],
|
||||
tx_power: Optional[int],
|
||||
name: Optional[str],
|
||||
) -> DeviceFingerprint:
|
||||
"""Generate a fingerprint for device tracking across MAC rotations."""
|
||||
return generate_fingerprint(
|
||||
manufacturer_id=manufacturer_id,
|
||||
manufacturer_data=manufacturer_data,
|
||||
service_uuids=service_uuids,
|
||||
service_data=service_data,
|
||||
tx_power=tx_power,
|
||||
name=name,
|
||||
)
|
||||
|
||||
def record_sighting(self, fingerprint_id: str, timestamp: Optional[datetime] = None) -> int:
|
||||
"""
|
||||
Record a device sighting for persistence tracking.
|
||||
|
||||
Returns the number of times this fingerprint has been seen.
|
||||
"""
|
||||
ts = timestamp or datetime.now()
|
||||
|
||||
if fingerprint_id not in self._sighting_history:
|
||||
self._sighting_history[fingerprint_id] = []
|
||||
|
||||
# Keep only last 24 hours of sightings
|
||||
cutoff = ts - timedelta(hours=24)
|
||||
self._sighting_history[fingerprint_id] = [
|
||||
t for t in self._sighting_history[fingerprint_id]
|
||||
if t > cutoff
|
||||
]
|
||||
|
||||
self._sighting_history[fingerprint_id].append(ts)
|
||||
return len(self._sighting_history[fingerprint_id])
|
||||
|
||||
def get_sighting_count(self, fingerprint_id: str, window_hours: int = 24) -> int:
|
||||
"""Get the number of times a fingerprint has been seen in the time window."""
|
||||
if fingerprint_id not in self._sighting_history:
|
||||
return 0
|
||||
|
||||
cutoff = datetime.now() - timedelta(hours=window_hours)
|
||||
return sum(1 for t in self._sighting_history[fingerprint_id] if t > cutoff)
|
||||
|
||||
def evaluate_suspicious_presence(
|
||||
self,
|
||||
fingerprint_id: str,
|
||||
is_tracker: bool,
|
||||
seen_count: int,
|
||||
duration_seconds: float,
|
||||
seen_rate: float,
|
||||
rssi_variance: Optional[float],
|
||||
is_new: bool,
|
||||
) -> tuple[float, list[str]]:
|
||||
"""
|
||||
Evaluate if a device shows suspicious "following" behavior.
|
||||
|
||||
Returns (risk_score, risk_factors) where:
|
||||
- risk_score: 0.0-1.0 indicating likelihood of suspicious presence
|
||||
- risk_factors: List of reasons contributing to the score
|
||||
|
||||
IMPORTANT: These are HEURISTICS only. They indicate patterns that
|
||||
MIGHT suggest a device is following/tracking, but cannot prove intent.
|
||||
Always present to users with appropriate caveats.
|
||||
"""
|
||||
risk_score = 0.0
|
||||
risk_factors = []
|
||||
|
||||
# Tracker baseline - if it's a tracker, start with some risk
|
||||
if is_tracker:
|
||||
risk_score += 0.3
|
||||
risk_factors.append('Device matches known tracker signature')
|
||||
|
||||
# Heuristic 1: Persistently near - seen many times over a long period
|
||||
if seen_count >= 20 and duration_seconds >= 600: # 10+ minutes
|
||||
points = min(0.25, (seen_count / 100) * 0.25)
|
||||
risk_score += points
|
||||
risk_factors.append(f'Persistently present: seen {seen_count} times over {duration_seconds/60:.1f} min')
|
||||
elif seen_count >= 50:
|
||||
risk_score += 0.2
|
||||
risk_factors.append(f'High observation count: {seen_count} sightings')
|
||||
|
||||
# Heuristic 2: Consistent presence rate (beacon-like behavior)
|
||||
if seen_rate >= 3.0: # 3+ observations per minute
|
||||
points = min(0.15, (seen_rate / 10) * 0.15)
|
||||
risk_score += points
|
||||
risk_factors.append(f'Beacon-like presence: {seen_rate:.1f} obs/min')
|
||||
|
||||
# Heuristic 3: Stable RSSI (moving with us, same relative distance)
|
||||
if rssi_variance is not None and rssi_variance < 10:
|
||||
risk_score += 0.1
|
||||
risk_factors.append(f'Stable signal strength (variance: {rssi_variance:.1f})')
|
||||
|
||||
# Heuristic 4: New device appearing (not in baseline)
|
||||
if is_new and is_tracker:
|
||||
risk_score += 0.15
|
||||
risk_factors.append('New tracker appeared after baseline was set')
|
||||
|
||||
# Cross-session persistence (from sighting history)
|
||||
historical_count = self.get_sighting_count(fingerprint_id, window_hours=24)
|
||||
if historical_count >= 10:
|
||||
points = min(0.15, (historical_count / 50) * 0.15)
|
||||
risk_score += points
|
||||
risk_factors.append(f'Seen across multiple sessions: {historical_count} total sightings in 24h')
|
||||
|
||||
return min(1.0, risk_score), risk_factors
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# SINGLETON ENGINE INSTANCE
|
||||
# =============================================================================
|
||||
|
||||
_engine_instance: Optional[TrackerSignatureEngine] = None
|
||||
|
||||
|
||||
def get_tracker_engine() -> TrackerSignatureEngine:
|
||||
"""Get the singleton tracker signature engine instance."""
|
||||
global _engine_instance
|
||||
if _engine_instance is None:
|
||||
_engine_instance = TrackerSignatureEngine()
|
||||
return _engine_instance
|
||||
|
||||
|
||||
def detect_tracker(
|
||||
address: str,
|
||||
address_type: str = 'public',
|
||||
name: Optional[str] = None,
|
||||
manufacturer_id: Optional[int] = None,
|
||||
manufacturer_data: Optional[bytes] = None,
|
||||
service_uuids: Optional[list[str]] = None,
|
||||
service_data: Optional[dict[str, bytes]] = None,
|
||||
tx_power: Optional[int] = None,
|
||||
) -> TrackerDetectionResult:
|
||||
"""
|
||||
Convenience function to detect if a BLE device is a tracker.
|
||||
|
||||
See TrackerSignatureEngine.detect_tracker for full documentation.
|
||||
"""
|
||||
engine = get_tracker_engine()
|
||||
return engine.detect_tracker(
|
||||
address=address,
|
||||
address_type=address_type,
|
||||
name=name,
|
||||
manufacturer_id=manufacturer_id,
|
||||
manufacturer_data=manufacturer_data,
|
||||
service_uuids=service_uuids,
|
||||
service_data=service_data,
|
||||
tx_power=tx_power,
|
||||
)
|
||||
Reference in New Issue
Block a user