feat: add signal_db loader and match_signals scoring function

Implements lazy-cached load_signals() and pure match_signals() with
frequency/bandwidth/modulation/region scoring for the signal ID feature.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
James Smith
2026-07-03 08:10:47 +01:00
parent 76fcce949c
commit 115dffd18e
2 changed files with 281 additions and 0 deletions
+139
View File
@@ -0,0 +1,139 @@
"""Unit tests for the signal database loader and match function."""
from __future__ import annotations
import pytest
class TestLoadSignals:
    """Checks for the lazy, cached signal database loader."""

    def test_returns_list(self):
        """The loader yields a non-empty list."""
        from utils import signal_db

        loaded = signal_db.load_signals()
        assert isinstance(loaded, list)
        assert loaded  # a non-empty list is truthy

    def test_cached_on_second_call(self):
        """Two consecutive calls hand back the very same list object."""
        from utils import signal_db

        assert signal_db.load_signals() is signal_db.load_signals()
class TestMatchSignals:
    """Behavioural tests for match_signals() against the bundled signal database."""

    def test_fm_broadcast_matched_at_center(self):
        from utils.signal_db import match_signals
        results = match_signals(frequency_mhz=98.5)
        names = [r["name"] for r in results]
        assert "FM Broadcast Radio" in names

    def test_frequency_at_exact_range_boundary_included(self):
        from utils.signal_db import match_signals
        # 87.5 MHz is the lower bound of FM broadcast
        results = match_signals(frequency_mhz=87.5)
        names = [r["name"] for r in results]
        assert "FM Broadcast Radio" in names

    def test_frequency_just_outside_range_excluded(self):
        from utils.signal_db import match_signals
        # 87.499 MHz is just below FM broadcast lower bound
        results = match_signals(frequency_mhz=87.499)
        names = [r["name"] for r in results]
        assert "FM Broadcast Radio" not in names

    def test_no_matches_returns_empty_list(self):
        from utils.signal_db import match_signals
        # 5000 MHz has no signals in our database
        results = match_signals(frequency_mhz=5000.0)
        assert results == []

    def test_results_have_score_field(self):
        from utils.signal_db import match_signals
        results = match_signals(frequency_mhz=98.5)
        assert len(results) > 0
        for r in results:
            assert "score" in r
            assert isinstance(r["score"], int)
            assert 0 <= r["score"] <= 100

    def test_results_have_match_reasons(self):
        from utils.signal_db import match_signals
        results = match_signals(frequency_mhz=98.5)
        assert len(results) > 0
        for r in results:
            assert "match_reasons" in r
            assert isinstance(r["match_reasons"], list)

    def test_results_sorted_by_score_descending(self):
        from utils.signal_db import match_signals
        results = match_signals(frequency_mhz=98.5)
        scores = [r["score"] for r in results]
        assert scores == sorted(scores, reverse=True)

    def test_limit_respected(self):
        from utils.signal_db import match_signals
        results = match_signals(frequency_mhz=98.5, limit=2)
        assert len(results) <= 2

    def test_limit_clamped_to_minimum_1(self):
        from utils.signal_db import match_signals
        # limit=0 must be raised to 1, so exactly one result comes back
        # (98.5 MHz is known to match at least FM broadcast).
        results = match_signals(frequency_mhz=98.5, limit=0)
        assert len(results) == 1

    def test_bandwidth_within_range_scores_higher(self):
        from utils.signal_db import match_signals
        # FM broadcast bandwidth_range is 150k–250k Hz; 200k is within
        with_bw = match_signals(frequency_mhz=98.5, bandwidth_hz=200_000)
        without_bw = match_signals(frequency_mhz=98.5)
        fm_with = next(r for r in with_bw if r["name"] == "FM Broadcast Radio")
        fm_without = next(r for r in without_bw if r["name"] == "FM Broadcast Radio")
        # Strictly greater: an in-range bandwidth must beat the neutral
        # "no bandwidth supplied" score, otherwise the criterion is a no-op.
        assert fm_with["score"] > fm_without["score"]
        assert "bandwidth: within typical" in fm_with["match_reasons"]

    def test_bandwidth_outside_2x_scores_zero_for_bw_criterion(self):
        from utils.signal_db import match_signals
        # FM broadcast max_bw is 250k Hz; 600k is > 2× that
        # limit=20 (the maximum) so FM cannot be cut off by higher-scoring entries.
        results = match_signals(frequency_mhz=98.5, bandwidth_hz=600_000, limit=20)
        fm = next((r for r in results if r["name"] == "FM Broadcast Radio"), None)
        # FM is still a candidate because the frequency matches; fail loudly
        # if it is missing rather than silently skipping the assertion.
        assert fm is not None
        # Zero bandwidth points means no bandwidth reason of any kind
        # (neither "within typical" nor "near typical").
        assert not any(reason.startswith("bandwidth:") for reason in fm["match_reasons"])

    def test_modulation_exact_match_scores_higher(self):
        from utils.signal_db import match_signals
        with_mod = match_signals(frequency_mhz=98.5, modulation="WFM")
        without_mod = match_signals(frequency_mhz=98.5)
        fm_with = next(r for r in with_mod if r["name"] == "FM Broadcast Radio")
        fm_without = next(r for r in without_mod if r["name"] == "FM Broadcast Radio")
        # Strictly greater: an exact modulation match must beat the neutral score.
        assert fm_with["score"] > fm_without["score"]
        assert "modulation: exact match" in fm_with["match_reasons"]

    def test_modulation_mismatch_no_mod_reason(self):
        from utils.signal_db import match_signals
        # limit=20 (the maximum) so FM cannot be cut off by higher-scoring entries.
        results = match_signals(frequency_mhz=98.5, modulation="LSB", limit=20)
        fm = next((r for r in results if r["name"] == "FM Broadcast Radio"), None)
        # A modulation mismatch lowers the score but must not drop the signal.
        assert fm is not None
        assert "modulation: exact match" not in fm["match_reasons"]

    def test_multi_range_signal_matched_by_any_range(self):
        from utils.signal_db import match_signals
        # POCSAG has ranges in 138-175 MHz and 450-470 MHz;
        # 155 MHz falls in the first range and 455 MHz in the second.
        results_vhf = match_signals(frequency_mhz=155.0)
        results_uhf = match_signals(frequency_mhz=455.0)
        vhf_names = [r["name"] for r in results_vhf]
        uhf_names = [r["name"] for r in results_uhf]
        assert "POCSAG Pager" in vhf_names
        assert "POCSAG Pager" in uhf_names

    def test_region_mismatch_does_not_exclude_signal(self):
        from utils.signal_db import match_signals
        # PMR446 is EU/UK only; should still appear with US region but may score lower
        results = match_signals(frequency_mhz=446.09375, region="US")
        names = [r["name"] for r in results]
        assert "PMR446 (Licence-Free UHF)" in names

    def test_original_signal_dict_not_mutated(self):
        from utils.signal_db import load_signals, match_signals
        original = load_signals()
        first_id_before = original[0]["id"]
        match_signals(frequency_mhz=98.5, modulation="WFM")
        assert original[0]["id"] == first_id_before  # not mutated
        # Neither of the per-result fields may leak into the cached entry.
        assert "score" not in original[0]
        assert "match_reasons" not in original[0]
+142
View File
@@ -0,0 +1,142 @@
"""Signal database loader and match engine.
Loads data/signals.json once, on first use (lazy, cached). Provides a pure
match_signals() function that scores candidates by frequency, bandwidth,
modulation, and region.
"""
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
from utils.logging import get_logger
# Logger used by the loader for its info/warning/error messages.
logger = get_logger("intercept.signal_db")

# data/signals.json, located under the parent of this module's directory.
_DB_PATH = Path(__file__).resolve().parent.parent.joinpath("data", "signals.json")

# None until load_signals() has run once; afterwards the loaded list, which
# is empty when loading failed.
_cache: list[dict[str, Any]] | None = None
def load_signals() -> list[dict[str, Any]]:
    """Return the cached signal list, loading it from JSON on first call.

    The file at ``_DB_PATH`` is read at most once. Whatever the outcome of
    that first attempt (success, missing file, or malformed content), the
    result is stored in the module-level cache, so later calls return the
    very same list object without touching the disk again.

    Returns:
        The list parsed from signals.json, or an empty list when the file is
        missing, unreadable, not valid JSON, or not a JSON array. Failures
        are logged rather than raised.
    """
    global _cache
    if _cache is not None:
        return _cache

    try:
        # Pin the encoding: without it open() falls back to the platform's
        # locale encoding, which can mis-decode or reject a UTF-8 JSON file.
        with open(_DB_PATH, encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, list):
            raise ValueError("signals.json must be a JSON array")
    except (FileNotFoundError, NotADirectoryError):
        # Attempt the open and handle absence, instead of a separate
        # exists() check that can go stale before the file is opened.
        logger.warning("signals.json not found at %s — signal matching will return no results", _DB_PATH)
        _cache = []
    except Exception as exc:
        # Deliberately broad: any problem with the database degrades to
        # "no matches" instead of propagating to callers.
        logger.error("Failed to load signals.json: %s", exc)
        _cache = []
    else:
        _cache = data
        logger.info("Loaded %d signals from %s", len(_cache), _DB_PATH)
    return _cache
def match_signals(
    *,
    frequency_mhz: float,
    bandwidth_hz: int | None = None,
    modulation: str | None = None,
    region: str = "GLOBAL",
    limit: int = 8,
) -> list[dict[str, Any]]:
    """Return signals ranked by how well they match the given parameters.

    A signal is a candidate only if at least one of its frequency ranges
    contains the requested frequency (bounds inclusive). Candidates are then
    scored on frequency centrality, bandwidth, modulation and region.

    Args:
        frequency_mhz: Centre frequency to match (required).
        bandwidth_hz: Observed signal bandwidth in Hz (optional — improves scoring).
        modulation: Observed modulation token e.g. 'WFM', 'AM' (optional).
        region: User's region code e.g. 'EU', 'US', 'GLOBAL'.
        limit: Maximum number of results to return (clamped to 1–20).

    Returns:
        List of signal dicts sorted by score descending, each with added
        fields: score (int 0–100), match_reasons (list[str]). Each dict is a
        shallow copy of the database entry: adding or replacing top-level
        keys is safe, but nested lists/dicts are shared with the cache.
    """
    import math  # function-scope import: only needed for the finiteness guard

    limit = max(1, min(limit, 20))

    # NaN or infinity cannot lie inside any range; return early because
    # round() below would raise on them.
    if not math.isfinite(frequency_mhz):
        return []

    # Work in whole hertz. Multiplying a decimal MHz value by 1e6 can land one
    # ulp above or below the intended integer, which would wrongly exclude a
    # frequency sitting exactly on a range boundary.
    target_hz = round(frequency_mhz * 1_000_000)
    mod_upper = modulation.strip().upper() if modulation else None
    region_upper = region.strip().upper() if region else "GLOBAL"

    scored: list[dict[str, Any]] = []
    for sig in load_signals():
        ranges = sig.get("frequency_ranges", [])
        containing = [r for r in ranges if r["min_hz"] <= target_hz <= r["max_hz"]]
        if not containing:
            continue

        result = dict(sig)  # shallow copy — do not mutate original
        reasons: list[str] = []

        # --- Frequency centrality (10–40 pts) ---
        # Only ranges that actually contain the target are considered; a
        # nearby range that does not contain it must not drag the score down.
        # When several ranges contain it, the most favourable one counts.
        centrality = max(_range_centrality(r, target_hz) for r in containing)
        score = int(10 + 30 * centrality)
        if centrality >= 0.8:
            reasons.append("frequency: centre of range")
        elif centrality >= 0.4:
            reasons.append("frequency: within range")
        else:
            reasons.append("frequency: edge of range")

        # --- Bandwidth match (0–30 pts) ---
        bw_range = sig.get("bandwidth_range")
        if bandwidth_hz is not None:
            if bw_range is None:
                # Nothing to compare against: small credit, no reason recorded.
                score += 10
            elif bw_range["min_hz"] <= bandwidth_hz <= bw_range["max_hz"]:
                score += 30
                reasons.append("bandwidth: within typical")
            elif (bandwidth_hz <= bw_range["max_hz"] * 2
                    and bandwidth_hz >= bw_range["min_hz"] // 2):
                # Within a factor of two of the typical range.
                score += 15
                reasons.append("bandwidth: near typical")
            # else: 0 pts, no reason added
        else:
            score += 15  # neutral — no bandwidth provided

        # --- Modulation match (0–20 pts) ---
        sig_mods = [m.upper() for m in sig.get("modulations", [])]
        if mod_upper:
            if mod_upper in sig_mods:
                score += 20
                reasons.append("modulation: exact match")
            # else: 0 pts
        else:
            score += 10  # neutral — no modulation provided

        # --- Region match (5–10 pts) ---
        # A region mismatch only lowers the score; it never excludes a signal.
        sig_regions = [r.upper() for r in sig.get("regions", [])]
        if "GLOBAL" in sig_regions or region_upper in sig_regions:
            score += 10
        else:
            score += 5

        result["score"] = min(score, 100)
        result["match_reasons"] = reasons
        scored.append(result)

    # list.sort is stable, so equal scores keep database order.
    scored.sort(key=lambda s: s["score"], reverse=True)
    return scored[:limit]


def _range_centrality(freq_range: dict[str, Any], target_hz: float) -> float:
    """Return 1.0 when target_hz is at the midpoint of freq_range, falling linearly to 0.0 at its edges."""
    centre = (freq_range["min_hz"] + freq_range["max_hz"]) / 2
    # A zero-width range would divide by zero; treat its half-span as 1 Hz.
    half_span = (freq_range["max_hz"] - freq_range["min_hz"]) / 2 or 1
    return 1.0 - min(abs(target_hz - centre) / half_span, 1.0)