_FM = "FM Broadcast Radio"


def _names(results):
    """Return the ``name`` field of every result, in result order."""
    return [entry["name"] for entry in results]


def _entry(results, name, *default):
    """Return the first result whose ``name`` equals *name*.

    Without *default*, a missing entry raises ``StopIteration`` (the test
    errors out). With a default, that value is returned instead.
    """
    return next((entry for entry in results if entry["name"] == name), *default)


class TestLoadSignals:
    """Checks on the cached loader."""

    def test_returns_list(self):
        from utils.signal_db import load_signals

        loaded = load_signals()
        assert isinstance(loaded, list)
        assert len(loaded) > 0

    def test_cached_on_second_call(self):
        from utils.signal_db import load_signals

        # Identity, not equality: the second call must hand back the cached object.
        assert load_signals() is load_signals()


class TestMatchSignals:
    """Checks on match_signals() filtering, scoring and ordering.

    These tests rely on the contents of the project's signal database
    (entry names, ranges); those values are assumptions of the tests.
    """

    def test_fm_broadcast_matched_at_center(self):
        from utils.signal_db import match_signals

        assert _FM in _names(match_signals(frequency_mhz=98.5))

    def test_frequency_at_exact_range_boundary_included(self):
        from utils.signal_db import match_signals

        # Assumes the FM broadcast entry starts at exactly 87.5 MHz.
        assert _FM in _names(match_signals(frequency_mhz=87.5))

    def test_frequency_just_outside_range_excluded(self):
        from utils.signal_db import match_signals

        # 1 kHz below the assumed 87.5 MHz lower bound.
        assert _FM not in _names(match_signals(frequency_mhz=87.499))

    def test_no_matches_returns_empty_list(self):
        from utils.signal_db import match_signals

        # Assumes the database has no entry covering 5000 MHz.
        assert match_signals(frequency_mhz=5000.0) == []

    def test_results_have_score_field(self):
        from utils.signal_db import match_signals

        found = match_signals(frequency_mhz=98.5)
        assert len(found) > 0
        for entry in found:
            assert "score" in entry
            assert isinstance(entry["score"], int)
            assert 0 <= entry["score"] <= 100

    def test_results_have_match_reasons(self):
        from utils.signal_db import match_signals

        found = match_signals(frequency_mhz=98.5)
        assert len(found) > 0
        for entry in found:
            assert "match_reasons" in entry
            assert isinstance(entry["match_reasons"], list)

    def test_results_sorted_by_score_descending(self):
        from utils.signal_db import match_signals

        ranking = [entry["score"] for entry in match_signals(frequency_mhz=98.5)]
        assert ranking == sorted(ranking, reverse=True)

    def test_limit_respected(self):
        from utils.signal_db import match_signals

        assert len(match_signals(frequency_mhz=98.5, limit=2)) <= 2

    def test_limit_clamped_to_minimum_1(self):
        from utils.signal_db import match_signals

        # limit=0 must behave like limit=1, not return nothing.
        assert len(match_signals(frequency_mhz=98.5, limit=0)) >= 1

    def test_bandwidth_within_range_scores_higher(self):
        from utils.signal_db import match_signals

        # Assumes 200 kHz lies inside the FM broadcast bandwidth_range.
        scored_with_bw = _entry(
            match_signals(frequency_mhz=98.5, bandwidth_hz=200_000), _FM
        )
        scored_without_bw = _entry(match_signals(frequency_mhz=98.5), _FM)
        assert scored_with_bw["score"] >= scored_without_bw["score"]

    def test_bandwidth_outside_2x_scores_zero_for_bw_criterion(self):
        from utils.signal_db import match_signals

        # Assumes 600 kHz is more than twice the FM broadcast maximum bandwidth.
        fm = _entry(match_signals(frequency_mhz=98.5, bandwidth_hz=600_000), _FM, None)
        # The entry may be absent from the truncated list; only check it when present.
        if fm:
            assert "bandwidth: within typical" not in fm["match_reasons"]

    def test_modulation_exact_match_scores_higher(self):
        from utils.signal_db import match_signals

        scored_with_mod = _entry(
            match_signals(frequency_mhz=98.5, modulation="WFM"), _FM
        )
        scored_without_mod = _entry(match_signals(frequency_mhz=98.5), _FM)
        assert scored_with_mod["score"] >= scored_without_mod["score"]

    def test_modulation_mismatch_no_mod_reason(self):
        from utils.signal_db import match_signals

        fm = _entry(match_signals(frequency_mhz=98.5, modulation="LSB"), _FM, None)
        if fm:
            assert "modulation: exact match" not in fm["match_reasons"]

    def test_multi_range_signal_matched_by_any_range(self):
        from utils.signal_db import match_signals

        # Assumes the POCSAG entry lists one VHF and one UHF range;
        # 155 MHz and 455 MHz are expected to fall in one each.
        vhf_names = _names(match_signals(frequency_mhz=155.0))
        uhf_names = _names(match_signals(frequency_mhz=455.0))
        assert "POCSAG Pager" in vhf_names
        assert "POCSAG Pager" in uhf_names

    def test_region_mismatch_does_not_exclude_signal(self):
        from utils.signal_db import match_signals

        # Region only affects the score; a non-matching region must not filter.
        found = match_signals(frequency_mhz=446.09375, region="US")
        assert "PMR446 (Licence-Free UHF)" in _names(found)

    def test_original_signal_dict_not_mutated(self):
        from utils.signal_db import load_signals, match_signals

        cached = load_signals()
        id_before = cached[0]["id"]
        match_signals(frequency_mhz=98.5, modulation="WFM")
        assert cached[0]["id"] == id_before  # cached entry untouched
        assert "score" not in cached[0]  # score lives on the copy only
test_original_signal_dict_not_mutated(self): + from utils.signal_db import load_signals, match_signals + original = load_signals() + first_id_before = original[0]["id"] + match_signals(frequency_mhz=98.5, modulation="WFM") + assert original[0]["id"] == first_id_before # not mutated + assert "score" not in original[0] # score not added in-place diff --git a/utils/signal_db.py b/utils/signal_db.py new file mode 100644 index 0000000..434f7ca --- /dev/null +++ b/utils/signal_db.py @@ -0,0 +1,142 @@ +"""Signal database loader and match engine. + +Loads data/signals.json once at startup (lazy, cached). Provides a pure +match_signals() function that scores candidates by frequency, bandwidth, +modulation, and region. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + +from utils.logging import get_logger + +logger = get_logger('intercept.signal_db') + +_DB_PATH = Path(__file__).resolve().parent.parent / "data" / "signals.json" +_cache: list[dict[str, Any]] | None = None + + +def load_signals() -> list[dict[str, Any]]: + """Return cached signal list, loading from JSON on first call.""" + global _cache + if _cache is not None: + return _cache + + if not _DB_PATH.exists(): + logger.warning("signals.json not found at %s — signal matching will return no results", _DB_PATH) + _cache = [] + return _cache + + try: + with open(_DB_PATH) as f: + data = json.load(f) + if not isinstance(data, list): + raise ValueError("signals.json must be a JSON array") + _cache = data + logger.info("Loaded %d signals from %s", len(_cache), _DB_PATH) + except Exception as exc: + logger.error("Failed to load signals.json: %s", exc) + _cache = [] + + return _cache + + +def match_signals( + *, + frequency_mhz: float, + bandwidth_hz: int | None = None, + modulation: str | None = None, + region: str = "GLOBAL", + limit: int = 8, +) -> list[dict[str, Any]]: + """Return signals ranked by how well they match the given parameters. 
+ + Args: + frequency_mhz: Centre frequency to match (required). + bandwidth_hz: Observed signal bandwidth in Hz (optional — improves scoring). + modulation: Observed modulation token e.g. 'WFM', 'AM' (optional). + region: User's region code e.g. 'EU', 'US', 'GLOBAL'. + limit: Maximum number of results to return (clamped to 1–20). + + Returns: + List of signal dicts (copies) sorted by score descending, each with + added fields: score (int 0–100), match_reasons (list[str]). + """ + limit = max(1, min(limit, 20)) + target_hz = frequency_mhz * 1_000_000 + mod_upper = modulation.strip().upper() if modulation else None + region_upper = region.strip().upper() if region else "GLOBAL" + + candidates: list[dict[str, Any]] = [] + for sig in load_signals(): + ranges = sig.get("frequency_ranges", []) + if not any(r["min_hz"] <= target_hz <= r["max_hz"] for r in ranges): + continue + candidates.append(sig) + + scored: list[dict[str, Any]] = [] + for sig in candidates: + result = dict(sig) # shallow copy — do not mutate original + score = 0 + reasons: list[str] = [] + + # --- Frequency centrality (10–40 pts) --- + ranges = sig.get("frequency_ranges", []) + best = min( + ranges, + key=lambda r: abs(target_hz - (r["min_hz"] + r["max_hz"]) / 2), + ) + centre = (best["min_hz"] + best["max_hz"]) / 2 + half_span = (best["max_hz"] - best["min_hz"]) / 2 or 1 + centrality = 1.0 - min(abs(target_hz - centre) / half_span, 1.0) + freq_pts = int(10 + 30 * centrality) + score += freq_pts + if centrality >= 0.8: + reasons.append("frequency: centre of range") + elif centrality >= 0.4: + reasons.append("frequency: within range") + else: + reasons.append("frequency: edge of range") + + # --- Bandwidth match (0–30 pts) --- + bw_range = sig.get("bandwidth_range") + if bandwidth_hz is not None: + if bw_range is None: + score += 10 + elif bw_range["min_hz"] <= bandwidth_hz <= bw_range["max_hz"]: + score += 30 + reasons.append("bandwidth: within typical") + elif (bandwidth_hz <= bw_range["max_hz"] * 2 
+ and bandwidth_hz >= bw_range["min_hz"] // 2): + score += 15 + reasons.append("bandwidth: near typical") + # else: 0 pts, no reason added + else: + score += 15 # neutral — no bandwidth provided + + # --- Modulation match (0–20 pts) --- + sig_mods = [m.upper() for m in sig.get("modulations", [])] + if mod_upper: + if mod_upper in sig_mods: + score += 20 + reasons.append("modulation: exact match") + # else: 0 pts + else: + score += 10 # neutral — no modulation provided + + # --- Region match (5–10 pts) --- + sig_regions = [r.upper() for r in sig.get("regions", [])] + if "GLOBAL" in sig_regions or region_upper in sig_regions: + score += 10 + else: + score += 5 + + result["score"] = min(score, 100) + result["match_reasons"] = reasons + scored.append(result) + + scored.sort(key=lambda s: s["score"], reverse=True) + return scored[:limit]