mirror of
https://github.com/smittix/intercept.git
synced 2026-07-03 23:33:38 -07:00
feat: add signal_db loader and match_signals scoring function
Implements lazy-cached load_signals() and pure match_signals() with frequency/bandwidth/modulation/region scoring for the signal ID feature. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,139 @@
|
||||
"""Unit tests for the signal database loader and match function."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestLoadSignals:
    """Checks for the lazy, cached signal database loader."""

    def test_returns_list(self):
        """The loader hands back a non-empty list."""
        from utils.signal_db import load_signals

        loaded = load_signals()

        assert isinstance(loaded, list)
        assert len(loaded) > 0

    def test_cached_on_second_call(self):
        """Two consecutive calls return the very same list object."""
        from utils.signal_db import load_signals

        # Identity (not equality) proves the second call came from the cache.
        assert load_signals() is load_signals()
|
||||
|
||||
|
||||
class TestMatchSignals:
    """Behavioural tests for match_signals() scoring, filtering and ranking."""

    def test_fm_broadcast_matched_at_center(self):
        """A frequency in the middle of the FM band matches FM broadcast."""
        from utils.signal_db import match_signals

        results = match_signals(frequency_mhz=98.5)
        names = [r["name"] for r in results]
        assert "FM Broadcast Radio" in names

    def test_frequency_at_exact_range_boundary_included(self):
        """Range bounds are inclusive."""
        from utils.signal_db import match_signals

        # 87.5 MHz is the lower bound of FM broadcast
        results = match_signals(frequency_mhz=87.5)
        names = [r["name"] for r in results]
        assert "FM Broadcast Radio" in names

    def test_frequency_just_outside_range_excluded(self):
        """A frequency marginally below the band is not matched."""
        from utils.signal_db import match_signals

        # 87.499 MHz is just below FM broadcast lower bound
        results = match_signals(frequency_mhz=87.499)
        names = [r["name"] for r in results]
        assert "FM Broadcast Radio" not in names

    def test_no_matches_returns_empty_list(self):
        """No candidate ranges means an empty list, not an error."""
        from utils.signal_db import match_signals

        # 5000 MHz has no signals in our database
        results = match_signals(frequency_mhz=5000.0)
        assert results == []

    def test_results_have_score_field(self):
        """Every result carries an integer score within 0-100."""
        from utils.signal_db import match_signals

        results = match_signals(frequency_mhz=98.5)
        assert len(results) > 0
        for r in results:
            assert "score" in r
            assert isinstance(r["score"], int)
            assert 0 <= r["score"] <= 100

    def test_results_have_match_reasons(self):
        """Every result carries a list of match reasons."""
        from utils.signal_db import match_signals

        results = match_signals(frequency_mhz=98.5)
        assert len(results) > 0
        for r in results:
            assert "match_reasons" in r
            assert isinstance(r["match_reasons"], list)

    def test_results_sorted_by_score_descending(self):
        """Results are ordered best match first."""
        from utils.signal_db import match_signals

        results = match_signals(frequency_mhz=98.5)
        scores = [r["score"] for r in results]
        assert scores == sorted(scores, reverse=True)

    def test_limit_respected(self):
        """No more than ``limit`` results are returned."""
        from utils.signal_db import match_signals

        results = match_signals(frequency_mhz=98.5, limit=2)
        assert len(results) <= 2

    def test_limit_clamped_to_minimum_1(self):
        """A limit of 0 is clamped up to 1, so exactly one result comes back."""
        from utils.signal_db import match_signals

        results = match_signals(frequency_mhz=98.5, limit=0)
        assert len(results) == 1

    def test_bandwidth_within_range_scores_higher(self):
        """An in-range bandwidth beats the neutral no-bandwidth score."""
        from utils.signal_db import match_signals

        # FM broadcast bandwidth_range is 150k–250k Hz; 200k is within
        with_bw = match_signals(frequency_mhz=98.5, bandwidth_hz=200_000)
        without_bw = match_signals(frequency_mhz=98.5)
        fm_with = next(r for r in with_bw if r["name"] == "FM Broadcast Radio")
        fm_without = next(r for r in without_bw if r["name"] == "FM Broadcast Radio")
        # In-range bandwidth earns 30 pts versus 15 neutral, so strictly higher.
        assert fm_with["score"] > fm_without["score"]

    def test_bandwidth_outside_2x_scores_zero_for_bw_criterion(self):
        """A bandwidth beyond 2x the typical maximum yields no bandwidth reason."""
        from utils.signal_db import match_signals

        # FM broadcast max_bw is 250k Hz; 600k is > 2× that
        # Use the maximum limit so a low-scoring FM entry is not truncated away.
        results = match_signals(frequency_mhz=98.5, bandwidth_hz=600_000, limit=20)
        fm = next((r for r in results if r["name"] == "FM Broadcast Radio"), None)
        # Frequency alone keeps FM a candidate; fail loudly instead of passing
        # vacuously if it is missing.
        assert fm is not None
        assert "bandwidth: within typical" not in fm["match_reasons"]
        assert not any(reason.startswith("bandwidth:") for reason in fm["match_reasons"])

    def test_modulation_exact_match_scores_higher(self):
        """A matching modulation beats the neutral no-modulation score."""
        from utils.signal_db import match_signals

        with_mod = match_signals(frequency_mhz=98.5, modulation="WFM")
        without_mod = match_signals(frequency_mhz=98.5)
        fm_with = next(r for r in with_mod if r["name"] == "FM Broadcast Radio")
        fm_without = next(r for r in without_mod if r["name"] == "FM Broadcast Radio")
        # Exact modulation earns 20 pts versus 10 neutral, so strictly higher.
        assert fm_with["score"] > fm_without["score"]

    def test_modulation_mismatch_no_mod_reason(self):
        """A non-matching modulation adds no modulation reason."""
        from utils.signal_db import match_signals

        # Use the maximum limit so a low-scoring FM entry is not truncated away.
        results = match_signals(frequency_mhz=98.5, modulation="LSB", limit=20)
        fm = next((r for r in results if r["name"] == "FM Broadcast Radio"), None)
        # Fail loudly instead of passing vacuously if FM is missing.
        assert fm is not None
        assert "modulation: exact match" not in fm["match_reasons"]

    def test_multi_range_signal_matched_by_any_range(self):
        """A signal with several ranges matches inside each of them."""
        from utils.signal_db import match_signals

        # POCSAG has ranges in 138-175 MHz and 450-470 MHz:
        # 155 MHz falls in the first range, 455 MHz in the second.
        results_vhf = match_signals(frequency_mhz=155.0)
        results_uhf = match_signals(frequency_mhz=455.0)
        vhf_names = [r["name"] for r in results_vhf]
        uhf_names = [r["name"] for r in results_uhf]
        assert "POCSAG Pager" in vhf_names
        assert "POCSAG Pager" in uhf_names

    def test_region_mismatch_does_not_exclude_signal(self):
        """Region only affects the score; it never filters a signal out."""
        from utils.signal_db import match_signals

        # PMR446 is EU/UK only; should still appear with US region but may score lower
        results = match_signals(frequency_mhz=446.09375, region="US")
        names = [r["name"] for r in results]
        assert "PMR446 (Licence-Free UHF)" in names

    def test_original_signal_dict_not_mutated(self):
        """Matching must leave the cached database entries untouched."""
        import copy

        from utils.signal_db import load_signals, match_signals

        original = load_signals()
        # Snapshot the whole database: the first entry alone may not even be a
        # candidate for this frequency, so checking only it proves little.
        snapshot = copy.deepcopy(original)
        first_id_before = original[0]["id"]
        match_signals(frequency_mhz=98.5, modulation="WFM")
        assert original[0]["id"] == first_id_before  # not mutated
        assert "score" not in original[0]  # score not added in-place
        assert original == snapshot  # no entry gained or lost any field
|
||||
@@ -0,0 +1,142 @@
|
||||
"""Signal database loader and match engine.
|
||||
|
||||
Loads data/signals.json lazily on first use and caches the result. Provides a pure
|
||||
match_signals() function that scores candidates by frequency, bandwidth,
|
||||
modulation, and region.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from utils.logging import get_logger
|
||||
|
||||
logger = get_logger('intercept.signal_db')
|
||||
|
||||
_DB_PATH = Path(__file__).resolve().parent.parent / "data" / "signals.json"
|
||||
_cache: list[dict[str, Any]] | None = None
|
||||
|
||||
|
||||
def load_signals() -> list[dict[str, Any]]:
    """Return the cached signal list, loading it from JSON on first call.

    The file at ``_DB_PATH`` is read at most once: whatever the first call
    produces (including the empty list used on failure) is stored in the
    module-level ``_cache`` and the same list object is returned afterwards.

    Returns:
        The list of signal dicts. Empty when the database file is missing,
        cannot be read or parsed, or does not contain a JSON array. Top-level
        entries that are not JSON objects are dropped with a warning.
    """
    global _cache
    if _cache is not None:
        return _cache

    if not _DB_PATH.exists():
        logger.warning("signals.json not found at %s — signal matching will return no results", _DB_PATH)
        _cache = []
        return _cache

    try:
        # Read as UTF-8 explicitly rather than relying on the platform's
        # locale encoding, which differs between systems.
        with open(_DB_PATH, encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, list):
            raise ValueError("signals.json must be a JSON array")
    except Exception as exc:
        # Deliberately best-effort: a broken database disables matching
        # instead of crashing the caller.
        logger.error("Failed to load signals.json: %s", exc)
        _cache = []
    else:
        # Keep only JSON objects so consumers can safely call dict methods
        # on every entry.
        signals = [entry for entry in data if isinstance(entry, dict)]
        skipped = len(data) - len(signals)
        if skipped:
            logger.warning("Ignoring %d non-object entries in %s", skipped, _DB_PATH)
        _cache = signals
        logger.info("Loaded %d signals from %s", len(_cache), _DB_PATH)

    return _cache
|
||||
|
||||
|
||||
def _frequency_centrality(
    target_hz: float,
    ranges: list[dict[str, Any]],
) -> float | None:
    """Return the best centrality of *target_hz* among the ranges containing it.

    Centrality is 1.0 at the exact centre of a range and falls linearly to 0.0
    at either edge. Ranges that do not contain the target are ignored, so a
    nearby range the target is *not* in can never be used for scoring.

    Returns:
        A float in [0.0, 1.0], or None when no range contains the target.
    """
    best: float | None = None
    for rng in ranges:
        low, high = rng["min_hz"], rng["max_hz"]
        if not low <= target_hz <= high:
            continue
        centre = (low + high) / 2
        # A zero-width range would divide by zero; fall back to 1 Hz.
        half_span = (high - low) / 2 or 1
        centrality = 1.0 - min(abs(target_hz - centre) / half_span, 1.0)
        if best is None or centrality > best:
            best = centrality
    return best


def match_signals(
    *,
    frequency_mhz: float,
    bandwidth_hz: int | None = None,
    modulation: str | None = None,
    region: str = "GLOBAL",
    limit: int = 8,
) -> list[dict[str, Any]]:
    """Return signals ranked by how well they match the given parameters.

    A signal is a candidate only if at least one of its ``frequency_ranges``
    contains the target frequency (bounds inclusive). Candidates are then
    scored on frequency centrality (10–40 pts), bandwidth (0–30 pts),
    modulation (0–20 pts) and region (5–10 pts).

    Args:
        frequency_mhz: Centre frequency to match (required).
        bandwidth_hz: Observed signal bandwidth in Hz (optional — improves scoring).
        modulation: Observed modulation token e.g. 'WFM', 'AM' (optional,
            compared case-insensitively).
        region: User's region code e.g. 'EU', 'US', 'GLOBAL'.
        limit: Maximum number of results to return (clamped to 1–20).

    Returns:
        List of signal dicts sorted by score descending, each with added
        fields: score (int 0–100), match_reasons (list[str]). Each dict is a
        shallow copy of the database entry: top-level keys can be changed
        freely, but nested lists/dicts are shared with the cached database
        and must not be mutated.
    """
    limit = max(1, min(limit, 20))
    target_hz = frequency_mhz * 1_000_000
    mod_upper = modulation.strip().upper() if modulation else None
    region_upper = region.strip().upper() if region else "GLOBAL"

    scored: list[dict[str, Any]] = []
    for sig in load_signals():
        # --- Frequency centrality (10–40 pts); also the candidate filter ---
        # Scored against the ranges that actually contain the target, not
        # merely the range whose centre happens to be nearest.
        centrality = _frequency_centrality(target_hz, sig.get("frequency_ranges") or [])
        if centrality is None:
            continue  # target is outside every range of this signal

        score = int(10 + 30 * centrality)
        reasons: list[str] = []
        if centrality >= 0.8:
            reasons.append("frequency: centre of range")
        elif centrality >= 0.4:
            reasons.append("frequency: within range")
        else:
            reasons.append("frequency: edge of range")

        # --- Bandwidth match (0–30 pts) ---
        bw_range = sig.get("bandwidth_range")
        if bandwidth_hz is None:
            score += 15  # neutral — no bandwidth provided
        elif bw_range is None:
            score += 10  # signal has no typical bandwidth to compare against
        elif bw_range["min_hz"] <= bandwidth_hz <= bw_range["max_hz"]:
            score += 30
            reasons.append("bandwidth: within typical")
        elif bw_range["min_hz"] // 2 <= bandwidth_hz <= bw_range["max_hz"] * 2:
            # Within half of the minimum to double the maximum.
            score += 15
            reasons.append("bandwidth: near typical")
        # else: 0 pts, no reason added

        # --- Modulation match (0–20 pts) ---
        if mod_upper is None:
            score += 10  # neutral — no modulation provided
        elif mod_upper in {m.upper() for m in sig.get("modulations") or ()}:
            score += 20
            reasons.append("modulation: exact match")
        # else: 0 pts

        # --- Region match (5–10 pts) ---
        # A mismatch only lowers the score; it never excludes the signal.
        sig_regions = {r.upper() for r in sig.get("regions") or ()}
        if "GLOBAL" in sig_regions or region_upper in sig_regions:
            score += 10
        else:
            score += 5

        result = dict(sig)  # shallow copy — do not mutate the cached entry
        result["score"] = min(score, 100)
        result["match_reasons"] = reasons
        scored.append(result)

    # list.sort is stable, so equal scores keep their database order.
    scored.sort(key=lambda s: s["score"], reverse=True)
    return scored[:limit]
|
||||
Reference in New Issue
Block a user