fix: Correct SSTV VIS codes and replace Goertzel pixel decoder with Hilbert transform

Fix wrong VIS codes for PD90 (96→99), PD120 (93→95), PD180 (95→97),
PD240 (113→96), and ScottieDX (55→76). This caused PD180 to be detected
as PD90 and PD120 to fail entirely.

Replace batch Goertzel pixel decoding with analytic signal (Hilbert
transform) FM demodulation. The Goertzel approach used 96-sample windows
with ~500 Hz resolution — more than half of the 800 Hz pixel frequency
range (1500–2300 Hz) — making accurate pixel decoding impossible for fast
modes like Martin2
and Scottie2. The Hilbert method computes per-sample instantaneous
frequency, matching the approach used by QSSTV and other professional
SSTV decoders.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Smittix
2026-02-19 09:23:15 +00:00
parent 481651c88d
commit 17f6947648
4 changed files with 55 additions and 50 deletions
+4 -4
View File
@@ -354,15 +354,15 @@ class TestVISDetector:
assert mode_name == 'Scottie1'
def test_detect_pd120(self):
"""Should detect PD120 VIS code (93)."""
"""Should detect PD120 VIS code (95)."""
detector = VISDetector()
header = generate_vis_header(93) # PD120
header = generate_vis_header(95) # PD120
audio = np.concatenate([np.zeros(2400), header, np.zeros(2400)])
result = detector.feed(audio)
assert result is not None
vis_code, mode_name = result
assert vis_code == 93
assert vis_code == 95
assert mode_name == 'PD120'
def test_noise_rejection(self):
@@ -520,7 +520,7 @@ class TestModes:
def test_all_vis_codes_have_modes(self):
"""All defined VIS codes should have matching mode specs."""
for vis_code in [8, 12, 44, 40, 60, 56, 93, 95, 96, 98, 113, 55]:
for vis_code in [8, 12, 44, 40, 60, 56, 95, 97, 99, 98, 96, 76]:
mode = get_mode(vis_code)
assert mode is not None, f"No mode for VIS code {vis_code}"
+5 -5
View File
@@ -59,15 +59,15 @@ VIS_CODES: dict[int, str] = {
40: 'Martin2',
60: 'Scottie1',
56: 'Scottie2',
93: 'PD120',
95: 'PD180',
95: 'PD120',
97: 'PD180',
# Less common but recognized
4: 'Robot24',
36: 'Martin3',
52: 'Scottie3',
55: 'ScottieDX',
113: 'PD240',
96: 'PD90',
76: 'ScottieDX',
96: 'PD240',
99: 'PD90',
98: 'PD160',
}
+41 -36
View File
@@ -20,7 +20,6 @@ from .constants import (
)
from .dsp import (
goertzel,
goertzel_batch,
samples_for_duration,
)
from .modes import (
@@ -98,10 +97,6 @@ class SSTVImageDecoder:
self._channel_data.append(
np.zeros((mode.height, mode.width), dtype=np.uint8))
# Pre-compute candidate frequencies for batch pixel decoding (5 Hz step)
self._freq_candidates = np.arange(
FREQ_PIXEL_LOW - 100, FREQ_PIXEL_HIGH + 105, 5.0)
# Track sync position for re-synchronization
self._expected_line_start = 0 # Sample offset within buffer
self._synced = False
@@ -261,18 +256,16 @@ class SSTVImageDecoder:
if self._current_line >= self._total_audio_lines:
self._complete = True
# Minimum analysis window for meaningful Goertzel frequency estimation.
# With 96 samples (2ms at 48kHz), frequency accuracy is within ~25 Hz,
# giving pixel-level accuracy of ~8/255 levels.
_MIN_ANALYSIS_WINDOW = 96
def _decode_channel_pixels(self, audio: np.ndarray) -> np.ndarray:
"""Decode pixel values from a channel's audio data.
Uses batch Goertzel to estimate frequencies for all pixels
simultaneously, then maps to luminance values. When pixels have
fewer samples than ``_MIN_ANALYSIS_WINDOW``, overlapping analysis
windows are used to maintain frequency estimation accuracy.
Uses the analytic signal (Hilbert transform via FFT) to compute
the instantaneous frequency at every sample, then averages over
each pixel's duration. This is the same FM-demodulation approach
used by QSSTV and other professional SSTV decoders, and provides
far better frequency resolution than windowed Goertzel — especially
for fast modes (Martin2, Scottie2) where each pixel spans only
~11-13 audio samples.
Args:
audio: Audio samples for one channel of one scanline.
@@ -281,36 +274,48 @@ class SSTVImageDecoder:
Array of pixel values (0-255), shape (width,).
"""
width = self._mode.width
samples_per_pixel = max(1, len(audio) // width)
n = len(audio)
if len(audio) < width or samples_per_pixel < 2:
if n < width:
return np.zeros(width, dtype=np.uint8)
window_size = max(samples_per_pixel, self._MIN_ANALYSIS_WINDOW)
# --- Analytic signal via Hilbert transform (FFT method) ---
spectrum = np.fft.fft(audio)
if window_size > samples_per_pixel and len(audio) >= window_size:
# Use overlapping windows centered on each pixel position
windows = np.lib.stride_tricks.sliding_window_view(
audio, window_size)
# Pixel centers, clamped to valid window indices
centers = np.arange(width) * samples_per_pixel
indices = np.minimum(centers, len(windows) - 1)
audio_matrix = np.ascontiguousarray(windows[indices])
# Build the analytic-signal multiplier:
# h[0] = 1 (DC), h[1..N/2-1] = 2 (positive freqs),
# h[N/2] = 1 (Nyquist), h[N/2+1..] = 0 (negative freqs)
h = np.zeros(n)
if n % 2 == 0:
h[0] = h[n // 2] = 1
h[1:n // 2] = 2
else:
# Non-overlapping: each pixel has enough samples
usable = width * samples_per_pixel
audio_matrix = audio[:usable].reshape(width, samples_per_pixel)
h[0] = 1
h[1:(n + 1) // 2] = 2
# Batch Goertzel at all candidate frequencies
energies = goertzel_batch(
audio_matrix, self._freq_candidates, self._sample_rate)
analytic = np.fft.ifft(spectrum * h)
# Find peak frequency per pixel
best_idx = np.argmax(energies, axis=1)
best_freqs = self._freq_candidates[best_idx]
# --- Instantaneous frequency ---
phase = np.unwrap(np.angle(analytic))
inst_freq = np.diff(phase) * (self._sample_rate / (2.0 * np.pi))
# Map frequencies to pixel values (1500 Hz = 0, 2300 Hz = 255)
normalized = (best_freqs - FREQ_PIXEL_LOW) / (FREQ_PIXEL_HIGH - FREQ_PIXEL_LOW)
# --- Average frequency per pixel ---
freq_len = len(inst_freq)
if freq_len < width:
# Fewer freq samples than pixels — index directly
indices = np.linspace(0, freq_len - 1, width).astype(int)
avg_freqs = inst_freq[indices]
else:
pixel_edges = np.linspace(0, freq_len, width + 1).astype(int)
segment_starts = pixel_edges[:-1]
segment_lengths = np.diff(pixel_edges)
segment_lengths = np.maximum(segment_lengths, 1)
sums = np.add.reduceat(inst_freq, segment_starts)
avg_freqs = sums / segment_lengths
# Map to pixel values (1500 Hz → 0, 2300 Hz → 255)
normalized = (avg_freqs - FREQ_PIXEL_LOW) / (
FREQ_PIXEL_HIGH - FREQ_PIXEL_LOW)
return np.clip(normalized * 255 + 0.5, 0, 255).astype(np.uint8)
def get_image(self) -> Image.Image | None:
+5 -5
View File
@@ -189,7 +189,7 @@ SCOTTIE_2 = SSTVMode(
PD_120 = SSTVMode(
name='PD120',
vis_code=93,
vis_code=95,
width=640,
height=496,
color_model=ColorModel.YCRCB_DUAL,
@@ -207,7 +207,7 @@ PD_120 = SSTVMode(
PD_180 = SSTVMode(
name='PD180',
vis_code=95,
vis_code=97,
width=640,
height=496,
color_model=ColorModel.YCRCB_DUAL,
@@ -225,7 +225,7 @@ PD_180 = SSTVMode(
PD_90 = SSTVMode(
name='PD90',
vis_code=96,
vis_code=99,
width=640,
height=496,
color_model=ColorModel.YCRCB_DUAL,
@@ -261,7 +261,7 @@ PD_160 = SSTVMode(
PD_240 = SSTVMode(
name='PD240',
vis_code=113,
vis_code=96,
width=640,
height=496,
color_model=ColorModel.YCRCB_DUAL,
@@ -283,7 +283,7 @@ PD_240 = SSTVMode(
SCOTTIE_DX = SSTVMode(
name='ScottieDX',
vis_code=55,
vis_code=76,
width=320,
height=256,
color_model=ColorModel.RGB,