Persist Meteor decode job state

This commit is contained in:
James Smith
2026-03-18 22:20:24 +00:00
parent e388baa464
commit 4cf394f92e
8 changed files with 699 additions and 172 deletions
+24
View File
@@ -717,6 +717,25 @@ def init_db() -> None:
FOREIGN KEY (observation_id) REFERENCES ground_station_observations(id) ON DELETE CASCADE
)
''')
conn.execute('''
CREATE TABLE IF NOT EXISTS ground_station_decode_jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
observation_id INTEGER,
norad_id INTEGER,
backend TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'queued',
input_path TEXT,
output_dir TEXT,
error_message TEXT,
details_json TEXT,
started_at TIMESTAMP,
completed_at TIMESTAMP,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (observation_id) REFERENCES ground_station_observations(id) ON DELETE CASCADE
)
''')
conn.execute('''
CREATE INDEX IF NOT EXISTS idx_gs_observations_norad
@@ -733,6 +752,11 @@ def init_db() -> None:
ON ground_station_outputs(observation_id, created_at)
''')
conn.execute('''
CREATE INDEX IF NOT EXISTS idx_gs_decode_jobs_observation
ON ground_station_decode_jobs(observation_id, created_at)
''')
# Lightweight schema migrations for existing installs.
profile_cols = {
row['name'] for row in conn.execute('PRAGMA table_info(observation_profiles)')
+289 -3
View File
@@ -2,8 +2,10 @@
from __future__ import annotations
import json
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from utils.logging import get_logger
@@ -43,9 +45,24 @@ def launch_meteor_decode(
register_output,
) -> None:
"""Run Meteor LRPT offline decode in a background thread."""
decode_job_id = _create_decode_job(
observation_id=obs_db_id,
norad_id=norad_id,
backend='meteor_lrpt',
input_path=data_path,
)
emit_event({
'type': 'weather_decode_queued',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
'input_path': str(data_path),
})
t = threading.Thread(
target=_run_decode,
kwargs={
'decode_job_id': decode_job_id,
'obs_db_id': obs_db_id,
'norad_id': norad_id,
'satellite_name': satellite_name,
@@ -62,6 +79,7 @@ def launch_meteor_decode(
def _run_decode(
*,
decode_job_id: int | None,
obs_db_id: int | None,
norad_id: int,
satellite_name: str,
@@ -70,10 +88,23 @@ def _run_decode(
emit_event,
register_output,
) -> None:
latest_status: dict[str, str | int | None] = {
'message': None,
'status': None,
'phase': None,
}
sat_key = resolve_meteor_satellite_key(norad_id, satellite_name)
if not sat_key:
_update_decode_job(
decode_job_id,
status='failed',
error_message='No Meteor satellite mapping is available for this observation.',
details={'reason': 'unknown_satellite_mapping'},
completed=True,
)
emit_event({
'type': 'weather_decode_failed',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
@@ -84,8 +115,16 @@ def _run_decode(
output_dir = OUTPUT_ROOT / f'{norad_id}_{int(time.time())}'
decoder = WeatherSatDecoder(output_dir=output_dir)
if decoder.decoder_available is None:
_update_decode_job(
decode_job_id,
status='failed',
error_message='SatDump backend is not available for Meteor LRPT decode.',
details={'reason': 'backend_unavailable', 'output_dir': str(output_dir)},
completed=True,
)
emit_event({
'type': 'weather_decode_failed',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
@@ -94,10 +133,14 @@ def _run_decode(
return
def _progress_cb(progress):
latest_status['message'] = progress.message or latest_status.get('message')
latest_status['status'] = progress.status
latest_status['phase'] = progress.capture_phase or latest_status.get('phase')
progress_event = progress.to_dict()
progress_event.pop('type', None)
emit_event({
'type': 'weather_decode_progress',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
@@ -105,8 +148,20 @@ def _run_decode(
})
decoder.set_callback(_progress_cb)
_update_decode_job(
decode_job_id,
status='decoding',
output_dir=output_dir,
details={
'sample_rate': sample_rate,
'input_path': str(data_path),
'satellite': satellite_name,
},
started=True,
)
emit_event({
'type': 'weather_decode_started',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
@@ -119,13 +174,29 @@ def _run_decode(
sample_rate=sample_rate,
)
if not ok:
details = _build_failure_details(
data_path=data_path,
sample_rate=sample_rate,
decoder=decoder,
latest_status=latest_status,
)
emit_event({
'type': 'weather_decode_failed',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
'message': error or 'Meteor decode failed to start.',
'message': error or details['message'],
'failure_reason': details['reason'],
'details': details,
})
_update_decode_job(
decode_job_id,
status='failed',
error_message=error or details['message'],
details=details,
completed=True,
)
return
started = time.time()
@@ -134,24 +205,58 @@ def _run_decode(
if decoder.is_running:
decoder.stop()
details = _build_failure_details(
data_path=data_path,
sample_rate=sample_rate,
decoder=decoder,
latest_status=latest_status,
override_reason='timeout',
override_message='Meteor decode timed out.',
)
emit_event({
'type': 'weather_decode_failed',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
'message': 'Meteor decode timed out.',
'message': details['message'],
'failure_reason': details['reason'],
'details': details,
})
_update_decode_job(
decode_job_id,
status='failed',
error_message=details['message'],
details=details,
completed=True,
)
return
images = decoder.get_images()
if not images:
details = _build_failure_details(
data_path=data_path,
sample_rate=sample_rate,
decoder=decoder,
latest_status=latest_status,
)
emit_event({
'type': 'weather_decode_failed',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
'message': 'Decode completed but no image outputs were produced.',
'message': details['message'],
'failure_reason': details['reason'],
'details': details,
})
_update_decode_job(
decode_job_id,
status='failed',
error_message=details['message'],
details=details,
completed=True,
)
return
outputs = []
@@ -180,10 +285,191 @@ def _run_decode(
'product': image.product,
})
completion_details = {
'sample_rate': sample_rate,
'input_path': str(data_path),
'output_dir': str(output_dir),
'output_count': len(outputs),
}
_update_decode_job(
decode_job_id,
status='complete',
details=completion_details,
completed=True,
)
emit_event({
'type': 'weather_decode_complete',
'decode_job_id': decode_job_id,
'norad_id': norad_id,
'satellite': satellite_name,
'backend': 'meteor_lrpt',
'outputs': outputs,
})
def _build_failure_details(
*,
data_path: Path,
sample_rate: int,
decoder: WeatherSatDecoder,
latest_status: dict[str, str | int | None],
override_reason: str | None = None,
override_message: str | None = None,
) -> dict[str, str | int | None]:
file_size = data_path.stat().st_size if data_path.exists() else 0
status = decoder.get_status()
last_error = str(status.get('last_error') or latest_status.get('message') or '').strip()
return_code = status.get('last_returncode')
if override_reason:
reason = override_reason
elif sample_rate < 200_000:
reason = 'sample_rate_too_low'
elif not data_path.exists():
reason = 'missing_recording'
elif file_size < 5_000_000:
reason = 'recording_too_small'
elif return_code not in (None, 0):
reason = 'satdump_failed'
elif 'samplerate' in last_error.lower() or 'sample rate' in last_error.lower():
reason = 'invalid_sample_rate'
elif 'not found' in last_error.lower():
reason = 'input_missing'
elif 'permission' in last_error.lower():
reason = 'permission_error'
else:
reason = 'no_imagery_produced'
if override_message:
message = override_message
elif reason == 'sample_rate_too_low':
message = f'Sample rate {sample_rate} Hz is too low for Meteor LRPT decoding.'
elif reason == 'missing_recording':
message = 'The recording file was not found when decode started.'
elif reason == 'recording_too_small':
message = (
f'Recording is very small ({_format_bytes(file_size)}); this usually means the pass '
'ended early or little usable IQ was captured.'
)
elif reason == 'satdump_failed':
message = last_error or f'SatDump exited with code {return_code}.'
elif reason == 'invalid_sample_rate':
message = last_error or 'SatDump rejected the recording sample rate.'
elif reason == 'input_missing':
message = last_error or 'Input recording was not accessible to the decoder.'
elif reason == 'permission_error':
message = last_error or 'Decoder could not access the recording or output path.'
else:
message = (
last_error or
'Decode completed without any image outputs. This usually indicates weak signal, '
'incorrect sample rate, or a SatDump pipeline mismatch.'
)
return {
'reason': reason,
'message': message,
'sample_rate': sample_rate,
'file_size_bytes': file_size,
'file_size_human': _format_bytes(file_size),
'last_error': last_error or None,
'last_returncode': return_code,
'capture_phase': status.get('capture_phase') or latest_status.get('phase'),
'input_path': str(data_path),
}
def _format_bytes(size_bytes: int) -> str:
if size_bytes < 1024:
return f'{size_bytes} B'
if size_bytes < 1024 * 1024:
return f'{size_bytes / 1024:.1f} KB'
if size_bytes < 1024 * 1024 * 1024:
return f'{size_bytes / (1024 * 1024):.1f} MB'
return f'{size_bytes / (1024 * 1024 * 1024):.2f} GB'
def _create_decode_job(
*,
observation_id: int | None,
norad_id: int,
backend: str,
input_path: Path,
) -> int | None:
try:
from utils.database import get_db
with get_db() as conn:
cur = conn.execute(
'''
INSERT INTO ground_station_decode_jobs
(observation_id, norad_id, backend, status, input_path, created_at, updated_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
''',
(
observation_id,
norad_id,
backend,
'queued',
str(input_path),
_utcnow_iso(),
_utcnow_iso(),
),
)
return cur.lastrowid
except Exception as e:
logger.warning("Failed to create decode job: %s", e)
return None
def _update_decode_job(
decode_job_id: int | None,
*,
status: str,
output_dir: Path | None = None,
error_message: str | None = None,
details: dict | None = None,
started: bool = False,
completed: bool = False,
) -> None:
if decode_job_id is None:
return
try:
from utils.database import get_db
fields = ['status = ?', 'updated_at = ?']
values: list[object] = [status, _utcnow_iso()]
if output_dir is not None:
fields.append('output_dir = ?')
values.append(str(output_dir))
if error_message is not None:
fields.append('error_message = ?')
values.append(error_message)
if details is not None:
fields.append('details_json = ?')
values.append(json.dumps(details))
if started:
fields.append('started_at = ?')
values.append(_utcnow_iso())
if completed:
fields.append('completed_at = ?')
values.append(_utcnow_iso())
values.append(decode_job_id)
with get_db() as conn:
conn.execute(
f'''
UPDATE ground_station_decode_jobs
SET {", ".join(fields)}
WHERE id = ?
''',
values,
)
except Exception as e:
logger.warning("Failed to update decode job %s: %s", decode_job_id, e)
def _utcnow_iso() -> str:
return datetime.now(timezone.utc).isoformat()
+55 -43
View File
@@ -177,12 +177,14 @@ class WeatherSatDecoder:
self._pty_master_fd: int | None = None
self._current_satellite: str = ''
self._current_frequency: float = 0.0
self._current_mode: str = ''
self._capture_start_time: float = 0
self._device_index: int = -1
self._capture_output_dir: Path | None = None
self._on_complete_callback: Callable[[], None] | None = None
self._capture_phase: str = 'idle'
self._current_mode: str = ''
self._capture_start_time: float = 0
self._device_index: int = -1
self._capture_output_dir: Path | None = None
self._on_complete_callback: Callable[[], None] | None = None
self._capture_phase: str = 'idle'
self._last_error_message: str = ''
self._last_process_returncode: int | None = None
# Ensure output directory exists
self._output_dir.mkdir(parents=True, exist_ok=True)
@@ -314,11 +316,13 @@ class WeatherSatDecoder:
self._current_satellite = satellite
self._current_frequency = sat_info['frequency']
self._current_mode = sat_info['mode']
self._device_index = -1 # Offline decode does not claim an SDR device
self._capture_start_time = time.time()
self._capture_phase = 'decoding'
self._stop_event.clear()
self._current_mode = sat_info['mode']
self._device_index = -1 # Offline decode does not claim an SDR device
self._capture_start_time = time.time()
self._capture_phase = 'decoding'
self._last_error_message = ''
self._last_process_returncode = None
self._stop_event.clear()
try:
self._running = True
@@ -408,11 +412,13 @@ class WeatherSatDecoder:
self._current_satellite = satellite
self._current_frequency = sat_info['frequency']
self._current_mode = sat_info['mode']
self._device_index = device_index
self._capture_start_time = time.time()
self._capture_phase = 'tuning'
self._stop_event.clear()
self._current_mode = sat_info['mode']
self._device_index = device_index
self._capture_start_time = time.time()
self._capture_phase = 'tuning'
self._last_error_message = ''
self._last_process_returncode = None
self._stop_event.clear()
try:
self._running = True
@@ -886,16 +892,17 @@ class WeatherSatDecoder:
if was_running:
# Collect exit status (returncode is only set after poll/wait)
if process and process.returncode is None:
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
process.kill()
process.wait()
retcode = process.returncode if process else None
if retcode and retcode != 0:
self._capture_phase = 'error'
self._emit_progress(CaptureProgress(
if process and process.returncode is None:
try:
process.wait(timeout=5)
except subprocess.TimeoutExpired:
process.kill()
process.wait()
retcode = process.returncode if process else None
self._last_process_returncode = retcode
if retcode and retcode != 0:
self._capture_phase = 'error'
self._emit_progress(CaptureProgress(
status='error',
satellite=self._current_satellite,
frequency=self._current_frequency,
@@ -1138,13 +1145,15 @@ class WeatherSatDecoder:
self._images.clear()
return count
def _emit_progress(self, progress: CaptureProgress) -> None:
"""Emit progress update to callback."""
if self._callback:
try:
self._callback(progress)
except Exception as e:
logger.error(f"Error in progress callback: {e}")
def _emit_progress(self, progress: CaptureProgress) -> None:
"""Emit progress update to callback."""
if progress.status == 'error' and progress.message:
self._last_error_message = str(progress.message)
if self._callback:
try:
self._callback(progress)
except Exception as e:
logger.error(f"Error in progress callback: {e}")
def get_status(self) -> dict:
"""Get current decoder status."""
@@ -1152,16 +1161,19 @@ class WeatherSatDecoder:
if self._running and self._capture_start_time:
elapsed = int(time.time() - self._capture_start_time)
return {
'available': self._decoder is not None,
'decoder': self._decoder,
'running': self._running,
'satellite': self._current_satellite,
'frequency': self._current_frequency,
'mode': self._current_mode,
'elapsed_seconds': elapsed,
'image_count': len(self._images),
}
return {
'available': self._decoder is not None,
'decoder': self._decoder,
'running': self._running,
'satellite': self._current_satellite,
'frequency': self._current_frequency,
'mode': self._current_mode,
'capture_phase': self._capture_phase,
'elapsed_seconds': elapsed,
'image_count': len(self._images),
'last_error': self._last_error_message,
'last_returncode': self._last_process_returncode,
}
# Global decoder instance