mirror of
https://github.com/EFForg/rayhunter.git
synced 2026-04-28 00:20:00 -07:00
daemon: switch to writing heuristics output to ND-JSON
ND-JSON (newline-delimited JSON) is just a file with a list of JSON objects separated by newlines. This way, as the analyzer harness processes new packets, it can simply append JSON-serialized results to a file without parsing the entire thing first. Also simplifies the analysis stuff to all operate in the diag thread.
This commit is contained in:
138
bin/src/diag.rs
138
bin/src/diag.rs
@@ -1,57 +1,122 @@
|
||||
use std::pin::pin;
|
||||
use std::sync::Arc;
|
||||
|
||||
use axum::body::Body;
|
||||
use axum::extract::State;
|
||||
use axum::http::header::CONTENT_TYPE;
|
||||
use axum::http::StatusCode;
|
||||
use rayhunter::diag::DataType;
|
||||
use axum::response::{IntoResponse, Response};
|
||||
use rayhunter::analysis::analyzer::Harness;
|
||||
use rayhunter::diag::{DataType, MessagesContainer};
|
||||
use rayhunter::diag_device::DiagDevice;
|
||||
use serde::Serialize;
|
||||
use tokio::sync::RwLock;
|
||||
use tokio::sync::mpsc::{Receiver, Sender};
|
||||
use tokio::sync::mpsc::Receiver;
|
||||
use rayhunter::qmdl::QmdlWriter;
|
||||
use log::{debug, error, info};
|
||||
use tokio::fs::File;
|
||||
use tokio::io::{BufWriter, AsyncWriteExt};
|
||||
use tokio_util::io::ReaderStream;
|
||||
use tokio_util::task::TaskTracker;
|
||||
use futures::{StreamExt, TryStreamExt};
|
||||
|
||||
use crate::analysis::AnalysisMessage;
|
||||
use crate::qmdl_store::QmdlStore;
|
||||
use crate::qmdl_store::RecordingStore;
|
||||
use crate::server::ServerState;
|
||||
|
||||
pub enum DiagDeviceCtrlMessage {
|
||||
StopRecording,
|
||||
StartRecording(QmdlWriter<File>),
|
||||
StartRecording((QmdlWriter<File>, File)),
|
||||
Exit,
|
||||
}
|
||||
|
||||
struct AnalysisWriter {
|
||||
writer: BufWriter<File>,
|
||||
harness: Harness,
|
||||
bytes_written: usize,
|
||||
}
|
||||
|
||||
// We write our analysis results to a file immediately to minimize the amount of
|
||||
// state Rayhunter has to keep track of in memory. The analysis file's format is
|
||||
// Newline Delimited JSON
|
||||
// (https://docs.mulesoft.com/dataweave/latest/dataweave-formats-ndjson), which
|
||||
// lets us simply append new rows to the end without parsing the entire JSON
|
||||
// object beforehand.
|
||||
impl AnalysisWriter {
|
||||
pub async fn new(file: File) -> Result<Self, std::io::Error> {
|
||||
let mut result = Self {
|
||||
writer: BufWriter::new(file),
|
||||
harness: Harness::new_with_all_analyzers(),
|
||||
bytes_written: 0,
|
||||
};
|
||||
let metadata = result.harness.get_metadata();
|
||||
result.write(&metadata).await?;
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
// Runs the analysis harness on the given container, serializing the results
|
||||
// to the analysis file and returning the file's new length.
|
||||
pub async fn analyze(&mut self, container: MessagesContainer) -> Result<usize, std::io::Error> {
|
||||
let row = self.harness.analyze_qmdl_messages(container);
|
||||
if !row.is_empty() {
|
||||
self.write(&row).await?;
|
||||
}
|
||||
Ok(self.bytes_written)
|
||||
}
|
||||
|
||||
async fn write<T: Serialize>(&mut self, value: &T) -> Result<(), std::io::Error> {
|
||||
let mut value_str = serde_json::to_string(value).unwrap();
|
||||
value_str.push('\n');
|
||||
self.bytes_written += value_str.len();
|
||||
self.writer.write_all(value_str.as_bytes()).await?;
|
||||
self.writer.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Flushes any pending I/O to disk before dropping the writer
|
||||
pub async fn close(mut self) -> Result<(), std::io::Error> {
|
||||
self.writer.flush().await?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn run_diag_read_thread(
|
||||
task_tracker: &TaskTracker,
|
||||
mut dev: DiagDevice,
|
||||
mut qmdl_file_rx: Receiver<DiagDeviceCtrlMessage>,
|
||||
qmdl_store_lock: Arc<RwLock<QmdlStore>>,
|
||||
analysis_tx: Sender<AnalysisMessage>
|
||||
qmdl_store_lock: Arc<RwLock<RecordingStore>>
|
||||
) {
|
||||
task_tracker.spawn(async move {
|
||||
let initial_file = qmdl_store_lock.write().await.new_entry().await.expect("failed creating QMDL file entry");
|
||||
let mut qmdl_writer: Option<QmdlWriter<File>> = Some(QmdlWriter::new(initial_file));
|
||||
let (initial_qmdl_file, initial_analysis_file) = qmdl_store_lock.write().await.new_entry().await.expect("failed creating QMDL file entry");
|
||||
let mut maybe_qmdl_writer: Option<QmdlWriter<File>> = Some(QmdlWriter::new(initial_qmdl_file));
|
||||
let mut diag_stream = pin!(dev.as_stream().into_stream());
|
||||
let mut maybe_analysis_writer = Some(AnalysisWriter::new(initial_analysis_file).await
|
||||
.expect("failed to create analysis writer"));
|
||||
loop {
|
||||
tokio::select! {
|
||||
msg = qmdl_file_rx.recv() => {
|
||||
match msg {
|
||||
Some(DiagDeviceCtrlMessage::StartRecording(new_writer)) => {
|
||||
qmdl_writer = Some(new_writer);
|
||||
analysis_tx.send(AnalysisMessage::Reset).await
|
||||
.expect("failed to send message to analysis thread");
|
||||
Some(DiagDeviceCtrlMessage::StartRecording((new_writer, new_analysis_file))) => {
|
||||
maybe_qmdl_writer = Some(new_writer);
|
||||
if let Some(analysis_writer) = maybe_analysis_writer {
|
||||
analysis_writer.close().await.expect("failed to close analysis writer");
|
||||
}
|
||||
maybe_analysis_writer = Some(AnalysisWriter::new(new_analysis_file).await
|
||||
.expect("failed to write to analysis file"));
|
||||
},
|
||||
Some(DiagDeviceCtrlMessage::StopRecording) => {
|
||||
qmdl_writer = None;
|
||||
analysis_tx.send(AnalysisMessage::Reset).await
|
||||
.expect("failed to send message to analysis thread");
|
||||
maybe_qmdl_writer = None;
|
||||
if let Some(analysis_writer) = maybe_analysis_writer {
|
||||
analysis_writer.close().await.expect("failed to close analysis writer");
|
||||
}
|
||||
maybe_analysis_writer = None;
|
||||
},
|
||||
// None means all the Senders have been dropped, so it's
|
||||
// time to go
|
||||
Some(DiagDeviceCtrlMessage::Exit) | None => {
|
||||
info!("Diag reader thread exiting...");
|
||||
if let Some(analysis_writer) = maybe_analysis_writer {
|
||||
analysis_writer.close().await.expect("failed to close analysis writer");
|
||||
}
|
||||
return Ok(())
|
||||
},
|
||||
}
|
||||
@@ -65,20 +130,26 @@ pub fn run_diag_read_thread(
|
||||
}
|
||||
// keep track of how many bytes were written to the QMDL file so we can read
|
||||
// a valid block of data from it in the HTTP server
|
||||
if let Some(writer) = qmdl_writer.as_mut() {
|
||||
writer.write_container(&container).await.expect("failed to write to QMDL writer");
|
||||
debug!("total QMDL bytes written: {}, updating manifest...", writer.total_written);
|
||||
if let Some(qmdl_writer) = maybe_qmdl_writer.as_mut() {
|
||||
qmdl_writer.write_container(&container).await.expect("failed to write to QMDL writer");
|
||||
debug!("total QMDL bytes written: {}, updating manifest...", qmdl_writer.total_written);
|
||||
let mut qmdl_store = qmdl_store_lock.write().await;
|
||||
let index = qmdl_store.current_entry.expect("DiagDevice had qmdl_writer, but QmdlStore didn't have current entry???");
|
||||
qmdl_store.update_entry(index, writer.total_written).await
|
||||
qmdl_store.update_entry_qmdl_size(index, qmdl_writer.total_written).await
|
||||
.expect("failed to update qmdl file size");
|
||||
debug!("sending container to analysis thread...");
|
||||
analysis_tx.send(AnalysisMessage::AnalyzeContainer(container)).await
|
||||
.expect("failed sending messages container to analysis thread");
|
||||
debug!("done!");
|
||||
} else {
|
||||
debug!("no qmdl_writer set, continuing...");
|
||||
}
|
||||
|
||||
if let Some(analysis_writer) = maybe_analysis_writer.as_mut() {
|
||||
let analysis_file_len = analysis_writer.analyze(container).await
|
||||
.expect("failed to analyze container");
|
||||
let mut qmdl_store = qmdl_store_lock.write().await;
|
||||
let index = qmdl_store.current_entry.expect("DiagDevice had qmdl_writer, but QmdlStore didn't have current entry???");
|
||||
qmdl_store.update_entry_analysis_size(index, analysis_file_len as usize).await
|
||||
.expect("failed to update analysis file size");
|
||||
}
|
||||
},
|
||||
Err(err) => {
|
||||
error!("error reading diag device: {}", err);
|
||||
@@ -96,10 +167,10 @@ pub async fn start_recording(State(state): State<Arc<ServerState>>) -> Result<(S
|
||||
return Err((StatusCode::FORBIDDEN, "server is in readonly mode".to_string()));
|
||||
}
|
||||
let mut qmdl_store = state.qmdl_store_lock.write().await;
|
||||
let qmdl_file = qmdl_store.new_entry().await
|
||||
let (qmdl_file, analysis_file) = qmdl_store.new_entry().await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("couldn't create new qmdl entry: {}", e)))?;
|
||||
let qmdl_writer = QmdlWriter::new(qmdl_file);
|
||||
state.diag_device_ctrl_sender.send(DiagDeviceCtrlMessage::StartRecording(qmdl_writer)).await
|
||||
state.diag_device_ctrl_sender.send(DiagDeviceCtrlMessage::StartRecording((qmdl_writer, analysis_file))).await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("couldn't send stop recording message: {}", e)))?;
|
||||
Ok((StatusCode::ACCEPTED, "ok".to_string()))
|
||||
}
|
||||
@@ -115,3 +186,20 @@ pub async fn stop_recording(State(state): State<Arc<ServerState>>) -> Result<(St
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("couldn't send stop recording message: {}", e)))?;
|
||||
Ok((StatusCode::ACCEPTED, "ok".to_string()))
|
||||
}
|
||||
|
||||
pub async fn get_analysis_report(State(state): State<Arc<ServerState>>) -> Result<Response, (StatusCode, String)> {
|
||||
let qmdl_store = state.qmdl_store_lock.read().await;
|
||||
let Some(entry) = qmdl_store.get_current_entry() else {
|
||||
return Err((
|
||||
StatusCode::SERVICE_UNAVAILABLE,
|
||||
"No QMDL data's being recorded to analyze, try starting a new recording!".to_string()
|
||||
));
|
||||
};
|
||||
let analysis_file = qmdl_store.open_entry_analysis(entry).await
|
||||
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, format!("{:?}", e)))?;
|
||||
let analysis_stream = ReaderStream::new(analysis_file);
|
||||
|
||||
let headers = [(CONTENT_TYPE, "application/x-ndjson")];
|
||||
let body = Body::from_stream(analysis_stream);
|
||||
Ok((headers, body).into_response())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user