mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-04-28 08:39:59 -07:00
global: datasets compression via zstd
This commit is contained in:
@@ -1,15 +1,39 @@
|
||||
use std::{
|
||||
fmt::Debug,
|
||||
fs::File,
|
||||
io::{BufReader, BufWriter},
|
||||
fs::{self, File},
|
||||
io::{BufReader, BufWriter, Cursor},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use bincode::{config, decode_from_std_read, encode_into_std_write, Decode, Encode};
|
||||
use bincode::{
|
||||
config, decode_from_slice, decode_from_std_read, encode_into_std_write, Decode, Encode,
|
||||
};
|
||||
use zstd::decode_all;
|
||||
|
||||
/// Final path extension that marks a file as zstd-compressed; `type_from_path`
/// compares `Path::extension()` against it.
const ZST_EXTENSION: &str = "zst";
|
||||
|
||||
/// Suffix (without the leading dot) of an uncompressed binary file.
pub const BIN_EXTENSION: &str = "bin";
|
||||
/// Suffix (without the leading dot) of a zstd-compressed binary file.
pub const COMPRESSED_BIN_EXTENSION: &str = "bin.zst";
|
||||
|
||||
/// On-disk flavour of a binary file, derived from the path's extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum BinaryType {
    /// Encoded payload stored as-is.
    Raw,
    /// Encoded payload wrapped in a zstd stream.
    Compressed,
}
|
||||
|
||||
/// Stateless namespace for importing and exporting bincode-encoded values,
/// either raw or zstd-compressed depending on the file extension.
pub struct Binary;
|
||||
|
||||
impl Binary {
|
||||
pub fn import<T>(path: &str) -> color_eyre::Result<T>
|
||||
pub fn import<T>(path: &Path) -> color_eyre::Result<T>
|
||||
where
|
||||
T: Decode,
|
||||
{
|
||||
match Self::type_from_path(path) {
|
||||
BinaryType::Compressed => Self::import_compressed(path),
|
||||
BinaryType::Raw => Self::import_raw(path),
|
||||
}
|
||||
}
|
||||
|
||||
fn import_raw<T>(path: &Path) -> color_eyre::Result<T>
|
||||
where
|
||||
T: Decode,
|
||||
{
|
||||
@@ -24,7 +48,34 @@ impl Binary {
|
||||
Ok(decoded)
|
||||
}
|
||||
|
||||
pub fn export<T>(path: &str, value: &T) -> color_eyre::Result<()>
|
||||
fn import_compressed<T>(path: &Path) -> color_eyre::Result<T>
|
||||
where
|
||||
T: Decode,
|
||||
{
|
||||
let file = File::open(path).unwrap();
|
||||
|
||||
let reader = BufReader::new(file);
|
||||
|
||||
let decompressed = decode_all(reader).unwrap();
|
||||
|
||||
let config = config::standard();
|
||||
|
||||
let decoded = decode_from_slice::<T, _>(&decompressed, config).unwrap().0;
|
||||
|
||||
Ok(decoded)
|
||||
}
|
||||
|
||||
pub fn export<T>(path: &Path, value: &T) -> color_eyre::Result<()>
|
||||
where
|
||||
T: Debug + Encode,
|
||||
{
|
||||
match Self::type_from_path(path) {
|
||||
BinaryType::Compressed => Self::export_compressed(path, value),
|
||||
BinaryType::Raw => Self::export_raw(path, value),
|
||||
}
|
||||
}
|
||||
|
||||
fn export_raw<T>(path: &Path, value: &T) -> color_eyre::Result<()>
|
||||
where
|
||||
T: Debug + Encode,
|
||||
{
|
||||
@@ -40,4 +91,47 @@ impl Binary {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn export_compressed<T>(path: &Path, value: &T) -> color_eyre::Result<()>
|
||||
where
|
||||
T: Debug + Encode,
|
||||
{
|
||||
let config = config::standard();
|
||||
|
||||
let encoded = bincode::encode_to_vec(value, config).unwrap();
|
||||
|
||||
let cursor = Cursor::new(encoded);
|
||||
|
||||
let compressed = zstd::encode_all(cursor, 0).unwrap();
|
||||
|
||||
fs::write(path, compressed).unwrap();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn has_correct_extension(path: &Path) -> bool {
|
||||
let path = path.to_str().unwrap();
|
||||
path.ends_with(BIN_EXTENSION) || path.ends_with(COMPRESSED_BIN_EXTENSION)
|
||||
}
|
||||
|
||||
fn type_from_path(path: &Path) -> BinaryType {
|
||||
let extension = path.extension();
|
||||
|
||||
if extension.is_none() {
|
||||
panic!("Should have extension");
|
||||
}
|
||||
|
||||
if !Self::has_correct_extension(path) {
|
||||
dbg!(path);
|
||||
panic!("Wrong extension")
|
||||
}
|
||||
|
||||
let extension = extension.unwrap();
|
||||
|
||||
if extension == ZST_EXTENSION {
|
||||
BinaryType::Compressed
|
||||
} else {
|
||||
BinaryType::Raw
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
pub const INPUTS_FOLDER_PATH: &str = "./in";
|
||||
pub const OUTPUTS_FOLDER_PATH: &str = "./target/outputs";
|
||||
pub const OUTPUTS_FOLDER_PATH: &str = "./out";
|
||||
|
||||
@@ -1,17 +1,25 @@
|
||||
use std::{
|
||||
fs::File,
|
||||
io::{BufReader, BufWriter},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use serde::{de::DeserializeOwned, Serialize};
|
||||
|
||||
/// Stateless namespace for importing and exporting values as JSON files
/// through `serde_json`.
pub struct Json;
|
||||
|
||||
/// Suffix (without the leading dot) of a JSON file.
pub const JSON_EXTENSION: &str = "json";
|
||||
/// Suffix (without the leading dot) of a `har` file, which
/// `Json::has_correct_extension` accepts alongside `json`.
pub const HAR_EXTENSION: &str = "har";
|
||||
|
||||
impl Json {
|
||||
pub fn import<T>(path: &str) -> color_eyre::Result<T>
|
||||
pub fn import<T>(path: &Path) -> color_eyre::Result<T>
|
||||
where
|
||||
T: DeserializeOwned,
|
||||
{
|
||||
if !Self::has_correct_extension(path) {
|
||||
panic!("Wrong extension");
|
||||
}
|
||||
|
||||
let file = File::open(path)?;
|
||||
|
||||
let reader = BufReader::new(file);
|
||||
@@ -19,10 +27,15 @@ impl Json {
|
||||
Ok(serde_json::from_reader(reader)?)
|
||||
}
|
||||
|
||||
pub fn export<T>(path: &str, value: &T) -> color_eyre::Result<()>
|
||||
pub fn export<T>(path: &Path, value: &T) -> color_eyre::Result<()>
|
||||
where
|
||||
T: Serialize,
|
||||
{
|
||||
if !Self::has_correct_extension(path) {
|
||||
dbg!(path);
|
||||
panic!("Wrong extension");
|
||||
}
|
||||
|
||||
let file = File::create(path).unwrap_or_else(|_| {
|
||||
dbg!(&path);
|
||||
panic!("No such file or directory")
|
||||
@@ -34,4 +47,10 @@ impl Json {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn has_correct_extension(path: &Path) -> bool {
|
||||
let path = path.to_str().unwrap();
|
||||
path.ends_with(JSON_EXTENSION) || path.ends_with(HAR_EXTENSION)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::fmt::Debug;
|
||||
use std::{fmt::Debug, fs, path::Path};
|
||||
|
||||
use allocative::Allocative;
|
||||
use bincode::{Decode, Encode};
|
||||
@@ -6,6 +6,8 @@ use serde::{de::DeserializeOwned, Serialize};
|
||||
|
||||
use crate::io::{Binary, Json};
|
||||
|
||||
use super::{BIN_EXTENSION, COMPRESSED_BIN_EXTENSION, HAR_EXTENSION, JSON_EXTENSION};
|
||||
|
||||
#[derive(PartialEq, PartialOrd, Ord, Eq, Debug, Clone, Copy, Default, Allocative)]
|
||||
pub enum Serialization {
|
||||
#[default]
|
||||
@@ -14,42 +16,105 @@ pub enum Serialization {
|
||||
}
|
||||
|
||||
impl Serialization {
|
||||
pub fn to_extension(&self) -> &str {
|
||||
pub fn is_serializable(&self, path: &Path) -> bool {
|
||||
let path = path.to_str().unwrap();
|
||||
match self {
|
||||
Self::Binary => "bin",
|
||||
Self::Json => "json",
|
||||
Self::Binary => {
|
||||
path.ends_with(BIN_EXTENSION) || path.ends_with(COMPRESSED_BIN_EXTENSION)
|
||||
}
|
||||
Self::Json => path.ends_with(JSON_EXTENSION) || path.ends_with(HAR_EXTENSION),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_extension(extension: &str) -> Self {
|
||||
match extension {
|
||||
"bin" => Self::Binary,
|
||||
"json" => Self::Json,
|
||||
_ => panic!("Extension \"{extension}\" isn't supported"),
|
||||
pub fn from_path(path: &Path) -> Self {
|
||||
let path = path.to_str().unwrap();
|
||||
if path.ends_with(BIN_EXTENSION) || path.ends_with(COMPRESSED_BIN_EXTENSION) {
|
||||
Self::Binary
|
||||
} else if path.ends_with(JSON_EXTENSION) || path.ends_with(HAR_EXTENSION) {
|
||||
Self::Json
|
||||
} else {
|
||||
panic!("Extension \"{path}\" isn't supported")
|
||||
}
|
||||
}
|
||||
|
||||
pub fn append_extension(&self, path: &str) -> String {
|
||||
format!("{path}.{}", self.to_extension())
|
||||
}
|
||||
|
||||
pub fn import<T>(&self, path: &str) -> color_eyre::Result<T>
|
||||
pub fn import<T>(&self, path: &Path) -> color_eyre::Result<T>
|
||||
where
|
||||
T: Debug + DeserializeOwned + Decode,
|
||||
{
|
||||
match self {
|
||||
Serialization::Binary => Binary::import(path),
|
||||
Serialization::Json => Json::import(path),
|
||||
Serialization::Binary => {
|
||||
if self.is_serializable(path) {
|
||||
Binary::import(path)
|
||||
} else {
|
||||
let path = path.to_str().unwrap();
|
||||
let bin_path_str = format!("{path}.{BIN_EXTENSION}");
|
||||
let bin_path = Path::new(&bin_path_str);
|
||||
|
||||
if bin_path.exists() {
|
||||
return Binary::import(bin_path);
|
||||
}
|
||||
|
||||
let compressed_bin_path_str = format!("{path}.{COMPRESSED_BIN_EXTENSION}");
|
||||
let compressed_bin_path = Path::new(&compressed_bin_path_str);
|
||||
|
||||
if compressed_bin_path.exists() {
|
||||
return Binary::import(compressed_bin_path);
|
||||
}
|
||||
|
||||
panic!("Wrong path")
|
||||
}
|
||||
}
|
||||
Serialization::Json => {
|
||||
if self.is_serializable(path) {
|
||||
Json::import(path)
|
||||
} else {
|
||||
let path = path.to_str().unwrap();
|
||||
let json_path_str = format!("{path}.{JSON_EXTENSION}");
|
||||
let json_path = Path::new(&json_path_str);
|
||||
|
||||
if json_path.exists() {
|
||||
return Json::import(json_path);
|
||||
}
|
||||
|
||||
panic!("Wrong path")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn export<T>(&self, path: &str, value: &T) -> color_eyre::Result<()>
|
||||
pub fn export<T>(&self, path: &Path, value: &T) -> color_eyre::Result<()>
|
||||
where
|
||||
T: Debug + Serialize + Encode,
|
||||
{
|
||||
match self {
|
||||
Serialization::Binary => Binary::export(path, value),
|
||||
Serialization::Json => Json::export(path, value),
|
||||
Serialization::Binary => {
|
||||
if self.is_serializable(path) {
|
||||
Binary::export(path, value)
|
||||
} else {
|
||||
let path = path.to_str().unwrap();
|
||||
|
||||
let res = Binary::export(
|
||||
Path::new(&format!("{}.{COMPRESSED_BIN_EXTENSION}", path,)),
|
||||
value,
|
||||
);
|
||||
|
||||
if res.is_ok() {
|
||||
let _ = fs::remove_file(Path::new(&format!("{}.{BIN_EXTENSION}", path)));
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
}
|
||||
Serialization::Json => {
|
||||
if self.is_serializable(path) {
|
||||
Json::export(path, value)
|
||||
} else {
|
||||
Json::export(
|
||||
Path::new(&format!("{}.{JSON_EXTENSION}", path.to_str().unwrap())),
|
||||
value,
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user