global: datasets compression via zstd

This commit is contained in:
k
2024-08-05 00:44:46 +02:00
parent 9067c28d24
commit c646d6dc60
36 changed files with 544 additions and 249 deletions

View File

@@ -1,15 +1,39 @@
use std::{
fmt::Debug,
fs::File,
io::{BufReader, BufWriter},
fs::{self, File},
io::{BufReader, BufWriter, Cursor},
path::Path,
};
use bincode::{config, decode_from_std_read, encode_into_std_write, Decode, Encode};
use bincode::{
config, decode_from_slice, decode_from_std_read, encode_into_std_write, Decode, Encode,
};
use zstd::decode_all;
const ZST_EXTENSION: &str = "zst";
pub const BIN_EXTENSION: &str = "bin";
pub const COMPRESSED_BIN_EXTENSION: &str = "bin.zst";
enum BinaryType {
Raw,
Compressed,
}
pub struct Binary;
impl Binary {
pub fn import<T>(path: &str) -> color_eyre::Result<T>
pub fn import<T>(path: &Path) -> color_eyre::Result<T>
where
T: Decode,
{
match Self::type_from_path(path) {
BinaryType::Compressed => Self::import_compressed(path),
BinaryType::Raw => Self::import_raw(path),
}
}
fn import_raw<T>(path: &Path) -> color_eyre::Result<T>
where
T: Decode,
{
@@ -24,7 +48,34 @@ impl Binary {
Ok(decoded)
}
pub fn export<T>(path: &str, value: &T) -> color_eyre::Result<()>
fn import_compressed<T>(path: &Path) -> color_eyre::Result<T>
where
T: Decode,
{
let file = File::open(path).unwrap();
let reader = BufReader::new(file);
let decompressed = decode_all(reader).unwrap();
let config = config::standard();
let decoded = decode_from_slice::<T, _>(&decompressed, config).unwrap().0;
Ok(decoded)
}
pub fn export<T>(path: &Path, value: &T) -> color_eyre::Result<()>
where
T: Debug + Encode,
{
match Self::type_from_path(path) {
BinaryType::Compressed => Self::export_compressed(path, value),
BinaryType::Raw => Self::export_raw(path, value),
}
}
fn export_raw<T>(path: &Path, value: &T) -> color_eyre::Result<()>
where
T: Debug + Encode,
{
@@ -40,4 +91,47 @@ impl Binary {
Ok(())
}
fn export_compressed<T>(path: &Path, value: &T) -> color_eyre::Result<()>
where
T: Debug + Encode,
{
let config = config::standard();
let encoded = bincode::encode_to_vec(value, config).unwrap();
let cursor = Cursor::new(encoded);
let compressed = zstd::encode_all(cursor, 0).unwrap();
fs::write(path, compressed).unwrap();
Ok(())
}
pub fn has_correct_extension(path: &Path) -> bool {
let path = path.to_str().unwrap();
path.ends_with(BIN_EXTENSION) || path.ends_with(COMPRESSED_BIN_EXTENSION)
}
fn type_from_path(path: &Path) -> BinaryType {
let extension = path.extension();
if extension.is_none() {
panic!("Should have extension");
}
if !Self::has_correct_extension(path) {
dbg!(path);
panic!("Wrong extension")
}
let extension = extension.unwrap();
if extension == ZST_EXTENSION {
BinaryType::Compressed
} else {
BinaryType::Raw
}
}
}

View File

@@ -1,2 +1,2 @@
pub const INPUTS_FOLDER_PATH: &str = "./in";
pub const OUTPUTS_FOLDER_PATH: &str = "./target/outputs";
pub const OUTPUTS_FOLDER_PATH: &str = "./out";

View File

@@ -1,17 +1,25 @@
use std::{
fs::File,
io::{BufReader, BufWriter},
path::Path,
};
use serde::{de::DeserializeOwned, Serialize};
pub struct Json;
pub const JSON_EXTENSION: &str = "json";
pub const HAR_EXTENSION: &str = "har";
impl Json {
pub fn import<T>(path: &str) -> color_eyre::Result<T>
pub fn import<T>(path: &Path) -> color_eyre::Result<T>
where
T: DeserializeOwned,
{
if !Self::has_correct_extension(path) {
panic!("Wrong extension");
}
let file = File::open(path)?;
let reader = BufReader::new(file);
@@ -19,10 +27,15 @@ impl Json {
Ok(serde_json::from_reader(reader)?)
}
pub fn export<T>(path: &str, value: &T) -> color_eyre::Result<()>
pub fn export<T>(path: &Path, value: &T) -> color_eyre::Result<()>
where
T: Serialize,
{
if !Self::has_correct_extension(path) {
dbg!(path);
panic!("Wrong extension");
}
let file = File::create(path).unwrap_or_else(|_| {
dbg!(&path);
panic!("No such file or directory")
@@ -34,4 +47,10 @@ impl Json {
Ok(())
}
#[inline(always)]
pub fn has_correct_extension(path: &Path) -> bool {
let path = path.to_str().unwrap();
path.ends_with(JSON_EXTENSION) || path.ends_with(HAR_EXTENSION)
}
}

View File

@@ -1,4 +1,4 @@
use std::fmt::Debug;
use std::{fmt::Debug, fs, path::Path};
use allocative::Allocative;
use bincode::{Decode, Encode};
@@ -6,6 +6,8 @@ use serde::{de::DeserializeOwned, Serialize};
use crate::io::{Binary, Json};
use super::{BIN_EXTENSION, COMPRESSED_BIN_EXTENSION, HAR_EXTENSION, JSON_EXTENSION};
#[derive(PartialEq, PartialOrd, Ord, Eq, Debug, Clone, Copy, Default, Allocative)]
pub enum Serialization {
#[default]
@@ -14,42 +16,105 @@ pub enum Serialization {
}
impl Serialization {
pub fn to_extension(&self) -> &str {
pub fn is_serializable(&self, path: &Path) -> bool {
let path = path.to_str().unwrap();
match self {
Self::Binary => "bin",
Self::Json => "json",
Self::Binary => {
path.ends_with(BIN_EXTENSION) || path.ends_with(COMPRESSED_BIN_EXTENSION)
}
Self::Json => path.ends_with(JSON_EXTENSION) || path.ends_with(HAR_EXTENSION),
}
}
pub fn from_extension(extension: &str) -> Self {
match extension {
"bin" => Self::Binary,
"json" => Self::Json,
_ => panic!("Extension \"{extension}\" isn't supported"),
pub fn from_path(path: &Path) -> Self {
let path = path.to_str().unwrap();
if path.ends_with(BIN_EXTENSION) || path.ends_with(COMPRESSED_BIN_EXTENSION) {
Self::Binary
} else if path.ends_with(JSON_EXTENSION) || path.ends_with(HAR_EXTENSION) {
Self::Json
} else {
panic!("Extension \"{path}\" isn't supported")
}
}
pub fn append_extension(&self, path: &str) -> String {
format!("{path}.{}", self.to_extension())
}
pub fn import<T>(&self, path: &str) -> color_eyre::Result<T>
pub fn import<T>(&self, path: &Path) -> color_eyre::Result<T>
where
T: Debug + DeserializeOwned + Decode,
{
match self {
Serialization::Binary => Binary::import(path),
Serialization::Json => Json::import(path),
Serialization::Binary => {
if self.is_serializable(path) {
Binary::import(path)
} else {
let path = path.to_str().unwrap();
let bin_path_str = format!("{path}.{BIN_EXTENSION}");
let bin_path = Path::new(&bin_path_str);
if bin_path.exists() {
return Binary::import(bin_path);
}
let compressed_bin_path_str = format!("{path}.{COMPRESSED_BIN_EXTENSION}");
let compressed_bin_path = Path::new(&compressed_bin_path_str);
if compressed_bin_path.exists() {
return Binary::import(compressed_bin_path);
}
panic!("Wrong path")
}
}
Serialization::Json => {
if self.is_serializable(path) {
Json::import(path)
} else {
let path = path.to_str().unwrap();
let json_path_str = format!("{path}.{JSON_EXTENSION}");
let json_path = Path::new(&json_path_str);
if json_path.exists() {
return Json::import(json_path);
}
panic!("Wrong path")
}
}
}
}
pub fn export<T>(&self, path: &str, value: &T) -> color_eyre::Result<()>
pub fn export<T>(&self, path: &Path, value: &T) -> color_eyre::Result<()>
where
T: Debug + Serialize + Encode,
{
match self {
Serialization::Binary => Binary::export(path, value),
Serialization::Json => Json::export(path, value),
Serialization::Binary => {
if self.is_serializable(path) {
Binary::export(path, value)
} else {
let path = path.to_str().unwrap();
let res = Binary::export(
Path::new(&format!("{}.{COMPRESSED_BIN_EXTENSION}", path,)),
value,
);
if res.is_ok() {
let _ = fs::remove_file(Path::new(&format!("{}.{BIN_EXTENSION}", path)));
}
res
}
}
Serialization::Json => {
if self.is_serializable(path) {
Json::export(path, value)
} else {
Json::export(
Path::new(&format!("{}.{JSON_EXTENSION}", path.to_str().unwrap())),
value,
)
}
}
}
}
}