global: datasets compression via zstd

This commit is contained in:
k
2024-08-05 00:44:46 +02:00
parent 9067c28d24
commit c646d6dc60
36 changed files with 544 additions and 249 deletions

2
server/.gitignore vendored
View File

@@ -1,4 +1,4 @@
/target
.DS_Store
/parser.log
/.log
/in

49
server/Cargo.lock generated
View File

@@ -477,9 +477,9 @@ dependencies = [
[[package]]
name = "clap"
version = "4.5.11"
version = "4.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35723e6a11662c2afb578bcf0b88bf6ea8e21282a953428f240574fcc3a2b5b3"
checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc"
dependencies = [
"clap_builder",
"clap_derive",
@@ -487,9 +487,9 @@ dependencies = [
[[package]]
name = "clap_builder"
version = "4.5.11"
version = "4.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49eb96cbfa7cfa35017b7cd548c75b14c3118c98b423041d70562665e07fb0fa"
checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99"
dependencies = [
"anstream",
"anstyle",
@@ -499,9 +499,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "4.5.11"
version = "4.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d029b67f89d30bbb547c89fd5161293c0aec155fc691d7924b64550662db93e"
checksum = "501d359d5f3dcaf6ecdeee48833ae73ec6e42723a1e52419c79abf9507eec0a0"
dependencies = [
"heck",
"proc-macro2",
@@ -1081,9 +1081,9 @@ dependencies = [
[[package]]
name = "inferno"
version = "0.11.20"
version = "0.11.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c77a3ae7d4761b9c64d2c030f70746ceb8cfba32dce0325a56792e0a4816c31"
checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88"
dependencies = [
"ahash",
"clap",
@@ -1380,9 +1380,9 @@ dependencies = [
[[package]]
name = "ordered-float"
version = "4.2.1"
version = "4.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19ff2cf528c6c03d9ed653d6c4ce1dc0582dc4af309790ad92f07c1cd551b0be"
checksum = "4a91171844676f8c7990ce64959210cd2eaef32c2612c50f9fae9f8aaa6065a6"
dependencies = [
"num-traits",
]
@@ -1463,6 +1463,7 @@ dependencies = [
"serde",
"serde_json",
"toml",
"zstd",
]
[[package]]
@@ -2187,9 +2188,9 @@ dependencies = [
[[package]]
name = "toml"
version = "0.8.16"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "81967dd0dd2c1ab0bc3468bd7caecc32b8a4aa47d0c8c695d8c2b2108168d62c"
checksum = "a1ed1f98e3fdc28d6d910e6737ae6ab1a93bf1985935a1193e68f93eeb68d24e"
dependencies = [
"serde",
"serde_spanned",
@@ -2199,18 +2200,18 @@ dependencies = [
[[package]]
name = "toml_datetime"
version = "0.6.7"
version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8fb9f64314842840f1d940ac544da178732128f1c78c21772e876579e0da1db"
checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.17"
version = "0.22.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d9f8729f5aea9562aac1cc0441f5d6de3cff1ee0c5d67293eeca5eb36ee7c16"
checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d"
dependencies = [
"indexmap",
"serde",
@@ -2645,9 +2646,9 @@ checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8"
[[package]]
name = "winnow"
version = "0.6.13"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59b5e5f6c299a3c7890b876a2a587f3115162487e704907d9b6cd29473052ba1"
checksum = "68a9bda4691f099d435ad181000724da8e5899daa10713c2d432552b9ccd3a6f"
dependencies = [
"memchr",
]
@@ -2711,27 +2712,27 @@ checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
[[package]]
name = "zstd"
version = "0.13.0"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110"
checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.0.0"
version = "7.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e"
checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.9+zstd.1.5.5"
version = "2.0.13+zstd.1.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656"
checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa"
dependencies = [
"cc",
"pkg-config",

View File

@@ -1,5 +1,7 @@
if cargo watch --help &> /dev/null; then
cargo watch --no-vcs-ignores -w "./src" -w "./run.sh" -w "./in/datasets_len.txt" -x "run -r"
TRIGGER="./in/datasets_len.txt"
echo "0" > $TRIGGER
cargo watch --no-vcs-ignores -w "./src" -w "./run.sh" -w "$TRIGGER" -x "run -r"
else
cargo run -r
fi

View File

@@ -109,16 +109,16 @@ fn _file_handler(
let type_name = route.values_type.split("::").last().unwrap();
let value = match type_name {
"u8" => typed_value_to_response::<u8>(kind, &route.file_path, chunk)?,
"u16" => typed_value_to_response::<u16>(kind, &route.file_path, chunk)?,
"u32" => typed_value_to_response::<u32>(kind, &route.file_path, chunk)?,
"u64" => typed_value_to_response::<u64>(kind, &route.file_path, chunk)?,
"usize" => typed_value_to_response::<usize>(kind, &route.file_path, chunk)?,
"f32" => typed_value_to_response::<f32>(kind, &route.file_path, chunk)?,
"f64" => typed_value_to_response::<f64>(kind, &route.file_path, chunk)?,
"OHLC" => typed_value_to_response::<OHLC>(kind, &route.file_path, chunk)?,
"Date" => typed_value_to_response::<Date>(kind, &route.file_path, chunk)?,
"Height" => typed_value_to_response::<Height>(kind, &route.file_path, chunk)?,
"u8" => typed_value_to_response::<u8>(kind, &route, chunk)?,
"u16" => typed_value_to_response::<u16>(kind, &route, chunk)?,
"u32" => typed_value_to_response::<u32>(kind, &route, chunk)?,
"u64" => typed_value_to_response::<u64>(kind, &route, chunk)?,
"usize" => typed_value_to_response::<usize>(kind, &route, chunk)?,
"f32" => typed_value_to_response::<f32>(kind, &route, chunk)?,
"f64" => typed_value_to_response::<f64>(kind, &route, chunk)?,
"OHLC" => typed_value_to_response::<OHLC>(kind, &route, chunk)?,
"Date" => typed_value_to_response::<Date>(kind, &route, chunk)?,
"Height" => typed_value_to_response::<Height>(kind, &route, chunk)?,
_ => panic!("Incompatible type: {type_name}"),
};

View File

@@ -1,27 +1,25 @@
use std::fmt::Debug;
use std::{fmt::Debug, path::Path};
use bincode::Decode;
use parser::{Date, Serialization, SerializedBTreeMap, SerializedVec};
use parser::{Date, SerializedBTreeMap, SerializedVec};
use serde::{de::DeserializeOwned, Serialize};
pub fn import_map<T>(relative_path: &str) -> color_eyre::Result<SerializedBTreeMap<Date, T>>
use crate::routes::Route;
pub fn import_map<T>(route: &Route) -> color_eyre::Result
where
T: Serialize + Debug + DeserializeOwned + Decode,
{
Serialization::from_extension(relative_path.split('.').last().unwrap()).import(relative_path)
}
pub fn import_vec<T>(relative_path: &str) -> color_eyre::Result<SerializedVec<T>>
pub fn import_vec<T>(route: &Route) -> color_eyre::Result
where
T: Serialize + Debug + DeserializeOwned + Decode,
{
Serialization::from_extension(relative_path.split('.').last().unwrap()).import(relative_path)
}
pub fn import_value<T>(relative_path: &str) -> color_eyre::Result<T>
pub fn import_value<T>(route: &Route) -> color_eyre::Result<T>
where
T: Serialize + Debug + DeserializeOwned + Decode,
{
Serialization::from_extension(relative_path.split('.').last().unwrap())
.import::<T>(relative_path)
}

View File

@@ -1,7 +1,7 @@
use std::sync::Arc;
use axum::{extract::State, http::HeaderMap, response::Response, routing::get, serve, Router};
use parser::log;
use parser::{log, reset_logs};
use reqwest::header::HOST;
use response::generic_to_reponse;
use routes::Routes;
@@ -12,7 +12,6 @@ use tower_http::compression::CompressionLayer;
mod chunk;
mod handler;
mod headers;
mod imports;
mod kind;
mod paths;
mod response;
@@ -36,6 +35,8 @@ pub struct AppState {
async fn main() -> color_eyre::Result<()> {
color_eyre::install()?;
reset_logs();
let routes = Routes::build();
routes.generate_dts_file();

View File

@@ -1,15 +1,16 @@
use std::fmt::Debug;
use std::{fmt::Debug, path::Path};
use axum::response::{IntoResponse, Json, Response};
use bincode::Decode;
use parser::{Date, SerializedBTreeMap, SerializedVec};
use serde::de::DeserializeOwned;
use serde::Serialize;
use crate::{
chunk::Chunk,
headers::{add_cache_control_to_headers, add_cors_to_headers, add_json_type_to_headers},
imports::{import_map, import_value, import_vec},
kind::Kind,
routes::Route,
};
#[derive(Serialize)]
@@ -24,16 +25,30 @@ where
pub fn typed_value_to_response<T>(
kind: Kind,
relative_path: &str,
route: &Route,
chunk: Option<Chunk>,
) -> color_eyre::Result<Response>
where
T: Serialize + Debug + DeserializeOwned + Decode,
{
Ok(match kind {
Kind::Date => dataset_to_response(import_map::<T>(relative_path)?, chunk.unwrap()),
Kind::Height => dataset_to_response(import_vec::<T>(relative_path)?, chunk.unwrap()),
Kind::Last => value_to_response(import_value::<T>(relative_path)?),
Kind::Date => dataset_to_response(
route
.serialization
.import::<SerializedBTreeMap<Date, T>>(Path::new(&route.file_path))?,
chunk.unwrap(),
),
Kind::Height => dataset_to_response(
route
.serialization
.import::<SerializedVec<T>>(Path::new(&route.file_path))?,
chunk.unwrap(),
),
Kind::Last => value_to_response(
route
.serialization
.import::<T>(Path::new(&route.file_path))?,
),
})
}

View File

@@ -1,6 +1,7 @@
use std::{
collections::{BTreeMap, HashMap},
fs,
path::Path,
};
use derive_deref::{Deref, DerefMut};
@@ -26,31 +27,22 @@ const APP_TYPES_PATH: &str = "../app/src/types";
impl Routes {
pub fn build() -> Self {
let path_to_type: BTreeMap<String, String> =
Json::import(&format!("{INPUTS_PATH}/disk_path_to_type.json")).unwrap();
Json::import(Path::new(&format!("{INPUTS_PATH}/disk_path_to_type.json"))).unwrap();
let mut routes = Routes::default();
path_to_type.into_iter().for_each(|(key, value)| {
let mut split_key = key.split('/').collect_vec();
let mut split_last = split_key.pop().unwrap().split('.').rev().collect_vec();
let last = split_last.pop().unwrap().to_owned();
let last = split_key.pop().unwrap().to_owned();
let mut skip = 2;
let serialization = split_last.pop().map_or_else(
|| {
if *split_key.get(1).unwrap() == "price" {
skip = 1;
let mut serialization = Serialization::Binary;
Serialization::Json
} else {
Serialization::Binary
}
},
Serialization::from_extension,
);
if *split_key.get(1).unwrap() == "price" {
skip = 1;
serialization = Serialization::Json;
}
let split_key = split_key.iter().skip(skip).collect_vec();