vec: compression part 1

This commit is contained in:
nym21
2025-03-13 17:11:04 +01:00
parent b4fbcf6bee
commit c459a3033d
30 changed files with 960 additions and 337 deletions

View File

@@ -1 +0,0 @@

View File

@@ -8,7 +8,7 @@ use crate::run::RunConfig;
pub fn query(params: QueryParams) -> color_eyre::Result<()> {
let config = RunConfig::import(None)?;
let mut indexer = Indexer::new(config.indexeddir())?;
let mut indexer = Indexer::new(config.indexeddir(), config.check_collisions())?;
indexer.import_vecs()?;
let mut computer = Computer::new(config.computeddir(), None);

View File

@@ -26,7 +26,7 @@ pub fn run(config: RunConfig) -> color_eyre::Result<()> {
let parser = brk_parser::Parser::new(config.blocksdir(), rpc);
let mut indexer = Indexer::new(config.indexeddir())?;
let mut indexer = Indexer::new(config.indexeddir(), config.check_collisions())?;
indexer.import_stores()?;
indexer.import_vecs()?;
@@ -134,6 +134,10 @@ pub struct RunConfig {
/// Delay between runs, default: 0, saved
#[arg(long, value_name = "SECONDS")]
delay: Option<u64>,
/// DEV: Activate checking address hashes for collisions when indexing, default: false, saved
#[arg(long, value_name = "BOOL")]
check_collisions: Option<bool>,
}
impl RunConfig {
@@ -195,6 +199,10 @@ impl RunConfig {
config_saved.delay = Some(delay);
}
if let Some(check_collisions) = config_args.check_collisions.take() {
config_saved.check_collisions = Some(check_collisions);
}
if config_args != RunConfig::default() {
dbg!(config_args);
panic!("Didn't consume the full config")
@@ -378,6 +386,10 @@ impl RunConfig {
pub fn fetch(&self) -> bool {
self.fetch.is_some_and(|b| b)
}
pub fn check_collisions(&self) -> bool {
self.check_collisions.is_some_and(|b| b)
}
}
#[derive(

View File

@@ -28,7 +28,7 @@ pub fn main() -> color_eyre::Result<()> {
let outputs_dir = Path::new("../../_outputs");
let mut indexer = Indexer::new(outputs_dir.join("indexed"))?;
let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?;
indexer.import_stores()?;
indexer.import_vecs()?;

View File

@@ -9,7 +9,7 @@ use std::{
use brk_core::CheckedSub;
use brk_exit::Exit;
use brk_vec::{Error, Result, StoredIndex, StoredType, Version};
use brk_vec::{Compressed, Error, Result, StoredIndex, StoredType, Version};
const FLUSH_EVERY: usize = 10_000;
@@ -25,7 +25,7 @@ where
T: StoredType,
{
pub fn import(path: &Path, version: Version) -> brk_vec::Result<Self> {
let vec = brk_vec::StorableVec::forced_import(path, version)?;
let vec = brk_vec::StorableVec::forced_import(path, version, Compressed::YES)?;
Ok(Self {
computed_version: None,
@@ -103,14 +103,14 @@ where
where
A: StoredIndex,
B: StoredType,
F: FnMut((A, &B, &mut Self, &mut brk_vec::StorableVec<A, B>)) -> (I, T),
F: FnMut((A, B, &mut Self, &mut brk_vec::StorableVec<A, B>)) -> (I, T),
{
self.validate_computed_version_or_reset_file(
Version::from(0) + self.version() + other.version(),
)?;
let index = max_from.min(A::from(self.len()));
other.iter_from(index, |(a, b, other)| {
other.iter_from_cloned(index, |(a, b, other)| {
let (i, v) = t((a, b, self, other));
self.push_and_flush_if_needed(i, v, exit)
})?;

View File

@@ -100,7 +100,7 @@ impl Vecs {
self.height_to_real_date.compute_transform(
starting_indexes.height,
&mut indexer_vecs.height_to_timestamp,
|(h, t, ..)| (h, Date::from(*t)),
|(h, t, ..)| (h, Date::from(t)),
exit,
)?;
@@ -112,7 +112,10 @@ impl Vecs {
.decremented()
.and_then(|h| s.read(h).ok())
.flatten()
.map_or(*d, |prev_d| if prev_d > d { *prev_d } else { *d });
.map_or(d, |prev_d| {
let prev_d = *prev_d;
if prev_d > d { prev_d } else { d }
});
(h, d)
},
exit,
@@ -121,7 +124,7 @@ impl Vecs {
self.height_to_dateindex.compute_transform(
starting_indexes.height,
&mut self.height_to_fixed_date,
|(h, d, ..)| (h, Dateindex::try_from(*d).unwrap()),
|(h, d, ..)| (h, Dateindex::try_from(d).unwrap()),
exit,
)?;

View File

@@ -134,7 +134,7 @@ impl Vecs {
let ohlc = fetcher
.get_height(
h,
*t,
t,
h.decremented().map(|prev_h| {
height_to_timestamp
.get(prev_h)
@@ -215,7 +215,7 @@ impl Vecs {
self.height_to_sats_per_dollar.compute_transform(
starting_indexes.height,
&mut self.height_to_close,
|(di, close, ..)| (di, Close::from(Sats::ONE_BTC / **close)),
|(di, close, ..)| (di, Close::from(Sats::ONE_BTC / *close)),
exit,
)?;
@@ -223,7 +223,7 @@ impl Vecs {
starting_indexes.dateindex,
&mut indexes.dateindex_to_date,
|(di, d, ..)| {
let ohlc = fetcher.get_date(*d).unwrap();
let ohlc = fetcher.get_date(d).unwrap();
(di, ohlc)
},
exit,
@@ -295,7 +295,7 @@ impl Vecs {
self.dateindex_to_sats_per_dollar.compute_transform(
starting_indexes.dateindex,
&mut self.dateindex_to_close,
|(di, close, ..)| (di, Close::from(Sats::ONE_BTC / **close)),
|(di, close, ..)| (di, Close::from(Sats::ONE_BTC / *close)),
exit,
)?;

View File

@@ -8,7 +8,20 @@ use crate::Error;
use super::{Addressbytes, Addresstype, BlockHash, Txid};
#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[derive(
Debug,
Deref,
Clone,
Copy,
PartialEq,
Eq,
PartialOrd,
Ord,
FromBytes,
Immutable,
IntoBytes,
KnownLayout,
)]
pub struct AddressHash([u8; 8]);
impl From<(&Addressbytes, Addresstype)> for AddressHash {
fn from((addressbytes, addresstype): (&Addressbytes, Addresstype)) -> Self {
@@ -41,8 +54,26 @@ impl From<AddressHash> for ByteView {
}
}
#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[derive(
Debug,
Deref,
Clone,
Copy,
PartialEq,
Eq,
PartialOrd,
Ord,
FromBytes,
Immutable,
IntoBytes,
KnownLayout,
)]
pub struct BlockHashPrefix([u8; 8]);
impl From<BlockHash> for BlockHashPrefix {
fn from(value: BlockHash) -> Self {
Self::from(&value)
}
}
impl From<&BlockHash> for BlockHashPrefix {
fn from(value: &BlockHash) -> Self {
Self(copy_first_8bytes(&value[..]).unwrap())
@@ -65,8 +96,26 @@ impl From<BlockHashPrefix> for ByteView {
}
}
#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)]
#[derive(
Debug,
Deref,
Clone,
Copy,
PartialEq,
Eq,
PartialOrd,
Ord,
FromBytes,
Immutable,
IntoBytes,
KnownLayout,
)]
pub struct TxidPrefix([u8; 8]);
impl From<Txid> for TxidPrefix {
fn from(value: Txid) -> Self {
Self::from(&value)
}
}
impl From<&Txid> for TxidPrefix {
fn from(value: &Txid) -> Self {
Self(copy_first_8bytes(&value[..]).unwrap())

View File

@@ -22,12 +22,6 @@ Vecs: `src/storage/vecs/mod.rs`
Stores: `src/storage/stores/mod.rs`
## Examples
Rust: `src/main.rs`
Python: `../python/parse.py`
## Benchmark
Indexing `0..885_835` took `11 hours 6 min 50 s` on a Macbook Pro M3 Pro with 36 GB of RAM

View File

@@ -1,6 +1,6 @@
use std::{path::Path, thread::sleep, time::Duration};
use std::path::Path;
use brk_core::default_bitcoin_path;
use brk_core::{default_bitcoin_path, dot_brk_path};
use brk_exit::Exit;
use brk_indexer::{Indexer, rpc::RpcApi};
use brk_parser::{
@@ -24,23 +24,25 @@ fn main() -> color_eyre::Result<()> {
let parser = Parser::new(bitcoin_dir.join("blocks"), rpc);
let mut indexer = Indexer::new(Path::new("../../_outputs/indexed").to_owned())?;
let outputs = dot_brk_path().join("outputs");
let mut indexer = Indexer::new(outputs.join("indexed").to_owned(), true)?;
indexer.import_stores()?;
indexer.import_vecs()?;
loop {
let block_count = rpc.get_block_count()?;
// loop {
let block_count = rpc.get_block_count()?;
info!("{block_count} blocks found.");
info!("{block_count} blocks found.");
indexer.index(&parser, rpc, &exit)?;
indexer.index(&parser, rpc, &exit)?;
info!("Waiting for new blocks...");
info!("Waiting for new blocks...");
while block_count == rpc.get_block_count()? {
sleep(Duration::from_secs(1))
}
}
// while block_count == rpc.get_block_count()? {
// sleep(Duration::from_secs(1))
// }
// }
#[allow(unreachable_code)]
Ok(())

View File

@@ -18,6 +18,7 @@ pub use brk_parser::*;
use bitcoin::{Transaction, TxIn, TxOut};
use brk_exit::Exit;
use brk_vec::Compressed;
use color_eyre::eyre::{ContextCompat, eyre};
use log::info;
use rayon::prelude::*;
@@ -36,15 +37,17 @@ pub struct Indexer {
path: PathBuf,
vecs: Option<Vecs>,
stores: Option<Stores>,
check_collisions: bool,
}
impl Indexer {
pub fn new(indexes_dir: PathBuf) -> color_eyre::Result<Self> {
pub fn new(indexes_dir: PathBuf, check_collisions: bool) -> color_eyre::Result<Self> {
setrlimit()?;
Ok(Self {
path: indexes_dir,
vecs: None,
stores: None,
check_collisions,
})
}
@@ -66,8 +69,6 @@ impl Indexer {
rpc: &'static rpc::Client,
exit: &Exit,
) -> color_eyre::Result<Indexes> {
let check_collisions = true;
let starting_indexes = Indexes::try_from((
self.vecs.as_mut().unwrap(),
self.stores.as_ref().unwrap(),
@@ -96,7 +97,7 @@ impl Indexer {
let mut idxs = starting_indexes.clone();
let start = Some(idxs.height);
let end = None; //Some(Height::new(400_000));
let end = None;
if starting_indexes.height > Height::try_from(rpc)?
|| end.is_some_and(|end| starting_indexes.height > end)
@@ -124,12 +125,14 @@ impl Indexer {
Ok(())
};
parser.parse(start, None).iter().try_for_each(
parser.parse(start, end).iter().try_for_each(
|(height, block, blockhash)| -> color_eyre::Result<()> {
info!("Indexing block {height}...");
idxs.height = height;
let check_collisions = self.check_collisions && height > Height::new(886_000);
let blockhash = BlockHash::from(blockhash);
let blockhash_prefix = BlockHashPrefix::from(&blockhash);
@@ -232,8 +235,6 @@ impl Indexer {
let txindex = idxs.txindex + block_txindex;
let txinindex = idxs.txinindex + Txinindex::from(block_txinindex);
// dbg!((txindex, txinindex, vin));
let outpoint = txin.previous_output;
let txid = Txid::from(outpoint.txid);
@@ -598,6 +599,10 @@ impl Indexer {
return Ok(());
}
if !check_collisions {
return Ok(())
}
let len = vecs.txindex_to_txid.len();
// Ok if `get` is not par as should happen only twice
let prev_txid = vecs
@@ -608,8 +613,6 @@ impl Indexer {
dbg!(txindex, len);
})?;
// #[allow(clippy::redundant_locals)]
// let prev_txid = prev_txid;
let prev_txid = prev_txid.as_ref();
// If another Txid needs to be added to the list

View File

@@ -5,7 +5,7 @@ use std::{
path::{Path, PathBuf},
};
use brk_vec::{StoredIndex, StoredType, Version};
use brk_vec::{Compressed, StoredIndex, StoredType, Version};
use super::Height;
@@ -20,10 +20,10 @@ where
I: StoredIndex,
T: StoredType,
{
pub fn import(path: &Path, version: Version) -> brk_vec::Result<Self> {
let mut vec = brk_vec::StorableVec::forced_import(path, version)?;
pub fn import(path: &Path, version: Version, compressed: Compressed) -> brk_vec::Result<Self> {
let mut vec = brk_vec::StorableVec::forced_import(path, version, compressed)?;
vec.reset_mmaps()?;
vec.init_big_cache()?;
Ok(Self {
height: Height::try_from(Self::path_height_(path).as_path()).ok(),
@@ -31,11 +31,12 @@ where
})
}
pub fn truncate_if_needed(&mut self, index: I, height: Height) -> brk_vec::Result<Option<T>> {
pub fn truncate_if_needed(&mut self, index: I, height: Height) -> brk_vec::Result<()> {
if self.height.is_none_or(|self_height| self_height != height) {
height.write(&self.path_height())?;
}
self.vec.truncate_if_needed(index)
self.vec.truncate_if_needed(index)?;
Ok(())
}
pub fn height(&self) -> brk_core::Result<Height> {
@@ -51,7 +52,7 @@ where
pub fn flush(&mut self, height: Height) -> io::Result<()> {
height.write(&self.path_height())?;
self.vec.flush()?;
self.vec.reset_mmaps()
self.vec.init_big_cache()
}
}

View File

@@ -7,7 +7,7 @@ use brk_core::{
P2TRindex, P2WPKHAddressBytes, P2WPKHindex, P2WSHAddressBytes, P2WSHindex, Pushonlyindex, Sats,
Timestamp, TxVersion, Txid, Txindex, Txinindex, Txoutindex, Unknownindex, Weight,
};
use brk_vec::{AnyStorableVec, Version};
use brk_vec::{AnyStorableVec, Compressed, Version};
use rayon::prelude::*;
use crate::Indexes;
@@ -71,168 +71,217 @@ impl Vecs {
addressindex_to_addresstype: StorableVec::import(
&path.join("addressindex_to_addresstype"),
Version::from(1),
Compressed::YES,
)?,
addressindex_to_addresstypeindex: StorableVec::import(
&path.join("addressindex_to_addresstypeindex"),
Version::from(1),
Compressed::YES,
)?,
addressindex_to_height: StorableVec::import(
&path.join("addressindex_to_height"),
Version::from(1),
Compressed::YES,
)?,
height_to_blockhash: StorableVec::import(
&path.join("height_to_blockhash"),
Version::from(1),
Compressed::NO,
)?,
height_to_difficulty: StorableVec::import(
&path.join("height_to_difficulty"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_addressindex: StorableVec::import(
&path.join("height_to_first_addressindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_emptyindex: StorableVec::import(
&path.join("height_to_first_emptyindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_multisigindex: StorableVec::import(
&path.join("height_to_first_multisigindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_opreturnindex: StorableVec::import(
&path.join("height_to_first_opreturnindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_pushonlyindex: StorableVec::import(
&path.join("height_to_first_pushonlyindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_txindex: StorableVec::import(
&path.join("height_to_first_txindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_txinindex: StorableVec::import(
&path.join("height_to_first_txinindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_txoutindex: StorableVec::import(
&path.join("height_to_first_txoutindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_unknownindex: StorableVec::import(
&path.join("height_to_first_unkownindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_p2pk33index: StorableVec::import(
&path.join("height_to_first_p2pk33index"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_p2pk65index: StorableVec::import(
&path.join("height_to_first_p2pk65index"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_p2pkhindex: StorableVec::import(
&path.join("height_to_first_p2pkhindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_p2shindex: StorableVec::import(
&path.join("height_to_first_p2shindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_p2trindex: StorableVec::import(
&path.join("height_to_first_p2trindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_p2wpkhindex: StorableVec::import(
&path.join("height_to_first_p2wpkhindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_first_p2wshindex: StorableVec::import(
&path.join("height_to_first_p2wshindex"),
Version::from(1),
Compressed::YES,
)?,
height_to_size: StorableVec::import(
&path.join("height_to_size"),
Version::from(1),
Compressed::YES,
)?,
height_to_size: StorableVec::import(&path.join("height_to_size"), Version::from(1))?,
height_to_timestamp: StorableVec::import(
&path.join("height_to_timestamp"),
Version::from(1),
Compressed::YES,
)?,
height_to_weight: StorableVec::import(
&path.join("height_to_weight"),
Version::from(1),
Compressed::YES,
)?,
p2pk33index_to_p2pk33addressbytes: StorableVec::import(
&path.join("p2pk33index_to_p2pk33addressbytes"),
Version::from(1),
Compressed::NO,
)?,
p2pk65index_to_p2pk65addressbytes: StorableVec::import(
&path.join("p2pk65index_to_p2pk65addressbytes"),
Version::from(1),
Compressed::NO,
)?,
p2pkhindex_to_p2pkhaddressbytes: StorableVec::import(
&path.join("p2pkhindex_to_p2pkhaddressbytes"),
Version::from(1),
Compressed::NO,
)?,
p2shindex_to_p2shaddressbytes: StorableVec::import(
&path.join("p2shindex_to_p2shaddressbytes"),
Version::from(1),
Compressed::NO,
)?,
p2trindex_to_p2traddressbytes: StorableVec::import(
&path.join("p2trindex_to_p2traddressbytes"),
Version::from(1),
Compressed::NO,
)?,
p2wpkhindex_to_p2wpkhaddressbytes: StorableVec::import(
&path.join("p2wpkhindex_to_p2wpkhaddressbytes"),
Version::from(1),
Compressed::NO,
)?,
p2wshindex_to_p2wshaddressbytes: StorableVec::import(
&path.join("p2wshindex_to_p2wshaddressbytes"),
Version::from(1),
Compressed::NO,
)?,
txindex_to_first_txinindex: StorableVec::import(
&path.join("txindex_to_first_txinindex"),
Version::from(1),
Compressed::YES,
)?,
txindex_to_first_txoutindex: StorableVec::import(
&path.join("txindex_to_first_txoutindex"),
Version::from(1),
Compressed::NO,
)?,
txindex_to_height: StorableVec::import(
&path.join("txindex_to_height"),
Version::from(1),
Compressed::YES,
)?,
txindex_to_locktime: StorableVec::import(
&path.join("txindex_to_locktime"),
Version::from(1),
Compressed::YES,
)?,
txindex_to_txid: StorableVec::import(
&path.join("txindex_to_txid"),
Version::from(1),
Compressed::NO,
)?,
txindex_to_txid: StorableVec::import(&path.join("txindex_to_txid"), Version::from(1))?,
txindex_to_base_size: StorableVec::import(
&path.join("txindex_to_base_size"),
Version::from(1),
Compressed::YES,
)?,
txindex_to_total_size: StorableVec::import(
&path.join("txindex_to_total_size"),
Version::from(1),
Compressed::YES,
)?,
txindex_to_is_explicitly_rbf: StorableVec::import(
&path.join("txindex_to_is_explicitly_rbf"),
Version::from(1),
Compressed::YES,
)?,
txindex_to_txversion: StorableVec::import(
&path.join("txindex_to_txversion"),
Version::from(1),
Compressed::YES,
)?,
txinindex_to_txoutindex: StorableVec::import(
&path.join("txinindex_to_txoutindex"),
Version::from(1),
Compressed::YES,
)?,
txoutindex_to_addressindex: StorableVec::import(
&path.join("txoutindex_to_addressindex"),
Version::from(1),
Compressed::YES,
)?,
txoutindex_to_value: StorableVec::import(
&path.join("txoutindex_to_value"),
Version::from(1),
Compressed::YES,
)?,
})
}

View File

@@ -15,17 +15,17 @@ fn main() {
.unwrap(),
));
// let start = None;
// let end = None;
let start = None;
let end = None;
let parser = Parser::new(bitcoin_dir.join("blocks"), rpc);
// parser
// .parse(start, end)
// .iter()
// .for_each(|(height, _block, hash)| {
// println!("{height}: {hash}");
// });
parser
.parse(start, end)
.iter()
.for_each(|(height, _block, hash)| {
println!("{height}: {hash}");
});
println!(
"{}",

View File

@@ -9,7 +9,7 @@ pub fn main() -> color_eyre::Result<()> {
let outputs_dir = Path::new("../../_outputs");
let mut indexer = Indexer::new(outputs_dir.join("indexed"))?;
let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?;
indexer.import_vecs()?;
let mut computer = Computer::new(outputs_dir.join("computed"), None);

View File

@@ -21,8 +21,8 @@ color-eyre = { workspace = true }
jiff = { workspace = true }
log = { workspace = true }
minreq = { workspace = true }
oxc = { version = "0.57.0", features = ["codegen", "minifier"] }
oxc = { version = "0.58.0", features = ["codegen", "minifier"] }
serde = { workspace = true }
tokio = { version = "1.44.0", features = ["full"] }
tokio = { version = "1.44.1", features = ["full"] }
tower-http = { version = "0.6.2", features = ["compression-full"] }
zip = "2.2.3"

View File

@@ -31,7 +31,7 @@ pub fn main() -> color_eyre::Result<()> {
let outputs_dir = Path::new("../../_outputs");
let mut indexer = Indexer::new(outputs_dir.join("indexed"))?;
let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?;
indexer.import_stores()?;
indexer.import_vecs()?;

View File

@@ -14,3 +14,4 @@ rayon = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
zerocopy = { workspace = true }
zstd = "0.13.3"

View File

@@ -1,16 +1,19 @@
use std::path::Path;
use std::{fs, path::Path};
use brk_vec::{StorableVec, Version};
use brk_vec::{Compressed, StorableVec, Version};
fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = fs::remove_dir_all("./vec");
{
let mut vec: StorableVec<usize, u32> =
StorableVec::forced_import(Path::new("./vec"), Version::from(1))?;
StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?;
vec.push(0);
vec.push(1);
vec.push(2);
(0..21_u32).for_each(|v| {
vec.push(v);
});
dbg!(vec.get(0)?); // Some(0)
dbg!(vec.get(20)?); // Some(0)
dbg!(vec.get(21)?); // None
vec.flush()?;
@@ -18,12 +21,54 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
{
let mut vec: StorableVec<usize, u32> =
StorableVec::forced_import(Path::new("./vec"), Version::from(1))?;
StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?;
dbg!(vec.get(0)?); // 0
dbg!(vec.read(0)?); // 0
dbg!(vec.read(1)?); // 0
dbg!(vec.read(2)?); // 0
dbg!(vec.read(20)?); // 0
dbg!(vec.get(20)?); // 0
dbg!(vec.read(0)?); // 0
vec.push(21);
vec.push(22);
dbg!(vec.get(20)?);
dbg!(vec.get(21)?);
dbg!(vec.get(22)?);
dbg!(vec.get(23)?);
vec.flush()?;
}
{
let mut vec: StorableVec<usize, u32> =
StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?;
vec.init_big_cache()?;
dbg!(vec.get(0)?); // 0
dbg!(vec.get(20)?); // 0
dbg!(vec.get(21)?); // 0
dbg!(vec.get(22)?); // 0
vec.truncate_if_needed(14)?;
dbg!(vec.get(0)?); // 0
dbg!(vec.get(5)?); // 0
dbg!(vec.get(20)?); // 0
vec.iter(|(_, v)| {
dbg!(v);
Ok(())
})?;
vec.iter_from(5, |(_, v)| {
dbg!(v);
Ok(())
})?;
dbg!(vec.collect_range(Some(-5), None)?);
}
Ok(())

View File

@@ -21,6 +21,7 @@ pub enum Error {
FailedKeyTryIntoUsize,
UnsupportedUnflushedState,
RangeFromAfterTo,
DifferentCompressionMode,
}
impl From<io::Error> for Error {
@@ -46,7 +47,10 @@ impl fmt::Display for Error {
match self {
Error::WrongEndian => write!(f, "Wrong endian"),
Error::DifferentVersion { found, expected } => {
write!(f, "Different version; found: {found:?}, expected: {expected:?}")
write!(
f,
"Different version; found: {found:?}, expected: {expected:?}"
)
}
Error::MmapsVecIsTooSmall => write!(f, "Mmaps vec is too small"),
Error::IO(error) => Debug::fmt(&error, f),
@@ -56,10 +60,14 @@ impl fmt::Display for Error {
Error::ExpectVecToHaveIndex => write!(f, "Expect vec to have index"),
Error::FailedKeyTryIntoUsize => write!(f, "Failed to convert key to usize"),
Error::UnsupportedUnflushedState => {
write!(f, "Unsupported unflush state, please flush before using this function")
write!(
f,
"Unsupported unflush state, please flush before using this function"
)
}
Error::ZeroCopyError => write!(f, "Zero copy convert error"),
Error::RangeFromAfterTo => write!(f, "Range, from is after to"),
Error::DifferentCompressionMode => write!(f, "Different compression mode chosen"),
}
}
}

View File

@@ -7,15 +7,15 @@ use std::{
cmp::Ordering,
fmt::Debug,
fs::{self, File, OpenOptions},
io::{self, Read, Seek, SeekFrom, Write},
io::{self, Seek, SeekFrom, Write},
marker::PhantomData,
mem,
ops::Range,
path::{Path, PathBuf},
sync::OnceLock,
};
pub use memmap2;
use memmap2::Mmap;
use rayon::prelude::*;
pub use zerocopy;
@@ -26,18 +26,21 @@ mod traits;
pub use enums::*;
pub use structs::*;
pub use traits::*;
use zstd::DEFAULT_COMPRESSION_LEVEL;
/// In bytes
const MAX_PAGE_SIZE: usize = 4 * 4096;
const ONE_MIB: usize = 1024 * 1024;
const ONE_KIB: usize = 1024;
const MAX_PAGE_SIZE: usize = 16 * ONE_KIB;
const ONE_MIB: usize = ONE_KIB * ONE_KIB;
const MAX_CACHE_SIZE: usize = 100 * ONE_MIB;
type SmallCache<T> = Option<(usize, Box<[T]>)>;
///
/// A very small, fast, efficient and simple storable Vec
///
/// Reads (imports of Mmap) are lazy
///
/// Stores only raw data without any overhead, and doesn't even have a header (TODO: which it should, at least to Err if wrong endian)
/// Stores only raw data without any overhead, and doesn't even have a header
///
/// The file isn't portable for speed reasons (TODO: but could be ?)
///
@@ -47,12 +50,20 @@ const MAX_CACHE_SIZE: usize = 100 * ONE_MIB;
pub struct StorableVec<I, T> {
version: Version,
pathbuf: PathBuf,
file: File,
/// **Number of values NOT number of bytes**
file_len: usize,
file_position: u64,
buf: Vec<u8>,
mmaps: Vec<OnceLock<Box<memmap2::Mmap>>>, // Boxed Mmap to reduce the size of the Lock (from 24 to 16)
stored_len: Length,
compressed: Compressed,
// Compressed
decoded_pages: Option<Vec<OnceLock<Box<[T]>>>>,
decoded_page: SmallCache<T>,
pages: CompressedPagesMetadata,
// Raw
// raw_pages: Vec<OnceLock<Box<memmap2::Mmap>>>,
// raw_page: memmap2::Mmap,
// file: File,
// file_position: u64,
// buf: Vec<u8>,
pushed: Vec<T>,
phantom: PhantomData<I>,
}
@@ -68,51 +79,49 @@ where
pub const PAGE_SIZE: usize = Self::PER_PAGE * Self::SIZE_OF_T;
pub const CACHE_LENGTH: usize = MAX_CACHE_SIZE / Self::PAGE_SIZE;
/// Same as import but will remove the folder if the endian or the version is different, so be careful !
pub fn forced_import(path: &Path, version: Version) -> Result<Self> {
let res = Self::import(path, version);
/// Same as import but will reset the folder under certain errors, so be careful !
pub fn forced_import(path: &Path, version: Version, compressed: Compressed) -> Result<Self> {
let res = Self::import(path, version, compressed);
match res {
Err(Error::WrongEndian)
| Err(Error::DifferentCompressionMode)
| Err(Error::DifferentVersion {
found: _,
expected: _,
}) => {
fs::remove_dir_all(path)?;
Self::import(path, version)
Self::import(path, version, compressed)
}
_ => res,
}
}
pub fn import(path: &Path, version: Version) -> Result<Self> {
pub fn import(path: &Path, version: Version, compressed: Compressed) -> Result<Self> {
fs::create_dir_all(path)?;
let version_path = Self::path_version_(path);
version.validate(version_path.as_ref())?;
version.write(version_path.as_ref())?;
let file = Self::open_file_(&Self::path_vec_(path))?;
let compressed_path = Self::path_compressed_(path);
compressed.validate(compressed_path.as_ref())?;
compressed.write(compressed_path.as_ref())?;
let mut slf = Self {
let stored_len = Length::try_from(Self::path_length_(path).as_path())?;
let pages = CompressedPagesMetadata::read(Self::path_pages_(path).as_path())?;
Ok(Self {
version,
compressed,
pathbuf: path.to_owned(),
file_position: 0,
file_len: Self::read_disk_len_(&file)?,
file,
buf: Self::create_buffer(),
mmaps: vec![],
stored_len,
decoded_pages: None,
pushed: vec![],
pages,
decoded_page: None,
phantom: PhantomData,
};
slf.reset_file_metadata()?;
Ok(slf)
}
#[inline]
fn create_buffer() -> Vec<u8> {
vec![0; Self::SIZE_OF_T]
})
}
fn open_file(&self) -> io::Result<File> {
@@ -127,57 +136,91 @@ where
.open(path)
}
pub fn open_then_read(&self, index: I) -> Result<T> {
#[inline(always)]
fn mmap(&self, page: &CompressedPageMetadata) -> io::Result<Mmap> {
let len = page.bytes_len as usize;
let offset = page.start;
let file = self.open_file()?;
Ok(unsafe {
memmap2::MmapOptions::new()
.len(len)
.offset(offset)
.map(&file)?
})
}
fn decode(&self, page_index: usize) -> Result<Box<[T]>> {
if self.pages.len() <= page_index {
return Err(Error::ExpectVecToHaveIndex);
}
let page = self.pages.get(page_index).unwrap();
let mmap = self.mmap(page)?;
let decoded = zstd::decode_all(&mmap[..]);
if decoded.is_err() {
dbg!((page, page_index, &mmap[..], &mmap.len(), &decoded));
}
Ok(decoded?
.chunks(Self::SIZE_OF_T)
.map(|slice| T::try_read_from_bytes(slice).unwrap())
.collect::<Vec<_>>()
.into_boxed_slice())
}
pub fn open_then_read(&self, index: I) -> Result<Option<T>> {
self.open_then_read_(Self::i_to_usize(index)?)
}
fn open_then_read_(&self, index: usize) -> Result<T> {
let mut file = self.open_file()?;
Self::seek_(&mut file, Self::index_to_byte_index(index))?;
let mut buf = Self::create_buffer();
Self::read_exact(&mut file, &mut buf).map(|v| v.to_owned())
fn open_then_read_(&self, index: usize) -> Result<Option<T>> {
Ok(self
.decode(Self::index_to_page_index(index))?
.get(Self::index_to_decoded_index(index))
.cloned())
}
fn read_disk_len(&self) -> io::Result<usize> {
Self::read_disk_len_(&self.file)
}
fn read_disk_len_(file: &File) -> io::Result<usize> {
Ok(Self::byte_index_to_index(file.metadata()?.len() as usize))
pub fn init_big_cache(&mut self) -> io::Result<()> {
self.decoded_pages.replace(vec![]);
self.reset_big_cache()
}
fn reset_file_metadata(&mut self) -> io::Result<()> {
self.file_len = self.read_disk_len()?;
self.file_position = self.file.seek(SeekFrom::Start(0))?;
Ok(())
}
fn reset_big_cache(&mut self) -> io::Result<()> {
if self.decoded_pages.is_none() {
return Ok(());
}
pub fn reset_mmaps(&mut self) -> io::Result<()> {
self.mmaps.par_iter_mut().for_each(|lock| {
let big_cache = self.decoded_pages.as_mut().unwrap();
big_cache.par_iter_mut().for_each(|lock| {
lock.take();
});
let len = (self.file_len as f64 / Self::PER_PAGE as f64).ceil() as usize;
let len = (*self.stored_len as f64 / Self::PER_PAGE as f64).ceil() as usize;
let len = Self::CACHE_LENGTH.min(len);
if self.mmaps.len() != len {
self.mmaps.resize_with(len, Default::default);
if big_cache.len() != len {
big_cache.resize_with(len, Default::default);
}
Ok(())
}
#[inline]
fn seek(&mut self, byte_index: u64) -> io::Result<u64> {
self.file.seek(SeekFrom::Start(byte_index))
}
#[inline]
fn seek_(file: &mut File, byte_index: u64) -> io::Result<u64> {
file.seek(SeekFrom::Start(byte_index))
fn reset_caches(&mut self) -> io::Result<()> {
self.decoded_page.take();
self.reset_big_cache()
}
fn read_exact<'a>(file: &'a mut File, buf: &'a mut [u8]) -> Result<&'a T> {
file.read_exact(buf)?;
let v = T::try_ref_from_bytes(&buf[..])?;
Ok(v)
#[inline(always)]
fn index_to_page_index(index: usize) -> usize {
index / Self::PER_PAGE
}
#[inline(always)]
fn index_to_decoded_index(index: usize) -> usize {
index % Self::PER_PAGE
}
#[inline]
@@ -196,46 +239,28 @@ where
Err(error) => return Err(error),
}
// if !self.updated.is_empty() {
// if let Some(v) = self.updated.get(&index) {
// return Ok(Some(v));
// }
// }
if let Some(big_cache) = self
.decoded_pages
.as_ref()
.and_then(|v| if v.is_empty() { None } else { Some(v) })
{
let page_index = Self::index_to_page_index(index);
let last_index = *self.stored_len - 1;
let max_page_index = last_index / Self::PER_PAGE;
let page_index = index / Self::PER_PAGE;
let last_index = self.file_len - 1;
let max_page_index = last_index / Self::PER_PAGE;
let min_page_index = (max_page_index + 1) - self.mmaps.len();
let min_page_index = (max_page_index + 1) - big_cache.len();
// let min_open_page = self.min.load(AtomicOrdering::SeqCst);
// if self.min.load(AtomicOrdering::SeqCst) {
// self.min.set(value)
// }
if !self.mmaps.is_empty() && page_index >= min_page_index {
let mmap = &**self
.mmaps
.get(page_index - min_page_index)
.ok_or(Error::MmapsVecIsTooSmall)?
.get_or_init(|| {
Box::new(unsafe {
memmap2::MmapOptions::new()
.len(Self::PAGE_SIZE)
.offset((page_index * Self::PAGE_SIZE) as u64)
.map(&self.file)
.unwrap()
})
});
let range = Self::index_to_byte_range(index);
let slice = &mmap[range];
return Ok(Some(Value::Ref(T::try_ref_from_bytes(slice)?)));
if page_index >= min_page_index {
return Ok(big_cache
.get(page_index - min_page_index)
.ok_or(Error::MmapsVecIsTooSmall)?
.get_or_init(|| self.decode(page_index).unwrap())
.get(Self::index_to_decoded_index(index))
.map(|v| Value::Ref(v)));
}
}
Ok(self
.open_then_read_(index)
.map_or(None, |v| Some(Value::Owned(v))))
Ok(self.open_then_read_(index)?.map(|v| Value::Owned(v)))
}
#[inline]
@@ -255,17 +280,19 @@ where
Err(error) => return Err(error),
}
let byte_index = Self::index_to_byte_index(index);
if self.file_position != byte_index {
self.file_position = self.seek(Self::index_to_byte_index(index))?;
}
match Self::read_exact(&mut self.file, &mut self.buf) {
Ok(value) => {
self.file_position += Self::SIZE_OF_T as u64;
Ok(Some(value))
}
Err(e) => Err(e),
let page_index = Self::index_to_page_index(index);
if self.decoded_page.as_ref().is_none_or(|b| b.0 != page_index) {
self.decoded_page
.replace((page_index, self.decode(page_index)?));
}
Ok(self
.decoded_page
.as_ref()
.unwrap()
.1
.get(Self::index_to_decoded_index(index)))
}
pub fn read_last(&mut self) -> Result<Option<&T>> {
@@ -278,33 +305,44 @@ where
pub fn iter<F>(&mut self, f: F) -> Result<()>
where
F: FnMut((I, &T, &mut Self)) -> Result<()>,
F: FnMut((I, &T)) -> Result<()>,
{
self.iter_from(I::default(), f)
}
pub fn iter_from<F>(&mut self, mut index: I, mut f: F) -> Result<()>
where
F: FnMut((I, &T, &mut Self)) -> Result<()>,
F: FnMut((I, &T)) -> Result<()>,
{
let mut file = self.open_file()?;
if !self.pushed.is_empty() {
return Err(Error::UnsupportedUnflushedState);
}
let disk_len = I::from(Self::read_disk_len_(&file)?);
let stored_len = I::from(*self.stored_len);
Self::seek_(
&mut file,
Self::index_to_byte_index(Self::i_to_usize(index)?),
)?;
let mut buf = Self::create_buffer();
while index < disk_len {
f((index, Self::read_exact(&mut file, &mut buf)?, self))?;
while index < stored_len {
let v = self.read(index)?.unwrap();
f((index, v))?;
index = index + 1;
}
if self.pushed_len() != 0 {
unreachable!();
Ok(())
}
pub fn iter_from_cloned<F>(&mut self, mut index: I, mut f: F) -> Result<()>
where
F: FnMut((I, T, &mut Self)) -> Result<()>,
{
if !self.pushed.is_empty() {
return Err(Error::UnsupportedUnflushedState);
}
let stored_len = I::from(*self.stored_len);
while index < stored_len {
let v = self.read(index)?.unwrap().clone();
f((index, v, self))?;
index = index + 1;
}
Ok(())
@@ -315,9 +353,7 @@ where
return Err(Error::UnsupportedUnflushedState);
}
let mut file = self.open_file()?;
let len = Self::read_disk_len_(&file)?;
let len = *self.stored_len;
let from = from.map_or(0, |from| {
if from >= 0 {
@@ -339,13 +375,26 @@ where
return Err(Error::RangeFromAfterTo);
}
Self::seek_(&mut file, Self::index_to_byte_index(from))?;
let mut small_cache: SmallCache<T> = None;
let mut buf = Self::create_buffer();
let values = (from..=to)
.flat_map(|index| {
let page_index = Self::index_to_page_index(index);
Ok((from..=to)
.flat_map(|_| Self::read_exact(&mut file, &mut buf).map(|v| v.to_owned()))
.collect::<Vec<_>>())
if small_cache.as_ref().is_none_or(|b| b.0 != page_index) {
small_cache.replace((page_index, self.decode(page_index).unwrap()));
}
small_cache
.as_ref()
.unwrap()
.1
.get(Self::index_to_decoded_index(index))
.cloned()
})
.collect::<Vec<_>>();
Ok(values)
}
#[inline]
@@ -374,7 +423,7 @@ where
#[inline]
pub fn len(&self) -> usize {
self.file_len + self.pushed_len()
*self.stored_len + self.pushed_len()
}
#[inline]
@@ -406,41 +455,166 @@ where
return Ok(());
}
let mut bytes: Vec<u8> = vec![0; self.pushed_len() * Self::SIZE_OF_T];
let mut file = self.open_file()?;
let (starting_page_index, values) = if *self.stored_len % Self::PER_PAGE != 0 {
if self.pages.is_empty() {
unreachable!()
}
let last_page_index = self.pages.len() - 1;
let values = if let Some(values) = self.decoded_pages.as_mut().and_then(|big_cache| {
big_cache
.last_mut()
.and_then(|lock| lock.take())
.map(|b| b.into_vec())
}) {
values
} else if self
.decoded_page
.as_ref()
.is_some_and(|(page_index, _)| *page_index == last_page_index)
{
self.decoded_page.take().unwrap().1.into_vec()
} else {
self.decode(last_page_index)
.inspect_err(|_| {
dbg!(last_page_index, &self.pages);
})
.unwrap()
.into_vec()
};
let file_len = self.pages.pop().unwrap().start;
Self::file_set_len(&mut file, file_len)?;
(last_page_index, values)
} else {
(self.pages.len(), vec![])
};
self.stored_len += self.pushed_len();
let compressed = values
.into_par_iter()
.chain(mem::take(&mut self.pushed).into_par_iter())
.chunks(Self::PER_PAGE)
.map(|chunk| (Self::compress_chunk(&chunk), chunk.len()))
.collect::<Vec<_>>();
compressed
.iter()
.enumerate()
.for_each(|(i, (compressed_bytes, values_len))| {
let page_index = starting_page_index + i;
let start = if page_index != 0 {
let prev = self.pages.get(page_index - 1).unwrap();
prev.start + prev.bytes_len as u64
} else {
0
};
let bytes_len = compressed_bytes.len() as u32;
let values_len = *values_len as u32;
let page = CompressedPageMetadata::new(start, bytes_len, values_len);
self.pages.push(page_index, page);
});
let compressed = compressed
.into_iter()
.flat_map(|(v, _)| v)
.collect::<Box<_>>();
self.pages.write()?;
file.write_all(&compressed)?;
self.reset_caches()?;
self.write_length()?;
Ok(())
}
fn compress_chunk(chunk: &[T]) -> Box<[u8]> {
if chunk.len() > Self::PER_PAGE {
panic!();
}
let mut bytes: Vec<u8> = vec![0; chunk.len() * Self::SIZE_OF_T];
let unsafe_bytes = UnsafeSlice::new(&mut bytes);
mem::take(&mut self.pushed)
chunk
.into_par_iter()
.enumerate()
.for_each(|(i, v)| unsafe_bytes.copy_slice(i * Self::SIZE_OF_T, v.as_bytes()));
self.file.write_all(&bytes)?;
zstd::encode_all(bytes.as_slice(), DEFAULT_COMPRESSION_LEVEL)
.unwrap()
.into_boxed_slice()
}
self.reset_file_metadata()?;
pub fn truncate_if_needed(&mut self, index: I) -> Result<()> {
let index = Self::i_to_usize(index)?;
if index >= *self.stored_len {
return Ok(());
}
if index == 0 {
self.reset_file()?;
return Ok(());
}
let page_index = Self::index_to_page_index(index);
let values = self.decode(page_index)?;
let mut page = self.pages.truncate(page_index).unwrap();
let mut file = self.open_file()?;
Self::file_set_len(&mut file, page.start)?;
let decoded_index = Self::index_to_decoded_index(index);
if decoded_index != 0 {
let chunk = &values[..decoded_index];
let compressed = Self::compress_chunk(chunk);
page.values_len = chunk.len() as u32;
page.bytes_len = compressed.len() as u32;
file.write_all(&compressed)?;
self.pages.push(page_index, page);
}
self.pages.write()?;
*self.stored_len = index;
self.write_length()?;
self.reset_caches()?;
Ok(())
}
pub fn reset_file(&mut self) -> Result<()> {
self.truncate_if_needed(I::from(0))?;
let mut file = self.open_file()?;
Self::file_set_len(&mut file, 0)?;
*self.stored_len = 0;
self.reset_caches()?;
Ok(())
}
pub fn truncate_if_needed(&mut self, index: I) -> Result<Option<T>> {
let index = Self::i_to_usize(index)?;
if index >= self.file_len {
return Ok(None);
}
let value_at_index = self.open_then_read_(index).ok();
self.file.set_len(Self::index_to_byte_index(index))?;
self.reset_file_metadata()?;
Ok(value_at_index)
fn file_set_len(file: &mut File, len: u64) -> io::Result<()> {
file.set_len(len)?;
file.seek(SeekFrom::End(0))?;
Ok(())
}
#[inline]
@@ -449,24 +623,11 @@ where
}
#[inline]
fn byte_index_to_index(byte_index: usize) -> usize {
byte_index / Self::SIZE_OF_T
}
#[inline]
fn index_to_byte_index(index: usize) -> u64 {
(index * Self::SIZE_OF_T) as u64
}
#[inline]
fn index_to_byte_range(index: usize) -> Range<usize> {
let index = (Self::index_to_byte_index(index) as usize) % Self::PAGE_SIZE;
index..(index + Self::SIZE_OF_T)
}
fn index_to_pushed_index(&self, index: usize) -> Result<Option<usize>> {
if index >= self.file_len {
let index = index - self.file_len;
let file_len = *self.stored_len;
if index >= file_len {
let index = index - file_len;
if index >= self.pushed.len() {
Err(Error::IndexTooHigh)
} else {
@@ -497,7 +658,24 @@ where
}
#[inline]
fn path_vec_(path: &Path) -> PathBuf {
path.join("vec")
path.join("vec.zstd")
}
fn write_length(&self) -> io::Result<()> {
self.stored_len.write(&self.path_length())
}
#[inline]
fn path_length(&self) -> PathBuf {
Self::path_length_(&self.pathbuf)
}
#[inline]
fn path_length_(path: &Path) -> PathBuf {
path.join("length")
}
#[inline]
fn path_pages_(path: &Path) -> PathBuf {
path.join("pages")
}
#[inline]
@@ -505,6 +683,11 @@ where
path.join("version")
}
#[inline]
fn path_compressed_(path: &Path) -> PathBuf {
path.join("compressed")
}
pub fn index_type_to_string(&self) -> &str {
std::any::type_name::<I>()
}
@@ -520,9 +703,6 @@ where
T: StoredType,
{
fn clone(&self) -> Self {
let path = &self.pathbuf;
let path_version = Self::path_version_(path);
let version = Version::try_from(path_version.as_path()).unwrap();
Self::import(path, version).unwrap()
Self::import(&self.pathbuf, self.version, self.compressed).unwrap()
}
}

View File

@@ -0,0 +1,20 @@
use std::{fs::File, sync::OnceLock};
use super::CompressedPagesMetadata;
// One decoded page kept alongside its page index.
type CompressedPage<T> = Option<(usize, Box<[T]>)>;

/// Storage backend state for a stored vec: either raw fixed-size records
/// read straight from the file, or zstd-compressed pages decoded on demand.
pub enum Back<T> {
    /// Uncompressed backend.
    Raw {
        // Per-page mmap windows, lazily initialized on first access.
        raw_pages: Vec<OnceLock<Box<memmap2::Mmap>>>,
        raw_page: memmap2::Mmap,
        file: File,
        // Current position in `file`, tracked to skip redundant seeks.
        file_position: u64,
        // Reusable read buffer sized for one record.
        buf: Vec<u8>,
    },
    /// Zstd-compressed backend.
    Compressed {
        // Lazily-decoded pages for random access, one slot per page.
        decoded_pages: Option<Vec<OnceLock<Box<[T]>>>>,
        // Most recently decoded single page (index, values) for sequential reads.
        decoded_page: CompressedPage<T>,
        // On-disk offset/size metadata for each compressed page.
        pages: CompressedPagesMetadata,
    },
}

View File

@@ -0,0 +1,67 @@
use std::{
fs,
io::{self},
ops::Deref,
path::Path,
};
use crate::{Error, Result};
/// Newtype flag recording whether a vec's on-disk data is zstd-compressed.
///
/// Persisted as a single byte next to the data so a later open can detect a
/// mismatched compression mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct Compressed(bool);
impl Compressed {
pub const YES: Self = Self(true);
pub const NO: Self = Self(false);
pub fn write(&self, path: &Path) -> Result<(), io::Error> {
fs::write(path, self.as_bytes())
}
fn as_bytes(&self) -> Vec<u8> {
if self.0 { vec![1] } else { vec![0] }
}
fn from_bytes(bytes: &[u8]) -> Self {
if bytes.len() != 1 {
panic!();
}
if bytes[0] == 1 {
Self(true)
} else if bytes[0] == 0 {
Self(false)
} else {
panic!()
}
}
pub fn validate(&self, path: &Path) -> Result<()> {
if let Ok(prev_compressed) = Compressed::try_from(path) {
if prev_compressed != *self {
return Err(Error::DifferentCompressionMode);
}
}
Ok(())
}
}
impl TryFrom<&Path> for Compressed {
    type Error = Error;

    /// Load the persisted one-byte flag from `value`.
    fn try_from(value: &Path) -> Result<Self, Self::Error> {
        let bytes = fs::read(value)?;
        Ok(Self::from_bytes(&bytes))
    }
}
impl From<bool> for Compressed {
fn from(value: bool) -> Self {
Self(value)
}
}
impl Deref for Compressed {
    type Target = bool;

    /// Borrow the inner flag (enables `*compressed`).
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

View File

@@ -0,0 +1,71 @@
use std::{
fs,
io::{self, Read},
ops::{AddAssign, Deref, DerefMut},
path::Path,
};
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
use crate::{Error, Result};
/// Count of values persisted on disk for one vec, stored in its own file.
///
/// NOTE(review): serialized via zerocopy as native-endian, target-width
/// bytes — the length file is not portable across architectures; confirm
/// that is acceptable.
#[derive(
    Debug,
    Default,
    Clone,
    Copy,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    FromBytes,
    IntoBytes,
    Immutable,
    KnownLayout,
)]
pub struct Length(usize);
impl Length {
    /// Persist the count to `path` as raw native bytes.
    pub fn write(&self, path: &Path) -> Result<(), io::Error> {
        let bytes = self.as_bytes();
        fs::write(path, bytes)
    }
}
impl From<usize> for Length {
fn from(value: usize) -> Self {
Self(value)
}
}
impl Deref for Length {
    type Target = usize;

    /// Borrow the inner count (enables `*length`).
    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for Length {
    /// Mutably borrow the inner count (enables `*length = n`).
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
impl TryFrom<&Path> for Length {
    type Error = Error;

    /// Load a persisted count from `value`.
    ///
    /// Returns `Length::default()` (0) when the file is missing or
    /// unreadable; errors when the file exists but holds fewer than
    /// `size_of::<Self>()` bytes. Extra trailing bytes are ignored.
    fn try_from(value: &Path) -> Result<Self, Self::Error> {
        // Size the buffer from the type instead of hardcoding 8: zerocopy's
        // `ref_from_bytes` demands an exact-size buffer, so a fixed [0; 8]
        // would fail on targets where `usize` is 4 bytes.
        let mut buf = [0u8; size_of::<Self>()];
        if let Ok(bytes) = fs::read(value) {
            bytes.as_slice().read_exact(&mut buf)?;
            Ok(*(Self::ref_from_bytes(&buf)?))
        } else {
            Ok(Self::default())
        }
    }
}
impl AddAssign<usize> for Length {
    /// Increase the stored count by `rhs` (panics on overflow in debug builds).
    fn add_assign(&mut self, rhs: usize) {
        self.0 += rhs;
    }
}

View File

@@ -1,5 +1,15 @@
// Building blocks for the on-disk vec: storage backends and the persisted
// metadata files (compression flag, length, compressed-page table).
mod back;
mod compressed;
mod length;
mod page;
mod pages;
mod unsafe_slice;
mod version;

pub use back::*;
pub use compressed::*;
pub use length::*;
pub use page::*;
pub use pages::*;
pub use unsafe_slice::*;
pub use version::*;

View File

@@ -0,0 +1,18 @@
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
/// Location and sizes of one zstd-compressed page within the data file.
#[derive(Debug, Clone, IntoBytes, Immutable, FromBytes, KnownLayout)]
pub struct CompressedPageMetadata {
    /// Byte offset of the page's first compressed byte in the data file.
    pub start: u64,
    /// Length in bytes of the page's compressed payload.
    pub bytes_len: u32,
    /// Number of decoded values the page holds.
    pub values_len: u32,
}
impl CompressedPageMetadata {
pub fn new(start: u64, bytes_len: u32, values_len: u32) -> Self {
Self {
start,
bytes_len,
values_len,
}
}
}

View File

@@ -0,0 +1,118 @@
use std::{
fs::{self, OpenOptions},
io::{self, Seek, SeekFrom, Write},
path::{Path, PathBuf},
};
use rayon::prelude::*;
use zerocopy::{IntoBytes, TryFromBytes};
use crate::Result;
use super::{CompressedPageMetadata, UnsafeSlice};
/// Append-mostly table of per-page metadata, persisted to its own file.
#[derive(Debug, Clone)]
pub struct CompressedPagesMetadata {
    // In-memory entries, one per compressed page, in page order.
    vec: Vec<CompressedPageMetadata>,
    // Index of the first entry modified since the last `write`;
    // `None` means the file is up to date.
    change_at: Option<usize>,
    // File the table is persisted to.
    path: PathBuf,
}
impl CompressedPagesMetadata {
    /// Size in bytes of one serialized `CompressedPageMetadata` entry.
    const PAGE_SIZE: usize = size_of::<CompressedPageMetadata>();

    /// Load the page table from `path`.
    ///
    /// A missing or unreadable file yields an empty table. Panics when the
    /// file length is not a multiple of the entry size (corrupt table).
    pub fn read(path: &Path) -> Result<CompressedPagesMetadata> {
        let slf = Self {
            vec: fs::read(path)
                .unwrap_or_default()
                .chunks(Self::PAGE_SIZE)
                .map(|bytes| {
                    // A short trailing chunk means a torn or corrupt file.
                    if bytes.len() != Self::PAGE_SIZE {
                        panic!()
                    }
                    CompressedPageMetadata::try_read_from_bytes(bytes).unwrap()
                })
                .collect::<Vec<_>>(),
            path: path.to_owned(),
            change_at: None,
        };
        Ok(slf)
    }

    /// Persist every entry from the first changed index onward.
    ///
    /// Truncates the file back to the unchanged prefix, then appends the
    /// serialized changed tail. No-op when nothing changed since the last
    /// call.
    pub fn write(&mut self) -> io::Result<()> {
        if self.change_at.is_none() {
            return Ok(());
        }
        let change_at = self.change_at.take().unwrap();
        let len = (self.vec.len() - change_at) * Self::PAGE_SIZE;
        let mut bytes: Vec<u8> = vec![0; len];
        let unsafe_bytes = UnsafeSlice::new(&mut bytes);
        // Serialize the changed tail in parallel; each entry writes to a
        // disjoint slot of the buffer, so the writes cannot overlap.
        self.vec[change_at..]
            .par_iter()
            .enumerate()
            .for_each(|(i, v)| unsafe_bytes.copy_slice(i * Self::PAGE_SIZE, v.as_bytes()));
        let mut file = OpenOptions::new()
            .read(true)
            .create(true)
            .truncate(false)
            .append(true)
            .open(&self.path)?;
        // Drop the stale tail, then append the fresh one; append mode makes
        // the subsequent write land at the (new) end of file.
        file.set_len((change_at * Self::PAGE_SIZE) as u64)?;
        file.seek(SeekFrom::End(0))?;
        file.write_all(&bytes)?;
        Ok(())
    }

    /// Number of pages currently tracked.
    pub fn len(&self) -> usize {
        self.vec.len()
    }

    /// Whether no pages are tracked.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Metadata for `page_index`, if it exists.
    pub fn get(&self, page_index: usize) -> Option<&CompressedPageMetadata> {
        self.vec.get(page_index)
    }

    /// Metadata for the last page, if any.
    pub fn last(&self) -> Option<&CompressedPageMetadata> {
        self.vec.last()
    }

    /// Remove and return the last page's metadata, marking the table dirty
    /// so the next `write` shrinks the persisted file accordingly.
    pub fn pop(&mut self) -> Option<CompressedPageMetadata> {
        let page = self.vec.pop();
        if page.is_some() {
            // Without this, popping with no subsequent push would leave a
            // stale trailing entry in the file. Harmless in the pop-then-push
            // flush path: push re-marks the same index.
            self.set_changed_at(self.vec.len());
        }
        page
    }

    /// Append metadata for `page_index`, which must equal the current
    /// length — pages are strictly sequential.
    pub fn push(&mut self, page_index: usize, page: CompressedPageMetadata) {
        if page_index != self.vec.len() {
            panic!();
        }
        self.set_changed_at(page_index);
        self.vec.push(page);
    }

    /// Move the dirty watermark down to `page_index` unless it is already
    /// at or below it.
    fn set_changed_at(&mut self, page_index: usize) {
        if self.change_at.is_none_or(|pi| pi > page_index) {
            self.change_at.replace(page_index);
        }
    }

    /// Drop all entries at and after `page_index`, returning the entry that
    /// was at `page_index` (if any).
    pub fn truncate(&mut self, page_index: usize) -> Option<CompressedPageMetadata> {
        let page = self.get(page_index).cloned();
        self.vec.truncate(page_index);
        // Mark the change at the new length rather than the requested index:
        // if `page_index` was past the end, recording it verbatim would make
        // the next `write` underflow on `vec.len() - change_at` and panic.
        self.set_changed_at(self.vec.len());
        page
    }
}