mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-05-10 14:19:10 -07:00
vec: compression part 1
This commit is contained in:
@@ -1 +0,0 @@
|
||||
|
||||
@@ -8,7 +8,7 @@ use crate::run::RunConfig;
|
||||
pub fn query(params: QueryParams) -> color_eyre::Result<()> {
|
||||
let config = RunConfig::import(None)?;
|
||||
|
||||
let mut indexer = Indexer::new(config.indexeddir())?;
|
||||
let mut indexer = Indexer::new(config.indexeddir(), config.check_collisions())?;
|
||||
indexer.import_vecs()?;
|
||||
|
||||
let mut computer = Computer::new(config.computeddir(), None);
|
||||
|
||||
@@ -26,7 +26,7 @@ pub fn run(config: RunConfig) -> color_eyre::Result<()> {
|
||||
|
||||
let parser = brk_parser::Parser::new(config.blocksdir(), rpc);
|
||||
|
||||
let mut indexer = Indexer::new(config.indexeddir())?;
|
||||
let mut indexer = Indexer::new(config.indexeddir(), config.check_collisions())?;
|
||||
indexer.import_stores()?;
|
||||
indexer.import_vecs()?;
|
||||
|
||||
@@ -134,6 +134,10 @@ pub struct RunConfig {
|
||||
/// Delay between runs, default: 0, saved
|
||||
#[arg(long, value_name = "SECONDS")]
|
||||
delay: Option<u64>,
|
||||
|
||||
/// DEV: Activate checking address hashes for collisions when indexing, default: false, saved
|
||||
#[arg(long, value_name = "BOOL")]
|
||||
check_collisions: Option<bool>,
|
||||
}
|
||||
|
||||
impl RunConfig {
|
||||
@@ -195,6 +199,10 @@ impl RunConfig {
|
||||
config_saved.delay = Some(delay);
|
||||
}
|
||||
|
||||
if let Some(check_collisions) = config_args.check_collisions.take() {
|
||||
config_saved.check_collisions = Some(check_collisions);
|
||||
}
|
||||
|
||||
if config_args != RunConfig::default() {
|
||||
dbg!(config_args);
|
||||
panic!("Didn't consume the full config")
|
||||
@@ -378,6 +386,10 @@ impl RunConfig {
|
||||
pub fn fetch(&self) -> bool {
|
||||
self.fetch.is_some_and(|b| b)
|
||||
}
|
||||
|
||||
pub fn check_collisions(&self) -> bool {
|
||||
self.check_collisions.is_some_and(|b| b)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(
|
||||
|
||||
@@ -28,7 +28,7 @@ pub fn main() -> color_eyre::Result<()> {
|
||||
|
||||
let outputs_dir = Path::new("../../_outputs");
|
||||
|
||||
let mut indexer = Indexer::new(outputs_dir.join("indexed"))?;
|
||||
let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?;
|
||||
indexer.import_stores()?;
|
||||
indexer.import_vecs()?;
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@ use std::{
|
||||
|
||||
use brk_core::CheckedSub;
|
||||
use brk_exit::Exit;
|
||||
use brk_vec::{Error, Result, StoredIndex, StoredType, Version};
|
||||
use brk_vec::{Compressed, Error, Result, StoredIndex, StoredType, Version};
|
||||
|
||||
const FLUSH_EVERY: usize = 10_000;
|
||||
|
||||
@@ -25,7 +25,7 @@ where
|
||||
T: StoredType,
|
||||
{
|
||||
pub fn import(path: &Path, version: Version) -> brk_vec::Result<Self> {
|
||||
let vec = brk_vec::StorableVec::forced_import(path, version)?;
|
||||
let vec = brk_vec::StorableVec::forced_import(path, version, Compressed::YES)?;
|
||||
|
||||
Ok(Self {
|
||||
computed_version: None,
|
||||
@@ -103,14 +103,14 @@ where
|
||||
where
|
||||
A: StoredIndex,
|
||||
B: StoredType,
|
||||
F: FnMut((A, &B, &mut Self, &mut brk_vec::StorableVec<A, B>)) -> (I, T),
|
||||
F: FnMut((A, B, &mut Self, &mut brk_vec::StorableVec<A, B>)) -> (I, T),
|
||||
{
|
||||
self.validate_computed_version_or_reset_file(
|
||||
Version::from(0) + self.version() + other.version(),
|
||||
)?;
|
||||
|
||||
let index = max_from.min(A::from(self.len()));
|
||||
other.iter_from(index, |(a, b, other)| {
|
||||
other.iter_from_cloned(index, |(a, b, other)| {
|
||||
let (i, v) = t((a, b, self, other));
|
||||
self.push_and_flush_if_needed(i, v, exit)
|
||||
})?;
|
||||
|
||||
@@ -100,7 +100,7 @@ impl Vecs {
|
||||
self.height_to_real_date.compute_transform(
|
||||
starting_indexes.height,
|
||||
&mut indexer_vecs.height_to_timestamp,
|
||||
|(h, t, ..)| (h, Date::from(*t)),
|
||||
|(h, t, ..)| (h, Date::from(t)),
|
||||
exit,
|
||||
)?;
|
||||
|
||||
@@ -112,7 +112,10 @@ impl Vecs {
|
||||
.decremented()
|
||||
.and_then(|h| s.read(h).ok())
|
||||
.flatten()
|
||||
.map_or(*d, |prev_d| if prev_d > d { *prev_d } else { *d });
|
||||
.map_or(d, |prev_d| {
|
||||
let prev_d = *prev_d;
|
||||
if prev_d > d { prev_d } else { d }
|
||||
});
|
||||
(h, d)
|
||||
},
|
||||
exit,
|
||||
@@ -121,7 +124,7 @@ impl Vecs {
|
||||
self.height_to_dateindex.compute_transform(
|
||||
starting_indexes.height,
|
||||
&mut self.height_to_fixed_date,
|
||||
|(h, d, ..)| (h, Dateindex::try_from(*d).unwrap()),
|
||||
|(h, d, ..)| (h, Dateindex::try_from(d).unwrap()),
|
||||
exit,
|
||||
)?;
|
||||
|
||||
|
||||
@@ -134,7 +134,7 @@ impl Vecs {
|
||||
let ohlc = fetcher
|
||||
.get_height(
|
||||
h,
|
||||
*t,
|
||||
t,
|
||||
h.decremented().map(|prev_h| {
|
||||
height_to_timestamp
|
||||
.get(prev_h)
|
||||
@@ -215,7 +215,7 @@ impl Vecs {
|
||||
self.height_to_sats_per_dollar.compute_transform(
|
||||
starting_indexes.height,
|
||||
&mut self.height_to_close,
|
||||
|(di, close, ..)| (di, Close::from(Sats::ONE_BTC / **close)),
|
||||
|(di, close, ..)| (di, Close::from(Sats::ONE_BTC / *close)),
|
||||
exit,
|
||||
)?;
|
||||
|
||||
@@ -223,7 +223,7 @@ impl Vecs {
|
||||
starting_indexes.dateindex,
|
||||
&mut indexes.dateindex_to_date,
|
||||
|(di, d, ..)| {
|
||||
let ohlc = fetcher.get_date(*d).unwrap();
|
||||
let ohlc = fetcher.get_date(d).unwrap();
|
||||
(di, ohlc)
|
||||
},
|
||||
exit,
|
||||
@@ -295,7 +295,7 @@ impl Vecs {
|
||||
self.dateindex_to_sats_per_dollar.compute_transform(
|
||||
starting_indexes.dateindex,
|
||||
&mut self.dateindex_to_close,
|
||||
|(di, close, ..)| (di, Close::from(Sats::ONE_BTC / **close)),
|
||||
|(di, close, ..)| (di, Close::from(Sats::ONE_BTC / *close)),
|
||||
exit,
|
||||
)?;
|
||||
|
||||
|
||||
@@ -8,7 +8,20 @@ use crate::Error;
|
||||
|
||||
use super::{Addressbytes, Addresstype, BlockHash, Txid};
|
||||
|
||||
#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)]
|
||||
#[derive(
|
||||
Debug,
|
||||
Deref,
|
||||
Clone,
|
||||
Copy,
|
||||
PartialEq,
|
||||
Eq,
|
||||
PartialOrd,
|
||||
Ord,
|
||||
FromBytes,
|
||||
Immutable,
|
||||
IntoBytes,
|
||||
KnownLayout,
|
||||
)]
|
||||
pub struct AddressHash([u8; 8]);
|
||||
impl From<(&Addressbytes, Addresstype)> for AddressHash {
|
||||
fn from((addressbytes, addresstype): (&Addressbytes, Addresstype)) -> Self {
|
||||
@@ -41,8 +54,26 @@ impl From<AddressHash> for ByteView {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)]
|
||||
#[derive(
|
||||
Debug,
|
||||
Deref,
|
||||
Clone,
|
||||
Copy,
|
||||
PartialEq,
|
||||
Eq,
|
||||
PartialOrd,
|
||||
Ord,
|
||||
FromBytes,
|
||||
Immutable,
|
||||
IntoBytes,
|
||||
KnownLayout,
|
||||
)]
|
||||
pub struct BlockHashPrefix([u8; 8]);
|
||||
// Owned conversion: borrows the hash and reuses the `From<&BlockHash>` impl.
impl From<BlockHash> for BlockHashPrefix {
    fn from(value: BlockHash) -> Self {
        let borrowed: &BlockHash = &value;
        borrowed.into()
    }
}
|
||||
impl From<&BlockHash> for BlockHashPrefix {
|
||||
fn from(value: &BlockHash) -> Self {
|
||||
Self(copy_first_8bytes(&value[..]).unwrap())
|
||||
@@ -65,8 +96,26 @@ impl From<BlockHashPrefix> for ByteView {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)]
|
||||
#[derive(
|
||||
Debug,
|
||||
Deref,
|
||||
Clone,
|
||||
Copy,
|
||||
PartialEq,
|
||||
Eq,
|
||||
PartialOrd,
|
||||
Ord,
|
||||
FromBytes,
|
||||
Immutable,
|
||||
IntoBytes,
|
||||
KnownLayout,
|
||||
)]
|
||||
pub struct TxidPrefix([u8; 8]);
|
||||
// Owned conversion: borrows the txid and reuses the `From<&Txid>` impl.
impl From<Txid> for TxidPrefix {
    fn from(value: Txid) -> Self {
        let borrowed: &Txid = &value;
        borrowed.into()
    }
}
|
||||
impl From<&Txid> for TxidPrefix {
|
||||
fn from(value: &Txid) -> Self {
|
||||
Self(copy_first_8bytes(&value[..]).unwrap())
|
||||
|
||||
@@ -22,12 +22,6 @@ Vecs: `src/storage/vecs/mod.rs`
|
||||
|
||||
Stores: `src/storage/stores/mod.rs`
|
||||
|
||||
## Examples
|
||||
|
||||
Rust: `src/main.rs`
|
||||
|
||||
Python: `../python/parse.py`
|
||||
|
||||
## Benchmark
|
||||
|
||||
Indexing `0..885_835` took `11 hours 6 min 50 s` on a MacBook Pro M3 Pro with 36 GB of RAM
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use std::{path::Path, thread::sleep, time::Duration};
|
||||
use std::path::Path;
|
||||
|
||||
use brk_core::default_bitcoin_path;
|
||||
use brk_core::{default_bitcoin_path, dot_brk_path};
|
||||
use brk_exit::Exit;
|
||||
use brk_indexer::{Indexer, rpc::RpcApi};
|
||||
use brk_parser::{
|
||||
@@ -24,23 +24,25 @@ fn main() -> color_eyre::Result<()> {
|
||||
|
||||
let parser = Parser::new(bitcoin_dir.join("blocks"), rpc);
|
||||
|
||||
let mut indexer = Indexer::new(Path::new("../../_outputs/indexed").to_owned())?;
|
||||
let outputs = dot_brk_path().join("outputs");
|
||||
|
||||
let mut indexer = Indexer::new(outputs.join("indexed").to_owned(), true)?;
|
||||
indexer.import_stores()?;
|
||||
indexer.import_vecs()?;
|
||||
|
||||
loop {
|
||||
let block_count = rpc.get_block_count()?;
|
||||
// loop {
|
||||
let block_count = rpc.get_block_count()?;
|
||||
|
||||
info!("{block_count} blocks found.");
|
||||
info!("{block_count} blocks found.");
|
||||
|
||||
indexer.index(&parser, rpc, &exit)?;
|
||||
indexer.index(&parser, rpc, &exit)?;
|
||||
|
||||
info!("Waiting for new blocks...");
|
||||
info!("Waiting for new blocks...");
|
||||
|
||||
while block_count == rpc.get_block_count()? {
|
||||
sleep(Duration::from_secs(1))
|
||||
}
|
||||
}
|
||||
// while block_count == rpc.get_block_count()? {
|
||||
// sleep(Duration::from_secs(1))
|
||||
// }
|
||||
// }
|
||||
|
||||
#[allow(unreachable_code)]
|
||||
Ok(())
|
||||
|
||||
@@ -18,6 +18,7 @@ pub use brk_parser::*;
|
||||
|
||||
use bitcoin::{Transaction, TxIn, TxOut};
|
||||
use brk_exit::Exit;
|
||||
use brk_vec::Compressed;
|
||||
use color_eyre::eyre::{ContextCompat, eyre};
|
||||
use log::info;
|
||||
use rayon::prelude::*;
|
||||
@@ -36,15 +37,17 @@ pub struct Indexer {
|
||||
path: PathBuf,
|
||||
vecs: Option<Vecs>,
|
||||
stores: Option<Stores>,
|
||||
check_collisions: bool,
|
||||
}
|
||||
|
||||
impl Indexer {
|
||||
pub fn new(indexes_dir: PathBuf) -> color_eyre::Result<Self> {
|
||||
pub fn new(indexes_dir: PathBuf, check_collisions: bool) -> color_eyre::Result<Self> {
|
||||
setrlimit()?;
|
||||
Ok(Self {
|
||||
path: indexes_dir,
|
||||
vecs: None,
|
||||
stores: None,
|
||||
check_collisions,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -66,8 +69,6 @@ impl Indexer {
|
||||
rpc: &'static rpc::Client,
|
||||
exit: &Exit,
|
||||
) -> color_eyre::Result<Indexes> {
|
||||
let check_collisions = true;
|
||||
|
||||
let starting_indexes = Indexes::try_from((
|
||||
self.vecs.as_mut().unwrap(),
|
||||
self.stores.as_ref().unwrap(),
|
||||
@@ -96,7 +97,7 @@ impl Indexer {
|
||||
let mut idxs = starting_indexes.clone();
|
||||
|
||||
let start = Some(idxs.height);
|
||||
let end = None; //Some(Height::new(400_000));
|
||||
let end = None;
|
||||
|
||||
if starting_indexes.height > Height::try_from(rpc)?
|
||||
|| end.is_some_and(|end| starting_indexes.height > end)
|
||||
@@ -124,12 +125,14 @@ impl Indexer {
|
||||
Ok(())
|
||||
};
|
||||
|
||||
parser.parse(start, None).iter().try_for_each(
|
||||
parser.parse(start, end).iter().try_for_each(
|
||||
|(height, block, blockhash)| -> color_eyre::Result<()> {
|
||||
info!("Indexing block {height}...");
|
||||
|
||||
idxs.height = height;
|
||||
|
||||
let check_collisions = self.check_collisions && height > Height::new(886_000);
|
||||
|
||||
let blockhash = BlockHash::from(blockhash);
|
||||
let blockhash_prefix = BlockHashPrefix::from(&blockhash);
|
||||
|
||||
@@ -232,8 +235,6 @@ impl Indexer {
|
||||
let txindex = idxs.txindex + block_txindex;
|
||||
let txinindex = idxs.txinindex + Txinindex::from(block_txinindex);
|
||||
|
||||
// dbg!((txindex, txinindex, vin));
|
||||
|
||||
let outpoint = txin.previous_output;
|
||||
let txid = Txid::from(outpoint.txid);
|
||||
|
||||
@@ -598,6 +599,10 @@ impl Indexer {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !check_collisions {
|
||||
return Ok(())
|
||||
}
|
||||
|
||||
let len = vecs.txindex_to_txid.len();
|
||||
// Ok if `get` is not par as should happen only twice
|
||||
let prev_txid = vecs
|
||||
@@ -608,8 +613,6 @@ impl Indexer {
|
||||
dbg!(txindex, len);
|
||||
})?;
|
||||
|
||||
// #[allow(clippy::redundant_locals)]
|
||||
// let prev_txid = prev_txid;
|
||||
let prev_txid = prev_txid.as_ref();
|
||||
|
||||
// If another Txid needs to be added to the list
|
||||
|
||||
@@ -5,7 +5,7 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use brk_vec::{StoredIndex, StoredType, Version};
|
||||
use brk_vec::{Compressed, StoredIndex, StoredType, Version};
|
||||
|
||||
use super::Height;
|
||||
|
||||
@@ -20,10 +20,10 @@ where
|
||||
I: StoredIndex,
|
||||
T: StoredType,
|
||||
{
|
||||
pub fn import(path: &Path, version: Version) -> brk_vec::Result<Self> {
|
||||
let mut vec = brk_vec::StorableVec::forced_import(path, version)?;
|
||||
pub fn import(path: &Path, version: Version, compressed: Compressed) -> brk_vec::Result<Self> {
|
||||
let mut vec = brk_vec::StorableVec::forced_import(path, version, compressed)?;
|
||||
|
||||
vec.reset_mmaps()?;
|
||||
vec.init_big_cache()?;
|
||||
|
||||
Ok(Self {
|
||||
height: Height::try_from(Self::path_height_(path).as_path()).ok(),
|
||||
@@ -31,11 +31,12 @@ where
|
||||
})
|
||||
}
|
||||
|
||||
pub fn truncate_if_needed(&mut self, index: I, height: Height) -> brk_vec::Result<Option<T>> {
|
||||
pub fn truncate_if_needed(&mut self, index: I, height: Height) -> brk_vec::Result<()> {
|
||||
if self.height.is_none_or(|self_height| self_height != height) {
|
||||
height.write(&self.path_height())?;
|
||||
}
|
||||
self.vec.truncate_if_needed(index)
|
||||
self.vec.truncate_if_needed(index)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn height(&self) -> brk_core::Result<Height> {
|
||||
@@ -51,7 +52,7 @@ where
|
||||
pub fn flush(&mut self, height: Height) -> io::Result<()> {
|
||||
height.write(&self.path_height())?;
|
||||
self.vec.flush()?;
|
||||
self.vec.reset_mmaps()
|
||||
self.vec.init_big_cache()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@ use brk_core::{
|
||||
P2TRindex, P2WPKHAddressBytes, P2WPKHindex, P2WSHAddressBytes, P2WSHindex, Pushonlyindex, Sats,
|
||||
Timestamp, TxVersion, Txid, Txindex, Txinindex, Txoutindex, Unknownindex, Weight,
|
||||
};
|
||||
use brk_vec::{AnyStorableVec, Version};
|
||||
use brk_vec::{AnyStorableVec, Compressed, Version};
|
||||
use rayon::prelude::*;
|
||||
|
||||
use crate::Indexes;
|
||||
@@ -71,168 +71,217 @@ impl Vecs {
|
||||
addressindex_to_addresstype: StorableVec::import(
|
||||
&path.join("addressindex_to_addresstype"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
addressindex_to_addresstypeindex: StorableVec::import(
|
||||
&path.join("addressindex_to_addresstypeindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
addressindex_to_height: StorableVec::import(
|
||||
&path.join("addressindex_to_height"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_blockhash: StorableVec::import(
|
||||
&path.join("height_to_blockhash"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
height_to_difficulty: StorableVec::import(
|
||||
&path.join("height_to_difficulty"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_addressindex: StorableVec::import(
|
||||
&path.join("height_to_first_addressindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_emptyindex: StorableVec::import(
|
||||
&path.join("height_to_first_emptyindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_multisigindex: StorableVec::import(
|
||||
&path.join("height_to_first_multisigindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_opreturnindex: StorableVec::import(
|
||||
&path.join("height_to_first_opreturnindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_pushonlyindex: StorableVec::import(
|
||||
&path.join("height_to_first_pushonlyindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_txindex: StorableVec::import(
|
||||
&path.join("height_to_first_txindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_txinindex: StorableVec::import(
|
||||
&path.join("height_to_first_txinindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_txoutindex: StorableVec::import(
|
||||
&path.join("height_to_first_txoutindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_unknownindex: StorableVec::import(
|
||||
&path.join("height_to_first_unkownindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_p2pk33index: StorableVec::import(
|
||||
&path.join("height_to_first_p2pk33index"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_p2pk65index: StorableVec::import(
|
||||
&path.join("height_to_first_p2pk65index"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_p2pkhindex: StorableVec::import(
|
||||
&path.join("height_to_first_p2pkhindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_p2shindex: StorableVec::import(
|
||||
&path.join("height_to_first_p2shindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_p2trindex: StorableVec::import(
|
||||
&path.join("height_to_first_p2trindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_p2wpkhindex: StorableVec::import(
|
||||
&path.join("height_to_first_p2wpkhindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_first_p2wshindex: StorableVec::import(
|
||||
&path.join("height_to_first_p2wshindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_size: StorableVec::import(
|
||||
&path.join("height_to_size"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_size: StorableVec::import(&path.join("height_to_size"), Version::from(1))?,
|
||||
height_to_timestamp: StorableVec::import(
|
||||
&path.join("height_to_timestamp"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
height_to_weight: StorableVec::import(
|
||||
&path.join("height_to_weight"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
p2pk33index_to_p2pk33addressbytes: StorableVec::import(
|
||||
&path.join("p2pk33index_to_p2pk33addressbytes"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
p2pk65index_to_p2pk65addressbytes: StorableVec::import(
|
||||
&path.join("p2pk65index_to_p2pk65addressbytes"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
p2pkhindex_to_p2pkhaddressbytes: StorableVec::import(
|
||||
&path.join("p2pkhindex_to_p2pkhaddressbytes"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
p2shindex_to_p2shaddressbytes: StorableVec::import(
|
||||
&path.join("p2shindex_to_p2shaddressbytes"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
p2trindex_to_p2traddressbytes: StorableVec::import(
|
||||
&path.join("p2trindex_to_p2traddressbytes"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
p2wpkhindex_to_p2wpkhaddressbytes: StorableVec::import(
|
||||
&path.join("p2wpkhindex_to_p2wpkhaddressbytes"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
p2wshindex_to_p2wshaddressbytes: StorableVec::import(
|
||||
&path.join("p2wshindex_to_p2wshaddressbytes"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
txindex_to_first_txinindex: StorableVec::import(
|
||||
&path.join("txindex_to_first_txinindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txindex_to_first_txoutindex: StorableVec::import(
|
||||
&path.join("txindex_to_first_txoutindex"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
txindex_to_height: StorableVec::import(
|
||||
&path.join("txindex_to_height"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txindex_to_locktime: StorableVec::import(
|
||||
&path.join("txindex_to_locktime"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txindex_to_txid: StorableVec::import(
|
||||
&path.join("txindex_to_txid"),
|
||||
Version::from(1),
|
||||
Compressed::NO,
|
||||
)?,
|
||||
txindex_to_txid: StorableVec::import(&path.join("txindex_to_txid"), Version::from(1))?,
|
||||
txindex_to_base_size: StorableVec::import(
|
||||
&path.join("txindex_to_base_size"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txindex_to_total_size: StorableVec::import(
|
||||
&path.join("txindex_to_total_size"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txindex_to_is_explicitly_rbf: StorableVec::import(
|
||||
&path.join("txindex_to_is_explicitly_rbf"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txindex_to_txversion: StorableVec::import(
|
||||
&path.join("txindex_to_txversion"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txinindex_to_txoutindex: StorableVec::import(
|
||||
&path.join("txinindex_to_txoutindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txoutindex_to_addressindex: StorableVec::import(
|
||||
&path.join("txoutindex_to_addressindex"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
txoutindex_to_value: StorableVec::import(
|
||||
&path.join("txoutindex_to_value"),
|
||||
Version::from(1),
|
||||
Compressed::YES,
|
||||
)?,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -15,17 +15,17 @@ fn main() {
|
||||
.unwrap(),
|
||||
));
|
||||
|
||||
// let start = None;
|
||||
// let end = None;
|
||||
let start = None;
|
||||
let end = None;
|
||||
|
||||
let parser = Parser::new(bitcoin_dir.join("blocks"), rpc);
|
||||
|
||||
// parser
|
||||
// .parse(start, end)
|
||||
// .iter()
|
||||
// .for_each(|(height, _block, hash)| {
|
||||
// println!("{height}: {hash}");
|
||||
// });
|
||||
parser
|
||||
.parse(start, end)
|
||||
.iter()
|
||||
.for_each(|(height, _block, hash)| {
|
||||
println!("{height}: {hash}");
|
||||
});
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
|
||||
@@ -9,7 +9,7 @@ pub fn main() -> color_eyre::Result<()> {
|
||||
|
||||
let outputs_dir = Path::new("../../_outputs");
|
||||
|
||||
let mut indexer = Indexer::new(outputs_dir.join("indexed"))?;
|
||||
let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?;
|
||||
indexer.import_vecs()?;
|
||||
|
||||
let mut computer = Computer::new(outputs_dir.join("computed"), None);
|
||||
|
||||
@@ -21,8 +21,8 @@ color-eyre = { workspace = true }
|
||||
jiff = { workspace = true }
|
||||
log = { workspace = true }
|
||||
minreq = { workspace = true }
|
||||
oxc = { version = "0.57.0", features = ["codegen", "minifier"] }
|
||||
oxc = { version = "0.58.0", features = ["codegen", "minifier"] }
|
||||
serde = { workspace = true }
|
||||
tokio = { version = "1.44.0", features = ["full"] }
|
||||
tokio = { version = "1.44.1", features = ["full"] }
|
||||
tower-http = { version = "0.6.2", features = ["compression-full"] }
|
||||
zip = "2.2.3"
|
||||
|
||||
@@ -31,7 +31,7 @@ pub fn main() -> color_eyre::Result<()> {
|
||||
|
||||
let outputs_dir = Path::new("../../_outputs");
|
||||
|
||||
let mut indexer = Indexer::new(outputs_dir.join("indexed"))?;
|
||||
let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?;
|
||||
indexer.import_stores()?;
|
||||
indexer.import_vecs()?;
|
||||
|
||||
|
||||
@@ -14,3 +14,4 @@ rayon = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
zerocopy = { workspace = true }
|
||||
zstd = "0.13.3"
|
||||
|
||||
@@ -1,16 +1,19 @@
|
||||
use std::path::Path;
|
||||
use std::{fs, path::Path};
|
||||
|
||||
use brk_vec::{StorableVec, Version};
|
||||
use brk_vec::{Compressed, StorableVec, Version};
|
||||
|
||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let _ = fs::remove_dir_all("./vec");
|
||||
|
||||
{
|
||||
let mut vec: StorableVec<usize, u32> =
|
||||
StorableVec::forced_import(Path::new("./vec"), Version::from(1))?;
|
||||
StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?;
|
||||
|
||||
vec.push(0);
|
||||
vec.push(1);
|
||||
vec.push(2);
|
||||
(0..21_u32).for_each(|v| {
|
||||
vec.push(v);
|
||||
});
|
||||
dbg!(vec.get(0)?); // Some(0)
|
||||
dbg!(vec.get(20)?); // Some(0)
|
||||
dbg!(vec.get(21)?); // None
|
||||
|
||||
vec.flush()?;
|
||||
@@ -18,12 +21,54 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
|
||||
{
|
||||
let mut vec: StorableVec<usize, u32> =
|
||||
StorableVec::forced_import(Path::new("./vec"), Version::from(1))?;
|
||||
StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?;
|
||||
|
||||
dbg!(vec.get(0)?); // 0
|
||||
dbg!(vec.read(0)?); // 0
|
||||
dbg!(vec.read(1)?); // 0
|
||||
dbg!(vec.read(2)?); // 0
|
||||
dbg!(vec.read(20)?); // 0
|
||||
dbg!(vec.get(20)?); // 0
|
||||
dbg!(vec.read(0)?); // 0
|
||||
|
||||
vec.push(21);
|
||||
vec.push(22);
|
||||
dbg!(vec.get(20)?);
|
||||
dbg!(vec.get(21)?);
|
||||
dbg!(vec.get(22)?);
|
||||
dbg!(vec.get(23)?);
|
||||
|
||||
vec.flush()?;
|
||||
}
|
||||
|
||||
{
|
||||
let mut vec: StorableVec<usize, u32> =
|
||||
StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?;
|
||||
|
||||
vec.init_big_cache()?;
|
||||
|
||||
dbg!(vec.get(0)?); // 0
|
||||
dbg!(vec.get(20)?); // 0
|
||||
dbg!(vec.get(21)?); // 0
|
||||
dbg!(vec.get(22)?); // 0
|
||||
|
||||
vec.truncate_if_needed(14)?;
|
||||
|
||||
dbg!(vec.get(0)?); // 0
|
||||
dbg!(vec.get(5)?); // 0
|
||||
dbg!(vec.get(20)?); // 0
|
||||
|
||||
vec.iter(|(_, v)| {
|
||||
dbg!(v);
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
vec.iter_from(5, |(_, v)| {
|
||||
dbg!(v);
|
||||
Ok(())
|
||||
})?;
|
||||
|
||||
dbg!(vec.collect_range(Some(-5), None)?);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -21,6 +21,7 @@ pub enum Error {
|
||||
FailedKeyTryIntoUsize,
|
||||
UnsupportedUnflushedState,
|
||||
RangeFromAfterTo,
|
||||
DifferentCompressionMode,
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
@@ -46,7 +47,10 @@ impl fmt::Display for Error {
|
||||
match self {
|
||||
Error::WrongEndian => write!(f, "Wrong endian"),
|
||||
Error::DifferentVersion { found, expected } => {
|
||||
write!(f, "Different version; found: {found:?}, expected: {expected:?}")
|
||||
write!(
|
||||
f,
|
||||
"Different version; found: {found:?}, expected: {expected:?}"
|
||||
)
|
||||
}
|
||||
Error::MmapsVecIsTooSmall => write!(f, "Mmaps vec is too small"),
|
||||
Error::IO(error) => Debug::fmt(&error, f),
|
||||
@@ -56,10 +60,14 @@ impl fmt::Display for Error {
|
||||
Error::ExpectVecToHaveIndex => write!(f, "Expect vec to have index"),
|
||||
Error::FailedKeyTryIntoUsize => write!(f, "Failed to convert key to usize"),
|
||||
Error::UnsupportedUnflushedState => {
|
||||
write!(f, "Unsupported unflush state, please flush before using this function")
|
||||
write!(
|
||||
f,
|
||||
"Unsupported unflush state, please flush before using this function"
|
||||
)
|
||||
}
|
||||
Error::ZeroCopyError => write!(f, "Zero copy convert error"),
|
||||
Error::RangeFromAfterTo => write!(f, "Range, from is after to"),
|
||||
Error::DifferentCompressionMode => write!(f, "Different compression mode chosen"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,15 +7,15 @@ use std::{
|
||||
cmp::Ordering,
|
||||
fmt::Debug,
|
||||
fs::{self, File, OpenOptions},
|
||||
io::{self, Read, Seek, SeekFrom, Write},
|
||||
io::{self, Seek, SeekFrom, Write},
|
||||
marker::PhantomData,
|
||||
mem,
|
||||
ops::Range,
|
||||
path::{Path, PathBuf},
|
||||
sync::OnceLock,
|
||||
};
|
||||
|
||||
pub use memmap2;
|
||||
use memmap2::Mmap;
|
||||
use rayon::prelude::*;
|
||||
pub use zerocopy;
|
||||
|
||||
@@ -26,18 +26,21 @@ mod traits;
|
||||
pub use enums::*;
|
||||
pub use structs::*;
|
||||
pub use traits::*;
|
||||
use zstd::DEFAULT_COMPRESSION_LEVEL;
|
||||
|
||||
/// In bytes
|
||||
const MAX_PAGE_SIZE: usize = 4 * 4096;
|
||||
const ONE_MIB: usize = 1024 * 1024;
|
||||
const ONE_KIB: usize = 1024;
|
||||
const MAX_PAGE_SIZE: usize = 16 * ONE_KIB;
|
||||
const ONE_MIB: usize = ONE_KIB * ONE_KIB;
|
||||
const MAX_CACHE_SIZE: usize = 100 * ONE_MIB;
|
||||
|
||||
type SmallCache<T> = Option<(usize, Box<[T]>)>;
|
||||
|
||||
///
|
||||
/// A very small, fast, efficient and simple storable Vec
|
||||
///
|
||||
/// Reads (imports of Mmap) are lazy
|
||||
///
|
||||
/// Stores only raw data without any overhead, and doesn't even have a header (TODO: which it should, at least to Err if wrong endian)
|
||||
/// Stores only raw data without any overhead, and doesn't even have a header
|
||||
///
|
||||
/// The file isn't portable for speed reasons (TODO: but could be ?)
|
||||
///
|
||||
@@ -47,12 +50,20 @@ const MAX_CACHE_SIZE: usize = 100 * ONE_MIB;
|
||||
pub struct StorableVec<I, T> {
|
||||
version: Version,
|
||||
pathbuf: PathBuf,
|
||||
file: File,
|
||||
/// **Number of values NOT number of bytes**
|
||||
file_len: usize,
|
||||
file_position: u64,
|
||||
buf: Vec<u8>,
|
||||
mmaps: Vec<OnceLock<Box<memmap2::Mmap>>>, // Boxed Mmap to reduce the size of the Lock (from 24 to 16)
|
||||
stored_len: Length,
|
||||
compressed: Compressed,
|
||||
|
||||
// Compressed
|
||||
decoded_pages: Option<Vec<OnceLock<Box<[T]>>>>,
|
||||
decoded_page: SmallCache<T>,
|
||||
pages: CompressedPagesMetadata,
|
||||
|
||||
// Raw
|
||||
// raw_pages: Vec<OnceLock<Box<memmap2::Mmap>>>,
|
||||
// raw_page: memmap2::Mmap,
|
||||
// file: File,
|
||||
// file_position: u64,
|
||||
// buf: Vec<u8>,
|
||||
pushed: Vec<T>,
|
||||
phantom: PhantomData<I>,
|
||||
}
|
||||
@@ -68,51 +79,49 @@ where
|
||||
pub const PAGE_SIZE: usize = Self::PER_PAGE * Self::SIZE_OF_T;
|
||||
pub const CACHE_LENGTH: usize = MAX_CACHE_SIZE / Self::PAGE_SIZE;
|
||||
|
||||
/// Same as import but will remove the folder if the endian or the version is different, so be careful !
|
||||
pub fn forced_import(path: &Path, version: Version) -> Result<Self> {
|
||||
let res = Self::import(path, version);
|
||||
/// Same as import but will reset the folder under certain errors, so be careful !
|
||||
pub fn forced_import(path: &Path, version: Version, compressed: Compressed) -> Result<Self> {
|
||||
let res = Self::import(path, version, compressed);
|
||||
match res {
|
||||
Err(Error::WrongEndian)
|
||||
| Err(Error::DifferentCompressionMode)
|
||||
| Err(Error::DifferentVersion {
|
||||
found: _,
|
||||
expected: _,
|
||||
}) => {
|
||||
fs::remove_dir_all(path)?;
|
||||
Self::import(path, version)
|
||||
Self::import(path, version, compressed)
|
||||
}
|
||||
_ => res,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn import(path: &Path, version: Version) -> Result<Self> {
|
||||
pub fn import(path: &Path, version: Version, compressed: Compressed) -> Result<Self> {
|
||||
fs::create_dir_all(path)?;
|
||||
|
||||
let version_path = Self::path_version_(path);
|
||||
version.validate(version_path.as_ref())?;
|
||||
version.write(version_path.as_ref())?;
|
||||
|
||||
let file = Self::open_file_(&Self::path_vec_(path))?;
|
||||
let compressed_path = Self::path_compressed_(path);
|
||||
compressed.validate(compressed_path.as_ref())?;
|
||||
compressed.write(compressed_path.as_ref())?;
|
||||
|
||||
let mut slf = Self {
|
||||
let stored_len = Length::try_from(Self::path_length_(path).as_path())?;
|
||||
|
||||
let pages = CompressedPagesMetadata::read(Self::path_pages_(path).as_path())?;
|
||||
|
||||
Ok(Self {
|
||||
version,
|
||||
compressed,
|
||||
pathbuf: path.to_owned(),
|
||||
file_position: 0,
|
||||
file_len: Self::read_disk_len_(&file)?,
|
||||
file,
|
||||
buf: Self::create_buffer(),
|
||||
mmaps: vec![],
|
||||
stored_len,
|
||||
decoded_pages: None,
|
||||
pushed: vec![],
|
||||
pages,
|
||||
decoded_page: None,
|
||||
phantom: PhantomData,
|
||||
};
|
||||
|
||||
slf.reset_file_metadata()?;
|
||||
|
||||
Ok(slf)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn create_buffer() -> Vec<u8> {
|
||||
vec![0; Self::SIZE_OF_T]
|
||||
})
|
||||
}
|
||||
|
||||
fn open_file(&self) -> io::Result<File> {
|
||||
@@ -127,57 +136,91 @@ where
|
||||
.open(path)
|
||||
}
|
||||
|
||||
pub fn open_then_read(&self, index: I) -> Result<T> {
|
||||
#[inline(always)]
|
||||
fn mmap(&self, page: &CompressedPageMetadata) -> io::Result<Mmap> {
|
||||
let len = page.bytes_len as usize;
|
||||
let offset = page.start;
|
||||
let file = self.open_file()?;
|
||||
|
||||
Ok(unsafe {
|
||||
memmap2::MmapOptions::new()
|
||||
.len(len)
|
||||
.offset(offset)
|
||||
.map(&file)?
|
||||
})
|
||||
}
|
||||
|
||||
fn decode(&self, page_index: usize) -> Result<Box<[T]>> {
|
||||
if self.pages.len() <= page_index {
|
||||
return Err(Error::ExpectVecToHaveIndex);
|
||||
}
|
||||
|
||||
let page = self.pages.get(page_index).unwrap();
|
||||
|
||||
let mmap = self.mmap(page)?;
|
||||
|
||||
let decoded = zstd::decode_all(&mmap[..]);
|
||||
|
||||
if decoded.is_err() {
|
||||
dbg!((page, page_index, &mmap[..], &mmap.len(), &decoded));
|
||||
}
|
||||
|
||||
Ok(decoded?
|
||||
.chunks(Self::SIZE_OF_T)
|
||||
.map(|slice| T::try_read_from_bytes(slice).unwrap())
|
||||
.collect::<Vec<_>>()
|
||||
.into_boxed_slice())
|
||||
}
|
||||
|
||||
pub fn open_then_read(&self, index: I) -> Result<Option<T>> {
|
||||
self.open_then_read_(Self::i_to_usize(index)?)
|
||||
}
|
||||
fn open_then_read_(&self, index: usize) -> Result<T> {
|
||||
let mut file = self.open_file()?;
|
||||
Self::seek_(&mut file, Self::index_to_byte_index(index))?;
|
||||
let mut buf = Self::create_buffer();
|
||||
Self::read_exact(&mut file, &mut buf).map(|v| v.to_owned())
|
||||
fn open_then_read_(&self, index: usize) -> Result<Option<T>> {
|
||||
Ok(self
|
||||
.decode(Self::index_to_page_index(index))?
|
||||
.get(Self::index_to_decoded_index(index))
|
||||
.cloned())
|
||||
}
|
||||
|
||||
fn read_disk_len(&self) -> io::Result<usize> {
|
||||
Self::read_disk_len_(&self.file)
|
||||
}
|
||||
fn read_disk_len_(file: &File) -> io::Result<usize> {
|
||||
Ok(Self::byte_index_to_index(file.metadata()?.len() as usize))
|
||||
pub fn init_big_cache(&mut self) -> io::Result<()> {
|
||||
self.decoded_pages.replace(vec![]);
|
||||
self.reset_big_cache()
|
||||
}
|
||||
|
||||
fn reset_file_metadata(&mut self) -> io::Result<()> {
|
||||
self.file_len = self.read_disk_len()?;
|
||||
self.file_position = self.file.seek(SeekFrom::Start(0))?;
|
||||
Ok(())
|
||||
}
|
||||
fn reset_big_cache(&mut self) -> io::Result<()> {
|
||||
if self.decoded_pages.is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
pub fn reset_mmaps(&mut self) -> io::Result<()> {
|
||||
self.mmaps.par_iter_mut().for_each(|lock| {
|
||||
let big_cache = self.decoded_pages.as_mut().unwrap();
|
||||
|
||||
big_cache.par_iter_mut().for_each(|lock| {
|
||||
lock.take();
|
||||
});
|
||||
|
||||
let len = (self.file_len as f64 / Self::PER_PAGE as f64).ceil() as usize;
|
||||
let len = (*self.stored_len as f64 / Self::PER_PAGE as f64).ceil() as usize;
|
||||
let len = Self::CACHE_LENGTH.min(len);
|
||||
|
||||
if self.mmaps.len() != len {
|
||||
self.mmaps.resize_with(len, Default::default);
|
||||
if big_cache.len() != len {
|
||||
big_cache.resize_with(len, Default::default);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn seek(&mut self, byte_index: u64) -> io::Result<u64> {
|
||||
self.file.seek(SeekFrom::Start(byte_index))
|
||||
}
|
||||
#[inline]
|
||||
fn seek_(file: &mut File, byte_index: u64) -> io::Result<u64> {
|
||||
file.seek(SeekFrom::Start(byte_index))
|
||||
fn reset_caches(&mut self) -> io::Result<()> {
|
||||
self.decoded_page.take();
|
||||
self.reset_big_cache()
|
||||
}
|
||||
|
||||
fn read_exact<'a>(file: &'a mut File, buf: &'a mut [u8]) -> Result<&'a T> {
|
||||
file.read_exact(buf)?;
|
||||
let v = T::try_ref_from_bytes(&buf[..])?;
|
||||
Ok(v)
|
||||
#[inline(always)]
|
||||
fn index_to_page_index(index: usize) -> usize {
|
||||
index / Self::PER_PAGE
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn index_to_decoded_index(index: usize) -> usize {
|
||||
index % Self::PER_PAGE
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -196,46 +239,28 @@ where
|
||||
Err(error) => return Err(error),
|
||||
}
|
||||
|
||||
// if !self.updated.is_empty() {
|
||||
// if let Some(v) = self.updated.get(&index) {
|
||||
// return Ok(Some(v));
|
||||
// }
|
||||
// }
|
||||
if let Some(big_cache) = self
|
||||
.decoded_pages
|
||||
.as_ref()
|
||||
.and_then(|v| if v.is_empty() { None } else { Some(v) })
|
||||
{
|
||||
let page_index = Self::index_to_page_index(index);
|
||||
let last_index = *self.stored_len - 1;
|
||||
let max_page_index = last_index / Self::PER_PAGE;
|
||||
|
||||
let page_index = index / Self::PER_PAGE;
|
||||
let last_index = self.file_len - 1;
|
||||
let max_page_index = last_index / Self::PER_PAGE;
|
||||
let min_page_index = (max_page_index + 1) - self.mmaps.len();
|
||||
let min_page_index = (max_page_index + 1) - big_cache.len();
|
||||
|
||||
// let min_open_page = self.min.load(AtomicOrdering::SeqCst);
|
||||
|
||||
// if self.min.load(AtomicOrdering::SeqCst) {
|
||||
// self.min.set(value)
|
||||
// }
|
||||
|
||||
if !self.mmaps.is_empty() && page_index >= min_page_index {
|
||||
let mmap = &**self
|
||||
.mmaps
|
||||
.get(page_index - min_page_index)
|
||||
.ok_or(Error::MmapsVecIsTooSmall)?
|
||||
.get_or_init(|| {
|
||||
Box::new(unsafe {
|
||||
memmap2::MmapOptions::new()
|
||||
.len(Self::PAGE_SIZE)
|
||||
.offset((page_index * Self::PAGE_SIZE) as u64)
|
||||
.map(&self.file)
|
||||
.unwrap()
|
||||
})
|
||||
});
|
||||
|
||||
let range = Self::index_to_byte_range(index);
|
||||
let slice = &mmap[range];
|
||||
return Ok(Some(Value::Ref(T::try_ref_from_bytes(slice)?)));
|
||||
if page_index >= min_page_index {
|
||||
return Ok(big_cache
|
||||
.get(page_index - min_page_index)
|
||||
.ok_or(Error::MmapsVecIsTooSmall)?
|
||||
.get_or_init(|| self.decode(page_index).unwrap())
|
||||
.get(Self::index_to_decoded_index(index))
|
||||
.map(|v| Value::Ref(v)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(self
|
||||
.open_then_read_(index)
|
||||
.map_or(None, |v| Some(Value::Owned(v))))
|
||||
Ok(self.open_then_read_(index)?.map(|v| Value::Owned(v)))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -255,17 +280,19 @@ where
|
||||
Err(error) => return Err(error),
|
||||
}
|
||||
|
||||
let byte_index = Self::index_to_byte_index(index);
|
||||
if self.file_position != byte_index {
|
||||
self.file_position = self.seek(Self::index_to_byte_index(index))?;
|
||||
}
|
||||
match Self::read_exact(&mut self.file, &mut self.buf) {
|
||||
Ok(value) => {
|
||||
self.file_position += Self::SIZE_OF_T as u64;
|
||||
Ok(Some(value))
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
let page_index = Self::index_to_page_index(index);
|
||||
|
||||
if self.decoded_page.as_ref().is_none_or(|b| b.0 != page_index) {
|
||||
self.decoded_page
|
||||
.replace((page_index, self.decode(page_index)?));
|
||||
}
|
||||
|
||||
Ok(self
|
||||
.decoded_page
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.1
|
||||
.get(Self::index_to_decoded_index(index)))
|
||||
}
|
||||
|
||||
pub fn read_last(&mut self) -> Result<Option<&T>> {
|
||||
@@ -278,33 +305,44 @@ where
|
||||
|
||||
pub fn iter<F>(&mut self, f: F) -> Result<()>
|
||||
where
|
||||
F: FnMut((I, &T, &mut Self)) -> Result<()>,
|
||||
F: FnMut((I, &T)) -> Result<()>,
|
||||
{
|
||||
self.iter_from(I::default(), f)
|
||||
}
|
||||
|
||||
pub fn iter_from<F>(&mut self, mut index: I, mut f: F) -> Result<()>
|
||||
where
|
||||
F: FnMut((I, &T, &mut Self)) -> Result<()>,
|
||||
F: FnMut((I, &T)) -> Result<()>,
|
||||
{
|
||||
let mut file = self.open_file()?;
|
||||
if !self.pushed.is_empty() {
|
||||
return Err(Error::UnsupportedUnflushedState);
|
||||
}
|
||||
|
||||
let disk_len = I::from(Self::read_disk_len_(&file)?);
|
||||
let stored_len = I::from(*self.stored_len);
|
||||
|
||||
Self::seek_(
|
||||
&mut file,
|
||||
Self::index_to_byte_index(Self::i_to_usize(index)?),
|
||||
)?;
|
||||
|
||||
let mut buf = Self::create_buffer();
|
||||
|
||||
while index < disk_len {
|
||||
f((index, Self::read_exact(&mut file, &mut buf)?, self))?;
|
||||
while index < stored_len {
|
||||
let v = self.read(index)?.unwrap();
|
||||
f((index, v))?;
|
||||
index = index + 1;
|
||||
}
|
||||
|
||||
if self.pushed_len() != 0 {
|
||||
unreachable!();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn iter_from_cloned<F>(&mut self, mut index: I, mut f: F) -> Result<()>
|
||||
where
|
||||
F: FnMut((I, T, &mut Self)) -> Result<()>,
|
||||
{
|
||||
if !self.pushed.is_empty() {
|
||||
return Err(Error::UnsupportedUnflushedState);
|
||||
}
|
||||
|
||||
let stored_len = I::from(*self.stored_len);
|
||||
|
||||
while index < stored_len {
|
||||
let v = self.read(index)?.unwrap().clone();
|
||||
f((index, v, self))?;
|
||||
index = index + 1;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -315,9 +353,7 @@ where
|
||||
return Err(Error::UnsupportedUnflushedState);
|
||||
}
|
||||
|
||||
let mut file = self.open_file()?;
|
||||
|
||||
let len = Self::read_disk_len_(&file)?;
|
||||
let len = *self.stored_len;
|
||||
|
||||
let from = from.map_or(0, |from| {
|
||||
if from >= 0 {
|
||||
@@ -339,13 +375,26 @@ where
|
||||
return Err(Error::RangeFromAfterTo);
|
||||
}
|
||||
|
||||
Self::seek_(&mut file, Self::index_to_byte_index(from))?;
|
||||
let mut small_cache: SmallCache<T> = None;
|
||||
|
||||
let mut buf = Self::create_buffer();
|
||||
let values = (from..=to)
|
||||
.flat_map(|index| {
|
||||
let page_index = Self::index_to_page_index(index);
|
||||
|
||||
Ok((from..=to)
|
||||
.flat_map(|_| Self::read_exact(&mut file, &mut buf).map(|v| v.to_owned()))
|
||||
.collect::<Vec<_>>())
|
||||
if small_cache.as_ref().is_none_or(|b| b.0 != page_index) {
|
||||
small_cache.replace((page_index, self.decode(page_index).unwrap()));
|
||||
}
|
||||
|
||||
small_cache
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.1
|
||||
.get(Self::index_to_decoded_index(index))
|
||||
.cloned()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
Ok(values)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -374,7 +423,7 @@ where
|
||||
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.file_len + self.pushed_len()
|
||||
*self.stored_len + self.pushed_len()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -406,41 +455,166 @@ where
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut bytes: Vec<u8> = vec![0; self.pushed_len() * Self::SIZE_OF_T];
|
||||
let mut file = self.open_file()?;
|
||||
|
||||
let (starting_page_index, values) = if *self.stored_len % Self::PER_PAGE != 0 {
|
||||
if self.pages.is_empty() {
|
||||
unreachable!()
|
||||
}
|
||||
|
||||
let last_page_index = self.pages.len() - 1;
|
||||
|
||||
let values = if let Some(values) = self.decoded_pages.as_mut().and_then(|big_cache| {
|
||||
big_cache
|
||||
.last_mut()
|
||||
.and_then(|lock| lock.take())
|
||||
.map(|b| b.into_vec())
|
||||
}) {
|
||||
values
|
||||
} else if self
|
||||
.decoded_page
|
||||
.as_ref()
|
||||
.is_some_and(|(page_index, _)| *page_index == last_page_index)
|
||||
{
|
||||
self.decoded_page.take().unwrap().1.into_vec()
|
||||
} else {
|
||||
self.decode(last_page_index)
|
||||
.inspect_err(|_| {
|
||||
dbg!(last_page_index, &self.pages);
|
||||
})
|
||||
.unwrap()
|
||||
.into_vec()
|
||||
};
|
||||
|
||||
let file_len = self.pages.pop().unwrap().start;
|
||||
|
||||
Self::file_set_len(&mut file, file_len)?;
|
||||
|
||||
(last_page_index, values)
|
||||
} else {
|
||||
(self.pages.len(), vec![])
|
||||
};
|
||||
|
||||
self.stored_len += self.pushed_len();
|
||||
|
||||
let compressed = values
|
||||
.into_par_iter()
|
||||
.chain(mem::take(&mut self.pushed).into_par_iter())
|
||||
.chunks(Self::PER_PAGE)
|
||||
.map(|chunk| (Self::compress_chunk(&chunk), chunk.len()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
compressed
|
||||
.iter()
|
||||
.enumerate()
|
||||
.for_each(|(i, (compressed_bytes, values_len))| {
|
||||
let page_index = starting_page_index + i;
|
||||
|
||||
let start = if page_index != 0 {
|
||||
let prev = self.pages.get(page_index - 1).unwrap();
|
||||
prev.start + prev.bytes_len as u64
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
let bytes_len = compressed_bytes.len() as u32;
|
||||
let values_len = *values_len as u32;
|
||||
|
||||
let page = CompressedPageMetadata::new(start, bytes_len, values_len);
|
||||
|
||||
self.pages.push(page_index, page);
|
||||
});
|
||||
|
||||
let compressed = compressed
|
||||
.into_iter()
|
||||
.flat_map(|(v, _)| v)
|
||||
.collect::<Box<_>>();
|
||||
|
||||
self.pages.write()?;
|
||||
file.write_all(&compressed)?;
|
||||
self.reset_caches()?;
|
||||
|
||||
self.write_length()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compress_chunk(chunk: &[T]) -> Box<[u8]> {
|
||||
if chunk.len() > Self::PER_PAGE {
|
||||
panic!();
|
||||
}
|
||||
|
||||
let mut bytes: Vec<u8> = vec![0; chunk.len() * Self::SIZE_OF_T];
|
||||
|
||||
let unsafe_bytes = UnsafeSlice::new(&mut bytes);
|
||||
|
||||
mem::take(&mut self.pushed)
|
||||
chunk
|
||||
.into_par_iter()
|
||||
.enumerate()
|
||||
.for_each(|(i, v)| unsafe_bytes.copy_slice(i * Self::SIZE_OF_T, v.as_bytes()));
|
||||
|
||||
self.file.write_all(&bytes)?;
|
||||
zstd::encode_all(bytes.as_slice(), DEFAULT_COMPRESSION_LEVEL)
|
||||
.unwrap()
|
||||
.into_boxed_slice()
|
||||
}
|
||||
|
||||
self.reset_file_metadata()?;
|
||||
pub fn truncate_if_needed(&mut self, index: I) -> Result<()> {
|
||||
let index = Self::i_to_usize(index)?;
|
||||
|
||||
if index >= *self.stored_len {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if index == 0 {
|
||||
self.reset_file()?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let page_index = Self::index_to_page_index(index);
|
||||
|
||||
let values = self.decode(page_index)?;
|
||||
let mut page = self.pages.truncate(page_index).unwrap();
|
||||
|
||||
let mut file = self.open_file()?;
|
||||
Self::file_set_len(&mut file, page.start)?;
|
||||
|
||||
let decoded_index = Self::index_to_decoded_index(index);
|
||||
|
||||
if decoded_index != 0 {
|
||||
let chunk = &values[..decoded_index];
|
||||
|
||||
let compressed = Self::compress_chunk(chunk);
|
||||
|
||||
page.values_len = chunk.len() as u32;
|
||||
page.bytes_len = compressed.len() as u32;
|
||||
|
||||
file.write_all(&compressed)?;
|
||||
|
||||
self.pages.push(page_index, page);
|
||||
}
|
||||
|
||||
self.pages.write()?;
|
||||
|
||||
*self.stored_len = index;
|
||||
self.write_length()?;
|
||||
|
||||
self.reset_caches()?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn reset_file(&mut self) -> Result<()> {
|
||||
self.truncate_if_needed(I::from(0))?;
|
||||
let mut file = self.open_file()?;
|
||||
Self::file_set_len(&mut file, 0)?;
|
||||
*self.stored_len = 0;
|
||||
self.reset_caches()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn truncate_if_needed(&mut self, index: I) -> Result<Option<T>> {
|
||||
let index = Self::i_to_usize(index)?;
|
||||
|
||||
if index >= self.file_len {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let value_at_index = self.open_then_read_(index).ok();
|
||||
|
||||
self.file.set_len(Self::index_to_byte_index(index))?;
|
||||
|
||||
self.reset_file_metadata()?;
|
||||
|
||||
Ok(value_at_index)
|
||||
fn file_set_len(file: &mut File, len: u64) -> io::Result<()> {
|
||||
file.set_len(len)?;
|
||||
file.seek(SeekFrom::End(0))?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -449,24 +623,11 @@ where
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn byte_index_to_index(byte_index: usize) -> usize {
|
||||
byte_index / Self::SIZE_OF_T
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn index_to_byte_index(index: usize) -> u64 {
|
||||
(index * Self::SIZE_OF_T) as u64
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn index_to_byte_range(index: usize) -> Range<usize> {
|
||||
let index = (Self::index_to_byte_index(index) as usize) % Self::PAGE_SIZE;
|
||||
index..(index + Self::SIZE_OF_T)
|
||||
}
|
||||
|
||||
fn index_to_pushed_index(&self, index: usize) -> Result<Option<usize>> {
|
||||
if index >= self.file_len {
|
||||
let index = index - self.file_len;
|
||||
let file_len = *self.stored_len;
|
||||
|
||||
if index >= file_len {
|
||||
let index = index - file_len;
|
||||
if index >= self.pushed.len() {
|
||||
Err(Error::IndexTooHigh)
|
||||
} else {
|
||||
@@ -497,7 +658,24 @@ where
|
||||
}
|
||||
#[inline]
|
||||
fn path_vec_(path: &Path) -> PathBuf {
|
||||
path.join("vec")
|
||||
path.join("vec.zstd")
|
||||
}
|
||||
|
||||
fn write_length(&self) -> io::Result<()> {
|
||||
self.stored_len.write(&self.path_length())
|
||||
}
|
||||
#[inline]
|
||||
fn path_length(&self) -> PathBuf {
|
||||
Self::path_length_(&self.pathbuf)
|
||||
}
|
||||
#[inline]
|
||||
fn path_length_(path: &Path) -> PathBuf {
|
||||
path.join("length")
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn path_pages_(path: &Path) -> PathBuf {
|
||||
path.join("pages")
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -505,6 +683,11 @@ where
|
||||
path.join("version")
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn path_compressed_(path: &Path) -> PathBuf {
|
||||
path.join("compressed")
|
||||
}
|
||||
|
||||
pub fn index_type_to_string(&self) -> &str {
|
||||
std::any::type_name::<I>()
|
||||
}
|
||||
@@ -520,9 +703,6 @@ where
|
||||
T: StoredType,
|
||||
{
|
||||
fn clone(&self) -> Self {
|
||||
let path = &self.pathbuf;
|
||||
let path_version = Self::path_version_(path);
|
||||
let version = Version::try_from(path_version.as_path()).unwrap();
|
||||
Self::import(path, version).unwrap()
|
||||
Self::import(&self.pathbuf, self.version, self.compressed).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
20
crates/brk_vec/src/structs/back.rs
Normal file
20
crates/brk_vec/src/structs/back.rs
Normal file
@@ -0,0 +1,20 @@
|
||||
use std::{fs::File, sync::OnceLock};
|
||||
|
||||
use super::CompressedPagesMetadata;
|
||||
|
||||
type CompressedPage<T> = Option<(usize, Box<[T]>)>;
|
||||
|
||||
/// Storage state of a vec, with one variant per storage mode.
///
/// NOTE(review): the `Raw` fields mirror the commented-out "Raw" fields of the main vec
/// struct; presumably this enum is meant to replace them — confirm against the caller.
pub enum Back<T> {
|
||||
// Uncompressed mode: memory maps plus a file handle with its own read buffer.
Raw {
|
||||
raw_pages: Vec<OnceLock<Box<memmap2::Mmap>>>,
|
||||
raw_page: memmap2::Mmap,
|
||||
file: File,
|
||||
file_position: u64,
|
||||
buf: Vec<u8>,
|
||||
},
|
||||
// Compressed mode: lazily decoded pages, one `(page index, values)` slot, and the
// per-page metadata.
Compressed {
|
||||
decoded_pages: Option<Vec<OnceLock<Box<[T]>>>>,
|
||||
decoded_page: CompressedPage<T>,
|
||||
pages: CompressedPagesMetadata,
|
||||
},
|
||||
}
|
||||
67
crates/brk_vec/src/structs/compressed.rs
Normal file
67
crates/brk_vec/src/structs/compressed.rs
Normal file
@@ -0,0 +1,67 @@
|
||||
use std::{
|
||||
fs,
|
||||
io::{self},
|
||||
ops::Deref,
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use crate::{Error, Result};
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Compressed(bool);
|
||||
|
||||
impl Compressed {
|
||||
pub const YES: Self = Self(true);
|
||||
pub const NO: Self = Self(false);
|
||||
|
||||
pub fn write(&self, path: &Path) -> Result<(), io::Error> {
|
||||
fs::write(path, self.as_bytes())
|
||||
}
|
||||
|
||||
fn as_bytes(&self) -> Vec<u8> {
|
||||
if self.0 { vec![1] } else { vec![0] }
|
||||
}
|
||||
|
||||
fn from_bytes(bytes: &[u8]) -> Self {
|
||||
if bytes.len() != 1 {
|
||||
panic!();
|
||||
}
|
||||
if bytes[0] == 1 {
|
||||
Self(true)
|
||||
} else if bytes[0] == 0 {
|
||||
Self(false)
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn validate(&self, path: &Path) -> Result<()> {
|
||||
if let Ok(prev_compressed) = Compressed::try_from(path) {
|
||||
if prev_compressed != *self {
|
||||
return Err(Error::DifferentCompressionMode);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&Path> for Compressed {
|
||||
type Error = Error;
|
||||
fn try_from(value: &Path) -> Result<Self, Self::Error> {
|
||||
Ok(Self::from_bytes(&fs::read(value)?))
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bool> for Compressed {
|
||||
fn from(value: bool) -> Self {
|
||||
Self(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Compressed {
|
||||
type Target = bool;
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
71
crates/brk_vec/src/structs/length.rs
Normal file
71
crates/brk_vec/src/structs/length.rs
Normal file
@@ -0,0 +1,71 @@
|
||||
use std::{
|
||||
fs,
|
||||
io::{self, Read},
|
||||
ops::{AddAssign, Deref, DerefMut},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
|
||||
|
||||
use crate::{Error, Result};
|
||||
|
||||
#[derive(
|
||||
Debug,
|
||||
Default,
|
||||
Clone,
|
||||
Copy,
|
||||
PartialEq,
|
||||
Eq,
|
||||
PartialOrd,
|
||||
Ord,
|
||||
FromBytes,
|
||||
IntoBytes,
|
||||
Immutable,
|
||||
KnownLayout,
|
||||
)]
|
||||
pub struct Length(usize);
|
||||
|
||||
impl Length {
|
||||
pub fn write(&self, path: &Path) -> Result<(), io::Error> {
|
||||
fs::write(path, self.as_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<usize> for Length {
|
||||
fn from(value: usize) -> Self {
|
||||
Self(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Length {
|
||||
type Target = usize;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for Length {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&Path> for Length {
|
||||
type Error = Error;
|
||||
fn try_from(value: &Path) -> Result<Self, Self::Error> {
|
||||
let mut buf = [0; 8];
|
||||
if let Ok(bytes) = fs::read(value) {
|
||||
bytes.as_slice().read_exact(&mut buf)?;
|
||||
Ok(*(Self::ref_from_bytes(&buf)?))
|
||||
} else {
|
||||
Ok(Self::default())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AddAssign<usize> for Length {
|
||||
fn add_assign(&mut self, rhs: usize) {
|
||||
self.0 += rhs;
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,15 @@
|
||||
mod back;
|
||||
mod compressed;
|
||||
mod length;
|
||||
mod page;
|
||||
mod pages;
|
||||
mod unsafe_slice;
|
||||
mod version;
|
||||
|
||||
pub use back::*;
|
||||
pub use compressed::*;
|
||||
pub use length::*;
|
||||
pub use page::*;
|
||||
pub use pages::*;
|
||||
pub use unsafe_slice::*;
|
||||
pub use version::*;
|
||||
|
||||
18
crates/brk_vec/src/structs/page.rs
Normal file
18
crates/brk_vec/src/structs/page.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
|
||||
|
||||
/// Location and size of one compressed page inside the vec's data file.
#[derive(Debug, Clone, IntoBytes, Immutable, FromBytes, KnownLayout)]
|
||||
pub struct CompressedPageMetadata {
|
||||
// Byte offset of the page's compressed bytes in the data file.
pub start: u64,
|
||||
// Length in bytes of the compressed page.
pub bytes_len: u32,
|
||||
// Number of values stored in the page.
pub values_len: u32,
|
||||
}
|
||||
|
||||
impl CompressedPageMetadata {
|
||||
/// Builds a metadata record from its three fields.
pub fn new(start: u64, bytes_len: u32, values_len: u32) -> Self {
|
||||
Self {
|
||||
start,
|
||||
bytes_len,
|
||||
values_len,
|
||||
}
|
||||
}
|
||||
}
|
||||
118
crates/brk_vec/src/structs/pages.rs
Normal file
118
crates/brk_vec/src/structs/pages.rs
Normal file
@@ -0,0 +1,118 @@
|
||||
use std::{
|
||||
fs::{self, OpenOptions},
|
||||
io::{self, Seek, SeekFrom, Write},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use rayon::prelude::*;
|
||||
use zerocopy::{IntoBytes, TryFromBytes};
|
||||
|
||||
use crate::Result;
|
||||
|
||||
use super::{CompressedPageMetadata, UnsafeSlice};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompressedPagesMetadata {
|
||||
vec: Vec<CompressedPageMetadata>,
|
||||
change_at: Option<usize>,
|
||||
path: PathBuf,
|
||||
}
|
||||
|
||||
impl CompressedPagesMetadata {
|
||||
const PAGE_SIZE: usize = size_of::<CompressedPageMetadata>();
|
||||
|
||||
pub fn read(path: &Path) -> Result<CompressedPagesMetadata> {
|
||||
let slf = Self {
|
||||
vec: fs::read(path)
|
||||
.unwrap_or_default()
|
||||
.chunks(Self::PAGE_SIZE)
|
||||
.map(|bytes| {
|
||||
if bytes.len() != Self::PAGE_SIZE {
|
||||
panic!()
|
||||
}
|
||||
CompressedPageMetadata::try_read_from_bytes(bytes).unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
path: path.to_owned(),
|
||||
change_at: None,
|
||||
};
|
||||
|
||||
Ok(slf)
|
||||
}
|
||||
|
||||
pub fn write(&mut self) -> io::Result<()> {
|
||||
if self.change_at.is_none() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let change_at = self.change_at.take().unwrap();
|
||||
|
||||
let len = (self.vec.len() - change_at) * Self::PAGE_SIZE;
|
||||
|
||||
let mut bytes: Vec<u8> = vec![0; len];
|
||||
|
||||
let unsafe_bytes = UnsafeSlice::new(&mut bytes);
|
||||
|
||||
self.vec[change_at..]
|
||||
.par_iter()
|
||||
.enumerate()
|
||||
.for_each(|(i, v)| unsafe_bytes.copy_slice(i * Self::PAGE_SIZE, v.as_bytes()));
|
||||
|
||||
let mut file = OpenOptions::new()
|
||||
.read(true)
|
||||
.create(true)
|
||||
.truncate(false)
|
||||
.append(true)
|
||||
.open(&self.path)?;
|
||||
|
||||
file.set_len((change_at * Self::PAGE_SIZE) as u64)?;
|
||||
file.seek(SeekFrom::End(0))?;
|
||||
|
||||
file.write_all(&bytes)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.vec.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn get(&self, page_index: usize) -> Option<&CompressedPageMetadata> {
|
||||
self.vec.get(page_index)
|
||||
}
|
||||
|
||||
pub fn last(&self) -> Option<&CompressedPageMetadata> {
|
||||
self.vec.last()
|
||||
}
|
||||
|
||||
pub fn pop(&mut self) -> Option<CompressedPageMetadata> {
|
||||
self.vec.pop()
|
||||
}
|
||||
|
||||
pub fn push(&mut self, page_index: usize, page: CompressedPageMetadata) {
|
||||
if page_index != self.vec.len() {
|
||||
panic!();
|
||||
}
|
||||
|
||||
self.set_changed_at(page_index);
|
||||
|
||||
self.vec.push(page);
|
||||
}
|
||||
|
||||
fn set_changed_at(&mut self, page_index: usize) {
|
||||
if self.change_at.is_none_or(|pi| pi > page_index) {
|
||||
self.change_at.replace(page_index);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn truncate(&mut self, page_index: usize) -> Option<CompressedPageMetadata> {
|
||||
let page = self.get(page_index).cloned();
|
||||
self.vec.truncate(page_index);
|
||||
self.set_changed_at(page_index);
|
||||
page
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user