diff --git a/README.md b/README.md index 9c042bf57..072318c2c 100644 --- a/README.md +++ b/README.md @@ -77,7 +77,7 @@ In contrast, existing alternatives tend to be either [very costly](https://studi - [`brk_parser`](https://crates.io/crates/brk_parser): A very fast Bitcoin Core block parser and iterator built on top of bitcoin-rust - [`brk_query`](https://crates.io/crates/brk_query): A library that finds requested datasets. - [`brk_server`](https://crates.io/crates/brk_server): A server that serves Bitcoin data and swappable front-ends, built on top of `brk_indexer`, `brk_fetcher` and `brk_computer` -- [`brk_vec`](https://crates.io/crates/brk_vec): A very small, fast, efficient and simple storable Vec. +- [`brk_vec`](https://crates.io/crates/brk_vec): A push-only, truncable, compressable, saveable Vec ## Acknowledgments diff --git a/crates/brk_cli/src/run.rs b/crates/brk_cli/src/run.rs index 83589fb53..ab10f1b99 100644 --- a/crates/brk_cli/src/run.rs +++ b/crates/brk_cli/src/run.rs @@ -65,7 +65,7 @@ pub fn run(config: RunConfig) -> color_eyre::Result<()> { let starting_indexes = indexer.index(&parser, rpc, &exit)?; - // computer.compute(&mut indexer, starting_indexes, &exit)?; + computer.compute(&mut indexer, starting_indexes, &exit)?; if let Some(delay) = config.delay() { sleep(Duration::from_secs(delay)) diff --git a/crates/brk_computer/src/storage/vecs/base.rs b/crates/brk_computer/src/storage/vecs/base.rs index d1b0158b8..8fcc8e535 100644 --- a/crates/brk_computer/src/storage/vecs/base.rs +++ b/crates/brk_computer/src/storage/vecs/base.rs @@ -12,6 +12,7 @@ use brk_vec::{ Compressed, DynamicVec, Error, GenericVec, Result, StoredIndex, StoredType, StoredVec, Value, Version, }; +use log::info; const ONE_KIB: usize = 1024; const ONE_MIB: usize = ONE_KIB * ONE_KIB; @@ -39,11 +40,11 @@ where version: Version, compressed: Compressed, ) -> brk_vec::Result { - let vec = StoredVec::forced_import(path, version, compressed)?; + let inner = StoredVec::forced_import(path, 
version, compressed)?; Ok(Self { computed_version: None, - inner: vec, + inner, }) } @@ -95,6 +96,13 @@ where pub fn len(&self) -> usize { self.inner.len() } + pub fn is_empty(&self) -> bool { + self.inner.is_empty() + } + + fn file_name(&self) -> String { + self.inner.file_name() + } pub fn vec(&self) -> &StoredVec { &self.inner @@ -135,6 +143,11 @@ where self.inner.reset()?; } version.write(path.as_ref())?; + + if self.is_empty() { + info!("Computing {}...", self.file_name()) + } + Ok(()) } diff --git a/crates/brk_computer/src/storage/vecs/transactions.rs b/crates/brk_computer/src/storage/vecs/transactions.rs index c39937cba..d4e76348d 100644 --- a/crates/brk_computer/src/storage/vecs/transactions.rs +++ b/crates/brk_computer/src/storage/vecs/transactions.rs @@ -1,6 +1,6 @@ use std::{fs, path::Path}; -use brk_core::{Sats, StoredU64, Txindex, Txinindex, Txoutindex}; +use brk_core::{Sats, StoredU8, StoredU32, StoredU64, Txindex, Txinindex, Txoutindex}; use brk_exit::Exit; use brk_indexer::Indexer; use brk_vec::{Compressed, DynamicVec, Version}; @@ -26,9 +26,12 @@ pub struct Vecs { // pub txindex_to_input_sum: ComputedVec, // pub txindex_to_output_sum: ComputedVec, // pub txindex_to_output_value: ComputedVecsFromTxindex, - // pub txindex_to_version_1: ComputedVecsFromTxindex, - // pub txindex_to_version_2: ComputedVecsFromTxindex, - // pub txindex_to_version_3: ComputedVecsFromTxindex, + pub txindex_to_v1: ComputedVec, + pub txindex_to_v2: ComputedVec, + pub txindex_to_v3: ComputedVec, + pub indexes_to_tx_v1: ComputedVecsFromHeight, + pub indexes_to_tx_v2: ComputedVecsFromHeight, + pub indexes_to_tx_v3: ComputedVecsFromHeight, // pub txinindex_to_value: ComputedVec, pub height_to_tx_count: ComputedVecsFromHeight, pub txindex_to_input_count: ComputedVecsFromTxindex, @@ -104,6 +107,45 @@ impl Vecs { Version::ZERO, compressed, )?, + txindex_to_v1: ComputedVec::forced_import( + &path.join("txindex_to_v1"), + Version::ZERO, + compressed, + )?, + txindex_to_v2: 
ComputedVec::forced_import( + &path.join("txindex_to_v2"), + Version::ZERO, + compressed, + )?, + txindex_to_v3: ComputedVec::forced_import( + &path.join("txindex_to_v3"), + Version::ZERO, + compressed, + )?, + indexes_to_tx_v1: ComputedVecsFromHeight::forced_import( + path, + "tx_v1", + true, + Version::ZERO, + compressed, + StorableVecGeneatorOptions::default().add_sum().add_total(), + )?, + indexes_to_tx_v2: ComputedVecsFromHeight::forced_import( + path, + "tx_v2", + true, + Version::ZERO, + compressed, + StorableVecGeneatorOptions::default().add_sum().add_total(), + )?, + indexes_to_tx_v3: ComputedVecsFromHeight::forced_import( + path, + "tx_v3", + true, + Version::ZERO, + compressed, + StorableVecGeneatorOptions::default().add_sum().add_total(), + )?, }) } @@ -176,6 +218,51 @@ impl Vecs { exit, )?; + self.txindex_to_v1.compute_transform( + starting_indexes.txindex, + indexer_vecs.txindex_to_txversion.mut_vec(), + |(i, v, ..)| (i, StoredU8::from(v)), + exit, + )?; + // self.indexes_to_tx_v1.compute_all( + // indexer, + // indexes, + // starting_indexes, + // exit, + // |vec, indexer, indexes, indexes, exit| { + // vec.compute_transform( + // starting_indexes.height, + // indexer.mut_vecs().txindex_to_txversion.mut_vec(), + // || {}, + // exit, + // )?; + // }, + // )?; + self.txindex_to_v2.compute_transform( + starting_indexes.txindex, + indexer_vecs.txindex_to_txversion.mut_vec(), + |(i, v, ..)| (i, StoredU8::from(v)), + exit, + )?; + // self.indexes_to_tx_v1.compute_rest( + // starting_indexes.txindex, + // indexer_vecs.txindex_to_txversion.mut_vec(), + // |(i, v, ..)| (i, StoredU8::from(v)), + // exit, + // )?; + self.txindex_to_v3.compute_transform( + starting_indexes.txindex, + indexer_vecs.txindex_to_txversion.mut_vec(), + |(i, v, ..)| (i, StoredU8::from(v)), + exit, + )?; + // self.indexes_to_tx_v1.compute_rest( + // starting_indexes.txindex, + // indexer_vecs.txindex_to_txversion.mut_vec(), + // |(i, v, ..)| (i, StoredU8::from(v)), + // exit, + // )?; + 
self.txinindex_to_value.compute_transform( starting_indexes.txinindex, indexer_vecs.txinindex_to_txoutindex.mut_vec(), @@ -210,10 +297,16 @@ impl Vecs { vec![ self.txindex_to_is_coinbase.any_vec(), self.txinindex_to_value.any_vec(), + self.txindex_to_v1.any_vec(), + self.txindex_to_v2.any_vec(), + self.txindex_to_v3.any_vec(), ], self.height_to_tx_count.any_vecs(), self.txindex_to_output_count.any_vecs(), self.txindex_to_input_count.any_vecs(), + self.indexes_to_tx_v1.any_vecs(), + self.indexes_to_tx_v2.any_vecs(), + self.indexes_to_tx_v3.any_vecs(), ] .concat() } diff --git a/crates/brk_core/src/structs/mod.rs b/crates/brk_core/src/structs/mod.rs index c0ea3a7a6..a25923be8 100644 --- a/crates/brk_core/src/structs/mod.rs +++ b/crates/brk_core/src/structs/mod.rs @@ -22,6 +22,7 @@ mod quarterindex; mod sats; mod stored_u32; mod stored_u64; +mod stored_u8; mod stored_usize; mod timestamp; mod txid; @@ -58,6 +59,7 @@ pub use monthindex::*; pub use ohlc::*; pub use quarterindex::*; pub use sats::*; +pub use stored_u8::*; pub use stored_u32::*; pub use stored_u64::*; pub use stored_usize::*; diff --git a/crates/brk_core/src/structs/stored_u8.rs b/crates/brk_core/src/structs/stored_u8.rs new file mode 100644 index 000000000..69d8fde16 --- /dev/null +++ b/crates/brk_core/src/structs/stored_u8.rs @@ -0,0 +1,83 @@ +use std::ops::{Add, Div}; + +use derive_deref::Deref; +use serde::Serialize; +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; + +use crate::CheckedSub; + +/// Newtype around `u8` that derives the zerocopy byte-conversion traits and `Serialize`. +#[derive( + Debug, + Deref, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + FromBytes, + Immutable, + IntoBytes, + KnownLayout, + Serialize, +)] +pub struct StoredU8(u8); + +impl StoredU8 { + pub const ZERO: Self = Self(0); + + pub fn new(counter: u8) -> Self { + Self(counter) + } +} + +impl From<u8> for StoredU8 { + fn from(value: u8) -> Self { + Self(value) + } +} + +impl From<usize> for StoredU8 { + /// Lossy: `as u8` keeps only the low 8 bits of `value`. + fn from(value: usize) -> Self { + Self(value as u8) + } +} + +impl CheckedSub for StoredU8 { + fn 
checked_sub(self, rhs: Self) -> Option<Self> { + self.0.checked_sub(rhs.0).map(Self) + } +} + +impl Div<usize> for StoredU8 { + type Output = Self; + /// Divides in `usize` so a divisor above `u8::MAX` is not truncated before dividing. + fn div(self, rhs: usize) -> Self::Output { + Self((self.0 as usize / rhs) as u8) + } +} + +impl Add for StoredU8 { + type Output = Self; + fn add(self, rhs: Self) -> Self::Output { + Self(self.0 + rhs.0) + } +} + +impl From<f64> for StoredU8 { + /// Panics if `value` is negative or above `u8::MAX`; the cast drops any fractional part. + fn from(value: f64) -> Self { + if value < 0.0 || value > u8::MAX as f64 { + panic!() + } + Self(value as u8) + } +} + +impl From<StoredU8> for f64 { + fn from(value: StoredU8) -> Self { + value.0 as f64 + } +} diff --git a/crates/brk_core/src/structs/txversion.rs b/crates/brk_core/src/structs/txversion.rs index c7ca53183..9a8f91b34 100644 --- a/crates/brk_core/src/structs/txversion.rs +++ b/crates/brk_core/src/structs/txversion.rs @@ -2,6 +2,8 @@ use derive_deref::Deref; use serde::Serialize; use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; +use super::StoredU8; + #[derive(Debug, Deref, Clone, Copy, Immutable, IntoBytes, KnownLayout, FromBytes, Serialize)] pub struct TxVersion(i32); @@ -16,3 +18,9 @@ impl From for bitcoin::transaction::Version { Self(value.0) } } + +impl From<TxVersion> for StoredU8 { + fn from(value: TxVersion) -> Self { + Self::from(value.0 as u8) + } +} diff --git a/crates/brk_indexer/README.md b/crates/brk_indexer/README.md index 3c9750f02..6c270bba0 100644 --- a/crates/brk_indexer/README.md +++ b/crates/brk_indexer/README.md @@ -58,9 +58,13 @@ Stores: `src/storage/stores/mod.rs` ## Benchmark -Indexing `0..885_835` took `11 hours 6 min 50 s` on a Macbook Pro M3 Pro with 36 GB of RAM +### Result 1 - 2025-04-10 -`footprint` report: -- Peak memory: `5115 MB` -- Memory while waiting for a new block: `890 MB` -- Reclaimable memory: `6478 MB` +- version: `v0.0.20` +- machine: `Macbook Pro M3 Pro (36GB RAM)` +- mode: `raw` +- from: `0` +- to: `891_810` +- time: `8 hours 27 min 3s` +- peak memory: `6.5GB` +- disk usage: `270 GB` diff --git a/crates/brk_indexer/examples/main.rs 
b/crates/brk_indexer/examples/main.rs index a835ffe49..2302f5ea2 100644 --- a/crates/brk_indexer/examples/main.rs +++ b/crates/brk_indexer/examples/main.rs @@ -24,7 +24,7 @@ fn main() -> color_eyre::Result<()> { let outputs = Path::new("../../_outputs"); - let mut indexer = Indexer::new(outputs.join("indexed").to_owned(), false, true)?; + let mut indexer = Indexer::new(outputs.join("indexed").to_owned(), true, true)?; indexer.import_stores()?; indexer.import_vecs()?; diff --git a/crates/brk_vec/Cargo.toml b/crates/brk_vec/Cargo.toml index 656e2751a..d7bc59001 100644 --- a/crates/brk_vec/Cargo.toml +++ b/crates/brk_vec/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "brk_vec" -description = "A very small, fast, efficient and simple storable Vec" +description = "A push-only, truncable, compressable, saveable Vec" keywords = ["vec", "disk", "data"] categories = ["database"] version.workspace = true diff --git a/crates/brk_vec/README.md b/crates/brk_vec/README.md index 17c7c8599..5fe4d2b9a 100644 --- a/crates/brk_vec/README.md +++ b/crates/brk_vec/README.md @@ -39,19 +39,3 @@ A `Vec` (an array) that is stored on disk and thus which can be much larger than Compared to a key/value store, the data stored is raw byte interpretation of the Vec's values without any overhead which is very efficient. Additionally it uses close to no RAM when caching isn't active and up to 100 MB when it is. Compression is also available and built on top [`zstd`](https://crates.io/crates/zstd) to save even more space (from 0 to 75%). The tradeoff being slower reading speeds, especially random reading speeds. This is due to the data being stored in compressed pages of 16 KB, which means that if you to read even one value in that page you have to uncompress the whole page. - -## Disclaimer - -Portability will depend on the type of values. - -Non bytes/slices types (`u8`, `u16`, ...) 
will be read as slice in an unsafe manner (using `std::slice::from_raw_parts`) and thus have the endianness of the system. On the other hand, `&[u8]` should be inserted as is. - -If portability is important to you, just create a wrapper struct which has custom `get`, `push`, ... methods and does something like: - -```rust -impl StorableVecU64 { - pub fn push(&mut self, value: u64) { - self.push(&value.to_be_bytes()) - } -} -``` diff --git a/crates/brk_vec/src/traits/dynamic.rs b/crates/brk_vec/src/traits/dynamic.rs index 853739c61..fd541b7d6 100644 --- a/crates/brk_vec/src/traits/dynamic.rs +++ b/crates/brk_vec/src/traits/dynamic.rs @@ -83,10 +83,6 @@ pub trait DynamicVec: Send + Sync { fn mmap(&self) -> &ArcSwap; - #[inline] - fn new_guard(&self) -> Guard> { - self.mmap().load() - } fn guard(&self) -> &Option>>; fn mut_guard(&mut self) -> &mut Option>>; diff --git a/crates/brk_vec/src/traits/generic.rs b/crates/brk_vec/src/traits/generic.rs index 606d78e16..ac6074517 100644 --- a/crates/brk_vec/src/traits/generic.rs +++ b/crates/brk_vec/src/traits/generic.rs @@ -73,7 +73,7 @@ where let mmap = Self::new_mmap(file)?; self.mmap().store(mmap); if self.guard().is_some() { - let guard = self.new_guard(); + let guard = self.mmap().load(); self.mut_guard().replace(guard); } else { unreachable!("This function shouldn't be called in a cloned instance") diff --git a/crates/brk_vec/src/variants/raw.rs b/crates/brk_vec/src/variants/raw.rs index d878460d2..4a58d91ae 100644 --- a/crates/brk_vec/src/variants/raw.rs +++ b/crates/brk_vec/src/variants/raw.rs @@ -102,7 +102,7 @@ where if let Some(guard) = self.guard() { guard.len() / Self::SIZE_OF_T } else { - self.new_guard().len() / Self::SIZE_OF_T + self.mmap.load().len() / Self::SIZE_OF_T } } diff --git a/websites/kibo.money/scripts/vecid-to-indexes.js b/websites/kibo.money/scripts/vecid-to-indexes.js index 6d2190ec4..3a89fe428 100644 --- a/websites/kibo.money/scripts/vecid-to-indexes.js +++ 
b/websites/kibo.money/scripts/vecid-to-indexes.js @@ -153,11 +153,23 @@ export function createVecIdToIndexes() { "total-output-count": [Dateindex, Height, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], "total-size": [Txindex], "total-tx-count": [Dateindex, Height, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], + "total-tx-v1": [Dateindex, Height, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], + "total-tx-v2": [Dateindex, Height, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], + "total-tx-v3": [Dateindex, Height, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], "tx-count": [Height], "tx-count-sum": [Dateindex, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], + "tx-v1": [Height], + "tx-v1-sum": [Dateindex, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], + "tx-v2": [Height], + "tx-v2-sum": [Dateindex, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], + "tx-v3": [Height], + "tx-v3-sum": [Dateindex, Weekindex, Monthindex, Quarterindex, Yearindex, Decadeindex, Difficultyepoch], txid: [Txindex], txoutindex: [Txinindex], txversion: [Txindex], + v1: [Txindex], + v2: [Txindex], + v3: [Txindex], value: [Txinindex, Txoutindex], weekindex: [Dateindex, Weekindex], yearindex: [Monthindex, Yearindex],