diff --git a/.gitignore b/.gitignore index 6ba4d7b64..ad84eaf99 100644 --- a/.gitignore +++ b/.gitignore @@ -34,8 +34,6 @@ vecid-to-indexes.d.ts # Outputs _outputs -# Python -.ropeproject # Logs .log diff --git a/Cargo.lock b/Cargo.lock index 0001d38c4..534a176e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -556,6 +556,7 @@ dependencies = [ "serde", "serde_json", "zerocopy", + "zstd", ] [[package]] @@ -1295,9 +1296,9 @@ dependencies = [ [[package]] name = "http" -version = "1.2.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" +checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" dependencies = [ "bytes", "fnv", @@ -1316,12 +1317,12 @@ dependencies = [ [[package]] name = "http-body-util" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", - "futures-util", + "futures-core", "http", "http-body", "pin-project-lite", @@ -1779,9 +1780,9 @@ checksum = "1036865bb9422d3300cf723f657c2851d0e9ab12567854b1f4eba3d77decf564" [[package]] name = "oxc" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "918d8f6deffeb380427e17bfc2d63c38f30e485af3a285faa6ca87d8c8a37a0d" +checksum = "68df167eecf9d9d0c8f5e0a7daa7cc5fb1627df45f6e285b3e6726a7325dd458" dependencies = [ "oxc_allocator", "oxc_ast", @@ -1822,11 +1823,12 @@ dependencies = [ [[package]] name = "oxc_allocator" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d92b3f2a5e042d3e7d21aab5896d3f02d657c4b3cae42aa2a5842ebd018ec194" +checksum = "80925d1f320efc034e1af4b88057c7a115d5163c73c5543000cb1d9d40097457" dependencies = [ "allocator-api2", + "assert-unchecked", "bumpalo", "hashbrown 0.15.2", "rustc-hash", @@ -1835,9 +1837,9 @@ dependencies = [ [[package]] name = "oxc_ast" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "864754f59a7965a83bf47c376f97ac1937dedd415c367f7e6b7d6499453aaad4" +checksum = "c7d38367501a804dee978311c96d6740c65369347fbab4bb9def75c98faab640" dependencies = [ "bitflags", "cow-utils", @@ -1852,9 +1854,9 @@ dependencies = [ [[package]] name = "oxc_ast_macros" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f1b369ef5a746cfd3400047f0ec972d66fe75ee5ab5fbfa8f1699e7a3b86724" +checksum = "ab500f253c07b126d10b5b707ee6f40abdb1676f31a7e896684419ce1214f333" dependencies = [ "proc-macro2", "quote", @@ -1863,9 +1865,9 @@ dependencies = [ [[package]] name = "oxc_ast_visit" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7874fe470308c6882fbeb42953896a21ea727d76165bfa80613dab0abbf72254" +checksum = "5ff7e8da546667e4f5e4e7c811518c069cdb3e9126b9496e3678401da2150947" dependencies = [ "oxc_allocator", "oxc_ast", @@ -1875,9 +1877,9 @@ dependencies = [ [[package]] name = "oxc_cfg" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff4ba4ecfa3296e937a3173f3a6b7e98446fed48e182d03f0955729f74b219c1" +checksum = "0e0f65c0e170808c197ef205a442d50a5e4992e9247d8da986c4c591a88d557c" dependencies = [ "bitflags", "itertools", @@ -1890,9 +1892,9 @@ dependencies = [ [[package]] name = "oxc_codegen" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6eb495fd82a33f1d40ea0c27d1db81772946b26a4a236de411a83511aa628a96" +checksum = "c5a6662210485fc557bd26bcbfa5441fac4d74d632297caee5e01d886bf17599" dependencies = [ "bitflags", "cow-utils", @@ -1911,9 +1913,9 @@ dependencies = [ [[package]] name = "oxc_data_structures" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3454bded31b396d75d4ebad5fbdc547ad544b47556b540925be24a874e1d87d8" +checksum = "0863880597e1723b2c51d4212294010d3e5f600d82c6c2cb8aaf7e6bd9400732" dependencies = [ "assert-unchecked", "ropey", @@ -1921,9 +1923,9 @@ dependencies = [ [[package]] name = "oxc_diagnostics" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27cd28f66608e282c1384a877505d3678e099bc3cef6856bc55e2ef60f5344b2" +checksum = "c180f07354c135fc434766b905135443d88f2d8b392c0ec22c468df19db889ce" dependencies = [ "cow-utils", "oxc-miette", @@ -1931,9 +1933,9 @@ dependencies = [ [[package]] name = "oxc_ecmascript" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b9a66f69c6dcc78bf1c164e1e98b4494ba71fbf1b17ebd8940b936902e68ec" +checksum = "1539cce5d32af3b1bb496e219272c3c85ac1719b7c2ba08b98c4688c71f2067e" dependencies = [ "cow-utils", "num-bigint", @@ -1945,9 +1947,9 @@ dependencies = [ [[package]] name = "oxc_estree" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5aa7a068d758764707466b733f92ad66bbe91d5c66ce562d868dff8a7ae72b09" +checksum = "58fda7b4e0bc5b16cc64eb19c519f13982968211bb35a7cd7ec50188f9e020cb" [[package]] name = "oxc_index" @@ -1957,9 +1959,9 @@ checksum = "2fa07b0cfa997730afed43705766ef27792873fdf5215b1391949fec678d2392" [[package]] name = "oxc_mangler" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f5ae1f1f842c714f3144036a644ee275c8a360fd50ac6f295ce52e468cee53" +checksum = "0a0c5e897ed00988f4a8c665344adef77337f2f2caf07e51559f52fa85d146be" dependencies = [ "fixedbitset", "itertools", @@ -1974,9 +1976,9 @@ dependencies = [ [[package]] name = "oxc_minifier" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e4030f603ae314c93b761753969328e96960a853f443fdeaf8c9b2273f7971e" +checksum = "eb6c12dbb296d6a07c433821c282c17d27118781751df0b6c788b0da7e17810b" dependencies = [ "cow-utils", "oxc_allocator", @@ -1996,9 +1998,9 @@ dependencies = [ [[package]] name = "oxc_parser" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71d48cff62f816c27be1912d1d173140602214c8eec38f1e98e77270f39e0ce6" +checksum = "eb503b32077fb3ee4466eae1d38e5360bc7bbe5d3e0d48b23bb7d26e556386de" dependencies = [ "assert-unchecked", "bitflags", @@ -2019,9 +2021,9 @@ dependencies = [ [[package]] name = "oxc_regular_expression" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a49d5d6ede8d8740b25743a78c2fb45859099ba89d88425b1b3d6a433dda352" +checksum = "6578e43fd0f44b6c1f00b9dc7aac0d65b8e85abb5b576aedd2f56ec37430837b" dependencies = [ "oxc_allocator", "oxc_ast_macros", @@ -2035,9 +2037,9 @@ dependencies = [ [[package]] name = "oxc_semantic" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7b84c302f11c87b330b5e3f02f3efc4cd1ea44e8553551d6e245cb161dcc6ef" +checksum = "c0dd10e2c69820e02ad4f9a55d53484bd353567c4784b72a11843a81d917ee04" dependencies = [ "assert-unchecked", "itertools", @@ -2072,9 +2074,9 @@ dependencies = [ [[package]] name = "oxc_span" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf5d0e27b520397a9d0a99dfa21d9c29fa389f34719aff30f6c5468498d603d9" +checksum = "a7492a8a4c6e018a6156b6b8a69ff5ab48cc034f895563b40902d1a0e78bef04" dependencies = [ "compact_str", "oxc-miette", @@ -2085,9 +2087,9 @@ dependencies = [ [[package]] name = "oxc_syntax" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91fdded3cdce1641b8ee44a223be03dca3def248120cbe3fe0492a14bd8a744" +checksum = "35c96f9478b96246f760b5cc54abf4044233fb6a52781e9cdc635b6dc5734752" dependencies = [ "assert-unchecked", "bitflags", @@ -2106,9 +2108,9 @@ dependencies = [ [[package]] name = "oxc_traverse" -version = "0.57.0" +version = "0.58.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77703db3bd925f63fd688e3858ecd48d4b1a455a64f4c54bec6baae50af03cab" +checksum = "b1bf9f3c23fc9fdf3f64d1e9b103b3aa54f91e3297ab201a922e1b66767350ce" dependencies = [ "compact_str", "itoa", @@ -2324,9 +2326,9 @@ dependencies = [ [[package]] name = "quick_cache" -version = "0.6.11" +version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0af25b4e960ffdf0dead61cf0cec0c2e44c76927bf933ab4f02e2858fb449397" +checksum = "8f8ed0655cbaf18a26966142ad23b95d8ab47221c50c4f73a1db7d0d2d6e3da8" dependencies = [ "equivalent", "hashbrown 0.15.2", @@ -2334,9 +2336,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.39" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1f1914ce909e1658d9907913b4b91947430c7d9be598b15a1912935b8c04801" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] @@ -2437,9 +2439,9 @@ checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] name = "ring" -version = "0.17.13" +version = "0.17.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", @@ -2948,9 +2950,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.44.0" +version = "1.44.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9975ea0f48b5aa3972bf2d888c238182458437cc2a19374b81b25cdf1023fb3a" +checksum = "f382da615b842244d4b8738c82ed1275e6c5dd90c459a30941cd07080b06c91a" dependencies = [ "backtrace", "bytes", @@ -3161,9 +3163,9 @@ checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" [[package]] name = "value-log" -version = "1.5.5" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2398750880b2b9770afbd1a3e299b9e859d6143c299867eb35fdf484b7625d3" +checksum = "7ea85c4fcd677afc3a00abb1fb0fdc494ad3bb5d7d14736c75cb7c07b3ddd182" dependencies = [ "byteorder", "byteview", @@ -3372,9 +3374,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1" +checksum = "0e97b544156e9bebe1a0ffbc03484fc1ffe3100cbce3ffb17eac35f7cdd7ab36" dependencies = [ "memchr", ] diff --git a/crates/brk_cli/src/config.rs b/crates/brk_cli/src/config.rs deleted file mode 100644 index 8b1378917..000000000 --- a/crates/brk_cli/src/config.rs +++ /dev/null @@ -1 +0,0 @@ - diff --git a/crates/brk_cli/src/query.rs b/crates/brk_cli/src/query.rs index c27f003a5..58bd3fa0e 100644 --- a/crates/brk_cli/src/query.rs +++ b/crates/brk_cli/src/query.rs @@ -8,7 +8,7 @@ use crate::run::RunConfig; pub fn query(params: QueryParams) -> color_eyre::Result<()> { let config = RunConfig::import(None)?; - let mut indexer = Indexer::new(config.indexeddir())?; + let mut indexer = Indexer::new(config.indexeddir(), config.check_collisions())?; indexer.import_vecs()?; let mut computer = Computer::new(config.computeddir(), None); diff --git a/crates/brk_cli/src/run.rs b/crates/brk_cli/src/run.rs index 57c03a314..1ae67bed1 100644 --- a/crates/brk_cli/src/run.rs +++ b/crates/brk_cli/src/run.rs @@ -26,7 +26,7 @@ pub fn run(config: RunConfig) -> color_eyre::Result<()> { let parser = brk_parser::Parser::new(config.blocksdir(), rpc); - let mut indexer = Indexer::new(config.indexeddir())?; + let mut indexer = Indexer::new(config.indexeddir(), config.check_collisions())?; indexer.import_stores()?; indexer.import_vecs()?; @@ -134,6 +134,10 @@ pub struct RunConfig { /// Delay between runs, default: 0, saved #[arg(long, value_name = "SECONDS")] delay: Option, + + /// DEV: Activate checking address hashes for collisions when indexing, default: false, saved + #[arg(long, value_name = "BOOL")] + check_collisions: Option, } impl RunConfig { @@ -195,6 +199,10 @@ impl RunConfig { config_saved.delay = Some(delay); } + if let Some(check_collisions) = config_args.check_collisions.take() { + config_saved.check_collisions = Some(check_collisions); + } + if config_args != RunConfig::default() { dbg!(config_args); panic!("Didn't consume the full config") @@ -378,6 +386,10 @@ impl RunConfig { pub fn fetch(&self) -> bool { self.fetch.is_some_and(|b| b) } + + pub fn check_collisions(&self) -> bool { + self.check_collisions.is_some_and(|b| b) + } } #[derive( diff --git a/crates/brk_computer/examples/main.rs b/crates/brk_computer/examples/main.rs index ed3c24ce1..062787e96 100644 --- a/crates/brk_computer/examples/main.rs +++ b/crates/brk_computer/examples/main.rs @@ -28,7 +28,7 @@ pub fn main() -> color_eyre::Result<()> { let outputs_dir = Path::new("../../_outputs"); - let mut indexer = Indexer::new(outputs_dir.join("indexed"))?; + let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?; indexer.import_stores()?; indexer.import_vecs()?; diff --git a/crates/brk_computer/src/storage/vecs/base.rs b/crates/brk_computer/src/storage/vecs/base.rs index 3a98a2ca1..aee9dda16 100644 --- a/crates/brk_computer/src/storage/vecs/base.rs +++ b/crates/brk_computer/src/storage/vecs/base.rs @@ -9,7 +9,7 @@ use std::{ use brk_core::CheckedSub; use brk_exit::Exit; -use brk_vec::{Error, Result, StoredIndex, StoredType, Version}; +use brk_vec::{Compressed, Error, Result, StoredIndex, StoredType, Version}; const FLUSH_EVERY: usize = 10_000; @@ -25,7 +25,7 @@ where T: StoredType, { pub fn import(path: &Path, version: Version) -> brk_vec::Result { - let vec = brk_vec::StorableVec::forced_import(path, version)?; + let vec = brk_vec::StorableVec::forced_import(path, version, Compressed::YES)?; Ok(Self { computed_version: None, @@ -103,14 +103,14 @@ where where A: StoredIndex, B: StoredType, - F: FnMut((A, &B, &mut Self, &mut brk_vec::StorableVec)) -> (I, T), + F: FnMut((A, B, &mut Self, &mut brk_vec::StorableVec)) -> (I, T), { self.validate_computed_version_or_reset_file( Version::from(0) + self.version() + other.version(), )?; let index = max_from.min(A::from(self.len())); - other.iter_from(index, |(a, b, other)| { + other.iter_from_cloned(index, |(a, b, other)| { let (i, v) = t((a, b, self, other)); self.push_and_flush_if_needed(i, v, exit) })?; diff --git a/crates/brk_computer/src/storage/vecs/indexes.rs b/crates/brk_computer/src/storage/vecs/indexes.rs index ecb5a7b71..c201bd48b 100644 --- a/crates/brk_computer/src/storage/vecs/indexes.rs +++ b/crates/brk_computer/src/storage/vecs/indexes.rs @@ -100,7 +100,7 @@ impl Vecs { self.height_to_real_date.compute_transform( starting_indexes.height, &mut indexer_vecs.height_to_timestamp, - |(h, t, ..)| (h, Date::from(*t)), + |(h, t, ..)| (h, Date::from(t)), exit, )?; @@ -112,7 +112,10 @@ impl Vecs { .decremented() .and_then(|h| s.read(h).ok()) .flatten() - .map_or(*d, |prev_d| if prev_d > d { *prev_d } else { *d }); + .map_or(d, |prev_d| { + let prev_d = *prev_d; + if prev_d > d { prev_d } else { d } + }); (h, d) }, exit, @@ -121,7 +124,7 @@ impl Vecs { self.height_to_dateindex.compute_transform( starting_indexes.height, &mut self.height_to_fixed_date, - |(h, d, ..)| (h, Dateindex::try_from(*d).unwrap()), + |(h, d, ..)| (h, Dateindex::try_from(d).unwrap()), exit, )?; diff --git a/crates/brk_computer/src/storage/vecs/marketprice.rs b/crates/brk_computer/src/storage/vecs/marketprice.rs index 0c63f808a..f24e0c082 100644 --- a/crates/brk_computer/src/storage/vecs/marketprice.rs +++ b/crates/brk_computer/src/storage/vecs/marketprice.rs @@ -134,7 +134,7 @@ impl Vecs { let ohlc = fetcher .get_height( h, - *t, + t, h.decremented().map(|prev_h| { height_to_timestamp .get(prev_h) @@ -215,7 +215,7 @@ impl Vecs { self.height_to_sats_per_dollar.compute_transform( starting_indexes.height, &mut self.height_to_close, - |(di, close, ..)| (di, Close::from(Sats::ONE_BTC / **close)), + |(di, close, ..)| (di, Close::from(Sats::ONE_BTC / *close)), exit, )?; @@ -223,7 +223,7 @@ impl Vecs { starting_indexes.dateindex, &mut indexes.dateindex_to_date, |(di, d, ..)| { - let ohlc = fetcher.get_date(*d).unwrap(); + let ohlc = fetcher.get_date(d).unwrap(); (di, ohlc) }, exit, @@ -295,7 +295,7 @@ impl Vecs { self.dateindex_to_sats_per_dollar.compute_transform( starting_indexes.dateindex, &mut self.dateindex_to_close, - |(di, close, ..)| (di, Close::from(Sats::ONE_BTC / **close)), + |(di, close, ..)| (di, Close::from(Sats::ONE_BTC / *close)), exit, )?; diff --git a/crates/brk_core/src/structs/compressed.rs b/crates/brk_core/src/structs/compressed.rs index 8a612e0d0..271a77fea 100644 --- a/crates/brk_core/src/structs/compressed.rs +++ b/crates/brk_core/src/structs/compressed.rs @@ -8,7 +8,20 @@ use crate::Error; use super::{Addressbytes, Addresstype, BlockHash, Txid}; -#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[derive( + Debug, + Deref, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + FromBytes, + Immutable, + IntoBytes, + KnownLayout, +)] pub struct AddressHash([u8; 8]); impl From<(&Addressbytes, Addresstype)> for AddressHash { fn from((addressbytes, addresstype): (&Addressbytes, Addresstype)) -> Self { @@ -41,8 +54,26 @@ impl From for ByteView { } } -#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[derive( + Debug, + Deref, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + FromBytes, + Immutable, + IntoBytes, + KnownLayout, +)] pub struct BlockHashPrefix([u8; 8]); +impl From for BlockHashPrefix { + fn from(value: BlockHash) -> Self { + Self::from(&value) + } +} impl From<&BlockHash> for BlockHashPrefix { fn from(value: &BlockHash) -> Self { Self(copy_first_8bytes(&value[..]).unwrap()) @@ -65,8 +96,26 @@ impl From for ByteView { } } -#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, FromBytes, Immutable, IntoBytes, KnownLayout)] +#[derive( + Debug, + Deref, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + FromBytes, + Immutable, + IntoBytes, + KnownLayout, +)] pub struct TxidPrefix([u8; 8]); +impl From for TxidPrefix { + fn from(value: Txid) -> Self { + Self::from(&value) + } +} impl From<&Txid> for TxidPrefix { fn from(value: &Txid) -> Self { Self(copy_first_8bytes(&value[..]).unwrap()) diff --git a/crates/brk_indexer/README.md b/crates/brk_indexer/README.md index 70e419474..1a5c98ccc 100644 --- a/crates/brk_indexer/README.md +++ b/crates/brk_indexer/README.md @@ -22,12 +22,6 @@ Vecs: `src/storage/vecs/mod.rs` Stores: `src/storage/stores/mod.rs` -## Examples - -Rust: `src/main.rs` - -Python: `../python/parse.py` - ## Benchmark Indexing `0..885_835` took `11 hours 6 min 50 s` on a Macbook Pro M3 Pro with 36 GB of RAM diff --git a/crates/brk_indexer/examples/main.rs b/crates/brk_indexer/examples/main.rs index 90b81cd37..69ff767b2 100644 --- a/crates/brk_indexer/examples/main.rs +++ b/crates/brk_indexer/examples/main.rs @@ -1,6 +1,6 @@ -use std::{path::Path, thread::sleep, time::Duration}; +use std::path::Path; -use brk_core::default_bitcoin_path; +use brk_core::{default_bitcoin_path, dot_brk_path}; use brk_exit::Exit; use brk_indexer::{Indexer, rpc::RpcApi}; use brk_parser::{ @@ -24,23 +24,25 @@ fn main() -> color_eyre::Result<()> { let parser = Parser::new(bitcoin_dir.join("blocks"), rpc); - let mut indexer = Indexer::new(Path::new("../../_outputs/indexed").to_owned())?; + let outputs = dot_brk_path().join("outputs"); + + let mut indexer = Indexer::new(outputs.join("indexed").to_owned(), true)?; indexer.import_stores()?; indexer.import_vecs()?; - loop { - let block_count = rpc.get_block_count()?; + // loop { + let block_count = rpc.get_block_count()?; - info!("{block_count} blocks found."); + info!("{block_count} blocks found."); - indexer.index(&parser, rpc, &exit)?; + indexer.index(&parser, rpc, &exit)?; - info!("Waiting for new blocks..."); + info!("Waiting for new blocks..."); - while block_count == rpc.get_block_count()? { - sleep(Duration::from_secs(1)) - } - } + // while block_count == rpc.get_block_count()? { + // sleep(Duration::from_secs(1)) + // } + // } #[allow(unreachable_code)] Ok(()) diff --git a/crates/brk_indexer/src/lib.rs b/crates/brk_indexer/src/lib.rs index bc04a1769..7e89e53cd 100644 --- a/crates/brk_indexer/src/lib.rs +++ b/crates/brk_indexer/src/lib.rs @@ -18,6 +18,7 @@ pub use brk_parser::*; use bitcoin::{Transaction, TxIn, TxOut}; use brk_exit::Exit; +use brk_vec::Compressed; use color_eyre::eyre::{ContextCompat, eyre}; use log::info; use rayon::prelude::*; @@ -36,15 +37,17 @@ pub struct Indexer { path: PathBuf, vecs: Option, stores: Option, + check_collisions: bool, } impl Indexer { - pub fn new(indexes_dir: PathBuf) -> color_eyre::Result { + pub fn new(indexes_dir: PathBuf, check_collisions: bool) -> color_eyre::Result { setrlimit()?; Ok(Self { path: indexes_dir, vecs: None, stores: None, + check_collisions, }) } @@ -66,8 +69,6 @@ impl Indexer { rpc: &'static rpc::Client, exit: &Exit, ) -> color_eyre::Result { - let check_collisions = true; - let starting_indexes = Indexes::try_from(( self.vecs.as_mut().unwrap(), self.stores.as_ref().unwrap(), @@ -96,7 +97,7 @@ impl Indexer { let mut idxs = starting_indexes.clone(); let start = Some(idxs.height); - let end = None; //Some(Height::new(400_000)); + let end = None; if starting_indexes.height > Height::try_from(rpc)? || end.is_some_and(|end| starting_indexes.height > end) @@ -124,12 +125,14 @@ impl Indexer { Ok(()) }; - parser.parse(start, None).iter().try_for_each( + parser.parse(start, end).iter().try_for_each( |(height, block, blockhash)| -> color_eyre::Result<()> { info!("Indexing block {height}..."); idxs.height = height; + let check_collisions = self.check_collisions && height > Height::new(886_000); + let blockhash = BlockHash::from(blockhash); let blockhash_prefix = BlockHashPrefix::from(&blockhash); @@ -232,8 +235,6 @@ impl Indexer { let txindex = idxs.txindex + block_txindex; let txinindex = idxs.txinindex + Txinindex::from(block_txinindex); - // dbg!((txindex, txinindex, vin)); - let outpoint = txin.previous_output; let txid = Txid::from(outpoint.txid); @@ -598,6 +599,10 @@ impl Indexer { return Ok(()); } + if !check_collisions { + return Ok(()) + } + let len = vecs.txindex_to_txid.len(); // Ok if `get` is not par as should happen only twice let prev_txid = vecs @@ -608,8 +613,6 @@ impl Indexer { dbg!(txindex, len); })?; - // #[allow(clippy::redundant_locals)] - // let prev_txid = prev_txid; let prev_txid = prev_txid.as_ref(); // If another Txid needs to be added to the list diff --git a/crates/brk_indexer/src/vecs/base.rs b/crates/brk_indexer/src/vecs/base.rs index 449ea4210..4d515cf2b 100644 --- a/crates/brk_indexer/src/vecs/base.rs +++ b/crates/brk_indexer/src/vecs/base.rs @@ -5,7 +5,7 @@ use std::{ path::{Path, PathBuf}, }; -use brk_vec::{StoredIndex, StoredType, Version}; +use brk_vec::{Compressed, StoredIndex, StoredType, Version}; use super::Height; @@ -20,10 +20,10 @@ where I: StoredIndex, T: StoredType, { - pub fn import(path: &Path, version: Version) -> brk_vec::Result { - let mut vec = brk_vec::StorableVec::forced_import(path, version)?; + pub fn import(path: &Path, version: Version, compressed: Compressed) -> brk_vec::Result { + let mut vec = brk_vec::StorableVec::forced_import(path, version, compressed)?; - vec.reset_mmaps()?; + vec.init_big_cache()?; Ok(Self { height: Height::try_from(Self::path_height_(path).as_path()).ok(), @@ -31,11 +31,12 @@ where }) } - pub fn truncate_if_needed(&mut self, index: I, height: Height) -> brk_vec::Result> { + pub fn truncate_if_needed(&mut self, index: I, height: Height) -> brk_vec::Result<()> { if self.height.is_none_or(|self_height| self_height != height) { height.write(&self.path_height())?; } - self.vec.truncate_if_needed(index) + self.vec.truncate_if_needed(index)?; + Ok(()) } pub fn height(&self) -> brk_core::Result { @@ -51,7 +52,7 @@ where pub fn flush(&mut self, height: Height) -> io::Result<()> { height.write(&self.path_height())?; self.vec.flush()?; - self.vec.reset_mmaps() + self.vec.init_big_cache() } } diff --git a/crates/brk_indexer/src/vecs/mod.rs b/crates/brk_indexer/src/vecs/mod.rs index a0b3442a7..37d5f3349 100644 --- a/crates/brk_indexer/src/vecs/mod.rs +++ b/crates/brk_indexer/src/vecs/mod.rs @@ -7,7 +7,7 @@ use brk_core::{ P2TRindex, P2WPKHAddressBytes, P2WPKHindex, P2WSHAddressBytes, P2WSHindex, Pushonlyindex, Sats, Timestamp, TxVersion, Txid, Txindex, Txinindex, Txoutindex, Unknownindex, Weight, }; -use brk_vec::{AnyStorableVec, Version}; +use brk_vec::{AnyStorableVec, Compressed, Version}; use rayon::prelude::*; use crate::Indexes; @@ -71,168 +71,217 @@ impl Vecs { addressindex_to_addresstype: StorableVec::import( &path.join("addressindex_to_addresstype"), Version::from(1), + Compressed::YES, )?, addressindex_to_addresstypeindex: StorableVec::import( &path.join("addressindex_to_addresstypeindex"), Version::from(1), + Compressed::YES, )?, addressindex_to_height: StorableVec::import( &path.join("addressindex_to_height"), Version::from(1), + Compressed::YES, )?, height_to_blockhash: StorableVec::import( &path.join("height_to_blockhash"), Version::from(1), + Compressed::NO, )?, height_to_difficulty: StorableVec::import( &path.join("height_to_difficulty"), Version::from(1), + Compressed::YES, )?, height_to_first_addressindex: StorableVec::import( &path.join("height_to_first_addressindex"), Version::from(1), + Compressed::YES, )?, height_to_first_emptyindex: StorableVec::import( &path.join("height_to_first_emptyindex"), Version::from(1), + Compressed::YES, )?, height_to_first_multisigindex: StorableVec::import( &path.join("height_to_first_multisigindex"), Version::from(1), + Compressed::YES, )?, height_to_first_opreturnindex: StorableVec::import( &path.join("height_to_first_opreturnindex"), Version::from(1), + Compressed::YES, )?, height_to_first_pushonlyindex: StorableVec::import( &path.join("height_to_first_pushonlyindex"), Version::from(1), + Compressed::YES, )?, height_to_first_txindex: StorableVec::import( &path.join("height_to_first_txindex"), Version::from(1), + Compressed::YES, )?, height_to_first_txinindex: StorableVec::import( &path.join("height_to_first_txinindex"), Version::from(1), + Compressed::YES, )?, height_to_first_txoutindex: StorableVec::import( &path.join("height_to_first_txoutindex"), Version::from(1), + Compressed::YES, )?, height_to_first_unknownindex: StorableVec::import( &path.join("height_to_first_unkownindex"), Version::from(1), + Compressed::YES, )?, height_to_first_p2pk33index: StorableVec::import( &path.join("height_to_first_p2pk33index"), Version::from(1), + Compressed::YES, )?, height_to_first_p2pk65index: StorableVec::import( &path.join("height_to_first_p2pk65index"), Version::from(1), + Compressed::YES, )?, height_to_first_p2pkhindex: StorableVec::import( &path.join("height_to_first_p2pkhindex"), Version::from(1), + Compressed::YES, )?, height_to_first_p2shindex: StorableVec::import( &path.join("height_to_first_p2shindex"), Version::from(1), + Compressed::YES, )?, height_to_first_p2trindex: StorableVec::import( &path.join("height_to_first_p2trindex"), Version::from(1), + Compressed::YES, )?, height_to_first_p2wpkhindex: StorableVec::import( &path.join("height_to_first_p2wpkhindex"), Version::from(1), + Compressed::YES, )?, height_to_first_p2wshindex: StorableVec::import( &path.join("height_to_first_p2wshindex"), Version::from(1), + Compressed::YES, + )?, + height_to_size: StorableVec::import( + &path.join("height_to_size"), + Version::from(1), + Compressed::YES, )?, - height_to_size: StorableVec::import(&path.join("height_to_size"), Version::from(1))?, height_to_timestamp: StorableVec::import( &path.join("height_to_timestamp"), Version::from(1), + Compressed::YES, )?, height_to_weight: StorableVec::import( &path.join("height_to_weight"), Version::from(1), + Compressed::YES, )?, p2pk33index_to_p2pk33addressbytes: StorableVec::import( &path.join("p2pk33index_to_p2pk33addressbytes"), Version::from(1), + Compressed::NO, )?, p2pk65index_to_p2pk65addressbytes: StorableVec::import( &path.join("p2pk65index_to_p2pk65addressbytes"), Version::from(1), + Compressed::NO, )?, p2pkhindex_to_p2pkhaddressbytes: StorableVec::import( &path.join("p2pkhindex_to_p2pkhaddressbytes"), Version::from(1), + Compressed::NO, )?, p2shindex_to_p2shaddressbytes: StorableVec::import( &path.join("p2shindex_to_p2shaddressbytes"), Version::from(1), + Compressed::NO, )?, p2trindex_to_p2traddressbytes: StorableVec::import( &path.join("p2trindex_to_p2traddressbytes"), Version::from(1), + Compressed::NO, )?, p2wpkhindex_to_p2wpkhaddressbytes: StorableVec::import( &path.join("p2wpkhindex_to_p2wpkhaddressbytes"), Version::from(1), + Compressed::NO, )?, p2wshindex_to_p2wshaddressbytes: StorableVec::import( &path.join("p2wshindex_to_p2wshaddressbytes"), Version::from(1), + Compressed::NO, )?, txindex_to_first_txinindex: StorableVec::import( &path.join("txindex_to_first_txinindex"), Version::from(1), + Compressed::YES, )?, txindex_to_first_txoutindex: StorableVec::import( &path.join("txindex_to_first_txoutindex"), Version::from(1), + Compressed::NO, )?, txindex_to_height: StorableVec::import( &path.join("txindex_to_height"), Version::from(1), + Compressed::YES, )?, txindex_to_locktime: StorableVec::import( &path.join("txindex_to_locktime"), Version::from(1), + Compressed::YES, + )?, + txindex_to_txid: StorableVec::import( + &path.join("txindex_to_txid"), + Version::from(1), + Compressed::NO, )?, - txindex_to_txid: StorableVec::import(&path.join("txindex_to_txid"), Version::from(1))?, txindex_to_base_size: StorableVec::import( &path.join("txindex_to_base_size"), Version::from(1), + Compressed::YES, )?, txindex_to_total_size: StorableVec::import( &path.join("txindex_to_total_size"), Version::from(1), + Compressed::YES, )?, txindex_to_is_explicitly_rbf: StorableVec::import( &path.join("txindex_to_is_explicitly_rbf"), Version::from(1), + Compressed::YES, )?, txindex_to_txversion: StorableVec::import( &path.join("txindex_to_txversion"), Version::from(1), + Compressed::YES, )?, txinindex_to_txoutindex: StorableVec::import( &path.join("txinindex_to_txoutindex"), Version::from(1), + Compressed::YES, )?, txoutindex_to_addressindex: StorableVec::import( &path.join("txoutindex_to_addressindex"), Version::from(1), + Compressed::YES, )?, txoutindex_to_value: StorableVec::import( &path.join("txoutindex_to_value"), Version::from(1), + Compressed::YES, )?, }) } diff --git a/crates/brk_parser/examples/main.rs b/crates/brk_parser/examples/main.rs index 3cf8cca63..0a56f9a2d 100644 --- a/crates/brk_parser/examples/main.rs +++ b/crates/brk_parser/examples/main.rs @@ -15,17 +15,17 @@ fn main() { .unwrap(), )); - // let start = None; - // let end = None; + let start = None; + let end = None; let parser = Parser::new(bitcoin_dir.join("blocks"), rpc); - // parser - // .parse(start, end) - // .iter() - // .for_each(|(height, _block, hash)| { - // println!("{height}: {hash}"); - // }); + parser + .parse(start, end) + .iter() + .for_each(|(height, _block, hash)| { + println!("{height}: {hash}"); + }); println!( "{}", diff --git a/crates/brk_query/examples/main.rs b/crates/brk_query/examples/main.rs index 149a3d031..6539099dc 100644 --- a/crates/brk_query/examples/main.rs +++ b/crates/brk_query/examples/main.rs @@ -9,7 +9,7 @@ pub fn main() -> color_eyre::Result<()> { let outputs_dir = Path::new("../../_outputs"); - let mut indexer = Indexer::new(outputs_dir.join("indexed"))?; + let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?; indexer.import_vecs()?; let mut computer = Computer::new(outputs_dir.join("computed"), None); diff --git a/crates/brk_server/Cargo.toml b/crates/brk_server/Cargo.toml index 0d311ac47..c196245b9 100644 --- a/crates/brk_server/Cargo.toml +++ b/crates/brk_server/Cargo.toml @@ -21,8 +21,8 @@ color-eyre = { workspace = true } jiff = { workspace = true } log = { workspace = true } minreq = { workspace = true } -oxc = { version = "0.57.0", features = ["codegen", "minifier"] } +oxc = { version = "0.58.0", features = ["codegen", "minifier"] } serde = { workspace = true } -tokio = { version = "1.44.0", features = ["full"] } +tokio = { version = "1.44.1", features = ["full"] } tower-http = { version = "0.6.2", features = ["compression-full"] } zip = "2.2.3" diff --git a/crates/brk_server/examples/main.rs b/crates/brk_server/examples/main.rs index 67fb34fd2..a98f9bd9d 100644 --- a/crates/brk_server/examples/main.rs +++ b/crates/brk_server/examples/main.rs @@ -31,7 +31,7 @@ pub fn main() -> color_eyre::Result<()> { let outputs_dir = Path::new("../../_outputs"); - let mut indexer = Indexer::new(outputs_dir.join("indexed"))?; + let mut indexer = Indexer::new(outputs_dir.join("indexed"), true)?; indexer.import_stores()?; indexer.import_vecs()?; diff --git a/crates/brk_vec/Cargo.toml b/crates/brk_vec/Cargo.toml index e992d89d8..1fd531032 100644 --- a/crates/brk_vec/Cargo.toml +++ b/crates/brk_vec/Cargo.toml @@ -14,3 +14,4 @@ rayon = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } zerocopy = { workspace = true } +zstd = "0.13.3" diff --git a/crates/brk_vec/examples/main.rs b/crates/brk_vec/examples/main.rs index efaeef671..53a1d3926 100644 --- a/crates/brk_vec/examples/main.rs +++ b/crates/brk_vec/examples/main.rs @@ -1,16 +1,19 @@ -use std::path::Path; +use std::{fs, path::Path}; -use brk_vec::{StorableVec, Version}; +use brk_vec::{Compressed, StorableVec, Version}; fn main() -> Result<(), Box> { + let _ = fs::remove_dir_all("./vec"); + { let mut vec: StorableVec = - StorableVec::forced_import(Path::new("./vec"), Version::from(1))?; + StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?; - vec.push(0); - vec.push(1); - vec.push(2); + (0..21_u32).for_each(|v| { + vec.push(v); + }); dbg!(vec.get(0)?); // Some(0) + dbg!(vec.get(20)?); // Some(0) dbg!(vec.get(21)?); // None vec.flush()?; @@ -18,12 +21,54 @@ fn main() -> Result<(), Box> { { let mut vec: StorableVec = - StorableVec::forced_import(Path::new("./vec"), Version::from(1))?; + StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?; + dbg!(vec.get(0)?); // 0 dbg!(vec.read(0)?); // 0 dbg!(vec.read(1)?); // 0 dbg!(vec.read(2)?); // 0 + dbg!(vec.read(20)?); // 0 + dbg!(vec.get(20)?); // 0 dbg!(vec.read(0)?); // 0 + + vec.push(21); + vec.push(22); + dbg!(vec.get(20)?); + dbg!(vec.get(21)?); + dbg!(vec.get(22)?); + dbg!(vec.get(23)?); + + vec.flush()?; + } + + { + let mut vec: StorableVec = + StorableVec::forced_import(Path::new("./vec"), Version::from(1), Compressed::YES)?; + + vec.init_big_cache()?; + + dbg!(vec.get(0)?); // 0 + dbg!(vec.get(20)?); // 0 + dbg!(vec.get(21)?); // 0 + dbg!(vec.get(22)?); // 0 + + vec.truncate_if_needed(14)?; + + dbg!(vec.get(0)?); // 0 + dbg!(vec.get(5)?); // 0 + dbg!(vec.get(20)?); // 0 + + vec.iter(|(_, v)| { + dbg!(v); + Ok(()) + })?; + + vec.iter_from(5, |(_, v)| { + dbg!(v); + Ok(()) + })?; + + dbg!(vec.collect_range(Some(-5), None)?); } Ok(()) diff --git a/crates/brk_vec/src/enums/error.rs b/crates/brk_vec/src/enums/error.rs index b066a4ece..0050895a5 100644 --- a/crates/brk_vec/src/enums/error.rs +++ b/crates/brk_vec/src/enums/error.rs @@ -21,6 +21,7 @@ pub enum Error { FailedKeyTryIntoUsize, UnsupportedUnflushedState, RangeFromAfterTo, + DifferentCompressionMode, } impl From for Error { @@ -46,7 +47,10 @@ impl fmt::Display for Error { match self { Error::WrongEndian => write!(f, "Wrong endian"), Error::DifferentVersion { found, expected } => { - write!(f, "Different version; found: {found:?}, expected: {expected:?}") + write!( + f, + "Different version; found: {found:?}, expected: {expected:?}" + ) } Error::MmapsVecIsTooSmall => write!(f, "Mmaps vec is too small"), Error::IO(error) => Debug::fmt(&error, f), @@ -56,10 +60,14 @@ impl fmt::Display for Error { Error::ExpectVecToHaveIndex => write!(f, "Expect vec to have index"), Error::FailedKeyTryIntoUsize => write!(f, "Failed to convert key to usize"), Error::UnsupportedUnflushedState => { - write!(f, "Unsupported unflush state, please flush before using this function") + write!( + f, + "Unsupported unflush state, please flush before using this function" + ) } Error::ZeroCopyError => write!(f, "Zero copy convert error"), Error::RangeFromAfterTo => write!(f, "Range, from is after to"), + Error::DifferentCompressionMode => write!(f, "Different compression mode chosen"), } } } diff --git a/crates/brk_vec/src/lib.rs b/crates/brk_vec/src/lib.rs index 5acd0dabe..ccaad4b1f 100644 --- a/crates/brk_vec/src/lib.rs +++ b/crates/brk_vec/src/lib.rs @@ -7,15 +7,15 @@ use std::{ cmp::Ordering, fmt::Debug, fs::{self, File, OpenOptions}, - io::{self, Read, Seek, SeekFrom, Write}, + io::{self, Seek, SeekFrom, Write}, marker::PhantomData, mem, - ops::Range, path::{Path, PathBuf}, sync::OnceLock, }; pub use memmap2; +use memmap2::Mmap; use rayon::prelude::*; pub use zerocopy; @@ -26,18 +26,21 @@ mod traits; pub use enums::*; pub use structs::*; pub use traits::*; +use zstd::DEFAULT_COMPRESSION_LEVEL; -/// In bytes -const MAX_PAGE_SIZE: usize = 4 * 4096; -const ONE_MIB: usize = 1024 * 1024; +const ONE_KIB: usize = 1024; +const MAX_PAGE_SIZE: usize = 16 * ONE_KIB; +const ONE_MIB: usize = ONE_KIB * ONE_KIB; const MAX_CACHE_SIZE: usize = 100 * ONE_MIB; +type SmallCache = Option<(usize, Box<[T]>)>; + /// /// A very small, fast, efficient and simple storable Vec /// /// Reads (imports of Mmap) are lazy /// -/// Stores only raw data without any overhead, and doesn't even have a header (TODO: which it should, at least to Err if wrong endian) +/// Stores only raw data without any overhead, and doesn't even have a header /// /// The file isn't portable for speed reasons (TODO: but could be ?) /// @@ -47,12 +50,20 @@ const MAX_CACHE_SIZE: usize = 100 * ONE_MIB; pub struct StorableVec { version: Version, pathbuf: PathBuf, - file: File, - /// **Number of values NOT number of bytes** - file_len: usize, - file_position: u64, - buf: Vec, - mmaps: Vec>>, // Boxed Mmap to reduce the size of the Lock (from 24 to 16) + stored_len: Length, + compressed: Compressed, + + // Compressed + decoded_pages: Option>>>, + decoded_page: SmallCache, + pages: CompressedPagesMetadata, + + // Raw + // raw_pages: Vec>>, + // raw_page: memmap2::Mmap, + // file: File, + // file_position: u64, + // buf: Vec, pushed: Vec, phantom: PhantomData, } @@ -68,51 +79,49 @@ where pub const PAGE_SIZE: usize = Self::PER_PAGE * Self::SIZE_OF_T; pub const CACHE_LENGTH: usize = MAX_CACHE_SIZE / Self::PAGE_SIZE; - /// Same as import but will remove the folder if the endian or the version is different, so be careful ! - pub fn forced_import(path: &Path, version: Version) -> Result { - let res = Self::import(path, version); + /// Same as import but will reset the folder under certain errors, so be careful ! + pub fn forced_import(path: &Path, version: Version, compressed: Compressed) -> Result { + let res = Self::import(path, version, compressed); match res { Err(Error::WrongEndian) + | Err(Error::DifferentCompressionMode) | Err(Error::DifferentVersion { found: _, expected: _, }) => { fs::remove_dir_all(path)?; - Self::import(path, version) + Self::import(path, version, compressed) } _ => res, } } - pub fn import(path: &Path, version: Version) -> Result { + pub fn import(path: &Path, version: Version, compressed: Compressed) -> Result { fs::create_dir_all(path)?; let version_path = Self::path_version_(path); version.validate(version_path.as_ref())?; version.write(version_path.as_ref())?; - let file = Self::open_file_(&Self::path_vec_(path))?; + let compressed_path = Self::path_compressed_(path); + compressed.validate(compressed_path.as_ref())?; + compressed.write(compressed_path.as_ref())?; - let mut slf = Self { + let stored_len = Length::try_from(Self::path_length_(path).as_path())?; + + let pages = CompressedPagesMetadata::read(Self::path_pages_(path).as_path())?; + + Ok(Self { version, + compressed, pathbuf: path.to_owned(), - file_position: 0, - file_len: Self::read_disk_len_(&file)?, - file, - buf: Self::create_buffer(), - mmaps: vec![], + stored_len, + decoded_pages: None, pushed: vec![], + pages, + decoded_page: None, phantom: PhantomData, - }; - - slf.reset_file_metadata()?; - - Ok(slf) - } - - #[inline] - fn create_buffer() -> Vec { - vec![0; Self::SIZE_OF_T] + }) } fn open_file(&self) -> io::Result { @@ -127,57 +136,91 @@ where .open(path) } - pub fn open_then_read(&self, index: I) -> Result { + #[inline(always)] + fn mmap(&self, page: &CompressedPageMetadata) -> io::Result { + let len = page.bytes_len as usize; + let offset = page.start; + let file = self.open_file()?; + + Ok(unsafe { + memmap2::MmapOptions::new() + .len(len) + .offset(offset) + .map(&file)? + }) + } + + fn decode(&self, page_index: usize) -> Result> { + if self.pages.len() <= page_index { + return Err(Error::ExpectVecToHaveIndex); + } + + let page = self.pages.get(page_index).unwrap(); + + let mmap = self.mmap(page)?; + + let decoded = zstd::decode_all(&mmap[..]); + + if decoded.is_err() { + dbg!((page, page_index, &mmap[..], &mmap.len(), &decoded)); + } + + Ok(decoded? + .chunks(Self::SIZE_OF_T) + .map(|slice| T::try_read_from_bytes(slice).unwrap()) + .collect::>() + .into_boxed_slice()) + } + + pub fn open_then_read(&self, index: I) -> Result> { self.open_then_read_(Self::i_to_usize(index)?) } - fn open_then_read_(&self, index: usize) -> Result { - let mut file = self.open_file()?; - Self::seek_(&mut file, Self::index_to_byte_index(index))?; - let mut buf = Self::create_buffer(); - Self::read_exact(&mut file, &mut buf).map(|v| v.to_owned()) + fn open_then_read_(&self, index: usize) -> Result> { + Ok(self + .decode(Self::index_to_page_index(index))? + .get(Self::index_to_decoded_index(index)) + .cloned()) } - fn read_disk_len(&self) -> io::Result { - Self::read_disk_len_(&self.file) - } - fn read_disk_len_(file: &File) -> io::Result { - Ok(Self::byte_index_to_index(file.metadata()?.len() as usize)) + pub fn init_big_cache(&mut self) -> io::Result<()> { + self.decoded_pages.replace(vec![]); + self.reset_big_cache() } - fn reset_file_metadata(&mut self) -> io::Result<()> { - self.file_len = self.read_disk_len()?; - self.file_position = self.file.seek(SeekFrom::Start(0))?; - Ok(()) - } + fn reset_big_cache(&mut self) -> io::Result<()> { + if self.decoded_pages.is_none() { + return Ok(()); + } - pub fn reset_mmaps(&mut self) -> io::Result<()> { - self.mmaps.par_iter_mut().for_each(|lock| { + let big_cache = self.decoded_pages.as_mut().unwrap(); + + big_cache.par_iter_mut().for_each(|lock| { lock.take(); }); - let len = (self.file_len as f64 / Self::PER_PAGE as f64).ceil() as usize; + let len = (*self.stored_len as f64 / Self::PER_PAGE as f64).ceil() as usize; let len = Self::CACHE_LENGTH.min(len); - if self.mmaps.len() != len { - self.mmaps.resize_with(len, Default::default); + if big_cache.len() != len { + big_cache.resize_with(len, Default::default); } Ok(()) } - #[inline] - fn seek(&mut self, byte_index: u64) -> io::Result { - self.file.seek(SeekFrom::Start(byte_index)) - } - #[inline] - fn seek_(file: &mut File, byte_index: u64) -> io::Result { - file.seek(SeekFrom::Start(byte_index)) + fn reset_caches(&mut self) -> io::Result<()> { + self.decoded_page.take(); + self.reset_big_cache() } - fn read_exact<'a>(file: &'a mut File, buf: &'a mut [u8]) -> Result<&'a T> { - file.read_exact(buf)?; - let v = T::try_ref_from_bytes(&buf[..])?; - Ok(v) + #[inline(always)] + fn index_to_page_index(index: usize) -> usize { + index / Self::PER_PAGE + } + + #[inline(always)] + fn index_to_decoded_index(index: usize) -> usize { + index % Self::PER_PAGE } #[inline] @@ -196,46 +239,28 @@ where Err(error) => return Err(error), } - // if !self.updated.is_empty() { - // if let Some(v) = self.updated.get(&index) { - // return Ok(Some(v)); - // } - // } + if let Some(big_cache) = self + .decoded_pages + .as_ref() + .and_then(|v| if v.is_empty() { None } else { Some(v) }) + { + let page_index = Self::index_to_page_index(index); + let last_index = *self.stored_len - 1; + let max_page_index = last_index / Self::PER_PAGE; - let page_index = index / Self::PER_PAGE; - let last_index = self.file_len - 1; - let max_page_index = last_index / Self::PER_PAGE; - let min_page_index = (max_page_index + 1) - self.mmaps.len(); + let min_page_index = (max_page_index + 1) - big_cache.len(); - // let min_open_page = self.min.load(AtomicOrdering::SeqCst); - - // if self.min.load(AtomicOrdering::SeqCst) { - // self.min.set(value) - // } - - if !self.mmaps.is_empty() && page_index >= min_page_index { - let mmap = &**self - .mmaps - .get(page_index - min_page_index) - .ok_or(Error::MmapsVecIsTooSmall)? - .get_or_init(|| { - Box::new(unsafe { - memmap2::MmapOptions::new() - .len(Self::PAGE_SIZE) - .offset((page_index * Self::PAGE_SIZE) as u64) - .map(&self.file) - .unwrap() - }) - }); - - let range = Self::index_to_byte_range(index); - let slice = &mmap[range]; - return Ok(Some(Value::Ref(T::try_ref_from_bytes(slice)?))); + if page_index >= min_page_index { + return Ok(big_cache + .get(page_index - min_page_index) + .ok_or(Error::MmapsVecIsTooSmall)? + .get_or_init(|| self.decode(page_index).unwrap()) + .get(Self::index_to_decoded_index(index)) + .map(|v| Value::Ref(v))); + } } - Ok(self - .open_then_read_(index) - .map_or(None, |v| Some(Value::Owned(v)))) + Ok(self.open_then_read_(index)?.map(|v| Value::Owned(v))) } #[inline] @@ -255,17 +280,19 @@ where Err(error) => return Err(error), } - let byte_index = Self::index_to_byte_index(index); - if self.file_position != byte_index { - self.file_position = self.seek(Self::index_to_byte_index(index))?; - } - match Self::read_exact(&mut self.file, &mut self.buf) { - Ok(value) => { - self.file_position += Self::SIZE_OF_T as u64; - Ok(Some(value)) - } - Err(e) => Err(e), + let page_index = Self::index_to_page_index(index); + + if self.decoded_page.as_ref().is_none_or(|b| b.0 != page_index) { + self.decoded_page + .replace((page_index, self.decode(page_index)?)); } + + Ok(self + .decoded_page + .as_ref() + .unwrap() + .1 + .get(Self::index_to_decoded_index(index))) } pub fn read_last(&mut self) -> Result> { @@ -278,33 +305,44 @@ where pub fn iter(&mut self, f: F) -> Result<()> where - F: FnMut((I, &T, &mut Self)) -> Result<()>, + F: FnMut((I, &T)) -> Result<()>, { self.iter_from(I::default(), f) } pub fn iter_from(&mut self, mut index: I, mut f: F) -> Result<()> where - F: FnMut((I, &T, &mut Self)) -> Result<()>, + F: FnMut((I, &T)) -> Result<()>, { - let mut file = self.open_file()?; + if !self.pushed.is_empty() { + return Err(Error::UnsupportedUnflushedState); + } - let disk_len = I::from(Self::read_disk_len_(&file)?); + let stored_len = I::from(*self.stored_len); - Self::seek_( - &mut file, - Self::index_to_byte_index(Self::i_to_usize(index)?), - )?; - - let mut buf = Self::create_buffer(); - - while index < disk_len { - f((index, Self::read_exact(&mut file, &mut buf)?, self))?; + while index < stored_len { + let v = self.read(index)?.unwrap(); + f((index, v))?; index = index + 1; } - if self.pushed_len() != 0 { - unreachable!(); + Ok(()) + } + + pub fn iter_from_cloned(&mut self, mut index: I, mut f: F) -> Result<()> + where + F: FnMut((I, T, &mut Self)) -> Result<()>, + { + if !self.pushed.is_empty() { + return Err(Error::UnsupportedUnflushedState); + } + + let stored_len = I::from(*self.stored_len); + + while index < stored_len { + let v = self.read(index)?.unwrap().clone(); + f((index, v, self))?; + index = index + 1; } Ok(()) @@ -315,9 +353,7 @@ where return Err(Error::UnsupportedUnflushedState); } - let mut file = self.open_file()?; - - let len = Self::read_disk_len_(&file)?; + let len = *self.stored_len; let from = from.map_or(0, |from| { if from >= 0 { @@ -339,13 +375,26 @@ where return Err(Error::RangeFromAfterTo); } - Self::seek_(&mut file, Self::index_to_byte_index(from))?; + let mut small_cache: SmallCache = None; - let mut buf = Self::create_buffer(); + let values = (from..=to) + .flat_map(|index| { + let page_index = Self::index_to_page_index(index); - Ok((from..=to) - .flat_map(|_| Self::read_exact(&mut file, &mut buf).map(|v| v.to_owned())) - .collect::>()) + if small_cache.as_ref().is_none_or(|b| b.0 != page_index) { + small_cache.replace((page_index, self.decode(page_index).unwrap())); + } + + small_cache + .as_ref() + .unwrap() + .1 + .get(Self::index_to_decoded_index(index)) + .cloned() + }) + .collect::>(); + + Ok(values) } #[inline] @@ -374,7 +423,7 @@ where #[inline] pub fn len(&self) -> usize { - self.file_len + self.pushed_len() + *self.stored_len + self.pushed_len() } #[inline] @@ -406,41 +455,166 @@ where return Ok(()); } - let mut bytes: Vec = vec![0; self.pushed_len() * Self::SIZE_OF_T]; + let mut file = self.open_file()?; + + let (starting_page_index, values) = if *self.stored_len % Self::PER_PAGE != 0 { + if self.pages.is_empty() { + unreachable!() + } + + let last_page_index = self.pages.len() - 1; + + let values = if let Some(values) = self.decoded_pages.as_mut().and_then(|big_cache| { + big_cache + .last_mut() + .and_then(|lock| lock.take()) + .map(|b| b.into_vec()) + }) { + values + } else if self + .decoded_page + .as_ref() + .is_some_and(|(page_index, _)| *page_index == last_page_index) + { + self.decoded_page.take().unwrap().1.into_vec() + } else { + self.decode(last_page_index) + .inspect_err(|_| { + dbg!(last_page_index, &self.pages); + }) + .unwrap() + .into_vec() + }; + + let file_len = self.pages.pop().unwrap().start; + + Self::file_set_len(&mut file, file_len)?; + + (last_page_index, values) + } else { + (self.pages.len(), vec![]) + }; + + self.stored_len += self.pushed_len(); + + let compressed = values + .into_par_iter() + .chain(mem::take(&mut self.pushed).into_par_iter()) + .chunks(Self::PER_PAGE) + .map(|chunk| (Self::compress_chunk(&chunk), chunk.len())) + .collect::>(); + + compressed + .iter() + .enumerate() + .for_each(|(i, (compressed_bytes, values_len))| { + let page_index = starting_page_index + i; + + let start = if page_index != 0 { + let prev = self.pages.get(page_index - 1).unwrap(); + prev.start + prev.bytes_len as u64 + } else { + 0 + }; + + let bytes_len = compressed_bytes.len() as u32; + let values_len = *values_len as u32; + + let page = CompressedPageMetadata::new(start, bytes_len, values_len); + + self.pages.push(page_index, page); + }); + + let compressed = compressed + .into_iter() + .flat_map(|(v, _)| v) + .collect::>(); + + self.pages.write()?; + file.write_all(&compressed)?; + self.reset_caches()?; + + self.write_length()?; + + Ok(()) + } + + fn compress_chunk(chunk: &[T]) -> Box<[u8]> { + if chunk.len() > Self::PER_PAGE { + panic!(); + } + + let mut bytes: Vec = vec![0; chunk.len() * Self::SIZE_OF_T]; let unsafe_bytes = UnsafeSlice::new(&mut bytes); - mem::take(&mut self.pushed) + chunk .into_par_iter() .enumerate() .for_each(|(i, v)| unsafe_bytes.copy_slice(i * Self::SIZE_OF_T, v.as_bytes())); - self.file.write_all(&bytes)?; + zstd::encode_all(bytes.as_slice(), DEFAULT_COMPRESSION_LEVEL) + .unwrap() + .into_boxed_slice() + } - self.reset_file_metadata()?; + pub fn truncate_if_needed(&mut self, index: I) -> Result<()> { + let index = Self::i_to_usize(index)?; + + if index >= *self.stored_len { + return Ok(()); + } + + if index == 0 { + self.reset_file()?; + return Ok(()); + } + + let page_index = Self::index_to_page_index(index); + + let values = self.decode(page_index)?; + let mut page = self.pages.truncate(page_index).unwrap(); + + let mut file = self.open_file()?; + Self::file_set_len(&mut file, page.start)?; + + let decoded_index = Self::index_to_decoded_index(index); + + if decoded_index != 0 { + let chunk = &values[..decoded_index]; + + let compressed = Self::compress_chunk(chunk); + + page.values_len = chunk.len() as u32; + page.bytes_len = compressed.len() as u32; + + file.write_all(&compressed)?; + + self.pages.push(page_index, page); + } + + self.pages.write()?; + + *self.stored_len = index; + self.write_length()?; + + self.reset_caches()?; Ok(()) } pub fn reset_file(&mut self) -> Result<()> { - self.truncate_if_needed(I::from(0))?; + let mut file = self.open_file()?; + Self::file_set_len(&mut file, 0)?; + *self.stored_len = 0; + self.reset_caches()?; Ok(()) } - pub fn truncate_if_needed(&mut self, index: I) -> Result> { - let index = Self::i_to_usize(index)?; - - if index >= self.file_len { - return Ok(None); - } - - let value_at_index = self.open_then_read_(index).ok(); - - self.file.set_len(Self::index_to_byte_index(index))?; - - self.reset_file_metadata()?; - - Ok(value_at_index) + fn file_set_len(file: &mut File, len: u64) -> io::Result<()> { + file.set_len(len)?; + file.seek(SeekFrom::End(0))?; + Ok(()) } #[inline] @@ -449,24 +623,11 @@ where } #[inline] - fn byte_index_to_index(byte_index: usize) -> usize { - byte_index / Self::SIZE_OF_T - } - - #[inline] - fn index_to_byte_index(index: usize) -> u64 { - (index * Self::SIZE_OF_T) as u64 - } - - #[inline] - fn index_to_byte_range(index: usize) -> Range { - let index = (Self::index_to_byte_index(index) as usize) % Self::PAGE_SIZE; - index..(index + Self::SIZE_OF_T) - } - fn index_to_pushed_index(&self, index: usize) -> Result> { - if index >= self.file_len { - let index = index - self.file_len; + let file_len = *self.stored_len; + + if index >= file_len { + let index = index - file_len; if index >= self.pushed.len() { Err(Error::IndexTooHigh) } else { @@ -497,7 +658,24 @@ where } #[inline] fn path_vec_(path: &Path) -> PathBuf { - path.join("vec") + path.join("vec.zstd") + } + + fn write_length(&self) -> io::Result<()> { + self.stored_len.write(&self.path_length()) + } + #[inline] + fn path_length(&self) -> PathBuf { + Self::path_length_(&self.pathbuf) + } + #[inline] + fn path_length_(path: &Path) -> PathBuf { + path.join("length") + } + + #[inline] + fn path_pages_(path: &Path) -> PathBuf { + path.join("pages") } #[inline] @@ -505,6 +683,11 @@ where path.join("version") } + #[inline] + fn path_compressed_(path: &Path) -> PathBuf { + path.join("compressed") + } + pub fn index_type_to_string(&self) -> &str { std::any::type_name::() } @@ -520,9 +703,6 @@ where T: StoredType, { fn clone(&self) -> Self { - let path = &self.pathbuf; - let path_version = Self::path_version_(path); - let version = Version::try_from(path_version.as_path()).unwrap(); - Self::import(path, version).unwrap() + Self::import(&self.pathbuf, self.version, self.compressed).unwrap() } } diff --git a/crates/brk_vec/src/structs/back.rs b/crates/brk_vec/src/structs/back.rs new file mode 100644 index 000000000..fbd6dd506 --- /dev/null +++ b/crates/brk_vec/src/structs/back.rs @@ -0,0 +1,20 @@ +use std::{fs::File, sync::OnceLock}; + +use super::CompressedPagesMetadata; + +type CompressedPage = Option<(usize, Box<[T]>)>; + +pub enum Back { + Raw { + raw_pages: Vec>>, + raw_page: memmap2::Mmap, + file: File, + file_position: u64, + buf: Vec, + }, + Compressed { + decoded_pages: Option>>>, + decoded_page: CompressedPage, + pages: CompressedPagesMetadata, + }, +} diff --git a/crates/brk_vec/src/structs/compressed.rs b/crates/brk_vec/src/structs/compressed.rs new file mode 100644 index 000000000..10bcb5fe5 --- /dev/null +++ b/crates/brk_vec/src/structs/compressed.rs @@ -0,0 +1,67 @@ +use std::{ + fs, + io::{self}, + ops::Deref, + path::Path, +}; + +use crate::{Error, Result}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct Compressed(bool); + +impl Compressed { + pub const YES: Self = Self(true); + pub const NO: Self = Self(false); + + pub fn write(&self, path: &Path) -> Result<(), io::Error> { + fs::write(path, self.as_bytes()) + } + + fn as_bytes(&self) -> Vec { + if self.0 { vec![1] } else { vec![0] } + } + + fn from_bytes(bytes: &[u8]) -> Self { + if bytes.len() != 1 { + panic!(); + } + if bytes[0] == 1 { + Self(true) + } else if bytes[0] == 0 { + Self(false) + } else { + panic!() + } + } + + pub fn validate(&self, path: &Path) -> Result<()> { + if let Ok(prev_compressed) = Compressed::try_from(path) { + if prev_compressed != *self { + return Err(Error::DifferentCompressionMode); + } + } + + Ok(()) + } +} + +impl TryFrom<&Path> for Compressed { + type Error = Error; + fn try_from(value: &Path) -> Result { + Ok(Self::from_bytes(&fs::read(value)?)) + } +} + +impl From for Compressed { + fn from(value: bool) -> Self { + Self(value) + } +} + +impl Deref for Compressed { + type Target = bool; + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/crates/brk_vec/src/structs/length.rs b/crates/brk_vec/src/structs/length.rs new file mode 100644 index 000000000..5634443d4 --- /dev/null +++ b/crates/brk_vec/src/structs/length.rs @@ -0,0 +1,71 @@ +use std::{ + fs, + io::{self, Read}, + ops::{AddAssign, Deref, DerefMut}, + path::Path, +}; + +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; + +use crate::{Error, Result}; + +#[derive( + Debug, + Default, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + FromBytes, + IntoBytes, + Immutable, + KnownLayout, +)] +pub struct Length(usize); + +impl Length { + pub fn write(&self, path: &Path) -> Result<(), io::Error> { + fs::write(path, self.as_bytes()) + } +} + +impl From for Length { + fn from(value: usize) -> Self { + Self(value) + } +} + +impl Deref for Length { + type Target = usize; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Length { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl TryFrom<&Path> for Length { + type Error = Error; + fn try_from(value: &Path) -> Result { + let mut buf = [0; 8]; + if let Ok(bytes) = fs::read(value) { + bytes.as_slice().read_exact(&mut buf)?; + Ok(*(Self::ref_from_bytes(&buf)?)) + } else { + Ok(Self::default()) + } + } +} + +impl AddAssign for Length { + fn add_assign(&mut self, rhs: usize) { + self.0 += rhs; + } +} diff --git a/crates/brk_vec/src/structs/mod.rs b/crates/brk_vec/src/structs/mod.rs index 51482371f..a17374000 100644 --- a/crates/brk_vec/src/structs/mod.rs +++ b/crates/brk_vec/src/structs/mod.rs @@ -1,5 +1,15 @@ +mod back; +mod compressed; +mod length; +mod page; +mod pages; mod unsafe_slice; mod version; +pub use back::*; +pub use compressed::*; +pub use length::*; +pub use page::*; +pub use pages::*; pub use unsafe_slice::*; pub use version::*; diff --git a/crates/brk_vec/src/structs/page.rs b/crates/brk_vec/src/structs/page.rs new file mode 100644 index 000000000..6f0ee9911 --- /dev/null +++ b/crates/brk_vec/src/structs/page.rs @@ -0,0 +1,18 @@ +use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout}; + +#[derive(Debug, Clone, IntoBytes, Immutable, FromBytes, KnownLayout)] +pub struct CompressedPageMetadata { + pub start: u64, + pub bytes_len: u32, + pub values_len: u32, +} + +impl CompressedPageMetadata { + pub fn new(start: u64, bytes_len: u32, values_len: u32) -> Self { + Self { + start, + bytes_len, + values_len, + } + } +} diff --git a/crates/brk_vec/src/structs/pages.rs b/crates/brk_vec/src/structs/pages.rs new file mode 100644 index 000000000..05ab3b38f --- /dev/null +++ b/crates/brk_vec/src/structs/pages.rs @@ -0,0 +1,118 @@ +use std::{ + fs::{self, OpenOptions}, + io::{self, Seek, SeekFrom, Write}, + path::{Path, PathBuf}, +}; + +use rayon::prelude::*; +use zerocopy::{IntoBytes, TryFromBytes}; + +use crate::Result; + +use super::{CompressedPageMetadata, UnsafeSlice}; + +#[derive(Debug, Clone)] +pub struct CompressedPagesMetadata { + vec: Vec, + change_at: Option, + path: PathBuf, +} + +impl CompressedPagesMetadata { + const PAGE_SIZE: usize = size_of::(); + + pub fn read(path: &Path) -> Result { + let slf = Self { + vec: fs::read(path) + .unwrap_or_default() + .chunks(Self::PAGE_SIZE) + .map(|bytes| { + if bytes.len() != Self::PAGE_SIZE { + panic!() + } + CompressedPageMetadata::try_read_from_bytes(bytes).unwrap() + }) + .collect::>(), + path: path.to_owned(), + change_at: None, + }; + + Ok(slf) + } + + pub fn write(&mut self) -> io::Result<()> { + if self.change_at.is_none() { + return Ok(()); + } + + let change_at = self.change_at.take().unwrap(); + + let len = (self.vec.len() - change_at) * Self::PAGE_SIZE; + + let mut bytes: Vec = vec![0; len]; + + let unsafe_bytes = UnsafeSlice::new(&mut bytes); + + self.vec[change_at..] + .par_iter() + .enumerate() + .for_each(|(i, v)| unsafe_bytes.copy_slice(i * Self::PAGE_SIZE, v.as_bytes())); + + let mut file = OpenOptions::new() + .read(true) + .create(true) + .truncate(false) + .append(true) + .open(&self.path)?; + + file.set_len((change_at * Self::PAGE_SIZE) as u64)?; + file.seek(SeekFrom::End(0))?; + + file.write_all(&bytes)?; + + Ok(()) + } + + pub fn len(&self) -> usize { + self.vec.len() + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn get(&self, page_index: usize) -> Option<&CompressedPageMetadata> { + self.vec.get(page_index) + } + + pub fn last(&self) -> Option<&CompressedPageMetadata> { + self.vec.last() + } + + pub fn pop(&mut self) -> Option { + self.vec.pop() + } + + pub fn push(&mut self, page_index: usize, page: CompressedPageMetadata) { + if page_index != self.vec.len() { + panic!(); + } + + self.set_changed_at(page_index); + + self.vec.push(page); + } + + fn set_changed_at(&mut self, page_index: usize) { + if self.change_at.is_none_or(|pi| pi > page_index) { + self.change_at.replace(page_index); + } + } + + pub fn truncate(&mut self, page_index: usize) -> Option { + let page = self.get(page_index).cloned(); + self.vec.truncate(page_index); + self.set_changed_at(page_index); + page + } +} diff --git a/python/parse.py b/python/parse.py deleted file mode 100644 index bffdf2c0f..000000000 --- a/python/parse.py +++ /dev/null @@ -1,27 +0,0 @@ -# Here's an example on how to parse the output from the indexer -# We're aiming to read the first 21 values from the height_to_timestamp vec - -import sys -# import struct -import datetime - -with open("../_outputs/indexes/vecs/height_to_timestamp/vec", "rb") as file: - for x in range(0, 21): - b = file.read(4) # Need to check the rust side to find the size, at least for now - number = int.from_bytes(b, sys.byteorder) - date = datetime.date.fromtimestamp(number) - print(date) - -# print(int.from_bytes([21], sys.byteorder)) # 21 u8 native endian -# print(int.from_bytes([21, 0], sys.byteorder)) # 21 u16 native endian -# print(int.from_bytes([21, 0, 0, 0], sys.byteorder)) # 21 u32 native endian -# print(int.from_bytes([21, 0, 0, 0, 0, 0, 0, 0], sys.byteorder)) # 21 u64/usize native endian - -# # check i8, ... - -# print(struct.unpack('f', bytes([0, 0, 168, 65]))) # 21.0 f32 native endian -# print(struct.unpack('d', bytes([0, 0, 0, 0, 0, 0, 53, 64]))) # 21.0 f64 native endian -# print(struct.unpack('f', bytes([65, 168, 0, 0]))) # 21.0 f32 big endian -# print(struct.unpack('>d', bytes([64, 53, 0, 0, 0, 0, 0, 0]))) # 21.0 f64 big endian