git: reset

This commit is contained in:
k
2024-06-23 17:38:53 +02:00
commit a1a576d088
375 changed files with 40952 additions and 0 deletions

View File

@@ -0,0 +1,235 @@
use std::{
collections::{BTreeMap, BTreeSet},
fmt::Debug,
fs,
};
use allocative::Allocative;
use derive_deref::{Deref, DerefMut};
// https://docs.rs/sanakirja/latest/sanakirja/index.html
// https://pijul.org/posts/2021-02-06-rethinking-sanakirja/
//
// Seems indeed much faster than ReDB and LMDB (heed)
// But a lot has changed code wise between them so a retest wouldn't hurt
//
// Possible compression: https://pijul.org/posts/sanakirja-zstd/
use sanakirja::{
btree::{self, page, page_unsized, BTreeMutPage, Db_},
direct_repr, Commit, Env, Error, MutTxn, RootDb, Storable, UnsizedStorable,
};
use crate::io::OUTPUTS_FOLDER_PATH;
/// A [`Database`] whose in-memory key type and stored key type are the same `Key`,
/// backed by sanakirja's `page::Page`.
pub type SizedDatabase<Key, Value> = Database<Key, Key, Value, page::Page<Key, Value>>;
/// A [`Database`] whose in-memory key type (`KeyTree`) may differ from the stored key
/// type (`KeyDB`, which may be unsized), backed by sanakirja's `page_unsized::Page`.
pub type UnsizedDatabase<KeyTree, KeyDB, Value> =
Database<KeyTree, KeyDB, Value, page_unsized::Page<KeyDB, Value>>;
#[derive(Allocative)]
#[allocative(bound = "KeyTree: Allocative, KeyDB, Value: Allocative, Page")]
/// A sanakirja B-tree plus in-memory write buffers.
///
/// Insertions and deletions are only recorded in `cached_puts` / `cached_dels`;
/// they are applied to the B-tree and committed when `export` is called.
///
/// There is no `cached_gets` since it's much cheaper and faster to do a parallel search first using `unsafe_get` than caching gets along the way.
pub struct Database<KeyTree, KeyDB, Value, Page>
where
KeyTree: Ord + Clone + Debug,
KeyDB: Ord + ?Sized + Storable,
Value: Storable + PartialEq,
Page: BTreeMutPage<KeyDB, Value>,
{
/// Pending insertions, written to the B-tree by `export` (after the deletions).
pub cached_puts: BTreeMap<KeyTree, Value>,
/// Keys pending deletion from the B-tree, applied by `export` (before the insertions).
pub cached_dels: BTreeSet<KeyTree>,
/// Handle of the B-tree stored in root slot `ROOT_DB`.
#[allocative(skip)]
db: Db_<KeyDB, Value, Page>,
/// Mutable transaction opened in `init_txn`; it is committed by `export`.
#[allocative(skip)]
txn: MutTxn<Env, ()>,
/// Converts an in-memory key into the key representation stored in the B-tree.
#[allocative(skip)]
key_tree_to_key_db: fn(&KeyTree) -> &KeyDB,
}
/// NOTE(review): presumably the largest key size (in bytes) sanakirja accepts; it is not
/// used in this file — confirm against the callers.
pub const SANAKIRJA_MAX_KEY_SIZE: usize = 510;
/// Root slot used with `root_db` / `set_root` to locate the B-tree inside the environment.
const ROOT_DB: usize = 0;
/// Passed as the second argument of `Env::new_nolock` in `init_txn`.
const PAGE_SIZE: u64 = 4096 * 256; // 1 MiB - must be a multiple of 4096
impl<KeyDB, KeyTree, Value, Page> Database<KeyTree, KeyDB, Value, Page>
where
    KeyTree: Ord + Clone + Debug,
    KeyDB: Ord + ?Sized + Storable,
    Value: Storable + PartialEq,
    Page: BTreeMutPage<KeyDB, Value>,
{
    /// Opens the database file `file` inside the databases folder `folder`, creating the
    /// folder, the file's environment and the B-tree when they do not exist yet.
    ///
    /// `key_tree_to_key_db` converts an in-memory key into the key stored in the B-tree.
    ///
    /// # Errors
    ///
    /// Returns an error when the folder cannot be created, when the environment or the
    /// transaction cannot be opened, or when a missing B-tree cannot be created.
    pub fn open(
        folder: &str,
        file: &str,
        key_tree_to_key_db: fn(&KeyTree) -> &KeyDB,
    ) -> color_eyre::Result<Self> {
        let mut txn = Self::init_txn(folder, file)?;

        let existing = txn.root_db(ROOT_DB);
        let db = match existing {
            Some(db) => db,
            // NOTE(review): `create_db_` is an unsafe sanakirja call whose safety contract is
            // not visible from this file. Its error is propagated instead of panicking.
            None => unsafe { btree::create_db_(&mut txn)? },
        };

        Ok(Self {
            cached_puts: BTreeMap::default(),
            cached_dels: BTreeSet::default(),
            db,
            txn,
            key_tree_to_key_db,
        })
    }

    /// Calls `callback` with every `(key, value)` entry stored in the B-tree.
    ///
    /// Entries that only live in `cached_puts` are not visited.
    ///
    /// # Panics
    ///
    /// Panics when the B-tree iterator cannot be created or yields an error.
    pub fn iter<F>(&self, callback: &mut F)
    where
        F: FnMut((&KeyDB, &Value)),
    {
        btree::iter(&self.txn, &self.db, None)
            .unwrap()
            .for_each(|entry| callback(entry.unwrap()));
    }

    /// Looks `key` up in the pending insertions first, then in the B-tree.
    pub fn get(&self, key: &KeyTree) -> Option<&Value> {
        if let Some(cached_put) = self.get_from_puts(key) {
            return Some(cached_put);
        }

        self.db_get(key)
    }

    /// Looks `key` up in the B-tree only, ignoring the pending insertions and deletions.
    ///
    /// # Panics
    ///
    /// Panics when the B-tree lookup itself fails.
    pub fn db_get(&self, key: &KeyTree) -> Option<&Value> {
        let k = (self.key_tree_to_key_db)(key);

        let option = btree::get(&self.txn, &self.db, k, None).unwrap();

        // The entry returned by `btree::get` is only accepted when its key equals the
        // requested one.
        if let Some((k_found, v)) = option {
            if k == k_found {
                return Some(v);
            }
        }

        None
    }

    /// Returns the pending insertion for `key`, if any.
    #[inline(always)]
    pub fn get_from_puts(&self, key: &KeyTree) -> Option<&Value> {
        self.cached_puts.get(key)
    }

    /// Returns a mutable reference to the pending insertion for `key`, if any.
    #[inline(always)]
    pub fn get_mut_from_puts(&mut self, key: &KeyTree) -> Option<&mut Value> {
        self.cached_puts.get_mut(key)
    }

    /// Removes `key`: a pending insertion is cancelled and returned; otherwise the key is
    /// scheduled for deletion from the B-tree and `None` is returned.
    #[inline(always)]
    pub fn remove(&mut self, key: &KeyTree) -> Option<Value> {
        self.remove_from_puts(key).or_else(|| {
            self.db_remove(key);

            None
        })
    }

    /// Schedules `key` for deletion from the B-tree at the next `export`.
    #[inline(always)]
    pub fn db_remove(&mut self, key: &KeyTree) {
        self.cached_dels.insert(key.clone());
    }

    /// Schedules both a deletion and an insertion for `key`; `export` applies deletions
    /// before insertions. Returns the previously pending insertion, if any.
    pub fn update(&mut self, key: KeyTree, value: Value) -> Option<Value> {
        self.cached_dels.insert(key.clone());
        self.cached_puts.insert(key, value)
    }

    /// Cancels and returns the pending insertion for `key`, if any.
    #[inline(always)]
    pub fn remove_from_puts(&mut self, key: &KeyTree) -> Option<Value> {
        self.cached_puts.remove(key)
    }

    /// Buffers an insertion after cancelling any pending deletion of the same key.
    /// Returns the previously pending insertion, if any.
    #[inline(always)]
    pub fn insert(&mut self, key: KeyTree, value: Value) -> Option<Value> {
        self.cached_dels.remove(&key);

        self.unsafe_insert(key, value)
    }

    /// Buffers an insertion without touching the pending deletions.
    /// Returns the previously pending insertion, if any.
    #[inline(always)]
    pub fn unsafe_insert(&mut self, key: KeyTree, value: Value) -> Option<Value> {
        self.cached_puts.insert(key, value)
    }

    /// Creates the databases folder if needed, opens the environment stored in `file`
    /// (without a lock file) and begins a mutable transaction on it.
    ///
    /// # Errors
    ///
    /// Returns an error when the folder cannot be created or when the environment or the
    /// transaction cannot be opened.
    fn init_txn(folder: &str, file: &str) -> color_eyre::Result<MutTxn<Env, ()>> {
        let path = databases_folder_path(folder);

        fs::create_dir_all(&path)?;

        // NOTE(review): `new_nolock` is unsafe; presumably the caller must guarantee that no
        // other environment is opened on the same file — confirm against sanakirja's docs.
        let env = unsafe { Env::new_nolock(format!("{path}/{file}"), PAGE_SIZE, 1)? };

        let txn = Env::mut_txn_begin(env)?;

        Ok(txn)
    }

    /// Applies the pending deletions, then the pending insertions, to the B-tree and commits
    /// the transaction. Does nothing when both buffers are empty.
    ///
    /// # Errors
    ///
    /// Returns the first sanakirja error hit while deleting, inserting or committing.
    pub fn export(mut self) -> color_eyre::Result<(), Error> {
        if self.cached_dels.is_empty() && self.cached_puts.is_empty() {
            return Ok(());
        }

        self.cached_dels
            .into_iter()
            .try_for_each(|key| -> Result<(), Error> {
                btree::del(
                    &mut self.txn,
                    &mut self.db,
                    (self.key_tree_to_key_db)(&key),
                    None,
                )?;

                Ok(())
            })?;

        self.cached_puts
            .into_iter()
            .try_for_each(|(key, value)| -> Result<(), Error> {
                btree::put(
                    &mut self.txn,
                    &mut self.db,
                    (self.key_tree_to_key_db)(&key),
                    &value,
                )?;

                Ok(())
            })?;

        // The root slot must point at the (possibly new) root page before committing.
        self.txn.set_root(ROOT_DB, self.db.db.into());

        self.txn.commit()
    }
}
/// Fixed-size 19-byte key, stored by sanakirja through `direct_repr!`.
#[derive(
    Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deref, DerefMut, Default, Copy, Allocative,
)]
pub struct U8x19([u8; 19]);
direct_repr!(U8x19);

impl From<&[u8]> for U8x19 {
    /// Copies `slice` into a new key.
    ///
    /// # Panics
    ///
    /// Panics when `slice` is not exactly 19 bytes long (`copy_from_slice` requirement).
    fn from(slice: &[u8]) -> Self {
        let mut bytes = [0_u8; 19];
        bytes.copy_from_slice(slice);
        Self(bytes)
    }
}
/// Fixed-size 31-byte key, stored by sanakirja through `direct_repr!`.
#[derive(
    Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Deref, DerefMut, Default, Copy, Allocative,
)]
pub struct U8x31([u8; 31]);
direct_repr!(U8x31);

impl From<&[u8]> for U8x31 {
    /// Copies `slice` into a new key.
    ///
    /// # Panics
    ///
    /// Panics when `slice` is not exactly 31 bytes long (`copy_from_slice` requirement).
    fn from(slice: &[u8]) -> Self {
        let mut bytes = [0_u8; 31];
        bytes.copy_from_slice(slice);
        Self(bytes)
    }
}
/// Returns the path of the `folder` sub-directory of the `databases` directory located
/// under `OUTPUTS_FOLDER_PATH`.
pub fn databases_folder_path(folder: &str) -> String {
    format!("{}/databases/{}", OUTPUTS_FOLDER_PATH, folder)
}

View File

@@ -0,0 +1,32 @@
use std::{fs, io};
use crate::{structs::WNaiveDate, utils::log};
use super::databases_folder_path;
/// Common interface of a group of databases stored together in one folder.
pub trait AnyDatabaseGroup: Sized {
    /// Builds the group.
    fn import() -> Self;

    /// Persists the group for the given `height` and `date`.
    fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()>;

    /// Name of the group's folder, relative to the databases folder.
    fn folder<'a>() -> &'a str;

    /// Resets the group's metadata.
    fn reset_metadata(&mut self);

    /// Full path of the group's folder.
    fn full_path() -> String {
        databases_folder_path(Self::folder())
    }

    /// Logs the reset, resets the metadata and then deletes the group's folder with
    /// everything inside it.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised while deleting the folder.
    fn reset(&mut self) -> color_eyre::Result<(), io::Error> {
        let folder = Self::folder();
        log(&format!("Reset {}", folder));

        self.reset_metadata();

        fs::remove_dir_all(Self::full_path())
    }
}

View File

@@ -0,0 +1,148 @@
use std::{
collections::BTreeMap,
fs, mem,
ops::{Deref, DerefMut},
};
use allocative::Allocative;
use rayon::prelude::*;
use crate::{
structs::{AddressData, WNaiveDate},
utils::time,
};
use super::{databases_folder_path, AnyDatabaseGroup, Metadata, SizedDatabase};
// NOTE(review): presumably the key is an address index — inferred from the struct name only.
type Key = u32;
type Value = AddressData;
type Database = SizedDatabase<Key, Value>;
/// Group of `SizedDatabase<u32, AddressData>` shards.
///
/// Each shard covers `DB_MAX_SIZE` consecutive keys and is stored under its shard index
/// (`key / DB_MAX_SIZE`, see `db_index`). Shards are opened on demand by `open_db`.
#[derive(Allocative)]
pub struct AddressIndexToAddressData {
/// Metadata of the group; updated by `unsafe_insert`, `remove` and `export`.
pub metadata: Metadata,
/// Currently open shards, keyed by shard index. Emptied by `iter` and `export`.
map: BTreeMap<usize, Database>,
}
/// Exposes the map of open shards for reading.
impl Deref for AddressIndexToAddressData {
    type Target = BTreeMap<usize, Database>;

    fn deref(&self) -> &Self::Target {
        let Self { map, .. } = self;
        map
    }
}

/// Exposes the map of open shards for writing.
impl DerefMut for AddressIndexToAddressData {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { map, .. } = self;
        map
    }
}
/// Number of consecutive keys covered by one shard.
const DB_MAX_SIZE: usize = 500_000;

impl AddressIndexToAddressData {
    /// Counts an insertion in the metadata, then buffers `value` under `key` in the matching
    /// shard (opened if needed) without touching that shard's pending deletions.
    pub fn unsafe_insert(&mut self, key: Key, value: Value) -> Option<Value> {
        self.metadata.called_insert();

        let database = self.open_db(&key);
        database.unsafe_insert(key, value)
    }

    /// Counts a removal in the metadata, then removes `key` from the matching shard
    /// (opened if needed).
    pub fn remove(&mut self, key: &Key) -> Option<Value> {
        self.metadata.called_remove();

        let database = self.open_db(key);
        database.remove(key)
    }

    /// Reads `key` from the pending insertions of its shard.
    ///
    /// Takes `&self` and never opens a shard, which makes it easy to use with rayon.
    ///
    /// # Panics
    ///
    /// Panics when the shard holding `key` is not open.
    pub fn unsafe_get_from_cache(&self, key: &Key) -> Option<&Value> {
        let database = self.map.get(&Self::db_index(key)).unwrap();
        database.get_from_puts(key)
    }

    /// Reads `key` from the B-tree of its shard, ignoring pending insertions.
    ///
    /// # Panics
    ///
    /// Panics when the shard holding `key` is not open.
    pub fn unsafe_get_from_db(&self, key: &Key) -> Option<&Value> {
        let database = self.map.get(&Self::db_index(key)).unwrap();
        database.db_get(key)
    }

    /// Returns the shard holding `key`, opening it first when it is not open yet.
    ///
    /// # Panics
    ///
    /// Panics when the shard cannot be opened.
    pub fn open_db(&mut self, key: &Key) -> &mut Database {
        let db_index = Self::db_index(key);

        self.map.entry(db_index).or_insert_with(|| {
            // The file is named after the key range covered by the shard.
            let start = db_index * DB_MAX_SIZE;
            let end = (db_index + 1) * DB_MAX_SIZE;
            let db_name = format!("{start}..{end}");

            SizedDatabase::open(Self::folder(), &db_name, |key| key).unwrap()
        })
    }

    /// Opens every shard found on disk and calls `callback` with each entry stored in their
    /// B-trees. The map of open shards is empty afterwards.
    pub fn iter<F>(&mut self, callback: &mut F)
    where
        F: FnMut((&Key, &Value)),
    {
        time("Iter through address_index_to_address_data", || {
            self.open_all();

            // The map MUST be cleared here (hence `mem::take`); keeping the shards open was
            // observed to cause problems later, during the export.
            let databases = mem::take(&mut self.map);

            for database in databases.values() {
                database.iter(&mut *callback);
            }
        });
    }

    /// Opens the shard of every file of the group's folder whose name contains `..`.
    ///
    /// # Panics
    ///
    /// Panics when the folder cannot be read, when a file name is not valid UTF-8 or when
    /// the part before `..` is not a `u32`.
    fn open_all(&mut self) {
        let folder = databases_folder_path(Self::folder());

        for entry in fs::read_dir(folder).unwrap() {
            let file_name = entry
                .unwrap()
                .path()
                .file_name()
                .unwrap()
                .to_str()
                .unwrap()
                .to_owned();

            if !file_name.contains("..") {
                continue;
            }

            // The first key of the range is enough to locate the shard.
            let first_key = file_name
                .split("..")
                .next()
                .unwrap()
                .parse::<u32>()
                .unwrap();

            self.open_db(&first_key);
        }
    }

    /// Index of the shard holding `key`.
    fn db_index(key: &Key) -> usize {
        let index = *key as usize;
        index / DB_MAX_SIZE
    }
}
impl AnyDatabaseGroup for AddressIndexToAddressData {
    /// Builds the group with no shard open and the metadata imported from the group's folder.
    fn import() -> Self {
        Self {
            map: BTreeMap::default(),
            metadata: Metadata::import(&Self::full_path()),
        }
    }

    /// Exports every open shard in parallel (leaving the map empty), then exports the
    /// metadata for `height` and `date`.
    ///
    /// # Errors
    ///
    /// Returns the first error raised by a shard export or by the metadata export. The
    /// metadata is not exported when a shard export fails.
    fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()> {
        mem::take(&mut self.map)
            .into_par_iter()
            .try_for_each(|(_, db)| db.export())?;

        // Propagated rather than unwrapped: a failed metadata write must reach the caller.
        self.metadata.export(height, date)?;

        Ok(())
    }

    /// Resets the metadata of the group.
    fn reset_metadata(&mut self) {
        self.metadata.reset();
    }

    /// Name of the group's folder.
    fn folder<'a>() -> &'a str {
        "address_index_to_address_data"
    }
}

View File

@@ -0,0 +1,123 @@
use std::{
collections::BTreeMap,
mem,
ops::{Deref, DerefMut},
};
use allocative::Allocative;
use rayon::prelude::*;
use crate::structs::{EmptyAddressData, WNaiveDate};
use super::{AnyDatabaseGroup, Metadata, SizedDatabase};
// NOTE(review): presumably the key is an address index — inferred from the struct name only.
type Key = u32;
type Value = EmptyAddressData;
type Database = SizedDatabase<Key, Value>;
/// Group of `SizedDatabase<u32, EmptyAddressData>` shards.
///
/// Each shard covers `DB_MAX_SIZE` consecutive keys and is stored under its shard index
/// (`key / DB_MAX_SIZE`, see `db_index`). Shards are opened on demand by `open_db`.
#[derive(Allocative)]
pub struct AddressIndexToEmptyAddressData {
/// Metadata of the group; updated by `unsafe_insert`, `remove` and `export`.
pub metadata: Metadata,
/// Currently open shards, keyed by shard index. Emptied by `export`.
map: BTreeMap<usize, Database>,
}
/// Exposes the map of open shards for reading.
impl Deref for AddressIndexToEmptyAddressData {
    type Target = BTreeMap<usize, Database>;

    fn deref(&self) -> &Self::Target {
        let Self { map, .. } = self;
        map
    }
}

/// Exposes the map of open shards for writing.
impl DerefMut for AddressIndexToEmptyAddressData {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { map, .. } = self;
        map
    }
}
/// Number of consecutive keys covered by one shard.
const DB_MAX_SIZE: usize = 500_000;

impl AddressIndexToEmptyAddressData {
    /// Counts an insertion in the metadata, then buffers `value` under `key` in the matching
    /// shard (opened if needed) without touching that shard's pending deletions.
    pub fn unsafe_insert(&mut self, key: Key, value: Value) -> Option<Value> {
        self.metadata.called_insert();

        self.open_db(&key).unsafe_insert(key, value)
    }

    /// Counts a removal in the metadata, then removes `key` from the matching shard
    /// (opened if needed).
    pub fn remove(&mut self, key: &Key) -> Option<Value> {
        self.metadata.called_remove();

        self.open_db(key).remove(key)
    }

    /// Reads `key` from the pending insertions of its shard; returns `None` when the shard
    /// is not open.
    ///
    /// Takes `&self` and never opens a shard, which makes it easy to use with rayon.
    pub fn unsafe_get_from_cache(&self, key: &Key) -> Option<&Value> {
        let db_index = Self::db_index(key);

        self.get(&db_index).and_then(|db| db.get_from_puts(key))
    }

    /// Reads `key` from the B-tree of its shard, ignoring pending insertions.
    ///
    /// # Panics
    ///
    /// Panics when the shard holding `key` is not open; the message lists the key, the
    /// expected shard and the shards that are open.
    pub fn unsafe_get_from_db(&self, key: &Key) -> Option<&Value> {
        let db_index = Self::db_index(key);

        self.get(&db_index)
            .unwrap_or_else(|| {
                panic!(
                    "address_index_to_empty_address_data: shard {db_index} holding key {key} is not open (open shards: {:?})",
                    self.map.keys()
                )
            })
            .db_get(key)
    }

    /// Returns the shard holding `key`, opening it first when it is not open yet.
    ///
    /// # Panics
    ///
    /// Panics when the shard cannot be opened.
    pub fn open_db(&mut self, key: &Key) -> &mut Database {
        let db_index = Self::db_index(key);

        self.entry(db_index).or_insert_with(|| {
            // The file is named after the key range covered by the shard.
            let db_name = format!(
                "{}..{}",
                db_index * DB_MAX_SIZE,
                (db_index + 1) * DB_MAX_SIZE
            );

            SizedDatabase::open(Self::folder(), &db_name, |key| key).unwrap()
        })
    }

    /// Index of the shard holding `key`.
    fn db_index(key: &Key) -> usize {
        *key as usize / DB_MAX_SIZE
    }
}
impl AnyDatabaseGroup for AddressIndexToEmptyAddressData {
    /// Builds the group with no shard open and the metadata imported from the group's folder.
    fn import() -> Self {
        let metadata = Metadata::import(&Self::full_path());

        Self {
            metadata,
            map: BTreeMap::new(),
        }
    }

    /// Exports every open shard in parallel (leaving the map empty), then exports the
    /// metadata for `height` and `date`.
    fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()> {
        let databases = mem::take(&mut self.map);

        databases
            .into_par_iter()
            .try_for_each(|(_index, database)| database.export())?;

        self.metadata.export(height, date)
    }

    /// Resets the metadata of the group.
    fn reset_metadata(&mut self) {
        self.metadata.reset();
    }

    /// Name of the group's folder.
    fn folder<'a>() -> &'a str {
        "address_index_to_empty_address_data"
    }
}

View File

@@ -0,0 +1,309 @@
use std::{collections::BTreeMap, mem, thread};
use allocative::Allocative;
use rayon::prelude::*;
use crate::structs::{Address, WNaiveDate};
use super::{
AnyDatabaseGroup, Database, Metadata, SizedDatabase, U8x19, U8x31,
UnsizedDatabase as _UnsizedDatabase,
};
// Value stored in every database of this group.
type Value = u32;
// The four concrete database shapes used by this group, named after their key type.
type U8x19Database = SizedDatabase<U8x19, Value>;
type U8x31Database = SizedDatabase<U8x31, Value>;
type U32Database = SizedDatabase<u32, Value>;
// In-memory keys are `Box<[u8]>`, stored keys are `[u8]`.
type UnsizedDatabase = _UnsizedDatabase<Box<[u8]>, [u8], Value>;
// One alias per address kind; each resolves to one of the shapes above.
type P2PKDatabase = U8x19Database;
type P2PKHDatabase = U8x19Database;
type P2SHDatabase = U8x19Database;
type P2WPKHDatabase = U8x19Database;
type P2WSHDatabase = U8x31Database;
type P2TRDatabase = U8x31Database;
type UnknownDatabase = U32Database;
type OpReturnDatabase = U32Database;
type PushOnlyDatabase = U32Database;
type EmptyDatabase = U32Database;
type MultisigDatabase = UnsizedDatabase;
/// Group of databases mapping an `Address` to a `u32` value, split by address kind.
///
/// Kinds whose `Address` variant carries a `u16` prefix are sharded by that prefix
/// (one database per prefix); the other kinds use a single, lazily opened database.
#[derive(Allocative)]
pub struct AddressToAddressIndex {
/// Metadata of the group; updated by `insert` and `export`.
pub metadata: Metadata,
// Prefix-sharded databases, keyed by the `u16` prefix. Emptied by `export`.
p2pk: BTreeMap<u16, P2PKDatabase>,
p2pkh: BTreeMap<u16, P2PKHDatabase>,
p2sh: BTreeMap<u16, P2SHDatabase>,
p2wpkh: BTreeMap<u16, P2WPKHDatabase>,
p2wsh: BTreeMap<u16, P2WSHDatabase>,
p2tr: BTreeMap<u16, P2TRDatabase>,
// Single databases; `None` until the matching `open_*` method is called, and again
// after `export`.
op_return: Option<OpReturnDatabase>,
push_only: Option<PushOnlyDatabase>,
unknown: Option<UnknownDatabase>,
empty: Option<EmptyDatabase>,
multisig: Option<MultisigDatabase>,
}
impl AddressToAddressIndex {
// pub fn safe_get(&mut self, address: &Address) -> Option<&Value> {
// match address {
// Address::Empty(key) => self.open_empty().get(key),
// Address::Unknown(key) => self.open_unknown().get(key),
// Address::MultiSig(key) => self.open_multisig().get(key),
// Address::P2PK((prefix, rest)) => self.open_p2pk(*prefix).get(rest),
// Address::P2PKH((prefix, rest)) => self.open_p2pkh(*prefix).get(rest),
// Address::P2SH((prefix, rest)) => self.open_p2sh(*prefix).get(rest),
// Address::P2WPKH((prefix, rest)) => self.open_p2wpkh(*prefix).get(rest),
// Address::P2WSH((prefix, rest)) => self.open_p2wsh(*prefix).get(rest),
// Address::P2TR((prefix, rest)) => self.open_p2tr(*prefix).get(rest),
// }
// }
pub fn open_db(&mut self, address: &Address) {
match address {
Address::Empty(_) => {
self.open_empty();
}
Address::Unknown(_) => {
self.open_unknown();
}
Address::OpReturn(_) => {
self.open_op_return();
}
Address::PushOnly(_) => {
self.open_push_only();
}
Address::MultiSig(_) => {
self.open_multisig();
}
Address::P2PK((prefix, _)) => {
self.open_p2pk(*prefix);
}
Address::P2PKH((prefix, _)) => {
self.open_p2pkh(*prefix);
}
Address::P2SH((prefix, _)) => {
self.open_p2sh(*prefix);
}
Address::P2WPKH((prefix, _)) => {
self.open_p2wpkh(*prefix);
}
Address::P2WSH((prefix, _)) => {
self.open_p2wsh(*prefix);
}
Address::P2TR((prefix, _)) => {
self.open_p2tr(*prefix);
}
}
}
/// Doesn't check if the database is open contrary to `safe_get` which does and opens if needed.
/// Though it makes it easy to use with rayon
pub fn unsafe_get(&self, address: &Address) -> Option<&Value> {
match address {
Address::Empty(key) => self.empty.as_ref().unwrap().get(key),
Address::Unknown(key) => self.unknown.as_ref().unwrap().get(key),
Address::OpReturn(key) => self.op_return.as_ref().unwrap().get(key),
Address::PushOnly(key) => self.push_only.as_ref().unwrap().get(key),
Address::MultiSig(key) => self.multisig.as_ref().unwrap().get(key),
Address::P2PK((prefix, key)) => self.p2pk.get(prefix).unwrap().get(key),
Address::P2PKH((prefix, key)) => self.p2pkh.get(prefix).unwrap().get(key),
Address::P2SH((prefix, key)) => self.p2sh.get(prefix).unwrap().get(key),
Address::P2WPKH((prefix, key)) => self.p2wpkh.get(prefix).unwrap().get(key),
Address::P2WSH((prefix, key)) => self.p2wsh.get(prefix).unwrap().get(key),
Address::P2TR((prefix, key)) => self.p2tr.get(prefix).unwrap().get(key),
}
}
pub fn unsafe_get_from_puts(&self, address: &Address) -> Option<&Value> {
match address {
Address::Empty(key) => self.empty.as_ref().unwrap().get_from_puts(key),
Address::Unknown(key) => self.unknown.as_ref().unwrap().get_from_puts(key),
Address::OpReturn(key) => self.op_return.as_ref().unwrap().get_from_puts(key),
Address::PushOnly(key) => self.push_only.as_ref().unwrap().get_from_puts(key),
Address::MultiSig(key) => self.multisig.as_ref().unwrap().get_from_puts(key),
Address::P2PK((prefix, key)) => self.p2pk.get(prefix).unwrap().get_from_puts(key),
Address::P2PKH((prefix, key)) => self.p2pkh.get(prefix).unwrap().get_from_puts(key),
Address::P2SH((prefix, key)) => self.p2sh.get(prefix).unwrap().get_from_puts(key),
Address::P2WPKH((prefix, key)) => self.p2wpkh.get(prefix).unwrap().get_from_puts(key),
Address::P2WSH((prefix, key)) => self.p2wsh.get(prefix).unwrap().get_from_puts(key),
Address::P2TR((prefix, key)) => self.p2tr.get(prefix).unwrap().get_from_puts(key),
}
}
pub fn insert(&mut self, address: Address, value: Value) -> Option<Value> {
self.metadata.called_insert();
match address {
Address::Empty(key) => self.open_empty().insert(key, value),
Address::Unknown(key) => self.open_unknown().insert(key, value),
Address::OpReturn(key) => self.open_op_return().insert(key, value),
Address::PushOnly(key) => self.open_push_only().insert(key, value),
Address::MultiSig(key) => self.open_multisig().insert(key, value),
Address::P2PK((prefix, rest)) => self.open_p2pk(prefix).insert(rest, value),
Address::P2PKH((prefix, rest)) => self.open_p2pkh(prefix).insert(rest, value),
Address::P2SH((prefix, rest)) => self.open_p2sh(prefix).insert(rest, value),
Address::P2WPKH((prefix, rest)) => self.open_p2wpkh(prefix).insert(rest, value),
Address::P2WSH((prefix, rest)) => self.open_p2wsh(prefix).insert(rest, value),
Address::P2TR((prefix, rest)) => self.open_p2tr(prefix).insert(rest, value),
}
}
pub fn open_p2pk(&mut self, prefix: u16) -> &mut P2PKDatabase {
self.p2pk.entry(prefix).or_insert_with(|| {
Database::open(
&format!("{}/{}", Self::folder(), "p2pk"),
&prefix.to_string(),
|key| key,
)
.unwrap()
})
}
pub fn open_p2pkh(&mut self, prefix: u16) -> &mut P2PKHDatabase {
self.p2pkh.entry(prefix).or_insert_with(|| {
Database::open(
&format!("{}/{}", Self::folder(), "p2pkh"),
&prefix.to_string(),
|key| key,
)
.unwrap()
})
}
pub fn open_p2sh(&mut self, prefix: u16) -> &mut P2SHDatabase {
self.p2sh.entry(prefix).or_insert_with(|| {
Database::open(
&format!("{}/{}", Self::folder(), "p2sh"),
&prefix.to_string(),
|key| key,
)
.unwrap()
})
}
pub fn open_p2wpkh(&mut self, prefix: u16) -> &mut P2WPKHDatabase {
self.p2wpkh.entry(prefix).or_insert_with(|| {
Database::open(
&format!("{}/{}", Self::folder(), "p2wpkh"),
&prefix.to_string(),
|key| key,
)
.unwrap()
})
}
pub fn open_p2wsh(&mut self, prefix: u16) -> &mut P2WSHDatabase {
self.p2wsh.entry(prefix).or_insert_with(|| {
Database::open(
&format!("{}/{}", Self::folder(), "p2wsh"),
&prefix.to_string(),
|key| key,
)
.unwrap()
})
}
pub fn open_p2tr(&mut self, prefix: u16) -> &mut P2TRDatabase {
self.p2tr.entry(prefix).or_insert_with(|| {
Database::open(
&format!("{}/{}", Self::folder(), "p2tr"),
&prefix.to_string(),
|key| key,
)
.unwrap()
})
}
pub fn open_unknown(&mut self) -> &mut UnknownDatabase {
self.unknown
.get_or_insert_with(|| Database::open(Self::folder(), "unknown", |key| key).unwrap())
}
pub fn open_op_return(&mut self) -> &mut UnknownDatabase {
self.op_return
.get_or_insert_with(|| Database::open(Self::folder(), "op_return", |key| key).unwrap())
}
pub fn open_push_only(&mut self) -> &mut UnknownDatabase {
self.push_only
.get_or_insert_with(|| Database::open(Self::folder(), "push_only", |key| key).unwrap())
}
pub fn open_empty(&mut self) -> &mut UnknownDatabase {
self.empty
.get_or_insert_with(|| Database::open(Self::folder(), "empty", |key| key).unwrap())
}
pub fn open_multisig(&mut self) -> &mut MultisigDatabase {
self.multisig.get_or_insert_with(|| {
Database::open(Self::folder(), "multisig", |key| key as &[u8]).unwrap()
})
}
}
impl AnyDatabaseGroup for AddressToAddressIndex {
    /// Builds the group with no database open and the metadata imported from the group's
    /// folder.
    fn import() -> Self {
        Self {
            p2pk: BTreeMap::default(),
            p2pkh: BTreeMap::default(),
            p2sh: BTreeMap::default(),
            p2wpkh: BTreeMap::default(),
            p2wsh: BTreeMap::default(),
            p2tr: BTreeMap::default(),
            op_return: None,
            push_only: None,
            unknown: None,
            empty: None,
            multisig: None,
            metadata: Metadata::import(&Self::full_path()),
        }
    }

    /// Exports every open database (leaving none open), then exports the metadata for
    /// `height` and `date`.
    ///
    /// The databases are exported by three worker threads plus the calling thread (which
    /// handles `multisig`).
    ///
    /// # Errors
    ///
    /// Returns the first error reported by a database export or by the metadata export.
    /// The metadata is not exported when a database export fails.
    ///
    /// # Panics
    ///
    /// Re-raises the panic of a worker thread.
    fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()> {
        thread::scope(|s| -> color_eyre::Result<()> {
            let workers = [
                // 19-byte keyed databases.
                s.spawn(|| {
                    mem::take(&mut self.p2pk)
                        .into_par_iter()
                        .chain(mem::take(&mut self.p2pkh).into_par_iter())
                        .chain(mem::take(&mut self.p2sh).into_par_iter())
                        .chain(mem::take(&mut self.p2wpkh).into_par_iter())
                        .try_for_each(|(_, db)| db.export())
                }),
                // 31-byte keyed databases.
                s.spawn(|| {
                    mem::take(&mut self.p2wsh)
                        .into_par_iter()
                        .chain(mem::take(&mut self.p2tr).into_par_iter())
                        .try_for_each(|(_, db)| db.export())
                }),
                // `u32` keyed single databases.
                s.spawn(|| {
                    [
                        self.unknown.take(),
                        self.op_return.take(),
                        self.push_only.take(),
                        self.empty.take(),
                    ]
                    .into_par_iter()
                    .flatten()
                    .try_for_each(|db| db.export())
                }),
            ];

            // Exported on the calling thread while the workers are running.
            let multisig_result = self.multisig.take().map_or(Ok(()), |db| db.export());

            // Every worker result is checked: an ignored join handle would silently drop
            // the export error of its databases.
            for worker in workers {
                worker
                    .join()
                    .unwrap_or_else(|payload| std::panic::resume_unwind(payload))?;
            }

            multisig_result?;

            Ok(())
        })?;

        self.metadata.export(height, date)?;

        Ok(())
    }

    /// Resets the metadata of the group.
    fn reset_metadata(&mut self) {
        self.metadata.reset()
    }

    /// Name of the group's folder.
    fn folder<'a>() -> &'a str {
        "address_to_address_index"
    }
}

View File

@@ -0,0 +1,116 @@
use allocative::Allocative;
use bincode::{Decode, Encode};
use std::{
fmt::Debug,
fs, io,
ops::{Deref, DerefMut},
};
use crate::{
io::Binary,
structs::{Counter, WNaiveDate},
};
/// Metadata of a database group together with the folder it is persisted in.
///
/// Dereferences to [`MetadataData`], so the counters and markers can be accessed directly.
#[derive(Default, Debug, Encode, Decode, Allocative)]
pub struct Metadata {
// Folder in which the metadata file is read and written.
path: String,
// The persisted values.
data: MetadataData,
}
/// Gives read access to the fields of the wrapped [`MetadataData`].
impl Deref for Metadata {
    type Target = MetadataData;

    fn deref(&self) -> &Self::Target {
        let Self { data, .. } = self;
        data
    }
}

/// Gives write access to the fields of the wrapped [`MetadataData`].
impl DerefMut for Metadata {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { data, .. } = self;
        data
    }
}
impl Metadata {
    /// Loads the metadata stored in the folder `path`, falling back to default values when
    /// it cannot be imported.
    pub fn import(path: &str) -> Self {
        Self {
            path: path.to_owned(),
            data: MetadataData::import(path).unwrap_or_default(),
        }
    }

    /// Moves `last_height` / `last_date` forward to `height` / `date` when these are newer
    /// (or when nothing was recorded yet), then writes the metadata to disk.
    ///
    /// # Errors
    ///
    /// Returns the error raised while writing the metadata file.
    pub fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()> {
        // `None` always gets replaced. Comparing against a default value instead would
        // leave the marker unset when the first export happens at height 0 (or at the
        // default date).
        if self
            .last_height
            .map_or(true, |last_height| last_height < height)
        {
            self.last_height.replace(height);
        }

        if self.last_date.map_or(true, |last_date| last_date < date) {
            self.last_date.replace(date);
        }

        self.data.export(&self.path)
    }

    /// Clears the values and deletes the metadata file; a failure to delete the file is
    /// deliberately ignored.
    pub fn reset(&mut self) {
        let _ = self.data.reset(&self.path);
    }

    /// Records an insertion: bumps `serial` and increments `len`.
    pub fn called_insert(&mut self) {
        self.serial += 1;
        self.len.increment();
    }

    /// Records a removal: decrements `len`.
    pub fn called_remove(&mut self) {
        self.len.decrement();
    }

    /// Returns `true` when both metadata have the same last date and the same last height.
    pub fn check_if_in_sync(&self, other: &Self) -> bool {
        self.last_date == other.last_date && self.last_height == other.last_height
    }

    /// Returns `true` when this metadata's last date and last height are both at or beyond
    /// `other`'s.
    pub fn check_farer_or_in_sync(&self, other: &Self) -> bool {
        self.last_date >= other.last_date && self.last_height >= other.last_height
    }
}
/// Values persisted for a database group.
#[derive(Default, Debug, Encode, Decode, Allocative)]
pub struct MetadataData {
/// Incremented by `Metadata::called_insert`; never decremented in this file.
pub serial: usize,
/// Incremented by `Metadata::called_insert` and decremented by `Metadata::called_remove`.
pub len: Counter,
/// Highest height passed to `Metadata::export` so far, if any.
pub last_height: Option<usize>,
/// Latest date passed to `Metadata::export` so far, if any.
pub last_date: Option<WNaiveDate>,
}
impl MetadataData {
    /// Base name (without extension) of the metadata file.
    fn name<'a>() -> &'a str {
        "metadata"
    }

    /// Path of the metadata file inside `folder_path`.
    fn full_path(folder_path: &str) -> String {
        format!("{}/{}.bin", folder_path, Self::name())
    }

    /// Creates the folder `path` if needed and reads the metadata file it contains.
    pub fn import(path: &str) -> color_eyre::Result<Self> {
        fs::create_dir_all(path)?;

        let file_path = Self::full_path(path);
        Binary::import(&file_path)
    }

    /// Writes the values to the metadata file of the folder `path`.
    pub fn export(&self, path: &str) -> color_eyre::Result<()> {
        let file_path = Self::full_path(path);
        Binary::export(&file_path, self)
    }

    /// Clears the values, then deletes the metadata file of the folder `path`.
    ///
    /// The values are cleared even when deleting the file fails.
    pub fn reset(&mut self, path: &str) -> color_eyre::Result<(), io::Error> {
        self.clear();

        let file_path = Self::full_path(path);
        fs::remove_file(file_path)
    }

    /// Puts every value back to its initial state.
    fn clear(&mut self) {
        self.last_date = None;
        self.last_height = None;
        self.len.reset();
        self.serial = 0;
    }
}

178
parser/src/databases/mod.rs Normal file
View File

@@ -0,0 +1,178 @@
use std::thread::{self};
use allocative::Allocative;
mod _database;
mod _trait;
mod address_index_to_address_data;
mod address_index_to_empty_address_data;
mod address_to_address_index;
mod metadata;
mod txid_to_tx_data;
mod txout_index_to_address_index;
mod txout_index_to_amount;
pub use _database::*;
use _trait::*;
pub use address_index_to_address_data::*;
pub use address_index_to_empty_address_data::*;
pub use address_to_address_index::*;
use metadata::*;
pub use txid_to_tx_data::*;
pub use txout_index_to_address_index::*;
pub use txout_index_to_amount::*;
use crate::{structs::WNaiveDate, utils::time};
/// Every database group of the module, gathered in one place.
///
/// The four `address*` groups (and `txout_index_to_address_index`) are treated as the
/// "address" databases by `reset` and `check_if_usable`; `txid_to_tx_data` and
/// `txout_index_to_amount` are the "tx" databases.
#[derive(Allocative)]
pub struct Databases {
pub address_index_to_address_data: AddressIndexToAddressData,
pub address_index_to_empty_address_data: AddressIndexToEmptyAddressData,
pub address_to_address_index: AddressToAddressIndex,
pub txid_to_tx_data: TxidToTxData,
pub txout_index_to_address_index: TxoutIndexToAddressIndex,
pub txout_index_to_amount: TxoutIndexToAmount,
}
impl Databases {
pub fn import() -> Self {
let address_index_to_address_data = AddressIndexToAddressData::import();
let address_index_to_empty_address_data = AddressIndexToEmptyAddressData::import();
let address_to_address_index = AddressToAddressIndex::import();
let txid_to_tx_data = TxidToTxData::import();
let txout_index_to_address_index = TxoutIndexToAddressIndex::import();
let txout_index_to_amount = TxoutIndexToAmount::import();
Self {
address_index_to_address_data,
address_index_to_empty_address_data,
address_to_address_index,
txid_to_tx_data,
txout_index_to_address_index,
txout_index_to_amount,
}
}
pub fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()> {
thread::scope(|s| {
s.spawn(|| {
time("> Database txid_to_tx_data", || {
self.txid_to_tx_data.export(height, date)
})
});
s.spawn(|| {
time("> Database txout_index_to_amount", || {
self.txout_index_to_amount.export(height, date)
})
});
});
thread::scope(|s| {
s.spawn(|| {
time("> Database address_index_to_address_data", || {
self.address_index_to_address_data.export(height, date)
})
});
s.spawn(|| {
time("> Database address_index_to_empty_address_data", || {
self.address_index_to_empty_address_data
.export(height, date)
})
});
s.spawn(|| {
time("> Database address_to_address_index", || {
self.address_to_address_index.export(height, date)
})
});
s.spawn(|| {
time("> Database txout_index_to_address_index", || {
self.txout_index_to_address_index.export(height, date)
})
});
});
Ok(())
}
pub fn reset(&mut self, include_addresses: bool) {
if include_addresses {
let _ = self.address_index_to_address_data.reset();
let _ = self.address_index_to_empty_address_data.reset();
let _ = self.address_to_address_index.reset();
let _ = self.txout_index_to_address_index.reset();
}
let _ = self.txid_to_tx_data.reset();
let _ = self.txout_index_to_amount.reset();
}
pub fn check_if_needs_to_compute_addresses(&self, height: usize, date: WNaiveDate) -> bool {
let check_height = |last_height: Option<usize>| {
last_height.map_or(true, |last_height| last_height < height)
};
let check_date =
|last_date: Option<WNaiveDate>| last_date.map_or(true, |last_date| last_date < date);
let check_metadata = |metadata: &Metadata| {
check_height(metadata.last_height) || check_date(metadata.last_date)
};
// We only need to check one as we previously checked that they're all in sync
check_metadata(&self.address_to_address_index.metadata)
}
pub fn check_if_usable(
&self,
min_initial_last_address_height: Option<usize>,
min_initial_last_address_date: Option<WNaiveDate>,
) -> bool {
let are_tx_databases_in_sync = self
.txout_index_to_amount
.metadata
.check_if_in_sync(&self.txid_to_tx_data.metadata);
if !are_tx_databases_in_sync {
return false;
}
let are_address_databases_in_sync = self
.address_to_address_index
.metadata
.check_if_in_sync(&self.address_index_to_empty_address_data.metadata)
&& self
.address_to_address_index
.metadata
.check_if_in_sync(&self.address_index_to_address_data.metadata)
&& self
.address_to_address_index
.metadata
.check_if_in_sync(&self.txout_index_to_address_index.metadata);
if !are_address_databases_in_sync {
return false;
}
let are_address_databases_farer_or_in_sync_with_tx_database = self
.address_to_address_index
.metadata
.check_farer_or_in_sync(&self.txid_to_tx_data.metadata);
if !are_address_databases_farer_or_in_sync_with_tx_database {
return false;
}
// let are_address_datasets_farer_or_in_sync_with_address_databases =
min_initial_last_address_height >= self.address_to_address_index.metadata.last_height
&& min_initial_last_address_date >= self.address_to_address_index.metadata.last_date
}
}

View File

@@ -0,0 +1,147 @@
use std::{
collections::BTreeMap,
mem,
ops::{Deref, DerefMut},
};
use allocative::Allocative;
use bitcoin::Txid;
use rayon::prelude::*;
use crate::structs::{TxData, WNaiveDate};
use super::{AnyDatabaseGroup, Metadata, SizedDatabase, U8x31};
// The last 31 bytes of a txid (see `txid_to_key`); the first byte selects the shard.
type Key = U8x31;
type Value = TxData;
type Database = SizedDatabase<Key, Value>;
/// Group of `SizedDatabase<U8x31, TxData>` shards, one per value of a txid's first byte.
#[derive(Allocative)]
pub struct TxidToTxData {
/// Metadata of the group; updated by `insert`, the `remove_*` methods and `export`.
pub metadata: Metadata,
/// Currently open shards, keyed by the first byte of the txid. Emptied by `export`.
map: BTreeMap<u8, Database>,
}
/// Exposes the map of open shards for reading.
impl Deref for TxidToTxData {
    type Target = BTreeMap<u8, Database>;

    fn deref(&self) -> &Self::Target {
        let Self { map, .. } = self;
        map
    }
}

/// Exposes the map of open shards for writing.
impl DerefMut for TxidToTxData {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { map, .. } = self;
        map
    }
}
impl TxidToTxData {
    /// Counts an insertion in the metadata, then buffers `tx_index` under `txid` in the
    /// matching shard (opened if needed), cancelling a pending deletion of the same key.
    pub fn insert(&mut self, txid: &Txid, tx_index: Value) -> Option<Value> {
        self.metadata.called_insert();

        let key = Self::txid_to_key(txid);
        let database = self.open_db(txid);

        database.insert(key, tx_index)
    }

    /// Looks `txid` up in its shard (pending insertions first, then the B-tree).
    ///
    /// Takes `&self` and never opens a shard, which makes it easy to use with rayon.
    ///
    /// # Panics
    ///
    /// Panics when the shard holding `txid` is not open.
    pub fn unsafe_get(&self, txid: &Txid) -> Option<&Value> {
        let key = Self::txid_to_key(txid);
        let database = self.map.get(&Self::db_index(txid)).unwrap();

        database.get(&key)
    }

    /// Returns a mutable reference to the pending insertion of `txid`, if any.
    ///
    /// # Panics
    ///
    /// Panics when the shard holding `txid` is not open.
    pub fn unsafe_get_mut_from_puts(&mut self, txid: &Txid) -> Option<&mut Value> {
        let key = Self::txid_to_key(txid);
        let database = self.map.get_mut(&Self::db_index(txid)).unwrap();

        database.get_mut_from_puts(&key)
    }

    /// Counts a removal in the metadata, then schedules `txid` for deletion from the B-tree
    /// of its shard (opened if needed).
    pub fn remove_from_db(&mut self, txid: &Txid) {
        self.metadata.called_remove();

        let key = Self::txid_to_key(txid);
        let database = self.open_db(txid);

        database.db_remove(&key);
    }

    /// Counts a removal in the metadata, then cancels the pending insertion of `txid` in its
    /// shard (opened if needed).
    pub fn remove_from_puts(&mut self, txid: &Txid) {
        self.metadata.called_remove();

        let key = Self::txid_to_key(txid);
        let database = self.open_db(txid);

        database.remove_from_puts(&key);
    }

    /// Schedules the replacement of the value stored under `txid` by `tx_data` in its shard
    /// (opened if needed). The metadata is left untouched.
    pub fn update(&mut self, txid: &Txid, tx_data: TxData) {
        let key = Self::txid_to_key(txid);
        let database = self.open_db(txid);

        database.update(key, tx_data);
    }

    /// Returns the shard holding `txid`, opening it first when it is not open yet. The
    /// shard's file is named after its index.
    ///
    /// # Panics
    ///
    /// Panics when the shard cannot be opened.
    #[inline(always)]
    pub fn open_db(&mut self, txid: &Txid) -> &mut Database {
        let db_index = Self::db_index(txid);

        self.map.entry(db_index).or_insert_with(|| {
            let db_name = db_index.to_string();

            SizedDatabase::open(Self::folder(), &db_name, |key| key).unwrap()
        })
    }

    /// Key of `txid` inside its shard: every byte but the first one.
    fn txid_to_key(txid: &Txid) -> U8x31 {
        let tail = &txid[1..];
        U8x31::from(tail)
    }

    /// Index of the shard holding `txid`: its first byte.
    fn db_index(txid: &Txid) -> u8 {
        txid[0]
    }
}
impl AnyDatabaseGroup for TxidToTxData {
    /// Builds the group with no shard open and the metadata imported from the group's folder.
    fn import() -> Self {
        let metadata = Metadata::import(&Self::full_path());

        Self {
            metadata,
            map: BTreeMap::new(),
        }
    }

    /// Exports every open shard in parallel (leaving the map empty), then exports the
    /// metadata for `height` and `date`.
    fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()> {
        let databases = mem::take(&mut self.map);

        databases
            .into_par_iter()
            .try_for_each(|(_index, database)| database.export())?;

        self.metadata.export(height, date)
    }

    /// Resets the metadata of the group.
    fn reset_metadata(&mut self) {
        self.metadata.reset();
    }

    /// Name of the group's folder.
    fn folder<'a>() -> &'a str {
        "txid_to_tx_data"
    }
}

View File

@@ -0,0 +1,114 @@
use std::{
collections::BTreeMap,
mem,
ops::{Deref, DerefMut},
};
use allocative::Allocative;
use rayon::prelude::*;
use crate::structs::{TxoutIndex, WNaiveDate};
use super::{AnyDatabaseGroup, Metadata, SizedDatabase};
type Key = TxoutIndex;
// NOTE(review): presumably an address index — inferred from the struct name only.
type Value = u32;
type Database = SizedDatabase<Key, Value>;
/// Group of `SizedDatabase<TxoutIndex, u32>` shards.
///
/// Each shard covers `DB_MAX_SIZE` consecutive values of `key.as_u64()` and is stored under
/// its shard index (see `db_index`). Shards are opened on demand by `open_db`.
#[derive(Allocative)]
pub struct TxoutIndexToAddressIndex {
/// Metadata of the group; updated by `unsafe_insert`, `remove` and `export`.
pub metadata: Metadata,
/// Currently open shards, keyed by shard index. Emptied by `export`.
map: BTreeMap<usize, Database>,
}
/// Exposes the map of open shards for reading.
impl Deref for TxoutIndexToAddressIndex {
    type Target = BTreeMap<usize, Database>;

    fn deref(&self) -> &Self::Target {
        let Self { map, .. } = self;
        map
    }
}

/// Exposes the map of open shards for writing.
impl DerefMut for TxoutIndexToAddressIndex {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { map, .. } = self;
        map
    }
}
/// Number of consecutive `as_u64()` key values covered by one shard.
const DB_MAX_SIZE: usize = 10_000_000_000;

impl TxoutIndexToAddressIndex {
    /// Counts an insertion in the metadata, then buffers `value` under `key` in the matching
    /// shard (opened if needed) without touching that shard's pending deletions.
    pub fn unsafe_insert(&mut self, key: Key, value: Value) -> Option<Value> {
        self.metadata.called_insert();

        let database = self.open_db(&key);
        database.unsafe_insert(key, value)
    }

    /// Counts a removal in the metadata, then removes `key` from the matching shard
    /// (opened if needed).
    pub fn remove(&mut self, key: &Key) -> Option<Value> {
        self.metadata.called_remove();

        let database = self.open_db(key);
        database.remove(key)
    }

    /// Looks `key` up in its shard (pending insertions first, then the B-tree).
    ///
    /// Takes `&self` and never opens a shard, which makes it easy to use with rayon.
    ///
    /// # Panics
    ///
    /// Panics when the shard holding `key` is not open.
    pub fn unsafe_get(&self, key: &Key) -> Option<&Value> {
        let database = self.map.get(&Self::db_index(key)).unwrap();
        database.get(key)
    }

    /// Returns the shard holding `key`, opening it first when it is not open yet.
    ///
    /// # Panics
    ///
    /// Panics when the shard cannot be opened.
    pub fn open_db(&mut self, key: &Key) -> &mut Database {
        let db_index = Self::db_index(key);

        self.map.entry(db_index).or_insert_with(|| {
            // The file is named after the key range covered by the shard.
            let start = db_index * DB_MAX_SIZE;
            let end = (db_index + 1) * DB_MAX_SIZE;
            let db_name = format!("{start}..{end}");

            SizedDatabase::open(Self::folder(), &db_name, |key| key).unwrap()
        })
    }

    /// Index of the shard holding `key`.
    fn db_index(key: &Key) -> usize {
        let index = key.as_u64() as usize;
        index / DB_MAX_SIZE
    }
}
impl AnyDatabaseGroup for TxoutIndexToAddressIndex {
    /// Builds the group with no shard open and the metadata imported from the group's folder.
    fn import() -> Self {
        let metadata = Metadata::import(&Self::full_path());

        Self {
            metadata,
            map: BTreeMap::new(),
        }
    }

    /// Exports every open shard in parallel (leaving the map empty), then exports the
    /// metadata for `height` and `date`.
    fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()> {
        let databases = mem::take(&mut self.map);

        databases
            .into_par_iter()
            .try_for_each(|(_index, database)| database.export())?;

        self.metadata.export(height, date)
    }

    /// Resets the metadata of the group.
    fn reset_metadata(&mut self) {
        self.metadata.reset();
    }

    /// Name of the group's folder.
    fn folder<'a>() -> &'a str {
        "txout_index_to_address_index"
    }
}

View File

@@ -0,0 +1,114 @@
use std::{
collections::BTreeMap,
mem,
ops::{Deref, DerefMut},
};
use allocative::Allocative;
use rayon::prelude::*;
use crate::structs::{TxoutIndex, WAmount, WNaiveDate};
use super::{AnyDatabaseGroup, Metadata, SizedDatabase};
type Key = TxoutIndex;
type Value = WAmount;
type Database = SizedDatabase<Key, Value>;
/// Group of `SizedDatabase<TxoutIndex, WAmount>` shards.
///
/// Each shard covers `DB_MAX_SIZE` consecutive values of `key.as_u64()` and is stored under
/// its shard index (see `db_index`). Shards are opened on demand by `open_db`.
#[derive(Allocative)]
pub struct TxoutIndexToAmount {
/// Metadata of the group; updated by `unsafe_insert`, `remove` and `export`.
pub metadata: Metadata,
/// Currently open shards, keyed by shard index. Emptied by `export`.
pub map: BTreeMap<usize, Database>,
}
/// Exposes the map of open shards for reading.
impl Deref for TxoutIndexToAmount {
    type Target = BTreeMap<usize, Database>;

    fn deref(&self) -> &Self::Target {
        let Self { map, .. } = self;
        map
    }
}

/// Exposes the map of open shards for writing.
impl DerefMut for TxoutIndexToAmount {
    fn deref_mut(&mut self) -> &mut Self::Target {
        let Self { map, .. } = self;
        map
    }
}
/// Number of consecutive `as_u64()` key values covered by one shard.
const DB_MAX_SIZE: usize = 10_000_000_000;

impl TxoutIndexToAmount {
    /// Counts an insertion in the metadata, then buffers `value` under `key` in the matching
    /// shard (opened if needed) without touching that shard's pending deletions.
    pub fn unsafe_insert(&mut self, key: Key, value: Value) -> Option<Value> {
        self.metadata.called_insert();

        let database = self.open_db(&key);
        database.unsafe_insert(key, value)
    }

    /// Counts a removal in the metadata, then removes `key` from the matching shard
    /// (opened if needed).
    pub fn remove(&mut self, key: &Key) -> Option<Value> {
        self.metadata.called_remove();

        let database = self.open_db(key);
        database.remove(key)
    }

    /// Looks `key` up in its shard (pending insertions first, then the B-tree).
    ///
    /// Takes `&self` and never opens a shard, which makes it easy to use with rayon.
    ///
    /// # Panics
    ///
    /// Panics when the shard holding `key` is not open.
    pub fn unsafe_get(&self, key: &Key) -> Option<&Value> {
        let database = self.map.get(&Self::db_index(key)).unwrap();
        database.get(key)
    }

    /// Returns the shard holding `key`, opening it first when it is not open yet.
    ///
    /// # Panics
    ///
    /// Panics when the shard cannot be opened.
    pub fn open_db(&mut self, key: &Key) -> &mut Database {
        let db_index = Self::db_index(key);

        self.map.entry(db_index).or_insert_with(|| {
            // The file is named after the key range covered by the shard.
            let start = db_index * DB_MAX_SIZE;
            let end = (db_index + 1) * DB_MAX_SIZE;
            let db_name = format!("{start}..{end}");

            SizedDatabase::open(Self::folder(), &db_name, |key| key).unwrap()
        })
    }

    /// Index of the shard holding `key`.
    fn db_index(key: &Key) -> usize {
        let index = key.as_u64() as usize;
        index / DB_MAX_SIZE
    }
}
impl AnyDatabaseGroup for TxoutIndexToAmount {
    /// Builds the group with no shard open and the metadata imported from the group's folder.
    fn import() -> Self {
        let metadata = Metadata::import(&Self::full_path());

        Self {
            metadata,
            map: BTreeMap::new(),
        }
    }

    /// Exports every open shard in parallel (leaving the map empty), then exports the
    /// metadata for `height` and `date`.
    fn export(&mut self, height: usize, date: WNaiveDate) -> color_eyre::Result<()> {
        let databases = mem::take(&mut self.map);

        databases
            .into_par_iter()
            .try_for_each(|(_index, database)| database.export())?;

        self.metadata.export(height, date)
    }

    /// Resets the metadata of the group.
    fn reset_metadata(&mut self) {
        self.metadata.reset();
    }

    /// Name of the group's folder.
    fn folder<'a>() -> &'a str {
        "txout_index_to_amount"
    }
}