mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-04-24 06:39:58 -07:00
global: snapshot
This commit is contained in:
@@ -1139,6 +1139,42 @@ pub struct CapGrossInvestorLossMvrvNetPeakPriceProfitSellSoprPattern {
|
||||
pub sopr: AdjustedRatioValuePattern,
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct EmptyOpP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern2 {
|
||||
pub empty: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub op_return: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2a: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2ms: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk33: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk65: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2sh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2tr: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wpkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wsh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub unknown: _1m1w1y24hBpsPercentRatioPattern,
|
||||
}
|
||||
|
||||
impl EmptyOpP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern2 {
|
||||
/// Create a new pattern node with accumulated series name.
|
||||
pub fn new(client: Arc<BrkClientBase>, acc: String) -> Self {
|
||||
Self {
|
||||
empty: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "empty_outputs_output")),
|
||||
op_return: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "op_return_output")),
|
||||
p2a: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2a_output")),
|
||||
p2ms: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2ms_output")),
|
||||
p2pk33: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2pk33_output")),
|
||||
p2pk65: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2pk65_output")),
|
||||
p2pkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2pkh_output")),
|
||||
p2sh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2sh_output")),
|
||||
p2tr: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2tr_output")),
|
||||
p2wpkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2wpkh_output")),
|
||||
p2wsh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2wsh_output")),
|
||||
unknown: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "unknown_outputs_output")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct AverageBlockCumulativeMaxMedianMinPct10Pct25Pct75Pct90SumPattern {
|
||||
pub average: _1m1w1y24hPattern<StoredF32>,
|
||||
@@ -1173,6 +1209,40 @@ impl AverageBlockCumulativeMaxMedianMinPct10Pct25Pct75Pct90SumPattern {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct EmptyP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern2 {
|
||||
pub empty: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2a: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2ms: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk33: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk65: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2sh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2tr: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wpkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wsh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub unknown: _1m1w1y24hBpsPercentRatioPattern,
|
||||
}
|
||||
|
||||
impl EmptyP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern2 {
|
||||
/// Create a new pattern node with accumulated series name.
|
||||
pub fn new(client: Arc<BrkClientBase>, acc: String) -> Self {
|
||||
Self {
|
||||
empty: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "empty_outputs_prevout")),
|
||||
p2a: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2a_prevout")),
|
||||
p2ms: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2ms_prevout")),
|
||||
p2pk33: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2pk33_prevout")),
|
||||
p2pk65: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2pk65_prevout")),
|
||||
p2pkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2pkh_prevout")),
|
||||
p2sh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2sh_prevout")),
|
||||
p2tr: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2tr_prevout")),
|
||||
p2wpkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2wpkh_prevout")),
|
||||
p2wsh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "p2wsh_prevout")),
|
||||
unknown: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _m(&acc, "unknown_outputs_prevout")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct AverageBaseCumulativeMaxMedianMinPct10Pct25Pct75Pct90SumPattern<T> {
|
||||
pub average: _1m1w1y24hPattern<T>,
|
||||
@@ -1267,6 +1337,36 @@ impl AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern3 {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern6 {
|
||||
pub all: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2a: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk33: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk65: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2sh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2tr: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wpkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wsh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
}
|
||||
|
||||
impl AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern6 {
|
||||
/// Create a new pattern node with accumulated series name.
|
||||
pub fn new(client: Arc<BrkClientBase>, acc: String) -> Self {
|
||||
Self {
|
||||
all: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), acc.clone()),
|
||||
p2a: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _p("p2a", &acc)),
|
||||
p2pk33: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _p("p2pk33", &acc)),
|
||||
p2pk65: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _p("p2pk65", &acc)),
|
||||
p2pkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _p("p2pkh", &acc)),
|
||||
p2sh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _p("p2sh", &acc)),
|
||||
p2tr: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _p("p2tr", &acc)),
|
||||
p2wpkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _p("p2wpkh", &acc)),
|
||||
p2wsh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), _p("p2wsh", &acc)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct AverageMaxMedianMinPct10Pct25Pct75Pct90SumPattern {
|
||||
pub average: _1m1w1y24hPattern<StoredF32>,
|
||||
@@ -1737,6 +1837,28 @@ impl _1m1w1y24hBlockPattern {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct ActiveBidirectionalReactivatedReceivingSendingPattern {
|
||||
pub active: _1m1w1y24hBlockPattern,
|
||||
pub bidirectional: _1m1w1y24hBlockPattern,
|
||||
pub reactivated: _1m1w1y24hBlockPattern,
|
||||
pub receiving: _1m1w1y24hBlockPattern,
|
||||
pub sending: _1m1w1y24hBlockPattern,
|
||||
}
|
||||
|
||||
impl ActiveBidirectionalReactivatedReceivingSendingPattern {
|
||||
/// Create a new pattern node with accumulated series name.
|
||||
pub fn new(client: Arc<BrkClientBase>, acc: String) -> Self {
|
||||
Self {
|
||||
active: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "active_addrs")),
|
||||
bidirectional: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "bidirectional_addrs")),
|
||||
reactivated: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "reactivated_addrs")),
|
||||
receiving: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "receiving_addrs")),
|
||||
sending: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "sending_addrs")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct ActivityOutputsRealizedSupplyUnrealizedPattern {
|
||||
pub activity: CoindaysTransferPattern,
|
||||
@@ -2238,26 +2360,6 @@ impl BlockCumulativeDeltaSumPattern {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct BothReactivatedReceivingSendingPattern {
|
||||
pub both: _1m1w1y24hBlockPattern,
|
||||
pub reactivated: _1m1w1y24hBlockPattern,
|
||||
pub receiving: _1m1w1y24hBlockPattern,
|
||||
pub sending: _1m1w1y24hBlockPattern,
|
||||
}
|
||||
|
||||
impl BothReactivatedReceivingSendingPattern {
|
||||
/// Create a new pattern node with accumulated series name.
|
||||
pub fn new(client: Arc<BrkClientBase>, acc: String) -> Self {
|
||||
Self {
|
||||
both: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "both")),
|
||||
reactivated: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "reactivated")),
|
||||
receiving: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "receiving")),
|
||||
sending: _1m1w1y24hBlockPattern::new(client.clone(), _m(&acc, "sending")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern struct for repeated tree structure.
|
||||
pub struct BtcCentsSatsUsdPattern {
|
||||
pub btc: SeriesPattern1<Bitcoin>,
|
||||
@@ -3748,16 +3850,18 @@ impl SeriesTree_Inputs_Spent {
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Inputs_ByType {
|
||||
pub input_count: SeriesTree_Inputs_ByType_InputCount,
|
||||
pub input_share: SeriesTree_Inputs_ByType_InputShare,
|
||||
pub tx_count: SeriesTree_Inputs_ByType_TxCount,
|
||||
pub tx_percent: SeriesTree_Inputs_ByType_TxPercent,
|
||||
pub tx_share: EmptyP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern2,
|
||||
}
|
||||
|
||||
impl SeriesTree_Inputs_ByType {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
input_count: SeriesTree_Inputs_ByType_InputCount::new(client.clone(), format!("{base_path}_input_count")),
|
||||
input_share: SeriesTree_Inputs_ByType_InputShare::new(client.clone(), format!("{base_path}_input_share")),
|
||||
tx_count: SeriesTree_Inputs_ByType_TxCount::new(client.clone(), format!("{base_path}_tx_count")),
|
||||
tx_percent: SeriesTree_Inputs_ByType_TxPercent::new(client.clone(), format!("{base_path}_tx_percent")),
|
||||
tx_share: EmptyP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern2::new(client.clone(), "tx_share_with".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3797,6 +3901,39 @@ impl SeriesTree_Inputs_ByType_InputCount {
|
||||
}
|
||||
}
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Inputs_ByType_InputShare {
|
||||
pub p2pk65: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk33: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2ms: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2sh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wpkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wsh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2tr: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2a: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub unknown: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub empty: _1m1w1y24hBpsPercentRatioPattern,
|
||||
}
|
||||
|
||||
impl SeriesTree_Inputs_ByType_InputShare {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
p2pk65: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pk65_prevout_share".to_string()),
|
||||
p2pk33: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pk33_prevout_share".to_string()),
|
||||
p2pkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pkh_prevout_share".to_string()),
|
||||
p2ms: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2ms_prevout_share".to_string()),
|
||||
p2sh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2sh_prevout_share".to_string()),
|
||||
p2wpkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2wpkh_prevout_share".to_string()),
|
||||
p2wsh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2wsh_prevout_share".to_string()),
|
||||
p2tr: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2tr_prevout_share".to_string()),
|
||||
p2a: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2a_prevout_share".to_string()),
|
||||
unknown: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "unknown_outputs_prevout_share".to_string()),
|
||||
empty: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "empty_outputs_prevout_share".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Inputs_ByType_TxCount {
|
||||
pub all: AverageBlockCumulativeSumPattern<StoredU64>,
|
||||
@@ -3832,39 +3969,6 @@ impl SeriesTree_Inputs_ByType_TxCount {
|
||||
}
|
||||
}
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Inputs_ByType_TxPercent {
|
||||
pub p2pk65: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk33: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2ms: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2sh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wpkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wsh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2tr: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2a: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub unknown: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub empty: _1m1w1y24hBpsPercentRatioPattern,
|
||||
}
|
||||
|
||||
impl SeriesTree_Inputs_ByType_TxPercent {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
p2pk65: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2pk65_prevout".to_string()),
|
||||
p2pk33: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2pk33_prevout".to_string()),
|
||||
p2pkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2pkh_prevout".to_string()),
|
||||
p2ms: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2ms_prevout".to_string()),
|
||||
p2sh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2sh_prevout".to_string()),
|
||||
p2wpkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2wpkh_prevout".to_string()),
|
||||
p2wsh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2wsh_prevout".to_string()),
|
||||
p2tr: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2tr_prevout".to_string()),
|
||||
p2a: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2a_prevout".to_string()),
|
||||
unknown: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_unknown_outputs_prevout".to_string()),
|
||||
empty: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_empty_outputs_prevout".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Outputs {
|
||||
pub raw: SeriesTree_Outputs_Raw,
|
||||
@@ -3953,16 +4057,20 @@ impl SeriesTree_Outputs_Unspent {
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Outputs_ByType {
|
||||
pub output_count: SeriesTree_Outputs_ByType_OutputCount,
|
||||
pub spendable_output_count: AverageBlockCumulativeSumPattern<StoredU64>,
|
||||
pub output_share: SeriesTree_Outputs_ByType_OutputShare,
|
||||
pub tx_count: AllEmptyOpP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern,
|
||||
pub tx_percent: SeriesTree_Outputs_ByType_TxPercent,
|
||||
pub tx_share: EmptyOpP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern2,
|
||||
}
|
||||
|
||||
impl SeriesTree_Outputs_ByType {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
output_count: SeriesTree_Outputs_ByType_OutputCount::new(client.clone(), format!("{base_path}_output_count")),
|
||||
spendable_output_count: AverageBlockCumulativeSumPattern::new(client.clone(), "spendable_output_count".to_string()),
|
||||
output_share: SeriesTree_Outputs_ByType_OutputShare::new(client.clone(), format!("{base_path}_output_share")),
|
||||
tx_count: AllEmptyOpP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern::new(client.clone(), "tx_count".to_string()),
|
||||
tx_percent: SeriesTree_Outputs_ByType_TxPercent::new(client.clone(), format!("{base_path}_tx_percent")),
|
||||
tx_share: EmptyOpP2aP2msP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshUnknownPattern2::new(client.clone(), "tx_share_with".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4005,7 +4113,7 @@ impl SeriesTree_Outputs_ByType_OutputCount {
|
||||
}
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Outputs_ByType_TxPercent {
|
||||
pub struct SeriesTree_Outputs_ByType_OutputShare {
|
||||
pub p2pk65: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk33: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
@@ -4020,21 +4128,21 @@ pub struct SeriesTree_Outputs_ByType_TxPercent {
|
||||
pub op_return: _1m1w1y24hBpsPercentRatioPattern,
|
||||
}
|
||||
|
||||
impl SeriesTree_Outputs_ByType_TxPercent {
|
||||
impl SeriesTree_Outputs_ByType_OutputShare {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
p2pk65: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2pk65_output".to_string()),
|
||||
p2pk33: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2pk33_output".to_string()),
|
||||
p2pkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2pkh_output".to_string()),
|
||||
p2ms: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2ms_output".to_string()),
|
||||
p2sh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2sh_output".to_string()),
|
||||
p2wpkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2wpkh_output".to_string()),
|
||||
p2wsh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2wsh_output".to_string()),
|
||||
p2tr: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2tr_output".to_string()),
|
||||
p2a: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_p2a_output".to_string()),
|
||||
unknown: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_unknown_outputs_output".to_string()),
|
||||
empty: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_empty_outputs_output".to_string()),
|
||||
op_return: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "tx_percent_with_op_return_output".to_string()),
|
||||
p2pk65: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pk65_output_share".to_string()),
|
||||
p2pk33: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pk33_output_share".to_string()),
|
||||
p2pkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pkh_output_share".to_string()),
|
||||
p2ms: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2ms_output_share".to_string()),
|
||||
p2sh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2sh_output_share".to_string()),
|
||||
p2wpkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2wpkh_output_share".to_string()),
|
||||
p2wsh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2wsh_output_share".to_string()),
|
||||
p2tr: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2tr_output_share".to_string()),
|
||||
p2a: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2a_output_share".to_string()),
|
||||
unknown: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "unknown_outputs_output_share".to_string()),
|
||||
empty: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "empty_outputs_output_share".to_string()),
|
||||
op_return: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "op_return_output_share".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4280,29 +4388,50 @@ impl SeriesTree_Addrs_Data {
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Addrs_Activity {
|
||||
pub all: BothReactivatedReceivingSendingPattern,
|
||||
pub p2pk65: BothReactivatedReceivingSendingPattern,
|
||||
pub p2pk33: BothReactivatedReceivingSendingPattern,
|
||||
pub p2pkh: BothReactivatedReceivingSendingPattern,
|
||||
pub p2sh: BothReactivatedReceivingSendingPattern,
|
||||
pub p2wpkh: BothReactivatedReceivingSendingPattern,
|
||||
pub p2wsh: BothReactivatedReceivingSendingPattern,
|
||||
pub p2tr: BothReactivatedReceivingSendingPattern,
|
||||
pub p2a: BothReactivatedReceivingSendingPattern,
|
||||
pub all: SeriesTree_Addrs_Activity_All,
|
||||
pub p2pk65: ActiveBidirectionalReactivatedReceivingSendingPattern,
|
||||
pub p2pk33: ActiveBidirectionalReactivatedReceivingSendingPattern,
|
||||
pub p2pkh: ActiveBidirectionalReactivatedReceivingSendingPattern,
|
||||
pub p2sh: ActiveBidirectionalReactivatedReceivingSendingPattern,
|
||||
pub p2wpkh: ActiveBidirectionalReactivatedReceivingSendingPattern,
|
||||
pub p2wsh: ActiveBidirectionalReactivatedReceivingSendingPattern,
|
||||
pub p2tr: ActiveBidirectionalReactivatedReceivingSendingPattern,
|
||||
pub p2a: ActiveBidirectionalReactivatedReceivingSendingPattern,
|
||||
}
|
||||
|
||||
impl SeriesTree_Addrs_Activity {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
all: BothReactivatedReceivingSendingPattern::new(client.clone(), "addr_activity".to_string()),
|
||||
p2pk65: BothReactivatedReceivingSendingPattern::new(client.clone(), "p2pk65_addr_activity".to_string()),
|
||||
p2pk33: BothReactivatedReceivingSendingPattern::new(client.clone(), "p2pk33_addr_activity".to_string()),
|
||||
p2pkh: BothReactivatedReceivingSendingPattern::new(client.clone(), "p2pkh_addr_activity".to_string()),
|
||||
p2sh: BothReactivatedReceivingSendingPattern::new(client.clone(), "p2sh_addr_activity".to_string()),
|
||||
p2wpkh: BothReactivatedReceivingSendingPattern::new(client.clone(), "p2wpkh_addr_activity".to_string()),
|
||||
p2wsh: BothReactivatedReceivingSendingPattern::new(client.clone(), "p2wsh_addr_activity".to_string()),
|
||||
p2tr: BothReactivatedReceivingSendingPattern::new(client.clone(), "p2tr_addr_activity".to_string()),
|
||||
p2a: BothReactivatedReceivingSendingPattern::new(client.clone(), "p2a_addr_activity".to_string()),
|
||||
all: SeriesTree_Addrs_Activity_All::new(client.clone(), format!("{base_path}_all")),
|
||||
p2pk65: ActiveBidirectionalReactivatedReceivingSendingPattern::new(client.clone(), "p2pk65".to_string()),
|
||||
p2pk33: ActiveBidirectionalReactivatedReceivingSendingPattern::new(client.clone(), "p2pk33".to_string()),
|
||||
p2pkh: ActiveBidirectionalReactivatedReceivingSendingPattern::new(client.clone(), "p2pkh".to_string()),
|
||||
p2sh: ActiveBidirectionalReactivatedReceivingSendingPattern::new(client.clone(), "p2sh".to_string()),
|
||||
p2wpkh: ActiveBidirectionalReactivatedReceivingSendingPattern::new(client.clone(), "p2wpkh".to_string()),
|
||||
p2wsh: ActiveBidirectionalReactivatedReceivingSendingPattern::new(client.clone(), "p2wsh".to_string()),
|
||||
p2tr: ActiveBidirectionalReactivatedReceivingSendingPattern::new(client.clone(), "p2tr".to_string()),
|
||||
p2a: ActiveBidirectionalReactivatedReceivingSendingPattern::new(client.clone(), "p2a".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Addrs_Activity_All {
|
||||
pub reactivated: _1m1w1y24hBlockPattern,
|
||||
pub sending: _1m1w1y24hBlockPattern,
|
||||
pub receiving: _1m1w1y24hBlockPattern,
|
||||
pub bidirectional: _1m1w1y24hBlockPattern,
|
||||
pub active: _1m1w1y24hBlockPattern,
|
||||
}
|
||||
|
||||
impl SeriesTree_Addrs_Activity_All {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
reactivated: _1m1w1y24hBlockPattern::new(client.clone(), "reactivated_addrs".to_string()),
|
||||
sending: _1m1w1y24hBlockPattern::new(client.clone(), "sending_addrs".to_string()),
|
||||
receiving: _1m1w1y24hBlockPattern::new(client.clone(), "receiving_addrs".to_string()),
|
||||
bidirectional: _1m1w1y24hBlockPattern::new(client.clone(), "bidirectional_addrs".to_string()),
|
||||
active: _1m1w1y24hBlockPattern::new(client.clone(), "active_addrs".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4310,58 +4439,60 @@ impl SeriesTree_Addrs_Activity {
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Addrs_Reused {
|
||||
pub count: FundedTotalPattern,
|
||||
pub uses: SeriesTree_Addrs_Reused_Uses,
|
||||
pub events: SeriesTree_Addrs_Reused_Events,
|
||||
}
|
||||
|
||||
impl SeriesTree_Addrs_Reused {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
count: FundedTotalPattern::new(client.clone(), "reused_addr_count".to_string()),
|
||||
uses: SeriesTree_Addrs_Reused_Uses::new(client.clone(), format!("{base_path}_uses")),
|
||||
events: SeriesTree_Addrs_Reused_Events::new(client.clone(), format!("{base_path}_events")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Addrs_Reused_Uses {
|
||||
pub reused_addr_use_count: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern5,
|
||||
pub reused_addr_use_percent: SeriesTree_Addrs_Reused_Uses_ReusedAddrUsePercent,
|
||||
pub struct SeriesTree_Addrs_Reused_Events {
|
||||
pub output_to_reused_addr_count: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern5,
|
||||
pub output_to_reused_addr_share: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern6,
|
||||
pub spendable_output_to_reused_addr_share: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub input_from_reused_addr_count: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern5,
|
||||
pub input_from_reused_addr_share: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern6,
|
||||
pub active_reused_addr_count: _1m1w1y24hBlockPattern,
|
||||
pub active_reused_addr_share: SeriesTree_Addrs_Reused_Events_ActiveReusedAddrShare,
|
||||
}
|
||||
|
||||
impl SeriesTree_Addrs_Reused_Uses {
|
||||
impl SeriesTree_Addrs_Reused_Events {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
reused_addr_use_count: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern5::new(client.clone(), "reused_addr_use_count".to_string()),
|
||||
reused_addr_use_percent: SeriesTree_Addrs_Reused_Uses_ReusedAddrUsePercent::new(client.clone(), format!("{base_path}_reused_addr_use_percent")),
|
||||
output_to_reused_addr_count: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern5::new(client.clone(), "output_to_reused_addr_count".to_string()),
|
||||
output_to_reused_addr_share: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern6::new(client.clone(), "output_to_reused_addr_share".to_string()),
|
||||
spendable_output_to_reused_addr_share: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "spendable_output_to_reused_addr_share".to_string()),
|
||||
input_from_reused_addr_count: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern5::new(client.clone(), "input_from_reused_addr_count".to_string()),
|
||||
input_from_reused_addr_share: AllP2aP2pk33P2pk65P2pkhP2shP2trP2wpkhP2wshPattern6::new(client.clone(), "input_from_reused_addr_share".to_string()),
|
||||
active_reused_addr_count: _1m1w1y24hBlockPattern::new(client.clone(), "active_reused_addr_count".to_string()),
|
||||
active_reused_addr_share: SeriesTree_Addrs_Reused_Events_ActiveReusedAddrShare::new(client.clone(), format!("{base_path}_active_reused_addr_share")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Addrs_Reused_Uses_ReusedAddrUsePercent {
|
||||
pub all: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk65: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pk33: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2pkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2sh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wpkh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2wsh: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2tr: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub p2a: _1m1w1y24hBpsPercentRatioPattern,
|
||||
pub struct SeriesTree_Addrs_Reused_Events_ActiveReusedAddrShare {
|
||||
pub block: SeriesPattern18<StoredF32>,
|
||||
pub _24h: SeriesPattern1<StoredF32>,
|
||||
pub _1w: SeriesPattern1<StoredF32>,
|
||||
pub _1m: SeriesPattern1<StoredF32>,
|
||||
pub _1y: SeriesPattern1<StoredF32>,
|
||||
}
|
||||
|
||||
impl SeriesTree_Addrs_Reused_Uses_ReusedAddrUsePercent {
|
||||
impl SeriesTree_Addrs_Reused_Events_ActiveReusedAddrShare {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
all: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "reused_addr_use_percent".to_string()),
|
||||
p2pk65: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pk65_reused_addr_use_percent".to_string()),
|
||||
p2pk33: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pk33_reused_addr_use_percent".to_string()),
|
||||
p2pkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2pkh_reused_addr_use_percent".to_string()),
|
||||
p2sh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2sh_reused_addr_use_percent".to_string()),
|
||||
p2wpkh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2wpkh_reused_addr_use_percent".to_string()),
|
||||
p2wsh: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2wsh_reused_addr_use_percent".to_string()),
|
||||
p2tr: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2tr_reused_addr_use_percent".to_string()),
|
||||
p2a: _1m1w1y24hBpsPercentRatioPattern::new(client.clone(), "p2a_reused_addr_use_percent".to_string()),
|
||||
block: SeriesPattern18::new(client.clone(), "active_reused_addr_share".to_string()),
|
||||
_24h: SeriesPattern1::new(client.clone(), "active_reused_addr_share_average_24h".to_string()),
|
||||
_1w: SeriesPattern1::new(client.clone(), "active_reused_addr_share_average_1w".to_string()),
|
||||
_1m: SeriesPattern1::new(client.clone(), "active_reused_addr_share_average_1m".to_string()),
|
||||
_1y: SeriesPattern1::new(client.clone(), "active_reused_addr_share_average_1y".to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5483,8 +5614,8 @@ pub struct SeriesTree_Indicators {
|
||||
pub gini: BpsPercentRatioPattern2,
|
||||
pub rhodl_ratio: BpsRatioPattern2,
|
||||
pub thermo_cap_multiple: BpsRatioPattern2,
|
||||
pub coindays_destroyed_supply_adjusted: SeriesPattern1<StoredF32>,
|
||||
pub coinyears_destroyed_supply_adjusted: SeriesPattern1<StoredF32>,
|
||||
pub coindays_destroyed_supply_adj: SeriesPattern1<StoredF32>,
|
||||
pub coinyears_destroyed_supply_adj: SeriesPattern1<StoredF32>,
|
||||
pub dormancy: SeriesTree_Indicators_Dormancy,
|
||||
pub stock_to_flow: SeriesPattern1<StoredF32>,
|
||||
pub seller_exhaustion: SeriesPattern1<StoredF32>,
|
||||
@@ -5499,8 +5630,8 @@ impl SeriesTree_Indicators {
|
||||
gini: BpsPercentRatioPattern2::new(client.clone(), "gini".to_string()),
|
||||
rhodl_ratio: BpsRatioPattern2::new(client.clone(), "rhodl_ratio".to_string()),
|
||||
thermo_cap_multiple: BpsRatioPattern2::new(client.clone(), "thermo_cap_multiple".to_string()),
|
||||
coindays_destroyed_supply_adjusted: SeriesPattern1::new(client.clone(), "coindays_destroyed_supply_adjusted".to_string()),
|
||||
coinyears_destroyed_supply_adjusted: SeriesPattern1::new(client.clone(), "coinyears_destroyed_supply_adjusted".to_string()),
|
||||
coindays_destroyed_supply_adj: SeriesPattern1::new(client.clone(), "coindays_destroyed_supply_adj".to_string()),
|
||||
coinyears_destroyed_supply_adj: SeriesPattern1::new(client.clone(), "coinyears_destroyed_supply_adj".to_string()),
|
||||
dormancy: SeriesTree_Indicators_Dormancy::new(client.clone(), format!("{base_path}_dormancy")),
|
||||
stock_to_flow: SeriesPattern1::new(client.clone(), "stock_to_flow".to_string()),
|
||||
seller_exhaustion: SeriesPattern1::new(client.clone(), "seller_exhaustion".to_string()),
|
||||
@@ -5511,14 +5642,14 @@ impl SeriesTree_Indicators {
|
||||
|
||||
/// Series tree node.
|
||||
pub struct SeriesTree_Indicators_Dormancy {
|
||||
pub supply_adjusted: SeriesPattern1<StoredF32>,
|
||||
pub supply_adj: SeriesPattern1<StoredF32>,
|
||||
pub flow: SeriesPattern1<StoredF32>,
|
||||
}
|
||||
|
||||
impl SeriesTree_Indicators_Dormancy {
|
||||
pub fn new(client: Arc<BrkClientBase>, base_path: String) -> Self {
|
||||
Self {
|
||||
supply_adjusted: SeriesPattern1::new(client.clone(), "dormancy_supply_adjusted".to_string()),
|
||||
supply_adj: SeriesPattern1::new(client.clone(), "dormancy_supply_adj".to_string()),
|
||||
flow: SeriesPattern1::new(client.clone(), "dormancy_flow".to_string()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -116,6 +116,23 @@ impl<T> SpendableType<T> {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn get(&self, output_type: OutputType) -> &T {
|
||||
match output_type {
|
||||
OutputType::P2PK65 => &self.p2pk65,
|
||||
OutputType::P2PK33 => &self.p2pk33,
|
||||
OutputType::P2PKH => &self.p2pkh,
|
||||
OutputType::P2MS => &self.p2ms,
|
||||
OutputType::P2SH => &self.p2sh,
|
||||
OutputType::P2WPKH => &self.p2wpkh,
|
||||
OutputType::P2WSH => &self.p2wsh,
|
||||
OutputType::P2TR => &self.p2tr,
|
||||
OutputType::P2A => &self.p2a,
|
||||
OutputType::Unknown => &self.unknown,
|
||||
OutputType::Empty => &self.empty,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_mut(&mut self, output_type: OutputType) -> &mut T {
|
||||
match output_type {
|
||||
OutputType::P2PK65 => &mut self.p2pk65,
|
||||
|
||||
@@ -8,5 +8,5 @@ use crate::internal::PerBlockRollingAverage;
|
||||
|
||||
#[derive(Deref, DerefMut, Traversable)]
|
||||
pub struct Vecs<M: StorageMode = Rw>(
|
||||
#[traversable(flatten)] pub PerBlockRollingAverage<Timestamp, M>,
|
||||
#[traversable(flatten)] pub PerBlockRollingAverage<Timestamp, Timestamp, M>,
|
||||
);
|
||||
|
||||
@@ -7,12 +7,13 @@
|
||||
//! | `receiving` | Unique addresses that received this block |
|
||||
//! | `sending` | Unique addresses that sent this block |
|
||||
//! | `reactivated` | Addresses that were empty and now have funds |
|
||||
//! | `both` | Addresses that both sent AND received same block |
|
||||
//! | `bidirectional` | Addresses that both sent AND received in same block |
|
||||
//! | `active` | Distinct addresses involved (sent ∪ received) |
|
||||
|
||||
use brk_cohort::ByAddrType;
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{Height, StoredU32, Version};
|
||||
use brk_types::{Height, StoredU32, StoredU64, Version};
|
||||
use derive_more::{Deref, DerefMut};
|
||||
use rayon::prelude::*;
|
||||
use vecdb::{AnyStoredVec, AnyVec, Database, Exit, Rw, StorageMode, WritableVec};
|
||||
@@ -28,7 +29,7 @@ pub struct BlockActivityCounts {
|
||||
pub reactivated: u32,
|
||||
pub sending: u32,
|
||||
pub receiving: u32,
|
||||
pub both: u32,
|
||||
pub bidirectional: u32,
|
||||
}
|
||||
|
||||
impl BlockActivityCounts {
|
||||
@@ -56,7 +57,7 @@ impl AddrTypeToActivityCounts {
|
||||
total.reactivated += counts.reactivated;
|
||||
total.sending += counts.sending;
|
||||
total.receiving += counts.receiving;
|
||||
total.both += counts.both;
|
||||
total.bidirectional += counts.bidirectional;
|
||||
}
|
||||
total
|
||||
}
|
||||
@@ -65,16 +66,25 @@ impl AddrTypeToActivityCounts {
|
||||
/// Activity count vectors for a single category (e.g., one address type or "all").
|
||||
#[derive(Traversable)]
|
||||
pub struct ActivityCountVecs<M: StorageMode = Rw> {
|
||||
pub reactivated: PerBlockRollingAverage<StoredU32, M>,
|
||||
pub sending: PerBlockRollingAverage<StoredU32, M>,
|
||||
pub receiving: PerBlockRollingAverage<StoredU32, M>,
|
||||
pub both: PerBlockRollingAverage<StoredU32, M>,
|
||||
pub reactivated: PerBlockRollingAverage<StoredU32, StoredU64, M>,
|
||||
pub sending: PerBlockRollingAverage<StoredU32, StoredU64, M>,
|
||||
pub receiving: PerBlockRollingAverage<StoredU32, StoredU64, M>,
|
||||
pub bidirectional: PerBlockRollingAverage<StoredU32, StoredU64, M>,
|
||||
/// Distinct addresses involved in this block (sent ∪ received),
|
||||
/// computed at push time as `sending + receiving - bidirectional`
|
||||
/// via inclusion-exclusion. For per-type instances this is
|
||||
/// per-type. For the `all` aggregate it's the cross-type total.
|
||||
pub active: PerBlockRollingAverage<StoredU32, StoredU64, M>,
|
||||
}
|
||||
|
||||
impl ActivityCountVecs {
|
||||
/// `prefix` is prepended to each field's disk name. Use `""` for the
|
||||
/// "all" aggregate and `"{type}_"` for per-address-type instances.
|
||||
/// Field names are suffixed with `_addrs` so the final disk series
|
||||
/// are e.g. `active_addrs`, `p2tr_bidirectional_addrs`.
|
||||
pub(crate) fn forced_import(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
prefix: &str,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
cached_starts: &Windows<&WindowStartVec>,
|
||||
@@ -82,28 +92,35 @@ impl ActivityCountVecs {
|
||||
Ok(Self {
|
||||
reactivated: PerBlockRollingAverage::forced_import(
|
||||
db,
|
||||
&format!("{name}_reactivated"),
|
||||
&format!("{prefix}reactivated_addrs"),
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)?,
|
||||
sending: PerBlockRollingAverage::forced_import(
|
||||
db,
|
||||
&format!("{name}_sending"),
|
||||
&format!("{prefix}sending_addrs"),
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)?,
|
||||
receiving: PerBlockRollingAverage::forced_import(
|
||||
db,
|
||||
&format!("{name}_receiving"),
|
||||
&format!("{prefix}receiving_addrs"),
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)?,
|
||||
both: PerBlockRollingAverage::forced_import(
|
||||
bidirectional: PerBlockRollingAverage::forced_import(
|
||||
db,
|
||||
&format!("{name}_both"),
|
||||
&format!("{prefix}bidirectional_addrs"),
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)?,
|
||||
active: PerBlockRollingAverage::forced_import(
|
||||
db,
|
||||
&format!("{prefix}active_addrs"),
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
@@ -117,7 +134,8 @@ impl ActivityCountVecs {
|
||||
.len()
|
||||
.min(self.sending.block.len())
|
||||
.min(self.receiving.block.len())
|
||||
.min(self.both.block.len())
|
||||
.min(self.bidirectional.block.len())
|
||||
.min(self.active.block.len())
|
||||
}
|
||||
|
||||
pub(crate) fn par_iter_height_mut(
|
||||
@@ -125,9 +143,10 @@ impl ActivityCountVecs {
|
||||
) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
|
||||
[
|
||||
&mut self.reactivated.block as &mut dyn AnyStoredVec,
|
||||
&mut self.sending.block as &mut dyn AnyStoredVec,
|
||||
&mut self.receiving.block as &mut dyn AnyStoredVec,
|
||||
&mut self.both.block as &mut dyn AnyStoredVec,
|
||||
&mut self.sending.block,
|
||||
&mut self.receiving.block,
|
||||
&mut self.bidirectional.block,
|
||||
&mut self.active.block,
|
||||
]
|
||||
.into_par_iter()
|
||||
}
|
||||
@@ -136,7 +155,8 @@ impl ActivityCountVecs {
|
||||
self.reactivated.block.reset()?;
|
||||
self.sending.block.reset()?;
|
||||
self.receiving.block.reset()?;
|
||||
self.both.block.reset()?;
|
||||
self.bidirectional.block.reset()?;
|
||||
self.active.block.reset()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -145,14 +165,19 @@ impl ActivityCountVecs {
|
||||
self.reactivated.block.push(counts.reactivated.into());
|
||||
self.sending.block.push(counts.sending.into());
|
||||
self.receiving.block.push(counts.receiving.into());
|
||||
self.both.block.push(counts.both.into());
|
||||
self.bidirectional
|
||||
.block
|
||||
.push(counts.bidirectional.into());
|
||||
let active = counts.sending + counts.receiving - counts.bidirectional;
|
||||
self.active.block.push(active.into());
|
||||
}
|
||||
|
||||
pub(crate) fn compute_rest(&mut self, max_from: Height, exit: &Exit) -> Result<()> {
|
||||
self.reactivated.compute_rest(max_from, exit)?;
|
||||
self.sending.compute_rest(max_from, exit)?;
|
||||
self.receiving.compute_rest(max_from, exit)?;
|
||||
self.both.compute_rest(max_from, exit)?;
|
||||
self.bidirectional.compute_rest(max_from, exit)?;
|
||||
self.active.compute_rest(max_from, exit)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -171,7 +196,6 @@ impl From<ByAddrType<ActivityCountVecs>> for AddrTypeToActivityCountVecs {
|
||||
impl AddrTypeToActivityCountVecs {
|
||||
pub(crate) fn forced_import(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
cached_starts: &Windows<&WindowStartVec>,
|
||||
@@ -180,7 +204,7 @@ impl AddrTypeToActivityCountVecs {
|
||||
|type_name| {
|
||||
ActivityCountVecs::forced_import(
|
||||
db,
|
||||
&format!("{type_name}_{name}"),
|
||||
&format!("{type_name}_"),
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
@@ -205,7 +229,8 @@ impl AddrTypeToActivityCountVecs {
|
||||
vecs.push(&mut type_vecs.reactivated.block);
|
||||
vecs.push(&mut type_vecs.sending.block);
|
||||
vecs.push(&mut type_vecs.receiving.block);
|
||||
vecs.push(&mut type_vecs.both.block);
|
||||
vecs.push(&mut type_vecs.bidirectional.block);
|
||||
vecs.push(&mut type_vecs.active.block);
|
||||
}
|
||||
vecs.into_par_iter()
|
||||
}
|
||||
@@ -243,16 +268,14 @@ pub struct AddrActivityVecs<M: StorageMode = Rw> {
|
||||
impl AddrActivityVecs {
|
||||
pub(crate) fn forced_import(
|
||||
db: &Database,
|
||||
name: &str,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
cached_starts: &Windows<&WindowStartVec>,
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
all: ActivityCountVecs::forced_import(db, name, version, indexes, cached_starts)?,
|
||||
all: ActivityCountVecs::forced_import(db, "", version, indexes, cached_starts)?,
|
||||
by_addr_type: AddrTypeToActivityCountVecs::forced_import(
|
||||
db,
|
||||
name,
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
|
||||
@@ -19,7 +19,7 @@ pub use exposed::{
|
||||
pub use indexes::AnyAddrIndexesVecs;
|
||||
pub use new_addr_count::NewAddrCountVecs;
|
||||
pub use reused::{
|
||||
AddrTypeToReusedAddrCount, AddrTypeToReusedAddrUseCount, ReusedAddrVecs,
|
||||
AddrTypeToReusedAddrCount, AddrTypeToReusedAddrEventCount, ReusedAddrVecs,
|
||||
};
|
||||
pub use total_addr_count::TotalAddrCountVecs;
|
||||
pub use type_map::{AddrTypeToTypeIndexMap, AddrTypeToVec, HeightToAddrTypeToVec};
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
//! Per-block reused-address event tracking. Holds both the output-side
|
||||
//! ("an output landed on a previously-used address") and input-side
|
||||
//! ("an input spent from an address in the reused set") event counters.
|
||||
//! See [`vecs::ReusedAddrEventsVecs`] for the full description of each
|
||||
//! metric.
|
||||
|
||||
mod state;
|
||||
mod vecs;
|
||||
|
||||
pub use state::AddrTypeToReusedAddrEventCount;
|
||||
pub use vecs::ReusedAddrEventsVecs;
|
||||
@@ -0,0 +1,28 @@
|
||||
use brk_cohort::ByAddrType;
|
||||
use derive_more::{Deref, DerefMut};
|
||||
|
||||
/// Per-block running counter of reused-address events, per address type.
|
||||
/// Shared runtime container for both output-side events
|
||||
/// (`output_to_reused_addr_count`, outputs landing on addresses that
|
||||
/// had already received ≥ 1 prior output) and input-side events
|
||||
/// (`input_from_reused_addr_count`, inputs spending from addresses
|
||||
/// with lifetime `funded_txo_count > 1`). Reset at the start of each
|
||||
/// block (no disk recovery needed since per-block flow is
|
||||
/// reconstructed deterministically from `process_received` /
|
||||
/// `process_sent`).
|
||||
#[derive(Debug, Default, Deref, DerefMut)]
|
||||
pub struct AddrTypeToReusedAddrEventCount(ByAddrType<u64>);
|
||||
|
||||
impl AddrTypeToReusedAddrEventCount {
|
||||
#[inline]
|
||||
pub(crate) fn sum(&self) -> u64 {
|
||||
self.0.values().sum()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn reset(&mut self) {
|
||||
for v in self.0.values_mut() {
|
||||
*v = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
261
crates/brk_computer/src/distribution/addr/reused/events/vecs.rs
Normal file
261
crates/brk_computer/src/distribution/addr/reused/events/vecs.rs
Normal file
@@ -0,0 +1,261 @@
|
||||
use brk_cohort::ByAddrType;
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{BasisPoints16, Indexes, OutputType, StoredF32, StoredU32, StoredU64, Version};
|
||||
use rayon::prelude::*;
|
||||
use vecdb::{AnyStoredVec, AnyVec, Database, Exit, Rw, StorageMode, WritableVec};
|
||||
|
||||
use crate::{
|
||||
indexes, inputs,
|
||||
internal::{
|
||||
PerBlockCumulativeRolling, PerBlockRollingAverage, PercentCumulativeRolling,
|
||||
WindowStartVec, Windows, WithAddrTypes,
|
||||
},
|
||||
outputs,
|
||||
};
|
||||
|
||||
use super::state::AddrTypeToReusedAddrEventCount;
|
||||
|
||||
/// Per-block reused-address event metrics. Holds three families of
|
||||
/// signals: output-level (use), input-level (spend), and address-level
|
||||
/// (active in block).
|
||||
///
|
||||
/// `output_to_reused_addr_count`: every output landing on an address that had
|
||||
/// already received at least one prior output anywhere in its lifetime,
|
||||
/// i.e. an output-level reuse event. Outputs are not deduplicated per
|
||||
/// address within a block: an address receiving N outputs in one block
|
||||
/// that had `before` lifetime outputs contributes
|
||||
/// `max(0, N - max(0, 1 - before))` events. Only the very first output
|
||||
/// an address ever sees is excluded. Every subsequent output counts,
|
||||
/// matching the standard "% of outputs to previously-used addresses"
|
||||
/// reuse ratio reported by external sources. `output_to_reused_addr_share`
|
||||
/// uses `outputs::ByTypeVecs::output_count` (all 12 output types) as
|
||||
/// denominator. `spendable_output_to_reused_addr_share` uses the
|
||||
/// op_return-excluded 11-type aggregate (`spendable_output_count`).
|
||||
///
|
||||
/// `input_from_reused_addr_count`: every input spending from an address
|
||||
/// whose lifetime `funded_txo_count > 1` at the time of the spend (i.e.
|
||||
/// the address is in the same reused set tracked by
|
||||
/// `reused_addr_count`). Every input is checked independently. If a
|
||||
/// single address has multiple inputs in one block each one counts.
|
||||
/// This is a *stable-predicate* signal about the sending address, not
|
||||
/// an output-level repeat event: the first spend from a reused address
|
||||
/// counts just as much as the tenth. Denominator
|
||||
/// (`input_from_reused_addr_share`): `inputs::ByTypeVecs::input_count` (11
|
||||
/// spendable types, where `p2ms`, `unknown`, `empty` count as true
|
||||
/// negatives).
|
||||
///
|
||||
/// `active_reused_addr_count` / `active_reused_addr_share`: block-level
|
||||
/// *address* signals (single aggregate, not per-type).
|
||||
/// `active_reused_addr_count` is the count of distinct addresses
|
||||
/// involved in this block (sent ∪ received) that satisfy `is_reused()`
|
||||
/// after the block's events, populated inline in `process_received`
|
||||
/// (each receiver, post-receive) and in `process_sent` (each
|
||||
/// first-encounter sender, deduped against `received_addrs` so
|
||||
/// addresses that did both aren't double-counted).
|
||||
/// `active_reused_addr_share` is the per-block ratio
|
||||
/// `reused / active * 100` as a percentage in `[0, 100]` (or `0.0` for
|
||||
/// empty blocks). The denominator (distinct active addrs per block)
|
||||
/// lives on `ActivityCountVecs::active` (`addrs.activity.all.active`),
|
||||
/// derived from `sending + receiving - bidirectional`. Both fields
|
||||
/// use `PerBlockRollingAverage` so their lazy 24h/1w/1m/1y series are
|
||||
/// rolling *averages* of the per-block values. Sums and cumulatives of
|
||||
/// distinct-address counts would be misleading because the same
|
||||
/// address can appear in multiple blocks.
|
||||
#[derive(Traversable)]
|
||||
pub struct ReusedAddrEventsVecs<M: StorageMode = Rw> {
|
||||
pub output_to_reused_addr_count:
|
||||
WithAddrTypes<PerBlockCumulativeRolling<StoredU64, StoredU64, M>>,
|
||||
pub output_to_reused_addr_share: WithAddrTypes<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
pub spendable_output_to_reused_addr_share: PercentCumulativeRolling<BasisPoints16, M>,
|
||||
pub input_from_reused_addr_count:
|
||||
WithAddrTypes<PerBlockCumulativeRolling<StoredU64, StoredU64, M>>,
|
||||
pub input_from_reused_addr_share: WithAddrTypes<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
pub active_reused_addr_count: PerBlockRollingAverage<StoredU32, StoredU64, M>,
|
||||
pub active_reused_addr_share: PerBlockRollingAverage<StoredF32, StoredF32, M>,
|
||||
}
|
||||
|
||||
impl ReusedAddrEventsVecs {
|
||||
pub(crate) fn forced_import(
|
||||
db: &Database,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
cached_starts: &Windows<&WindowStartVec>,
|
||||
) -> Result<Self> {
|
||||
let import_count = |name: &str| {
|
||||
WithAddrTypes::<PerBlockCumulativeRolling<StoredU64, StoredU64>>::forced_import(
|
||||
db,
|
||||
name,
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)
|
||||
};
|
||||
let import_percent = |name: &str| -> Result<WithAddrTypes<
|
||||
PercentCumulativeRolling<BasisPoints16>,
|
||||
>> {
|
||||
Ok(WithAddrTypes {
|
||||
all: PercentCumulativeRolling::forced_import(db, name, version, indexes)?,
|
||||
by_addr_type: ByAddrType::new_with_name(|type_name| {
|
||||
PercentCumulativeRolling::forced_import(
|
||||
db,
|
||||
&format!("{type_name}_{name}"),
|
||||
version,
|
||||
indexes,
|
||||
)
|
||||
})?,
|
||||
})
|
||||
};
|
||||
|
||||
let output_to_reused_addr_count = import_count("output_to_reused_addr_count")?;
|
||||
let output_to_reused_addr_share = import_percent("output_to_reused_addr_share")?;
|
||||
let spendable_output_to_reused_addr_share = PercentCumulativeRolling::forced_import(
|
||||
db,
|
||||
"spendable_output_to_reused_addr_share",
|
||||
version,
|
||||
indexes,
|
||||
)?;
|
||||
let input_from_reused_addr_count = import_count("input_from_reused_addr_count")?;
|
||||
let input_from_reused_addr_share = import_percent("input_from_reused_addr_share")?;
|
||||
|
||||
let active_reused_addr_count = PerBlockRollingAverage::forced_import(
|
||||
db,
|
||||
"active_reused_addr_count",
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)?;
|
||||
let active_reused_addr_share = PerBlockRollingAverage::forced_import(
|
||||
db,
|
||||
"active_reused_addr_share",
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)?;
|
||||
|
||||
Ok(Self {
|
||||
output_to_reused_addr_count,
|
||||
output_to_reused_addr_share,
|
||||
spendable_output_to_reused_addr_share,
|
||||
input_from_reused_addr_count,
|
||||
input_from_reused_addr_share,
|
||||
active_reused_addr_count,
|
||||
active_reused_addr_share,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn min_stateful_len(&self) -> usize {
|
||||
self.output_to_reused_addr_count
|
||||
.min_stateful_len()
|
||||
.min(self.input_from_reused_addr_count.min_stateful_len())
|
||||
.min(self.active_reused_addr_count.block.len())
|
||||
.min(self.active_reused_addr_share.block.len())
|
||||
}
|
||||
|
||||
pub(crate) fn par_iter_height_mut(
|
||||
&mut self,
|
||||
) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
|
||||
self.output_to_reused_addr_count
|
||||
.par_iter_height_mut()
|
||||
.chain(self.input_from_reused_addr_count.par_iter_height_mut())
|
||||
.chain([
|
||||
&mut self.active_reused_addr_count.block as &mut dyn AnyStoredVec,
|
||||
&mut self.active_reused_addr_share.block as &mut dyn AnyStoredVec,
|
||||
])
|
||||
}
|
||||
|
||||
pub(crate) fn reset_height(&mut self) -> Result<()> {
|
||||
self.output_to_reused_addr_count.reset_height()?;
|
||||
self.input_from_reused_addr_count.reset_height()?;
|
||||
self.active_reused_addr_count.block.reset()?;
|
||||
self.active_reused_addr_share.block.reset()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn push_height(
|
||||
&mut self,
|
||||
uses: &AddrTypeToReusedAddrEventCount,
|
||||
spends: &AddrTypeToReusedAddrEventCount,
|
||||
active_addr_count: u32,
|
||||
active_reused_addr_count: u32,
|
||||
) {
|
||||
self.output_to_reused_addr_count
|
||||
.push_height(uses.sum(), uses.values().copied());
|
||||
self.input_from_reused_addr_count
|
||||
.push_height(spends.sum(), spends.values().copied());
|
||||
self.active_reused_addr_count
|
||||
.block
|
||||
.push(StoredU32::from(active_reused_addr_count));
|
||||
// Stored as a percentage in [0, 100] to match the rest of the
|
||||
// codebase (Unit.percentage on the website expects 0..100). The
|
||||
// `active_addr_count` denominator lives on `ActivityCountVecs`
|
||||
// (`addrs.activity.all.active`), passed in here so we can
|
||||
// compute the per-block ratio inline.
|
||||
let share = if active_addr_count > 0 {
|
||||
100.0 * (active_reused_addr_count as f32 / active_addr_count as f32)
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
self.active_reused_addr_share
|
||||
.block
|
||||
.push(StoredF32::from(share));
|
||||
}
|
||||
|
||||
pub(crate) fn compute_rest(
|
||||
&mut self,
|
||||
starting_indexes: &Indexes,
|
||||
outputs_by_type: &outputs::ByTypeVecs,
|
||||
inputs_by_type: &inputs::ByTypeVecs,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.output_to_reused_addr_count
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
self.input_from_reused_addr_count
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
self.active_reused_addr_count
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
self.active_reused_addr_share
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
|
||||
self.output_to_reused_addr_share.all.compute_count_ratio(
|
||||
&self.output_to_reused_addr_count.all,
|
||||
&outputs_by_type.output_count.all,
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
self.spendable_output_to_reused_addr_share.compute_count_ratio(
|
||||
&self.output_to_reused_addr_count.all,
|
||||
&outputs_by_type.spendable_output_count,
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
self.input_from_reused_addr_share.all.compute_count_ratio(
|
||||
&self.input_from_reused_addr_count.all,
|
||||
&inputs_by_type.input_count.all,
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
for otype in OutputType::ADDR_TYPES {
|
||||
self.output_to_reused_addr_share
|
||||
.by_addr_type
|
||||
.get_mut_unwrap(otype)
|
||||
.compute_count_ratio(
|
||||
self.output_to_reused_addr_count.by_addr_type.get_unwrap(otype),
|
||||
outputs_by_type.output_count.by_type.get(otype),
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
self.input_from_reused_addr_share
|
||||
.by_addr_type
|
||||
.get_mut_unwrap(otype)
|
||||
.compute_count_ratio(
|
||||
self.input_from_reused_addr_count.by_addr_type.get_unwrap(otype),
|
||||
inputs_by_type.input_count.by_type.get(otype),
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,22 +1,26 @@
|
||||
//! Reused address tracking.
|
||||
//!
|
||||
//! An address is "reused" if its lifetime `funded_txo_count > 1` — i.e. it
|
||||
//! has received more than one output across its lifetime. This is the
|
||||
//! simplest output-multiplicity proxy for address linkability.
|
||||
//! An address is "reused" if its lifetime `funded_txo_count > 1`, i.e.
|
||||
//! it has received more than one output across its lifetime. This is
|
||||
//! the simplest output-multiplicity proxy for address linkability.
|
||||
//!
|
||||
//! Two facets are tracked here:
|
||||
//! - [`count`] — how many distinct addresses are currently reused (funded)
|
||||
//! and how many have *ever* been reused (total). Per address type plus
|
||||
//! an aggregated `all`.
|
||||
//! - [`uses`] — per-block count of outputs going to addresses that were
|
||||
//! already reused, plus the derived percent over total address-output
|
||||
//! count (denominator from `outputs::by_type`).
|
||||
//! - [`count`]: how many distinct addresses are currently reused
|
||||
//! (funded) and how many have *ever* been reused (total). Per address
|
||||
//! type plus an aggregated `all`.
|
||||
//! - [`events`]: per-block address-reuse event counts on both sides.
|
||||
//! Output-side (`output_to_reused_addr_count`, outputs landing on
|
||||
//! addresses that had already received ≥ 1 prior output) and
|
||||
//! input-side (`input_from_reused_addr_count`, inputs spending from
|
||||
//! addresses with lifetime `funded_txo_count > 1`). Each count is
|
||||
//! paired with a percent over the matching block-level output/input
|
||||
//! total.
|
||||
|
||||
mod count;
|
||||
mod uses;
|
||||
mod events;
|
||||
|
||||
pub use count::{AddrTypeToReusedAddrCount, ReusedAddrCountsVecs};
|
||||
pub use uses::{AddrTypeToReusedAddrUseCount, ReusedAddrUsesVecs};
|
||||
pub use events::{AddrTypeToReusedAddrEventCount, ReusedAddrEventsVecs};
|
||||
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
@@ -25,17 +29,17 @@ use rayon::prelude::*;
|
||||
use vecdb::{AnyStoredVec, Database, Exit, Rw, StorageMode};
|
||||
|
||||
use crate::{
|
||||
indexes,
|
||||
indexes, inputs,
|
||||
internal::{WindowStartVec, Windows},
|
||||
outputs,
|
||||
};
|
||||
|
||||
/// Top-level container for all reused address tracking: counts (funded +
|
||||
/// total) plus per-block uses (count + percent).
|
||||
/// total) plus per-block reuse events (output-side + input-side).
|
||||
#[derive(Traversable)]
|
||||
pub struct ReusedAddrVecs<M: StorageMode = Rw> {
|
||||
pub count: ReusedAddrCountsVecs<M>,
|
||||
pub uses: ReusedAddrUsesVecs<M>,
|
||||
pub events: ReusedAddrEventsVecs<M>,
|
||||
}
|
||||
|
||||
impl ReusedAddrVecs {
|
||||
@@ -47,14 +51,14 @@ impl ReusedAddrVecs {
|
||||
) -> Result<Self> {
|
||||
Ok(Self {
|
||||
count: ReusedAddrCountsVecs::forced_import(db, version, indexes)?,
|
||||
uses: ReusedAddrUsesVecs::forced_import(db, version, indexes, cached_starts)?,
|
||||
events: ReusedAddrEventsVecs::forced_import(db, version, indexes, cached_starts)?,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn min_stateful_len(&self) -> usize {
|
||||
self.count
|
||||
.min_stateful_len()
|
||||
.min(self.uses.min_stateful_len())
|
||||
.min(self.events.min_stateful_len())
|
||||
}
|
||||
|
||||
pub(crate) fn par_iter_height_mut(
|
||||
@@ -62,12 +66,12 @@ impl ReusedAddrVecs {
|
||||
) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
|
||||
self.count
|
||||
.par_iter_height_mut()
|
||||
.chain(self.uses.par_iter_height_mut())
|
||||
.chain(self.events.par_iter_height_mut())
|
||||
}
|
||||
|
||||
pub(crate) fn reset_height(&mut self) -> Result<()> {
|
||||
self.count.reset_height()?;
|
||||
self.uses.reset_height()?;
|
||||
self.events.reset_height()?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -75,11 +79,16 @@ impl ReusedAddrVecs {
|
||||
&mut self,
|
||||
starting_indexes: &Indexes,
|
||||
outputs_by_type: &outputs::ByTypeVecs,
|
||||
inputs_by_type: &inputs::ByTypeVecs,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.count.compute_rest(starting_indexes, exit)?;
|
||||
self.uses
|
||||
.compute_rest(starting_indexes, outputs_by_type, exit)?;
|
||||
self.events.compute_rest(
|
||||
starting_indexes,
|
||||
outputs_by_type,
|
||||
inputs_by_type,
|
||||
exit,
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +0,0 @@
|
||||
//! Per-block reused-address-use tracking. See [`vecs::ReusedAddrUsesVecs`]
|
||||
//! for the full description of the metric.
|
||||
|
||||
mod state;
|
||||
mod vecs;
|
||||
|
||||
pub use state::AddrTypeToReusedAddrUseCount;
|
||||
pub use vecs::ReusedAddrUsesVecs;
|
||||
@@ -1,22 +0,0 @@
|
||||
use brk_cohort::ByAddrType;
|
||||
use derive_more::{Deref, DerefMut};
|
||||
|
||||
/// Per-block running counter of reused address uses, per address type.
|
||||
/// Reset at the start of each block (no disk recovery needed since the
|
||||
/// per-block flow is reconstructed from `process_received` deterministically).
|
||||
#[derive(Debug, Default, Deref, DerefMut)]
|
||||
pub struct AddrTypeToReusedAddrUseCount(ByAddrType<u64>);
|
||||
|
||||
impl AddrTypeToReusedAddrUseCount {
|
||||
#[inline]
|
||||
pub(crate) fn sum(&self) -> u64 {
|
||||
self.0.values().sum()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub(crate) fn reset(&mut self) {
|
||||
for v in self.0.values_mut() {
|
||||
*v = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,116 +0,0 @@
|
||||
use brk_cohort::ByAddrType;
|
||||
use brk_error::Result;
|
||||
use brk_traversable::Traversable;
|
||||
use brk_types::{BasisPoints16, Indexes, OutputType, StoredU64, Version};
|
||||
use rayon::prelude::*;
|
||||
use vecdb::{AnyStoredVec, Database, Exit, Rw, StorageMode};
|
||||
|
||||
use crate::{
|
||||
indexes,
|
||||
internal::{
|
||||
PerBlockCumulativeRolling, PercentCumulativeRolling, WindowStartVec, Windows,
|
||||
WithAddrTypes,
|
||||
},
|
||||
outputs,
|
||||
};
|
||||
|
||||
use super::state::AddrTypeToReusedAddrUseCount;
|
||||
|
||||
/// Per-block reused-address-use metrics. A "use" is a single output going
|
||||
/// to an address (not deduplicated): an address receiving N outputs in one
|
||||
/// block contributes N. The count only includes uses going to addresses
|
||||
/// that were *already* reused at the moment of the use, so the use that
|
||||
/// makes an address reused is not itself counted.
|
||||
///
|
||||
/// The denominator for the percent (per-type and aggregate address-output
|
||||
/// counts) is read from `outputs::ByTypeVecs::output_count` rather than
|
||||
/// duplicated here.
|
||||
#[derive(Traversable)]
|
||||
pub struct ReusedAddrUsesVecs<M: StorageMode = Rw> {
|
||||
pub reused_addr_use_count:
|
||||
WithAddrTypes<PerBlockCumulativeRolling<StoredU64, StoredU64, M>>,
|
||||
pub reused_addr_use_percent: WithAddrTypes<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
}
|
||||
|
||||
impl ReusedAddrUsesVecs {
|
||||
pub(crate) fn forced_import(
|
||||
db: &Database,
|
||||
version: Version,
|
||||
indexes: &indexes::Vecs,
|
||||
cached_starts: &Windows<&WindowStartVec>,
|
||||
) -> Result<Self> {
|
||||
let reused_addr_use_count =
|
||||
WithAddrTypes::<PerBlockCumulativeRolling<StoredU64, StoredU64>>::forced_import(
|
||||
db,
|
||||
"reused_addr_use_count",
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)?;
|
||||
let percent_name = "reused_addr_use_percent";
|
||||
let reused_addr_use_percent = WithAddrTypes {
|
||||
all: PercentCumulativeRolling::forced_import(db, percent_name, version, indexes)?,
|
||||
by_addr_type: ByAddrType::new_with_name(|type_name| {
|
||||
PercentCumulativeRolling::forced_import(
|
||||
db,
|
||||
&format!("{type_name}_{percent_name}"),
|
||||
version,
|
||||
indexes,
|
||||
)
|
||||
})?,
|
||||
};
|
||||
Ok(Self {
|
||||
reused_addr_use_count,
|
||||
reused_addr_use_percent,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn min_stateful_len(&self) -> usize {
|
||||
self.reused_addr_use_count.min_stateful_len()
|
||||
}
|
||||
|
||||
pub(crate) fn par_iter_height_mut(
|
||||
&mut self,
|
||||
) -> impl ParallelIterator<Item = &mut dyn AnyStoredVec> {
|
||||
self.reused_addr_use_count.par_iter_height_mut()
|
||||
}
|
||||
|
||||
pub(crate) fn reset_height(&mut self) -> Result<()> {
|
||||
self.reused_addr_use_count.reset_height()
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn push_height(&mut self, reused: &AddrTypeToReusedAddrUseCount) {
|
||||
self.reused_addr_use_count
|
||||
.push_height(reused.sum(), reused.values().copied());
|
||||
}
|
||||
|
||||
pub(crate) fn compute_rest(
|
||||
&mut self,
|
||||
starting_indexes: &Indexes,
|
||||
outputs_by_type: &outputs::ByTypeVecs,
|
||||
exit: &Exit,
|
||||
) -> Result<()> {
|
||||
self.reused_addr_use_count
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
|
||||
self.reused_addr_use_percent.all.compute_count_ratio(
|
||||
&self.reused_addr_use_count.all,
|
||||
&outputs_by_type.output_count.all,
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
for otype in OutputType::ADDR_TYPES {
|
||||
self.reused_addr_use_percent
|
||||
.by_addr_type
|
||||
.get_mut_unwrap(otype)
|
||||
.compute_count_ratio(
|
||||
self.reused_addr_use_count.by_addr_type.get_unwrap(otype),
|
||||
outputs_by_type.output_count.by_type.get(otype),
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -5,7 +5,7 @@ use rustc_hash::FxHashMap;
|
||||
use crate::distribution::{
|
||||
addr::{
|
||||
AddrTypeToActivityCounts, AddrTypeToExposedAddrCount, AddrTypeToExposedAddrSupply,
|
||||
AddrTypeToReusedAddrCount, AddrTypeToReusedAddrUseCount, AddrTypeToVec,
|
||||
AddrTypeToReusedAddrCount, AddrTypeToReusedAddrEventCount, AddrTypeToVec,
|
||||
},
|
||||
cohorts::AddrCohorts,
|
||||
};
|
||||
@@ -30,7 +30,8 @@ pub(crate) fn process_received(
|
||||
activity_counts: &mut AddrTypeToActivityCounts,
|
||||
reused_addr_count: &mut AddrTypeToReusedAddrCount,
|
||||
total_reused_addr_count: &mut AddrTypeToReusedAddrCount,
|
||||
reused_addr_use_count: &mut AddrTypeToReusedAddrUseCount,
|
||||
output_to_reused_addr_count: &mut AddrTypeToReusedAddrEventCount,
|
||||
active_reused_addr_count: &mut AddrTypeToReusedAddrEventCount,
|
||||
exposed_addr_count: &mut AddrTypeToExposedAddrCount,
|
||||
total_exposed_addr_count: &mut AddrTypeToExposedAddrCount,
|
||||
exposed_addr_supply: &mut AddrTypeToExposedAddrSupply,
|
||||
@@ -54,7 +55,9 @@ pub(crate) fn process_received(
|
||||
let type_activity = activity_counts.get_mut_unwrap(output_type);
|
||||
let type_reused_count = reused_addr_count.get_mut(output_type).unwrap();
|
||||
let type_total_reused_count = total_reused_addr_count.get_mut(output_type).unwrap();
|
||||
let type_reused_use_count = reused_addr_use_count.get_mut(output_type).unwrap();
|
||||
let type_output_to_reused_count = output_to_reused_addr_count.get_mut(output_type).unwrap();
|
||||
let type_active_reused_count =
|
||||
active_reused_addr_count.get_mut(output_type).unwrap();
|
||||
let type_exposed_count = exposed_addr_count.get_mut(output_type).unwrap();
|
||||
let type_total_exposed_count = total_exposed_addr_count.get_mut(output_type).unwrap();
|
||||
let type_exposed_supply = exposed_addr_supply.get_mut(output_type).unwrap();
|
||||
@@ -168,15 +171,26 @@ pub(crate) fn process_received(
|
||||
*type_reused_count += 1;
|
||||
}
|
||||
|
||||
// Block-level "active reused address" count: each address
|
||||
// is processed exactly once here (via aggregation), so we
|
||||
// count it once iff it is reused after the block's receives.
|
||||
// The sender-side counterpart in process_sent dedupes
|
||||
// against `received_addrs` so addresses that did both
|
||||
// aren't double-counted.
|
||||
if is_now_reused {
|
||||
*type_active_reused_count += 1;
|
||||
}
|
||||
|
||||
// Per-block reused-use count: every individual output to this
|
||||
// address counts iff the address was already reused at the
|
||||
// moment of that output. With aggregation, that means we
|
||||
// skip enough outputs at the front to take the lifetime
|
||||
// funding count from `funded_txo_count_before` past 1, then
|
||||
// count the rest. `skipped` is `max(0, 2 - before)`.
|
||||
let skipped = 2u32.saturating_sub(funded_txo_count_before);
|
||||
// address counts iff, at the moment the output arrives, the
|
||||
// address had already received at least one prior output
|
||||
// (i.e. it is an output-level "address reuse event"). With
|
||||
// aggregation, that means we skip the very first output the
|
||||
// address ever sees and count every subsequent one, so
|
||||
// `skipped` is `max(0, 1 - before)`.
|
||||
let skipped = 1u32.saturating_sub(funded_txo_count_before);
|
||||
let counted = recv.output_count.saturating_sub(skipped);
|
||||
*type_reused_use_count += u64::from(counted);
|
||||
*type_output_to_reused_count += u64::from(counted);
|
||||
|
||||
// Update exposed counts. The address's pubkey-exposure state
|
||||
// is unchanged by a receive (spent_txo_count unchanged), so we
|
||||
|
||||
@@ -7,7 +7,7 @@ use vecdb::VecIndex;
|
||||
use crate::distribution::{
|
||||
addr::{
|
||||
AddrTypeToActivityCounts, AddrTypeToExposedAddrCount, AddrTypeToExposedAddrSupply,
|
||||
AddrTypeToReusedAddrCount, HeightToAddrTypeToVec,
|
||||
AddrTypeToReusedAddrCount, AddrTypeToReusedAddrEventCount, HeightToAddrTypeToVec,
|
||||
},
|
||||
cohorts::AddrCohorts,
|
||||
compute::PriceRangeMax,
|
||||
@@ -39,6 +39,8 @@ pub(crate) fn process_sent(
|
||||
empty_addr_count: &mut ByAddrType<u64>,
|
||||
activity_counts: &mut AddrTypeToActivityCounts,
|
||||
reused_addr_count: &mut AddrTypeToReusedAddrCount,
|
||||
input_from_reused_addr_count: &mut AddrTypeToReusedAddrEventCount,
|
||||
active_reused_addr_count: &mut AddrTypeToReusedAddrEventCount,
|
||||
exposed_addr_count: &mut AddrTypeToExposedAddrCount,
|
||||
total_exposed_addr_count: &mut AddrTypeToExposedAddrCount,
|
||||
exposed_addr_supply: &mut AddrTypeToExposedAddrSupply,
|
||||
@@ -65,6 +67,10 @@ pub(crate) fn process_sent(
|
||||
let type_empty_count = empty_addr_count.get_mut(output_type).unwrap();
|
||||
let type_activity = activity_counts.get_mut_unwrap(output_type);
|
||||
let type_reused_count = reused_addr_count.get_mut(output_type).unwrap();
|
||||
let type_input_from_reused_count =
|
||||
input_from_reused_addr_count.get_mut(output_type).unwrap();
|
||||
let type_active_reused_count =
|
||||
active_reused_addr_count.get_mut(output_type).unwrap();
|
||||
let type_exposed_count = exposed_addr_count.get_mut(output_type).unwrap();
|
||||
let type_total_exposed_count = total_exposed_addr_count.get_mut(output_type).unwrap();
|
||||
let type_exposed_supply = exposed_addr_supply.get_mut(output_type).unwrap();
|
||||
@@ -74,6 +80,15 @@ pub(crate) fn process_sent(
|
||||
for (type_index, value) in vec {
|
||||
let addr_data = lookup.get_for_send(output_type, type_index);
|
||||
|
||||
// "Input from a reused address" event: the sending
|
||||
// address is in the reused set (lifetime
|
||||
// funded_txo_count > 1). Checked once per input. The
|
||||
// spend itself doesn't touch funded_txo_count so the
|
||||
// predicate is stable before/after `cohort_state.send`.
|
||||
if addr_data.is_reused() {
|
||||
*type_input_from_reused_count += 1;
|
||||
}
|
||||
|
||||
let prev_balance = addr_data.balance();
|
||||
let new_balance = prev_balance.checked_sub(value).unwrap();
|
||||
|
||||
@@ -81,9 +96,20 @@ pub(crate) fn process_sent(
|
||||
if type_seen.insert(type_index) {
|
||||
type_activity.sending += 1;
|
||||
|
||||
// Track "both" - addresses that sent AND received this block
|
||||
if type_received.is_some_and(|s| s.contains(&type_index)) {
|
||||
type_activity.both += 1;
|
||||
let also_received =
|
||||
type_received.is_some_and(|s| s.contains(&type_index));
|
||||
// Track "bidirectional": addresses that sent AND
|
||||
// received this block.
|
||||
if also_received {
|
||||
type_activity.bidirectional += 1;
|
||||
}
|
||||
|
||||
// Block-level "active reused address" count: count
|
||||
// every distinct sender that's reused, but skip
|
||||
// those that also received this block (already
|
||||
// counted in process_received).
|
||||
if !also_received && addr_data.is_reused() {
|
||||
*type_active_reused_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,7 +13,8 @@ use crate::{
|
||||
distribution::{
|
||||
addr::{
|
||||
AddrTypeToActivityCounts, AddrTypeToAddrCount, AddrTypeToExposedAddrCount,
|
||||
AddrTypeToExposedAddrSupply, AddrTypeToReusedAddrCount, AddrTypeToReusedAddrUseCount,
|
||||
AddrTypeToExposedAddrSupply, AddrTypeToReusedAddrCount,
|
||||
AddrTypeToReusedAddrEventCount,
|
||||
},
|
||||
block::{
|
||||
AddrCache, InputsResult, process_inputs, process_outputs, process_received,
|
||||
@@ -228,8 +229,16 @@ pub(crate) fn process_blocks(
|
||||
|
||||
// Track activity counts - reset each block
|
||||
let mut activity_counts = AddrTypeToActivityCounts::default();
|
||||
// Reused-use count - per-block flow, reset each block
|
||||
let mut reused_addr_use_counts = AddrTypeToReusedAddrUseCount::default();
|
||||
// Reused-addr event counts (receive + spend side). Per-block
|
||||
// flow, reset each block.
|
||||
let mut output_to_reused_addr_counts = AddrTypeToReusedAddrEventCount::default();
|
||||
let mut input_from_reused_addr_counts = AddrTypeToReusedAddrEventCount::default();
|
||||
// Distinct addresses active this block whose lifetime
|
||||
// funded_txo_count > 1 after this block's events. Incremented in
|
||||
// process_received for every receiver that ends up reused, and in
|
||||
// process_sent for every sender that's reused AND didn't also
|
||||
// receive this block (deduped via `received_addrs`).
|
||||
let mut active_reused_addr_counts = AddrTypeToReusedAddrEventCount::default();
|
||||
|
||||
debug!("creating AddrCache");
|
||||
let mut cache = AddrCache::new();
|
||||
@@ -302,7 +311,9 @@ pub(crate) fn process_blocks(
|
||||
|
||||
// Reset per-block activity counts
|
||||
activity_counts.reset();
|
||||
reused_addr_use_counts.reset();
|
||||
output_to_reused_addr_counts.reset();
|
||||
input_from_reused_addr_counts.reset();
|
||||
active_reused_addr_counts.reset();
|
||||
|
||||
// Process outputs, inputs, and tick-tock in parallel via rayon::join.
|
||||
// Collection (build tx_index mappings + bulk mmap reads) is merged into the
|
||||
@@ -474,7 +485,8 @@ pub(crate) fn process_blocks(
|
||||
&mut activity_counts,
|
||||
&mut reused_addr_counts,
|
||||
&mut total_reused_addr_counts,
|
||||
&mut reused_addr_use_counts,
|
||||
&mut output_to_reused_addr_counts,
|
||||
&mut active_reused_addr_counts,
|
||||
&mut exposed_addr_counts,
|
||||
&mut total_exposed_addr_counts,
|
||||
&mut exposed_addr_supply,
|
||||
@@ -491,6 +503,8 @@ pub(crate) fn process_blocks(
|
||||
&mut empty_addr_counts,
|
||||
&mut activity_counts,
|
||||
&mut reused_addr_counts,
|
||||
&mut input_from_reused_addr_counts,
|
||||
&mut active_reused_addr_counts,
|
||||
&mut exposed_addr_counts,
|
||||
&mut total_exposed_addr_counts,
|
||||
&mut exposed_addr_supply,
|
||||
@@ -524,7 +538,16 @@ pub(crate) fn process_blocks(
|
||||
total_reused_addr_counts.sum(),
|
||||
total_reused_addr_counts.values().copied(),
|
||||
);
|
||||
vecs.addrs.reused.uses.push_height(&reused_addr_use_counts);
|
||||
let activity_totals = activity_counts.totals();
|
||||
let active_addr_count = activity_totals.sending + activity_totals.receiving
|
||||
- activity_totals.bidirectional;
|
||||
let active_reused = u32::try_from(active_reused_addr_counts.sum()).unwrap();
|
||||
vecs.addrs.reused.events.push_height(
|
||||
&output_to_reused_addr_counts,
|
||||
&input_from_reused_addr_counts,
|
||||
active_addr_count,
|
||||
active_reused,
|
||||
);
|
||||
vecs.addrs.exposed.count.funded.push_height(
|
||||
exposed_addr_counts.sum(),
|
||||
exposed_addr_counts.values().copied(),
|
||||
@@ -609,7 +632,7 @@ fn push_cohort_states(
|
||||
height: Height,
|
||||
height_price: Cents,
|
||||
) {
|
||||
// Phase 1: push + unrealized (no reset yet — states still needed for aggregation)
|
||||
// Phase 1: push + unrealized (no reset yet; states still needed for aggregation)
|
||||
rayon::join(
|
||||
|| {
|
||||
utxo_cohorts.par_iter_separate_mut().for_each(|v| {
|
||||
|
||||
@@ -38,7 +38,7 @@ use super::{
|
||||
},
|
||||
};
|
||||
|
||||
const VERSION: Version = Version::new(22);
|
||||
const VERSION: Version = Version::new(23);
|
||||
|
||||
#[derive(Traversable)]
|
||||
pub struct AddrMetricsVecs<M: StorageMode = Rw> {
|
||||
@@ -151,7 +151,7 @@ impl Vecs {
|
||||
let empty_addr_count =
|
||||
AddrCountsVecs::forced_import(&db, "empty_addr_count", version, indexes)?;
|
||||
let addr_activity =
|
||||
AddrActivityVecs::forced_import(&db, "addr_activity", version, indexes, cached_starts)?;
|
||||
AddrActivityVecs::forced_import(&db, version, indexes, cached_starts)?;
|
||||
|
||||
// Stored total = addr_count + empty_addr_count (global + per-type, with all derived indexes)
|
||||
let total_addr_count = TotalAddrCountVecs::forced_import(&db, version, indexes)?;
|
||||
@@ -470,9 +470,12 @@ impl Vecs {
|
||||
// 6b. Compute address count sum (by addr_type -> all)
|
||||
self.addrs.funded.compute_rest(starting_indexes, exit)?;
|
||||
self.addrs.empty.compute_rest(starting_indexes, exit)?;
|
||||
self.addrs
|
||||
.reused
|
||||
.compute_rest(starting_indexes, &outputs.by_type, exit)?;
|
||||
self.addrs.reused.compute_rest(
|
||||
starting_indexes,
|
||||
&outputs.by_type,
|
||||
&inputs.by_type,
|
||||
exit,
|
||||
)?;
|
||||
self.addrs
|
||||
.exposed
|
||||
.compute_rest(starting_indexes, prices, exit)?;
|
||||
|
||||
@@ -90,7 +90,7 @@ impl Vecs {
|
||||
let supply_total_sats = &all_metrics.supply.total.sats.height;
|
||||
|
||||
// Supply-Adjusted CDD = sum_24h(CDD) / circulating_supply_btc
|
||||
self.coindays_destroyed_supply_adjusted
|
||||
self.coindays_destroyed_supply_adj
|
||||
.height
|
||||
.compute_transform2(
|
||||
starting_indexes.height,
|
||||
@@ -108,7 +108,7 @@ impl Vecs {
|
||||
)?;
|
||||
|
||||
// Supply-Adjusted CYD = CYD / circulating_supply_btc
|
||||
self.coinyears_destroyed_supply_adjusted
|
||||
self.coinyears_destroyed_supply_adj
|
||||
.height
|
||||
.compute_transform2(
|
||||
starting_indexes.height,
|
||||
@@ -126,7 +126,7 @@ impl Vecs {
|
||||
)?;
|
||||
|
||||
// Supply-Adjusted Dormancy = dormancy / circulating_supply_btc
|
||||
self.dormancy.supply_adjusted.height.compute_transform2(
|
||||
self.dormancy.supply_adj.height.compute_transform2(
|
||||
starting_indexes.height,
|
||||
&all_activity.dormancy._24h.height,
|
||||
supply_total_sats,
|
||||
|
||||
@@ -29,12 +29,12 @@ impl Vecs {
|
||||
let rhodl_ratio = RatioPerBlock::forced_import_raw(&db, "rhodl_ratio", v, indexes)?;
|
||||
let thermo_cap_multiple =
|
||||
RatioPerBlock::forced_import_raw(&db, "thermo_cap_multiple", v, indexes)?;
|
||||
let coindays_destroyed_supply_adjusted =
|
||||
PerBlock::forced_import(&db, "coindays_destroyed_supply_adjusted", v, indexes)?;
|
||||
let coinyears_destroyed_supply_adjusted =
|
||||
PerBlock::forced_import(&db, "coinyears_destroyed_supply_adjusted", v, indexes)?;
|
||||
let coindays_destroyed_supply_adj =
|
||||
PerBlock::forced_import(&db, "coindays_destroyed_supply_adj", v, indexes)?;
|
||||
let coinyears_destroyed_supply_adj =
|
||||
PerBlock::forced_import(&db, "coinyears_destroyed_supply_adj", v, indexes)?;
|
||||
let dormancy = super::vecs::DormancyVecs {
|
||||
supply_adjusted: PerBlock::forced_import(&db, "dormancy_supply_adjusted", v, indexes)?,
|
||||
supply_adj: PerBlock::forced_import(&db, "dormancy_supply_adj", v, indexes)?,
|
||||
flow: PerBlock::forced_import(&db, "dormancy_flow", v, indexes)?,
|
||||
};
|
||||
let stock_to_flow = PerBlock::forced_import(&db, "stock_to_flow", v, indexes)?;
|
||||
@@ -49,8 +49,8 @@ impl Vecs {
|
||||
gini,
|
||||
rhodl_ratio,
|
||||
thermo_cap_multiple,
|
||||
coindays_destroyed_supply_adjusted,
|
||||
coinyears_destroyed_supply_adjusted,
|
||||
coindays_destroyed_supply_adj,
|
||||
coinyears_destroyed_supply_adj,
|
||||
dormancy,
|
||||
stock_to_flow,
|
||||
seller_exhaustion,
|
||||
|
||||
@@ -7,7 +7,7 @@ use crate::internal::{PerBlock, PercentPerBlock, RatioPerBlock};
|
||||
|
||||
#[derive(Traversable)]
|
||||
pub struct DormancyVecs<M: StorageMode = Rw> {
|
||||
pub supply_adjusted: PerBlock<StoredF32, M>,
|
||||
pub supply_adj: PerBlock<StoredF32, M>,
|
||||
pub flow: PerBlock<StoredF32, M>,
|
||||
}
|
||||
|
||||
@@ -20,8 +20,8 @@ pub struct Vecs<M: StorageMode = Rw> {
|
||||
pub gini: PercentPerBlock<BasisPoints16, M>,
|
||||
pub rhodl_ratio: RatioPerBlock<BasisPoints32, M>,
|
||||
pub thermo_cap_multiple: RatioPerBlock<BasisPoints32, M>,
|
||||
pub coindays_destroyed_supply_adjusted: PerBlock<StoredF32, M>,
|
||||
pub coinyears_destroyed_supply_adjusted: PerBlock<StoredF32, M>,
|
||||
pub coindays_destroyed_supply_adj: PerBlock<StoredF32, M>,
|
||||
pub coinyears_destroyed_supply_adj: PerBlock<StoredF32, M>,
|
||||
pub dormancy: DormancyVecs<M>,
|
||||
pub stock_to_flow: PerBlock<StoredF32, M>,
|
||||
pub seller_exhaustion: PerBlock<StoredF32, M>,
|
||||
|
||||
@@ -85,8 +85,17 @@ impl Vecs {
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
}
|
||||
|
||||
for (otype, source) in self.input_count.by_type.iter_typed() {
|
||||
self.input_share.get_mut(otype).compute_count_ratio(
|
||||
source,
|
||||
&self.input_count.all,
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
}
|
||||
|
||||
for (otype, source) in self.tx_count.by_type.iter_typed() {
|
||||
self.tx_percent.get_mut(otype).compute_count_ratio(
|
||||
self.tx_share.get_mut(otype).compute_count_ratio(
|
||||
source,
|
||||
&self.tx_count.all,
|
||||
starting_indexes.height,
|
||||
|
||||
@@ -39,18 +39,28 @@ impl Vecs {
|
||||
cached_starts,
|
||||
)?;
|
||||
|
||||
let tx_percent = SpendableType::try_new(|_, name| {
|
||||
let input_share = SpendableType::try_new(|_, name| {
|
||||
PercentCumulativeRolling::forced_import(
|
||||
db,
|
||||
&format!("tx_percent_with_{name}_prevout"),
|
||||
&format!("{name}_prevout_share"),
|
||||
version,
|
||||
indexes,
|
||||
)
|
||||
})?;
|
||||
|
||||
let tx_share = SpendableType::try_new(|_, name| {
|
||||
PercentCumulativeRolling::forced_import(
|
||||
db,
|
||||
&format!("tx_share_with_{name}_prevout"),
|
||||
version,
|
||||
indexes,
|
||||
)
|
||||
})?;
|
||||
Ok(Self {
|
||||
input_count,
|
||||
input_share,
|
||||
tx_count,
|
||||
tx_percent,
|
||||
tx_share,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,7 @@ use crate::internal::{PerBlockCumulativeRolling, PercentCumulativeRolling};
|
||||
#[derive(Traversable)]
|
||||
pub struct Vecs<M: StorageMode = Rw> {
|
||||
pub input_count: WithInputTypes<PerBlockCumulativeRolling<StoredU64, StoredU64, M>>,
|
||||
pub input_share: SpendableType<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
pub tx_count: WithInputTypes<PerBlockCumulativeRolling<StoredU64, StoredU64, M>>,
|
||||
pub tx_percent: SpendableType<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
pub tx_share: SpendableType<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
}
|
||||
|
||||
@@ -1,7 +1,12 @@
|
||||
//! PerBlock with rolling average (no distribution stats).
|
||||
//!
|
||||
//! Stored height data + f64 cumulative + lazy 4-window rolling averages.
|
||||
//! Stored height data + cumulative + lazy 4-window rolling averages.
|
||||
//! Rolling averages are computed on-the-fly from the cumulative via DeltaAvg.
|
||||
//!
|
||||
//! Type parameters:
|
||||
//! - `T`: per-block value type
|
||||
//! - `C`: cumulative type, defaults to `T`. Use a wider type (e.g., `StoredU64`)
|
||||
//! when the prefix sum of `T` values could overflow `T`.
|
||||
|
||||
use brk_error::Result;
|
||||
|
||||
@@ -15,20 +20,22 @@ use crate::indexes;
|
||||
use crate::internal::{LazyRollingAvgsFromHeight, NumericValue, WindowStartVec, Windows};
|
||||
|
||||
#[derive(Traversable)]
|
||||
pub struct PerBlockRollingAverage<T, M: StorageMode = Rw>
|
||||
pub struct PerBlockRollingAverage<T, C = T, M: StorageMode = Rw>
|
||||
where
|
||||
T: NumericValue + JsonSchema,
|
||||
C: NumericValue + JsonSchema,
|
||||
{
|
||||
pub block: M::Stored<EagerVec<PcoVec<Height, T>>>,
|
||||
#[traversable(hidden)]
|
||||
pub cumulative: M::Stored<EagerVec<PcoVec<Height, T>>>,
|
||||
pub cumulative: M::Stored<EagerVec<PcoVec<Height, C>>>,
|
||||
#[traversable(flatten)]
|
||||
pub average: LazyRollingAvgsFromHeight<T>,
|
||||
pub average: LazyRollingAvgsFromHeight<C>,
|
||||
}
|
||||
|
||||
impl<T> PerBlockRollingAverage<T>
|
||||
impl<T, C> PerBlockRollingAverage<T, C>
|
||||
where
|
||||
T: NumericValue + JsonSchema,
|
||||
T: NumericValue + JsonSchema + Into<C>,
|
||||
C: NumericValue + JsonSchema,
|
||||
{
|
||||
pub(crate) fn forced_import(
|
||||
db: &Database,
|
||||
@@ -38,11 +45,11 @@ where
|
||||
cached_starts: &Windows<&WindowStartVec>,
|
||||
) -> Result<Self> {
|
||||
let block: EagerVec<PcoVec<Height, T>> = EagerVec::forced_import(db, name, version)?;
|
||||
let cumulative: EagerVec<PcoVec<Height, T>> =
|
||||
EagerVec::forced_import(db, &format!("{name}_cumulative"), version + Version::ONE)?;
|
||||
let cumulative: EagerVec<PcoVec<Height, C>> =
|
||||
EagerVec::forced_import(db, &format!("{name}_cumulative"), version + Version::TWO)?;
|
||||
let average = LazyRollingAvgsFromHeight::new(
|
||||
&format!("{name}_average"),
|
||||
version + Version::ONE,
|
||||
version + Version::TWO,
|
||||
&cumulative,
|
||||
cached_starts,
|
||||
indexes,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use brk_error::{OptionData, Result};
|
||||
use brk_indexer::Indexer;
|
||||
use brk_types::{Indexes, StoredU64};
|
||||
use vecdb::{AnyVec, Exit, ReadableVec, VecIndex, WritableVec};
|
||||
use brk_types::{Indexes, OutputType, StoredU64};
|
||||
use vecdb::{AnyStoredVec, AnyVec, Exit, ReadableVec, VecIndex, WritableVec};
|
||||
|
||||
use super::{Vecs, WithOutputTypes};
|
||||
use crate::internal::{CoinbasePolicy, PerBlockCumulativeRolling, walk_blocks};
|
||||
@@ -20,18 +20,25 @@ impl Vecs {
|
||||
|
||||
self.output_count
|
||||
.validate_and_truncate(dep_version, starting_indexes.height)?;
|
||||
self.spendable_output_count
|
||||
.block
|
||||
.validate_and_truncate(dep_version, starting_indexes.height)?;
|
||||
self.tx_count
|
||||
.validate_and_truncate(dep_version, starting_indexes.height)?;
|
||||
|
||||
let skip = self
|
||||
.output_count
|
||||
.min_stateful_len()
|
||||
.min(self.spendable_output_count.block.len())
|
||||
.min(self.tx_count.min_stateful_len());
|
||||
|
||||
let first_tx_index = &indexer.vecs.transactions.first_tx_index;
|
||||
let end = first_tx_index.len();
|
||||
if skip < end {
|
||||
self.output_count.truncate_if_needed_at(skip)?;
|
||||
self.spendable_output_count
|
||||
.block
|
||||
.truncate_if_needed_at(skip)?;
|
||||
self.tx_count.truncate_if_needed_at(skip)?;
|
||||
|
||||
let fi_batch = first_tx_index.collect_range_at(skip, end);
|
||||
@@ -63,10 +70,16 @@ impl Vecs {
|
||||
|agg| {
|
||||
push_block(&mut self.output_count, agg.entries_all, &agg.entries_per_type);
|
||||
push_block(&mut self.tx_count, agg.txs_all, &agg.txs_per_type);
|
||||
let spendable_total = agg.entries_all
|
||||
- agg.entries_per_type[OutputType::OpReturn as usize];
|
||||
self.spendable_output_count
|
||||
.block
|
||||
.push(StoredU64::from(spendable_total));
|
||||
|
||||
if self.output_count.all.block.batch_limit_reached() {
|
||||
let _lock = exit.lock();
|
||||
self.output_count.write()?;
|
||||
self.spendable_output_count.block.write()?;
|
||||
self.tx_count.write()?;
|
||||
}
|
||||
Ok(())
|
||||
@@ -76,17 +89,29 @@ impl Vecs {
|
||||
{
|
||||
let _lock = exit.lock();
|
||||
self.output_count.write()?;
|
||||
self.spendable_output_count.block.write()?;
|
||||
self.tx_count.write()?;
|
||||
}
|
||||
|
||||
self.output_count
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
self.spendable_output_count
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
self.tx_count
|
||||
.compute_rest(starting_indexes.height, exit)?;
|
||||
}
|
||||
|
||||
for (otype, source) in self.output_count.by_type.iter_typed() {
|
||||
self.output_share.get_mut(otype).compute_count_ratio(
|
||||
source,
|
||||
&self.output_count.all,
|
||||
starting_indexes.height,
|
||||
exit,
|
||||
)?;
|
||||
}
|
||||
|
||||
for (otype, source) in self.tx_count.by_type.iter_typed() {
|
||||
self.tx_percent.get_mut(otype).compute_count_ratio(
|
||||
self.tx_share.get_mut(otype).compute_count_ratio(
|
||||
source,
|
||||
&self.tx_count.all,
|
||||
starting_indexes.height,
|
||||
|
||||
@@ -6,9 +6,7 @@ use vecdb::Database;
|
||||
use super::{Vecs, WithOutputTypes};
|
||||
use crate::{
|
||||
indexes,
|
||||
internal::{
|
||||
PerBlockCumulativeRolling, PercentCumulativeRolling, WindowStartVec, Windows,
|
||||
},
|
||||
internal::{PerBlockCumulativeRolling, PercentCumulativeRolling, WindowStartVec, Windows},
|
||||
};
|
||||
|
||||
impl Vecs {
|
||||
@@ -39,18 +37,37 @@ impl Vecs {
|
||||
cached_starts,
|
||||
)?;
|
||||
|
||||
let tx_percent = ByType::try_new(|_, name| {
|
||||
let spendable_output_count = PerBlockCumulativeRolling::forced_import(
|
||||
db,
|
||||
"spendable_output_count",
|
||||
version,
|
||||
indexes,
|
||||
cached_starts,
|
||||
)?;
|
||||
|
||||
let output_share = ByType::try_new(|_, name| {
|
||||
PercentCumulativeRolling::forced_import(
|
||||
db,
|
||||
&format!("tx_percent_with_{name}_output"),
|
||||
&format!("{name}_output_share"),
|
||||
version,
|
||||
indexes,
|
||||
)
|
||||
})?;
|
||||
|
||||
let tx_share = ByType::try_new(|_, name| {
|
||||
PercentCumulativeRolling::forced_import(
|
||||
db,
|
||||
&format!("tx_share_with_{name}_output"),
|
||||
version,
|
||||
indexes,
|
||||
)
|
||||
})?;
|
||||
Ok(Self {
|
||||
output_count,
|
||||
spendable_output_count,
|
||||
output_share,
|
||||
tx_count,
|
||||
tx_percent,
|
||||
tx_share,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,6 +9,8 @@ use crate::internal::{PerBlockCumulativeRolling, PercentCumulativeRolling};
|
||||
#[derive(Traversable)]
|
||||
pub struct Vecs<M: StorageMode = Rw> {
|
||||
pub output_count: WithOutputTypes<PerBlockCumulativeRolling<StoredU64, StoredU64, M>>,
|
||||
pub spendable_output_count: PerBlockCumulativeRolling<StoredU64, StoredU64, M>,
|
||||
pub output_share: ByType<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
pub tx_count: WithOutputTypes<PerBlockCumulativeRolling<StoredU64, StoredU64, M>>,
|
||||
pub tx_percent: ByType<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
pub tx_share: ByType<PercentCumulativeRolling<BasisPoints16, M>>,
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ use brk_rpc::Client;
|
||||
use brk_types::{BlockHash, Height};
|
||||
use fjall::PersistMode;
|
||||
use parking_lot::RwLock;
|
||||
use tracing::{debug, info};
|
||||
use tracing::{debug, error, info};
|
||||
use vecdb::{
|
||||
Exit, RawDBError, ReadOnlyClone, ReadableVec, Ro, Rw, StorageMode, WritableVec, unlikely,
|
||||
};
|
||||
@@ -237,6 +237,18 @@ impl Indexer {
|
||||
let stores = &mut self.stores;
|
||||
|
||||
for block in reader.after(prev_hash)?.iter() {
|
||||
let block = match block {
|
||||
Ok(block) => block,
|
||||
Err(e) => {
|
||||
// The reader hit an unrecoverable mid-stream issue
|
||||
// (chain break, parse failure, missing blocks).
|
||||
// Stop cleanly so what we've already indexed gets
|
||||
// flushed in the post-loop export — the next
|
||||
// `index` call will resume from the new tip.
|
||||
error!("Reader stream stopped early: {e}");
|
||||
break;
|
||||
}
|
||||
};
|
||||
let height = block.height();
|
||||
|
||||
if unlikely(height.is_multiple_of(100)) {
|
||||
|
||||
@@ -19,16 +19,10 @@ fn main() -> Result<()> {
|
||||
let blocks = Blocks::new(&client, &reader);
|
||||
|
||||
let i = Instant::now();
|
||||
blocks
|
||||
.range(Height::new(920040), Height::new(920041))?
|
||||
// .start(Height::new(920040))?
|
||||
// .end(Height::new(10))?
|
||||
// .after(brk_types::BlockHash::try_from(
|
||||
// "00000000000000000000840d205cac2728740e0e7c5dc92a04c52503017c6241",
|
||||
// )?)?
|
||||
.for_each(|b| {
|
||||
dbg!(b.height());
|
||||
});
|
||||
for block in blocks.range(Height::new(920040), Height::new(920041))? {
|
||||
let block = block?;
|
||||
dbg!(block.height());
|
||||
}
|
||||
dbg!(i.elapsed());
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use brk_error::{Error, Result};
|
||||
use brk_types::Block;
|
||||
|
||||
use crate::State;
|
||||
@@ -11,7 +12,7 @@ impl BlockIterator {
|
||||
}
|
||||
|
||||
impl Iterator for BlockIterator {
|
||||
type Item = Block;
|
||||
type Item = Result<Block>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match &mut self.0 {
|
||||
@@ -21,35 +22,32 @@ impl Iterator for BlockIterator {
|
||||
prev_hash,
|
||||
} => {
|
||||
let height = heights.next()?;
|
||||
let hash = client.get_block_hash(height).ok()?;
|
||||
let block = client.get_block(&hash).ok()?;
|
||||
let hash = match client.get_block_hash(height) {
|
||||
Ok(h) => h,
|
||||
Err(e) => return Some(Err(e)),
|
||||
};
|
||||
let block = match client.get_block(&hash) {
|
||||
Ok(b) => b,
|
||||
Err(e) => return Some(Err(e)),
|
||||
};
|
||||
|
||||
if prev_hash
|
||||
.as_ref()
|
||||
.is_some_and(|prev_hash| block.header.prev_blockhash != prev_hash.into())
|
||||
.is_some_and(|prev| block.header.prev_blockhash != prev.into())
|
||||
{
|
||||
return None;
|
||||
return Some(Err(Error::Internal(
|
||||
"rpc iterator: chain continuity broken (likely reorg mid-iteration)",
|
||||
)));
|
||||
}
|
||||
|
||||
prev_hash.replace(hash.clone());
|
||||
|
||||
Some(Block::from((height, hash, block)))
|
||||
}
|
||||
State::Reader {
|
||||
receiver,
|
||||
after_hash,
|
||||
} => {
|
||||
let block = Block::from(receiver.recv().ok()?);
|
||||
|
||||
// Only validate the first block (Reader validates the rest)
|
||||
if let Some(expected_prev) = after_hash.take()
|
||||
&& block.header.prev_blockhash != expected_prev.into()
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(block)
|
||||
Some(Ok(Block::from((height, hash, block))))
|
||||
}
|
||||
State::Reader { receiver } => match receiver.recv().ok()? {
|
||||
Ok(b) => Some(Ok(Block::from(b))),
|
||||
Err(e) => Some(Err(e)),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,8 +12,7 @@ pub enum State {
|
||||
prev_hash: Option<BlockHash>,
|
||||
},
|
||||
Reader {
|
||||
receiver: Receiver<ReadBlock>,
|
||||
after_hash: Option<BlockHash>,
|
||||
receiver: Receiver<Result<ReadBlock>>,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -36,15 +35,26 @@ impl State {
|
||||
}
|
||||
}
|
||||
|
||||
/// `after_hash` selects between the two Reader entry points:
|
||||
///
|
||||
/// * `Some(anchor)` → [`Reader::after`], which seeds the pipeline's
|
||||
/// continuity check with `anchor` so the very first emitted
|
||||
/// block is verified against it. This is what stops a stale
|
||||
/// anchor (tip of a reorged-out chain) from silently producing
|
||||
/// a stitched stream.
|
||||
/// * `None` → [`Reader::range`], which has no anchor to verify
|
||||
/// against and just streams the canonical blocks at the given
|
||||
/// heights.
|
||||
pub fn new_reader(
|
||||
reader: Reader,
|
||||
start: Height,
|
||||
end: Height,
|
||||
after_hash: Option<BlockHash>,
|
||||
) -> Result<Self> {
|
||||
Ok(State::Reader {
|
||||
receiver: reader.range(start, end)?,
|
||||
after_hash,
|
||||
})
|
||||
let receiver = match after_hash {
|
||||
Some(hash) => reader.after(Some(hash))?,
|
||||
None => reader.range(start, end)?,
|
||||
};
|
||||
Ok(State::Reader { receiver })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@ brk_rpc = { workspace = true, features = ["corepc"] }
|
||||
brk_types = { workspace = true }
|
||||
crossbeam = { version = "0.8.4", features = ["crossbeam-channel"] }
|
||||
derive_more = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
parking_lot = { workspace = true }
|
||||
rlimit = "0.11.0"
|
||||
rustc-hash = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
|
||||
@@ -21,22 +21,28 @@ let client = Client::new(
|
||||
let reader = Reader::new(bitcoin_dir.join("blocks"), &client);
|
||||
|
||||
// Everything from genesis to the current tip
|
||||
for block in reader.after(None)?.iter() {
|
||||
for block in reader.after(None)? {
|
||||
let block = block?;
|
||||
println!("{}: {}", block.height(), block.hash());
|
||||
}
|
||||
|
||||
// Everything strictly after a known hash (typical sync / catchup pattern)
|
||||
for block in reader.after(Some(last_known_hash))?.iter() {
|
||||
for block in reader.after(Some(last_known_hash))? {
|
||||
let block = block?;
|
||||
// ...
|
||||
}
|
||||
|
||||
// A specific inclusive height range
|
||||
for block in reader.range(Height::new(800_000), Height::new(850_000))?.iter() {
|
||||
for block in reader.range(Height::new(800_000), Height::new(850_000))? {
|
||||
let block = block?;
|
||||
// ...
|
||||
}
|
||||
```
|
||||
|
||||
`Reader` is thread-safe and cheap to clone (Arc-backed).
|
||||
`Reader` is thread-safe and cheap to clone (Arc-backed). Each item is
|
||||
a `Result<ReadBlock>` so mid-stream failures (chain breaks, parse
|
||||
errors, missing canonical blocks) reach the consumer as a final
|
||||
`Err` instead of being silently dropped.
|
||||
|
||||
## What You Get
|
||||
|
||||
@@ -54,11 +60,19 @@ Each `ReadBlock` gives you access to:
|
||||
|
||||
## How It Works
|
||||
|
||||
Two-stage pipeline, one reader thread plus `N` parser threads
|
||||
(default `N = 1`, configurable via `after_with` / `range_with`):
|
||||
Two strategies, picked per call:
|
||||
|
||||
* **forward** — one reader thread walks blk files in order from a
|
||||
bisection lower bound, ships canonical hits to a parser pool of `N`
|
||||
threads (default `N = 1`, configurable via `after_with` /
|
||||
`range_with`), which decode bodies in parallel and emit in-order.
|
||||
* **tail** — single-threaded reverse scan of the newest blk files,
|
||||
used when the requested range sits within ~8 files of the chain
|
||||
tip. Avoids the forward pipeline's bisection + 21-file backoff
|
||||
(~2.7 GB of reads) for tip-clustered catchups.
|
||||
|
||||
```text
|
||||
canonical chain ──► Reader thread ──► Parser pool ──► Receiver<ReadBlock>
|
||||
canonical chain ──► Reader thread ──► Parser pool ──► Receiver<Result<ReadBlock>>
|
||||
(pre-fetched walks blk files, N workers in canonical order
|
||||
hashes via RPC) peeks headers, decode bodies
|
||||
ships hits
|
||||
@@ -67,15 +81,17 @@ canonical chain ──► Reader thread ──► Parser pool ──► Receiver
|
||||
1. **`CanonicalRange`** asks bitcoind once, up front, for the canonical
|
||||
block hash at every height in the target window — one batched
|
||||
JSON-RPC call, no per-block RPC chatter.
|
||||
2. **Reader thread** walks blk files in order, scans each for block
|
||||
magic, and for every block found hashes its 80-byte header and
|
||||
looks the hash up in the canonical map. Orphans short-circuit
|
||||
before the block bytes are cloned.
|
||||
3. **Parser pool** (scoped threads) fully decodes canonical bodies in
|
||||
parallel and serialises output through an in-order reorder buffer.
|
||||
The consumer always receives blocks in canonical-height order.
|
||||
2. **Reader thread** walks blk files, scans each for block magic, and
|
||||
for every block found hashes its 80-byte header and looks the hash
|
||||
up in the canonical map. Orphans short-circuit before the block
|
||||
bytes are cloned.
|
||||
3. **Parser pool** (scoped threads, forward pipeline only) fully
|
||||
decodes canonical bodies in parallel and serialises output through
|
||||
an in-order reorder buffer that also verifies `prev_blockhash`
|
||||
against the previously-emitted block — and against the user-
|
||||
supplied anchor for the very first block.
|
||||
|
||||
Orphans can never be mistaken for canonical blocks, and a missing
|
||||
canonical block produces a hard error instead of a silent drop. See
|
||||
`src/pipeline.rs` for the orchestration and `src/canonical.rs` for the
|
||||
filter map.
|
||||
canonical block produces a final `Err` to the consumer instead of a
|
||||
silent drop. See `src/pipeline/` for the orchestration and
|
||||
`src/canonical.rs` for the filter map.
|
||||
|
||||
@@ -24,7 +24,9 @@ use std::time::{Duration, Instant};
|
||||
use brk_error::Result;
|
||||
use brk_reader::{Reader, Receiver};
|
||||
use brk_rpc::{Auth, Client};
|
||||
use brk_types::{BlockHash, Height, ReadBlock};
|
||||
use brk_types::{Height, ReadBlock};
|
||||
|
||||
type BlockStream = Receiver<Result<ReadBlock>>;
|
||||
|
||||
const SCENARIOS: &[usize] = &[5, 10, 100, 1_000, 10_000];
|
||||
const REPEATS: usize = 3;
|
||||
@@ -51,7 +53,7 @@ fn main() -> Result<()> {
|
||||
for &n in SCENARIOS {
|
||||
let anchor_height = Height::from(tip.saturating_sub(n as u32));
|
||||
let anchor_hash = client.get_block_hash(*anchor_height as u64)?;
|
||||
let anchor = Some(BlockHash::from(anchor_hash));
|
||||
let anchor = Some(anchor_hash);
|
||||
|
||||
let mut first: Option<RunStats> = None;
|
||||
for &p in PARSER_COUNTS {
|
||||
@@ -113,7 +115,7 @@ struct RunStats {
|
||||
|
||||
fn bench<F>(repeats: usize, mut f: F) -> Result<RunStats>
|
||||
where
|
||||
F: FnMut() -> Result<Receiver<ReadBlock>>,
|
||||
F: FnMut() -> Result<BlockStream>,
|
||||
{
|
||||
let mut best = Duration::MAX;
|
||||
let mut total = Duration::ZERO;
|
||||
@@ -124,6 +126,7 @@ where
|
||||
let recv = f()?;
|
||||
let mut n = 0;
|
||||
for block in recv.iter() {
|
||||
let block = block?;
|
||||
std::hint::black_box(block.height());
|
||||
n += 1;
|
||||
}
|
||||
@@ -175,12 +178,13 @@ struct FullRun {
|
||||
|
||||
fn run_once<F>(mut f: F) -> Result<FullRun>
|
||||
where
|
||||
F: FnMut() -> Result<Receiver<ReadBlock>>,
|
||||
F: FnMut() -> Result<BlockStream>,
|
||||
{
|
||||
let start = Instant::now();
|
||||
let recv = f()?;
|
||||
let mut count = 0;
|
||||
for block in recv.iter() {
|
||||
let block = block?;
|
||||
std::hint::black_box(block.height());
|
||||
count += 1;
|
||||
}
|
||||
@@ -195,12 +199,13 @@ where
|
||||
/// the channel, which unblocks and unwinds the reader's spawned worker.
|
||||
fn run_bounded<F>(limit: usize, mut f: F) -> Result<FullRun>
|
||||
where
|
||||
F: FnMut() -> Result<Receiver<ReadBlock>>,
|
||||
F: FnMut() -> Result<BlockStream>,
|
||||
{
|
||||
let start = Instant::now();
|
||||
let recv = f()?;
|
||||
let mut count = 0;
|
||||
for block in recv.iter().take(limit) {
|
||||
let block = block?;
|
||||
std::hint::black_box(block.height());
|
||||
count += 1;
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
use brk_error::Result;
|
||||
use brk_reader::Reader;
|
||||
use brk_reader::{BlkIndexToBlkPath, Reader};
|
||||
use brk_rpc::{Auth, Client};
|
||||
|
||||
fn main() -> Result<()> {
|
||||
@@ -11,7 +11,7 @@ fn main() -> Result<()> {
|
||||
|
||||
let reader = Reader::new(bitcoin_dir.join("blocks"), &client);
|
||||
let xor_bytes = reader.xor_bytes();
|
||||
let blk_map = reader.blk_index_to_blk_path();
|
||||
let blk_map = BlkIndexToBlkPath::scan(reader.blocks_dir())?;
|
||||
|
||||
let mut prev_height: Option<u32> = None;
|
||||
let mut max_drop: u32 = 0;
|
||||
|
||||
@@ -15,6 +15,7 @@ fn main() -> Result<()> {
|
||||
// Stream all blocks from genesis to the current tip.
|
||||
let i = std::time::Instant::now();
|
||||
for block in reader.after(None)?.iter() {
|
||||
let block = block?;
|
||||
println!("{}: {}", block.height(), block.hash());
|
||||
}
|
||||
println!("Full read: {:?}", i.elapsed());
|
||||
|
||||
@@ -20,6 +20,7 @@ fn main() -> Result<()> {
|
||||
let i = std::time::Instant::now();
|
||||
|
||||
if let Some(block) = reader.range(height, height)?.iter().next() {
|
||||
let block = block?;
|
||||
println!(
|
||||
"height={} hash={} txs={} coinbase=\"{:?}\" ({:?})",
|
||||
block.height(),
|
||||
|
||||
109
crates/brk_reader/src/bisect.rs
Normal file
109
crates/brk_reader/src/bisect.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
//! Helpers for picking where to start scanning: probe the first
|
||||
//! block of a file ([`first_block_height`]) and bisect the blk-index
|
||||
//! map for a target height ([`find_start_blk_index`]).
|
||||
|
||||
use std::{fs::File, io::Read, path::Path};
|
||||
|
||||
use bitcoin::{block::Header, consensus::Decodable};
|
||||
use brk_error::{Error, Result};
|
||||
use brk_rpc::Client;
|
||||
use brk_types::Height;
|
||||
use tracing::warn;
|
||||
|
||||
use crate::{
|
||||
BlkIndexToBlkPath, OUT_OF_ORDER_FILE_BACKOFF, XORBytes, XORIndex,
|
||||
parse::HEADER_LEN, scan::find_magic,
|
||||
};
|
||||
|
||||
const PROBE_BUF_LEN: usize = 4096;
|
||||
|
||||
/// Decodes the first block in `blk_path` and resolves its height via
|
||||
/// RPC. One short read + one RPC.
|
||||
pub(crate) fn first_block_height(
|
||||
client: &Client,
|
||||
blk_path: &Path,
|
||||
xor_bytes: XORBytes,
|
||||
) -> Result<Height> {
|
||||
let mut file = File::open(blk_path)?;
|
||||
let mut buf = [0u8; PROBE_BUF_LEN];
|
||||
let n = file.read(&mut buf)?;
|
||||
|
||||
let mut xor_i = XORIndex::default();
|
||||
let magic_end = find_magic(&buf[..n], &mut xor_i, xor_bytes)
|
||||
.ok_or_else(|| Error::NotFound("No magic bytes found".into()))?;
|
||||
|
||||
// Decode the 4-byte size + 80-byte header in one pass; the size
|
||||
// is discarded. Bounds-check first so a corrupt file whose only
|
||||
// magic-shaped bytes sit at the end of the probe doesn't index
|
||||
// past `n`.
|
||||
let header_end = magic_end + 4 + HEADER_LEN;
|
||||
if header_end > n {
|
||||
warn!(
|
||||
"first_block_height: {} has magic-shaped bytes at offset {} but \
|
||||
not enough room in the {}-byte probe to decode the header — \
|
||||
the file is probably corrupt",
|
||||
blk_path.display(),
|
||||
magic_end - 4,
|
||||
PROBE_BUF_LEN,
|
||||
);
|
||||
return Err(Error::Parse(format!(
|
||||
"blk file probe truncated before header at {}",
|
||||
blk_path.display()
|
||||
)));
|
||||
}
|
||||
xor_i.bytes(&mut buf[magic_end..header_end], xor_bytes);
|
||||
|
||||
let header = Header::consensus_decode(&mut &buf[magic_end + 4..header_end])?;
|
||||
let height = client.get_block_info(&header.block_hash())?.height as u32;
|
||||
|
||||
Ok(Height::new(height))
|
||||
}
|
||||
|
||||
/// Bisects the map for the file whose first block height is ≤
|
||||
/// `target_start`, then backs off [`OUT_OF_ORDER_FILE_BACKOFF`] files.
|
||||
/// Always returns a valid blk index — read errors mid-search log and
|
||||
/// fall through to the backoff (or to 0 if the map is empty).
|
||||
///
|
||||
/// On a transient read error we **break** rather than `left = mid + 1`:
|
||||
/// the height bound at `mid` is unknown, so any further bisection on
|
||||
/// that side could skip valid lower indices. Falling through to the
|
||||
/// backoff still gives a safe lower bound.
|
||||
pub(crate) fn find_start_blk_index(
|
||||
client: &Client,
|
||||
target_start: Height,
|
||||
paths: &BlkIndexToBlkPath,
|
||||
xor_bytes: XORBytes,
|
||||
) -> u16 {
|
||||
let entries: Vec<(u16, &Path)> = paths.iter().map(|(&i, p)| (i, p.as_path())).collect();
|
||||
if entries.is_empty() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
let mut left = 0;
|
||||
let mut right = entries.len() - 1;
|
||||
let mut best_start_idx = 0;
|
||||
|
||||
while left <= right {
|
||||
let mid = (left + right) / 2;
|
||||
let (blk_index, blk_path) = entries[mid];
|
||||
match first_block_height(client, blk_path, xor_bytes) {
|
||||
Ok(height) if height <= target_start => {
|
||||
best_start_idx = mid;
|
||||
left = mid + 1;
|
||||
}
|
||||
Ok(_) => {
|
||||
if mid == 0 {
|
||||
break;
|
||||
}
|
||||
right = mid - 1;
|
||||
}
|
||||
Err(e) => {
|
||||
warn!("find_start_blk_index: read error at blk{blk_index:05}.dat: {e}");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let final_idx = best_start_idx.saturating_sub(OUT_OF_ORDER_FILE_BACKOFF);
|
||||
entries[final_idx].0
|
||||
}
|
||||
@@ -4,29 +4,49 @@ use std::{
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use derive_more::{Deref, DerefMut};
|
||||
use brk_error::{Error, Result};
|
||||
use derive_more::Deref;
|
||||
|
||||
const BLK: &str = "blk";
|
||||
const DOT_DAT: &str = ".dat";
|
||||
|
||||
#[derive(Debug, Clone, Deref, DerefMut)]
|
||||
#[derive(Debug, Default, Clone, Deref)]
|
||||
pub struct BlkIndexToBlkPath(BTreeMap<u16, PathBuf>);
|
||||
|
||||
impl BlkIndexToBlkPath {
|
||||
pub fn scan(blocks_dir: &Path) -> Self {
|
||||
Self(
|
||||
fs::read_dir(blocks_dir)
|
||||
.unwrap()
|
||||
.filter_map(|entry| {
|
||||
let path = entry.unwrap().path();
|
||||
let file_name = path.file_name()?.to_str()?;
|
||||
/// Collects every `blkNNNNN.dat` in `blocks_dir`. Unrelated
|
||||
/// entries (`xor.dat`, `rev*.dat`, `index/`, …) are skipped
|
||||
/// silently; anything that **looks** like a blk file but fails to
|
||||
/// parse or isn't a regular file is a hard error, since silently
|
||||
/// dropping one would leave an undetectable hole in the chain.
|
||||
pub fn scan(blocks_dir: &Path) -> Result<Self> {
|
||||
let mut map = BTreeMap::new();
|
||||
|
||||
let index_str = file_name.strip_prefix(BLK)?.strip_suffix(DOT_DAT)?;
|
||||
let blk_index = index_str.parse::<u16>().ok()?;
|
||||
for entry in fs::read_dir(blocks_dir)? {
|
||||
let path = entry?.path();
|
||||
|
||||
path.is_file().then_some((blk_index, path))
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
let Some(file_name) = path.file_name().and_then(|n| n.to_str()) else {
|
||||
continue;
|
||||
};
|
||||
let Some(index_str) = file_name.strip_prefix(BLK).and_then(|s| s.strip_suffix(DOT_DAT))
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
|
||||
let blk_index = index_str
|
||||
.parse::<u16>()
|
||||
.map_err(|_| Error::Parse(format!("Malformed blk file name: {file_name}")))?;
|
||||
|
||||
if !path.is_file() {
|
||||
return Err(Error::Parse(format!(
|
||||
"blk entry is not a regular file: {}",
|
||||
path.display()
|
||||
)));
|
||||
}
|
||||
|
||||
map.insert(blk_index, path);
|
||||
}
|
||||
|
||||
Ok(Self(map))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,36 +1,28 @@
|
||||
//! `CanonicalRange`: a pre-fetched map from canonical block hash to
|
||||
//! offset-from-`start`. The reader uses this as the authoritative
|
||||
//! filter for "is this block on the main chain?".
|
||||
//!
|
||||
//! Every canonical hash in the target height window is fetched from
|
||||
//! bitcoind up front via [`get_block_hashes_range`], so the scan
|
||||
//! pipeline never needs a per-block RPC call (which is what caused the
|
||||
//! original silent-drop reorg bug).
|
||||
//!
|
||||
//! [`get_block_hashes_range`]: brk_rpc::Client::get_block_hashes_range
|
||||
//! `CanonicalRange`: every canonical block hash in a height window,
|
||||
//! pre-fetched once via [`brk_rpc::Client::get_block_hashes_range`].
|
||||
//! Used as the authoritative "is this block on the main chain?"
|
||||
//! filter so the scan pipeline never needs a per-block RPC call.
|
||||
|
||||
use brk_error::Result;
|
||||
use brk_rpc::Client;
|
||||
use brk_types::{BlockHash, BlockHashPrefix, Height};
|
||||
use brk_types::{BlockHash, Height};
|
||||
use rustc_hash::FxHashMap;
|
||||
|
||||
/// Every canonical block hash in a contiguous height window, resolved
|
||||
/// from bitcoind once up front. `hashes[i]` is the canonical hash at
|
||||
/// height `start + i`. Lookups by hash go through `by_prefix` (8-byte
|
||||
/// key, same scheme as `brk_store`) and verify the full hash on hit.
|
||||
/// Keyed on the full 32-byte hash because a prefix collision would
|
||||
/// silently drop both blocks; the ~24 MB extra RAM is negligible
|
||||
/// against the 128 MB blk reads happening in parallel.
|
||||
pub struct CanonicalRange {
|
||||
pub start: Height,
|
||||
hashes: Vec<BlockHash>,
|
||||
by_prefix: FxHashMap<BlockHashPrefix, u32>,
|
||||
by_hash: FxHashMap<BlockHash, u32>,
|
||||
}
|
||||
|
||||
impl CanonicalRange {
|
||||
/// Resolves canonical hashes for every height strictly after
|
||||
/// `anchor` up to `tip` inclusive. `anchor = None` starts at
|
||||
/// genesis.
|
||||
pub fn walk(client: &Client, anchor: Option<BlockHash>, tip: Height) -> Result<Self> {
|
||||
pub fn walk(client: &Client, anchor: Option<&BlockHash>, tip: Height) -> Result<Self> {
|
||||
let start = match anchor {
|
||||
Some(hash) => Height::from(client.get_block_header_info(&hash)?.height + 1),
|
||||
Some(hash) => Height::from(client.get_block_header_info(hash)?.height + 1),
|
||||
None => Height::ZERO,
|
||||
};
|
||||
Self::between(client, start, tip)
|
||||
@@ -41,43 +33,29 @@ impl CanonicalRange {
|
||||
if start > end {
|
||||
return Ok(Self {
|
||||
start,
|
||||
hashes: Vec::new(),
|
||||
by_prefix: FxHashMap::default(),
|
||||
by_hash: FxHashMap::default(),
|
||||
});
|
||||
}
|
||||
|
||||
let hashes = client.get_block_hashes_range(*start, *end)?;
|
||||
let mut by_prefix =
|
||||
FxHashMap::with_capacity_and_hasher(hashes.len(), Default::default());
|
||||
by_prefix.extend(
|
||||
hashes
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, h)| (BlockHashPrefix::from(h), i as u32)),
|
||||
);
|
||||
|
||||
Ok(Self {
|
||||
start,
|
||||
hashes,
|
||||
by_prefix,
|
||||
})
|
||||
let by_hash = client
|
||||
.get_block_hashes_range(*start, *end)?
|
||||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(i, h)| (h, i as u32))
|
||||
.collect();
|
||||
Ok(Self { start, by_hash })
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
self.hashes.len()
|
||||
self.by_hash.len()
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.hashes.is_empty()
|
||||
self.by_hash.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the offset-from-`start` of `hash` iff it matches the
|
||||
/// canonical chain in this range. A prefix hit is verified against
|
||||
/// the full hash so prefix collisions from orphaned blocks are
|
||||
/// rejected.
|
||||
/// Offset-from-`start` of `hash` iff it's on the canonical chain.
|
||||
#[inline]
|
||||
pub(crate) fn offset_of(&self, hash: &BlockHash) -> Option<u32> {
|
||||
let offset = *self.by_prefix.get(&BlockHashPrefix::from(hash))?;
|
||||
(self.hashes[offset as usize] == *hash).then_some(offset)
|
||||
self.by_hash.get(hash).copied()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,15 +9,14 @@ use std::{
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use bitcoin::{block::Header, consensus::Decodable};
|
||||
use blk_index_to_blk_path::*;
|
||||
use brk_error::{Error, Result};
|
||||
use brk_rpc::Client;
|
||||
use brk_types::{BlkPosition, BlockHash, Height, ReadBlock};
|
||||
pub use crossbeam::channel::Receiver;
|
||||
use derive_more::Deref;
|
||||
use parking_lot::{RwLock, RwLockReadGuard};
|
||||
use parking_lot::RwLock;
|
||||
use tracing::warn;
|
||||
|
||||
mod bisect;
|
||||
mod blk_index_to_blk_path;
|
||||
mod canonical;
|
||||
mod parse;
|
||||
@@ -26,259 +25,201 @@ mod scan;
|
||||
mod xor_bytes;
|
||||
mod xor_index;
|
||||
|
||||
pub use blk_index_to_blk_path::BlkIndexToBlkPath;
|
||||
pub use canonical::CanonicalRange;
|
||||
use scan::*;
|
||||
pub use xor_bytes::*;
|
||||
pub use xor_index::*;
|
||||
|
||||
/// How many blk files to step back from the binary-search hit in
|
||||
/// [`ReaderInner::find_start_blk_index`]. Guards against blocks that
|
||||
/// bitcoind wrote to the "current" file slightly out of height order
|
||||
/// (e.g. the tail of a reorg landing in an earlier file index than
|
||||
/// its successors).
|
||||
const START_BLK_INDEX_BACKOFF: usize = 21;
|
||||
/// Number of blk files of out-of-order slack to tolerate. bitcoind sometimes writes
|
||||
/// blocks slightly out of height order across files (initial sync,
|
||||
/// headers-first body fetch, reindex), so a single "out of bounds"
|
||||
/// signal isn't enough to declare failure. Used by the forward
|
||||
/// bisection backoff and the tail bailout streak.
|
||||
pub(crate) const OUT_OF_ORDER_FILE_BACKOFF: usize = 21;
|
||||
|
||||
/// Handle to a Bitcoin Core blk-file reader.
|
||||
///
|
||||
/// Cheap to clone (`Arc`-backed) and thread-safe: all streaming
|
||||
/// methods take `&self` and the returned `Receiver<ReadBlock>` can be
|
||||
const TARGET_NOFILE: u64 = 15_000;
|
||||
|
||||
/// Bitcoin Core blk-file reader. Cheap to clone (`Arc`-backed) and
|
||||
/// thread-safe: every method takes `&self` and the
|
||||
/// `Receiver<Result<ReadBlock>>` from the streaming API can be
|
||||
/// drained from any thread.
|
||||
#[derive(Debug, Clone, Deref)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Reader(Arc<ReaderInner>);
|
||||
|
||||
impl Reader {
|
||||
/// Raises the per-process `NOFILE` limit so the file-handle cache
|
||||
/// can keep one open `File` per `blkNNNNN.dat`. For tests or
|
||||
/// embeddings that don't want the process-wide rlimit side
|
||||
/// effect, use [`Self::new_without_rlimit`].
|
||||
pub fn new(blocks_dir: PathBuf, client: &Client) -> Self {
|
||||
Self(Arc::new(ReaderInner::new(blocks_dir, client.clone())))
|
||||
Self::raise_fd_limit();
|
||||
Self::new_without_rlimit(blocks_dir, client)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ReaderInner {
|
||||
blk_index_to_blk_path: Arc<RwLock<BlkIndexToBlkPath>>,
|
||||
blk_file_cache: RwLock<BTreeMap<u16, File>>,
|
||||
xor_bytes: XORBytes,
|
||||
blocks_dir: PathBuf,
|
||||
client: Client,
|
||||
}
|
||||
|
||||
impl ReaderInner {
|
||||
pub fn new(blocks_dir: PathBuf, client: Client) -> Self {
|
||||
let no_file_limit = rlimit::getrlimit(rlimit::Resource::NOFILE).unwrap_or((0, 0));
|
||||
let _ = rlimit::setrlimit(
|
||||
rlimit::Resource::NOFILE,
|
||||
no_file_limit.0.max(15_000),
|
||||
no_file_limit.1,
|
||||
);
|
||||
|
||||
Self {
|
||||
pub fn new_without_rlimit(blocks_dir: PathBuf, client: &Client) -> Self {
|
||||
Self(Arc::new(ReaderInner {
|
||||
xor_bytes: XORBytes::from(blocks_dir.as_path()),
|
||||
blk_index_to_blk_path: Arc::new(RwLock::new(BlkIndexToBlkPath::scan(
|
||||
blocks_dir.as_path(),
|
||||
))),
|
||||
blk_file_cache: RwLock::new(BTreeMap::new()),
|
||||
blocks_dir,
|
||||
client,
|
||||
client: client.clone(),
|
||||
}))
|
||||
}
|
||||
|
||||
/// Called automatically by [`Self::new`]. Exposed so callers
|
||||
/// using [`Self::new_without_rlimit`] can opt in once.
|
||||
///
|
||||
/// Raises **only the soft limit**, clamped to the current hard
|
||||
/// limit — raising the hard limit requires `CAP_SYS_RESOURCE`
|
||||
/// and would fail (dropping the entire call) on containers and
|
||||
/// unprivileged macOS user processes.
|
||||
pub fn raise_fd_limit() {
|
||||
let (soft, hard) = rlimit::getrlimit(rlimit::Resource::NOFILE).unwrap_or((0, 0));
|
||||
let new_soft = soft.max(TARGET_NOFILE).min(hard);
|
||||
if new_soft > soft
|
||||
&& let Err(e) = rlimit::setrlimit(rlimit::Resource::NOFILE, new_soft, hard)
|
||||
{
|
||||
warn!("failed to raise NOFILE rlimit: {e}");
|
||||
}
|
||||
}
|
||||
|
||||
pub fn client(&self) -> &Client {
|
||||
&self.client
|
||||
&self.0.client
|
||||
}
|
||||
|
||||
pub fn blocks_dir(&self) -> &Path {
|
||||
&self.blocks_dir
|
||||
}
|
||||
|
||||
pub fn blk_index_to_blk_path(&self) -> RwLockReadGuard<'_, BlkIndexToBlkPath> {
|
||||
self.blk_index_to_blk_path.read()
|
||||
&self.0.blocks_dir
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn xor_bytes(&self) -> XORBytes {
|
||||
self.xor_bytes
|
||||
self.0.xor_bytes
|
||||
}
|
||||
|
||||
/// Ensure the blk file for `blk_index` is in the file handle cache.
|
||||
fn ensure_blk_cached(&self, blk_index: u16) -> Result<()> {
|
||||
if self.blk_file_cache.read().contains_key(&blk_index) {
|
||||
return Ok(());
|
||||
}
|
||||
let blk_paths = self.blk_index_to_blk_path();
|
||||
let blk_path = blk_paths
|
||||
.get(&blk_index)
|
||||
.ok_or(Error::NotFound("Blk file not found".into()))?;
|
||||
let file = File::open(blk_path)?;
|
||||
self.blk_file_cache.write().entry(blk_index).or_insert(file);
|
||||
Ok(())
|
||||
/// Decode the first block in `blk_path` and resolve its height
|
||||
/// via RPC. Exposed for inspection tools (see
|
||||
/// `examples/blk_heights.rs`).
|
||||
pub fn first_block_height(&self, blk_path: &Path, xor_bytes: XORBytes) -> Result<Height> {
|
||||
bisect::first_block_height(&self.0.client, blk_path, xor_bytes)
|
||||
}
|
||||
|
||||
/// Read raw bytes from a blk file at the given position with XOR decoding.
|
||||
/// `read_exact_at` so a short read becomes a hard error instead
|
||||
/// of silent corruption from the buffer's zero-init tail.
|
||||
pub fn read_raw_bytes(&self, position: BlkPosition, size: usize) -> Result<Vec<u8>> {
|
||||
self.ensure_blk_cached(position.blk_index())?;
|
||||
|
||||
let cache = self.blk_file_cache.read();
|
||||
let file = cache.get(&position.blk_index()).unwrap();
|
||||
let file = self.0.open_blk(position.blk_index())?;
|
||||
let mut buffer = vec![0u8; size];
|
||||
file.read_at(&mut buffer, position.offset() as u64)?;
|
||||
XORIndex::decode_at(&mut buffer, position.offset() as usize, self.xor_bytes);
|
||||
file.read_exact_at(&mut buffer, position.offset() as u64)?;
|
||||
XORIndex::decode_at(&mut buffer, position.offset() as usize, self.0.xor_bytes);
|
||||
Ok(buffer)
|
||||
}
|
||||
|
||||
/// Returns a `Read` impl positioned at `position` in the blk file.
|
||||
/// Reads only the bytes requested — no upfront allocation.
|
||||
pub fn reader_at(&self, position: BlkPosition) -> Result<BlkRead<'_>> {
|
||||
self.ensure_blk_cached(position.blk_index())?;
|
||||
|
||||
let mut xor_index = XORIndex::default();
|
||||
xor_index.add_assign(position.offset() as usize);
|
||||
|
||||
/// Streaming `Read` at `position`. Holds an `Arc<File>` so the
|
||||
/// cache lock isn't held across the I/O.
|
||||
pub fn reader_at(&self, position: BlkPosition) -> Result<BlkRead> {
|
||||
let file = self.0.open_blk(position.blk_index())?;
|
||||
Ok(BlkRead {
|
||||
cache: self.blk_file_cache.read(),
|
||||
blk_index: position.blk_index(),
|
||||
file,
|
||||
offset: position.offset() as u64,
|
||||
xor_index,
|
||||
xor_bytes: self.xor_bytes,
|
||||
xor_index: XORIndex::at_offset(position.offset() as usize),
|
||||
xor_bytes: self.0.xor_bytes,
|
||||
})
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
// Public streaming API — all calls delegate to `pipeline::spawn`.
|
||||
// ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Streams every canonical block strictly after `hash` (or from
|
||||
/// genesis when `None`) up to the current chain tip, in canonical
|
||||
/// order. Uses the default parser-thread count; see
|
||||
/// [`after_with`](Self::after_with) to override.
|
||||
pub fn after(&self, hash: Option<BlockHash>) -> Result<Receiver<ReadBlock>> {
|
||||
/// genesis when `None`) up to the current chain tip.
|
||||
pub fn after(&self, hash: Option<BlockHash>) -> Result<Receiver<Result<ReadBlock>>> {
|
||||
self.after_with(hash, pipeline::DEFAULT_PARSER_THREADS)
|
||||
}
|
||||
|
||||
/// Like [`after`](Self::after) but with a configurable number of
|
||||
/// parser threads. `parser_threads = 1` is the minimal-thread
|
||||
/// default (1 reader + 1 parser, uncontended mutex). Higher values
|
||||
/// trade extra cores for throughput on dense ranges where the
|
||||
/// parser is the bottleneck.
|
||||
/// Like [`after`](Self::after) with a configurable parser-thread
|
||||
/// count. The default of 1 reader + 1 parser leaves the rest of
|
||||
/// the cores for the indexer; bench tools that drain the channel
|
||||
/// cheaply can override.
|
||||
pub fn after_with(
|
||||
&self,
|
||||
hash: Option<BlockHash>,
|
||||
parser_threads: usize,
|
||||
) -> Result<Receiver<ReadBlock>> {
|
||||
let tip = self.client.get_last_height()?;
|
||||
let canonical = CanonicalRange::walk(&self.client, hash, tip)?;
|
||||
pipeline::spawn(self, canonical, parser_threads)
|
||||
) -> Result<Receiver<Result<ReadBlock>>> {
|
||||
let tip = self.0.client.get_last_height()?;
|
||||
let canonical = CanonicalRange::walk(&self.0.client, hash.as_ref(), tip)?;
|
||||
pipeline::spawn(self.0.clone(), canonical, hash, parser_threads)
|
||||
}
|
||||
|
||||
/// Streams every canonical block in the inclusive height range
|
||||
/// `start..=end` in canonical order, via the same pipeline as
|
||||
/// [`after`](Self::after).
|
||||
pub fn range(&self, start: Height, end: Height) -> Result<Receiver<ReadBlock>> {
|
||||
/// Inclusive height range `start..=end` in canonical order.
|
||||
pub fn range(&self, start: Height, end: Height) -> Result<Receiver<Result<ReadBlock>>> {
|
||||
self.range_with(start, end, pipeline::DEFAULT_PARSER_THREADS)
|
||||
}
|
||||
|
||||
/// Like [`range`](Self::range) but with a configurable number of
|
||||
/// parser threads. See [`after_with`](Self::after_with) for the
|
||||
/// parser-count tradeoff.
|
||||
pub fn range_with(
|
||||
&self,
|
||||
start: Height,
|
||||
end: Height,
|
||||
parser_threads: usize,
|
||||
) -> Result<Receiver<ReadBlock>> {
|
||||
let canonical = CanonicalRange::between(&self.client, start, end)?;
|
||||
pipeline::spawn(self, canonical, parser_threads)
|
||||
}
|
||||
|
||||
/// Binary-searches `blk_index_to_blk_path` for the first file
|
||||
/// whose earliest block height is ≤ `target_start`, then backs
|
||||
/// off a few files as a safety margin for blocks that were written
|
||||
/// out of height order (see [`START_BLK_INDEX_BACKOFF`]).
|
||||
fn find_start_blk_index(
|
||||
&self,
|
||||
target_start: Option<Height>,
|
||||
blk_index_to_blk_path: &BlkIndexToBlkPath,
|
||||
xor_bytes: XORBytes,
|
||||
) -> Result<u16> {
|
||||
let Some(target_start) = target_start else {
|
||||
return Ok(0);
|
||||
};
|
||||
|
||||
let blk_indices: Vec<u16> = blk_index_to_blk_path.keys().copied().collect();
|
||||
if blk_indices.is_empty() {
|
||||
return Ok(0);
|
||||
) -> Result<Receiver<Result<ReadBlock>>> {
|
||||
let tip = self.0.client.get_last_height()?;
|
||||
if end > tip {
|
||||
return Err(Error::OutOfRange(format!(
|
||||
"range end {end} is past current tip {tip}"
|
||||
)));
|
||||
}
|
||||
|
||||
let mut left = 0;
|
||||
let mut right = blk_indices.len() - 1;
|
||||
let mut best_start_idx = 0;
|
||||
|
||||
while left <= right {
|
||||
let mid = (left + right) / 2;
|
||||
let blk_index = blk_indices[mid];
|
||||
|
||||
let Some(blk_path) = blk_index_to_blk_path.get(&blk_index) else {
|
||||
break;
|
||||
};
|
||||
match self.first_block_height(blk_path, xor_bytes) {
|
||||
Ok(height) if height <= target_start => {
|
||||
best_start_idx = mid;
|
||||
left = mid + 1;
|
||||
}
|
||||
Ok(_) => {
|
||||
if mid == 0 {
|
||||
break;
|
||||
}
|
||||
right = mid - 1;
|
||||
}
|
||||
Err(_) => {
|
||||
left = mid + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let final_idx = best_start_idx.saturating_sub(START_BLK_INDEX_BACKOFF);
|
||||
Ok(blk_indices.get(final_idx).copied().unwrap_or(0))
|
||||
}
|
||||
|
||||
pub fn first_block_height(
|
||||
&self,
|
||||
blk_path: &Path,
|
||||
xor_bytes: XORBytes,
|
||||
) -> Result<Height> {
|
||||
let mut file = File::open(blk_path)?;
|
||||
let mut buf = [0u8; 4096];
|
||||
let n = file.read(&mut buf)?;
|
||||
|
||||
let mut xor_i = XORIndex::default();
|
||||
let magic_end = find_magic(&buf[..n], &mut xor_i, xor_bytes)
|
||||
.ok_or_else(|| Error::NotFound("No magic bytes found".into()))?;
|
||||
|
||||
let size_end = magic_end + 4;
|
||||
xor_i.bytes(&mut buf[magic_end..size_end], xor_bytes);
|
||||
|
||||
let header_end = size_end + 80;
|
||||
xor_i.bytes(&mut buf[size_end..header_end], xor_bytes);
|
||||
|
||||
let header =
|
||||
Header::consensus_decode(&mut std::io::Cursor::new(&buf[size_end..header_end]))?;
|
||||
|
||||
let height = self.client.get_block_info(&header.block_hash())?.height as u32;
|
||||
|
||||
Ok(Height::new(height))
|
||||
let canonical = CanonicalRange::between(&self.0.client, start, end)?;
|
||||
// No anchor: caller asked for "blocks at heights X..=Y", they
|
||||
// get whatever bitcoind says is canonical there.
|
||||
pipeline::spawn(self.0.clone(), canonical, None, parser_threads)
|
||||
}
|
||||
}
|
||||
|
||||
/// Streaming reader at a position in a blk file. Reads via pread + XOR on demand.
|
||||
pub struct BlkRead<'a> {
|
||||
cache: RwLockReadGuard<'a, BTreeMap<u16, File>>,
|
||||
blk_index: u16,
|
||||
/// `pub(crate)` so `pipeline` can capture it via `Arc<ReaderInner>`
|
||||
/// for spawned workers; everything else goes through `Reader`.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct ReaderInner {
|
||||
/// Invalidated on every [`refresh_paths`](Self::refresh_paths) so
|
||||
/// a pruned/reindexed blk file can't keep serving stale bytes
|
||||
/// from a dead inode. `Arc<File>` lets us hand out cheap clones
|
||||
/// without holding the cache lock during I/O.
|
||||
blk_file_cache: RwLock<BTreeMap<u16, Arc<File>>>,
|
||||
pub(crate) xor_bytes: XORBytes,
|
||||
pub(crate) blocks_dir: PathBuf,
|
||||
pub(crate) client: Client,
|
||||
}
|
||||
|
||||
impl ReaderInner {
|
||||
/// Rescan the blocks directory and drop the file-handle cache in
|
||||
/// the same critical section. Old `Arc<File>`s already in flight
|
||||
/// stay valid until their last drop; new lookups go through the
|
||||
/// fresh inode.
|
||||
pub(crate) fn refresh_paths(&self) -> Result<BlkIndexToBlkPath> {
|
||||
let paths = BlkIndexToBlkPath::scan(&self.blocks_dir)?;
|
||||
self.blk_file_cache.write().clear();
|
||||
Ok(paths)
|
||||
}
|
||||
|
||||
/// The blk path is deterministic (`<blocks_dir>/blkNNNNN.dat`),
|
||||
/// so we don't need a directory scan to resolve it. Two threads
|
||||
/// racing on a missing entry will both call `File::open`; the
|
||||
/// loser's `Arc` is dropped via `or_insert`.
|
||||
fn open_blk(&self, blk_index: u16) -> Result<Arc<File>> {
|
||||
if let Some(file) = self.blk_file_cache.read().get(&blk_index).cloned() {
|
||||
return Ok(file);
|
||||
}
|
||||
let path = self.blocks_dir.join(format!("blk{blk_index:05}.dat"));
|
||||
let file = Arc::new(File::open(&path)?);
|
||||
let mut cache = self.blk_file_cache.write();
|
||||
Ok(cache.entry(blk_index).or_insert(file).clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Streaming reader at a position in a blk file. Holds an `Arc<File>`
|
||||
/// so it doesn't lock the file cache while the consumer is reading.
|
||||
pub struct BlkRead {
|
||||
file: Arc<File>,
|
||||
offset: u64,
|
||||
xor_index: XORIndex,
|
||||
xor_bytes: XORBytes,
|
||||
}
|
||||
|
||||
impl Read for BlkRead<'_> {
|
||||
impl Read for BlkRead {
|
||||
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
|
||||
let file = self.cache.get(&self.blk_index).unwrap();
|
||||
let n = file.read_at(buf, self.offset)?;
|
||||
let n = self.file.read_at(buf, self.offset)?;
|
||||
self.xor_index.bytes(&mut buf[..n], self.xor_bytes);
|
||||
self.offset += n as u64;
|
||||
Ok(n)
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
//! Pure block parsing — XOR decoding, header and body decode.
|
||||
//!
|
||||
//! Split into a cheap header peek and a full body parse so the scan
|
||||
//! loop can reject non-canonical blocks without copying them. No RPC,
|
||||
//! no threading, no state.
|
||||
//! Block parsing — XOR decoding, header peek, full body parse. Split
|
||||
//! so the scan loop can reject non-canonical blocks before copying.
|
||||
|
||||
use std::io::Cursor;
|
||||
|
||||
@@ -12,19 +9,18 @@ use brk_types::{BlkMetadata, Block, BlockHash, Height, ReadBlock};
|
||||
|
||||
use crate::{XORBytes, XORIndex, canonical::CanonicalRange};
|
||||
|
||||
const HEADER_LEN: usize = 80;
|
||||
pub(crate) const HEADER_LEN: usize = 80;
|
||||
|
||||
/// Returns the canonical offset of `bytes` if its header hashes to a
|
||||
/// known canonical block, otherwise `None`. Does not allocate and does
|
||||
/// not mutate `bytes`: the header is copied onto a stack buffer and
|
||||
/// XOR-decoded there so an orphan short-circuits cleanly and a
|
||||
/// canonical hit can still be cloned out intact.
|
||||
pub fn peek_canonical_offset(
|
||||
/// Cheap canonical-membership check. Decodes the header onto a stack
|
||||
/// buffer so `bytes` stays untouched (the parser later re-XORs the
|
||||
/// full block from the original phase). Returning the parsed header
|
||||
/// lets the body parse skip a second decode.
|
||||
pub(crate) fn peek_canonical(
|
||||
bytes: &[u8],
|
||||
mut xor_state: XORIndex,
|
||||
xor_bytes: XORBytes,
|
||||
canonical: &CanonicalRange,
|
||||
) -> Option<u32> {
|
||||
) -> Option<(u32, Header)> {
|
||||
if bytes.len() < HEADER_LEN {
|
||||
return None;
|
||||
}
|
||||
@@ -32,27 +28,30 @@ pub fn peek_canonical_offset(
|
||||
header_buf.copy_from_slice(&bytes[..HEADER_LEN]);
|
||||
xor_state.bytes(&mut header_buf, xor_bytes);
|
||||
let header = Header::consensus_decode(&mut &header_buf[..]).ok()?;
|
||||
canonical.offset_of(&BlockHash::from(header.block_hash()))
|
||||
let offset = canonical.offset_of(&BlockHash::from(header.block_hash()))?;
|
||||
Some((offset, header))
|
||||
}
|
||||
|
||||
/// Full XOR-decode + parse for a block that has already been confirmed
|
||||
/// canonical by `peek_canonical_offset`. Takes owned `bytes` so it can
|
||||
/// mutate them in place and hand them to the resulting `ReadBlock`.
|
||||
pub fn parse_canonical_body(
|
||||
/// Full XOR-decode + body parse. Takes the previously-parsed `header`
|
||||
/// from `peek_canonical` so we don't re-parse it.
|
||||
pub(crate) fn parse_canonical_body(
|
||||
mut bytes: Vec<u8>,
|
||||
metadata: BlkMetadata,
|
||||
mut xor_state: XORIndex,
|
||||
xor_bytes: XORBytes,
|
||||
height: Height,
|
||||
header: Header,
|
||||
) -> Result<ReadBlock> {
|
||||
if bytes.len() < HEADER_LEN {
|
||||
return Err(Error::Internal("Block bytes shorter than header"));
|
||||
}
|
||||
|
||||
xor_state.bytes(&mut bytes, xor_bytes);
|
||||
let mut cursor = Cursor::new(bytes);
|
||||
let header = Header::consensus_decode(&mut cursor)?;
|
||||
let bitcoin_hash = header.block_hash();
|
||||
|
||||
let mut cursor = Cursor::new(bytes);
|
||||
cursor.set_position(HEADER_LEN as u64);
|
||||
|
||||
let tx_count = VarInt::consensus_decode(&mut cursor)?.0 as usize;
|
||||
let mut txdata = Vec::with_capacity(tx_count);
|
||||
let mut tx_metadata = Vec::with_capacity(tx_count);
|
||||
|
||||
@@ -1,470 +0,0 @@
|
||||
//! The actual pipeline turning a blk-file scan into an ordered
|
||||
//! `ReadBlock` stream. [`spawn`] picks between two strategies:
|
||||
//!
|
||||
//! * **[`pipeline_forward`]** — one reader thread walks blk files in
|
||||
//! order, peeks each block's header against the pre-fetched
|
||||
//! `CanonicalRange`, and ships canonical hits over an mpmc channel
|
||||
//! to a scoped parser pool of `parser_threads` workers, which decode
|
||||
//! bodies in parallel and serialise emission through a shared
|
||||
//! [`ReorderState`] mutex. Used when the range is larger than
|
||||
//! `TAIL_THRESHOLD`.
|
||||
//! * **[`pipeline_tail`]** — single-threaded reverse scan of the
|
||||
//! newest blk files in 5 MB chunks, buffering every canonical match
|
||||
//! in offset-indexed slots and then emitting through [`ReorderState`]
|
||||
//! in the same order. Used for `canonical.len() <= TAIL_THRESHOLD`,
|
||||
//! where the channel + lock overhead of the forward pipeline would
|
||||
//! dominate.
|
||||
//!
|
||||
//! Both pipelines route emission through [`ReorderState`], which
|
||||
//! verifies `block.header.prev_blockhash` against the previously
|
||||
//! emitted block's hash and aborts cleanly if the canonical-hash batch
|
||||
//! that produced the stream was stitched across a mid-batch reorg.
|
||||
//!
|
||||
//! Canonical blocks can also arrive out of order across blk files
|
||||
//! (bitcoind doesn't write in strict chain order during initial sync,
|
||||
//! headers-first body fetch, or reindex), so the reorder buffer is
|
||||
//! required even at `parser_threads = 1`.
|
||||
|
||||
use std::{
|
||||
fs::{self, File},
|
||||
io::{Read, Seek, SeekFrom},
|
||||
ops::ControlFlow,
|
||||
sync::atomic::{AtomicBool, Ordering},
|
||||
thread,
|
||||
};
|
||||
|
||||
use brk_error::{Error, Result};
|
||||
use brk_types::{BlkMetadata, BlockHash, Height, ReadBlock};
|
||||
use crossbeam::channel::{Receiver, Sender, bounded};
|
||||
use parking_lot::Mutex;
|
||||
use rustc_hash::FxHashMap;
|
||||
use tracing::{error, warn};
|
||||
|
||||
use crate::{
|
||||
BlkIndexToBlkPath, ReaderInner, XORBytes, XORIndex,
|
||||
canonical::CanonicalRange,
|
||||
parse::{parse_canonical_body, peek_canonical_offset},
|
||||
scan::scan_bytes,
|
||||
};
|
||||
|
||||
/// Capacity of the bounded channels created by this module.
const CHANNEL_CAPACITY: usize = 50;
/// Size of one reverse-scan read in the tail pipeline (5 MB).
const TAIL_CHUNK: usize = 5 << 20;
/// Up to this many canonical blocks → tail pipeline. Beyond → forward.
const TAIL_THRESHOLD: usize = 10;

/// Parser-thread count used by default for [`ReaderInner::after`].
/// The consumer (the indexer) is CPU-bound, so one parser plus one
/// reader (two threads in total) leaves the remaining cores to it.
/// Bench tools that drain the channel cheaply can pick another value
/// through [`ReaderInner::after_with`].
pub(crate) const DEFAULT_PARSER_THREADS: usize = 1;
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Shared pipeline entry — called by `Reader::after_with` and `Reader::range_with`.
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Spawns the reader worker and (for non-tail ranges) a scoped parser
|
||||
/// pool, and returns the consumer receiver. Shared backend for
|
||||
/// `after_with` and `range_with`.
|
||||
pub(crate) fn spawn(
|
||||
reader: &ReaderInner,
|
||||
canonical: CanonicalRange,
|
||||
parser_threads: usize,
|
||||
) -> Result<Receiver<ReadBlock>> {
|
||||
let parser_threads = parser_threads.max(1);
|
||||
|
||||
if canonical.is_empty() {
|
||||
return Ok(bounded(0).1);
|
||||
}
|
||||
|
||||
let paths = BlkIndexToBlkPath::scan(reader.blocks_dir());
|
||||
*reader.blk_index_to_blk_path.write() = paths.clone();
|
||||
|
||||
let (send, recv) = bounded(CHANNEL_CAPACITY);
|
||||
let xor_bytes = reader.xor_bytes();
|
||||
let use_tail = canonical.len() <= TAIL_THRESHOLD;
|
||||
let first_blk_index = if use_tail {
|
||||
0
|
||||
} else {
|
||||
reader
|
||||
.find_start_blk_index(Some(canonical.start), &paths, xor_bytes)
|
||||
.unwrap_or_default()
|
||||
};
|
||||
|
||||
thread::spawn(move || {
|
||||
let result = if use_tail {
|
||||
pipeline_tail(&paths, xor_bytes, &canonical, &send)
|
||||
} else {
|
||||
pipeline_forward(
|
||||
&paths,
|
||||
first_blk_index,
|
||||
xor_bytes,
|
||||
&canonical,
|
||||
&send,
|
||||
parser_threads,
|
||||
)
|
||||
};
|
||||
if let Err(e) = result {
|
||||
error!("Reader canonical pipeline failed: {e}");
|
||||
}
|
||||
});
|
||||
|
||||
Ok(recv)
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Forward pipeline — 1 reader + N parsers + shared in-order emission.
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// A raw block the reader has already confirmed is on the canonical
|
||||
/// chain, shipped to the parser pool for full decoding.
|
||||
struct ScannedBlock {
|
||||
metadata: BlkMetadata,
|
||||
bytes: Vec<u8>,
|
||||
xor_state: XORIndex,
|
||||
canonical_offset: u32,
|
||||
}
|
||||
|
||||
/// In-order emission buffer shared between the parser threads. Access
|
||||
/// is serialised through a `parking_lot::Mutex`; at `parser_threads = 1`
|
||||
/// the lock is always uncontended.
|
||||
///
|
||||
/// Also enforces **chain continuity**: before emitting each block it
|
||||
/// checks that `block.header.prev_blockhash` matches the previously-
|
||||
/// emitted block's hash. A mismatch means the canonical-hash batch
|
||||
/// that produced this stream was stitched across a mid-batch reorg,
|
||||
/// so we stop emitting cleanly and let the caller retry.
|
||||
struct ReorderState {
|
||||
next_offset: u32,
|
||||
/// Ahead-of-line matches keyed by canonical offset; drained
|
||||
/// contiguously each time `next_offset` advances. Bounded in
|
||||
/// practice by parser-thread scheduling jitter.
|
||||
pending: FxHashMap<u32, ReadBlock>,
|
||||
send_to_consumer: Sender<ReadBlock>,
|
||||
/// Hash of the last block successfully emitted, used to verify
|
||||
/// continuity with the next one. `None` before the first emit.
|
||||
last_emitted_hash: Option<BlockHash>,
|
||||
/// Flipped when a continuity check fails.
|
||||
chain_broken: bool,
|
||||
}
|
||||
|
||||
impl ReorderState {
|
||||
fn new(send_to_consumer: Sender<ReadBlock>) -> Self {
|
||||
Self {
|
||||
next_offset: 0,
|
||||
pending: FxHashMap::default(),
|
||||
send_to_consumer,
|
||||
last_emitted_hash: None,
|
||||
chain_broken: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Accepts a parsed canonical block; emits it and drains any
|
||||
/// contiguous pending matches. Returns `false` once the pipeline
|
||||
/// should stop — either the consumer dropped the receiver or a
|
||||
/// chain-continuity check failed. Completion (all blocks emitted)
|
||||
/// is checked by the caller via `next_offset`.
|
||||
fn try_emit(&mut self, offset: u32, block: ReadBlock) -> bool {
|
||||
use std::cmp::Ordering::*;
|
||||
match offset.cmp(&self.next_offset) {
|
||||
Equal => {
|
||||
if !self.send_in_order(block) {
|
||||
return false;
|
||||
}
|
||||
while let Some(next) = self.pending.remove(&self.next_offset) {
|
||||
if !self.send_in_order(next) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
Greater => {
|
||||
self.pending.insert(offset, block);
|
||||
true
|
||||
}
|
||||
// Unreachable in practice: each canonical hash appears at
|
||||
// exactly one offset and each block is parsed once.
|
||||
Less => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Verifies `block.prev_blockhash` against the last emitted hash,
|
||||
/// sends the block, and bumps `next_offset`. Returns `false` on
|
||||
/// continuity failure or consumer drop.
|
||||
fn send_in_order(&mut self, block: ReadBlock) -> bool {
|
||||
if let Some(last) = &self.last_emitted_hash {
|
||||
let prev = BlockHash::from(block.header.prev_blockhash);
|
||||
if prev != *last {
|
||||
warn!(
|
||||
"canonical chain broken at offset {}: expected prev={} got {}",
|
||||
self.next_offset, last, prev,
|
||||
);
|
||||
self.chain_broken = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
let hash = block.hash().clone();
|
||||
if self.send_to_consumer.send(block).is_err() {
|
||||
return false;
|
||||
}
|
||||
self.last_emitted_hash = Some(hash);
|
||||
self.next_offset += 1;
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
fn pipeline_forward(
|
||||
paths: &BlkIndexToBlkPath,
|
||||
first_blk_index: u16,
|
||||
xor_bytes: XORBytes,
|
||||
canonical: &CanonicalRange,
|
||||
send: &Sender<ReadBlock>,
|
||||
parser_threads: usize,
|
||||
) -> Result<()> {
|
||||
let (parser_send, parser_recv) = bounded::<ScannedBlock>(CHANNEL_CAPACITY);
|
||||
let reorder = Mutex::new(ReorderState::new(send.clone()));
|
||||
let done = AtomicBool::new(false);
|
||||
|
||||
thread::scope(|scope| -> Result<()> {
|
||||
for _ in 0..parser_threads {
|
||||
let parser_recv = parser_recv.clone();
|
||||
scope.spawn(|| parser_loop(parser_recv, &reorder, &done, canonical, xor_bytes));
|
||||
}
|
||||
// Every parser owns its own clone; ours would otherwise keep
|
||||
// the channel "alive" and leak a dangling receiver.
|
||||
drop(parser_recv);
|
||||
|
||||
let read_result = read_and_dispatch(
|
||||
paths,
|
||||
first_blk_index,
|
||||
xor_bytes,
|
||||
canonical,
|
||||
&parser_send,
|
||||
&done,
|
||||
);
|
||||
// Signal end-of-input to the parsers so they exit their `for`
|
||||
// loops and the scope can join them.
|
||||
drop(parser_send);
|
||||
read_result
|
||||
})?;
|
||||
|
||||
let state = reorder.lock();
|
||||
if state.chain_broken {
|
||||
return Err(Error::Internal(
|
||||
"forward pipeline: canonical batch stitched across a reorg",
|
||||
));
|
||||
}
|
||||
let pipeline_cancelled = done.load(Ordering::Relaxed);
|
||||
let emitted = state.next_offset as usize;
|
||||
if !pipeline_cancelled && emitted < canonical.len() {
|
||||
return Err(Error::Internal(
|
||||
"forward pipeline: blk files missing canonical blocks",
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Full-body parse + in-order emit loop run by every scoped parser
|
||||
/// worker in `pipeline_forward`. Drains `parser_recv` to exhaustion.
|
||||
fn parser_loop(
|
||||
parser_recv: Receiver<ScannedBlock>,
|
||||
reorder: &Mutex<ReorderState>,
|
||||
done: &AtomicBool,
|
||||
canonical: &CanonicalRange,
|
||||
xor_bytes: XORBytes,
|
||||
) {
|
||||
for ScannedBlock { metadata, bytes, xor_state, canonical_offset } in parser_recv {
|
||||
if done.load(Ordering::Relaxed) {
|
||||
continue;
|
||||
}
|
||||
let height = Height::from(*canonical.start + canonical_offset);
|
||||
let block = match parse_canonical_body(bytes, metadata, xor_state, xor_bytes, height) {
|
||||
Ok(block) => block,
|
||||
Err(e) => {
|
||||
warn!("parse_canonical_body failed: {e}");
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let pipeline_finished = {
|
||||
let mut state = reorder.lock();
|
||||
!state.try_emit(canonical_offset, block)
|
||||
|| state.next_offset as usize >= canonical.len()
|
||||
};
|
||||
if pipeline_finished {
|
||||
done.store(true, Ordering::Relaxed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk blk files from `first_blk_index`, scan each one, and ship
|
||||
/// canonical blocks to the parser pool. Non-canonical blocks are
|
||||
/// rejected via `peek_canonical_offset` *before* being cloned — the
|
||||
/// cheap filter is what lets a sparse catchup avoid allocating for the
|
||||
/// ~99% of blocks outside the window.
|
||||
fn read_and_dispatch(
|
||||
paths: &BlkIndexToBlkPath,
|
||||
first_blk_index: u16,
|
||||
xor_bytes: XORBytes,
|
||||
canonical: &CanonicalRange,
|
||||
parser_send: &Sender<ScannedBlock>,
|
||||
done: &AtomicBool,
|
||||
) -> Result<()> {
|
||||
for (&blk_index, blk_path) in paths.range(first_blk_index..) {
|
||||
if done.load(Ordering::Relaxed) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut bytes = fs::read(blk_path).map_err(|e| {
|
||||
error!("Failed to read blk file {}: {e}", blk_path.display());
|
||||
Error::Internal("Failed to read blk file")
|
||||
})?;
|
||||
|
||||
let result = scan_bytes(
|
||||
&mut bytes,
|
||||
blk_index,
|
||||
0,
|
||||
xor_bytes,
|
||||
|metadata, block_bytes, xor_state| {
|
||||
if done.load(Ordering::Relaxed) {
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
let Some(canonical_offset) =
|
||||
peek_canonical_offset(block_bytes, xor_state, xor_bytes, canonical)
|
||||
else {
|
||||
return ControlFlow::Continue(());
|
||||
};
|
||||
let scanned = ScannedBlock {
|
||||
metadata,
|
||||
bytes: block_bytes.to_vec(),
|
||||
xor_state,
|
||||
canonical_offset,
|
||||
};
|
||||
if parser_send.send(scanned).is_err() {
|
||||
ControlFlow::Break(())
|
||||
} else {
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
if result.interrupted {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
// Tail pipeline — reverse-scan the newest blk files in 5 MB chunks until
|
||||
// every canonical hash has been matched, then emit them forward.
|
||||
// ─────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/// Single-threaded tail-range pipeline for small `canonical.len()`.
|
||||
/// Walks blk files in reverse-index order, reads each one in 5 MB
|
||||
/// chunks from tail to head, and stuffs every canonical match into an
|
||||
/// offset-indexed `slots` vec. Once every canonical block is matched,
|
||||
/// emits them in order through [`ReorderState`] (which doubles as the
|
||||
/// shared continuity checker). Bails on missing blocks or a chain
|
||||
/// break just like [`pipeline_forward`].
|
||||
fn pipeline_tail(
|
||||
paths: &BlkIndexToBlkPath,
|
||||
xor_bytes: XORBytes,
|
||||
canonical: &CanonicalRange,
|
||||
send: &Sender<ReadBlock>,
|
||||
) -> Result<()> {
|
||||
let mut slots: Vec<Option<ReadBlock>> = (0..canonical.len()).map(|_| None).collect();
|
||||
let mut remaining = canonical.len();
|
||||
// Carries the bytes before a chunk's first magic into the next
|
||||
// (earlier) chunk so blocks straddling the boundary survive.
|
||||
let mut spillover: Vec<u8> = Vec::new();
|
||||
|
||||
'files: for (&blk_index, path) in paths.iter().rev() {
|
||||
let mut file = File::open(path).map_err(|_| Error::Internal("Failed to open blk file"))?;
|
||||
let file_len = file.metadata().map(|m| m.len() as usize).unwrap_or(0);
|
||||
if file_len == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut read_end = file_len;
|
||||
spillover.clear();
|
||||
|
||||
while read_end > 0 && remaining > 0 {
|
||||
let read_start = read_end.saturating_sub(TAIL_CHUNK);
|
||||
let chunk_len = read_end - read_start;
|
||||
read_end = read_start;
|
||||
|
||||
file.seek(SeekFrom::Start(read_start as u64))
|
||||
.map_err(|_| Error::Internal("Failed to seek blk file"))?;
|
||||
let mut buf = vec![0u8; chunk_len + spillover.len()];
|
||||
file.read_exact(&mut buf[..chunk_len])
|
||||
.map_err(|_| Error::Internal("Failed to read blk chunk"))?;
|
||||
buf[chunk_len..].copy_from_slice(&spillover);
|
||||
spillover.clear();
|
||||
|
||||
let result = scan_bytes(
|
||||
&mut buf,
|
||||
blk_index,
|
||||
read_start,
|
||||
xor_bytes,
|
||||
|metadata, block_bytes, xor_state| {
|
||||
let Some(offset) =
|
||||
peek_canonical_offset(block_bytes, xor_state, xor_bytes, canonical)
|
||||
else {
|
||||
return ControlFlow::Continue(());
|
||||
};
|
||||
if slots[offset as usize].is_some() {
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
let height = Height::from(*canonical.start + offset);
|
||||
match parse_canonical_body(
|
||||
block_bytes.to_vec(),
|
||||
metadata,
|
||||
xor_state,
|
||||
xor_bytes,
|
||||
height,
|
||||
) {
|
||||
Ok(block) => {
|
||||
slots[offset as usize] = Some(block);
|
||||
remaining -= 1;
|
||||
}
|
||||
Err(e) => warn!("parse_canonical_body failed in tail pipeline: {e}"),
|
||||
}
|
||||
if remaining == 0 {
|
||||
ControlFlow::Break(())
|
||||
} else {
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
if remaining == 0 {
|
||||
break 'files;
|
||||
}
|
||||
if read_start > 0 {
|
||||
spillover.extend_from_slice(&buf[..result.first_magic.unwrap_or(buf.len())]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if remaining > 0 {
|
||||
return Err(Error::Internal(
|
||||
"tail pipeline: blk files missing canonical blocks",
|
||||
));
|
||||
}
|
||||
|
||||
// Emit in canonical order via the same `ReorderState` the forward
|
||||
// pipeline uses, which verifies `prev_blockhash` continuity between
|
||||
// adjacent blocks as a side effect of `try_emit`.
|
||||
let mut reorder = ReorderState::new(send.clone());
|
||||
for (offset, block) in slots.into_iter().flatten().enumerate() {
|
||||
if !reorder.try_emit(offset as u32, block) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if reorder.chain_broken {
|
||||
return Err(Error::Internal(
|
||||
"tail pipeline: canonical batch stitched across a reorg",
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
164
crates/brk_reader/src/pipeline/forward.rs
Normal file
164
crates/brk_reader/src/pipeline/forward.rs
Normal file
@@ -0,0 +1,164 @@
|
||||
//! Forward pipeline: 1 reader thread + N scoped parser threads.
|
||||
//! Reader walks blk files from a bisection lower bound, peeks each
|
||||
//! block's header against `CanonicalRange`, and ships hits to the
|
||||
//! parser pool. Parsers decode bodies in parallel and emit in-order
|
||||
//! through `ReorderState`.
|
||||
|
||||
use std::{fs, ops::ControlFlow, sync::OnceLock, thread};
|
||||
|
||||
use bitcoin::block::Header;
|
||||
use brk_error::{Error, Result};
|
||||
use brk_types::{BlkMetadata, Height, ReadBlock};
|
||||
use crossbeam::channel::{Receiver, Sender, bounded};
|
||||
use parking_lot::Mutex;
|
||||
use tracing::error;
|
||||
|
||||
use crate::{
|
||||
BlkIndexToBlkPath, BlockHash, XORBytes, XORIndex,
|
||||
canonical::CanonicalRange,
|
||||
parse::{parse_canonical_body, peek_canonical},
|
||||
pipeline::{CHANNEL_CAPACITY, reorder::ReorderState},
|
||||
scan::scan_bytes,
|
||||
};
|
||||
|
||||
/// Reader→parser channel message. `header` was decoded during the
|
||||
/// peek and is reused so the parser doesn't redo it.
|
||||
struct ScannedBlock {
|
||||
metadata: BlkMetadata,
|
||||
bytes: Vec<u8>,
|
||||
xor_state: XORIndex,
|
||||
canonical_offset: u32,
|
||||
header: Header,
|
||||
}
|
||||
|
||||
/// Single shared signal carrying both the cancel flag and (when set
|
||||
/// to `Failed`) the first parse error. `stop.get().is_some()` is the
|
||||
/// reader's cheap "should I stop" check.
|
||||
enum Stop {
|
||||
Done,
|
||||
Failed(Error),
|
||||
}
|
||||
|
||||
pub(super) fn pipeline_forward(
|
||||
paths: &BlkIndexToBlkPath,
|
||||
first_blk_index: u16,
|
||||
xor_bytes: XORBytes,
|
||||
canonical: &CanonicalRange,
|
||||
anchor: Option<BlockHash>,
|
||||
send: &Sender<Result<ReadBlock>>,
|
||||
parser_threads: usize,
|
||||
) -> Result<()> {
|
||||
let (parser_send, parser_recv) = bounded::<ScannedBlock>(CHANNEL_CAPACITY);
|
||||
let reorder = Mutex::new(ReorderState::new(send.clone(), anchor));
|
||||
let stop: OnceLock<Stop> = OnceLock::new();
|
||||
|
||||
thread::scope(|scope| {
|
||||
for _ in 0..parser_threads {
|
||||
let parser_recv = parser_recv.clone();
|
||||
scope.spawn(|| parser_loop(parser_recv, &reorder, &stop, canonical, xor_bytes));
|
||||
}
|
||||
// Every parser owns its own clone; ours would otherwise leak
|
||||
// a dangling receiver.
|
||||
drop(parser_recv);
|
||||
|
||||
let read_result =
|
||||
read_and_dispatch(paths, first_blk_index, xor_bytes, canonical, &parser_send, &stop);
|
||||
// End-of-input signal so parser `for` loops exit and the
|
||||
// scope can join.
|
||||
drop(parser_send);
|
||||
read_result
|
||||
})?;
|
||||
|
||||
if let Some(Stop::Failed(e)) = stop.into_inner() {
|
||||
return Err(e);
|
||||
}
|
||||
reorder.into_inner().finalize(canonical.len())
|
||||
}
|
||||
|
||||
/// Per-thread parser body: drain `parser_recv`, decode each block,
|
||||
/// emit through `reorder`. Stops on `stop`.
|
||||
fn parser_loop(
|
||||
parser_recv: Receiver<ScannedBlock>,
|
||||
reorder: &Mutex<ReorderState>,
|
||||
stop: &OnceLock<Stop>,
|
||||
canonical: &CanonicalRange,
|
||||
xor_bytes: XORBytes,
|
||||
) {
|
||||
for ScannedBlock {
|
||||
metadata,
|
||||
bytes,
|
||||
xor_state,
|
||||
canonical_offset,
|
||||
header,
|
||||
} in parser_recv
|
||||
{
|
||||
if stop.get().is_some() {
|
||||
continue;
|
||||
}
|
||||
let height = Height::from(*canonical.start + canonical_offset);
|
||||
let block =
|
||||
match parse_canonical_body(bytes, metadata, xor_state, xor_bytes, height, header) {
|
||||
Ok(block) => block,
|
||||
Err(e) => {
|
||||
error!("parse_canonical_body failed at height {height}: {e}");
|
||||
let _ = stop.set(Stop::Failed(e));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let pipeline_finished = {
|
||||
let mut state = reorder.lock();
|
||||
!state.try_emit(canonical_offset, block)
|
||||
|| state.next_offset as usize >= canonical.len()
|
||||
};
|
||||
if pipeline_finished {
|
||||
let _ = stop.set(Stop::Done);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// `peek_canonical` filters orphans **before** the block bytes are
|
||||
/// cloned, so a sparse catchup avoids allocating for the ~99% of
|
||||
/// blocks outside the window.
|
||||
fn read_and_dispatch(
|
||||
paths: &BlkIndexToBlkPath,
|
||||
first_blk_index: u16,
|
||||
xor_bytes: XORBytes,
|
||||
canonical: &CanonicalRange,
|
||||
parser_send: &Sender<ScannedBlock>,
|
||||
stop: &OnceLock<Stop>,
|
||||
) -> Result<()> {
|
||||
for (&blk_index, blk_path) in paths.range(first_blk_index..) {
|
||||
if stop.get().is_some() {
|
||||
return Ok(());
|
||||
}
|
||||
let mut bytes = fs::read(blk_path)?;
|
||||
scan_bytes(
|
||||
&mut bytes,
|
||||
blk_index,
|
||||
xor_bytes,
|
||||
|metadata, block_bytes, xor_state| {
|
||||
if stop.get().is_some() {
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
let Some((canonical_offset, header)) =
|
||||
peek_canonical(block_bytes, xor_state, xor_bytes, canonical)
|
||||
else {
|
||||
return ControlFlow::Continue(());
|
||||
};
|
||||
let scanned = ScannedBlock {
|
||||
metadata,
|
||||
bytes: block_bytes.to_vec(),
|
||||
xor_state,
|
||||
canonical_offset,
|
||||
header,
|
||||
};
|
||||
if parser_send.send(scanned).is_err() {
|
||||
ControlFlow::Break(())
|
||||
} else {
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
},
|
||||
);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
127
crates/brk_reader/src/pipeline/mod.rs
Normal file
127
crates/brk_reader/src/pipeline/mod.rs
Normal file
@@ -0,0 +1,127 @@
|
||||
//! Two-strategy block-streaming pipeline. [`spawn`] picks between:
|
||||
//!
|
||||
//! * **forward** — one reader thread walks blk files in order from a
|
||||
//! bisection lower bound; canonical hits ship to a parser pool that
|
||||
//! emits in-order through [`reorder::ReorderState`].
|
||||
//! * **tail** — single-threaded reverse scan of the newest blk files,
|
||||
//! buffering matches in offset slots, then emitting forward with
|
||||
//! an inline chain check.
|
||||
//!
|
||||
//! Both strategies verify `block.header.prev_blockhash` against the
|
||||
//! previously emitted block — and against the user-supplied `anchor`
|
||||
//! for the very first block — and propagate a final `Err` to the
|
||||
//! consumer on chain breaks, parse failures, or missing blocks.
|
||||
|
||||
use std::{sync::Arc, thread};
|
||||
|
||||
use brk_error::Result;
|
||||
use brk_rpc::Client;
|
||||
use brk_types::{BlockHash, Height, ReadBlock};
|
||||
use crossbeam::channel::{Receiver, bounded};
|
||||
|
||||
use crate::{
|
||||
BlkIndexToBlkPath, ReaderInner, XORBytes, bisect,
|
||||
canonical::CanonicalRange,
|
||||
};
|
||||
|
||||
mod forward;
|
||||
mod reorder;
|
||||
mod tail;
|
||||
|
||||
/// Capacity of the bounded channels used by the pipelines; `spawn`
/// also uses it as the upper bound for the parser-thread count.
pub(crate) const CHANNEL_CAPACITY: usize = 50;

/// Use the reverse-scan pipeline when `canonical.start` lies within
/// this many files of the chain tip. The forward pipeline always pays
/// for the bisection plus a 21-file backoff (~2.7 GB of reads), no
/// matter how few canonical blocks are in the window, so the tail
/// pipeline wins for any tip-clustered catchup until the window grows
/// past this many files.
const TAIL_DISTANCE_FILES: usize = 8;

/// Default parser-thread count. The consumer (the indexer) is
/// CPU-bound, so one reader plus one parser leaves the remaining cores
/// to it. Bench tools that drain the channel cheaply can override.
pub(crate) const DEFAULT_PARSER_THREADS: usize = 1;
|
||||
|
||||
/// Pipeline chosen by `pick_strategy` for one `spawn` call.
enum Strategy {
    /// Reverse scan of the newest blk files.
    Tail,
    /// Forward scan beginning at blk file `first_blk_index`.
    Forward { first_blk_index: u16 },
}
|
||||
|
||||
/// `anchor`, when supplied, is the hash the consumer expects to be
|
||||
/// the **parent** of the first emitted block. Seeded into the chain
|
||||
/// check so a stale `Reader::after` anchor (e.g. the tip of a
|
||||
/// reorged-out chain) cannot silently produce a stitched stream.
|
||||
/// `None` skips the check (genesis or `range`-style calls have no
|
||||
/// anchor to verify against).
|
||||
pub(crate) fn spawn(
|
||||
reader: Arc<ReaderInner>,
|
||||
canonical: CanonicalRange,
|
||||
anchor: Option<BlockHash>,
|
||||
parser_threads: usize,
|
||||
) -> Result<Receiver<Result<ReadBlock>>> {
|
||||
// Cap at the parser channel capacity: beyond that, extra parsers
|
||||
// are idle (they all contend for the same buffered items) and
|
||||
// absurd inputs would otherwise OOM the scoped spawn.
|
||||
let parser_threads = parser_threads.clamp(1, CHANNEL_CAPACITY);
|
||||
|
||||
if canonical.is_empty() {
|
||||
return Ok(bounded(0).1);
|
||||
}
|
||||
|
||||
let paths = reader.refresh_paths()?;
|
||||
let xor_bytes = reader.xor_bytes;
|
||||
let strategy = pick_strategy(&reader.client, &paths, xor_bytes, canonical.start);
|
||||
|
||||
let (send, recv) = bounded(CHANNEL_CAPACITY);
|
||||
|
||||
thread::spawn(move || {
|
||||
let result = match strategy {
|
||||
Strategy::Tail => {
|
||||
tail::pipeline_tail(&reader.client, &paths, xor_bytes, &canonical, anchor, &send)
|
||||
}
|
||||
Strategy::Forward { first_blk_index } => forward::pipeline_forward(
|
||||
&paths,
|
||||
first_blk_index,
|
||||
xor_bytes,
|
||||
&canonical,
|
||||
anchor,
|
||||
&send,
|
||||
parser_threads,
|
||||
),
|
||||
};
|
||||
if let Err(e) = result {
|
||||
// No-op if the consumer already dropped the receiver.
|
||||
let _ = send.send(Err(e));
|
||||
}
|
||||
});
|
||||
|
||||
Ok(recv)
|
||||
}
|
||||
|
||||
/// Tail iff one of the last `TAIL_DISTANCE_FILES` files starts at a
|
||||
/// height ≤ `canonical_start`; that file is where tail iteration
|
||||
/// would land. Otherwise bisect for the forward start. Genesis-rooted
|
||||
/// catchups skip the tail probes since no file's first block is ≤
|
||||
/// genesis.
|
||||
fn pick_strategy(
|
||||
client: &Client,
|
||||
paths: &BlkIndexToBlkPath,
|
||||
xor_bytes: XORBytes,
|
||||
canonical_start: Height,
|
||||
) -> Strategy {
|
||||
if canonical_start != Height::ZERO
|
||||
&& paths
|
||||
.iter()
|
||||
.rev()
|
||||
.take(TAIL_DISTANCE_FILES)
|
||||
.any(|(_, path)| {
|
||||
bisect::first_block_height(client, path, xor_bytes)
|
||||
.is_ok_and(|h| h <= canonical_start)
|
||||
})
|
||||
{
|
||||
return Strategy::Tail;
|
||||
}
|
||||
Strategy::Forward {
|
||||
first_blk_index: bisect::find_start_blk_index(client, canonical_start, paths, xor_bytes),
|
||||
}
|
||||
}
|
||||
110
crates/brk_reader/src/pipeline/reorder.rs
Normal file
110
crates/brk_reader/src/pipeline/reorder.rs
Normal file
@@ -0,0 +1,110 @@
|
||||
//! In-order emission buffer + chain-continuity check used by the
|
||||
//! forward pipeline. Parsers complete blocks out of order, so this
|
||||
//! parks ahead-of-line matches in `pending` until `next_offset`
|
||||
//! catches up.
|
||||
|
||||
use std::cmp::Ordering;
|
||||
|
||||
use brk_error::{Error, Result};
|
||||
use brk_types::{BlockHash, ReadBlock};
|
||||
use crossbeam::channel::Sender;
|
||||
use rustc_hash::FxHashMap;
|
||||
use tracing::warn;
|
||||
|
||||
/// Accessed by the parser pool under a `parking_lot::Mutex` owned by
|
||||
/// `pipeline_forward`; at `parser_threads = 1` the lock is always
|
||||
/// uncontended.
|
||||
pub(super) struct ReorderState {
|
||||
pub(super) next_offset: u32,
|
||||
pending: FxHashMap<u32, ReadBlock>,
|
||||
send_to_consumer: Sender<Result<ReadBlock>>,
|
||||
/// Seeded with the user-supplied anchor so the first emit is
|
||||
/// also verified against it.
|
||||
last_emitted_hash: Option<BlockHash>,
|
||||
/// A `prev_blockhash` mismatch fires this; converted into a
|
||||
/// final `Err` by `finalize`.
|
||||
chain_broken: bool,
|
||||
/// Distinguishes "consumer cancelled" from "ran out of work
|
||||
/// early" in the missing-blocks check inside `finalize`.
|
||||
consumer_dropped: bool,
|
||||
}
|
||||
|
||||
impl ReorderState {
|
||||
pub(super) fn new(send_to_consumer: Sender<Result<ReadBlock>>, anchor: Option<BlockHash>) -> Self {
|
||||
Self {
|
||||
next_offset: 0,
|
||||
pending: FxHashMap::default(),
|
||||
send_to_consumer,
|
||||
last_emitted_hash: anchor,
|
||||
chain_broken: false,
|
||||
consumer_dropped: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves the pipeline's exit state. Called by
|
||||
/// `pipeline_forward` after the read loop has finished and all
|
||||
/// parser threads have joined.
|
||||
pub(super) fn finalize(self, expected_count: usize) -> Result<()> {
|
||||
if self.chain_broken {
|
||||
return Err(Error::Internal(
|
||||
"forward pipeline: canonical batch stitched across a reorg",
|
||||
));
|
||||
}
|
||||
if !self.consumer_dropped && (self.next_offset as usize) < expected_count {
|
||||
return Err(Error::Internal(
|
||||
"forward pipeline: blk files missing canonical blocks",
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Emits `block` if it's the next expected offset (and drains
|
||||
/// any contiguous pending matches), otherwise parks it. Returns
|
||||
/// `false` once the pipeline should stop (consumer drop or chain
|
||||
/// break).
|
||||
pub(super) fn try_emit(&mut self, offset: u32, block: ReadBlock) -> bool {
|
||||
match offset.cmp(&self.next_offset) {
|
||||
Ordering::Equal => {
|
||||
if !self.send_in_order(block) {
|
||||
return false;
|
||||
}
|
||||
while let Some(next) = self.pending.remove(&self.next_offset) {
|
||||
if !self.send_in_order(next) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
Ordering::Greater => {
|
||||
self.pending.insert(offset, block);
|
||||
true
|
||||
}
|
||||
// Each canonical hash appears at exactly one offset and
|
||||
// each block is parsed once, so this is unreachable in
|
||||
// practice.
|
||||
Ordering::Less => true,
|
||||
}
|
||||
}
|
||||
|
||||
fn send_in_order(&mut self, block: ReadBlock) -> bool {
|
||||
if let Some(last) = &self.last_emitted_hash {
|
||||
let prev = BlockHash::from(block.header.prev_blockhash);
|
||||
if prev != *last {
|
||||
warn!(
|
||||
"canonical chain broken at offset {}: expected prev={} got {}",
|
||||
self.next_offset, last, prev,
|
||||
);
|
||||
self.chain_broken = true;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
let hash = block.hash().clone();
|
||||
if self.send_to_consumer.send(Ok(block)).is_err() {
|
||||
self.consumer_dropped = true;
|
||||
return false;
|
||||
}
|
||||
self.last_emitted_hash = Some(hash);
|
||||
self.next_offset += 1;
|
||||
true
|
||||
}
|
||||
}
|
||||
129
crates/brk_reader/src/pipeline/tail.rs
Normal file
129
crates/brk_reader/src/pipeline/tail.rs
Normal file
@@ -0,0 +1,129 @@
|
||||
//! Tail pipeline: single-threaded reverse scan of the newest blk
|
||||
//! files until every canonical hash is matched, then forward-emit
|
||||
//! with an inline chain check. Avoids the forward pipeline's
|
||||
//! bisection + out-of-order backoff (~2.7 GB of reads) for any
|
||||
//! tip-clustered catchup.
|
||||
|
||||
use std::{fs, ops::ControlFlow};
|
||||
|
||||
use brk_error::{Error, Result};
|
||||
use brk_rpc::Client;
|
||||
use brk_types::{Height, ReadBlock};
|
||||
use crossbeam::channel::Sender;
|
||||
|
||||
use crate::{
|
||||
BlkIndexToBlkPath, BlockHash, OUT_OF_ORDER_FILE_BACKOFF, XORBytes, bisect,
|
||||
canonical::CanonicalRange,
|
||||
parse::{parse_canonical_body, peek_canonical},
|
||||
scan::scan_bytes,
|
||||
};
|
||||
|
||||
pub(super) fn pipeline_tail(
|
||||
client: &Client,
|
||||
paths: &BlkIndexToBlkPath,
|
||||
xor_bytes: XORBytes,
|
||||
canonical: &CanonicalRange,
|
||||
anchor: Option<BlockHash>,
|
||||
send: &Sender<Result<ReadBlock>>,
|
||||
) -> Result<()> {
|
||||
let mut slots: Vec<Option<ReadBlock>> = (0..canonical.len()).map(|_| None).collect();
|
||||
let mut remaining = canonical.len();
|
||||
let mut parse_failure: Option<Error> = None;
|
||||
// Bailout streak: gives up after OUT_OF_ORDER_FILE_BACKOFF
|
||||
// consecutive files below the canonical window so a permanent
|
||||
// miss doesn't scan the entire chain in reverse.
|
||||
let mut below_floor_streak: usize = 0;
|
||||
|
||||
for (&blk_index, path) in paths.iter().rev() {
|
||||
// If this file's first block is below the lowest still-missing
|
||||
// canonical height, we've walked past the window.
|
||||
if let Some(missing_idx) = slots.iter().position(Option::is_none)
|
||||
&& let Ok(first_height) = bisect::first_block_height(client, path, xor_bytes)
|
||||
{
|
||||
let lowest_missing = Height::from(*canonical.start + missing_idx as u32);
|
||||
if first_height < lowest_missing {
|
||||
below_floor_streak += 1;
|
||||
if below_floor_streak >= OUT_OF_ORDER_FILE_BACKOFF {
|
||||
return Err(Error::Internal(
|
||||
"tail pipeline: walked past the canonical window without finding all blocks",
|
||||
));
|
||||
}
|
||||
} else {
|
||||
below_floor_streak = 0;
|
||||
}
|
||||
}
|
||||
|
||||
let mut bytes = fs::read(path)?;
|
||||
scan_bytes(
|
||||
&mut bytes,
|
||||
blk_index,
|
||||
xor_bytes,
|
||||
|metadata, block_bytes, xor_state| {
|
||||
let Some((offset, header)) =
|
||||
peek_canonical(block_bytes, xor_state, xor_bytes, canonical)
|
||||
else {
|
||||
return ControlFlow::Continue(());
|
||||
};
|
||||
if slots[offset as usize].is_some() {
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
let height = Height::from(*canonical.start + offset);
|
||||
match parse_canonical_body(
|
||||
block_bytes.to_vec(),
|
||||
metadata,
|
||||
xor_state,
|
||||
xor_bytes,
|
||||
height,
|
||||
header,
|
||||
) {
|
||||
Ok(block) => {
|
||||
slots[offset as usize] = Some(block);
|
||||
remaining -= 1;
|
||||
}
|
||||
Err(e) => {
|
||||
parse_failure = Some(e);
|
||||
return ControlFlow::Break(());
|
||||
}
|
||||
}
|
||||
if remaining == 0 {
|
||||
ControlFlow::Break(())
|
||||
} else {
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
if let Some(e) = parse_failure {
|
||||
return Err(e);
|
||||
}
|
||||
if remaining == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if remaining > 0 {
|
||||
return Err(Error::Internal(
|
||||
"tail pipeline: blk files missing canonical blocks",
|
||||
));
|
||||
}
|
||||
|
||||
// Inline chain check; ReorderState would be 130 lines of
|
||||
// machinery for the single-threaded path.
|
||||
let mut last_hash: Option<BlockHash> = anchor;
|
||||
for slot in slots {
|
||||
let block = slot.expect("tail pipeline left a slot empty after `remaining == 0`");
|
||||
if let Some(prev) = &last_hash {
|
||||
let actual_prev = BlockHash::from(block.header.prev_blockhash);
|
||||
if actual_prev != *prev {
|
||||
return Err(Error::Internal(
|
||||
"tail pipeline: canonical batch stitched across a reorg",
|
||||
));
|
||||
}
|
||||
}
|
||||
last_hash = Some(block.hash().clone());
|
||||
if send.send(Ok(block)).is_err() {
|
||||
return Ok(()); // consumer dropped — clean exit
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -2,75 +2,76 @@ use std::ops::ControlFlow;
|
||||
|
||||
use brk_types::{BlkMetadata, BlkPosition};
|
||||
|
||||
use crate::{XORBytes, XORIndex};
|
||||
use crate::{XORBytes, XORIndex, xor_bytes::XOR_LEN};
|
||||
|
||||
const MAGIC_BYTES: [u8; 4] = [249, 190, 180, 217];
|
||||
const MAGIC_BYTES: [u8; 4] = [0xF9, 0xBE, 0xB4, 0xD9];
|
||||
|
||||
pub fn find_magic(bytes: &[u8], xor_i: &mut XORIndex, xor_bytes: XORBytes) -> Option<usize> {
|
||||
let mut window = [0u8; 4];
|
||||
for (i, &b) in bytes.iter().enumerate() {
|
||||
window.rotate_left(1);
|
||||
window[3] = xor_i.byte(b, xor_bytes);
|
||||
if window == MAGIC_BYTES {
|
||||
return Some(i + 1);
|
||||
}
|
||||
/// Returns the position **immediately after** the matched magic, or
|
||||
/// `None` if no match. Advances `xor_i` by the bytes consumed either
|
||||
/// way.
|
||||
pub(crate) fn find_magic(bytes: &[u8], xor_i: &mut XORIndex, xor_bytes: XORBytes) -> Option<usize> {
|
||||
let len = bytes.len();
|
||||
if len < MAGIC_BYTES.len() {
|
||||
xor_i.add_assign(len);
|
||||
return None;
|
||||
}
|
||||
|
||||
let xb = *xor_bytes;
|
||||
let mut phase = xor_i.phase();
|
||||
let mut i = 0;
|
||||
let stop = len - MAGIC_BYTES.len();
|
||||
|
||||
while i <= stop {
|
||||
if bytes[i] ^ xb[phase] == MAGIC_BYTES[0] {
|
||||
let p1 = (phase + 1) & (XOR_LEN - 1);
|
||||
let p2 = (phase + 2) & (XOR_LEN - 1);
|
||||
let p3 = (phase + 3) & (XOR_LEN - 1);
|
||||
if bytes[i + 1] ^ xb[p1] == MAGIC_BYTES[1]
|
||||
&& bytes[i + 2] ^ xb[p2] == MAGIC_BYTES[2]
|
||||
&& bytes[i + 3] ^ xb[p3] == MAGIC_BYTES[3]
|
||||
{
|
||||
xor_i.set_phase(phase + MAGIC_BYTES.len());
|
||||
return Some(i + MAGIC_BYTES.len());
|
||||
}
|
||||
}
|
||||
phase = (phase + 1) & (XOR_LEN - 1);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
xor_i.set_phase(phase + (len - i));
|
||||
None
|
||||
}
|
||||
|
||||
pub struct ScanResult {
|
||||
pub first_magic: Option<usize>,
|
||||
pub interrupted: bool,
|
||||
}
|
||||
|
||||
/// Scans `buf` for blocks. `file_offset` is the absolute position of
|
||||
/// `buf[0]` in the file. Calls `on_block` for each complete block found,
|
||||
/// passing the block's raw bytes as a mutable borrow of the buffer — the
|
||||
/// caller decides whether to clone them (e.g. to ship owned data to a
|
||||
/// parser thread) or process them in place (e.g. cheap header peek).
|
||||
pub fn scan_bytes(
|
||||
/// Scans `buf` (the full contents of one blk file) for blocks,
|
||||
/// calling `on_block` for each. The block bytes are passed as a
|
||||
/// mutable borrow so the callback can clone (to ship to a parser
|
||||
/// thread) or process in place (to peek the header).
|
||||
pub(crate) fn scan_bytes(
|
||||
buf: &mut [u8],
|
||||
blk_index: u16,
|
||||
file_offset: usize,
|
||||
xor_bytes: XORBytes,
|
||||
mut on_block: impl FnMut(BlkMetadata, &mut [u8], XORIndex) -> ControlFlow<()>,
|
||||
) -> ScanResult {
|
||||
) {
|
||||
let mut xor_i = XORIndex::default();
|
||||
xor_i.add_assign(file_offset);
|
||||
let mut first_magic = None;
|
||||
let mut i = 0;
|
||||
|
||||
while let Some(off) = find_magic(&buf[i..], &mut xor_i, xor_bytes) {
|
||||
let before = i;
|
||||
i += off;
|
||||
first_magic.get_or_insert(before + off.saturating_sub(4));
|
||||
if i + 4 > buf.len() {
|
||||
break;
|
||||
return;
|
||||
}
|
||||
let len = u32::from_le_bytes(
|
||||
xor_i
|
||||
.bytes(&mut buf[i..i + 4], xor_bytes)
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
) as usize;
|
||||
let mut size_bytes = [buf[i], buf[i + 1], buf[i + 2], buf[i + 3]];
|
||||
xor_i.bytes(&mut size_bytes, xor_bytes);
|
||||
let len = u32::from_le_bytes(size_bytes) as usize;
|
||||
i += 4;
|
||||
if i + len > buf.len() {
|
||||
break;
|
||||
return;
|
||||
}
|
||||
let position = BlkPosition::new(blk_index, (file_offset + i) as u32);
|
||||
let metadata = BlkMetadata::new(position, len as u32);
|
||||
let metadata = BlkMetadata::new(BlkPosition::new(blk_index, i as u32), len as u32);
|
||||
if on_block(metadata, &mut buf[i..i + len], xor_i).is_break() {
|
||||
return ScanResult {
|
||||
first_magic,
|
||||
interrupted: true,
|
||||
};
|
||||
return;
|
||||
}
|
||||
i += len;
|
||||
xor_i.add_assign(len);
|
||||
}
|
||||
|
||||
ScanResult {
|
||||
first_magic,
|
||||
interrupted: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,20 +7,30 @@ pub const XOR_LEN: usize = 8;
|
||||
#[derive(Debug, Clone, Copy, Deref, PartialEq, Eq)]
|
||||
pub struct XORBytes([u8; XOR_LEN]);
|
||||
|
||||
impl XORBytes {
|
||||
/// All-zero mask: nodes without `xor.dat` need no decode.
|
||||
#[inline]
|
||||
pub fn is_identity(self) -> bool {
|
||||
self.0 == [0u8; XOR_LEN]
|
||||
}
|
||||
}
|
||||
|
||||
impl From<[u8; XOR_LEN]> for XORBytes {
|
||||
#[inline]
|
||||
fn from(value: [u8; XOR_LEN]) -> Self {
|
||||
Self(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<&Path> for XORBytes {
|
||||
/// Loads `<blocks_dir>/xor.dat`. Falls back to the identity mask
|
||||
/// if missing, unreadable, or the wrong length.
|
||||
#[inline]
|
||||
fn from(value: &Path) -> Self {
|
||||
Self(
|
||||
fs::read(value.join("xor.dat"))
|
||||
.unwrap_or(vec![0; 8])
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
let mask = fs::read(value.join("xor.dat"))
|
||||
.ok()
|
||||
.and_then(|v| <[u8; XOR_LEN]>::try_from(v).ok())
|
||||
.unwrap_or([0; XOR_LEN]);
|
||||
Self(mask)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,44 +4,67 @@ use crate::xor_bytes::{XOR_LEN, XORBytes};
|
||||
pub struct XORIndex(usize);
|
||||
|
||||
impl XORIndex {
|
||||
pub fn bytes<'a>(&mut self, bytes: &'a mut [u8], xor_bytes: XORBytes) -> &'a mut [u8] {
|
||||
let len = bytes.len();
|
||||
let mut bytes_index = 0;
|
||||
|
||||
while bytes_index < len {
|
||||
bytes[bytes_index] ^= xor_bytes[self.0];
|
||||
self.increment();
|
||||
bytes_index += 1;
|
||||
}
|
||||
|
||||
bytes
|
||||
/// Phase-aligned `XORIndex` for a buffer that conceptually starts
|
||||
/// at `offset` in the blk file.
|
||||
#[inline]
|
||||
pub fn at_offset(offset: usize) -> Self {
|
||||
Self(offset & (XOR_LEN - 1))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn byte(&mut self, mut byte: u8, xor_bytes: XORBytes) -> u8 {
|
||||
byte ^= xor_bytes[self.0];
|
||||
self.increment();
|
||||
byte
|
||||
pub(crate) fn phase(self) -> usize {
|
||||
self.0
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn increment(&mut self) {
|
||||
self.0 += 1;
|
||||
if self.0 == XOR_LEN {
|
||||
self.0 = 0;
|
||||
}
|
||||
pub(crate) fn set_phase(&mut self, phase: usize) {
|
||||
self.0 = phase & (XOR_LEN - 1);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn add_assign(&mut self, i: usize) {
|
||||
self.0 = (self.0 + i) % XOR_LEN;
|
||||
self.0 = (self.0 + i) & (XOR_LEN - 1);
|
||||
}
|
||||
|
||||
/// XOR-decode `buffer` starting at `offset`.
|
||||
/// XOR-decode `bytes` in place, advancing the phase. Aligned 8-byte
|
||||
/// chunks XOR against the full mask in one go (auto-vectorised by
|
||||
/// LLVM); only the head/tail straddling alignment are scalar.
|
||||
pub fn bytes<'a>(&mut self, bytes: &'a mut [u8], xor_bytes: XORBytes) -> &'a mut [u8] {
|
||||
if xor_bytes.is_identity() {
|
||||
return bytes;
|
||||
}
|
||||
let xb = *xor_bytes;
|
||||
let mut phase = self.0;
|
||||
let len = bytes.len();
|
||||
let mut i = 0;
|
||||
|
||||
while phase != 0 && i < len {
|
||||
bytes[i] ^= xb[phase];
|
||||
phase = (phase + 1) & (XOR_LEN - 1);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let body_len = (len - i) & !(XOR_LEN - 1);
|
||||
for chunk in bytes[i..i + body_len].chunks_exact_mut(XOR_LEN) {
|
||||
for (b, m) in chunk.iter_mut().zip(xb) {
|
||||
*b ^= m;
|
||||
}
|
||||
}
|
||||
i += body_len;
|
||||
|
||||
while i < len {
|
||||
bytes[i] ^= xb[phase];
|
||||
phase = (phase + 1) & (XOR_LEN - 1);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
self.0 = phase;
|
||||
bytes
|
||||
}
|
||||
|
||||
/// XOR-decode `buffer` as if it lived at `offset` in the blk file.
|
||||
#[inline]
|
||||
pub fn decode_at(buffer: &mut [u8], offset: usize, xor_bytes: XORBytes) {
|
||||
let mut xori = Self::default();
|
||||
xori.add_assign(offset);
|
||||
xori.bytes(buffer, xor_bytes);
|
||||
Self::at_offset(offset).bytes(buffer, xor_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user