diff --git a/Cargo.lock b/Cargo.lock index febd9b188..18ede3b22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -491,6 +491,7 @@ dependencies = [ name = "brk_iterator" version = "0.3.0-beta.7" dependencies = [ + "bitcoin", "brk_error", "brk_reader", "brk_rpc", @@ -553,6 +554,7 @@ dependencies = [ "quickmatch", "rustc-hash", "serde_json", + "smallvec", "tokio", "vecdb", ] diff --git a/crates/brk_cli/src/config.rs b/crates/brk_cli/src/config.rs index 7e6b33a01..2acc90c96 100644 --- a/crates/brk_cli/src/config.rs +++ b/crates/brk_cli/src/config.rs @@ -5,7 +5,7 @@ use std::{ use brk_error::{Error, Result}; use brk_rpc::{Auth, Client}; -use brk_server::{CdnCacheMode, DEFAULT_MAX_WEIGHT, Website}; +use brk_server::{CdnCacheMode, DEFAULT_MAX_UTXOS, DEFAULT_MAX_WEIGHT, Website}; use brk_types::Port; use owo_colors::OwoColorize; use serde::{Deserialize, Serialize}; @@ -30,6 +30,9 @@ pub struct Config { #[serde(default)] maxweight: Option, + #[serde(default)] + maxutxos: Option, + #[serde(default)] bitcoindir: Option, @@ -79,6 +82,9 @@ impl Config { if let Some(v) = config_args.maxweight { config.maxweight = Some(v); } + if let Some(v) = config_args.maxutxos { + config.maxutxos = Some(v); + } if let Some(v) = config_args.bitcoindir { config.bitcoindir = Some(v); } @@ -129,6 +135,9 @@ impl Config { Long("maxweight") => { config.maxweight = Some(parser.value().unwrap().parse().unwrap()) } + Long("maxutxos") => { + config.maxutxos = Some(parser.value().unwrap().parse().unwrap()) + } Long("bitcoindir") => { config.bitcoindir = Some(parser.value().unwrap().parse().unwrap()) } @@ -194,10 +203,15 @@ impl Config { "[false]".bright_black() ); println!( - " --maxweight {} Max series response weight in bytes {}", + " --maxweight {} Server cap on series response weight in bytes; rejects /api/{{series,metric}}/... over the limit {}", "".bright_black(), format!("[{}]", DEFAULT_MAX_WEIGHT).bright_black() ); + println!( + " --maxutxos {} Server cap on UTXOs per address; /api/address/{{addr}}/utxo errors past the limit {}", + "".bright_black(), + format!("[{}]", DEFAULT_MAX_UTXOS).bright_black() + ); println!(); println!( " --bitcoindir {} Bitcoin directory {}", @@ -380,6 +394,10 @@ Finally, you can run the program with '-h' for help." self.maxweight.unwrap_or(DEFAULT_MAX_WEIGHT) } + pub fn max_utxos(&self) -> usize { + self.maxutxos.unwrap_or(DEFAULT_MAX_UTXOS) + } + pub fn brkport(&self) -> Option { self.brkport } diff --git a/crates/brk_cli/src/main.rs b/crates/brk_cli/src/main.rs index 00d6192aa..59f39bfe1 100644 --- a/crates/brk_cli/src/main.rs +++ b/crates/brk_cli/src/main.rs @@ -75,6 +75,7 @@ pub fn main() -> anyhow::Result<()> { website: config.website(), cdn_cache_mode: config.cdn_cache_mode(), max_weight: config.max_weight(), + max_utxos: config.max_utxos(), }; let port = config.brkport(); diff --git a/crates/brk_client/src/lib.rs b/crates/brk_client/src/lib.rs index c89864cfb..4f3cb20e9 100644 --- a/crates/brk_client/src/lib.rs +++ b/crates/brk_client/src/lib.rs @@ -9142,7 +9142,7 @@ impl BrkClient { /// *[Mempool.space docs](https://mempool.space/docs/api/rest#get-block-transaction-id)* /// /// Endpoint: `GET /api/block/{hash}/txid/{index}` - pub fn get_block_txid(&self, hash: BlockHash, index: TxIndex) -> Result { + pub fn get_block_txid(&self, hash: BlockHash, index: BlockTxIndex) -> Result { self.base.get_text(&format!("/api/block/{hash}/txid/{index}")) } @@ -9175,7 +9175,7 @@ impl BrkClient { /// *[Mempool.space docs](https://mempool.space/docs/api/rest#get-block-transactions)* /// /// Endpoint: `GET /api/block/{hash}/txs/{start_index}` - pub fn get_block_txs_from_index(&self, hash: BlockHash, start_index: TxIndex) -> Result> { + pub fn get_block_txs_from_index(&self, hash: BlockHash, start_index: BlockTxIndex) -> Result> { self.base.get_json(&format!("/api/block/{hash}/txs/{start_index}")) } diff --git a/crates/brk_computer/src/blocks/lookback.rs b/crates/brk_computer/src/blocks/lookback.rs index 6c1d0a6e2..73f11ac7e 100644 --- a/crates/brk_computer/src/blocks/lookback.rs +++ b/crates/brk_computer/src/blocks/lookback.rs @@ -1,6 +1,6 @@ use brk_error::Result; use brk_traversable::Traversable; -use brk_types::{Height, Indexes, TimePeriod, Timestamp, Version}; +use brk_types::{Height, Indexes, Timestamp, Version}; use vecdb::{ AnyVec, CachedVec, Cursor, Database, EagerVec, Exit, ImportableVec, PcoVec, ReadableVec, Rw, StorageMode, VecIndex, @@ -58,26 +58,6 @@ pub struct Vecs { pub _26y: M::Stored>>, // 9490d } -impl Vecs { - /// First block height inside `period` looking back from `tip`; `None` for `All`. - /// Walks real block timestamps, matching mempool.space's wall-clock - /// `time > NOW() - INTERVAL ${period}` cutoff. - pub fn start_height(&self, period: TimePeriod, tip: Height) -> Option { - match period { - TimePeriod::Day => self._24h.collect_one(tip), - TimePeriod::ThreeDays => self._3d.collect_one(tip), - TimePeriod::Week => self._1w.collect_one(tip), - TimePeriod::Month => self._1m.collect_one(tip), - TimePeriod::ThreeMonths => self._3m.collect_one(tip), - TimePeriod::SixMonths => self._6m.collect_one(tip), - TimePeriod::Year => self._1y.collect_one(tip), - TimePeriod::TwoYears => self._2y.collect_one(tip), - TimePeriod::ThreeYears => self._3y.collect_one(tip), - TimePeriod::All => None, - } - } -} - impl Vecs { pub(crate) fn forced_import(db: &Database, version: Version) -> Result { let _1h = ImportableVec::forced_import(db, "height_1h_ago", version)?; diff --git a/crates/brk_computer/src/lib.rs b/crates/brk_computer/src/lib.rs index 08ad78a05..f35119219 100644 --- a/crates/brk_computer/src/lib.rs +++ b/crates/brk_computer/src/lib.rs @@ -5,7 +5,7 @@ use std::{fs, path::Path, thread, time::Instant}; use brk_error::Result; use brk_indexer::Indexer; use brk_traversable::Traversable; -use brk_types::Version; +use brk_types::{Height, Version}; use tracing::info; use vecdb::{AnyExportableVec, Exit, Ro, Rw, StorageMode}; @@ -480,6 +480,14 @@ impl Computer { } } +impl Computer { + /// Last height whose computed-side state is durably stamped, derived + /// from `distribution.supply_state`'s stamp. + pub fn computed_height(&self) -> Height { + Height::from(self.distribution.supply_state.stamp()) + } +} + macro_rules! impl_iter_named { ($($field:ident),+ $(,)?) => { impl_iter_named!(@mode Ro, $($field),+); diff --git a/crates/brk_error/src/lib.rs b/crates/brk_error/src/lib.rs index 8c6602617..540bc424e 100644 --- a/crates/brk_error/src/lib.rs +++ b/crates/brk_error/src/lib.rs @@ -1,6 +1,6 @@ #![doc = include_str!("../README.md")] -use std::{fmt, io, path::PathBuf, result, time}; +use std::{borrow::Cow, fmt, io, path::PathBuf, result, time}; use thiserror::Error; @@ -126,7 +126,7 @@ pub enum Error { NotFound(String), #[error("{0}")] - OutOfRange(String), + OutOfRange(Cow<'static, str>), #[error("{0}")] Parse(String), @@ -234,23 +234,33 @@ fn is_io_error_permanent(e: &std::io::Error) -> bool { } } +/// Maximum length of a user-supplied series name in error messages before +/// truncating with an ellipsis. +const SERIES_NAME_MAX_DISPLAY_LEN: usize = 100; + +/// Truncate a user-supplied series name for inclusion in an error message, +/// appending an ellipsis if it exceeds the display cap. Used for both +/// `SeriesNotFound` and `SeriesUnsupportedIndex` so far-too-long names don't +/// blow up the response body. +pub fn truncate_series_name(mut series: String) -> String { + if series.len() > SERIES_NAME_MAX_DISPLAY_LEN { + series.truncate(SERIES_NAME_MAX_DISPLAY_LEN); + series.push_str("..."); + } + series +} + #[derive(Debug)] pub struct SeriesNotFound { pub series: String, - pub suggestions: Vec, + pub suggestions: Vec<&'static str>, pub total_matches: usize, } impl SeriesNotFound { - pub fn new(mut series: String, all_matches: Vec) -> Self { - let total_matches = all_matches.len(); - let suggestions = all_matches.into_iter().take(3).collect(); - if series.len() > 100 { - series.truncate(100); - series.push_str("..."); - } + pub fn new(series: String, suggestions: Vec<&'static str>, total_matches: usize) -> Self { Self { - series, + series: truncate_series_name(series), suggestions, total_matches, } diff --git a/crates/brk_indexer/src/lib.rs b/crates/brk_indexer/src/lib.rs index 21118a4ec..bbc8e21a2 100644 --- a/crates/brk_indexer/src/lib.rs +++ b/crates/brk_indexer/src/lib.rs @@ -43,7 +43,15 @@ pub struct Indexer { impl Indexer { pub fn tip_blockhash(&self) -> BlockHash { - self.tip_blockhash.read().clone() + *self.tip_blockhash.read() + } +} + +impl Indexer { + /// Last height whose data is durably indexed, derived from the + /// `blockhash` vec's stamp. + pub fn indexed_height(&self) -> Height { + Height::from(self.vecs.blocks.blockhash.inner.stamp()) } } @@ -197,7 +205,7 @@ impl Indexer { self.vecs.rollback_if_needed(&starting_indexes)?; debug!("Rollback vecs done."); if let Some(hash) = prev_hash.as_ref() { - *self.tip_blockhash.write() = hash.clone(); + *self.tip_blockhash.write() = *hash; } drop(lock); diff --git a/crates/brk_indexer/src/processor/metadata.rs b/crates/brk_indexer/src/processor/metadata.rs index 33f628695..26b67ca87 100644 --- a/crates/brk_indexer/src/processor/metadata.rs +++ b/crates/brk_indexer/src/processor/metadata.rs @@ -32,7 +32,7 @@ impl BlockProcessor<'_> { .blocks .blockhash .inner - .checked_push(height, blockhash.clone())?; + .checked_push(height, *blockhash)?; self.vecs .blocks .coinbase_tag diff --git a/crates/brk_iterator/Cargo.toml b/crates/brk_iterator/Cargo.toml index 43a02a12a..3392363d2 100644 --- a/crates/brk_iterator/Cargo.toml +++ b/crates/brk_iterator/Cargo.toml @@ -13,3 +13,6 @@ brk_error = { workspace = true } brk_reader = { workspace = true } brk_rpc = { workspace = true } brk_types = { workspace = true } + +[dev-dependencies] +bitcoin = { workspace = true } diff --git a/crates/brk_iterator/examples/sigops_bench.rs b/crates/brk_iterator/examples/sigops_bench.rs new file mode 100644 index 000000000..5ac7576c3 --- /dev/null +++ b/crates/brk_iterator/examples/sigops_bench.rs @@ -0,0 +1,131 @@ +//! Microbenchmark: cost of `bitcoin::Transaction::total_sigop_cost` on +//! real recent blocks. +//! +//! Strategy: pull a sample of recent blocks via RPC (already-decoded +//! `bitcoin::Block`), and for each tx, time `total_sigop_cost` twice: +//! +//! 1. `|_| None` lookup — counts only legacy script_sig / script_pubkey +//! sigops (skips P2SH redeem + witness). Cheap lower bound. +//! 2. Synthetic prevout map seeded with a P2WSH-shaped script_pubkey for +//! every input, forcing the witness sigop walk to fire on every input. +//! Pessimistic upper bound. +//! +//! The realistic cost is between these two, weighted by how many inputs +//! are actually P2SH/witness (~95% on mainnet today). +//! +//! Sample = N most recent blocks via `getblock` (verbosity 0 = raw bytes, +//! decoded by the iterator). + +use std::time::Instant; + +use bitcoin::{OutPoint, ScriptBuf, TxOut}; +use brk_error::Result; +use brk_iterator::Blocks; +use brk_reader::Reader; +use brk_rpc::{Auth, Client}; +use brk_types::Height; + +fn main() -> Result<()> { + let bitcoin_dir = Client::default_bitcoin_path(); + let client = Client::new( + Client::default_url(), + Auth::CookieFile(bitcoin_dir.join(".cookie")), + )?; + let reader = Reader::new(bitcoin_dir.join("blocks"), &client); + let blocks = Blocks::new(&client, &reader); + + let tip: u32 = client.get_block_count()? as u32; + const SAMPLE_BLOCKS: u32 = 16; + let start = Height::new(tip - SAMPLE_BLOCKS); + let end = Height::new(tip); + + println!( + "Sampling blocks {}..{} ({} blocks)", + u32::from(start), + u32::from(end), + SAMPLE_BLOCKS + ); + + let mut all_txs: Vec = Vec::with_capacity(64_000); + let mut total_inputs: usize = 0; + let mut total_outputs: usize = 0; + let mut total_witness_bytes: usize = 0; + let mut total_script_sig_bytes: usize = 0; + + let t_fetch = Instant::now(); + for block in blocks.range(start, end)? { + let block = block?; + for tx in &block.txdata { + total_inputs += tx.input.len(); + total_outputs += tx.output.len(); + for input in &tx.input { + total_script_sig_bytes += input.script_sig.len(); + total_witness_bytes += input.witness.iter().map(|w| w.len()).sum::(); + } + all_txs.push(tx.clone()); + } + } + let t_fetch = t_fetch.elapsed(); + + let n = all_txs.len(); + println!( + "Fetched {n} txs in {:?}: {} inputs, {} outputs, \ + scriptSig={} bytes, witness={} bytes", + t_fetch, total_inputs, total_outputs, total_script_sig_bytes, total_witness_bytes + ); + + // 1) Cheap lower bound: |_| None lookup. + let t1 = Instant::now(); + let mut sum_low: u64 = 0; + for tx in &all_txs { + sum_low += tx.total_sigop_cost(|_| None) as u64; + } + let elapsed_low = t1.elapsed(); + println!( + "[None lookup ] {n} txs in {:?} = {:.0} ns/tx, sum sigops={}", + elapsed_low, + elapsed_low.as_nanos() as f64 / n as f64, + sum_low + ); + + // 2) Pessimistic upper bound: P2WSH-shaped prevout for every input, + // forcing the full witness walk. Use a 32-byte zero hash; the witness + // last element will be empty so witness sigop count is 0, but the + // is_p2wsh path runs end-to-end. + let p2wsh_spk = { + let mut bytes = vec![0x00, 0x20]; + bytes.extend_from_slice(&[0u8; 32]); + ScriptBuf::from_bytes(bytes) + }; + let synthetic_txout = TxOut { + value: bitcoin::Amount::from_sat(0), + script_pubkey: p2wsh_spk, + }; + + let t2 = Instant::now(); + let mut sum_hi: u64 = 0; + for tx in &all_txs { + sum_hi += tx + .total_sigop_cost(|_op: &OutPoint| Some(synthetic_txout.clone())) + as u64; + } + let elapsed_hi = t2.elapsed(); + println!( + "[P2WSH lookup] {n} txs in {:?} = {:.0} ns/tx, sum sigops={}", + elapsed_hi, + elapsed_hi.as_nanos() as f64 / n as f64, + sum_hi + ); + + // 3) Block-level extrapolation. Mainnet averages ~3000 tx/block, so + // per-block cost ~= ns/tx * 3000. + let txs_per_block = (n / SAMPLE_BLOCKS as usize) as f64; + let block_low_us = elapsed_low.as_nanos() as f64 / SAMPLE_BLOCKS as f64 / 1000.0; + let block_hi_us = elapsed_hi.as_nanos() as f64 / SAMPLE_BLOCKS as f64 / 1000.0; + println!( + "Per-block (avg {:.0} tx): low={:.1} us, high={:.1} us", + txs_per_block, block_low_us, block_hi_us + ); + + Ok(()) +} diff --git a/crates/brk_iterator/src/iterator.rs b/crates/brk_iterator/src/iterator.rs index 80de4466b..1ee0d87dd 100644 --- a/crates/brk_iterator/src/iterator.rs +++ b/crates/brk_iterator/src/iterator.rs @@ -40,7 +40,7 @@ impl Iterator for BlockIterator { ))); } - prev_hash.replace(hash.clone()); + prev_hash.replace(hash); Some(Ok(Block::from((height, hash, block)))) } diff --git a/crates/brk_mempool/examples/mempool.rs b/crates/brk_mempool/examples/mempool.rs index d4d66f604..4ed06ec3e 100644 --- a/crates/brk_mempool/examples/mempool.rs +++ b/crates/brk_mempool/examples/mempool.rs @@ -15,58 +15,52 @@ fn main() -> Result<()> { let mempool = Mempool::new(&client); - // Start mempool sync in background thread let mempool_clone = mempool.clone(); thread::spawn(move || { mempool_clone.start(); }); - // Poll and display stats every 5 seconds loop { thread::sleep(Duration::from_secs(5)); - // Basic mempool info let info = mempool.info(); - let block_stats = mempool.block_stats(); - let total_fees: u64 = block_stats.iter().map(|s| u64::from(s.total_fee)).sum(); - println!("\n=== Mempool Info ==="); - println!(" Transactions: {}", info.count); - println!(" Total vsize: {} vB", info.vsize); - println!( - " Total fees: {:.4} BTC", - total_fees as f64 / 100_000_000.0 - ); - - // Fee recommendations (like mempool.space) - let fees = mempool.fees(); - println!("\n=== Recommended Fees (sat/vB) ==="); - println!(" No Priority {:.4}", f64::from(fees.economy_fee)); - println!(" Low Priority {:.4}", f64::from(fees.hour_fee)); - println!(" Medium Priority {:.4}", f64::from(fees.half_hour_fee)); - println!(" High Priority {:.4}", f64::from(fees.fastest_fee)); - - // Projected blocks (like mempool.space) - if !block_stats.is_empty() { - println!("\n=== Projected Blocks ==="); - for (i, stats) in block_stats.iter().enumerate() { - let total_fee_btc = u64::from(stats.total_fee) as f64 / 100_000_000.0; - println!( - " Block {}: ~{:.4} sat/vB, {:.4}-{:.4} sat/vB, {:.3} BTC, {} txs", - i + 1, - f64::from(stats.median_fee_rate()), - f64::from(stats.min_fee_rate()), - f64::from(stats.max_fee_rate()), - total_fee_btc, - stats.tx_count, - ); - } - } - - // Address tracking stats + let entries = mempool.entries(); + let txs = mempool.txs(); let addrs = mempool.addrs(); - println!("\n=== Address Tracking ==="); - println!(" Addresses with pending txs: {}", addrs.len()); + let graveyard = mempool.graveyard(); + let outpoint_spends = mempool.state().outpoint_spends.read(); + let snapshot = mempool.snapshot(); - println!("\n----------------------------------------"); + let cluster_nodes_total: usize = snapshot.clusters.iter().map(|c| c.nodes.len()).sum(); + let blocks_tx_total: usize = snapshot.blocks.iter().map(|b| b.len()).sum(); + let (skip_clean, skip_throttled) = mempool.skip_counts(); + + println!( + "info.count={} entries.slots={} entries.active={} entries.free={} \ + txs={} unresolved={} addrs={} outpoints={} \ + graveyard.tombstones={} graveyard.order={} \ + snap.clusters={} snap.cluster_nodes={} snap.cluster_of.len={} snap.cluster_of.active={} \ + snap.blocks={} snap.blocks_txs={} \ + rebuilds={} skip.clean={} skip.throttled={}", + info.count, + entries.entries().len(), + entries.active_count(), + entries.free_slots_count(), + txs.len(), + txs.unresolved().len(), + addrs.len(), + outpoint_spends.len(), + graveyard.tombstones_len(), + graveyard.order_len(), + snapshot.clusters.len(), + cluster_nodes_total, + snapshot.cluster_of_len(), + snapshot.cluster_of_active(), + snapshot.blocks.len(), + blocks_tx_total, + mempool.rebuild_count(), + skip_clean, + skip_throttled, + ); } } diff --git a/crates/brk_mempool/src/cluster/chunk.rs b/crates/brk_mempool/src/cluster/chunk.rs new file mode 100644 index 000000000..9cf278c1f --- /dev/null +++ b/crates/brk_mempool/src/cluster/chunk.rs @@ -0,0 +1,31 @@ +use brk_types::{CpfpClusterChunk, CpfpClusterTxIndex, FeeRate, Sats, VSize}; +use smallvec::SmallVec; + +use super::LocalIdx; + +pub struct Chunk { + /// Cluster-local positions of the txs in this chunk, in topological + /// order (parents before children). Populated by `Cluster::new`. + pub txs: SmallVec<[LocalIdx; 4]>, + pub fee: Sats, + pub vsize: VSize, +} + +impl Chunk { + pub fn fee_rate(&self) -> FeeRate { + FeeRate::from((self.fee, self.vsize)) + } +} + +impl From<&Chunk> for CpfpClusterChunk { + fn from(chunk: &Chunk) -> Self { + Self { + txs: chunk + .txs + .iter() + .map(|&local| CpfpClusterTxIndex::from(local.inner())) + .collect(), + feerate: chunk.fee_rate(), + } + } +} diff --git a/crates/brk_mempool/src/cluster/chunk_id.rs b/crates/brk_mempool/src/cluster/chunk_id.rs new file mode 100644 index 000000000..ed8bdb0d2 --- /dev/null +++ b/crates/brk_mempool/src/cluster/chunk_id.rs @@ -0,0 +1,33 @@ +/// Index of a `Chunk` inside a `Cluster.chunks`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct ChunkId(u32); + +impl ChunkId { + pub const ZERO: Self = Self(0); + + #[inline] + pub fn as_usize(self) -> usize { + self.0 as usize + } + + #[inline] + pub fn inner(self) -> u32 { + self.0 + } +} + +impl From for ChunkId { + #[inline] + fn from(v: u32) -> Self { + Self(v) + } +} + +impl From for ChunkId { + #[inline] + fn from(v: usize) -> Self { + debug_assert!(v <= u32::MAX as usize, "ChunkId overflow: {v}"); + Self(v as u32) + } +} diff --git a/crates/brk_mempool/src/cluster/cluster_id.rs b/crates/brk_mempool/src/cluster/cluster_id.rs new file mode 100644 index 000000000..281b0a35b --- /dev/null +++ b/crates/brk_mempool/src/cluster/cluster_id.rs @@ -0,0 +1,31 @@ +/// Index of a `Cluster` inside `Snapshot::clusters`. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct ClusterId(u32); + +impl ClusterId { + #[inline] + pub fn as_usize(self) -> usize { + self.0 as usize + } + + #[inline] + pub fn inner(self) -> u32 { + self.0 + } +} + +impl From for ClusterId { + #[inline] + fn from(v: u32) -> Self { + Self(v) + } +} + +impl From for ClusterId { + #[inline] + fn from(v: usize) -> Self { + debug_assert!(v <= u32::MAX as usize, "ClusterId overflow: {v}"); + Self(v as u32) + } +} diff --git a/crates/brk_mempool/src/cluster/cluster_node.rs b/crates/brk_mempool/src/cluster/cluster_node.rs new file mode 100644 index 000000000..f1c47e8f1 --- /dev/null +++ b/crates/brk_mempool/src/cluster/cluster_node.rs @@ -0,0 +1,48 @@ +use brk_types::{CpfpClusterTx, CpfpClusterTxIndex, CpfpEntry, Sats, Txid, VSize, Weight}; +use smallvec::SmallVec; + +use super::LocalIdx; + +/// A node inside a `Cluster`. The `id` carries whatever the caller +/// uses to refer back to the source tx: `brk_mempool::stores::TxIndex` +/// (live pool slot) on the mempool path, `brk_types::TxIndex` (global +/// indexer position) on the confirmed path. `Cluster::new` and the SFL +/// algorithm don't read it. +/// +/// All fields are `pub` and callers construct directly with struct +/// literals; `parents` are always supplied at construction (no +/// post-init mutation pattern). +pub struct ClusterNode { + pub id: I, + pub txid: Txid, + pub fee: Sats, + pub vsize: VSize, + pub weight: Weight, + /// Direct parents in the cluster. Caller-supplied. + pub parents: SmallVec<[LocalIdx; 2]>, +} + +impl From<&ClusterNode> for CpfpEntry { + fn from(node: &ClusterNode) -> Self { + Self { + txid: node.txid, + weight: node.weight, + fee: node.fee, + } + } +} + +impl From<&ClusterNode> for CpfpClusterTx { + fn from(node: &ClusterNode) -> Self { + Self { + txid: node.txid, + weight: node.weight, + fee: node.fee, + parents: node + .parents + .iter() + .map(|&p| CpfpClusterTxIndex::from(p.inner())) + .collect(), + } + } +} diff --git a/crates/brk_mempool/src/cluster/cluster_ref.rs b/crates/brk_mempool/src/cluster/cluster_ref.rs new file mode 100644 index 000000000..3f88630c6 --- /dev/null +++ b/crates/brk_mempool/src/cluster/cluster_ref.rs @@ -0,0 +1,9 @@ +use super::{ClusterId, LocalIdx}; + +/// Locates a node within the cluster forest: which cluster it lives in, +/// and its `LocalIdx` inside that cluster. +#[derive(Debug, Clone, Copy)] +pub struct ClusterRef { + pub cluster_id: ClusterId, + pub local: LocalIdx, +} diff --git a/crates/brk_mempool/src/cluster/local_idx.rs b/crates/brk_mempool/src/cluster/local_idx.rs new file mode 100644 index 000000000..0ec5ffa27 --- /dev/null +++ b/crates/brk_mempool/src/cluster/local_idx.rs @@ -0,0 +1,34 @@ +/// Index of a node within a single `Cluster`. Cluster-local; meaningless +/// across clusters. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[repr(transparent)] +pub struct LocalIdx(u32); + +impl LocalIdx { + pub const ZERO: Self = Self(0); + + #[inline] + pub fn as_usize(self) -> usize { + self.0 as usize + } + + #[inline] + pub fn inner(self) -> u32 { + self.0 + } +} + +impl From for LocalIdx { + #[inline] + fn from(v: u32) -> Self { + Self(v) + } +} + +impl From for LocalIdx { + #[inline] + fn from(v: usize) -> Self { + debug_assert!(v <= u32::MAX as usize, "LocalIdx overflow: {v}"); + Self(v as u32) + } +} diff --git a/crates/brk_mempool/src/cluster/mod.rs b/crates/brk_mempool/src/cluster/mod.rs new file mode 100644 index 000000000..42a53639e --- /dev/null +++ b/crates/brk_mempool/src/cluster/mod.rs @@ -0,0 +1,145 @@ +//! Cluster primitive shared by the live mempool snapshot rebuilder +//! and the per-request CPFP path. A `Cluster` is a connected component +//! of the mempool dependency graph, locally re-indexed in topological +//! order and SFL-linearized into chunks ordered by descending feerate. +//! +//! Callers supply `ClusterNode`s with parent edges only; `Cluster::new` +//! permutes them into Kahn topological order (so `LocalIdx == position +//! in `nodes` == topological position`), then runs SFL. + +mod chunk; +mod chunk_id; +mod cluster_id; +mod cluster_node; +mod cluster_ref; +mod local_idx; +mod sfl; + +pub use chunk::Chunk; +pub use chunk_id::ChunkId; +pub use cluster_id::ClusterId; +pub use cluster_node::ClusterNode; +pub use cluster_ref::ClusterRef; +pub use local_idx::LocalIdx; + +use smallvec::SmallVec; + +/// A connected component of the mempool graph, stored in topological +/// order (parents before children) and SFL-linearized into chunks. +/// +/// `I` is the caller's identifier for each node: `brk_mempool::stores::TxIndex` +/// (live pool slot) on the mempool path, `brk_types::TxIndex` (global indexer +/// position) on the confirmed path. The SFL algorithm doesn't touch it; only +/// consumers that need to map a `LocalIdx` back to source-tx state read it. +/// +/// Because nodes are stored topologically, every `LocalIdx` is also +/// its topological position: parent edges always point to lower +/// indices, and a forward iteration over `nodes` is a valid topo +/// sweep. +pub struct Cluster { + pub nodes: Vec>, + /// SFL-emitted chunks, ordered by descending feerate. + pub chunks: Vec, + /// `node_to_chunk[local]` is the `ChunkId` that contains the node. + pub node_to_chunk: Vec, +} + +impl Cluster { + pub fn new(nodes: Vec>) -> Self { + let nodes = Self::permute_to_topo_order(nodes); + let chunk_masks = sfl::linearize(&nodes); + let (chunks, node_to_chunk) = Self::materialize_chunks(&chunk_masks, nodes.len()); + Self { + nodes, + chunks, + node_to_chunk, + } + } + + /// O(1) chunk lookup for a node. + #[inline] + pub fn chunk_of(&self, local: LocalIdx) -> &Chunk { + &self.chunks[self.node_to_chunk[local.as_usize()].as_usize()] + } + + /// Reorder `nodes` into Kahn topological order and remap every + /// parent edge into the new index space. Single pass: build the + /// child adjacency and in-degrees, then Kahn-pop directly into the + /// output Vec while remapping each node's parents through the + /// `new_pos[old] -> new` map populated as we pop. Post-condition: + /// for every `i`, every parent of `nodes[i]` has a `LocalIdx` + /// strictly less than `i`. + fn permute_to_topo_order(mut nodes: Vec>) -> Vec> { + let n = nodes.len(); + let mut children: Vec> = + (0..n).map(|_| SmallVec::new()).collect(); + let mut indegree: Vec = vec![0; n]; + for (i, node) in nodes.iter().enumerate() { + indegree[i] = node.parents.len() as u32; + for &p in &node.parents { + children[p.as_usize()].push(LocalIdx::from(i)); + } + } + + // Sources (in-degree 0) seed the queue. We hold them as `LocalIdx` + // pointing at the *old* slot; `out` drains nodes out as it pops. + let mut queue: Vec = (0..n) + .filter(|&i| indegree[i] == 0) + .map(LocalIdx::from) + .collect(); + let mut new_pos = vec![LocalIdx::ZERO; n]; + let mut out: Vec> = Vec::with_capacity(n); + let mut taken: Vec>> = nodes.drain(..).map(Some).collect(); + + let mut head = 0; + while head < queue.len() { + let v = queue[head]; + head += 1; + new_pos[v.as_usize()] = LocalIdx::from(out.len()); + let mut node = taken[v.as_usize()].take().unwrap(); + for p in node.parents.iter_mut() { + *p = new_pos[p.as_usize()]; + } + out.push(node); + for &c in &children[v.as_usize()] { + indegree[c.as_usize()] -= 1; + if indegree[c.as_usize()] == 0 { + queue.push(c); + } + } + } + + debug_assert_eq!(out.len(), n, "cluster contained a cycle"); + out + } + + /// Convert SFL's raw bit-masks into final `Chunk`s with topo-ordered + /// `txs` and a `tx → ChunkId` reverse map. Bit iteration via + /// `trailing_zeros` visits each chunk's bits in ascending order, and + /// nodes are stored in topo order (`LocalIdx == position`), so each + /// pushed `LocalIdx` lands parents-first in `chunk.txs`. + fn materialize_chunks( + chunk_masks: &[sfl::ChunkMask], + n: usize, + ) -> (Vec, Vec) { + let mut chunks: Vec = Vec::with_capacity(chunk_masks.len()); + let mut node_to_chunk = vec![ChunkId::ZERO; n]; + for (cid, cm) in chunk_masks.iter().enumerate() { + let chunk_id = ChunkId::from(cid); + let mut chunk = Chunk { + txs: SmallVec::new(), + fee: cm.fee, + vsize: cm.vsize, + }; + let mut bits = cm.mask; + while bits != 0 { + let i = bits.trailing_zeros() as usize; + node_to_chunk[i] = chunk_id; + chunk.txs.push(LocalIdx::from(i)); + bits &= bits - 1; + } + chunks.push(chunk); + } + (chunks, node_to_chunk) + } +} diff --git a/crates/brk_mempool/src/cluster/sfl.rs b/crates/brk_mempool/src/cluster/sfl.rs new file mode 100644 index 000000000..f150a65e0 --- /dev/null +++ b/crates/brk_mempool/src/cluster/sfl.rs @@ -0,0 +1,283 @@ +//! Cluster linearizer. +//! +//! Two-branch dispatch by cluster size: +//! - **n ≤ 18**: recursive enumeration of topologically-closed subsets. +//! Provably optimal. Visits only valid subsets (skips non-closed ones +//! without filtering) and maintains running fee/vsize incrementally. +//! - **n > 18**: "greedy-union" ancestor-set search. Seeds with each +//! node's ancestor closure, then greedily adds any other ancestor +//! closure whose inclusion raises the combined feerate. Strict +//! superset of ancestor-set-sort's candidate space, catching the +//! sibling-union shapes that pure ASS misses. +//! +//! A final stack-based `canonicalize` pass merges adjacent chunks when +//! the later one's feerate beats the earlier's, restoring the +//! non-increasing-rate invariant. +//! +//! Everything runs on `u128` bitmasks (covers Bitcoin Core 31's cluster +//! cap of 100). Rate comparisons go through `FeeRate`. The caller is +//! `Cluster::new`, which has already permuted nodes into topological +//! order — so `LocalIdx == position == topological rank`, and this +//! module never has to take a `topo_order` permutation. + +use brk_types::{FeeRate, Sats, VSize}; + +use super::ClusterNode; + +const BRUTE_FORCE_LIMIT: usize = 18; +/// Cluster nodes are indexed by `u128` bitmask, so `n < 128`. Bitcoin +/// Core's cluster cap is 100, so this leaves comfortable margin. +const BITMASK_LIMIT: usize = 128; + +/// Raw SFL output: a chunk's bitmask plus its totals. `Cluster::new` +/// converts these into final `Chunk`s with topo-ordered `txs`, so the +/// algorithm doesn't have to materialize them itself. +pub(super) struct ChunkMask { + pub mask: u128, + pub fee: Sats, + pub vsize: VSize, +} + +impl ChunkMask { + fn fee_rate(&self) -> FeeRate { + FeeRate::from((self.fee, self.vsize)) + } +} + +/// Linearize a cluster into SFL chunks. +pub(super) fn linearize(nodes: &[ClusterNode]) -> Vec { + assert!( + nodes.len() < BITMASK_LIMIT, + "cluster size {} exceeds u128 capacity", + nodes.len() + ); + let tables = Tables::build(nodes); + let chunks = extract_chunks(&tables); + canonicalize(chunks) +} + +/// Peel the cluster one chunk at a time. Each iteration picks the +/// highest-feerate topologically-closed subset of `remaining` and +/// removes it. Loop terminates because every iteration removes at +/// least one node. +fn extract_chunks(t: &Tables) -> Vec { + let pick: fn(&Tables, u128) -> (u128, Sats, VSize) = if t.n <= BRUTE_FORCE_LIMIT { + best_subset + } else { + best_ancestor_union + }; + let mut chunks: Vec = Vec::new(); + let mut remaining: u128 = t.all; + while remaining != 0 { + let (mask, fee, vsize) = pick(t, remaining); + chunks.push(ChunkMask { mask, fee, vsize }); + remaining &= !mask; + } + chunks +} + +/// Recursive enumeration of topologically-closed subsets of +/// `remaining`. Returns the (mask, fee, vsize) with the highest rate; +/// when `remaining` is all zero-fee (e.g. a CPFP-parent leftover after +/// the paying chunk was extracted), the first non-empty subset wins so +/// `extract_chunks` always makes progress. Iterates nodes by index +/// `0..n`; since the cluster is stored in topological order, that *is* +/// a topological sweep. +fn best_subset(t: &Tables, remaining: u128) -> (u128, Sats, VSize) { + let ctx = Ctx { tables: t, remaining }; + let mut best = (0u128, Sats::ZERO, VSize::default()); + recurse(&ctx, 0, 0, Sats::ZERO, VSize::default(), &mut best); + best +} + +fn recurse( + ctx: &Ctx, + idx: usize, + included: u128, + f: Sats, + v: VSize, + best: &mut (u128, Sats, VSize), +) { + if idx == ctx.tables.n { + if included != 0 + && (best.0 == 0 || FeeRate::from((f, v)) > FeeRate::from((best.1, best.2))) + { + *best = (included, f, v); + } + return; + } + let bit = 1u128 << idx; + + // Not in remaining, or a parent (within remaining) is excluded: + // this node is forced-excluded, no branching. + if (bit & ctx.remaining) == 0 + || (ctx.tables.parents_mask[idx] & ctx.remaining & !included) != 0 + { + recurse(ctx, idx + 1, included, f, v, best); + return; + } + + recurse(ctx, idx + 1, included, f, v, best); + recurse( + ctx, + idx + 1, + included | bit, + f + ctx.tables.fee_of[idx], + v + ctx.tables.vsize_of[idx], + best, + ); +} + +/// For each node v in `remaining`, seed with anc(v) ∩ remaining, then +/// greedily extend by adding any anc(u) whose inclusion raises the +/// feerate. Pick the best result across all seeds; when every seed has +/// rate 0 (e.g. a CPFP-parent leftover after the paying chunk was +/// extracted), the first seed wins so `extract_chunks` always makes +/// progress. +/// +/// Every candidate evaluated is a union of ancestor closures, so it +/// is topologically closed by construction. Strictly explores more +/// candidates than pure ancestor-set-sort, at O(n³) per chunk step. +fn best_ancestor_union(t: &Tables, remaining: u128) -> (u128, Sats, VSize) { + let mut best = (0u128, Sats::ZERO, VSize::default()); + let mut best_rate = FeeRate::default(); + let mut seeds = remaining; + while seeds != 0 { + let i = seeds.trailing_zeros() as usize; + seeds &= seeds - 1; + + let mut s = t.ancestor_incl[i] & remaining; + let (mut f, mut v) = totals(s, &t.fee_of, &t.vsize_of); + let mut rate = FeeRate::from((f, v)); + + // Greedy extension to fixed point: pick the ancestor-closure + // addition that yields the highest resulting feerate, if any. + loop { + let mut picked: Option<(u128, Sats, VSize, FeeRate)> = None; + let mut cands = remaining & !s; + while cands != 0 { + let j = cands.trailing_zeros() as usize; + cands &= cands - 1; + let add = t.ancestor_incl[j] & remaining & !s; + if add == 0 { + continue; + } + let (df, dv) = totals(add, &t.fee_of, &t.vsize_of); + let nf = f + df; + let nv = v + dv; + let nrate = FeeRate::from((nf, nv)); + if nrate <= rate { + continue; + } + if picked.is_none_or(|(_, _, _, prate)| nrate > prate) { + picked = Some((add, nf, nv, nrate)); + } + } + match picked { + Some((add, nf, nv, nrate)) => { + s |= add; + f = nf; + v = nv; + rate = nrate; + } + None => break, + } + } + + if best.0 == 0 || rate > best_rate { + best = (s, f, v); + best_rate = rate; + } + } + best +} + +/// Single-pass stack merge: for each incoming chunk, merge it into +/// the stack top while the merge would raise the top's feerate, then +/// push. O(n) total regardless of how many merges cascade. +fn canonicalize(chunks: Vec) -> Vec { + let mut out: Vec = Vec::with_capacity(chunks.len()); + for mut cur in chunks { + while let Some(top) = out.last() { + if cur.fee_rate() <= top.fee_rate() { + break; + } + let prev = out.pop().unwrap(); + cur = ChunkMask { + mask: prev.mask | cur.mask, + fee: prev.fee + cur.fee, + vsize: prev.vsize + cur.vsize, + }; + } + out.push(cur); + } + out +} + +#[inline] +fn totals(mask: u128, fee_of: &[Sats], vsize_of: &[VSize]) -> (Sats, VSize) { + let mut f = Sats::ZERO; + let mut v = VSize::default(); + let mut bits = mask; + while bits != 0 { + let i = bits.trailing_zeros() as usize; + f += fee_of[i]; + v += vsize_of[i]; + bits &= bits - 1; + } + (f, v) +} + +/// Per-cluster precomputed bitmasks and lookups, shared across every +/// chunk-extraction iteration. Built once in `linearize`. +struct Tables { + n: usize, + /// Bitmask with one bit set per node (i.e. `(1 << n) - 1`). + all: u128, + /// `parents_mask[i]` = bits set for direct parents of node `i`. + parents_mask: Vec, + /// `ancestor_incl[i]` = bits set for `i` and all ancestors. + ancestor_incl: Vec, + fee_of: Vec, + vsize_of: Vec, +} + +impl Tables { + /// Single pass over nodes (in topological order, so each parent's + /// `ancestor_incl` is ready before the child reads it): build + /// parent-bit masks, ancestor closures, and pick out fee/vsize. + fn build(nodes: &[ClusterNode]) -> Self { + let n = nodes.len(); + let mut parents_mask: Vec = vec![0; n]; + let mut ancestor_incl: Vec = vec![0; n]; + let mut fee_of: Vec = Vec::with_capacity(n); + let mut vsize_of: Vec = Vec::with_capacity(n); + for (vi, node) in nodes.iter().enumerate() { + let mut par = 0u128; + let mut acc = 1u128 << vi; + for &p in &node.parents { + par |= 1u128 << p.inner(); + acc |= ancestor_incl[p.as_usize()]; + } + parents_mask[vi] = par; + ancestor_incl[vi] = acc; + fee_of.push(node.fee); + vsize_of.push(node.vsize); + } + Self { + n, + all: (1u128 << n) - 1, + parents_mask, + ancestor_incl, + fee_of, + vsize_of, + } + } +} + +/// Per-iteration immutable bundle for the brute-force recursion. +/// Keeping it small lets `recurse` stay at four moving args. +struct Ctx<'a> { + tables: &'a Tables, + remaining: u128, +} diff --git a/crates/brk_mempool/src/cpfp.rs b/crates/brk_mempool/src/cpfp.rs index 1ec526478..65197ffde 100644 --- a/crates/brk_mempool/src/cpfp.rs +++ b/crates/brk_mempool/src/cpfp.rs @@ -1,246 +1,123 @@ -//! CPFP (Child Pays For Parent) cluster reasoning for live mempool -//! transactions. Cluster scope is the seed's projected block: txs in -//! other projected blocks share no mining fate with the seed, so -//! including them in `effectiveFeePerVsize` would be misleading. +//! CPFP (Child Pays For Parent) cluster reasoning. //! -//! Confirmed-tx CPFP (the same-block connected component on the -//! chain) lives in `brk_query`, since it reads indexer/computer vecs. +//! Two consumers, one shared converter: +//! +//! - **Mempool path** (`Mempool::cpfp_info`): looks up the seed in the +//! `Snapshot.cluster_of` map, which already contains the SFL-linearized +//! connected component built once per snapshot cycle. No graph walk, +//! no SFL recomputation. +//! - **Confirmed path** (`brk_query::Query::confirmed_cpfp`): builds a +//! `Cluster` from same-block parent/child edges on demand. +//! +//! Both feed `Cluster::to_cpfp_info`, which walks the cluster from the +//! seed (parents → ancestors, topo-sweep → descendants), reads the seed's +//! chunk feerate as `effectiveFeePerVsize`, and emits the wire shape. +//! +//! The cluster spans the full connected component (matches mempool.space); +//! we don't scope to the seed's projected block, which would drop info +//! when a cluster crosses the projection floor. use brk_types::{ - CpfpCluster, CpfpClusterChunk, CpfpClusterTx, CpfpClusterTxIndex, CpfpEntry, CpfpInfo, FeeRate, - TxidPrefix, VSize, Weight, + CpfpCluster, CpfpClusterChunk, CpfpClusterTx, CpfpEntry, CpfpInfo, FeeRate, SigOps, TxidPrefix, + VSize, }; -use rustc_hash::{FxHashMap, FxHashSet}; -use smallvec::SmallVec; -use crate::steps::rebuilder::linearize::{ - LocalIdx, cluster::Cluster, cluster_node::ClusterNode, sfl::Sfl, -}; -use crate::stores::{EntryPool, TxIndex}; -use crate::{Mempool, TxEntry}; -/// Cap matches Bitcoin Core's default mempool ancestor/descendant -/// chain limits and `confirmed_cpfp`'s cap. -const MAX: usize = 25; - -impl Mempool { - /// CPFP info for a live mempool tx, scoped to the seed's projected - /// block. Returns `None` if the tx is not in the mempool, so - /// callers can fall through to the confirmed path. Returns `Some` - /// with empty arms if the tx is in the mempool but below the - /// projection floor (no projected block to share fate with). - pub fn cpfp_info(&self, prefix: &TxidPrefix) -> Option { - let snapshot = self.snapshot(); - let entries = self.entries(); - let txs = self.txs(); - let seed_idx = entries.idx_of(prefix)?; - let seed = entries.slot(seed_idx)?; - - let mut ancestor_idxs: Vec = Vec::new(); - let mut descendant_idxs: Vec = Vec::new(); - let mut ancestors: Vec = Vec::new(); - let mut descendants: Vec = Vec::new(); - - if let Some(seed_block) = snapshot.block_of(seed_idx) { - let mut visited: FxHashSet = FxHashSet::default(); - visited.insert(*prefix); - let mut stack: Vec = seed.depends.iter().copied().collect(); - while let Some(p) = stack.pop() { - if ancestors.len() >= MAX { - break; - } - if !visited.insert(p) { - continue; - } - let Some(idx) = entries.idx_of(&p) else { continue }; - if snapshot.block_of(idx) != Some(seed_block) { - continue; - } - let Some(anc) = entries.slot(idx) else { continue }; - ancestor_idxs.push(idx); - ancestors.push(to_entry(anc)); - stack.extend(anc.depends.iter().copied()); - } - - let mut desc_set: FxHashSet = FxHashSet::default(); - desc_set.insert(*prefix); - for &i in &snapshot.blocks[seed_block.as_usize()] { - if descendants.len() >= MAX { - break; - } - let Some(e) = entries.slot(i) else { continue }; - if !e.depends.iter().any(|d| desc_set.contains(d)) { - continue; - } - desc_set.insert(e.txid_prefix()); - descendant_idxs.push(i); - descendants.push(to_entry(e)); - } - } +use crate::Mempool; +use crate::cluster::{Cluster, ClusterRef, LocalIdx}; +impl Cluster { + /// Wire-shape `CpfpInfo` for `seed` inside this cluster. `txid` and + /// `weight` come straight off each `ClusterNode`, so the converter + /// is self-contained — no parallel `members` slice required. + pub fn to_cpfp_info(&self, seed: LocalIdx, sigops: SigOps) -> CpfpInfo { + let descendants = self.walk_descendants(seed); let best_descendant = descendants .iter() .max_by_key(|e| FeeRate::from((e.fee, e.weight))) .cloned(); + let seed_node = &self.nodes[seed.as_usize()]; - let sigops = txs.get(&seed.txid).map(|tx| { - // Bitcoin Core's `total_sigop_cost` is the segwit-weighted sigop - // count (legacy * 4 + segwit * 1), divided by 5 to match - // mempool.space's reported `sigops`. Mempool.space converts - // back to count via `sigopcost / 5`. - u32::try_from(tx.total_sigop_cost / 5).unwrap_or(u32::MAX) - }); + let vsize = VSize::from(seed_node.weight); + let adjusted_vsize = sigops.adjust_vsize(vsize); - // mempool.space's adjustedVsize = max(vsize, sigops * 5). - let adjusted_vsize = match sigops { - Some(s) => VSize::from(u64::from(seed.vsize).max(u64::from(s) * 5)), - None => seed.vsize, - }; - - let cluster = build_cluster(seed_idx, &ancestor_idxs, &descendant_idxs, &entries); - - // mempool.space sets effectiveFeePerVsize to the seed's chunk feerate - // when the cluster is known, falls back to the seed's own rate. - let effective = cluster - .as_ref() - .and_then(|c| c.chunks.get(c.chunk_index as usize)) - .map(|chunk| chunk.feerate) - .unwrap_or_else(|| seed.fee_rate()); - - Some(CpfpInfo { - ancestors, + CpfpInfo { + ancestors: self.walk_ancestors(seed), best_descendant, descendants, - effective_fee_per_vsize: Some(effective), + effective_fee_per_vsize: self.chunk_of(seed).fee_rate(), sigops, - fee: Some(seed.fee), - adjusted_vsize: Some(adjusted_vsize), - cluster, - }) - } -} - -fn to_entry(e: &TxEntry) -> CpfpEntry { - CpfpEntry { - txid: e.txid.clone(), - weight: Weight::from(e.vsize), - fee: e.fee, - } -} - -/// Build the cluster output: seed + ancestors + descendants in topological -/// order, with parent indexes inside the cluster, plus SFL-linearized chunks. -fn build_cluster( - seed_idx: TxIndex, - ancestor_idxs: &[TxIndex], - descendant_idxs: &[TxIndex], - entries: &EntryPool, -) -> Option { - let mut ordered: Vec = Vec::with_capacity(ancestor_idxs.len() + 1 + descendant_idxs.len()); - ordered.extend(ancestor_idxs.iter().copied()); - ordered.push(seed_idx); - ordered.extend(descendant_idxs.iter().copied()); - - let pool: Vec<&TxEntry> = ordered.iter().filter_map(|&i| entries.slot(i)).collect(); - if pool.len() != ordered.len() { - return None; - } - - let prefix_to_local: FxHashMap = pool - .iter() - .enumerate() - .map(|(i, e)| (e.txid_prefix(), i as LocalIdx)) - .collect(); - - let mut children_of: Vec> = vec![SmallVec::new(); pool.len()]; - let parents_of: Vec> = pool - .iter() - .enumerate() - .map(|(i, e)| { - let parents: SmallVec<[LocalIdx; 2]> = e - .depends - .iter() - .filter_map(|p| prefix_to_local.get(p).copied()) - .collect(); - for &p in &parents { - children_of[p as usize].push(i as LocalIdx); - } - parents - }) - .collect(); - - let cluster_nodes: Vec = pool - .iter() - .enumerate() - .map(|(i, e)| ClusterNode { - tx_index: ordered[i], - fee: e.fee, - vsize: e.vsize, - parents: parents_of[i].clone(), - children: children_of[i].clone(), - }) - .collect(); - - let cluster = Cluster::new(cluster_nodes); - - // Re-order pool so parents come before children (mempool.space convention). - // `topo_rank[i]` gives the position of local index `i` in topological order. - let mut local_to_topo: Vec = (0..pool.len()).collect(); - local_to_topo.sort_unstable_by_key(|&i| cluster.topo_rank[i]); - let topo_to_local: Vec = { - let mut v = vec![0usize; pool.len()]; - for (topo_pos, &local) in local_to_topo.iter().enumerate() { - v[local] = topo_pos; + fee: seed_node.fee, + vsize, + adjusted_vsize, + cluster: self.cluster_view(seed), } - v - }; + } - let topo_idx = |local: usize| CpfpClusterTxIndex::from(topo_to_local[local] as u32); - - let txs: Vec = local_to_topo - .iter() - .map(|&local| { - let e = pool[local]; - let parents: Vec = parents_of[local] - .iter() - .map(|&p| topo_idx(p as usize)) - .collect(); - CpfpClusterTx { - txid: e.txid.clone(), - fee: e.fee, - weight: Weight::from(e.vsize), - parents, + /// DFS up the parent edges from `seed`, exclusive. Cluster size is + /// capped at 128 by SFL, so a `u128` covers the visited set. + fn walk_ancestors(&self, seed: LocalIdx) -> Vec { + let mut visited = 1u128 << seed.inner(); + let mut out: Vec = Vec::new(); + let mut stack: Vec = self.nodes[seed.as_usize()].parents.to_vec(); + while let Some(idx) = stack.pop() { + let b = 1u128 << idx.inner(); + if visited & b != 0 { + continue; } - }) - .collect(); + visited |= b; + let node = &self.nodes[idx.as_usize()]; + out.push(CpfpEntry::from(node)); + stack.extend(node.parents.iter().copied()); + } + out + } - let raw_chunks = Sfl::linearize(&cluster); - let chunks: Vec = raw_chunks - .iter() - .map(|chunk| { - let mut chunk_txs: Vec = chunk - .nodes - .iter() - .map(|&local| topo_idx(local as usize)) - .collect(); - chunk_txs.sort_unstable(); - CpfpClusterChunk { - txs: chunk_txs, - feerate: chunk.fee_rate(), + /// Forward sweep over the topo-ordered tail after `seed`. A node is + /// a descendant iff any of its parents is `seed` or already-reached. + /// Nodes before `seed` can't reach it, so they're skipped entirely. + fn walk_descendants(&self, seed: LocalIdx) -> Vec { + let seed_pos = seed.as_usize(); + let mut reachable = 1u128 << seed.inner(); + let mut out: Vec = Vec::new(); + for (i, node) in self.nodes.iter().enumerate().skip(seed_pos + 1) { + if node.parents.iter().any(|&p| reachable & (1u128 << p.inner()) != 0) { + reachable |= 1u128 << i; + out.push(CpfpEntry::from(node)); } - }) - .collect(); + } + out + } - let seed_local = *prefix_to_local.get(&entries.slot(seed_idx)?.txid_prefix())?; - let seed_topo = topo_idx(seed_local as usize); - let chunk_index = chunks - .iter() - .position(|c| c.txs.contains(&seed_topo)) - .unwrap_or(0) as u32; - - Some(CpfpCluster { - txs, - chunks, - chunk_index, - }) + /// Wire-shape `CpfpCluster`. Cluster nodes are stored in topological + /// order, so `LocalIdx` maps directly onto `CpfpClusterTxIndex` + /// without a permutation lookup. + fn cluster_view(&self, seed: LocalIdx) -> CpfpCluster { + CpfpCluster { + txs: self.nodes.iter().map(CpfpClusterTx::from).collect(), + chunks: self.chunks.iter().map(CpfpClusterChunk::from).collect(), + chunk_index: self.node_to_chunk[seed.as_usize()].inner(), + } + } +} + +impl Mempool { + /// CPFP info for a live mempool tx. Returns `None` only when the + /// tx isn't in the mempool, so callers can fall through to the + /// confirmed path. + pub fn cpfp_info(&self, prefix: &TxidPrefix) -> Option { + let snapshot = self.snapshot(); + let seed_idx = self.entries().idx_of(prefix)?; + let ClusterRef { cluster_id, local: seed_local } = snapshot.cluster_of(seed_idx)?; + let cluster = &snapshot.clusters[cluster_id.as_usize()]; + let seed_txid = &cluster.nodes[seed_local.as_usize()].txid; + + let sigops = self + .txs() + .get(seed_txid) + .map(|tx| tx.total_sigop_cost) + .unwrap_or(SigOps::ZERO); + + Some(cluster.to_cpfp_info(seed_local, sigops)) + } } diff --git a/crates/brk_mempool/src/lib.rs b/crates/brk_mempool/src/lib.rs index 5bd963542..c006265f3 100644 --- a/crates/brk_mempool/src/lib.rs +++ b/crates/brk_mempool/src/lib.rs @@ -21,6 +21,7 @@ use brk_types::{AddrBytes, MempoolInfo, OutpointPrefix, TxOut, Txid, TxidPrefix, use parking_lot::RwLockReadGuard; use tracing::error; +pub mod cluster; mod cpfp; pub(crate) mod steps; pub(crate) mod stores; @@ -28,7 +29,7 @@ pub(crate) mod stores; mod tests; use steps::{Applier, Fetcher, Preparer, Rebuilder, Resolver}; -pub use steps::{BlkIndex, BlockStats, RecommendedFees, Snapshot, TxEntry, TxRemoval}; +pub use steps::{BlockStats, RecommendedFees, Snapshot, TxEntry, TxRemoval}; use stores::{AddrTracker, MempoolState}; pub use stores::{EntryPool, TxGraveyard, TxStore, TxTombstone}; @@ -59,6 +60,14 @@ impl Mempool { self.0.rebuilder.snapshot() } + pub fn rebuild_count(&self) -> u64 { + self.0.rebuilder.rebuild_count() + } + + pub fn skip_counts(&self) -> (u64, u64) { + self.0.rebuilder.skip_counts() + } + pub fn fees(&self) -> RecommendedFees { self.snapshot().fees.clone() } @@ -85,7 +94,7 @@ impl Mempool { let entries = self.0.state.entries.read(); let outpoint_spends = self.0.state.outpoint_spends.read(); let idx = outpoint_spends.get(&key)?; - let spender_txid = entries.slot(idx)?.txid.clone(); + let spender_txid = entries.slot(idx)?.txid; let spender_tx = txs.get(&spender_txid)?; let vin_pos = spender_tx .input @@ -139,7 +148,11 @@ impl Mempool { /// One sync cycle: fetch, prepare, apply, resolve, maybe rebuild. pub fn update(&self) -> Result<()> { - let Inner { client, state, rebuilder } = &*self.0; + let Inner { + client, + state, + rebuilder, + } = &*self.0; let fetched = Fetcher::fetch(client, state)?; let pulled = Preparer::prepare(fetched, state); @@ -149,4 +162,8 @@ impl Mempool { Ok(()) } + + pub fn state(&self) -> &MempoolState { + &self.0.state + } } diff --git a/crates/brk_mempool/src/steps/applier.rs b/crates/brk_mempool/src/steps/applier.rs index 735e2539d..2b8f782e4 100644 --- a/crates/brk_mempool/src/steps/applier.rs +++ b/crates/brk_mempool/src/steps/applier.rs @@ -35,7 +35,7 @@ impl Applier { let Some((idx, entry)) = s.entries.remove(prefix) else { return; }; - let txid = entry.txid.clone(); + let txid = entry.txid; let Some(tx) = s.txs.remove(&txid) else { return; }; @@ -71,7 +71,7 @@ impl Applier { fn publish_one(s: &mut LockedState, tx: Transaction, entry: TxEntry) -> (Txid, Transaction) { s.info.add(&tx, entry.fee); s.addrs.add_tx(&tx, &entry.txid); - let txid = entry.txid.clone(); + let txid = entry.txid; let idx = s.entries.insert(entry); s.outpoint_spends.insert_spends(&tx, idx); (txid, tx) diff --git a/crates/brk_mempool/src/steps/fetcher/mod.rs b/crates/brk_mempool/src/steps/fetcher/mod.rs index 4ef406814..f73ec2b06 100644 --- a/crates/brk_mempool/src/steps/fetcher/mod.rs +++ b/crates/brk_mempool/src/steps/fetcher/mod.rs @@ -72,7 +72,7 @@ impl Fetcher { .iter() .filter(|info| !known.contains(&info.txid) && !graveyard.contains(&info.txid)) .take(MAX_TX_FETCHES_PER_CYCLE) - .map(|info| info.txid.clone()) + .map(|info| info.txid) .collect() } diff --git a/crates/brk_mempool/src/steps/mod.rs b/crates/brk_mempool/src/steps/mod.rs index 791427e1b..a3e1fa4e3 100644 --- a/crates/brk_mempool/src/steps/mod.rs +++ b/crates/brk_mempool/src/steps/mod.rs @@ -9,5 +9,5 @@ mod resolver; pub use applier::Applier; pub use fetcher::Fetcher; pub use preparer::{Preparer, TxEntry, TxRemoval}; -pub use rebuilder::{BlkIndex, BlockStats, Rebuilder, RecommendedFees, Snapshot}; +pub use rebuilder::{BlockStats, Rebuilder, RecommendedFees, Snapshot}; pub use resolver::Resolver; diff --git a/crates/brk_mempool/src/steps/preparer/tx_addition.rs b/crates/brk_mempool/src/steps/preparer/tx_addition.rs index bca4d4a92..6a48293c3 100644 --- a/crates/brk_mempool/src/steps/preparer/tx_addition.rs +++ b/crates/brk_mempool/src/steps/preparer/tx_addition.rs @@ -10,7 +10,7 @@ use std::mem; use brk_rpc::RawTx; -use brk_types::{MempoolEntryInfo, Transaction, TxIn, TxOut, TxStatus, Txid, Vout}; +use brk_types::{MempoolEntryInfo, SigOps, Transaction, TxIn, TxOut, TxStatus, Txid, Vout}; use rustc_hash::FxHashMap; use crate::{TxTombstone, stores::TxStore}; @@ -52,10 +52,10 @@ impl TxAddition { .collect(); let mut tx = Transaction { index: None, - txid: info.txid.clone(), + txid: info.txid, version: raw.tx.version.into(), - total_sigop_cost: 0, - weight: info.weight.into(), + total_sigop_cost: SigOps::ZERO, + weight: info.weight, lock_time: raw.tx.lock_time.into(), total_size, fee: info.fee, diff --git a/crates/brk_mempool/src/steps/preparer/tx_entry.rs b/crates/brk_mempool/src/steps/preparer/tx_entry.rs index 6fe6b897a..7a264253f 100644 --- a/crates/brk_mempool/src/steps/preparer/tx_entry.rs +++ b/crates/brk_mempool/src/steps/preparer/tx_entry.rs @@ -1,4 +1,4 @@ -use brk_types::{FeeRate, MempoolEntryInfo, Sats, Timestamp, Txid, TxidPrefix, VSize}; +use brk_types::{FeeRate, MempoolEntryInfo, Sats, Timestamp, Txid, TxidPrefix, VSize, Weight}; use smallvec::SmallVec; /// A mempool transaction entry. @@ -12,6 +12,7 @@ pub struct TxEntry { pub txid: Txid, pub fee: Sats, pub vsize: VSize, + pub weight: Weight, /// Serialized tx size in bytes (witness + non-witness), from the raw tx. pub size: u64, /// Parent txid prefixes (most txs have 0-2 parents). @@ -28,9 +29,10 @@ pub struct TxEntry { impl TxEntry { pub(super) fn new(info: &MempoolEntryInfo, size: u64, rbf: bool) -> Self { Self { - txid: info.txid.clone(), + txid: info.txid, fee: info.fee, - vsize: VSize::from(info.vsize), + vsize: info.vsize, + weight: info.weight, size, depends: info.depends.iter().map(TxidPrefix::from).collect(), first_seen: info.first_seen, diff --git a/crates/brk_mempool/src/steps/preparer/tx_removal.rs b/crates/brk_mempool/src/steps/preparer/tx_removal.rs index d82e1aae1..fae358d38 100644 --- a/crates/brk_mempool/src/steps/preparer/tx_removal.rs +++ b/crates/brk_mempool/src/steps/preparer/tx_removal.rs @@ -45,7 +45,7 @@ impl TxRemoval { fn find_removal(tx: &Transaction, spent_by: &SpentBy) -> Self { tx.input .iter() - .find_map(|i| spent_by.get(&(i.txid.clone(), i.vout)).cloned()) + .find_map(|i| spent_by.get(&(i.txid, i.vout)).cloned()) .map_or(Self::Vanished, |by| Self::Replaced { by }) } @@ -56,7 +56,7 @@ impl TxRemoval { for addition in added { if let TxAddition::Fresh { tx, .. } = addition { for txin in &tx.input { - spent_by.insert((txin.txid.clone(), txin.vout), tx.txid.clone()); + spent_by.insert((txin.txid, txin.vout), tx.txid); } } } diff --git a/crates/brk_mempool/src/steps/rebuilder/clusters.rs b/crates/brk_mempool/src/steps/rebuilder/clusters.rs new file mode 100644 index 000000000..fff709c33 --- /dev/null +++ b/crates/brk_mempool/src/steps/rebuilder/clusters.rs @@ -0,0 +1,157 @@ +//! Build the cluster forest for a snapshot directly from the live +//! `EntryPool`. One traversal indexes live entries, builds parent +//! edges, floods the connected components, and constructs each +//! `Cluster` (which mirrors child edges and runs SFL +//! internally). +//! +//! Returns the cluster forest plus a `tx_index → ClusterRef` reverse +//! map for O(1) lookup back from `EntryPool` slot to cluster position. + +use brk_types::TxidPrefix; +use rustc_hash::{FxBuildHasher, FxHashMap}; +use smallvec::SmallVec; + +use crate::TxEntry; +use crate::cluster::{Cluster, ClusterId, ClusterNode, ClusterRef, LocalIdx}; +use crate::stores::TxIndex; + +/// Per-live-entry indexing position in the parents/children adjacency +/// arrays below. Local to this module; not exposed. +type Pos = u32; + +pub fn build_clusters( + entries: &[Option], +) -> (Vec>, Vec>) { + let live = index_live(entries); + if live.is_empty() { + return (Vec::new(), vec![None; entries.len()]); + } + + let parents = build_parent_edges(&live); + let children = mirror_children(&parents); + + let mut seen = vec![false; live.len()]; + let mut clusters: Vec> = Vec::new(); + let mut cluster_of: Vec> = vec![None; entries.len()]; + let mut stack: Vec = Vec::new(); + // Reused across components: `local_of[pos]` is `Some(local)` while + // we're building the current cluster, `None` otherwise. Cleared by + // walking each cluster's members at the end of its iteration. + let mut local_of: Vec> = vec![None; live.len()]; + + for start in 0..live.len() { + if seen[start] { + continue; + } + let members = flood_component(start as Pos, &parents, &children, &mut seen, &mut stack); + for (i, &pos) in members.iter().enumerate() { + local_of[pos as usize] = Some(LocalIdx::from(i)); + } + + let cluster_id = ClusterId::from(clusters.len()); + let cluster = build_cluster(&live, &parents, &members, &local_of); + for (local_pos, node) in cluster.nodes.iter().enumerate() { + cluster_of[node.id.as_usize()] = Some(ClusterRef { + cluster_id, + local: LocalIdx::from(local_pos), + }); + } + clusters.push(cluster); + + for &pos in &members { + local_of[pos as usize] = None; + } + } + + (clusters, cluster_of) +} + +fn flood_component( + start: Pos, + parents: &[SmallVec<[Pos; 4]>], + children: &[SmallVec<[Pos; 8]>], + seen: &mut [bool], + stack: &mut Vec, +) -> Vec { + let mut members: Vec = Vec::new(); + stack.clear(); + stack.push(start); + seen[start as usize] = true; + + while let Some(pos) = stack.pop() { + members.push(pos); + for &n in parents[pos as usize].iter().chain(children[pos as usize].iter()) { + if !seen[n as usize] { + seen[n as usize] = true; + stack.push(n); + } + } + } + members +} + +/// `local_of` is set only for `Pos`es in this cluster, so each parent's +/// `LocalIdx` is one direct lookup (cross-cluster parents return `None` +/// and get filtered). +fn build_cluster( + live: &[(TxIndex, &TxEntry)], + parents: &[SmallVec<[Pos; 4]>], + members: &[Pos], + local_of: &[Option], +) -> Cluster { + let cluster_nodes: Vec> = members + .iter() + .map(|&pos| { + let (tx_index, entry) = live[pos as usize]; + ClusterNode { + id: tx_index, + txid: entry.txid, + fee: entry.fee, + vsize: entry.vsize, + weight: entry.weight, + parents: parents[pos as usize] + .iter() + .filter_map(|&p| local_of[p as usize]) + .collect(), + } + }) + .collect(); + + Cluster::new(cluster_nodes) +} + +fn index_live(entries: &[Option]) -> Vec<(TxIndex, &TxEntry)> { + entries + .iter() + .enumerate() + .filter_map(|(i, opt)| opt.as_ref().map(|e| (TxIndex::from(i), e))) + .collect() +} + +fn build_parent_edges(live: &[(TxIndex, &TxEntry)]) -> Vec> { + let mut prefix_to_pos: FxHashMap = + FxHashMap::with_capacity_and_hasher(live.len(), FxBuildHasher); + for (i, (_, entry)) in live.iter().enumerate() { + prefix_to_pos.insert(entry.txid_prefix(), i as Pos); + } + live.iter() + .map(|(_, entry)| { + entry + .depends + .iter() + .filter_map(|p| prefix_to_pos.get(p).copied()) + .collect() + }) + .collect() +} + +fn mirror_children(parents: &[SmallVec<[Pos; 4]>]) -> Vec> { + let mut children: Vec> = + (0..parents.len()).map(|_| SmallVec::new()).collect(); + for (child_pos, ps) in parents.iter().enumerate() { + for &p in ps { + children[p as usize].push(child_pos as Pos); + } + } + children +} diff --git a/crates/brk_mempool/src/steps/rebuilder/graph/mod.rs b/crates/brk_mempool/src/steps/rebuilder/graph/mod.rs deleted file mode 100644 index 69825d4f5..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/graph/mod.rs +++ /dev/null @@ -1,73 +0,0 @@ -mod pool_index; -mod tx_node; - -pub use pool_index::PoolIndex; -pub use tx_node::TxNode; - -use brk_types::TxidPrefix; -use rustc_hash::{FxBuildHasher, FxHashMap}; - -use crate::{TxEntry, stores::TxIndex}; - -pub struct Graph; - -impl Graph { - /// Build the dependency graph for the live mempool. - /// - /// Nodes are indexed by `PoolIndex`; the caller indexes with - /// `idx.as_usize()`. - pub fn build(entries: &[Option]) -> Vec { - let (live, prefix_to_pool) = Self::index_live(entries); - if live.is_empty() { - return Vec::new(); - } - let mut nodes = Self::build_parent_edges(&live, &prefix_to_pool); - Self::mirror_child_edges(&mut nodes); - nodes - } - - /// First pass: collect live entries and map their prefixes to pool - /// indexes. Done before parent edges so a parent appearing later in - /// slot order than its child is still resolvable. - fn index_live( - entries: &[Option], - ) -> (Vec<(TxIndex, &TxEntry)>, FxHashMap) { - let mut live: Vec<(TxIndex, &TxEntry)> = Vec::with_capacity(entries.len()); - let mut prefix_to_pool: FxHashMap = - FxHashMap::with_capacity_and_hasher(entries.len(), FxBuildHasher); - for (i, opt) in entries.iter().enumerate() { - if let Some(e) = opt.as_ref() { - prefix_to_pool.insert(e.txid_prefix(), PoolIndex::from(live.len())); - live.push((TxIndex::from(i), e)); - } - } - (live, prefix_to_pool) - } - - fn build_parent_edges( - live: &[(TxIndex, &TxEntry)], - prefix_to_pool: &FxHashMap, - ) -> Vec { - live.iter() - .map(|(tx_index, entry)| { - let mut node = TxNode::new(*tx_index, entry.fee, entry.vsize); - for parent_prefix in &entry.depends { - if let Some(&parent_pool_idx) = prefix_to_pool.get(parent_prefix) { - node.parents.push(parent_pool_idx); - } - } - node - }) - .collect() - } - - fn mirror_child_edges(nodes: &mut [TxNode]) { - for i in 0..nodes.len() { - let plen = nodes[i].parents.len(); - for j in 0..plen { - let parent_idx = nodes[i].parents[j].as_usize(); - nodes[parent_idx].children.push(PoolIndex::from(i)); - } - } - } -} diff --git a/crates/brk_mempool/src/steps/rebuilder/graph/pool_index.rs b/crates/brk_mempool/src/steps/rebuilder/graph/pool_index.rs deleted file mode 100644 index 5c56f4fdf..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/graph/pool_index.rs +++ /dev/null @@ -1,17 +0,0 @@ -/// Index into the temporary pool used during block building. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct PoolIndex(u32); - -impl PoolIndex { - #[inline] - pub fn as_usize(self) -> usize { - self.0 as usize - } -} - -impl From for PoolIndex { - #[inline] - fn from(value: usize) -> Self { - Self(value as u32) - } -} diff --git a/crates/brk_mempool/src/steps/rebuilder/graph/tx_node.rs b/crates/brk_mempool/src/steps/rebuilder/graph/tx_node.rs deleted file mode 100644 index 4778365ed..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/graph/tx_node.rs +++ /dev/null @@ -1,26 +0,0 @@ -use brk_types::{Sats, VSize}; -use smallvec::SmallVec; - -use super::PoolIndex; -use crate::stores::TxIndex; - -/// Built fresh per block-building cycle, then discarded. -pub struct TxNode { - pub tx_index: TxIndex, - pub fee: Sats, - pub vsize: VSize, - pub parents: SmallVec<[PoolIndex; 4]>, - pub children: SmallVec<[PoolIndex; 8]>, -} - -impl TxNode { - pub fn new(tx_index: TxIndex, fee: Sats, vsize: VSize) -> Self { - Self { - tx_index, - fee, - vsize, - parents: SmallVec::new(), - children: SmallVec::new(), - } - } -} diff --git a/crates/brk_mempool/src/steps/rebuilder/linearize/chunk.rs b/crates/brk_mempool/src/steps/rebuilder/linearize/chunk.rs deleted file mode 100644 index d97b634b9..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/linearize/chunk.rs +++ /dev/null @@ -1,26 +0,0 @@ -use brk_types::{FeeRate, Sats, VSize}; -use smallvec::SmallVec; - -use super::LocalIdx; - -pub(crate) struct Chunk { - pub(crate) nodes: SmallVec<[LocalIdx; 4]>, - pub(crate) fee: Sats, - pub(crate) vsize: VSize, -} - -impl Chunk { - pub(super) fn from_mask(mask: u128, fee: Sats, vsize: VSize) -> Self { - let mut nodes: SmallVec<[LocalIdx; 4]> = SmallVec::new(); - let mut bits = mask; - while bits != 0 { - nodes.push(bits.trailing_zeros() as LocalIdx); - bits &= bits - 1; - } - Self { nodes, fee, vsize } - } - - pub(crate) fn fee_rate(&self) -> FeeRate { - FeeRate::from((self.fee, self.vsize)) - } -} diff --git a/crates/brk_mempool/src/steps/rebuilder/linearize/cluster.rs b/crates/brk_mempool/src/steps/rebuilder/linearize/cluster.rs deleted file mode 100644 index af41a5238..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/linearize/cluster.rs +++ /dev/null @@ -1,43 +0,0 @@ -use super::{ClusterNode, LocalIdx}; - -/// A connected component of the mempool graph, re-indexed locally. -pub(crate) struct Cluster { - pub(crate) nodes: Vec, - /// Used during chunk emission to print txs parents-first. - pub(crate) topo_rank: Vec, -} - -impl Cluster { - pub(crate) fn new(nodes: Vec) -> Self { - let topo_rank = Self::kahn_topo_rank(&nodes); - Self { nodes, topo_rank } - } - - fn kahn_topo_rank(nodes: &[ClusterNode]) -> Vec { - let n = nodes.len(); - let mut indegree: Vec = nodes.iter().map(|n| n.parents.len() as u32).collect(); - let mut ready: Vec = (0..n as LocalIdx) - .filter(|&i| indegree[i as usize] == 0) - .collect(); - - let mut rank: Vec = vec![0; n]; - let mut position: u32 = 0; - let mut head = 0; - - while head < ready.len() { - let v = ready[head]; - head += 1; - rank[v as usize] = position; - position += 1; - for &c in &nodes[v as usize].children { - indegree[c as usize] -= 1; - if indegree[c as usize] == 0 { - ready.push(c); - } - } - } - - debug_assert_eq!(position as usize, n, "cluster contained a cycle"); - rank - } -} diff --git a/crates/brk_mempool/src/steps/rebuilder/linearize/cluster_node.rs b/crates/brk_mempool/src/steps/rebuilder/linearize/cluster_node.rs deleted file mode 100644 index abdd00ba6..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/linearize/cluster_node.rs +++ /dev/null @@ -1,14 +0,0 @@ -use brk_types::{Sats, VSize}; -use smallvec::SmallVec; - -use crate::stores::TxIndex; - -use super::LocalIdx; - -pub(crate) struct ClusterNode { - pub(crate) tx_index: TxIndex, - pub(crate) fee: Sats, - pub(crate) vsize: VSize, - pub(crate) parents: SmallVec<[LocalIdx; 2]>, - pub(crate) children: SmallVec<[LocalIdx; 2]>, -} diff --git a/crates/brk_mempool/src/steps/rebuilder/linearize/mod.rs b/crates/brk_mempool/src/steps/rebuilder/linearize/mod.rs deleted file mode 100644 index 22f3f96d6..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/linearize/mod.rs +++ /dev/null @@ -1,136 +0,0 @@ -//! Cluster-mempool linearization. -//! -//! Partitions the mempool dependency graph into connected components -//! ("clusters"), linearizes each into chunks ordered by descending -//! feerate, and emits the resulting chunks as `Package`s. The inner -//! algorithm (see `sfl.rs`) is a topologically-closed-subset search, -//! optimal for clusters up to 18 txs and near-optimal beyond that. - -pub(crate) mod chunk; -pub(crate) mod cluster; -pub(crate) mod cluster_node; -pub(crate) mod package; -pub(crate) mod sfl; - -pub use package::Package; - -use rustc_hash::{FxBuildHasher, FxHashMap}; -use smallvec::SmallVec; - -use cluster::Cluster; -use cluster_node::ClusterNode; -use sfl::Sfl; - -use super::graph::{PoolIndex, TxNode}; - -pub(crate) type LocalIdx = u32; - -pub struct Linearizer; - -impl Linearizer { - /// Order across clusters is unspecified: the partitioner re-sorts by - /// fee rate downstream. - pub fn linearize(nodes: &[TxNode]) -> Vec { - let clusters = Self::find_components(nodes); - Self::pack_clusters(clusters) - } - - fn pack_clusters(clusters: Vec) -> Vec { - clusters - .iter() - .enumerate() - .flat_map(|(cluster_id, cluster)| Self::pack_cluster(cluster, cluster_id as u32)) - .collect() - } - - fn pack_cluster(cluster: &Cluster, cluster_id: u32) -> Vec { - if cluster.nodes.len() == 1 { - return vec![Package::singleton(cluster, cluster_id)]; - } - Sfl::linearize(cluster) - .into_iter() - .enumerate() - .map(|(chunk_order, chunk)| { - Package::from_chunk(cluster, chunk, cluster_id, chunk_order as u32) - }) - .collect() - } - - fn find_components(nodes: &[TxNode]) -> Vec { - let n = nodes.len(); - let mut seen: Vec = vec![false; n]; - let mut clusters: Vec = Vec::new(); - let mut stack: Vec = Vec::new(); - - for start in 0..n { - if seen[start] { - continue; - } - let mut members = Self::flood_component(start, nodes, &mut seen, &mut stack); - // Deterministic LocalIdx assignment keeps SFL output stable - // across sync ticks. - members.sort_unstable(); - clusters.push(Self::build_cluster(nodes, &members)); - } - - clusters - } - - fn flood_component( - start: usize, - nodes: &[TxNode], - seen: &mut [bool], - stack: &mut Vec, - ) -> Vec { - let mut members: Vec = Vec::new(); - stack.clear(); - stack.push(PoolIndex::from(start)); - seen[start] = true; - - while let Some(idx) = stack.pop() { - members.push(idx); - let node = &nodes[idx.as_usize()]; - for &n in node.parents.iter().chain(node.children.iter()) { - if !seen[n.as_usize()] { - seen[n.as_usize()] = true; - stack.push(n); - } - } - } - members - } - - fn build_cluster(nodes: &[TxNode], members: &[PoolIndex]) -> Cluster { - let mut pool_to_local: FxHashMap = - FxHashMap::with_capacity_and_hasher(members.len(), FxBuildHasher); - for (i, &p) in members.iter().enumerate() { - pool_to_local.insert(p, i as LocalIdx); - } - - let cluster_nodes: Vec = members - .iter() - .map(|&pool_idx| { - let node = &nodes[pool_idx.as_usize()]; - ClusterNode { - tx_index: node.tx_index, - fee: node.fee, - vsize: node.vsize, - parents: Self::local_neighbors(&node.parents, &pool_to_local), - children: Self::local_neighbors(&node.children, &pool_to_local), - } - }) - .collect(); - - Cluster::new(cluster_nodes) - } - - fn local_neighbors( - pool_neighbors: &[PoolIndex], - pool_to_local: &FxHashMap, - ) -> SmallVec<[LocalIdx; 2]> { - pool_neighbors - .iter() - .filter_map(|p| pool_to_local.get(p).copied()) - .collect() - } -} diff --git a/crates/brk_mempool/src/steps/rebuilder/linearize/package.rs b/crates/brk_mempool/src/steps/rebuilder/linearize/package.rs deleted file mode 100644 index 9b1a0f440..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/linearize/package.rs +++ /dev/null @@ -1,67 +0,0 @@ -use brk_types::{FeeRate, VSize}; -use smallvec::SmallVec; - -use super::{LocalIdx, chunk::Chunk, cluster::Cluster}; -use crate::stores::TxIndex; - -/// A CPFP package: transactions mined together because a child pays -/// for its parent. Atomic (all-or-nothing) at mining time. -/// -/// `fee_rate` is the package's combined rate (sum of fees / sum of -/// vsizes). SFL emits packages in descending-`fee_rate` order within -/// a cluster. -/// -/// `cluster_id` + `chunk_order` let the partitioner enforce -/// intra-cluster ordering when its look-ahead would otherwise pull a -/// child chunk into an earlier block than its parent chunk. -pub struct Package { - /// Transactions in topological order (parents before children). - pub txs: Vec, - pub vsize: VSize, - pub fee_rate: FeeRate, - pub cluster_id: u32, - pub chunk_order: u32, -} - -impl Package { - pub(super) fn singleton(cluster: &Cluster, cluster_id: u32) -> Self { - let node = &cluster.nodes[0]; - let mut package = Self::empty(FeeRate::from((node.fee, node.vsize)), cluster_id, 0); - package.add_tx(node.tx_index, node.vsize); - package - } - - /// Txs inside the package are ordered parents-first by `topo_rank`. - pub(super) fn from_chunk( - cluster: &Cluster, - chunk: Chunk, - cluster_id: u32, - chunk_order: u32, - ) -> Self { - let mut package = Self::empty(chunk.fee_rate(), cluster_id, chunk_order); - - let mut ordered: SmallVec<[LocalIdx; 8]> = chunk.nodes.into_iter().collect(); - ordered.sort_by_key(|&local| cluster.topo_rank[local as usize]); - - for local in ordered { - let node = &cluster.nodes[local as usize]; - package.add_tx(node.tx_index, node.vsize); - } - package - } - - fn empty(fee_rate: FeeRate, cluster_id: u32, chunk_order: u32) -> Self { - Self { - txs: Vec::new(), - vsize: VSize::default(), - fee_rate, - cluster_id, - chunk_order, - } - } - - fn add_tx(&mut self, tx_index: TxIndex, vsize: VSize) { - self.txs.push(tx_index); - self.vsize += vsize; - } -} diff --git a/crates/brk_mempool/src/steps/rebuilder/linearize/sfl.rs b/crates/brk_mempool/src/steps/rebuilder/linearize/sfl.rs deleted file mode 100644 index 98945f073..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/linearize/sfl.rs +++ /dev/null @@ -1,276 +0,0 @@ -//! Cluster linearizer. -//! -//! Two-branch dispatch by cluster size: -//! - **n ≤ 18**: recursive enumeration of topologically-closed subsets. -//! Provably optimal. Visits only valid subsets (skips non-closed ones -//! without filtering) and maintains running fee/vsize incrementally. -//! - **n > 18**: "greedy-union" ancestor-set search. Seeds with each -//! node's ancestor closure, then greedily adds any other ancestor -//! closure whose inclusion raises the combined feerate. Strict -//! superset of ancestor-set-sort's candidate space, catching the -//! sibling-union shapes that pure ASS misses. -//! -//! A final stack-based `canonicalize` pass merges adjacent chunks when -//! the later one's feerate beats the earlier's, restoring the -//! non-increasing-rate invariant. -//! -//! Everything runs on `u128` bitmasks (covers Bitcoin Core 31's cluster -//! cap of 100). Rate comparisons go through `FeeRate`. - -use brk_types::{FeeRate, Sats, VSize}; - -use super::LocalIdx; -use super::chunk::Chunk; -use super::cluster::Cluster; - -const BRUTE_FORCE_LIMIT: usize = 18; -const BITMASK_LIMIT: usize = 128; - -pub struct Sfl; - -impl Sfl { - pub fn linearize(cluster: &Cluster) -> Vec { - assert!( - cluster.nodes.len() <= BITMASK_LIMIT, - "cluster size {} exceeds u128 capacity", - cluster.nodes.len() - ); - let tables = Tables::build(cluster); - let chunks = Self::extract_chunks(&tables); - Self::canonicalize(chunks) - } - - /// Peel the cluster one chunk at a time. Each iteration picks the - /// highest-feerate topologically-closed subset of `remaining` and - /// removes it. Loop terminates because every iteration removes at - /// least one node. - fn extract_chunks(t: &Tables) -> Vec { - let mut chunks: Vec = Vec::new(); - let mut remaining: u128 = t.all; - while remaining != 0 { - let (mask, fee, vsize) = if t.n <= BRUTE_FORCE_LIMIT { - Self::best_subset(t, remaining) - } else { - Self::best_ancestor_union(t, remaining) - }; - chunks.push(Chunk::from_mask(mask, fee, vsize)); - remaining &= !mask; - } - chunks - } - - /// Recursive enumeration of topologically-closed subsets of - /// `remaining`. Returns the (mask, fee, vsize) with the highest rate. - fn best_subset(t: &Tables, remaining: u128) -> (u128, Sats, VSize) { - let ctx = Ctx { tables: t, remaining }; - let mut best = (0u128, Sats::ZERO, VSize::default()); - Self::recurse(&ctx, 0, 0, Sats::ZERO, VSize::default(), &mut best); - best - } - - fn recurse( - ctx: &Ctx, - idx: usize, - included: u128, - f: Sats, - v: VSize, - best: &mut (u128, Sats, VSize), - ) { - if idx == ctx.tables.topo_order.len() { - if included != 0 && FeeRate::from((f, v)) > FeeRate::from((best.1, best.2)) { - *best = (included, f, v); - } - return; - } - let node = ctx.tables.topo_order[idx]; - let bit = 1u128 << node; - - // Not in remaining, or a parent (within remaining) is excluded: - // this node is forced-excluded, no branching. - if (bit & ctx.remaining) == 0 - || (ctx.tables.parents_mask[node as usize] & ctx.remaining & !included) != 0 - { - Self::recurse(ctx, idx + 1, included, f, v, best); - return; - } - - Self::recurse(ctx, idx + 1, included, f, v, best); - Self::recurse( - ctx, - idx + 1, - included | bit, - f + ctx.tables.fee_of[node as usize], - v + ctx.tables.vsize_of[node as usize], - best, - ); - } - - /// For each node v in `remaining`, seed with anc(v) ∩ remaining, then - /// greedily extend by adding any anc(u) whose inclusion raises the - /// feerate. Pick the best result across all seeds. - /// - /// Every candidate evaluated is a union of ancestor closures, so it - /// is topologically closed by construction. Strictly explores more - /// candidates than pure ancestor-set-sort, at O(n³) per chunk step. - fn best_ancestor_union(t: &Tables, remaining: u128) -> (u128, Sats, VSize) { - let mut best = (0u128, Sats::ZERO, VSize::default()); - let mut best_rate = FeeRate::default(); - let mut seeds = remaining; - while seeds != 0 { - let i = seeds.trailing_zeros() as usize; - seeds &= seeds - 1; - - let mut s = t.ancestor_incl[i] & remaining; - let (mut f, mut v) = Self::totals(s, &t.fee_of, &t.vsize_of); - let mut rate = FeeRate::from((f, v)); - - // Greedy extension to fixed point: pick the ancestor-closure - // addition that yields the highest resulting feerate, if any. - loop { - let mut picked: Option<(u128, Sats, VSize, FeeRate)> = None; - let mut cands = remaining & !s; - while cands != 0 { - let j = cands.trailing_zeros() as usize; - cands &= cands - 1; - let add = t.ancestor_incl[j] & remaining & !s; - if add == 0 { - continue; - } - let (df, dv) = Self::totals(add, &t.fee_of, &t.vsize_of); - let nf = f + df; - let nv = v + dv; - let nrate = FeeRate::from((nf, nv)); - if nrate <= rate { - continue; - } - if picked.is_none_or(|(_, _, _, prate)| nrate > prate) { - picked = Some((add, nf, nv, nrate)); - } - } - match picked { - Some((add, nf, nv, nrate)) => { - s |= add; - f = nf; - v = nv; - rate = nrate; - } - None => break, - } - } - - if rate > best_rate { - best = (s, f, v); - best_rate = rate; - } - } - best - } - - /// Single-pass stack merge: for each incoming chunk, merge it into - /// the stack top while the merge would raise the top's feerate, then - /// push. O(n) total regardless of how many merges cascade. - fn canonicalize(chunks: Vec) -> Vec { - let mut out: Vec = Vec::with_capacity(chunks.len()); - for mut cur in chunks { - while let Some(top) = out.last() { - if cur.fee_rate() <= top.fee_rate() { - break; - } - let mut prev = out.pop().unwrap(); - prev.fee += cur.fee; - prev.vsize += cur.vsize; - prev.nodes.extend(cur.nodes); - cur = prev; - } - out.push(cur); - } - out - } - - #[inline] - fn totals(mask: u128, fee_of: &[Sats], vsize_of: &[VSize]) -> (Sats, VSize) { - let mut f = Sats::ZERO; - let mut v = VSize::default(); - let mut bits = mask; - while bits != 0 { - let i = bits.trailing_zeros() as usize; - f += fee_of[i]; - v += vsize_of[i]; - bits &= bits - 1; - } - (f, v) - } -} - -/// Per-cluster precomputed bitmasks and lookups, shared across every -/// chunk-extraction iteration. Built once in `Sfl::linearize`. -struct Tables { - n: usize, - /// Bitmask with one bit set per node (i.e. `(1 << n) - 1`). - all: u128, - /// `parents_mask[i]` = bits set for direct parents of node `i`. - parents_mask: Vec, - /// `ancestor_incl[i]` = bits set for `i` and all ancestors. - ancestor_incl: Vec, - /// LocalIdx order respecting `cluster.topo_rank`. - topo_order: Vec, - fee_of: Vec, - vsize_of: Vec, -} - -impl Tables { - fn build(cluster: &Cluster) -> Self { - let n = cluster.nodes.len(); - let topo_order = Self::build_topo_order(cluster); - let (parents_mask, ancestor_incl) = Self::build_ancestor_masks(cluster, &topo_order); - let fee_of: Vec = cluster.nodes.iter().map(|node| node.fee).collect(); - let vsize_of: Vec = cluster.nodes.iter().map(|node| node.vsize).collect(); - let all: u128 = if n == 128 { !0 } else { (1u128 << n) - 1 }; - Self { - n, - all, - parents_mask, - ancestor_incl, - topo_order, - fee_of, - vsize_of, - } - } - - fn build_topo_order(cluster: &Cluster) -> Vec { - let mut topo_order: Vec = (0..cluster.nodes.len() as LocalIdx).collect(); - topo_order.sort_by_key(|&i| cluster.topo_rank[i as usize]); - topo_order - } - - /// For each node `v`, compute its direct-parent bitmask and the - /// closure of all its ancestors (including itself). Visits nodes - /// in topological order so a parent's `ancestor_incl` is ready - /// before any child reads it. - fn build_ancestor_masks( - cluster: &Cluster, - topo_order: &[LocalIdx], - ) -> (Vec, Vec) { - let n = cluster.nodes.len(); - let mut parents_mask: Vec = vec![0; n]; - let mut ancestor_incl: Vec = vec![0; n]; - for &v in topo_order { - let mut par = 0u128; - let mut acc = 1u128 << v; - for &p in &cluster.nodes[v as usize].parents { - par |= 1u128 << p; - acc |= ancestor_incl[p as usize]; - } - parents_mask[v as usize] = par; - ancestor_incl[v as usize] = acc; - } - (parents_mask, ancestor_incl) - } -} - -/// Per-iteration immutable bundle for the brute-force recursion. -/// Keeping it small lets `recurse` stay at four moving args. -struct Ctx<'a> { - tables: &'a Tables, - remaining: u128, -} diff --git a/crates/brk_mempool/src/steps/rebuilder/mod.rs b/crates/brk_mempool/src/steps/rebuilder/mod.rs index a657356bb..00a483d0f 100644 --- a/crates/brk_mempool/src/steps/rebuilder/mod.rs +++ b/crates/brk_mempool/src/steps/rebuilder/mod.rs @@ -1,7 +1,7 @@ use std::{ sync::{ Arc, - atomic::{AtomicBool, Ordering}, + atomic::{AtomicBool, AtomicU64, Ordering}, }, time::{Duration, Instant}, }; @@ -11,22 +11,20 @@ use brk_types::FeeRate; use parking_lot::{Mutex, RwLock}; use tracing::warn; -use graph::Graph; -use linearize::Linearizer; +use clusters::build_clusters; use partition::Partitioner; #[cfg(debug_assertions)] use verify::Verifier; use crate::stores::MempoolState; -pub(crate) mod graph; -pub(crate) mod linearize; +pub(crate) mod clusters; mod partition; mod snapshot; #[cfg(debug_assertions)] mod verify; pub use brk_types::RecommendedFees; -pub use snapshot::{BlkIndex, BlockStats, Snapshot}; +pub use snapshot::{BlockStats, Snapshot}; const MIN_REBUILD_INTERVAL: Duration = Duration::from_secs(1); const NUM_BLOCKS: usize = 8; @@ -36,6 +34,9 @@ pub struct Rebuilder { snapshot: RwLock>, dirty: AtomicBool, last_rebuild: Mutex>, + rebuild_count: AtomicU64, + skip_throttled: AtomicU64, + skip_clean: AtomicU64, } impl Rebuilder { @@ -49,6 +50,18 @@ impl Rebuilder { return; } self.publish(Self::build_snapshot(client, state)); + self.rebuild_count.fetch_add(1, Ordering::Relaxed); + } + + pub fn rebuild_count(&self) -> u64 { + self.rebuild_count.load(Ordering::Relaxed) + } + + pub fn skip_counts(&self) -> (u64, u64) { + ( + self.skip_clean.load(Ordering::Relaxed), + self.skip_throttled.load(Ordering::Relaxed), + ) } fn build_snapshot(client: &Client, state: &MempoolState) -> Snapshot { @@ -56,14 +69,13 @@ impl Rebuilder { let entries = state.entries.read(); let entries_slice = entries.entries(); - let nodes = Graph::build(entries_slice); - let packages = Linearizer::linearize(&nodes); - let blocks = Partitioner::partition(packages, NUM_BLOCKS); + let (clusters, cluster_of) = build_clusters(entries_slice); + let blocks = Partitioner::partition(&clusters, NUM_BLOCKS); #[cfg(debug_assertions)] - Verifier::check(client, &blocks, entries_slice); + Verifier::check(client, &blocks, &clusters, &cluster_of, entries_slice); - Snapshot::build(blocks, entries_slice, min_fee) + Snapshot::build(clusters, cluster_of, blocks, entries_slice, min_fee) } pub fn snapshot(&self) -> Arc { @@ -82,10 +94,12 @@ impl Rebuilder { /// retry. fn try_claim_rebuild(&self) -> bool { if !self.dirty.load(Ordering::Acquire) { + self.skip_clean.fetch_add(1, Ordering::Relaxed); return false; } let mut last = self.last_rebuild.lock(); if last.is_some_and(|t| t.elapsed() < MIN_REBUILD_INTERVAL) { + self.skip_throttled.fetch_add(1, Ordering::Relaxed); return false; } *last = Some(Instant::now()); diff --git a/crates/brk_mempool/src/steps/rebuilder/partition.rs b/crates/brk_mempool/src/steps/rebuilder/partition.rs index 3411ebe61..a6c930671 100644 --- a/crates/brk_mempool/src/steps/rebuilder/partition.rs +++ b/crates/brk_mempool/src/steps/rebuilder/partition.rs @@ -1,50 +1,78 @@ use std::cmp::Reverse; -use brk_types::VSize; +use brk_types::{FeeRate, VSize}; -use super::linearize::Package; +use crate::cluster::{ChunkId, Cluster, ClusterId}; +use crate::stores::TxIndex; const LOOK_AHEAD_COUNT: usize = 100; -/// Packs ranked packages into `num_blocks` blocks. The first -/// `num_blocks - 1` are filled greedily up to `VSize::MAX_BLOCK`; the last -/// is a catch-all so no low-rate tx is silently dropped (matches -/// mempool.space). +/// Packs SFL chunks (referenced by `(ClusterId, ChunkId)`) into +/// `num_blocks` blocks. The first `num_blocks - 1` are filled greedily +/// up to `VSize::MAX_BLOCK`; the last is a catch-all so no low-rate tx +/// is silently dropped (matches mempool.space). /// /// Look-ahead respects intra-cluster order: a chunk is only taken once /// every earlier-rate chunk of the same cluster has been placed, so a /// child chunk never lands in an earlier block than its parent chunk. -pub struct Partitioner { - slots: Vec>, - blocks: Vec>, - cluster_next: Vec, - current: Vec, +/// +/// Output is the flat tx-list per block, parents-first within each +/// chunk via the cluster's `topo_order`. +pub struct Partitioner<'a> { + clusters: &'a [Cluster], + /// Candidate chunks sorted by descending feerate. Slots are taken + /// (set to `None`) as they're placed. + slots: Vec>, + /// Per-cluster cursor: the next `ChunkId` that must be taken next. + cluster_next: Vec, + blocks: Vec>, + current: Vec, current_vsize: VSize, idx: usize, } -impl Partitioner { - pub fn partition(mut packages: Vec, num_blocks: usize) -> Vec> { - // Stable sort preserves SFL's per-cluster non-increasing-rate - // emission order in the global list, which is what `cluster_next` - // relies on. - packages.sort_by_key(|p| Reverse(p.fee_rate)); +#[derive(Clone, Copy)] +struct Candidate { + cluster_id: ClusterId, + chunk_id: ChunkId, + fee_rate: FeeRate, + vsize: VSize, +} - let mut p = Self::new(packages, num_blocks); +impl<'a> Partitioner<'a> { + pub fn partition(clusters: &'a [Cluster], num_blocks: usize) -> Vec> { + let mut p = Self::new(clusters, num_blocks); p.fill_normal_blocks(num_blocks.saturating_sub(1)); p.flush_overflow(num_blocks); p.blocks } - fn new(packages: Vec, num_blocks: usize) -> Self { - let num_clusters = packages + fn new(clusters: &'a [Cluster], num_blocks: usize) -> Self { + let mut candidates: Vec = clusters .iter() - .map(|p| p.cluster_id as usize + 1) - .max() - .unwrap_or(0); + .enumerate() + .flat_map(|(cid, cluster)| { + let cluster_id = ClusterId::from(cid); + cluster + .chunks + .iter() + .enumerate() + .map(move |(chid, chunk)| Candidate { + cluster_id, + chunk_id: ChunkId::from(chid), + fee_rate: chunk.fee_rate(), + vsize: chunk.vsize, + }) + }) + .collect(); + // Stable sort preserves SFL's per-cluster non-increasing-rate + // order, which is what `cluster_next` relies on. + candidates.sort_by_key(|c| Reverse(c.fee_rate)); + Self { - cluster_next: vec![0; num_clusters], - slots: packages.into_iter().map(Some).collect(), + clusters, + slots: candidates.into_iter().map(Some).collect(), + cluster_next: vec![ChunkId::ZERO; clusters.len()], blocks: Vec::with_capacity(num_blocks), current: Vec::new(), current_vsize: VSize::default(), @@ -54,7 +82,7 @@ impl Partitioner { fn fill_normal_blocks(&mut self, target_blocks: usize) { while self.idx < self.slots.len() && self.blocks.len() < target_blocks { - let Some(pkg) = &self.slots[self.idx] else { + let Some(cand) = self.slots[self.idx] else { self.idx += 1; continue; }; @@ -62,8 +90,8 @@ impl Partitioner { let remaining_space = VSize::MAX_BLOCK.saturating_sub(self.current_vsize); // Take if it fits, or if the current block is empty (avoids - // stalling on an oversized package larger than MAX_BLOCK). - if pkg.vsize <= remaining_space || self.current.is_empty() { + // stalling on an oversized chunk larger than MAX_BLOCK). + if cand.vsize <= remaining_space || self.current.is_empty() { self.take(self.idx); self.idx += 1; continue; @@ -86,11 +114,11 @@ impl Partitioner { fn try_fill_with_smaller(&mut self, start: usize, remaining_space: VSize) -> bool { let end = (start + LOOK_AHEAD_COUNT).min(self.slots.len()); for idx in (start + 1)..end { - let Some(pkg) = &self.slots[idx] else { continue }; - if pkg.vsize > remaining_space { + let Some(cand) = self.slots[idx] else { continue }; + if cand.vsize > remaining_space { continue; } - if pkg.chunk_order != self.cluster_next[pkg.cluster_id as usize] { + if cand.chunk_id != self.cluster_next[cand.cluster_id.as_usize()] { continue; } self.take(idx); @@ -100,18 +128,21 @@ impl Partitioner { } fn take(&mut self, idx: usize) { - let pkg = self.slots[idx].take().unwrap(); + let cand = self.slots[idx].take().unwrap(); debug_assert_eq!( - pkg.chunk_order, self.cluster_next[pkg.cluster_id as usize], + cand.chunk_id, + self.cluster_next[cand.cluster_id.as_usize()], "partitioner took a chunk out of cluster order" ); - self.cluster_next[pkg.cluster_id as usize] = pkg.chunk_order + 1; - self.current_vsize += pkg.vsize; - self.current.push(pkg); + self.cluster_next[cand.cluster_id.as_usize()] = ChunkId::from(cand.chunk_id.inner() + 1); + self.current_vsize += cand.vsize; + self.current.push(cand); } fn flush_block(&mut self) { - self.blocks.push(std::mem::take(&mut self.current)); + let candidates = std::mem::take(&mut self.current); + let block = Self::materialize(self.clusters, candidates); + self.blocks.push(block); self.current_vsize = VSize::default(); } @@ -119,12 +150,27 @@ impl Partitioner { if self.blocks.len() >= num_blocks { return; } - let overflow: Vec = self.slots[self.idx..] + let overflow: Vec = self.slots[self.idx..] .iter_mut() .filter_map(Option::take) .collect(); if !overflow.is_empty() { - self.blocks.push(overflow); + let block = Self::materialize(self.clusters, overflow); + self.blocks.push(block); } } + + /// Expand each chunk into its txs. `chunk.txs` is already topo-ordered + /// (parents-first) by `Cluster::new`, so we iterate it directly. + fn materialize(clusters: &[Cluster], candidates: Vec) -> Vec { + let mut out: Vec = Vec::new(); + for cand in candidates { + let cluster = &clusters[cand.cluster_id.as_usize()]; + let chunk = &cluster.chunks[cand.chunk_id.as_usize()]; + for &local in &chunk.txs { + out.push(cluster.nodes[local.as_usize()].id); + } + } + out + } } diff --git a/crates/brk_mempool/src/steps/rebuilder/snapshot/blk_index.rs b/crates/brk_mempool/src/steps/rebuilder/snapshot/blk_index.rs deleted file mode 100644 index cb264b593..000000000 --- a/crates/brk_mempool/src/steps/rebuilder/snapshot/blk_index.rs +++ /dev/null @@ -1,26 +0,0 @@ -/// Projected-block index in a mempool snapshot. `u8` because the -/// projection horizon is ~8 blocks at typical loads; `BlkIndex::MAX` -/// is reserved as the "not in any projected block" sentinel used by -/// `Snapshot::block_of` for txs below the mempool floor. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] -pub struct BlkIndex(u8); - -impl BlkIndex { - /// Sentinel for "not in any projected block". - pub const MAX: BlkIndex = BlkIndex(u8::MAX); - - pub fn is_not_in_projected(self) -> bool { - self == Self::MAX - } - - pub fn as_usize(self) -> usize { - self.0 as usize - } -} - -impl From for BlkIndex { - fn from(v: usize) -> Self { - debug_assert!(v < u8::MAX as usize, "BlkIndex overflow: {v}"); - Self(v as u8) - } -} diff --git a/crates/brk_mempool/src/steps/rebuilder/snapshot/fees.rs b/crates/brk_mempool/src/steps/rebuilder/snapshot/fees.rs index 3588147da..fe30d9617 100644 --- a/crates/brk_mempool/src/steps/rebuilder/snapshot/fees.rs +++ b/crates/brk_mempool/src/steps/rebuilder/snapshot/fees.rs @@ -13,6 +13,13 @@ const MIN_INCREMENT: FeeRate = FeeRate::new(0.001); const PRIORITY_FACTOR: FeeRate = FeeRate::new(0.5); const MIN_FASTEST_FEE: FeeRate = FeeRate::new(1.0); const MIN_HALF_HOUR_FEE: FeeRate = FeeRate::new(0.5); +/// At or below this projected-block vsize, the block carries no fee +/// signal and the tier collapses to `min_fee`. +const EMPTY_BLOCK_VSIZE: u64 = 500_000; +/// Above this projected-block vsize, no taper applies. Between +/// `EMPTY_BLOCK_VSIZE` and this threshold, the final-block fee is +/// scaled linearly by `(vsize - EMPTY_BLOCK_VSIZE) / EMPTY_BLOCK_VSIZE`. +const FULL_BLOCK_VSIZE: u64 = 950_000; pub struct Fees; @@ -70,11 +77,11 @@ impl Fees { let median = block.median_fee_rate(); let use_fee = previous_fee.map_or(median, |prev| FeeRate::mean(median, prev)); let vsize = u64::from(block.total_vsize); - if vsize <= 500_000 || median < min_fee { + if vsize <= EMPTY_BLOCK_VSIZE || median < min_fee { return min_fee; } - if vsize <= 950_000 && next_block.is_none() { - let multiplier = (vsize - 500_000) as f64 / 500_000.0; + if vsize <= FULL_BLOCK_VSIZE && next_block.is_none() { + let multiplier = (vsize - EMPTY_BLOCK_VSIZE) as f64 / EMPTY_BLOCK_VSIZE as f64; return (use_fee * multiplier).round_to(MIN_INCREMENT).max(min_fee); } use_fee.ceil_to(MIN_INCREMENT).max(min_fee) diff --git a/crates/brk_mempool/src/steps/rebuilder/snapshot/mod.rs b/crates/brk_mempool/src/steps/rebuilder/snapshot/mod.rs index 7c1fe7be4..49d47acaa 100644 --- a/crates/brk_mempool/src/steps/rebuilder/snapshot/mod.rs +++ b/crates/brk_mempool/src/steps/rebuilder/snapshot/mod.rs @@ -1,26 +1,30 @@ -mod blk_index; mod fees; mod stats; -pub use blk_index::BlkIndex; pub use stats::BlockStats; use std::hash::{DefaultHasher, Hash, Hasher}; use brk_types::{FeeRate, RecommendedFees}; -use super::linearize::Package; -use crate::{TxEntry, stores::TxIndex}; +use crate::TxEntry; +use crate::cluster::{Cluster, ClusterRef}; +use crate::stores::TxIndex; use fees::Fees; -#[derive(Debug, Clone, Default)] +#[derive(Default)] pub struct Snapshot { + /// SFL-linearized cluster forest. Snapshot is `Arc`'d, so consumers + /// share the cluster data without cloning. Each `ClusterNode.id` + /// is the live `TxIndex` (pool slot) of that node. + pub clusters: Vec>, + /// Reverse of `clusters`: indexed by `TxIndex.as_usize()`. `None` + /// means the slot is empty (between two cycles a tx confirmed/was + /// evicted) or never made it into the live pool. Read via + /// `cluster_of(idx)` from outside the snapshot. + cluster_of: Vec>, pub blocks: Vec>, - /// Reverse of `blocks`: indexed by `TxIndex.as_usize()`. Slots that - /// hold no entry, or hold an entry that didn't make any projected - /// block, store `BlkIndex::MAX`. Read via the `block_of` accessor. - block_of: Vec, pub block_stats: Vec, pub fees: RecommendedFees, /// ETag-like cache key for the first projected block. A hash of @@ -32,56 +36,30 @@ pub struct Snapshot { impl Snapshot { /// `min_fee` is bitcoind's live `mempoolminfee`, used as the floor /// for every recommended-fee tier. - pub fn build(blocks: Vec>, entries: &[Option], min_fee: FeeRate) -> Self { - let block_stats = Self::compute_block_stats(&blocks, entries); + pub fn build( + clusters: Vec>, + cluster_of: Vec>, + blocks: Vec>, + entries: &[Option], + min_fee: FeeRate, + ) -> Self { + let block_stats: Vec = blocks + .iter() + .map(|block| BlockStats::compute(block, &clusters, &cluster_of, entries)) + .collect(); let fees = Fees::compute(&block_stats, min_fee); - let blocks = Self::flatten_blocks(blocks); - let block_of = Self::build_block_of(&blocks, entries.len()); let next_block_hash = Self::hash_next_block(&blocks); Self { + clusters, + cluster_of, blocks, - block_of, block_stats, fees, next_block_hash, } } - fn compute_block_stats( - blocks: &[Vec], - entries: &[Option], - ) -> Vec { - blocks - .iter() - .map(|block| BlockStats::compute(block, entries)) - .collect() - } - - /// Drop the package grouping, keep only the linearized tx order. - /// Packages were a vehicle for chunk-level fee accounting; once - /// `compute_block_stats` is done, they're noise to API consumers. - fn flatten_blocks(blocks: Vec>) -> Vec> { - blocks - .into_iter() - .map(|block| block.into_iter().flat_map(|pkg| pkg.txs).collect()) - .collect() - } - - /// One pass over `blocks` to invert the mapping. `BlkIndex::MAX` - /// stays as the sentinel for slots that aren't in any projected - /// block (empty slots and below-floor txs alike). - fn build_block_of(blocks: &[Vec], entry_count: usize) -> Vec { - let mut block_of = vec![BlkIndex::MAX; entry_count]; - for (b, txs) in blocks.iter().enumerate() { - let blk = BlkIndex::from(b); - for &idx in txs { - block_of[idx.as_usize()] = blk; - } - } - block_of - } - fn hash_next_block(blocks: &[Vec]) -> u64 { let Some(block) = blocks.first() else { return 0; @@ -91,12 +69,25 @@ impl Snapshot { hasher.finish() } - /// Projected block that holds `idx`, or `None` if the tx is below - /// the mempool floor (or `idx` is out of range). - pub fn block_of(&self, idx: TxIndex) -> Option { - self.block_of - .get(idx.as_usize()) - .copied() - .filter(|b| !b.is_not_in_projected()) + /// Cluster + local position for a live tx, or `None` if the slot + /// is empty or `idx` is out of range. + pub fn cluster_of(&self, idx: TxIndex) -> Option { + self.cluster_of.get(idx.as_usize()).copied().flatten() + } + + pub fn cluster_of_len(&self) -> usize { + self.cluster_of.len() + } + + pub fn cluster_of_active(&self) -> usize { + self.cluster_of.iter().filter(|c| c.is_some()).count() + } + + /// SFL chunk feerate for a live tx, or `None` if it isn't in any + /// cluster. Cheap shortcut for callers that need the rate but not + /// the full `CpfpInfo`. + pub fn chunk_rate_of(&self, idx: TxIndex) -> Option { + let ClusterRef { cluster_id, local } = self.cluster_of(idx)?; + Some(self.clusters[cluster_id.as_usize()].chunk_of(local).fee_rate()) } } diff --git a/crates/brk_mempool/src/steps/rebuilder/snapshot/stats.rs b/crates/brk_mempool/src/steps/rebuilder/snapshot/stats.rs index 645ebe5c7..c6923f2bb 100644 --- a/crates/brk_mempool/src/steps/rebuilder/snapshot/stats.rs +++ b/crates/brk_mempool/src/steps/rebuilder/snapshot/stats.rs @@ -1,8 +1,8 @@ use brk_types::{FeeRate, Sats, VSize}; use crate::TxEntry; - -use super::super::linearize::Package; +use crate::cluster::{Cluster, ClusterRef}; +use crate::stores::TxIndex; /// Percentile points reported in [`BlockStats::fee_range`], in the /// same order: 0% (min), 10%, 25%, median, 75%, 90%, 100% (max). @@ -20,24 +20,31 @@ pub struct BlockStats { } impl BlockStats { - /// Each tx contributes its containing package's `fee_rate` to the + /// Each tx contributes its containing chunk's `fee_rate` to the /// percentile distribution, since that's the rate the miner /// collects per vsize. - pub fn compute(block: &[Package], entries: &[Option]) -> Self { + pub fn compute( + block: &[TxIndex], + clusters: &[Cluster], + cluster_of: &[Option], + entries: &[Option], + ) -> Self { let mut total_fee = Sats::default(); let mut total_vsize = VSize::default(); let mut total_size: u64 = 0; let mut fee_rates: Vec = Vec::new(); - for pkg in block { - for &tx_index in &pkg.txs { - if let Some(entry) = &entries[tx_index.as_usize()] { - total_fee += entry.fee; - total_vsize += entry.vsize; - total_size += entry.size; - fee_rates.push(pkg.fee_rate); - } - } + for &tx_index in block { + let Some(entry) = &entries[tx_index.as_usize()] else { + continue; + }; + let Some(cref) = cluster_of[tx_index.as_usize()] else { + continue; + }; + total_fee += entry.fee; + total_vsize += entry.vsize; + total_size += entry.size; + fee_rates.push(clusters[cref.cluster_id.as_usize()].chunk_of(cref.local).fee_rate()); } let tx_count = fee_rates.len() as u32; diff --git a/crates/brk_mempool/src/steps/rebuilder/verify.rs b/crates/brk_mempool/src/steps/rebuilder/verify.rs index daab1057d..428178e61 100644 --- a/crates/brk_mempool/src/steps/rebuilder/verify.rs +++ b/crates/brk_mempool/src/steps/rebuilder/verify.rs @@ -3,8 +3,9 @@ use brk_types::{Sats, SatsSigned, TxidPrefix, VSize}; use rustc_hash::{FxHashMap, FxHashSet}; use tracing::{debug, warn}; -use super::linearize::Package; -use crate::{TxEntry, stores::TxIndex}; +use crate::TxEntry; +use crate::cluster::{Cluster, ClusterRef}; +use crate::stores::TxIndex; type PrefixSet = FxHashSet; type FeeByPrefix = FxHashMap; @@ -12,12 +13,23 @@ type FeeByPrefix = FxHashMap; pub struct Verifier; impl Verifier { - pub fn check(client: &Client, blocks: &[Vec], entries: &[Option]) { - Self::check_structure(blocks, entries); + pub fn check( + client: &Client, + blocks: &[Vec], + clusters: &[Cluster], + cluster_of: &[Option], + entries: &[Option], + ) { + Self::check_structure(blocks, clusters, cluster_of, entries); Self::compare_to_core(client, blocks, entries); } - fn check_structure(blocks: &[Vec], entries: &[Option]) { + fn check_structure( + blocks: &[Vec], + clusters: &[Cluster], + cluster_of: &[Option], + entries: &[Option], + ) { let in_pool: PrefixSet = entries .iter() .filter_map(|e| e.as_ref().map(TxEntry::txid_prefix)) @@ -25,30 +37,35 @@ impl Verifier { let mut placed = PrefixSet::default(); for (b, block) in blocks.iter().enumerate() { - for (p, pkg) in block.iter().enumerate() { - let mut summed_vsize = VSize::default(); - for &tx_index in &pkg.txs { - let entry = Self::live_entry(entries, tx_index, b, p); - Self::assert_parents_placed_first(entry, &in_pool, &placed, b, p); - Self::place(entry, &mut placed, b, p); - summed_vsize += entry.vsize; - } - assert_eq!( - pkg.vsize, summed_vsize, - "block {b} pkg {p}: pkg.vsize {} != sum {summed_vsize}", - pkg.vsize - ); + let mut block_vsize = VSize::default(); + for &tx_index in block { + let entry = Self::live_entry(entries, tx_index, b); + Self::assert_parents_placed_first(entry, &in_pool, &placed, b); + Self::place(entry, &mut placed, b); + Self::assert_in_a_chunk(clusters, cluster_of, tx_index, b); + block_vsize += entry.vsize; } if b + 1 < blocks.len() { - Self::assert_block_fits_budget(block, b); + Self::assert_block_fits_budget(block_vsize, block.len(), b); } } } - fn live_entry(entries: &[Option], tx_index: TxIndex, b: usize, p: usize) -> &TxEntry { + fn assert_in_a_chunk( + clusters: &[Cluster], + cluster_of: &[Option], + tx_index: TxIndex, + b: usize, + ) { + let cref = cluster_of[tx_index.as_usize()] + .unwrap_or_else(|| panic!("block {b}: tx_index {tx_index:?} has no cluster")); + let _ = clusters[cref.cluster_id.as_usize()].chunk_of(cref.local); + } + + fn live_entry(entries: &[Option], tx_index: TxIndex, b: usize) -> &TxEntry { entries[tx_index.as_usize()] .as_ref() - .unwrap_or_else(|| panic!("block {b} pkg {p}: dead tx_index {tx_index:?}")) + .unwrap_or_else(|| panic!("block {b}: dead tx_index {tx_index:?}")) } fn assert_parents_placed_first( @@ -56,28 +73,26 @@ impl Verifier { in_pool: &PrefixSet, placed: &PrefixSet, b: usize, - p: usize, ) { for parent in &entry.depends { assert!( !in_pool.contains(parent) || placed.contains(parent), - "block {b} pkg {p}: {} placed before its parent", + "block {b}: {} placed before its parent", entry.txid, ); } } - fn place(entry: &TxEntry, placed: &mut PrefixSet, b: usize, p: usize) { + fn place(entry: &TxEntry, placed: &mut PrefixSet, b: usize) { assert!( placed.insert(entry.txid_prefix()), - "block {b} pkg {p}: duplicate txid {}", + "block {b}: duplicate txid {}", entry.txid ); } - fn assert_block_fits_budget(block: &[Package], b: usize) { - let total: VSize = block.iter().map(|pkg| pkg.vsize).sum(); - let is_oversized_singleton = block.len() == 1 && total > VSize::MAX_BLOCK; + fn assert_block_fits_budget(total: VSize, tx_count: usize, b: usize) { + let is_oversized_singleton = tx_count == 1 && total > VSize::MAX_BLOCK; if is_oversized_singleton { return; } @@ -88,7 +103,7 @@ impl Verifier { ); } - fn compare_to_core(client: &Client, blocks: &[Vec], entries: &[Option]) { + fn compare_to_core(client: &Client, blocks: &[Vec], entries: &[Option]) { let Some(next_block) = blocks.first() else { return; }; @@ -104,7 +119,6 @@ impl Verifier { }; let ours: FeeByPrefix = next_block .iter() - .flat_map(|pkg| &pkg.txs) .filter_map(|&i| entries[i.as_usize()].as_ref()) .map(|e| (e.txid_prefix(), e.fee)) .collect(); diff --git a/crates/brk_mempool/src/steps/resolver.rs b/crates/brk_mempool/src/steps/resolver.rs index 6bb0bb8b0..7269c8972 100644 --- a/crates/brk_mempool/src/steps/resolver.rs +++ b/crates/brk_mempool/src/steps/resolver.rs @@ -90,7 +90,7 @@ impl Resolver { Some((Vin::from(i), out.clone())) }) .collect(); - (!fills.is_empty()).then_some((txid.clone(), fills)) + (!fills.is_empty()).then_some((*txid, fills)) }) .collect() } @@ -108,9 +108,9 @@ impl Resolver { .iter() .enumerate() .filter(|(_, txin)| txin.prevout.is_none()) - .map(|(i, txin)| (Vin::from(i), txin.txid.clone(), txin.vout)) + .map(|(i, txin)| (Vin::from(i), txin.txid, txin.vout)) .collect(); - (!holes.is_empty()).then_some((txid.clone(), holes)) + (!holes.is_empty()).then_some((*txid, holes)) }) .collect() } diff --git a/crates/brk_mempool/src/stores/addr_tracker/mod.rs b/crates/brk_mempool/src/stores/addr_tracker/mod.rs index 83a1e544c..ce1047e3a 100644 --- a/crates/brk_mempool/src/stores/addr_tracker/mod.rs +++ b/crates/brk_mempool/src/stores/addr_tracker/mod.rs @@ -83,7 +83,7 @@ impl AddrTracker { update_stats: impl FnOnce(&mut AddrMempoolStats), ) { let entry = self.0.entry(bytes).or_default(); - entry.txids.insert(txid.clone()); + entry.txids.insert(*txid); update_stats(&mut entry.stats); entry.stats.update_tx_count(entry.txids.len() as u32); } diff --git a/crates/brk_mempool/src/stores/entry_pool/mod.rs b/crates/brk_mempool/src/stores/entry_pool/mod.rs index cee8d6f51..9a9bd6a95 100644 --- a/crates/brk_mempool/src/stores/entry_pool/mod.rs +++ b/crates/brk_mempool/src/stores/entry_pool/mod.rs @@ -64,4 +64,12 @@ impl EntryPool { pub fn entries(&self) -> &[Option] { &self.entries } + + pub fn active_count(&self) -> usize { + self.prefix_to_idx.len() + } + + pub fn free_slots_count(&self) -> usize { + self.free_slots.len() + } } diff --git a/crates/brk_mempool/src/stores/state.rs b/crates/brk_mempool/src/stores/state.rs index 2312c478f..a1d30916e 100644 --- a/crates/brk_mempool/src/stores/state.rs +++ b/crates/brk_mempool/src/stores/state.rs @@ -14,7 +14,7 @@ pub struct MempoolState { pub(crate) txs: RwLock, pub(crate) addrs: RwLock, pub(crate) entries: RwLock, - pub(crate) outpoint_spends: RwLock, + pub outpoint_spends: RwLock, pub(crate) graveyard: RwLock, } diff --git a/crates/brk_mempool/src/stores/tx_graveyard/mod.rs b/crates/brk_mempool/src/stores/tx_graveyard/mod.rs index 4db8229e9..61eb732c3 100644 --- a/crates/brk_mempool/src/stores/tx_graveyard/mod.rs +++ b/crates/brk_mempool/src/stores/tx_graveyard/mod.rs @@ -27,6 +27,14 @@ impl TxGraveyard { self.tombstones.contains_key(txid) } + pub fn tombstones_len(&self) -> usize { + self.tombstones.len() + } + + pub fn order_len(&self) -> usize { + self.order.len() + } + pub fn get(&self, txid: &Txid) -> Option<&TxTombstone> { self.tombstones.get(txid) } @@ -63,7 +71,7 @@ impl TxGraveyard { pub fn bury(&mut self, txid: Txid, tx: Transaction, entry: TxEntry, removal: TxRemoval) { let now = Instant::now(); self.tombstones - .insert(txid.clone(), TxTombstone::new(tx, entry, removal, now)); + .insert(txid, TxTombstone::new(tx, entry, removal, now)); self.order.push_back((now, txid)); } diff --git a/crates/brk_mempool/src/stores/tx_store.rs b/crates/brk_mempool/src/stores/tx_store.rs index 33244740e..da39a6090 100644 --- a/crates/brk_mempool/src/stores/tx_store.rs +++ b/crates/brk_mempool/src/stores/tx_store.rs @@ -44,7 +44,7 @@ impl TxStore { fn track_unresolved(&mut self, txid: &Txid, tx: &Transaction) { if tx.input.iter().any(|i| i.prevout.is_none()) { - self.unresolved.insert(txid.clone()); + self.unresolved.insert(*txid); } } diff --git a/crates/brk_mempool/src/tests/graph_bench.rs b/crates/brk_mempool/src/tests/graph_bench.rs index c49f8e953..a2fc8666a 100644 --- a/crates/brk_mempool/src/tests/graph_bench.rs +++ b/crates/brk_mempool/src/tests/graph_bench.rs @@ -1,10 +1,10 @@ use std::time::Instant; use bitcoin::hashes::Hash; -use brk_types::{Sats, Timestamp, Txid, TxidPrefix, VSize}; +use brk_types::{Sats, Timestamp, Txid, TxidPrefix, VSize, Weight}; use smallvec::SmallVec; -use crate::{TxEntry, steps::rebuilder::graph::Graph}; +use crate::TxEntry; fn synthetic_mempool(n: usize) -> Vec> { let make_txid = |i: usize| -> Txid { @@ -18,7 +18,7 @@ fn synthetic_mempool(n: usize) -> Vec> { let mut txids: Vec = Vec::with_capacity(n); for i in 0..n { let txid = make_txid(i); - txids.push(txid.clone()); + txids.push(txid); let depends: SmallVec<[TxidPrefix; 2]> = match i % 100 { 0..=94 => SmallVec::new(), @@ -40,6 +40,7 @@ fn synthetic_mempool(n: usize) -> Vec> { txid, fee: Sats::from((i as u64).wrapping_mul(137) % 10_000 + 1), vsize: VSize::from(250u64), + weight: Weight::from(1000u64), size: 250, depends, first_seen: Timestamp::now(), @@ -51,18 +52,20 @@ fn synthetic_mempool(n: usize) -> Vec> { #[test] #[ignore = "perf benchmark; run with --ignored --nocapture"] -fn perf_build_graph() { +fn perf_build_clusters() { + use crate::steps::rebuilder::clusters::build_clusters; + let sizes = [1_000usize, 10_000, 50_000, 100_000, 300_000]; eprintln!(); - eprintln!("Graph::build perf (release, single call):"); + eprintln!("build_clusters perf (release, single call):"); eprintln!(" n build"); eprintln!(" ------------------------"); for &n in &sizes { let entries = synthetic_mempool(n); - let _ = Graph::build(&entries); + let _ = build_clusters(&entries); let t = Instant::now(); - let g = Graph::build(&entries); + let (clusters, _) = build_clusters(&entries); let dt = t.elapsed(); let ns = dt.as_nanos(); let pretty = if ns >= 1_000_000 { @@ -70,7 +73,7 @@ fn perf_build_graph() { } else { format!("{:.2} µs", ns as f64 / 1_000.0) }; - eprintln!(" {:<10} {:<10} ({} nodes)", n, pretty, g.len()); + eprintln!(" {:<10} {:<10} ({} clusters)", n, pretty, clusters.len()); } eprintln!(); } diff --git a/crates/brk_mempool/src/tests/linearize/basic.rs b/crates/brk_mempool/src/tests/linearize/basic.rs index 946fa6630..e10588151 100644 --- a/crates/brk_mempool/src/tests/linearize/basic.rs +++ b/crates/brk_mempool/src/tests/linearize/basic.rs @@ -1,13 +1,14 @@ use brk_types::{Sats, VSize}; use super::{Chunk, chunk_shapes, make_cluster, run}; +use crate::cluster::LocalIdx; #[test] fn singleton() { let cluster = make_cluster(&[(100, 10)], &[]); let chunks = run(&cluster); assert_eq!(chunks.len(), 1); - assert_eq!(chunks[0].nodes.len(), 1); + assert_eq!(chunks[0].txs.len(), 1); assert_eq!(chunks[0].fee, Sats::from(100u64)); assert_eq!(chunks[0].vsize, VSize::from(10u64)); } @@ -17,9 +18,9 @@ fn two_chain_parent_richer() { let cluster = make_cluster(&[(100, 10), (1, 1)], &[(0, 1)]); let chunks = run(&cluster); assert_eq!(chunks.len(), 2); - assert!(chunks[0].nodes.contains(&0)); + assert!(chunks[0].txs.contains(&LocalIdx::from(0u32))); assert_eq!(chunks[0].vsize, VSize::from(10u64)); - assert!(chunks[1].nodes.contains(&1)); + assert!(chunks[1].txs.contains(&LocalIdx::from(1u32))); assert_eq!(chunks[1].vsize, VSize::from(1u64)); } @@ -28,7 +29,7 @@ fn two_chain_child_pays_parent_cpfp() { let cluster = make_cluster(&[(1, 10), (100, 1)], &[(0, 1)]); let chunks = run(&cluster); assert_eq!(chunks.len(), 1); - assert_eq!(chunks[0].nodes.len(), 2); + assert_eq!(chunks[0].txs.len(), 2); assert_eq!(chunks[0].fee, Sats::from(101u64)); assert_eq!(chunks[0].vsize, VSize::from(11u64)); } @@ -38,7 +39,7 @@ fn v_shape_two_parents_one_child() { let cluster = make_cluster(&[(1, 1), (1, 1), (100, 1)], &[(0, 2), (1, 2)]); let chunks = run(&cluster); assert_eq!(chunks.len(), 1); - assert_eq!(chunks[0].nodes.len(), 3); + assert_eq!(chunks[0].txs.len(), 3); assert_eq!(chunks[0].fee, Sats::from(102u64)); assert_eq!(chunks[0].vsize, VSize::from(3u64)); } @@ -60,7 +61,7 @@ fn diamond() { ); let chunks = run(&cluster); assert_eq!(chunks.len(), 1); - assert_eq!(chunks[0].nodes.len(), 4); + assert_eq!(chunks[0].txs.len(), 4); assert_eq!(chunks[0].fee, Sats::from(103u64)); assert_eq!(chunks[0].vsize, VSize::from(4u64)); } @@ -72,9 +73,9 @@ fn chain_alternating_high_low() { &[(0, 1), (1, 2), (2, 3)], ); let chunks = run(&cluster); - assert_eq!(chunks_total_fee(&chunks), Sats::from(22u64)); - assert_eq!(chunks_total_vsize(&chunks), VSize::from(4u64)); - assert_non_increasing(&chunks); + assert_eq!(chunks_total_fee(chunks), Sats::from(22u64)); + assert_eq!(chunks_total_vsize(chunks), VSize::from(4u64)); + assert_non_increasing(chunks); } #[test] @@ -84,9 +85,9 @@ fn chain_starts_low_ends_high() { &[(0, 1), (1, 2), (2, 3)], ); let chunks = run(&cluster); - assert_eq!(chunks_total_fee(&chunks), Sats::from(202u64)); - assert_eq!(chunks_total_vsize(&chunks), VSize::from(4u64)); - assert_non_increasing(&chunks); + assert_eq!(chunks_total_fee(chunks), Sats::from(202u64)); + assert_eq!(chunks_total_vsize(chunks), VSize::from(4u64)); + assert_non_increasing(chunks); } #[test] @@ -96,13 +97,13 @@ fn two_disconnected_clusters_would_each_be_separate() { &[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)], ); let chunks = run(&cluster); - assert_eq!(chunks_total_fee(&chunks), Sats::from(151u64)); - assert_eq!(chunks_total_vsize(&chunks), VSize::from(6u64)); - assert_non_increasing(&chunks); + assert_eq!(chunks_total_fee(chunks), Sats::from(151u64)); + assert_eq!(chunks_total_vsize(chunks), VSize::from(6u64)); + assert_non_increasing(chunks); let mut seen: Vec = Vec::new(); - for ch in &chunks { - for &n in &ch.nodes { - seen.push(n as usize); + for ch in chunks { + for &local in &ch.txs { + seen.push(local.as_usize()); } } seen.sort_unstable(); @@ -127,11 +128,44 @@ fn shapes_are_stable_on_identical_input() { &[(1, 1), (100, 1), (1, 1), (100, 1)], &[(0, 1), (1, 2), (2, 3)], ); - let a = chunk_shapes(&run(&cluster)); - let b = chunk_shapes(&run(&cluster)); + let a = chunk_shapes(run(&cluster)); + let b = chunk_shapes(run(&cluster)); assert_eq!(a, b); } +#[test] +fn singleton_zero_fee() { + let cluster = make_cluster(&[(0, 10)], &[]); + let chunks = run(&cluster); + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0].txs.len(), 1); + assert_eq!(chunks[0].fee, Sats::from(0u64)); +} + +#[test] +fn zero_fee_leftover_after_paying_chunk() { + let cluster = make_cluster(&[(0, 1), (10, 1), (0, 1)], &[(0, 1), (1, 2)]); + let chunks = run(&cluster); + assert_eq!(chunks_total_vsize(chunks), VSize::from(3u64)); + assert_eq!(chunks_total_fee(chunks), Sats::from(10u64)); + let mut seen: Vec = Vec::new(); + for ch in chunks { + for &local in &ch.txs { + seen.push(local.as_usize()); + } + } + seen.sort_unstable(); + assert_eq!(seen, vec![0, 1, 2]); +} + +#[test] +fn all_zero_fee_chain() { + let cluster = make_cluster(&[(0, 1), (0, 1), (0, 1)], &[(0, 1), (1, 2)]); + let chunks = run(&cluster); + assert_eq!(chunks_total_vsize(chunks), VSize::from(3u64)); + assert_eq!(chunks_total_fee(chunks), Sats::from(0u64)); +} + fn chunks_total_fee(chunks: &[Chunk]) -> Sats { chunks.iter().map(|c| c.fee).sum() } diff --git a/crates/brk_mempool/src/tests/linearize/mod.rs b/crates/brk_mempool/src/tests/linearize/mod.rs index 34219900f..da3190e71 100644 --- a/crates/brk_mempool/src/tests/linearize/mod.rs +++ b/crates/brk_mempool/src/tests/linearize/mod.rs @@ -2,44 +2,47 @@ mod basic; mod oracle; mod stress; -use brk_types::{Sats, VSize}; +use brk_types::{Sats, Txid, VSize, Weight}; use smallvec::SmallVec; -use crate::{ - steps::rebuilder::linearize::{ - LocalIdx, chunk::Chunk, cluster::Cluster, cluster_node::ClusterNode, sfl::Sfl, - }, - stores::TxIndex, -}; +use crate::cluster::{Chunk, Cluster, ClusterNode, LocalIdx}; -pub(super) fn make_cluster(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)]) -> Cluster { - let mut nodes: Vec = fees_vsizes +/// Test cluster: each node carries its input position as `id`, so +/// invariant checks can map `LocalIdx` (post-permutation) back to the +/// caller's `fees_vsizes` / `edges` index space. +pub(super) type TestCluster = Cluster; + +pub(super) fn make_cluster(fees_vsizes: &[(u64, u64)], edges: &[(u32, u32)]) -> TestCluster { + let mut parents: Vec> = + (0..fees_vsizes.len()).map(|_| SmallVec::new()).collect(); + for &(p, c) in edges { + parents[c as usize].push(LocalIdx::from(p)); + } + + let nodes: Vec> = fees_vsizes .iter() + .zip(parents) .enumerate() - .map(|(i, &(fee, vsize))| ClusterNode { - tx_index: TxIndex::from(i), + .map(|(i, (&(fee, vsize), parents))| ClusterNode { + id: i as u32, + txid: Txid::COINBASE, fee: Sats::from(fee), vsize: VSize::from(vsize), - parents: SmallVec::new(), - children: SmallVec::new(), + weight: Weight::from(vsize * 4), + parents, }) .collect(); - for &(p, c) in edges { - nodes[c as usize].parents.push(p); - nodes[p as usize].children.push(c); - } - Cluster::new(nodes) } -pub(super) fn run(cluster: &Cluster) -> Vec { - Sfl::linearize(cluster) +pub(super) fn run(cluster: &TestCluster) -> &[Chunk] { + &cluster.chunks } pub(super) fn chunk_shapes(chunks: &[Chunk]) -> Vec<(usize, Sats, VSize)> { chunks .iter() - .map(|c| (c.nodes.len(), c.fee, c.vsize)) + .map(|c| (c.txs.len(), c.fee, c.vsize)) .collect() } diff --git a/crates/brk_mempool/src/tests/linearize/oracle.rs b/crates/brk_mempool/src/tests/linearize/oracle.rs index 9e8803639..b8506cfca 100644 --- a/crates/brk_mempool/src/tests/linearize/oracle.rs +++ b/crates/brk_mempool/src/tests/linearize/oracle.rs @@ -1,6 +1,6 @@ use brk_types::{FeeRate, Sats, VSize}; -use super::{Chunk, LocalIdx, Sfl, make_cluster, run}; +use super::{Chunk, make_cluster, run}; fn to_typed(fv: &[(u64, u64)]) -> Vec<(Sats, VSize)> { fv.iter() @@ -29,37 +29,37 @@ fn canonical_chunking(path: &[(Sats, VSize)]) -> Vec<(Sats, VSize)> { chunks } -fn all_topo_orders(parents: &[Vec]) -> Vec> { +fn all_topo_orders(parents: &[Vec]) -> Vec> { let n = parents.len(); let indegree: Vec = parents.iter().map(|p| p.len() as u32).collect(); - let children: Vec> = { + let children: Vec> = { let mut out = vec![Vec::new(); n]; for (c, ps) in parents.iter().enumerate() { for &p in ps { - out[p as usize].push(c as LocalIdx); + out[p as usize].push(c as u32); } } out }; let mut results = Vec::new(); - let mut current: Vec = Vec::new(); + let mut current: Vec = Vec::new(); let mut indeg = indegree.clone(); walk(&children, &mut indeg, &mut current, n, &mut results); return results; fn walk( - children: &[Vec], + children: &[Vec], indeg: &mut [u32], - current: &mut Vec, + current: &mut Vec, n: usize, - out: &mut Vec>, + out: &mut Vec>, ) { if current.len() == n { out.push(current.clone()); return; } - let ready: Vec = (0..n as LocalIdx) + let ready: Vec = (0..n as u32) .filter(|&i| indeg[i as usize] == 0) .collect(); for v in ready { @@ -78,10 +78,7 @@ fn all_topo_orders(parents: &[Vec]) -> Vec> { } } -fn oracle_best( - fees_vsizes: &[(Sats, VSize)], - edges: &[(LocalIdx, LocalIdx)], -) -> Vec<(Sats, VSize)> { +fn oracle_best(fees_vsizes: &[(Sats, VSize)], edges: &[(u32, u32)]) -> Vec<(Sats, VSize)> { let n = fees_vsizes.len(); let mut parents = vec![Vec::new(); n]; for &(p, c) in edges { @@ -166,10 +163,10 @@ fn chunk_rate(chunks: &[Chunk]) -> Vec<(Sats, VSize)> { chunks.iter().map(|c| (c.fee, c.vsize)).collect() } -fn assert_matches_oracle(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)]) { +fn assert_matches_oracle(fees_vsizes: &[(u64, u64)], edges: &[(u32, u32)]) { let cluster = make_cluster(fees_vsizes, edges); let chunks = run(&cluster); - let got = chunk_rate(&chunks); + let got = chunk_rate(chunks); let want = oracle_best(&to_typed(fees_vsizes), edges); let got_cum = cumulative(&got); @@ -265,7 +262,7 @@ impl DagRng { } } -type FvAndEdges = (Vec<(u64, u64)>, Vec<(LocalIdx, LocalIdx)>); +type FvAndEdges = (Vec<(u64, u64)>, Vec<(u32, u32)>); fn random_dag(n: usize, seed: u64) -> FvAndEdges { let mut rng = DagRng::new(seed); @@ -279,15 +276,15 @@ fn random_dag(n: usize, seed: u64) -> FvAndEdges { let mut edges = Vec::new(); for i in 1..n { let k = rng.range(4) as usize; - let mut picks: Vec = Vec::new(); + let mut picks: Vec = Vec::new(); for _ in 0..k { - let p = rng.range(i as u64) as LocalIdx; + let p = rng.range(i as u64) as u32; if !picks.contains(&p) { picks.push(p); } } for p in picks { - edges.push((p, i as LocalIdx)); + edges.push((p, i as u32)); } } (fees_vsizes, edges) @@ -301,7 +298,7 @@ fn assert_optimal_on_random(n: usize, seed: u64) { let (fv, edges) = random_dag(n, seed); let cluster = make_cluster(&fv, &edges); let chunks = run(&cluster); - let got = chunk_rate(&chunks); + let got = chunk_rate(chunks); let want = oracle_best(&to_typed(&fv), &edges); @@ -355,7 +352,7 @@ fn optimality_gap_of(got: &[(Sats, VSize)], want: &[(Sats, VSize)]) -> Option Option { let (fv, edges) = random_dag(n, seed); let cluster = make_cluster(&fv, &edges); - let chunks = Sfl::linearize(&cluster); + let chunks = run(&cluster); let got: Vec<(Sats, VSize)> = chunks.iter().map(|c| (c.fee, c.vsize)).collect(); let want = oracle_best(&to_typed(&fv), &edges); optimality_gap_of(&got, &want) @@ -433,7 +430,7 @@ fn perf_linearize() { let t = Instant::now(); let mut sink = 0u64; for c in &clusters { - for chunk in Sfl::linearize(c) { + for chunk in &c.chunks { sink = sink.wrapping_add(u64::from(chunk.fee)); } } diff --git a/crates/brk_mempool/src/tests/linearize/stress.rs b/crates/brk_mempool/src/tests/linearize/stress.rs index f7ef2d996..4bebd3571 100644 --- a/crates/brk_mempool/src/tests/linearize/stress.rs +++ b/crates/brk_mempool/src/tests/linearize/stress.rs @@ -1,6 +1,6 @@ use brk_types::{Sats, VSize}; -use super::{Chunk, LocalIdx, make_cluster, run}; +use super::{TestCluster, make_cluster, run}; struct Rng(u64); impl Rng { @@ -20,7 +20,7 @@ impl Rng { } } -type FvAndEdges = (Vec<(u64, u64)>, Vec<(LocalIdx, LocalIdx)>); +type FvAndEdges = (Vec<(u64, u64)>, Vec<(u32, u32)>); fn random_cluster(n: usize, seed: u64) -> FvAndEdges { let mut rng = Rng::new(seed); @@ -34,58 +34,70 @@ fn random_cluster(n: usize, seed: u64) -> FvAndEdges { let mut edges = Vec::new(); for i in 1..n { let k = rng.range(4) as usize; - let mut picks: Vec = Vec::new(); + let mut picks: Vec = Vec::new(); for _ in 0..k { - let p = rng.range(i as u64) as LocalIdx; + let p = rng.range(i as u64) as u32; if !picks.contains(&p) { picks.push(p); } } for p in picks { - edges.push((p, i as LocalIdx)); + edges.push((p, i as u32)); } } (fees_vsizes, edges) } -fn check_invariants(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)], chunks: &[Chunk]) { +/// `cluster.nodes` is in topological order, so each node's `LocalIdx` +/// may differ from the caller's input position. The cluster's `id` +/// field carries the input index, and we use it to map back when the +/// invariant being checked is expressed in input space (fees/vsizes +/// table, edges list). +fn check_invariants(fees_vsizes: &[(u64, u64)], edges: &[(u32, u32)], cluster: &TestCluster) { let n = fees_vsizes.len(); + let chunks = &cluster.chunks; + let input_of = |l: crate::cluster::LocalIdx| cluster.nodes[l.as_usize()].id as usize; let mut seen = vec![false; n]; for chunk in chunks { - for &local in &chunk.nodes { - assert!( - !seen[local as usize], - "node {} appears in multiple chunks", - local - ); - seen[local as usize] = true; + for &local in &chunk.txs { + let i = input_of(local); + assert!(!seen[i], "input node {} appears in multiple chunks", i); + seen[i] = true; } } for (i, s) in seen.iter().enumerate() { - assert!(*s, "node {} missing from all chunks", i); + assert!(*s, "input node {} missing from all chunks", i); } for chunk in chunks { - let fee: u64 = chunk.nodes.iter().map(|&l| fees_vsizes[l as usize].0).sum(); - let vsize: u64 = chunk.nodes.iter().map(|&l| fees_vsizes[l as usize].1).sum(); + let fee: u64 = chunk + .txs + .iter() + .map(|&l| fees_vsizes[input_of(l)].0) + .sum(); + let vsize: u64 = chunk + .txs + .iter() + .map(|&l| fees_vsizes[input_of(l)].1) + .sum(); assert_eq!(chunk.fee, Sats::from(fee), "chunk fee mismatch"); assert_eq!(chunk.vsize, VSize::from(vsize), "chunk vsize mismatch"); } - let chunk_of: Vec = { + let chunk_of_input: Vec = { let mut out = vec![usize::MAX; n]; for (ci, chunk) in chunks.iter().enumerate() { - for &local in &chunk.nodes { - out[local as usize] = ci; + for &local in &chunk.txs { + out[input_of(local)] = ci; } } out }; for &(p, c) in edges { - let cp = chunk_of[p as usize]; - let cc = chunk_of[c as usize]; + let cp = chunk_of_input[p as usize]; + let cc = chunk_of_input[c as usize]; assert!( cp <= cc, "parent {} in chunk {} but child {} in earlier chunk {}", @@ -114,8 +126,7 @@ fn random_small_clusters() { let n = 2 + (seed % 10) as usize; let (fv, edges) = random_cluster(n, seed.wrapping_add(1)); let cluster = make_cluster(&fv, &edges); - let chunks = run(&cluster); - check_invariants(&fv, &edges, &chunks); + check_invariants(&fv, &edges, &cluster); } } @@ -125,8 +136,7 @@ fn random_medium_clusters() { let n = 10 + (seed % 20) as usize; let (fv, edges) = random_cluster(n, seed.wrapping_add(100)); let cluster = make_cluster(&fv, &edges); - let chunks = run(&cluster); - check_invariants(&fv, &edges, &chunks); + check_invariants(&fv, &edges, &cluster); } } @@ -135,8 +145,7 @@ fn random_large_clusters() { for seed in 0..10u64 { let (fv, edges) = random_cluster(30, seed.wrapping_add(1000)); let cluster = make_cluster(&fv, &edges); - let chunks = run(&cluster); - check_invariants(&fv, &edges, &chunks); + check_invariants(&fv, &edges, &cluster); } } @@ -154,7 +163,6 @@ fn random_cluster_at_policy_limit() { for seed in 0..5u64 { let (fv, edges) = random_cluster(100, seed.wrapping_add(9000)); let cluster = make_cluster(&fv, &edges); - let chunks = run(&cluster); - check_invariants(&fv, &edges, &chunks); + check_invariants(&fv, &edges, &cluster); } } diff --git a/crates/brk_query/Cargo.toml b/crates/brk_query/Cargo.toml index e0fac3371..84e8615b3 100644 --- a/crates/brk_query/Cargo.toml +++ b/crates/brk_query/Cargo.toml @@ -27,6 +27,7 @@ parking_lot = { workspace = true } # quickmatch = { path = "../../../quickmatch" } quickmatch = "0.4.0" rustc-hash = { workspace = true } +smallvec = { workspace = true } tokio = { workspace = true, optional = true } serde_json = { workspace = true } vecdb = { workspace = true } diff --git a/crates/brk_query/src/impl/addr.rs b/crates/brk_query/src/impl/addr.rs index 63524b37c..8e43ce0db 100644 --- a/crates/brk_query/src/impl/addr.rs +++ b/crates/brk_query/src/impl/addr.rs @@ -13,9 +13,7 @@ use crate::Query; impl Query { pub fn addr(&self, addr: Addr) -> Result { - let indexer = self.indexer(); let computer = self.computer(); - let stores = &indexer.stores; let script = if let Ok(addr) = bitcoin::Address::from_str(&addr) { if !addr.is_valid_for_network(Network::Bitcoin) { @@ -34,13 +32,7 @@ impl Query { return Err(Error::InvalidAddr); }; let hash = AddrHash::from(&bytes); - - let Some(store) = stores.addr_type_to_addr_hash_to_addr_index.get(output_type) else { - return Err(Error::InvalidAddr); - }; - let Some(type_index) = store.get(&hash)?.map(|cow| cow.into_owned()) else { - return Err(Error::UnknownAddr); - }; + let type_index = self.type_index_for(output_type, &hash)?; let any_addr_index = computer .distribution @@ -158,9 +150,8 @@ impl Query { .map(|(key, _): (AddrIndexTxIndex, Unit)| key.tx_index()) .collect()) } else { - let prefix = u32::from(type_index).to_be_bytes(); Ok(store - .prefix(prefix) + .prefix(type_index) .rev() .take(limit) .map(|(key, _): (AddrIndexTxIndex, Unit)| key.tx_index()) @@ -180,10 +171,8 @@ impl Query { .get(output_type) .data()?; - let prefix = u32::from(type_index).to_be_bytes(); - let outpoints: Vec<(TxIndex, Vout)> = store - .prefix(prefix) + .prefix(type_index) .map(|(key, _): (AddrIndexOutPoint, Unit)| (key.tx_index(), key.vout())) .take(max_utxos + 1) .collect(); @@ -268,9 +257,8 @@ impl Query { .addr_type_to_addr_index_and_tx_index .get(output_type) .data()?; - let prefix = u32::from(type_index).to_be_bytes(); let last_tx_index = store - .prefix(prefix) + .prefix(type_index) .next_back() .map(|(key, _): (AddrIndexTxIndex, Unit)| key.tx_index()) .ok_or(Error::UnknownAddr)?; @@ -278,22 +266,23 @@ impl Query { } fn resolve_addr(&self, addr: &Addr) -> Result<(OutputType, TypeIndex)> { - let stores = &self.indexer().stores; - let bytes = AddrBytes::from_str(addr)?; let output_type = OutputType::from(&bytes); let hash = AddrHash::from(&bytes); + let type_index = self.type_index_for(output_type, &hash)?; + Ok((output_type, type_index)) + } - let Some(type_index) = stores + /// Lookup the per-type index of an address by `(output_type, hash)`. + /// Returns `UnknownAddr` if the hash is absent from the type's index. + fn type_index_for(&self, output_type: OutputType, hash: &AddrHash) -> Result { + self.indexer() + .stores .addr_type_to_addr_hash_to_addr_index .get(output_type) .data()? - .get(&hash)? + .get(hash)? .map(|cow| cow.into_owned()) - else { - return Err(Error::UnknownAddr); - }; - - Ok((output_type, type_index)) + .ok_or(Error::UnknownAddr) } } diff --git a/crates/brk_query/src/impl/block/info.rs b/crates/brk_query/src/impl/block/info.rs index c864eadbb..9f081dde4 100644 --- a/crates/brk_query/src/impl/block/info.rs +++ b/crates/brk_query/src/impl/block/info.rs @@ -13,6 +13,29 @@ use crate::Query; const HEADER_SIZE: usize = 80; +/// Decoded coinbase fields consumed by `blocks_v1_range`. +/// +/// Returned by `Query::parse_coinbase_from_read`. On decode failure the +/// caller hard-fails on header reads but accepts a `Coinbase::default()` +/// here (manifests as missing `extras` rather than a 5xx). +#[derive(Default)] +struct Coinbase { + /// Hex-encoded scriptsig bytes. + raw_hex: String, + /// Primary payout address (first non-duplicate output address). + primary_address: Option, + /// Deduped payout address list (consecutive duplicates collapsed). + addresses: Vec, + /// Payout-output `asm` (first non-OP_RETURN output, or first output). + payout_asm: String, + /// Scriptsig rendered as ASCII chars (one byte per char). + scriptsig_ascii: String, + /// Raw scriptsig bytes (used for Datum miner-name parsing). + scriptsig_bytes: Vec, + /// On-disk total size of the coinbase tx. + total_size: usize, +} + impl Query { /// Block by hash. Unknown hash → 404 via `height_by_hash`. pub fn block(&self, hash: &BlockHash) -> Result { @@ -65,14 +88,14 @@ impl Query { /// Most recent `count` blocks ending at `start_height` (default tip), /// returned in descending-height order. pub fn blocks(&self, start_height: Option, count: u32) -> Result> { - let (begin, end) = self.resolve_block_range(start_height, count); + let (begin, end) = self.resolve_block_range(start_height, count, self.tip_height()); self.blocks_range(begin, end) } /// V1 most recent `count` blocks with extras ending at `start_height` /// (default tip), returned in descending-height order. pub fn blocks_v1(&self, start_height: Option, count: u32) -> Result> { - let (begin, end) = self.resolve_block_range(start_height, count); + let (begin, end) = self.resolve_block_range(start_height, count, self.height()); self.blocks_v1_range(begin, end) } @@ -152,7 +175,7 @@ impl Query { Self::compute_median_time(&median_timestamps, begin + i, median_start); blocks.push(BlockInfo { - id: blockhashes[i].clone(), + id: blockhashes[i], height: Height::from(begin + i), version: header.version, timestamp: timestamps[i], @@ -171,9 +194,12 @@ impl Query { Ok(blocks) } + /// Build `BlockInfoV1` rows for `[begin, end)` in descending-height order. + /// Caller must bounds-check `end <= min(indexed, computed) + 1`. Returns + /// `Internal` on bulk-read short returns or per-block header read failures. pub(crate) fn blocks_v1_range(&self, begin: usize, end: usize) -> Result> { if begin >= end { - return Ok(vec![]); + return Ok(Vec::new()); } let count = end - begin; @@ -289,6 +315,50 @@ impl Query { .timestamp .collect_range_at(median_start, end); + let per_block_lens = [ + blockhashes.len(), + difficulties.len(), + timestamps.len(), + sizes.len(), + weights.len(), + positions.len(), + pool_slugs.len(), + segwit_txs.len(), + segwit_sizes.len(), + segwit_weights.len(), + fee_sats.len(), + subsidy_sats.len(), + input_counts.len(), + output_counts.len(), + utxo_set_sizes.len(), + input_volumes.len(), + prices.len(), + output_volumes.len(), + fr_min.len(), + fr_pct10.len(), + fr_pct25.len(), + fr_median.len(), + fr_pct75.len(), + fr_pct90.len(), + fr_max.len(), + fa_min.len(), + fa_pct10.len(), + fa_pct25.len(), + fa_median.len(), + fa_pct75.len(), + fa_pct90.len(), + fa_max.len(), + ]; + if per_block_lens.iter().any(|&l| l != count) { + return Err(Error::Internal("blocks_v1_range: short read on per-block vecs")); + } + if first_tx_indexes.len() < count { + return Err(Error::Internal("blocks_v1_range: short read on first_tx_index")); + } + if median_timestamps.len() != end - median_start { + return Err(Error::Internal("blocks_v1_range: short read on median window")); + } + let mut blocks = Vec::with_capacity(count); for i in (0..count).rev() { @@ -298,53 +368,26 @@ impl Query { (total_txs - first_tx_indexes[i].to_usize()) as u32 }; - // Single reader for header + coinbase (adjacent in blk file) + // Single reader for header + coinbase (adjacent in blk file). + // Header read errors hard-fail; coinbase parsing silent-degrades. let varint_len = Self::compact_size_len(tx_count) as usize; - let ( - raw_header, - coinbase_raw, - coinbase_address, - coinbase_addresses, - coinbase_signature, - coinbase_signature_ascii, + let mut blk = reader + .reader_at(positions[i]) + .map_err(|_| Error::Internal("blocks_v1_range: failed to open block reader"))?; + let mut raw_header = [0u8; HEADER_SIZE]; + blk.read_exact(&mut raw_header) + .map_err(|_| Error::Internal("blocks_v1_range: failed to read block header"))?; + let mut skip = [0u8; 5]; + let _ = blk.read_exact(&mut skip[..varint_len]); + let Coinbase { + raw_hex: coinbase_raw, + primary_address: coinbase_address, + addresses: coinbase_addresses, + payout_asm: coinbase_signature, + scriptsig_ascii: coinbase_signature_ascii, scriptsig_bytes, - coinbase_total_size, - ) = match reader.reader_at(positions[i]) { - Ok(mut blk) => { - let mut header_buf = [0u8; HEADER_SIZE]; - if blk.read_exact(&mut header_buf).is_err() { - ( - [0u8; HEADER_SIZE], - String::new(), - None, - vec![], - String::new(), - String::new(), - vec![], - 0, - ) - } else { - // Skip tx count varint - let mut skip = [0u8; 5]; - let _ = blk.read_exact(&mut skip[..varint_len]); - let coinbase = Self::parse_coinbase_from_read(blk); - ( - header_buf, coinbase.0, coinbase.1, coinbase.2, coinbase.3, coinbase.4, - coinbase.5, coinbase.6, - ) - } - } - Err(_) => ( - [0u8; HEADER_SIZE], - String::new(), - None, - vec![], - String::new(), - String::new(), - vec![], - 0, - ), - }; + total_size: coinbase_total_size, + } = Self::parse_coinbase_from_read(blk); let header = Self::decode_header(&raw_header)?; let weight = weights[i]; @@ -370,7 +413,7 @@ impl Query { Self::compute_median_time(&median_timestamps, begin + i, median_start); let info = BlockInfo { - id: blockhashes[i].clone(), + id: blockhashes[i], height: Height::from(begin + i), version: header.version, timestamp: timestamps[i], @@ -464,17 +507,29 @@ impl Query { Height::from(self.indexer().vecs.blocks.blockhash.len().saturating_sub(1)) } + /// Hash to height. The prefix store keys on the first 8 bytes of + /// the hash, so the resolved height is verified against the full + /// `blockhash[height]` before being returned. Prefix collisions + /// (or unknown hashes) surface as `NotFound`. pub fn height_by_hash(&self, hash: &BlockHash) -> Result { let indexer = self.indexer(); let prefix = BlockHashPrefix::from(hash); - indexer + let height = indexer .stores .blockhash_prefix_to_height .get(&prefix)? .map(|h| *h) - .ok_or(Error::NotFound("Block not found".into())) + .ok_or(Error::NotFound("Block not found".into()))?; + match indexer.vecs.blocks.blockhash.get(height) { + Some(stored) if &stored == hash => Ok(height), + _ => Err(Error::NotFound("Block not found".into())), + } } + /// Read the on-disk 80-byte header at `height` and decode it. + /// Caller must bounds-check `height` (no `OutOfRange` mapping here). + /// Returns `bitcoin::block::Header` because callers feed it into + /// upstream consensus-encoding APIs (`serialize_hex`, `MerkleBlock`). pub fn read_block_header(&self, height: Height) -> Result { let position = self .indexer() @@ -488,9 +543,21 @@ impl Query { .map_err(|_| Error::Internal("Failed to decode block header")) } - fn resolve_block_range(&self, start_height: Option, count: u32) -> (usize, usize) { - let max_height = self.height(); - let start = start_height.unwrap_or(max_height).min(max_height); + /// `(begin, end)` half-open window of up to `count` blocks ending + /// at `start_height` (default `cap`), clamped to `[0, cap]`. Caller + /// supplies `cap`: `tip_height()` when reading indexer-only series, + /// `height() = min(indexed, computed)` when reading computer-stamped + /// series too. + fn resolve_block_range( + &self, + start_height: Option, + count: u32, + cap: Height, + ) -> (usize, usize) { + let start = match start_height { + Some(h) => h.min(cap), + None => cap, + }; let start_u32: u32 = start.into(); let count = count.min(start_u32 + 1) as usize; let end = start_u32 as usize + 1; @@ -498,12 +565,23 @@ impl Query { (begin, end) } + /// Consensus-decodes 80 raw header bytes into the crate's `BlockHeader`. + /// Failure means on-disk corruption (the bytes already passed indexer + /// validation), so it surfaces as `Error::Internal`, not `OutOfRange`. fn decode_header(bytes: &[u8]) -> Result { let raw = bitcoin::block::Header::consensus_decode(&mut &bytes[..]) .map_err(|_| Error::Internal("Failed to decode block header"))?; Ok(BlockHeader::from(raw)) } + /// BIP113 Median Time Past for `height`: median of timestamps over + /// `[height-10, height]` (11 blocks). For `height < 10` the window is + /// shorter and the median is the upper-middle of available data, matching + /// Bitcoin Core's behavior. + /// + /// `all_timestamps` is the contiguous slab covering `[window_start, ..)` + /// pre-fetched by the caller, so this helper only translates absolute + /// heights into relative slice indices. fn compute_median_time( all_timestamps: &[Timestamp], height: usize, @@ -511,14 +589,15 @@ impl Query { ) -> Timestamp { let rel_start = height.saturating_sub(10) - window_start; let rel_end = height + 1 - window_start; - let mut sorted: Vec = all_timestamps[rel_start..rel_end] - .iter() - .map(|t| usize::from(*t)) - .collect(); + let mut sorted = all_timestamps[rel_start..rel_end].to_vec(); sorted.sort_unstable(); - Timestamp::from(sorted[sorted.len() / 2]) + sorted[sorted.len() / 2] } + /// Byte length of Bitcoin's CompactSize varint for a tx count. + /// `1` for `<= 0xFC`, `3` for the `0xFD`-prefixed u16 form, `5` for + /// the `0xFE`-prefixed u32 form. The 9-byte `0xFF`-prefixed u64 form + /// is unreachable here because the input is `u32`. fn compact_size_len(tx_count: u32) -> u32 { if tx_count <= 0xFC { 1 @@ -529,8 +608,18 @@ impl Query { } } - /// Parse OCEAN DATUM protocol miner names from coinbase scriptsig. - /// Skips BIP34 height push, reads tag payload, splits on 0x0F delimiter. + /// Parse OCEAN DATUM protocol miner names from a coinbase scriptsig. + /// + /// Layout: `[height_len][height_bytes][tags_push][tags_bytes...]`. + /// `tags_push` is either a direct push length (`<= 0x4b`) or + /// `OP_PUSHDATA1 (0x4c)` followed by a length byte. `tags_bytes` is + /// split on `0x0F` and each segment is sanitized to ASCII alphanumeric + /// plus space. + /// + /// Any structural mismatch (truncation, missing fields) returns `None`. + /// `OP_PUSHDATA2`/`OP_PUSHDATA4` are not handled: today's payloads are + /// well under 255 bytes, so this only matters if OCEAN ever publishes + /// a longer tag list. fn parse_datum_miner_names(scriptsig: &[u8]) -> Option> { if scriptsig.is_empty() { return None; @@ -558,19 +647,13 @@ impl Query { return None; } - // Decode tag bytes, strip nulls, split on 0x0F, keep only alphanumeric + space let tag_bytes = &scriptsig[tag_start..tag_start + tags_len]; - let tag_string: String = tag_bytes - .iter() - .filter(|&&b| b != 0x00) - .map(|&b| b as char) - .collect(); - - let names: Vec = tag_string - .split('\x0f') - .map(|s| { - s.chars() - .filter(|c| c.is_ascii_alphanumeric() || *c == ' ') + let names: Vec = tag_bytes + .split(|&b| b == 0x0f) + .map(|seg| { + seg.iter() + .filter(|&&b| b.is_ascii_alphanumeric() || b == b' ') + .map(|&b| b as char) .collect::() }) .filter(|s| !s.trim().is_empty()) @@ -579,34 +662,18 @@ impl Query { if names.is_empty() { None } else { Some(names) } } - fn parse_coinbase_from_read( - reader: impl Read, - ) -> ( - String, - Option, - Vec, - String, - String, - Vec, - usize, - ) { - let empty = ( - String::new(), - None, - vec![], - String::new(), - String::new(), - vec![], - 0, - ); + /// Decode a coinbase transaction off the block reader into a + /// `Coinbase` struct. Decode failure is silent: returns + /// `Coinbase::default()`. The caller hard-fails on header-read errors + /// but accepts coinbase parse failures (they manifest as missing + /// `extras` rather than a 5xx). + fn parse_coinbase_from_read(reader: impl Read) -> Coinbase { + let tx = match bitcoin::Transaction::consensus_decode(&mut bitcoin::io::FromStd::new(reader)) { + Ok(tx) => tx, + Err(_) => return Coinbase::default(), + }; - let tx = - match bitcoin::Transaction::consensus_decode(&mut bitcoin::io::FromStd::new(reader)) { - Ok(tx) => tx, - Err(_) => return empty, - }; - - let coinbase_total_size = tx.total_size(); + let total_size = tx.total_size(); let scriptsig_bytes: Vec = tx .input @@ -614,11 +681,11 @@ impl Query { .map(|input| input.script_sig.as_bytes().to_vec()) .unwrap_or_default(); - let coinbase_raw = scriptsig_bytes.to_lower_hex_string(); + let raw_hex = scriptsig_bytes.to_lower_hex_string(); - let coinbase_signature_ascii: String = scriptsig_bytes.iter().map(|&b| b as char).collect(); + let scriptsig_ascii: String = scriptsig_bytes.iter().map(|&b| b as char).collect(); - let mut coinbase_addresses: Vec = tx + let mut addresses: Vec = tx .output .iter() .filter_map(|output| { @@ -627,10 +694,12 @@ impl Query { .map(|a| a.to_string()) }) .collect(); - coinbase_addresses.dedup(); - let coinbase_address = coinbase_addresses.first().cloned(); + // Collapse consecutive duplicates only: padding outputs to the same + // payout get merged, multi-payout pools keep distinct order. + addresses.dedup(); + let primary_address = addresses.first().cloned(); - let coinbase_signature = tx + let payout_asm = tx .output .iter() .find(|output| !output.script_pubkey.is_op_return()) @@ -638,14 +707,14 @@ impl Query { .map(|output| output.script_pubkey.to_asm_string()) .unwrap_or_default(); - ( - coinbase_raw, - coinbase_address, - coinbase_addresses, - coinbase_signature, - coinbase_signature_ascii, + Coinbase { + raw_hex, + primary_address, + addresses, + payout_asm, + scriptsig_ascii, scriptsig_bytes, - coinbase_total_size, - ) + total_size, + } } } diff --git a/crates/brk_query/src/impl/block/mod.rs b/crates/brk_query/src/impl/block/mod.rs index 999d42dc8..18d227f27 100644 --- a/crates/brk_query/src/impl/block/mod.rs +++ b/crates/brk_query/src/impl/block/mod.rs @@ -3,5 +3,3 @@ mod raw; mod status; mod timestamp; mod txs; - -pub const BLOCK_TXS_PAGE_SIZE: usize = 25; diff --git a/crates/brk_query/src/impl/block/raw.rs b/crates/brk_query/src/impl/block/raw.rs index 39f13b98e..cd3c199d2 100644 --- a/crates/brk_query/src/impl/block/raw.rs +++ b/crates/brk_query/src/impl/block/raw.rs @@ -13,9 +13,9 @@ impl Query { fn block_raw_by_height(&self, height: Height) -> Result> { let max_height = self.tip_height(); if height > max_height { - return Err(Error::OutOfRange(format!( - "Block height {height} out of range (tip {max_height})" - ))); + return Err(Error::OutOfRange( + format!("Block height {height} out of range (tip {max_height})").into(), + )); } let indexer = self.indexer(); diff --git a/crates/brk_query/src/impl/block/txs.rs b/crates/brk_query/src/impl/block/txs.rs index 583d02211..74c83af22 100644 --- a/crates/brk_query/src/impl/block/txs.rs +++ b/crates/brk_query/src/impl/block/txs.rs @@ -3,22 +3,34 @@ use std::io::Cursor; use bitcoin::consensus::Decodable; use brk_error::{Error, OptionData, Result}; use brk_types::{ - BlkPosition, BlockHash, Height, OutPoint, OutputType, RawLockTime, Sats, StoredU32, - Transaction, TxIn, TxInIndex, TxIndex, TxOut, TxStatus, Txid, TypeIndex, Vout, Weight, + BlkPosition, BlockHash, BlockTxIndex, Height, OutPoint, OutputType, RawLockTime, Sats, SigOps, + StoredU32, Transaction, TxIn, TxInIndex, TxIndex, TxOut, TxStatus, Txid, TypeIndex, Vout, + Weight, }; use rustc_hash::FxHashMap; use vecdb::{AnyVec, ReadableVec, VecIndex}; -use super::BLOCK_TXS_PAGE_SIZE; use crate::Query; impl Query { + /// All txids in the block, canonical order (coinbase first). + /// `NotFound` if the hash is unknown (or only collides on the 8-byte + /// prefix), `OutOfRange` if the resolved height is past the indexed tip. + /// Unpaginated by design. pub fn block_txids(&self, hash: &BlockHash) -> Result> { let height = self.height_by_hash(hash)?; self.block_txids_by_height(height) } - pub fn block_txs(&self, hash: &BlockHash, start_index: TxIndex) -> Result> { + /// Up to `count` transactions from the block, starting at the in-block + /// offset `start_index` (0 = coinbase). `OutOfRange` when `start_index` + /// is past the last tx in the block. Caller (route layer) sets `count`. + pub fn block_txs( + &self, + hash: &BlockHash, + start_index: BlockTxIndex, + count: u32, + ) -> Result> { let height = self.height_by_hash(hash)?; let (first, tx_count) = self.block_tx_range(height)?; let start: usize = start_index.into(); @@ -27,51 +39,77 @@ impl Query { "start index past last transaction in block".into(), )); } - let count = BLOCK_TXS_PAGE_SIZE.min(tx_count - start); + let count = (count as usize).min(tx_count - start); let indices: Vec = (first + start..first + start + count) .map(TxIndex::from) .collect(); self.transactions_by_indices(&indices) } - pub fn block_txid_at_index(&self, hash: &BlockHash, index: TxIndex) -> Result { + /// Txid at an in-block offset (`index` is the position within the block, + /// 0 = coinbase). `NotFound` if the hash is unknown or only collides on + /// the 8-byte prefix; `OutOfRange` if `index` is past the last tx in + /// the block. + pub fn block_txid_at_index(&self, hash: &BlockHash, index: BlockTxIndex) -> Result { let height = self.height_by_hash(hash)?; self.block_txid_at_index_by_height(height, index.into()) } // === Helper methods === + /// All txids in the block at `height`, canonical order. `OutOfRange` + /// when `height` is past the indexed tip; `Internal` if any read hits + /// the stamp-before-data race or short-returns. Used by both the + /// hash-keyed and height-keyed entry points so they share bounds + /// semantics. pub(crate) fn block_txids_by_height(&self, height: Height) -> Result> { let (first, tx_count) = self.block_tx_range(height)?; - Ok(self + let txids = self .indexer() .vecs .transactions .txid - .collect_range_at(first, first + tx_count)) + .collect_range_at(first, first + tx_count); + if txids.len() != tx_count { + return Err(Error::Internal( + "block_txids_by_height: short txid read", + )); + } + Ok(txids) } + /// Single txid at an in-block offset. `OutOfRange` when `index` is past + /// the last tx in the block. `Internal` if the underlying read finds + /// the stamp-before-data race (`first_tx_index` flushed ahead of `txid`). fn block_txid_at_index_by_height(&self, height: Height, index: usize) -> Result { let (first, tx_count) = self.block_tx_range(height)?; if index >= tx_count { return Err(Error::OutOfRange("Transaction index out of range".into())); } - Ok(self - .indexer() + self.indexer() .vecs .transactions .txid .reader() - .get(first + index)) + .try_get(first + index) + .ok_or(Error::Internal( + "block_txid_at_index_by_height: txid index past data", + )) } /// Batch-read transactions at arbitrary indices. /// Reads in ascending index order for I/O locality, returns in caller's order. /// - /// Three-phase approach for optimal I/O: - /// Phase 1 — Decode transactions & collect outpoints (sorted by tx_index) - /// Phase 2 — Batch-read all prevout data (sorted by prev_tx_index, then txout_index) - /// Phase 3 — Assemble Transaction objects from pre-fetched data + /// Three-phase approach for sequential cursor I/O: + /// Phase 1: decode transactions, collect outpoints + per-input prevout + /// metadata (sorted by tx_index). + /// Phase 2: resolve each prevout's script_pubkey (sorted by + /// output_type, then type_index, for sequential addr-vec reads). + /// Phase 3: assemble `Transaction` objects, compute sigops + fees. + /// + /// The final `unwrap` is provably safe: `order` is a permutation of + /// `0..len`, Phase 1 produces exactly one `DecodedTx` per position, and + /// Phase 3 assigns each `txs[pos]` once before the collect. pub fn transactions_by_indices(&self, indices: &[TxIndex]) -> Result> { if indices.is_empty() { return Ok(Vec::new()); @@ -84,6 +122,7 @@ impl Query { order.sort_unstable_by_key(|&i| indices[i]); let indexer = self.indexer(); + // BLK file reader, distinct from the vec cursors below. let reader = self.reader(); // ── Phase 1: Decode all transactions, collect outpoints ───────── @@ -147,8 +186,8 @@ impl Query { }); } - // Phase 1b: Batch-read outpoints + prevout data via cursors (PcoVec — - // sequential cursor avoids re-decompressing the same pages). + // Phase 1b: Batch-read outpoints + prevout data via cursors. PcoVec + // sequential cursors avoid re-decompressing the same pages. // Reading output_type/type_index/value HERE from inputs vecs (sequential) // avoids random-reading them from outputs vecs in Phase 2. let mut outpoint_cursor = indexer.vecs.inputs.outpoint.cursor(); @@ -247,7 +286,7 @@ impl Query { .map(|(j, txin)| (txin.previous_output, j)) .collect(); - let total_sigop_cost = dtx.decoded.total_sigop_cost(|outpoint| { + let total_sigop_cost = SigOps::of_bitcoin_tx(&dtx.decoded, |outpoint| { outpoint_to_idx .get(outpoint) .and_then(|&j| input[j].prevout.as_ref()) @@ -280,7 +319,15 @@ impl Query { Ok(txs.into_iter().map(Option::unwrap).collect()) } - /// Returns (first_tx_raw_index, tx_count) for a block at `height`. + /// Half-open `[first, first + tx_count)` window into the flat tx vecs + /// for the block at `height`. Single source of truth for the four + /// `block_*` callers in this file. + /// + /// `OutOfRange` when `height` is past the indexed-tip stamp. + /// `Internal` when `first_tx_index[height]` is missing under the + /// stamp-before-data race. For the tip block (where + /// `first_tx_index[height+1]` is not yet written), `next` falls back + /// to `txid.len()`. fn block_tx_range(&self, height: Height) -> Result<(usize, usize)> { let indexer = self.indexer(); if height > self.indexed_height() { diff --git a/crates/brk_query/src/impl/cpfp.rs b/crates/brk_query/src/impl/cpfp.rs new file mode 100644 index 000000000..05814381d --- /dev/null +++ b/crates/brk_query/src/impl/cpfp.rs @@ -0,0 +1,346 @@ +//! CPFP queries: dispatches between the live mempool path (handled by +//! `brk_mempool`) and the confirmed-tx path built here from indexer +//! and computer vecs. +//! +//! Confirmed clusters are built on demand by walking the same-block +//! parent/child edges in `TxIndex` space (no `Transaction` +//! reconstruction, no `txid → tx_index` lookup), then handing the +//! resulting `brk_mempool::cluster::Cluster` to `Cluster::to_cpfp_info` +//! — the same wire converter the mempool path uses, so both produce +//! identical `CpfpInfo` shapes. + +use std::io::Cursor; + +use bitcoin::consensus::Decodable; +use brk_error::{Error, OptionData, Result}; +use brk_mempool::cluster::{Cluster, ClusterNode, LocalIdx}; +use brk_types::{ + CpfpInfo, FeeRate, Height, OutPoint, OutputType, Sats, SigOps, TxIndex, TxInIndex, TypeIndex, + Txid, TxidPrefix, VSize, Weight, +}; +use rustc_hash::{FxBuildHasher, FxHashMap}; +use smallvec::SmallVec; +use vecdb::{AnyVec, ReadableVec, VecIndex}; + +use crate::Query; + +/// Cap matches Bitcoin Core's default mempool ancestor/descendant +/// chain limits and mempool.space's truncation. +const MAX: usize = 25; + +struct WalkResult { + /// Cluster members in build order (`[seed, ancestors..., descendants...]`), + /// each paired with its in-cluster parent edges already resolved to + /// `LocalIdx`. Vec position equals the node's `LocalIdx`. + nodes: Vec<(TxIndex, SmallVec<[LocalIdx; 2]>)>, + /// Pre-permutation `LocalIdx` of the seed. Equals `ancestor_count` + /// because all of seed's in-cluster ancestors topo-sort before it + /// and only ancestors do, so after `Cluster::new` permutes nodes + /// into topological order seed lands at this exact position. + seed_local: LocalIdx, +} + +impl Query { + /// CPFP cluster for `txid`. Returns the mempool cluster when the + /// txid is unconfirmed; otherwise reconstructs the confirmed + /// same-block cluster from indexer state. Works even when the + /// mempool feature is off. + pub fn cpfp(&self, txid: &Txid) -> Result { + let prefix = TxidPrefix::from(txid); + if let Some(info) = self.mempool().and_then(|m| m.cpfp_info(&prefix)) { + return Ok(info); + } + self.confirmed_cpfp(txid) + } + + /// Effective fee rate for `txid` using the same SFL chunk-rate + /// semantics across paths: + /// + /// - Live mempool: snapshot `cluster_of` lookup → seed's chunk rate. + /// If the tx is in the pool but not in the latest snapshot (e.g. + /// just added), falls back to the entry's simple `fee/vsize`. + /// - Confirmed: precomputed `effective_fee_rate.tx_index` (the same + /// SFL chunk rate, computed at index time). + /// - Graveyard-only RBF predecessor: simple `fee/vsize` snapshotted + /// at burial. + /// + /// Returns `Error::UnknownTxid` for txids not seen in any of those. + pub fn effective_fee_rate(&self, txid: &Txid) -> Result { + let prefix = TxidPrefix::from(txid); + + if let Some(mempool) = self.mempool() { + let entries = mempool.entries(); + if let Some(seed_idx) = entries.idx_of(&prefix) + && let Some(rate) = mempool.snapshot().chunk_rate_of(seed_idx) + { + return Ok(rate); + } + if let Some(entry) = entries.get(&prefix) { + return Ok(entry.fee_rate()); + } + } + + if let Ok(idx) = self.resolve_tx_index(txid) + && let Some(rate) = self + .computer() + .transactions + .fees + .effective_fee_rate + .tx_index + .collect_one(idx) + { + return Ok(rate); + } + + if let Some(mempool) = self.mempool() + && let Some(tomb) = mempool.graveyard().get(txid) + { + return Ok(tomb.entry.fee_rate()); + } + + Err(Error::UnknownTxid) + } + + /// CPFP cluster for a confirmed tx: the connected component of + /// same-block parent/child edges, walked on demand. SFL runs on + /// the result so `effectiveFeePerVsize` matches the live path's + /// chunk-rate semantics. + fn confirmed_cpfp(&self, txid: &Txid) -> Result { + let seed = self.resolve_tx_index(txid)?; + let height = self.confirmed_status_height(seed)?; + let (cluster, seed_local) = self.build_confirmed_cluster(seed, height)?; + let sigops = self.seed_sigop_cost(seed)?; + Ok(cluster.to_cpfp_info(seed_local, sigops)) + } + + /// BIP-141 sigop cost for a single confirmed tx, computed on demand: + /// re-decode the raw tx, rebuild its prevout map from `inputs.*` + + /// addr vecs, then defer the actual count to `SigOps::of_bitcoin_tx`. + /// Cost is one BLK read plus `n_inputs` cursor hops, so a few hundred + /// microseconds per CPFP request. + fn seed_sigop_cost(&self, tx_index: TxIndex) -> Result { + let indexer = self.indexer(); + let total_size = indexer + .vecs + .transactions + .total_size + .collect_one(tx_index) + .data()?; + let position = indexer + .vecs + .transactions + .position + .collect_one(tx_index) + .data()?; + let buffer = self.reader().read_raw_bytes(position, *total_size as usize)?; + let decoded = bitcoin::Transaction::consensus_decode(&mut Cursor::new(buffer)) + .map_err(|_| Error::Parse("Failed to decode transaction".into()))?; + + let first_txin = indexer + .vecs + .transactions + .first_txin_index + .collect_one(tx_index) + .data()?; + let start = usize::from(first_txin); + let count = decoded.input.len(); + + let mut outpoint_cursor = indexer.vecs.inputs.outpoint.cursor(); + let mut output_type_cursor = indexer.vecs.inputs.output_type.cursor(); + let mut type_index_cursor = indexer.vecs.inputs.type_index.cursor(); + let mut value_cursor = self.computer().inputs.spent.value.cursor(); + + let addr_readers = indexer.vecs.addrs.addr_readers(); + + let mut prevout_map: FxHashMap = + FxHashMap::with_capacity_and_hasher(count, FxBuildHasher); + + for (j, txin) in decoded.input.iter().enumerate() { + let op: OutPoint = outpoint_cursor.get(start + j).data()?; + if op.is_coinbase() { + continue; + } + let ot: OutputType = output_type_cursor.get(start + j).data()?; + let ti: TypeIndex = type_index_cursor.get(start + j).data()?; + let val: Sats = value_cursor.get(start + j).data()?; + let script_pubkey = addr_readers.script_pubkey(ot, ti); + prevout_map.insert( + txin.previous_output, + bitcoin::TxOut { + value: bitcoin::Amount::from_sat(u64::from(val)), + script_pubkey, + }, + ); + } + + Ok(SigOps::of_bitcoin_tx(&decoded, |outpoint| { + prevout_map.get(outpoint).cloned() + })) + } + + /// Walk the seed's same-block parent/child edges, materialize each + /// member's `(txid, weight, fee)` from indexer/computer cursors, + /// and build a `Cluster`. The seed's `LocalIdx` comes + /// straight from the walk (`ancestor_count`), since `Cluster::new` + /// preserves the "ancestors before seed before descendants" ordering + /// that defines that index. + fn build_confirmed_cluster( + &self, + seed: TxIndex, + height: Height, + ) -> Result<(Cluster, LocalIdx)> { + let indexer = self.indexer(); + let computer = self.computer(); + let block_first = indexer + .vecs + .transactions + .first_tx_index + .collect_one(height) + .data()?; + let block_end = indexer + .vecs + .transactions + .first_tx_index + .collect_one(height.incremented()) + .unwrap_or_else(|| TxIndex::from(indexer.vecs.transactions.txid.len())); + let same_block = |idx: TxIndex| idx >= block_first && idx < block_end; + + let WalkResult { nodes, seed_local } = self.walk_same_block_edges(seed, same_block); + + let mut base_size = indexer.vecs.transactions.base_size.cursor(); + let mut total_size = indexer.vecs.transactions.total_size.cursor(); + let mut fee_cursor = computer.transactions.fees.fee.tx_index.cursor(); + let txid_reader = indexer.vecs.transactions.txid.reader(); + + let cluster_nodes: Vec> = nodes + .into_iter() + .map(|(tx_index, parents)| { + let i = tx_index.to_usize(); + let weight = Weight::from_sizes(*base_size.get(i).data()?, *total_size.get(i).data()?); + Ok(ClusterNode { + id: tx_index, + txid: txid_reader.get(i), + fee: fee_cursor.get(i).data()?, + vsize: VSize::from(weight), + weight, + parents, + }) + }) + .collect::>()?; + + Ok((Cluster::new(cluster_nodes), seed_local)) + } + + /// BFS the seed's same-block ancestors (via `outpoint`) and + /// descendants (via `spent.txin_index` → `spending_tx`), capped + /// at `MAX` each side to match Core/mempool.space. Each node is + /// pushed in build order with its full parent-outpoint list, then + /// at end of walk those lists are filtered against the membership + /// map to keep only in-cluster parents (resolved to `LocalIdx`). + fn walk_same_block_edges( + &self, + seed: TxIndex, + same_block: impl Fn(TxIndex) -> bool, + ) -> WalkResult { + let indexer = self.indexer(); + let computer = self.computer(); + let mut first_txin = indexer.vecs.transactions.first_txin_index.cursor(); + let mut first_txout = indexer.vecs.transactions.first_txout_index.cursor(); + let mut outpoint = indexer.vecs.inputs.outpoint.cursor(); + let mut spent = computer.outputs.spent.txin_index.cursor(); + let mut spending_tx = indexer.vecs.inputs.tx_index.cursor(); + + let mut walk_inputs = |tx: TxIndex| -> SmallVec<[TxIndex; 2]> { + let mut out: SmallVec<[TxIndex; 2]> = SmallVec::new(); + let Ok(start) = first_txin.get(tx.to_usize()).data() else { return out }; + let Ok(end) = first_txin.get(tx.to_usize() + 1).data() else { return out }; + for i in usize::from(start)..usize::from(end) { + let Ok(op) = outpoint.get(i).data() else { continue }; + if op.is_coinbase() { + continue; + } + out.push(op.tx_index()); + } + out + }; + + let mut raw: Vec<(TxIndex, SmallVec<[TxIndex; 2]>)> = Vec::with_capacity(2 * MAX + 1); + let mut local_of: FxHashMap = + FxHashMap::with_capacity_and_hasher(2 * MAX + 1, FxBuildHasher); + raw.push((seed, walk_inputs(seed))); + local_of.insert(seed, LocalIdx::ZERO); + + // Ancestor BFS. Stack holds indices into `raw`; each pop reads + // that node's already-recorded parents and explores any same-block + // ones we haven't visited yet. `walk_inputs` runs at push time so + // parents are ready for the post-walk filter. + let mut stack: Vec = vec![0]; + let mut ancestor_count: usize = 0; + 'a: while let Some(idx) = stack.pop() { + let parents = raw[idx].1.clone(); + for parent in parents { + if ancestor_count >= MAX { + break 'a; + } + if local_of.contains_key(&parent) || !same_block(parent) { + continue; + } + let new_idx = raw.len(); + raw.push((parent, walk_inputs(parent))); + local_of.insert(parent, LocalIdx::from(new_idx)); + stack.push(new_idx); + ancestor_count += 1; + } + } + + // Descendant BFS. Stack holds tx_indices since we look up each + // tx's txouts via `first_txout`/`spent`/`spending_tx`. `local_of` + // already contains the seed and every ancestor, so they're + // skipped by the membership check. + let mut stack: Vec = vec![seed]; + let mut descendant_count = 0; + 'd: while let Some(cur) = stack.pop() { + let Ok(start) = first_txout.get(cur.to_usize()).data() else { continue }; + let Ok(end) = first_txout.get(cur.to_usize() + 1).data() else { continue }; + for i in usize::from(start)..usize::from(end) { + let Ok(txin_idx) = spent.get(i).data() else { continue }; + if txin_idx == TxInIndex::UNSPENT { + continue; + } + let Ok(child) = spending_tx.get(usize::from(txin_idx)).data() else { continue }; + if local_of.contains_key(&child) || !same_block(child) { + continue; + } + let new_idx = raw.len(); + raw.push((child, walk_inputs(child))); + local_of.insert(child, LocalIdx::from(new_idx)); + stack.push(child); + descendant_count += 1; + if descendant_count >= MAX { + break 'd; + } + } + } + + // Filter each node's full input list against `local_of` to keep + // only in-cluster parents, resolved to their `LocalIdx`. + let nodes: Vec<(TxIndex, SmallVec<[LocalIdx; 2]>)> = raw + .into_iter() + .map(|(tx_index, full_inputs)| { + let parents: SmallVec<[LocalIdx; 2]> = full_inputs + .iter() + .filter_map(|p| local_of.get(p).copied()) + .collect(); + (tx_index, parents) + }) + .collect(); + + // Seed's pre-permutation index is 0; after `Cluster::new` topo-sorts + // it lands at `ancestor_count` (all in-cluster ancestors come first, + // and only ancestors do). + WalkResult { + nodes, + seed_local: LocalIdx::from(ancestor_count), + } + } +} diff --git a/crates/brk_query/src/impl/mempool.rs b/crates/brk_query/src/impl/mempool.rs index bf0d21193..7084355bb 100644 --- a/crates/brk_query/src/impl/mempool.rs +++ b/crates/brk_query/src/impl/mempool.rs @@ -1,12 +1,12 @@ -use brk_error::{Error, OptionData, Result}; +use brk_error::{Error, Result}; use brk_mempool::{EntryPool, Mempool, TxEntry, TxGraveyard, TxRemoval, TxStore, TxTombstone}; use brk_types::{ - CheckedSub, CpfpEntry, CpfpInfo, FeeRate, MempoolBlock, MempoolInfo, MempoolRecentTx, - OutputType, RbfResponse, RbfTx, RecommendedFees, ReplacementNode, Sats, Timestamp, Transaction, - TxIndex, TxInIndex, TxOut, TxOutIndex, Txid, TxidPrefix, TypeIndex, VSize, Weight, + CheckedSub, MempoolBlock, MempoolInfo, MempoolRecentTx, OutputType, RbfResponse, RbfTx, + RecommendedFees, ReplacementNode, Sats, Timestamp, Transaction, TxOut, TxOutIndex, Txid, + TxidPrefix, TypeIndex, }; use rustc_hash::FxHashSet; -use vecdb::{AnyVec, ReadableVec, VecIndex}; +use vecdb::VecIndex; use crate::Query; @@ -93,197 +93,6 @@ impl Query { Ok(self.require_mempool()?.txs().recent().to_vec()) } - /// CPFP cluster for `txid`. Returns the mempool cluster when the txid is - /// unconfirmed; otherwise reconstructs the confirmed same-block cluster - /// from indexer state. Works even when the mempool feature is off. - pub fn cpfp(&self, txid: &Txid) -> Result { - let prefix = TxidPrefix::from(txid); - let mempool_cluster = self.mempool().and_then(|m| m.cpfp_info(&prefix)); - Ok(mempool_cluster.unwrap_or_else(|| self.confirmed_cpfp(txid))) - } - - /// CPFP cluster for a confirmed tx: the connected component of - /// same-block parent/child edges, reconstructed by a depth-first - /// walk on demand. Walks entirely in `TxIndex` space using direct - /// vec reads (height, weight, fee) - skips full `Transaction` - /// reconstruction and avoids `txid -> tx_index` lookups by reading - /// `OutPoint`'s packed `tx_index` directly. Capped at 25 each side - /// to match Bitcoin Core's default mempool chain limits and - /// mempool.space's own truncation. `effectiveFeePerVsize` is the - /// simple package rate; mempool's `calculateGoodBlockCpfp` - /// chunk-rate algorithm is not ported. - fn confirmed_cpfp(&self, txid: &Txid) -> CpfpInfo { - const MAX: usize = 25; - let Ok(seed_idx) = self.resolve_tx_index(txid) else { - return CpfpInfo::default(); - }; - let Ok(seed_height) = self.confirmed_status_height(seed_idx) else { - return CpfpInfo::default(); - }; - - let indexer = self.indexer(); - let computer = self.computer(); - // Block's tx_index range. Reduces the per-neighbor height check to a - // pair of integer compares (vs `tx_heights.get_shared` which acquires - // a read lock and walks a `RangeMap`). - let Ok(block_first) = indexer - .vecs - .transactions - .first_tx_index - .collect_one(seed_height) - .data() - else { - return CpfpInfo::default(); - }; - let block_end = indexer - .vecs - .transactions - .first_tx_index - .collect_one(seed_height.incremented()) - .unwrap_or_else(|| TxIndex::from(indexer.vecs.transactions.txid.len())); - let same_block = |idx: TxIndex| idx >= block_first && idx < block_end; - - let mut first_txin = indexer.vecs.transactions.first_txin_index.cursor(); - let mut first_txout = indexer.vecs.transactions.first_txout_index.cursor(); - let mut outpoint = indexer.vecs.inputs.outpoint.cursor(); - let mut spent = computer.outputs.spent.txin_index.cursor(); - let mut spending_tx = indexer.vecs.inputs.tx_index.cursor(); - - let mut visited: FxHashSet = FxHashSet::with_capacity_and_hasher( - 2 * MAX + 1, - Default::default(), - ); - visited.insert(seed_idx); - - let mut ancestor_idxs: Vec = Vec::with_capacity(MAX); - let mut queue: Vec = vec![seed_idx]; - 'a: while let Some(cur) = queue.pop() { - let Ok(start) = first_txin.get(cur.to_usize()).data() else { continue }; - let Ok(end) = first_txin.get(cur.to_usize() + 1).data() else { continue }; - for i in usize::from(start)..usize::from(end) { - let Ok(op) = outpoint.get(i).data() else { continue }; - if op.is_coinbase() { - continue; - } - let parent = op.tx_index(); - if !visited.insert(parent) || !same_block(parent) { - continue; - } - ancestor_idxs.push(parent); - queue.push(parent); - if ancestor_idxs.len() >= MAX { - break 'a; - } - } - } - - let mut descendant_idxs: Vec = Vec::with_capacity(MAX); - let mut queue: Vec = vec![seed_idx]; - 'd: while let Some(cur) = queue.pop() { - let Ok(start) = first_txout.get(cur.to_usize()).data() else { continue }; - let Ok(end) = first_txout.get(cur.to_usize() + 1).data() else { continue }; - for i in usize::from(start)..usize::from(end) { - let Ok(txin_idx) = spent.get(i).data() else { continue }; - if txin_idx == TxInIndex::UNSPENT { - continue; - } - let Ok(child) = spending_tx.get(usize::from(txin_idx)).data() else { continue }; - if !visited.insert(child) || !same_block(child) { - continue; - } - descendant_idxs.push(child); - queue.push(child); - if descendant_idxs.len() >= MAX { - break 'd; - } - } - } - - // Phase 2: bulk-fetch (weight, fee) for seed + cluster, cursors opened - // once and reads issued in tx_index order for sequential page locality. - let mut all = Vec::with_capacity(1 + ancestor_idxs.len() + descendant_idxs.len()); - all.push(seed_idx); - all.extend(&ancestor_idxs); - all.extend(&descendant_idxs); - let Ok(weights_fees) = self.txs_weight_fee(&all) else { - return CpfpInfo::default(); - }; - - let txid_reader = indexer.vecs.transactions.txid.reader(); - let entry_at = |i: usize, idx: TxIndex| { - let (weight, fee) = weights_fees[i]; - CpfpEntry { - txid: txid_reader.get(idx.to_usize()), - weight, - fee, - } - }; - let (seed_weight, seed_fee) = weights_fees[0]; - let seed_vsize = VSize::from(seed_weight); - let ancestors: Vec = ancestor_idxs - .iter() - .enumerate() - .map(|(k, &idx)| entry_at(1 + k, idx)) - .collect(); - let descendants: Vec = descendant_idxs - .iter() - .enumerate() - .map(|(k, &idx)| entry_at(1 + ancestor_idxs.len() + k, idx)) - .collect(); - - let (sum_fee, sum_vsize) = ancestors - .iter() - .chain(descendants.iter()) - .fold((u64::from(seed_fee), u64::from(seed_vsize)), |(f, v), e| { - (f + u64::from(e.fee), v + u64::from(VSize::from(e.weight))) - }); - let package_rate = FeeRate::from((Sats::from(sum_fee), VSize::from(sum_vsize))); - let effective = FeeRate::from((seed_fee, seed_vsize)).max(package_rate); - - let best_descendant = descendants - .iter() - .max_by_key(|e| FeeRate::from((e.fee, e.weight))) - .cloned(); - - CpfpInfo { - ancestors, - best_descendant, - descendants, - effective_fee_per_vsize: Some(effective), - sigops: None, - fee: Some(seed_fee), - adjusted_vsize: Some(seed_vsize), - cluster: None, - } - } - - /// Bulk read `(weight, fee)` for many tx_indexes. Cursors opened once; - /// reads issued in ascending `tx_index` order for sequential I/O, - /// results returned in the caller's order. - fn txs_weight_fee(&self, idxs: &[TxIndex]) -> Result> { - if idxs.is_empty() { - return Ok(vec![]); - } - let indexer = self.indexer(); - let computer = self.computer(); - let mut base_size = indexer.vecs.transactions.base_size.cursor(); - let mut total_size = indexer.vecs.transactions.total_size.cursor(); - let mut fee_cursor = computer.transactions.fees.fee.tx_index.cursor(); - - let mut order: Vec = (0..idxs.len()).collect(); - order.sort_unstable_by_key(|&i| idxs[i]); - - let mut out = vec![(Weight::default(), Sats::ZERO); idxs.len()]; - for &pos in &order { - let i = idxs[pos].to_usize(); - let bs = base_size.get(i).data()?; - let ts = total_size.get(i).data()?; - let f = fee_cursor.get(i).data()?; - out[pos] = (Weight::from_sizes(*bs, *ts), f); - } - Ok(out) - } - /// RBF history for a tx, matching mempool.space's /// `GET /api/v1/tx/:txid/rbf`. Walks forward through the graveyard /// to find the latest known replacer (tree root), then recursively @@ -295,21 +104,15 @@ impl Query { let entries = mempool.entries(); let graveyard = mempool.graveyard(); - let mut root_txid = txid.clone(); - while let Some(TxRemoval::Replaced { by }) = - graveyard.get(&root_txid).map(TxTombstone::reason) - { - root_txid = by.clone(); - } + let root_txid = Self::walk_to_replacement_root(&graveyard, *txid); let replaces_vec: Vec = graveyard .predecessors_of(txid) - .map(|(p, _)| p.clone()) + .map(|(p, _)| *p) .collect(); let replaces = (!replaces_vec.is_empty()).then_some(replaces_vec); - let replacements = - self.build_rbf_node(&root_txid, None, mempool, &txs, &entries, &graveyard); + let replacements = self.build_rbf_node(&root_txid, None, &txs, &entries, &graveyard); Ok(RbfResponse { replacements, @@ -317,6 +120,18 @@ impl Query { }) } + /// Walk forward through `Replaced { by }` links to the terminal + /// replacer of an RBF chain. Returns `txid` itself if it's already + /// the root. + fn walk_to_replacement_root(graveyard: &TxGraveyard, mut root: Txid) -> Txid { + while let Some(TxRemoval::Replaced { by }) = + graveyard.get(&root).map(TxTombstone::reason) + { + root = *by; + } + root + } + /// Resolve a txid to the data we need for an `RbfTx`. The live /// pool takes priority; the graveyard is the fallback. Returns /// `None` if the tx has no known data in either. @@ -337,16 +152,13 @@ impl Query { /// `Removal::Replaced` lives), so the recursion only needs the /// graveyard; the live pool is consulted for the root. /// - /// `rate` matches mempool.space's `tx.effectiveFeePerVsize`: live - /// txs get the live CPFP-cluster effective rate; mined txs get the - /// computer's stored same-block-cluster effective rate; never-mined - /// replaced predecessors have no recorded effective rate, so we - /// fall back to the simple `fee/vsize` snapshotted at burial. + /// `rate` matches mempool.space's `tx.effectiveFeePerVsize` via + /// `Query::effective_fee_rate`, with a fall-back to the entry's + /// simple `fee/vsize` when the rate lookup fails. fn build_rbf_node( &self, txid: &Txid, successor_time: Option, - mempool: &Mempool, txs: &TxStore, entries: &EntryPool, graveyard: &TxGraveyard, @@ -356,14 +168,7 @@ impl Query { let replaces: Vec = graveyard .predecessors_of(txid) .filter_map(|(pred_txid, _)| { - self.build_rbf_node( - pred_txid, - Some(entry.first_seen), - mempool, - txs, - entries, - graveyard, - ) + self.build_rbf_node(pred_txid, Some(entry.first_seen), txs, entries, graveyard) }) .collect(); @@ -371,31 +176,17 @@ impl Query { let interval = successor_time .and_then(|st| st.checked_sub(entry.first_seen)) - .map(|d| usize::from(d) as u32); + .map(|d| *d); - let value = Sats::from(tx.output.iter().map(|o| u64::from(o.value)).sum::()); - let tx_index = self.resolve_tx_index(txid).ok(); - let mined = tx_index.map(|_| true); - let rate = if txs.contains(txid) { - mempool - .cpfp_info(&TxidPrefix::from(txid)) - .and_then(|info| info.effective_fee_per_vsize) - .unwrap_or_else(|| entry.fee_rate()) - } else if let Some(idx) = tx_index { - self.computer() - .transactions - .fees - .effective_fee_rate - .tx_index - .collect_one(idx) - .unwrap_or_else(|| entry.fee_rate()) - } else { - entry.fee_rate() - }; + let value: Sats = tx.output.iter().map(|o| o.value).sum(); + let mined = self.resolve_tx_index(txid).is_ok().then_some(true); + let rate = self + .effective_fee_rate(txid) + .unwrap_or_else(|_| entry.fee_rate()); Some(ReplacementNode { tx: RbfTx { - txid: txid.clone(), + txid: *txid, fee: entry.fee, vsize: entry.vsize, value, @@ -435,17 +226,10 @@ impl Query { Ok(graveyard .replaced_iter_recent_first() .filter_map(|(_, by)| { - let mut root = by.clone(); - while let Some(TxRemoval::Replaced { by: next }) = - graveyard.get(&root).map(TxTombstone::reason) - { - root = next.clone(); - } - seen.insert(root.clone()).then_some(root) - }) - .filter_map(|root| { - self.build_rbf_node(&root, None, mempool, &txs, &entries, &graveyard) + let root = Self::walk_to_replacement_root(&graveyard, *by); + seen.insert(root).then_some(root) }) + .filter_map(|root| self.build_rbf_node(&root, None, &txs, &entries, &graveyard)) .filter(|node| !full_rbf_only || node.full_rbf) .take(RECENT_REPLACEMENTS_LIMIT) .collect()) @@ -461,8 +245,7 @@ impl Query { .map(|txid| { entries .get(&TxidPrefix::from(txid)) - .map(|e| u64::from(e.first_seen)) - .unwrap_or(0) + .map_or(0, |e| u64::from(e.first_seen)) }) .collect()) } diff --git a/crates/brk_query/src/impl/mining/block_fee_rates.rs b/crates/brk_query/src/impl/mining/block_fee_rates.rs index a2b1364ec..6745d1678 100644 --- a/crates/brk_query/src/impl/mining/block_fee_rates.rs +++ b/crates/brk_query/src/impl/mining/block_fee_rates.rs @@ -5,8 +5,12 @@ use super::block_window::BlockWindow; use crate::Query; impl Query { + /// Time-bucketed fee-rate percentiles over `time_period`. One entry per + /// bucket, ordered chronologically. Each entry carries the bucket's + /// average height/timestamp and the seven percentile means + /// (`min, pct10, pct25, median, pct75, pct90, max`). pub fn block_fee_rates(&self, time_period: TimePeriod) -> Result> { - let bw = BlockWindow::new(self, time_period); + let bw = BlockWindow::new(self, time_period)?; let frd = &self .computer() .transactions @@ -15,13 +19,13 @@ impl Query { .distribution .block; - let min = bw.read(&frd.min.height); - let pct10 = bw.read(&frd.pct10.height); - let pct25 = bw.read(&frd.pct25.height); - let median = bw.read(&frd.median.height); - let pct75 = bw.read(&frd.pct75.height); - let pct90 = bw.read(&frd.pct90.height); - let max = bw.read(&frd.max.height); + let min = bw.read(&frd.min.height)?; + let pct10 = bw.read(&frd.pct10.height)?; + let pct25 = bw.read(&frd.pct25.height)?; + let median = bw.read(&frd.median.height)?; + let pct75 = bw.read(&frd.pct75.height)?; + let pct90 = bw.read(&frd.pct90.height)?; + let max = bw.read(&frd.max.height)?; Ok(bw .buckets diff --git a/crates/brk_query/src/impl/mining/block_fees.rs b/crates/brk_query/src/impl/mining/block_fees.rs index d90bd8eee..94fa3c577 100644 --- a/crates/brk_query/src/impl/mining/block_fees.rs +++ b/crates/brk_query/src/impl/mining/block_fees.rs @@ -5,10 +5,15 @@ use super::block_window::BlockWindow; use crate::Query; impl Query { + /// Time-bucketed average block fees over `time_period`. One entry per + /// bucket, ordered chronologically. Each entry carries the bucket's + /// average height/timestamp, the round-half-up mean of block fees in + /// sats, and the bucket-mean USD spot price (the spot price, not + /// fees-in-USD: clients multiply). pub fn block_fees(&self, time_period: TimePeriod) -> Result> { - let bw = BlockWindow::new(self, time_period); - let fees: Vec = bw.read(&self.computer().mining.rewards.fees.block.sats); - let prices: Vec = bw.read(&self.computer().prices.spot.cents.height); + let bw = BlockWindow::new(self, time_period)?; + let fees: Vec = bw.read(&self.computer().mining.rewards.fees.block.sats)?; + let prices: Vec = bw.read(&self.computer().prices.spot.cents.height)?; Ok(bw .buckets diff --git a/crates/brk_query/src/impl/mining/block_rewards.rs b/crates/brk_query/src/impl/mining/block_rewards.rs index 8374523f3..1da3526e8 100644 --- a/crates/brk_query/src/impl/mining/block_rewards.rs +++ b/crates/brk_query/src/impl/mining/block_rewards.rs @@ -5,10 +5,15 @@ use super::block_window::BlockWindow; use crate::Query; impl Query { + /// Time-bucketed average block rewards (subsidy + fees) over + /// `time_period`. One entry per bucket, ordered chronologically. Each + /// entry carries the bucket's average height/timestamp, the round-half-up + /// mean of coinbase rewards in sats, and the bucket-mean USD spot price + /// (the spot price, not rewards-in-USD: clients multiply). pub fn block_rewards(&self, time_period: TimePeriod) -> Result> { - let bw = BlockWindow::new(self, time_period); - let rewards: Vec = bw.read(&self.computer().mining.rewards.coinbase.block.sats); - let prices: Vec = bw.read(&self.computer().prices.spot.cents.height); + let bw = BlockWindow::new(self, time_period)?; + let rewards: Vec = bw.read(&self.computer().mining.rewards.coinbase.block.sats)?; + let prices: Vec = bw.read(&self.computer().prices.spot.cents.height)?; Ok(bw .buckets diff --git a/crates/brk_query/src/impl/mining/block_sizes.rs b/crates/brk_query/src/impl/mining/block_sizes.rs index 9fa64daf5..bd4ee3125 100644 --- a/crates/brk_query/src/impl/mining/block_sizes.rs +++ b/crates/brk_query/src/impl/mining/block_sizes.rs @@ -7,12 +7,18 @@ use super::block_window::BlockWindow; use crate::Query; impl Query { + /// Time-bucketed average block size and weight over `time_period`. Returns + /// two parallel vecs (one entry per bucket, ordered chronologically): byte + /// size in `sizes`, weight units in `weights`. Each entry carries the + /// bucket's average height/timestamp and the round-half-up mean of the + /// corresponding metric. Single bucket-pass: built via `.map(...).unzip()` + /// to avoid re-walking buckets. pub fn block_sizes_weights(&self, time_period: TimePeriod) -> Result { let blocks = &self.indexer().vecs.blocks; - let bw = BlockWindow::new(self, time_period); + let bw = BlockWindow::new(self, time_period)?; - let block_sizes: Vec = bw.read(&blocks.total); - let block_weights: Vec = bw.read(&blocks.weight); + let block_sizes: Vec = bw.read(&blocks.total)?; + let block_weights: Vec = bw.read(&blocks.weight)?; let (sizes, weights) = bw .buckets diff --git a/crates/brk_query/src/impl/mining/block_window.rs b/crates/brk_query/src/impl/mining/block_window.rs index c544e985f..015e8c561 100644 --- a/crates/brk_query/src/impl/mining/block_window.rs +++ b/crates/brk_query/src/impl/mining/block_window.rs @@ -4,13 +4,15 @@ use std::{ ops::{Deref, Div}, }; +use brk_error::{Error, Result}; use brk_types::{Height, TimePeriod, Timestamp}; use vecdb::{ReadableVec, VecValue}; use crate::Query; -/// Mempool.space's `GROUP BY UNIX_TIMESTAMP(blockTimestamp) DIV ${div}` divisor in seconds. -/// `div = 1` puts each block in its own bucket. +/// Time-bucket divisor in seconds: blocks are grouped by `timestamp / div`. +/// `div = 1` puts each block in its own bucket; coarser values down-sample +/// long windows so the response stays bounded. fn time_div(period: TimePeriod) -> u32 { match period { TimePeriod::Day | TimePeriod::ThreeDays => 1, @@ -39,7 +41,10 @@ pub struct BlockBucket { impl BlockBucket { /// Float arithmetic mean of `values[offset]` across this bucket's blocks. - /// Use for float-backed types like `FeeRate`. + /// Use for float-backed types like `FeeRate`. Soundness: `offsets.len() >= 1` + /// is guaranteed by `BlockWindow::new` (only non-empty groups become buckets), + /// and indexing `values[i]` is in range when `values` was obtained via + /// `BlockWindow::read` (which validates `values.len() >= window.len`). pub fn mean(&self, values: &[T]) -> T where T: Copy + Sum + Div, @@ -47,8 +52,11 @@ impl BlockBucket { self.offsets.iter().map(|&i| values[i]).sum::() / self.offsets.len() } - /// Round-half-up arithmetic mean for u64-backed integer types, matching - /// mempool.space's `CAST(AVG(...) AS INT)`. + /// Round-half-up arithmetic mean for u64-backed integer types: returns + /// `T::from((sum + n/2) / n)`. Use when truncating integer division would + /// bias rolling averages downward. Soundness: `offsets.len() >= 1` is + /// guaranteed by `BlockWindow::new`, and `values[i]` is in range when + /// `values` was obtained via `BlockWindow::read`. pub fn mean_rounded(&self, values: &[T]) -> T where T: Copy + Deref + From, @@ -65,11 +73,22 @@ pub struct BlockWindow { pub start: Height, pub end: Height, pub buckets: Vec, + /// Number of blocks observed in `[start, end)` at construction. Equals + /// `timestamps.len()` after the prefetch; may be less than `end - start` + /// when the timestamp vec lags under per-vec stamp race. Every value vec + /// passed to `read` must yield at least this many elements. + pub len: usize, } impl BlockWindow { - pub fn new(query: &Query, period: TimePeriod) -> Self { - let start = query.start_height(period); + /// Build a time-bucketed window over `[start_height(period), tip + 1)`. + /// Prefetches `blocks.timestamp` once, groups block indices by + /// `ts / div(period)` (chronological), and stores per-bucket offsets + /// into the prefetched slice. Downstream metric reads (`BlockWindow::read`) + /// reuse the same `[start, end)` so each bucket's offsets index directly + /// into the value vec without a second walk. + pub fn new(query: &Query, period: TimePeriod) -> Result { + let start = query.start_height(period)?; let end = query.height() + 1usize; let div = time_div(period); @@ -85,6 +104,8 @@ impl BlockWindow { groups.entry(**ts / div).or_default().push(i); } + let len = timestamps.len(); + let buckets = groups .into_values() .map(|offsets| { @@ -99,19 +120,29 @@ impl BlockWindow { }) .collect(); - Self { + Ok(Self { start, end, buckets, - } + len, + }) } /// Read a height-keyed vec over this window's `[start, end)` range. - pub fn read(&self, vec: &V) -> Vec + /// Errors if the vec returns fewer elements than the window observed at + /// construction (per-vec stamp lag): bucket offsets reach up to `len - 1` + /// and would otherwise panic in `BlockBucket::mean(&values)`. + pub fn read(&self, vec: &V) -> Result> where V: ReadableVec, T: VecValue, { - vec.collect_range(self.start, self.end) + let values = vec.collect_range(self.start, self.end); + if values.len() < self.len { + return Err(Error::Internal( + "BlockWindow::read: value vec shorter than window (per-vec stamp lag)", + )); + } + Ok(values) } } diff --git a/crates/brk_query/src/impl/mining/difficulty.rs b/crates/brk_query/src/impl/mining/difficulty.rs index 30d81a6fd..40b667efd 100644 --- a/crates/brk_query/src/impl/mining/difficulty.rs +++ b/crates/brk_query/src/impl/mining/difficulty.rs @@ -13,13 +13,18 @@ const BLOCKS_PER_EPOCH: u32 = 2016; const TARGET_BLOCK_TIME: u64 = 600; impl Query { + /// Live difficulty-adjustment snapshot for the current epoch. Bundles + /// progress through the 2016-block window, the projected next-retarget + /// percentage from observed pace, an estimated wall-clock retarget time, + /// remaining blocks/time, the previous retarget percentage (current epoch + /// vs previous epoch first-block difficulty), and the time offset from a + /// 600s/block schedule. Output time fields are in milliseconds. pub fn difficulty_adjustment(&self) -> Result { let indexer = self.indexer(); let computer = self.computer(); let current_height = self.height(); let current_height_u32: u32 = current_height.into(); - // Get current epoch let current_epoch = computer .indexes .height @@ -28,7 +33,6 @@ impl Query { .data()?; let current_epoch_usize: usize = current_epoch.into(); - // Get epoch start height let epoch_start_height = computer .indexes .epoch @@ -37,13 +41,11 @@ impl Query { .data()?; let epoch_start_u32: u32 = epoch_start_height.into(); - // Calculate epoch progress let next_retarget_height = epoch_start_u32 + BLOCKS_PER_EPOCH; let blocks_into_epoch = current_height_u32 - epoch_start_u32; let remaining_blocks = next_retarget_height - current_height_u32; let progress_percent = (blocks_into_epoch as f64 / BLOCKS_PER_EPOCH as f64) * 100.0; - // Get timestamps using difficulty_to_timestamp for epoch start let epoch_start_timestamp = computer .indexes .timestamp @@ -57,8 +59,11 @@ impl Query { .collect_one(current_height) .data()?; - // Calculate average block time in current epoch - let elapsed_time = (*current_timestamp - *epoch_start_timestamp) as u64; + // Bitcoin block timestamps can step backward within MTP rules, so + // saturate the subtraction to avoid u32 underflow on a backwards-going + // first block of an epoch. + let elapsed_time = + u64::from((*current_timestamp).saturating_sub(*epoch_start_timestamp)); let time_avg = if blocks_into_epoch > 0 { elapsed_time / blocks_into_epoch as u64 } else { @@ -66,7 +71,8 @@ impl Query { }; // Per-block time needed over remaining blocks to land the epoch at - // 2016 * TARGET_BLOCK_TIME. Matches mempool.space's adjustedTimeAvg. + // BLOCKS_PER_EPOCH * TARGET_BLOCK_TIME (the convergence path that + // client UIs render as adjustedTimeAvg). let target_total = BLOCKS_PER_EPOCH as u64 * TARGET_BLOCK_TIME; let adjusted_time_avg = if remaining_blocks > 0 { target_total.saturating_sub(elapsed_time) / remaining_blocks as u64 @@ -74,15 +80,13 @@ impl Query { TARGET_BLOCK_TIME }; - // Estimate remaining time and retarget date let remaining_time = remaining_blocks as u64 * adjusted_time_avg; let now = SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_secs()) - .unwrap_or(*current_timestamp as u64); + .unwrap_or(u64::from(*current_timestamp)); let estimated_retarget_date = now + remaining_time; - // Calculate expected vs actual time for difficulty change estimate let expected_time = blocks_into_epoch as u64 * TARGET_BLOCK_TIME; let difficulty_change = if elapsed_time > 0 && blocks_into_epoch > 0 { ((expected_time as f64 / elapsed_time as f64) - 1.0) * 100.0 @@ -90,10 +94,8 @@ impl Query { 0.0 }; - // Time offset from expected schedule let time_offset = expected_time as i64 - elapsed_time as i64; - // Calculate previous retarget using stored difficulty values let (previous_retarget, previous_time) = if current_epoch_usize > 0 { let prev_epoch = Epoch::from(current_epoch_usize - 1); let prev_epoch_start = computer @@ -127,7 +129,6 @@ impl Query { (0.0, epoch_start_timestamp) }; - // Expected blocks based on wall clock time since epoch start let expected_blocks = elapsed_time as f64 / TARGET_BLOCK_TIME as f64; Ok(DifficultyAdjustment { diff --git a/crates/brk_query/src/impl/mining/difficulty_adjustments.rs b/crates/brk_query/src/impl/mining/difficulty_adjustments.rs index 76eee4e49..27d5e5251 100644 --- a/crates/brk_query/src/impl/mining/difficulty_adjustments.rs +++ b/crates/brk_query/src/impl/mining/difficulty_adjustments.rs @@ -6,21 +6,23 @@ use super::epochs::iter_difficulty_epochs; use crate::Query; impl Query { + /// All difficulty adjustments (one entry per retarget) whose first block + /// lies within `time_period`, in reverse chronological order (newest + /// first). `None` walks every epoch from genesis. The window cutoff is + /// wall-clock (via `start_height`) rather than block-count, so the + /// returned set is "epochs whose first block lies within the period", + /// not "the last N epochs". pub fn difficulty_adjustments( &self, time_period: Option, ) -> Result> { let end = self.height().to_usize(); - // Match mempool.space's wall-clock `time > NOW() - INTERVAL ${period}` cutoff - // by walking back through real block timestamps, not estimating via block count. let start = match time_period { - Some(tp) => self.start_height(tp).to_usize(), + Some(tp) => self.start_height(tp)?.to_usize(), None => 0, }; - let mut entries = iter_difficulty_epochs(self.computer(), start, end); - - // Return in reverse chronological order (newest first) + let mut entries = iter_difficulty_epochs(self.computer(), start, end)?; entries.reverse(); Ok(entries) } diff --git a/crates/brk_query/src/impl/mining/epochs.rs b/crates/brk_query/src/impl/mining/epochs.rs index 4d61a43a2..c9f0e2716 100644 --- a/crates/brk_query/src/impl/mining/epochs.rs +++ b/crates/brk_query/src/impl/mining/epochs.rs @@ -1,25 +1,38 @@ use brk_computer::Computer; +use brk_error::{Error, Result}; use brk_types::{DifficultyAdjustmentEntry, Height}; use vecdb::{ReadableVec, Ro, VecIndex}; -/// Iterate over difficulty epochs within a height range. -pub fn iter_difficulty_epochs( +/// Walk every difficulty epoch overlapping `[start_height, end_height]` and +/// return one `DifficultyAdjustmentEntry` per retarget whose first block +/// lies inside the window. Each entry carries the epoch's first-block +/// timestamp/height, the epoch's difficulty, and the new/previous difficulty +/// ratio (e.g. 1.068 = +6.8%, matching the field's contract). Epochs whose +/// first block falls before `start_height` are skipped but their difficulty +/// is still read so the next in-window entry can compute its ratio. Returns +/// `Error::Internal` on any missing cursor read so corrupt zero-valued +/// entries cannot slip into the output under per-vec stamp lag. +pub(super) fn iter_difficulty_epochs( computer: &Computer, start_height: usize, end_height: usize, -) -> Vec { +) -> Result> { let start_epoch = computer .indexes .height .epoch .collect_one(Height::from(start_height)) - .unwrap_or_default(); + .ok_or(Error::Internal( + "iter_difficulty_epochs: start_height not in epoch index", + ))?; let end_epoch = computer .indexes .height .epoch .collect_one(Height::from(end_height)) - .unwrap_or_default(); + .ok_or(Error::Internal( + "iter_difficulty_epochs: end_height not in epoch index", + ))?; let mut height_cursor = computer.indexes.epoch.first_height.cursor(); let mut timestamp_cursor = computer.indexes.timestamp.epoch.cursor(); @@ -29,16 +42,25 @@ pub fn iter_difficulty_epochs( let mut prev_difficulty: Option = None; for epoch_usize in start_epoch.to_usize()..=end_epoch.to_usize() { - let epoch_height = height_cursor.get(epoch_usize).unwrap_or_default(); + let epoch_height = height_cursor.get(epoch_usize).ok_or(Error::Internal( + "iter_difficulty_epochs: missing epoch first_height", + ))?; - // Skip epochs before our start height but track difficulty + // Epochs that start before the window are skipped; we still record + // their difficulty so the next in-window entry can compute its ratio. if epoch_height.to_usize() < start_height { - prev_difficulty = difficulty_cursor.get(epoch_usize).map(|d| *d); + prev_difficulty = Some(*difficulty_cursor.get(epoch_usize).ok_or( + Error::Internal("iter_difficulty_epochs: missing pre-window epoch difficulty"), + )?); continue; } - let epoch_timestamp = timestamp_cursor.get(epoch_usize).unwrap_or_default(); - let epoch_difficulty = *difficulty_cursor.get(epoch_usize).unwrap_or_default(); + let epoch_timestamp = timestamp_cursor.get(epoch_usize).ok_or(Error::Internal( + "iter_difficulty_epochs: missing epoch timestamp", + ))?; + let epoch_difficulty = *difficulty_cursor.get(epoch_usize).ok_or(Error::Internal( + "iter_difficulty_epochs: missing epoch difficulty", + ))?; let change_percent = match prev_difficulty { Some(prev) if prev > 0.0 => epoch_difficulty / prev, @@ -55,5 +77,5 @@ pub fn iter_difficulty_epochs( prev_difficulty = Some(epoch_difficulty); } - results + Ok(results) } diff --git a/crates/brk_query/src/impl/mining/hashrate.rs b/crates/brk_query/src/impl/mining/hashrate.rs index 4ac11aaa1..8c0937ccd 100644 --- a/crates/brk_query/src/impl/mining/hashrate.rs +++ b/crates/brk_query/src/impl/mining/hashrate.rs @@ -6,12 +6,39 @@ use super::epochs::iter_difficulty_epochs; use crate::Query; impl Query { - pub fn hashrate(&self, time_period: Option) -> Result { + /// Network 1-day hashrate at the day containing `height`. Errors on + /// stamp lag in the day1 index or in the daily-hashrate vec, so a + /// transient dropout surfaces instead of silently reporting zero. + pub(super) fn hashrate_at(&self, height: Height) -> Result { + let computer = self.computer(); + let day = computer.indexes.height.day1.collect_one(height).data()?; + Ok(*computer + .mining + .hashrate + .rate + .base + .day1 + .collect_one_flat(day) + .data()? as u128) + } + + /// Network hashrate summary for `time_period` (`None` walks the full + /// chain). Bundles a downsampled daily hashrate series (at most + /// `max_points` samples; sampling step is `total_days / max_points`, + /// floored at 1), every difficulty retarget within the window, the + /// current 1-day hashrate, and the current block's difficulty. The + /// window cutoff is wall-clock (via `start_height`), matching + /// `difficulty_adjustments` so the two endpoints agree on the same + /// `time_period`. + pub fn hashrate( + &self, + time_period: Option, + max_points: usize, + ) -> Result { let indexer = self.indexer(); let computer = self.computer(); let current_height = self.height(); - // Get current difficulty let current_difficulty = *indexer .vecs .blocks @@ -19,7 +46,7 @@ impl Query { .collect_one(current_height) .data()?; - // Get current hashrate + let current_hashrate = self.hashrate_at(current_height)?; let current_day1 = computer .indexes .height @@ -27,23 +54,12 @@ impl Query { .collect_one(current_height) .data()?; - let current_hashrate = *computer - .mining - .hashrate - .rate - .base - .day1 - .collect_one_flat(current_day1) - .unwrap_or_default() as u128; - - // Calculate start height based on time period let end = current_height.to_usize(); let start = match time_period { - Some(tp) => end.saturating_sub(tp.block_count()), + Some(tp) => self.start_height(tp)?.to_usize(), None => 0, }; - // Get hashrate entries using iterators for efficiency let start_day1 = computer .indexes .height @@ -52,9 +68,10 @@ impl Query { .data()?; let end_day1 = current_day1; - // Sample at regular intervals to avoid too many data points + // Sample at regular intervals so the chart payload stays bounded + // regardless of window size. let total_days = end_day1.to_usize().saturating_sub(start_day1.to_usize()) + 1; - let step = (total_days / 200).max(1); // Max ~200 data points + let step = (total_days / max_points.max(1)).max(1); let mut hr_cursor = computer.mining.hashrate.rate.base.day1.cursor(); let mut ts_cursor = computer.indexes.timestamp.day1.cursor(); @@ -71,8 +88,7 @@ impl Query { di += step; } - // Get difficulty adjustments within the period - let difficulty: Vec = iter_difficulty_epochs(computer, start, end) + let difficulty: Vec = iter_difficulty_epochs(computer, start, end)? .into_iter() .map(|e| DifficultyEntry { time: e.timestamp, diff --git a/crates/brk_query/src/impl/mining/period_start.rs b/crates/brk_query/src/impl/mining/period_start.rs index c3ce1bb78..251827669 100644 --- a/crates/brk_query/src/impl/mining/period_start.rs +++ b/crates/brk_query/src/impl/mining/period_start.rs @@ -1,14 +1,29 @@ +use brk_error::{OptionData, Result}; use brk_types::{Height, TimePeriod}; +use vecdb::ReadableVec; use crate::Query; impl Query { - /// First block height inside `period` looking back from the tip; genesis (0) for `All`. - pub(super) fn start_height(&self, period: TimePeriod) -> Height { - self.computer() - .blocks - .lookback - .start_height(period, self.height()) - .unwrap_or_default() + /// First block height inside `period` looking back from the tip; + /// genesis (`Height(0)`) for `All`. Errors `Internal` if the chosen + /// lookback vec is stamped short of the tip - separating the + /// "all-time" case from a transient stamp-lag dropout that would + /// otherwise silently widen a windowed query to the full chain. + pub(super) fn start_height(&self, period: TimePeriod) -> Result { + let lookback = &self.computer().blocks.lookback; + let tip = self.height(); + Ok(match period { + TimePeriod::Day => lookback._24h.collect_one(tip).data()?, + TimePeriod::ThreeDays => lookback._3d.collect_one(tip).data()?, + TimePeriod::Week => lookback._1w.collect_one(tip).data()?, + TimePeriod::Month => lookback._1m.collect_one(tip).data()?, + TimePeriod::ThreeMonths => lookback._3m.collect_one(tip).data()?, + TimePeriod::SixMonths => lookback._6m.collect_one(tip).data()?, + TimePeriod::Year => lookback._1y.collect_one(tip).data()?, + TimePeriod::TwoYears => lookback._2y.collect_one(tip).data()?, + TimePeriod::ThreeYears => lookback._3y.collect_one(tip).data()?, + TimePeriod::All => Height::from(0_usize), + }) } } diff --git a/crates/brk_query/src/impl/mining/pools.rs b/crates/brk_query/src/impl/mining/pools.rs index 695f2d560..9d4a462c4 100644 --- a/crates/brk_query/src/impl/mining/pools.rs +++ b/crates/brk_query/src/impl/mining/pools.rs @@ -1,6 +1,6 @@ -use std::cmp::Reverse; +use std::{borrow::Cow, cmp::Reverse}; -use brk_error::{Error, Result}; +use brk_error::{Error, OptionData, Result}; use brk_types::{ BlockInfoV1, Day1, Height, Pool, PoolBlockCounts, PoolBlockShares, PoolDetail, PoolDetailInfo, PoolHashrateEntry, PoolInfo, PoolSlug, PoolStats, PoolsSummary, StoredF64, StoredU64, @@ -10,9 +10,9 @@ use vecdb::{AnyVec, ReadableVec, VecIndex}; use crate::Query; -/// 7-day lookback for share computation (matching mempool.space) +/// 7-day lookback for share computation. const LOOKBACK_DAYS: usize = 7; -/// Weekly sample interval (matching mempool.space's 604800s interval) +/// Weekly sample interval (~604800s). const SAMPLE_WEEKLY: usize = 7; /// Pre-read shared data for hashrate computation. @@ -24,11 +24,18 @@ struct HashrateSharedData { } impl Query { + /// Mining-pool leaderboard for `time_period`. For each pool, computes + /// block count over the window via `cumulative(end) - cumulative(start - 1)` + /// (tip-cumulative minus pre-window-cumulative), sorts pools by count + /// descending, assigns ranks, and emits the per-pool share. Also bundles + /// current / 3d / 1w network hashrate snapshots. Returns zeros early + /// when no blocks have been indexed. The window start uses the + /// timestamp-based lookback vecs (`_24h`, `_3d`, ...) rather than + /// block-count math; `TimePeriod::All` walks from genesis. pub fn mining_pools(&self, time_period: TimePeriod) -> Result { let computer = self.computer(); let current_height = self.height(); - // No blocks indexed yet if computer.pools.pool.len() == 0 { return Ok(PoolsSummary { pools: vec![], @@ -39,27 +46,13 @@ impl Query { }); } - // Use timestamp-based lookback for accurate time boundaries + let start = self.start_height(time_period)?.to_usize(); let lookback = &computer.blocks.lookback; - let start = match time_period { - TimePeriod::Day => lookback._24h.collect_one(current_height), - TimePeriod::ThreeDays => lookback._3d.collect_one(current_height), - TimePeriod::Week => lookback._1w.collect_one(current_height), - TimePeriod::Month => lookback._1m.collect_one(current_height), - TimePeriod::ThreeMonths => lookback._3m.collect_one(current_height), - TimePeriod::SixMonths => lookback._6m.collect_one(current_height), - TimePeriod::Year => lookback._1y.collect_one(current_height), - TimePeriod::TwoYears => lookback._2y.collect_one(current_height), - TimePeriod::ThreeYears => lookback._3y.collect_one(current_height), - TimePeriod::All => None, - } - .unwrap_or_default() - .to_usize(); let pools = pools(); let mut pool_data: Vec<(&'static Pool, u64)> = Vec::new(); - // For each pool, get cumulative count at end and start, subtract to get range count + // Range count = cumulative(end) - cumulative(start - 1). for (pool_id, cumulative) in computer .pools .major @@ -73,14 +66,12 @@ impl Query { .map(|(id, v)| (id, &v.blocks_mined.cumulative.height)), ) { - let count_at_end: u64 = *cumulative.collect_one(current_height).unwrap_or_default(); + let count_at_end: u64 = *cumulative.collect_one(current_height).data()?; let count_at_start: u64 = if start == 0 { 0 } else { - *cumulative - .collect_one(Height::from(start - 1)) - .unwrap_or_default() + *cumulative.collect_one(Height::from(start - 1)).data()? }; let block_count = count_at_end.saturating_sub(count_at_start); @@ -90,12 +81,10 @@ impl Query { } } - // Sort by block count descending pool_data.sort_by_key(|p| Reverse(p.1)); let total_blocks: u64 = pool_data.iter().map(|(_, count)| count).sum(); - // Build stats with ranks let pool_stats: Vec = pool_data .into_iter() .enumerate() @@ -109,31 +98,11 @@ impl Query { }) .collect(); - let hashrate_at = |height: Height| -> u128 { - let day = computer - .indexes - .height - .day1 - .collect_one(height) - .unwrap_or_default(); - computer - .mining - .hashrate - .rate - .base - .day1 - .collect_one(day) - .flatten() - .map(|v| *v as u128) - .unwrap_or(0) - }; - - let lookback = &computer.blocks.lookback; - let last_estimated_hashrate = hashrate_at(current_height); + let last_estimated_hashrate = self.hashrate_at(current_height)?; let last_estimated_hashrate3d = - hashrate_at(lookback._3d.collect_one(current_height).unwrap_or_default()); + self.hashrate_at(lookback._3d.collect_one(current_height).data()?)?; let last_estimated_hashrate1w = - hashrate_at(lookback._1w.collect_one(current_height).unwrap_or_default()); + self.hashrate_at(lookback._1w.collect_one(current_height).data()?)?; Ok(PoolsSummary { pools: pool_stats, @@ -144,10 +113,18 @@ impl Query { }) } + /// All supported pools as `PoolInfo`. Static list, no indexer reads, can't fail. pub fn all_pools(&self) -> Vec { pools().iter().map(PoolInfo::from).collect() } + /// Per-pool detail: lifetime block count plus 24h and 1w windowed counts, + /// each as a share of network blocks in the same window. The 24h share is + /// also used to weight the current 1-day network hashrate into a per-pool + /// `estimated_hashrate`. `total_reward` is `Some` only for major pools + /// (minor pools don't track per-pool reward sums); under stamp lag on a + /// major pool's reward vec this errors rather than silently reporting + /// `None`. pub fn pool_detail(&self, slug: PoolSlug) -> Result { let computer = self.computer(); let current_height = self.height(); @@ -156,7 +133,6 @@ impl Query { let pools_list = pools(); let pool = pools_list.get(slug); - // Get cumulative blocks for this pool (works for both major and minor) let cumulative = computer .pools .major @@ -169,42 +145,31 @@ impl Query { .get(&slug) .map(|v| &v.blocks_mined.cumulative.height) }) - .ok_or_else(|| Error::NotFound("Pool data not found".into()))?; + .ok_or_else(|| { + Error::Internal( + "pool slug present in static list but missing from major/minor maps", + ) + })?; - // Get total blocks (all time) - let total_all: u64 = *cumulative.collect_one(current_height).unwrap_or_default(); + let total_all: u64 = *cumulative.collect_one(current_height).data()?; - // Use timestamp-based lookback for accurate time boundaries let lookback = &computer.blocks.lookback; - let start_24h = lookback - ._24h - .collect_one(current_height) - .unwrap_or_default() - .to_usize(); + let start_24h = lookback._24h.collect_one(current_height).data()?.to_usize(); let count_before_24h: u64 = if start_24h == 0 { 0 } else { - *cumulative - .collect_one(Height::from(start_24h - 1)) - .unwrap_or_default() + *cumulative.collect_one(Height::from(start_24h - 1)).data()? }; let total_24h = total_all.saturating_sub(count_before_24h); - let start_1w = lookback - ._1w - .collect_one(current_height) - .unwrap_or_default() - .to_usize(); + let start_1w = lookback._1w.collect_one(current_height).data()?.to_usize(); let count_before_1w: u64 = if start_1w == 0 { 0 } else { - *cumulative - .collect_one(Height::from(start_1w - 1)) - .unwrap_or_default() + *cumulative.collect_one(Height::from(start_1w - 1)).data()? }; let total_1w = total_all.saturating_sub(count_before_1w); - // Calculate total network blocks for share calculation let network_blocks_all = (end + 1) as u64; let network_blocks_24h = (end - start_24h + 1) as u64; let network_blocks_1w = (end - start_1w + 1) as u64; @@ -225,6 +190,15 @@ impl Query { 0.0 }; + let network_hr = self.hashrate_at(current_height)?; + let estimated_hashrate = (share_24h * network_hr as f64) as u128; + + let total_reward = if let Some(major) = computer.pools.major.get(&slug) { + Some(major.rewards.cumulative.sats.height.collect_one(current_height).data()?) + } else { + None + }; + Ok(PoolDetail { pool: PoolDetailInfo::from(pool), block_count: PoolBlockCounts { @@ -237,45 +211,28 @@ impl Query { day: share_24h, week: share_1w, }, - estimated_hashrate: { - let day = computer - .indexes - .height - .day1 - .collect_one(current_height) - .unwrap_or_default(); - let network_hr = computer - .mining - .hashrate - .rate - .base - .day1 - .collect_one(day) - .flatten() - .map(|v| *v as u128) - .unwrap_or(0); - (share_24h * network_hr as f64) as u128 - }, + estimated_hashrate, reported_hashrate: None, - total_reward: computer - .pools - .major - .get(&slug) - .and_then(|v| v.rewards.cumulative.sats.height.collect_one(current_height)), + total_reward, }) } + /// Page of blocks mined by `slug`, in descending height order, capped at + /// `limit`. `before_height` is the inclusive upper bound to paginate from + /// (defaults to tip). Returns an empty `Vec` if the pool has no recorded + /// blocks. Heights come from a sorted-ascending per-pool index, so the + /// page is computed via `partition_point` then reversed; consecutive + /// runs are merged into a single bulk read of `blocks_v1_range`. pub fn pool_blocks( &self, slug: PoolSlug, - start_height: Option, + before_height: Option, + limit: usize, ) -> Result> { let computer = self.computer(); - let max_height = self.height().to_usize(); - let start = start_height.map(|h| h.to_usize()).unwrap_or(max_height); - let end = start.min(computer.pools.pool.len().saturating_sub(1)); - - const POOL_BLOCKS_LIMIT: usize = 100; + let tip = self.height().to_usize(); + let upper = before_height.map(|h| h.to_usize()).unwrap_or(tip); + let end = upper.min(computer.pools.pool.len().saturating_sub(1)); let heights: Vec = computer .pools @@ -284,7 +241,7 @@ impl Query { .get(&slug) .map(|pool_heights| { let pos = pool_heights.partition_point(|h| h.to_usize() <= end); - let start = pos.saturating_sub(POOL_BLOCKS_LIMIT); + let start = pos.saturating_sub(limit); pool_heights[start..pos] .iter() .rev() @@ -293,7 +250,7 @@ impl Query { }) .unwrap_or_default(); - // Group consecutive descending heights into ranges for batch reads + // Group consecutive descending heights into ranges for batch reads. let mut blocks = Vec::with_capacity(heights.len()); let mut i = 0; while i < heights.len() { @@ -301,50 +258,42 @@ impl Query { while i + 1 < heights.len() && heights[i + 1] + 1 == heights[i] { i += 1; } - if let Ok(mut v) = self.blocks_v1_range(heights[i], hi + 1) { - blocks.append(&mut v); - } + let mut v = self.blocks_v1_range(heights[i], hi + 1)?; + blocks.append(&mut v); i += 1; } Ok(blocks) } + /// Weekly-sampled hashrate series for a single pool over the full chain. + /// Each point's hashrate is `network_hashrate(day) * pool_share_over_7d`, + /// where the share is the pool's last-7-days block count divided by the + /// network's last-7-days block count. pub fn pool_hashrate(&self, slug: PoolSlug) -> Result> { - let pool_name = pools().get(slug).name.to_string(); + let pool_name = pools().get(slug).name; let shared = self.hashrate_shared_data(0)?; let pool_cum = self.pool_daily_cumulative(slug, shared.start_day, shared.end_day)?; Ok(Self::compute_hashrate_entries( &shared, &pool_cum, - &pool_name, + pool_name, SAMPLE_WEEKLY, )) } + /// Multi-pool weekly-sampled hashrate series over `time_period`. Walks + /// the full chain when `time_period` is `None` or `Some(TimePeriod::All)`. + /// For each known pool, emits one entry per weekly sample where the + /// hashrate is `network_hashrate(day) * pool_share_over_7d`, tagged with + /// `pool_name`. Entries from all pools are concatenated; the chart layer + /// groups by pool name. pub fn pools_hashrate( &self, time_period: Option, ) -> Result> { let start_height = match time_period { - Some(tp) => { - let lookback = &self.computer().blocks.lookback; - let current_height = self.height(); - match tp { - TimePeriod::Day => lookback._24h.collect_one(current_height), - TimePeriod::ThreeDays => lookback._3d.collect_one(current_height), - TimePeriod::Week => lookback._1w.collect_one(current_height), - TimePeriod::Month => lookback._1m.collect_one(current_height), - TimePeriod::ThreeMonths => lookback._3m.collect_one(current_height), - TimePeriod::SixMonths => lookback._6m.collect_one(current_height), - TimePeriod::Year => lookback._1y.collect_one(current_height), - TimePeriod::TwoYears => lookback._2y.collect_one(current_height), - TimePeriod::ThreeYears => lookback._3y.collect_one(current_height), - TimePeriod::All => None, - } - .unwrap_or_default() - .to_usize() - } + Some(tp) => self.start_height(tp)?.to_usize(), None => 0, }; @@ -353,11 +302,8 @@ impl Query { let mut entries = Vec::new(); for pool in pools_list.iter() { - let Ok(pool_cum) = - self.pool_daily_cumulative(pool.slug, shared.start_day, shared.end_day) - else { - continue; - }; + let pool_cum = + self.pool_daily_cumulative(pool.slug, shared.start_day, shared.end_day)?; entries.extend(Self::compute_hashrate_entries( &shared, &pool_cum, @@ -369,7 +315,11 @@ impl Query { Ok(entries) } - /// Shared data needed for hashrate computation (read once, reuse across pools). + /// Pre-loads the network-wide day1 series (network hashrate, per-day + /// first heights) over `[start_day, end_day)`, where `start_day` is the + /// day index of `start_height` and `end_day` is the day index of the + /// current tip plus one (exclusive). Reused across pools so the network + /// series is read only once per request. fn hashrate_shared_data(&self, start_height: usize) -> Result { let computer = self.computer(); let current_height = self.height(); @@ -378,14 +328,14 @@ impl Query { .height .day1 .collect_one_at(start_height) - .unwrap_or_default() + .data()? .to_usize(); let end_day = computer .indexes .height .day1 .collect_one(current_height) - .unwrap_or_default() + .data()? .to_usize() + 1; let daily_hashrate = computer @@ -409,7 +359,13 @@ impl Query { }) } - /// Read daily cumulative blocks mined for a pool. + /// Reads the pool's daily-cumulative blocks-mined vec over the half-open + /// day range `[start_day, end_day)`. Major pools nest under `.base` + /// (additional derived computations), minor pools don't, so the slug is + /// looked up in both maps. Errors `Internal` if the slug is in neither + /// map: this can only fire on a static-pool-list / indexer-map mismatch + /// since both callers guarantee the slug is in the static list, so the + /// route layer never reaches a user-driven not-found path here. fn pool_daily_cumulative( &self, slug: PoolSlug, @@ -436,18 +392,38 @@ impl Query { .collect_range_at(start_day, end_day) }) }) - .ok_or_else(|| Error::NotFound("Pool not found".into())) + .ok_or_else(|| { + Error::Internal( + "pool slug present in static list but missing from major/minor maps", + ) + }) } - /// Compute hashrate entries from daily cumulative blocks + shared data. - /// Uses 7-day windowed share: pool_blocks_in_week / total_blocks_in_week. + /// Per-pool hashrate-share entries from pre-loaded daily cumulative blocks + /// plus the shared network series. Walks samples from `LOOKBACK_DAYS` + /// onward in `sample_days` strides; for each sample emits one entry with + /// pool_blocks = pool_cum[i] - pool_cum[i - LOOKBACK_DAYS] + /// total_blocks = first_heights[i] - first_heights[i - LOOKBACK_DAYS] + /// share = pool_blocks / total_blocks + /// avg_hashrate = daily_hashrate[i] * share + /// Skips samples where either cumulative value is `None`, where + /// `pool_blocks == 0`, where `total_blocks == 0`, or where the network + /// hashrate for that day is unavailable. The iteration is bounded by + /// the shortest of `pool_cum`, `shared.first_heights`, and + /// `shared.daily_hashrate` so per-vec stamp-lag truncation from + /// `collect_range_at` degrades the chart's tail rather than panicking + /// on out-of-bounds indexing. `LOOKBACK_DAYS` (rolling window) and + /// `sample_days` (point spacing) are independent. fn compute_hashrate_entries( shared: &HashrateSharedData, pool_cum: &[Option], - pool_name: &str, + pool_name: &'static str, sample_days: usize, ) -> Vec { - let total = pool_cum.len(); + let total = pool_cum + .len() + .min(shared.first_heights.len()) + .min(shared.daily_hashrate.len()); if total <= LOOKBACK_DAYS { return vec![]; } @@ -472,7 +448,7 @@ impl Query { timestamp: day.to_timestamp(), avg_hashrate: (network_hr * share) as u128, share, - pool_name: pool_name.to_string(), + pool_name: Cow::Borrowed(pool_name), }); } } diff --git a/crates/brk_query/src/impl/mining/reward_stats.rs b/crates/brk_query/src/impl/mining/reward_stats.rs index 8175358e6..de08ae34a 100644 --- a/crates/brk_query/src/impl/mining/reward_stats.rs +++ b/crates/brk_query/src/impl/mining/reward_stats.rs @@ -1,11 +1,20 @@ -use brk_error::Result; +use brk_error::{Error, Result}; use brk_types::{Height, RewardStats, Sats}; -use vecdb::{ReadableVec, VecIndex}; +use vecdb::{AnyVec, ReadableVec, VecIndex}; use crate::Query; impl Query { + /// Sums coinbase rewards, fees, and tx counts over the last `block_count` + /// blocks ending at the current tip. Errors `OutOfRange` if `block_count` + /// is zero, and `Internal` if any of the three per-block vecs (coinbase, + /// fees, tx count) is stamped short of the tip - silent truncation by + /// `fold_range_at` would otherwise produce a quietly low total. pub fn reward_stats(&self, block_count: usize) -> Result { + if block_count == 0 { + return Err(Error::OutOfRange("block_count must be >= 1".into())); + } + let computer = self.computer(); let current_height = self.height(); @@ -19,6 +28,12 @@ impl Query { let start = start_block.to_usize(); let end = end_block.to_usize() + 1; + if coinbase_vec.len() < end || fee_vec.len() < end || tx_count_vec.len() < end { + return Err(Error::Internal( + "reward stats vecs lag the tip; retry once indexing catches up", + )); + } + let total_reward = coinbase_vec.fold_range_at(start, end, Sats::ZERO, |acc, v| acc + v); let total_fee = fee_vec.fold_range_at(start, end, Sats::ZERO, |acc, v| acc + v); let total_tx = tx_count_vec.fold_range_at(start, end, 0u64, |acc, v| acc + *v); diff --git a/crates/brk_query/src/impl/mod.rs b/crates/brk_query/src/impl/mod.rs index ad085a7bd..338121348 100644 --- a/crates/brk_query/src/impl/mod.rs +++ b/crates/brk_query/src/impl/mod.rs @@ -1,5 +1,6 @@ mod addr; mod block; +mod cpfp; mod mempool; mod mining; mod price; @@ -7,5 +8,4 @@ mod series; mod tx; mod urpd; -pub use block::BLOCK_TXS_PAGE_SIZE; pub use series::ResolvedQuery; diff --git a/crates/brk_query/src/impl/series.rs b/crates/brk_query/src/impl/series.rs index f25e1b535..5ebe87167 100644 --- a/crates/brk_query/src/impl/series.rs +++ b/crates/brk_query/src/impl/series.rs @@ -39,18 +39,19 @@ impl Query { .collect::>() .join(", "); return Error::SeriesUnsupportedIndex { - series: series.to_string(), + series: brk_error::truncate_series_name(series.to_string()), supported, }; } - let matches = self - .vecs() - .matches(series, Limit::DEFAULT) - .into_iter() - .map(|s| s.to_string()) - .collect(); - Error::SeriesNotFound(brk_error::SeriesNotFound::new(series.to_string(), matches)) + let matches = self.vecs().matches(series, Limit::DEFAULT); + let total_matches = matches.len(); + let suggestions = matches.into_iter().take(3).collect(); + Error::SeriesNotFound(brk_error::SeriesNotFound::new( + series.to_string(), + suggestions, + total_matches, + )) } pub(crate) fn columns_to_csv( @@ -345,7 +346,7 @@ impl Query { } } - pub fn indexes(&self) -> &[IndexInfo] { + pub fn indexes(&self) -> &'static [IndexInfo] { &self.vecs().indexes } @@ -353,7 +354,7 @@ impl Query { self.vecs().series(pagination) } - pub fn series_catalog(&self) -> &TreeNode { + pub fn series_catalog(&self) -> &'static TreeNode { self.vecs().catalog() } diff --git a/crates/brk_query/src/impl/tx.rs b/crates/brk_query/src/impl/tx.rs index c09f4e46f..0903b7580 100644 --- a/crates/brk_query/src/impl/tx.rs +++ b/crates/brk_query/src/impl/tx.rs @@ -206,10 +206,10 @@ impl Query { let (block_hash, block_time) = if let Some((h, ref bh, bt)) = cached_status && h == spending_height { - (bh.clone(), bt) + (*bh, bt) } else { let (bh, bt) = self.block_hash_and_time(spending_height)?; - cached_status = Some((spending_height, bh.clone(), bt)); + cached_status = Some((spending_height, bh, bt)); (bh, bt) }; @@ -315,10 +315,11 @@ impl Query { let txids = self.block_txids_by_height(height)?; let target: bitcoin::Txid = txid.into(); - let btxids: Vec = txids.iter().map(bitcoin::Txid::from).collect(); - let mb = bitcoin::MerkleBlock::from_header_txids_with_predicate(&header, &btxids, |t| { - *t == target - }); + let mb = bitcoin::MerkleBlock::from_header_txids_with_predicate( + &header, + Txid::as_bitcoin_slice(&txids), + |t| *t == target, + ); Ok(bitcoin::consensus::encode::serialize_hex(&mb)) } diff --git a/crates/brk_query/src/lib.rs b/crates/brk_query/src/lib.rs index 0f944ce53..2136af468 100644 --- a/crates/brk_query/src/lib.rs +++ b/crates/brk_query/src/lib.rs @@ -20,7 +20,7 @@ mod r#impl; #[cfg(feature = "tokio")] pub use r#async::*; -pub use r#impl::{BLOCK_TXS_PAGE_SIZE, ResolvedQuery}; +pub use r#impl::ResolvedQuery; pub use vecs::Vecs; #[derive(Clone)] @@ -59,12 +59,12 @@ impl Query { /// Current indexed height pub fn indexed_height(&self) -> Height { - Height::from(self.indexer().vecs.blocks.blockhash.inner.stamp()) + self.indexer().indexed_height() } /// Current computed height (series) pub fn computed_height(&self) -> Height { - Height::from(self.computer().distribution.supply_state.stamp()) + self.computer().computed_height() } /// Minimum of indexed and computed heights diff --git a/crates/brk_reader/examples/after_bench.rs b/crates/brk_reader/examples/after_bench.rs index 70c6382ca..d33f1a2af 100644 --- a/crates/brk_reader/examples/after_bench.rs +++ b/crates/brk_reader/examples/after_bench.rs @@ -57,7 +57,7 @@ fn main() -> Result<()> { let mut first: Option = None; for &p in PARSER_COUNTS { - let stats = bench(REPEATS, || reader.after_with(anchor.clone(), p))?; + let stats = bench(REPEATS, || reader.after_with(anchor, p))?; print_row(n, p, &stats); if let Some(baseline) = &first { sanity_check(n, baseline, &stats); diff --git a/crates/brk_reader/src/lib.rs b/crates/brk_reader/src/lib.rs index b6a5fe734..1b466bb97 100644 --- a/crates/brk_reader/src/lib.rs +++ b/crates/brk_reader/src/lib.rs @@ -70,10 +70,12 @@ impl Reader { } } + #[inline] pub fn client(&self) -> &Client { &self.0.client } + #[inline] pub fn blocks_dir(&self) -> &Path { &self.0.blocks_dir } @@ -140,9 +142,9 @@ impl Reader { ) -> Result>> { let tip = self.0.client.get_last_height()?; if end > tip { - return Err(Error::OutOfRange(format!( - "range end {end} is past current tip {tip}" - ))); + return Err(Error::OutOfRange( + format!("range end {end} is past current tip {tip}").into(), + )); } let canonical = CanonicalRange::between(&self.0.client, start, end)?; pipeline::spawn(self.0.clone(), canonical, parser_threads) diff --git a/crates/brk_rpc/src/methods.rs b/crates/brk_rpc/src/methods.rs index 1a48aaead..1475dbcc6 100644 --- a/crates/brk_rpc/src/methods.rs +++ b/crates/brk_rpc/src/methods.rs @@ -2,7 +2,10 @@ use std::{thread::sleep, time::Duration}; use bitcoin::{consensus::encode, hex::FromHex}; use brk_error::{Error, Result}; -use brk_types::{Bitcoin, BlockHash, FeeRate, Height, MempoolEntryInfo, Sats, Timestamp, Txid, Vout}; +use brk_types::{ + Bitcoin, BlockHash, FeeRate, Height, MempoolEntryInfo, Sats, Timestamp, Txid, VSize, Vout, + Weight, +}; use corepc_jsonrpc::error::Error as JsonRpcError; use corepc_types::v30::{ GetBlockCount, GetBlockHash, GetBlockHeader, GetBlockHeaderVerbose, GetBlockVerboseOne, @@ -208,8 +211,8 @@ impl Client { .collect::>>()?; Ok(MempoolEntryInfo { txid: Self::parse_txid(&txid_str, "mempool txid")?, - vsize: entry.vsize as u64, - weight: entry.weight as u64, + vsize: VSize::from(entry.vsize as u64), + weight: Weight::from(entry.weight as u64), fee: Sats::from(Bitcoin::from(entry.fees.base)), first_seen: Timestamp::from(entry.time), ancestor_count: entry.ancestor_count as u64, @@ -292,7 +295,7 @@ impl Client { }) }) { Ok(raw) => { - out.insert(txid.clone(), raw); + out.insert(*txid, raw); } Err(Error::CorepcRPC(JsonRpcError::Rpc(rpc))) if rpc.code == RPC_NOT_FOUND => {} diff --git a/crates/brk_server/src/api/addrs.rs b/crates/brk_server/src/api/addrs.rs index 72fbad3d7..fe74abfd1 100644 --- a/crates/brk_server/src/api/addrs.rs +++ b/crates/brk_server/src/api/addrs.rs @@ -142,7 +142,8 @@ impl AddrRoutes for ApiRouter { State(state): State | { let strategy = state.addr_strategy(Version::ONE, &path.addr, false); - state.respond_json(&headers, strategy, &uri, move |q| q.addr_utxos(path.addr, 1000)).await + let max_utxos = state.max_utxos; + state.respond_json(&headers, strategy, &uri, move |q| q.addr_utxos(path.addr, max_utxos)).await }, |op| op .id("get_address_utxos") .addrs_tag() diff --git a/crates/brk_server/src/api/blocks.rs b/crates/brk_server/src/api/blocks.rs index db57d9f33..10f24ccc4 100644 --- a/crates/brk_server/src/api/blocks.rs +++ b/crates/brk_server/src/api/blocks.rs @@ -3,10 +3,9 @@ use axum::{ extract::{Path, State}, http::{HeaderMap, Uri}, }; -use brk_query::BLOCK_TXS_PAGE_SIZE; use brk_types::{ - BlockHash, BlockInfo, BlockInfoV1, BlockStatus, BlockTimestamp, Height, Hex, Transaction, - TxIndex, Txid, Version, + BlockHash, BlockInfo, BlockInfoV1, BlockStatus, BlockTimestamp, BlockTxIndex, Height, Hex, + Transaction, Txid, Version, }; use crate::{ @@ -17,6 +16,8 @@ use crate::{ }, }; +const BLOCK_TXS_PAGE_SIZE: u32 = 25; + pub trait BlockRoutes { fn add_block_routes(self) -> Self; } @@ -278,7 +279,7 @@ impl BlockRoutes for ApiRouter { Path(path): Path, _: Empty, State(state): State| { let strategy = state.block_strategy(Version::ONE, &path.hash); - state.respond_json(&headers, strategy, &uri, move |q| q.block_txs(&path.hash, TxIndex::default())).await + state.respond_json(&headers, strategy, &uri, move |q| q.block_txs(&path.hash, BlockTxIndex::default(), BLOCK_TXS_PAGE_SIZE)).await }, |op| { op.id("get_block_txs") @@ -304,7 +305,7 @@ impl BlockRoutes for ApiRouter { Path(path): Path, _: Empty, State(state): State| { let strategy = state.block_strategy(Version::ONE, &path.hash); - state.respond_json(&headers, strategy, &uri, move |q| q.block_txs(&path.hash, path.start_index)).await + state.respond_json(&headers, strategy, &uri, move |q| q.block_txs(&path.hash, path.start_index, BLOCK_TXS_PAGE_SIZE)).await }, |op| { op.id("get_block_txs_from_index") diff --git a/crates/brk_server/src/api/metrics.rs b/crates/brk_server/src/api/metrics.rs index cae6e1455..af3bec6f1 100644 --- a/crates/brk_server/src/api/metrics.rs +++ b/crates/brk_server/src/api/metrics.rs @@ -44,7 +44,7 @@ impl ApiMetricsLegacyRoutes for ApiRouter { "/api/metrics", get_with( async |uri: Uri, headers: HeaderMap, _: Empty, State(state): State| { - state.respond_json(&headers, CacheStrategy::Deploy, &uri, |q| Ok(q.series_catalog().clone())).await + state.respond_json(&headers, CacheStrategy::Deploy, &uri, |q| Ok(q.series_catalog())).await }, |op| op .id("get_metrics_tree_deprecated") @@ -92,7 +92,7 @@ impl ApiMetricsLegacyRoutes for ApiRouter { _: Empty, State(state): State | { - state.respond_json(&headers, CacheStrategy::Deploy, &uri, |q| Ok(q.indexes().to_vec())).await + state.respond_json(&headers, CacheStrategy::Deploy, &uri, |q| Ok(q.indexes())).await }, |op| op .id("get_indexes_deprecated") diff --git a/crates/brk_server/src/api/mining.rs b/crates/brk_server/src/api/mining.rs index 47ca5639a..86385b6d0 100644 --- a/crates/brk_server/src/api/mining.rs +++ b/crates/brk_server/src/api/mining.rs @@ -15,6 +15,9 @@ use crate::{ params::{BlockCountParam, Empty, PoolSlugAndHeightParam, PoolSlugParam, TimePeriodParam}, }; +const HASHRATE_MAX_POINTS: usize = 200; +const POOL_BLOCKS_LIMIT: usize = 100; + pub trait MiningRoutes { fn add_mining_routes(self) -> Self; } @@ -132,7 +135,7 @@ impl MiningRoutes for ApiRouter { "/api/v1/mining/pool/{slug}/blocks", get_with( async |uri: Uri, headers: HeaderMap, Path(path): Path, _: Empty, State(state): State| { - state.respond_json(&headers, CacheStrategy::Tip, &uri, move |q| q.pool_blocks(path.slug, None)).await + state.respond_json(&headers, CacheStrategy::Tip, &uri, move |q| q.pool_blocks(path.slug, None, POOL_BLOCKS_LIMIT)).await }, |op| { op.id("get_pool_blocks") @@ -150,7 +153,7 @@ impl MiningRoutes for ApiRouter { "/api/v1/mining/pool/{slug}/blocks/{height}", get_with( async |uri: Uri, headers: HeaderMap, Path(PoolSlugAndHeightParam {slug, height}): Path, _: Empty, State(state): State| { - state.respond_json(&headers, state.height_strategy(Version::ONE, height), &uri, move |q| q.pool_blocks(slug, Some(height))).await + state.respond_json(&headers, state.height_strategy(Version::ONE, height), &uri, move |q| q.pool_blocks(slug, Some(height), POOL_BLOCKS_LIMIT)).await }, |op| { op.id("get_pool_blocks_from") @@ -168,7 +171,7 @@ impl MiningRoutes for ApiRouter { "/api/v1/mining/hashrate", get_with( async |uri: Uri, headers: HeaderMap, _: Empty, State(state): State| { - state.respond_json(&headers, CacheStrategy::Tip, &uri, |q| q.hashrate(None)).await + state.respond_json(&headers, CacheStrategy::Tip, &uri, |q| q.hashrate(None, HASHRATE_MAX_POINTS)).await }, |op| { op.id("get_hashrate") @@ -185,7 +188,7 @@ impl MiningRoutes for ApiRouter { "/api/v1/mining/hashrate/{time_period}", get_with( async |uri: Uri, headers: HeaderMap, Path(path): Path, _: Empty, State(state): State| { - state.respond_json(&headers, CacheStrategy::Tip, &uri, move |q| q.hashrate(Some(path.time_period))).await + state.respond_json(&headers, CacheStrategy::Tip, &uri, move |q| q.hashrate(Some(path.time_period), HASHRATE_MAX_POINTS)).await }, |op| { op.id("get_hashrate_by_period") diff --git a/crates/brk_server/src/api/series.rs b/crates/brk_server/src/api/series.rs index 1ea6d0e0c..876ab719a 100644 --- a/crates/brk_server/src/api/series.rs +++ b/crates/brk_server/src/api/series.rs @@ -108,7 +108,7 @@ impl ApiSeriesRoutes for ApiRouter { "/api/series", get_with( async |uri: Uri, headers: HeaderMap, _: Empty, State(state): State| { - state.respond_json(&headers, CacheStrategy::Deploy, &uri, |q| Ok(q.series_catalog().clone())).await + state.respond_json(&headers, CacheStrategy::Deploy, &uri, |q| Ok(q.series_catalog())).await }, |op| op .id("get_series_tree") @@ -151,7 +151,7 @@ impl ApiSeriesRoutes for ApiRouter { _: Empty, State(state): State | { - state.respond_json(&headers, CacheStrategy::Deploy, &uri, |q| Ok(q.indexes().to_vec())).await + state.respond_json(&headers, CacheStrategy::Deploy, &uri, |q| Ok(q.indexes())).await }, |op| op .id("get_indexes") diff --git a/crates/brk_server/src/config.rs b/crates/brk_server/src/config.rs index 61e1bce17..d2f405e4d 100644 --- a/crates/brk_server/src/config.rs +++ b/crates/brk_server/src/config.rs @@ -8,6 +8,10 @@ use crate::cache::CdnCacheMode; /// 50 MB - generous enough for any honest query, low enough to limit cache-buster leverage. pub const DEFAULT_MAX_WEIGHT: usize = 50 * 1_000_000; +/// Default max UTXOs returned per address. +/// Bounds worst-case work and response size, prevents heavy-address DDoS. +pub const DEFAULT_MAX_UTXOS: usize = 1000; + /// Server-wide configuration set at startup. #[derive(Debug, Clone)] pub struct ServerConfig { @@ -15,6 +19,7 @@ pub struct ServerConfig { pub website: Website, pub cdn_cache_mode: CdnCacheMode, pub max_weight: usize, + pub max_utxos: usize, } impl Default for ServerConfig { @@ -24,6 +29,7 @@ impl Default for ServerConfig { website: Website::default(), cdn_cache_mode: CdnCacheMode::default(), max_weight: DEFAULT_MAX_WEIGHT, + max_utxos: DEFAULT_MAX_UTXOS, } } } diff --git a/crates/brk_server/src/lib.rs b/crates/brk_server/src/lib.rs index 8826d95bb..e6684d641 100644 --- a/crates/brk_server/src/lib.rs +++ b/crates/brk_server/src/lib.rs @@ -50,7 +50,7 @@ pub use brk_types::Port; pub use brk_website::Website; pub use cache::CdnCacheMode; use cache::{CacheParams, CacheStrategy}; -pub use config::{DEFAULT_MAX_WEIGHT, ServerConfig}; +pub use config::{DEFAULT_MAX_UTXOS, DEFAULT_MAX_WEIGHT, ServerConfig}; pub use error::{Error, Result}; use state::*; @@ -84,6 +84,7 @@ impl Server { started_at: jiff::Timestamp::now(), started_instant: Instant::now(), max_weight: config.max_weight, + max_utxos: config.max_utxos, }) } diff --git a/crates/brk_server/src/params/blockhash_start_index.rs b/crates/brk_server/src/params/blockhash_start_index.rs index 40583a012..d77492153 100644 --- a/crates/brk_server/src/params/blockhash_start_index.rs +++ b/crates/brk_server/src/params/blockhash_start_index.rs @@ -1,7 +1,7 @@ use schemars::JsonSchema; use serde::Deserialize; -use brk_types::{BlockHash, TxIndex}; +use brk_types::{BlockHash, BlockTxIndex}; /// Block hash + starting transaction index path parameters #[derive(Deserialize, JsonSchema)] @@ -11,5 +11,5 @@ pub struct BlockHashStartIndex { /// Starting transaction index within the block (0-based) #[schemars(example = 0)] - pub start_index: TxIndex, + pub start_index: BlockTxIndex, } diff --git a/crates/brk_server/src/params/blockhash_tx_index.rs b/crates/brk_server/src/params/blockhash_tx_index.rs index 78dfef8fb..ab6dcd029 100644 --- a/crates/brk_server/src/params/blockhash_tx_index.rs +++ b/crates/brk_server/src/params/blockhash_tx_index.rs @@ -1,7 +1,7 @@ use schemars::JsonSchema; use serde::Deserialize; -use brk_types::{BlockHash, TxIndex}; +use brk_types::{BlockHash, BlockTxIndex}; /// Block hash + transaction index path parameters #[derive(Deserialize, JsonSchema)] @@ -11,5 +11,5 @@ pub struct BlockHashTxIndex { /// Transaction index within the block (0-based) #[schemars(example = 0)] - pub index: TxIndex, + pub index: BlockTxIndex, } diff --git a/crates/brk_server/src/state.rs b/crates/brk_server/src/state.rs index 0f545d244..7edf51026 100644 --- a/crates/brk_server/src/state.rs +++ b/crates/brk_server/src/state.rs @@ -25,6 +25,7 @@ pub struct AppState { pub started_at: Timestamp, pub started_instant: Instant, pub max_weight: usize, + pub max_utxos: usize, } impl AppState { diff --git a/crates/brk_store/src/lib.rs b/crates/brk_store/src/lib.rs index b5874ed90..7dd72c870 100644 --- a/crates/brk_store/src/lib.rs +++ b/crates/brk_store/src/lib.rs @@ -236,12 +236,13 @@ where } #[inline] - pub fn prefix>( + pub fn prefix>( &self, prefix: P, ) -> impl DoubleEndedIterator + '_ { + let prefix: ByteView = prefix.into(); self.keyspace - .prefix(prefix) + .prefix(&*prefix) .map(|res| res.into_inner().unwrap()) .map(|(k, v)| (K::from(ByteView::from(&*k)), V::from(ByteView::from(&*v)))) } diff --git a/crates/brk_types/src/addr_bytes.rs b/crates/brk_types/src/addr_bytes.rs index 8bf268355..6b535b0fb 100644 --- a/crates/brk_types/src/addr_bytes.rs +++ b/crates/brk_types/src/addr_bytes.rs @@ -35,7 +35,7 @@ impl AddrBytes { } pub fn hash(&self) -> u64 { - rapidhash::v3::rapidhash_v3(self.as_slice()).to_le() + rapidhash::v3::rapidhash_v3(self.as_slice()) } /// Reconstruct the script_pubkey from the address bytes diff --git a/crates/brk_types/src/block_tx_index.rs b/crates/brk_types/src/block_tx_index.rs new file mode 100644 index 000000000..60b337ba3 --- /dev/null +++ b/crates/brk_types/src/block_tx_index.rs @@ -0,0 +1,39 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Position of a transaction within a single block (0 = coinbase). +/// Distinct from `TxIndex`, which is the chain-wide global tx index. +#[derive( + Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Default, Serialize, Deserialize, JsonSchema, +)] +#[schemars(example = 0)] +pub struct BlockTxIndex(u32); + +impl From for BlockTxIndex { + #[inline] + fn from(value: u32) -> Self { + Self(value) + } +} + +impl From for u32 { + #[inline] + fn from(value: BlockTxIndex) -> Self { + value.0 + } +} + +impl From for usize { + #[inline] + fn from(value: BlockTxIndex) -> Self { + value.0 as usize + } +} + +impl std::fmt::Display for BlockTxIndex { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut buf = itoa::Buffer::new(); + let str = buf.format(self.0); + f.write_str(str) + } +} diff --git a/crates/brk_types/src/blockhash.rs b/crates/brk_types/src/blockhash.rs index 332befcea..29a7145fe 100644 --- a/crates/brk_types/src/blockhash.rs +++ b/crates/brk_types/src/blockhash.rs @@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize, Serializer, de}; use vecdb::{Bytes, Formattable}; /// Block hash -#[derive(Default, Debug, Deref, Clone, PartialEq, Eq, Hash, Bytes, JsonSchema)] +#[derive(Default, Debug, Deref, Clone, Copy, PartialEq, Eq, Hash, Bytes, JsonSchema)] #[repr(C)] #[schemars( transparent, diff --git a/crates/brk_types/src/blockhash_prefix.rs b/crates/brk_types/src/blockhash_prefix.rs index d9f0603a7..4c9a23de6 100644 --- a/crates/brk_types/src/blockhash_prefix.rs +++ b/crates/brk_types/src/blockhash_prefix.rs @@ -3,6 +3,10 @@ use derive_more::Deref; use super::BlockHash; +/// First-8-bytes prefix of a block hash, packed as a `u64`. Both +/// `From<&BlockHash>` (via `from_le_bytes`) and `From` (via +/// `from_be_bytes`, inverse of the `to_be_bytes` writer) are +/// host-independent so on-disk keys are portable across architectures. #[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct BlockHashPrefix(u64); @@ -16,7 +20,7 @@ impl From for BlockHashPrefix { impl From<&BlockHash> for BlockHashPrefix { #[inline] fn from(value: &BlockHash) -> Self { - Self(u64::from_ne_bytes( + Self(u64::from_le_bytes( value.as_slice()[0..8].try_into().unwrap(), )) } diff --git a/crates/brk_types/src/cpfp.rs b/crates/brk_types/src/cpfp.rs deleted file mode 100644 index 3b2513903..000000000 --- a/crates/brk_types/src/cpfp.rs +++ /dev/null @@ -1,99 +0,0 @@ -use derive_more::Deref; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -use crate::{FeeRate, Sats, Txid, VSize, Weight}; - -/// Position of a transaction inside a `CpfpCluster.txs` array. Cluster-local, -/// has no meaning outside the enclosing cluster. -#[derive( - Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, - Default, Deref, Serialize, Deserialize, JsonSchema, -)] -#[serde(transparent)] -pub struct CpfpClusterTxIndex(u32); - -impl From for CpfpClusterTxIndex { - fn from(v: u32) -> Self { - Self(v) - } -} - -impl From for u32 { - fn from(v: CpfpClusterTxIndex) -> Self { - v.0 - } -} - -/// CPFP (Child Pays For Parent) information for a transaction -#[derive(Debug, Default, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct CpfpInfo { - /// Ancestor transactions in the CPFP chain - pub ancestors: Vec, - /// Best (highest fee rate) descendant, if any - #[serde(skip_serializing_if = "Option::is_none")] - pub best_descendant: Option, - /// Descendant transactions in the CPFP chain - #[serde(skip_serializing_if = "Vec::is_empty")] - pub descendants: Vec, - /// Effective fee rate considering CPFP relationships (sat/vB) - #[serde(skip_serializing_if = "Option::is_none")] - pub effective_fee_per_vsize: Option, - /// Total signature operation count for the seed tx - #[serde(skip_serializing_if = "Option::is_none")] - pub sigops: Option, - /// Transaction fee (sats) - #[serde(skip_serializing_if = "Option::is_none")] - pub fee: Option, - /// Adjusted virtual size (accounting for sigops) - #[serde(skip_serializing_if = "Option::is_none")] - pub adjusted_vsize: Option, - /// Mempool cluster the seed belongs to: full tx list, SFL-linearized - /// chunks, and the seed's chunk index. Only set for unconfirmed txs. - #[serde(skip_serializing_if = "Option::is_none")] - pub cluster: Option, -} - -/// A transaction in a CPFP relationship -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct CpfpEntry { - /// Transaction ID - pub txid: Txid, - /// Transaction weight - pub weight: Weight, - /// Transaction fee (sats) - pub fee: Sats, -} - -/// CPFP cluster output for an unconfirmed tx: the connected component -/// the seed belongs to, plus its SFL linearization. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct CpfpCluster { - /// All txs in the cluster, in topological order (parents before children). - pub txs: Vec, - /// SFL-emitted chunks ordered by descending feerate. - pub chunks: Vec, - /// Index into `chunks` of the chunk containing the seed tx. - pub chunk_index: u32, -} - -/// One entry in a `CpfpCluster.txs` array. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct CpfpClusterTx { - pub txid: Txid, - pub fee: Sats, - pub weight: Weight, - /// In-cluster parents of this tx. - pub parents: Vec, -} - -/// One SFL chunk inside a `CpfpCluster`. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct CpfpClusterChunk { - /// Txs in this chunk. - pub txs: Vec, - /// Combined feerate of the chunk (sat/vB). - pub feerate: FeeRate, -} diff --git a/crates/brk_types/src/cpfp/cluster.rs b/crates/brk_types/src/cpfp/cluster.rs new file mode 100644 index 000000000..029ec599e --- /dev/null +++ b/crates/brk_types/src/cpfp/cluster.rs @@ -0,0 +1,17 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use super::{CpfpClusterChunk, CpfpClusterTx}; + +/// CPFP cluster: the connected component the seed belongs to, plus its +/// SFL linearization. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct CpfpCluster { + /// All txs in the cluster, in topological order (parents before children). + pub txs: Vec, + /// SFL-emitted chunks ordered by descending feerate. + pub chunks: Vec, + /// Index into `chunks` of the chunk containing the seed tx. + pub chunk_index: u32, +} diff --git a/crates/brk_types/src/cpfp/cluster_chunk.rs b/crates/brk_types/src/cpfp/cluster_chunk.rs new file mode 100644 index 000000000..9558894e8 --- /dev/null +++ b/crates/brk_types/src/cpfp/cluster_chunk.rs @@ -0,0 +1,16 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::FeeRate; + +use super::CpfpClusterTxIndex; + +/// One SFL chunk inside a `CpfpCluster`. `txs` is in topological order +/// (matches `CpfpCluster.txs` ordering); the chunk's `feerate` is the +/// per-chunk SFL feerate and is the same for every tx in this chunk. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct CpfpClusterChunk { + pub txs: Vec, + pub feerate: FeeRate, +} diff --git a/crates/brk_types/src/cpfp/cluster_tx.rs b/crates/brk_types/src/cpfp/cluster_tx.rs new file mode 100644 index 000000000..3033ccf7c --- /dev/null +++ b/crates/brk_types/src/cpfp/cluster_tx.rs @@ -0,0 +1,17 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::{Sats, Txid, Weight}; + +use super::CpfpClusterTxIndex; + +/// One entry in a `CpfpCluster.txs` array. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct CpfpClusterTx { + pub txid: Txid, + pub weight: Weight, + pub fee: Sats, + /// In-cluster parents of this tx. + pub parents: Vec, +} diff --git a/crates/brk_types/src/cpfp/cluster_tx_index.rs b/crates/brk_types/src/cpfp/cluster_tx_index.rs new file mode 100644 index 000000000..dda37c89f --- /dev/null +++ b/crates/brk_types/src/cpfp/cluster_tx_index.rs @@ -0,0 +1,20 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Position of a transaction inside a `CpfpCluster.txs` array. Cluster-local, +/// has no meaning outside the enclosing cluster. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[serde(transparent)] +pub struct CpfpClusterTxIndex(u32); + +impl From for CpfpClusterTxIndex { + fn from(v: u32) -> Self { + Self(v) + } +} + +impl From for u32 { + fn from(v: CpfpClusterTxIndex) -> Self { + v.0 + } +} diff --git a/crates/brk_types/src/cpfp/entry.rs b/crates/brk_types/src/cpfp/entry.rs new file mode 100644 index 000000000..419516ee6 --- /dev/null +++ b/crates/brk_types/src/cpfp/entry.rs @@ -0,0 +1,13 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::{Sats, Txid, Weight}; + +/// A transaction in a CPFP relationship. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct CpfpEntry { + pub txid: Txid, + pub weight: Weight, + pub fee: Sats, +} diff --git a/crates/brk_types/src/cpfp/info.rs b/crates/brk_types/src/cpfp/info.rs new file mode 100644 index 000000000..5e0f8e438 --- /dev/null +++ b/crates/brk_types/src/cpfp/info.rs @@ -0,0 +1,32 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::{FeeRate, Sats, SigOps, VSize}; + +use super::{CpfpCluster, CpfpEntry}; + +/// CPFP (Child Pays For Parent) information for a transaction. +#[derive(Debug, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct CpfpInfo { + /// Ancestor transactions in the CPFP chain. + pub ancestors: Vec, + /// Best (highest fee rate) descendant, if any. + pub best_descendant: Option, + /// Descendant transactions in the CPFP chain. + pub descendants: Vec, + /// Effective fee rate considering CPFP relationships (sat/vB). + pub effective_fee_per_vsize: FeeRate, + /// BIP-141 sigop cost for the seed tx (witness sigops count as 1, + /// legacy and P2SH-redeem sigops count as 4). + pub sigops: SigOps, + /// Transaction fee (sats). + pub fee: Sats, + /// Virtual size of the seed tx (vbytes). + pub vsize: VSize, + /// Policy-adjusted virtual size: `max(vsize, sigops * 5)`. + pub adjusted_vsize: VSize, + /// Cluster the seed belongs to: full tx list, SFL-linearized chunks, + /// and the seed's chunk index. + pub cluster: CpfpCluster, +} diff --git a/crates/brk_types/src/cpfp/mod.rs b/crates/brk_types/src/cpfp/mod.rs new file mode 100644 index 000000000..d5da1286a --- /dev/null +++ b/crates/brk_types/src/cpfp/mod.rs @@ -0,0 +1,13 @@ +mod cluster; +mod cluster_chunk; +mod cluster_tx; +mod cluster_tx_index; +mod entry; +mod info; + +pub use cluster::CpfpCluster; +pub use cluster_chunk::CpfpClusterChunk; +pub use cluster_tx::CpfpClusterTx; +pub use cluster_tx_index::CpfpClusterTxIndex; +pub use entry::CpfpEntry; +pub use info::CpfpInfo; diff --git a/crates/brk_types/src/lib.rs b/crates/brk_types/src/lib.rs index 3d76017f5..fb5d2ce95 100644 --- a/crates/brk_types/src/lib.rs +++ b/crates/brk_types/src/lib.rs @@ -33,6 +33,7 @@ mod block_size_entry; mod block_sizes_weights; mod block_status; mod block_timestamp; +mod block_tx_index; mod block_weight_entry; mod blockhash; mod blockhash_prefix; @@ -135,6 +136,7 @@ mod reward_stats; mod sats; mod sats_fract; mod sats_signed; +mod sigops; mod search_query; mod series_count; mod series_data; @@ -225,6 +227,7 @@ pub use block_size_entry::*; pub use block_sizes_weights::*; pub use block_status::*; pub use block_timestamp::*; +pub use block_tx_index::*; pub use block_weight_entry::*; pub use blockhash::*; pub use blockhash_prefix::*; @@ -327,6 +330,7 @@ pub use reward_stats::*; pub use sats::*; pub use sats_fract::*; pub use sats_signed::*; +pub use sigops::*; pub use search_query::*; pub use series_count::*; pub use series_data::*; diff --git a/crates/brk_types/src/mempool_entry_info.rs b/crates/brk_types/src/mempool_entry_info.rs index f5937a468..4a15eee04 100644 --- a/crates/brk_types/src/mempool_entry_info.rs +++ b/crates/brk_types/src/mempool_entry_info.rs @@ -1,11 +1,11 @@ -use crate::{Sats, Timestamp, Txid}; +use crate::{Sats, Timestamp, Txid, VSize, Weight}; /// Mempool entry info from Bitcoin Core's getrawmempool verbose #[derive(Debug, Clone)] pub struct MempoolEntryInfo { pub txid: Txid, - pub vsize: u64, - pub weight: u64, + pub vsize: VSize, + pub weight: Weight, pub fee: Sats, pub first_seen: Timestamp, pub ancestor_count: u64, diff --git a/crates/brk_types/src/mempool_recent_tx.rs b/crates/brk_types/src/mempool_recent_tx.rs index 64c166df0..c319b0c3d 100644 --- a/crates/brk_types/src/mempool_recent_tx.rs +++ b/crates/brk_types/src/mempool_recent_tx.rs @@ -19,7 +19,7 @@ pub struct MempoolRecentTx { impl From<(&Txid, &Transaction)> for MempoolRecentTx { fn from((txid, tx): (&Txid, &Transaction)) -> Self { Self { - txid: txid.clone(), + txid: *txid, fee: tx.fee, vsize: tx.vsize(), value: tx.output.iter().map(|o| o.value).sum(), diff --git a/crates/brk_types/src/pagination.rs b/crates/brk_types/src/pagination.rs index dec7abed5..b08ac2172 100644 --- a/crates/brk_types/src/pagination.rs +++ b/crates/brk_types/src/pagination.rs @@ -2,7 +2,7 @@ use schemars::JsonSchema; use serde::{Deserialize, Serialize}; /// Pagination parameters for paginated API endpoints -#[derive(Debug, Default, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] pub struct Pagination { /// Pagination index diff --git a/crates/brk_types/src/pool_hashrate_entry.rs b/crates/brk_types/src/pool_hashrate_entry.rs index a3f202d5d..0e509c11b 100644 --- a/crates/brk_types/src/pool_hashrate_entry.rs +++ b/crates/brk_types/src/pool_hashrate_entry.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -18,5 +20,5 @@ pub struct PoolHashrateEntry { /// Pool name #[serde(rename = "poolName")] #[schemars(example = &"Foundry USA")] - pub pool_name: String, + pub pool_name: Cow<'static, str>, } diff --git a/crates/brk_types/src/sigops.rs b/crates/brk_types/src/sigops.rs new file mode 100644 index 000000000..eefba083f --- /dev/null +++ b/crates/brk_types/src/sigops.rs @@ -0,0 +1,86 @@ +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +use crate::VSize; + +/// BIP-141 sigop cost. The block-level budget is 80,000, so a `u32` +/// fits a single tx's count with room to spare. +/// +/// Witness sigops count as 1; legacy and P2SH-redeem sigops count as 4. +/// Five vbytes per sigop is the policy adjustment Core applies in +/// `nSigOpCost` to discourage sigop-heavy txs (`max(weight/4, sigops*5)`). +#[derive( + Debug, + Default, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Ord, + Hash, + Serialize, + Deserialize, + JsonSchema, +)] +#[serde(transparent)] +pub struct SigOps(u32); + +impl SigOps { + pub const ZERO: Self = Self(0); + + /// Vbytes per sigop under BIP-141 policy. Core's `nSigOpCost` + /// adjustment factor: `adjusted_vsize = max(vsize, sigops * 5)`. + pub const VBYTES_PER_SIGOP: u64 = 5; + + #[inline] + pub const fn new(value: u32) -> Self { + Self(value) + } + + /// BIP-141 vbyte equivalent of this sigop count. + #[inline] + pub fn vsize_cost(self) -> VSize { + VSize::new(u64::from(self.0) * Self::VBYTES_PER_SIGOP) + } + + /// Policy-adjusted vsize: `max(vsize, sigops * 5)`. The denominator + /// Core uses when ranking sigop-heavy txs at fixed fee. + #[inline] + pub fn adjust_vsize(self, vsize: VSize) -> VSize { + vsize.max(self.vsize_cost()) + } + + /// BIP-141 sigop cost of a `bitcoin::Transaction`, given a prevout + /// lookup closure (P2SH redeem-script and witness sigops need the + /// spending script). Wraps `bitcoin::Transaction::total_sigop_cost` + /// and narrows its `usize` result to `SigOps`. + #[inline] + pub fn of_bitcoin_tx(tx: &bitcoin::Transaction, prevout_lookup: F) -> Self + where + F: FnMut(&bitcoin::OutPoint) -> Option, + { + Self::from(tx.total_sigop_cost(prevout_lookup)) + } +} + +impl From for SigOps { + #[inline] + fn from(value: u32) -> Self { + Self(value) + } +} + +impl From for SigOps { + #[inline] + fn from(value: usize) -> Self { + Self(value as u32) + } +} + +impl From for u32 { + #[inline] + fn from(value: SigOps) -> Self { + value.0 + } +} diff --git a/crates/brk_types/src/tx.rs b/crates/brk_types/src/tx.rs index cbf7a64bf..67938d0ba 100644 --- a/crates/brk_types/src/tx.rs +++ b/crates/brk_types/src/tx.rs @@ -1,6 +1,6 @@ use crate::{ - FeeRate, RawLockTime, Sats, TxIn, TxIndex, TxOut, TxStatus, TxVersionRaw, Txid, VSize, Weight, - Witness, + FeeRate, RawLockTime, Sats, SigOps, TxIn, TxIndex, TxOut, TxStatus, TxVersionRaw, Txid, VSize, + Weight, Witness, }; use bitcoin::Script; use schemars::JsonSchema; @@ -45,9 +45,9 @@ pub struct Transaction { pub weight: Weight, /// Number of signature operations - #[schemars(example = 1)] + #[schemars(example = SigOps::new(1))] #[serde(rename = "sigops")] - pub total_sigop_cost: usize, + pub total_sigop_cost: SigOps, /// Transaction fee in satoshis #[schemars(example = Sats::new(31))] @@ -105,7 +105,7 @@ impl Transaction { /// `Script::redeem_script` (push-only check + last-push extraction /// in one). Inputs whose `prevout` is `None` skip the P2SH and /// witness components - legacy script-sig sigops are still counted. - pub fn total_sigop_cost(&self) -> usize { + pub fn total_sigop_cost(&self) -> SigOps { let mut legacy: usize = 0; let mut redeem: usize = 0; let mut witness: usize = 0; @@ -143,10 +143,12 @@ impl Transaction { legacy = legacy.saturating_add(output.script_pubkey.count_sigops_legacy()); } - legacy - .saturating_mul(4) - .saturating_add(redeem.saturating_mul(4)) - .saturating_add(witness) + SigOps::from( + legacy + .saturating_mul(4) + .saturating_add(redeem.saturating_mul(4)) + .saturating_add(witness), + ) } } diff --git a/crates/brk_types/src/tx_index.rs b/crates/brk_types/src/tx_index.rs index 6f5795d1a..c54bca3e3 100644 --- a/crates/brk_types/src/tx_index.rs +++ b/crates/brk_types/src/tx_index.rs @@ -8,7 +8,8 @@ use vecdb::{CheckedSub, Formattable, Pco, PrintableIndex}; use super::StoredU32; -/// Transaction index within a block (0 = coinbase) +/// Chain-wide transaction index (0 = the genesis coinbase). For an +/// in-block position, use `BlockTxIndex` instead. #[derive( Debug, PartialEq, diff --git a/crates/brk_types/src/txid.rs b/crates/brk_types/src/txid.rs index bc40ff69a..50043a7d6 100644 --- a/crates/brk_types/src/txid.rs +++ b/crates/brk_types/src/txid.rs @@ -7,7 +7,7 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer, de}; use vecdb::{Bytes, Formattable}; /// Transaction ID (hash) -#[derive(Debug, Deref, Clone, PartialEq, Eq, JsonSchema, Bytes, Hash)] +#[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, JsonSchema, Bytes, Hash)] #[schemars( example = "4a5e1e4baab89f3a32518a88c31bc87f618f76673e2cc77ab2127b7afdeda33b", example = "2bb85f4b004be6da54f766c17c1e855187327112c231ef2ff35ebad0ea67c69e", @@ -20,6 +20,14 @@ pub struct Txid([u8; 32]); impl Txid { /// Coinbase transaction "txid" - all zeros (used for coinbase inputs) pub const COINBASE: Self = Self([0u8; 32]); + + /// Reinterpret a slice of `Txid`s as a slice of `bitcoin::Txid`s. + /// Both are `#[repr(C)]` newtypes over `[u8; 32]` with identical + /// layout, so this is a zero-cost view (no allocation, no copy). + #[inline] + pub fn as_bitcoin_slice(slice: &[Txid]) -> &[bitcoin::Txid] { + unsafe { &*(slice as *const [Txid] as *const [bitcoin::Txid]) } + } } impl From for Txid { diff --git a/crates/brk_types/src/txid_prefix.rs b/crates/brk_types/src/txid_prefix.rs index 7f0d0b801..b806a30d8 100644 --- a/crates/brk_types/src/txid_prefix.rs +++ b/crates/brk_types/src/txid_prefix.rs @@ -3,6 +3,10 @@ use derive_more::Deref; use super::Txid; +/// First-8-bytes prefix of a txid, packed as a `u64`. Both `From<&Txid>` +/// (via `from_le_bytes`) and `From` (via `from_be_bytes`, +/// inverse of the `to_be_bytes` writer) are host-independent so on-disk +/// keys are portable across architectures. #[derive(Debug, Deref, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct TxidPrefix(u64); @@ -16,7 +20,7 @@ impl From for TxidPrefix { impl From<&Txid> for TxidPrefix { #[inline] fn from(value: &Txid) -> Self { - Self(u64::from_ne_bytes( + Self(u64::from_le_bytes( value.as_slice()[0..8].try_into().unwrap(), )) } diff --git a/crates/brk_types/src/vsize.rs b/crates/brk_types/src/vsize.rs index 1bbea96a4..44d4a2d0d 100644 --- a/crates/brk_types/src/vsize.rs +++ b/crates/brk_types/src/vsize.rs @@ -1,6 +1,6 @@ use std::{ iter::Sum, - ops::{Add, AddAssign, Div, Sub, SubAssign}, + ops::{Add, AddAssign, Div, Mul, Sub, SubAssign}, }; use derive_more::Deref; @@ -57,6 +57,13 @@ impl From for VSize { } } +impl From for VSize { + #[inline] + fn from(value: u32) -> Self { + Self(u64::from(value)) + } +} + impl From for u64 { #[inline] fn from(value: VSize) -> Self { @@ -127,6 +134,13 @@ impl Div for VSize { } } +impl Mul for VSize { + type Output = Self; + fn mul(self, rhs: u32) -> Self::Output { + Self(self.0 * u64::from(rhs)) + } +} + impl Sum for VSize { fn sum>(iter: I) -> Self { Self(iter.map(|v| v.0).sum()) diff --git a/modules/brk-client/index.js b/modules/brk-client/index.js index 6cad0765d..5fac84df5 100644 --- a/modules/brk-client/index.js +++ b/modules/brk-client/index.js @@ -193,14 +193,14 @@ Matches mempool.space/bitcoin-cli behavior. * * @typedef {Object} BlockHashStartIndex * @property {BlockHash} hash - Bitcoin block hash - * @property {TxIndex} startIndex - Starting transaction index within the block (0-based) + * @property {BlockTxIndex} startIndex - Starting transaction index within the block (0-based) */ /** * Block hash + transaction index path parameters * * @typedef {Object} BlockHashTxIndex * @property {BlockHash} hash - Bitcoin block hash - * @property {TxIndex} index - Transaction index within the block (0-based) + * @property {BlockTxIndex} index - Transaction index within the block (0-based) */ /** * Block information matching mempool.space /api/block/{hash} @@ -289,6 +289,12 @@ Matches mempool.space/bitcoin-cli behavior. * @property {BlockHash} hash - Block hash * @property {string} timestamp - Block timestamp in ISO 8601 format */ +/** + * Position of a transaction within a single block (0 = coinbase). + * Distinct from `TxIndex`, which is the chain-wide global tx index. + * + * @typedef {number} BlockTxIndex + */ /** * A single block weight data point. * @@ -367,8 +373,8 @@ Matches mempool.space/bitcoin-cli behavior. * @typedef {("supply"|"realized"|"unrealized")} CostBasisValue */ /** - * CPFP cluster output for an unconfirmed tx: the connected component - * the seed belongs to, plus its SFL linearization. + * CPFP cluster: the connected component the seed belongs to, plus its + * SFL linearization. * * @typedef {Object} CpfpCluster * @property {CpfpClusterTx[]} txs - All txs in the cluster, in topological order (parents before children). @@ -376,19 +382,21 @@ Matches mempool.space/bitcoin-cli behavior. * @property {number} chunkIndex - Index into `chunks` of the chunk containing the seed tx. */ /** - * One SFL chunk inside a `CpfpCluster`. + * One SFL chunk inside a `CpfpCluster`. `txs` is in topological order + * (matches `CpfpCluster.txs` ordering); the chunk's `feerate` is the + * per-chunk SFL feerate and is the same for every tx in this chunk. * * @typedef {Object} CpfpClusterChunk - * @property {CpfpClusterTxIndex[]} txs - Txs in this chunk. - * @property {FeeRate} feerate - Combined feerate of the chunk (sat/vB). + * @property {CpfpClusterTxIndex[]} txs + * @property {FeeRate} feerate */ /** * One entry in a `CpfpCluster.txs` array. * * @typedef {Object} CpfpClusterTx * @property {Txid} txid - * @property {Sats} fee * @property {Weight} weight + * @property {Sats} fee * @property {CpfpClusterTxIndex[]} parents - In-cluster parents of this tx. */ /** @@ -398,26 +406,28 @@ Matches mempool.space/bitcoin-cli behavior. * @typedef {number} CpfpClusterTxIndex */ /** - * A transaction in a CPFP relationship + * A transaction in a CPFP relationship. * * @typedef {Object} CpfpEntry - * @property {Txid} txid - Transaction ID - * @property {Weight} weight - Transaction weight - * @property {Sats} fee - Transaction fee (sats) + * @property {Txid} txid + * @property {Weight} weight + * @property {Sats} fee */ /** - * CPFP (Child Pays For Parent) information for a transaction + * CPFP (Child Pays For Parent) information for a transaction. * * @typedef {Object} CpfpInfo - * @property {CpfpEntry[]} ancestors - Ancestor transactions in the CPFP chain - * @property {(CpfpEntry|null)=} bestDescendant - Best (highest fee rate) descendant, if any - * @property {CpfpEntry[]} descendants - Descendant transactions in the CPFP chain - * @property {(FeeRate|null)=} effectiveFeePerVsize - Effective fee rate considering CPFP relationships (sat/vB) - * @property {?number=} sigops - Total signature operation count for the seed tx - * @property {(Sats|null)=} fee - Transaction fee (sats) - * @property {(VSize|null)=} adjustedVsize - Adjusted virtual size (accounting for sigops) - * @property {(CpfpCluster|null)=} cluster - Mempool cluster the seed belongs to: full tx list, SFL-linearized -chunks, and the seed's chunk index. Only set for unconfirmed txs. + * @property {CpfpEntry[]} ancestors - Ancestor transactions in the CPFP chain. + * @property {(CpfpEntry|null)=} bestDescendant - Best (highest fee rate) descendant, if any. + * @property {CpfpEntry[]} descendants - Descendant transactions in the CPFP chain. + * @property {FeeRate} effectiveFeePerVsize - Effective fee rate considering CPFP relationships (sat/vB). + * @property {SigOps} sigops - BIP-141 sigop cost for the seed tx (witness sigops count as 1, +legacy and P2SH-redeem sigops count as 4). + * @property {Sats} fee - Transaction fee (sats). + * @property {VSize} vsize - Virtual size of the seed tx (vbytes). + * @property {VSize} adjustedVsize - Policy-adjusted virtual size: `max(vsize, sigops * 5)`. + * @property {CpfpCluster} cluster - Cluster the seed belongs to: full tx list, SFL-linearized chunks, +and the seed's chunk index. */ /** * Range parameters with output format for API query parameters. @@ -1050,6 +1060,16 @@ on serialization otherwise. * @property {(Limit|null)=} limit - Maximum number of values to return (ignored if `end` is set). Aliases: `count`, `c`, `l` * @property {Format=} format - Format of the output */ +/** + * BIP-141 sigop cost. The block-level budget is 80,000, so a `u32` + * fits a single tx's count with room to spare. + * + * Witness sigops count as 1; legacy and P2SH-redeem sigops count as 4. + * Five vbytes per sigop is the policy adjustment Core applies in + * `nSigOpCost` to discourage sigop-heavy txs (`max(weight/4, sigops*5)`). + * + * @typedef {number} SigOps + */ /** @typedef {boolean} StoredBool */ /** * Stored 32-bit floating point value @@ -1133,7 +1153,7 @@ on serialization otherwise. * @property {TxOut[]} vout - Transaction outputs * @property {number} size - Transaction size in bytes * @property {Weight} weight - Transaction weight - * @property {number} sigops - Number of signature operations + * @property {SigOps} sigops - Number of signature operations * @property {Sats} fee - Transaction fee in satoshis * @property {TxStatus} status - Confirmation status (confirmed, block height/hash/time) */ @@ -1159,7 +1179,8 @@ on serialization otherwise. */ /** @typedef {number} TxInIndex */ /** - * Transaction index within a block (0 = coinbase) + * Chain-wide transaction index (0 = the genesis coinbase). For an + * in-block position, use `BlockTxIndex` instead. * * @typedef {number} TxIndex */ @@ -10670,7 +10691,7 @@ class BrkClient extends BrkClientBase { * Endpoint: `GET /api/block/{hash}/txid/{index}` * * @param {BlockHash} hash - Bitcoin block hash - * @param {TxIndex} index - Transaction index within the block (0-based) + * @param {BlockTxIndex} index - Transaction index within the block (0-based) * @param {{ signal?: AbortSignal, onValue?: (value: Txid) => void }} [options] * @returns {Promise} */ @@ -10725,7 +10746,7 @@ class BrkClient extends BrkClientBase { * Endpoint: `GET /api/block/{hash}/txs/{start_index}` * * @param {BlockHash} hash - Bitcoin block hash - * @param {TxIndex} start_index - Starting transaction index within the block (0-based) + * @param {BlockTxIndex} start_index - Starting transaction index within the block (0-based) * @param {{ signal?: AbortSignal, onValue?: (value: Transaction[]) => void }} [options] * @returns {Promise} */ diff --git a/packages/brk_client/brk_client/__init__.py b/packages/brk_client/brk_client/__init__.py index efe06f109..e13ad2eb8 100644 --- a/packages/brk_client/brk_client/__init__.py +++ b/packages/brk_client/brk_client/__init__.py @@ -65,8 +65,9 @@ Height = int Timestamp = int # Block hash BlockHash = str -# Transaction index within a block (0 = coinbase) -TxIndex = int +# Position of a transaction within a single block (0 = coinbase). +# Distinct from `TxIndex`, which is the chain-wide global tx index. +BlockTxIndex = int # Unsigned cents (u64) - for values that should never be negative. # Used for invested capital, realized cap, etc. Cents = int @@ -105,6 +106,13 @@ UrpdAggregation = Literal["raw", "lin200", "lin500", "lin1000", "log10", "log50" # Position of a transaction inside a `CpfpCluster.txs` array. Cluster-local, # has no meaning outside the enclosing cluster. CpfpClusterTxIndex = int +# BIP-141 sigop cost. The block-level budget is 80,000, so a `u32` +# fits a single tx's count with room to spare. +# +# Witness sigops count as 1; legacy and P2SH-redeem sigops count as 4. +# Five vbytes per sigop is the policy adjustment Core applies in +# `nSigOpCost` to discourage sigop-heavy txs (`max(weight/4, sigops*5)`). +SigOps = int # Virtual size in vbytes (weight / 4, rounded up). Max block vsize is ~1,000,000 vB. VSize = int # Date in YYYYMMDD format stored as u32 @@ -221,6 +229,9 @@ Vout = int # and matching brk's `script_sig: ScriptBuf` (bytes internally, hex # on the wire). Witness = List[str] +# Chain-wide transaction index (0 = the genesis coinbase). For an +# in-block position, use `BlockTxIndex` instead. +TxIndex = int # Raw transaction version (i32) from Bitcoin protocol. # Unlike TxVersion (u8, indexed), this preserves non-standard values # used in coinbase txs for miner signaling/branding. @@ -481,7 +492,7 @@ class BlockHashStartIndex(TypedDict): start_index: Starting transaction index within the block (0-based) """ hash: BlockHash - start_index: TxIndex + start_index: BlockTxIndex class BlockHashTxIndex(TypedDict): """ @@ -492,7 +503,7 @@ class BlockHashTxIndex(TypedDict): index: Transaction index within the block (0-based) """ hash: BlockHash - index: TxIndex + index: BlockTxIndex class BlockInfo(TypedDict): """ @@ -655,11 +666,9 @@ class CostBasisQuery(TypedDict): class CpfpClusterChunk(TypedDict): """ - One SFL chunk inside a `CpfpCluster`. - - Attributes: - txs: Txs in this chunk. - feerate: Combined feerate of the chunk (sat/vB). + One SFL chunk inside a `CpfpCluster`. `txs` is in topological order + (matches `CpfpCluster.txs` ordering); the chunk's `feerate` is the + per-chunk SFL feerate and is the same for every tx in this chunk. """ txs: List[CpfpClusterTxIndex] feerate: FeeRate @@ -672,14 +681,14 @@ class CpfpClusterTx(TypedDict): parents: In-cluster parents of this tx. """ txid: Txid - fee: Sats weight: Weight + fee: Sats parents: List[CpfpClusterTxIndex] class CpfpCluster(TypedDict): """ - CPFP cluster output for an unconfirmed tx: the connected component - the seed belongs to, plus its SFL linearization. + CPFP cluster: the connected component the seed belongs to, plus its + SFL linearization. Attributes: txs: All txs in the cluster, in topological order (parents before children). @@ -692,12 +701,7 @@ class CpfpCluster(TypedDict): class CpfpEntry(TypedDict): """ - A transaction in a CPFP relationship - - Attributes: - txid: Transaction ID - weight: Transaction weight - fee: Transaction fee (sats) + A transaction in a CPFP relationship. """ txid: Txid weight: Weight @@ -705,27 +709,30 @@ class CpfpEntry(TypedDict): class CpfpInfo(TypedDict): """ - CPFP (Child Pays For Parent) information for a transaction + CPFP (Child Pays For Parent) information for a transaction. Attributes: - ancestors: Ancestor transactions in the CPFP chain - bestDescendant: Best (highest fee rate) descendant, if any - descendants: Descendant transactions in the CPFP chain - effectiveFeePerVsize: Effective fee rate considering CPFP relationships (sat/vB) - sigops: Total signature operation count for the seed tx - fee: Transaction fee (sats) - adjustedVsize: Adjusted virtual size (accounting for sigops) - cluster: Mempool cluster the seed belongs to: full tx list, SFL-linearized -chunks, and the seed's chunk index. Only set for unconfirmed txs. + ancestors: Ancestor transactions in the CPFP chain. + bestDescendant: Best (highest fee rate) descendant, if any. + descendants: Descendant transactions in the CPFP chain. + effectiveFeePerVsize: Effective fee rate considering CPFP relationships (sat/vB). + sigops: BIP-141 sigop cost for the seed tx (witness sigops count as 1, +legacy and P2SH-redeem sigops count as 4). + fee: Transaction fee (sats). + vsize: Virtual size of the seed tx (vbytes). + adjustedVsize: Policy-adjusted virtual size: `max(vsize, sigops * 5)`. + cluster: Cluster the seed belongs to: full tx list, SFL-linearized chunks, +and the seed's chunk index. """ ancestors: List[CpfpEntry] bestDescendant: Union[CpfpEntry, None] descendants: List[CpfpEntry] - effectiveFeePerVsize: Union[FeeRate, None] - sigops: Optional[int] - fee: Union[Sats, None] - adjustedVsize: Union[VSize, None] - cluster: Union[CpfpCluster, None] + effectiveFeePerVsize: FeeRate + sigops: SigOps + fee: Sats + vsize: VSize + adjustedVsize: VSize + cluster: CpfpCluster class DataRangeFormat(TypedDict): """ @@ -1589,7 +1596,7 @@ class Transaction(TypedDict): vout: List[TxOut] size: int weight: Weight - sigops: int + sigops: SigOps fee: Sats status: TxStatus @@ -7935,7 +7942,7 @@ class BrkClient(BrkClientBase): Endpoint: `GET /api/block/{hash}/status`""" return self.get_json(f'/api/block/{hash}/status') - def get_block_txid(self, hash: BlockHash, index: TxIndex) -> Txid: + def get_block_txid(self, hash: BlockHash, index: BlockTxIndex) -> Txid: """Transaction ID at index. Retrieve a single transaction ID at a specific index within a block. Returns plain text txid. @@ -7965,7 +7972,7 @@ class BrkClient(BrkClientBase): Endpoint: `GET /api/block/{hash}/txs`""" return self.get_json(f'/api/block/{hash}/txs') - def get_block_txs_from_index(self, hash: BlockHash, start_index: TxIndex) -> List[Transaction]: + def get_block_txs_from_index(self, hash: BlockHash, start_index: BlockTxIndex) -> List[Transaction]: """Block transactions (paginated). Retrieve transactions in a block by block hash, starting from the specified index. Returns up to 25 transactions at a time. diff --git a/packages/brk_client/tests/mempool_compat/transactions/test_cpfp.py b/packages/brk_client/tests/mempool_compat/transactions/test_cpfp.py index 42f2267e5..bb3797d45 100644 --- a/packages/brk_client/tests/mempool_compat/transactions/test_cpfp.py +++ b/packages/brk_client/tests/mempool_compat/transactions/test_cpfp.py @@ -38,15 +38,15 @@ def test_cpfp_invariants(brk, live): assert int(c["adjustedVsize"]) > 0 -def test_cpfp_unknown_tx_returns_empty(brk, mempool): - """Both servers return {ancestors: []} for any 64-char hex (no 404).""" +def test_cpfp_unknown_txid(brk, mempool): + """mempool.space returns 200 with {ancestors: []}; brk distinguishes + 'unknown txid' from 'tx with no neighbors' and returns an error.""" bad = "0" * 64 path = f"/api/v1/cpfp/{bad}" - b = brk.get_cpfp(bad) m = mempool.get_json(path) - show("GET", path, b, m) - assert b.get("ancestors") == [] assert m.get("ancestors") == [] + with pytest.raises(BrkError): + brk.get_cpfp(bad) @pytest.mark.parametrize("bad", ["abc", "deadbeef"])