mempool: snap

This commit is contained in:
nym21
2026-04-22 15:30:08 +02:00
parent 6afce0bbdc
commit bdc3ba1df6
24 changed files with 1557 additions and 299 deletions

View File

@@ -42,65 +42,134 @@ impl IndexMut<PoolIndex> for Graph {
/// Build a dependency graph from mempool entries.
///
/// NOTE(review): this span is a rendered diff hunk without +/- markers, so
/// statements that look like the pre-change and post-change versions sit back
/// to back (two `live` bindings, two `prefix_to_pool` bindings, two `.map(`
/// heads, an edge-vector pass followed by an index-loop pass). Confirm against
/// the real file which of each pair is current before editing.
pub fn build_graph(entries: &[Option<Entry>]) -> Graph {
// Collect live entries with their indices
let live: Vec<(TxIndex, &Entry)> = entries
.iter()
.enumerate()
.filter_map(|(i, opt)| opt.as_ref().map(|e| (TxIndex::from(i), e)))
.collect();
let mut live: Vec<(TxIndex, &Entry)> = Vec::with_capacity(entries.len());
for (i, opt) in entries.iter().enumerate() {
if let Some(e) = opt.as_ref() {
live.push((TxIndex::from(i), e));
}
}
// Nothing live: return an empty graph.
if live.is_empty() {
return Graph(Vec::new());
}
// Map TxidPrefix -> PoolIndex for parent lookups
let prefix_to_pool: FxHashMap<TxidPrefix, PoolIndex> = live
.iter()
.enumerate()
.map(|(i, (_, entry))| (entry.txid_prefix(), PoolIndex::from(i)))
.collect();
let mut prefix_to_pool: FxHashMap<TxidPrefix, PoolIndex> =
FxHashMap::with_capacity_and_hasher(live.len(), Default::default());
for (i, (_, entry)) in live.iter().enumerate() {
prefix_to_pool.insert(entry.txid_prefix(), PoolIndex::from(i));
}
// Build nodes with parent relationships
let mut nodes: Vec<TxNode> = live
.iter()
.enumerate()
.map(|(pool_idx, (tx_index, entry))| {
let pool_index = PoolIndex::from(pool_idx);
let mut node = TxNode::new(
*tx_index,
pool_index,
entry.fee,
entry.vsize,
entry.ancestor_fee,
entry.ancestor_vsize,
);
// Add in-mempool parents
.map(|(tx_index, entry)| {
let mut node = TxNode::new(*tx_index, entry.fee, entry.vsize);
// Only dependencies whose prefix is present in `prefix_to_pool` become
// parent edges; any other entry of `depends` is ignored.
for parent_prefix in &entry.depends {
if let Some(&parent_pool_idx) = prefix_to_pool.get(parent_prefix) {
node.parents.push(parent_pool_idx);
}
}
node
})
.collect();
// Collect parent->child edges (avoids cloning each node's parents)
let edges: Vec<(usize, PoolIndex)> = nodes
.iter()
.enumerate()
.flat_map(|(i, node)| {
node.parents
.iter()
.map(move |&p| (p.as_usize(), PoolIndex::from(i)))
})
.collect();
// Build child relationships
for (parent_idx, child_idx) in edges {
nodes[parent_idx].children.push(child_idx);
// Populate children via direct indexing; no intermediate edge vec.
// Reading parents[j] as a Copy value releases the immutable borrow
// before the mutable borrow of children's owner.
for i in 0..nodes.len() {
let plen = nodes[i].parents.len();
for j in 0..plen {
let parent_idx = nodes[i].parents[j].as_usize();
nodes[parent_idx].children.push(PoolIndex::from(i));
}
}
Graph(nodes)
}
#[cfg(test)]
mod bench {
    use std::time::Instant;

    use bitcoin::hashes::Hash;
    use brk_types::{Sats, Timestamp, Txid, VSize};
    use smallvec::SmallVec;

    use super::build_graph;
    use crate::entry::Entry;

    /// Deterministically pick an earlier index in `0..i` for tx `i`.
    ///
    /// `(i * k) % i` is 0 for every `i` (barring wrap-around), which would make
    /// tx 0 the parent of every dependent tx. Mixing the bits first spreads the
    /// parents over the whole prefix. Requires `i > 0`.
    fn pick_parent(i: usize, salt: u64) -> usize {
        let mixed = ((i as u64) ^ salt).wrapping_mul(0x9E37_79B9_7F4A_7C15);
        (mixed >> 32) as usize % i
    }

    /// Synthetic mempool: mostly singletons, some CPFP chains/trees.
    fn synthetic_mempool(n: usize) -> Vec<Option<Entry>> {
        let make_txid = |i: usize| -> Txid {
            let mut bytes = [0u8; 32];
            bytes[0..8].copy_from_slice(&(i as u64).to_ne_bytes());
            bytes[8..16].copy_from_slice(&((i as u64).wrapping_mul(2654435761)).to_ne_bytes());
            Txid::from(bitcoin::Txid::from_slice(&bytes).unwrap())
        };

        let mut entries: Vec<Option<Entry>> = Vec::with_capacity(n);
        let mut txids: Vec<Txid> = Vec::with_capacity(n);
        for i in 0..n {
            let txid = make_txid(i);
            txids.push(txid.clone());
            // 95% singletons, 4% 1-parent, 1% 2-parent (mimics real mempool).
            let depends: SmallVec<[brk_types::TxidPrefix; 2]> = match i % 100 {
                0..=94 => SmallVec::new(),
                95..=98 if i > 0 => {
                    let p = pick_parent(i, 7919);
                    std::iter::once(brk_types::TxidPrefix::from(&txids[p])).collect()
                }
                _ if i > 1 => {
                    let p1 = pick_parent(i, 7919);
                    let mut p2 = pick_parent(i, 6151);
                    // Keep the two parents distinct; `i > 1` leaves room for it.
                    if p2 == p1 {
                        p2 = (p1 + 1) % i;
                    }
                    [
                        brk_types::TxidPrefix::from(&txids[p1]),
                        brk_types::TxidPrefix::from(&txids[p2]),
                    ]
                    .into_iter()
                    .collect()
                }
                _ => SmallVec::new(),
            };
            entries.push(Some(Entry {
                txid,
                fee: Sats::from((i as u64).wrapping_mul(137) % 10_000 + 1),
                vsize: VSize::from(250u64),
                size: 250,
                ancestor_fee: Sats::from(0u64),
                ancestor_vsize: VSize::from(250u64),
                depends,
                first_seen: Timestamp::now(),
            }));
        }
        entries
    }

    /// Times a single `build_graph` call per mempool size and prints a table
    /// to stderr. Ignored by default; it asserts nothing.
    #[test]
    #[ignore = "perf benchmark; run with --ignored --nocapture"]
    fn perf_build_graph() {
        let sizes = [1_000usize, 10_000, 50_000, 100_000, 300_000];
        eprintln!();
        eprintln!("build_graph perf (release, single call):");
        eprintln!(" n build");
        eprintln!(" ------------------------");
        for &n in &sizes {
            let entries = synthetic_mempool(n);
            // Warm up allocator.
            let _ = build_graph(&entries);
            let t = Instant::now();
            let g = build_graph(&entries);
            let dt = t.elapsed();
            let ns = dt.as_nanos();
            let pretty = if ns >= 1_000_000 {
                format!("{:.2} ms", ns as f64 / 1_000_000.0)
            } else {
                format!("{:.2} µs", ns as f64 / 1_000.0)
            };
            eprintln!(" {:<10} {:<10} ({} nodes)", n, pretty, g.len());
        }
        eprintln!();
    }
}

View File

@@ -1,70 +0,0 @@
use std::cmp::Ordering;
use brk_types::{Sats, VSize};
use super::tx_node::TxNode;
use crate::types::PoolIndex;
/// Entry in the priority heap for transaction selection.
///
/// Stores a snapshot of the score at insertion time. The `generation` field
/// lets the selector detect and skip stale entries after descendants are
/// re-pushed with updated ancestor totals.
#[derive(Clone, Copy)]
pub struct HeapEntry {
/// Index of the node this entry was built from (copied from the node's `pool_index`).
pub pool_index: PoolIndex,
/// The node's `generation` value at the time the entry was built.
pub generation: u32,
/// Fee side of the snapshot score; numerator in the cross-multiplied comparison.
ancestor_fee: Sats,
/// Size side of the snapshot score; denominator in the cross-multiplied comparison.
ancestor_vsize: VSize,
}
impl HeapEntry {
pub fn new(node: &TxNode) -> Self {
Self {
pool_index: node.pool_index,
generation: node.generation,
ancestor_fee: node.ancestor_fee,
ancestor_vsize: node.ancestor_vsize,
}
}
/// Compare fee rates: self > other?
#[inline]
fn has_higher_fee_rate_than(&self, other: &Self) -> bool {
// Cross multiply to avoid division:
// fee_a/vsize_a > fee_b/vsize_b ⟺ fee_a * vsize_b > fee_b * vsize_a
let self_score =
u64::from(self.ancestor_fee) as u128 * u64::from(other.ancestor_vsize) as u128;
let other_score =
u64::from(other.ancestor_fee) as u128 * u64::from(self.ancestor_vsize) as u128;
self_score > other_score
}
}
impl PartialEq for HeapEntry {
fn eq(&self, other: &Self) -> bool {
self.cmp(other).is_eq()
}
}
// Marker impl: `eq` is derived from `Ord::cmp`, which is a total comparison.
impl Eq for HeapEntry {}
impl PartialOrd for HeapEntry {
    /// Always `Some`: delegates to the total order given by `Ord::cmp`.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl Ord for HeapEntry {
    /// Orders by snapshot fee rate (higher rate = greater). Equal rates are
    /// broken by pool index, with the lower index ranking as greater.
    fn cmp(&self, other: &Self) -> Ordering {
        if self.has_higher_fee_rate_than(other) {
            return Ordering::Greater;
        }
        if other.has_higher_fee_rate_than(self) {
            return Ordering::Less;
        }
        // Reversed on purpose: the lower index comes out on top.
        self.pool_index.cmp(&other.pool_index).reverse()
    }
}

View File

@@ -0,0 +1,194 @@
//! Cluster-mempool linearization.
//!
//! Partitions the mempool dependency graph into connected components
//! ("clusters"), linearizes each into chunks ordered by descending
//! feerate, and emits the resulting chunks as `Package`s. The inner
//! algorithm (see `sfl.rs`) is a topologically-closed-subset search,
//! optimal for clusters up to 18 txs and near-optimal beyond that.
mod sfl;
#[cfg(test)]
mod tests;
use brk_types::{FeeRate, Sats, VSize};
use rustc_hash::FxHashMap;
use smallvec::SmallVec;
use super::{graph::Graph, package::Package};
use crate::types::{PoolIndex, TxIndex};
/// Cluster-local index for a node within one cluster's flat array.
type LocalIdx = u32;
/// A connected component of the mempool graph, re-indexed locally.
///
/// Both vectors have one slot per member and are indexed by the same
/// `LocalIdx`.
struct Cluster {
/// Nodes indexed by `LocalIdx`.
nodes: Vec<ClusterNode>,
/// `topo_rank[i] = position of node i in a Kahn topological order`.
/// Used during chunk emission to print txs parents-first.
topo_rank: Vec<u32>,
}
/// One transaction of a cluster, with its edges expressed as cluster-local
/// indices.
struct ClusterNode {
/// Index of the tx, copied from the graph node it was built from.
tx_index: TxIndex,
/// The tx's own fee.
fee: Sats,
/// The tx's own vsize.
vsize: VSize,
/// Local indices of this tx's parents inside the same cluster.
parents: SmallVec<[LocalIdx; 2]>,
/// Local indices of this tx's children inside the same cluster.
children: SmallVec<[LocalIdx; 2]>,
}
/// Split `graph` into clusters, linearize every cluster, and return all
/// resulting chunks as one flat `Vec<Package>`.
///
/// One-tx clusters skip the linearizer and become a one-tx package directly.
/// No order is guaranteed across clusters; the partitioner re-sorts by fee
/// rate downstream.
pub fn linearize_clusters(graph: &Graph) -> Vec<Package> {
    let clusters = find_components(graph);
    let mut packages: Vec<Package> = Vec::with_capacity(clusters.len());
    for cluster in &clusters {
        match cluster.nodes.len() {
            1 => packages.push(singleton_package(cluster)),
            _ => packages.extend(
                sfl::linearize(cluster)
                    .iter()
                    .map(|chunk| chunk_to_package(cluster, chunk)),
            ),
        }
    }
    packages
}
/// Flood-fill over the undirected (parents + children) adjacency to split
/// `graph` into connected components, each re-indexed locally.
///
/// The worklist is popped from the back, so the visiting order is
/// depth-first-like. That order never reaches the result, because members
/// are sorted before the cluster is built.
fn find_components(graph: &Graph) -> Vec<Cluster> {
    let total = graph.len();
    let mut visited: Vec<bool> = vec![false; total];
    let mut clusters: Vec<Cluster> = Vec::new();
    // Reused across components to avoid reallocating.
    let mut worklist: Vec<PoolIndex> = Vec::new();

    for root in 0..total {
        if visited[root] {
            continue;
        }
        visited[root] = true;
        worklist.clear();
        worklist.push(PoolIndex::from(root));

        let mut members: Vec<PoolIndex> = Vec::new();
        while let Some(current) = worklist.pop() {
            members.push(current);
            let node = &graph[current];
            for &neighbour in node.parents.iter().chain(node.children.iter()) {
                let flag = &mut visited[neighbour.as_usize()];
                if !*flag {
                    *flag = true;
                    worklist.push(neighbour);
                }
            }
        }

        // Sorted PoolIndex order gives a deterministic LocalIdx assignment,
        // which keeps the linearizer's output stable across sync ticks.
        members.sort_unstable();
        clusters.push(build_cluster(graph, members));
    }
    clusters
}
/// Build a re-indexed `Cluster` from a set of graph members.
fn build_cluster(graph: &Graph, members: Vec<PoolIndex>) -> Cluster {
let pool_to_local: FxHashMap<PoolIndex, LocalIdx> = members
.iter()
.enumerate()
.map(|(i, &p)| (p, i as LocalIdx))
.collect();
let mut nodes: Vec<ClusterNode> = Vec::with_capacity(members.len());
for &pool_idx in &members {
let node = &graph[pool_idx];
let mut parents: SmallVec<[LocalIdx; 2]> = SmallVec::new();
for &p in &node.parents {
if let Some(&local) = pool_to_local.get(&p) {
parents.push(local);
}
}
let mut children: SmallVec<[LocalIdx; 2]> = SmallVec::new();
for &c in &node.children {
if let Some(&local) = pool_to_local.get(&c) {
children.push(local);
}
}
nodes.push(ClusterNode {
tx_index: node.tx_index,
fee: node.fee,
vsize: node.vsize,
parents,
children,
});
}
let topo_rank = kahn_topo_rank(&nodes);
Cluster { nodes, topo_rank }
}
/// Kahn's algorithm: `rank[i]` is the position of node `i` in a topological
/// (parents-first) order.
///
/// Nodes become ready in FIFO order, seeded with the parentless nodes in
/// ascending index order. In debug builds a cycle trips the assertion; in
/// release builds nodes on a cycle keep rank 0.
fn kahn_topo_rank(nodes: &[ClusterNode]) -> Vec<u32> {
    let count = nodes.len();
    // Number of not-yet-ranked parents per node.
    let mut pending: Vec<u32> = nodes.iter().map(|node| node.parents.len() as u32).collect();

    let mut queue: Vec<LocalIdx> = Vec::with_capacity(count);
    for (i, &deg) in pending.iter().enumerate() {
        if deg == 0 {
            queue.push(i as LocalIdx);
        }
    }

    let mut rank: Vec<u32> = vec![0; count];
    // `cursor` is both the queue head and the next rank to hand out.
    let mut cursor = 0usize;
    while let Some(&v) = queue.get(cursor) {
        rank[v as usize] = cursor as u32;
        cursor += 1;
        for &child in &nodes[v as usize].children {
            let left = &mut pending[child as usize];
            *left -= 1;
            if *left == 0 {
                queue.push(child);
            }
        }
    }
    debug_assert_eq!(cursor, count, "cluster contained a cycle");
    rank
}
/// Build a one-tx `Package` for a cluster of size 1.
fn singleton_package(cluster: &Cluster) -> Package {
let node = &cluster.nodes[0];
let fee_rate = FeeRate::from((node.fee, node.vsize));
let mut package = Package::new(fee_rate);
package.add_tx(node.tx_index, u64::from(node.vsize));
package
}
/// Convert an SFL-emitted chunk (set of local indices) into a `Package`.
/// Txs inside the package are ordered parents-first by `topo_rank`.
fn chunk_to_package(cluster: &Cluster, chunk: &sfl::Chunk) -> Package {
let fee_rate = FeeRate::from((Sats::from(chunk.fee), VSize::from(chunk.vsize)));
let mut package = Package::new(fee_rate);
let mut ordered: SmallVec<[LocalIdx; 8]> = chunk.nodes.iter().copied().collect();
ordered.sort_by_key(|&local| cluster.topo_rank[local as usize]);
for local in ordered {
let node = &cluster.nodes[local as usize];
package.add_tx(node.tx_index, u64::from(node.vsize));
}
package
}

View File

@@ -0,0 +1,264 @@
//! Cluster linearizer.
//!
//! Two-branch dispatch by cluster size:
//! - **n ≤ 18**: recursive enumeration of topologically-closed subsets.
//! Provably optimal. Visits only valid subsets (skips non-closed ones
//! without filtering) and maintains running fee/vsize incrementally.
//! - **n > 18**: "greedy-union" ancestor-set search. Seeds with each
//! node's ancestor closure, then greedily adds any other ancestor
//! closure whose inclusion raises the combined feerate. Strict
//! superset of ancestor-set-sort's candidate space — catches the
//! sibling-union shapes that pure ASS misses.
//!
//! A final stack-based `canonicalize` pass merges adjacent chunks when
//! the later one's feerate beats the earlier's, restoring the
//! non-increasing-rate invariant.
//!
//! Everything runs on `u128` bitmasks (covers Bitcoin Core 31's cluster
//! cap of 100). No RNG, no spanning-forest state, no floating-point.
use smallvec::SmallVec;
use super::{Cluster, LocalIdx};
/// One emitted chunk: a set of cluster-local txs plus their summed totals.
pub struct Chunk {
/// Cluster-local indices of the member txs. Not necessarily sorted:
/// `canonicalize` appends the later chunk's nodes when it merges two chunks.
pub nodes: SmallVec<[LocalIdx; 4]>,
/// Sum of the members' fees.
pub fee: u64,
/// Sum of the members' vsizes.
pub vsize: u64,
}
const BRUTE_FORCE_LIMIT: usize = 18;
const BITMASK_LIMIT: usize = 128;
pub fn linearize(cluster: &Cluster) -> Vec<Chunk> {
let n = cluster.nodes.len();
if n == 0 {
return Vec::new();
}
assert!(n <= BITMASK_LIMIT, "cluster size {} exceeds u128 capacity", n);
let mut parents_mask: Vec<u128> = vec![0; n];
let mut ancestor_incl: Vec<u128> = vec![0; n];
let mut order: Vec<LocalIdx> = (0..n as LocalIdx).collect();
order.sort_by_key(|&i| cluster.topo_rank[i as usize]);
for &v in &order {
let mut par = 0u128;
let mut acc = 1u128 << v;
for &p in &cluster.nodes[v as usize].parents {
par |= 1u128 << p;
acc |= ancestor_incl[p as usize];
}
parents_mask[v as usize] = par;
ancestor_incl[v as usize] = acc;
}
let fee_of: Vec<u64> = cluster.nodes.iter().map(|n| u64::from(n.fee)).collect();
let vsize_of: Vec<u64> = cluster.nodes.iter().map(|n| u64::from(n.vsize)).collect();
let all: u128 = if n == 128 { !0 } else { (1u128 << n) - 1 };
let mut chunks: Vec<Chunk> = Vec::new();
let mut remaining: u128 = all;
while remaining != 0 {
let (mask, fee, vsize) = if n <= BRUTE_FORCE_LIMIT {
best_subset(remaining, &order, &parents_mask, &fee_of, &vsize_of)
} else {
best_ancestor_union(remaining, &ancestor_incl, &fee_of, &vsize_of)
};
chunks.push(chunk_of(mask, fee, vsize));
remaining &= !mask;
}
canonicalize(&mut chunks);
chunks
}
/// Exhaustive search over the topologically-closed subsets of `remaining`.
///
/// Returns `(mask, fee, vsize)` of the non-empty closed subset with the
/// highest feerate; on a tie the first subset enumerated wins. The mask is
/// non-empty whenever a non-empty closed subset exists, including when every
/// fee is zero. It is `(0, 0, 1)` only when there is nothing to select.
///
/// `topo_order` must list nodes parents-first, and `parents_mask[v]` must hold
/// the direct parents of `v`.
fn best_subset(
    remaining: u128,
    topo_order: &[LocalIdx],
    parents_mask: &[u128],
    fee_of: &[u64],
    vsize_of: &[u64],
) -> (u128, u64, u64) {
    // mask == 0 marks "no candidate recorded yet".
    let mut best = (0u128, 0u64, 1u64);
    recurse(
        0,
        topo_order,
        parents_mask,
        remaining,
        0,
        0,
        0,
        fee_of,
        vsize_of,
        &mut best,
    );
    best
}

/// One step of the subset enumeration: decide node `topo_order[idx]`, then
/// recurse. `included`, `f` and `v` are the subset built so far and its
/// running fee and vsize. `best` is updated at the leaves.
// The running state is threaded as plain arguments to keep the recursion
// allocation-free; hence the long parameter list.
#[allow(clippy::too_many_arguments)]
fn recurse(
    idx: usize,
    topo_order: &[LocalIdx],
    parents_mask: &[u128],
    remaining: u128,
    included: u128,
    f: u64,
    v: u64,
    fee_of: &[u64],
    vsize_of: &[u64],
    best: &mut (u128, u64, u64),
) {
    if idx == topo_order.len() {
        if included != 0 {
            // The first non-empty subset is always recorded, so a zero-fee
            // remainder still yields a real chunk. After that only a strictly
            // higher rate replaces the incumbent (cross-multiplied in u128).
            let unset = best.0 == 0;
            if unset || f as u128 * best.2 as u128 > best.1 as u128 * v as u128 {
                *best = (included, f, v);
            }
        }
        return;
    }
    let node = topo_order[idx];
    let bit = 1u128 << node;
    // Not in remaining, or a parent (within remaining) is excluded:
    // this node is forced-excluded, no branching.
    let forced_out =
        (bit & remaining) == 0 || (parents_mask[node as usize] & remaining & !included) != 0;

    // Exclude
    recurse(
        idx + 1,
        topo_order,
        parents_mask,
        remaining,
        included,
        f,
        v,
        fee_of,
        vsize_of,
        best,
    );
    if forced_out {
        return;
    }
    // Include
    recurse(
        idx + 1,
        topo_order,
        parents_mask,
        remaining,
        included | bit,
        f + fee_of[node as usize],
        v + vsize_of[node as usize],
        fee_of,
        vsize_of,
        best,
    );
}
/// For each node v in `remaining`, seed with anc(v) ∩ remaining, then
/// greedily extend by adding any anc(u) whose inclusion raises the
/// feerate. Pick the best result across all seeds.
///
/// Every candidate evaluated is a union of ancestor closures —
/// topologically closed by construction. Strictly explores more
/// candidates than pure ancestor-set-sort, at O(n³) per chunk step.
///
/// Returns `(mask, fee, vsize)`. The mask is non-empty whenever `remaining`
/// is, including when every fee is zero; ties keep the earliest seed.
/// `ancestor_incl[v]` must contain `v` itself plus all of its ancestors.
fn best_ancestor_union(
    remaining: u128,
    ancestor_incl: &[u128],
    fee_of: &[u64],
    vsize_of: &[u64],
) -> (u128, u64, u64) {
    // mask == 0 marks "no candidate recorded yet".
    let mut best = (0u128, 0u64, 1u64);
    let mut seeds = remaining;
    while seeds != 0 {
        let i = seeds.trailing_zeros() as usize;
        seeds &= seeds - 1;
        let mut s = ancestor_incl[i] & remaining;
        let (mut f, mut v) = totals(s, fee_of, vsize_of);
        // Greedy extension to fixed point: pick the ancestor-closure
        // addition that yields the highest resulting feerate, if any.
        loop {
            let mut picked: Option<(u128, u64, u64)> = None;
            let mut cands = remaining & !s;
            while cands != 0 {
                let j = cands.trailing_zeros() as usize;
                cands &= cands - 1;
                // Only the part of anc(j) that is not already selected.
                let add = ancestor_incl[j] & remaining & !s;
                if add == 0 {
                    continue;
                }
                let (df, dv) = totals(add, fee_of, vsize_of);
                let nf = f + df;
                let nv = v + dv;
                // Must strictly improve current rate: nf/nv > f/v.
                if nf as u128 * v as u128 <= f as u128 * nv as u128 {
                    continue;
                }
                match picked {
                    None => picked = Some((add, nf, nv)),
                    Some((_, pf, pv)) => {
                        if nf as u128 * pv as u128 > pf as u128 * nv as u128 {
                            picked = Some((add, nf, nv));
                        }
                    }
                }
            }
            match picked {
                Some((add, nf, nv)) => {
                    s |= add;
                    f = nf;
                    v = nv;
                }
                None => break,
            }
        }
        // Always record the first seed so a zero-fee remainder yields a real
        // chunk; after that only a strictly higher rate replaces it.
        if best.0 == 0 || f as u128 * best.2 as u128 > best.1 as u128 * v as u128 {
            best = (s, f, v);
        }
    }
    best
}
/// Restore the non-increasing-feerate order with a single stack pass.
///
/// Each incoming chunk absorbs the stack top for as long as its own feerate
/// is strictly higher than the top's, and is then pushed. A merged chunk keeps
/// the earlier chunk's nodes first. Every chunk is pushed once and popped at
/// most once.
fn canonicalize(chunks: &mut Vec<Chunk>) {
    let incoming = std::mem::take(chunks);
    let mut stack: Vec<Chunk> = Vec::with_capacity(incoming.len());
    for chunk in incoming {
        let mut merged = chunk;
        loop {
            // Cross-multiplied in u128: merged.fee/merged.vsize > top.fee/top.vsize.
            let absorbs_top = match stack.last() {
                Some(top) => {
                    merged.fee as u128 * top.vsize as u128 > top.fee as u128 * merged.vsize as u128
                }
                None => false,
            };
            if !absorbs_top {
                break;
            }
            let mut earlier = stack.pop().unwrap();
            earlier.fee += merged.fee;
            earlier.vsize += merged.vsize;
            earlier.nodes.extend(merged.nodes);
            merged = earlier;
        }
        stack.push(merged);
    }
    *chunks = stack;
}
/// Sum `fee_of[i]` and `vsize_of[i]` over every bit `i` set in `mask`.
///
/// Returns `(fee_sum, vsize_sum)`; `(0, 0)` for an empty mask. Panics if a
/// set bit indexes past either slice.
#[inline]
fn totals(mask: u128, fee_of: &[u64], vsize_of: &[u64]) -> (u64, u64) {
    let (mut fee_sum, mut vsize_sum) = (0u64, 0u64);
    let mut rest = mask;
    while rest != 0 {
        let idx = rest.trailing_zeros() as usize;
        fee_sum += fee_of[idx];
        vsize_sum += vsize_of[idx];
        // Clear the lowest set bit.
        rest &= rest - 1;
    }
    (fee_sum, vsize_sum)
}
/// Build a `Chunk` whose nodes are the set bits of `mask`, in ascending
/// order, carrying the given precomputed `fee` and `vsize` totals.
fn chunk_of(mask: u128, fee: u64, vsize: u64) -> Chunk {
    let mut nodes: SmallVec<[LocalIdx; 4]> = SmallVec::new();
    let mut rest = mask;
    while rest != 0 {
        nodes.push(rest.trailing_zeros() as LocalIdx);
        // Clear the lowest set bit.
        rest &= rest - 1;
    }
    Chunk { nodes, fee, vsize }
}

View File

@@ -0,0 +1,179 @@
//! Hand-built cluster shapes with known-good SFL outputs.
use super::{chunk_shapes, make_cluster, run};
#[test]
fn singleton() {
    // A single tx is its own chunk, with its own totals.
    let fees_vsizes = [(100, 10)];
    let chunks = run(&make_cluster(&fees_vsizes, &[]));
    assert_eq!(chunks.len(), 1);
    let only = &chunks[0];
    assert_eq!(only.nodes.len(), 1);
    assert_eq!(only.fee, 100);
    assert_eq!(only.vsize, 10);
}
#[test]
fn two_chain_parent_richer() {
    // A (rate 10) → B (rate 1). The parent pays better on its own, so the
    // expected output is two chunks with A first.
    let fees_vsizes = [(100, 10), (1, 1)];
    let edges = [(0, 1)];
    let chunks = run(&make_cluster(&fees_vsizes, &edges));
    assert_eq!(chunks.len(), 2);
    let (first, second) = (&chunks[0], &chunks[1]);
    // First chunk is A alone.
    assert!(first.nodes.contains(&0));
    assert_eq!(first.vsize, 10);
    // Second chunk is B alone.
    assert!(second.nodes.contains(&1));
    assert_eq!(second.vsize, 1);
}
#[test]
fn two_chain_child_pays_parent_cpfp() {
    // A (rate 0.1) → B (rate 100). Classic CPFP: both land in one chunk.
    let fees_vsizes = [(1, 10), (100, 1)];
    let edges = [(0, 1)];
    let chunks = run(&make_cluster(&fees_vsizes, &edges));
    assert_eq!(chunks.len(), 1);
    let bundle = &chunks[0];
    assert_eq!(bundle.nodes.len(), 2);
    assert_eq!(bundle.fee, 101);
    assert_eq!(bundle.vsize, 11);
}
#[test]
fn v_shape_two_parents_one_child() {
    // P0 (rate 1), P1 (rate 1) → C (rate 100). Expect a single chunk.
    let fees_vsizes = [(1, 1), (1, 1), (100, 1)];
    let edges = [(0, 2), (1, 2)];
    let chunks = run(&make_cluster(&fees_vsizes, &edges));
    assert_eq!(chunks.len(), 1);
    let bundle = &chunks[0];
    assert_eq!(bundle.nodes.len(), 3);
    assert_eq!(bundle.fee, 102);
    assert_eq!(bundle.vsize, 3);
}
#[test]
fn lambda_shape_one_parent_two_children_uneven() {
    // A(1) → B(5), A(1) → C(5). The best set is not an ancestor set:
    // {A, B, C} has rate 11/3 ≈ 3.67, above {A,B} and {A,C} at rate 3.
    // Expect a single chunk.
    let fees_vsizes = [(1, 1), (5, 1), (5, 1)];
    let edges = [(0, 1), (0, 2)];
    let chunks = run(&make_cluster(&fees_vsizes, &edges));
    assert_eq!(chunks.len(), 1);
    let bundle = &chunks[0];
    assert_eq!(bundle.fee, 11);
    assert_eq!(bundle.vsize, 3);
}
#[test]
fn diamond() {
    // 4-node diamond: A → B, A → C, B → D, C → D. D pays for everyone, so all
    // four txs end up in one chunk.
    let fees_vsizes = [(1, 1), (1, 1), (1, 1), (100, 1)];
    let edges = [(0, 1), (0, 2), (1, 3), (2, 3)];
    let chunks = run(&make_cluster(&fees_vsizes, &edges));
    assert_eq!(chunks.len(), 1);
    let bundle = &chunks[0];
    assert_eq!(bundle.nodes.len(), 4);
    assert_eq!(bundle.fee, 103);
    assert_eq!(bundle.vsize, 4);
}
#[test]
fn chain_alternating_high_low() {
    // 4-chain A → B → C → D with fees [10, 1, 10, 1], all vsize 1.
    // The chain does NOT collapse into one chunk: A alone (rate 10) beats
    // every longer prefix, then {B, C} (rate 5.5) beats B alone and
    // {B, C, D}, and D (rate 1) is left over.
    let cluster = make_cluster(
        &[(10, 1), (1, 1), (10, 1), (1, 1)],
        &[(0, 1), (1, 2), (2, 3)],
    );
    let chunks = run(&cluster);
    assert_eq!(chunks_total_fee(&chunks), 22);
    assert_eq!(chunks_total_vsize(&chunks), 4);
    assert_non_increasing(&chunks);
    // Pin the partition as (tx count, fee, vsize) per chunk.
    assert_eq!(
        chunk_shapes(&chunks),
        vec![(1, 10, 1), (2, 11, 2), (1, 1, 1)]
    );
}
#[test]
fn chain_starts_low_ends_high() {
    // 4-chain with fees [1, 100, 1, 100]: each high-fee tx lifts the low-fee
    // tx before it. The exact partition is left to the implementation; only
    // the invariants are checked.
    let fees_vsizes = [(1, 1), (100, 1), (1, 1), (100, 1)];
    let edges = [(0, 1), (1, 2), (2, 3)];
    let chunks = run(&make_cluster(&fees_vsizes, &edges));
    assert_eq!(chunks_total_fee(&chunks), 202);
    assert_eq!(chunks_total_vsize(&chunks), 4);
    assert_non_increasing(&chunks);
}
#[test]
fn two_disconnected_clusters_would_each_be_separate() {
    // NOTE: this file exercises the linearizer on one cluster at a time;
    // the multi-cluster flow goes through `linearize_clusters` one level up.
    // The single-cluster shape used here is a fan-out: one parent, 5 children.
    let fees_vsizes = [(1, 1), (10, 1), (20, 1), (30, 1), (40, 1), (50, 1)];
    let edges = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)];
    let chunks = run(&make_cluster(&fees_vsizes, &edges));
    assert_eq!(chunks_total_fee(&chunks), 151);
    assert_eq!(chunks_total_vsize(&chunks), 6);
    assert_non_increasing(&chunks);
    // Every tx exactly once.
    let mut seen: Vec<usize> = chunks
        .iter()
        .flat_map(|ch| ch.nodes.iter().map(|&n| n as usize))
        .collect();
    seen.sort();
    assert_eq!(seen, vec![0, 1, 2, 3, 4, 5]);
}
#[test]
fn wide_fan_in() {
    // 5 parents → 1 child. Parents at rate 1, child at rate 100.
    let fees_vsizes = [(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (100, 1)];
    let edges = [(0, 5), (1, 5), (2, 5), (3, 5), (4, 5)];
    let chunks = run(&make_cluster(&fees_vsizes, &edges));
    assert_eq!(chunks.len(), 1);
    let bundle = &chunks[0];
    assert_eq!(bundle.fee, 105);
    assert_eq!(bundle.vsize, 6);
}
#[test]
fn shapes_are_stable_on_identical_input() {
    // Determinism: linearizing the same cluster twice gives the same shapes.
    let fees_vsizes = [(1, 1), (100, 1), (1, 1), (100, 1)];
    let edges = [(0, 1), (1, 2), (2, 3)];
    let cluster = make_cluster(&fees_vsizes, &edges);
    let first_run = chunk_shapes(&run(&cluster));
    let second_run = chunk_shapes(&run(&cluster));
    assert_eq!(first_run, second_run);
}
// --- helpers ---
/// Sum of `fee` over all chunks.
fn chunks_total_fee(chunks: &[super::Chunk]) -> u64 {
    let mut total = 0u64;
    for chunk in chunks {
        total += chunk.fee;
    }
    total
}
/// Sum of `vsize` over all chunks.
fn chunks_total_vsize(chunks: &[super::Chunk]) -> u64 {
    let mut total = 0u64;
    for chunk in chunks {
        total += chunk.vsize;
    }
    total
}
/// Panic unless every chunk's feerate is at least its successor's.
/// Rates are compared by cross-multiplication in `u128`.
fn assert_non_increasing(chunks: &[super::Chunk]) {
    for (earlier, later) in chunks.iter().zip(chunks.iter().skip(1)) {
        let a_rate = earlier.fee as u128 * later.vsize as u128;
        let b_rate = later.fee as u128 * earlier.vsize as u128;
        assert!(
            a_rate >= b_rate,
            "chunk feerates not non-increasing: {:?} vs {:?}",
            (earlier.fee, earlier.vsize),
            (later.fee, later.vsize),
        );
    }
}

View File

@@ -0,0 +1,53 @@
//! Tests for the SFL linearizer.
//!
//! Mirrors Bitcoin Core's `src/test/cluster_linearize_tests.cpp` split:
//! - `basic` — hand-built cluster shapes, deterministic assertions.
//! - `oracle` — brute-force optimality checks for small clusters.
//! - `stress` — randomized invariant checks for larger clusters.
mod basic;
mod oracle;
mod stress;
use smallvec::SmallVec;
use super::sfl::Chunk;
use super::{Cluster, ClusterNode, LocalIdx, kahn_topo_rank, sfl};
use crate::types::TxIndex;
/// Test helper: build a `Cluster` from per-tx `(fee, vsize)` pairs and a list
/// of `(parent_local, child_local)` edges. Tx `i` gets `TxIndex::from(i)`.
///
/// Panics on an edge that indexes past the node list. A cycle trips the debug
/// assertion inside `kahn_topo_rank`.
pub(super) fn make_cluster(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)]) -> Cluster {
    let mut nodes: Vec<ClusterNode> = Vec::with_capacity(fees_vsizes.len());
    for (i, &(fee, vsize)) in fees_vsizes.iter().enumerate() {
        nodes.push(ClusterNode {
            tx_index: TxIndex::from(i),
            fee: brk_types::Sats::from(fee),
            vsize: brk_types::VSize::from(vsize),
            parents: SmallVec::new(),
            children: SmallVec::new(),
        });
    }
    // Record each edge on both endpoints.
    for &(parent, child) in edges {
        nodes[child as usize].parents.push(parent);
        nodes[parent as usize].children.push(child);
    }
    let topo_rank = kahn_topo_rank(&nodes);
    Cluster { nodes, topo_rank }
}
/// Run the linearizer under test on `cluster`; thin wrapper over
/// `sfl::linearize` so the test files share one entry point.
pub(super) fn run(cluster: &Cluster) -> Vec<Chunk> {
sfl::linearize(cluster)
}
/// Summarize chunks as `(tx_count, fee, vsize)` tuples, in emitted order.
pub(super) fn chunk_shapes(chunks: &[Chunk]) -> Vec<(usize, u64, u64)> {
    let mut shapes = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        shapes.push((chunk.nodes.len(), chunk.fee, chunk.vsize));
    }
    shapes
}

View File

@@ -0,0 +1,484 @@
//! Brute-force optimality oracle.
//!
//! For small clusters (n ≤ 6), enumerate every topological ordering and
//! compute the canonical chunking of each. The "best" chunking is the
//! one whose fee diagram dominates pointwise. SFL must match.
//!
//! This file focuses on a handful of hand-picked shapes plus every
//! topological variant of a few DAGs where ancestor-set-sort would pick
//! a suboptimal chunking. Exhaustive DAG enumeration is out of scope;
//! the invariant tests in `stress.rs` cover random shapes.
use super::super::LocalIdx;
use super::{Chunk, make_cluster, run};
// ---------- oracle ----------
/// Canonical chunking of a linearization given as one `(fee, vsize)` pair
/// per position.
///
/// Starts from singletons and keeps merging a chunk with its right neighbour
/// whenever the neighbour's feerate is strictly higher, until a full pass
/// makes no merge. The result has non-increasing feerates; equal-rate
/// neighbours stay separate.
fn canonical_chunking(path: &[(u64, u64)]) -> Vec<(u64, u64)> {
    let mut merged: Vec<(u64, u64)> = Vec::from(path);
    loop {
        let mut any_merge = false;
        let mut left = 0usize;
        while left + 1 < merged.len() {
            let (fee_l, vsize_l) = merged[left];
            let (fee_r, vsize_r) = merged[left + 1];
            // fee_r/vsize_r > fee_l/vsize_l, cross-multiplied in u128.
            let right_is_richer = fee_r as u128 * vsize_l as u128 > fee_l as u128 * vsize_r as u128;
            if right_is_richer {
                merged[left] = (fee_l + fee_r, vsize_l + vsize_r);
                merged.remove(left + 1);
                any_merge = true;
                // Stay on `left`: the grown chunk may absorb its next neighbour.
            } else {
                left += 1;
            }
        }
        if !any_merge {
            return merged;
        }
    }
}
/// Enumerate every topological ordering of a DAG given as per-node parent
/// lists (`parents[c]` lists the parents of node `c`).
///
/// Plain permutation generation would ignore the edges, so this walks
/// depth-first over the currently-available nodes instead. Orders come out
/// with lower-indexed choices first. An empty DAG yields one empty order.
fn all_topo_orders(parents: &[Vec<LocalIdx>]) -> Vec<Vec<LocalIdx>> {
    /// Extend `current` by every node whose in-degree is zero, recursing
    /// until all `n` nodes are placed. `indeg` is restored before returning.
    fn walk(
        children: &[Vec<LocalIdx>],
        indeg: &mut [u32],
        current: &mut Vec<LocalIdx>,
        n: usize,
        out: &mut Vec<Vec<LocalIdx>>,
    ) {
        if current.len() == n {
            out.push(current.clone());
            return;
        }
        for v in 0..n as LocalIdx {
            // `indeg` is back in its entry state at the top of each
            // iteration, so checking lazily equals snapshotting up front.
            if indeg[v as usize] != 0 {
                continue;
            }
            indeg[v as usize] = u32::MAX; // placed: never ready again
            current.push(v);
            for &c in &children[v as usize] {
                indeg[c as usize] -= 1;
            }
            walk(children, indeg, current, n, out);
            current.pop();
            for &c in &children[v as usize] {
                indeg[c as usize] += 1;
            }
            indeg[v as usize] = 0; // undo the placement
        }
    }

    let n = parents.len();
    let mut children: Vec<Vec<LocalIdx>> = vec![Vec::new(); n];
    for (child, ps) in parents.iter().enumerate() {
        for &p in ps {
            children[p as usize].push(child as LocalIdx);
        }
    }
    let mut indeg: Vec<u32> = parents.iter().map(|ps| ps.len() as u32).collect();
    let mut results = Vec::new();
    let mut current: Vec<LocalIdx> = Vec::new();
    walk(&children, &mut indeg, &mut current, n, &mut results);
    results
}
/// Brute-force reference: the best canonical chunking over every topological
/// ordering of the DAG `(fees_vsizes, edges)`.
///
/// Candidates are compared with `dominates`; a candidate that `dominates`
/// accepts replaces the current best. Panics if the DAG has no topological
/// order.
fn oracle_best(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)]) -> Vec<(u64, u64)> {
    let mut parents: Vec<Vec<LocalIdx>> = vec![Vec::new(); fees_vsizes.len()];
    for &(parent, child) in edges {
        parents[child as usize].push(parent);
    }

    let mut best: Option<Vec<(u64, u64)>> = None;
    for order in all_topo_orders(&parents) {
        let path: Vec<(u64, u64)> = order.iter().map(|&i| fees_vsizes[i as usize]).collect();
        let candidate = canonical_chunking(&path);
        let replace = match &best {
            None => true,
            Some(current) => dominates(&candidate, current),
        };
        if replace {
            best = Some(candidate);
        }
    }
    best.expect("at least one topological order")
}
/// Compare two chunkings by their cumulative-fee curves, sampled at every
/// integer vsize from 1 to the total.
///
/// The verdict is taken at the first sample where the curves differ: `true`
/// if `a` is higher there, `false` if `b` is. Later samples are not examined.
/// Identical curves return `true`, so in `oracle_best` a tying candidate
/// replaces the incumbent.
fn dominates(a: &[(u64, u64)], b: &[(u64, u64)]) -> bool {
    let a_points = cumulative(a);
    let b_points = cumulative(b);
    let total_vsize = a_points.last().map(|p| p.0).unwrap_or(0);
    debug_assert_eq!(total_vsize, b_points.last().map(|p| p.0).unwrap_or(0));

    (1..=total_vsize)
        .map(|v| (fee_at(&a_points, v), fee_at(&b_points, v)))
        .find(|(fa, fb)| fa != fb)
        .map_or(true, |(fa, fb)| fa > fb)
}
/// Breakpoints of the cumulative curve for `(fee, vsize)` chunks.
///
/// Returns `(cumulative_vsize, cumulative_fee)` points, starting with
/// `(0, 0)` and followed by one point per chunk. Note the swapped field
/// order relative to the input.
fn cumulative(chunks: &[(u64, u64)]) -> Vec<(u64, u64)> {
    let running = chunks.iter().scan((0u64, 0u64), |acc, &(fee, vsize)| {
        acc.0 += vsize;
        acc.1 += fee;
        Some(*acc)
    });
    let mut points = Vec::with_capacity(chunks.len() + 1);
    points.push((0, 0));
    points.extend(running);
    points
}
/// Value of the cumulative-fee curve `cum` at vsize `v`.
///
/// `cum` holds `(vsize, fee)` breakpoints as produced by `cumulative`. Inside
/// a segment the fee is interpolated linearly with integer (floor) division.
/// Past the last breakpoint the final fee is returned; an empty curve gives 0.
fn fee_at(cum: &[(u64, u64)], v: u64) -> u128 {
    // First segment whose right end is at or beyond `v`.
    match cum.windows(2).find(|seg| v <= seg[1].0) {
        Some(seg) => {
            let (v0, f0) = seg[0];
            let (v1, f1) = seg[1];
            let dv = v1 - v0;
            if dv == 0 {
                // Zero-width segment: nothing to interpolate.
                f0 as u128
            } else {
                let df = f1 - f0;
                f0 as u128 + (df as u128) * ((v - v0) as u128) / (dv as u128)
            }
        }
        None => cum.last().map(|&(_, f)| f as u128).unwrap_or(0),
    }
}
/// Reduce each chunk to its `(fee, vsize)` pair, preserving order.
fn chunk_rate(chunks: &[Chunk]) -> Vec<(u64, u64)> {
    let mut pairs = Vec::with_capacity(chunks.len());
    for chunk in chunks {
        pairs.push((chunk.fee, chunk.vsize));
    }
    pairs
}
/// Linearize the DAG `(fees_vsizes, edges)` and assert that the resulting fee
/// diagram is never below the brute-force oracle's.
///
/// Both diagrams must cover the same total vsize. They are compared at every
/// integer vsize from 1 to that total.
fn assert_matches_oracle(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)]) {
    let got = chunk_rate(&run(&make_cluster(fees_vsizes, edges)));
    let want = oracle_best(fees_vsizes, edges);

    let (got_cum, want_cum) = (cumulative(&got), cumulative(&want));
    let total = got_cum.last().unwrap().0;
    assert_eq!(total, want_cum.last().unwrap().0, "total vsize mismatch");

    for v in 1..=total {
        let (fa, fb) = (fee_at(&got_cum, v), fee_at(&want_cum, v));
        assert!(
            fa >= fb,
            "SFL diagram below oracle at vsize {}: got {} want {}\n got={:?}\n want={:?}",
            v,
            fa,
            fb,
            got,
            want,
        );
    }
}
// ---------- tests ----------
#[test]
fn oracle_singleton() {
    // One tx, no edges.
    let fees_vsizes = [(100, 10)];
    assert_matches_oracle(&fees_vsizes, &[]);
}
#[test]
fn oracle_chain_cpfp() {
    // Cheap parent, rich child.
    let fees_vsizes = [(1, 10), (100, 1)];
    let edges = [(0, 1)];
    assert_matches_oracle(&fees_vsizes, &edges);
}
#[test]
fn oracle_chain_parent_richer() {
    // Rich parent, cheap child.
    let fees_vsizes = [(100, 10), (1, 1)];
    let edges = [(0, 1)];
    assert_matches_oracle(&fees_vsizes, &edges);
}
#[test]
fn oracle_v_shape() {
    // Two cheap parents feeding one rich child.
    let fees_vsizes = [(1, 1), (1, 1), (100, 1)];
    let edges = [(0, 2), (1, 2)];
    assert_matches_oracle(&fees_vsizes, &edges);
}
#[test]
fn oracle_lambda_non_ancestor_beats_ancestor() {
    // One parent, two children: the best set {A, B, C} (rate 11/3) is not an
    // ancestor set. The linearizer must still reach the oracle's diagram.
    let fees_vsizes = [(1, 1), (5, 1), (5, 1)];
    let edges = [(0, 1), (0, 2)];
    assert_matches_oracle(&fees_vsizes, &edges);
}
#[test]
fn oracle_diamond() {
    // A → B, A → C, B → D, C → D, with D paying.
    let fees_vsizes = [(1, 1), (1, 1), (1, 1), (100, 1)];
    let edges = [(0, 1), (0, 2), (1, 3), (2, 3)];
    assert_matches_oracle(&fees_vsizes, &edges);
}
#[test]
fn oracle_tree_depth_3() {
    // A → B → D, A → C → E. Leaves pay.
    let fees_vsizes = [(1, 1), (1, 1), (1, 1), (100, 1), (100, 1)];
    let edges = [(0, 1), (0, 2), (1, 3), (2, 4)];
    assert_matches_oracle(&fees_vsizes, &edges);
}
#[test]
fn oracle_branching_with_cheap_sibling() {
    // A(1) → B(50), A → C(100), all vsize 1. The best first chunk is {A, C}
    // at rate 50.5, which beats {A, B, C} at 151/3 ≈ 50.33; B follows alone
    // at rate 50.
    let fees_vsizes = [(1, 1), (50, 1), (100, 1)];
    let edges = [(0, 1), (0, 2)];
    assert_matches_oracle(&fees_vsizes, &edges);
}
#[test]
fn oracle_four_chain_alternating() {
    // Linear chain 0 → 1 → 2 → 3 whose fees alternate 10, 1, 10, 1
    // (every vsize is 1).
    let txs: [(u64, u64); 4] = [(10, 1), (1, 1), (10, 1), (1, 1)];
    let deps: [(LocalIdx, LocalIdx); 3] = [(0, 1), (1, 2), (2, 3)];
    assert_matches_oracle(&txs, &deps);
}
// ---------- exhaustive random DAG sweep ----------
//
// Enumerate random DAG shapes up to n=8 (40320 topo-orders max per DAG)
// and check merge-only's output matches the brute-force optimum. Runs
// thousands of cases; catches tie-break pathologies the hand-picked
// shapes above might miss.
/// Minimal deterministic xorshift generator (shifts 13 / 7 / 17) used to
/// derive reproducible random DAGs from a seed.
///
/// The seed is OR-ed with 1 so the state can never be zero (a zero state
/// would stay zero forever under xorshift). As a consequence the seeds
/// `2k` and `2k + 1` produce the same stream.
struct DagRng(u64);

impl DagRng {
    /// Creates a generator whose initial state is `seed | 1`.
    fn new(seed: u64) -> Self {
        DagRng(seed | 1)
    }

    /// Advances the state by one xorshift step and returns the new state.
    fn next(&mut self) -> u64 {
        let step1 = self.0 ^ (self.0 << 13);
        let step2 = step1 ^ (step1 >> 7);
        let step3 = step2 ^ (step2 << 17);
        self.0 = step3;
        step3
    }

    /// Returns a value in `0..n`. For `n == 0` it returns 0 without
    /// advancing the generator.
    fn range(&mut self, n: u64) -> u64 {
        match n {
            0 => 0,
            bound => self.next() % bound,
        }
    }
}
/// Builds a pseudo-random DAG of `n` nodes from `seed`.
///
/// Every node gets a fee in `1..=200` and a vsize in `1..=5`. Each node
/// `i > 0` then draws up to three parent candidates from `0..i`
/// (duplicates are dropped), so every edge points from a lower index to a
/// higher one and the graph is acyclic by construction.
///
/// Returns `(fees_vsizes, edges)` where each edge is `(parent, child)`.
fn random_dag(n: usize, seed: u64) -> (Vec<(u64, u64)>, Vec<(LocalIdx, LocalIdx)>) {
    let mut rng = DagRng::new(seed);

    let mut fees_vsizes: Vec<(u64, u64)> = Vec::with_capacity(n);
    for _ in 0..n {
        let fee = 1 + rng.range(200);
        let vsize = 1 + rng.range(5);
        fees_vsizes.push((fee, vsize));
    }

    let mut edges = Vec::new();
    for child in 1..n {
        let draws = rng.range(4) as usize;
        let mut parents: Vec<LocalIdx> = Vec::new();
        for _ in 0..draws {
            let candidate = rng.range(child as u64) as LocalIdx;
            if !parents.contains(&candidate) {
                parents.push(candidate);
            }
        }
        edges.extend(parents.into_iter().map(|parent| (parent, child as LocalIdx)));
    }

    (fees_vsizes, edges)
}
/// Generates `random_dag(n, seed)`, linearizes it with `super::run`, and
/// asserts the resulting fee diagram is never below the brute-force
/// optimum from `oracle_best` at any integer vsize.
///
/// The failure message includes the full DAG so a failing `(n, seed)`
/// pair can be reproduced by hand.
fn assert_optimal_on_random(n: usize, seed: u64) {
    let (fv, edges) = random_dag(n, seed);
    let cluster = super::make_cluster(&fv, &edges);
    let got = chunk_rate(&super::run(&cluster));
    let want = oracle_best(&fv, &edges);

    let got_diagram = cumulative(&got);
    let want_diagram = cumulative(&want);

    let total_vsize = got_diagram.last().unwrap().0;
    assert_eq!(total_vsize, want_diagram.last().unwrap().0);

    let mut vsize = 1;
    while vsize <= total_vsize {
        let got_fee = fee_at(&got_diagram, vsize);
        let want_fee = fee_at(&want_diagram, vsize);
        assert!(
            got_fee >= want_fee,
            "merge-only suboptimal (n={}, seed={})\n fv = {:?}\n edges = {:?}\n got = {:?}\n want = {:?}\n at vsize {}: got {}, want {}",
            n,
            seed,
            fv,
            edges,
            got,
            want,
            vsize,
            got_fee,
            want_fee,
        );
        vsize += 1;
    }
}
/// Compares a linearization's `(fee, vsize)` chunks against the
/// brute-force optimum.
///
/// Returns `None` when `got` is at or above `want` at every integer
/// vsize, otherwise `Some(gap)` where `gap` is the largest shortfall in
/// cumulative fee observed at any vsize.
fn optimality_gap_of(got: &[(u64, u64)], want: &[(u64, u64)]) -> Option<u128> {
    let got_diagram = cumulative(got);
    let want_diagram = cumulative(want);

    let total_vsize = got_diagram.last().unwrap().0;
    debug_assert_eq!(total_vsize, want_diagram.last().unwrap().0);

    // Shortfall at each vsize is `want - got` clamped at zero; keep the max.
    let worst_gap: u128 = (1..=total_vsize)
        .map(|vsize| {
            let got_fee = fee_at(&got_diagram, vsize);
            let want_fee = fee_at(&want_diagram, vsize);
            want_fee.saturating_sub(got_fee)
        })
        .max()
        .unwrap_or(0);

    (worst_gap != 0).then_some(worst_gap)
}
/// Runs `sfl::linearize` on `random_dag(n, seed)` and reports its gap to
/// the brute-force optimum (see `optimality_gap_of`).
fn optimality_gap(n: usize, seed: u64) -> Option<u128> {
    let (fv, edges) = random_dag(n, seed);
    let cluster = super::make_cluster(&fv, &edges);
    let chunks = super::super::sfl::linearize(&cluster);

    let mut got: Vec<(u64, u64)> = Vec::new();
    for chunk in chunks.iter() {
        got.push((chunk.fee, chunk.vsize));
    }

    let want = oracle_best(&fv, &edges);
    optimality_gap_of(&got, &want)
}
/// Diagnostic sweep: for several cluster sizes, counts how many random
/// DAGs the linearizer handles suboptimally and the largest gap seen,
/// then prints a table to stderr. Makes no assertions.
#[test]
#[ignore = "diagnostic sweep; run with --ignored to print stats"]
fn oracle_random_sweep_stats() {
    // (node count, number of cases, seed offset)
    const SWEEP: [(usize, u64, u64); 5] = [
        (4, 500, 1),
        (5, 500, 1_000),
        (6, 300, 2_000),
        (7, 100, 3_000),
        (8, 50, 4_000),
    ];

    eprintln!();
    eprintln!("Optimality sweep (random DAGs vs brute-force optimum):");
    eprintln!(" n cases sub max-gap");
    eprintln!(" ---------------------------");

    let mut suboptimal_total = 0usize;
    let mut cases_total = 0usize;
    for &(n, count, base) in SWEEP.iter() {
        let (suboptimal, max_gap) = (0..count)
            .filter_map(|seed| optimality_gap(n, seed.wrapping_add(base)))
            .fold((0usize, 0u128), |(hits, worst), gap| {
                (hits + 1, worst.max(gap))
            });
        suboptimal_total += suboptimal;
        cases_total += count as usize;
        eprintln!(" {} {:5} {:3} {:4}", n, count, suboptimal, max_gap);
    }

    eprintln!(" ---------------------------");
    let pct = (suboptimal_total as f64 / cases_total as f64) * 100.0;
    eprintln!(
        " totals {:4} {:3} ({:.1}%)",
        cases_total, suboptimal_total, pct
    );
    eprintln!();
}
/// Perf benchmark across cluster sizes. Run with
/// `cargo test -p brk_mempool perf_linearize --release -- --ignored --nocapture`.
///
/// For each `(n, calls)` row, `calls` random clusters of `n` nodes are
/// built up front (outside the timed region), then linearized back to
/// back. The per-call average and the total elapsed time are printed to
/// stderr; nothing is asserted.
#[test]
#[ignore = "perf benchmark; run with --ignored --nocapture"]
fn perf_linearize() {
    use std::hint::black_box;
    use std::time::Instant;
    // (node count, number of linearize calls)
    let sizes: &[(usize, u64)] = &[
        (2, 5_000),
        (5, 5_000),
        (10, 2_000),
        (15, 1_000),
        (18, 500),
        (20, 500),
        (30, 200),
        (50, 100),
        (75, 50),
        (100, 30),
    ];
    eprintln!();
    eprintln!("Linearize perf (release, per-call avg):");
    eprintln!(" n calls avg total");
    eprintln!(" -------------------------------------");
    for &(n, calls) in sizes {
        let clusters: Vec<_> = (0..calls)
            .map(|s| {
                let (fv, edges) = random_dag(n, s + 77);
                super::make_cluster(&fv, &edges)
            })
            .collect();
        let t = Instant::now();
        let mut sink = 0u64;
        for c in &clusters {
            // `black_box` on the input keeps the optimizer from
            // specialising the call on a known cluster.
            for chunk in super::super::sfl::linearize(black_box(c)) {
                sink = sink.wrapping_add(chunk.fee);
            }
        }
        let elapsed = t.elapsed();
        // Make the accumulated result observable so the measured work
        // cannot be discarded as dead code (`let _ = sink;` did not
        // guarantee that).
        black_box(sink);
        let avg_ns = elapsed.as_nanos() / calls as u128;
        let pretty = if avg_ns >= 1_000_000 {
            format!("{:.2} ms", avg_ns as f64 / 1_000_000.0)
        } else if avg_ns >= 1_000 {
            format!("{:.2} µs", avg_ns as f64 / 1_000.0)
        } else {
            format!("{} ns", avg_ns)
        };
        eprintln!(
            " {:<4} {:<8} {:<10} {:.2?}",
            n, calls, pretty, elapsed
        );
    }
    eprintln!();
}

View File

@@ -0,0 +1,186 @@
//! Randomized invariant tests.
//!
//! Generates random DAGs of 2 to 100 nodes with varied fee rates and
//! verifies SFL's output respects:
//! 1. Every node appears in exactly one chunk.
//! 2. Each chunk is topologically closed (no intra-cluster parent
//! of a chunk member lies in a later-emitted chunk).
//! 3. Chunk feerates are non-increasing along emission order.
use super::super::LocalIdx;
use super::{make_cluster, run};
/// Tiny deterministic xorshift so tests are reproducible.
///
/// The seed is OR-ed with 1 so the state is never zero (a zero xorshift
/// state would stay zero forever). This also means seeds `2k` and
/// `2k + 1` yield the same stream.
struct Rng(u64);

impl Rng {
    /// Creates a generator whose initial state is `seed | 1`.
    fn new(seed: u64) -> Self {
        Self(seed | 1)
    }

    /// Advances the state by one xorshift step (13 / 7 / 17) and returns it.
    fn next_u64(&mut self) -> u64 {
        let mut x = self.0;
        x ^= x << 13;
        x ^= x >> 7;
        x ^= x << 17;
        self.0 = x;
        x
    }

    /// Returns a value in `0..n`.
    ///
    /// An empty range (`n == 0`) yields 0 without advancing the generator
    /// instead of panicking on a remainder by zero.
    fn range(&mut self, n: u64) -> u64 {
        if n == 0 {
            return 0;
        }
        self.next_u64() % n
    }
}
/// Produces a pseudo-random acyclic cluster of `n` nodes from `seed`.
///
/// Fees fall in `1..=1000` and vsizes in `1..=100`. Every node `i > 0`
/// draws up to three parent candidates among the nodes with a smaller
/// index (duplicates dropped), which rules out cycles.
///
/// Returns `(fees_vsizes, edges)` with edges expressed as `(parent, child)`.
fn random_cluster(n: usize, seed: u64) -> (Vec<(u64, u64)>, Vec<(LocalIdx, LocalIdx)>) {
    let mut rng = Rng::new(seed);

    let fees_vsizes: Vec<(u64, u64)> = (0..n)
        .map(|_| {
            let fee = 1 + rng.range(1000);
            let vsize = 1 + rng.range(100);
            (fee, vsize)
        })
        .collect();

    let mut edges = Vec::new();
    for child in 1..n {
        // Up to three draws, each uniform over the earlier nodes.
        let draws = rng.range(4) as usize;
        let mut parents: Vec<LocalIdx> = Vec::new();
        for _ in 0..draws {
            let candidate = rng.range(child as u64) as LocalIdx;
            if !parents.contains(&candidate) {
                parents.push(candidate);
            }
        }
        edges.extend(parents.into_iter().map(|parent| (parent, child as LocalIdx)));
    }

    (fees_vsizes, edges)
}
/// Panics unless `chunks` is a valid chunking of the cluster described by
/// `fees_vsizes` and `edges`:
///
/// 1. every node index appears in exactly one chunk;
/// 2. each chunk's `fee` / `vsize` equal the sums over its nodes;
/// 3. for every `(parent, child)` edge, the parent's chunk is emitted no
///    later than the child's;
/// 4. chunk feerates never increase from one chunk to the next.
fn check_invariants(
    fees_vsizes: &[(u64, u64)],
    edges: &[(LocalIdx, LocalIdx)],
    chunks: &[super::Chunk],
) {
    let node_count = fees_vsizes.len();

    // (1) Each node in exactly one chunk.
    let mut seen = vec![false; node_count];
    for &local in chunks.iter().flat_map(|chunk| chunk.nodes.iter()) {
        let slot = &mut seen[local as usize];
        assert!(!*slot, "node {} appears in multiple chunks", local);
        *slot = true;
    }
    if let Some(missing) = seen.iter().position(|&present| !present) {
        panic!("node {} missing from all chunks", missing);
    }

    // Chunk aggregates match declared totals.
    for chunk in chunks {
        let (fee, vsize) = chunk
            .nodes
            .iter()
            .fold((0u64, 0u64), |(fee_acc, vsize_acc), &local| {
                let (tx_fee, tx_vsize) = fees_vsizes[local as usize];
                (fee_acc + tx_fee, vsize_acc + tx_vsize)
            });
        assert_eq!(chunk.fee, fee, "chunk fee mismatch");
        assert_eq!(chunk.vsize, vsize, "chunk vsize mismatch");
    }

    // (2) Chunks are topologically closed in emission order: a parent
    // must sit in the same chunk as its child or in an earlier one.
    let mut chunk_of = vec![usize::MAX; node_count];
    for (chunk_idx, chunk) in chunks.iter().enumerate() {
        for &local in chunk.nodes.iter() {
            chunk_of[local as usize] = chunk_idx;
        }
    }
    for &(parent, child) in edges {
        let parent_chunk = chunk_of[parent as usize];
        let child_chunk = chunk_of[child as usize];
        assert!(
            parent_chunk <= child_chunk,
            "parent {} in chunk {} but child {} in earlier chunk {}",
            parent,
            parent_chunk,
            child,
            child_chunk
        );
    }

    // (3) Non-increasing chunk feerates in emission order. Rates are
    // compared by cross-multiplication in u128 to avoid division.
    for (earlier, later) in chunks.iter().zip(chunks.iter().skip(1)) {
        let lhs = earlier.fee as u128 * later.vsize as u128;
        let rhs = later.fee as u128 * earlier.vsize as u128;
        assert!(
            lhs >= rhs,
            "chunk feerates not non-increasing: {}/{} then {}/{}",
            earlier.fee,
            earlier.vsize,
            later.fee,
            later.vsize,
        );
    }
}
/// 200 random clusters of 2 to 11 nodes must satisfy every invariant.
#[test]
fn random_small_clusters() {
    (0..200u64).for_each(|seed| {
        let node_count = 2 + (seed % 10) as usize;
        let (fv, edges) = random_cluster(node_count, seed.wrapping_add(1));
        let cluster = make_cluster(&fv, &edges);
        check_invariants(&fv, &edges, &run(&cluster));
    });
}
/// 50 random clusters of 10 to 29 nodes must satisfy every invariant.
#[test]
fn random_medium_clusters() {
    (0..50u64).for_each(|seed| {
        let node_count = 10 + (seed % 20) as usize;
        let (fv, edges) = random_cluster(node_count, seed.wrapping_add(100));
        let cluster = make_cluster(&fv, &edges);
        check_invariants(&fv, &edges, &run(&cluster));
    });
}
/// 10 random 30-node clusters must satisfy every invariant.
#[test]
fn random_large_clusters() {
    (0..10u64).for_each(|seed| {
        let (fv, edges) = random_cluster(30, seed.wrapping_add(1000));
        let cluster = make_cluster(&fv, &edges);
        check_invariants(&fv, &edges, &run(&cluster));
    });
}
/// Running the linearizer twice on the same cluster must give the same
/// sequence of `(fee, vsize)` chunks.
#[test]
fn determinism_same_seed_same_output() {
    let (fv, edges) = random_cluster(15, 42);
    let cluster = make_cluster(&fv, &edges);
    let summarize = || -> Vec<(u64, u64)> {
        run(&cluster)
            .iter()
            .map(|chunk| (chunk.fee, chunk.vsize))
            .collect()
    };
    let first = summarize();
    let second = summarize();
    assert_eq!(first, second);
}
/// Invariant check on five random 100-node clusters with many edges.
///
/// No time budget is asserted here; a slow linearizer would only show up
/// as a slow test run.
#[test]
fn random_cluster_at_policy_limit() {
    (0..5u64).for_each(|seed| {
        // 100 txs: picked by the original author to approximate Bitcoin
        // Core's cluster policy cap (not verified in this file).
        let (fv, edges) = random_cluster(100, seed.wrapping_add(9000));
        let cluster = make_cluster(&fv, &edges);
        check_invariants(&fv, &edges, &run(&cluster));
    });
}

View File

@@ -1,8 +1,7 @@
mod graph;
mod heap_entry;
mod linearize;
mod package;
mod partitioner;
mod selector;
mod tx_node;
pub use package::Package;
@@ -22,12 +21,12 @@ const NUM_BLOCKS: usize = 8;
/// descending; the final block is a catch-all containing every remaining
/// package (matches mempool.space behavior).
pub fn build_projected_blocks(entries: &[Option<Entry>]) -> Vec<Vec<Package>> {
let mut graph = graph::build_graph(entries);
let graph = graph::build_graph(entries);
if graph.is_empty() {
return Vec::new();
}
let packages = selector::select_packages(&mut graph);
let packages = linearize::linearize_clusters(&graph);
partitioner::partition_into_blocks(packages, NUM_BLOCKS)
}

View File

@@ -2,24 +2,18 @@ use brk_types::FeeRate;
use crate::types::TxIndex;
/// A CPFP package: transactions the selector decided to mine together
/// A CPFP package: transactions the linearizer decided to mine together
/// because a child pays for its parent.
///
/// Carries two rates:
/// - `fee_rate` is the package's own rate (sum of fees / sum of vsizes),
/// i.e. what a miner collects per vsize when the package is mined.
/// Used for per-tx fee stats and user-facing recommendations.
/// - `placement_rate` is the key the partitioner sorts by. It's the own
/// rate clamped below by the `placement_rate` of any ancestor packages,
/// so that sorting packages by this rate descending keeps dependent
/// packages in topological order even when a child's own rate exceeds
/// its parent's (possible in branching CPFP).
/// `fee_rate` is the package's own rate (sum of fees / sum of vsizes),
/// i.e. what a miner collects per vsize when the package is mined.
/// Packages are produced by SFL in descending-`fee_rate` order within a
/// cluster and are atomic (all-or-nothing) at mining time.
pub struct Package {
/// Transactions in topological order (parents before children).
pub txs: Vec<TxIndex>,
pub vsize: u64,
pub fee_rate: FeeRate,
pub placement_rate: FeeRate,
}
impl Package {
@@ -28,7 +22,6 @@ impl Package {
txs: Vec::new(),
vsize: 0,
fee_rate,
placement_rate: fee_rate,
}
}

View File

@@ -5,7 +5,7 @@ use super::{BLOCK_VSIZE, package::Package};
/// How many packages to look ahead when the current one doesn't fit.
const LOOK_AHEAD_COUNT: usize = 100;
/// Partition packages into blocks by placement rate.
/// Partition packages into blocks by fee rate.
///
/// The first `num_blocks - 1` blocks are packed greedily into ~`BLOCK_VSIZE`
/// chunks. The final block is a catch-all containing every remaining
@@ -15,10 +15,11 @@ pub fn partition_into_blocks(
mut packages: Vec<Package>,
num_blocks: usize,
) -> Vec<Vec<Package>> {
// Stable sort for deterministic output across equal placement rates.
// Topology across dependent packages is already enforced by the
// placement_rate cap in the selector.
packages.sort_by_key(|p| Reverse(p.placement_rate));
// Stable sort for deterministic output across equal fee rates. SFL
// guarantees chunks within a cluster come in non-increasing rate
// order, so stable sorting by fee_rate preserves intra-cluster
// topology automatically.
packages.sort_by_key(|p| Reverse(p.fee_rate));
let mut slots: Vec<Option<Package>> = packages.into_iter().map(Some).collect();
let mut blocks: Vec<Vec<Package>> = Vec::with_capacity(num_blocks);

View File

@@ -1,121 +0,0 @@
use std::collections::BinaryHeap;
use brk_types::FeeRate;
use rustc_hash::FxHashSet;
use smallvec::SmallVec;
use super::{graph::Graph, heap_entry::HeapEntry, package::Package};
use crate::types::PoolIndex;
/// Sentinel for `package_of` entries that haven't been placed in a package yet.
const UNASSIGNED: u32 = u32::MAX;
/// Select transactions from the graph and group them into CPFP packages,
/// running until every unselected tx has been placed into a package.
///
/// Every node is pushed onto a max-heap of `HeapEntry`; entries are popped
/// until the heap is empty. An entry is ignored when its node is already
/// selected or when its `generation` no longer matches the node's (the
/// node was re-pushed with updated ancestor totals since).
///
/// Each accepted entry produces one `Package` containing the node and all
/// of its still-unselected ancestors. Packages are returned in the order
/// they were built.
pub fn select_packages(graph: &mut Graph) -> Vec<Package> {
let mut packages: Vec<Package> = Vec::new();
// package_of[pool_idx] = index into `packages`, or UNASSIGNED.
let mut package_of: Vec<u32> = vec![UNASSIGNED; graph.len()];
let mut heap: BinaryHeap<HeapEntry> = (0..graph.len())
.map(|i| HeapEntry::new(&graph[PoolIndex::from(i)]))
.collect();
while let Some(entry) = heap.pop() {
let node = &graph[entry.pool_index];
// Skip txs already placed and heap entries made stale by a later re-push.
if node.selected || entry.generation != node.generation {
continue;
}
// The package rate is the node's current ancestor totals (fee / vsize).
let own_rate = FeeRate::from((node.ancestor_fee, node.ancestor_vsize));
let package_idx = packages.len() as u32;
let mut package = Package::new(own_rate);
for pool_idx in select_with_ancestors(graph, entry.pool_index) {
let tx = &graph[pool_idx];
package.add_tx(tx.tx_index, u64::from(tx.vsize));
package_of[pool_idx.as_usize()] = package_idx;
// Cap placement_rate by any ancestor packages this tx depends on.
// select_with_ancestors returns parents before children, so a
// parent sitting in this same package already has package_of
// set to package_idx; only parents in earlier packages matter.
for &parent in &tx.parents {
let parent_pkg = package_of[parent.as_usize()];
if parent_pkg != package_idx && parent_pkg != UNASSIGNED {
package.placement_rate = package
.placement_rate
.min(packages[parent_pkg as usize].placement_rate);
}
}
// Remove this tx's contribution from its descendants' ancestor totals.
update_descendants(graph, pool_idx, &mut heap);
}
packages.push(package);
}
packages
}
/// Return `pool_idx` and all its unselected ancestors in topological order
/// (parents before children), marking each one selected as we go.
///
/// Implemented as an iterative post-order walk over `parents`: each node
/// is pushed once with `parents_done = false` (expand parents) and once
/// with `parents_done = true` (emit). Nodes that are already selected when
/// popped are skipped, which also prevents a node reachable through
/// several paths from being emitted twice.
fn select_with_ancestors(graph: &mut Graph, pool_idx: PoolIndex) -> SmallVec<[PoolIndex; 8]> {
let mut result: SmallVec<[PoolIndex; 8]> = SmallVec::new();
// Stack of (node, parents_done) pairs.
let mut stack: SmallVec<[(PoolIndex, bool); 16]> = smallvec::smallvec![(pool_idx, false)];
while let Some((idx, parents_done)) = stack.pop() {
if graph[idx].selected {
continue;
}
if parents_done {
// Every parent pushed above this entry has been handled: emit.
graph[idx].selected = true;
result.push(idx);
} else {
// Revisit this node after its unselected parents.
stack.push((idx, true));
for &parent in &graph[idx].parents {
if !graph[parent].selected {
stack.push((parent, false));
}
}
}
}
result
}
/// Subtract the selected tx's fee and vsize from every unselected
/// descendant's ancestor totals, and re-push updated entries to the heap.
///
/// Each updated descendant has its `generation` bumped before being
/// re-pushed, so heap entries created from its previous totals can be
/// recognised as stale by the caller.
fn update_descendants(
graph: &mut Graph,
selected_idx: PoolIndex,
heap: &mut BinaryHeap<HeapEntry>,
) {
let selected_fee = graph[selected_idx].fee;
let selected_vsize = graph[selected_idx].vsize;
// Track visited to avoid double-updates in diamond patterns
let mut visited: FxHashSet<PoolIndex> = FxHashSet::default();
let mut stack: SmallVec<[PoolIndex; 16]> =
graph[selected_idx].children.iter().copied().collect();
while let Some(child_idx) = stack.pop() {
if !visited.insert(child_idx) {
continue;
}
let child = &mut graph[child_idx];
// Walk through selected intermediates: descendants behind them still
// need their ancestor totals reduced, otherwise CPFP chains with
// already-selected parents keep inflated scores and get split.
if !child.selected {
child.ancestor_fee -= selected_fee;
child.ancestor_vsize -= selected_vsize;
child.generation += 1;
heap.push(HeapEntry::new(child));
}
// Children are traversed whether or not this node was itself updated.
stack.extend(child.children.iter().copied());
}
}

View File

@@ -7,57 +7,30 @@ use crate::types::{PoolIndex, TxIndex};
///
/// Created fresh for each block building cycle, then discarded.
pub struct TxNode {
/// Index into mempool entries (for final output)
/// Index into mempool entries (carried into the final `Package`).
pub tx_index: TxIndex,
/// Index in the graph pool
pub pool_index: PoolIndex,
/// Transaction fee
/// Transaction fee.
pub fee: Sats,
/// Transaction virtual size
/// Transaction virtual size.
pub vsize: VSize,
/// Parent transactions (dependencies)
/// Parent transactions (dependencies).
pub parents: SmallVec<[PoolIndex; 4]>,
/// Child transactions (dependents)
/// Child transactions (dependents).
pub children: SmallVec<[PoolIndex; 8]>,
/// Cumulative fee (self + all ancestors)
pub ancestor_fee: Sats,
/// Cumulative vsize (self + all ancestors)
pub ancestor_vsize: VSize,
/// Whether this tx has been selected
pub selected: bool,
/// Generation counter for heap staleness detection
pub generation: u32,
}
impl TxNode {
pub fn new(
tx_index: TxIndex,
pool_index: PoolIndex,
fee: Sats,
vsize: VSize,
ancestor_fee: Sats,
ancestor_vsize: VSize,
) -> Self {
pub fn new(tx_index: TxIndex, fee: Sats, vsize: VSize) -> Self {
Self {
tx_index,
pool_index,
fee,
vsize,
parents: SmallVec::new(),
children: SmallVec::new(),
ancestor_fee,
ancestor_vsize,
selected: false,
generation: 0,
}
}
}

View File

@@ -14,7 +14,7 @@ use brk_error::Result;
use brk_rpc::Client;
use brk_types::{
AddrBytes, BlockHash, MempoolEntryInfo, MempoolInfo, Timestamp, Transaction, TxIn, TxOut,
TxStatus, TxWithHex, Txid, TxidPrefix, VSize, Vout,
TxStatus, Txid, TxidPrefix, VSize, Vout,
};
use derive_more::Deref;
use parking_lot::{RwLock, RwLockReadGuard};
@@ -28,6 +28,7 @@ use crate::{
entry_pool::EntryPool,
projected_blocks::{BlockStats, RecommendedFees, Snapshot},
tx_store::TxStore,
types::TxWithHex,
};
/// Max new txs to fetch full data for per update cycle (for address tracking).

View File

@@ -1,7 +1,9 @@
use brk_types::{MempoolRecentTx, TxWithHex, Txid};
use brk_types::{MempoolRecentTx, Txid};
use derive_more::Deref;
use rustc_hash::FxHashMap;
use crate::types::TxWithHex;
const RECENT_CAP: usize = 10;
/// Store of full transaction data for API access.

View File

@@ -1,5 +1,7 @@
mod pool_index;
mod tx_index;
mod tx_with_hex;
pub use pool_index::PoolIndex;
pub use tx_index::TxIndex;
pub use tx_with_hex::TxWithHex;

View File

@@ -15,3 +15,10 @@ impl From<usize> for TxIndex {
Self(value as u32)
}
}
impl From<TxIndex> for u64 {
#[inline]
fn from(value: TxIndex) -> Self {
u64::from(value.0)
}
}

View File

@@ -0,0 +1,26 @@
use brk_types::Transaction;
/// A transaction with its raw hex representation
///
/// Both fields are private; read them through `tx()` / `hex()` or take
/// ownership with `into_parts()`.
#[derive(Debug, Clone)]
pub struct TxWithHex {
// Transaction value supplied to `new`.
tx: Transaction,
// Hex string supplied to `new`; stored as given.
hex: String,
}
impl TxWithHex {
pub fn new(tx: Transaction, hex: String) -> Self {
Self { tx, hex }
}
pub fn tx(&self) -> &Transaction {
&self.tx
}
pub fn hex(&self) -> &str {
&self.hex
}
pub fn into_parts(self) -> (Transaction, String) {
(self.tx, self.hex)
}
}