mempool: snap

2026-05-19 22:34:46 -07:00 · 2026-04-22 15:30:08 +02:00
parent 6afce0bbdc
commit bdc3ba1df6
24 changed files with 1557 additions and 299 deletions
--- a/crates/brk_mempool/src/block_builder/graph.rs
+++ b/crates/brk_mempool/src/block_builder/graph.rs
@@ -42,65 +42,134 @@ impl IndexMut<PoolIndex> for Graph {

 /// Build a dependency graph from mempool entries.
 pub fn build_graph(entries: &[Option<Entry>]) -> Graph {
-    // Collect live entries with their indices
-    let live: Vec<(TxIndex, &Entry)> = entries
-        .iter()
-        .enumerate()
-        .filter_map(|(i, opt)| opt.as_ref().map(|e| (TxIndex::from(i), e)))
-        .collect();
+    let mut live: Vec<(TxIndex, &Entry)> = Vec::with_capacity(entries.len());
+    for (i, opt) in entries.iter().enumerate() {
+        if let Some(e) = opt.as_ref() {
+            live.push((TxIndex::from(i), e));
+        }
+    }

    if live.is_empty() {
        return Graph(Vec::new());
    }

-    // Map TxidPrefix -> PoolIndex for parent lookups
-    let prefix_to_pool: FxHashMap<TxidPrefix, PoolIndex> = live
-        .iter()
-        .enumerate()
-        .map(|(i, (_, entry))| (entry.txid_prefix(), PoolIndex::from(i)))
-        .collect();
+    let mut prefix_to_pool: FxHashMap<TxidPrefix, PoolIndex> =
+        FxHashMap::with_capacity_and_hasher(live.len(), Default::default());
+    for (i, (_, entry)) in live.iter().enumerate() {
+        prefix_to_pool.insert(entry.txid_prefix(), PoolIndex::from(i));
+    }

-    // Build nodes with parent relationships
    let mut nodes: Vec<TxNode> = live
        .iter()
-        .enumerate()
-        .map(|(pool_idx, (tx_index, entry))| {
-            let pool_index = PoolIndex::from(pool_idx);
-            let mut node = TxNode::new(
-                *tx_index,
-                pool_index,
-                entry.fee,
-                entry.vsize,
-                entry.ancestor_fee,
-                entry.ancestor_vsize,
-            );
-
-            // Add in-mempool parents
+        .map(|(tx_index, entry)| {
+            let mut node = TxNode::new(*tx_index, entry.fee, entry.vsize);
            for parent_prefix in &entry.depends {
                if let Some(&parent_pool_idx) = prefix_to_pool.get(parent_prefix) {
                    node.parents.push(parent_pool_idx);
                }
            }
-
            node
        })
        .collect();

-    // Collect parent->child edges (avoids cloning each node's parents)
-    let edges: Vec<(usize, PoolIndex)> = nodes
-        .iter()
-        .enumerate()
-        .flat_map(|(i, node)| {
-            node.parents
-                .iter()
-                .map(move |&p| (p.as_usize(), PoolIndex::from(i)))
-        })
-        .collect();
-
-    // Build child relationships
-    for (parent_idx, child_idx) in edges {
-        nodes[parent_idx].children.push(child_idx);
+    // Populate children via direct indexing; no intermediate edge vec.
+    // Reading parents[j] as a Copy value releases the immutable borrow
+    // before the mutable borrow of children's owner.
+    for i in 0..nodes.len() {
+        let plen = nodes[i].parents.len();
+        for j in 0..plen {
+            let parent_idx = nodes[i].parents[j].as_usize();
+            nodes[parent_idx].children.push(PoolIndex::from(i));
+        }
    }

    Graph(nodes)
 }
+
+#[cfg(test)]
+mod bench {
+    use std::time::Instant;
+
+    use bitcoin::hashes::Hash;
+    use brk_types::{Sats, Timestamp, Txid, VSize};
+    use smallvec::SmallVec;
+
+    use super::build_graph;
+    use crate::entry::Entry;
+
+    /// Synthetic mempool: mostly singletons, some CPFP chains/trees.
+    fn synthetic_mempool(n: usize) -> Vec<Option<Entry>> {
+        let make_txid = |i: usize| -> Txid {
+            let mut bytes = [0u8; 32];
+            bytes[0..8].copy_from_slice(&(i as u64).to_ne_bytes());
+            bytes[8..16].copy_from_slice(&((i as u64).wrapping_mul(2654435761)).to_ne_bytes());
+            Txid::from(bitcoin::Txid::from_slice(&bytes).unwrap())
+        };
+
+        let mut entries: Vec<Option<Entry>> = Vec::with_capacity(n);
+        let mut txids: Vec<Txid> = Vec::with_capacity(n);
+        for i in 0..n {
+            let txid = make_txid(i);
+            txids.push(txid.clone());
+
+            // 95% singletons, 4% 1-parent, 1% 2-parent (mimics real mempool).
+            let depends: SmallVec<[brk_types::TxidPrefix; 2]> = match i % 100 {
+                0..=94 => SmallVec::new(),
+                95..=98 if i > 0 => {
+                    let p = (i.wrapping_mul(7919)) % i;
+                    std::iter::once(brk_types::TxidPrefix::from(&txids[p])).collect()
+                }
+                _ if i > 1 => {
+                    let p1 = (i.wrapping_mul(7919)) % i;
+                    let p2 = (i.wrapping_mul(6151)) % i;
+                    [
+                        brk_types::TxidPrefix::from(&txids[p1]),
+                        brk_types::TxidPrefix::from(&txids[p2]),
+                    ]
+                    .into_iter()
+                    .collect()
+                }
+                _ => SmallVec::new(),
+            };
+
+            entries.push(Some(Entry {
+                txid,
+                fee: Sats::from((i as u64).wrapping_mul(137) % 10_000 + 1),
+                vsize: VSize::from(250u64),
+                size: 250,
+                ancestor_fee: Sats::from(0u64),
+                ancestor_vsize: VSize::from(250u64),
+                depends,
+                first_seen: Timestamp::now(),
+            }));
+        }
+        entries
+    }
+
+    #[test]
+    #[ignore = "perf benchmark; run with --ignored --nocapture"]
+    fn perf_build_graph() {
+        let sizes = [1_000usize, 10_000, 50_000, 100_000, 300_000];
+        eprintln!();
+        eprintln!("build_graph perf (release, single call):");
+        eprintln!("  n          build");
+        eprintln!("  ------------------------");
+        for &n in &sizes {
+            let entries = synthetic_mempool(n);
+            // Warm up allocator.
+            let _ = build_graph(&entries);
+
+            let t = Instant::now();
+            let g = build_graph(&entries);
+            let dt = t.elapsed();
+            let ns = dt.as_nanos();
+            let pretty = if ns >= 1_000_000 {
+                format!("{:.2} ms", ns as f64 / 1_000_000.0)
+            } else {
+                format!("{:.2} µs", ns as f64 / 1_000.0)
+            };
+            eprintln!("  {:<10} {:<10} ({} nodes)", n, pretty, g.len());
+        }
+        eprintln!();
+    }
+}
--- a/crates/brk_mempool/src/block_builder/heap_entry.rs
+++ b/crates/brk_mempool/src/block_builder/heap_entry.rs
@@ -1,70 +0,0 @@
-use std::cmp::Ordering;
-
-use brk_types::{Sats, VSize};
-
-use super::tx_node::TxNode;
-use crate::types::PoolIndex;
-
-/// Entry in the priority heap for transaction selection.
-///
-/// Stores a snapshot of the score at insertion time. The `generation` field
-/// lets the selector detect and skip stale entries after descendants are
-/// re-pushed with updated ancestor totals.
-#[derive(Clone, Copy)]
-pub struct HeapEntry {
-    pub pool_index: PoolIndex,
-    pub generation: u32,
-    ancestor_fee: Sats,
-    ancestor_vsize: VSize,
-}
-
-impl HeapEntry {
-    pub fn new(node: &TxNode) -> Self {
-        Self {
-            pool_index: node.pool_index,
-            generation: node.generation,
-            ancestor_fee: node.ancestor_fee,
-            ancestor_vsize: node.ancestor_vsize,
-        }
-    }
-
-    /// Compare fee rates: self > other?
-    #[inline]
-    fn has_higher_fee_rate_than(&self, other: &Self) -> bool {
-        // Cross multiply to avoid division:
-        // fee_a/vsize_a > fee_b/vsize_b  ⟺  fee_a * vsize_b > fee_b * vsize_a
-        let self_score =
-            u64::from(self.ancestor_fee) as u128 * u64::from(other.ancestor_vsize) as u128;
-        let other_score =
-            u64::from(other.ancestor_fee) as u128 * u64::from(self.ancestor_vsize) as u128;
-        self_score > other_score
-    }
-}
-
-impl PartialEq for HeapEntry {
-    fn eq(&self, other: &Self) -> bool {
-        self.cmp(other).is_eq()
-    }
-}
-
-impl Eq for HeapEntry {}
-
-impl PartialOrd for HeapEntry {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl Ord for HeapEntry {
-    fn cmp(&self, other: &Self) -> Ordering {
-        // Higher fee rate = higher priority
-        if self.has_higher_fee_rate_than(other) {
-            Ordering::Greater
-        } else if other.has_higher_fee_rate_than(self) {
-            Ordering::Less
-        } else {
-            // Tiebreaker: lower index first (deterministic)
-            other.pool_index.cmp(&self.pool_index)
-        }
-    }
-}
--- a/crates/brk_mempool/src/block_builder/linearize/mod.rs
+++ b/crates/brk_mempool/src/block_builder/linearize/mod.rs
@@ -0,0 +1,194 @@
+//! Cluster-mempool linearization.
+//!
+//! Partitions the mempool dependency graph into connected components
+//! ("clusters"), linearizes each into chunks ordered by descending
+//! feerate, and emits the resulting chunks as `Package`s. The inner
+//! algorithm (see `sfl.rs`) is a topologically-closed-subset search,
+//! optimal for clusters up to 18 txs and near-optimal beyond that.
+
+mod sfl;
+
+#[cfg(test)]
+mod tests;
+
+use brk_types::{FeeRate, Sats, VSize};
+use rustc_hash::FxHashMap;
+use smallvec::SmallVec;
+
+use super::{graph::Graph, package::Package};
+use crate::types::{PoolIndex, TxIndex};
+
+/// Cluster-local index for a node within one cluster's flat array.
+type LocalIdx = u32;
+
+/// A connected component of the mempool graph, re-indexed locally.
+struct Cluster {
+    /// Nodes indexed by `LocalIdx`.
+    nodes: Vec<ClusterNode>,
+    /// `topo_rank[i] = position of node i in a Kahn topological order`.
+    /// Used to visit nodes and to emit txs parents-first.
+    topo_rank: Vec<u32>,
+}
+
+struct ClusterNode {
+    tx_index: TxIndex,
+    fee: Sats,
+    vsize: VSize,
+    parents: SmallVec<[LocalIdx; 2]>,
+    children: SmallVec<[LocalIdx; 2]>,
+}
+
+/// Partition `graph` into clusters, linearize each, and flatten the
+/// resulting chunks into a `Vec<Package>`. Order across clusters is
+/// unspecified; the partitioner re-sorts by fee rate downstream.
+pub fn linearize_clusters(graph: &Graph) -> Vec<Package> {
+    let clusters = find_components(graph);
+    let mut packages: Vec<Package> = Vec::with_capacity(clusters.len());
+
+    for cluster in clusters {
+        if cluster.nodes.len() == 1 {
+            packages.push(singleton_package(&cluster));
+            continue;
+        }
+        for chunk in sfl::linearize(&cluster) {
+            packages.push(chunk_to_package(&cluster, &chunk));
+        }
+    }
+
+    packages
+}
+
+/// Stack-based (DFS-order) flood fill over parents + children adjacency,
+/// partitioning `graph` into connected components, each re-indexed locally.
+fn find_components(graph: &Graph) -> Vec<Cluster> {
+    let n = graph.len();
+    let mut seen: Vec<bool> = vec![false; n];
+    let mut clusters: Vec<Cluster> = Vec::new();
+    let mut queue: Vec<PoolIndex> = Vec::new();
+
+    for start in 0..n {
+        if seen[start] {
+            continue;
+        }
+
+        let mut members: Vec<PoolIndex> = Vec::new();
+        queue.clear();
+        queue.push(PoolIndex::from(start));
+        seen[start] = true;
+
+        while let Some(idx) = queue.pop() {
+            members.push(idx);
+            let node = &graph[idx];
+            for &p in &node.parents {
+                if !seen[p.as_usize()] {
+                    seen[p.as_usize()] = true;
+                    queue.push(p);
+                }
+            }
+            for &c in &node.children {
+                if !seen[c.as_usize()] {
+                    seen[c.as_usize()] = true;
+                    queue.push(c);
+                }
+            }
+        }
+
+        // Sort by PoolIndex for deterministic LocalIdx assignment (keeps
+        // SFL output stable across sync ticks).
+        members.sort_unstable();
+        clusters.push(build_cluster(graph, members));
+    }
+
+    clusters
+}
+
+/// Build a re-indexed `Cluster` from a set of graph members.
+fn build_cluster(graph: &Graph, members: Vec<PoolIndex>) -> Cluster {
+    let pool_to_local: FxHashMap<PoolIndex, LocalIdx> = members
+        .iter()
+        .enumerate()
+        .map(|(i, &p)| (p, i as LocalIdx))
+        .collect();
+
+    let mut nodes: Vec<ClusterNode> = Vec::with_capacity(members.len());
+    for &pool_idx in &members {
+        let node = &graph[pool_idx];
+        let mut parents: SmallVec<[LocalIdx; 2]> = SmallVec::new();
+        for &p in &node.parents {
+            if let Some(&local) = pool_to_local.get(&p) {
+                parents.push(local);
+            }
+        }
+        let mut children: SmallVec<[LocalIdx; 2]> = SmallVec::new();
+        for &c in &node.children {
+            if let Some(&local) = pool_to_local.get(&c) {
+                children.push(local);
+            }
+        }
+        nodes.push(ClusterNode {
+            tx_index: node.tx_index,
+            fee: node.fee,
+            vsize: node.vsize,
+            parents,
+            children,
+        });
+    }
+
+    let topo_rank = kahn_topo_rank(&nodes);
+    Cluster { nodes, topo_rank }
+}
+
+/// Kahn's algorithm: returns `rank[i] = position in a topological order`.
+fn kahn_topo_rank(nodes: &[ClusterNode]) -> Vec<u32> {
+    let n = nodes.len();
+    let mut indegree: Vec<u32> = nodes.iter().map(|n| n.parents.len() as u32).collect();
+    let mut ready: Vec<LocalIdx> = (0..n as LocalIdx)
+        .filter(|&i| indegree[i as usize] == 0)
+        .collect();
+
+    let mut rank: Vec<u32> = vec![0; n];
+    let mut position: u32 = 0;
+    let mut head = 0;
+
+    while head < ready.len() {
+        let v = ready[head];
+        head += 1;
+        rank[v as usize] = position;
+        position += 1;
+        for &c in &nodes[v as usize].children {
+            indegree[c as usize] -= 1;
+            if indegree[c as usize] == 0 {
+                ready.push(c);
+            }
+        }
+    }
+
+    debug_assert_eq!(position as usize, n, "cluster contained a cycle");
+    rank
+}
+
+/// Build a one-tx `Package` for a cluster of size 1.
+fn singleton_package(cluster: &Cluster) -> Package {
+    let node = &cluster.nodes[0];
+    let fee_rate = FeeRate::from((node.fee, node.vsize));
+    let mut package = Package::new(fee_rate);
+    package.add_tx(node.tx_index, u64::from(node.vsize));
+    package
+}
+
+/// Convert an SFL-emitted chunk (set of local indices) into a `Package`.
+/// Txs inside the package are ordered parents-first by `topo_rank`.
+fn chunk_to_package(cluster: &Cluster, chunk: &sfl::Chunk) -> Package {
+    let fee_rate = FeeRate::from((Sats::from(chunk.fee), VSize::from(chunk.vsize)));
+    let mut package = Package::new(fee_rate);
+
+    let mut ordered: SmallVec<[LocalIdx; 8]> = chunk.nodes.iter().copied().collect();
+    ordered.sort_by_key(|&local| cluster.topo_rank[local as usize]);
+
+    for local in ordered {
+        let node = &cluster.nodes[local as usize];
+        package.add_tx(node.tx_index, u64::from(node.vsize));
+    }
+
+    package
+}
--- a/crates/brk_mempool/src/block_builder/linearize/sfl.rs
+++ b/crates/brk_mempool/src/block_builder/linearize/sfl.rs
@@ -0,0 +1,264 @@
+//! Cluster linearizer.
+//!
+//! Two-branch dispatch by cluster size:
+//! - **n ≤ 18**: recursive enumeration of topologically-closed subsets.
+//!   Provably optimal. Visits only valid subsets (skips non-closed ones
+//!   without filtering) and maintains running fee/vsize incrementally.
+//! - **n > 18**: "greedy-union" ancestor-set search. Seeds with each
+//!   node's ancestor closure, then greedily adds any other ancestor
+//!   closure whose inclusion raises the combined feerate. Strict
+//!   superset of ancestor-set-sort's candidate space — catches the
+//!   sibling-union shapes that pure ASS misses.
+//!
+//! A final stack-based `canonicalize` pass merges adjacent chunks when
+//! the later one's feerate beats the earlier's, restoring the
+//! non-increasing-rate invariant.
+//!
+//! Everything runs on `u128` bitmasks (covers Bitcoin Core 31's cluster
+//! cap of 100). No RNG, no spanning-forest state, no floating-point.
+
+use smallvec::SmallVec;
+
+use super::{Cluster, LocalIdx};
+
+pub struct Chunk {
+    pub nodes: SmallVec<[LocalIdx; 4]>,
+    pub fee: u64,
+    pub vsize: u64,
+}
+
+const BRUTE_FORCE_LIMIT: usize = 18;
+const BITMASK_LIMIT: usize = 128;
+
+pub fn linearize(cluster: &Cluster) -> Vec<Chunk> {
+    let n = cluster.nodes.len();
+    if n == 0 {
+        return Vec::new();
+    }
+    assert!(n <= BITMASK_LIMIT, "cluster size {} exceeds u128 capacity", n);
+
+    let mut parents_mask: Vec<u128> = vec![0; n];
+    let mut ancestor_incl: Vec<u128> = vec![0; n];
+    let mut order: Vec<LocalIdx> = (0..n as LocalIdx).collect();
+    order.sort_by_key(|&i| cluster.topo_rank[i as usize]);
+    for &v in &order {
+        let mut par = 0u128;
+        let mut acc = 1u128 << v;
+        for &p in &cluster.nodes[v as usize].parents {
+            par |= 1u128 << p;
+            acc |= ancestor_incl[p as usize];
+        }
+        parents_mask[v as usize] = par;
+        ancestor_incl[v as usize] = acc;
+    }
+
+    let fee_of: Vec<u64> = cluster.nodes.iter().map(|n| u64::from(n.fee)).collect();
+    let vsize_of: Vec<u64> = cluster.nodes.iter().map(|n| u64::from(n.vsize)).collect();
+    let all: u128 = if n == 128 { !0 } else { (1u128 << n) - 1 };
+
+    let mut chunks: Vec<Chunk> = Vec::new();
+    let mut remaining: u128 = all;
+    while remaining != 0 {
+        let (mask, fee, vsize) = if n <= BRUTE_FORCE_LIMIT {
+            best_subset(remaining, &order, &parents_mask, &fee_of, &vsize_of)
+        } else {
+            best_ancestor_union(remaining, &ancestor_incl, &fee_of, &vsize_of)
+        };
+        chunks.push(chunk_of(mask, fee, vsize));
+        remaining &= !mask;
+    }
+
+    canonicalize(&mut chunks);
+    chunks
+}
+
+/// Recursive enumeration of topologically-closed subsets of
+/// `remaining`. Returns the (mask, fee, vsize) with the highest rate.
+fn best_subset(
+    remaining: u128,
+    topo_order: &[LocalIdx],
+    parents_mask: &[u128],
+    fee_of: &[u64],
+    vsize_of: &[u64],
+) -> (u128, u64, u64) {
+    let mut best = (0u128, 0u64, 1u64);
+    recurse(
+        0,
+        topo_order,
+        parents_mask,
+        remaining,
+        0,
+        0,
+        0,
+        fee_of,
+        vsize_of,
+        &mut best,
+    );
+    best
+}
+
+/// Walks `topo_order` depth-first, branching include/exclude per node so only
+/// topologically-closed subsets of `remaining` reach a leaf; the winner lands in `best`.
+fn recurse(
+    idx: usize,
+    topo_order: &[LocalIdx],
+    parents_mask: &[u128],
+    remaining: u128,
+    included: u128,
+    f: u64,
+    v: u64,
+    fee_of: &[u64],
+    vsize_of: &[u64],
+    best: &mut (u128, u64, u64),
+) {
+    if idx == topo_order.len() {
+        // `best.0 == 0`: nothing recorded yet, so take the first non-empty subset.
+        // Otherwise an all-zero-fee `remaining` never yields a mask and `linearize` spins.
+        let beats = best.0 == 0 || f as u128 * best.2 as u128 > best.1 as u128 * v as u128;
+        if included != 0 && beats {
+            *best = (included, f, v);
+        }
+        return;
+    }
+    let node = topo_order[idx];
+    let bit = 1u128 << node;
+
+    // Exclude branch: always legal, and the only one when the node is outside
+    // `remaining` or one of its parents (within remaining) was left out.
+    recurse(
+        idx + 1, topo_order, parents_mask, remaining, included, f, v, fee_of, vsize_of, best,
+    );
+    let forced_out = (bit & remaining) == 0
+        || (parents_mask[node as usize] & remaining & !included) != 0;
+    if forced_out {
+        return;
+    }
+    // Include branch: add the node's bit, fee and vsize to the running totals.
+    recurse(
+        idx + 1,
+        topo_order,
+        parents_mask,
+        remaining,
+        included | bit,
+        f + fee_of[node as usize],
+        v + vsize_of[node as usize],
+        fee_of,
+        vsize_of,
+        best,
+    );
+}
+
+/// For each node v in `remaining`, seed with anc(v) ∩ remaining, then
+/// greedily extend with whichever anc(u) raises the feerate most, until
+/// none does. Returns the best (mask, fee, vsize) across all seeds; the mask
+/// is non-empty whenever `remaining` is, even if every fee is zero.
+///
+/// Seeds are the ancestor-set-sort candidates and extensions only raise their
+/// feerate; every set is a union of ancestor closures, so topologically closed.
+fn best_ancestor_union(
+    remaining: u128,
+    ancestor_incl: &[u128],
+    fee_of: &[u64],
+    vsize_of: &[u64],
+) -> (u128, u64, u64) {
+    let mut best = (0u128, 0u64, 1u64);
+    let mut seeds = remaining;
+    while seeds != 0 {
+        let i = seeds.trailing_zeros() as usize;
+        seeds &= seeds - 1;
+
+        let mut s = ancestor_incl[i] & remaining;
+        let (mut f, mut v) = totals(s, fee_of, vsize_of);
+
+        // Greedy fixed point: apply the closure addition with the best resulting feerate.
+        loop {
+            let mut picked: Option<(u128, u64, u64)> = None;
+            let mut cands = remaining & !s;
+            while cands != 0 {
+                let j = cands.trailing_zeros() as usize;
+                cands &= cands - 1;
+                let add = ancestor_incl[j] & remaining & !s;
+                if add == 0 {
+                    continue;
+                }
+                let (df, dv) = totals(add, fee_of, vsize_of);
+                let nf = f + df;
+                let nv = v + dv;
+                // Must strictly improve current rate: nf/nv > f/v.
+                if nf as u128 * v as u128 <= f as u128 * nv as u128 {
+                    continue;
+                }
+                let better = match picked {
+                    None => true,
+                    Some((_, pf, pv)) => nf as u128 * pv as u128 > pf as u128 * nv as u128,
+                };
+                if better {
+                    picked = Some((add, nf, nv));
+                }
+            }
+            match picked {
+                Some((add, nf, nv)) => {
+                    s |= add;
+                    f = nf;
+                    v = nv;
+                }
+                None => break,
+            }
+        }
+
+        // `best.0 == 0`: nothing recorded yet, so the first seed is always accepted.
+        // Without it an all-zero-fee `remaining` returns an empty mask and `linearize` spins.
+        if best.0 == 0 || f as u128 * best.2 as u128 > best.1 as u128 * v as u128 {
+            best = (s, f, v);
+        }
+    }
+    best
+}
+
+/// Single-pass stack merge: for each incoming chunk, merge it into
+/// the stack top while the merge would raise the top's feerate, then
+/// push. At most one merge per chunk, however far a merge cascades.
+fn canonicalize(chunks: &mut Vec<Chunk>) {
+    let taken = std::mem::take(chunks);
+    let mut out: Vec<Chunk> = Vec::with_capacity(taken.len());
+    for mut cur in taken {
+        while let Some(top) = out.last() {
+            if cur.fee as u128 * top.vsize as u128 > top.fee as u128 * cur.vsize as u128 {
+                let mut prev = out.pop().unwrap();
+                prev.fee += cur.fee;
+                prev.vsize += cur.vsize;
+                prev.nodes.extend(cur.nodes);
+                cur = prev;
+            } else {
+                break;
+            }
+        }
+        out.push(cur);
+    }
+    *chunks = out;
+}
+
+#[inline]
+fn totals(mask: u128, fee_of: &[u64], vsize_of: &[u64]) -> (u64, u64) {
+    let mut f = 0u64;
+    let mut v = 0u64;
+    let mut bits = mask;
+    while bits != 0 {
+        let i = bits.trailing_zeros() as usize;
+        f += fee_of[i];
+        v += vsize_of[i];
+        bits &= bits - 1;
+    }
+    (f, v)
+}
+
+fn chunk_of(mask: u128, fee: u64, vsize: u64) -> Chunk {
+    let mut nodes: SmallVec<[LocalIdx; 4]> = SmallVec::new();
+    let mut bits = mask;
+    while bits != 0 {
+        let i = bits.trailing_zeros();
+        nodes.push(i as LocalIdx);
+        bits &= bits - 1;
+    }
+    Chunk { nodes, fee, vsize }
+}
--- a/crates/brk_mempool/src/block_builder/linearize/tests/basic.rs
+++ b/crates/brk_mempool/src/block_builder/linearize/tests/basic.rs
@@ -0,0 +1,179 @@
+//! Hand-built cluster shapes with known-good SFL outputs.
+
+use super::{chunk_shapes, make_cluster, run};
+
+#[test]
+fn singleton() {
+    let cluster = make_cluster(&[(100, 10)], &[]);
+    let chunks = run(&cluster);
+    assert_eq!(chunks.len(), 1);
+    assert_eq!(chunks[0].nodes.len(), 1);
+    assert_eq!(chunks[0].fee, 100);
+    assert_eq!(chunks[0].vsize, 10);
+}
+
+#[test]
+fn two_chain_parent_richer() {
+    // A (rate 10) → B (rate 1). Parent is more profitable alone; SFL
+    // should emit two chunks, A first.
+    let cluster = make_cluster(&[(100, 10), (1, 1)], &[(0, 1)]);
+    let chunks = run(&cluster);
+    assert_eq!(chunks.len(), 2);
+    // First chunk is A alone.
+    assert!(chunks[0].nodes.contains(&0));
+    assert_eq!(chunks[0].vsize, 10);
+    // Second chunk is B alone.
+    assert!(chunks[1].nodes.contains(&1));
+    assert_eq!(chunks[1].vsize, 1);
+}
+
+#[test]
+fn two_chain_child_pays_parent_cpfp() {
+    // A (rate 0.1) → B (rate 100). Classic CPFP: bundle them.
+    let cluster = make_cluster(&[(1, 10), (100, 1)], &[(0, 1)]);
+    let chunks = run(&cluster);
+    assert_eq!(chunks.len(), 1);
+    assert_eq!(chunks[0].nodes.len(), 2);
+    assert_eq!(chunks[0].fee, 101);
+    assert_eq!(chunks[0].vsize, 11);
+}
+
+#[test]
+fn v_shape_two_parents_one_child() {
+    // P0 (rate 1), P1 (rate 1) → C (rate 100). Expect single chunk.
+    let cluster = make_cluster(&[(1, 1), (1, 1), (100, 1)], &[(0, 2), (1, 2)]);
+    let chunks = run(&cluster);
+    assert_eq!(chunks.len(), 1);
+    assert_eq!(chunks[0].nodes.len(), 3);
+    assert_eq!(chunks[0].fee, 102);
+    assert_eq!(chunks[0].vsize, 3);
+}
+
+#[test]
+fn lambda_shape_one_parent_two_children_uneven() {
+    // A(1) → B(5), A(1) → C(5). The "non-ancestor-set" case: {A, B, C}
+    // has rate 11/3 ≈ 3.67, beating any ancestor set ({A,B} or {A,C}
+    // at rate 3). SFL should produce a single chunk.
+    let cluster = make_cluster(&[(1, 1), (5, 1), (5, 1)], &[(0, 1), (0, 2)]);
+    let chunks = run(&cluster);
+    assert_eq!(chunks.len(), 1);
+    assert_eq!(chunks[0].fee, 11);
+    assert_eq!(chunks[0].vsize, 3);
+}
+
+#[test]
+fn diamond() {
+    // 4-node diamond: A → B, A → C, B → D, C → D. With D the payer,
+    // everything ends up in one chunk.
+    let cluster = make_cluster(
+        &[(1, 1), (1, 1), (1, 1), (100, 1)],
+        &[(0, 1), (0, 2), (1, 3), (2, 3)],
+    );
+    let chunks = run(&cluster);
+    assert_eq!(chunks.len(), 1);
+    assert_eq!(chunks[0].nodes.len(), 4);
+    assert_eq!(chunks[0].fee, 103);
+    assert_eq!(chunks[0].vsize, 4);
+}
+
+#[test]
+fn chain_alternating_high_low() {
+    // 4-chain with rates [10, 1, 10, 1], all vsize 1. The txs do not all
+    // merge (the first tx alone already has the top rate), so check the
+    // totals and non-increasing order rather than a specific partition.
+    let cluster = make_cluster(
+        &[(10, 1), (1, 1), (10, 1), (1, 1)],
+        &[(0, 1), (1, 2), (2, 3)],
+    );
+    let chunks = run(&cluster);
+    assert_eq!(chunks_total_fee(&chunks), 22);
+    assert_eq!(chunks_total_vsize(&chunks), 4);
+    assert_non_increasing(&chunks);
+}
+
+#[test]
+fn chain_starts_low_ends_high() {
+    // 4-chain [1, 100, 1, 100]: the optimal chunking groups pairs so
+    // high-rate bumps lift low-rate predecessors. Exact partition is
+    // implementation-dependent; check invariants.
+    let cluster = make_cluster(
+        &[(1, 1), (100, 1), (1, 1), (100, 1)],
+        &[(0, 1), (1, 2), (2, 3)],
+    );
+    let chunks = run(&cluster);
+    assert_eq!(chunks_total_fee(&chunks), 202);
+    assert_eq!(chunks_total_vsize(&chunks), 4);
+    assert_non_increasing(&chunks);
+}
+
+#[test]
+fn two_disconnected_clusters_would_each_be_separate() {
+    // NOTE: this file tests SFL on a single cluster; multi-cluster
+    // flow is tested via `linearize_clusters` at the higher level.
+    // For a single-cluster test: fan-out of 5 children.
+    let cluster = make_cluster(
+        &[(1, 1), (10, 1), (20, 1), (30, 1), (40, 1), (50, 1)],
+        &[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)],
+    );
+    let chunks = run(&cluster);
+    assert_eq!(chunks_total_fee(&chunks), 151);
+    assert_eq!(chunks_total_vsize(&chunks), 6);
+    assert_non_increasing(&chunks);
+    // Every tx exactly once.
+    let mut seen: Vec<usize> = Vec::new();
+    for ch in &chunks {
+        for &n in &ch.nodes {
+            seen.push(n as usize);
+        }
+    }
+    seen.sort();
+    assert_eq!(seen, vec![0, 1, 2, 3, 4, 5]);
+}
+
+#[test]
+fn wide_fan_in() {
+    // 5 parents → 1 child. Parents at rate 1, child at rate 100.
+    let cluster = make_cluster(
+        &[(1, 1), (1, 1), (1, 1), (1, 1), (1, 1), (100, 1)],
+        &[(0, 5), (1, 5), (2, 5), (3, 5), (4, 5)],
+    );
+    let chunks = run(&cluster);
+    assert_eq!(chunks.len(), 1);
+    assert_eq!(chunks[0].fee, 105);
+    assert_eq!(chunks[0].vsize, 6);
+}
+
+#[test]
+fn shapes_are_stable_on_identical_input() {
+    // Determinism: identical cluster should produce identical chunking.
+    let cluster = make_cluster(
+        &[(1, 1), (100, 1), (1, 1), (100, 1)],
+        &[(0, 1), (1, 2), (2, 3)],
+    );
+    let a = chunk_shapes(&run(&cluster));
+    let b = chunk_shapes(&run(&cluster));
+    assert_eq!(a, b);
+}
+
+// --- helpers ---
+
+fn chunks_total_fee(chunks: &[super::Chunk]) -> u64 {
+    chunks.iter().map(|c| c.fee).sum()
+}
+
+fn chunks_total_vsize(chunks: &[super::Chunk]) -> u64 {
+    chunks.iter().map(|c| c.vsize).sum()
+}
+
+fn assert_non_increasing(chunks: &[super::Chunk]) {
+    for pair in chunks.windows(2) {
+        let a_rate = pair[0].fee as u128 * pair[1].vsize as u128;
+        let b_rate = pair[1].fee as u128 * pair[0].vsize as u128;
+        assert!(
+            a_rate >= b_rate,
+            "chunk feerates not non-increasing: {:?} vs {:?}",
+            (pair[0].fee, pair[0].vsize),
+            (pair[1].fee, pair[1].vsize),
+        );
+    }
+}
--- a/crates/brk_mempool/src/block_builder/linearize/tests/mod.rs
+++ b/crates/brk_mempool/src/block_builder/linearize/tests/mod.rs
@@ -0,0 +1,53 @@
+//! Tests for the SFL linearizer.
+//!
+//! Mirrors Bitcoin Core's `src/test/cluster_linearize_tests.cpp` split:
+//! - `basic`  — hand-built cluster shapes, deterministic assertions.
+//! - `oracle` — brute-force optimality checks for small clusters.
+//! - `stress` — randomized invariant checks for larger clusters.
+
+mod basic;
+mod oracle;
+mod stress;
+
+use smallvec::SmallVec;
+
+use super::sfl::Chunk;
+use super::{Cluster, ClusterNode, LocalIdx, kahn_topo_rank, sfl};
+use crate::types::TxIndex;
+
+/// Build a `Cluster` from `(fee, vsize)` tuples plus a list of
+/// `(parent_local, child_local)` edges. Tx indices are assigned 0..n.
+/// Panics if the graph has a cycle or a bad edge.
+pub(super) fn make_cluster(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)]) -> Cluster {
+    let mut nodes: Vec<ClusterNode> = fees_vsizes
+        .iter()
+        .enumerate()
+        .map(|(i, &(fee, vsize))| ClusterNode {
+            tx_index: TxIndex::from(i),
+            fee: brk_types::Sats::from(fee),
+            vsize: brk_types::VSize::from(vsize),
+            parents: SmallVec::new(),
+            children: SmallVec::new(),
+        })
+        .collect();
+
+    for &(p, c) in edges {
+        nodes[c as usize].parents.push(p);
+        nodes[p as usize].children.push(c);
+    }
+
+    let topo_rank = kahn_topo_rank(&nodes);
+    Cluster { nodes, topo_rank }
+}
+
+pub(super) fn run(cluster: &Cluster) -> Vec<Chunk> {
+    sfl::linearize(cluster)
+}
+
+/// Shortcut: return `(chunk_size, fee, vsize)` tuples in emitted order.
+pub(super) fn chunk_shapes(chunks: &[Chunk]) -> Vec<(usize, u64, u64)> {
+    chunks
+        .iter()
+        .map(|c| (c.nodes.len(), c.fee, c.vsize))
+        .collect()
+}
--- a/crates/brk_mempool/src/block_builder/linearize/tests/oracle.rs
+++ b/crates/brk_mempool/src/block_builder/linearize/tests/oracle.rs
@@ -0,0 +1,484 @@
+//! Brute-force optimality oracle.
+//!
+//! For small clusters (n ≤ 8), enumerate every topological ordering and
+//! compute the canonical chunking of each. The "best" chunking is the
+//! one whose fee diagram is highest at the first vsize where two
+//! diagrams differ. SFL's diagram must never fall below it.
+//!
+//! This file focuses on a handful of hand-picked shapes plus every
+//! topological variant of a few DAGs where ancestor-set-sort would pick
+//! a suboptimal chunking. Exhaustive DAG enumeration is out of scope;
+//! the invariant tests in `stress.rs` cover random shapes.
+
+use super::super::LocalIdx;
+use super::{Chunk, make_cluster, run};
+
+// ---------- oracle ----------
+
+/// Compute the canonical (upper-concave-envelope) chunking of a
+/// linearization.
+///
+/// `path` holds one `(fee, vsize)` pair per position of the
+/// linearization. The result holds one `(fee, vsize)` pair per chunk,
+/// in order, with non-increasing feerates from front to back.
+///
+/// Each element starts as its own chunk. While the newest chunk has a
+/// strictly higher feerate than the chunk just before it, the two are
+/// merged; equal feerates are left as separate chunks. A single
+/// left-to-right pass with a stack is enough, because a merge can only
+/// affect the comparison with the chunk immediately before it.
+///
+/// Feerates are compared by cross-multiplication in `u128`, so there is
+/// no division and no overflow for `u64` inputs. Entries are assumed to
+/// have a positive vsize.
+fn canonical_chunking(path: &[(u64, u64)]) -> Vec<(u64, u64)> {
+    let mut chunks: Vec<(u64, u64)> = Vec::with_capacity(path.len());
+    for &(fee, vsize) in path {
+        let (mut fee, mut vsize) = (fee, vsize);
+        while let Some(&(prev_fee, prev_vsize)) = chunks.last() {
+            // fee / vsize > prev_fee / prev_vsize, without dividing.
+            let out_of_order =
+                fee as u128 * prev_vsize as u128 > prev_fee as u128 * vsize as u128;
+            if !out_of_order {
+                break;
+            }
+            // Absorb the earlier chunk, then re-check against the one
+            // before it: the merged rate may now exceed that one too.
+            fee += prev_fee;
+            vsize += prev_vsize;
+            chunks.pop();
+        }
+        chunks.push((fee, vsize));
+    }
+    chunks
+}
+
+/// Enumerate every topological ordering of the DAG described by
+/// `parents`, where `parents[c]` lists the parents of node `c`.
+///
+/// Plain permutation generation (e.g. Heap's algorithm) would ignore
+/// the edges, so this backtracks over the set of nodes whose remaining
+/// in-degree is zero, trying lower indices first. The number of orders
+/// can be factorial in the node count.
+fn all_topo_orders(parents: &[Vec<LocalIdx>]) -> Vec<Vec<LocalIdx>> {
+    // Extend `prefix` with each currently-ready node in turn, recurse,
+    // then undo the choice before trying the next candidate.
+    fn extend(
+        children: &[Vec<LocalIdx>],
+        remaining: &mut [u32],
+        prefix: &mut Vec<LocalIdx>,
+        total: usize,
+        found: &mut Vec<Vec<LocalIdx>>,
+    ) {
+        if prefix.len() == total {
+            found.push(prefix.clone());
+            return;
+        }
+
+        // Snapshot the ready set before `remaining` is mutated below.
+        let mut candidates: Vec<LocalIdx> = Vec::new();
+        for node in 0..total as LocalIdx {
+            if remaining[node as usize] == 0 {
+                candidates.push(node);
+            }
+        }
+
+        for node in candidates {
+            // `u32::MAX` marks the node as already placed.
+            remaining[node as usize] = u32::MAX;
+            prefix.push(node);
+            for &child in &children[node as usize] {
+                remaining[child as usize] -= 1;
+            }
+
+            extend(children, remaining, prefix, total, found);
+
+            prefix.pop();
+            for &child in &children[node as usize] {
+                remaining[child as usize] += 1;
+            }
+            remaining[node as usize] = 0;
+        }
+    }
+
+    let total = parents.len();
+
+    // Remaining in-degree of each node; starts as its parent count.
+    let mut remaining: Vec<u32> = parents.iter().map(|ps| ps.len() as u32).collect();
+
+    // Invert the parent lists into child lists.
+    let mut children: Vec<Vec<LocalIdx>> = vec![Vec::new(); total];
+    for (child, ps) in parents.iter().enumerate() {
+        for &parent in ps {
+            children[parent as usize].push(child as LocalIdx);
+        }
+    }
+
+    let mut found: Vec<Vec<LocalIdx>> = Vec::new();
+    let mut prefix: Vec<LocalIdx> = Vec::new();
+    extend(&children, &mut remaining, &mut prefix, total, &mut found);
+    found
+}
+
+/// Brute-force the best canonical chunking of the cluster
+/// `(fees_vsizes, edges)`.
+///
+/// Every topological order is chunked with `canonical_chunking`, and a
+/// candidate replaces the current best whenever `dominates` says so.
+/// Returns the winning chunking as `(fee, vsize)` per chunk.
+///
+/// Panics if an edge index is out of range or if no topological order
+/// exists.
+fn oracle_best(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)]) -> Vec<(u64, u64)> {
+    let mut parents: Vec<Vec<LocalIdx>> = vec![Vec::new(); fees_vsizes.len()];
+    for &(parent, child) in edges {
+        parents[child as usize].push(parent);
+    }
+
+    let mut best: Option<Vec<(u64, u64)>> = None;
+    for order in all_topo_orders(&parents) {
+        let path: Vec<(u64, u64)> = order
+            .iter()
+            .map(|&local| fees_vsizes[local as usize])
+            .collect();
+        let candidate = canonical_chunking(&path);
+
+        let take_candidate = match &best {
+            None => true,
+            Some(incumbent) => dominates(&candidate, incumbent),
+        };
+        if take_candidate {
+            best = Some(candidate);
+        }
+    }
+    best.expect("at least one topological order")
+}
+
+/// Decide whether chunking `a` should be preferred over chunking `b`.
+///
+/// Both chunkings are turned into cumulative fee curves and sampled at
+/// every integer vsize from 1 to the total. The verdict is taken at the
+/// FIRST vsize where the sampled fees differ: `true` if `a` is higher
+/// there, `false` if `b` is. Later points are not inspected, so this is
+/// a lexicographic comparison of the sampled curves rather than a full
+/// "at or above everywhere" dominance check. Identical sampled curves
+/// return `true`.
+///
+/// Both chunkings are expected to cover the same total vsize; this is
+/// only checked in debug builds.
+fn dominates(a: &[(u64, u64)], b: &[(u64, u64)]) -> bool {
+    // Sample both curves at every integer vsize, not only at chunk
+    // boundaries.
+    let a_points = cumulative(a);
+    let b_points = cumulative(b);
+    let total_vsize = a_points.last().map(|p| p.0).unwrap_or(0);
+    debug_assert_eq!(total_vsize, b_points.last().map(|p| p.0).unwrap_or(0));
+    for v in 1..=total_vsize {
+        let fa = fee_at(&a_points, v);
+        let fb = fee_at(&b_points, v);
+        if fa < fb {
+            return false;
+        }
+        if fa > fb {
+            return true; // first difference favours `a`; stop here
+        }
+    }
+    // Identical sampled curves: report `true`. A caller that replaces
+    // its current best whenever this returns `true` therefore keeps the
+    // most recently seen of several tied candidates.
+    true
+}
+
+/// Turn per-chunk `(fee, vsize)` pairs into the breakpoints of the
+/// cumulative fee curve.
+///
+/// The output is `(cumulative_vsize, cumulative_fee)` per breakpoint
+/// (note the swapped order relative to the input), starts with the
+/// origin `(0, 0)`, and has one more entry than `chunks`.
+fn cumulative(chunks: &[(u64, u64)]) -> Vec<(u64, u64)> {
+    let origin = (0u64, 0u64);
+    let running = chunks.iter().scan(origin, |acc, &(fee, vsize)| {
+        acc.0 += vsize;
+        acc.1 += fee;
+        Some(*acc)
+    });
+    std::iter::once(origin).chain(running).collect()
+}
+
+/// Evaluate a cumulative fee curve at vsize `v`.
+///
+/// `cum` is a list of `(cumulative_vsize, cumulative_fee)` breakpoints
+/// as produced by `cumulative`. The value is taken from the first
+/// segment whose right end is at or beyond `v`, using linear
+/// interpolation between its two breakpoints (integer division, so the
+/// result is rounded down). A zero-width segment yields its left fee.
+/// If `v` lies beyond the last breakpoint, the final cumulative fee is
+/// returned; an empty `cum` yields 0.
+fn fee_at(cum: &[(u64, u64)], v: u64) -> u128 {
+    let segment = cum.windows(2).find(|w| v <= w[1].0);
+    match segment {
+        Some(w) => {
+            let (v0, f0) = w[0];
+            let (v1, f1) = w[1];
+            let width = v1 - v0;
+            if width == 0 {
+                return f0 as u128;
+            }
+            let rise = f1 - f0;
+            f0 as u128 + (rise as u128) * ((v - v0) as u128) / (width as u128)
+        }
+        None => cum.last().map(|&(_, f)| f as u128).unwrap_or(0),
+    }
+}
+
+/// Project each chunk to its `(fee, vsize)` pair, in emission order.
+fn chunk_rate(chunks: &[Chunk]) -> Vec<(u64, u64)> {
+    let mut pairs = Vec::with_capacity(chunks.len());
+    for chunk in chunks {
+        pairs.push((chunk.fee, chunk.vsize));
+    }
+    pairs
+}
+
+/// Assert that SFL's fee diagram is nowhere below the oracle's.
+///
+/// Builds the cluster, linearizes it, brute-forces the best chunking,
+/// and compares the two cumulative fee curves at every integer vsize
+/// from 1 to the total. Panics with both chunkings on the first vsize
+/// where SFL collects less fee than the oracle, or if the two disagree
+/// on total vsize.
+fn assert_matches_oracle(fees_vsizes: &[(u64, u64)], edges: &[(LocalIdx, LocalIdx)]) {
+    let got = chunk_rate(&run(&make_cluster(fees_vsizes, edges)));
+    let want = oracle_best(fees_vsizes, edges);
+
+    let (got_curve, want_curve) = (cumulative(&got), cumulative(&want));
+    let total_vsize = got_curve.last().unwrap().0;
+    assert_eq!(
+        total_vsize,
+        want_curve.last().unwrap().0,
+        "total vsize mismatch"
+    );
+
+    for v in 1..=total_vsize {
+        let sfl_fee = fee_at(&got_curve, v);
+        let oracle_fee = fee_at(&want_curve, v);
+        assert!(
+            sfl_fee >= oracle_fee,
+            "SFL diagram below oracle at vsize {}: got {} want {}\n  got={:?}\n  want={:?}",
+            v,
+            sfl_fee,
+            oracle_fee,
+            got,
+            want,
+        );
+    }
+}
+
+// ---------- tests ----------
+
+/// One transaction, no edges: the only chunking is the tx itself.
+#[test]
+fn oracle_singleton() {
+    assert_matches_oracle(&[(100, 10)], &[]);
+}
+
+/// Two-tx chain where the child pays far more than the parent (CPFP).
+#[test]
+fn oracle_chain_cpfp() {
+    assert_matches_oracle(&[(1, 10), (100, 1)], &[(0, 1)]);
+}
+
+/// Two-tx chain where the parent already has the higher feerate.
+#[test]
+fn oracle_chain_parent_richer() {
+    assert_matches_oracle(&[(100, 10), (1, 1)], &[(0, 1)]);
+}
+
+/// Two cheap parents feeding one high-fee child.
+#[test]
+fn oracle_v_shape() {
+    assert_matches_oracle(&[(1, 1), (1, 1), (100, 1)], &[(0, 2), (1, 2)]);
+}
+
+#[test]
+fn oracle_lambda_non_ancestor_beats_ancestor() {
+    // The "non-ancestor-set wins" case: one cheap parent with two
+    // equally paying children. SFL should match the oracle's
+    // single-chunk optimum at rate 11/3.
+    assert_matches_oracle(&[(1, 1), (5, 1), (5, 1)], &[(0, 1), (0, 2)]);
+}
+
+/// Diamond: one root, two middle txs, one high-fee sink spending both.
+#[test]
+fn oracle_diamond() {
+    assert_matches_oracle(
+        &[(1, 1), (1, 1), (1, 1), (100, 1)],
+        &[(0, 1), (0, 2), (1, 3), (2, 3)],
+    );
+}
+
+#[test]
+fn oracle_tree_depth_3() {
+    // A → B → D, A → C → E. Leaves pay.
+    assert_matches_oracle(
+        &[(1, 1), (1, 1), (1, 1), (100, 1), (100, 1)],
+        &[(0, 1), (0, 2), (1, 3), (2, 4)],
+    );
+}
+
+#[test]
+fn oracle_branching_with_cheap_sibling() {
+    // A(1) → B(50), A → C(100). The best order is A, C, B, which chunks
+    // as [A, C] at rate 101/2 followed by [B] at rate 50 (a single
+    // chunk would only reach 151/3).
+    assert_matches_oracle(&[(1, 1), (50, 1), (100, 1)], &[(0, 1), (0, 2)]);
+}
+
+#[test]
+fn oracle_four_chain_alternating() {
+    // Alternating rates along a 4-tx chain. A chain has exactly one
+    // topological order, so this exercises the chunking rather than the
+    // choice of order.
+    assert_matches_oracle(
+        &[(10, 1), (1, 1), (10, 1), (1, 1)],
+        &[(0, 1), (1, 2), (2, 3)],
+    );
+}
+
+// ---------- random DAG sweep ----------
+//
+// Generate random DAG shapes up to n=8 (at most 8! = 40320 topological
+// orders per DAG), brute-force the optimum for each, and compare the
+// linearizer's output (called "merge-only" in the failure message
+// below) against it. The stats sweep covers 1450 seeded cases and is
+// `#[ignore]`d by default; it is meant to surface tie-break pathologies
+// the hand-picked shapes above might miss.
+
+/// Small deterministic xorshift generator (shifts 13 / 7 / 17) used to
+/// build reproducible random DAGs.
+struct DagRng(u64);
+impl DagRng {
+    /// Seed the generator. The low bit is forced on so the state is
+    /// never zero, which xorshift cannot leave.
+    fn new(seed: u64) -> Self {
+        DagRng(seed | 1)
+    }
+    /// Advance the state by one xorshift step and return the new state.
+    fn next(&mut self) -> u64 {
+        self.0 ^= self.0 << 13;
+        self.0 ^= self.0 >> 7;
+        self.0 ^= self.0 << 17;
+        self.0
+    }
+    /// Draw a value in `0..n` by modulo reduction. `n == 0` returns 0
+    /// without advancing the generator.
+    fn range(&mut self, n: u64) -> u64 {
+        match n {
+            0 => 0,
+            _ => self.next() % n,
+        }
+    }
+}
+
+/// Build a reproducible random DAG with `n` nodes.
+///
+/// Fees are drawn from 1..=200 and vsizes from 1..=5. Each node `i > 0`
+/// makes 0-3 parent draws from nodes `0..i`; repeated draws of the same
+/// parent are dropped, so a node may end up with fewer parents than
+/// draws. Parents always have a lower index than their child, so the
+/// graph is acyclic. Returns `(fees_vsizes, edges)` with edges as
+/// `(parent, child)`.
+fn random_dag(n: usize, seed: u64) -> (Vec<(u64, u64)>, Vec<(LocalIdx, LocalIdx)>) {
+    let mut rng = DagRng::new(seed);
+
+    let mut fees_vsizes: Vec<(u64, u64)> = Vec::with_capacity(n);
+    for _ in 0..n {
+        let fee = 1 + rng.range(200);
+        let vsize = 1 + rng.range(5);
+        fees_vsizes.push((fee, vsize));
+    }
+
+    let mut edges: Vec<(LocalIdx, LocalIdx)> = Vec::new();
+    for child in 1..n {
+        let draws = rng.range(4) as usize;
+        // Edges pushed from here on all belong to `child`.
+        let first_edge = edges.len();
+        for _ in 0..draws {
+            let parent = rng.range(child as u64) as LocalIdx;
+            let is_new = edges[first_edge..].iter().all(|&(p, _)| p != parent);
+            if is_new {
+                edges.push((parent, child as LocalIdx));
+            }
+        }
+    }
+    (fees_vsizes, edges)
+}
+
+/// Assert that the linearizer is optimal on the random DAG generated
+/// from `(n, seed)`.
+///
+/// Same comparison as `assert_matches_oracle`, but the failure message
+/// also carries `n`, `seed`, and the generated inputs so a failing case
+/// can be reproduced.
+///
+/// NOTE(review): this helper is private and no test in this file calls
+/// it; the ignored stats sweep goes through `optimality_gap` instead.
+/// The "merge-only" wording in the message presumably names the
+/// linearizer variant — confirm it still matches what `run` executes.
+fn assert_optimal_on_random(n: usize, seed: u64) {
+    let (fv, edges) = random_dag(n, seed);
+    let cluster = super::make_cluster(&fv, &edges);
+    let chunks = super::run(&cluster);
+    let got = chunk_rate(&chunks);
+
+    let want = oracle_best(&fv, &edges);
+
+    let got_cum = cumulative(&got);
+    let want_cum = cumulative(&want);
+    let total = got_cum.last().unwrap().0;
+    assert_eq!(total, want_cum.last().unwrap().0);
+
+    // Compare the two fee curves at every integer vsize.
+    for v in 1..=total {
+        let fa = fee_at(&got_cum, v);
+        let fb = fee_at(&want_cum, v);
+        assert!(
+            fa >= fb,
+            "merge-only suboptimal (n={}, seed={})\n  fv = {:?}\n  edges = {:?}\n  got = {:?}\n  want = {:?}\n  at vsize {}: got {}, want {}",
+            n,
+            seed,
+            fv,
+            edges,
+            got,
+            want,
+            v,
+            fa,
+            fb,
+        );
+    }
+}
+
+/// Measure how far chunking `got` falls below the optimum `want`.
+///
+/// Both are `(fee, vsize)` per chunk. The two cumulative fee curves are
+/// compared at every integer vsize from 1 to the total; the result is
+/// `Some(largest shortfall)` if `got` is below `want` anywhere, and
+/// `None` if it never is.
+fn optimality_gap_of(got: &[(u64, u64)], want: &[(u64, u64)]) -> Option<u128> {
+    let got_curve = cumulative(got);
+    let want_curve = cumulative(want);
+    let total = got_curve.last().unwrap().0;
+    debug_assert_eq!(total, want_curve.last().unwrap().0);
+
+    // Points where `got` is at or above `want` contribute zero.
+    let worst_gap = (1..=total)
+        .map(|v| fee_at(&want_curve, v).saturating_sub(fee_at(&got_curve, v)))
+        .max()
+        .unwrap_or(0);
+
+    match worst_gap {
+        0 => None,
+        gap => Some(gap),
+    }
+}
+
+/// Optimality gap of the production linearizer on the random DAG
+/// generated from `(n, seed)`; `None` means it matched the brute-force
+/// optimum.
+fn optimality_gap(n: usize, seed: u64) -> Option<u128> {
+    let (fv, edges) = random_dag(n, seed);
+    let got = chunk_rate(&run(&make_cluster(&fv, &edges)));
+    optimality_gap_of(&got, &oracle_best(&fv, &edges))
+}
+
+/// Diagnostic sweep: report the linearizer's optimality gap on random DAGs.
+///
+/// For each cluster size, runs a batch of seeded random DAGs through
+/// `optimality_gap` and prints to stderr how many were suboptimal and
+/// the largest gap seen, followed by overall totals. It asserts
+/// nothing, so it cannot fail on a suboptimal result; it is ignored by
+/// default and only useful with `--ignored --nocapture`.
+#[test]
+#[ignore = "diagnostic sweep; run with --ignored to print stats"]
+fn oracle_random_sweep_stats() {
+    // (cluster size, number of cases, seed offset)
+    let sizes: &[(usize, u64, u64)] = &[
+        (4, 500, 1),
+        (5, 500, 1_000),
+        (6, 300, 2_000),
+        (7, 100, 3_000),
+        (8, 50, 4_000),
+    ];
+
+    eprintln!();
+    eprintln!("Optimality sweep (random DAGs vs brute-force optimum):");
+    eprintln!("  n   cases     sub   max-gap");
+    eprintln!("  ---------------------------");
+
+    // Suboptimal cases and total cases across all sizes.
+    let mut total = 0usize;
+    let mut cases_total = 0usize;
+    for &(n, count, base) in sizes {
+        // Per-size tally: suboptimal count and worst gap.
+        let mut sub = 0;
+        let mut gap: u128 = 0;
+        for seed in 0..count {
+            let s = seed.wrapping_add(base);
+            if let Some(g) = optimality_gap(n, s) {
+                sub += 1;
+                gap = gap.max(g);
+            }
+        }
+        total += sub;
+        cases_total += count as usize;
+        eprintln!("  {}   {:5}     {:3}     {:4}", n, count, sub, gap);
+    }
+    eprintln!("  ---------------------------");
+    let pct = (total as f64 / cases_total as f64) * 100.0;
+    eprintln!("  totals {:4}   {:3} ({:.1}%)", cases_total, total, pct);
+    eprintln!();
+}
+
+/// Perf benchmark across cluster sizes. Run with
+/// `cargo test -p brk_mempool perf_linearize --release -- --ignored --nocapture`.
+///
+/// For each `(cluster size, call count)` pair, the random clusters are
+/// built up front so only `sfl::linearize` is timed, and the per-call
+/// average and total elapsed time are printed to stderr. Asserts
+/// nothing.
+#[test]
+#[ignore = "perf benchmark; run with --ignored --nocapture"]
+fn perf_linearize() {
+    use std::time::Instant;
+
+    // (cluster size, number of linearize calls)
+    let sizes: &[(usize, u64)] = &[
+        (2, 5_000),
+        (5, 5_000),
+        (10, 2_000),
+        (15, 1_000),
+        (18, 500),
+        (20, 500),
+        (30, 200),
+        (50, 100),
+        (75, 50),
+        (100, 30),
+    ];
+
+    eprintln!();
+    eprintln!("Linearize perf (release, per-call avg):");
+    eprintln!("  n    calls     avg       total");
+    eprintln!("  -------------------------------------");
+
+    for &(n, calls) in sizes {
+        // Build inputs outside the timed region.
+        let clusters: Vec<_> = (0..calls)
+            .map(|s| {
+                let (fv, edges) = random_dag(n, s + 77);
+                super::make_cluster(&fv, &edges)
+            })
+            .collect();
+
+        let t = Instant::now();
+        let mut sink = 0u64;
+        for c in &clusters {
+            for chunk in super::super::sfl::linearize(c) {
+                sink = sink.wrapping_add(chunk.fee);
+            }
+        }
+        let elapsed = t.elapsed();
+        // Make the accumulated result observable so the optimizer cannot
+        // discard the linearize calls being measured.
+        std::hint::black_box(sink);
+
+        let avg_ns = elapsed.as_nanos() / calls as u128;
+        let pretty = if avg_ns >= 1_000_000 {
+            format!("{:.2} ms", avg_ns as f64 / 1_000_000.0)
+        } else if avg_ns >= 1_000 {
+            format!("{:.2} µs", avg_ns as f64 / 1_000.0)
+        } else {
+            format!("{} ns", avg_ns)
+        };
+        eprintln!(
+            "  {:<4} {:<8}  {:<10} {:.2?}",
+            n, calls, pretty, elapsed
+        );
+    }
+    eprintln!();
+}
--- a/crates/brk_mempool/src/block_builder/linearize/tests/stress.rs
+++ b/crates/brk_mempool/src/block_builder/linearize/tests/stress.rs
@@ -0,0 +1,186 @@
+//! Randomized invariant tests.
+//!
+//! Generates random DAGs of up to 100 nodes (most cases use 30 or
+//! fewer) with varied fee rates and verifies SFL's output respects:
+//!   1. Every node appears in exactly one chunk.
+//!   2. Each chunk is topologically closed (no intra-cluster parent
+//!      of a chunk member lies in a later-emitted chunk).
+//!   3. Chunk feerates are non-increasing along emission order.
+
+use super::super::LocalIdx;
+use super::{make_cluster, run};
+
+/// Tiny deterministic xorshift (shifts 13 / 7 / 17) so tests are
+/// reproducible.
+struct Rng(u64);
+impl Rng {
+    /// Seed the generator. The low bit is forced on so the state is
+    /// never zero, which xorshift cannot leave.
+    fn new(seed: u64) -> Self {
+        Self(seed | 1)
+    }
+    /// Advance the state by one xorshift step and return the new state.
+    fn next_u64(&mut self) -> u64 {
+        let mut x = self.0;
+        x ^= x << 13;
+        x ^= x >> 7;
+        x ^= x << 17;
+        self.0 = x;
+        x
+    }
+    /// Draw a value in `0..n` by modulo reduction.
+    ///
+    /// `n == 0` returns 0 without advancing the generator, instead of
+    /// panicking on a division by zero.
+    fn range(&mut self, n: u64) -> u64 {
+        if n == 0 { 0 } else { self.next_u64() % n }
+    }
+}
+
+/// Build a reproducible random DAG with `n` nodes.
+///
+/// Fees are drawn from 1..=1000 and vsizes from 1..=100. Each node
+/// `i > 0` makes 0-3 parent draws from nodes with index below `i`,
+/// which keeps the graph acyclic; repeated draws of the same parent
+/// are dropped. Returns `(fees_vsizes, edges)` with edges as
+/// `(parent, child)`.
+fn random_cluster(n: usize, seed: u64) -> (Vec<(u64, u64)>, Vec<(LocalIdx, LocalIdx)>) {
+    let mut rng = Rng::new(seed);
+
+    let fees_vsizes: Vec<(u64, u64)> = (0..n)
+        .map(|_| {
+            let fee = 1 + rng.range(1000);
+            let vsize = 1 + rng.range(100);
+            (fee, vsize)
+        })
+        .collect();
+
+    let mut edges: Vec<(LocalIdx, LocalIdx)> = Vec::new();
+    for child in 1..n {
+        let draws = rng.range(4);
+        let mut parents: Vec<LocalIdx> = Vec::new();
+        for _ in 0..draws {
+            let candidate = rng.range(child as u64) as LocalIdx;
+            if !parents.contains(&candidate) {
+                parents.push(candidate);
+            }
+        }
+        edges.extend(parents.into_iter().map(|parent| (parent, child as LocalIdx)));
+    }
+
+    (fees_vsizes, edges)
+}
+
+/// Assert the structural invariants of a linearizer result.
+///
+/// `fees_vsizes` and `edges` describe the input cluster (edges are
+/// `(parent, child)`), and `chunks` is the linearizer's output in
+/// emission order. Panics on the first violated check, in this order:
+/// every node appears in exactly one chunk; each chunk's declared fee
+/// and vsize equal the sums over its members; no parent sits in a later
+/// chunk than its child; chunk feerates never increase.
+fn check_invariants(
+    fees_vsizes: &[(u64, u64)],
+    edges: &[(LocalIdx, LocalIdx)],
+    chunks: &[super::Chunk],
+) {
+    let n = fees_vsizes.len();
+
+    // (1) Each node in exactly one chunk.
+    let mut seen = vec![false; n];
+    for chunk in chunks {
+        for &local in &chunk.nodes {
+            assert!(
+                !seen[local as usize],
+                "node {} appears in multiple chunks",
+                local
+            );
+            seen[local as usize] = true;
+        }
+    }
+    for (i, s) in seen.iter().enumerate() {
+        assert!(*s, "node {} missing from all chunks", i);
+    }
+
+    // Chunk aggregates match declared totals.
+    for chunk in chunks {
+        let fee: u64 = chunk.nodes.iter().map(|&l| fees_vsizes[l as usize].0).sum();
+        let vsize: u64 = chunk.nodes.iter().map(|&l| fees_vsizes[l as usize].1).sum();
+        assert_eq!(chunk.fee, fee, "chunk fee mismatch");
+        assert_eq!(chunk.vsize, vsize, "chunk vsize mismatch");
+    }
+
+    // (2) Chunks are topologically closed in emission order: a parent
+    //     in cluster must be in the same or earlier chunk.
+    // Map each node to the position of the chunk that contains it.
+    // Check (1) guarantees every slot is overwritten, so no `usize::MAX`
+    // placeholder survives.
+    let chunk_of: Vec<usize> = {
+        let mut out = vec![usize::MAX; n];
+        for (ci, chunk) in chunks.iter().enumerate() {
+            for &local in &chunk.nodes {
+                out[local as usize] = ci;
+            }
+        }
+        out
+    };
+    for &(p, c) in edges {
+        let cp = chunk_of[p as usize];
+        let cc = chunk_of[c as usize];
+        assert!(
+            cp <= cc,
+            "parent {} in chunk {} but child {} in earlier chunk {}",
+            p,
+            cp,
+            c,
+            cc
+        );
+    }
+
+    // (3) Non-increasing chunk feerates in emission order.
+    // Compared by cross-multiplication in u128, so no division or
+    // overflow is involved.
+    for pair in chunks.windows(2) {
+        let a = pair[0].fee as u128 * pair[1].vsize as u128;
+        let b = pair[1].fee as u128 * pair[0].vsize as u128;
+        assert!(
+            a >= b,
+            "chunk feerates not non-increasing: {}/{} then {}/{}",
+            pair[0].fee,
+            pair[0].vsize,
+            pair[1].fee,
+            pair[1].vsize,
+        );
+    }
+}
+
+/// 200 seeded clusters of 2 to 11 nodes.
+#[test]
+fn random_small_clusters() {
+    for seed in 0..200u64 {
+        let n = 2 + (seed % 10) as usize;
+        let (fv, edges) = random_cluster(n, seed.wrapping_add(1));
+        let cluster = make_cluster(&fv, &edges);
+        let chunks = run(&cluster);
+        check_invariants(&fv, &edges, &chunks);
+    }
+}
+
+/// 50 seeded clusters of 10 to 29 nodes.
+#[test]
+fn random_medium_clusters() {
+    for seed in 0..50u64 {
+        let n = 10 + (seed % 20) as usize;
+        let (fv, edges) = random_cluster(n, seed.wrapping_add(100));
+        let cluster = make_cluster(&fv, &edges);
+        let chunks = run(&cluster);
+        check_invariants(&fv, &edges, &chunks);
+    }
+}
+
+/// 10 seeded clusters of exactly 30 nodes.
+#[test]
+fn random_large_clusters() {
+    for seed in 0..10u64 {
+        let (fv, edges) = random_cluster(30, seed.wrapping_add(1000));
+        let cluster = make_cluster(&fv, &edges);
+        let chunks = run(&cluster);
+        check_invariants(&fv, &edges, &chunks);
+    }
+}
+
+/// Linearizing the same cluster twice must give the same result.
+///
+/// Only the `(fee, vsize)` of each chunk is compared, not which nodes
+/// each chunk contains.
+#[test]
+fn determinism_same_seed_same_output() {
+    let (fv, edges) = random_cluster(15, 42);
+    let cluster = make_cluster(&fv, &edges);
+    let a: Vec<(u64, u64)> = run(&cluster).iter().map(|c| (c.fee, c.vsize)).collect();
+    let b: Vec<(u64, u64)> = run(&cluster).iter().map(|c| (c.fee, c.vsize)).collect();
+    assert_eq!(a, b);
+}
+
+/// Exercise the perf path: large clusters with many edges.
+///
+/// Runs five seeded 100-node clusters through the linearizer and checks
+/// the same invariants as the smaller tests. No time budget is
+/// asserted here; a slow linearizer would only show up as a slow test
+/// run.
+#[test]
+fn random_cluster_at_policy_limit() {
+    for seed in 0..5u64 {
+        // 100-tx cluster approximates Bitcoin Core's cluster policy cap.
+        // NOTE(review): confirm against Bitcoin Core's current default
+        // cluster count limit.
+        let (fv, edges) = random_cluster(100, seed.wrapping_add(9000));
+        let cluster = make_cluster(&fv, &edges);
+        let chunks = run(&cluster);
+        check_invariants(&fv, &edges, &chunks);
+    }
+}
--- a/crates/brk_mempool/src/block_builder/mod.rs
+++ b/crates/brk_mempool/src/block_builder/mod.rs
@@ -1,8 +1,7 @@
 mod graph;
-mod heap_entry;
+mod linearize;
 mod package;
 mod partitioner;
-mod selector;
 mod tx_node;

 pub use package::Package;
@@ -22,12 +21,12 @@ const NUM_BLOCKS: usize = 8;
 /// descending; the final block is a catch-all containing every remaining
 /// package (matches mempool.space behavior).
 pub fn build_projected_blocks(entries: &[Option<Entry>]) -> Vec<Vec<Package>> {
-    let mut graph = graph::build_graph(entries);
+    let graph = graph::build_graph(entries);

    if graph.is_empty() {
        return Vec::new();
    }

-    let packages = selector::select_packages(&mut graph);
+    let packages = linearize::linearize_clusters(&graph);
    partitioner::partition_into_blocks(packages, NUM_BLOCKS)
 }
--- a/crates/brk_mempool/src/block_builder/package.rs
+++ b/crates/brk_mempool/src/block_builder/package.rs
@@ -2,24 +2,18 @@ use brk_types::FeeRate;

 use crate::types::TxIndex;

-/// A CPFP package: transactions the selector decided to mine together
+/// A CPFP package: transactions the linearizer decided to mine together
 /// because a child pays for its parent.
 ///
-/// Carries two rates:
-/// - `fee_rate` is the package's own rate (sum of fees / sum of vsizes),
-///   i.e. what a miner collects per vsize when the package is mined.
-///   Used for per-tx fee stats and user-facing recommendations.
-/// - `placement_rate` is the key the partitioner sorts by. It's the own
-///   rate clamped below by the `placement_rate` of any ancestor packages,
-///   so that sorting packages by this rate descending keeps dependent
-///   packages in topological order even when a child's own rate exceeds
-///   its parent's (possible in branching CPFP).
+/// `fee_rate` is the package's own rate (sum of fees / sum of vsizes),
+/// i.e. what a miner collects per vsize when the package is mined.
+/// Packages are produced by SFL in descending-`fee_rate` order within a
+/// cluster and are atomic (all-or-nothing) at mining time.
 pub struct Package {
    /// Transactions in topological order (parents before children).
    pub txs: Vec<TxIndex>,
    pub vsize: u64,
    pub fee_rate: FeeRate,
-    pub placement_rate: FeeRate,
 }

 impl Package {
@@ -28,7 +22,6 @@ impl Package {
            txs: Vec::new(),
            vsize: 0,
            fee_rate,
-            placement_rate: fee_rate,
        }
    }

--- a/crates/brk_mempool/src/block_builder/partitioner.rs
+++ b/crates/brk_mempool/src/block_builder/partitioner.rs
@@ -5,7 +5,7 @@ use super::{BLOCK_VSIZE, package::Package};
 /// How many packages to look ahead when the current one doesn't fit.
 const LOOK_AHEAD_COUNT: usize = 100;

-/// Partition packages into blocks by placement rate.
+/// Partition packages into blocks by fee rate.
 ///
 /// The first `num_blocks - 1` blocks are packed greedily into ~`BLOCK_VSIZE`
 /// chunks. The final block is a catch-all containing every remaining
@@ -15,10 +15,11 @@ pub fn partition_into_blocks(
    mut packages: Vec<Package>,
    num_blocks: usize,
 ) -> Vec<Vec<Package>> {
-    // Stable sort for deterministic output across equal placement rates.
-    // Topology across dependent packages is already enforced by the
-    // placement_rate cap in the selector.
-    packages.sort_by_key(|p| Reverse(p.placement_rate));
+    // Stable sort for deterministic output across equal fee rates. SFL
+    // guarantees chunks within a cluster come in non-increasing rate
+    // order, so stable sorting by fee_rate preserves intra-cluster
+    // topology automatically.
+    packages.sort_by_key(|p| Reverse(p.fee_rate));

    let mut slots: Vec<Option<Package>> = packages.into_iter().map(Some).collect();
    let mut blocks: Vec<Vec<Package>> = Vec::with_capacity(num_blocks);
--- a/crates/brk_mempool/src/block_builder/selector.rs
+++ b/crates/brk_mempool/src/block_builder/selector.rs
@@ -1,121 +0,0 @@
-use std::collections::BinaryHeap;
-
-use brk_types::FeeRate;
-use rustc_hash::FxHashSet;
-use smallvec::SmallVec;
-
-use super::{graph::Graph, heap_entry::HeapEntry, package::Package};
-use crate::types::PoolIndex;
-
-/// Sentinel for `package_of` entries that haven't been placed in a package yet.
-const UNASSIGNED: u32 = u32::MAX;
-
-/// Select transactions from the graph and group them into CPFP packages,
-/// running until every unselected tx has been placed into a package.
-pub fn select_packages(graph: &mut Graph) -> Vec<Package> {
-    let mut packages: Vec<Package> = Vec::new();
-    let mut package_of: Vec<u32> = vec![UNASSIGNED; graph.len()];
-
-    let mut heap: BinaryHeap<HeapEntry> = (0..graph.len())
-        .map(|i| HeapEntry::new(&graph[PoolIndex::from(i)]))
-        .collect();
-
-    while let Some(entry) = heap.pop() {
-        let node = &graph[entry.pool_index];
-        if node.selected || entry.generation != node.generation {
-            continue;
-        }
-
-        let own_rate = FeeRate::from((node.ancestor_fee, node.ancestor_vsize));
-        let package_idx = packages.len() as u32;
-        let mut package = Package::new(own_rate);
-
-        for pool_idx in select_with_ancestors(graph, entry.pool_index) {
-            let tx = &graph[pool_idx];
-            package.add_tx(tx.tx_index, u64::from(tx.vsize));
-            package_of[pool_idx.as_usize()] = package_idx;
-
-            // Cap placement_rate by any ancestor packages this tx depends on.
-            // select_with_ancestors returns parents before children, so a
-            // parent sitting in this same package already has package_of
-            // set to package_idx; only parents in earlier packages matter.
-            for &parent in &tx.parents {
-                let parent_pkg = package_of[parent.as_usize()];
-                if parent_pkg != package_idx && parent_pkg != UNASSIGNED {
-                    package.placement_rate = package
-                        .placement_rate
-                        .min(packages[parent_pkg as usize].placement_rate);
-                }
-            }
-
-            update_descendants(graph, pool_idx, &mut heap);
-        }
-
-        packages.push(package);
-    }
-
-    packages
-}
-
-/// Return `pool_idx` and all its unselected ancestors in topological order
-/// (parents before children), marking each one selected as we go.
-fn select_with_ancestors(graph: &mut Graph, pool_idx: PoolIndex) -> SmallVec<[PoolIndex; 8]> {
-    let mut result: SmallVec<[PoolIndex; 8]> = SmallVec::new();
-    let mut stack: SmallVec<[(PoolIndex, bool); 16]> = smallvec::smallvec![(pool_idx, false)];
-
-    while let Some((idx, parents_done)) = stack.pop() {
-        if graph[idx].selected {
-            continue;
-        }
-
-        if parents_done {
-            graph[idx].selected = true;
-            result.push(idx);
-        } else {
-            stack.push((idx, true));
-            for &parent in &graph[idx].parents {
-                if !graph[parent].selected {
-                    stack.push((parent, false));
-                }
-            }
-        }
-    }
-
-    result
-}
-
-/// Subtract the selected tx's fee and vsize from every unselected
-/// descendant's ancestor totals, and re-push updated entries to the heap.
-fn update_descendants(
-    graph: &mut Graph,
-    selected_idx: PoolIndex,
-    heap: &mut BinaryHeap<HeapEntry>,
-) {
-    let selected_fee = graph[selected_idx].fee;
-    let selected_vsize = graph[selected_idx].vsize;
-
-    // Track visited to avoid double-updates in diamond patterns
-    let mut visited: FxHashSet<PoolIndex> = FxHashSet::default();
-    let mut stack: SmallVec<[PoolIndex; 16]> =
-        graph[selected_idx].children.iter().copied().collect();
-
-    while let Some(child_idx) = stack.pop() {
-        if !visited.insert(child_idx) {
-            continue;
-        }
-
-        let child = &mut graph[child_idx];
-
-        // Walk through selected intermediates: descendants behind them still
-        // need their ancestor totals reduced, otherwise CPFP chains with
-        // already-selected parents keep inflated scores and get split.
-        if !child.selected {
-            child.ancestor_fee -= selected_fee;
-            child.ancestor_vsize -= selected_vsize;
-            child.generation += 1;
-            heap.push(HeapEntry::new(child));
-        }
-
-        stack.extend(child.children.iter().copied());
-    }
-}
--- a/crates/brk_mempool/src/block_builder/tx_node.rs
+++ b/crates/brk_mempool/src/block_builder/tx_node.rs
@@ -7,57 +7,30 @@ use crate::types::{PoolIndex, TxIndex};
 ///
 /// Created fresh for each block building cycle, then discarded.
 pub struct TxNode {
-    /// Index into mempool entries (for final output)
+    /// Index into mempool entries (carried into the final `Package`).
    pub tx_index: TxIndex,

-    /// Index in the graph pool
-    pub pool_index: PoolIndex,
-
-    /// Transaction fee
+    /// Transaction fee.
    pub fee: Sats,

-    /// Transaction virtual size
+    /// Transaction virtual size.
    pub vsize: VSize,

-    /// Parent transactions (dependencies)
+    /// Parent transactions (dependencies).
    pub parents: SmallVec<[PoolIndex; 4]>,

-    /// Child transactions (dependents)
+    /// Child transactions (dependents).
    pub children: SmallVec<[PoolIndex; 8]>,
-
-    /// Cumulative fee (self + all ancestors)
-    pub ancestor_fee: Sats,
-
-    /// Cumulative vsize (self + all ancestors)
-    pub ancestor_vsize: VSize,
-
-    /// Whether this tx has been selected
-    pub selected: bool,
-
-    /// Generation counter for heap staleness detection
-    pub generation: u32,
 }

 impl TxNode {
-    pub fn new(
-        tx_index: TxIndex,
-        pool_index: PoolIndex,
-        fee: Sats,
-        vsize: VSize,
-        ancestor_fee: Sats,
-        ancestor_vsize: VSize,
-    ) -> Self {
+    pub fn new(tx_index: TxIndex, fee: Sats, vsize: VSize) -> Self {
        Self {
            tx_index,
-            pool_index,
            fee,
            vsize,
            parents: SmallVec::new(),
            children: SmallVec::new(),
-            ancestor_fee,
-            ancestor_vsize,
-            selected: false,
-            generation: 0,
        }
    }
 }
--- a/crates/brk_mempool/src/sync.rs
+++ b/crates/brk_mempool/src/sync.rs
@@ -14,7 +14,7 @@ use brk_error::Result;
 use brk_rpc::Client;
 use brk_types::{
    AddrBytes, BlockHash, MempoolEntryInfo, MempoolInfo, Timestamp, Transaction, TxIn, TxOut,
-    TxStatus, TxWithHex, Txid, TxidPrefix, VSize, Vout,
+    TxStatus, Txid, TxidPrefix, VSize, Vout,
 };
 use derive_more::Deref;
 use parking_lot::{RwLock, RwLockReadGuard};
@@ -28,6 +28,7 @@ use crate::{
    entry_pool::EntryPool,
    projected_blocks::{BlockStats, RecommendedFees, Snapshot},
    tx_store::TxStore,
+    types::TxWithHex,
 };

 /// Max new txs to fetch full data for per update cycle (for address tracking).
--- a/crates/brk_mempool/src/tx_store.rs
+++ b/crates/brk_mempool/src/tx_store.rs
@@ -1,7 +1,9 @@
-use brk_types::{MempoolRecentTx, TxWithHex, Txid};
+use brk_types::{MempoolRecentTx, Txid};
 use derive_more::Deref;
 use rustc_hash::FxHashMap;

+use crate::types::TxWithHex;
+
 const RECENT_CAP: usize = 10;

 /// Store of full transaction data for API access.
--- a/crates/brk_mempool/src/types/mod.rs
+++ b/crates/brk_mempool/src/types/mod.rs
@@ -1,5 +1,7 @@
 mod pool_index;
 mod tx_index;
+mod tx_with_hex;

 pub use pool_index::PoolIndex;
 pub use tx_index::TxIndex;
+pub use tx_with_hex::TxWithHex;
--- a/crates/brk_mempool/src/types/tx_index.rs
+++ b/crates/brk_mempool/src/types/tx_index.rs
@@ -15,3 +15,10 @@ impl From<usize> for TxIndex {
        Self(value as u32)
    }
 }
+
+impl From<TxIndex> for u64 {
+    #[inline]
+    fn from(value: TxIndex) -> Self {
+        u64::from(value.0)
+    }
+}
--- a/crates/brk_mempool/src/types/tx_with_hex.rs
+++ b/crates/brk_mempool/src/types/tx_with_hex.rs
@@ -0,0 +1,26 @@
+use brk_types::Transaction;
+
+/// A transaction with its raw hex representation.
+#[derive(Debug, Clone)]
+pub struct TxWithHex {
+    tx: Transaction,
+    hex: String,
+}
+
+impl TxWithHex {
+    pub fn new(tx: Transaction, hex: String) -> Self {
+        Self { tx, hex }
+    }
+
+    pub fn tx(&self) -> &Transaction {
+        &self.tx
+    }
+
+    pub fn hex(&self) -> &str {
+        &self.hex
+    }
+
+    pub fn into_parts(self) -> (Transaction, String) {
+        (self.tx, self.hex)
+    }
+}