global: snapshot

This commit is contained in:
nym21
2026-01-14 16:38:53 +01:00
parent ddb1db7a8e
commit d75c2a881b
226 changed files with 7776 additions and 20942 deletions

View File

@@ -187,12 +187,14 @@ impl Vecs {
continue;
}
// Check witness size (SegWit era only, activated Aug 2017)
// Check witness size per input (SegWit era only, activated Aug 2017)
// Pre-SegWit transactions have no witness data
// Python checks each input's witness ≤ 500 bytes; we approximate with average
if cached_year >= 2017 {
let base_size: StoredU32 = txindex_to_base_size_iter.get_at_unwrap(txindex);
let total_size: StoredU32 = txindex_to_total_size_iter.get_at_unwrap(txindex);
if *total_size - *base_size > 500 {
let witness_size = *total_size - *base_size;
if witness_size / *input_count as u32 > 500 {
continue;
}
}
@@ -379,10 +381,9 @@ impl Vecs {
}
};
self.ohlc_cents
.truncate_push_at(dateindex.to_usize(), ohlc)?;
self.ohlc_cents.truncate_push(dateindex, ohlc)?;
self.tx_count
.truncate_push_at(dateindex.to_usize(), StoredU32::from(tx_count))?;
.truncate_push(dateindex, StoredU32::from(tx_count))?;
}
// Write daily data

View File

@@ -48,11 +48,12 @@ impl OracleConfig {
blocks_per_window: 144, // ~1 day
min_tx_count: 1000,
},
// 2017+: Modern era ($1,000 - $1,000,000+)
// 2017+: Modern era ($10,000 - $500,000)
// Matches Python's slide range of -141 to 201
_ => Self {
min_price_cents: 100_000, // $1,000
max_price_cents: 100_000_000, // $1,000,000
blocks_per_window: 144, // ~1 day
min_price_cents: 1_000_000, // $10,000 (gives max_slide = 200)
max_price_cents: 50_000_000, // $500,000 (gives min_slide ≈ -140)
blocks_per_window: 144, // ~1 day
min_tx_count: 2000,
},
}
@@ -90,9 +91,10 @@ mod tests {
#[test]
fn test_config_for_year() {
// 2017+ config matches Python: $10,000 to $500,000
let c2020 = OracleConfig::for_year(2020);
assert_eq!(c2020.min_price_cents, 100_000);
assert_eq!(c2020.max_price_cents, 100_000_000);
assert_eq!(c2020.min_price_cents, 1_000_000);
assert_eq!(c2020.max_price_cents, 50_000_000);
let c2015 = OracleConfig::for_year(2015);
assert_eq!(c2015.min_price_cents, 10_000);
@@ -101,13 +103,13 @@ mod tests {
#[test]
fn test_slide_range() {
// 2024 config: $1,000 to $1,000,000
// 2024 config: $10,000 to $500,000 (matches Python's -141 to 201)
let config = OracleConfig::for_year(2024);
let (min, max) = config.slide_range();
// $1,000,000 = 10^8 cents → slide = (7-8)*200 = -200
// $1,000 = 10^5 cents → slide = (7-5)*200 = 400
assert_eq!(min, -200);
assert_eq!(max, 400);
// $500,000 = 5*10^7 cents → slide = (7-7.699)*200 -140
// $10,000 = 10^6 cents → slide = (7-6)*200 = 200
assert!((-141..=-139).contains(&min)); // ~-140
assert_eq!(max, 200);
// 2015 config: $100 to $20,000
let config = OracleConfig::for_year(2015);

View File

@@ -11,10 +11,12 @@ pub const MAX_LOG_BTC: f64 = 2.0; // 10^2 BTC = 100 BTC
pub const NUM_DECADES: usize = 8; // -6 to +2
pub const TOTAL_BINS: usize = NUM_DECADES * BINS_PER_DECADE; // 1600 bins
/// Minimum output value to consider (10,000 sats = 0.0001 BTC)
pub const MIN_OUTPUT_SATS: Sats = Sats::_10K;
/// Maximum output value to consider (10 BTC)
pub const MAX_OUTPUT_SATS: Sats = Sats::_10BTC;
/// Minimum output value to consider (~1,000 sats = 0.00001 BTC)
/// Matches Python: zeros bins 0-200 which is 10^-5 BTC
pub const MIN_OUTPUT_SATS: Sats = Sats::_1K;
/// Maximum output value to consider (100 BTC)
/// Matches Python: zeros bins 1601+ which is ~10^2 BTC
pub const MAX_OUTPUT_SATS: Sats = Sats::_100BTC;
/// Round BTC bin indices that should be smoothed to avoid false positives
/// These are bins where round BTC amounts would naturally cluster

View File

@@ -5,8 +5,11 @@ use vecdb::{BytesVec, Database, ImportableVec, IterableCloneableVec, LazyVecFrom
use super::Vecs;
impl Vecs {
pub fn forced_import(db: &Database, version: Version) -> Result<Self> {
let price_cents = PcoVec::forced_import(db, "orange_price_cents", version)?;
pub fn forced_import(db: &Database, parent_version: Version) -> Result<Self> {
// v2: Fixed spike stencil positions and Gaussian center to match Python's empirical data
let version = parent_version + Version::TWO;
let price_cents = PcoVec::forced_import(db, "oracle_price_cents", version)?;
let ohlc_cents = BytesVec::forced_import(db, "oracle_ohlc_cents", version)?;
let tx_count = PcoVec::forced_import(db, "oracle_tx_count", version)?;

View File

@@ -12,68 +12,66 @@ use super::histogram::{BINS_PER_DECADE, Histogram, TOTAL_BINS};
/// Number of parallel chunks for stencil sliding
const PARALLEL_CHUNKS: i32 = 4;
/// USD spike stencil entries: (bin offset from $100 center, weight)
/// USD spike stencil entries: (bin offset from center_bin, weight)
/// These represent the expected frequency of round USD amounts in transactions
/// Offset formula: log10(USD/100) * 200 bins/decade
/// Companion spikes at ±2 bins from main spike (Rust 200 bins/decade ≈ Python's ±1 at 180 bins/decade)
/// Matches Python's 29 entries from utxo_oracle.py lines 1013-1041
/// Positions derived from Python's empirical data (utxo_oracle.py lines 1013-1041)
/// Offset = python_stencil_index - 402 (since Python stencil starts at bin 199, center is 601)
const SPIKE_STENCIL: &[(i32, f64)] = &[
// $1 (single)
(-400, 0.00130),
// $5 (single)
(-260, 0.00168),
// $10 (main + companion)
(-200, 0.00347),
(-198, 0.00199),
// $15 (single)
(-165, 0.00191),
// $20 (main + companion)
(-140, 0.00334),
(-138, 0.00259),
// $30 (main + companion)
(-105, 0.00258),
(-103, 0.00273),
// $50 (main + 2 companions)
// $1 (single) - Python index 40
(-362, 0.00130),
// $5 (single) - Python index 141
(-261, 0.00168),
// $10 (main + companion) - Python indices 201-202
(-201, 0.00347),
(-200, 0.00199),
// $15 (single) - Python index 236
(-166, 0.00191),
// $20 (main + companion) - Python indices 261-262
(-141, 0.00334),
(-140, 0.00259),
// $30 (main + companion) - Python indices 296-297
(-106, 0.00258),
(-105, 0.00273),
// $50 (main + 2 companions) - Python indices 340-342
(-62, 0.00308),
(-60, 0.00561),
(-58, 0.00309),
// $100 (main + 3 companions) - center
(-61, 0.00561),
(-60, 0.00309),
// $100 (main + 3 companions) - Python indices 400-403
(-2, 0.00292),
(0, 0.00617),
(2, 0.00442),
(4, 0.00263),
// $150 (single)
(35, 0.00286),
// $200 (main + companion)
(60, 0.00410),
(62, 0.00335),
// $300 (main + companion)
(95, 0.00252),
(97, 0.00278),
// $500 (single)
(140, 0.00379),
// $1000 (main + companion)
(200, 0.00369),
(202, 0.00239),
// $1500 (single)
(235, 0.00128),
// $2000 (main + companion)
(260, 0.00165),
(262, 0.00140),
// $5000 (single)
(340, 0.00115),
// $10000 (single)
(400, 0.00083),
(-1, 0.00617),
(0, 0.00442),
(1, 0.00263),
// $150 (single) - Python index 436
(34, 0.00286),
// $200 (main + companion) - Python indices 461-462
(59, 0.00410),
(60, 0.00335),
// $300 (main + companion) - Python indices 496-497
(94, 0.00252),
(95, 0.00278),
// $500 (single) - Python index 541
(139, 0.00379),
// $1000 (main + companion) - Python indices 601-602
(199, 0.00369),
(200, 0.00239),
// $1500 (single) - Python index 636
(234, 0.00128),
// $2000 (main + companion) - Python indices 661-662
(259, 0.00165),
(260, 0.00140),
// $5000 (single) - Python index 741
(339, 0.00115),
// $10000 (single) - Python index 801
(399, 0.00083),
];
/// Width of the smooth stencil in bins (Gaussian sigma)
/// Python uses std_dev=201 with 803 bins. Our histogram has 1600 bins (2x),
/// so we use 201 * (1600/803) ≈ 400 bins sigma equivalent
const SMOOTH_WIDTH: f64 = 400.0;
/// Both Python and Rust use 200 bins per decade, so sigma is the same
const SMOOTH_WIDTH: f64 = 201.0;
/// Linear term coefficient for smooth stencil (per Python: 0.0000005 * x)
/// Scaled for our larger histogram: 0.0000005 * (803/1600) ≈ 0.00000025
const SMOOTH_LINEAR_COEF: f64 = 0.00000025;
/// NOT scaled - the linear term uses window position (0-802), same as Python
const SMOOTH_LINEAR_COEF: f64 = 0.0000005;
/// Weight given to smooth stencil vs spike stencil
const SMOOTH_WEIGHT: f64 = 0.65;
@@ -84,6 +82,12 @@ const SPIKE_WEIGHT: f64 = 1.0;
/// This avoids computing exp() billions of times
const SMOOTH_RANGE: usize = 800;
/// Gaussian center bin offset from spike center
/// Python's Gaussian has mean=411 in 803-element stencil
/// Stencil starts at bin 199, so Gaussian centers at bin 199+411=610
/// Spike center is at bin 601, so Gaussian is offset by +9 bins
const GAUSSIAN_CENTER_OFFSET: i32 = 9;
/// Lazily initialized Gaussian weight lookup table
fn gaussian_weights() -> &'static [f64; SMOOTH_RANGE + 1] {
use std::sync::OnceLock;
@@ -110,16 +114,6 @@ fn gaussian_weights() -> &'static [f64; SMOOTH_RANGE + 1] {
pub fn find_best_price(histogram: &Histogram, min_slide: i32, max_slide: i32) -> Option<Cents> {
let bins = histogram.bins();
// Pre-compute the linear term sum (constant for all slide positions)
// linear_sum = Σ bins[i] * SMOOTH_LINEAR_COEF * i
let linear_sum: f64 = bins
.iter()
.copied()
.enumerate()
.filter(|(_, v)| *v > 0.0)
.map(|(i, v)| v * SMOOTH_LINEAR_COEF * i as f64)
.sum();
// Collect non-zero bins: Vec for Gaussian (needs iteration), HashMap for spike (needs lookup)
let non_zero_bins: Vec<(usize, f64)> = bins
.iter()
@@ -147,7 +141,7 @@ pub fn find_best_price(histogram: &Histogram, min_slide: i32, max_slide: i32) ->
let mut local_total = 0.0;
for slide in chunk_start..=chunk_end {
let score = compute_score_fast(&non_zero_bins, &bin_map, linear_sum, slide);
let score = compute_score_fast(&non_zero_bins, &bin_map, slide);
local_total += score;
if score > local_best_score {
local_best_score = score;
@@ -170,8 +164,8 @@ pub fn find_best_price(histogram: &Histogram, min_slide: i32, max_slide: i32) ->
);
// Compute neighbor scores for sub-bin interpolation (matches Python behavior)
let neighbor_up_score = compute_score_fast(&non_zero_bins, &bin_map, linear_sum, best_position + 1);
let neighbor_down_score = compute_score_fast(&non_zero_bins, &bin_map, linear_sum, best_position - 1);
let neighbor_up_score = compute_score_fast(&non_zero_bins, &bin_map, best_position + 1);
let neighbor_down_score = compute_score_fast(&non_zero_bins, &bin_map, best_position - 1);
// Find best neighbor
let (best_neighbor_offset, neighbor_score) = if neighbor_up_score > neighbor_down_score {
@@ -204,7 +198,6 @@ pub fn find_best_price(histogram: &Histogram, min_slide: i32, max_slide: i32) ->
fn compute_score_fast(
non_zero_bins: &[(usize, f64)],
bin_map: &FxHashMap<usize, f64>,
linear_sum: f64,
slide: i32,
) -> f64 {
let spike_score = compute_spike_score_hash(bin_map, slide);
@@ -212,17 +205,40 @@ fn compute_score_fast(
// Python: smooth weight only applied for slide < 150
if slide < 150 {
let gaussian_score = compute_gaussian_score_sparse(non_zero_bins, slide);
let linear_score = compute_linear_score_sparse(non_zero_bins, slide);
// Combine Gaussian and linear parts of smooth score
let smooth_score = 0.0015 * gaussian_score + linear_sum;
let smooth_score = 0.0015 * gaussian_score + linear_score;
SMOOTH_WEIGHT * smooth_score + SPIKE_WEIGHT * spike_score
} else {
SPIKE_WEIGHT * spike_score
}
}
/// Compute the linear part of the smooth stencil (per-slide, matches Python)
/// Python: sum(shifted_curve[n] * 0.0000005 * n) where n is window position (0-802)
fn compute_linear_score_sparse(non_zero_bins: &[(usize, f64)], slide: i32) -> f64 {
// Window starts at left_p001 + slide = (center_bin - 402) + slide = 199 + slide
// Python: left_p001 = center_p001 - int((803+1)/2) = 601 - 402 = 199
let window_start = 199 + slide;
let window_end = window_start + 803; // 803 elements like Python's stencil
let mut score = 0.0;
for &(i, bin_value) in non_zero_bins {
let bin_idx = i as i32;
if bin_idx >= window_start && bin_idx < window_end {
let window_pos = bin_idx - window_start;
score += bin_value * SMOOTH_LINEAR_COEF * window_pos as f64;
}
}
score
}
/// Compute just the Gaussian part of the smooth stencil (sparse iteration)
/// Note: Gaussian center is offset from spike center by GAUSSIAN_CENTER_OFFSET
fn compute_gaussian_score_sparse(non_zero_bins: &[(usize, f64)], slide: i32) -> f64 {
let center = center_bin() as i32 + slide;
// Python's Gaussian is centered at bin 610 (not 601), so we add the offset
let center = center_bin() as i32 + GAUSSIAN_CENTER_OFFSET + slide;
let weights = gaussian_weights();
let mut score = 0.0;