computer: pools part 1 + fetcher: fix url + interface: more ddos protection

This commit is contained in:
nym21
2025-09-05 14:47:11 +02:00
parent f82edb290a
commit 09d974913d
16 changed files with 1794 additions and 83 deletions

94
Cargo.lock generated
View File

@@ -549,6 +549,7 @@ dependencies = [
"brk_structs",
"derive_deref",
"log",
"num_enum",
"pco",
"rayon",
"serde",
@@ -596,8 +597,6 @@ dependencies = [
"fjall",
"log",
"rayon",
"serde",
"serde_json",
"vecdb",
]
@@ -611,6 +610,7 @@ dependencies = [
"brk_structs",
"derive_deref",
"nucleo-matcher",
"quick_cache",
"schemars 1.0.4",
"serde",
"serde_json",
@@ -1147,9 +1147,9 @@ dependencies = [
[[package]]
name = "cc"
version = "1.2.35"
version = "1.2.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "590f9024a68a8c40351881787f1934dc11afd69090f5edb6831464694d836ea3"
checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54"
dependencies = [
"find-msvc-tools",
"jobserver",
@@ -1770,9 +1770,9 @@ dependencies = [
[[package]]
name = "find-msvc-tools"
version = "0.1.0"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e178e4fba8a2726903f6ba98a6d221e76f9c12c650d5dc0e6afdc50677b49650"
checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d"
[[package]]
name = "fixedbitset"
@@ -2403,9 +2403,9 @@ dependencies = [
[[package]]
name = "js-sys"
version = "0.3.77"
version = "0.3.78"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f"
checksum = "0c0b063578492ceec17683ef2f8c5e89121fbd0b172cbc280635ab7567db2738"
dependencies = [
"once_cell",
"wasm-bindgen",
@@ -2716,6 +2716,28 @@ dependencies = [
"autocfg",
]
[[package]]
name = "num_enum"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a"
dependencies = [
"num_enum_derive",
"rustversion",
]
[[package]]
name = "num_enum_derive"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d"
dependencies = [
"proc-macro-crate",
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "object"
version = "0.36.7"
@@ -3469,6 +3491,15 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "proc-macro-crate"
version = "3.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
dependencies = [
"toml_edit",
]
[[package]]
name = "proc-macro-error-attr2"
version = "2.0.0"
@@ -4409,12 +4440,18 @@ dependencies = [
"indexmap 2.11.0",
"serde",
"serde_spanned",
"toml_datetime",
"toml_datetime 0.7.0",
"toml_parser",
"toml_writer",
"winnow",
]
[[package]]
name = "toml_datetime"
version = "0.6.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
[[package]]
name = "toml_datetime"
version = "0.7.0"
@@ -4424,6 +4461,17 @@ dependencies = [
"serde",
]
[[package]]
name = "toml_edit"
version = "0.22.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
dependencies = [
"indexmap 2.11.0",
"toml_datetime 0.6.11",
"winnow",
]
[[package]]
name = "toml_parser"
version = "1.0.2"
@@ -4818,21 +4866,22 @@ dependencies = [
[[package]]
name = "wasm-bindgen"
version = "0.2.100"
version = "0.2.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5"
checksum = "7e14915cadd45b529bb8d1f343c4ed0ac1de926144b746e2710f9cd05df6603b"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.100"
version = "0.2.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6"
checksum = "e28d1ba982ca7923fd01448d5c30c6864d0a14109560296a162f80f305fb93bb"
dependencies = [
"bumpalo",
"log",
@@ -4844,9 +4893,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
version = "0.2.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407"
checksum = "7c3d463ae3eff775b0c45df9da45d68837702ac35af998361e2c84e7c5ec1b0d"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -4854,9 +4903,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.100"
version = "0.2.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
checksum = "7bb4ce89b08211f923caf51d527662b75bdc9c9c7aab40f86dcb9fb85ac552aa"
dependencies = [
"proc-macro2",
"quote",
@@ -4867,9 +4916,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.100"
version = "0.2.101"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d"
checksum = "f143854a3b13752c6950862c906306adb27c7e839f7414cec8fea35beab624c1"
dependencies = [
"unicode-ident",
]
@@ -5109,6 +5158,9 @@ name = "winnow"
version = "0.7.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf"
dependencies = [
"memchr",
]
[[package]]
name = "wit-bindgen"
@@ -5284,9 +5336,9 @@ dependencies = [
[[package]]
name = "zstd-sys"
version = "2.0.15+zstd.1.5.7"
version = "2.0.16+zstd.1.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb81183ddd97d0c74cedf1d50d85c8d08c1b8b68ee863bdee9e706eedba1a237"
checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
dependencies = [
"cc",
"pkg-config",

View File

@@ -46,6 +46,7 @@ jiff = "0.2.15"
log = "0.4.28"
minreq = { version = "2.14.1", features = ["https", "serde_json"] }
parking_lot = "0.12.4"
quick_cache = "0.6.16"
rayon = "1.11.0"
serde = "1.0.219"
serde_bytes = "0.11.17"

View File

@@ -21,6 +21,7 @@ brk_parser = { workspace = true }
vecdb = { workspace = true }
derive_deref = { workspace = true }
log = { workspace = true }
num_enum = "0.7.4"
pco = "0.4.6"
rayon = { workspace = true }
serde = { workspace = true }

View File

@@ -0,0 +1,55 @@
use std::{collections::BTreeMap, path::Path, thread};
use brk_computer::{Computer, pools};
use brk_error::Result;
use brk_fetcher::Fetcher;
use brk_indexer::Indexer;
use vecdb::Exit;
/// Example: counts how many coinbase tags are attributed to each known mining pool.
///
/// Imports the indexer (and the computer) from `$HOME/.brk`, walks every entry of
/// `height_to_coinbase_tag`, resolves each tag to a pool name (falling back to the
/// "unknown" pool) and prints the `(count, name)` pairs sorted in ascending order,
/// followed by the number of distinct pool names seen.
///
/// # Panics
///
/// Panics if the `HOME` environment variable is not set or if the worker thread panics.
fn main() -> Result<()> {
    brk_logger::init(Some(Path::new(".log")))?;

    let exit = Exit::new();
    exit.set_ctrlc_handler();

    // The work runs on a dedicated thread so that it gets a 256 MiB stack.
    thread::Builder::new()
        .stack_size(256 * 1024 * 1024)
        .spawn(move || -> Result<()> {
            let home = std::env::var("HOME").expect("the HOME environment variable must be set");
            let outputs_dir = Path::new(&home).join(".brk");

            let indexer = Indexer::forced_import(&outputs_dir)?;

            let fetcher = Fetcher::import(true, None)?;

            // The computer is not read below; the import call itself is kept as in the
            // original example.
            let _computer = Computer::forced_import(&outputs_dir, &indexer, Some(fetcher))?;

            let pools = pools();

            let mut counts: BTreeMap<&'static str, usize> = BTreeMap::default();

            indexer
                .stores
                .height_to_coinbase_tag
                .iter()
                .for_each(|(_, coinbase_tag)| {
                    // Tags that match no known pool are counted under the "unknown" pool.
                    let name = pools
                        .find_from_coinbase_tag(&coinbase_tag)
                        .map_or_else(|| pools.get_unknown().name, |pool| pool.name);
                    *counts.entry(name).or_default() += 1;
                });

            // Flip to (count, name) so that the natural tuple order sorts by count first.
            let mut by_count = counts
                .into_iter()
                .map(|(name, count)| (count, name))
                .collect::<Vec<_>>();
            by_count.sort_unstable();

            println!("{:#?}", by_count);
            println!("{:#?}", by_count.len());

            Ok(())
        })?
        .join()
        .unwrap()
}

View File

@@ -16,6 +16,7 @@ mod fetched;
mod grouped;
mod indexes;
mod market;
mod pools;
mod price;
mod stateful;
mod states;
@@ -24,6 +25,7 @@ mod utils;
use indexes::Indexes;
pub use pools::*;
pub use states::PriceToAmount;
use states::*;

View File

@@ -0,0 +1,175 @@
use num_enum::{FromPrimitive, IntoPrimitive};
use serde::{Deserialize, Serialize};
/// Identifier of a known mining pool, represented as a `u16` (`#[repr(u16)]`).
///
/// No variant has an explicit discriminant, so each variant's numeric value is its
/// position in this list, starting at 0 for `Unknown`. Inserting, removing or
/// reordering variants therefore changes the integer that `IntoPrimitive` yields for
/// every following variant. NOTE(review): if these integers are persisted anywhere,
/// new pools must only ever be appended at the end — confirm against the callers.
///
/// `Unknown` carries `#[default]`; with `num_enum::FromPrimitive` that is the variant
/// returned for integers that match no other variant.
// Several variant names keep an all-caps spelling (`OCEAN`, `HHTT`, ...), hence the
// `upper_case_acronyms` allowance below.
#[allow(clippy::upper_case_acronyms)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, FromPrimitive, IntoPrimitive)]
#[repr(u16)]
pub enum PoolId {
/// Fallback for blocks whose pool could not be identified (discriminant 0).
#[default]
Unknown,
BlockFills,
Ultimuspool,
TerraPool,
Luxor,
OneTHash,
BTCCom,
Bitfarms,
HuobiPool,
WayiCn,
CanoePool,
BTCTop,
BitcoinCom,
OneSevenFiveBtc,
GBMiners,
AXbt,
ASICMiner,
BitMinter,
BitcoinRussia,
BTCServ,
SimplecoinUs,
BTCGuild,
Eligius,
OzCoin,
EclipseMC,
MaxBTC,
TripleMining,
CoinLab,
FiftyBTC,
GHashIO,
STMiningCorp,
Bitparking,
MMPool,
Polmine,
KnCMiner,
Bitalo,
F2Pool,
HHTT,
MegaBigPower,
MtRed,
NMCbit,
YourbtcNet,
GiveMeCoins,
BraiinsPool,
AntPool,
MultiCoinCo,
BCPoolIo,
Cointerra,
KanoPool,
SoloCK,
CKPool,
NiceHash,
BitClub,
BitcoinAffiliateNetwork,
BTCC,
BWPool,
EXXAndBW,
Bitsolo,
BitFury,
TwentyOneInc,
DigitalBTC,
EightBaochi,
MyBTCcoinPool,
TBDice,
HASHPOOL,
Nexious,
BravoMining,
HotPool,
OKExPool,
BCMonster,
OneHash,
Bixin,
TATMASPool,
ViaBTC,
ConnectBTC,
BATPOOL,
Waterhole,
DCExploration,
DCEX,
BTPOOL,
FiftyEightCoin,
// First of two "Bitcoin India" entries; see `BitcoinIndiaCamel` further down.
BitcoinIndiaLowercase,
ShawnP0wers,
PHashIO,
RigPool,
HAOZHUZHU,
SevenPool,
MiningKings,
HashBX,
DPOOL,
Rawpool,
Haominer,
Helix,
BitcoinUkraine,
Poolin,
SecretSuperstar,
TigerpoolNet,
SigmapoolCom,
OkpoolTop,
Hummerpool,
Tangpool,
BytePool,
SpiderPool,
NovaBlock,
MiningCity,
BinancePool,
Minerium,
LubianCom,
OKKONG,
AAOPool,
EMCDPool,
FoundryUSA,
SBICrypto,
ArkPool,
PureBTCCom,
MARAPool,
KuCoinPool,
EntrustCharityPool,
OKMINER,
Titan,
PEGAPool,
BTCNuggets,
CloudHashing,
DigitalXMintsy,
Telco214,
BTCPoolParty,
Multipool,
TransactionCoinMining,
BTCDig,
TrickysBTCPool,
BTCMP,
Eobot,
UNOMP,
Patels,
GoGreenLight,
BitcoinIndiaCamel, // near-duplicate of `BitcoinIndiaLowercase`, kept as a separate entry under a slightly different name
EkanemBTC,
// NOTE(review): the `Uppercase` / `Lowercase` suffixes on the next two variants look
// like the same disambiguation scheme as the "Bitcoin India" pair — confirm.
CanoeUppercase,
TigerLowercase,
OneM1X,
Zulupool,
SECPOOL,
OCEAN,
WhitePool,
Wiz,
Mononaut,
Rijndael,
Wk057,
FutureBitApolloSolo,
Emzy,
Knorrium,
CarbonNegative,
PortlandHODL,
Phoenix,
Neopool,
MaxiPool,
DrDetroit,
BitFuFuPool,
LuckyPool,
MiningDutch,
PublicPool,
MiningSquared,
InnopolisTech,
Nymkappa,
BTCLab,
Parasite,
}

View File

@@ -0,0 +1,7 @@
mod id;
mod pool;
mod pools;
pub use id::*;
pub use pool::*;
pub use pools::*;

View File

@@ -0,0 +1,12 @@
use serde::{Deserialize, Serialize};
use crate::pools::PoolId;
/// Static description of one mining pool.
///
/// All string data is `&'static str`, i.e. it is expected to come from data compiled
/// into the binary rather than from values built at runtime.
#[derive(Debug, Serialize, Deserialize)]
pub struct Pool {
/// Identifier of this pool.
pub id: PoolId,
/// Name of the pool.
pub name: &'static str,
/// Addresses associated with the pool — presumably its known payout addresses (TODO confirm).
pub addresses: Box<[&'static str]>,
/// Tags associated with the pool — presumably the markers searched for in coinbase tags (TODO confirm).
pub tags: Box<[&'static str]>,
/// Link associated with the pool — presumably its website URL (TODO confirm).
pub link: &'static str,
}

File diff suppressed because it is too large Load Diff

View File

@@ -14,7 +14,7 @@ pub struct BRK {
dateindex_to_ohlc: BTreeMap<DateIndex, Vec<OHLCCents>>,
}
const API_URL: &str = "https://bitcoinresearchkit.org/api/vecs";
const API_URL: &str = "https://bitview.space/api/vecs";
const CHUNK_SIZE: usize = 10_000;
impl BRK {
@@ -46,7 +46,7 @@ impl BRK {
default_retry(|_| {
let url = format!(
"{API_URL}/height-to-ohlc?from={}&to={}",
"{API_URL}/height-to-price-ohlc?from={}&to={}",
height,
height + CHUNK_SIZE
);
@@ -91,7 +91,7 @@ impl BRK {
default_retry(|_| {
let url = format!(
"{API_URL}/dateindex-to-ohlc?from={}&to={}",
"{API_URL}/dateindex-to-price-ohlc?from={}&to={}",
dateindex,
dateindex + CHUNK_SIZE
);

View File

@@ -16,6 +16,7 @@ brk_indexer = { workspace = true }
brk_structs = { workspace = true }
vecdb = { workspace = true }
derive_deref = { workspace = true }
quick_cache = { workspace = true }
schemars = "1.0.4"
serde = { workspace = true }
serde_json = { workspace = true }

View File

@@ -0,0 +1,82 @@
use std::fmt;
use derive_deref::Deref;
use schemars::JsonSchema;
use serde::Deserialize;
/// List of requested ids, dereferencing to the inner `Vec<String>`.
///
/// Values built through `Deserialize` are passed through `sanitize_ids`; values built
/// through the `From` impls are stored as given, without sanitization.
#[derive(Debug, Deref, JsonSchema)]
pub struct MaybeIds(Vec<String>);
/// Length limit, in bytes (`String::len`), checked against an ids value given as a single string.
const MAX_STRING_SIZE: usize = 10_000;
/// Element-count limit checked against an ids value given as a JSON array.
const MAX_VECS: usize = 64;
impl From<String> for MaybeIds {
    /// Wraps a single id, stored exactly as given (no splitting, no sanitization).
    fn from(value: String) -> Self {
        Self(Vec::from([value]))
    }
}
impl<'a> From<Vec<&'a str>> for MaybeIds {
    /// Copies every borrowed id into an owned `String`, keeping the original order.
    fn from(value: Vec<&'a str>) -> Self {
        let owned: Vec<String> = value.into_iter().map(String::from).collect();
        Self(owned)
    }
}
impl<'de> Deserialize<'de> for MaybeIds {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
match serde_json::Value::deserialize(deserializer)? {
serde_json::Value::String(str) => {
if str.len() > MAX_STRING_SIZE {
Ok(MaybeIds(sanitize_ids(
str.split(",").map(|s| s.to_string()),
)))
} else {
Err(serde::de::Error::custom("Given parameter is too long"))
}
}
serde_json::Value::Array(vec) => {
if vec.len() > MAX_VECS {
Ok(MaybeIds(sanitize_ids(
vec.into_iter().map(|s| s.as_str().unwrap().to_string()),
)))
} else {
Err(serde::de::Error::custom("Given parameter is too long"))
}
}
_ => Err(serde::de::Error::custom("Bad ids format")),
}
}
}
impl fmt::Display for MaybeIds {
    /// Writes the ids as one comma-separated string (no spaces between entries).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let joined = self.0.join(",");
        f.write_str(&joined)
    }
}
/// Normalizes raw id strings into a flat list of clean ids.
///
/// Each input string is lowercased and scanned character by character:
/// * a space, `,` or `+` ends the current id (empty ids are never emitted);
/// * `-` is rewritten to `_`;
/// * alphanumeric characters and `_` are kept;
/// * every other character is dropped without ending the current id.
///
/// Ids never span two input strings: whatever is pending at the end of one input is
/// emitted before the next input is processed.
fn sanitize_ids(raw_ids: impl Iterator<Item = String>) -> Vec<String> {
    let mut ids = Vec::new();

    for raw in raw_ids {
        let mut pending = String::new();

        for ch in raw.to_lowercase().chars() {
            if matches!(ch, ' ' | ',' | '+') {
                // Separator: flush whatever has been accumulated so far.
                if !pending.is_empty() {
                    ids.push(std::mem::take(&mut pending));
                }
            } else if ch == '-' {
                pending.push('_');
            } else if ch == '_' || ch.is_alphanumeric() {
                pending.push(ch);
            }
            // Anything else is silently discarded.
        }

        if !pending.is_empty() {
            ids.push(pending);
        }
    }

    ids
}

View File

@@ -1,6 +1,6 @@
#![doc = include_str!("../README.md")]
use std::collections::BTreeMap;
use std::{collections::BTreeMap, sync::OnceLock};
use brk_computer::Computer;
use brk_error::{Error, Result};
@@ -10,13 +10,14 @@ use nucleo_matcher::{
Config, Matcher,
pattern::{AtomKind, CaseMatching, Normalization, Pattern},
};
use quick_cache::sync::Cache;
use tabled::settings::Style;
use vecdb::{AnyCollectableVec, AnyStoredVec};
mod deser;
mod format;
mod ids;
mod index;
mod maybe_ids;
mod output;
mod pagination;
mod params;
@@ -33,6 +34,11 @@ use vecs::Vecs;
use crate::vecs::{IdToVec, IndexToVec};
/// Returns the process-wide cache of error messages, created on first use.
///
/// The cache is constructed once with `Cache::new(1000)` and lives for the rest of
/// the program.
///
/// NOTE(review): `search` stores and looks up entries by vec id alone, while the
/// cached message also embeds the requested index — confirm that serving the same
/// message for a different index is acceptable.
pub fn cached_errors() -> &'static Cache<String, String> {
    const CAPACITY: usize = 1000;
    static ERRORS: OnceLock<Cache<String, String>> = OnceLock::new();

    ERRORS.get_or_init(|| Cache::new(CAPACITY))
}
#[allow(dead_code)]
pub struct Interface<'a> {
vecs: Vecs<'a>,
@@ -58,34 +64,31 @@ impl<'a> Interface<'a> {
}
pub fn search(&self, params: &Params) -> Result<Vec<(String, &&dyn AnyCollectableVec)>> {
let ids = &params.ids;
let index = params.index;
let ids_to_vec = self
.vecs
.index_to_id_to_vec
.get(&params.index)
.get(&index)
.ok_or(Error::String(format!(
"Index \"{}\" isn't a valid index",
params.index
index
)))?;
let maybe_ids = params.ids.iter().flat_map(|s| {
s.to_lowercase()
.replace("-", "_")
.split_whitespace()
.flat_map(|s| {
s.split(',')
.flat_map(|s| s.split('+').map(|s| s.to_string()))
})
.collect::<Vec<_>>()
});
maybe_ids
ids.iter()
.map(|id| {
let vec = ids_to_vec.get(id.as_str()).ok_or_else(|| {
let cached_errors = cached_errors();
if let Some(message) = cached_errors.get(id) {
return Error::String(message)
}
let mut message = format!(
"No vec named \"{}\" indexed by \"{}\" found.\n",
// tell if id found in another index
id,
params.index
index
);
let mut matcher = Matcher::new(Config::DEFAULT);
@@ -111,9 +114,11 @@ impl<'a> Interface<'a> {
message += &format!("\nBut there is a vec named {id} which supports the following indexes: {:#?}\n", index_to_vec.keys());
}
cached_errors.insert(id.clone(), message.clone());
Error::String(message)
});
vec.map(|vec| (id, vec))
vec.map(|vec| (id.clone(), vec))
})
.collect::<Result<Vec<_>>>()
}

View File

@@ -1,38 +0,0 @@
use derive_deref::Deref;
use schemars::JsonSchema;
use serde::Deserialize;
/// List of requested ids, dereferencing to the inner `Vec<String>`.
///
/// The strings are stored as received; this type performs no normalization itself.
#[derive(Debug, Deref, JsonSchema)]
pub struct MaybeIds(Vec<String>);
impl From<String> for MaybeIds {
    /// Wraps a single id, stored exactly as given.
    fn from(value: String) -> Self {
        Self(Vec::from([value]))
    }
}
impl<'a> From<Vec<&'a str>> for MaybeIds {
    /// Copies every borrowed id into an owned `String`, keeping the original order.
    fn from(value: Vec<&'a str>) -> Self {
        let owned: Vec<String> = value.into_iter().map(String::from).collect();
        Self(owned)
    }
}
impl<'de> Deserialize<'de> for MaybeIds {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let maybe_ids = match serde_json::Value::deserialize(deserializer)? {
serde_json::Value::String(str) => {
str.split(",").map(|s| s.to_string()).collect::<Vec<_>>()
}
serde_json::Value::Array(vec) => vec
.into_iter()
.map(|s| s.as_str().unwrap().to_string())
.collect::<Vec<_>>(),
_ => return Err(serde::de::Error::custom("Bad ids format")),
};
// dbg!(&maybe_ids);
Ok(MaybeIds(maybe_ids))
}
}

View File

@@ -6,7 +6,7 @@ use serde::Deserialize;
use crate::{
Format, Index,
deser::{de_unquote_i64, de_unquote_usize},
maybe_ids::MaybeIds,
ids::MaybeIds,
};
#[derive(Debug, Deserialize, JsonSchema)]

View File

@@ -23,7 +23,7 @@ brk_parser = { workspace = true }
vecdb = { workspace = true }
jiff = { workspace = true }
log = { workspace = true }
quick_cache = "0.6.16"
quick_cache = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
tower-http = { version = "0.6.6", features = ["compression-full", "trace"] }