Files
brk/scripts/mempool_compat/conftest.py
2026-04-12 18:00:02 +02:00

339 lines
12 KiB
Python

"""
Mempool.space API compatibility tests.
Compares every brk mempool_space endpoint against the real mempool.space API
using live blockchain data — nothing is hardcoded or deterministic.
Usage:
cd scripts/mempool_compat
uv run pytest -sv # all tests, verbose
uv run pytest -sv test_blocks.py # one category
uv run pytest -sv -k "test_block_header" # one test
BRK_URL=http://host:port uv run pytest -sv # custom brk server
Environment variables:
BRK_URL brk server base URL (default: http://localhost:3000)
MEMPOOL_URL mempool.space base URL (default: https://mempool.space)
RATE_LIMIT seconds between mempool.space requests (default: 0.5)
"""
import json
import os
import time
from dataclasses import dataclass
from typing import Any, Optional, Set
import pytest
import requests
# Base URL of the brk server under test (override with BRK_URL).
BRK_BASE = os.getenv("BRK_URL", "http://localhost:3000")
# Base URL of the reference mempool.space instance (override with MEMPOOL_URL).
MEMPOOL_BASE = os.getenv("MEMPOOL_URL", "https://mempool.space")
# Seconds between mempool.space requests (override with RATE_LIMIT).
RATE_LIMIT = float(os.getenv("RATE_LIMIT", "0.5"))
# ── API client ────────────────────────────────────────────────────────
class ApiClient:
    """HTTP client for a single API server with optional rate limiting.

    All requests go through one ``requests.Session``. A response with
    status 429 is retried up to ``MAX_ATTEMPTS`` times in total, backing
    off according to the ``Retry-After`` header between attempts.
    """

    # Total attempts made for one request while the server keeps answering 429.
    MAX_ATTEMPTS = 3
    # Back-off in seconds when ``Retry-After`` is absent or not a plain number.
    DEFAULT_RETRY_AFTER = 5.0

    def __init__(self, base_url: str, name: str, rate_limit: float = 0.0):
        """Create a client for ``base_url``.

        Args:
            base_url: Server root; trailing slashes are removed.
            name: Label for this server.
            rate_limit: Minimum seconds between requests; ``0`` disables it.
        """
        self.base_url = base_url.rstrip("/")
        self.name = name
        self.rate_limit = rate_limit
        self._last_request = 0.0
        self.session = requests.Session()
        self.session.headers["User-Agent"] = "brk-compat-test/1.0"

    def _wait(self):
        """Sleep until ``rate_limit`` seconds have passed since the last request."""
        if self.rate_limit > 0:
            elapsed = time.monotonic() - self._last_request
            if elapsed < self.rate_limit:
                time.sleep(self.rate_limit - elapsed)
        self._last_request = time.monotonic()

    @staticmethod
    def _retry_delay(header_value) -> float:
        """Turn a ``Retry-After`` header value into a non-negative delay in seconds.

        Only the delay-seconds form is understood. A missing header, the
        HTTP-date form, or any other unparsable/non-finite value falls back
        to ``DEFAULT_RETRY_AFTER`` instead of raising.
        """
        try:
            delay = float(header_value)
        except (TypeError, ValueError):
            return ApiClient.DEFAULT_RETRY_AFTER
        # NaN and +/-inf parse as floats but cannot be passed to time.sleep().
        if delay != delay or delay in (float("inf"), float("-inf")):
            return ApiClient.DEFAULT_RETRY_AFTER
        return max(delay, 0.0)

    def get(self, path: str, params=None, timeout: int = 30) -> "requests.Response":
        """GET ``base_url + path`` and return the response.

        Retries on HTTP 429. Any other error status, or a 429 on the final
        attempt, is raised by ``raise_for_status()``.
        """
        url = f"{self.base_url}{path}"
        for attempt in range(1, self.MAX_ATTEMPTS + 1):
            # Apply the rate limit to every attempt, retries included.
            self._wait()
            resp = self.session.get(url, params=params, timeout=timeout)
            if resp.status_code != 429 or attempt == self.MAX_ATTEMPTS:
                # Either a definitive answer or no retries left: do not
                # sleep when nothing will be retried afterwards.
                break
            time.sleep(self._retry_delay(resp.headers.get("Retry-After")))
        resp.raise_for_status()
        return resp

    def get_json(self, path: str, params=None, timeout: int = 30) -> Any:
        """GET ``path`` and return the decoded JSON body."""
        return self.get(path, params=params, timeout=timeout).json()

    def get_text(self, path: str, params=None, timeout: int = 30) -> str:
        """GET ``path`` and return the body as text."""
        return self.get(path, params=params, timeout=timeout).text

    def get_bytes(self, path: str, params=None, timeout: int = 30) -> bytes:
        """GET ``path`` and return the raw body bytes."""
        return self.get(path, params=params, timeout=timeout).content
# ── Live data ─────────────────────────────────────────────────────────
# Heights sampled for the live data set: absolute heights from well-known
# eras, plus depths measured back from the current tip for recent blocks.
# Covers: genesis-era, early, mid, post-halving, taproot-era, recent, near-tip.
FIXED_HEIGHTS = [
    100,
    100_000,
    400_000,
    630_000,
    800_000,
]
RELATIVE_DEPTHS = [
    1000,
    100,
    10,
]
@dataclass
class BlockData:
    """A discovered block with associated txids."""

    # Height the block was looked up at.
    height: int
    # Block hash returned by the block-height lookup.
    hash: str
    # Sample transaction: the block's second txid, or its only txid when the
    # block holds a single transaction.
    txid: str
    # First txid listed for the block (the coinbase transaction).
    coinbase_txid: str
@dataclass
class LiveData:
    """Live blockchain data discovered at session start."""

    # Chain tip reported by the reference server when the session started.
    tip_height: int
    tip_hash: str
    # Multiple blocks at various depths for parametrized tests
    blocks: list  # list[BlockData]
    # Addresses keyed by scriptpubkey_type
    addresses: dict  # dict[str, str]
    # Convenience aliases (first block)
    stable_height: int
    stable_hash: str
    # Full block JSON fetched for the first block.
    stable_block: dict
    sample_txid: str
    coinbase_txid: str
    # First discovered address, or a fixed fallback when none was found.
    sample_address: str
# ── Fixtures ──────────────────────────────────────────────────────────
@pytest.fixture(scope="session")
def brk():
    """Session-scoped client for the brk server (no rate limiting)."""
    client = ApiClient(base_url=BRK_BASE, name="brk")
    return client
@pytest.fixture(scope="session")
def mempool():
    """Session-scoped, rate-limited client for the reference mempool.space API."""
    client = ApiClient(
        base_url=MEMPOOL_BASE,
        name="mempool.space",
        rate_limit=RATE_LIMIT,
    )
    return client
@pytest.fixture(scope="session", autouse=True)
def check_servers(brk, mempool):
    """Abort the whole run early when either server cannot be reached.

    Probes the tip-height endpoint on brk first, then on mempool.space, and
    calls ``pytest.exit`` with the failing server's URL on the first error.
    """
    probes = (
        (brk, "brk server"),
        (mempool, "mempool.space"),
    )
    for client, label in probes:
        try:
            client.get("/api/blocks/tip/height")
        except Exception as exc:
            pytest.exit(f"{label} not reachable at {client.base_url}: {exc}")
@pytest.fixture(scope="session")
def live(mempool) -> LiveData:
    """Build the session-wide sample of real chain data used by the tests.

    Queries the reference server for the tip, then for one block at every
    height in ``FIXED_HEIGHTS`` and at every ``RELATIVE_DEPTHS`` offset below
    the tip. Each block contributes its first txid and a sample txid; output
    addresses of the sample transactions are collected per
    ``scriptpubkey_type``. A summary is printed before returning.
    """
    tip_height = int(mempool.get_text("/api/blocks/tip/height"))
    tip_hash = mempool.get_text("/api/blocks/tip/hash")

    heights = sorted(
        FIXED_HEIGHTS + [tip_height - depth for depth in RELATIVE_DEPTHS]
    )

    blocks = []  # list[BlockData], ascending by height
    addresses = {}  # dict[str, str]: scriptpubkey_type -> address
    for height in heights:
        block_hash = mempool.get_text(f"/api/block-height/{height}")
        txids = mempool.get_json(f"/api/block/{block_hash}/txids")
        first_txid = txids[0]
        # Prefer the second transaction; a single-tx block reuses the first.
        picked_txid = txids[1] if len(txids) > 1 else first_txid
        blocks.append(
            BlockData(
                height=height,
                hash=block_hash,
                txid=picked_txid,
                coinbase_txid=first_txid,
            )
        )

        # Stop fetching transactions once enough address types are known.
        if len(addresses) >= 8:
            continue
        tx = mempool.get_json(f"/api/tx/{picked_txid}")
        for output in tx.get("vout", []):
            kind = output.get("scriptpubkey_type")
            address = output.get("scriptpubkey_address")
            if not (address and kind):
                continue
            # Keep only the first address seen for each script type.
            addresses.setdefault(kind, address)

    stable = blocks[0]
    stable_block = mempool.get_json(f"/api/block/{stable.hash}")
    fallback_address = "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"
    sample_address = next(iter(addresses.values()), fallback_address)

    data = LiveData(
        tip_height=tip_height,
        tip_hash=tip_hash,
        blocks=blocks,
        addresses=addresses,
        stable_height=stable.height,
        stable_hash=stable.hash,
        stable_block=stable_block,
        sample_txid=stable.txid,
        coinbase_txid=stable.coinbase_txid,
        sample_address=sample_address,
    )

    banner = "=" * 70
    print(f"\n{banner}")
    print(f" LIVE TEST DATA (from {MEMPOOL_BASE})")
    print(f"{banner}")
    print(f" tip {data.tip_height} {data.tip_hash[:20]}")
    for index, block in enumerate(blocks):
        print(f" block[{index}] {block.height} {block.hash[:20]}… tx={block.txid[:16]}")
    for kind, address in addresses.items():
        print(f" addr {kind:12s} {address}")
    print(f"{banner}\n")
    return data
# ── Display helpers ───────────────────────────────────────────────────
def show(method: str, path: str, brk_data: Any, mem_data: Any, max_lines: int = 20):
    """Print both responses so the runner can see what was fetched.

    Args:
        method: HTTP method shown in the header line.
        path: Request path shown in the header line.
        brk_data: Response obtained from brk.
        mem_data: Response obtained from mempool.space (printed first).
        max_lines: Maximum number of lines printed per response; the rest is
            summarised as a "more lines" note.

    dicts and lists are pretty-printed as JSON, bytes are summarised by
    length, and anything else is printed via ``str()``.
    """
    # The original separator multiplied an empty string, so no rule was ever
    # printed. NOTE(review): the glyph is assumed to be the box-drawing dash
    # used by this file's section comments — confirm.
    rule = "─" * 70
    print(f"\n{rule}")
    print(f" {method} {path}")
    print(rule)
    for label, data in [("mempool.space", mem_data), ("brk", brk_data)]:
        print(f"\n [{label}]")
        if isinstance(data, (dict, list)):
            text = json.dumps(data, indent=2)
        elif isinstance(data, bytes):
            text = f"<{len(data)} bytes>"
        else:
            text = str(data)
        lines = text.split("\n")
        for line in lines[:max_lines]:
            print(f" {line}")
        if len(lines) > max_lines:
            print(f" … ({len(lines) - max_lines} more lines)")
# ── Comparison helpers ────────────────────────────────────────────────
# Response keys brk deliberately leaves out because they belong to
# mempool.space-specific features. Any other key returned by mempool.space
# MUST also be present in brk.
ALLOWED_MISSING = {
    "matchRate",
    "expectedFees",
    "expectedWeight",
    # Only USD is tracked by brk, so other currencies and exchange rates are
    # intentionally absent.
    "EUR",
    "GBP",
    "CAD",
    "CHF",
    "AUD",
    "JPY",
    "USDEUR",
    "USDGBP",
    "USDCAD",
    "USDCHF",
    "USDAUD",
    "USDJPY",
    # Block health scores are not computed by brk.
    "avgBlockHealth",
    # Block similarity / template matching is not computed by brk.
    "similarity",
    # Per-pool fee delta and match rate are not computed by brk.
    "avgFeeDelta",
    "avgMatchRate",
}
# The vout reported for coinbase transactions differs on purpose between the
# two servers: brk reports u16::MAX (65535) while mempool.space reports
# u32::MAX (4294967295).
COINBASE_VOUT_BRK = 0xFFFF
COINBASE_VOUT_MEMPOOL = 0xFFFF_FFFF
def _is_numeric_string(value: Any) -> bool:
    """Return True when ``value`` is a str that parses as an int or a float."""
    if not isinstance(value, str):
        return False
    for parse in (int, float):
        try:
            parse(value)
        except ValueError:
            continue
        return True
    return False


def assert_same_structure(brk_data: Any, mem_data: Any, path: str = "root"):
    """brk must have every key mempool.space has (extra brk keys are fine).

    Recurses into nested dicts; for arrays, compares the first element.
    int/float are treated as equivalent; None is compatible with anything.
    An int is compatible with a str only when the str is a numeric literal.

    Args:
        brk_data: Value returned by brk.
        mem_data: Value returned by mempool.space; it drives the comparison.
        path: Location of the values, used in failure messages.

    Raises:
        AssertionError: on a missing key, a container mismatch or a scalar
            type mismatch.
    """
    if isinstance(mem_data, dict):
        assert isinstance(brk_data, dict), (
            f"Expected dict at {path}, got {type(brk_data).__name__}"
        )
        brk_keys = set(brk_data.keys())
        mem_keys = set(mem_data.keys())
        missing = mem_keys - brk_keys - ALLOWED_MISSING
        assert not missing, f"brk missing keys at {path}: {missing}"
        for key in brk_keys & mem_keys:
            assert_same_structure(brk_data[key], mem_data[key], f"{path}.{key}")
    elif isinstance(mem_data, list):
        assert isinstance(brk_data, list), (
            f"Expected list at {path}, got {type(brk_data).__name__}"
        )
        # Only the first element is inspected, and only when both sides have one.
        if mem_data and brk_data:
            assert_same_structure(brk_data[0], mem_data[0], f"{path}[0]")
    else:
        if mem_data is None or brk_data is None:
            return
        # Exact type names are compared so that bool is not mistaken for int.
        bt = type(brk_data).__name__
        mt = type(mem_data).__name__
        if {bt, mt} <= {"int", "float"}:
            return
        # int/str are compatible when the string is a numeric literal
        # (mempool.space serializes large numbers as strings)
        if {bt, mt} == {"int", "str"}:
            text = brk_data if bt == "str" else mem_data
            if _is_numeric_string(text):
                return
        assert bt == mt, (
            f"Type mismatch at {path}: brk={bt}({brk_data!r}) "
            f"vs mempool={mt}({mem_data!r})"
        )
def assert_same_values(
    brk_data: Any,
    mem_data: Any,
    path: str = "root",
    exclude: Optional[Set[str]] = None,
):
    """Both responses must have identical values.

    Floats are compared with relative tolerance 1e-4 and absolute
    tolerance 1e-6.
    Pass ``exclude`` to skip keys that are expected to differ.

    Args:
        brk_data: Value returned by brk.
        mem_data: Value returned by mempool.space; it drives the comparison.
        path: Location of the values, used in failure messages.
        exclude: Dict keys skipped at every nesting level.

    Raises:
        AssertionError: on a missing key, a container or length mismatch, or
            differing values.
    """
    exclude = exclude or set()
    if isinstance(mem_data, dict):
        assert isinstance(brk_data, dict), (
            f"Expected dict at {path}, got {type(brk_data).__name__}"
        )
        # brk must have every mempool key; extra brk keys are fine
        mem_keys = set(mem_data.keys())
        for key in mem_keys - exclude - ALLOWED_MISSING:
            assert key in brk_data, f"brk missing '{key}' at {path}"
            assert_same_values(brk_data[key], mem_data[key], f"{path}.{key}", exclude)
    elif isinstance(mem_data, list):
        assert isinstance(brk_data, list), (
            f"Expected list at {path}, got {type(brk_data).__name__}"
        )
        assert len(brk_data) == len(mem_data), (
            f"Length mismatch at {path}: brk={len(brk_data)} vs mempool={len(mem_data)}"
        )
        for i, (b, m) in enumerate(zip(brk_data, mem_data)):
            assert_same_values(b, m, f"{path}[{i}]", exclude)
    elif mem_data is None:
        # mempool returns null, brk computes a value — that's fine
        return
    elif isinstance(mem_data, float) or isinstance(brk_data, float):
        # A null on the brk side is tolerated here without comparison.
        if brk_data is None:
            return
        try:
            brk_value = float(brk_data)
            mem_value = float(mem_data)
        except (TypeError, ValueError):
            # The counterpart of a float is not numeric at all: report it as
            # a comparison failure with its path instead of a bare
            # conversion error.
            raise AssertionError(
                f"Type mismatch at {path}: brk={brk_data!r} vs mempool={mem_data!r}"
            ) from None
        assert brk_value == pytest.approx(
            mem_value, rel=1e-4, abs=1e-6
        ), f"Float mismatch at {path}: brk={brk_data} vs mempool={mem_data}"
    else:
        # Coinbase vout: brk uses u16::MAX, mempool uses u32::MAX — both valid
        if (
            brk_data == COINBASE_VOUT_BRK
            and mem_data == COINBASE_VOUT_MEMPOOL
        ):
            return
        assert brk_data == mem_data, (
            f"Value mismatch at {path}: brk={brk_data!r} vs mempool={mem_data!r}"
        )