mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-04-24 22:59:58 -07:00
global: snapshot
This commit is contained in:
417
modules/quickmatch-js/0.4.0/src/index.js
Normal file
417
modules/quickmatch-js/0.4.0/src/index.js
Normal file
@@ -0,0 +1,417 @@
|
||||
const DEFAULT_SEPARATORS = "_- :/";
|
||||
const DEFAULT_TRIGRAM_BUDGET = 6;
|
||||
const DEFAULT_LIMIT = 100;
|
||||
const DEFAULT_MIN_SCORE = 2;
|
||||
|
||||
/**
|
||||
* Search configuration.
|
||||
*
|
||||
* Defaults work well for most use cases.
|
||||
* Tweak `trigramBudget` to trade speed for typo tolerance.
|
||||
*/
|
||||
export class QuickMatchConfig {
|
||||
/** Characters that separate words in items (e.g. "hash_rate" → ["hash", "rate"]).
|
||||
* @type {string} */
|
||||
separators = DEFAULT_SEPARATORS;
|
||||
|
||||
/** Max results returned per query.
|
||||
* @type {number} */
|
||||
limit = DEFAULT_LIMIT;
|
||||
|
||||
/** How hard to try matching typos (0 = off, 3–6 = fast, 9–15 = thorough, max 20).
|
||||
* @type {number} */
|
||||
trigramBudget = DEFAULT_TRIGRAM_BUDGET;
|
||||
|
||||
/** Min overlap required for a typo match. Higher = fewer false positives.
|
||||
* @type {number} */
|
||||
minScore = DEFAULT_MIN_SCORE;
|
||||
|
||||
/** @param {number} n - Max results (default: 100, min: 1) */
|
||||
withLimit(n) {
|
||||
this.limit = Math.max(1, n);
|
||||
return this;
|
||||
}
|
||||
|
||||
/** @param {number} n - Trigram budget (0-20, default: 6) */
|
||||
withTrigramBudget(n) {
|
||||
this.trigramBudget = Math.max(0, Math.min(20, n));
|
||||
return this;
|
||||
}
|
||||
|
||||
/** @param {string} s - Separator characters (default: '_- :/') */
|
||||
withSeparators(s) {
|
||||
this.separators = s;
|
||||
return this;
|
||||
}
|
||||
|
||||
/** @param {number} n - Min trigram score (default: 2, min: 1) */
|
||||
withMinScore(n) {
|
||||
this.minScore = Math.max(1, n);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Instant search over a list of strings.
|
||||
*
|
||||
* Supports exact words, prefixes ("dom" → "dominance"), joined words
|
||||
* ("hashrate" → "hash_rate"), and typo tolerance ("suply" → "supply").
|
||||
* Results are ranked: exact matches first, then by specificity.
|
||||
*/
|
||||
export class QuickMatch {
|
||||
/** @param {string[]} items - Searchable items (lowercase) @param {QuickMatchConfig} [config] */
|
||||
constructor(items, config = new QuickMatchConfig()) {
|
||||
this.config = config;
|
||||
this.items = items;
|
||||
/** @type {Map<string, number[]>} */
|
||||
this.wordIndex = new Map();
|
||||
/** @type {Map<string, number[]>} */
|
||||
this.trigramIndex = new Map();
|
||||
this._sepLookup = sepLookup(config.separators);
|
||||
this._scores = new Uint32Array(items.length);
|
||||
/** @type {number[]} */
|
||||
this._dirty = [];
|
||||
|
||||
let maxWordLen = 0;
|
||||
let maxQueryLen = 0;
|
||||
let maxWords = 0;
|
||||
const sep = this._sepLookup;
|
||||
|
||||
for (let idx = 0; idx < items.length; idx++) {
|
||||
const item = items[idx];
|
||||
if (item.length > maxQueryLen) maxQueryLen = item.length;
|
||||
|
||||
const words = [];
|
||||
let start = 0;
|
||||
|
||||
for (let i = 0; i <= item.length; i++) {
|
||||
if (i < item.length && !sep[item.charCodeAt(i)]) continue;
|
||||
if (i > start) {
|
||||
const word = item.slice(start, i);
|
||||
words.push(word);
|
||||
if (word.length > maxWordLen) maxWordLen = word.length;
|
||||
for (let len = 1; len <= word.length; len++) {
|
||||
addToIndex(this.wordIndex, word.slice(0, len), idx);
|
||||
}
|
||||
for (let k = 0; k <= word.length - 3; k++) {
|
||||
addToIndex(this.trigramIndex, word[k] + word[k + 1] + word[k + 2], idx);
|
||||
}
|
||||
}
|
||||
start = i + 1;
|
||||
}
|
||||
|
||||
for (let i = 0; i < words.length - 1; i++) {
|
||||
const compound = words[i] + words[i + 1];
|
||||
const from = words[i].length + 1;
|
||||
for (let len = from; len <= compound.length; len++) {
|
||||
addToIndex(this.wordIndex, compound.slice(0, len), idx);
|
||||
}
|
||||
}
|
||||
|
||||
if (words.length > maxWords) maxWords = words.length;
|
||||
}
|
||||
|
||||
this.maxWordLen = maxWordLen + 4;
|
||||
this.maxQueryLen = maxQueryLen + 6;
|
||||
this.maxWords = maxWords + 2;
|
||||
}
|
||||
|
||||
/** @param {string} query */
|
||||
matches(query) {
|
||||
return this.matchesWith(query, this.config);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} query
|
||||
* @param {QuickMatchConfig} config
|
||||
*/
|
||||
matchesWith(query, config) {
|
||||
const { limit, trigramBudget } = config;
|
||||
const sep =
|
||||
config.separators === this.config.separators
|
||||
? this._sepLookup
|
||||
: sepLookup(config.separators);
|
||||
|
||||
const q = normalize(query);
|
||||
if (!q || q.length > this.maxQueryLen) return [];
|
||||
|
||||
const qwords = splitWords(q, sep, this.maxWordLen);
|
||||
if (!qwords.length || qwords.length > this.maxWords) return [];
|
||||
|
||||
const known = [];
|
||||
const unknown = [];
|
||||
|
||||
for (const w of qwords) {
|
||||
const hits = this.wordIndex.get(w);
|
||||
if (hits) {
|
||||
known.push(hits);
|
||||
} else if (w.length >= 3 && unknown.length < trigramBudget) {
|
||||
unknown.push(w);
|
||||
}
|
||||
}
|
||||
|
||||
const pool = intersect(known);
|
||||
|
||||
// Try typo matching for unknown words
|
||||
if (unknown.length && trigramBudget) {
|
||||
const { _scores: scores, _dirty: dirty } = this;
|
||||
|
||||
if (pool) {
|
||||
for (const i of pool) {
|
||||
scores[i] = 1;
|
||||
dirty.push(i);
|
||||
}
|
||||
}
|
||||
|
||||
const hitCount = this._scoreTrigrams(
|
||||
unknown,
|
||||
trigramBudget,
|
||||
pool !== null,
|
||||
Math.max(0, q.length - 3),
|
||||
);
|
||||
const minScore = Math.max(config.minScore, Math.ceil(hitCount / 2));
|
||||
const result = this._rank(dirty, minScore, qwords, sep, limit);
|
||||
|
||||
for (const i of dirty) scores[i] = 0;
|
||||
dirty.length = 0;
|
||||
|
||||
if (result.length > 0) return result;
|
||||
}
|
||||
|
||||
// Rank known candidates (intersection, or union as fallback)
|
||||
const candidates = pool || union(known);
|
||||
return candidates.length > 0
|
||||
? this._rank(candidates, null, qwords, sep, limit)
|
||||
: [];
|
||||
}
|
||||
|
||||
/** @private @param {string[]} unknown @param {number} budget @param {boolean} poolOnly @param {number} minLen */
|
||||
_scoreTrigrams(unknown, budget, poolOnly, minLen) {
|
||||
const { _scores: scores, _dirty: dirty, items } = this;
|
||||
const visited = new Set();
|
||||
const maxRounds = budget;
|
||||
let hits = 0;
|
||||
|
||||
outer: for (let round = 0; round < maxRounds; round++) {
|
||||
for (const word of unknown) {
|
||||
if (budget <= 0) break outer;
|
||||
|
||||
const pos = trigramPosition(word.length, round);
|
||||
if (pos < 0) continue;
|
||||
|
||||
const tri = word[pos] + word[pos + 1] + word[pos + 2];
|
||||
if (visited.has(tri)) continue;
|
||||
visited.add(tri);
|
||||
budget--;
|
||||
|
||||
const matched = this.trigramIndex.get(tri);
|
||||
if (!matched) continue;
|
||||
hits++;
|
||||
|
||||
if (poolOnly) {
|
||||
for (let j = 0; j < matched.length; j++) {
|
||||
const i = matched[j];
|
||||
if (scores[i] > 0) scores[i]++;
|
||||
}
|
||||
} else {
|
||||
for (let j = 0; j < matched.length; j++) {
|
||||
const i = matched[j];
|
||||
if (items[i].length >= minLen) {
|
||||
if (scores[i] === 0) dirty.push(i);
|
||||
scores[i]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return hits;
|
||||
}
|
||||
|
||||
/**
|
||||
* @private
|
||||
* @param {number[]} indices
|
||||
* @param {number|null} minScore
|
||||
* @param {string[]} qwords
|
||||
* @param {Uint8Array} sep
|
||||
* @param {number} limit
|
||||
*/
|
||||
_rank(indices, minScore, qwords, sep, limit) {
|
||||
const { items, _scores: scores } = this;
|
||||
const buckets = [[], [], []]; // ps=0, ps=1, ps=2
|
||||
|
||||
for (let i = 0; i < indices.length; i++) {
|
||||
const idx = indices[i];
|
||||
if (minScore !== null && scores[idx] < minScore) continue;
|
||||
buckets[prefixScore(items[idx], qwords, sep)].push(idx);
|
||||
}
|
||||
|
||||
const results = [];
|
||||
for (let ps = 2; ps >= 0 && results.length < limit; ps--) {
|
||||
const bucket = buckets[ps];
|
||||
if (!bucket.length) continue;
|
||||
bucket.sort(
|
||||
(a, b) => scores[b] - scores[a] || items[a].length - items[b].length,
|
||||
);
|
||||
const take = Math.min(bucket.length, limit - results.length);
|
||||
for (let i = 0; i < take; i++) results.push(items[bucket[i]]);
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
/** @param {string} query */
|
||||
function normalize(query) {
|
||||
let out = "";
|
||||
let start = 0;
|
||||
let end = query.length;
|
||||
while (start < end && query.charCodeAt(start) <= 32) start++;
|
||||
while (end > start && query.charCodeAt(end - 1) <= 32) end--;
|
||||
for (let i = start; i < end; i++) {
|
||||
const c = query.charCodeAt(i);
|
||||
if (c >= 128) continue;
|
||||
out += c >= 65 && c <= 90 ? String.fromCharCode(c + 32) : query[i];
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/** @param {string} separators */
|
||||
function sepLookup(separators) {
|
||||
const t = new Uint8Array(128);
|
||||
for (let i = 0; i < separators.length; i++) {
|
||||
const c = separators.charCodeAt(i);
|
||||
if (c < 128) t[c] = 1;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @param {Uint8Array} sep
|
||||
* @param {number} maxLen
|
||||
*/
|
||||
function splitWords(text, sep, maxLen) {
|
||||
/** @type {string[]} */
|
||||
const words = [];
|
||||
let start = 0;
|
||||
for (let i = 0; i <= text.length; i++) {
|
||||
if (i < text.length && !sep[text.charCodeAt(i)]) continue;
|
||||
if (i > start) {
|
||||
const w = text.slice(start, i);
|
||||
if (w.length <= maxLen && !words.includes(w)) words.push(w);
|
||||
}
|
||||
start = i + 1;
|
||||
}
|
||||
return words;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Map<string, number[]>} index
|
||||
* @param {string} key
|
||||
* @param {number} value
|
||||
*/
|
||||
function addToIndex(index, key, value) {
|
||||
const arr = index.get(key);
|
||||
if (arr) {
|
||||
if (arr[arr.length - 1] !== value) arr.push(value);
|
||||
} else {
|
||||
index.set(key, [value]);
|
||||
}
|
||||
}
|
||||
|
||||
/** @param {number[][]} arrays */
|
||||
function union(arrays) {
|
||||
if (arrays.length <= 1) return arrays[0] || [];
|
||||
const seen = new Set();
|
||||
const result = [];
|
||||
for (const arr of arrays) {
|
||||
for (const idx of arr) {
|
||||
if (!seen.has(idx)) {
|
||||
seen.add(idx);
|
||||
result.push(idx);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/** @param {number[][]} arrays @returns {number[]|null} */
|
||||
function intersect(arrays) {
|
||||
if (arrays.length <= 1) return arrays[0] || null;
|
||||
|
||||
let si = 0;
|
||||
for (let i = 1; i < arrays.length; i++) {
|
||||
if (arrays[i].length < arrays[si].length) si = i;
|
||||
}
|
||||
|
||||
const result = arrays[si].slice();
|
||||
for (let i = 0; i < arrays.length; i++) {
|
||||
if (i === si) continue;
|
||||
let w = 0;
|
||||
for (let j = 0; j < result.length; j++) {
|
||||
if (bsearch(arrays[i], result[j])) result[w++] = result[j];
|
||||
}
|
||||
result.length = w;
|
||||
if (!w) return null;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number[]} arr
|
||||
* @param {number} val
|
||||
*/
|
||||
function bsearch(arr, val) {
|
||||
let lo = 0,
|
||||
hi = arr.length - 1;
|
||||
while (lo <= hi) {
|
||||
const mid = (lo + hi) >> 1;
|
||||
if (arr[mid] === val) return true;
|
||||
if (arr[mid] < val) lo = mid + 1;
|
||||
else hi = mid - 1;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** @param {string} item @param {string[]} qwords @param {Uint8Array} sep */
|
||||
function prefixScore(item, qwords, sep) {
|
||||
let qi = 0,
|
||||
pos = 0;
|
||||
const len = item.length;
|
||||
|
||||
while (qi < qwords.length) {
|
||||
while (pos < len && sep[item.charCodeAt(pos)]) pos++;
|
||||
if (pos >= len) return 0;
|
||||
|
||||
const ws = pos;
|
||||
while (pos < len && !sep[item.charCodeAt(pos)]) pos++;
|
||||
|
||||
const qw = qwords[qi];
|
||||
if (pos - ws < qw.length) return 0;
|
||||
for (let j = 0; j < qw.length; j++) {
|
||||
if (item.charCodeAt(ws + j) !== qw.charCodeAt(j)) return 0;
|
||||
}
|
||||
qi++;
|
||||
}
|
||||
|
||||
while (pos < len && sep[item.charCodeAt(pos)]) pos++;
|
||||
return pos >= len ? 2 : 1;
|
||||
}
|
||||
|
||||
/** @param {number} len @param {number} round */
|
||||
function trigramPosition(len, round) {
|
||||
const max = len - 3;
|
||||
if (max < 0) return -1;
|
||||
if (round === 0) return 0;
|
||||
if (round === 1 && max > 0) return max;
|
||||
if (round === 2 && max > 1) return max >> 1;
|
||||
if (max <= 2) return -1;
|
||||
|
||||
const mid = max >> 1;
|
||||
const off = (round - 2) >> 1;
|
||||
const pos = round & 1 ? Math.max(0, mid - off) : mid + off;
|
||||
return pos === 0 || pos >= max || pos === mid ? -1 : pos;
|
||||
}
|
||||
Reference in New Issue
Block a user