Files
brk/modules/quickmatch-js/0.4.1/src/index.js
2026-03-26 15:57:22 +01:00

419 lines
11 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
const DEFAULT_SEPARATORS = "_- :/";
const DEFAULT_TRIGRAM_BUDGET = 6;
const DEFAULT_LIMIT = 100;
const DEFAULT_MIN_SCORE = 2;
/**
* Search configuration.
*
* Defaults work well for most use cases.
* Tweak `trigramBudget` to trade speed for typo tolerance.
*/
export class QuickMatchConfig {
/** Characters that separate words in items (e.g. "hash_rate" → ["hash", "rate"]).
* @type {string} */
separators = DEFAULT_SEPARATORS;
/** Max results returned per query.
* @type {number} */
limit = DEFAULT_LIMIT;
/** How hard to try matching typos (0 = off, 36 = fast, 915 = thorough, max 20).
* @type {number} */
trigramBudget = DEFAULT_TRIGRAM_BUDGET;
/** Min overlap required for a typo match. Higher = fewer false positives.
* @type {number} */
minScore = DEFAULT_MIN_SCORE;
/** @param {number} n - Max results (default: 100, min: 1) */
withLimit(n) {
this.limit = Math.max(1, n);
return this;
}
/** @param {number} n - Trigram budget (0-20, default: 6) */
withTrigramBudget(n) {
this.trigramBudget = Math.max(0, Math.min(20, n));
return this;
}
/** @param {string} s - Separator characters (default: '_- :/') */
withSeparators(s) {
this.separators = s;
return this;
}
/** @param {number} n - Min trigram score (default: 2, min: 1) */
withMinScore(n) {
this.minScore = Math.max(1, n);
return this;
}
}
/**
* Instant search over a list of strings.
*
* Supports exact words, prefixes ("dom" → "dominance"), joined words
* ("hashrate" → "hash_rate"), and typo tolerance ("suply" → "supply").
* Results are ranked: exact matches first, then by specificity.
*/
export class QuickMatch {
/** @param {string[]} items - Searchable items (lowercase) @param {QuickMatchConfig} [config] */
constructor(items, config = new QuickMatchConfig()) {
this.config = config;
this.items = items;
/** @type {Map<string, number[]>} */
this.wordIndex = new Map();
/** @type {Map<string, number[]>} */
this.trigramIndex = new Map();
this._sepLookup = sepLookup(config.separators);
this._scores = new Uint32Array(items.length);
/** @type {number[]} */
this._dirty = [];
let maxWordLen = 0;
let maxQueryLen = 0;
let maxWords = 0;
const sep = this._sepLookup;
for (let idx = 0; idx < items.length; idx++) {
const item = items[idx];
if (item.length > maxQueryLen) maxQueryLen = item.length;
const words = [];
let start = 0;
for (let i = 0; i <= item.length; i++) {
if (i < item.length && !sep[item.charCodeAt(i)]) continue;
if (i > start) {
const word = item.slice(start, i);
words.push(word);
if (word.length > maxWordLen) maxWordLen = word.length;
for (let len = 1; len <= word.length; len++) {
addToIndex(this.wordIndex, word.slice(0, len), idx);
}
for (let k = 0; k <= word.length - 3; k++) {
addToIndex(this.trigramIndex, word[k] + word[k + 1] + word[k + 2], idx);
}
}
start = i + 1;
}
for (let i = 0; i < words.length - 1; i++) {
const compound = words[i] + words[i + 1];
const from = words[i].length + 1;
for (let len = from; len <= compound.length; len++) {
addToIndex(this.wordIndex, compound.slice(0, len), idx);
}
}
if (words.length > maxWords) maxWords = words.length;
}
this.maxWordLen = maxWordLen + 4;
this.maxQueryLen = maxQueryLen + 6;
this.maxWords = maxWords + 2;
}
/** @param {string} query */
matches(query) {
return this.matchesWith(query, this.config);
}
/**
* @param {string} query
* @param {QuickMatchConfig} config
*/
matchesWith(query, config) {
const { limit, trigramBudget } = config;
const sep =
config.separators === this.config.separators
? this._sepLookup
: sepLookup(config.separators);
const q = normalize(query);
if (!q || q.length > this.maxQueryLen) return [];
const qwords = splitWords(q, sep, this.maxWordLen);
if (!qwords.length || qwords.length > this.maxWords) return [];
const known = [];
const unknown = [];
for (const w of qwords) {
const hits = this.wordIndex.get(w);
if (hits) {
known.push(hits);
} else if (w.length >= 3 && unknown.length < trigramBudget) {
unknown.push(w);
}
}
const pool = intersect(known);
// Try typo matching for unknown words
if (unknown.length && trigramBudget) {
const { _scores: scores, _dirty: dirty } = this;
if (pool) {
for (const i of pool) {
scores[i] = 1;
dirty.push(i);
}
}
const hitCount = this._scoreTrigrams(
unknown,
trigramBudget,
pool !== null,
Math.max(0, q.length - 3),
);
const minScore = Math.max(config.minScore, Math.ceil(hitCount / 2));
const result = this._rank(dirty, minScore, qwords, sep, limit);
for (const i of dirty) scores[i] = 0;
dirty.length = 0;
if (result.length > 0) return result;
}
// Rank known candidates (intersection, or union as fallback)
const candidates = pool || union(known);
return candidates.length > 0
? this._rank(candidates, null, qwords, sep, limit)
: [];
}
/** @private @param {string[]} unknown @param {number} budget @param {boolean} poolOnly @param {number} minLen */
_scoreTrigrams(unknown, budget, poolOnly, minLen) {
const { _scores: scores, _dirty: dirty, items } = this;
const visited = new Set();
const maxRounds = budget;
let hits = 0;
outer: for (let round = 0; round < maxRounds; round++) {
for (const word of unknown) {
if (budget <= 0) break outer;
const pos = trigramPosition(word.length, round);
if (pos < 0) continue;
const tri = word[pos] + word[pos + 1] + word[pos + 2];
if (visited.has(tri)) continue;
visited.add(tri);
budget--;
const matched = this.trigramIndex.get(tri);
if (!matched) continue;
hits++;
if (poolOnly) {
for (let j = 0; j < matched.length; j++) {
const i = matched[j];
if (scores[i] > 0) scores[i]++;
}
} else {
for (let j = 0; j < matched.length; j++) {
const i = matched[j];
if (items[i].length >= minLen) {
if (scores[i] === 0) dirty.push(i);
scores[i]++;
}
}
}
}
}
return hits;
}
/**
* @private
* @param {number[]} indices
* @param {number|null} minScore
* @param {string[]} qwords
* @param {Uint8Array} sep
* @param {number} limit
*/
_rank(indices, minScore, qwords, sep, limit) {
const { items, _scores: scores } = this;
/** @type {[number[], number[], number[]]} */
const buckets = [[], [], []]; // ps=0, ps=1, ps=2
for (let i = 0; i < indices.length; i++) {
const idx = indices[i];
if (minScore !== null && scores[idx] < minScore) continue;
buckets[prefixScore(items[idx], qwords, sep)].push(idx);
}
const results = [];
for (let ps = 2; ps >= 0 && results.length < limit; ps--) {
const bucket = buckets[ps];
if (!bucket.length) continue;
bucket.sort(
(a, b) => scores[b] - scores[a] || items[a].length - items[b].length,
);
const take = Math.min(bucket.length, limit - results.length);
for (let i = 0; i < take; i++) results.push(items[bucket[i]]);
}
return results;
}
}
// --- Helpers ---
/** @param {string} query */
function normalize(query) {
let out = "";
let start = 0;
let end = query.length;
while (start < end && query.charCodeAt(start) <= 32) start++;
while (end > start && query.charCodeAt(end - 1) <= 32) end--;
for (let i = start; i < end; i++) {
const c = query.charCodeAt(i);
if (c >= 128) continue;
out += c >= 65 && c <= 90 ? String.fromCharCode(c + 32) : query[i];
}
return out;
}
/** @param {string} separators */
function sepLookup(separators) {
const t = new Uint8Array(128);
for (let i = 0; i < separators.length; i++) {
const c = separators.charCodeAt(i);
if (c < 128) t[c] = 1;
}
return t;
}
/**
* @param {string} text
* @param {Uint8Array} sep
* @param {number} maxLen
*/
function splitWords(text, sep, maxLen) {
/** @type {string[]} */
const words = [];
let start = 0;
for (let i = 0; i <= text.length; i++) {
if (i < text.length && !sep[text.charCodeAt(i)]) continue;
if (i > start) {
const w = text.slice(start, i);
if (w.length <= maxLen && !words.includes(w)) words.push(w);
}
start = i + 1;
}
return words;
}
/**
* @param {Map<string, number[]>} index
* @param {string} key
* @param {number} value
*/
function addToIndex(index, key, value) {
const arr = index.get(key);
if (arr) {
if (arr[arr.length - 1] !== value) arr.push(value);
} else {
index.set(key, [value]);
}
}
/** @param {number[][]} arrays */
function union(arrays) {
if (arrays.length <= 1) return arrays[0] || [];
const seen = new Set();
const result = [];
for (const arr of arrays) {
for (const idx of arr) {
if (!seen.has(idx)) {
seen.add(idx);
result.push(idx);
}
}
}
return result;
}
/** @param {number[][]} arrays @returns {number[]|null} */
function intersect(arrays) {
if (arrays.length <= 1) return arrays[0] || null;
let si = 0;
for (let i = 1; i < arrays.length; i++) {
if (arrays[i].length < arrays[si].length) si = i;
}
const result = arrays[si].slice();
for (let i = 0; i < arrays.length; i++) {
if (i === si) continue;
let w = 0;
for (let j = 0; j < result.length; j++) {
if (bsearch(arrays[i], result[j])) result[w++] = result[j];
}
result.length = w;
if (!w) return null;
}
return result;
}
/**
* @param {number[]} arr
* @param {number} val
*/
function bsearch(arr, val) {
let lo = 0,
hi = arr.length - 1;
while (lo <= hi) {
const mid = (lo + hi) >> 1;
if (arr[mid] === val) return true;
if (arr[mid] < val) lo = mid + 1;
else hi = mid - 1;
}
return false;
}
/** @param {string} item @param {string[]} qwords @param {Uint8Array} sep */
function prefixScore(item, qwords, sep) {
let qi = 0,
pos = 0;
const len = item.length;
while (qi < qwords.length) {
while (pos < len && sep[item.charCodeAt(pos)]) pos++;
if (pos >= len) return 0;
const ws = pos;
while (pos < len && !sep[item.charCodeAt(pos)]) pos++;
const qw = qwords[qi];
if (pos - ws < qw.length) return 0;
for (let j = 0; j < qw.length; j++) {
if (item.charCodeAt(ws + j) !== qw.charCodeAt(j)) return 0;
}
qi++;
}
while (pos < len && sep[item.charCodeAt(pos)]) pos++;
return pos >= len ? 2 : 1;
}
/** @param {number} len @param {number} round */
function trigramPosition(len, round) {
const max = len - 3;
if (max < 0) return -1;
if (round === 0) return 0;
if (round === 1 && max > 0) return max;
if (round === 2 && max > 1) return max >> 1;
if (max <= 2) return -1;
const mid = max >> 1;
const off = (round - 2) >> 1;
const pos = round & 1 ? Math.max(0, mid - off) : mid + off;
return pos === 0 || pos >= max || pos === mid ? -1 : pos;
}