mirror of
https://github.com/bitcoinresearchkit/brk.git
synced 2026-06-21 12:08:24 -07:00
global: snapshot
This commit is contained in:
@@ -7926,8 +7926,8 @@ class BrkClient extends BrkClientBase {
|
||||
},
|
||||
adjusted: {
|
||||
inflationRate: createBpsPercentRatioPattern(this, 'cointime_adj_inflation_rate'),
|
||||
txVelocityNative: createSeriesPattern1(this, 'cointime_adj_tx_velocity'),
|
||||
txVelocityFiat: createSeriesPattern1(this, 'cointime_adj_tx_velocity_fiat'),
|
||||
txVelocityNative: createSeriesPattern1(this, 'cointime_adj_tx_velocity_btc'),
|
||||
txVelocityFiat: createSeriesPattern1(this, 'cointime_adj_tx_velocity_usd'),
|
||||
},
|
||||
reserveRisk: {
|
||||
value: createSeriesPattern1(this, 'reserve_risk'),
|
||||
@@ -8531,8 +8531,8 @@ class BrkClient extends BrkClientBase {
|
||||
burned: createBlockCumulativePattern(this, 'unspendable_supply'),
|
||||
inflationRate: createBpsPercentRatioPattern(this, 'inflation_rate'),
|
||||
velocity: {
|
||||
native: createSeriesPattern1(this, 'velocity'),
|
||||
fiat: createSeriesPattern1(this, 'velocity_fiat'),
|
||||
native: createSeriesPattern1(this, 'velocity_btc'),
|
||||
fiat: createSeriesPattern1(this, 'velocity_usd'),
|
||||
},
|
||||
marketCap: createCentsDeltaUsdPattern(this, 'market_cap'),
|
||||
marketMinusRealizedCapGrowthRate: create_1m1w1y24hPattern(this, 'market_minus_realized_cap_growth_rate'),
|
||||
|
||||
@@ -1,441 +0,0 @@
|
||||
const DEFAULT_SEPARATORS = "_- ,:";
const DEFAULT_TRIGRAM_BUDGET = 6;
const DEFAULT_LIMIT = 100;
const DEFAULT_MIN_SCORE = 2;

/**
 * Configuration for QuickMatch.
 *
 * Every `with*` setter mutates this instance and returns it, so calls can be
 * chained.
 */
export class QuickMatchConfig {
  /** @type {string} Characters used to split items into words */
  separators = DEFAULT_SEPARATORS;

  /** @type {number} Maximum number of results to return */
  limit = DEFAULT_LIMIT;

  /** @type {number} Number of trigram lookups for fuzzy matching (0-20) */
  trigramBudget = DEFAULT_TRIGRAM_BUDGET;

  /** @type {number} Minimum trigram score required for fuzzy matches */
  minScore = DEFAULT_MIN_SCORE;

  /**
   * Set maximum number of results.
   *
   * The value is floored to a whole number because the matcher truncates
   * result arrays by assigning to `length`, which throws on fractions.
   * NaN falls back to the default limit.
   * @param {number} n - Max results (default: 100, min: 1)
   * @returns {this}
   */
  withLimit(n) {
    const whole = Math.floor(n);
    this.limit = Number.isNaN(whole) ? DEFAULT_LIMIT : Math.max(1, whole);
    return this;
  }

  /**
   * Set trigram budget for fuzzy matching.
   * Higher values find more typos but cost more.
   * @param {number} n - Budget (0-20, default: 6)
   * @returns {this}
   */
  withTrigramBudget(n) {
    this.trigramBudget = Math.max(0, Math.min(20, n));
    return this;
  }

  /**
   * Set word separator characters.
   * @param {string} s - Separator characters (default: '_- ,:')
   * @returns {this}
   */
  withSeparators(s) {
    this.separators = s;
    return this;
  }

  /**
   * Set minimum trigram score for fuzzy matches.
   * Higher values require more trigram overlap, reducing noise.
   * @param {number} n - Minimum score (default: 2, min: 1)
   * @returns {this}
   */
  withMinScore(n) {
    this.minScore = Math.max(1, n);
    return this;
  }
}
|
||||
|
||||
/**
 * Fast fuzzy string matcher using word and trigram indexing.
 *
 * Items are split into words on the configured separators. Each word is
 * recorded in `wordIndex` (exact lookups) and its 3-character windows in
 * `trigramIndex` (fuzzy lookups). Both map a key to item indices.
 */
export class QuickMatch {
  /**
   * Create a new matcher.
   * @param {string[]} items - Items to index (should be lowercase)
   * @param {QuickMatchConfig} [config] - Optional configuration
   */
  constructor(items, config = new QuickMatchConfig()) {
    this.config = config;
    this.items = items;
    /** @type {Map<string, number[]>} */
    this.wordIndex = new Map();
    /** @type {Map<string, number[]>} */
    this.trigramIndex = new Map();

    let maxWordLength = 0;
    let maxQueryLength = 0;
    let maxWordCount = 0;

    const { separators } = config;

    for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
      const item = items[itemIndex];

      if (item.length > maxQueryLength) {
        maxQueryLength = item.length;
      }

      let wordCount = 0;
      let wordStart = 0;

      // `i` runs one past the end so the final word is flushed too.
      for (let i = 0; i <= item.length; i++) {
        const isEndOfWord = i === item.length || separators.includes(item[i]);
        if (!isEndOfWord) continue;

        if (i > wordStart) {
          wordCount++;
          const word = item.slice(wordStart, i);

          if (word.length > maxWordLength) {
            maxWordLength = word.length;
          }

          addToIndex(this.wordIndex, word, itemIndex);
          addTrigramsToIndex(this.trigramIndex, word, itemIndex);
        }

        wordStart = i + 1;
      }

      if (wordCount > maxWordCount) {
        maxWordCount = wordCount;
      }
    }

    // Queries may exceed the indexed maxima by a small slack before being
    // rejected outright in matchesWith().
    this.maxWordLength = maxWordLength + 4;
    this.maxQueryLength = maxQueryLength + 6;
    this.maxWordCount = maxWordCount + 2;
  }

  /**
   * Find matching items. Returns items sorted by relevance.
   * @param {string} query - Search query
   * @returns {string[]}
   */
  matches(query) {
    return this.matchesWith(query, this.config);
  }

  /**
   * Find matching items with custom config. Returns items sorted by relevance.
   * @param {string} query - Search query
   * @param {QuickMatchConfig} config - Configuration to use
   * @returns {string[]}
   */
  matchesWith(query, config) {
    const { limit, trigramBudget, separators } = config;

    const normalizedQuery = normalizeQuery(query);

    if (!normalizedQuery || normalizedQuery.length > this.maxQueryLength) {
      return [];
    }

    const queryWords = parseWords(
      normalizedQuery,
      separators,
      this.maxWordLength,
    );

    if (!queryWords.length || queryWords.length > this.maxWordCount) {
      return [];
    }

    // Words present in the word index narrow the result set exactly; the
    // rest (3+ chars, up to the budget) are candidates for trigram matching.
    const knownWords = [];
    const unknownWords = [];

    for (const word of queryWords) {
      const matchingItems = this.wordIndex.get(word);

      if (matchingItems) {
        knownWords.push(matchingItems);
      } else if (word.length >= 3 && unknownWords.length < trigramBudget) {
        unknownWords.push(word);
      }
    }

    const exactMatches = intersectAll(knownWords);
    const hasExactMatches = exactMatches.length > 0;
    const needsFuzzyMatching = unknownWords.length > 0 && trigramBudget > 0;

    if (!needsFuzzyMatching) {
      if (!hasExactMatches) return [];
      return this.sortedByLength(exactMatches, limit);
    }

    const scores = new Map();

    if (hasExactMatches) {
      // Exact matches start at 1; trigram hits then only boost these items.
      for (const index of exactMatches) {
        scores.set(index, 1);
      }
    }

    const minItemLength = Math.max(0, normalizedQuery.length - 3);

    const hitCount = this.scoreByTrigrams({
      unknownWords,
      budget: trigramBudget,
      scores,
      hasExactMatches,
      minItemLength,
    });

    const minScoreToInclude = Math.max(config.minScore, Math.ceil(hitCount / 2));

    return this.rankedResults(scores, minScoreToInclude, limit);
  }

  /**
   * Look up a bounded number of trigrams from the unknown words and add one
   * point per hit to every item containing the trigram.
   * @private
   * @param {{unknownWords: string[], budget: number, scores: Map<number, number>, hasExactMatches: boolean, minItemLength: number}} args
   * @returns {number} Number of looked-up trigrams that exist in the index
   */
  scoreByTrigrams({
    unknownWords,
    budget,
    scores,
    hasExactMatches,
    minItemLength,
  }) {
    const visitedTrigrams = new Set();
    let budgetRemaining = budget;
    let hitCount = 0;

    outer: for (let round = 0; round < budget; round++) {
      for (const word of unknownWords) {
        if (budgetRemaining <= 0) break outer;

        const position = pickTrigramPosition(word.length, round);
        if (position < 0) continue;

        const trigram =
          word[position] + word[position + 1] + word[position + 2];

        if (visitedTrigrams.has(trigram)) continue;
        visitedTrigrams.add(trigram);

        // Budget is spent per distinct lookup, whether or not it hits.
        budgetRemaining--;

        const matchingItems = this.trigramIndex.get(trigram);
        if (!matchingItems) continue;

        hitCount++;

        for (const itemIndex of matchingItems) {
          if (hasExactMatches) {
            // Only items already in the exact-match pool can gain score.
            const currentScore = scores.get(itemIndex);
            if (currentScore !== undefined) {
              scores.set(itemIndex, currentScore + 1);
            }
          } else if (this.items[itemIndex].length >= minItemLength) {
            scores.set(itemIndex, (scores.get(itemIndex) || 0) + 1);
          }
        }
      }
    }

    return hitCount;
  }

  /**
   * Sort indices by item length (shortest first), cap at `limit`, and map
   * them to items. Sorts and truncates `indices` in place.
   * @private
   * @param {number[]} indices
   * @param {number} limit
   * @returns {string[]}
   */
  sortedByLength(indices, limit) {
    const { items } = this;
    indices.sort((a, b) => items[a].length - items[b].length);
    // Array length must be a non-negative integer; a fractional or negative
    // limit would otherwise throw RangeError here.
    if (indices.length > limit) indices.length = Math.max(0, Math.floor(limit));
    return indices.map((i) => items[i]);
  }

  /**
   * Keep items scoring at least `minScore`, order by score (highest first)
   * then by item length (shortest first), and cap at `limit`.
   * @private
   * @param {Map<number, number>} scores
   * @param {number} minScore
   * @param {number} limit
   * @returns {string[]}
   */
  rankedResults(scores, minScore, limit) {
    const { items } = this;
    const results = [];

    for (const [index, score] of scores) {
      if (score >= minScore) {
        results.push({ index, score });
      }
    }

    results.sort((a, b) => {
      if (b.score !== a.score) return b.score - a.score;
      return items[a.index].length - items[b.index].length;
    });

    // Same guard as sortedByLength: never assign a non-integer length.
    if (results.length > limit) results.length = Math.max(0, Math.floor(limit));

    return results.map((r) => items[r.index]);
  }
}
|
||||
|
||||
/**
 * Normalize a raw query: strip leading/trailing code units <= 32, drop every
 * code unit >= 128, and lowercase ASCII A-Z.
 * @param {string} query
 * @returns {string}
 */
function normalizeQuery(query) {
  let first = 0;
  let last = query.length;

  while (first < last && query.charCodeAt(first) <= 32) first += 1;
  while (last > first && query.charCodeAt(last - 1) <= 32) last -= 1;

  const kept = [];
  for (let i = first; i < last; i += 1) {
    const code = query.charCodeAt(i);
    if (code >= 128) continue;
    const isUpper = code >= 65 && code <= 90;
    kept.push(isUpper ? String.fromCharCode(code + 32) : query.charAt(i));
  }

  return kept.join("");
}
|
||||
|
||||
/**
 * Split text into unique words, in order of first appearance.
 * Empty segments and words longer than `maxLength` are discarded.
 * @param {string} text
 * @param {string} separators - Each character acts as a word boundary
 * @param {number} maxLength
 * @returns {string[]}
 */
function parseWords(text, separators, maxLength) {
  /** @type {Set<string>} */
  const unique = new Set();
  let wordStart = 0;

  // The cursor runs one past the end so the trailing word is flushed.
  for (let cursor = 0; cursor <= text.length; cursor += 1) {
    const atBoundary =
      cursor === text.length || separators.includes(text[cursor]);
    if (!atBoundary) continue;

    if (cursor > wordStart) {
      const candidate = text.slice(wordStart, cursor);
      if (candidate.length <= maxLength) unique.add(candidate);
    }
    wordStart = cursor + 1;
  }

  return [...unique];
}
|
||||
|
||||
/**
 * Append `value` to the posting list stored under `key`, creating the list
 * on first use.
 *
 * A value equal to the list's last entry is skipped. Callers add item
 * indices in ascending order, so this keeps each list free of duplicates
 * when an item contains the same word more than once.
 * @param {Map<string, number[]>} index
 * @param {string} key
 * @param {number} value
 */
function addToIndex(index, key, value) {
  const existing = index.get(key);

  if (!existing) {
    index.set(key, [value]);
    return;
  }

  if (existing[existing.length - 1] !== value) {
    existing.push(value);
  }
}
|
||||
|
||||
/**
 * Record every 3-character window of `word` under `itemIndex`.
 * Words shorter than three characters contribute nothing. An index equal to
 * a posting list's last entry is not appended again.
 * @param {Map<string, number[]>} index
 * @param {string} word
 * @param {number} itemIndex
 */
function addTrigramsToIndex(index, word, itemIndex) {
  const lastStart = word.length - 3;

  for (let start = 0; start <= lastStart; start += 1) {
    const trigram = word.slice(start, start + 3);
    const postings = index.get(trigram);

    if (!postings) {
      index.set(trigram, [itemIndex]);
      continue;
    }

    const tail = postings[postings.length - 1];
    if (tail !== itemIndex) postings.push(itemIndex);
  }
}
|
||||
|
||||
/**
 * Intersect several ascending index arrays.
 * Starts from a copy of the shortest array and filters it against each of
 * the others using binary search. Always returns a fresh array.
 * @param {number[][]} arrays
 * @returns {number[]}
 */
function intersectAll(arrays) {
  if (arrays.length === 0) return [];

  // First array with the minimum length.
  const pivot = arrays.reduce(
    (best, candidate, i) => (candidate.length < arrays[best].length ? i : best),
    0,
  );

  let survivors = arrays[pivot].slice();

  for (let i = 0; i < arrays.length && survivors.length > 0; i += 1) {
    if (i === pivot) continue;
    const other = arrays[i];
    survivors = survivors.filter((value) => binarySearch(other, value));
  }

  return survivors;
}
|
||||
|
||||
/**
 * Report whether `value` occurs in an ascending array.
 * @param {number[]} sortedArray
 * @param {number} value
 * @returns {boolean}
 */
function binarySearch(sortedArray, value) {
  let floor = 0;
  let ceiling = sortedArray.length - 1;

  while (floor <= ceiling) {
    const probeAt = (floor + ceiling) >> 1;
    const probe = sortedArray[probeAt];

    if (probe === value) return true;

    if (probe < value) {
      floor = probeAt + 1;
    } else {
      ceiling = probeAt - 1;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Choose which trigram of a word to look up in a given round.
 *
 * Round 0 picks the first trigram, round 1 the last, round 2 the middle one.
 * Later rounds alternate around the middle (even rounds step right, odd
 * rounds step left). Returns -1 when the round has no usable position.
 * @param {number} wordLength
 * @param {number} round
 * @returns {number} Start offset of the trigram, or -1
 */
function pickTrigramPosition(wordLength, round) {
  const lastStart = wordLength - 3;
  if (lastStart < 0) return -1;

  switch (round) {
    case 0:
      return 0;
    case 1:
      if (lastStart > 0) return lastStart;
      break;
    case 2:
      if (lastStart > 1) return lastStart >> 1;
      break;
    default:
      break;
  }

  // Short words have no positions beyond first/last/middle.
  if (lastStart <= 2) return -1;

  const center = lastStart >> 1;
  const spread = (round - 2) >> 1;
  const stepsLeft = (round & 1) !== 0;
  const candidate = stepsLeft ? Math.max(0, center - spread) : center + spread;

  // Reject positions already covered by rounds 0-2 or out of range.
  const usable =
    candidate !== 0 && candidate < lastStart && candidate !== center;
  return usable ? candidate : -1;
}
|
||||
@@ -0,0 +1,417 @@
|
||||
const DEFAULT_SEPARATORS = "_- :/";
const DEFAULT_TRIGRAM_BUDGET = 6;
const DEFAULT_LIMIT = 100;
const DEFAULT_MIN_SCORE = 2;

/**
 * Search configuration.
 *
 * Defaults work well for most use cases.
 * Tweak `trigramBudget` to trade speed for typo tolerance.
 * Every `with*` setter mutates this instance and returns it for chaining.
 */
export class QuickMatchConfig {
  /** Characters that separate words in items (e.g. "hash_rate" → ["hash", "rate"]).
   * @type {string} */
  separators = DEFAULT_SEPARATORS;

  /** Max results returned per query.
   * @type {number} */
  limit = DEFAULT_LIMIT;

  /** How hard to try matching typos (0 = off, 3–6 = fast, 9–15 = thorough, max 20).
   * @type {number} */
  trigramBudget = DEFAULT_TRIGRAM_BUDGET;

  /** Min overlap required for a typo match. Higher = fewer false positives.
   * @type {number} */
  minScore = DEFAULT_MIN_SCORE;

  /**
   * Floors the value to a whole number so a fractional limit cannot yield
   * one result too many; NaN falls back to the default limit instead of
   * making every query return nothing.
   * @param {number} n - Max results (default: 100, min: 1)
   * @returns {this}
   */
  withLimit(n) {
    const whole = Math.floor(n);
    this.limit = Number.isNaN(whole) ? DEFAULT_LIMIT : Math.max(1, whole);
    return this;
  }

  /**
   * @param {number} n - Trigram budget (0-20, default: 6)
   * @returns {this}
   */
  withTrigramBudget(n) {
    this.trigramBudget = Math.max(0, Math.min(20, n));
    return this;
  }

  /**
   * @param {string} s - Separator characters (default: '_- :/')
   * @returns {this}
   */
  withSeparators(s) {
    this.separators = s;
    return this;
  }

  /**
   * @param {number} n - Min trigram score (default: 2, min: 1)
   * @returns {this}
   */
  withMinScore(n) {
    this.minScore = Math.max(1, n);
    return this;
  }
}
|
||||
|
||||
/**
 * Instant search over a list of strings.
 *
 * Supports exact words, prefixes ("dom" → "dominance"), joined words
 * ("hashrate" → "hash_rate"), and typo tolerance ("suply" → "supply").
 * Results are ranked: exact matches first, then by specificity.
 */
export class QuickMatch {
  /**
   * Build the word and trigram indexes.
   * @param {string[]} items - Searchable items (lowercase)
   * @param {QuickMatchConfig} [config]
   */
  constructor(items, config = new QuickMatchConfig()) {
    this.config = config;
    this.items = items;
    /** @type {Map<string, number[]>} */
    this.wordIndex = new Map();
    /** @type {Map<string, number[]>} */
    this.trigramIndex = new Map();
    this._sepLookup = sepLookup(config.separators);
    // Scratch state reused across queries: per-item scores plus the list of
    // indices touched, so only those entries need resetting afterwards.
    this._scores = new Uint32Array(items.length);
    /** @type {number[]} */
    this._dirty = [];

    let maxWordLen = 0;
    let maxQueryLen = 0;
    let maxWords = 0;
    const sep = this._sepLookup;

    for (let idx = 0; idx < items.length; idx++) {
      const item = items[idx];
      if (item.length > maxQueryLen) maxQueryLen = item.length;

      const words = [];
      let start = 0;

      // `i` runs one past the end so the final word is flushed too.
      for (let i = 0; i <= item.length; i++) {
        if (i < item.length && !sep[item.charCodeAt(i)]) continue;
        if (i > start) {
          const word = item.slice(start, i);
          words.push(word);
          if (word.length > maxWordLen) maxWordLen = word.length;
          // Index every prefix so partial words resolve as known words.
          for (let len = 1; len <= word.length; len++) {
            addToIndex(this.wordIndex, word.slice(0, len), idx);
          }
          for (let k = 0; k <= word.length - 3; k++) {
            addToIndex(this.trigramIndex, word[k] + word[k + 1] + word[k + 2], idx);
          }
        }
        start = i + 1;
      }

      // Index adjacent word pairs joined without a separator. Prefixes no
      // longer than the first word are already covered above.
      for (let i = 0; i < words.length - 1; i++) {
        const compound = words[i] + words[i + 1];
        const from = words[i].length + 1;
        for (let len = from; len <= compound.length; len++) {
          addToIndex(this.wordIndex, compound.slice(0, len), idx);
        }
      }

      if (words.length > maxWords) maxWords = words.length;
    }

    // Queries may exceed the indexed maxima by a small slack before being
    // rejected outright in matchesWith().
    this.maxWordLen = maxWordLen + 4;
    this.maxQueryLen = maxQueryLen + 6;
    this.maxWords = maxWords + 2;
  }

  /**
   * Search using the configuration given at construction.
   * @param {string} query
   * @returns {string[]}
   */
  matches(query) {
    return this.matchesWith(query, this.config);
  }

  /**
   * Search using an explicit configuration.
   * @param {string} query
   * @param {QuickMatchConfig} config
   * @returns {string[]}
   */
  matchesWith(query, config) {
    const { limit, trigramBudget } = config;
    // Reuse the cached lookup table unless the separators differ.
    const sep =
      config.separators === this.config.separators
        ? this._sepLookup
        : sepLookup(config.separators);

    const q = normalize(query);
    if (!q || q.length > this.maxQueryLen) return [];

    const qwords = splitWords(q, sep, this.maxWordLen);
    if (!qwords.length || qwords.length > this.maxWords) return [];

    const known = [];
    const unknown = [];

    for (const w of qwords) {
      const hits = this.wordIndex.get(w);
      if (hits) {
        known.push(hits);
      } else if (w.length >= 3 && unknown.length < trigramBudget) {
        unknown.push(w);
      }
    }

    const pool = intersect(known);

    // Try typo matching for unknown words
    if (unknown.length && trigramBudget) {
      const { _scores: scores, _dirty: dirty } = this;
      let result;

      try {
        if (pool) {
          for (const i of pool) {
            scores[i] = 1;
            dirty.push(i);
          }
        }

        const hitCount = this._scoreTrigrams(
          unknown,
          trigramBudget,
          pool !== null,
          Math.max(0, q.length - 3),
        );
        const minScore = Math.max(config.minScore, Math.ceil(hitCount / 2));
        result = this._rank(dirty, minScore, qwords, sep, limit);
      } finally {
        // Always restore the shared scratch buffers, even on an exception,
        // so a failed query cannot corrupt later ones.
        for (const i of dirty) scores[i] = 0;
        dirty.length = 0;
      }

      if (result.length > 0) return result;
    }

    // Rank known candidates (intersection, or union as fallback)
    const candidates = pool || union(known);
    return candidates.length > 0
      ? this._rank(candidates, null, qwords, sep, limit)
      : [];
  }

  /**
   * Look up a bounded number of trigrams from the unknown words and add one
   * point per hit to matching items in the scratch score buffer.
   * @private
   * @param {string[]} unknown
   * @param {number} budget - Max distinct trigram lookups
   * @param {boolean} poolOnly - Only boost items that already have a score
   * @param {number} minLen - Min item length to score when not pool-only
   * @returns {number} Number of looked-up trigrams that exist in the index
   */
  _scoreTrigrams(unknown, budget, poolOnly, minLen) {
    const { _scores: scores, _dirty: dirty, items } = this;
    const visited = new Set();
    const maxRounds = budget;
    let remaining = budget;
    let hits = 0;

    outer: for (let round = 0; round < maxRounds; round++) {
      for (const word of unknown) {
        if (remaining <= 0) break outer;

        const pos = trigramPosition(word.length, round);
        if (pos < 0) continue;

        const tri = word[pos] + word[pos + 1] + word[pos + 2];
        if (visited.has(tri)) continue;
        visited.add(tri);
        // Budget is spent per distinct lookup, whether or not it hits.
        remaining--;

        const matched = this.trigramIndex.get(tri);
        if (!matched) continue;
        hits++;

        if (poolOnly) {
          for (let j = 0; j < matched.length; j++) {
            const i = matched[j];
            if (scores[i] > 0) scores[i]++;
          }
        } else {
          for (let j = 0; j < matched.length; j++) {
            const i = matched[j];
            if (items[i].length >= minLen) {
              // First touch: remember the index so it can be reset later.
              if (scores[i] === 0) dirty.push(i);
              scores[i]++;
            }
          }
        }
      }
    }

    return hits;
  }

  /**
   * Bucket candidates by prefix score (2, then 1, then 0), sort each bucket
   * by score (highest first) then item length (shortest first), and return
   * up to `limit` items.
   * @private
   * @param {number[]} indices
   * @param {number|null} minScore - Score threshold, or null to skip it
   * @param {string[]} qwords
   * @param {Uint8Array} sep
   * @param {number} limit
   * @returns {string[]}
   */
  _rank(indices, minScore, qwords, sep, limit) {
    const { items, _scores: scores } = this;
    // A fractional limit would otherwise let one extra item through.
    const cap = Math.floor(limit);
    const buckets = [[], [], []]; // ps=0, ps=1, ps=2

    for (let i = 0; i < indices.length; i++) {
      const idx = indices[i];
      if (minScore !== null && scores[idx] < minScore) continue;
      buckets[prefixScore(items[idx], qwords, sep)].push(idx);
    }

    const results = [];
    for (let ps = 2; ps >= 0 && results.length < cap; ps--) {
      const bucket = buckets[ps];
      if (!bucket.length) continue;
      bucket.sort(
        (a, b) => scores[b] - scores[a] || items[a].length - items[b].length,
      );
      const take = Math.min(bucket.length, cap - results.length);
      for (let i = 0; i < take; i++) results.push(items[bucket[i]]);
    }

    return results;
  }
}
|
||||
|
||||
// --- Helpers ---
|
||||
|
||||
/**
 * Normalize a raw query: strip leading/trailing code units <= 32, drop every
 * code unit >= 128, and lowercase ASCII A-Z.
 * @param {string} query
 * @returns {string}
 */
function normalize(query) {
  let from = 0;
  let to = query.length;
  while (from < to && query.charCodeAt(from) <= 32) from += 1;
  while (to > from && query.charCodeAt(to - 1) <= 32) to -= 1;

  const pieces = [];
  for (let i = from; i < to; i += 1) {
    const code = query.charCodeAt(i);
    if (code < 128) {
      const isUpper = code >= 65 && code <= 90;
      pieces.push(isUpper ? String.fromCharCode(code + 32) : query.charAt(i));
    }
  }
  return pieces.join("");
}
|
||||
|
||||
/**
 * Build a 128-entry table with 1 at the char code of each ASCII separator.
 * Separator characters with code >= 128 are ignored.
 * @param {string} separators
 * @returns {Uint8Array}
 */
function sepLookup(separators) {
  const table = new Uint8Array(128);
  for (const ch of separators) {
    const code = ch.codePointAt(0);
    if (code < 128) table[code] = 1;
  }
  return table;
}
|
||||
|
||||
/**
 * Split text into unique words, in order of first appearance.
 * Empty segments and words longer than `maxLen` are discarded.
 * @param {string} text
 * @param {Uint8Array} sep - Lookup table, truthy at separator char codes
 * @param {number} maxLen
 * @returns {string[]}
 */
function splitWords(text, sep, maxLen) {
  /** @type {Set<string>} */
  const unique = new Set();
  let wordStart = 0;

  // The cursor runs one past the end so the trailing word is flushed.
  for (let cursor = 0; cursor <= text.length; cursor += 1) {
    const insideWord = cursor < text.length && !sep[text.charCodeAt(cursor)];
    if (insideWord) continue;

    if (cursor > wordStart) {
      const candidate = text.slice(wordStart, cursor);
      if (candidate.length <= maxLen) unique.add(candidate);
    }
    wordStart = cursor + 1;
  }

  return [...unique];
}
|
||||
|
||||
/**
 * Append `value` to the posting list stored under `key`, creating the list
 * on first use. A value equal to the list's last entry is not appended.
 * @param {Map<string, number[]>} index
 * @param {string} key
 * @param {number} value
 */
function addToIndex(index, key, value) {
  const postings = index.get(key);
  if (!postings) {
    index.set(key, [value]);
    return;
  }
  const tail = postings[postings.length - 1];
  if (tail !== value) postings.push(value);
}
|
||||
|
||||
/**
 * Merge index arrays, keeping each index once in order of first appearance.
 * With zero or one input the first array itself (or an empty array) is
 * returned without copying.
 * @param {number[][]} arrays
 * @returns {number[]}
 */
function union(arrays) {
  if (arrays.length <= 1) return arrays[0] || [];

  // A Set iterates in insertion order, which preserves first-seen order.
  const merged = new Set();
  for (const list of arrays) {
    for (const idx of list) merged.add(idx);
  }
  return [...merged];
}
|
||||
|
||||
/**
 * Intersect several ascending index arrays.
 * With zero or one input the first array itself (or null) is returned.
 * Otherwise a copy of the shortest array is filtered against the others
 * via binary search; null is returned once nothing remains.
 * @param {number[][]} arrays
 * @returns {number[]|null}
 */
function intersect(arrays) {
  if (arrays.length <= 1) return arrays[0] || null;

  // First array with the minimum length.
  const shortest = arrays.reduce(
    (best, candidate, i) => (candidate.length < arrays[best].length ? i : best),
    0,
  );

  let common = arrays[shortest].slice();
  for (let i = 0; i < arrays.length; i += 1) {
    if (i === shortest) continue;
    const other = arrays[i];
    common = common.filter((value) => bsearch(other, value));
    if (common.length === 0) return null;
  }
  return common;
}
|
||||
|
||||
/**
 * Report whether `val` occurs in an ascending array.
 * @param {number[]} arr
 * @param {number} val
 * @returns {boolean}
 */
function bsearch(arr, val) {
  let low = 0;
  let high = arr.length - 1;

  while (low <= high) {
    const middle = (low + high) >> 1;
    const probe = arr[middle];
    if (probe === val) return true;
    if (probe < val) {
      low = middle + 1;
    } else {
      high = middle - 1;
    }
  }

  return false;
}
|
||||
|
||||
/**
 * Score how well the query words line up with the leading words of an item.
 *
 * The n-th query word must be a prefix of the item's n-th word.
 * @param {string} item
 * @param {string[]} qwords
 * @param {Uint8Array} sep - Lookup table, truthy at separator char codes
 * @returns {number} 0 if any word fails to line up, 2 if the item has no
 *   further words after the matched ones, 1 otherwise
 */
function prefixScore(item, qwords, sep) {
  const end = item.length;
  let cursor = 0;

  const skipSeparators = () => {
    while (cursor < end && sep[item.charCodeAt(cursor)]) cursor += 1;
  };

  for (const queryWord of qwords) {
    skipSeparators();
    if (cursor >= end) return 0;

    const wordStart = cursor;
    while (cursor < end && !sep[item.charCodeAt(cursor)]) cursor += 1;

    const wordLength = cursor - wordStart;
    if (wordLength < queryWord.length) return 0;
    if (!item.startsWith(queryWord, wordStart)) return 0;
  }

  skipSeparators();
  return cursor >= end ? 2 : 1;
}
|
||||
|
||||
/**
 * Choose which trigram of a word to look up in a given round.
 *
 * Round 0 picks the first trigram, round 1 the last, round 2 the middle one.
 * Later rounds alternate around the middle (even rounds step right, odd
 * rounds step left). Returns -1 when the round has no usable position.
 * @param {number} len - Word length
 * @param {number} round
 * @returns {number} Start offset of the trigram, or -1
 */
function trigramPosition(len, round) {
  const lastStart = len - 3;
  if (lastStart < 0) return -1;

  switch (round) {
    case 0:
      return 0;
    case 1:
      if (lastStart > 0) return lastStart;
      break;
    case 2:
      if (lastStart > 1) return lastStart >> 1;
      break;
    default:
      break;
  }

  // Short words have no positions beyond first/last/middle.
  if (lastStart <= 2) return -1;

  const center = lastStart >> 1;
  const spread = (round - 2) >> 1;
  const stepsLeft = (round & 1) !== 0;
  const candidate = stepsLeft ? Math.max(0, center - spread) : center + spread;

  // Reject positions already covered by rounds 0-2 or out of range.
  if (candidate === 0 || candidate >= lastStart || candidate === center) {
    return -1;
  }
  return candidate;
}
|
||||
Reference in New Issue
Block a user