/**
 * Advanced search query parser — inspired by ants (dergigi/ants).
 *
 * Supported modifiers:
 *   by:        — filter by author (npub, or an identifier resolved later)
 *   mentions:  — notes that tag a specific pubkey (npub)
 *   kind:      — filter by event kind (number or alias)
 *   is:        — shorthand for kind (article, note, highlight, etc.)
 *   has:       — notes containing specific media (image, video, audio, link, youtube)
 *   since:     — events after date (YYYY-MM-DD)
 *   until:     — events before date (YYYY-MM-DD)
 *   #hashtag   — hashtag search
 *   "quoted phrase" — exact phrase (passed to NIP-50 search)
 *
 * Boolean:
 *   OR between terms — runs multiple queries (client-side merge)
 *
 * Everything else is passed as NIP-50 full-text search.
 */
import { nip19 } from "@nostr-dev-kit/ndk";

export interface ParsedSearch {
  /** Text terms for NIP-50 search field */
  searchTerms: string[];
  /** Hashtags to filter by (#t tag) */
  hashtags: string[];
  /** Author pubkeys (hex) to filter by */
  authors: string[];
  /** Pubkeys mentioned in events (#p tag) */
  mentions: string[];
  /** Event kinds to search */
  kinds: number[];
  /** Media content filters (applied client-side) */
  hasFilters: string[];
  /** Unix timestamp — events after this */
  since: number | null;
  /** Unix timestamp — events before this */
  until: number | null;
  /** Original raw query for display */
  raw: string;
  /** Whether this is an OR query (multiple sub-queries) */
  orQueries: ParsedSearch[] | null;
  /** Unresolved NIP-05 identifiers that need async resolution */
  unresolvedNip05: string[];
}

/** Human-friendly names accepted by `kind:` mapped to event kind numbers. */
const KIND_ALIASES: Record<string, number> = {
  note: 1,
  text: 1,
  article: 30023,
  "long-form": 30023,
  longform: 30023,
  reaction: 7,
  repost: 6,
  dm: 4,
  highlight: 9802,
  bookmark: 10003,
  profile: 0,
  metadata: 0,
  contacts: 3,
  relay: 10002,
  zap: 9735,
};

/** Names accepted by `is:` — every `kind:` alias plus `code`. */
const IS_ALIASES: Record<string, number> = {
  ...KIND_ALIASES,
  code: 1, // client-side filter for code blocks
};

/** Content patterns used by `has:` filters (none are global, so `.test` is stateless). */
const MEDIA_PATTERNS: Record<string, RegExp> = {
  image: /\.(jpg|jpeg|png|gif|webp|avif|svg|apng)/i,
  video: /\.(mp4|webm|mov|m4v|ogg)/i,
  audio: /\.(mp3|wav|flac|m4a|ogg)/i,
  link: /https?:\/\//i,
  youtube: /youtu(\.be|be\.com)/i,
};

/**
 * Look up `key` among the table's OWN properties only.
 *
 * A plain `table[key]` also finds inherited members such as `constructor`
 * or `__proto__`, which user-typed modifier values can name.
 */
function lookupOwn<T>(table: Record<string, T>, key: string): T | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
}

/**
 * Parse a date string (YYYY-MM-DD) to a unix timestamp (seconds, UTC midnight).
 * Returns null when the string is malformed or is not a real calendar date
 * (e.g. 2024-02-30 or 2024-13-01).
 */
function parseDateToUnix(dateStr: string): number | null {
  const match = /^(\d{4})-(\d{2})-(\d{2})$/.exec(dateStr);
  if (!match) return null;

  const year = Number(match[1]);
  const monthIndex = Number(match[2]) - 1;
  const day = Number(match[3]);

  // Start from the epoch (UTC midnight) and set only the date parts.
  // setUTCFullYear takes the year literally, unlike Date.UTC which remaps 0-99.
  const d = new Date(0);
  d.setUTCFullYear(year, monthIndex, day);

  // Out-of-range components roll over silently; reject them by round-tripping.
  const isRealDate =
    d.getUTCFullYear() === year &&
    d.getUTCMonth() === monthIndex &&
    d.getUTCDate() === day;
  if (!isRealDate) return null;

  return Math.floor(d.getTime() / 1000);
}

/**
 * Try to resolve an npub to hex pubkey.
 * Returns hex pubkey or null.
 */
function resolveNpub(input: string): string | null {
  if (!input.startsWith("npub1")) return null;
  try {
    const decoded = nip19.decode(input);
    return decoded.type === "npub" ? decoded.data : null;
  } catch {
    // Not a valid npub.
    return null;
  }
}

/**
 * Tokenize a query string, respecting quoted phrases.
 *
 * Tokens are separated by any whitespace outside quotes. A quoted phrase is
 * emitted with its surrounding quotes; an unterminated quote yields the
 * remaining text as a single token without quotes.
 */
function tokenize(query: string): string[] {
  const tokens: string[] = [];
  let current = "";
  let inQuote = false;

  const flush = (): void => {
    const text = current.trim();
    if (text) tokens.push(text);
    current = "";
  };

  for (const char of query) {
    if (char === '"') {
      if (inQuote) {
        tokens.push(`"${current}"`);
        current = "";
        inQuote = false;
      } else {
        flush();
        inQuote = true;
      }
    } else if (!inQuote && /\s/.test(char)) {
      flush();
    } else {
      current += char;
    }
  }
  flush();

  return tokens;
}

/**
 * Split a query on whitespace-delimited OR separators (case-insensitive)
 * that are outside quoted phrases. Returns the trimmed, non-empty parts.
 */
function splitTopLevelOr(query: string): string[] {
  // Match quoted spans first so an OR inside them is consumed, not split on.
  // Created per call: a global regex carries lastIndex state between uses.
  const pattern = /"[^"]*"?|\s+OR\s+/gi;
  const parts: string[] = [];
  let start = 0;
  let m: RegExpExecArray | null;

  while ((m = pattern.exec(query)) !== null) {
    if (m[0].startsWith('"')) continue; // quoted phrase, not a separator
    parts.push(query.slice(start, m.index));
    start = m.index + m[0].length;
  }
  parts.push(query.slice(start));

  return parts.map((p) => p.trim()).filter(Boolean);
}

/**
 * Parse a search query into structured search parameters.
 *
 * A query containing top-level OR separators returns an otherwise empty
 * result whose `orQueries` holds one parsed sub-query per alternative.
 * Modifier values that cannot be interpreted are dropped silently.
 */
export function parseSearchQuery(raw: string): ParsedSearch {
  const trimmed = raw.trim();

  const result: ParsedSearch = {
    searchTerms: [],
    hashtags: [],
    authors: [],
    mentions: [],
    kinds: [],
    hasFilters: [],
    since: null,
    until: null,
    raw: trimmed,
    orQueries: null,
    unresolvedNip05: [],
  };

  // Handle OR queries — split on top-level OR (quoted phrases stay intact).
  const alternatives = splitTopLevelOr(trimmed);
  if (alternatives.length > 1) {
    result.orQueries = alternatives.map((part) => parseSearchQuery(part));
    return result;
  }

  for (const token of tokenize(trimmed)) {
    const lower = token.toLowerCase();

    // by:
    if (lower.startsWith("by:")) {
      const value = token.slice(3);
      const hex = resolveNpub(value);
      if (hex) {
        result.authors.push(hex);
      } else if (value) {
        // Not a decodable npub (NIP-05 or name) — defer to async resolution.
        result.unresolvedNip05.push(value);
      }
      continue;
    }

    // mentions:
    if (lower.startsWith("mentions:")) {
      const hex = resolveNpub(token.slice(9));
      if (hex) result.mentions.push(hex);
      continue;
    }

    // kind:
    if (lower.startsWith("kind:")) {
      const value = lower.slice(5);
      // Require all digits so inputs like "1abc" or "0x10" are not misread.
      const kind = /^\d+$/.test(value) ? Number(value) : lookupOwn(KIND_ALIASES, value);
      if (kind !== undefined) result.kinds.push(kind);
      continue;
    }

    // is:
    if (lower.startsWith("is:")) {
      const value = lower.slice(3);
      const kind = lookupOwn(IS_ALIASES, value);
      if (kind !== undefined) result.kinds.push(kind);
      if (value === "code") result.hasFilters.push("code");
      continue;
    }

    // has:
    if (lower.startsWith("has:")) {
      const value = lower.slice(4);
      // An empty filter would match every event, so ignore it.
      if (value) result.hasFilters.push(value);
      continue;
    }

    // since:
    if (lower.startsWith("since:")) {
      const ts = parseDateToUnix(token.slice(6));
      // Compare against null: timestamp 0 (1970-01-01) is a valid value.
      if (ts !== null) result.since = ts;
      continue;
    }

    // until:
    if (lower.startsWith("until:")) {
      const ts = parseDateToUnix(token.slice(6));
      if (ts !== null) result.until = ts;
      continue;
    }

    // #hashtag
    if (token.startsWith("#") && token.length > 1) {
      result.hashtags.push(lower.slice(1));
      continue;
    }

    // Regular term or quoted phrase (quotes are kept for NIP-50).
    result.searchTerms.push(token);
  }

  return result;
}

/**
 * Check if an event's content matches a "has:" filter.
 *
 * Known filters (code, image, video, audio, link, youtube) use dedicated
 * checks; any other filter is a case-insensitive substring match.
 */
export function matchesHasFilter(content: string, filter: string): boolean {
  const key = filter.toLowerCase();

  if (key === "code") {
    // A single backtick covers both inline code and fenced blocks.
    return content.includes("`");
  }

  const pattern = lookupOwn(MEDIA_PATTERNS, key);
  if (pattern) return pattern.test(content);

  // Generic: just check if the filter text appears in content
  return content.toLowerCase().includes(key);
}

/**
 * Format a ParsedSearch back into a human-readable hint.
 *
 * OR queries are described as their alternatives joined by " OR ".
 * Returns "empty search" when nothing was parsed.
 */
export function describeSearch(parsed: ParsedSearch): string {
  if (parsed.orQueries && parsed.orQueries.length > 0) {
    return parsed.orQueries.map((sub) => describeSearch(sub)).join(" OR ");
  }

  // since/until are UTC midnight; format in UTC so the day does not shift.
  const formatDay = (unixSeconds: number): string =>
    new Date(unixSeconds * 1000).toLocaleDateString(undefined, { timeZone: "UTC" });

  const parts: string[] = [];

  if (parsed.searchTerms.length > 0) parts.push(parsed.searchTerms.join(" "));
  if (parsed.hashtags.length > 0) parts.push(parsed.hashtags.map((h) => `#${h}`).join(" "));
  if (parsed.authors.length > 0) parts.push(`by ${parsed.authors.length} author(s)`);
  if (parsed.unresolvedNip05.length > 0) parts.push(`by ${parsed.unresolvedNip05.join(", ")}`);
  if (parsed.mentions.length > 0) parts.push(`mentions ${parsed.mentions.length} pubkey(s)`);
  if (parsed.kinds.length > 0) parts.push(`kind: ${parsed.kinds.join(", ")}`);
  if (parsed.hasFilters.length > 0) parts.push(`has: ${parsed.hasFilters.join(", ")}`);
  if (parsed.since !== null) parts.push(`since ${formatDay(parsed.since)}`);
  if (parsed.until !== null) parts.push(`until ${formatDay(parsed.until)}`);

  return parts.join(" · ") || "empty search";
}