From 10332fbaa90b9cd4824191b63ffef304da8b7bdc Mon Sep 17 00:00:00 2001 From: "Thomas G. Lopes" <26071571+TGlide@users.noreply.github.com> Date: Wed, 18 Jun 2025 17:29:07 +0100 Subject: [PATCH] better search --- src/lib/backend/convex/conversations.ts | 76 ++++++---- src/lib/backend/convex/user_settings.ts | 2 +- src/lib/utils/fuzzy-search.ts | 188 +++++++++++++++++++++--- src/routes/account/models/+page.svelte | 2 +- src/routes/chat/model-picker.svelte | 2 +- src/routes/chat/search-modal.svelte | 93 ++++++++---- 6 files changed, 288 insertions(+), 75 deletions(-) diff --git a/src/lib/backend/convex/conversations.ts b/src/lib/backend/convex/conversations.ts index b12c08e..e3b30d2 100644 --- a/src/lib/backend/convex/conversations.ts +++ b/src/lib/backend/convex/conversations.ts @@ -1,8 +1,8 @@ import { v } from 'convex/values'; -import { fuzzyMatchString } from '../../utils/fuzzy-search'; +import enhancedSearch, { type SearchResult } from '../../utils/fuzzy-search'; import { getFirstSentence } from '../../utils/strings'; import { api } from './_generated/api'; -import { Doc, type Id } from './_generated/dataModel'; +import { type Doc, type Id } from './_generated/dataModel'; import { query } from './_generated/server'; import { type SessionObj } from './betterAuth'; import { mutation } from './functions'; @@ -284,6 +284,7 @@ export const search = query({ args: { session_token: v.string(), search_term: v.string(), + search_mode: v.optional(v.union(v.literal('exact'), v.literal('words'), v.literal('fuzzy'))), }, handler: async (ctx, args) => { const session = await ctx.runQuery(api.betterAuth.publicGetSession, { @@ -294,44 +295,65 @@ export const search = query({ throw new Error('Unauthorized'); } - type SearchResult = { + type ConversationSearchResult = { conversation: Doc<'conversations'>; messages: Doc<'messages'>[]; + score: number; + titleMatch: boolean; }; - const res: SearchResult[] = []; - if (!args.search_term.trim()) return res; + if (!args.search_term.trim()) return []; - const convQuery = ctx.db + const searchMode = args.search_mode || 'words'; + const results: ConversationSearchResult[] = []; + + // Get all conversations for the user + const conversations = await ctx.db .query('conversations') - .withIndex('by_user', (q) => q.eq('user_id', session.userId)); + .withIndex('by_user', (q) => q.eq('user_id', session.userId)) + .collect(); - for await (const conversation of convQuery) { - const searchResult: SearchResult = { - conversation, - messages: [], - }; - - const msgQuery = ctx.db + // Search through conversations and messages + for (const conversation of conversations) { + // Get messages for this conversation + const conversationMessages = await ctx.db .query('messages') .withIndex('by_conversation', (q) => q.eq('conversation_id', conversation._id)) - .order('asc'); + .collect(); + + // Search title + const titleResults = enhancedSearch({ + needle: args.search_term, + haystack: [conversation], + property: 'title', + mode: searchMode, + minScore: 0.3, + }); - for await (const message of msgQuery) { - if (fuzzyMatchString(args.search_term, message.content)) { - console.log('Found message for search'); - searchResult.messages.push(message); - } - } + // Search messages + const messageResults = enhancedSearch({ + needle: args.search_term, + haystack: conversationMessages, + property: 'content', + mode: searchMode, + minScore: 0.3, + }); - if ( - searchResult.messages.length > 0 || - fuzzyMatchString(args.search_term, conversation.title) - ) { - res.push(searchResult); + // If we have matches in title or messages, add to results + if (titleResults.length > 0 || messageResults.length > 0) { + const titleScore = titleResults.length > 0 ? titleResults[0]?.score ?? 0 : 0; + const messageScore = messageResults.length > 0 ? Math.max(...messageResults.map(r => r.score)) : 0; + + results.push({ + conversation, + messages: messageResults.map(r => r.item), + score: Math.max(titleScore, messageScore), + titleMatch: titleResults.length > 0, + }); } } - return res; + // Sort by score (highest first) + return results.sort((a, b) => b.score - a.score); }, }); diff --git a/src/lib/backend/convex/user_settings.ts b/src/lib/backend/convex/user_settings.ts index ec13829..6179635 100644 --- a/src/lib/backend/convex/user_settings.ts +++ b/src/lib/backend/convex/user_settings.ts @@ -1,6 +1,6 @@ import { internal } from './_generated/api'; import { query } from './_generated/server'; -import { SessionObj } from './betterAuth'; +import { type SessionObj } from './betterAuth'; import { mutation } from './functions'; import { v } from 'convex/values'; diff --git a/src/lib/utils/fuzzy-search.ts b/src/lib/utils/fuzzy-search.ts index 9ebea48..b87f3d6 100644 --- a/src/lib/utils/fuzzy-search.ts +++ b/src/lib/utils/fuzzy-search.ts @@ -1,15 +1,22 @@ +export type SearchMode = 'exact' | 'words' | 'fuzzy'; + +export interface SearchResult { + item: T; + score: number; + matchType: 'exact' | 'word' | 'fuzzy'; +} + /** - * Generic fuzzy search function that searches through arrays and returns matching items - * - * @param options Configuration object for the fuzzy search - * @returns Array of items that match the search criteria + * Enhanced search function with scoring and multiple search modes */ -export default function fuzzysearch(options: { +export default function enhancedSearch(options: { needle: string; haystack: T[]; property: keyof T | ((item: T) => string); -}): T[] { - const { needle, haystack, property } = options; + mode?: SearchMode; + minScore?: number; +}): SearchResult[] { + const { needle, haystack, property, mode = 'words', minScore = 0.3 } = options; if (!Array.isArray(haystack)) { throw new Error('Haystack must be an array'); @@ -19,24 +26,171 @@ export default function fuzzysearch(options: { throw new Error('Property selector is required'); } - // Convert needle to lowercase for case-insensitive matching - const lowerNeedle = needle.toLowerCase(); + const lowerNeedle = needle.toLowerCase().trim(); + if (!lowerNeedle) return []; - // Filter the haystack to find matching items - return haystack.filter((item) => { - // Extract the string value from the item based on the property selector + const results: SearchResult[] = []; + + for (const item of haystack) { const value = typeof property === 'function' ? property(item) : String(item[property]); - - // Convert to lowercase for case-insensitive matching const lowerValue = value.toLowerCase(); - // Perform the fuzzy search - return fuzzyMatchString(lowerNeedle, lowerValue); + const result = scoreMatch(lowerNeedle, lowerValue, mode); + if (result && result.score >= minScore) { + results.push({ + item, + score: result.score, + matchType: result.matchType, + }); + } + } + + // Sort by score (highest first), then by match type priority + return results.sort((a, b) => { + if (a.score !== b.score) return b.score - a.score; + + const typePriority = { exact: 3, word: 2, fuzzy: 1 }; + return typePriority[b.matchType] - typePriority[a.matchType]; }); } /** - * Internal helper function that performs the actual fuzzy string matching + * Legacy fuzzy search function for backward compatibility + */ +export function fuzzysearch(options: { + needle: string; + haystack: T[]; + property: keyof T | ((item: T) => string); +}): T[] { + return enhancedSearch(options).map(result => result.item); +} + +/** + * Score a match between needle and haystack + */ +function scoreMatch(needle: string, haystack: string, mode: SearchMode): { score: number; matchType: 'exact' | 'word' | 'fuzzy' } | null { + // Exact match gets highest score + if (haystack === needle) { + return { score: 1.0, matchType: 'exact' }; + } + + // Check for exact substring match + if (haystack.includes(needle)) { + const score = needle.length / haystack.length; + return { score: Math.max(0.8, score), matchType: 'exact' }; + } + + // Word boundary matching - check if needle matches at word boundaries + if (mode === 'words' || mode === 'fuzzy') { + const wordScore = scoreWordMatch(needle, haystack); + if (wordScore > 0) { + return { score: wordScore, matchType: 'word' }; + } + } + + // Fuzzy matching as fallback + if (mode === 'fuzzy') { + const fuzzyScore = scoreFuzzyMatch(needle, haystack); + if (fuzzyScore > 0) { + return { score: fuzzyScore, matchType: 'fuzzy' }; + } + } + + return null; +} + +/** + * Score word boundary matches + */ +function scoreWordMatch(needle: string, haystack: string): number { + const words = haystack.split(/\s+/); + const needleWords = needle.split(/\s+/); + + // Check for exact word matches + let exactWordMatches = 0; + let partialWordMatches = 0; + + for (const needleWord of needleWords) { + let found = false; + for (const word of words) { + if (word === needleWord) { + exactWordMatches++; + found = true; + break; + } else if (word.startsWith(needleWord)) { + partialWordMatches++; + found = true; + break; + } + } + } + + const totalNeedleWords = needleWords.length; + if (exactWordMatches === totalNeedleWords) { + return 0.9; // High score for all words matching exactly + } + + if (exactWordMatches + partialWordMatches === totalNeedleWords) { + return 0.7 * (exactWordMatches / totalNeedleWords) + 0.3 * (partialWordMatches / totalNeedleWords); + } + + // Check if needle appears at the start of any word + for (const word of words) { + if (word.startsWith(needle)) { + return 0.6; + } + } + + return 0; +} + +/** + * Score fuzzy matches with distance penalty + */ +function scoreFuzzyMatch(needle: string, haystack: string): number { + if (!fuzzyMatchString(needle, haystack)) { + return 0; + } + + // Calculate a score based on how close the characters are + let score = 0; + let lastIndex = -1; + let consecutiveMatches = 0; + + for (let i = 0; i < needle.length; i++) { + const char = needle.charAt(i); + const index = haystack.indexOf(char, lastIndex + 1); + + if (index === -1) { + return 0; // This shouldn't happen if fuzzyMatchString returned true + } + + if (index === lastIndex + 1) { + consecutiveMatches++; + score += 0.1; // Bonus for consecutive matches + } else { + consecutiveMatches = 0; + } + + // Penalty based on distance + const distance = index - lastIndex - 1; + score += Math.max(0, 0.05 - distance * 0.01); + + lastIndex = index; + } + + // Normalize score + score = score / needle.length; + + // Length ratio bonus + const lengthRatio = needle.length / haystack.length; + score *= (0.5 + lengthRatio * 0.5); + + return Math.min(0.5, Math.max(0.1, score)); // Cap fuzzy scores at 0.5 +} + +/** + * Legacy fuzzy match function for backward compatibility */ export function fuzzyMatchString(needle: string, haystack: string): boolean { const hlen = haystack.length; diff --git a/src/routes/account/models/+page.svelte b/src/routes/account/models/+page.svelte index 768e399..6c9a5ef 100644 --- a/src/routes/account/models/+page.svelte +++ b/src/routes/account/models/+page.svelte @@ -11,7 +11,7 @@ import XIcon from '~icons/lucide/x'; import PlusIcon from '~icons/lucide/plus'; import { models } from '$lib/state/models.svelte'; - import fuzzysearch from '$lib/utils/fuzzy-search'; + import { fuzzysearch } from '$lib/utils/fuzzy-search'; const openRouterKeyQuery = useCachedQuery(api.user_keys.get, { provider: Provider.OpenRouter, diff --git a/src/routes/chat/model-picker.svelte b/src/routes/chat/model-picker.svelte index 13e5ef6..58c1a5c 100644 --- a/src/routes/chat/model-picker.svelte +++ b/src/routes/chat/model-picker.svelte @@ -10,7 +10,7 @@ import { session } from '$lib/state/session.svelte'; import { settings } from '$lib/state/settings.svelte'; import { Provider } from '$lib/types'; - import fuzzysearch from '$lib/utils/fuzzy-search'; + import { fuzzysearch } from '$lib/utils/fuzzy-search'; import { supportsImages } from '$lib/utils/model-capabilities'; import { capitalize } from '$lib/utils/strings'; import { cn } from '$lib/utils/utils'; diff --git a/src/routes/chat/search-modal.svelte b/src/routes/chat/search-modal.svelte index 76ac229..57c925d 100644 --- a/src/routes/chat/search-modal.svelte +++ b/src/routes/chat/search-modal.svelte @@ -8,15 +8,16 @@ import { Debounced } from 'runed'; import SearchIcon from '~icons/lucide/search'; - let open = $state(true); - + let open = $state(false); let input = $state(''); + let searchMode = $state<'exact' | 'words' | 'fuzzy'>('words'); let inputEl = $state(); const debouncedInput = new Debounced(() => input, 500); const search = useQuery(api.conversations.search, () => ({ search_term: debouncedInput.current, + search_mode: searchMode, session_token: session.current?.session.token ?? '', })); @@ -38,32 +39,68 @@ -

Search

- - - {#if search.isLoading} -
-
-
- {:else if search.data?.length} -
- {#each search.data as { conversation, messages }} -
+

Search Conversations

+ +
+ + +
+ + +
- {/if} + + {#if search.isLoading} +
+
+
+ {:else if search.data?.length} +
+ {#each search.data as { conversation, messages, score, titleMatch }} +
+
+
+
+ {conversation.title} +
+
+ {Math.round(score * 100)}% +
+
+
+ {messages.length} matching message{messages.length !== 1 ? 's' : ''} + {#if titleMatch} + • Title match + {/if} +
+
+ +
+ {/each} +
+ {:else if debouncedInput.current.trim()} +
+

No results found for "{debouncedInput.current}"

+

Try a different search term or mode

+
+ {:else} +
+

Start typing to search your conversations

+
+ {/if} +