better search

This commit is contained in:
Thomas G. Lopes 2025-06-18 17:29:07 +01:00
parent c4151f16a0
commit 10332fbaa9
6 changed files with 288 additions and 75 deletions

View file

@ -1,8 +1,8 @@
import { v } from 'convex/values';
import { fuzzyMatchString } from '../../utils/fuzzy-search';
import enhancedSearch, { type SearchResult } from '../../utils/fuzzy-search';
import { getFirstSentence } from '../../utils/strings';
import { api } from './_generated/api';
import { Doc, type Id } from './_generated/dataModel';
import { type Doc, type Id } from './_generated/dataModel';
import { query } from './_generated/server';
import { type SessionObj } from './betterAuth';
import { mutation } from './functions';
@ -284,6 +284,7 @@ export const search = query({
args: {
session_token: v.string(),
search_term: v.string(),
search_mode: v.optional(v.union(v.literal('exact'), v.literal('words'), v.literal('fuzzy'))),
},
handler: async (ctx, args) => {
const session = await ctx.runQuery(api.betterAuth.publicGetSession, {
@ -294,44 +295,65 @@ export const search = query({
throw new Error('Unauthorized');
}
type SearchResult = {
type ConversationSearchResult = {
conversation: Doc<'conversations'>;
messages: Doc<'messages'>[];
score: number;
titleMatch: boolean;
};
const res: SearchResult[] = [];
if (!args.search_term.trim()) return res;
if (!args.search_term.trim()) return [];
const convQuery = ctx.db
const searchMode = args.search_mode || 'words';
const results: ConversationSearchResult[] = [];
// Get all conversations for the user
const conversations = await ctx.db
.query('conversations')
.withIndex('by_user', (q) => q.eq('user_id', session.userId));
.withIndex('by_user', (q) => q.eq('user_id', session.userId))
.collect();
for await (const conversation of convQuery) {
const searchResult: SearchResult = {
conversation,
messages: [],
};
const msgQuery = ctx.db
// Search through conversations and messages
for (const conversation of conversations) {
// Get messages for this conversation
const conversationMessages = await ctx.db
.query('messages')
.withIndex('by_conversation', (q) => q.eq('conversation_id', conversation._id))
.order('asc');
.collect();
// Search title
const titleResults = enhancedSearch({
needle: args.search_term,
haystack: [conversation],
property: 'title',
mode: searchMode,
minScore: 0.3,
});
for await (const message of msgQuery) {
if (fuzzyMatchString(args.search_term, message.content)) {
console.log('Found message for search');
searchResult.messages.push(message);
}
}
// Search messages
const messageResults = enhancedSearch({
needle: args.search_term,
haystack: conversationMessages,
property: 'content',
mode: searchMode,
minScore: 0.3,
});
if (
searchResult.messages.length > 0 ||
fuzzyMatchString(args.search_term, conversation.title)
) {
res.push(searchResult);
// If we have matches in title or messages, add to results
if (titleResults.length > 0 || messageResults.length > 0) {
const titleScore = titleResults.length > 0 ? titleResults[0]?.score ?? 0 : 0;
const messageScore = messageResults.length > 0 ? Math.max(...messageResults.map(r => r.score)) : 0;
results.push({
conversation,
messages: messageResults.map(r => r.item),
score: Math.max(titleScore, messageScore),
titleMatch: titleResults.length > 0,
});
}
}
return res;
// Sort by score (highest first)
return results.sort((a, b) => b.score - a.score);
},
});

View file

@ -1,6 +1,6 @@
import { internal } from './_generated/api';
import { query } from './_generated/server';
import { SessionObj } from './betterAuth';
import { type SessionObj } from './betterAuth';
import { mutation } from './functions';
import { v } from 'convex/values';

View file

@ -1,15 +1,22 @@
export type SearchMode = 'exact' | 'words' | 'fuzzy';
export interface SearchResult<T> {
item: T;
score: number;
matchType: 'exact' | 'word' | 'fuzzy';
}
/**
* Generic fuzzy search function that searches through arrays and returns matching items
*
* @param options Configuration object for the fuzzy search
* @returns Array of items that match the search criteria
* Enhanced search function with scoring and multiple search modes
*/
export default function fuzzysearch<T>(options: {
export default function enhancedSearch<T>(options: {
needle: string;
haystack: T[];
property: keyof T | ((item: T) => string);
}): T[] {
const { needle, haystack, property } = options;
mode?: SearchMode;
minScore?: number;
}): SearchResult<T>[] {
const { needle, haystack, property, mode = 'words', minScore = 0.3 } = options;
if (!Array.isArray(haystack)) {
throw new Error('Haystack must be an array');
@ -19,24 +26,171 @@ export default function fuzzysearch<T>(options: {
throw new Error('Property selector is required');
}
// Convert needle to lowercase for case-insensitive matching
const lowerNeedle = needle.toLowerCase();
const lowerNeedle = needle.toLowerCase().trim();
if (!lowerNeedle) return [];
// Filter the haystack to find matching items
return haystack.filter((item) => {
// Extract the string value from the item based on the property selector
const results: SearchResult<T>[] = [];
for (const item of haystack) {
const value = typeof property === 'function' ? property(item) : String(item[property]);
// Convert to lowercase for case-insensitive matching
const lowerValue = value.toLowerCase();
// Perform the fuzzy search
return fuzzyMatchString(lowerNeedle, lowerValue);
const result = scoreMatch(lowerNeedle, lowerValue, mode);
if (result && result.score >= minScore) {
results.push({
item,
score: result.score,
matchType: result.matchType,
});
}
}
// Sort by score (highest first), then by match type priority
return results.sort((a, b) => {
if (a.score !== b.score) return b.score - a.score;
const typePriority = { exact: 3, word: 2, fuzzy: 1 };
return typePriority[b.matchType] - typePriority[a.matchType];
});
}
/**
 * Legacy fuzzy search entry point, kept for callers that only need the
 * matching items rather than scored results.
 *
 * Delegates to `enhancedSearch` with `mode: 'fuzzy'` and `minScore: 0`.
 * Both are set explicitly because the `enhancedSearch` defaults (`'words'`
 * mode, `minScore` 0.3) would skip the fuzzy branch and filter out low-scoring
 * matches, dropping loose matches that callers of this wrapper expect to
 * receive.
 *
 * Items are returned in the relevance order produced by `enhancedSearch`
 * (highest score first). A needle that is empty after trimming yields an
 * empty array.
 *
 * @param options.needle - Text to search for.
 * @param options.haystack - Items to search through.
 * @param options.property - Key of `T`, or a selector function, giving the string to match against.
 * @returns The matching items, without score metadata.
 */
export function fuzzysearch<T>(options: {
	needle: string;
	haystack: T[];
	property: keyof T | ((item: T) => string);
}): T[] {
	const { needle, haystack, property } = options;

	return enhancedSearch({
		needle,
		haystack,
		property,
		mode: 'fuzzy',
		// Accept every match; the legacy contract is "matches or not", not a threshold.
		minScore: 0,
	}).map((result) => result.item);
}
/**
 * Rate how well `needle` matches `haystack` under the given search mode.
 *
 * Strategies are tried from strictest to loosest and the first hit wins:
 *  1. identical strings score 1.0 ('exact')
 *  2. `needle` is a substring: at least 0.8, higher when it covers more of `haystack` ('exact')
 *  3. word matching, in 'words' and 'fuzzy' modes ('word')
 *  4. fuzzy matching, in 'fuzzy' mode only ('fuzzy')
 *
 * No case folding happens here; both strings are compared as given.
 *
 * @returns The score and the kind of match found, or `null` when nothing matched.
 */
function scoreMatch(
	needle: string,
	haystack: string,
	mode: SearchMode
): { score: number; matchType: 'exact' | 'word' | 'fuzzy' } | null {
	if (haystack === needle) {
		return { score: 1.0, matchType: 'exact' };
	}

	if (haystack.includes(needle)) {
		// Share of the haystack covered by the needle, floored at 0.8.
		const coverage = needle.length / haystack.length;
		return { score: Math.max(0.8, coverage), matchType: 'exact' };
	}

	switch (mode) {
		case 'words': {
			const wordScore = scoreWordMatch(needle, haystack);
			return wordScore > 0 ? { score: wordScore, matchType: 'word' } : null;
		}
		case 'fuzzy': {
			// Fuzzy mode still prefers a word-level hit over a subsequence hit.
			const wordScore = scoreWordMatch(needle, haystack);
			if (wordScore > 0) {
				return { score: wordScore, matchType: 'word' };
			}
			const fuzzyScore = scoreFuzzyMatch(needle, haystack);
			return fuzzyScore > 0 ? { score: fuzzyScore, matchType: 'fuzzy' } : null;
		}
		default:
			return null;
	}
}
/**
 * Score how well the whitespace-separated words of `needle` line up with the
 * whitespace-separated words of `haystack`.
 *
 * Each needle word is classified as:
 *  - exact: equal to some haystack word, or
 *  - partial: a prefix of some haystack word, or
 *  - unmatched.
 *
 * An exact hit always takes priority over a prefix hit, regardless of word
 * order in the haystack, so "cat" counts as exact against "category cat".
 *
 * No case folding happens here; both strings are compared as given.
 *
 * @param needle - Search term, split on runs of whitespace.
 * @param haystack - Text to search in, split on runs of whitespace.
 * @returns 0.9 when every needle word matches exactly; a blend weighted 0.7
 *          for the exact share and 0.3 for the partial share when every
 *          needle word matches at least as a prefix; 0 when any needle word
 *          is unmatched.
 */
function scoreWordMatch(needle: string, haystack: string): number {
	const haystackWords = haystack.split(/\s+/);
	const needleWords = needle.split(/\s+/);

	let exactWordMatches = 0;
	let partialWordMatches = 0;

	for (const needleWord of needleWords) {
		// Look for an exact word anywhere first, so an earlier prefix-only
		// word cannot hide a later exact match.
		if (haystackWords.includes(needleWord)) {
			exactWordMatches++;
		} else if (haystackWords.some((word) => word.startsWith(needleWord))) {
			partialWordMatches++;
		}
	}

	const totalNeedleWords = needleWords.length;

	if (exactWordMatches === totalNeedleWords) {
		return 0.9; // High score for all words matching exactly
	}

	if (exactWordMatches + partialWordMatches === totalNeedleWords) {
		return (
			0.7 * (exactWordMatches / totalNeedleWords) +
			0.3 * (partialWordMatches / totalNeedleWords)
		);
	}

	// A whole-needle prefix check is not needed here: a needle without
	// whitespace is itself the only needle word (already handled above), and
	// a needle with whitespace can never be a prefix of a whitespace-free word.
	return 0;
}
/**
 * Score a fuzzy match of `needle` against `haystack`, rewarding matched
 * characters that sit next to each other and penalising gaps between them.
 *
 * The haystack is walked left to right, locating each needle character at the
 * first position after the previous match. Every character contributes:
 *  - a 0.1 adjacency bonus when it directly follows the previous match, and
 *  - a proximity term of 0.05 minus 0.01 per skipped character (never below 0).
 *
 * The total is normalised against the best possible per-character
 * contribution, weighted by how much of the haystack the needle covers, and
 * mapped onto the range [0.1, 0.5]. Without that normalisation the result
 * could never exceed 0.15, which is below the default `minScore` of 0.3 used
 * by `enhancedSearch`.
 *
 * @param needle - Search term; expected to be already lowercased by the caller.
 * @param haystack - Text to search in; expected to be already lowercased.
 * @returns 0 when there is no match (or `needle` is empty), otherwise a score
 *          between 0.1 and 0.5 inclusive.
 */
function scoreFuzzyMatch(needle: string, haystack: string): number {
	// An empty needle would divide by zero below; treat it as "no match".
	// NOTE(review): fuzzyMatchString is presumably an in-order subsequence
	// check — its body is not visible here, so it is kept as the gatekeeper.
	if (needle.length === 0 || !fuzzyMatchString(needle, haystack)) {
		return 0;
	}

	const adjacencyBonus = 0.1;
	const proximityBase = 0.05;
	const gapPenalty = 0.01;
	const maxPerChar = adjacencyBonus + proximityBase;
	const minFuzzyScore = 0.1;
	const maxFuzzyScore = 0.5; // Fuzzy scores are capped at 0.5

	let rawScore = 0;
	let lastIndex = -1;

	for (let i = 0; i < needle.length; i++) {
		const char = needle.charAt(i);
		const index = haystack.indexOf(char, lastIndex + 1);

		if (index === -1) {
			return 0; // This shouldn't happen if fuzzyMatchString returned true
		}

		if (index === lastIndex + 1) {
			rawScore += adjacencyBonus; // Bonus for consecutive matches
		}

		// Penalty based on distance from the previous matched character
		const distance = index - lastIndex - 1;
		rawScore += Math.max(0, proximityBase - distance * gapPenalty);

		lastIndex = index;
	}

	// Fraction of the best achievable score: 1 means every character was adjacent.
	const quality = rawScore / (needle.length * maxPerChar);

	// Length ratio bonus: needles covering more of the haystack rank higher.
	const lengthRatio = needle.length / haystack.length;
	const scaled = quality * (0.5 + lengthRatio * 0.5) * maxFuzzyScore;

	return Math.min(maxFuzzyScore, Math.max(minFuzzyScore, scaled));
}
/**
* Legacy fuzzy match function for backward compatibility
*/
export function fuzzyMatchString(needle: string, haystack: string): boolean {
const hlen = haystack.length;

View file

@ -11,7 +11,7 @@
import XIcon from '~icons/lucide/x';
import PlusIcon from '~icons/lucide/plus';
import { models } from '$lib/state/models.svelte';
import fuzzysearch from '$lib/utils/fuzzy-search';
import { fuzzysearch } from '$lib/utils/fuzzy-search';
const openRouterKeyQuery = useCachedQuery(api.user_keys.get, {
provider: Provider.OpenRouter,

View file

@ -10,7 +10,7 @@
import { session } from '$lib/state/session.svelte';
import { settings } from '$lib/state/settings.svelte';
import { Provider } from '$lib/types';
import fuzzysearch from '$lib/utils/fuzzy-search';
import { fuzzysearch } from '$lib/utils/fuzzy-search';
import { supportsImages } from '$lib/utils/model-capabilities';
import { capitalize } from '$lib/utils/strings';
import { cn } from '$lib/utils/utils';

View file

@ -8,15 +8,16 @@
import { Debounced } from 'runed';
import SearchIcon from '~icons/lucide/search';
let open = $state(true);
let open = $state(false);
let input = $state('');
let searchMode = $state<'exact' | 'words' | 'fuzzy'>('words');
let inputEl = $state<HTMLInputElement>();
const debouncedInput = new Debounced(() => input, 500);
const search = useQuery(api.conversations.search, () => ({
search_term: debouncedInput.current,
search_mode: searchMode,
session_token: session.current?.session.token ?? '',
}));
</script>
@ -38,32 +39,68 @@
</Tooltip>
<Modal bind:open>
<h2>Search</h2>
<input bind:this={inputEl} bind:value={input} class="w-full border" placeholder="Search" />
{#if search.isLoading}
<div class="text-center">
<div class="animate-spin rounded-full border-2 border-current border-t-transparent" />
</div>
{:else if search.data?.length}
<div class="space-y-2">
{#each search.data as { conversation, messages }}
<div
class="border-border flex items-center justify-between gap-2 rounded-lg border px-3 py-2 text-sm"
<div class="space-y-4">
<h2 class="text-lg font-semibold">Search Conversations</h2>
<div class="space-y-3">
<input
bind:this={inputEl}
bind:value={input}
class="w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2"
placeholder="Search conversations and messages..."
/>
<div class="flex gap-2 items-center">
<label for="search-mode" class="text-sm font-medium text-muted-foreground">Search mode:</label>
<select
id="search-mode"
bind:value={searchMode}
class="rounded border border-input bg-background px-2 py-1 text-xs"
>
<div class="flex items-center gap-2">
<div class="text-muted-foreground text-xs">
{conversation.title}
</div>
<div class="text-muted-foreground text-xs">
{messages.length} message{messages.length > 1 ? 's' : ''}
</div>
</div>
<!-- TODO: Add message count to conversation -->
<Button variant="secondary" size="sm" class="text-xs">View</Button>
</div>
{/each}
<option value="words">Word matching</option>
<option value="exact">Exact match</option>
<option value="fuzzy">Fuzzy search</option>
</select>
</div>
</div>
{/if}
{#if search.isLoading}
<div class="flex justify-center py-8">
<div class="size-6 animate-spin rounded-full border-2 border-current border-t-transparent"></div>
</div>
{:else if search.data?.length}
<div class="space-y-2 max-h-96 overflow-y-auto">
{#each search.data as { conversation, messages, score, titleMatch }}
<div class="border-border flex items-center justify-between gap-2 rounded-lg border px-3 py-2">
<div class="flex-1 min-w-0">
<div class="flex items-center gap-2 mb-1">
<div class="font-medium truncate" class:text-blue-600={titleMatch}>
{conversation.title}
</div>
<div class="text-xs text-muted-foreground bg-muted px-1.5 py-0.5 rounded">
{Math.round(score * 100)}%
</div>
</div>
<div class="text-xs text-muted-foreground">
{messages.length} matching message{messages.length !== 1 ? 's' : ''}
{#if titleMatch}
<span class="text-blue-600">• Title match</span>
{/if}
</div>
</div>
<Button variant="secondary" size="sm" class="text-xs shrink-0">View</Button>
</div>
{/each}
</div>
{:else if debouncedInput.current.trim()}
<div class="text-center py-8 text-muted-foreground">
<p>No results found for "{debouncedInput.current}"</p>
<p class="text-xs mt-1">Try a different search term or mode</p>
</div>
{:else}
<div class="text-center py-8 text-muted-foreground">
<p>Start typing to search your conversations</p>
</div>
{/if}
</div>
</Modal>