From 255640f90139e2e20144a2aff3846369a0614357 Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:07 +0000 Subject: [PATCH 01/45] feat(knowledge): add Ollama embedding types --- apps/sim/lib/knowledge/types.ts | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/sim/lib/knowledge/types.ts b/apps/sim/lib/knowledge/types.ts index f962a65fb1a..4bc98a728b6 100644 --- a/apps/sim/lib/knowledge/types.ts +++ b/apps/sim/lib/knowledge/types.ts @@ -33,8 +33,9 @@ export interface CreateKnowledgeBaseData { name: string description?: string workspaceId: string - embeddingModel: 'text-embedding-3-small' - embeddingDimension: 1536 + embeddingModel: string + embeddingDimension: number + ollamaBaseUrl?: string chunkingConfig: ChunkingConfig userId: string } @@ -109,6 +110,8 @@ export interface ExtendedChunkingConfig extends ChunkingConfig { recipe?: string lang?: string strategy?: 'recursive' | 'semantic' | 'sentence' | 'paragraph' + /** Ollama server base URL — stored here to avoid a schema migration */ + ollamaBaseUrl?: string [key: string]: unknown } From b043bc23a99cebb852f13f6467f688ace99ca9ad Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:07 +0000 Subject: [PATCH 02/45] feat(knowledge): add per-KB dynamic pgvector tables --- apps/sim/lib/knowledge/dynamic-tables.ts | 460 +++++++++++++++++++++++ 1 file changed, 460 insertions(+) create mode 100644 apps/sim/lib/knowledge/dynamic-tables.ts diff --git a/apps/sim/lib/knowledge/dynamic-tables.ts b/apps/sim/lib/knowledge/dynamic-tables.ts new file mode 100644 index 00000000000..8ae074f8863 --- /dev/null +++ b/apps/sim/lib/knowledge/dynamic-tables.ts @@ -0,0 +1,460 @@ +import { db } from '@sim/db' +import { createLogger } from '@sim/logger' +import { sql } from 'drizzle-orm' +import { + boolean, + doublePrecision, + integer, + pgTable, + text, + timestamp, + vector, +} from 'drizzle-orm/pg-core' +import type { StructuredFilter } from '@/lib/knowledge/types' +import type { SearchResult } from '@/app/api/knowledge/search/utils' + +const logger = createLogger('DynamicKBTables') + +const TAG_SLOT_KEYS = [ + 'tag1', + 'tag2', + 'tag3', + 'tag4', + 'tag5', + 'tag6', + 'tag7', + 'number1', + 'number2', + 'number3', + 'number4', + 'number5', + 'date1', + 'date2', + 'boolean1', + 'boolean2', + 'boolean3', +] as const + +type TagSlotKey = (typeof TAG_SLOT_KEYS)[number] + +function isTagSlotKey(key: string): key is TagSlotKey { + return TAG_SLOT_KEYS.includes(key as TagSlotKey) +} + +/** Convert a KB UUID to a valid Postgres table name */ +export function kbTableName(kbId: string): string { + return `kb_embeddings_${kbId.replace(/-/g, '_')}` +} + +/** Parse provider and model name from an embedding model string like 'ollama/nomic-embed-text' */ +export function parseEmbeddingModel(embeddingModel: string | null | undefined): { + provider: 'openai' | 'ollama' + modelName: string +} { + if (embeddingModel?.startsWith('ollama/')) { + return { provider: 'ollama', modelName: embeddingModel.slice(7) } + } + return { provider: 'openai', modelName: embeddingModel || 'text-embedding-3-small' } +} + +/** Create a dedicated embedding table for a knowledge base with the exact vector dimension */ +export async function createKBEmbeddingTable(kbId: string, dimension: number): Promise { + const table = kbTableName(kbId) + logger.info(`Creating per-KB embedding table: ${table} (${dimension}d)`) + + await db.execute( + sql.raw(` + CREATE TABLE IF NOT EXISTS "${table}" ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + knowledge_base_id TEXT NOT NULL, + document_id TEXT NOT NULL, + chunk_index INTEGER NOT NULL, + chunk_hash TEXT NOT NULL, + content TEXT NOT NULL, + content_length INTEGER NOT NULL, + token_count INTEGER NOT NULL, + embedding vector(${dimension}) NOT NULL, + embedding_model TEXT NOT NULL, + start_offset INTEGER, + end_offset INTEGER, + tag1 TEXT, tag2 TEXT, tag3 TEXT, tag4 TEXT, tag5 TEXT, tag6 TEXT, tag7 TEXT, + number1 NUMERIC, number2 NUMERIC, number3 NUMERIC, number4 NUMERIC, number5 NUMERIC, + date1 TIMESTAMPTZ, date2 TIMESTAMPTZ, + boolean1 BOOLEAN, boolean2 BOOLEAN, boolean3 BOOLEAN, + enabled BOOLEAN NOT NULL DEFAULT TRUE, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() + ) + `) + ) + + await db.execute( + sql.raw(` + CREATE INDEX IF NOT EXISTS "${table}_hnsw" + ON "${table}" USING hnsw (embedding vector_cosine_ops) + WITH (m = 16, ef_construction = 64) + `) + ) + + await db.execute( + sql.raw(` + CREATE INDEX IF NOT EXISTS "${table}_enabled" + ON "${table}" (knowledge_base_id, enabled) + `) + ) + + logger.info(`Created per-KB embedding table: ${table}`) +} + +/** Drop a per-KB embedding table when the knowledge base is deleted */ +export async function dropKBEmbeddingTable(kbId: string): Promise { + const table = kbTableName(kbId) + await db.execute(sql.raw(`DROP TABLE IF EXISTS "${table}"`)) + logger.info(`Dropped per-KB embedding table: ${table}`) +} + +/** Delete all embeddings for a document from a per-KB table (used before re-insert) */ +export async function deleteKBDocumentEmbeddings(kbId: string, documentId: string): Promise { + const table = kbTableName(kbId) + await db.execute(sql`DELETE FROM ${sql.raw(`"${table}"`)} WHERE document_id = ${documentId}`) +} + +export interface KBEmbeddingRecord { + id: string + knowledgeBaseId: string + documentId: string + chunkIndex: number + chunkHash: string + content: string + contentLength: number + tokenCount: number + embedding: number[] + embeddingModel: string + startOffset: number | null + endOffset: number | null + tag1: string | null + tag2: string | null + tag3: string | null + tag4: string | null + tag5: string | null + tag6: string | null + tag7: string | null + number1: number | null + number2: number | null + number3: number | null + number4: number | null + number5: number | null + date1: Date | null + date2: Date | null + boolean1: boolean | null + boolean2: boolean | null + boolean3: boolean | null + createdAt: Date + updatedAt: Date +} + +/** Validate embedding values — reject NaN/Infinity which pgvector cannot store */ +function validateEmbedding(embedding: number[], chunkIndex: number): void { + for (let i = 0; i < embedding.length; i++) { + if (!Number.isFinite(embedding[i])) { + throw new Error( + `Invalid embedding value at chunk ${chunkIndex}, dimension ${i}: ${embedding[i]}. ` + + `Embedding vectors must contain only finite numbers.` + ) + } + } +} + +/** + * Create a dynamic table schema for a per-KB embedding table. + * This allows drizzle to properly handle vector serialization. + * @param tableName - The name of the table + * @param dimensions - The vector dimension (e.g., 768 for Ollama nomic-embed-text) + */ +function createDynamicKBTable(tableName: string, dimensions: number) { + return pgTable(tableName, { + id: text('id').primaryKey(), + knowledgeBaseId: text('knowledge_base_id').notNull(), + documentId: text('document_id').notNull(), + chunkIndex: integer('chunk_index').notNull(), + chunkHash: text('chunk_hash').notNull(), + content: text('content').notNull(), + contentLength: integer('content_length').notNull(), + tokenCount: integer('token_count').notNull(), + embedding: vector('embedding', { dimensions }), + embeddingModel: text('embedding_model').notNull(), + startOffset: integer('start_offset'), + endOffset: integer('end_offset'), + tag1: text('tag1'), + tag2: text('tag2'), + tag3: text('tag3'), + tag4: text('tag4'), + tag5: text('tag5'), + tag6: text('tag6'), + tag7: text('tag7'), + number1: doublePrecision('number1'), + number2: doublePrecision('number2'), + number3: doublePrecision('number3'), + number4: doublePrecision('number4'), + number5: doublePrecision('number5'), + date1: timestamp('date1'), + date2: timestamp('date2'), + boolean1: boolean('boolean1'), + boolean2: boolean('boolean2'), + boolean3: boolean('boolean3'), + createdAt: timestamp('created_at').notNull(), + updatedAt: timestamp('updated_at').notNull(), + }) +} + +/** Insert embedding records into a per-KB table in batches */ +export async function insertKBEmbeddings( + kbId: string, + records: KBEmbeddingRecord[], + dimension = 768 +): Promise { + if (records.length === 0) return + + const table = kbTableName(kbId) + const BATCH = 100 + + // Create a dynamic table schema so drizzle can properly serialize vectors + const dynamicTable = createDynamicKBTable(table, dimension) + + for (let i = 0; i < records.length; i += BATCH) { + const batch = records.slice(i, i + BATCH) + + try { + // Validate all embeddings before insertion + for (const r of batch) { + if (!Array.isArray(r.embedding) || r.embedding.length === 0) { + throw new Error( + `Missing or empty embedding for chunk ${r.chunkIndex} in document ${r.documentId}` + ) + } + validateEmbedding(r.embedding, r.chunkIndex) + } + + // Use drizzle's insert API with dynamic table schema + await db.insert(dynamicTable).values(batch) + } catch (err: unknown) { + const pg = err as { code?: string; detail?: string; message?: string; cause?: unknown } + logger.error(`insertKBEmbeddings failed for table ${table}`, { + code: pg.code, + detail: pg.detail, + message: pg.message, + cause: pg.cause, + batchSize: batch.length, + sampleEmbeddingDim: batch[0]?.embedding?.length, + }) + throw err + } + } +} + +/** Vector similarity search against a per-KB table with optional tag filters */ +export async function searchKBTable( + kbId: string, + queryVector: string, + topK: number, + distanceThreshold: number, + structuredFilters?: StructuredFilter[] +): Promise { + const table = kbTableName(kbId) + + const filterConditions = buildRawFilterConditions(structuredFilters ?? []) + + const vecLiteral = sql.raw(`'${queryVector}'::vector`) + + const allConditions = [ + sql`knowledge_base_id = ${kbId}`, + sql`enabled = TRUE`, + sql`embedding <=> ${vecLiteral} < ${distanceThreshold}`, + ...filterConditions, + ] + + const whereClause = sql.join(allConditions, sql` AND `) + + const result = await db.execute(sql` + SELECT + id::text, + content, + document_id AS "documentId", + chunk_index AS "chunkIndex", + tag1, tag2, tag3, tag4, tag5, tag6, tag7, + number1::float8, number2::float8, number3::float8, number4::float8, number5::float8, + date1, date2, + boolean1, boolean2, boolean3, + (embedding <=> ${vecLiteral})::float8 AS distance, + knowledge_base_id AS "knowledgeBaseId" + FROM ${sql.raw(`"${table}"`)} + WHERE ${whereClause} + ORDER BY distance + LIMIT ${topK} + `) + + return result as any as SearchResult[] +} + +/** Tag-only search against a per-KB table (no vector similarity) */ +export async function searchKBTableTagOnly( + kbId: string, + topK: number, + structuredFilters: StructuredFilter[] +): Promise { + const table = kbTableName(kbId) + + const filterConditions = buildRawFilterConditions(structuredFilters) + + const allConditions = [sql`knowledge_base_id = ${kbId}`, sql`enabled = TRUE`, ...filterConditions] + + const whereClause = sql.join(allConditions, sql` AND `) + + const result = await db.execute(sql` + SELECT + id::text, + content, + document_id AS "documentId", + chunk_index AS "chunkIndex", + tag1, tag2, tag3, tag4, tag5, tag6, tag7, + number1::float8, number2::float8, number3::float8, number4::float8, number5::float8, + date1, date2, + boolean1, boolean2, boolean3, + 0::float8 AS distance, + knowledge_base_id AS "knowledgeBaseId" + FROM ${sql.raw(`"${table}"`)} + WHERE ${whereClause} + LIMIT ${topK} + `) + + return result as any as SearchResult[] +} + +/** Build SQL conditions from structured filters for raw-SQL per-KB table queries */ +function buildRawFilterConditions(filters: StructuredFilter[]): ReturnType[] { + const filtersBySlot = new Map() + for (const filter of filters) { + const slot = filter.tagSlot + if (!filtersBySlot.has(slot)) filtersBySlot.set(slot, []) + filtersBySlot.get(slot)!.push(filter) + } + + const conditions: ReturnType[] = [] + + for (const [slot, slotFilters] of filtersBySlot) { + if (!isTagSlotKey(slot)) continue + + const slotConditions = slotFilters + .map((f) => buildRawFilterCondition(f)) + .filter((c): c is ReturnType => c !== null) + + if (slotConditions.length === 0) continue + + conditions.push( + slotConditions.length === 1 + ? slotConditions[0] + : sql`(${sql.join(slotConditions, sql` OR `)})` + ) + } + + return conditions +} + +/** Build a single SQL condition for a structured filter using raw column name references */ +function buildRawFilterCondition(filter: StructuredFilter): ReturnType | null { + const { tagSlot, fieldType, operator, value, valueTo } = filter + + if (!isTagSlotKey(tagSlot)) return null + + // tagSlot is validated against TAG_SLOT_KEYS (all simple alphanumeric) — safe for sql.raw + const col = sql.raw(tagSlot) + + if (fieldType === 'text') { + const stringValue = String(value) + switch (operator) { + case 'eq': + return sql`LOWER(${col}) = LOWER(${stringValue})` + case 'neq': + return sql`LOWER(${col}) != LOWER(${stringValue})` + case 'contains': + return sql`LOWER(${col}) LIKE LOWER(${`%${stringValue}%`})` + case 'not_contains': + return sql`LOWER(${col}) NOT LIKE LOWER(${`%${stringValue}%`})` + case 'starts_with': + return sql`LOWER(${col}) LIKE LOWER(${`${stringValue}%`})` + case 'ends_with': + return sql`LOWER(${col}) LIKE LOWER(${`%${stringValue}`})` + default: + return sql`LOWER(${col}) = LOWER(${stringValue})` + } + } + + if (fieldType === 'number') { + const numValue = typeof value === 'number' ? value : Number.parseFloat(String(value)) + if (Number.isNaN(numValue)) return null + switch (operator) { + case 'eq': + return sql`${col} = ${numValue}` + case 'neq': + return sql`${col} != ${numValue}` + case 'gt': + return sql`${col} > ${numValue}` + case 'gte': + return sql`${col} >= ${numValue}` + case 'lt': + return sql`${col} < ${numValue}` + case 'lte': + return sql`${col} <= ${numValue}` + case 'between': + if (valueTo !== undefined) { + const numTo = typeof valueTo === 'number' ? valueTo : Number.parseFloat(String(valueTo)) + if (!Number.isNaN(numTo)) return sql`${col} >= ${numValue} AND ${col} <= ${numTo}` + } + return sql`${col} = ${numValue}` + default: + return sql`${col} = ${numValue}` + } + } + + if (fieldType === 'date') { + const dateStr = String(value) + if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) return null + switch (operator) { + case 'eq': + return sql`${col}::date = ${dateStr}::date` + case 'neq': + return sql`${col}::date != ${dateStr}::date` + case 'gt': + return sql`${col}::date > ${dateStr}::date` + case 'gte': + return sql`${col}::date >= ${dateStr}::date` + case 'lt': + return sql`${col}::date < ${dateStr}::date` + case 'lte': + return sql`${col}::date <= ${dateStr}::date` + case 'between': + if (valueTo !== undefined) { + const dateStrTo = String(valueTo) + if (/^\d{4}-\d{2}-\d{2}$/.test(dateStrTo)) + return sql`${col}::date >= ${dateStr}::date AND ${col}::date <= ${dateStrTo}::date` + } + return sql`${col}::date = ${dateStr}::date` + default: + return sql`${col}::date = ${dateStr}::date` + } + } + + if (fieldType === 'boolean') { + const boolValue = value === true || value === 'true' + switch (operator) { + case 'eq': + return sql`${col} = ${boolValue}` + case 'neq': + return sql`${col} != ${boolValue}` + default: + return sql`${col} = ${boolValue}` + } + } + + return sql`${col} = ${value}` +} From 61f05a7a8ea5bbe5f3586101cc703d64f72b8139 Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:08 +0000 Subject: [PATCH 03/45] feat(knowledge): add Ollama embedding generation with retry and smart batching --- apps/sim/lib/knowledge/embeddings.ts | 207 ++++++++++++++++++++++++++- 1 file changed, 205 insertions(+), 2 deletions(-) diff --git a/apps/sim/lib/knowledge/embeddings.ts b/apps/sim/lib/knowledge/embeddings.ts index 1171065c75d..d9febf37715 100644 --- a/apps/sim/lib/knowledge/embeddings.ts +++ b/apps/sim/lib/knowledge/embeddings.ts @@ -10,6 +10,97 @@ const MAX_TOKENS_PER_REQUEST = 8000 const MAX_CONCURRENT_BATCHES = env.KB_CONFIG_CONCURRENCY_LIMIT || 50 const EMBEDDING_DIMENSIONS = 1536 +const OLLAMA_TIMEOUT_MS = 120_000 + +/** Default context length for Ollama embedding models when it cannot be queried */ +const OLLAMA_DEFAULT_CONTEXT_LENGTH = 2048 +/** Default embedding dimension for Ollama models when it cannot be queried */ +const OLLAMA_DEFAULT_EMBEDDING_DIMENSION = 768 +/** Cache TTL for Ollama model info (5 minutes) */ +const OLLAMA_MODEL_CACHE_TTL_MS = 5 * 60 * 1000 + +export interface OllamaModelInfo { + contextLength: number + embeddingLength: number +} + +/** In-memory cache for Ollama model info to avoid repeated /api/show calls */ +const ollamaModelInfoCache = new Map() + +/** + * Query an Ollama model's info via the /api/show endpoint. + * Returns context_length and embedding_length with in-memory caching. + */ +export async function getOllamaModelInfo( + modelName: string, + baseUrl = 'http://localhost:11434' +): Promise { + const cacheKey = `${modelName}@${baseUrl}` + const cached = ollamaModelInfoCache.get(cacheKey) + if (cached && Date.now() - cached.ts < OLLAMA_MODEL_CACHE_TTL_MS) { + return cached.info + } + + const defaults: OllamaModelInfo = { + contextLength: OLLAMA_DEFAULT_CONTEXT_LENGTH, + embeddingLength: OLLAMA_DEFAULT_EMBEDDING_DIMENSION, + } + + try { + const url = `${baseUrl.replace(/\/$/, '')}/api/show` + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ name: modelName }), + signal: AbortSignal.timeout(5000), + }) + + if (!response.ok) { + logger.warn(`Failed to query Ollama model info for ${modelName}: ${response.status}`) + ollamaModelInfoCache.set(cacheKey, { info: defaults, ts: Date.now() }) + return defaults + } + + const data = await response.json() + const modelInfo = data?.model_info ?? {} + + const info: OllamaModelInfo = { ...defaults } + + for (const [key, value] of Object.entries(modelInfo)) { + const lowerKey = key.toLowerCase() + if (lowerKey.includes('context_length') && typeof value === 'number') { + info.contextLength = value + } + if (lowerKey.includes('embedding_length') && typeof value === 'number') { + info.embeddingLength = value + } + } + + logger.info( + `Ollama model ${modelName}: context_length=${info.contextLength}, embedding_length=${info.embeddingLength}` + ) + ollamaModelInfoCache.set(cacheKey, { info, ts: Date.now() }) + return info + } catch (error) { + logger.warn( + `Error querying Ollama model info: ${error instanceof Error ? error.message : String(error)}` + ) + ollamaModelInfoCache.set(cacheKey, { info: defaults, ts: Date.now() }) + return defaults + } +} + +/** + * Query an Ollama model's context length (convenience wrapper). + */ +export async function getOllamaModelContextLength( + modelName: string, + baseUrl = 'http://localhost:11434' +): Promise { + const info = await getOllamaModelInfo(modelName, baseUrl) + return info.contextLength +} + /** * Check if the model supports custom dimensions. * text-embedding-3-* models support the dimensions parameter. @@ -172,14 +263,117 @@ async function processWithConcurrency( return results } +/** + * Call Ollama's /api/embed endpoint for batch embedding generation. + * Requires Ollama 0.1.26+ for the /api/embed endpoint with array input. + */ +async function callOllamaEmbeddingAPI( + inputs: string[], + modelName: string, + baseUrl: string +): Promise { + const url = `${baseUrl.replace(/\/$/, '')}/api/embed` + + const response = await fetch(url, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ model: modelName, input: inputs }), + signal: AbortSignal.timeout(OLLAMA_TIMEOUT_MS), + }) + + if (!response.ok) { + const errorText = await response.text() + throw new EmbeddingAPIError( + `Ollama embedding API failed: ${response.status} ${response.statusText} - ${errorText}`, + response.status + ) + } + + const data: { embeddings: number[][] } = await response.json() + return data.embeddings +} + /** * Generate embeddings for multiple texts with token-aware batching and parallel processing */ export async function generateEmbeddings( texts: string[], embeddingModel = 'text-embedding-3-small', - workspaceId?: string | null + workspaceId?: string | null, + ollamaBaseUrl?: string, + contextLengthHint?: number ): Promise { + if (embeddingModel.startsWith('ollama/')) { + const modelName = embeddingModel.slice(7) + const baseUrl = ollamaBaseUrl ?? 'http://localhost:11434' + logger.info(`Using Ollama (${baseUrl}) for embedding generation with model ${modelName}`) + + // Use pre-queried context length if provided, otherwise query it + const contextLength = + contextLengthHint ?? (await getOllamaModelContextLength(modelName, baseUrl)) + // Use contextLength as the max character count (assumes worst case ~1 char per token) + const maxChars = contextLength + + // Truncate any chunks that exceed the context length, then batch by total character count + const prepared: string[] = texts.map((text, i) => { + if (text.length > maxChars) { + const lastSentenceEnd = text.lastIndexOf('. ', maxChars) + const truncatedLength = lastSentenceEnd > maxChars * 0.5 ? lastSentenceEnd + 1 : maxChars + logger.warn( + `Truncating chunk ${i} from ${text.length} to ${truncatedLength} chars ` + + `(Ollama model ${modelName} context length: ${contextLength})` + ) + return text.slice(0, truncatedLength) + } + return text + }) + + // Smart batching: group chunks so total characters per batch stays within maxChars + const batches: string[][] = [] + let currentBatch: string[] = [] + let currentBatchChars = 0 + for (const text of prepared) { + if (currentBatch.length > 0 && currentBatchChars + text.length > maxChars) { + batches.push(currentBatch) + currentBatch = [] + currentBatchChars = 0 + } + currentBatch.push(text) + currentBatchChars += text.length + } + if (currentBatch.length > 0) { + batches.push(currentBatch) + } + + logger.info( + `[Ollama] Processing ${prepared.length} chunks in ${batches.length} batches (maxChars=${maxChars})` + ) + + // Process each batch with retry logic + const allEmbeddings: number[][] = [] + for (let batchIdx = 0; batchIdx < batches.length; batchIdx++) { + const batch = batches[batchIdx] + const batchEmbeddings = await retryWithExponentialBackoff( + () => callOllamaEmbeddingAPI(batch, modelName, baseUrl), + { + maxRetries: 3, + initialDelayMs: 1000, + maxDelayMs: 10000, + retryCondition: (error: unknown) => { + if (error instanceof EmbeddingAPIError) { + return error.status === 429 || error.status >= 500 + } + return isRetryableError(error) + }, + } + ) + for (const emb of batchEmbeddings) { + allEmbeddings.push(emb) + } + } + return allEmbeddings + } + const config = await getEmbeddingConfig(embeddingModel, workspaceId) const batches = batchByTokenLimit(texts, MAX_TOKENS_PER_REQUEST, embeddingModel) @@ -213,8 +407,17 @@ export async function generateEmbeddings( export async function generateSearchEmbedding( query: string, embeddingModel = 'text-embedding-3-small', - workspaceId?: string | null + workspaceId?: string | null, + ollamaBaseUrl?: string ): Promise { + if (embeddingModel.startsWith('ollama/')) { + const modelName = embeddingModel.slice(7) + const baseUrl = ollamaBaseUrl ?? 'http://localhost:11434' + logger.info(`Using Ollama (${baseUrl}) for search embedding with model ${modelName}`) + const embeddings = await callOllamaEmbeddingAPI([query], modelName, baseUrl) + return embeddings[0] + } + const config = await getEmbeddingConfig(embeddingModel, workspaceId) logger.info( From 546dd7c0bbe33eac26d2f62a233b17cd6903913d Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:08 +0000 Subject: [PATCH 04/45] feat(knowledge): store ollamaBaseUrl in KB config --- apps/sim/lib/knowledge/service.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/apps/sim/lib/knowledge/service.ts b/apps/sim/lib/knowledge/service.ts index 863a39b5cc3..7291b79f8a9 100644 --- a/apps/sim/lib/knowledge/service.ts +++ b/apps/sim/lib/knowledge/service.ts @@ -100,7 +100,10 @@ export async function createKnowledgeBase( tokenCount: 0, embeddingModel: data.embeddingModel, embeddingDimension: data.embeddingDimension, - chunkingConfig: data.chunkingConfig, + // Store ollamaBaseUrl inside chunkingConfig JSONB to avoid a schema migration + chunkingConfig: data.ollamaBaseUrl + ? { ...data.chunkingConfig, ollamaBaseUrl: data.ollamaBaseUrl } + : data.chunkingConfig, createdAt: now, updatedAt: now, deletedAt: null, From 616761dd659309510e4fd8a7e1cf5d6bb29d3335 Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:13 +0000 Subject: [PATCH 05/45] feat(chunkers): add embeddingModel to ChunkerOptions --- apps/sim/lib/chunkers/types.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/apps/sim/lib/chunkers/types.ts b/apps/sim/lib/chunkers/types.ts index a316d643f03..5bb780668d0 100644 --- a/apps/sim/lib/chunkers/types.ts +++ b/apps/sim/lib/chunkers/types.ts @@ -2,7 +2,9 @@ * Options for configuring text chunkers * * Units: - * - chunkSize: Maximum chunk size in TOKENS (1 token ≈ 4 characters) + * - chunkSize: Maximum chunk size in TOKENS + * - For OpenAI: 1 token ≈ 4 characters + * - For Ollama: 1 token ≈ 3 characters (conservative estimate) * - chunkOverlap: Overlap between chunks in TOKENS * - minCharactersPerChunk: Minimum chunk size in CHARACTERS (filters tiny fragments) */ @@ -13,6 +15,8 @@ export interface ChunkerOptions { chunkOverlap?: number /** Minimum chunk size in characters to avoid tiny fragments (default: 100) */ minCharactersPerChunk?: number + /** Embedding model to use for accurate token estimation (optional) */ + embeddingModel?: string } export interface Chunk { From 133f326006dd0d95ac247a08884a54c7b0d7105c Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:14 +0000 Subject: [PATCH 06/45] feat(chunkers): add model-aware token estimation ratio --- apps/sim/lib/chunkers/text-chunker.ts | 48 +++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/apps/sim/lib/chunkers/text-chunker.ts b/apps/sim/lib/chunkers/text-chunker.ts index 7dbbde0cf97..2c25739f5ec 100644 --- a/apps/sim/lib/chunkers/text-chunker.ts +++ b/apps/sim/lib/chunkers/text-chunker.ts @@ -2,17 +2,21 @@ import type { Chunk, ChunkerOptions } from '@/lib/chunkers/types' /** * Lightweight text chunker optimized for RAG applications - * Uses hierarchical splitting with simple character-based token estimation + * Uses hierarchical splitting with character-based token estimation * * Parameters: * - chunkSize: Maximum chunk size in TOKENS (default: 1024) * - chunkOverlap: Overlap between chunks in TOKENS (default: 0) * - minCharactersPerChunk: Minimum characters to keep a chunk (default: 100) + * - embeddingModel: Embedding model to use for accurate token estimation (optional) + * Ollama models use conservative estimates, OpenAI uses standard estimates */ export class TextChunker { private readonly chunkSize: number // Max chunk size in tokens private readonly chunkOverlap: number // Overlap in tokens private readonly minCharactersPerChunk: number // Min characters per chunk + private readonly embeddingModel?: string // Embedding model for accurate estimation + private readonly tokenEstimationRatio: number // Characters per token (chars/token) // Hierarchical separators ordered from largest to smallest semantic units private readonly separators = [ @@ -42,22 +46,54 @@ export class TextChunker { const maxOverlap = Math.floor(this.chunkSize * 0.5) this.chunkOverlap = Math.min(options.chunkOverlap ?? 0, maxOverlap) this.minCharactersPerChunk = options.minCharactersPerChunk ?? 100 + this.embeddingModel = options.embeddingModel + + // Determine token estimation ratio based on embedding model + // Ollama models need conservative estimates since they may tokenize differently + // OpenAI models use standard 1 token ≈ 4 characters + this.tokenEstimationRatio = this.getTokenEstimationRatio() + } + + /** + * Get token estimation ratio based on embedding model + * Ollama models: conservative 1 token ≈ 3 characters (fewer chars per token = more tokens estimated) + * OpenAI models: standard 1 token ≈ 4 characters + * + * Ollama models may tokenize differently, so we use a lower ratio to produce + * smaller chunks that stay safely within the model's context window. + */ + private getTokenEstimationRatio(): number { + if (!this.embeddingModel) { + // Default to OpenAI ratio for backwards compatibility + return 4 + } + + if (this.embeddingModel.startsWith('ollama/')) { + // Conservative estimate for Ollama — lower ratio means more estimated tokens + // per chunk, so chunks are split smaller to fit within context limits + return 3 + } + + // Default to OpenAI ratio for other models + return 4 } /** - * Simple token estimation using character count - * 1 token ≈ 4 characters for English text + * Token estimation using character count + * Ratio depends on embedding model: + * - OpenAI: 1 token ≈ 4 characters + * - Ollama: 1 token ≈ 3 characters (conservative) */ private estimateTokens(text: string): number { if (!text?.trim()) return 0 - return Math.ceil(text.length / 4) + return Math.ceil(text.length / this.tokenEstimationRatio) } /** - * Convert tokens to approximate character count + * Convert tokens to approximate character count using the estimation ratio */ private tokensToChars(tokens: number): number { - return tokens * 4 + return tokens * this.tokenEstimationRatio } /** From 2693251bd9368054b0cc54d4d6235163b26bb41c Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:15 +0000 Subject: [PATCH 07/45] feat(knowledge): pass embeddingModel to all chunkers --- .../lib/knowledge/documents/document-processor.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/apps/sim/lib/knowledge/documents/document-processor.ts b/apps/sim/lib/knowledge/documents/document-processor.ts index 0185de495b1..a87e651f18b 100644 --- a/apps/sim/lib/knowledge/documents/document-processor.ts +++ b/apps/sim/lib/knowledge/documents/document-processor.ts @@ -119,7 +119,8 @@ export async function processDocument( chunkOverlap = 200, minCharactersPerChunk = 100, userId?: string, - workspaceId?: string | null + workspaceId?: string | null, + embeddingModel?: string ): Promise<{ chunks: Chunk[] metadata: { @@ -154,6 +155,7 @@ export async function processDocument( chunks = await JsonYamlChunker.chunkJsonYaml(content, { chunkSize, minCharactersPerChunk, + embeddingModel, }) } else if (StructuredDataChunker.isStructuredData(content, mimeType)) { logger.info('Using structured data chunker for spreadsheet/CSV content') @@ -163,9 +165,15 @@ export async function processDocument( headers: metadata.headers, totalRows: typeof rowCount === 'number' ? rowCount : undefined, sheetName: metadata.sheetNames?.[0], + embeddingModel, }) } else { - const chunker = new TextChunker({ chunkSize, chunkOverlap, minCharactersPerChunk }) + const chunker = new TextChunker({ + chunkSize, + chunkOverlap, + minCharactersPerChunk, + embeddingModel, + }) chunks = await chunker.chunk(content) } From 18e7ac2ca9fe3d0cf6d5775da6993222c3f4e54f Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:16 +0000 Subject: [PATCH 08/45] feat(knowledge): add Ollama chunk size and overlap capping --- apps/sim/lib/knowledge/documents/service.ts | 121 ++++++++++++++------ 1 file changed, 89 insertions(+), 32 deletions(-) diff --git a/apps/sim/lib/knowledge/documents/service.ts b/apps/sim/lib/knowledge/documents/service.ts index cd4210b551d..ee0fa3e5b06 100644 --- a/apps/sim/lib/knowledge/documents/service.ts +++ b/apps/sim/lib/knowledge/documents/service.ts @@ -9,7 +9,12 @@ import { getStorageMethod, isRedisStorage } from '@/lib/core/storage' import { processDocument } from '@/lib/knowledge/documents/document-processor' import { DocumentProcessingQueue } from '@/lib/knowledge/documents/queue' import type { DocumentSortField, SortOrder } from '@/lib/knowledge/documents/types' -import { generateEmbeddings } from '@/lib/knowledge/embeddings' +import { + deleteKBDocumentEmbeddings, + insertKBEmbeddings, + parseEmbeddingModel, +} from '@/lib/knowledge/dynamic-tables' +import { generateEmbeddings, getOllamaModelContextLength } from '@/lib/knowledge/embeddings' import { buildUndefinedTagsError, parseBooleanValue, @@ -410,6 +415,8 @@ export async function processDocumentAsync( userId: knowledgeBase.userId, workspaceId: knowledgeBase.workspaceId, chunkingConfig: knowledgeBase.chunkingConfig, + embeddingModel: knowledgeBase.embeddingModel, + embeddingDimension: knowledgeBase.embeddingDimension, }) .from(knowledgeBase) .where(eq(knowledgeBase.id, knowledgeBaseId)) @@ -430,7 +437,47 @@ export async function processDocumentAsync( logger.info(`[${documentId}] Status updated to 'processing', starting document processor`) - const kbConfig = kb[0].chunkingConfig as { maxSize: number; minSize: number; overlap: number } + const kbConfig = kb[0].chunkingConfig as { + maxSize: number + minSize: number + overlap: number + ollamaBaseUrl?: string + } + const { provider: embeddingProvider, modelName: embeddingModelName } = parseEmbeddingModel( + kb[0].embeddingModel + ) + + // For Ollama models, query the model's context length and cap chunk size accordingly. + // TextChunker uses ratio 3 for Ollama (1 estimated token = 3 chars), but the actual + // Ollama tokenizer may produce ~1 token per 1-2 chars (especially for PDF text with + // special characters). We use 30% of context length as the safe estimated-token limit + // so the resulting character count stays well within the model's actual token limit. + let effectiveChunkSize = processingOptions.chunkSize ?? kbConfig.maxSize + let effectiveOverlap = processingOptions.chunkOverlap ?? kbConfig.overlap + let ollamaContextLength: number | undefined + if (embeddingProvider === 'ollama') { + ollamaContextLength = await getOllamaModelContextLength( + embeddingModelName, + kbConfig.ollamaBaseUrl + ) + const safeChunkSize = Math.floor(ollamaContextLength * 0.3) + if (effectiveChunkSize > safeChunkSize) { + logger.info( + `[${documentId}] Capping chunk size from ${effectiveChunkSize} to ${safeChunkSize} tokens ` + + `(Ollama model ${embeddingModelName} context length: ${ollamaContextLength})` + ) + effectiveChunkSize = safeChunkSize + } + // Cap overlap to 20% of effective chunk size so overlap doesn't push chunks over context limit + const maxOverlap = Math.max(0, Math.floor(effectiveChunkSize * 0.2)) + if (effectiveOverlap > maxOverlap) { + logger.info( + `[${documentId}] Capping chunk overlap from ${effectiveOverlap} to ${maxOverlap} tokens ` + + `(20% of effective chunk size ${effectiveChunkSize})` + ) + effectiveOverlap = maxOverlap + } + } await withTimeout( (async () => { @@ -438,11 +485,12 @@ export async function processDocumentAsync( docData.fileUrl, docData.filename, docData.mimeType, - processingOptions.chunkSize ?? kbConfig.maxSize, - processingOptions.chunkOverlap ?? kbConfig.overlap, + effectiveChunkSize, + effectiveOverlap, processingOptions.minCharactersPerChunk ?? kbConfig.minSize, kb[0].userId, - kb[0].workspaceId + kb[0].workspaceId, + kb[0].embeddingModel ) if (processed.chunks.length > LARGE_DOC_CONFIG.MAX_CHUNKS_PER_DOCUMENT) { @@ -472,7 +520,13 @@ export async function processDocumentAsync( const batchNum = Math.floor(i / batchSize) + 1 logger.info(`[${documentId}] Processing embedding batch ${batchNum}/${totalBatches}`) - const batchEmbeddings = await generateEmbeddings(batch, undefined, kb[0].workspaceId) + const batchEmbeddings = await generateEmbeddings( + batch, + kb[0].embeddingModel, + kb[0].workspaceId, + kbConfig.ollamaBaseUrl, + ollamaContextLength + ) for (const emb of batchEmbeddings) { embeddings.push(emb) } @@ -523,7 +577,7 @@ export async function processDocumentAsync( contentLength: chunk.text.length, tokenCount: Math.ceil(chunk.text.length / 4), embedding: embeddings[chunkIndex] || null, - embeddingModel: 'text-embedding-3-small', + embeddingModel: embeddingModelName, startOffset: chunk.metadata.startIndex, endOffset: chunk.metadata.endIndex, // Copy text tags from document (7 slots) @@ -551,34 +605,37 @@ export async function processDocumentAsync( updatedAt: now, })) - await db.transaction(async (tx) => { - if (embeddingRecords.length > 0) { - await tx.delete(embedding).where(eq(embedding.documentId, documentId)) - - const insertBatchSize = LARGE_DOC_CONFIG.MAX_CHUNKS_PER_BATCH - const batches: (typeof embeddingRecords)[] = [] - for (let i = 0; i < embeddingRecords.length; i += insertBatchSize) { - batches.push(embeddingRecords.slice(i, i + insertBatchSize)) - } + if (embeddingRecords.length > 0) { + logger.info(`[${documentId}] Inserting ${embeddingRecords.length} embeddings`) - logger.info(`[${documentId}] Inserting ${embeddingRecords.length} embeddings`) - for (const batch of batches) { - await tx.insert(embedding).values(batch) - } + if (embeddingProvider === 'ollama') { + // Per-KB table: delete old chunks then bulk-insert new ones + await deleteKBDocumentEmbeddings(knowledgeBaseId, documentId) + await insertKBEmbeddings(knowledgeBaseId, embeddingRecords, kb[0].embeddingDimension) + } else { + // Shared embedding table: delete + insert inside a transaction + await db.transaction(async (tx) => { + await tx.delete(embedding).where(eq(embedding.documentId, documentId)) + + const insertBatchSize = LARGE_DOC_CONFIG.MAX_CHUNKS_PER_BATCH + for (let i = 0; i < embeddingRecords.length; i += insertBatchSize) { + await tx.insert(embedding).values(embeddingRecords.slice(i, i + insertBatchSize)) + } + }) } + } - await tx - .update(document) - .set({ - chunkCount: processed.metadata.chunkCount, - tokenCount: processed.metadata.tokenCount, - characterCount: processed.metadata.characterCount, - processingStatus: 'completed', - processingCompletedAt: now, - processingError: null, - }) - .where(eq(document.id, documentId)) - }) + await db + .update(document) + .set({ + chunkCount: processed.metadata.chunkCount, + tokenCount: processed.metadata.tokenCount, + characterCount: processed.metadata.characterCount, + processingStatus: 'completed', + processingCompletedAt: now, + processingError: null, + }) + .where(eq(document.id, documentId)) })(), TIMEOUTS.OVERALL_PROCESSING, 'Document processing' From 983efc35a91667662093604308aed7a198b30119 Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:21 +0000 Subject: [PATCH 09/45] feat(knowledge): add Ollama model validation and auto-detect dimension --- apps/sim/app/api/knowledge/route.ts | 55 +++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) diff --git a/apps/sim/app/api/knowledge/route.ts b/apps/sim/app/api/knowledge/route.ts index f266d90d8da..f406a8564b0 100644 --- a/apps/sim/app/api/knowledge/route.ts +++ b/apps/sim/app/api/knowledge/route.ts @@ -5,6 +5,8 @@ import { AuditAction, AuditResourceType, recordAudit } from '@/lib/audit/log' import { getSession } from '@/lib/auth' import { PlatformEvents } from '@/lib/core/telemetry' import { generateRequestId } from '@/lib/core/utils/request' +import { createKBEmbeddingTable, parseEmbeddingModel } from '@/lib/knowledge/dynamic-tables' +import { getOllamaModelInfo } from '@/lib/knowledge/embeddings' import { createKnowledgeBase, getKnowledgeBases } from '@/lib/knowledge/service' const logger = createLogger('KnowledgeBaseAPI') @@ -21,8 +23,15 @@ const CreateKnowledgeBaseSchema = z.object({ name: z.string().min(1, 'Name is required'), description: z.string().optional(), workspaceId: z.string().min(1, 'Workspace ID is required'), - embeddingModel: z.literal('text-embedding-3-small').default('text-embedding-3-small'), - embeddingDimension: z.literal(1536).default(1536), + embeddingModel: z + .union([ + z.literal('text-embedding-3-small'), + z.literal('text-embedding-3-large'), + z.string().regex(/^ollama\/.+/, 'Ollama models must be prefixed with "ollama/"'), + ]) + .default('text-embedding-3-small'), + embeddingDimension: z.number().int().min(64).max(8192).default(1536), + ollamaBaseUrl: z.string().url('Ollama base URL must be a valid URL').optional(), chunkingConfig: z .object({ /** Maximum chunk size in tokens (1 token ≈ 4 characters) */ @@ -89,13 +98,55 @@ export async function POST(req: NextRequest) { try { const validatedData = CreateKnowledgeBaseSchema.parse(body) + const { provider, modelName } = parseEmbeddingModel(validatedData.embeddingModel) + + // For Ollama models, validate the model is available and auto-detect dimension + let effectiveDimension = validatedData.embeddingDimension + if (provider === 'ollama') { + const ollamaBaseUrl = validatedData.ollamaBaseUrl ?? 'http://localhost:11434' + try { + const modelInfo = await getOllamaModelInfo(modelName, ollamaBaseUrl) + + // Auto-correct dimension if the model reports a different one + if (modelInfo.embeddingLength && modelInfo.embeddingLength !== effectiveDimension) { + logger.info( + `[${requestId}] Auto-correcting embedding dimension from ${effectiveDimension} ` + + `to ${modelInfo.embeddingLength} (reported by Ollama model ${modelName})` + ) + effectiveDimension = modelInfo.embeddingLength + } + } catch { + return NextResponse.json( + { + error: + `Cannot reach Ollama at ${ollamaBaseUrl} or model "${modelName}" is not available. ` + + `Make sure Ollama is running and the model is pulled (ollama pull ${modelName}).`, + }, + { status: 400 } + ) + } + } + const createData = { ...validatedData, + embeddingDimension: effectiveDimension, userId: session.user.id, } const newKnowledgeBase = await createKnowledgeBase(createData, requestId) + if (provider === 'ollama') { + try { + await createKBEmbeddingTable(newKnowledgeBase.id, effectiveDimension) + } catch (tableError) { + logger.error( + `[${requestId}] Failed to create embedding table for KB ${newKnowledgeBase.id}`, + tableError + ) + throw tableError + } + } + try { PlatformEvents.knowledgeBaseCreated({ knowledgeBaseId: newKnowledgeBase.id, From 53a142339aac2b9fc4dc2e9f5605690f3ee815d3 Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:22 +0000 Subject: [PATCH 10/45] feat(knowledge): update KB detail API for Ollama support --- apps/sim/app/api/knowledge/[id]/route.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/apps/sim/app/api/knowledge/[id]/route.ts b/apps/sim/app/api/knowledge/[id]/route.ts index 7c3075a5d8b..b0e6a184572 100644 --- a/apps/sim/app/api/knowledge/[id]/route.ts +++ b/apps/sim/app/api/knowledge/[id]/route.ts @@ -5,6 +5,7 @@ import { AuditAction, AuditResourceType, recordAudit } from '@/lib/audit/log' import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' import { PlatformEvents } from '@/lib/core/telemetry' import { generateRequestId } from '@/lib/core/utils/request' +import { dropKBEmbeddingTable } from '@/lib/knowledge/dynamic-tables' import { deleteKnowledgeBase, getKnowledgeBaseById, @@ -200,6 +201,8 @@ export async function DELETE( } await deleteKnowledgeBase(id, requestId) + // Drop the per-KB embedding table if this was an Ollama KB (no-op for OpenAI KBs) + await dropKBEmbeddingTable(id) try { PlatformEvents.knowledgeBaseDeleted({ From 0b5d2183e004f824a97ac3eb37f52309cc03a97e Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:25 +0000 Subject: [PATCH 11/45] feat(knowledge): add provider routing and cross-provider score normalization --- apps/sim/app/api/knowledge/search/route.ts | 203 ++++++++++++++++----- 1 file changed, 160 insertions(+), 43 deletions(-) diff --git a/apps/sim/app/api/knowledge/search/route.ts b/apps/sim/app/api/knowledge/search/route.ts index 686f7c19cc1..809f7b0e8d8 100644 --- a/apps/sim/app/api/knowledge/search/route.ts +++ b/apps/sim/app/api/knowledge/search/route.ts @@ -1,17 +1,25 @@ +import { db } from '@sim/db' +import { knowledgeBase } from '@sim/db/schema' import { createLogger } from '@sim/logger' +import { inArray } from 'drizzle-orm' import { type NextRequest, NextResponse } from 'next/server' import { z } from 'zod' import { checkSessionOrInternalAuth } from '@/lib/auth/hybrid' import { PlatformEvents } from '@/lib/core/telemetry' import { generateRequestId } from '@/lib/core/utils/request' import { ALL_TAG_SLOTS } from '@/lib/knowledge/constants' +import { + parseEmbeddingModel, + searchKBTable, + searchKBTableTagOnly, +} from '@/lib/knowledge/dynamic-tables' +import { generateSearchEmbedding } from '@/lib/knowledge/embeddings' import { getDocumentTagDefinitions } from '@/lib/knowledge/tags/service' import { buildUndefinedTagsError, validateTagValue } from '@/lib/knowledge/tags/utils' -import type { StructuredFilter } from '@/lib/knowledge/types' +import type { ExtendedChunkingConfig, StructuredFilter } from '@/lib/knowledge/types' import { estimateTokenCount } from '@/lib/tokenization/estimators' import { authorizeWorkflowByWorkspacePermission } from '@/lib/workflows/utils' import { - generateSearchEmbedding, getDocumentNamesByIds, getQueryStrategy, handleTagAndVectorSearch, @@ -197,11 +205,6 @@ export async function POST(request: NextRequest) { const workspaceId = accessChecks.find((ac) => ac?.hasAccess)?.knowledgeBase?.workspaceId - const hasQuery = validatedData.query && validatedData.query.trim().length > 0 - const queryEmbeddingPromise = hasQuery - ? generateSearchEmbedding(validatedData.query!, undefined, workspaceId) - : Promise.resolve(null) - // Check if any requested knowledge bases were not accessible const inaccessibleKbIds = knowledgeBaseIds.filter((id) => !accessibleKbIds.includes(id)) @@ -212,46 +215,161 @@ export async function POST(request: NextRequest) { ) } - let results: SearchResult[] + // Fetch KB configs to determine provider routing + const kbConfigRows = await db + .select({ + id: knowledgeBase.id, + embeddingModel: knowledgeBase.embeddingModel, + chunkingConfig: knowledgeBase.chunkingConfig, + }) + .from(knowledgeBase) + .where(inArray(knowledgeBase.id, accessibleKbIds)) + + const kbConfigMap = new Map(kbConfigRows.map((kb) => [kb.id, kb])) + + const openaiKbIds: string[] = [] + const ollamaKbIds: string[] = [] + + for (const kbId of accessibleKbIds) { + const config = kbConfigMap.get(kbId) + if (!config) continue + const { provider } = parseEmbeddingModel(config.embeddingModel) + if (provider === 'ollama') { + ollamaKbIds.push(kbId) + } else { + openaiKbIds.push(kbId) + } + } + const hasQuery = validatedData.query && validatedData.query.trim().length > 0 const hasFilters = structuredFilters && structuredFilters.length > 0 - if (!hasQuery && hasFilters) { - // Tag-only search without vector similarity - results = await handleTagOnlySearch({ - knowledgeBaseIds: accessibleKbIds, - topK: validatedData.topK, - structuredFilters, - }) - } else if (hasQuery && hasFilters) { - // Tag + Vector search - logger.debug( - `[${requestId}] Executing tag + vector search with filters:`, - structuredFilters + // Generate OpenAI search embedding + let openaiQueryVector: string | null = null + if (hasQuery && openaiKbIds.length > 0) { + const emb = await generateSearchEmbedding(validatedData.query!, undefined, workspaceId) + openaiQueryVector = JSON.stringify(emb) + } + + // Generate Ollama search embeddings — one per unique (model, url) pair + const ollamaQueryVectors = new Map() + if (hasQuery && ollamaKbIds.length > 0) { + const uniquePairs = new Map() + for (const kbId of ollamaKbIds) { + const config = kbConfigMap.get(kbId)! + const cfg = config.chunkingConfig as ExtendedChunkingConfig + const { modelName } = parseEmbeddingModel(config.embeddingModel) + const baseUrl = cfg.ollamaBaseUrl ?? 'http://localhost:11434' + uniquePairs.set(`${modelName}:${baseUrl}`, { modelName, ollamaBaseUrl: baseUrl }) + } + await Promise.all( + Array.from(uniquePairs.entries()).map(async ([pairKey, { modelName, ollamaBaseUrl }]) => { + const emb = await generateSearchEmbedding( + validatedData.query!, + `ollama/${modelName}`, + undefined, + ollamaBaseUrl + ) + ollamaQueryVectors.set(pairKey, JSON.stringify(emb)) + }) ) - const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK) - const queryVector = JSON.stringify(await queryEmbeddingPromise) + } - results = await handleTagAndVectorSearch({ - knowledgeBaseIds: accessibleKbIds, - topK: validatedData.topK, - structuredFilters, - queryVector, - distanceThreshold: strategy.distanceThreshold, - }) - } else if (hasQuery && !hasFilters) { - // Vector-only search - const strategy = getQueryStrategy(accessibleKbIds.length, validatedData.topK) - const queryVector = JSON.stringify(await queryEmbeddingPromise) + const allResults: SearchResult[] = [] - results = await handleVectorOnlySearch({ - knowledgeBaseIds: accessibleKbIds, - topK: validatedData.topK, - queryVector, - distanceThreshold: strategy.distanceThreshold, - }) + // OpenAI KBs — existing search handlers + if (openaiKbIds.length > 0) { + const strategy = getQueryStrategy(openaiKbIds.length, validatedData.topK) + + if (!hasQuery && hasFilters) { + allResults.push( + ...(await handleTagOnlySearch({ + knowledgeBaseIds: openaiKbIds, + topK: validatedData.topK, + structuredFilters, + })) + ) + } else if (hasQuery && hasFilters && openaiQueryVector) { + logger.debug( + `[${requestId}] Executing tag + vector search with filters:`, + structuredFilters + ) + allResults.push( + ...(await handleTagAndVectorSearch({ + knowledgeBaseIds: openaiKbIds, + topK: validatedData.topK, + structuredFilters, + queryVector: openaiQueryVector, + distanceThreshold: strategy.distanceThreshold, + })) + ) + } else if (hasQuery && openaiQueryVector) { + allResults.push( + ...(await handleVectorOnlySearch({ + knowledgeBaseIds: openaiKbIds, + topK: validatedData.topK, + queryVector: openaiQueryVector, + distanceThreshold: strategy.distanceThreshold, + })) + ) + } + } + + // Ollama KBs — per-KB table search + for (const kbId of ollamaKbIds) { + const config = kbConfigMap.get(kbId)! + const cfg = config.chunkingConfig as ExtendedChunkingConfig + const { modelName } = parseEmbeddingModel(config.embeddingModel) + const baseUrl = cfg.ollamaBaseUrl ?? 'http://localhost:11434' + const pairKey = `${modelName}:${baseUrl}` + const strategy = getQueryStrategy(1, validatedData.topK) + + if (!hasQuery && hasFilters) { + allResults.push( + ...(await searchKBTableTagOnly(kbId, validatedData.topK, structuredFilters)) + ) + } else if (hasQuery) { + const queryVector = ollamaQueryVectors.get(pairKey) + if (queryVector) { + allResults.push( + ...(await searchKBTable( + kbId, + queryVector, + validatedData.topK, + strategy.distanceThreshold, + hasFilters ? structuredFilters : undefined + )) + ) + } + } + } + + // Merge and re-rank when results come from multiple providers. + // Distance scores from different embedding spaces are not directly comparable, + // so normalize each provider's scores to 0-1 range before merging. + let results: SearchResult[] + if (openaiKbIds.length > 0 && ollamaKbIds.length > 0) { + const normalizeScores = (items: SearchResult[]): SearchResult[] => { + if (items.length === 0) return items + const min = Math.min(...items.map((r) => r.distance)) + const max = Math.max(...items.map((r) => r.distance)) + const range = max - min || 1 + return items.map((r) => ({ ...r, distance: (r.distance - min) / range })) + } + const openaiResults = normalizeScores( + allResults.filter((r) => openaiKbIds.includes(r.knowledgeBaseId)) + ) + const ollamaResults = normalizeScores( + allResults.filter((r) => ollamaKbIds.includes(r.knowledgeBaseId)) + ) + results = [...openaiResults, ...ollamaResults] + .sort((a, b) => a.distance - b.distance) + .slice(0, validatedData.topK) } else { - // This should never happen due to schema validation, but just in case + results = allResults + } + + if (!hasQuery && !hasFilters) { return NextResponse.json( { error: @@ -261,10 +379,10 @@ export async function POST(request: NextRequest) { ) } - // Calculate cost for the embedding (with fallback if calculation fails) + // Calculate cost — only for OpenAI embedding calls let cost = null let tokenCount = null - if (hasQuery) { + if (hasQuery && openaiKbIds.length > 0) { try { tokenCount = estimateTokenCount(validatedData.query!, 'openai') cost = calculateCost('text-embedding-3-small', tokenCount.count, 0, false) @@ -272,7 +390,6 @@ export async function POST(request: NextRequest) { logger.warn(`[${requestId}] Failed to calculate cost for search query`, { error: error instanceof Error ? error.message : 'Unknown error', }) - // Continue without cost information rather than failing the search } } From 606b70b9e27d4425dbfa4fb573a7cae02f5b2f93 Mon Sep 17 00:00:00 2001 From: teedonk Date: Sun, 22 Mar 2026 23:20:26 +0000 Subject: [PATCH 12/45] feat(knowledge): add Ollama provider selection UI --- .../create-base-modal/create-base-modal.tsx | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-base-modal/create-base-modal.tsx b/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-base-modal/create-base-modal.tsx index 70419c82119..2e103639023 100644 --- a/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-base-modal/create-base-modal.tsx +++ b/apps/sim/app/workspace/[workspaceId]/knowledge/components/create-base-modal/create-base-modal.tsx @@ -24,6 +24,17 @@ import { ACCEPT_ATTRIBUTE } from '@/lib/uploads/utils/validation' import { useKnowledgeUpload } from '@/app/workspace/[workspaceId]/knowledge/hooks/use-knowledge-upload' import { useCreateKnowledgeBase, useDeleteKnowledgeBase } from '@/hooks/queries/knowledge' +type EmbeddingProvider = 'openai' | 'ollama' + +const OLLAMA_PRESET_MODELS: { label: string; value: string; dimension: number }[] = [ + { label: 'nomic-embed-text (768d)', value: 'nomic-embed-text', dimension: 768 }, + { label: 'mxbai-embed-large (1024d)', value: 'mxbai-embed-large', dimension: 1024 }, + { label: 'all-minilm (384d)', value: 'all-minilm', dimension: 384 }, + { label: 'snowflake-arctic-embed (1024d)', value: 'snowflake-arctic-embed', dimension: 1024 }, + { label: 'bge-m3 (1024d)', value: 'bge-m3', dimension: 1024 }, + { label: 'Custom…', value: 'custom', dimension: 0 }, +] + const logger = createLogger('CreateBaseModal') interface FileWithPreview extends File { @@ -93,6 +104,12 @@ export function CreateBaseModal({ open, onOpenChange }: CreateBaseModalProps) { const [dragCounter, setDragCounter] = useState(0) const [retryingIndexes, setRetryingIndexes] = useState>(new Set()) + const [embeddingProvider, setEmbeddingProvider] = useState('openai') + const [ollamaBaseUrl, setOllamaBaseUrl] = useState('http://localhost:11434') + const [ollamaPreset, setOllamaPreset] = useState(OLLAMA_PRESET_MODELS[0].value) + const [ollamaCustomModel, setOllamaCustomModel] = useState('') + const [ollamaDimension, setOllamaDimension] = useState(768) + const scrollContainerRef = useRef(null) const { uploadFiles, isUploading, uploadProgress, uploadError, clearError } = useKnowledgeUpload({ @@ -144,6 +161,11 @@ export function CreateBaseModal({ open, onOpenChange }: CreateBaseModalProps) { setIsDragging(false) setDragCounter(0) setRetryingIndexes(new Set()) + setEmbeddingProvider('openai') + setOllamaBaseUrl('http://localhost:11434') + setOllamaPreset(OLLAMA_PRESET_MODELS[0].value) + setOllamaCustomModel('') + setOllamaDimension(768) reset({ name: '', description: '', @@ -251,11 +273,29 @@ export function CreateBaseModal({ open, onOpenChange }: CreateBaseModalProps) { const onSubmit = async (data: FormValues) => { setSubmitStatus(null) + let embeddingModel = 'text-embedding-3-small' + let embeddingDimension = 1536 + let ollamaBaseUrlValue: string | undefined + + if (embeddingProvider === 'ollama') { + const modelName = ollamaPreset === 'custom' ? ollamaCustomModel.trim() : ollamaPreset + if (!modelName) { + setSubmitStatus({ type: 'error', message: 'Please enter an Ollama model name.' }) + return + } + embeddingModel = `ollama/${modelName}` + embeddingDimension = ollamaDimension + ollamaBaseUrlValue = ollamaBaseUrl.trim() || 'http://localhost:11434' + } + try { const newKnowledgeBase = await createKnowledgeBaseMutation.mutateAsync({ name: data.name, description: data.description || undefined, workspaceId: workspaceId, + embeddingModel, + embeddingDimension, + ollamaBaseUrl: ollamaBaseUrlValue, chunkingConfig: { maxSize: data.maxChunkSize, minSize: data.minChunkSize, @@ -394,6 +434,100 @@ export function CreateBaseModal({ open, onOpenChange }: CreateBaseModalProps) {

+
+ +
+ {(['openai', 'ollama'] as EmbeddingProvider[]).map((p) => ( + + ))} +
+
+ + {embeddingProvider === 'ollama' && ( + <> +
+ + setOllamaBaseUrl(e.target.value)} + autoComplete='off' + data-form-type='other' + /> +
+ +
+ + +
+ + {ollamaPreset === 'custom' && ( +
+ + setOllamaCustomModel(e.target.value)} + autoComplete='off' + data-form-type='other' + /> +
+ )} + + {ollamaPreset === 'custom' && ( +
+ + setOllamaDimension(Number(e.target.value))} + autoComplete='off' + data-form-type='other' + /> +

+ Must match the model's output dimension exactly. +

+
+ )} + + )} +