|
|
@@ -0,0 +1,327 @@
|
|
|
+import Database from "better-sqlite3"
|
|
|
+import {
|
|
|
+ afterAll,
|
|
|
+ beforeAll,
|
|
|
+ beforeEach,
|
|
|
+ describe,
|
|
|
+ expect,
|
|
|
+ it,
|
|
|
+ vi,
|
|
|
+} from "vitest"
|
|
|
+
|
|
|
// Stub for @enesis/editor, following the same pattern as indexer.test.ts.
// Every non-empty line of the input becomes one depth-0 block, and each block
// takes the next value of a file-wide counter as its id.
let blockIdSeed = 0
vi.mock("@enesis/editor", () => {
  const splitMarkdownIntoBlocks = (content: string) => {
    const blocks: { id: string; content: string; depth: number }[] = []
    for (const text of content.split("\n")) {
      // Empty lines produce no block and do not consume an id.
      if (!text) continue
      blockIdSeed += 1
      blocks.push({ id: `test-block-${blockIdSeed}`, content: text, depth: 0 })
    }
    return blocks
  }
  return { splitMarkdownIntoBlocks }
})
|
|
|
+
|
|
|
// Replace the Tauri SQL plugin with a stand-in backed by an in-memory
// better-sqlite3 database, exposing the load/execute/select calls used here.
vi.mock("@tauri-apps/plugin-sql", () => {
  /**
   * Rewrites `$1`, `$2`, … placeholders to anonymous `?` placeholders.
   * The number is discarded, so values bind in order of appearance; SQL that
   * reuses or reorders `$N` will not bind the way the numbering suggests.
   */
  function translate(sql: string): string {
    return sql.replace(/\$\d+/g, "?")
  }

  class MockDatabase {
    private readonly db: Database.Database

    private constructor(db: Database.Database) {
      this.db = db
    }

    /** Opens a new in-memory database; the connection string is ignored. */
    static async load(_connectionString: string): Promise<MockDatabase> {
      const { default: BS3 } = await import("better-sqlite3")
      const db = new BS3(":memory:")
      db.pragma("foreign_keys = ON")
      return new MockDatabase(db)
    }

    /**
     * Runs a statement. With `params` it is prepared and run once, reporting
     * the number of changed rows. Without `params` the SQL is executed as a
     * script and 0 is reported.
     */
    async execute(
      sql: string,
      params?: unknown[],
    ): Promise<{ rowsAffected: number }> {
      if (params) {
        const info = this.db.prepare(translate(sql)).run(...params)
        return { rowsAffected: info.changes }
      }
      // exec() returns the Database instance, not a run-info object, so there
      // is no `changes` to read; report 0 explicitly instead of reading
      // an undefined property.
      this.db.exec(sql)
      return { rowsAffected: 0 }
    }

    /** Prepares the query and returns all rows. */
    async select<T>(sql: string, params?: unknown[]): Promise<T[]> {
      const stmt = this.db.prepare(translate(sql))
      return (params ? stmt.all(...params) : stmt.all()) as T[]
    }
  }

  return { default: MockDatabase }
})
|
|
|
+
|
|
|
+import { fullReindex, initIndex, syncIndex } from "./indexer"
|
|
|
+import { SCHEMA_SQL } from "./schema"
|
|
|
+
|
|
|
/** Type of the handle resolved by `initIndex`. */
type IndexDb = Awaited<ReturnType<typeof initIndex>>

// Index handle, reassigned before every test.
let db: IndexDb
// Raw connection with the schema applied, used for schema inspection.
let raw: Database.Database

/**
 * Debug helper: logs every row (including rowid) of the pages, blocks and
 * blocks_fts tables to the console.
 *
 * The parameter type is spelled via `IndexDb` because the parameter shadows
 * the module-level `db`, which made the former `typeof db` annotation refer
 * to the parameter itself.
 *
 * @param db - index handle to dump
 */
async function dumpDB(db: IndexDb): Promise<void> {
  for (const table of ["pages", "blocks", "blocks_fts"]) {
    console.log(`=== ${table} ===`)
    console.log(await db.select(`SELECT rowid, * FROM ${table}`))
  }
}
|
|
|
+
|
|
|
// One raw in-memory connection with the schema applied, used by the
// schema-inspection tests in this file.
beforeAll(function openRawSchemaDb() {
  raw = new Database(":memory:")
  raw.exec(SCHEMA_SQL)
})

// Restore mocks and release the raw connection once the file is done.
afterAll(function releaseRawSchemaDb() {
  vi.restoreAllMocks()
  raw.close()
})

// Each test gets a fresh database to avoid FTS rowid mismatch across tests
beforeEach(async function openFreshIndex() {
  db = await initIndex(":memory:")
})
|
|
|
+
|
|
|
/**
 * Builds the fake filesystem options handed to `syncIndex` in these tests.
 *
 * @param files - map of workspace-relative path to file content
 * @param workspacePath - directory prepended to every listed path
 * @returns `listDirectory` (resolves to every key of `files` as a full path),
 *   `readFile` (resolves to the content of a listed file, rejects with
 *   `ENOENT: <path>` otherwise) and the `workspacePath` itself
 */
function opts(files: Record<string, string>, workspacePath = "/ws") {
  const prefix = `${workspacePath}/`
  return {
    listDirectory: vi.fn(async () =>
      Object.keys(files).map((f) => `${prefix}${f}`),
    ),
    readFile: vi.fn(async (path: string) => {
      // Strip the workspace prefix only where it leads the path.
      const relative = path.startsWith(prefix) ? path.slice(prefix.length) : path
      // Own-property check: `in` would also accept inherited names such as
      // "toString" or "constructor" and return a non-string.
      if (!Object.prototype.hasOwnProperty.call(files, relative)) {
        throw new Error(`ENOENT: ${path}`)
      }
      return files[relative]
    }),
    workspacePath,
  }
}
|
|
|
+
|
|
|
// Schema checks run against the raw connection that had SCHEMA_SQL applied.
describe("schema — embedding columns", () => {
  it("blocks table has embedding BLOB column", () => {
    const lookup = raw.prepare(
      "SELECT name, type FROM pragma_table_info('blocks') WHERE name = 'embedding'",
    )
    const column = lookup.get() as { name: string; type: string } | undefined

    expect(column?.name).toBe("embedding")
    expect(column?.type).toMatch(/BLOB/i)
  })

  it("blocks table has embedding_hash column with default ''", () => {
    const lookup = raw.prepare(
      "SELECT name, dflt_value FROM pragma_table_info('blocks') WHERE name = 'embedding_hash'",
    )
    const column = lookup.get() as { name: string; dflt_value: string } | undefined

    expect(column?.name).toBe("embedding_hash")
    // The expected default is the two-character SQL literal ''.
    expect(column?.dflt_value).toBe("''")
  })
})
|
|
|
+
|
|
|
// End-to-end checks: sync a fake workspace, then query blocks_fts directly.
describe("FTS5 searchability", () => {
  // ID-only MATCH lookup reused by several tests below.
  const matchBlockIdsSql =
    "SELECT b.id as block_id FROM blocks_fts f JOIN blocks b ON b.rowid = f.rowid WHERE blocks_fts MATCH ? LIMIT 20"

  it("indexes content and returns results via blocks_fts MATCH", async () => {
    const rustPage = [
      "# Rust Programming",
      "Rust is a systems programming language.",
      "It focuses on safety and performance.",
      "The borrow checker ensures memory safety.",
    ].join("\n")
    const workspace = opts({ "pages/Rust.md": rustPage })
    await syncIndex(workspace, db)

    // Query FTS5 directly — same query pattern the search composable uses
    const hits = await db.select<{ block_id: string; snippet: string }[]>(
      `SELECT b.id as block_id, b.content as snippet
       FROM blocks_fts f
       JOIN blocks b ON b.rowid = f.rowid
       WHERE blocks_fts MATCH ?
       ORDER BY rank
       LIMIT 20`,
      ['"rust"*'],
    )

    expect(hits.length).toBeGreaterThan(0)

    // At least one returned snippet must mention "rust", ignoring case.
    const lowered = hits.map((hit) => hit.snippet.toLowerCase())
    const mentionsRust = lowered.some((text) => text.includes("rust"))
    expect(mentionsRust).toBe(true)
  })

  it("returns no results for unmatched queries", async () => {
    const workspace = opts({
      "pages/Cooking.md": "Boil water and add pasta.",
    })
    await syncIndex(workspace, db)

    const hits = await db.select<{ block_id: string }[]>(matchBlockIdsSql, [
      '"quantum"*',
    ])

    expect(hits).toHaveLength(0)
  })

  it("indexes multiple files without ID collision", async () => {
    const workspace = opts({
      "pages/Fish.md": "Salmon is a popular fish.",
      "pages/Birds.md": "Eagles are birds of prey.",
    })
    await syncIndex(workspace, db)

    // Both pages are present, sorted by path.
    const pageRows = await db.select<{ path: string }[]>(
      "SELECT path FROM pages ORDER BY path",
    )
    expect(pageRows).toHaveLength(2)
    expect(pageRows[0].path).toBe("pages/Birds.md")
    expect(pageRows[1].path).toBe("pages/Fish.md")

    // Exactly one block per page, in the same page order.
    const blockRows = await db.select<{ page_path: string; content: string }[]>(
      "SELECT page_path, content FROM blocks ORDER BY page_path",
    )
    expect(blockRows).toHaveLength(2)
    expect(blockRows[0].content).toBe("Eagles are birds of prey.")
    expect(blockRows[1].content).toBe("Salmon is a popular fish.")
  })

  it("survives re-index (unchanged blocks keep same searchability)", async () => {
    const workspace = opts({
      "pages/Test.md": "Content that should be searchable.",
    })
    // Sync twice with identical input; the second pass re-indexes unchanged files.
    await syncIndex(workspace, db)
    await syncIndex(workspace, db)

    const hits = await db.select<{ block_id: string }[]>(matchBlockIdsSql, [
      '"searchable"*',
    ])

    expect(hits.length).toBeGreaterThan(0)
  })
})
|
|
|
+
|
|
|
// Checks that quoted-prefix MATCH expressions ("term"*) hit indexed content.
describe("FTS5 prefix matching", () => {
  // ID-only MATCH lookup shared by both tests.
  const matchBlockIdsSql =
    "SELECT b.id as block_id FROM blocks_fts f JOIN blocks b ON b.rowid = f.rowid WHERE blocks_fts MATCH ? LIMIT 20"

  it("finds partial word matches via prefix syntax", async () => {
    const workspace = opts({
      "pages/Dev.md": "Development environment setup guide.",
    })
    await syncIndex(workspace, db)

    // Prefix match "devel"* should find "Development"
    const hits = await db.select<{ block_id: string }[]>(matchBlockIdsSql, [
      '"devel"*',
    ])

    expect(hits.length).toBeGreaterThan(0)
  })

  it("matches multiple terms combined", async () => {
    const workspace = opts({
      "pages/Full.md": "The quick brown fox jumps over the lazy dog.",
    })
    await syncIndex(workspace, db)

    // Two prefix terms in a single MATCH expression.
    const hits = await db.select<{ block_id: string }[]>(matchBlockIdsSql, [
      '"quick"* "fox"*',
    ])

    expect(hits.length).toBeGreaterThan(0)
  })
})
|
|
|
+
|
|
|
+describe("buildExcerpt", () => {
|
|
|
/**
 * Builds a short excerpt of `content` around the earliest occurrence of any
 * whitespace-separated term of `query`, matched case-insensitively.
 *
 * With no matching term (including an empty or whitespace-only query) the
 * first 120 characters are returned, followed by "…" when content was cut.
 * Otherwise the excerpt spans up to 60 characters on each side of the match
 * position, with "…" marking each side that was truncated.
 *
 * @param content - text to excerpt
 * @param query - search terms separated by whitespace
 * @returns the excerpt
 */
function buildExcerpt(content: string, query: string): string {
  const haystack = content.toLowerCase()
  const positions = query
    .toLowerCase()
    .split(/\s+/)
    // Empty, leading-space and trailing-space queries yield "" terms; "" is
    // found at index 0 of any string and would pin the excerpt to the start.
    .filter((term) => term.length > 0)
    .map((term) => haystack.indexOf(term))
    .filter((index) => index >= 0)

  if (positions.length === 0) {
    return content.slice(0, 120) + (content.length > 120 ? "…" : "")
  }

  const pos = Math.min(...positions)
  const start = Math.max(0, pos - 60)
  const end = Math.min(content.length, pos + 60)

  const leading = start > 0 ? "…" : ""
  const trailing = end < content.length ? "…" : ""
  return leading + content.slice(start, end) + trailing
}
|
|
|
+
|
|
|
// No term matches and the content fits in the 120-character preview.
it("returns full content when shorter than 120 chars and no match", () => {
  const excerpt = buildExcerpt("Short text", "nonexistent")
  expect(excerpt).toBe("Short text")
})

// No term matches and the content is cut: 120 characters plus the ellipsis.
it("returns truncated content when longer than 120 chars and no match", () => {
  const excerpt = buildExcerpt("a".repeat(200), "nonexistent")
  expect(excerpt).toHaveLength(121)
  expect(excerpt.endsWith("…")).toBe(true)
})

// A match deep inside long content is cut on both sides.
it("centers excerpt around matched term", () => {
  const haystack = [
    "This is a very long text ".repeat(20),
    "Rust is great. ",
    "More trailing text ".repeat(20),
  ].join("")
  const excerpt = buildExcerpt(haystack, "Rust")
  expect(excerpt).toContain("Rust")
  expect(excerpt.length).toBeLessThan(haystack.length)
  expect(excerpt.startsWith("…")).toBe(true)
  expect(excerpt.endsWith("…")).toBe(true)
})

it("handles content shorter than window", () => {
  const excerpt = buildExcerpt("Rust programming language", "Rust")
  expect(excerpt).toBe("Rust programming language")
})

it("matches case-insensitively", () => {
  const excerpt = buildExcerpt("The RUST language is fast.", "rust")
  expect(excerpt).toContain("RUST")
})

it("handles multi-word queries", () => {
  const excerpt = buildExcerpt(
    "Learning Rust programming for systems development.",
    "Rust programming",
  )
  expect(excerpt).toContain("Rust")
})

it("handles empty query", () => {
  const excerpt = buildExcerpt("Some content", "")
  expect(excerpt).toBe("Some content")
})
|
|
|
+})
|
|
|
+
|
|
|
// Checks the byte-level representation used to store Float32Array embeddings.
describe("embedding encode/decode round-trip", () => {
  it("Float32Array round-trips through Uint8Array BLOB", () => {
    const original = new Float32Array([0.1, 0.2, 0.3, 0.0, -0.5, 1.0])
    const blob = new Uint8Array(original.buffer)

    // Copy the bytes into an independent buffer before decoding. Decoding
    // straight from `blob.buffer` would yield a view over the very same memory
    // as `original`, so the comparison could never fail.
    const stored = Uint8Array.from(blob)
    expect(stored.buffer).not.toBe(original.buffer)

    const decoded = new Float32Array(
      stored.buffer,
      stored.byteOffset,
      stored.byteLength / Float32Array.BYTES_PER_ELEMENT,
    )

    expect(decoded.length).toBe(original.length)
    // The bytes are copied verbatim, so the float32 values must match exactly.
    expect(Array.from(decoded)).toEqual(Array.from(original))
  })

  it("produces correct BLOB size for 384-dim vector", () => {
    const vec = new Float32Array(384)
    const blob = new Uint8Array(vec.buffer)
    // 384 floats × 4 bytes each = 1536 bytes
    expect(blob.byteLength).toBe(1536)
  })
})
|