|
|
@@ -14,10 +14,11 @@
|
|
|
import { splitMarkdownIntoBlocks } from "@enesis/editor"
|
|
|
import Database from "@tauri-apps/plugin-sql"
|
|
|
import {
|
|
|
+ computePageTitle,
|
|
|
contentHash,
|
|
|
detectBlockType,
|
|
|
extractLinks,
|
|
|
- extractTitle,
|
|
|
+ toRelativePath,
|
|
|
} from "./parse"
|
|
|
import { SCHEMA_SQL } from "./schema"
|
|
|
|
|
|
@@ -32,15 +33,33 @@ export interface IndexerOptions {
|
|
|
|
|
|
/**
|
|
|
* Initialize the SQLite index at the given path.
|
|
|
- * Creates the database file if it doesn't exist and runs CREATE TABLE IF NOT EXISTS.
|
|
|
+ * Creates the database file if it doesn't exist, runs CREATE TABLE IF NOT EXISTS,
|
|
|
+ * and applies any schema migrations needed for existing databases.
|
|
|
* Tables, indices, FTS5 virtual table, and sync triggers are all idempotent.
|
|
|
*/
|
|
|
export async function initIndex(dbPath: string): Promise<Database> {
|
|
|
const db = await Database.load(`sqlite:${dbPath}`)
|
|
|
await db.execute(SCHEMA_SQL)
|
|
|
+ await migrateContentHash(db)
|
|
|
return db
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * Add the `content_hash` column to `pages` if it doesn't exist yet.
|
|
|
+ * SQLite doesn't support `ADD COLUMN IF NOT EXISTS`, so we query PRAGMA
|
|
|
+ * table_info first and only ALTER when the column is absent.
|
|
|
+ */
|
|
|
+async function migrateContentHash(db: Database): Promise<void> {
|
|
|
+ const cols = await db.select<{ name: string }[]>(
|
|
|
+ "SELECT name FROM pragma_table_info('pages')",
|
|
|
+ )
|
|
|
+ if (!cols.some((c) => c.name === "content_hash")) {
|
|
|
+ await db.execute(
|
|
|
+ "ALTER TABLE pages ADD COLUMN content_hash TEXT NOT NULL DEFAULT ''",
|
|
|
+ )
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* Index a single markdown file into the database.
|
|
|
*
|
|
|
@@ -49,6 +68,30 @@ export async function initIndex(dbPath: string): Promise<Database> {
|
|
|
* - Upserts changed or new blocks
|
|
|
* - Re-indexes links (delete-all + re-insert within each changed block)
|
|
|
* - Removes blocks that no longer exist in the file
|
|
|
+ *
|
|
|
+ * The page **title** is always non-empty: `computePageTitle` returns the first
|
|
|
+ * `# Heading 1` in the content, falling back to the filename stem (without
|
|
|
+ * `.md` extension) when there's no heading. This guarantees downstream queries
|
|
|
+ * like `WHERE title = 'New Page'` from `[[New Page]]` links resolve correctly
|
|
|
+ * regardless of whether the page has a heading yet.
|
|
|
+ *
|
|
|
+ * @param db — Open SQLite database connection
|
|
|
+ * @param filePath — Path relative to workspace root, e.g. `"pages/Architecture.md"`
|
|
|
+ * @param content — Raw markdown file content
|
|
|
+ *
|
|
|
+ * @remarks Edge cases & deferred work:
|
|
|
+ * - **Existing workspaces** that have pages with empty titles (indexed before
|
|
|
+ * the `computePageTitle` fallback was added) won't get fixed until the
|
|
|
+ * next `fullReindex` (app restart) or incremental re-index (file change).
|
|
|
+ * A one-time migration (`UPDATE pages SET title = filename_stem WHERE
|
|
|
+ * title = ''`) would fix this proactively.
|
|
|
+ * - `modified_at` is set to `Math.floor(Date.now() / 1000)` (whole seconds) at index time, NOT the file's
|
|
|
+ * mtime from `fs::metadata`. This means the same file indexed twice in
|
|
|
+ * quick succession gets the same timestamp, making `ORDER BY modified_at`
|
|
|
+ * unreliable for ordering blocks by actual edit time.
|
|
|
+ * - **Block ID stamping** (`has_id_stamp`) is tracked but stamp creation
|
|
|
+ * (writing `id:: <nanoid>` to the file on reference creation) is deferred
|
|
|
+ * to D2. The indexer is read-only and never modifies files.
|
|
|
*/
|
|
|
export async function indexFile(
|
|
|
db: Database,
|
|
|
@@ -56,17 +99,18 @@ export async function indexFile(
|
|
|
content: string,
|
|
|
): Promise<void> {
|
|
|
const now = Math.floor(Date.now() / 1000)
|
|
|
- const _hash = contentHash(content)
|
|
|
- const title = extractTitle(content)
|
|
|
+ const hash = contentHash(content)
|
|
|
+ const title = computePageTitle(content, filePath)
|
|
|
|
|
|
await db.execute(
|
|
|
- `INSERT INTO pages (path, title, modified_at, word_count)
|
|
|
- VALUES ($1, $2, $3, $4)
|
|
|
+ `INSERT INTO pages (path, title, modified_at, word_count, content_hash)
|
|
|
+ VALUES ($1, $2, $3, $4, $5)
|
|
|
ON CONFLICT(path) DO UPDATE SET
|
|
|
title = excluded.title,
|
|
|
modified_at = excluded.modified_at,
|
|
|
- word_count = excluded.word_count`,
|
|
|
- [filePath, title, now, content.split(/\s+/).length],
|
|
|
+ word_count = excluded.word_count,
|
|
|
+ content_hash = excluded.content_hash`,
|
|
|
+ [filePath, title, now, content.split(/\s+/).length, hash],
|
|
|
)
|
|
|
|
|
|
const existing = await db.select<{ id: string; content_hash: string }[]>(
|
|
|
@@ -139,33 +183,87 @@ export async function removeFile(
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * Rebuild the entire index by walking every .md file in the workspace.
|
|
|
- * Compares existing indexed pages against the filesystem and:
|
|
|
- * - Indexes new or changed files
|
|
|
- * - Removes pages that no longer exist
|
|
|
- * - Skips files that fail to read (logs warning, continues)
|
|
|
+ * Rebuild the entire index from scratch.
|
|
|
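+ * Deletes all existing index data (FK cascade handles blocks, links, FTS; NOTE(review): SQLite only cascades when `PRAGMA foreign_keys = ON` is set on the connection — confirm),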
|
|
|
+ * then re-indexes every markdown file found on disk.
|
|
|
+ * Skips files that fail to read (logs warning, continues).
|
|
|
*/
|
|
|
export async function fullReindex(
|
|
|
opts: IndexerOptions,
|
|
|
db: Database,
|
|
|
): Promise<void> {
|
|
|
+ // Wipe clean — FK cascade handles blocks, links, and FTS
|
|
|
+ await db.execute("DELETE FROM pages")
|
|
|
+
|
|
|
const allFiles = await opts.listDirectory(opts.workspacePath)
|
|
|
const mdFiles = allFiles.filter((f) => f.endsWith(".md"))
|
|
|
|
|
|
- const indexed = await db.select<{ path: string }[]>("SELECT path FROM pages")
|
|
|
- const indexedSet = new Set(indexed.map((p) => p.path))
|
|
|
+ for (const absolutePath of mdFiles) {
|
|
|
+ try {
|
|
|
+ const content = await opts.readFile(absolutePath)
|
|
|
+
|
|
|
+ // Normalize to workspace-relative path for the index.
|
|
|
+ // Storing relative paths means backlink source_page_path can be
|
|
|
+ // passed directly to the router for navigation.
|
|
|
+ const relativePath = toRelativePath(absolutePath, opts.workspacePath)
|
|
|
+
|
|
|
+ await indexFile(db, relativePath, content)
|
|
|
+ } catch (e) {
|
|
|
+ console.warn(`[indexer] failed to index ${absolutePath}:`, e)
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * Incrementally sync the index with the filesystem on app startup.
|
|
|
+ * Skips parsing and SQL writes for files whose content hash hasn't changed.
|
|
|
+ *
|
|
|
+ * Flow for each file on disk:
|
|
|
+ * - **New** (not in DB) — read, hash, full indexFile
|
|
|
+ * - **Changed** (in DB, hash differs) — read, hash, full indexFile
|
|
|
+ * - **Unchanged** (in DB, hash matches) — read, hash, then skip parsing and SQL writes
|
|
|
+ * - **Deleted** (in DB, not on disk) — remove from index (FK cascade)
|
|
|
+ *
|
|
|
+ * @remarks The file watcher starts before this sync completes. If a watched
|
|
|
+ * file changes mid-sync, the watcher fires `indexFile` on that path
|
|
|
+ * concurrently. Worst case: a stale index entry that corrects on the next
|
|
|
+ * change. Low probability, acceptable for startup.
|
|
|
+ */
|
|
|
+export async function syncIndex(
|
|
|
+ opts: IndexerOptions,
|
|
|
+ db: Database,
|
|
|
+): Promise<void> {
|
|
|
+ const allFiles = await opts.listDirectory(opts.workspacePath)
|
|
|
+ const mdFiles = new Set(allFiles.filter((f) => f.endsWith(".md")))
|
|
|
+ const relativePaths = new Set<string>()
|
|
|
+
|
|
|
+ for (const absolutePath of mdFiles) {
|
|
|
+ const relativePath = toRelativePath(absolutePath, opts.workspacePath)
|
|
|
+ relativePaths.add(relativePath)
|
|
|
|
|
|
- for (const filePath of mdFiles) {
|
|
|
try {
|
|
|
- const content = await opts.readFile(filePath)
|
|
|
- await indexFile(db, filePath, content)
|
|
|
+ const content = await opts.readFile(absolutePath)
|
|
|
+ const hash = contentHash(content)
|
|
|
+
|
|
|
+ const rows = await db.select<{ content_hash: string }[]>(
|
|
|
+ "SELECT content_hash FROM pages WHERE path = $1",
|
|
|
+ [relativePath],
|
|
|
+ )
|
|
|
+
|
|
|
+ // Skip unchanged files — no parsing, no SQL writes
|
|
|
+ if (rows.length > 0 && rows[0].content_hash === hash) continue
|
|
|
+
|
|
|
+ await indexFile(db, relativePath, content)
|
|
|
} catch (e) {
|
|
|
- console.warn(`[indexer] failed to index ${filePath}:`, e)
|
|
|
+ console.warn(`[indexer] failed to sync ${absolutePath}:`, e)
|
|
|
}
|
|
|
- indexedSet.delete(filePath)
|
|
|
}
|
|
|
|
|
|
- for (const stalePath of indexedSet) {
|
|
|
- await removeFile(db, stalePath)
|
|
|
+ // Remove files that no longer exist on disk
|
|
|
+ const indexed = await db.select<{ path: string }[]>("SELECT path FROM pages")
|
|
|
+ for (const row of indexed) {
|
|
|
+ if (!relativePaths.has(row.path)) {
|
|
|
+ await db.execute("DELETE FROM pages WHERE path = $1", [row.path])
|
|
|
+ }
|
|
|
}
|
|
|
}
|