parse.ts 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172
  /**
   * Cheap, non-cryptographic content hash used for change detection.
   *
   * Folds every UTF-16 code unit into a rolling hash
   * (`hash = hash * 31 + codeUnit`), wrapping to a signed 32-bit integer after
   * each step, and renders the result in base 36.
   *
   * @param content - Text to hash.
   * @returns Base-36 encoding of the signed 32-bit hash; negative hashes carry
   *   a leading `-`, and the empty string hashes to `"0"`.
   */
  export function contentHash(content: string): string {
    let acc = 0
    for (let pos = 0; pos < content.length; pos += 1) {
      // acc stays within int32, so acc * 31 is exact in a double; `| 0` wraps
      // the sum back to signed 32 bits.
      acc = (acc * 31 + content.charCodeAt(pos)) | 0
    }
    return acc.toString(36)
  }
  /**
   * Classify a block by inspecting the start of its content.
   *
   * Leading whitespace is ignored. The rules are tried in order and the first
   * matching one wins, so more specific syntaxes (e.g. callouts) are listed
   * before the general ones they overlap with (blockquotes). Content matching
   * no rule is a `"paragraph"`.
   *
   * The returned names are meant to line up with the `block_type` CHECK
   * constraint in the schema (defined outside this file).
   *
   * @param content - Raw block text.
   * @returns One of `"heading"`, `"task"`, `"callout"`, `"blockquote"`,
   *   `"code"`, `"table"`, `"math"`, `"property"`, or `"paragraph"`.
   */
  export function detectBlockType(content: string): string {
    const text = content.trimStart()

    // Ordered rule table: [pattern anchored at the start of the text, type].
    const rules: Array<[RegExp, string]> = [
      [/^#{1,6}\s/, "heading"],
      [
        /^TODO\s|^DOING\s|^DONE\s|^LATER\s|^NOW\s|^WAITING\s|^CANCELLED\s/i,
        "task",
      ],
      [/^>\s*\[!/, "callout"], // must precede the plain blockquote rule
      [/^>\s/, "blockquote"],
      [/^```/, "code"],
      [/^\|.+\|/, "table"],
      [/^\$\$/, "math"],
      [/^\w+::\s/, "property"],
    ]

    for (const [pattern, blockType] of rules) {
      if (pattern.test(text)) return blockType
    }
    return "paragraph"
  }
  /**
   * Extract the text of the first level-1 heading (`# Heading`) in markdown
   * content.
   *
   * The heading may sit on any line: the `m` flag lets `^` anchor at each line
   * start. Deeper headings (`##`, `###`, ...) are not matched because the `#`
   * must be followed directly by a space.
   *
   * @param content - Markdown text to scan.
   * @returns The trimmed heading text, or `""` when there is no level-1 heading.
   */
  export function extractTitle(content: string): string {
    const match = content.match(/^# (.+)/m)
    // Collapse both "no match" and "group missing" to "", so the result is a
    // plain string even when indexed access is typed as possibly undefined.
    return match?.[1]?.trim() ?? ""
  }
  /** A reference found inside a block's text. */
  export interface ExtractedLink {
    /** Captured reference text without its delimiters (page name, block id, or tag name). */
    target: string
    /** Which reference syntax produced this link. */
    linkType: "page-ref" | "block-ref" | "tag"
    /** String index within the block content where the reference syntax starts (`[[`, `((`, or `#`). */
    offset: number
  }

  /**
   * Extract `[[Page Name]]`, `((block-id))`, and `#tag` references from block
   * content.
   *
   * Results are grouped by kind (all page refs, then block refs, then tags),
   * each group in order of appearance; they are not sorted by offset.
   *
   * - Page refs may carry an alias (`[[Page|alias]]`); only the part before
   *   `|` becomes the target.
   * - Block ids consist of letters, digits, `_`, and `-`.
   * - A tag must open the content or follow whitespace, so `a#b` is not a tag.
   *
   * @param content - Block text to scan.
   * @returns One entry per reference, with `offset` pointing at the first
   *   character of the reference syntax.
   */
  export function extractLinks(content: string): ExtractedLink[] {
    const links: ExtractedLink[] = []

    for (const m of content.matchAll(/\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g)) {
      const target = m[1]
      if (target === undefined) continue // group 1 always participates; guard is for typing only
      links.push({ target, linkType: "page-ref", offset: m.index ?? 0 })
    }

    for (const m of content.matchAll(/\(\(([a-zA-Z0-9_-]+)\)\)/g)) {
      const target = m[1]
      if (target === undefined) continue
      links.push({ target, linkType: "block-ref", offset: m.index ?? 0 })
    }

    for (const m of content.matchAll(/(?:\s|^)#([a-zA-Z0-9_-]+)/g)) {
      const target = m[1]
      if (target === undefined) continue
      const start = m.index ?? 0
      // The match begins on the whitespace separator, except when the tag
      // opens the content (the `^` alternative consumes nothing). Skip the
      // separator only when it is there, so the offset always lands on `#`.
      const offset = content.charAt(start) === "#" ? start : start + 1
      links.push({ target, linkType: "tag", offset })
    }

    return links
  }
  66. }