소스 검색

feat: add code_block schema node and fenced code content model

Zander Hawke 1 주 전
부모
커밋
d226d7908b

+ 109 - 0
packages/editor/src/lib/__tests__/content-model.test.ts

@@ -18,6 +18,48 @@ describe("contentToDoc", () => {
     expect(doc.child(1).type.name).toBe("paragraph")
   })
 
+  it("handles fenced code block with backticks", () => {
+    const doc = contentToDoc("```python\ndef foo():\n    pass\n```")
+    expect(doc.childCount).toBe(1)
+    expect(doc.child(0).type.name).toBe("code_block")
+    expect(doc.child(0).attrs.language).toBe("python")
+    expect(doc.child(0).textContent).toBe("```python\ndef foo():\n    pass\n```")
+  })
+
+  it("handles fenced code block with tildes", () => {
+    const doc = contentToDoc("~~~js\nconsole.log('hi')\n~~~")
+    expect(doc.childCount).toBe(1)
+    expect(doc.child(0).type.name).toBe("code_block")
+    expect(doc.child(0).attrs.language).toBe("js")
+    expect(doc.child(0).textContent).toBe("~~~js\nconsole.log('hi')\n~~~")
+  })
+
+  it("handles fenced code block without language", () => {
+    const doc = contentToDoc("```\nplain code\n```")
+    expect(doc.childCount).toBe(1)
+    expect(doc.child(0).type.name).toBe("code_block")
+    expect(doc.child(0).attrs.language).toBe("")
+    expect(doc.child(0).textContent).toBe("```\nplain code\n```")
+  })
+
+  it("handles code block with empty content", () => {
+    const doc = contentToDoc("```\n```")
+    expect(doc.childCount).toBe(1)
+    expect(doc.child(0).type.name).toBe("code_block")
+    expect(doc.child(0).textContent).toBe("```\n```")
+  })
+
+  it("mixes code blocks and paragraphs", () => {
+    const doc = contentToDoc("before\n```\ncode\n```\nafter")
+    expect(doc.childCount).toBe(3)
+    expect(doc.child(0).type.name).toBe("paragraph")
+    expect(doc.child(0).textContent).toBe("before")
+    expect(doc.child(1).type.name).toBe("code_block")
+    expect(doc.child(1).textContent).toBe("```\ncode\n```")
+    expect(doc.child(2).type.name).toBe("paragraph")
+    expect(doc.child(2).textContent).toBe("after")
+  })
+
   it("handles unicode and emoji", () => {
     const doc = contentToDoc("Hello 🌍 你好 🔥")
     expect(doc.child(0).textContent).toBe("Hello 🌍 你好 🔥")
@@ -54,6 +96,48 @@ describe("docToContent", () => {
     expect(docToContent(doc)).toBe("line1\nline2")
   })
 
+  it("serializes code block with language", () => {
+    const doc = schema.nodes.doc.create(null, [
+      schema.nodes.code_block.create(
+        { language: "python" },
+        [schema.text("```python\ndef foo():\n    pass\n```")],
+      ),
+    ])
+    expect(docToContent(doc)).toBe("```python\ndef foo():\n    pass\n```")
+  })
+
+  it("serializes code block without language", () => {
+    const doc = schema.nodes.doc.create(null, [
+      schema.nodes.code_block.create(
+        null,
+        [schema.text("```\nplain code\n```")],
+      ),
+    ])
+    expect(docToContent(doc)).toBe("```\nplain code\n```")
+  })
+
+  it("serializes code block with empty content", () => {
+    const doc = schema.nodes.doc.create(null, [
+      schema.nodes.code_block.create(
+        null,
+        [schema.text("```\n```")],
+      ),
+    ])
+    expect(docToContent(doc)).toBe("```\n```")
+  })
+
+  it("serializes mixed paragraphs and code blocks", () => {
+    const doc = schema.nodes.doc.create(null, [
+      schema.nodes.paragraph.create(null, [schema.text("before")]),
+      schema.nodes.code_block.create(
+        { language: "js" },
+        [schema.text("```js\ncode\n```")],
+      ),
+      schema.nodes.paragraph.create(null, [schema.text("after")]),
+    ])
+    expect(docToContent(doc)).toBe("before\n```js\ncode\n```\nafter")
+  })
+
   it("handles empty paragraph", () => {
     const doc = schema.nodes.doc.create(null, [schema.nodes.paragraph.create()])
     expect(docToContent(doc)).toBe("")
@@ -84,4 +168,29 @@ describe("roundtrip", () => {
     const doc = contentToDoc(original)
     expect(docToContent(doc)).toBe(original)
   })
+
+  it("fenced code block with language roundtrips", () => {
+    const original = "```python\ndef foo():\n    pass\n```"
+    const doc = contentToDoc(original)
+    expect(docToContent(doc)).toBe(original)
+  })
+
+  it("fenced code block without language roundtrips", () => {
+    const original = "```\nplain code\n```"
+    const doc = contentToDoc(original)
+    expect(docToContent(doc)).toBe(original)
+  })
+
+  it("mixed paragraphs and code blocks roundtrips", () => {
+    const original = "before\n```js\ncode\n```\nafter"
+    const doc = contentToDoc(original)
+    expect(docToContent(doc)).toBe(original)
+  })
+
+  it("tildes fenced code block roundtrips", () => {
+    const original = "~~~js\nconsole.log('hi')\n~~~"
+    const doc = contentToDoc(original)
+    // Fences are preserved as-is in the text content
+    expect(docToContent(doc)).toBe(original)
+  })
 })

+ 71 - 18
packages/editor/src/lib/content-model.ts

@@ -2,45 +2,98 @@
  * Content model conversion functions.
  *
  * Converts between raw markdown string and ProseMirror document nodes.
- * Each block is one or more <p> paragraphs; one paragraph per line.
+ * Supports paragraphs (one per line) and fenced code blocks.
  */
 
 import type { Node as ProsemirrorNode } from "prosemirror-model"
+import { createLogger } from "@/lib/logger"
 import { schema } from "@/lib/schema"
 
+const log = createLogger("ContentModel")
+
 /**
  * Convert markdown string to ProseMirror document.
- * Each line becomes one paragraph.
- * @param content - Raw markdown string
- * @returns ProseMirror document node
+ * Lines from a ``` / ~~~ opener through the matching closing fence become one code_block node (fence lines stay in its text).
+ * Every other line becomes its own paragraph; empty input yields a single empty paragraph.
  */
 export function contentToDoc(content: string): ProsemirrorNode {
+  const nodes: ProsemirrorNode[] = []
+
   if (!content) {
-    return schema.nodes.doc.create(null, [
-      schema.nodes.paragraph.create()
-    ])
+    return schema.nodes.doc.create(null, [schema.nodes.paragraph.create()])
   }
 
-  const paragraphs = content.split("\n").map(line =>
-    line.length === 0
-      ? schema.nodes.paragraph.create()
-      : schema.nodes.paragraph.create(null, [schema.text(line)]),
-  )
+  const lines = content.split("\n")
+  let i = 0
+  let codeBlockCount = 0
+  let paraCount = 0
+  while (i < lines.length) {
+    const fence = lines[i]?.match(/^(```|~~~)\s*(\w*)/)
+    if (fence) {
+      codeBlockCount++
+      log.debug("contentToDoc fence", { language: fence[2], line: i })
+      const marker = fence[1]!
+      const language = fence[2] ?? ""
+      const codeLines: string[] = [lines[i]!]
+      i++
+      while (i < lines.length && !lines[i]?.startsWith(marker)) {
+        codeLines.push(lines[i] ?? "")
+        i++
+      }
+      // Include the closing fence line if one was found; an unterminated fence runs to the end of input
+      if (i < lines.length) {
+        codeLines.push(lines[i]!)
+      }
+      i++
+      const codeText = codeLines.join("\n")
+      nodes.push(
+        schema.nodes.code_block.create(
+          { language },
+          codeText ? [schema.text(codeText)] : [],
+        ),
+      )
+    } else {
+      const line = lines[i]!
+      paraCount++
+      nodes.push(
+        line.length === 0
+          ? schema.nodes.paragraph.create()
+          : schema.nodes.paragraph.create(null, [schema.text(line)]),
+      )
+      i++
+    }
+  }
 
-  return schema.nodes.doc.create(null, paragraphs)
+  log.debug("contentToDoc result", {
+    paragraphs: paraCount,
+    codeBlocks: codeBlockCount,
+    totalLines: lines.length,
+  })
+  return schema.nodes.doc.create(null, nodes)
 }
 
 /**
  * Convert ProseMirror document to markdown string.
- * @param doc - ProseMirror document node
- * @returns Raw markdown string
+ * Joins each top-level node's raw text with "\n"; code_block text built by contentToDoc already carries its fence lines.
  */
 export function docToContent(doc: ProsemirrorNode): string {
   const lines: string[] = []
+  let codeBlockCount = 0
 
-  doc.forEach((paragraph: ProsemirrorNode) => {
-    lines.push(paragraph.textContent)
+  doc.forEach((node: ProsemirrorNode) => {
+    // Paragraphs and code blocks serialize the same way (raw textContent);
+    // the type check exists only to feed the debug counter below.
+    if (node.type.name === "code_block") {
+      codeBlockCount++
+    }
+    lines.push(node.textContent)
   })
 
-  return lines.join("\n")
+  const result = lines.join("\n")
+  log.debug("docToContent", {
+    nodeCount: doc.childCount,
+    codeBlocks: codeBlockCount,
+    resultLen: result.length,
+  })
+  return result
 }

+ 18 - 0
packages/editor/src/lib/markdown-rules/block-classifier.ts

@@ -18,6 +18,7 @@ export type BlockKind =
   | "callout"
   | "blockquote"
   | "task"
+  | "code"
 
 export type ParagraphResult = { kind: "paragraph" }
 
@@ -29,12 +30,15 @@ export type CalloutResult = { kind: "callout"; calloutType: string }
 
 export type BlockquoteResult = { kind: "blockquote" }
 
+export type CodeResult = { kind: "code"; language: string }
+
 export type ClassificationResult =
   | ParagraphResult
   | HeadingResult
   | TaskResult
   | CalloutResult
   | BlockquoteResult
+  | CodeResult
 
 // ─── Regex constants ────────────────────────────────────────────────
 
@@ -50,6 +54,9 @@ const BLOCKQUOTE_RE = /^>\s*(.*)/
 /** Task: `  TODO buy milk` — optional leading whitespace */
 const TASK_RE = /^\s*(TODO|DOING|DONE|LATER|NOW|WAITING|CANCELLED)\s+(.*)/i
 
+/** Fenced code opener: ``` or ~~~ at line start, optional whitespace, then an optional language word (e.g. ```python) */
+const CODE_FENCE_RE = /^(```|~~~)\s*(\w*)/
+
 // ─── Public API ─────────────────────────────────────────────────────
 
 /**
@@ -102,5 +109,16 @@ export function classifyBlock(text: string): ClassificationResult {
     }
   }
 
+  {
+    const match = text.match(CODE_FENCE_RE)
+    if (match) {
+      return {
+        kind: "code",
+        // biome-ignore lint/style/noNonNullAssertion: match confirmed by CODE_FENCE_RE
+        language: match[2]!,
+      }
+    }
+  }
+
   return { kind: "paragraph" }
 }

+ 24 - 1
packages/editor/src/lib/schema.ts

@@ -7,7 +7,7 @@
  */
 import { type NodeSpec, Schema } from "prosemirror-model"
 
-type DOMNode = "doc" | "paragraph" | "text"
+type DOMNode = "doc" | "paragraph" | "code_block" | "text"
 
 /** Node definitions */
 const nodes: Record<DOMNode, NodeSpec> = {
@@ -22,6 +22,29 @@ const nodes: Record<DOMNode, NodeSpec> = {
     parseDOM: [{ tag: "p" }],
     toDOM: () => ["p", 0],
   },
+  /** Fenced code block — rendered via CodeMirror 6 node view */
+  code_block: {
+    group: "block",
+    content: "text*",
+    code: true,
+    defining: true,
+    attrs: { language: { default: "" } },
+    parseDOM: [
+      {
+        tag: "pre",
+        getAttrs(dom) {
+          // toDOM puts the class on <code>; fall back to <pre> for markup that carries it there.
+          const cls = dom.querySelector("code")?.className || dom.className
+          return { language: cls.match(/language-(\w+)/)?.[1] ?? "" }
+        },
+      },
+    ],
+    toDOM: (node) => [
+      "pre",
+      // Class lives on <code> so getAttrs above reads the language back; no class attr when unset.
+      ["code", node.attrs.language ? { class: `language-${node.attrs.language}` } : {}, 0],
+    ],
+  },
   /** Text content */
   text: {
     group: "inline",