diff --git a/packages/interpreter/Readme.md b/packages/interpreter/Readme.md index 8266745c..dbbdb913 100644 --- a/packages/interpreter/Readme.md +++ b/packages/interpreter/Readme.md @@ -46,7 +46,34 @@ For example, here's an excerpt of some notes I wrote as I was studying Service W > > A. Two. -The empty line between the question and answer is optional. The question and answer cannot currently span multiple paragraphs: the paragraph including `Q. ` or `A. ` is extracted as that field. +The empty line between the question and answer is optional. You can also write it like this, with only a single line break separating the question and answer. + +> Q. How many dimensions are in a qubit's vector space? \ +> A. Two. + +If you'd like to make the question or answer field span multiple Markdown blocks, add a newline after the `Q.` or `A.` prefix, like this: + +``` +Q. +What is this a picture of? + +1. Apples +2. Bananas +3. Pears + +A. +Apples + + +Q. How to do the hokey pokey? +A. +1. You put your right foot in +2. You put your right foot out +3. You put your right foot in +4. And you shake it all about +``` + +As shown above, you can mix and match the "multi-block" style with the "single-line" style. In "multi-block" mode, all the content after the prefix will be included in the field: a question runs until its `A.` prefix, and an answer runs until the next question, heading, or horizontal rule (`---`). A heading or horizontal rule between a question and its answer prevents the prompt from being recognized. 
#### Creating cloze deletion prompts diff --git a/packages/interpreter/src/interpreters/markdown/MarkdownInterpreter.ts b/packages/interpreter/src/interpreters/markdown/MarkdownInterpreter.ts index 90b5dc9c..391a9719 100644 --- a/packages/interpreter/src/interpreters/markdown/MarkdownInterpreter.ts +++ b/packages/interpreter/src/interpreters/markdown/MarkdownInterpreter.ts @@ -11,7 +11,7 @@ import { IngestibleSource, IngestibleSourceIdentifier, } from "@withorbit/ingester"; -import mdast, * as Mdast from "mdast"; +import * as Mdast from "mdast"; import { selectAll } from "unist-util-select"; import { Hasher } from "../../hasher/hasher.js"; import { InterpretableFile, Interpreter } from "../../interpreter.js"; @@ -78,13 +78,13 @@ function convertInterpreterPromptToIngestible(prompt: Prompt): TaskSpec { type: TaskContentType.QA, body: { text: processor - .stringify(prompt.question as unknown as mdast.Root) + .stringify({ type: "root", children: prompt.question }) .trimEnd(), attachments: [], }, answer: { text: processor - .stringify(prompt.answer as unknown as mdast.Root) + .stringify({ type: "root", children: prompt.answer }) .trimEnd(), attachments: [], }, diff --git a/packages/interpreter/src/interpreters/markdown/markdown.test.ts b/packages/interpreter/src/interpreters/markdown/markdown.test.ts index 3261f1a3..f9d2a205 100644 --- a/packages/interpreter/src/interpreters/markdown/markdown.test.ts +++ b/packages/interpreter/src/interpreters/markdown/markdown.test.ts @@ -68,12 +68,12 @@ test("cloze in backlink section", () => { expect(prompts).toHaveLength(0); }); -test("QA prompt in blockquote", () => { +test("QA prompts aren't recognized in blockquote", () => { const prompts = getPrompts(`# Heading > Q. Test. > A. Answer. 
`); - expect(prompts).toHaveLength(1); + expect(prompts).toHaveLength(0); }); diff --git a/packages/interpreter/src/interpreters/markdown/markdown.ts b/packages/interpreter/src/interpreters/markdown/markdown.ts index 00d35960..5bcf7189 100644 --- a/packages/interpreter/src/interpreters/markdown/markdown.ts +++ b/packages/interpreter/src/interpreters/markdown/markdown.ts @@ -26,10 +26,10 @@ export interface ClozePrompt extends JsonMap { } export const qaPromptType = "qaPrompt"; -export interface QAPrompt extends JsonMap { +export interface QAPrompt { type: typeof qaPromptType; - question: mdast.RootContent & JsonMap; - answer: mdast.RootContent & JsonMap; + question: mdast.RootContent[]; + answer: mdast.RootContent[]; } export type Prompt = ClozePrompt | QAPrompt; @@ -43,8 +43,8 @@ export interface ClozePromptNode extends unist.Node { export const qaPromptNodeType = "qaPrompt"; export interface QAPromptNode extends unist.Node { type: typeof qaPromptNodeType; - question: mdast.RootContent; - answer: mdast.RootContent; + question: mdast.RootContent[]; + answer: mdast.RootContent[]; } type NodeWithParent = mdast.Nodes & { @@ -104,8 +104,8 @@ export function findAllPrompts(tree: mdast.Root): Prompt[] { const qaPromptNode = n as QAPromptNode; const qaPrompt: QAPrompt = { type: "qaPrompt", - question: qaPromptNode.question as mdast.RootContent & JsonMap, - answer: qaPromptNode.answer as mdast.RootContent & JsonMap, + question: qaPromptNode.question, + answer: qaPromptNode.answer, }; return qaPrompt; }); diff --git a/packages/interpreter/src/interpreters/markdown/plugins/qaPromptPlugin.test.ts b/packages/interpreter/src/interpreters/markdown/plugins/qaPromptPlugin.test.ts index 5791ae68..e64bcc6f 100644 --- a/packages/interpreter/src/interpreters/markdown/plugins/qaPromptPlugin.test.ts +++ b/packages/interpreter/src/interpreters/markdown/plugins/qaPromptPlugin.test.ts @@ -18,27 +18,25 @@ Some more text`; const qaPromptNode = select(qaPromptNodeType, ast)! 
as QAPromptNode; expect( input.slice( - qaPromptNode.question.position!.start.offset, - qaPromptNode.question.position!.end.offset, + qaPromptNode.question[0].position!.start.offset, + qaPromptNode.question.at(-1)!.position!.end.offset, ), - ).toEqual("A question prompt"); + ).toEqual("Q. A question prompt"); expect( input.slice( - qaPromptNode.answer.position!.start.offset, - qaPromptNode.answer.position!.end.offset, + qaPromptNode.answer[0].position!.start.offset, + qaPromptNode.answer.at(-1)!.position!.end.offset, ), - ).toEqual("An answer prompt"); - expect(qaPromptNode.question.position!.start.column).toEqual(4); // n.b. column is 1-indexed! - expect(qaPromptNode.answer.position!.start.column).toEqual(4); + ).toEqual("A. An answer prompt"); expect( processor - .stringify({ type: "root", children: [qaPromptNode.question] }) + .stringify({ type: "root", children: qaPromptNode.question }) .trimEnd(), ).toEqual("A question prompt"); expect( processor - .stringify({ type: "root", children: [qaPromptNode.answer] }) + .stringify({ type: "root", children: qaPromptNode.answer }) .trimEnd(), ).toEqual("An answer prompt"); }); @@ -54,29 +52,27 @@ Some more text`; const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; expect( input.slice( - qaPromptNode.question.position!.start.offset, - qaPromptNode.question.position!.end.offset, + qaPromptNode.question[0].position!.start.offset, + qaPromptNode.question.at(-1)!.position!.end.offset, ), - ).toEqual("A question prompt"); + ).toEqual("Q. A question prompt"); expect( input.slice( - qaPromptNode.answer.position!.start.offset, - qaPromptNode.answer.position!.end.offset, + qaPromptNode.answer[0].position!.start.offset, + qaPromptNode.answer.at(-1)!.position!.end.offset, ), - ).toEqual("An answer prompt"); - expect(qaPromptNode.question.position!.start.column).toEqual(4); // n.b. column is 1-indexed! 
- expect(qaPromptNode.question.position!.start.line).toEqual(3); - expect(qaPromptNode.answer.position!.start.column).toEqual(4); - expect(qaPromptNode.answer.position!.start.line).toEqual(4); + ).toEqual("A. An answer prompt"); + expect(qaPromptNode.question[0].position!.start.line).toEqual(3); + expect(qaPromptNode.answer[0].position!.start.line).toEqual(4); expect( processor - .stringify({ type: "root", children: [qaPromptNode.question] }) + .stringify({ type: "root", children: qaPromptNode.question }) .trimEnd(), ).toEqual("A question prompt"); expect( processor - .stringify({ type: "root", children: [qaPromptNode.answer] }) + .stringify({ type: "root", children: qaPromptNode.answer }) .trimEnd(), ).toEqual("An answer prompt"); }); @@ -92,29 +88,27 @@ Some more text`; const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; expect( input.slice( - qaPromptNode.question.position!.start.offset, - qaPromptNode.question.position!.end.offset, + qaPromptNode.question[0].position!.start.offset, + qaPromptNode.question.at(-1)!.position!.end.offset, ), - ).toEqual("A question *prompt*"); + ).toEqual("Q. A question *prompt*"); expect( input.slice( - qaPromptNode.answer.position!.start.offset, - qaPromptNode.answer.position!.end.offset, + qaPromptNode.answer[0].position!.start.offset, + qaPromptNode.answer.at(-1)!.position!.end.offset, ), - ).toEqual("An answer prompt"); - expect(qaPromptNode.question.position!.start.column).toEqual(4); // n.b. column is 1-indexed! - expect(qaPromptNode.question.position!.start.line).toEqual(3); - expect(qaPromptNode.answer.position!.start.column).toEqual(4); - expect(qaPromptNode.answer.position!.start.line).toEqual(4); + ).toEqual("A. 
An answer prompt"); + expect(qaPromptNode.question[0].position!.start.line).toEqual(3); + expect(qaPromptNode.answer[0].position!.start.line).toEqual(4); expect( processor - .stringify({ type: "root", children: [qaPromptNode.question] }) + .stringify({ type: "root", children: qaPromptNode.question }) .trimEnd(), ).toEqual("A question *prompt*"); expect( processor - .stringify({ type: "root", children: [qaPromptNode.answer] }) + .stringify({ type: "root", children: qaPromptNode.answer }) .trimEnd(), ).toEqual("An answer prompt"); }); @@ -132,34 +126,200 @@ Some more text`; const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; expect( input.slice( - qaPromptNode.question.position!.start.offset, - qaPromptNode.question.position!.end.offset, + qaPromptNode.question[0].position!.start.offset, + qaPromptNode.question.at(-1)!.position!.end.offset, ), - ).toEqual("A question\nprompt"); + ).toEqual("Q. A question\nprompt"); expect( input.slice( - qaPromptNode.answer.position!.start.offset, - qaPromptNode.answer.position!.end.offset, + qaPromptNode.answer[0].position!.start.offset, + qaPromptNode.answer.at(-1)!.position!.end.offset, ), - ).toEqual("An answer\nprompt"); - expect(qaPromptNode.question.position!.start.column).toEqual(4); // n.b. column is 1-indexed! - expect(qaPromptNode.question.position!.start.line).toEqual(3); - expect(qaPromptNode.answer.position!.start.column).toEqual(4); - expect(qaPromptNode.answer.position!.start.line).toEqual(5); + ).toEqual("A. 
An answer\nprompt"); + expect(qaPromptNode.question[0].position!.start.line).toEqual(3); + expect(qaPromptNode.answer[0].position!.start.line).toEqual(5); expect( processor - .stringify({ type: "root", children: [qaPromptNode.question] }) + .stringify({ type: "root", children: qaPromptNode.question }) .trimEnd(), ).toEqual("A question\nprompt"); expect( processor - .stringify({ type: "root", children: [qaPromptNode.answer] }) + .stringify({ type: "root", children: qaPromptNode.answer }) .trimEnd(), ).toEqual("An answer\nprompt"); }); - test("single line", () => { + test("multiblock question with answer in separate paragraph", () => { + const input = `Q. +First paragraph. + +Second paragraph. + +A. An answer`; + const ast = processor.runSync(processor.parse(input)); + const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; + expect( + input.slice( + qaPromptNode.question[0].position!.start.offset, + qaPromptNode.question.at(-1)!.position!.end.offset, + ), + ).toEqual("Q.\nFirst paragraph.\n\nSecond paragraph."); + expect( + input.slice( + qaPromptNode.answer[0].position!.start.offset, + qaPromptNode.answer.at(-1)!.position!.end.offset, + ), + ).toEqual("A. An answer"); + expect(qaPromptNode.question[0].position!.start.line).toEqual(1); + expect(qaPromptNode.answer[0].position!.start.line).toEqual(6); + + expect( + processor + .stringify({ type: "root", children: qaPromptNode.question }) + .trimEnd(), + ).toEqual("First paragraph.\n\nSecond paragraph."); + expect( + processor + .stringify({ type: "root", children: qaPromptNode.answer }) + .trimEnd(), + ).toEqual("An answer"); + }); + + test("multiblock question with answer in last block", () => { + const input = `Q. +First paragraph. + +Second paragraph. +A. An answer`; + const ast = processor.runSync(processor.parse(input)); + const qaPromptNode = select(qaPromptNodeType, ast)! 
as QAPromptNode; + expect( + input.slice( + qaPromptNode.question[0].position!.start.offset, + qaPromptNode.question.at(-1)!.position!.end.offset, + ), + ).toEqual("Q.\nFirst paragraph.\n\nSecond paragraph."); + expect( + input.slice( + qaPromptNode.answer[0].position!.start.offset, + qaPromptNode.answer.at(-1)!.position!.end.offset, + ), + ).toEqual("A. An answer"); + expect(qaPromptNode.question[0].position!.start.line).toEqual(1); + expect(qaPromptNode.answer[0].position!.start.line).toEqual(5); + + expect( + processor + .stringify({ type: "root", children: qaPromptNode.question }) + .trimEnd(), + ).toEqual("First paragraph.\n\nSecond paragraph."); + expect( + processor + .stringify({ type: "root", children: qaPromptNode.answer }) + .trimEnd(), + ).toEqual("An answer"); + }); + + test("multiblock answer", () => { + const input = `Q. Foo +A. +First + +Second`; + const ast = processor.runSync(processor.parse(input)); + const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; + expect( + input.slice( + qaPromptNode.question[0].position!.start.offset, + qaPromptNode.question.at(-1)!.position!.end.offset, + ), + ).toEqual("Q. Foo"); + expect( + input.slice( + qaPromptNode.answer[0].position!.start.offset, + qaPromptNode.answer.at(-1)!.position!.end.offset, + ), + ).toEqual("A.\nFirst\n\nSecond"); + expect(qaPromptNode.question[0].position!.start.line).toEqual(1); + expect(qaPromptNode.answer[0].position!.start.line).toEqual(2); + + expect( + processor + .stringify({ type: "root", children: qaPromptNode.question }) + .trimEnd(), + ).toEqual("Foo"); + expect( + processor + .stringify({ type: "root", children: qaPromptNode.answer }) + .trimEnd(), + ).toEqual("First\n\nSecond"); + }); + + test("multiblock answer terminated in heading", () => { + const input = `Q. Foo +A. +First + +Second + +# Heading`; + const ast = processor.runSync(processor.parse(input)); + const qaPromptNode = select(qaPromptNodeType, ast)! 
as QAPromptNode; + expect( + processor + .stringify({ type: "root", children: qaPromptNode.answer }) + .trimEnd(), + ).toEqual("First\n\nSecond"); + }); + + test("multiblock answer terminated in thematic break", () => { + const input = `Q. Foo +A. +First + +Second + +---`; + const ast = processor.runSync(processor.parse(input)); + const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; + expect( + processor + .stringify({ type: "root", children: qaPromptNode.answer }) + .trimEnd(), + ).toEqual("First\n\nSecond"); + }); + + test("multiblock q and a", () => { + const input = `Q. +What's this? + +![](testimage.png) + +A. +Another multiblock + +Answer + +--- + +More irrelevant text.`; + const ast = processor.runSync(processor.parse(input)); + const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; + expect( + processor + .stringify({ type: "root", children: qaPromptNode.question }) + .trimEnd(), + ).toEqual("What's this?\n\n![](testimage.png)"); + expect( + processor + .stringify({ type: "root", children: qaPromptNode.answer }) + .trimEnd(), + ).toEqual("Another multiblock\n\nAnswer"); + }); + + test("single line shouldn't recognize", () => { const input = `Some other text Q. A question *prompt*. A. An answer prompt @@ -170,7 +330,7 @@ Some more text`; expect(qaPromptNode).toBeUndefined(); }); - test("fake QA prompt", () => { + test("answer only shouldn't recognize", () => { const input = `Some other text A. An answer prompt @@ -181,6 +341,36 @@ Some more text`; expect(qaPromptNode).toBeUndefined(); }); + test("single-line-style question block doesn't automatically extend", () => { + const input = `Q. A question + +With another paragraph + +A. And then an answer block`; + const ast = processor.runSync(processor.parse(input)); + const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; + expect(qaPromptNode).toBeUndefined(); + }); + + test("multi-line question block is interrupted by a heading or ---", () => { + const input = `Q. 
+A question + +--- + +A. And then an answer block + +Q. +Another question + +# A heading + +A. An answer`; + const ast = processor.runSync(processor.parse(input)); + const qaPromptNode = select(qaPromptNodeType, ast)! as QAPromptNode; + expect(qaPromptNode).toBeUndefined(); + }); + test("multiple prompts", () => { const input = `Some other text @@ -196,4 +386,56 @@ Some more text`; const ast = processor.runSync(processor.parse(input)); expect(selectAll(qaPromptNodeType, ast)).toHaveLength(2); }); + + test("questions terminate multiblock answers", () => { + const input = `Q. Test +A. +Multi + +Block answer + +Q. Another q +A. Another a`; + const ast = processor.runSync(processor.parse(input)); + const qs = selectAll(qaPromptNodeType, ast) as QAPromptNode[]; + expect(qs).toHaveLength(2); + + expect( + processor.stringify({ type: "root", children: qs[0].answer }).trimEnd(), + ).toEqual("Multi\n\nBlock answer"); + + expect( + processor.stringify({ type: "root", children: qs[1].question }).trimEnd(), + ).toEqual("Another q"); + }); + + test("questions terminate multiblock questions", () => { + const input = `Q. +Test + +Q. Another q + +A. Another a +`; + const ast = processor.runSync(processor.parse(input)); + const qs = selectAll(qaPromptNodeType, ast) as QAPromptNode[]; + expect(qs).toHaveLength(1); + + expect( + processor.stringify({ type: "root", children: qs[0].question }).trimEnd(), + ).toEqual("Another q"); + + expect( + processor.stringify({ type: "root", children: qs[0].answer }).trimEnd(), + ).toEqual("Another a"); + }); + + test("don't recognize answer prefix when it is on a later line of a subsequent paragraph block", () => { + const input = `Q. Test + +Another line. +A. 
Chouara`; + const ast = processor.runSync(processor.parse(input)); + expect(selectAll(qaPromptNodeType, ast)).toHaveLength(0); + }); }); diff --git a/packages/interpreter/src/interpreters/markdown/plugins/qaPromptPlugin.ts b/packages/interpreter/src/interpreters/markdown/plugins/qaPromptPlugin.ts index 2a52cac2..c55d53ea 100644 --- a/packages/interpreter/src/interpreters/markdown/plugins/qaPromptPlugin.ts +++ b/packages/interpreter/src/interpreters/markdown/plugins/qaPromptPlugin.ts @@ -1,11 +1,7 @@ import mdast from "mdast"; import * as unified from "unified"; -import unist from "unist"; -import { parents } from "unist-util-parents"; -import * as unistUtilSelect from "unist-util-select"; import { QAPromptNode, qaPromptNodeType } from "../markdown.js"; -// TODO: don't match QA prompts inside coxde and html blocks export default function qaPromptPlugin(this: unified.Processor) { return extractQAPromptNodes; } @@ -16,194 +12,196 @@ declare module "mdast" { } } -const questionPrefix = "Q. "; -const answerPrefix = "A. 
"; -const answerSplitRegexp = new RegExp(`\n${answerPrefix}`, "m"); - -type NodeWithParent = unist.Node & { - parent?: NodeWithParent; - node: N; -}; - -function extractQAPromptNodes(node: unist.Node): unist.Node { - const nodeWithParents = parents(node); - const answerNodes = unistUtilSelect.selectAll( - `paragraph>text[value^='${answerPrefix}']`, - nodeWithParents, - ) as NodeWithParent[]; - for (const answerNode of answerNodes) { - const parent = answerNode.parent!.parent!.node; - const answerParagraphIndex = parent.children.indexOf( - answerNode.parent!.node, - ); - if (answerParagraphIndex === -1 || answerParagraphIndex === 0) { - continue; - } - const questionParagraphNode = parent.children[ - answerParagraphIndex - 1 - ] as mdast.Paragraph; - if (questionParagraphNode.type === "paragraph") { - const questionTextNode = questionParagraphNode.children[0] as mdast.Text; +const questionPrefixRegexp = /^Q\.(\s+)/; + +// If the answer prefix appears in the same paragraph block as the question, or if it's in multiblock mode, it can be on a line after the first line (i.e. via hard wrapping). +const answerPrefixRegexpMultiline = /^A\.(\s+)/m; +// But in later paragraph blocks, it must start the paragraph. +const answerPrefixRegexp = /^A\.(\s+)/; +// Note that the answer prefix regexp has the multiline flag set. It can appear mid-"paragraph" because in Markdown, a single line break doesn't end a paragraph. + +function matchQuestionPrefix(node: mdast.RootContent): RegExpMatchArray | null { + if (node.type !== "paragraph") return null; + // The "Q." prefix must be the start of the paragraph. 
+ const qPrefixContentNode = node.children[0]; + if (!qPrefixContentNode) return null; + if (qPrefixContentNode.type !== "text") return null; + + return qPrefixContentNode.value.match(questionPrefixRegexp); +} + +function isMultiblockTerminator(node: mdast.RootContent): boolean { + return node.type === "thematicBreak" || node.type === "heading"; +} + +function extractQAPromptNodes(root: mdast.Node): mdast.Node { + // Replaces each root-level "Q. ... A. ..." run of blocks with a single qaPrompt node, mutating and returning the tree. We only parse root-level paragraphs for the q/a syntax. + if (!("children" in root)) { + return root; + } + const rootContent = root.children as mdast.RootContent[]; + for (let qBlockIndex = 0; qBlockIndex < rootContent.length; qBlockIndex++) { + const qBlock = rootContent[qBlockIndex]; + if (qBlock.type !== "paragraph") continue; + const qPrefixMatch = matchQuestionPrefix(qBlock); + if (!qPrefixMatch) continue; + const qIsMultiBlock = qPrefixMatch[1].endsWith("\n"); + + // OK, now we've got a paragraph that starts with "Q." + // Scan forward and try to find an "^A." before the next heading or thematic break. + let aBlockIndex = qBlockIndex; // the index of the block in rootContent containing the answer prefix + let aContentIndex = 0; // the index of the content node in that block containing the answer prefix + let aPrefixMatch: RegExpMatchArray | null = null; + for ( + aBlockIndex = qBlockIndex; + aBlockIndex < rootContent.length && + // Never read past the last block; single-line questions only search their own block and the next one. + (qIsMultiBlock || aBlockIndex <= qBlockIndex + 1); + aBlockIndex++ + ) { + const aBlock = rootContent[aBlockIndex]; if ( - questionParagraphNode.children.length === 1 && - questionTextNode.type === "text" + isMultiblockTerminator(aBlock) || + // Bail if we find another question prefix before the answer prefix. + (aBlockIndex > qBlockIndex && matchQuestionPrefix(aBlock)) + ) + break; + + // For paragraphs, check each text node for "^A.". + if (aBlock.type !== "paragraph") continue; + for ( + aContentIndex = 0; + qIsMultiBlock + ? 
aContentIndex < aBlock.children.length + : // If the question is not multiblock, the answer must begin the next paragraph. + aBlockIndex === qBlockIndex + ? aContentIndex < aBlock.children.length + : aContentIndex === 0; + aContentIndex++ ) { - if (questionTextNode.value.startsWith(questionPrefix)) { - // Now we'll strip the prefixes off. - const answerParagraphNode = parent.children[ - answerParagraphIndex - ] as mdast.Paragraph; - questionTextNode.value = questionTextNode.value.slice( - questionPrefix.length, - ); - const answerTextNode = answerParagraphNode.children[0] as mdast.Text; - answerTextNode.value = answerTextNode.value.slice( - answerPrefix.length, - ); - - const qaPromptNode: QAPromptNode = { - type: qaPromptNodeType, - question: offsetNodePositionByPrefix( - questionParagraphNode, - questionPrefix, - ), - answer: offsetNodePositionByPrefix( - answerParagraphNode, - answerPrefix, - ), - }; - parent.children.splice(answerParagraphIndex - 1, 2, qaPromptNode); - } - } - } - } + const content = aBlock.children[aContentIndex]; + if (content.type !== "text") continue; - const questionNodes = unistUtilSelect.selectAll( - `paragraph>text[value^='${questionPrefix}']`, - nodeWithParents, - ) as NodeWithParent[]; - for (const questionNode of questionNodes) { - const paragraphNode = questionNode.parent!.node as mdast.Paragraph; - const splitNodeIndex = paragraphNode.children.findIndex( - (node) => - node.type === "text" && - answerSplitRegexp.test((node as mdast.Text).value), - ); - if (splitNodeIndex === -1) { - continue; + aPrefixMatch = content.value.match( + qIsMultiBlock || aBlockIndex === qBlockIndex + ? 
answerPrefixRegexpMultiline + : answerPrefixRegexp, + ); + if (aPrefixMatch) break; + } + if (aPrefixMatch) break; } - const splitNode = paragraphNode.children[splitNodeIndex] as mdast.Text; - const match = splitNode.value.match(answerSplitRegexp)!; - const preSplitString = splitNode.value.slice(0, match.index!); - const postSplitString = splitNode.value.slice(match.index!); + if (!aPrefixMatch) continue; - const questionPhrasingNodes = paragraphNode.children.slice( - 0, - splitNodeIndex, - ); - const answerPhrasingNodes = paragraphNode.children.slice(splitNodeIndex); - if (preSplitString !== "" && answerPhrasingNodes[0].type === "text") { - // We've gotta split that node. - const splitNodeStart = splitNode.position!.start; - const preSplitStringLines = preSplitString.split("\n"); - questionPhrasingNodes.push({ + // If the answer prefix comes mid-content node (as in the example below), we need to split the content node. + // Q. Foo + // A. Bar + if (aPrefixMatch.index !== 0) { + const aBlock = rootContent[aBlockIndex] as mdast.Paragraph; + const splitNode = aBlock.children[aContentIndex] as mdast.Text; + const splitText = splitNode.value; + // The text before the answer prefix will become part of the question. + splitNode.value = splitText.slice(0, aPrefixMatch.index).trimEnd(); + const newTextNode: mdast.Text = { type: "text", - value: preSplitString, - position: { - start: splitNodeStart, - end: { - line: splitNodeStart.line + preSplitStringLines.length - 1, - column: preSplitStringLines.at(-1)!.length, - offset: splitNodeStart.offset! + preSplitString.length, + value: splitText.slice(aPrefixMatch.index), + }; + + // Fix up the line / column / offset position values for the split. 
+ if (splitNode.position) { + const qLines = splitNode.value.split("\n"); + const aLineCount = newTextNode.value.split("\n").length; + const aLineNumber = splitNode.position.end.line - aLineCount + 1; + newTextNode.position = { + start: { + column: 1, + line: aLineNumber, }, - }, - }); - answerPhrasingNodes[0].value = postSplitString; - // Correct the first answer phrasing node's position for the split. - answerPhrasingNodes[0].position = { - start: { - line: splitNodeStart.line + preSplitStringLines.length, - column: 1, - offset: splitNodeStart.offset! + preSplitString.length + 1, // +1 for the newline - }, - end: answerPhrasingNodes[0].position!.end, + end: { ...splitNode.position.end }, + }; + splitNode.position.end = { + line: splitNode.position.start.line + qLines.length - 1, + column: (qLines.length === 1 ? splitNode.position.start.column : 1) + qLines.at(-1)!.length, // columns are 1-indexed and the end point is exclusive + }; + if (splitNode.position.start.offset !== undefined) { + newTextNode.position.start.offset = + aPrefixMatch.index! + splitNode.position.start.offset; + splitNode.position.end.offset = + splitNode.position.start.offset + splitNode.value.length; + } + } + aBlock.children.splice(aContentIndex + 1, 0, newTextNode); + aContentIndex++; + } + + // Now the answer prefix is at the start of a content node. But if that node is not at the start of a paragraph block, we must split it. + if (aContentIndex !== 0) { + const splitBlock = rootContent[aBlockIndex] as mdast.Paragraph; + const aPrefixText = splitBlock.children[aContentIndex]; + const aContentNodes = splitBlock.children.slice(aContentIndex); + // The content nodes before the answer prefix will become part of the question. + splitBlock.children = splitBlock.children.slice(0, aContentIndex); + const newABlock: mdast.Paragraph = { + type: "paragraph", + children: aContentNodes, }; - } else { - // The answer text is in its own node. - // It's starting at the end of the previous line (before the \n); we need to shift it to the start of the next line. 
- answerPhrasingNodes[0].position!.start.line += 1; - answerPhrasingNodes[0].position!.start.column = 1; - answerPhrasingNodes[0].position!.start.offset! += 1; + + // Fix up the position structures for the split. + if (splitBlock.position) { + if (aPrefixText.position) { + newABlock.position = { + start: aPrefixText.position.start, + end: { ...splitBlock.position.end }, + }; + } + const lastSplitBlockContentNode = splitBlock.children.at(-1); + if (lastSplitBlockContentNode?.position) { + splitBlock.position.end = { + ...lastSplitBlockContentNode.position.end, + }; + } else { + splitBlock.position = undefined; + } + } + rootContent.splice(aBlockIndex + 1, 0, newABlock); + aBlockIndex++; + aContentIndex = 0; } - questionPhrasingNodes[0] = offsetNodePositionByPrefix( - questionPhrasingNodes[0], - questionPrefix, - ); - answerPhrasingNodes[0] = offsetNodePositionByPrefix( - answerPhrasingNodes[0], - answerPrefix, - ); - (questionPhrasingNodes[0] as mdast.Text).value = ( - questionPhrasingNodes[0] as mdast.Text - ).value.slice(questionPrefix.length); - (answerPhrasingNodes[0] as mdast.Text).value = ( - answerPhrasingNodes[0] as mdast.Text - ).value.slice( - answerPrefix.length + 1, // add 1 for the newline + + // If the answer is multiblock, then it'll end at the next question, heading, ---, or eof. + let afterABlockIndex = aBlockIndex + 1; + if (aPrefixMatch[1].endsWith("\n")) { + while (afterABlockIndex < rootContent.length) { + const block = rootContent[afterABlockIndex]; + if (isMultiblockTerminator(block) || matchQuestionPrefix(block)) { + break; + } + afterABlockIndex++; + } + } + + // Remove the question prefix from the content. + const qPrefixContentNode = qBlock.children[0] as mdast.Text; + qPrefixContentNode.value = qPrefixContentNode.value.slice( + qPrefixMatch[0].length, ); - const qaPromptNode: QAPromptNode = { + // Remove the answer prefix from its content node. 
+ const aBlock = rootContent[aBlockIndex] as mdast.Paragraph; + const aPrefixText = aBlock.children[aContentIndex] as mdast.Text; + aPrefixText.value = aPrefixText.value.slice(aPrefixMatch[0].length); + + const qaPromptNode = { type: qaPromptNodeType, - question: { - type: "paragraph", - children: questionPhrasingNodes, - position: { - start: questionPhrasingNodes[0].position!.start, - end: questionPhrasingNodes.at(-1)!.position!.end, - }, - }, - answer: { - type: "paragraph", - children: answerPhrasingNodes, - position: { - start: answerPhrasingNodes[0].position!.start, - end: answerPhrasingNodes.at(-1)!.position!.end, - }, - }, - }; - const paragraphContainer = questionNode.parent!.parent! - .node as unist.Parent; - paragraphContainer.children.splice( - paragraphContainer.children.indexOf(paragraphNode), - 1, + question: rootContent.slice(qBlockIndex, aBlockIndex), + answer: rootContent.slice(aBlockIndex, afterABlockIndex), + } satisfies QAPromptNode; + rootContent.splice( + qBlockIndex, + afterABlockIndex - qBlockIndex, // Remove the question blocks and every answer block, including the extra blocks of a multiblock answer. qaPromptNode, ); } - - return node; -} - -function offsetNodePositionByPrefix( - node: N, - prefix: string, -): N { - const { position } = node; - if (!position) { - throw new Error("Node doesn't have a position"); - } - if (position.start.offset === undefined) { - throw new Error("position.start doesn't have an offset"); - } - return { - ...node, - position: { - ...position, - start: { - ...position.start, - offset: position.start.offset! + prefix.length, - column: position.start.column + prefix.length, - }, - }, - }; + return root; }