// @ts-check import { directive } from "micromark-extension-directive"; import { gfmAutolinkLiteral } from "micromark-extension-gfm-autolink-literal"; import { gfmFootnote } from "micromark-extension-gfm-footnote"; import { gfmTable } from "micromark-extension-gfm-table"; import { math } from "micromark-extension-math"; import { parse as micromarkParse, postprocess as micromarkPostprocess, preprocess as micromarkPreprocess } from "micromark"; // micromark-core-commonmark must exactly match what's used by micromark for the shim below to work correctly // Unfortunately, omitting this dependency from package.json breaks strict dependency resolution (e.g., pnpm) import { labelEnd } from "micromark-core-commonmark"; import { isHtmlFlowComment } from "../helpers/micromark-helpers.cjs"; import { flatTokensSymbol, htmlFlowSymbol, newLineRe } from "../helpers/shared.cjs"; /** @typedef {import("micromark-util-types").Event} Event */ /** @typedef {import("micromark-util-types").ParseOptions} MicromarkParseOptions */ /** @typedef {import("micromark-util-types").State} State */ /** @typedef {import("micromark-util-types").Token} Token */ /** @typedef {import("micromark-util-types").Tokenizer} Tokenizer */ /** @typedef {import("markdownlint").MicromarkToken} MicromarkToken */ /** @typedef {import("./micromark-types.d.mts")} */ /** * Parse options. * * @typedef {Object} ParseOptions * @property {boolean} [freezeTokens] Whether to freeze output Tokens. */ /** * Parses a Markdown document and returns Micromark events. * * @param {string} markdown Markdown document. * @param {MicromarkParseOptions} [micromarkParseOptions] Options for micromark. * @returns {Event[]} Micromark events. */ export function getEvents( markdown, micromarkParseOptions = {} ) { // Customize extensions list to add useful extensions const extensions = [ directive(), gfmAutolinkLiteral(), gfmFootnote(), gfmTable(), math(), ...(micromarkParseOptions.extensions || []) ]; // // Shim labelEnd to identify undefined link labels /** @type {Event[][]} */ const artificialEventLists = []; const tokenizeOriginal = labelEnd.tokenize; /** @type {Tokenizer} */ function tokenizeShim(effects, okOriginal, nokOriginal) { // eslint-disable-next-line consistent-this, unicorn/no-this-assignment, no-invalid-this const tokenizeContext = this; const events = tokenizeContext.events; /** @type {State} */ const nokShim = (code) => { // Find start of label (image or link) let indexStart = events.length; while (--indexStart >= 0) { const event = events[indexStart]; const [ kind, token ] = event; if (kind === "enter") { const { type } = token; if ((type === "labelImage") || (type === "labelLink")) { // Found it break; } } } // If found... if (indexStart >= 0) { // Create artificial enter/exit events and replicate all data/lineEnding events within const eventStart = events[indexStart]; const [ , eventStartToken ] = eventStart; const eventEnd = events[events.length - 1]; const [ , eventEndToken ] = eventEnd; /** @type {Token} */ const undefinedReferenceType = { "type": "undefinedReferenceShortcut", "start": eventStartToken.start, "end": eventEndToken.end }; /** @type {Token} */ const undefinedReference = { "type": "undefinedReference", "start": eventStartToken.start, "end": eventEndToken.end }; const eventsToReplicate = events .slice(indexStart) .filter((event) => { const [ , eventToken ] = event; const { type } = eventToken; return (type === "data") || (type === "lineEnding"); }); // Determine the type of the undefined reference const previousUndefinedEvent = (artificialEventLists.length > 0) && artificialEventLists[artificialEventLists.length - 1][0]; const previousUndefinedToken = previousUndefinedEvent && previousUndefinedEvent[1]; if ( previousUndefinedToken && (previousUndefinedToken.end.line === undefinedReferenceType.start.line) && (previousUndefinedToken.end.column === undefinedReferenceType.start.column) ) { // Previous undefined reference event is immediately before this one if (eventsToReplicate.length === 0) { // The pair represent a collapsed reference (ex: [...][]) previousUndefinedToken.type = "undefinedReferenceCollapsed"; previousUndefinedToken.end = eventEndToken.end; } else { // The pair represent a full reference (ex: [...][...]) undefinedReferenceType.type = "undefinedReferenceFull"; undefinedReferenceType.start = previousUndefinedToken.start; artificialEventLists.pop(); } } // Create artificial event list and replicate content const text = eventsToReplicate .filter((event) => event[0] === "enter") .map((event) => tokenizeContext.sliceSerialize(event[1])) .join("") .trim(); if ((text.length > 0) && !text.includes("]")) { /** @type {Event[]} */ const artificialEvents = []; artificialEvents.push( [ "enter", undefinedReferenceType, tokenizeContext ], [ "enter", undefinedReference, tokenizeContext ] ); for (const event of eventsToReplicate) { const [ kind, token ] = event; // Copy token because the current object will get modified by the parser artificialEvents.push([ kind, { ...token }, tokenizeContext ]); } artificialEvents.push( [ "exit", undefinedReference, tokenizeContext ], [ "exit", undefinedReferenceType, tokenizeContext ] ); artificialEventLists.push(artificialEvents); } } // Continue with original behavior return nokOriginal(code); }; // Shim nok handler of labelEnd's tokenize return tokenizeOriginal.call(tokenizeContext, effects, okOriginal, nokShim); } try { // Shim labelEnd behavior to detect undefined references labelEnd.tokenize = tokenizeShim; // Use micromark to parse document into Events const encoding = undefined; const eol = true; const parseContext = micromarkParse({ ...micromarkParseOptions, extensions }); const chunks = micromarkPreprocess()(markdown, encoding, eol); const events = micromarkPostprocess(parseContext.document().write(chunks)); // Append artificial events and return all events // eslint-disable-next-line unicorn/prefer-spread return events.concat(...artificialEventLists); } finally { // Restore shimmed labelEnd behavior labelEnd.tokenize = tokenizeOriginal; } } /** * Parses a Markdown document and returns micromark tokens (internal). * * @param {string} markdown Markdown document. * @param {ParseOptions} [parseOptions] Options. * @param {MicromarkParseOptions} [micromarkParseOptions] Options for micromark. * @param {number} [lineDelta] Offset for start/end line. * @param {MicromarkToken} [ancestor] Parent of top-most tokens. * @returns {MicromarkToken[]} Micromark tokens. */ function parseInternal( markdown, parseOptions = {}, micromarkParseOptions = {}, lineDelta = 0, ancestor = undefined ) { // Get options const freezeTokens = Boolean(parseOptions.freezeTokens); // Use micromark to parse document into Events const events = getEvents(markdown, micromarkParseOptions); // Create Token objects const document = []; let flatTokens = []; /** @type {MicromarkToken} */ const root = { "type": "data", "startLine": -1, "startColumn": -1, "endLine": -1, "endColumn": -1, "text": "ROOT", "children": document, "parent": null }; const history = [ root ]; let current = root; /** @type {MicromarkParseOptions | null} */ let reparseOptions = null; let lines = null; let skipHtmlFlowChildren = false; for (const event of events) { const [ kind, token, context ] = event; const { type, start, end } = token; const { "column": startColumn, "line": startLine } = start; const { "column": endColumn, "line": endLine } = end; const text = context.sliceSerialize(token); if ((kind === "enter") && !skipHtmlFlowChildren) { const previous = current; history.push(previous); current = { type, "startLine": startLine + lineDelta, startColumn, "endLine": endLine + lineDelta, endColumn, text, "children": [], "parent": ((previous === root) ? (ancestor || null) : previous) }; if (ancestor) { Object.defineProperty(current, htmlFlowSymbol, { "value": true }); } previous.children.push(current); flatTokens.push(current); if ((current.type === "htmlFlow") && !isHtmlFlowComment(current)) { skipHtmlFlowChildren = true; if (!reparseOptions || !lines) { reparseOptions = { ...micromarkParseOptions, "extensions": [ { "disable": { "null": [ "codeIndented", "htmlFlow" ] } } ] }; lines = markdown.split(newLineRe); } const reparseMarkdown = lines .slice(current.startLine - 1, current.endLine) .join("\n"); const tokens = parseInternal( reparseMarkdown, parseOptions, reparseOptions, current.startLine - 1, current ); current.children = tokens; // Avoid stack overflow of Array.push(...spread) // eslint-disable-next-line unicorn/prefer-spread flatTokens = flatTokens.concat(tokens[flatTokensSymbol]); } } else if (kind === "exit") { if (type === "htmlFlow") { skipHtmlFlowChildren = false; } if (!skipHtmlFlowChildren) { if (freezeTokens) { Object.freeze(current.children); Object.freeze(current); } // @ts-ignore current = history.pop(); } } } // Return document Object.defineProperty(document, flatTokensSymbol, { "value": flatTokens }); if (freezeTokens) { Object.freeze(document); } return document; } /** * Parses a Markdown document and returns micromark tokens. * * @param {string} markdown Markdown document. * @param {ParseOptions} [parseOptions] Options. * @returns {MicromarkToken[]} Micromark tokens. */ export function parse(markdown, parseOptions) { return parseInternal(markdown, parseOptions); }