// File: markdownlint/lib/micromark-parse.mjs (309 lines, 11 KiB, JavaScript)

// @ts-check
import { directive } from "micromark-extension-directive";
import { gfmAutolinkLiteral } from "micromark-extension-gfm-autolink-literal";
import { gfmFootnote } from "micromark-extension-gfm-footnote";
import { gfmTable } from "micromark-extension-gfm-table";
import { math } from "micromark-extension-math";
import { parse as micromarkParse, postprocess as micromarkPostprocess, preprocess as micromarkPreprocess } from "micromark";
// micromark-core-commonmark must exactly match what's used by micromark for the shim below to work correctly
// Unfortunately, omitting this dependency from package.json breaks strict dependency resolution (e.g., pnpm)
import { labelEnd } from "micromark-core-commonmark";
import { isHtmlFlowComment } from "../helpers/micromark-helpers.cjs";
import { flatTokensSymbol, htmlFlowSymbol, newLineRe } from "../helpers/shared.cjs";
/** @typedef {import("micromark-util-types").Event} Event */
/** @typedef {import("micromark-util-types").ParseOptions} MicromarkParseOptions */
/** @typedef {import("micromark-util-types").State} State */
/** @typedef {import("micromark-util-types").Token} Token */
/** @typedef {import("micromark-util-types").Tokenizer} Tokenizer */
/** @typedef {import("markdownlint").MicromarkToken} MicromarkToken */
/** @typedef {import("./micromark-types.d.mts")} */
/**
* Parse options.
*
* @typedef {Object} ParseOptions
* @property {boolean} [freezeTokens] Whether to freeze output Tokens.
*/
/**
* Parses a Markdown document and returns Micromark events.
*
* The returned array holds the events produced by micromark followed by
* artificial "undefinedReference*" enter/exit events. Those artificial events
* are recorded by a temporary shim around labelEnd.tokenize each time that
* tokenizer invokes its nok callback (see tokenizeShim below).
*
* Note: labelEnd.tokenize is reassigned on the imported labelEnd object for
* the duration of the parse and is restored in a finally block.
*
* @param {string} markdown Markdown document.
* @param {MicromarkParseOptions} [micromarkParseOptions] Options for micromark.
* @returns {Event[]} Micromark events.
*/
export function getEvents(
markdown,
micromarkParseOptions = {}
) {
// Customize extensions list to add useful extensions
// (caller-provided extensions are appended after the built-in ones)
const extensions = [
directive(),
gfmAutolinkLiteral(),
gfmFootnote(),
gfmTable(),
math(),
...(micromarkParseOptions.extensions || [])
];
// Shim labelEnd to identify undefined link labels
// Each entry is the list of artificial events for one recorded reference
/** @type {Event[][]} */
const artificialEventLists = [];
// Keep the original tokenizer so it can be delegated to and restored later
const tokenizeOriginal = labelEnd.tokenize;
// Replacement tokenizer: behaves like the original, but substitutes nokShim
// for the nok callback so that calls to nok can be observed
/** @type {Tokenizer} */
function tokenizeShim(effects, okOriginal, nokOriginal) {
// Capture the tokenizer context ("this") for use inside nokShim and when
// delegating to the original tokenizer
// eslint-disable-next-line consistent-this, unicorn/no-this-assignment, no-invalid-this
const tokenizeContext = this;
const events = tokenizeContext.events;
// Runs in place of nok: records artificial events (when applicable) and
// then always forwards to the original nok callback
/** @type {State} */
const nokShim = (code) => {
// Find start of label (image or link)
// (scan backward for the most recent "enter" of labelImage/labelLink)
let indexStart = events.length;
while (--indexStart >= 0) {
const event = events[indexStart];
const [ kind, token ] = event;
if (kind === "enter") {
const { type } = token;
if ((type === "labelImage") || (type === "labelLink")) {
// Found it
break;
}
}
}
// If found...
if (indexStart >= 0) {
// Create artificial enter/exit events and replicate all data/lineEnding events within
// The artificial tokens span from the label's start to the end of the
// most recent event
const eventStart = events[indexStart];
const [ , eventStartToken ] = eventStart;
const eventEnd = events[events.length - 1];
const [ , eventEndToken ] = eventEnd;
// Outer token; starts as "shortcut" and may be retyped to "full" below
/** @type {Token} */
const undefinedReferenceType = {
"type": "undefinedReferenceShortcut",
"start": eventStartToken.start,
"end": eventEndToken.end
};
// Inner token that wraps the replicated data/lineEnding events
/** @type {Token} */
const undefinedReference = {
"type": "undefinedReference",
"start": eventStartToken.start,
"end": eventEndToken.end
};
// Both enter and exit events of type data/lineEnding are kept
const eventsToReplicate = events
.slice(indexStart)
.filter((event) => {
const [ , eventToken ] = event;
const { type } = eventToken;
return (type === "data") || (type === "lineEnding");
});
// Determine the type of the undefined reference
// (look at the outer "enter" token of the most recently recorded list;
// previousUndefinedEvent is false when nothing has been recorded yet)
const previousUndefinedEvent = (artificialEventLists.length > 0) && artificialEventLists[artificialEventLists.length - 1][0];
const previousUndefinedToken = previousUndefinedEvent && previousUndefinedEvent[1];
if (
previousUndefinedToken &&
(previousUndefinedToken.end.line === undefinedReferenceType.start.line) &&
(previousUndefinedToken.end.column === undefinedReferenceType.start.column)
) {
// Previous undefined reference event is immediately before this one
if (eventsToReplicate.length === 0) {
// The pair represent a collapsed reference (ex: [...][])
// The previously recorded token is retyped and extended in place; no
// new list is added because the text computed below will be empty
previousUndefinedToken.type = "undefinedReferenceCollapsed";
previousUndefinedToken.end = eventEndToken.end;
} else {
// The pair represent a full reference (ex: [...][...])
// The new token takes over the previous token's start and the previous
// list is discarded
undefinedReferenceType.type = "undefinedReferenceFull";
undefinedReferenceType.start = previousUndefinedToken.start;
artificialEventLists.pop();
}
}
// Create artificial event list and replicate content
// (text is the trimmed concatenation of the replicated "enter" events)
const text = eventsToReplicate
.filter((event) => event[0] === "enter")
.map((event) => tokenizeContext.sliceSerialize(event[1]))
.join("")
.trim();
// Only record when there is text and that text does not contain "]"
if ((text.length > 0) && !text.includes("]")) {
/** @type {Event[]} */
const artificialEvents = [];
artificialEvents.push(
[ "enter", undefinedReferenceType, tokenizeContext ],
[ "enter", undefinedReference, tokenizeContext ]
);
for (const event of eventsToReplicate) {
const [ kind, token ] = event;
// Copy token because the current object will get modified by the parser
artificialEvents.push([ kind, { ...token }, tokenizeContext ]);
}
artificialEvents.push(
[ "exit", undefinedReference, tokenizeContext ],
[ "exit", undefinedReferenceType, tokenizeContext ]
);
artificialEventLists.push(artificialEvents);
}
}
// Continue with original behavior
return nokOriginal(code);
};
// Shim nok handler of labelEnd's tokenize
return tokenizeOriginal.call(tokenizeContext, effects, okOriginal, nokShim);
}
try {
// Shim labelEnd behavior to detect undefined references
labelEnd.tokenize = tokenizeShim;
// Use micromark to parse document into Events
const encoding = undefined;
const eol = true;
// Caller options are passed through, with the combined extensions list
const parseContext = micromarkParse({ ...micromarkParseOptions, extensions });
const chunks = micromarkPreprocess()(markdown, encoding, eol);
const events = micromarkPostprocess(parseContext.document().write(chunks));
// Append artificial events and return all events
// (concat with spread arguments appends the elements of every list)
// eslint-disable-next-line unicorn/prefer-spread
return events.concat(...artificialEventLists);
} finally {
// Restore shimmed labelEnd behavior
// (runs even if parsing throws)
labelEnd.tokenize = tokenizeOriginal;
}
}
/**
 * Parses a Markdown document and returns micromark tokens (internal).
 *
 * Walks the enter/exit events returned by getEvents and builds a tree of
 * token objects: each "enter" event creates a token that becomes the current
 * node and each "exit" event returns to the enclosing node. Every created
 * token is also collected into a flat list which is attached to the returned
 * array under flatTokensSymbol.
 *
 * A non-comment "htmlFlow" token is handled specially: the events found
 * inside it are skipped and the token's source lines are parsed again
 * (recursively) with the "codeIndented" and "htmlFlow" constructs disabled.
 * The tokens from that reparse become the htmlFlow token's children.
 *
 * @param {string} markdown Markdown document.
 * @param {ParseOptions} [parseOptions] Options.
 * @param {MicromarkParseOptions} [micromarkParseOptions] Options for micromark.
 * @param {number} [lineDelta] Offset for start/end line.
 * @param {MicromarkToken} [ancestor] Parent of top-most tokens.
 * @returns {MicromarkToken[]} Micromark tokens.
 */
function parseInternal(
  markdown,
  parseOptions = {},
  micromarkParseOptions = {},
  lineDelta = 0,
  ancestor = undefined
) {
  // Get options
  const freezeTokens = Boolean(parseOptions.freezeTokens);

  // Use micromark to parse document into Events
  const events = getEvents(markdown, micromarkParseOptions);

  // Create Token objects
  const document = [];
  let flatTokens = [];
  // Synthetic holder for the top-level tokens; it is never returned and is
  // never used as the "parent" of a token (see "parent" below)
  /** @type {MicromarkToken} */
  const root = {
    "type": "data",
    "startLine": -1,
    "startColumn": -1,
    "endLine": -1,
    "endColumn": -1,
    "text": "ROOT",
    "children": document,
    "parent": null
  };
  // Stack of enclosing tokens; pushed on "enter" and popped on "exit"
  const history = [ root ];
  let current = root;
  // Created lazily, the first time an htmlFlow token needs to be reparsed
  /** @type {MicromarkParseOptions | null} */
  let reparseOptions = null;
  let lines = null;
  // True while inside an htmlFlow token whose children came from a reparse
  let skipHtmlFlowChildren = false;
  for (const event of events) {
    const [ kind, token, context ] = event;
    if ((kind === "enter") && !skipHtmlFlowChildren) {
      // Positions and text are only needed when a token is created, so they
      // are computed here instead of for every event (exit events and events
      // skipped inside htmlFlow never use them)
      const { type, start, end } = token;
      const { "column": startColumn, "line": startLine } = start;
      const { "column": endColumn, "line": endLine } = end;
      const previous = current;
      history.push(previous);
      current = {
        type,
        "startLine": startLine + lineDelta,
        startColumn,
        "endLine": endLine + lineDelta,
        endColumn,
        "text": context.sliceSerialize(token),
        "children": [],
        // Top-level tokens report the caller-provided ancestor (or null)
        "parent": ((previous === root) ? (ancestor || null) : previous)
      };
      if (ancestor) {
        // Tag every token created while parsing on behalf of an ancestor
        Object.defineProperty(current, htmlFlowSymbol, { "value": true });
      }
      previous.children.push(current);
      flatTokens.push(current);
      if ((current.type === "htmlFlow") && !isHtmlFlowComment(current)) {
        // Ignore the events inside this token; its children come from
        // parsing its lines again with different options
        skipHtmlFlowChildren = true;
        if (!reparseOptions || !lines) {
          reparseOptions = {
            ...micromarkParseOptions,
            "extensions": [
              {
                "disable": {
                  "null": [ "codeIndented", "htmlFlow" ]
                }
              }
            ]
          };
          lines = markdown.split(newLineRe);
        }
        // Token lines are 1-based and inclusive
        const reparseMarkdown = lines
          .slice(current.startLine - 1, current.endLine)
          .join("\n");
        const tokens = parseInternal(
          reparseMarkdown,
          parseOptions,
          reparseOptions,
          current.startLine - 1,
          current
        );
        current.children = tokens;
        // Avoid stack overflow of Array.push(...spread)
        // eslint-disable-next-line unicorn/prefer-spread
        flatTokens = flatTokens.concat(tokens[flatTokensSymbol]);
      }
    } else if (kind === "exit") {
      if (token.type === "htmlFlow") {
        // Leaving the htmlFlow token ends the skipped region
        skipHtmlFlowChildren = false;
      }
      if (!skipHtmlFlowChildren) {
        if (freezeTokens) {
          Object.freeze(current.children);
          Object.freeze(current);
        }
        // @ts-ignore
        current = history.pop();
      }
    }
  }

  // Return document
  Object.defineProperty(document, flatTokensSymbol, { "value": flatTokens });
  if (freezeTokens) {
    Object.freeze(document);
  }
  return document;
}
/**
 * Public entry point: turns a Markdown document into micromark tokens.
 *
 * Delegates to parseInternal with only the document and the parse options;
 * every other parseInternal parameter keeps its default value.
 *
 * @param {string} markdown Markdown document.
 * @param {ParseOptions} [parseOptions] Options.
 * @returns {MicromarkToken[]} Micromark tokens.
 */
export function parse(markdown, parseOptions) {
  const tokens = parseInternal(markdown, parseOptions);
  return tokens;
}