Split micromark.cjs into separate -parse and -helpers files.

This commit is contained in:
David Anson 2024-09-28 16:26:38 -07:00
parent 5701d0bf52
commit 33631a5984
47 changed files with 353 additions and 1236 deletions

View file

@ -2,15 +2,8 @@
"use strict";
const { directive, gfmAutolinkLiteral, gfmFootnote, gfmTable, math, parse, postprocess, preprocess } =
require("markdownlint-micromark");
const { newLineRe } = require("./shared.js");
const { flatTokensSymbol, htmlFlowSymbol } = require("./shared.js");
const flatTokensSymbol = Symbol("flat-tokens");
const htmlFlowSymbol = Symbol("html-flow");
/** @typedef {import("markdownlint-micromark").Event} Event */
/** @typedef {import("markdownlint-micromark").ParseOptions} ParseOptions */
/** @typedef {import("markdownlint-micromark").TokenType} TokenType */
/** @typedef {import("../lib/markdownlint.js").MicromarkToken} Token */
@ -53,180 +46,6 @@ function isHtmlFlowComment(token) {
return false;
}
/**
 * Parses a Markdown document and returns Micromark events.
 *
 * The caller's options object (and its extensions array) is never modified;
 * the extra extensions are appended to a copy of the caller's list.
 *
 * @param {string} markdown Markdown document.
 * @param {ParseOptions} [micromarkOptions] Options for micromark.
 * @param {boolean} [referencesDefined] Treat references as defined.
 * @returns {Event[]} Micromark events.
 */
function getMicromarkEvents(
  markdown,
  micromarkOptions = {},
  referencesDefined = true
) {
  // Build a fresh extensions list (caller's extensions first, then the useful
  // additions) instead of pushing into the caller's array, which would make
  // the list grow every time the same options object is reused
  const extensions = [
    ...(micromarkOptions.extensions || []),
    directive(),
    gfmAutolinkLiteral(),
    gfmFootnote(),
    gfmTable(),
    math()
  ];
  const parseOptions = {
    ...micromarkOptions,
    extensions
  };

  // Use micromark to parse document into Events
  const encoding = undefined;
  const eol = true;
  const parseContext = parse(parseOptions);
  if (referencesDefined) {
    // Customize ParseContext to treat all references as defined
    parseContext.defined.includes = (searchElement) => searchElement.length > 0;
  }
  const chunks = preprocess()(markdown, encoding, eol);
  const events = postprocess(parseContext.document().write(chunks));
  return events;
}
/**
 * Parses a Markdown document and returns (frozen) tokens.
 *
 * Micromark enter/exit events are folded into a tree of Token objects whose
 * start/end lines are shifted by lineDelta. Each htmlFlow token that is not an
 * HTML comment has its source lines parsed again (with the codeIndented and
 * htmlFlow constructs disabled) and the resulting tokens become its children.
 * A flat list of every created token is attached to the returned array under
 * flatTokensSymbol.
 *
 * @param {string} markdown Markdown document.
 * @param {ParseOptions} micromarkOptions Options for micromark.
 * @param {boolean} referencesDefined Treat references as defined.
 * @param {number} lineDelta Offset to apply to start/end line.
 * @param {Token} [ancestor] Parent of top-most tokens.
 * @returns {Token[]} Micromark tokens (frozen).
 */
function micromarkParseWithOffset(
markdown,
micromarkOptions,
referencesDefined,
lineDelta,
ancestor
) {
// Use micromark to parse document into Events
const events = getMicromarkEvents(
markdown, micromarkOptions, referencesDefined
);
// Create Token objects
const document = [];
let flatTokens = [];
// Placeholder root whose children array is the document that gets returned;
// the root itself is never exposed as a parent (see "parent" below)
/** @type {Token} */
const root = {
"type": "data",
"startLine": -1,
"startColumn": -1,
"endLine": -1,
"endColumn": -1,
"text": "ROOT",
"children": document,
"parent": null
};
// Stack of enclosing tokens: pushed on "enter", popped on "exit"
const history = [ root ];
let current = root;
// Options and split lines for reparsing htmlFlow content, created on first use
// eslint-disable-next-line jsdoc/valid-types
/** @type ParseOptions | null */
let reparseOptions = null;
let lines = null;
// While true, events nested inside a reparsed htmlFlow token are ignored
let skipHtmlFlowChildren = false;
for (const event of events) {
const [ kind, token, context ] = event;
const { type, start, end } = token;
const { "column": startColumn, "line": startLine } = start;
const { "column": endColumn, "line": endLine } = end;
const text = context.sliceSerialize(token);
if ((kind === "enter") && !skipHtmlFlowChildren) {
const previous = current;
history.push(previous);
current = {
type,
"startLine": startLine + lineDelta,
startColumn,
"endLine": endLine + lineDelta,
endColumn,
text,
"children": [],
// Top-level tokens report the caller-supplied ancestor (or null) as their
// parent instead of the placeholder root
"parent": ((previous === root) ? (ancestor || null) : previous)
};
if (ancestor) {
// An ancestor is only passed by the htmlFlow reparse below, so mark every
// token created in that case with htmlFlowSymbol
Object.defineProperty(current, htmlFlowSymbol, { "value": true });
}
previous.children.push(current);
flatTokens.push(current);
if ((current.type === "htmlFlow") && !isHtmlFlowComment(current)) {
// Ignore micromark's own children for this token; they are replaced by the
// result of the reparse
skipHtmlFlowChildren = true;
if (!reparseOptions || !lines) {
reparseOptions = {
...micromarkOptions,
"extensions": [
{
"disable": {
"null": [ "codeIndented", "htmlFlow" ]
}
}
]
};
lines = markdown.split(newLineRe);
}
// Select lines startLine through endLine of this token, treating line
// numbers as 1-based indices into the split document
const reparseMarkdown = lines
.slice(current.startLine - 1, current.endLine)
.join("\n");
// Passing startLine - 1 as the line offset keeps the reparsed tokens' line
// numbers relative to the document being parsed here
const tokens = micromarkParseWithOffset(
reparseMarkdown,
reparseOptions,
referencesDefined,
current.startLine - 1,
current
);
current.children = tokens;
// Avoid stack overflow of Array.push(...spread)
// eslint-disable-next-line unicorn/prefer-spread
flatTokens = flatTokens.concat(tokens[flatTokensSymbol]);
}
} else if (kind === "exit") {
// Leaving an htmlFlow token ends the skipped region; the flag is cleared
// first so that the htmlFlow token itself is frozen and popped below
if (type === "htmlFlow") {
skipHtmlFlowChildren = false;
}
if (!skipHtmlFlowChildren) {
Object.freeze(current.children);
Object.freeze(current);
// @ts-ignore
current = history.pop();
}
}
}
// Return document
Object.defineProperty(document, flatTokensSymbol, { "value": flatTokens });
Object.freeze(document);
return document;
}
/**
 * Entry point for parsing a whole Markdown document into (frozen) tokens.
 *
 * Delegates to micromarkParseWithOffset with a line offset of zero and no
 * ancestor token, so reported line numbers are those of the document itself.
 *
 * @param {string} markdown Markdown document.
 * @param {ParseOptions} [micromarkOptions] Options for micromark.
 * @param {boolean} [referencesDefined] Treat references as defined.
 * @returns {Token[]} Micromark tokens (frozen).
 */
function micromarkParse(
  markdown,
  micromarkOptions = {},
  referencesDefined = true
) {
  // A top-level parse starts at the first line, so no offset is applied
  const noLineOffset = 0;
  const tokens = micromarkParseWithOffset(
    markdown, micromarkOptions, referencesDefined, noLineOffset
  );
  return tokens;
}
/**
* Adds a range of numbers to a set.
*
@ -445,7 +264,6 @@ const nonContentTokens = new Set([
]);
module.exports = {
"parse": micromarkParse,
addRangeToSet,
filterByPredicate,
filterByTypes,
@ -455,7 +273,6 @@ module.exports = {
getHeadingText,
getHtmlTagInfo,
getParentOfType,
getMicromarkEvents,
inHtmlFlow,
isHtmlFlowComment,
nonContentTokens