Mirror of https://github.com/DavidAnson/markdownlint.git, synced 2025-12-16 14:00:13 +01:00
Move micromark-parse.mjs from helpers to library, remove all dependencies from helpers.
commit 3599f694ba
parent 1e71f6f44e
7 changed files with 6 additions and 14 deletions
@@ -9,7 +9,7 @@ import { promisify } from "node:util";
 import { initialize as cacheInitialize } from "./cache.mjs";
 import { version } from "./constants.mjs";
 import rules from "./rules.mjs";
-import { parse as micromarkParse } from "../helpers/micromark-parse.mjs";
+import { parse as micromarkParse } from "./micromark-parse.mjs";
 import * as helpers from "../helpers/helpers.cjs";

 /**
@@ -2,7 +2,7 @@

 import { addErrorDetailIf, escapeForRegExp, hasOverlap } from "../helpers/helpers.cjs";
 import { filterByPredicate, filterByTypes } from "../helpers/micromark-helpers.cjs";
-import { parse } from "../helpers/micromark-parse.mjs";
+import { parse } from "./micromark-parse.mjs";

 const ignoredChildTypes = new Set(
   [ "codeFencedFence", "definition", "reference", "resource" ]
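The two hunks above capture the in-library side of the move: callers inside lib/ now import the parser with a local specifier instead of reaching into helpers/. A minimal before/after sketch for any other module in lib/ that needs the parser (the consumer module is hypothetical; the import forms are exactly the ones shown in the hunks):

// Hypothetical lib/example-consumer.mjs -- not part of this commit.
// Before: the parser lived in the helpers directory.
//   import { parse } from "../helpers/micromark-parse.mjs";
// After: the parser is a sibling module inside lib/.
import { parse } from "./micromark-parse.mjs";

const tokens = parse("Some *emphasized* text\n");
console.log(tokens.map((token) => token.type));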
309  lib/micromark-parse.mjs  (Normal file)
@@ -0,0 +1,309 @@
// @ts-check

import { directive } from "micromark-extension-directive";
import { gfmAutolinkLiteral } from "micromark-extension-gfm-autolink-literal";
import { gfmFootnote } from "micromark-extension-gfm-footnote";
import { gfmTable } from "micromark-extension-gfm-table";
import { math } from "micromark-extension-math";
import { parse as micromarkParse, postprocess as micromarkPostprocess, preprocess as micromarkPreprocess } from "micromark";
// micromark-core-commonmark is not a dependency because this instance must match what's used by micromark
// eslint-disable-next-line n/no-extraneous-import
import { labelEnd } from "micromark-core-commonmark";
import { isHtmlFlowComment } from "../helpers/micromark-helpers.cjs";
import { flatTokensSymbol, htmlFlowSymbol, newLineRe } from "../helpers/shared.cjs";

/** @typedef {import("micromark-util-types").Event} Event */
/** @typedef {import("micromark-util-types").ParseOptions} MicromarkParseOptions */
/** @typedef {import("micromark-util-types").State} State */
/** @typedef {import("micromark-util-types").Token} Token */
/** @typedef {import("micromark-util-types").Tokenizer} Tokenizer */
/** @typedef {import("./micromark-types.d.mts")} */
/** @typedef {import("../lib/markdownlint.mjs").MicromarkToken} MicromarkToken */

/**
 * Parse options.
 *
 * @typedef {Object} ParseOptions
 * @property {boolean} [freezeTokens] Whether to freeze output Tokens.
 */

/**
 * Parses a Markdown document and returns Micromark events.
 *
 * @param {string} markdown Markdown document.
 * @param {MicromarkParseOptions} [micromarkParseOptions] Options for micromark.
 * @returns {Event[]} Micromark events.
 */
export function getEvents(
  markdown,
  micromarkParseOptions = {}
) {
  // Customize extensions list to add useful extensions
  const extensions = [
    directive(),
    gfmAutolinkLiteral(),
    gfmFootnote(),
    gfmTable(),
    math(),
    ...(micromarkParseOptions.extensions || [])
  ];

  // Shim labelEnd to identify undefined link labels
  /** @type {Event[][]} */
  const artificialEventLists = [];
  const tokenizeOriginal = labelEnd.tokenize;

  /** @type {Tokenizer} */
  function tokenizeShim(effects, okOriginal, nokOriginal) {
    // eslint-disable-next-line consistent-this, unicorn/no-this-assignment, no-invalid-this
    const tokenizeContext = this;
    const events = tokenizeContext.events;

    /** @type {State} */
    const nokShim = (code) => {
      // Find start of label (image or link)
      let indexStart = events.length;
      while (--indexStart >= 0) {
        const event = events[indexStart];
        const [ kind, token ] = event;
        if (kind === "enter") {
          const { type } = token;
          if ((type === "labelImage") || (type === "labelLink")) {
            // Found it
            break;
          }
        }
      }

      // If found...
      if (indexStart >= 0) {
        // Create artificial enter/exit events and replicate all data/lineEnding events within
        const eventStart = events[indexStart];
        const [ , eventStartToken ] = eventStart;
        const eventEnd = events[events.length - 1];
        const [ , eventEndToken ] = eventEnd;
        /** @type {Token} */
        const undefinedReferenceType = {
          "type": "undefinedReferenceShortcut",
          "start": eventStartToken.start,
          "end": eventEndToken.end
        };
        /** @type {Token} */
        const undefinedReference = {
          "type": "undefinedReference",
          "start": eventStartToken.start,
          "end": eventEndToken.end
        };
        const eventsToReplicate = events
          .slice(indexStart)
          .filter((event) => {
            const [ , eventToken ] = event;
            const { type } = eventToken;
            return (type === "data") || (type === "lineEnding");
          });

        // Determine the type of the undefined reference
        const previousUndefinedEvent = (artificialEventLists.length > 0) && artificialEventLists[artificialEventLists.length - 1][0];
        const previousUndefinedToken = previousUndefinedEvent && previousUndefinedEvent[1];
        if (
          previousUndefinedToken &&
          (previousUndefinedToken.end.line === undefinedReferenceType.start.line) &&
          (previousUndefinedToken.end.column === undefinedReferenceType.start.column)
        ) {
          // Previous undefined reference event is immediately before this one
          if (eventsToReplicate.length === 0) {
            // The pair represent a collapsed reference (ex: [...][])
            previousUndefinedToken.type = "undefinedReferenceCollapsed";
            previousUndefinedToken.end = eventEndToken.end;
          } else {
            // The pair represent a full reference (ex: [...][...])
            undefinedReferenceType.type = "undefinedReferenceFull";
            undefinedReferenceType.start = previousUndefinedToken.start;
            artificialEventLists.pop();
          }
        }

        // Create artificial event list and replicate content
        const text = eventsToReplicate
          .filter((event) => event[0] === "enter")
          .map((event) => tokenizeContext.sliceSerialize(event[1]))
          .join("")
          .trim();
        if ((text.length > 0) && !text.includes("]")) {
          /** @type {Event[]} */
          const artificialEvents = [];
          artificialEvents.push(
            [ "enter", undefinedReferenceType, tokenizeContext ],
            [ "enter", undefinedReference, tokenizeContext ]
          );
          for (const event of eventsToReplicate) {
            const [ kind, token ] = event;
            // Copy token because the current object will get modified by the parser
            artificialEvents.push([ kind, { ...token }, tokenizeContext ]);
          }
          artificialEvents.push(
            [ "exit", undefinedReference, tokenizeContext ],
            [ "exit", undefinedReferenceType, tokenizeContext ]
          );
          artificialEventLists.push(artificialEvents);
        }
      }

      // Continue with original behavior
      return nokOriginal(code);
    };

    // Shim nok handler of labelEnd's tokenize
    return tokenizeOriginal.call(tokenizeContext, effects, okOriginal, nokShim);
  }

  try {
    // Shim labelEnd behavior to detect undefined references
    labelEnd.tokenize = tokenizeShim;

    // Use micromark to parse document into Events
    const encoding = undefined;
    const eol = true;
    const parseContext = micromarkParse({ ...micromarkParseOptions, extensions });
    const chunks = micromarkPreprocess()(markdown, encoding, eol);
    const events = micromarkPostprocess(parseContext.document().write(chunks));

    // Append artificial events and return all events
    // eslint-disable-next-line unicorn/prefer-spread
    return events.concat(...artificialEventLists);
  } finally {
    // Restore shimmed labelEnd behavior
    labelEnd.tokenize = tokenizeOriginal;
  }
}

/**
 * Parses a Markdown document and returns micromark tokens (internal).
 *
 * @param {string} markdown Markdown document.
 * @param {ParseOptions} [parseOptions] Options.
 * @param {MicromarkParseOptions} [micromarkParseOptions] Options for micromark.
 * @param {number} [lineDelta] Offset for start/end line.
 * @param {MicromarkToken} [ancestor] Parent of top-most tokens.
 * @returns {MicromarkToken[]} Micromark tokens.
 */
function parseInternal(
  markdown,
  parseOptions = {},
  micromarkParseOptions = {},
  lineDelta = 0,
  ancestor = undefined
) {
  // Get options
  const freezeTokens = Boolean(parseOptions.freezeTokens);

  // Use micromark to parse document into Events
  const events = getEvents(markdown, micromarkParseOptions);

  // Create Token objects
  const document = [];
  let flatTokens = [];
  /** @type {MicromarkToken} */
  const root = {
    "type": "data",
    "startLine": -1,
    "startColumn": -1,
    "endLine": -1,
    "endColumn": -1,
    "text": "ROOT",
    "children": document,
    "parent": null
  };
  const history = [ root ];
  let current = root;
  /** @type {MicromarkParseOptions | null} */
  let reparseOptions = null;
  let lines = null;
  let skipHtmlFlowChildren = false;
  for (const event of events) {
    const [ kind, token, context ] = event;
    const { type, start, end } = token;
    const { "column": startColumn, "line": startLine } = start;
    const { "column": endColumn, "line": endLine } = end;
    const text = context.sliceSerialize(token);
    if ((kind === "enter") && !skipHtmlFlowChildren) {
      const previous = current;
      history.push(previous);
      current = {
        type,
        "startLine": startLine + lineDelta,
        startColumn,
        "endLine": endLine + lineDelta,
        endColumn,
        text,
        "children": [],
        "parent": ((previous === root) ? (ancestor || null) : previous)
      };
      if (ancestor) {
        Object.defineProperty(current, htmlFlowSymbol, { "value": true });
      }
      previous.children.push(current);
      flatTokens.push(current);
      if ((current.type === "htmlFlow") && !isHtmlFlowComment(current)) {
        skipHtmlFlowChildren = true;
        if (!reparseOptions || !lines) {
          reparseOptions = {
            ...micromarkParseOptions,
            "extensions": [
              {
                "disable": {
                  "null": [ "codeIndented", "htmlFlow" ]
                }
              }
            ]
          };
          lines = markdown.split(newLineRe);
        }
        const reparseMarkdown = lines
          .slice(current.startLine - 1, current.endLine)
          .join("\n");
        const tokens = parseInternal(
          reparseMarkdown,
          parseOptions,
          reparseOptions,
          current.startLine - 1,
          current
        );
        current.children = tokens;
        // Avoid stack overflow of Array.push(...spread)
        // eslint-disable-next-line unicorn/prefer-spread
        flatTokens = flatTokens.concat(tokens[flatTokensSymbol]);
      }
    } else if (kind === "exit") {
      if (type === "htmlFlow") {
        skipHtmlFlowChildren = false;
      }
      if (!skipHtmlFlowChildren) {
        if (freezeTokens) {
          Object.freeze(current.children);
          Object.freeze(current);
        }
        // @ts-ignore
        current = history.pop();
      }
    }
  }

  // Return document
  Object.defineProperty(document, flatTokensSymbol, { "value": flatTokens });
  if (freezeTokens) {
    Object.freeze(document);
  }
  return document;
}

/**
 * Parses a Markdown document and returns micromark tokens.
 *
 * @param {string} markdown Markdown document.
 * @param {ParseOptions} [parseOptions] Options.
 * @returns {MicromarkToken[]} Micromark tokens.
 */
export function parse(markdown, parseOptions) {
  return parseInternal(markdown, parseOptions);
}
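For orientation, a small usage sketch of the two exports defined above (not part of the commit; the relative import assumes a caller that sits next to lib/micromark-parse.mjs, as the modules in the earlier hunks do):

import { getEvents, parse } from "./micromark-parse.mjs";

const markdown = "# Title\n\nSee [spec][] for details.\n";

// getEvents returns flat micromark events ([ kind, token, context ] tuples),
// including the artificial undefinedReference* events appended by the
// labelEnd shim when a label has no matching definition.
for (const [ kind, token ] of getEvents(markdown)) {
  console.log(kind, token.type);
}

// parse returns the nested MicromarkToken tree built by parseInternal;
// freezeTokens makes the returned token objects immutable.
const tokens = parse(markdown, { "freezeTokens": true });
for (const token of tokens) {
  console.log(token.type, token.startLine, token.endLine, token.children.length);
}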
11  lib/micromark-types.d.mts  (Normal file)
@@ -0,0 +1,11 @@
export {};

// Augment TokenTypeMap with markdownlint-specific types.
declare module "micromark-util-types" {
  export interface TokenTypeMap {
    undefinedReference: "undefinedReference"
    undefinedReferenceCollapsed: "undefinedReferenceCollapsed"
    undefinedReferenceFull: "undefinedReferenceFull"
    undefinedReferenceShortcut: "undefinedReferenceShortcut"
  }
}
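The declaration file above only widens micromark's TokenTypeMap so that type-checked code can refer to the four artificial token types. A hedged sketch of how those types surface at runtime through getEvents (the example input and counting logic are illustrative, not from the commit):

import { getEvents } from "./micromark-parse.mjs";

// The four artificial types registered in micromark-types.d.mts.
const undefinedReferenceTypes = new Set([
  "undefinedReference",
  "undefinedReferenceCollapsed",
  "undefinedReferenceFull",
  "undefinedReferenceShortcut"
]);

// "[missing]" has no matching link definition, so the labelEnd shim should
// emit artificial enter/exit events for it.
const events = getEvents("A [missing] shortcut reference.\n");
const undefinedCount = events
  .filter(([ kind, token ]) => (kind === "enter") && undefinedReferenceTypes.has(token.type))
  .length;
console.log(undefinedCount);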