Refactor micromark token handling to remove the optional Token.htmlFlowChildren property and make related code more efficient, reducing elapsed time by ~6%.

This commit is contained in:
David Anson 2023-09-02 12:07:14 -07:00
parent e282874fe3
commit 24c97a54fb
16 changed files with 274 additions and 283 deletions

View file

@ -46,7 +46,9 @@ module.exports = {
const topLevelLists = filterByPredicate(
parsers.micromark.tokens,
isList,
(token) => (isList(token) ? [] : token.children)
(token) => (
(isList(token) || (token.type === "htmlFlow")) ? [] : token.children
)
);
for (const list of topLevelLists) {

View file

@ -3,7 +3,7 @@
"use strict";
const { addError } = require("../helpers");
const { filterByHtmlTokens, getHtmlTagInfo } =
const { filterByTypes, getHtmlTagInfo } =
require("../helpers/micromark.cjs");
const nextLinesRe = /[\r\n][\s\S]*$/;
@ -16,7 +16,8 @@ module.exports = {
let allowedElements = params.config.allowed_elements;
allowedElements = Array.isArray(allowedElements) ? allowedElements : [];
allowedElements = allowedElements.map((element) => element.toLowerCase());
for (const token of filterByHtmlTokens(params.parsers.micromark.tokens)) {
const { tokens } = params.parsers.micromark;
for (const token of filterByTypes(tokens, [ "htmlText" ])) {
const htmlTagInfo = getHtmlTagInfo(token);
if (
htmlTagInfo &&

View file

@ -3,6 +3,7 @@
"use strict";
const { addError } = require("../helpers");
const { filterByPredicate } = require("../helpers/micromark.cjs");
module.exports = {
"names": [ "MD037", "no-space-in-emphasis" ],
@ -16,16 +17,11 @@ module.exports = {
for (const marker of [ "_", "__", "___", "*", "**", "***" ]) {
emphasisTokensByMarker.set(marker, []);
}
const pending = [ ...parsers.micromark.tokens ];
let token = null;
while ((token = pending.shift())) {
// Use reparsed children of htmlFlow tokens
if (token.type === "htmlFlow") {
pending.unshift(...token.htmlFlowChildren);
continue;
}
pending.push(...token.children);
const tokens = filterByPredicate(
parsers.micromark.tokens,
(token) => token.children.some((child) => child.type === "data")
);
for (const token of tokens) {
// Build lists of bare tokens for each emphasis marker type
for (const emphasisTokens of emphasisTokensByMarker.values()) {

View file

@ -29,27 +29,22 @@ module.exports = {
const htmlElements = params.config.html_elements;
const includeHtmlElements =
(htmlElements === undefined) ? true : !!htmlElements;
const scannedTypes = new Set([ "data", "htmlFlowData" ]);
const scannedTypes = new Set([ "data" ]);
if (includeCodeBlocks) {
scannedTypes.add("codeFlowValue");
scannedTypes.add("codeTextData");
}
if (includeHtmlElements) {
scannedTypes.add("htmlFlowData");
scannedTypes.add("htmlTextData");
}
const contentTokens =
filterByPredicate(
params.parsers.micromark.tokens,
(token) => scannedTypes.has(token.type),
(token) => {
let { children } = token;
const { htmlFlowChildren, text, type } = token;
if (!includeHtmlElements && (type === "htmlFlow")) {
children = text.startsWith("<!--") ?
// Remove comment content
[] :
// Examine htmlText content
htmlFlowChildren;
}
return children.filter((t) => !ignoredChildTypes.has(t.type));
}
(token) => (
token.children.filter((t) => !ignoredChildTypes.has(t.type))
)
);
const exclusions = [];
const autoLinked = new Set();

View file

@ -3,15 +3,18 @@
"use strict";
const { addError, emphasisOrStrongStyleFor } = require("../helpers");
const { filterByTypes, tokenIfType } = require("../helpers/micromark.cjs");
const { filterByPredicate, tokenIfType } = require("../helpers/micromark.cjs");
const intrawordRe = /\w/;
const impl =
(params, onError, type, asterisk, underline, style = "consistent") => {
const { lines, parsers } = params;
const emphasisTokens =
filterByTypes(parsers.micromark.tokens, [ type ]);
const emphasisTokens = filterByPredicate(
parsers.micromark.tokens,
(token) => token.type === type,
(token) => ((token.type === "htmlFlow") ? [] : token.children)
);
for (const token of emphasisTokens) {
const { children } = token;
const childType = `${type}Sequence`;

View file

@ -3,8 +3,7 @@
"use strict";
const { addError, addErrorDetailIf } = require("../helpers");
const { filterByHtmlTokens, filterByTypes, getHtmlTagInfo } =
require("../helpers/micromark.cjs");
const { filterByTypes, getHtmlTagInfo } = require("../helpers/micromark.cjs");
// Regular expression for identifying HTML anchor names
const idRe = /\sid\s*=\s*['"]?([^'"\s>]+)/iu;
@ -68,7 +67,7 @@ module.exports = {
}
// Process HTML anchors
for (const token of filterByHtmlTokens(tokens)) {
for (const token of filterByTypes(tokens, [ "htmlText" ])) {
const htmlTagInfo = getHtmlTagInfo(token);
if (htmlTagInfo && !htmlTagInfo.close) {
const anchorMatch = idRe.exec(token.text) ||