Update MD044/proper-names to parse into micromark htmlFlow tokens for better behavior when html_elements is false (fixes #832).

This commit is contained in:
David Anson 2023-05-30 20:14:02 -07:00
parent 8cb6b8eff5
commit b1eff75034
8 changed files with 386 additions and 25 deletions

View file

@ -2,7 +2,7 @@
"use strict";
const { addErrorDetailIf, escapeForRegExp, withinAnyRange } =
const { addErrorDetailIf, escapeForRegExp, newLineRe, withinAnyRange } =
require("../helpers");
const { filterByPredicate, filterByTypes, parse } =
require("../helpers/micromark.cjs");
@ -30,16 +30,32 @@ module.exports = {
scannedTypes.add("codeFlowValue");
scannedTypes.add("codeTextData");
}
const tokenAdjustments = new Map();
const contentTokens =
filterByPredicate(
params.parsers.micromark.tokens,
(token) => scannedTypes.has(token.type),
(token) => {
let { children } = token;
const { startLine, text } = token;
if (!includeHtmlElements && (token.type === "htmlFlow")) {
children = (children[0] && children[0].text === "<!--") ?
[] :
children.slice(1, -1);
if (text.startsWith("<!--")) {
// Remove comment content
children = [];
} else {
// Re-parse to get htmlText elements for detailed tokenization
const htmlTextLines =
`<md044>\n${text}\n</md044>`.split(newLineRe);
children = parse(htmlTextLines.join(""));
const reTokens = [ ...children ];
for (const reToken of reTokens) {
tokenAdjustments.set(reToken, {
htmlTextLines,
startLine
});
reTokens.push(...reToken.children);
}
}
}
return children.filter((t) => !ignoredChildTypes.has(t.type));
}
@ -80,10 +96,22 @@ module.exports = {
autoLinked.add(token);
}
if (!withinAnyRange(urlRanges, lineIndex, index, length)) {
const column = index + 1;
let lineNumber = token.startLine;
let column = index;
if (tokenAdjustments.has(token)) {
const { htmlTextLines, startLine } =
tokenAdjustments.get(token);
let lineDelta = 0;
while (htmlTextLines[lineDelta].length <= column) {
column -= htmlTextLines[lineDelta].length;
lineDelta++;
}
lineNumber = startLine + lineDelta - 1;
}
column++;
addErrorDetailIf(
onError,
token.startLine,
lineNumber,
name,
nameMatch,
null,