Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better.

This commit is contained in:
David Anson 2023-08-04 20:53:38 -07:00
parent dd73b0ad7f
commit ef1bd286a9
14 changed files with 458 additions and 346 deletions

View file

@ -2,8 +2,9 @@
"use strict";
const { addError, addErrorDetailIf, escapeForRegExp, filterTokens,
forEachInlineChild, forEachHeading, htmlElementRe } = require("../helpers");
const { addError, addErrorDetailIf } = require("../helpers");
const { filterByHtmlTokens, filterByTypes, getHtmlTagInfo } =
require("../helpers/micromark.cjs");
// Regular expression for identifying HTML anchor names
const idRe = /\sid\s*=\s*['"]?([^'"\s>]+)/iu;
@ -14,14 +15,14 @@ const anchorRe = /\{(#[a-z\d]+(?:[-_][a-z\d]+)*)\}/gu;
* Converts a Markdown heading into an HTML fragment according to the rules
* used by GitHub.
*
* @param {Object} inline Inline token for heading.
* @param {Object} headingText Heading text token.
* @returns {string} Fragment string for heading.
*/
function convertHeadingToHTMLFragment(inline) {
const inlineText = inline.children
.filter((token) => token.type !== "html_inline")
.map((token) => token.content)
.join("");
function convertHeadingToHTMLFragment(headingText) {
const inlineText =
filterByTypes(headingText.children, [ "codeTextData", "data" ])
.map((token) => token.text)
.join("");
return "#" + encodeURIComponent(
inlineText
.toLowerCase()
@ -42,86 +43,96 @@ module.exports = {
"description": "Link fragments should be valid",
"tags": [ "links" ],
"function": function MD051(params, onError) {
const { tokens } = params.parsers.micromark;
const fragments = new Map();
// Process headings
forEachHeading(params, (heading, content, inline) => {
const fragment = convertHeadingToHTMLFragment(inline);
const headingTexts = filterByTypes(
tokens,
[ "atxHeadingText", "setextHeadingText" ]
);
for (const headingText of headingTexts) {
const fragment = convertHeadingToHTMLFragment(headingText);
const count = fragments.get(fragment) || 0;
if (count) {
fragments.set(`${fragment}-${count}`, 0);
}
fragments.set(fragment, count + 1);
let match = null;
while ((match = anchorRe.exec(content)) !== null) {
while ((match = anchorRe.exec(headingText.text)) !== null) {
const [ , anchor ] = match;
if (!fragments.has(anchor)) {
fragments.set(anchor, 1);
}
}
});
}
// Process HTML anchors
const processHtmlToken = (token) => {
let match = null;
while ((match = htmlElementRe.exec(token.content)) !== null) {
const [ tag, , element ] = match;
const anchorMatch = idRe.exec(tag) ||
(element.toLowerCase() === "a" && nameRe.exec(tag));
for (const token of filterByHtmlTokens(tokens)) {
const htmlTagInfo = getHtmlTagInfo(token);
if (htmlTagInfo && !htmlTagInfo.close) {
const anchorMatch = idRe.exec(token.text) ||
(htmlTagInfo.name.toLowerCase() === "a" && nameRe.exec(token.text));
if (anchorMatch) {
fragments.set(`#${anchorMatch[1]}`, 0);
}
}
};
filterTokens(params, "html_block", processHtmlToken);
forEachInlineChild(params, "html_inline", processHtmlToken);
// Process link fragments
forEachInlineChild(params, "link_open", (token) => {
const { attrs, lineNumber, line } = token;
const href = attrs.find((attr) => attr[0] === "href");
const id = href && href[1];
if (id && (id.length > 1) && (id[0] === "#") && !fragments.has(id)) {
let context = id;
let range = null;
let fixInfo = null;
const match = line.match(
new RegExp(`\\[.*?\\]\\(${escapeForRegExp(context)}\\)`)
);
if (match) {
[ context ] = match;
const index = match.index;
const length = context.length;
range = [ index + 1, length ];
fixInfo = {
"editColumn": index + (length - id.length),
"deleteCount": id.length,
"insertText": null
};
}
const idLower = id.toLowerCase();
const mixedCaseKey = [ ...fragments.keys() ]
.find((key) => idLower === key.toLowerCase());
if (mixedCaseKey) {
(fixInfo || {}).insertText = mixedCaseKey;
addErrorDetailIf(
onError,
lineNumber,
mixedCaseKey,
id,
undefined,
context,
range,
fixInfo
);
} else {
addError(
onError,
lineNumber,
undefined,
context,
// @ts-ignore
range
);
}
// Process link and definition fragments
const parentChilds = [
[ "link", "resourceDestinationString" ],
[ "definition", "definitionDestinationString" ]
];
for (const [ parentType, definitionType ] of parentChilds) {
const links = filterByTypes(tokens, [ parentType ]);
for (const link of links) {
const definitions = filterByTypes(link.children, [ definitionType ]);
for (const definition of definitions) {
if (
(definition.text.length > 1) &&
definition.text.startsWith("#") &&
!fragments.has(definition.text)
) {
// eslint-disable-next-line no-undef-init
let range = undefined;
// eslint-disable-next-line no-undef-init
let fixInfo = undefined;
if (link.startLine === link.endLine) {
range = [ link.startColumn, link.endColumn - link.startColumn ];
fixInfo = {
"editColumn": definition.startColumn,
"deleteCount": definition.endColumn - definition.startColumn
};
}
const definitionTextLower = definition.text.toLowerCase();
const mixedCaseKey = [ ...fragments.keys() ]
.find((key) => definitionTextLower === key.toLowerCase());
if (mixedCaseKey) {
// @ts-ignore
(fixInfo || {}).insertText = mixedCaseKey;
addErrorDetailIf(
onError,
link.startLine,
mixedCaseKey,
definition.text,
undefined,
link.text,
range,
fixInfo
);
} else {
addError(
onError,
link.startLine,
undefined,
link.text,
range
);
}
}
}
}
});
}
}
};