Reimplement getReferenceLinkImageData using micromark tokens.

This commit is contained in:
David Anson 2023-01-29 20:36:53 -08:00
parent 57c612cfa4
commit 97f99befb8
13 changed files with 1235 additions and 1068 deletions

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,8 @@
"use strict";
const micromark = require("./micromark.cjs");
// Regular expression for matching common newline characters
// See NEWLINES_RE in markdown-it/lib/rules_core/normalize.js
const newLineRe = /\r\n?|\n/g;
@ -33,10 +35,6 @@ const emphasisMarkersRe = /[_*]/g;
const blockquotePrefixRe = /^[>\s]*/;
module.exports.blockquotePrefixRe = blockquotePrefixRe;
// Regular expression for reference links (full, collapsed, and shortcut)
const referenceLinkRe =
/!?\\?\[((?:\[[^\]\0]*\]|[^[\]\0])*)\](?:\[([^\]\0]*)\]|([^(])|$)/g;
// Regular expression for link reference definitions
const linkReferenceDefinitionRe = /^ {0,3}\[([^\]]*[^\\])\]:/;
module.exports.linkReferenceDefinitionRe = linkReferenceDefinitionRe;
@ -805,137 +803,115 @@ module.exports.emphasisMarkersInContent = emphasisMarkersInContent;
/**
* Returns an object with information about reference links and images.
*
* @param {Object} lineMetadata Line metadata object.
* @param {Object} params RuleParams instance.
* @returns {Object} Reference link/image data.
*/
function getReferenceLinkImageData(lineMetadata) {
// Initialize return values
function getReferenceLinkImageData(params) {
const normalizeReference = (s) => s.toLowerCase().trim().replace(/\s+/g, " ");
const definitions = new Map();
const definitionLineIndices = [];
const duplicateDefinitions = [];
const references = new Map();
const shortcuts = new Map();
const definitions = new Map();
const duplicateDefinitions = [];
const definitionLineIndices = [];
// Define helper functions
const normalizeLabel = (s) => s.toLowerCase().trim().replace(/\s+/g, " ");
const exclusions = [];
const excluded = (match) => withinAnyRange(
exclusions, 0, match.index, match[0].length - (match[3] || "").length
const filteredTokens =
micromark.filterByTypes(
params.parsers.micromark.tokens,
// definitionLineIndices
"definition", "gfmFootnoteDefinition",
// definitions and definitionLineIndices
"definitionLabelString", "gfmFootnoteDefinitionLabelString",
// references and shortcuts
"gfmFootnoteCall", "image", "link"
);
// Convert input to single-line so multi-line links/images are easier
const lineOffsets = [];
let currentOffset = 0;
const contentLines = [];
forEachLine(lineMetadata, (line, lineIndex, inCode) => {
lineOffsets[lineIndex] = currentOffset;
if (!inCode) {
line = line.replace(blockquotePrefixRe, "");
if (line.trim().length === 0) {
// Allow RegExp to detect the end of a block
line = "\0";
for (const token of filteredTokens) {
let labelPrefix = "";
// eslint-disable-next-line default-case
switch (token.type) {
case "definition":
case "gfmFootnoteDefinition":
// definitionLineIndices
for (let i = token.startLine; i <= token.endLine; i++) {
definitionLineIndices.push(i - 1);
}
contentLines.push(line);
currentOffset += line.length + 1;
}
});
lineOffsets.push(currentOffset);
const contentLine = contentLines.join(" ");
// Determine single-line exclusions for inline code spans
forEachInlineCodeSpan(contentLine, (code, lineIndex, columnIndex) => {
exclusions.push([ 0, columnIndex, code.length ]);
});
// Identify all link/image reference definitions
forEachLine(lineMetadata, (line, lineIndex, inCode) => {
if (!inCode) {
const linkReferenceDefinitionMatch = linkReferenceDefinitionRe.exec(line);
if (linkReferenceDefinitionMatch) {
const label = normalizeLabel(linkReferenceDefinitionMatch[1]);
if (definitions.has(label)) {
duplicateDefinitions.push([ label, lineIndex ]);
} else {
definitions.set(label, lineIndex);
}
const labelLength = linkReferenceDefinitionMatch[0].length;
exclusions.push([ 0, lineOffsets[lineIndex], labelLength ]);
const hasDefinition = line.slice(labelLength).trim().length > 0;
definitionLineIndices.push(lineIndex);
if (!hasDefinition) {
definitionLineIndices.push(lineIndex + 1);
}
}
}
});
// Identify all link and image references
let lineIndex = 0;
const pendingContents = [
break;
case "gfmFootnoteDefinitionLabelString":
labelPrefix = "^";
case "definitionLabelString": // eslint-disable-line no-fallthrough
{
"content": contentLine,
"contentLineIndex": 0,
"contentIndex": 0,
"topLevel": true
// definitions and definitionLineIndices
const reference = normalizeReference(`${labelPrefix}${token.text}`);
if (definitions.has(reference)) {
duplicateDefinitions.push([ reference, token.startLine - 1 ]);
} else {
definitions.set(reference, token.startLine - 1);
}
];
let pendingContent = null;
while ((pendingContent = pendingContents.shift())) {
const { content, contentLineIndex, contentIndex, topLevel } =
pendingContent;
let referenceLinkMatch = null;
while ((referenceLinkMatch = referenceLinkRe.exec(content)) !== null) {
const [ matchString, matchText, matchLabel ] = referenceLinkMatch;
if (
!matchString.startsWith("\\") &&
!matchString.startsWith("!\\") &&
!matchText.endsWith("\\") &&
!(matchLabel || "").endsWith("\\") &&
!(topLevel && excluded(referenceLinkMatch))
) {
const shortcutLink = (matchLabel === undefined);
const collapsedLink =
(!shortcutLink && (matchLabel.length === 0));
const label = normalizeLabel(
(shortcutLink || collapsedLink) ? matchText : matchLabel
}
break;
case "gfmFootnoteCall":
case "image":
case "link":
{
let isShortcut = false;
let isFullOrCollapsed = false;
let labelText = null;
let referenceStringText = null;
const shortcutCandidate =
micromark.matchAndGetTokensByType(token.tokens, [ "label" ]);
if (shortcutCandidate) {
labelText =
micromark.getTokenTextByType(
shortcutCandidate.label.tokens, "labelText"
);
if (label.length > 0) {
const referenceindex = referenceLinkMatch.index;
if (topLevel) {
// Calculate line index
while (lineOffsets[lineIndex + 1] <= referenceindex) {
lineIndex++;
isShortcut = (labelText !== null);
}
} else {
// Use provided line index
lineIndex = contentLineIndex;
const fullAndCollapsedCandidate =
micromark.matchAndGetTokensByType(
token.tokens, [ "label", "reference" ]
);
if (fullAndCollapsedCandidate) {
labelText =
micromark.getTokenTextByType(
fullAndCollapsedCandidate.label.tokens, "labelText"
);
referenceStringText =
micromark.getTokenTextByType(
fullAndCollapsedCandidate.reference.tokens, "referenceString"
);
isFullOrCollapsed = (labelText !== null);
}
const referenceIndex = referenceindex +
(topLevel ? -lineOffsets[lineIndex] : contentIndex);
const footnote = micromark.matchAndGetTokensByType(
token.tokens,
[
"gfmFootnoteCallLabelMarker", "gfmFootnoteCallMarker",
"gfmFootnoteCallString", "gfmFootnoteCallLabelMarker"
],
[ "gfmFootnoteCallMarker", "gfmFootnoteCallString" ]
);
if (footnote) {
const callMarkerText = footnote.gfmFootnoteCallMarker.text;
const callString = footnote.gfmFootnoteCallString.text;
labelText = `${callMarkerText}${callString}`;
isShortcut = true;
}
// Track shortcuts separately due to ambiguity in "text [text] text"
if (isShortcut || isFullOrCollapsed) {
const referenceDatum = [
lineIndex,
referenceIndex,
matchString.length,
matchText.length,
(matchLabel || "").length
token.startLine - 1,
token.startColumn - 1,
token.text.length,
// @ts-ignore
labelText.length,
(referenceStringText || "").length
];
if (shortcutLink) {
// Track separately due to ambiguity in "text [text] text"
const shortcutData = shortcuts.get(label) || [];
shortcutData.push(referenceDatum);
shortcuts.set(label, shortcutData);
} else {
// Track reference and location
const referenceData = references.get(label) || [];
const reference =
normalizeReference(referenceStringText || labelText);
const dictionary = isShortcut ? shortcuts : references;
const referenceData = dictionary.get(reference) || [];
referenceData.push(referenceDatum);
references.set(label, referenceData);
}
// Check for links embedded in brackets
if (!matchString.startsWith("!")) {
pendingContents.push({
"content": matchText,
"contentLineIndex": lineIndex,
"contentIndex": referenceIndex + 1,
"topLevel": false
});
}
dictionary.set(reference, referenceData);
}
}
break;
}
}
return {

View file

@ -5,7 +5,7 @@
/* eslint-disable n/no-unpublished-require */
// @ts-ignore
const { parse, postprocess, preprocess } = require("../micromark/micromark.cjs");
const { gfmFootnote, parse, postprocess, preprocess } = require("../micromark/micromark.cjs");
/**
* Markdown token.
@ -27,14 +27,20 @@ const { parse, postprocess, preprocess } = require("../micromark/micromark.cjs")
* @param {Object} [options] Options for micromark.
* @returns {Token[]} Micromark tokens (frozen).
*/
function micromarkParse(markdown, options) {
function micromarkParse(markdown, options = {}) {
// Customize options object to add useful extensions
options.extensions ||= [];
options.extensions.push(gfmFootnote());
// Use micromark to parse document into Events
const encoding = undefined;
const eol = true;
const parseContext = parse(options);
// Customize ParseContext to treat all references as defined
parseContext.defined.includes = (searchElement) => searchElement.length > 0;
const chunks = preprocess()(markdown, encoding, eol);
const parseContext = parse(options).document().write(chunks);
const events = postprocess(parseContext);
const events = postprocess(parseContext.document().write(chunks));
// Create Token objects
const document = [];
@ -110,8 +116,46 @@ function filterByTypes(tokens, ...types) {
return filterByPredicate(tokens, (token) => types.includes(token.type));
}
/**
 * Gets the text of the one token of the given type in a list of Micromark
 * tokens. Only the tokens in the list itself are examined.
 *
 * @param {Token[]} tokens Micromark tokens.
 * @param {string} type Type to match.
 * @returns {string | null} Text of the token when exactly one token has the
 * given type; null when there are zero or multiple matches.
 */
function getTokenTextByType(tokens, type) {
  const matches = tokens.filter((candidate) => candidate.type === type);
  // Anything other than a single match is treated as "no answer"
  if (matches.length !== 1) {
    return null;
  }
  const [ onlyMatch ] = matches;
  return onlyMatch.text;
}
/**
 * Checks whether a list of Micromark tokens has exactly the given sequence
 * of types and, if so, returns the matching tokens keyed by type.
 *
 * @param {Token[]} tokens Micromark tokens.
 * @param {string[]} matchTypes Types to match, in order, one per token.
 * @param {string[]} [resultTypes] Types to include in the result (defaults
 * to matchTypes).
 * @returns {Object | null} Matching tokens by type, or null when the token
 * list does not have the same length and types as matchTypes.
 */
function matchAndGetTokensByType(tokens, matchTypes, resultTypes) {
  if (tokens.length !== matchTypes.length) {
    return null;
  }
  const wantedTypes = resultTypes || matchTypes;
  const tokensByType = {};
  for (const [ index, expectedType ] of matchTypes.entries()) {
    const candidate = tokens[index];
    if (candidate.type !== expectedType) {
      return null;
    }
    if (wantedTypes.includes(expectedType)) {
      // A type that appears more than once keeps its last matching token
      tokensByType[expectedType] = candidate;
    }
  }
  return tokensByType;
}
// Public API of this module; micromarkParse is exported under the name "parse"
module.exports = {
filterByPredicate,
filterByTypes,
getTokenTextByType,
matchAndGetTokensByType,
"parse": micromarkParse
};

View file

@ -583,7 +583,7 @@ function lintContent(
const htmlElementRanges =
helpers.htmlElementRanges(paramsBase, lineMetadata);
const referenceLinkImageData =
helpers.getReferenceLinkImageData(lineMetadata);
helpers.getReferenceLinkImageData(paramsBase);
cache.set({
codeBlockAndSpanRanges,
flattenedLists,

View file

@ -2,6 +2,7 @@
/* eslint-disable n/file-extension-in-import */
export { gfmFootnote } from "micromark-extension-gfm-footnote";
export { parse } from "micromark/lib/parse";
export { postprocess } from "micromark/lib/postprocess";
export { preprocess } from "micromark/lib/preprocess";

View file

@ -18,10 +18,9 @@
"engines": {
"node": ">=14.18.0"
},
"dependencies": {
"micromark": "3.1.0"
},
"devDependencies": {
"micromark": "3.1.0",
"micromark-extension-gfm-footnote": "1.0.4",
"webpack": "5.75.0",
"webpack-cli": "5.0.1"
}

View file

@ -79,13 +79,13 @@
"eslint-plugin-unicorn": "45.0.2",
"globby": "13.1.3",
"js-yaml": "4.1.0",
"markdown-it-footnote": "3.0.3",
"markdown-it-for-inline": "0.1.1",
"markdown-it-sub": "1.0.0",
"markdown-it-sup": "1.0.0",
"markdown-it-texmath": "1.0.0",
"markdownlint-rule-helpers": "0.18.0",
"micromark": "3.1.0",
"micromark-extension-gfm-footnote": "1.0.4",
"npm-run-all": "4.1.5",
"strip-json-comments": "5.0.0",
"terser-webpack-plugin": "5.3.6",

View file

@ -1433,9 +1433,8 @@ test("getReferenceLinkImageData().shortcuts", (t) => {
"tags": [ "-" ],
"function":
(params) => {
const lineMetadata = helpers.getLineMetadata(params);
const { shortcuts } =
helpers.getReferenceLinkImageData(lineMetadata);
helpers.getReferenceLinkImageData(params);
t.is(shortcuts.size, 0, [ ...shortcuts.keys() ].join(", "));
}
}

View file

@ -6,7 +6,6 @@ const fs = require("node:fs");
const path = require("node:path");
const jsYaml = require("js-yaml");
const md = require("markdown-it")();
const pluginFootnote = require("markdown-it-footnote");
const pluginInline = require("markdown-it-for-inline");
const pluginSub = require("markdown-it-sub");
const pluginSup = require("markdown-it-sup");
@ -1154,7 +1153,7 @@ test("texmath test files with texmath plugin", (t) => new Promise((resolve) => {
});
}));
test("Pandoc footnote via footnote plugin", (t) => new Promise((resolve) => {
test("Pandoc footnote", (t) => new Promise((resolve) => {
t.plan(2);
markdownlint({
"strings": {
@ -1170,7 +1169,6 @@ Text with: [^footnote]
[reference]: https://example.com
`
},
"markdownItPlugins": [ [ pluginFootnote ] ],
"resultVersion": 0
}, (err, actual) => {
t.falsy(err);

View file

@ -33,7 +33,7 @@ A sentence with a long footnotes: [^long] [^longer] [^longest]
[PCW2]: https://www.example.com/article.html
[Pandoc2]: https://pandoc.org/MANUAL.html#extension-footnotes
[Another2]: https://www.example.com/another.{MD053}.html
[Another2]: https://www.example.com/another.html
## GitHub Footnotes

View file

@ -155,7 +155,6 @@ Missing[^2]
[image7]: https://example.com/image7
[image8]: https://example.com/image8
[`code`]: https://example.com/code
[^1]: https://example.com/footnote
[multi line full text]: https://example.com/multi-line-full-text
[multi line full label]: https://example.com/multi-line-full-label
[multi line collapsed label]: https://example.com/multi-line-collapsed-label
@ -179,6 +178,7 @@ https://example.com/multi-line-label
[unique8]: https://example.com/unique8
[unique9]: https://example.com/unique9
[unique10]: https://example.com/unique10
[^1]: https://example.com/footnote
## Ignored Labels
@ -188,15 +188,16 @@ https://example.com/multi-line-label
## Invalid Labels
Duplicate:
Duplicate/unused:
[label]: {MD053}
Unused:
[blank-line-filler-0]: https://example.com
[unused]: {MD053}
Unused footnote:
[blank-line-filler-1]: https://example.com
[^3]: {MD053}
[blank-line-filler-0][] [blank-line-filler-1][]
[Duplicate unused multi-line label {MD053}]:
https://example.com/duplicate-unused-multi-line-label

View file

@ -32971,26 +32971,7 @@ Generated by [AVA](https://avajs.dev).
> Snapshot 1
{
errors: [
{
errorContext: '[Another2]: https://www.exampl...',
errorDetail: 'Unused link or image reference definition: "another2"',
errorRange: [
1,
56,
],
fixInfo: {
deleteCount: -1,
},
lineNumber: 36,
ruleDescription: 'Link and image reference definitions should be needed',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md053.md',
ruleNames: [
'MD053',
'link-image-reference-definitions',
],
},
],
errors: [],
fixed: `# Pandoc Footnotes␊
> Examples taken from [GitHub issue 599](https://github.com/DavidAnson/markdownlint/issues/599)␊
@ -33026,6 +33007,7 @@ Generated by [AVA](https://avajs.dev).
[PCW2]: https://www.example.com/article.html␊
[Pandoc2]: https://pandoc.org/MANUAL.html#extension-footnotes␊
[Another2]: https://www.example.com/another.html␊
## GitHub Footnotes␊
@ -35306,7 +35288,7 @@ Generated by [AVA](https://avajs.dev).
25,
],
fixInfo: null,
lineNumber: 216,
lineNumber: 217,
ruleDescription: 'Reference links and images should use a label that is defined',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md052.md',
ruleNames: [
@ -35322,7 +35304,7 @@ Generated by [AVA](https://avajs.dev).
10,
],
fixInfo: null,
lineNumber: 230,
lineNumber: 231,
ruleDescription: 'Reference links and images should use a label that is defined',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md052.md',
ruleNames: [
@ -35340,7 +35322,7 @@ Generated by [AVA](https://avajs.dev).
fixInfo: {
deleteCount: -1,
},
lineNumber: 192,
lineNumber: 193,
ruleDescription: 'Link and image reference definitions should be needed',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md053.md',
ruleNames: [
@ -35376,7 +35358,7 @@ Generated by [AVA](https://avajs.dev).
fixInfo: {
deleteCount: -1,
},
lineNumber: 198,
lineNumber: 197,
ruleDescription: 'Link and image reference definitions should be needed',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md053.md',
ruleNames: [
@ -35392,7 +35374,7 @@ Generated by [AVA](https://avajs.dev).
44,
],
fixInfo: null,
lineNumber: 200,
lineNumber: 201,
ruleDescription: 'Link and image reference definitions should be needed',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md053.md',
ruleNames: [
@ -35408,7 +35390,7 @@ Generated by [AVA](https://avajs.dev).
44,
],
fixInfo: null,
lineNumber: 203,
lineNumber: 204,
ruleDescription: 'Link and image reference definitions should be needed',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md053.md',
ruleNames: [
@ -35574,7 +35556,6 @@ Generated by [AVA](https://avajs.dev).
[image7]: https://example.com/image7␊
[image8]: https://example.com/image8␊
[\`code\`]: https://example.com/code␊
[^1]: https://example.com/footnote␊
[multi line full text]: https://example.com/multi-line-full-text␊
[multi line full label]: https://example.com/multi-line-full-label␊
[multi line collapsed label]: https://example.com/multi-line-collapsed-label␊
@ -35598,6 +35579,7 @@ Generated by [AVA](https://avajs.dev).
[unique8]: https://example.com/unique8␊
[unique9]: https://example.com/unique9␊
[unique10]: https://example.com/unique10␊
[^1]: https://example.com/footnote␊
## Ignored Labels␊
@ -35607,11 +35589,12 @@ Generated by [AVA](https://avajs.dev).
## Invalid Labels␊
Duplicate:␊
Duplicate/unused:␊
Unused:␊
[blank-line-filler-0]: https://example.com␊
[blank-line-filler-1]: https://example.com␊
Unused footnote:
[blank-line-filler-0][] [blank-line-filler-1][]
[Duplicate unused multi-line label {MD053}]:␊
https://example.com/duplicate-unused-multi-line-label␊