mirror of
https://github.com/DavidAnson/markdownlint.git
synced 2025-12-16 14:00:13 +01:00
Reimplement getReferenceLinkImageData using micromark tokens.
This commit is contained in:
parent
57c612cfa4
commit
97f99befb8
13 changed files with 1235 additions and 1068 deletions
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
"use strict";
|
||||
|
||||
const micromark = require("./micromark.cjs");
|
||||
|
||||
// Regular expression for matching common newline characters
|
||||
// See NEWLINES_RE in markdown-it/lib/rules_core/normalize.js
|
||||
const newLineRe = /\r\n?|\n/g;
|
||||
|
|
@ -33,10 +35,6 @@ const emphasisMarkersRe = /[_*]/g;
|
|||
const blockquotePrefixRe = /^[>\s]*/;
|
||||
module.exports.blockquotePrefixRe = blockquotePrefixRe;
|
||||
|
||||
// Regular expression for reference links (full, collapsed, and shortcut)
|
||||
const referenceLinkRe =
|
||||
/!?\\?\[((?:\[[^\]\0]*\]|[^[\]\0])*)\](?:\[([^\]\0]*)\]|([^(])|$)/g;
|
||||
|
||||
// Regular expression for link reference definitions
|
||||
const linkReferenceDefinitionRe = /^ {0,3}\[([^\]]*[^\\])\]:/;
|
||||
module.exports.linkReferenceDefinitionRe = linkReferenceDefinitionRe;
|
||||
|
|
@ -805,137 +803,115 @@ module.exports.emphasisMarkersInContent = emphasisMarkersInContent;
|
|||
/**
|
||||
* Returns an object with information about reference links and images.
|
||||
*
|
||||
* @param {Object} lineMetadata Line metadata object.
|
||||
* @param {Object} params RuleParams instance.
|
||||
* @returns {Object} Reference link/image data.
|
||||
*/
|
||||
function getReferenceLinkImageData(lineMetadata) {
|
||||
// Initialize return values
|
||||
function getReferenceLinkImageData(params) {
|
||||
const normalizeReference = (s) => s.toLowerCase().trim().replace(/\s+/g, " ");
|
||||
const definitions = new Map();
|
||||
const definitionLineIndices = [];
|
||||
const duplicateDefinitions = [];
|
||||
const references = new Map();
|
||||
const shortcuts = new Map();
|
||||
const definitions = new Map();
|
||||
const duplicateDefinitions = [];
|
||||
const definitionLineIndices = [];
|
||||
// Define helper functions
|
||||
const normalizeLabel = (s) => s.toLowerCase().trim().replace(/\s+/g, " ");
|
||||
const exclusions = [];
|
||||
const excluded = (match) => withinAnyRange(
|
||||
exclusions, 0, match.index, match[0].length - (match[3] || "").length
|
||||
);
|
||||
// Convert input to single-line so multi-line links/images are easier
|
||||
const lineOffsets = [];
|
||||
let currentOffset = 0;
|
||||
const contentLines = [];
|
||||
forEachLine(lineMetadata, (line, lineIndex, inCode) => {
|
||||
lineOffsets[lineIndex] = currentOffset;
|
||||
if (!inCode) {
|
||||
line = line.replace(blockquotePrefixRe, "");
|
||||
if (line.trim().length === 0) {
|
||||
// Allow RegExp to detect the end of a block
|
||||
line = "\0";
|
||||
}
|
||||
contentLines.push(line);
|
||||
currentOffset += line.length + 1;
|
||||
}
|
||||
});
|
||||
lineOffsets.push(currentOffset);
|
||||
const contentLine = contentLines.join(" ");
|
||||
// Determine single-line exclusions for inline code spans
|
||||
forEachInlineCodeSpan(contentLine, (code, lineIndex, columnIndex) => {
|
||||
exclusions.push([ 0, columnIndex, code.length ]);
|
||||
});
|
||||
// Identify all link/image reference definitions
|
||||
forEachLine(lineMetadata, (line, lineIndex, inCode) => {
|
||||
if (!inCode) {
|
||||
const linkReferenceDefinitionMatch = linkReferenceDefinitionRe.exec(line);
|
||||
if (linkReferenceDefinitionMatch) {
|
||||
const label = normalizeLabel(linkReferenceDefinitionMatch[1]);
|
||||
if (definitions.has(label)) {
|
||||
duplicateDefinitions.push([ label, lineIndex ]);
|
||||
} else {
|
||||
definitions.set(label, lineIndex);
|
||||
const filteredTokens =
|
||||
micromark.filterByTypes(
|
||||
params.parsers.micromark.tokens,
|
||||
// definitionLineIndices
|
||||
"definition", "gfmFootnoteDefinition",
|
||||
// definitions and definitionLineIndices
|
||||
"definitionLabelString", "gfmFootnoteDefinitionLabelString",
|
||||
// references and shortcuts
|
||||
"gfmFootnoteCall", "image", "link"
|
||||
);
|
||||
for (const token of filteredTokens) {
|
||||
let labelPrefix = "";
|
||||
// eslint-disable-next-line default-case
|
||||
switch (token.type) {
|
||||
case "definition":
|
||||
case "gfmFootnoteDefinition":
|
||||
// definitionLineIndices
|
||||
for (let i = token.startLine; i <= token.endLine; i++) {
|
||||
definitionLineIndices.push(i - 1);
|
||||
}
|
||||
const labelLength = linkReferenceDefinitionMatch[0].length;
|
||||
exclusions.push([ 0, lineOffsets[lineIndex], labelLength ]);
|
||||
const hasDefinition = line.slice(labelLength).trim().length > 0;
|
||||
definitionLineIndices.push(lineIndex);
|
||||
if (!hasDefinition) {
|
||||
definitionLineIndices.push(lineIndex + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
// Identify all link and image references
|
||||
let lineIndex = 0;
|
||||
const pendingContents = [
|
||||
{
|
||||
"content": contentLine,
|
||||
"contentLineIndex": 0,
|
||||
"contentIndex": 0,
|
||||
"topLevel": true
|
||||
}
|
||||
];
|
||||
let pendingContent = null;
|
||||
while ((pendingContent = pendingContents.shift())) {
|
||||
const { content, contentLineIndex, contentIndex, topLevel } =
|
||||
pendingContent;
|
||||
let referenceLinkMatch = null;
|
||||
while ((referenceLinkMatch = referenceLinkRe.exec(content)) !== null) {
|
||||
const [ matchString, matchText, matchLabel ] = referenceLinkMatch;
|
||||
if (
|
||||
!matchString.startsWith("\\") &&
|
||||
!matchString.startsWith("!\\") &&
|
||||
!matchText.endsWith("\\") &&
|
||||
!(matchLabel || "").endsWith("\\") &&
|
||||
!(topLevel && excluded(referenceLinkMatch))
|
||||
) {
|
||||
const shortcutLink = (matchLabel === undefined);
|
||||
const collapsedLink =
|
||||
(!shortcutLink && (matchLabel.length === 0));
|
||||
const label = normalizeLabel(
|
||||
(shortcutLink || collapsedLink) ? matchText : matchLabel
|
||||
);
|
||||
if (label.length > 0) {
|
||||
const referenceindex = referenceLinkMatch.index;
|
||||
if (topLevel) {
|
||||
// Calculate line index
|
||||
while (lineOffsets[lineIndex + 1] <= referenceindex) {
|
||||
lineIndex++;
|
||||
}
|
||||
break;
|
||||
case "gfmFootnoteDefinitionLabelString":
|
||||
labelPrefix = "^";
|
||||
case "definitionLabelString": // eslint-disable-line no-fallthrough
|
||||
{
|
||||
// definitions and definitionLineIndices
|
||||
const reference = normalizeReference(`${labelPrefix}${token.text}`);
|
||||
if (definitions.has(reference)) {
|
||||
duplicateDefinitions.push([ reference, token.startLine - 1 ]);
|
||||
} else {
|
||||
// Use provided line index
|
||||
lineIndex = contentLineIndex;
|
||||
definitions.set(reference, token.startLine - 1);
|
||||
}
|
||||
const referenceIndex = referenceindex +
|
||||
(topLevel ? -lineOffsets[lineIndex] : contentIndex);
|
||||
const referenceDatum = [
|
||||
lineIndex,
|
||||
referenceIndex,
|
||||
matchString.length,
|
||||
matchText.length,
|
||||
(matchLabel || "").length
|
||||
];
|
||||
if (shortcutLink) {
|
||||
// Track separately due to ambiguity in "text [text] text"
|
||||
const shortcutData = shortcuts.get(label) || [];
|
||||
shortcutData.push(referenceDatum);
|
||||
shortcuts.set(label, shortcutData);
|
||||
} else {
|
||||
// Track reference and location
|
||||
const referenceData = references.get(label) || [];
|
||||
}
|
||||
break;
|
||||
case "gfmFootnoteCall":
|
||||
case "image":
|
||||
case "link":
|
||||
{
|
||||
let isShortcut = false;
|
||||
let isFullOrCollapsed = false;
|
||||
let labelText = null;
|
||||
let referenceStringText = null;
|
||||
const shortcutCandidate =
|
||||
micromark.matchAndGetTokensByType(token.tokens, [ "label" ]);
|
||||
if (shortcutCandidate) {
|
||||
labelText =
|
||||
micromark.getTokenTextByType(
|
||||
shortcutCandidate.label.tokens, "labelText"
|
||||
);
|
||||
isShortcut = (labelText !== null);
|
||||
}
|
||||
const fullAndCollapsedCandidate =
|
||||
micromark.matchAndGetTokensByType(
|
||||
token.tokens, [ "label", "reference" ]
|
||||
);
|
||||
if (fullAndCollapsedCandidate) {
|
||||
labelText =
|
||||
micromark.getTokenTextByType(
|
||||
fullAndCollapsedCandidate.label.tokens, "labelText"
|
||||
);
|
||||
referenceStringText =
|
||||
micromark.getTokenTextByType(
|
||||
fullAndCollapsedCandidate.reference.tokens, "referenceString"
|
||||
);
|
||||
isFullOrCollapsed = (labelText !== null);
|
||||
}
|
||||
const footnote = micromark.matchAndGetTokensByType(
|
||||
token.tokens,
|
||||
[
|
||||
"gfmFootnoteCallLabelMarker", "gfmFootnoteCallMarker",
|
||||
"gfmFootnoteCallString", "gfmFootnoteCallLabelMarker"
|
||||
],
|
||||
[ "gfmFootnoteCallMarker", "gfmFootnoteCallString" ]
|
||||
);
|
||||
if (footnote) {
|
||||
const callMarkerText = footnote.gfmFootnoteCallMarker.text;
|
||||
const callString = footnote.gfmFootnoteCallString.text;
|
||||
labelText = `${callMarkerText}${callString}`;
|
||||
isShortcut = true;
|
||||
}
|
||||
// Track shortcuts separately due to ambiguity in "text [text] text"
|
||||
if (isShortcut || isFullOrCollapsed) {
|
||||
const referenceDatum = [
|
||||
token.startLine - 1,
|
||||
token.startColumn - 1,
|
||||
token.text.length,
|
||||
// @ts-ignore
|
||||
labelText.length,
|
||||
(referenceStringText || "").length
|
||||
];
|
||||
const reference =
|
||||
normalizeReference(referenceStringText || labelText);
|
||||
const dictionary = isShortcut ? shortcuts : references;
|
||||
const referenceData = dictionary.get(reference) || [];
|
||||
referenceData.push(referenceDatum);
|
||||
references.set(label, referenceData);
|
||||
}
|
||||
// Check for links embedded in brackets
|
||||
if (!matchString.startsWith("!")) {
|
||||
pendingContents.push({
|
||||
"content": matchText,
|
||||
"contentLineIndex": lineIndex,
|
||||
"contentIndex": referenceIndex + 1,
|
||||
"topLevel": false
|
||||
});
|
||||
dictionary.set(reference, referenceData);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
return {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
/* eslint-disable n/no-unpublished-require */
|
||||
|
||||
// @ts-ignore
|
||||
const { parse, postprocess, preprocess } = require("../micromark/micromark.cjs");
|
||||
const { gfmFootnote, parse, postprocess, preprocess } = require("../micromark/micromark.cjs");
|
||||
|
||||
/**
|
||||
* Markdown token.
|
||||
|
|
@ -27,14 +27,20 @@ const { parse, postprocess, preprocess } = require("../micromark/micromark.cjs")
|
|||
* @param {Object} [options] Options for micromark.
|
||||
* @returns {Token[]} Micromark tokens (frozen).
|
||||
*/
|
||||
function micromarkParse(markdown, options) {
|
||||
function micromarkParse(markdown, options = {}) {
|
||||
|
||||
// Customize options object to add useful extensions
|
||||
options.extensions ||= [];
|
||||
options.extensions.push(gfmFootnote());
|
||||
|
||||
// Use micromark to parse document into Events
|
||||
const encoding = undefined;
|
||||
const eol = true;
|
||||
const parseContext = parse(options);
|
||||
// Customize ParseContext to treat all references as defined
|
||||
parseContext.defined.includes = (searchElement) => searchElement.length > 0;
|
||||
const chunks = preprocess()(markdown, encoding, eol);
|
||||
const parseContext = parse(options).document().write(chunks);
|
||||
const events = postprocess(parseContext);
|
||||
const events = postprocess(parseContext.document().write(chunks));
|
||||
|
||||
// Create Token objects
|
||||
const document = [];
|
||||
|
|
@ -110,8 +116,46 @@ function filterByTypes(tokens, ...types) {
|
|||
return filterByPredicate(tokens, (token) => types.includes(token.type));
|
||||
}
|
||||
|
||||
/**
 * Gets the text of the single token of a given type from a list of Micromark
 * tokens.
 *
 * Only the tokens passed in are examined (child tokens are not searched) and
 * the match must be unambiguous.
 *
 * @param {Token[]} tokens Micromark tokens.
 * @param {string} type Type to match.
 * @returns {string | null} Text of the token when exactly one token has the
 * requested type; null when none or more than one does.
 */
function getTokenTextByType(tokens, type) {
  const filtered = tokens.filter((token) => token.type === type);
  // Zero or multiple matches are treated the same: no single answer exists
  return (filtered.length === 1) ? filtered[0].text : null;
}
|
||||
|
||||
/**
 * Determines whether a list of Micromark tokens matches an exact sequence of
 * types and, if so, returns a subset of those tokens keyed by type.
 *
 * @param {Token[]} tokens Micromark tokens.
 * @param {string[]} matchTypes Types to match, in order; tokens must have
 * exactly these types and no others.
 * @param {string[]} [resultTypes] Types to return (defaults to matchTypes).
 * @returns {Object | null} Matching tokens by type, or null when the token
 * types do not match matchTypes one-for-one.
 */
function matchAndGetTokensByType(tokens, matchTypes, resultTypes) {
  // Lengths must agree so extra or missing tokens never count as a match
  if (tokens.length !== matchTypes.length) {
    return null;
  }
  // Use a local rather than reassigning the caller-visible parameter
  const wantedTypes = resultTypes || matchTypes;
  const result = {};
  for (let i = 0; i < matchTypes.length; i++) {
    const matchType = matchTypes[i];
    const token = tokens[i];
    if (token.type !== matchType) {
      return null;
    }
    if (wantedTypes.includes(matchType)) {
      // A type repeated in matchTypes keeps the last token of that type
      result[matchType] = token;
    }
  }
  return result;
}
|
||||
|
||||
// Exports of this helper module; micromarkParse is exposed under the name "parse"
module.exports = {
|
||||
filterByPredicate,
|
||||
filterByTypes,
|
||||
getTokenTextByType,
|
||||
matchAndGetTokensByType,
|
||||
"parse": micromarkParse
|
||||
};
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue