Replace helpers.linkRe with helpers.forEachLink to fix "Polynomial regular expression used on uncontrolled data" and to better support link syntax.

This commit is contained in:
David Anson 2022-02-18 21:14:14 -08:00
parent 861443c740
commit 7a53caa7fb
4 changed files with 458 additions and 28 deletions

View file

@ -49,9 +49,6 @@ module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/; module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
// Regular expression for all instances of emphasis markers // Regular expression for all instances of emphasis markers
var emphasisMarkersRe = /[_*]/g; var emphasisMarkersRe = /[_*]/g;
// Regular expression for inline links and shortcut reference links
var linkRe = /(\[(?:[^[\]]?(?:\[[^[\]]*\])?)*\])(\([^)]*\)|\[[^\]]*\])?/g;
module.exports.linkRe = linkRe;
// Regular expression for link reference definition lines // Regular expression for link reference definition lines
module.exports.linkReferenceRe = /^ {0,3}\[[^\]]+]:\s.*$/; module.exports.linkReferenceRe = /^ {0,3}\[[^\]]+]:\s.*$/;
// All punctuation characters (normal and full-width) // All punctuation characters (normal and full-width)
@ -630,6 +627,89 @@ module.exports.frontMatterHasTitle =
return !ignoreFrontMatter && return !ignoreFrontMatter &&
frontMatterLines.some(function (line) { return frontMatterTitleRe.test(line); }); frontMatterLines.some(function (line) { return frontMatterTitleRe.test(line); });
}; };
/**
 * Calls the provided function for each link found in a line of Markdown.
 *
 * A link is bracketed text ("[text]") optionally followed immediately by a
 * parenthesized destination ("(dest)") or a bracketed reference ("[ref]").
 * Backslash-escaped brackets/parentheses are ignored, nested pairs are
 * balanced, and a destination that begins with "<" may contain ")" until the
 * closing ">".
 *
 * The handler receives (index, link, text, destination) where index is the
 * offset of the opening "[", link is the full matched string, text is the
 * bracketed text (brackets included), and destination is the bracketed or
 * parenthesized part (delimiters included). For links with no destination,
 * link equals text and destination is not passed (undefined).
 *
 * @param {string} line Line of Markdown input.
 * @param {Function} handler Function taking (index, link, text, destination).
 * @returns {void}
 */
function forEachLink(line, handler) {
    // Helper to find matching close symbol for link text/destination.
    // Takes the index of an opening "[" or "(" and returns the index just
    // past the matching close symbol, or -1 if there is none.
    var findClosingSymbol = function (index) {
        var begin = line[index];
        var end = (begin === "[") ? "]" : ")";
        var nesting = 0;
        var escaping = false;
        var pointy = false;
        for (var i = index + 1; i < line.length; i++) {
            var current = line[i];
            if (current === "\\") {
                // Toggle so that an escaped backslash does not escape what follows
                escaping = !escaping;
            }
            else if (!escaping && (current === begin)) {
                nesting++;
            }
            else if (!escaping && (current === end)) {
                if (nesting > 0) {
                    nesting--;
                }
                else if (!pointy) {
                    // Return index after matching close symbol
                    return i + 1;
                }
            }
            else if ((i === index + 1) && (begin === "(") && (current === "<")) {
                // Destination of the form (<...>) may contain unbalanced parentheses
                pointy = true;
            }
            else if (!escaping && pointy && current === ">") {
                // End of <...>; discard any nesting counted inside it
                pointy = false;
                nesting = 0;
            }
            else {
                escaping = false;
            }
        }
        // No match found
        return -1;
    };
    // Scan line for unescaped "[" character
    var escaping = false;
    for (var i = 0; i < line.length; i++) {
        var current = line[i];
        if (current === "\\") {
            escaping = !escaping;
        }
        else if (!escaping && (current === "[")) {
            // Scan for matching close "]" of link text
            var textEnd = findClosingSymbol(i);
            if (textEnd !== -1) {
                var destEnd = -1;
                if ((line[textEnd] === "(") || (line[textEnd] === "[")) {
                    // Scan for matching close ")" or "]" of link destination
                    destEnd = findClosingSymbol(textEnd);
                }
                var text = line.slice(i, textEnd);
                if (destEnd !== -1) {
                    // Call handler with link text and destination
                    handler(i, line.slice(i, destEnd), text, line.slice(textEnd, destEnd));
                    // Resume at destEnd (the loop increment adds 1) so the character
                    // right after the link - e.g. the "[" of an adjacent link or an
                    // escaping backslash - is not skipped
                    i = destEnd - 1;
                }
                else {
                    // Call handler with link text only (destination left undefined)
                    handler(i, text, text);
                    // Resume at textEnd (the loop increment adds 1)
                    i = textEnd - 1;
                }
            }
        }
        else {
            escaping = false;
        }
    }
}
module.exports.forEachLink = forEachLink;
/** /**
* Returns a list of emphasis markers in code spans and links. * Returns a list of emphasis markers in code spans and links.
* *
@ -642,13 +722,12 @@ function emphasisMarkersInContent(params) {
// Search links // Search links
lines.forEach(function (tokenLine, tokenLineIndex) { lines.forEach(function (tokenLine, tokenLineIndex) {
var inLine = []; var inLine = [];
var linkMatch = null; forEachLink(tokenLine, function (index, match) {
while ((linkMatch = linkRe.exec(tokenLine))) {
var markerMatch = null; var markerMatch = null;
while ((markerMatch = emphasisMarkersRe.exec(linkMatch[0]))) { while ((markerMatch = emphasisMarkersRe.exec(match))) {
inLine.push(linkMatch.index + markerMatch.index); inLine.push(index + markerMatch.index);
} }
} });
byLine[tokenLineIndex] = inLine; byLine[tokenLineIndex] = inLine;
}); });
// Search code spans // Search code spans
@ -4035,7 +4114,7 @@ module.exports = {
"use strict"; "use strict";
// @ts-check // @ts-check
var _a = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"), addErrorDetailIf = _a.addErrorDetailIf, bareUrlRe = _a.bareUrlRe, escapeForRegExp = _a.escapeForRegExp, forEachLine = _a.forEachLine, overlapsAnyRange = _a.overlapsAnyRange, linkRe = _a.linkRe, linkReferenceRe = _a.linkReferenceRe; var _a = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"), addErrorDetailIf = _a.addErrorDetailIf, bareUrlRe = _a.bareUrlRe, escapeForRegExp = _a.escapeForRegExp, forEachLine = _a.forEachLine, forEachLink = _a.forEachLink, overlapsAnyRange = _a.overlapsAnyRange, linkReferenceRe = _a.linkReferenceRe;
var _b = __webpack_require__(/*! ./cache */ "../lib/cache.js"), codeBlockAndSpanRanges = _b.codeBlockAndSpanRanges, lineMetadata = _b.lineMetadata; var _b = __webpack_require__(/*! ./cache */ "../lib/cache.js"), codeBlockAndSpanRanges = _b.codeBlockAndSpanRanges, lineMetadata = _b.lineMetadata;
module.exports = { module.exports = {
"names": ["MD044", "proper-names"], "names": ["MD044", "proper-names"],
@ -4057,12 +4136,11 @@ module.exports = {
while ((match = bareUrlRe.exec(line)) !== null) { while ((match = bareUrlRe.exec(line)) !== null) {
exclusions.push([lineIndex, match.index, match[0].length]); exclusions.push([lineIndex, match.index, match[0].length]);
} }
while ((match = linkRe.exec(line)) !== null) { forEachLink(line, function (index, _, text, destination) {
var text = match[1], destination = match[2];
if (destination) { if (destination) {
exclusions.push([lineIndex, match.index + text.length, destination.length]); exclusions.push([lineIndex, index + text.length, destination.length]);
} }
} });
} }
}); });
if (!includeCodeBlocks) { if (!includeCodeBlocks) {

View file

@ -26,11 +26,6 @@ module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
// Regular expression for all instances of emphasis markers // Regular expression for all instances of emphasis markers
const emphasisMarkersRe = /[_*]/g; const emphasisMarkersRe = /[_*]/g;
// Regular expression for inline links and shortcut reference links
const linkRe =
/(\[(?:[^[\]]?(?:\[[^[\]]*\])?)*\])(\([^)]*\)|\[[^\]]*\])?/g;
module.exports.linkRe = linkRe;
// Regular expression for link reference definition lines // Regular expression for link reference definition lines
module.exports.linkReferenceRe = /^ {0,3}\[[^\]]+]:\s.*$/; module.exports.linkReferenceRe = /^ {0,3}\[[^\]]+]:\s.*$/;
@ -648,6 +643,82 @@ module.exports.frontMatterHasTitle =
frontMatterLines.some((line) => frontMatterTitleRe.test(line)); frontMatterLines.some((line) => frontMatterTitleRe.test(line));
}; };
/**
 * Calls the provided function for each link found in a line of Markdown.
 *
 * A link is bracketed text ("[text]") optionally followed immediately by a
 * parenthesized destination ("(dest)") or a bracketed reference ("[ref]").
 * Backslash-escaped brackets/parentheses are ignored, nested pairs are
 * balanced, and a destination that begins with "<" may contain ")" until the
 * closing ">".
 *
 * The handler receives (index, link, text, destination) where index is the
 * offset of the opening "[", link is the full matched string, text is the
 * bracketed text (brackets included), and destination is the bracketed or
 * parenthesized part (delimiters included). For links with no destination,
 * link equals text and destination is not passed (undefined).
 *
 * @param {string} line Line of Markdown input.
 * @param {Function} handler Function taking (index, link, text, destination).
 * @returns {void}
 */
function forEachLink(line, handler) {
  // Helper to find matching close symbol for link text/destination.
  // Takes the index of an opening "[" or "(" and returns the index just
  // past the matching close symbol, or -1 if there is none.
  const findClosingSymbol = (index) => {
    const begin = line[index];
    const end = (begin === "[") ? "]" : ")";
    let nesting = 0;
    let escaping = false;
    let pointy = false;
    for (let i = index + 1; i < line.length; i++) {
      const current = line[i];
      if (current === "\\") {
        // Toggle so that an escaped backslash does not escape what follows
        escaping = !escaping;
      } else if (!escaping && (current === begin)) {
        nesting++;
      } else if (!escaping && (current === end)) {
        if (nesting > 0) {
          nesting--;
        } else if (!pointy) {
          // Return index after matching close symbol
          return i + 1;
        }
      } else if ((i === index + 1) && (begin === "(") && (current === "<")) {
        // Destination of the form (<...>) may contain unbalanced parentheses
        pointy = true;
      } else if (!escaping && pointy && current === ">") {
        // End of <...>; discard any nesting counted inside it
        pointy = false;
        nesting = 0;
      } else {
        escaping = false;
      }
    }
    // No match found
    return -1;
  };
  // Scan line for unescaped "[" character
  let escaping = false;
  for (let i = 0; i < line.length; i++) {
    const current = line[i];
    if (current === "\\") {
      escaping = !escaping;
    } else if (!escaping && (current === "[")) {
      // Scan for matching close "]" of link text
      const textEnd = findClosingSymbol(i);
      if (textEnd !== -1) {
        let destEnd = -1;
        if ((line[textEnd] === "(") || (line[textEnd] === "[")) {
          // Scan for matching close ")" or "]" of link destination
          destEnd = findClosingSymbol(textEnd);
        }
        const text = line.slice(i, textEnd);
        if (destEnd !== -1) {
          // Call handler with link text and destination
          const link = line.slice(i, destEnd);
          const dest = line.slice(textEnd, destEnd);
          handler(i, link, text, dest);
          // Resume at destEnd (the loop increment adds 1) so the character
          // right after the link - e.g. the "[" of an adjacent link or an
          // escaping backslash - is not skipped
          i = destEnd - 1;
        } else {
          // Call handler with link text only (destination left undefined)
          handler(i, text, text);
          // Resume at textEnd (the loop increment adds 1)
          i = textEnd - 1;
        }
      }
    } else {
      escaping = false;
    }
  }
}
module.exports.forEachLink = forEachLink;
/** /**
* Returns a list of emphasis markers in code spans and links. * Returns a list of emphasis markers in code spans and links.
* *
@ -660,13 +731,12 @@ function emphasisMarkersInContent(params) {
// Search links // Search links
lines.forEach((tokenLine, tokenLineIndex) => { lines.forEach((tokenLine, tokenLineIndex) => {
const inLine = []; const inLine = [];
let linkMatch = null; forEachLink(tokenLine, (index, match) => {
while ((linkMatch = linkRe.exec(tokenLine))) {
let markerMatch = null; let markerMatch = null;
while ((markerMatch = emphasisMarkersRe.exec(linkMatch[0]))) { while ((markerMatch = emphasisMarkersRe.exec(match))) {
inLine.push(linkMatch.index + markerMatch.index); inLine.push(index + markerMatch.index);
} }
} });
byLine[tokenLineIndex] = inLine; byLine[tokenLineIndex] = inLine;
}); });
// Search code spans // Search code spans

View file

@ -3,7 +3,7 @@
"use strict"; "use strict";
const { addErrorDetailIf, bareUrlRe, escapeForRegExp, forEachLine, const { addErrorDetailIf, bareUrlRe, escapeForRegExp, forEachLine,
overlapsAnyRange, linkRe, linkReferenceRe } = require("../helpers"); forEachLink, overlapsAnyRange, linkReferenceRe } = require("../helpers");
const { codeBlockAndSpanRanges, lineMetadata } = require("./cache"); const { codeBlockAndSpanRanges, lineMetadata } = require("./cache");
module.exports = { module.exports = {
@ -25,14 +25,13 @@ module.exports = {
while ((match = bareUrlRe.exec(line)) !== null) { while ((match = bareUrlRe.exec(line)) !== null) {
exclusions.push([ lineIndex, match.index, match[0].length ]); exclusions.push([ lineIndex, match.index, match[0].length ]);
} }
while ((match = linkRe.exec(line)) !== null) { forEachLink(line, (index, _, text, destination) => {
const [ , text, destination ] = match;
if (destination) { if (destination) {
exclusions.push( exclusions.push(
[ lineIndex, match.index + text.length, destination.length ] [ lineIndex, index + text.length, destination.length ]
); );
} }
} });
} }
}); });
if (!includeCodeBlocks) { if (!includeCodeBlocks) {

View file

@ -1000,3 +1000,286 @@ test("deepFreeze", (t) => {
t.throws(scenario, null, "Assigned to frozen object."); t.throws(scenario, null, "Assigned to frozen object.");
}); });
}); });
// Table-driven test for helpers.forEachLink: each input line is scanned and
// every handler invocation is compared against the next expected match.
test("forEachLink", (t) => {
// 4 assertions per expected match (60 matches) + 1 per test case (51 cases)
t.plan(291);
// Each entry is [ markdown, expectedMatches ] where every expected match is
// [ index, link, text, destination ] in the order the handler should see them
// (destination is undefined for links that have link text only)
const testCases = [
[
"",
[]
],
[
"Text",
[]
],
[
"Text [text] (text) text",
[ [ 5, "[text]", "[text]", undefined ] ]
],
[
"Text [text] text (text) text",
[ [ 5, "[text]", "[text]", undefined ] ]
],
[
"Text [text] [text] text",
[
[ 5, "[text]", "[text]", undefined ],
[ 12, "[text]", "[text]", undefined ]
]
],
[
"Text [text] text [text] text",
[
[ 5, "[text]", "[text]", undefined ],
[ 17, "[text]", "[text]", undefined ]
]
],
[
"Text [link](destination) text",
[ [ 5, "[link](destination)", "[link]", "(destination)" ] ]
],
[
"Text [link0](destination0) text [link1](destination1) text",
[
[ 5, "[link0](destination0)", "[link0]", "(destination0)" ],
[ 32, "[link1](destination1)", "[link1]", "(destination1)" ]
]
],
[
"Text [link0] text [link1](destination1) text [link2] text",
[
[ 5, "[link0]", "[link0]", undefined ],
[ 18, "[link1](destination1)", "[link1]", "(destination1)" ],
[ 45, "[link2]", "[link2]", undefined ]
]
],
[
"Text [link0](destination0) text [link1] text [link2](destination2) text",
[
[ 5, "[link0](destination0)", "[link0]", "(destination0)" ],
[ 32, "[link1]", "[link1]", undefined ],
[ 45, "[link2](destination2)", "[link2]", "(destination2)" ]
]
],
[
"Text [link0][destination0] text [link1] text [link2](destination2) text",
[
[ 5, "[link0][destination0]", "[link0]", "[destination0]" ],
[ 32, "[link1]", "[link1]", undefined ],
[ 45, "[link2](destination2)", "[link2]", "(destination2)" ]
]
],
[
"Text [link0](destination0) text [link1] text [link2][destination2] text",
[
[ 5, "[link0](destination0)", "[link0]", "(destination0)" ],
[ 32, "[link1]", "[link1]", undefined ],
[ 45, "[link2][destination2]", "[link2]", "[destination2]" ]
]
],
[
"Text [link](destination \"title\") text",
[
[
5,
"[link](destination \"title\")",
"[link]",
"(destination \"title\")"
]
]
],
[
"Text [link](<destination> \"title\") text",
[
[
5,
"[link](<destination> \"title\")",
"[link]",
"(<destination> \"title\")"
]
]
],
[
"Text [link](destination \"ti\\\"tle\") text",
[
[
5,
"[link](destination \"ti\\\"tle\")",
"[link]",
"(destination \"ti\\\"tle\")"
]
]
],
[
"Text [link](destination 'title') text",
[
[
5,
"[link](destination 'title')",
"[link]",
"(destination 'title')"
]
]
],
[
"Text [link](destination (title)) text",
[
[
5,
"[link](destination (title))",
"[link]",
"(destination (title))"
]
]
],
[
"Text [link](\"title\") text",
[ [ 5, "[link](\"title\")", "[link]", "(\"title\")" ] ]
],
[
"[]()",
[ [ 0, "[]()", "[]", "()" ] ]
],
[
"[l](d)",
[ [ 0, "[l](d)", "[l]", "(d)" ] ]
],
[
"Text [li[nk](dest) text",
[ [ 8, "[nk](dest)", "[nk]", "(dest)" ] ]
],
[
"Text [li\\[nk](dest) text",
[ [ 5, "[li\\[nk](dest)", "[li\\[nk]", "(dest)" ] ]
],
[
"Text [li]nk](dest) text",
[ [ 5, "[li]", "[li]", undefined ] ]
],
[
"Text [li\\]nk](dest) text",
[ [ 5, "[li\\]nk](dest)", "[li\\]nk]", "(dest)" ] ]
],
[
"Text [l[in]k](dest) text",
[ [ 5, "[l[in]k](dest)", "[l[in]k]", "(dest)" ] ]
],
[
"Text [li(nk](dest) text",
[ [ 5, "[li(nk](dest)", "[li(nk]", "(dest)" ] ]
],
[
"Text [li)nk](dest) text",
[ [ 5, "[li)nk](dest)", "[li)nk]", "(dest)" ] ]
],
[
"Text [l(in)k](dest) text",
[ [ 5, "[l(in)k](dest)", "[l(in)k]", "(dest)" ] ]
],
[
"Text [link](de(st) text",
[ [ 5, "[link]", "[link]", undefined ] ]
],
[
"Text [link](de\\(st) text",
[ [ 5, "[link](de\\(st)", "[link]", "(de\\(st)" ] ]
],
[
"Text [link](de)st) text",
[ [ 5, "[link](de)", "[link]", "(de)" ] ]
],
[
"Text [link](de\\)st) text",
[ [ 5, "[link](de\\)st)", "[link]", "(de\\)st)" ] ]
],
[
"Text [link](d(es)t) text",
[ [ 5, "[link](d(es)t)", "[link]", "(d(es)t)" ] ]
],
[
"Text [link]() text",
[ [ 5, "[link]()", "[link]", "()" ] ]
],
[
"Text [link](#) text",
[ [ 5, "[link](#)", "[link]", "(#)" ] ]
],
[
"Text [link](<de) text",
[ [ 5, "[link]", "[link]", undefined ] ]
],
[
"Text [link](<de)st> text",
[ [ 5, "[link]", "[link]", undefined ] ]
],
[
"Text [link](<>) text",
[ [ 5, "[link](<>)", "[link]", "(<>)" ] ]
],
[
"Text [link](<dest>) text",
[ [ 5, "[link](<dest>)", "[link]", "(<dest>)" ] ]
],
[
"Text [link](<de st>) text",
[ [ 5, "[link](<de st>)", "[link]", "(<de st>)" ] ]
],
[
"Text [link](<de)st>) text",
[ [ 5, "[link](<de)st>)", "[link]", "(<de)st>)" ] ]
],
[
"Text [<link](dest) text",
[ [ 5, "[<link](dest)", "[<link]", "(dest)" ] ]
],
[
"Text [<link>](dest) text",
[ [ 5, "[<link>](dest)", "[<link>]", "(dest)" ] ]
],
[
"Text [<link>](<dest) text",
[ [ 5, "[<link>]", "[<link>]", undefined ] ]
],
[
"Text [<link>](<dest>) text",
[ [ 5, "[<link>](<dest>)", "[<link>]", "(<dest>)" ] ]
],
[
"Text [[[[l[i]n[k]](dest) text",
[ [ 8, "[l[i]n[k]](dest)", "[l[i]n[k]]", "(dest)" ] ]
],
[
"Text [link](d(e(st))) text",
[ [ 5, "[link](d(e(st)))", "[link]", "(d(e(st)))" ] ]
],
[
"Text [link](d(e(st)) text",
[ [ 5, "[link]", "[link]", undefined ] ]
],
[
"Text [link](<d(e(st)>) text",
[ [ 5, "[link](<d(e(st)>)", "[link]", "(<d(e(st)>)" ] ]
],
[
"Text [link][reference] text",
[ [ 5, "[link][reference]", "[link]", "[reference]" ] ]
],
[
"Text [link][refer]ence] text",
[ [ 5, "[link][refer]", "[link]", "[refer]" ] ]
]
];
for (const testCase of testCases) {
const [ markdown, matches ] = testCase;
helpers.forEachLink(String(markdown), (idx, lnk, txt, des) => {
// Consume the next expected match; shift() mutates the expectation list so
// that whatever is left afterwards is a match the handler never reported
// @ts-ignore
const match = matches.shift();
const [ index, link, text, destination ] = match;
t.is(idx, index, String(markdown));
t.is(lnk, link, String(markdown));
t.is(txt, text, String(markdown));
t.is(des, destination, String(markdown));
});
// All expected matches must have been consumed by the handler
t.is(matches.length, 0, "Missing match");
}
});