From 7a53caa7fb208bcd01fbcdae23816f5a7087b68c Mon Sep 17 00:00:00 2001 From: David Anson Date: Fri, 18 Feb 2022 21:14:14 -0800 Subject: [PATCH] Replace helpers.linkRe with helpers.forEachLink to fix "Polynomial regular expression used on uncontrolled data" and to better support link syntax. --- demo/markdownlint-browser.js | 104 +++++++++-- helpers/helpers.js | 90 ++++++++-- lib/md044.js | 9 +- test/markdownlint-test-helpers.js | 283 ++++++++++++++++++++++++++++++ 4 files changed, 458 insertions(+), 28 deletions(-) diff --git a/demo/markdownlint-browser.js b/demo/markdownlint-browser.js index 70f86f47..c8f7c023 100644 --- a/demo/markdownlint-browser.js +++ b/demo/markdownlint-browser.js @@ -49,9 +49,6 @@ module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/; module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/; // Regular expression for all instances of emphasis markers var emphasisMarkersRe = /[_*]/g; -// Regular expression for inline links and shortcut reference links -var linkRe = /(\[(?:[^[\]]?(?:\[[^[\]]*\])?)*\])(\([^)]*\)|\[[^\]]*\])?/g; -module.exports.linkRe = linkRe; // Regular expression for link reference definition lines module.exports.linkReferenceRe = /^ {0,3}\[[^\]]+]:\s.*$/; // All punctuation characters (normal and full-width) @@ -630,6 +627,89 @@ module.exports.frontMatterHasTitle = return !ignoreFrontMatter && frontMatterLines.some(function (line) { return frontMatterTitleRe.test(line); }); }; +/** + * Calls the provided function for each link. + * + * @param {string} line Line of Markdown input. + * @param {Function} handler Function taking (index, link, text, destination). + * @returns {void} + */ +function forEachLink(line, handler) { + // Helper to find matching close symbol for link text/destination + var findClosingSymbol = function (index) { + var begin = line[index]; + var end = (begin === "[") ? "]" : ")"; + var nesting = 0; + var escaping = false; + var pointy = false; + for (var i = index + 1; i < line.length; i++) { + var current = line[i]; + if (current === "\\") { + escaping = !escaping; + } + else if (!escaping && (current === begin)) { + nesting++; + } + else if (!escaping && (current === end)) { + if (nesting > 0) { + nesting--; + } + else if (!pointy) { + // Return index after matching close symbol + return i + 1; + } + } + else if ((i === index + 1) && (begin === "(") && (current === "<")) { + pointy = true; + } + else if (!escaping && pointy && current === ">") { + pointy = false; + nesting = 0; + } + else { + escaping = false; + } + } + // No match found + return -1; + }; + // Scan line for unescaped "[" character + var escaping = false; + for (var i = 0; i < line.length; i++) { + var current = line[i]; + if (current === "\\") { + escaping = !escaping; + } + else if (!escaping && (current === "[")) { + // Scan for matching close "]" of link text + var textEnd = findClosingSymbol(i); + if (textEnd !== -1) { + if ((line[textEnd] === "(") || (line[textEnd] === "[")) { + // Scan for matching close ")" or "]" of link destination + var destEnd = findClosingSymbol(textEnd); + if (destEnd !== -1) { + // Call handler with link text and destination + var link = line.slice(i, destEnd); + var text = line.slice(i, textEnd); + var dest = line.slice(textEnd, destEnd); + handler(i, link, text, dest); + i = destEnd; + } + } + if (i < textEnd) { + // Call handler with link text only + var text = line.slice(i, textEnd); + handler(i, text, text); + i = textEnd; + } + } + } + else { + escaping = false; + } + } +} +module.exports.forEachLink = forEachLink; /** * Returns a list of emphasis markers in code spans and links. * @@ -642,13 +722,12 @@ function emphasisMarkersInContent(params) { // Search links lines.forEach(function (tokenLine, tokenLineIndex) { var inLine = []; - var linkMatch = null; - while ((linkMatch = linkRe.exec(tokenLine))) { + forEachLink(tokenLine, function (index, match) { var markerMatch = null; - while ((markerMatch = emphasisMarkersRe.exec(linkMatch[0]))) { - inLine.push(linkMatch.index + markerMatch.index); + while ((markerMatch = emphasisMarkersRe.exec(match))) { + inLine.push(index + markerMatch.index); } - } + }); byLine[tokenLineIndex] = inLine; }); // Search code spans @@ -4035,7 +4114,7 @@ module.exports = { "use strict"; // @ts-check -var _a = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"), addErrorDetailIf = _a.addErrorDetailIf, bareUrlRe = _a.bareUrlRe, escapeForRegExp = _a.escapeForRegExp, forEachLine = _a.forEachLine, overlapsAnyRange = _a.overlapsAnyRange, linkRe = _a.linkRe, linkReferenceRe = _a.linkReferenceRe; +var _a = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"), addErrorDetailIf = _a.addErrorDetailIf, bareUrlRe = _a.bareUrlRe, escapeForRegExp = _a.escapeForRegExp, forEachLine = _a.forEachLine, forEachLink = _a.forEachLink, overlapsAnyRange = _a.overlapsAnyRange, linkReferenceRe = _a.linkReferenceRe; var _b = __webpack_require__(/*! ./cache */ "../lib/cache.js"), codeBlockAndSpanRanges = _b.codeBlockAndSpanRanges, lineMetadata = _b.lineMetadata; module.exports = { "names": ["MD044", "proper-names"], @@ -4057,12 +4136,11 @@ module.exports = { while ((match = bareUrlRe.exec(line)) !== null) { exclusions.push([lineIndex, match.index, match[0].length]); } - while ((match = linkRe.exec(line)) !== null) { - var text = match[1], destination = match[2]; + forEachLink(line, function (index, _, text, destination) { if (destination) { - exclusions.push([lineIndex, match.index + text.length, destination.length]); + exclusions.push([lineIndex, index + text.length, destination.length]); } - } + }); } }); if (!includeCodeBlocks) { diff --git a/helpers/helpers.js b/helpers/helpers.js index da378898..dc34d3b1 100644 --- a/helpers/helpers.js +++ b/helpers/helpers.js @@ -26,11 +26,6 @@ module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/; // Regular expression for all instances of emphasis markers const emphasisMarkersRe = /[_*]/g; -// Regular expression for inline links and shortcut reference links -const linkRe = - /(\[(?:[^[\]]?(?:\[[^[\]]*\])?)*\])(\([^)]*\)|\[[^\]]*\])?/g; -module.exports.linkRe = linkRe; - // Regular expression for link reference definition lines module.exports.linkReferenceRe = /^ {0,3}\[[^\]]+]:\s.*$/; @@ -648,6 +643,82 @@ module.exports.frontMatterHasTitle = frontMatterLines.some((line) => frontMatterTitleRe.test(line)); }; +/** + * Calls the provided function for each link. + * + * @param {string} line Line of Markdown input. + * @param {Function} handler Function taking (index, link, text, destination). + * @returns {void} + */ +function forEachLink(line, handler) { + // Helper to find matching close symbol for link text/destination + const findClosingSymbol = (index) => { + const begin = line[index]; + const end = (begin === "[") ? "]" : ")"; + let nesting = 0; + let escaping = false; + let pointy = false; + for (let i = index + 1; i < line.length; i++) { + const current = line[i]; + if (current === "\\") { + escaping = !escaping; + } else if (!escaping && (current === begin)) { + nesting++; + } else if (!escaping && (current === end)) { + if (nesting > 0) { + nesting--; + } else if (!pointy) { + // Return index after matching close symbol + return i + 1; + } + } else if ((i === index + 1) && (begin === "(") && (current === "<")) { + pointy = true; + } else if (!escaping && pointy && current === ">") { + pointy = false; + nesting = 0; + } else { + escaping = false; + } + } + // No match found + return -1; + }; + // Scan line for unescaped "[" character + let escaping = false; + for (let i = 0; i < line.length; i++) { + const current = line[i]; + if (current === "\\") { + escaping = !escaping; + } else if (!escaping && (current === "[")) { + // Scan for matching close "]" of link text + const textEnd = findClosingSymbol(i); + if (textEnd !== -1) { + if ((line[textEnd] === "(") || (line[textEnd] === "[")) { + // Scan for matching close ")" or "]" of link destination + const destEnd = findClosingSymbol(textEnd); + if (destEnd !== -1) { + // Call handler with link text and destination + const link = line.slice(i, destEnd); + const text = line.slice(i, textEnd); + const dest = line.slice(textEnd, destEnd); + handler(i, link, text, dest); + i = destEnd; + } + } + if (i < textEnd) { + // Call handler with link text only + const text = line.slice(i, textEnd); + handler(i, text, text); + i = textEnd; + } + } + } else { + escaping = false; + } + } +} +module.exports.forEachLink = forEachLink; + /** * Returns a list of emphasis markers in code spans and links. * @@ -660,13 +731,12 @@ function emphasisMarkersInContent(params) { // Search links lines.forEach((tokenLine, tokenLineIndex) => { const inLine = []; - let linkMatch = null; - while ((linkMatch = linkRe.exec(tokenLine))) { + forEachLink(tokenLine, (index, match) => { let markerMatch = null; - while ((markerMatch = emphasisMarkersRe.exec(linkMatch[0]))) { - inLine.push(linkMatch.index + markerMatch.index); + while ((markerMatch = emphasisMarkersRe.exec(match))) { + inLine.push(index + markerMatch.index); } - } + }); byLine[tokenLineIndex] = inLine; }); // Search code spans diff --git a/lib/md044.js b/lib/md044.js index d8bc16fd..44c5a987 100644 --- a/lib/md044.js +++ b/lib/md044.js @@ -3,7 +3,7 @@ "use strict"; const { addErrorDetailIf, bareUrlRe, escapeForRegExp, forEachLine, - overlapsAnyRange, linkRe, linkReferenceRe } = require("../helpers"); + forEachLink, overlapsAnyRange, linkReferenceRe } = require("../helpers"); const { codeBlockAndSpanRanges, lineMetadata } = require("./cache"); module.exports = { @@ -25,14 +25,13 @@ module.exports = { while ((match = bareUrlRe.exec(line)) !== null) { exclusions.push([ lineIndex, match.index, match[0].length ]); } - while ((match = linkRe.exec(line)) !== null) { - const [ , text, destination ] = match; + forEachLink(line, (index, _, text, destination) => { if (destination) { exclusions.push( - [ lineIndex, match.index + text.length, destination.length ] + [ lineIndex, index + text.length, destination.length ] ); } - } + }); } }); if (!includeCodeBlocks) { diff --git a/test/markdownlint-test-helpers.js b/test/markdownlint-test-helpers.js index 3924bd27..44d2acb9 100644 --- a/test/markdownlint-test-helpers.js +++ b/test/markdownlint-test-helpers.js @@ -1000,3 +1000,286 @@ test("deepFreeze", (t) => { t.throws(scenario, null, "Assigned to frozen object."); }); }); + +test("forEachLink", (t) => { + t.plan(291); + const testCases = [ + [ + "", + [] + ], + [ + "Text", + [] + ], + [ + "Text [text] (text) text", + [ [ 5, "[text]", "[text]", undefined ] ] + ], + [ + "Text [text] text (text) text", + [ [ 5, "[text]", "[text]", undefined ] ] + ], + [ + "Text [text] [text] text", + [ + [ 5, "[text]", "[text]", undefined ], + [ 12, "[text]", "[text]", undefined ] + ] + ], + [ + "Text [text] text [text] text", + [ + [ 5, "[text]", "[text]", undefined ], + [ 17, "[text]", "[text]", undefined ] + ] + ], + [ + "Text [link](destination) text", + [ [ 5, "[link](destination)", "[link]", "(destination)" ] ] + ], + [ + "Text [link0](destination0) text [link1](destination1) text", + [ + [ 5, "[link0](destination0)", "[link0]", "(destination0)" ], + [ 32, "[link1](destination1)", "[link1]", "(destination1)" ] + ] + ], + [ + "Text [link0] text [link1](destination1) text [link2] text", + [ + [ 5, "[link0]", "[link0]", undefined ], + [ 18, "[link1](destination1)", "[link1]", "(destination1)" ], + [ 45, "[link2]", "[link2]", undefined ] + ] + ], + [ + "Text [link0](destination0) text [link1] text [link2](destination2) text", + [ + [ 5, "[link0](destination0)", "[link0]", "(destination0)" ], + [ 32, "[link1]", "[link1]", undefined ], + [ 45, "[link2](destination2)", "[link2]", "(destination2)" ] + ] + ], + [ + "Text [link0][destination0] text [link1] text [link2](destination2) text", + [ + [ 5, "[link0][destination0]", "[link0]", "[destination0]" ], + [ 32, "[link1]", "[link1]", undefined ], + [ 45, "[link2](destination2)", "[link2]", "(destination2)" ] + ] + ], + [ + "Text [link0](destination0) text [link1] text [link2][destination2] text", + [ + [ 5, "[link0](destination0)", "[link0]", "(destination0)" ], + [ 32, "[link1]", "[link1]", undefined ], + [ 45, "[link2][destination2]", "[link2]", "[destination2]" ] + ] + ], + [ + "Text [link](destination \"title\") text", + [ + [ + 5, + "[link](destination \"title\")", + "[link]", + "(destination \"title\")" + ] + ] + ], + [ + "Text [link]( \"title\") text", + [ + [ + 5, + "[link]( \"title\")", + "[link]", + "( \"title\")" + ] + ] + ], + [ + "Text [link](destination \"ti\\\"tle\") text", + [ + [ + 5, + "[link](destination \"ti\\\"tle\")", + "[link]", + "(destination \"ti\\\"tle\")" + ] + ] + ], + [ + "Text [link](destination 'title') text", + [ + [ + 5, + "[link](destination 'title')", + "[link]", + "(destination 'title')" + ] + ] + ], + [ + "Text [link](destination (title)) text", + [ + [ + 5, + "[link](destination (title))", + "[link]", + "(destination (title))" + ] + ] + ], + [ + "Text [link](\"title\") text", + [ [ 5, "[link](\"title\")", "[link]", "(\"title\")" ] ] + ], + [ + "[]()", + [ [ 0, "[]()", "[]", "()" ] ] + ], + [ + "[l](d)", + [ [ 0, "[l](d)", "[l]", "(d)" ] ] + ], + [ + "Text [li[nk](dest) text", + [ [ 8, "[nk](dest)", "[nk]", "(dest)" ] ] + ], + [ + "Text [li\\[nk](dest) text", + [ [ 5, "[li\\[nk](dest)", "[li\\[nk]", "(dest)" ] ] + ], + [ + "Text [li]nk](dest) text", + [ [ 5, "[li]", "[li]", undefined ] ] + ], + [ + "Text [li\\]nk](dest) text", + [ [ 5, "[li\\]nk](dest)", "[li\\]nk]", "(dest)" ] ] + ], + [ + "Text [l[in]k](dest) text", + [ [ 5, "[l[in]k](dest)", "[l[in]k]", "(dest)" ] ] + ], + [ + "Text [li(nk](dest) text", + [ [ 5, "[li(nk](dest)", "[li(nk]", "(dest)" ] ] + ], + [ + "Text [li)nk](dest) text", + [ [ 5, "[li)nk](dest)", "[li)nk]", "(dest)" ] ] + ], + [ + "Text [l(in)k](dest) text", + [ [ 5, "[l(in)k](dest)", "[l(in)k]", "(dest)" ] ] + ], + [ + "Text [link](de(st) text", + [ [ 5, "[link]", "[link]", undefined ] ] + ], + [ + "Text [link](de\\(st) text", + [ [ 5, "[link](de\\(st)", "[link]", "(de\\(st)" ] ] + ], + [ + "Text [link](de)st) text", + [ [ 5, "[link](de)", "[link]", "(de)" ] ] + ], + [ + "Text [link](de\\)st) text", + [ [ 5, "[link](de\\)st)", "[link]", "(de\\)st)" ] ] + ], + [ + "Text [link](d(es)t) text", + [ [ 5, "[link](d(es)t)", "[link]", "(d(es)t)" ] ] + ], + [ + "Text [link]() text", + [ [ 5, "[link]()", "[link]", "()" ] ] + ], + [ + "Text [link](#) text", + [ [ 5, "[link](#)", "[link]", "(#)" ] ] + ], + [ + "Text [link]( text", + [ [ 5, "[link]", "[link]", undefined ] ] + ], + [ + "Text [link](<>) text", + [ [ 5, "[link](<>)", "[link]", "(<>)" ] ] + ], + [ + "Text [link]() text", + [ [ 5, "[link]()", "[link]", "()" ] ] + ], + [ + "Text [link]() text", + [ [ 5, "[link]()", "[link]", "()" ] ] + ], + [ + "Text [link]() text", + [ [ 5, "[link]()", "[link]", "()" ] ] + ], + [ + "Text [](dest) text", + [ [ 5, "[](dest)", "[]", "(dest)" ] ] + ], + [ + "Text [](]", "[]", undefined ] ] + ], + [ + "Text []() text", + [ [ 5, "[]()", "[]", "()" ] ] + ], + [ + "Text [[[[l[i]n[k]](dest) text", + [ [ 8, "[l[i]n[k]](dest)", "[l[i]n[k]]", "(dest)" ] ] + ], + [ + "Text [link](d(e(st))) text", + [ [ 5, "[link](d(e(st)))", "[link]", "(d(e(st)))" ] ] + ], + [ + "Text [link](d(e(st)) text", + [ [ 5, "[link]", "[link]", undefined ] ] + ], + [ + "Text [link]() text", + [ [ 5, "[link]()", "[link]", "()" ] ] + ], + [ + "Text [link][reference] text", + [ [ 5, "[link][reference]", "[link]", "[reference]" ] ] + ], + [ + "Text [link][refer]ence] text", + [ [ 5, "[link][refer]", "[link]", "[refer]" ] ] + ] + ]; + for (const testCase of testCases) { + const [ markdown, matches ] = testCase; + helpers.forEachLink(String(markdown), (idx, lnk, txt, des) => { + // @ts-ignore + const match = matches.shift(); + const [ index, link, text, destination ] = match; + t.is(idx, index, String(markdown)); + t.is(lnk, link, String(markdown)); + t.is(txt, text, String(markdown)); + t.is(des, destination, String(markdown)); + }); + t.is(matches.length, 0, "Missing match"); + } +});