Replace helpers.urlRe with helpers.urlFe to address "inefficient regular expression" CodeQL alert, introduce function expressions as an alternative, test more inputs.

This commit is contained in:
David Anson 2022-12-18 15:46:19 -08:00
parent 485c63c579
commit 8d6e0b5419
5 changed files with 277 additions and 35 deletions

View file

@ -47,9 +47,6 @@ module.exports.inlineCommentStartRe = inlineCommentStartRe;
const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g; const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g;
module.exports.htmlElementRe = htmlElementRe; module.exports.htmlElementRe = htmlElementRe;
// Regular expressions for range matching // Regular expressions for range matching
module.exports.urlRe =
// eslint-disable-next-line max-len
/(?:http|ftp)s?:\/\/(?:[^\s()<>\]"'`]|\([^\s<>\]"'`]*\))*\b(?:[-#/]|\([^\s<>\]"'`]*\))*/ig;
module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/; module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/; module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
// Regular expression for all instances of emphasis markers // Regular expression for all instances of emphasis markers
@ -1137,6 +1134,109 @@ function expandTildePath(file, os) {
return homedir ? file.replace(/^~($|\/|\\)/, `${homedir}$1`) : file; return homedir ? file.replace(/^~($|\/|\\)/, `${homedir}$1`) : file;
} }
module.exports.expandTildePath = expandTildePath; module.exports.expandTildePath = expandTildePath;
/**
 * Runs a "function expression" against a string the way RegExp.exec runs a
 * global regular expression: each call resumes after the previous match.
 *
 * The resume position is stored in a lastIndex property on funcExp itself. It
 * is only reset to 0 when no match is found, so a caller that stops looping
 * before receiving null leaves the position set for the next call.
 *
 * @param {Function} funcExp Function that takes a string and returns
 * [index, length] (relative to that string) or null.
 * @param {string} input String to search.
 * @returns {string[] | null} Single-element array holding the matched text,
 * with an index property giving its position in input, or null.
 */
function funcExpExec(funcExp, input) {
  // Resume where the previous call stopped (or at the beginning)
  // @ts-ignore
  const resumeAt = funcExp.lastIndex || 0;
  const found = funcExp(input.slice(resumeAt));
  if (!found) {
    // Nothing (more) to find: rewind so the next search starts over
    // @ts-ignore
    funcExp.lastIndex = 0;
    return null;
  }
  // Translate the slice-relative result into positions within input
  const [offset, size] = found;
  const matchStart = resumeAt + offset;
  const matchEnd = matchStart + size;
  // @ts-ignore
  funcExp.lastIndex = matchEnd;
  return Object.assign([input.slice(matchStart, matchEnd)], { "index": matchStart });
}
module.exports.funcExpExec = funcExpExec;
// Protocol that marks the start of a URL (case-insensitive)
const urlFeProtocolRe = /(?:http|ftp)s?:\/\//i;
// For a URL preceded by "<", only a space (or end of input) terminates it
const urlFeAutolinkTerminalsRe = / |$/;
// For any other URL, these characters (or end of input) terminate it
const urlFeBareTerminalsRe = /[ ,!`'"\]]|$/;
// Punctuation characters that are kept at the end of a URL
// (despite the name, this is a string of characters, not a RegExp)
const urlFeNonTerminalsRe = "-#/";
const urlFePunctuationRe = /\p{Punctuation}/u;
// Maps the character immediately before a URL to the character that closes it
// NOTE(review): the entry with an empty-string key is never looked up because
// urlFe substitutes " " when there is no preceding character - verify that no
// characters were lost from that pair.
const urlFePrefixToPostfix = new Map([
  [" ", " "],
  ["`", "`"],
  ["'", "'"],
  ["\"", "\""],
  ["", ""],
  ["“", "”"],
  ["«", "»"],
  ["*", "*"],
  ["_", "_"],
  ["(", ")"],
  ["[", "]"],
  ["{", "}"],
  ["<", ">"],
  [">", "<"]
]);
/**
 * Function expression that matches URLs.
 *
 * Finds the first http/https/ftp protocol in the input, then determines where
 * the URL ends using the character that precedes it (ex: "<" expects ">") and
 * a set of terminating characters. URLs delimited by spaces (or starting the
 * input) additionally have trailing punctuation and unbalanced ")" trimmed.
 *
 * @param {string} input Substring to search for a URL.
 * @returns {Array | null} [index, length] of URL or null.
 */
function urlFe(input) {
  // Find start of URL by searching for protocol
  const match = input.match(urlFeProtocolRe);
  if (!match) {
    // No match
    return null;
  }
  // Look for matching pre/postfix characters (ex: <...>)
  const start = match.index || 0;
  const length = match[0].length;
  // A URL at the very start of input is treated as if preceded by a space
  const prefix = input[start - 1] || " ";
  const postfix = urlFePrefixToPostfix.get(prefix);
  // Only search when the prefix has a known postfix; passing undefined to
  // indexOf would search for the literal text "undefined" and truncate URLs
  // that contain it
  let endPostfix = (postfix === undefined) ?
    -1 :
    input.indexOf(postfix, start + length);
  if (endPostfix === -1) {
    endPostfix = input.length;
  }
  // Look for characters that terminate a URL
  // (search always succeeds because both patterns also match end of input)
  const terminalsRe = (prefix === "<") ? urlFeAutolinkTerminalsRe : urlFeBareTerminalsRe;
  const endTerminal = start + input.slice(start).search(terminalsRe);
  // Determine tentative end of URL
  let end = Math.min(endPostfix, endTerminal);
  if (prefix === " ") {
    // If the URL used " " as pre/postfix characters, trim the end
    if (input[end - 1] === ")") {
      // Trim any ")" beyond the last "(...)" pair
      const lastOpenParen = input.lastIndexOf("(", end - 2);
      if (lastOpenParen <= start) {
        // No "(" inside the URL, so the final ")" is not part of it
        end--;
      } else {
        // End the URL at the ")" that closes the last "("
        const nextCloseParen = input.indexOf(")", lastOpenParen + 1);
        end = nextCloseParen + 1;
      }
    } else {
      // Trim unwanted punctuation
      while (!urlFeNonTerminalsRe.includes(input[end - 1]) &&
        urlFePunctuationRe.test(input[end - 1])) {
        end--;
      }
    }
  }
  return [start, end - start];
}
module.exports.urlFe = urlFe;
/***/ }), /***/ }),
@ -3756,7 +3856,7 @@ module.exports = {
"use strict"; "use strict";
// @ts-check // @ts-check
const { addErrorContext, filterTokens, urlRe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"); const { addErrorContext, filterTokens, funcExpExec, urlFe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } = __webpack_require__(/*! ./cache */ "../lib/cache.js"); const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } = __webpack_require__(/*! ./cache */ "../lib/cache.js");
const htmlLinkRe = /<a(?:|\s[^>]+)>[^<>]*<\/a\s*>/ig; const htmlLinkRe = /<a(?:|\s[^>]+)>[^<>]*<\/a\s*>/ig;
module.exports = { module.exports = {
@ -3785,8 +3885,9 @@ module.exports = {
while ((match = htmlLinkRe.exec(line)) !== null) { while ((match = htmlLinkRe.exec(line)) !== null) {
lineExclusions.push([lineIndex, match.index, match[0].length]); lineExclusions.push([lineIndex, match.index, match[0].length]);
} }
while ((match = urlRe.exec(line)) !== null) { while ((match = funcExpExec(urlFe, line)) !== null) {
const [bareUrl] = match; const [bareUrl] = match;
// @ts-ignore
const matchIndex = match.index; const matchIndex = match.index;
const bareUrlLength = bareUrl.length; const bareUrlLength = bareUrl.length;
const prefix = line.slice(0, matchIndex); const prefix = line.slice(0, matchIndex);
@ -4444,7 +4545,7 @@ module.exports = {
"use strict"; "use strict";
// @ts-check // @ts-check
const { addErrorDetailIf, escapeForRegExp, forEachLine, forEachLink, linkReferenceDefinitionRe, urlRe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"); const { addErrorDetailIf, escapeForRegExp, forEachLine, forEachLink, funcExpExec, linkReferenceDefinitionRe, urlFe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const { codeBlockAndSpanRanges, htmlElementRanges, lineMetadata } = __webpack_require__(/*! ./cache */ "../lib/cache.js"); const { codeBlockAndSpanRanges, htmlElementRanges, lineMetadata } = __webpack_require__(/*! ./cache */ "../lib/cache.js");
module.exports = { module.exports = {
"names": ["MD044", "proper-names"], "names": ["MD044", "proper-names"],
@ -4465,7 +4566,8 @@ module.exports = {
} }
else { else {
let match = null; let match = null;
while ((match = urlRe.exec(line)) !== null) { while ((match = funcExpExec(urlFe, line)) !== null) {
// @ts-ignore
exclusions.push([lineIndex, match.index, match[0].length]); exclusions.push([lineIndex, match.index, match[0].length]);
} }
forEachLink(line, (index, _, text, destination) => { forEachLink(line, (index, _, text, destination) => {

View file

@ -23,9 +23,6 @@ const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g;
module.exports.htmlElementRe = htmlElementRe; module.exports.htmlElementRe = htmlElementRe;
// Regular expressions for range matching // Regular expressions for range matching
module.exports.urlRe =
// eslint-disable-next-line max-len
/(?:http|ftp)s?:\/\/(?:[^\s()<>\]"'`]|\([^\s<>\]"'`]*\))*\b(?:[-#/]|\([^\s<>\]"'`]*\))*/ig;
module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/; module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/; module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
@ -1187,3 +1184,110 @@ function expandTildePath(file, os) {
return homedir ? file.replace(/^~($|\/|\\)/, `${homedir}$1`) : file; return homedir ? file.replace(/^~($|\/|\\)/, `${homedir}$1`) : file;
} }
module.exports.expandTildePath = expandTildePath; module.exports.expandTildePath = expandTildePath;
/**
 * Runs a "function expression" against a string the way RegExp.exec runs a
 * global regular expression: each call resumes after the previous match.
 *
 * The resume position is stored in a lastIndex property on funcExp itself. It
 * is only reset to 0 when no match is found, so a caller that stops looping
 * before receiving null leaves the position set for the next call.
 *
 * @param {Function} funcExp Function that takes a string and returns
 * [index, length] (relative to that string) or null.
 * @param {string} input String to search.
 * @returns {string[] | null} Single-element array holding the matched text,
 * with an index property giving its position in input, or null.
 */
function funcExpExec(funcExp, input) {
  // Resume where the previous call stopped (or at the beginning)
  // @ts-ignore
  const resumeAt = funcExp.lastIndex || 0;
  const found = funcExp(input.slice(resumeAt));
  if (!found) {
    // Nothing (more) to find: rewind so the next search starts over
    // @ts-ignore
    funcExp.lastIndex = 0;
    return null;
  }
  // Translate the slice-relative result into positions within input
  const [ offset, size ] = found;
  const matchStart = resumeAt + offset;
  const matchEnd = matchStart + size;
  // @ts-ignore
  funcExp.lastIndex = matchEnd;
  return Object.assign(
    [ input.slice(matchStart, matchEnd) ],
    { "index": matchStart }
  );
}
module.exports.funcExpExec = funcExpExec;
// Protocol that marks the start of a URL (case-insensitive)
const urlFeProtocolRe = /(?:http|ftp)s?:\/\//i;
// For a URL preceded by "<", only a space (or end of input) terminates it
const urlFeAutolinkTerminalsRe = / |$/;
// For any other URL, these characters (or end of input) terminate it
const urlFeBareTerminalsRe = /[ ,!`'"\]]|$/;
// Punctuation characters that are kept at the end of a URL
// (despite the name, this is a string of characters, not a RegExp)
const urlFeNonTerminalsRe = "-#/";
const urlFePunctuationRe = /\p{Punctuation}/u;
// Maps the character immediately before a URL to the character that closes it
// NOTE(review): the entry with an empty-string key is never looked up because
// urlFe substitutes " " when there is no preceding character - verify that no
// characters were lost from that pair.
const urlFePrefixToPostfix = new Map([
  [ " ", " " ],
  [ "`", "`" ],
  [ "'", "'" ],
  [ "\"", "\"" ],
  [ "", "" ],
  [ "“", "”" ],
  [ "«", "»" ],
  [ "*", "*" ],
  [ "_", "_" ],
  [ "(", ")" ],
  [ "[", "]" ],
  [ "{", "}" ],
  [ "<", ">" ],
  [ ">", "<" ]
]);
/**
 * Function expression that matches URLs.
 *
 * Finds the first http/https/ftp protocol in the input, then determines where
 * the URL ends using the character that precedes it (ex: "<" expects ">") and
 * a set of terminating characters. URLs delimited by spaces (or starting the
 * input) additionally have trailing punctuation and unbalanced ")" trimmed.
 *
 * @param {string} input Substring to search for a URL.
 * @returns {Array | null} [index, length] of URL or null.
 */
function urlFe(input) {
  // Find start of URL by searching for protocol
  const match = input.match(urlFeProtocolRe);
  if (!match) {
    // No match
    return null;
  }
  // Look for matching pre/postfix characters (ex: <...>)
  const start = match.index || 0;
  const length = match[0].length;
  // A URL at the very start of input is treated as if preceded by a space
  const prefix = input[start - 1] || " ";
  const postfix = urlFePrefixToPostfix.get(prefix);
  // Only search when the prefix has a known postfix; passing undefined to
  // indexOf would search for the literal text "undefined" and truncate URLs
  // that contain it
  let endPostfix = (postfix === undefined) ?
    -1 :
    input.indexOf(postfix, start + length);
  if (endPostfix === -1) {
    endPostfix = input.length;
  }
  // Look for characters that terminate a URL
  // (search always succeeds because both patterns also match end of input)
  const terminalsRe =
    (prefix === "<") ? urlFeAutolinkTerminalsRe : urlFeBareTerminalsRe;
  const endTerminal = start + input.slice(start).search(terminalsRe);
  // Determine tentative end of URL
  let end = Math.min(endPostfix, endTerminal);
  if (prefix === " ") {
    // If the URL used " " as pre/postfix characters, trim the end
    if (input[end - 1] === ")") {
      // Trim any ")" beyond the last "(...)" pair
      const lastOpenParen = input.lastIndexOf("(", end - 2);
      if (lastOpenParen <= start) {
        // No "(" inside the URL, so the final ")" is not part of it
        end--;
      } else {
        // End the URL at the ")" that closes the last "("
        const nextCloseParen = input.indexOf(")", lastOpenParen + 1);
        end = nextCloseParen + 1;
      }
    } else {
      // Trim unwanted punctuation
      while (
        !urlFeNonTerminalsRe.includes(input[end - 1]) &&
        urlFePunctuationRe.test(input[end - 1])
      ) {
        end--;
      }
    }
  }
  return [ start, end - start ];
}
module.exports.urlFe = urlFe;

View file

@ -2,7 +2,7 @@
"use strict"; "use strict";
const { addErrorContext, filterTokens, urlRe, withinAnyRange } = const { addErrorContext, filterTokens, funcExpExec, urlFe, withinAnyRange } =
require("../helpers"); require("../helpers");
const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } = const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } =
require("./cache"); require("./cache");
@ -34,8 +34,9 @@ module.exports = {
while ((match = htmlLinkRe.exec(line)) !== null) { while ((match = htmlLinkRe.exec(line)) !== null) {
lineExclusions.push([ lineIndex, match.index, match[0].length ]); lineExclusions.push([ lineIndex, match.index, match[0].length ]);
} }
while ((match = urlRe.exec(line)) !== null) { while ((match = funcExpExec(urlFe, line)) !== null) {
const [ bareUrl ] = match; const [ bareUrl ] = match;
// @ts-ignore
const matchIndex = match.index; const matchIndex = match.index;
const bareUrlLength = bareUrl.length; const bareUrlLength = bareUrl.length;
const prefix = line.slice(0, matchIndex); const prefix = line.slice(0, matchIndex);

View file

@ -3,7 +3,8 @@
"use strict"; "use strict";
const { addErrorDetailIf, escapeForRegExp, forEachLine, forEachLink, const { addErrorDetailIf, escapeForRegExp, forEachLine, forEachLink,
linkReferenceDefinitionRe, urlRe, withinAnyRange } = require("../helpers"); funcExpExec, linkReferenceDefinitionRe, urlFe, withinAnyRange } =
require("../helpers");
const { codeBlockAndSpanRanges, htmlElementRanges, lineMetadata } = const { codeBlockAndSpanRanges, htmlElementRanges, lineMetadata } =
require("./cache"); require("./cache");
@ -27,7 +28,8 @@ module.exports = {
exclusions.push([ lineIndex, 0, line.length ]); exclusions.push([ lineIndex, 0, line.length ]);
} else { } else {
let match = null; let match = null;
while ((match = urlRe.exec(line)) !== null) { while ((match = funcExpExec(urlFe, line)) !== null) {
// @ts-ignore
exclusions.push([ lineIndex, match.index, match[0].length ]); exclusions.push([ lineIndex, match.index, match[0].length ]);
} }
forEachLink(line, (index, _, text, destination) => { forEachLink(line, (index, _, text, destination) => {

View file

@ -1309,7 +1309,7 @@ test("expandTildePath", (t) => {
t.is(helpers.expandTildePath("~/dir/file", null), "~/dir/file"); t.is(helpers.expandTildePath("~/dir/file", null), "~/dir/file");
}); });
test("urlRe", (t) => { test("urlFe", (t) => {
t.plan(1); t.plan(1);
const input = ` const input = `
Text ftp://example.com text Text ftp://example.com text
@ -1329,6 +1329,11 @@ Text https://example.com/path() text
Text https://example.com/path(path) text Text https://example.com/path(path) text
Text https://example.com/path(path)path text Text https://example.com/path(path)path text
Text https://example.com/path-(path) text Text https://example.com/path-(path) text
Text https://example.com/path(() text
Text https://example.com/path()) text
Text https://example.com/path(()) text
Text https://example.com/path((())) text
Text https://example.com/path()() text
Text (https://example.com/path) text Text (https://example.com/path) text
Text <https://example.com/path> text Text <https://example.com/path> text
Text >https://example.com/path< text Text >https://example.com/path< text
@ -1350,24 +1355,35 @@ Text *https://example.com* text
Text **https://example.com** text Text **https://example.com** text
Text _https://example.com_ text Text _https://example.com_ text
Text __https://example.com__ text Text __https://example.com__ text
Text https://example.com. Text Text https://example.com. text
Text https://example.com, text Text https://example.com, text
Text https://example.com; text Text https://example.com; text
Text https://example.com: text Text https://example.com: text
Text https://example.com? Text Text https://example.com? text
Text https://example.com! Text Text https://example.com! text
Text https://example.com。 Text Text https://example.com。 text
Text https://example.com Text Text https://example.com text
Text https://example.com Text Text https://example.com text
Text https://example.com Text Text https://example.com text
Text https://example.com Text Text https://example.com text
Text https://example.com,text Text https://example.com,text
Text https://example.com.path text Text https://example.com.path text
Text https://example.com?path text Text https://example.com?path text
Text https://example.com!text Text https://example.com!text
Text https://example.com.. text
Text https://example.com... text
Text https://example.com.co text
Text <https://example.com/path text> text
Text <https://example.com/path.path> text
Text <https://example.com/path,path> text
Text <https://example.com/path;path> text
Text <https://example.com/path:path> text
Text <https://example.com/path?path> text
Text <https://example.com/path!path> text
[https://example.com/path](https://example.com/path) [https://example.com/path](https://example.com/path)
[ https://example.com/path](https://example.com/path) [ https://example.com/path](https://example.com/path)
[https://example.com/path ](https://example.com/path) [https://example.com/path ](https://example.com/path)
https://example.com/ text https://example.com/path text https://example.com/
https://example.com https://example.com
https://example.com https://example.com
https://example.com https://example.com
@ -1390,6 +1406,11 @@ Text text
Text text Text text
Text text Text text
Text text Text text
Text text
Text ) text
Text ) text
Text )) text
Text text
Text () text Text () text
Text <> text Text <> text
Text >< text Text >< text
@ -1409,26 +1430,37 @@ Text <a href="">link</a> text
Text <a href=""></a> text Text <a href=""></a> text
Text ** text Text ** text
Text **** text Text **** text
Text _ text
Text __ text Text __ text
Text . Text Text ____ text
Text . text
Text , text Text , text
Text ; text Text ; text
Text : text Text : text
Text ? Text Text ? text
Text ! Text Text ! text
Text Text Text text
Text Text Text text
Text Text Text text
Text Text Text text
Text Text Text text
Text Text ,text
Text text Text text
Text text Text text
Text Text !text
Text .. text
Text ... text
Text text
Text < text> text
Text <> text
Text <> text
Text <> text
Text <> text
Text <> text
Text <> text
[]() []()
[ ]() [ ]()
[ ]() [ ]()
text text
@ -1437,7 +1469,8 @@ Text
for (let line of input) { for (let line of input) {
const urlRanges = []; const urlRanges = [];
let match = null; let match = null;
while ((match = helpers.urlRe.exec(line)) !== null) { while ((match = helpers.funcExpExec(helpers.urlFe, line)) !== null) {
// @ts-ignore
urlRanges.push([ match.index, match[0].length ]); urlRanges.push([ match.index, match[0].length ]);
} }
urlRanges.reverse(); urlRanges.reverse();