Replace helpers.urlRe with helpers.urlFe to address "innefficient regular expression" CodeQL alert, introduce function expressions as an alternative, test more inputs.

This commit is contained in:
David Anson 2022-12-18 15:46:19 -08:00
parent 485c63c579
commit 8d6e0b5419
5 changed files with 277 additions and 35 deletions

View file

@ -47,9 +47,6 @@ module.exports.inlineCommentStartRe = inlineCommentStartRe;
const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g;
module.exports.htmlElementRe = htmlElementRe;
// Regular expressions for range matching
module.exports.urlRe =
// eslint-disable-next-line max-len
/(?:http|ftp)s?:\/\/(?:[^\s()<>\]"'`]|\([^\s<>\]"'`]*\))*\b(?:[-#/]|\([^\s<>\]"'`]*\))*/ig;
module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
// Regular expression for all instances of emphasis markers
@ -1137,6 +1134,109 @@ function expandTildePath(file, os) {
return homedir ? file.replace(/^~($|\/|\\)/, `${homedir}$1`) : file;
}
module.exports.expandTildePath = expandTildePath;
/**
* RegExp.exec-style implementation of function expressions.
*
* @param {Function} funcExp Function that takes string and returns
* [index, length] or null.
* @param {string} input String to search.
* @returns {string[] | null} RegExp.exec-style [match] with an index property.
*/
function funcExpExec(funcExp, input) {
// Start or resume match
// @ts-ignore
const lastIndex = funcExp.lastIndex || 0;
const result = funcExp(input.slice(lastIndex));
if (result) {
// Update lastIndex and return match
const [subIndex, length] = result;
const index = lastIndex + subIndex;
// @ts-ignore
funcExp.lastIndex = index + length;
const match = [input.slice(index, index + length)];
// @ts-ignore
match.index = index;
return match;
}
// Reset lastIndex and return no match
// @ts-ignore
funcExp.lastIndex = 0;
return null;
}
module.exports.funcExpExec = funcExpExec;
const urlFeProtocolRe = /(?:http|ftp)s?:\/\//i;
const urlFeAutolinkTerminalsRe = / |$/;
const urlFeBareTerminalsRe = /[ ,!`'"\]]|$/;
const urlFeNonTerminalsRe = "-#/";
const urlFePunctuationRe = /\p{Punctuation}/u;
const urlFePrefixToPostfix = new Map([
[" ", " "],
["`", "`"],
["'", "'"],
["\"", "\""],
["", ""],
["“", "”"],
["«", "»"],
["*", "*"],
["_", "_"],
["(", ")"],
["[", "]"],
["{", "}"],
["<", ">"],
[">", "<"]
]);
/**
* Function expression that matches URLs.
*
* @param {string} input Substring to search for a URL.
* @returns {Array | null} [index, length] of URL or null.
*/
function urlFe(input) {
// Find start of URL by searching for protocol
const match = input.match(urlFeProtocolRe);
if (match) {
// Look for matching pre/postfix characters (ex: <...>)
const start = match.index || 0;
const length = match[0].length;
const prefix = input[start - 1] || " ";
const postfix = urlFePrefixToPostfix.get(prefix);
// @ts-ignore
let endPostfix = input.indexOf(postfix, start + length);
if (endPostfix === -1) {
endPostfix = input.length;
}
// Look for characters that terminate a URL
const terminalsRe = (prefix === "<") ? urlFeAutolinkTerminalsRe : urlFeBareTerminalsRe;
const endTerminal = start + input.slice(start).search(terminalsRe);
// Determine tentative end of URL
let end = Math.min(endPostfix, endTerminal);
if (prefix === " ") {
// If the URL used " " as pre/postfix characters, trim the end
if (input[end - 1] === ")") {
// Trim any ")" beyond the last "(...)" pair
const lastOpenParen = input.lastIndexOf("(", end - 2);
if (lastOpenParen <= start) {
end--;
}
else {
const nextCloseParen = input.indexOf(")", lastOpenParen + 1);
end = nextCloseParen + 1;
}
}
else {
// Trim unwanted punctuation
while (!urlFeNonTerminalsRe.includes(input[end - 1]) &&
urlFePunctuationRe.test(input[end - 1])) {
end--;
}
}
}
return [start, end - start];
}
// No match
return null;
}
module.exports.urlFe = urlFe;
/***/ }),
@ -3756,7 +3856,7 @@ module.exports = {
"use strict";
// @ts-check
const { addErrorContext, filterTokens, urlRe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const { addErrorContext, filterTokens, funcExpExec, urlFe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } = __webpack_require__(/*! ./cache */ "../lib/cache.js");
const htmlLinkRe = /<a(?:|\s[^>]+)>[^<>]*<\/a\s*>/ig;
module.exports = {
@ -3785,8 +3885,9 @@ module.exports = {
while ((match = htmlLinkRe.exec(line)) !== null) {
lineExclusions.push([lineIndex, match.index, match[0].length]);
}
while ((match = urlRe.exec(line)) !== null) {
while ((match = funcExpExec(urlFe, line)) !== null) {
const [bareUrl] = match;
// @ts-ignore
const matchIndex = match.index;
const bareUrlLength = bareUrl.length;
const prefix = line.slice(0, matchIndex);
@ -4444,7 +4545,7 @@ module.exports = {
"use strict";
// @ts-check
const { addErrorDetailIf, escapeForRegExp, forEachLine, forEachLink, linkReferenceDefinitionRe, urlRe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const { addErrorDetailIf, escapeForRegExp, forEachLine, forEachLink, funcExpExec, linkReferenceDefinitionRe, urlFe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const { codeBlockAndSpanRanges, htmlElementRanges, lineMetadata } = __webpack_require__(/*! ./cache */ "../lib/cache.js");
module.exports = {
"names": ["MD044", "proper-names"],
@ -4465,7 +4566,8 @@ module.exports = {
}
else {
let match = null;
while ((match = urlRe.exec(line)) !== null) {
while ((match = funcExpExec(urlFe, line)) !== null) {
// @ts-ignore
exclusions.push([lineIndex, match.index, match[0].length]);
}
forEachLink(line, (index, _, text, destination) => {