Reimplement MD044/proper-names using micromark tokens.

This commit is contained in:
David Anson 2023-03-14 21:03:07 -07:00
parent e8a85c91f2
commit 5bff33d41b
9 changed files with 182 additions and 495 deletions

View file

@ -1152,110 +1152,3 @@ function expandTildePath(file, os) {
return homedir ? file.replace(/^~($|\/|\\)/, `${homedir}$1`) : file;
}
module.exports.expandTildePath = expandTildePath;
/**
 * Runs a "function expression" against a string the way RegExp.exec runs a
 * regular expression: the search position is carried between calls in a
 * lastIndex property stored on the function itself.
 *
 * @param {Function} funcExp Function that takes string and returns
 * [index, length] (relative to the string it was given) or null.
 * @param {string} input String to search.
 * @returns {string[] | null} Single-element array holding the matched text,
 * with an index property giving its offset in input, or null if no match.
 */
function funcExpExec(funcExp, input) {
  // Resume where the previous call stopped (or start from the beginning)
  // @ts-ignore
  const offset = funcExp.lastIndex || 0;
  const found = funcExp(input.slice(offset));
  if (!found) {
    // Nothing (more) to find; rewind so the next call starts over
    // @ts-ignore
    funcExp.lastIndex = 0;
    return null;
  }
  // Translate the slice-relative result into absolute positions
  const [ relativeIndex, matchLength ] = found;
  const begin = offset + relativeIndex;
  const finish = begin + matchLength;
  // Remember where to continue on the next call
  // @ts-ignore
  funcExp.lastIndex = finish;
  const matched = [ input.slice(begin, finish) ];
  // @ts-ignore
  matched.index = begin;
  return matched;
}
module.exports.funcExpExec = funcExpExec;
// Protocol prefix that marks the start of a URL
const urlFeProtocolRe = /(?:http|ftp)s?:\/\//i;
// Characters (or end of input) that end a URL preceded by "<"
const urlFeAutolinkTerminalsRe = / |$/;
// Characters (or end of input) that end any other URL
const urlFeBareTerminalsRe = /[ ,!`'"\]]|$/;
// Punctuation that is kept at the end of a URL (note: despite the "Re"
// suffix this is a plain string used as a set of characters)
const urlFeNonTerminalsRe = "-#/";
const urlFePunctuationRe = /\p{Punctuation}/u;
// Maps the character just before a URL to the character that closes it
const urlFePrefixToPostfix = new Map([
  [ " ", " " ],
  [ "`", "`" ],
  [ "'", "'" ],
  [ "\"", "\"" ],
  // Curly single quotes, written as escapes so they survive re-encoding
  [ "\u2018", "\u2019" ],
  [ "“", "”" ],
  [ "«", "»" ],
  [ "*", "*" ],
  [ "_", "_" ],
  [ "(", ")" ],
  [ "[", "]" ],
  [ "{", "}" ],
  [ "<", ">" ],
  [ ">", "<" ]
]);
/**
 * Function expression that matches URLs.
 *
 * Finds the first http/https/ftp/ftps protocol in the input, then decides
 * where the URL ends using the character immediately before it (for
 * example "<" expects a closing ">") together with a set of terminating
 * characters. URLs at the start of the input or preceded by a space also
 * have trailing punctuation and unbalanced ")" trimmed.
 *
 * @param {string} input Substring to search for a URL.
 * @returns {Array | null} [index, length] of URL or null.
 */
function urlFe(input) {
  // Find start of URL by searching for protocol
  const match = input.match(urlFeProtocolRe);
  if (match) {
    // Look for matching pre/postfix characters (ex: <...>)
    const start = match.index || 0;
    const length = match[0].length;
    // A URL at the very start of the input is treated as space-delimited
    const prefix = input[start - 1] || " ";
    const postfix = urlFePrefixToPostfix.get(prefix);
    // Only search for a postfix when the prefix has one; passing undefined
    // to indexOf would search for the literal text "undefined"
    let endPostfix =
      (postfix === undefined) ? -1 : input.indexOf(postfix, start + length);
    if (endPostfix === -1) {
      endPostfix = input.length;
    }
    // Look for characters that terminate a URL
    const terminalsRe =
      (prefix === "<") ? urlFeAutolinkTerminalsRe : urlFeBareTerminalsRe;
    // Both expressions also match end of input, so search never returns -1
    const endTerminal = start + input.slice(start).search(terminalsRe);
    // Determine tentative end of URL
    let end = Math.min(endPostfix, endTerminal);
    if (prefix === " ") {
      // If the URL used " " as pre/postfix characters, trim the end
      if (input[end - 1] === ")") {
        // Trim any ")" beyond the last "(...)" pair
        const lastOpenParen = input.lastIndexOf("(", end - 2);
        if (lastOpenParen <= start) {
          // No "(" inside the URL, so the ")" is not part of it
          end--;
        } else {
          const nextCloseParen = input.indexOf(")", lastOpenParen + 1);
          end = nextCloseParen + 1;
        }
      } else {
        // Trim unwanted punctuation
        while (
          !urlFeNonTerminalsRe.includes(input[end - 1]) &&
          urlFePunctuationRe.test(input[end - 1])
        ) {
          end--;
        }
      }
    }
    return [ start, end - start ];
  }
  // No match
  return null;
}
module.exports.urlFe = urlFe;

View file

@ -105,10 +105,10 @@ function micromarkParse(markdown, options = {}) {
*
* @param {Token[]} tokens Micromark tokens.
* @param {Function} allowed Allowed token predicate.
* @param {Function} [transform] Transform token list predicate.
* @param {Function} [transformChildren] Transform children predicate.
* @returns {Token[]} Filtered tokens.
*/
function filterByPredicate(tokens, allowed, transform) {
function filterByPredicate(tokens, allowed, transformChildren) {
const result = [];
const pending = [ ...tokens ];
let token = null;
@ -118,7 +118,7 @@ function filterByPredicate(tokens, allowed, transform) {
}
if (token.children.length > 0) {
const transformed =
transform ? transform(token.children) : token.children;
transformChildren ? transformChildren(token) : token.children;
pending.unshift(...transformed);
}
}