From 4db40256d901651b746983abb686f46d32e5d112 Mon Sep 17 00:00:00 2001 From: David Anson Date: Sat, 12 Jun 2021 17:10:59 -0700 Subject: [PATCH] Re-implement MD044/proper-names for better accuracy (range and fixInfo are now always valid) (fixes #402, fixes #403). --- demo/markdownlint-browser.js | 109 +++++++++------------- lib/md044.js | 136 ++++++++++++---------------- test/markdownlint-test-scenarios.js | 1 + test/proper-names-no-code.md | 20 +++- test/proper-names-projects.json | 1 - test/proper-names-projects.md | 8 +- test/proper-names.md | 10 +- 7 files changed, 135 insertions(+), 150 deletions(-) diff --git a/demo/markdownlint-browser.js b/demo/markdownlint-browser.js index e4d4b89b..71c25871 100644 --- a/demo/markdownlint-browser.js +++ b/demo/markdownlint-browser.js @@ -3745,9 +3745,8 @@ module.exports = { "use strict"; // @ts-check -var _a = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"), addErrorDetailIf = _a.addErrorDetailIf, bareUrlRe = _a.bareUrlRe, escapeForRegExp = _a.escapeForRegExp, filterTokens = _a.filterTokens, forEachInlineChild = _a.forEachInlineChild, newLineRe = _a.newLineRe; -var startNonWordRe = /^\W/; -var endNonWordRe = /\W$/; +var _a = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"), addErrorDetailIf = _a.addErrorDetailIf, bareUrlRe = _a.bareUrlRe, escapeForRegExp = _a.escapeForRegExp, forEachLine = _a.forEachLine, newLineRe = _a.newLineRe, forEachInlineCodeSpan = _a.forEachInlineCodeSpan; +var lineMetadata = __webpack_require__(/*! ./cache */ "../lib/cache.js").lineMetadata; module.exports = { "names": ["MD044", "proper-names"], "description": "Proper names should have the correct capitalization", @@ -3755,73 +3754,55 @@ module.exports = { "function": function MD044(params, onError) { var names = params.config.names; names = Array.isArray(names) ? names : []; + names.sort(function (a, b) { return (b.length - a.length) || a.localeCompare(b); }); var codeBlocks = params.config.code_blocks; var includeCodeBlocks = (codeBlocks === undefined) ? true : !!codeBlocks; - // Text of automatic hyperlinks is implicitly a URL - var autolinkText = new Set(); - filterTokens(params, "inline", function (token) { - var inAutoLink = false; - token.children.forEach(function (child) { - var info = child.info, type = child.type; - if ((type === "link_open") && (info === "auto")) { - inAutoLink = true; - } - else if (type === "link_close") { - inAutoLink = false; - } - else if ((type === "text") && inAutoLink) { - autolinkText.add(child); + var exclusions = []; + if (!includeCodeBlocks) { + forEachInlineCodeSpan(params.lines.join("\n"), function (code, lineIndex, columnIndex) { + var codeLines = code.split(newLineRe); + // eslint-disable-next-line unicorn/no-for-loop + for (var i = 0; i < codeLines.length; i++) { + exclusions.push([lineIndex + i, columnIndex, codeLines[i].length]); + columnIndex = 0; } }); - }); - // For each proper name... - names.forEach(function (name) { - var escapedName = escapeForRegExp(name); - var startNamePattern = startNonWordRe.test(name) ? "" : "\\S*\\b"; - var endNamePattern = endNonWordRe.test(name) ? "" : "\\b\\S*"; - var namePattern = "(" + startNamePattern + ")(" + escapedName + ")(" + endNamePattern + ")"; - var anyNameRe = new RegExp(namePattern, "gi"); - // eslint-disable-next-line jsdoc/require-jsdoc - function forToken(token) { - if (!autolinkText.has(token)) { - var fenceOffset_1 = (token.type === "fence") ? 1 : 0; - token.content.split(newLineRe).forEach(function (line, index) { - var match = null; - while ((match = anyNameRe.exec(line)) !== null) { - var fullMatch = match[0], leftMatch = match[1], nameMatch = match[2], rightMatch = match[3]; - if (fullMatch.search(bareUrlRe) === -1) { - var wordMatch = fullMatch - .replace(new RegExp("^\\W{0," + leftMatch.length + "}"), "") - .replace(new RegExp("\\W{0," + rightMatch.length + "}$"), ""); - if (!names.includes(wordMatch)) { - var lineNumber = token.lineNumber + index + fenceOffset_1; - var fullLine = params.lines[lineNumber - 1]; - var matchLength = wordMatch.length; - var matchIndex = fullLine.indexOf(wordMatch); - var range = (matchIndex === -1) ? - null : - [matchIndex + 1, matchLength]; - var fixInfo = (matchIndex === -1) ? - null : - { - "editColumn": matchIndex + 1, - "deleteCount": matchLength, - "insertText": name - }; - addErrorDetailIf(onError, lineNumber, name, nameMatch, null, null, range, fixInfo); - } - } + } + var _loop_1 = function (name_1) { + var escapedName = escapeForRegExp(name_1); + var startNamePattern = /^\W/.test(name_1) ? "" : "[^\\s([\"]*\\b_*"; + var endNamePattern = /\W$/.test(name_1) ? "" : "_*\\b[^\\s)\\]\"]*"; + var namePattern = "(" + startNamePattern + ")(" + escapedName + ")" + endNamePattern; + var nameRe = new RegExp(namePattern, "gi"); + forEachLine(lineMetadata(), function (line, lineIndex, inCode, onFence) { + if (includeCodeBlocks || (!inCode && !onFence)) { + var match = null; + var _loop_2 = function () { + var fullMatch = match[0], leftMatch = match[1], nameMatch = match[2]; + var index = match.index + leftMatch.length; + var length_1 = nameMatch.length; + if ((fullMatch.search(bareUrlRe) === -1) && + exclusions.every(function (span) { return ((lineIndex !== span[0]) || + (index + length_1 < span[1]) || + (index > span[1] + span[2])); })) { + addErrorDetailIf(onError, lineIndex + 1, name_1, nameMatch, null, null, [index + 1, length_1], { + "editColumn": index + 1, + "deleteCount": length_1, + "insertText": name_1 + }); } - }); + exclusions.push([lineIndex, index, length_1]); + }; + while ((match = nameRe.exec(line)) !== null) { + _loop_2(); + } } - } - forEachInlineChild(params, "text", forToken); - if (includeCodeBlocks) { - forEachInlineChild(params, "code_inline", forToken); - filterTokens(params, "code_block", forToken); - filterTokens(params, "fence", forToken); - } - }); + }); + }; + for (var _i = 0, names_1 = names; _i < names_1.length; _i++) { + var name_1 = names_1[_i]; + _loop_1(name_1); + } } }; diff --git a/lib/md044.js b/lib/md044.js index 41778e8c..d6f1187a 100644 --- a/lib/md044.js +++ b/lib/md044.js @@ -2,11 +2,9 @@ "use strict"; -const { addErrorDetailIf, bareUrlRe, escapeForRegExp, filterTokens, - forEachInlineChild, newLineRe } = require("../helpers"); - -const startNonWordRe = /^\W/; -const endNonWordRe = /\W$/; +const { addErrorDetailIf, bareUrlRe, escapeForRegExp, forEachLine, newLineRe, + forEachInlineCodeSpan } = require("../helpers"); +const { lineMetadata } = require("./cache"); module.exports = { "names": [ "MD044", "proper-names" ], @@ -15,80 +13,66 @@ module.exports = { "function": function MD044(params, onError) { let names = params.config.names; names = Array.isArray(names) ? names : []; + names.sort((a, b) => (b.length - a.length) || a.localeCompare(b)); const codeBlocks = params.config.code_blocks; const includeCodeBlocks = (codeBlocks === undefined) ? true : !!codeBlocks; - // Text of automatic hyperlinks is implicitly a URL - const autolinkText = new Set(); - filterTokens(params, "inline", (token) => { - let inAutoLink = false; - token.children.forEach((child) => { - const { info, type } = child; - if ((type === "link_open") && (info === "auto")) { - inAutoLink = true; - } else if (type === "link_close") { - inAutoLink = false; - } else if ((type === "text") && inAutoLink) { - autolinkText.add(child); + const exclusions = []; + if (!includeCodeBlocks) { + forEachInlineCodeSpan( + params.lines.join("\n"), + (code, lineIndex, columnIndex) => { + const codeLines = code.split(newLineRe); + // eslint-disable-next-line unicorn/no-for-loop + for (let i = 0; i < codeLines.length; i++) { + exclusions.push( + [ lineIndex + i, columnIndex, codeLines[i].length ] + ); + columnIndex = 0; + } + } + ); + } + for (const name of names) { + const escapedName = escapeForRegExp(name); + const startNamePattern = /^\W/.test(name) ? "" : "[^\\s([\"]*\\b_*"; + const endNamePattern = /\W$/.test(name) ? "" : "_*\\b[^\\s)\\]\"]*"; + const namePattern = + `(${startNamePattern})(${escapedName})${endNamePattern}`; + const nameRe = new RegExp(namePattern, "gi"); + forEachLine(lineMetadata(), (line, lineIndex, inCode, onFence) => { + if (includeCodeBlocks || (!inCode && !onFence)) { + let match = null; + while ((match = nameRe.exec(line)) !== null) { + const [ fullMatch, leftMatch, nameMatch ] = match; + const index = match.index + leftMatch.length; + const length = nameMatch.length; + if ( + (fullMatch.search(bareUrlRe) === -1) && + exclusions.every((span) => ( + (lineIndex !== span[0]) || + (index + length < span[1]) || + (index > span[1] + span[2]) + )) + ) { + addErrorDetailIf( + onError, + lineIndex + 1, + name, + nameMatch, + null, + null, + [ index + 1, length ], + { + "editColumn": index + 1, + "deleteCount": length, + "insertText": name + } + ); + } + exclusions.push([ lineIndex, index, length ]); + } } }); - }); - // For each proper name... - names.forEach((name) => { - const escapedName = escapeForRegExp(name); - const startNamePattern = startNonWordRe.test(name) ? "" : "\\S*\\b"; - const endNamePattern = endNonWordRe.test(name) ? "" : "\\b\\S*"; - const namePattern = - `(${startNamePattern})(${escapedName})(${endNamePattern})`; - const anyNameRe = new RegExp(namePattern, "gi"); - // eslint-disable-next-line jsdoc/require-jsdoc - function forToken(token) { - if (!autolinkText.has(token)) { - const fenceOffset = (token.type === "fence") ? 1 : 0; - token.content.split(newLineRe).forEach((line, index) => { - let match = null; - while ((match = anyNameRe.exec(line)) !== null) { - const [ fullMatch, leftMatch, nameMatch, rightMatch ] = match; - if (fullMatch.search(bareUrlRe) === -1) { - const wordMatch = fullMatch - .replace(new RegExp(`^\\W{0,${leftMatch.length}}`), "") - .replace(new RegExp(`\\W{0,${rightMatch.length}}$`), ""); - if (!names.includes(wordMatch)) { - const lineNumber = token.lineNumber + index + fenceOffset; - const fullLine = params.lines[lineNumber - 1]; - const matchLength = wordMatch.length; - const matchIndex = fullLine.indexOf(wordMatch); - const range = (matchIndex === -1) ? - null : - [ matchIndex + 1, matchLength ]; - const fixInfo = (matchIndex === -1) ? - null : - { - "editColumn": matchIndex + 1, - "deleteCount": matchLength, - "insertText": name - }; - addErrorDetailIf( - onError, - lineNumber, - name, - nameMatch, - null, - null, - range, - fixInfo - ); - } - } - } - }); - } - } - forEachInlineChild(params, "text", forToken); - if (includeCodeBlocks) { - forEachInlineChild(params, "code_inline", forToken); - filterTokens(params, "code_block", forToken); - filterTokens(params, "fence", forToken); - } - }); + } } }; diff --git a/test/markdownlint-test-scenarios.js b/test/markdownlint-test-scenarios.js index 98beeaba..1254813e 100644 --- a/test/markdownlint-test-scenarios.js +++ b/test/markdownlint-test-scenarios.js @@ -29,6 +29,7 @@ function createTestForFile(file) { .then( function configFileExists() { return fs.promises.readFile(configFile, "utf8") + // @ts-ignore .then(JSON.parse); }, function noConfigFile() { diff --git a/test/proper-names-no-code.md b/test/proper-names-no-code.md index 66a0dc3a..488738d0 100644 --- a/test/proper-names-no-code.md +++ b/test/proper-names-no-code.md @@ -22,4 +22,22 @@ node.js is runtime A short paragraph about node.js and {MD044} -javascript. {MD044} +also javascript. {MD044} + +`javascript` + +`code +javascript` + +`code +javascript +code` + +`javascript +code` + +text JavaScript text `javascript` text JavaScript text +text `javascript` text JavaScript text `javascript` text + +text javascript text `javascript` text {MD044} +text `javascript` text javascript text {MD044} diff --git a/test/proper-names-projects.json b/test/proper-names-projects.json index 5b6fbc7f..71daf346 100644 --- a/test/proper-names-projects.json +++ b/test/proper-names-projects.json @@ -6,7 +6,6 @@ "github.com", "github.com/about", "npm", - "NPM", "Vue", "Vuex", "vue-router" diff --git a/test/proper-names-projects.md b/test/proper-names-projects.md index 6e304bf2..0360e14c 100644 --- a/test/proper-names-projects.md +++ b/test/proper-names-projects.md @@ -32,14 +32,16 @@ The library vue-router Not Vue-router {MD044} -Or vue-router-extra {MD044} +But vue-router-extra is different -Or extra-vue-router {MD044} +As is extra-vue-router Quoted "Vue" and "vue-router" Emphasized *Vue* and *vue-router* +Underscored _Vue_ and _vue-router_ + Call it npm -Or NPM But not Npm {MD044} +Or NPM {MD044} diff --git a/test/proper-names.md b/test/proper-names.md index b4ca9f03..57f5a7c2 100644 --- a/test/proper-names.md +++ b/test/proper-names.md @@ -72,12 +72,12 @@ javascript. {MD044} {MD044} `javascript` -{MD044} `code -javascript` +`code +javascript` {MD044} -{MD044} `code -javascript +`code +javascript {MD044} code` -{MD044} `javascript +`javascript {MD044} code`