From c4e236b858f1d4d54ec43b632835447a53fd32be Mon Sep 17 00:00:00 2001 From: David Anson Date: Sat, 30 Jan 2021 13:08:57 -0800 Subject: [PATCH] Update clearHtmlCommentText helper to match CommonMark (instead of HTML) specification (refs #361). --- .eslintrc.json | 1 + demo/markdownlint-browser.js | 49 +++++++++++++++-------- helpers/helpers.js | 51 +++++++++++++++--------- test/html-comments.md | 65 +++++++++++++++++++++++++++++++ test/ignore-comments.md | 4 +- test/markdownlint-test-helpers.js | 26 +++++++------ 6 files changed, 148 insertions(+), 48 deletions(-) create mode 100644 test/html-comments.md diff --git a/.eslintrc.json b/.eslintrc.json index 3c48fda6..84a0eebc 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -36,6 +36,7 @@ "id-length": "off", "indent": ["error", 2, { "SwitchCase": 1 }], "linebreak-style": "off", + "max-depth": "off", "max-lines": "off", "max-lines-per-function": "off", "max-params": ["error", 10], diff --git a/demo/markdownlint-browser.js b/demo/markdownlint-browser.js index 2a928148..a5feca2f 100644 --- a/demo/markdownlint-browser.js +++ b/demo/markdownlint-browser.js @@ -115,31 +115,48 @@ module.exports.includesSorted = function includesSorted(array, element) { // Replaces the text of all properly-formatted HTML comments with whitespace // This preserves the line/column information for the rest of the document // Trailing whitespace is avoided with a '\' character in the last column -// See https://www.w3.org/TR/html5/syntax.html#comments for details +// https://spec.commonmark.org/0.29/#html-blocks +// https://spec.commonmark.org/0.29/#html-comment var htmlCommentBegin = "<!--"; var htmlCommentEnd = "-->"; module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) { var i = 0; while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) { - var j = text.indexOf(htmlCommentEnd, i); + var j = text.indexOf(htmlCommentEnd, i + 2); if (j === -1) { // Un-terminated comments are treated as text break; } - var comment = text.slice(i + htmlCommentBegin.length, 
j); - if ((comment.length > 0) && - !comment.startsWith(">") && - !comment.startsWith("->") && - !comment.endsWith("<!-") && - !comment.includes("<!--") && - !comment.includes("-->") && - !comment.includes("--!>") && - (text.slice(i, j + htmlCommentEnd.length).search(inlineCommentRe) === -1)) { - var blanks = comment - .replace(/[^\r\n]/g, " ") - .replace(/ ([\r\n])/g, "\\$1"); - text = text.slice(0, i + htmlCommentBegin.length) + - blanks + text.slice(j); + // If the comment has content... + if (j > i + htmlCommentBegin.length) { + var k = i - 1; + while (text[k] === " ") { + k--; + } + // If comment is not within an indented code block... + if (k >= i - 4) { + var content = text.slice(i + htmlCommentBegin.length, j); + var isBlock = (k < 0) || (text[k] === "\n"); + var isValid = isBlock || + (!content.startsWith(">") && !content.startsWith("->") && + !content.endsWith("-") && !content.includes("--")); + // If a valid block/inline comment... + if (isValid) { + var inlineCommentIndex = text + .slice(i, j + htmlCommentEnd.length) + .search(inlineCommentRe); + // If not a markdownlint inline directive... 
+ if (inlineCommentIndex === -1) { + var blanks = content + .replace(/[^\r\n]/g, " ") + .replace(/ ([\r\n])/g, "\\$1"); + text = + text.slice(0, i + htmlCommentBegin.length) + + blanks + + text.slice(j); + } + } + } } i = j + htmlCommentEnd.length; } diff --git a/helpers/helpers.js b/helpers/helpers.js index 50ee54c8..85c64eb8 100644 --- a/helpers/helpers.js +++ b/helpers/helpers.js @@ -102,33 +102,48 @@ module.exports.includesSorted = function includesSorted(array, element) { // Replaces the text of all properly-formatted HTML comments with whitespace // This preserves the line/column information for the rest of the document // Trailing whitespace is avoided with a '\' character in the last column -// See https://www.w3.org/TR/html5/syntax.html#comments for details +// https://spec.commonmark.org/0.29/#html-blocks +// https://spec.commonmark.org/0.29/#html-comment const htmlCommentBegin = "<!--"; const htmlCommentEnd = "-->"; module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) { let i = 0; while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) { - const j = text.indexOf(htmlCommentEnd, i); + const j = text.indexOf(htmlCommentEnd, i + 2); if (j === -1) { // Un-terminated comments are treated as text break; } - const comment = text.slice(i + htmlCommentBegin.length, j); - if ( - (comment.length > 0) && - !comment.startsWith(">") && - !comment.startsWith("->") && - !comment.endsWith("<!-") && - !comment.includes("<!--") && - !comment.includes("-->") && - !comment.includes("--!>") && - (text.slice(i, j + htmlCommentEnd.length).search(inlineCommentRe) === -1) - ) { - const blanks = comment - .replace(/[^\r\n]/g, " ") - .replace(/ ([\r\n])/g, "\\$1"); - text = text.slice(0, i + htmlCommentBegin.length) + - blanks + text.slice(j); + // If the comment has content... + if (j > i + htmlCommentBegin.length) { + let k = i - 1; + while (text[k] === " ") { + k--; + } + // If comment is not within an indented code block... 
+ if (k >= i - 4) { + const content = text.slice(i + htmlCommentBegin.length, j); + const isBlock = (k < 0) || (text[k] === "\n"); + const isValid = isBlock || + (!content.startsWith(">") && !content.startsWith("->") && + !content.endsWith("-") && !content.includes("--")); + // If a valid block/inline comment... + if (isValid) { + const inlineCommentIndex = text + .slice(i, j + htmlCommentEnd.length) + .search(inlineCommentRe); + // If not a markdownlint inline directive... + if (inlineCommentIndex === -1) { + const blanks = content + .replace(/[^\r\n]/g, " ") + .replace(/ ([\r\n])/g, "\\$1"); + text = + text.slice(0, i + htmlCommentBegin.length) + + blanks + + text.slice(j); + } + } + } } i = j + htmlCommentEnd.length; } diff --git a/test/html-comments.md b/test/html-comments.md new file mode 100644 index 00000000..83b0ffd6 --- /dev/null +++ b/test/html-comments.md @@ -0,0 +1,65 @@ +# HTML Comments + +## Block Comments + + + + + +text + + + + + + + + + + *{MD037} * --> + + *{MD037} * --> + + + + + + + +## Inline Comments + + + +t + +ttext + + t + + t + +t + +t + +t *{MD037} * --> + +t *{MD037} * --> + +t + +t + +t + +## Notes + +It's important that the rule used above is one that calls +`helpers.forEachLine` so `markdown-it` doesn't ignore any +incorrectly-remaining comment blocks. 
diff --git a/test/ignore-comments.md b/test/ignore-comments.md index e4388cea..fac33e00 100644 --- a/test/ignore-comments.md +++ b/test/ignore-comments.md @@ -18,11 +18,11 @@ Hard tab Hard tab --> -comment Hard tab {MD010} ---> +--> text Text diff --git a/test/markdownlint-test-helpers.js b/test/markdownlint-test-helpers.js index 29790cc3..8190a08b 100644 --- a/test/markdownlint-test-helpers.js +++ b/test/markdownlint-test-helpers.js @@ -9,6 +9,12 @@ const helpers = require("../helpers"); test("clearHtmlCommentTextValid", (t) => { t.plan(1); const validComments = [ + "", + "", + "", + "", + " ", + " ", "", "", "", @@ -47,6 +53,12 @@ test("clearHtmlCommentTextValid", (t) => { "text" ]; const validResult = [ + "", + "", + "", + "", + " ", + " ", "", "", "", @@ -100,22 +112,12 @@ test("clearHtmlCommentTextInvalid", (t) => { "text-->", "text-->", "", - // Restrictions from specification "-->", "t-->", "-->", "t-->", - "", - "", - "", - // "t-->", - // "-->", - // "t-->", - "t-->", - "-->", - "t-->", - "", - "" + "t-->", + " " ]; const actual = helpers.clearHtmlCommentText(invalidComments.join("\n")); const expected = invalidComments.join("\n");