Update clearHtmlCommentText helper to match CommonMark (instead of HTML) specification (refs #361).

This commit is contained in:
David Anson 2021-01-30 13:08:57 -08:00
parent 6f39df1417
commit c4e236b858
6 changed files with 148 additions and 48 deletions

View file

@ -36,6 +36,7 @@
"id-length": "off", "id-length": "off",
"indent": ["error", 2, { "SwitchCase": 1 }], "indent": ["error", 2, { "SwitchCase": 1 }],
"linebreak-style": "off", "linebreak-style": "off",
"max-depth": "off",
"max-lines": "off", "max-lines": "off",
"max-lines-per-function": "off", "max-lines-per-function": "off",
"max-params": ["error", 10], "max-params": ["error", 10],

View file

@ -115,31 +115,48 @@ module.exports.includesSorted = function includesSorted(array, element) {
// Replaces the text of all properly-formatted HTML comments with whitespace // Replaces the text of all properly-formatted HTML comments with whitespace
// This preserves the line/column information for the rest of the document // This preserves the line/column information for the rest of the document
// Trailing whitespace is avoided with a '\' character in the last column // Trailing whitespace is avoided with a '\' character in the last column
// See https://www.w3.org/TR/html5/syntax.html#comments for details // https://spec.commonmark.org/0.29/#html-blocks
// https://spec.commonmark.org/0.29/#html-comment
var htmlCommentBegin = "<!--"; var htmlCommentBegin = "<!--";
var htmlCommentEnd = "-->"; var htmlCommentEnd = "-->";
module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) { module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
var i = 0; var i = 0;
while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) { while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) {
var j = text.indexOf(htmlCommentEnd, i); var j = text.indexOf(htmlCommentEnd, i + 2);
if (j === -1) { if (j === -1) {
// Un-terminated comments are treated as text // Un-terminated comments are treated as text
break; break;
} }
var comment = text.slice(i + htmlCommentBegin.length, j); // If the comment has content...
if ((comment.length > 0) && if (j > i + htmlCommentBegin.length) {
!comment.startsWith(">") && var k = i - 1;
!comment.startsWith("->") && while (text[k] === " ") {
!comment.endsWith("<!-") && k--;
!comment.includes("<!--") && }
// !comment.includes("-->") && // If comment is not within an indented code block...
!comment.includes("--!>") && if (k >= i - 4) {
(text.slice(i, j + htmlCommentEnd.length).search(inlineCommentRe) === -1)) { var content = text.slice(i + htmlCommentBegin.length, j);
var blanks = comment var isBlock = (k < 0) || (text[k] === "\n");
var isValid = isBlock ||
(!content.startsWith(">") && !content.startsWith("->") &&
!content.endsWith("-") && !content.includes("--"));
// If a valid block/inline comment...
if (isValid) {
var inlineCommentIndex = text
.slice(i, j + htmlCommentEnd.length)
.search(inlineCommentRe);
// If not a markdownlint inline directive...
if (inlineCommentIndex === -1) {
var blanks = content
.replace(/[^\r\n]/g, " ") .replace(/[^\r\n]/g, " ")
.replace(/ ([\r\n])/g, "\\$1"); .replace(/ ([\r\n])/g, "\\$1");
text = text.slice(0, i + htmlCommentBegin.length) + text =
blanks + text.slice(j); text.slice(0, i + htmlCommentBegin.length) +
blanks +
text.slice(j);
}
}
}
} }
i = j + htmlCommentEnd.length; i = j + htmlCommentEnd.length;
} }

View file

@ -102,33 +102,48 @@ module.exports.includesSorted = function includesSorted(array, element) {
// Replaces the text of all properly-formatted HTML comments with whitespace // Replaces the text of all properly-formatted HTML comments with whitespace
// This preserves the line/column information for the rest of the document // This preserves the line/column information for the rest of the document
// Trailing whitespace is avoided with a '\' character in the last column // Trailing whitespace is avoided with a '\' character in the last column
// See https://www.w3.org/TR/html5/syntax.html#comments for details // https://spec.commonmark.org/0.29/#html-blocks
// https://spec.commonmark.org/0.29/#html-comment
const htmlCommentBegin = "<!--"; const htmlCommentBegin = "<!--";
const htmlCommentEnd = "-->"; const htmlCommentEnd = "-->";
module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) { module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
let i = 0; let i = 0;
while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) { while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) {
const j = text.indexOf(htmlCommentEnd, i); const j = text.indexOf(htmlCommentEnd, i + 2);
if (j === -1) { if (j === -1) {
// Un-terminated comments are treated as text // Un-terminated comments are treated as text
break; break;
} }
const comment = text.slice(i + htmlCommentBegin.length, j); // If the comment has content...
if ( if (j > i + htmlCommentBegin.length) {
(comment.length > 0) && let k = i - 1;
!comment.startsWith(">") && while (text[k] === " ") {
!comment.startsWith("->") && k--;
!comment.endsWith("<!-") && }
!comment.includes("<!--") && // If comment is not within an indented code block...
// !comment.includes("-->") && if (k >= i - 4) {
!comment.includes("--!>") && const content = text.slice(i + htmlCommentBegin.length, j);
(text.slice(i, j + htmlCommentEnd.length).search(inlineCommentRe) === -1) const isBlock = (k < 0) || (text[k] === "\n");
) { const isValid = isBlock ||
const blanks = comment (!content.startsWith(">") && !content.startsWith("->") &&
!content.endsWith("-") && !content.includes("--"));
// If a valid block/inline comment...
if (isValid) {
const inlineCommentIndex = text
.slice(i, j + htmlCommentEnd.length)
.search(inlineCommentRe);
// If not a markdownlint inline directive...
if (inlineCommentIndex === -1) {
const blanks = content
.replace(/[^\r\n]/g, " ") .replace(/[^\r\n]/g, " ")
.replace(/ ([\r\n])/g, "\\$1"); .replace(/ ([\r\n])/g, "\\$1");
text = text.slice(0, i + htmlCommentBegin.length) + text =
blanks + text.slice(j); text.slice(0, i + htmlCommentBegin.length) +
blanks +
text.slice(j);
}
}
}
} }
i = j + htmlCommentEnd.length; i = j + htmlCommentEnd.length;
} }

65
test/html-comments.md Normal file
View file

@ -0,0 +1,65 @@
# HTML Comments
## Block Comments
<https://spec.commonmark.org/0.29/#html-blocks>
<!-- *comment * -->
<!-- *comment * -->text
<!-- *comment * -->
<!-- *code * -->
<!-- *comment *
*comment * -->
<!-- *comment *
*comment *
*comment * -->
<!--> *{MD037} * -->
<!---> *{MD037} * -->
<!-- *comment * --->
<!-- -- *comment * -->
<!-- *comment * -- -->
## Inline Comments
<https://spec.commonmark.org/0.29/#html-comment>
t<!-- *comment * -->
t<!-- *comment * -->text
t<!-- *comment * -->
t<!-- *code * -->
t<!-- *comment *
*comment * -->
t<!-- *comment *
*comment *
*comment * -->
t<!--> *{MD037} * -->
t<!---> *{MD037} * -->
t<!-- *{MD037} * --->
t<!-- -- *{MD037} * -->
t<!-- *{MD037} * -- -->
## Notes
It's important that the rule used above (MD037) is one that calls
`helpers.forEachLine`; a rule driven only by `markdown-it` tokens would
ignore any comment text that was incorrectly left in place.

View file

@ -18,11 +18,11 @@ Hard tab
Hard tab Hard tab
--> -->
<!-- Text <!--
Hard tab {MD010} Hard tab {MD010}
Invalid--!>comment Invalid--!>comment
Hard tab {MD010} Hard tab {MD010}
--> --> text
Te<!-- Hard tab -->xt Te<!-- Hard tab -->xt

View file

@ -9,6 +9,12 @@ const helpers = require("../helpers");
test("clearHtmlCommentTextValid", (t) => { test("clearHtmlCommentTextValid", (t) => {
t.plan(1); t.plan(1);
const validComments = [ const validComments = [
"<!-->",
"<!--->",
"<!---->",
"<!-- comment -->",
" <!-- comment -->",
" <!-- comment -->",
"<!-- text -->", "<!-- text -->",
"<!--text-->", "<!--text-->",
"<!-- -->", "<!-- -->",
@ -47,6 +53,12 @@ test("clearHtmlCommentTextValid", (t) => {
"text" "text"
]; ];
const validResult = [ const validResult = [
"<!-->",
"<!--->",
"<!---->",
"<!-- -->",
" <!-- -->",
" <!-- -->",
"<!-- -->", "<!-- -->",
"<!-- -->", "<!-- -->",
"<!-- -->", "<!-- -->",
@ -100,22 +112,12 @@ test("clearHtmlCommentTextInvalid", (t) => {
"<!-->text-->", "<!-->text-->",
"<!--->text-->", "<!--->text-->",
"<!---->", "<!---->",
// Restrictions from specification
"<!-->-->", "<!-->-->",
"<!-->t-->", "<!-->t-->",
"<!--->-->", "<!--->-->",
"<!--->t-->", "<!--->t-->",
"<!--<!--t-->", "<!---->t-->",
"<!--t<!---->", " <!-- indented code block -->"
"<!--t<!--t-->",
// "<!---->t-->",
// "<!--t-->-->",
// "<!--t-->t-->",
"<!----!>t-->",
"<!--t--!>-->",
"<!--t--!>t-->",
"<!--<!--->",
"<!--t<!--->"
]; ];
const actual = helpers.clearHtmlCommentText(invalidComments.join("\n")); const actual = helpers.clearHtmlCommentText(invalidComments.join("\n"));
const expected = invalidComments.join("\n"); const expected = invalidComments.join("\n");