Update clearHtmlCommentText helper to match CommonMark (instead of HTML) specification (refs #361).

This commit is contained in:
David Anson 2021-01-30 13:08:57 -08:00
parent 6f39df1417
commit c4e236b858
6 changed files with 148 additions and 48 deletions

View file

@ -36,6 +36,7 @@
"id-length": "off",
"indent": ["error", 2, { "SwitchCase": 1 }],
"linebreak-style": "off",
"max-depth": "off",
"max-lines": "off",
"max-lines-per-function": "off",
"max-params": ["error", 10],

View file

@ -115,31 +115,48 @@ module.exports.includesSorted = function includesSorted(array, element) {
// Replaces the text of all properly-formatted HTML comments with whitespace
// This preserves the line/column information for the rest of the document
// Trailing whitespace is avoided with a '\' character in the last column
// See https://www.w3.org/TR/html5/syntax.html#comments for details
// https://spec.commonmark.org/0.29/#html-blocks
// https://spec.commonmark.org/0.29/#html-comment
var htmlCommentBegin = "<!--";
var htmlCommentEnd = "-->";
module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
var i = 0;
while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) {
var j = text.indexOf(htmlCommentEnd, i);
var j = text.indexOf(htmlCommentEnd, i + 2);
if (j === -1) {
// Un-terminated comments are treated as text
break;
}
var comment = text.slice(i + htmlCommentBegin.length, j);
if ((comment.length > 0) &&
!comment.startsWith(">") &&
!comment.startsWith("->") &&
!comment.endsWith("<!-") &&
!comment.includes("<!--") &&
// !comment.includes("-->") &&
!comment.includes("--!>") &&
(text.slice(i, j + htmlCommentEnd.length).search(inlineCommentRe) === -1)) {
var blanks = comment
.replace(/[^\r\n]/g, " ")
.replace(/ ([\r\n])/g, "\\$1");
text = text.slice(0, i + htmlCommentBegin.length) +
blanks + text.slice(j);
// If the comment has content...
if (j > i + htmlCommentBegin.length) {
var k = i - 1;
while (text[k] === " ") {
k--;
}
// If comment is not within an indented code block...
if (k >= i - 4) {
var content = text.slice(i + htmlCommentBegin.length, j);
var isBlock = (k < 0) || (text[k] === "\n");
var isValid = isBlock ||
(!content.startsWith(">") && !content.startsWith("->") &&
!content.endsWith("-") && !content.includes("--"));
// If a valid block/inline comment...
if (isValid) {
var inlineCommentIndex = text
.slice(i, j + htmlCommentEnd.length)
.search(inlineCommentRe);
// If not a markdownlint inline directive...
if (inlineCommentIndex === -1) {
var blanks = content
.replace(/[^\r\n]/g, " ")
.replace(/ ([\r\n])/g, "\\$1");
text =
text.slice(0, i + htmlCommentBegin.length) +
blanks +
text.slice(j);
}
}
}
}
i = j + htmlCommentEnd.length;
}

View file

@ -102,33 +102,48 @@ module.exports.includesSorted = function includesSorted(array, element) {
// Replaces the text of all properly-formatted HTML comments with whitespace
// This preserves the line/column information for the rest of the document
// Trailing whitespace is avoided with a '\' character in the last column
// See https://www.w3.org/TR/html5/syntax.html#comments for details
// https://spec.commonmark.org/0.29/#html-blocks
// https://spec.commonmark.org/0.29/#html-comment
const htmlCommentBegin = "<!--";
const htmlCommentEnd = "-->";
module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
let i = 0;
while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) {
const j = text.indexOf(htmlCommentEnd, i);
const j = text.indexOf(htmlCommentEnd, i + 2);
if (j === -1) {
// Un-terminated comments are treated as text
break;
}
const comment = text.slice(i + htmlCommentBegin.length, j);
if (
(comment.length > 0) &&
!comment.startsWith(">") &&
!comment.startsWith("->") &&
!comment.endsWith("<!-") &&
!comment.includes("<!--") &&
// !comment.includes("-->") &&
!comment.includes("--!>") &&
(text.slice(i, j + htmlCommentEnd.length).search(inlineCommentRe) === -1)
) {
const blanks = comment
.replace(/[^\r\n]/g, " ")
.replace(/ ([\r\n])/g, "\\$1");
text = text.slice(0, i + htmlCommentBegin.length) +
blanks + text.slice(j);
// If the comment has content...
if (j > i + htmlCommentBegin.length) {
let k = i - 1;
while (text[k] === " ") {
k--;
}
// If comment is not within an indented code block...
if (k >= i - 4) {
const content = text.slice(i + htmlCommentBegin.length, j);
const isBlock = (k < 0) || (text[k] === "\n");
const isValid = isBlock ||
(!content.startsWith(">") && !content.startsWith("->") &&
!content.endsWith("-") && !content.includes("--"));
// If a valid block/inline comment...
if (isValid) {
const inlineCommentIndex = text
.slice(i, j + htmlCommentEnd.length)
.search(inlineCommentRe);
// If not a markdownlint inline directive...
if (inlineCommentIndex === -1) {
const blanks = content
.replace(/[^\r\n]/g, " ")
.replace(/ ([\r\n])/g, "\\$1");
text =
text.slice(0, i + htmlCommentBegin.length) +
blanks +
text.slice(j);
}
}
}
}
i = j + htmlCommentEnd.length;
}

65
test/html-comments.md Normal file
View file

@ -0,0 +1,65 @@
# HTML Comments
## Block Comments
<https://spec.commonmark.org/0.29/#html-blocks>
<!-- *comment * -->
<!-- *comment * -->text
<!-- *comment * -->
<!-- *code * -->
<!-- *comment *
*comment * -->
<!-- *comment *
*comment *
*comment * -->
<!--> *{MD037} * -->
<!---> *{MD037} * -->
<!-- *comment * --->
<!-- -- *comment * -->
<!-- *comment * -- -->
## Inline Comments
<https://spec.commonmark.org/0.29/#html-comment>
t<!-- *comment * -->
t<!-- *comment * -->text
t<!-- *comment * -->
t<!-- *code * -->
t<!-- *comment *
*comment * -->
t<!-- *comment *
*comment *
*comment * -->
t<!--> *{MD037} * -->
t<!---> *{MD037} * -->
t<!-- *{MD037} * --->
t<!-- -- *{MD037} * -->
t<!-- *{MD037} * -- -->
## Notes
It's important that the rule used above is one that calls
`helpers.forEachLine` so `markdown-it` doesn't ignore any
incorrectly-remaining comment blocks.

View file

@ -18,11 +18,11 @@ Hard tab
Hard tab
-->
<!--
Text <!--
Hard tab {MD010}
Invalid--!>comment
Hard tab {MD010}
-->
--> text
Te<!-- Hard tab -->xt

View file

@ -9,6 +9,12 @@ const helpers = require("../helpers");
test("clearHtmlCommentTextValid", (t) => {
t.plan(1);
const validComments = [
"<!-->",
"<!--->",
"<!---->",
"<!-- comment -->",
" <!-- comment -->",
" <!-- comment -->",
"<!-- text -->",
"<!--text-->",
"<!-- -->",
@ -47,6 +53,12 @@ test("clearHtmlCommentTextValid", (t) => {
"text"
];
const validResult = [
"<!-->",
"<!--->",
"<!---->",
"<!-- -->",
" <!-- -->",
" <!-- -->",
"<!-- -->",
"<!-- -->",
"<!-- -->",
@ -100,22 +112,12 @@ test("clearHtmlCommentTextInvalid", (t) => {
"<!-->text-->",
"<!--->text-->",
"<!---->",
// Restrictions from specification
"<!-->-->",
"<!-->t-->",
"<!--->-->",
"<!--->t-->",
"<!--<!--t-->",
"<!--t<!---->",
"<!--t<!--t-->",
// "<!---->t-->",
// "<!--t-->-->",
// "<!--t-->t-->",
"<!----!>t-->",
"<!--t--!>-->",
"<!--t--!>t-->",
"<!--<!--->",
"<!--t<!--->"
"<!---->t-->",
" <!-- indented code block -->"
];
const actual = helpers.clearHtmlCommentText(invalidComments.join("\n"));
const expected = invalidComments.join("\n");