Change HTML comment content sanitization to avoid breaking list item indentation and to respect table cell content rules (fixes #563).

This commit is contained in:
David Anson 2022-10-14 20:59:42 -07:00
parent 7f8962c882
commit f0bb4c639b
8 changed files with 593 additions and 63 deletions

View file

@ -154,6 +154,12 @@ module.exports.includesSorted = function includesSorted(array, element) {
// https://spec.commonmark.org/0.29/#html-comment // https://spec.commonmark.org/0.29/#html-comment
const htmlCommentBegin = "<!--"; const htmlCommentBegin = "<!--";
const htmlCommentEnd = "-->"; const htmlCommentEnd = "-->";
// Placeholder character written over HTML comment content when it is cleared
const safeCommentCharacter = ".";
// Matches text that begins with optional spaces followed by a pipe; tested
// against the text preceding a comment on its line to detect a possible table row
const startsWithPipeRe = /^ *\|/;
// Matches every character except carriage return and line feed
const notCrLfRe = /[^\r\n]/g;
// Matches every character except space, carriage return, and line feed, so
// spaces and line breaks inside a comment survive clearing
const notSpaceCrLfRe = /[^ \r\n]/g;
// Matches a run of one or more spaces immediately followed by a CR or LF
const trailingSpaceRe = / +[\r\n]/g;
// Replacer for trailingSpaceRe matches: overwrites the spaces in the matched
// text with the placeholder character while leaving the CR/LF untouched
// NOTE(review): presumably so cleared comments do not end lines with
// whitespace - confirm against the trailing-space rules
const replaceTrailingSpace = (s) => s.replace(notCrLfRe, safeCommentCharacter);
module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) { module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
let i = 0; let i = 0;
while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) { while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) {
@ -164,24 +170,27 @@ module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
} }
// If the comment has content... // If the comment has content...
if (j > i + htmlCommentBegin.length) { if (j > i + htmlCommentBegin.length) {
let k = i - 1; const content = text.slice(i + htmlCommentBegin.length, j);
while (text[k] === " ") { const lastLf = text.lastIndexOf("\n", i) + 1;
k--; const preText = text.slice(lastLf, i);
} const isBlock = preText.trim().length === 0;
// If comment is not within an indented code block... const couldBeTable = startsWithPipeRe.test(preText);
if (k >= i - 4) { const spansTableCells = couldBeTable && content.includes("\n");
const content = text.slice(i + htmlCommentBegin.length, j); const isValid = isBlock ||
const isBlock = (k < 0) || (text[k] === "\n"); !(spansTableCells ||
const isValid = isBlock || content.startsWith(">") ||
(!content.startsWith(">") && !content.startsWith("->") && content.startsWith("->") ||
!content.endsWith("-") && !content.includes("--")); content.endsWith("-") ||
// If a valid block/inline comment... content.includes("--"));
if (isValid) { // If a valid block/inline comment...
text = if (isValid) {
text.slice(0, i + htmlCommentBegin.length) + const clearedContent = content
content.replace(/[^\r\n]/g, ".") + .replace(notSpaceCrLfRe, safeCommentCharacter)
text.slice(j); .replace(trailingSpaceRe, replaceTrailingSpace);
} text =
text.slice(0, i + htmlCommentBegin.length) +
clearedContent +
text.slice(j);
} }
} }
i = j + htmlCommentEnd.length; i = j + htmlCommentEnd.length;

View file

@ -142,6 +142,12 @@ module.exports.includesSorted = function includesSorted(array, element) {
// https://spec.commonmark.org/0.29/#html-comment // https://spec.commonmark.org/0.29/#html-comment
const htmlCommentBegin = "<!--"; const htmlCommentBegin = "<!--";
const htmlCommentEnd = "-->"; const htmlCommentEnd = "-->";
// Placeholder character written over HTML comment content when it is cleared
const safeCommentCharacter = ".";
// Matches text that begins with optional spaces followed by a pipe; tested
// against the text preceding a comment on its line to detect a possible table row
const startsWithPipeRe = /^ *\|/;
// Matches every character except carriage return and line feed
const notCrLfRe = /[^\r\n]/g;
// Matches every character except space, carriage return, and line feed, so
// spaces and line breaks inside a comment survive clearing
const notSpaceCrLfRe = /[^ \r\n]/g;
// Matches a run of one or more spaces immediately followed by a CR or LF
const trailingSpaceRe = / +[\r\n]/g;
// Replacer for trailingSpaceRe matches: overwrites the spaces in the matched
// text with the placeholder character while leaving the CR/LF untouched
// NOTE(review): presumably so cleared comments do not end lines with
// whitespace - confirm against the trailing-space rules
const replaceTrailingSpace = (s) => s.replace(notCrLfRe, safeCommentCharacter);
module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) { module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
let i = 0; let i = 0;
while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) { while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) {
@ -152,24 +158,30 @@ module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
} }
// If the comment has content... // If the comment has content...
if (j > i + htmlCommentBegin.length) { if (j > i + htmlCommentBegin.length) {
let k = i - 1; const content = text.slice(i + htmlCommentBegin.length, j);
while (text[k] === " ") { const lastLf = text.lastIndexOf("\n", i) + 1;
k--; const preText = text.slice(lastLf, i);
} const isBlock = preText.trim().length === 0;
// If comment is not within an indented code block... const couldBeTable = startsWithPipeRe.test(preText);
if (k >= i - 4) { const spansTableCells = couldBeTable && content.includes("\n");
const content = text.slice(i + htmlCommentBegin.length, j); const isValid =
const isBlock = (k < 0) || (text[k] === "\n"); isBlock ||
const isValid = isBlock || !(
(!content.startsWith(">") && !content.startsWith("->") && spansTableCells ||
!content.endsWith("-") && !content.includes("--")); content.startsWith(">") ||
// If a valid block/inline comment... content.startsWith("->") ||
if (isValid) { content.endsWith("-") ||
text = content.includes("--")
text.slice(0, i + htmlCommentBegin.length) + );
content.replace(/[^\r\n]/g, ".") + // If a valid block/inline comment...
text.slice(j); if (isValid) {
} const clearedContent = content
.replace(notSpaceCrLfRe, safeCommentCharacter)
.replace(trailingSpaceRe, replaceTrailingSpace);
text =
text.slice(0, i + htmlCommentBegin.length) +
clearedContent +
text.slice(j);
} }
} }
i = j + htmlCommentEnd.length; i = j + htmlCommentEnd.length;

View file

@ -0,0 +1,43 @@
# HTML Comment in Code and Table
`{MD038} `
<!-- `ignored ` -->
```text
<!-- `ignored ` -->
```
<!-- `ignored ` -->
| Table |
|------------|
| `{MD038} ` |
* item
`{MD038} `
* item
<!-- `ignored ` -->
* item
```text
<!-- `ignored ` -->
```
* item
<!-- `ignored ` -->
* item
| Table |
|------------|
| `{MD038} ` |
<!-- markdownlint-configure-file {
"code-block-style": false
} -->

View file

@ -0,0 +1,55 @@
# HTML Comment in List Item
- item
<!--
-->
- item
x<!--
-->
- item
<!--
-->x
- item
x<!--
x
-->x
- item
```html
<!--
-->
```
- item
```html
x<!--
-->
```
- item
```html
<!--
-->x
```
- item
```html
x<!--
x
-->x
```
- item
Placeholder issue {MD047}

View file

@ -7,11 +7,47 @@
| Table | | Table |
|-------| |-------|
| <!-- | | <!-- |
|comment| | cell |
| --> | | --> |
| Table | | Table |
|-------| |------------|
| <!-- \ | <!-- |
\ | `{MD038} ` |
--> | | --> |
| Table |
|----------------|
| <!--
`{MD038} ` --> |
| cell |
| Table |
|----------------|
| <!-- \
`{MD038} ` --> |
| cell |
| Table | Table |
|-------|-------|
| cell | <!-- |
| cell | cell |
| cell | --> |
| Table | Table |
|-------|------------|
| cell | <!-- |
| cell | `{MD038} ` |
| cell | --> |
| Table | Table |
|-------|----------------|
| cell | <!--
| cell | `{MD038} ` --> |
| cell | cell |
| Table | Table |
|-------|----------------|
| cell | <!-- \
| cell | `{MD038} ` --> |
| cell | cell |

View file

@ -57,17 +57,17 @@ test("clearHtmlCommentTextValid", (t) => {
"<!-->", "<!-->",
"<!--->", "<!--->",
"<!---->", "<!---->",
"<!--.........-->", "<!-- ....... -->",
" <!--.........-->", " <!-- ....... -->",
" <!--.........-->", " <!-- ....... -->",
"<!--......-->", "<!-- .... -->",
"<!--....-->",
"<!--.-->",
"<!--....-->", "<!--....-->",
"<!-- -->",
"<!-- .. -->",
"<!---->", "<!---->",
"<!--.....-->", "<!--.....-->",
"<!--.........-->", "<!--.........-->",
"<!--..-->", "<!--. -->",
"<!--", "<!--",
"-->", "-->",
"<!--", "<!--",
@ -79,7 +79,7 @@ test("clearHtmlCommentTextValid", (t) => {
"-->", "-->",
"<!--", "<!--",
"", "",
"......", " .....",
"", "",
"-->", "-->",
"<!--....", "<!--....",
@ -93,7 +93,7 @@ test("clearHtmlCommentTextValid", (t) => {
"-->text", "-->text",
"<!--....--><!--....-->", "<!--....--><!--....-->",
"text<!--....-->text<!--....-->text", "text<!--....-->text<!--....-->text",
"text<!--..............-->text", "text<!--.... . .... ..-->text",
"<!--", "<!--",
"text" "text"
]; ];
@ -118,7 +118,7 @@ test("clearHtmlCommentTextInvalid", (t) => {
"<!--->-->", "<!--->-->",
"<!--->t-->", "<!--->t-->",
"<!---->t-->", "<!---->t-->",
" <!-- indented code block -->" " <!-- ........ .... ..... -->"
]; ];
const actual = helpers.clearHtmlCommentText(invalidComments.join("\n")); const actual = helpers.clearHtmlCommentText(invalidComments.join("\n"));
const expected = invalidComments.join("\n"); const expected = invalidComments.join("\n");
@ -134,8 +134,8 @@ test("clearHtmlCommentTextNonGreedy", (t) => {
"<!----> -->" "<!----> -->"
]; ];
const nonGreedyResult = [ const nonGreedyResult = [
"<!--......--> -->", "<!-- .... --> -->",
"<!--......--> -->", "<!--..... --> -->",
"<!--.--> -->", "<!--.--> -->",
"<!----> -->" "<!----> -->"
]; ];
@ -155,9 +155,9 @@ test("clearHtmlCommentTextEmbedded", (t) => {
]; ];
const embeddedResult = [ const embeddedResult = [
"text<!--....-->text", "text<!--....-->text",
"<!--............................-->", "<!-- .................... ..... -->",
"text<!--....-->text", "text<!--....-->text",
"text<!--............................-->text", "text<!-- .................... ..... -->text",
"text<!--....-->text" "text<!--....-->text"
]; ];
const actual = helpers.clearHtmlCommentText(embeddedComments.join("\n")); const actual = helpers.clearHtmlCommentText(embeddedComments.join("\n"));

View file

@ -15276,12 +15276,351 @@ Generated by [AVA](https://avajs.dev).
`, `,
} }
## html-comment-in-code-and-table.md
> Snapshot 1
{
errors: [
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
1,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 2,
insertText: '{MD038}',
},
lineNumber: 3,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 15,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 19,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
5,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 6,
insertText: '{MD038}',
},
lineNumber: 39,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
],
fixed: `# HTML Comment in Code and Table␊
\`{MD038}\`␊
<!-- \`ignored \` -->
\`\`\`text␊
<!-- \`ignored \` -->
\`\`\`␊
<!-- \`ignored \` -->
| Table |␊
|------------|␊
| \`{MD038}\` |␊
* item␊
\`{MD038}\`␊
* item␊
<!-- \`ignored \` -->
* item␊
\`\`\`text␊
<!-- \`ignored \` -->
\`\`\`␊
* item␊
<!-- \`ignored \` -->
* item␊
| Table |␊
|------------|␊
| \`{MD038}\` |␊
<!-- markdownlint-configure-file {␊
"code-block-style": false␊
} -->␊
`,
}
## html-comment-in-list-item.md
> Snapshot 1
{
errors: [
{
errorContext: null,
errorDetail: null,
errorRange: [
25,
1,
],
fixInfo: {
editColumn: 26,
insertText: `␊
`,
},
lineNumber: 55,
ruleDescription: 'Files should end with a single newline character',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md047',
ruleNames: [
'MD047',
'single-trailing-newline',
],
},
],
fixed: `# HTML Comment in List Item␊
- item␊
<!--
-->␊
- item␊
x<!--
-->␊
- item␊
<!--
-->x␊
- item␊
x<!--
x␊
-->x␊
- item␊
\`\`\`html␊
<!--
-->␊
\`\`\`␊
- item␊
\`\`\`html␊
x<!--
-->␊
\`\`\`␊
- item␊
\`\`\`html␊
<!--
-->x␊
\`\`\`␊
- item␊
\`\`\`html␊
x<!--
x␊
-->x␊
\`\`\`␊
- item␊
Placeholder issue {MD047}␊
`,
}
## html-comment-in-markdown-table.md ## html-comment-in-markdown-table.md
> Snapshot 1 > Snapshot 1
{ {
errors: [], errors: [
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 16,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 22,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 28,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
11,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 12,
insertText: '{MD038}',
},
lineNumber: 40,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
11,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 12,
insertText: '{MD038}',
},
lineNumber: 46,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
11,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 12,
insertText: '{MD038}',
},
lineNumber: 52,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
],
fixed: `# HTML Comment in Markdown Table␊ fixed: `# HTML Comment in Markdown Table␊
\`\`\`xml␊ \`\`\`xml␊
@ -15291,14 +15630,50 @@ Generated by [AVA](https://avajs.dev).
| Table |␊ | Table |␊
|-------|␊ |-------|␊
| <!-- |␊ | <!-- |␊
|comment|␊ | cell |␊
| --> |␊ | --> |␊
| Table |␊ | Table |␊
|-------|␊ |------------|␊
| <!-- \\ | <!-- |␊
\\␊ | \`{MD038}\` |␊
--> |␊ | --> |␊
| Table |␊
|----------------|␊
| <!--
\`{MD038}\` --> |␊
| cell |␊
| Table |␊
|----------------|␊
| <!-- \\
\`{MD038}\` --> |␊
| cell |␊
| Table | Table |␊
|-------|-------|␊
| cell | <!-- |␊
| cell | cell |␊
| cell | --> |␊
| Table | Table |␊
|-------|------------|␊
| cell | <!-- |␊
| cell | \`{MD038}\` |␊
| cell | --> |␊
| Table | Table |␊
|-------|----------------|␊
| cell | <!--
| cell | \`{MD038}\` --> |␊
| cell | cell |␊
| Table | Table |␊
|-------|----------------|␊
| cell | <!-- \\
| cell | \`{MD038}\` --> |␊
| cell | cell |␊
`, `,
} }