Change HTML comment content sanitization to avoid breaking list item indentation and to respect table cell content rules (fixes #563).

This commit is contained in:
David Anson 2022-10-14 20:59:42 -07:00
parent 7f8962c882
commit f0bb4c639b
8 changed files with 593 additions and 63 deletions

View file

@ -154,6 +154,12 @@ module.exports.includesSorted = function includesSorted(array, element) {
// https://spec.commonmark.org/0.29/#html-comment
// Delimiters that open and close an HTML comment
const htmlCommentBegin = "<!--";
const htmlCommentEnd = "-->";
// Placeholder character that clearHtmlCommentText writes over comment content
const safeCommentCharacter = ".";
// Matches text that begins with optional spaces followed by a pipe; tested
// against the text preceding a comment on its line to guess whether that
// line could be a table row
const startsWithPipeRe = /^ *\|/;
// Matches every character other than carriage return and line feed
const notCrLfRe = /[^\r\n]/g;
// Matches every character other than space, carriage return, and line feed
const notSpaceCrLfRe = /[^ \r\n]/g;
// Matches a run of one or more spaces together with the CR or LF after it
const trailingSpaceRe = / +[\r\n]/g;
// Replacer for trailingSpaceRe matches: overwrites each non-CR/LF character
// of s (the spaces) with the placeholder so no trailing space is left behind
const replaceTrailingSpace = (s) => s.replace(notCrLfRe, safeCommentCharacter);
module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
let i = 0;
while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) {
@ -164,26 +170,29 @@ module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
}
// If the comment has content...
if (j > i + htmlCommentBegin.length) {
let k = i - 1;
while (text[k] === " ") {
k--;
}
// If comment is not within an indented code block...
if (k >= i - 4) {
const content = text.slice(i + htmlCommentBegin.length, j);
const isBlock = (k < 0) || (text[k] === "\n");
const lastLf = text.lastIndexOf("\n", i) + 1;
const preText = text.slice(lastLf, i);
const isBlock = preText.trim().length === 0;
const couldBeTable = startsWithPipeRe.test(preText);
const spansTableCells = couldBeTable && content.includes("\n");
const isValid = isBlock ||
(!content.startsWith(">") && !content.startsWith("->") &&
!content.endsWith("-") && !content.includes("--"));
!(spansTableCells ||
content.startsWith(">") ||
content.startsWith("->") ||
content.endsWith("-") ||
content.includes("--"));
// If a valid block/inline comment...
if (isValid) {
const clearedContent = content
.replace(notSpaceCrLfRe, safeCommentCharacter)
.replace(trailingSpaceRe, replaceTrailingSpace);
text =
text.slice(0, i + htmlCommentBegin.length) +
content.replace(/[^\r\n]/g, ".") +
clearedContent +
text.slice(j);
}
}
}
i = j + htmlCommentEnd.length;
}
return text;

View file

@ -142,6 +142,12 @@ module.exports.includesSorted = function includesSorted(array, element) {
// https://spec.commonmark.org/0.29/#html-comment
// Delimiters that open and close an HTML comment
const htmlCommentBegin = "<!--";
const htmlCommentEnd = "-->";
// Placeholder character that clearHtmlCommentText writes over comment content
const safeCommentCharacter = ".";
// Matches text that begins with optional spaces followed by a pipe; tested
// against the text preceding a comment on its line to guess whether that
// line could be a table row
const startsWithPipeRe = /^ *\|/;
// Matches every character other than carriage return and line feed
const notCrLfRe = /[^\r\n]/g;
// Matches every character other than space, carriage return, and line feed
const notSpaceCrLfRe = /[^ \r\n]/g;
// Matches a run of one or more spaces together with the CR or LF after it
const trailingSpaceRe = / +[\r\n]/g;
// Replacer for trailingSpaceRe matches: overwrites each non-CR/LF character
// of s (the spaces) with the placeholder so no trailing space is left behind
const replaceTrailingSpace = (s) => s.replace(notCrLfRe, safeCommentCharacter);
module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
let i = 0;
while ((i = text.indexOf(htmlCommentBegin, i)) !== -1) {
@ -152,26 +158,32 @@ module.exports.clearHtmlCommentText = function clearHtmlCommentText(text) {
}
// If the comment has content...
if (j > i + htmlCommentBegin.length) {
let k = i - 1;
while (text[k] === " ") {
k--;
}
// If comment is not within an indented code block...
if (k >= i - 4) {
const content = text.slice(i + htmlCommentBegin.length, j);
const isBlock = (k < 0) || (text[k] === "\n");
const isValid = isBlock ||
(!content.startsWith(">") && !content.startsWith("->") &&
!content.endsWith("-") && !content.includes("--"));
const lastLf = text.lastIndexOf("\n", i) + 1;
const preText = text.slice(lastLf, i);
const isBlock = preText.trim().length === 0;
const couldBeTable = startsWithPipeRe.test(preText);
const spansTableCells = couldBeTable && content.includes("\n");
const isValid =
isBlock ||
!(
spansTableCells ||
content.startsWith(">") ||
content.startsWith("->") ||
content.endsWith("-") ||
content.includes("--")
);
// If a valid block/inline comment...
if (isValid) {
const clearedContent = content
.replace(notSpaceCrLfRe, safeCommentCharacter)
.replace(trailingSpaceRe, replaceTrailingSpace);
text =
text.slice(0, i + htmlCommentBegin.length) +
content.replace(/[^\r\n]/g, ".") +
clearedContent +
text.slice(j);
}
}
}
i = j + htmlCommentEnd.length;
}
return text;

View file

@ -0,0 +1,43 @@
# HTML Comment in Code and Table
`{MD038} `
<!-- `ignored ` -->
```text
<!-- `ignored ` -->
```
<!-- `ignored ` -->
| Table |
|------------|
| `{MD038} ` |
* item
`{MD038} `
* item
<!-- `ignored ` -->
* item
```text
<!-- `ignored ` -->
```
* item
<!-- `ignored ` -->
* item
| Table |
|------------|
| `{MD038} ` |
<!-- markdownlint-configure-file {
"code-block-style": false
} -->

View file

@ -0,0 +1,55 @@
# HTML Comment in List Item
- item
<!--
-->
- item
x<!--
-->
- item
<!--
-->x
- item
x<!--
x
-->x
- item
```html
<!--
-->
```
- item
```html
x<!--
-->
```
- item
```html
<!--
-->x
```
- item
```html
x<!--
x
-->x
```
- item
Placeholder issue {MD047}

View file

@ -7,11 +7,47 @@
| Table |
|-------|
| <!-- |
|comment|
| cell |
| --> |
| Table |
|-------|
|------------|
| <!-- |
| `{MD038} ` |
| --> |
| Table |
|----------------|
| <!--
`{MD038} ` --> |
| cell |
| Table |
|----------------|
| <!-- \
\
--> |
`{MD038} ` --> |
| cell |
| Table | Table |
|-------|-------|
| cell | <!-- |
| cell | cell |
| cell | --> |
| Table | Table |
|-------|------------|
| cell | <!-- |
| cell | `{MD038} ` |
| cell | --> |
| Table | Table |
|-------|----------------|
| cell | <!--
| cell | `{MD038} ` --> |
| cell | cell |
| Table | Table |
|-------|----------------|
| cell | <!-- \
| cell | `{MD038} ` --> |
| cell | cell |

View file

@ -57,17 +57,17 @@ test("clearHtmlCommentTextValid", (t) => {
"<!-->",
"<!--->",
"<!---->",
"<!--.........-->",
" <!--.........-->",
" <!--.........-->",
"<!--......-->",
"<!-- ....... -->",
" <!-- ....... -->",
" <!-- ....... -->",
"<!-- .... -->",
"<!--.-->",
"<!--....-->",
"<!-- -->",
"<!-- .. -->",
"<!---->",
"<!--.....-->",
"<!--.........-->",
"<!--..-->",
"<!--. -->",
"<!--",
"-->",
"<!--",
@ -79,7 +79,7 @@ test("clearHtmlCommentTextValid", (t) => {
"-->",
"<!--",
"",
"......",
" .....",
"",
"-->",
"<!--....",
@ -93,7 +93,7 @@ test("clearHtmlCommentTextValid", (t) => {
"-->text",
"<!--....--><!--....-->",
"text<!--....-->text<!--....-->text",
"text<!--..............-->text",
"text<!--.... . .... ..-->text",
"<!--",
"text"
];
@ -118,7 +118,7 @@ test("clearHtmlCommentTextInvalid", (t) => {
"<!--->-->",
"<!--->t-->",
"<!---->t-->",
" <!-- indented code block -->"
" <!-- ........ .... ..... -->"
];
const actual = helpers.clearHtmlCommentText(invalidComments.join("\n"));
const expected = invalidComments.join("\n");
@ -134,8 +134,8 @@ test("clearHtmlCommentTextNonGreedy", (t) => {
"<!----> -->"
];
const nonGreedyResult = [
"<!--......--> -->",
"<!--......--> -->",
"<!-- .... --> -->",
"<!--..... --> -->",
"<!--.--> -->",
"<!----> -->"
];
@ -155,9 +155,9 @@ test("clearHtmlCommentTextEmbedded", (t) => {
];
const embeddedResult = [
"text<!--....-->text",
"<!--............................-->",
"<!-- .................... ..... -->",
"text<!--....-->text",
"text<!--............................-->text",
"text<!-- .................... ..... -->text",
"text<!--....-->text"
];
const actual = helpers.clearHtmlCommentText(embeddedComments.join("\n"));

View file

@ -15276,12 +15276,351 @@ Generated by [AVA](https://avajs.dev).
`,
}
## html-comment-in-code-and-table.md
> Snapshot 1
{
errors: [
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
1,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 2,
insertText: '{MD038}',
},
lineNumber: 3,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 15,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 19,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
5,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 6,
insertText: '{MD038}',
},
lineNumber: 39,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
],
fixed: `# HTML Comment in Code and Table␊
\`{MD038}\`␊
<!-- \`ignored \` -->
\`\`\`text␊
<!-- \`ignored \` -->
\`\`\`␊
<!-- \`ignored \` -->
| Table |␊
|------------|␊
| \`{MD038}\` |␊
* item␊
\`{MD038}\`␊
* item␊
<!-- \`ignored \` -->
* item␊
\`\`\`text␊
<!-- \`ignored \` -->
\`\`\`␊
* item␊
<!-- \`ignored \` -->
* item␊
| Table |␊
|------------|␊
| \`{MD038}\` |␊
<!-- markdownlint-configure-file {␊
"code-block-style": false␊
} -->␊
`,
}
## html-comment-in-list-item.md
> Snapshot 1
{
errors: [
{
errorContext: null,
errorDetail: null,
errorRange: [
25,
1,
],
fixInfo: {
editColumn: 26,
insertText: `␊
`,
},
lineNumber: 55,
ruleDescription: 'Files should end with a single newline character',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md047',
ruleNames: [
'MD047',
'single-trailing-newline',
],
},
],
fixed: `# HTML Comment in List Item␊
- item␊
<!--
-->␊
- item␊
x<!--
-->␊
- item␊
<!--
-->x␊
- item␊
x<!--
x␊
-->x␊
- item␊
\`\`\`html␊
<!--
-->␊
\`\`\`␊
- item␊
\`\`\`html␊
x<!--
-->␊
\`\`\`␊
- item␊
\`\`\`html␊
<!--
-->x␊
\`\`\`␊
- item␊
\`\`\`html␊
x<!--
x␊
-->x␊
\`\`\`␊
- item␊
Placeholder issue {MD047}␊
`,
}
## html-comment-in-markdown-table.md
> Snapshot 1
{
errors: [],
errors: [
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 16,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 22,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
3,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 4,
insertText: '{MD038}',
},
lineNumber: 28,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
11,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 12,
insertText: '{MD038}',
},
lineNumber: 40,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
11,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 12,
insertText: '{MD038}',
},
lineNumber: 46,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
{
errorContext: '`{MD038} `',
errorDetail: null,
errorRange: [
11,
10,
],
fixInfo: {
deleteCount: 8,
editColumn: 12,
insertText: '{MD038}',
},
lineNumber: 52,
ruleDescription: 'Spaces inside code span elements',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/Rules.md#md038',
ruleNames: [
'MD038',
'no-space-in-code',
],
},
],
fixed: `# HTML Comment in Markdown Table␊
\`\`\`xml␊
@ -15291,14 +15630,50 @@ Generated by [AVA](https://avajs.dev).
| Table |␊
|-------|␊
| <!-- |␊
|comment|␊
| cell |␊
| --> |␊
| Table |␊
|-------|␊
|------------|␊
| <!-- |␊
| \`{MD038}\` |␊
| --> |␊
| Table |␊
|----------------|␊
| <!--
\`{MD038}\` --> |␊
| cell |␊
| Table |␊
|----------------|␊
| <!-- \\
\\␊
--> |␊
\`{MD038}\` --> |␊
| cell |␊
| Table | Table |␊
|-------|-------|␊
| cell | <!-- |␊
| cell | cell |␊
| cell | --> |␊
| Table | Table |␊
|-------|------------|␊
| cell | <!-- |␊
| cell | \`{MD038}\` |␊
| cell | --> |␊
| Table | Table |␊
|-------|----------------|␊
| cell | <!--
| cell | \`{MD038}\` --> |␊
| cell | cell |␊
| Table | Table |␊
|-------|----------------|␊
| cell | <!-- \\
| cell | \`{MD038}\` --> |␊
| cell | cell |␊
`,
}