Update MD034/no-bare-urls to better handle multiple similar issues on the same line (fixes https://github.com/igorshubovych/markdownlint-cli/issues/339).

This commit is contained in:
David Anson 2022-12-06 22:14:40 -08:00
parent ce5d393109
commit cba5e8d340
7 changed files with 290 additions and 119 deletions

View file

@ -47,7 +47,8 @@ module.exports.inlineCommentStartRe = inlineCommentStartRe;
const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g; const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g;
module.exports.htmlElementRe = htmlElementRe; module.exports.htmlElementRe = htmlElementRe;
// Regular expressions for range matching // Regular expressions for range matching
module.exports.bareUrlRe = /(?:http|ftp)s?:\/\/[^\s\]"']*(?:\/|[^\s\]"'\W])/ig; module.exports.bareUrlRe =
/(?:http|ftp)s?:\/\/[^\s\]<>"'`]*(?:\/|[^\s\]<>"'`\W])/ig;
module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/; module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/; module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
// Regular expression for all instances of emphasis markers // Regular expression for all instances of emphasis markers
@ -414,17 +415,22 @@ module.exports.flattenLists = function flattenLists(tokens) {
} }
return flattenedLists; return flattenedLists;
}; };
// Calls the provided function for each specified inline child token /**
module.exports.forEachInlineChild = * Calls the provided function for each specified inline child token.
function forEachInlineChild(params, type, handler) { *
filterTokens(params, "inline", function forToken(token) { * @param {Object} params RuleParams instance.
for (const child of token.children) { * @param {string} type Token type identifier.
if (child.type === type) { * @param {Function} handler Callback function.
* @returns {void}
*/
function forEachInlineChild(params, type, handler) {
filterTokens(params, "inline", (token) => {
for (const child of token.children.filter((c) => c.type === type)) {
handler(child, token); handler(child, token);
} }
}
}); });
}; }
module.exports.forEachInlineChild = forEachInlineChild;
// Calls the provided function for each heading's content // Calls the provided function for each heading's content
module.exports.forEachHeading = function forEachHeading(params, handler) { module.exports.forEachHeading = function forEachHeading(params, handler) {
let heading = null; let heading = null;
@ -585,6 +591,7 @@ module.exports.codeBlockAndSpanRanges = (params, lineMetadata) => {
*/ */
module.exports.htmlElementRanges = (params, lineMetadata) => { module.exports.htmlElementRanges = (params, lineMetadata) => {
const exclusions = []; const exclusions = [];
// Match with htmlElementRe
forEachLine(lineMetadata, (line, lineIndex, inCode) => { forEachLine(lineMetadata, (line, lineIndex, inCode) => {
let match = null; let match = null;
// eslint-disable-next-line no-unmodified-loop-condition // eslint-disable-next-line no-unmodified-loop-condition
@ -592,6 +599,32 @@ module.exports.htmlElementRanges = (params, lineMetadata) => {
exclusions.push([lineIndex, match.index, match[0].length]); exclusions.push([lineIndex, match.index, match[0].length]);
} }
}); });
// Match with html_inline
forEachInlineChild(params, "html_inline", (token, parent) => {
const parentContent = parent.content;
let tokenContent = token.content;
const parentIndex = parentContent.indexOf(tokenContent);
let deltaLines = 0;
let indent = 0;
for (let i = parentIndex - 1; i >= 0; i--) {
if (parentContent[i] === "\n") {
deltaLines++;
}
else if (deltaLines === 0) {
indent++;
}
}
let lineIndex = token.lineNumber - 1 + deltaLines;
do {
const index = tokenContent.indexOf("\n");
const length = (index === -1) ? tokenContent.length : index;
exclusions.push([lineIndex, indent, length]);
tokenContent = tokenContent.slice(length + 1);
lineIndex++;
indent = 0;
} while (tokenContent.length > 0);
});
// Return results
return exclusions; return exclusions;
}; };
/** /**
@ -3704,60 +3737,62 @@ module.exports = {
"use strict"; "use strict";
// @ts-check // @ts-check
const { addErrorContext, bareUrlRe, filterTokens } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js"); const { addErrorContext, bareUrlRe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const htmlLinkOpenRe = /^<a[\s>]/i; const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } = __webpack_require__(/*! ./cache */ "../lib/cache.js");
const htmlLinkCloseRe = /^<\/a[\s>]/i; const htmlLinkRe = /<a(?:|\s[^>]+)>[^<>]*<\/a\s*>/ig;
module.exports = { module.exports = {
"names": ["MD034", "no-bare-urls"], "names": ["MD034", "no-bare-urls"],
"description": "Bare URL used", "description": "Bare URL used",
"tags": ["links", "url"], "tags": ["links", "url"],
"function": function MD034(params, onError) { "function": function MD034(params, onError) {
filterTokens(params, "inline", (token) => { const { lines } = params;
let inLink = false; const codeExclusions = [
let inInline = false; ...codeBlockAndSpanRanges(),
for (const child of token.children) { ...htmlElementRanges()
const { content, line, lineNumber, type } = child; ];
const { definitionLineIndices } = referenceLinkImageData();
for (const [lineIndex, line] of lines.entries()) {
if (definitionLineIndices[0] === lineIndex) {
definitionLineIndices.shift();
}
else {
let match = null; let match = null;
if (type === "link_open") { const lineExclusions = [];
inLink = true; while ((match = htmlLinkRe.exec(line)) !== null) {
lineExclusions.push([lineIndex, match.index, match[0].length]);
} }
else if (type === "link_close") { while ((match = bareUrlRe.exec(line)) !== null) {
inLink = false;
}
else if ((type === "html_inline") && htmlLinkOpenRe.test(content)) {
inInline = true;
}
else if ((type === "html_inline") && htmlLinkCloseRe.test(content)) {
inInline = false;
}
else if ((type === "text") && !inLink && !inInline) {
while ((match = bareUrlRe.exec(content)) !== null) {
const [bareUrl] = match; const [bareUrl] = match;
const matchIndex = match.index; const matchIndex = match.index;
const bareUrlLength = bareUrl.length; const bareUrlLength = bareUrl.length;
// Allow "[LINK]" to avoid conflicts with MD011/no-reversed-links const prefix = line.slice(0, matchIndex);
// Allow quoting as a way of deliberately including a bare URL const postfix = line.slice(matchIndex + bareUrlLength);
const leftChar = content[matchIndex - 1]; if (
const rightChar = content[matchIndex + bareUrlLength]; // Allow ](... to avoid reporting Markdown-style links
if (!((leftChar === "[") && (rightChar === "]")) && !(/\]\(\s*$/.test(prefix)) &&
!((leftChar === "\"") && (rightChar === "\"")) && // Allow <...> to avoid reporting non-bare links
!((leftChar === "'") && (rightChar === "'"))) { !(prefix.endsWith("<") && /^[#)]?>/.test(postfix)) &&
const index = line.indexOf(content); // Allow [...] to avoid MD011/no-reversed-links and nested links
const range = (index === -1) ? null : [ !(/\[[^\]]*$/.test(prefix) && /^[^[]*\]/.test(postfix)) &&
index + matchIndex + 1, // Allow "..." and '...' for deliberately including a bare link
!(prefix.endsWith("\"") && postfix.startsWith("\"")) &&
!(prefix.endsWith("'") && postfix.startsWith("'")) &&
!withinAnyRange(lineExclusions, lineIndex, matchIndex, bareUrlLength) &&
!withinAnyRange(codeExclusions, lineIndex, matchIndex, bareUrlLength)) {
const range = [
matchIndex + 1,
bareUrlLength bareUrlLength
]; ];
const fixInfo = range ? { const fixInfo = {
"editColumn": range[0], "editColumn": range[0],
"deleteCount": range[1], "deleteCount": range[1],
"insertText": `<${bareUrl}>` "insertText": `<${bareUrl}>`
} : null; };
addErrorContext(onError, lineNumber, bareUrl, null, null, range, fixInfo); addErrorContext(onError, lineIndex + 1, bareUrl, null, null, range, fixInfo);
} }
} }
} }
} }
});
} }
}; };

View file

@ -23,7 +23,8 @@ const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g;
module.exports.htmlElementRe = htmlElementRe; module.exports.htmlElementRe = htmlElementRe;
// Regular expressions for range matching // Regular expressions for range matching
module.exports.bareUrlRe = /(?:http|ftp)s?:\/\/[^\s\]"']*(?:\/|[^\s\]"'\W])/ig; module.exports.bareUrlRe =
/(?:http|ftp)s?:\/\/[^\s\]<>"'`]*(?:\/|[^\s\]<>"'`\W])/ig;
module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/; module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/; module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
@ -418,17 +419,22 @@ module.exports.flattenLists = function flattenLists(tokens) {
return flattenedLists; return flattenedLists;
}; };
// Calls the provided function for each specified inline child token /**
module.exports.forEachInlineChild = * Calls the provided function for each specified inline child token.
*
* @param {Object} params RuleParams instance.
* @param {string} type Token type identifier.
* @param {Function} handler Callback function.
* @returns {void}
*/
function forEachInlineChild(params, type, handler) { function forEachInlineChild(params, type, handler) {
filterTokens(params, "inline", function forToken(token) { filterTokens(params, "inline", (token) => {
for (const child of token.children) { for (const child of token.children.filter((c) => c.type === type)) {
if (child.type === type) {
handler(child, token); handler(child, token);
} }
}
}); });
}; }
module.exports.forEachInlineChild = forEachInlineChild;
// Calls the provided function for each heading's content // Calls the provided function for each heading's content
module.exports.forEachHeading = function forEachHeading(params, handler) { module.exports.forEachHeading = function forEachHeading(params, handler) {
@ -608,6 +614,7 @@ module.exports.codeBlockAndSpanRanges = (params, lineMetadata) => {
*/ */
module.exports.htmlElementRanges = (params, lineMetadata) => { module.exports.htmlElementRanges = (params, lineMetadata) => {
const exclusions = []; const exclusions = [];
// Match with htmlElementRe
forEachLine(lineMetadata, (line, lineIndex, inCode) => { forEachLine(lineMetadata, (line, lineIndex, inCode) => {
let match = null; let match = null;
// eslint-disable-next-line no-unmodified-loop-condition // eslint-disable-next-line no-unmodified-loop-condition
@ -615,6 +622,31 @@ module.exports.htmlElementRanges = (params, lineMetadata) => {
exclusions.push([ lineIndex, match.index, match[0].length ]); exclusions.push([ lineIndex, match.index, match[0].length ]);
} }
}); });
// Match with html_inline
forEachInlineChild(params, "html_inline", (token, parent) => {
const parentContent = parent.content;
let tokenContent = token.content;
const parentIndex = parentContent.indexOf(tokenContent);
let deltaLines = 0;
let indent = 0;
for (let i = parentIndex - 1; i >= 0; i--) {
if (parentContent[i] === "\n") {
deltaLines++;
} else if (deltaLines === 0) {
indent++;
}
}
let lineIndex = token.lineNumber - 1 + deltaLines;
do {
const index = tokenContent.indexOf("\n");
const length = (index === -1) ? tokenContent.length : index;
exclusions.push([ lineIndex, indent, length ]);
tokenContent = tokenContent.slice(length + 1);
lineIndex++;
indent = 0;
} while (tokenContent.length > 0);
});
// Return results
return exclusions; return exclusions;
}; };

View file

@ -2,57 +2,67 @@
"use strict"; "use strict";
const { addErrorContext, bareUrlRe, filterTokens } = require("../helpers"); const { addErrorContext, bareUrlRe, withinAnyRange } = require("../helpers");
const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } =
require("./cache");
const htmlLinkOpenRe = /^<a[\s>]/i; const htmlLinkRe = /<a(?:|\s[^>]+)>[^<>]*<\/a\s*>/ig;
const htmlLinkCloseRe = /^<\/a[\s>]/i;
module.exports = { module.exports = {
"names": [ "MD034", "no-bare-urls" ], "names": [ "MD034", "no-bare-urls" ],
"description": "Bare URL used", "description": "Bare URL used",
"tags": [ "links", "url" ], "tags": [ "links", "url" ],
"function": function MD034(params, onError) { "function": function MD034(params, onError) {
filterTokens(params, "inline", (token) => { const { lines } = params;
let inLink = false; const codeExclusions = [
let inInline = false; ...codeBlockAndSpanRanges(),
for (const child of token.children) { ...htmlElementRanges()
const { content, line, lineNumber, type } = child; ];
const { definitionLineIndices } = referenceLinkImageData();
for (const [ lineIndex, line ] of lines.entries()) {
if (definitionLineIndices[0] === lineIndex) {
definitionLineIndices.shift();
} else {
let match = null; let match = null;
if (type === "link_open") { const lineExclusions = [];
inLink = true; while ((match = htmlLinkRe.exec(line)) !== null) {
} else if (type === "link_close") { lineExclusions.push([ lineIndex, match.index, match[0].length ]);
inLink = false; }
} else if ((type === "html_inline") && htmlLinkOpenRe.test(content)) { while ((match = bareUrlRe.exec(line)) !== null) {
inInline = true;
} else if ((type === "html_inline") && htmlLinkCloseRe.test(content)) {
inInline = false;
} else if ((type === "text") && !inLink && !inInline) {
while ((match = bareUrlRe.exec(content)) !== null) {
const [ bareUrl ] = match; const [ bareUrl ] = match;
const matchIndex = match.index; const matchIndex = match.index;
const bareUrlLength = bareUrl.length; const bareUrlLength = bareUrl.length;
// Allow "[LINK]" to avoid conflicts with MD011/no-reversed-links const prefix = line.slice(0, matchIndex);
// Allow quoting as a way of deliberately including a bare URL const postfix = line.slice(matchIndex + bareUrlLength);
const leftChar = content[matchIndex - 1];
const rightChar = content[matchIndex + bareUrlLength];
if ( if (
!((leftChar === "[") && (rightChar === "]")) && // Allow ](... to avoid reporting Markdown-style links
!((leftChar === "\"") && (rightChar === "\"")) && !(/\]\(\s*$/.test(prefix)) &&
!((leftChar === "'") && (rightChar === "'")) // Allow <...> to avoid reporting non-bare links
!(prefix.endsWith("<") && /^[#)]?>/.test(postfix)) &&
// Allow [...] to avoid MD011/no-reversed-links and nested links
!(/\[[^\]]*$/.test(prefix) && /^[^[]*\]/.test(postfix)) &&
// Allow "..." and '...' for deliberately including a bare link
!(prefix.endsWith("\"") && postfix.startsWith("\"")) &&
!(prefix.endsWith("'") && postfix.startsWith("'")) &&
!withinAnyRange(
lineExclusions, lineIndex, matchIndex, bareUrlLength
) &&
!withinAnyRange(
codeExclusions, lineIndex, matchIndex, bareUrlLength
)
) { ) {
const index = line.indexOf(content); const range = [
const range = (index === -1) ? null : [ matchIndex + 1,
index + matchIndex + 1,
bareUrlLength bareUrlLength
]; ];
const fixInfo = range ? { const fixInfo = {
"editColumn": range[0], "editColumn": range[0],
"deleteCount": range[1], "deleteCount": range[1],
"insertText": `<${bareUrl}>` "insertText": `<${bareUrl}>`
} : null; };
addErrorContext( addErrorContext(
onError, onError,
lineNumber, lineIndex + 1,
bareUrl, bareUrl,
null, null,
null, null,
@ -63,6 +73,5 @@ module.exports = {
} }
} }
} }
});
} }
}; };

View file

@ -29,3 +29,21 @@ As is <a href="https://example.com/info.htm">https://example.com/info.htm text</
<br> Another violation: https://example.com. {MD034} <br> <br> Another violation: https://example.com. {MD034} <br>
<br/> Another violation: https://example.com. {MD034} <br/> <br/> Another violation: https://example.com. {MD034} <br/>
This is not a bare [link]( https://example.com ).
URLs in HTML are not bare:
<element-name first-attribute=" https://example.com/first " second-attribute=" https://example.com/second ">
Text
</element-name>
<element-name
first-attribute=" https://example.com/first "
second-attribute=" https://example.com/second "></element-name>
URLs in link and image text are not bare:
Text [link to https://example.com site](https://example.com) text.
Image ![for https://example.com site](https://example.com) text.

View file

@ -28,3 +28,9 @@ Other enclosures are not allowed:
(https://example.com) {MD034} (https://example.com) {MD034}
{https://example.com} {MD034} {https://example.com} {MD034}
Duplicate links in tables should be handled:
| Link | Same Link | Violation |
|----------------------|----------------------|-----------|
| https://example.com/ | https://example.com/ | {MD034} |

View file

@ -2998,6 +2998,24 @@ Generated by [AVA](https://avajs.dev).
<br> Another violation: <https://example.com>. {MD034} <br> <br> Another violation: <https://example.com>. {MD034} <br>
<br/> Another violation: <https://example.com>. {MD034} <br/> <br/> Another violation: <https://example.com>. {MD034} <br/>
This is not a bare [link]( https://example.com ).␊
URLs in HTML are not bare:␊
<element-name first-attribute=" https://example.com/first " second-attribute=" https://example.com/second ">
Text␊
</element-name>
<element-name
first-attribute=" https://example.com/first "␊
second-attribute=" https://example.com/second "></element-name>
URLs in link and image text are not bare:␊
Text [link to https://example.com site](https://example.com) text.␊
Image ![for https://example.com site](https://example.com) text.␊
`, `,
} }
@ -21928,8 +21946,15 @@ Generated by [AVA](https://avajs.dev).
{ {
errorContext: 'https://example.com/same', errorContext: 'https://example.com/same',
errorDetail: null, errorDetail: null,
errorRange: null, errorRange: [
fixInfo: null, 46,
24,
],
fixInfo: {
deleteCount: 24,
editColumn: 46,
insertText: '<https://example.com/same>',
},
lineNumber: 26, lineNumber: 26,
ruleDescription: 'Bare URL used', ruleDescription: 'Bare URL used',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md034.md', ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md034.md',
@ -22144,7 +22169,7 @@ Generated by [AVA](https://avajs.dev).
Text <https://example.com/same> more text <https://example.com/same> still more text <https://example.com/same> done {MD034}␊ Text <https://example.com/same> more text <https://example.com/same> still more text <https://example.com/same> done {MD034}␊
Text <https://example.com/same> more \\* text https://example.com/same more \\[ text <https://example.com/same> done {MD034}␊ Text <https://example.com/same> more \\* text <https://example.com/same> more \\[ text <https://example.com/same> done {MD034}␊
Text <https://example.com/first> more text <https://example.com/second> still more text <https://example.com/third> done {MD034}␊ Text <https://example.com/first> more text <https://example.com/second> still more text <https://example.com/third> done {MD034}␊
@ -22379,6 +22404,46 @@ Generated by [AVA](https://avajs.dev).
'no-bare-urls', 'no-bare-urls',
], ],
}, },
{
errorContext: 'https://example.com/',
errorDetail: null,
errorRange: [
3,
20,
],
fixInfo: {
deleteCount: 20,
editColumn: 3,
insertText: '<https://example.com/>',
},
lineNumber: 36,
ruleDescription: 'Bare URL used',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md034.md',
ruleNames: [
'MD034',
'no-bare-urls',
],
},
{
errorContext: 'https://example.com/',
errorDetail: null,
errorRange: [
26,
20,
],
fixInfo: {
deleteCount: 20,
editColumn: 26,
insertText: '<https://example.com/>',
},
lineNumber: 36,
ruleDescription: 'Bare URL used',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md034.md',
ruleNames: [
'MD034',
'no-bare-urls',
],
},
], ],
fixed: `# Link test␊ fixed: `# Link test␊
@ -22410,6 +22475,12 @@ Generated by [AVA](https://avajs.dev).
(<https://example.com>) {MD034}␊ (<https://example.com>) {MD034}␊
{<https://example.com>} {MD034}␊ {<https://example.com>} {MD034}␊
Duplicate links in tables should be handled:␊
| Link | Same Link | Violation |␊
|----------------------|----------------------|-----------|␊
| <https://example.com/> | <https://example.com/> | {MD034} |␊
`, `,
} }