Update MD034/no-bare-urls to better handle multiple similar issues on the same line (fixes https://github.com/igorshubovych/markdownlint-cli/issues/339).

This commit is contained in:
David Anson 2022-12-06 22:14:40 -08:00
parent ce5d393109
commit cba5e8d340
7 changed files with 290 additions and 119 deletions

View file

@ -47,7 +47,8 @@ module.exports.inlineCommentStartRe = inlineCommentStartRe;
const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g;
module.exports.htmlElementRe = htmlElementRe;
// Regular expressions for range matching
module.exports.bareUrlRe = /(?:http|ftp)s?:\/\/[^\s\]"']*(?:\/|[^\s\]"'\W])/ig;
module.exports.bareUrlRe =
/(?:http|ftp)s?:\/\/[^\s\]<>"'`]*(?:\/|[^\s\]<>"'`\W])/ig;
module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
// Regular expression for all instances of emphasis markers
@ -414,17 +415,22 @@ module.exports.flattenLists = function flattenLists(tokens) {
}
return flattenedLists;
};
// Calls the provided function for each specified inline child token
module.exports.forEachInlineChild =
/**
* Calls the provided function for each specified inline child token.
*
* @param {Object} params RuleParams instance.
* @param {string} type Token type identifier.
* @param {Function} handler Callback function.
* @returns {void}
*/
function forEachInlineChild(params, type, handler) {
filterTokens(params, "inline", function forToken(token) {
for (const child of token.children) {
if (child.type === type) {
filterTokens(params, "inline", (token) => {
for (const child of token.children.filter((c) => c.type === type)) {
handler(child, token);
}
}
});
};
}
module.exports.forEachInlineChild = forEachInlineChild;
// Calls the provided function for each heading's content
module.exports.forEachHeading = function forEachHeading(params, handler) {
let heading = null;
@ -585,6 +591,7 @@ module.exports.codeBlockAndSpanRanges = (params, lineMetadata) => {
*/
module.exports.htmlElementRanges = (params, lineMetadata) => {
const exclusions = [];
// Match with htmlElementRe
forEachLine(lineMetadata, (line, lineIndex, inCode) => {
let match = null;
// eslint-disable-next-line no-unmodified-loop-condition
@ -592,6 +599,32 @@ module.exports.htmlElementRanges = (params, lineMetadata) => {
exclusions.push([lineIndex, match.index, match[0].length]);
}
});
// Match with html_inline: add one exclusion range per line of every
// "html_inline" child token, in addition to the ranges collected above.
// Each range pushed to exclusions is [lineIndex, startOffset, length].
forEachInlineChild(params, "html_inline", (token, parent) => {
const parentContent = parent.content;
let tokenContent = token.content;
// Locate the token's text inside the parent's content.
// NOTE(review): indexOf returns the FIRST occurrence, so two html_inline
// tokens with identical content in the same parent resolve to the same
// position - confirm whether that is acceptable for callers.
const parentIndex = parentContent.indexOf(tokenContent);
let deltaLines = 0;
let indent = 0;
// Scan backward from the character before the token to the start of the
// parent content: deltaLines counts the newlines passed, and indent counts
// the characters seen before the first newline is reached (the token's
// offset from the preceding line break in parentContent).
// If indexOf returned -1 the loop body never runs and both stay 0.
for (let i = parentIndex - 1; i >= 0; i--) {
if (parentContent[i] === "\n") {
deltaLines++;
}
else if (deltaLines === 0) {
indent++;
}
}
// NOTE(review): presumably token.lineNumber is the 1-based line on which
// the parent's content starts, making this a 0-based line index - confirm.
// NOTE(review): indent is measured within parent.content; it equals the
// column in the source line only if that content keeps the line's leading
// characters - verify against the tokenizer.
let lineIndex = token.lineNumber - 1 + deltaLines;
// Emit one range per line of the token's content. do/while guarantees at
// least one range is pushed even when the content is empty.
do {
const index = tokenContent.indexOf("\n");
// Length of the current line, excluding the newline itself.
const length = (index === -1) ? tokenContent.length : index;
exclusions.push([lineIndex, indent, length]);
// Drop the consumed line and its newline (length + 1).
tokenContent = tokenContent.slice(length + 1);
lineIndex++;
// Continuation lines are recorded from offset 0.
indent = 0;
} while (tokenContent.length > 0);
});
// Return results
return exclusions;
};
/**
@ -3704,60 +3737,62 @@ module.exports = {
"use strict";
// @ts-check
const { addErrorContext, bareUrlRe, filterTokens } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const htmlLinkOpenRe = /^<a[\s>]/i;
const htmlLinkCloseRe = /^<\/a[\s>]/i;
const { addErrorContext, bareUrlRe, withinAnyRange } = __webpack_require__(/*! ../helpers */ "../helpers/helpers.js");
const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } = __webpack_require__(/*! ./cache */ "../lib/cache.js");
const htmlLinkRe = /<a(?:|\s[^>]+)>[^<>]*<\/a\s*>/ig;
module.exports = {
"names": ["MD034", "no-bare-urls"],
"description": "Bare URL used",
"tags": ["links", "url"],
"function": function MD034(params, onError) {
filterTokens(params, "inline", (token) => {
let inLink = false;
let inInline = false;
for (const child of token.children) {
const { content, line, lineNumber, type } = child;
const { lines } = params;
const codeExclusions = [
...codeBlockAndSpanRanges(),
...htmlElementRanges()
];
const { definitionLineIndices } = referenceLinkImageData();
for (const [lineIndex, line] of lines.entries()) {
if (definitionLineIndices[0] === lineIndex) {
definitionLineIndices.shift();
}
else {
let match = null;
if (type === "link_open") {
inLink = true;
const lineExclusions = [];
while ((match = htmlLinkRe.exec(line)) !== null) {
lineExclusions.push([lineIndex, match.index, match[0].length]);
}
else if (type === "link_close") {
inLink = false;
}
else if ((type === "html_inline") && htmlLinkOpenRe.test(content)) {
inInline = true;
}
else if ((type === "html_inline") && htmlLinkCloseRe.test(content)) {
inInline = false;
}
else if ((type === "text") && !inLink && !inInline) {
while ((match = bareUrlRe.exec(content)) !== null) {
while ((match = bareUrlRe.exec(line)) !== null) {
const [bareUrl] = match;
const matchIndex = match.index;
const bareUrlLength = bareUrl.length;
// Allow "[LINK]" to avoid conflicts with MD011/no-reversed-links
// Allow quoting as a way of deliberately including a bare URL
const leftChar = content[matchIndex - 1];
const rightChar = content[matchIndex + bareUrlLength];
if (!((leftChar === "[") && (rightChar === "]")) &&
!((leftChar === "\"") && (rightChar === "\"")) &&
!((leftChar === "'") && (rightChar === "'"))) {
const index = line.indexOf(content);
const range = (index === -1) ? null : [
index + matchIndex + 1,
const prefix = line.slice(0, matchIndex);
const postfix = line.slice(matchIndex + bareUrlLength);
if (
// Allow ](... to avoid reporting Markdown-style links
!(/\]\(\s*$/.test(prefix)) &&
// Allow <...> to avoid reporting non-bare links
!(prefix.endsWith("<") && /^[#)]?>/.test(postfix)) &&
// Allow [...] to avoid MD011/no-reversed-links and nested links
!(/\[[^\]]*$/.test(prefix) && /^[^[]*\]/.test(postfix)) &&
// Allow "..." and '...' for deliberately including a bare link
!(prefix.endsWith("\"") && postfix.startsWith("\"")) &&
!(prefix.endsWith("'") && postfix.startsWith("'")) &&
!withinAnyRange(lineExclusions, lineIndex, matchIndex, bareUrlLength) &&
!withinAnyRange(codeExclusions, lineIndex, matchIndex, bareUrlLength)) {
const range = [
matchIndex + 1,
bareUrlLength
];
const fixInfo = range ? {
const fixInfo = {
"editColumn": range[0],
"deleteCount": range[1],
"insertText": `<${bareUrl}>`
} : null;
addErrorContext(onError, lineNumber, bareUrl, null, null, range, fixInfo);
};
addErrorContext(onError, lineIndex + 1, bareUrl, null, null, range, fixInfo);
}
}
}
}
});
}
};

View file

@ -23,7 +23,8 @@ const htmlElementRe = /<(([A-Za-z][A-Za-z0-9-]*)(?:\s[^`>]*)?)\/?>/g;
module.exports.htmlElementRe = htmlElementRe;
// Regular expressions for range matching
module.exports.bareUrlRe = /(?:http|ftp)s?:\/\/[^\s\]"']*(?:\/|[^\s\]"'\W])/ig;
module.exports.bareUrlRe =
/(?:http|ftp)s?:\/\/[^\s\]<>"'`]*(?:\/|[^\s\]<>"'`\W])/ig;
module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/;
module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/;
@ -418,17 +419,22 @@ module.exports.flattenLists = function flattenLists(tokens) {
return flattenedLists;
};
// Calls the provided function for each specified inline child token
module.exports.forEachInlineChild =
/**
* Calls the provided function for each specified inline child token.
*
* @param {Object} params RuleParams instance.
* @param {string} type Token type identifier.
* @param {Function} handler Callback function.
* @returns {void}
*/
function forEachInlineChild(params, type, handler) {
filterTokens(params, "inline", function forToken(token) {
for (const child of token.children) {
if (child.type === type) {
filterTokens(params, "inline", (token) => {
for (const child of token.children.filter((c) => c.type === type)) {
handler(child, token);
}
}
});
};
}
module.exports.forEachInlineChild = forEachInlineChild;
// Calls the provided function for each heading's content
module.exports.forEachHeading = function forEachHeading(params, handler) {
@ -608,6 +614,7 @@ module.exports.codeBlockAndSpanRanges = (params, lineMetadata) => {
*/
module.exports.htmlElementRanges = (params, lineMetadata) => {
const exclusions = [];
// Match with htmlElementRe
forEachLine(lineMetadata, (line, lineIndex, inCode) => {
let match = null;
// eslint-disable-next-line no-unmodified-loop-condition
@ -615,6 +622,31 @@ module.exports.htmlElementRanges = (params, lineMetadata) => {
exclusions.push([ lineIndex, match.index, match[0].length ]);
}
});
// Match with html_inline: add one exclusion range per line of every
// "html_inline" child token, in addition to the ranges collected above.
// Each range pushed to exclusions is [ lineIndex, startOffset, length ].
forEachInlineChild(params, "html_inline", (token, parent) => {
const parentContent = parent.content;
let tokenContent = token.content;
// Locate the token's text inside the parent's content.
// NOTE(review): indexOf returns the FIRST occurrence, so two html_inline
// tokens with identical content in the same parent resolve to the same
// position - confirm whether that is acceptable for callers.
const parentIndex = parentContent.indexOf(tokenContent);
let deltaLines = 0;
let indent = 0;
// Scan backward from the character before the token to the start of the
// parent content: deltaLines counts the newlines passed, and indent counts
// the characters seen before the first newline is reached (the token's
// offset from the preceding line break in parentContent).
// If indexOf returned -1 the loop body never runs and both stay 0.
for (let i = parentIndex - 1; i >= 0; i--) {
if (parentContent[i] === "\n") {
deltaLines++;
} else if (deltaLines === 0) {
indent++;
}
}
// NOTE(review): presumably token.lineNumber is the 1-based line on which
// the parent's content starts, making this a 0-based line index - confirm.
// NOTE(review): indent is measured within parent.content; it equals the
// column in the source line only if that content keeps the line's leading
// characters - verify against the tokenizer.
let lineIndex = token.lineNumber - 1 + deltaLines;
// Emit one range per line of the token's content. do/while guarantees at
// least one range is pushed even when the content is empty.
do {
const index = tokenContent.indexOf("\n");
// Length of the current line, excluding the newline itself.
const length = (index === -1) ? tokenContent.length : index;
exclusions.push([ lineIndex, indent, length ]);
// Drop the consumed line and its newline (length + 1).
tokenContent = tokenContent.slice(length + 1);
lineIndex++;
// Continuation lines are recorded from offset 0.
indent = 0;
} while (tokenContent.length > 0);
});
// Return results
return exclusions;
};

View file

@ -2,57 +2,67 @@
"use strict";
const { addErrorContext, bareUrlRe, filterTokens } = require("../helpers");
const { addErrorContext, bareUrlRe, withinAnyRange } = require("../helpers");
const { codeBlockAndSpanRanges, htmlElementRanges, referenceLinkImageData } =
require("./cache");
const htmlLinkOpenRe = /^<a[\s>]/i;
const htmlLinkCloseRe = /^<\/a[\s>]/i;
const htmlLinkRe = /<a(?:|\s[^>]+)>[^<>]*<\/a\s*>/ig;
module.exports = {
"names": [ "MD034", "no-bare-urls" ],
"description": "Bare URL used",
"tags": [ "links", "url" ],
"function": function MD034(params, onError) {
filterTokens(params, "inline", (token) => {
let inLink = false;
let inInline = false;
for (const child of token.children) {
const { content, line, lineNumber, type } = child;
const { lines } = params;
const codeExclusions = [
...codeBlockAndSpanRanges(),
...htmlElementRanges()
];
const { definitionLineIndices } = referenceLinkImageData();
for (const [ lineIndex, line ] of lines.entries()) {
if (definitionLineIndices[0] === lineIndex) {
definitionLineIndices.shift();
} else {
let match = null;
if (type === "link_open") {
inLink = true;
} else if (type === "link_close") {
inLink = false;
} else if ((type === "html_inline") && htmlLinkOpenRe.test(content)) {
inInline = true;
} else if ((type === "html_inline") && htmlLinkCloseRe.test(content)) {
inInline = false;
} else if ((type === "text") && !inLink && !inInline) {
while ((match = bareUrlRe.exec(content)) !== null) {
const lineExclusions = [];
while ((match = htmlLinkRe.exec(line)) !== null) {
lineExclusions.push([ lineIndex, match.index, match[0].length ]);
}
while ((match = bareUrlRe.exec(line)) !== null) {
const [ bareUrl ] = match;
const matchIndex = match.index;
const bareUrlLength = bareUrl.length;
// Allow "[LINK]" to avoid conflicts with MD011/no-reversed-links
// Allow quoting as a way of deliberately including a bare URL
const leftChar = content[matchIndex - 1];
const rightChar = content[matchIndex + bareUrlLength];
const prefix = line.slice(0, matchIndex);
const postfix = line.slice(matchIndex + bareUrlLength);
if (
!((leftChar === "[") && (rightChar === "]")) &&
!((leftChar === "\"") && (rightChar === "\"")) &&
!((leftChar === "'") && (rightChar === "'"))
// Allow ](... to avoid reporting Markdown-style links
!(/\]\(\s*$/.test(prefix)) &&
// Allow <...> to avoid reporting non-bare links
!(prefix.endsWith("<") && /^[#)]?>/.test(postfix)) &&
// Allow [...] to avoid MD011/no-reversed-links and nested links
!(/\[[^\]]*$/.test(prefix) && /^[^[]*\]/.test(postfix)) &&
// Allow "..." and '...' for deliberately including a bare link
!(prefix.endsWith("\"") && postfix.startsWith("\"")) &&
!(prefix.endsWith("'") && postfix.startsWith("'")) &&
!withinAnyRange(
lineExclusions, lineIndex, matchIndex, bareUrlLength
) &&
!withinAnyRange(
codeExclusions, lineIndex, matchIndex, bareUrlLength
)
) {
const index = line.indexOf(content);
const range = (index === -1) ? null : [
index + matchIndex + 1,
const range = [
matchIndex + 1,
bareUrlLength
];
const fixInfo = range ? {
const fixInfo = {
"editColumn": range[0],
"deleteCount": range[1],
"insertText": `<${bareUrl}>`
} : null;
};
addErrorContext(
onError,
lineNumber,
lineIndex + 1,
bareUrl,
null,
null,
@ -63,6 +73,5 @@ module.exports = {
}
}
}
});
}
};

View file

@ -29,3 +29,21 @@ As is <a href="https://example.com/info.htm">https://example.com/info.htm text</
<br> Another violation: https://example.com. {MD034} <br>
<br/> Another violation: https://example.com. {MD034} <br/>
This is not a bare [link]( https://example.com ).
URLs in HTML are not bare:
<element-name first-attribute=" https://example.com/first " second-attribute=" https://example.com/second ">
Text
</element-name>
<element-name
first-attribute=" https://example.com/first "
second-attribute=" https://example.com/second "></element-name>
URLs in link and image text are not bare:
Text [link to https://example.com site](https://example.com) text.
Image ![for https://example.com site](https://example.com) text.

View file

@ -28,3 +28,9 @@ Other enclosures are not allowed:
(https://example.com) {MD034}
{https://example.com} {MD034}
Duplicate links in tables should be handled:
| Link | Same Link | Violation |
|----------------------|----------------------|-----------|
| https://example.com/ | https://example.com/ | {MD034} |

View file

@ -2998,6 +2998,24 @@ Generated by [AVA](https://avajs.dev).
<br> Another violation: <https://example.com>. {MD034} <br>
<br/> Another violation: <https://example.com>. {MD034} <br/>
This is not a bare [link]( https://example.com ).␊
URLs in HTML are not bare:␊
<element-name first-attribute=" https://example.com/first " second-attribute=" https://example.com/second ">
Text␊
</element-name>
<element-name
first-attribute=" https://example.com/first "␊
second-attribute=" https://example.com/second "></element-name>
URLs in link and image text are not bare:␊
Text [link to https://example.com site](https://example.com) text.␊
Image ![for https://example.com site](https://example.com) text.␊
`,
}
@ -21928,8 +21946,15 @@ Generated by [AVA](https://avajs.dev).
{
errorContext: 'https://example.com/same',
errorDetail: null,
errorRange: null,
fixInfo: null,
errorRange: [
46,
24,
],
fixInfo: {
deleteCount: 24,
editColumn: 46,
insertText: '<https://example.com/same>',
},
lineNumber: 26,
ruleDescription: 'Bare URL used',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md034.md',
@ -22144,7 +22169,7 @@ Generated by [AVA](https://avajs.dev).
Text <https://example.com/same> more text <https://example.com/same> still more text <https://example.com/same> done {MD034}␊
Text <https://example.com/same> more \\* text https://example.com/same more \\[ text <https://example.com/same> done {MD034}␊
Text <https://example.com/same> more \\* text <https://example.com/same> more \\[ text <https://example.com/same> done {MD034}␊
Text <https://example.com/first> more text <https://example.com/second> still more text <https://example.com/third> done {MD034}␊
@ -22379,6 +22404,46 @@ Generated by [AVA](https://avajs.dev).
'no-bare-urls',
],
},
{
errorContext: 'https://example.com/',
errorDetail: null,
errorRange: [
3,
20,
],
fixInfo: {
deleteCount: 20,
editColumn: 3,
insertText: '<https://example.com/>',
},
lineNumber: 36,
ruleDescription: 'Bare URL used',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md034.md',
ruleNames: [
'MD034',
'no-bare-urls',
],
},
{
errorContext: 'https://example.com/',
errorDetail: null,
errorRange: [
26,
20,
],
fixInfo: {
deleteCount: 20,
editColumn: 26,
insertText: '<https://example.com/>',
},
lineNumber: 36,
ruleDescription: 'Bare URL used',
ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md034.md',
ruleNames: [
'MD034',
'no-bare-urls',
],
},
],
fixed: `# Link test␊
@ -22410,6 +22475,12 @@ Generated by [AVA](https://avajs.dev).
(<https://example.com>) {MD034}␊
{<https://example.com>} {MD034}␊
Duplicate links in tables should be handled:␊
| Link | Same Link | Violation |␊
|----------------------|----------------------|-----------|␊
| <https://example.com/> | <https://example.com/> | {MD034} |␊
`,
}