From f5a71521d42f6fa8630b73ea247c40df9ff5a99e Mon Sep 17 00:00:00 2001 From: David Anson Date: Sat, 25 Apr 2020 15:10:07 -0700 Subject: [PATCH] Update MD037/no-space-in-emphasis to ignore emphasis markers in code spans (fixes #278). --- .eslintrc.json | 1 + helpers/helpers.js | 194 +++++++++++------- lib/md037.js | 9 +- ...-inside-emphasis-markers-multiple-lines.md | 47 +++++ test/spaces_inside_emphasis_markers.md | 20 ++ 5 files changed, 195 insertions(+), 76 deletions(-) diff --git a/.eslintrc.json b/.eslintrc.json index d79a9bb3..4981a747 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -40,6 +40,7 @@ "multiline-comment-style": ["error", "separate-lines"], "multiline-ternary": "off", "newline-per-chained-call": "off", + "no-continue": "off", "no-empty-function": "off", "no-extra-parens": "off", "no-implicit-coercion": "off", diff --git a/helpers/helpers.js b/helpers/helpers.js index 4e46c500..6629d258 100644 --- a/helpers/helpers.js +++ b/helpers/helpers.js @@ -25,6 +25,9 @@ module.exports.bareUrlRe = /(?:http|ftp)s?:\/\/[^\s\]"']*/ig; module.exports.listItemMarkerRe = /^([\s>]*)(?:[*+-]|\d+[.)])\s+/; module.exports.orderedListItemMarkerRe = /^[\s>]*0*(\d+)[.)]/; +// Regular expression for emphasis markers +const emphasisMarkersRe = /[_*]+/g; + // readFile options for reading with the UTF-8 encoding module.exports.utf8Encoding = { "encoding": "utf8" }; @@ -330,84 +333,90 @@ module.exports.forEachHeading = function forEachHeading(params, handler) { }); }; -// Calls the provided function for each inline code span's content -module.exports.forEachInlineCodeSpan = - function forEachInlineCodeSpan(input, handler) { - let currentLine = 0; - let currentColumn = 0; - let index = 0; - while (index < input.length) { - let startIndex = -1; - let startLine = -1; - let startColumn = -1; - let tickCount = 0; - let currentTicks = 0; - let state = "normal"; - // Deliberate <= so trailing 0 completes the last span (ex: "text `code`") - for (; index <= input.length; index++) { - const char = input[index]; - // Ignore backticks in link destination - if ((char === "[") && (state === "normal")) { - state = "linkTextOpen"; - } else if ((char === "]") && (state === "linkTextOpen")) { - state = "linkTextClosed"; - } else if ((char === "(") && (state === "linkTextClosed")) { - state = "linkDestinationOpen"; - } else if ( - ((char === "(") && (state === "linkDestinationOpen")) || - ((char === ")") && (state === "linkDestinationOpen")) || - (state === "linkTextClosed")) { - state = "normal"; - } - // Parse backtick open/close - if ((char === "`") && (state !== "linkDestinationOpen")) { - // Count backticks at start or end of code span - currentTicks++; - if ((startIndex === -1) || (startColumn === -1)) { - startIndex = index + 1; - } - } else { - if ((startIndex >= 0) && - (startColumn >= 0) && - (tickCount === currentTicks)) { - // Found end backticks; invoke callback for code span - handler( - input.substring(startIndex, index - currentTicks), - startLine, startColumn, tickCount); - startIndex = -1; - startColumn = -1; - } else if ((startIndex >= 0) && (startColumn === -1)) { - // Found start backticks - tickCount = currentTicks; - startLine = currentLine; - startColumn = currentColumn; - } - // Not in backticks - currentTicks = 0; - } - if (char === "\n") { - // On next line - currentLine++; - currentColumn = 0; - } else if ((char === "\\") && - ((startIndex === -1) || (startColumn === -1)) && - (input[index + 1] !== "\n")) { - // Escape character outside code, skip next - index++; - currentColumn += 2; - } else { - // On next column - currentColumn++; - } +/** + * Calls the provided function for each inline code span's content. + * + * @param {string} input Markdown content. + * @param {Function} handler Callback function. + * @returns {void} + */ +function forEachInlineCodeSpan(input, handler) { + let currentLine = 0; + let currentColumn = 0; + let index = 0; + while (index < input.length) { + let startIndex = -1; + let startLine = -1; + let startColumn = -1; + let tickCount = 0; + let currentTicks = 0; + let state = "normal"; + // Deliberate <= so trailing 0 completes the last span (ex: "text `code`") + for (; index <= input.length; index++) { + const char = input[index]; + // Ignore backticks in link destination + if ((char === "[") && (state === "normal")) { + state = "linkTextOpen"; + } else if ((char === "]") && (state === "linkTextOpen")) { + state = "linkTextClosed"; + } else if ((char === "(") && (state === "linkTextClosed")) { + state = "linkDestinationOpen"; + } else if ( + ((char === "(") && (state === "linkDestinationOpen")) || + ((char === ")") && (state === "linkDestinationOpen")) || + (state === "linkTextClosed")) { + state = "normal"; } - if (startIndex >= 0) { - // Restart loop after unmatched start backticks (ex: "`text``code``") - index = startIndex; - currentLine = startLine; - currentColumn = startColumn; + // Parse backtick open/close + if ((char === "`") && (state !== "linkDestinationOpen")) { + // Count backticks at start or end of code span + currentTicks++; + if ((startIndex === -1) || (startColumn === -1)) { + startIndex = index + 1; + } + } else { + if ((startIndex >= 0) && + (startColumn >= 0) && + (tickCount === currentTicks)) { + // Found end backticks; invoke callback for code span + handler( + input.substring(startIndex, index - currentTicks), + startLine, startColumn, tickCount); + startIndex = -1; + startColumn = -1; + } else if ((startIndex >= 0) && (startColumn === -1)) { + // Found start backticks + tickCount = currentTicks; + startLine = currentLine; + startColumn = currentColumn; + } + // Not in backticks + currentTicks = 0; + } + if (char === "\n") { + // On next line + currentLine++; + currentColumn = 0; + } else if ((char === "\\") && + ((startIndex === -1) || (startColumn === -1)) && + (input[index + 1] !== "\n")) { + // Escape character outside code, skip next + index++; + currentColumn += 2; + } else { + // On next column + currentColumn++; } } - }; + if (startIndex >= 0) { + // Restart loop after unmatched start backticks (ex: "`text``code``") + index = startIndex; + currentLine = startLine; + currentColumn = startColumn; + } + } +} +module.exports.forEachInlineCodeSpan = forEachInlineCodeSpan; /** * Adds a generic error object via the onError callback. @@ -484,6 +493,41 @@ module.exports.frontMatterHasTitle = frontMatterLines.some((line) => frontMatterTitleRe.test(line)); }; +/** + * Returns a list of emphasis markers in code spans. + * + * @param {Object} params RuleParams instance. + * @returns {number[][]} List of markers. + */ +function emphasisMarkersInCodeSpans(params) { + const { lines } = params; + const byLine = new Array(lines.length); + filterTokens(params, "inline", (token) => { + const { children, lineNumber, map } = token; + if (children.some((child) => child.type === "code_inline")) { + const tokenLines = lines.slice(map[0], map[1]); + forEachInlineCodeSpan( + tokenLines.join("\n"), + (code, lineIndex, column, tickCount) => { + const codeLines = code.split(newLineRe); + codeLines.forEach((codeLine, codeLineIndex) => { + let match = null; + while ((match = emphasisMarkersRe.exec(codeLine))) { + const byLineIndex = lineNumber - 1 + lineIndex + codeLineIndex; + const inLine = byLine[byLineIndex] || []; + const codeLineOffset = codeLineIndex ? 0 : column - 1 + tickCount; + inLine.push(codeLineOffset + match.index); + byLine[byLineIndex] = inLine; + } + }); + } + ); + } + }); + return byLine; +} +module.exports.emphasisMarkersInCodeSpans = emphasisMarkersInCodeSpans; + /** * Gets the most common line ending, falling back to the platform default. * diff --git a/lib/md037.js b/lib/md037.js index 14390837..75c13717 100644 --- a/lib/md037.js +++ b/lib/md037.js @@ -2,7 +2,8 @@ "use strict"; -const { addErrorContext, forEachLine, isBlankLine } = require("../helpers"); +const { addErrorContext, emphasisMarkersInCodeSpans, forEachLine, + includesSorted, isBlankLine } = require("../helpers"); const { lineMetadata } = require("./cache"); const emphasisRe = /(^|[^\\])(?:(\*\*?\*?)|(__?_?))/g; @@ -63,6 +64,7 @@ module.exports = { return null; } // Initialize + const ignoreMarkersByLine = emphasisMarkersInCodeSpans(params); resetRunTracking(); forEachLine( lineMetadata(), @@ -83,7 +85,12 @@ module.exports = { let match = null; // Match all emphasis-looking runs in the line... while ((match = emphasisRe.exec(line))) { + const ignoreMarkersForLine = ignoreMarkersByLine[lineIndex] || []; const matchIndex = match.index + match[1].length; + if (includesSorted(ignoreMarkersForLine, matchIndex)) { + // Ignore emphasis markers inside code spans + continue; + } const matchLength = match[0].length - match[1].length; if (emphasisIndex === -1) { // New run diff --git a/test/spaces-inside-emphasis-markers-multiple-lines.md b/test/spaces-inside-emphasis-markers-multiple-lines.md index 76ad38ec..4616ff55 100644 --- a/test/spaces-inside-emphasis-markers-multiple-lines.md +++ b/test/spaces-inside-emphasis-markers-multiple-lines.md @@ -73,3 +73,50 @@ emphasis _ text {MD037} Text ** bold {MD037} bold ** text {MD037} + +Emphasis `inside +of * code * +blocks` is okay. + +Emphasis `* inside` +code +`blocks *` is okay. + +Emphasis `inside *` +code +`* blocks` is okay. + +Emphasis `inside +_ code _ +blocks` is okay. + +Emphasis `_ inside` +code +`blocks _` is okay. + +Emphasis `inside _` +code +`_ blocks` is okay. + +Mixed `code_span` +scenarios +are _also_ okay. + +Mixed `code*span` +scenarios +are *also* okay. + +This paragraph +contains *a* mix +of `*` emphasis +scenarios and *should* +not trigger `*` any +violations at *all*. + +This paragraph +contains `a * slightly +more complicated +multi-line emphasis +scenario * that +should * not trigger +violations * either`. diff --git a/test/spaces_inside_emphasis_markers.md b/test/spaces_inside_emphasis_markers.md index ae3359bf..f9a0b4f1 100644 --- a/test/spaces_inside_emphasis_markers.md +++ b/test/spaces_inside_emphasis_markers.md @@ -154,3 +154,23 @@ Text *emph***strong ** text {MD037} ```markdown Violations * are * allowed in code blocks where emphasis does not apply. ``` + +Emphasis `inside * code * blocks` is okay. + +Emphasis `* inside` code `blocks *` is okay. + +Emphasis `inside *` code `* blocks` is okay. + +Emphasis `inside _ code _ blocks` is okay. + +Emphasis `_ inside` code `blocks _` is okay. + +Emphasis `inside _` code `_ blocks` is okay. + +Mixed `code_span` scenarios are _also_ okay. + +Mixed `code*span` scenarios are *also* okay. + +Mixed `code*span` scenarios are _also_ okay. + +Mixed `code_span` scenarios are *also* okay.