diff --git a/doc/Rules.md b/doc/Rules.md index 5cbbe072..20e17494 100644 --- a/doc/Rules.md +++ b/doc/Rules.md @@ -2697,7 +2697,7 @@ Parameters: - `style`: Table column style (`string`, default `any`, values `aligned` / `any` / `compact` / `tight`) - `wide_character`: RegExp for matching wide character(s) (`string`, default - `undefined`) + `TBD`) This rule is triggered when the column separators of a [GitHub Flavored Markdown table][gfm-table-060] are used inconsistently. diff --git a/doc/md060.md b/doc/md060.md index e2adbf3b..671a9143 100644 --- a/doc/md060.md +++ b/doc/md060.md @@ -9,7 +9,7 @@ Parameters: - `style`: Table column style (`string`, default `any`, values `aligned` / `any` / `compact` / `tight`) - `wide_character`: RegExp for matching wide character(s) (`string`, default - `undefined`) + `TBD`) This rule is triggered when the column separators of a [GitHub Flavored Markdown table][gfm-table-060] are used inconsistently. diff --git a/lib/md060.mjs b/lib/md060.mjs index cac49744..0026ac42 100644 --- a/lib/md060.mjs +++ b/lib/md060.mjs @@ -7,8 +7,19 @@ import { filterByTypesCached } from "./cache.mjs"; /** @typedef {import("markdownlint").MicromarkToken} MicromarkToken */ /** @typedef {import("markdownlint").RuleOnErrorInfo} RuleOnErrorInfo */ -// See https://unicode.org/reports/tr51/ -const defaultWideCharacterReString = "\\p{RGI_Emoji}"; +const regExpFlags = "gv"; +const anyCharacterRe = new RegExp("[\\s\\S]", regExpFlags); +// See: +// https://www.unicode.org/reports/tr11/ +// https://unicode.org/reports/tr24/ +// https://unicode.org/reports/tr51/ +// https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3AEast_Asian_Width%3DFullwidth%3A%5D&abb=on&esc=on&g=&i= +// Notes: +// The East_Asian_Width property is not supported (seemingly at all) by JavaScript, so East_Asian_Width=Fullwidth ranges are matched directly: +// https://github.com/tc39/proposal-regexp-unicode-property-escapes/issues/28 +// As an alternative to matching by Script names, consider matching East_Asian_Width=Wide (Wide is a superset of Fullwidth) directly as well: +// https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3AEast_Asian_Width%3DWide%3A%5D&abb=on&esc=on&g=&i= +const defaultWideCharacterReString = "[\\p{RGI_Emoji}\\p{Script=Han}\\p{Script=Hiragana}\\p{Script=Katakana}\\p{Script=Hangul}\\u3000\\uFF01-\\uFF60\\uFFE0-\\uFFE6]"; /** * @typedef Column @@ -21,12 +32,14 @@ const defaultWideCharacterReString = "\\p{RGI_Emoji}"; * * @param {string} line Line of text. * @param {number} column Actual column (1-based). - * @param {RegExp} wideRe Wide character RegExp. + * @param {RegExp} wideCharacterRe Wide character RegExp. * @returns {number} Effective column (1-based). */ -function effectiveColumn(line, column, wideRe) { - const wideCharacterCount = (line.slice(0, column - 1).match(wideRe) || []).length; - return column + wideCharacterCount; +function effectiveColumn(line, column, wideCharacterRe) { + const span = line.slice(0, column - 1); + const totalCharacterCount = (span.match(anyCharacterRe) || []).length; + const wideCharacterCount = (span.match(wideCharacterRe) || []).length; + return totalCharacterCount + wideCharacterCount; } /** @@ -34,13 +47,13 @@ function effectiveColumn(line, column, wideRe) { * * @param {readonly string[]} lines File/string lines. * @param {MicromarkToken} row Micromark row token. - * @param {RegExp} wideRe Wide character RegExp. + * @param {RegExp} wideCharacterRe Wide character RegExp. * @returns {Column[]} Divider columns. */ -function getTableDividerColumns(lines, row, wideRe) { +function getTableDividerColumns(lines, row, wideCharacterRe) { return filterByTypes( row.children, - [ "tableCellDivider" ]).map((divider) => ({ "actual": divider.startColumn, "effective": effectiveColumn(lines[row.startLine - 1], divider.startColumn, wideRe) }) + [ "tableCellDivider" ]).map((divider) => ({ "actual": divider.startColumn, "effective": effectiveColumn(lines[row.startLine - 1], divider.startColumn, wideCharacterRe) }) ); } @@ -71,9 +84,7 @@ export default { const styleAlignedAllowed = (style === "any") || (style === "aligned"); const styleCompactAllowed = (style === "any") || (style === "compact"); const styleTightAllowed = (style === "any") || (style === "tight"); - const wideCharacter = params.config.wide_character; - const wideCharacterReString = (wideCharacter === undefined) ? defaultWideCharacterReString : wideCharacter; - const wideCharacterRe = new RegExp(wideCharacterReString, "gv"); + const wideCharacterRe = new RegExp(params.config.wide_character || defaultWideCharacterReString, regExpFlags); // Scan all tables/rows const tables = filterByTypesCached([ "table" ]); diff --git a/schema/.markdownlint.jsonc b/schema/.markdownlint.jsonc index 2998d2cd..141cd691 100644 --- a/schema/.markdownlint.jsonc +++ b/schema/.markdownlint.jsonc @@ -342,5 +342,6 @@ // Table column style "style": "any", // RegExp for matching wide character(s) + "wide_character": "TBD" } } \ No newline at end of file diff --git a/schema/.markdownlint.yaml b/schema/.markdownlint.yaml index b6fa4d70..b7407272 100644 --- a/schema/.markdownlint.yaml +++ b/schema/.markdownlint.yaml @@ -305,3 +305,4 @@ MD060: # Table column style style: "any" # RegExp for matching wide character(s) + wide_character: "TBD" diff --git a/schema/build-config-schema.mjs b/schema/build-config-schema.mjs index 94713d2a..aa6414b9 100644 --- a/schema/build-config-schema.mjs +++ b/schema/build-config-schema.mjs @@ -649,7 +649,7 @@ for (const rule of rules) { subscheme.properties.wide_character = { "description": "RegExp for matching wide character(s)", "type": "string", - "default": undefined + "default": "TBD" }; break; default: diff --git a/schema/markdownlint-config-schema-strict.json b/schema/markdownlint-config-schema-strict.json index 3be0aaff..24ff7635 100644 --- a/schema/markdownlint-config-schema-strict.json +++ b/schema/markdownlint-config-schema-strict.json @@ -4704,7 +4704,8 @@ }, "wide_character": { "description": "RegExp for matching wide character(s)", - "type": "string" + "type": "string", + "default": "TBD" } } } @@ -4754,7 +4755,8 @@ }, "wide_character": { "description": "RegExp for matching wide character(s)", - "type": "string" + "type": "string", + "default": "TBD" } } } diff --git a/schema/markdownlint-config-schema.json b/schema/markdownlint-config-schema.json index 2213b075..a8df6fa3 100644 --- a/schema/markdownlint-config-schema.json +++ b/schema/markdownlint-config-schema.json @@ -4704,7 +4704,8 @@ }, "wide_character": { "description": "RegExp for matching wide character(s)", - "type": "string" + "type": "string", + "default": "TBD" } } } @@ -4754,7 +4755,8 @@ }, "wide_character": { "description": "RegExp for matching wide character(s)", - "type": "string" + "type": "string", + "default": "TBD" } } } diff --git a/test/markdownlint-test.mjs b/test/markdownlint-test.mjs index af0275e3..2798da7d 100644 --- a/test/markdownlint-test.mjs +++ b/test/markdownlint-test.mjs @@ -1101,7 +1101,7 @@ test("readme", async(t) => { }); test("validateJsonUsingConfigSchemaStrict", async(t) => { - t.plan(221); + t.plan(222); // @ts-ignore const ajv = new Ajv(ajvOptions); const validateSchemaStrict = ajv.compile(configSchemaStrict); diff --git a/test/snapshots/markdownlint-test-scenarios.mjs.md b/test/snapshots/markdownlint-test-scenarios.mjs.md index dbc453c7..ea51bd22 100644 --- a/test/snapshots/markdownlint-test-scenarios.mjs.md +++ b/test/snapshots/markdownlint-test-scenarios.mjs.md @@ -72651,6 +72651,124 @@ Generated by [AVA](https://avajs.dev). `, } +## table-column-style-wide-characters-disable.md + +> Snapshot 1 + + { + errors: [ + { + errorContext: null, + errorDetail: 'Table pipe does not align with heading for style "aligned"', + errorRange: [ + 5, + 1, + ], + fixInfo: null, + lineNumber: 6, + ruleDescription: 'Table column style', + ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md060.md', + ruleNames: [ + 'MD060', + 'table-column-style', + ], + severity: 'error', + }, + { + errorContext: null, + errorDetail: 'Table pipe does not align with heading for style "aligned"', + errorRange: [ + 10, + 1, + ], + fixInfo: null, + lineNumber: 6, + ruleDescription: 'Table column style', + ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md060.md', + ruleNames: [ + 'MD060', + 'table-column-style', + ], + severity: 'error', + }, + { + errorContext: null, + errorDetail: 'Table pipe does not align with heading for style "aligned"', + errorRange: [ + 10, + 1, + ], + fixInfo: null, + lineNumber: 7, + ruleDescription: 'Table column style', + ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md060.md', + ruleNames: [ + 'MD060', + 'table-column-style', + ], + severity: 'error', + }, + { + errorContext: null, + errorDetail: 'Table pipe does not align with heading for style "aligned"', + errorRange: [ + 5, + 1, + ], + fixInfo: null, + lineNumber: 8, + ruleDescription: 'Table column style', + ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md060.md', + ruleNames: [ + 'MD060', + 'table-column-style', + ], + severity: 'error', + }, + { + errorContext: null, + errorDetail: 'Table pipe does not align with heading for style "aligned"', + errorRange: [ + 9, + 1, + ], + fixInfo: null, + lineNumber: 8, + ruleDescription: 'Table column style', + ruleInformation: 'https://github.com/DavidAnson/markdownlint/blob/v0.0.0/doc/md060.md', + ruleNames: [ + 'MD060', + 'table-column-style', + ], + severity: 'error', + }, + ], + fixed: `# Table Column Style - Wide Characters (Disable)␊ + ␊ + | NN | WW |␊ + | -- | -- |␊ + | NN | NN |␊ + | W | NN |␊ + | NN | W |␊ + | W | W |␊ + | ✅N | NN |␊ + | NN | ✅N |␊ + | ✅N | ✅N |␊ + | WW | NN |␊ + | NN | WW |␊ + | WW | WW |␊ + ␊ + {MD060:-10} {MD060:-9} {MD060:-8}␊ + ␊ + ␊ + `, + } + ## table-column-style-wide-characters.md > Snapshot 1 @@ -72689,9 +72807,16 @@ Generated by [AVA](https://avajs.dev). | MN | ✅ |␊ | ✅ | ✅ |␊ ␊ - ## CJK␊ + ## Hello world␊ ␊ - TODO...␊ + | Language | Translation |␊ + |---------------------|----------------|␊ + | Emoji | 👋🌎 |␊ + | Portuguese (Brazil) | Olá mundo |␊ + | Turkish | Merhaba dünya |␊ + | Chinese (Mandarin) | 你好,世界 |␊ + | Japanese | こんにちは世界 |␊ + | Korean | 안녕 세상 |␊ ␊ diff --git a/test/table-column-style-wide-characters.md b/test/table-column-style-wide-characters.md index ee61596a..dcc2a37d 100644 --- a/test/table-column-style-wide-characters.md +++ b/test/table-column-style-wide-characters.md @@ -30,9 +30,16 @@ | MN | ✅ | | ✅ | ✅ | -## CJK +## Hello world -TODO... +| Language | Translation | +|---------------------|----------------| +| Emoji | 👋🌎 | +| Portuguese (Brazil) | Olá mundo | +| Turkish | Merhaba dünya | +| Chinese (Mandarin) | 你好,世界 | +| Japanese | こんにちは世界 | +| Korean | 안녕 세상 |