wip
Some checks are pending
Checkers / linkcheck (push) Waiting to run
Checkers / spellcheck (push) Waiting to run
CI / build (20, macos-latest) (push) Waiting to run
CI / build (20, ubuntu-latest) (push) Waiting to run
CI / build (20, windows-latest) (push) Waiting to run
CI / build (22, macos-latest) (push) Waiting to run
CI / build (22, ubuntu-latest) (push) Waiting to run
CI / build (22, windows-latest) (push) Waiting to run
CI / build (24, macos-latest) (push) Waiting to run
CI / build (24, ubuntu-latest) (push) Waiting to run
CI / build (24, windows-latest) (push) Waiting to run
CI / pnpm (push) Waiting to run
CodeQL / Analyze (push) Waiting to run
TestRepos / build (latest, ubuntu-latest) (push) Waiting to run
UpdateTestRepos / update (push) Waiting to run

This commit is contained in:
David Anson 2025-11-23 17:36:21 -08:00
parent 75bb84620e
commit 4447540366
13 changed files with 196 additions and 24 deletions

View file

@ -7,8 +7,19 @@ import { filterByTypesCached } from "./cache.mjs";
/** @typedef {import("markdownlint").MicromarkToken} MicromarkToken */
/** @typedef {import("markdownlint").RuleOnErrorInfo} RuleOnErrorInfo */
// See https://unicode.org/reports/tr51/
const defaultWideCharacterReString = "\\p{RGI_Emoji}";
const regExpFlags = "gv";
const anyCharacterRe = new RegExp("[\\s\\S]", regExpFlags);
// See:
// https://www.unicode.org/reports/tr11/
// https://unicode.org/reports/tr24/
// https://unicode.org/reports/tr51/
// https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3AEast_Asian_Width%3DFullwidth%3A%5D&abb=on&esc=on&g=&i=
// Notes:
// The East_Asian_Width property is not supported (seemingly at all) by JavaScript, so East_Asian_Width=Fullwidth ranges are matched directly:
// https://github.com/tc39/proposal-regexp-unicode-property-escapes/issues/28
// As an alternative to matching by Script names, consider matching the East_Asian_Width=Wide ranges directly as well (Wide and Fullwidth are distinct, non-overlapping property values in UAX #11, so both sets of ranges would be needed):
// https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3AEast_Asian_Width%3DWide%3A%5D&abb=on&esc=on&g=&i=
const defaultWideCharacterReString = "[\\p{RGI_Emoji}\\p{Script=Han}\\p{Script=Hiragana}\\p{Script=Katakana}\\p{Script=Hangul}\\u3000\\uFF01-\\uFF60\\uFFE0-\\uFFE6]";
/**
* @typedef Column
@ -21,12 +32,14 @@ const defaultWideCharacterReString = "\\p{RGI_Emoji}";
*
* @param {string} line Line of text.
* @param {number} column Actual column (1-based).
* @param {RegExp} wideRe Wide character RegExp.
* @param {RegExp} wideCharacterRe Wide character RegExp.
* @returns {number} Effective column (1-based).
*/
function effectiveColumn(line, column, wideRe) {
const wideCharacterCount = (line.slice(0, column - 1).match(wideRe) || []).length;
return column + wideCharacterCount;
function effectiveColumn(line, column, wideCharacterRe) {
  // Text that precedes the given 1-based column
  const span = line.slice(0, column - 1);
  // anyCharacterRe is global, so match() returns one entry per character
  // in the span (or null when the span is empty)
  const totalCharacterCount = (span.match(anyCharacterRe) || []).length;
  // Every wideCharacterRe match contributes one extra column
  const wideCharacterCount = (span.match(wideCharacterRe) || []).length;
  // Add 1 so the result is 1-based like the incoming column: an empty span
  // yields column 1 and plain ASCII text yields the same column passed in
  return totalCharacterCount + wideCharacterCount + 1;
}
/**
@ -34,13 +47,13 @@ function effectiveColumn(line, column, wideRe) {
*
* @param {readonly string[]} lines File/string lines.
* @param {MicromarkToken} row Micromark row token.
* @param {RegExp} wideRe Wide character RegExp.
* @param {RegExp} wideCharacterRe Wide character RegExp.
* @returns {Column[]} Divider columns.
*/
function getTableDividerColumns(lines, row, wideRe) {
function getTableDividerColumns(lines, row, wideCharacterRe) {
return filterByTypes(
row.children,
[ "tableCellDivider" ]).map((divider) => ({ "actual": divider.startColumn, "effective": effectiveColumn(lines[row.startLine - 1], divider.startColumn, wideRe) })
[ "tableCellDivider" ]).map((divider) => ({ "actual": divider.startColumn, "effective": effectiveColumn(lines[row.startLine - 1], divider.startColumn, wideCharacterRe) })
);
}
@ -71,9 +84,7 @@ export default {
const styleAlignedAllowed = (style === "any") || (style === "aligned");
const styleCompactAllowed = (style === "any") || (style === "compact");
const styleTightAllowed = (style === "any") || (style === "tight");
const wideCharacter = params.config.wide_character;
const wideCharacterReString = (wideCharacter === undefined) ? defaultWideCharacterReString : wideCharacter;
const wideCharacterRe = new RegExp(wideCharacterReString, "gv");
const wideCharacterRe = new RegExp(params.config.wide_character || defaultWideCharacterReString, regExpFlags);
// Scan all tables/rows
const tables = filterByTypesCached([ "table" ]);