markdownlint/lib/md051.js

// @ts-check

"use strict";

const { addError, addErrorDetailIf } = require("../helpers");
const { filterByTypes, getHtmlTagInfo } = require("../helpers/micromark.cjs");

// Regular expressions for identifying HTML anchor names and heading anchors
// idRe: whitespace, "id", "=" (optionally surrounded by whitespace), and an
// optional quote; captures the value up to the next quote, whitespace, or ">"
// (case-insensitive)
const idRe = /\sid\s*=\s*['"]?([^'"\s>]+)/iu;
// nameRe: same shape as idRe, but for the "name" attribute
const nameRe = /\sname\s*=\s*['"]?([^'"\s>]+)/iu;
// anchorRe: "{#name}" where name is one or more runs of a-z/digits joined by
// a single "-" or "_" (lowercase only); captures "#name" including the hash.
// Uses the global flag, so exec() is stateful via lastIndex.
const anchorRe = /\{(#[a-z\d]+(?:[-_][a-z\d]+)*)\}/gu;

/**
 * Builds the link fragment ("#...") for a Markdown heading, following the
 * rules GitHub applies when generating heading anchors.
 *
 * @param {Object} headingText Heading text token.
 * @returns {string} Fragment string for heading.
 */
function convertHeadingToHTMLFragment(headingText) {
  // RegExp source with Ruby's \p{Word} expanded into its General Categories
  // eslint-disable-next-line max-len
  // https://github.com/gjtorikian/html-pipeline/blob/main/lib/html/pipeline/toc_filter.rb
  // https://ruby-doc.org/core-3.0.2/Regexp.html
  const disallowedRe =
    /[^\p{Letter}\p{Mark}\p{Number}\p{Connector_Punctuation}\- ]/gu;
  const spaceRe = / /gu;
  // Only plain text and code span content contribute to the fragment
  const textTokens =
    filterByTypes(headingText.children, [ "codeTextData", "data" ]);
  const lowered = textTokens.map(({ text }) => text).join("").toLowerCase();
  // Drop disallowed characters first, then turn each space into a hyphen
  const slug = lowered.replace(disallowedRe, "").replace(spaceRe, "-");
  return `#${encodeURIComponent(slug)}`;
}

module.exports = {
  "names": [ "MD051", "link-fragments" ],
  "description": "Link fragments should be valid",
  "tags": [ "links" ],
  "function": function MD051(params, onError) {
    const { tokens } = params.parsers.micromark;
    const fragments = new Map();

    // Process headings
    const headingTexts = filterByTypes(
      tokens,
      [ "atxHeadingText", "setextHeadingText" ]
    );
    for (const headingText of headingTexts) {
      const fragment = convertHeadingToHTMLFragment(headingText);
      const count = fragments.get(fragment) || 0;
      if (count) {
        fragments.set(`${fragment}-${count}`, 0);
      }
      fragments.set(fragment, count + 1);
      let match = null;
      while ((match = anchorRe.exec(headingText.text)) !== null) {
        const [ , anchor ] = match;
        if (!fragments.has(anchor)) {
          fragments.set(anchor, 1);
        }
      }
    }

    // Process HTML anchors
    for (const token of filterByTypes(tokens, [ "htmlText" ])) {
      const htmlTagInfo = getHtmlTagInfo(token);
      if (htmlTagInfo && !htmlTagInfo.close) {
        const anchorMatch = idRe.exec(token.text) ||
          (htmlTagInfo.name.toLowerCase() === "a" && nameRe.exec(token.text));
        if (anchorMatch) {
          fragments.set(`#${anchorMatch[1]}`, 0);
        }
      }
    }

    // Process link and definition fragments
    const parentChilds = [
      [ "link", "resourceDestinationString" ],
      [ "definition", "definitionDestinationString" ]
    ];
    for (const [ parentType, definitionType ] of parentChilds) {
      const links = filterByTypes(tokens, [ parentType ]);
      for (const link of links) {
        const definitions = filterByTypes(link.children, [ definitionType ]);
        for (const definition of definitions) {
          if (
            (definition.text.length > 1) &&
            definition.text.startsWith("#") &&
            !fragments.has(definition.text)
          ) {
            // eslint-disable-next-line no-undef-init
            let context = undefined;
            // eslint-disable-next-line no-undef-init
            let range = undefined;
            // eslint-disable-next-line no-undef-init
            let fixInfo = undefined;
            if (link.startLine === link.endLine) {
              context = link.text;
              range = [ link.startColumn, link.endColumn - link.startColumn ];
              fixInfo = {
                "editColumn": definition.startColumn,
                "deleteCount": definition.endColumn - definition.startColumn
              };
            }
            const definitionTextLower = definition.text.toLowerCase();
            const mixedCaseKey = [ ...fragments.keys() ]
              .find((key) => definitionTextLower === key.toLowerCase());
            if (mixedCaseKey) {
              // @ts-ignore
              (fixInfo || {}).insertText = mixedCaseKey;
              addErrorDetailIf(
                onError,
                link.startLine,
                mixedCaseKey,
                definition.text,
                undefined,
                context,
                range,
                fixInfo
              );
            } else {
              addError(
                onError,
                link.startLine,
                undefined,
                context,
                range
              );
            }
          }
        }
      }
    }
  }
};
Initial implementation of MD051/valid-link-fragments (refs #253, closes #495). 2022-01-26 00:21:08 +01:00			`// @ts-check`

			`"use strict";`

Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`const { addError, addErrorDetailIf } = require("../helpers");`
Refactor micromark token handling to remove optional Token.htmlFlowChildren property and make related code more efficient for a ~6% elapsed time reduction. 2023-09-02 12:07:14 -07:00			`const { filterByTypes, getHtmlTagInfo } = require("../helpers/micromark.cjs");`
Update previous commit for MD051/link-fragments to rename, refactor, add support for HTML anchors, and validate against markdown-link-check (fixes #253). 2022-04-10 05:37:57 +00:00
			`// Regular expression for identifying HTML anchor names`
Update MD051/link-fragments to support `id` attributes on non-`a` elements (fixes #538). The `name` is only an anchor on `a` elements, but `id` is a universal attribute on all elements. Also fix match on id/name to be complete, not just a suffix. 2022-07-28 00:42:05 -04:00			`const idRe = /\sid\s=\s['"]?([^'"\s>]+)/iu;`
			`const nameRe = /\sname\s=\s['"]?([^'"\s>]+)/iu;`
Add support for named heading fragments as supported by some platforms (fixes #830). 2023-07-08 22:14:00 -07:00			`const anchorRe = /\{(#[a-z\d]+(?:[-_][a-z\d]+)*)\}/gu;`
Initial implementation of MD051/valid-link-fragments (refs #253, closes #495). 2022-01-26 00:21:08 +01:00
			`/**`
Update previous commit for MD051/link-fragments to rename, refactor, add support for HTML anchors, and validate against markdown-link-check (fixes #253). 2022-04-10 05:37:57 +00:00			`* Converts a Markdown heading into an HTML fragment according to the rules`
			`* used by GitHub.`
Initial implementation of MD051/valid-link-fragments (refs #253, closes #495). 2022-01-26 00:21:08 +01:00			`*`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`* @param {Object} headingText Heading text token.`
Update previous commit for MD051/link-fragments to rename, refactor, add support for HTML anchors, and validate against markdown-link-check (fixes #253). 2022-04-10 05:37:57 +00:00			`* @returns {string} Fragment string for heading.`
Initial implementation of MD051/valid-link-fragments (refs #253, closes #495). 2022-01-26 00:21:08 +01:00			`*/`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`function convertHeadingToHTMLFragment(headingText) {`
			`const inlineText =`
			`filterByTypes(headingText.children, [ "codeTextData", "data" ])`
			`.map((token) => token.text)`
			`.join("");`
Update MD051/link-fragments to use the same character classes as GitHub, encode unprintable characters from emojii. 2022-04-18 20:59:01 -07:00			`return "#" + encodeURIComponent(`
			`inlineText`
			`.toLowerCase()`
			`// RegExp source with Ruby's \p{Word} expanded into its General Categories`
			`// eslint-disable-next-line max-len`
			`// https://github.com/gjtorikian/html-pipeline/blob/main/lib/html/pipeline/toc_filter.rb`
			`// https://ruby-doc.org/core-3.0.2/Regexp.html`
			`.replace(`
			`/[^\p{Letter}\p{Mark}\p{Number}\p{Connector_Punctuation}\- ]/gu,`
			`""`
			`)`
			`.replace(/ /gu, "-")`
			`);`
Initial implementation of MD051/valid-link-fragments (refs #253, closes #495). 2022-01-26 00:21:08 +01:00			`}`

			`module.exports = {`
Update previous commit for MD051/link-fragments to rename, refactor, add support for HTML anchors, and validate against markdown-link-check (fixes #253). 2022-04-10 05:37:57 +00:00			`"names": [ "MD051", "link-fragments" ],`
Initial implementation of MD051/valid-link-fragments (refs #253, closes #495). 2022-01-26 00:21:08 +01:00			`"description": "Link fragments should be valid",`
			`"tags": [ "links" ],`
			`"function": function MD051(params, onError) {`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`const { tokens } = params.parsers.micromark;`
Update MD051/link-fragments to support indexing of repated headings. 2022-04-21 21:02:46 -07:00			`const fragments = new Map();`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00
Refactor MD051/link-fragments slightly to reduce dependencies. 2022-04-20 21:27:04 -07:00			`// Process headings`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`const headingTexts = filterByTypes(`
			`tokens,`
			`[ "atxHeadingText", "setextHeadingText" ]`
			`);`
			`for (const headingText of headingTexts) {`
			`const fragment = convertHeadingToHTMLFragment(headingText);`
Update MD051/link-fragments to support indexing of repated headings. 2022-04-21 21:02:46 -07:00			`const count = fragments.get(fragment) \|\| 0;`
			`if (count) {`
			fragments.set(`${fragment}-${count}`, 0);
			`}`
			`fragments.set(fragment, count + 1);`
Add support for named heading fragments as supported by some platforms (fixes #830). 2023-07-08 22:14:00 -07:00			`let match = null;`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`while ((match = anchorRe.exec(headingText.text)) !== null) {`
Add support for named heading fragments as supported by some platforms (fixes #830). 2023-07-08 22:14:00 -07:00			`const [ , anchor ] = match;`
			`if (!fragments.has(anchor)) {`
			`fragments.set(anchor, 1);`
			`}`
			`}`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`}`

Refactor MD051/link-fragments slightly to reduce dependencies. 2022-04-20 21:27:04 -07:00			`// Process HTML anchors`
Refactor micromark token handling to remove optional Token.htmlFlowChildren property and make related code more efficient for a ~6% elapsed time reduction. 2023-09-02 12:07:14 -07:00			`for (const token of filterByTypes(tokens, [ "htmlText" ])) {`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`const htmlTagInfo = getHtmlTagInfo(token);`
			`if (htmlTagInfo && !htmlTagInfo.close) {`
			`const anchorMatch = idRe.exec(token.text) \|\|`
			`(htmlTagInfo.name.toLowerCase() === "a" && nameRe.exec(token.text));`
Update MD051/link-fragments to support `id` attributes on non-`a` elements (fixes #538). The `name` is only an anchor on `a` elements, but `id` is a universal attribute on all elements. Also fix match on id/name to be complete, not just a suffix. 2022-07-28 00:42:05 -04:00			`if (anchorMatch) {`
			fragments.set(`#${anchorMatch[1]}`, 0);
Update previous commit for MD051/link-fragments to rename, refactor, add support for HTML anchors, and validate against markdown-link-check (fixes #253). 2022-04-10 05:37:57 +00:00			`}`
			`}`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`}`

			`// Process link and definition fragments`
			`const parentChilds = [`
			`[ "link", "resourceDestinationString" ],`
			`[ "definition", "definitionDestinationString" ]`
			`];`
			`for (const [ parentType, definitionType ] of parentChilds) {`
			`const links = filterByTypes(tokens, [ parentType ]);`
			`for (const link of links) {`
			`const definitions = filterByTypes(link.children, [ definitionType ]);`
			`for (const definition of definitions) {`
			`if (`
			`(definition.text.length > 1) &&`
			`definition.text.startsWith("#") &&`
			`!fragments.has(definition.text)`
			`) {`
Update MD051/link-fragments to not provide error context for multi-line scenarios. 2023-08-04 21:23:43 -07:00			`// eslint-disable-next-line no-undef-init`
			`let context = undefined;`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`// eslint-disable-next-line no-undef-init`
			`let range = undefined;`
			`// eslint-disable-next-line no-undef-init`
			`let fixInfo = undefined;`
			`if (link.startLine === link.endLine) {`
Update MD051/link-fragments to not provide error context for multi-line scenarios. 2023-08-04 21:23:43 -07:00			`context = link.text;`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`range = [ link.startColumn, link.endColumn - link.startColumn ];`
			`fixInfo = {`
			`"editColumn": definition.startColumn,`
			`"deleteCount": definition.endColumn - definition.startColumn`
			`};`
			`}`
			`const definitionTextLower = definition.text.toLowerCase();`
			`const mixedCaseKey = [ ...fragments.keys() ]`
			`.find((key) => definitionTextLower === key.toLowerCase());`
			`if (mixedCaseKey) {`
			`// @ts-ignore`
			`(fixInfo \|\| {}).insertText = mixedCaseKey;`
			`addErrorDetailIf(`
			`onError,`
			`link.startLine,`
			`mixedCaseKey,`
			`definition.text,`
			`undefined,`
Update MD051/link-fragments to not provide error context for multi-line scenarios. 2023-08-04 21:23:43 -07:00			`context,`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`range,`
			`fixInfo`
			`);`
			`} else {`
			`addError(`
			`onError,`
			`link.startLine,`
			`undefined,`
Update MD051/link-fragments to not provide error context for multi-line scenarios. 2023-08-04 21:23:43 -07:00			`context,`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`range`
			`);`
			`}`
			`}`
Initial implementation of MD051/valid-link-fragments (refs #253, closes #495). 2022-01-26 00:21:08 +01:00			`}`
Update previous commit for MD051/link-fragments to rename, refactor, add support for HTML anchors, and validate against markdown-link-check (fixes #253). 2022-04-10 05:37:57 +00:00			`}`
Reimplement MD051/link-fragments using micromark tokens, report reference link issues for definition and fix when possible, handle reporting multiple violations on the same line better. 2023-08-04 20:53:38 -07:00			`}`
Initial implementation of MD051/valid-link-fragments (refs #253, closes #495). 2022-01-26 00:21:08 +01:00			`}`
			`};`