🧮 feat: Improve LaTeX rendering consistency (#3763)

* refactor: simplify LaTeX pre-processing for more consistent rendering, disables `singleDollarTextMath`

* refactor: disable singleDollarTextMath in all markdown components

* wip: first pass

* refactor: preserve code blocks and convert rather than preserve LaTeX delimiters

* refactor: remove unused escapeDollarNumber function from latex.ts
This commit is contained in:
Danny Avila 2024-08-23 13:45:27 -04:00 committed by GitHub
parent 967e8a1e92
commit ea5140ff0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 156 additions and 3 deletions

View file

@ -40,3 +40,66 @@ export const processLaTeX = (_content: string) => {
// Restore code blocks
return restoreCodeBlocks(processedContent, codeBlocks);
};
/**
 * Prepares markdown content for math rendering.
 *
 * Dollar signs that directly precede a digit are treated as currency and
 * escaped (`$5` becomes `\$5`). Code (fenced blocks and single-line backtick
 * spans) and existing LaTeX expressions (`$$...$$`, `\[...\]`, `\(...\)`) are
 * swapped for placeholders while that escaping runs, then put back. Finally
 * the text is passed through `escapeBrackets` and `escapeMhchem`.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with replaced delimiters and escaped characters.
 */
export function preprocessLaTeX(content: string): string {
  // Step 1: Protect code blocks so nothing inside them is touched by the currency escaping.
  const codeBlocks: string[] = [];
  content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (match: string) => {
    codeBlocks.push(match);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions.
  const latexExpressions: string[] = [];
  content = content.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match: string) => {
    latexExpressions.push(match);
    return `<<LATEX_${latexExpressions.length - 1}>>`;
  });

  // Step 3: Escape dollar signs that are likely currency indicators.
  // The negative lookbehind skips dollars that already carry a backslash, so
  // input such as `\$5` is not double-escaped into `\\$5`.
  content = content.replace(/(?<!\\)\$(?=\d)/g, '\\$');

  // Step 4: Restore LaTeX expressions.
  // If the input itself contained placeholder-looking text with no stored
  // entry, keep that text as-is instead of emitting the string "undefined".
  content = content.replace(
    /<<LATEX_(\d+)>>/g,
    (placeholder: string, index: string) => latexExpressions[Number(index)] ?? placeholder,
  );

  // Step 5: Restore code blocks (same fallback as above).
  content = content.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (placeholder: string, index: string) => codeBlocks[Number(index)] ?? placeholder,
  );

  // Step 6: Apply additional escaping functions.
  content = escapeBrackets(content);
  content = escapeMhchem(content);
  return content;
}
/**
 * Converts bracket-style LaTeX delimiters into dollar-style ones.
 *
 * `\[ ... \]` becomes `$$ ... $$` and `\( ... \)` becomes `$ ... $`. Fenced
 * code blocks and single-line backtick spans are matched by the same pattern
 * and returned unchanged, so delimiters inside them are not rewritten.
 *
 * @param text The text to scan.
 * @returns The text with bracket delimiters replaced by dollar delimiters.
 */
export function escapeBrackets(text: string): string {
  // Alternatives in priority order:
  //   group 1 - code (fenced block or backtick span)
  //   group 2 - display math body between \[ and \]
  //   group 3 - inline math body between \( and \)
  const delimiterRegex = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  const convert = (whole: string, code?: string, display?: string, inline?: string): string => {
    if (code !== undefined) {
      // Code is passed through verbatim.
      return code;
    }
    if (display !== undefined) {
      return '$$' + display + '$$';
    }
    if (inline !== undefined) {
      return '$' + inline + '$';
    }
    return whole;
  };

  return text.replace(delimiterRegex, convert);
}
/**
 * Adds an extra backslash to `\ce{` and `\pu{` commands that directly follow
 * a `$`: `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`.
 *
 * @param text The text to process.
 * @returns The text with every such occurrence rewritten.
 */
export function escapeMhchem(text: string) {
  // A function replacer is used so the leading `$` is emitted literally rather
  // than being interpreted as a replacement pattern.
  return text.replace(/\$\\(ce|pu)\{/g, (_whole: string, command: string) => '$\\\\' + command + '{');
}