2024-01-18 14:44:10 -05:00
|
|
|
// Regex to check if the processed content contains any potential LaTeX patterns.
// Display math (`\[...\]`) and `equation` environments are matched with `[\s\S]` so that
// expressions spanning several lines are detected as well; a plain `.` stops at a newline,
// which would make multi-line display math invisible to this check.
const containsLatexRegex =
  /\\\(.*?\\\)|\\\[[\s\S]*?\\\]|\$.*?\$|\\begin\{equation\}[\s\S]*?\\end\{equation\}/;
|
2024-01-22 10:02:36 -05:00
|
|
|
|
2024-01-18 14:44:10 -05:00
|
|
|
// Inline math: `\( ... \)` on a single line with at least one character inside (global).
const inlineLatex = /\\\((.+?)\\\)/g;
// Display math: `\[ ... \]`, may span lines (dotAll); the character right before the closing
// delimiter must not be a backslash (global).
const blockLatex = /\\\[(.*?[^\\])\\\]/gs;
|
|
|
|
|
|
2024-01-22 10:02:36 -05:00
|
|
|
/**
 * Puts previously extracted code back in place of its `<<CODE_BLOCK_n>>` placeholder.
 *
 * @param content Text containing `<<CODE_BLOCK_n>>` placeholders.
 * @param codeBlocks Extracted code, where entry `n` belongs to placeholder `n`.
 * @returns The text with every placeholder that has a stored entry replaced by that entry.
 */
const restoreCodeBlocks = (content: string, codeBlocks: string[]) => {
  return content.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    // A placeholder-looking token without a stored entry (e.g. typed literally by the user)
    // is kept as-is instead of being turned into the string "undefined".
    (match: string, index: string) => codeBlocks[Number(index)] ?? match,
  );
};
|
|
|
|
|
|
|
|
|
|
// Matches code that must be left untouched: fenced blocks delimited by triple backticks
// (may span lines) and single-line inline spans delimited by single backticks.
const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrites LaTeX delimiters into the dollar-sign form: `\( ... \)` becomes `$ ... $` and
 * `\[ ... \]` becomes `$$ ... $$`. Fenced and inline code is swapped for placeholders while
 * the text is processed and restored before returning.
 *
 * @param _content The raw text to process.
 * @returns The processed text.
 */
export const processLaTeX = (_content: string) => {
  // Temporarily replace code blocks and inline code with placeholders
  const codeBlocks: string[] = [];
  const content = _content.replace(codeBlockRegex, (match: string) => {
    codeBlocks.push(match);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Escape dollar signs followed by a digit or space and digit.
  // The run of backslashes in front of the `$` is captured so that a dollar sign that is
  // already escaped (odd number of backslashes) is left alone instead of being escaped twice.
  let processedContent = content.replace(
    /(\\*)\$(?=\s?\d)/g,
    (match: string, backslashes: string) =>
      backslashes.length % 2 === 1 ? match : `${backslashes}\\$`,
  );

  // If no LaTeX patterns are found, restore code blocks and return the processed content
  if (!containsLatexRegex.test(processedContent)) {
    return restoreCodeBlocks(processedContent, codeBlocks);
  }

  // Convert LaTeX expressions to a markdown compatible format
  processedContent = processedContent
    .replace(inlineLatex, (_match: string, equation: string) => `$${equation}$`) // Convert inline LaTeX
    .replace(blockLatex, (_match: string, equation: string) => `$$${equation}$$`); // Convert block LaTeX

  // Restore code blocks
  return restoreCodeBlocks(processedContent, codeBlocks);
};
|
2024-08-23 13:45:27 -04:00
|
|
|
|
|
|
|
|
/**
 * Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
 *
 * Fenced and inline code is swapped for placeholders first and is only put back after every
 * rewrite has run, so code is returned exactly as it was written.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with replaced delimiters and escaped characters.
 */
export function preprocessLaTeX(content: string): string {
  // Step 1: Protect code blocks
  const codeBlocks: string[] = [];
  let processed = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (match: string) => {
    codeBlocks.push(match);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions
  const latexExpressions: string[] = [];
  processed = processed.replace(
    /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
    (match: string) => {
      latexExpressions.push(match);
      return `<<LATEX_${latexExpressions.length - 1}>>`;
    },
  );

  // Step 3: Escape dollar signs that are likely currency indicators.
  // The run of backslashes in front of the `$` is captured so that a dollar sign that is
  // already escaped (odd number of backslashes) is not escaped a second time.
  processed = processed.replace(
    /(\\*)\$(?=\d)/g,
    (match: string, backslashes: string) =>
      backslashes.length % 2 === 1 ? match : `${backslashes}\\$`,
  );

  // Step 4: Restore LaTeX expressions.
  // A placeholder-looking token with no stored entry is kept verbatim rather than becoming
  // the string "undefined".
  processed = processed.replace(
    /<<LATEX_(\d+)>>/g,
    (match: string, index: string) => latexExpressions[Number(index)] ?? match,
  );

  // Step 5: Apply additional escaping functions while code is still replaced by
  // placeholders, so that neither rewrite can alter the contents of a code block.
  processed = escapeBrackets(processed);
  processed = escapeMhchem(processed);

  // Step 6: Restore code blocks
  return processed.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (match: string, index: string) => codeBlocks[Number(index)] ?? match,
  );
}
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrites bracket-style LaTeX delimiters into dollar-sign delimiters:
 * `\[ ... \]` becomes `$$ ... $$` and `\( ... \)` becomes `$ ... $`.
 * Fenced code blocks and inline code spans are matched first and returned unchanged.
 *
 * @param text The text to rewrite.
 * @returns The text with bracket delimiters replaced.
 */
export function escapeBrackets(text: string): string {
  const delimiterPattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  // Exactly one capture group is set per match; which one decides the rewrite.
  const rewrite = (whole: string, code?: string, display?: string, inline?: string): string => {
    if (code != null) {
      return code;
    }
    if (display != null) {
      return `$$${display}$$`;
    }
    if (inline != null) {
      return `$${inline}$`;
    }
    return whole;
  };

  return text.replace(delimiterPattern, rewrite);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Doubles the backslash of the mhchem commands `\ce{` and `\pu{` wherever they directly
 * follow a dollar sign, i.e. `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`.
 *
 * @param text The text to rewrite.
 * @returns The text with those command prefixes rewritten.
 */
export function escapeMhchem(text: string) {
  const commands = ['ce', 'pu'];
  let result = text;
  for (const command of commands) {
    // split/join replaces every literal occurrence, one command after the other.
    result = result.split('$\\' + command + '{').join('$\\\\' + command + '{');
  }
  return result;
}
|