🛠️ fix: Preserve Dollar Signs in Code Blocks for LaTeX Parsing (#1612)

This commit is contained in:
Danny Avila 2024-01-22 10:02:36 -05:00 committed by GitHub
parent 367c78f8d2
commit 36560d5d9b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 42 additions and 5 deletions

View file

@ -83,4 +83,23 @@ describe('processLaTeX', () => {
LaTeX is a typesetting system commonly used for mathematical and scientific documents. It provides a wide range of formatting options and symbols for expressing mathematical expressions.`;
expect(processLaTeX(complexBlockLatex)).toBe(expectedOutput);
});
// Code spans (inline or fenced) must come back from processLaTeX unchanged,
// while LaTeX delimiters outside of them are still converted.
describe('processLaTeX with code block exception', () => {
  test('ignores dollar signs inside inline code', () => {
    // A price inside single backticks: the output must equal the input.
    const input = 'This is inline code: `$100`';
    const result = processLaTeX(input);
    expect(result).toBe(input);
  });

  test('ignores dollar signs inside multi-line code blocks', () => {
    // Dollar amounts on separate lines of a fenced block.
    const input = '```\n$100\n# $1000\n```';
    const result = processLaTeX(input);
    expect(result).toBe(input);
  });

  test('processes LaTeX outside of code blocks', () => {
    // The \( ... \) pair is rewritten to $ ... $; the fenced block is untouched.
    const input = 'Outside \\(x^2 + y^2 = z^2\\) and inside code block: ```\n$100\n# $1000\n```';
    const want = 'Outside $x^2 + y^2 = z^2$ and inside code block: ```\n$100\n# $1000\n```';
    const result = processLaTeX(input);
    expect(result).toBe(want);
  });
});
});

View file

@ -1,18 +1,35 @@
// Guard regex: does the text contain anything that looks like LaTeX?
// Alternatives, in order: \( ... \), \[ ... \], $ ... $, and
// \begin{equation} ... \end{equation}.
// The `s` (dotAll) flag lets `.` cross line breaks, so a block expression that
// spans several lines is detected too; without it, text whose only LaTeX is a
// multi-line \[ ... \] would fail this guard and never reach `blockLatex`.
// No `g` flag on purpose: the regex is used with test(), and a global regex
// would carry `lastIndex` state from one call to the next.
const containsLatexRegex =
  /\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/s;

// Inline LaTeX: \( ... \) with at least one character inside. `.` does not
// match newlines here, so an inline expression has to sit on a single line.
const inlineLatex = /\\\((.+?)\\\)/g;

// Block LaTeX: \[ ... \], possibly spanning lines (`s` flag). The `[^\\]`
// requires the character right before the closing `\]` not to be a backslash.
// A looser variant without that requirement would be /\\\[(.*?)\\\]/gs.
const blockLatex = /\\\[(.*?[^\\])\\\]/gs;
export const processLaTeX = (content: string) => {
/**
 * Puts previously extracted code spans back into `content`.
 *
 * Every `<<CODE_BLOCK_n>>` token is replaced with `codeBlocks[n]`. A token
 * whose index has no stored entry is left exactly as it was written, so text
 * that merely looks like a placeholder is not turned into the string
 * "undefined".
 *
 * @param content - Text containing `<<CODE_BLOCK_n>>` placeholders.
 * @param codeBlocks - Original code spans, indexed by placeholder number.
 * @returns `content` with every resolvable placeholder substituted.
 */
const restoreCodeBlocks = (content: string, codeBlocks: string[]): string => {
  // A replacer function is used (not a replacement string) so that `$`
  // sequences inside the restored code are inserted literally.
  return content.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (match: string, index: string) => codeBlocks[Number(index)] ?? match,
  );
};
// Regex to identify code blocks and inline code.
// First alternative: a fenced block between triple backticks, matched lazily
// and allowed to span lines via [\s\S]. Second alternative: an inline span
// between single backticks; `.` does not match newlines here, so an inline
// span stays on one line. The fenced alternative is listed first, so at a
// triple backtick it is tried before the inline one.
// The `g` flag lets a single replace() call visit every occurrence.
const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;
/**
 * Rewrites LaTeX delimiters in a piece of text while leaving code untouched.
 *
 * Steps visible in this block:
 *  1. Each match of `codeBlockRegex` is stored in `codeBlocks` and replaced by
 *     a `<<CODE_BLOCK_n>>` placeholder.
 *  2. A `$` that is directly followed by a digit, or by one whitespace
 *     character and then a digit, gets a backslash put in front of it.
 *  3. If `containsLatexRegex` does not match, the placeholders are restored
 *     and the text is returned.
 *  4. Otherwise `inlineLatex` matches become `$...$`, `blockLatex` matches
 *     become `$$...$$`, and the placeholders are restored.
 *
 * NOTE(review): this block reads like a rendered diff. Some old and new lines
 * sit next to each other, and the statement that starts the `.replace` chain
 * is not visible here; confirm against the actual file.
 *
 * @param _content - Raw text to process.
 * @returns The processed text with code spans restored.
 */
export const processLaTeX = (_content: string) => {
let content = _content;
// Temporarily replace code blocks and inline code with placeholders
const codeBlocks: string[] = [];
let index = 0;
content = content.replace(codeBlockRegex, (match) => {
// Store the span first; the placeholder number is the same as its array index
codeBlocks[index] = match;
return `<<CODE_BLOCK_${index++}>>`;
});
// Escape dollar signs followed by a digit or space and digit
// (`\s?` accepts any single whitespace character; the replacement string is a
// backslash followed by `$`)
let processedContent = content.replace(/(\$)(?=\s?\d)/g, '\\$');
// If no LaTeX patterns are found, return the processed content
// If no LaTeX patterns are found, restore code blocks and return the processed content
if (!containsLatexRegex.test(processedContent)) {
return processedContent;
return restoreCodeBlocks(processedContent, codeBlocks);
}
// Convert LaTeX expressions to a markdown compatible format
@ -20,5 +37,6 @@ export const processLaTeX = (content: string) => {
.replace(inlineLatex, (match: string, equation: string) => `$${equation}$`) // Convert inline LaTeX
.replace(blockLatex, (match: string, equation: string) => `$$${equation}$$`); // Convert block LaTeX
return processedContent;
// Restore code blocks
return restoreCodeBlocks(processedContent, codeBlocks);
};