mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-20 18:30:15 +01:00
🧮 feat: Improve LaTeX rendering consistency (#3763)
* refactor: simplify LaTeX pre-processing for more consistent rendering, disables `singleDollarTextMath` * refactor: disable singleDollarTextMath in all markdown components * wip: first pass * refactor: preserve code blocks and convert rather than preserve LaTeX delimiters * refactor: remove unused escapeDollarNumber function from latex.ts
This commit is contained in:
parent
967e8a1e92
commit
ea5140ff0f
3 changed files with 156 additions and 3 deletions
|
|
@ -8,7 +8,7 @@ import { useRecoilValue } from 'recoil';
|
|||
import ReactMarkdown from 'react-markdown';
|
||||
import type { PluggableList } from 'unified';
|
||||
import rehypeHighlight from 'rehype-highlight';
|
||||
import { cn, langSubset, validateIframe, processLaTeX, handleDoubleClick } from '~/utils';
|
||||
import { langSubset, validateIframe, preprocessLaTeX, handleDoubleClick } from '~/utils';
|
||||
import CodeBlock from '~/components/Messages/Content/CodeBlock';
|
||||
import { useFileDownload } from '~/data-provider';
|
||||
import useLocalize from '~/hooks/useLocalize';
|
||||
|
|
@ -123,7 +123,7 @@ const Markdown = memo(({ content = '', isEdited, showCursor, isLatestMessage }:
|
|||
let currentContent = content;
|
||||
if (!isInitializing) {
|
||||
currentContent = currentContent.replace('z-index: 1;', '') || '';
|
||||
currentContent = LaTeXParsing ? processLaTeX(currentContent) : currentContent;
|
||||
currentContent = LaTeXParsing ? preprocessLaTeX(currentContent) : currentContent;
|
||||
}
|
||||
|
||||
const rehypePlugins: PluggableList = [
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import { processLaTeX } from './latex';
|
||||
/* eslint-disable no-useless-escape */
|
||||
import { processLaTeX, preprocessLaTeX } from './latex';
|
||||
|
||||
describe('processLaTeX', () => {
|
||||
test('returns the same string if no LaTeX patterns are found', () => {
|
||||
|
|
@ -103,3 +104,92 @@ describe('processLaTeX', () => {
|
|||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('preprocessLaTeX', () => {
  // Multi-line fixture mixing inline/block math, currency, mhchem and bracket delimiters.
  const mixedInput = `
LaTeX inline $x^2$ and block $$y^2$$
Currency $100 and $200
Chemical $\\ce{H2O}$
Brackets \\[z^2\\]
`;

  // Same fixture after preprocessing: currency escaped, mhchem backslash doubled,
  // bracket delimiters converted to dollar delimiters.
  const mixedExpected = `
LaTeX inline $x^2$ and block $$y^2$$
Currency \\$100 and \\$200
Chemical $\\\\ce{H2O}$
Brackets $$z^2$$
`;

  const unchangedPlain = 'This is a test string without LaTeX';
  const unchangedVariable = 'This $variable is not escaped';
  const unchangedMath = 'Inline $x^2 + y^2 = z^2$ and block $$E = mc^2$$';
  const unchangedEquation = 'The equation is $f(x) = 2x + 3$ where x is a variable.';
  const unchangedSet = 'The set is defined as $\\{x | x > 0\\}$.';

  // Each row is [test name, input, expected output]; rows run in declaration order.
  const cases: ReadonlyArray<readonly [string, string, string]> = [
    ['returns the same string if no LaTeX patterns are found', unchangedPlain, unchangedPlain],
    [
      'escapes dollar signs followed by digits',
      'Price is $50 and $100',
      'Price is \\$50 and \\$100',
    ],
    ['does not escape dollar signs not followed by digits', unchangedVariable, unchangedVariable],
    ['preserves existing LaTeX expressions', unchangedMath, unchangedMath],
    [
      'handles mixed LaTeX and currency',
      'LaTeX $x^2$ and price $50',
      'LaTeX $x^2$ and price \\$50',
    ],
    [
      'converts LaTeX delimiters',
      'Brackets \\[x^2\\] and parentheses \\(y^2\\)',
      'Brackets $$x^2$$ and parentheses $y^2$',
    ],
    [
      'escapes mhchem commands',
      '$\\ce{H2O}$ and $\\pu{123 J}$',
      '$\\\\ce{H2O}$ and $\\\\pu{123 J}$',
    ],
    ['handles complex mixed content', mixedInput, mixedExpected],
    ['handles empty string', '', ''],
    ['preserves code blocks', '```\n$100\n```\nOutside $200', '```\n$100\n```\nOutside \\$200'],
    [
      'handles multiple currency values in a sentence',
      'I have $50 in my wallet and $100 in the bank.',
      'I have \\$50 in my wallet and \\$100 in the bank.',
    ],
    ['preserves LaTeX expressions with numbers', unchangedEquation, unchangedEquation],
    [
      'handles currency values with commas',
      'The price is $1,000,000 for this item.',
      'The price is \\$1,000,000 for this item.',
    ],
    ['preserves LaTeX expressions with special characters', unchangedSet, unchangedSet],
  ];

  for (const [name, input, expected] of cases) {
    test(name, () => {
      expect(preprocessLaTeX(input)).toBe(expected);
    });
  }
});
|
||||
|
|
|
|||
|
|
@ -40,3 +40,66 @@ export const processLaTeX = (_content: string) => {
|
|||
// Restore code blocks
|
||||
return restoreCodeBlocks(processedContent, codeBlocks);
|
||||
};
|
||||
|
||||
/**
 * Preprocesses Markdown content so LaTeX and currency amounts render consistently.
 *
 * Processing order:
 *  1. Fenced (```) and inline (`) code spans are swapped for placeholders so
 *     nothing inside them is touched by the currency step.
 *  2. `$$…$$`, `\[…\]` and `\(…\)` expressions are swapped for placeholders.
 *  3. Every `$` that is directly followed by a digit and is not already
 *     preceded by a backslash is escaped as `\$` (treated as currency).
 *  4. LaTeX placeholders, then code placeholders, are restored verbatim.
 *  5. `escapeBrackets` and `escapeMhchem` are applied to the restored text.
 *
 * Single-dollar inline math (`$…$`) is NOT protected in step 2, so an inline
 * expression that starts with a digit (e.g. `$2x$`) has its opening `$` escaped.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with replaced delimiters and escaped characters.
 */
export function preprocessLaTeX(content: string): string {
  let text = content;

  // Step 1: Protect code blocks
  const codeBlocks: string[] = [];
  text = text.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (code: string) => {
    codeBlocks.push(code);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions
  const latexExpressions: string[] = [];
  text = text.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (latex: string) => {
    latexExpressions.push(latex);
    return `<<LATEX_${latexExpressions.length - 1}>>`;
  });

  // Step 3: Escape dollar signs that are likely currency indicators.
  // The lookbehind skips dollars that are already escaped (`\$50`); escaping
  // them again would yield `\\$50`, i.e. a literal backslash plus a live `$`.
  text = text.replace(/(?<!\\)\$(?=\d)/g, '\\$');

  // Step 4: Restore LaTeX expressions.
  // If the input itself contained placeholder-looking text with no stored
  // entry, keep that text as-is instead of emitting the string "undefined".
  text = text.replace(
    /<<LATEX_(\d+)>>/g,
    (placeholder: string, index: string) =>
      latexExpressions[Number.parseInt(index, 10)] ?? placeholder,
  );

  // Step 5: Restore code blocks (same fallback as above)
  text = text.replace(
    /<<CODE_BLOCK_(\d+)>>/g,
    (placeholder: string, index: string) => codeBlocks[Number.parseInt(index, 10)] ?? placeholder,
  );

  // Step 6: Apply additional escaping functions
  text = escapeBrackets(text);
  text = escapeMhchem(text);

  return text;
}
|
||||
|
||||
/**
 * Converts backslash-style LaTeX delimiters to dollar-style delimiters.
 *
 * - `\[ … \]` becomes `$$ … $$`
 * - `\( … \)` becomes `$ … $`
 * - Fenced (```) and inline (`) code spans are matched first and returned
 *   unchanged, so delimiters inside code are left alone.
 *
 * @param text The Markdown text to convert.
 * @returns The text with bracket/parenthesis delimiters rewritten.
 */
export function escapeBrackets(text: string): string {
  // Alternatives, in priority order: code span | \[display\] | \(inline\)
  const delimiterRegex = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  // Exactly one capture group is defined per match; the others are undefined.
  const convert = (whole: string, code?: string, display?: string, inline?: string): string => {
    if (code != null) {
      return code;
    }
    if (display != null) {
      return `$$${display}$$`;
    }
    if (inline != null) {
      return `$${inline}$`;
    }
    return whole;
  };

  return text.replace(delimiterRegex, convert);
}
|
||||
|
||||
/**
 * Doubles the backslash of mhchem commands that directly follow a `$`.
 *
 * Every `$\ce{` becomes `$\\ce{` and every `$\pu{` becomes `$\\pu{`;
 * all other text is returned unchanged.
 *
 * @param text The text to scan for `$\ce{` / `$\pu{` sequences.
 * @returns The text with those sequences rewritten.
 */
export function escapeMhchem(text: string) {
  // Literal (non-regex) substitution of every occurrence, one command at a time.
  const withCe = text.split('$\\ce{').join('$\\\\ce{');
  const withPu = withCe.split('$\\pu{').join('$\\\\pu{');
  return withPu;
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue