mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-20 18:30:15 +01:00
🧮 feat: Improve LaTeX rendering consistency (#3763)
* refactor: simplify LaTeX pre-processing for more consistent rendering, disables `singleDollarTextMath`
* refactor: disable singleDollarTextMath in all markdown components
* wip: first pass
* refactor: preserve code blocks and convert rather than preserve LaTeX delimiters
* refactor: remove unused escapeDollarNumber function from latex.ts
This commit is contained in:
parent
967e8a1e92
commit
ea5140ff0f
3 changed files with 156 additions and 3 deletions
|
|
@ -8,7 +8,7 @@ import { useRecoilValue } from 'recoil';
|
||||||
import ReactMarkdown from 'react-markdown';
|
import ReactMarkdown from 'react-markdown';
|
||||||
import type { PluggableList } from 'unified';
|
import type { PluggableList } from 'unified';
|
||||||
import rehypeHighlight from 'rehype-highlight';
|
import rehypeHighlight from 'rehype-highlight';
|
||||||
import { cn, langSubset, validateIframe, processLaTeX, handleDoubleClick } from '~/utils';
|
import { langSubset, validateIframe, preprocessLaTeX, handleDoubleClick } from '~/utils';
|
||||||
import CodeBlock from '~/components/Messages/Content/CodeBlock';
|
import CodeBlock from '~/components/Messages/Content/CodeBlock';
|
||||||
import { useFileDownload } from '~/data-provider';
|
import { useFileDownload } from '~/data-provider';
|
||||||
import useLocalize from '~/hooks/useLocalize';
|
import useLocalize from '~/hooks/useLocalize';
|
||||||
|
|
@ -123,7 +123,7 @@ const Markdown = memo(({ content = '', isEdited, showCursor, isLatestMessage }:
|
||||||
let currentContent = content;
|
let currentContent = content;
|
||||||
if (!isInitializing) {
|
if (!isInitializing) {
|
||||||
currentContent = currentContent.replace('z-index: 1;', '') || '';
|
currentContent = currentContent.replace('z-index: 1;', '') || '';
|
||||||
currentContent = LaTeXParsing ? processLaTeX(currentContent) : currentContent;
|
currentContent = LaTeXParsing ? preprocessLaTeX(currentContent) : currentContent;
|
||||||
}
|
}
|
||||||
|
|
||||||
const rehypePlugins: PluggableList = [
|
const rehypePlugins: PluggableList = [
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import { processLaTeX } from './latex';
|
/* eslint-disable no-useless-escape */
|
||||||
|
import { processLaTeX, preprocessLaTeX } from './latex';
|
||||||
|
|
||||||
describe('processLaTeX', () => {
|
describe('processLaTeX', () => {
|
||||||
test('returns the same string if no LaTeX patterns are found', () => {
|
test('returns the same string if no LaTeX patterns are found', () => {
|
||||||
|
|
@ -103,3 +104,92 @@ describe('processLaTeX', () => {
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('preprocessLaTeX', () => {
  /**
   * One row per scenario. When `expected` is omitted the preprocessor must
   * return `content` unchanged.
   */
  interface Scenario {
    title: string;
    content: string;
    expected?: string;
  }

  const scenarios: Scenario[] = [
    {
      title: 'returns the same string if no LaTeX patterns are found',
      content: 'This is a test string without LaTeX',
    },
    {
      title: 'escapes dollar signs followed by digits',
      content: 'Price is $50 and $100',
      expected: 'Price is \\$50 and \\$100',
    },
    {
      title: 'does not escape dollar signs not followed by digits',
      content: 'This $variable is not escaped',
    },
    {
      title: 'preserves existing LaTeX expressions',
      content: 'Inline $x^2 + y^2 = z^2$ and block $$E = mc^2$$',
    },
    {
      title: 'handles mixed LaTeX and currency',
      content: 'LaTeX $x^2$ and price $50',
      expected: 'LaTeX $x^2$ and price \\$50',
    },
    {
      title: 'converts LaTeX delimiters',
      content: 'Brackets \\[x^2\\] and parentheses \\(y^2\\)',
      expected: 'Brackets $$x^2$$ and parentheses $y^2$',
    },
    {
      title: 'escapes mhchem commands',
      content: '$\\ce{H2O}$ and $\\pu{123 J}$',
      expected: '$\\\\ce{H2O}$ and $\\\\pu{123 J}$',
    },
    {
      // Multi-line input mixing every feature handled by the preprocessor.
      title: 'handles complex mixed content',
      content: `
LaTeX inline $x^2$ and block $$y^2$$
Currency $100 and $200
Chemical $\\ce{H2O}$
Brackets \\[z^2\\]
`,
      expected: `
LaTeX inline $x^2$ and block $$y^2$$
Currency \\$100 and \\$200
Chemical $\\\\ce{H2O}$
Brackets $$z^2$$
`,
    },
    {
      title: 'handles empty string',
      content: '',
    },
    {
      // The `$100` inside the fence stays raw; only the one outside is escaped.
      title: 'preserves code blocks',
      content: '```\n$100\n```\nOutside $200',
      expected: '```\n$100\n```\nOutside \\$200',
    },
    {
      title: 'handles multiple currency values in a sentence',
      content: 'I have $50 in my wallet and $100 in the bank.',
      expected: 'I have \\$50 in my wallet and \\$100 in the bank.',
    },
    {
      title: 'preserves LaTeX expressions with numbers',
      content: 'The equation is $f(x) = 2x + 3$ where x is a variable.',
    },
    {
      title: 'handles currency values with commas',
      content: 'The price is $1,000,000 for this item.',
      expected: 'The price is \\$1,000,000 for this item.',
    },
    {
      title: 'preserves LaTeX expressions with special characters',
      content: 'The set is defined as $\\{x | x > 0\\}$.',
    },
  ];

  // Register the scenarios in declaration order so the report reads top to bottom.
  for (const { title, content, expected = content } of scenarios) {
    test(title, () => {
      expect(preprocessLaTeX(content)).toBe(expected);
    });
  }
});
|
||||||
|
|
|
||||||
|
|
@ -40,3 +40,66 @@ export const processLaTeX = (_content: string) => {
|
||||||
// Restore code blocks
|
// Restore code blocks
|
||||||
return restoreCodeBlocks(processedContent, codeBlocks);
|
return restoreCodeBlocks(processedContent, codeBlocks);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Preprocesses LaTeX content by replacing delimiters and escaping certain characters.
 *
 * Processing order:
 * 1. Fenced (```…```) and inline (`…`) code is swapped for placeholders.
 * 2. `$$…$$`, `\[…\]` and `\(…\)` spans are swapped for placeholders so that digits
 *    inside them are not mistaken for currency amounts.
 * 3. Every remaining `$` that is immediately followed by a digit is escaped as `\$`.
 *    Single-dollar inline math is NOT protected in step 2, so `$5x$` is escaped too.
 * 4. The LaTeX spans are restored, then `escapeBrackets` and `escapeMhchem` are applied.
 * 5. Code is restored last, so none of the steps above can rewrite code contents.
 *
 * Placeholders look like `<<LATEX_n>>` / `<<CODE_BLOCK_n>>`. Such text appearing
 * literally in the input with an index that was never stored is left untouched.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with replaced delimiters and escaped characters.
 */
export function preprocessLaTeX(content: string): string {
  const codeBlocks: string[] = [];
  const latexExpressions: string[] = [];

  // Builds a replacer that maps a placeholder back to its stored text, falling
  // back to the placeholder itself when the index is unknown (instead of
  // injecting the string "undefined").
  const restoreFrom =
    (stash: string[]) =>
    (placeholder: string, index: string): string =>
      stash[Number(index)] ?? placeholder;

  // Step 1: Protect code blocks
  let processed = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (code: string) => {
    codeBlocks.push(code);
    return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
  });

  // Step 2: Protect existing LaTeX expressions
  processed = processed.replace(
    /(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g,
    (expression: string) => {
      latexExpressions.push(expression);
      return `<<LATEX_${latexExpressions.length - 1}>>`;
    },
  );

  // Step 3: Escape dollar signs that are likely currency indicators
  processed = processed.replace(/\$(?=\d)/g, '\\$');

  // Step 4: Restore LaTeX expressions
  processed = processed.replace(/<<LATEX_(\d+)>>/g, restoreFrom(latexExpressions));

  // Step 5: Apply additional escaping functions while code is still hidden
  // behind placeholders, so code contents are never rewritten.
  processed = escapeBrackets(processed);
  processed = escapeMhchem(processed);

  // Step 6: Restore code blocks
  return processed.replace(/<<CODE_BLOCK_(\d+)>>/g, restoreFrom(codeBlocks));
}
|
||||||
|
|
||||||
|
/**
 * Converts bracket-style LaTeX delimiters to dollar-style delimiters.
 *
 * - `\[ … \]` becomes `$$ … $$` (may span multiple lines).
 * - `\( … \)` becomes `$ … $` (single line only).
 * - Fenced code (```…```) and inline code (`…`) matched by the same pattern is
 *   returned unchanged, so delimiters inside code are not converted.
 * - An empty `\(\)` pair is left as is: converting it would emit a bare `$$`,
 *   which is itself a display-math delimiter.
 *
 * @param text The text to scan.
 * @returns The text with bracket delimiters replaced by dollar delimiters.
 */
export function escapeBrackets(text: string): string {
  // Group 1: code; group 2: body of \[ … \]; group 3: body of \( … \).
  // The `[^\\]` before the closing `\]` stops a trailing `\\` (LaTeX line break)
  // from being read as part of the closing delimiter.
  const pattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;

  return text.replace(
    pattern,
    (
      match: string,
      codeBlock: string | undefined,
      squareBracket: string | undefined,
      roundBracket: string | undefined,
    ): string => {
      if (codeBlock != null) {
        return codeBlock;
      }
      if (squareBracket != null) {
        return `$$${squareBracket}$$`;
      }
      if (roundBracket != null && roundBracket !== '') {
        return `$${roundBracket}$`;
      }
      // Empty `\(\)`: keep the original text.
      return match;
    },
  );
}
|
||||||
|
|
||||||
|
/**
 * Doubles the backslash of the mhchem commands `\ce{` and `\pu{` when they
 * directly follow a `$`, i.e. `$\ce{` becomes `$\\ce{` and `$\pu{` becomes `$\\pu{`.
 *
 * Commands that do not immediately follow a `$` are left unchanged, and text
 * that is already escaped is not escaped again.
 *
 * NOTE(review): presumably the extra backslash is needed so a literal backslash
 * survives downstream markdown/math parsing — confirm against the renderer.
 *
 * @param text The text to scan.
 * @returns The text with the matching mhchem commands escaped.
 */
export function escapeMhchem(text: string): string {
  // Single global-regex pass; avoids String.prototype.replaceAll (ES2021).
  return text.replace(
    /\$\\(ce|pu)\{/g,
    (_match: string, command: string): string => `$\\\\${command}{`,
  );
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue