mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-09-22 06:00:56 +02:00
🎯 refactor: LaTeX and Math Rendering (#7952)
* refactor: Markdown LaTeX processing - Added micromark-extension-llm-math as a dependency in package.json and package-lock.json. - Updated Vite configuration to alias micromark-extension-math. - Modified Markdown components to use singleDollarTextMath: false for improved LaTeX rendering. - Refactored latex utility functions to enhance LaTeX processing and escaping mechanisms. * chore: linting of `EditTextPart` * fix: handle key up to initiate edit of latest user message by adding id prop to Edit Message HoverButton * chore: linting in Artifact component * refactor: enhance LaTeX preprocessing functionality - Updated `preprocessLaTeX` to improve handling of currency and LaTeX expressions. - Introduced optimized regex patterns for better performance. - Added support for escaping mhchem commands and handling code blocks. - Enhanced tests for various LaTeX scenarios, including currency and special characters. - Refactored existing tests to align with new preprocessing logic. * chore: filter out false positives in unused packages workflow - Added a grep command to exclude the micromark-extension-llm-math package from the list of unused dependencies in the GitHub Actions workflow.
This commit is contained in:
parent
dba0ec4320
commit
c7e4523d7c
14 changed files with 341 additions and 251 deletions
2
.github/workflows/unused-packages.yml
vendored
2
.github/workflows/unused-packages.yml
vendored
|
@ -98,6 +98,8 @@ jobs:
|
|||
cd client
|
||||
UNUSED=$(depcheck --json | jq -r '.dependencies | join("\n")' || echo "")
|
||||
UNUSED=$(comm -23 <(echo "$UNUSED" | sort) <(cat ../client_used_deps.txt ../client_used_code.txt | sort) || echo "")
|
||||
# Filter out false positives
|
||||
UNUSED=$(echo "$UNUSED" | grep -v "^micromark-extension-llm-math$" || echo "")
|
||||
echo "CLIENT_UNUSED<<EOF" >> $GITHUB_ENV
|
||||
echo "$UNUSED" >> $GITHUB_ENV
|
||||
echo "EOF" >> $GITHUB_ENV
|
||||
|
|
|
@ -75,6 +75,7 @@
|
|||
"lodash": "^4.17.21",
|
||||
"lucide-react": "^0.394.0",
|
||||
"match-sorter": "^6.3.4",
|
||||
"micromark-extension-llm-math": "^3.1.0",
|
||||
"qrcode.react": "^4.2.0",
|
||||
"rc-input-number": "^7.4.2",
|
||||
"react": "^18.2.0",
|
||||
|
|
|
@ -40,7 +40,7 @@ const defaultType = 'unknown';
|
|||
const defaultIdentifier = 'lc-no-identifier';
|
||||
|
||||
export function Artifact({
|
||||
node,
|
||||
node: _node,
|
||||
...props
|
||||
}: Artifact & {
|
||||
children: React.ReactNode | { props: { children: React.ReactNode } };
|
||||
|
@ -95,7 +95,7 @@ export function Artifact({
|
|||
setArtifacts((prevArtifacts) => {
|
||||
if (
|
||||
prevArtifacts?.[artifactKey] != null &&
|
||||
prevArtifacts[artifactKey].content === content
|
||||
prevArtifacts[artifactKey]?.content === content
|
||||
) {
|
||||
return prevArtifacts;
|
||||
}
|
||||
|
|
|
@ -204,7 +204,7 @@ const Markdown = memo(({ content = '', isLatestMessage }: TContentProps) => {
|
|||
remarkGfm,
|
||||
remarkDirective,
|
||||
artifactPlugin,
|
||||
[remarkMath, { singleDollarTextMath: true }],
|
||||
[remarkMath, { singleDollarTextMath: false }],
|
||||
unicodeCitation,
|
||||
];
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ const MarkdownLite = memo(
|
|||
/** @ts-ignore */
|
||||
supersub,
|
||||
remarkGfm,
|
||||
[remarkMath, { singleDollarTextMath: true }],
|
||||
[remarkMath, { singleDollarTextMath: false }],
|
||||
]}
|
||||
/** @ts-ignore */
|
||||
rehypePlugins={rehypePlugins}
|
||||
|
|
|
@ -25,6 +25,7 @@ type THoverButtons = {
|
|||
};
|
||||
|
||||
type HoverButtonProps = {
|
||||
id?: string;
|
||||
onClick: (e?: React.MouseEvent<HTMLButtonElement>) => void;
|
||||
title: string;
|
||||
icon: React.ReactNode;
|
||||
|
@ -67,6 +68,7 @@ const extractMessageContent = (message: TMessage): string => {
|
|||
|
||||
const HoverButton = memo(
|
||||
({
|
||||
id,
|
||||
onClick,
|
||||
title,
|
||||
icon,
|
||||
|
@ -89,6 +91,7 @@ const HoverButton = memo(
|
|||
|
||||
return (
|
||||
<button
|
||||
id={id}
|
||||
className={buttonStyle}
|
||||
onClick={onClick}
|
||||
type="button"
|
||||
|
@ -213,6 +216,7 @@ const HoverButtons = ({
|
|||
{/* Edit Button */}
|
||||
{isEditableEndpoint && (
|
||||
<HoverButton
|
||||
id={`edit-${message.messageId}`}
|
||||
onClick={onEdit}
|
||||
title={localize('com_ui_edit')}
|
||||
icon={<EditIcon size="19" />}
|
||||
|
|
|
@ -143,7 +143,7 @@ export default function VariableForm({
|
|||
<div className="mb-6 max-h-screen max-w-[90vw] overflow-auto rounded-md bg-surface-tertiary p-4 text-text-secondary dark:bg-surface-primary sm:max-w-full md:max-h-96">
|
||||
<ReactMarkdown
|
||||
/** @ts-ignore */
|
||||
remarkPlugins={[supersub, remarkGfm, [remarkMath, { singleDollarTextMath: true }]]}
|
||||
remarkPlugins={[supersub, remarkGfm, [remarkMath, { singleDollarTextMath: false }]]}
|
||||
rehypePlugins={[
|
||||
/** @ts-ignore */
|
||||
[rehypeKatex],
|
||||
|
|
|
@ -55,7 +55,7 @@ const PromptDetails = ({ group }: { group?: TPromptGroup }) => {
|
|||
/** @ts-ignore */
|
||||
supersub,
|
||||
remarkGfm,
|
||||
[remarkMath, { singleDollarTextMath: true }],
|
||||
[remarkMath, { singleDollarTextMath: false }],
|
||||
]}
|
||||
rehypePlugins={[
|
||||
/** @ts-ignore */
|
||||
|
|
|
@ -130,7 +130,7 @@ const PromptEditor: React.FC<Props> = ({ name, isEditing, setIsEditing }) => {
|
|||
/** @ts-ignore */
|
||||
supersub,
|
||||
remarkGfm,
|
||||
[remarkMath, { singleDollarTextMath: true }],
|
||||
[remarkMath, { singleDollarTextMath: false }],
|
||||
]}
|
||||
/** @ts-ignore */
|
||||
rehypePlugins={rehypePlugins}
|
||||
|
|
|
@ -1,163 +1,92 @@
|
|||
|
||||
import { processLaTeX, preprocessLaTeX } from './latex';
|
||||
|
||||
describe('processLaTeX', () => {
|
||||
test('returns the same string if no LaTeX patterns are found', () => {
|
||||
const content = 'This is a test string without LaTeX';
|
||||
expect(processLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('converts inline LaTeX expressions correctly', () => {
|
||||
const content = 'This is an inline LaTeX expression: \\(x^2 + y^2 = z^2\\)';
|
||||
const expected = 'This is an inline LaTeX expression: $x^2 + y^2 = z^2$';
|
||||
expect(processLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('converts block LaTeX expressions correctly', () => {
|
||||
const content = 'This is a block LaTeX expression: \\[E = mc^2\\]';
|
||||
const expected = 'This is a block LaTeX expression: $$E = mc^2$$';
|
||||
expect(processLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('converts mixed LaTeX expressions correctly', () => {
|
||||
const content = 'Inline \\(a + b = c\\) and block \\[x^2 + y^2 = z^2\\]';
|
||||
const expected = 'Inline $a + b = c$ and block $$x^2 + y^2 = z^2$$';
|
||||
expect(processLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('escapes dollar signs followed by a digit or space and digit', () => {
|
||||
const content = 'Price is $50 and $ 100';
|
||||
const expected = 'Price is \\$50 and \\$ 100';
|
||||
expect(processLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles strings with no content', () => {
|
||||
const content = '';
|
||||
expect(processLaTeX(content)).toBe('');
|
||||
});
|
||||
|
||||
test('does not alter already valid inline Markdown LaTeX', () => {
|
||||
const content = 'This is a valid inline LaTeX: $x^2 + y^2 = z^2$';
|
||||
expect(processLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('does not alter already valid block Markdown LaTeX', () => {
|
||||
const content = 'This is a valid block LaTeX: $$E = mc^2$$';
|
||||
expect(processLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('correctly processes a mix of valid Markdown LaTeX and LaTeX patterns', () => {
|
||||
const content = 'Valid $a + b = c$ and LaTeX to convert \\(x^2 + y^2 = z^2\\)';
|
||||
const expected = 'Valid $a + b = c$ and LaTeX to convert $x^2 + y^2 = z^2$';
|
||||
expect(processLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('correctly handles strings with LaTeX and non-LaTeX dollar signs', () => {
|
||||
const content = 'Price $100 and LaTeX \\(x^2 + y^2 = z^2\\)';
|
||||
const expected = 'Price \\$100 and LaTeX $x^2 + y^2 = z^2$';
|
||||
expect(processLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('ignores non-LaTeX content enclosed in dollar signs', () => {
|
||||
const content = 'This is not LaTeX: $This is just text$';
|
||||
expect(processLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('correctly processes complex block LaTeX with line breaks', () => {
|
||||
const complexBlockLatex = `Certainly! Here's an example of a mathematical formula written in LaTeX:
|
||||
|
||||
\\[
|
||||
\\sum_{i=1}^{n} \\left( \\frac{x_i}{y_i} \\right)^2
|
||||
\\]
|
||||
|
||||
This formula represents the sum of the squares of the ratios of \\(x\\) to \\(y\\) for \\(n\\) terms, where \\(x_i\\) and \\(y_i\\) represent the values of \\(x\\) and \\(y\\) for each term.
|
||||
|
||||
LaTeX is a typesetting system commonly used for mathematical and scientific documents. It provides a wide range of formatting options and symbols for expressing mathematical expressions.`;
|
||||
const expectedOutput = `Certainly! Here's an example of a mathematical formula written in LaTeX:
|
||||
|
||||
$$
|
||||
\\sum_{i=1}^{n} \\left( \\frac{x_i}{y_i} \\right)^2
|
||||
$$
|
||||
|
||||
This formula represents the sum of the squares of the ratios of $x$ to $y$ for $n$ terms, where $x_i$ and $y_i$ represent the values of $x$ and $y$ for each term.
|
||||
|
||||
LaTeX is a typesetting system commonly used for mathematical and scientific documents. It provides a wide range of formatting options and symbols for expressing mathematical expressions.`;
|
||||
expect(processLaTeX(complexBlockLatex)).toBe(expectedOutput);
|
||||
});
|
||||
|
||||
describe('processLaTeX with code block exception', () => {
|
||||
test('ignores dollar signs inside inline code', () => {
|
||||
const content = 'This is inline code: `$100`';
|
||||
expect(processLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('ignores dollar signs inside multi-line code blocks', () => {
|
||||
const content = '```\n$100\n# $1000\n```';
|
||||
expect(processLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('processes LaTeX outside of code blocks', () => {
|
||||
const content =
|
||||
'Outside \\(x^2 + y^2 = z^2\\) and inside code block: ```\n$100\n# $1000\n```';
|
||||
const expected = 'Outside $x^2 + y^2 = z^2$ and inside code block: ```\n$100\n# $1000\n```';
|
||||
expect(processLaTeX(content)).toBe(expected);
|
||||
});
|
||||
});
|
||||
});
|
||||
import { preprocessLaTeX } from './latex';
|
||||
|
||||
describe('preprocessLaTeX', () => {
|
||||
test('returns the same string if no LaTeX patterns are found', () => {
|
||||
const content = 'This is a test string without LaTeX';
|
||||
const content = 'This is a test string without LaTeX or dollar signs';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('escapes dollar signs followed by digits', () => {
|
||||
test('returns the same string if no dollar signs are present', () => {
|
||||
const content = 'This has LaTeX \\(x^2\\) and \\[y^2\\] but no dollars';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('preserves valid inline LaTeX delimiters \\(...\\)', () => {
|
||||
const content = 'This is inline LaTeX: \\(x^2 + y^2 = z^2\\)';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('preserves valid block LaTeX delimiters \\[...\\]', () => {
|
||||
const content = 'This is block LaTeX: \\[E = mc^2\\]';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('preserves valid double dollar delimiters', () => {
|
||||
const content = 'This is valid: $$x^2 + y^2 = z^2$$';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('converts single dollar delimiters to double dollars', () => {
|
||||
const content = 'Inline math: $x^2 + y^2 = z^2$';
|
||||
const expected = 'Inline math: $$x^2 + y^2 = z^2$$';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('converts multiple single dollar expressions', () => {
|
||||
const content = 'First $a + b = c$ and second $x^2 + y^2 = z^2$';
|
||||
const expected = 'First $$a + b = c$$ and second $$x^2 + y^2 = z^2$$';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('escapes currency dollar signs', () => {
|
||||
const content = 'Price is $50 and $100';
|
||||
const expected = 'Price is \\$50 and \\$100';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('does not escape dollar signs not followed by digits', () => {
|
||||
const content = 'This $variable is not escaped';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
test('escapes currency with spaces', () => {
|
||||
const content = '$50 is $20 + $30';
|
||||
const expected = '\\$50 is \\$20 + \\$30';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('preserves existing LaTeX expressions', () => {
|
||||
const content = 'Inline $x^2 + y^2 = z^2$ and block $$E = mc^2$$';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
test('escapes currency with commas', () => {
|
||||
const content = 'The price is $1,000,000 for this item.';
|
||||
const expected = 'The price is \\$1,000,000 for this item.';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles mixed LaTeX and currency', () => {
|
||||
test('escapes currency with decimals', () => {
|
||||
const content = 'Total: $29.50 plus tax';
|
||||
const expected = 'Total: \\$29.50 plus tax';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('converts LaTeX expressions while escaping currency', () => {
|
||||
const content = 'LaTeX $x^2$ and price $50';
|
||||
const expected = 'LaTeX $x^2$ and price \\$50';
|
||||
const expected = 'LaTeX $$x^2$$ and price \\$50';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('converts LaTeX delimiters', () => {
|
||||
const content = 'Brackets \\[x^2\\] and parentheses \\(y^2\\)';
|
||||
const expected = 'Brackets $$x^2$$ and parentheses $y^2$';
|
||||
test('handles Goldbach Conjecture example', () => {
|
||||
const content = '- **Goldbach Conjecture**: $2n = p + q$ (every even integer > 2)';
|
||||
const expected = '- **Goldbach Conjecture**: $$2n = p + q$$ (every even integer > 2)';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('does not escape already escaped dollar signs', () => {
|
||||
const content = 'Already escaped \\$50 and \\$100';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('does not convert already escaped single dollars', () => {
|
||||
const content = 'Escaped \\$x^2\\$ should not change';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
});
|
||||
|
||||
test('escapes mhchem commands', () => {
|
||||
const content = '$\\ce{H2O}$ and $\\pu{123 J}$';
|
||||
const expected = '$\\\\ce{H2O}$ and $\\\\pu{123 J}$';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles complex mixed content', () => {
|
||||
const content = `
|
||||
LaTeX inline $x^2$ and block $$y^2$$
|
||||
Currency $100 and $200
|
||||
Chemical $\\ce{H2O}$
|
||||
Brackets \\[z^2\\]
|
||||
`;
|
||||
const expected = `
|
||||
LaTeX inline $x^2$ and block $$y^2$$
|
||||
Currency \\$100 and \\$200
|
||||
Chemical $\\\\ce{H2O}$
|
||||
Brackets $$z^2$$
|
||||
`;
|
||||
const expected = '$$\\\\ce{H2O}$$ and $$\\\\pu{123 J}$$';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
|
@ -165,31 +94,117 @@ describe('preprocessLaTeX', () => {
|
|||
expect(preprocessLaTeX('')).toBe('');
|
||||
});
|
||||
|
||||
test('preserves code blocks', () => {
|
||||
const content = '```\n$100\n```\nOutside $200';
|
||||
const expected = '```\n$100\n```\nOutside \\$200';
|
||||
test('handles complex mixed content', () => {
|
||||
const content = `Valid double $$y^2$$
|
||||
Currency $100 and $200
|
||||
Single dollar math $x^2 + y^2$
|
||||
Chemical $\\ce{H2O}$
|
||||
Valid brackets \\[z^2\\]`;
|
||||
const expected = `Valid double $$y^2$$
|
||||
Currency \\$100 and \\$200
|
||||
Single dollar math $$x^2 + y^2$$
|
||||
Chemical $$\\\\ce{H2O}$$
|
||||
Valid brackets \\[z^2\\]`;
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles multiple currency values in a sentence', () => {
|
||||
const content = 'I have $50 in my wallet and $100 in the bank.';
|
||||
const expected = 'I have \\$50 in my wallet and \\$100 in the bank.';
|
||||
test('handles multiple equations with currency', () => {
|
||||
const content = `- **Euler's Totient Function**: $\\phi(n) = n \\prod_{p|n} \\left(1 - \\frac{1}{p}\\right)$
|
||||
- **Total Savings**: $500 + $200 + $150 = $850`;
|
||||
const expected = `- **Euler's Totient Function**: $$\\phi(n) = n \\prod_{p|n} \\left(1 - \\frac{1}{p}\\right)$$
|
||||
- **Total Savings**: \\$500 + \\$200 + \\$150 = \\$850`;
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('preserves LaTeX expressions with numbers', () => {
|
||||
const content = 'The equation is $f(x) = 2x + 3$ where x is a variable.';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
test('handles inline code blocks', () => {
|
||||
const content = 'Outside $x^2$ and inside code: `$100`';
|
||||
const expected = 'Outside $$x^2$$ and inside code: `$100`';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles currency values with commas', () => {
|
||||
const content = 'The price is $1,000,000 for this item.';
|
||||
const expected = 'The price is \\$1,000,000 for this item.';
|
||||
test('handles multiline code blocks', () => {
|
||||
const content = '```\n$100\n$variable\n```\nOutside $x^2$';
|
||||
const expected = '```\n$100\n$variable\n```\nOutside $$x^2$$';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('preserves LaTeX expressions with special characters', () => {
|
||||
const content = 'The set is defined as $\\{x | x > 0\\}$.';
|
||||
expect(preprocessLaTeX(content)).toBe(content);
|
||||
const expected = 'The set is defined as $$\\{x | x > 0\\}$$.';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles complex physics equations', () => {
|
||||
const content = `- **Schrödinger Equation**: $i\\hbar\\frac{\\partial}{\\partial t}|\\psi\\rangle = \\hat{H}|\\psi\\rangle$
|
||||
- **Einstein Field Equations**: $G_{\\mu\\nu} = \\frac{8\\pi G}{c^4} T_{\\mu\\nu}$`;
|
||||
const expected = `- **Schrödinger Equation**: $$i\\hbar\\frac{\\partial}{\\partial t}|\\psi\\rangle = \\hat{H}|\\psi\\rangle$$
|
||||
- **Einstein Field Equations**: $$G_{\\mu\\nu} = \\frac{8\\pi G}{c^4} T_{\\mu\\nu}$$`;
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles financial calculations with currency', () => {
|
||||
const content = `- **Simple Interest**: $A = P + Prt = $1,000 + ($1,000)(0.05)(2) = $1,100$
|
||||
- **ROI**: $\\text{ROI} = \\frac{$1,200 - $1,000}{$1,000} \\times 100\\% = 20\\%$`;
|
||||
const expected = `- **Simple Interest**: $$A = P + Prt = \\$1,000 + (\\$1,000)(0.05)(2) = \\$1,100$$
|
||||
- **ROI**: $$\\text{ROI} = \\frac{\\$1,200 - \\$1,000}{\\$1,000} \\times 100\\% = 20\\%$$`;
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('does not convert partial or malformed expressions', () => {
|
||||
const content = 'A single $ sign should not be converted';
|
||||
const expected = 'A single $ sign should not be converted';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles nested parentheses in LaTeX', () => {
|
||||
const content =
|
||||
'Matrix determinant: $\\det(A) = \\sum_{\\sigma \\in S_n} \\text{sgn}(\\sigma) \\prod_{i=1}^n a_{i,\\sigma(i)}$';
|
||||
const expected =
|
||||
'Matrix determinant: $$\\det(A) = \\sum_{\\sigma \\in S_n} \\text{sgn}(\\sigma) \\prod_{i=1}^n a_{i,\\sigma(i)}$$';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('preserves spacing in equations', () => {
|
||||
const content = 'Equation: $f(x) = 2x + 3$ where x is a variable.';
|
||||
const expected = 'Equation: $$f(x) = 2x + 3$$ where x is a variable.';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles LaTeX with newlines inside should not be converted', () => {
|
||||
const content = `This has $x
|
||||
y$ which spans lines`;
|
||||
const expected = `This has $x
|
||||
y$ which spans lines`;
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles multiple dollar signs in text', () => {
|
||||
const content = 'Price $100 then equation $x + y = z$ then another price $50';
|
||||
const expected = 'Price \\$100 then equation $$x + y = z$$ then another price \\$50';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles complex LaTeX with currency in same expression', () => {
|
||||
const content = 'Calculate $\\text{Total} = \\$500 + \\$200$';
|
||||
const expected = 'Calculate $$\\text{Total} = \\$500 + \\$200$$';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('preserves already escaped dollars in LaTeX', () => {
|
||||
const content = 'The formula $f(x) = \\$2x$ represents cost';
|
||||
const expected = 'The formula $$f(x) = \\$2x$$ represents cost';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles adjacent LaTeX and currency', () => {
|
||||
const content = 'Formula $x^2$ costs $25';
|
||||
const expected = 'Formula $$x^2$$ costs \\$25';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
|
||||
test('handles LaTeX with special characters and currency', () => {
|
||||
const content = 'Set $\\{x | x > \\$0\\}$ for positive prices';
|
||||
const expected = 'Set $$\\{x | x > \\$0\\}$$ for positive prices';
|
||||
expect(preprocessLaTeX(content)).toBe(expected);
|
||||
});
|
||||
});
|
||||
|
|
|
@ -1,105 +1,152 @@
|
|||
// Regex to check if the processed content contains any potential LaTeX patterns
|
||||
const containsLatexRegex =
|
||||
/\\\(.*?\\\)|\\\[.*?\\\]|\$.*?\$|\\begin\{equation\}.*?\\end\{equation\}/;
|
||||
|
||||
// Regex for inline and block LaTeX expressions
|
||||
const inlineLatex = new RegExp(/\\\((.+?)\\\)/, 'g');
|
||||
const blockLatex = new RegExp(/\\\[(.*?[^\\])\\\]/, 'gs');
|
||||
|
||||
// Function to restore code blocks
|
||||
const restoreCodeBlocks = (content: string, codeBlocks: string[]) => {
|
||||
return content.replace(/<<CODE_BLOCK_(\d+)>>/g, (match, index) => codeBlocks[index]);
|
||||
};
|
||||
|
||||
// Regex to identify code blocks and inline code
|
||||
const codeBlockRegex = /(```[\s\S]*?```|`.*?`)/g;
|
||||
|
||||
export const processLaTeX = (_content: string) => {
|
||||
let content = _content;
|
||||
// Temporarily replace code blocks and inline code with placeholders
|
||||
const codeBlocks: string[] = [];
|
||||
let index = 0;
|
||||
content = content.replace(codeBlockRegex, (match) => {
|
||||
codeBlocks[index] = match;
|
||||
return `<<CODE_BLOCK_${index++}>>`;
|
||||
});
|
||||
|
||||
// Escape dollar signs followed by a digit or space and digit
|
||||
let processedContent = content.replace(/(\$)(?=\s?\d)/g, '\\$');
|
||||
|
||||
// If no LaTeX patterns are found, restore code blocks and return the processed content
|
||||
if (!containsLatexRegex.test(processedContent)) {
|
||||
return restoreCodeBlocks(processedContent, codeBlocks);
|
||||
}
|
||||
|
||||
// Convert LaTeX expressions to a markdown compatible format
|
||||
processedContent = processedContent
|
||||
.replace(inlineLatex, (match: string, equation: string) => `$${equation}$`) // Convert inline LaTeX
|
||||
.replace(blockLatex, (match: string, equation: string) => `$$${equation}$$`); // Convert block LaTeX
|
||||
|
||||
// Restore code blocks
|
||||
return restoreCodeBlocks(processedContent, codeBlocks);
|
||||
};
|
||||
// Pre-compile all regular expressions for better performance
const MHCHEM_CE_REGEX = /\$\\ce\{/g;
const MHCHEM_PU_REGEX = /\$\\pu\{/g;
const MHCHEM_CE_ESCAPED_REGEX = /\$\\\\ce\{[^}]*\}\$/g;
const MHCHEM_PU_ESCAPED_REGEX = /\$\\\\pu\{[^}]*\}\$/g;
const CURRENCY_REGEX =
  /(?<![\\$])\$(?!\$)(?=\d{1,3}(?:,\d{3})*(?:\.\d{1,2})?(?:\s|$|[^a-zA-Z\d]))/g;
const SINGLE_DOLLAR_REGEX = /(?<!\\)\$(?!\$)((?:[^$\n]|\\[$])+?)(?<!\\)\$(?!\$)/g;

/**
 * Escapes mhchem package notation in LaTeX.
 *
 * Two things happen, in this order:
 * 1. The backslash of every `$\ce{` / `$\pu{` opener is doubled (`$\\ce{`, `$\\pu{`).
 * 2. A complete single-dollar mhchem expression (`$\\ce{...}$` / `$\\pu{...}$`, with no
 *    `}` inside the braces) is promoted to double-dollar delimiters.
 *
 * An expression that is already delimited by `$$ ... $$` only gets its backslash doubled;
 * it is not wrapped a second time (which would otherwise produce `$$$ ... $$$`).
 *
 * @param text - The input text containing potential mhchem notation
 * @returns The processed text with properly escaped mhchem notation
 */
function escapeMhchem(text: string): string {
  // Promote `$...$` to `$$...$$` unless the match is the inner part of an existing `$$...$$`.
  // `whole` is the string being searched, so neighbours are inspected pre-replacement.
  const promote = (match: string, offset: number, whole: string): string => {
    const alreadyDoubleDollar =
      whole.charAt(offset - 1) === '$' && whole.charAt(offset + match.length) === '$';
    return alreadyDoubleDollar ? match : `$${match}$`;
  };

  return (
    text
      // First escape the backslashes in mhchem commands
      .replace(MHCHEM_CE_REGEX, '$\\\\ce{')
      .replace(MHCHEM_PU_REGEX, '$\\\\pu{')
      // Then convert single dollar mhchem to double dollar
      .replace(MHCHEM_CE_ESCAPED_REGEX, promote)
      .replace(MHCHEM_PU_ESCAPED_REGEX, promote)
  );
}
|
||||
|
||||
/**
 * Finds all code regions in the content in a single left-to-right scan.
 *
 * - A run of three backticks toggles a fenced (multiline) block; the region spans from the
 *   first backtick of the opening fence to the last backtick of the closing fence.
 * - A single backtick outside a fence toggles an inline code span; the region spans from the
 *   opening backtick to the closing backtick.
 * - A fence that is opened but never closed (e.g. a response that is still streaming) is
 *   treated as running to the end of the content, so its body is still protected.
 * - An inline backtick that is never closed produces no region.
 *
 * @param content The content to analyze
 * @returns Array of code block regions as inclusive `[start, end]` index pairs
 */
function findCodeBlockRegions(content: string): Array<[number, number]> {
  const regions: Array<[number, number]> = [];
  let inlineStart = -1;
  let fenceStart = -1;

  for (let i = 0; i < content.length; i++) {
    if (content[i] !== '`') {
      continue;
    }

    if (content.startsWith('```', i)) {
      if (fenceStart === -1) {
        fenceStart = i;
      } else {
        regions.push([fenceStart, i + 2]);
        fenceStart = -1;
      }
      i += 2; // Skip the remaining two backticks of the fence
    } else if (fenceStart === -1) {
      // Single backticks only delimit inline code outside of a fenced block
      if (inlineStart === -1) {
        inlineStart = i;
      } else {
        regions.push([inlineStart, i]);
        inlineStart = -1;
      }
    }
  }

  // Unterminated fence: everything up to the end of the content is code.
  if (fenceStart !== -1) {
    regions.push([fenceStart, content.length - 1]);
  }

  return regions;
}
|
||||
|
||||
/**
 * Reports whether an index falls inside one of the given code regions.
 *
 * Uses a binary search, so it relies on `codeRegions` being ordered by start index and
 * non-overlapping. Region bounds are inclusive on both ends.
 *
 * @param position The index to look up
 * @param codeRegions Inclusive `[start, end]` regions to search
 * @returns True if `position` lies within any region, false otherwise
 */
function isInCodeBlock(position: number, codeRegions: Array<[number, number]>): boolean {
  let lo = 0;
  let hi = codeRegions.length - 1;

  while (lo <= hi) {
    const mid = Math.floor((lo + hi) / 2);
    const [regionStart, regionEnd] = codeRegions[mid];

    if (position < regionStart) {
      // Target is left of this region
      hi = mid - 1;
    } else if (position <= regionEnd) {
      return true;
    } else {
      // Target is right of this region
      lo = mid + 1;
    }
  }

  return false;
}
|
||||
|
||||
/**
|
||||
* Preprocesses LaTeX content by escaping currency indicators and converting single dollar math delimiters.
|
||||
* Optimized for high-frequency execution.
|
||||
* @param content The input string containing LaTeX expressions.
|
||||
* @returns The processed string with replaced delimiters and escaped characters.
|
||||
* @returns The processed string with escaped currency indicators and converted math delimiters.
|
||||
*/
|
||||
export function preprocessLaTeX(content: string): string {
|
||||
// Step 1: Protect code blocks
|
||||
const codeBlocks: string[] = [];
|
||||
content = content.replace(/(```[\s\S]*?```|`[^`\n]+`)/g, (match, code) => {
|
||||
codeBlocks.push(code);
|
||||
return `<<CODE_BLOCK_${codeBlocks.length - 1}>>`;
|
||||
});
|
||||
// Early return for most common case
|
||||
if (!content.includes('$')) return content;
|
||||
|
||||
// Step 2: Protect existing LaTeX expressions
|
||||
const latexExpressions: string[] = [];
|
||||
content = content.replace(/(\$\$[\s\S]*?\$\$|\\\[[\s\S]*?\\\]|\\\(.*?\\\))/g, (match) => {
|
||||
latexExpressions.push(match);
|
||||
return `<<LATEX_${latexExpressions.length - 1}>>`;
|
||||
});
|
||||
|
||||
// Step 3: Escape dollar signs that are likely currency indicators
|
||||
content = content.replace(/\$(?=\d)/g, '\\$');
|
||||
|
||||
// Step 4: Restore LaTeX expressions
|
||||
content = content.replace(/<<LATEX_(\d+)>>/g, (_, index) => latexExpressions[parseInt(index)]);
|
||||
|
||||
// Step 5: Restore code blocks
|
||||
content = content.replace(/<<CODE_BLOCK_(\d+)>>/g, (_, index) => codeBlocks[parseInt(index)]);
|
||||
|
||||
// Step 6: Apply additional escaping functions
|
||||
content = escapeBrackets(content);
|
||||
content = escapeMhchem(content);
|
||||
|
||||
return content;
|
||||
// Process mhchem first (usually rare, so check if needed)
|
||||
let processed = content;
|
||||
if (content.includes('\\ce{') || content.includes('\\pu{')) {
|
||||
processed = escapeMhchem(content);
|
||||
}
|
||||
|
||||
export function escapeBrackets(text: string): string {
|
||||
const pattern = /(```[\S\s]*?```|`.*?`)|\\\[([\S\s]*?[^\\])\\]|\\\((.*?)\\\)/g;
|
||||
return text.replace(
|
||||
pattern,
|
||||
(
|
||||
match: string,
|
||||
codeBlock: string | undefined,
|
||||
squareBracket: string | undefined,
|
||||
roundBracket: string | undefined,
|
||||
): string => {
|
||||
if (codeBlock != null) {
|
||||
return codeBlock;
|
||||
} else if (squareBracket != null) {
|
||||
return `$$${squareBracket}$$`;
|
||||
} else if (roundBracket != null) {
|
||||
return `$${roundBracket}$`;
|
||||
}
|
||||
return match;
|
||||
},
|
||||
);
|
||||
}
|
||||
// Find all code block regions once
|
||||
const codeRegions = findCodeBlockRegions(processed);
|
||||
|
||||
export function escapeMhchem(text: string) {
|
||||
return text.replaceAll('$\\ce{', '$\\\\ce{').replaceAll('$\\pu{', '$\\\\pu{');
|
||||
// First pass: escape currency dollar signs
|
||||
const parts: string[] = [];
|
||||
let lastIndex = 0;
|
||||
|
||||
// Reset regex for reuse
|
||||
CURRENCY_REGEX.lastIndex = 0;
|
||||
|
||||
let match: RegExpExecArray | null;
|
||||
while ((match = CURRENCY_REGEX.exec(processed)) !== null) {
|
||||
if (!isInCodeBlock(match.index, codeRegions)) {
|
||||
parts.push(processed.substring(lastIndex, match.index));
|
||||
parts.push('\\$');
|
||||
lastIndex = match.index + 1;
|
||||
}
|
||||
}
|
||||
parts.push(processed.substring(lastIndex));
|
||||
processed = parts.join('');
|
||||
|
||||
// Second pass: convert single dollar delimiters to double dollars
|
||||
const result: string[] = [];
|
||||
lastIndex = 0;
|
||||
|
||||
// Reset regex for reuse
|
||||
SINGLE_DOLLAR_REGEX.lastIndex = 0;
|
||||
|
||||
while ((match = SINGLE_DOLLAR_REGEX.exec(processed)) !== null) {
|
||||
if (!isInCodeBlock(match.index, codeRegions)) {
|
||||
result.push(processed.substring(lastIndex, match.index));
|
||||
result.push(`$$${match[1]}$$`);
|
||||
lastIndex = match.index + match[0].length;
|
||||
}
|
||||
}
|
||||
result.push(processed.substring(lastIndex));
|
||||
|
||||
return result.join('');
|
||||
}
|
||||
|
|
|
@ -232,6 +232,7 @@ export default defineConfig(({ command }) => ({
|
|||
alias: {
|
||||
'~': path.join(__dirname, 'src/'),
|
||||
$fonts: path.resolve(__dirname, 'public/fonts'),
|
||||
'micromark-extension-math': 'micromark-extension-llm-math',
|
||||
},
|
||||
},
|
||||
}));
|
||||
|
|
20
package-lock.json
generated
20
package-lock.json
generated
|
@ -2474,6 +2474,7 @@
|
|||
"lodash": "^4.17.21",
|
||||
"lucide-react": "^0.394.0",
|
||||
"match-sorter": "^6.3.4",
|
||||
"micromark-extension-llm-math": "^3.1.0",
|
||||
"qrcode.react": "^4.2.0",
|
||||
"rc-input-number": "^7.4.2",
|
||||
"react": "^18.2.0",
|
||||
|
@ -36460,6 +36461,25 @@
|
|||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/micromark-extension-llm-math": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/micromark-extension-llm-math/-/micromark-extension-llm-math-3.1.0.tgz",
|
||||
"integrity": "sha512-VIYHuIEk0gpHrojEtNGaxGwdpSLtdWYlLL2vu9PM4M1ilEtak10S8F9zzbNAPBNRoWFs/bjs+J7R3yUBoIQUEA==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/katex": "^0.16.0",
|
||||
"devlop": "^1.0.0",
|
||||
"katex": "^0.16.0",
|
||||
"micromark-factory-space": "^2.0.0",
|
||||
"micromark-util-character": "^2.0.0",
|
||||
"micromark-util-symbol": "^2.0.0",
|
||||
"micromark-util-types": "^2.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"type": "opencollective",
|
||||
"url": "https://opencollective.com/unified"
|
||||
}
|
||||
},
|
||||
"node_modules/micromark-extension-math": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/micromark-extension-math/-/micromark-extension-math-3.1.0.tgz",
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue