// Pre-compile all regular expressions for better performance
const MHCHEM_CE_REGEX = /\$\\ce\{/g;
const MHCHEM_PU_REGEX = /\$\\pu\{/g;
const MHCHEM_CE_ESCAPED_REGEX = /\$\\\\ce\{[^}]*\}\$/g;
const MHCHEM_PU_ESCAPED_REGEX = /\$\\\\pu\{[^}]*\}\$/g;

/*
 * NOTE(review): the text between the markers below is reproduced verbatim from
 * this file and is NOT valid TypeScript. The CURRENCY_REGEX literal is cut off
 * after `/(?`, and the declaration of SINGLE_DOLLAR_REGEX and the opening of
 * escapeMhchem() are missing (only the tail of that function survives).
 * preprocessLaTeX depends on all three. Restore this span from version control;
 * nothing in it has been reconstructed here.
 *
 * --- begin damaged span ---
 * const CURRENCY_REGEX = /(? `$${match}$`); result = result.replace(MHCHEM_PU_ESCAPED_REGEX, (match) => `$${match}$`); return result; }
 * --- end damaged span ---
 */

/**
 * Finds all code regions in the content in a single left-to-right scan.
 *
 * Two kinds of regions are recognised:
 * - fenced: from an opening run of three backticks to the last backtick of the
 *   next run of three backticks;
 * - inline: from a single backtick to the next single backtick found outside a
 *   fenced region.
 *
 * Unterminated fences and unterminated inline backticks produce no region.
 *
 * The returned regions are sorted by start index and never overlap, which is
 * what the binary search in isInCodeBlock relies on. When an inline span that
 * opened before a fenced region closes after it, the fenced region is folded
 * into the enclosing inline region instead of being reported separately.
 *
 * @param content The content to analyze
 * @returns Array of code block regions as inclusive [start, end] index pairs
 */
function findCodeBlockRegions(content: string): Array<[number, number]> {
  const regions: Array<[number, number]> = [];
  let inlineStart = -1;
  let multilineStart = -1;

  for (let i = 0; i < content.length; i++) {
    if (content[i] !== '`') continue;

    // Three consecutive backticks toggle a fenced region.
    if (content.startsWith('```', i)) {
      if (multilineStart === -1) {
        multilineStart = i;
      } else {
        regions.push([multilineStart, i + 2]);
        multilineStart = -1;
      }
      i += 2; // Skip the remaining two backticks of the fence
      continue;
    }

    // A lone backtick inside a fenced region is plain code text.
    if (multilineStart !== -1) continue;

    if (inlineStart === -1) {
      inlineStart = i;
      continue;
    }

    // Closing an inline span. Any region recorded after the span opened is a
    // fenced region lying inside it; drop those so the list stays sorted and
    // non-overlapping (the enclosing region covers the same positions).
    let last = regions[regions.length - 1];
    while (last !== undefined && last[0] > inlineStart) {
      regions.pop();
      last = regions[regions.length - 1];
    }
    regions.push([inlineStart, i]);
    inlineStart = -1;
  }

  return regions;
}

/**
 * Checks if a position is inside any code block region using binary search.
 *
 * Region bounds are inclusive on both ends. The search is only correct when
 * codeRegions is sorted by start index and its regions do not overlap.
 *
 * @param position The position to check
 * @param codeRegions Array of code block regions
 * @returns True if position is inside a code block
 */
function isInCodeBlock(position: number, codeRegions: Array<[number, number]>): boolean {
  let left = 0;
  let right = codeRegions.length - 1;

  while (left <= right) {
    const mid = Math.floor((left + right) / 2);
    const [start, end] = codeRegions[mid];

    if (position >= start && position <= end) {
      return true;
    }

    if (position < start) {
      right = mid - 1; // Target can only be in an earlier region
    } else {
      left = mid + 1; // Target can only be in a later region
    }
  }

  return false;
}
/**
 * Preprocesses LaTeX content by escaping currency indicators and converting
 * single dollar math delimiters to double dollars.
 * Optimized for high-frequency execution.
 *
 * Processing order:
 * 1. If the content contains `\ce{` or `\pu{`, the whole string is passed
 *    through escapeMhchem (this happens before code regions are computed).
 * 2. Every CURRENCY_REGEX match that does not start inside a code region has
 *    the character at the match index replaced with `\$`.
 * 3. Every SINGLE_DOLLAR_REGEX match that does not start inside a code region
 *    is replaced with its first capture group wrapped in `$$`.
 *
 * @param content The input string containing LaTeX expressions.
 * @returns The processed string with escaped currency indicators and converted math delimiters.
 */
export function preprocessLaTeX(content: string): string {
  // Early return for the most common case: without a dollar sign no pass can match
  if (!content.includes('$')) return content;

  // Process mhchem first (usually rare, so only do the work when a command is present)
  let processed = content;
  if (content.includes('\\ce{') || content.includes('\\pu{')) {
    processed = escapeMhchem(content);
  }

  // Code regions for the string the first pass will scan
  let codeRegions = findCodeBlockRegions(processed);

  // First pass: escape currency dollar signs
  const parts: string[] = [];
  let lastIndex = 0;
  let escapedAny = false;

  // The regex is shared module state; reset it before reuse
  CURRENCY_REGEX.lastIndex = 0;

  let match: RegExpExecArray | null;
  while ((match = CURRENCY_REGEX.exec(processed)) !== null) {
    if (!isInCodeBlock(match.index, codeRegions)) {
      parts.push(processed.substring(lastIndex, match.index));
      parts.push('\\$');
      // Only the single character at match.index is replaced
      lastIndex = match.index + 1;
      escapedAny = true;
    }
  }
  parts.push(processed.substring(lastIndex));
  processed = parts.join('');

  // Each escape inserted one extra character, so every index after it moved.
  // Recompute the regions against the new string; otherwise the second pass
  // would test shifted positions against stale offsets and could rewrite
  // dollars that sit inside code.
  if (escapedAny) {
    codeRegions = findCodeBlockRegions(processed);
  }

  // Second pass: convert single dollar delimiters to double dollars
  const result: string[] = [];
  lastIndex = 0;

  // The regex is shared module state; reset it before reuse
  SINGLE_DOLLAR_REGEX.lastIndex = 0;

  while ((match = SINGLE_DOLLAR_REGEX.exec(processed)) !== null) {
    if (!isInCodeBlock(match.index, codeRegions)) {
      result.push(processed.substring(lastIndex, match.index));
      result.push(`$$${match[1]}$$`);
      lastIndex = match.index + match[0].length;
    }
  }
  result.push(processed.substring(lastIndex));

  return result.join('');
}