mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
📑 refactor: File Search Citations Dual-Format Unicode Handling (#10888)
* 🔖 refactor: citation handling with support for both literal and Unicode formats * refactor: file search messages for edge cases in documents * 🔧 refactor: Enhance citation handling with detailed regex patterns for literal and Unicode formats * 🔧 refactor: Simplify file search query handling by removing unnecessary parameters and improving result formatting * ✨ test: Add comprehensive integration tests for citation processing flow with support for literal and Unicode formats * 🔧 refactor: Improve regex match handling and add performance tests for citation processing
This commit is contained in:
parent
af8394b05c
commit
03c9d5f79f
6 changed files with 638 additions and 18 deletions
|
|
@ -4,13 +4,29 @@ import type { Citation, CitationNode } from './types';
|
|||
import { SPAN_REGEX, STANDALONE_PATTERN, CLEANUP_REGEX, COMPOSITE_REGEX } from '~/utils/citations';
|
||||
|
||||
/**
 * Checks whether a standalone citation marker is truly standalone, i.e. not
 * located inside a still-open composite block.
 *
 * A composite block opens with the U+E200 marker and closes with the U+E201
 * marker. Each marker can appear in the text in one of two formats:
 * - literal: the six-character escape sequence (backslash, "ue200" / "ue201")
 * - Unicode: the actual private-use code point U+E200 / U+E201
 *
 * Both formats are searched for and the rightmost occurrence of each marker
 * is used, so pure-literal, pure-Unicode, and mixed blocks (open and close in
 * different formats) are all handled.
 *
 * @param text - The full text being scanned.
 * @param position - Index in `text` where the standalone marker starts; only
 *   the text before this index is inspected.
 * @returns `true` when no composite opener precedes `position`, or when the
 *   most recent composite closer appears after the most recent opener;
 *   `false` when the marker sits inside an unclosed composite block.
 */
function isStandaloneMarker(text: string, position: number): boolean {
  const beforeText = text.substring(0, position);

  // Rightmost composite block start, in either format. lastIndexOf yields -1
  // when absent, so Math.max is -1 only if neither format is present.
  const lastUe200Literal = beforeText.lastIndexOf('\\ue200');
  const lastUe200Char = beforeText.lastIndexOf('\ue200');
  const lastUe200 = Math.max(lastUe200Literal, lastUe200Char);

  // Rightmost composite block end, in either format.
  const lastUe201Literal = beforeText.lastIndexOf('\\ue201');
  const lastUe201Char = beforeText.lastIndexOf('\ue201');
  const lastUe201 = Math.max(lastUe201Literal, lastUe201Char);

  // Standalone if: no opening marker OR closing marker appears after opening.
  return lastUe200 === -1 || (lastUe201 !== -1 && lastUe201 > lastUe200);
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue