mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
📑 refactor: File Search Citations Dual-Format Unicode Handling (#10888)
* 🔖 refactor: citation handling with support for both literal and Unicode formats
* refactor: file search messages for edge cases in documents
* 🔧 refactor: Enhance citation handling with detailed regex patterns for literal and Unicode formats
* 🔧 refactor: Simplify file search query handling by removing unnecessary parameters and improving result formatting
* ✨ test: Add comprehensive integration tests for citation processing flow with support for literal and Unicode formats
* 🔧 refactor: Improve regex match handling and add performance tests for citation processing
This commit is contained in:
parent
af8394b05c
commit
03c9d5f79f
6 changed files with 638 additions and 18 deletions
|
|
@ -86,7 +86,6 @@ const createFileSearchTool = async ({ userId, files, entity_id, fileCitations =
|
|||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {import('librechat-data-provider').TFile} file
|
||||
* @returns {{ file_id: string, query: string, k: number, entity_id?: string }}
|
||||
*/
|
||||
|
|
@ -135,11 +134,16 @@ const createFileSearchTool = async ({ userId, files, entity_id, fileCitations =
|
|||
page: docInfo.metadata.page || null,
|
||||
})),
|
||||
)
|
||||
// TODO: results should be sorted by relevance, not distance
|
||||
.sort((a, b) => a.distance - b.distance)
|
||||
// TODO: make this configurable
|
||||
.slice(0, 10);
|
||||
|
||||
if (formattedResults.length === 0) {
|
||||
return [
|
||||
'No content found in the files. The files may not have been processed correctly or you may need to refine your query.',
|
||||
undefined,
|
||||
];
|
||||
}
|
||||
|
||||
const formattedString = formattedResults
|
||||
.map(
|
||||
(result, index) =>
|
||||
|
|
@ -169,11 +173,12 @@ const createFileSearchTool = async ({ userId, files, entity_id, fileCitations =
|
|||
? `
|
||||
|
||||
**CITE FILE SEARCH RESULTS:**
|
||||
Use anchor markers immediately after statements derived from file content. Reference the filename in your text:
|
||||
Use the EXACT anchor markers shown below (copy them verbatim) immediately after statements derived from file content. Reference the filename in your text:
|
||||
- File citation: "The document.pdf states that... \\ue202turn0file0"
|
||||
- Page reference: "According to report.docx... \\ue202turn0file1"
|
||||
- Multi-file: "Multiple sources confirm... \\ue200\\ue202turn0file0\\ue202turn0file1\\ue201"
|
||||
|
||||
**CRITICAL:** Output these escape sequences EXACTLY as shown (e.g., \\ue202turn0file0). Do NOT substitute with other characters like † or similar symbols.
|
||||
**ALWAYS mention the filename in your text before the citation marker. NEVER use markdown links or footnotes.**`
|
||||
: ''
|
||||
}`,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue