mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
📚 feat: Add Source Citations for File Search in Agents (#8652)
* feat: Source Citations for file_search in Agents * Fix: Added citation limits and relevance score to app service. Removed duplicate tests * ✨ feat: implement Role-level toggle to optionally disable file Source Citation in Agents * 🐛 fix: update mock for librechat-data-provider to include PermissionTypes and SystemRoles --------- Co-authored-by: “Praneeth <praneeth.goparaju@slalom.com>
This commit is contained in:
parent
a955097faf
commit
52e59e40be
36 changed files with 1890 additions and 190 deletions
|
|
@ -114,11 +114,13 @@ const createFileSearchTool = async ({ req, files, entity_id }) => {
|
|||
}
|
||||
|
||||
const formattedResults = validResults
|
||||
.flatMap((result) =>
|
||||
.flatMap((result, fileIndex) =>
|
||||
result.data.map(([docInfo, distance]) => ({
|
||||
filename: docInfo.metadata.source.split('/').pop(),
|
||||
content: docInfo.page_content,
|
||||
distance,
|
||||
file_id: files[fileIndex]?.file_id,
|
||||
page: docInfo.metadata.page || null,
|
||||
})),
|
||||
)
|
||||
// TODO: results should be sorted by relevance, not distance
|
||||
|
|
@ -128,18 +130,34 @@ const createFileSearchTool = async ({ req, files, entity_id }) => {
|
|||
|
||||
const formattedString = formattedResults
|
||||
.map(
|
||||
(result) =>
|
||||
`File: ${result.filename}\nRelevance: ${1.0 - result.distance.toFixed(4)}\nContent: ${
|
||||
(result, index) =>
|
||||
`File: ${result.filename}\nAnchor: \\ue202turn0file${index} (${result.filename})\nRelevance: ${(1.0 - result.distance).toFixed(4)}\nContent: ${
|
||||
result.content
|
||||
}\n`,
|
||||
)
|
||||
.join('\n---\n');
|
||||
|
||||
return formattedString;
|
||||
// Add hidden file_id data for processAgentResponse parsing
|
||||
const internalData = formattedResults
|
||||
.map(
|
||||
(result) =>
|
||||
`File: ${result.filename}\nFile_ID: ${result.file_id}\nRelevance: ${(1.0 - result.distance).toFixed(4)}\nPage: ${result.page || 'N/A'}\nContent: ${result.content}\n`,
|
||||
)
|
||||
.join('\n---\n');
|
||||
|
||||
return `${formattedString}\n\n<!-- INTERNAL_DATA_START -->\n${internalData}\n<!-- INTERNAL_DATA_END -->`;
|
||||
},
|
||||
{
|
||||
name: Tools.file_search,
|
||||
description: `Performs semantic search across attached "${Tools.file_search}" documents using natural language queries. This tool analyzes the content of uploaded files to find relevant information, quotes, and passages that best match your query. Use this to extract specific information or find relevant sections within the available documents.`,
|
||||
description: `Performs semantic search across attached "${Tools.file_search}" documents using natural language queries. This tool analyzes the content of uploaded files to find relevant information, quotes, and passages that best match your query. Use this to extract specific information or find relevant sections within the available documents.
|
||||
|
||||
**CITE FILE SEARCH RESULTS:**
|
||||
Use anchor markers immediately after statements derived from file content. Reference the filename in your text:
|
||||
- File citation: "The document.pdf states that... \\ue202turn0file0"
|
||||
- Page reference: "According to report.docx... \\ue202turn0file1"
|
||||
- Multi-file: "Multiple sources confirm... \\ue200\\ue202turn0file0\\ue202turn0file1\\ue201"
|
||||
|
||||
**ALWAYS mention the filename in your text before the citation marker. NEVER use markdown links or footnotes.**`,
|
||||
schema: z.object({
|
||||
query: z
|
||||
.string()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue