mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-02-22 18:34:08 +01:00
🪣 fix: Proper Key Extraction from S3 URL (#11241)
* ✨ feat: Enhance S3 URL handling and add comprehensive tests for CRUD operations * 🔒 fix: Improve S3 URL key extraction with enhanced logging and additional test cases * chore: removed some duplicate testcases and fixed incorrect apostrophes * fix: Log error for malformed URLs * test: Add additional test case for extracting keys from S3 URLs * fix: Enhance S3 URL key extraction logic and improve error handling with additional test cases * test: Add test case for stripping bucket from custom endpoint URLs with forcePathStyle enabled * refactor: Update S3 path style handling and enhance environment configuration for S3-compatible services * refactor: Remove S3_FORCE_PATH_STYLE dependency and streamline S3 URL key extraction logic --------- Co-authored-by: Danny Avila <danny@librechat.ai>
This commit is contained in:
parent
59717f5f50
commit
5d2b7fa4d5
2 changed files with 896 additions and 2 deletions
|
|
@ -252,15 +252,63 @@ function extractKeyFromS3Url(fileUrlOrKey) {
|
|||
|
||||
try {
|
||||
const url = new URL(fileUrlOrKey);
|
||||
return url.pathname.substring(1);
|
||||
const hostname = url.hostname;
|
||||
const pathname = url.pathname.substring(1); // Remove leading slash
|
||||
|
||||
if (
|
||||
hostname === 's3.amazonaws.com' ||
|
||||
hostname.match(/^s3[-.][a-z0-9-]+\.amazonaws\.com$/) ||
|
||||
(bucketName && pathname.startsWith(`${bucketName}/`))
|
||||
) {
|
||||
// Path-style: https://s3.amazonaws.com/bucket-name/key or custom endpoint (MinIO, R2, etc.)
|
||||
// Strip the bucket name (first path segment)
|
||||
const firstSlashIndex = pathname.indexOf('/');
|
||||
if (firstSlashIndex > 0) {
|
||||
const key = pathname.substring(firstSlashIndex + 1);
|
||||
|
||||
if (key === '') {
|
||||
logger.warn(
|
||||
`[extractKeyFromS3Url] Extracted key is empty after removing bucket name from URL: ${fileUrlOrKey}`,
|
||||
);
|
||||
} else {
|
||||
logger.debug(
|
||||
`[extractKeyFromS3Url] fileUrlOrKey: ${fileUrlOrKey}, Extracted key: ${key}`,
|
||||
);
|
||||
}
|
||||
|
||||
return key;
|
||||
} else {
|
||||
logger.warn(
|
||||
`[extractKeyFromS3Url] Unable to extract key from path-style URL: ${fileUrlOrKey}`,
|
||||
);
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
// Virtual-hosted-style or other: https://bucket-name.s3.amazonaws.com/key
|
||||
// Just return the pathname without leading slash
|
||||
logger.debug(`[extractKeyFromS3Url] fileUrlOrKey: ${fileUrlOrKey}, Extracted key: ${pathname}`);
|
||||
return pathname;
|
||||
} catch (error) {
|
||||
if (fileUrlOrKey.startsWith('http://') || fileUrlOrKey.startsWith('https://')) {
|
||||
logger.error(
|
||||
`[extractKeyFromS3Url] Error parsing URL: ${fileUrlOrKey}, Error: ${error.message}`,
|
||||
);
|
||||
} else {
|
||||
logger.debug(`[extractKeyFromS3Url] Non-URL input, using fallback: ${fileUrlOrKey}`);
|
||||
}
|
||||
|
||||
const parts = fileUrlOrKey.split('/');
|
||||
|
||||
if (parts.length >= 3 && !fileUrlOrKey.startsWith('http') && !fileUrlOrKey.startsWith('/')) {
|
||||
return fileUrlOrKey;
|
||||
}
|
||||
|
||||
return fileUrlOrKey.startsWith('/') ? fileUrlOrKey.substring(1) : fileUrlOrKey;
|
||||
const key = fileUrlOrKey.startsWith('/') ? fileUrlOrKey.substring(1) : fileUrlOrKey;
|
||||
logger.debug(
|
||||
`[extractKeyFromS3Url] FALLBACK. fileUrlOrKey: ${fileUrlOrKey}, Extracted key: ${key}`,
|
||||
);
|
||||
return key;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -482,4 +530,5 @@ module.exports = {
|
|||
refreshS3Url,
|
||||
needsRefresh,
|
||||
getNewS3URL,
|
||||
extractKeyFromS3Url,
|
||||
};
|
||||
|
|
|
|||
845
api/test/services/Files/S3/crud.test.js
Normal file
845
api/test/services/Files/S3/crud.test.js
Normal file
|
|
@ -0,0 +1,845 @@
|
|||
const fs = require('fs');
|
||||
const fetch = require('node-fetch');
|
||||
const { Readable } = require('stream');
|
||||
const { FileSources } = require('librechat-data-provider');
|
||||
const {
|
||||
PutObjectCommand,
|
||||
GetObjectCommand,
|
||||
HeadObjectCommand,
|
||||
DeleteObjectCommand,
|
||||
} = require('@aws-sdk/client-s3');
|
||||
const { getSignedUrl } = require('@aws-sdk/s3-request-presigner');
|
||||
|
||||
// Mock dependencies
|
||||
jest.mock('fs');
|
||||
jest.mock('node-fetch');
|
||||
jest.mock('@aws-sdk/s3-request-presigner');
|
||||
jest.mock('@aws-sdk/client-s3');
|
||||
|
||||
jest.mock('@librechat/api', () => ({
|
||||
initializeS3: jest.fn(),
|
||||
deleteRagFile: jest.fn().mockResolvedValue(undefined),
|
||||
}));
|
||||
|
||||
jest.mock('@librechat/data-schemas', () => ({
|
||||
logger: {
|
||||
debug: jest.fn(),
|
||||
info: jest.fn(),
|
||||
warn: jest.fn(),
|
||||
error: jest.fn(),
|
||||
},
|
||||
}));
|
||||
|
||||
const { initializeS3, deleteRagFile } = require('@librechat/api');
|
||||
const { logger } = require('@librechat/data-schemas');
|
||||
|
||||
// Set env vars before requiring crud so module-level constants pick them up
|
||||
process.env.AWS_BUCKET_NAME = 'test-bucket';
|
||||
process.env.S3_URL_EXPIRY_SECONDS = '120';
|
||||
|
||||
const {
|
||||
saveBufferToS3,
|
||||
saveURLToS3,
|
||||
getS3URL,
|
||||
deleteFileFromS3,
|
||||
uploadFileToS3,
|
||||
getS3FileStream,
|
||||
refreshS3FileUrls,
|
||||
refreshS3Url,
|
||||
needsRefresh,
|
||||
getNewS3URL,
|
||||
extractKeyFromS3Url,
|
||||
} = require('~/server/services/Files/S3/crud');
|
||||
|
||||
describe('S3 CRUD Operations', () => {
|
||||
let mockS3Client;
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
|
||||
// Setup mock S3 client
|
||||
mockS3Client = {
|
||||
send: jest.fn(),
|
||||
};
|
||||
initializeS3.mockReturnValue(mockS3Client);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
delete process.env.S3_URL_EXPIRY_SECONDS;
|
||||
delete process.env.S3_REFRESH_EXPIRY_MS;
|
||||
delete process.env.AWS_BUCKET_NAME;
|
||||
});
|
||||
|
||||
describe('saveBufferToS3', () => {
|
||||
it('should upload a buffer to S3 and return a signed URL', async () => {
|
||||
const mockBuffer = Buffer.from('test data');
|
||||
const mockSignedUrl =
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/test.jpg?signature=abc';
|
||||
|
||||
mockS3Client.send.mockResolvedValue({});
|
||||
getSignedUrl.mockResolvedValue(mockSignedUrl);
|
||||
|
||||
const result = await saveBufferToS3({
|
||||
userId: 'user123',
|
||||
buffer: mockBuffer,
|
||||
fileName: 'test.jpg',
|
||||
basePath: 'images',
|
||||
});
|
||||
|
||||
expect(mockS3Client.send).toHaveBeenCalledWith(expect.any(PutObjectCommand));
|
||||
expect(result).toBe(mockSignedUrl);
|
||||
});
|
||||
|
||||
it('should use default basePath if not provided', async () => {
|
||||
const mockBuffer = Buffer.from('test data');
|
||||
const mockSignedUrl =
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/test.jpg?signature=abc';
|
||||
|
||||
mockS3Client.send.mockResolvedValue({});
|
||||
getSignedUrl.mockResolvedValue(mockSignedUrl);
|
||||
|
||||
await saveBufferToS3({
|
||||
userId: 'user123',
|
||||
buffer: mockBuffer,
|
||||
fileName: 'test.jpg',
|
||||
});
|
||||
|
||||
expect(getSignedUrl).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle S3 upload errors', async () => {
|
||||
const mockBuffer = Buffer.from('test data');
|
||||
const error = new Error('S3 upload failed');
|
||||
|
||||
mockS3Client.send.mockRejectedValue(error);
|
||||
|
||||
await expect(
|
||||
saveBufferToS3({
|
||||
userId: 'user123',
|
||||
buffer: mockBuffer,
|
||||
fileName: 'test.jpg',
|
||||
}),
|
||||
).rejects.toThrow('S3 upload failed');
|
||||
|
||||
expect(logger.error).toHaveBeenCalledWith(
|
||||
'[saveBufferToS3] Error uploading buffer to S3:',
|
||||
'S3 upload failed',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getS3URL', () => {
|
||||
it('should return a signed URL for a file', async () => {
|
||||
const mockSignedUrl =
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/file.pdf?signature=xyz';
|
||||
getSignedUrl.mockResolvedValue(mockSignedUrl);
|
||||
|
||||
const result = await getS3URL({
|
||||
userId: 'user123',
|
||||
fileName: 'file.pdf',
|
||||
basePath: 'documents',
|
||||
});
|
||||
|
||||
expect(result).toBe(mockSignedUrl);
|
||||
expect(getSignedUrl).toHaveBeenCalledWith(
|
||||
mockS3Client,
|
||||
expect.any(GetObjectCommand),
|
||||
expect.objectContaining({ expiresIn: 120 }),
|
||||
);
|
||||
});
|
||||
|
||||
it('should add custom filename to Content-Disposition header', async () => {
|
||||
const mockSignedUrl =
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/file.pdf?signature=xyz';
|
||||
getSignedUrl.mockResolvedValue(mockSignedUrl);
|
||||
|
||||
await getS3URL({
|
||||
userId: 'user123',
|
||||
fileName: 'file.pdf',
|
||||
customFilename: 'custom-name.pdf',
|
||||
});
|
||||
|
||||
expect(getSignedUrl).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should add custom content type', async () => {
|
||||
const mockSignedUrl =
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/file.pdf?signature=xyz';
|
||||
getSignedUrl.mockResolvedValue(mockSignedUrl);
|
||||
|
||||
await getS3URL({
|
||||
userId: 'user123',
|
||||
fileName: 'file.pdf',
|
||||
contentType: 'application/pdf',
|
||||
});
|
||||
|
||||
expect(getSignedUrl).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle errors when getting signed URL', async () => {
|
||||
const error = new Error('Failed to sign URL');
|
||||
getSignedUrl.mockRejectedValue(error);
|
||||
|
||||
await expect(
|
||||
getS3URL({
|
||||
userId: 'user123',
|
||||
fileName: 'file.pdf',
|
||||
}),
|
||||
).rejects.toThrow('Failed to sign URL');
|
||||
|
||||
expect(logger.error).toHaveBeenCalledWith(
|
||||
'[getS3URL] Error getting signed URL from S3:',
|
||||
'Failed to sign URL',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('saveURLToS3', () => {
|
||||
it('should fetch a file from URL and save to S3', async () => {
|
||||
const mockBuffer = Buffer.from('downloaded data');
|
||||
const mockResponse = {
|
||||
buffer: jest.fn().mockResolvedValue(mockBuffer),
|
||||
};
|
||||
const mockSignedUrl =
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/downloaded.jpg?signature=abc';
|
||||
|
||||
fetch.mockResolvedValue(mockResponse);
|
||||
mockS3Client.send.mockResolvedValue({});
|
||||
getSignedUrl.mockResolvedValue(mockSignedUrl);
|
||||
|
||||
const result = await saveURLToS3({
|
||||
userId: 'user123',
|
||||
URL: 'https://example.com/image.jpg',
|
||||
fileName: 'downloaded.jpg',
|
||||
});
|
||||
|
||||
expect(fetch).toHaveBeenCalledWith('https://example.com/image.jpg');
|
||||
expect(mockS3Client.send).toHaveBeenCalled();
|
||||
expect(result).toBe(mockSignedUrl);
|
||||
});
|
||||
|
||||
it('should handle fetch errors', async () => {
|
||||
const error = new Error('Network error');
|
||||
fetch.mockRejectedValue(error);
|
||||
|
||||
await expect(
|
||||
saveURLToS3({
|
||||
userId: 'user123',
|
||||
URL: 'https://example.com/image.jpg',
|
||||
fileName: 'downloaded.jpg',
|
||||
}),
|
||||
).rejects.toThrow('Network error');
|
||||
|
||||
expect(logger.error).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('deleteFileFromS3', () => {
|
||||
const mockReq = {
|
||||
user: { id: 'user123' },
|
||||
};
|
||||
|
||||
it('should delete a file from S3', async () => {
|
||||
const mockFile = {
|
||||
filepath: 'https://s3.amazonaws.com/test-bucket/images/user123/file.jpg',
|
||||
file_id: 'file123',
|
||||
};
|
||||
|
||||
// Mock HeadObject to verify file exists
|
||||
mockS3Client.send
|
||||
.mockResolvedValueOnce({}) // First HeadObject - exists
|
||||
.mockResolvedValueOnce({}) // DeleteObject
|
||||
.mockRejectedValueOnce({ name: 'NotFound' }); // Second HeadObject - deleted
|
||||
|
||||
await deleteFileFromS3(mockReq, mockFile);
|
||||
|
||||
expect(deleteRagFile).toHaveBeenCalledWith({ userId: 'user123', file: mockFile });
|
||||
expect(mockS3Client.send).toHaveBeenCalledWith(expect.any(HeadObjectCommand));
|
||||
expect(mockS3Client.send).toHaveBeenCalledWith(expect.any(DeleteObjectCommand));
|
||||
});
|
||||
|
||||
it('should handle file not found gracefully', async () => {
|
||||
const mockFile = {
|
||||
filepath: 'https://s3.amazonaws.com/test-bucket/images/user123/nonexistent.jpg',
|
||||
file_id: 'file123',
|
||||
};
|
||||
|
||||
mockS3Client.send.mockRejectedValue({ name: 'NotFound' });
|
||||
|
||||
await deleteFileFromS3(mockReq, mockFile);
|
||||
|
||||
expect(logger.warn).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should throw error if user ID does not match', async () => {
|
||||
const mockFile = {
|
||||
filepath: 'https://s3.amazonaws.com/test-bucket/images/different-user/file.jpg',
|
||||
file_id: 'file123',
|
||||
};
|
||||
|
||||
await expect(deleteFileFromS3(mockReq, mockFile)).rejects.toThrow('User ID mismatch');
|
||||
expect(logger.error).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle NoSuchKey error', async () => {
|
||||
const mockFile = {
|
||||
filepath: 'https://s3.amazonaws.com/test-bucket/images/user123/file.jpg',
|
||||
file_id: 'file123',
|
||||
};
|
||||
|
||||
mockS3Client.send
|
||||
.mockResolvedValueOnce({}) // HeadObject - exists
|
||||
.mockRejectedValueOnce({ code: 'NoSuchKey' }); // DeleteObject fails
|
||||
|
||||
await deleteFileFromS3(mockReq, mockFile);
|
||||
|
||||
expect(logger.debug).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('uploadFileToS3', () => {
|
||||
const mockReq = {
|
||||
user: { id: 'user123' },
|
||||
};
|
||||
|
||||
it('should upload a file from disk to S3', async () => {
|
||||
const mockFile = {
|
||||
path: '/tmp/upload.jpg',
|
||||
originalname: 'photo.jpg',
|
||||
};
|
||||
const mockStats = { size: 1024 };
|
||||
const mockSignedUrl =
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/file123__photo.jpg?signature=xyz';
|
||||
|
||||
fs.promises = { stat: jest.fn().mockResolvedValue(mockStats) };
|
||||
fs.createReadStream = jest.fn().mockReturnValue(new Readable());
|
||||
mockS3Client.send.mockResolvedValue({});
|
||||
getSignedUrl.mockResolvedValue(mockSignedUrl);
|
||||
|
||||
const result = await uploadFileToS3({
|
||||
req: mockReq,
|
||||
file: mockFile,
|
||||
file_id: 'file123',
|
||||
basePath: 'images',
|
||||
});
|
||||
|
||||
expect(result).toEqual({
|
||||
filepath: mockSignedUrl,
|
||||
bytes: 1024,
|
||||
});
|
||||
expect(fs.createReadStream).toHaveBeenCalledWith('/tmp/upload.jpg');
|
||||
expect(mockS3Client.send).toHaveBeenCalledWith(expect.any(PutObjectCommand));
|
||||
});
|
||||
|
||||
it('should handle upload errors and clean up temp file', async () => {
|
||||
const mockFile = {
|
||||
path: '/tmp/upload.jpg',
|
||||
originalname: 'photo.jpg',
|
||||
};
|
||||
const error = new Error('Upload failed');
|
||||
|
||||
fs.promises = {
|
||||
stat: jest.fn().mockResolvedValue({ size: 1024 }),
|
||||
unlink: jest.fn().mockResolvedValue(),
|
||||
};
|
||||
fs.createReadStream = jest.fn().mockReturnValue(new Readable());
|
||||
mockS3Client.send.mockRejectedValue(error);
|
||||
|
||||
await expect(
|
||||
uploadFileToS3({
|
||||
req: mockReq,
|
||||
file: mockFile,
|
||||
file_id: 'file123',
|
||||
}),
|
||||
).rejects.toThrow('Upload failed');
|
||||
|
||||
expect(logger.error).toHaveBeenCalledWith(
|
||||
'[uploadFileToS3] Error streaming file to S3:',
|
||||
error,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getS3FileStream', () => {
|
||||
it('should return a readable stream for a file', async () => {
|
||||
const mockStream = new Readable();
|
||||
const mockResponse = { Body: mockStream };
|
||||
|
||||
mockS3Client.send.mockResolvedValue(mockResponse);
|
||||
|
||||
const result = await getS3FileStream(
|
||||
{},
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/file.pdf',
|
||||
);
|
||||
|
||||
expect(result).toBe(mockStream);
|
||||
expect(mockS3Client.send).toHaveBeenCalledWith(expect.any(GetObjectCommand));
|
||||
});
|
||||
|
||||
it('should handle errors when retrieving stream', async () => {
|
||||
const error = new Error('Stream error');
|
||||
mockS3Client.send.mockRejectedValue(error);
|
||||
|
||||
await expect(getS3FileStream({}, 'images/user123/file.pdf')).rejects.toThrow('Stream error');
|
||||
expect(logger.error).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('needsRefresh', () => {
|
||||
it('should return false for non-signed URLs', () => {
|
||||
const url = 'https://example.com/proxy/file.jpg';
|
||||
const result = needsRefresh(url, 3600);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it('should return true for expired signed URLs', () => {
|
||||
const now = new Date();
|
||||
const past = new Date(now.getTime() - 3600 * 1000); // 1 hour ago
|
||||
const dateStr = past
|
||||
.toISOString()
|
||||
.replace(/[-:]/g, '')
|
||||
.replace(/\.\d{3}/, '');
|
||||
|
||||
const url = `https://s3.amazonaws.com/bucket/key?X-Amz-Signature=abc&X-Amz-Date=${dateStr}&X-Amz-Expires=60`;
|
||||
const result = needsRefresh(url, 60);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false for URLs that are not close to expiration', () => {
|
||||
const now = new Date();
|
||||
const recent = new Date(now.getTime() - 10 * 1000); // 10 seconds ago
|
||||
const dateStr = recent
|
||||
.toISOString()
|
||||
.replace(/[-:]/g, '')
|
||||
.replace(/\.\d{3}/, '');
|
||||
|
||||
const url = `https://s3.amazonaws.com/bucket/key?X-Amz-Signature=abc&X-Amz-Date=${dateStr}&X-Amz-Expires=7200`;
|
||||
const result = needsRefresh(url, 60);
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
|
||||
it('should use custom refresh expiry when S3_REFRESH_EXPIRY_MS is set', () => {
|
||||
process.env.S3_REFRESH_EXPIRY_MS = '30000'; // 30 seconds
|
||||
|
||||
const now = new Date();
|
||||
const recent = new Date(now.getTime() - 31 * 1000); // 31 seconds ago
|
||||
const dateStr = recent
|
||||
.toISOString()
|
||||
.replace(/[-:]/g, '')
|
||||
.replace(/\.\d{3}/, '');
|
||||
|
||||
const url = `https://s3.amazonaws.com/bucket/key?X-Amz-Signature=abc&X-Amz-Date=${dateStr}&X-Amz-Expires=7200`;
|
||||
|
||||
// Need to reload the module to pick up the env var change
|
||||
jest.resetModules();
|
||||
const { needsRefresh: needsRefreshReloaded } = require('~/server/services/Files/S3/crud');
|
||||
|
||||
const result = needsRefreshReloaded(url, 60);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
|
||||
it('should return true for malformed URLs', () => {
|
||||
const url = 'not-a-valid-url';
|
||||
const result = needsRefresh(url, 3600);
|
||||
expect(result).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getNewS3URL', () => {
|
||||
it('should generate a new URL from an existing S3 URL', async () => {
|
||||
const currentURL =
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/file.jpg?signature=old';
|
||||
const newURL = 'https://s3.amazonaws.com/test-bucket/images/user123/file.jpg?signature=new';
|
||||
|
||||
getSignedUrl.mockResolvedValue(newURL);
|
||||
|
||||
const result = await getNewS3URL(currentURL);
|
||||
|
||||
expect(result).toBe(newURL);
|
||||
expect(getSignedUrl).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should return undefined for invalid URLs', async () => {
|
||||
const result = await getNewS3URL('invalid-url');
|
||||
expect(result).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should handle errors gracefully', async () => {
|
||||
const currentURL = 'https://s3.amazonaws.com/test-bucket/images/user123/file.jpg';
|
||||
getSignedUrl.mockRejectedValue(new Error('Failed'));
|
||||
|
||||
const result = await getNewS3URL(currentURL);
|
||||
|
||||
expect(result).toBeUndefined();
|
||||
expect(logger.error).toHaveBeenCalledWith('Error getting new S3 URL:', expect.any(Error));
|
||||
});
|
||||
|
||||
it('should construct GetObjectCommand with correct key (no bucket name duplication)', async () => {
|
||||
const currentURL =
|
||||
'https://s3.amazonaws.com/my-bucket/images/user123/file.jpg?X-Amz-Signature=old';
|
||||
getSignedUrl.mockResolvedValue(
|
||||
'https://s3.amazonaws.com/test-bucket/images/user123/file.jpg?signature=new',
|
||||
);
|
||||
|
||||
await getNewS3URL(currentURL);
|
||||
|
||||
expect(GetObjectCommand).toHaveBeenCalledWith(
|
||||
expect.objectContaining({ Key: 'images/user123/file.jpg' }),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('refreshS3FileUrls', () => {
|
||||
it('should refresh expired URLs for multiple files', async () => {
|
||||
const now = new Date();
|
||||
const past = new Date(now.getTime() - 3600 * 1000);
|
||||
const dateStr = past
|
||||
.toISOString()
|
||||
.replace(/[-:]/g, '')
|
||||
.replace(/\.\d{3}/, '');
|
||||
|
||||
const files = [
|
||||
{
|
||||
file_id: 'file1',
|
||||
source: FileSources.s3,
|
||||
filepath: `https://s3.amazonaws.com/bucket/images/user123/file1.jpg?X-Amz-Signature=abc&X-Amz-Date=${dateStr}&X-Amz-Expires=60`,
|
||||
},
|
||||
{
|
||||
file_id: 'file2',
|
||||
source: FileSources.s3,
|
||||
filepath: `https://s3.amazonaws.com/bucket/images/user123/file2.jpg?X-Amz-Signature=def&X-Amz-Date=${dateStr}&X-Amz-Expires=60`,
|
||||
},
|
||||
];
|
||||
|
||||
const newURL1 = 'https://s3.amazonaws.com/bucket/images/user123/file1.jpg?signature=new1';
|
||||
const newURL2 = 'https://s3.amazonaws.com/bucket/images/user123/file2.jpg?signature=new2';
|
||||
|
||||
getSignedUrl.mockResolvedValueOnce(newURL1).mockResolvedValueOnce(newURL2);
|
||||
|
||||
const mockBatchUpdate = jest.fn().mockResolvedValue();
|
||||
|
||||
const result = await refreshS3FileUrls(files, mockBatchUpdate, 60);
|
||||
|
||||
expect(result[0].filepath).toBe(newURL1);
|
||||
expect(result[1].filepath).toBe(newURL2);
|
||||
expect(mockBatchUpdate).toHaveBeenCalledWith([
|
||||
{ file_id: 'file1', filepath: newURL1 },
|
||||
{ file_id: 'file2', filepath: newURL2 },
|
||||
]);
|
||||
});
|
||||
|
||||
it('should skip non-S3 files', async () => {
|
||||
const files = [
|
||||
{
|
||||
file_id: 'file1',
|
||||
source: 'local',
|
||||
filepath: '/local/path/file.jpg',
|
||||
},
|
||||
];
|
||||
|
||||
const mockBatchUpdate = jest.fn();
|
||||
|
||||
const result = await refreshS3FileUrls(files, mockBatchUpdate);
|
||||
|
||||
expect(result).toEqual(files);
|
||||
expect(mockBatchUpdate).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle empty or invalid input', async () => {
|
||||
const mockBatchUpdate = jest.fn();
|
||||
|
||||
const result1 = await refreshS3FileUrls(null, mockBatchUpdate);
|
||||
expect(result1).toBe(null);
|
||||
|
||||
const result2 = await refreshS3FileUrls([], mockBatchUpdate);
|
||||
expect(result2).toEqual([]);
|
||||
|
||||
expect(mockBatchUpdate).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should handle errors for individual files gracefully', async () => {
|
||||
const now = new Date();
|
||||
const past = new Date(now.getTime() - 3600 * 1000);
|
||||
const dateStr = past
|
||||
.toISOString()
|
||||
.replace(/[-:]/g, '')
|
||||
.replace(/\.\d{3}/, '');
|
||||
|
||||
const files = [
|
||||
{
|
||||
file_id: 'file1',
|
||||
source: FileSources.s3,
|
||||
filepath: `https://s3.amazonaws.com/bucket/images/user123/file1.jpg?X-Amz-Signature=abc&X-Amz-Date=${dateStr}&X-Amz-Expires=60`,
|
||||
},
|
||||
];
|
||||
|
||||
getSignedUrl.mockRejectedValue(new Error('Failed to refresh'));
|
||||
const mockBatchUpdate = jest.fn();
|
||||
|
||||
await refreshS3FileUrls(files, mockBatchUpdate, 60);
|
||||
|
||||
expect(logger.error).toHaveBeenCalledWith('Error getting new S3 URL:', expect.any(Error));
|
||||
expect(mockBatchUpdate).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('refreshS3Url', () => {
|
||||
it('should refresh an expired S3 URL', async () => {
|
||||
const now = new Date();
|
||||
const past = new Date(now.getTime() - 3600 * 1000);
|
||||
const dateStr = past
|
||||
.toISOString()
|
||||
.replace(/[-:]/g, '')
|
||||
.replace(/\.\d{3}/, '');
|
||||
|
||||
const fileObj = {
|
||||
source: FileSources.s3,
|
||||
filepath: `https://s3.amazonaws.com/bucket/images/user123/file.jpg?X-Amz-Signature=abc&X-Amz-Date=${dateStr}&X-Amz-Expires=60`,
|
||||
};
|
||||
|
||||
const newURL = 'https://s3.amazonaws.com/bucket/images/user123/file.jpg?signature=new';
|
||||
getSignedUrl.mockResolvedValue(newURL);
|
||||
|
||||
const result = await refreshS3Url(fileObj, 60);
|
||||
|
||||
expect(result).toBe(newURL);
|
||||
});
|
||||
|
||||
it('should return original URL if not expired', async () => {
|
||||
const fileObj = {
|
||||
source: FileSources.s3,
|
||||
filepath: 'https://example.com/proxy/file.jpg',
|
||||
};
|
||||
|
||||
const result = await refreshS3Url(fileObj, 3600);
|
||||
|
||||
expect(result).toBe(fileObj.filepath);
|
||||
expect(getSignedUrl).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should return empty string for null input', async () => {
|
||||
const result = await refreshS3Url(null);
|
||||
expect(result).toBe('');
|
||||
});
|
||||
|
||||
it('should return original URL for non-S3 files', async () => {
|
||||
const fileObj = {
|
||||
source: 'local',
|
||||
filepath: '/local/path/file.jpg',
|
||||
};
|
||||
|
||||
const result = await refreshS3Url(fileObj);
|
||||
|
||||
expect(result).toBe(fileObj.filepath);
|
||||
});
|
||||
|
||||
it('should handle errors and return original URL', async () => {
|
||||
const now = new Date();
|
||||
const past = new Date(now.getTime() - 3600 * 1000);
|
||||
const dateStr = past
|
||||
.toISOString()
|
||||
.replace(/[-:]/g, '')
|
||||
.replace(/\.\d{3}/, '');
|
||||
|
||||
const fileObj = {
|
||||
source: FileSources.s3,
|
||||
filepath: `https://s3.amazonaws.com/bucket/images/user123/file.jpg?X-Amz-Signature=abc&X-Amz-Date=${dateStr}&X-Amz-Expires=60`,
|
||||
};
|
||||
|
||||
getSignedUrl.mockRejectedValue(new Error('Refresh failed'));
|
||||
|
||||
const result = await refreshS3Url(fileObj, 60);
|
||||
|
||||
expect(result).toBe(fileObj.filepath);
|
||||
expect(logger.error).toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('extractKeyFromS3Url', () => {
|
||||
it('should extract key from a full S3 URL', () => {
|
||||
const url = 'https://s3.amazonaws.com/test-bucket/images/user123/file.jpg';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('images/user123/file.jpg');
|
||||
});
|
||||
|
||||
it('should extract key from a signed S3 URL with query parameters', () => {
|
||||
const url =
|
||||
'https://s3.amazonaws.com/test-bucket/documents/user456/report.pdf?X-Amz-Signature=abc123&X-Amz-Date=20260107';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('documents/user456/report.pdf');
|
||||
});
|
||||
|
||||
it('should extract key from S3 URL with different domain format', () => {
|
||||
const url = 'https://test-bucket.s3.amazonaws.com/uploads/user789/image.png';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('uploads/user789/image.png');
|
||||
});
|
||||
|
||||
it('should return key as-is if already properly formatted (3+ parts, no http)', () => {
|
||||
const key = 'images/user123/file.jpg';
|
||||
const result = extractKeyFromS3Url(key);
|
||||
expect(result).toBe('images/user123/file.jpg');
|
||||
});
|
||||
|
||||
it('should handle key with leading slash by removing it', () => {
|
||||
const key = '/images/user123/file.jpg';
|
||||
const result = extractKeyFromS3Url(key);
|
||||
expect(result).toBe('images/user123/file.jpg');
|
||||
});
|
||||
|
||||
it('should handle simple key without slashes', () => {
|
||||
const key = 'simple-file.txt';
|
||||
const result = extractKeyFromS3Url(key);
|
||||
expect(result).toBe('simple-file.txt');
|
||||
});
|
||||
|
||||
it('should handle key with only two parts', () => {
|
||||
const key = 'folder/file.txt';
|
||||
const result = extractKeyFromS3Url(key);
|
||||
expect(result).toBe('folder/file.txt');
|
||||
});
|
||||
|
||||
it('should throw error for empty input', () => {
|
||||
expect(() => extractKeyFromS3Url('')).toThrow('Invalid input: URL or key is empty');
|
||||
});
|
||||
|
||||
it('should throw error for null input', () => {
|
||||
expect(() => extractKeyFromS3Url(null)).toThrow('Invalid input: URL or key is empty');
|
||||
});
|
||||
|
||||
it('should throw error for undefined input', () => {
|
||||
expect(() => extractKeyFromS3Url(undefined)).toThrow('Invalid input: URL or key is empty');
|
||||
});
|
||||
|
||||
it('should handle URLs with encoded characters', () => {
|
||||
const url = 'https://s3.amazonaws.com/test-bucket/images/user123/my%20file%20name.jpg';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('images/user123/my%20file%20name.jpg');
|
||||
});
|
||||
|
||||
it('should handle deep nested paths', () => {
|
||||
const url = 'https://s3.amazonaws.com/bucket/a/b/c/d/e/f/file.jpg';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('a/b/c/d/e/f/file.jpg');
|
||||
});
|
||||
|
||||
it('should log debug message when extracting from URL', () => {
|
||||
const url = 'https://s3.amazonaws.com/bucket/images/user123/file.jpg';
|
||||
extractKeyFromS3Url(url);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
expect.stringContaining('[extractKeyFromS3Url] fileUrlOrKey:'),
|
||||
);
|
||||
});
|
||||
|
||||
it('should log fallback debug message for non-URL input', () => {
|
||||
const key = 'simple-file.txt';
|
||||
extractKeyFromS3Url(key);
|
||||
expect(logger.debug).toHaveBeenCalledWith(
|
||||
expect.stringContaining('[extractKeyFromS3Url] FALLBACK'),
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle valid URLs that contain only a bucket', () => {
|
||||
const url = 'https://s3.amazonaws.com/test-bucket/';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
expect.stringContaining(
|
||||
'[extractKeyFromS3Url] Extracted key is empty after removing bucket name from URL: https://s3.amazonaws.com/test-bucket/',
|
||||
),
|
||||
);
|
||||
expect(result).toBe('');
|
||||
});
|
||||
|
||||
it('should handle invalid URLs that contain only a bucket', () => {
|
||||
const url = 'https://s3.amazonaws.com/test-bucket';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
expect.stringContaining(
|
||||
'[extractKeyFromS3Url] Unable to extract key from path-style URL: https://s3.amazonaws.com/test-bucket',
|
||||
),
|
||||
);
|
||||
expect(result).toBe('');
|
||||
});
|
||||
|
||||
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html
|
||||
|
||||
// Path-style requests
|
||||
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#path-style-access
|
||||
// https://s3.region-code.amazonaws.com/bucket-name/key-name
|
||||
it('should handle formatted according to Path-style regional endpoint', () => {
|
||||
const url = 'https://s3.us-west-2.amazonaws.com/amzn-s3-demo-bucket1/dogs/puppy.jpg';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('dogs/puppy.jpg');
|
||||
});
|
||||
|
||||
// virtual host style
|
||||
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#virtual-hosted-style-access
|
||||
// https://bucket-name.s3.region-code.amazonaws.com/key-name
|
||||
it('should handle formatted according to Virtual-hosted–style Regional endpoint', () => {
|
||||
const url = 'https://amzn-s3-demo-bucket1.s3.us-west-2.amazonaws.com/dogs/puppy.png';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('dogs/puppy.png');
|
||||
});
|
||||
|
||||
// Legacy endpoints
|
||||
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html#VirtualHostingBackwardsCompatibility
|
||||
|
||||
// s3‐Region
|
||||
// https://bucket-name.s3-region-code.amazonaws.com
|
||||
it('should handle formatted according to s3‐Region', () => {
|
||||
const url = 'https://amzn-s3-demo-bucket1.s3-us-west-2.amazonaws.com/puppy.png';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('puppy.png');
|
||||
|
||||
const testcase2 = 'https://amzn-s3-demo-bucket1.s3-us-west-2.amazonaws.com/cats/kitten.png';
|
||||
const result2 = extractKeyFromS3Url(testcase2);
|
||||
expect(result2).toBe('cats/kitten.png');
|
||||
});
|
||||
|
||||
// Legacy global endpoint
|
||||
// bucket-name.s3.amazonaws.com
|
||||
it('should handle formatted according to Legacy global endpoint', () => {
|
||||
const url = 'https://amzn-s3-demo-bucket1.s3.amazonaws.com/dogs/puppy.png';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('dogs/puppy.png');
|
||||
});
|
||||
|
||||
it('should handle malformed URL and log error', () => {
|
||||
const malformedUrl = 'https://invalid url with spaces.com/key';
|
||||
const result = extractKeyFromS3Url(malformedUrl);
|
||||
|
||||
expect(logger.error).toHaveBeenCalledWith(
|
||||
expect.stringContaining('[extractKeyFromS3Url] Error parsing URL:'),
|
||||
);
|
||||
expect(logger.error).toHaveBeenCalledWith(expect.stringContaining(malformedUrl));
|
||||
|
||||
expect(result).toBe(malformedUrl);
|
||||
});
|
||||
|
||||
it('should return empty string for regional path-style URL with only bucket (no key)', () => {
|
||||
const url = 'https://s3.us-west-2.amazonaws.com/my-bucket';
|
||||
const result = extractKeyFromS3Url(url);
|
||||
expect(result).toBe('');
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
expect.stringContaining('[extractKeyFromS3Url] Unable to extract key from path-style URL:'),
|
||||
);
|
||||
});
|
||||
|
||||
it('should not log error when given a plain S3 key (non-URL input)', () => {
|
||||
extractKeyFromS3Url('images/user123/file.jpg');
|
||||
expect(logger.error).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should strip bucket from custom endpoint URLs (MinIO, R2, etc.) using bucketName', () => {
|
||||
// bucketName is the module-level const 'test-bucket', set before require at top of file
|
||||
expect(
|
||||
extractKeyFromS3Url('https://minio.example.com/test-bucket/images/user123/file.jpg'),
|
||||
).toBe('images/user123/file.jpg');
|
||||
expect(
|
||||
extractKeyFromS3Url(
|
||||
'https://abc123.r2.cloudflarestorage.com/test-bucket/images/user123/avatar.png',
|
||||
),
|
||||
).toBe('images/user123/avatar.png');
|
||||
});
|
||||
});
|
||||
});
|
||||
Loading…
Add table
Add a link
Reference in a new issue