import path from 'path'; import { parseDocument } from './crud'; describe('Document Parser', () => { test('parseDocument() parses text from docx', async () => { const file = { originalname: 'sample.docx', path: path.join(__dirname, 'sample.docx'), mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', } as Express.Multer.File; const document = await parseDocument({ file }); expect(document).toEqual({ bytes: 29, filename: 'sample.docx', filepath: 'document_parser', images: [], text: 'This is a sample DOCX file.\n\n', }); }); test('parseDocument() parses text from xlsx', async () => { const file = { originalname: 'sample.xlsx', path: path.join(__dirname, 'sample.xlsx'), mimetype: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', } as Express.Multer.File; const document = await parseDocument({ file }); expect(document).toEqual({ bytes: 66, filename: 'sample.xlsx', filepath: 'document_parser', images: [], text: 'Sheet One:\nData,on,first,sheet\nSecond Sheet:\nData,On\nSecond,Sheet\n', }); }); test('parseDocument() parses text from xls', async () => { const file = { originalname: 'sample.xls', path: path.join(__dirname, 'sample.xls'), mimetype: 'application/vnd.ms-excel', } as Express.Multer.File; const document = await parseDocument({ file }); expect(document).toEqual({ bytes: 31, filename: 'sample.xls', filepath: 'document_parser', images: [], text: 'Sheet One:\nData,on,first,sheet\n', }); }); test('parseDocument() throws error for unhandled document type', async () => { const file = { originalname: 'nonexistent.file', path: path.join(__dirname, 'nonexistent.file'), mimetype: 'application/invalid', } as Express.Multer.File; await expect(parseDocument({ file })).rejects.toThrow( 'Unsupported file type in document parser: application/invalid', ); }); test('parseDocument() throws error for empty document', async () => { const file = { originalname: 'empty.docx', path: path.join(__dirname, 'empty.docx'), mimetype: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', } as Express.Multer.File; await expect(parseDocument({ file })).rejects.toThrow('No text found in document'); }); });