feat: implement PDF attachment extraction functionality with web worker support

- Added a new worker script to handle extraction of embedded attachments from PDF files.
- Created TypeScript definitions for the message structure and response types.
- Updated the main extraction logic to utilize the worker for improved performance and responsiveness.
- Integrated the extraction feature into the UI, allowing users to extract attachments as a ZIP file.
- Enhanced error handling and user feedback during the extraction process.
This commit is contained in:
abdullahalam123
2025-11-11 11:13:06 +05:30
parent 4125302d01
commit 85ce0c97b5
5 changed files with 253 additions and 87 deletions

View File

@@ -0,0 +1,106 @@
self.importScripts('/coherentpdf.browser.min.js');
/**
 * Copies exactly the bytes covered by a typed-array view into a new buffer.
 *
 * Slicing by byteOffset/byteLength (instead of slicing the whole backing
 * buffer) keeps the copy correct when the view spans only part of a larger
 * ArrayBuffer.
 *
 * @param {Uint8Array} view - Typed-array view whose bytes should be copied.
 * @returns {ArrayBuffer} A new buffer holding `view.byteLength` bytes.
 */
function copyAttachmentBytes(view) {
  return view.buffer.slice(view.byteOffset, view.byteOffset + view.byteLength);
}

/**
 * Produces an output name `<prefix><attachmentName>` that is not yet in
 * `usedNames`, and records it there.
 *
 * On a collision a `_<counter>` suffix is inserted before the attachment's
 * last extension (or appended when the name has no dot), counting up from 1
 * until the full, prefixed name is free.
 *
 * @param {string} prefix - Text placed in front of the attachment name.
 * @param {string} attachmentName - Name of the attachment as reported by the PDF library.
 * @param {Set<string>} usedNames - Output names already taken; mutated by this call.
 * @returns {string} The reserved, unique output name.
 */
function reserveUniqueAttachmentName(prefix, attachmentName, usedNames) {
  const dotIndex = attachmentName.lastIndexOf('.');
  const hasExtension = dotIndex !== -1;
  const stem = hasExtension ? attachmentName.slice(0, dotIndex) : attachmentName;
  const extension = hasExtension ? attachmentName.slice(dotIndex) : '';

  let candidate = `${prefix}${attachmentName}`;
  for (let counter = 1; usedNames.has(candidate); counter++) {
    candidate = `${prefix}${stem}_${counter}${extension}`;
  }
  usedNames.add(candidate);
  return candidate;
}

/**
 * Extracts the embedded attachments of every given PDF and posts the result
 * back to the owner of this worker via `self.postMessage`.
 *
 * Each attachment is named `<pdfBaseName>_<attachmentName>`, or
 * `<pdfBaseName>_page<N>_<attachmentName>` when the library reports a page
 * number greater than 0. Names are made unique across the whole batch.
 *
 * Posted messages:
 * - `{ status: 'success', attachments: [{ name, data }] }`, where every `data`
 *   ArrayBuffer is also placed in the transfer list;
 * - `{ status: 'error', message }` when nothing was extracted or an
 *   unexpected error escaped.
 *
 * PDFs that fail to load, and individual attachments that fail to extract,
 * are logged with `console.warn` and skipped.
 *
 * @param {ArrayBuffer[]} fileBuffers - Contents of each PDF; each entry is wrapped in a Uint8Array before loading.
 * @param {string[]} fileNames - File names, index-aligned with `fileBuffers`.
 * @returns {void}
 */
function extractAttachmentsFromPDFsInWorker(fileBuffers, fileNames) {
  try {
    const allAttachments = [];
    // Final (prefixed) names already handed out, so collisions are detected
    // on the name that is actually emitted.
    const usedNames = new Set();
    const totalFiles = fileBuffers.length;

    for (let i = 0; i < totalFiles; i++) {
      const fileName = fileNames[i];

      let pdf;
      try {
        pdf = coherentpdf.fromMemory(new Uint8Array(fileBuffers[i]), '');
      } catch (error) {
        console.warn(`Failed to load PDF: ${fileName}`, error);
        continue;
      }

      // The finally blocks guarantee the attachment session is ended and the
      // PDF handle released on every path, including `continue` and throws.
      try {
        coherentpdf.startGetAttachments(pdf);
        try {
          const attachmentCount = coherentpdf.numberGetAttachments();
          if (attachmentCount === 0) {
            console.warn(`No attachments found in ${fileName}`);
            continue;
          }

          const baseName = fileName.replace(/\.pdf$/i, '');
          for (let j = 0; j < attachmentCount; j++) {
            try {
              const attachmentName = coherentpdf.getAttachmentName(j);
              const attachmentPage = coherentpdf.getAttachmentPage(j);
              const attachmentData = coherentpdf.getAttachmentData(j);

              // Copy before reserving a name so a failed copy does not
              // consume a name.
              const data = copyAttachmentBytes(attachmentData);
              const prefix = attachmentPage > 0
                ? `${baseName}_page${attachmentPage}_`
                : `${baseName}_`;
              const name = reserveUniqueAttachmentName(prefix, attachmentName, usedNames);

              allAttachments.push({ name, data });
            } catch (error) {
              console.warn(`Failed to extract attachment ${j} from ${fileName}:`, error);
            }
          }
        } finally {
          coherentpdf.endGetAttachments();
        }
      } finally {
        coherentpdf.deletePdf(pdf);
      }
    }

    if (allAttachments.length === 0) {
      self.postMessage({
        status: 'error',
        message: 'No attachments were found in the selected PDF(s).'
      });
      return;
    }

    // Each buffer is a private copy, so it can be transferred instead of cloned.
    const transferBuffers = allAttachments.map((attachment) => attachment.data);
    self.postMessage({ status: 'success', attachments: allAttachments }, transferBuffers);
  } catch (error) {
    self.postMessage({
      status: 'error',
      message: error instanceof Error
        ? error.message
        : 'Unknown error occurred during attachment extraction.'
    });
  }
}
// Worker message entry point: dispatches on the `command` field of the
// incoming message. Only 'extract-attachments' is handled; any other command
// is ignored without a reply.
self.onmessage = (event) => {
  const request = event.data;
  if (request.command !== 'extract-attachments') {
    return;
  }
  extractAttachmentsFromPDFsInWorker(request.fileBuffers, request.fileNames);
};