diff --git a/public/workers/extract-attachments.worker.d.ts b/public/workers/extract-attachments.worker.d.ts new file mode 100644 index 0000000..2bf1a2d --- /dev/null +++ b/public/workers/extract-attachments.worker.d.ts @@ -0,0 +1,19 @@ +declare const coherentpdf: typeof import('../../src/types/coherentpdf.global').coherentpdf; + +interface ExtractAttachmentsMessage { + command: 'extract-attachments'; + fileBuffers: ArrayBuffer[]; + fileNames: string[]; +} + +interface ExtractAttachmentSuccessResponse { + status: 'success'; + attachments: Array<{ name: string; data: ArrayBuffer }>; +} + +interface ExtractAttachmentErrorResponse { + status: 'error'; + message: string; +} + +type ExtractAttachmentResponse = ExtractAttachmentSuccessResponse | ExtractAttachmentErrorResponse; \ No newline at end of file diff --git a/public/workers/extract-attachments.worker.js b/public/workers/extract-attachments.worker.js new file mode 100644 index 0000000..0327c3a --- /dev/null +++ b/public/workers/extract-attachments.worker.js @@ -0,0 +1,106 @@ +self.importScripts('/coherentpdf.browser.min.js'); + +function extractAttachmentsFromPDFsInWorker(fileBuffers, fileNames) { + try { + const allAttachments = []; + const totalFiles = fileBuffers.length; + + for (let i = 0; i < totalFiles; i++) { + const buffer = fileBuffers[i]; + const fileName = fileNames[i]; + const uint8Array = new Uint8Array(buffer); + + let pdf; + try { + pdf = coherentpdf.fromMemory(uint8Array, ''); + } catch (error) { + console.warn(`Failed to load PDF: ${fileName}`, error); + continue; + } + + coherentpdf.startGetAttachments(pdf); + const attachmentCount = coherentpdf.numberGetAttachments(); + + if (attachmentCount === 0) { + console.warn(`No attachments found in ${fileName}`); + coherentpdf.deletePdf(pdf); + continue; + } + + const baseName = fileName.replace(/\.pdf$/i, ''); + for (let j = 0; j < attachmentCount; j++) { + try { + const attachmentName = coherentpdf.getAttachmentName(j); + const attachmentPage = coherentpdf.getAttachmentPage(j); + const attachmentData = coherentpdf.getAttachmentData(j); + + let uniqueName = attachmentName; + let counter = 1; + while (allAttachments.some(att => att.name === uniqueName)) { + const nameParts = attachmentName.split('.'); + if (nameParts.length > 1) { + const extension = nameParts.pop(); + uniqueName = `${nameParts.join('.')}_${counter}.${extension}`; + } else { + uniqueName = `${attachmentName}_${counter}`; + } + counter++; + } + + if (attachmentPage > 0) { + uniqueName = `${baseName}_page${attachmentPage}_${uniqueName}`; + } else { + uniqueName = `${baseName}_${uniqueName}`; + } + + allAttachments.push({ + name: uniqueName, + data: attachmentData.buffer.slice(0) + }); + } catch (error) { + console.warn(`Failed to extract attachment ${j} from ${fileName}:`, error); + } + } + + coherentpdf.endGetAttachments(); + coherentpdf.deletePdf(pdf); + } + + if (allAttachments.length === 0) { + self.postMessage({ + status: 'error', + message: 'No attachments were found in the selected PDF(s).' + }); + return; + } + + const response = { + status: 'success', + attachments: [] + }; + + const transferBuffers = []; + for (const attachment of allAttachments) { + response.attachments.push({ + name: attachment.name, + data: attachment.data + }); + transferBuffers.push(attachment.data); + } + + self.postMessage(response, transferBuffers); + } catch (error) { + self.postMessage({ + status: 'error', + message: error instanceof Error + ? error.message + : 'Unknown error occurred during attachment extraction.' + }); + } +} + +self.onmessage = (e) => { + if (e.data.command === 'extract-attachments') { + extractAttachmentsFromPDFsInWorker(e.data.fileBuffers, e.data.fileNames); + } +}; \ No newline at end of file diff --git a/src/js/config/tools.ts b/src/js/config/tools.ts index 1931f9e..5e61627 100644 --- a/src/js/config/tools.ts +++ b/src/js/config/tools.ts @@ -318,13 +318,12 @@ export const categories = [ icon: 'paperclip', subtitle: 'Embed one or more files into your PDF.', }, - // TODO@ALAM - MAKE THIS LATER, ONCE INTEGERATED WITH CPDF - // { - // id: 'extract-attachments', - // name: 'Extract Attachments', - // icon: 'download', - // subtitle: 'Extract all embedded files from PDF(s) as a ZIP.', - // }, + { + id: 'extract-attachments', + name: 'Extract Attachments', + icon: 'download', + subtitle: 'Extract all embedded files from PDF(s) as a ZIP.', + }, // { // id: 'edit-attachments', // name: 'Edit Attachments', diff --git a/src/js/logic/extract-attachments.ts b/src/js/logic/extract-attachments.ts index df7c81f..035b2c3 100644 --- a/src/js/logic/extract-attachments.ts +++ b/src/js/logic/extract-attachments.ts @@ -1,88 +1,130 @@ -// TODO@ALAM - USE CPDF HERE +import { downloadFile, formatBytes } from '../utils/helpers.js'; +import { state } from '../state.js'; +import JSZip from 'jszip'; -// import { showLoader, hideLoader, showAlert } from '../ui.js'; -// import { downloadFile, readFileAsArrayBuffer } from '../utils/helpers.js'; -// import { state } from '../state.js'; -// import { PDFDocument as PDFLibDocument } from 'pdf-lib'; -// import JSZip from 'jszip'; +const worker = new Worker('/workers/extract-attachments.worker.js'); -// export async function extractAttachments() { -// if (state.files.length === 0) { -// showAlert('No Files', 'Please select at least one PDF file.'); -// return; -// } +interface ExtractAttachmentSuccessResponse { + status: 'success'; + attachments: Array<{ name: string; data: ArrayBuffer }>; +} -// showLoader('Extracting attachments...'); -// try { -// const zip = new JSZip(); -// let totalAttachments = 0; +interface ExtractAttachmentErrorResponse { + status: 'error'; + message: string; +} -// for (const file of state.files) { -// const pdfBytes = await readFileAsArrayBuffer(file); -// const pdfDoc = await PDFLibDocument.load(pdfBytes as ArrayBuffer, { -// ignoreEncryption: true, -// }); +type ExtractAttachmentResponse = ExtractAttachmentSuccessResponse | ExtractAttachmentErrorResponse; -// const embeddedFiles = pdfDoc.context.enumerateIndirectObjects() -// .filter(([ref, obj]: any) => { -// // obj must be a PDFDict -// if (obj && typeof obj.get === 'function') { -// const type = obj.get('Type'); -// return type && type.toString() === '/Filespec'; -// } -// return false; -// }); +export async function extractAttachments() { + if (state.files.length === 0) { + showStatus('No Files', 'error'); + return; + } -// if (embeddedFiles.length === 0) { -// console.warn(`No attachments found in ${file.name}`); -// continue; -// } + document.getElementById('process-btn')?.classList.add('opacity-50', 'cursor-not-allowed'); + document.getElementById('process-btn')?.setAttribute('disabled', 'true'); + + showStatus('Reading files (Main Thread)...', 'info'); -// // Extract attachments -// const baseName = file.name.replace(/\.pdf$/i, ''); -// for (let i = 0; i < embeddedFiles.length; i++) { -// try { -// const [ref, fileSpec] = embeddedFiles[i]; -// const fileSpecDict = fileSpec as any; - -// // Get attachment name -// const fileName = fileSpecDict.get('UF')?.decodeText() || -// fileSpecDict.get('F')?.decodeText() || -// `attachment-${i + 1}`; - -// // Get embedded file stream -// const ef = fileSpecDict.get('EF'); -// if (ef) { -// const fRef = ef.get('F') || ef.get('UF'); -// if (fRef) { -// const fileStream = pdfDoc.context.lookup(fRef); -// if (fileStream) { -// const fileData = (fileStream as any).getContents(); -// zip.file(`${baseName}_${fileName}`, fileData); -// totalAttachments++; -// } -// } -// } -// } catch (e) { -// console.warn(`Failed to extract attachment ${i} from ${file.name}:`, e); -// } -// } -// } + try { + const fileBuffers: ArrayBuffer[] = []; + const fileNames: string[] = []; -// if (totalAttachments === 0) { -// showAlert('No Attachments', 'No attachments were found in the selected PDF(s).'); -// hideLoader(); -// return; -// } + for (const file of state.files) { + const buffer = await file.arrayBuffer(); + fileBuffers.push(buffer); + fileNames.push(file.name); + } -// const zipBlob = await zip.generateAsync({ type: 'blob' }); -// downloadFile(zipBlob, 'extracted-attachments.zip'); -// showAlert('Success', `Extracted ${totalAttachments} attachment(s) successfully!`); -// } catch (e) { -// console.error(e); -// showAlert('Error', 'Failed to extract attachments. The PDF may not contain attachments or may be corrupted.'); -// } finally { -// hideLoader(); -// } -// } + showStatus(`Extracting attachments from ${state.files.length} file(s)...`, 'info'); + const message: ExtractAttachmentsMessage = { + command: 'extract-attachments', + fileBuffers, + fileNames, + }; + + const transferables = fileBuffers.map(buf => buf); + worker.postMessage(message, transferables); + + } catch (error) { + console.error('Error reading files:', error); + showStatus( + `Error reading files: ${error instanceof Error ? error.message : 'Unknown error occurred'}`, + 'error' + ); + document.getElementById('process-btn')?.classList.remove('opacity-50', 'cursor-not-allowed'); + document.getElementById('process-btn')?.removeAttribute('disabled'); + } +} + +worker.onmessage = (e: MessageEvent) => { + document.getElementById('process-btn')?.classList.remove('opacity-50', 'cursor-not-allowed'); + document.getElementById('process-btn')?.removeAttribute('disabled'); + + if (e.data.status === 'success') { + const attachments = e.data.attachments; + + const zip = new JSZip(); + let totalSize = 0; + + for (const attachment of attachments) { + zip.file(attachment.name, new Uint8Array(attachment.data)); + totalSize += attachment.data.byteLength; + } + + zip.generateAsync({ type: 'blob' }).then((zipBlob) => { + downloadFile(zipBlob, 'extracted-attachments.zip'); + showStatus( + `Extraction completed! ${attachments.length} attachment(s) in zip file (${formatBytes(totalSize)}). Download started.`, + 'success' + ); + + state.files = []; + const fileDisplayArea = document.getElementById('file-display-area'); + if (fileDisplayArea) { + fileDisplayArea.innerHTML = ''; + fileDisplayArea.classList.add('hidden'); + } + const fileInput = document.getElementById('file-input') as HTMLInputElement; + if (fileInput) { + fileInput.value = ''; + } + document.getElementById('process-btn')?.classList.add('opacity-50', 'cursor-not-allowed'); + document.getElementById('process-btn')?.setAttribute('disabled', 'true'); + }); + } else if (e.data.status === 'error') { + const errorMessage = e.data.message || 'Unknown error occurred in worker.'; + console.error('Worker Error:', errorMessage); + showStatus(`Error: ${errorMessage}`, 'error'); + } +}; + +worker.onerror = (error) => { + console.error('Worker error:', error); + showStatus('Worker error occurred. Check console for details.', 'error'); + document.getElementById('process-btn')?.classList.remove('opacity-50', 'cursor-not-allowed'); + document.getElementById('process-btn')?.removeAttribute('disabled'); +}; + +function showStatus(message: string, type: 'success' | 'error' | 'info' = 'info') { + const statusMessage = document.getElementById('status-message') as HTMLElement; + if (!statusMessage) return; + + statusMessage.textContent = message; + statusMessage.className = `mt-4 p-3 rounded-lg text-sm ${ + type === 'success' + ? 'bg-green-900 text-green-200' + : type === 'error' + ? 'bg-red-900 text-red-200' + : 'bg-blue-900 text-blue-200' + }`; + statusMessage.classList.remove('hidden'); +} + +interface ExtractAttachmentsMessage { + command: 'extract-attachments'; + fileBuffers: ArrayBuffer[]; + fileNames: string[]; +} \ No newline at end of file diff --git a/src/js/logic/index.ts b/src/js/logic/index.ts index 780d450..1b50560 100644 --- a/src/js/logic/index.ts +++ b/src/js/logic/index.ts @@ -63,7 +63,7 @@ import { import { alternateMerge, setupAlternateMergeTool } from './alternate-merge.js'; import { linearizePdf } from './linearize.js'; import { addAttachments, setupAddAttachmentsTool } from './add-attachments.js'; -// import { extractAttachments } from './extract-attachments.js'; +import { extractAttachments } from './extract-attachments.js'; // import { editAttachments, setupEditAttachmentsTool } from './edit-attachments.js'; import { sanitizePdf } from './sanitize-pdf.js'; import { removeRestrictions } from './remove-restrictions.js'; @@ -140,7 +140,7 @@ export const toolLogic = { process: addAttachments, setup: setupAddAttachmentsTool, }, - // 'extract-attachments': extractAttachments, + 'extract-attachments': extractAttachments, // 'edit-attachments': { // process: editAttachments, // setup: setupEditAttachmentsTool,