/**
 * PDF/A Conversion using Ghostscript WASM
 *
 * Converts PDFs to PDF/A-1b, PDF/A-2b, or PDF/A-3b format.
 */

import loadWASM from '@bentopdf/gs-wasm';
import { getWasmBaseUrl, fetchWasmFile } from '../config/wasm-cdn-config.js';

/**
 * The subset of the Ghostscript WASM module surface that this file uses:
 * a virtual file system and the command-line style entry point.
 */
interface GhostscriptModule {
  FS: {
    writeFile(path: string, data: Uint8Array | string): void;
    readFile(path: string, opts?: { encoding?: string }): Uint8Array;
    unlink(path: string): void;
    stat(path: string): { size: number };
  };
  callMain(args: string[]): number;
}

/** PDF/A conformance levels supported by {@link convertToPdfA}. */
export type PdfALevel = 'PDF/A-1b' | 'PDF/A-2b' | 'PDF/A-3b';

/** Value passed to Ghostscript's `-dPDFA=` switch for each conformance level. */
const PDFA_PART: Record<PdfALevel, string> = {
  'PDF/A-1b': '1',
  'PDF/A-2b': '2',
  'PDF/A-3b': '3',
};

// Paths inside the Ghostscript virtual file system used during a conversion.
const INPUT_PATH = '/tmp/input.pdf';
const OUTPUT_PATH = '/tmp/output.pdf';
const PDFA_DEF_PATH = '/tmp/pdfa.ps';
const ICC_PATH = '/tmp/pdfa.icc';

/** Module instance reused across conversions so the WASM is only loaded once. */
let cachedGsModule: GhostscriptModule | null = null;

/** Installs an already-loaded Ghostscript module for later conversions to reuse. */
export function setCachedGsModule(module: GhostscriptModule): void {
  cachedGsModule = module;
}

/** Returns the cached Ghostscript module, or `null` if none has been loaded or set. */
export function getCachedGsModule(): GhostscriptModule | null {
  return cachedGsModule;
}

// Currently unused; kept for reference. Written as line comments (not JSDoc)
// so that tooling does not attach this description to convertToPdfA below.
//
// Encode binary data to Adobe ASCII85 (Base85) format.
// This matches Python's base64.a85encode(data, adobe=True)
//
// function encodeBase85(data: Uint8Array): string {
//   const POW85 = [85 * 85 * 85 * 85, 85 * 85 * 85, 85 * 85, 85, 1];
//   let result = '';
//   // Process 4 bytes at a time
//   for (let i = 0; i < data.length; i += 4) {
//     // Get 4 bytes (pad with zeros if needed)
//     let value = 0;
//     const remaining = Math.min(4, data.length - i);
//     for (let j = 0; j < 4; j++) {
//       value = value * 256 + (j < remaining ? data[i + j] : 0);
//     }
//     // Special case: all zeros become 'z'
//     if (value === 0 && remaining === 4) {
//       result += 'z';
//     } else {
//       // Encode to 5 ASCII85 characters
//       const encoded: string[] = [];
//       for (let j = 0; j < 5; j++) {
//         encoded.push(String.fromCharCode((value / POW85[j]) % 85 + 33));
//       }
//       // For partial blocks, only output needed characters
//       result += encoded.slice(0, remaining + 1).join('');
//     }
//   }
//   return result;
// }

/** Returns the cached Ghostscript module, loading and caching it on first use. */
async function loadGhostscript(): Promise<GhostscriptModule> {
  if (cachedGsModule) {
    return cachedGsModule;
  }

  const gsBaseUrl = getWasmBaseUrl('ghostscript');
  const gs = (await loadWASM({
    // Redirect the .wasm binary to the configured base URL; other paths are untouched.
    locateFile: (path: string) => {
      if (path.endsWith('.wasm')) {
        return gsBaseUrl + 'gs.wasm';
      }
      return path;
    },
    print: (text: string) => console.log('[GS]', text),
    printErr: (text: string) => console.error('[GS Error]', text),
  })) as GhostscriptModule;

  cachedGsModule = gs;
  return gs;
}

/**
 * Builds the PostScript prologue that attaches an ICC-based OutputIntent to
 * the document catalog via pdfmark.
 *
 * The text must keep real line breaks: PostScript `%` comments run to the end
 * of the line, so joining these lines would comment out the whole definition.
 */
function buildPdfaDefinition(level: PdfALevel, iccPath: string): string {
  // NOTE(review): Ghostscript's stock PDFA_def.ps uses /GTS_PDFA1 for every
  // PDF/A part; confirm that /GTS_PDFA is intended for 2b/3b.
  const subtype = level === 'PDF/A-1b' ? '/GTS_PDFA1' : '/GTS_PDFA';

  return `%!
% Define OutputIntent subtype based on PDF/A level
/OutputIntentSubtype ${subtype} def

[/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
[{icc_PDFA} <</N 3>> /PUT pdfmark
[{icc_PDFA} (${iccPath}) (r) file /PUT pdfmark

[/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
[{OutputIntent_PDFA} <<
  /Type /OutputIntent
  /S OutputIntentSubtype
  /DestOutputProfile {icc_PDFA}
  /OutputConditionIdentifier (sRGB)
>> /PUT pdfmark

[{Catalog} <<
  /OutputIntents [ {OutputIntent_PDFA} ]
>> /PUT pdfmark
`;
}

/** Fetches the sRGB ICC profile and writes it plus the PDF/A definition file to the virtual FS. */
async function writePdfaDefinition(
  gs: GhostscriptModule,
  level: PdfALevel
): Promise<void> {
  try {
    const response = await fetchWasmFile(
      'ghostscript',
      'sRGB_v4_ICC_preference.icc'
    );
    const iccData = new Uint8Array(await response.arrayBuffer());
    console.log(
      '[Ghostscript] sRGB v4 ICC profile loaded:',
      iccData.length,
      'bytes'
    );

    // Write ICC profile as a binary file to FS (eliminates encoding issues)
    gs.FS.writeFile(ICC_PATH, iccData);
    console.log('[Ghostscript] sRGB ICC profile written to FS:', ICC_PATH);

    // The PostScript references the ICC file by path instead of inlining it.
    // NOTE(review): Ghostscript builds that default to -dSAFER may need
    // --permit-file-read for this path — confirm against the gs-wasm build.
    gs.FS.writeFile(PDFA_DEF_PATH, buildPdfaDefinition(level, ICC_PATH));
    console.log(
      '[Ghostscript] PDFA PostScript created with embedded ICC profile'
    );
  } catch (e) {
    console.error('[Ghostscript] Failed to create PDFA PostScript:', e);
    throw new Error('Conversion failed: could not create PDF/A definition');
  }
}

/** Best-effort removal of a virtual-FS file; a missing file is not an error. */
function removeIfPresent(gs: GhostscriptModule, path: string): void {
  try {
    gs.FS.unlink(path);
  } catch {
    /* ignore: the file may never have been created */
  }
}

/**
 * Converts a PDF to the requested PDF/A conformance level using Ghostscript.
 *
 * All temporary files written to the Ghostscript virtual file system (input,
 * output, PDF/A definition and ICC profile) are removed before returning,
 * whether the conversion succeeds or fails, so a cached module does not
 * accumulate stale files between calls.
 *
 * @param pdfData - Bytes of the source PDF.
 * @param level - Target conformance level; defaults to `'PDF/A-2b'`.
 * @param onProgress - Optional callback that receives human-readable status messages.
 * @returns The bytes of the converted PDF.
 * @throws Error if `level` is not a supported value, if the PDF/A definition
 *   cannot be prepared, if Ghostscript throws or exits with a non-zero code,
 *   or if no output file can be read.
 */
export async function convertToPdfA(
  pdfData: Uint8Array,
  level: PdfALevel = 'PDF/A-2b',
  onProgress?: (msg: string) => void
): Promise<Uint8Array> {
  // Guard against untyped callers: an unknown level would otherwise reach
  // Ghostscript as "-dPDFA=undefined".
  if (!Object.prototype.hasOwnProperty.call(PDFA_PART, level)) {
    throw new Error(`Unsupported PDF/A level: ${String(level)}`);
  }

  onProgress?.('Loading Ghostscript...');
  const gs = await loadGhostscript();

  try {
    gs.FS.writeFile(INPUT_PATH, pdfData);
    console.log('[Ghostscript] Input file size:', pdfData.length);

    onProgress?.(`Converting to ${level}...`);
    await writePdfaDefinition(gs, level);

    const args = [
      '-dBATCH',
      '-dNOPAUSE',
      '-sDEVICE=pdfwrite',
      `-dPDFA=${PDFA_PART[level]}`,
      '-dPDFACompatibilityPolicy=1',
      `-dCompatibilityLevel=${level === 'PDF/A-1b' ? '1.4' : '1.7'}`,
      '-sColorConversionStrategy=RGB',
      '-dEmbedAllFonts=true',
      '-dSubsetFonts=true',
      '-dAutoRotatePages=/None',
      `-sOutputFile=${OUTPUT_PATH}`,
      // The definition file must precede the input so the OutputIntent is
      // registered before the document is processed.
      PDFA_DEF_PATH,
      INPUT_PATH,
    ];

    console.log('[Ghostscript] Running PDF/A conversion...');
    let exitCode: number;
    try {
      exitCode = gs.callMain(args);
    } catch (e) {
      console.error('[Ghostscript] Exception:', e);
      throw new Error(`Ghostscript threw an exception: ${e}`);
    }

    console.log('[Ghostscript] Exit code:', exitCode);
    if (exitCode !== 0) {
      throw new Error(
        `Ghostscript conversion failed with exit code ${exitCode}`
      );
    }

    // Read output
    try {
      const stat = gs.FS.stat(OUTPUT_PATH);
      console.log('[Ghostscript] Output file size:', stat.size);
      return gs.FS.readFile(OUTPUT_PATH);
    } catch (e) {
      console.error('[Ghostscript] Failed to read output:', e);
      throw new Error('Ghostscript did not produce output file');
    }
  } finally {
    // Cleanup runs on every exit path, including thrown errors.
    for (const path of [INPUT_PATH, OUTPUT_PATH, PDFA_DEF_PATH, ICC_PATH]) {
      removeIfPresent(gs, path);
    }
  }
}

/**
 * Converts a `File` to PDF/A and returns the result as a PDF `Blob`.
 *
 * @param file - Source PDF file.
 * @param level - Target conformance level; defaults to `'PDF/A-2b'`.
 * @param onProgress - Optional callback that receives human-readable status messages.
 * @returns A `Blob` of type `application/pdf` containing the converted document.
 * @throws Error under the same conditions as {@link convertToPdfA}.
 */
export async function convertFileToPdfA(
  file: File,
  level: PdfALevel = 'PDF/A-2b',
  onProgress?: (msg: string) => void
): Promise<Blob> {
  const arrayBuffer = await file.arrayBuffer();
  const pdfData = new Uint8Array(arrayBuffer);
  const result = await convertToPdfA(pdfData, level, onProgress);

  // Copy to regular ArrayBuffer to avoid SharedArrayBuffer issues
  const copy = new Uint8Array(result.length);
  copy.set(result);
  return new Blob([copy], { type: 'application/pdf' });
}