Reset branch to main

This commit is contained in:
Sebastian Espei
2026-03-09 17:02:30 +01:00
parent 68343a4c9a
commit e474d11730
606 changed files with 189976 additions and 43358 deletions

153
src/js/utils/compress.ts Normal file
View File

@@ -0,0 +1,153 @@
import { PDFDocument } from 'pdf-lib';
import { getPDFDocument } from './helpers.js';
import { loadPyMuPDF } from './pymupdf-loader.js';
/**
 * Presets for the PyMuPDF-based "condense" compression, keyed by level name.
 *
 * - `images.quality`      – image re-encode quality handed to the compressor.
 * - `images.dpiTarget`    – resolution images are downsampled to.
 * - `images.dpiThreshold` – images above this resolution are downsampled.
 * - `scrub.*`             – which document extras are stripped.
 * - `subsetFonts`         – whether embedded fonts are subset.
 *
 * Every preset carries the same set of keys (including `scrub.xmlMetadata`)
 * so consumers can read any field without casting.
 */
export const CONDENSE_PRESETS = {
  light: {
    images: { quality: 90, dpiTarget: 150, dpiThreshold: 200 },
    scrub: { metadata: false, thumbnails: true, xmlMetadata: false },
    subsetFonts: true,
  },
  balanced: {
    images: { quality: 75, dpiTarget: 96, dpiThreshold: 150 },
    scrub: { metadata: true, thumbnails: true, xmlMetadata: false },
    subsetFonts: true,
  },
  aggressive: {
    images: { quality: 50, dpiTarget: 72, dpiThreshold: 100 },
    scrub: { metadata: true, thumbnails: true, xmlMetadata: true },
    subsetFonts: true,
  },
  extreme: {
    images: { quality: 30, dpiTarget: 60, dpiThreshold: 96 },
    scrub: { metadata: true, thumbnails: true, xmlMetadata: true },
    subsetFonts: true,
  },
};
/**
 * Presets for the rasterising "photon" compression, keyed by level name.
 * `scale` is the pdf.js viewport scale each page is rendered at and
 * `quality` is the JPEG quality argument given to `canvas.toBlob`.
 */
export const PHOTON_PRESETS = {
light: { scale: 2.0, quality: 0.85 },
balanced: { scale: 1.5, quality: 0.65 },
aggressive: { scale: 1.2, quality: 0.45 },
extreme: { scale: 1.0, quality: 0.25 },
};
/**
 * Optional per-call overrides for a condense preset. Any field left
 * undefined falls back to the value of the selected preset.
 */
export interface CondenseCustomSettings {
// Overrides the preset's `images.quality`.
imageQuality?: number;
// Overrides the preset's `images.dpiTarget`.
dpiTarget?: number;
// Overrides the preset's `images.dpiThreshold`; the compressor raises it to
// at least `dpiTarget + 10`.
dpiThreshold?: number;
// Overrides the preset's `scrub.metadata`.
removeMetadata?: boolean;
// Overrides the preset's `subsetFonts`.
subsetFonts?: boolean;
// Convert images to grayscale; treated as false when omitted.
convertToGrayscale?: boolean;
// Overrides the preset's `scrub.thumbnails`.
removeThumbnails?: boolean;
}
/**
 * Compresses a PDF with PyMuPDF using a named preset, optionally overridden
 * field by field through `customSettings`.
 *
 * If the first attempt fails, the compression is retried once with image
 * optimisation disabled; a result from that retry carries `usedFallback: true`.
 *
 * @param fileBlob - The PDF to compress.
 * @param level - Preset name; unknown names fall back to `balanced`.
 * @param customSettings - Optional overrides for individual preset values.
 * @returns Whatever `pymupdf.compressPdf` resolves with (plus `usedFallback`
 *          when the retry path was taken).
 * @throws Error with message `PDF compression failed: …` when both attempts fail.
 */
export async function performCondenseCompression(
  fileBlob: Blob,
  level: string,
  customSettings?: CondenseCustomSettings
) {
  const pymupdf = await loadPyMuPDF();
  const preset =
    CONDENSE_PRESETS[level as keyof typeof CONDENSE_PRESETS] ||
    CONDENSE_PRESETS.balanced;
  const scrubPreset = preset.scrub;

  const dpiTarget = customSettings?.dpiTarget ?? preset.images.dpiTarget;
  const userThreshold =
    customSettings?.dpiThreshold ?? preset.images.dpiThreshold;
  // The threshold must sit above the target, otherwise downsampling would
  // trigger for images that are already at the target resolution.
  const dpiThreshold = Math.max(userThreshold, dpiTarget + 10);

  const options = {
    images: {
      enabled: true,
      quality: customSettings?.imageQuality ?? preset.images.quality,
      dpiTarget,
      dpiThreshold,
      convertToGray: customSettings?.convertToGrayscale ?? false,
    },
    scrub: {
      metadata: customSettings?.removeMetadata ?? preset.scrub.metadata,
      thumbnails: customSettings?.removeThumbnails ?? preset.scrub.thumbnails,
      // Not every preset declares xmlMetadata; treat a missing key as false.
      xmlMetadata:
        'xmlMetadata' in scrubPreset ? scrubPreset.xmlMetadata : false,
    },
    subsetFonts: customSettings?.subsetFonts ?? preset.subsetFonts,
    save: {
      garbage: 4 as const,
      deflate: true,
      clean: true,
      useObjstms: true,
    },
  };

  try {
    return await pymupdf.compressPdf(fileBlob, options);
  } catch (primaryError: unknown) {
    // Keep a trace of why the full pipeline failed before falling back.
    console.warn(
      '[Compress] Compression with image optimisation failed, retrying without it:',
      primaryError
    );
    const fallbackOptions = {
      ...options,
      images: {
        ...options.images,
        enabled: false,
      },
    };
    try {
      const result = await pymupdf.compressPdf(fileBlob, fallbackOptions);
      return { ...result, usedFallback: true };
    } catch (fallbackError: unknown) {
      const maybeMessage = (
        fallbackError as { message?: unknown } | null | undefined
      )?.message;
      const msg =
        typeof maybeMessage === 'string' && maybeMessage
          ? maybeMessage
          : String(fallbackError);
      throw new Error(`PDF compression failed: ${msg}`);
    }
  }
}
/**
 * Compresses a PDF by rasterising every page to a JPEG and assembling the
 * images into a new PDF. Text and vector content are not preserved.
 *
 * Each page is rendered at the preset's `scale` (more pixels, better image
 * quality) but is placed on an output page that keeps the source page's
 * scale-1 dimensions, so the document's page size does not change.
 *
 * @param arrayBuffer - Bytes of the source PDF.
 * @param level - Preset name; unknown names fall back to `balanced`.
 * @returns Bytes of the rebuilt PDF.
 * @throws Error if a 2D canvas context or a JPEG blob cannot be created.
 */
export async function performPhotonCompression(
  arrayBuffer: ArrayBuffer,
  level: string
): Promise<Uint8Array> {
  const settings =
    PHOTON_PRESETS[level as keyof typeof PHOTON_PRESETS] ||
    PHOTON_PRESETS.balanced;
  const pdfJsDoc = await getPDFDocument({ data: arrayBuffer }).promise;

  try {
    const newPdfDoc = await PDFDocument.create();

    for (let i = 1; i <= pdfJsDoc.numPages; i++) {
      const page = await pdfJsDoc.getPage(i);
      // Render resolution (pixels) vs. output page size (PDF points).
      const viewport = page.getViewport({ scale: settings.scale });
      const pageSize = page.getViewport({ scale: 1 });

      const canvas = document.createElement('canvas');
      let jpegBlob: Blob;
      try {
        const context = canvas.getContext('2d');
        if (!context) throw new Error('Failed to create canvas context');
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        await page.render({ canvasContext: context, viewport, canvas: canvas })
          .promise;

        jpegBlob = await new Promise<Blob>((resolve, reject) =>
          canvas.toBlob(
            (blob) => {
              if (blob) resolve(blob);
              else reject(new Error('Failed to create JPEG blob'));
            },
            'image/jpeg',
            settings.quality
          )
        );
      } finally {
        // Release canvas memory, also when rendering or encoding failed.
        canvas.width = 0;
        canvas.height = 0;
      }

      const jpegBytes = await jpegBlob.arrayBuffer();
      const jpegImage = await newPdfDoc.embedJpg(jpegBytes);
      const newPage = newPdfDoc.addPage([pageSize.width, pageSize.height]);
      newPage.drawImage(jpegImage, {
        x: 0,
        y: 0,
        width: pageSize.width,
        height: pageSize.height,
      });
    }

    return await newPdfDoc.save();
  } finally {
    // Free the pdf.js document once all pages have been rasterised.
    await pdfJsDoc.destroy();
  }
}

View File

@@ -1,15 +1,35 @@
import { WasmProvider } from './wasm-provider';
let cpdfLoaded = false;
let cpdfLoadPromise: Promise<void> | null = null;
//TODO: @ALAM,is it better to use a worker to load the cpdf library?
// or just use the browser version?
export async function ensureCpdfLoaded(): Promise<void> {
/**
 * Builds the URL of the CoherentPDF browser bundle from the user-configured
 * base URL, or returns undefined when no base URL is configured.
 */
function getCpdfUrl(): string | undefined {
  const configuredBase = WasmProvider.getUrl('cpdf');
  if (!configuredBase) {
    return undefined;
  }
  // Make sure exactly one slash separates the base from the file name.
  const prefix = configuredBase.endsWith('/')
    ? configuredBase
    : `${configuredBase}/`;
  return `${prefix}coherentpdf.browser.min.js`;
}
/**
 * Reports whether `WasmProvider` has a configuration for the 'cpdf' module.
 */
export function isCpdfAvailable(): boolean {
return WasmProvider.isConfigured('cpdf');
}
export async function isCpdfLoaded(): Promise<void> {
if (cpdfLoaded) return;
if (cpdfLoadPromise) {
return cpdfLoadPromise;
}
const cpdfUrl = getCpdfUrl();
if (!cpdfUrl) {
throw new Error(
'CoherentPDF is not configured. Please configure it in WASM Settings.'
);
}
cpdfLoadPromise = new Promise((resolve, reject) => {
if (typeof (window as any).coherentpdf !== 'undefined') {
cpdfLoaded = true;
@@ -18,13 +38,14 @@ export async function ensureCpdfLoaded(): Promise<void> {
}
const script = document.createElement('script');
script.src = import.meta.env.BASE_URL + 'coherentpdf.browser.min.js';
script.src = cpdfUrl;
script.onload = () => {
cpdfLoaded = true;
console.log('[CPDF] Loaded from:', script.src);
resolve();
};
script.onerror = () => {
reject(new Error('Failed to load CoherentPDF library'));
reject(new Error('Failed to load CoherentPDF library from: ' + cpdfUrl));
};
document.head.appendChild(script);
});
@@ -32,11 +53,7 @@ export async function ensureCpdfLoaded(): Promise<void> {
return cpdfLoadPromise;
}
/**
* Gets the cpdf instance, ensuring it's loaded first
*/
export async function getCpdf(): Promise<any> {
await ensureCpdfLoaded();
await isCpdfLoaded();
return (window as any).coherentpdf;
}

View File

@@ -0,0 +1,90 @@
import { jsPDF } from 'jspdf';
import autoTable from 'jspdf-autotable';
import Papa from 'papaparse';
/** Options accepted by `convertCsvToPdf`. */
export interface CsvToPdfOptions {
// Called with a completion percentage and a short status message as the
// conversion advances.
onProgress?: (percent: number, message: string) => void;
}
/**
 * Renders a CSV file as a striped table in a landscape A4 PDF.
 *
 * The file is parsed with PapaParse, rows whose cells are all blank are
 * dropped, the first remaining row becomes the table header and the rest
 * become the body (via jsPDF + autotable).
 *
 * @param file - The CSV file to convert.
 * @param options - Optional progress callback.
 * @returns The generated PDF as a Blob.
 * @throws Rejects with `CSV file is empty` when no non-blank row exists, or
 *         with `Failed to parse CSV: …` when PapaParse reports an error.
 */
export async function convertCsvToPdf(
  file: File,
  options?: CsvToPdfOptions
): Promise<Blob> {
  const report = options?.onProgress;

  return new Promise((resolve, reject) => {
    report?.(10, 'Reading CSV file...');

    Papa.parse(file, {
      complete: (results) => {
        try {
          report?.(50, 'Generating PDF...');

          // Keep only rows that contain at least one non-blank cell.
          const hasContent = (row: string[]) =>
            row.some((cell) => cell && cell.trim() !== '');
          const usableRows = (results.data as string[][]).filter(hasContent);

          if (usableRows.length === 0) {
            reject(new Error('CSV file is empty'));
            return;
          }

          // Landscape gives wide tables more horizontal room.
          const pdf = new jsPDF({
            orientation: 'landscape',
            unit: 'mm',
            format: 'a4',
          });

          // First row is the header, everything after it is table body.
          const [headers, ...rows] = usableRows;

          report?.(70, 'Creating table...');

          autoTable(pdf, {
            head: [headers],
            body: rows,
            startY: 20,
            styles: {
              fontSize: 9,
              cellPadding: 3,
              overflow: 'linebreak',
              cellWidth: 'wrap',
            },
            headStyles: {
              fillColor: [41, 128, 185], // blue header band
              textColor: 255,
              fontStyle: 'bold',
            },
            alternateRowStyles: {
              fillColor: [245, 245, 245], // light gray zebra rows
            },
            margin: { top: 20, left: 10, right: 10 },
            theme: 'striped',
          });

          report?.(90, 'Finalizing PDF...');
          const pdfBlob = pdf.output('blob');

          report?.(100, 'Complete!');
          resolve(pdfBlob);
        } catch (error) {
          reject(error);
        }
      },
      error: (error) => {
        reject(new Error(`Failed to parse CSV: ${error.message}`));
      },
    });
  });
}

View File

@@ -2,7 +2,7 @@
// This script applies the full-width preference from localStorage to page uploaders
export function initFullWidthMode() {
const savedFullWidth = localStorage.getItem('fullWidthMode') === 'true';
const savedFullWidth = localStorage.getItem('fullWidthMode') !== 'false';
if (savedFullWidth) {
applyFullWidthMode(true);

View File

@@ -0,0 +1,89 @@
import { WasmProvider } from './wasm-provider.js';
let cachedGS: any = null;
let loadPromise: Promise<any> | null = null;
/**
 * Shape that `loadGhostscript` promises to return. The object itself is
 * created by the externally loaded `gs.js` wrapper.
 */
export interface GhostscriptInterface {
// NOTE(review): `profile` presumably selects the PDF/A flavour — confirm against the wrapper.
convertToPDFA(pdfBuffer: ArrayBuffer, profile: string): Promise<ArrayBuffer>;
fontToOutline(pdfBuffer: ArrayBuffer): Promise<ArrayBuffer>;
}
/**
 * Loads the Ghostscript wrapper (`gs.js`) from the user-configured base URL
 * and returns the instance it exposes. The instance is cached, and
 * concurrent callers share one in-flight load.
 *
 * A failed load is never cached: the in-flight promise is cleared on any
 * rejection (including "not configured"), and the instance is only stored
 * after it has fully initialised, so a later call can retry cleanly.
 *
 * @returns The loaded Ghostscript instance.
 * @throws Error when Ghostscript is not configured, the script cannot be
 *         loaded, or the wrapper exposes neither `loadGS()` nor
 *         `GhostscriptWASM`.
 */
export async function loadGhostscript(): Promise<GhostscriptInterface> {
  if (cachedGS) {
    return cachedGS;
  }
  if (loadPromise) {
    return loadPromise;
  }

  const pending = (async () => {
    const baseUrl = WasmProvider.getUrl('ghostscript');
    if (!baseUrl) {
      throw new Error(
        'Ghostscript is not configured. Please configure it in Advanced Settings.'
      );
    }
    const normalizedUrl = baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`;

    try {
      const wrapperUrl = `${normalizedUrl}gs.js`;
      await loadScript(wrapperUrl);

      const globalScope =
        typeof globalThis !== 'undefined' ? globalThis : window;

      // Build into a local first so a failing init() cannot leave a
      // half-initialised instance in the module-level cache.
      let instance: any;
      if (typeof (globalScope as any).loadGS === 'function') {
        instance = await (globalScope as any).loadGS({
          baseUrl: normalizedUrl,
        });
      } else if (typeof (globalScope as any).GhostscriptWASM === 'function') {
        instance = new (globalScope as any).GhostscriptWASM(normalizedUrl);
        await instance.init?.();
      } else {
        throw new Error(
          'Ghostscript wrapper did not expose expected interface. Expected loadGS() or GhostscriptWASM class.'
        );
      }

      cachedGS = instance;
      return instance;
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(
        `Failed to load Ghostscript from ${normalizedUrl}: ${reason}`
      );
    }
  })();

  loadPromise = pending;
  // Clear the shared promise on failure. This runs as a separate reaction, so
  // it also covers rejections raised before the first await, and it leaves
  // `pending` itself rejected for the caller.
  pending.catch(() => {
    if (loadPromise === pending) {
      loadPromise = null;
    }
  });
  return pending;
}
/**
 * Injects a classic `<script>` tag for `url` and resolves once it has
 * loaded. Resolves immediately when a script element with the same `src`
 * attribute is already in the document.
 *
 * A script that fails to load is removed again, so a later retry performs a
 * real load instead of finding the dead element.
 *
 * @param url - Script URL, compared verbatim against existing `src` attributes.
 * @throws Rejects with `Failed to load script: <url>` on a load error.
 */
function loadScript(url: string): Promise<void> {
  return new Promise((resolve, reject) => {
    // Compare attribute values directly rather than interpolating the URL
    // into a CSS selector, which breaks on quotes or backslashes.
    const alreadyPresent = Array.from(document.scripts).some(
      (existing) => existing.getAttribute('src') === url
    );
    if (alreadyPresent) {
      resolve();
      return;
    }

    const script = document.createElement('script');
    script.src = url;
    script.type = 'text/javascript';
    script.async = true;
    script.onload = () => resolve();
    script.onerror = () => {
      script.remove();
      reject(new Error(`Failed to load script: ${url}`));
    };
    document.head.appendChild(script);
  });
}
/**
 * Reports whether `WasmProvider` has a configuration for the 'ghostscript' module.
 */
export function isGhostscriptAvailable(): boolean {
return WasmProvider.isConfigured('ghostscript');
}
/**
 * Drops the cached Ghostscript instance and any in-flight load promise so
 * the next `loadGhostscript()` call starts a fresh load.
 */
export function clearGhostscriptCache(): void {
cachedGS = null;
loadPromise = null;
}

View File

@@ -0,0 +1,481 @@
/**
* PDF/A Conversion using Ghostscript WASM
* Converts PDFs to PDF/A-1b, PDF/A-2b, or PDF/A-3b format.
* Requires user to configure Ghostscript URL in WASM Settings.
*/
import {
getWasmBaseUrl,
fetchWasmFile,
isWasmAvailable,
} from '../config/wasm-cdn-config.js';
import { PDFDocument, PDFDict, PDFName, PDFArray } from 'pdf-lib';
/**
 * The subset of the Ghostscript WASM module used in this file: its virtual
 * file system and its command-line entry point.
 */
interface GhostscriptModule {
// Virtual file system used to pass input files in and read output files back.
FS: {
writeFile(path: string, data: Uint8Array | string): void;
readFile(path: string, opts?: { encoding?: string }): Uint8Array;
unlink(path: string): void;
stat(path: string): { size: number };
};
// Runs Ghostscript with command-line style arguments and returns its exit code.
callMain(args: string[]): number;
}
export type PdfALevel = 'PDF/A-1b' | 'PDF/A-2b' | 'PDF/A-3b';
let cachedGsModule: GhostscriptModule | null = null;
/**
 * Stores an already-loaded Ghostscript module so later conversions reuse it
 * instead of loading a new one.
 */
export function setCachedGsModule(module: GhostscriptModule): void {
cachedGsModule = module;
}
/**
 * Returns the cached Ghostscript module, or null when none has been stored yet.
 */
export function getCachedGsModule(): GhostscriptModule | null {
return cachedGsModule;
}
/**
 * Downloads `gs.js` from the configured Ghostscript base URL, imports it
 * through a temporary blob URL and instantiates the module. Module files
 * (including `gs.wasm`) are resolved against the same base URL.
 *
 * @returns The instantiated Ghostscript module.
 * @throws Error when Ghostscript is not configured or `gs.js` cannot be fetched.
 */
export async function loadGsModule(): Promise<GhostscriptModule> {
  const gsBaseUrl = getWasmBaseUrl('ghostscript');
  if (!gsBaseUrl) {
    // Fail with a readable message instead of a TypeError on `.endsWith`.
    throw new Error(
      'Ghostscript is not configured. Please configure it in WASM Settings.'
    );
  }
  const normalizedUrl = gsBaseUrl.endsWith('/') ? gsBaseUrl : `${gsBaseUrl}/`;
  const gsJsUrl = `${normalizedUrl}gs.js`;

  const response = await fetch(gsJsUrl);
  if (!response.ok) {
    throw new Error(`Failed to fetch gs.js: HTTP ${response.status}`);
  }
  const jsText = await response.text();

  // Import via a blob URL so the script is loaded as a module from the
  // fetched text rather than directly from the remote URL.
  const blob = new Blob([jsText], { type: 'application/javascript' });
  const blobUrl = URL.createObjectURL(blob);
  try {
    const gsModule = await import(/* @vite-ignore */ blobUrl);
    const ModuleFactory = gsModule.default;
    return (await ModuleFactory({
      locateFile: (path: string) => {
        if (path.endsWith('.wasm')) {
          return `${normalizedUrl}gs.wasm`;
        }
        return `${normalizedUrl}${path}`;
      },
      print: (text: string) => console.log('[GS]', text),
      printErr: (text: string) => console.error('[GS Error]', text),
    })) as GhostscriptModule;
  } finally {
    URL.revokeObjectURL(blobUrl);
  }
}
/**
 * Converts a PDF to the requested PDF/A level with Ghostscript WASM.
 *
 * Steps: write the input into the Ghostscript virtual FS, fetch the sRGB ICC
 * profile and generate a PDF/A definition (PostScript) that embeds it, run
 * the `pdfwrite` device, read the result back, and — for every level except
 * PDF/A-1b — post-process the pages' transparency Group dictionaries.
 *
 * All temporary files are removed in a `finally` block, so they are cleaned
 * up on every exit path, including thrown exceptions.
 *
 * @param pdfData - Bytes of the source PDF.
 * @param level - Target conformance level (default `PDF/A-2b`).
 * @param onProgress - Optional callback receiving status messages.
 * @returns Bytes of the converted PDF.
 * @throws Error when Ghostscript is not configured, the PDF/A assets cannot
 *         be prepared, Ghostscript throws or exits non-zero, or no output
 *         file is produced.
 */
export async function convertToPdfA(
  pdfData: Uint8Array,
  level: PdfALevel = 'PDF/A-2b',
  onProgress?: (msg: string) => void
): Promise<Uint8Array> {
  if (!isWasmAvailable('ghostscript')) {
    throw new Error(
      'Ghostscript is not configured. Please configure it in WASM Settings.'
    );
  }

  onProgress?.('Loading Ghostscript...');
  let gs: GhostscriptModule;
  if (cachedGsModule) {
    gs = cachedGsModule;
  } else {
    gs = await loadGsModule();
    cachedGsModule = gs;
  }

  const pdfaMap: Record<PdfALevel, string> = {
    'PDF/A-1b': '1',
    'PDF/A-2b': '2',
    'PDF/A-3b': '3',
  };

  const inputPath = '/tmp/input.pdf';
  const outputPath = '/tmp/output.pdf';
  const iccPath = '/tmp/pdfa.icc';
  const pdfaDefPath = '/tmp/pdfa.ps';
  const tempPaths = [inputPath, outputPath, iccPath, pdfaDefPath];

  let output: Uint8Array;
  try {
    gs.FS.writeFile(inputPath, pdfData);
    console.log('[Ghostscript] Input file size:', pdfData.length);
    onProgress?.(`Converting to ${level}...`);

    try {
      const iccFileName = 'sRGB_IEC61966-2-1_no_black_scaling.icc';
      const iccUrl = `${import.meta.env.BASE_URL}${iccFileName}`;
      const response = await fetch(iccUrl);
      if (!response.ok) {
        throw new Error(
          `Failed to fetch ICC profile from ${iccUrl}: HTTP ${response.status}`
        );
      }
      const iccData = new Uint8Array(await response.arrayBuffer());
      console.log(
        '[Ghostscript] sRGB v2 ICC profile loaded:',
        iccData.length,
        'bytes'
      );
      gs.FS.writeFile(iccPath, iccData);
      console.log('[Ghostscript] sRGB ICC profile written to FS:', iccPath);

      // The profile is embedded in the PostScript definition as a hex string.
      const iccHex = Array.from(iccData)
        .map((b) => b.toString(16).padStart(2, '0'))
        .join('');
      console.log('[Ghostscript] ICC profile hex length:', iccHex.length);

      const pdfaSubtype = level === 'PDF/A-1b' ? '/GTS_PDFA1' : '/GTS_PDFA';
      const pdfaPS = `%!
% PDF/A definition file for ${level}
% Define the ICC profile stream object with embedded hex data
[/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
[{icc_PDFA} << /N 3 >> /PUT pdfmark
[{icc_PDFA} <${iccHex}> /PUT pdfmark
% Define the OutputIntent dictionary
[/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
[{OutputIntent_PDFA} <<
/Type /OutputIntent
/S ${pdfaSubtype}
/DestOutputProfile {icc_PDFA}
/OutputConditionIdentifier (sRGB IEC61966-2.1)
/Info (sRGB IEC61966-2.1)
/RegistryName (http://www.color.org)
>> /PUT pdfmark
% Attach OutputIntent to the document Catalog
[{Catalog} << /OutputIntents [ {OutputIntent_PDFA} ] >> /PUT pdfmark
`;
      gs.FS.writeFile(pdfaDefPath, pdfaPS);
      console.log(
        '[Ghostscript] PDFA PostScript created with embedded ICC hex data'
      );
    } catch (e) {
      console.error('[Ghostscript] Failed to setup PDF/A assets:', e);
      throw new Error('Conversion failed: could not create PDF/A definition');
    }

    const args = [
      '-dNOSAFER',
      '-dBATCH',
      '-dNOPAUSE',
      '-sDEVICE=pdfwrite',
      `-dPDFA=${pdfaMap[level]}`,
      '-dPDFACompatibilityPolicy=1',
      `-dCompatibilityLevel=${level === 'PDF/A-1b' ? '1.4' : '1.7'}`,
      '-sColorConversionStrategy=UseDeviceIndependentColor',
      '-sICCProfilesDir=/tmp/',
      `-sOutputICCProfile=${iccPath}`,
      `-sDefaultRGBProfile=${iccPath}`,
      `-sBlendColorProfile=${iccPath}`,
      '-dCompressPages=true',
      '-dWriteObjStms=false',
      '-dWriteXRefStm=false',
      '-dEmbedAllFonts=true',
      '-dSubsetFonts=true',
      '-dAutoRotatePages=/None',
      `-sOutputFile=${outputPath}`,
      pdfaDefPath,
      inputPath,
    ];

    console.log('[Ghostscript] Running PDF/A conversion...');
    try {
      console.log('[Ghostscript] Checking version:');
      gs.callMain(['--version']);
    } catch (e) {
      // The version probe is informational only.
      console.warn('[Ghostscript] Could not check version:', e);
    }

    let exitCode: number;
    try {
      exitCode = gs.callMain(args);
    } catch (e) {
      console.error('[Ghostscript] Exception:', e);
      throw new Error(`Ghostscript threw an exception: ${e}`);
    }
    console.log('[Ghostscript] Exit code:', exitCode);
    if (exitCode !== 0) {
      throw new Error(
        `Ghostscript conversion failed with exit code ${exitCode}`
      );
    }

    try {
      const stat = gs.FS.stat(outputPath);
      console.log('[Ghostscript] Output file size:', stat.size);
      output = gs.FS.readFile(outputPath);
    } catch (e) {
      console.error('[Ghostscript] Failed to read output:', e);
      throw new Error('Ghostscript did not produce output file');
    }
  } finally {
    // Best-effort cleanup: some of these files may never have been created.
    for (const path of tempPaths) {
      try {
        gs.FS.unlink(path);
      } catch {
        /* ignore */
      }
    }
  }

  if (level !== 'PDF/A-1b') {
    onProgress?.('Post-processing for transparency compliance...');
    console.log(
      '[Ghostscript] Adding Group dictionaries to pages for transparency compliance...'
    );
    try {
      output = await addPageGroupDictionaries(output);
      console.log('[Ghostscript] Page Group dictionaries added successfully');
    } catch (e) {
      // Post-processing is best-effort; fall back to the raw Ghostscript output.
      console.error('[Ghostscript] Failed to add Group dictionaries:', e);
    }
  }

  return output;
}
/**
 * Post-processes a PDF so that transparency Group dictionaries reference the
 * document's output-intent ICC profile:
 *  - an existing page Group whose /CS is DeviceRGB/DeviceGray/DeviceCMYK, or
 *    missing, gets /CS set to [/ICCBased <profile>];
 *  - a page without a Group gets a new Transparency Group with that /CS;
 *  - Group dictionaries of Form XObjects are updated the same way.
 * When the catalog has no usable OutputIntents entry, nothing is changed
 * and the document is simply re-saved.
 *
 * @param pdfData - Bytes of the PDF to post-process.
 * @returns Bytes of the re-saved PDF.
 */
async function addPageGroupDictionaries(
pdfData: Uint8Array
): Promise<Uint8Array> {
const pdfDoc = await PDFDocument.load(pdfData, {
ignoreEncryption: true,
updateMetadata: false,
});
const catalog = pdfDoc.catalog;
const outputIntentsArray = catalog.lookup(PDFName.of('OutputIntents'));
// The ICC profile entry (/DestOutputProfile) of the first output intent, if any.
let iccProfileRef: ReturnType<typeof PDFDict.prototype.get> = undefined;
if (outputIntentsArray instanceof PDFArray) {
const firstIntent = outputIntentsArray.lookup(0);
if (firstIntent instanceof PDFDict) {
iccProfileRef = firstIntent.get(PDFName.of('DestOutputProfile'));
}
}
// Points a Group's /CS at the ICC profile when it is a device colour space
// name or absent; any other /CS value is left untouched.
const updateGroupCS = (groupDict: PDFDict) => {
if (!iccProfileRef) return;
const currentCS = groupDict.get(PDFName.of('CS'));
if (currentCS instanceof PDFName) {
const csName = currentCS.decodeText();
if (
csName === 'DeviceRGB' ||
csName === 'DeviceGray' ||
csName === 'DeviceCMYK'
) {
const iccColorSpace = pdfDoc.context.obj([
PDFName.of('ICCBased'),
iccProfileRef,
]);
groupDict.set(PDFName.of('CS'), iccColorSpace);
}
} else if (!currentCS) {
const iccColorSpace = pdfDoc.context.obj([
PDFName.of('ICCBased'),
iccProfileRef,
]);
groupDict.set(PDFName.of('CS'), iccColorSpace);
}
};
const pages = pdfDoc.getPages();
for (const page of pages) {
const pageDict = page.node;
const existingGroup = pageDict.lookup(PDFName.of('Group'));
if (existingGroup) {
if (existingGroup instanceof PDFDict) {
updateGroupCS(existingGroup);
}
} else if (iccProfileRef) {
// No Group on this page: create a Transparency Group using the ICC colour space.
const colorSpace = pdfDoc.context.obj([
PDFName.of('ICCBased'),
iccProfileRef,
]);
const groupDict = pdfDoc.context.obj({
Type: 'Group',
S: 'Transparency',
I: false,
K: false,
});
(groupDict as PDFDict).set(PDFName.of('CS'), colorSpace);
pageDict.set(PDFName.of('Group'), groupDict);
}
}
if (iccProfileRef) {
// Walk every indirect object to find Form XObjects carrying a Group dictionary.
pdfDoc.context.enumerateIndirectObjects().forEach(([ref, obj]) => {
if (
obj instanceof PDFDict ||
(obj && typeof obj === 'object' && 'dict' in obj)
) {
// Objects exposing a `dict` property are inspected through that dictionary.
const dict =
'dict' in obj ? (obj as { dict: PDFDict }).dict : (obj as PDFDict);
const subtype = dict.get(PDFName.of('Subtype'));
if (subtype instanceof PDFName && subtype.decodeText() === 'Form') {
const group = dict.lookup(PDFName.of('Group'));
if (group instanceof PDFDict) {
updateGroupCS(group);
}
}
}
});
}
return await pdfDoc.save({
useObjectStreams: false,
addDefaultPage: false,
updateFieldAppearances: false,
});
}
/**
 * File-level wrapper around `convertToPdfA`: reads the file's bytes,
 * converts them and wraps the result in an `application/pdf` Blob.
 *
 * @param file - The PDF file to convert.
 * @param level - Target conformance level (default `PDF/A-2b`).
 * @param onProgress - Optional callback receiving status messages.
 * @returns The converted PDF as a Blob.
 */
export async function convertFileToPdfA(
  file: File,
  level: PdfALevel = 'PDF/A-2b',
  onProgress?: (msg: string) => void
): Promise<Blob> {
  const sourceBytes = new Uint8Array(await file.arrayBuffer());
  const converted = await convertToPdfA(sourceBytes, level, onProgress);

  // Copy into a freshly allocated array before handing the bytes to Blob.
  const blobBytes = new Uint8Array(converted.length);
  blobBytes.set(converted);

  return new Blob([blobBytes], { type: 'application/pdf' });
}
/**
 * Rewrites a PDF with Ghostscript's `pdfwrite` device and `-dNoOutputFonts`,
 * so that text is emitted as outlines instead of embedded fonts.
 *
 * Temporary files in the Ghostscript virtual FS are removed in a `finally`
 * block, so they are cleaned up on every exit path.
 *
 * @param pdfData - Bytes of the source PDF.
 * @param onProgress - Optional callback receiving status messages.
 * @returns Bytes of the rewritten PDF.
 * @throws Error when Ghostscript is not configured, throws, exits non-zero,
 *         or produces no output file.
 */
export async function convertFontsToOutlines(
  pdfData: Uint8Array,
  onProgress?: (msg: string) => void
): Promise<Uint8Array> {
  if (!isWasmAvailable('ghostscript')) {
    throw new Error(
      'Ghostscript is not configured. Please configure it in WASM Settings.'
    );
  }

  onProgress?.('Loading Ghostscript...');
  let gs: GhostscriptModule;
  if (cachedGsModule) {
    gs = cachedGsModule;
  } else {
    gs = await loadGsModule();
    cachedGsModule = gs;
  }

  const inputPath = '/tmp/input.pdf';
  const outputPath = '/tmp/output.pdf';

  try {
    gs.FS.writeFile(inputPath, pdfData);
    onProgress?.('Converting fonts to outlines...');

    const args = [
      '-dNOSAFER',
      '-dBATCH',
      '-dNOPAUSE',
      '-sDEVICE=pdfwrite',
      '-dNoOutputFonts',
      '-dCompressPages=true',
      '-dAutoRotatePages=/None',
      `-sOutputFile=${outputPath}`,
      inputPath,
    ];

    let exitCode: number;
    try {
      exitCode = gs.callMain(args);
    } catch (e) {
      throw new Error(`Ghostscript threw an exception: ${e}`);
    }
    if (exitCode !== 0) {
      throw new Error(
        `Ghostscript conversion failed with exit code ${exitCode}`
      );
    }

    try {
      return gs.FS.readFile(outputPath);
    } catch {
      throw new Error('Ghostscript did not produce output file');
    }
  } finally {
    // Best-effort cleanup: the output file may not exist on failure paths.
    for (const path of [inputPath, outputPath]) {
      try {
        gs.FS.unlink(path);
      } catch {
        /* ignore */
      }
    }
  }
}
/**
 * File-level wrapper around `convertFontsToOutlines`: reads the file's
 * bytes, converts them and wraps the result in an `application/pdf` Blob.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional callback receiving status messages.
 * @returns The converted PDF as a Blob.
 */
export async function convertFileToOutlines(
  file: File,
  onProgress?: (msg: string) => void
): Promise<Blob> {
  const sourceBytes = new Uint8Array(await file.arrayBuffer());
  const outlined = await convertFontsToOutlines(sourceBytes, onProgress);

  // Copy into a freshly allocated array before handing the bytes to Blob.
  const blobBytes = new Uint8Array(outlined.length);
  blobBytes.set(outlined);

  return new Blob([blobBytes], { type: 'application/pdf' });
}

View File

@@ -2,8 +2,7 @@ import createModule from '@neslinesli93/qpdf-wasm';
import { showLoader, hideLoader, showAlert } from '../ui.js';
import { createIcons } from 'lucide';
import { state, resetState } from '../state.js';
import * as pdfjsLib from 'pdfjs-dist'
import * as pdfjsLib from 'pdfjs-dist';
const STANDARD_SIZES = {
A4: { width: 595.28, height: 841.89 },
@@ -50,14 +49,14 @@ export function convertPoints(points: any, unit: any) {
// Convert hex color to RGB
export function hexToRgb(hex: string): { r: number; g: number; b: number } {
const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex)
const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex);
return result
? {
r: parseInt(result[1], 16) / 255,
g: parseInt(result[2], 16) / 255,
b: parseInt(result[3], 16) / 255,
}
: { r: 0, g: 0, b: 0 }
r: parseInt(result[1], 16) / 255,
g: parseInt(result[2], 16) / 255,
b: parseInt(result[3], 16) / 255,
}
: { r: 0, g: 0, b: 0 };
}
export const formatBytes = (bytes: any, decimals = 1) => {
@@ -89,7 +88,10 @@ export const readFileAsArrayBuffer = (file: any) => {
});
};
export function parsePageRanges(rangeString: string, totalPages: number): number[] {
export function parsePageRanges(
rangeString: string,
totalPages: number
): number[] {
if (!rangeString || rangeString.trim() === '') {
return Array.from({ length: totalPages }, (_, i) => i);
}
@@ -128,11 +130,9 @@ export function parsePageRanges(rangeString: string, totalPages: number): number
}
}
return Array.from(indices).sort((a, b) => a - b);
}
/**
* Formats an ISO 8601 date string (e.g., "2008-02-21T17:15:56-08:00")
* into a localized, human-readable string.
@@ -198,7 +198,7 @@ export function formatStars(num: number) {
return (num / 1000).toFixed(1) + 'K';
}
return num.toLocaleString();
};
}
/**
* Truncates a filename to a maximum length, adding ellipsis if needed.
@@ -207,14 +207,18 @@ export function formatStars(num: number) {
* @param maxLength - Maximum length (default: 30)
* @returns Truncated filename with ellipsis if needed
*/
export function truncateFilename(filename: string, maxLength: number = 25): string {
export function truncateFilename(
filename: string,
maxLength: number = 25
): string {
if (filename.length <= maxLength) {
return filename;
}
const lastDotIndex = filename.lastIndexOf('.');
const extension = lastDotIndex !== -1 ? filename.substring(lastDotIndex) : '';
const nameWithoutExt = lastDotIndex !== -1 ? filename.substring(0, lastDotIndex) : filename;
const nameWithoutExt =
lastDotIndex !== -1 ? filename.substring(0, lastDotIndex) : filename;
const availableLength = maxLength - extension.length - 3; // 3 for '...'
@@ -225,7 +229,10 @@ export function truncateFilename(filename: string, maxLength: number = 25): stri
return nameWithoutExt.substring(0, availableLength) + '...' + extension;
}
export function formatShortcutDisplay(shortcut: string, isMac: boolean): string {
export function formatShortcutDisplay(
shortcut: string,
isMac: boolean
): string {
if (!shortcut) return '';
return shortcut
.replace('mod', isMac ? '⌘' : 'Ctrl')
@@ -233,7 +240,7 @@ export function formatShortcutDisplay(shortcut: string, isMac: boolean): string
.replace('alt', isMac ? '⌥' : 'Alt')
.replace('shift', 'Shift')
.split('+')
.map(k => k.charAt(0).toUpperCase() + k.slice(1))
.map((k) => k.charAt(0).toUpperCase() + k.slice(1))
.join(isMac ? '' : '+');
}
@@ -263,7 +270,7 @@ export function resetAndReloadTool(preResetCallback?: () => void) {
export function getPDFDocument(src: any) {
let params = src;
// Handle different input types similar to how getDocument handles them,
// Handle different input types similar to how getDocument handles them,
// but we ensure we have an object to attach wasmUrl to.
if (typeof src === 'string') {
params = { url: src };
@@ -285,20 +292,171 @@ export function getPDFDocument(src: any) {
}
/**
* Returns a sanitized PDF filename.
*
* The provided filename is processed as follows:
* - Removes a trailing `.pdf` file extension (case-insensitive)
* - Trims leading and trailing whitespace
* - Truncates the name to a maximum of 80 characters
*
* @param filename The original filename (including extension)
* @returns The sanitized filename without the `.pdf` extension, limited to 80 characters
* Escape HTML special characters to prevent XSS
* @param text - The text to escape
* @returns The escaped text
*/
export function getCleanPdfFilename(filename: string): string {
let clean = filename.replace(/\.pdf$/i, '').trim();
if (clean.length > 80) {
clean = clean.slice(0, 80);
}
return clean;
/**
 * Replaces the five HTML-significant characters (& < > " ') with their
 * entity equivalents so the text can be embedded in markup safely.
 *
 * @param text - Raw text.
 * @returns The text with those characters replaced by entities.
 */
export function escapeHtml(text: string): string {
  return text.replace(/[&<>"']/g, (ch) => {
    switch (ch) {
      case '&':
        return '&amp;';
      case '<':
        return '&lt;';
      case '>':
        return '&gt;';
      case '"':
        return '&quot;';
      default:
        // The only remaining character the pattern can match is the apostrophe.
        return '&#039;';
    }
  });
}
/**
 * Encodes a byte array as base64.
 *
 * The bytes are turned into a binary string in slices of 0x8000 bytes so
 * that `String.fromCharCode` is never called with an excessive number of
 * arguments, then the string is passed to `btoa`.
 *
 * @param bytes - Bytes to encode.
 * @returns The base64 representation.
 */
export function uint8ArrayToBase64(bytes: Uint8Array): string {
  const sliceLength = 0x8000;
  let binary = '';
  let offset = 0;
  while (offset < bytes.length) {
    const end = Math.min(offset + sliceLength, bytes.length);
    binary += String.fromCharCode(...bytes.subarray(offset, end));
    offset = end;
  }
  return btoa(binary);
}
/**
 * Simplifies e-mail HTML with a series of regular-expression passes:
 *  - drops `<head>`, `<style>`, `<script>` and `<link>`;
 *  - strips style/class/data-* and common layout attributes;
 *  - removes 1x1 tracking images;
 *  - unwraps Outlook "safelinks" redirect URLs and shortens very long hrefs;
 *  - flattens table markup into `<div>`/`<span>`/`<strong>`;
 *  - truncates the result to roughly 100 000 characters.
 *
 * NOTE: this is a clean-up for rendering, not a security sanitizer — it does
 * not remove `on*` event-handler attributes or `javascript:` URLs.
 *
 * @param html - Raw e-mail HTML; empty input is returned unchanged.
 * @returns The simplified HTML.
 */
export function sanitizeEmailHtml(html: string): string {
  if (!html) return html;

  let sanitized = html;

  // Document-level and active content.
  sanitized = sanitized.replace(/<head[^>]*>[\s\S]*?<\/head>/gi, '');
  sanitized = sanitized.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
  sanitized = sanitized.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
  sanitized = sanitized.replace(/<link[^>]*>/gi, '');

  // Presentation attributes.
  sanitized = sanitized.replace(/\s+style=["'][^"']*["']/gi, '');
  sanitized = sanitized.replace(/\s+class=["'][^"']*["']/gi, '');
  sanitized = sanitized.replace(/\s+data-[a-z-]+=["'][^"']*["']/gi, '');

  // 1x1 tracking pixels (width/height in either order).
  sanitized = sanitized.replace(
    /<img[^>]*(?:width=["']1["'][^>]*height=["']1["']|height=["']1["'][^>]*width=["']1["'])[^>]*\/?>/gi,
    ''
  );

  // Replace Outlook safelinks redirects with the URL they wrap.
  sanitized = sanitized.replace(
    /href=["']https?:\/\/[^"']*safelinks\.protection\.outlook\.com[^"']*url=([^&"']+)[^"']*["']/gi,
    (match, encodedUrl) => {
      try {
        const decodedUrl = decodeURIComponent(encodedUrl);
        // The decoded URL may contain a double quote (%22); escape it so it
        // cannot terminate the attribute and inject further markup.
        return `href="${decodedUrl.replace(/"/g, '&quot;')}"`;
      } catch {
        return match;
      }
    }
  );
  sanitized = sanitized.replace(/\s+originalsrc=["'][^"']*["']/gi, '');

  // Shorten hrefs of 500+ characters: prefer the part before the query string.
  sanitized = sanitized.replace(
    /href=["']([^"']{500,})["']/gi,
    (match, url) => {
      const baseUrl = url.split('?')[0];
      if (baseUrl && baseUrl.length < 200) {
        return `href="${baseUrl}"`;
      }
      return `href="${url.substring(0, 200)}"`;
    }
  );

  // Layout attributes.
  sanitized = sanitized.replace(
    /\s+(cellpadding|cellspacing|bgcolor|border|valign|align|width|height|role|dir|id)=["'][^"']*["']/gi,
    ''
  );

  // Flatten tables. Opening and closing table tags map to opening and closing
  // divs respectively, so the resulting markup stays balanced.
  sanitized = sanitized.replace(/<table[^>]*>/gi, '<div>');
  sanitized = sanitized.replace(/<\/table[^>]*>/gi, '</div>');
  sanitized = sanitized.replace(/<\/?tbody[^>]*>/gi, '');
  sanitized = sanitized.replace(/<\/?thead[^>]*>/gi, '');
  sanitized = sanitized.replace(/<\/?tfoot[^>]*>/gi, '');
  sanitized = sanitized.replace(/<tr[^>]*>/gi, '<div>');
  sanitized = sanitized.replace(/<\/tr>/gi, '</div>');
  sanitized = sanitized.replace(/<td[^>]*>/gi, '<span> ');
  sanitized = sanitized.replace(/<\/td>/gi, ' </span>');
  sanitized = sanitized.replace(/<th[^>]*>/gi, '<strong> ');
  sanitized = sanitized.replace(/<\/th>/gi, ' </strong>');

  // Remove empty wrappers and collapse runs of adjacent div tags.
  sanitized = sanitized.replace(/<div>\s*<\/div>/gi, '');
  sanitized = sanitized.replace(/<span>\s*<\/span>/gi, '');
  sanitized = sanitized.replace(/(<div>)+/gi, '<div>');
  sanitized = sanitized.replace(/(<\/div>)+/gi, '</div>');

  // Anchors whose href is empty are replaced by their text.
  sanitized = sanitized.replace(
    /<a[^>]*href=["']\s*["'][^>]*>([^<]*)<\/a>/gi,
    '$1'
  );

  // Cap the size, cutting at a closing div when one is reasonably close.
  const MAX_HTML_SIZE = 100000;
  if (sanitized.length > MAX_HTML_SIZE) {
    const truncateAt = sanitized.lastIndexOf('</div>', MAX_HTML_SIZE);
    if (truncateAt > MAX_HTML_SIZE / 2) {
      sanitized = sanitized.substring(0, truncateAt) + '</div></body></html>';
    } else {
      sanitized = sanitized.substring(0, MAX_HTML_SIZE) + '...</body></html>';
    }
  }

  return sanitized;
}
/** Three-letter weekday abbreviations mapped to full names. */
const RAW_DATE_DAY_NAMES: Record<string, string> = {
  Sun: 'Sunday',
  Mon: 'Monday',
  Tue: 'Tuesday',
  Wed: 'Wednesday',
  Thu: 'Thursday',
  Fri: 'Friday',
  Sat: 'Saturday',
};

/** Three-letter month abbreviations mapped to full names. */
const RAW_DATE_MONTH_NAMES: Record<string, string> = {
  Jan: 'January',
  Feb: 'February',
  Mar: 'March',
  Apr: 'April',
  May: 'May',
  Jun: 'June',
  Jul: 'July',
  Aug: 'August',
  Sep: 'September',
  Oct: 'October',
  Nov: 'November',
  Dec: 'December',
};

/**
 * Formats a raw RFC 2822 date string into a more readable form while
 * keeping the original wall-clock time and timezone offset (no conversion
 * to the local timezone is performed).
 *
 * Example input:  "Sun, 8 Jan 2017 20:37:44 +0200"
 * Example output: "Sunday, January 8, 2017 at 8:37 PM (UTC+02:00)"
 *
 * @param raw - The raw date header value.
 * @returns The formatted date, or `raw` unchanged when it does not match the
 *          expected "Day, D Mon YYYY HH:MM[:SS] ±HHMM" layout.
 */
export function formatRawDate(raw: string): string {
  // Callers may pass a missing header at runtime; return it untouched.
  if (typeof raw !== 'string') return raw;

  const match = raw.match(
    /([A-Za-z]{3}),\s+(\d{1,2})\s+([A-Za-z]{3})\s+(\d{4})\s+(\d{2}):(\d{2})(?::(\d{2}))?\s+([+-]\d{4})/
  );
  if (!match) return raw;

  // Group 7 (seconds) is matched but intentionally not shown in the output.
  const [, dayAbbr, dom, monthAbbr, year, hoursStr, minsStr, , timezone] =
    match;

  const fullDay = RAW_DATE_DAY_NAMES[dayAbbr] || dayAbbr;
  const fullMonth = RAW_DATE_MONTH_NAMES[monthAbbr] || monthAbbr;

  // 24-hour to 12-hour clock: 0 and 12 both display as 12.
  const hours24 = parseInt(hoursStr, 10);
  const ampm = hours24 >= 12 ? 'PM' : 'AM';
  const hours = hours24 % 12 || 12;

  // "+0200" becomes "UTC+02:00".
  const tzSign = timezone.substring(0, 1);
  const tzHours = timezone.substring(1, 3);
  const tzMins = timezone.substring(3, 5);
  const formattedTz = `UTC${tzSign}${tzHours}:${tzMins}`;

  return `${fullDay}, ${fullMonth} ${dom}, ${year} at ${hours}:${minsStr} ${ampm} (${formattedTz})`;
}

View File

@@ -0,0 +1,266 @@
import {
BBox,
OcrLine,
OcrPage,
OcrWord,
WordTransform,
Baseline,
} from '@/types';
// Matches "bbox x0 y0 x1 y1" (four unsigned integers) inside a title attribute.
const BBOX_PATTERN = /bbox\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/;
// Matches "baseline <slope> <intercept>": an optionally signed decimal
// followed by an optionally signed integer.
const BASELINE_PATTERN = /baseline\s+([-+]?\d*\.?\d*)\s+([-+]?\d+)/;
// Matches "textangle <angle>" with an optionally signed decimal.
const TEXTANGLE_PATTERN = /textangle\s+([-+]?\d*\.?\d*)/;
/**
 * Extracts the `bbox x0 y0 x1 y1` entry from a title string.
 *
 * @param title - Contents of an element's `title` attribute.
 * @returns The bounding box, or null when the title has no bbox entry.
 */
export function parseBBox(title: string): BBox | null {
  const found = BBOX_PATTERN.exec(title);
  if (found === null) {
    return null;
  }
  const [x0, y0, x1, y1] = found
    .slice(1, 5)
    .map((digits) => parseInt(digits, 10));
  return { x0, y0, x1, y1 };
}
/**
 * Extracts the `baseline <slope> <intercept>` entry from a title string.
 *
 * @param title - Contents of an element's `title` attribute.
 * @returns The parsed baseline; slope and intercept are 0 when the entry is
 *          missing or a value cannot be parsed.
 */
export function parseBaseline(title: string): Baseline {
  const found = BASELINE_PATTERN.exec(title);
  if (found === null) {
    return { slope: 0, intercept: 0 };
  }
  const slope = parseFloat(found[1]) || 0;
  const intercept = parseInt(found[2], 10) || 0;
  return { slope, intercept };
}
/**
 * Extracts the `textangle <angle>` entry from a title string.
 *
 * @param title - Contents of an element's `title` attribute.
 * @returns The angle, or 0 when the entry is missing or cannot be parsed.
 */
export function parseTextangle(title: string): number {
  const found = TEXTANGLE_PATTERN.exec(title);
  return found === null ? 0 : parseFloat(found[1]) || 0;
}
/**
 * Reads the element's `dir` attribute.
 *
 * @param element - Element to inspect.
 * @returns 'rtl' only when `dir` is exactly "rtl"; 'ltr' in every other case.
 */
export function getTextDirection(element: Element): 'ltr' | 'rtl' {
  if (element.getAttribute('dir') === 'rtl') {
    return 'rtl';
  }
  return 'ltr';
}
/**
 * Decides from the element's `lang` attribute whether word breaks should be
 * injected. Returns false for the listed Chinese, Japanese and Korean
 * language codes and true otherwise (including when `lang` is absent).
 *
 * @param element - Element whose `lang` attribute is inspected.
 */
export function shouldInjectWordBreaks(element: Element): boolean {
  const language = element.getAttribute('lang') || '';
  switch (language) {
    case 'chi_sim':
    case 'chi_tra':
    case 'jpn':
    case 'kor':
    case 'zh':
    case 'ja':
    case 'ko':
      return false;
    default:
      return true;
  }
}
/**
 * Applies Unicode NFKC normalization to recognized text.
 *
 * @param text - Text to normalize.
 * @returns The NFKC-normalized string.
 */
export function normalizeText(text: string): string {
  const normalized = text.normalize('NFKC');
  return normalized;
}
/**
 * Parses hOCR markup into an `OcrPage`.
 *
 * The page size is taken from the bbox of the first `.ocr_page` element
 * (0 x 0 when it is missing). Line-level elements are parsed when any exist;
 * otherwise all `.ocrx_word` elements are gathered into one synthetic line.
 *
 * @param hocrText - hOCR document text.
 * @returns Page dimensions, a fixed `dpi` of 72, and the parsed lines.
 */
export function parseHocrDocument(hocrText: string): OcrPage {
  const doc = new DOMParser().parseFromString(hocrText, 'text/html');

  const pageElement = doc.querySelector('.ocr_page');
  const pageBox = pageElement
    ? parseBBox(pageElement.getAttribute('title') || '')
    : null;
  const width = pageBox ? pageBox.x1 - pageBox.x0 : 0;
  const height = pageBox ? pageBox.y1 - pageBox.y0 : 0;

  const lines: OcrLine[] = [];
  const lineElements = doc.querySelectorAll(
    '.ocr_line, .ocr_textfloat, .ocr_header, .ocr_caption'
  );

  if (lineElements.length > 0) {
    for (const lineElement of Array.from(lineElements)) {
      const parsed = parseHocrLine(lineElement);
      // Lines without a bbox or without any usable word are dropped.
      if (parsed !== null && parsed.words.length > 0) {
        lines.push(parsed);
      }
    }
    return { width, height, dpi: 72, lines };
  }

  // No line markup: fall back to a single line that spans every word.
  const wordElements = doc.querySelectorAll('.ocrx_word');
  const words =
    wordElements.length > 0 ? parseWordsFromElements(wordElements) : [];
  if (words.length > 0) {
    lines.push({
      bbox: calculateBoundingBox(words.map((word) => word.bbox)),
      baseline: { slope: 0, intercept: 0 },
      textangle: 0,
      words,
      direction: 'ltr',
      injectWordBreaks: true,
    });
  }
  return { width, height, dpi: 72, lines };
}
/**
 * Parses one hOCR line element together with the words inside it.
 *
 * Direction and word-break behavior are read from the closest `.ocr_par`
 * ancestor, or from the direct parent when there is none.
 *
 * @param lineElement - A line-level hOCR element.
 * @returns The parsed line, or `null` when the element has no bbox.
 */
function parseHocrLine(lineElement: Element): OcrLine | null {
  const title = lineElement.getAttribute('title') || '';
  const bbox = parseBBox(title);
  if (bbox === null) {
    return null;
  }

  const context = lineElement.closest('.ocr_par') || lineElement.parentElement;

  return {
    bbox,
    baseline: parseBaseline(title),
    textangle: parseTextangle(title),
    words: parseWordsFromElements(lineElement.querySelectorAll('.ocrx_word')),
    direction: context ? getTextDirection(context) : 'ltr',
    injectWordBreaks: context ? shouldInjectWordBreaks(context) : true,
  };
}
/**
 * Converts hOCR word elements into `OcrWord` records.
 *
 * Elements whose trimmed, normalized text is empty, or whose title has no
 * bbox, are skipped. Confidence comes from `x_wconf` and defaults to 0.
 *
 * @param wordElements - Candidate word elements.
 * @returns The usable words, in input order.
 */
function parseWordsFromElements(wordElements: NodeListOf<Element>): OcrWord[] {
  const collected: OcrWord[] = [];
  for (const element of Array.from(wordElements)) {
    const title = element.getAttribute('title') || '';
    const text = normalizeText((element.textContent || '').trim());
    if (text === '') {
      continue;
    }
    const bbox = parseBBox(title);
    if (bbox === null) {
      continue;
    }
    const confidenceMatch = /x_wconf\s+(\d+)/.exec(title);
    collected.push({
      text,
      bbox,
      confidence: confidenceMatch ? parseInt(confidenceMatch[1], 10) : 0,
    });
  }
  return collected;
}
/**
 * Computes the smallest box that encloses every box in `bboxes`.
 *
 * Implemented as a single pass rather than `Math.min(...array)`: spreading an
 * array into a call is limited by the engine's maximum argument count, so a
 * very large word list could throw a RangeError.
 *
 * @param bboxes - Boxes to enclose.
 * @returns The enclosing box, or an all-zero box for an empty list.
 */
function calculateBoundingBox(bboxes: BBox[]): BBox {
  if (bboxes.length === 0) {
    return { x0: 0, y0: 0, x1: 0, y1: 0 };
  }
  let x0 = Infinity;
  let y0 = Infinity;
  let x1 = -Infinity;
  let y1 = -Infinity;
  for (const box of bboxes) {
    x0 = Math.min(x0, box.x0);
    y0 = Math.min(y0, box.y0);
    x1 = Math.max(x1, box.x1);
    y1 = Math.max(y1, box.y1);
  }
  return { x0, y0, x1, y1 };
}
/**
 * Calculates the placement parameters for drawing a single word.
 *
 * Because pdf-lib doesn't support horizontal text scaling (the Tz operator),
 * this searches for a font size at which the rendered text width matches the
 * word's bbox width. The search starts at the bbox height, runs at most 10
 * refinement steps, and the result is clamped to [1, 2 x bbox height].
 *
 * @param word - The word to position
 * @param line - The line containing this word (baseline slope and textangle are read)
 * @param pageHeight - Height of the page in pixels (for coordinate flip)
 * @param fontWidthFn - Function to calculate text width at a given font size
 * @returns Position, font size, residual horizontal scale, and rotation.
 *   Rotation combines the negated textangle with the baseline slope converted
 *   to degrees (textangle is presumably already in degrees; confirm upstream).
 */
export function calculateWordTransform(
  word: OcrWord,
  line: OcrLine,
  pageHeight: number,
  fontWidthFn: (text: string, fontSize: number) => number
): WordTransform {
  const box = word.bbox;
  const targetWidth = box.x1 - box.x0;
  const boxHeight = box.y1 - box.y0;

  let fontSize = boxHeight;
  let stepsLeft = 10;
  while (stepsLeft-- > 0) {
    const measuredWidth = fontWidthFn(word.text, fontSize);
    if (measuredWidth <= 0) {
      break;
    }
    const previousSize = fontSize;
    fontSize = previousSize * (targetWidth / measuredWidth);
    // Stop once a step changes the size by less than 1%.
    if (Math.abs(fontSize - previousSize) / previousSize < 0.01) {
      break;
    }
  }
  fontSize = Math.max(1, Math.min(fontSize, boxHeight * 2));

  // Clamping may leave a width mismatch; report it as a scale factor.
  const clampedWidth = fontWidthFn(word.text, fontSize);
  const horizontalScale = clampedWidth > 0 ? targetWidth / clampedWidth : 1;

  const slopeAngle = Math.atan(line.baseline.slope) * (180 / Math.PI);

  return {
    x: box.x0,
    // pdf-lib draws text from the baseline, so position at the word bottom.
    y: pageHeight - box.y1,
    fontSize,
    horizontalScale,
    rotation: -line.textangle + slopeAngle,
  };
}
/**
 * Calculates how to draw a space character filling the gap between two
 * consecutive words on a line.
 *
 * @param prevWord - Word to the left of the gap.
 * @param nextWord - Word to the right of the gap.
 * @param line - Line containing both words (bbox and baseline intercept are read).
 * @param pageHeight - Page height used to flip the y axis.
 * @param spaceWidthFn - Returns the width of a space at a given font size.
 * @returns Placement for the space, or `null` when the words touch/overlap or
 *   the measured space width is not positive.
 */
export function calculateSpaceTransform(
  prevWord: OcrWord,
  nextWord: OcrWord,
  line: OcrLine,
  pageHeight: number,
  spaceWidthFn: (fontSize: number) => number
): { x: number; y: number; horizontalScale: number; fontSize: number } | null {
  const { bbox: lineBox, baseline } = line;
  const fontSize = Math.max(lineBox.y1 - lineBox.y0 + baseline.intercept, 1);

  const x = prevWord.bbox.x1;
  const gapWidth = nextWord.bbox.x0 - x;
  if (!(gapWidth > 0)) {
    return gapWidth <= 0 ? null : measureAndPlace();
  }
  return measureAndPlace();

  // Measures the space glyph and stretches it to cover the gap.
  function measureAndPlace(): {
    x: number;
    y: number;
    horizontalScale: number;
    fontSize: number;
  } | null {
    const spaceWidth = spaceWidthFn(fontSize);
    if (spaceWidth <= 0) {
      return null;
    }
    return {
      x,
      y: pageHeight - lineBox.y1 - baseline.intercept,
      horizontalScale: gapWidth / spaceWidth,
      fontSize,
    };
  }
}

View File

@@ -0,0 +1,158 @@
/** Quality levels offered for image compression. */
export type ImageQuality = 'high' | 'medium' | 'low';
/** Encoding parameters associated with one quality level. */
interface QualityConfig {
// Quality argument handed to canvas.toBlob for 'image/jpeg'.
jpegQuality: number;
// Longest allowed side in pixels; 0 disables downscaling.
maxDimension: number;
}
// Per-level settings. The compress functions in this file return their input
// untouched for 'high' before consulting this table.
const QUALITY_CONFIGS: Record<ImageQuality, QualityConfig> = {
high: { jpegQuality: 0.92, maxDimension: 0 },
medium: { jpegQuality: 0.75, maxDimension: 2500 },
low: { jpegQuality: 0.5, maxDimension: 1500 },
};
/**
 * Reads the quality chosen in the `#jpg-pdf-quality` select element.
 *
 * @returns The selected level, or `'medium'` when the element is missing or
 *   holds any other value.
 */
export function getSelectedQuality(): ImageQuality {
  const select = document.getElementById(
    'jpg-pdf-quality'
  ) as HTMLSelectElement | null;
  const chosen = select?.value;
  switch (chosen) {
    case 'high':
    case 'medium':
    case 'low':
      return chosen;
    default:
      return 'medium';
  }
}
/**
 * Re-encodes an image file as JPEG at the given quality level, downscaling it
 * when it exceeds the level's maximum dimension.
 *
 * Behavior:
 * - `'high'` returns the input file unchanged.
 * - If the browser cannot decode the file, the original file is returned
 *   (deliberate best-effort fallback).
 * - The canvas is filled with white before drawing, because JPEG has no alpha
 *   channel and transparent pixels would otherwise be encoded as black.
 *
 * @param file - Source image file.
 * @param quality - Target quality level.
 * @returns A JPEG `File` whose name carries a `.jpg` extension, or the
 *   original file in the cases above.
 * @throws Error when a 2D canvas context or the encoded blob cannot be
 *   obtained, or when drawing fails.
 */
export async function compressImageFile(
  file: File,
  quality: ImageQuality
): Promise<File> {
  if (quality === 'high') return file;
  const config = QUALITY_CONFIGS[quality];
  return new Promise<File>((resolve, reject) => {
    const img = new Image();
    const url = URL.createObjectURL(file);
    img.onload = () => {
      // Anything thrown inside an event handler would leave the promise
      // pending forever, so failures are converted into rejections.
      try {
        let width = img.naturalWidth;
        let height = img.naturalHeight;
        if (
          config.maxDimension > 0 &&
          (width > config.maxDimension || height > config.maxDimension)
        ) {
          const ratio = Math.min(
            config.maxDimension / width,
            config.maxDimension / height
          );
          width = Math.round(width * ratio);
          height = Math.round(height * ratio);
        }
        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
          reject(new Error('Canvas context failed'));
          return;
        }
        // Flatten transparency onto white instead of the JPEG default black.
        ctx.fillStyle = '#ffffff';
        ctx.fillRect(0, 0, width, height);
        ctx.drawImage(img, 0, 0, width, height);
        canvas.toBlob(
          (blob) => {
            if (!blob) {
              reject(new Error('Canvas toBlob failed'));
              return;
            }
            // Swap the extension, or append one when the name has none.
            const newName = /\.[^.]+$/.test(file.name)
              ? file.name.replace(/\.[^.]+$/, '.jpg')
              : `${file.name}.jpg`;
            resolve(new File([blob], newName, { type: 'image/jpeg' }));
          },
          'image/jpeg',
          config.jpegQuality
        );
      } catch (error) {
        reject(error instanceof Error ? error : new Error(String(error)));
      } finally {
        // The pixels are already on the canvas (or we failed); the object URL
        // is no longer needed either way.
        URL.revokeObjectURL(url);
      }
    };
    img.onerror = () => {
      URL.revokeObjectURL(url);
      resolve(file);
    };
    img.src = url;
  });
}
/**
 * Re-encodes raw image bytes as JPEG at the given quality level, downscaling
 * when the image exceeds the level's maximum dimension.
 *
 * Behavior:
 * - `'high'` returns the input bytes unchanged, labelled `'png'`.
 * - If the browser cannot decode the bytes, the input is likewise returned
 *   labelled `'png'` (deliberate best-effort fallback).
 *   NOTE(review): the `'png'` label is applied without inspecting the bytes;
 *   confirm callers only pass PNG data on these paths.
 * - The canvas is filled with white before drawing, because JPEG has no alpha
 *   channel and transparent pixels would otherwise be encoded as black.
 *
 * @param bytes - Encoded image data.
 * @param quality - Target quality level.
 * @returns The resulting bytes and the format they are reported as.
 * @throws Error when a 2D canvas context or the encoded blob cannot be
 *   obtained, or when drawing or reading the blob fails.
 */
export async function compressImageBytes(
  bytes: Uint8Array | ArrayBuffer,
  quality: ImageQuality
): Promise<{ bytes: Uint8Array; type: 'jpeg' | 'png' }> {
  // Result used whenever the input is handed back without re-encoding.
  const passthrough = (): { bytes: Uint8Array; type: 'jpeg' | 'png' } => ({
    bytes: bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes),
    type: 'png',
  });
  if (quality === 'high') {
    return passthrough();
  }
  const config = QUALITY_CONFIGS[quality];
  return new Promise((resolve, reject) => {
    const blob = new Blob([new Uint8Array(bytes)]);
    const url = URL.createObjectURL(blob);
    const img = new Image();
    img.onload = () => {
      // Anything thrown inside an event handler would leave the promise
      // pending forever, so failures are converted into rejections.
      try {
        let width = img.naturalWidth;
        let height = img.naturalHeight;
        if (
          config.maxDimension > 0 &&
          (width > config.maxDimension || height > config.maxDimension)
        ) {
          const ratio = Math.min(
            config.maxDimension / width,
            config.maxDimension / height
          );
          width = Math.round(width * ratio);
          height = Math.round(height * ratio);
        }
        const canvas = document.createElement('canvas');
        canvas.width = width;
        canvas.height = height;
        const ctx = canvas.getContext('2d');
        if (!ctx) {
          reject(new Error('Canvas context failed'));
          return;
        }
        // Flatten transparency onto white instead of the JPEG default black.
        ctx.fillStyle = '#ffffff';
        ctx.fillRect(0, 0, width, height);
        ctx.drawImage(img, 0, 0, width, height);
        canvas.toBlob(
          (jpegBlob) => {
            if (!jpegBlob) {
              reject(new Error('Canvas toBlob failed'));
              return;
            }
            // Forward a failed read as a rejection rather than leaving it
            // unhandled inside the callback.
            jpegBlob.arrayBuffer().then(
              (arrayBuffer) =>
                resolve({ bytes: new Uint8Array(arrayBuffer), type: 'jpeg' }),
              reject
            );
          },
          'image/jpeg',
          config.jpegQuality
        );
      } catch (error) {
        reject(error instanceof Error ? error : new Error(String(error)));
      } finally {
        URL.revokeObjectURL(url);
      }
    };
    img.onerror = () => {
      URL.revokeObjectURL(url);
      resolve(passthrough());
    };
    img.src = url;
  });
}

View File

@@ -0,0 +1,312 @@
import type { ScanSettings } from '../types/scanner-effect-type.js';
import type { AdjustColorsSettings } from '../types/adjust-colors-type.js';
/**
 * Converts the image to greyscale in place using weighted RGB luminance
 * (0.299 R + 0.587 G + 0.114 B). The alpha channel is left untouched.
 *
 * @param imageData - Pixel buffer to modify.
 */
export function applyGreyscale(imageData: ImageData): void {
  const pixels = imageData.data;
  for (let offset = 0; offset < pixels.length; offset += 4) {
    const red = pixels[offset];
    const green = pixels[offset + 1];
    const blue = pixels[offset + 2];
    const luminance = Math.round(0.299 * red + 0.587 * green + 0.114 * blue);
    pixels[offset] = luminance;
    pixels[offset + 1] = luminance;
    pixels[offset + 2] = luminance;
  }
}
/**
 * Inverts the red, green and blue channels in place. The alpha channel is
 * left untouched.
 *
 * @param imageData - Pixel buffer to modify.
 */
export function applyInvertColors(imageData: ImageData): void {
  const pixels = imageData.data;
  const total = pixels.length;
  let offset = 0;
  while (offset < total) {
    for (let channel = 0; channel < 3; channel++) {
      pixels[offset + channel] = 255 - pixels[offset + channel];
    }
    offset += 4;
  }
}
/**
 * Converts an RGB color to HSL.
 *
 * @param r - Red, 0-255.
 * @param g - Green, 0-255.
 * @param b - Blue, 0-255.
 * @returns `[hue, saturation, lightness]`, each in the range 0-1. Greys
 *   (all channels equal) yield hue 0 and saturation 0.
 */
export function rgbToHsl(
  r: number,
  g: number,
  b: number
): [number, number, number] {
  const red = r / 255;
  const green = g / 255;
  const blue = b / 255;

  const brightest = Math.max(red, green, blue);
  const darkest = Math.min(red, green, blue);
  const lightness = (brightest + darkest) / 2;

  if (brightest === darkest) {
    return [0, 0, lightness];
  }

  const spread = brightest - darkest;
  const saturation =
    lightness > 0.5
      ? spread / (2 - brightest - darkest)
      : spread / (brightest + darkest);

  // Position on the six-segment color wheel, chosen by the dominant channel.
  let wheelPosition: number;
  if (brightest === red) {
    wheelPosition = (green - blue) / spread + (green < blue ? 6 : 0);
  } else if (brightest === green) {
    wheelPosition = (blue - red) / spread + 2;
  } else {
    wheelPosition = (red - green) / spread + 4;
  }

  return [wheelPosition / 6, saturation, lightness];
}
/**
 * Converts an HSL color to RGB.
 *
 * @param h - Hue, 0-1.
 * @param s - Saturation, 0-1.
 * @param l - Lightness, 0-1.
 * @returns `[red, green, blue]`, each rounded to an integer in 0-255.
 */
export function hslToRgb(
  h: number,
  s: number,
  l: number
): [number, number, number] {
  // Zero saturation is a pure grey: every channel equals the lightness.
  if (s === 0) {
    const grey = Math.round(l * 255);
    return [grey, grey, grey];
  }

  const upper = l < 0.5 ? l * (1 + s) : l + s - l * s;
  const lower = 2 * l - upper;

  // Evaluates one channel from its (possibly out-of-range) hue offset.
  const channel = (hueOffset: number): number => {
    const t =
      hueOffset < 0 ? hueOffset + 1 : hueOffset > 1 ? hueOffset - 1 : hueOffset;
    if (t < 1 / 6) {
      return lower + (upper - lower) * 6 * t;
    }
    if (t < 1 / 2) {
      return upper;
    }
    if (t < 2 / 3) {
      return lower + (upper - lower) * (2 / 3 - t) * 6;
    }
    return lower;
  };

  const red = Math.round(channel(h + 1 / 3) * 255);
  const green = Math.round(channel(h) * 255);
  const blue = Math.round(channel(h - 1 / 3) * 255);
  return [red, green, blue];
}
/**
 * Renders `sourceData` onto `canvas` with a simulated "scanned document" look.
 *
 * Steps, in order: optional blur, a per-pixel pass (grayscale, brightness,
 * contrast, yellow tint, noise), optional darkened edge gradients, and an
 * optional rotation onto a white background. The destination canvas is resized
 * to fit the result.
 *
 * @param sourceData - Source pixels; they are copied to a scratch canvas and not modified.
 * @param canvas - Destination canvas; its width and height are overwritten.
 * @param settings - Effect parameters; `blur`, `noise`, `contrast`, `grayscale`,
 *   `brightness`, `yellowish` and `border` are read here.
 * @param rotationAngle - Rotation in degrees; 0 skips the rotation step.
 * @param scale - Multiplier applied to the blur radius and noise amplitude.
 */
export function applyScannerEffect(
sourceData: ImageData,
canvas: HTMLCanvasElement,
settings: ScanSettings,
rotationAngle: number,
scale: number = 1
): void {
const ctx = canvas.getContext('2d')!;
const w = sourceData.width;
const h = sourceData.height;
const scaledBlur = settings.blur * scale;
const scaledNoise = settings.noise * scale;
// All processing happens on a scratch canvas of the source size.
const workCanvas = document.createElement('canvas');
workCanvas.width = w;
workCanvas.height = h;
const workCtx = workCanvas.getContext('2d')!;
if (scaledBlur > 0) {
workCtx.filter = `blur(${scaledBlur}px)`;
}
workCtx.putImageData(sourceData, 0, 0);
if (scaledBlur > 0) {
// NOTE(review): putImageData writes pixels directly, so the blur appears to
// be produced by this drawImage pass through a second canvas; the filter is
// then cleared before the blurred result is copied back.
const tempCanvas = document.createElement('canvas');
tempCanvas.width = w;
tempCanvas.height = h;
const tempCtx = tempCanvas.getContext('2d')!;
tempCtx.filter = `blur(${scaledBlur}px)`;
tempCtx.drawImage(workCanvas, 0, 0);
workCtx.filter = 'none';
workCtx.clearRect(0, 0, w, h);
workCtx.drawImage(tempCanvas, 0, 0);
}
const imageData = workCtx.getImageData(0, 0, w, h);
const data = imageData.data;
// Contrast multiplier applied around the mid-point 128; 1 when contrast is 0.
const contrastFactor =
settings.contrast !== 0
? (259 * (settings.contrast + 255)) / (255 * (259 - settings.contrast))
: 1;
// Per-pixel pass over RGBA quadruples; alpha (index i + 3) is never written.
for (let i = 0; i < data.length; i += 4) {
let r = data[i];
let g = data[i + 1];
let b = data[i + 2];
if (settings.grayscale) {
const grey = Math.round(0.299 * r + 0.587 * g + 0.114 * b);
r = grey;
g = grey;
b = grey;
}
if (settings.brightness !== 0) {
r += settings.brightness;
g += settings.brightness;
b += settings.brightness;
}
if (settings.contrast !== 0) {
r = contrastFactor * (r - 128) + 128;
g = contrastFactor * (g - 128) + 128;
b = contrastFactor * (b - 128) + 128;
}
if (settings.yellowish > 0) {
// Raise red and green, lower blue; yellowish = 50 gives the full offsets.
const intensity = settings.yellowish / 50;
r += 20 * intensity;
g += 12 * intensity;
b -= 15 * intensity;
}
if (scaledNoise > 0) {
// One random offset per pixel, shared by all three channels.
const n = (Math.random() - 0.5) * scaledNoise;
r += n;
g += n;
b += n;
}
data[i] = Math.max(0, Math.min(255, r));
data[i + 1] = Math.max(0, Math.min(255, g));
data[i + 2] = Math.max(0, Math.min(255, b));
}
workCtx.putImageData(imageData, 0, 0);
if (settings.border) {
// Four gradients, one per edge, fading from 30% black to transparent over
// 2% of the longer side.
const borderSize = Math.max(w, h) * 0.02;
const gradient1 = workCtx.createLinearGradient(0, 0, borderSize, 0);
gradient1.addColorStop(0, 'rgba(0,0,0,0.3)');
gradient1.addColorStop(1, 'rgba(0,0,0,0)');
workCtx.fillStyle = gradient1;
workCtx.fillRect(0, 0, borderSize, h);
const gradient2 = workCtx.createLinearGradient(w, 0, w - borderSize, 0);
gradient2.addColorStop(0, 'rgba(0,0,0,0.3)');
gradient2.addColorStop(1, 'rgba(0,0,0,0)');
workCtx.fillStyle = gradient2;
workCtx.fillRect(w - borderSize, 0, borderSize, h);
const gradient3 = workCtx.createLinearGradient(0, 0, 0, borderSize);
gradient3.addColorStop(0, 'rgba(0,0,0,0.3)');
gradient3.addColorStop(1, 'rgba(0,0,0,0)');
workCtx.fillStyle = gradient3;
workCtx.fillRect(0, 0, w, borderSize);
const gradient4 = workCtx.createLinearGradient(0, h, 0, h - borderSize);
gradient4.addColorStop(0, 'rgba(0,0,0,0.3)');
gradient4.addColorStop(1, 'rgba(0,0,0,0)');
workCtx.fillStyle = gradient4;
workCtx.fillRect(0, h - borderSize, w, borderSize);
}
if (rotationAngle !== 0) {
// Grow the destination to the bounding box of the rotated image, paint it
// white, then draw the scratch canvas rotated about the center.
const rad = (rotationAngle * Math.PI) / 180;
const cos = Math.abs(Math.cos(rad));
const sin = Math.abs(Math.sin(rad));
const newW = Math.ceil(w * cos + h * sin);
const newH = Math.ceil(w * sin + h * cos);
canvas.width = newW;
canvas.height = newH;
ctx.fillStyle = '#ffffff';
ctx.fillRect(0, 0, newW, newH);
ctx.translate(newW / 2, newH / 2);
ctx.rotate(rad);
ctx.drawImage(workCanvas, -w / 2, -h / 2);
// Restore the identity transform for later users of this context.
ctx.setTransform(1, 0, 0, 1, 0, 0);
} else {
canvas.width = w;
canvas.height = h;
ctx.drawImage(workCanvas, 0, 0);
}
}
/**
 * Applies color adjustments to a copy of `sourceData` and paints the result
 * onto `canvas`, which is resized to the source dimensions.
 *
 * Adjustments run per pixel in this order: brightness, contrast,
 * hue/saturation (through HSL), temperature, tint, gamma, sepia. The alpha
 * channel is copied unchanged and `sourceData` itself is not modified.
 *
 * @param sourceData - Source pixels.
 * @param canvas - Destination canvas.
 * @param settings - Adjustment amounts; a value of 0 (1.0 for gamma) disables
 *   the corresponding step.
 */
export function applyColorAdjustments(
  sourceData: ImageData,
  canvas: HTMLCanvasElement,
  settings: AdjustColorsSettings
): void {
  const ctx = canvas.getContext('2d')!;
  const { width: w, height: h } = sourceData;
  canvas.width = w;
  canvas.height = h;

  const output = new ImageData(new Uint8ClampedArray(sourceData.data), w, h);
  const px = output.data;

  const clampByte = (value: number): number =>
    Math.max(0, Math.min(255, value));

  // Loop-invariant factors derived from the settings.
  const brightnessOffset = settings.brightness * 2.55;
  const contrastFactor =
    settings.contrast !== 0
      ? (259 * (settings.contrast + 255)) / (255 * (259 - settings.contrast))
      : 1;
  const gammaExponent = settings.gamma !== 1.0 ? 1 / settings.gamma : 1;
  const sepiaMix = settings.sepia / 100;
  const usesHsl = settings.saturation !== 0 || settings.hueShift !== 0;

  for (let i = 0; i < px.length; i += 4) {
    let r = px[i];
    let g = px[i + 1];
    let b = px[i + 2];

    if (settings.brightness !== 0) {
      r += brightnessOffset;
      g += brightnessOffset;
      b += brightnessOffset;
    }

    if (settings.contrast !== 0) {
      r = contrastFactor * (r - 128) + 128;
      g = contrastFactor * (g - 128) + 128;
      b = contrastFactor * (b - 128) + 128;
    }

    if (usesHsl) {
      const [hue, sat, lig] = rgbToHsl(clampByte(r), clampByte(g), clampByte(b));

      let shiftedHue = hue;
      if (settings.hueShift !== 0) {
        // Wrap the shifted hue back into [0, 1).
        shiftedHue = (hue + settings.hueShift / 360) % 1;
        if (shiftedHue < 0) shiftedHue += 1;
      }

      let adjustedSat = sat;
      if (settings.saturation !== 0) {
        const amount = settings.saturation / 100;
        // Positive amounts move toward full saturation, negative toward grey.
        const raw = amount > 0 ? sat + (1 - sat) * amount : sat * (1 + amount);
        adjustedSat = Math.max(0, Math.min(1, raw));
      }

      [r, g, b] = hslToRgb(shiftedHue, adjustedSat, lig);
    }

    if (settings.temperature !== 0) {
      const warmth = settings.temperature / 50;
      r += 30 * warmth;
      b -= 30 * warmth;
    }

    if (settings.tint !== 0) {
      const tintAmount = settings.tint / 50;
      g += 30 * tintAmount;
    }

    if (settings.gamma !== 1.0) {
      r = Math.pow(clampByte(r) / 255, gammaExponent) * 255;
      g = Math.pow(clampByte(g) / 255, gammaExponent) * 255;
      b = Math.pow(clampByte(b) / 255, gammaExponent) * 255;
    }

    if (settings.sepia > 0) {
      const sepiaR = 0.393 * r + 0.769 * g + 0.189 * b;
      const sepiaG = 0.349 * r + 0.686 * g + 0.168 * b;
      const sepiaB = 0.272 * r + 0.534 * g + 0.131 * b;
      r = r + (sepiaR - r) * sepiaMix;
      g = g + (sepiaG - g) * sepiaMix;
      b = b + (sepiaB - b) * sepiaMix;
    }

    px[i] = clampByte(r);
    px[i + 1] = clampByte(g);
    px[i + 2] = clampByte(b);
  }

  ctx.putImageData(output, 0, 0);
}

View File

@@ -0,0 +1,160 @@
/**
* LibreOffice WASM Converter Wrapper
*
* Uses @matbee/libreoffice-converter package for document conversion.
* Handles progress tracking and provides a simpler API.
*/
import { WorkerBrowserConverter } from '@matbee/libreoffice-converter/browser';
// Default location of the LibreOffice WASM assets, relative to the app's base URL.
const LIBREOFFICE_LOCAL_PATH = import.meta.env.BASE_URL + 'libreoffice-wasm/';
/** Progress report passed to a ProgressCallback. */
export interface LoadProgress {
// Current stage of the load/convert lifecycle.
phase: 'loading' | 'initializing' | 'converting' | 'complete' | 'ready';
// Completion percentage; the converter reports 0 at start and 100 when ready.
percent: number;
// Human-readable status text.
message: string;
}
/** Receives progress updates while the conversion engine loads. */
export type ProgressCallback = (progress: LoadProgress) => void;
// Singleton for converter instance
let converterInstance: LibreOfficeConverter | null = null;
/**
 * Thin wrapper around `WorkerBrowserConverter` that loads the LibreOffice WASM
 * engine once and converts documents to PDF.
 */
export class LibreOfficeConverter {
  private converter: WorkerBrowserConverter | null = null;
  private initialized = false;
  private initializing = false;
  // In-flight initialization shared by concurrent initialize() callers.
  private initPromise: Promise<void> | null = null;
  private basePath: string;

  /**
   * @param basePath - URL prefix of the WASM assets; defaults to the bundled
   *   `libreoffice-wasm/` directory.
   */
  constructor(basePath?: string) {
    this.basePath = basePath || LIBREOFFICE_LOCAL_PATH;
  }

  /**
   * Loads and initializes the conversion engine.
   *
   * Safe to call repeatedly: once initialized it returns immediately, and
   * concurrent calls share the same in-flight initialization, so every caller
   * sees the same success or failure. Only the first caller's `onProgress` is
   * used for a given initialization.
   *
   * @param onProgress - Optional progress listener.
   * @throws Whatever the underlying converter throws while initializing.
   */
  async initialize(onProgress?: ProgressCallback): Promise<void> {
    if (this.initialized) return;
    if (!this.initPromise) {
      this.initPromise = this.runInitialization(onProgress).finally(() => {
        this.initPromise = null;
      });
    }
    return this.initPromise;
  }

  /** Performs one initialization attempt; see initialize(). */
  private async runInitialization(onProgress?: ProgressCallback): Promise<void> {
    this.initializing = true;
    let progressCallback = onProgress; // Store original callback
    try {
      progressCallback?.({ phase: 'loading', percent: 0, message: 'Loading conversion engine...' });
      const converter = new WorkerBrowserConverter({
        sofficeJs: `${this.basePath}soffice.js`,
        sofficeWasm: `${this.basePath}soffice.wasm.gz`,
        sofficeData: `${this.basePath}soffice.data.gz`,
        sofficeWorkerJs: `${this.basePath}soffice.worker.js`,
        browserWorkerJs: `${this.basePath}browser.worker.global.js`,
        verbose: false,
        onProgress: (info: { phase: string; percent: number; message: string }) => {
          if (progressCallback && !this.initialized) {
            const simplifiedMessage = `Loading conversion engine (${Math.round(info.percent)}%)...`;
            progressCallback({
              phase: info.phase as LoadProgress['phase'],
              percent: info.percent,
              message: simplifiedMessage
            });
          }
        },
        onReady: () => {
          console.log('[LibreOffice] Ready!');
        },
        onError: (error: Error) => {
          console.error('[LibreOffice] Error:', error);
        },
      });
      this.converter = converter;
      await converter.initialize();
      this.initialized = true;
      // Call completion message
      progressCallback?.({ phase: 'ready', percent: 100, message: 'Conversion engine ready!' });
      // Null out the callback to prevent any late-firing progress updates
      progressCallback = undefined;
    } catch (error) {
      // Do not keep a converter that never finished initializing; a later
      // initialize() call starts from scratch.
      this.converter = null;
      throw error;
    } finally {
      this.initializing = false;
    }
  }

  /** @returns `true` once initialize() has completed successfully. */
  isReady(): boolean {
    return this.initialized && this.converter !== null;
  }

  /**
   * Converts a document to PDF.
   *
   * The input format is taken from the file name's extension (lower-cased);
   * a name without a dot yields an empty format.
   *
   * @param file - Document to convert.
   * @returns The PDF as a Blob with the MIME type reported by the converter.
   * @throws Error('Converter not initialized') when initialize() has not
   *   completed; rethrows any conversion failure after logging it.
   */
  async convertToPdf(file: File): Promise<Blob> {
    if (!this.converter || !this.initialized) {
      throw new Error('Converter not initialized');
    }
    console.log(`[LibreOffice] Converting ${file.name} to PDF...`);
    console.log(`[LibreOffice] File type: ${file.type}, Size: ${file.size} bytes`);
    try {
      console.log(`[LibreOffice] Reading file as ArrayBuffer...`);
      const arrayBuffer = await file.arrayBuffer();
      const uint8Array = new Uint8Array(arrayBuffer);
      console.log(`[LibreOffice] File loaded, ${uint8Array.length} bytes`);
      console.log(`[LibreOffice] Calling converter.convert() with buffer...`);
      const startTime = Date.now();
      // Detect input format - critical for CSV to apply import filters.
      // Only text after the last dot counts, so "README" is not a format.
      const dotIndex = file.name.lastIndexOf('.');
      const ext = dotIndex >= 0 ? file.name.slice(dotIndex + 1).toLowerCase() : '';
      console.log(`[LibreOffice] Detected format from extension: ${ext}`);
      const result = await this.converter.convert(uint8Array, {
        outputFormat: 'pdf',
        // The library's format union is not visible here, hence the cast.
        inputFormat: ext as any, // Explicitly specify format for CSV import filters
      }, file.name);
      const duration = Date.now() - startTime;
      console.log(`[LibreOffice] Conversion complete! Duration: ${duration}ms, Size: ${result.data.length} bytes`);
      // Create a copy to avoid SharedArrayBuffer type issues
      const data = new Uint8Array(result.data);
      return new Blob([data], { type: result.mimeType });
    } catch (error) {
      console.error(`[LibreOffice] Conversion FAILED for ${file.name}:`, error);
      console.error(`[LibreOffice] Error details:`, {
        message: error instanceof Error ? error.message : String(error),
        stack: error instanceof Error ? error.stack : undefined
      });
      throw error;
    }
  }

  /** Alias of convertToPdf() for word-processing documents. */
  async wordToPdf(file: File): Promise<Blob> {
    return this.convertToPdf(file);
  }

  /** Alias of convertToPdf() for presentations. */
  async pptToPdf(file: File): Promise<Blob> {
    return this.convertToPdf(file);
  }

  /** Alias of convertToPdf() for spreadsheets. */
  async excelToPdf(file: File): Promise<Blob> {
    return this.convertToPdf(file);
  }

  /** Destroys the underlying converter, if any, and resets to the uninitialized state. */
  async destroy(): Promise<void> {
    if (this.converter) {
      await this.converter.destroy();
    }
    this.converter = null;
    this.initialized = false;
  }
}
/**
 * Returns the shared converter, creating it on first use.
 *
 * @param basePath - Asset URL prefix; only used by the call that creates the
 *   instance and ignored afterwards.
 * @returns The module-wide `LibreOfficeConverter` instance.
 */
export function getLibreOfficeConverter(basePath?: string): LibreOfficeConverter {
  const existing = converterInstance;
  if (existing) {
    return existing;
  }
  converterInstance = new LibreOfficeConverter(basePath);
  return converterInstance;
}

View File

@@ -0,0 +1,970 @@
import MarkdownIt from 'markdown-it';
import hljs from 'highlight.js/lib/core';
import javascript from 'highlight.js/lib/languages/javascript';
import typescript from 'highlight.js/lib/languages/typescript';
import python from 'highlight.js/lib/languages/python';
import css from 'highlight.js/lib/languages/css';
import xml from 'highlight.js/lib/languages/xml';
import json from 'highlight.js/lib/languages/json';
import bash from 'highlight.js/lib/languages/bash';
import markdownLang from 'highlight.js/lib/languages/markdown';
import sql from 'highlight.js/lib/languages/sql';
import java from 'highlight.js/lib/languages/java';
import csharp from 'highlight.js/lib/languages/csharp';
import cpp from 'highlight.js/lib/languages/cpp';
import go from 'highlight.js/lib/languages/go';
import rust from 'highlight.js/lib/languages/rust';
import yaml from 'highlight.js/lib/languages/yaml';
import 'highlight.js/styles/github.css';
import mermaid from 'mermaid';
import sub from 'markdown-it-sub';
import sup from 'markdown-it-sup';
import footnote from 'markdown-it-footnote';
import deflist from 'markdown-it-deflist';
import abbr from 'markdown-it-abbr';
import { full as emoji } from 'markdown-it-emoji';
import ins from 'markdown-it-ins';
import mark from 'markdown-it-mark';
import taskLists from 'markdown-it-task-lists';
import anchor from 'markdown-it-anchor';
import tocDoneRight from 'markdown-it-toc-done-right';
import { applyTranslations } from '../i18n/i18n';
// Register highlight.js languages: each grammar is registered under its
// canonical name followed by any short aliases.
(
  [
    ['javascript', javascript],
    ['js', javascript],
    ['typescript', typescript],
    ['ts', typescript],
    ['python', python],
    ['py', python],
    ['css', css],
    ['html', xml],
    ['xml', xml],
    ['json', json],
    ['bash', bash],
    ['sh', bash],
    ['shell', bash],
    ['markdown', markdownLang],
    ['md', markdownLang],
    ['sql', sql],
    ['java', java],
    ['csharp', csharp],
    ['cs', csharp],
    ['cpp', cpp],
    ['c', cpp],
    ['go', go],
    ['rust', rust],
    ['yaml', yaml],
    ['yml', yaml],
  ] as const
).forEach(([alias, grammar]) => hljs.registerLanguage(alias, grammar));
/** Options accepted by the MarkdownEditor constructor. */
export interface MarkdownEditorOptions {
/** Initial markdown content */
initialContent?: string;
/** Callback when user wants to go back */
onBack?: () => void;
}
/** Parser switches kept by the editor (the fields mirror markdown-it option names). */
export interface MarkdownItOptions {
/** Enable HTML tags in source */
html: boolean;
/** Convert '\n' in paragraphs into <br> */
breaks: boolean;
/** Autoconvert URL-like text to links */
linkify: boolean;
/** Enable some language-neutral replacement + quotes beautification */
typographer: boolean;
/** Highlight function for fenced code blocks */
highlight?: (str: string, lang: string) => string;
}
/**
 * Sample document loaded into the editor when no initial content is supplied.
 * It demonstrates the formatting features and Mermaid diagram types named in
 * its section headings.
 *
 * Backticks and `${` sequences are escaped so they reach the markdown source
 * literally instead of being interpreted by the template literal.
 */
const DEFAULT_MARKDOWN = `# Welcome to BentoPDF Markdown Editor
This is a **live preview** markdown editor with full plugin support.
\${toc}
## Basic Formatting
- **Bold** and *italic* text
- ~~Strikethrough~~ text
- [Links](https://bentopdf.com)
- ==Highlighted text== using mark
- ++Inserted text++ using ins
- H~2~O for subscript
- E=mc^2^ for superscript
## Task Lists
- [x] Completed task
- [x] Another done item
- [ ] Pending task
- [ ] Future work
## Emoji Support :rocket:
Use emoji shortcodes: :smile: :heart: :thumbsup: :star: :fire:
## Code with Syntax Highlighting
\`\`\`javascript
function greet(name) {
console.log(\`Hello, \${name}!\`);
return { message: 'Welcome!' };
}
\`\`\`
\`\`\`python
def fibonacci(n):
if n <= 1:
return n
return fibonacci(n-1) + fibonacci(n-2)
\`\`\`
## Tables
| Feature | Supported | Notes |
|---------|:---------:|-------|
| Headers | ✓ | Multiple levels |
| Lists | ✓ | Ordered & unordered |
| Code | ✓ | With highlighting |
| Tables | ✓ | With alignment |
| Emoji | ✓ | :white_check_mark: |
| Mermaid | ✓ | Diagrams! |
## Mermaid Diagrams
### Flowchart
\`\`\`mermaid
graph TD
A[Start] --> B{Decision}
B -->|Yes| C[OK]
B -->|No| D[Cancel]
\`\`\`
### Sequence Diagram
\`\`\`mermaid
sequenceDiagram
participant User
participant BentoPDF
participant Server
User->>BentoPDF: Upload PDF
BentoPDF->>BentoPDF: Process locally
BentoPDF-->>User: Download result
Note over BentoPDF: No server needed!
\`\`\`
### Pie Chart
\`\`\`mermaid
pie title PDF Tools Usage
"Merge" : 35
"Compress" : 25
"Convert" : 20
"Edit" : 15
"Other" : 5
\`\`\`
### Class Diagram
\`\`\`mermaid
classDiagram
class PDFDocument {
+String title
+int pageCount
+merge()
+split()
+compress()
}
class Page {
+int number
+rotate()
+crop()
}
PDFDocument "1" --> "*" Page
\`\`\`
### Gantt Chart
\`\`\`mermaid
gantt
title Project Timeline
dateFormat YYYY-MM-DD
section Planning
Research :a1, 2024-01-01, 7d
Design :a2, after a1, 5d
section Development
Implementation :a3, after a2, 14d
Testing :a4, after a3, 7d
\`\`\`
### Entity Relationship
\`\`\`mermaid
erDiagram
USER ||--o{ DOCUMENT : uploads
DOCUMENT ||--|{ PAGE : contains
DOCUMENT {
string id
string name
date created
}
PAGE {
int number
string content
}
\`\`\`
### Mindmap
\`\`\`mermaid
mindmap
root((BentoPDF))
Convert
Word to PDF
Excel to PDF
Image to PDF
Edit
Merge
Split
Compress
Secure
Encrypt
Sign
Watermark
\`\`\`
## Footnotes
Here's a sentence with a footnote[^1].
## Definition Lists
Term 1
: Definition for term 1
Term 2
: Definition for term 2
: Another definition for term 2
## Abbreviations
The HTML specification is maintained by the W3C.
*[HTML]: Hyper Text Markup Language
*[W3C]: World Wide Web Consortium
---
Start editing to see the magic happen!
[^1]: This is the footnote content.
`;
export class MarkdownEditor {
private container: HTMLElement;
private md: MarkdownIt;
private editor: HTMLTextAreaElement | null = null;
private preview: HTMLElement | null = null;
private onBack?: () => void;
private syncScroll: boolean = false;
private isSyncing: boolean = false;
private mermaidInitialized: boolean = false;
private mdOptions: MarkdownItOptions = {
html: true,
breaks: false,
linkify: true,
typographer: true
};
/**
 * Builds the editor inside `container` and loads its starting content.
 *
 * @param container - Element that will host the editor markup.
 * @param options - Initial content and the optional back callback. When
 *   `initialContent` is missing or empty, the built-in sample document is used.
 */
constructor(container: HTMLElement, options: MarkdownEditorOptions) {
  this.container = container;
  this.onBack = options.onBack;

  this.initMermaid();
  // The parser must exist before its link renderer can be overridden.
  this.md = this.createMarkdownIt();
  this.configureLinkRenderer();
  this.render();

  this.setContent(options.initialContent || DEFAULT_MARKDOWN);
}
/**
 * Configures Mermaid once per editor instance; later calls are no-ops.
 *
 * NOTE(review): `securityLevel: 'loose'` is Mermaid's most permissive
 * setting; confirm it is intended for user-supplied markdown.
 */
private initMermaid(): void {
  if (this.mermaidInitialized) {
    return;
  }
  const systemFontStack =
    '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif';
  mermaid.initialize({
    startOnLoad: false,
    theme: 'default',
    securityLevel: 'loose',
    fontFamily: systemFontStack
  });
  this.mermaidInitialized = true;
}
/**
 * Wraps the parser's `link_open` rule so every rendered link carries
 * target="_blank" and rel="noopener noreferrer". The previously installed
 * rule (or plain token rendering when there is none) still produces the tag.
 */
private configureLinkRenderer(): void {
  const rules = this.md.renderer.rules;
  const renderPlainToken = (tokens: any[], idx: number, options: any, _env: any, self: any) =>
    self.renderToken(tokens, idx, options);
  const previousRule = rules.link_open || renderPlainToken;

  rules.link_open = (tokens: any[], idx: number, options: any, env: any, self: any) => {
    const linkToken = tokens[idx];
    linkToken.attrSet('target', '_blank');
    linkToken.attrSet('rel', 'noopener noreferrer');
    return previousRule(tokens, idx, options, env, self);
  };
}
/**
 * Replaces the container's content with the editor markup (toolbar, markdown
 * and preview panes, hidden settings modal), then caches the textarea and
 * preview elements, attaches event listeners, applies translations and
 * creates the Lucide icons when that library is present on `window`.
 *
 * The settings checkboxes are rendered from the current `mdOptions` values.
 */
private render(): void {
this.container.innerHTML = `
<div class="md-editor light-mode">
<div class="md-editor-wrapper">
<div class="md-editor-header">
<div class="md-editor-actions">
<input type="file" accept=".md,.markdown,.txt" id="mdFileInput" style="display: none;" />
<button class="md-editor-btn md-editor-btn-secondary" id="mdUpload">
<i data-lucide="upload"></i>
<span data-i18n="tools:markdownToPdf.btnUpload">Upload</span>
</button>
<div class="theme-toggle">
<i data-lucide="moon" width="16" height="16"></i>
<div class="theme-toggle-slider active" id="themeToggle"></div>
<i data-lucide="sun" width="16" height="16"></i>
</div>
<button class="md-editor-btn md-editor-btn-secondary" id="mdSyncScroll" title="Toggle sync scroll">
<i data-lucide="git-compare"></i>
<span data-i18n="tools:markdownToPdf.btnSyncScroll">Sync Scroll</span>
</button>
<button class="md-editor-btn md-editor-btn-secondary" id="mdSettings">
<i data-lucide="settings"></i>
<span data-i18n="tools:markdownToPdf.btnSettings">Settings</span>
</button>
<button class="md-editor-btn md-editor-btn-primary" id="mdExport">
<i data-lucide="download"></i>
<span data-i18n="tools:markdownToPdf.btnExportPdf">Export PDF</span>
</button>
</div>
</div>
<div class="md-editor-main">
<div class="md-editor-pane">
<div class="md-editor-pane-header">
<span data-i18n="tools:markdownToPdf.paneMarkdown">Markdown</span>
</div>
<textarea class="md-editor-textarea" id="mdTextarea" spellcheck="false"></textarea>
</div>
<div class="md-editor-pane">
<div class="md-editor-pane-header">
<span data-i18n="tools:markdownToPdf.panePreview">Preview</span>
</div>
<div class="md-editor-preview" id="mdPreview"></div>
</div>
</div>
</div>
</div>
<!-- Settings Modal (hidden by default) -->
<div class="md-editor-modal-overlay" id="mdSettingsModal" style="display: none;">
<div class="md-editor-modal">
<div class="md-editor-modal-header">
<h2 class="md-editor-modal-title" data-i18n="tools:markdownToPdf.settingsTitle">Markdown Settings</h2>
<button class="md-editor-modal-close" id="mdCloseSettings">
<i data-lucide="x" width="20" height="20"></i>
</button>
</div>
<div class="md-editor-settings-group">
<h3 data-i18n="tools:markdownToPdf.settingsPreset">Preset</h3>
<select id="mdPreset">
<option value="default" selected data-i18n="tools:markdownToPdf.presetDefault">Default (GFM-like)</option>
<option value="commonmark" data-i18n="tools:markdownToPdf.presetCommonmark">CommonMark (strict)</option>
<option value="zero" data-i18n="tools:markdownToPdf.presetZero">Minimal (no features)</option>
</select>
</div>
<div class="md-editor-settings-group">
<h3 data-i18n="tools:markdownToPdf.settingsOptions">Markdown Options</h3>
<label class="md-editor-checkbox">
<input type="checkbox" id="mdOptHtml" ${this.mdOptions.html ? 'checked' : ''} />
<span data-i18n="tools:markdownToPdf.optAllowHtml">Allow HTML tags</span>
</label>
<label class="md-editor-checkbox">
<input type="checkbox" id="mdOptBreaks" ${this.mdOptions.breaks ? 'checked' : ''} />
<span data-i18n="tools:markdownToPdf.optBreaks">Convert newlines to &lt;br&gt;</span>
</label>
<label class="md-editor-checkbox">
<input type="checkbox" id="mdOptLinkify" ${this.mdOptions.linkify ? 'checked' : ''} />
<span data-i18n="tools:markdownToPdf.optLinkify">Auto-convert URLs to links</span>
</label>
<label class="md-editor-checkbox">
<input type="checkbox" id="mdOptTypographer" ${this.mdOptions.typographer ? 'checked' : ''} />
<span data-i18n="tools:markdownToPdf.optTypographer">Typographer (smart quotes, etc.)</span>
</label>
</div>
</div>
</div>
`;
// Both lookups go through the document, so they only find the elements once
// the container is attached to it.
this.editor = document.getElementById('mdTextarea') as HTMLTextAreaElement;
this.preview = document.getElementById('mdPreview') as HTMLElement;
this.setupEventListeners();
this.applyI18n();
// Initialize Lucide icons
if (typeof (window as any).lucide !== 'undefined') {
(window as any).lucide.createIcons();
}
}
/**
 * Wires every DOM event handler used by the editor UI: live preview,
 * scroll sync, theme toggle, settings modal, option checkboxes, preset
 * selector, file upload, PDF export and the editor keyboard shortcuts.
 *
 * Elements are looked up by id; an element that is not present is skipped.
 */
private setupEventListeners(): void {
  // Editor input: re-render the preview on every change
  this.editor?.addEventListener('input', () => {
    this.updatePreview();
  });

  // Sync scroll toggle button
  const syncScrollBtn = document.getElementById('mdSyncScroll');
  syncScrollBtn?.addEventListener('click', () => {
    this.syncScroll = !this.syncScroll;
    syncScrollBtn.classList.toggle('md-editor-btn-primary');
    syncScrollBtn.classList.toggle('md-editor-btn-secondary');
  });

  // Copies the relative scroll position of one pane onto the other.
  const mirrorScroll = (from: HTMLElement, to: HTMLElement): void => {
    if (!this.syncScroll || this.isSyncing) return;
    const fromRange = from.scrollHeight - from.clientHeight;
    // A pane without overflow has a zero range; dividing would give NaN.
    if (fromRange <= 0) return;
    this.isSyncing = true;
    const scrollPercentage = from.scrollTop / fromRange;
    to.scrollTop = scrollPercentage * (to.scrollHeight - to.clientHeight);
    // isSyncing suppresses the scroll event fired by the assignment above.
    setTimeout(() => (this.isSyncing = false), 10);
  };

  // Editor scroll sync
  this.editor?.addEventListener('scroll', () => {
    if (this.editor && this.preview) mirrorScroll(this.editor, this.preview);
  });

  // Preview scroll sync (bidirectional)
  this.preview?.addEventListener('scroll', () => {
    if (this.editor && this.preview) mirrorScroll(this.preview, this.editor);
  });

  // Theme toggle
  const themeToggle = document.getElementById('themeToggle');
  const editorContainer = document.querySelector('.md-editor');
  themeToggle?.addEventListener('click', () => {
    editorContainer?.classList.toggle('light-mode');
    themeToggle.classList.toggle('active');
  });

  // The modal is looked up at event time, as each handler did originally.
  const setSettingsModalDisplay = (display: 'flex' | 'none'): void => {
    const modal = document.getElementById('mdSettingsModal');
    if (modal) {
      modal.style.display = display;
    }
  };

  // Settings modal open
  document.getElementById('mdSettings')?.addEventListener('click', () => {
    setSettingsModalDisplay('flex');
  });

  // Settings modal close
  document.getElementById('mdCloseSettings')?.addEventListener('click', () => {
    setSettingsModalDisplay('none');
  });

  // Close modal on overlay click (but not on clicks inside the dialog)
  document.getElementById('mdSettingsModal')?.addEventListener('click', (e) => {
    if ((e.target as HTMLElement).classList.contains('md-editor-modal-overlay')) {
      setSettingsModalDisplay('none');
    }
  });

  // Settings checkboxes: each one drives a single markdown-it option
  const optionCheckboxes: Array<[string, 'html' | 'breaks' | 'linkify' | 'typographer']> = [
    ['mdOptHtml', 'html'],
    ['mdOptBreaks', 'breaks'],
    ['mdOptLinkify', 'linkify'],
    ['mdOptTypographer', 'typographer'],
  ];
  for (const [checkboxId, optionKey] of optionCheckboxes) {
    document.getElementById(checkboxId)?.addEventListener('change', (e) => {
      this.mdOptions[optionKey] = (e.target as HTMLInputElement).checked;
      this.updateMarkdownIt();
    });
  }

  // Preset selector
  document.getElementById('mdPreset')?.addEventListener('change', (e) => {
    const preset = (e.target as HTMLSelectElement).value;
    this.applyPreset(preset as 'default' | 'commonmark' | 'zero');
  });

  // Upload button forwards the click to the hidden file input
  document.getElementById('mdUpload')?.addEventListener('click', () => {
    document.getElementById('mdFileInput')?.click();
  });

  // File input change
  document.getElementById('mdFileInput')?.addEventListener('change', (e) => {
    const file = (e.target as HTMLInputElement).files?.[0];
    if (file) {
      this.loadFile(file);
    }
  });

  // Export PDF
  document.getElementById('mdExport')?.addEventListener('click', () => {
    this.exportPdf();
  });

  // Keyboard shortcuts
  const editor = this.editor;
  if (editor) {
    // Text inserted by the Tab key; the caret advances by exactly its length.
    const INDENT = '  ';
    editor.addEventListener('keydown', (e) => {
      // Ctrl/Cmd + S to export
      if ((e.ctrlKey || e.metaKey) && e.key === 's') {
        e.preventDefault();
        this.exportPdf();
      }
      // Tab key for indentation (replaces the current selection)
      if (e.key === 'Tab') {
        e.preventDefault();
        const start = editor.selectionStart;
        const end = editor.selectionEnd;
        const value = editor.value;
        editor.value = value.substring(0, start) + INDENT + value.substring(end);
        editor.selectionStart = editor.selectionEnd = start + INDENT.length;
        this.updatePreview();
      }
    });
  }
}
/** Name of the markdown-it preset currently selected in the settings modal. */
private currentPreset: 'default' | 'commonmark' | 'zero' = 'default';

/**
 * Switches to the given preset: stores it, resets the option flags to the
 * preset's values, mirrors those flags onto the settings checkboxes and
 * rebuilds the renderer.
 */
private applyPreset(preset: 'default' | 'commonmark' | 'zero'): void {
  this.currentPreset = preset;

  // 'commonmark' and 'zero' both start with every optional feature off.
  const isRestricted = preset === 'commonmark' || preset === 'zero';
  this.mdOptions = isRestricted
    ? { html: false, breaks: false, linkify: false, typographer: false }
    : { html: true, breaks: false, linkify: true, typographer: true };

  // Update UI checkboxes
  const checkboxStates: Array<[string, boolean]> = [
    ['mdOptHtml', this.mdOptions.html],
    ['mdOptBreaks', this.mdOptions.breaks],
    ['mdOptLinkify', this.mdOptions.linkify],
    ['mdOptTypographer', this.mdOptions.typographer],
  ];
  for (const [checkboxId, checked] of checkboxStates) {
    (document.getElementById(checkboxId) as HTMLInputElement).checked = checked;
  }

  this.updateMarkdownIt();
}
/**
 * Reads the given file as text and loads it into the editor.
 * A failure is logged to the console and otherwise ignored.
 */
private async loadFile(file: File): Promise<void> {
  try {
    this.setContent(await file.text());
  } catch (error) {
    console.error('Failed to load file:', error);
  }
}
/**
 * Builds a markdown-it instance for the current preset.
 *
 * The user's option flags (`this.mdOptions`) are passed to every preset so
 * the settings checkboxes always reflect what the renderer does. This
 * matters for 'commonmark': that preset enables raw HTML by default, which
 * would contradict an unchecked "Allow HTML tags" box. Options whose rules
 * are not part of a preset's rule set have no effect there.
 *
 * Syntax highlighting and the extension plugins are only installed for the
 * 'default' preset.
 */
private createMarkdownIt(): MarkdownIt {
  let md: MarkdownIt;
  if (this.currentPreset === 'commonmark') {
    md = new MarkdownIt('commonmark', { ...this.mdOptions });
  } else if (this.currentPreset === 'zero') {
    md = new MarkdownIt('zero', { ...this.mdOptions });
    // Enable basic features for zero preset
    md.enable(['paragraph', 'newline', 'text']);
  } else {
    md = new MarkdownIt({
      ...this.mdOptions,
      highlight: (str: string, lang: string) => {
        if (lang && hljs.getLanguage(lang)) {
          try {
            return hljs.highlight(str, { language: lang, ignoreIllegals: true }).value;
          } catch {
            // Fall through to default
          }
        }
        return ''; // Use external default escaping
      }
    });
  }

  // Apply plugins only for default preset (plugins may not work well with commonmark/zero)
  if (this.currentPreset === 'default') {
    md.use(sub) // Subscript: ~text~ -> <sub>text</sub>
      .use(sup) // Superscript: ^text^ -> <sup>text</sup>
      .use(footnote) // Footnotes: [^1] and [^1]: footnote text
      .use(deflist) // Definition lists
      .use(abbr) // Abbreviations: *[abbr]: full text
      .use(emoji) // Emoji: :smile: -> 😄
      .use(ins) // Inserted text: ++text++ -> <ins>text</ins>
      .use(mark) // Marked text: ==text== -> <mark>text</mark>
      .use(taskLists, { enabled: true, label: true, labelAfter: true }) // Task lists: - [x] done
      .use(anchor, { permalink: false }) // Header anchors
      .use(tocDoneRight); // Table of contents: ${toc}
  }
  return md;
}
/**
 * Replaces the renderer with a freshly built one, re-applies the link
 * renderer configuration to it and refreshes the preview.
 */
private updateMarkdownIt(): void {
  const rebuilt = this.createMarkdownIt();
  this.md = rebuilt;
  // Must run after the new instance is stored on this.md.
  this.configureLinkRenderer();
  this.updatePreview();
}
/**
 * Renders the editor's Markdown into the preview pane, then starts the
 * asynchronous Mermaid pass over the new markup. Does nothing until both
 * the editor and the preview elements exist.
 */
private updatePreview(): void {
  const { editor, preview } = this;
  if (!editor || !preview) return;

  const source = editor.value;
  preview.innerHTML = this.md.render(source);
  this.renderMermaidDiagrams();
}
/**
 * Replaces every `pre > code.language-mermaid` block in the preview with
 * the SVG produced by Mermaid. Blocks are rendered one after another; a
 * block that fails to render is replaced by an inline error message.
 */
private async renderMermaidDiagrams(): Promise<void> {
  if (!this.preview) return;

  const mermaidBlocks = Array.from(
    this.preview.querySelectorAll<HTMLElement>('pre > code.language-mermaid')
  );

  for (const [i, block] of mermaidBlocks.entries()) {
    const code = (block.textContent || '').trim();
    const pre = block.parentElement;
    if (!pre || !code) continue;

    try {
      // Index plus timestamp keeps ids distinct across blocks and re-renders.
      const id = `mermaid-diagram-${i}-${Date.now()}`;
      const { svg } = await mermaid.render(id, code);
      const wrapper = document.createElement('div');
      wrapper.className = 'mermaid-diagram';
      wrapper.innerHTML = svg;
      pre.replaceWith(wrapper);
    } catch (error) {
      console.error('Mermaid rendering error:', error);
      // Not every thrown value is an Error; avoid printing "undefined".
      const message = error instanceof Error ? error.message : String(error);
      const errorDiv = document.createElement('div');
      errorDiv.className = 'mermaid-error';
      errorDiv.textContent = `Mermaid Error: ${message}`;
      pre.replaceWith(errorDiv);
    }
  }
}
/**
 * Replaces the editor text with `content` and refreshes the preview.
 * Has no effect when the editor element does not exist.
 */
public setContent(content: string): void {
  const editor = this.editor;
  if (!editor) return;
  editor.value = content;
  this.updatePreview();
}
/** Returns the current Markdown source, or an empty string when there is no editor. */
public getContent(): string {
  const editor = this.editor;
  return editor ? editor.value : '';
}
/** Renders the current Markdown source to an HTML string with the active renderer. */
public getHtml(): string {
  const source = this.getContent();
  return this.md.render(source);
}
/**
 * Opens the browser's print dialog for the current page. Saving the result
 * as a PDF is left to the browser's own print-to-PDF support.
 */
private exportPdf(): void {
// Use browser's native print functionality
window.print();
}
/**
 * Wraps the rendered Markdown (see getHtml) in a complete standalone HTML
 * document with an embedded stylesheet covering base typography, tables,
 * highlight.js token colours, the Markdown plugin elements and Mermaid
 * diagrams.
 *
 * NOTE(review): no caller is visible in this part of the file; exportPdf
 * uses window.print() instead. Confirm whether this method is still needed.
 */
private getStyledHtml(): string {
const content = this.getHtml();
return `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
font-size: 14px;
line-height: 1.6;
color: #333;
max-width: 800px;
margin: 0 auto;
padding: 40px 20px;
}
h1, h2, h3, h4, h5, h6 {
margin-top: 1.5em;
margin-bottom: 0.5em;
font-weight: 600;
line-height: 1.25;
}
h1 { font-size: 2em; border-bottom: 1px solid #eee; padding-bottom: 0.3em; }
h2 { font-size: 1.5em; border-bottom: 1px solid #eee; padding-bottom: 0.3em; }
h3 { font-size: 1.25em; }
h4 { font-size: 1em; }
p { margin: 1em 0; }
a { color: #0366d6; text-decoration: none; }
a:hover { text-decoration: underline; }
code {
font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
font-size: 0.9em;
background: #f6f8fa;
padding: 0.2em 0.4em;
border-radius: 3px;
}
pre {
background: #f6f8fa;
padding: 16px;
overflow: auto;
border-radius: 6px;
line-height: 1.45;
}
pre code {
background: none;
padding: 0;
}
blockquote {
margin: 1em 0;
padding: 0 1em;
color: #6a737d;
border-left: 4px solid #dfe2e5;
}
ul, ol {
margin: 1em 0;
padding-left: 2em;
}
li { margin: 0.25em 0; }
table {
border-collapse: collapse;
width: 100%;
margin: 1em 0;
}
th, td {
border: 1px solid #dfe2e5;
padding: 8px 12px;
text-align: left;
}
th {
background: #f6f8fa;
font-weight: 600;
}
tr:nth-child(even) { background: #f6f8fa; }
hr {
border: none;
border-top: 1px solid #eee;
margin: 2em 0;
}
img {
max-width: 100%;
height: auto;
}
/* Syntax highlighting - GitHub style */
.hljs {
color: #24292e;
background: #f6f8fa;
}
.hljs-comment,
.hljs-quote {
color: #6a737d;
font-style: italic;
}
.hljs-keyword,
.hljs-selector-tag,
.hljs-subst {
color: #d73a49;
}
.hljs-number,
.hljs-literal,
.hljs-variable,
.hljs-template-variable,
.hljs-tag .hljs-attr {
color: #005cc5;
}
.hljs-string,
.hljs-doctag {
color: #032f62;
}
.hljs-title,
.hljs-section,
.hljs-selector-id {
color: #6f42c1;
font-weight: bold;
}
.hljs-type,
.hljs-class .hljs-title {
color: #6f42c1;
}
.hljs-tag,
.hljs-name,
.hljs-attribute {
color: #22863a;
}
.hljs-regexp,
.hljs-link {
color: #032f62;
}
.hljs-symbol,
.hljs-bullet {
color: #e36209;
}
.hljs-built_in,
.hljs-builtin-name {
color: #005cc5;
}
.hljs-meta {
color: #6a737d;
font-weight: bold;
}
.hljs-deletion {
color: #b31d28;
background-color: #ffeef0;
}
.hljs-addition {
color: #22863a;
background-color: #f0fff4;
}
/* Plugin styles */
mark {
background-color: #fff3cd;
padding: 0.1em 0.2em;
border-radius: 2px;
}
ins {
text-decoration: none;
background-color: #d4edda;
padding: 0.1em 0.2em;
border-radius: 2px;
}
sub, sup {
font-size: 0.75em;
}
.task-list-item {
list-style-type: none;
margin-left: -1.5em;
}
.task-list-item input[type="checkbox"] {
margin-right: 0.5em;
}
.footnotes {
margin-top: 2em;
padding-top: 1em;
border-top: 1px solid #eee;
font-size: 0.9em;
}
.footnotes-sep {
display: none;
}
.footnote-ref {
font-size: 0.75em;
vertical-align: super;
}
.footnote-backref {
font-size: 0.75em;
margin-left: 0.25em;
}
dl {
margin: 1em 0;
}
dt {
font-weight: 600;
margin-top: 1em;
}
dd {
margin-left: 2em;
margin-top: 0.25em;
color: #6a737d;
}
abbr {
text-decoration: underline dotted;
cursor: help;
}
.table-of-contents {
background: #f6f8fa;
padding: 1em 1.5em;
border-radius: 6px;
margin: 1em 0;
}
.table-of-contents ul {
margin: 0;
padding-left: 1.5em;
}
.table-of-contents li {
margin: 0.25em 0;
}
/* Mermaid diagrams */
.mermaid-diagram {
display: flex;
justify-content: center;
margin: 1.5em 0;
padding: 1em;
background: #f6f8fa;
border-radius: 6px;
}
.mermaid-diagram svg {
max-width: 100%;
height: auto;
}
.mermaid-error {
color: #cb2431;
background: #ffeef0;
padding: 1em;
border-radius: 6px;
font-family: monospace;
font-size: 0.9em;
}
</style>
</head>
<body>
${content}
</body>
</html>`;
}
/**
 * Translates the component's markup. The generic pass is followed by a
 * manual pass over the preset `<option>` elements, whose text is set
 * directly through i18next when a translation for the key exists.
 */
private applyI18n(): void {
  // Apply translations to elements within this component
  applyTranslations();

  // Special handling for select options (data-i18n on options doesn't work with applyTranslations)
  const presetSelect = document.getElementById('mdPreset') as HTMLSelectElement;
  if (!presetSelect) return;

  const translatable = Array.from(presetSelect.querySelectorAll('option[data-i18n]'));
  for (const option of translatable) {
    const key = option.getAttribute('data-i18n');
    if (!key) continue;

    // Use i18next directly for option text
    const translated = (window as any).i18next?.t(key);
    // A result equal to the key is treated as "no translation" and skipped.
    if (translated && translated !== key) {
      option.textContent = translated;
    }
  }
}
/**
 * Tears the editor down by emptying its container element, which removes
 * all markup rendered into it.
 */
public destroy(): void {
this.container.innerHTML = '';
}
}

304
src/js/utils/ocr.ts Normal file
View File

@@ -0,0 +1,304 @@
import Tesseract from 'tesseract.js';
import { PDFDocument, StandardFonts, rgb, PDFFont } from 'pdf-lib';
import fontkit from '@pdf-lib/fontkit';
import * as pdfjsLib from 'pdfjs-dist';
import { getFontForLanguage } from './font-loader.js';
import { OcrPage, OcrLine } from '@/types';
import {
parseHocrDocument,
calculateWordTransform,
calculateSpaceTransform,
} from './hocr-transform.js';
import { getPDFDocument } from './helpers.js';
/** Settings accepted by performOcr. */
export interface OcrOptions {
/** Tesseract language code(s); several codes are joined with '+'. */
language: string;
/** Scale factor used for the page viewport when rasterising each page. */
resolution: number;
/** When true, each rendered page is thresholded to black/white before recognition. */
binarize: boolean;
/** Characters Tesseract may output; an empty string leaves the whitelist unset. */
whitelist: string;
/** Optional progress callback receiving a status text and a progress value. */
onProgress?: (status: string, progress: number) => void;
}
/** Output of performOcr. */
export interface OcrResult {
/** Serialized bytes of the generated PDF. */
pdfBytes: Uint8Array;
/** The in-memory pdf-lib document that pdfBytes was saved from. */
pdfDoc: PDFDocument;
/** Recognised text of all pages, each page followed by a blank line. */
fullText: string;
}
/**
 * Converts the whole canvas behind `ctx` to pure black and white in place.
 * Each pixel's weighted brightness is compared against 128: brighter
 * pixels become white, all others black. The alpha channel is untouched.
 */
function binarizeCanvas(ctx: CanvasRenderingContext2D) {
  const { width, height } = ctx.canvas;
  const imageData = ctx.getImageData(0, 0, width, height);
  const pixels = imageData.data;

  // Pixels are stored as consecutive RGBA quadruples.
  for (let offset = 0; offset < pixels.length; offset += 4) {
    const brightness =
      0.299 * pixels[offset] +
      0.587 * pixels[offset + 1] +
      0.114 * pixels[offset + 2];
    const value = brightness > 128 ? 255 : 0;
    pixels[offset] = value;
    pixels[offset + 1] = value;
    pixels[offset + 2] = value;
  }

  ctx.putImageData(imageData, 0, 0);
}
/**
 * Draws the recognised words of one OCR page onto `page` as fully
 * transparent text (opacity 0) positioned by the hOCR transforms.
 *
 * Words containing non-ASCII characters are drawn with `primaryFont`, all
 * others with `latinFont`. When a line requests injected word breaks, a
 * transparent space is drawn between consecutive words. Drawing failures
 * are logged as warnings and the word is skipped.
 */
function drawOcrTextLayer(
  page: ReturnType<typeof PDFDocument.prototype.addPage>,
  ocrPage: OcrPage,
  pageHeight: number,
  primaryFont: PDFFont,
  latinFont: PDFFont
): void {
  for (const line of ocrPage.lines as OcrLine[]) {
    const lineWords = line.words;

    for (let index = 0; index < lineWords.length; index++) {
      const current = lineWords[index];

      // Strip control and bidi-control characters before drawing.
      const text = current.text.replace(
        /[\u0000-\u001F\u007F-\u009F\u200E\u200F\u202A-\u202E\uFEFF]/g,
        ''
      );
      if (!text.trim()) continue;

      const needsScriptFont = /[^\u0000-\u007F]/.test(text);
      const font = needsScriptFont ? primaryFont : latinFont;
      if (!font) {
        console.warn('Font not available for text: "' + text + '"');
        continue;
      }

      // Width measurement that reports 0 instead of throwing.
      const measureText = (txt: string, size: number) => {
        try {
          return font.widthOfTextAtSize(txt, size);
        } catch {
          return 0;
        }
      };

      const transform = calculateWordTransform(
        current,
        line,
        pageHeight,
        measureText
      );
      if (transform.fontSize <= 0) continue;

      try {
        page.drawText(text, {
          x: transform.x,
          y: transform.y,
          font,
          size: transform.fontSize,
          color: rgb(0, 0, 0),
          opacity: 0,
        });
      } catch (error) {
        console.warn(`Could not draw text "${text}":`, error);
      }

      const isLastWord = index >= lineWords.length - 1;
      if (!line.injectWordBreaks || isLastWord) continue;

      const measureSpace = (size: number) => {
        try {
          return font.widthOfTextAtSize(' ', size);
        } catch {
          return 0;
        }
      };

      const spaceTransform = calculateSpaceTransform(
        current,
        lineWords[index + 1],
        line,
        pageHeight,
        measureSpace
      );
      if (!spaceTransform || !(spaceTransform.horizontalScale > 0.1)) continue;

      try {
        page.drawText(' ', {
          x: spaceTransform.x,
          y: spaceTransform.y,
          font,
          size: spaceTransform.fontSize,
          color: rgb(0, 0, 0),
          opacity: 0,
        });
      } catch {
        console.warn(`Could not draw space between words`);
      }
    }
  }
}
/**
 * Encodes the canvas as PNG and returns the raw bytes.
 * Rejects when the browser cannot produce or read the image blob.
 */
function canvasToPngBytes(canvas: HTMLCanvasElement): Promise<Uint8Array> {
  return new Promise<Uint8Array>(function (resolve, reject) {
    canvas.toBlob(function (blob) {
      if (!blob) {
        reject(new Error('Failed to create image blob'));
        return;
      }
      const reader = new FileReader();
      reader.onload = function () {
        resolve(new Uint8Array(reader.result as ArrayBuffer));
      };
      reader.onerror = function () {
        reject(new Error('Failed to read image data'));
      };
      reader.readAsArrayBuffer(blob);
    }, 'image/png');
  });
}

/**
 * Runs OCR over every page of a PDF and builds a new, searchable PDF.
 *
 * Each page is rasterised at `options.resolution`, optionally binarised,
 * recognised with Tesseract, embedded into the new document as a PNG and
 * overlaid with an invisible text layer derived from the hOCR output.
 *
 * @param pdfBytes - Source PDF data.
 * @param options - Language, render scale, preprocessing and progress callback.
 * @returns The saved PDF bytes, the pdf-lib document and the concatenated text.
 * @throws If a canvas context or page image cannot be created, or if
 *   loading, rendering or recognition fails. The Tesseract worker is
 *   terminated on every exit path once it has been created.
 */
export async function performOcr(
  pdfBytes: Uint8Array | ArrayBuffer,
  options: OcrOptions
): Promise<OcrResult> {
  const { language, resolution, binarize, whitelist, onProgress } = options;
  const progress = onProgress || (() => {});

  // Created before the worker so a failure here cannot leave a worker running.
  const newPdfDoc = await PDFDocument.create();
  newPdfDoc.registerFontkit(fontkit);

  const worker = await Tesseract.createWorker(language, 1, {
    logger: function (m: { status: string; progress: number }) {
      progress(m.status, m.progress || 0);
    },
  });

  let fullText = '';
  try {
    await worker.setParameters({
      tessjs_create_hocr: '1',
      tessedit_pageseg_mode: Tesseract.PSM.AUTO,
    });
    if (whitelist) {
      await worker.setParameters({
        tessedit_char_whitelist: whitelist,
      });
    }

    const pdf = await getPDFDocument({ data: pdfBytes }).promise;

    progress('Loading fonts...', 0);
    const selectedLangs = language.split('+');
    const cjkLangs = ['jpn', 'chi_sim', 'chi_tra', 'kor'];
    const indicLangs = [
      'hin',
      'ben',
      'guj',
      'kan',
      'mal',
      'ori',
      'pan',
      'tam',
      'tel',
      'sin',
    ];
    const priorityLangs = [...cjkLangs, ...indicLangs, 'ara', 'rus', 'ukr'];
    // The first priority-list language, if any, decides the primary font.
    const primaryLang =
      selectedLangs.find((l) => priorityLangs.includes(l)) ||
      selectedLangs[0] ||
      'eng';
    const hasCJK = selectedLangs.some((l) => cjkLangs.includes(l));
    const hasIndic = selectedLangs.some((l) => indicLangs.includes(l));
    const hasLatin =
      selectedLangs.some((l) => !priorityLangs.includes(l)) ||
      selectedLangs.includes('eng');
    // Only this combination embeds a second, separate Latin font.
    const isIndicPlusLatin = hasIndic && hasLatin && !hasCJK;

    let primaryFont: PDFFont;
    let latinFont: PDFFont;
    try {
      if (isIndicPlusLatin) {
        const [scriptFontBytes, latinFontBytes] = await Promise.all([
          getFontForLanguage(primaryLang),
          getFontForLanguage('eng'),
        ]);
        primaryFont = await newPdfDoc.embedFont(scriptFontBytes, {
          subset: false,
        });
        latinFont = await newPdfDoc.embedFont(latinFontBytes, {
          subset: false,
        });
      } else {
        const fontBytes = await getFontForLanguage(primaryLang);
        primaryFont = await newPdfDoc.embedFont(fontBytes, { subset: false });
        latinFont = primaryFont;
      }
    } catch (e) {
      console.error('Font loading failed, falling back to Helvetica', e);
      primaryFont = await newPdfDoc.embedFont(StandardFonts.Helvetica);
      latinFont = primaryFont;
    }

    for (let i = 1; i <= pdf.numPages; i++) {
      progress(
        `Processing page ${i} of ${pdf.numPages}`,
        (i - 1) / pdf.numPages
      );
      const page = await pdf.getPage(i);
      const viewport = page.getViewport({ scale: resolution });
      const canvas = document.createElement('canvas');
      canvas.width = viewport.width;
      canvas.height = viewport.height;
      const context = canvas.getContext('2d');
      if (!context) throw new Error('Failed to create canvas context');
      await page.render({ canvasContext: context, viewport, canvas }).promise;
      if (binarize) {
        binarizeCanvas(context);
      }

      const result = await worker.recognize(
        canvas,
        {},
        { text: true, hocr: true }
      );
      const data = result.data;

      const newPage = newPdfDoc.addPage([viewport.width, viewport.height]);
      const pngImageBytes = await canvasToPngBytes(canvas);

      // Release canvas memory
      canvas.width = 0;
      canvas.height = 0;

      const pngImage = await newPdfDoc.embedPng(pngImageBytes);
      newPage.drawImage(pngImage, {
        x: 0,
        y: 0,
        width: viewport.width,
        height: viewport.height,
      });

      if (data.hocr) {
        const ocrPage = parseHocrDocument(data.hocr);
        drawOcrTextLayer(
          newPage,
          ocrPage,
          viewport.height,
          primaryFont,
          latinFont
        );
      }
      fullText += data.text + '\n\n';
    }
  } finally {
    // Covers parameter setup, PDF loading and font embedding, not just the loop.
    await worker.terminate();
  }

  const savedBytes = await newPdfDoc.save();
  return {
    pdfBytes: new Uint8Array(savedBytes),
    pdfDoc: newPdfDoc,
    fullText,
  };
}

View File

@@ -0,0 +1,215 @@
import * as pdfjsLib from 'pdfjs-dist';
import type { PDFDocumentProxy } from 'pdfjs-dist';
import { PreviewState } from '@/types';
// Point pdf.js at its bundled worker script, resolved relative to this module.
pdfjsLib.GlobalWorkerOptions.workerSrc = new URL(
'pdfjs-dist/build/pdf.worker.min.mjs',
import.meta.url
).toString();
// Module-wide preview state shared by all functions in this file.
const state: PreviewState = {
// Lazily created modal element (see getOrCreateModal)
modal: null,
// Document currently shown in the preview
pdfjsDoc: null,
// 1-based number of the page last rendered successfully
currentPage: 1,
totalPages: 0,
// True while the modal is visible; gates the keyboard shortcuts
isOpen: false,
// NOTE(review): not read or written in the visible code — confirm usage
container: null,
};
/**
 * Returns the preview modal, building it on first use.
 *
 * The first call creates the overlay with close/previous/next buttons, a
 * canvas container and a page-info label, wires the click handlers,
 * appends it to the document body and caches it in `state.modal`.
 */
function getOrCreateModal(): HTMLElement {
  const cached = state.modal;
  if (cached) return cached;

  const overlay = document.createElement('div');
  overlay.id = 'page-preview-modal';
  overlay.className =
    'fixed inset-0 bg-black/80 backdrop-blur-sm z-[60] flex items-center justify-center opacity-0 pointer-events-none transition-opacity duration-200';
  overlay.innerHTML = `
<button id="preview-close" class="absolute top-4 right-4 text-white/70 hover:text-white z-10 transition-colors" title="Close (Esc)">
<svg class="w-8 h-8" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12"/></svg>
</button>
<button id="preview-prev" class="absolute left-4 top-1/2 -translate-y-1/2 text-white/50 hover:text-white transition-colors p-2" title="Previous page">
<svg class="w-10 h-10" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" d="M15 19l-7-7 7-7"/></svg>
</button>
<button id="preview-next" class="absolute right-4 top-1/2 -translate-y-1/2 text-white/50 hover:text-white transition-colors p-2" title="Next page">
<svg class="w-10 h-10" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" d="M9 5l7 7-7 7"/></svg>
</button>
<div id="preview-canvas-container" class="flex items-center justify-center max-w-[90vw] max-h-[85vh]">
<div id="preview-loading" class="text-white/60 text-sm">Loading...</div>
</div>
<div id="preview-page-info" class="absolute bottom-6 left-1/2 -translate-x-1/2 bg-gray-900/80 text-white text-sm px-4 py-2 rounded-full backdrop-blur-sm"></div>
`;

  // A click on the backdrop itself (not on a child) dismisses the preview.
  overlay.addEventListener('click', (event) => {
    if (event.target === overlay) hidePreview();
  });

  const buttonHandlers: Array<[string, (event: Event) => void]> = [
    ['#preview-close', hidePreview],
    ['#preview-prev', () => navigatePage(-1)],
    ['#preview-next', () => navigatePage(1)],
  ];
  for (const [selector, handler] of buttonHandlers) {
    overlay.querySelector(selector)!.addEventListener('click', handler);
  }

  document.body.appendChild(overlay);
  state.modal = overlay;
  return overlay;
}
/**
 * Renders one page of the current preview document into the modal.
 *
 * The requested page becomes `state.currentPage` immediately, so that
 * further navigation is relative to it even while rendering is still in
 * progress. If a newer request (or another document) supersedes this one
 * before rendering finishes, the stale result is discarded instead of
 * overwriting the newer page.
 *
 * @param pageNumber - 1-based page number to show.
 */
async function renderPreviewPage(pageNumber: number): Promise<void> {
  const doc = state.pdfjsDoc;
  if (!doc) return;

  const modal = getOrCreateModal();
  const container = modal.querySelector(
    '#preview-canvas-container'
  ) as HTMLElement;
  const pageInfo = modal.querySelector('#preview-page-info') as HTMLElement;
  const prevBtn = modal.querySelector('#preview-prev') as HTMLElement;
  const nextBtn = modal.querySelector('#preview-next') as HTMLElement;

  state.currentPage = pageNumber;
  // True once a later call has targeted another page or document.
  const isStale = (): boolean =>
    state.currentPage !== pageNumber || state.pdfjsDoc !== doc;

  container.innerHTML = '<div class="text-white/60 text-sm">Loading...</div>';
  pageInfo.textContent = `Page ${pageNumber} of ${state.totalPages}`;
  prevBtn.style.visibility = pageNumber > 1 ? 'visible' : 'hidden';
  nextBtn.style.visibility =
    pageNumber < state.totalPages ? 'visible' : 'hidden';

  try {
    const page = await doc.getPage(pageNumber);
    if (isStale()) return;

    const scale = 2.0;
    const viewport = page.getViewport({ scale });
    const canvas = document.createElement('canvas');
    canvas.width = viewport.width;
    canvas.height = viewport.height;
    canvas.className =
      'max-w-[90vw] max-h-[85vh] object-contain rounded-lg shadow-2xl';
    canvas.style.width = 'auto';
    canvas.style.height = 'auto';
    canvas.style.maxWidth = '90vw';
    canvas.style.maxHeight = '85vh';

    const ctx = canvas.getContext('2d');
    if (!ctx) throw new Error('Failed to create canvas context');
    await page.render({ canvasContext: ctx, viewport, canvas }).promise;
    if (isStale()) return;

    container.innerHTML = '';
    container.appendChild(canvas);
  } catch (err) {
    console.error('Preview render error:', err);
    // Do not replace a newer page's content with this request's error.
    if (isStale()) return;
    container.innerHTML =
      '<div class="text-red-400 text-sm">Failed to render page</div>';
  }
}
/**
 * Moves the preview by `delta` pages relative to the current page.
 * Requests that would leave the range 1..totalPages are ignored.
 */
function navigatePage(delta: number): void {
  const target = state.currentPage + delta;
  const inRange = target >= 1 && target <= state.totalPages;
  if (!inRange) return;
  renderPreviewPage(target);
}
/**
 * Opens the preview modal on the given page of `pdfjsDoc`.
 * Page scrolling of the document body is disabled while the modal is open.
 *
 * @param pdfjsDoc - Document to preview.
 * @param pageNumber - 1-based page to show first.
 * @param totalPages - Page count used for the label and navigation limits.
 */
export function showPreview(
  pdfjsDoc: PDFDocumentProxy,
  pageNumber: number,
  totalPages: number
): void {
  Object.assign(state, { pdfjsDoc, totalPages, isOpen: true });

  const overlay = getOrCreateModal();
  overlay.classList.remove('opacity-0', 'pointer-events-none');
  document.body.style.overflow = 'hidden';

  renderPreviewPage(pageNumber);
}
/**
 * Hides the preview modal and restores body scrolling.
 * Does nothing when the modal has never been created.
 */
export function hidePreview(): void {
  const overlay = state.modal;
  if (!overlay) return;

  state.isOpen = false;
  overlay.classList.add('opacity-0', 'pointer-events-none');
  document.body.style.overflow = '';
}
/**
 * Keyboard shortcuts for the open preview: Escape closes it, the left and
 * right arrow keys move one page back or forward. Ignored while closed.
 */
function handleKeydown(e: KeyboardEvent): void {
  if (!state.isOpen) return;

  const key = e.key;
  if (key === 'Escape') {
    hidePreview();
  } else if (key === 'ArrowLeft') {
    e.preventDefault();
    navigatePage(-1);
  } else if (key === 'ArrowRight') {
    e.preventDefault();
    navigatePage(1);
  }
}

// Registered once, at module load, for the whole document.
document.addEventListener('keydown', handleKeydown);
/**
 * Latest document registered for each container. The container's single
 * Space-key listener reads from here, so repeated initPagePreview calls
 * neither stack listeners nor keep previewing an outdated document.
 */
const previewDocsByContainer = new WeakMap<HTMLElement, PDFDocumentProxy>();

/**
 * Derives the 1-based page number of a thumbnail from `data-page-number`
 * (already 1-based) or `data-page-index` (0-based). Falls back to 1 when
 * neither attribute is present or the value is not a number.
 */
function resolvePreviewPageNumber(el: HTMLElement): number {
  const { pageNumber, pageIndex } = el.dataset;
  let pageNum = 1;
  if (pageNumber) {
    pageNum = parseInt(pageNumber, 10);
  } else if (pageIndex !== undefined) {
    pageNum = parseInt(pageIndex, 10) + 1;
  }
  return Number.isNaN(pageNum) ? 1 : pageNum;
}

/**
 * Adds a "Preview" button to every page thumbnail inside `container` and
 * lets the Space key open the preview for the hovered thumbnail.
 *
 * Safe to call repeatedly on the same container: thumbnails that already
 * carry a button are skipped and the key listener is attached only once.
 *
 * @param container - Element holding the thumbnails.
 * @param pdfjsDoc - Document the thumbnails belong to.
 * @param options - Currently unused by this function.
 */
export function initPagePreview(
  container: HTMLElement,
  pdfjsDoc: PDFDocumentProxy,
  options: { pageAttr?: string } = {}
): void {
  const totalPages = pdfjsDoc.numPages;
  const thumbnails = container.querySelectorAll<HTMLElement>(
    '[data-page-number], [data-page-index], [data-pageIndex]'
  );

  thumbnails.forEach((thumb) => {
    if (thumb.dataset.previewInit) return;
    thumb.dataset.previewInit = 'true';

    const pageNum = resolvePreviewPageNumber(thumb);

    const icon = document.createElement('button');
    icon.className =
      'page-preview-btn absolute bottom-1 right-1 bg-gray-900/80 hover:bg-indigo-600 text-white/70 hover:text-white rounded-full w-7 h-7 flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity z-10';
    icon.title = 'Preview';
    icon.innerHTML =
      '<svg class="w-4 h-4" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z"/></svg>';
    icon.addEventListener('click', (e) => {
      // Keep the click from reaching the thumbnail's own handlers.
      e.stopPropagation();
      e.preventDefault();
      showPreview(pdfjsDoc, pageNum, totalPages);
    });

    // The button is absolutely positioned and revealed on group hover.
    thumb.classList.add('relative', 'group');
    thumb.appendChild(icon);
  });

  const hasKeyListener = previewDocsByContainer.has(container);
  previewDocsByContainer.set(container, pdfjsDoc);
  if (hasKeyListener) return;

  container.addEventListener('keydown', (e) => {
    if (e.key !== ' ' || state.isOpen) return;

    const hovered = container.querySelector<HTMLElement>(
      '[data-preview-init]:hover'
    );
    const currentDoc = previewDocsByContainer.get(container);
    if (!hovered || !currentDoc) return;

    e.preventDefault();
    showPreview(
      currentDoc,
      resolvePreviewPageNumber(hovered),
      currentDoc.numPages
    );
  });
}

View File

@@ -0,0 +1,524 @@
import { PDFDocument, degrees, rgb, StandardFonts, PageSizes } from 'pdf-lib';
/**
 * Concatenates several PDFs, in list order, into one document.
 *
 * @param pdfBytesList - Raw bytes of each source PDF.
 * @returns Bytes of the merged PDF; no default page is added on save.
 */
export async function mergePdfs(
  pdfBytesList: Uint8Array[]
): Promise<Uint8Array> {
  const output = await PDFDocument.create();

  for (const sourceBytes of pdfBytesList) {
    const source = await PDFDocument.load(sourceBytes);
    const pages = await output.copyPages(source, source.getPageIndices());
    for (const page of pages) {
      output.addPage(page);
    }
  }

  const serialized = await output.save({ addDefaultPage: false });
  return new Uint8Array(serialized);
}
/**
 * Extracts the given pages into a new PDF.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param pageIndices - 0-based indices of the pages to copy, in output order.
 * @returns Bytes of the new PDF containing only those pages.
 */
export async function splitPdf(
  pdfBytes: Uint8Array,
  pageIndices: number[]
): Promise<Uint8Array> {
  const source = await PDFDocument.load(pdfBytes);
  const output = await PDFDocument.create();

  const pages = await output.copyPages(source, pageIndices);
  for (const page of pages) {
    output.addPage(page);
  }

  const serialized = await output.save();
  return new Uint8Array(serialized);
}
/**
 * Appends page `pageIndex` of `srcDoc` to `targetDoc`, rotated by
 * `extraRotation` degrees on top of the page's existing rotation.
 *
 * When the total is a multiple of 90 the page is copied and its rotation
 * value set. Otherwise the page is embedded and drawn rotated onto a new
 * page sized to the rotated content's bounding box.
 */
async function appendRotatedPage(
  targetDoc: PDFDocument,
  srcDoc: PDFDocument,
  pageIndex: number,
  extraRotation: number
): Promise<void> {
  const originalPage = srcDoc.getPage(pageIndex);
  const currentRotation = originalPage.getRotation().angle;
  const totalRotation = currentRotation + extraRotation;

  if (totalRotation % 90 === 0) {
    const [copiedPage] = await targetDoc.copyPages(srcDoc, [pageIndex]);
    copiedPage.setRotation(degrees(totalRotation));
    targetDoc.addPage(copiedPage);
    return;
  }

  const embeddedPage = await targetDoc.embedPage(originalPage);
  const { width, height } = embeddedPage.scale(1);
  const angleRad = (totalRotation * Math.PI) / 180;
  const absCos = Math.abs(Math.cos(angleRad));
  const absSin = Math.abs(Math.sin(angleRad));

  // Bounding box of the rotated rectangle
  const newWidth = width * absCos + height * absSin;
  const newHeight = width * absSin + height * absCos;
  const newPage = targetDoc.addPage([newWidth, newHeight]);

  // Offset so the rotated content's centre lands on the new page's centre.
  const x =
    newWidth / 2 -
    ((width / 2) * Math.cos(angleRad) - (height / 2) * Math.sin(angleRad));
  const y =
    newHeight / 2 -
    ((width / 2) * Math.sin(angleRad) + (height / 2) * Math.cos(angleRad));

  newPage.drawPage(embeddedPage, {
    x,
    y,
    width,
    height,
    rotate: degrees(totalRotation),
  });
}

/**
 * Rotates every page of a PDF by the same angle.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param angle - Degrees added to each page's existing rotation.
 * @returns Bytes of the rotated PDF.
 */
export async function rotatePdfUniform(
  pdfBytes: Uint8Array,
  angle: number
): Promise<Uint8Array> {
  const srcDoc = await PDFDocument.load(pdfBytes);
  const newPdfDoc = await PDFDocument.create();
  const pageCount = srcDoc.getPageCount();
  for (let i = 0; i < pageCount; i++) {
    await appendRotatedPage(newPdfDoc, srcDoc, i, angle);
  }
  return new Uint8Array(await newPdfDoc.save());
}

/**
 * Rotates each page of a PDF by its own angle.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param rotations - Degrees to add per page, indexed by 0-based page
 *   position; missing or falsy entries leave the page's rotation as is.
 * @returns Bytes of the rotated PDF.
 */
export async function rotatePdfPages(
  pdfBytes: Uint8Array,
  rotations: number[]
): Promise<Uint8Array> {
  const srcDoc = await PDFDocument.load(pdfBytes);
  const newPdfDoc = await PDFDocument.create();
  const pageCount = srcDoc.getPageCount();
  for (let i = 0; i < pageCount; i++) {
    await appendRotatedPage(newPdfDoc, srcDoc, i, rotations[i] || 0);
  }
  return new Uint8Array(await newPdfDoc.save());
}
/**
 * Produces a copy of the PDF without the listed pages.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param pagesToDelete - 1-based page numbers to drop.
 * @returns Bytes of the PDF containing the remaining pages in order.
 * @throws Error('Cannot delete all pages') when no page would remain.
 */
export async function deletePdfPages(
  pdfBytes: Uint8Array,
  pagesToDelete: Set<number>
): Promise<Uint8Array> {
  const source = await PDFDocument.load(pdfBytes);
  const pageCount = source.getPageCount();

  // 0-based indices of pages whose 1-based number is not marked for deletion
  const keptIndices = Array.from({ length: pageCount }, (_, index) => index)
    .filter((index) => !pagesToDelete.has(index + 1));

  if (keptIndices.length === 0) throw new Error('Cannot delete all pages');

  const output = await PDFDocument.create();
  const pages = await output.copyPages(source, keptIndices);
  for (const page of pages) {
    output.addPage(page);
  }
  return new Uint8Array(await output.save());
}
/**
 * Parses a comma-separated page specification such as "1-3, 5, 8-" into
 * the set of 1-based page numbers it selects, in order of appearance.
 *
 * - "a-b" selects pages a..b, clamped to 1..totalPages.
 * - "-b" and "a-" are open-ended (from page 1 / up to the last page).
 * - A range with a non-numeric bound, or with no bound at all ("-"), is
 *   ignored rather than being treated as "all pages".
 * - Single pages outside 1..totalPages and unparsable entries are ignored.
 */
function collectPageNumbers(spec: string, totalPages: number): Set<number> {
  const pages = new Set<number>();

  for (const part of spec.split(',').map((s) => s.trim())) {
    if (part.includes('-')) {
      const [startStr = '', endStr = ''] = part
        .split('-')
        .map((s) => s.trim());
      if (!startStr && !endStr) continue;

      const start = startStr ? parseInt(startStr, 10) : 1;
      const end = endStr ? parseInt(endStr, 10) : totalPages;
      if (Number.isNaN(start) || Number.isNaN(end)) continue;

      const first = Math.max(1, start);
      const last = Math.min(totalPages, end);
      for (let page = first; page <= last; page++) pages.add(page);
    } else {
      const page = parseInt(part, 10);
      if (page >= 1 && page <= totalPages) pages.add(page);
    }
  }

  return pages;
}

/**
 * Converts a page specification into sorted, de-duplicated 0-based indices.
 *
 * @param rangeStr - Specification such as "1-3,5" (1-based page numbers).
 * @param totalPages - Number of pages in the document.
 * @returns Ascending 0-based page indices.
 */
export function parsePageRange(rangeStr: string, totalPages: number): number[] {
  return Array.from(collectPageNumbers(rangeStr, totalPages), (page) => page - 1)
    .sort((a, b) => a - b);
}

/**
 * Converts a page specification into the set of 1-based pages to delete.
 *
 * @param str - Specification such as "2,4-6" (1-based page numbers).
 * @param totalPages - Number of pages in the document.
 * @returns Set of 1-based page numbers.
 */
export function parseDeletePages(str: string, totalPages: number): Set<number> {
  return collectPageNumbers(str, totalPages);
}
/** Settings consumed by `addTextWatermark`. */
export interface TextWatermarkOptions {
/** Watermark text; it is rasterized onto a canvas and embedded as a PNG. */
text: string;
/** Font size of the text (rendered at 2x on the canvas and drawn at half scale). */
fontSize: number;
/** Text colour; each channel is multiplied by 255 when rendering, so 0-1 values are expected. */
color: { r: number; g: number; b: number };
/** Passed through as the `opacity` of the drawn image. */
opacity: number;
/** Rotation of the watermark in degrees. */
angle: number;
/** Horizontal centre of the watermark as a fraction of the page width (default 0.5). */
x?: number;
/** Vertical centre of the watermark as a fraction of the page height (default 0.5). */
y?: number;
/** Zero-based indices of the pages to stamp; all pages when omitted. Unknown indices are skipped. */
pageIndices?: number[];
}
/**
 * Stamps a text watermark onto pages of a PDF.
 *
 * The text is painted onto an off-screen canvas at twice the requested font
 * size, exported as a PNG, embedded once, and then drawn on each target page
 * so that the image centre lands on the requested relative position.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param options - Text, styling, placement and page selection.
 * @returns Bytes of the watermarked PDF.
 * @throws Error when no 2D canvas context is available or PNG export fails.
 */
export async function addTextWatermark(
  pdfBytes: Uint8Array,
  options: TextWatermarkOptions
): Promise<Uint8Array> {
  const pdfDoc = await PDFDocument.load(pdfBytes);

  const canvas = document.createElement('canvas');
  const ctx = canvas.getContext('2d');
  if (!ctx) throw new Error('Failed to create canvas context');

  // Oversampling factor: render at 2x, draw at 1x for crisper text.
  const dpr = 2;
  const { r, g, b } = options.color;
  const fillStyle = `rgb(${Math.round(r * 255)}, ${Math.round(g * 255)}, ${Math.round(b * 255)})`;
  const font = `bold ${options.fontSize * dpr}px "Noto Sans SC", "Noto Sans JP", "Noto Sans KR", "Noto Sans Arabic", Arial, sans-serif`;

  // Measure with the final font, then size the canvas to fit the text.
  ctx.font = font;
  const textWidth = ctx.measureText(options.text).width;
  canvas.width = Math.ceil(textWidth) + 4;
  canvas.height = Math.ceil(options.fontSize * dpr * 1.4);

  // The font is applied again after the canvas has been resized.
  ctx.font = font;
  ctx.fillStyle = fillStyle;
  ctx.textBaseline = 'middle';
  ctx.fillText(options.text, 2, canvas.height / 2);

  const pngBlob = await new Promise<Blob>((resolve, reject) => {
    canvas.toBlob((result) => {
      if (result) {
        resolve(result);
      } else {
        reject(new Error('Canvas toBlob failed'));
      }
    }, 'image/png');
  });
  const image = await pdfDoc.embedPng(
    new Uint8Array(await pngBlob.arrayBuffer())
  );

  const pages = pdfDoc.getPages();
  const relX = options.x ?? 0.5;
  const relY = options.y ?? 0.5;
  const drawWidth = image.width / dpr;
  const drawHeight = image.height / dpr;
  const halfW = drawWidth / 2;
  const halfH = drawHeight / 2;
  const rad = (options.angle * Math.PI) / 180;
  const cosA = Math.cos(rad);
  const sinA = Math.sin(rad);

  const targetIndices = options.pageIndices ?? Array.from(pages.keys());
  for (const pageIndex of targetIndices) {
    const page = pages[pageIndex];
    if (!page) continue;

    const size = page.getSize();
    const cx = relX * size.width;
    const cy = relY * size.height;
    // Shift the rotation origin so the rotated image is centred on (cx, cy).
    page.drawImage(image, {
      x: cx - cosA * halfW + sinA * halfH,
      y: cy - sinA * halfW - cosA * halfH,
      width: drawWidth,
      height: drawHeight,
      opacity: options.opacity,
      rotate: degrees(options.angle),
    });
  }

  return new Uint8Array(await pdfDoc.save());
}
/** Settings consumed by `addImageWatermark`. */
export interface ImageWatermarkOptions {
/** Encoded bytes of the watermark image. */
imageBytes: Uint8Array;
/** Selects the embed path: 'png' uses `embedPng`, anything else uses `embedJpg`. */
imageType: 'png' | 'jpg';
/** Passed through as the `opacity` of the drawn image. */
opacity: number;
/** Rotation of the watermark in degrees. */
angle: number;
/** Multiplier applied to the embedded image's width and height. */
scale: number;
/** Horizontal centre of the watermark as a fraction of the page width (default 0.5). */
x?: number;
/** Vertical centre of the watermark as a fraction of the page height (default 0.5). */
y?: number;
/** Zero-based indices of the pages to stamp; all pages when omitted. Unknown indices are skipped. */
pageIndices?: number[];
}
/**
 * Stamps an image watermark onto pages of a PDF.
 *
 * The image is embedded once and drawn on every target page, scaled by
 * `options.scale`, rotated by `options.angle` degrees, and positioned so its
 * centre sits at the requested relative location.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param options - Image data, styling, placement and page selection.
 * @returns Bytes of the watermarked PDF.
 */
export async function addImageWatermark(
  pdfBytes: Uint8Array,
  options: ImageWatermarkOptions
): Promise<Uint8Array> {
  const pdfDoc = await PDFDocument.load(pdfBytes);

  let image;
  if (options.imageType === 'png') {
    image = await pdfDoc.embedPng(options.imageBytes);
  } else {
    image = await pdfDoc.embedJpg(options.imageBytes);
  }

  const pages = pdfDoc.getPages();
  const relX = options.x ?? 0.5;
  const relY = options.y ?? 0.5;
  const drawWidth = image.width * options.scale;
  const drawHeight = image.height * options.scale;
  const halfW = drawWidth / 2;
  const halfH = drawHeight / 2;
  const rad = (options.angle * Math.PI) / 180;
  const cosA = Math.cos(rad);
  const sinA = Math.sin(rad);

  const targetIndices = options.pageIndices ?? Array.from(pages.keys());
  for (const pageIndex of targetIndices) {
    const page = pages[pageIndex];
    if (!page) continue;

    const size = page.getSize();
    const cx = relX * size.width;
    const cy = relY * size.height;
    // Shift the rotation origin so the rotated image is centred on (cx, cy).
    page.drawImage(image, {
      x: cx - cosA * halfW + sinA * halfH,
      y: cy - sinA * halfW - cosA * halfH,
      width: drawWidth,
      height: drawHeight,
      opacity: options.opacity,
      rotate: degrees(options.angle),
    });
  }

  return new Uint8Array(await pdfDoc.save());
}
/** Where on the page `addPageNumbers` places the number: top or bottom edge, aligned left, centre or right. */
export type PageNumberPosition =
| 'bottom-center'
| 'bottom-left'
| 'bottom-right'
| 'top-center'
| 'top-left'
| 'top-right';
/**
 * Label style used by `addPageNumbers`: 'page_x_of_y' renders "<page> / <total>",
 * any other value renders just the page number.
 */
export type PageNumberFormat = 'simple' | 'page_x_of_y';
/** Settings consumed by `addPageNumbers`. */
export interface PageNumberOptions {
/** Edge and alignment at which the label is placed. */
position: PageNumberPosition;
/** Text size passed to `drawText`; also used as the label height when computing margins. */
fontSize: number;
/** Label style (number only, or "number / total"). */
format: PageNumberFormat;
/** Text colour; the channels are handed to `rgb()` unchanged. */
color: { r: number; g: number; b: number };
}
/**
 * Draws a page-number label on every page of a PDF using Helvetica.
 *
 * Placement is computed inside the page's crop box (falling back to the media
 * box). Margins are 4% of the box dimension limited to 8-40 units, widened
 * where necessary so the label fits, and the final coordinates are clamped to
 * stay at least 3 units inside the box.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param options - Position, font size, label format and colour.
 * @returns Bytes of the numbered PDF.
 */
export async function addPageNumbers(
  pdfBytes: Uint8Array,
  options: PageNumberOptions
): Promise<Uint8Array> {
  const pdfDoc = await PDFDocument.load(pdfBytes);
  const helveticaFont = await pdfDoc.embedFont(StandardFonts.Helvetica);
  const pages = pdfDoc.getPages();
  const totalPages = pages.length;

  const clamp = (value: number, lower: number, upper: number): number =>
    Math.max(lower, Math.min(upper, value));

  const minMargin = 8;
  const maxMargin = 40;
  const marginPercentage = 0.04;
  const edgePadding = 3;

  pages.forEach((page, pageIndex) => {
    const mediaBox = page.getMediaBox();
    const cropBox = page.getCropBox();
    const bounds = cropBox || mediaBox;
    const { width, height } = bounds;
    const xOffset = bounds.x || 0;
    const yOffset = bounds.y || 0;

    let label = `${pageIndex + 1}`;
    if (options.format === 'page_x_of_y') {
      label = `${pageIndex + 1} / ${totalPages}`;
    }

    const textWidth = helveticaFont.widthOfTextAtSize(label, options.fontSize);
    const textHeight = options.fontSize;

    // Margins scale with the page but never shrink below what the label needs.
    const safeHorizontalMargin = Math.max(
      clamp(width * marginPercentage, minMargin, maxMargin),
      textWidth / 2 + edgePadding
    );
    const safeVerticalMargin = Math.max(
      clamp(height * marginPercentage, minMargin, maxMargin),
      textHeight + edgePadding
    );

    // Horizontal placement depends only on the left/center/right half of the position.
    let x = 0;
    switch (options.position) {
      case 'bottom-left':
      case 'top-left':
        x = safeHorizontalMargin + xOffset;
        break;
      case 'bottom-center':
      case 'top-center':
        x =
          Math.max(
            safeHorizontalMargin,
            Math.min(
              width - safeHorizontalMargin - textWidth,
              (width - textWidth) / 2
            )
          ) + xOffset;
        break;
      case 'bottom-right':
      case 'top-right':
        x =
          Math.max(
            safeHorizontalMargin,
            width - safeHorizontalMargin - textWidth
          ) + xOffset;
        break;
    }

    // Vertical placement depends only on the top/bottom half of the position.
    let y = 0;
    switch (options.position) {
      case 'bottom-center':
      case 'bottom-left':
      case 'bottom-right':
        y = safeVerticalMargin + yOffset;
        break;
      case 'top-center':
      case 'top-left':
      case 'top-right':
        y = height - safeVerticalMargin - textHeight + yOffset;
        break;
    }

    // Final safety clamp keeps the label inside the box on very small pages.
    x = clamp(x, xOffset + edgePadding, xOffset + width - textWidth - edgePadding);
    y = clamp(y, yOffset + edgePadding, yOffset + height - textHeight - edgePadding);

    page.drawText(label, {
      x,
      y,
      font: helveticaFont,
      size: options.fontSize,
      color: rgb(options.color.r, options.color.g, options.color.b),
    });
  });

  return new Uint8Array(await pdfDoc.save());
}
/** Settings consumed by `fixPageSize`. */
export interface FixPageSizeOptions {
/** 'custom' (case-insensitive) or a key of `PageSizes`; unknown keys fall back to A4. */
targetSize: string;
/** 'landscape' or 'portrait' (case-insensitive) swaps the target dimensions if needed; other values leave them as resolved. */
orientation: string;
/** 'fill' (case-insensitive) scales by the larger axis factor; any other value scales by the smaller one. */
scalingMode: string;
/** Colour of the rectangle painted behind each page; channels are handed to `rgb()` unchanged. */
backgroundColor: { r: number; g: number; b: number };
/** Width for the 'custom' target size (default 210). */
customWidth?: number;
/** Height for the 'custom' target size (default 297). */
customHeight?: number;
/** Units of the custom dimensions: 'in' means inches, anything else is treated as millimetres (default 'mm'). */
customUnits?: string;
}
/**
 * Re-lays every page of a PDF onto a uniformly sized page.
 *
 * Each source page is embedded, scaled uniformly, and centred on a new page
 * of the target size that is first filled with the background colour.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param options - Target size, orientation, scaling mode and background.
 * @returns Bytes of the resized PDF.
 */
export async function fixPageSize(
  pdfBytes: Uint8Array,
  options: FixPageSizeOptions
): Promise<Uint8Array> {
  let targetWidth: number;
  let targetHeight: number;

  if (options.targetSize.toLowerCase() !== 'custom') {
    const [presetWidth, presetHeight] =
      PageSizes[options.targetSize as keyof typeof PageSizes] || PageSizes.A4;
    targetWidth = presetWidth;
    targetHeight = presetHeight;
  } else {
    const customWidth = options.customWidth ?? 210;
    const customHeight = options.customHeight ?? 297;
    const units = (options.customUnits ?? 'mm').toLowerCase();
    // 72 units per inch; 25.4 mm per inch.
    const unitsToPoints = units === 'in' ? 72 : 72 / 25.4;
    targetWidth = customWidth * unitsToPoints;
    targetHeight = customHeight * unitsToPoints;
  }

  const orientation = options.orientation.toLowerCase();
  const needsSwap =
    (orientation === 'landscape' && targetWidth < targetHeight) ||
    (orientation === 'portrait' && targetWidth > targetHeight);
  if (needsSwap) {
    [targetWidth, targetHeight] = [targetHeight, targetWidth];
  }

  const sourceDoc = await PDFDocument.load(pdfBytes);
  const outputDoc = await PDFDocument.create();

  for (const sourcePage of sourceDoc.getPages()) {
    const sourceSize = sourcePage.getSize();
    const embeddedPage = await outputDoc.embedPage(sourcePage);
    const outputPage = outputDoc.addPage([targetWidth, targetHeight]);

    // Paint the background first so the page content is drawn on top of it.
    const { r, g, b } = options.backgroundColor;
    outputPage.drawRectangle({
      x: 0,
      y: 0,
      width: targetWidth,
      height: targetHeight,
      color: rgb(r, g, b),
    });

    const scaleX = targetWidth / sourceSize.width;
    const scaleY = targetHeight / sourceSize.height;
    // 'fill' covers the whole target (may crop); otherwise the page fits inside it.
    const pickScale =
      options.scalingMode.toLowerCase() === 'fill' ? Math.max : Math.min;
    const scale = pickScale(scaleX, scaleY);

    const scaledWidth = sourceSize.width * scale;
    const scaledHeight = sourceSize.height * scale;
    outputPage.drawPage(embeddedPage, {
      x: (targetWidth - scaledWidth) / 2,
      y: (targetHeight - scaledHeight) / 2,
      width: scaledWidth,
      height: scaledHeight,
    });
  }

  return new Uint8Array(await outputDoc.save());
}

View File

@@ -0,0 +1,87 @@
import { WasmProvider } from './wasm-provider.js';
// PyMuPDF instance created by loadPyMuPDF(); returned directly on later calls and reset by clearPyMuPDFCache().
let cachedPyMuPDF: any = null;
// Promise of the current load, shared so that concurrent loadPyMuPDF() calls do not start a second load.
let loadPromise: Promise<any> | null = null;
/**
 * Method surface of a PyMuPDF wrapper object.
 *
 * NOTE(review): presumably this describes the instance resolved by
 * `loadPyMuPDF()`, but the loader is typed `Promise<any>` and nothing in this
 * file references the interface — confirm before relying on it.
 */
export interface PyMuPDFInterface {
/** Initializes the instance; `loadPyMuPDF()` awaits this before returning it. */
load(): Promise<void>;
/** Resolves to the compressed blob together with its size. */
compressPdf(
file: Blob,
options: any
): Promise<{ blob: Blob; compressedSize: number }>;
/** Takes a file plus an extension string and resolves to a blob. */
convertToPdf(file: Blob, ext: string): Promise<Blob>;
extractText(file: Blob, options?: any): Promise<string>;
/** Resolves to one entry per image: its bytes and an extension string. */
extractImages(file: Blob): Promise<Array<{ data: Uint8Array; ext: string }>>;
extractTables(file: Blob): Promise<any[]>;
// NOTE(review): whether pageNum is 0- or 1-based is not visible here — confirm against the wrapper.
toSvg(file: Blob, pageNum: number): Promise<string>;
renderPageToImage(file: Blob, pageNum: number, scale: number): Promise<Blob>;
getPageCount(file: Blob): Promise<number>;
rasterizePdf(file: Blob | File, options: any): Promise<Blob>;
}
/**
 * Loads the PyMuPDF wrapper from the configured URL and returns a ready
 * instance, reusing a previously loaded instance or an in-flight load.
 *
 * Failure handling:
 * - Configuration problems are reported before the shared promise is created,
 *   so a "not configured" error is never cached and a later call succeeds
 *   once the user has configured the providers.
 * - The instance is only stored in the module cache after its `load()` has
 *   resolved, so a failed load can never leave an unloaded instance behind.
 *
 * @returns The loaded PyMuPDF instance.
 * @throws Error when PyMuPDF or Ghostscript is not configured, or when the
 *   module cannot be imported or initialized.
 */
export async function loadPyMuPDF(): Promise<any> {
  if (cachedPyMuPDF) {
    return cachedPyMuPDF;
  }
  if (loadPromise) {
    return loadPromise;
  }

  if (!WasmProvider.isConfigured('pymupdf')) {
    throw new Error(
      'PyMuPDF is not configured. Please configure it in Advanced Settings.'
    );
  }
  if (!WasmProvider.isConfigured('ghostscript')) {
    throw new Error(
      'Ghostscript is not configured. PyMuPDF requires Ghostscript for some operations. Please configure both in Advanced Settings.'
    );
  }

  const pymupdfUrl = WasmProvider.getUrl('pymupdf')!;
  const gsUrl = WasmProvider.getUrl('ghostscript')!;
  const normalizedPymupdf = pymupdfUrl.endsWith('/')
    ? pymupdfUrl
    : `${pymupdfUrl}/`;

  loadPromise = (async () => {
    try {
      const wrapperUrl = `${normalizedPymupdf}dist/index.js`;
      const module = await import(/* @vite-ignore */ wrapperUrl);
      if (typeof module.PyMuPDF !== 'function') {
        throw new Error(
          'PyMuPDF module did not export expected PyMuPDF class.'
        );
      }
      const instance = new module.PyMuPDF({
        assetPath: `${normalizedPymupdf}assets/`,
        ghostscriptUrl: gsUrl,
      });
      await instance.load();
      // Publish to the cache only once the instance is fully initialized.
      cachedPyMuPDF = instance;
      console.log('[PyMuPDF Loader] Successfully loaded from CDN');
      return instance;
    } catch (error: unknown) {
      // Drop the failed promise so the next call retries from scratch.
      loadPromise = null;
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to load PyMuPDF from CDN: ${reason}`);
    }
  })();
  return loadPromise;
}
/**
 * Reports whether both providers needed by `loadPyMuPDF()` are configured.
 *
 * @returns `true` only when 'pymupdf' and 'ghostscript' are both configured.
 */
export function isPyMuPDFAvailable(): boolean {
  if (!WasmProvider.isConfigured('pymupdf')) {
    return false;
  }
  return WasmProvider.isConfigured('ghostscript');
}
/**
 * Forgets the cached PyMuPDF instance and any recorded load promise, so the
 * next `loadPyMuPDF()` call starts a fresh load.
 */
export function clearPyMuPDFCache(): void {
  loadPromise = null;
  cachedPyMuPDF = null;
}

View File

@@ -1,381 +1,466 @@
import * as pdfjsLib from 'pdfjs-dist';
pdfjsLib.GlobalWorkerOptions.workerSrc = new URL('pdfjs-dist/build/pdf.worker.min.mjs', import.meta.url).toString();
pdfjsLib.GlobalWorkerOptions.workerSrc = new URL(
'pdfjs-dist/build/pdf.worker.min.mjs',
import.meta.url
).toString();
/**
* Configuration for progressive rendering
*/
export interface RenderConfig {
batchSize?: number;
useLazyLoading?: boolean;
lazyLoadMargin?: string;
eagerLoadBatches?: number; // Number of batches to load ahead eagerly (default: 2)
onProgress?: (current: number, total: number) => void;
onPageRendered?: (pageIndex: number, element: HTMLElement) => void;
onBatchComplete?: () => void;
shouldCancel?: () => boolean;
batchSize?: number;
useLazyLoading?: boolean;
lazyLoadMargin?: string;
eagerLoadBatches?: number; // Number of batches to load ahead eagerly (default: 2)
onProgress?: (current: number, total: number) => void;
onPageRendered?: (pageIndex: number, element: HTMLElement) => void;
onBatchComplete?: () => void;
shouldCancel?: () => boolean;
}
/**
* Page rendering task
*/
interface PageTask {
pageNumber: number;
pdfjsDoc: any;
fileName?: string;
container: HTMLElement;
scale?: number;
createWrapper: (canvas: HTMLCanvasElement, pageNumber: number, fileName?: string) => HTMLElement;
pageNumber: number;
pdfjsDoc: pdfjsLib.PDFDocumentProxy;
fileName?: string;
container: HTMLElement;
scale?: number;
createWrapper: (
canvas: HTMLCanvasElement,
pageNumber: number,
fileName?: string
) => HTMLElement;
placeholderElement?: HTMLElement;
}
/**
* Lazy loading state
*/
interface LazyLoadState {
observer: IntersectionObserver | null;
pendingTasks: Map<HTMLElement, PageTask>;
isRendering: boolean;
eagerLoadQueue: PageTask[];
nextEagerIndex: number;
observer: IntersectionObserver | null;
pendingTasks: Map<HTMLElement, PageTask>;
pendingTasksByPageNumber: Map<number, PageTask>;
isRendering: boolean;
eagerLoadQueue: PageTask[];
nextEagerIndex: number;
}
const lazyLoadState: LazyLoadState = {
observer: null,
pendingTasks: new Map(),
isRendering: false,
eagerLoadQueue: [],
nextEagerIndex: 0,
observer: null,
pendingTasks: new Map(),
pendingTasksByPageNumber: new Map(),
isRendering: false,
eagerLoadQueue: [],
nextEagerIndex: 0,
};
/**
* Creates a placeholder element for a page that will be lazy-loaded
*/
export function createPlaceholder(pageNumber: number, fileName?: string): HTMLElement {
const placeholder = document.createElement('div');
placeholder.className =
'page-thumbnail relative cursor-move flex flex-col items-center gap-1 p-2 border-2 border-gray-600 rounded-lg bg-gray-800 transition-colors';
placeholder.dataset.pageNumber = pageNumber.toString();
if (fileName) {
placeholder.dataset.fileName = fileName;
}
placeholder.dataset.lazyLoad = 'true';
export function createPlaceholder(
pageNumber: number,
fileName?: string
): HTMLElement {
const placeholder = document.createElement('div');
placeholder.className =
'page-thumbnail relative cursor-move flex flex-col items-center gap-1 p-2 border-2 border-gray-600 rounded-lg bg-gray-800 transition-colors';
placeholder.dataset.pageNumber = pageNumber.toString();
if (fileName) {
placeholder.dataset.fileName = fileName;
}
placeholder.dataset.lazyLoad = 'true';
// Create skeleton loader
const skeletonContainer = document.createElement('div');
skeletonContainer.className = 'relative w-full h-36 bg-gray-700 rounded-md animate-pulse flex items-center justify-center';
// Create skeleton loader
const skeletonContainer = document.createElement('div');
skeletonContainer.className =
'relative w-full h-36 bg-gray-700 rounded-md animate-pulse flex items-center justify-center';
const loadingText = document.createElement('span');
loadingText.className = 'text-gray-500 text-xs';
loadingText.textContent = 'Loading...';
const loadingText = document.createElement('span');
loadingText.className = 'text-gray-500 text-xs';
loadingText.textContent = 'Loading...';
skeletonContainer.appendChild(loadingText);
placeholder.appendChild(skeletonContainer);
skeletonContainer.appendChild(loadingText);
placeholder.appendChild(skeletonContainer);
return placeholder;
return placeholder;
}
/**
* Renders a single page to canvas
*/
export async function renderPageToCanvas(
pdfjsDoc: any,
pageNumber: number,
scale: number = 0.5
pdfjsDoc: pdfjsLib.PDFDocumentProxy,
pageNumber: number,
scale: number = 1
): Promise<HTMLCanvasElement> {
const page = await pdfjsDoc.getPage(pageNumber);
const viewport = page.getViewport({ scale });
const page = await pdfjsDoc.getPage(pageNumber);
const viewport = page.getViewport({ scale });
const canvas = document.createElement('canvas');
canvas.height = viewport.height;
canvas.width = viewport.width;
const canvas = document.createElement('canvas');
canvas.height = viewport.height;
canvas.width = viewport.width;
const context = canvas.getContext('2d')!;
const context = canvas.getContext('2d');
if (!context) {
throw new Error(`Failed to get 2D context for page ${pageNumber}`);
}
await page.render({
canvasContext: context,
canvas: canvas,
viewport,
}).promise;
await page.render({
canvasContext: context,
canvas: canvas,
viewport,
}).promise;
return canvas;
return canvas;
}
/**
* Renders a batch of pages in parallel
* Renders a batch of pages
*/
async function renderPageBatch(
tasks: PageTask[],
onProgress?: (current: number, total: number) => void
): Promise<void> {
const renderPromises = tasks.map(async (task) => {
try {
const canvas = await renderPageToCanvas(
task.pdfjsDoc,
task.pageNumber,
task.scale || 0.5
);
async function renderPageBatch(tasks: PageTask[]): Promise<void> {
for (const task of tasks) {
try {
const canvas = await renderPageToCanvas(
task.pdfjsDoc,
task.pageNumber,
task.scale || 0.5
);
const wrapper = task.createWrapper(canvas, task.pageNumber, task.fileName);
const wrapper = task.createWrapper(
canvas,
task.pageNumber,
task.fileName
);
// Find and replace the placeholder for this specific page number
const placeholder = task.container.querySelector(
`[data-page-number="${task.pageNumber}"][data-lazy-load="true"]`
);
let placeholder: Element | null = task.placeholderElement || null;
if (!placeholder) {
placeholder = task.container.querySelector(
`[data-page-number="${task.pageNumber}"][data-lazy-load="true"]`
);
}
if (placeholder) {
// Replace placeholder with rendered page
task.container.replaceChild(wrapper, placeholder);
} else {
// Fallback: shouldn't happen with new approach, but just in case
console.warn(`No placeholder found for page ${task.pageNumber}, appending instead`);
task.container.appendChild(wrapper);
}
return wrapper;
} catch (error) {
console.error(`Error rendering page ${task.pageNumber}:`, error);
return null;
if (placeholder && placeholder.parentNode) {
const parent = placeholder.parentNode;
parent.insertBefore(wrapper, placeholder);
parent.removeChild(placeholder);
} else {
const existingRendered = task.container.querySelector(
`[data-page-number="${task.pageNumber}"]:not([data-lazy-load="true"])`
);
if (existingRendered) {
continue;
}
});
await Promise.all(renderPromises);
const allChildren = Array.from(
task.container.children
) as HTMLElement[];
let insertBefore: Element | null = null;
for (const child of allChildren) {
const childPageNum = parseInt(child.dataset.pageNumber || '0', 10);
if (childPageNum > task.pageNumber) {
insertBefore = child;
break;
}
}
if (insertBefore) {
task.container.insertBefore(wrapper, insertBefore);
} else {
task.container.appendChild(wrapper);
}
console.warn(
`Placeholder not found for page ${task.pageNumber}, inserted at calculated position`
);
}
} catch (error) {
console.error(`Error rendering page ${task.pageNumber}:`, error);
}
}
}
/**
* Sets up Intersection Observer for lazy loading
*/
function setupLazyRendering(
container: HTMLElement,
config: RenderConfig
container: HTMLElement,
config: RenderConfig
): IntersectionObserver {
const options = {
root: container.closest('.overflow-auto') || null,
rootMargin: config.lazyLoadMargin || '200px',
threshold: 0.01,
};
const options = {
root: container.closest('.overflow-auto') || null,
rootMargin: config.lazyLoadMargin || '200px',
threshold: 0.01,
};
const observer = new IntersectionObserver((entries) => {
entries.forEach((entry) => {
if (entry.isIntersecting) {
const placeholder = entry.target as HTMLElement;
const task = lazyLoadState.pendingTasks.get(placeholder);
const observer = new IntersectionObserver((entries) => {
entries.forEach((entry) => {
if (entry.isIntersecting) {
const placeholder = entry.target as HTMLElement;
const pageNumberStr = placeholder.dataset.pageNumber;
if (!pageNumberStr) return;
if (task) {
// Immediately unobserve to prevent multiple triggers
observer.unobserve(placeholder);
lazyLoadState.pendingTasks.delete(placeholder);
const pageNumber = parseInt(pageNumberStr, 10);
const task = lazyLoadState.pendingTasksByPageNumber.get(pageNumber);
// Render this page immediately (not waiting for isRendering flag)
renderPageBatch([task], config.onProgress)
.then(() => {
// Trigger callback after lazy load batch
if (config.onBatchComplete) {
config.onBatchComplete();
}
if (task) {
// Immediately unobserve to prevent multiple triggers
observer.unobserve(placeholder);
lazyLoadState.pendingTasks.delete(placeholder);
lazyLoadState.pendingTasksByPageNumber.delete(pageNumber);
// Check if all pages are rendered
if (lazyLoadState.pendingTasks.size === 0 && lazyLoadState.observer) {
lazyLoadState.observer.disconnect();
lazyLoadState.observer = null;
}
})
.catch((error) => {
console.error(`Error lazy loading page ${task.pageNumber}:`, error);
});
}
}
});
}, options);
task.placeholderElement = placeholder;
lazyLoadState.observer = observer;
return observer;
// Render this page immediately (not waiting for isRendering flag)
renderPageBatch([task])
.then(() => {
// Trigger callback after lazy load batch
if (config.onBatchComplete) {
config.onBatchComplete();
}
// Check if all pages are rendered
if (
lazyLoadState.pendingTasks.size === 0 &&
lazyLoadState.observer
) {
lazyLoadState.observer.disconnect();
lazyLoadState.observer = null;
}
})
.catch((error) => {
console.error(
`Error lazy loading page ${task.pageNumber}:`,
error
);
});
}
}
});
}, options);
lazyLoadState.observer = observer;
return observer;
}
/**
* Request idle callback with fallback
*/
function requestIdleCallbackPolyfill(callback: () => void): void {
if ('requestIdleCallback' in window) {
requestIdleCallback(callback);
} else {
setTimeout(callback, 16); // ~60fps
}
if ('requestIdleCallback' in window) {
requestIdleCallback(callback);
} else {
setTimeout(callback, 16); // ~60fps
}
}
/**
* Main function to render pages progressively with optional lazy loading
*/
export async function renderPagesProgressively(
pdfjsDoc: any,
container: HTMLElement,
createWrapper: (canvas: HTMLCanvasElement, pageNumber: number, fileName?: string) => HTMLElement,
config: RenderConfig = {}
pdfjsDoc: pdfjsLib.PDFDocumentProxy,
container: HTMLElement,
createWrapper: (
canvas: HTMLCanvasElement,
pageNumber: number,
fileName?: string
) => HTMLElement,
config: RenderConfig = {}
): Promise<void> {
const {
batchSize = 8, // Increased from 5 to 8 for faster initial render
useLazyLoading = true,
eagerLoadBatches = 2, // Eagerly load 1 batch ahead by default
onProgress,
onBatchComplete,
} = config;
const {
batchSize = 8,
useLazyLoading = true,
eagerLoadBatches = 2,
onProgress,
onBatchComplete,
} = config;
const totalPages = pdfjsDoc.numPages;
const totalPages = pdfjsDoc.numPages;
// Render more pages initially to reduce lazy loading issues
const initialRenderCount = useLazyLoading
? Math.min(20, totalPages) // Increased from 12 to 20 pages
: totalPages;
const initialRenderCount = useLazyLoading
? Math.min(20, totalPages)
: totalPages;
// CRITICAL FIX: Create placeholders for ALL pages first to maintain order
const placeholders: HTMLElement[] = [];
for (let i = 1; i <= totalPages; i++) {
const placeholder = createPlaceholder(i);
container.appendChild(placeholder);
placeholders.push(placeholder);
const placeholders: HTMLElement[] = [];
for (let i = 1; i <= totalPages; i++) {
const placeholder = createPlaceholder(i);
container.appendChild(placeholder);
placeholders.push(placeholder);
}
const tasks: PageTask[] = [];
// Create tasks for all pages with direct placeholder references
for (let i = 1; i <= totalPages; i++) {
tasks.push({
pageNumber: i,
pdfjsDoc,
container,
scale: useLazyLoading ? 0.5 : 1,
createWrapper,
placeholderElement: placeholders[i - 1],
});
}
// If lazy loading is enabled, set up observer for pages beyond initial render
if (useLazyLoading && totalPages > initialRenderCount) {
const observer = setupLazyRendering(container, config);
for (let i = initialRenderCount + 1; i <= totalPages; i++) {
const placeholder = placeholders[i - 1];
const task = tasks[i - 1];
// Store the task for lazy rendering
lazyLoadState.pendingTasks.set(placeholder, task);
lazyLoadState.pendingTasksByPageNumber.set(task.pageNumber, task);
observer.observe(placeholder);
}
const tasks: PageTask[] = [];
// Prepare eager load queue
const eagerStartIndex = initialRenderCount;
const eagerEndIndex = Math.min(
eagerStartIndex + eagerLoadBatches * batchSize,
totalPages
);
lazyLoadState.eagerLoadQueue = tasks.slice(eagerStartIndex, eagerEndIndex);
lazyLoadState.nextEagerIndex = 0;
}
// Create tasks for all pages
for (let i = 1; i <= totalPages; i++) {
tasks.push({
pageNumber: i,
pdfjsDoc,
container,
scale: config.useLazyLoading ? 0.3 : 0.5,
createWrapper,
});
}
// Render initial pages in batches
const initialTasks = tasks.slice(0, initialRenderCount);
// If lazy loading is enabled, set up observer for pages beyond initial render
if (useLazyLoading && totalPages > initialRenderCount) {
const observer = setupLazyRendering(container, config);
for (let i = 0; i < initialTasks.length; i += batchSize) {
if (config.shouldCancel?.()) return;
for (let i = initialRenderCount + 1; i <= totalPages; i++) {
const placeholder = placeholders[i - 1];
// Store the task for lazy rendering
lazyLoadState.pendingTasks.set(placeholder, tasks[i - 1]);
observer.observe(placeholder);
}
const batch = initialTasks.slice(i, i + batchSize);
// Prepare eager load queue
const eagerStartIndex = initialRenderCount;
const eagerEndIndex = Math.min(
eagerStartIndex + (eagerLoadBatches * batchSize),
totalPages
);
lazyLoadState.eagerLoadQueue = tasks.slice(eagerStartIndex, eagerEndIndex);
lazyLoadState.nextEagerIndex = 0;
}
await new Promise<void>((resolve, reject) => {
requestIdleCallbackPolyfill(() => {
renderPageBatch(batch)
.then(() => {
if (onProgress) {
onProgress(
Math.min(i + batchSize, initialRenderCount),
totalPages
);
}
// Render initial pages in batches
const initialTasks = tasks.slice(0, initialRenderCount);
if (onBatchComplete) {
onBatchComplete();
}
for (let i = 0; i < initialTasks.length; i += batchSize) {
if (config.shouldCancel?.()) return;
resolve();
})
.catch(reject);
});
});
}
const batch = initialTasks.slice(i, i + batchSize);
await new Promise<void>((resolve) => {
requestIdleCallbackPolyfill(async () => {
await renderPageBatch(batch, onProgress);
if (onProgress) {
onProgress(Math.min(i + batchSize, initialRenderCount), totalPages);
}
if (onBatchComplete) {
onBatchComplete();
}
resolve();
});
});
}
// Start eager loading AFTER initial batch is complete
if (useLazyLoading && eagerLoadBatches > 0 && totalPages > initialRenderCount) {
renderEagerBatch(config);
}
// Start eager loading AFTER initial batch is complete
if (
useLazyLoading &&
eagerLoadBatches > 0 &&
totalPages > initialRenderCount
) {
renderEagerBatch(config);
}
}
/**
* Manually observe a placeholder element (useful for dynamically created placeholders)
*/
export function observePlaceholder(
placeholder: HTMLElement,
task: PageTask
placeholder: HTMLElement,
task: PageTask
): void {
if (!lazyLoadState.observer) {
console.warn('No active observer to register placeholder');
return;
}
lazyLoadState.pendingTasks.set(placeholder, task);
lazyLoadState.observer.observe(placeholder);
if (!lazyLoadState.observer) {
console.warn('No active observer to register placeholder');
return;
}
lazyLoadState.pendingTasks.set(placeholder, task);
lazyLoadState.pendingTasksByPageNumber.set(task.pageNumber, task);
lazyLoadState.observer.observe(placeholder);
}
/**
* Eagerly renders the next batch in the background
*/
function renderEagerBatch(config: RenderConfig): void {
const { eagerLoadBatches = 2, batchSize = 8 } = config;
const { eagerLoadBatches = 2, batchSize = 8 } = config;
if (eagerLoadBatches <= 0 || lazyLoadState.eagerLoadQueue.length === 0) {
return;
}
if (eagerLoadBatches <= 0 || lazyLoadState.eagerLoadQueue.length === 0) {
return;
}
if (config.shouldCancel?.()) return;
const { nextEagerIndex, eagerLoadQueue } = lazyLoadState;
if (nextEagerIndex >= eagerLoadQueue.length) {
return; // All eager batches rendered
}
const batchEnd = Math.min(nextEagerIndex + batchSize, eagerLoadQueue.length);
const batch = eagerLoadQueue.slice(nextEagerIndex, batchEnd);
requestIdleCallbackPolyfill(async () => {
if (config.shouldCancel?.()) return;
const { nextEagerIndex, eagerLoadQueue } = lazyLoadState;
const tasksToRender = batch.filter((task) =>
lazyLoadState.pendingTasksByPageNumber.has(task.pageNumber)
);
if (nextEagerIndex >= eagerLoadQueue.length) {
return; // All eager batches rendered
tasksToRender.forEach((task) => {
const placeholder = task.placeholderElement;
if (placeholder && lazyLoadState.observer) {
lazyLoadState.observer.unobserve(placeholder);
lazyLoadState.pendingTasks.delete(placeholder);
lazyLoadState.pendingTasksByPageNumber.delete(task.pageNumber);
}
});
if (tasksToRender.length === 0) {
lazyLoadState.nextEagerIndex = batchEnd;
const remainingBatches = Math.ceil(
(eagerLoadQueue.length - batchEnd) / batchSize
);
if (remainingBatches > 0 && remainingBatches < eagerLoadBatches) {
renderEagerBatch(config);
}
return;
}
const batchEnd = Math.min(nextEagerIndex + batchSize, eagerLoadQueue.length);
const batch = eagerLoadQueue.slice(nextEagerIndex, batchEnd);
await renderPageBatch(tasksToRender);
requestIdleCallbackPolyfill(async () => {
if (config.shouldCancel?.()) return;
if (config.onBatchComplete) {
config.onBatchComplete();
}
// Remove these tasks from pending since we're rendering them eagerly
batch.forEach(task => {
const placeholder = Array.from(lazyLoadState.pendingTasks.entries())
.find(([_, t]) => t.pageNumber === task.pageNumber)?.[0];
if (placeholder && lazyLoadState.observer) {
lazyLoadState.observer.unobserve(placeholder);
lazyLoadState.pendingTasks.delete(placeholder);
}
});
// Update next eager index
lazyLoadState.nextEagerIndex = batchEnd;
await renderPageBatch(batch, config.onProgress);
if (config.onBatchComplete) {
config.onBatchComplete();
}
// Update next eager index
lazyLoadState.nextEagerIndex = batchEnd;
// Queue next eager batch
const remainingBatches = Math.ceil((eagerLoadQueue.length - batchEnd) / batchSize);
if (remainingBatches > 0 && remainingBatches < eagerLoadBatches) {
// Continue eager loading if we have more batches within the eager threshold
renderEagerBatch(config);
}
});
// Queue next eager batch
const remainingBatches = Math.ceil(
(eagerLoadQueue.length - batchEnd) / batchSize
);
if (remainingBatches > 0 && remainingBatches < eagerLoadBatches) {
renderEagerBatch(config);
}
});
}
/**
* Cleanup function to disconnect observers
*/
export function cleanupLazyRendering(): void {
if (lazyLoadState.observer) {
lazyLoadState.observer.disconnect();
lazyLoadState.observer = null;
}
lazyLoadState.pendingTasks.clear();
lazyLoadState.isRendering = false;
lazyLoadState.eagerLoadQueue = [];
lazyLoadState.nextEagerIndex = 0;
if (lazyLoadState.observer) {
lazyLoadState.observer.disconnect();
lazyLoadState.observer = null;
}
lazyLoadState.pendingTasks.clear();
lazyLoadState.pendingTasksByPageNumber.clear();
lazyLoadState.isRendering = false;
lazyLoadState.eagerLoadQueue = [];
lazyLoadState.nextEagerIndex = 0;
}

590
src/js/utils/sanitize.ts Normal file
View File

@@ -0,0 +1,590 @@
import { PDFDocument, PDFName } from 'pdf-lib';
/**
 * Boolean switches for PDF sanitization.
 *
 * NOTE(review): presumably each flag enables the correspondingly named step
 * (e.g. `removeMetadata` -> `removeMetadataFromDoc`); the code that reads
 * these flags is outside this excerpt — confirm there.
 */
export interface SanitizeOptions {
flattenForms: boolean;
removeMetadata: boolean;
removeAnnotations: boolean;
removeJavascript: boolean;
removeEmbeddedFiles: boolean;
removeLayers: boolean;
removeLinks: boolean;
removeStructureTree: boolean;
removeMarkInfo: boolean;
removeFonts: boolean;
}
/** Default option set: every flag is enabled except `removeFonts`. */
export const defaultSanitizeOptions: SanitizeOptions = {
flattenForms: true,
removeMetadata: true,
removeAnnotations: true,
removeJavascript: true,
removeEmbeddedFiles: true,
removeLayers: true,
removeLinks: true,
removeStructureTree: true,
removeMarkInfo: true,
// The only flag that is off by default.
removeFonts: false,
};
/**
 * Strips descriptive metadata from a document in place.
 *
 * Steps, in order: delete every key of the info dictionary, write empty
 * values through the standard setters, then make three independent
 * best-effort removals (catalog `Metadata`, trailer `ID`, catalog
 * `PieceInfo`), each of which only logs a warning when it fails.
 *
 * @param pdfDoc - Document to modify.
 */
function removeMetadataFromDoc(pdfDoc: PDFDocument) {
  const infoDict = (pdfDoc as any).getInfoDict();
  for (const key of infoDict.keys()) {
    infoDict.delete(key);
  }

  pdfDoc.setTitle('');
  pdfDoc.setAuthor('');
  pdfDoc.setSubject('');
  pdfDoc.setKeywords([]);
  pdfDoc.setCreator('');
  pdfDoc.setProducer('');

  // Removes one catalog entry if present; failures are logged with the given prefix.
  const dropCatalogEntry = (entry: string, warning: string) => {
    try {
      const catalogDict = (pdfDoc.catalog as any).dict;
      const entryName = PDFName.of(entry);
      if (catalogDict.has(entryName)) {
        catalogDict.delete(entryName);
      }
    } catch (e: any) {
      console.warn(warning, e.message);
    }
  };

  dropCatalogEntry('Metadata', 'Could not remove XMP metadata:');

  try {
    const trailerInfo = (pdfDoc.context as any).trailerInfo;
    if (trailerInfo) {
      delete trailerInfo.ID;
    }
  } catch (e: any) {
    console.warn('Could not remove document IDs:', e.message);
  }

  dropCatalogEntry('PieceInfo', 'Could not remove PieceInfo:');
}
/**
 * Deletes the Annots entry from every page of the document.
 * A page whose deletion throws is reported with `console.warn` and skipped.
 */
function removeAnnotationsFromDoc(pdfDoc: PDFDocument) {
  pdfDoc.getPages().forEach((page) => {
    try {
      page.node.delete(PDFName.of('Annots'));
    } catch (e: any) {
      console.warn('Could not remove annotations from page:', e.message);
    }
  });
}
/**
 * Obtains the document's form and calls `flatten()` on it.
 * Any exception propagates to the caller.
 */
function flattenFormsInDoc(pdfDoc: PDFDocument) {
  pdfDoc.getForm().flatten();
}
/**
 * Removes script-related entries from the document.
 *
 * Steps, in order:
 *  1. empty the document's `javaScripts` list when it has entries;
 *  2. delete the JavaScript entry of the catalog's Names dictionary;
 *  3. delete the catalog's OpenAction and AA entries;
 *  4. per page: delete the page's AA entry, then for each annotation delete
 *     its A entry when the action's S type is `JavaScript`, and its AA entry;
 *  5. for each entry of the AcroForm Fields array: delete its A and AA entries.
 *
 * Failures in the guarded steps are logged with `console.warn` and do not
 * abort the remaining work.
 */
function removeJavascriptFromDoc(pdfDoc: PDFDocument) {
  const doc = pdfDoc as any;
  const ctx = pdfDoc.context;
  const actionKey = PDFName.of('A');
  const additionalActionsKey = PDFName.of('AA');

  // Deletes `key` from `dict` only when the entry exists.
  const discard = (dict: any, key: any): void => {
    if (dict.has(key)) {
      dict.delete(key);
    }
  };

  if (doc.javaScripts && doc.javaScripts.length > 0) {
    doc.javaScripts = [];
  }

  const catalogDict = (pdfDoc.catalog as any).dict;

  const namesRef = catalogDict.get(PDFName.of('Names'));
  if (namesRef) {
    try {
      discard(ctx.lookup(namesRef) as any, PDFName.of('JavaScript'));
    } catch (e: any) {
      console.warn('Could not access Names/JavaScript:', e.message);
    }
  }

  discard(catalogDict, PDFName.of('OpenAction'));
  discard(catalogDict, additionalActionsKey);

  pdfDoc.getPages().forEach((page) => {
    try {
      const pageDict = page.node;
      discard(pageDict, additionalActionsKey);

      const annotRefs = pageDict.Annots()?.asArray() || [];
      for (const annotRef of annotRefs) {
        try {
          const annot = ctx.lookup(annotRef) as any;

          if (annot.has(actionKey)) {
            try {
              const action = ctx.lookup(annot.get(actionKey)) as any;
              const actionType = action
                .get(PDFName.of('S'))
                ?.toString()
                .substring(1);
              // Only JavaScript actions are dropped; other action types stay.
              if (actionType === 'JavaScript') {
                annot.delete(actionKey);
              }
            } catch (e: any) {
              console.warn('Could not read action:', e.message);
            }
          }

          discard(annot, additionalActionsKey);
        } catch (e: any) {
          console.warn('Could not process annotation for JS:', e.message);
        }
      }
    } catch (e: any) {
      console.warn('Could not remove page actions:', e.message);
    }
  });

  try {
    const acroFormRef = catalogDict.get(PDFName.of('AcroForm'));
    if (acroFormRef) {
      const acroForm = ctx.lookup(acroFormRef) as any;
      const fieldsRef = acroForm.get(PDFName.of('Fields'));
      if (fieldsRef) {
        const fieldRefs = (ctx.lookup(fieldsRef) as any).asArray();
        for (const fieldRef of fieldRefs) {
          try {
            const field = ctx.lookup(fieldRef) as any;
            discard(field, actionKey);
            discard(field, additionalActionsKey);
          } catch (e: any) {
            console.warn('Could not process field for JS:', e.message);
          }
        }
      }
    }
  } catch (e: any) {
    console.warn('Could not process form fields for JS:', e.message);
  }
}
/**
 * Removes embedded-file entries from the document.
 *
 * Deletes the EmbeddedFiles entry from the Names dictionary and from the
 * catalog, filters FileAttachment annotations out of each page's Annots
 * array, empties the document's `embeddedFiles` list, and finally deletes
 * the catalog's Collection entry. An annotation that cannot be inspected is
 * kept; a page that cannot be processed is reported with `console.warn`.
 */
function removeEmbeddedFilesFromDoc(pdfDoc: PDFDocument) {
  const ctx = pdfDoc.context;
  const catalogDict = (pdfDoc.catalog as any).dict;
  const embeddedFilesKey = PDFName.of('EmbeddedFiles');
  const annotsKey = PDFName.of('Annots');

  const namesRef = catalogDict.get(PDFName.of('Names'));
  if (namesRef) {
    try {
      const namesDict = ctx.lookup(namesRef) as any;
      if (namesDict.has(embeddedFilesKey)) {
        namesDict.delete(embeddedFilesKey);
      }
    } catch (e: any) {
      console.warn('Could not access Names/EmbeddedFiles:', e.message);
    }
  }

  if (catalogDict.has(embeddedFilesKey)) {
    catalogDict.delete(embeddedFilesKey);
  }

  pdfDoc.getPages().forEach((page) => {
    try {
      const annotRefs = page.node.Annots()?.asArray() || [];

      const survivors = annotRefs.filter((ref: any) => {
        try {
          const annot = ctx.lookup(ref) as any;
          const subtype = annot
            .get(PDFName.of('Subtype'))
            ?.toString()
            .substring(1);
          return subtype !== 'FileAttachment';
        } catch {
          // Unreadable annotations are retained rather than dropped.
          return true;
        }
      });

      // Leave the page alone when nothing was filtered out.
      if (survivors.length === annotRefs.length) {
        return;
      }

      if (survivors.length > 0) {
        page.node.set(annotsKey, ctx.obj(survivors));
      } else {
        page.node.delete(annotsKey);
      }
    } catch (pageError: any) {
      console.warn(
        `Could not process page for attachments: ${pageError.message}`
      );
    }
  });

  const doc = pdfDoc as any;
  if (doc.embeddedFiles && doc.embeddedFiles.length > 0) {
    doc.embeddedFiles = [];
  }

  const collectionKey = PDFName.of('Collection');
  if (catalogDict.has(collectionKey)) {
    catalogDict.delete(collectionKey);
  }
}
/**
 * Removes layer-related entries: the OCProperties entry on the catalog and
 * on each page, plus the Properties entry of each page's Resources
 * dictionary. Per-page and per-resource failures are logged with
 * `console.warn` and processing continues with the next page.
 */
function removeLayersFromDoc(pdfDoc: PDFDocument) {
  const catalogDict = (pdfDoc.catalog as any).dict;
  const ocPropertiesKey = PDFName.of('OCProperties');

  if (catalogDict.has(ocPropertiesKey)) {
    catalogDict.delete(ocPropertiesKey);
  }

  pdfDoc.getPages().forEach((page) => {
    try {
      const pageDict = page.node;
      if (pageDict.has(ocPropertiesKey)) {
        pageDict.delete(ocPropertiesKey);
      }

      const resourcesRef = pageDict.get(PDFName.of('Resources'));
      if (!resourcesRef) {
        return;
      }

      try {
        const resources = pdfDoc.context.lookup(resourcesRef) as any;
        const propertiesKey = PDFName.of('Properties');
        if (resources.has(propertiesKey)) {
          resources.delete(propertiesKey);
        }
      } catch (e: any) {
        console.warn('Could not access Resources:', e.message);
      }
    } catch (e: any) {
      console.warn('Could not remove page layers:', e.message);
    }
  });
}
/**
 * Removes link-like annotations and named destinations.
 *
 * On every page an annotation is dropped when its Subtype is `Link`, when
 * its A action has type URI, Launch, GoTo or GoToR, or when it carries a
 * Dest entry. The page's Annots array is rewritten (or deleted when nothing
 * remains) only if at least one annotation was counted as removed.
 * Afterwards the Dests entry is deleted from the Names dictionary and from
 * the catalog. Annotations that cannot be inspected are kept, and all
 * failures are logged with `console.warn`.
 */
function removeLinksFromDoc(pdfDoc: PDFDocument) {
  const ctx = pdfDoc.context;
  const annotsKey = PDFName.of('Annots');
  const destsKey = PDFName.of('Dests');
  const linkActionTypes = new Set<string | undefined>([
    'URI',
    'Launch',
    'GoTo',
    'GoToR',
  ]);

  pdfDoc.getPages().forEach((page, pageIndex) => {
    try {
      const pageDict = page.node;
      const annotsRef = pageDict.get(annotsKey);
      if (!annotsRef) {
        return;
      }

      const annotRefs = (ctx.lookup(annotsRef) as any).asArray();
      if (annotRefs.length === 0) {
        return;
      }

      const kept: any[] = [];
      let linksRemoved = 0;

      for (const ref of annotRefs) {
        try {
          const annot = ctx.lookup(ref) as any;
          const subtype = annot
            .get(PDFName.of('Subtype'))
            ?.toString()
            .substring(1);

          let isLink = false;
          const flagAsLink = (): void => {
            isLink = true;
            linksRemoved++;
          };

          if (subtype === 'Link') {
            flagAsLink();
          } else {
            const actionRef = annot.get(PDFName.of('A'));
            if (actionRef) {
              try {
                const action = ctx.lookup(actionRef) as any;
                const actionType = action
                  .get(PDFName.of('S'))
                  ?.toString()
                  .substring(1);
                if (linkActionTypes.has(actionType)) {
                  flagAsLink();
                }
              } catch (e: any) {
                console.warn('Could not read action:', e.message);
              }
            }

            // A Dest entry also marks the annotation as a link, counted once.
            const dest = annot.get(PDFName.of('Dest'));
            if (dest && !isLink) {
              flagAsLink();
            }
          }

          if (!isLink) {
            kept.push(ref);
          }
        } catch (e: any) {
          console.warn('Could not process annotation:', e.message);
          kept.push(ref);
        }
      }

      if (linksRemoved > 0) {
        if (kept.length > 0) {
          pageDict.set(annotsKey, ctx.obj(kept));
        } else {
          pageDict.delete(annotsKey);
        }
      }
    } catch (pageError: any) {
      console.warn(
        `Could not process page ${pageIndex + 1} for links: ${pageError.message}`
      );
    }
  });

  try {
    const catalogDict = (pdfDoc.catalog as any).dict;
    const namesRef = catalogDict.get(PDFName.of('Names'));
    if (namesRef) {
      try {
        const namesDict = ctx.lookup(namesRef) as any;
        if (namesDict.has(destsKey)) {
          namesDict.delete(destsKey);
        }
      } catch (e: any) {
        console.warn('Could not access Names/Dests:', e.message);
      }
    }
    if (catalogDict.has(destsKey)) {
      catalogDict.delete(destsKey);
    }
  } catch (e: any) {
    console.warn('Could not remove named destinations:', e.message);
  }
}
/**
 * Removes structure-tree entries: the catalog's StructTreeRoot, each page's
 * StructParents, and finally the catalog's ParentTree. A page that throws
 * is reported with `console.warn` and skipped.
 */
function removeStructureTreeFromDoc(pdfDoc: PDFDocument) {
  const catalogDict = (pdfDoc.catalog as any).dict;

  // Deletes a catalog entry only when it is present.
  const dropFromCatalog = (entry: string): void => {
    const key = PDFName.of(entry);
    if (catalogDict.has(key)) {
      catalogDict.delete(key);
    }
  };

  dropFromCatalog('StructTreeRoot');

  pdfDoc.getPages().forEach((page) => {
    try {
      const pageDict = page.node;
      const structParentsKey = PDFName.of('StructParents');
      if (pageDict.has(structParentsKey)) {
        pageDict.delete(structParentsKey);
      }
    } catch (e: any) {
      console.warn('Could not remove page StructParents:', e.message);
    }
  });

  dropFromCatalog('ParentTree');
}
/**
 * Deletes the MarkInfo and Marked entries from the document catalog,
 * in that order, when they are present.
 */
function removeMarkInfoFromDoc(pdfDoc: PDFDocument) {
  const catalogDict = (pdfDoc.catalog as any).dict;
  for (const entry of ['MarkInfo', 'Marked']) {
    const key = PDFName.of(entry);
    if (catalogDict.has(key)) {
      catalogDict.delete(key);
    }
  }
}
/**
 * Removes embedded font file entries.
 *
 * For each page, walks Resources -> Font -> each font -> FontDescriptor and
 * deletes the FontFile, FontFile2 and FontFile3 entries found there. The
 * document's `fonts` list is emptied at the end when it has entries. Each
 * nesting level has its own guard, so a failure is logged with
 * `console.warn` and only the affected font, font dictionary, resources or
 * page is skipped.
 */
function removeFontsFromDoc(pdfDoc: PDFDocument) {
  const ctx = pdfDoc.context;
  const fontFileKeys = ['FontFile', 'FontFile2', 'FontFile3'];

  // Removes the font file entries from one font's descriptor, if it has one.
  const stripFontFiles = (fontDict: any, fontKey: any): void => {
    try {
      const font = ctx.lookup(fontDict.get(fontKey)) as any;
      if (!font.has(PDFName.of('FontDescriptor'))) {
        return;
      }
      const descriptor = ctx.lookup(
        font.get(PDFName.of('FontDescriptor'))
      ) as any;
      for (const key of fontFileKeys) {
        if (descriptor.has(PDFName.of(key))) {
          descriptor.delete(PDFName.of(key));
        }
      }
    } catch (e: any) {
      console.warn(`Could not process font ${fontKey}:`, e.message);
    }
  };

  pdfDoc.getPages().forEach((page, pageIndex) => {
    try {
      const resourcesRef = page.node.get(PDFName.of('Resources'));
      if (!resourcesRef) {
        return;
      }

      try {
        const resources = ctx.lookup(resourcesRef) as any;
        if (!resources.has(PDFName.of('Font'))) {
          return;
        }
        const fontRef = resources.get(PDFName.of('Font'));

        try {
          const fontDict = ctx.lookup(fontRef) as any;
          for (const fontKey of fontDict.keys()) {
            stripFontFiles(fontDict, fontKey);
          }
        } catch (e: any) {
          console.warn('Could not access font dictionary:', e.message);
        }
      } catch (e: any) {
        console.warn('Could not access Resources for fonts:', e.message);
      }
    } catch (e: any) {
      console.warn(
        `Could not remove fonts from page ${pageIndex + 1}:`,
        e.message
      );
    }
  });

  const doc = pdfDoc as any;
  if (doc.fonts && doc.fonts.length > 0) {
    doc.fonts = [];
  }
}
/**
 * Loads a PDF from bytes, applies the sanitization passes enabled in
 * `options`, saves the result, and returns both the document and its bytes.
 *
 * Pass order: form flattening, metadata, annotations, JavaScript, embedded
 * files, layers, links, structure tree, MarkInfo, fonts.
 *
 * Error handling differs per pass:
 *  - a flattening failure is logged and followed by an attempt to delete
 *    the catalog's AcroForm entry;
 *  - the metadata and annotation passes are not wrapped, so an exception
 *    from either rejects the returned promise;
 *  - every later pass is wrapped, logging a warning and continuing.
 *
 * @param pdfBytes - Raw bytes of the input PDF.
 * @param options - Flags selecting which passes to run.
 * @returns The mutated document plus a fresh `Uint8Array` of the saved output.
 */
export async function sanitizePdf(
  pdfBytes: Uint8Array,
  options: SanitizeOptions
): Promise<{ pdfDoc: PDFDocument; bytes: Uint8Array }> {
  const pdfDoc = await PDFDocument.load(pdfBytes);

  if (options.flattenForms) {
    try {
      flattenFormsInDoc(pdfDoc);
    } catch (e: any) {
      console.warn(`Could not flatten forms: ${e.message}`);
      // Fallback: drop the form dictionary altogether.
      try {
        const catalogDict = (pdfDoc.catalog as any).dict;
        const acroFormKey = PDFName.of('AcroForm');
        if (catalogDict.has(acroFormKey)) {
          catalogDict.delete(acroFormKey);
        }
      } catch (removeError: any) {
        console.warn('Could not remove AcroForm:', removeError.message);
      }
    }
  }

  if (options.removeMetadata) {
    removeMetadataFromDoc(pdfDoc);
  }
  if (options.removeAnnotations) {
    removeAnnotationsFromDoc(pdfDoc);
  }

  // Remaining passes share one pattern: run when enabled, warn on failure.
  const guardedPasses: Array<[boolean, string, (doc: PDFDocument) => void]> = [
    [options.removeJavascript, 'JavaScript', removeJavascriptFromDoc],
    [options.removeEmbeddedFiles, 'embedded files', removeEmbeddedFilesFromDoc],
    [options.removeLayers, 'layers', removeLayersFromDoc],
    [options.removeLinks, 'links', removeLinksFromDoc],
    [options.removeStructureTree, 'structure tree', removeStructureTreeFromDoc],
    [options.removeMarkInfo, 'MarkInfo', removeMarkInfoFromDoc],
    [options.removeFonts, 'fonts', removeFontsFromDoc],
  ];

  for (const [enabled, label, runPass] of guardedPasses) {
    if (!enabled) {
      continue;
    }
    try {
      runPass(pdfDoc);
    } catch (e: any) {
      console.warn(`Could not remove ${label}: ${e.message}`);
    }
  }

  const savedBytes = await pdfDoc.save();
  return { pdfDoc, bytes: new Uint8Array(savedBytes) };
}

View File

@@ -1,45 +1,38 @@
import { APP_VERSION } from '../../version.js';
import { createLanguageSwitcher } from '../i18n/language-switcher.js';
// Handle simple mode footer replacement for tool pages
// Handle simple mode adjustments for tool pages
if (__SIMPLE_MODE__) {
const footer = document.querySelector('footer');
if (footer && !document.querySelector('[data-simple-footer]')) {
footer.style.display = 'none';
const sectionsToHide = [
'How It Works',
'Related PDF Tools',
'Related Tools',
'Frequently Asked Questions',
];
const simpleFooter = document.createElement('footer');
simpleFooter.className = 'mt-16 border-t-2 border-gray-700 py-8';
simpleFooter.setAttribute('data-simple-footer', 'true');
simpleFooter.innerHTML = `
<div class="container mx-auto px-4">
<div class="flex items-center justify-between flex-wrap gap-4">
<div>
<div class="flex items-center mb-2">
<img src="/images/favicon.svg" alt="Bento PDF Logo" class="h-8 w-8 mr-2">
<span class="text-white font-bold text-lg">BentoPDF</span>
</div>
<p class="text-gray-400 text-sm">
&copy; 2025 BentoPDF. All rights reserved.
</p>
<p class="text-gray-500 text-xs mt-2">
Version <span id="app-version-simple">${APP_VERSION}</span>
</p>
</div>
<div id="simple-mode-lang-switcher" class="flex-shrink-0"></div>
</div>
</div>
`;
document.body.appendChild(simpleFooter);
const langContainer = simpleFooter.querySelector('#simple-mode-lang-switcher');
if (langContainer) {
const switcher = createLanguageSwitcher();
const dropdown = switcher.querySelector('div[role="menu"]');
if (dropdown) {
dropdown.classList.remove('mt-2');
dropdown.classList.add('bottom-full', 'mb-2');
document.querySelectorAll('section').forEach((section) => {
const h2 = section.querySelector('h2');
if (h2) {
const heading = h2.textContent?.trim() || '';
if (sectionsToHide.some((text) => heading.includes(text))) {
(section as HTMLElement).style.display = 'none';
}
langContainer.appendChild(switcher);
}
});
const versionElement = document.getElementById('app-version-simple');
if (versionElement) {
versionElement.textContent = APP_VERSION;
}
const langContainer = document.getElementById('simple-mode-lang-switcher');
if (langContainer) {
const switcher = createLanguageSwitcher();
const dropdown = switcher.querySelector('div[role="menu"]');
if (dropdown) {
dropdown.classList.remove('mt-2');
dropdown.classList.add('bottom-full', 'mb-2');
}
langContainer.appendChild(switcher);
}
}

View File

@@ -0,0 +1,135 @@
import { getLibreOfficeConverter } from './libreoffice-loader.js';
import { isWasmAvailable, getWasmBaseUrl } from '../config/wasm-cdn-config.js';
/**
 * Lifecycle states recorded per WASM module in `preloadState`.
 */
export enum PreloadStatus {
/** Initial value; a preload function only proceeds while the module is in this state. */
IDLE = 'idle',
/** Set immediately before the module load is started. */
LOADING = 'loading',
/** Set after the module load completed without throwing. */
READY = 'ready',
/** Set when the module load threw; the error is logged as a warning. */
ERROR = 'error',
/** Set when `isWasmAvailable` reports the module as not available, so no load is attempted. */
UNAVAILABLE = 'unavailable',
}
/** Preload status tracked for each background-loadable module. */
interface PreloadState {
libreoffice: PreloadStatus;
pymupdf: PreloadStatus;
ghostscript: PreloadStatus;
}
// Module-level status record; every entry starts as IDLE.
// NOTE(review): none of the preload functions shown in this file update
// `libreoffice` — confirm whether it is maintained elsewhere.
const preloadState: PreloadState = {
libreoffice: PreloadStatus.IDLE,
pymupdf: PreloadStatus.IDLE,
ghostscript: PreloadStatus.IDLE,
};
/**
 * Returns a shallow copy of the current preload statuses, so callers
 * cannot mutate the module-level record through the returned object.
 */
export function getPreloadStatus(): Readonly<PreloadState> {
  const snapshot = Object.assign({}, preloadState);
  return snapshot;
}
/**
 * Preloads the PyMuPDF module once.
 *
 * Does nothing unless the recorded status is IDLE. When the package is not
 * available the status becomes UNAVAILABLE. Otherwise the wrapper script at
 * `<base>dist/index.js` is imported dynamically, a `PyMuPDF` instance is
 * created with the asset path and Ghostscript URL, and `load()` is awaited.
 * The status ends as READY on success or ERROR on failure; failures are
 * logged, never thrown.
 */
async function preloadPyMuPDF(): Promise<void> {
  if (preloadState.pymupdf !== PreloadStatus.IDLE) {
    return;
  }

  const available = isWasmAvailable('pymupdf');
  if (!available) {
    preloadState.pymupdf = PreloadStatus.UNAVAILABLE;
    console.log('[Preloader] PyMuPDF not configured, skipping preload');
    return;
  }

  preloadState.pymupdf = PreloadStatus.LOADING;
  console.log('[Preloader] Starting PyMuPDF preload...');

  try {
    const rawBaseUrl = getWasmBaseUrl('pymupdf')!;
    const gsBaseUrl = getWasmBaseUrl('ghostscript');

    // Guarantee exactly one separator between the base URL and the paths below.
    const baseUrl = rawBaseUrl.endsWith('/') ? rawBaseUrl : `${rawBaseUrl}/`;
    const wrapperUrl = `${baseUrl}dist/index.js`;

    const wrapper = await import(/* @vite-ignore */ wrapperUrl);
    const instance = new wrapper.PyMuPDF({
      assetPath: `${baseUrl}assets/`,
      ghostscriptUrl: gsBaseUrl || '',
    });
    await instance.load();

    preloadState.pymupdf = PreloadStatus.READY;
    console.log('[Preloader] PyMuPDF ready');
  } catch (e) {
    preloadState.pymupdf = PreloadStatus.ERROR;
    console.warn('[Preloader] PyMuPDF preload failed:', e);
  }
}
/**
 * Preloads the Ghostscript WASM module once.
 *
 * Does nothing unless the recorded status is IDLE. When the package is not
 * available the status becomes UNAVAILABLE. Otherwise the loader module is
 * imported dynamically, the module is loaded and handed to
 * `setCachedGsModule`. The status ends as READY on success or ERROR on
 * failure; failures are logged, never thrown.
 */
async function preloadGhostscript(): Promise<void> {
  if (preloadState.ghostscript !== PreloadStatus.IDLE) {
    return;
  }

  const available = isWasmAvailable('ghostscript');
  if (!available) {
    preloadState.ghostscript = PreloadStatus.UNAVAILABLE;
    console.log('[Preloader] Ghostscript not configured, skipping preload');
    return;
  }

  preloadState.ghostscript = PreloadStatus.LOADING;
  console.log('[Preloader] Starting Ghostscript WASM preload...');

  try {
    const { loadGsModule, setCachedGsModule } =
      await import('./ghostscript-loader.js');
    const loadedModule = await loadGsModule();
    setCachedGsModule(loadedModule as any);

    preloadState.ghostscript = PreloadStatus.READY;
    console.log('[Preloader] Ghostscript WASM ready');
  } catch (e) {
    preloadState.ghostscript = PreloadStatus.ERROR;
    console.warn('[Preloader] Ghostscript preload failed:', e);
  }
}
/**
 * Defers `task` to a later point: via `requestIdleCallback` with a
 * 5000 ms timeout when `window` exposes it, otherwise via a 1000 ms
 * `setTimeout`. The promise returned by `task` is not awaited.
 */
function scheduleIdleTask(task: () => Promise<void>): void {
  const run = () => task();

  if (!('requestIdleCallback' in window)) {
    setTimeout(run, 1000);
    return;
  }

  requestIdleCallback(run, { timeout: 5000 });
}
/**
 * Schedules the background WASM preloads.
 *
 * When the current pathname contains one of the listed office-conversion
 * page names, nothing is scheduled (the log message gives memory as the
 * reason). Otherwise one idle task is scheduled that preloads PyMuPDF and
 * then Ghostscript, one after the other.
 */
export function startBackgroundPreload(): void {
  console.log('[Preloader] Scheduling background WASM preloads...');

  const libreOfficePages = [
    'word-to-pdf',
    'excel-to-pdf',
    'ppt-to-pdf',
    'powerpoint-to-pdf',
    'docx-to-pdf',
    'xlsx-to-pdf',
    'pptx-to-pdf',
    'csv-to-pdf',
    'rtf-to-pdf',
    'odt-to-pdf',
    'ods-to-pdf',
    'odp-to-pdf',
  ];

  const pathname = window.location.pathname;
  const onLibreOfficePage = libreOfficePages.some((slug) =>
    pathname.includes(slug)
  );

  if (onLibreOfficePage) {
    console.log(
      '[Preloader] Skipping preloads on LibreOffice page to save memory'
    );
    return;
  }

  const runSequentialPreloads = async (): Promise<void> => {
    console.log('[Preloader] Starting sequential WASM preloads...');
    await preloadPyMuPDF();
    await preloadGhostscript();
    console.log('[Preloader] Sequential preloads complete');
  };

  scheduleIdleTask(runSequentialPreloads);
}

View File

@@ -0,0 +1,377 @@
/** Identifiers of the WASM packages whose base URL can be configured. */
export type WasmPackage = 'pymupdf' | 'ghostscript' | 'cpdf';
/** User-provided base URL per package; an absent key means no override is stored. */
interface WasmProviderConfig {
pymupdf?: string;
ghostscript?: string;
cpdf?: string;
}
/** localStorage key under which the user overrides are persisted as JSON. */
const STORAGE_KEY = 'bentopdf:wasm-providers';
/**
 * Built-in CDN base URLs, used when the matching build-time environment
 * variable is empty or unset.
 * NOTE(review): only the pymupdf URL pins a package version; the other two
 * do not — confirm that is intended.
 */
const CDN_DEFAULTS: Record<WasmPackage, string> = {
pymupdf: 'https://cdn.jsdelivr.net/npm/@bentopdf/pymupdf-wasm@0.11.16/',
ghostscript: 'https://cdn.jsdelivr.net/npm/@bentopdf/gs-wasm/assets/',
cpdf: 'https://cdn.jsdelivr.net/npm/coherentpdf/dist/',
};
/**
 * Picks the environment-provided value when it is a non-empty string,
 * otherwise the fallback. `undefined` and `''` both select the fallback.
 */
function envOrDefault(envVar: string | undefined, fallback: string): string {
  return envVar ? envVar : fallback;
}
/**
 * Effective default base URL per package: the build-time
 * `import.meta.env.VITE_WASM_*_URL` value when it is non-empty, otherwise
 * the corresponding `CDN_DEFAULTS` entry.
 */
const ENV_DEFAULTS: Record<WasmPackage, string> = {
pymupdf: envOrDefault(
import.meta.env.VITE_WASM_PYMUPDF_URL,
CDN_DEFAULTS.pymupdf
),
ghostscript: envOrDefault(
import.meta.env.VITE_WASM_GS_URL,
CDN_DEFAULTS.ghostscript
),
cpdf: envOrDefault(import.meta.env.VITE_WASM_CPDF_URL, CDN_DEFAULTS.cpdf),
};
/**
 * Stores, resolves and validates the base URLs from which the WASM packages
 * are loaded.
 *
 * User overrides live in `config` and are persisted to localStorage under
 * `STORAGE_KEY`; when no override exists for a package, the corresponding
 * `ENV_DEFAULTS` entry is used.
 */
class WasmProviderManager {
  private config: WasmProviderConfig;
  // Records successful validations; invalidated whenever a URL changes.
  // Nothing in this class reads it back.
  private validationCache: Map<WasmPackage, boolean> = new Map();

  constructor() {
    this.config = this.loadConfig();
  }

  /**
   * Reads the persisted overrides. Storage errors and malformed JSON are
   * logged and result in an empty configuration.
   */
  private loadConfig(): WasmProviderConfig {
    try {
      const stored = localStorage.getItem(STORAGE_KEY);
      if (stored) {
        return this.sanitizeStoredConfig(JSON.parse(stored));
      }
    } catch (e) {
      console.warn(
        '[WasmProvider] Failed to load config from localStorage:',
        e
      );
    }
    return {};
  }

  /**
   * Reduces an untrusted parsed value to a well-formed config.
   *
   * localStorage content can be anything (`null`, an array, numbers...), so
   * only non-empty string values stored under known package names are kept.
   * Without this, a stored `null` made every later lookup throw, and a
   * non-string value was handed out as if it were a URL.
   */
  private sanitizeStoredConfig(raw: unknown): WasmProviderConfig {
    const clean: WasmProviderConfig = {};
    if (typeof raw !== 'object' || raw === null || Array.isArray(raw)) {
      return clean;
    }
    const candidate = raw as Record<string, unknown>;
    for (const packageName of Object.keys(ENV_DEFAULTS) as WasmPackage[]) {
      const value = candidate[packageName];
      if (typeof value === 'string' && value) {
        clean[packageName] = value;
      }
    }
    return clean;
  }

  private getEnvDefault(packageName: WasmPackage): string | undefined {
    return ENV_DEFAULTS[packageName];
  }

  /** Persists the overrides; a storage failure is logged, not thrown. */
  private saveConfig(): void {
    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify(this.config));
    } catch (e) {
      console.error('[WasmProvider] Failed to save config to localStorage:', e);
    }
  }

  /** Returns the user override for the package, or its default URL. */
  getUrl(packageName: WasmPackage): string | undefined {
    return this.config[packageName] || this.getEnvDefault(packageName);
  }

  /** Stores a user override, normalized to end with `/`, and persists it. */
  setUrl(packageName: WasmPackage, url: string): void {
    const normalizedUrl = url.endsWith('/') ? url : `${url}/`;
    this.config[packageName] = normalizedUrl;
    this.validationCache.delete(packageName);
    this.saveConfig();
  }

  /** Removes the user override for the package and persists the change. */
  removeUrl(packageName: WasmPackage): void {
    delete this.config[packageName];
    this.validationCache.delete(packageName);
    this.saveConfig();
  }

  /** True when either a user override or a default URL exists. */
  isConfigured(packageName: WasmPackage): boolean {
    return !!(this.config[packageName] || this.getEnvDefault(packageName));
  }

  /** True only when a user override exists. */
  isUserConfigured(packageName: WasmPackage): boolean {
    return !!this.config[packageName];
  }

  /** True when a default URL exists for the package. */
  hasEnvDefault(packageName: WasmPackage): boolean {
    return !!this.getEnvDefault(packageName);
  }

  /** True when any override is stored or any default URL is non-empty. */
  hasAnyProvider(): boolean {
    return (
      Object.keys(this.config).length > 0 ||
      Object.values(ENV_DEFAULTS).some(Boolean)
    );
  }

  /**
   * Checks that a package's probe file can be fetched from a base URL.
   *
   * The URL tested is `url` when given, otherwise the stored user override;
   * the default URL is not consulted. The URL must parse and use http(s),
   * the probe file must answer with an OK status within 10 seconds, its
   * first body chunk must be readable, and a present content type must look
   * like JavaScript, octet-stream or text.
   *
   * @returns `{ valid: true }` or `{ valid: false, error }`; never throws
   *   for network failures.
   */
  async validateUrl(
    packageName: WasmPackage,
    url?: string
  ): Promise<{ valid: boolean; error?: string }> {
    const testUrl = url || this.config[packageName];
    if (!testUrl) {
      return { valid: false, error: 'No URL configured' };
    }

    try {
      const parsedUrl = new URL(testUrl);
      if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
        return {
          valid: false,
          error: 'URL must start with http:// or https://',
        };
      }
    } catch {
      return {
        valid: false,
        error:
          'Invalid URL format. Please enter a valid URL (e.g., https://example.com/wasm/)',
      };
    }

    const normalizedUrl = testUrl.endsWith('/') ? testUrl : `${testUrl}/`;

    try {
      const testFiles: Record<WasmPackage, string> = {
        pymupdf: 'dist/index.js',
        ghostscript: 'gs.js',
        cpdf: 'coherentpdf.browser.min.js',
      };
      const testFile = testFiles[packageName];
      const fullUrl = `${normalizedUrl}${testFile}`;

      const controller = new AbortController();
      const timeoutId = setTimeout(() => controller.abort(), 10000); // 10s
      let response: Response;
      try {
        response = await fetch(fullUrl, {
          method: 'GET',
          mode: 'cors',
          signal: controller.signal,
        });
      } finally {
        // Clear the timer on failure too, so it never outlives the request.
        clearTimeout(timeoutId);
      }

      if (!response.ok) {
        return {
          valid: false,
          error: `Could not find ${testFile} at the specified URL (HTTP ${response.status}). Make sure the file exists.`,
        };
      }

      const reader = response.body?.getReader();
      if (reader) {
        try {
          await reader.read();
          // Best-effort cleanup; a rejected cancel must not surface as an
          // unhandled rejection or change the validation result.
          reader.cancel().catch(() => undefined);
        } catch {
          return {
            valid: false,
            error: `File exists but could not be read. Check CORS configuration.`,
          };
        }
      }

      const contentType = response.headers.get('content-type');
      if (
        contentType &&
        !contentType.includes('javascript') &&
        !contentType.includes('application/octet-stream') &&
        !contentType.includes('text/')
      ) {
        return {
          valid: false,
          error: `The URL returned unexpected content type: ${contentType}. Expected a JavaScript file.`,
        };
      }

      // Only a validation of the stored URL is remembered.
      if (!url || url === this.config[packageName]) {
        this.validationCache.set(packageName, true);
      }
      return { valid: true };
    } catch (e: unknown) {
      const errorMessage = e instanceof Error ? e.message : 'Unknown error';
      if (
        errorMessage.includes('Failed to fetch') ||
        errorMessage.includes('NetworkError')
      ) {
        return {
          valid: false,
          error:
            'Network error: Could not connect to the URL. Check that the URL is correct and the server allows CORS requests.',
        };
      }
      return {
        valid: false,
        error: `Network error: ${errorMessage}`,
      };
    }
  }

  /** Returns the effective URL (override or default) for every package. */
  getAllProviders(): WasmProviderConfig {
    return {
      pymupdf: this.config.pymupdf || ENV_DEFAULTS.pymupdf,
      ghostscript: this.config.ghostscript || ENV_DEFAULTS.ghostscript,
      cpdf: this.config.cpdf || ENV_DEFAULTS.cpdf,
    };
  }

  /** Drops all overrides, in memory and in localStorage. */
  clearAll(): void {
    this.config = {};
    this.validationCache.clear();
    try {
      localStorage.removeItem(STORAGE_KEY);
    } catch (e) {
      console.error('[WasmProvider] Failed to clear localStorage:', e);
    }
  }

  /** Alias for `clearAll()`: without overrides the defaults apply again. */
  resetToDefaults(): void {
    this.clearAll();
  }

  /** Human-readable name shown for a package. */
  getPackageDisplayName(packageName: WasmPackage): string {
    const names: Record<WasmPackage, string> = {
      pymupdf: 'PyMuPDF (Document Processing)',
      ghostscript: 'Ghostscript (PDF/A Conversion)',
      cpdf: 'CoherentPDF (Bookmarks & Metadata)',
    };
    return names[packageName];
  }

  /** Feature labels listed for a package. */
  getPackageFeatures(packageName: WasmPackage): string[] {
    const features: Record<WasmPackage, string[]> = {
      pymupdf: [
        'PDF to Text',
        'PDF to Markdown',
        'PDF to SVG',
        'PDF to Images (High Quality)',
        'PDF to DOCX',
        'PDF to Excel/CSV',
        'Extract Images',
        'Extract Tables',
        'EPUB/MOBI/FB2/XPS/CBZ to PDF',
        'Image Compression',
        'Deskew PDF',
        'PDF Layers',
      ],
      ghostscript: ['PDF/A Conversion', 'Font to Outline'],
      cpdf: [
        'Merge PDF',
        'Alternate Merge',
        'Split by Bookmarks',
        'Table of Contents',
        'PDF to JSON',
        'JSON to PDF',
        'Add/Edit/Extract Attachments',
        'Edit Bookmarks',
        'PDF Metadata',
      ],
    };
    return features[packageName];
  }
}
/**
 * Shared `WasmProviderManager` instance, created when this module is loaded
 * (its constructor reads the persisted configuration). The dialog and
 * `requireWasm` helpers in this file query it.
 */
export const WasmProvider = new WasmProviderManager();
/**
 * Shows a modal telling the user that a WASM package must be configured.
 *
 * The dialog names the package, lists up to four of its features (with a
 * "+ N more..." line when there are more), and offers Cancel and Configure
 * buttons. Cancel, or a click on the backdrop itself, removes the dialog.
 * Configure removes it and then calls `onConfigure` when given, otherwise
 * navigates to the `wasm-settings.html` page under the app's base URL.
 *
 * @param packageName - Package whose name and features are displayed.
 * @param onConfigure - Optional replacement for the default navigation.
 */
export function showWasmRequiredDialog(
  packageName: WasmPackage,
  onConfigure?: () => void
): void {
  const displayName = WasmProvider.getPackageDisplayName(packageName);
  const features = WasmProvider.getPackageFeatures(packageName);

  // Pre-render the feature list: first four entries plus an overflow line.
  const visibleCount = 4;
  const featureItems = features
    .slice(0, visibleCount)
    .map(
      (f) =>
        `<li class="flex items-center gap-2"><span class="text-green-400">✓</span> ${f}</li>`
    )
    .join('');
  const hiddenCount = features.length - visibleCount;
  const overflowItem =
    hiddenCount > 0
      ? `<li class="text-gray-500">+ ${hiddenCount} more...</li>`
      : '';

  const modal = document.createElement('div');
  modal.className =
    'bg-gray-800 rounded-2xl max-w-md w-full shadow-2xl border border-gray-700';
  modal.innerHTML = `
<div class="p-6">
<div class="flex items-center gap-3 mb-4">
<div class="w-12 h-12 rounded-full bg-amber-500/20 flex items-center justify-center">
<svg class="w-6 h-6 text-amber-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"/>
</svg>
</div>
<div>
<h3 class="text-lg font-semibold text-white">Advanced Feature Required</h3>
<p class="text-sm text-gray-400">External processing module needed</p>
</div>
</div>
<p class="text-gray-300 mb-4">
This feature requires <strong class="text-white">${displayName}</strong> to be configured.
</p>
<div class="bg-gray-700/50 rounded-lg p-4 mb-4">
<p class="text-sm text-gray-400 mb-2">Features enabled by this module:</p>
<ul class="text-sm text-gray-300 space-y-1">
${featureItems}
${overflowItem}
</ul>
</div>
<p class="text-xs text-gray-500 mb-4">
This module is licensed under AGPL-3.0. By configuring it, you agree to its license terms.
</p>
</div>
<div class="border-t border-gray-700 p-4 flex gap-3">
<button id="wasm-modal-cancel" class="flex-1 px-4 py-2.5 rounded-lg bg-gray-700 text-gray-300 hover:bg-gray-600 transition-colors font-medium">
Cancel
</button>
<button id="wasm-modal-configure" class="flex-1 px-4 py-2.5 rounded-lg bg-gradient-to-r from-blue-600 to-blue-500 text-white hover:from-blue-500 hover:to-blue-400 transition-all font-medium">
Configure
</button>
</div>
`;

  const overlay = document.createElement('div');
  overlay.className =
    'fixed inset-0 bg-black/60 backdrop-blur-sm z-50 flex items-center justify-center p-4';
  overlay.id = 'wasm-required-modal';
  overlay.appendChild(modal);
  document.body.appendChild(overlay);

  const dismiss = (): void => {
    overlay.remove();
  };

  const cancelBtn = modal.querySelector('#wasm-modal-cancel');
  const configureBtn = modal.querySelector('#wasm-modal-configure');

  cancelBtn?.addEventListener('click', dismiss);

  // Only a click on the backdrop itself closes the dialog, not clicks inside it.
  overlay.addEventListener('click', (e) => {
    if (e.target === overlay) {
      dismiss();
    }
  });

  configureBtn?.addEventListener('click', () => {
    dismiss();
    if (onConfigure) {
      onConfigure();
      return;
    }
    window.location.href = `${import.meta.env.BASE_URL}wasm-settings.html`;
  });
}
/**
 * Gate for features that need a WASM package.
 *
 * When the package is configured, `onAvailable` (if given) is invoked and
 * `true` is returned. Otherwise the "required" dialog is shown and `false`
 * is returned.
 */
export function requireWasm(
  packageName: WasmPackage,
  onAvailable?: () => void
): boolean {
  if (!WasmProvider.isConfigured(packageName)) {
    showWasmRequiredDialog(packageName);
    return false;
  }

  onAvailable?.();
  return true;
}

196
src/js/utils/xml-to-pdf.ts Normal file
View File

@@ -0,0 +1,196 @@
import { jsPDF } from 'jspdf';
import autoTable from 'jspdf-autotable';
/** Options accepted by `convertXmlToPdf`. */
export interface XmlToPdfOptions {
/** Optional progress callback; `convertXmlToPdf` calls it with a percentage and a status message at each stage. */
onProgress?: (percent: number, message: string) => void;
}
/**
 * jsPDF document with the optional `lastAutoTable` field that
 * `convertXmlToPdf` reads after drawing a table to find where it ended.
 * NOTE(review): presumably populated by jspdf-autotable — confirm.
 */
interface jsPDFWithAutoTable extends jsPDF {
lastAutoTable?: { finalY: number };
}
/**
 * Converts an XML file into a landscape A4 PDF made of tables.
 *
 * The root tag name becomes a centered title. When the root has child
 * elements they are grouped by tag name and each group is rendered as one
 * table (with a group heading when there is more than one group). When the
 * root has no child elements, its key/value pairs are rendered as a
 * two-column "Property / Value" table instead.
 *
 * @param file - XML file to convert.
 * @param options - Optional settings; `onProgress` receives stage updates.
 * @returns The generated PDF as a Blob.
 * @throws Error when the XML parser reports a `parsererror`.
 */
export async function convertXmlToPdf(
  file: File,
  options?: XmlToPdfOptions
): Promise<Blob> {
  const onProgress = options?.onProgress;

  onProgress?.(10, 'Reading XML file...');
  const xmlText = await file.text();

  onProgress?.(30, 'Parsing XML structure...');
  const xmlDoc = new DOMParser().parseFromString(xmlText, 'text/xml');
  const parseError = xmlDoc.querySelector('parsererror');
  if (parseError) {
    throw new Error(`Invalid XML: ${parseError.textContent}`);
  }

  onProgress?.(50, 'Analyzing data structure...');
  const doc: jsPDFWithAutoTable = new jsPDF({
    orientation: 'landscape',
    unit: 'mm',
    format: 'a4'
  });
  const pageWidth = doc.internal.pageSize.getWidth();
  const root = xmlDoc.documentElement;

  // Vertical cursor (mm) for the next piece of content.
  let yPosition = 20;

  doc.setFontSize(18);
  doc.setFont('helvetica', 'bold');
  doc.text(formatTitle(root.tagName), pageWidth / 2, yPosition, { align: 'center' });
  yPosition += 15;

  onProgress?.(60, 'Generating formatted content...');
  const children = Array.from(root.children);

  if (children.length === 0) {
    const kvPairs = extractKeyValuePairs(root);
    if (kvPairs.length > 0) {
      autoTable(doc, {
        head: [['Property', 'Value']],
        body: kvPairs,
        startY: yPosition,
        styles: {
          fontSize: 10,
          cellPadding: 5,
        },
        headStyles: {
          fillColor: [79, 70, 229],
          textColor: 255,
          fontStyle: 'bold',
        },
        columnStyles: {
          0: { fontStyle: 'bold', cellWidth: 60 },
          1: { cellWidth: 'auto' },
        },
        margin: { left: 14, right: 14 },
        theme: 'striped',
      });
    }
  } else {
    const groups = groupByTagName(children);
    const hasSeveralGroups = Object.keys(groups).length > 1;

    for (const [groupName, elements] of Object.entries(groups)) {
      const { headers, rows } = extractTableData(elements);
      if (headers.length === 0 || rows.length === 0) {
        continue;
      }

      // Group headings are only useful when there is more than one table.
      if (hasSeveralGroups) {
        doc.setFontSize(14);
        doc.setFont('helvetica', 'bold');
        doc.text(formatTitle(groupName), 14, yPosition);
        yPosition += 8;
      }

      autoTable(doc, {
        head: [headers.map((header) => formatTitle(header))],
        body: rows,
        startY: yPosition,
        styles: {
          fontSize: 9,
          cellPadding: 4,
          overflow: 'linebreak',
        },
        headStyles: {
          fillColor: [79, 70, 229],
          textColor: 255,
          fontStyle: 'bold',
        },
        alternateRowStyles: {
          fillColor: [243, 244, 246],
        },
        margin: { top: 20, left: 14, right: 14 },
        theme: 'striped',
        didDrawPage: (data) => {
          yPosition = (data.cursor?.y || yPosition) + 10;
        }
      });

      // Prefer the table's reported end; otherwise keep the cursor from above.
      yPosition = (doc.lastAutoTable?.finalY || yPosition) + 15;
    }
  }

  onProgress?.(90, 'Finalizing PDF...');
  const pdfBlob = doc.output('blob');
  onProgress?.(100, 'Complete!');
  return pdfBlob;
}
/**
 * Groups elements by their tag name, preserving first-seen order of both
 * the groups and the elements inside each group.
 *
 * Grouping is done in a `Map` so that tag names which collide with
 * `Object.prototype` members (`constructor`, `toString`, `__proto__`, ...)
 * are treated as ordinary keys; a plain-object accumulator would find the
 * inherited member and fail when pushing to it.
 *
 * @param elements - Elements to group.
 * @returns A record from tag name to the elements carrying that tag.
 */
function groupByTagName(elements: Element[]): Record<string, Element[]> {
  const buckets = new Map<string, Element[]>();
  for (const element of elements) {
    const bucket = buckets.get(element.tagName);
    if (bucket) {
      bucket.push(element);
    } else {
      buckets.set(element.tagName, [element]);
    }
  }
  // Object.fromEntries defines own data properties, so even "__proto__"
  // becomes a regular key of the result.
  return Object.fromEntries(buckets);
}
/**
 * Turns a list of sibling elements into table data.
 *
 * Headers are the distinct tag names of the elements' direct children, in
 * first-seen order. Each element yields one row holding, per header, the
 * trimmed text of its first direct child with that tag name, or `''` when
 * it has none.
 *
 * Children are matched by comparing tag names rather than by passing the
 * tag name to `querySelector`: a tag name is not a CSS selector, so names
 * such as `ns:item` or `v.1` would throw or be misread, and a selector
 * would also match nested descendants instead of the direct child.
 *
 * @param elements - Row elements.
 * @returns The header names and one string row per element.
 */
function extractTableData(elements: Element[]): { headers: string[], rows: string[][] } {
  if (elements.length === 0) {
    return { headers: [], rows: [] };
  }

  const headerSet = new Set<string>();

  // One pass per element: collect headers and remember the first value per tag.
  const cellsPerElement = elements.map((element) => {
    const cells = new Map<string, string>();
    for (const child of Array.from(element.children)) {
      headerSet.add(child.tagName);
      if (!cells.has(child.tagName)) {
        cells.set(child.tagName, child.textContent?.trim() || '');
      }
    }
    return cells;
  });

  const headers = Array.from(headerSet);
  const rows = cellsPerElement.map((cells) =>
    headers.map((header) => cells.get(header) ?? '')
  );

  return { headers, rows };
}
/**
 * Builds `[label, value]` pairs for an element: first one pair per direct
 * child whose trimmed text is non-empty, then one pair per attribute
 * (attributes are included even when their value is empty). Labels are
 * passed through `formatTitle`.
 */
function extractKeyValuePairs(element: Element): string[][] {
  const childPairs = Array.from(element.children)
    .map((child): [string, string] => [
      child.tagName,
      child.textContent?.trim() || '',
    ])
    .filter(([, value]) => value)
    .map(([key, value]) => [formatTitle(key), value]);

  const attributePairs = Array.from(element.attributes).map((attr) => [
    formatTitle(attr.name),
    attr.value,
  ]);

  return [...childPairs, ...attributePairs];
}
/**
 * Converts a tag or attribute name into a display title: underscores and
 * hyphens become spaces, a space is inserted at each lower-to-upper case
 * boundary, and every space-separated word is capitalized with the rest of
 * the word lower-cased.
 */
function formatTitle(tagName: string): string {
  const spaced = tagName
    .replace(/[_-]/g, ' ')
    .replace(/([a-z])([A-Z])/g, '$1 $2');

  const titledWords: string[] = [];
  for (const word of spaced.split(' ')) {
    titledWords.push(word.slice(0, 1).toUpperCase() + word.slice(1).toLowerCase());
  }
  return titledWords.join(' ');
}