Reset branch to main
This commit is contained in:
153
src/js/utils/compress.ts
Normal file
153
src/js/utils/compress.ts
Normal file
@@ -0,0 +1,153 @@
|
||||
import { PDFDocument } from 'pdf-lib';
|
||||
import { getPDFDocument } from './helpers.js';
|
||||
import { loadPyMuPDF } from './pymupdf-loader.js';
|
||||
|
||||
/**
 * Named option presets for the PyMuPDF-based "condense" compression path.
 *
 * Each preset carries image re-encoding settings (`quality`, `dpiTarget`,
 * `dpiThreshold`), scrub flags and a font-subsetting flag. Only the
 * `aggressive` and `extreme` presets define `scrub.xmlMetadata`.
 */
export const CONDENSE_PRESETS = {
  light: {
    images: {
      quality: 90,
      dpiTarget: 150,
      dpiThreshold: 200,
    },
    scrub: {
      metadata: false,
      thumbnails: true,
    },
    subsetFonts: true,
  },
  balanced: {
    images: {
      quality: 75,
      dpiTarget: 96,
      dpiThreshold: 150,
    },
    scrub: {
      metadata: true,
      thumbnails: true,
    },
    subsetFonts: true,
  },
  aggressive: {
    images: {
      quality: 50,
      dpiTarget: 72,
      dpiThreshold: 100,
    },
    scrub: {
      metadata: true,
      thumbnails: true,
      xmlMetadata: true,
    },
    subsetFonts: true,
  },
  extreme: {
    images: {
      quality: 30,
      dpiTarget: 60,
      dpiThreshold: 96,
    },
    scrub: {
      metadata: true,
      thumbnails: true,
      xmlMetadata: true,
    },
    subsetFonts: true,
  },
};
|
||||
|
||||
/**
 * Presets for the rasterising "photon" compression path.
 *
 * `scale` is passed to the pdf.js viewport when a page is rendered and
 * `quality` is passed to `canvas.toBlob` as the JPEG quality argument.
 */
export const PHOTON_PRESETS = {
  light: {
    scale: 2.0,
    quality: 0.85,
  },
  balanced: {
    scale: 1.5,
    quality: 0.65,
  },
  aggressive: {
    scale: 1.2,
    quality: 0.45,
  },
  extreme: {
    scale: 1.0,
    quality: 0.25,
  },
};
|
||||
|
||||
/**
 * Optional per-call overrides for `performCondenseCompression`.
 *
 * Every field is optional; a field left `undefined` falls back to the value
 * of the selected preset (or to `false` for `convertToGrayscale`).
 */
export interface CondenseCustomSettings {
  /** Overrides the preset's `images.quality`. */
  imageQuality?: number;
  /** Overrides the preset's `images.dpiTarget`. */
  dpiTarget?: number;
  /**
   * Overrides the preset's `images.dpiThreshold`. The effective threshold is
   * raised to at least `dpiTarget + 10` by `performCondenseCompression`.
   */
  dpiThreshold?: number;
  /** Overrides the preset's `scrub.metadata`. */
  removeMetadata?: boolean;
  /** Overrides the preset's `subsetFonts`. */
  subsetFonts?: boolean;
  /** Forwarded as `images.convertToGray`; treated as `false` when omitted. */
  convertToGrayscale?: boolean;
  /** Overrides the preset's `scrub.thumbnails`. */
  removeThumbnails?: boolean;
}
|
||||
|
||||
/**
 * Compresses a PDF through the PyMuPDF module returned by `loadPyMuPDF()`.
 *
 * The options are built from the preset named by `level`, with any field in
 * `customSettings` taking precedence. If the first `compressPdf` call fails,
 * the call is retried once with image processing disabled, and the result of
 * that retry is tagged with `usedFallback: true`.
 *
 * @param fileBlob - The PDF to compress.
 * @param level - Preset name; unknown names fall back to `balanced`.
 * @param customSettings - Optional overrides for individual preset values.
 * @returns Whatever `compressPdf` resolves with, plus `usedFallback: true`
 *   when the image-free retry produced the result.
 * @throws Error with message `PDF compression failed: …` when the retry
 *   fails as well.
 */
export async function performCondenseCompression(
  fileBlob: Blob,
  level: string,
  customSettings?: CondenseCustomSettings
) {
  const pymupdf = await loadPyMuPDF();

  // Own-property check: a plain `CONDENSE_PRESETS[level] || …` lookup would
  // accept inherited keys such as "toString" or "constructor" as presets.
  const preset = Object.prototype.hasOwnProperty.call(CONDENSE_PRESETS, level)
    ? CONDENSE_PRESETS[level as keyof typeof CONDENSE_PRESETS]
    : CONDENSE_PRESETS.balanced;

  const dpiTarget = customSettings?.dpiTarget ?? preset.images.dpiTarget;
  const userThreshold =
    customSettings?.dpiThreshold ?? preset.images.dpiThreshold;
  // Keep the threshold at least 10 DPI above the target.
  const dpiThreshold = Math.max(userThreshold, dpiTarget + 10);

  const options = {
    images: {
      enabled: true,
      quality: customSettings?.imageQuality ?? preset.images.quality,
      dpiTarget,
      dpiThreshold,
      convertToGray: customSettings?.convertToGrayscale ?? false,
    },
    scrub: {
      metadata: customSettings?.removeMetadata ?? preset.scrub.metadata,
      thumbnails: customSettings?.removeThumbnails ?? preset.scrub.thumbnails,
      // Only some presets define xmlMetadata; the others behave as `false`.
      xmlMetadata:
        'xmlMetadata' in preset.scrub ? preset.scrub.xmlMetadata : false,
    },
    subsetFonts: customSettings?.subsetFonts ?? preset.subsetFonts,
    save: {
      garbage: 4 as const,
      deflate: true,
      clean: true,
      useObjstms: true,
    },
  };

  try {
    return await pymupdf.compressPdf(fileBlob, options);
  } catch (primaryError: unknown) {
    // Surface the first failure instead of discarding it, then retry with
    // image processing switched off.
    console.warn(
      '[Compress] Compression with image processing failed, retrying without it:',
      primaryError
    );

    const fallbackOptions = {
      ...options,
      images: {
        ...options.images,
        enabled: false,
      },
    };

    try {
      const result = await pymupdf.compressPdf(fileBlob, fallbackOptions);
      return { ...result, usedFallback: true };
    } catch (fallbackError: unknown) {
      const message = (
        fallbackError as { message?: unknown } | null | undefined
      )?.message;
      const msg =
        typeof message === 'string' && message !== ''
          ? message
          : String(fallbackError);
      throw new Error(`PDF compression failed: ${msg}`);
    }
  }
}
|
||||
|
||||
/**
 * Compresses a PDF by rasterising every page to a JPEG and assembling the
 * images into a new pdf-lib document.
 *
 * Each page is rendered with pdf.js at the preset's `scale`, encoded with
 * `canvas.toBlob('image/jpeg', quality)` and drawn onto a new page whose
 * size equals the rendered viewport.
 *
 * NOTE(review): because the new page uses the scaled viewport size, output
 * page dimensions appear to be the source dimensions multiplied by `scale` —
 * confirm this is intended.
 *
 * @param arrayBuffer - Bytes of the source PDF.
 * @param level - Preset name; unknown names fall back to `balanced`.
 * @returns The bytes of the rebuilt PDF.
 * @throws Error when a 2D canvas context or a JPEG blob cannot be created.
 */
export async function performPhotonCompression(
  arrayBuffer: ArrayBuffer,
  level: string
): Promise<Uint8Array> {
  // Own-property check so inherited keys ("toString", …) are not treated as
  // presets.
  const settings = Object.prototype.hasOwnProperty.call(PHOTON_PRESETS, level)
    ? PHOTON_PRESETS[level as keyof typeof PHOTON_PRESETS]
    : PHOTON_PRESETS.balanced;

  const pdfJsDoc = await getPDFDocument({ data: arrayBuffer }).promise;

  try {
    const newPdfDoc = await PDFDocument.create();

    for (let i = 1; i <= pdfJsDoc.numPages; i++) {
      const page = await pdfJsDoc.getPage(i);
      const viewport = page.getViewport({ scale: settings.scale });
      const canvas = document.createElement('canvas');

      let jpegBlob: Blob;
      try {
        const context = canvas.getContext('2d');
        if (!context) throw new Error('Failed to create canvas context');
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        await page.render({ canvasContext: context, viewport, canvas: canvas })
          .promise;

        jpegBlob = await new Promise<Blob>((resolve, reject) =>
          canvas.toBlob(
            (blob) => {
              if (blob) resolve(blob);
              else reject(new Error('Failed to create JPEG blob'));
            },
            'image/jpeg',
            settings.quality
          )
        );
      } finally {
        // Release canvas memory and the page's pdf.js resources even when
        // rendering or encoding fails.
        canvas.width = 0;
        canvas.height = 0;
        page.cleanup();
      }

      const jpegBytes = await jpegBlob.arrayBuffer();
      const jpegImage = await newPdfDoc.embedJpg(jpegBytes);
      const newPage = newPdfDoc.addPage([viewport.width, viewport.height]);
      newPage.drawImage(jpegImage, {
        x: 0,
        y: 0,
        width: viewport.width,
        height: viewport.height,
      });
    }

    return await newPdfDoc.save();
  } finally {
    // The pdf.js document is only needed while rendering; destroy it on both
    // success and failure. A failure to destroy must not mask the real
    // outcome.
    try {
      await pdfJsDoc.destroy();
    } catch {
      /* ignore */
    }
  }
}
|
||||
@@ -1,15 +1,35 @@
|
||||
import { WasmProvider } from './wasm-provider';
|
||||
|
||||
let cpdfLoaded = false;
|
||||
let cpdfLoadPromise: Promise<void> | null = null;
|
||||
|
||||
//TODO: @ALAM,is it better to use a worker to load the cpdf library?
|
||||
// or just use the browser version?
|
||||
export async function ensureCpdfLoaded(): Promise<void> {
|
||||
function getCpdfUrl(): string | undefined {
|
||||
const userUrl = WasmProvider.getUrl('cpdf');
|
||||
if (userUrl) {
|
||||
const baseUrl = userUrl.endsWith('/') ? userUrl : `${userUrl}/`;
|
||||
return `${baseUrl}coherentpdf.browser.min.js`;
|
||||
}
|
||||
return undefined;
|
||||
}
|
||||
|
||||
export function isCpdfAvailable(): boolean {
|
||||
return WasmProvider.isConfigured('cpdf');
|
||||
}
|
||||
|
||||
export async function isCpdfLoaded(): Promise<void> {
|
||||
if (cpdfLoaded) return;
|
||||
|
||||
if (cpdfLoadPromise) {
|
||||
return cpdfLoadPromise;
|
||||
}
|
||||
|
||||
const cpdfUrl = getCpdfUrl();
|
||||
if (!cpdfUrl) {
|
||||
throw new Error(
|
||||
'CoherentPDF is not configured. Please configure it in WASM Settings.'
|
||||
);
|
||||
}
|
||||
|
||||
cpdfLoadPromise = new Promise((resolve, reject) => {
|
||||
if (typeof (window as any).coherentpdf !== 'undefined') {
|
||||
cpdfLoaded = true;
|
||||
@@ -18,13 +38,14 @@ export async function ensureCpdfLoaded(): Promise<void> {
|
||||
}
|
||||
|
||||
const script = document.createElement('script');
|
||||
script.src = import.meta.env.BASE_URL + 'coherentpdf.browser.min.js';
|
||||
script.src = cpdfUrl;
|
||||
script.onload = () => {
|
||||
cpdfLoaded = true;
|
||||
console.log('[CPDF] Loaded from:', script.src);
|
||||
resolve();
|
||||
};
|
||||
script.onerror = () => {
|
||||
reject(new Error('Failed to load CoherentPDF library'));
|
||||
reject(new Error('Failed to load CoherentPDF library from: ' + cpdfUrl));
|
||||
};
|
||||
document.head.appendChild(script);
|
||||
});
|
||||
@@ -32,11 +53,7 @@ export async function ensureCpdfLoaded(): Promise<void> {
|
||||
return cpdfLoadPromise;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the cpdf instance, ensuring it's loaded first
|
||||
*/
|
||||
export async function getCpdf(): Promise<any> {
|
||||
await ensureCpdfLoaded();
|
||||
await isCpdfLoaded();
|
||||
return (window as any).coherentpdf;
|
||||
}
|
||||
|
||||
|
||||
90
src/js/utils/csv-to-pdf.ts
Normal file
90
src/js/utils/csv-to-pdf.ts
Normal file
@@ -0,0 +1,90 @@
|
||||
import { jsPDF } from 'jspdf';
|
||||
import autoTable from 'jspdf-autotable';
|
||||
import Papa from 'papaparse';
|
||||
|
||||
/** Options accepted by `convertCsvToPdf`. */
export interface CsvToPdfOptions {
  /**
   * Progress callback, invoked with a percentage and a human-readable status
   * message at each stage of the conversion.
   */
  onProgress?: (percent: number, message: string) => void;
}
|
||||
|
||||
/**
 * Parses a CSV file with Papa Parse and renders it as a single table in a
 * landscape A4 PDF using jsPDF and jspdf-autotable.
 *
 * Rows whose cells are all empty or whitespace are dropped; the first
 * remaining row becomes the table header.
 *
 * @param file - The CSV file to convert.
 * @param options - Optional settings; `onProgress` receives stage updates.
 * @returns A promise resolving to the generated PDF as a Blob. It rejects
 *   with `CSV file is empty` when no non-empty row exists, and with
 *   `Failed to parse CSV: …` when Papa Parse reports an error.
 */
export async function convertCsvToPdf(
  file: File,
  options?: CsvToPdfOptions
): Promise<Blob> {
  const report = options?.onProgress;

  return new Promise((resolve, reject) => {
    report?.(10, 'Reading CSV file...');

    // Builds the PDF from the parsed rows and settles the outer promise.
    const renderTable = (parsedRows: string[][]): void => {
      report?.(50, 'Generating PDF...');

      // Drop rows that contain no visible content.
      const nonEmptyRows = parsedRows.filter((row) =>
        row.some((cell) => cell && cell.trim() !== '')
      );

      if (nonEmptyRows.length === 0) {
        reject(new Error('CSV file is empty'));
        return;
      }

      const pdf = new jsPDF({
        orientation: 'landscape', // wide tables fit better
        unit: 'mm',
        format: 'a4',
      });

      // First row is the header, everything after it is the body.
      const [headerRow, ...bodyRows] = nonEmptyRows;

      report?.(70, 'Creating table...');

      autoTable(pdf, {
        head: [headerRow],
        body: bodyRows,
        startY: 20,
        styles: {
          fontSize: 9,
          cellPadding: 3,
          overflow: 'linebreak',
          cellWidth: 'wrap',
        },
        headStyles: {
          fillColor: [41, 128, 185], // blue header
          textColor: 255,
          fontStyle: 'bold',
        },
        alternateRowStyles: {
          fillColor: [245, 245, 245], // light gray stripes
        },
        margin: { top: 20, left: 10, right: 10 },
        theme: 'striped',
      });

      report?.(90, 'Finalizing PDF...');

      const pdfBlob = pdf.output('blob');

      report?.(100, 'Complete!');
      resolve(pdfBlob);
    };

    Papa.parse(file, {
      complete: (results) => {
        try {
          renderTable(results.data as string[][]);
        } catch (error) {
          reject(error);
        }
      },
      error: (error) => {
        reject(new Error(`Failed to parse CSV: ${error.message}`));
      },
    });
  });
}
|
||||
@@ -2,7 +2,7 @@
|
||||
// This script applies the full-width preference from localStorage to page uploaders
|
||||
|
||||
export function initFullWidthMode() {
|
||||
const savedFullWidth = localStorage.getItem('fullWidthMode') === 'true';
|
||||
const savedFullWidth = localStorage.getItem('fullWidthMode') !== 'false';
|
||||
|
||||
if (savedFullWidth) {
|
||||
applyFullWidthMode(true);
|
||||
|
||||
89
src/js/utils/ghostscript-dynamic-loader.ts
Normal file
89
src/js/utils/ghostscript-dynamic-loader.ts
Normal file
@@ -0,0 +1,89 @@
|
||||
import { WasmProvider } from './wasm-provider.js';
|
||||
|
||||
// Ghostscript instance produced by a successful load; reused by later calls.
let cachedGS: any = null;

// Promise of the load currently in flight, shared between concurrent callers.
let loadPromise: Promise<any> | null = null;
|
||||
|
||||
/**
 * Shape of the object that `loadGhostscript` resolves with.
 *
 * NOTE(review): the method semantics below are inferred from the names; the
 * implementation lives in the externally loaded wrapper — confirm there.
 */
export interface GhostscriptInterface {
  /**
   * Takes a PDF buffer and a profile string and resolves with an
   * ArrayBuffer (presumably the PDF/A-converted document).
   */
  convertToPDFA(pdfBuffer: ArrayBuffer, profile: string): Promise<ArrayBuffer>;

  /**
   * Takes a PDF buffer and resolves with an ArrayBuffer (presumably the
   * document with fonts converted to outlines).
   */
  fontToOutline(pdfBuffer: ArrayBuffer): Promise<ArrayBuffer>;
}
|
||||
|
||||
/**
 * Loads the Ghostscript wrapper script from the URL configured in
 * `WasmProvider` and returns the resulting instance.
 *
 * The instance is cached after the first successful load, and concurrent
 * callers share a single in-flight load. A failed attempt never leaves state
 * behind: neither the "not configured" error nor a failed `init()` is cached,
 * so a later call can retry.
 *
 * @returns The Ghostscript instance exposed by the wrapper.
 * @throws Error when Ghostscript is not configured, when the script cannot be
 *   loaded, or when the wrapper exposes neither `loadGS()` nor a
 *   `GhostscriptWASM` class.
 */
export async function loadGhostscript(): Promise<GhostscriptInterface> {
  if (cachedGS) {
    return cachedGS;
  }

  if (loadPromise) {
    return loadPromise;
  }

  // Validate configuration BEFORE publishing loadPromise; otherwise the
  // rejected promise would be cached and returned to every later caller, even
  // after the user has configured Ghostscript.
  const baseUrl = WasmProvider.getUrl('ghostscript');
  if (!baseUrl) {
    throw new Error(
      'Ghostscript is not configured. Please configure it in Advanced Settings.'
    );
  }

  const normalizedUrl = baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`;

  loadPromise = (async () => {
    try {
      const wrapperUrl = `${normalizedUrl}gs.js`;

      // This await also guarantees the catch below runs only after
      // loadPromise has been assigned, so resetting it there sticks.
      await loadScript(wrapperUrl);

      const globalScope: any =
        typeof globalThis !== 'undefined' ? globalThis : window;

      // Build into a local first: the module-level cache must only ever hold
      // a fully initialised instance.
      let instance: any;
      if (typeof globalScope.loadGS === 'function') {
        instance = await globalScope.loadGS({
          baseUrl: normalizedUrl,
        });
      } else if (typeof globalScope.GhostscriptWASM === 'function') {
        instance = new globalScope.GhostscriptWASM(normalizedUrl);
        await instance.init?.();
      } else {
        throw new Error(
          'Ghostscript wrapper did not expose expected interface. Expected loadGS() or GhostscriptWASM class.'
        );
      }

      cachedGS = instance;
      return instance;
    } catch (error: unknown) {
      // Allow a later call to retry from scratch.
      loadPromise = null;
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(
        `Failed to load Ghostscript from ${normalizedUrl}: ${reason}`
      );
    }
  })();

  return loadPromise;
}
|
||||
|
||||
/**
 * Injects a classic `<script>` tag for `url` into `document.head` and
 * resolves once it has loaded.
 *
 * If a script element with exactly this `src` attribute already exists, the
 * promise resolves immediately. A script that fails to load is removed from
 * the DOM again so that a retry performs a real load instead of finding the
 * dead element and resolving at once.
 *
 * @param url - Absolute or relative URL of the script.
 * @throws Error (via rejection) with message `Failed to load script: <url>`.
 */
function loadScript(url: string): Promise<void> {
  return new Promise((resolve, reject) => {
    // Compare the attribute directly rather than interpolating `url` into a
    // CSS selector, which breaks for URLs containing quotes or backslashes.
    const alreadyPresent = Array.from(document.scripts).some(
      (existing) => existing.getAttribute('src') === url
    );
    if (alreadyPresent) {
      resolve();
      return;
    }

    const script = document.createElement('script');
    script.src = url;
    script.type = 'text/javascript';
    script.async = true;

    script.onload = () => resolve();
    script.onerror = () => {
      // Leave no trace of the failed attempt.
      script.remove();
      reject(new Error(`Failed to load script: ${url}`));
    };

    document.head.appendChild(script);
  });
}
|
||||
|
||||
/**
 * Reports whether `WasmProvider` considers the `ghostscript` package
 * configured. Does not attempt to load anything.
 */
export function isGhostscriptAvailable(): boolean {
  const configured = WasmProvider.isConfigured('ghostscript');
  return configured;
}
|
||||
|
||||
/**
 * Forgets both the in-flight load promise and the cached Ghostscript
 * instance, so the next `loadGhostscript()` call starts a fresh load.
 */
export function clearGhostscriptCache(): void {
  loadPromise = null;
  cachedGS = null;
}
|
||||
481
src/js/utils/ghostscript-loader.ts
Normal file
481
src/js/utils/ghostscript-loader.ts
Normal file
@@ -0,0 +1,481 @@
|
||||
/**
|
||||
* PDF/A Conversion using Ghostscript WASM
|
||||
* Converts PDFs to PDF/A-1b, PDF/A-2b, or PDF/A-3b format.
|
||||
* Requires user to configure Ghostscript URL in WASM Settings.
|
||||
*/
|
||||
|
||||
import {
|
||||
getWasmBaseUrl,
|
||||
fetchWasmFile,
|
||||
isWasmAvailable,
|
||||
} from '../config/wasm-cdn-config.js';
|
||||
import { PDFDocument, PDFDict, PDFName, PDFArray } from 'pdf-lib';
|
||||
|
||||
/**
 * The subset of the Ghostscript WASM module used by this file: an in-memory
 * file system plus the program entry point.
 */
interface GhostscriptModule {
  /** File-system operations used to stage inputs and collect outputs. */
  FS: {
    /** Writes `data` (bytes or text) to `path`. */
    writeFile(path: string, data: Uint8Array | string): void;
    /** Reads the contents of `path` as bytes. */
    readFile(path: string, opts?: { encoding?: string }): Uint8Array;
    /** Removes the file at `path`. */
    unlink(path: string): void;
    /** Returns information about `path`; only `size` is used here. */
    stat(path: string): { size: number };
  };
  /** Runs Ghostscript with the given arguments and returns its exit code. */
  callMain(args: string[]): number;
}
|
||||
|
||||
/** PDF/A conformance levels accepted by the conversion functions below. */
export type PdfALevel = 'PDF/A-1b' | 'PDF/A-2b' | 'PDF/A-3b';

// Ghostscript module shared by every conversion in this file; filled on first
// use or injected through setCachedGsModule().
let cachedGsModule: GhostscriptModule | null = null;
|
||||
|
||||
/**
 * Stores an already-initialised Ghostscript module so that later conversions
 * reuse it instead of loading a new one.
 *
 * @param module - The module to cache.
 */
export function setCachedGsModule(module: GhostscriptModule): void {
  cachedGsModule = module;
}
|
||||
|
||||
/**
 * Returns the cached Ghostscript module, or `null` when none has been loaded
 * or injected yet.
 */
export function getCachedGsModule(): GhostscriptModule | null {
  const current = cachedGsModule;
  return current;
}
|
||||
|
||||
/**
 * Downloads `gs.js` from the configured Ghostscript base URL, imports it via
 * a temporary blob URL and instantiates the module through its default
 * export.
 *
 * Requests for `.wasm` files are redirected to `<base>/gs.wasm`; every other
 * file is resolved relative to the base URL. Ghostscript's stdout and stderr
 * are forwarded to the console.
 *
 * @returns The instantiated Ghostscript module.
 * @throws Error when Ghostscript is not configured, when `gs.js` cannot be
 *   fetched, or when it has no default-exported factory function.
 */
export async function loadGsModule(): Promise<GhostscriptModule> {
  const gsBaseUrl = getWasmBaseUrl('ghostscript');
  // Fail with an actionable message instead of a TypeError from `.endsWith`
  // on a missing URL.
  if (!gsBaseUrl) {
    throw new Error(
      'Ghostscript is not configured. Please configure it in WASM Settings.'
    );
  }
  const normalizedUrl = gsBaseUrl.endsWith('/') ? gsBaseUrl : `${gsBaseUrl}/`;

  const gsJsUrl = `${normalizedUrl}gs.js`;
  const response = await fetch(gsJsUrl);
  if (!response.ok) {
    throw new Error(`Failed to fetch gs.js: HTTP ${response.status}`);
  }
  const jsText = await response.text();
  const blob = new Blob([jsText], { type: 'application/javascript' });
  const blobUrl = URL.createObjectURL(blob);

  try {
    const gsModule = await import(/* @vite-ignore */ blobUrl);
    const ModuleFactory = gsModule.default;
    if (typeof ModuleFactory !== 'function') {
      throw new Error(
        `Ghostscript script at ${gsJsUrl} has no default-exported module factory`
      );
    }

    return (await ModuleFactory({
      locateFile: (path: string) => {
        if (path.endsWith('.wasm')) {
          return `${normalizedUrl}gs.wasm`;
        }
        return `${normalizedUrl}${path}`;
      },
      print: (text: string) => console.log('[GS]', text),
      printErr: (text: string) => console.error('[GS Error]', text),
    })) as GhostscriptModule;
  } finally {
    // The blob URL is only needed for the import itself.
    URL.revokeObjectURL(blobUrl);
  }
}
|
||||
|
||||
/**
 * Converts a PDF to PDF/A with Ghostscript WASM.
 *
 * Steps:
 *  1. Load (or reuse) the Ghostscript module.
 *  2. Stage the input PDF, the sRGB ICC profile and a generated PostScript
 *     definition file (with the ICC profile embedded as hex) in `/tmp`.
 *  3. Run `pdfwrite` with the PDF/A options for the requested level.
 *  4. Read the output and remove all staged files — on success and on every
 *     failure path.
 *  5. For levels other than PDF/A-1b, post-process the result with
 *     `addPageGroupDictionaries`; a failure there is logged and the
 *     un-post-processed output is returned.
 *
 * @param pdfData - Bytes of the source PDF.
 * @param level - Target conformance level (default `PDF/A-2b`).
 * @param onProgress - Optional callback receiving status messages.
 * @returns Bytes of the converted PDF.
 * @throws Error when Ghostscript is not configured, the PDF/A assets cannot
 *   be prepared, Ghostscript throws or exits non-zero, or no output file is
 *   produced.
 */
export async function convertToPdfA(
  pdfData: Uint8Array,
  level: PdfALevel = 'PDF/A-2b',
  onProgress?: (msg: string) => void
): Promise<Uint8Array> {
  if (!isWasmAvailable('ghostscript')) {
    throw new Error(
      'Ghostscript is not configured. Please configure it in WASM Settings.'
    );
  }

  onProgress?.('Loading Ghostscript...');

  let gs: GhostscriptModule;

  if (cachedGsModule) {
    gs = cachedGsModule;
  } else {
    gs = await loadGsModule();
    cachedGsModule = gs;
  }

  const pdfaMap: Record<PdfALevel, string> = {
    'PDF/A-1b': '1',
    'PDF/A-2b': '2',
    'PDF/A-3b': '3',
  };

  const inputPath = '/tmp/input.pdf';
  const outputPath = '/tmp/output.pdf';
  const iccPath = '/tmp/pdfa.icc';
  const pdfaDefPath = '/tmp/pdfa.ps';
  const tempPaths = [inputPath, outputPath, iccPath, pdfaDefPath];

  let output: Uint8Array;

  try {
    gs.FS.writeFile(inputPath, pdfData);
    console.log('[Ghostscript] Input file size:', pdfData.length);

    onProgress?.(`Converting to ${level}...`);

    try {
      const iccFileName = 'sRGB_IEC61966-2-1_no_black_scaling.icc';
      const iccUrl = `${import.meta.env.BASE_URL}${iccFileName}`;
      const response = await fetch(iccUrl);

      if (!response.ok) {
        throw new Error(
          `Failed to fetch ICC profile from ${iccUrl}: HTTP ${response.status}`
        );
      }

      const iccData = new Uint8Array(await response.arrayBuffer());
      console.log(
        '[Ghostscript] sRGB v2 ICC profile loaded:',
        iccData.length,
        'bytes'
      );

      gs.FS.writeFile(iccPath, iccData);
      console.log('[Ghostscript] sRGB ICC profile written to FS:', iccPath);

      // The profile is embedded into the PostScript file as a hex string.
      const iccHex = Array.from(iccData)
        .map((b) => b.toString(16).padStart(2, '0'))
        .join('');
      console.log('[Ghostscript] ICC profile hex length:', iccHex.length);

      const pdfaSubtype = level === 'PDF/A-1b' ? '/GTS_PDFA1' : '/GTS_PDFA';

      const pdfaPS = `%!
% PDF/A definition file for ${level}

% Define the ICC profile stream object with embedded hex data
[/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
[{icc_PDFA} << /N 3 >> /PUT pdfmark
[{icc_PDFA} <${iccHex}> /PUT pdfmark

% Define the OutputIntent dictionary
[/_objdef {OutputIntent_PDFA} /type /dict /OBJ pdfmark
[{OutputIntent_PDFA} <<
/Type /OutputIntent
/S ${pdfaSubtype}
/DestOutputProfile {icc_PDFA}
/OutputConditionIdentifier (sRGB IEC61966-2.1)
/Info (sRGB IEC61966-2.1)
/RegistryName (http://www.color.org)
>> /PUT pdfmark

% Attach OutputIntent to the document Catalog
[{Catalog} << /OutputIntents [ {OutputIntent_PDFA} ] >> /PUT pdfmark
`;

      gs.FS.writeFile(pdfaDefPath, pdfaPS);
      console.log(
        '[Ghostscript] PDFA PostScript created with embedded ICC hex data'
      );
    } catch (e) {
      console.error('[Ghostscript] Failed to setup PDF/A assets:', e);
      throw new Error('Conversion failed: could not create PDF/A definition');
    }

    const args = [
      '-dNOSAFER',
      '-dBATCH',
      '-dNOPAUSE',
      '-sDEVICE=pdfwrite',
      `-dPDFA=${pdfaMap[level]}`,
      '-dPDFACompatibilityPolicy=1',
      `-dCompatibilityLevel=${level === 'PDF/A-1b' ? '1.4' : '1.7'}`,
      '-sColorConversionStrategy=UseDeviceIndependentColor',
      '-sICCProfilesDir=/tmp/',
      `-sOutputICCProfile=${iccPath}`,
      `-sDefaultRGBProfile=${iccPath}`,
      `-sBlendColorProfile=${iccPath}`,
      '-dCompressPages=true',
      '-dWriteObjStms=false',
      '-dWriteXRefStm=false',
      '-dEmbedAllFonts=true',
      '-dSubsetFonts=true',
      '-dAutoRotatePages=/None',
      `-sOutputFile=${outputPath}`,
      pdfaDefPath,
      inputPath,
    ];

    console.log('[Ghostscript] Running PDF/A conversion...');
    // Diagnostic only: a failing version check must not abort the conversion.
    try {
      console.log('[Ghostscript] Checking version:');
      gs.callMain(['--version']);
    } catch (e) {
      console.warn('[Ghostscript] Could not check version:', e);
    }

    let exitCode: number;
    try {
      exitCode = gs.callMain(args);
    } catch (e) {
      console.error('[Ghostscript] Exception:', e);
      throw new Error(`Ghostscript threw an exception: ${e}`);
    }

    console.log('[Ghostscript] Exit code:', exitCode);

    if (exitCode !== 0) {
      throw new Error(
        `Ghostscript conversion failed with exit code ${exitCode}`
      );
    }

    try {
      const stat = gs.FS.stat(outputPath);
      console.log('[Ghostscript] Output file size:', stat.size);
      output = gs.FS.readFile(outputPath);
    } catch (e) {
      console.error('[Ghostscript] Failed to read output:', e);
      throw new Error('Ghostscript did not produce output file');
    }
  } finally {
    // Single cleanup point: staged files are removed whether the conversion
    // succeeded or failed. Files that were never created are skipped.
    for (const path of tempPaths) {
      try {
        gs.FS.unlink(path);
      } catch {
        /* ignore */
      }
    }
  }

  if (level !== 'PDF/A-1b') {
    onProgress?.('Post-processing for transparency compliance...');
    console.log(
      '[Ghostscript] Adding Group dictionaries to pages for transparency compliance...'
    );

    try {
      output = await addPageGroupDictionaries(output);
      console.log('[Ghostscript] Page Group dictionaries added successfully');
    } catch (e) {
      // Best effort: keep the Ghostscript output if post-processing fails.
      console.error('[Ghostscript] Failed to add Group dictionaries:', e);
    }
  }

  return output;
}
|
||||
|
||||
/**
 * Rewrites transparency Group dictionaries so that their colour space is the
 * ICC profile referenced by the document's first OutputIntent.
 *
 * - Pages without a Group entry get a new Transparency group using the
 *   ICC-based colour space.
 * - Existing page groups, and groups on Form XObjects, have `CS` replaced
 *   when it is missing or is DeviceRGB, DeviceGray or DeviceCMYK.
 *
 * When no `DestOutputProfile` is found nothing is modified, but the document
 * is still re-saved.
 *
 * @param pdfData - Bytes of the PDF to post-process.
 * @returns Bytes of the re-saved PDF (object streams disabled).
 */
async function addPageGroupDictionaries(
  pdfData: Uint8Array
): Promise<Uint8Array> {
  const doc = await PDFDocument.load(pdfData, {
    ignoreEncryption: true,
    updateMetadata: false,
  });

  const csKey = PDFName.of('CS');
  const groupKey = PDFName.of('Group');

  // Locate the ICC profile of the first OutputIntent, if there is one.
  let iccProfileRef: ReturnType<typeof PDFDict.prototype.get> = undefined;
  const intents = doc.catalog.lookup(PDFName.of('OutputIntents'));
  if (intents instanceof PDFArray) {
    const leadingIntent = intents.lookup(0);
    if (leadingIntent instanceof PDFDict) {
      iccProfileRef = leadingIntent.get(PDFName.of('DestOutputProfile'));
    }
  }

  // Builds a fresh `[/ICCBased <profile>]` colour space array.
  const buildIccBased = (
    profile: NonNullable<ReturnType<typeof PDFDict.prototype.get>>
  ) => doc.context.obj([PDFName.of('ICCBased'), profile]);

  const deviceSpaces = ['DeviceRGB', 'DeviceGray', 'DeviceCMYK'];

  // Points a group's CS at the ICC profile when CS is absent or device-based.
  const retargetGroup = (group: PDFDict): void => {
    if (!iccProfileRef) return;

    const existingCS = group.get(csKey);
    const shouldReplace =
      existingCS instanceof PDFName
        ? deviceSpaces.includes(existingCS.decodeText())
        : !existingCS;

    if (shouldReplace) {
      group.set(csKey, buildIccBased(iccProfileRef));
    }
  };

  for (const page of doc.getPages()) {
    const node = page.node;
    const currentGroup = node.lookup(groupKey);

    if (currentGroup) {
      if (currentGroup instanceof PDFDict) {
        retargetGroup(currentGroup);
      }
      continue;
    }

    if (iccProfileRef) {
      const colorSpace = buildIccBased(iccProfileRef);
      const newGroup = doc.context.obj({
        Type: 'Group',
        S: 'Transparency',
        I: false,
        K: false,
      });
      (newGroup as PDFDict).set(csKey, colorSpace);
      node.set(groupKey, newGroup);
    }
  }

  if (iccProfileRef) {
    // Form XObjects can carry their own transparency groups.
    for (const [, obj] of doc.context.enumerateIndirectObjects()) {
      if (
        obj instanceof PDFDict ||
        (obj && typeof obj === 'object' && 'dict' in obj)
      ) {
        // Stream-like objects expose their dictionary through `.dict`.
        const dict =
          'dict' in obj ? (obj as { dict: PDFDict }).dict : (obj as PDFDict);

        const subtype = dict.get(PDFName.of('Subtype'));
        if (subtype instanceof PDFName && subtype.decodeText() === 'Form') {
          const formGroup = dict.lookup(groupKey);
          if (formGroup instanceof PDFDict) {
            retargetGroup(formGroup);
          }
        }
      }
    }
  }

  return await doc.save({
    useObjectStreams: false,
    addDefaultPage: false,
    updateFieldAppearances: false,
  });
}
|
||||
|
||||
/**
 * File-based convenience wrapper around `convertToPdfA`.
 *
 * @param file - The PDF file to convert.
 * @param level - Target conformance level (default `PDF/A-2b`).
 * @param onProgress - Optional callback receiving status messages.
 * @returns The converted document as an `application/pdf` Blob.
 */
export async function convertFileToPdfA(
  file: File,
  level: PdfALevel = 'PDF/A-2b',
  onProgress?: (msg: string) => void
): Promise<Blob> {
  const sourceBytes = new Uint8Array(await file.arrayBuffer());
  const converted = await convertToPdfA(sourceBytes, level, onProgress);

  // Copy into a freshly allocated array before building the Blob.
  const blobBytes = new Uint8Array(converted.length);
  blobBytes.set(converted);

  return new Blob([blobBytes], { type: 'application/pdf' });
}
|
||||
|
||||
/**
 * Re-writes a PDF with Ghostscript's `pdfwrite` device and
 * `-dNoOutputFonts`, which converts text to outlines.
 *
 * The input is staged at `/tmp/input.pdf` and the result is read from
 * `/tmp/output.pdf`; both files are removed again on success and on every
 * failure path.
 *
 * @param pdfData - Bytes of the source PDF.
 * @param onProgress - Optional callback receiving status messages.
 * @returns Bytes of the converted PDF.
 * @throws Error when Ghostscript is not configured, throws, exits non-zero,
 *   or produces no output file.
 */
export async function convertFontsToOutlines(
  pdfData: Uint8Array,
  onProgress?: (msg: string) => void
): Promise<Uint8Array> {
  if (!isWasmAvailable('ghostscript')) {
    throw new Error(
      'Ghostscript is not configured. Please configure it in WASM Settings.'
    );
  }

  onProgress?.('Loading Ghostscript...');

  let gs: GhostscriptModule;

  if (cachedGsModule) {
    gs = cachedGsModule;
  } else {
    gs = await loadGsModule();
    cachedGsModule = gs;
  }

  const inputPath = '/tmp/input.pdf';
  const outputPath = '/tmp/output.pdf';

  try {
    gs.FS.writeFile(inputPath, pdfData);

    onProgress?.('Converting fonts to outlines...');

    const args = [
      '-dNOSAFER',
      '-dBATCH',
      '-dNOPAUSE',
      '-sDEVICE=pdfwrite',
      '-dNoOutputFonts',
      '-dCompressPages=true',
      '-dAutoRotatePages=/None',
      `-sOutputFile=${outputPath}`,
      inputPath,
    ];

    let exitCode: number;
    try {
      exitCode = gs.callMain(args);
    } catch (e) {
      throw new Error(`Ghostscript threw an exception: ${e}`);
    }

    if (exitCode !== 0) {
      throw new Error(
        `Ghostscript conversion failed with exit code ${exitCode}`
      );
    }

    try {
      return gs.FS.readFile(outputPath);
    } catch {
      throw new Error('Ghostscript did not produce output file');
    }
  } finally {
    // Single cleanup point for every exit path; a file that was never
    // created is skipped.
    for (const path of [inputPath, outputPath]) {
      try {
        gs.FS.unlink(path);
      } catch {
        /* ignore */
      }
    }
  }
}
|
||||
|
||||
/**
 * File-based convenience wrapper around `convertFontsToOutlines`.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional callback receiving status messages.
 * @returns The converted document as an `application/pdf` Blob.
 */
export async function convertFileToOutlines(
  file: File,
  onProgress?: (msg: string) => void
): Promise<Blob> {
  const sourceBytes = new Uint8Array(await file.arrayBuffer());
  const outlined = await convertFontsToOutlines(sourceBytes, onProgress);

  // Copy into a freshly allocated array before building the Blob.
  const blobBytes = new Uint8Array(outlined.length);
  blobBytes.set(outlined);

  return new Blob([blobBytes], { type: 'application/pdf' });
}
|
||||
@@ -2,8 +2,7 @@ import createModule from '@neslinesli93/qpdf-wasm';
|
||||
import { showLoader, hideLoader, showAlert } from '../ui.js';
|
||||
import { createIcons } from 'lucide';
|
||||
import { state, resetState } from '../state.js';
|
||||
import * as pdfjsLib from 'pdfjs-dist'
|
||||
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
const STANDARD_SIZES = {
|
||||
A4: { width: 595.28, height: 841.89 },
|
||||
@@ -50,14 +49,14 @@ export function convertPoints(points: any, unit: any) {
|
||||
|
||||
// Convert hex color to RGB
|
||||
export function hexToRgb(hex: string): { r: number; g: number; b: number } {
|
||||
const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex)
|
||||
const result = /^#?([a-f\d]{2})([a-f\d]{2})([a-f\d]{2})$/i.exec(hex);
|
||||
return result
|
||||
? {
|
||||
r: parseInt(result[1], 16) / 255,
|
||||
g: parseInt(result[2], 16) / 255,
|
||||
b: parseInt(result[3], 16) / 255,
|
||||
}
|
||||
: { r: 0, g: 0, b: 0 }
|
||||
r: parseInt(result[1], 16) / 255,
|
||||
g: parseInt(result[2], 16) / 255,
|
||||
b: parseInt(result[3], 16) / 255,
|
||||
}
|
||||
: { r: 0, g: 0, b: 0 };
|
||||
}
|
||||
|
||||
export const formatBytes = (bytes: any, decimals = 1) => {
|
||||
@@ -89,7 +88,10 @@ export const readFileAsArrayBuffer = (file: any) => {
|
||||
});
|
||||
};
|
||||
|
||||
export function parsePageRanges(rangeString: string, totalPages: number): number[] {
|
||||
export function parsePageRanges(
|
||||
rangeString: string,
|
||||
totalPages: number
|
||||
): number[] {
|
||||
if (!rangeString || rangeString.trim() === '') {
|
||||
return Array.from({ length: totalPages }, (_, i) => i);
|
||||
}
|
||||
@@ -128,11 +130,9 @@ export function parsePageRanges(rangeString: string, totalPages: number): number
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return Array.from(indices).sort((a, b) => a - b);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Formats an ISO 8601 date string (e.g., "2008-02-21T17:15:56-08:00")
|
||||
* into a localized, human-readable string.
|
||||
@@ -198,7 +198,7 @@ export function formatStars(num: number) {
|
||||
return (num / 1000).toFixed(1) + 'K';
|
||||
}
|
||||
return num.toLocaleString();
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncates a filename to a maximum length, adding ellipsis if needed.
|
||||
@@ -207,14 +207,18 @@ export function formatStars(num: number) {
|
||||
* @param maxLength - Maximum length (default: 30)
|
||||
* @returns Truncated filename with ellipsis if needed
|
||||
*/
|
||||
export function truncateFilename(filename: string, maxLength: number = 25): string {
|
||||
export function truncateFilename(
|
||||
filename: string,
|
||||
maxLength: number = 25
|
||||
): string {
|
||||
if (filename.length <= maxLength) {
|
||||
return filename;
|
||||
}
|
||||
|
||||
const lastDotIndex = filename.lastIndexOf('.');
|
||||
const extension = lastDotIndex !== -1 ? filename.substring(lastDotIndex) : '';
|
||||
const nameWithoutExt = lastDotIndex !== -1 ? filename.substring(0, lastDotIndex) : filename;
|
||||
const nameWithoutExt =
|
||||
lastDotIndex !== -1 ? filename.substring(0, lastDotIndex) : filename;
|
||||
|
||||
const availableLength = maxLength - extension.length - 3; // 3 for '...'
|
||||
|
||||
@@ -225,7 +229,10 @@ export function truncateFilename(filename: string, maxLength: number = 25): stri
|
||||
return nameWithoutExt.substring(0, availableLength) + '...' + extension;
|
||||
}
|
||||
|
||||
export function formatShortcutDisplay(shortcut: string, isMac: boolean): string {
|
||||
export function formatShortcutDisplay(
|
||||
shortcut: string,
|
||||
isMac: boolean
|
||||
): string {
|
||||
if (!shortcut) return '';
|
||||
return shortcut
|
||||
.replace('mod', isMac ? '⌘' : 'Ctrl')
|
||||
@@ -233,7 +240,7 @@ export function formatShortcutDisplay(shortcut: string, isMac: boolean): string
|
||||
.replace('alt', isMac ? '⌥' : 'Alt')
|
||||
.replace('shift', 'Shift')
|
||||
.split('+')
|
||||
.map(k => k.charAt(0).toUpperCase() + k.slice(1))
|
||||
.map((k) => k.charAt(0).toUpperCase() + k.slice(1))
|
||||
.join(isMac ? '' : '+');
|
||||
}
|
||||
|
||||
@@ -263,7 +270,7 @@ export function resetAndReloadTool(preResetCallback?: () => void) {
|
||||
export function getPDFDocument(src: any) {
|
||||
let params = src;
|
||||
|
||||
// Handle different input types similar to how getDocument handles them,
|
||||
// Handle different input types similar to how getDocument handles them,
|
||||
// but we ensure we have an object to attach wasmUrl to.
|
||||
if (typeof src === 'string') {
|
||||
params = { url: src };
|
||||
@@ -285,20 +292,171 @@ export function getPDFDocument(src: any) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a sanitized PDF filename.
|
||||
*
|
||||
* The provided filename is processed as follows:
|
||||
* - Removes a trailing `.pdf` file extension (case-insensitive)
|
||||
* - Trims leading and trailing whitespace
|
||||
* - Truncates the name to a maximum of 80 characters
|
||||
*
|
||||
* @param filename The original filename (including extension)
|
||||
* @returns The sanitized filename without the `.pdf` extension, limited to 80 characters
|
||||
* Escape HTML special characters to prevent XSS
|
||||
* @param text - The text to escape
|
||||
* @returns The escaped text
|
||||
*/
|
||||
export function getCleanPdfFilename(filename: string): string {
|
||||
let clean = filename.replace(/\.pdf$/i, '').trim();
|
||||
if (clean.length > 80) {
|
||||
clean = clean.slice(0, 80);
|
||||
}
|
||||
return clean;
|
||||
/**
 * Escapes the five HTML-significant characters so that `text` can be placed
 * into markup without being interpreted as tags or attribute delimiters.
 *
 * @param text - The text to escape
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by entities
 */
export function escapeHtml(text: string): string {
  const map: Record<string, string> = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#039;',
  };
  // The character class only matches keys of `map`; `?? m` keeps the callback
  // total when indexed access is typed as possibly undefined.
  return text.replace(/[&<>"']/g, (m) => map[m] ?? m);
}
|
||||
|
||||
/**
 * Encodes raw bytes as a base64 string.
 *
 * The bytes are turned into a binary string in slices of 0x8000 bytes so that
 * no single `String.fromCharCode` call receives the whole array as arguments.
 *
 * @param bytes - The bytes to encode
 * @returns The base64 representation of `bytes`
 */
export function uint8ArrayToBase64(bytes: Uint8Array): string {
  const sliceLength = 0x8000;
  let binary = '';
  let offset = 0;

  while (offset < bytes.length) {
    const end = Math.min(offset + sliceLength, bytes.length);
    binary += String.fromCharCode(...bytes.subarray(offset, end));
    offset = end;
  }

  return btoa(binary);
}
|
||||
|
||||
/**
 * Reduces the HTML body of an e-mail to a lighter structure.
 *
 * In order, this:
 *  1. drops `<head>`, `<style>`, `<script>` and `<link>` elements,
 *  2. strips `style`, `class` and `data-*` attributes,
 *  3. removes 1x1 images,
 *  4. unwraps Outlook Safe Links to the URL carried in their `url=` parameter,
 *  5. removes `originalsrc` attributes and shortens `href` values of 500+ chars,
 *  6. strips layout attributes and rewrites table markup to `<div>`/`<span>`,
 *  7. truncates the result to roughly 100000 characters.
 *
 * NOTE: this is a regex-based cleanup, not a complete HTML sanitizer; for
 * example, inline event-handler attributes are not removed here.
 *
 * @param html - Raw e-mail HTML
 * @returns The reduced HTML; falsy input is returned unchanged
 */
export function sanitizeEmailHtml(html: string): string {
  if (!html) return html;

  // The decoded Safe Links target is attacker-controlled. Escape the
  // characters that could close the double-quoted attribute or the tag, so
  // that an encoded `%22` cannot inject additional attributes.
  const escapeAttribute = (value: string): string =>
    value.replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  let sanitized = html;

  // Non-content elements.
  sanitized = sanitized
    .replace(/<head[^>]*>[\s\S]*?<\/head>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<link[^>]*>/gi, '');

  // Presentation and data attributes.
  sanitized = sanitized
    .replace(/\s+style=["'][^"']*["']/gi, '')
    .replace(/\s+class=["'][^"']*["']/gi, '')
    .replace(/\s+data-[a-z-]+=["'][^"']*["']/gi, '');

  // Images declared as 1x1.
  sanitized = sanitized.replace(
    /<img[^>]*(?:width=["']1["'][^>]*height=["']1["']|height=["']1["'][^>]*width=["']1["'])[^>]*\/?>/gi,
    ''
  );

  // Outlook Safe Links: replace the wrapper with the URL from `url=`.
  sanitized = sanitized.replace(
    /href=["']https?:\/\/[^"']*safelinks\.protection\.outlook\.com[^"']*url=([^&"']+)[^"']*["']/gi,
    (match: string, encodedUrl: string) => {
      try {
        return `href="${escapeAttribute(decodeURIComponent(encodedUrl))}"`;
      } catch {
        // Malformed percent-encoding: keep the original link untouched.
        return match;
      }
    }
  );

  sanitized = sanitized.replace(/\s+originalsrc=["'][^"']*["']/gi, '');

  // Very long links: prefer the part before the query string, else cut at 200.
  sanitized = sanitized.replace(
    /href=["']([^"']{500,})["']/gi,
    (_match: string, url: string) => {
      const baseUrl = url.split('?')[0];
      if (baseUrl && baseUrl.length < 200) {
        return `href="${baseUrl}"`;
      }
      return `href="${url.substring(0, 200)}"`;
    }
  );

  sanitized = sanitized.replace(
    /\s+(cellpadding|cellspacing|bgcolor|border|valign|align|width|height|role|dir|id)=["'][^"']*["']/gi,
    ''
  );

  // Table markup -> generic containers.
  // NOTE(review): both `<table>` and `</table>` become an opening `<div>`,
  // which leaves the markup unbalanced; kept as-is, confirm whether intended.
  sanitized = sanitized
    .replace(/<\/?table[^>]*>/gi, '<div>')
    .replace(/<\/?tbody[^>]*>/gi, '')
    .replace(/<\/?thead[^>]*>/gi, '')
    .replace(/<\/?tfoot[^>]*>/gi, '')
    .replace(/<tr[^>]*>/gi, '<div>')
    .replace(/<\/tr>/gi, '</div>')
    .replace(/<td[^>]*>/gi, '<span> ')
    .replace(/<\/td>/gi, ' </span>')
    .replace(/<th[^>]*>/gi, '<strong> ')
    .replace(/<\/th>/gi, ' </strong>');

  // Empty wrappers and runs of identical div tags.
  sanitized = sanitized
    .replace(/<div>\s*<\/div>/gi, '')
    .replace(/<span>\s*<\/span>/gi, '')
    .replace(/(<div>)+/gi, '<div>')
    .replace(/(<\/div>)+/gi, '</div>');

  // Anchors with an empty href are replaced by their text.
  sanitized = sanitized.replace(
    /<a[^>]*href=["']\s*["'][^>]*>([^<]*)<\/a>/gi,
    '$1'
  );

  const MAX_HTML_SIZE = 100000;
  if (sanitized.length > MAX_HTML_SIZE) {
    // Prefer cutting at the last `</div>` before the limit, provided that
    // keeps more than half of the allowed size.
    const truncateAt = sanitized.lastIndexOf('</div>', MAX_HTML_SIZE);
    if (truncateAt > MAX_HTML_SIZE / 2) {
      sanitized = sanitized.substring(0, truncateAt) + '</div></body></html>';
    } else {
      sanitized = sanitized.substring(0, MAX_HTML_SIZE) + '...</body></html>';
    }
  }

  return sanitized;
}
|
||||
|
||||
/**
 * Turns an RFC 2822 style date string into a friendlier form while keeping
 * the wall-clock time and UTC offset exactly as written in the input.
 *
 * Example input:  "Sun, 8 Jan 2017 20:37:44 +0200"
 * Example output: "Sunday, January 8, 2017 at 8:37 PM (UTC+02:00)"
 *
 * @param raw - The raw date string
 * @returns The formatted date, or `raw` unchanged when it does not match the
 *          expected layout
 */
export function formatRawDate(raw: string): string {
  const dayNames: Record<string, string> = {
    Sun: 'Sunday',
    Mon: 'Monday',
    Tue: 'Tuesday',
    Wed: 'Wednesday',
    Thu: 'Thursday',
    Fri: 'Friday',
    Sat: 'Saturday',
  };
  const monthNames: Record<string, string> = {
    Jan: 'January',
    Feb: 'February',
    Mar: 'March',
    Apr: 'April',
    May: 'May',
    Jun: 'June',
    Jul: 'July',
    Aug: 'August',
    Sep: 'September',
    Oct: 'October',
    Nov: 'November',
    Dec: 'December',
  };

  try {
    const parts = raw.match(
      /([A-Za-z]{3}),\s+(\d{1,2})\s+([A-Za-z]{3})\s+(\d{4})\s+(\d{2}):(\d{2})(?::(\d{2}))?\s+([+-]\d{4})/
    );
    if (!parts) return raw;

    // Group 7 (seconds) is matched but not used in the output.
    const dayAbbr = parts[1];
    const dayOfMonth = parts[2];
    const monthAbbr = parts[3];
    const year = parts[4];
    const minutes = parts[6];
    const offset = parts[8];

    // Unknown abbreviations are passed through as written.
    const dayName = dayNames[dayAbbr] || dayAbbr;
    const monthName = monthNames[monthAbbr] || monthAbbr;

    // 24h -> 12h clock; hour 0 and hour 12 both display as 12.
    const hour24 = parseInt(parts[5], 10);
    const meridiem = hour24 >= 12 ? 'PM' : 'AM';
    const hour12 = hour24 % 12 || 12;

    // "+0200" -> "UTC+02:00"
    const zone = `UTC${offset.substring(0, 1)}${offset.substring(1, 3)}:${offset.substring(3, 5)}`;

    return `${dayName}, ${monthName} ${dayOfMonth}, ${year} at ${hour12}:${minutes} ${meridiem} (${zone})`;
  } catch {
    // Fallback to raw string if parsing fails
  }
  return raw;
}
|
||||
|
||||
266
src/js/utils/hocr-transform.ts
Normal file
266
src/js/utils/hocr-transform.ts
Normal file
@@ -0,0 +1,266 @@
|
||||
import {
|
||||
BBox,
|
||||
OcrLine,
|
||||
OcrPage,
|
||||
OcrWord,
|
||||
WordTransform,
|
||||
Baseline,
|
||||
} from '@/types';
|
||||
|
||||
// Matches a `bbox x0 y0 x1 y1` property; captures the four unsigned integers.
const BBOX_PATTERN = /bbox\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/;
|
||||
// Matches a `baseline <slope> <intercept>` property; group 1 is an optionally
// signed decimal (may be empty), group 2 an optionally signed integer.
const BASELINE_PATTERN = /baseline\s+([-+]?\d*\.?\d*)\s+([-+]?\d+)/;
|
||||
// Matches a `textangle <angle>` property; group 1 is an optionally signed
// decimal and may be empty.
const TEXTANGLE_PATTERN = /textangle\s+([-+]?\d*\.?\d*)/;
|
||||
|
||||
/**
 * Extracts the `bbox x0 y0 x1 y1` coordinates from a `title` string.
 *
 * @param title - Contents of an element's `title` attribute
 * @returns The parsed box, or `null` when no `bbox` property is present
 */
export function parseBBox(title: string): BBox | null {
  const found = BBOX_PATTERN.exec(title);
  if (found === null) {
    return null;
  }

  const coordinate = (group: number): number => parseInt(found[group], 10);
  return {
    x0: coordinate(1),
    y0: coordinate(2),
    x1: coordinate(3),
    y1: coordinate(4),
  };
}
|
||||
|
||||
/**
 * Reads the `baseline <slope> <intercept>` property from a `title` string.
 *
 * @param title - Contents of an element's `title` attribute
 * @returns The slope and intercept; each is 0 when the property is absent or
 *          the value does not parse to a non-zero number
 */
export function parseBaseline(title: string): Baseline {
  const found = BASELINE_PATTERN.exec(title);
  const slope = found ? parseFloat(found[1]) || 0 : 0;
  const intercept = found ? parseInt(found[2], 10) || 0 : 0;
  return { slope, intercept };
}
|
||||
|
||||
/**
 * Reads the `textangle <angle>` property from a `title` string.
 *
 * @param title - Contents of an element's `title` attribute
 * @returns The angle as written, or 0 when absent or unparsable
 */
export function parseTextangle(title: string): number {
  const found = TEXTANGLE_PATTERN.exec(title);
  return found === null ? 0 : parseFloat(found[1]) || 0;
}
|
||||
|
||||
/**
 * Reports the writing direction declared by an element's `dir` attribute.
 *
 * @param element - Element to inspect
 * @returns `'rtl'` only when `dir` is exactly `"rtl"`; `'ltr'` otherwise
 */
export function getTextDirection(element: Element): 'ltr' | 'rtl' {
  if (element.getAttribute('dir') === 'rtl') {
    return 'rtl';
  }
  return 'ltr';
}
|
||||
|
||||
/**
 * Decides whether word breaks should be injected for an element's text, based
 * on its `lang` attribute.
 *
 * @param element - Element whose `lang` attribute is inspected
 * @returns `false` when `lang` is one of the listed Chinese/Japanese/Korean
 *          codes, `true` otherwise (including when `lang` is missing)
 */
export function shouldInjectWordBreaks(element: Element): boolean {
  const noBreakLangs = new Set([
    'chi_sim',
    'chi_tra',
    'jpn',
    'kor',
    'zh',
    'ja',
    'ko',
  ]);
  const lang = element.getAttribute('lang') ?? '';
  return !noBreakLangs.has(lang);
}
|
||||
|
||||
/**
 * Applies Unicode NFKC normalization to `text`.
 *
 * @param text - Text to normalize
 * @returns The NFKC-normalized string
 */
export function normalizeText(text: string): string {
  const form = 'NFKC';
  return text.normalize(form);
}
|
||||
|
||||
/**
 * Parses hOCR markup into an `OcrPage`.
 *
 * The page size comes from the `bbox` of the first `.ocr_page` element (0x0
 * when missing). Lines are read from `.ocr_line`, `.ocr_textfloat`,
 * `.ocr_header` and `.ocr_caption` elements; lines without words are dropped.
 * When the document has no such line elements, all `.ocrx_word` elements are
 * gathered into a single synthetic left-to-right line.
 *
 * @param hocrText - hOCR markup, parsed as `text/html`
 * @returns The page description; `dpi` is always reported as 72
 */
export function parseHocrDocument(hocrText: string): OcrPage {
  const doc = new DOMParser().parseFromString(hocrText, 'text/html');

  // Page dimensions.
  const pageElement = doc.querySelector('.ocr_page');
  const pageBox = pageElement
    ? parseBBox(pageElement.getAttribute('title') || '')
    : null;
  const width = pageBox ? pageBox.x1 - pageBox.x0 : 0;
  const height = pageBox ? pageBox.y1 - pageBox.y0 : 0;

  const lines: OcrLine[] = [];
  const lineElements = doc.querySelectorAll(
    '.ocr_line, .ocr_textfloat, .ocr_header, .ocr_caption'
  );

  if (lineElements.length > 0) {
    const parsed = Array.from(lineElements, (lineEl) => parseHocrLine(lineEl));
    for (const line of parsed) {
      if (line && line.words.length > 0) {
        lines.push(line);
      }
    }
  } else {
    // No line markup: fall back to one line holding every word on the page.
    const words = parseWordsFromElements(doc.querySelectorAll('.ocrx_word'));
    if (words.length > 0) {
      lines.push({
        bbox: calculateBoundingBox(words.map((w) => w.bbox)),
        baseline: { slope: 0, intercept: 0 },
        textangle: 0,
        words,
        direction: 'ltr',
        injectWordBreaks: true,
      });
    }
  }

  return { width, height, dpi: 72, lines };
}
|
||||
|
||||
/**
 * Builds an `OcrLine` from one line-level element.
 *
 * Direction and word-break behaviour are taken from the closest `.ocr_par`
 * ancestor, falling back to the direct parent; with neither, the defaults are
 * `'ltr'` and `true`.
 *
 * @param lineElement - The line element
 * @returns The parsed line, or `null` when its `title` has no `bbox`
 */
function parseHocrLine(lineElement: Element): OcrLine | null {
  const title = lineElement.getAttribute('title') || '';

  const bbox = parseBBox(title);
  if (bbox === null) {
    return null;
  }

  const context = lineElement.closest('.ocr_par') || lineElement.parentElement;

  return {
    bbox,
    baseline: parseBaseline(title),
    textangle: parseTextangle(title),
    words: parseWordsFromElements(lineElement.querySelectorAll('.ocrx_word')),
    direction: context ? getTextDirection(context) : 'ltr',
    injectWordBreaks: context ? shouldInjectWordBreaks(context) : true,
  };
}
|
||||
|
||||
/**
 * Converts word elements into `OcrWord` records.
 *
 * Elements whose trimmed, normalized text is empty or whose `title` has no
 * `bbox` are skipped. Confidence comes from `x_wconf <n>` and defaults to 0.
 *
 * @param wordElements - Candidate word elements
 * @returns The words that could be parsed, in input order
 */
function parseWordsFromElements(wordElements: NodeListOf<Element>): OcrWord[] {
  const words: OcrWord[] = [];

  for (const wordEl of Array.from(wordElements)) {
    const text = normalizeText((wordEl.textContent || '').trim());
    if (!text) continue;

    const title = wordEl.getAttribute('title') || '';
    const bbox = parseBBox(title);
    if (!bbox) continue;

    const confidenceMatch = /x_wconf\s+(\d+)/.exec(title);
    words.push({
      text,
      bbox,
      confidence: confidenceMatch ? parseInt(confidenceMatch[1], 10) : 0,
    });
  }

  return words;
}
|
||||
|
||||
/**
 * Computes the smallest box that contains every box in `bboxes`.
 *
 * Implemented as a single pass rather than spreading the whole array into
 * `Math.min`/`Math.max`, so the input size is not bounded by the engine's
 * argument-count limit and no temporary arrays are built.
 *
 * @param bboxes - Boxes to merge
 * @returns The union box, or an all-zero box for an empty input
 */
function calculateBoundingBox(bboxes: BBox[]): BBox {
  if (bboxes.length === 0) {
    return { x0: 0, y0: 0, x1: 0, y1: 0 };
  }

  let { x0, y0, x1, y1 } = bboxes[0];
  for (let i = 1; i < bboxes.length; i++) {
    const box = bboxes[i];
    // Math.min/Math.max keep the NaN-propagating behaviour of the n-ary form.
    x0 = Math.min(x0, box.x0);
    y0 = Math.min(y0, box.y0);
    x1 = Math.max(x1, box.x1);
    y1 = Math.max(y1, box.y1);
  }

  return { x0, y0, x1, y1 };
}
|
||||
|
||||
/**
 * Works out where and how large a word has to be drawn so that its rendered
 * width matches the width of its OCR bounding box.
 *
 * As noted in the original implementation, pdf-lib has no horizontal text
 * scaling (Tz operator), so the font size itself is adjusted: starting from
 * the box height, it is rescaled by `boxWidth / measuredWidth` for up to 10
 * rounds, stopping once a round changes it by less than 1%. The result is
 * clamped to the range [1, 2 * boxHeight].
 *
 * @param word - The word to position
 * @param line - The line containing this word (supplies baseline slope and
 *               text angle for the rotation)
 * @param pageHeight - Height of the page in pixels (for coordinate flip)
 * @param fontWidthFn - Function to calculate text width at a given font size
 * @returns Transform parameters for pdf-lib
 */
export function calculateWordTransform(
  word: OcrWord,
  line: OcrLine,
  pageHeight: number,
  fontWidthFn: (text: string, fontSize: number) => number
): WordTransform {
  const box = word.bbox;
  const targetWidth = box.x1 - box.x0;
  const boxHeight = box.y1 - box.y0;

  let fontSize = boxHeight;
  let roundsLeft = 10;
  while (roundsLeft-- > 0) {
    const measured = fontWidthFn(word.text, fontSize);
    if (measured <= 0) break;

    const previous = fontSize;
    fontSize = previous * (targetWidth / measured);
    // Converged: this round moved the size by less than 1%.
    if (Math.abs(fontSize - previous) / previous < 0.01) break;
  }

  fontSize = Math.max(1, Math.min(fontSize, boxHeight * 2));

  // Residual stretch still needed after clamping the font size.
  const finalWidth = fontWidthFn(word.text, fontSize);
  const horizontalScale = finalWidth > 0 ? targetWidth / finalWidth : 1;

  const slopeAngle = Math.atan(line.baseline.slope) * (180 / Math.PI);

  return {
    x: box.x0,
    // pdf-lib draws text from baseline, so we position at word bottom
    y: pageHeight - box.y1,
    fontSize,
    horizontalScale,
    rotation: -line.textangle + slopeAngle,
  };
}
|
||||
|
||||
/**
 * Computes how a space character has to be drawn to fill the horizontal gap
 * between two consecutive words of a line.
 *
 * The font size is the line height plus the baseline intercept (at least 1);
 * the space glyph is then stretched horizontally to the gap width.
 *
 * @param prevWord - Word on the left of the gap
 * @param nextWord - Word on the right of the gap
 * @param line - Line containing both words
 * @param pageHeight - Page height used to flip the y axis
 * @param spaceWidthFn - Returns the width of a space at a given font size
 * @returns Position, scale and font size for the space, or `null` when the
 *          words touch/overlap or the measured space width is not positive
 */
export function calculateSpaceTransform(
  prevWord: OcrWord,
  nextWord: OcrWord,
  line: OcrLine,
  pageHeight: number,
  spaceWidthFn: (fontSize: number) => number
): { x: number; y: number; horizontalScale: number; fontSize: number } | null {
  const gapStart = prevWord.bbox.x1;
  const gapWidth = nextWord.bbox.x0 - gapStart;
  if (gapWidth <= 0) {
    return null;
  }

  const fontSize = Math.max(
    line.bbox.y1 - line.bbox.y0 + line.baseline.intercept,
    1
  );

  const spaceWidth = spaceWidthFn(fontSize);
  if (spaceWidth <= 0) {
    return null;
  }

  return {
    x: gapStart,
    y: pageHeight - line.bbox.y1 - line.baseline.intercept,
    horizontalScale: gapWidth / spaceWidth,
    fontSize,
  };
}
|
||||
158
src/js/utils/image-compress.ts
Normal file
158
src/js/utils/image-compress.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
/** Named compression levels accepted by the image compression helpers. */
export type ImageQuality = 'high' | 'medium' | 'low';

/** Encoder settings associated with one quality level. */
interface QualityConfig {
  /** Quality value passed to the JPEG encoder. */
  jpegQuality: number;
  /** Upper bound for width and height; values that are not > 0 disable resizing. */
  maxDimension: number;
}

/** Settings per quality level. */
const QUALITY_CONFIGS: Record<ImageQuality, QualityConfig> = {
  high: {
    jpegQuality: 0.92,
    maxDimension: 0,
  },
  medium: {
    jpegQuality: 0.75,
    maxDimension: 2500,
  },
  low: {
    jpegQuality: 0.5,
    maxDimension: 1500,
  },
};
|
||||
|
||||
/**
 * Reads the chosen quality from the `#jpg-pdf-quality` select element.
 *
 * @returns The selected level, or `'medium'` when the element is missing or
 *          its value is not one of `'high'`, `'medium'`, `'low'`
 */
export function getSelectedQuality(): ImageQuality {
  const select = document.getElementById(
    'jpg-pdf-quality'
  ) as HTMLSelectElement | null;

  const chosen = select?.value;
  switch (chosen) {
    case 'high':
    case 'medium':
    case 'low':
      return chosen;
    default:
      return 'medium';
  }
}
|
||||
|
||||
/**
 * Re-encodes an image file as JPEG at the given quality level, downscaling it
 * first when either side exceeds the level's `maxDimension`.
 *
 * - `'high'` returns the input file untouched.
 * - If the image cannot be decoded, the input file is returned as-is.
 * - The result keeps the original name with its last extension replaced by
 *   `.jpg`.
 *
 * @param file - Source image
 * @param quality - Compression level
 * @returns The compressed file (or the original, see above); rejects when no
 *          2D canvas context is available or `toBlob` yields no blob
 */
export async function compressImageFile(
  file: File,
  quality: ImageQuality
): Promise<File> {
  if (quality === 'high') return file;

  const { jpegQuality, maxDimension } = QUALITY_CONFIGS[quality];

  return new Promise<File>((resolve, reject) => {
    const objectUrl = URL.createObjectURL(file);
    const image = new Image();

    image.onerror = () => {
      // Best effort: an undecodable image is passed through unchanged.
      URL.revokeObjectURL(objectUrl);
      resolve(file);
    };

    image.onload = () => {
      let targetWidth = image.naturalWidth;
      let targetHeight = image.naturalHeight;

      // Fit the longer side into maxDimension, preserving aspect ratio.
      const longestSide = Math.max(targetWidth, targetHeight);
      if (maxDimension > 0 && longestSide > maxDimension) {
        const shrink = maxDimension / longestSide;
        targetWidth = Math.round(targetWidth * shrink);
        targetHeight = Math.round(targetHeight * shrink);
      }

      const canvas = document.createElement('canvas');
      canvas.width = targetWidth;
      canvas.height = targetHeight;

      const context = canvas.getContext('2d');
      if (!context) {
        URL.revokeObjectURL(objectUrl);
        reject(new Error('Canvas context failed'));
        return;
      }

      context.drawImage(image, 0, 0, targetWidth, targetHeight);
      URL.revokeObjectURL(objectUrl);

      const onEncoded = (encoded: Blob | null): void => {
        if (!encoded) {
          reject(new Error('Canvas toBlob failed'));
          return;
        }
        const jpegName = file.name.replace(/\.[^.]+$/, '.jpg');
        resolve(new File([encoded], jpegName, { type: 'image/jpeg' }));
      };
      canvas.toBlob(onEncoded, 'image/jpeg', jpegQuality);
    };

    image.src = objectUrl;
  });
}
|
||||
|
||||
/**
 * Re-encodes raw image bytes as JPEG at the given quality level, downscaling
 * first when either side exceeds the level's `maxDimension`.
 *
 * - `'high'` returns the input bytes unchanged, labelled `'png'`.
 * - If the bytes cannot be decoded as an image, they are likewise returned
 *   unchanged and labelled `'png'`.
 *   NOTE(review): the `'png'` label is not derived from the data — confirm
 *   callers only pass PNG bytes on these paths.
 *
 * @param bytes - Encoded source image
 * @param quality - Compression level
 * @returns The resulting bytes and their format label; rejects when no 2D
 *          canvas context is available, `toBlob` yields no blob, or reading
 *          the encoded blob fails
 */
export async function compressImageBytes(
  bytes: Uint8Array | ArrayBuffer,
  quality: ImageQuality
): Promise<{ bytes: Uint8Array; type: 'jpeg' | 'png' }> {
  // View over the caller's data without copying when it is already a Uint8Array.
  const passthroughBytes = (): Uint8Array =>
    bytes instanceof Uint8Array ? bytes : new Uint8Array(bytes);

  if (quality === 'high') {
    return { bytes: passthroughBytes(), type: 'png' };
  }

  const config = QUALITY_CONFIGS[quality];

  return new Promise((resolve, reject) => {
    const blob = new Blob([new Uint8Array(bytes)]);
    const url = URL.createObjectURL(blob);
    const img = new Image();

    img.onload = () => {
      let width = img.naturalWidth;
      let height = img.naturalHeight;

      if (
        config.maxDimension > 0 &&
        (width > config.maxDimension || height > config.maxDimension)
      ) {
        const ratio = Math.min(
          config.maxDimension / width,
          config.maxDimension / height
        );
        width = Math.round(width * ratio);
        height = Math.round(height * ratio);
      }

      const canvas = document.createElement('canvas');
      canvas.width = width;
      canvas.height = height;
      const ctx = canvas.getContext('2d');
      if (!ctx) {
        URL.revokeObjectURL(url);
        reject(new Error('Canvas context failed'));
        return;
      }

      ctx.drawImage(img, 0, 0, width, height);
      URL.revokeObjectURL(url);

      canvas.toBlob(
        (jpegBlob) => {
          if (!jpegBlob) {
            reject(new Error('Canvas toBlob failed'));
            return;
          }
          // Forward a failed read to `reject`; an async callback here would
          // leave the outer promise pending and raise an unhandled rejection.
          jpegBlob.arrayBuffer().then(
            (arrayBuffer) =>
              resolve({ bytes: new Uint8Array(arrayBuffer), type: 'jpeg' }),
            reject
          );
        },
        'image/jpeg',
        config.jpegQuality
      );
    };

    img.onerror = () => {
      // Best effort: undecodable input is passed through unchanged.
      URL.revokeObjectURL(url);
      resolve({ bytes: passthroughBytes(), type: 'png' });
    };

    img.src = url;
  });
}
|
||||
312
src/js/utils/image-effects.ts
Normal file
312
src/js/utils/image-effects.ts
Normal file
@@ -0,0 +1,312 @@
|
||||
import type { ScanSettings } from '../types/scanner-effect-type.js';
|
||||
import type { AdjustColorsSettings } from '../types/adjust-colors-type.js';
|
||||
|
||||
/**
 * Converts the pixels of `imageData` to greyscale in place.
 *
 * Each pixel's red, green and blue channels are set to the rounded weighted
 * sum 0.299*R + 0.587*G + 0.114*B; the alpha channel is left untouched.
 *
 * @param imageData - RGBA pixel buffer to modify
 */
export function applyGreyscale(imageData: ImageData): void {
  const pixels = imageData.data;
  let offset = 0;
  while (offset < pixels.length) {
    const luma = Math.round(
      0.299 * pixels[offset] +
        0.587 * pixels[offset + 1] +
        0.114 * pixels[offset + 2]
    );
    // Write R, G and B; index offset + 3 (alpha) is excluded.
    pixels.fill(luma, offset, offset + 3);
    offset += 4;
  }
}
|
||||
|
||||
/**
 * Inverts the colours of `imageData` in place.
 *
 * Red, green and blue of every pixel are replaced by `255 - value`; alpha is
 * left untouched.
 *
 * @param imageData - RGBA pixel buffer to modify
 */
export function applyInvertColors(imageData: ImageData): void {
  const pixels = imageData.data;
  for (let pixel = 0; pixel < pixels.length; pixel += 4) {
    for (let channel = 0; channel < 3; channel++) {
      pixels[pixel + channel] = 255 - pixels[pixel + channel];
    }
  }
}
|
||||
|
||||
/**
 * Converts an RGB colour to HSL.
 *
 * @param r - Red, on a 0..255 scale
 * @param g - Green, on a 0..255 scale
 * @param b - Blue, on a 0..255 scale
 * @returns `[hue, saturation, lightness]`, each as a fraction of 1; hue and
 *          saturation are 0 for greys
 */
export function rgbToHsl(
  r: number,
  g: number,
  b: number
): [number, number, number] {
  const red = r / 255;
  const green = g / 255;
  const blue = b / 255;

  const high = Math.max(red, green, blue);
  const low = Math.min(red, green, blue);
  const lightness = (high + low) / 2;

  // All channels equal: achromatic.
  if (high === low) {
    return [0, 0, lightness];
  }

  const spread = high - low;
  const saturation =
    lightness > 0.5 ? spread / (2 - high - low) : spread / (high + low);

  let hue: number;
  if (high === red) {
    hue = ((green - blue) / spread + (green < blue ? 6 : 0)) / 6;
  } else if (high === green) {
    hue = ((blue - red) / spread + 2) / 6;
  } else {
    hue = ((red - green) / spread + 4) / 6;
  }

  return [hue, saturation, lightness];
}
|
||||
|
||||
/**
 * Converts an HSL colour to RGB.
 *
 * @param h - Hue as a fraction of a full turn
 * @param s - Saturation as a fraction of 1
 * @param l - Lightness as a fraction of 1
 * @returns `[red, green, blue]`, each rounded after scaling by 255
 */
export function hslToRgb(
  h: number,
  s: number,
  l: number
): [number, number, number] {
  // No saturation: every channel equals the lightness.
  if (s === 0) {
    const grey = Math.round(l * 255);
    return [grey, grey, grey];
  }

  const upper = l < 0.5 ? l * (1 + s) : l + s - l * s;
  const lower = 2 * l - upper;

  // Evaluates one channel from its hue offset, wrapping the offset into 0..1.
  function channel(offset: number): number {
    let t = offset;
    if (t < 0) t += 1;
    if (t > 1) t -= 1;

    let value: number;
    if (t < 1 / 6) {
      value = lower + (upper - lower) * 6 * t;
    } else if (t < 1 / 2) {
      value = upper;
    } else if (t < 2 / 3) {
      value = lower + (upper - lower) * (2 / 3 - t) * 6;
    } else {
      value = lower;
    }
    return Math.round(value * 255);
  }

  const red = channel(h + 1 / 3);
  const green = channel(h);
  const blue = channel(h - 1 / 3);
  return [red, green, blue];
}
|
||||
|
||||
/**
 * Renders `sourceData` onto `canvas` with a "scanned document" look.
 *
 * Processing order: optional blur, then per pixel greyscale, brightness,
 * contrast, yellow tint and random noise (the same offset for all three
 * channels of a pixel), then optional darkened edges, then optional rotation
 * onto a white background large enough to hold the rotated image.
 *
 * The target canvas is resized to the output dimensions.
 *
 * @param sourceData - Source pixels
 * @param canvas - Canvas that receives the result
 * @param settings - Effect parameters
 * @param rotationAngle - Rotation in degrees; 0 disables rotation
 * @param scale - Multiplier applied to the blur radius and noise amplitude
 */
export function applyScannerEffect(
  sourceData: ImageData,
  canvas: HTMLCanvasElement,
  settings: ScanSettings,
  rotationAngle: number,
  scale: number = 1
): void {
  const ctx = canvas.getContext('2d')!;
  const w = sourceData.width;
  const h = sourceData.height;

  const blurRadius = settings.blur * scale;
  const noiseAmplitude = settings.noise * scale;
  const blurEnabled = blurRadius > 0;

  // Off-screen canvas that holds the image while effects are applied.
  const workCanvas = document.createElement('canvas');
  workCanvas.width = w;
  workCanvas.height = h;
  const workCtx = workCanvas.getContext('2d')!;

  if (blurEnabled) {
    workCtx.filter = `blur(${blurRadius}px)`;
  }
  workCtx.putImageData(sourceData, 0, 0);

  if (blurEnabled) {
    // Blur by drawing the work canvas through a filtered temporary canvas
    // and copying the result back unfiltered.
    const blurCanvas = document.createElement('canvas');
    blurCanvas.width = w;
    blurCanvas.height = h;
    const blurCtx = blurCanvas.getContext('2d')!;
    blurCtx.filter = `blur(${blurRadius}px)`;
    blurCtx.drawImage(workCanvas, 0, 0);

    workCtx.filter = 'none';
    workCtx.clearRect(0, 0, w, h);
    workCtx.drawImage(blurCanvas, 0, 0);
  }

  const imageData = workCtx.getImageData(0, 0, w, h);
  const data = imageData.data;

  const contrastFactor =
    settings.contrast !== 0
      ? (259 * (settings.contrast + 255)) / (255 * (259 - settings.contrast))
      : 1;

  const clampChannel = (value: number): number =>
    Math.max(0, Math.min(255, value));

  for (let i = 0; i < data.length; i += 4) {
    let r = data[i];
    let g = data[i + 1];
    let b = data[i + 2];

    if (settings.grayscale) {
      const grey = Math.round(0.299 * r + 0.587 * g + 0.114 * b);
      r = g = b = grey;
    }

    if (settings.brightness !== 0) {
      r += settings.brightness;
      g += settings.brightness;
      b += settings.brightness;
    }

    if (settings.contrast !== 0) {
      r = contrastFactor * (r - 128) + 128;
      g = contrastFactor * (g - 128) + 128;
      b = contrastFactor * (b - 128) + 128;
    }

    if (settings.yellowish > 0) {
      const intensity = settings.yellowish / 50;
      r += 20 * intensity;
      g += 12 * intensity;
      b -= 15 * intensity;
    }

    if (noiseAmplitude > 0) {
      // One random offset per pixel, shared by all three channels.
      const n = (Math.random() - 0.5) * noiseAmplitude;
      r += n;
      g += n;
      b += n;
    }

    data[i] = clampChannel(r);
    data[i + 1] = clampChannel(g);
    data[i + 2] = clampChannel(b);
  }

  workCtx.putImageData(imageData, 0, 0);

  if (settings.border) {
    const borderSize = Math.max(w, h) * 0.02;

    // Paints one edge: a gradient fading from 30% black to transparent along
    // (gx0, gy0) -> (gx1, gy1), filled into the given rectangle.
    const shadeEdge = (
      gx0: number,
      gy0: number,
      gx1: number,
      gy1: number,
      rx: number,
      ry: number,
      rw: number,
      rh: number
    ): void => {
      const gradient = workCtx.createLinearGradient(gx0, gy0, gx1, gy1);
      gradient.addColorStop(0, 'rgba(0,0,0,0.3)');
      gradient.addColorStop(1, 'rgba(0,0,0,0)');
      workCtx.fillStyle = gradient;
      workCtx.fillRect(rx, ry, rw, rh);
    };

    shadeEdge(0, 0, borderSize, 0, 0, 0, borderSize, h); // left
    shadeEdge(w, 0, w - borderSize, 0, w - borderSize, 0, borderSize, h); // right
    shadeEdge(0, 0, 0, borderSize, 0, 0, w, borderSize); // top
    shadeEdge(0, h, 0, h - borderSize, 0, h - borderSize, w, borderSize); // bottom
  }

  if (rotationAngle === 0) {
    canvas.width = w;
    canvas.height = h;
    ctx.drawImage(workCanvas, 0, 0);
    return;
  }

  // Rotate around the centre on a canvas sized to the rotated bounding box.
  const rad = (rotationAngle * Math.PI) / 180;
  const cos = Math.abs(Math.cos(rad));
  const sin = Math.abs(Math.sin(rad));
  const newW = Math.ceil(w * cos + h * sin);
  const newH = Math.ceil(w * sin + h * cos);

  canvas.width = newW;
  canvas.height = newH;
  ctx.fillStyle = '#ffffff';
  ctx.fillRect(0, 0, newW, newH);
  ctx.translate(newW / 2, newH / 2);
  ctx.rotate(rad);
  ctx.drawImage(workCanvas, -w / 2, -h / 2);
  ctx.setTransform(1, 0, 0, 1, 0, 0);
}
|
||||
|
||||
/**
 * Draws a colour-adjusted copy of `sourceData` onto `canvas`.
 *
 * Per pixel, in this order: brightness, contrast, saturation/hue shift (via
 * HSL), temperature, tint, gamma, sepia. The source pixels are not modified;
 * the canvas is resized to the source dimensions. Alpha is copied unchanged.
 *
 * @param sourceData - Source pixels
 * @param canvas - Canvas that receives the result
 * @param settings - Adjustment parameters; a value of 0 (1.0 for gamma)
 *                   disables the corresponding step
 */
export function applyColorAdjustments(
  sourceData: ImageData,
  canvas: HTMLCanvasElement,
  settings: AdjustColorsSettings
): void {
  const ctx = canvas.getContext('2d')!;
  const w = sourceData.width;
  const h = sourceData.height;

  canvas.width = w;
  canvas.height = h;

  // Work on a copy so the caller's ImageData stays intact.
  const imageData = new ImageData(new Uint8ClampedArray(sourceData.data), w, h);
  const data = imageData.data;

  const contrastFactor =
    settings.contrast !== 0
      ? (259 * (settings.contrast + 255)) / (255 * (259 - settings.contrast))
      : 1;
  const gammaCorrection = settings.gamma !== 1.0 ? 1 / settings.gamma : 1;
  const sepiaAmount = settings.sepia / 100;

  const clampChannel = (value: number): number =>
    Math.max(0, Math.min(255, value));
  const applyGamma = (value: number): number =>
    Math.pow(clampChannel(value) / 255, gammaCorrection) * 255;

  for (let i = 0; i < data.length; i += 4) {
    let r = data[i];
    let g = data[i + 1];
    let b = data[i + 2];

    if (settings.brightness !== 0) {
      const adj = settings.brightness * 2.55;
      r += adj;
      g += adj;
      b += adj;
    }

    if (settings.contrast !== 0) {
      r = contrastFactor * (r - 128) + 128;
      g = contrastFactor * (g - 128) + 128;
      b = contrastFactor * (b - 128) + 128;
    }

    if (settings.saturation !== 0 || settings.hueShift !== 0) {
      const [hue, sat, lig] = rgbToHsl(
        clampChannel(r),
        clampChannel(g),
        clampChannel(b)
      );

      let newHue = hue;
      if (settings.hueShift !== 0) {
        // hueShift is divided by 360 and the result wrapped into 0..1.
        newHue = (hue + settings.hueShift / 360) % 1;
        if (newHue < 0) newHue += 1;
      }

      let newSat = sat;
      if (settings.saturation !== 0) {
        const satAdj = settings.saturation / 100;
        // Positive values move towards full saturation, negative towards grey.
        newSat = satAdj > 0 ? sat + (1 - sat) * satAdj : sat * (1 + satAdj);
        newSat = Math.max(0, Math.min(1, newSat));
      }

      [r, g, b] = hslToRgb(newHue, newSat, lig);
    }

    if (settings.temperature !== 0) {
      const warmth = settings.temperature / 50;
      r += 30 * warmth;
      b -= 30 * warmth;
    }

    if (settings.tint !== 0) {
      const tintShift = settings.tint / 50;
      g += 30 * tintShift;
    }

    if (settings.gamma !== 1.0) {
      r = applyGamma(r);
      g = applyGamma(g);
      b = applyGamma(b);
    }

    if (settings.sepia > 0) {
      const sr = 0.393 * r + 0.769 * g + 0.189 * b;
      const sg = 0.349 * r + 0.686 * g + 0.168 * b;
      const sb = 0.272 * r + 0.534 * g + 0.131 * b;
      // Blend between the original and the sepia-toned colour.
      r = r + (sr - r) * sepiaAmount;
      g = g + (sg - g) * sepiaAmount;
      b = b + (sb - b) * sepiaAmount;
    }

    data[i] = clampChannel(r);
    data[i + 1] = clampChannel(g);
    data[i + 2] = clampChannel(b);
  }

  ctx.putImageData(imageData, 0, 0);
}
|
||||
160
src/js/utils/libreoffice-loader.ts
Normal file
160
src/js/utils/libreoffice-loader.ts
Normal file
@@ -0,0 +1,160 @@
|
||||
/**
|
||||
* LibreOffice WASM Converter Wrapper
|
||||
*
|
||||
* Uses @matbee/libreoffice-converter package for document conversion.
|
||||
* Handles progress tracking and provides simpler API.
|
||||
*/
|
||||
|
||||
import { WorkerBrowserConverter } from '@matbee/libreoffice-converter/browser';
|
||||
|
||||
const LIBREOFFICE_LOCAL_PATH = import.meta.env.BASE_URL + 'libreoffice-wasm/';
|
||||
|
||||
/** Progress report emitted while the conversion engine loads or runs. */
export interface LoadProgress {
  /** Current stage of the operation. */
  phase:
    | 'loading'
    | 'initializing'
    | 'converting'
    | 'complete'
    | 'ready';
  /** Completion percentage. */
  percent: number;
  /** Human-readable status text. */
  message: string;
}

/** Receives `LoadProgress` updates. */
export type ProgressCallback = (progress: LoadProgress) => void;
|
||||
|
||||
// Singleton for converter instance
|
||||
let converterInstance: LibreOfficeConverter | null = null;
|
||||
|
||||
export class LibreOfficeConverter {
|
||||
private converter: WorkerBrowserConverter | null = null;
|
||||
private initialized = false;
|
||||
private initializing = false;
|
||||
private basePath: string;
|
||||
|
||||
/**
 * @param basePath - URL prefix under which the LibreOffice assets are served;
 *                   when omitted or empty, `LIBREOFFICE_LOCAL_PATH` is used
 */
constructor(basePath?: string) {
  this.basePath = basePath ? basePath : LIBREOFFICE_LOCAL_PATH;
}
|
||||
|
||||
async initialize(onProgress?: ProgressCallback): Promise<void> {
|
||||
if (this.initialized) return;
|
||||
|
||||
if (this.initializing) {
|
||||
while (this.initializing) {
|
||||
await new Promise(r => setTimeout(r, 100));
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
this.initializing = true;
|
||||
let progressCallback = onProgress; // Store original callback
|
||||
|
||||
try {
|
||||
progressCallback?.({ phase: 'loading', percent: 0, message: 'Loading conversion engine...' });
|
||||
|
||||
this.converter = new WorkerBrowserConverter({
|
||||
sofficeJs: `${this.basePath}soffice.js`,
|
||||
sofficeWasm: `${this.basePath}soffice.wasm.gz`,
|
||||
sofficeData: `${this.basePath}soffice.data.gz`,
|
||||
sofficeWorkerJs: `${this.basePath}soffice.worker.js`,
|
||||
browserWorkerJs: `${this.basePath}browser.worker.global.js`,
|
||||
verbose: false,
|
||||
onProgress: (info: { phase: string; percent: number; message: string }) => {
|
||||
if (progressCallback && !this.initialized) {
|
||||
const simplifiedMessage = `Loading conversion engine (${Math.round(info.percent)}%)...`;
|
||||
progressCallback({
|
||||
phase: info.phase as LoadProgress['phase'],
|
||||
percent: info.percent,
|
||||
message: simplifiedMessage
|
||||
});
|
||||
}
|
||||
},
|
||||
onReady: () => {
|
||||
console.log('[LibreOffice] Ready!');
|
||||
},
|
||||
onError: (error: Error) => {
|
||||
console.error('[LibreOffice] Error:', error);
|
||||
},
|
||||
});
|
||||
|
||||
await this.converter.initialize();
|
||||
this.initialized = true;
|
||||
|
||||
// Call completion message
|
||||
progressCallback?.({ phase: 'ready', percent: 100, message: 'Conversion engine ready!' });
|
||||
|
||||
// Null out the callback to prevent any late-firing progress updates
|
||||
progressCallback = undefined;
|
||||
} finally {
|
||||
this.initializing = false;
|
||||
}
|
||||
}
|
||||
|
||||
isReady(): boolean {
|
||||
return this.initialized && this.converter !== null;
|
||||
}
|
||||
|
||||
async convertToPdf(file: File): Promise<Blob> {
|
||||
if (!this.converter) {
|
||||
throw new Error('Converter not initialized');
|
||||
}
|
||||
|
||||
console.log(`[LibreOffice] Converting ${file.name} to PDF...`);
|
||||
console.log(`[LibreOffice] File type: ${file.type}, Size: ${file.size} bytes`);
|
||||
|
||||
try {
|
||||
console.log(`[LibreOffice] Reading file as ArrayBuffer...`);
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const uint8Array = new Uint8Array(arrayBuffer);
|
||||
console.log(`[LibreOffice] File loaded, ${uint8Array.length} bytes`);
|
||||
|
||||
console.log(`[LibreOffice] Calling converter.convert() with buffer...`);
|
||||
const startTime = Date.now();
|
||||
|
||||
// Detect input format - critical for CSV to apply import filters
|
||||
const ext = file.name.split('.').pop()?.toLowerCase() || '';
|
||||
console.log(`[LibreOffice] Detected format from extension: ${ext}`);
|
||||
|
||||
const result = await this.converter.convert(uint8Array, {
|
||||
outputFormat: 'pdf',
|
||||
inputFormat: ext as any, // Explicitly specify format for CSV import filters
|
||||
}, file.name);
|
||||
|
||||
const duration = Date.now() - startTime;
|
||||
console.log(`[LibreOffice] Conversion complete! Duration: ${duration}ms, Size: ${result.data.length} bytes`);
|
||||
|
||||
// Create a copy to avoid SharedArrayBuffer type issues
|
||||
const data = new Uint8Array(result.data);
|
||||
return new Blob([data], { type: result.mimeType });
|
||||
} catch (error) {
|
||||
console.error(`[LibreOffice] Conversion FAILED for ${file.name}:`, error);
|
||||
console.error(`[LibreOffice] Error details:`, {
|
||||
message: error instanceof Error ? error.message : String(error),
|
||||
stack: error instanceof Error ? error.stack : undefined
|
||||
});
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
async wordToPdf(file: File): Promise<Blob> {
|
||||
return this.convertToPdf(file);
|
||||
}
|
||||
|
||||
async pptToPdf(file: File): Promise<Blob> {
|
||||
return this.convertToPdf(file);
|
||||
}
|
||||
|
||||
async excelToPdf(file: File): Promise<Blob> {
|
||||
return this.convertToPdf(file);
|
||||
}
|
||||
|
||||
async destroy(): Promise<void> {
|
||||
if (this.converter) {
|
||||
await this.converter.destroy();
|
||||
}
|
||||
this.converter = null;
|
||||
this.initialized = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the module-wide converter, creating it on first use.
 *
 * @param basePath Asset location forwarded to the constructor; only used by
 *   the call that actually creates the instance.
 */
export function getLibreOfficeConverter(basePath?: string): LibreOfficeConverter {
  const existing = converterInstance;
  if (existing) return existing;

  const created = new LibreOfficeConverter(basePath);
  converterInstance = created;
  return created;
}
|
||||
970
src/js/utils/markdown-editor.ts
Normal file
970
src/js/utils/markdown-editor.ts
Normal file
@@ -0,0 +1,970 @@
|
||||
import MarkdownIt from 'markdown-it';
|
||||
import hljs from 'highlight.js/lib/core';
|
||||
import javascript from 'highlight.js/lib/languages/javascript';
|
||||
import typescript from 'highlight.js/lib/languages/typescript';
|
||||
import python from 'highlight.js/lib/languages/python';
|
||||
import css from 'highlight.js/lib/languages/css';
|
||||
import xml from 'highlight.js/lib/languages/xml';
|
||||
import json from 'highlight.js/lib/languages/json';
|
||||
import bash from 'highlight.js/lib/languages/bash';
|
||||
import markdownLang from 'highlight.js/lib/languages/markdown';
|
||||
import sql from 'highlight.js/lib/languages/sql';
|
||||
import java from 'highlight.js/lib/languages/java';
|
||||
import csharp from 'highlight.js/lib/languages/csharp';
|
||||
import cpp from 'highlight.js/lib/languages/cpp';
|
||||
import go from 'highlight.js/lib/languages/go';
|
||||
import rust from 'highlight.js/lib/languages/rust';
|
||||
import yaml from 'highlight.js/lib/languages/yaml';
|
||||
import 'highlight.js/styles/github.css';
|
||||
import mermaid from 'mermaid';
|
||||
import sub from 'markdown-it-sub';
|
||||
import sup from 'markdown-it-sup';
|
||||
import footnote from 'markdown-it-footnote';
|
||||
import deflist from 'markdown-it-deflist';
|
||||
import abbr from 'markdown-it-abbr';
|
||||
import { full as emoji } from 'markdown-it-emoji';
|
||||
import ins from 'markdown-it-ins';
|
||||
import mark from 'markdown-it-mark';
|
||||
import taskLists from 'markdown-it-task-lists';
|
||||
import anchor from 'markdown-it-anchor';
|
||||
import tocDoneRight from 'markdown-it-toc-done-right';
|
||||
import { applyTranslations } from '../i18n/i18n';
|
||||
|
||||
|
||||
|
||||
// Register each highlight.js grammar under its canonical name and its aliases
const highlightGrammars: ReadonlyArray<readonly [typeof javascript, readonly string[]]> = [
  [javascript, ['javascript', 'js']],
  [typescript, ['typescript', 'ts']],
  [python, ['python', 'py']],
  [css, ['css']],
  [xml, ['html', 'xml']],
  [json, ['json']],
  [bash, ['bash', 'sh', 'shell']],
  [markdownLang, ['markdown', 'md']],
  [sql, ['sql']],
  [java, ['java']],
  [csharp, ['csharp', 'cs']],
  [cpp, ['cpp', 'c']],
  [go, ['go']],
  [rust, ['rust']],
  [yaml, ['yaml', 'yml']],
];

for (const [grammar, names] of highlightGrammars) {
  for (const name of names) {
    hljs.registerLanguage(name, grammar);
  }
}
|
||||
|
||||
/** Options accepted by the `MarkdownEditor` constructor. */
export interface MarkdownEditorOptions {
  /** Markdown text to load into the editor on creation. */
  initialContent?: string;
  /** Invoked when the user asks to leave the editor. */
  onBack?: () => void;
}
|
||||
|
||||
/** The subset of markdown-it options the editor exposes. */
export interface MarkdownItOptions {
  /** Allow HTML tags in the markdown source. */
  html: boolean;
  /** Turn '\n' inside paragraphs into <br>. */
  breaks: boolean;
  /** Turn URL-like text into links automatically. */
  linkify: boolean;
  /** Apply language-neutral replacements and quote beautification. */
  typographer: boolean;
  /** Highlighter used for fenced code blocks. */
  highlight?: (str: string, lang: string) => string;
}
|
||||
|
||||
const DEFAULT_MARKDOWN = `# Welcome to BentoPDF Markdown Editor
|
||||
|
||||
This is a **live preview** markdown editor with full plugin support.
|
||||
|
||||
\${toc}
|
||||
|
||||
## Basic Formatting
|
||||
|
||||
- **Bold** and *italic* text
|
||||
- ~~Strikethrough~~ text
|
||||
- [Links](https://bentopdf.com)
|
||||
- ==Highlighted text== using mark
|
||||
- ++Inserted text++ using ins
|
||||
- H~2~O for subscript
|
||||
- E=mc^2^ for superscript
|
||||
|
||||
## Task Lists
|
||||
|
||||
- [x] Completed task
|
||||
- [x] Another done item
|
||||
- [ ] Pending task
|
||||
- [ ] Future work
|
||||
|
||||
## Emoji Support :rocket:
|
||||
|
||||
Use emoji shortcodes: :smile: :heart: :thumbsup: :star: :fire:
|
||||
|
||||
## Code with Syntax Highlighting
|
||||
|
||||
\`\`\`javascript
|
||||
function greet(name) {
|
||||
console.log(\`Hello, \${name}!\`);
|
||||
return { message: 'Welcome!' };
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
\`\`\`python
|
||||
def fibonacci(n):
|
||||
if n <= 1:
|
||||
return n
|
||||
return fibonacci(n-1) + fibonacci(n-2)
|
||||
\`\`\`
|
||||
|
||||
## Tables
|
||||
|
||||
| Feature | Supported | Notes |
|
||||
|---------|:---------:|-------|
|
||||
| Headers | ✓ | Multiple levels |
|
||||
| Lists | ✓ | Ordered & unordered |
|
||||
| Code | ✓ | With highlighting |
|
||||
| Tables | ✓ | With alignment |
|
||||
| Emoji | ✓ | :white_check_mark: |
|
||||
| Mermaid | ✓ | Diagrams! |
|
||||
|
||||
## Mermaid Diagrams
|
||||
|
||||
### Flowchart
|
||||
|
||||
\`\`\`mermaid
|
||||
graph TD
|
||||
A[Start] --> B{Decision}
|
||||
B -->|Yes| C[OK]
|
||||
B -->|No| D[Cancel]
|
||||
\`\`\`
|
||||
|
||||
### Sequence Diagram
|
||||
|
||||
\`\`\`mermaid
|
||||
sequenceDiagram
|
||||
participant User
|
||||
participant BentoPDF
|
||||
participant Server
|
||||
User->>BentoPDF: Upload PDF
|
||||
BentoPDF->>BentoPDF: Process locally
|
||||
BentoPDF-->>User: Download result
|
||||
Note over BentoPDF: No server needed!
|
||||
\`\`\`
|
||||
|
||||
### Pie Chart
|
||||
|
||||
\`\`\`mermaid
|
||||
pie title PDF Tools Usage
|
||||
"Merge" : 35
|
||||
"Compress" : 25
|
||||
"Convert" : 20
|
||||
"Edit" : 15
|
||||
"Other" : 5
|
||||
\`\`\`
|
||||
|
||||
### Class Diagram
|
||||
|
||||
\`\`\`mermaid
|
||||
classDiagram
|
||||
class PDFDocument {
|
||||
+String title
|
||||
+int pageCount
|
||||
+merge()
|
||||
+split()
|
||||
+compress()
|
||||
}
|
||||
class Page {
|
||||
+int number
|
||||
+rotate()
|
||||
+crop()
|
||||
}
|
||||
PDFDocument "1" --> "*" Page
|
||||
\`\`\`
|
||||
|
||||
### Gantt Chart
|
||||
|
||||
\`\`\`mermaid
|
||||
gantt
|
||||
title Project Timeline
|
||||
dateFormat YYYY-MM-DD
|
||||
section Planning
|
||||
Research :a1, 2024-01-01, 7d
|
||||
Design :a2, after a1, 5d
|
||||
section Development
|
||||
Implementation :a3, after a2, 14d
|
||||
Testing :a4, after a3, 7d
|
||||
\`\`\`
|
||||
|
||||
### Entity Relationship
|
||||
|
||||
\`\`\`mermaid
|
||||
erDiagram
|
||||
USER ||--o{ DOCUMENT : uploads
|
||||
DOCUMENT ||--|{ PAGE : contains
|
||||
DOCUMENT {
|
||||
string id
|
||||
string name
|
||||
date created
|
||||
}
|
||||
PAGE {
|
||||
int number
|
||||
string content
|
||||
}
|
||||
\`\`\`
|
||||
|
||||
### Mindmap
|
||||
|
||||
\`\`\`mermaid
|
||||
mindmap
|
||||
root((BentoPDF))
|
||||
Convert
|
||||
Word to PDF
|
||||
Excel to PDF
|
||||
Image to PDF
|
||||
Edit
|
||||
Merge
|
||||
Split
|
||||
Compress
|
||||
Secure
|
||||
Encrypt
|
||||
Sign
|
||||
Watermark
|
||||
\`\`\`
|
||||
|
||||
## Footnotes
|
||||
|
||||
Here's a sentence with a footnote[^1].
|
||||
|
||||
## Definition Lists
|
||||
|
||||
Term 1
|
||||
: Definition for term 1
|
||||
|
||||
Term 2
|
||||
: Definition for term 2
|
||||
: Another definition for term 2
|
||||
|
||||
## Abbreviations
|
||||
|
||||
The HTML specification is maintained by the W3C.
|
||||
|
||||
*[HTML]: Hyper Text Markup Language
|
||||
*[W3C]: World Wide Web Consortium
|
||||
|
||||
---
|
||||
|
||||
Start editing to see the magic happen!
|
||||
|
||||
[^1]: This is the footnote content.
|
||||
`;
|
||||
|
||||
export class MarkdownEditor {
|
||||
private container: HTMLElement;
|
||||
private md: MarkdownIt;
|
||||
private editor: HTMLTextAreaElement | null = null;
|
||||
private preview: HTMLElement | null = null;
|
||||
private onBack?: () => void;
|
||||
private syncScroll: boolean = false;
|
||||
private isSyncing: boolean = false;
|
||||
private mermaidInitialized: boolean = false;
|
||||
private mdOptions: MarkdownItOptions = {
|
||||
html: true,
|
||||
breaks: false,
|
||||
linkify: true,
|
||||
typographer: true
|
||||
};
|
||||
|
||||
/**
 * Builds the editor UI inside `container` and loads the starting document.
 *
 * @param container Element whose content is replaced by the editor markup.
 * @param options Initial content and back-navigation callback.
 */
constructor(container: HTMLElement, options: MarkdownEditorOptions) {
  const { initialContent, onBack } = options;

  this.container = container;
  this.onBack = onBack;

  this.initMermaid();
  this.md = this.createMarkdownIt();
  this.configureLinkRenderer();

  this.render();

  // Missing or empty initial content falls back to the bundled sample document
  this.setContent(initialContent || DEFAULT_MARKDOWN);
}
|
||||
|
||||
/** Configures mermaid once per editor instance; later calls are no-ops. */
private initMermaid(): void {
  if (this.mermaidInitialized) return;

  mermaid.initialize({
    startOnLoad: false,
    theme: 'default',
    securityLevel: 'loose',
    fontFamily: '-apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif'
  });

  this.mermaidInitialized = true;
}
|
||||
|
||||
/**
 * Wraps the current `link_open` renderer rule so every rendered link carries
 * target="_blank" and rel="noopener noreferrer".
 */
private configureLinkRenderer(): void {
  const rules = this.md.renderer.rules;

  // Use the rule already installed, or plain token rendering when there is none
  const renderLinkOpen = rules.link_open ||
    ((tokens: any[], idx: number, options: any, _env: any, self: any) => self.renderToken(tokens, idx, options));

  rules.link_open = (tokens: any[], idx: number, options: any, env: any, self: any) => {
    const linkToken = tokens[idx];
    linkToken.attrSet('target', '_blank');
    linkToken.attrSet('rel', 'noopener noreferrer');
    return renderLinkOpen(tokens, idx, options, env, self);
  };
}
|
||||
|
||||
/**
 * Replaces the container's content with the editor markup (toolbar, editor
 * and preview panes, hidden settings modal), caches the textarea and preview
 * elements, attaches event listeners, applies translations and, when
 * `window.lucide` is present, asks it to create the icons.
 */
private render(): void {
  const checkedAttr = (enabled: boolean): string => (enabled ? 'checked' : '');

  this.container.innerHTML = `
    <div class="md-editor light-mode">
      <div class="md-editor-wrapper">
        <div class="md-editor-header">
          <div class="md-editor-actions">
            <input type="file" accept=".md,.markdown,.txt" id="mdFileInput" style="display: none;" />
            <button class="md-editor-btn md-editor-btn-secondary" id="mdUpload">
              <i data-lucide="upload"></i>
              <span data-i18n="tools:markdownToPdf.btnUpload">Upload</span>
            </button>
            <div class="theme-toggle">
              <i data-lucide="moon" width="16" height="16"></i>
              <div class="theme-toggle-slider active" id="themeToggle"></div>
              <i data-lucide="sun" width="16" height="16"></i>
            </div>
            <button class="md-editor-btn md-editor-btn-secondary" id="mdSyncScroll" title="Toggle sync scroll">
              <i data-lucide="git-compare"></i>
              <span data-i18n="tools:markdownToPdf.btnSyncScroll">Sync Scroll</span>
            </button>
            <button class="md-editor-btn md-editor-btn-secondary" id="mdSettings">
              <i data-lucide="settings"></i>
              <span data-i18n="tools:markdownToPdf.btnSettings">Settings</span>
            </button>
            <button class="md-editor-btn md-editor-btn-primary" id="mdExport">
              <i data-lucide="download"></i>
              <span data-i18n="tools:markdownToPdf.btnExportPdf">Export PDF</span>
            </button>
          </div>
        </div>

        <div class="md-editor-main">
          <div class="md-editor-pane">
            <div class="md-editor-pane-header">
              <span data-i18n="tools:markdownToPdf.paneMarkdown">Markdown</span>
            </div>
            <textarea class="md-editor-textarea" id="mdTextarea" spellcheck="false"></textarea>
          </div>
          <div class="md-editor-pane">
            <div class="md-editor-pane-header">
              <span data-i18n="tools:markdownToPdf.panePreview">Preview</span>
            </div>
            <div class="md-editor-preview" id="mdPreview"></div>
          </div>
        </div>
      </div>
    </div>

    <!-- Settings Modal (hidden by default) -->
    <div class="md-editor-modal-overlay" id="mdSettingsModal" style="display: none;">
      <div class="md-editor-modal">
        <div class="md-editor-modal-header">
          <h2 class="md-editor-modal-title" data-i18n="tools:markdownToPdf.settingsTitle">Markdown Settings</h2>
          <button class="md-editor-modal-close" id="mdCloseSettings">
            <i data-lucide="x" width="20" height="20"></i>
          </button>
        </div>
        <div class="md-editor-settings-group">
          <h3 data-i18n="tools:markdownToPdf.settingsPreset">Preset</h3>
          <select id="mdPreset">
            <option value="default" selected data-i18n="tools:markdownToPdf.presetDefault">Default (GFM-like)</option>
            <option value="commonmark" data-i18n="tools:markdownToPdf.presetCommonmark">CommonMark (strict)</option>
            <option value="zero" data-i18n="tools:markdownToPdf.presetZero">Minimal (no features)</option>
          </select>
        </div>
        <div class="md-editor-settings-group">
          <h3 data-i18n="tools:markdownToPdf.settingsOptions">Markdown Options</h3>
          <label class="md-editor-checkbox">
            <input type="checkbox" id="mdOptHtml" ${checkedAttr(this.mdOptions.html)} />
            <span data-i18n="tools:markdownToPdf.optAllowHtml">Allow HTML tags</span>
          </label>
          <label class="md-editor-checkbox">
            <input type="checkbox" id="mdOptBreaks" ${checkedAttr(this.mdOptions.breaks)} />
            <span data-i18n="tools:markdownToPdf.optBreaks">Convert newlines to &lt;br&gt;</span>
          </label>
          <label class="md-editor-checkbox">
            <input type="checkbox" id="mdOptLinkify" ${checkedAttr(this.mdOptions.linkify)} />
            <span data-i18n="tools:markdownToPdf.optLinkify">Auto-convert URLs to links</span>
          </label>
          <label class="md-editor-checkbox">
            <input type="checkbox" id="mdOptTypographer" ${checkedAttr(this.mdOptions.typographer)} />
            <span data-i18n="tools:markdownToPdf.optTypographer">Typographer (smart quotes, etc.)</span>
          </label>
        </div>
      </div>
    </div>
  `;

  this.editor = document.getElementById('mdTextarea') as HTMLTextAreaElement;
  this.preview = document.getElementById('mdPreview') as HTMLElement;

  this.setupEventListeners();
  this.applyI18n();

  // Initialize Lucide icons
  if (typeof (window as any).lucide !== 'undefined') {
    (window as any).lucide.createIcons();
  }
}
|
||||
|
||||
|
||||
/**
 * Attaches all UI event handlers: live preview on input, optional
 * bidirectional scroll sync, theme toggle, settings modal open/close, option
 * checkboxes, preset selector, file upload, PDF export, and the editor's
 * keyboard shortcuts (Ctrl/Cmd+S to export, Tab to indent).
 */
private setupEventListeners(): void {
  const byId = (id: string): HTMLElement | null => document.getElementById(id);

  const setSettingsModalDisplay = (display: 'flex' | 'none'): void => {
    const modal = byId('mdSettingsModal');
    if (modal) {
      modal.style.display = display;
    }
  };

  // Copies the relative scroll position of one pane onto the other
  const mirrorScroll = (from: HTMLElement, to: HTMLElement): void => {
    this.isSyncing = true;
    const scrollPercentage = from.scrollTop / (from.scrollHeight - from.clientHeight);
    to.scrollTop = scrollPercentage * (to.scrollHeight - to.clientHeight);
    // Briefly ignore the scroll event that the assignment above triggers on the other pane
    setTimeout(() => this.isSyncing = false, 10);
  };

  // Editor input
  this.editor?.addEventListener('input', () => {
    this.updatePreview();
  });

  // Sync scroll toggle
  const syncScrollBtn = byId('mdSyncScroll');
  syncScrollBtn?.addEventListener('click', () => {
    this.syncScroll = !this.syncScroll;
    syncScrollBtn.classList.toggle('md-editor-btn-primary');
    syncScrollBtn.classList.toggle('md-editor-btn-secondary');
  });

  // Editor -> preview scroll sync
  this.editor?.addEventListener('scroll', () => {
    if (this.syncScroll && !this.isSyncing && this.editor && this.preview) {
      mirrorScroll(this.editor, this.preview);
    }
  });

  // Preview -> editor scroll sync
  this.preview?.addEventListener('scroll', () => {
    if (this.syncScroll && !this.isSyncing && this.editor && this.preview) {
      mirrorScroll(this.preview, this.editor);
    }
  });

  // Theme toggle
  const themeToggle = byId('themeToggle');
  const editorContainer = document.querySelector('.md-editor');
  themeToggle?.addEventListener('click', () => {
    editorContainer?.classList.toggle('light-mode');
    themeToggle.classList.toggle('active');
  });

  // Settings modal open / close
  byId('mdSettings')?.addEventListener('click', () => setSettingsModalDisplay('flex'));
  byId('mdCloseSettings')?.addEventListener('click', () => setSettingsModalDisplay('none'));

  // Close the modal only when the overlay itself is clicked, not its content
  byId('mdSettingsModal')?.addEventListener('click', (e) => {
    if ((e.target as HTMLElement).classList.contains('md-editor-modal-overlay')) {
      setSettingsModalDisplay('none');
    }
  });

  // Settings checkboxes: each one writes its flag and rebuilds the renderer
  const optionCheckboxes: ReadonlyArray<readonly [string, 'html' | 'breaks' | 'linkify' | 'typographer']> = [
    ['mdOptHtml', 'html'],
    ['mdOptBreaks', 'breaks'],
    ['mdOptLinkify', 'linkify'],
    ['mdOptTypographer', 'typographer'],
  ];
  for (const [checkboxId, optionKey] of optionCheckboxes) {
    byId(checkboxId)?.addEventListener('change', (e) => {
      this.mdOptions[optionKey] = (e.target as HTMLInputElement).checked;
      this.updateMarkdownIt();
    });
  }

  // Preset selector
  byId('mdPreset')?.addEventListener('change', (e) => {
    const preset = (e.target as HTMLSelectElement).value;
    this.applyPreset(preset as 'default' | 'commonmark' | 'zero');
  });

  // Upload button forwards to the hidden file input
  byId('mdUpload')?.addEventListener('click', () => {
    byId('mdFileInput')?.click();
  });

  // File input change
  byId('mdFileInput')?.addEventListener('change', (e) => {
    const input = e.target as HTMLInputElement;
    const file = input.files?.[0];
    if (file) {
      this.loadFile(file);
    }
    // Clear the selection so picking the same file again fires 'change' again
    input.value = '';
  });

  // Export PDF
  byId('mdExport')?.addEventListener('click', () => {
    this.exportPdf();
  });

  // Keyboard shortcuts
  this.editor?.addEventListener('keydown', (e) => {
    // Ctrl/Cmd + S to export
    if ((e.ctrlKey || e.metaKey) && e.key === 's') {
      e.preventDefault();
      this.exportPdf();
    }

    // Tab inserts an indent instead of moving focus
    if (e.key === 'Tab') {
      e.preventDefault();
      const editor = this.editor;
      if (!editor) return;

      const indent = '  ';
      const start = editor.selectionStart;
      const end = editor.selectionEnd;
      const value = editor.value;
      editor.value = value.substring(0, start) + indent + value.substring(end);
      // Place the caret right after the inserted indent
      editor.selectionStart = editor.selectionEnd = start + indent.length;
      this.updatePreview();
    }
  });
}
|
||||
|
||||
private currentPreset: 'default' | 'commonmark' | 'zero' = 'default';
|
||||
|
||||
/**
 * Switches to the given preset: stores it, resets the option flags to the
 * preset's values, mirrors them onto the settings checkboxes and rebuilds the
 * renderer.
 */
private applyPreset(preset: 'default' | 'commonmark' | 'zero'): void {
  this.currentPreset = preset;

  // 'commonmark' and 'zero' switch every flag off; anything else restores the defaults
  const allOff = preset === 'commonmark' || preset === 'zero';
  this.mdOptions = allOff
    ? { html: false, breaks: false, linkify: false, typographer: false }
    : { html: true, breaks: false, linkify: true, typographer: true };

  // Update UI checkboxes
  const checkboxStates: Array<[string, boolean]> = [
    ['mdOptHtml', this.mdOptions.html],
    ['mdOptBreaks', this.mdOptions.breaks],
    ['mdOptLinkify', this.mdOptions.linkify],
    ['mdOptTypographer', this.mdOptions.typographer],
  ];
  for (const [checkboxId, isChecked] of checkboxStates) {
    (document.getElementById(checkboxId) as HTMLInputElement).checked = isChecked;
  }

  this.updateMarkdownIt();
}
|
||||
|
||||
/**
 * Reads `file` as text and loads it into the editor. Failures are logged to
 * the console and otherwise ignored.
 */
private async loadFile(file: File): Promise<void> {
  try {
    const fileContents = await file.text();
    this.setContent(fileContents);
  } catch (loadError) {
    console.error('Failed to load file:', loadError);
  }
}
|
||||
|
||||
|
||||
/**
 * Builds a markdown-it instance for the current preset.
 *
 * - 'commonmark' / 'zero': the named preset with the current option flags
 *   layered on top, so the settings checkboxes are not silently ignored.
 *   Whether a given flag has a visible effect still depends on which rules
 *   the preset enables. No plugins and no syntax highlighting.
 * - 'default': the current option flags, highlight.js highlighting for
 *   registered languages, and the full plugin set.
 */
private createMarkdownIt(): MarkdownIt {
  const preset = this.currentPreset;

  if (preset === 'commonmark' || preset === 'zero') {
    const presetMd = new MarkdownIt(preset, { ...this.mdOptions });
    if (preset === 'zero') {
      // Enable basic features for zero preset
      presetMd.enable(['paragraph', 'newline', 'text']);
    }
    // Plugins are applied only for the default preset (they may not work well with commonmark/zero)
    return presetMd;
  }

  const md = new MarkdownIt({
    ...this.mdOptions,
    highlight: (str: string, lang: string) => {
      if (lang && hljs.getLanguage(lang)) {
        try {
          return hljs.highlight(str, { language: lang, ignoreIllegals: true }).value;
        } catch {
          // Fall through to default
        }
      }
      return ''; // Use external default escaping
    }
  });

  md.use(sub) // Subscript: ~text~ -> <sub>text</sub>
    .use(sup) // Superscript: ^text^ -> <sup>text</sup>
    .use(footnote) // Footnotes: [^1] and [^1]: footnote text
    .use(deflist) // Definition lists
    .use(abbr) // Abbreviations: *[abbr]: full text
    .use(emoji) // Emoji: :smile: -> 😄
    .use(ins) // Inserted text: ++text++ -> <ins>text</ins>
    .use(mark) // Marked text: ==text== -> <mark>text</mark>
    .use(taskLists, { enabled: true, label: true, labelAfter: true }) // Task lists: - [x] done
    .use(anchor, { permalink: false }) // Header anchors
    .use(tocDoneRight); // Table of contents: ${toc}

  return md;
}
|
||||
|
||||
/**
 * Rebuilds the renderer from the current preset and options, reinstalls the
 * link renderer on the new instance, and refreshes the preview.
 */
private updateMarkdownIt(): void {
  const rebuilt = this.createMarkdownIt();
  this.md = rebuilt;

  this.configureLinkRenderer();
  this.updatePreview();
}
|
||||
|
||||
/**
 * Renders the editor's markdown into the preview pane, then starts mermaid
 * diagram rendering without waiting for it. Does nothing until both the
 * editor and preview elements exist.
 */
private updatePreview(): void {
  const { editor, preview } = this;
  if (!editor || !preview) return;

  preview.innerHTML = this.md.render(editor.value);

  // Fire-and-forget: diagrams are swapped in asynchronously
  void this.renderMermaidDiagrams();
}
|
||||
|
||||
/**
 * Replaces every fenced `mermaid` code block in the preview with its rendered
 * SVG, one block at a time. A block that fails to render is replaced by an
 * error message instead, and the failure is logged.
 */
private async renderMermaidDiagrams(): Promise<void> {
  const preview = this.preview;
  if (!preview) return;

  const codeBlocks = Array.from(
    preview.querySelectorAll<HTMLElement>('pre > code.language-mermaid')
  );

  for (const [index, codeEl] of codeBlocks.entries()) {
    const source = codeEl.textContent || '';
    const host = codeEl.parentElement;

    // Skip blocks with no enclosing <pre> or with only whitespace
    if (!host || !source.trim()) continue;

    try {
      const diagramId = `mermaid-diagram-${index}-${Date.now()}`;
      const rendered = await mermaid.render(diagramId, source.trim());

      const diagram = document.createElement('div');
      diagram.className = 'mermaid-diagram';
      diagram.innerHTML = rendered.svg;

      host.replaceWith(diagram);
    } catch (error) {
      console.error('Mermaid rendering error:', error);

      const failure = document.createElement('div');
      failure.className = 'mermaid-error';
      failure.textContent = `Mermaid Error: ${(error as Error).message}`;
      host.replaceWith(failure);
    }
  }
}
|
||||
|
||||
/**
 * Replaces the editor text with `content` and refreshes the preview. Does
 * nothing when the textarea has not been created yet.
 */
public setContent(content: string): void {
  const textarea = this.editor;
  if (!textarea) return;

  textarea.value = content;
  this.updatePreview();
}
|
||||
|
||||
/** Returns the current markdown source, or '' when there is no editor or it is empty. */
public getContent(): string {
  if (!this.editor) return '';
  return this.editor.value || '';
}
|
||||
|
||||
/** Renders the current markdown source to HTML with the active renderer. */
public getHtml(): string {
  const source = this.getContent();
  return this.md.render(source);
}
|
||||
|
||||
/** Opens the browser's print dialog via `window.print()`, from which the page can be saved as a PDF. */
private exportPdf(): void {
  window.print();
}
|
||||
|
||||
/**
 * Builds a standalone HTML document string around the rendered Markdown.
 *
 * The document consists of a UTF-8 `<meta>` tag, one inline `<style>` block
 * (base typography, tables, `.hljs*` syntax-highlighting colours, plugin
 * styles such as `mark`/`ins`/footnotes/task lists/table of contents, and the
 * `.mermaid-diagram` / `.mermaid-error` classes) and a `<body>` containing the
 * output of `getHtml()`.
 *
 * NOTE(review): `getHtml()` is inserted verbatim, so the result is only as
 * safe as the markdown-it configuration that produced it.
 *
 * @returns The complete HTML document as a string.
 */
private getStyledHtml(): string {
|
||||
const content = this.getHtml();
|
||||
|
||||
return `<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<style>
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
|
||||
font-size: 14px;
|
||||
line-height: 1.6;
|
||||
color: #333;
|
||||
max-width: 800px;
|
||||
margin: 0 auto;
|
||||
padding: 40px 20px;
|
||||
}
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
margin-top: 1.5em;
|
||||
margin-bottom: 0.5em;
|
||||
font-weight: 600;
|
||||
line-height: 1.25;
|
||||
}
|
||||
h1 { font-size: 2em; border-bottom: 1px solid #eee; padding-bottom: 0.3em; }
|
||||
h2 { font-size: 1.5em; border-bottom: 1px solid #eee; padding-bottom: 0.3em; }
|
||||
h3 { font-size: 1.25em; }
|
||||
h4 { font-size: 1em; }
|
||||
p { margin: 1em 0; }
|
||||
a { color: #0366d6; text-decoration: none; }
|
||||
a:hover { text-decoration: underline; }
|
||||
code {
|
||||
font-family: 'SFMono-Regular', Consolas, 'Liberation Mono', Menlo, monospace;
|
||||
font-size: 0.9em;
|
||||
background: #f6f8fa;
|
||||
padding: 0.2em 0.4em;
|
||||
border-radius: 3px;
|
||||
}
|
||||
pre {
|
||||
background: #f6f8fa;
|
||||
padding: 16px;
|
||||
overflow: auto;
|
||||
border-radius: 6px;
|
||||
line-height: 1.45;
|
||||
}
|
||||
pre code {
|
||||
background: none;
|
||||
padding: 0;
|
||||
}
|
||||
blockquote {
|
||||
margin: 1em 0;
|
||||
padding: 0 1em;
|
||||
color: #6a737d;
|
||||
border-left: 4px solid #dfe2e5;
|
||||
}
|
||||
ul, ol {
|
||||
margin: 1em 0;
|
||||
padding-left: 2em;
|
||||
}
|
||||
li { margin: 0.25em 0; }
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
width: 100%;
|
||||
margin: 1em 0;
|
||||
}
|
||||
th, td {
|
||||
border: 1px solid #dfe2e5;
|
||||
padding: 8px 12px;
|
||||
text-align: left;
|
||||
}
|
||||
th {
|
||||
background: #f6f8fa;
|
||||
font-weight: 600;
|
||||
}
|
||||
tr:nth-child(even) { background: #f6f8fa; }
|
||||
hr {
|
||||
border: none;
|
||||
border-top: 1px solid #eee;
|
||||
margin: 2em 0;
|
||||
}
|
||||
img {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
/* Syntax highlighting - GitHub style */
|
||||
.hljs {
|
||||
color: #24292e;
|
||||
background: #f6f8fa;
|
||||
}
|
||||
.hljs-comment,
|
||||
.hljs-quote {
|
||||
color: #6a737d;
|
||||
font-style: italic;
|
||||
}
|
||||
.hljs-keyword,
|
||||
.hljs-selector-tag,
|
||||
.hljs-subst {
|
||||
color: #d73a49;
|
||||
}
|
||||
.hljs-number,
|
||||
.hljs-literal,
|
||||
.hljs-variable,
|
||||
.hljs-template-variable,
|
||||
.hljs-tag .hljs-attr {
|
||||
color: #005cc5;
|
||||
}
|
||||
.hljs-string,
|
||||
.hljs-doctag {
|
||||
color: #032f62;
|
||||
}
|
||||
.hljs-title,
|
||||
.hljs-section,
|
||||
.hljs-selector-id {
|
||||
color: #6f42c1;
|
||||
font-weight: bold;
|
||||
}
|
||||
.hljs-type,
|
||||
.hljs-class .hljs-title {
|
||||
color: #6f42c1;
|
||||
}
|
||||
.hljs-tag,
|
||||
.hljs-name,
|
||||
.hljs-attribute {
|
||||
color: #22863a;
|
||||
}
|
||||
.hljs-regexp,
|
||||
.hljs-link {
|
||||
color: #032f62;
|
||||
}
|
||||
.hljs-symbol,
|
||||
.hljs-bullet {
|
||||
color: #e36209;
|
||||
}
|
||||
.hljs-built_in,
|
||||
.hljs-builtin-name {
|
||||
color: #005cc5;
|
||||
}
|
||||
.hljs-meta {
|
||||
color: #6a737d;
|
||||
font-weight: bold;
|
||||
}
|
||||
.hljs-deletion {
|
||||
color: #b31d28;
|
||||
background-color: #ffeef0;
|
||||
}
|
||||
.hljs-addition {
|
||||
color: #22863a;
|
||||
background-color: #f0fff4;
|
||||
}
|
||||
/* Plugin styles */
|
||||
mark {
|
||||
background-color: #fff3cd;
|
||||
padding: 0.1em 0.2em;
|
||||
border-radius: 2px;
|
||||
}
|
||||
ins {
|
||||
text-decoration: none;
|
||||
background-color: #d4edda;
|
||||
padding: 0.1em 0.2em;
|
||||
border-radius: 2px;
|
||||
}
|
||||
sub, sup {
|
||||
font-size: 0.75em;
|
||||
}
|
||||
.task-list-item {
|
||||
list-style-type: none;
|
||||
margin-left: -1.5em;
|
||||
}
|
||||
.task-list-item input[type="checkbox"] {
|
||||
margin-right: 0.5em;
|
||||
}
|
||||
.footnotes {
|
||||
margin-top: 2em;
|
||||
padding-top: 1em;
|
||||
border-top: 1px solid #eee;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
.footnotes-sep {
|
||||
display: none;
|
||||
}
|
||||
.footnote-ref {
|
||||
font-size: 0.75em;
|
||||
vertical-align: super;
|
||||
}
|
||||
.footnote-backref {
|
||||
font-size: 0.75em;
|
||||
margin-left: 0.25em;
|
||||
}
|
||||
dl {
|
||||
margin: 1em 0;
|
||||
}
|
||||
dt {
|
||||
font-weight: 600;
|
||||
margin-top: 1em;
|
||||
}
|
||||
dd {
|
||||
margin-left: 2em;
|
||||
margin-top: 0.25em;
|
||||
color: #6a737d;
|
||||
}
|
||||
abbr {
|
||||
text-decoration: underline dotted;
|
||||
cursor: help;
|
||||
}
|
||||
.table-of-contents {
|
||||
background: #f6f8fa;
|
||||
padding: 1em 1.5em;
|
||||
border-radius: 6px;
|
||||
margin: 1em 0;
|
||||
}
|
||||
.table-of-contents ul {
|
||||
margin: 0;
|
||||
padding-left: 1.5em;
|
||||
}
|
||||
.table-of-contents li {
|
||||
margin: 0.25em 0;
|
||||
}
|
||||
/* Mermaid diagrams */
|
||||
.mermaid-diagram {
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
margin: 1.5em 0;
|
||||
padding: 1em;
|
||||
background: #f6f8fa;
|
||||
border-radius: 6px;
|
||||
}
|
||||
.mermaid-diagram svg {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
.mermaid-error {
|
||||
color: #cb2431;
|
||||
background: #ffeef0;
|
||||
padding: 1em;
|
||||
border-radius: 6px;
|
||||
font-family: monospace;
|
||||
font-size: 0.9em;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
${content}
|
||||
</body>
|
||||
</html>`;
|
||||
}
|
||||
|
||||
/**
 * Applies translations for this component.
 *
 * Runs the shared `applyTranslations()` pass first, then translates the
 * `<option data-i18n>` entries of the `#mdPreset` select by calling
 * `window.i18next.t` directly. An option keeps its existing text when
 * i18next is unavailable, returns an empty value, or echoes the key back.
 */
private applyI18n(): void {
  applyTranslations();

  const presetSelect = document.getElementById('mdPreset') as HTMLSelectElement;
  if (!presetSelect) return;

  const taggedOptions = Array.from(
    presetSelect.querySelectorAll('option[data-i18n]')
  );

  for (const option of taggedOptions) {
    const key = option.getAttribute('data-i18n');
    if (!key) continue;

    const translated = (window as any).i18next?.t(key);
    const hasTranslation = translated && translated !== key;
    if (hasTranslation) {
      option.textContent = translated;
    }
  }
}
|
||||
|
||||
/** Tears the component down by emptying its container element. */
public destroy(): void {
  const host = this.container;
  host.innerHTML = '';
}
|
||||
}
|
||||
304
src/js/utils/ocr.ts
Normal file
304
src/js/utils/ocr.ts
Normal file
@@ -0,0 +1,304 @@
|
||||
import Tesseract from 'tesseract.js';
|
||||
import { PDFDocument, StandardFonts, rgb, PDFFont } from 'pdf-lib';
|
||||
import fontkit from '@pdf-lib/fontkit';
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
import { getFontForLanguage } from './font-loader.js';
|
||||
import { OcrPage, OcrLine } from '@/types';
|
||||
import {
|
||||
parseHocrDocument,
|
||||
calculateWordTransform,
|
||||
calculateSpaceTransform,
|
||||
} from './hocr-transform.js';
|
||||
import { getPDFDocument } from './helpers.js';
|
||||
|
||||
/** Settings accepted by `performOcr`. */
export interface OcrOptions {
|
||||
// Tesseract language string passed to `createWorker`; multiple languages
// are joined with '+' (it is split on '+' to pick the embedded font).
language: string;
|
||||
// Used as the pdf.js viewport `scale` when rasterising each page.
resolution: number;
|
||||
// When true, each rendered page is thresholded to black/white before OCR.
binarize: boolean;
|
||||
// When non-empty, forwarded as Tesseract's `tessedit_char_whitelist`.
whitelist: string;
|
||||
// Optional callback receiving a status message and a progress number.
onProgress?: (status: string, progress: number) => void;
|
||||
}
|
||||
|
||||
/** Value resolved by `performOcr`. */
export interface OcrResult {
|
||||
// Serialised bytes of the newly built PDF (`pdfDoc.save()`).
pdfBytes: Uint8Array;
|
||||
// The pdf-lib document the bytes were saved from.
pdfDoc: PDFDocument;
|
||||
// Recognised text of all pages, each page followed by a blank line.
fullText: string;
|
||||
}
|
||||
|
||||
/**
 * Thresholds the whole canvas to pure black/white in place.
 *
 * Each pixel's brightness is computed with the 0.299/0.587/0.114 RGB
 * weighting; pixels brighter than 128 become white, all others black.
 * The alpha channel is left untouched.
 *
 * @param ctx 2D context whose backing canvas is read and overwritten.
 */
function binarizeCanvas(ctx: CanvasRenderingContext2D) {
  const frame = ctx.getImageData(0, 0, ctx.canvas.width, ctx.canvas.height);
  const pixels = frame.data;

  // Pixels are packed as RGBA, hence the stride of 4.
  for (let offset = 0; offset < pixels.length; offset += 4) {
    const luma =
      0.299 * pixels[offset] +
      0.587 * pixels[offset + 1] +
      0.114 * pixels[offset + 2];
    const level = luma > 128 ? 255 : 0;

    pixels[offset] = level;
    pixels[offset + 1] = level;
    pixels[offset + 2] = level;
  }

  ctx.putImageData(frame, 0, 0);
}
|
||||
|
||||
/**
 * Draws the recognised words of one OCR page onto a PDF page as fully
 * transparent text (opacity 0), so the text is present in the PDF without
 * being visible.
 *
 * For every word, control and bidi-formatting characters are stripped, a
 * font is chosen (`primaryFont` if the word contains any non-ASCII character,
 * otherwise `latinFont`), and the position/size come from
 * `calculateWordTransform`. When the line requests injected word breaks, a
 * transparent space is also drawn between consecutive words using
 * `calculateSpaceTransform`. Drawing failures are logged and skipped.
 *
 * @param page        Target pdf-lib page.
 * @param ocrPage     Parsed hOCR page whose lines/words are drawn.
 * @param pageHeight  Page height forwarded to the transform helpers.
 * @param primaryFont Font used for words containing non-ASCII characters.
 * @param latinFont   Font used for pure-ASCII words.
 */
function drawOcrTextLayer(
  page: ReturnType<typeof PDFDocument.prototype.addPage>,
  ocrPage: OcrPage,
  pageHeight: number,
  primaryFont: PDFFont,
  latinFont: PDFFont
): void {
  const invisibleChars =
    /[\u0000-\u001F\u007F-\u009F\u200E\u200F\u202A-\u202E\uFEFF]/g;
  const nonAscii = /[^\u0000-\u007F]/;

  for (const line of ocrPage.lines) {
    const { words } = line;
    const lastIndex = words.length - 1;

    for (let index = 0; index < words.length; index += 1) {
      const word = words[index];
      const text = word.text.replace(invisibleChars, '');
      if (!text.trim()) continue;

      const font = nonAscii.test(text) ? primaryFont : latinFont;
      if (!font) {
        console.warn('Font not available for text: "' + text + '"');
        continue;
      }

      // Width measurement that reports 0 instead of throwing (e.g. for
      // glyphs the font cannot encode).
      const measure = (txt: string, size: number): number => {
        try {
          return font.widthOfTextAtSize(txt, size);
        } catch {
          return 0;
        }
      };

      const placement = calculateWordTransform(word, line, pageHeight, measure);
      if (placement.fontSize <= 0) continue;

      try {
        page.drawText(text, {
          x: placement.x,
          y: placement.y,
          font,
          size: placement.fontSize,
          color: rgb(0, 0, 0),
          opacity: 0,
        });
      } catch (error) {
        console.warn(`Could not draw text "${text}":`, error);
      }

      const needsSpace = line.injectWordBreaks && index < lastIndex;
      if (!needsSpace) continue;

      const gap = calculateSpaceTransform(
        word,
        words[index + 1],
        line,
        pageHeight,
        (size: number) => measure(' ', size)
      );
      if (!gap || !(gap.horizontalScale > 0.1)) continue;

      try {
        page.drawText(' ', {
          x: gap.x,
          y: gap.y,
          font,
          size: gap.fontSize,
          color: rgb(0, 0, 0),
          opacity: 0,
        });
      } catch {
        console.warn(`Could not draw space between words`);
      }
    }
  }
}
|
||||
|
||||
/**
 * Chooses and embeds the fonts used for the invisible OCR text layer.
 *
 * The primary language is the first selected language that appears in the
 * CJK/Indic/Arabic/Cyrillic priority list, else the first selected language,
 * else 'eng'. When an Indic language is combined with a Latin one (and no CJK
 * language), a separate Latin font is embedded; otherwise one font serves
 * both roles. Any failure falls back to the standard Helvetica font.
 */
async function loadOcrFonts(
  doc: PDFDocument,
  language: string
): Promise<{ primaryFont: PDFFont; latinFont: PDFFont }> {
  const selectedLangs = language.split('+');
  const cjkLangs = ['jpn', 'chi_sim', 'chi_tra', 'kor'];
  const indicLangs = [
    'hin',
    'ben',
    'guj',
    'kan',
    'mal',
    'ori',
    'pan',
    'tam',
    'tel',
    'sin',
  ];
  const priorityLangs = [...cjkLangs, ...indicLangs, 'ara', 'rus', 'ukr'];

  const primaryLang =
    selectedLangs.find((l) => priorityLangs.includes(l)) ||
    selectedLangs[0] ||
    'eng';

  const hasCJK = selectedLangs.some((l) => cjkLangs.includes(l));
  const hasIndic = selectedLangs.some((l) => indicLangs.includes(l));
  const hasLatin =
    selectedLangs.some((l) => !priorityLangs.includes(l)) ||
    selectedLangs.includes('eng');
  const isIndicPlusLatin = hasIndic && hasLatin && !hasCJK;

  try {
    if (isIndicPlusLatin) {
      const [scriptFontBytes, latinFontBytes] = await Promise.all([
        getFontForLanguage(primaryLang),
        getFontForLanguage('eng'),
      ]);
      const primaryFont = await doc.embedFont(scriptFontBytes, {
        subset: false,
      });
      const latinFont = await doc.embedFont(latinFontBytes, {
        subset: false,
      });
      return { primaryFont, latinFont };
    }

    const fontBytes = await getFontForLanguage(primaryLang);
    const font = await doc.embedFont(fontBytes, { subset: false });
    return { primaryFont: font, latinFont: font };
  } catch (e) {
    console.error('Font loading failed, falling back to Helvetica', e);
    const fallback = await doc.embedFont(StandardFonts.Helvetica);
    return { primaryFont: fallback, latinFont: fallback };
  }
}

/** Encodes a canvas as PNG and returns the raw bytes. */
async function canvasToPngBytes(
  canvas: HTMLCanvasElement
): Promise<Uint8Array> {
  const blob = await new Promise<Blob>((resolve, reject) => {
    canvas.toBlob((b) => {
      if (b) {
        resolve(b);
      } else {
        reject(new Error('Failed to create image blob'));
      }
    }, 'image/png');
  });

  try {
    return new Uint8Array(await blob.arrayBuffer());
  } catch {
    throw new Error('Failed to read image data');
  }
}

/**
 * Runs OCR over every page of a PDF and builds a new PDF in which each page
 * is the rasterised original plus an invisible text layer.
 *
 * Each page is rendered with pdf.js at `options.resolution`, optionally
 * binarised, recognised by a Tesseract worker (hOCR + plain text), embedded
 * as a PNG in a fresh pdf-lib document, and overlaid with transparent text.
 *
 * The Tesseract worker is terminated even when parameter setup, PDF loading,
 * font loading or page processing fails.
 *
 * @param pdfBytes Source PDF bytes.
 * @param options  OCR settings; `onProgress` receives status/progress updates.
 * @returns The saved PDF bytes, the pdf-lib document, and the concatenated
 *          page text (each page followed by a blank line).
 * @throws If the PDF cannot be loaded, a canvas context or PNG cannot be
 *         created, or recognition/embedding fails.
 */
export async function performOcr(
  pdfBytes: Uint8Array | ArrayBuffer,
  options: OcrOptions
): Promise<OcrResult> {
  const { language, resolution, binarize, whitelist, onProgress } = options;
  const progress = onProgress || (() => {});

  // Created before the worker so a failure here cannot strand a worker.
  const newPdfDoc = await PDFDocument.create();
  newPdfDoc.registerFontkit(fontkit);

  let fullText = '';

  const worker = await Tesseract.createWorker(language, 1, {
    logger: function (m: { status: string; progress: number }) {
      progress(m.status, m.progress || 0);
    },
  });

  try {
    await worker.setParameters({
      tessjs_create_hocr: '1',
      tessedit_pageseg_mode: Tesseract.PSM.AUTO,
    });

    if (whitelist) {
      await worker.setParameters({
        tessedit_char_whitelist: whitelist,
      });
    }

    const pdf = await getPDFDocument({ data: pdfBytes }).promise;

    progress('Loading fonts...', 0);
    const { primaryFont, latinFont } = await loadOcrFonts(newPdfDoc, language);

    for (let i = 1; i <= pdf.numPages; i++) {
      progress(
        `Processing page ${i} of ${pdf.numPages}`,
        (i - 1) / pdf.numPages
      );

      const page = await pdf.getPage(i);
      const viewport = page.getViewport({ scale: resolution });

      const canvas = document.createElement('canvas');
      canvas.width = viewport.width;
      canvas.height = viewport.height;
      const context = canvas.getContext('2d');
      if (!context) throw new Error('Failed to create canvas context');

      await page.render({ canvasContext: context, viewport, canvas }).promise;

      if (binarize) {
        binarizeCanvas(context);
      }

      const result = await worker.recognize(
        canvas,
        {},
        { text: true, hocr: true }
      );
      const data = result.data;

      const newPage = newPdfDoc.addPage([viewport.width, viewport.height]);

      const pngImageBytes = await canvasToPngBytes(canvas);

      // Release canvas memory
      canvas.width = 0;
      canvas.height = 0;

      const pngImage = await newPdfDoc.embedPng(pngImageBytes);
      newPage.drawImage(pngImage, {
        x: 0,
        y: 0,
        width: viewport.width,
        height: viewport.height,
      });

      if (data.hocr) {
        const ocrPage = parseHocrDocument(data.hocr);
        drawOcrTextLayer(
          newPage,
          ocrPage,
          viewport.height,
          primaryFont,
          latinFont
        );
      }

      fullText += data.text + '\n\n';
    }
  } finally {
    await worker.terminate();
  }

  const savedBytes = await newPdfDoc.save();

  return {
    pdfBytes: new Uint8Array(savedBytes),
    pdfDoc: newPdfDoc,
    fullText,
  };
}
|
||||
215
src/js/utils/page-preview.ts
Normal file
215
src/js/utils/page-preview.ts
Normal file
@@ -0,0 +1,215 @@
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
import type { PDFDocumentProxy } from 'pdfjs-dist';
|
||||
import { PreviewState } from '@/types';
|
||||
|
||||
// Module side effect: point pdf.js at its worker script, resolved relative to
// this module's URL.
pdfjsLib.GlobalWorkerOptions.workerSrc = new URL(
|
||||
'pdfjs-dist/build/pdf.worker.min.mjs',
|
||||
import.meta.url
|
||||
).toString();
|
||||
|
||||
// Module-level state shared by all preview functions in this file; there is
// a single preview modal per page.
const state: PreviewState = {
|
||||
// Modal element, created on first use by getOrCreateModal().
modal: null,
|
||||
// Document currently being previewed; set by showPreview().
pdfjsDoc: null,
|
||||
// Page last rendered successfully (1-based); base for prev/next navigation.
currentPage: 1,
|
||||
// Page count supplied to showPreview().
totalPages: 0,
|
||||
// True between showPreview() and hidePreview(); gates keyboard handling.
isOpen: false,
|
||||
// NOTE(review): not read or written by the code visible in this file.
container: null,
|
||||
};
|
||||
|
||||
/**
 * Returns the preview modal, building it and appending it to `document.body`
 * on the first call and caching it in `state.modal` afterwards.
 *
 * The modal starts hidden (`opacity-0 pointer-events-none`) and contains a
 * close button, previous/next buttons, a canvas container and a page-info
 * label. Click handlers for backdrop, close, previous and next are wired here.
 */
function getOrCreateModal(): HTMLElement {
|
||||
if (state.modal) return state.modal;
|
||||
|
||||
const modal = document.createElement('div');
|
||||
modal.id = 'page-preview-modal';
|
||||
modal.className =
|
||||
'fixed inset-0 bg-black/80 backdrop-blur-sm z-[60] flex items-center justify-center opacity-0 pointer-events-none transition-opacity duration-200';
|
||||
modal.innerHTML = `
|
||||
<button id="preview-close" class="absolute top-4 right-4 text-white/70 hover:text-white z-10 transition-colors" title="Close (Esc)">
|
||||
<svg class="w-8 h-8" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" d="M6 18L18 6M6 6l12 12"/></svg>
|
||||
</button>
|
||||
<button id="preview-prev" class="absolute left-4 top-1/2 -translate-y-1/2 text-white/50 hover:text-white transition-colors p-2" title="Previous page">
|
||||
<svg class="w-10 h-10" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" d="M15 19l-7-7 7-7"/></svg>
|
||||
</button>
|
||||
<button id="preview-next" class="absolute right-4 top-1/2 -translate-y-1/2 text-white/50 hover:text-white transition-colors p-2" title="Next page">
|
||||
<svg class="w-10 h-10" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" d="M9 5l7 7-7 7"/></svg>
|
||||
</button>
|
||||
<div id="preview-canvas-container" class="flex items-center justify-center max-w-[90vw] max-h-[85vh]">
|
||||
<div id="preview-loading" class="text-white/60 text-sm">Loading...</div>
|
||||
</div>
|
||||
<div id="preview-page-info" class="absolute bottom-6 left-1/2 -translate-x-1/2 bg-gray-900/80 text-white text-sm px-4 py-2 rounded-full backdrop-blur-sm"></div>
|
||||
`;
|
||||
|
||||
// Only a click on the backdrop itself (not on a child) closes the preview.
modal.addEventListener('click', (e) => {
|
||||
if (e.target === modal) hidePreview();
|
||||
});
|
||||
modal.querySelector('#preview-close')!.addEventListener('click', hidePreview);
|
||||
modal
|
||||
.querySelector('#preview-prev')!
|
||||
.addEventListener('click', () => navigatePage(-1));
|
||||
modal
|
||||
.querySelector('#preview-next')!
|
||||
.addEventListener('click', () => navigatePage(1));
|
||||
|
||||
document.body.appendChild(modal);
|
||||
state.modal = modal;
|
||||
return modal;
|
||||
}
|
||||
|
||||
/** Sequence number of the most recent `renderPreviewPage` call. */
let previewRenderSeq = 0;

/**
 * Renders one page of `state.pdfjsDoc` into the preview modal.
 *
 * Updates the page label and prev/next visibility immediately, shows a
 * loading placeholder, then rasterises the page at scale 2.0 into a new
 * canvas. `state.currentPage` is updated only after a successful render.
 *
 * Renders can overlap (rapid navigation). Each call takes a sequence number
 * and discards its result if a newer call has started in the meantime, so a
 * slow older render can never overwrite the page the user navigated to last.
 *
 * @param pageNumber 1-based page number to display.
 */
async function renderPreviewPage(pageNumber: number): Promise<void> {
  const doc = state.pdfjsDoc;
  if (!doc) return;

  const renderSeq = ++previewRenderSeq;
  const isStale = (): boolean => renderSeq !== previewRenderSeq;

  const modal = getOrCreateModal();
  const container = modal.querySelector(
    '#preview-canvas-container'
  ) as HTMLElement;
  const pageInfo = modal.querySelector('#preview-page-info') as HTMLElement;
  const prevBtn = modal.querySelector('#preview-prev') as HTMLElement;
  const nextBtn = modal.querySelector('#preview-next') as HTMLElement;

  container.innerHTML = '<div class="text-white/60 text-sm">Loading...</div>';

  pageInfo.textContent = `Page ${pageNumber} of ${state.totalPages}`;
  prevBtn.style.visibility = pageNumber > 1 ? 'visible' : 'hidden';
  nextBtn.style.visibility =
    pageNumber < state.totalPages ? 'visible' : 'hidden';

  try {
    const page = await doc.getPage(pageNumber);
    if (isStale()) return;

    const scale = 2.0;
    const viewport = page.getViewport({ scale });

    const canvas = document.createElement('canvas');
    canvas.width = viewport.width;
    canvas.height = viewport.height;
    canvas.className =
      'max-w-[90vw] max-h-[85vh] object-contain rounded-lg shadow-2xl';
    canvas.style.width = 'auto';
    canvas.style.height = 'auto';
    canvas.style.maxWidth = '90vw';
    canvas.style.maxHeight = '85vh';

    const ctx = canvas.getContext('2d');
    if (!ctx) throw new Error('Failed to create canvas context');

    await page.render({ canvasContext: ctx, viewport, canvas }).promise;
    if (isStale()) return;

    container.innerHTML = '';
    container.appendChild(canvas);
    state.currentPage = pageNumber;
  } catch (err) {
    console.error('Preview render error:', err);
    // Leave the DOM alone if a newer render owns the container.
    if (isStale()) return;
    container.innerHTML =
      '<div class="text-red-400 text-sm">Failed to render page</div>';
  }
}
|
||||
|
||||
/**
 * Moves the preview by `delta` pages relative to `state.currentPage`.
 * Targets outside `1..state.totalPages` are ignored.
 */
function navigatePage(delta: number): void {
  const target = state.currentPage + delta;
  const inRange = target >= 1 && target <= state.totalPages;
  if (!inRange) return;

  renderPreviewPage(target);
}
|
||||
|
||||
/**
 * Opens the preview modal on the given page.
 *
 * Records the document and page count in module state, makes the modal
 * visible and interactive, disables body scrolling, and starts rendering
 * the requested page (not awaited).
 *
 * @param pdfjsDoc   Loaded pdf.js document to preview.
 * @param pageNumber 1-based page to show first.
 * @param totalPages Page count used for the label and navigation bounds.
 */
export function showPreview(
  pdfjsDoc: PDFDocumentProxy,
  pageNumber: number,
  totalPages: number
): void {
  Object.assign(state, { pdfjsDoc, totalPages, isOpen: true });

  getOrCreateModal().classList.remove('opacity-0', 'pointer-events-none');
  document.body.style.overflow = 'hidden';

  renderPreviewPage(pageNumber);
}
|
||||
|
||||
/**
 * Hides the preview modal and restores body scrolling.
 * No-op when the modal has never been created.
 */
export function hidePreview(): void {
  const { modal } = state;
  if (!modal) return;

  state.isOpen = false;
  modal.classList.add('opacity-0', 'pointer-events-none');
  document.body.style.overflow = '';
}
|
||||
|
||||
/**
 * Keyboard shortcuts while the preview is open: Escape closes it,
 * ArrowLeft/ArrowRight step one page back/forward (default action
 * suppressed). All keys are ignored while the preview is closed.
 */
function handleKeydown(e: KeyboardEvent): void {
  if (!state.isOpen) return;

  const { key } = e;
  if (key === 'Escape') {
    hidePreview();
  } else if (key === 'ArrowLeft' || key === 'ArrowRight') {
    e.preventDefault();
    navigatePage(key === 'ArrowLeft' ? -1 : 1);
  }
}
|
||||
|
||||
// Module side effect: registered once at import time; the handler itself
// ignores events while the preview is closed.
document.addEventListener('keydown', handleKeydown);
|
||||
|
||||
/**
 * Document currently associated with each initialised container. Lets the
 * single per-container keydown listener always use the latest document.
 */
const previewDocsByContainer = new WeakMap<HTMLElement, PDFDocumentProxy>();

/**
 * Derives the 1-based page number of a thumbnail from its data attributes.
 *
 * `data-page-number` is taken as 1-based; `data-page-index` is 0-based.
 * An attribute written as `data-pageIndex` is lower-cased by the HTML parser
 * and therefore surfaces as `dataset.pageindex`, which is read as 0-based
 * too. Missing or non-numeric values fall back to page 1.
 */
function resolvePreviewPageNumber(el: HTMLElement): number {
  const { pageNumber, pageIndex, pageindex } = el.dataset;

  let pageNum = 1;
  if (pageNumber) {
    pageNum = parseInt(pageNumber, 10);
  } else {
    const zeroBased = pageIndex ?? pageindex;
    if (zeroBased !== undefined) {
      pageNum = parseInt(zeroBased, 10) + 1;
    }
  }

  return Number.isNaN(pageNum) ? 1 : pageNum;
}

/**
 * Adds a hover "Preview" button to every page thumbnail inside `container`
 * and enables opening the preview with the Space key while hovering one.
 *
 * Safe to call repeatedly on the same container (e.g. after thumbnails are
 * added): thumbnails already carrying `data-preview-init` are skipped, and
 * the Space-key listener is attached once per container and always uses the
 * document passed to the most recent call.
 *
 * @param container Element containing the thumbnails.
 * @param pdfjsDoc  Loaded pdf.js document the thumbnails belong to.
 * @param options   Currently unused; kept for backward compatibility.
 */
export function initPagePreview(
  container: HTMLElement,
  pdfjsDoc: PDFDocumentProxy,
  options: { pageAttr?: string } = {}
): void {
  const totalPages = pdfjsDoc.numPages;

  const thumbnails = container.querySelectorAll<HTMLElement>(
    '[data-page-number], [data-page-index], [data-pageIndex]'
  );

  thumbnails.forEach((thumb) => {
    if (thumb.dataset.previewInit) return;
    thumb.dataset.previewInit = 'true';

    const pageNum = resolvePreviewPageNumber(thumb);

    const icon = document.createElement('button');
    icon.className =
      'page-preview-btn absolute bottom-1 right-1 bg-gray-900/80 hover:bg-indigo-600 text-white/70 hover:text-white rounded-full w-7 h-7 flex items-center justify-center opacity-0 group-hover:opacity-100 transition-opacity z-10';
    icon.title = 'Preview';
    icon.innerHTML =
      '<svg class="w-4 h-4" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"><path stroke-linecap="round" stroke-linejoin="round" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z"/></svg>';
    icon.addEventListener('click', (e) => {
      // Keep the click from triggering the thumbnail's own handlers.
      e.stopPropagation();
      e.preventDefault();
      showPreview(pdfjsDoc, pageNum, totalPages);
    });

    // `relative` anchors the absolutely positioned button; `group` drives
    // its hover visibility. classList.add ignores classes already present.
    thumb.classList.add('relative', 'group');

    thumb.appendChild(icon);
  });

  const alreadyBound = previewDocsByContainer.has(container);
  previewDocsByContainer.set(container, pdfjsDoc);
  if (alreadyBound) return;

  container.addEventListener('keydown', (e) => {
    if (e.key !== ' ' || state.isOpen) return;

    const hovered = container.querySelector<HTMLElement>(
      '[data-preview-init]:hover'
    );
    if (!hovered) return;

    const currentDoc = previewDocsByContainer.get(container);
    if (!currentDoc) return;

    e.preventDefault();
    showPreview(
      currentDoc,
      resolvePreviewPageNumber(hovered),
      currentDoc.numPages
    );
  });
}
|
||||
524
src/js/utils/pdf-operations.ts
Normal file
524
src/js/utils/pdf-operations.ts
Normal file
@@ -0,0 +1,524 @@
|
||||
import { PDFDocument, degrees, rgb, StandardFonts, PageSizes } from 'pdf-lib';
|
||||
|
||||
/**
 * Concatenates several PDFs into one, keeping the order of the input list
 * and the page order within each input.
 *
 * @param pdfBytesList PDFs to merge, in output order.
 * @returns Bytes of the merged PDF (saved with `addDefaultPage: false`).
 */
export async function mergePdfs(
  pdfBytesList: Uint8Array[]
): Promise<Uint8Array> {
  const combined = await PDFDocument.create();

  for (const sourceBytes of pdfBytesList) {
    const source = await PDFDocument.load(sourceBytes);
    const pages = await combined.copyPages(source, source.getPageIndices());
    for (const page of pages) {
      combined.addPage(page);
    }
  }

  const saved = await combined.save({ addDefaultPage: false });
  return new Uint8Array(saved);
}
|
||||
|
||||
/**
 * Extracts the given pages into a new PDF.
 *
 * @param pdfBytes    Source PDF bytes.
 * @param pageIndices 0-based page indices to copy, in output order.
 * @returns Bytes of a PDF containing only the requested pages.
 */
export async function splitPdf(
  pdfBytes: Uint8Array,
  pageIndices: number[]
): Promise<Uint8Array> {
  const source = await PDFDocument.load(pdfBytes);
  const extracted = await PDFDocument.create();

  const pages = await extracted.copyPages(source, pageIndices);
  for (const page of pages) {
    extracted.addPage(page);
  }

  const saved = await extracted.save();
  return new Uint8Array(saved);
}
|
||||
|
||||
/**
 * Rotates every page of a PDF by the same angle, added to each page's
 * existing rotation.
 *
 * When the resulting angle is a multiple of 90° the page is copied and its
 * rotation attribute set. Otherwise the page is embedded and drawn rotated
 * onto a new page sized to the rotated page's bounding box.
 *
 * @param pdfBytes Source PDF bytes.
 * @param angle    Rotation to add, in degrees.
 * @returns Bytes of the rotated PDF.
 */
export async function rotatePdfUniform(
  pdfBytes: Uint8Array,
  angle: number
): Promise<Uint8Array> {
  const source = await PDFDocument.load(pdfBytes);
  const output = await PDFDocument.create();
  const total = source.getPageCount();

  for (let pageIdx = 0; pageIdx < total; pageIdx += 1) {
    const srcPage = source.getPage(pageIdx);
    const finalAngle = srcPage.getRotation().angle + angle;

    if (finalAngle % 90 === 0) {
      const [clone] = await output.copyPages(source, [pageIdx]);
      clone.setRotation(degrees(finalAngle));
      output.addPage(clone);
      continue;
    }

    const embedded = await output.embedPage(srcPage);
    const { width, height } = embedded.scale(1);
    const theta = (finalAngle * Math.PI) / 180;
    const cos = Math.cos(theta);
    const sin = Math.sin(theta);

    // Axis-aligned bounding box of the rotated page.
    const boxWidth = width * Math.abs(cos) + height * Math.abs(sin);
    const boxHeight = width * Math.abs(sin) + height * Math.abs(cos);
    const target = output.addPage([boxWidth, boxHeight]);

    // Offset the draw origin so the rotated page's centre lands on the
    // centre of the bounding box.
    target.drawPage(embedded, {
      x: boxWidth / 2 - ((width / 2) * cos - (height / 2) * sin),
      y: boxHeight / 2 - ((width / 2) * sin + (height / 2) * cos),
      width,
      height,
      rotate: degrees(finalAngle),
    });
  }

  return new Uint8Array(await output.save());
}
|
||||
|
||||
/**
 * Rotates pages individually: page `i` is rotated by `rotations[i]` degrees
 * (0 when the entry is missing or falsy), added to its existing rotation.
 *
 * When the resulting angle is a multiple of 90° the page is copied and its
 * rotation attribute set. Otherwise the page is embedded and drawn rotated
 * onto a new page sized to the rotated page's bounding box.
 *
 * @param pdfBytes  Source PDF bytes.
 * @param rotations Per-page rotation in degrees, indexed by 0-based page.
 * @returns Bytes of the rotated PDF.
 */
export async function rotatePdfPages(
  pdfBytes: Uint8Array,
  rotations: number[]
): Promise<Uint8Array> {
  const source = await PDFDocument.load(pdfBytes);
  const output = await PDFDocument.create();
  const total = source.getPageCount();

  for (let pageIdx = 0; pageIdx < total; pageIdx += 1) {
    const extraAngle = rotations[pageIdx] || 0;
    const srcPage = source.getPage(pageIdx);
    const finalAngle = srcPage.getRotation().angle + extraAngle;

    if (finalAngle % 90 === 0) {
      const [clone] = await output.copyPages(source, [pageIdx]);
      clone.setRotation(degrees(finalAngle));
      output.addPage(clone);
      continue;
    }

    const embedded = await output.embedPage(srcPage);
    const { width, height } = embedded.scale(1);
    const theta = (finalAngle * Math.PI) / 180;
    const cos = Math.cos(theta);
    const sin = Math.sin(theta);

    // Axis-aligned bounding box of the rotated page.
    const boxWidth = width * Math.abs(cos) + height * Math.abs(sin);
    const boxHeight = width * Math.abs(sin) + height * Math.abs(cos);
    const target = output.addPage([boxWidth, boxHeight]);

    // Offset the draw origin so the rotated page's centre lands on the
    // centre of the bounding box.
    target.drawPage(embedded, {
      x: boxWidth / 2 - ((width / 2) * cos - (height / 2) * sin),
      y: boxHeight / 2 - ((width / 2) * sin + (height / 2) * cos),
      width,
      height,
      rotate: degrees(finalAngle),
    });
  }

  return new Uint8Array(await output.save());
}
|
||||
|
||||
/**
 * Returns a copy of the PDF without the listed pages.
 *
 * @param pdfBytes      Source PDF bytes.
 * @param pagesToDelete 1-based page numbers to remove.
 * @returns Bytes of a PDF containing the remaining pages in original order.
 * @throws Error('Cannot delete all pages') when no page would remain.
 */
export async function deletePdfPages(
  pdfBytes: Uint8Array,
  pagesToDelete: Set<number>
): Promise<Uint8Array> {
  const source = await PDFDocument.load(pdfBytes);
  const pageCount = source.getPageCount();

  // The set holds 1-based numbers; copyPages wants 0-based indices.
  const survivors = Array.from({ length: pageCount }, (_, idx) => idx).filter(
    (idx) => !pagesToDelete.has(idx + 1)
  );

  if (survivors.length === 0) throw new Error('Cannot delete all pages');

  const trimmed = await PDFDocument.create();
  const pages = await trimmed.copyPages(source, survivors);
  for (const page of pages) {
    trimmed.addPage(page);
  }

  return new Uint8Array(await trimmed.save());
}
|
||||
|
||||
/**
 * Parses a user-entered page selection such as `"1-3, 5, 8-"` into sorted,
 * de-duplicated 0-based page indices.
 *
 * Grammar (comma separated, whitespace ignored):
 * - `N`    a single 1-based page; ignored when outside `1..totalPages`.
 * - `A-B`  an inclusive range, clamped to `1..totalPages`.
 * - `A-`   from page A to the last page.
 * - `-B`   from the first page to page B.
 *
 * A range whose bound is present but not numeric (e.g. `"a-b"`, `"2-x"`) is
 * ignored rather than being widened to the whole document, and an explicit
 * end of `0` (e.g. `"5-0"`) yields no pages rather than meaning "to the end".
 *
 * @param rangeStr   The selection text.
 * @param totalPages Number of pages in the document.
 * @returns Ascending 0-based page indices.
 */
export function parsePageRange(rangeStr: string, totalPages: number): number[] {
  const indices = new Set<number>();

  for (const rawPart of rangeStr.split(',')) {
    const part = rawPart.trim();
    if (!part) continue;

    if (part.includes('-')) {
      const [startStr = '', endStr = ''] = part.split('-');
      const startNum = parseInt(startStr, 10);
      const endNum = parseInt(endStr, 10);

      // A bound may be omitted, but if present it must be a number.
      const badStart = startStr.trim() !== '' && Number.isNaN(startNum);
      const badEnd = endStr.trim() !== '' && Number.isNaN(endNum);
      if (badStart || badEnd) continue;

      const start = Number.isNaN(startNum) ? 1 : Math.max(1, startNum);
      const end = Number.isNaN(endNum)
        ? totalPages
        : Math.min(totalPages, endNum);

      for (let page = start; page <= end; page++) {
        indices.add(page - 1);
      }
    } else {
      const page = parseInt(part, 10);
      if (page >= 1 && page <= totalPages) {
        indices.add(page - 1);
      }
    }
  }

  return Array.from(indices).sort((a, b) => a - b);
}
|
||||
|
||||
/**
 * Parses a user-entered page selection such as `"1-3, 5, 8-"` into a set of
 * 1-based page numbers to delete.
 *
 * Grammar (comma separated, whitespace ignored):
 * - `N`    a single page; ignored when outside `1..totalPages`.
 * - `A-B`  an inclusive range, clamped to `1..totalPages`.
 * - `A-`   from page A to the last page.
 * - `-B`   from the first page to page B.
 *
 * A range whose bound is present but not numeric (e.g. `"a-b"`) is ignored
 * rather than being widened to every page, and an explicit end of `0`
 * (e.g. `"5-0"`) selects nothing rather than meaning "to the end". This
 * matters here because the result drives page deletion.
 *
 * @param str        The selection text.
 * @param totalPages Number of pages in the document.
 * @returns Set of 1-based page numbers, in the order they were encountered.
 */
export function parseDeletePages(str: string, totalPages: number): Set<number> {
  const pages = new Set<number>();

  for (const rawPart of str.split(',')) {
    const part = rawPart.trim();
    if (!part) continue;

    if (part.includes('-')) {
      const [startStr = '', endStr = ''] = part.split('-');
      const startNum = parseInt(startStr, 10);
      const endNum = parseInt(endStr, 10);

      // A bound may be omitted, but if present it must be a number.
      const badStart = startStr.trim() !== '' && Number.isNaN(startNum);
      const badEnd = endStr.trim() !== '' && Number.isNaN(endNum);
      if (badStart || badEnd) continue;

      const start = Number.isNaN(startNum) ? 1 : Math.max(1, startNum);
      const end = Number.isNaN(endNum)
        ? totalPages
        : Math.min(totalPages, endNum);

      for (let page = start; page <= end; page++) pages.add(page);
    } else {
      const page = parseInt(part, 10);
      if (page >= 1 && page <= totalPages) pages.add(page);
    }
  }

  return pages;
}
|
||||
|
||||
/** Settings accepted by `addTextWatermark`. */
export interface TextWatermarkOptions {
|
||||
// Watermark text; rasterised via a canvas and embedded as a PNG.
text: string;
|
||||
// Font size of the watermark text (rendered at 2x and scaled back down).
fontSize: number;
|
||||
// Text colour; each channel is multiplied by 255, so values are fractions
// (presumably in 0..1 — confirm with callers).
color: { r: number; g: number; b: number };
|
||||
// Opacity passed to pdf-lib's drawImage.
opacity: number;
|
||||
// Rotation of the watermark in degrees.
angle: number;
|
||||
// Horizontal centre as a fraction of the page width; defaults to 0.5.
x?: number;
|
||||
// Vertical centre as a fraction of the page height; defaults to 0.5.
y?: number;
|
||||
// 0-based indices of pages to stamp; defaults to every page.
pageIndices?: number[];
|
||||
}
|
||||
|
||||
/**
 * Stamps a text watermark onto pages of a PDF.
 *
 * The text is drawn onto an off-screen canvas at 2x resolution, exported as
 * a PNG and embedded once; the image is then drawn on each target page,
 * rotated by `options.angle` and positioned so that its centre sits at
 * (`x` x page width, `y` x page height). Rasterising through a canvas means
 * the glyphs come from the browser's fonts rather than from a PDF font.
 *
 * @param pdfBytes Source PDF bytes.
 * @param options  Watermark text, styling, placement and target pages.
 * @returns Bytes of the watermarked PDF.
 * @throws If a 2D canvas context or the PNG blob cannot be created.
 */
export async function addTextWatermark(
  pdfBytes: Uint8Array,
  options: TextWatermarkOptions
): Promise<Uint8Array> {
  const pdfDoc = await PDFDocument.load(pdfBytes);

  const canvas = document.createElement('canvas');
  const ctx = canvas.getContext('2d');
  if (!ctx) throw new Error('Failed to create canvas context');

  const dpr = 2;
  const { r, g, b } = options.color;
  const colorR = Math.round(r * 255);
  const colorG = Math.round(g * 255);
  const colorB = Math.round(b * 255);
  const fontStr = `bold ${options.fontSize * dpr}px "Noto Sans SC", "Noto Sans JP", "Noto Sans KR", "Noto Sans Arabic", Arial, sans-serif`;

  // First pass: measure the text so the canvas can be sized to fit it.
  ctx.font = fontStr;
  const textWidth = ctx.measureText(options.text).width;

  canvas.width = Math.ceil(textWidth) + 4;
  canvas.height = Math.ceil(options.fontSize * dpr * 1.4);

  // Resizing a canvas resets its context state, so the font is set again.
  ctx.font = fontStr;
  ctx.fillStyle = `rgb(${colorR}, ${colorG}, ${colorB})`;
  ctx.textBaseline = 'middle';
  ctx.fillText(options.text, 2, canvas.height / 2);

  const blob = await new Promise<Blob>((resolve, reject) => {
    canvas.toBlob((result) => {
      if (result) {
        resolve(result);
      } else {
        reject(new Error('Canvas toBlob failed'));
      }
    }, 'image/png');
  });
  const imageBytes = new Uint8Array(await blob.arrayBuffer());

  const image = await pdfDoc.embedPng(imageBytes);
  const pages = pdfDoc.getPages();
  const posX = options.x ?? 0.5;
  const posY = options.y ?? 0.5;

  // Undo the 2x rasterisation so the watermark has its nominal size.
  const imgWidth = image.width / dpr;
  const imgHeight = image.height / dpr;

  const rad = (options.angle * Math.PI) / 180;
  const halfW = imgWidth / 2;
  const halfH = imgHeight / 2;

  const targetIndices = options.pageIndices ?? pages.map((_, i) => i);
  targetIndices.forEach((idx) => {
    const page = pages[idx];
    if (!page) return;

    const { width, height } = page.getSize();
    const cx = posX * width;
    const cy = posY * height;

    // Shift the draw origin so the rotated image's centre lands on (cx, cy).
    page.drawImage(image, {
      x: cx - Math.cos(rad) * halfW + Math.sin(rad) * halfH,
      y: cy - Math.sin(rad) * halfW - Math.cos(rad) * halfH,
      width: imgWidth,
      height: imgHeight,
      opacity: options.opacity,
      rotate: degrees(options.angle),
    });
  });

  return new Uint8Array(await pdfDoc.save());
}
|
||||
|
||||
/**
 * Settings accepted by `addImageWatermark`.
 */
export interface ImageWatermarkOptions {
  /** Encoded bytes of the watermark image. */
  imageBytes: Uint8Array;
  /** Encoding of `imageBytes`; selects the PNG or JPEG embedder. */
  imageType: 'png' | 'jpg';
  /** Opacity forwarded to the page draw call. */
  opacity: number;
  /** Rotation of the watermark about its own center, in degrees. */
  angle: number;
  /** Multiplier applied to the embedded image's width and height. */
  scale: number;
  /** Horizontal center as a fraction of the page width. Defaults to 0.5. */
  x?: number;
  /** Vertical center as a fraction of the page height. Defaults to 0.5. */
  y?: number;
  /**
   * Zero-based indices of the pages to stamp; indices with no matching page
   * are ignored. Defaults to every page.
   */
  pageIndices?: number[];
}
|
||||
|
||||
/**
 * Stamps an image watermark onto pages of a PDF.
 *
 * The image is embedded once (as PNG or JPEG, per `options.imageType`),
 * scaled by `options.scale`, and drawn on each target page rotated about its
 * own center.
 *
 * @param pdfBytes - Bytes of the PDF to watermark.
 * @param options - Image data, appearance and placement settings.
 * @returns Bytes of the saved, watermarked PDF.
 */
export async function addImageWatermark(
  pdfBytes: Uint8Array,
  options: ImageWatermarkOptions
): Promise<Uint8Array> {
  const pdfDoc = await PDFDocument.load(pdfBytes);
  const image = await (options.imageType === 'png'
    ? pdfDoc.embedPng(options.imageBytes)
    : pdfDoc.embedJpg(options.imageBytes));

  const pages = pdfDoc.getPages();
  const fractionX = options.x ?? 0.5;
  const fractionY = options.y ?? 0.5;

  const imgWidth = image.width * options.scale;
  const imgHeight = image.height * options.scale;
  const rad = (options.angle * Math.PI) / 180;
  const cosA = Math.cos(rad);
  const sinA = Math.sin(rad);
  const halfW = imgWidth / 2;
  const halfH = imgHeight / 2;

  const targetIndices =
    options.pageIndices ?? pages.map((_page, index) => index);

  for (const index of targetIndices) {
    const page = pages[index];
    if (page) {
      const { width, height } = page.getSize();
      const centerX = fractionX * width;
      const centerY = fractionY * height;

      // The image is rotated about its lower-left corner, so that corner is
      // offset by the rotated half-extents to keep the image's center on
      // (centerX, centerY).
      page.drawImage(image, {
        x: centerX - cosA * halfW + sinA * halfH,
        y: centerY - sinA * halfW - cosA * halfH,
        width: imgWidth,
        height: imgHeight,
        opacity: options.opacity,
        rotate: degrees(options.angle),
      });
    }
  }

  return new Uint8Array(await pdfDoc.save());
}
|
||||
|
||||
/** Corner or edge-center of the page where the page number is placed. */
export type PageNumberPosition =
  | 'top-left'
  | 'top-center'
  | 'top-right'
  | 'bottom-left'
  | 'bottom-center'
  | 'bottom-right';

/**
 * Label style: `simple` prints the page number alone, `page_x_of_y` prints
 * the page number and the page count separated by " / ".
 */
export type PageNumberFormat = 'simple' | 'page_x_of_y';
|
||||
|
||||
/**
 * Settings accepted by `addPageNumbers`.
 */
export interface PageNumberOptions {
  /** Where on each page the number is drawn. */
  position: PageNumberPosition;
  /** Font size used both to measure and to draw the label. */
  fontSize: number;
  /** Whether to print just the page number or "number / total". */
  format: PageNumberFormat;
  /** Text color; the channels are passed unchanged to pdf-lib's `rgb()`. */
  color: { r: number; g: number; b: number };
}
|
||||
|
||||
/**
 * Draws a page-number label in Helvetica on every page of a PDF.
 *
 * Placement is computed inside the page's crop box: margins are 4% of the
 * box dimension clamped to 8–40 units, widened when the label itself is
 * larger, and the final position is clamped so the label stays at least
 * 3 units inside the box.
 *
 * @param pdfBytes - Bytes of the PDF to number.
 * @param options - Position, font size, label format and color.
 * @returns Bytes of the saved, numbered PDF.
 */
export async function addPageNumbers(
  pdfBytes: Uint8Array,
  options: PageNumberOptions
): Promise<Uint8Array> {
  const pdfDoc = await PDFDocument.load(pdfBytes);
  const helveticaFont = await pdfDoc.embedFont(StandardFonts.Helvetica);
  const pages = pdfDoc.getPages();
  const totalPages = pages.length;

  // Margin policy shared by both axes.
  const minMargin = 8;
  const maxMargin = 40;
  const marginPercentage = 0.04;

  for (const [pageIndex, page] of pages.entries()) {
    const mediaBox = page.getMediaBox();
    const cropBox = page.getCropBox();
    const bounds = cropBox || mediaBox;
    const { width, height } = bounds;
    // The box origin is not necessarily (0, 0); positions are offset by it.
    const xOffset = bounds.x || 0;
    const yOffset = bounds.y || 0;

    const pageNumText =
      options.format === 'page_x_of_y'
        ? `${pageIndex + 1} / ${totalPages}`
        : `${pageIndex + 1}`;

    const textWidth = helveticaFont.widthOfTextAtSize(
      pageNumText,
      options.fontSize
    );
    const textHeight = options.fontSize;

    const horizontalMargin = Math.max(
      minMargin,
      Math.min(maxMargin, width * marginPercentage)
    );
    const verticalMargin = Math.max(
      minMargin,
      Math.min(maxMargin, height * marginPercentage)
    );

    // Never let the margin be smaller than the label needs.
    const safeHorizontalMargin = Math.max(horizontalMargin, textWidth / 2 + 3);
    const safeVerticalMargin = Math.max(verticalMargin, textHeight + 3);

    // Candidate anchors for each horizontal and vertical alignment.
    const leftX = safeHorizontalMargin + xOffset;
    const centerX =
      Math.max(
        safeHorizontalMargin,
        Math.min(
          width - safeHorizontalMargin - textWidth,
          (width - textWidth) / 2
        )
      ) + xOffset;
    const rightX =
      Math.max(
        safeHorizontalMargin,
        width - safeHorizontalMargin - textWidth
      ) + xOffset;
    const bottomY = safeVerticalMargin + yOffset;
    const topY = height - safeVerticalMargin - textHeight + yOffset;

    // An unrecognized position leaves (0, 0), which the clamp below pulls
    // into the box.
    let x = 0;
    let y = 0;
    switch (options.position) {
      case 'bottom-center':
        x = centerX;
        y = bottomY;
        break;
      case 'bottom-left':
        x = leftX;
        y = bottomY;
        break;
      case 'bottom-right':
        x = rightX;
        y = bottomY;
        break;
      case 'top-center':
        x = centerX;
        y = topY;
        break;
      case 'top-left':
        x = leftX;
        y = topY;
        break;
      case 'top-right':
        x = rightX;
        y = topY;
        break;
    }

    // Final safety clamp: keep the label at least 3 units inside the box.
    x = Math.max(xOffset + 3, Math.min(xOffset + width - textWidth - 3, x));
    y = Math.max(yOffset + 3, Math.min(yOffset + height - textHeight - 3, y));

    page.drawText(pageNumText, {
      x,
      y,
      font: helveticaFont,
      size: options.fontSize,
      color: rgb(options.color.r, options.color.g, options.color.b),
    });
  }

  return new Uint8Array(await pdfDoc.save());
}
|
||||
|
||||
/**
 * Settings accepted by `fixPageSize`.
 */
export interface FixPageSizeOptions {
  /**
   * Either `custom` (any letter case) to use the `custom*` fields, or the
   * name of a pdf-lib `PageSizes` entry such as `A4`. Unrecognized names
   * fall back to A4.
   */
  targetSize: string;
  /**
   * `landscape` or `portrait` (any letter case) swaps the target dimensions
   * when needed; any other value leaves them as resolved.
   */
  orientation: string;
  /**
   * `fill` (any letter case) scales each page to cover the target; any other
   * value scales it to fit inside the target.
   */
  scalingMode: string;
  /** Color painted behind each page; channels are passed to pdf-lib's `rgb()`. */
  backgroundColor: { r: number; g: number; b: number };
  /** Custom width in `customUnits`. Defaults to 210. */
  customWidth?: number;
  /** Custom height in `customUnits`. Defaults to 297. */
  customHeight?: number;
  /**
   * `in` (any letter case) for inches; every other value is treated as
   * millimetres. Defaults to `mm`.
   */
  customUnits?: string;
}
|
||||
|
||||
/**
 * Rebuilds a PDF so that every page has the same target size.
 *
 * Each source page is embedded into a new page of the target size, painted
 * over a solid background rectangle, scaled uniformly and centered.
 *
 * Target size resolution:
 * - `targetSize` of `custom` (any letter case) uses `customWidth` /
 *   `customHeight` (defaults 210 x 297) in `customUnits`: `in` means inches,
 *   anything else is treated as millimetres.
 * - Otherwise `targetSize` names a pdf-lib `PageSizes` entry. The exact key
 *   is tried first, then a case-insensitive match (so `letter` resolves to
 *   `Letter`, consistent with how the other string options are compared);
 *   unknown names fall back to A4.
 *
 * @param pdfBytes - Bytes of the source PDF.
 * @param options - Target size, orientation, scaling mode and background.
 * @returns Bytes of the newly created PDF.
 */
export async function fixPageSize(
  pdfBytes: Uint8Array,
  options: FixPageSizeOptions
): Promise<Uint8Array> {
  let targetWidth: number;
  let targetHeight: number;

  const requestedSize = options.targetSize.toLowerCase();
  if (requestedSize === 'custom') {
    const w = options.customWidth ?? 210;
    const h = options.customHeight ?? 297;
    const units = (options.customUnits ?? 'mm').toLowerCase();
    // PDF user space is 72 units per inch; 25.4 mm per inch.
    const unitsToPoints = units === 'in' ? 72 : 72 / 25.4;
    targetWidth = w * unitsToPoints;
    targetHeight = h * unitsToPoints;
  } else {
    type PageSizeName = keyof typeof PageSizes;
    const sizeNames = Object.keys(PageSizes) as PageSizeName[];
    // Exact key wins so previously working inputs resolve exactly as before;
    // the case-insensitive pass only rescues names that used to fall back.
    const matchedName =
      sizeNames.find((name) => name === options.targetSize) ??
      sizeNames.find((name) => name.toLowerCase() === requestedSize);
    const selected = matchedName ? PageSizes[matchedName] : PageSizes.A4;
    targetWidth = selected[0];
    targetHeight = selected[1];
  }

  // Swap the dimensions only when they contradict the requested orientation.
  const orientation = options.orientation.toLowerCase();
  if (orientation === 'landscape' && targetWidth < targetHeight) {
    [targetWidth, targetHeight] = [targetHeight, targetWidth];
  } else if (orientation === 'portrait' && targetWidth > targetHeight) {
    [targetWidth, targetHeight] = [targetHeight, targetWidth];
  }

  // Per-document invariants, computed once instead of once per page.
  const useFill = options.scalingMode.toLowerCase() === 'fill';
  const background = rgb(
    options.backgroundColor.r,
    options.backgroundColor.g,
    options.backgroundColor.b
  );

  const sourceDoc = await PDFDocument.load(pdfBytes);
  const outputDoc = await PDFDocument.create();

  for (const sourcePage of sourceDoc.getPages()) {
    const { width: sourceWidth, height: sourceHeight } = sourcePage.getSize();
    const embeddedPage = await outputDoc.embedPage(sourcePage);

    const outputPage = outputDoc.addPage([targetWidth, targetHeight]);
    outputPage.drawRectangle({
      x: 0,
      y: 0,
      width: targetWidth,
      height: targetHeight,
      color: background,
    });

    // "fill" covers the whole target (may crop); otherwise the page is
    // fitted entirely inside the target (may leave background visible).
    const scaleX = targetWidth / sourceWidth;
    const scaleY = targetHeight / sourceHeight;
    const scale = useFill ? Math.max(scaleX, scaleY) : Math.min(scaleX, scaleY);

    const scaledWidth = sourceWidth * scale;
    const scaledHeight = sourceHeight * scale;

    outputPage.drawPage(embeddedPage, {
      x: (targetWidth - scaledWidth) / 2,
      y: (targetHeight - scaledHeight) / 2,
      width: scaledWidth,
      height: scaledHeight,
    });
  }

  return new Uint8Array(await outputDoc.save());
}
|
||||
87
src/js/utils/pymupdf-loader.ts
Normal file
87
src/js/utils/pymupdf-loader.ts
Normal file
@@ -0,0 +1,87 @@
|
||||
import { WasmProvider } from './wasm-provider.js';
|
||||
|
||||
// PyMuPDF wrapper instance created by loadPyMuPDF() and handed back by later
// calls; reset to null by clearPyMuPDFCache().
let cachedPyMuPDF: any = null;
|
||||
// Promise of the most recently started load; loadPyMuPDF() returns it to
// callers that arrive while no instance is cached. Reset to null by
// clearPyMuPDFCache().
let loadPromise: Promise<any> | null = null;
|
||||
|
||||
/**
 * Shape of the PyMuPDF wrapper object as this project uses it.
 *
 * NOTE(review): the wrapper is loaded dynamically at runtime, so this
 * interface is a description rather than something the compiler enforces
 * against the real module — confirm against the wrapper's own typings.
 */
export interface PyMuPDFInterface {
  /** Asynchronous initialization; `loadPyMuPDF` awaits it on the new instance. */
  load(): Promise<void>;

  /** Compresses a PDF, resolving with the result blob and its reported size. */
  compressPdf(
    file: Blob,
    options: any
  ): Promise<{ blob: Blob; compressedSize: number }>;

  /** Converts a file to PDF; `ext` presumably names the source format. */
  convertToPdf(file: Blob, ext: string): Promise<Blob>;

  /** Extracts text from a document. */
  extractText(file: Blob, options?: any): Promise<string>;

  /** Extracts embedded images as raw bytes plus a file extension. */
  extractImages(file: Blob): Promise<Array<{ data: Uint8Array; ext: string }>>;

  /** Extracts tables; the element shape is not declared here. */
  extractTables(file: Blob): Promise<any[]>;

  /** Renders one page as an SVG string. */
  toSvg(file: Blob, pageNum: number): Promise<string>;

  /** Renders one page as an image blob at the given scale. */
  renderPageToImage(file: Blob, pageNum: number, scale: number): Promise<Blob>;

  /** Resolves with the number of pages in the document. */
  getPageCount(file: Blob): Promise<number>;

  /** Rasterizes a PDF, resolving with the resulting blob. */
  rasterizePdf(file: Blob | File, options: any): Promise<Blob>;
}
|
||||
|
||||
/**
 * Loads the PyMuPDF wrapper from the configured location and returns a
 * ready-to-use instance, creating it at most once.
 *
 * - A previously loaded instance is returned immediately.
 * - Callers that arrive while a load is in progress share that load.
 * - A failed load is never remembered: the shared promise is dropped on any
 *   failure (including the "not configured" checks) and the instance is only
 *   cached after its `load()` has resolved, so the next call retries cleanly.
 *
 * @returns The loaded PyMuPDF wrapper instance.
 * @throws Error when PyMuPDF or Ghostscript is not configured, or (prefixed
 *   with "Failed to load PyMuPDF from CDN:") when importing or initializing
 *   the wrapper fails.
 */
export async function loadPyMuPDF(): Promise<any> {
  if (cachedPyMuPDF) {
    return cachedPyMuPDF;
  }

  if (loadPromise) {
    return loadPromise;
  }

  const pending: Promise<any> = (async () => {
    if (!WasmProvider.isConfigured('pymupdf')) {
      throw new Error(
        'PyMuPDF is not configured. Please configure it in Advanced Settings.'
      );
    }
    if (!WasmProvider.isConfigured('ghostscript')) {
      throw new Error(
        'Ghostscript is not configured. PyMuPDF requires Ghostscript for some operations. Please configure both in Advanced Settings.'
      );
    }

    const pymupdfUrl = WasmProvider.getUrl('pymupdf')!;
    const gsUrl = WasmProvider.getUrl('ghostscript')!;
    // The wrapper and its assets are addressed relative to a base URL that
    // must end in a slash.
    const normalizedPymupdf = pymupdfUrl.endsWith('/')
      ? pymupdfUrl
      : `${pymupdfUrl}/`;

    try {
      const wrapperUrl = `${normalizedPymupdf}dist/index.js`;
      const module = await import(/* @vite-ignore */ wrapperUrl);

      if (typeof module.PyMuPDF !== 'function') {
        throw new Error(
          'PyMuPDF module did not export expected PyMuPDF class.'
        );
      }

      const instance = new module.PyMuPDF({
        assetPath: `${normalizedPymupdf}assets/`,
        ghostscriptUrl: gsUrl,
      });

      await instance.load();

      // Publish the instance only once it is fully initialized, so a failed
      // load() can never leave a half-initialized object in the cache.
      cachedPyMuPDF = instance;

      console.log('[PyMuPDF Loader] Successfully loaded from CDN');
      return instance;
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to load PyMuPDF from CDN: ${reason}`);
    }
  })();
  loadPromise = pending;

  try {
    return await pending;
  } catch (error) {
    // Forget the failed attempt (unless a newer one has replaced it) so the
    // next call starts over instead of replaying this rejection.
    if (loadPromise === pending) {
      loadPromise = null;
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Reports whether both PyMuPDF and Ghostscript are configured, i.e. whether
 * `loadPyMuPDF` would get past its configuration checks.
 */
export function isPyMuPDFAvailable(): boolean {
  if (!WasmProvider.isConfigured('pymupdf')) {
    return false;
  }
  return WasmProvider.isConfigured('ghostscript');
}
|
||||
|
||||
/**
 * Drops the cached PyMuPDF instance and any remembered load promise, so the
 * next `loadPyMuPDF` call starts a new load.
 */
export function clearPyMuPDFCache(): void {
  loadPromise = null;
  cachedPyMuPDF = null;
}
|
||||
@@ -1,381 +1,466 @@
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
// Point pdf.js at its worker script, resolved relative to this module.
pdfjsLib.GlobalWorkerOptions.workerSrc = new URL(
  'pdfjs-dist/build/pdf.worker.min.mjs',
  import.meta.url
).toString();
|
||||
|
||||
/**
 * Configuration for progressive rendering.
 */
export interface RenderConfig {
  /** Pages rendered per batch. Defaults to 8. */
  batchSize?: number;
  /**
   * When true (the default), only the first pages are rendered up front and
   * the rest are rendered as their placeholders scroll into view.
   */
  useLazyLoading?: boolean;
  /** `rootMargin` for the lazy-load IntersectionObserver. Defaults to `200px`. */
  lazyLoadMargin?: string;
  /** Number of batches to load ahead eagerly. Defaults to 2. */
  eagerLoadBatches?: number;
  /** Called after each initial batch with the pages done so far and the total. */
  onProgress?: (current: number, total: number) => void;
  /** NOTE(review): not invoked by the rendering code visible here — confirm usage. */
  onPageRendered?: (pageIndex: number, element: HTMLElement) => void;
  /** Called after each initial batch and after each lazily rendered page. */
  onBatchComplete?: () => void;
  /** Polled before each batch; returning true stops further rendering. */
  shouldCancel?: () => boolean;
}
|
||||
|
||||
/**
 * Page rendering task: everything needed to render one page and put the
 * result into the DOM.
 */
interface PageTask {
  /** Page number as passed to pdf.js `getPage`. */
  pageNumber: number;
  /** Document the page is read from. */
  pdfjsDoc: pdfjsLib.PDFDocumentProxy;
  /** Optional file name forwarded to `createWrapper`. */
  fileName?: string;
  /** Element that holds the placeholders and rendered pages. */
  container: HTMLElement;
  /** Render scale; the batch renderer falls back to 0.5 when unset or 0. */
  scale?: number;
  /** Builds the element that wraps a rendered canvas. */
  createWrapper: (
    canvas: HTMLCanvasElement,
    pageNumber: number,
    fileName?: string
  ) => HTMLElement;
  /** Placeholder to replace; when absent it is looked up in `container`. */
  placeholderElement?: HTMLElement;
}
|
||||
|
||||
/**
 * Lazy loading state shared by the progressive renderer.
 */
interface LazyLoadState {
  /** Active observer, or null when lazy loading is not running. */
  observer: IntersectionObserver | null;
  /** Tasks not yet rendered, keyed by their placeholder element. */
  pendingTasks: Map<HTMLElement, PageTask>;
  /** The same pending tasks, keyed by page number. */
  pendingTasksByPageNumber: Map<number, PageTask>;
  /** NOTE(review): initialized to false; not read by the code visible here. */
  isRendering: boolean;
  /** Tasks scheduled for background rendering after the initial pages. */
  eagerLoadQueue: PageTask[];
  /** Index into `eagerLoadQueue` of the next task to render eagerly. */
  nextEagerIndex: number;
}
|
||||
|
||||
// Module-wide lazy-load bookkeeping; starts empty with no observer.
const lazyLoadState: LazyLoadState = {
  observer: null,
  pendingTasks: new Map(),
  pendingTasksByPageNumber: new Map(),
  isRendering: false,
  eagerLoadQueue: [],
  nextEagerIndex: 0,
};
|
||||
|
||||
/**
 * Creates a placeholder element for a page that will be lazy-loaded.
 *
 * The element carries `data-page-number`, `data-lazy-load="true"` and, when
 * given, `data-file-name`, and contains a pulsing skeleton with a
 * "Loading..." label.
 *
 * @param pageNumber - Page the placeholder stands in for.
 * @param fileName - Optional file name stored on the element.
 * @returns The detached placeholder element.
 */
export function createPlaceholder(
  pageNumber: number,
  fileName?: string
): HTMLElement {
  const placeholder = document.createElement('div');
  placeholder.className =
    'page-thumbnail relative cursor-move flex flex-col items-center gap-1 p-2 border-2 border-gray-600 rounded-lg bg-gray-800 transition-colors';
  placeholder.dataset.pageNumber = pageNumber.toString();
  if (fileName) {
    placeholder.dataset.fileName = fileName;
  }
  placeholder.dataset.lazyLoad = 'true';

  // Create skeleton loader
  const skeletonContainer = document.createElement('div');
  skeletonContainer.className =
    'relative w-full h-36 bg-gray-700 rounded-md animate-pulse flex items-center justify-center';

  const loadingText = document.createElement('span');
  loadingText.className = 'text-gray-500 text-xs';
  loadingText.textContent = 'Loading...';

  skeletonContainer.appendChild(loadingText);
  placeholder.appendChild(skeletonContainer);

  return placeholder;
}
|
||||
|
||||
/**
 * Renders a single page to a new canvas.
 *
 * @param pdfjsDoc - Document to read the page from.
 * @param pageNumber - Page number passed to pdf.js `getPage`.
 * @param scale - Viewport scale. Defaults to 1.
 * @returns A canvas sized to the page viewport with the page drawn on it.
 * @throws Error when a 2D context cannot be obtained for the canvas.
 */
export async function renderPageToCanvas(
  pdfjsDoc: pdfjsLib.PDFDocumentProxy,
  pageNumber: number,
  scale: number = 1
): Promise<HTMLCanvasElement> {
  const page = await pdfjsDoc.getPage(pageNumber);
  const viewport = page.getViewport({ scale });

  const canvas = document.createElement('canvas');
  canvas.height = viewport.height;
  canvas.width = viewport.width;

  const context = canvas.getContext('2d');
  if (!context) {
    throw new Error(`Failed to get 2D context for page ${pageNumber}`);
  }

  await page.render({
    canvasContext: context,
    canvas: canvas,
    viewport,
  }).promise;

  return canvas;
}
|
||||
|
||||
/**
 * Renders a batch of pages one after another and swaps each rendered page
 * into the DOM in place of its placeholder.
 *
 * A failure on one page is logged and does not stop the rest of the batch,
 * so the returned promise does not reject because of a page error.
 *
 * @param tasks - Pages to render, in order.
 */
async function renderPageBatch(tasks: PageTask[]): Promise<void> {
  for (const task of tasks) {
    try {
      const canvas = await renderPageToCanvas(
        task.pdfjsDoc,
        task.pageNumber,
        task.scale || 0.5
      );

      const wrapper = task.createWrapper(
        canvas,
        task.pageNumber,
        task.fileName
      );

      // Prefer the placeholder recorded on the task; otherwise look it up by
      // page number among the still-lazy elements.
      let placeholder: Element | null = task.placeholderElement || null;
      if (!placeholder) {
        placeholder = task.container.querySelector(
          `[data-page-number="${task.pageNumber}"][data-lazy-load="true"]`
        );
      }

      if (placeholder && placeholder.parentNode) {
        // Swap in place so the page keeps its position.
        const parent = placeholder.parentNode;
        parent.insertBefore(wrapper, placeholder);
        parent.removeChild(placeholder);
      } else {
        // No attached placeholder: skip if this page is already rendered.
        const existingRendered = task.container.querySelector(
          `[data-page-number="${task.pageNumber}"]:not([data-lazy-load="true"])`
        );
        if (existingRendered) {
          continue;
        }

        // Otherwise insert before the first child with a higher page number
        // to keep the container in page order.
        const allChildren = Array.from(
          task.container.children
        ) as HTMLElement[];
        let insertBefore: Element | null = null;

        for (const child of allChildren) {
          const childPageNum = parseInt(child.dataset.pageNumber || '0', 10);
          if (childPageNum > task.pageNumber) {
            insertBefore = child;
            break;
          }
        }

        if (insertBefore) {
          task.container.insertBefore(wrapper, insertBefore);
        } else {
          task.container.appendChild(wrapper);
        }
        console.warn(
          `Placeholder not found for page ${task.pageNumber}, inserted at calculated position`
        );
      }
    } catch (error) {
      console.error(`Error rendering page ${task.pageNumber}:`, error);
    }
  }
}
|
||||
|
||||
/**
 * Sets up the Intersection Observer used for lazy loading and stores it in
 * `lazyLoadState.observer`.
 *
 * When an observed placeholder becomes visible, its pending task is looked
 * up by the placeholder's `data-page-number`, removed from the pending maps
 * and rendered. Once no pending tasks remain the observer disconnects.
 *
 * @param container - Element holding the placeholders; its closest
 *   `.overflow-auto` ancestor (if any) is used as the observer root.
 * @param config - Supplies `lazyLoadMargin` and `onBatchComplete`.
 * @returns The created observer.
 */
function setupLazyRendering(
  container: HTMLElement,
  config: RenderConfig
): IntersectionObserver {
  const options = {
    root: container.closest('.overflow-auto') || null,
    rootMargin: config.lazyLoadMargin || '200px',
    threshold: 0.01,
  };

  const observer = new IntersectionObserver((entries) => {
    entries.forEach((entry) => {
      if (entry.isIntersecting) {
        const placeholder = entry.target as HTMLElement;
        const pageNumberStr = placeholder.dataset.pageNumber;
        if (!pageNumberStr) return;

        const pageNumber = parseInt(pageNumberStr, 10);
        const task = lazyLoadState.pendingTasksByPageNumber.get(pageNumber);

        if (task) {
          // Immediately unobserve to prevent multiple triggers
          observer.unobserve(placeholder);
          lazyLoadState.pendingTasks.delete(placeholder);
          lazyLoadState.pendingTasksByPageNumber.delete(pageNumber);

          // Render into the element that actually became visible.
          task.placeholderElement = placeholder;

          // Render this page immediately (not waiting for isRendering flag)
          renderPageBatch([task])
            .then(() => {
              // Trigger callback after lazy load batch
              if (config.onBatchComplete) {
                config.onBatchComplete();
              }

              // Check if all pages are rendered
              if (
                lazyLoadState.pendingTasks.size === 0 &&
                lazyLoadState.observer
              ) {
                lazyLoadState.observer.disconnect();
                lazyLoadState.observer = null;
              }
            })
            .catch((error) => {
              console.error(
                `Error lazy loading page ${task.pageNumber}:`,
                error
              );
            });
        }
      }
    });
  }, options);

  lazyLoadState.observer = observer;
  return observer;
}
|
||||
|
||||
/**
 * Request idle callback with fallback: uses `requestIdleCallback` when the
 * window provides it, otherwise schedules the callback with a 16 ms timeout.
 *
 * @param callback - Work to run when the browser is idle (or after the
 *   fallback delay).
 */
function requestIdleCallbackPolyfill(callback: () => void): void {
  if ('requestIdleCallback' in window) {
    requestIdleCallback(callback);
  } else {
    setTimeout(callback, 16); // ~60fps
  }
}
|
||||
|
||||
/**
 * Main function to render pages progressively with optional lazy loading.
 *
 * Steps:
 * 1. Append a placeholder for every page so the DOM order is fixed up front.
 * 2. Build one task per page, each holding a reference to its placeholder.
 * 3. With lazy loading (and more than the initial page count), observe the
 *    placeholders beyond the initial pages and queue the next
 *    `eagerLoadBatches * batchSize` pages for background rendering.
 * 4. Render the initial pages (up to 20 with lazy loading, otherwise all) in
 *    batches scheduled through the idle callback, reporting progress after
 *    each batch.
 * 5. Kick off eager background rendering.
 *
 * @param pdfjsDoc - Document to render.
 * @param container - Element that receives placeholders and rendered pages.
 * @param createWrapper - Builds the element wrapping each rendered canvas.
 * @param config - Batch sizes, lazy-loading switches and callbacks.
 * @returns Resolves when the initial pages are rendered, or early when
 *   `config.shouldCancel` reports cancellation before a batch.
 */
export async function renderPagesProgressively(
  pdfjsDoc: pdfjsLib.PDFDocumentProxy,
  container: HTMLElement,
  createWrapper: (
    canvas: HTMLCanvasElement,
    pageNumber: number,
    fileName?: string
  ) => HTMLElement,
  config: RenderConfig = {}
): Promise<void> {
  const {
    batchSize = 8,
    useLazyLoading = true,
    eagerLoadBatches = 2,
    onProgress,
    onBatchComplete,
  } = config;

  const totalPages = pdfjsDoc.numPages;

  // With lazy loading only the first pages are rendered up front.
  const initialRenderCount = useLazyLoading
    ? Math.min(20, totalPages)
    : totalPages;

  // Create placeholders for ALL pages first to maintain order.
  const placeholders: HTMLElement[] = [];
  for (let i = 1; i <= totalPages; i++) {
    const placeholder = createPlaceholder(i);
    container.appendChild(placeholder);
    placeholders.push(placeholder);
  }

  const tasks: PageTask[] = [];

  // Create tasks for all pages with direct placeholder references
  for (let i = 1; i <= totalPages; i++) {
    tasks.push({
      pageNumber: i,
      pdfjsDoc,
      container,
      scale: useLazyLoading ? 0.5 : 1,
      createWrapper,
      placeholderElement: placeholders[i - 1],
    });
  }

  // If lazy loading is enabled, set up observer for pages beyond initial render
  if (useLazyLoading && totalPages > initialRenderCount) {
    const observer = setupLazyRendering(container, config);

    for (let i = initialRenderCount + 1; i <= totalPages; i++) {
      const placeholder = placeholders[i - 1];
      const task = tasks[i - 1];
      // Store the task for lazy rendering
      lazyLoadState.pendingTasks.set(placeholder, task);
      lazyLoadState.pendingTasksByPageNumber.set(task.pageNumber, task);
      observer.observe(placeholder);
    }

    // Prepare eager load queue
    const eagerStartIndex = initialRenderCount;
    const eagerEndIndex = Math.min(
      eagerStartIndex + eagerLoadBatches * batchSize,
      totalPages
    );
    lazyLoadState.eagerLoadQueue = tasks.slice(eagerStartIndex, eagerEndIndex);
    lazyLoadState.nextEagerIndex = 0;
  }

  // Render initial pages in batches
  const initialTasks = tasks.slice(0, initialRenderCount);

  for (let i = 0; i < initialTasks.length; i += batchSize) {
    if (config.shouldCancel?.()) return;

    const batch = initialTasks.slice(i, i + batchSize);

    // Each batch waits for an idle slot so the UI stays responsive.
    await new Promise<void>((resolve, reject) => {
      requestIdleCallbackPolyfill(() => {
        renderPageBatch(batch)
          .then(() => {
            if (onProgress) {
              onProgress(
                Math.min(i + batchSize, initialRenderCount),
                totalPages
              );
            }

            if (onBatchComplete) {
              onBatchComplete();
            }

            resolve();
          })
          .catch(reject);
      });
    });
  }

  // Start eager loading AFTER initial batch is complete
  if (
    useLazyLoading &&
    eagerLoadBatches > 0 &&
    totalPages > initialRenderCount
  ) {
    renderEagerBatch(config);
  }
}
|
||||
|
||||
/**
 * Manually observe a placeholder element (useful for dynamically created
 * placeholders).
 *
 * The task is recorded in both pending-task indexes (by placeholder element
 * and by page number) before the placeholder is handed to the active
 * observer. When no observer is active, a warning is logged and nothing is
 * registered.
 *
 * @param placeholder - Element to watch.
 * @param task - Page task associated with the placeholder.
 */
export function observePlaceholder(
  placeholder: HTMLElement,
  task: PageTask
): void {
  const { observer } = lazyLoadState;
  if (!observer) {
    console.warn('No active observer to register placeholder');
    return;
  }

  lazyLoadState.pendingTasks.set(placeholder, task);
  lazyLoadState.pendingTasksByPageNumber.set(task.pageNumber, task);
  observer.observe(placeholder);
}
|
||||
|
||||
/**
|
||||
* Eagerly renders the next batch in the background
|
||||
*/
|
||||
function renderEagerBatch(config: RenderConfig): void {
|
||||
const { eagerLoadBatches = 2, batchSize = 8 } = config;
|
||||
const { eagerLoadBatches = 2, batchSize = 8 } = config;
|
||||
|
||||
if (eagerLoadBatches <= 0 || lazyLoadState.eagerLoadQueue.length === 0) {
|
||||
return;
|
||||
}
|
||||
if (eagerLoadBatches <= 0 || lazyLoadState.eagerLoadQueue.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (config.shouldCancel?.()) return;
|
||||
|
||||
const { nextEagerIndex, eagerLoadQueue } = lazyLoadState;
|
||||
|
||||
if (nextEagerIndex >= eagerLoadQueue.length) {
|
||||
return; // All eager batches rendered
|
||||
}
|
||||
|
||||
const batchEnd = Math.min(nextEagerIndex + batchSize, eagerLoadQueue.length);
|
||||
const batch = eagerLoadQueue.slice(nextEagerIndex, batchEnd);
|
||||
|
||||
requestIdleCallbackPolyfill(async () => {
|
||||
if (config.shouldCancel?.()) return;
|
||||
|
||||
const { nextEagerIndex, eagerLoadQueue } = lazyLoadState;
|
||||
const tasksToRender = batch.filter((task) =>
|
||||
lazyLoadState.pendingTasksByPageNumber.has(task.pageNumber)
|
||||
);
|
||||
|
||||
if (nextEagerIndex >= eagerLoadQueue.length) {
|
||||
return; // All eager batches rendered
|
||||
tasksToRender.forEach((task) => {
|
||||
const placeholder = task.placeholderElement;
|
||||
if (placeholder && lazyLoadState.observer) {
|
||||
lazyLoadState.observer.unobserve(placeholder);
|
||||
lazyLoadState.pendingTasks.delete(placeholder);
|
||||
lazyLoadState.pendingTasksByPageNumber.delete(task.pageNumber);
|
||||
}
|
||||
});
|
||||
|
||||
if (tasksToRender.length === 0) {
|
||||
lazyLoadState.nextEagerIndex = batchEnd;
|
||||
const remainingBatches = Math.ceil(
|
||||
(eagerLoadQueue.length - batchEnd) / batchSize
|
||||
);
|
||||
if (remainingBatches > 0 && remainingBatches < eagerLoadBatches) {
|
||||
renderEagerBatch(config);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const batchEnd = Math.min(nextEagerIndex + batchSize, eagerLoadQueue.length);
|
||||
const batch = eagerLoadQueue.slice(nextEagerIndex, batchEnd);
|
||||
await renderPageBatch(tasksToRender);
|
||||
|
||||
requestIdleCallbackPolyfill(async () => {
|
||||
if (config.shouldCancel?.()) return;
|
||||
if (config.onBatchComplete) {
|
||||
config.onBatchComplete();
|
||||
}
|
||||
|
||||
// Remove these tasks from pending since we're rendering them eagerly
|
||||
batch.forEach(task => {
|
||||
const placeholder = Array.from(lazyLoadState.pendingTasks.entries())
|
||||
.find(([_, t]) => t.pageNumber === task.pageNumber)?.[0];
|
||||
if (placeholder && lazyLoadState.observer) {
|
||||
lazyLoadState.observer.unobserve(placeholder);
|
||||
lazyLoadState.pendingTasks.delete(placeholder);
|
||||
}
|
||||
});
|
||||
// Update next eager index
|
||||
lazyLoadState.nextEagerIndex = batchEnd;
|
||||
|
||||
await renderPageBatch(batch, config.onProgress);
|
||||
|
||||
if (config.onBatchComplete) {
|
||||
config.onBatchComplete();
|
||||
}
|
||||
|
||||
// Update next eager index
|
||||
lazyLoadState.nextEagerIndex = batchEnd;
|
||||
|
||||
// Queue next eager batch
|
||||
const remainingBatches = Math.ceil((eagerLoadQueue.length - batchEnd) / batchSize);
|
||||
if (remainingBatches > 0 && remainingBatches < eagerLoadBatches) {
|
||||
// Continue eager loading if we have more batches within the eager threshold
|
||||
renderEagerBatch(config);
|
||||
}
|
||||
});
|
||||
// Queue next eager batch
|
||||
const remainingBatches = Math.ceil(
|
||||
(eagerLoadQueue.length - batchEnd) / batchSize
|
||||
);
|
||||
if (remainingBatches > 0 && remainingBatches < eagerLoadBatches) {
|
||||
renderEagerBatch(config);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * Cleanup function to disconnect observers.
 *
 * Disconnects and drops the active observer (if any), clears both
 * pending-task indexes and resets the rendering flag and the eager-load
 * queue/cursor to their empty values.
 */
export function cleanupLazyRendering(): void {
  if (lazyLoadState.observer) {
    lazyLoadState.observer.disconnect();
    lazyLoadState.observer = null;
  }

  lazyLoadState.pendingTasks.clear();
  lazyLoadState.pendingTasksByPageNumber.clear();
  lazyLoadState.isRendering = false;
  lazyLoadState.eagerLoadQueue = [];
  lazyLoadState.nextEagerIndex = 0;
}
|
||||
|
||||
590
src/js/utils/sanitize.ts
Normal file
590
src/js/utils/sanitize.ts
Normal file
@@ -0,0 +1,590 @@
|
||||
import { PDFDocument, PDFName } from 'pdf-lib';
|
||||
|
||||
/** Switches selecting which sanitisation steps `sanitizePdf` runs. */
export interface SanitizeOptions {
  /** Flatten form fields; if flattening throws, the AcroForm catalog entry is deleted instead. */
  flattenForms: boolean;
  /** Clear the Info dictionary and drop XMP Metadata, trailer ID and PieceInfo. */
  removeMetadata: boolean;
  /** Delete the `Annots` entry of every page. */
  removeAnnotations: boolean;
  /** Strip document scripts, OpenAction/AA triggers and JavaScript actions on annotations and form fields. */
  removeJavascript: boolean;
  /** Drop EmbeddedFiles entries, FileAttachment annotations and the Collection entry. */
  removeEmbeddedFiles: boolean;
  /** Delete OCProperties and the per-page resource `Properties` dictionaries. */
  removeLayers: boolean;
  /** Remove link-like annotations and named destinations. */
  removeLinks: boolean;
  /** Delete StructTreeRoot, ParentTree and per-page StructParents. */
  removeStructureTree: boolean;
  /** Delete the MarkInfo and Marked catalog entries. */
  removeMarkInfo: boolean;
  /** Delete FontFile/FontFile2/FontFile3 entries from font descriptors. */
  removeFonts: boolean;
}
|
||||
|
||||
/**
 * Default option set: every sanitisation step is enabled except font
 * removal, which stays off.
 */
export const defaultSanitizeOptions: SanitizeOptions = {
  // Enabled by default.
  flattenForms: true,
  removeMetadata: true,
  removeAnnotations: true,
  removeJavascript: true,
  removeEmbeddedFiles: true,
  removeLayers: true,
  removeLinks: true,
  removeStructureTree: true,
  removeMarkInfo: true,

  // Disabled by default.
  removeFonts: false,
};
|
||||
|
||||
/**
 * Strips descriptive metadata from the document in place.
 *
 * Empties the Info dictionary, resets the standard fields to empty values,
 * then removes the catalog `Metadata` entry (XMP), the trailer `ID` and the
 * catalog `PieceInfo` entry. Each of the last three removals is best-effort:
 * a failure is logged as a warning and the remaining steps still run.
 */
function removeMetadataFromDoc(pdfDoc: PDFDocument) {
  // getInfoDict is reached through an `any` cast, as in the rest of this file.
  const info = (pdfDoc as any).getInfoDict();
  for (const entryKey of info.keys()) {
    info.delete(entryKey);
  }

  pdfDoc.setTitle('');
  pdfDoc.setAuthor('');
  pdfDoc.setSubject('');
  pdfDoc.setKeywords([]);
  pdfDoc.setCreator('');
  pdfDoc.setProducer('');

  try {
    const catalog = (pdfDoc.catalog as any).dict;
    const xmpKey = PDFName.of('Metadata');
    if (catalog.has(xmpKey)) {
      catalog.delete(xmpKey);
    }
  } catch (e: any) {
    console.warn('Could not remove XMP metadata:', e.message);
  }

  try {
    const trailerInfo = (pdfDoc.context as any).trailerInfo;
    if (trailerInfo) {
      delete trailerInfo.ID;
    }
  } catch (e: any) {
    console.warn('Could not remove document IDs:', e.message);
  }

  try {
    const catalog = (pdfDoc.catalog as any).dict;
    const pieceInfoKey = PDFName.of('PieceInfo');
    if (catalog.has(pieceInfoKey)) {
      catalog.delete(pieceInfoKey);
    }
  } catch (e: any) {
    console.warn('Could not remove PieceInfo:', e.message);
  }
}
|
||||
|
||||
/**
 * Deletes the `Annots` entry from every page. A failure on one page is
 * logged as a warning and does not stop the remaining pages.
 */
function removeAnnotationsFromDoc(pdfDoc: PDFDocument) {
  const annotsKey = PDFName.of('Annots');
  pdfDoc.getPages().forEach((page) => {
    try {
      page.node.delete(annotsKey);
    } catch (e: any) {
      console.warn('Could not remove annotations from page:', e.message);
    }
  });
}
|
||||
|
||||
/** Flattens the document's form via pdf-lib; errors propagate to the caller. */
function flattenFormsInDoc(pdfDoc: PDFDocument) {
  pdfDoc.getForm().flatten();
}
|
||||
|
||||
/**
 * Removes script hooks from the document in place.
 *
 * In order:
 *  1. empties the document's `javaScripts` list when it is non-empty;
 *  2. deletes the `JavaScript` entry of the catalog's `Names` dictionary;
 *  3. deletes the catalog `OpenAction` and `AA` entries;
 *  4. per page: deletes the page `AA` entry, deletes an annotation's `A`
 *     entry when its action type is `JavaScript`, and deletes every
 *     annotation `AA` entry;
 *  5. deletes `A` and `AA` from each entry of the AcroForm `Fields` array.
 *
 * Lookups that fail are logged as warnings and skipped.
 */
function removeJavascriptFromDoc(pdfDoc: PDFDocument) {
  const doc = pdfDoc as any;
  if (doc.javaScripts && doc.javaScripts.length > 0) {
    doc.javaScripts = [];
  }

  const actionKey = PDFName.of('A');
  const extraActionsKey = PDFName.of('AA');
  const catalogDict = (pdfDoc.catalog as any).dict;

  const namesRef = catalogDict.get(PDFName.of('Names'));
  if (namesRef) {
    try {
      const namesDict = pdfDoc.context.lookup(namesRef) as any;
      const scriptsKey = PDFName.of('JavaScript');
      if (namesDict.has(scriptsKey)) {
        namesDict.delete(scriptsKey);
      }
    } catch (e: any) {
      console.warn('Could not access Names/JavaScript:', e.message);
    }
  }

  // Catalog-level triggers.
  for (const triggerKey of [PDFName.of('OpenAction'), extraActionsKey]) {
    if (catalogDict.has(triggerKey)) {
      catalogDict.delete(triggerKey);
    }
  }

  pdfDoc.getPages().forEach((page) => {
    try {
      const pageDict = page.node;
      if (pageDict.has(extraActionsKey)) {
        pageDict.delete(extraActionsKey);
      }

      const annotRefs = pageDict.Annots()?.asArray() || [];
      annotRefs.forEach((annotRef) => {
        try {
          const annot = pdfDoc.context.lookup(annotRef) as any;

          if (annot.has(actionKey)) {
            const actionRef = annot.get(actionKey);
            try {
              const actionDict = pdfDoc.context.lookup(actionRef) as any;
              // Name objects stringify with a leading '/', hence substring(1).
              const actionType = actionDict
                .get(PDFName.of('S'))
                ?.toString()
                .substring(1);
              if (actionType === 'JavaScript') {
                annot.delete(actionKey);
              }
            } catch (e: any) {
              console.warn('Could not read action:', e.message);
            }
          }

          if (annot.has(extraActionsKey)) {
            annot.delete(extraActionsKey);
          }
        } catch (e: any) {
          console.warn('Could not process annotation for JS:', e.message);
        }
      });
    } catch (e: any) {
      console.warn('Could not remove page actions:', e.message);
    }
  });

  try {
    const acroFormRef = catalogDict.get(PDFName.of('AcroForm'));
    if (acroFormRef) {
      const acroFormDict = pdfDoc.context.lookup(acroFormRef) as any;
      const fieldsRef = acroFormDict.get(PDFName.of('Fields'));

      if (fieldsRef) {
        const fieldsArray = pdfDoc.context.lookup(fieldsRef) as any;
        for (const fieldRef of fieldsArray.asArray()) {
          try {
            const field = pdfDoc.context.lookup(fieldRef) as any;
            if (field.has(actionKey)) {
              field.delete(actionKey);
            }
            if (field.has(extraActionsKey)) {
              field.delete(extraActionsKey);
            }
          } catch (e: any) {
            console.warn('Could not process field for JS:', e.message);
          }
        }
      }
    }
  } catch (e: any) {
    console.warn('Could not process form fields for JS:', e.message);
  }
}
|
||||
|
||||
/**
 * Removes file attachments from the document in place.
 *
 * Deletes the `EmbeddedFiles` entry from the catalog's `Names` dictionary and
 * from the catalog itself, filters `FileAttachment` annotations out of every
 * page's `Annots` array, empties the document's `embeddedFiles` list and
 * finally deletes the catalog `Collection` entry.
 */
function removeEmbeddedFilesFromDoc(pdfDoc: PDFDocument) {
  const catalogDict = (pdfDoc.catalog as any).dict;
  const embeddedFilesKey = PDFName.of('EmbeddedFiles');

  const namesRef = catalogDict.get(PDFName.of('Names'));
  if (namesRef) {
    try {
      const namesDict = pdfDoc.context.lookup(namesRef) as any;
      if (namesDict.has(embeddedFilesKey)) {
        namesDict.delete(embeddedFilesKey);
      }
    } catch (e: any) {
      console.warn('Could not access Names/EmbeddedFiles:', e.message);
    }
  }

  if (catalogDict.has(embeddedFilesKey)) {
    catalogDict.delete(embeddedFilesKey);
  }

  const annotsKey = PDFName.of('Annots');
  pdfDoc.getPages().forEach((page) => {
    try {
      const annotRefs = page.node.Annots()?.asArray() || [];

      // An annotation that cannot be inspected is kept rather than dropped.
      const annotsToKeep = annotRefs.filter((ref) => {
        try {
          const annot = pdfDoc.context.lookup(ref) as any;
          const subtype = annot
            .get(PDFName.of('Subtype'))
            ?.toString()
            .substring(1);
          return subtype !== 'FileAttachment';
        } catch {
          return true;
        }
      });

      // Only rewrite the page when something was actually filtered out.
      if (annotsToKeep.length === annotRefs.length) {
        return;
      }
      if (annotsToKeep.length > 0) {
        page.node.set(annotsKey, pdfDoc.context.obj(annotsToKeep));
      } else {
        page.node.delete(annotsKey);
      }
    } catch (pageError: any) {
      console.warn(
        `Could not process page for attachments: ${pageError.message}`
      );
    }
  });

  const doc = pdfDoc as any;
  if (doc.embeddedFiles && doc.embeddedFiles.length > 0) {
    doc.embeddedFiles = [];
  }

  const collectionKey = PDFName.of('Collection');
  if (catalogDict.has(collectionKey)) {
    catalogDict.delete(collectionKey);
  }
}
|
||||
|
||||
/**
 * Removes optional-content (layer) data in place: the `OCProperties` entry on
 * the catalog and on each page, plus the `Properties` entry of each page's
 * `Resources` dictionary. Per-page failures are logged and skipped.
 */
function removeLayersFromDoc(pdfDoc: PDFDocument) {
  const ocPropertiesKey = PDFName.of('OCProperties');
  const catalogDict = (pdfDoc.catalog as any).dict;

  if (catalogDict.has(ocPropertiesKey)) {
    catalogDict.delete(ocPropertiesKey);
  }

  pdfDoc.getPages().forEach((page) => {
    try {
      const pageDict = page.node;
      if (pageDict.has(ocPropertiesKey)) {
        pageDict.delete(ocPropertiesKey);
      }

      const resourcesRef = pageDict.get(PDFName.of('Resources'));
      if (!resourcesRef) {
        return;
      }

      try {
        const resourcesDict = pdfDoc.context.lookup(resourcesRef) as any;
        const propertiesKey = PDFName.of('Properties');
        if (resourcesDict.has(propertiesKey)) {
          resourcesDict.delete(propertiesKey);
        }
      } catch (e: any) {
        console.warn('Could not access Resources:', e.message);
      }
    } catch (e: any) {
      console.warn('Could not remove page layers:', e.message);
    }
  });
}
|
||||
|
||||
/**
 * Removes link-like annotations and named destinations in place.
 *
 * An annotation is dropped when its subtype is `Link`, when its `A` action
 * has type `URI`, `Launch`, `GoTo` or `GoToR`, or when it carries a `Dest`
 * entry. Annotations that cannot be inspected are kept. A page's `Annots`
 * array is rewritten only when at least one annotation was dropped. Finally
 * the `Dests` entry is deleted from the catalog's `Names` dictionary and from
 * the catalog itself.
 */
function removeLinksFromDoc(pdfDoc: PDFDocument) {
  const navigationActions = new Set(['URI', 'Launch', 'GoTo', 'GoToR']);
  const annotsKey = PDFName.of('Annots');
  const pages = pdfDoc.getPages();

  for (let pageIndex = 0; pageIndex < pages.length; pageIndex++) {
    try {
      const pageDict = pages[pageIndex].node;

      const annotsRef = pageDict.get(annotsKey);
      if (!annotsRef) continue;

      const annotRefs = (pdfDoc.context.lookup(annotsRef) as any).asArray();
      if (annotRefs.length === 0) continue;

      const annotsToKeep = [];
      let linksRemoved = 0;

      for (const ref of annotRefs) {
        try {
          const annot = pdfDoc.context.lookup(ref) as any;
          const subtype = annot
            .get(PDFName.of('Subtype'))
            ?.toString()
            .substring(1);

          let isLink = subtype === 'Link';

          if (!isLink) {
            const actionRef = annot.get(PDFName.of('A'));
            if (actionRef) {
              try {
                const actionDict = pdfDoc.context.lookup(actionRef) as any;
                const actionType = actionDict
                  .get(PDFName.of('S'))
                  ?.toString()
                  .substring(1);
                isLink = navigationActions.has(actionType);
              } catch (e: any) {
                console.warn('Could not read action:', e.message);
              }
            }

            // A direct destination also makes the annotation a link.
            const dest = annot.get(PDFName.of('Dest'));
            if (dest && !isLink) {
              isLink = true;
            }
          }

          if (isLink) {
            linksRemoved++;
          } else {
            annotsToKeep.push(ref);
          }
        } catch (e: any) {
          console.warn('Could not process annotation:', e.message);
          annotsToKeep.push(ref);
        }
      }

      if (linksRemoved > 0) {
        if (annotsToKeep.length > 0) {
          pageDict.set(annotsKey, pdfDoc.context.obj(annotsToKeep));
        } else {
          pageDict.delete(annotsKey);
        }
      }
    } catch (pageError: any) {
      console.warn(
        `Could not process page ${pageIndex + 1} for links: ${pageError.message}`
      );
    }
  }

  try {
    const destsKey = PDFName.of('Dests');
    const catalogDict = (pdfDoc.catalog as any).dict;

    const namesRef = catalogDict.get(PDFName.of('Names'));
    if (namesRef) {
      try {
        const namesDict = pdfDoc.context.lookup(namesRef) as any;
        if (namesDict.has(destsKey)) {
          namesDict.delete(destsKey);
        }
      } catch (e: any) {
        console.warn('Could not access Names/Dests:', e.message);
      }
    }

    if (catalogDict.has(destsKey)) {
      catalogDict.delete(destsKey);
    }
  } catch (e: any) {
    console.warn('Could not remove named destinations:', e.message);
  }
}
|
||||
|
||||
/**
 * Removes logical-structure data in place: the catalog `StructTreeRoot`
 * entry, each page's `StructParents` entry, and the catalog `ParentTree`
 * entry. Per-page failures are logged and skipped.
 */
function removeStructureTreeFromDoc(pdfDoc: PDFDocument) {
  const catalogDict = (pdfDoc.catalog as any).dict;

  const structRootKey = PDFName.of('StructTreeRoot');
  if (catalogDict.has(structRootKey)) {
    catalogDict.delete(structRootKey);
  }

  const structParentsKey = PDFName.of('StructParents');
  pdfDoc.getPages().forEach((page) => {
    try {
      const pageDict = page.node;
      if (pageDict.has(structParentsKey)) {
        pageDict.delete(structParentsKey);
      }
    } catch (e: any) {
      console.warn('Could not remove page StructParents:', e.message);
    }
  });

  const parentTreeKey = PDFName.of('ParentTree');
  if (catalogDict.has(parentTreeKey)) {
    catalogDict.delete(parentTreeKey);
  }
}
|
||||
|
||||
/** Deletes the `MarkInfo` and `Marked` entries from the catalog, if present. */
function removeMarkInfoFromDoc(pdfDoc: PDFDocument) {
  const catalogDict = (pdfDoc.catalog as any).dict;

  for (const entryName of ['MarkInfo', 'Marked']) {
    const entryKey = PDFName.of(entryName);
    if (catalogDict.has(entryKey)) {
      catalogDict.delete(entryKey);
    }
  }
}
|
||||
|
||||
/**
 * Deletes the `FontFile`, `FontFile2` and `FontFile3` entries from the
 * descriptor of every font listed in each page's `Resources`/`Font`
 * dictionary, then empties the document's `fonts` list when it is non-empty.
 *
 * Every level (page, resources, font dictionary, individual font) has its own
 * try/catch so one unreadable object only skips that object.
 */
function removeFontsFromDoc(pdfDoc: PDFDocument) {
  const fontFileKeys = ['FontFile', 'FontFile2', 'FontFile3'];
  const pages = pdfDoc.getPages();

  for (let pageIndex = 0; pageIndex < pages.length; pageIndex++) {
    try {
      const pageDict = pages[pageIndex].node;
      const resourcesRef = pageDict.get(PDFName.of('Resources'));
      if (!resourcesRef) continue;

      try {
        const resourcesDict = pdfDoc.context.lookup(resourcesRef) as any;
        if (!resourcesDict.has(PDFName.of('Font'))) continue;

        const fontRef = resourcesDict.get(PDFName.of('Font'));
        try {
          const fontDict = pdfDoc.context.lookup(fontRef) as any;

          for (const fontKey of fontDict.keys()) {
            try {
              const specificFont = pdfDoc.context.lookup(
                fontDict.get(fontKey)
              ) as any;
              if (!specificFont.has(PDFName.of('FontDescriptor'))) continue;

              const descriptor = pdfDoc.context.lookup(
                specificFont.get(PDFName.of('FontDescriptor'))
              ) as any;

              for (const key of fontFileKeys) {
                if (descriptor.has(PDFName.of(key))) {
                  descriptor.delete(PDFName.of(key));
                }
              }
            } catch (e: any) {
              console.warn(`Could not process font ${fontKey}:`, e.message);
            }
          }
        } catch (e: any) {
          console.warn('Could not access font dictionary:', e.message);
        }
      } catch (e: any) {
        console.warn('Could not access Resources for fonts:', e.message);
      }
    } catch (e: any) {
      console.warn(
        `Could not remove fonts from page ${pageIndex + 1}:`,
        e.message
      );
    }
  }

  const doc = pdfDoc as any;
  if (doc.fonts && doc.fonts.length > 0) {
    doc.fonts = [];
  }
}
|
||||
|
||||
/**
 * Loads a PDF, applies the sanitisation steps enabled in `options`, and
 * saves the result.
 *
 * Steps run in a fixed order: flatten forms, metadata, annotations,
 * JavaScript, embedded files, layers, links, structure tree, MarkInfo, fonts.
 * If form flattening throws, the AcroForm catalog entry is deleted instead.
 * The metadata and annotation steps are called directly, so an error there
 * rejects the returned promise; every later step logs a warning on failure
 * and processing continues.
 *
 * @param pdfBytes - Raw bytes of the source PDF.
 * @param options - Which steps to run.
 * @returns The mutated document and a fresh `Uint8Array` of the saved bytes.
 */
export async function sanitizePdf(
  pdfBytes: Uint8Array,
  options: SanitizeOptions
): Promise<{ pdfDoc: PDFDocument; bytes: Uint8Array }> {
  const pdfDoc = await PDFDocument.load(pdfBytes);

  if (options.flattenForms) {
    try {
      flattenFormsInDoc(pdfDoc);
    } catch (e: any) {
      console.warn(`Could not flatten forms: ${e.message}`);
      // Fallback: drop the form definition when it cannot be flattened.
      try {
        const catalogDict = (pdfDoc.catalog as any).dict;
        const acroFormKey = PDFName.of('AcroForm');
        if (catalogDict.has(acroFormKey)) {
          catalogDict.delete(acroFormKey);
        }
      } catch (removeError: any) {
        console.warn('Could not remove AcroForm:', removeError.message);
      }
    }
  }

  if (options.removeMetadata) {
    removeMetadataFromDoc(pdfDoc);
  }

  if (options.removeAnnotations) {
    removeAnnotationsFromDoc(pdfDoc);
  }

  // Remaining steps share one shape: run if enabled, warn on failure.
  const guardedSteps: Array<
    [enabled: boolean, label: string, run: (doc: PDFDocument) => void]
  > = [
    [options.removeJavascript, 'JavaScript', removeJavascriptFromDoc],
    [options.removeEmbeddedFiles, 'embedded files', removeEmbeddedFilesFromDoc],
    [options.removeLayers, 'layers', removeLayersFromDoc],
    [options.removeLinks, 'links', removeLinksFromDoc],
    [options.removeStructureTree, 'structure tree', removeStructureTreeFromDoc],
    [options.removeMarkInfo, 'MarkInfo', removeMarkInfoFromDoc],
    [options.removeFonts, 'fonts', removeFontsFromDoc],
  ];

  for (const [enabled, label, run] of guardedSteps) {
    if (!enabled) continue;
    try {
      run(pdfDoc);
    } catch (e: any) {
      console.warn(`Could not remove ${label}: ${e.message}`);
    }
  }

  const savedBytes = await pdfDoc.save();
  return { pdfDoc, bytes: new Uint8Array(savedBytes) };
}
|
||||
@@ -1,45 +1,38 @@
|
||||
import { APP_VERSION } from '../../version.js';
|
||||
import { createLanguageSwitcher } from '../i18n/language-switcher.js';
|
||||
|
||||
// Handle simple mode footer replacement for tool pages
|
||||
// Handle simple mode adjustments for tool pages
|
||||
if (__SIMPLE_MODE__) {
|
||||
const footer = document.querySelector('footer');
|
||||
if (footer && !document.querySelector('[data-simple-footer]')) {
|
||||
footer.style.display = 'none';
|
||||
const sectionsToHide = [
|
||||
'How It Works',
|
||||
'Related PDF Tools',
|
||||
'Related Tools',
|
||||
'Frequently Asked Questions',
|
||||
];
|
||||
|
||||
const simpleFooter = document.createElement('footer');
|
||||
simpleFooter.className = 'mt-16 border-t-2 border-gray-700 py-8';
|
||||
simpleFooter.setAttribute('data-simple-footer', 'true');
|
||||
simpleFooter.innerHTML = `
|
||||
<div class="container mx-auto px-4">
|
||||
<div class="flex items-center justify-between flex-wrap gap-4">
|
||||
<div>
|
||||
<div class="flex items-center mb-2">
|
||||
<img src="/images/favicon.svg" alt="Bento PDF Logo" class="h-8 w-8 mr-2">
|
||||
<span class="text-white font-bold text-lg">BentoPDF</span>
|
||||
</div>
|
||||
<p class="text-gray-400 text-sm">
|
||||
© 2025 BentoPDF. All rights reserved.
|
||||
</p>
|
||||
<p class="text-gray-500 text-xs mt-2">
|
||||
Version <span id="app-version-simple">${APP_VERSION}</span>
|
||||
</p>
|
||||
</div>
|
||||
<div id="simple-mode-lang-switcher" class="flex-shrink-0"></div>
|
||||
</div>
|
||||
</div>
|
||||
`;
|
||||
document.body.appendChild(simpleFooter);
|
||||
|
||||
const langContainer = simpleFooter.querySelector('#simple-mode-lang-switcher');
|
||||
if (langContainer) {
|
||||
const switcher = createLanguageSwitcher();
|
||||
const dropdown = switcher.querySelector('div[role="menu"]');
|
||||
if (dropdown) {
|
||||
dropdown.classList.remove('mt-2');
|
||||
dropdown.classList.add('bottom-full', 'mb-2');
|
||||
document.querySelectorAll('section').forEach((section) => {
|
||||
const h2 = section.querySelector('h2');
|
||||
if (h2) {
|
||||
const heading = h2.textContent?.trim() || '';
|
||||
if (sectionsToHide.some((text) => heading.includes(text))) {
|
||||
(section as HTMLElement).style.display = 'none';
|
||||
}
|
||||
langContainer.appendChild(switcher);
|
||||
}
|
||||
});
|
||||
|
||||
const versionElement = document.getElementById('app-version-simple');
|
||||
if (versionElement) {
|
||||
versionElement.textContent = APP_VERSION;
|
||||
}
|
||||
|
||||
const langContainer = document.getElementById('simple-mode-lang-switcher');
|
||||
if (langContainer) {
|
||||
const switcher = createLanguageSwitcher();
|
||||
const dropdown = switcher.querySelector('div[role="menu"]');
|
||||
if (dropdown) {
|
||||
dropdown.classList.remove('mt-2');
|
||||
dropdown.classList.add('bottom-full', 'mb-2');
|
||||
}
|
||||
langContainer.appendChild(switcher);
|
||||
}
|
||||
}
|
||||
|
||||
135
src/js/utils/wasm-preloader.ts
Normal file
135
src/js/utils/wasm-preloader.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import { getLibreOfficeConverter } from './libreoffice-loader.js';
|
||||
import { isWasmAvailable, getWasmBaseUrl } from '../config/wasm-cdn-config.js';
|
||||
|
||||
/** Lifecycle states tracked for each background WASM preload. */
export enum PreloadStatus {
  /** Initial value; the preload functions only proceed from this state. */
  IDLE = 'idle',
  /** Set just before the preload work starts. */
  LOADING = 'loading',
  /** Set once the preload finished without throwing. */
  READY = 'ready',
  /** Set when the preload threw. */
  ERROR = 'error',
  /** Set when the package is reported as not available. */
  UNAVAILABLE = 'unavailable',
}
|
||||
|
||||
/** Preload status per WASM package. */
interface PreloadState {
  libreoffice: PreloadStatus;
  pymupdf: PreloadStatus;
  ghostscript: PreloadStatus;
}

/** Module-level status record; every package starts out idle. */
const preloadState: PreloadState = {
  libreoffice: PreloadStatus.IDLE,
  pymupdf: PreloadStatus.IDLE,
  ghostscript: PreloadStatus.IDLE,
};
|
||||
|
||||
/**
 * Returns a shallow copy of the current preload statuses, so callers cannot
 * mutate the module-level record through the result.
 */
export function getPreloadStatus(): Readonly<PreloadState> {
  const snapshot = Object.assign({}, preloadState);
  return snapshot;
}
|
||||
|
||||
/**
 * Preloads the PyMuPDF WASM wrapper once.
 *
 * Does nothing unless the status is still idle. Marks the package unavailable
 * when it is not configured; otherwise imports `dist/index.js` from the
 * configured base URL, constructs a `PyMuPDF` instance and awaits its
 * `load()`. The outcome is recorded in `preloadState.pymupdf`; failures are
 * logged and never thrown.
 */
async function preloadPyMuPDF(): Promise<void> {
  if (preloadState.pymupdf !== PreloadStatus.IDLE) {
    return;
  }

  if (!isWasmAvailable('pymupdf')) {
    preloadState.pymupdf = PreloadStatus.UNAVAILABLE;
    console.log('[Preloader] PyMuPDF not configured, skipping preload');
    return;
  }

  preloadState.pymupdf = PreloadStatus.LOADING;
  console.log('[Preloader] Starting PyMuPDF preload...');

  try {
    const baseUrl = getWasmBaseUrl('pymupdf')!;
    const ghostscriptBase = getWasmBaseUrl('ghostscript');

    // Make sure the base ends with a slash before appending relative paths.
    const root = baseUrl.endsWith('/') ? baseUrl : `${baseUrl}/`;

    const wrapper = await import(/* @vite-ignore */ `${root}dist/index.js`);
    const instance = new wrapper.PyMuPDF({
      assetPath: `${root}assets/`,
      ghostscriptUrl: ghostscriptBase || '',
    });
    await instance.load();

    preloadState.pymupdf = PreloadStatus.READY;
    console.log('[Preloader] PyMuPDF ready');
  } catch (e) {
    preloadState.pymupdf = PreloadStatus.ERROR;
    console.warn('[Preloader] PyMuPDF preload failed:', e);
  }
}
|
||||
|
||||
/**
 * Preloads the Ghostscript WASM module once.
 *
 * Does nothing unless the status is still idle. Marks the package unavailable
 * when it is not configured; otherwise dynamically imports the Ghostscript
 * loader, loads the module and passes it to `setCachedGsModule`. The outcome
 * is recorded in `preloadState.ghostscript`; failures are logged and never
 * thrown.
 */
async function preloadGhostscript(): Promise<void> {
  if (preloadState.ghostscript !== PreloadStatus.IDLE) {
    return;
  }

  if (!isWasmAvailable('ghostscript')) {
    preloadState.ghostscript = PreloadStatus.UNAVAILABLE;
    console.log('[Preloader] Ghostscript not configured, skipping preload');
    return;
  }

  preloadState.ghostscript = PreloadStatus.LOADING;
  console.log('[Preloader] Starting Ghostscript WASM preload...');

  try {
    const loader = await import('./ghostscript-loader.js');
    const loadedModule = await loader.loadGsModule();
    loader.setCachedGsModule(loadedModule as any);

    preloadState.ghostscript = PreloadStatus.READY;
    console.log('[Preloader] Ghostscript WASM ready');
  } catch (e) {
    preloadState.ghostscript = PreloadStatus.ERROR;
    console.warn('[Preloader] Ghostscript preload failed:', e);
  }
}
|
||||
|
||||
/**
 * Schedules an async task for when the browser is idle.
 *
 * Uses `requestIdleCallback` with a 5 s timeout when the API exists on
 * `window`, otherwise falls back to a 1 s `setTimeout`. The task's promise is
 * not awaited by anyone, so a rejection is caught here and logged instead of
 * surfacing as an unhandled promise rejection.
 *
 * @param task - Async work to run in the background.
 */
function scheduleIdleTask(task: () => Promise<void>): void {
  const run = (): void => {
    task().catch((err: unknown) => {
      console.warn('[Preloader] Background task failed:', err);
    });
  };

  if ('requestIdleCallback' in window) {
    requestIdleCallback(run, { timeout: 5000 });
  } else {
    setTimeout(run, 1000);
  }
}
|
||||
|
||||
/**
 * Schedules the background WASM preloads for the current page.
 *
 * When the current pathname contains one of the LibreOffice tool slugs the
 * preloads are skipped (the log message gives memory as the reason).
 * Otherwise PyMuPDF and then Ghostscript are preloaded one after the other
 * from an idle task.
 */
export function startBackgroundPreload(): void {
  console.log('[Preloader] Scheduling background WASM preloads...');

  const libreOfficeSlugs = [
    'word-to-pdf',
    'excel-to-pdf',
    'ppt-to-pdf',
    'powerpoint-to-pdf',
    'docx-to-pdf',
    'xlsx-to-pdf',
    'pptx-to-pdf',
    'csv-to-pdf',
    'rtf-to-pdf',
    'odt-to-pdf',
    'ods-to-pdf',
    'odp-to-pdf',
  ];

  const pathname = window.location.pathname;
  const onLibreOfficePage = libreOfficeSlugs.some((slug) =>
    pathname.includes(slug)
  );

  if (onLibreOfficePage) {
    console.log(
      '[Preloader] Skipping preloads on LibreOffice page to save memory'
    );
    return;
  }

  const runSequentialPreloads = async (): Promise<void> => {
    console.log('[Preloader] Starting sequential WASM preloads...');

    await preloadPyMuPDF();
    await preloadGhostscript();

    console.log('[Preloader] Sequential preloads complete');
  };

  scheduleIdleTask(runSequentialPreloads);
}
|
||||
377
src/js/utils/wasm-provider.ts
Normal file
377
src/js/utils/wasm-provider.ts
Normal file
@@ -0,0 +1,377 @@
|
||||
export type WasmPackage = 'pymupdf' | 'ghostscript' | 'cpdf';
|
||||
|
||||
interface WasmProviderConfig {
|
||||
pymupdf?: string;
|
||||
ghostscript?: string;
|
||||
cpdf?: string;
|
||||
}
|
||||
|
||||
const STORAGE_KEY = 'bentopdf:wasm-providers';
|
||||
|
||||
const CDN_DEFAULTS: Record<WasmPackage, string> = {
|
||||
pymupdf: 'https://cdn.jsdelivr.net/npm/@bentopdf/pymupdf-wasm@0.11.16/',
|
||||
ghostscript: 'https://cdn.jsdelivr.net/npm/@bentopdf/gs-wasm/assets/',
|
||||
cpdf: 'https://cdn.jsdelivr.net/npm/coherentpdf/dist/',
|
||||
};
|
||||
|
||||
/**
 * Picks the environment-provided value when it is a non-empty string,
 * otherwise the fallback. An empty string counts as "not set".
 */
function envOrDefault(envVar: string | undefined, fallback: string): string {
  if (envVar) {
    return envVar;
  }
  return fallback;
}
|
||||
|
||||
const ENV_DEFAULTS: Record<WasmPackage, string> = {
|
||||
pymupdf: envOrDefault(
|
||||
import.meta.env.VITE_WASM_PYMUPDF_URL,
|
||||
CDN_DEFAULTS.pymupdf
|
||||
),
|
||||
ghostscript: envOrDefault(
|
||||
import.meta.env.VITE_WASM_GS_URL,
|
||||
CDN_DEFAULTS.ghostscript
|
||||
),
|
||||
cpdf: envOrDefault(import.meta.env.VITE_WASM_CPDF_URL, CDN_DEFAULTS.cpdf),
|
||||
};
|
||||
|
||||
/**
 * Resolves, persists and validates the base URLs that WASM packages are
 * loaded from.
 *
 * A URL stored by the user (kept in `localStorage` under `STORAGE_KEY`) takes
 * precedence over the matching `ENV_DEFAULTS` entry.
 */
class WasmProviderManager {
  /** Package names accepted when reading a persisted configuration. */
  private static readonly PACKAGE_NAMES: readonly WasmPackage[] = [
    'pymupdf',
    'ghostscript',
    'cpdf',
  ];

  /** How long `validateUrl` waits for the response before aborting. */
  private static readonly VALIDATION_TIMEOUT_MS = 10000;

  private config: WasmProviderConfig;
  private validationCache: Map<WasmPackage, boolean> = new Map();

  constructor() {
    this.config = this.loadConfig();
  }

  /**
   * Reads the persisted configuration from `localStorage`.
   *
   * The stored JSON is treated as untrusted: anything that is not a plain
   * object is ignored, and only non-empty string values for known package
   * names are kept. Without this, a stored `null` or array would make every
   * later `this.config[...]` lookup throw or return a non-string.
   *
   * @returns The sanitized configuration, or `{}` when nothing usable is stored.
   */
  private loadConfig(): WasmProviderConfig {
    try {
      const stored = localStorage.getItem(STORAGE_KEY);
      if (stored) {
        const parsed: unknown = JSON.parse(stored);
        if (
          parsed !== null &&
          typeof parsed === 'object' &&
          !Array.isArray(parsed)
        ) {
          const entries = parsed as Record<string, unknown>;
          const sanitized: WasmProviderConfig = {};
          for (const name of WasmProviderManager.PACKAGE_NAMES) {
            const value = entries[name];
            if (typeof value === 'string' && value) {
              sanitized[name] = value;
            }
          }
          return sanitized;
        }
        console.warn(
          '[WasmProvider] Ignoring malformed config found in localStorage'
        );
      }
    } catch (e) {
      console.warn(
        '[WasmProvider] Failed to load config from localStorage:',
        e
      );
    }
    return {};
  }

  /** Returns the default URL for a package (environment value or CDN fallback). */
  private getEnvDefault(packageName: WasmPackage): string | undefined {
    return ENV_DEFAULTS[packageName];
  }

  /** Writes the current configuration to `localStorage`; failures are logged, not thrown. */
  private saveConfig(): void {
    try {
      localStorage.setItem(STORAGE_KEY, JSON.stringify(this.config));
    } catch (e) {
      console.error('[WasmProvider] Failed to save config to localStorage:', e);
    }
  }

  /** Returns the user-configured URL for the package, falling back to its default. */
  getUrl(packageName: WasmPackage): string | undefined {
    return this.config[packageName] || this.getEnvDefault(packageName);
  }

  /**
   * Stores a user-supplied base URL for the package and persists it.
   * The URL is normalized to end with `/`; any cached validation is dropped.
   */
  setUrl(packageName: WasmPackage, url: string): void {
    const normalizedUrl = url.endsWith('/') ? url : `${url}/`;
    this.config[packageName] = normalizedUrl;
    this.validationCache.delete(packageName);
    this.saveConfig();
  }

  /** Removes the user-supplied URL for the package and persists the change. */
  removeUrl(packageName: WasmPackage): void {
    delete this.config[packageName];
    this.validationCache.delete(packageName);
    this.saveConfig();
  }

  /** True when a URL is available for the package from either the user or the defaults. */
  isConfigured(packageName: WasmPackage): boolean {
    return !!(this.config[packageName] || this.getEnvDefault(packageName));
  }

  /** True when the user has stored their own URL for the package. */
  isUserConfigured(packageName: WasmPackage): boolean {
    return !!this.config[packageName];
  }

  /** True when a default URL exists for the package. */
  hasEnvDefault(packageName: WasmPackage): boolean {
    return !!this.getEnvDefault(packageName);
  }

  /** True when at least one package has a user-supplied or default URL. */
  hasAnyProvider(): boolean {
    return (
      Object.keys(this.config).length > 0 ||
      Object.values(ENV_DEFAULTS).some(Boolean)
    );
  }

  /**
   * Checks that a base URL is well-formed and actually serves the package's
   * entry file.
   *
   * @param packageName - Package whose entry file is probed.
   * @param url - URL to test; defaults to the user-configured URL.
   * @returns `{ valid: true }` on success, otherwise `{ valid: false, error }`
   *   with a user-facing explanation. This method does not throw.
   */
  async validateUrl(
    packageName: WasmPackage,
    url?: string
  ): Promise<{ valid: boolean; error?: string }> {
    const testUrl = url || this.config[packageName];
    if (!testUrl) {
      return { valid: false, error: 'No URL configured' };
    }

    try {
      const parsedUrl = new URL(testUrl);
      if (!['http:', 'https:'].includes(parsedUrl.protocol)) {
        return {
          valid: false,
          error: 'URL must start with http:// or https://',
        };
      }
    } catch {
      return {
        valid: false,
        error:
          'Invalid URL format. Please enter a valid URL (e.g., https://example.com/wasm/)',
      };
    }

    const normalizedUrl = testUrl.endsWith('/') ? testUrl : `${testUrl}/`;

    // File probed for each package, relative to its base URL.
    const testFiles: Record<WasmPackage, string> = {
      pymupdf: 'dist/index.js',
      ghostscript: 'gs.js',
      cpdf: 'coherentpdf.browser.min.js',
    };
    const testFile = testFiles[packageName];
    const fullUrl = `${normalizedUrl}${testFile}`;

    const controller = new AbortController();
    const timeoutId = setTimeout(
      () => controller.abort(),
      WasmProviderManager.VALIDATION_TIMEOUT_MS
    );

    try {
      let response: Response;
      try {
        response = await fetch(fullUrl, {
          method: 'GET',
          mode: 'cors',
          signal: controller.signal,
        });
      } finally {
        // Clear the timer on failure too, so it never outlives the request.
        clearTimeout(timeoutId);
      }

      if (!response.ok) {
        // Release the unread body; the outcome of cancel() is irrelevant here.
        void response.body?.cancel().catch(() => undefined);
        return {
          valid: false,
          error: `Could not find ${testFile} at the specified URL (HTTP ${response.status}). Make sure the file exists.`,
        };
      }

      // Read a single chunk to prove the body is readable, then stop the download.
      const reader = response.body?.getReader();
      if (reader) {
        try {
          await reader.read();
        } catch {
          return {
            valid: false,
            error: `File exists but could not be read. Check CORS configuration.`,
          };
        } finally {
          // cancel() returns a promise; swallow its rejection explicitly
          // instead of leaving it floating.
          void reader.cancel().catch(() => undefined);
        }
      }

      const contentType = response.headers.get('content-type');
      if (
        contentType &&
        !contentType.includes('javascript') &&
        !contentType.includes('application/octet-stream') &&
        !contentType.includes('text/')
      ) {
        return {
          valid: false,
          error: `The URL returned unexpected content type: ${contentType}. Expected a JavaScript file.`,
        };
      }

      // Only cache results that describe the currently stored URL.
      if (!url || url === this.config[packageName]) {
        this.validationCache.set(packageName, true);
      }

      return { valid: true };
    } catch (e: unknown) {
      if (controller.signal.aborted) {
        return {
          valid: false,
          error: `Network error: Request timed out after ${
            WasmProviderManager.VALIDATION_TIMEOUT_MS / 1000
          } seconds.`,
        };
      }

      const errorMessage = e instanceof Error ? e.message : 'Unknown error';

      if (
        errorMessage.includes('Failed to fetch') ||
        errorMessage.includes('NetworkError')
      ) {
        return {
          valid: false,
          error:
            'Network error: Could not connect to the URL. Check that the URL is correct and the server allows CORS requests.',
        };
      }

      return {
        valid: false,
        error: `Network error: ${errorMessage}`,
      };
    }
  }

  /** Returns the effective URL (user value or default) for every package. */
  getAllProviders(): WasmProviderConfig {
    return {
      pymupdf: this.config.pymupdf || ENV_DEFAULTS.pymupdf,
      ghostscript: this.config.ghostscript || ENV_DEFAULTS.ghostscript,
      cpdf: this.config.cpdf || ENV_DEFAULTS.cpdf,
    };
  }

  /** Drops every user-supplied URL, the validation cache and the persisted entry. */
  clearAll(): void {
    this.config = {};
    this.validationCache.clear();
    try {
      localStorage.removeItem(STORAGE_KEY);
    } catch (e) {
      console.error('[WasmProvider] Failed to clear localStorage:', e);
    }
  }

  /** Alias of `clearAll`: with no user overrides left, the defaults apply. */
  resetToDefaults(): void {
    this.clearAll();
  }

  /** Returns the human-readable name shown in the UI for a package. */
  getPackageDisplayName(packageName: WasmPackage): string {
    const names: Record<WasmPackage, string> = {
      pymupdf: 'PyMuPDF (Document Processing)',
      ghostscript: 'Ghostscript (PDF/A Conversion)',
      cpdf: 'CoherentPDF (Bookmarks & Metadata)',
    };
    return names[packageName];
  }

  /** Returns the list of feature names associated with a package. */
  getPackageFeatures(packageName: WasmPackage): string[] {
    const features: Record<WasmPackage, string[]> = {
      pymupdf: [
        'PDF to Text',
        'PDF to Markdown',
        'PDF to SVG',
        'PDF to Images (High Quality)',
        'PDF to DOCX',
        'PDF to Excel/CSV',
        'Extract Images',
        'Extract Tables',
        'EPUB/MOBI/FB2/XPS/CBZ to PDF',
        'Image Compression',
        'Deskew PDF',
        'PDF Layers',
      ],
      ghostscript: ['PDF/A Conversion', 'Font to Outline'],
      cpdf: [
        'Merge PDF',
        'Alternate Merge',
        'Split by Bookmarks',
        'Table of Contents',
        'PDF to JSON',
        'JSON to PDF',
        'Add/Edit/Extract Attachments',
        'Edit Bookmarks',
        'PDF Metadata',
      ],
    };
    return features[packageName];
  }
}
|
||||
|
||||
/** Shared WasmProviderManager instance, created when this module is loaded. */
export const WasmProvider = new WasmProviderManager();
|
||||
|
||||
/**
 * Shows a modal explaining that a WASM package must be configured before the
 * requested feature can be used.
 *
 * @param packageName - Package the feature depends on.
 * @param onConfigure - Called when the user clicks "Configure". When omitted,
 *   the browser navigates to `wasm-settings.html` under the app's base URL.
 */
export function showWasmRequiredDialog(
  packageName: WasmPackage,
  onConfigure?: () => void
): void {
  const displayName = WasmProvider.getPackageDisplayName(packageName);
  const features = WasmProvider.getPackageFeatures(packageName);

  // Text is interpolated into innerHTML below, so escape it first
  // (one display name contains a literal "&").
  const escapeHtml = (text: string): string =>
    text
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // Avoid stacking overlays with duplicate ids when called repeatedly.
  document.getElementById('wasm-required-modal')?.remove();

  // Create modal
  const overlay = document.createElement('div');
  overlay.className =
    'fixed inset-0 bg-black/60 backdrop-blur-sm z-50 flex items-center justify-center p-4';
  overlay.id = 'wasm-required-modal';

  const modal = document.createElement('div');
  modal.className =
    'bg-gray-800 rounded-2xl max-w-md w-full shadow-2xl border border-gray-700';

  // Only the first four features are listed; the rest are summarized.
  const featureItems = features
    .slice(0, 4)
    .map(
      (f) =>
        `<li class="flex items-center gap-2"><span class="text-green-400">✓</span> ${escapeHtml(f)}</li>`
    )
    .join('');
  const moreItem =
    features.length > 4
      ? `<li class="text-gray-500">+ ${features.length - 4} more...</li>`
      : '';

  modal.innerHTML = `
    <div class="p-6">
      <div class="flex items-center gap-3 mb-4">
        <div class="w-12 h-12 rounded-full bg-amber-500/20 flex items-center justify-center">
          <svg class="w-6 h-6 text-amber-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z"/>
          </svg>
        </div>
        <div>
          <h3 class="text-lg font-semibold text-white">Advanced Feature Required</h3>
          <p class="text-sm text-gray-400">External processing module needed</p>
        </div>
      </div>

      <p class="text-gray-300 mb-4">
        This feature requires <strong class="text-white">${escapeHtml(displayName)}</strong> to be configured.
      </p>

      <div class="bg-gray-700/50 rounded-lg p-4 mb-4">
        <p class="text-sm text-gray-400 mb-2">Features enabled by this module:</p>
        <ul class="text-sm text-gray-300 space-y-1">
          ${featureItems}
          ${moreItem}
        </ul>
      </div>

      <p class="text-xs text-gray-500 mb-4">
        This module is licensed under AGPL-3.0. By configuring it, you agree to its license terms.
      </p>
    </div>

    <div class="border-t border-gray-700 p-4 flex gap-3">
      <button id="wasm-modal-cancel" class="flex-1 px-4 py-2.5 rounded-lg bg-gray-700 text-gray-300 hover:bg-gray-600 transition-colors font-medium">
        Cancel
      </button>
      <button id="wasm-modal-configure" class="flex-1 px-4 py-2.5 rounded-lg bg-gradient-to-r from-blue-600 to-blue-500 text-white hover:from-blue-500 hover:to-blue-400 transition-all font-medium">
        Configure
      </button>
    </div>
  `;

  overlay.appendChild(modal);
  document.body.appendChild(overlay);

  const cancelBtn = modal.querySelector('#wasm-modal-cancel');
  const configureBtn = modal.querySelector('#wasm-modal-configure');

  const closeModal = () => {
    overlay.remove();
  };

  cancelBtn?.addEventListener('click', closeModal);

  // A click on the backdrop (not on the dialog itself) dismisses the modal.
  overlay.addEventListener('click', (e) => {
    if (e.target === overlay) closeModal();
  });

  configureBtn?.addEventListener('click', () => {
    closeModal();
    if (onConfigure) {
      onConfigure();
    } else {
      window.location.href = `${import.meta.env.BASE_URL}wasm-settings.html`;
    }
  });
}
|
||||
|
||||
/**
 * Guards a feature that depends on a WASM package.
 *
 * @param packageName - Package the feature needs.
 * @param onAvailable - Invoked synchronously when a URL is available for the package.
 * @returns `true` when the package is configured; otherwise shows the
 *   "required" dialog and returns `false`.
 */
export function requireWasm(
  packageName: WasmPackage,
  onAvailable?: () => void
): boolean {
  if (WasmProvider.isConfigured(packageName)) {
    onAvailable?.();
    return true;
  }

  showWasmRequiredDialog(packageName);
  return false;
}
|
||||
196
src/js/utils/xml-to-pdf.ts
Normal file
196
src/js/utils/xml-to-pdf.ts
Normal file
@@ -0,0 +1,196 @@
|
||||
import { jsPDF } from 'jspdf';
|
||||
import autoTable from 'jspdf-autotable';
|
||||
|
||||
/** Options accepted by `convertXmlToPdf`. */
export interface XmlToPdfOptions {
  /** Progress callback: a percentage plus a short status message. */
  onProgress?: (percent: number, message: string) => void;
}
|
||||
|
||||
/**
 * jsPDF document type extended with the optional `lastAutoTable` property
 * that `convertXmlToPdf` reads after drawing a table.
 */
interface jsPDFWithAutoTable extends jsPDF {
  /** `finalY` is read as the vertical position where the last table ended. */
  lastAutoTable?: { finalY: number };
}
|
||||
|
||||
/**
 * Converts an XML file into a landscape A4 PDF.
 *
 * The root tag becomes the title. The root's child elements are grouped by
 * tag name and each group is rendered as a table whose columns are the tag
 * names of the grandchildren. When no group yields a table (the root has no
 * children, or its children are plain leaf elements), the root's children and
 * attributes are rendered as a single Property/Value table instead.
 *
 * @param file - XML file to convert.
 * @param options - Optional progress callback.
 * @returns The generated PDF as a Blob.
 * @throws Error when the XML cannot be parsed.
 */
export async function convertXmlToPdf(
  file: File,
  options?: XmlToPdfOptions
): Promise<Blob> {
  const { onProgress } = options || {};

  onProgress?.(10, 'Reading XML file...');
  const xmlText = await file.text();

  onProgress?.(30, 'Parsing XML structure...');
  const parser = new DOMParser();
  const xmlDoc = parser.parseFromString(xmlText, 'text/xml');

  // DOMParser reports failures through a <parsererror> element, not by throwing.
  const parseError = xmlDoc.querySelector('parsererror');
  if (parseError) {
    throw new Error('Invalid XML: ' + parseError.textContent);
  }

  onProgress?.(50, 'Analyzing data structure...');

  const doc: jsPDFWithAutoTable = new jsPDF({
    orientation: 'landscape',
    unit: 'mm',
    format: 'a4',
  });

  const topMargin = 20;
  // Space required below a group heading for the heading plus the start of its table.
  const headingReserve = 30;
  const pageWidth = doc.internal.pageSize.getWidth();
  const pageHeight = doc.internal.pageSize.getHeight();
  let yPosition = topMargin;

  const root = xmlDoc.documentElement;
  const rootName = formatTitle(root.tagName);

  doc.setFontSize(18);
  doc.setFont('helvetica', 'bold');
  doc.text(rootName, pageWidth / 2, yPosition, { align: 'center' });
  yPosition += 15;

  onProgress?.(60, 'Generating formatted content...');

  const groups = groupByTagName(Array.from(root.children));
  const showGroupHeadings = Object.keys(groups).length > 1;
  let renderedTable = false;

  for (const [groupName, elements] of Object.entries(groups)) {
    const { headers, rows } = extractTableData(elements);
    if (headers.length === 0 || rows.length === 0) {
      continue;
    }

    if (showGroupHeadings) {
      // Start a new page rather than drawing the heading past the bottom edge.
      if (yPosition > pageHeight - headingReserve) {
        doc.addPage();
        yPosition = topMargin;
      }
      doc.setFontSize(14);
      doc.setFont('helvetica', 'bold');
      doc.text(formatTitle(groupName), 14, yPosition);
      yPosition += 8;
    }

    autoTable(doc, {
      head: [headers.map((h) => formatTitle(h))],
      body: rows,
      startY: yPosition,
      styles: {
        fontSize: 9,
        cellPadding: 4,
        overflow: 'linebreak',
      },
      headStyles: {
        fillColor: [79, 70, 229],
        textColor: 255,
        fontStyle: 'bold',
      },
      alternateRowStyles: {
        fillColor: [243, 244, 246],
      },
      margin: { top: 20, left: 14, right: 14 },
      theme: 'striped',
    });

    yPosition = (doc.lastAutoTable?.finalY || yPosition) + 15;
    renderedTable = true;
  }

  // Fallback for flat documents: without it, XML whose children are simple
  // leaf elements would produce a PDF containing only the title.
  if (!renderedTable) {
    const kvPairs = extractKeyValuePairs(root);
    if (kvPairs.length > 0) {
      autoTable(doc, {
        head: [['Property', 'Value']],
        body: kvPairs,
        startY: yPosition,
        styles: {
          fontSize: 10,
          cellPadding: 5,
        },
        headStyles: {
          fillColor: [79, 70, 229],
          textColor: 255,
          fontStyle: 'bold',
        },
        columnStyles: {
          0: { fontStyle: 'bold', cellWidth: 60 },
          1: { cellWidth: 'auto' },
        },
        margin: { left: 14, right: 14 },
        theme: 'striped',
      });
    }
  }

  onProgress?.(90, 'Finalizing PDF...');

  const pdfBlob = doc.output('blob');

  onProgress?.(100, 'Complete!');
  return pdfBlob;
}
|
||||
|
||||
|
||||
/**
 * Groups elements by their tag name, preserving first-seen order of the tags
 * and the original order of elements within each group.
 *
 * @param elements - Elements to group.
 * @returns A dictionary from tag name to the elements carrying that tag.
 */
function groupByTagName(elements: Element[]): Record<string, Element[]> {
  // Prototype-less dictionary: tag names come from the document, and names
  // such as "constructor", "toString" or "__proto__" would otherwise collide
  // with members inherited from Object.prototype and crash on `.push`.
  const groups: Record<string, Element[]> = Object.create(null);

  for (const element of elements) {
    const tagName = element.tagName;
    const bucket = groups[tagName];
    if (bucket) {
      bucket.push(element);
    } else {
      groups[tagName] = [element];
    }
  }

  return groups;
}
|
||||
|
||||
/**
 * Builds table data from a list of sibling elements.
 *
 * Column headers are the distinct tag names of the elements' direct children,
 * in first-seen order. Each element becomes one row holding the trimmed text
 * of its first direct child with the matching tag name, or `''` when absent.
 *
 * @param elements - Elements that each represent one row.
 * @returns The header tag names and the row values.
 */
function extractTableData(elements: Element[]): { headers: string[], rows: string[][] } {
  if (elements.length === 0) {
    return { headers: [], rows: [] };
  }

  const headerSet = new Set<string>();
  for (const element of elements) {
    for (const child of Array.from(element.children)) {
      headerSet.add(child.tagName);
    }
  }
  const headers = Array.from(headerSet);

  const rows: string[][] = [];
  for (const element of elements) {
    // Look cells up by exact tag name among direct children only. A CSS
    // selector lookup would also match nested descendants, and would throw or
    // miss on tag names containing ':' or '.', both of which are legal in XML.
    const firstChildByTag = new Map<string, Element>();
    for (const child of Array.from(element.children)) {
      if (!firstChildByTag.has(child.tagName)) {
        firstChildByTag.set(child.tagName, child);
      }
    }

    rows.push(
      headers.map(
        (header) => firstChildByTag.get(header)?.textContent?.trim() || ''
      )
    );
  }

  return { headers, rows };
}
|
||||
|
||||
|
||||
/**
 * Flattens an element into `[label, value]` pairs.
 *
 * Direct children contribute their formatted tag name and trimmed text
 * content (children whose text is empty are skipped), followed by one pair
 * per attribute of the element itself.
 *
 * @param element - Element to flatten.
 * @returns Pairs in document order: children first, then attributes.
 */
function extractKeyValuePairs(element: Element): string[][] {
  const pairs: string[][] = [];

  for (const child of Array.from(element.children)) {
    const key = child.tagName;
    const value = child.textContent?.trim() || '';
    if (value) {
      pairs.push([formatTitle(key), value]);
    }
  }

  for (const attr of Array.from(element.attributes)) {
    pairs.push([formatTitle(attr.name), attr.value]);
  }

  return pairs;
}
|
||||
|
||||
|
||||
/**
 * Turns a tag or attribute name into a Title Case label.
 *
 * Underscores and hyphens become word breaks, a lowercase-to-uppercase
 * transition (camelCase) becomes a word break, and every word is capitalized
 * with the remainder lower-cased.
 *
 * @param tagName - Raw name such as `first_name` or `orderItem`.
 * @returns The label, e.g. `First Name` or `Order Item`.
 */
function formatTitle(tagName: string): string {
  return tagName
    .replace(/[_-]/g, ' ')
    .replace(/([a-z])([A-Z])/g, '$1 $2')
    .split(' ')
    // Leading or repeated separators yield empty words; drop them so the
    // label has no leading or doubled spaces.
    .filter((word) => word.length > 0)
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
    .join(' ');
}
|
||||
Reference in New Issue
Block a user