Merge branch 'main' into add-spanish-translation
This commit is contained in:
@@ -693,6 +693,19 @@ export const categories = [
|
||||
subtitle:
|
||||
'Convert PDF to image-based PDF. Flatten layers and remove selectable text.',
|
||||
},
|
||||
{
|
||||
href: import.meta.env.BASE_URL + 'deskew-pdf.html',
|
||||
name: 'Deskew PDF',
|
||||
icon: 'ph-perspective',
|
||||
subtitle: 'Automatically straighten tilted scanned pages using OpenCV.',
|
||||
},
|
||||
{
|
||||
href: import.meta.env.BASE_URL + 'font-to-outline.html',
|
||||
name: 'Font to Outline',
|
||||
icon: 'ph-text-outdent',
|
||||
subtitle:
|
||||
'Convert all fonts to vector outlines for consistent rendering.',
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@@ -5,9 +5,11 @@ import HttpBackend from 'i18next-http-backend';
|
||||
// Supported languages
|
||||
export const supportedLanguages = [
|
||||
'en',
|
||||
'fr',
|
||||
'de',
|
||||
'es',
|
||||
'zh',
|
||||
'zh-TW',
|
||||
'vi',
|
||||
'tr',
|
||||
'id',
|
||||
@@ -17,9 +19,11 @@ export type SupportedLanguage = (typeof supportedLanguages)[number];
|
||||
|
||||
export const languageNames: Record<SupportedLanguage, string> = {
|
||||
en: 'English',
|
||||
fr: 'Français',
|
||||
de: 'Deutsch',
|
||||
es: 'Español',
|
||||
zh: '中文',
|
||||
"zh-TW": '繁體中文(台灣)',
|
||||
vi: 'Tiếng Việt',
|
||||
tr: 'Türkçe',
|
||||
id: 'Bahasa Indonesia',
|
||||
@@ -28,7 +32,8 @@ export const languageNames: Record<SupportedLanguage, string> = {
|
||||
|
||||
export const getLanguageFromUrl = (): SupportedLanguage => {
|
||||
const path = window.location.pathname;
|
||||
const langMatch = path.match(/^\/(en|de|es|zh|vi|tr|id|it)(?:\/|$)/);
|
||||
|
||||
const langMatch = path.match(/^\/(en|fr|es|de|zh|zh-TW|vi|tr|id|it)(?:\/|$)/);
|
||||
if (
|
||||
langMatch &&
|
||||
supportedLanguages.includes(langMatch[1] as SupportedLanguage)
|
||||
@@ -90,9 +95,9 @@ export const changeLanguage = (lang: SupportedLanguage): void => {
|
||||
const currentLang = getLanguageFromUrl();
|
||||
|
||||
let newPath: string;
|
||||
if (currentPath.match(/^\/(en|de|zh|vi|tr|id|it)\//)) {
|
||||
newPath = currentPath.replace(/^\/(en|de|zh|vi|tr|id|it)\//, `/${lang}/`);
|
||||
} else if (currentPath.match(/^\/(en|de|zh|vi|tr|id|it)$/)) {
|
||||
if (currentPath.match(/^\/(en|fr|de|zh|zh-TW|vi|tr|id|it)\//)) {
|
||||
newPath = currentPath.replace(/^\/(en|fr|de|zh|zh-TW|vi|tr|id|it)\//, `/${lang}/`);
|
||||
} else if (currentPath.match(/^\/(en|fr|de|zh|zh-TW|vi|tr|id|it)$/)) {
|
||||
newPath = `/${lang}`;
|
||||
} else {
|
||||
newPath = `/${lang}${currentPath}`;
|
||||
@@ -156,7 +161,7 @@ export const rewriteLinks = (): void => {
|
||||
return;
|
||||
}
|
||||
|
||||
if (href.match(/^\/(en|de|zh|vi|tr|id|it)\//)) {
|
||||
if (href.match(/^\/(en|fr|de|zh|zh-TW|vi|tr|id|it)\//)) {
|
||||
return;
|
||||
}
|
||||
let newHref: string;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
255
src/js/logic/deskew-pdf-page.ts
Normal file
255
src/js/logic/deskew-pdf-page.ts
Normal file
@@ -0,0 +1,255 @@
|
||||
import { PyMuPDF } from '@bentopdf/pymupdf-wasm';
|
||||
import { createIcons, icons } from 'lucide';
|
||||
import { downloadFile } from '../utils/helpers';
|
||||
|
||||
interface DeskewResult {
|
||||
totalPages: number;
|
||||
correctedPages: number;
|
||||
angles: number[];
|
||||
corrected: boolean[];
|
||||
}
|
||||
|
||||
let selectedFiles: File[] = [];
|
||||
let pymupdf: PyMuPDF | null = null;
|
||||
|
||||
/**
 * Returns the module-level PyMuPDF instance, constructing it on first call.
 *
 * The instance is configured with an asset path of
 * `import.meta.env.BASE_URL + 'pymupdf-wasm/'` and is cached in `pymupdf`,
 * so later calls return the same object.
 */
function initPyMuPDF(): PyMuPDF {
  if (pymupdf) {
    return pymupdf;
  }

  const assetPath = import.meta.env.BASE_URL + 'pymupdf-wasm/';
  pymupdf = new PyMuPDF({ assetPath });
  return pymupdf;
}
|
||||
|
||||
/**
 * Reveals the loader modal with the given status message.
 *
 * Nothing happens unless both `#loader-modal` and `#loader-text` exist.
 *
 * @param message Text written into `#loader-text`.
 */
function showLoader(message: string): void {
  const modalEl = document.getElementById('loader-modal');
  const labelEl = document.getElementById('loader-text');
  if (!modalEl || !labelEl) {
    return;
  }

  labelEl.textContent = message;
  modalEl.classList.remove('hidden');
}
|
||||
|
||||
/**
 * Hides the loader modal by adding the `hidden` class to `#loader-modal`.
 * A missing element is ignored.
 */
function hideLoader(): void {
  document.getElementById('loader-modal')?.classList.add('hidden');
}
|
||||
|
||||
/**
 * Opens the alert modal with a title and a message.
 *
 * The modal is only touched when `#alert-modal`, `#alert-title` and
 * `#alert-message` are all present; otherwise the call is a no-op.
 *
 * @param title   Text written into `#alert-title`.
 * @param message Text written into `#alert-message`.
 */
function showAlert(title: string, message: string): void {
  const modalEl = document.getElementById('alert-modal');
  const headingEl = document.getElementById('alert-title');
  const bodyEl = document.getElementById('alert-message');
  if (!(modalEl && headingEl && bodyEl)) {
    return;
  }

  headingEl.textContent = title;
  bodyEl.textContent = message;
  modalEl.classList.remove('hidden');
}
|
||||
|
||||
/**
 * Re-renders the list of selected files and toggles the related controls.
 *
 * - Always hides `#results-area` (results belong to a previous run).
 * - With no files selected: clears the list and hides `#file-controls`
 *   and `#deskew-options`.
 * - Otherwise: shows both panels and renders one row per file with its
 *   name, its size in KB and a remove button.
 *
 * Rows are built with DOM nodes and `textContent` instead of an HTML
 * template: a file name is user-controlled and must never be parsed as
 * markup.
 *
 * Does nothing when any of the four expected elements is missing.
 */
function updateFileDisplay(): void {
  const fileDisplayArea = document.getElementById('file-display-area');
  const fileControls = document.getElementById('file-controls');
  const deskewOptions = document.getElementById('deskew-options');
  const resultsArea = document.getElementById('results-area');

  if (!fileDisplayArea || !fileControls || !deskewOptions || !resultsArea)
    return;

  resultsArea.classList.add('hidden');
  fileDisplayArea.innerHTML = '';

  if (selectedFiles.length === 0) {
    fileControls.classList.add('hidden');
    deskewOptions.classList.add('hidden');
    return;
  }

  fileControls.classList.remove('hidden');
  deskewOptions.classList.remove('hidden');

  selectedFiles.forEach((file, index) => {
    const row = document.createElement('div');
    row.className =
      'flex items-center justify-between bg-gray-700 p-3 rounded-lg';

    const info = document.createElement('div');
    info.className = 'flex items-center gap-3';

    // Placeholder element that createIcons() replaces with the SVG icon.
    const fileIcon = document.createElement('i');
    fileIcon.setAttribute('data-lucide', 'file-text');
    fileIcon.className = 'w-5 h-5 text-indigo-400';

    const nameSpan = document.createElement('span');
    nameSpan.className = 'text-gray-200 truncate max-w-xs';
    nameSpan.textContent = file.name; // textContent: never interpreted as HTML

    const sizeSpan = document.createElement('span');
    sizeSpan.className = 'text-gray-500 text-sm';
    sizeSpan.textContent = `(${(file.size / 1024).toFixed(1)} KB)`;

    info.append(fileIcon, nameSpan, sizeSpan);

    const removeBtn = document.createElement('button');
    removeBtn.className = 'remove-file text-gray-400 hover:text-red-400';
    removeBtn.dataset.index = String(index);

    const removeIcon = document.createElement('i');
    removeIcon.setAttribute('data-lucide', 'x');
    removeIcon.className = 'w-5 h-5';
    removeBtn.append(removeIcon);

    // The list is fully re-rendered after each removal, so the captured
    // index always matches the file's current position.
    removeBtn.addEventListener('click', () => {
      selectedFiles.splice(index, 1);
      updateFileDisplay();
    });

    row.append(info, removeBtn);
    fileDisplayArea.appendChild(row);
  });

  createIcons({ icons });
}
|
||||
|
||||
/**
 * Shows the results panel for one deskew run.
 *
 * Writes the total and corrected page counts into `#result-total` and
 * `#result-corrected`, then lists one row per page in `#angles-list` with
 * the angle formatted to two decimals. Pages flagged in `result.corrected`
 * get a green check and a "(corrected)" badge; other pages get a grey minus.
 *
 * Does nothing when any of the expected elements is missing.
 *
 * @param result Per-document summary; `angles` and `corrected` are read by
 *               the same page index.
 */
function displayResults(result: DeskewResult): void {
  const panel = document.getElementById('results-area');
  const totalCell = document.getElementById('result-total');
  const correctedCell = document.getElementById('result-corrected');
  const angleContainer = document.getElementById('angles-list');

  if (!(panel && totalCell && correctedCell && angleContainer)) {
    return;
  }

  panel.classList.remove('hidden');
  totalCell.textContent = result.totalPages.toString();
  correctedCell.textContent = result.correctedPages.toString();

  const rows: string[] = [];
  result.angles.forEach((angle, pageIdx) => {
    let color = 'text-gray-400';
    let icon = 'minus';
    let badge = '';
    if (result.corrected[pageIdx]) {
      color = 'text-green-400';
      icon = 'check';
      badge = '<span class="text-green-400 text-xs">(corrected)</span>';
    }

    rows.push(`
        <div class="flex items-center gap-2 text-sm py-1">
          <i data-lucide="${icon}" class="w-4 h-4 ${color}"></i>
          <span class="text-gray-300">Page ${pageIdx + 1}:</span>
          <span class="${color}">${angle.toFixed(2)}°</span>
          ${badge}
        </div>
      `);
  });
  angleContainer.innerHTML = rows.join('');

  // Turn the freshly inserted <i data-lucide> placeholders into icons.
  createIcons({ icons });
}
|
||||
|
||||
/**
 * Deskews every selected file and triggers one download per file.
 *
 * Flow:
 * 1. Alerts and returns when no file is selected.
 * 2. Reads the threshold and DPI from `#deskew-threshold` / `#deskew-dpi`,
 *    falling back to 0.5 and 150 when a control is missing, empty or
 *    non-numeric.
 * 3. Loads PyMuPDF, then processes the files one after another. After each
 *    file the results panel is refreshed (so it ends up showing the last
 *    file's results) and the output is downloaded as
 *    `<name>_deskewed.pdf`.
 * 4. Hides the loader and shows a success alert, or an error alert if any
 *    step throws (remaining files are then skipped).
 */
async function processDeskew(): Promise<void> {
  if (selectedFiles.length === 0) {
    showAlert('No Files', 'Please select at least one PDF file.');
    return;
  }

  // The controls may be absent from the page, so keep them nullable.
  const thresholdSelect = document.getElementById(
    'deskew-threshold'
  ) as HTMLSelectElement | null;
  const dpiSelect = document.getElementById(
    'deskew-dpi'
  ) as HTMLSelectElement | null;

  const parsedThreshold = parseFloat(thresholdSelect?.value || '0.5');
  const parsedDpi = parseInt(dpiSelect?.value || '150', 10);
  // Guard against NaN from an unexpected option value.
  const threshold = Number.isFinite(parsedThreshold) ? parsedThreshold : 0.5;
  const dpi = Number.isFinite(parsedDpi) ? parsedDpi : 150;

  showLoader('Initializing PyMuPDF...');

  try {
    const pdf = initPyMuPDF();
    await pdf.load();

    for (const file of selectedFiles) {
      showLoader(`Deskewing ${file.name}...`);

      const { pdf: resultPdf, result } = await pdf.deskewPdf(file, {
        threshold,
        dpi,
      });

      displayResults(result);

      // Strip only a trailing ".pdf" (any case). A plain
      // replace('.pdf', ...) misses "SCAN.PDF" and corrupts names that
      // contain ".pdf" in the middle.
      const baseName = file.name.replace(/\.pdf$/i, '');
      downloadFile(resultPdf, `${baseName}_deskewed.pdf`);
    }

    hideLoader();
    showAlert(
      'Success',
      `Deskewed ${selectedFiles.length} file(s). ${selectedFiles.length > 1 ? 'Downloads started for all files.' : ''}`
    );
  } catch (error) {
    hideLoader();
    console.error('Deskew error:', error);
    showAlert(
      'Error',
      `Failed to deskew PDF: ${error instanceof Error ? error.message : 'Unknown error'}`
    );
  }
}
|
||||
|
||||
/**
 * Wires up all interactive elements of the Deskew PDF page.
 *
 * Every element is optional: a missing element simply gets no listener.
 * - `#file-input`: appends the chosen files to the selection, then clears
 *   the input's value.
 * - `#drop-zone`: highlights on drag-over and appends dropped PDFs.
 * - `#add-more-btn`, `#clear-files-btn`, `#process-btn`: open the picker,
 *   clear the selection, start processing.
 * - `#alert-ok`: closes the alert modal.
 * - `#back-to-tools`: navigates to the application base URL.
 */
function initPage(): void {
  const fileInput = document.getElementById(
    'file-input'
  ) as HTMLInputElement | null;
  const dropZone = document.getElementById('drop-zone');
  const addMoreBtn = document.getElementById('add-more-btn');
  const clearFilesBtn = document.getElementById('clear-files-btn');
  const processBtn = document.getElementById('process-btn');
  const alertOk = document.getElementById('alert-ok');
  const backBtn = document.getElementById('back-to-tools');

  // Dropped files can have an empty MIME type, so also accept a ".pdf"
  // extension (same rule as the Font to Outline page).
  const isPdf = (f: File): boolean =>
    f.type === 'application/pdf' || f.name.toLowerCase().endsWith('.pdf');

  if (fileInput) {
    fileInput.addEventListener('change', () => {
      if (fileInput.files) {
        selectedFiles = [...selectedFiles, ...Array.from(fileInput.files)];
        updateFileDisplay();
        // Clear the value so the same file can be selected again.
        fileInput.value = '';
      }
    });
  }

  if (dropZone) {
    dropZone.addEventListener('dragover', (e) => {
      e.preventDefault(); // required for the element to accept a drop
      dropZone.classList.add('bg-gray-700');
    });

    dropZone.addEventListener('dragleave', () => {
      dropZone.classList.remove('bg-gray-700');
    });

    dropZone.addEventListener('drop', (e) => {
      e.preventDefault();
      dropZone.classList.remove('bg-gray-700');
      if (e.dataTransfer?.files) {
        const pdfFiles = Array.from(e.dataTransfer.files).filter(isPdf);
        selectedFiles = [...selectedFiles, ...pdfFiles];
        updateFileDisplay();
      }
    });
  }

  if (addMoreBtn) {
    addMoreBtn.addEventListener('click', () => fileInput?.click());
  }

  if (clearFilesBtn) {
    clearFilesBtn.addEventListener('click', () => {
      selectedFiles = [];
      updateFileDisplay();
    });
  }

  if (processBtn) {
    processBtn.addEventListener('click', processDeskew);
  }

  if (alertOk) {
    alertOk.addEventListener('click', () => {
      document.getElementById('alert-modal')?.classList.add('hidden');
    });
  }

  if (backBtn) {
    backBtn.addEventListener('click', () => {
      // Use the configured base URL rather than '/', so the link still
      // works when the app is served from a sub-path.
      window.location.href = import.meta.env.BASE_URL;
    });
  }

  createIcons({ icons });
}
|
||||
|
||||
document.addEventListener('DOMContentLoaded', initPage);
|
||||
@@ -1,14 +1,16 @@
|
||||
import PostalMime from 'postal-mime';
|
||||
import MsgReader from '@kenjiuno/msgreader';
|
||||
import { formatBytes, escapeHtml } from '../utils/helpers.js';
|
||||
import {
|
||||
formatBytes,
|
||||
escapeHtml,
|
||||
uint8ArrayToBase64,
|
||||
sanitizeEmailHtml,
|
||||
formatRawDate,
|
||||
} from '../utils/helpers.js';
|
||||
import type { EmailAttachment, ParsedEmail, EmailRenderOptions } from '@/types';
|
||||
|
||||
// Re-export types for convenience
|
||||
export type { EmailAttachment, ParsedEmail, EmailRenderOptions };
|
||||
|
||||
/**
|
||||
* Format email address without angle brackets for cleaner display
|
||||
*/
|
||||
function formatAddress(
|
||||
name: string | undefined,
|
||||
email: string | undefined
|
||||
@@ -172,80 +174,6 @@ export async function parseMsgFile(file: File): Promise<ParsedEmail> {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Formats a raw RFC 2822 date string into a more readable form while
 * preserving the original wall-clock time and timezone offset (no
 * conversion to the local timezone is performed).
 *
 * Example input:  "Sun, 8 Jan 2017 20:37:44 +0200"
 * Example output: "Sunday, January 8, 2017 at 8:37 PM (UTC+02:00)"
 *
 * The day-of-month and minutes are copied verbatim from the input, and
 * seconds are dropped. Unknown day or month abbreviations are passed
 * through unchanged.
 *
 * @param raw Date header value, expected as
 *            `Day, DD Mon YYYY HH:MM[:SS] ±HHMM`.
 * @returns The formatted date, or `raw` unchanged when it does not match
 *          that pattern or parsing fails.
 */
function formatRawDate(raw: string): string {
  try {
    // RFC 2822 parts: Day, DD Mon YYYY HH:MM[:SS] Timezone
    const match = raw.match(
      /([A-Za-z]{3}),\s+(\d{1,2})\s+([A-Za-z]{3})\s+(\d{4})\s+(\d{2}):(\d{2})(?::(\d{2}))?\s+([+-]\d{4})/
    );

    if (match) {
      // Capture group 7 (seconds) is intentionally skipped.
      const [, dayAbbr, dom, monthAbbr, year, hoursStr, minsStr, , timezone] =
        match;

      const days: Record<string, string> = {
        Sun: 'Sunday',
        Mon: 'Monday',
        Tue: 'Tuesday',
        Wed: 'Wednesday',
        Thu: 'Thursday',
        Fri: 'Friday',
        Sat: 'Saturday',
      };
      const months: Record<string, string> = {
        Jan: 'January',
        Feb: 'February',
        Mar: 'March',
        Apr: 'April',
        May: 'May',
        Jun: 'June',
        Jul: 'July',
        Aug: 'August',
        Sep: 'September',
        Oct: 'October',
        Nov: 'November',
        Dec: 'December',
      };

      const fullDay = days[dayAbbr] || dayAbbr;
      const fullMonth = months[monthAbbr] || monthAbbr;

      // 24-hour -> 12-hour clock; hours 0 and 12 are both shown as 12.
      const hour24 = parseInt(hoursStr, 10);
      const ampm = hour24 >= 12 ? 'PM' : 'AM';
      const hour12 = hour24 % 12 || 12;

      // Timezone: +0200 -> UTC+02:00
      const tzSign = timezone.substring(0, 1);
      const tzHours = timezone.substring(1, 3);
      const tzMins = timezone.substring(3, 5);
      const formattedTz = `UTC${tzSign}${tzHours}:${tzMins}`;

      return `${fullDay}, ${fullMonth} ${dom}, ${year} at ${hour12}:${minsStr} ${ampm} (${formattedTz})`;
    }
  } catch {
    // Best effort: fall through and return the raw string.
  }
  return raw;
}
|
||||
|
||||
/**
|
||||
* Replace CID references in HTML with base64 data URIs
|
||||
*/
|
||||
@@ -263,23 +191,13 @@ function processInlineImages(
|
||||
}
|
||||
});
|
||||
|
||||
// Replace src="cid:..."
|
||||
return html.replace(/src=["']cid:([^"']+)["']/g, (match, cid) => {
|
||||
const att = cidMap.get(cid);
|
||||
if (att && att.content) {
|
||||
// Convert Uint8Array to base64
|
||||
let binary = '';
|
||||
const len = att.content.byteLength;
|
||||
for (let i = 0; i < len; i++) {
|
||||
binary += String.fromCharCode(att.content[i]);
|
||||
}
|
||||
const base64 =
|
||||
typeof btoa === 'function'
|
||||
? btoa(binary)
|
||||
: Buffer.from(binary, 'binary').toString('base64');
|
||||
const base64 = uint8ArrayToBase64(att.content);
|
||||
return `src="data:${att.contentType};base64,${base64}"`;
|
||||
}
|
||||
return match; // Keep original if not found
|
||||
return match;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -291,12 +209,12 @@ export function renderEmailToHtml(
|
||||
|
||||
let processedHtml = '';
|
||||
if (email.htmlBody) {
|
||||
processedHtml = processInlineImages(email.htmlBody, email.attachments);
|
||||
const sanitizedHtml = sanitizeEmailHtml(email.htmlBody);
|
||||
processedHtml = processInlineImages(sanitizedHtml, email.attachments);
|
||||
} else {
|
||||
processedHtml = `<pre style="white-space: pre-wrap; font-family: inherit; margin: 0;">${escapeHtml(email.textBody)}</pre>`;
|
||||
}
|
||||
|
||||
// Format date in a human-readable way
|
||||
let dateStr = 'Unknown Date';
|
||||
if (email.rawDateString) {
|
||||
dateStr = formatRawDate(email.rawDateString);
|
||||
@@ -329,7 +247,6 @@ export function renderEmailToHtml(
|
||||
`
|
||||
: '';
|
||||
|
||||
// Build CC/BCC rows
|
||||
let ccBccHtml = '';
|
||||
if (includeCcBcc) {
|
||||
if (email.cc.length > 0) {
|
||||
|
||||
222
src/js/logic/font-to-outline-page.ts
Normal file
222
src/js/logic/font-to-outline-page.ts
Normal file
@@ -0,0 +1,222 @@
|
||||
import { showAlert } from '../ui.js';
|
||||
import { downloadFile, formatBytes } from '../utils/helpers.js';
|
||||
import { convertFileToOutlines } from '../utils/ghostscript-loader.js';
|
||||
import { icons, createIcons } from 'lucide';
|
||||
import JSZip from 'jszip';
|
||||
|
||||
interface FontToOutlineState {
|
||||
files: File[];
|
||||
}
|
||||
|
||||
const pageState: FontToOutlineState = {
|
||||
files: [],
|
||||
};
|
||||
|
||||
/**
 * Returns the page to its initial, empty state.
 *
 * Empties `pageState.files`, clears the rendered file list, hides the
 * `#tool-options` and `#file-controls` panels and resets the file input's
 * value. Missing elements are skipped.
 */
function resetState() {
  pageState.files = [];

  const listEl = document.getElementById('file-display-area');
  if (listEl) {
    listEl.innerHTML = '';
  }

  for (const panelId of ['tool-options', 'file-controls']) {
    const panel = document.getElementById(panelId);
    if (panel) {
      panel.classList.add('hidden');
    }
  }

  const input = document.getElementById('file-input') as HTMLInputElement;
  if (input) {
    input.value = '';
  }
}
|
||||
|
||||
/**
 * Re-renders the selected-file list from `pageState.files`.
 *
 * Each file gets a row with its name, its size (via `formatBytes`) and a
 * remove button. `#tool-options` and `#file-controls` are shown while at
 * least one file is selected and hidden otherwise. Returns early when
 * `#file-display-area` is missing.
 */
async function updateUI() {
  const listEl = document.getElementById('file-display-area');
  const optionsEl = document.getElementById('tool-options');
  const controlsEl = document.getElementById('file-controls');

  if (!listEl) return;

  listEl.innerHTML = '';

  if (pageState.files.length === 0) {
    if (optionsEl) optionsEl.classList.add('hidden');
    if (controlsEl) controlsEl.classList.add('hidden');
    return;
  }

  // Small factory for the text-bearing <div>s of a row.
  const makeDiv = (className: string, text?: string): HTMLDivElement => {
    const el = document.createElement('div');
    el.className = className;
    if (text !== undefined) el.textContent = text;
    return el;
  };

  for (const [position, file] of pageState.files.entries()) {
    const row = makeDiv(
      'flex items-center justify-between bg-gray-700 p-3 rounded-lg text-sm'
    );

    const details = makeDiv('flex flex-col overflow-hidden');
    details.append(
      makeDiv('truncate font-medium text-gray-200 text-sm mb-1', file.name),
      makeDiv('text-xs text-gray-400', formatBytes(file.size))
    );

    const deleteButton = document.createElement('button');
    deleteButton.className =
      'ml-4 text-red-400 hover:text-red-300 flex-shrink-0';
    deleteButton.innerHTML = '<i data-lucide="trash-2" class="w-4 h-4"></i>';
    // The list is rebuilt after each removal, so `position` stays accurate.
    deleteButton.onclick = () => {
      pageState.files.splice(position, 1);
      updateUI();
    };

    row.append(details, deleteButton);
    listEl.appendChild(row);
  }

  createIcons({ icons });

  if (optionsEl) optionsEl.classList.remove('hidden');
  if (controlsEl) controlsEl.classList.remove('hidden');
}
|
||||
|
||||
/**
 * Adds the PDF files from a picker or drop event to the selection.
 *
 * A file counts as a PDF when its MIME type is `application/pdf` or its
 * name ends with `.pdf` (case-insensitive). The UI is refreshed only when
 * at least one PDF was added.
 *
 * @param files Files from `<input>.files` or `DataTransfer.files`.
 *              `null`, `undefined` (e.g. a drop event without a
 *              `dataTransfer`) and empty lists are ignored.
 */
function handleFileSelect(files: FileList | null | undefined) {
  if (!files || files.length === 0) {
    return;
  }

  const pdfFiles = Array.from(files).filter(
    (f) =>
      f.type === 'application/pdf' || f.name.toLowerCase().endsWith('.pdf')
  );
  if (pdfFiles.length > 0) {
    pageState.files.push(...pdfFiles);
    updateUI();
  }
}
|
||||
|
||||
/**
 * Converts the fonts of every selected PDF to outlines and downloads the
 * result.
 *
 * - No files: shows an alert and returns.
 * - One file: downloads `<name>_outlined.pdf` directly.
 * - Several files: converts them one by one, adds each output to a zip and
 *   downloads `outlined_pdfs.zip`. A file that fails is logged and
 *   skipped. If nothing succeeded, an error alert is shown instead.
 *
 * Zip entry names are de-duplicated: two inputs with the same name would
 * otherwise overwrite each other in the archive while still being counted
 * as processed.
 *
 * The loader modal is hidden in `finally`, so every exit path closes it.
 */
async function processFiles() {
  if (pageState.files.length === 0) {
    showAlert('No Files', 'Please select at least one PDF file.');
    return;
  }

  const loaderModal = document.getElementById('loader-modal');
  const loaderText = document.getElementById('loader-text');

  try {
    if (loaderModal) loaderModal.classList.remove('hidden');

    if (pageState.files.length === 1) {
      if (loaderText)
        loaderText.textContent = 'Converting fonts to outlines...';

      const file = pageState.files[0];
      const resultBlob = await convertFileToOutlines(file, (msg) => {
        if (loaderText) loaderText.textContent = msg;
      });

      const baseName = file.name.replace(/\.pdf$/i, '');
      downloadFile(resultBlob, `${baseName}_outlined.pdf`);
    } else {
      if (loaderText) loaderText.textContent = 'Processing multiple PDFs...';

      const zip = new JSZip();
      const usedNames = new Set<string>();
      let processedCount = 0;

      for (let i = 0; i < pageState.files.length; i++) {
        const file = pageState.files[i];
        if (loaderText)
          loaderText.textContent = `Processing ${i + 1}/${pageState.files.length}: ${file.name}...`;

        try {
          const resultBlob = await convertFileToOutlines(file, () => {});
          const arrayBuffer = await resultBlob.arrayBuffer();
          const baseName = file.name.replace(/\.pdf$/i, '');

          // Pick a name not yet used in the archive:
          // "a_outlined.pdf", "a_outlined (2).pdf", ...
          let entryName = `${baseName}_outlined.pdf`;
          for (let n = 2; usedNames.has(entryName); n++) {
            entryName = `${baseName}_outlined (${n}).pdf`;
          }
          usedNames.add(entryName);

          zip.file(entryName, arrayBuffer);
          processedCount++;
        } catch (e) {
          // One bad file must not abort the whole batch.
          console.error(`Error processing ${file.name}:`, e);
        }
      }

      if (processedCount > 0) {
        const zipBlob = await zip.generateAsync({ type: 'blob' });
        downloadFile(zipBlob, 'outlined_pdfs.zip');
        showAlert(
          'Success',
          `Processed ${processedCount} PDFs.`,
          'success',
          () => {
            resetState();
          }
        );
      } else {
        showAlert('Error', 'No PDFs could be processed.');
      }
    }
  } catch (e: unknown) {
    console.error(e);
    const errorMessage =
      e instanceof Error ? e.message : 'An unexpected error occurred.';
    showAlert('Error', errorMessage);
  } finally {
    if (loaderModal) loaderModal.classList.add('hidden');
  }
}
|
||||
|
||||
/**
 * Page bootstrap for the Font to Outline tool: attaches listeners once the
 * DOM is ready. Every element is looked up defensively; a missing element
 * gets no listener.
 */
document.addEventListener('DOMContentLoaded', function () {
  const fileInput = document.getElementById(
    'file-input'
  ) as HTMLInputElement | null;
  const dropZone = document.getElementById('drop-zone');
  const processBtn = document.getElementById('process-btn');
  const addMoreBtn = document.getElementById('add-more-btn');
  const clearFilesBtn = document.getElementById('clear-files-btn');
  const backBtn = document.getElementById('back-to-tools');

  if (backBtn) {
    backBtn.addEventListener('click', function () {
      window.location.href = import.meta.env.BASE_URL;
    });
  }

  if (fileInput && dropZone) {
    fileInput.addEventListener('change', function (e) {
      handleFileSelect((e.target as HTMLInputElement).files);
    });

    dropZone.addEventListener('dragover', function (e) {
      e.preventDefault(); // required for the element to accept a drop
      dropZone.classList.add('bg-gray-700');
    });

    dropZone.addEventListener('dragleave', function (e) {
      e.preventDefault();
      dropZone.classList.remove('bg-gray-700');
    });

    dropZone.addEventListener('drop', function (e) {
      e.preventDefault();
      dropZone.classList.remove('bg-gray-700');
      // `dataTransfer` can be null; normalise to null for the handler.
      handleFileSelect(e.dataTransfer?.files ?? null);
    });

    // Clear the value before the picker opens so choosing the same file
    // again still fires `change`.
    fileInput.addEventListener('click', function () {
      fileInput.value = '';
    });
  }

  if (processBtn) {
    processBtn.addEventListener('click', processFiles);
  }

  if (addMoreBtn) {
    addMoreBtn.addEventListener('click', function () {
      // The input may be missing from the page; do not throw on click.
      if (!fileInput) return;
      fileInput.value = '';
      fileInput.click();
    });
  }

  if (clearFilesBtn) {
    clearFilesBtn.addEventListener('click', function () {
      resetState();
    });
  }
});
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -271,6 +271,8 @@ const init = async () => {
|
||||
'Remove Metadata': 'tools:removeMetadata',
|
||||
'Change Permissions': 'tools:changePermissions',
|
||||
'Email to PDF': 'tools:emailToPdf',
|
||||
'Font to Outline': 'tools:fontToOutline',
|
||||
'Deskew PDF': 'tools:deskewPdf',
|
||||
};
|
||||
|
||||
// Homepage-only tool grid rendering (not used on individual tool pages)
|
||||
|
||||
179
src/js/types/bookmark-pdf-type.ts
Normal file
179
src/js/types/bookmark-pdf-type.ts
Normal file
@@ -0,0 +1,179 @@
|
||||
import { PDFDocument as PDFLibDocument, PDFRef } from 'pdf-lib';
|
||||
import { PDFDocumentProxy, PageViewport } from 'pdfjs-dist';
|
||||
|
||||
// Core bookmark types
|
||||
export type BookmarkColor =
|
||||
| 'red'
|
||||
| 'blue'
|
||||
| 'green'
|
||||
| 'yellow'
|
||||
| 'purple'
|
||||
| null;
|
||||
export type BookmarkStyle = 'bold' | 'italic' | 'bold-italic' | null;
|
||||
|
||||
export interface BookmarkNode {
|
||||
id: number;
|
||||
title: string;
|
||||
page: number;
|
||||
children: BookmarkNode[];
|
||||
color: BookmarkColor | string;
|
||||
style: BookmarkStyle;
|
||||
destX: number | null;
|
||||
destY: number | null;
|
||||
zoom: string | null;
|
||||
}
|
||||
|
||||
export type BookmarkTree = BookmarkNode[];
|
||||
|
||||
// Modal system types
|
||||
export type ModalFieldType = 'text' | 'select' | 'destination' | 'preview';
|
||||
|
||||
export interface SelectOption {
|
||||
value: string;
|
||||
label: string;
|
||||
}
|
||||
|
||||
export interface BaseModalField {
|
||||
name: string;
|
||||
label: string;
|
||||
}
|
||||
|
||||
export interface TextModalField extends BaseModalField {
|
||||
type: 'text';
|
||||
placeholder?: string;
|
||||
}
|
||||
|
||||
export interface SelectModalField extends BaseModalField {
|
||||
type: 'select';
|
||||
options: SelectOption[];
|
||||
}
|
||||
|
||||
export interface DestinationModalField extends BaseModalField {
|
||||
type: 'destination';
|
||||
page?: number;
|
||||
maxPages?: number;
|
||||
}
|
||||
|
||||
export interface PreviewModalField {
|
||||
type: 'preview';
|
||||
label: string;
|
||||
}
|
||||
|
||||
export type ModalField =
|
||||
| TextModalField
|
||||
| SelectModalField
|
||||
| DestinationModalField
|
||||
| PreviewModalField;
|
||||
|
||||
export interface ModalResult {
|
||||
title?: string;
|
||||
color?: string;
|
||||
style?: string;
|
||||
destPage?: number | null;
|
||||
destX?: number | null;
|
||||
destY?: number | null;
|
||||
zoom?: string | null;
|
||||
[key: string]: string | number | null | undefined;
|
||||
}
|
||||
|
||||
export interface ModalDefaultValues {
|
||||
title?: string;
|
||||
color?: string;
|
||||
style?: string;
|
||||
destPage?: number;
|
||||
destX?: number | null;
|
||||
destY?: number | null;
|
||||
zoom?: string | null;
|
||||
[key: string]: string | number | null | undefined;
|
||||
}
|
||||
|
||||
// Destination picking types
|
||||
export type DestinationCallback = (
|
||||
page: number,
|
||||
pdfX: number,
|
||||
pdfY: number
|
||||
) => void;
|
||||
|
||||
export interface DestinationPickingState {
|
||||
isPickingDestination: boolean;
|
||||
currentPickingCallback: DestinationCallback | null;
|
||||
destinationMarker: HTMLDivElement | null;
|
||||
savedModalOverlay: HTMLDivElement | null;
|
||||
savedModal: HTMLDivElement | null;
|
||||
currentViewport: PageViewport | null;
|
||||
}
|
||||
|
||||
// State types
|
||||
export interface BookmarkEditorState {
|
||||
pdfLibDoc: PDFLibDocument | null;
|
||||
pdfJsDoc: PDFDocumentProxy | null;
|
||||
currentPage: number;
|
||||
currentZoom: number;
|
||||
originalFileName: string;
|
||||
bookmarkTree: BookmarkTree;
|
||||
history: BookmarkTree[];
|
||||
historyIndex: number;
|
||||
searchQuery: string;
|
||||
csvBookmarks: BookmarkTree | null;
|
||||
jsonBookmarks: BookmarkTree | null;
|
||||
batchMode: boolean;
|
||||
selectedBookmarks: Set<number>;
|
||||
collapsedNodes: Set<number>;
|
||||
}
|
||||
|
||||
// PDF outline types (from pdfjs-dist)
|
||||
export interface PDFOutlineItem {
|
||||
title: string;
|
||||
dest: string | unknown[] | null;
|
||||
items?: PDFOutlineItem[];
|
||||
color?: Uint8ClampedArray | [number, number, number];
|
||||
bold?: boolean;
|
||||
italic?: boolean;
|
||||
}
|
||||
|
||||
export interface FlattenedBookmark extends BookmarkNode {
|
||||
level: number;
|
||||
}
|
||||
|
||||
// Outline item for PDF creation
|
||||
export interface OutlineItem {
|
||||
ref: PDFRef;
|
||||
dict: {
|
||||
set: (key: unknown, value: unknown) => void;
|
||||
};
|
||||
}
|
||||
|
||||
// Color mapping types
|
||||
export type ColorClassMap = Record<string, string>;
|
||||
|
||||
export const COLOR_CLASSES: ColorClassMap = {
|
||||
red: 'bg-red-100 border-red-300',
|
||||
blue: 'bg-blue-100 border-blue-300',
|
||||
green: 'bg-green-100 border-green-300',
|
||||
yellow: 'bg-yellow-100 border-yellow-300',
|
||||
purple: 'bg-purple-100 border-purple-300',
|
||||
};
|
||||
|
||||
export const TEXT_COLOR_CLASSES: ColorClassMap = {
|
||||
red: 'text-red-600',
|
||||
blue: 'text-blue-600',
|
||||
green: 'text-green-600',
|
||||
yellow: 'text-yellow-600',
|
||||
purple: 'text-purple-600',
|
||||
};
|
||||
|
||||
// Maps each named bookmark color (red, blue, green, yellow, purple) to a
// "#rrggbb" hex color string.
// NOTE(review): presumably used where a concrete color value is needed
// instead of a CSS class name — confirm against the consumers of this map.
export const HEX_COLOR_MAP: Record<string, string> = {
|
||||
red: '#dc2626',
|
||||
blue: '#2563eb',
|
||||
green: '#16a34a',
|
||||
yellow: '#ca8a04',
|
||||
purple: '#9333ea',
|
||||
};
|
||||
|
||||
// Maps each named bookmark color to a three-number tuple whose components
// lie between 0.0 and 1.0.
// NOTE(review): presumably normalized [r, g, b] values written into the PDF
// outline entry — confirm against the code that reads this map. These are
// pure primaries and do not match the shades in HEX_COLOR_MAP.
export const PDF_COLOR_MAP: Record<string, [number, number, number]> = {
|
||||
red: [1.0, 0.0, 0.0],
|
||||
blue: [0.0, 0.0, 1.0],
|
||||
green: [0.0, 1.0, 0.0],
|
||||
yellow: [1.0, 1.0, 0.0],
|
||||
purple: [0.5, 0.0, 0.5],
|
||||
};
|
||||
@@ -46,3 +46,4 @@ export * from './pdf-to-zip-type.ts';
|
||||
export * from './sign-pdf-type.ts';
|
||||
export * from './add-watermark-type.ts';
|
||||
export * from './email-to-pdf-type.ts';
|
||||
export * from './bookmark-pdf-type.ts';
|
||||
|
||||
@@ -1,10 +1,46 @@
|
||||
export interface OcrWord {
|
||||
text: string;
|
||||
bbox: { x0: number; y0: number; x1: number; y1: number };
|
||||
confidence: number;
|
||||
text: string;
|
||||
bbox: { x0: number; y0: number; x1: number; y1: number };
|
||||
confidence: number;
|
||||
}
|
||||
|
||||
export interface OcrState {
|
||||
file: File | null;
|
||||
searchablePdfBytes: Uint8Array | null;
|
||||
file: File | null;
|
||||
searchablePdfBytes: Uint8Array | null;
|
||||
}
|
||||
|
||||
export interface BBox {
|
||||
x0: number; // left
|
||||
y0: number; // top (in hOCR coordinate system, origin at top-left)
|
||||
x1: number; // right
|
||||
y1: number; // bottom
|
||||
}
|
||||
|
||||
export interface Baseline {
|
||||
slope: number;
|
||||
intercept: number;
|
||||
}
|
||||
|
||||
export interface OcrLine {
|
||||
bbox: BBox;
|
||||
baseline: Baseline;
|
||||
textangle: number;
|
||||
words: OcrWord[];
|
||||
direction: 'ltr' | 'rtl';
|
||||
injectWordBreaks: boolean;
|
||||
}
|
||||
|
||||
export interface OcrPage {
|
||||
width: number;
|
||||
height: number;
|
||||
dpi: number;
|
||||
lines: OcrLine[];
|
||||
}
|
||||
|
||||
export interface WordTransform {
|
||||
x: number;
|
||||
y: number;
|
||||
fontSize: number;
|
||||
horizontalScale: number;
|
||||
rotation: number;
|
||||
}
|
||||
|
||||
@@ -42,7 +42,7 @@ export async function convertToPdfA(
|
||||
gs = cachedGsModule;
|
||||
} else {
|
||||
const gsBaseUrl = getWasmBaseUrl('ghostscript');
|
||||
gs = await loadWASM({
|
||||
gs = (await loadWASM({
|
||||
locateFile: (path: string) => {
|
||||
if (path.endsWith('.wasm')) {
|
||||
return gsBaseUrl + 'gs.wasm';
|
||||
@@ -51,7 +51,7 @@ export async function convertToPdfA(
|
||||
},
|
||||
print: (text: string) => console.log('[GS]', text),
|
||||
printErr: (text: string) => console.error('[GS Error]', text),
|
||||
}) as GhostscriptModule;
|
||||
})) as GhostscriptModule;
|
||||
cachedGsModule = gs;
|
||||
}
|
||||
|
||||
@@ -76,16 +76,24 @@ export async function convertToPdfA(
|
||||
const response = await fetchWasmFile('ghostscript', iccFileName);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Failed to fetch ICC profile: ${iccFileName}. Ensure it is in your assets folder.`);
|
||||
throw new Error(
|
||||
`Failed to fetch ICC profile: ${iccFileName}. Ensure it is in your assets folder.`
|
||||
);
|
||||
}
|
||||
|
||||
const iccData = new Uint8Array(await response.arrayBuffer());
|
||||
console.log('[Ghostscript] sRGB v2 ICC profile loaded:', iccData.length, 'bytes');
|
||||
console.log(
|
||||
'[Ghostscript] sRGB v2 ICC profile loaded:',
|
||||
iccData.length,
|
||||
'bytes'
|
||||
);
|
||||
|
||||
gs.FS.writeFile(iccPath, iccData);
|
||||
console.log('[Ghostscript] sRGB ICC profile written to FS:', iccPath);
|
||||
|
||||
const iccHex = Array.from(iccData).map(b => b.toString(16).padStart(2, '0')).join('');
|
||||
const iccHex = Array.from(iccData)
|
||||
.map((b) => b.toString(16).padStart(2, '0'))
|
||||
.join('');
|
||||
console.log('[Ghostscript] ICC profile hex length:', iccHex.length);
|
||||
|
||||
const pdfaSubtype = level === 'PDF/A-1b' ? '/GTS_PDFA1' : '/GTS_PDFA';
|
||||
@@ -114,7 +122,9 @@ export async function convertToPdfA(
|
||||
`;
|
||||
|
||||
gs.FS.writeFile(pdfaDefPath, pdfaPS);
|
||||
console.log('[Ghostscript] PDFA PostScript created with embedded ICC hex data');
|
||||
console.log(
|
||||
'[Ghostscript] PDFA PostScript created with embedded ICC hex data'
|
||||
);
|
||||
} catch (e) {
|
||||
console.error('[Ghostscript] Failed to setup PDF/A assets:', e);
|
||||
throw new Error('Conversion failed: could not create PDF/A definition');
|
||||
@@ -163,10 +173,26 @@ export async function convertToPdfA(
|
||||
console.log('[Ghostscript] Exit code:', exitCode);
|
||||
|
||||
if (exitCode !== 0) {
|
||||
try { gs.FS.unlink(inputPath); } catch { /* ignore */ }
|
||||
try { gs.FS.unlink(outputPath); } catch { /* ignore */ }
|
||||
try { gs.FS.unlink(iccPath); } catch { /* ignore */ }
|
||||
try { gs.FS.unlink(pdfaDefPath); } catch { /* ignore */ }
|
||||
try {
|
||||
gs.FS.unlink(inputPath);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
gs.FS.unlink(outputPath);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
gs.FS.unlink(iccPath);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
gs.FS.unlink(pdfaDefPath);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
throw new Error(`Ghostscript conversion failed with exit code ${exitCode}`);
|
||||
}
|
||||
|
||||
@@ -182,14 +208,32 @@ export async function convertToPdfA(
|
||||
}
|
||||
|
||||
// Cleanup
|
||||
try { gs.FS.unlink(inputPath); } catch { /* ignore */ }
|
||||
try { gs.FS.unlink(outputPath); } catch { /* ignore */ }
|
||||
try { gs.FS.unlink(iccPath); } catch { /* ignore */ }
|
||||
try { gs.FS.unlink(pdfaDefPath); } catch { /* ignore */ }
|
||||
try {
|
||||
gs.FS.unlink(inputPath);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
gs.FS.unlink(outputPath);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
gs.FS.unlink(iccPath);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
try {
|
||||
gs.FS.unlink(pdfaDefPath);
|
||||
} catch {
|
||||
/* ignore */
|
||||
}
|
||||
|
||||
if (level !== 'PDF/A-1b') {
|
||||
onProgress?.('Post-processing for transparency compliance...');
|
||||
console.log('[Ghostscript] Adding Group dictionaries to pages for transparency compliance...');
|
||||
console.log(
|
||||
'[Ghostscript] Adding Group dictionaries to pages for transparency compliance...'
|
||||
);
|
||||
|
||||
try {
|
||||
output = await addPageGroupDictionaries(output);
|
||||
@@ -202,10 +246,12 @@ export async function convertToPdfA(
|
||||
return output;
|
||||
}
|
||||
|
||||
async function addPageGroupDictionaries(pdfData: Uint8Array): Promise<Uint8Array> {
|
||||
async function addPageGroupDictionaries(
|
||||
pdfData: Uint8Array
|
||||
): Promise<Uint8Array> {
|
||||
const pdfDoc = await PDFDocument.load(pdfData, {
|
||||
ignoreEncryption: true,
|
||||
updateMetadata: false
|
||||
updateMetadata: false,
|
||||
});
|
||||
|
||||
const catalog = pdfDoc.catalog;
|
||||
@@ -227,12 +273,22 @@ async function addPageGroupDictionaries(pdfData: Uint8Array): Promise<Uint8Array
|
||||
|
||||
if (currentCS instanceof PDFName) {
|
||||
const csName = currentCS.decodeText();
|
||||
if (csName === 'DeviceRGB' || csName === 'DeviceGray' || csName === 'DeviceCMYK') {
|
||||
const iccColorSpace = pdfDoc.context.obj([PDFName.of('ICCBased'), iccProfileRef]);
|
||||
if (
|
||||
csName === 'DeviceRGB' ||
|
||||
csName === 'DeviceGray' ||
|
||||
csName === 'DeviceCMYK'
|
||||
) {
|
||||
const iccColorSpace = pdfDoc.context.obj([
|
||||
PDFName.of('ICCBased'),
|
||||
iccProfileRef,
|
||||
]);
|
||||
groupDict.set(PDFName.of('CS'), iccColorSpace);
|
||||
}
|
||||
} else if (!currentCS) {
|
||||
const iccColorSpace = pdfDoc.context.obj([PDFName.of('ICCBased'), iccProfileRef]);
|
||||
const iccColorSpace = pdfDoc.context.obj([
|
||||
PDFName.of('ICCBased'),
|
||||
iccProfileRef,
|
||||
]);
|
||||
groupDict.set(PDFName.of('CS'), iccColorSpace);
|
||||
}
|
||||
};
|
||||
@@ -247,7 +303,10 @@ async function addPageGroupDictionaries(pdfData: Uint8Array): Promise<Uint8Array
|
||||
updateGroupCS(existingGroup);
|
||||
}
|
||||
} else if (iccProfileRef) {
|
||||
const colorSpace = pdfDoc.context.obj([PDFName.of('ICCBased'), iccProfileRef]);
|
||||
const colorSpace = pdfDoc.context.obj([
|
||||
PDFName.of('ICCBased'),
|
||||
iccProfileRef,
|
||||
]);
|
||||
const groupDict = pdfDoc.context.obj({
|
||||
Type: 'Group',
|
||||
S: 'Transparency',
|
||||
@@ -261,8 +320,12 @@ async function addPageGroupDictionaries(pdfData: Uint8Array): Promise<Uint8Array
|
||||
|
||||
if (iccProfileRef) {
|
||||
pdfDoc.context.enumerateIndirectObjects().forEach(([ref, obj]) => {
|
||||
if (obj instanceof PDFDict || (obj && typeof obj === 'object' && 'dict' in obj)) {
|
||||
const dict = 'dict' in obj ? (obj as { dict: PDFDict }).dict : obj as PDFDict;
|
||||
if (
|
||||
obj instanceof PDFDict ||
|
||||
(obj && typeof obj === 'object' && 'dict' in obj)
|
||||
) {
|
||||
const dict =
|
||||
'dict' in obj ? (obj as { dict: PDFDict }).dict : (obj as PDFDict);
|
||||
|
||||
const subtype = dict.get(PDFName.of('Subtype'));
|
||||
if (subtype instanceof PDFName && subtype.decodeText() === 'Form') {
|
||||
@@ -290,8 +353,100 @@ export async function convertFileToPdfA(
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdfData = new Uint8Array(arrayBuffer);
|
||||
const result = await convertToPdfA(pdfData, level, onProgress);
|
||||
// Copy to regular ArrayBuffer to avoid SharedArrayBuffer issues
|
||||
const copy = new Uint8Array(result.length);
|
||||
copy.set(result);
|
||||
return new Blob([copy], { type: 'application/pdf' });
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Re-writes a PDF with Ghostscript's `pdfwrite` device using `-dNoOutputFonts`,
 * so that text is emitted as vector outlines instead of embedded fonts.
 *
 * The Ghostscript WASM module is loaded on first use and cached in
 * `cachedGsModule` for subsequent calls.
 *
 * Temporary files in the module's virtual file system are removed on every
 * exit path (success, non-zero exit code, thrown exception, missing output),
 * so a failed run does not leave stale `/tmp/input.pdf` / `/tmp/output.pdf`
 * behind for the next call on the cached module.
 *
 * @param pdfData - Bytes of the source PDF.
 * @param onProgress - Optional callback receiving human-readable status messages.
 * @returns Bytes of the converted PDF.
 * @throws Error if Ghostscript throws, exits with a non-zero code, or produces no output file.
 */
export async function convertFontsToOutlines(
  pdfData: Uint8Array,
  onProgress?: (msg: string) => void
): Promise<Uint8Array> {
  onProgress?.('Loading Ghostscript...');

  let gs: GhostscriptModule;

  if (cachedGsModule) {
    gs = cachedGsModule;
  } else {
    const gsBaseUrl = getWasmBaseUrl('ghostscript');
    gs = (await loadWASM({
      // Redirect the .wasm binary to the configured base URL; leave other paths alone.
      locateFile: (path: string) => {
        if (path.endsWith('.wasm')) {
          return gsBaseUrl + 'gs.wasm';
        }
        return path;
      },
      print: (text: string) => console.log('[GS]', text),
      printErr: (text: string) => console.error('[GS Error]', text),
    })) as GhostscriptModule;
    cachedGsModule = gs;
  }

  const fs = gs.FS;
  const inputPath = '/tmp/input.pdf';
  const outputPath = '/tmp/output.pdf';

  // Best-effort removal: the file may legitimately not exist (e.g. no output was produced).
  const removeQuietly = (path: string): void => {
    try {
      fs.unlink(path);
    } catch {
      /* ignore: nothing to clean up */
    }
  };

  try {
    fs.writeFile(inputPath, pdfData);

    onProgress?.('Converting fonts to outlines...');

    const args = [
      '-dNOSAFER',
      '-dBATCH',
      '-dNOPAUSE',
      '-sDEVICE=pdfwrite',
      '-dNoOutputFonts',
      '-dCompressPages=true',
      '-dAutoRotatePages=/None',
      `-sOutputFile=${outputPath}`,
      inputPath,
    ];

    let exitCode: number;
    try {
      exitCode = gs.callMain(args);
    } catch (e) {
      throw new Error(`Ghostscript threw an exception: ${e}`);
    }

    if (exitCode !== 0) {
      throw new Error(`Ghostscript conversion failed with exit code ${exitCode}`);
    }

    try {
      return fs.readFile(outputPath);
    } catch {
      throw new Error('Ghostscript did not produce output file');
    }
  } finally {
    // Runs after the return value has been read, and on every error path.
    removeQuietly(inputPath);
    removeQuietly(outputPath);
  }
}
|
||||
|
||||
/**
 * Convenience wrapper around `convertFontsToOutlines` for `File` inputs.
 *
 * @param file - The PDF file to convert.
 * @param onProgress - Optional callback receiving status messages.
 * @returns A `Blob` of type `application/pdf` holding the converted document.
 */
export async function convertFileToOutlines(
  file: File,
  onProgress?: (msg: string) => void
): Promise<Blob> {
  const sourceBytes = new Uint8Array(await file.arrayBuffer());
  const outlined = await convertFontsToOutlines(sourceBytes, onProgress);

  // Constructing from a typed array copies the bytes into a fresh, regular ArrayBuffer.
  const detached = new Uint8Array(outlined);

  return new Blob([detached], { type: 'application/pdf' });
}
|
||||
|
||||
@@ -306,3 +306,157 @@ export function escapeHtml(text: string): string {
|
||||
};
|
||||
return text.replace(/[&<>"']/g, (m) => map[m]);
|
||||
}
|
||||
|
||||
/**
 * Encodes raw bytes as a base64 string.
 *
 * The bytes are converted to a binary string in slices of 0x8000 so that the
 * spread into `String.fromCharCode` never receives an oversized argument list.
 *
 * @param bytes - Bytes to encode.
 * @returns The base64 representation (empty string for empty input).
 */
export function uint8ArrayToBase64(bytes: Uint8Array): string {
  const sliceLength = 0x8000;
  let binary = '';
  let offset = 0;

  while (offset < bytes.length) {
    // subarray clamps the end index to the array length.
    const slice = bytes.subarray(offset, offset + sliceLength);
    binary += String.fromCharCode(...slice);
    offset += sliceLength;
  }

  return btoa(binary);
}
|
||||
|
||||
/**
 * Simplifies the HTML body of an email using a fixed sequence of regex rewrites:
 * removes `<head>`, `<style>`, `<script>` and `<link>`, strips presentational
 * attributes, drops 1x1 tracking images, unwraps Outlook SafeLinks, shortens
 * very long links, flattens tables into `<div>`/`<span>` markup and finally
 * caps the result at 100000 characters.
 *
 * NOTE(review): this is a regex-based simplifier, not a complete XSS sanitizer;
 * for example, inline event-handler attributes are not removed here.
 *
 * @param html - Raw email HTML. Falsy input is returned unchanged.
 * @returns The simplified HTML.
 */
export function sanitizeEmailHtml(html: string): string {
  if (!html) return html;

  let sanitized = html;

  // Document-level, styling and active content.
  sanitized = sanitized.replace(/<head[^>]*>[\s\S]*?<\/head>/gi, '');
  sanitized = sanitized.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
  sanitized = sanitized.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '');
  sanitized = sanitized.replace(/<link[^>]*>/gi, '');

  // Styling / bookkeeping attributes.
  sanitized = sanitized.replace(/\s+style=["'][^"']*["']/gi, '');
  sanitized = sanitized.replace(/\s+class=["'][^"']*["']/gi, '');
  sanitized = sanitized.replace(/\s+data-[a-z-]+=["'][^"']*["']/gi, '');

  // 1x1 images (tracking pixels).
  sanitized = sanitized.replace(
    /<img[^>]*(?:width=["']1["'][^>]*height=["']1["']|height=["']1["'][^>]*width=["']1["'])[^>]*\/?>/gi,
    ''
  );

  // Outlook SafeLinks: replace the wrapper URL with the decoded target URL.
  sanitized = sanitized.replace(
    /href=["']https?:\/\/[^"']*safelinks\.protection\.outlook\.com[^"']*url=([^&"']+)[^"']*["']/gi,
    (match: string, encodedUrl: string) => {
      try {
        const decodedUrl = decodeURIComponent(encodedUrl);
        // The decoded value may contain a double quote (from %22); escape it so
        // it cannot terminate the attribute and inject further attributes.
        return `href="${decodedUrl.replace(/"/g, '&quot;')}"`;
      } catch {
        // Malformed percent-encoding: keep the original link.
        return match;
      }
    }
  );
  sanitized = sanitized.replace(/\s+originalsrc=["'][^"']*["']/gi, '');

  // Links of 500+ characters: keep the part before '?' when that is short,
  // otherwise keep the first 200 characters.
  sanitized = sanitized.replace(
    /href=["']([^"']{500,})["']/gi,
    (_match: string, url: string) => {
      const baseUrl = url.split('?')[0];
      if (baseUrl && baseUrl.length < 200) {
        return `href="${baseUrl}"`;
      }
      return `href="${url.substring(0, 200)}"`;
    }
  );

  // Layout attributes.
  sanitized = sanitized.replace(
    /\s+(cellpadding|cellspacing|bgcolor|border|valign|align|width|height|role|dir|id)=["'][^"']*["']/gi,
    ''
  );

  // Flatten tables. Opening and closing <table> tags map to opening and closing
  // <div> tags respectively so the resulting markup stays balanced.
  sanitized = sanitized.replace(/<table[^>]*>/gi, '<div>');
  sanitized = sanitized.replace(/<\/table[^>]*>/gi, '</div>');
  sanitized = sanitized.replace(/<\/?tbody[^>]*>/gi, '');
  sanitized = sanitized.replace(/<\/?thead[^>]*>/gi, '');
  sanitized = sanitized.replace(/<\/?tfoot[^>]*>/gi, '');
  sanitized = sanitized.replace(/<tr[^>]*>/gi, '<div>');
  sanitized = sanitized.replace(/<\/tr>/gi, '</div>');
  sanitized = sanitized.replace(/<td[^>]*>/gi, '<span> ');
  sanitized = sanitized.replace(/<\/td>/gi, ' </span>');
  sanitized = sanitized.replace(/<th[^>]*>/gi, '<strong> ');
  sanitized = sanitized.replace(/<\/th>/gi, ' </strong>');

  // Remove empty wrappers and collapse runs of identical div tags.
  sanitized = sanitized.replace(/<div>\s*<\/div>/gi, '');
  sanitized = sanitized.replace(/<span>\s*<\/span>/gi, '');
  sanitized = sanitized.replace(/(<div>)+/gi, '<div>');
  sanitized = sanitized.replace(/(<\/div>)+/gi, '</div>');

  // Anchors with an empty href are reduced to their text.
  sanitized = sanitized.replace(
    /<a[^>]*href=["']\s*["'][^>]*>([^<]*)<\/a>/gi,
    '$1'
  );

  // Size cap: prefer cutting at a </div> boundary in the second half of the budget.
  const MAX_HTML_SIZE = 100000;
  if (sanitized.length > MAX_HTML_SIZE) {
    const truncateAt = sanitized.lastIndexOf('</div>', MAX_HTML_SIZE);
    if (truncateAt > MAX_HTML_SIZE / 2) {
      sanitized = sanitized.substring(0, truncateAt) + '</div></body></html>';
    } else {
      sanitized = sanitized.substring(0, MAX_HTML_SIZE) + '...</body></html>';
    }
  }

  return sanitized;
}
|
||||
|
||||
/**
 * Formats a raw RFC 2822 date string into a nicer human-readable format,
 * while preserving the original timezone and time (no conversion to local time).
 *
 * Seconds, when present, are dropped from the output. The day of month is
 * emitted exactly as it appears in the input. Strings that do not match the
 * expected `Ddd, D Mon YYYY HH:MM[:SS] ±ZZZZ` layout are returned unchanged.
 *
 * Example input:  "Sun, 8 Jan 2017 20:37:44 +0200"
 * Example output: "Sunday, January 8, 2017 at 8:37 PM (UTC+02:00)"
 *
 * @param raw - The raw date header value.
 * @returns The formatted date, or `raw` itself when it cannot be parsed.
 */
export function formatRawDate(raw: string): string {
  try {
    const match = raw.match(
      /([A-Za-z]{3}),\s+(\d{1,2})\s+([A-Za-z]{3})\s+(\d{4})\s+(\d{2}):(\d{2})(?::(\d{2}))?\s+([+-]\d{4})/
    );

    if (match) {
      // Group 7 (seconds) is matched but intentionally not used in the output.
      const [, dayAbbr, dom, monthAbbr, year, hoursStr, minsStr, , timezone] =
        match;

      const days: Record<string, string> = {
        Sun: 'Sunday',
        Mon: 'Monday',
        Tue: 'Tuesday',
        Wed: 'Wednesday',
        Thu: 'Thursday',
        Fri: 'Friday',
        Sat: 'Saturday',
      };
      const months: Record<string, string> = {
        Jan: 'January',
        Feb: 'February',
        Mar: 'March',
        Apr: 'April',
        May: 'May',
        Jun: 'June',
        Jul: 'July',
        Aug: 'August',
        Sep: 'September',
        Oct: 'October',
        Nov: 'November',
        Dec: 'December',
      };

      // Unknown abbreviations are passed through untouched.
      const fullDay = days[dayAbbr] || dayAbbr;
      const fullMonth = months[monthAbbr] || monthAbbr;

      // 24-hour -> 12-hour clock; hour 0 and hour 12 both display as 12.
      let hours = parseInt(hoursStr, 10);
      const ampm = hours >= 12 ? 'PM' : 'AM';
      hours = hours % 12;
      hours = hours ? hours : 12;

      // "+0200" -> "UTC+02:00"
      const tzSign = timezone.substring(0, 1);
      const tzHours = timezone.substring(1, 3);
      const tzMins = timezone.substring(3, 5);
      const formattedTz = `UTC${tzSign}${tzHours}:${tzMins}`;

      return `${fullDay}, ${fullMonth} ${dom}, ${year} at ${hours}:${minsStr} ${ampm} (${formattedTz})`;
    }
  } catch {
    // Fallback to raw string if parsing fails
  }
  return raw;
}
|
||||
|
||||
266
src/js/utils/hocr-transform.ts
Normal file
266
src/js/utils/hocr-transform.ts
Normal file
@@ -0,0 +1,266 @@
|
||||
import {
|
||||
BBox,
|
||||
OcrLine,
|
||||
OcrPage,
|
||||
OcrWord,
|
||||
WordTransform,
|
||||
Baseline,
|
||||
} from '@/types';
|
||||
|
||||
// Captures the four unsigned integer coordinates of a `bbox x0 y0 x1 y1`
// entry inside an hOCR `title` attribute.
const BBOX_PATTERN = /bbox\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/;
|
||||
// Captures the two values of a `baseline` entry: an optionally signed decimal
// followed by an optionally signed integer.
// NOTE(review): the first group can match an empty string; parseBaseline
// falls back to 0 for it.
const BASELINE_PATTERN = /baseline\s+([-+]?\d*\.?\d*)\s+([-+]?\d+)/;
|
||||
// Captures the optionally signed decimal value of a `textangle` entry.
// NOTE(review): the group can match an empty string; parseTextangle falls
// back to 0 for it.
const TEXTANGLE_PATTERN = /textangle\s+([-+]?\d*\.?\d*)/;
|
||||
|
||||
/**
 * Extracts the `bbox` entry from an hOCR `title` attribute value.
 *
 * @param title - The raw `title` attribute text.
 * @returns The parsed rectangle, or `null` when no `bbox` entry is present.
 */
export function parseBBox(title: string): BBox | null {
  const coords = BBOX_PATTERN.exec(title);
  if (coords === null) {
    return null;
  }

  const toInt = (digits: string): number => parseInt(digits, 10);

  return {
    x0: toInt(coords[1]),
    y0: toInt(coords[2]),
    x1: toInt(coords[3]),
    y1: toInt(coords[4]),
  };
}
|
||||
|
||||
/**
 * Extracts the `baseline` entry from an hOCR `title` attribute value.
 *
 * @param title - The raw `title` attribute text.
 * @returns The slope and intercept; both are 0 when the entry is missing,
 *          and each value individually falls back to 0 when it does not parse.
 */
export function parseBaseline(title: string): Baseline {
  const parts = BASELINE_PATTERN.exec(title);

  if (parts === null) {
    return { slope: 0, intercept: 0 };
  }

  const slope = parseFloat(parts[1]) || 0;
  const intercept = parseInt(parts[2], 10) || 0;

  return { slope, intercept };
}
|
||||
|
||||
/**
 * Extracts the `textangle` entry from an hOCR `title` attribute value.
 *
 * @param title - The raw `title` attribute text.
 * @returns The angle, or 0 when the entry is missing or not numeric.
 */
export function parseTextangle(title: string): number {
  const found = TEXTANGLE_PATTERN.exec(title);
  if (found === null) {
    return 0;
  }
  const angle = parseFloat(found[1]);
  return angle || 0;
}
|
||||
|
||||
/**
 * Reads the writing direction from an element's `dir` attribute.
 *
 * @param element - Element to inspect.
 * @returns `'rtl'` only when `dir` is exactly `"rtl"`; `'ltr'` otherwise
 *          (including when the attribute is absent).
 */
export function getTextDirection(element: Element): 'ltr' | 'rtl' {
  if (element.getAttribute('dir') === 'rtl') {
    return 'rtl';
  }
  return 'ltr';
}
|
||||
|
||||
/**
 * Decides whether spaces should be inserted between words, based on the
 * element's `lang` attribute.
 *
 * @param element - Element to inspect.
 * @returns `false` when `lang` is one of the listed Chinese, Japanese or
 *          Korean codes; `true` otherwise (including when `lang` is absent).
 */
export function shouldInjectWordBreaks(element: Element): boolean {
  const language = element.getAttribute('lang') || '';

  switch (language) {
    case 'chi_sim':
    case 'chi_tra':
    case 'jpn':
    case 'kor':
    case 'zh':
    case 'ja':
    case 'ko':
      return false;
    default:
      return true;
  }
}
|
||||
|
||||
/**
 * Applies Unicode NFKC normalization (compatibility decomposition followed by
 * canonical composition), e.g. turning ligatures and full-width forms into
 * their plain equivalents.
 *
 * @param text - Text to normalize.
 * @returns The normalized text.
 */
export function normalizeText(text: string): string {
  const normalized = text.normalize('NFKC');
  return normalized;
}
|
||||
|
||||
/**
 * Parses an hOCR document into a page model.
 *
 * Page size comes from the `bbox` of the first `.ocr_page` element (0 x 0 when
 * unavailable). Lines are read from `.ocr_line`, `.ocr_textfloat`,
 * `.ocr_header` and `.ocr_caption` elements; lines without words are dropped.
 * When the document contains no line elements at all, every `.ocrx_word` is
 * gathered into one synthetic left-to-right line.
 *
 * @param hocrText - hOCR markup.
 * @returns The parsed page; `dpi` is always reported as 72.
 */
export function parseHocrDocument(hocrText: string): OcrPage {
  const doc = new DOMParser().parseFromString(hocrText, 'text/html');

  // Page dimensions.
  const pageEl = doc.querySelector('.ocr_page');
  const pageBox = pageEl ? parseBBox(pageEl.getAttribute('title') || '') : null;
  const width = pageBox ? pageBox.x1 - pageBox.x0 : 0;
  const height = pageBox ? pageBox.y1 - pageBox.y0 : 0;

  // Line-level elements, in document order.
  const lineSelector = ['ocr_line', 'ocr_textfloat', 'ocr_header', 'ocr_caption']
    .map((cls) => `.${cls}`)
    .join(', ');
  const lineNodes = Array.from(doc.querySelectorAll(lineSelector));

  const lines: OcrLine[] = [];

  if (lineNodes.length > 0) {
    for (const node of lineNodes) {
      const parsed = parseHocrLine(node);
      if (parsed !== null && parsed.words.length > 0) {
        lines.push(parsed);
      }
    }
  } else {
    // No line structure: fall back to one line holding every word.
    const looseWords = parseWordsFromElements(
      doc.querySelectorAll('.ocrx_word')
    );
    if (looseWords.length > 0) {
      lines.push({
        bbox: calculateBoundingBox(looseWords.map((w) => w.bbox)),
        baseline: { slope: 0, intercept: 0 },
        textangle: 0,
        words: looseWords,
        direction: 'ltr',
        injectWordBreaks: true,
      });
    }
  }

  return { width, height, dpi: 72, lines };
}
|
||||
|
||||
/**
 * Builds an `OcrLine` from one hOCR line element.
 *
 * Direction and word-break behavior are taken from the closest `.ocr_par`
 * ancestor, or from the direct parent when there is none.
 *
 * @param lineElement - The line element.
 * @returns The parsed line, or `null` when the element has no `bbox`.
 */
function parseHocrLine(lineElement: Element): OcrLine | null {
  const title = lineElement.getAttribute('title') || '';

  const bbox = parseBBox(title);
  if (bbox === null) {
    return null;
  }

  const container =
    lineElement.closest('.ocr_par') || lineElement.parentElement;

  let direction: 'ltr' | 'rtl' = 'ltr';
  let injectWordBreaks = true;
  if (container) {
    direction = getTextDirection(container);
    injectWordBreaks = shouldInjectWordBreaks(container);
  }

  return {
    bbox,
    baseline: parseBaseline(title),
    textangle: parseTextangle(title),
    words: parseWordsFromElements(lineElement.querySelectorAll('.ocrx_word')),
    direction,
    injectWordBreaks,
  };
}
|
||||
|
||||
/**
 * Converts hOCR word elements into `OcrWord` records.
 *
 * Elements whose trimmed, normalized text is empty, or whose `title` has no
 * `bbox`, are skipped. Confidence is read from `x_wconf` and defaults to 0.
 *
 * @param wordElements - The `.ocrx_word` elements to convert.
 * @returns The words, in the order of the input list.
 */
function parseWordsFromElements(wordElements: NodeListOf<Element>): OcrWord[] {
  const result: OcrWord[] = [];

  for (const el of Array.from(wordElements)) {
    const text = normalizeText((el.textContent || '').trim());
    if (text) {
      const title = el.getAttribute('title') || '';
      const bbox = parseBBox(title);
      if (bbox) {
        const conf = title.match(/x_wconf\s+(\d+)/);
        result.push({
          text,
          bbox,
          confidence: conf ? parseInt(conf[1], 10) : 0,
        });
      }
    }
  }

  return result;
}
|
||||
|
||||
/**
 * Computes the smallest rectangle that encloses every given rectangle.
 *
 * Implemented as a single pass with pairwise `Math.min`/`Math.max`, so it
 * allocates no intermediate arrays and does not spread the input into a
 * function call (which can exceed the engine's argument limit for very
 * large inputs).
 *
 * @param bboxes - Rectangles to enclose.
 * @returns The enclosing rectangle, or an all-zero rectangle for empty input.
 */
function calculateBoundingBox(bboxes: BBox[]): BBox {
  if (bboxes.length === 0) {
    return { x0: 0, y0: 0, x1: 0, y1: 0 };
  }

  const first = bboxes[0];
  let x0 = first.x0;
  let y0 = first.y0;
  let x1 = first.x1;
  let y1 = first.y1;

  for (let i = 1; i < bboxes.length; i++) {
    const box = bboxes[i];
    x0 = Math.min(x0, box.x0);
    y0 = Math.min(y0, box.y0);
    x1 = Math.max(x1, box.x1);
    y1 = Math.max(y1, box.y1);
  }

  return { x0, y0, x1, y1 };
}
|
||||
|
||||
/**
 * Works out where and how large a word must be drawn so that it covers its
 * OCR bounding box.
 *
 * Because pdf-lib has no horizontal text scaling (the `Tz` operator), the font
 * size itself is adjusted: starting from the box height, it is rescaled by
 * `boxWidth / measuredWidth` for up to 10 rounds (stopping once a round
 * changes it by less than 1%), then clamped to the range
 * [1, 2 x box height].
 *
 * @param word - The word to position
 * @param line - The line containing this word
 * @param pageHeight - Height of the page in pixels (for coordinate flip)
 * @param fontWidthFn - Function to calculate text width at a given font size
 * @returns Transform parameters for pdf-lib
 */
export function calculateWordTransform(
  word: OcrWord,
  line: OcrLine,
  pageHeight: number,
  fontWidthFn: (text: string, fontSize: number) => number
): WordTransform {
  const { x0: left, y0: top, x1: right, y1: bottom } = word.bbox;
  const targetWidth = right - left;
  const boxHeight = bottom - top;

  // Fit the font size to the box width.
  let size = boxHeight;
  let roundsLeft = 10;
  while (roundsLeft-- > 0) {
    const measured = fontWidthFn(word.text, size);
    if (measured <= 0) break;

    const candidate = size * (targetWidth / measured);
    const converged = Math.abs(candidate - size) / size < 0.01;
    size = candidate;
    if (converged) break;
  }

  // Keep the size within sane bounds relative to the box height.
  size = Math.max(1, Math.min(size, boxHeight * 2));

  // Residual stretch still needed after clamping.
  const finalWidth = fontWidthFn(word.text, size);
  const horizontalScale = finalWidth > 0 ? targetWidth / finalWidth : 1;

  // Baseline slope converted to degrees, combined with the negated text angle.
  const slopeAngle = Math.atan(line.baseline.slope) * (180 / Math.PI);
  const rotation = -line.textangle + slopeAngle;

  return {
    x: left,
    // pdf-lib draws text from the baseline, so anchor at the bottom of the
    // word box, flipped into bottom-left-origin coordinates.
    y: pageHeight - bottom,
    fontSize: size,
    horizontalScale,
    rotation,
  };
}
|
||||
|
||||
/**
 * Computes how to draw a space character that fills the horizontal gap
 * between two consecutive words of a line.
 *
 * The font size is the line height plus the baseline intercept (at least 1);
 * the space glyph is then stretched so its width equals the gap.
 *
 * @param prevWord - Word to the left of the gap.
 * @param nextWord - Word to the right of the gap.
 * @param line - Line containing both words.
 * @param pageHeight - Page height, used to flip the y coordinate.
 * @param spaceWidthFn - Returns the width of a space at a given font size.
 * @returns Placement of the space, or `null` when the words touch/overlap or
 *          the reported space width is not positive.
 */
export function calculateSpaceTransform(
  prevWord: OcrWord,
  nextWord: OcrWord,
  line: OcrLine,
  pageHeight: number,
  spaceWidthFn: (fontSize: number) => number
): { x: number; y: number; horizontalScale: number; fontSize: number } | null {
  const { bbox: lineBox, baseline } = line;

  const fontSize = Math.max(lineBox.y1 - lineBox.y0 + baseline.intercept, 1);

  const gapStart = prevWord.bbox.x1;
  const gapWidth = nextWord.bbox.x0 - gapStart;
  if (gapWidth <= 0) {
    return null;
  }

  const naturalWidth = spaceWidthFn(fontSize);
  if (naturalWidth <= 0) {
    return null;
  }

  return {
    x: gapStart,
    y: pageHeight - lineBox.y1 - baseline.intercept,
    horizontalScale: gapWidth / naturalWidth,
    fontSize,
  };
}
|
||||
Reference in New Issue
Block a user