feat(ocr): add whitelist presets and improve UI for OCR tool
refactor: format code and improve mobile menu accessibility
style: fix whitespace and formatting in multiple files
docs: update documentation with better formatting and examples
This commit is contained in:
@@ -25,8 +25,8 @@ async function handleSinglePdfUpload(toolId, file) {
|
||||
showLoader('Loading PDF...');
|
||||
try {
|
||||
const pdfBytes = await readFileAsArrayBuffer(file);
|
||||
state.pdfDoc = await PDFLibDocument.load(pdfBytes as ArrayBuffer, {
|
||||
ignoreEncryption: true
|
||||
state.pdfDoc = await PDFLibDocument.load(pdfBytes as ArrayBuffer, {
|
||||
ignoreEncryption: true,
|
||||
});
|
||||
hideLoader();
|
||||
|
||||
@@ -324,7 +324,11 @@ async function handleSinglePdfUpload(toolId, file) {
|
||||
}
|
||||
|
||||
async function handleMultiFileUpload(toolId) {
|
||||
if (toolId === 'merge' || toolId === 'alternate-merge' || toolId === 'reverse-pages') {
|
||||
if (
|
||||
toolId === 'merge' ||
|
||||
toolId === 'alternate-merge' ||
|
||||
toolId === 'reverse-pages'
|
||||
) {
|
||||
const pdfFilesUnloaded: File[] = [];
|
||||
|
||||
state.files.forEach((file) => {
|
||||
|
||||
@@ -46,7 +46,7 @@ export async function addAttachments() {
|
||||
showAlert('Error', `Failed to attach files: ${error.message}`);
|
||||
} finally {
|
||||
hideLoader();
|
||||
clearAttachments();
|
||||
clearAttachments();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -103,7 +103,7 @@ export function setupAddAttachmentsTool() {
|
||||
|
||||
const nameSpan = document.createElement('span');
|
||||
nameSpan.className = 'truncate text-sm';
|
||||
nameSpan.textContent = file.name;
|
||||
nameSpan.textContent = file.name;
|
||||
|
||||
const sizeSpan = document.createElement('span');
|
||||
sizeSpan.className = 'text-xs text-gray-400';
|
||||
|
||||
@@ -113,11 +113,9 @@ async function runOCR() {
|
||||
tessjs_create_hocr: '1',
|
||||
});
|
||||
|
||||
if (whitelist.trim()) {
|
||||
await worker.setParameters({
|
||||
tessedit_char_whitelist: whitelist.trim(),
|
||||
});
|
||||
}
|
||||
await worker.setParameters({
|
||||
tessedit_char_whitelist: whitelist,
|
||||
});
|
||||
|
||||
// @ts-expect-error TS(2304) FIXME: Cannot find name 'pdfjsLib'.
|
||||
const pdf = await pdfjsLib.getDocument(
|
||||
@@ -144,7 +142,11 @@ async function runOCR() {
|
||||
binarizeCanvas(context);
|
||||
}
|
||||
|
||||
const result = await worker.recognize(canvas, {}, { text: true, hocr: true });
|
||||
const result = await worker.recognize(
|
||||
canvas,
|
||||
{},
|
||||
{ text: true, hocr: true }
|
||||
);
|
||||
const data = result.data;
|
||||
const newPage = newPdfDoc.addPage([viewport.width, viewport.height]);
|
||||
const pngImageBytes = await new Promise((resolve) =>
|
||||
@@ -281,9 +283,51 @@ export function setupOcrTool() {
|
||||
);
|
||||
const processBtn = document.getElementById('process-btn');
|
||||
|
||||
// Whitelist presets
|
||||
const whitelistPresets: Record<string, string> = {
|
||||
alphanumeric:
|
||||
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-\'"',
|
||||
'numbers-currency': '0123456789$€£¥.,- ',
|
||||
'letters-only': 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ',
|
||||
'numbers-only': '0123456789',
|
||||
invoice: '0123456789$.,/-#: ',
|
||||
forms:
|
||||
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,()-_/@#:',
|
||||
};
|
||||
|
||||
// Handle whitelist preset selection
|
||||
const presetSelect = document.getElementById(
|
||||
'whitelist-preset'
|
||||
) as HTMLSelectElement;
|
||||
const whitelistInput = document.getElementById(
|
||||
'ocr-whitelist'
|
||||
) as HTMLInputElement;
|
||||
|
||||
presetSelect?.addEventListener('change', (e) => {
|
||||
const preset = (e.target as HTMLSelectElement).value;
|
||||
if (preset && preset !== 'custom') {
|
||||
whitelistInput.value = whitelistPresets[preset];
|
||||
whitelistInput.disabled = true;
|
||||
} else {
|
||||
whitelistInput.disabled = false;
|
||||
if (preset === '') {
|
||||
whitelistInput.value = '';
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
// Handle details toggle icon rotation
|
||||
document.querySelectorAll('details').forEach((details) => {
|
||||
details.addEventListener('toggle', () => {
|
||||
const icon = details.querySelector('.details-icon') as HTMLElement;
|
||||
if (icon) {
|
||||
icon.style.transform = details.open ? 'rotate(180deg)' : 'rotate(0deg)';
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
langSearch.addEventListener('input', () => {
|
||||
// @ts-expect-error TS(2339) FIXME: Property 'value' does not exist on type 'HTMLEleme... Remove this comment to see the full error message
|
||||
const searchTerm = langSearch.value.toLowerCase();
|
||||
const searchTerm = (langSearch as HTMLInputElement).value.toLowerCase();
|
||||
langList.querySelectorAll('label').forEach((label) => {
|
||||
label.style.display = label.textContent.toLowerCase().includes(searchTerm)
|
||||
? ''
|
||||
@@ -291,19 +335,14 @@ export function setupOcrTool() {
|
||||
});
|
||||
});
|
||||
|
||||
// Update the display of selected languages
|
||||
langList.addEventListener('change', () => {
|
||||
const selected = Array.from(
|
||||
langList.querySelectorAll('.lang-checkbox:checked')
|
||||
)
|
||||
// @ts-expect-error TS(2339) FIXME: Property 'value' does not exist on type 'Element'.
|
||||
.map((cb) => tesseractLanguages[cb.value]);
|
||||
).map((cb) => tesseractLanguages[(cb as HTMLInputElement).value]);
|
||||
selectedLangsDisplay.textContent =
|
||||
selected.length > 0 ? selected.join(', ') : 'None';
|
||||
// @ts-expect-error TS(2339) FIXME: Property 'disabled' does not exist on type 'HTMLEl... Remove this comment to see the full error message
|
||||
processBtn.disabled = selected.length === 0;
|
||||
(processBtn as HTMLButtonElement).disabled = selected.length === 0;
|
||||
});
|
||||
|
||||
// Attach the main OCR function to the process button
|
||||
processBtn.addEventListener('click', runOCR);
|
||||
}
|
||||
|
||||
@@ -6,7 +6,9 @@ import { PDFDocument as PDFLibDocument } from 'pdf-lib';
|
||||
import JSZip from 'jszip';
|
||||
|
||||
export async function reversePages() {
|
||||
const pdfDocs = state.files.filter((file: File) => file.type === 'application/pdf');
|
||||
const pdfDocs = state.files.filter(
|
||||
(file: File) => file.type === 'application/pdf'
|
||||
);
|
||||
if (!pdfDocs.length) {
|
||||
showAlert('Error', 'PDF not loaded.');
|
||||
return;
|
||||
@@ -16,7 +18,7 @@ export async function reversePages() {
|
||||
const zip = new JSZip();
|
||||
for (let j = 0; j < pdfDocs.length; j++) {
|
||||
const file = pdfDocs[j];
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
const pdfDoc = await PDFLibDocument.load(arrayBuffer);
|
||||
const newPdf = await PDFLibDocument.create();
|
||||
const pageCount = pdfDoc.getPageCount();
|
||||
|
||||
@@ -21,7 +21,9 @@ const hideBrandingSections = () => {
|
||||
featuresSection.style.display = 'none';
|
||||
}
|
||||
|
||||
const securitySection = document.getElementById('security-compliance-section');
|
||||
const securitySection = document.getElementById(
|
||||
'security-compliance-section'
|
||||
);
|
||||
if (securitySection) {
|
||||
securitySection.style.display = 'none';
|
||||
}
|
||||
@@ -47,7 +49,7 @@ const hideBrandingSections = () => {
|
||||
}
|
||||
|
||||
const sectionDividers = document.querySelectorAll('.section-divider');
|
||||
sectionDividers.forEach(divider => {
|
||||
sectionDividers.forEach((divider) => {
|
||||
(divider as HTMLElement).style.display = 'none';
|
||||
});
|
||||
|
||||
|
||||
@@ -1,48 +1,53 @@
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
const mobileMenuButton = document.getElementById('mobile-menu-button');
|
||||
const mobileMenu = document.getElementById('mobile-menu');
|
||||
const menuIcon = document.getElementById('menu-icon');
|
||||
const closeIcon = document.getElementById('close-icon');
|
||||
const mobileMenuButton = document.getElementById('mobile-menu-button');
|
||||
const mobileMenu = document.getElementById('mobile-menu');
|
||||
const menuIcon = document.getElementById('menu-icon');
|
||||
const closeIcon = document.getElementById('close-icon');
|
||||
|
||||
if (mobileMenuButton && mobileMenu && menuIcon && closeIcon) {
|
||||
// Toggle menu on button click
|
||||
mobileMenuButton.addEventListener('click', () => {
|
||||
const isExpanded = mobileMenuButton.getAttribute('aria-expanded') === 'true';
|
||||
|
||||
// Toggle menu visibility
|
||||
mobileMenu.classList.toggle('hidden');
|
||||
|
||||
// Toggle icons
|
||||
menuIcon.classList.toggle('hidden');
|
||||
closeIcon.classList.toggle('hidden');
|
||||
|
||||
// Update aria-expanded for accessibility
|
||||
mobileMenuButton.setAttribute('aria-expanded', (!isExpanded).toString());
|
||||
});
|
||||
if (mobileMenuButton && mobileMenu && menuIcon && closeIcon) {
|
||||
// Toggle menu on button click
|
||||
mobileMenuButton.addEventListener('click', () => {
|
||||
const isExpanded =
|
||||
mobileMenuButton.getAttribute('aria-expanded') === 'true';
|
||||
|
||||
// Close menu when clicking on a link
|
||||
const mobileLinks = mobileMenu.querySelectorAll('a');
|
||||
mobileLinks.forEach(link => {
|
||||
link.addEventListener('click', () => {
|
||||
mobileMenu.classList.add('hidden');
|
||||
menuIcon.classList.remove('hidden');
|
||||
closeIcon.classList.add('hidden');
|
||||
mobileMenuButton.setAttribute('aria-expanded', 'false');
|
||||
});
|
||||
});
|
||||
// Toggle menu visibility
|
||||
mobileMenu.classList.toggle('hidden');
|
||||
|
||||
// Close menu when clicking outside
|
||||
document.addEventListener('click', (event) => {
|
||||
const target = event.target as Node;
|
||||
const isClickInsideMenu = mobileMenu.contains(target);
|
||||
const isClickOnButton = mobileMenuButton.contains(target);
|
||||
|
||||
if (!isClickInsideMenu && !isClickOnButton && !mobileMenu.classList.contains('hidden')) {
|
||||
mobileMenu.classList.add('hidden');
|
||||
menuIcon.classList.remove('hidden');
|
||||
closeIcon.classList.add('hidden');
|
||||
mobileMenuButton.setAttribute('aria-expanded', 'false');
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
// Toggle icons
|
||||
menuIcon.classList.toggle('hidden');
|
||||
closeIcon.classList.toggle('hidden');
|
||||
|
||||
// Update aria-expanded for accessibility
|
||||
mobileMenuButton.setAttribute('aria-expanded', (!isExpanded).toString());
|
||||
});
|
||||
|
||||
// Close menu when clicking on a link
|
||||
const mobileLinks = mobileMenu.querySelectorAll('a');
|
||||
mobileLinks.forEach((link) => {
|
||||
link.addEventListener('click', () => {
|
||||
mobileMenu.classList.add('hidden');
|
||||
menuIcon.classList.remove('hidden');
|
||||
closeIcon.classList.add('hidden');
|
||||
mobileMenuButton.setAttribute('aria-expanded', 'false');
|
||||
});
|
||||
});
|
||||
|
||||
// Close menu when clicking outside
|
||||
document.addEventListener('click', (event) => {
|
||||
const target = event.target as Node;
|
||||
const isClickInsideMenu = mobileMenu.contains(target);
|
||||
const isClickOnButton = mobileMenuButton.contains(target);
|
||||
|
||||
if (
|
||||
!isClickInsideMenu &&
|
||||
!isClickOnButton &&
|
||||
!mobileMenu.classList.contains('hidden')
|
||||
) {
|
||||
mobileMenu.classList.add('hidden');
|
||||
menuIcon.classList.remove('hidden');
|
||||
closeIcon.classList.add('hidden');
|
||||
mobileMenuButton.setAttribute('aria-expanded', 'false');
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
36
src/js/ui.ts
36
src/js/ui.ts
@@ -1367,6 +1367,17 @@ export const toolTemplates = {
|
||||
'ocr-pdf': () => `
|
||||
<h2 class="text-2xl font-bold text-white mb-4">OCR PDF</h2>
|
||||
<p class="mb-6 text-gray-400">Convert scanned PDFs into searchable documents. Select one or more languages present in your file for the best results.</p>
|
||||
|
||||
<div class="p-3 bg-gray-900 rounded-lg border border-gray-700 mb-6">
|
||||
<p class="text-sm text-gray-300"><strong class="text-white">How it works:</strong></p>
|
||||
<ul class="list-disc list-inside text-xs text-gray-400 mt-1 space-y-1">
|
||||
<li><strong class="text-white">Extract Text:</strong> Uses Tesseract OCR to recognize text from scanned images or PDFs.</li>
|
||||
<li><strong class="text-white">Searchable Output:</strong> Creates a new PDF with an invisible text layer, making your document fully searchable while preserving the original appearance.</li>
|
||||
<li><strong class="text-white">Character Filtering:</strong> Use whitelists to filter out unwanted characters and improve accuracy for specific document types (invoices, forms, etc.).</li>
|
||||
<li><strong class="text-white">Multi-language Support:</strong> Select multiple languages for documents containing mixed language content.</li>
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
${createFileInputHTML()}
|
||||
<div id="file-display-area" class="mt-4 space-y-2"></div>
|
||||
|
||||
@@ -1393,7 +1404,10 @@ export const toolTemplates = {
|
||||
|
||||
<!-- Advanced settings section -->
|
||||
<details class="bg-gray-900 border border-gray-700 rounded-lg p-3">
|
||||
<summary class="text-sm font-medium text-gray-300 cursor-pointer">Advanced Settings</summary>
|
||||
<summary class="text-sm font-medium text-gray-300 cursor-pointer flex items-center justify-between">
|
||||
<span>Advanced Settings (Recommended to improve accuracy)</span>
|
||||
<i data-lucide="chevron-down" class="w-4 h-4 transition-transform details-icon"></i>
|
||||
</summary>
|
||||
<div class="mt-4 space-y-4">
|
||||
<!-- Resolution Setting -->
|
||||
<div>
|
||||
@@ -1409,10 +1423,28 @@ export const toolTemplates = {
|
||||
<input type="checkbox" id="ocr-binarize" class="w-4 h-4 rounded text-indigo-600 bg-gray-700 border-gray-600">
|
||||
Binarize Image (Enhance Contrast for Clean Scans)
|
||||
</label>
|
||||
<!-- Character Whitelist -->
|
||||
|
||||
<!-- Character Whitelist Presets -->
|
||||
<div>
|
||||
<label for="whitelist-preset" class="block mb-1 text-xs font-medium text-gray-400">Character Whitelist Preset</label>
|
||||
<select id="whitelist-preset" class="w-full bg-gray-700 border border-gray-600 text-white rounded-lg p-2 text-sm mb-2">
|
||||
<option value="">None (All characters)</option>
|
||||
<option value="alphanumeric">Alphanumeric + Basic Punctuation</option>
|
||||
<option value="numbers-currency">Numbers + Currency Symbols</option>
|
||||
<option value="letters-only">Letters Only (A-Z, a-z)</option>
|
||||
<option value="numbers-only">Numbers Only (0-9)</option>
|
||||
<option value="invoice">Invoice/Receipt (Numbers, $, ., -, /)</option>
|
||||
<option value="forms">Forms (Alphanumeric + Common Symbols)</option>
|
||||
<option value="custom">Custom...</option>
|
||||
</select>
|
||||
<p class="text-xs text-gray-500 mt-1">Only these characters will be recognized. Leave empty for all characters.</p>
|
||||
</div>
|
||||
|
||||
<!-- Character Whitelist Input -->
|
||||
<div>
|
||||
<label for="ocr-whitelist" class="block mb-1 text-xs font-medium text-gray-400">Character Whitelist (Optional)</label>
|
||||
<input type="text" id="ocr-whitelist" class="w-full bg-gray-700 border border-gray-600 text-white rounded-lg p-2 text-sm" placeholder="e.g., abcdefghijklmnopqrstuvwxyz0123456789$.,">
|
||||
<p class="text-xs text-gray-500 mt-1">Only these characters will be recognized. Leave empty for all characters.</p>
|
||||
</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
Reference in New Issue
Block a user