feat(ocr): add whitelist presets and improve UI for OCR tool

refactor: format code and improve mobile menu accessibility
style: fix whitespace and formatting in multiple files
docs: update documentation with better formatting and examples
This commit is contained in:
abdullahalam123
2025-10-20 18:51:49 +05:30
parent 59b351eee4
commit 0e7c53560a
19 changed files with 755 additions and 363 deletions

View File

@@ -25,8 +25,8 @@ async function handleSinglePdfUpload(toolId, file) {
showLoader('Loading PDF...');
try {
const pdfBytes = await readFileAsArrayBuffer(file);
state.pdfDoc = await PDFLibDocument.load(pdfBytes as ArrayBuffer, {
ignoreEncryption: true
state.pdfDoc = await PDFLibDocument.load(pdfBytes as ArrayBuffer, {
ignoreEncryption: true,
});
hideLoader();
@@ -324,7 +324,11 @@ async function handleSinglePdfUpload(toolId, file) {
}
async function handleMultiFileUpload(toolId) {
if (toolId === 'merge' || toolId === 'alternate-merge' || toolId === 'reverse-pages') {
if (
toolId === 'merge' ||
toolId === 'alternate-merge' ||
toolId === 'reverse-pages'
) {
const pdfFilesUnloaded: File[] = [];
state.files.forEach((file) => {

View File

@@ -46,7 +46,7 @@ export async function addAttachments() {
showAlert('Error', `Failed to attach files: ${error.message}`);
} finally {
hideLoader();
clearAttachments();
clearAttachments();
}
}
@@ -103,7 +103,7 @@ export function setupAddAttachmentsTool() {
const nameSpan = document.createElement('span');
nameSpan.className = 'truncate text-sm';
nameSpan.textContent = file.name;
nameSpan.textContent = file.name;
const sizeSpan = document.createElement('span');
sizeSpan.className = 'text-xs text-gray-400';

View File

@@ -113,11 +113,9 @@ async function runOCR() {
tessjs_create_hocr: '1',
});
if (whitelist.trim()) {
await worker.setParameters({
tessedit_char_whitelist: whitelist.trim(),
});
}
await worker.setParameters({
tessedit_char_whitelist: whitelist,
});
// @ts-expect-error TS(2304) FIXME: Cannot find name 'pdfjsLib'.
const pdf = await pdfjsLib.getDocument(
@@ -144,7 +142,11 @@ async function runOCR() {
binarizeCanvas(context);
}
const result = await worker.recognize(canvas, {}, { text: true, hocr: true });
const result = await worker.recognize(
canvas,
{},
{ text: true, hocr: true }
);
const data = result.data;
const newPage = newPdfDoc.addPage([viewport.width, viewport.height]);
const pngImageBytes = await new Promise((resolve) =>
@@ -281,9 +283,51 @@ export function setupOcrTool() {
);
const processBtn = document.getElementById('process-btn');
// Whitelist presets: maps a preset key to the literal set of characters that
// preset allows. The keys match the non-empty, non-"custom" <option> values of
// the `whitelist-preset` <select> in the OCR tool template.
// The selected string is copied verbatim into the `ocr-whitelist` input by the
// preset change handler; presumably it is then used as Tesseract's
// `tessedit_char_whitelist` — confirm against runOCR.
const whitelistPresets: Record<string, string> = {
// Letters, digits, space and basic punctuation (. , ! ? - ' ").
alphanumeric:
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-\'"',
// Digits plus common currency symbols, separators and a trailing space.
'numbers-currency': '0123456789$€£¥.,- ',
// Upper/lower-case ASCII letters and a trailing space.
'letters-only': 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ',
// Digits only (no space, no separators).
'numbers-only': '0123456789',
// Digits plus the symbols $ . , / - # : and a trailing space.
invoice: '0123456789$.,/-#: ',
// Letters, digits, space and the symbols . , ( ) - _ / @ # :
forms:
'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,()-_/@#:',
};
// Handle whitelist preset selection: keep the free-text whitelist input in
// sync with the preset dropdown.
const presetSelect = document.getElementById(
'whitelist-preset'
) as HTMLSelectElement;
const whitelistInput = document.getElementById(
'ocr-whitelist'
) as HTMLInputElement;
// NOTE(review): `presetSelect` is guarded with `?.` below, but `whitelistInput`
// is dereferenced inside the handler without a null check — confirm both
// elements are always rendered together.
presetSelect?.addEventListener('change', (e) => {
const preset = (e.target as HTMLSelectElement).value;
if (preset && preset !== 'custom') {
// A named preset: fill the input with the preset's characters and lock it
// so the text cannot be edited while the preset is selected.
// NOTE(review): an option value that is not a key of `whitelistPresets`
// would assign `undefined` here — verify the option list stays in sync.
whitelistInput.value = whitelistPresets[preset];
whitelistInput.disabled = true;
} else {
// '' (no preset) or 'custom': make the input editable again.
whitelistInput.disabled = false;
// Only the empty option clears the text; 'custom' keeps whatever is
// currently in the input.
if (preset === '') {
whitelistInput.value = '';
}
}
});
// Handle details toggle icon rotation: flip the `.details-icon` inside a
// <details> element when it opens or closes.
// The selector is document-wide, so every <details> present when this runs
// gets a listener, not only the OCR tool's "Advanced Settings" section.
document.querySelectorAll('details').forEach((details) => {
details.addEventListener('toggle', () => {
// Looked up on every toggle rather than cached; a <details> without a
// `.details-icon` descendant is skipped by the guard below.
const icon = details.querySelector('.details-icon') as HTMLElement;
if (icon) {
// 180deg while open, back to 0deg when closed.
icon.style.transform = details.open ? 'rotate(180deg)' : 'rotate(0deg)';
}
});
});
langSearch.addEventListener('input', () => {
// @ts-expect-error TS(2339) FIXME: Property 'value' does not exist on type 'HTMLEleme... Remove this comment to see the full error message
const searchTerm = langSearch.value.toLowerCase();
const searchTerm = (langSearch as HTMLInputElement).value.toLowerCase();
langList.querySelectorAll('label').forEach((label) => {
label.style.display = label.textContent.toLowerCase().includes(searchTerm)
? ''
@@ -291,19 +335,14 @@ export function setupOcrTool() {
});
});
// Update the display of selected languages
langList.addEventListener('change', () => {
const selected = Array.from(
langList.querySelectorAll('.lang-checkbox:checked')
)
// @ts-expect-error TS(2339) FIXME: Property 'value' does not exist on type 'Element'.
.map((cb) => tesseractLanguages[cb.value]);
).map((cb) => tesseractLanguages[(cb as HTMLInputElement).value]);
selectedLangsDisplay.textContent =
selected.length > 0 ? selected.join(', ') : 'None';
// @ts-expect-error TS(2339) FIXME: Property 'disabled' does not exist on type 'HTMLEl... Remove this comment to see the full error message
processBtn.disabled = selected.length === 0;
(processBtn as HTMLButtonElement).disabled = selected.length === 0;
});
// Attach the main OCR function to the process button
processBtn.addEventListener('click', runOCR);
}

View File

@@ -6,7 +6,9 @@ import { PDFDocument as PDFLibDocument } from 'pdf-lib';
import JSZip from 'jszip';
export async function reversePages() {
const pdfDocs = state.files.filter((file: File) => file.type === 'application/pdf');
const pdfDocs = state.files.filter(
(file: File) => file.type === 'application/pdf'
);
if (!pdfDocs.length) {
showAlert('Error', 'PDF not loaded.');
return;
@@ -16,7 +18,7 @@ export async function reversePages() {
const zip = new JSZip();
for (let j = 0; j < pdfDocs.length; j++) {
const file = pdfDocs[j];
const arrayBuffer = await file.arrayBuffer();
const arrayBuffer = await file.arrayBuffer();
const pdfDoc = await PDFLibDocument.load(arrayBuffer);
const newPdf = await PDFLibDocument.create();
const pageCount = pdfDoc.getPageCount();

View File

@@ -21,7 +21,9 @@ const hideBrandingSections = () => {
featuresSection.style.display = 'none';
}
const securitySection = document.getElementById('security-compliance-section');
const securitySection = document.getElementById(
'security-compliance-section'
);
if (securitySection) {
securitySection.style.display = 'none';
}
@@ -47,7 +49,7 @@ const hideBrandingSections = () => {
}
const sectionDividers = document.querySelectorAll('.section-divider');
sectionDividers.forEach(divider => {
sectionDividers.forEach((divider) => {
(divider as HTMLElement).style.display = 'none';
});

View File

@@ -1,48 +1,53 @@
document.addEventListener('DOMContentLoaded', () => {
const mobileMenuButton = document.getElementById('mobile-menu-button');
const mobileMenu = document.getElementById('mobile-menu');
const menuIcon = document.getElementById('menu-icon');
const closeIcon = document.getElementById('close-icon');
const mobileMenuButton = document.getElementById('mobile-menu-button');
const mobileMenu = document.getElementById('mobile-menu');
const menuIcon = document.getElementById('menu-icon');
const closeIcon = document.getElementById('close-icon');
if (mobileMenuButton && mobileMenu && menuIcon && closeIcon) {
// Toggle menu on button click
mobileMenuButton.addEventListener('click', () => {
const isExpanded = mobileMenuButton.getAttribute('aria-expanded') === 'true';
// Toggle menu visibility
mobileMenu.classList.toggle('hidden');
// Toggle icons
menuIcon.classList.toggle('hidden');
closeIcon.classList.toggle('hidden');
// Update aria-expanded for accessibility
mobileMenuButton.setAttribute('aria-expanded', (!isExpanded).toString());
});
if (mobileMenuButton && mobileMenu && menuIcon && closeIcon) {
// Toggle menu on button click
mobileMenuButton.addEventListener('click', () => {
const isExpanded =
mobileMenuButton.getAttribute('aria-expanded') === 'true';
// Close menu when clicking on a link
const mobileLinks = mobileMenu.querySelectorAll('a');
mobileLinks.forEach(link => {
link.addEventListener('click', () => {
mobileMenu.classList.add('hidden');
menuIcon.classList.remove('hidden');
closeIcon.classList.add('hidden');
mobileMenuButton.setAttribute('aria-expanded', 'false');
});
});
// Toggle menu visibility
mobileMenu.classList.toggle('hidden');
// Close menu when clicking outside
document.addEventListener('click', (event) => {
const target = event.target as Node;
const isClickInsideMenu = mobileMenu.contains(target);
const isClickOnButton = mobileMenuButton.contains(target);
if (!isClickInsideMenu && !isClickOnButton && !mobileMenu.classList.contains('hidden')) {
mobileMenu.classList.add('hidden');
menuIcon.classList.remove('hidden');
closeIcon.classList.add('hidden');
mobileMenuButton.setAttribute('aria-expanded', 'false');
}
});
}
});
// Toggle icons
menuIcon.classList.toggle('hidden');
closeIcon.classList.toggle('hidden');
// Update aria-expanded for accessibility
mobileMenuButton.setAttribute('aria-expanded', (!isExpanded).toString());
});
// Close menu when clicking on a link
const mobileLinks = mobileMenu.querySelectorAll('a');
mobileLinks.forEach((link) => {
link.addEventListener('click', () => {
mobileMenu.classList.add('hidden');
menuIcon.classList.remove('hidden');
closeIcon.classList.add('hidden');
mobileMenuButton.setAttribute('aria-expanded', 'false');
});
});
// Close menu when clicking outside
document.addEventListener('click', (event) => {
const target = event.target as Node;
const isClickInsideMenu = mobileMenu.contains(target);
const isClickOnButton = mobileMenuButton.contains(target);
if (
!isClickInsideMenu &&
!isClickOnButton &&
!mobileMenu.classList.contains('hidden')
) {
mobileMenu.classList.add('hidden');
menuIcon.classList.remove('hidden');
closeIcon.classList.add('hidden');
mobileMenuButton.setAttribute('aria-expanded', 'false');
}
});
}
});

View File

@@ -1367,6 +1367,17 @@ export const toolTemplates = {
'ocr-pdf': () => `
<h2 class="text-2xl font-bold text-white mb-4">OCR PDF</h2>
<p class="mb-6 text-gray-400">Convert scanned PDFs into searchable documents. Select one or more languages present in your file for the best results.</p>
<div class="p-3 bg-gray-900 rounded-lg border border-gray-700 mb-6">
<p class="text-sm text-gray-300"><strong class="text-white">How it works:</strong></p>
<ul class="list-disc list-inside text-xs text-gray-400 mt-1 space-y-1">
<li><strong class="text-white">Extract Text:</strong> Uses Tesseract OCR to recognize text from scanned images or PDFs.</li>
<li><strong class="text-white">Searchable Output:</strong> Creates a new PDF with an invisible text layer, making your document fully searchable while preserving the original appearance.</li>
<li><strong class="text-white">Character Filtering:</strong> Use whitelists to filter out unwanted characters and improve accuracy for specific document types (invoices, forms, etc.).</li>
<li><strong class="text-white">Multi-language Support:</strong> Select multiple languages for documents containing mixed language content.</li>
</ul>
</div>
${createFileInputHTML()}
<div id="file-display-area" class="mt-4 space-y-2"></div>
@@ -1393,7 +1404,10 @@ export const toolTemplates = {
<!-- Advanced settings section -->
<details class="bg-gray-900 border border-gray-700 rounded-lg p-3">
<summary class="text-sm font-medium text-gray-300 cursor-pointer">Advanced Settings</summary>
<summary class="text-sm font-medium text-gray-300 cursor-pointer flex items-center justify-between">
<span>Advanced Settings (Recommended to improve accuracy)</span>
<i data-lucide="chevron-down" class="w-4 h-4 transition-transform details-icon"></i>
</summary>
<div class="mt-4 space-y-4">
<!-- Resolution Setting -->
<div>
@@ -1409,10 +1423,28 @@ export const toolTemplates = {
<input type="checkbox" id="ocr-binarize" class="w-4 h-4 rounded text-indigo-600 bg-gray-700 border-gray-600">
Binarize Image (Enhance Contrast for Clean Scans)
</label>
<!-- Character Whitelist -->
<!-- Character Whitelist Presets -->
<div>
<label for="whitelist-preset" class="block mb-1 text-xs font-medium text-gray-400">Character Whitelist Preset</label>
<select id="whitelist-preset" class="w-full bg-gray-700 border border-gray-600 text-white rounded-lg p-2 text-sm mb-2">
<option value="">None (All characters)</option>
<option value="alphanumeric">Alphanumeric + Basic Punctuation</option>
<option value="numbers-currency">Numbers + Currency Symbols</option>
<option value="letters-only">Letters Only (A-Z, a-z)</option>
<option value="numbers-only">Numbers Only (0-9)</option>
<option value="invoice">Invoice/Receipt (Numbers, $, ., -, /)</option>
<option value="forms">Forms (Alphanumeric + Common Symbols)</option>
<option value="custom">Custom...</option>
</select>
<p class="text-xs text-gray-500 mt-1">Only these characters will be recognized. Leave empty for all characters.</p>
</div>
<!-- Character Whitelist Input -->
<div>
<label for="ocr-whitelist" class="block mb-1 text-xs font-medium text-gray-400">Character Whitelist (Optional)</label>
<input type="text" id="ocr-whitelist" class="w-full bg-gray-700 border border-gray-600 text-white rounded-lg p-2 text-sm" placeholder="e.g., abcdefghijklmnopqrstuvwxyz0123456789$.,">
<p class="text-xs text-gray-500 mt-1">Only these characters will be recognized. Leave empty for all characters.</p>
</div>
</div>
</details>