feat: add option to opt out of embedding full fonts in OCR settings
This commit is contained in:
@@ -121,6 +121,9 @@ async function runOCR() {
|
|||||||
);
|
);
|
||||||
const binarize = (document.getElementById('ocr-binarize') as HTMLInputElement)
|
const binarize = (document.getElementById('ocr-binarize') as HTMLInputElement)
|
||||||
.checked;
|
.checked;
|
||||||
|
const embedFullFonts = (
|
||||||
|
document.getElementById('ocr-embed-full-fonts') as HTMLInputElement
|
||||||
|
).checked;
|
||||||
const whitelist = (
|
const whitelist = (
|
||||||
document.getElementById('ocr-whitelist') as HTMLInputElement
|
document.getElementById('ocr-whitelist') as HTMLInputElement
|
||||||
).value;
|
).value;
|
||||||
@@ -154,6 +157,7 @@ async function runOCR() {
|
|||||||
resolution: scale,
|
resolution: scale,
|
||||||
binarize,
|
binarize,
|
||||||
whitelist,
|
whitelist,
|
||||||
|
embedFullFonts,
|
||||||
onProgress: updateProgress,
|
onProgress: updateProgress,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ export interface OcrOptions {
|
|||||||
resolution: number;
|
resolution: number;
|
||||||
binarize: boolean;
|
binarize: boolean;
|
||||||
whitelist: string;
|
whitelist: string;
|
||||||
|
embedFullFonts?: boolean;
|
||||||
onProgress?: (status: string, progress: number) => void;
|
onProgress?: (status: string, progress: number) => void;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -131,7 +132,14 @@ export async function performOcr(
|
|||||||
pdfBytes: Uint8Array | ArrayBuffer,
|
pdfBytes: Uint8Array | ArrayBuffer,
|
||||||
options: OcrOptions
|
options: OcrOptions
|
||||||
): Promise<OcrResult> {
|
): Promise<OcrResult> {
|
||||||
const { language, resolution, binarize, whitelist, onProgress } = options;
|
const {
|
||||||
|
language,
|
||||||
|
resolution,
|
||||||
|
binarize,
|
||||||
|
whitelist,
|
||||||
|
embedFullFonts,
|
||||||
|
onProgress,
|
||||||
|
} = options;
|
||||||
const progress = onProgress || (() => {});
|
const progress = onProgress || (() => {});
|
||||||
|
|
||||||
const worker = await createConfiguredTesseractWorker(
|
const worker = await createConfiguredTesseractWorker(
|
||||||
@@ -198,14 +206,16 @@ export async function performOcr(
|
|||||||
getFontForLanguage('eng'),
|
getFontForLanguage('eng'),
|
||||||
]);
|
]);
|
||||||
primaryFont = await newPdfDoc.embedFont(scriptFontBytes, {
|
primaryFont = await newPdfDoc.embedFont(scriptFontBytes, {
|
||||||
subset: false,
|
subset: !embedFullFonts,
|
||||||
});
|
});
|
||||||
latinFont = await newPdfDoc.embedFont(latinFontBytes, {
|
latinFont = await newPdfDoc.embedFont(latinFontBytes, {
|
||||||
subset: false,
|
subset: !embedFullFonts,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
const fontBytes = await getFontForLanguage(primaryLang);
|
const fontBytes = await getFontForLanguage(primaryLang);
|
||||||
primaryFont = await newPdfDoc.embedFont(fontBytes, { subset: false });
|
primaryFont = await newPdfDoc.embedFont(fontBytes, {
|
||||||
|
subset: !embedFullFonts,
|
||||||
|
});
|
||||||
latinFont = primaryFont;
|
latinFont = primaryFont;
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|||||||
@@ -259,6 +259,16 @@
|
|||||||
/>
|
/>
|
||||||
Binarize Image (Enhance Contrast for Clean Scans)
|
Binarize Image (Enhance Contrast for Clean Scans)
|
||||||
</label>
|
</label>
|
||||||
|
<label
|
||||||
|
class="flex items-center gap-2 text-sm text-gray-300 cursor-pointer"
|
||||||
|
>
|
||||||
|
<input
|
||||||
|
type="checkbox"
|
||||||
|
id="ocr-embed-full-fonts"
|
||||||
|
class="w-4 h-4 rounded text-indigo-600 bg-gray-700 border-gray-600"
|
||||||
|
/>
|
||||||
|
Embed Full Fonts (Larger file, better compatibility)
|
||||||
|
</label>
|
||||||
<!-- Whitelist Presets -->
|
<!-- Whitelist Presets -->
|
||||||
<div>
|
<div>
|
||||||
<label
|
<label
|
||||||
|
|||||||
Reference in New Issue
Block a user