feat: integrate Tesseract.js with improved language availability and font handling

- Refactored OCR page recognition to utilize a configured Tesseract worker.
- Added functions to manage font URLs and asset filenames based on language.
- Implemented language availability checks and error handling for unsupported languages.
- Enhanced PDF workflow to display available OCR languages and handle user selections.
- Introduced utility functions for resolving Tesseract asset configurations.
- Added tests for OCR functionality, font loading, and Tesseract runtime behavior.
- Updated global types to include environment variables for Tesseract and font configurations.
This commit is contained in:
alam00000
2026-03-14 15:50:30 +05:30
parent 58c78b09d2
commit 77da6d7a7d
23 changed files with 1906 additions and 564 deletions

View File

@@ -1,7 +1,7 @@
import { showAlert } from '../ui.js';
-import { tesseractLanguages } from '../config/tesseract-languages.js';
import { createWorkflowEditor, updateNodeDisplay } from '../workflow/editor';
import { executeWorkflow } from '../workflow/engine';
+import { getAvailableTesseractLanguageEntries } from '../utils/tesseract-language-availability.js';
import {
nodeRegistry,
getNodesByCategory,
@@ -1194,7 +1194,7 @@ function showNodeSettings(node: BaseWorkflowNode) {
{ label: 'High (288 DPI)', value: '3.0' },
{ label: 'Ultra (384 DPI)', value: '4.0' },
],
-language: Object.entries(tesseractLanguages).map(([code, name]) => ({
+language: getAvailableTesseractLanguageEntries().map(([code, name]) => ({
label: name,
value: code,
})),