feat: integrate Tesseract.js with improved language availability and font handling
- Refactored OCR page recognition to utilize a configured Tesseract worker.
- Added functions to manage font URLs and asset filenames based on language.
- Implemented language availability checks and error handling for unsupported languages.
- Enhanced PDF workflow to display available OCR languages and handle user selections.
- Introduced utility functions for resolving Tesseract asset configurations.
- Added tests for OCR functionality, font loading, and Tesseract runtime behavior.
- Updated global types to include environment variables for Tesseract and font configurations.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
import { showAlert } from '../ui.js';
|
||||
import { tesseractLanguages } from '../config/tesseract-languages.js';
|
||||
import { createWorkflowEditor, updateNodeDisplay } from '../workflow/editor';
|
||||
import { executeWorkflow } from '../workflow/engine';
|
||||
import { getAvailableTesseractLanguageEntries } from '../utils/tesseract-language-availability.js';
|
||||
import {
|
||||
nodeRegistry,
|
||||
getNodesByCategory,
|
||||
@@ -1194,7 +1194,7 @@ function showNodeSettings(node: BaseWorkflowNode) {
|
||||
{ label: 'High (288 DPI)', value: '3.0' },
|
||||
{ label: 'Ultra (384 DPI)', value: '4.0' },
|
||||
],
|
||||
language: Object.entries(tesseractLanguages).map(([code, name]) => ({
|
||||
language: getAvailableTesseractLanguageEntries().map(([code, name]) => ({
|
||||
label: name,
|
||||
value: code,
|
||||
})),
|
||||
|
||||
Reference in New Issue
Block a user