Files
bentopdf/src/js/logic/pdf-to-markdown-page.ts
alam00000 2c85ca74e9 feat: separate AGPL libraries and add dynamic WASM loading
- Add WASM settings page for configuring external AGPL modules
- Implement dynamic loading for PyMuPDF, Ghostscript, and CoherentPDF
- Add Cloudflare Worker proxy for serving WASM files with CORS
- Update all affected tool pages to check WASM availability
- Add showWasmRequiredDialog for missing module configuration

Documentation:
- Update README, licensing.html, and docs to clarify AGPL components
  are not bundled and must be configured separately
- Add WASM-PROXY.md deployment guide with recommended source URLs
- Rename "CPDF" to "CoherentPDF" for consistency
2026-01-27 15:26:26 +05:30

222 lines
7.0 KiB
TypeScript

import { showLoader, hideLoader, showAlert } from '../ui.js';
import {
downloadFile,
readFileAsArrayBuffer,
formatBytes,
getPDFDocument,
} from '../utils/helpers.js';
import { state } from '../state.js';
import { createIcons, icons } from 'lucide';
import { isWasmAvailable, getWasmBaseUrl } from '../config/wasm-cdn-config.js';
import { showWasmRequiredDialog } from '../utils/wasm-provider.js';
import { loadPyMuPDF, isPyMuPDFAvailable } from '../utils/pymupdf-loader.js';
document.addEventListener('DOMContentLoaded', () => {
// Look up every page element once. getElementById returns null for a missing
// id, so the code that uses these references checks them before use. The two
// `as HTMLInputElement` casts do NOT remove that possibility at runtime.
const fileInput = document.getElementById('file-input') as HTMLInputElement;
const dropZone = document.getElementById('drop-zone');
const processBtn = document.getElementById('process-btn');
const fileDisplayArea = document.getElementById('file-display-area');
const convertOptions = document.getElementById('convert-options');
const fileControls = document.getElementById('file-controls');
const addMoreBtn = document.getElementById('add-more-btn');
const clearFilesBtn = document.getElementById('clear-files-btn');
const backBtn = document.getElementById('back-to-tools');
const includeImagesCheckbox = document.getElementById(
  'include-images'
) as HTMLInputElement;

// "Back to tools": navigate to the URL held in import.meta.env.BASE_URL.
backBtn?.addEventListener('click', () => {
  window.location.href = import.meta.env.BASE_URL;
});
// Identifies the most recent updateUI() call. updateUI awaits while it loads
// page counts, so a second call (remove / clear / add more) can start before
// the first one finishes; the older call compares against this counter and
// stops instead of appending rows to a list the newer call already rebuilt.
let latestRenderId = 0;

/**
 * Re-renders the selected-file list from `state.files`.
 *
 * For each file a row is appended showing its name, its formatted size and,
 * once the PDF has been opened, its page count. When at least one file is
 * queued, the file controls and conversion options are revealed and the
 * process button is enabled; otherwise the list is emptied, those sections
 * are hidden and the button is disabled.
 *
 * Does nothing if any of the required page elements is missing.
 */
const updateUI = async () => {
  if (!fileDisplayArea || !convertOptions || !processBtn || !fileControls)
    return;

  const renderId = ++latestRenderId;
  const isStale = () => renderId !== latestRenderId;

  if (state.files.length > 0) {
    fileDisplayArea.innerHTML = '';

    for (let index = 0; index < state.files.length; index++) {
      const file = state.files[index];

      const fileDiv = document.createElement('div');
      fileDiv.className =
        'flex items-center justify-between bg-gray-700 p-3 rounded-lg text-sm';

      const infoContainer = document.createElement('div');
      infoContainer.className = 'flex flex-col overflow-hidden';

      const nameSpan = document.createElement('div');
      nameSpan.className = 'truncate font-medium text-gray-200 text-sm mb-1';
      // textContent (not innerHTML) so a file name is never parsed as markup.
      nameSpan.textContent = file.name;

      const metaSpan = document.createElement('div');
      metaSpan.className = 'text-xs text-gray-400';
      metaSpan.textContent = `${formatBytes(file.size)} • Loading pages...`;
      infoContainer.append(nameSpan, metaSpan);

      const removeBtn = document.createElement('button');
      removeBtn.className =
        'ml-4 text-red-400 hover:text-red-300 flex-shrink-0';
      removeBtn.innerHTML = '<i data-lucide="trash-2" class="w-4 h-4"></i>';
      // `index` is block-scoped per iteration, so each button removes its own row.
      removeBtn.onclick = () => {
        state.files = state.files.filter((_: File, i: number) => i !== index);
        updateUI();
      };

      fileDiv.append(infoContainer, removeBtn);
      fileDisplayArea.appendChild(fileDiv);

      // Best effort: a file that cannot be opened still gets a row, just
      // without a page count.
      let pageInfo: string;
      try {
        const arrayBuffer = await readFileAsArrayBuffer(file);
        const pdfDoc = await getPDFDocument({ data: arrayBuffer }).promise;
        pageInfo = `${pdfDoc.numPages} pages`;
      } catch {
        pageInfo = 'Could not load page count';
      }

      // A newer call took over while we were awaiting; it owns the list now.
      if (isStale()) return;

      // Same " • " separator as the loading placeholder above.
      metaSpan.textContent = `${formatBytes(file.size)} • ${pageInfo}`;
    }

    // Replace the <i data-lucide="..."> placeholders added above with icons.
    createIcons({ icons });
    fileControls.classList.remove('hidden');
    convertOptions.classList.remove('hidden');
    (processBtn as HTMLButtonElement).disabled = false;
  } else {
    fileDisplayArea.innerHTML = '';
    fileControls.classList.add('hidden');
    convertOptions.classList.add('hidden');
    (processBtn as HTMLButtonElement).disabled = true;
  }
};
/**
 * Clears the queued files and the `pdfDoc` reference on the shared state,
 * then re-renders so the page shows the empty list.
 */
const resetState = () => {
  Object.assign(state, { files: [], pdfDoc: null });
  updateUI();
};
/**
 * Converts every queued PDF to Markdown and triggers a download.
 *
 * - No files queued: shows a "No Files" alert and stops.
 * - One file: downloads `<name>.md`.
 * - Several files: downloads `markdown-files.zip` with one `.md` entry per PDF.
 *
 * The "include-images" checkbox state (false when the checkbox is absent) is
 * passed to the converter as `includeImages`. Any failure hides the loader
 * and is reported through an error alert; nothing is re-thrown. After a
 * successful run the success alert is given `resetState` as its callback.
 */
const convert = async () => {
  try {
    if (state.files.length === 0) {
      showAlert('No Files', 'Please select at least one PDF file.');
      return;
    }

    // Work on a snapshot so the loop bounds, the progress text and the final
    // count agree even if state.files is reassigned during an await.
    const files: File[] = [...state.files];

    showLoader('Loading PDF converter...');
    const pymupdf = await loadPyMuPDF();
    const includeImages = includeImagesCheckbox?.checked ?? false;

    if (files.length === 1) {
      const file = files[0];
      showLoader(`Converting ${file.name}...`);

      const markdown = await pymupdf.pdfToMarkdown(file, { includeImages });
      const outName = file.name.replace(/\.pdf$/i, '') + '.md';
      const blob = new Blob([markdown], { type: 'text/markdown' });
      downloadFile(blob, outName);

      hideLoader();
      showAlert(
        'Conversion Complete',
        `Successfully converted ${file.name} to Markdown.`,
        'success',
        () => resetState()
      );
    } else {
      showLoader('Converting multiple PDFs...');
      // Loaded on demand: only the multi-file path needs the ZIP library.
      const JSZip = (await import('jszip')).default;
      const zip = new JSZip();

      // Entry names already written, lower-cased so names that differ only
      // in case are also treated as a clash.
      const usedNames = new Set<string>();

      for (let i = 0; i < files.length; i++) {
        const file = files[i];
        showLoader(`Converting ${i + 1}/${files.length}: ${file.name}...`);

        const markdown = await pymupdf.pdfToMarkdown(file, { includeImages });
        const baseName = file.name.replace(/\.pdf$/i, '');

        // zip.file() replaces an existing entry with the same name, so two
        // inputs called "report.pdf" would leave only the last one in the
        // archive. Give later duplicates a " (2)", " (3)", ... suffix.
        let entryName = `${baseName}.md`;
        for (let n = 2; usedNames.has(entryName.toLowerCase()); n++) {
          entryName = `${baseName} (${n}).md`;
        }
        usedNames.add(entryName.toLowerCase());

        zip.file(entryName, markdown);
      }

      showLoader('Creating ZIP archive...');
      const zipBlob = await zip.generateAsync({ type: 'blob' });
      downloadFile(zipBlob, 'markdown-files.zip');

      hideLoader();
      showAlert(
        'Conversion Complete',
        `Successfully converted ${files.length} PDF(s) to Markdown.`,
        'success',
        () => resetState()
      );
    }
  } catch (e: unknown) {
    hideLoader();
    // Anything can be thrown; only Error instances are known to carry `.message`.
    const message = e instanceof Error ? e.message : String(e);
    showAlert(
      'Error',
      `An error occurred during conversion. Error: ${message}`
    );
  }
};
/**
 * Appends newly chosen files to `state.files` and re-renders the list.
 *
 * A file is kept when its MIME type is `application/pdf` or its name ends in
 * `.pdf` (case-insensitive); everything else is dropped without notice.
 * A null or empty selection is ignored.
 *
 * @param files Selection coming from the file input or a drop event.
 */
const handleFileSelect = (files: FileList | null) => {
  if (!files || files.length === 0) return;

  const accepted: File[] = [];
  for (const candidate of Array.from(files)) {
    const hasPdfType = candidate.type === 'application/pdf';
    if (hasPdfType || candidate.name.toLowerCase().endsWith('.pdf')) {
      accepted.push(candidate);
    }
  }

  state.files = [...state.files, ...accepted];
  updateUI();
};
// File picker and drag-and-drop wiring; skipped unless both elements exist.
if (fileInput && dropZone) {
  const HOVER_CLASS = 'bg-gray-700';
  // Turns the drop-zone highlight on or off.
  const setHover = (active: boolean) =>
    dropZone.classList.toggle(HOVER_CLASS, active);

  fileInput.addEventListener('change', (event) => {
    const input = event.target as HTMLInputElement;
    handleFileSelect(input.files);
  });

  // preventDefault on dragover is what allows the element to receive a drop.
  dropZone.addEventListener('dragover', (event) => {
    event.preventDefault();
    setHover(true);
  });

  dropZone.addEventListener('dragleave', (event) => {
    event.preventDefault();
    setHover(false);
  });

  dropZone.addEventListener('drop', (event) => {
    event.preventDefault();
    setHover(false);

    const dropped = event.dataTransfer?.files;
    if (!dropped || dropped.length === 0) return;
    handleFileSelect(dropped);
  });

  // Clear the value before the picker opens so choosing the same file again
  // still produces a 'change' event.
  fileInput.addEventListener('click', () => {
    fileInput.value = '';
  });
}
// Button wiring. Each button is optional: a missing element is skipped.

// "Add more" re-opens the file picker. fileInput is only *cast* to a non-null
// type where it is looked up, and the file-input wiring treats it as possibly
// null, so guard here as well instead of throwing on click when it is absent.
addMoreBtn?.addEventListener('click', () => {
  fileInput?.click();
});

// "Clear" empties the queue and re-renders.
clearFilesBtn?.addEventListener('click', () => {
  resetState();
});

// "Process" starts the conversion; convert() reports its own errors.
processBtn?.addEventListener('click', convert);
});