feat: separate AGPL libraries and add dynamic WASM loading
- Add WASM settings page for configuring external AGPL modules
- Implement dynamic loading for PyMuPDF, Ghostscript, and CoherentPDF
- Add Cloudflare Worker proxy for serving WASM files with CORS
- Update all affected tool pages to check WASM availability
- Add showWasmRequiredDialog for missing module configuration

Documentation:
- Update README, licensing.html, and docs to clarify AGPL components are not bundled and must be configured separately
- Add WASM-PROXY.md deployment guide with recommended source URLs
- Rename "CPDF" to "CoherentPDF" for consistency
This commit is contained in:
@@ -1,204 +1,237 @@
|
||||
import { showLoader, hideLoader, showAlert } from '../ui.js';
|
||||
import { downloadFile, readFileAsArrayBuffer, formatBytes, getPDFDocument } from '../utils/helpers.js';
|
||||
import {
|
||||
downloadFile,
|
||||
readFileAsArrayBuffer,
|
||||
formatBytes,
|
||||
getPDFDocument,
|
||||
} from '../utils/helpers.js';
|
||||
import { state } from '../state.js';
|
||||
import { createIcons, icons } from 'lucide';
|
||||
import { PyMuPDF } from '@bentopdf/pymupdf-wasm';
|
||||
import { getWasmBaseUrl } from '../config/wasm-cdn-config.js';
|
||||
|
||||
const pymupdf = new PyMuPDF(getWasmBaseUrl('pymupdf'));
|
||||
import { isWasmAvailable, getWasmBaseUrl } from '../config/wasm-cdn-config.js';
|
||||
import { showWasmRequiredDialog } from '../utils/wasm-provider.js';
|
||||
import { loadPyMuPDF, isPyMuPDFAvailable } from '../utils/pymupdf-loader.js';
|
||||
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
const fileInput = document.getElementById('file-input') as HTMLInputElement;
|
||||
const dropZone = document.getElementById('drop-zone');
|
||||
const processBtn = document.getElementById('process-btn');
|
||||
const fileDisplayArea = document.getElementById('file-display-area');
|
||||
const extractOptions = document.getElementById('extract-options');
|
||||
const fileControls = document.getElementById('file-controls');
|
||||
const addMoreBtn = document.getElementById('add-more-btn');
|
||||
const clearFilesBtn = document.getElementById('clear-files-btn');
|
||||
const backBtn = document.getElementById('back-to-tools');
|
||||
const fileInput = document.getElementById('file-input') as HTMLInputElement;
|
||||
const dropZone = document.getElementById('drop-zone');
|
||||
const processBtn = document.getElementById('process-btn');
|
||||
const fileDisplayArea = document.getElementById('file-display-area');
|
||||
const extractOptions = document.getElementById('extract-options');
|
||||
const fileControls = document.getElementById('file-controls');
|
||||
const addMoreBtn = document.getElementById('add-more-btn');
|
||||
const clearFilesBtn = document.getElementById('clear-files-btn');
|
||||
const backBtn = document.getElementById('back-to-tools');
|
||||
|
||||
if (backBtn) {
|
||||
backBtn.addEventListener('click', () => {
|
||||
window.location.href = import.meta.env.BASE_URL;
|
||||
});
|
||||
}
|
||||
if (backBtn) {
|
||||
backBtn.addEventListener('click', () => {
|
||||
window.location.href = import.meta.env.BASE_URL;
|
||||
});
|
||||
}
|
||||
|
||||
const updateUI = async () => {
|
||||
if (!fileDisplayArea || !extractOptions || !processBtn || !fileControls) return;
|
||||
const updateUI = async () => {
|
||||
if (!fileDisplayArea || !extractOptions || !processBtn || !fileControls)
|
||||
return;
|
||||
|
||||
if (state.files.length > 0) {
|
||||
fileDisplayArea.innerHTML = '';
|
||||
if (state.files.length > 0) {
|
||||
fileDisplayArea.innerHTML = '';
|
||||
|
||||
for (let index = 0; index < state.files.length; index++) {
|
||||
const file = state.files[index];
|
||||
const fileDiv = document.createElement('div');
|
||||
fileDiv.className = 'flex items-center justify-between bg-gray-700 p-3 rounded-lg text-sm';
|
||||
for (let index = 0; index < state.files.length; index++) {
|
||||
const file = state.files[index];
|
||||
const fileDiv = document.createElement('div');
|
||||
fileDiv.className =
|
||||
'flex items-center justify-between bg-gray-700 p-3 rounded-lg text-sm';
|
||||
|
||||
const infoContainer = document.createElement('div');
|
||||
infoContainer.className = 'flex flex-col overflow-hidden';
|
||||
const infoContainer = document.createElement('div');
|
||||
infoContainer.className = 'flex flex-col overflow-hidden';
|
||||
|
||||
const nameSpan = document.createElement('div');
|
||||
nameSpan.className = 'truncate font-medium text-gray-200 text-sm mb-1';
|
||||
nameSpan.textContent = file.name;
|
||||
const nameSpan = document.createElement('div');
|
||||
nameSpan.className = 'truncate font-medium text-gray-200 text-sm mb-1';
|
||||
nameSpan.textContent = file.name;
|
||||
|
||||
const metaSpan = document.createElement('div');
|
||||
metaSpan.className = 'text-xs text-gray-400';
|
||||
metaSpan.textContent = `${formatBytes(file.size)} • Loading pages...`;
|
||||
const metaSpan = document.createElement('div');
|
||||
metaSpan.className = 'text-xs text-gray-400';
|
||||
metaSpan.textContent = `${formatBytes(file.size)} • Loading pages...`;
|
||||
|
||||
infoContainer.append(nameSpan, metaSpan);
|
||||
infoContainer.append(nameSpan, metaSpan);
|
||||
|
||||
const removeBtn = document.createElement('button');
|
||||
removeBtn.className = 'ml-4 text-red-400 hover:text-red-300 flex-shrink-0';
|
||||
removeBtn.innerHTML = '<i data-lucide="trash-2" class="w-4 h-4"></i>';
|
||||
removeBtn.onclick = () => {
|
||||
state.files = state.files.filter((_, i) => i !== index);
|
||||
updateUI();
|
||||
};
|
||||
const removeBtn = document.createElement('button');
|
||||
removeBtn.className =
|
||||
'ml-4 text-red-400 hover:text-red-300 flex-shrink-0';
|
||||
removeBtn.innerHTML = '<i data-lucide="trash-2" class="w-4 h-4"></i>';
|
||||
removeBtn.onclick = () => {
|
||||
state.files = state.files.filter((_, i) => i !== index);
|
||||
updateUI();
|
||||
};
|
||||
|
||||
fileDiv.append(infoContainer, removeBtn);
|
||||
fileDisplayArea.appendChild(fileDiv);
|
||||
fileDiv.append(infoContainer, removeBtn);
|
||||
fileDisplayArea.appendChild(fileDiv);
|
||||
|
||||
try {
|
||||
const arrayBuffer = await readFileAsArrayBuffer(file);
|
||||
const pdfDoc = await getPDFDocument({ data: arrayBuffer }).promise;
|
||||
metaSpan.textContent = `${formatBytes(file.size)} • ${pdfDoc.numPages} pages`;
|
||||
} catch (error) {
|
||||
console.error('Error loading PDF:', error);
|
||||
metaSpan.textContent = `${formatBytes(file.size)} • Could not load page count`;
|
||||
}
|
||||
}
|
||||
|
||||
createIcons({ icons });
|
||||
fileControls.classList.remove('hidden');
|
||||
extractOptions.classList.remove('hidden');
|
||||
(processBtn as HTMLButtonElement).disabled = false;
|
||||
} else {
|
||||
fileDisplayArea.innerHTML = '';
|
||||
fileControls.classList.add('hidden');
|
||||
extractOptions.classList.add('hidden');
|
||||
(processBtn as HTMLButtonElement).disabled = true;
|
||||
}
|
||||
};
|
||||
|
||||
const resetState = () => {
|
||||
state.files = [];
|
||||
state.pdfDoc = null;
|
||||
updateUI();
|
||||
};
|
||||
|
||||
const extractForAI = async () => {
|
||||
try {
|
||||
if (state.files.length === 0) {
|
||||
showAlert('No Files', 'Please select at least one PDF file.');
|
||||
return;
|
||||
}
|
||||
|
||||
showLoader('Loading engine...');
|
||||
await pymupdf.load();
|
||||
|
||||
const total = state.files.length;
|
||||
let completed = 0;
|
||||
let failed = 0;
|
||||
|
||||
if (total === 1) {
|
||||
const file = state.files[0];
|
||||
showLoader(`Extracting ${file.name} for AI...`);
|
||||
|
||||
const llamaDocs = await (pymupdf as any).pdfToLlamaIndex(file);
|
||||
const outName = file.name.replace(/\.pdf$/i, '') + '_llm.json';
|
||||
const jsonContent = JSON.stringify(llamaDocs, null, 2);
|
||||
downloadFile(new Blob([jsonContent], { type: 'application/json' }), outName);
|
||||
|
||||
hideLoader();
|
||||
showAlert('Extraction Complete', `Successfully extracted PDF for AI/LLM use.`, 'success', () => resetState());
|
||||
} else {
|
||||
// Multiple files - create ZIP
|
||||
const JSZip = (await import('jszip')).default;
|
||||
const zip = new JSZip();
|
||||
|
||||
for (const file of state.files) {
|
||||
try {
|
||||
showLoader(`Extracting ${file.name} for AI (${completed + 1}/${total})...`);
|
||||
|
||||
const llamaDocs = await (pymupdf as any).pdfToLlamaIndex(file);
|
||||
const outName = file.name.replace(/\.pdf$/i, '') + '_llm.json';
|
||||
const jsonContent = JSON.stringify(llamaDocs, null, 2);
|
||||
zip.file(outName, jsonContent);
|
||||
|
||||
completed++;
|
||||
} catch (error) {
|
||||
console.error(`Failed to extract ${file.name}:`, error);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
|
||||
showLoader('Creating ZIP archive...');
|
||||
const zipBlob = await zip.generateAsync({ type: 'blob' });
|
||||
|
||||
downloadFile(zipBlob, 'pdf-for-ai.zip');
|
||||
|
||||
hideLoader();
|
||||
|
||||
if (failed === 0) {
|
||||
showAlert('Extraction Complete', `Successfully extracted ${completed} PDF(s) for AI/LLM use.`, 'success', () => resetState());
|
||||
} else {
|
||||
showAlert('Extraction Partial', `Extracted ${completed} PDF(s), failed ${failed}.`, 'warning', () => resetState());
|
||||
}
|
||||
}
|
||||
} catch (e: any) {
|
||||
hideLoader();
|
||||
showAlert('Error', `An error occurred during extraction. Error: ${e.message}`);
|
||||
const arrayBuffer = await readFileAsArrayBuffer(file);
|
||||
const pdfDoc = await getPDFDocument({ data: arrayBuffer }).promise;
|
||||
metaSpan.textContent = `${formatBytes(file.size)} • ${pdfDoc.numPages} pages`;
|
||||
} catch (error) {
|
||||
console.error('Error loading PDF:', error);
|
||||
metaSpan.textContent = `${formatBytes(file.size)} • Could not load page count`;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
const handleFileSelect = (files: FileList | null) => {
|
||||
if (files && files.length > 0) {
|
||||
const pdfFiles = Array.from(files).filter(f => f.type === 'application/pdf' || f.name.toLowerCase().endsWith('.pdf'));
|
||||
if (pdfFiles.length > 0) {
|
||||
state.files = [...state.files, ...pdfFiles];
|
||||
updateUI();
|
||||
}
|
||||
createIcons({ icons });
|
||||
fileControls.classList.remove('hidden');
|
||||
extractOptions.classList.remove('hidden');
|
||||
(processBtn as HTMLButtonElement).disabled = false;
|
||||
} else {
|
||||
fileDisplayArea.innerHTML = '';
|
||||
fileControls.classList.add('hidden');
|
||||
extractOptions.classList.add('hidden');
|
||||
(processBtn as HTMLButtonElement).disabled = true;
|
||||
}
|
||||
};
|
||||
|
||||
const resetState = () => {
|
||||
state.files = [];
|
||||
state.pdfDoc = null;
|
||||
updateUI();
|
||||
};
|
||||
|
||||
const extractForAI = async () => {
|
||||
try {
|
||||
if (state.files.length === 0) {
|
||||
showAlert('No Files', 'Please select at least one PDF file.');
|
||||
return;
|
||||
}
|
||||
|
||||
showLoader('Loading engine...');
|
||||
const pymupdf = await loadPyMuPDF();
|
||||
|
||||
const total = state.files.length;
|
||||
let completed = 0;
|
||||
let failed = 0;
|
||||
|
||||
if (total === 1) {
|
||||
const file = state.files[0];
|
||||
showLoader(`Extracting ${file.name} for AI...`);
|
||||
|
||||
const llamaDocs = await (pymupdf as any).pdfToLlamaIndex(file);
|
||||
const outName = file.name.replace(/\.pdf$/i, '') + '_llm.json';
|
||||
const jsonContent = JSON.stringify(llamaDocs, null, 2);
|
||||
downloadFile(
|
||||
new Blob([jsonContent], { type: 'application/json' }),
|
||||
outName
|
||||
);
|
||||
|
||||
hideLoader();
|
||||
showAlert(
|
||||
'Extraction Complete',
|
||||
`Successfully extracted PDF for AI/LLM use.`,
|
||||
'success',
|
||||
() => resetState()
|
||||
);
|
||||
} else {
|
||||
// Multiple files - create ZIP
|
||||
const JSZip = (await import('jszip')).default;
|
||||
const zip = new JSZip();
|
||||
|
||||
for (const file of state.files) {
|
||||
try {
|
||||
showLoader(
|
||||
`Extracting ${file.name} for AI (${completed + 1}/${total})...`
|
||||
);
|
||||
|
||||
const llamaDocs = await (pymupdf as any).pdfToLlamaIndex(file);
|
||||
const outName = file.name.replace(/\.pdf$/i, '') + '_llm.json';
|
||||
const jsonContent = JSON.stringify(llamaDocs, null, 2);
|
||||
zip.file(outName, jsonContent);
|
||||
|
||||
completed++;
|
||||
} catch (error) {
|
||||
console.error(`Failed to extract ${file.name}:`, error);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (fileInput && dropZone) {
|
||||
fileInput.addEventListener('change', (e) => {
|
||||
handleFileSelect((e.target as HTMLInputElement).files);
|
||||
});
|
||||
showLoader('Creating ZIP archive...');
|
||||
const zipBlob = await zip.generateAsync({ type: 'blob' });
|
||||
|
||||
dropZone.addEventListener('dragover', (e) => {
|
||||
e.preventDefault();
|
||||
dropZone.classList.add('bg-gray-700');
|
||||
});
|
||||
downloadFile(zipBlob, 'pdf-for-ai.zip');
|
||||
|
||||
dropZone.addEventListener('dragleave', (e) => {
|
||||
e.preventDefault();
|
||||
dropZone.classList.remove('bg-gray-700');
|
||||
});
|
||||
hideLoader();
|
||||
|
||||
dropZone.addEventListener('drop', (e) => {
|
||||
e.preventDefault();
|
||||
dropZone.classList.remove('bg-gray-700');
|
||||
handleFileSelect(e.dataTransfer?.files ?? null);
|
||||
});
|
||||
|
||||
fileInput.addEventListener('click', () => {
|
||||
fileInput.value = '';
|
||||
});
|
||||
if (failed === 0) {
|
||||
showAlert(
|
||||
'Extraction Complete',
|
||||
`Successfully extracted ${completed} PDF(s) for AI/LLM use.`,
|
||||
'success',
|
||||
() => resetState()
|
||||
);
|
||||
} else {
|
||||
showAlert(
|
||||
'Extraction Partial',
|
||||
`Extracted ${completed} PDF(s), failed ${failed}.`,
|
||||
'warning',
|
||||
() => resetState()
|
||||
);
|
||||
}
|
||||
}
|
||||
} catch (e: any) {
|
||||
hideLoader();
|
||||
showAlert(
|
||||
'Error',
|
||||
`An error occurred during extraction. Error: ${e.message}`
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
if (addMoreBtn) {
|
||||
addMoreBtn.addEventListener('click', () => {
|
||||
fileInput.click();
|
||||
});
|
||||
const handleFileSelect = (files: FileList | null) => {
|
||||
if (files && files.length > 0) {
|
||||
const pdfFiles = Array.from(files).filter(
|
||||
(f) =>
|
||||
f.type === 'application/pdf' || f.name.toLowerCase().endsWith('.pdf')
|
||||
);
|
||||
if (pdfFiles.length > 0) {
|
||||
state.files = [...state.files, ...pdfFiles];
|
||||
updateUI();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
if (clearFilesBtn) {
|
||||
clearFilesBtn.addEventListener('click', resetState);
|
||||
}
|
||||
if (fileInput && dropZone) {
|
||||
fileInput.addEventListener('change', (e) => {
|
||||
handleFileSelect((e.target as HTMLInputElement).files);
|
||||
});
|
||||
|
||||
if (processBtn) {
|
||||
processBtn.addEventListener('click', extractForAI);
|
||||
}
|
||||
dropZone.addEventListener('dragover', (e) => {
|
||||
e.preventDefault();
|
||||
dropZone.classList.add('bg-gray-700');
|
||||
});
|
||||
|
||||
dropZone.addEventListener('dragleave', (e) => {
|
||||
e.preventDefault();
|
||||
dropZone.classList.remove('bg-gray-700');
|
||||
});
|
||||
|
||||
dropZone.addEventListener('drop', (e) => {
|
||||
e.preventDefault();
|
||||
dropZone.classList.remove('bg-gray-700');
|
||||
handleFileSelect(e.dataTransfer?.files ?? null);
|
||||
});
|
||||
|
||||
fileInput.addEventListener('click', () => {
|
||||
fileInput.value = '';
|
||||
});
|
||||
}
|
||||
|
||||
if (addMoreBtn) {
|
||||
addMoreBtn.addEventListener('click', () => {
|
||||
fileInput.click();
|
||||
});
|
||||
}
|
||||
|
||||
if (clearFilesBtn) {
|
||||
clearFilesBtn.addEventListener('click', resetState);
|
||||
}
|
||||
|
||||
if (processBtn) {
|
||||
processBtn.addEventListener('click', extractForAI);
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user