Files
bentopdf/src/js/logic/pdf-to-markdown.ts
2025-10-17 11:37:32 +05:30

34 lines
1.2 KiB
TypeScript

import { showLoader, hideLoader, showAlert } from '../ui.js';
import { downloadFile, readFileAsArrayBuffer } from '../utils/helpers.js';
import { state } from '../state.js';
/** Minimal shape of the pdf.js text-content entries that this module reads. */
interface PdfTextEntry {
  str?: string;
}

/**
 * Joins the text runs of a single page into one space-separated string.
 * Entries that carry no `str` contribute an empty string.
 */
function joinPageText(items: readonly PdfTextEntry[]): string {
  return items.map((entry) => entry.str ?? '').join(' ');
}

/**
 * Builds the download name for the converted file: a trailing `.pdf`
 * (case-insensitive) is removed and `.md` is always appended, so inputs
 * without a `.pdf` suffix still end up with a Markdown extension.
 */
function toMarkdownFileName(pdfName: string): string {
  return `${pdfName.replace(/\.pdf$/i, '')}.md`;
}

/**
 * Converts the first file in `state.files` to a Markdown file and hands it
 * to `downloadFile`.
 *
 * The text of every page is extracted with pdf.js, the runs of a page are
 * joined with single spaces, and each page is followed by a blank line.
 * This is plain text extraction only; no headings, lists or other Markdown
 * structure are reconstructed.
 *
 * A loader is shown for the duration of the call and is always hidden again.
 * Failures are logged and reported through `showAlert`; the returned promise
 * does not reject for them. When no text at all could be extracted (for
 * example a scanned, image-only document) nothing is downloaded and the
 * conversion error alert is shown instead of producing an empty file.
 */
export async function pdfToMarkdown(): Promise<void> {
  showLoader('Converting to Markdown...');
  try {
    const file = state.files[0];
    if (!file) {
      // Without this guard the failure would surface as a misleading
      // "failed to convert" alert.
      showAlert('No File', 'Please select a PDF file to convert.');
      return;
    }

    const arrayBuffer = await readFileAsArrayBuffer(file);
    // NOTE(review): pdfjsLib is not imported here; presumably it is provided
    // as a global by the page - confirm.
    // @ts-expect-error TS(2304) FIXME: Cannot find name 'pdfjsLib'.
    const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });

    let markdown = '';
    try {
      const pdf = await loadingTask.promise;
      for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
        const page = await pdf.getPage(pageNumber);
        const content = await page.getTextContent();
        // Double newline marks a paragraph break between pages.
        markdown += joinPageText(content.items) + '\n\n';
      }
    } finally {
      // Release the document and its worker whether extraction succeeded or
      // not. A cleanup failure must not turn a successful conversion into an
      // error, so it is only logged.
      try {
        await loadingTask.destroy();
      } catch (cleanupError) {
        console.warn(cleanupError);
      }
    }

    if (markdown.trim() === '') {
      // Image-only PDFs do not throw; they simply yield no text. Route them
      // through the error path so the user sees the alert rather than
      // receiving an empty file.
      throw new Error('No extractable text found in the PDF.');
    }

    const blob = new Blob([markdown], { type: 'text/markdown' });
    downloadFile(blob, toMarkdownFileName(file.name));
  } catch (e) {
    console.error(e);
    showAlert(
      'Conversion Error',
      'Failed to convert PDF. It may be image-based or corrupted.'
    );
  } finally {
    hideLoader();
  }
}