2026-02-08 17:05:40 +05:30
|
|
|
import { ClassicPreset } from 'rete';
|
|
|
|
|
import { BaseWorkflowNode } from './base-node';
|
|
|
|
|
import { pdfSocket } from '../sockets';
|
|
|
|
|
import type { SocketData } from '../types';
|
|
|
|
|
import { requirePdfInput, processBatch } from '../types';
|
|
|
|
|
import { PDFDocument } from 'pdf-lib';
|
|
|
|
|
import * as pdfjsLib from 'pdfjs-dist';
|
|
|
|
|
|
|
|
|
|
/**
 * Workflow node that removes visually blank pages from every incoming PDF.
 *
 * Each page is rasterised with pdf.js at reduced scale and the share of
 * "non-white" pixels is measured. Pages whose share is at or below the
 * configured `threshold` control (a percentage) are treated as blank and
 * left out of the rebuilt document.
 */
export class RemoveBlankPagesNode extends BaseWorkflowNode {
  /** Percentage of non-white pixels tolerated when the control has no usable value. */
  private static readonly DEFAULT_MAX_NON_WHITE_PERCENT = 0.5;
  /** Lower clamp applied to the control value (percent). */
  private static readonly MIN_ALLOWED_PERCENT = 0.1;
  /** Upper clamp applied to the control value (percent). */
  private static readonly MAX_ALLOWED_PERCENT = 5;
  /** Mean RGB value below which a pixel is counted as non-white. */
  private static readonly WHITE_BRIGHTNESS_CUTOFF = 240;
  /** Pages are rendered at this scale; half size keeps the pixel scan cheap. */
  private static readonly RENDER_SCALE = 0.5;

  readonly category = 'Edit & Annotate' as const;
  readonly icon = 'ph-file-minus';
  readonly description = 'Remove blank pages automatically';

  constructor() {
    super('Remove Blank Pages');
    this.addInput('pdf', new ClassicPreset.Input(pdfSocket, 'PDF'));
    this.addOutput('pdf', new ClassicPreset.Output(pdfSocket, 'PDF'));
    // The control holds a percentage that data() clamps to [0.1, 5]; the
    // initial value therefore has to be inside that range (the former 250
    // was always clamped to 5 and did not reflect the effective setting).
    this.addControl(
      'threshold',
      new ClassicPreset.InputControl('number', {
        initial: RemoveBlankPagesNode.DEFAULT_MAX_NON_WHITE_PERCENT,
      })
    );
  }

  /**
   * Renders a page to an off-screen canvas and decides whether it is blank.
   *
   * @param page pdf.js page proxy to inspect.
   * @param maxNonWhitePercent Highest percentage (0-100) of non-white pixels
   *   a page may contain and still be considered blank.
   * @returns `true` when the measured percentage is at or below the limit.
   * @throws Error if a 2D canvas context cannot be obtained.
   */
  private async isPageBlank(
    page: pdfjsLib.PDFPageProxy,
    maxNonWhitePercent: number
  ): Promise<boolean> {
    const viewport = page.getViewport({
      scale: RemoveBlankPagesNode.RENDER_SCALE,
    });
    const canvas = document.createElement('canvas');
    canvas.width = viewport.width;
    canvas.height = viewport.height;

    const ctx = canvas.getContext('2d');
    if (!ctx) {
      throw new Error('Unable to acquire a 2D canvas context');
    }

    try {
      await page.render({ canvasContext: ctx, viewport, canvas }).promise;

      const { data } = ctx.getImageData(0, 0, canvas.width, canvas.height);
      // Pixels are stored as consecutive RGBA quadruples.
      const totalPixels = data.length / 4;
      let nonWhitePixels = 0;
      for (let offset = 0; offset < data.length; offset += 4) {
        // Alpha is ignored; only the mean of R, G and B is compared.
        const brightness =
          (data[offset] + data[offset + 1] + data[offset + 2]) / 3;
        if (brightness < RemoveBlankPagesNode.WHITE_BRIGHTNESS_CUTOFF) {
          nonWhitePixels++;
        }
      }

      const nonWhitePercent = (nonWhitePixels / totalPixels) * 100;
      return nonWhitePercent <= maxNonWhitePercent;
    } finally {
      // Release per-page pdf.js resources and the canvas backing store so
      // long documents do not accumulate memory while being scanned.
      page.cleanup();
      canvas.width = 0;
      canvas.height = 0;
    }
  }

  /**
   * Produces, for every PDF input, a copy that contains only the pages not
   * detected as blank. The output filename has a trailing `.pdf` replaced
   * by `_cleaned.pdf`.
   *
   * @param inputs Socket data keyed by input name.
   * @returns An object whose `pdf` entry is the result of `processBatch`.
   * @throws Error with message 'All pages are blank' when no page of a
   *   document survives the check.
   */
  async data(
    inputs: Record<string, SocketData[]>
  ): Promise<Record<string, SocketData>> {
    const pdfInputs = requirePdfInput(inputs, 'Remove Blank Pages');

    const threshCtrl = this.controls['threshold'] as
      | ClassicPreset.InputControl<'number'>
      | undefined;
    // A NaN control value would otherwise flow through Math.min/Math.max and
    // make every `<=` comparison false, silently disabling blank detection.
    const requestedPercent = Number(
      threshCtrl?.value ?? RemoveBlankPagesNode.DEFAULT_MAX_NON_WHITE_PERCENT
    );
    const usablePercent = Number.isFinite(requestedPercent)
      ? requestedPercent
      : RemoveBlankPagesNode.DEFAULT_MAX_NON_WHITE_PERCENT;
    const maxNonWhitePercent = Math.max(
      RemoveBlankPagesNode.MIN_ALLOWED_PERCENT,
      Math.min(RemoveBlankPagesNode.MAX_ALLOWED_PERCENT, usablePercent)
    );

    return {
      pdf: await processBatch(pdfInputs, async (input) => {
        // pdf.js documents that typed arrays passed as `data` may be
        // transferred to its worker; hand it a copy so `input.bytes` stays
        // usable for pdf-lib below and for any other consumer of the input.
        const pdfjsDoc = await pdfjsLib.getDocument({
          data: input.bytes.slice(),
        }).promise;

        const nonBlankIndices: number[] = [];
        try {
          // pdf.js page numbers are 1-based; pdf-lib indices are 0-based.
          for (let pageNumber = 1; pageNumber <= pdfjsDoc.numPages; pageNumber++) {
            const page = await pdfjsDoc.getPage(pageNumber);
            const blank = await this.isPageBlank(page, maxNonWhitePercent);
            if (!blank) {
              nonBlankIndices.push(pageNumber - 1);
            } else {
              console.log(`Page ${pageNumber} detected as blank, removing`);
            }
          }
        } finally {
          // The pdf.js document is only needed for rendering.
          await pdfjsDoc.destroy();
        }

        if (nonBlankIndices.length === 0) {
          throw new Error('All pages are blank');
        }

        const srcDoc = await PDFDocument.load(input.bytes);
        const newDoc = await PDFDocument.create();
        const copiedPages = await newDoc.copyPages(srcDoc, nonBlankIndices);
        copiedPages.forEach((page) => newDoc.addPage(page));

        const pdfBytes = await newDoc.save();
        return {
          type: 'pdf',
          document: newDoc,
          bytes: new Uint8Array(pdfBytes),
          filename: input.filename.replace(/\.pdf$/i, '_cleaned.pdf'),
        };
      }),
    };
  }
}
|