Files
bentopdf/src/js/compare/engine/page-signatures.ts
alam00000 1d68691331 refactor: update PDF comparison types and enhance UI for better usability
- Refactored CompareState to import from a centralized type definition.
- Enhanced the compare-pdfs.html layout with improved styles for overlay and side-by-side modes.
- Added new CSS styles for various UI components including panels, buttons, and highlights.
- Implemented a new sidebar for displaying change summaries and filters.
- Created unit tests for text comparison logic, including diffing text runs and page pairing.
- Added tests for text normalization functions to ensure proper handling of punctuation and character normalization.
2026-03-08 23:55:33 +05:30

62 lines
1.6 KiB
TypeScript

import * as pdfjsLib from 'pdfjs-dist';
import type { ComparePageSignature, CompareTextItem } from '../types.ts';
import {
joinNormalizedText,
normalizeCompareText,
} from './text-normalization.ts';
/**
 * Shape that entries of `textContent.items` are narrowed to (via a
 * `'str' in item` check) before their text is tokenized.
 *
 * NOTE(review): the field list presumably mirrors the text-item typing
 * shipped with pdfjs-dist — confirm against the installed version.
 */
type SignatureTextItem = {
// Raw text of the item; the only field read by extractPageSignature.
str: string;
dir: string;
transform: number[];
width: number;
height: number;
fontName: string;
hasEOL: boolean;
};
/**
 * Wraps a single token in a `CompareTextItem`.
 *
 * The token is used verbatim for both `text` and `normalizedText`, the id is
 * built from the token's position and content, and the rectangle is an
 * all-zero placeholder.
 *
 * @param token - Token text to wrap.
 * @param index - Position of the token, embedded in the generated id.
 * @returns A freshly allocated item describing the token.
 */
function tokenToItem(token: string, index: number): CompareTextItem {
  const itemId = 'token-' + String(index) + '-' + token;
  // A new rect object per call so items never share a mutable rectangle.
  const emptyRect = { x: 0, y: 0, width: 0, height: 0 };

  const item: CompareTextItem = {
    id: itemId,
    text: token,
    normalizedText: token,
    rect: emptyRect,
  };
  return item;
}
/** Upper bound on the number of normalized tokens kept in a page signature. */
const MAX_SIGNATURE_TOKENS = 500;

/**
 * Builds a text signature for a single page of a PDF document.
 *
 * Text items of the page are normalized with `normalizeCompareText`, empty
 * results are dropped, and at most `MAX_SIGNATURE_TOKENS` tokens are kept.
 * Normalization stops as soon as the cap is reached, so long pages do not pay
 * for tokens that would be discarded anyway.
 *
 * @param pdfDoc - Loaded pdf.js document to read from.
 * @param pageNumber - Page number as passed to `pdfDoc.getPage`.
 * @returns The page number, the joined normalized text, whether any token was
 *   found, and one `CompareTextItem` per retained token.
 */
export async function extractPageSignature(
  pdfDoc: pdfjsLib.PDFDocumentProxy,
  pageNumber: number
): Promise<ComparePageSignature> {
  const page = await pdfDoc.getPage(pageNumber);
  const textContent = await page.getTextContent();

  // Only entries that carry a `str` field contribute text to the signature.
  const textItems = textContent.items.filter(
    (item): item is SignatureTextItem => 'str' in item
  );

  const limitedTokens: string[] = [];
  for (const item of textItems) {
    if (limitedTokens.length >= MAX_SIGNATURE_TOKENS) break;
    const token = normalizeCompareText(item.str);
    // Skip items whose normalized text is empty.
    if (token) limitedTokens.push(token);
  }

  return {
    pageNumber,
    plainText: joinNormalizedText(limitedTokens),
    hasText: limitedTokens.length > 0,
    tokenItems: limitedTokens.map((token, index) => tokenToItem(token, index)),
  };
}
/**
 * Extracts the signature of every page in a document, one page at a time.
 *
 * Pages are visited in ascending order starting at 1. When a progress
 * callback is supplied it is invoked before each page is extracted.
 *
 * @param pdfDoc - Loaded pdf.js document to walk.
 * @param onProgress - Optional callback receiving the page about to be
 *   processed and the document's page count.
 * @returns The page signatures in page order.
 */
export async function extractDocumentSignatures(
  pdfDoc: pdfjsLib.PDFDocumentProxy,
  onProgress?: (pageNumber: number, totalPages: number) => void
) {
  const collected: ComparePageSignature[] = [];
  let currentPage = 1;

  while (currentPage <= pdfDoc.numPages) {
    if (onProgress != null) {
      onProgress(currentPage, pdfDoc.numPages);
    }
    // Awaited inside the loop on purpose: pages are processed sequentially.
    const signature = await extractPageSignature(pdfDoc, currentPage);
    collected.push(signature);
    currentPage += 1;
  }

  return collected;
}