refactor: remove HTML report export and implement PDF export options in PDF comparison tool
- Deleted the exportCompareHtmlReport function and its related imports. - Introduced a dropdown menu for exporting comparison results as PDFs with multiple modes (split, alternating, left, right). - Updated the comparison logic to utilize caching for page models and comparison results. - Refactored rendering functions to improve code organization and maintainability. - Enhanced UI elements for better user experience during PDF export.
This commit is contained in:
37
src/js/compare/config.ts
Normal file
37
src/js/compare/config.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
/** RGB channel values (0–255) used to tint each kind of detected change. */
export const COMPARE_COLORS = {
  added: { r: 34, g: 197, b: 94 },
  removed: { r: 239, g: 68, b: 68 },
  modified: { r: 245, g: 158, b: 11 },
} as const;

/** Opacity applied to highlight rectangles drawn over changed regions. */
export const HIGHLIGHT_OPACITY = 0.28;

/** Geometry thresholds shared by the line-grouping and focus-region code. */
export const COMPARE_GEOMETRY = {
  /** Fraction of a rectangle's height within which two items count as the same line. */
  LINE_TOLERANCE_FACTOR: 0.6,
  /** Lower bound for the same-line tolerance, whatever the rectangle height. */
  MIN_LINE_TOLERANCE: 4,
  /** Padding added around the bounds of all changes when building a focus region. */
  FOCUS_REGION_PADDING: 40,
  /** Smallest focus-region width (capped by the canvas width by the consumer). */
  FOCUS_REGION_MIN_WIDTH: 320,
  /** Smallest focus-region height (capped by the canvas height by the consumer). */
  FOCUS_REGION_MIN_HEIGHT: 200,
} as const;

/** Scale factors and layout values used when rendering or exporting pages. */
export const COMPARE_RENDER = {
  /** Scale for pages processed without an on-screen render target. */
  OFFLINE_SCALE: 1.2,
  /** Upper bound for the on-screen render scale in overlay view. */
  MAX_SCALE_OVERLAY: 2.5,
  /** Upper bound for the on-screen render scale in side-by-side view. */
  MAX_SCALE_SIDE: 2.0,
  /** Viewport scale used to extract text geometry for the PDF export. */
  EXPORT_EXTRACT_SCALE: 1.0,
  /** Horizontal gap between the two halves of a split export page. */
  SPLIT_GAP_PT: 2,
} as const;

/** Fallback glyph metrics for text measurement. */
export const COMPARE_TEXT = {
  DEFAULT_CHAR_WIDTH: 1,
  DEFAULT_SPACE_WIDTH: 0.33,
} as const;

/** Options forwarded to pixelmatch by the visual diff renderer. */
export const VISUAL_DIFF = {
  PIXELMATCH_THRESHOLD: 0.12,
  ALPHA: 0.2,
  // Derived from COMPARE_COLORS so the pixel diff and the text highlights
  // cannot drift apart when the palette is changed.
  DIFF_COLOR: [
    COMPARE_COLORS.removed.r,
    COMPARE_COLORS.removed.g,
    COMPARE_COLORS.removed.b,
  ] as readonly [number, number, number],
  DIFF_COLOR_ALT: [
    COMPARE_COLORS.added.r,
    COMPARE_COLORS.added.g,
    COMPARE_COLORS.added.b,
  ] as readonly [number, number, number],
} as const;

/** Maximum number of entries kept by each comparison LRU cache. */
export const COMPARE_CACHE_MAX_SIZE = 50;
|
||||
@@ -8,6 +8,8 @@ import type {
|
||||
CompareTextItem,
|
||||
CompareWordToken,
|
||||
} from '../types.ts';
|
||||
import { calculateBoundingRect } from './text-normalization.ts';
|
||||
import { COMPARE_GEOMETRY } from '../config.ts';
|
||||
|
||||
interface WordToken {
|
||||
word: string;
|
||||
@@ -86,7 +88,11 @@ function groupAdjacentRects(rects: CompareRectangle[]): CompareRectangle[] {
|
||||
const lastRect = prev[prev.length - 1];
|
||||
const curr = sorted[i];
|
||||
const sameLine =
|
||||
Math.abs(curr.y - lastRect.y) < Math.max(lastRect.height * 0.6, 4);
|
||||
Math.abs(curr.y - lastRect.y) <
|
||||
Math.max(
|
||||
lastRect.height * COMPARE_GEOMETRY.LINE_TOLERANCE_FACTOR,
|
||||
COMPARE_GEOMETRY.MIN_LINE_TOLERANCE
|
||||
);
|
||||
const close = curr.x <= lastRect.x + lastRect.width + lastRect.height * 2;
|
||||
|
||||
if (sameLine && close) {
|
||||
@@ -96,13 +102,7 @@ function groupAdjacentRects(rects: CompareRectangle[]): CompareRectangle[] {
|
||||
}
|
||||
}
|
||||
|
||||
return groups.map((group) => {
|
||||
const minX = Math.min(...group.map((r) => r.x));
|
||||
const minY = Math.min(...group.map((r) => r.y));
|
||||
const maxX = Math.max(...group.map((r) => r.x + r.width));
|
||||
const maxY = Math.max(...group.map((r) => r.y + r.height));
|
||||
return { x: minX, y: minY, width: maxX - minX, height: maxY - minY };
|
||||
});
|
||||
return groups.map((group) => calculateBoundingRect(group));
|
||||
}
|
||||
|
||||
function collapseWords(words: WordToken[]) {
|
||||
|
||||
@@ -33,8 +33,10 @@ const textMeasurementCache: Map<string, number> | null = measurementContext
|
||||
: null;
|
||||
let lastMeasurementFont = '';
|
||||
|
||||
const DEFAULT_CHAR_WIDTH = 1;
|
||||
const DEFAULT_SPACE_WIDTH = 0.33;
|
||||
import { COMPARE_TEXT, COMPARE_GEOMETRY } from '../config.ts';
|
||||
|
||||
const DEFAULT_CHAR_WIDTH = COMPARE_TEXT.DEFAULT_CHAR_WIDTH;
|
||||
const DEFAULT_SPACE_WIDTH = COMPARE_TEXT.DEFAULT_SPACE_WIDTH;
|
||||
|
||||
function shouldJoinTokenWithPrevious(previous: string, current: string) {
|
||||
if (!previous) return false;
|
||||
@@ -261,8 +263,9 @@ function toRect(
|
||||
export function sortCompareTextItems(items: CompareTextItem[]) {
|
||||
return [...items].sort((left, right) => {
|
||||
const lineTolerance = Math.max(
|
||||
Math.min(left.rect.height, right.rect.height) * 0.6,
|
||||
4
|
||||
Math.min(left.rect.height, right.rect.height) *
|
||||
COMPARE_GEOMETRY.LINE_TOLERANCE_FACTOR,
|
||||
COMPARE_GEOMETRY.MIN_LINE_TOLERANCE
|
||||
);
|
||||
const topDiff = left.rect.y - right.rect.y;
|
||||
|
||||
@@ -450,8 +453,9 @@ export function mergeIntoLines(
|
||||
const anchor = currentLine[0];
|
||||
const curr = sortedItems[i];
|
||||
const lineTolerance = Math.max(
|
||||
Math.min(anchor.rect.height, curr.rect.height) * 0.6,
|
||||
4
|
||||
Math.min(anchor.rect.height, curr.rect.height) *
|
||||
COMPARE_GEOMETRY.LINE_TOLERANCE_FACTOR,
|
||||
COMPARE_GEOMETRY.MIN_LINE_TOLERANCE
|
||||
);
|
||||
|
||||
if (Math.abs(curr.rect.y - anchor.rect.y) <= lineTolerance) {
|
||||
|
||||
@@ -1,8 +1,5 @@
|
||||
import type { ComparePagePair, ComparePageSignature } from '../types.ts';
|
||||
|
||||
/** Returns the distinct non-empty, whitespace-delimited tokens of `text`. */
function tokenize(text: string) {
  const words = text.split(/\s+/).filter(Boolean);
  return new Set(words);
}
|
||||
import { tokenizeTextAsSet } from './text-normalization.ts';
|
||||
|
||||
function similarityScore(
|
||||
left: ComparePageSignature,
|
||||
@@ -16,8 +13,8 @@ function similarityScore(
|
||||
return 0.08;
|
||||
}
|
||||
|
||||
const leftTokens = tokenize(left.plainText);
|
||||
const rightTokens = tokenize(right.plainText);
|
||||
const leftTokens = tokenizeTextAsSet(left.plainText);
|
||||
const rightTokens = tokenizeTextAsSet(right.plainText);
|
||||
const union = new Set([...leftTokens, ...rightTokens]);
|
||||
let intersectionCount = 0;
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import type { CompareTextItem } from '../types.ts';
|
||||
import type { CompareRectangle, CompareTextItem } from '../types.ts';
|
||||
|
||||
export function normalizeCompareText(text: string) {
|
||||
return text
|
||||
@@ -62,3 +62,22 @@ export function isLowQualityExtractedText(text: string) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Splits `text` on runs of whitespace.
 *
 * @param text - Text to split.
 * @returns The non-empty pieces, in their original order.
 */
export function tokenizeText(text: string): string[] {
  const tokens: string[] = [];
  for (const piece of text.split(/\s+/)) {
    // Leading/trailing whitespace yields empty pieces; drop them.
    if (piece) tokens.push(piece);
  }
  return tokens;
}
|
||||
|
||||
/**
 * Tokenizes `text` with {@link tokenizeText} and removes duplicates.
 *
 * @param text - Text to split.
 * @returns The set of distinct tokens.
 */
export function tokenizeTextAsSet(text: string): Set<string> {
  const tokens = tokenizeText(text);
  return new Set<string>(tokens);
}
|
||||
|
||||
/**
 * Computes the smallest rectangle that contains every rectangle in `rects`.
 *
 * Uses a single pass instead of `Math.min(...array)`: spreading a very large
 * array into call arguments can throw a `RangeError`.
 *
 * @param rects - Rectangles to enclose.
 * @returns The enclosing rectangle, or an all-zero rectangle for empty input.
 */
export function calculateBoundingRect(
  rects: CompareRectangle[]
): CompareRectangle {
  if (rects.length === 0) return { x: 0, y: 0, width: 0, height: 0 };

  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;

  for (const rect of rects) {
    minX = Math.min(minX, rect.x);
    minY = Math.min(minY, rect.y);
    maxX = Math.max(maxX, rect.x + rect.width);
    maxY = Math.max(maxY, rect.y + rect.height);
  }

  return { x: minX, y: minY, width: maxX - minX, height: maxY - minY };
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import pixelmatch from 'pixelmatch';
|
||||
|
||||
import type { CompareVisualDiff } from '../types.ts';
|
||||
import { VISUAL_DIFF as VISUAL_DIFF_CONFIG } from '../config.ts';
|
||||
|
||||
type FocusRegion = {
|
||||
x: number;
|
||||
@@ -69,12 +70,16 @@ export function renderVisualDiff(
|
||||
width,
|
||||
height,
|
||||
{
|
||||
threshold: 0.12,
|
||||
threshold: VISUAL_DIFF_CONFIG.PIXELMATCH_THRESHOLD,
|
||||
includeAA: false,
|
||||
alpha: 0.2,
|
||||
alpha: VISUAL_DIFF_CONFIG.ALPHA,
|
||||
diffMask: false,
|
||||
diffColor: [239, 68, 68],
|
||||
diffColorAlt: [34, 197, 94],
|
||||
diffColor: [...VISUAL_DIFF_CONFIG.DIFF_COLOR] as [number, number, number],
|
||||
diffColorAlt: [...VISUAL_DIFF_CONFIG.DIFF_COLOR_ALT] as [
|
||||
number,
|
||||
number,
|
||||
number,
|
||||
],
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
38
src/js/compare/lru-cache.ts
Normal file
38
src/js/compare/lru-cache.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
/**
 * Least-recently-used cache backed by a `Map`.
 *
 * A `Map` iterates in insertion order, so re-inserting an entry on every
 * read or write keeps the least recently used key first, which is the one
 * evicted when the cache grows beyond `maxSize`.
 */
export class LRUCache<K, V> {
  private map = new Map<K, V>();
  private maxSize: number;

  /**
   * @param maxSize - Maximum number of entries retained.
   */
  constructor(maxSize: number) {
    this.maxSize = maxSize;
  }

  /**
   * Returns the value stored for `key` and marks it as most recently used.
   *
   * @returns The stored value, or `undefined` when the key is absent.
   */
  get(key: K): V | undefined {
    // Test presence with has(): a stored `undefined` value is still a hit
    // and must have its recency refreshed.
    if (!this.map.has(key)) return undefined;
    const value = this.map.get(key) as V;
    this.map.delete(key);
    this.map.set(key, value);
    return value;
  }

  /**
   * Stores `value` under `key` as the most recently used entry, evicting the
   * least recently used entry if the cache is over capacity.
   */
  set(key: K, value: V): void {
    this.map.delete(key);
    this.map.set(key, value);
    if (this.map.size > this.maxSize) {
      // Check `done` rather than the key itself so that a legitimate
      // `undefined` key can still be evicted.
      const oldest = this.map.keys().next();
      if (!oldest.done) this.map.delete(oldest.value);
    }
  }

  /** Reports whether `key` is cached, without affecting recency. */
  has(key: K): boolean {
    return this.map.has(key);
  }

  /** Removes every entry. */
  clear(): void {
    this.map.clear();
  }

  /** Number of entries currently cached. */
  get size(): number {
    return this.map.size;
  }
}
|
||||
@@ -1,77 +0,0 @@
|
||||
import type { ComparePagePair, ComparePageResult } from '../types.ts';
|
||||
|
||||
/**
 * Escapes the five HTML-significant characters so `text` can be embedded in
 * markup or in a quoted attribute value.
 *
 * `&` is replaced first so the ampersands introduced by the later
 * replacements are not escaped a second time.
 */
function escapeHtml(text: string) {
  return text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}
|
||||
|
||||
/**
 * Builds a standalone HTML document summarising a comparison run.
 *
 * @param fileName1 - Display name of the first PDF (HTML-escaped before use).
 * @param fileName2 - Display name of the second PDF (HTML-escaped before use).
 * @param pairs - Page pairings; `pairs[i]` is read alongside `results[i]`.
 * @param results - Per-pair comparison results.
 * @returns The complete HTML document as a string.
 */
export function buildCompareReport(
  fileName1: string,
  fileName2: string,
  pairs: ComparePagePair[],
  results: ComparePageResult[]
) {
  // Sum the per-page counters for the header cards.
  const totals = { added: 0, removed: 0, modified: 0 };
  for (const { summary } of results) {
    totals.added += summary.added;
    totals.removed += summary.removed;
    totals.modified += summary.modified;
  }

  // One <section> per result; the matching pair may be missing, hence `?.`.
  const sections: string[] = [];
  results.forEach((result, index) => {
    const pair = pairs[index];
    const changes = result.changes
      .map(
        (change) =>
          `<li><strong>${escapeHtml(change.type)}</strong>: ${escapeHtml(change.description)}</li>`
      )
      .join('');

    sections.push(`
<section class="pair-card">
<h2>Comparison ${pair?.pairIndex || index + 1}</h2>
<p class="meta">PDF 1 page: ${pair?.leftPageNumber ?? 'none'} | PDF 2 page: ${pair?.rightPageNumber ?? 'none'} | Confidence: ${((pair?.confidence || 0) * 100).toFixed(0)}%</p>
<p class="meta">Status: ${escapeHtml(result.status)}${result.usedOcr ? ' | OCR used' : ''}</p>
<p class="meta">Added: ${result.summary.added} | Removed: ${result.summary.removed} | Modified: ${result.summary.modified}</p>
<ul>${changes || '<li>No semantic changes detected.</li>'}</ul>
</section>
`);
  });
  const rows = sections.join('');

  return `<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Compare report</title>
<style>
body { font-family: ui-sans-serif, system-ui, sans-serif; margin: 0; padding: 2rem; background: #111827; color: #e5e7eb; }
.summary { display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); gap: 1rem; margin: 1.5rem 0; }
.card, .pair-card { background: #1f2937; border: 1px solid #374151; border-radius: 12px; padding: 1rem 1.25rem; }
.pair-card { margin-bottom: 1rem; }
.meta { color: #9ca3af; font-size: 0.95rem; }
h1, h2 { margin: 0 0 0.75rem 0; }
ul { margin: 0.75rem 0 0 1.25rem; }
</style>
</head>
<body>
<h1>PDF Compare Report</h1>
<p class="meta">PDF 1: ${escapeHtml(fileName1)} | PDF 2: ${escapeHtml(fileName2)}</p>
<div class="summary">
<div class="card"><div class="meta">Added</div><div>${totals.added}</div></div>
<div class="card"><div class="meta">Removed</div><div>${totals.removed}</div></div>
<div class="card"><div class="meta">Modified</div><div>${totals.modified}</div></div>
</div>
${rows}
</body>
</html>`;
}
|
||||
239
src/js/compare/reporting/export-compare-pdf.ts
Normal file
239
src/js/compare/reporting/export-compare-pdf.ts
Normal file
@@ -0,0 +1,239 @@
|
||||
import { PDFDocument, rgb } from 'pdf-lib';
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
import type {
|
||||
ComparePagePair,
|
||||
CompareTextChange,
|
||||
ComparePdfExportMode,
|
||||
} from '../types.ts';
|
||||
import { extractPageModel } from '../engine/extract-page-model.ts';
|
||||
import { comparePageModels } from '../engine/compare-page-models.ts';
|
||||
import {
|
||||
COMPARE_COLORS,
|
||||
HIGHLIGHT_OPACITY,
|
||||
COMPARE_RENDER,
|
||||
} from '../config.ts';
|
||||
import { downloadFile } from '../../utils/helpers.ts';
|
||||
|
||||
/**
 * Scales a 0–255 RGB triple into the 0–1 channel values later passed to
 * `rgb()`, and attaches the shared highlight opacity.
 */
const toHighlightColor = (source: { r: number; g: number; b: number }) => ({
  r: source.r / 255,
  g: source.g / 255,
  b: source.b / 255,
  opacity: HIGHLIGHT_OPACITY,
});

/** Highlight fill per change type; types without an entry are not drawn. */
const HIGHLIGHT_COLORS: Record<
  string,
  { r: number; g: number; b: number; opacity: number }
> = {
  added: toHighlightColor(COMPARE_COLORS.added),
  removed: toHighlightColor(COMPARE_COLORS.removed),
  // Removed pages share the colour of removed text.
  'page-removed': toHighlightColor(COMPARE_COLORS.removed),
  modified: toHighlightColor(COMPARE_COLORS.modified),
};

/** Viewport scale at which text geometry is extracted for the export. */
const EXTRACT_SCALE = COMPARE_RENDER.EXPORT_EXTRACT_SCALE;
|
||||
|
||||
/** A pdf-lib page, as returned by `PDFDocument.getPage` / `addPage`. */
type OutputPage = ReturnType<PDFDocument['getPage']>;

/**
 * Draws a translucent rectangle over every changed region of one side.
 *
 * Rectangles are extracted at `EXTRACT_SCALE` with a top-left origin; they are
 * divided back by that scale and flipped vertically against `pageHeight`
 * before being drawn.
 *
 * @param page - Page to draw on.
 * @param pageHeight - Height used to flip the y axis.
 * @param changes - Changes whose rectangles are drawn.
 * @param side - `'before'` draws `beforeRects`, `'after'` draws `afterRects`.
 * @param offsetX - Horizontal shift applied to every rectangle; used for the
 *   right half of a split page. Defaults to 0.
 */
function drawHighlights(
  page: OutputPage,
  pageHeight: number,
  changes: CompareTextChange[],
  side: 'before' | 'after',
  offsetX = 0
) {
  for (const change of changes) {
    const color = HIGHLIGHT_COLORS[change.type];
    // Change types without a configured colour are not highlighted.
    if (!color) continue;

    const rects = side === 'before' ? change.beforeRects : change.afterRects;
    for (const rect of rects) {
      page.drawRectangle({
        x: offsetX + rect.x / EXTRACT_SCALE,
        y: pageHeight - rect.y / EXTRACT_SCALE - rect.height / EXTRACT_SCALE,
        width: rect.width / EXTRACT_SCALE,
        height: rect.height / EXTRACT_SCALE,
        color: rgb(color.r, color.g, color.b),
        opacity: color.opacity,
      });
    }
  }
}

/**
 * Copies one source page into `outDoc` as a new page and highlights it.
 * Does nothing when the pair has no page on this side or the document is
 * missing.
 */
async function appendHighlightedPage(
  outDoc: PDFDocument,
  sourceDoc: PDFDocument | null,
  pageNumber: number | null | undefined,
  changes: CompareTextChange[],
  side: 'before' | 'after'
): Promise<void> {
  if (!pageNumber || !sourceDoc) return;
  // Page numbers are 1-based; pdf-lib page indices are 0-based.
  const [copied] = await outDoc.copyPages(sourceDoc, [pageNumber - 1]);
  const added = outDoc.addPage(copied);
  drawHighlights(added, added.getSize().height, changes, side);
}

/**
 * Embeds one source page and draws it into a region of `outPage`.
 * Does nothing when the pair has no page on this side or the document is
 * missing.
 */
async function drawSplitHalf(
  outDoc: PDFDocument,
  outPage: OutputPage,
  sourceDoc: PDFDocument | null,
  pageNumber: number | null | undefined,
  x: number,
  width: number,
  height: number
): Promise<void> {
  if (!pageNumber || !sourceDoc) return;
  const [copied] = await outDoc.copyPages(sourceDoc, [pageNumber - 1]);
  const embedded = await outDoc.embedPage(copied);
  outPage.drawPage(embedded, { x, y: 0, width, height });
}

/**
 * Builds a PDF of the compared pages with change highlights and hands it to
 * `downloadFile` as `bentopdf-compare-export.pdf`.
 *
 * For every pair the page text is re-extracted at `EXTRACT_SCALE` and compared
 * again; the resulting change rectangles are drawn over copies of the
 * original pages.
 *
 * Modes:
 * - `split`: one wide page per pair, left and right drawn side by side. Both
 *   halves use the dimensions of the left page, or of the right page when
 *   the pair has no left page.
 * - `alternating`: the left page followed by the right page.
 * - `left` / `right`: only that side's pages.
 *
 * @param mode - Output layout.
 * @param pdfDoc1 - First (left) document, or null.
 * @param pdfDoc2 - Second (right) document, or null.
 * @param pairs - Page pairings to export, in output order.
 * @param onProgress - Optional callback receiving a message and a 0–100 value.
 * @throws Error when both documents are null or `pairs` is empty.
 */
export async function exportComparePdf(
  mode: ComparePdfExportMode,
  pdfDoc1: pdfjsLib.PDFDocumentProxy | null,
  pdfDoc2: pdfjsLib.PDFDocumentProxy | null,
  pairs: ComparePagePair[],
  onProgress?: (message: string, percent: number) => void
) {
  if (!pdfDoc1 && !pdfDoc2) {
    throw new Error('At least one PDF document is required for export.');
  }
  if (!pairs || pairs.length === 0) {
    throw new Error('No page pairs to export.');
  }

  const outDoc = await PDFDocument.create();

  // pdf.js supplies the raw bytes; pdf-lib needs its own parsed copy of each
  // document to copy pages from.
  const [bytes1, bytes2] = await Promise.all([
    pdfDoc1?.getData(),
    pdfDoc2?.getData(),
  ]);
  const [libDoc1, libDoc2] = await Promise.all([
    bytes1 ? PDFDocument.load(bytes1, { ignoreEncryption: true }) : null,
    bytes2 ? PDFDocument.load(bytes2, { ignoreEncryption: true }) : null,
  ]);

  for (let i = 0; i < pairs.length; i++) {
    const pair = pairs[i];
    onProgress?.(
      `Rendering page ${i + 1} of ${pairs.length}...`,
      Math.round(((i + 1) / pairs.length) * 100)
    );

    const leftPdfjsPage =
      pair.leftPageNumber && pdfDoc1
        ? await pdfDoc1.getPage(pair.leftPageNumber)
        : null;
    const rightPdfjsPage =
      pair.rightPageNumber && pdfDoc2
        ? await pdfDoc2.getPage(pair.rightPageNumber)
        : null;

    const leftModel = leftPdfjsPage
      ? await extractPageModel(
          leftPdfjsPage,
          leftPdfjsPage.getViewport({ scale: EXTRACT_SCALE })
        )
      : null;
    const rightModel = rightPdfjsPage
      ? await extractPageModel(
          rightPdfjsPage,
          rightPdfjsPage.getViewport({ scale: EXTRACT_SCALE })
        )
      : null;

    const { changes } = comparePageModels(leftModel, rightModel);

    if (mode === 'split') {
      const refPage = leftPdfjsPage ?? rightPdfjsPage;
      // A pair with no loadable page on either side has nothing to lay out.
      if (refPage) {
        const vp = refPage.getViewport({ scale: 1.0 });
        const gap = COMPARE_RENDER.SPLIT_GAP_PT;
        const rightX = vp.width + gap;
        const outPage = outDoc.addPage([vp.width * 2 + gap, vp.height]);

        await drawSplitHalf(
          outDoc,
          outPage,
          libDoc1,
          pair.leftPageNumber,
          0,
          vp.width,
          vp.height
        );
        await drawSplitHalf(
          outDoc,
          outPage,
          libDoc2,
          pair.rightPageNumber,
          rightX,
          vp.width,
          vp.height
        );

        drawHighlights(outPage, vp.height, changes, 'before');
        drawHighlights(outPage, vp.height, changes, 'after', rightX);
      }
    } else {
      if (mode === 'alternating' || mode === 'left') {
        await appendHighlightedPage(
          outDoc,
          libDoc1,
          pair.leftPageNumber,
          changes,
          'before'
        );
      }
      if (mode !== 'left') {
        await appendHighlightedPage(
          outDoc,
          libDoc2,
          pair.rightPageNumber,
          changes,
          'after'
        );
      }
    }

    // Yield to the event loop between pairs so progress updates can paint.
    await new Promise((r) => setTimeout(r, 0));
  }

  const pdfBytes = await outDoc.save();
  const blob = new Blob([pdfBytes.buffer as ArrayBuffer], {
    type: 'application/pdf',
  });
  downloadFile(blob, 'bentopdf-compare-export.pdf');
}
|
||||
@@ -1,18 +0,0 @@
|
||||
import { buildCompareReport } from './build-report.ts';
|
||||
import type { ComparePagePair, ComparePageResult } from '../types.ts';
|
||||
|
||||
/**
 * Builds the HTML comparison report and triggers a browser download of it as
 * `bentopdf-compare-report.html`.
 *
 * @param fileName1 - Display name of the first PDF.
 * @param fileName2 - Display name of the second PDF.
 * @param pairs - Page pairings passed through to the report builder.
 * @param results - Per-pair results passed through to the report builder.
 */
export function exportCompareHtmlReport(
  fileName1: string,
  fileName2: string,
  pairs: ComparePagePair[],
  results: ComparePageResult[]
) {
  const reportBlob = new Blob(
    [buildCompareReport(fileName1, fileName2, pairs, results)],
    { type: 'text/html;charset=utf-8' }
  );
  const objectUrl = URL.createObjectURL(reportBlob);

  // A detached anchor with a `download` attribute starts the download.
  const link = Object.assign(document.createElement('a'), {
    href: objectUrl,
    download: 'bentopdf-compare-report.html',
  });
  link.click();

  URL.revokeObjectURL(objectUrl);
}
|
||||
@@ -1,7 +1,40 @@
|
||||
import type * as pdfjsLib from 'pdfjs-dist';
|
||||
import type { LRUCache } from './lru-cache.ts';
|
||||
|
||||
/** On-screen layout of the comparison view. */
export type CompareViewMode = 'overlay' | 'side-by-side';
|
||||
|
||||
/** Layout of the exported comparison PDF. */
export type ComparePdfExportMode = 'split' | 'alternating' | 'left' | 'right';
|
||||
|
||||
/** Outcome of rendering one page to a canvas. */
export interface RenderedPage {
  /** Extracted page model, or null when there was no page to render. */
  model: ComparePageModel | null;
  /** Whether the requested page exists in the document. */
  exists: boolean;
}
|
||||
|
||||
/** Outcome of loading one side of a page pair for comparison. */
export interface ComparisonPageLoad {
  /** Page model for this side, or null when the pair has no page here. */
  model: ComparePageModel | null;
  /** Whether a page was available on this side. */
  exists: boolean;
}
|
||||
|
||||
/** Rectangular region that encloses the detected changes. */
export interface DiffFocusRegion {
  /** Left edge of the region. */
  x: number;
  /** Top edge of the region. */
  y: number;
  /** Horizontal extent of the region. */
  width: number;
  /** Vertical extent of the region. */
  height: number;
}
|
||||
|
||||
/** The bounded caches used by the comparison tool, bundled together. */
export interface CompareCaches {
  /** Extracted page models, keyed by a string cache key. */
  pageModelCache: LRUCache<string, ComparePageModel>;
  /** Comparison results, keyed by a string cache key. */
  comparisonCache: LRUCache<string, ComparePageResult>;
  /** Comparison results keyed by number (presumably the pair index; verify against the caller). */
  comparisonResultsCache: LRUCache<number, ComparePageResult>;
}
|
||||
|
||||
/** Settings and callbacks handed to the comparison rendering helpers. */
export interface CompareRenderContext {
  /** Whether OCR may be used as a fallback for page text. */
  useOcr: boolean;
  /** Language code passed to the OCR engine. */
  ocrLanguage: string;
  /** Layout currently shown on screen. */
  viewMode: CompareViewMode;
  /** Shows or updates the loading indicator; `percent` is optional. */
  showLoader: (message: string, percent?: number) => void;
}
|
||||
|
||||
export interface CompareRectangle {
|
||||
x: number;
|
||||
y: number;
|
||||
|
||||
@@ -1,23 +1,28 @@
|
||||
import { showLoader, hideLoader, showAlert } from '../ui.js';
|
||||
import { getPDFDocument } from '../utils/helpers.js';
|
||||
import { showLoader, hideLoader, showAlert } from '../ui.ts';
|
||||
import { getPDFDocument } from '../utils/helpers.ts';
|
||||
import { icons, createIcons } from 'lucide';
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
import { CompareState } from '@/types';
|
||||
import type {
|
||||
CompareFilterType,
|
||||
ComparePageModel,
|
||||
ComparePagePair,
|
||||
ComparePageResult,
|
||||
CompareTextChange,
|
||||
} from '../compare/types.ts';
|
||||
import { extractPageModel } from '../compare/engine/extract-page-model.ts';
|
||||
import { comparePageModels } from '../compare/engine/compare-page-models.ts';
|
||||
import { renderVisualDiff } from '../compare/engine/visual-diff.ts';
|
||||
import { extractDocumentSignatures } from '../compare/engine/page-signatures.ts';
|
||||
import { pairPages } from '../compare/engine/pair-pages.ts';
|
||||
import { recognizePageCanvas } from '../compare/engine/ocr-page.ts';
|
||||
import { exportCompareHtmlReport } from '../compare/reporting/export-html-report.ts';
|
||||
import { isLowQualityExtractedText } from '../compare/engine/text-normalization.ts';
|
||||
import type {
|
||||
ComparePdfExportMode,
|
||||
CompareCaches,
|
||||
CompareRenderContext,
|
||||
} from '../compare/types.ts';
|
||||
import { exportComparePdf } from '../compare/reporting/export-compare-pdf.ts';
|
||||
import { LRUCache } from '../compare/lru-cache.ts';
|
||||
import { COMPARE_CACHE_MAX_SIZE } from '../compare/config.ts';
|
||||
import {
|
||||
getElement,
|
||||
computeComparisonForPair,
|
||||
getComparisonCacheKey,
|
||||
} from './compare-render.ts';
|
||||
|
||||
pdfjsLib.GlobalWorkerOptions.workerSrc = new URL(
|
||||
'pdfjs-dist/build/pdf.worker.min.mjs',
|
||||
@@ -39,343 +44,31 @@ const pageState: CompareState = {
|
||||
ocrLanguage: 'eng',
|
||||
};
|
||||
|
||||
const pageModelCache = new Map<string, ComparePageModel>();
|
||||
const comparisonCache = new Map<string, ComparePageResult>();
|
||||
const comparisonResultsCache = new Map<number, ComparePageResult>();
|
||||
const caches: CompareCaches = {
|
||||
pageModelCache: new LRUCache(COMPARE_CACHE_MAX_SIZE),
|
||||
comparisonCache: new LRUCache(COMPARE_CACHE_MAX_SIZE),
|
||||
comparisonResultsCache: new LRUCache(COMPARE_CACHE_MAX_SIZE),
|
||||
};
|
||||
const documentNames = {
|
||||
left: 'first.pdf',
|
||||
right: 'second.pdf',
|
||||
};
|
||||
|
||||
/** Outcome of rendering one page to a canvas. */
type RenderedPage = {
  /** Extracted page model, or null when the page does not exist. */
  model: ComparePageModel | null;
  /** Whether the requested page exists in the document. */
  exists: boolean;
};
|
||||
|
||||
/** Outcome of loading one side of a page pair for comparison. */
type ComparisonPageLoad = {
  /** Page model for this side, or null when the pair has no page here. */
  model: ComparePageModel | null;
  /** Whether a page was available on this side. */
  exists: boolean;
};
|
||||
|
||||
/** Rectangular region, in canvas pixels, that encloses the detected changes. */
type DiffFocusRegion = {
  /** Left edge of the region. */
  x: number;
  /** Top edge of the region. */
  y: number;
  /** Horizontal extent of the region. */
  width: number;
  /** Vertical extent of the region. */
  height: number;
};
|
||||
|
||||
/**
 * Looks up an element by id and types it as `T`.
 * The cast is unchecked: the caller chooses the expected element type.
 *
 * @returns The element, or null when no element has that id.
 */
function getElement<T extends HTMLElement>(id: string) {
  const node = document.getElementById(id);
  return node as T | null;
}
|
||||
|
||||
/** Shrinks the canvas to 1×1 pixels and clears that pixel. */
function clearCanvas(canvas: HTMLCanvasElement) {
  const ctx = canvas.getContext('2d');
  canvas.width = 1;
  canvas.height = 1;
  if (ctx) {
    ctx.clearRect(0, 0, 1, 1);
  }
}
|
||||
|
||||
/**
 * Blanks the canvas and shows `message` in the placeholder element.
 *
 * @param canvas - Canvas to clear.
 * @param placeholderId - Id of the placeholder element; ignored if not found.
 * @param message - Text shown in the placeholder.
 */
function renderMissingPage(
  canvas: HTMLCanvasElement,
  placeholderId: string,
  message: string
) {
  clearCanvas(canvas);

  const placeholder = getElement<HTMLDivElement>(placeholderId);
  if (!placeholder) return;

  placeholder.textContent = message;
  placeholder.classList.remove('hidden');
}
|
||||
|
||||
/** Hides the placeholder element with the given id, if it exists. */
function hidePlaceholder(placeholderId: string) {
  getElement<HTMLDivElement>(placeholderId)?.classList.add('hidden');
}
|
||||
|
||||
/**
 * Chooses the scale at which a page is rendered so that it fits the width of
 * `container`.
 *
 * The result is never below 1.0 and never above 2.5 in overlay view or 2.0
 * otherwise.
 *
 * @param page - Page whose unscaled width is measured.
 * @param container - Element whose `clientWidth` bounds the page.
 */
function getRenderScale(page: pdfjsLib.PDFPageProxy, container: HTMLElement) {
  const { width: pageWidth } = page.getViewport({ scale: 1.0 });
  const isOverlay = pageState.viewMode === 'overlay';

  // Horizontal space subtracted from the container before fitting, with a
  // floor of 320px of usable width.
  const reservedWidth = isOverlay ? 96 : 56;
  const availableWidth = Math.max(container.clientWidth - reservedWidth, 320);

  // Guard against a zero-width page to avoid dividing by zero.
  const fitScale = availableWidth / Math.max(pageWidth, 1);
  const maxScale = isOverlay ? 2.5 : 2.0;

  return Math.min(Math.max(fitScale, 1.0), maxScale);
}
|
||||
|
||||
/**
 * Builds the page-model cache key for one side, page and render scale.
 * The scale is fixed to three decimals so nearly equal scales share a key.
 *
 * @returns A key of the form `side-page-scale`, e.g. `left-3-1.200`.
 */
function getPageModelCacheKey(
  cacheKeyPrefix: 'left' | 'right',
  pageNum: number,
  scale: number
) {
  return [cacheKeyPrefix, pageNum, scale.toFixed(3)].join('-');
}
|
||||
|
||||
/**
 * Decides whether a page model should be replaced by an OCR result: true
 * when the page has no text or its extracted text is judged low quality.
 */
function shouldUseOcrForModel(model: ComparePageModel) {
  if (!model.hasText) return true;
  return isLowQualityExtractedText(model.plainText);
}
|
||||
|
||||
/**
 * Computes the padded region that encloses every change rectangle of a
 * comparison.
 *
 * Rectangles from each side are first shifted by the offset that centres that
 * side's canvas inside the larger of the two canvases. The enclosing bounds
 * are padded by 40px, clamped to the combined canvas, and given a minimum
 * size of 320×200 (or the combined canvas size, if smaller).
 *
 * @param comparison - Comparison whose change rectangles are enclosed.
 * @param leftCanvas - Canvas of the left page; only its size is read.
 * @param rightCanvas - Canvas of the right page; only its size is read.
 * @returns The focus region, or undefined when there are no rectangles.
 */
function buildDiffFocusRegion(
  comparison: ComparePageResult,
  leftCanvas: HTMLCanvasElement,
  rightCanvas: HTMLCanvasElement
): DiffFocusRegion | undefined {
  const widest = Math.max(leftCanvas.width, rightCanvas.width);
  const tallest = Math.max(leftCanvas.height, rightCanvas.height);

  // Offsets that centre each canvas within the combined area.
  const leftShift = {
    x: Math.floor((widest - leftCanvas.width) / 2),
    y: Math.floor((tallest - leftCanvas.height) / 2),
  };
  const rightShift = {
    x: Math.floor((widest - rightCanvas.width) / 2),
    y: Math.floor((tallest - rightCanvas.height) / 2),
  };

  let minX = Infinity;
  let minY = Infinity;
  let maxX = -Infinity;
  let maxY = -Infinity;

  const include = (
    rect: { x: number; y: number; width: number; height: number },
    shift: { x: number; y: number }
  ) => {
    const left = rect.x + shift.x;
    const top = rect.y + shift.y;
    minX = Math.min(minX, left);
    minY = Math.min(minY, top);
    maxX = Math.max(maxX, left + rect.width);
    maxY = Math.max(maxY, top + rect.height);
  };

  for (const change of comparison.changes) {
    for (const rect of change.beforeRects) include(rect, leftShift);
    for (const rect of change.afterRects) include(rect, rightShift);
  }

  // Still at the initial sentinel: no rectangle was seen.
  if (!Number.isFinite(minX)) {
    return undefined;
  }

  const fullWidth = Math.max(widest, 1);
  const fullHeight = Math.max(tallest, 1);
  const padding = 40;

  const x = Math.max(Math.floor(minX - padding), 0);
  const y = Math.max(Math.floor(minY - padding), 0);
  const right = Math.min(Math.ceil(maxX + padding), fullWidth);
  const bottom = Math.min(Math.ceil(maxY + padding), fullHeight);

  return {
    x,
    y,
    width: Math.max(right - x, Math.min(320, fullWidth)),
    height: Math.max(bottom - y, Math.min(200, fullHeight)),
  };
}
|
||||
|
||||
/**
 * Renders one page onto `canvas` and returns its page model.
 *
 * The model comes from the page-model cache when present; otherwise text is
 * extracted in parallel with the canvas render. When OCR is enabled and a
 * freshly extracted model has no usable text, the rendered canvas is run
 * through OCR and that result is used instead. The final model is cached
 * under a key built from side, page number and scale.
 *
 * @param pdfDoc - Document that owns the page.
 * @param pageNum - 1-based page number.
 * @param canvas - Canvas to draw on.
 * @param container - Element whose width determines the render scale.
 * @param placeholderId - Id of the "missing page" placeholder element.
 * @param cacheKeyPrefix - Side of the comparison, used in the cache key.
 * @returns The model and `exists: true`, or a null model and `exists: false`
 *   when `pageNum` is beyond the last page.
 */
async function renderPage(
  pdfDoc: pdfjsLib.PDFDocumentProxy,
  pageNum: number,
  canvas: HTMLCanvasElement,
  container: HTMLElement,
  placeholderId: string,
  cacheKeyPrefix: 'left' | 'right'
): Promise<RenderedPage> {
  if (pageNum > pdfDoc.numPages) {
    renderMissingPage(
      canvas,
      placeholderId,
      `Page ${pageNum} does not exist in this PDF.`
    );
    return { model: null, exists: false };
  }

  const page = await pdfDoc.getPage(pageNum);
  const targetScale = getRenderScale(page, container);

  // The CSS size uses the target scale; the backing store is multiplied by
  // the device pixel ratio.
  const cssViewport = page.getViewport({ scale: targetScale });
  const pixelRatio = window.devicePixelRatio || 1;
  const deviceViewport = page.getViewport({ scale: targetScale * pixelRatio });

  hidePlaceholder(placeholderId);

  canvas.width = deviceViewport.width;
  canvas.height = deviceViewport.height;
  canvas.style.width = `${cssViewport.width}px`;
  canvas.style.height = `${cssViewport.height}px`;

  const cacheKey = getPageModelCacheKey(cacheKeyPrefix, pageNum, targetScale);
  const cachedModel = pageModelCache.get(cacheKey);

  // Start text extraction (if needed) and rendering together.
  const pendingModel = cachedModel
    ? Promise.resolve(cachedModel)
    : extractPageModel(page, cssViewport);
  const pendingRender = page.render({
    canvasContext: canvas.getContext('2d')!,
    viewport: deviceViewport,
    canvas,
  }).promise;

  const [model] = await Promise.all([pendingModel, pendingRender]);

  let finalModel = model;

  // Cached models are used as they are; OCR only runs on fresh extractions.
  const needsOcr =
    !cachedModel && pageState.useOcr && shouldUseOcrForModel(model);
  if (needsOcr) {
    showLoader(`Running OCR on page ${pageNum}...`);
    const ocrModel = await recognizePageCanvas(
      canvas,
      pageState.ocrLanguage,
      (status, progress) => {
        showLoader(`OCR: ${status}`, progress * 100);
      }
    );
    finalModel = { ...ocrModel, pageNumber: pageNum };
  }

  pageModelCache.set(cacheKey, finalModel);

  return { model: finalModel, exists: true };
}
|
||||
|
||||
/**
 * Loads the page model for one side of a pair.
 *
 * With a `renderTarget` the page is rendered on screen through `renderPage`.
 * Without one, the model is taken from the cache or produced from an
 * offscreen canvas at a fixed scale of 1.2, with OCR applied when enabled and
 * the extracted text is unusable.
 *
 * @param pdfDoc - Document for this side, or null.
 * @param pageNum - 1-based page number, or null when the pair has no page here.
 * @param side - Side of the comparison, used in the cache key.
 * @param renderTarget - Optional on-screen canvas, container and placeholder.
 * @returns The model and whether a page was available.
 * @throws Error when the offscreen canvas has no 2D context.
 */
async function loadComparisonPage(
  pdfDoc: pdfjsLib.PDFDocumentProxy | null,
  pageNum: number | null,
  side: 'left' | 'right',
  renderTarget?: {
    canvas: HTMLCanvasElement;
    container: HTMLElement;
    placeholderId: string;
  }
): Promise<ComparisonPageLoad> {
  // Nothing to load on this side of the pair.
  if (!pdfDoc || !pageNum) {
    if (renderTarget) {
      renderMissingPage(
        renderTarget.canvas,
        renderTarget.placeholderId,
        'No paired page for this side.'
      );
    }
    return { model: null, exists: false };
  }

  // On-screen path: delegate rendering and model extraction.
  if (renderTarget) {
    const { canvas: targetCanvas, container, placeholderId } = renderTarget;
    return renderPage(
      pdfDoc,
      pageNum,
      targetCanvas,
      container,
      placeholderId,
      side
    );
  }

  // Offscreen path.
  const renderScale = 1.2;
  const cacheKey = getPageModelCacheKey(side, pageNum, renderScale);
  const cachedModel = pageModelCache.get(cacheKey);
  if (cachedModel) {
    return { model: cachedModel, exists: true };
  }

  const page = await pdfDoc.getPage(pageNum);
  const viewport = page.getViewport({ scale: renderScale });

  const canvas = document.createElement('canvas');
  canvas.width = viewport.width;
  canvas.height = viewport.height;
  const context = canvas.getContext('2d');
  if (!context) {
    throw new Error('Could not create offscreen comparison canvas.');
  }

  const extractedModel = await extractPageModel(page, viewport);
  await page.render({ canvasContext: context, viewport, canvas }).promise;

  let finalModel = extractedModel;
  if (pageState.useOcr && shouldUseOcrForModel(extractedModel)) {
    const ocrModel = await recognizePageCanvas(canvas, pageState.ocrLanguage);
    finalModel = { ...ocrModel, pageNumber: pageNum };
  }

  pageModelCache.set(cacheKey, finalModel);
  return { model: finalModel, exists: true };
}
|
||||
|
||||
/**
 * Builds the comparison result for a single page pair.
 *
 * Both sides are loaded through `loadComparisonPage` (the left side completes
 * before the right side starts), their models are diffed with
 * `comparePageModels`, and the pair's confidence is copied onto the result.
 *
 * When a diff canvas is supplied it is either filled by `renderVisualDiff`
 * (both sides present) or cleared (status `left-only` / `right-only`).
 *
 * @param pair The page pair to compare.
 * @param options Optional on-screen render targets for both sides plus an
 *   optional canvas for the visual diff.
 * @returns The comparison result for the pair.
 */
async function computeComparisonForPair(
  pair: ComparePagePair,
  options?: {
    renderTargets?: {
      left: {
        canvas: HTMLCanvasElement;
        container: HTMLElement;
        placeholderId: string;
      };
      right: {
        canvas: HTMLCanvasElement;
        container: HTMLElement;
        placeholderId: string;
      };
      diffCanvas?: HTMLCanvasElement;
    };
  }
) {
  const targets = options?.renderTargets;

  const { model: leftModel } = await loadComparisonPage(
    pageState.pdfDoc1,
    pair.leftPageNumber,
    'left',
    targets?.left
  );
  const { model: rightModel } = await loadComparisonPage(
    pageState.pdfDoc2,
    pair.rightPageNumber,
    'right',
    targets?.right
  );

  const comparison = comparePageModels(leftModel, rightModel);
  comparison.confidence = pair.confidence;

  // Without a diff canvas there is nothing further to draw or clear.
  const diffCanvas = targets?.diffCanvas;
  if (!targets || !diffCanvas) {
    return comparison;
  }

  // A one-sided pair has nothing to diff visually; just blank the canvas.
  const isOneSided =
    comparison.status === 'left-only' || comparison.status === 'right-only';
  if (isOneSided) {
    clearCanvas(diffCanvas);
    return comparison;
  }

  const focusRegion = buildDiffFocusRegion(
    comparison,
    targets.left.canvas,
    targets.right.canvas
  );
  comparison.visualDiff = renderVisualDiff(
    targets.left.canvas,
    targets.right.canvas,
    diffCanvas,
    focusRegion
  );

  return comparison;
}
|
||||
// Counter incremented at the start of each renderBothPages() call. A render
// captures the incremented value and, after awaiting the comparison, returns
// early if the counter has moved on (i.e. a newer render has started).
let renderGeneration = 0;
|
||||
|
||||
/**
 * Returns the page pair selected by `pageState.currentPage` (1-based), or
 * `null` when there is no pair at that position.
 */
function getActivePair() {
  const pairIndex = pageState.currentPage - 1;
  const activePair = pageState.pagePairs[pairIndex];
  return activePair ? activePair : null;
}
|
||||
|
||||
/**
 * Snapshots the render-related settings from `pageState` and bundles them
 * with the `showLoader` callback into a `CompareRenderContext`.
 */
function getRenderContext(): CompareRenderContext {
  const { useOcr, ocrLanguage, viewMode } = pageState;
  return { useOcr, ocrLanguage, viewMode, showLoader };
}
|
||||
|
||||
function getVisibleChanges(result: ComparePageResult | null) {
|
||||
if (!result) return [];
|
||||
|
||||
@@ -508,14 +201,16 @@ function renderChangeList() {
|
||||
const emptyState = getElement<HTMLDivElement>('change-list-empty');
|
||||
const prevChangeBtn = getElement<HTMLButtonElement>('prev-change-btn');
|
||||
const nextChangeBtn = getElement<HTMLButtonElement>('next-change-btn');
|
||||
const exportReportBtn = getElement<HTMLButtonElement>('export-report-btn');
|
||||
const exportDropdownBtn = getElement<HTMLButtonElement>(
|
||||
'export-dropdown-btn'
|
||||
);
|
||||
|
||||
if (
|
||||
!list ||
|
||||
!emptyState ||
|
||||
!prevChangeBtn ||
|
||||
!nextChangeBtn ||
|
||||
!exportReportBtn
|
||||
!exportDropdownBtn
|
||||
)
|
||||
return;
|
||||
|
||||
@@ -531,7 +226,7 @@ function renderChangeList() {
|
||||
list.classList.add('hidden');
|
||||
prevChangeBtn.disabled = true;
|
||||
nextChangeBtn.disabled = true;
|
||||
exportReportBtn.disabled = pageState.pagePairs.length === 0;
|
||||
exportDropdownBtn.disabled = pageState.pagePairs.length === 0;
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -560,7 +255,7 @@ function renderChangeList() {
|
||||
|
||||
prevChangeBtn.disabled = false;
|
||||
nextChangeBtn.disabled = false;
|
||||
exportReportBtn.disabled = pageState.pagePairs.length === 0;
|
||||
exportDropdownBtn.disabled = pageState.pagePairs.length === 0;
|
||||
}
|
||||
|
||||
function renderComparisonUI() {
|
||||
@@ -600,34 +295,31 @@ async function buildPagePairs() {
|
||||
|
||||
async function buildReportResults() {
|
||||
const results: ComparePageResult[] = [];
|
||||
const ctx = getRenderContext();
|
||||
|
||||
for (const pair of pageState.pagePairs) {
|
||||
const cached = comparisonResultsCache.get(pair.pairIndex);
|
||||
const cached = caches.comparisonResultsCache.get(pair.pairIndex);
|
||||
if (cached) {
|
||||
results.push(cached);
|
||||
continue;
|
||||
}
|
||||
|
||||
const leftSignatureKey = pair.leftPageNumber
|
||||
? `left-${pair.leftPageNumber}`
|
||||
: '';
|
||||
const rightSignatureKey = pair.rightPageNumber
|
||||
? `right-${pair.rightPageNumber}`
|
||||
: '';
|
||||
const cachedResult = comparisonCache.get(
|
||||
`${leftSignatureKey || 'none'}:${rightSignatureKey || 'none'}:${pageState.useOcr ? 'ocr' : 'no-ocr'}`
|
||||
);
|
||||
const cacheKey = getComparisonCacheKey(pair, pageState.useOcr);
|
||||
const cachedResult = caches.comparisonCache.get(cacheKey);
|
||||
if (cachedResult) {
|
||||
results.push(cachedResult);
|
||||
continue;
|
||||
}
|
||||
|
||||
const comparison = await computeComparisonForPair(pair);
|
||||
comparisonCache.set(
|
||||
`${leftSignatureKey || 'none'}:${rightSignatureKey || 'none'}:${pageState.useOcr ? 'ocr' : 'no-ocr'}`,
|
||||
comparison
|
||||
const comparison = await computeComparisonForPair(
|
||||
pageState.pdfDoc1,
|
||||
pageState.pdfDoc2,
|
||||
pair,
|
||||
caches,
|
||||
ctx
|
||||
);
|
||||
comparisonResultsCache.set(pair.pairIndex, comparison);
|
||||
caches.comparisonCache.set(cacheKey, comparison);
|
||||
caches.comparisonResultsCache.set(pair.pairIndex, comparison);
|
||||
results.push(comparison);
|
||||
}
|
||||
|
||||
@@ -640,6 +332,8 @@ async function renderBothPages() {
|
||||
const pair = getActivePair();
|
||||
if (!pair) return;
|
||||
|
||||
const gen = ++renderGeneration;
|
||||
|
||||
showLoader(
|
||||
`Loading comparison ${pageState.currentPage} of ${pageState.pagePairs.length}...`
|
||||
);
|
||||
@@ -652,27 +346,35 @@ async function renderBothPages() {
|
||||
) as HTMLCanvasElement;
|
||||
const panel1 = getElement<HTMLElement>('panel-1') as HTMLElement;
|
||||
const panel2 = getElement<HTMLElement>('panel-2') as HTMLElement;
|
||||
const wrapper = getElement<HTMLElement>(
|
||||
'compare-viewer-wrapper'
|
||||
) as HTMLElement;
|
||||
|
||||
const container1 = panel1;
|
||||
const container2 = pageState.viewMode === 'overlay' ? panel1 : panel2;
|
||||
|
||||
const comparison = await computeComparisonForPair(pair, {
|
||||
renderTargets: {
|
||||
left: {
|
||||
canvas: canvas1,
|
||||
container: container1,
|
||||
placeholderId: 'placeholder-1',
|
||||
const ctx = getRenderContext();
|
||||
|
||||
const comparison = await computeComparisonForPair(
|
||||
pageState.pdfDoc1,
|
||||
pageState.pdfDoc2,
|
||||
pair,
|
||||
caches,
|
||||
ctx,
|
||||
{
|
||||
renderTargets: {
|
||||
left: {
|
||||
canvas: canvas1,
|
||||
container: container1,
|
||||
placeholderId: 'placeholder-1',
|
||||
},
|
||||
right: {
|
||||
canvas: canvas2,
|
||||
container: container2,
|
||||
placeholderId: 'placeholder-2',
|
||||
},
|
||||
},
|
||||
right: {
|
||||
canvas: canvas2,
|
||||
container: container2,
|
||||
placeholderId: 'placeholder-2',
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
);
|
||||
|
||||
if (gen !== renderGeneration) return;
|
||||
|
||||
pageState.currentComparison = comparison;
|
||||
pageState.activeChangeIndex = 0;
|
||||
@@ -815,9 +517,9 @@ async function handleFileInput(
|
||||
showLoader(`Loading ${file.name}...`);
|
||||
const arrayBuffer = await file.arrayBuffer();
|
||||
pageState[docKey] = await getPDFDocument({ data: arrayBuffer }).promise;
|
||||
pageModelCache.clear();
|
||||
comparisonCache.clear();
|
||||
comparisonResultsCache.clear();
|
||||
caches.pageModelCache.clear();
|
||||
caches.comparisonCache.clear();
|
||||
caches.comparisonResultsCache.clear();
|
||||
pageState.changeSearchQuery = '';
|
||||
|
||||
const searchInput = getElement<HTMLInputElement>('compare-search-input');
|
||||
@@ -880,7 +582,7 @@ document.addEventListener('DOMContentLoaded', function () {
|
||||
prevBtn.addEventListener('click', function () {
|
||||
if (pageState.currentPage > 1) {
|
||||
pageState.currentPage--;
|
||||
renderBothPages();
|
||||
renderBothPages().catch(console.error);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -895,7 +597,7 @@ document.addEventListener('DOMContentLoaded', function () {
|
||||
);
|
||||
if (pageState.currentPage < totalPairs) {
|
||||
pageState.currentPage++;
|
||||
renderBothPages();
|
||||
renderBothPages().catch(console.error);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -955,7 +657,10 @@ document.addEventListener('DOMContentLoaded', function () {
|
||||
) as HTMLInputElement;
|
||||
const prevChangeBtn = getElement<HTMLButtonElement>('prev-change-btn');
|
||||
const nextChangeBtn = getElement<HTMLButtonElement>('next-change-btn');
|
||||
const exportReportBtn = getElement<HTMLButtonElement>('export-report-btn');
|
||||
const exportDropdownBtn = getElement<HTMLButtonElement>(
|
||||
'export-dropdown-btn'
|
||||
);
|
||||
const exportDropdownMenu = getElement<HTMLDivElement>('export-dropdown-menu');
|
||||
const ocrToggle = getElement<HTMLInputElement>('ocr-toggle');
|
||||
const searchInput = getElement<HTMLInputElement>('compare-search-input');
|
||||
|
||||
@@ -1037,12 +742,17 @@ document.addEventListener('DOMContentLoaded', function () {
|
||||
if (ocrToggle) {
|
||||
ocrToggle.checked = pageState.useOcr;
|
||||
ocrToggle.addEventListener('change', async function () {
|
||||
pageState.useOcr = ocrToggle.checked;
|
||||
pageModelCache.clear();
|
||||
comparisonCache.clear();
|
||||
comparisonResultsCache.clear();
|
||||
if (pageState.pdfDoc1 && pageState.pdfDoc2) {
|
||||
await renderBothPages();
|
||||
try {
|
||||
pageState.useOcr = ocrToggle.checked;
|
||||
caches.pageModelCache.clear();
|
||||
caches.comparisonCache.clear();
|
||||
caches.comparisonResultsCache.clear();
|
||||
if (pageState.pdfDoc1 && pageState.pdfDoc2) {
|
||||
await renderBothPages();
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('OCR toggle failed:', e);
|
||||
hideLoader();
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1063,22 +773,48 @@ document.addEventListener('DOMContentLoaded', function () {
|
||||
|
||||
window.cancelAnimationFrame(resizeFrame);
|
||||
resizeFrame = window.requestAnimationFrame(function () {
|
||||
renderBothPages();
|
||||
renderBothPages().catch(console.error);
|
||||
});
|
||||
});
|
||||
|
||||
if (exportReportBtn) {
|
||||
exportReportBtn.addEventListener('click', async function () {
|
||||
if (pageState.pagePairs.length === 0) return;
|
||||
showLoader('Building compare report...');
|
||||
const results = await buildReportResults();
|
||||
exportCompareHtmlReport(
|
||||
documentNames.left,
|
||||
documentNames.right,
|
||||
pageState.pagePairs,
|
||||
results
|
||||
);
|
||||
hideLoader();
|
||||
if (exportDropdownBtn && exportDropdownMenu) {
|
||||
exportDropdownBtn.addEventListener('click', function (e) {
|
||||
e.stopPropagation();
|
||||
exportDropdownMenu.classList.toggle('hidden');
|
||||
});
|
||||
|
||||
document.addEventListener('click', function () {
|
||||
exportDropdownMenu.classList.add('hidden');
|
||||
});
|
||||
|
||||
exportDropdownMenu.addEventListener('click', function (e) {
|
||||
e.stopPropagation();
|
||||
});
|
||||
|
||||
document.querySelectorAll('.export-menu-item').forEach(function (btn) {
|
||||
btn.addEventListener('click', async function () {
|
||||
const mode = (btn as HTMLElement).dataset
|
||||
.exportMode as ComparePdfExportMode;
|
||||
if (!mode || pageState.pagePairs.length === 0) return;
|
||||
exportDropdownMenu.classList.add('hidden');
|
||||
try {
|
||||
showLoader('Preparing PDF export...');
|
||||
await exportComparePdf(
|
||||
mode,
|
||||
pageState.pdfDoc1,
|
||||
pageState.pdfDoc2,
|
||||
pageState.pagePairs,
|
||||
function (message, percent) {
|
||||
showLoader(message, percent);
|
||||
}
|
||||
);
|
||||
} catch (e) {
|
||||
console.error('PDF export failed:', e);
|
||||
showAlert('Export Error', 'Could not export comparison PDF.');
|
||||
} finally {
|
||||
hideLoader();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
365
src/js/logic/compare-render.ts
Normal file
365
src/js/logic/compare-render.ts
Normal file
@@ -0,0 +1,365 @@
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
import type {
|
||||
ComparePageModel,
|
||||
ComparePagePair,
|
||||
ComparePageResult,
|
||||
RenderedPage,
|
||||
ComparisonPageLoad,
|
||||
DiffFocusRegion,
|
||||
CompareCaches,
|
||||
CompareRenderContext,
|
||||
} from '../compare/types.ts';
|
||||
import { extractPageModel } from '../compare/engine/extract-page-model.ts';
|
||||
import { comparePageModels } from '../compare/engine/compare-page-models.ts';
|
||||
import { renderVisualDiff } from '../compare/engine/visual-diff.ts';
|
||||
import { recognizePageCanvas } from '../compare/engine/ocr-page.ts';
|
||||
import { isLowQualityExtractedText } from '../compare/engine/text-normalization.ts';
|
||||
import { COMPARE_RENDER, COMPARE_GEOMETRY } from '../compare/config.ts';
|
||||
|
||||
/**
 * Typed wrapper around `document.getElementById`.
 *
 * @param id The element id to look up.
 * @returns The element cast to `T`, or `null` when no element has that id.
 *   The cast is unchecked; the caller is responsible for choosing `T`.
 */
export function getElement<T extends HTMLElement>(id: string): T | null {
  const match = document.getElementById(id);
  return match as T | null;
}
|
||||
|
||||
/**
 * Collapses a canvas to 1x1 pixels and clears that single pixel.
 *
 * @param canvas The canvas to reset. If it cannot provide a 2D context the
 *   resize still happens and the clear is skipped.
 */
export function clearCanvas(canvas: HTMLCanvasElement) {
  const ctx2d = canvas.getContext('2d');
  const collapsedSize = 1;

  canvas.width = collapsedSize;
  canvas.height = collapsedSize;

  if (ctx2d) {
    ctx2d.clearRect(0, 0, collapsedSize, collapsedSize);
  }
}
|
||||
|
||||
/**
 * Blanks a canvas and reveals its placeholder with an explanatory message.
 *
 * @param canvas Canvas to reset via `clearCanvas`.
 * @param placeholderId Id of the placeholder element; if no such element
 *   exists only the canvas is cleared.
 * @param message Text written into the placeholder.
 */
export function renderMissingPage(
  canvas: HTMLCanvasElement,
  placeholderId: string,
  message: string
) {
  clearCanvas(canvas);

  const note = getElement<HTMLDivElement>(placeholderId);
  if (!note) {
    return;
  }

  note.textContent = message;
  note.classList.remove('hidden');
}
|
||||
|
||||
/**
 * Hides the placeholder element with the given id by adding the `hidden`
 * class. Does nothing when the element is not found.
 */
export function hidePlaceholder(placeholderId: string) {
  const note = getElement<HTMLDivElement>(placeholderId);
  if (note) {
    note.classList.add('hidden');
  }
}
|
||||
|
||||
/**
 * Picks the scale at which a page should be rendered to fit its container.
 *
 * The scale that fits the page width into the container is computed, then
 * clamped to at least 1.0 and at most the view-mode-specific maximum from
 * `COMPARE_RENDER`.
 *
 * @param page The PDF page; only its width at scale 1.0 is used.
 * @param container The element whose `clientWidth` bounds the page.
 * @param viewMode Current view mode; selects the reserved width and the
 *   maximum scale.
 * @returns The clamped render scale.
 */
export function getRenderScale(
  page: pdfjsLib.PDFPageProxy,
  container: HTMLElement,
  viewMode: 'overlay' | 'side-by-side'
) {
  const { width: pageWidth } = page.getViewport({ scale: 1.0 });
  const isOverlay = viewMode === 'overlay';

  // Pixels subtracted from the container width before fitting (presumably
  // panel padding/chrome - confirm against the stylesheet). The usable width
  // never drops below 320.
  const reservedWidth = isOverlay ? 96 : 56;
  const usableWidth = Math.max(container.clientWidth - reservedWidth, 320);

  // Guard against a zero-width viewport when dividing.
  const fitScale = usableWidth / Math.max(pageWidth, 1);

  const upperBound = isOverlay
    ? COMPARE_RENDER.MAX_SCALE_OVERLAY
    : COMPARE_RENDER.MAX_SCALE_SIDE;
  const atLeastOne = Math.max(fitScale, 1.0);

  return Math.min(atLeastOne, upperBound);
}
|
||||
|
||||
/**
 * Builds the page-model cache key for one side, page and render scale.
 *
 * @param cacheKeyPrefix Side the page belongs to.
 * @param pageNum Page number.
 * @param scale Render scale; formatted with three decimals so scales that
 *   agree to three places share a key.
 * @returns A key of the form `<side>-<page>-<scale>`, e.g. `left-3-1.200`.
 */
export function getPageModelCacheKey(
  cacheKeyPrefix: 'left' | 'right',
  pageNum: number,
  scale: number
) {
  const scaleTag = scale.toFixed(3);
  return [cacheKeyPrefix, pageNum, scaleTag].join('-');
}
|
||||
|
||||
/**
 * Decides whether a page model should be replaced by an OCR result: true when
 * the model reports no text, or when `isLowQualityExtractedText` flags its
 * plain text.
 */
function shouldUseOcrForModel(model: ComparePageModel) {
  if (!model.hasText) {
    return true;
  }
  return isLowQualityExtractedText(model.plainText);
}
|
||||
|
||||
/**
 * Computes the rectangle that encloses every changed rect of a comparison,
 * expressed in the coordinate space of the larger of the two canvases.
 *
 * Each canvas is treated as centred inside the union of both canvas sizes,
 * so `beforeRects` are shifted by the left canvas offset and `afterRects` by
 * the right canvas offset. The enclosing box is padded by
 * `COMPARE_GEOMETRY.FOCUS_REGION_PADDING`, clamped to the canvas, and grown
 * to the configured minimum width/height (capped at the canvas size).
 *
 * When growing to the minimum size would push the region past the right or
 * bottom edge, its origin is shifted back so the region stays inside the
 * canvas.
 *
 * @param comparison Comparison whose `changes` supply the rects.
 * @param leftCanvas Canvas holding the left page.
 * @param rightCanvas Canvas holding the right page.
 * @returns The focus region, or `undefined` when there are no rects (or the
 *   computed bounds are not finite).
 */
export function buildDiffFocusRegion(
  comparison: ComparePageResult,
  leftCanvas: HTMLCanvasElement,
  rightCanvas: HTMLCanvasElement
): DiffFocusRegion | undefined {
  const unionWidth = Math.max(leftCanvas.width, rightCanvas.width);
  const unionHeight = Math.max(leftCanvas.height, rightCanvas.height);

  // Offsets that centre each canvas inside the union area.
  const leftOffsetX = Math.floor((unionWidth - leftCanvas.width) / 2);
  const leftOffsetY = Math.floor((unionHeight - leftCanvas.height) / 2);
  const rightOffsetX = Math.floor((unionWidth - rightCanvas.width) / 2);
  const rightOffsetY = Math.floor((unionHeight - rightCanvas.height) / 2);

  const bounds = {
    minX: Infinity,
    minY: Infinity,
    maxX: -Infinity,
    maxY: -Infinity,
  };

  const include = (
    rect: { x: number; y: number; width: number; height: number },
    offsetX: number,
    offsetY: number
  ): void => {
    bounds.minX = Math.min(bounds.minX, rect.x + offsetX);
    bounds.minY = Math.min(bounds.minY, rect.y + offsetY);
    bounds.maxX = Math.max(bounds.maxX, rect.x + offsetX + rect.width);
    bounds.maxY = Math.max(bounds.maxY, rect.y + offsetY + rect.height);
  };

  for (const change of comparison.changes) {
    for (const rect of change.beforeRects) {
      include(rect, leftOffsetX, leftOffsetY);
    }
    for (const rect of change.afterRects) {
      include(rect, rightOffsetX, rightOffsetY);
    }
  }

  // No rects were visited (or a coordinate was NaN): nothing to focus on.
  if (!Number.isFinite(bounds.minX)) {
    return undefined;
  }

  const fullWidth = Math.max(unionWidth, 1);
  const fullHeight = Math.max(unionHeight, 1);
  const padding = COMPARE_GEOMETRY.FOCUS_REGION_PADDING;

  const paddedX = Math.max(Math.floor(bounds.minX - padding), 0);
  const paddedY = Math.max(Math.floor(bounds.minY - padding), 0);
  const maxX = Math.min(Math.ceil(bounds.maxX + padding), fullWidth);
  const maxY = Math.min(Math.ceil(bounds.maxY + padding), fullHeight);

  const width = Math.max(
    maxX - paddedX,
    Math.min(COMPARE_GEOMETRY.FOCUS_REGION_MIN_WIDTH, fullWidth)
  );
  const height = Math.max(
    maxY - paddedY,
    Math.min(COMPARE_GEOMETRY.FOCUS_REGION_MIN_HEIGHT, fullHeight)
  );

  // width/height never exceed the canvas, so pulling the origin back keeps
  // the (possibly enlarged) region fully inside it.
  const x = Math.max(Math.min(paddedX, fullWidth - width), 0);
  const y = Math.max(Math.min(paddedY, fullHeight - height), 0);

  return { x, y, width, height };
}
|
||||
|
||||
/**
 * Draws one PDF page into an on-screen canvas and returns its text model.
 *
 * The page is rendered at `getRenderScale(...)` multiplied by the device
 * pixel ratio, while the canvas CSS size and the extracted text model use the
 * unmultiplied scale. Text extraction and rendering run concurrently. When
 * OCR is enabled and the freshly extracted model has no usable text, the
 * rendered canvas is passed to `recognizePageCanvas` and the OCR model
 * (stamped with `pageNum`) replaces it. The resulting model is stored in
 * `caches.pageModelCache`.
 *
 * @param pdfDoc Document to read the page from.
 * @param pageNum Page number to render.
 * @param canvas Target canvas; its pixel and CSS size are overwritten.
 * @param container Element whose width determines the render scale.
 * @param placeholderId Id of the placeholder shown when the page is missing
 *   and hidden otherwise.
 * @param cacheKeyPrefix Side the page belongs to; prefixes the cache key.
 * @param caches Shared caches; only `pageModelCache` is used here.
 * @param ctx Render settings (view mode, OCR options) and loader callback.
 * @returns `{ model, exists: true }`, or `{ model: null, exists: false }`
 *   when `pageNum` exceeds the document's page count.
 * @throws Error when the canvas cannot provide a 2D rendering context.
 */
export async function renderPage(
  pdfDoc: pdfjsLib.PDFDocumentProxy,
  pageNum: number,
  canvas: HTMLCanvasElement,
  container: HTMLElement,
  placeholderId: string,
  cacheKeyPrefix: 'left' | 'right',
  caches: CompareCaches,
  ctx: CompareRenderContext
): Promise<RenderedPage> {
  if (pageNum > pdfDoc.numPages) {
    renderMissingPage(
      canvas,
      placeholderId,
      `Page ${pageNum} does not exist in this PDF.`
    );
    return { model: null, exists: false };
  }

  const page = await pdfDoc.getPage(pageNum);

  // Fail early with a clear message, before touching the UI or starting any
  // async work, instead of handing a null context to the renderer.
  const canvasContext = canvas.getContext('2d');
  if (!canvasContext) {
    throw new Error('Could not acquire a 2D context for the comparison canvas.');
  }

  const targetScale = getRenderScale(page, container, ctx.viewMode);
  const scaledViewport = page.getViewport({ scale: targetScale });
  const dpr = window.devicePixelRatio || 1;
  const hiResViewport = page.getViewport({ scale: targetScale * dpr });

  hidePlaceholder(placeholderId);

  // Backing store at device resolution, laid out at the logical size.
  canvas.width = hiResViewport.width;
  canvas.height = hiResViewport.height;
  canvas.style.width = `${scaledViewport.width}px`;
  canvas.style.height = `${scaledViewport.height}px`;

  const cacheKey = getPageModelCacheKey(cacheKeyPrefix, pageNum, targetScale);
  const cachedModel = caches.pageModelCache.get(cacheKey);
  const modelPromise = cachedModel
    ? Promise.resolve(cachedModel)
    : extractPageModel(page, scaledViewport);
  const renderTask = page.render({
    canvasContext,
    viewport: hiResViewport,
    canvas,
  }).promise;

  const [model] = await Promise.all([modelPromise, renderTask]);

  let finalModel = model;

  // A cached model is reused as-is and is not sent through OCR again.
  if (!cachedModel && ctx.useOcr && shouldUseOcrForModel(model)) {
    ctx.showLoader(`Running OCR on page ${pageNum}...`);
    const ocrModel = await recognizePageCanvas(
      canvas,
      ctx.ocrLanguage,
      function (status, progress) {
        ctx.showLoader(`OCR: ${status}`, progress * 100);
      }
    );
    finalModel = {
      ...ocrModel,
      pageNumber: pageNum,
    };
  }

  caches.pageModelCache.set(cacheKey, finalModel);

  return { model: finalModel, exists: true };
}
|
||||
|
||||
/**
 * Loads the text model for one side of a page pair.
 *
 * - If the document or the page number is missing, the "no paired page"
 *   placeholder is shown (when a render target was supplied) and
 *   `{ model: null, exists: false }` is returned.
 * - If a render target is supplied, the work is delegated to `renderPage`,
 *   which draws the page into the target canvas and returns its model.
 * - Otherwise the model is produced offscreen at
 *   `COMPARE_RENDER.OFFLINE_SCALE` and stored in `caches.pageModelCache`.
 *
 * The offscreen canvas is only ever used as OCR input, so the page is
 * rasterized only when OCR is enabled and `shouldUseOcrForModel` asks for it.
 * The canvas is released whether or not rendering/OCR succeeds.
 *
 * @param pdfDoc Document for this side, or `null` when none is loaded.
 * @param pageNum Page number within `pdfDoc`, or `null` when the pair has no
 *   page on this side.
 * @param side Which side is being loaded; also used as the cache key prefix.
 * @param renderTarget On-screen canvas/container/placeholder trio, or
 *   `undefined` for offscreen loading.
 * @param caches Shared caches; only `pageModelCache` is used here.
 * @param ctx Render settings (OCR options) and loader callback.
 * @returns The page model and whether the page exists.
 * @throws Error when OCR is required but no 2D canvas context is available.
 */
export async function loadComparisonPage(
  pdfDoc: pdfjsLib.PDFDocumentProxy | null,
  pageNum: number | null,
  side: 'left' | 'right',
  renderTarget:
    | {
        canvas: HTMLCanvasElement;
        container: HTMLElement;
        placeholderId: string;
      }
    | undefined,
  caches: CompareCaches,
  ctx: CompareRenderContext
): Promise<ComparisonPageLoad> {
  if (!pdfDoc || !pageNum) {
    if (renderTarget) {
      renderMissingPage(
        renderTarget.canvas,
        renderTarget.placeholderId,
        'No paired page for this side.'
      );
    }
    return { model: null, exists: false };
  }

  if (renderTarget) {
    return renderPage(
      pdfDoc,
      pageNum,
      renderTarget.canvas,
      renderTarget.container,
      renderTarget.placeholderId,
      side,
      caches,
      ctx
    );
  }

  const renderScale = COMPARE_RENDER.OFFLINE_SCALE;
  const cacheKey = getPageModelCacheKey(side, pageNum, renderScale);
  const cachedModel = caches.pageModelCache.get(cacheKey);
  if (cachedModel) {
    return { model: cachedModel, exists: true };
  }

  const page = await pdfDoc.getPage(pageNum);
  const viewport = page.getViewport({ scale: renderScale });
  const extractedModel = await extractPageModel(page, viewport);

  let finalModel = extractedModel;
  if (ctx.useOcr && shouldUseOcrForModel(extractedModel)) {
    // Rasterize lazily: the bitmap is needed only as OCR input.
    const canvas = document.createElement('canvas');
    try {
      canvas.width = viewport.width;
      canvas.height = viewport.height;
      const context = canvas.getContext('2d');
      if (!context) {
        throw new Error('Could not create offscreen comparison canvas.');
      }

      await page.render({
        canvasContext: context,
        viewport,
        canvas,
      }).promise;

      const ocrModel = await recognizePageCanvas(canvas, ctx.ocrLanguage);
      finalModel = {
        ...ocrModel,
        pageNumber: pageNum,
      };
    } finally {
      // Shrink the canvas so its pixel buffer can be reclaimed promptly,
      // including when rendering or OCR fails.
      canvas.width = 0;
      canvas.height = 0;
    }
  }

  caches.pageModelCache.set(cacheKey, finalModel);
  return { model: finalModel, exists: true };
}
|
||||
|
||||
/**
 * Builds the comparison result for a single page pair.
 *
 * Both sides are loaded through `loadComparisonPage` (the left side completes
 * before the right side starts), their models are diffed with
 * `comparePageModels`, and the pair's confidence is copied onto the result.
 *
 * When a diff canvas is supplied it is either filled by `renderVisualDiff`
 * (both sides present) or cleared (status `left-only` / `right-only`).
 *
 * @param pdfDoc1 Left document, or `null`.
 * @param pdfDoc2 Right document, or `null`.
 * @param pair The page pair to compare.
 * @param caches Shared caches forwarded to `loadComparisonPage`.
 * @param ctx Render settings forwarded to `loadComparisonPage`.
 * @param options Optional on-screen render targets for both sides plus an
 *   optional canvas for the visual diff.
 * @returns The comparison result for the pair.
 */
export async function computeComparisonForPair(
  pdfDoc1: pdfjsLib.PDFDocumentProxy | null,
  pdfDoc2: pdfjsLib.PDFDocumentProxy | null,
  pair: ComparePagePair,
  caches: CompareCaches,
  ctx: CompareRenderContext,
  options?: {
    renderTargets?: {
      left: {
        canvas: HTMLCanvasElement;
        container: HTMLElement;
        placeholderId: string;
      };
      right: {
        canvas: HTMLCanvasElement;
        container: HTMLElement;
        placeholderId: string;
      };
      diffCanvas?: HTMLCanvasElement;
    };
  }
) {
  const targets = options?.renderTargets;

  const { model: leftModel } = await loadComparisonPage(
    pdfDoc1,
    pair.leftPageNumber,
    'left',
    targets?.left,
    caches,
    ctx
  );
  const { model: rightModel } = await loadComparisonPage(
    pdfDoc2,
    pair.rightPageNumber,
    'right',
    targets?.right,
    caches,
    ctx
  );

  const comparison = comparePageModels(leftModel, rightModel);
  comparison.confidence = pair.confidence;

  // Without a diff canvas there is nothing further to draw or clear.
  const diffCanvas = targets?.diffCanvas;
  if (!targets || !diffCanvas) {
    return comparison;
  }

  // A one-sided pair has nothing to diff visually; just blank the canvas.
  const isOneSided =
    comparison.status === 'left-only' || comparison.status === 'right-only';
  if (isOneSided) {
    clearCanvas(diffCanvas);
    return comparison;
  }

  const focusRegion = buildDiffFocusRegion(
    comparison,
    targets.left.canvas,
    targets.right.canvas
  );
  comparison.visualDiff = renderVisualDiff(
    targets.left.canvas,
    targets.right.canvas,
    diffCanvas,
    focusRegion
  );

  return comparison;
}
|
||||
|
||||
/**
 * Builds the comparison cache key for a page pair.
 *
 * @param pair The page pair; a missing (falsy) page number on either side is
 *   encoded as `none`.
 * @param useOcr Whether OCR is enabled; part of the key so OCR and non-OCR
 *   results are cached separately.
 * @returns A key of the form `left-<n>:right-<m>:ocr|no-ocr`.
 */
export function getComparisonCacheKey(pair: ComparePagePair, useOcr: boolean) {
  const { leftPageNumber, rightPageNumber } = pair;
  const segments = [
    leftPageNumber ? `left-${leftPageNumber}` : 'none',
    rightPageNumber ? `right-${rightPageNumber}` : 'none',
    useOcr ? 'ocr' : 'no-ocr',
  ];
  return segments.join(':');
}
|
||||
@@ -1 +1,9 @@
|
||||
export type { CompareState } from '../compare/types.ts';
|
||||
export type {
|
||||
CompareState,
|
||||
ComparePdfExportMode,
|
||||
RenderedPage,
|
||||
ComparisonPageLoad,
|
||||
DiffFocusRegion,
|
||||
CompareCaches,
|
||||
CompareRenderContext,
|
||||
} from '../compare/types.ts';
|
||||
|
||||
Reference in New Issue
Block a user