diff --git a/src/js/compare/config.ts b/src/js/compare/config.ts new file mode 100644 index 0000000..e0ccc59 --- /dev/null +++ b/src/js/compare/config.ts @@ -0,0 +1,37 @@ +export const COMPARE_COLORS = { + added: { r: 34, g: 197, b: 94 }, + removed: { r: 239, g: 68, b: 68 }, + modified: { r: 245, g: 158, b: 11 }, +} as const; + +export const HIGHLIGHT_OPACITY = 0.28; + +export const COMPARE_GEOMETRY = { + LINE_TOLERANCE_FACTOR: 0.6, + MIN_LINE_TOLERANCE: 4, + FOCUS_REGION_PADDING: 40, + FOCUS_REGION_MIN_WIDTH: 320, + FOCUS_REGION_MIN_HEIGHT: 200, +} as const; + +export const COMPARE_RENDER = { + OFFLINE_SCALE: 1.2, + MAX_SCALE_OVERLAY: 2.5, + MAX_SCALE_SIDE: 2.0, + EXPORT_EXTRACT_SCALE: 1.0, + SPLIT_GAP_PT: 2, +} as const; + +export const COMPARE_TEXT = { + DEFAULT_CHAR_WIDTH: 1, + DEFAULT_SPACE_WIDTH: 0.33, +} as const; + +export const VISUAL_DIFF = { + PIXELMATCH_THRESHOLD: 0.12, + ALPHA: 0.2, + DIFF_COLOR: [239, 68, 68] as readonly [number, number, number], + DIFF_COLOR_ALT: [34, 197, 94] as readonly [number, number, number], +} as const; + +export const COMPARE_CACHE_MAX_SIZE = 50; diff --git a/src/js/compare/engine/diff-text-runs.ts b/src/js/compare/engine/diff-text-runs.ts index af1e3ef..d5fa213 100644 --- a/src/js/compare/engine/diff-text-runs.ts +++ b/src/js/compare/engine/diff-text-runs.ts @@ -8,6 +8,8 @@ import type { CompareTextItem, CompareWordToken, } from '../types.ts'; +import { calculateBoundingRect } from './text-normalization.ts'; +import { COMPARE_GEOMETRY } from '../config.ts'; interface WordToken { word: string; @@ -86,7 +88,11 @@ function groupAdjacentRects(rects: CompareRectangle[]): CompareRectangle[] { const lastRect = prev[prev.length - 1]; const curr = sorted[i]; const sameLine = - Math.abs(curr.y - lastRect.y) < Math.max(lastRect.height * 0.6, 4); + Math.abs(curr.y - lastRect.y) < + Math.max( + lastRect.height * COMPARE_GEOMETRY.LINE_TOLERANCE_FACTOR, + COMPARE_GEOMETRY.MIN_LINE_TOLERANCE + ); const close = curr.x <= lastRect.x + lastRect.width + lastRect.height * 2; if (sameLine && close) { @@ -96,13 +102,7 @@ function groupAdjacentRects(rects: CompareRectangle[]): CompareRectangle[] { } } - return groups.map((group) => { - const minX = Math.min(...group.map((r) => r.x)); - const minY = Math.min(...group.map((r) => r.y)); - const maxX = Math.max(...group.map((r) => r.x + r.width)); - const maxY = Math.max(...group.map((r) => r.y + r.height)); - return { x: minX, y: minY, width: maxX - minX, height: maxY - minY }; - }); + return groups.map((group) => calculateBoundingRect(group)); } function collapseWords(words: WordToken[]) { diff --git a/src/js/compare/engine/extract-page-model.ts b/src/js/compare/engine/extract-page-model.ts index 087b447..d1938fd 100644 --- a/src/js/compare/engine/extract-page-model.ts +++ b/src/js/compare/engine/extract-page-model.ts @@ -33,8 +33,10 @@ const textMeasurementCache: Map | null = measurementContext : null; let lastMeasurementFont = ''; -const DEFAULT_CHAR_WIDTH = 1; -const DEFAULT_SPACE_WIDTH = 0.33; +import { COMPARE_TEXT, COMPARE_GEOMETRY } from '../config.ts'; + +const DEFAULT_CHAR_WIDTH = COMPARE_TEXT.DEFAULT_CHAR_WIDTH; +const DEFAULT_SPACE_WIDTH = COMPARE_TEXT.DEFAULT_SPACE_WIDTH; function shouldJoinTokenWithPrevious(previous: string, current: string) { if (!previous) return false; @@ -261,8 +263,9 @@ function toRect( export function sortCompareTextItems(items: CompareTextItem[]) { return [...items].sort((left, right) => { const lineTolerance = Math.max( - Math.min(left.rect.height, right.rect.height) * 0.6, - 4 + Math.min(left.rect.height, right.rect.height) * + COMPARE_GEOMETRY.LINE_TOLERANCE_FACTOR, + COMPARE_GEOMETRY.MIN_LINE_TOLERANCE ); const topDiff = left.rect.y - right.rect.y; @@ -450,8 +453,9 @@ export function mergeIntoLines( const anchor = currentLine[0]; const curr = sortedItems[i]; const lineTolerance = Math.max( - Math.min(anchor.rect.height, curr.rect.height) * 0.6, - 4 + Math.min(anchor.rect.height, curr.rect.height) * + COMPARE_GEOMETRY.LINE_TOLERANCE_FACTOR, + COMPARE_GEOMETRY.MIN_LINE_TOLERANCE ); if (Math.abs(curr.rect.y - anchor.rect.y) <= lineTolerance) { diff --git a/src/js/compare/engine/pair-pages.ts b/src/js/compare/engine/pair-pages.ts index d9621c2..12da83e 100644 --- a/src/js/compare/engine/pair-pages.ts +++ b/src/js/compare/engine/pair-pages.ts @@ -1,8 +1,5 @@ import type { ComparePagePair, ComparePageSignature } from '../types.ts'; - -function tokenize(text: string) { - return new Set(text.split(/\s+/).filter(Boolean)); -} +import { tokenizeTextAsSet } from './text-normalization.ts'; function similarityScore( left: ComparePageSignature, @@ -16,8 +13,8 @@ function similarityScore( return 0.08; } - const leftTokens = tokenize(left.plainText); - const rightTokens = tokenize(right.plainText); + const leftTokens = tokenizeTextAsSet(left.plainText); + const rightTokens = tokenizeTextAsSet(right.plainText); const union = new Set([...leftTokens, ...rightTokens]); let intersectionCount = 0; diff --git a/src/js/compare/engine/text-normalization.ts b/src/js/compare/engine/text-normalization.ts index 1166dd1..6bc9e1b 100644 --- a/src/js/compare/engine/text-normalization.ts +++ b/src/js/compare/engine/text-normalization.ts @@ -1,4 +1,4 @@ -import type { CompareTextItem } from '../types.ts'; +import type { CompareRectangle, CompareTextItem } from '../types.ts'; export function normalizeCompareText(text: string) { return text @@ -62,3 +62,22 @@ export function isLowQualityExtractedText(text: string) { return false; } + +export function tokenizeText(text: string): string[] { + return text.split(/\s+/).filter(Boolean); +} + +export function tokenizeTextAsSet(text: string): Set { + return new Set(tokenizeText(text)); +} + +export function calculateBoundingRect( + rects: CompareRectangle[] +): CompareRectangle { + if (rects.length === 0) return { x: 0, y: 0, width: 0, height: 0 }; + const minX = Math.min(...rects.map((r) => r.x)); + const minY = Math.min(...rects.map((r) => r.y)); + const maxX = Math.max(...rects.map((r) => r.x + r.width)); + const maxY = Math.max(...rects.map((r) => r.y + r.height)); + return { x: minX, y: minY, width: maxX - minX, height: maxY - minY }; +} diff --git a/src/js/compare/engine/visual-diff.ts b/src/js/compare/engine/visual-diff.ts index b8e891c..0057472 100644 --- a/src/js/compare/engine/visual-diff.ts +++ b/src/js/compare/engine/visual-diff.ts @@ -1,6 +1,7 @@ import pixelmatch from 'pixelmatch'; import type { CompareVisualDiff } from '../types.ts'; +import { VISUAL_DIFF as VISUAL_DIFF_CONFIG } from '../config.ts'; type FocusRegion = { x: number; @@ -69,12 +70,16 @@ export function renderVisualDiff( width, height, { - threshold: 0.12, + threshold: VISUAL_DIFF_CONFIG.PIXELMATCH_THRESHOLD, includeAA: false, - alpha: 0.2, + alpha: VISUAL_DIFF_CONFIG.ALPHA, diffMask: false, - diffColor: [239, 68, 68], - diffColorAlt: [34, 197, 94], + diffColor: [...VISUAL_DIFF_CONFIG.DIFF_COLOR] as [number, number, number], + diffColorAlt: [...VISUAL_DIFF_CONFIG.DIFF_COLOR_ALT] as [ + number, + number, + number, + ], } ); diff --git a/src/js/compare/lru-cache.ts b/src/js/compare/lru-cache.ts new file mode 100644 index 0000000..591f3c1 --- /dev/null +++ b/src/js/compare/lru-cache.ts @@ -0,0 +1,38 @@ +export class LRUCache { + private map = new Map(); + private maxSize: number; + + constructor(maxSize: number) { + this.maxSize = maxSize; + } + + get(key: K): V | undefined { + const value = this.map.get(key); + if (value !== undefined) { + this.map.delete(key); + this.map.set(key, value); + } + return value; + } + + set(key: K, value: V) { + this.map.delete(key); + this.map.set(key, value); + if (this.map.size > this.maxSize) { + const oldest = this.map.keys().next().value; + if (oldest !== undefined) this.map.delete(oldest); + } + } + + has(key: K): boolean { + return this.map.has(key); + } + + clear() { + this.map.clear(); + } + + get size(): number { + return this.map.size; + } +} diff --git a/src/js/compare/reporting/build-report.ts b/src/js/compare/reporting/build-report.ts deleted file mode 100644 index ad7b9e1..0000000 --- a/src/js/compare/reporting/build-report.ts +++ /dev/null @@ -1,77 +0,0 @@ -import type { ComparePagePair, ComparePageResult } from '../types.ts'; - -function escapeHtml(text: string) { - return text - .replace(/&/g, '&') - .replace(//g, '>') - .replace(/"/g, '"') - .replace(/'/g, '''); -} - -export function buildCompareReport( - fileName1: string, - fileName2: string, - pairs: ComparePagePair[], - results: ComparePageResult[] -) { - const totals = results.reduce( - (summary, result) => { - summary.added += result.summary.added; - summary.removed += result.summary.removed; - summary.modified += result.summary.modified; - return summary; - }, - { added: 0, removed: 0, modified: 0 } - ); - - const rows = results - .map((result, index) => { - const pair = pairs[index]; - const changes = result.changes - .map( - (change) => - `
  • ${escapeHtml(change.type)}: ${escapeHtml(change.description)}
  • ` - ) - .join(''); - - return ` -
    -

    Comparison ${pair?.pairIndex || index + 1}

    -

    PDF 1 page: ${pair?.leftPageNumber ?? 'none'} | PDF 2 page: ${pair?.rightPageNumber ?? 'none'} | Confidence: ${((pair?.confidence || 0) * 100).toFixed(0)}%

    -

    Status: ${escapeHtml(result.status)}${result.usedOcr ? ' | OCR used' : ''}

    -

    Added: ${result.summary.added} | Removed: ${result.summary.removed} | Modified: ${result.summary.modified}

    -
      ${changes || '
    • No semantic changes detected.
    • '}
    -
    - `; - }) - .join(''); - - return ` - - - - - Compare report - - - -

    PDF Compare Report

    -

    PDF 1: ${escapeHtml(fileName1)} | PDF 2: ${escapeHtml(fileName2)}

    -
    -
    Added
    ${totals.added}
    -
    Removed
    ${totals.removed}
    -
    Modified
    ${totals.modified}
    -
    - ${rows} - -`; -} diff --git a/src/js/compare/reporting/export-compare-pdf.ts b/src/js/compare/reporting/export-compare-pdf.ts new file mode 100644 index 0000000..0e66cff --- /dev/null +++ b/src/js/compare/reporting/export-compare-pdf.ts @@ -0,0 +1,239 @@ +import { PDFDocument, rgb } from 'pdf-lib'; +import * as pdfjsLib from 'pdfjs-dist'; +import type { + ComparePagePair, + CompareTextChange, + ComparePdfExportMode, +} from '../types.ts'; +import { extractPageModel } from '../engine/extract-page-model.ts'; +import { comparePageModels } from '../engine/compare-page-models.ts'; +import { + COMPARE_COLORS, + HIGHLIGHT_OPACITY, + COMPARE_RENDER, +} from '../config.ts'; +import { downloadFile } from '../../utils/helpers.ts'; + +const HIGHLIGHT_COLORS: Record< + string, + { r: number; g: number; b: number; opacity: number } +> = { + added: { + r: COMPARE_COLORS.added.r / 255, + g: COMPARE_COLORS.added.g / 255, + b: COMPARE_COLORS.added.b / 255, + opacity: HIGHLIGHT_OPACITY, + }, + removed: { + r: COMPARE_COLORS.removed.r / 255, + g: COMPARE_COLORS.removed.g / 255, + b: COMPARE_COLORS.removed.b / 255, + opacity: HIGHLIGHT_OPACITY, + }, + 'page-removed': { + r: COMPARE_COLORS.removed.r / 255, + g: COMPARE_COLORS.removed.g / 255, + b: COMPARE_COLORS.removed.b / 255, + opacity: HIGHLIGHT_OPACITY, + }, + modified: { + r: COMPARE_COLORS.modified.r / 255, + g: COMPARE_COLORS.modified.g / 255, + b: COMPARE_COLORS.modified.b / 255, + opacity: HIGHLIGHT_OPACITY, + }, +}; + +const EXTRACT_SCALE = COMPARE_RENDER.EXPORT_EXTRACT_SCALE; + +function drawHighlights( + page: ReturnType, + pageHeight: number, + changes: CompareTextChange[], + side: 'before' | 'after' +) { + for (const change of changes) { + const rects = side === 'before' ? change.beforeRects : change.afterRects; + const color = HIGHLIGHT_COLORS[change.type]; + if (!color) continue; + for (const rect of rects) { + page.drawRectangle({ + x: rect.x / EXTRACT_SCALE, + y: pageHeight - rect.y / EXTRACT_SCALE - rect.height / EXTRACT_SCALE, + width: rect.width / EXTRACT_SCALE, + height: rect.height / EXTRACT_SCALE, + color: rgb(color.r, color.g, color.b), + opacity: color.opacity, + }); + } + } +} + +export async function exportComparePdf( + mode: ComparePdfExportMode, + pdfDoc1: pdfjsLib.PDFDocumentProxy | null, + pdfDoc2: pdfjsLib.PDFDocumentProxy | null, + pairs: ComparePagePair[], + onProgress?: (message: string, percent: number) => void +) { + if (!pdfDoc1 && !pdfDoc2) { + throw new Error('At least one PDF document is required for export.'); + } + if (!pairs || pairs.length === 0) { + throw new Error('No page pairs to export.'); + } + + const outDoc = await PDFDocument.create(); + + const [bytes1, bytes2] = await Promise.all([ + pdfDoc1?.getData(), + pdfDoc2?.getData(), + ]); + + const [libDoc1, libDoc2] = await Promise.all([ + bytes1 ? PDFDocument.load(bytes1, { ignoreEncryption: true }) : null, + bytes2 ? PDFDocument.load(bytes2, { ignoreEncryption: true }) : null, + ]); + + for (let i = 0; i < pairs.length; i++) { + const pair = pairs[i]; + onProgress?.( + `Rendering page ${i + 1} of ${pairs.length}...`, + Math.round(((i + 1) / pairs.length) * 100) + ); + + const leftPdjsPage = + pair.leftPageNumber && pdfDoc1 + ? await pdfDoc1.getPage(pair.leftPageNumber) + : null; + const rightPdjsPage = + pair.rightPageNumber && pdfDoc2 + ? await pdfDoc2.getPage(pair.rightPageNumber) + : null; + + const leftModel = leftPdjsPage + ? await extractPageModel( + leftPdjsPage, + leftPdjsPage.getViewport({ scale: EXTRACT_SCALE }) + ) + : null; + const rightModel = rightPdjsPage + ? await extractPageModel( + rightPdjsPage, + rightPdjsPage.getViewport({ scale: EXTRACT_SCALE }) + ) + : null; + + const comparison = comparePageModels(leftModel, rightModel); + const changes = comparison.changes; + + if (mode === 'split') { + const refPage = leftPdjsPage || rightPdjsPage; + const vp = refPage!.getViewport({ scale: 1.0 }); + const gap = COMPARE_RENDER.SPLIT_GAP_PT; + const totalW = vp.width * 2 + gap; + const outPage = outDoc.addPage([totalW, vp.height]); + + if (pair.leftPageNumber && libDoc1) { + const [copied] = await outDoc.copyPages(libDoc1, [ + pair.leftPageNumber - 1, + ]); + const embedded = await outDoc.embedPage(copied); + outPage.drawPage(embedded, { + x: 0, + y: 0, + width: vp.width, + height: vp.height, + }); + } + if (pair.rightPageNumber && libDoc2) { + const [copied] = await outDoc.copyPages(libDoc2, [ + pair.rightPageNumber - 1, + ]); + const embedded = await outDoc.embedPage(copied); + outPage.drawPage(embedded, { + x: vp.width + gap, + y: 0, + width: vp.width, + height: vp.height, + }); + } + + if (changes.length) { + for (const change of changes) { + const color = HIGHLIGHT_COLORS[change.type]; + if (!color) continue; + for (const rect of change.beforeRects) { + outPage.drawRectangle({ + x: rect.x / EXTRACT_SCALE, + y: + vp.height - + rect.y / EXTRACT_SCALE - + rect.height / EXTRACT_SCALE, + width: rect.width / EXTRACT_SCALE, + height: rect.height / EXTRACT_SCALE, + color: rgb(color.r, color.g, color.b), + opacity: color.opacity, + }); + } + for (const rect of change.afterRects) { + outPage.drawRectangle({ + x: vp.width + gap + rect.x / EXTRACT_SCALE, + y: + vp.height - + rect.y / EXTRACT_SCALE - + rect.height / EXTRACT_SCALE, + width: rect.width / EXTRACT_SCALE, + height: rect.height / EXTRACT_SCALE, + color: rgb(color.r, color.g, color.b), + opacity: color.opacity, + }); + } + } + } + } else if (mode === 'alternating') { + if (pair.leftPageNumber && libDoc1) { + const [copied] = await outDoc.copyPages(libDoc1, [ + pair.leftPageNumber - 1, + ]); + const embedded = outDoc.addPage(copied); + const { height } = embedded.getSize(); + if (changes.length) drawHighlights(embedded, height, changes, 'before'); + } + if (pair.rightPageNumber && libDoc2) { + const [copied] = await outDoc.copyPages(libDoc2, [ + pair.rightPageNumber - 1, + ]); + const embedded = outDoc.addPage(copied); + const { height } = embedded.getSize(); + if (changes.length) drawHighlights(embedded, height, changes, 'after'); + } + } else if (mode === 'left') { + if (pair.leftPageNumber && libDoc1) { + const [copied] = await outDoc.copyPages(libDoc1, [ + pair.leftPageNumber - 1, + ]); + const embedded = outDoc.addPage(copied); + const { height } = embedded.getSize(); + if (changes.length) drawHighlights(embedded, height, changes, 'before'); + } + } else { + if (pair.rightPageNumber && libDoc2) { + const [copied] = await outDoc.copyPages(libDoc2, [ + pair.rightPageNumber - 1, + ]); + const embedded = outDoc.addPage(copied); + const { height } = embedded.getSize(); + if (changes.length) drawHighlights(embedded, height, changes, 'after'); + } + } + + await new Promise((r) => setTimeout(r, 0)); + } + + const pdfBytes = await outDoc.save(); + const blob = new Blob([pdfBytes.buffer as ArrayBuffer], { + type: 'application/pdf', + }); + downloadFile(blob, 'bentopdf-compare-export.pdf'); +} diff --git a/src/js/compare/reporting/export-html-report.ts b/src/js/compare/reporting/export-html-report.ts deleted file mode 100644 index 7e3a4a4..0000000 --- a/src/js/compare/reporting/export-html-report.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { buildCompareReport } from './build-report.ts'; -import type { ComparePagePair, ComparePageResult } from '../types.ts'; - -export function exportCompareHtmlReport( - fileName1: string, - fileName2: string, - pairs: ComparePagePair[], - results: ComparePageResult[] -) { - const html = buildCompareReport(fileName1, fileName2, pairs, results); - const blob = new Blob([html], { type: 'text/html;charset=utf-8' }); - const url = URL.createObjectURL(blob); - const anchor = document.createElement('a'); - anchor.href = url; - anchor.download = 'bentopdf-compare-report.html'; - anchor.click(); - URL.revokeObjectURL(url); -} diff --git a/src/js/compare/types.ts b/src/js/compare/types.ts index 9996609..30b7f71 100644 --- a/src/js/compare/types.ts +++ b/src/js/compare/types.ts @@ -1,7 +1,40 @@ import type * as pdfjsLib from 'pdfjs-dist'; +import type { LRUCache } from './lru-cache.ts'; export type CompareViewMode = 'overlay' | 'side-by-side'; +export type ComparePdfExportMode = 'split' | 'alternating' | 'left' | 'right'; + +export interface RenderedPage { + model: ComparePageModel | null; + exists: boolean; +} + +export interface ComparisonPageLoad { + model: ComparePageModel | null; + exists: boolean; +} + +export interface DiffFocusRegion { + x: number; + y: number; + width: number; + height: number; +} + +export interface CompareCaches { + pageModelCache: LRUCache; + comparisonCache: LRUCache; + comparisonResultsCache: LRUCache; +} + +export interface CompareRenderContext { + useOcr: boolean; + ocrLanguage: string; + viewMode: CompareViewMode; + showLoader: (message: string, percent?: number) => void; +} + export interface CompareRectangle { x: number; y: number; diff --git a/src/js/logic/compare-pdfs-page.ts b/src/js/logic/compare-pdfs-page.ts index 5caf8d7..2253e65 100644 --- a/src/js/logic/compare-pdfs-page.ts +++ b/src/js/logic/compare-pdfs-page.ts @@ -1,23 +1,28 @@ -import { showLoader, hideLoader, showAlert } from '../ui.js'; -import { getPDFDocument } from '../utils/helpers.js'; +import { showLoader, hideLoader, showAlert } from '../ui.ts'; +import { getPDFDocument } from '../utils/helpers.ts'; import { icons, createIcons } from 'lucide'; import * as pdfjsLib from 'pdfjs-dist'; import { CompareState } from '@/types'; import type { CompareFilterType, - ComparePageModel, - ComparePagePair, ComparePageResult, CompareTextChange, } from '../compare/types.ts'; -import { extractPageModel } from '../compare/engine/extract-page-model.ts'; -import { comparePageModels } from '../compare/engine/compare-page-models.ts'; -import { renderVisualDiff } from '../compare/engine/visual-diff.ts'; import { extractDocumentSignatures } from '../compare/engine/page-signatures.ts'; import { pairPages } from '../compare/engine/pair-pages.ts'; -import { recognizePageCanvas } from '../compare/engine/ocr-page.ts'; -import { exportCompareHtmlReport } from '../compare/reporting/export-html-report.ts'; -import { isLowQualityExtractedText } from '../compare/engine/text-normalization.ts'; +import type { + ComparePdfExportMode, + CompareCaches, + CompareRenderContext, +} from '../compare/types.ts'; +import { exportComparePdf } from '../compare/reporting/export-compare-pdf.ts'; +import { LRUCache } from '../compare/lru-cache.ts'; +import { COMPARE_CACHE_MAX_SIZE } from '../compare/config.ts'; +import { + getElement, + computeComparisonForPair, + getComparisonCacheKey, +} from './compare-render.ts'; pdfjsLib.GlobalWorkerOptions.workerSrc = new URL( 'pdfjs-dist/build/pdf.worker.min.mjs', @@ -39,343 +44,31 @@ const pageState: CompareState = { ocrLanguage: 'eng', }; -const pageModelCache = new Map(); -const comparisonCache = new Map(); -const comparisonResultsCache = new Map(); +const caches: CompareCaches = { + pageModelCache: new LRUCache(COMPARE_CACHE_MAX_SIZE), + comparisonCache: new LRUCache(COMPARE_CACHE_MAX_SIZE), + comparisonResultsCache: new LRUCache(COMPARE_CACHE_MAX_SIZE), +}; const documentNames = { left: 'first.pdf', right: 'second.pdf', }; -type RenderedPage = { - model: ComparePageModel | null; - exists: boolean; -}; - -type ComparisonPageLoad = { - model: ComparePageModel | null; - exists: boolean; -}; - -type DiffFocusRegion = { - x: number; - y: number; - width: number; - height: number; -}; - -function getElement(id: string) { - return document.getElementById(id) as T | null; -} - -function clearCanvas(canvas: HTMLCanvasElement) { - const context = canvas.getContext('2d'); - canvas.width = 1; - canvas.height = 1; - context?.clearRect(0, 0, 1, 1); -} - -function renderMissingPage( - canvas: HTMLCanvasElement, - placeholderId: string, - message: string -) { - clearCanvas(canvas); - const placeholder = getElement(placeholderId); - if (placeholder) { - placeholder.textContent = message; - placeholder.classList.remove('hidden'); - } -} - -function hidePlaceholder(placeholderId: string) { - const placeholder = getElement(placeholderId); - placeholder?.classList.add('hidden'); -} - -function getRenderScale(page: pdfjsLib.PDFPageProxy, container: HTMLElement) { - const baseViewport = page.getViewport({ scale: 1.0 }); - const availableWidth = Math.max( - container.clientWidth - (pageState.viewMode === 'overlay' ? 96 : 56), - 320 - ); - const fitScale = availableWidth / Math.max(baseViewport.width, 1); - const maxScale = pageState.viewMode === 'overlay' ? 2.5 : 2.0; - - return Math.min(Math.max(fitScale, 1.0), maxScale); -} - -function getPageModelCacheKey( - cacheKeyPrefix: 'left' | 'right', - pageNum: number, - scale: number -) { - return `${cacheKeyPrefix}-${pageNum}-${scale.toFixed(3)}`; -} - -function shouldUseOcrForModel(model: ComparePageModel) { - return !model.hasText || isLowQualityExtractedText(model.plainText); -} - -function buildDiffFocusRegion( - comparison: ComparePageResult, - leftCanvas: HTMLCanvasElement, - rightCanvas: HTMLCanvasElement -): DiffFocusRegion | undefined { - const leftOffsetX = Math.floor( - (Math.max(leftCanvas.width, rightCanvas.width) - leftCanvas.width) / 2 - ); - const leftOffsetY = Math.floor( - (Math.max(leftCanvas.height, rightCanvas.height) - leftCanvas.height) / 2 - ); - const rightOffsetX = Math.floor( - (Math.max(leftCanvas.width, rightCanvas.width) - rightCanvas.width) / 2 - ); - const rightOffsetY = Math.floor( - (Math.max(leftCanvas.height, rightCanvas.height) - rightCanvas.height) / 2 - ); - const bounds = { - minX: Infinity, - minY: Infinity, - maxX: -Infinity, - maxY: -Infinity, - }; - - for (const change of comparison.changes) { - for (const rect of change.beforeRects) { - bounds.minX = Math.min(bounds.minX, rect.x + leftOffsetX); - bounds.minY = Math.min(bounds.minY, rect.y + leftOffsetY); - bounds.maxX = Math.max(bounds.maxX, rect.x + leftOffsetX + rect.width); - bounds.maxY = Math.max(bounds.maxY, rect.y + leftOffsetY + rect.height); - } - - for (const rect of change.afterRects) { - bounds.minX = Math.min(bounds.minX, rect.x + rightOffsetX); - bounds.minY = Math.min(bounds.minY, rect.y + rightOffsetY); - bounds.maxX = Math.max(bounds.maxX, rect.x + rightOffsetX + rect.width); - bounds.maxY = Math.max(bounds.maxY, rect.y + rightOffsetY + rect.height); - } - } - - if (!Number.isFinite(bounds.minX)) { - return undefined; - } - - const fullWidth = Math.max(leftCanvas.width, rightCanvas.width, 1); - const fullHeight = Math.max(leftCanvas.height, rightCanvas.height, 1); - const padding = 40; - - const x = Math.max(Math.floor(bounds.minX - padding), 0); - const y = Math.max(Math.floor(bounds.minY - padding), 0); - const maxX = Math.min(Math.ceil(bounds.maxX + padding), fullWidth); - const maxY = Math.min(Math.ceil(bounds.maxY + padding), fullHeight); - - return { - x, - y, - width: Math.max(maxX - x, Math.min(320, fullWidth)), - height: Math.max(maxY - y, Math.min(200, fullHeight)), - }; -} - -async function renderPage( - pdfDoc: pdfjsLib.PDFDocumentProxy, - pageNum: number, - canvas: HTMLCanvasElement, - container: HTMLElement, - placeholderId: string, - cacheKeyPrefix: 'left' | 'right' -): Promise { - if (pageNum > pdfDoc.numPages) { - renderMissingPage( - canvas, - placeholderId, - `Page ${pageNum} does not exist in this PDF.` - ); - return { model: null, exists: false }; - } - - const page = await pdfDoc.getPage(pageNum); - - const targetScale = getRenderScale(page, container); - const scaledViewport = page.getViewport({ scale: targetScale }); - const dpr = window.devicePixelRatio || 1; - const hiResViewport = page.getViewport({ scale: targetScale * dpr }); - - hidePlaceholder(placeholderId); - - canvas.width = hiResViewport.width; - canvas.height = hiResViewport.height; - canvas.style.width = `${scaledViewport.width}px`; - canvas.style.height = `${scaledViewport.height}px`; - - const cacheKey = getPageModelCacheKey(cacheKeyPrefix, pageNum, targetScale); - const cachedModel = pageModelCache.get(cacheKey); - const modelPromise = cachedModel - ? Promise.resolve(cachedModel) - : extractPageModel(page, scaledViewport); - const renderTask = page.render({ - canvasContext: canvas.getContext('2d')!, - viewport: hiResViewport, - canvas, - }).promise; - - const [model] = await Promise.all([modelPromise, renderTask]); - - let finalModel = model; - - if (!cachedModel && pageState.useOcr && shouldUseOcrForModel(model)) { - showLoader(`Running OCR on page ${pageNum}...`); - const ocrModel = await recognizePageCanvas( - canvas, - pageState.ocrLanguage, - function (status, progress) { - showLoader(`OCR: ${status}`, progress * 100); - } - ); - finalModel = { - ...ocrModel, - pageNumber: pageNum, - }; - } - - pageModelCache.set(cacheKey, finalModel); - - return { model: finalModel, exists: true }; -} - -async function loadComparisonPage( - pdfDoc: pdfjsLib.PDFDocumentProxy | null, - pageNum: number | null, - side: 'left' | 'right', - renderTarget?: { - canvas: HTMLCanvasElement; - container: HTMLElement; - placeholderId: string; - } -): Promise { - if (!pdfDoc || !pageNum) { - if (renderTarget) { - renderMissingPage( - renderTarget.canvas, - renderTarget.placeholderId, - 'No paired page for this side.' - ); - } - return { model: null, exists: false }; - } - - if (renderTarget) { - return renderPage( - pdfDoc, - pageNum, - renderTarget.canvas, - renderTarget.container, - renderTarget.placeholderId, - side - ); - } - - const renderScale = 1.2; - const cacheKey = getPageModelCacheKey(side, pageNum, renderScale); - const cachedModel = pageModelCache.get(cacheKey); - if (cachedModel) { - return { model: cachedModel, exists: true }; - } - - const page = await pdfDoc.getPage(pageNum); - const viewport = page.getViewport({ scale: renderScale }); - const canvas = document.createElement('canvas'); - canvas.width = viewport.width; - canvas.height = viewport.height; - const context = canvas.getContext('2d'); - - if (!context) { - throw new Error('Could not create offscreen comparison canvas.'); - } - - const extractedModel = await extractPageModel(page, viewport); - await page.render({ - canvasContext: context, - viewport, - canvas, - }).promise; - - let finalModel = extractedModel; - if (pageState.useOcr && shouldUseOcrForModel(extractedModel)) { - const ocrModel = await recognizePageCanvas(canvas, pageState.ocrLanguage); - finalModel = { - ...ocrModel, - pageNumber: pageNum, - }; - } - - pageModelCache.set(cacheKey, finalModel); - return { model: finalModel, exists: true }; -} - -async function computeComparisonForPair( - pair: ComparePagePair, - options?: { - renderTargets?: { - left: { - canvas: HTMLCanvasElement; - container: HTMLElement; - placeholderId: string; - }; - right: { - canvas: HTMLCanvasElement; - container: HTMLElement; - placeholderId: string; - }; - diffCanvas?: HTMLCanvasElement; - }; - } -) { - const renderTargets = options?.renderTargets; - const leftPage = await loadComparisonPage( - pageState.pdfDoc1, - pair.leftPageNumber, - 'left', - renderTargets?.left - ); - const rightPage = await loadComparisonPage( - pageState.pdfDoc2, - pair.rightPageNumber, - 'right', - renderTargets?.right - ); - - const comparison = comparePageModels(leftPage.model, rightPage.model); - comparison.confidence = pair.confidence; - - if ( - renderTargets?.diffCanvas && - comparison.status !== 'left-only' && - comparison.status !== 'right-only' - ) { - const focusRegion = buildDiffFocusRegion( - comparison, - renderTargets.left.canvas, - renderTargets.right.canvas - ); - comparison.visualDiff = renderVisualDiff( - renderTargets.left.canvas, - renderTargets.right.canvas, - renderTargets.diffCanvas, - focusRegion - ); - } else if (renderTargets?.diffCanvas) { - clearCanvas(renderTargets.diffCanvas); - } - - return comparison; -} +let renderGeneration = 0; function getActivePair() { return pageState.pagePairs[pageState.currentPage - 1] || null; } +function getRenderContext(): CompareRenderContext { + return { + useOcr: pageState.useOcr, + ocrLanguage: pageState.ocrLanguage, + viewMode: pageState.viewMode, + showLoader, + }; +} + function getVisibleChanges(result: ComparePageResult | null) { if (!result) return []; @@ -508,14 +201,16 @@ function renderChangeList() { const emptyState = getElement('change-list-empty'); const prevChangeBtn = getElement('prev-change-btn'); const nextChangeBtn = getElement('next-change-btn'); - const exportReportBtn = getElement('export-report-btn'); + const exportDropdownBtn = getElement( + 'export-dropdown-btn' + ); if ( !list || !emptyState || !prevChangeBtn || !nextChangeBtn || - !exportReportBtn + !exportDropdownBtn ) return; @@ -531,7 +226,7 @@ function renderChangeList() { list.classList.add('hidden'); prevChangeBtn.disabled = true; nextChangeBtn.disabled = true; - exportReportBtn.disabled = pageState.pagePairs.length === 0; + exportDropdownBtn.disabled = pageState.pagePairs.length === 0; return; } @@ -560,7 +255,7 @@ function renderChangeList() { prevChangeBtn.disabled = false; nextChangeBtn.disabled = false; - exportReportBtn.disabled = pageState.pagePairs.length === 0; + exportDropdownBtn.disabled = pageState.pagePairs.length === 0; } function renderComparisonUI() { @@ -600,34 +295,31 @@ async function buildPagePairs() { async function buildReportResults() { const results: ComparePageResult[] = []; + const ctx = getRenderContext(); for (const pair of pageState.pagePairs) { - const cached = comparisonResultsCache.get(pair.pairIndex); + const cached = caches.comparisonResultsCache.get(pair.pairIndex); if (cached) { results.push(cached); continue; } - const leftSignatureKey = pair.leftPageNumber - ? `left-${pair.leftPageNumber}` - : ''; - const rightSignatureKey = pair.rightPageNumber - ? `right-${pair.rightPageNumber}` - : ''; - const cachedResult = comparisonCache.get( - `${leftSignatureKey || 'none'}:${rightSignatureKey || 'none'}:${pageState.useOcr ? 'ocr' : 'no-ocr'}` - ); + const cacheKey = getComparisonCacheKey(pair, pageState.useOcr); + const cachedResult = caches.comparisonCache.get(cacheKey); if (cachedResult) { results.push(cachedResult); continue; } - const comparison = await computeComparisonForPair(pair); - comparisonCache.set( - `${leftSignatureKey || 'none'}:${rightSignatureKey || 'none'}:${pageState.useOcr ? 'ocr' : 'no-ocr'}`, - comparison + const comparison = await computeComparisonForPair( + pageState.pdfDoc1, + pageState.pdfDoc2, + pair, + caches, + ctx ); - comparisonResultsCache.set(pair.pairIndex, comparison); + caches.comparisonCache.set(cacheKey, comparison); + caches.comparisonResultsCache.set(pair.pairIndex, comparison); results.push(comparison); } @@ -640,6 +332,8 @@ async function renderBothPages() { const pair = getActivePair(); if (!pair) return; + const gen = ++renderGeneration; + showLoader( `Loading comparison ${pageState.currentPage} of ${pageState.pagePairs.length}...` ); @@ -652,27 +346,35 @@ async function renderBothPages() { ) as HTMLCanvasElement; const panel1 = getElement('panel-1') as HTMLElement; const panel2 = getElement('panel-2') as HTMLElement; - const wrapper = getElement( - 'compare-viewer-wrapper' - ) as HTMLElement; const container1 = panel1; const container2 = pageState.viewMode === 'overlay' ? panel1 : panel2; - const comparison = await computeComparisonForPair(pair, { - renderTargets: { - left: { - canvas: canvas1, - container: container1, - placeholderId: 'placeholder-1', + const ctx = getRenderContext(); + + const comparison = await computeComparisonForPair( + pageState.pdfDoc1, + pageState.pdfDoc2, + pair, + caches, + ctx, + { + renderTargets: { + left: { + canvas: canvas1, + container: container1, + placeholderId: 'placeholder-1', + }, + right: { + canvas: canvas2, + container: container2, + placeholderId: 'placeholder-2', + }, }, - right: { - canvas: canvas2, - container: container2, - placeholderId: 'placeholder-2', - }, - }, - }); + } + ); + + if (gen !== renderGeneration) return; pageState.currentComparison = comparison; pageState.activeChangeIndex = 0; @@ -815,9 +517,9 @@ async function handleFileInput( showLoader(`Loading ${file.name}...`); const arrayBuffer = await file.arrayBuffer(); pageState[docKey] = await getPDFDocument({ data: arrayBuffer }).promise; - pageModelCache.clear(); - comparisonCache.clear(); - comparisonResultsCache.clear(); + caches.pageModelCache.clear(); + caches.comparisonCache.clear(); + caches.comparisonResultsCache.clear(); pageState.changeSearchQuery = ''; const searchInput = getElement('compare-search-input'); @@ -880,7 +582,7 @@ document.addEventListener('DOMContentLoaded', function () { prevBtn.addEventListener('click', function () { if (pageState.currentPage > 1) { pageState.currentPage--; - renderBothPages(); + renderBothPages().catch(console.error); } }); } @@ -895,7 +597,7 @@ document.addEventListener('DOMContentLoaded', function () { ); if (pageState.currentPage < totalPairs) { pageState.currentPage++; - renderBothPages(); + renderBothPages().catch(console.error); } }); } @@ -955,7 +657,10 @@ document.addEventListener('DOMContentLoaded', function () { ) as HTMLInputElement; const prevChangeBtn = getElement('prev-change-btn'); const nextChangeBtn = getElement('next-change-btn'); - const exportReportBtn = getElement('export-report-btn'); + const exportDropdownBtn = getElement( + 'export-dropdown-btn' + ); + const exportDropdownMenu = getElement('export-dropdown-menu'); const ocrToggle = getElement('ocr-toggle'); const searchInput = getElement('compare-search-input'); @@ -1037,12 +742,17 @@ document.addEventListener('DOMContentLoaded', function () { if (ocrToggle) { ocrToggle.checked = pageState.useOcr; ocrToggle.addEventListener('change', async function () { - pageState.useOcr = ocrToggle.checked; - pageModelCache.clear(); - comparisonCache.clear(); - comparisonResultsCache.clear(); - if (pageState.pdfDoc1 && pageState.pdfDoc2) { - await renderBothPages(); + try { + pageState.useOcr = ocrToggle.checked; + caches.pageModelCache.clear(); + caches.comparisonCache.clear(); + caches.comparisonResultsCache.clear(); + if (pageState.pdfDoc1 && pageState.pdfDoc2) { + await renderBothPages(); + } + } catch (e) { + console.error('OCR toggle failed:', e); + hideLoader(); } }); } @@ -1063,22 +773,48 @@ document.addEventListener('DOMContentLoaded', function () { window.cancelAnimationFrame(resizeFrame); resizeFrame = window.requestAnimationFrame(function () { - renderBothPages(); + renderBothPages().catch(console.error); }); }); - if (exportReportBtn) { - exportReportBtn.addEventListener('click', async function () { - if (pageState.pagePairs.length === 0) return; - showLoader('Building compare report...'); - const results = await buildReportResults(); - exportCompareHtmlReport( - documentNames.left, - documentNames.right, - pageState.pagePairs, - results - ); - hideLoader(); + if (exportDropdownBtn && exportDropdownMenu) { + exportDropdownBtn.addEventListener('click', function (e) { + e.stopPropagation(); + exportDropdownMenu.classList.toggle('hidden'); + }); + + document.addEventListener('click', function () { + exportDropdownMenu.classList.add('hidden'); + }); + + exportDropdownMenu.addEventListener('click', function (e) { + e.stopPropagation(); + }); + + document.querySelectorAll('.export-menu-item').forEach(function (btn) { + btn.addEventListener('click', async function () { + const mode = (btn as HTMLElement).dataset + .exportMode as ComparePdfExportMode; + if (!mode || pageState.pagePairs.length === 0) return; + exportDropdownMenu.classList.add('hidden'); + try { + showLoader('Preparing PDF export...'); + await exportComparePdf( + mode, + pageState.pdfDoc1, + pageState.pdfDoc2, + pageState.pagePairs, + function (message, percent) { + showLoader(message, percent); + } + ); + } catch (e) { + console.error('PDF export failed:', e); + showAlert('Export Error', 'Could not export comparison PDF.'); + } finally { + hideLoader(); + } + }); }); } diff --git a/src/js/logic/compare-render.ts b/src/js/logic/compare-render.ts new file mode 100644 index 0000000..1ebbdc0 --- /dev/null +++ b/src/js/logic/compare-render.ts @@ -0,0 +1,365 @@ +import * as pdfjsLib from 'pdfjs-dist'; +import type { + ComparePageModel, + ComparePagePair, + ComparePageResult, + RenderedPage, + ComparisonPageLoad, + DiffFocusRegion, + CompareCaches, + CompareRenderContext, +} from '../compare/types.ts'; +import { extractPageModel } from '../compare/engine/extract-page-model.ts'; +import { comparePageModels } from '../compare/engine/compare-page-models.ts'; +import { renderVisualDiff } from '../compare/engine/visual-diff.ts'; +import { recognizePageCanvas } from '../compare/engine/ocr-page.ts'; +import { isLowQualityExtractedText } from '../compare/engine/text-normalization.ts'; +import { COMPARE_RENDER, COMPARE_GEOMETRY } from '../compare/config.ts'; + +export function getElement(id: string) { + return document.getElementById(id) as T | null; +} + +export function clearCanvas(canvas: HTMLCanvasElement) { + const context = canvas.getContext('2d'); + canvas.width = 1; + canvas.height = 1; + context?.clearRect(0, 0, 1, 1); +} + +export function renderMissingPage( + canvas: HTMLCanvasElement, + placeholderId: string, + message: string +) { + clearCanvas(canvas); + const placeholder = getElement(placeholderId); + if (placeholder) { + placeholder.textContent = message; + placeholder.classList.remove('hidden'); + } +} + +export function hidePlaceholder(placeholderId: string) { + const placeholder = getElement(placeholderId); + placeholder?.classList.add('hidden'); +} + +export function getRenderScale( + page: pdfjsLib.PDFPageProxy, + container: HTMLElement, + viewMode: 'overlay' | 'side-by-side' +) { + const baseViewport = page.getViewport({ scale: 1.0 }); + const availableWidth = Math.max( + container.clientWidth - (viewMode === 'overlay' ? 96 : 56), + 320 + ); + const fitScale = availableWidth / Math.max(baseViewport.width, 1); + const maxScale = + viewMode === 'overlay' + ? COMPARE_RENDER.MAX_SCALE_OVERLAY + : COMPARE_RENDER.MAX_SCALE_SIDE; + + return Math.min(Math.max(fitScale, 1.0), maxScale); +} + +export function getPageModelCacheKey( + cacheKeyPrefix: 'left' | 'right', + pageNum: number, + scale: number +) { + return `${cacheKeyPrefix}-${pageNum}-${scale.toFixed(3)}`; +} + +function shouldUseOcrForModel(model: ComparePageModel) { + return !model.hasText || isLowQualityExtractedText(model.plainText); +} + +export function buildDiffFocusRegion( + comparison: ComparePageResult, + leftCanvas: HTMLCanvasElement, + rightCanvas: HTMLCanvasElement +): DiffFocusRegion | undefined { + const leftOffsetX = Math.floor( + (Math.max(leftCanvas.width, rightCanvas.width) - leftCanvas.width) / 2 + ); + const leftOffsetY = Math.floor( + (Math.max(leftCanvas.height, rightCanvas.height) - leftCanvas.height) / 2 + ); + const rightOffsetX = Math.floor( + (Math.max(leftCanvas.width, rightCanvas.width) - rightCanvas.width) / 2 + ); + const rightOffsetY = Math.floor( + (Math.max(leftCanvas.height, rightCanvas.height) - rightCanvas.height) / 2 + ); + const bounds = { + minX: Infinity, + minY: Infinity, + maxX: -Infinity, + maxY: -Infinity, + }; + + for (const change of comparison.changes) { + for (const rect of change.beforeRects) { + bounds.minX = Math.min(bounds.minX, rect.x + leftOffsetX); + bounds.minY = Math.min(bounds.minY, rect.y + leftOffsetY); + bounds.maxX = Math.max(bounds.maxX, rect.x + leftOffsetX + rect.width); + bounds.maxY = Math.max(bounds.maxY, rect.y + leftOffsetY + rect.height); + } + + for (const rect of change.afterRects) { + bounds.minX = Math.min(bounds.minX, rect.x + rightOffsetX); + bounds.minY = Math.min(bounds.minY, rect.y + rightOffsetY); + bounds.maxX = Math.max(bounds.maxX, rect.x + rightOffsetX + rect.width); + bounds.maxY = Math.max(bounds.maxY, rect.y + rightOffsetY + rect.height); + } + } + + if (!Number.isFinite(bounds.minX)) { + return undefined; + } + + const fullWidth = Math.max(leftCanvas.width, rightCanvas.width, 1); + const fullHeight = Math.max(leftCanvas.height, rightCanvas.height, 1); + const padding = COMPARE_GEOMETRY.FOCUS_REGION_PADDING; + + const x = Math.max(Math.floor(bounds.minX - padding), 0); + const y = Math.max(Math.floor(bounds.minY - padding), 0); + const maxX = Math.min(Math.ceil(bounds.maxX + padding), fullWidth); + const maxY = Math.min(Math.ceil(bounds.maxY + padding), fullHeight); + + return { + x, + y, + width: Math.max( + maxX - x, + Math.min(COMPARE_GEOMETRY.FOCUS_REGION_MIN_WIDTH, fullWidth) + ), + height: Math.max( + maxY - y, + Math.min(COMPARE_GEOMETRY.FOCUS_REGION_MIN_HEIGHT, fullHeight) + ), + }; +} + +export async function renderPage( + pdfDoc: pdfjsLib.PDFDocumentProxy, + pageNum: number, + canvas: HTMLCanvasElement, + container: HTMLElement, + placeholderId: string, + cacheKeyPrefix: 'left' | 'right', + caches: CompareCaches, + ctx: CompareRenderContext +): Promise { + if (pageNum > pdfDoc.numPages) { + renderMissingPage( + canvas, + placeholderId, + `Page ${pageNum} does not exist in this PDF.` + ); + return { model: null, exists: false }; + } + + const page = await pdfDoc.getPage(pageNum); + + const targetScale = getRenderScale(page, container, ctx.viewMode); + const scaledViewport = page.getViewport({ scale: targetScale }); + const dpr = window.devicePixelRatio || 1; + const hiResViewport = page.getViewport({ scale: targetScale * dpr }); + + hidePlaceholder(placeholderId); + + canvas.width = hiResViewport.width; + canvas.height = hiResViewport.height; + canvas.style.width = `${scaledViewport.width}px`; + canvas.style.height = `${scaledViewport.height}px`; + + const cacheKey = getPageModelCacheKey(cacheKeyPrefix, pageNum, targetScale); + const cachedModel = caches.pageModelCache.get(cacheKey); + const modelPromise = cachedModel + ? Promise.resolve(cachedModel) + : extractPageModel(page, scaledViewport); + const renderTask = page.render({ + canvasContext: canvas.getContext('2d')!, + viewport: hiResViewport, + canvas, + }).promise; + + const [model] = await Promise.all([modelPromise, renderTask]); + + let finalModel = model; + + if (!cachedModel && ctx.useOcr && shouldUseOcrForModel(model)) { + ctx.showLoader(`Running OCR on page ${pageNum}...`); + const ocrModel = await recognizePageCanvas( + canvas, + ctx.ocrLanguage, + function (status, progress) { + ctx.showLoader(`OCR: ${status}`, progress * 100); + } + ); + finalModel = { + ...ocrModel, + pageNumber: pageNum, + }; + } + + caches.pageModelCache.set(cacheKey, finalModel); + + return { model: finalModel, exists: true }; +} + +export async function loadComparisonPage( + pdfDoc: pdfjsLib.PDFDocumentProxy | null, + pageNum: number | null, + side: 'left' | 'right', + renderTarget: + | { + canvas: HTMLCanvasElement; + container: HTMLElement; + placeholderId: string; + } + | undefined, + caches: CompareCaches, + ctx: CompareRenderContext +): Promise { + if (!pdfDoc || !pageNum) { + if (renderTarget) { + renderMissingPage( + renderTarget.canvas, + renderTarget.placeholderId, + 'No paired page for this side.' + ); + } + return { model: null, exists: false }; + } + + if (renderTarget) { + return renderPage( + pdfDoc, + pageNum, + renderTarget.canvas, + renderTarget.container, + renderTarget.placeholderId, + side, + caches, + ctx + ); + } + + const renderScale = COMPARE_RENDER.OFFLINE_SCALE; + const cacheKey = getPageModelCacheKey(side, pageNum, renderScale); + const cachedModel = caches.pageModelCache.get(cacheKey); + if (cachedModel) { + return { model: cachedModel, exists: true }; + } + + const page = await pdfDoc.getPage(pageNum); + const viewport = page.getViewport({ scale: renderScale }); + const canvas = document.createElement('canvas'); + canvas.width = viewport.width; + canvas.height = viewport.height; + const context = canvas.getContext('2d'); + + if (!context) { + throw new Error('Could not create offscreen comparison canvas.'); + } + + const extractedModel = await extractPageModel(page, viewport); + await page.render({ + canvasContext: context, + viewport, + canvas, + }).promise; + + let finalModel = extractedModel; + if (ctx.useOcr && shouldUseOcrForModel(extractedModel)) { + const ocrModel = await recognizePageCanvas(canvas, ctx.ocrLanguage); + finalModel = { + ...ocrModel, + pageNumber: pageNum, + }; + } + + canvas.width = 0; + canvas.height = 0; + + caches.pageModelCache.set(cacheKey, finalModel); + return { model: finalModel, exists: true }; +} + +export async function computeComparisonForPair( + pdfDoc1: pdfjsLib.PDFDocumentProxy | null, + pdfDoc2: pdfjsLib.PDFDocumentProxy | null, + pair: ComparePagePair, + caches: CompareCaches, + ctx: CompareRenderContext, + options?: { + renderTargets?: { + left: { + canvas: HTMLCanvasElement; + container: HTMLElement; + placeholderId: string; + }; + right: { + canvas: HTMLCanvasElement; + container: HTMLElement; + placeholderId: string; + }; + diffCanvas?: HTMLCanvasElement; + }; + } +) { + const renderTargets = options?.renderTargets; + const leftPage = await loadComparisonPage( + pdfDoc1, + pair.leftPageNumber, + 'left', + renderTargets?.left, + caches, + ctx + ); + const rightPage = await loadComparisonPage( + pdfDoc2, + pair.rightPageNumber, + 'right', + renderTargets?.right, + caches, + ctx + ); + + const comparison = comparePageModels(leftPage.model, rightPage.model); + comparison.confidence = pair.confidence; + + if ( + renderTargets?.diffCanvas && + comparison.status !== 'left-only' && + comparison.status !== 'right-only' + ) { + const focusRegion = buildDiffFocusRegion( + comparison, + renderTargets.left.canvas, + renderTargets.right.canvas + ); + comparison.visualDiff = renderVisualDiff( + renderTargets.left.canvas, + renderTargets.right.canvas, + renderTargets.diffCanvas, + focusRegion + ); + } else if (renderTargets?.diffCanvas) { + clearCanvas(renderTargets.diffCanvas); + } + + return comparison; +} + +export function getComparisonCacheKey(pair: ComparePagePair, useOcr: boolean) { + const leftKey = pair.leftPageNumber ? `left-${pair.leftPageNumber}` : 'none'; + const rightKey = pair.rightPageNumber + ? `right-${pair.rightPageNumber}` + : 'none'; + return `${leftKey}:${rightKey}:${useOcr ? 'ocr' : 'no-ocr'}`; +} diff --git a/src/js/types/compare-pdfs-type.ts b/src/js/types/compare-pdfs-type.ts index d3fa47c..279e7a9 100644 --- a/src/js/types/compare-pdfs-type.ts +++ b/src/js/types/compare-pdfs-type.ts @@ -1 +1,9 @@ -export type { CompareState } from '../compare/types.ts'; +export type { + CompareState, + ComparePdfExportMode, + RenderedPage, + ComparisonPageLoad, + DiffFocusRegion, + CompareCaches, + CompareRenderContext, +} from '../compare/types.ts'; diff --git a/src/pages/compare-pdfs.html b/src/pages/compare-pdfs.html index 43f0622..2f4c717 100644 --- a/src/pages/compare-pdfs.html +++ b/src/pages/compare-pdfs.html @@ -626,14 +626,55 @@ - +
    + + +