refactor: update PDF comparison types and enhance UI for better usability
- Refactored CompareState to import from a centralized type definition. - Enhanced the compare-pdfs.html layout with improved styles for overlay and side-by-side modes. - Added new CSS styles for various UI components including panels, buttons, and highlights. - Implemented a new sidebar for displaying change summaries and filters. - Created unit tests for text comparison logic, including diffing text runs and page pairing. - Added tests for text normalization functions to ensure proper handling of punctuation and character normalization.
This commit is contained in:
78
src/js/compare/engine/compare-page-models.ts
Normal file
78
src/js/compare/engine/compare-page-models.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import type { ComparePageModel, ComparePageResult } from '../types.ts';
|
||||
import { diffTextRuns } from './diff-text-runs.ts';
|
||||
|
||||
/**
 * Compares two extracted page models and reports their textual differences.
 *
 * - Only `leftPage` present: one `page-removed` change, status `left-only`.
 * - Only `rightPage` present: one `page-added` change, status `right-only`.
 * - Neither present: an empty `match` result with null page numbers.
 * - Both present: the word-level diff returned by `diffTextRuns`.
 *
 * @param leftPage - Page model from the first PDF, or `null` when unpaired.
 * @param rightPage - Page model from the second PDF, or `null` when unpaired.
 * @returns The comparison result; `visualDiff` is always `null` here.
 */
export function comparePageModels(
  leftPage: ComparePageModel | null,
  rightPage: ComparePageModel | null
): ComparePageResult {
  // Length of the plain-text excerpt attached to page-added/removed changes.
  const previewLength = 200;

  if (leftPage && !rightPage) {
    return {
      status: 'left-only',
      leftPageNumber: leftPage.pageNumber,
      rightPageNumber: null,
      changes: [
        {
          id: 'page-removed',
          type: 'page-removed',
          description: `Page ${leftPage.pageNumber} exists only in the first PDF.`,
          beforeText: leftPage.plainText.slice(0, previewLength),
          afterText: '',
          beforeRects: [],
          afterRects: [],
        },
      ],
      summary: { added: 0, removed: 1, modified: 0 },
      visualDiff: null,
      usedOcr: leftPage.source === 'ocr',
    };
  }

  if (!leftPage && rightPage) {
    return {
      status: 'right-only',
      leftPageNumber: null,
      rightPageNumber: rightPage.pageNumber,
      changes: [
        {
          id: 'page-added',
          type: 'page-added',
          description: `Page ${rightPage.pageNumber} exists only in the second PDF.`,
          beforeText: '',
          afterText: rightPage.plainText.slice(0, previewLength),
          beforeRects: [],
          afterRects: [],
        },
      ],
      summary: { added: 1, removed: 0, modified: 0 },
      visualDiff: null,
      usedOcr: rightPage.source === 'ocr',
    };
  }

  // Only reachable when both sides are null; also narrows both to non-null below.
  if (!leftPage || !rightPage) {
    return {
      status: 'match',
      leftPageNumber: null,
      rightPageNumber: null,
      changes: [],
      summary: { added: 0, removed: 0, modified: 0 },
      visualDiff: null,
      usedOcr: false,
    };
  }

  const { changes, summary } = diffTextRuns(
    leftPage.textItems,
    rightPage.textItems
  );

  return {
    status: changes.length > 0 ? 'changed' : 'match',
    leftPageNumber: leftPage.pageNumber,
    rightPageNumber: rightPage.pageNumber,
    changes,
    summary,
    visualDiff: null,
    usedOcr: leftPage.source === 'ocr' || rightPage.source === 'ocr',
  };
}
|
||||
237
src/js/compare/engine/diff-text-runs.ts
Normal file
237
src/js/compare/engine/diff-text-runs.ts
Normal file
@@ -0,0 +1,237 @@
|
||||
import { diffArrays } from 'diff';
|
||||
|
||||
import type {
|
||||
CharPosition,
|
||||
CompareChangeSummary,
|
||||
CompareRectangle,
|
||||
CompareTextChange,
|
||||
CompareTextItem,
|
||||
CompareWordToken,
|
||||
} from '../types.ts';
|
||||
|
||||
/** A single word prepared for diffing, together with its on-page rectangle. */
interface WordToken {
  /** Word text as it is shown in change descriptions. */
  word: string;
  /** Key handed to the diff; the fallback tokenizer uses the lower-cased word. */
  compareWord: string;
  /** Rectangle covering the word. */
  rect: CompareRectangle;
}
|
||||
|
||||
/**
 * Returns one horizontal position per character of `line.normalizedText`.
 *
 * The line's own `charMap` is reused only when its length matches the
 * normalized text; otherwise the line rectangle is divided evenly across the
 * characters.
 *
 * @param line - Text item whose characters should be positioned.
 * @returns An array with exactly `line.normalizedText.length` entries.
 */
function getCharMap(line: CompareTextItem): CharPosition[] {
  const textLength = line.normalizedText.length;

  if (line.charMap && line.charMap.length === textLength) {
    return line.charMap;
  }

  // Guard against division by zero for empty text.
  const charWidth = line.rect.width / Math.max(textLength, 1);
  return Array.from({ length: textLength }, (_, index) => ({
    x: line.rect.x + index * charWidth,
    width: charWidth,
  }));
}
|
||||
|
||||
/**
 * Breaks a line into diffable word tokens.
 *
 * When the line already carries `wordTokens`, they are copied (word,
 * compareWord and rect only). Otherwise the normalized text is split on
 * whitespace and each word's rectangle is derived from the line's character
 * map, falling back to an even split of the line width when a character
 * position is missing.
 *
 * @param line - Line-level text item.
 * @returns Word tokens in reading order; empty when the line has no words.
 */
function splitLineIntoWords(line: CompareTextItem): WordToken[] {
  if (line.wordTokens && line.wordTokens.length > 0) {
    return line.wordTokens.map((token: CompareWordToken) => ({
      word: token.word,
      compareWord: token.compareWord,
      rect: token.rect,
    }));
  }

  const text = line.normalizedText;
  const words = text.split(/\s+/).filter(Boolean);
  if (words.length === 0) return [];

  const charMap = getCharMap(line);
  const { y, height } = line.rect;
  // Search cursor so repeated words resolve to successive occurrences.
  let searchFrom = 0;

  return words.map((word) => {
    const startIndex = text.indexOf(word, searchFrom);
    const endIndex = startIndex + word.length - 1;
    searchFrom = startIndex + word.length;

    const startChar = charMap[startIndex];
    const endChar = charMap[endIndex];

    let x: number;
    let width: number;
    if (startChar && endChar) {
      x = startChar.x;
      width = endChar.x + endChar.width - startChar.x;
    } else {
      // No usable character positions: spread the line width evenly.
      const charWidth = line.rect.width / Math.max(text.length, 1);
      x = line.rect.x + startIndex * charWidth;
      width = word.length * charWidth;
    }

    return {
      word,
      compareWord: word.toLowerCase(),
      rect: { x, y, width, height },
    };
  });
}
|
||||
|
||||
/**
 * Merges rectangles that sit next to each other on the same line into their
 * bounding boxes.
 *
 * Rectangles are sorted by `y` then `x`. A rectangle joins the current group
 * when its `y` is within `max(0.6 * height, 4)` of the previous rectangle and
 * its left edge is no further right than the previous right edge plus twice
 * the previous height.
 *
 * @param rects - Rectangles to group; the input array is not mutated.
 * @returns One bounding rectangle per group, in sorted order.
 */
function groupAdjacentRects(rects: CompareRectangle[]): CompareRectangle[] {
  if (rects.length === 0) return [];

  const ordered = [...rects].sort((a, b) => a.y - b.y || a.x - b.x);
  const clusters: CompareRectangle[][] = [];

  for (const rect of ordered) {
    const cluster = clusters[clusters.length - 1];
    const last = cluster?.[cluster.length - 1];

    const joins =
      cluster !== undefined &&
      last !== undefined &&
      Math.abs(rect.y - last.y) < Math.max(last.height * 0.6, 4) &&
      rect.x <= last.x + last.width + last.height * 2;

    if (cluster !== undefined && joins) {
      cluster.push(rect);
    } else {
      clusters.push([rect]);
    }
  }

  return clusters.map((cluster) => {
    let minX = Infinity;
    let minY = Infinity;
    let maxX = -Infinity;
    let maxY = -Infinity;
    for (const r of cluster) {
      minX = Math.min(minX, r.x);
      minY = Math.min(minY, r.y);
      maxX = Math.max(maxX, r.x + r.width);
      maxY = Math.max(maxY, r.y + r.height);
    }
    return { x: minX, y: minY, width: maxX - minX, height: maxY - minY };
  });
}
|
||||
|
||||
/**
 * Concatenates the comparison keys of `words` with no separator, so that
 * sequences differing only in where words are broken produce the same string.
 */
function collapseWords(words: WordToken[]): string {
  return words.map((word) => word.compareWord).join('');
}
|
||||
|
||||
/**
 * Tells whether two word sequences spell the same text once word boundaries
 * are ignored (for example `["foo", "bar"]` versus `["foobar"]`).
 *
 * @returns `false` when either side is empty; otherwise whether the
 *   collapsed comparison strings are equal.
 */
function areEquivalentIgnoringWordBreaks(
  beforeWords: WordToken[],
  afterWords: WordToken[]
): boolean {
  if (beforeWords.length === 0 || afterWords.length === 0) {
    return false;
  }

  return collapseWords(beforeWords) === collapseWords(afterWords);
}
|
||||
|
||||
/**
 * Appends one change record to `changes` for the given word runs.
 *
 * Nothing is appended when both runs produce empty text. The record id is
 * `${type}-${changes.length}` at the time of insertion. For `modified` both
 * sides are kept; for `removed` only the before side; for any other type
 * only the after side.
 *
 * @param changes - Accumulator that is mutated in place.
 * @param type - Change type to record.
 * @param beforeWords - Words from the first document.
 * @param afterWords - Words from the second document.
 */
function createWordChange(
  changes: CompareTextChange[],
  type: CompareTextChange['type'],
  beforeWords: WordToken[],
  afterWords: WordToken[]
): void {
  const beforeText = beforeWords.map((w) => w.word).join(' ');
  const afterText = afterWords.map((w) => w.word).join(' ');
  if (!beforeText && !afterText) return;

  const id = `${type}-${changes.length}`;

  if (type === 'modified') {
    changes.push({
      id,
      type,
      description: `Replaced "${beforeText}" with "${afterText}"`,
      beforeText,
      afterText,
      beforeRects: groupAdjacentRects(beforeWords.map((w) => w.rect)),
      afterRects: groupAdjacentRects(afterWords.map((w) => w.rect)),
    });
    return;
  }

  if (type === 'removed') {
    changes.push({
      id,
      type,
      description: `Removed "${beforeText}"`,
      beforeText,
      afterText: '',
      beforeRects: groupAdjacentRects(beforeWords.map((w) => w.rect)),
      afterRects: [],
    });
    return;
  }

  changes.push({
    id,
    type,
    description: `Added "${afterText}"`,
    beforeText: '',
    afterText,
    beforeRects: [],
    afterRects: groupAdjacentRects(afterWords.map((w) => w.rect)),
  });
}
|
||||
|
||||
/**
 * Counts how many changes are of type `added`, `removed` and `modified`.
 * Changes of any other type are not counted.
 */
function toSummary(changes: CompareTextChange[]): CompareChangeSummary {
  const summary = { added: 0, removed: 0, modified: 0 };

  for (const change of changes) {
    if (change.type === 'added') summary.added += 1;
    else if (change.type === 'removed') summary.removed += 1;
    else if (change.type === 'modified') summary.modified += 1;
  }

  return summary;
}
|
||||
|
||||
/**
 * Computes a word-level diff between two lists of text items.
 *
 * Each item is split into word tokens, the tokens' `compareWord` keys are
 * diffed with `diffArrays`, and the resulting chunks are converted to change
 * records:
 * - a removed chunk immediately followed by an added chunk becomes one
 *   `modified` change, unless both sides are identical once word breaks are
 *   ignored, in which case the pair is dropped;
 * - a lone removed chunk becomes `removed`, a lone added chunk `added`;
 * - unchanged chunks only advance the cursors.
 *
 * @param beforeItems - Text items from the first document.
 * @param afterItems - Text items from the second document.
 * @returns The change list and its added/removed/modified counts.
 */
export function diffTextRuns(
  beforeItems: CompareTextItem[],
  afterItems: CompareTextItem[]
): { changes: CompareTextChange[]; summary: CompareChangeSummary } {
  const beforeWords = beforeItems.flatMap(splitLineIntoWords);
  const afterWords = afterItems.flatMap(splitLineIntoWords);

  const rawChanges = diffArrays(
    beforeWords.map((w) => w.compareWord),
    afterWords.map((w) => w.compareWord)
  );

  const changes: CompareTextChange[] = [];
  // Cursors into beforeWords/afterWords, kept in step with the diff chunks.
  let beforeIndex = 0;
  let afterIndex = 0;

  for (let i = 0; i < rawChanges.length; i += 1) {
    const change = rawChanges[i];
    const count = change.value.length;

    if (change.removed) {
      const removedTokens = beforeWords.slice(beforeIndex, beforeIndex + count);
      beforeIndex += count;

      const next = rawChanges[i + 1];
      if (next?.added) {
        const addedCount = next.value.length;
        const addedTokens = afterWords.slice(afterIndex, afterIndex + addedCount);
        afterIndex += addedCount;
        // The paired "added" chunk is consumed here, so skip it in the loop.
        i += 1;

        if (!areEquivalentIgnoringWordBreaks(removedTokens, addedTokens)) {
          createWordChange(changes, 'modified', removedTokens, addedTokens);
        }
      } else {
        createWordChange(changes, 'removed', removedTokens, []);
      }
      continue;
    }

    if (change.added) {
      const addedTokens = afterWords.slice(afterIndex, afterIndex + count);
      afterIndex += count;
      createWordChange(changes, 'added', [], addedTokens);
      continue;
    }

    beforeIndex += count;
    afterIndex += count;
  }

  return { changes, summary: toSummary(changes) };
}
|
||||
520
src/js/compare/engine/extract-page-model.ts
Normal file
520
src/js/compare/engine/extract-page-model.ts
Normal file
@@ -0,0 +1,520 @@
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
import type {
|
||||
ComparePageModel,
|
||||
CompareTextItem,
|
||||
CharPosition,
|
||||
CompareWordToken,
|
||||
} from '../types.ts';
|
||||
import {
|
||||
joinCompareTextItems,
|
||||
normalizeCompareText,
|
||||
} from './text-normalization.ts';
|
||||
|
||||
/**
 * Shape of the text-content entries this module reads; entries are selected
 * from `textContent.items` by the presence of a `str` property.
 */
type PageTextItem = {
  /** Text of the run. */
  str: string;
  /** Run width; scaled by `viewport.scale` when building rectangles. */
  width: number;
  /** Run height; scaled by `viewport.scale` when building rectangles. */
  height: number;
  /** Transform array; indices 0, 1, 4 and 5 are read by this module. */
  transform: number[];
  dir: string;
  /** Key used to look up the font family in the text-content styles. */
  fontName: string;
  hasEOL: boolean;
};
|
||||
|
||||
/** Style lookup keyed by a text item's `fontName`; only `fontFamily` is read. */
type TextStyles = Record<string, { fontFamily?: string }>;
|
||||
|
||||
// Off-screen canvas used to measure text; unavailable when there is no DOM
// (`document` is undefined), in which case measurement falls back to the
// DEFAULT_* widths below.
const measurementCanvas =
  typeof document !== 'undefined' ? document.createElement('canvas') : null;
const measurementContext = measurementCanvas
  ? measurementCanvas.getContext('2d')
  : null;
// Measured widths keyed by `${fontSpec}|${text}`; only allocated when a
// measurement context exists.
const textMeasurementCache: Map<string, number> | null = measurementContext
  ? new Map()
  : null;
// Font last assigned to the measurement context, to avoid redundant sets.
let lastMeasurementFont = '';

// Relative widths used when real measurement is unavailable or unusable.
const DEFAULT_CHAR_WIDTH = 1;
const DEFAULT_SPACE_WIDTH = 0.33;
|
||||
|
||||
/**
 * Decides from punctuation alone whether `current` should be glued to the
 * token before it.
 *
 * @param previous - Text of the preceding token; an empty value never joins.
 * @param current - Text of the token being considered.
 * @returns `true` when `current` starts with closing punctuation or a quote
 *   character, or when `previous` ends with an opening bracket, slash, quote
 *   or hyphen.
 */
function shouldJoinTokenWithPrevious(previous: string, current: string): boolean {
  if (!previous) return false;

  return (
    /^[,.;:!?%)\]}]/.test(current) ||
    /^[''"'’”]/u.test(current) ||
    /[([{/"'“‘-]$/u.test(previous)
  );
}
|
||||
|
||||
/**
 * Measures the rendered width of `text` in the given CSS font.
 *
 * Without a measurement context the result is an estimate: `0` for empty
 * text, `DEFAULT_SPACE_WIDTH` for a single space, otherwise
 * `text.length * DEFAULT_CHAR_WIDTH`. With a context, results are memoised
 * per font and text.
 *
 * @param fontSpec - CSS font shorthand assigned to the canvas context.
 * @param text - Text to measure.
 * @returns The measured (or estimated) width.
 */
function measureTextWidth(fontSpec: string, text: string): number {
  if (!measurementContext) {
    if (!text) return 0;
    if (text === ' ') return DEFAULT_SPACE_WIDTH;
    return text.length * DEFAULT_CHAR_WIDTH;
  }

  // Consult the cache first so hits never touch the canvas font.
  const key = `${fontSpec}|${text}`;
  const cached = textMeasurementCache?.get(key);
  if (cached !== undefined) {
    return cached;
  }

  if (lastMeasurementFont !== fontSpec) {
    measurementContext.font = fontSpec;
    lastMeasurementFont = fontSpec;
  }

  const width = measurementContext.measureText(text).width || 0;
  textMeasurementCache?.set(key, width);
  return width;
}
|
||||
|
||||
/**
 * Splits one text item into word tokens with estimated viewport rectangles.
 *
 * Steps:
 * 1. Derive a per-character weight from the measured width of each growing
 *    prefix of the text (falling back to default weights when measurement
 *    yields nothing usable).
 * 2. Project the item's four corners into viewport space.
 * 3. For each run of non-whitespace, take its share of the total weight and
 *    place it along the projected baseline to obtain the word rectangle.
 *
 * Words whose normalized form is empty are skipped.
 *
 * @param viewport - Viewport used to convert item coordinates.
 * @param item - Text item to tokenize.
 * @param fallbackRect - Rectangle used for non-finite coordinates and as the
 *   minimum word height.
 * @param styles - Font style lookup keyed by `item.fontName`.
 * @returns Word tokens in text order; empty when the item is blank.
 */
function buildItemWordTokens(
  viewport: pdfjsLib.PageViewport,
  item: PageTextItem,
  fallbackRect: CompareTextItem['rect'],
  styles: TextStyles
): CompareWordToken[] {
  const rawText = item.str || '';
  if (!rawText.trim()) {
    return [];
  }

  const totalLen = Math.max(rawText.length, 1);
  const textStyle = item.fontName ? styles[item.fontName] : undefined;
  const fontFamily = textStyle?.fontFamily ?? 'sans-serif';
  const fontScale = Math.max(
    0.5,
    Math.hypot(item.transform[0], item.transform[1]) || 0
  );
  const fontSpec = `${fontScale}px ${fontFamily}`;

  const defaultWeightAt = (index: number): number =>
    rawText[index] === ' ' ? DEFAULT_SPACE_WIDTH : DEFAULT_CHAR_WIDTH;

  // Weight of each character = growth of the measured prefix width.
  const weights: number[] = new Array(totalLen);
  let runningText = '';
  let previousAdvance = 0;
  for (let index = 0; index < totalLen; index += 1) {
    runningText += rawText[index];
    const advance = measureTextWidth(fontSpec, runningText);
    const delta = advance - previousAdvance;
    weights[index] =
      Number.isFinite(delta) && delta > 0 ? delta : defaultWeightAt(index);
    previousAdvance = advance;
  }

  // The whole string measured as zero/invalid: use default weights throughout.
  if (!Number.isFinite(previousAdvance) || previousAdvance <= 0) {
    for (let index = 0; index < totalLen; index += 1) {
      weights[index] = defaultWeightAt(index);
    }
  }

  // prefix[i] = total weight of the first i characters.
  const prefix: number[] = new Array(totalLen + 1);
  prefix[0] = 0;
  for (let index = 0; index < totalLen; index += 1) {
    prefix[index + 1] = prefix[index] + weights[index];
  }
  const totalWeight = prefix[totalLen] || 1;

  const rawX = item.transform[4];
  const rawY = item.transform[5];
  const transformed = [
    viewport.convertToViewportPoint(rawX, rawY),
    viewport.convertToViewportPoint(rawX + item.width, rawY),
    viewport.convertToViewportPoint(rawX, rawY + item.height),
    viewport.convertToViewportPoint(rawX + item.width, rawY + item.height),
  ];
  const xs = transformed.map(([x]) => x);
  const ys = transformed.map(([, y]) => y);
  const left = Math.min(...xs);
  const right = Math.max(...xs);
  const top = Math.min(...ys);
  const bottom = Math.max(...ys);

  const [baselineStart, baselineEnd, verticalEnd] = transformed;
  const baselineVector: [number, number] = [
    baselineEnd[0] - baselineStart[0],
    baselineEnd[1] - baselineStart[1],
  ];
  const verticalVector: [number, number] = [
    verticalEnd[0] - baselineStart[0],
    verticalEnd[1] - baselineStart[1],
  ];
  // Both edges must have non-negligible length to place words along them.
  const hasOrientationVectors =
    Math.hypot(baselineVector[0], baselineVector[1]) > 1e-6 &&
    Math.hypot(verticalVector[0], verticalVector[1]) > 1e-6;

  const tokens: CompareWordToken[] = [];
  const wordRegex = /\S+/gu;
  let match: RegExpExecArray | null;
  let previousEnd = 0;

  while ((match = wordRegex.exec(rawText)) !== null) {
    const tokenText = match[0];
    const normalizedWord = normalizeCompareText(tokenText);
    if (!normalizedWord) {
      previousEnd = match.index + tokenText.length;
      continue;
    }

    const startIndex = match.index;
    const endIndex = startIndex + tokenText.length;
    const relStart = prefix[startIndex] / totalWeight;
    const relEnd = prefix[endIndex] / totalWeight;

    let wordLeft: number;
    let wordRight: number;
    let wordTop: number;
    let wordBottom: number;

    if (hasOrientationVectors) {
      const segStart: [number, number] = [
        baselineStart[0] + baselineVector[0] * relStart,
        baselineStart[1] + baselineVector[1] * relStart,
      ];
      const segEnd: [number, number] = [
        baselineStart[0] + baselineVector[0] * relEnd,
        baselineStart[1] + baselineVector[1] * relEnd,
      ];
      const cornerPoints: Array<[number, number]> = [
        segStart,
        [segStart[0] + verticalVector[0], segStart[1] + verticalVector[1]],
        [segEnd[0] + verticalVector[0], segEnd[1] + verticalVector[1]],
        segEnd,
      ];
      const cornerXs = cornerPoints.map(([x]) => x);
      const cornerYs = cornerPoints.map(([, y]) => y);
      wordLeft = Math.min(...cornerXs);
      wordRight = Math.max(...cornerXs);
      wordTop = Math.min(...cornerYs);
      wordBottom = Math.max(...cornerYs);
    } else {
      // Degenerate box: interpolate horizontally inside the item's bounds.
      const segLeft = left + (right - left) * relStart;
      const segRight = left + (right - left) * relEnd;
      wordLeft = Math.min(segLeft, segRight);
      wordRight = Math.max(segLeft, segRight);
      wordTop = top;
      wordBottom = bottom;
    }

    const width = Math.max(wordRight - wordLeft, 1);
    const height = Math.max(wordBottom - wordTop, fallbackRect.height);
    const gapText = rawText.slice(previousEnd, startIndex);
    const previousToken = tokens[tokens.length - 1];

    tokens.push({
      word: normalizedWord,
      compareWord: normalizedWord.toLowerCase(),
      rect: {
        x: Number.isFinite(wordLeft) ? wordLeft : fallbackRect.x,
        y: Number.isFinite(wordTop) ? wordTop : fallbackRect.y,
        width,
        height,
      },
      joinsWithPrevious:
        (gapText.length > 0 && !/\s/u.test(gapText)) ||
        (previousToken
          ? shouldJoinTokenWithPrevious(previousToken.word, normalizedWord)
          : false),
    });

    previousEnd = endIndex;
  }

  return tokens;
}
|
||||
|
||||
/**
 * Converts a raw text item into a `CompareTextItem` positioned in viewport
 * space.
 *
 * The item transform is combined with `viewport.transform`; width comes from
 * `item.width * viewport.scale`, height from the combined transform's fourth
 * component (or `item.height * viewport.scale` when that is zero), both
 * clamped to at least 1. `y` is the transformed origin minus the height.
 *
 * @param viewport - Viewport supplying the transform and scale.
 * @param item - Raw text item.
 * @param index - Position of the item in its source list; used in the id.
 * @param styles - Font style lookup passed on to word tokenization.
 */
function toRect(
  viewport: pdfjsLib.PageViewport,
  item: PageTextItem,
  index: number,
  styles: TextStyles
) {
  const normalizedText = normalizeCompareText(item.str);

  const transformed = pdfjsLib.Util.transform(
    viewport.transform,
    item.transform
  );
  const width = Math.max(item.width * viewport.scale, 1);
  const height = Math.max(
    Math.abs(transformed[3]) || item.height * viewport.scale,
    1
  );

  const rect = {
    x: transformed[4],
    y: transformed[5] - height,
    width,
    height,
  };

  return {
    id: `${index}-${normalizedText}`,
    text: item.str,
    normalizedText,
    rect,
    wordTokens: buildItemWordTokens(viewport, item, rect, styles),
  } satisfies CompareTextItem;
}
|
||||
|
||||
/**
 * Returns a copy of `items` sorted into reading order.
 *
 * Two items count as being on the same line when their `y` values differ by
 * no more than `max(0.6 * smaller height, 4)`. Items on different lines sort
 * by `y`; items on the same line sort by `x` when more than 1 unit apart,
 * and by `id` otherwise.
 *
 * @param items - Items to sort; the input array is not mutated.
 */
export function sortCompareTextItems(items: CompareTextItem[]): CompareTextItem[] {
  return [...items].sort((left, right) => {
    const lineTolerance = Math.max(
      Math.min(left.rect.height, right.rect.height) * 0.6,
      4
    );

    const topDiff = left.rect.y - right.rect.y;
    if (Math.abs(topDiff) > lineTolerance) {
      return topDiff;
    }

    const xDiff = left.rect.x - right.rect.x;
    if (Math.abs(xDiff) > 1) {
      return xDiff;
    }

    return left.id.localeCompare(right.id);
  });
}
|
||||
|
||||
/**
 * Average width per non-whitespace character of an item, i.e. the rectangle
 * width divided by the count of characters left after removing whitespace
 * (at least 1).
 */
function averageCharacterWidth(item: CompareTextItem): number {
  const visibleLength = item.normalizedText.replace(/\s+/g, '').length;
  return item.rect.width / Math.max(visibleLength, 1);
}
|
||||
|
||||
/**
 * Decides whether a space belongs between two neighbouring items on a line.
 *
 * No space is inserted when either text is empty, when `right` starts with
 * closing punctuation or a quote character, when `left` ends with an opening
 * bracket, slash, quote or hyphen, or when the items touch or overlap.
 * Otherwise a space is inserted once the horizontal gap reaches
 * `max(0.45 * smaller average character width, 1.5)`.
 *
 * @param left - Item on the left.
 * @param right - Item on the right.
 */
function shouldInsertSpaceBetweenItems(
  left: CompareTextItem,
  right: CompareTextItem
): boolean {
  if (!left.normalizedText || !right.normalizedText) {
    return false;
  }

  const punctuationGlues =
    /^[,.;:!?%)\]}]/.test(right.normalizedText) ||
    /^[''"'’”]/u.test(right.normalizedText) ||
    /[([{/"'“‘-]$/u.test(left.normalizedText);
  if (punctuationGlues) {
    return false;
  }

  const gap = right.rect.x - (left.rect.x + left.rect.width);
  if (gap <= 0) {
    return false;
  }

  const narrowestChar = Math.min(
    averageCharacterWidth(left),
    averageCharacterWidth(right)
  );
  const threshold = Math.max(narrowestChar * 0.45, 1.5);

  return gap >= threshold;
}
|
||||
|
||||
/**
 * Joins the normalized text of the items on one line and records an
 * approximate horizontal position for every character appended.
 *
 * Each item's characters share its rectangle width evenly. When
 * `shouldInsertSpaceBetweenItems` asks for a space between two items, a
 * space character is added whose position spans the gap (at least 1 wide).
 * The returned text is the joined string passed through
 * `normalizeCompareText`; the character map describes the string before that
 * final normalization.
 *
 * @param lineItems - Items of one line, in left-to-right order.
 */
function mergeLineText(lineItems: CompareTextItem[]): {
  text: string;
  charMap: CharPosition[];
} {
  if (lineItems.length === 0) {
    return { text: '', charMap: [] };
  }

  const charMap: CharPosition[] = [];

  const appendFragmentChars = (frag: CompareTextItem): void => {
    const fragText = frag.normalizedText;
    const fragCharWidth = frag.rect.width / Math.max(fragText.length, 1);
    for (let ci = 0; ci < fragText.length; ci += 1) {
      charMap.push({
        x: frag.rect.x + ci * fragCharWidth,
        width: fragCharWidth,
      });
    }
  };

  let merged = '';
  lineItems.forEach((current, index) => {
    if (index > 0) {
      const previous = lineItems[index - 1];
      if (shouldInsertSpaceBetweenItems(previous, current)) {
        const previousRight = previous.rect.x + previous.rect.width;
        charMap.push({
          x: previousRight,
          width: Math.max(current.rect.x - previousRight, 1),
        });
        merged += ' ';
      }
    }
    merged += current.normalizedText;
    appendFragmentChars(current);
  });

  return { text: normalizeCompareText(merged), charMap };
}
|
||||
|
||||
/**
 * Fuses two word tokens into one: the words and comparison keys are
 * concatenated (left then right) and the rectangle becomes the bounding box
 * of both. The result carries only `word`, `compareWord` and `rect`.
 */
function mergeWordTokenRects(
  left: CompareWordToken,
  right: CompareWordToken
): CompareWordToken {
  const a = left.rect;
  const b = right.rect;

  const x = Math.min(a.x, b.x);
  const y = Math.min(a.y, b.y);
  const rightEdge = Math.max(a.x + a.width, b.x + b.width);
  const bottomEdge = Math.max(a.y + a.height, b.y + b.height);

  return {
    word: `${left.word}${right.word}`,
    compareWord: `${left.compareWord}${right.compareWord}`,
    rect: { x, y, width: rightEdge - x, height: bottomEdge - y },
  };
}
|
||||
|
||||
/**
 * Builds the word tokens of a merged line from its items' tokens.
 *
 * Items without tokens contribute a single token made from their normalized
 * text and rectangle. A token is fused into the previous output token when:
 * - it is the first token of an item and `shouldInsertSpaceBetweenItems`
 *   reports no space between the previous item and this one, or
 * - it is a later token of an item and its `joinsWithPrevious` flag is set.
 *
 * @param lineItems - Items of one line, in left-to-right order.
 * @returns The merged tokens, or `undefined` when no item has word tokens.
 */
function buildMergedWordTokens(
  lineItems: CompareTextItem[]
): CompareWordToken[] | undefined {
  const anyItemHasTokens = lineItems.some(
    (item) => item.wordTokens && item.wordTokens.length > 0
  );
  if (!anyItemHasTokens) {
    return undefined;
  }

  const mergedTokens: CompareWordToken[] = [];
  let previousItem: CompareTextItem | null = null;

  for (const item of lineItems) {
    const itemTokens: CompareWordToken[] =
      item.wordTokens && item.wordTokens.length > 0
        ? item.wordTokens
        : [
            {
              word: item.normalizedText,
              compareWord: item.normalizedText.toLowerCase(),
              rect: item.rect,
            } satisfies CompareWordToken,
          ];

    for (let tokenIndex = 0; tokenIndex < itemTokens.length; tokenIndex += 1) {
      const token = itemTokens[tokenIndex];

      let wantsJoin: boolean;
      if (tokenIndex > 0) {
        wantsJoin = Boolean(token.joinsWithPrevious);
      } else {
        wantsJoin = previousItem
          ? !shouldInsertSpaceBetweenItems(previousItem, item)
          : false;
      }

      if (mergedTokens.length > 0 && wantsJoin) {
        const lastIndex = mergedTokens.length - 1;
        mergedTokens[lastIndex] = mergeWordTokenRects(
          mergedTokens[lastIndex],
          token
        );
      } else {
        mergedTokens.push({
          word: token.word,
          compareWord: token.compareWord,
          rect: token.rect,
        });
      }
    }

    previousItem = item;
  }

  return mergedTokens;
}
|
||||
|
||||
/**
 * Groups already-sorted items into lines and merges each line into a single
 * `CompareTextItem`.
 *
 * An item joins the current line when its `y` is within
 * `max(0.6 * smaller height, 4)` of the line's first item; otherwise it
 * starts a new line. Each merged line gets id `line-<index>`, the items'
 * raw texts joined with spaces, the merged normalized text and character
 * map, the bounding rectangle, the original items as `fragments`, and
 * merged word tokens.
 *
 * @param sortedItems - Items in reading order.
 * @returns One item per detected line; empty for empty input.
 */
export function mergeIntoLines(
  sortedItems: CompareTextItem[]
): CompareTextItem[] {
  if (sortedItems.length === 0) return [];

  const lines: CompareTextItem[][] = [[sortedItems[0]]];

  for (const curr of sortedItems.slice(1)) {
    const currentLine = lines[lines.length - 1];
    // The first item of the line is the vertical reference for the whole line.
    const anchor = currentLine[0];
    const lineTolerance = Math.max(
      Math.min(anchor.rect.height, curr.rect.height) * 0.6,
      4
    );

    if (Math.abs(curr.rect.y - anchor.rect.y) <= lineTolerance) {
      currentLine.push(curr);
    } else {
      lines.push([curr]);
    }
  }

  return lines.map((lineItems, lineIndex) => {
    const { text: normalizedText, charMap } = mergeLineText(lineItems);

    const minX = Math.min(...lineItems.map((item) => item.rect.x));
    const minY = Math.min(...lineItems.map((item) => item.rect.y));
    const maxX = Math.max(
      ...lineItems.map((item) => item.rect.x + item.rect.width)
    );
    const maxY = Math.max(
      ...lineItems.map((item) => item.rect.y + item.rect.height)
    );

    return {
      id: `line-${lineIndex}`,
      text: lineItems.map((item) => item.text).join(' '),
      normalizedText,
      rect: {
        x: minX,
        y: minY,
        width: maxX - minX,
        height: maxY - minY,
      },
      fragments: lineItems,
      charMap,
      wordTokens: buildMergedWordTokens(lineItems),
    };
  });
}
|
||||
|
||||
/**
 * Extracts the text layer of a page into a `ComparePageModel`.
 *
 * Text content is requested with `disableCombineTextItems: true`, entries
 * with a `str` property are converted to positioned items, items whose
 * normalized text is empty are dropped, and the rest are sorted and merged
 * into lines.
 *
 * @param page - Page proxy to read text content from.
 * @param viewport - Viewport that defines the output coordinate space and
 *   the reported page width/height.
 * @returns The page model, with `source` set to `'pdfjs'`.
 */
export async function extractPageModel(
  page: pdfjsLib.PDFPageProxy,
  viewport: pdfjsLib.PageViewport
): Promise<ComparePageModel> {
  const textContent = await page.getTextContent({
    disableCombineTextItems: true,
  });
  const styles = textContent.styles ?? {};

  const positionedItems = textContent.items
    .filter((item): item is PageTextItem => 'str' in item)
    .map((item, index) => toRect(viewport, item, index, styles))
    .filter((item) => item.normalizedText.length > 0);

  const textItems = mergeIntoLines(sortCompareTextItems(positionedItems));

  return {
    pageNumber: page.pageNumber,
    width: viewport.width,
    height: viewport.height,
    textItems,
    plainText: joinCompareTextItems(textItems),
    hasText: textItems.length > 0,
    source: 'pdfjs',
  };
}
|
||||
76
src/js/compare/engine/ocr-page.ts
Normal file
76
src/js/compare/engine/ocr-page.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import Tesseract from 'tesseract.js';
|
||||
|
||||
import type { ComparePageModel, CompareTextItem } from '../types.ts';
|
||||
import { mergeIntoLines, sortCompareTextItems } from './extract-page-model.ts';
|
||||
import {
|
||||
joinCompareTextItems,
|
||||
normalizeCompareText,
|
||||
} from './text-normalization.ts';
|
||||
|
||||
/** Subset of an OCR word result that this module reads. */
type OcrWord = {
  /** Recognized text of the word. */
  text: string;
  /** Bounding box; `x0`/`y0` are used as the origin and `x1`/`y1` as the far corner. */
  bbox: {
    x0: number;
    y0: number;
    x1: number;
    y1: number;
  };
};
|
||||
|
||||
/**
 * Runs OCR on a rendered page canvas and converts the recognized words into
 * a `ComparePageModel`.
 *
 * Each word with non-empty normalized text becomes a text item carrying a
 * single word token with the same rectangle (width/height clamped to at
 * least 1). Items are then sorted and merged into lines.
 *
 * @param canvas - Canvas holding the rendered page; its size is reported as
 *   the page size.
 * @param language - Language passed to `Tesseract.recognize`.
 * @param onProgress - Optional callback receiving the logger's status and
 *   progress (0 when the message carries no progress).
 * @returns The page model with `source` `'ocr'` and `pageNumber` 0.
 */
export async function recognizePageCanvas(
  canvas: HTMLCanvasElement,
  language: string,
  onProgress?: (status: string, progress: number) => void
): Promise<ComparePageModel> {
  const result = await Tesseract.recognize(canvas, language, {
    logger(message) {
      onProgress?.(message.status, message.progress || 0);
    },
  });

  const ocrData = result.data as unknown as { words?: OcrWord[] };
  const words = ((ocrData.words || []) as OcrWord[])
    .map((word, index) => {
      const normalizedText = normalizeCompareText(word.text || '');
      if (!normalizedText) return null;

      const rect = {
        x: word.bbox.x0,
        y: word.bbox.y0,
        width: Math.max(word.bbox.x1 - word.bbox.x0, 1),
        height: Math.max(word.bbox.y1 - word.bbox.y0, 1),
      };

      const item: CompareTextItem = {
        id: `ocr-${index}-${normalizedText}`,
        text: word.text,
        normalizedText,
        rect,
        wordTokens: [
          {
            word: normalizedText,
            compareWord: normalizedText.toLowerCase(),
            // Separate copy so the item and token rectangles stay independent.
            rect: { ...rect },
          },
        ],
      };

      return item;
    })
    .filter((word): word is CompareTextItem => Boolean(word));

  const mergedItems = mergeIntoLines(sortCompareTextItems(words));

  return {
    pageNumber: 0,
    width: canvas.width,
    height: canvas.height,
    textItems: mergedItems,
    plainText: joinCompareTextItems(mergedItems),
    hasText: mergedItems.length > 0,
    source: 'ocr',
  };
}
|
||||
61
src/js/compare/engine/page-signatures.ts
Normal file
61
src/js/compare/engine/page-signatures.ts
Normal file
@@ -0,0 +1,61 @@
|
||||
import * as pdfjsLib from 'pdfjs-dist';
|
||||
|
||||
import type { ComparePageSignature, CompareTextItem } from '../types.ts';
|
||||
import {
|
||||
joinNormalizedText,
|
||||
normalizeCompareText,
|
||||
} from './text-normalization.ts';
|
||||
|
||||
/**
 * Shape of the text-content entries used for page signatures; entries are
 * selected by the presence of a `str` property and only `str` is read.
 */
type SignatureTextItem = {
  str: string;
  dir: string;
  transform: number[];
  width: number;
  height: number;
  fontName: string;
  hasEOL: boolean;
};
|
||||
|
||||
/**
 * Wraps a normalized token in a `CompareTextItem` with a zero-sized
 * rectangle at the origin; the id is `token-<index>-<token>`.
 */
function tokenToItem(token: string, index: number): CompareTextItem {
  return {
    id: `token-${index}-${token}`,
    text: token,
    normalizedText: token,
    rect: { x: 0, y: 0, width: 0, height: 0 },
  };
}
|
||||
|
||||
/**
 * Builds a lightweight text signature for one page.
 *
 * The page's text entries are normalized, empty results are discarded, and
 * only the first 500 remaining tokens are kept.
 *
 * @param pdfDoc - Document to read the page from.
 * @param pageNumber - Page number passed to `pdfDoc.getPage`.
 * @returns The signature: joined plain text, a `hasText` flag and one
 *   placeholder item per kept token.
 */
export async function extractPageSignature(
  pdfDoc: pdfjsLib.PDFDocumentProxy,
  pageNumber: number
): Promise<ComparePageSignature> {
  // Upper bound on tokens kept per page signature.
  const maxSignatureTokens = 500;

  const page = await pdfDoc.getPage(pageNumber);
  const textContent = await page.getTextContent();

  const limitedTokens = textContent.items
    .filter((item): item is SignatureTextItem => 'str' in item)
    .map((item) => normalizeCompareText(item.str))
    .filter(Boolean)
    .slice(0, maxSignatureTokens);

  return {
    pageNumber,
    plainText: joinNormalizedText(limitedTokens),
    hasText: limitedTokens.length > 0,
    tokenItems: limitedTokens.map((token, index) => tokenToItem(token, index)),
  };
}
|
||||
|
||||
/**
 * Extracts the signature of every page, one page at a time, from page 1 to
 * `pdfDoc.numPages`.
 *
 * @param pdfDoc - Document to process.
 * @param onProgress - Optional callback invoked before each page is
 *   processed with the page number and the total page count.
 * @returns Signatures in page order.
 */
export async function extractDocumentSignatures(
  pdfDoc: pdfjsLib.PDFDocumentProxy,
  onProgress?: (pageNumber: number, totalPages: number) => void
): Promise<ComparePageSignature[]> {
  const totalPages = pdfDoc.numPages;
  const signatures: ComparePageSignature[] = [];

  for (let pageNumber = 1; pageNumber <= totalPages; pageNumber += 1) {
    onProgress?.(pageNumber, totalPages);
    signatures.push(await extractPageSignature(pdfDoc, pageNumber));
  }

  return signatures;
}
|
||||
122
src/js/compare/engine/pair-pages.ts
Normal file
122
src/js/compare/engine/pair-pages.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
import type { ComparePagePair, ComparePageSignature } from '../types.ts';
|
||||
|
||||
/** Splits `text` on whitespace and returns the set of distinct non-empty tokens. */
function tokenize(text: string): Set<string> {
  return new Set(text.split(/\s+/).filter(Boolean));
}
|
||||
|
||||
/**
 * Scores how likely two page signatures describe the same page, in [0, 1].
 *
 * - Neither page has text: 0.7 when the page numbers match, otherwise 0.35.
 * - Exactly one page has text: 0.08.
 * - Both have text: Jaccard overlap of their distinct words, plus 0.1 when the
 *   page numbers match, capped at 1.
 *
 * @param left - Signature of the page from the first document.
 * @param right - Signature of the page from the second document.
 * @returns The similarity score.
 */
function similarityScore(
  left: ComparePageSignature,
  right: ComparePageSignature
) {
  const samePageNumber = left.pageNumber === right.pageNumber;

  if (!(left.hasText || right.hasText)) {
    if (samePageNumber) {
      return 0.7;
    }
    return 0.35;
  }

  if (!(left.hasText && right.hasText)) {
    return 0.08;
  }

  const leftWords = tokenize(left.plainText);
  const rightWords = tokenize(right.plainText);

  let sharedCount = 0;
  for (const word of leftWords) {
    if (rightWords.has(word)) {
      sharedCount += 1;
    }
  }

  // |A ∪ B| = |A| + |B| - |A ∩ B|, so no third set has to be materialised.
  const unionSize = leftWords.size + rightWords.size - sharedCount;
  const jaccard = unionSize === 0 ? 0 : sharedCount / unionSize;
  const positionalBias = samePageNumber ? 0.1 : 0;

  return Math.min(jaccard + positionalBias, 1);
}
|
||||
|
||||
/**
 * Aligns the pages of two documents with a dynamic-programming sequence
 * alignment.
 *
 * Pairing two pages costs `1 - similarityScore(left, right)`; leaving a page
 * unpaired on either side costs a flat 0.8. When several moves tie for the
 * cheapest cost, pairing wins over dropping a left page, which wins over
 * dropping a right page.
 *
 * @param leftPages - Page signatures of the first document, in order.
 * @param rightPages - Page signatures of the second document, in order.
 * @returns The alignment in document order. `pairIndex` is 1-based; unpaired
 *   pages have `null` on the missing side and a confidence of 0, paired pages
 *   carry their similarity score as confidence.
 */
export function pairPages(
  leftPages: ComparePageSignature[],
  rightPages: ComparePageSignature[]
) {
  type Move = 'match' | 'left' | 'right';

  const gapCost = 0.8;
  const rows = leftPages.length + 1;
  const cols = rightPages.length + 1;

  // costs[r][c]: cheapest alignment of the first r left pages with the first
  // c right pages. moves[r][c]: the last move taken to reach that cell.
  const costs: number[][] = [];
  const moves: Move[][] = [];
  for (let row = 0; row < rows; row += 1) {
    costs.push(new Array<number>(cols).fill(0));
    moves.push(new Array<Move>(cols).fill('match'));
  }

  // Borders: with nothing on the other side, every page is unpaired.
  for (let row = 1; row < rows; row += 1) {
    costs[row][0] = row * gapCost;
    moves[row][0] = 'left';
  }
  for (let col = 1; col < cols; col += 1) {
    costs[0][col] = col * gapCost;
    moves[0][col] = 'right';
  }

  for (let row = 1; row < rows; row += 1) {
    for (let col = 1; col < cols; col += 1) {
      const similarity = similarityScore(
        leftPages[row - 1],
        rightPages[col - 1]
      );
      const viaMatch = costs[row - 1][col - 1] + (1 - similarity);
      const viaLeft = costs[row - 1][col] + gapCost;
      const viaRight = costs[row][col - 1] + gapCost;

      // Strict comparisons keep the tie-break order: match, then left, then right.
      let bestCost = viaMatch;
      let bestMove: Move = 'match';
      if (viaLeft < bestCost) {
        bestCost = viaLeft;
        bestMove = 'left';
      }
      if (viaRight < bestCost) {
        bestCost = viaRight;
        bestMove = 'right';
      }

      costs[row][col] = bestCost;
      moves[row][col] = bestMove;
    }
  }

  // Walk back from the bottom-right corner; pairs come out last-to-first.
  const reversedPairs: ComparePagePair[] = [];
  let row = leftPages.length;
  let col = rightPages.length;

  while (row > 0 || col > 0) {
    const move = moves[row][col];
    const leftPage = row > 0 ? leftPages[row - 1] : null;
    const rightPage = col > 0 ? rightPages[col - 1] : null;

    if (leftPage && rightPage && move === 'match') {
      reversedPairs.push({
        pairIndex: 0,
        leftPageNumber: leftPage.pageNumber,
        rightPageNumber: rightPage.pageNumber,
        confidence: similarityScore(leftPage, rightPage),
      });
      row -= 1;
      col -= 1;
    } else if (leftPage && (!rightPage || move === 'left')) {
      reversedPairs.push({
        pairIndex: 0,
        leftPageNumber: leftPage.pageNumber,
        rightPageNumber: null,
        confidence: 0,
      });
      row -= 1;
    } else if (rightPage) {
      reversedPairs.push({
        pairIndex: 0,
        leftPageNumber: null,
        rightPageNumber: rightPage.pageNumber,
        confidence: 0,
      });
      col -= 1;
    }
  }

  // Restore document order and assign 1-based indices.
  const total = reversedPairs.length;
  const orderedPairs: ComparePagePair[] = [];
  for (let position = total - 1; position >= 0; position -= 1) {
    orderedPairs.push({
      ...reversedPairs[position],
      pairIndex: total - position,
    });
  }

  return orderedPairs;
}
|
||||
64
src/js/compare/engine/text-normalization.ts
Normal file
64
src/js/compare/engine/text-normalization.ts
Normal file
@@ -0,0 +1,64 @@
|
||||
import type { CompareTextItem } from '../types.ts';
|
||||
|
||||
/**
 * Canonicalises extracted text so equivalent strings compare equal.
 *
 * Steps, in order: Unicode NFKC normalization; C0/C1 control characters and
 * DEL replaced by spaces; Private Use Area code points (U+E000–U+F8FF)
 * replaced by spaces; whitespace runs collapsed to a single space; leading
 * and trailing whitespace trimmed.
 *
 * @param text - Raw text to normalize.
 * @returns The normalized text; may be empty.
 */
export function normalizeCompareText(text: string) {
  const compatibilityForm = text.normalize('NFKC');
  const withoutControls = compatibilityForm.replace(
    /[\u0000-\u001F\u007F-\u009F]/g,
    ' '
  );
  const withoutPrivateUse = withoutControls.replace(
    /[\u{E000}-\u{F8FF}]/gu,
    ' '
  );
  const singleSpaced = withoutPrivateUse.replace(/\s+/g, ' ');

  return singleSpaced.trim();
}
|
||||
|
||||
/**
 * Decides whether `next` should be glued directly onto `current` instead of
 * being separated by a space.
 *
 * No space is inserted when any of these holds:
 * - `current` is empty;
 * - `next` starts with closing punctuation (`, . ; : ! ? % ) ] }`);
 * - `next` is exactly one straight quote character;
 * - `next` starts with an apostrophe or a closing curly quote;
 * - `current` ends with an opening bracket, a slash, a straight or opening
 *   curly quote, or a hyphen.
 *
 * @param current - Text assembled so far.
 * @param next - Token about to be appended.
 * @returns `true` when the token should be appended without a space.
 */
function shouldAppendWithoutSpace(current: string, next: string) {
  return (
    !current ||
    /^[,.;:!?%)\]}]/.test(next) ||
    /^["']$/.test(next) ||
    /^['’”]/u.test(next) ||
    /[([{/"'“‘-]$/u.test(current)
  );
}
|
||||
|
||||
/**
 * Joins tokens into one string, putting a space between neighbours unless
 * `shouldAppendWithoutSpace` says the next token attaches directly.
 *
 * Empty tokens are skipped.
 *
 * @param tokens - Tokens to join, in order.
 * @returns The joined text; empty when there are no non-empty tokens.
 */
export function joinNormalizedText(tokens: string[]) {
  let joined = '';

  for (const token of tokens) {
    if (!token) {
      continue;
    }

    const separator = shouldAppendWithoutSpace(joined, token) ? '' : ' ';
    joined = `${joined}${separator}${token}`;
  }

  return joined;
}
|
||||
|
||||
/**
 * Joins the normalized text of a list of text items into a single string.
 *
 * @param items - Text items whose `normalizedText` values are joined in order.
 * @returns The result of `joinNormalizedText` over those values.
 */
export function joinCompareTextItems(items: CompareTextItem[]) {
  const normalizedTokens: string[] = [];

  for (const item of items) {
    normalizedTokens.push(item.normalizedText);
  }

  return joinNormalizedText(normalizedTokens);
}
|
||||
|
||||
/**
 * Heuristically flags extracted text that is too noisy to be useful.
 *
 * After normalization the text counts as low quality when any of these holds:
 * - it is empty;
 * - it contains no letter or digit at all;
 * - it has at least 12 visible characters, fewer than 45% of them are letters
 *   or digits, and more than 35% are other symbols;
 * - it has at least 6 whitespace-separated tokens and fewer than 60% of them
 *   contain a letter or digit.
 *
 * @param text - Extracted text to assess.
 * @returns `true` when the text looks like low-quality extraction output.
 */
export function isLowQualityExtractedText(text: string) {
  const normalized = normalizeCompareText(text);
  if (!normalized) {
    return true;
  }

  const hasAlphaNumeric = (value: string) => /[\p{L}\p{N}]/u.test(value);

  // Iterating the string walks code points, so astral characters count once.
  let visibleCount = 0;
  let alphaNumericCount = 0;
  for (const character of normalized) {
    if (character.trim().length === 0) {
      continue;
    }
    visibleCount += 1;
    if (hasAlphaNumeric(character)) {
      alphaNumericCount += 1;
    }
  }

  if (alphaNumericCount === 0) {
    return true;
  }

  const symbolCount = visibleCount - alphaNumericCount;
  const dominatedBySymbols =
    visibleCount >= 12 &&
    alphaNumericCount / visibleCount < 0.45 &&
    symbolCount / visibleCount > 0.35;
  if (dominatedBySymbols) {
    return true;
  }

  const tokens = normalized.split(/\s+/).filter(Boolean);
  let tokenWithAlphaNumericCount = 0;
  for (const token of tokens) {
    if (hasAlphaNumeric(token)) {
      tokenWithAlphaNumericCount += 1;
    }
  }

  return tokens.length >= 6 && tokenWithAlphaNumericCount / tokens.length < 0.6;
}
|
||||
134
src/js/compare/engine/visual-diff.ts
Normal file
134
src/js/compare/engine/visual-diff.ts
Normal file
@@ -0,0 +1,134 @@
|
||||
import pixelmatch from 'pixelmatch';
|
||||
|
||||
import type { CompareVisualDiff } from '../types.ts';
|
||||
|
||||
/**
 * Rectangle described by its origin (`x`, `y`) and its size (`width`,
 * `height`); all four fields are numbers.
 */
type FocusRegion = Record<'x' | 'y' | 'width' | 'height', number>;
|
||||
|
||||
/**
 * Creates a detached `<canvas>` element with the given dimensions.
 *
 * @param width - Value assigned to the canvas `width`.
 * @param height - Value assigned to the canvas `height`.
 * @returns The new canvas element (not attached to the document).
 */
function createCanvas(width: number, height: number) {
  const surface = document.createElement('canvas');
  [surface.width, surface.height] = [width, height];

  return surface;
}
|
||||
|
||||
/**
 * Paints the target canvas white and draws the source canvas centred on it.
 *
 * The offset on each axis is half the size difference, rounded down.
 *
 * @param sourceCanvas - Canvas whose pixels are copied.
 * @param targetCanvas - Canvas that receives the white background and the copy.
 * @throws Error when the target canvas cannot provide a 2D context.
 */
function drawNormalized(
  sourceCanvas: HTMLCanvasElement,
  targetCanvas: HTMLCanvasElement
) {
  const context = targetCanvas.getContext('2d');
  if (!context) {
    throw new Error('Could not create comparison canvas context.');
  }

  const centreOffset = (outer: number, inner: number) =>
    Math.floor((outer - inner) / 2);
  const { width: targetWidth, height: targetHeight } = targetCanvas;

  context.fillStyle = '#ffffff';
  context.fillRect(0, 0, targetWidth, targetHeight);
  context.drawImage(
    sourceCanvas,
    centreOffset(targetWidth, sourceCanvas.width),
    centreOffset(targetHeight, sourceCanvas.height)
  );
}
|
||||
|
||||
/**
 * Restricts a requested focus region to the page bounds.
 *
 * The origin is floored and clamped to `[0, size - 1]`; the extent is ceiled
 * and clamped so the region ends inside the page and spans at least one pixel.
 * A missing region, or one with a non-finite field, yields the whole page.
 */
function clampFocusRegion(
  focusRegion: FocusRegion | undefined,
  width: number,
  height: number
): FocusRegion {
  const wholePage = { x: 0, y: 0, width, height };
  if (!focusRegion) {
    return wholePage;
  }

  const fields = [
    focusRegion.x,
    focusRegion.y,
    focusRegion.width,
    focusRegion.height,
  ];
  if (!fields.every((value) => Number.isFinite(value))) {
    return wholePage;
  }

  const x = Math.min(Math.max(Math.floor(focusRegion.x), 0), width - 1);
  const y = Math.min(Math.max(Math.floor(focusRegion.y), 0), height - 1);

  return {
    x,
    y,
    // `width - x` / `height - y` keep the far edge inside the page.
    width: Math.max(Math.min(Math.ceil(focusRegion.width), width - x), 1),
    height: Math.max(Math.min(Math.ceil(focusRegion.height), height - y), 1),
  };
}

/**
 * Renders a pixel diff of two page canvases into `outputCanvas`.
 *
 * Both inputs are centred on white canvases of a common size (the larger
 * width and height of the two, at least 1×1) and compared with `pixelmatch`.
 * The output shows the second page with the diff image drawn on top at 90%
 * opacity, optionally cropped to `focusRegion`.
 *
 * The focus region is clamped to the page, so a region that starts outside
 * the page or runs past its edge no longer produces blank bands or an empty
 * draw. Regions that already lie inside the page are unaffected.
 *
 * @param canvas1 - Rendering of the page from the first document.
 * @param canvas2 - Rendering of the page from the second document.
 * @param outputCanvas - Canvas that is resized to the (clamped) region and
 *   receives the composed image.
 * @param focusRegion - Optional crop rectangle in the coordinates of the
 *   common-size page; omitted means the whole page.
 * @returns Mismatch statistics. `mismatchRatio` is always relative to the
 *   whole page, not to the focus region.
 * @throws Error when a required 2D canvas context cannot be created.
 */
export function renderVisualDiff(
  canvas1: HTMLCanvasElement,
  canvas2: HTMLCanvasElement,
  outputCanvas: HTMLCanvasElement,
  focusRegion?: FocusRegion
): CompareVisualDiff {
  const width = Math.max(canvas1.width, canvas2.width, 1);
  const height = Math.max(canvas1.height, canvas2.height, 1);
  const normalizedCanvas1 = createCanvas(width, height);
  const normalizedCanvas2 = createCanvas(width, height);

  drawNormalized(canvas1, normalizedCanvas1);
  drawNormalized(canvas2, normalizedCanvas2);

  const context1 = normalizedCanvas1.getContext('2d');
  const context2 = normalizedCanvas2.getContext('2d');
  const outputContext = outputCanvas.getContext('2d');

  if (!context1 || !context2 || !outputContext) {
    throw new Error('Could not create visual diff context.');
  }

  const image1 = context1.getImageData(0, 0, width, height);
  const image2 = context2.getImageData(0, 0, width, height);
  // The ImageData size comes from the arguments, not from the canvas, so the
  // output canvas does not need to be resized before this call.
  const diffImage = outputContext.createImageData(width, height);

  const mismatchPixels = pixelmatch(
    image1.data,
    image2.data,
    diffImage.data,
    width,
    height,
    {
      threshold: 0.12,
      includeAA: false,
      alpha: 0.2,
      diffMask: false,
      diffColor: [239, 68, 68],
      diffColorAlt: [34, 197, 94],
    }
  );

  // putImageData ignores scaling, so the diff goes onto a scratch canvas that
  // drawImage can then crop and composite.
  const overlayCanvas = createCanvas(width, height);
  const overlayContext = overlayCanvas.getContext('2d');

  if (!overlayContext) {
    throw new Error('Could not create visual diff overlay context.');
  }

  overlayContext.putImageData(diffImage, 0, 0);

  const region = clampFocusRegion(focusRegion, width, height);

  outputCanvas.width = region.width;
  outputCanvas.height = region.height;

  outputContext.fillStyle = '#ffffff';
  outputContext.fillRect(0, 0, outputCanvas.width, outputCanvas.height);
  outputContext.drawImage(
    normalizedCanvas2,
    region.x,
    region.y,
    region.width,
    region.height,
    0,
    0,
    outputCanvas.width,
    outputCanvas.height
  );
  outputContext.globalAlpha = 0.9;
  outputContext.drawImage(
    overlayCanvas,
    region.x,
    region.y,
    region.width,
    region.height,
    0,
    0,
    outputCanvas.width,
    outputCanvas.height
  );
  outputContext.globalAlpha = 1;

  return {
    mismatchPixels,
    mismatchRatio: mismatchPixels / Math.max(width * height, 1),
    hasDiff: mismatchPixels > 0,
  };
}
|
||||
Reference in New Issue
Block a user