Files
bentopdf/src/js/compare/engine/pair-pages.ts
alam00000 d2a1450bc0 refactor: remove HTML report export and implement PDF export options in PDF comparison tool
- Deleted the exportCompareHtmlReport function and its related imports.
- Introduced a dropdown menu for exporting comparison results as PDFs with multiple modes (split, alternating, left, right).
- Updated the comparison logic to utilize caching for page models and comparison results.
- Refactored rendering functions to improve code organization and maintainability.
- Enhanced UI elements for better user experience during PDF export.
2026-03-09 23:27:40 +05:30

120 lines
3.2 KiB
TypeScript

import type { ComparePagePair, ComparePageSignature } from '../types.ts';
import { tokenizeTextAsSet } from './text-normalization.ts';
/**
 * Scores how similar two page signatures are, as a number in [0, 1].
 *
 * - Neither page has text: 0.7 when the page numbers agree, otherwise 0.35.
 * - Exactly one page has text: a flat 0.08.
 * - Both pages have text: the Jaccard overlap of their token sets (as
 *   produced by `tokenizeTextAsSet`), plus a 0.1 bonus when the page numbers
 *   agree, capped at 1.
 */
function similarityScore(
  left: ComparePageSignature,
  right: ComparePageSignature
) {
  const samePosition = left.pageNumber === right.pageNumber;

  // Without text on either side, page position is the only signal available.
  if (!(left.hasText || right.hasText)) {
    return samePosition ? 0.7 : 0.35;
  }
  // A page with text facing a page without text gets a fixed low score.
  if (!(left.hasText && right.hasText)) {
    return 0.08;
  }

  const leftTokens = tokenizeTextAsSet(left.plainText);
  const rightTokens = tokenizeTextAsSet(right.plainText);

  let sharedCount = 0;
  for (const token of leftTokens) {
    if (rightTokens.has(token)) sharedCount += 1;
  }

  // |A ∪ B| = |A| + |B| − |A ∩ B|, so no merged set has to be built.
  const unionSize = leftTokens.size + rightTokens.size - sharedCount;
  const overlap = unionSize > 0 ? sharedCount / unionSize : 0;
  const positionBonus = samePosition ? 0.1 : 0;

  return Math.min(overlap + positionBonus, 1);
}
/**
 * Aligns the pages of two documents in order, pairing each left page with a
 * right page or marking it as unmatched.
 *
 * The alignment is an edit-distance-style dynamic programme over the two page
 * lists: pairing two pages costs `1 - similarityScore(left, right)`, and
 * leaving a page unmatched on either side costs a fixed 0.8. When several
 * moves tie for the minimum cost, a pairing is preferred over an unmatched
 * left page, which is preferred over an unmatched right page.
 *
 * @param leftPages - Page signatures of the left document, in document order.
 * @param rightPages - Page signatures of the right document, in document order.
 * @returns The aligned pairs in document order with 1-based `pairIndex`.
 *   Unmatched pages carry `null` for the missing side and a `confidence` of 0;
 *   matched pages carry their similarity score as `confidence`.
 */
export function pairPages(
  leftPages: ComparePageSignature[],
  rightPages: ComparePageSignature[]
): ComparePagePair[] {
  type Step = 'match' | 'left' | 'right';

  const insertionCost = 0.8;
  const rowCount = leftPages.length + 1;
  const colCount = rightPages.length + 1;

  // dp[i][j]: minimum cost of aligning the first i left pages with the first
  // j right pages. backtrack[i][j]: the move that produced that minimum.
  const dp = Array.from({ length: rowCount }, () =>
    Array<number>(colCount).fill(0)
  );
  const backtrack = Array.from({ length: rowCount }, () =>
    Array<Step>(colCount).fill('match')
  );
  // similarities[i][j]: score of left page i-1 against right page j-1, kept
  // so the traceback can reuse it instead of scoring matched pairs again.
  const similarities = Array.from({ length: rowCount }, () =>
    Array<number>(colCount).fill(0)
  );

  // Borders: aligning against an empty prefix leaves every page unmatched.
  for (let i = 1; i < rowCount; i += 1) {
    dp[i][0] = i * insertionCost;
    backtrack[i][0] = 'left';
  }
  for (let j = 1; j < colCount; j += 1) {
    dp[0][j] = j * insertionCost;
    backtrack[0][j] = 'right';
  }

  for (let i = 1; i < rowCount; i += 1) {
    for (let j = 1; j < colCount; j += 1) {
      const similarity = similarityScore(leftPages[i - 1], rightPages[j - 1]);
      similarities[i][j] = similarity;

      const matchCost = dp[i - 1][j - 1] + (1 - similarity);
      const leftCost = dp[i - 1][j] + insertionCost;
      const rightCost = dp[i][j - 1] + insertionCost;
      const minCost = Math.min(matchCost, leftCost, rightCost);
      dp[i][j] = minCost;

      // Tie-break order: match, then left, then right.
      if (minCost === matchCost) {
        backtrack[i][j] = 'match';
      } else if (minCost === leftCost) {
        backtrack[i][j] = 'left';
      } else {
        backtrack[i][j] = 'right';
      }
    }
  }

  // Walk back from the bottom-right corner; pairs come out last-to-first.
  const pairs: ComparePagePair[] = [];
  let i = leftPages.length;
  let j = rightPages.length;
  while (i > 0 || j > 0) {
    const direction = backtrack[i][j];

    if (i > 0 && j > 0 && direction === 'match') {
      pairs.push({
        pairIndex: 0,
        leftPageNumber: leftPages[i - 1].pageNumber,
        rightPageNumber: rightPages[j - 1].pageNumber,
        confidence: similarities[i][j],
      });
      i -= 1;
      j -= 1;
      continue;
    }

    if (i > 0 && (j === 0 || direction === 'left')) {
      pairs.push({
        pairIndex: 0,
        leftPageNumber: leftPages[i - 1].pageNumber,
        rightPageNumber: null,
        confidence: 0,
      });
      i -= 1;
      continue;
    }

    if (j > 0) {
      pairs.push({
        pairIndex: 0,
        leftPageNumber: null,
        rightPageNumber: rightPages[j - 1].pageNumber,
        confidence: 0,
      });
      j -= 1;
    }
  }

  // Restore document order and assign the final 1-based indices.
  return pairs
    .reverse()
    .map((pair, index) => ({ ...pair, pairIndex: index + 1 }));
}