feat: enhance PDF comparison with new change types and zoom functionality

- Added support for 'moved' and 'style-changed' change types in PDF comparison.
- Implemented category filters for changes, allowing users to filter by text, images, headers, annotations, formatting, and background.
- Introduced zoom functionality with buttons for zooming in, out, and resetting to default.
- Updated UI to reflect new change types and categories, including visual indicators for moved and style-changed items.
- Enhanced summary display to include counts for moved and style-changed changes.
- Refactored rendering logic to accommodate zoom levels and improve performance.
- Added tests for new change detection features and category assignments.
This commit is contained in:
alam00000
2026-03-10 13:47:46 +05:30
parent 89d7cd8e3d
commit 5232102ac0
31 changed files with 2503 additions and 183 deletions

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "مقارنة ملفات PDF", "name": "مقارنة ملفات PDF",
"subtitle": "مقارنة ملفي PDF جنبًا إلى جنب." "subtitle": "مقارنة ملفي PDF جنبًا إلى جنب.",
"firstPdf": "ملف PDF الأول",
"secondPdf": "ملف PDF الثاني",
"clickOrDrop": "انقر أو أفلت",
"page": "الصفحة",
"overlay": "تراكب",
"sideBySide": "جنبًا إلى جنب",
"flicker": "وميض",
"syncScroll": "مزامنة التمرير",
"export": "تصدير",
"exportAsPdf": "تصدير كملف PDF",
"splitView": "عرض مقسوم",
"alternating": "بالتناوب",
"leftDocument": "المستند الأيسر",
"rightDocument": "المستند الأيمن",
"original": "الأصلي",
"modified": "المعدل",
"searchChanges": "ابحث في التغييرات...",
"deleted": "محذوف",
"added": "مضاف",
"prevPage": "الصفحة السابقة",
"nextPage": "الصفحة التالية",
"prevChange": "التغيير السابق",
"nextChange": "التغيير التالي",
"uploadTwoPdfs": "حمّل ملفي PDF لرؤية الاختلافات.",
"noDifferences": "لم يتم اكتشاف اختلافات في هذه الصفحة.",
"noMatchingChanges": "لا توجد تغييرات تطابق عامل التصفية الحالي.",
"pageNotExist": "الصفحة {{page}} غير موجودة في ملف PDF هذا.",
"noPairedPage": "لا توجد صفحة مقترنة لهذا الجانب.",
"buildingModel": "جارٍ إنشاء نموذج إقران الصفحات...",
"indexingPdf": "جارٍ فهرسة PDF {{num}} الصفحة {{page}} من {{total}}...",
"loadingComparison": "جارٍ تحميل المقارنة {{current}} من {{total}}...",
"runningOcr": "جارٍ تشغيل OCR على الصفحة {{page}}...",
"preparingExport": "جارٍ تجهيز تصدير PDF...",
"renderingPage": "جارٍ عرض الصفحة {{current}} من {{total}}...",
"exportError": "خطأ في التصدير",
"exportFailed": "تعذر تصدير ملف PDF المقارن.",
"loadingFile": "جارٍ تحميل {{name}}...",
"invalidFile": "ملف غير صالح",
"invalidFileMsg": "يرجى اختيار ملف PDF صالح.",
"loadError": "تعذر تحميل ملف PDF. قد يكون تالفًا أو محميًا بكلمة مرور."
}, },
"posterizePdf": { "posterizePdf": {
"name": "تقسيم PDF إلى ملصقات", "name": "تقسيم PDF إلى ملصقات",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Параўнаць PDF", "name": "Параўнаць PDF",
"subtitle": "Параўнаць два PDF побач." "subtitle": "Параўнаць два PDF побач.",
"firstPdf": "Першы PDF",
"secondPdf": "Другі PDF",
"clickOrDrop": "Націсніце або перацягніце",
"page": "Старонка",
"overlay": "Накладанне",
"sideBySide": "Побач",
"flicker": "Мігценне",
"syncScroll": "Сінхранізаваць пракрутку",
"export": "Экспарт",
"exportAsPdf": "Экспартаваць як PDF",
"splitView": "Падзелены выгляд",
"alternating": "Чаргаванне",
"leftDocument": "Левы дакумент",
"rightDocument": "Правы дакумент",
"original": "Арыгінал",
"modified": "Зменены",
"searchChanges": "Шукаць змены...",
"deleted": "Выдалена",
"added": "Дададзена",
"prevPage": "Папярэдняя старонка",
"nextPage": "Наступная старонка",
"prevChange": "Папярэдняя змена",
"nextChange": "Наступная змена",
"uploadTwoPdfs": "Загрузіце два PDF, каб убачыць адрозненні.",
"noDifferences": "На гэтай старонцы адрозненняў не выяўлена.",
"noMatchingChanges": "Няма змен, што адпавядаюць бягучаму фільтру.",
"pageNotExist": "Старонка {{page}} не існуе ў гэтым PDF.",
"noPairedPage": "Для гэтага боку няма спаранай старонкі.",
"buildingModel": "Стварэнне мадэлі супастаўлення старонак...",
"indexingPdf": "Індэксацыя PDF {{num}}, старонка {{page}} з {{total}}...",
"loadingComparison": "Загрузка параўнання {{current}} з {{total}}...",
"runningOcr": "Запуск OCR на старонцы {{page}}...",
"preparingExport": "Падрыхтоўка экспарту PDF...",
"renderingPage": "Адмалёўка старонкі {{current}} з {{total}}...",
"exportError": "Памылка экспарту",
"exportFailed": "Не ўдалося экспартаваць PDF параўнання.",
"loadingFile": "Загрузка {{name}}...",
"invalidFile": "Няправільны файл",
"invalidFileMsg": "Калі ласка, абярыце сапраўдны PDF-файл.",
"loadError": "Не ўдалося загрузіць PDF. Магчыма, ён пашкоджаны або абаронены паролем."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Пераўтварыць у постэр", "name": "Пераўтварыць у постэр",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Sammenlign PDFer", "name": "Sammenlign PDFer",
"subtitle": "Sammenlign to PDFer side om side." "subtitle": "Sammenlign to PDFer side om side.",
"firstPdf": "Første PDF",
"secondPdf": "Anden PDF",
"clickOrDrop": "Klik eller slip",
"page": "Side",
"overlay": "Overlejring",
"sideBySide": "Side om side",
"flicker": "Blink",
"syncScroll": "Synkroniser rulning",
"export": "Eksportér",
"exportAsPdf": "Eksportér som PDF",
"splitView": "Opdelt visning",
"alternating": "Skiftevis",
"leftDocument": "Venstre dokument",
"rightDocument": "Højre dokument",
"original": "Original",
"modified": "Ændret",
"searchChanges": "Søg ændringer...",
"deleted": "Slettet",
"added": "Tilføjet",
"prevPage": "Forrige side",
"nextPage": "Næste side",
"prevChange": "Forrige ændring",
"nextChange": "Næste ændring",
"uploadTwoPdfs": "Upload to PDFer for at se forskellene.",
"noDifferences": "Ingen forskelle fundet på denne side.",
"noMatchingChanges": "Ingen ændringer matcher det aktuelle filter.",
"pageNotExist": "Side {{page}} findes ikke i denne PDF.",
"noPairedPage": "Ingen parret side for denne side.",
"buildingModel": "Opbygger sideparringsmodel...",
"indexingPdf": "Indekserer PDF {{num}}, side {{page}} af {{total}}...",
"loadingComparison": "Indlæser sammenligning {{current}} af {{total}}...",
"runningOcr": "Kører OCR på side {{page}}...",
"preparingExport": "Forbereder PDF-eksport...",
"renderingPage": "Renderer side {{current}} af {{total}}...",
"exportError": "Eksportfejl",
"exportFailed": "Kunne ikke eksportere sammenlignings-PDF.",
"loadingFile": "Indlæser {{name}}...",
"invalidFile": "Ugyldig fil",
"invalidFileMsg": "Vælg venligst en gyldig PDF-fil.",
"loadError": "Kunne ikke indlæse PDF. Den kan være beskadiget eller beskyttet med adgangskode."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Posterisér PDF", "name": "Posterisér PDF",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "PDFs vergleichen", "name": "PDFs vergleichen",
"subtitle": "Zwei PDFs nebeneinander vergleichen." "subtitle": "Zwei PDFs nebeneinander vergleichen.",
"firstPdf": "Erste PDF",
"secondPdf": "Zweite PDF",
"clickOrDrop": "Klicken oder ablegen",
"page": "Seite",
"overlay": "Überlagerung",
"sideBySide": "Nebeneinander",
"flicker": "Flackern",
"syncScroll": "Synchrones Scrollen",
"export": "Exportieren",
"exportAsPdf": "Als PDF exportieren",
"splitView": "Geteilte Ansicht",
"alternating": "Abwechselnd",
"leftDocument": "Linkes Dokument",
"rightDocument": "Rechtes Dokument",
"original": "Original",
"modified": "Geändert",
"searchChanges": "Änderungen suchen...",
"deleted": "Gelöscht",
"added": "Hinzugefügt",
"prevPage": "Vorherige Seite",
"nextPage": "Nächste Seite",
"prevChange": "Vorherige Änderung",
"nextChange": "Nächste Änderung",
"uploadTwoPdfs": "Laden Sie zwei PDFs hoch, um Unterschiede zu sehen.",
"noDifferences": "Auf dieser Seite wurden keine Unterschiede gefunden.",
"noMatchingChanges": "Keine Änderungen entsprechen dem aktuellen Filter.",
"pageNotExist": "Seite {{page}} existiert nicht in dieser PDF.",
"noPairedPage": "Für diese Seite gibt es keine zugeordnete Seite.",
"buildingModel": "Seitenzuordnungsmodell wird erstellt...",
"indexingPdf": "PDF {{num}}, Seite {{page}} von {{total}} wird indiziert...",
"loadingComparison": "Vergleich {{current}} von {{total}} wird geladen...",
"runningOcr": "OCR wird auf Seite {{page}} ausgeführt...",
"preparingExport": "PDF-Export wird vorbereitet...",
"renderingPage": "Seite {{current}} von {{total}} wird gerendert...",
"exportError": "Exportfehler",
"exportFailed": "Vergleichs-PDF konnte nicht exportiert werden.",
"loadingFile": "{{name}} wird geladen...",
"invalidFile": "Ungültige Datei",
"invalidFileMsg": "Bitte wählen Sie eine gültige PDF-Datei aus.",
"loadError": "PDF konnte nicht geladen werden. Sie ist möglicherweise beschädigt oder passwortgeschützt."
}, },
"posterizePdf": { "posterizePdf": {
"name": "PDF posterisieren", "name": "PDF posterisieren",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Compare PDFs", "name": "Compare PDFs",
"subtitle": "Compare two PDFs side by side." "subtitle": "Compare two PDFs side by side.",
"firstPdf": "First PDF",
"secondPdf": "Second PDF",
"clickOrDrop": "Click or drop",
"page": "Page",
"overlay": "Overlay",
"sideBySide": "Side-by-Side",
"flicker": "Flicker",
"syncScroll": "Sync scroll",
"export": "Export",
"exportAsPdf": "Export as PDF",
"splitView": "Split view",
"alternating": "Alternating",
"leftDocument": "Left Document",
"rightDocument": "Right Document",
"original": "Original",
"modified": "Modified",
"searchChanges": "Search changes...",
"deleted": "Deleted",
"added": "Added",
"prevPage": "Previous page",
"nextPage": "Next page",
"prevChange": "Previous change",
"nextChange": "Next change",
"uploadTwoPdfs": "Upload two PDFs to see differences.",
"noDifferences": "No differences detected on this page.",
"noMatchingChanges": "No changes match the current filter.",
"pageNotExist": "Page {{page}} does not exist in this PDF.",
"noPairedPage": "No paired page for this side.",
"buildingModel": "Building page pairing model...",
"indexingPdf": "Indexing PDF {{num}} page {{page}} of {{total}}...",
"loadingComparison": "Loading comparison {{current}} of {{total}}...",
"runningOcr": "Running OCR on page {{page}}...",
"preparingExport": "Preparing PDF export...",
"renderingPage": "Rendering page {{current}} of {{total}}...",
"exportError": "Export Error",
"exportFailed": "Could not export comparison PDF.",
"loadingFile": "Loading {{name}}...",
"invalidFile": "Invalid File",
"invalidFileMsg": "Please select a valid PDF file.",
"loadError": "Could not load PDF. It may be corrupt or password-protected."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Posterize PDF", "name": "Posterize PDF",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Comparar PDFs", "name": "Comparar PDFs",
"subtitle": "Compara dos PDFs lado a lado." "subtitle": "Compara dos PDFs lado a lado.",
"firstPdf": "Primer PDF",
"secondPdf": "Segundo PDF",
"clickOrDrop": "Haz clic o suelta",
"page": "Página",
"overlay": "Superposición",
"sideBySide": "Lado a lado",
"flicker": "Parpadeo",
"syncScroll": "Sincronizar desplazamiento",
"export": "Exportar",
"exportAsPdf": "Exportar como PDF",
"splitView": "Vista dividida",
"alternating": "Alternando",
"leftDocument": "Documento izquierdo",
"rightDocument": "Documento derecho",
"original": "Original",
"modified": "Modificado",
"searchChanges": "Buscar cambios...",
"deleted": "Eliminado",
"added": "Añadido",
"prevPage": "Página anterior",
"nextPage": "Página siguiente",
"prevChange": "Cambio anterior",
"nextChange": "Cambio siguiente",
"uploadTwoPdfs": "Sube dos PDFs para ver las diferencias.",
"noDifferences": "No se detectaron diferencias en esta página.",
"noMatchingChanges": "Ningún cambio coincide con el filtro actual.",
"pageNotExist": "La página {{page}} no existe en este PDF.",
"noPairedPage": "No hay una página emparejada para este lado.",
"buildingModel": "Creando el modelo de emparejamiento de páginas...",
"indexingPdf": "Indexando PDF {{num}}, página {{page}} de {{total}}...",
"loadingComparison": "Cargando comparación {{current}} de {{total}}...",
"runningOcr": "Ejecutando OCR en la página {{page}}...",
"preparingExport": "Preparando la exportación del PDF...",
"renderingPage": "Renderizando página {{current}} de {{total}}...",
"exportError": "Error de exportación",
"exportFailed": "No se pudo exportar el PDF de comparación.",
"loadingFile": "Cargando {{name}}...",
"invalidFile": "Archivo no válido",
"invalidFileMsg": "Selecciona un archivo PDF válido.",
"loadError": "No se pudo cargar el PDF. Puede estar dañado o protegido con contraseña."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Posterizar PDF", "name": "Posterizar PDF",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Comparer des PDF", "name": "Comparer des PDF",
"subtitle": "Comparer deux PDF côte à côte." "subtitle": "Comparer deux PDF côte à côte.",
"firstPdf": "Premier PDF",
"secondPdf": "Deuxième PDF",
"clickOrDrop": "Cliquer ou déposer",
"page": "Page",
"overlay": "Superposition",
"sideBySide": "Côte à côte",
"flicker": "Clignotement",
"syncScroll": "Synchroniser le défilement",
"export": "Exporter",
"exportAsPdf": "Exporter en PDF",
"splitView": "Vue divisée",
"alternating": "Alterné",
"leftDocument": "Document de gauche",
"rightDocument": "Document de droite",
"original": "Original",
"modified": "Modifié",
"searchChanges": "Rechercher des modifications...",
"deleted": "Supprimé",
"added": "Ajouté",
"prevPage": "Page précédente",
"nextPage": "Page suivante",
"prevChange": "Modification précédente",
"nextChange": "Modification suivante",
"uploadTwoPdfs": "Téléversez deux PDF pour voir les différences.",
"noDifferences": "Aucune différence détectée sur cette page.",
"noMatchingChanges": "Aucune modification ne correspond au filtre actuel.",
"pageNotExist": "La page {{page}} n'existe pas dans ce PDF.",
"noPairedPage": "Aucune page associée pour ce côté.",
"buildingModel": "Création du modèle d'appariement des pages...",
"indexingPdf": "Indexation du PDF {{num}}, page {{page}} sur {{total}}...",
"loadingComparison": "Chargement de la comparaison {{current}} sur {{total}}...",
"runningOcr": "Exécution de l'OCR sur la page {{page}}...",
"preparingExport": "Préparation de l'export PDF...",
"renderingPage": "Rendu de la page {{current}} sur {{total}}...",
"exportError": "Erreur d'export",
"exportFailed": "Impossible d'exporter le PDF de comparaison.",
"loadingFile": "Chargement de {{name}}...",
"invalidFile": "Fichier invalide",
"invalidFileMsg": "Veuillez sélectionner un fichier PDF valide.",
"loadError": "Impossible de charger le PDF. Il est peut-être corrompu ou protégé par mot de passe."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Posteriser un PDF", "name": "Posteriser un PDF",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Bandingkan PDF", "name": "Bandingkan PDF",
"subtitle": "Bandingkan dua PDF berdampingan." "subtitle": "Bandingkan dua PDF berdampingan.",
"firstPdf": "PDF pertama",
"secondPdf": "PDF kedua",
"clickOrDrop": "Klik atau letakkan",
"page": "Halaman",
"overlay": "Hamparan",
"sideBySide": "Berdampingan",
"flicker": "Kedip",
"syncScroll": "Sinkronkan gulir",
"export": "Ekspor",
"exportAsPdf": "Ekspor sebagai PDF",
"splitView": "Tampilan terbagi",
"alternating": "Bergantian",
"leftDocument": "Dokumen kiri",
"rightDocument": "Dokumen kanan",
"original": "Asli",
"modified": "Diubah",
"searchChanges": "Cari perubahan...",
"deleted": "Dihapus",
"added": "Ditambahkan",
"prevPage": "Halaman sebelumnya",
"nextPage": "Halaman berikutnya",
"prevChange": "Perubahan sebelumnya",
"nextChange": "Perubahan berikutnya",
"uploadTwoPdfs": "Unggah dua PDF untuk melihat perbedaannya.",
"noDifferences": "Tidak ada perbedaan yang terdeteksi pada halaman ini.",
"noMatchingChanges": "Tidak ada perubahan yang cocok dengan filter saat ini.",
"pageNotExist": "Halaman {{page}} tidak ada di PDF ini.",
"noPairedPage": "Tidak ada halaman pasangan untuk sisi ini.",
"buildingModel": "Membangun model pemasangan halaman...",
"indexingPdf": "Mengindeks PDF {{num}} halaman {{page}} dari {{total}}...",
"loadingComparison": "Memuat perbandingan {{current}} dari {{total}}...",
"runningOcr": "Menjalankan OCR pada halaman {{page}}...",
"preparingExport": "Menyiapkan ekspor PDF...",
"renderingPage": "Merender halaman {{current}} dari {{total}}...",
"exportError": "Kesalahan ekspor",
"exportFailed": "Tidak dapat mengekspor PDF perbandingan.",
"loadingFile": "Memuat {{name}}...",
"invalidFile": "File tidak valid",
"invalidFileMsg": "Silakan pilih file PDF yang valid.",
"loadError": "Tidak dapat memuat PDF. Mungkin rusak atau dilindungi kata sandi."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Posterisasi PDF", "name": "Posterisasi PDF",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Confronta PDF", "name": "Confronta PDF",
"subtitle": "Confronta due PDF fianco a fianco." "subtitle": "Confronta due PDF fianco a fianco.",
"firstPdf": "Primo PDF",
"secondPdf": "Secondo PDF",
"clickOrDrop": "Clicca o rilascia",
"page": "Pagina",
"overlay": "Sovrapposizione",
"sideBySide": "Affiancato",
"flicker": "Lampeggio",
"syncScroll": "Sincronizza scorrimento",
"export": "Esporta",
"exportAsPdf": "Esporta come PDF",
"splitView": "Vista divisa",
"alternating": "Alternato",
"leftDocument": "Documento sinistro",
"rightDocument": "Documento destro",
"original": "Originale",
"modified": "Modificato",
"searchChanges": "Cerca modifiche...",
"deleted": "Eliminato",
"added": "Aggiunto",
"prevPage": "Pagina precedente",
"nextPage": "Pagina successiva",
"prevChange": "Modifica precedente",
"nextChange": "Modifica successiva",
"uploadTwoPdfs": "Carica due PDF per vedere le differenze.",
"noDifferences": "Nessuna differenza rilevata in questa pagina.",
"noMatchingChanges": "Nessuna modifica corrisponde al filtro corrente.",
"pageNotExist": "La pagina {{page}} non esiste in questo PDF.",
"noPairedPage": "Nessuna pagina associata per questo lato.",
"buildingModel": "Creazione del modello di abbinamento pagine...",
"indexingPdf": "Indicizzazione del PDF {{num}}, pagina {{page}} di {{total}}...",
"loadingComparison": "Caricamento confronto {{current}} di {{total}}...",
"runningOcr": "Esecuzione OCR sulla pagina {{page}}...",
"preparingExport": "Preparazione esportazione PDF...",
"renderingPage": "Rendering pagina {{current}} di {{total}}...",
"exportError": "Errore di esportazione",
"exportFailed": "Impossibile esportare il PDF di confronto.",
"loadingFile": "Caricamento di {{name}}...",
"invalidFile": "File non valido",
"invalidFileMsg": "Seleziona un file PDF valido.",
"loadError": "Impossibile caricare il PDF. Potrebbe essere danneggiato o protetto da password."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Posterizza PDF", "name": "Posterizza PDF",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "PDF 비교", "name": "PDF 비교",
"subtitle": "두 PDF를 나란히 비교합니다." "subtitle": "두 PDF를 나란히 비교합니다.",
"firstPdf": "첫 번째 PDF",
"secondPdf": "두 번째 PDF",
"clickOrDrop": "클릭 또는 드롭",
"page": "페이지",
"overlay": "오버레이",
"sideBySide": "나란히 보기",
"flicker": "깜빡임",
"syncScroll": "스크롤 동기화",
"export": "내보내기",
"exportAsPdf": "PDF로 내보내기",
"splitView": "분할 보기",
"alternating": "번갈아 보기",
"leftDocument": "왼쪽 문서",
"rightDocument": "오른쪽 문서",
"original": "원본",
"modified": "수정본",
"searchChanges": "변경 사항 검색...",
"deleted": "삭제됨",
"added": "추가됨",
"prevPage": "이전 페이지",
"nextPage": "다음 페이지",
"prevChange": "이전 변경",
"nextChange": "다음 변경",
"uploadTwoPdfs": "차이점을 보려면 두 개의 PDF를 업로드하세요.",
"noDifferences": "이 페이지에서 차이점이 감지되지 않았습니다.",
"noMatchingChanges": "현재 필터와 일치하는 변경 사항이 없습니다.",
"pageNotExist": "페이지 {{page}}는 이 PDF에 존재하지 않습니다.",
"noPairedPage": "이쪽에 대응되는 페이지가 없습니다.",
"buildingModel": "페이지 페어링 모델을 만드는 중...",
"indexingPdf": "PDF {{num}}의 {{page}} / {{total}} 페이지를 인덱싱하는 중...",
"loadingComparison": "비교 {{current}} / {{total}} 불러오는 중...",
"runningOcr": "페이지 {{page}}에서 OCR 실행 중...",
"preparingExport": "PDF 내보내기 준비 중...",
"renderingPage": "페이지 {{current}} / {{total}} 렌더링 중...",
"exportError": "내보내기 오류",
"exportFailed": "비교 PDF를 내보낼 수 없습니다.",
"loadingFile": "{{name}} 불러오는 중...",
"invalidFile": "잘못된 파일",
"invalidFileMsg": "유효한 PDF 파일을 선택하세요.",
"loadError": "PDF를 불러올 수 없습니다. 손상되었거나 비밀번호로 보호되었을 수 있습니다."
}, },
"posterizePdf": { "posterizePdf": {
"name": "PDF 포스터화", "name": "PDF 포스터화",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "PDF's Vergelijken", "name": "PDF's Vergelijken",
"subtitle": "Twee PDF's zij-aan-zij vergelijken." "subtitle": "Twee PDF's zij-aan-zij vergelijken.",
"firstPdf": "Eerste PDF",
"secondPdf": "Tweede PDF",
"clickOrDrop": "Klik of sleep neer",
"page": "Pagina",
"overlay": "Overlay",
"sideBySide": "Zij aan zij",
"flicker": "Flikkeren",
"syncScroll": "Scroll synchroniseren",
"export": "Exporteren",
"exportAsPdf": "Exporteren als PDF",
"splitView": "Gesplitste weergave",
"alternating": "Afwisselend",
"leftDocument": "Linkerdocument",
"rightDocument": "Rechterdocument",
"original": "Origineel",
"modified": "Gewijzigd",
"searchChanges": "Wijzigingen zoeken...",
"deleted": "Verwijderd",
"added": "Toegevoegd",
"prevPage": "Vorige pagina",
"nextPage": "Volgende pagina",
"prevChange": "Vorige wijziging",
"nextChange": "Volgende wijziging",
"uploadTwoPdfs": "Upload twee PDF's om de verschillen te zien.",
"noDifferences": "Geen verschillen gedetecteerd op deze pagina.",
"noMatchingChanges": "Geen wijzigingen komen overeen met het huidige filter.",
"pageNotExist": "Pagina {{page}} bestaat niet in deze PDF.",
"noPairedPage": "Geen gekoppelde pagina voor deze zijde.",
"buildingModel": "Model voor paginakoppeling wordt opgebouwd...",
"indexingPdf": "PDF {{num}}, pagina {{page}} van {{total}} wordt geïndexeerd...",
"loadingComparison": "Vergelijking {{current}} van {{total}} wordt geladen...",
"runningOcr": "OCR wordt uitgevoerd op pagina {{page}}...",
"preparingExport": "PDF-export wordt voorbereid...",
"renderingPage": "Pagina {{current}} van {{total}} wordt gerenderd...",
"exportError": "Exportfout",
"exportFailed": "Vergelijkings-PDF kon niet worden geëxporteerd.",
"loadingFile": "{{name}} wordt geladen...",
"invalidFile": "Ongeldig bestand",
"invalidFileMsg": "Selecteer een geldig PDF-bestand.",
"loadError": "Kon PDF niet laden. Het bestand kan beschadigd zijn of met een wachtwoord beveiligd zijn."
}, },
"posterizePdf": { "posterizePdf": {
"name": "PDF-Poster", "name": "PDF-Poster",

View File

@@ -288,7 +288,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Comparar PDFs", "name": "Comparar PDFs",
"subtitle": "Compare dois PDFs lado a lado." "subtitle": "Compare dois PDFs lado a lado.",
"firstPdf": "Primeiro PDF",
"secondPdf": "Segundo PDF",
"clickOrDrop": "Clique ou solte",
"page": "Página",
"overlay": "Sobreposição",
"sideBySide": "Lado a lado",
"flicker": "Alternância rápida",
"syncScroll": "Sincronizar rolagem",
"export": "Exportar",
"exportAsPdf": "Exportar como PDF",
"splitView": "Visualização dividida",
"alternating": "Alternado",
"leftDocument": "Documento esquerdo",
"rightDocument": "Documento direito",
"original": "Original",
"modified": "Modificado",
"searchChanges": "Pesquisar alterações...",
"deleted": "Excluído",
"added": "Adicionado",
"prevPage": "Página anterior",
"nextPage": "Próxima página",
"prevChange": "Alteração anterior",
"nextChange": "Próxima alteração",
"uploadTwoPdfs": "Envie dois PDFs para ver as diferenças.",
"noDifferences": "Nenhuma diferença detectada nesta página.",
"noMatchingChanges": "Nenhuma alteração corresponde ao filtro atual.",
"pageNotExist": "A página {{page}} não existe neste PDF.",
"noPairedPage": "Não há página pareada para este lado.",
"buildingModel": "Criando modelo de pareamento de páginas...",
"indexingPdf": "Indexando PDF {{num}}, página {{page}} de {{total}}...",
"loadingComparison": "Carregando comparação {{current}} de {{total}}...",
"runningOcr": "Executando OCR na página {{page}}...",
"preparingExport": "Preparando exportação em PDF...",
"renderingPage": "Renderizando página {{current}} de {{total}}...",
"exportError": "Erro de exportação",
"exportFailed": "Não foi possível exportar o PDF de comparação.",
"loadingFile": "Carregando {{name}}...",
"invalidFile": "Arquivo inválido",
"invalidFileMsg": "Selecione um arquivo PDF válido.",
"loadError": "Não foi possível carregar o PDF. Ele pode estar corrompido ou protegido por senha."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Posterizar PDF", "name": "Posterizar PDF",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "Jämför PDF:er", "name": "Jämför PDF:er",
"subtitle": "Jämför två PDF:er bredvid varandra." "subtitle": "Jämför två PDF:er bredvid varandra.",
"firstPdf": "Första PDF",
"secondPdf": "Andra PDF",
"clickOrDrop": "Klicka eller släpp",
"page": "Sida",
"overlay": "Överlägg",
"sideBySide": "Sida vid sida",
"flicker": "Flimmer",
"syncScroll": "Synkronisera rullning",
"export": "Exportera",
"exportAsPdf": "Exportera som PDF",
"splitView": "Delad vy",
"alternating": "Växlande",
"leftDocument": "Vänster dokument",
"rightDocument": "Höger dokument",
"original": "Original",
"modified": "Ändrad",
"searchChanges": "Sök ändringar...",
"deleted": "Borttagen",
"added": "Tillagd",
"prevPage": "Föregående sida",
"nextPage": "Nästa sida",
"prevChange": "Föregående ändring",
"nextChange": "Nästa ändring",
"uploadTwoPdfs": "Ladda upp två PDF:er för att se skillnaderna.",
"noDifferences": "Inga skillnader upptäcktes på denna sida.",
"noMatchingChanges": "Inga ändringar matchar det aktuella filtret.",
"pageNotExist": "Sidan {{page}} finns inte i denna PDF.",
"noPairedPage": "Ingen matchad sida för denna sida.",
"buildingModel": "Bygger sidparningsmodell...",
"indexingPdf": "Indexerar PDF {{num}}, sida {{page}} av {{total}}...",
"loadingComparison": "Läser in jämförelse {{current}} av {{total}}...",
"runningOcr": "Kör OCR på sida {{page}}...",
"preparingExport": "Förbereder PDF-export...",
"renderingPage": "Renderar sida {{current}} av {{total}}...",
"exportError": "Exportfel",
"exportFailed": "Kunde inte exportera jämförelse-PDF.",
"loadingFile": "Läser in {{name}}...",
"invalidFile": "Ogiltig fil",
"invalidFileMsg": "Välj en giltig PDF-fil.",
"loadError": "Kunde inte läsa in PDF. Den kan vara skadad eller lösenordsskyddad."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Postera PDF", "name": "Postera PDF",

View File

@@ -288,7 +288,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "PDF'leri Karşılaştır", "name": "PDF'leri Karşılaştır",
"subtitle": "İki PDF'yi yan yana karşılaştırın." "subtitle": "İki PDF'yi yan yana karşılaştırın.",
"firstPdf": "İlk PDF",
"secondPdf": "İkinci PDF",
"clickOrDrop": "Tıklayın veya bırakın",
"page": "Sayfa",
"overlay": "Üst üste",
"sideBySide": "Yan yana",
"flicker": "Titreşim",
"syncScroll": "Kaydırmayı senkronize et",
"export": "Dışa aktar",
"exportAsPdf": "PDF olarak dışa aktar",
"splitView": "Bölünmüş görünüm",
"alternating": "Sırayla",
"leftDocument": "Sol belge",
"rightDocument": "Sağ belge",
"original": "Orijinal",
"modified": "Değiştirilmiş",
"searchChanges": "Değişiklikleri ara...",
"deleted": "Silindi",
"added": "Eklendi",
"prevPage": "Önceki sayfa",
"nextPage": "Sonraki sayfa",
"prevChange": "Önceki değişiklik",
"nextChange": "Sonraki değişiklik",
"uploadTwoPdfs": "Farkları görmek için iki PDF yükleyin.",
"noDifferences": "Bu sayfada fark algılanmadı.",
"noMatchingChanges": "Geçerli filtreyle eşleşen değişiklik yok.",
"pageNotExist": "{{page}} sayfası bu PDF'de yok.",
"noPairedPage": "Bu taraf için eşleştirilmiş sayfa yok.",
"buildingModel": "Sayfa eşleştirme modeli oluşturuluyor...",
"indexingPdf": "PDF {{num}} için {{total}} içinden {{page}}. sayfa dizinleniyor...",
"loadingComparison": "{{total}} içinden {{current}}. karşılaştırma yükleniyor...",
"runningOcr": "{{page}}. sayfada OCR çalıştırılıyor...",
"preparingExport": "PDF dışa aktarma hazırlanıyor...",
"renderingPage": "{{total}} içinden {{current}}. sayfa işleniyor...",
"exportError": "Dışa aktarma hatası",
"exportFailed": "Karşılaştırma PDF'i dışa aktarılamadı.",
"loadingFile": "{{name}} yükleniyor...",
"invalidFile": "Geçersiz dosya",
"invalidFileMsg": "Lütfen geçerli bir PDF dosyası seçin.",
"loadError": "PDF yüklenemedi. Bozuk olabilir veya parola korumalı olabilir."
}, },
"posterizePdf": { "posterizePdf": {
"name": "PDF'yi Posta Boyutuna Böl", "name": "PDF'yi Posta Boyutuna Böl",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "So sánh PDF", "name": "So sánh PDF",
"subtitle": "So sánh hai PDF cạnh nhau." "subtitle": "So sánh hai PDF cạnh nhau.",
"firstPdf": "PDF thứ nhất",
"secondPdf": "PDF thứ hai",
"clickOrDrop": "Nhấp hoặc thả",
"page": "Trang",
"overlay": "Chồng lớp",
"sideBySide": "Cạnh nhau",
"flicker": "Nhấp nháy",
"syncScroll": "Đồng bộ cuộn",
"export": "Xuất",
"exportAsPdf": "Xuất dưới dạng PDF",
"splitView": "Chế độ chia đôi",
"alternating": "Luân phiên",
"leftDocument": "Tài liệu bên trái",
"rightDocument": "Tài liệu bên phải",
"original": "Bản gốc",
"modified": "Đã sửa đổi",
"searchChanges": "Tìm kiếm thay đổi...",
"deleted": "Đã xóa",
"added": "Đã thêm",
"prevPage": "Trang trước",
"nextPage": "Trang sau",
"prevChange": "Thay đổi trước",
"nextChange": "Thay đổi sau",
"uploadTwoPdfs": "Tải lên hai PDF để xem sự khác biệt.",
"noDifferences": "Không phát hiện khác biệt trên trang này.",
"noMatchingChanges": "Không có thay đổi nào khớp với bộ lọc hiện tại.",
"pageNotExist": "Trang {{page}} không tồn tại trong PDF này.",
"noPairedPage": "Không có trang ghép cho phía này.",
"buildingModel": "Đang xây dựng mô hình ghép trang...",
"indexingPdf": "Đang lập chỉ mục PDF {{num}}, trang {{page}} trên {{total}}...",
"loadingComparison": "Đang tải so sánh {{current}} trên {{total}}...",
"runningOcr": "Đang chạy OCR trên trang {{page}}...",
"preparingExport": "Đang chuẩn bị xuất PDF...",
"renderingPage": "Đang kết xuất trang {{current}} trên {{total}}...",
"exportError": "Lỗi xuất",
"exportFailed": "Không thể xuất PDF so sánh.",
"loadingFile": "Đang tải {{name}}...",
"invalidFile": "Tệp không hợp lệ",
"invalidFileMsg": "Vui lòng chọn tệp PDF hợp lệ.",
"loadError": "Không thể tải PDF. Có thể tệp bị hỏng hoặc được bảo vệ bằng mật khẩu."
}, },
"posterizePdf": { "posterizePdf": {
"name": "Posterize PDF", "name": "Posterize PDF",

View File

@@ -288,7 +288,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "比較 PDF", "name": "比較 PDF",
"subtitle": "並排比較兩個 PDF。" "subtitle": "並排比較兩個 PDF。",
"firstPdf": "第一個 PDF",
"secondPdf": "第二個 PDF",
"clickOrDrop": "點擊或拖放",
"page": "頁面",
"overlay": "疊加",
"sideBySide": "並排",
"flicker": "閃爍",
"syncScroll": "同步捲動",
"export": "匯出",
"exportAsPdf": "匯出為 PDF",
"splitView": "分割檢視",
"alternating": "交替",
"leftDocument": "左側文件",
"rightDocument": "右側文件",
"original": "原始",
"modified": "修改後",
"searchChanges": "搜尋變更...",
"deleted": "已刪除",
"added": "已新增",
"prevPage": "上一頁",
"nextPage": "下一頁",
"prevChange": "上一個變更",
"nextChange": "下一個變更",
"uploadTwoPdfs": "上傳兩個 PDF 以查看差異。",
"noDifferences": "此頁面未偵測到差異。",
"noMatchingChanges": "沒有符合目前篩選條件的變更。",
"pageNotExist": "此 PDF 中不存在第 {{page}} 頁。",
"noPairedPage": "此側沒有配對頁面。",
"buildingModel": "正在建立頁面配對模型...",
"indexingPdf": "正在索引 PDF {{num}},第 {{page}} / {{total}} 頁...",
"loadingComparison": "正在載入比較 {{current}} / {{total}}...",
"runningOcr": "正在對第 {{page}} 頁執行 OCR...",
"preparingExport": "正在準備 PDF 匯出...",
"renderingPage": "正在轉譯第 {{current}} / {{total}} 頁...",
"exportError": "匯出錯誤",
"exportFailed": "無法匯出比較 PDF。",
"loadingFile": "正在載入 {{name}}...",
"invalidFile": "無效檔案",
"invalidFileMsg": "請選擇有效的 PDF 檔案。",
"loadError": "無法載入 PDF。檔案可能已損毀或受密碼保護。"
}, },
"posterizePdf": { "posterizePdf": {
"name": "海報化 PDF", "name": "海報化 PDF",

View File

@@ -292,7 +292,47 @@
}, },
"comparePdfs": { "comparePdfs": {
"name": "比较 PDF", "name": "比较 PDF",
"subtitle": "并排比较两个 PDF。" "subtitle": "并排比较两个 PDF。",
"firstPdf": "第一个 PDF",
"secondPdf": "第二个 PDF",
"clickOrDrop": "点击或拖放",
"page": "页面",
"overlay": "叠加",
"sideBySide": "并排",
"flicker": "闪烁",
"syncScroll": "同步滚动",
"export": "导出",
"exportAsPdf": "导出为 PDF",
"splitView": "分屏视图",
"alternating": "交替",
"leftDocument": "左侧文档",
"rightDocument": "右侧文档",
"original": "原始",
"modified": "修改后",
"searchChanges": "搜索更改...",
"deleted": "已删除",
"added": "已添加",
"prevPage": "上一页",
"nextPage": "下一页",
"prevChange": "上一处更改",
"nextChange": "下一处更改",
"uploadTwoPdfs": "上传两个 PDF 以查看差异。",
"noDifferences": "此页面未检测到差异。",
"noMatchingChanges": "没有与当前筛选条件匹配的更改。",
"pageNotExist": "此 PDF 中不存在第 {{page}} 页。",
"noPairedPage": "此侧没有配对页面。",
"buildingModel": "正在构建页面配对模型...",
"indexingPdf": "正在索引 PDF {{num}},第 {{page}} / {{total}} 页...",
"loadingComparison": "正在加载比较 {{current}} / {{total}}...",
"runningOcr": "正在对第 {{page}} 页运行 OCR...",
"preparingExport": "正在准备 PDF 导出...",
"renderingPage": "正在渲染第 {{current}} / {{total}} 页...",
"exportError": "导出错误",
"exportFailed": "无法导出比较 PDF。",
"loadingFile": "正在加载 {{name}}...",
"invalidFile": "无效文件",
"invalidFileMsg": "请选择有效的 PDF 文件。",
"loadError": "无法加载 PDF。文件可能已损坏或受密码保护。"
}, },
"posterizePdf": { "posterizePdf": {
"name": "海报化 PDF", "name": "海报化 PDF",

View File

@@ -2,6 +2,8 @@ export const COMPARE_COLORS = {
added: { r: 34, g: 197, b: 94 }, added: { r: 34, g: 197, b: 94 },
removed: { r: 239, g: 68, b: 68 }, removed: { r: 239, g: 68, b: 68 },
modified: { r: 245, g: 158, b: 11 }, modified: { r: 245, g: 158, b: 11 },
moved: { r: 168, g: 85, b: 247 },
'style-changed': { r: 59, g: 130, b: 246 },
} as const; } as const;
export const HIGHLIGHT_OPACITY = 0.28; export const HIGHLIGHT_OPACITY = 0.28;

View File

@@ -0,0 +1,213 @@
import type {
CompareAnnotation,
CompareContentCategory,
CompareImageRef,
ComparePageModel,
CompareRectangle,
CompareTextChange,
} from '../types.ts';
// Fraction of the page height, measured from the top edge and from the bottom
// edge, that isHeaderFooterZone treats as the header / footer band.
const HEADER_FOOTER_ZONE = 0.12;
/**
 * Picks the content category for a single text change.
 *
 * Style-only changes are always `formatting`. Otherwise the change is
 * `header-footer` when every rectangle it touches lies in the top/bottom band
 * of the page, and plain `text` in all other cases.
 *
 * @param change - The change to classify.
 * @param pageHeight - Page height in the same coordinate space as the rects.
 * @returns The category to assign to the change.
 */
export function classifyChangeCategory(
  change: CompareTextChange,
  pageHeight: number
): CompareContentCategory {
  if (change.type === 'style-changed') {
    return 'formatting';
  }

  // Prefer the "before" geometry; pure additions only have "after" rects.
  const hasBeforeRects = change.beforeRects.length > 0;
  const referenceRects = hasBeforeRects
    ? change.beforeRects
    : change.afterRects;

  const inMarginBand =
    referenceRects.length > 0 &&
    isHeaderFooterZone(referenceRects, pageHeight);

  return inMarginBand ? 'header-footer' : 'text';
}
/**
 * Reports whether every rectangle sits in the header or footer band.
 *
 * A rectangle qualifies when its top edge is above the header limit or its
 * bottom edge is below the footer limit.
 *
 * @param rects - Rectangles to test.
 * @param pageHeight - Page height in the same units as the rectangles.
 * @returns `true` when no rectangle falls outside both bands.
 */
function isHeaderFooterZone(
  rects: CompareRectangle[],
  pageHeight: number
): boolean {
  const headerLimit = pageHeight * HEADER_FOOTER_ZONE;
  const footerStart = pageHeight * (1 - HEADER_FOOTER_ZONE);

  for (const rect of rects) {
    const startsInHeader = rect.y < headerLimit;
    const endsInFooter = rect.y + rect.height > footerStart;
    if (!startsInHeader && !endsInFooter) {
      return false;
    }
  }
  return true;
}
/**
 * Compares two annotation lists and reports annotations that exist on only
 * one side.
 *
 * Annotations are matched by `annotationKey`. Removed entries are emitted
 * first, then added entries; ids are numbered consecutively from `baseId`.
 *
 * @param before - Annotations from the first document's page.
 * @param after - Annotations from the second document's page.
 * @param baseId - First numeric suffix to use in generated change ids.
 * @returns Changes with category `annotation`.
 */
export function diffAnnotations(
  before: CompareAnnotation[],
  after: CompareAnnotation[],
  baseId: number
): CompareTextChange[] {
  const indexByKey = (list: CompareAnnotation[]) => {
    const byKey = new Map<string, CompareAnnotation>();
    for (const item of list) {
      byKey.set(annotationKey(item), item);
    }
    return byKey;
  };
  const labelOf = (ann: CompareAnnotation) => ann.contents || ann.title || '';

  const beforeByKey = indexByKey(before);
  const afterByKey = indexByKey(after);
  const result: CompareTextChange[] = [];
  let nextId = baseId;

  beforeByKey.forEach((ann, key) => {
    if (afterByKey.has(key)) return;
    const label = labelOf(ann);
    result.push({
      id: `annotation-removed-${nextId}`,
      type: 'removed',
      category: 'annotation',
      description: `Removed ${ann.subtype} annotation: "${label}"`.trim(),
      beforeText: label,
      afterText: '',
      beforeRects: [ann.rect],
      afterRects: [],
    });
    nextId += 1;
  });

  afterByKey.forEach((ann, key) => {
    if (beforeByKey.has(key)) return;
    const label = labelOf(ann);
    result.push({
      id: `annotation-added-${nextId}`,
      type: 'added',
      category: 'annotation',
      description: `Added ${ann.subtype} annotation: "${label}"`.trim(),
      beforeText: '',
      afterText: label,
      beforeRects: [],
      afterRects: [ann.rect],
    });
    nextId += 1;
  });

  return result;
}
/**
 * Builds the identity key used to match annotations across documents:
 * subtype, contents, and the rounded top-left corner of the rectangle.
 */
function annotationKey(ann: CompareAnnotation): string {
  const left = Math.round(ann.rect.x);
  const top = Math.round(ann.rect.y);
  const position = `${left},${top}`;
  return `${ann.subtype}|${ann.contents}|${position}`;
}
/**
 * Compares the images of two pages.
 *
 * Each "before" image is paired with the first not-yet-claimed "after" image
 * whose rectangle overlaps it. A pair with different pixel dimensions is
 * reported as `modified`, an unpaired "before" image as `removed`, and every
 * "after" image left unclaimed as `added`.
 *
 * @param before - Images from the first document's page.
 * @param after - Images from the second document's page.
 * @param baseId - First numeric suffix to use in generated change ids.
 * @returns Changes with category `image`.
 */
export function diffImages(
  before: CompareImageRef[],
  after: CompareImageRef[],
  baseId: number
): CompareTextChange[] {
  const result: CompareTextChange[] = [];
  const claimedIds = new Set<string>();
  let counter = baseId;

  before.forEach((source) => {
    const partner = after.find(
      (candidate) =>
        !claimedIds.has(candidate.id) &&
        imagesOverlap(source.rect, candidate.rect)
    );

    if (!partner) {
      result.push({
        id: `image-removed-${counter++}`,
        type: 'removed',
        category: 'image',
        description: `Removed image (${source.width}×${source.height})`,
        beforeText: '',
        afterText: '',
        beforeRects: [source.rect],
        afterRects: [],
      });
      return;
    }

    claimedIds.add(partner.id);
    const sameSize =
      source.width === partner.width && source.height === partner.height;
    if (sameSize) return;

    result.push({
      id: `image-modified-${counter++}`,
      type: 'modified',
      category: 'image',
      description: `Image resized from ${source.width}×${source.height} to ${partner.width}×${partner.height}`,
      beforeText: `${source.width}×${source.height}`,
      afterText: `${partner.width}×${partner.height}`,
      beforeRects: [source.rect],
      afterRects: [partner.rect],
    });
  });

  after
    .filter((candidate) => !claimedIds.has(candidate.id))
    .forEach((extra) => {
      result.push({
        id: `image-added-${counter++}`,
        type: 'added',
        category: 'image',
        description: `Added image (${extra.width}×${extra.height})`,
        beforeText: '',
        afterText: '',
        beforeRects: [],
        afterRects: [extra.rect],
      });
    });

  return result;
}
/**
 * Decides whether two rectangles overlap enough to be treated as the same
 * image slot: the intersection must cover more than 30% of the smaller
 * rectangle. Rectangles with no area never overlap.
 */
function imagesOverlap(a: CompareRectangle, b: CompareRectangle): boolean {
  // Length of the shared interval on one axis, clamped at zero.
  const sharedSpan = (
    startA: number,
    sizeA: number,
    startB: number,
    sizeB: number
  ) =>
    Math.max(
      0,
      Math.min(startA + sizeA, startB + sizeB) - Math.max(startA, startB)
    );

  const intersection =
    sharedSpan(a.x, a.width, b.x, b.width) *
    sharedSpan(a.y, a.height, b.y, b.height);
  const smallerArea = Math.min(a.width * a.height, b.width * b.height);

  if (!(smallerArea > 0)) {
    return false;
  }
  return intersection / smallerArea > 0.3;
}
/**
 * Flags a page-wide background/layout change when the visual mismatch is
 * larger than the text changes alone can explain.
 *
 * Nothing is reported when the mismatch ratio is below 1%. Otherwise the area
 * covered by `textChangeRects` is expressed as a fraction of the left page,
 * and a single full-page `modified` change is returned when the mismatch
 * exceeds that fraction by more than 5 percentage points.
 *
 * @param leftModel - Page model of the first document.
 * @param rightModel - Page model of the second document.
 * @param visualMismatchRatio - Fraction of the page that differs visually.
 * @param textChangeRects - Rectangles already attributed to text changes.
 * @param baseId - Numeric suffix for the generated change id.
 * @returns Zero or one change with category `background`.
 */
export function detectBackgroundChanges(
  leftModel: ComparePageModel,
  rightModel: ComparePageModel,
  visualMismatchRatio: number,
  textChangeRects: CompareRectangle[],
  baseId: number
): CompareTextChange[] {
  if (visualMismatchRatio < 0.01) {
    return [];
  }

  let coveredArea = 0;
  for (const rect of textChangeRects) {
    coveredArea += rect.width * rect.height;
  }

  const pageArea = leftModel.width * leftModel.height;
  const textRatio = pageArea > 0 ? coveredArea / pageArea : 0;
  const unexplained = visualMismatchRatio > textRatio + 0.05;
  if (!unexplained) {
    return [];
  }

  const fullPage = (model: ComparePageModel) => ({
    x: 0,
    y: 0,
    width: model.width,
    height: model.height,
  });

  return [
    {
      id: `background-changed-${baseId}`,
      type: 'modified',
      category: 'background',
      description: 'Page background or layout changed',
      beforeText: '',
      afterText: '',
      beforeRects: [fullPage(leftModel)],
      afterRects: [fullPage(rightModel)],
    },
  ];
}
/**
 * Counts how many changes fall into each content category.
 *
 * @param changes - Changes whose `category` field is tallied.
 * @returns An object with one counter per known category.
 */
export function buildCategorySummary(changes: CompareTextChange[]) {
  const counts = {
    text: 0,
    image: 0,
    'header-footer': 0,
    annotation: 0,
    formatting: 0,
    background: 0,
  };

  changes.forEach(({ category }) => {
    counts[category] = counts[category] + 1;
  });

  return counts;
}

View File

@@ -1,10 +1,49 @@
import type { ComparePageModel, ComparePageResult } from '../types.ts'; import type {
ComparePageModel,
ComparePageResult,
CompareCategorySummary,
} from '../types.ts';
import { diffTextRuns } from './diff-text-runs.ts'; import { diffTextRuns } from './diff-text-runs.ts';
import { diffTextRunsAsync } from '../worker-api.ts';
import {
classifyChangeCategory,
diffAnnotations,
diffImages,
buildCategorySummary,
} from './compare-content.ts';
// All-zero per-category counters. Results for one-sided or missing pages
// spread this object as their base and override individual counters
// (e.g. `text: 1`), so the constant itself is never mutated.
const EMPTY_CATEGORY_SUMMARY: CompareCategorySummary = {
text: 0,
image: 0,
'header-footer': 0,
annotation: 0,
formatting: 0,
background: 0,
};
export function comparePageModels( export function comparePageModels(
leftPage: ComparePageModel | null, leftPage: ComparePageModel | null,
rightPage: ComparePageModel | null rightPage: ComparePageModel | null
): ComparePageResult { ): ComparePageResult {
return comparePageModelsCore(leftPage, rightPage, false) as ComparePageResult;
}
/**
 * Asynchronous variant of `comparePageModels`.
 *
 * Delegates to `comparePageModelsCore` with the worker flag enabled and
 * exposes the outcome as a promise.
 *
 * @param leftPage - Page model from the first PDF, or `null` when absent.
 * @param rightPage - Page model from the second PDF, or `null` when absent.
 * @returns A promise for the page comparison result.
 */
export function comparePageModelsAsync(
  leftPage: ComparePageModel | null,
  rightPage: ComparePageModel | null
): Promise<ComparePageResult> {
  const useWorker = true;
  const pending = comparePageModelsCore(leftPage, rightPage, useWorker);
  return pending as Promise<ComparePageResult>;
}
function comparePageModelsCore(
leftPage: ComparePageModel | null,
rightPage: ComparePageModel | null,
useWorker: boolean
): ComparePageResult | Promise<ComparePageResult> {
if (leftPage && !rightPage) { if (leftPage && !rightPage) {
return { return {
status: 'left-only', status: 'left-only',
@@ -14,6 +53,7 @@ export function comparePageModels(
{ {
id: 'page-removed', id: 'page-removed',
type: 'page-removed', type: 'page-removed',
category: 'text',
description: `Page ${leftPage.pageNumber} exists only in the first PDF.`, description: `Page ${leftPage.pageNumber} exists only in the first PDF.`,
beforeText: leftPage.plainText.slice(0, 200), beforeText: leftPage.plainText.slice(0, 200),
afterText: '', afterText: '',
@@ -21,7 +61,8 @@ export function comparePageModels(
afterRects: [], afterRects: [],
}, },
], ],
summary: { added: 0, removed: 1, modified: 0 }, summary: { added: 0, removed: 1, modified: 0, moved: 0, styleChanged: 0 },
categorySummary: { ...EMPTY_CATEGORY_SUMMARY, text: 1 },
visualDiff: null, visualDiff: null,
usedOcr: leftPage.source === 'ocr', usedOcr: leftPage.source === 'ocr',
}; };
@@ -36,6 +77,7 @@ export function comparePageModels(
{ {
id: 'page-added', id: 'page-added',
type: 'page-added', type: 'page-added',
category: 'text',
description: `Page ${rightPage.pageNumber} exists only in the second PDF.`, description: `Page ${rightPage.pageNumber} exists only in the second PDF.`,
beforeText: '', beforeText: '',
afterText: rightPage.plainText.slice(0, 200), afterText: rightPage.plainText.slice(0, 200),
@@ -43,7 +85,8 @@ export function comparePageModels(
afterRects: [], afterRects: [],
}, },
], ],
summary: { added: 1, removed: 0, modified: 0 }, summary: { added: 1, removed: 0, modified: 0, moved: 0, styleChanged: 0 },
categorySummary: { ...EMPTY_CATEGORY_SUMMARY, text: 1 },
visualDiff: null, visualDiff: null,
usedOcr: rightPage.source === 'ocr', usedOcr: rightPage.source === 'ocr',
}; };
@@ -55,24 +98,57 @@ export function comparePageModels(
leftPageNumber: null, leftPageNumber: null,
rightPageNumber: null, rightPageNumber: null,
changes: [], changes: [],
summary: { added: 0, removed: 0, modified: 0 }, summary: { added: 0, removed: 0, modified: 0, moved: 0, styleChanged: 0 },
categorySummary: { ...EMPTY_CATEGORY_SUMMARY },
visualDiff: null, visualDiff: null,
usedOcr: false, usedOcr: false,
}; };
} }
const { changes, summary } = diffTextRuns( function buildResult(diff: {
leftPage.textItems, changes: ComparePageResult['changes'];
rightPage.textItems summary: ComparePageResult['summary'];
}): ComparePageResult {
const allChanges = [...diff.changes];
const pageHeight = Math.max(leftPage!.height, rightPage!.height);
for (const c of allChanges) {
if (c.category === 'text') {
c.category = classifyChangeCategory(c, pageHeight);
}
}
const annotChanges = diffAnnotations(
leftPage!.annotations ?? [],
rightPage!.annotations ?? [],
allChanges.length
); );
allChanges.push(...annotChanges);
const imageChanges = diffImages(
leftPage!.images ?? [],
rightPage!.images ?? [],
allChanges.length
);
allChanges.push(...imageChanges);
return { return {
status: changes.length > 0 ? 'changed' : 'match', status: allChanges.length > 0 ? 'changed' : 'match',
leftPageNumber: leftPage.pageNumber, leftPageNumber: leftPage!.pageNumber,
rightPageNumber: rightPage.pageNumber, rightPageNumber: rightPage!.pageNumber,
changes, changes: allChanges,
summary, summary: diff.summary,
categorySummary: buildCategorySummary(allChanges),
visualDiff: null, visualDiff: null,
usedOcr: leftPage.source === 'ocr' || rightPage.source === 'ocr', usedOcr: leftPage!.source === 'ocr' || rightPage!.source === 'ocr',
}; };
} }
if (useWorker) {
return diffTextRunsAsync(leftPage.textItems, rightPage.textItems).then(
buildResult
);
}
return buildResult(diffTextRuns(leftPage.textItems, rightPage.textItems));
}

View File

@@ -0,0 +1,77 @@
import { diffTextRuns } from './diff-text-runs.ts';
import { pairPages } from './pair-pages.ts';
import type {
CompareTextItem,
ComparePageSignature,
ComparePagePair,
ComparePageResult,
CompareChangeSummary,
CompareTextChange,
} from '../types.ts';
/** Request: diff two lists of text items with `diffTextRuns`. */
interface DiffMessage {
type: 'diff';
// Correlation id; echoed back unchanged in the matching result.
id: number;
beforeItems: CompareTextItem[];
afterItems: CompareTextItem[];
}
/** Request: pair the pages of two documents with `pairPages`. */
interface PairMessage {
type: 'pair';
// Correlation id; echoed back unchanged in the matching result.
id: number;
leftPages: ComparePageSignature[];
rightPages: ComparePageSignature[];
}
/** Every message the worker accepts, discriminated by `type`. */
type WorkerMessage = DiffMessage | PairMessage;
/** Reply to a `DiffMessage`: the changes and summary returned by `diffTextRuns`. */
interface DiffResult {
type: 'diff';
id: number;
changes: CompareTextChange[];
summary: CompareChangeSummary;
}
/** Reply to a `PairMessage`: the pairs returned by `pairPages`. */
interface PairResult {
type: 'pair';
id: number;
pairs: ComparePagePair[];
}
/** Reply sent when handling a request threw; `message` carries the error text. */
interface ErrorResult {
type: 'error';
id: number;
message: string;
}
/** Every message the worker posts back, discriminated by `type`. */
type WorkerResult = DiffResult | PairResult | ErrorResult;
/**
 * Worker entry point: runs the requested computation and posts back exactly
 * one reply carrying the request's `id`.
 *
 * - `diff` requests reply with a `DiffResult`.
 * - `pair` requests reply with a `PairResult`.
 * - Anything that throws, including an unrecognised message type, replies
 *   with an `ErrorResult` so the requester is never left without an answer.
 */
self.onmessage = function (e: MessageEvent<WorkerMessage>) {
  const msg = e.data;
  try {
    switch (msg.type) {
      case 'diff': {
        const { changes, summary } = diffTextRuns(
          msg.beforeItems,
          msg.afterItems
        );
        const result: DiffResult = {
          type: 'diff',
          id: msg.id,
          changes,
          summary,
        };
        self.postMessage(result);
        break;
      }
      case 'pair': {
        const pairs = pairPages(msg.leftPages, msg.rightPages);
        const result: PairResult = { type: 'pair', id: msg.id, pairs };
        self.postMessage(result);
        break;
      }
      default: {
        // Unreachable per the types, but a mismatched caller must still get
        // a reply; route it through the shared error path below.
        const received = (msg as { type?: unknown }).type;
        throw new Error(`Unsupported worker message type: ${String(received)}`);
      }
    }
  } catch (err) {
    const result: ErrorResult = {
      type: 'error',
      id: msg.id,
      message: err instanceof Error ? err.message : String(err),
    };
    self.postMessage(result);
  }
};

View File

@@ -8,13 +8,19 @@ import type {
CompareTextItem, CompareTextItem,
CompareWordToken, CompareWordToken,
} from '../types.ts'; } from '../types.ts';
import { calculateBoundingRect } from './text-normalization.ts'; import {
calculateBoundingRect,
containsCJK,
segmentCJKText,
} from './text-normalization.ts';
import { COMPARE_GEOMETRY } from '../config.ts'; import { COMPARE_GEOMETRY } from '../config.ts';
interface WordToken { interface WordToken {
word: string; word: string;
compareWord: string; compareWord: string;
rect: CompareRectangle; rect: CompareRectangle;
fontName?: string;
fontSize?: number;
} }
function getCharMap(line: CompareTextItem): CharPosition[] { function getCharMap(line: CompareTextItem): CharPosition[] {
@@ -30,11 +36,15 @@ function getCharMap(line: CompareTextItem): CharPosition[] {
function splitLineIntoWords(line: CompareTextItem): WordToken[] { function splitLineIntoWords(line: CompareTextItem): WordToken[] {
if (line.wordTokens && line.wordTokens.length > 0) { if (line.wordTokens && line.wordTokens.length > 0) {
return line.wordTokens.map((token: CompareWordToken) => ({ const baseTokens = line.wordTokens.map((token: CompareWordToken) => ({
word: token.word, word: token.word,
compareWord: token.compareWord, compareWord: token.compareWord,
rect: token.rect, rect: token.rect,
fontName: token.fontName,
fontSize: token.fontSize,
})); }));
if (!containsCJK(line.normalizedText)) return baseTokens;
return baseTokens.flatMap(splitCJKToken);
} }
const words = line.normalizedText.split(/\s+/).filter(Boolean); const words = line.normalizedText.split(/\s+/).filter(Boolean);
@@ -43,7 +53,7 @@ function splitLineIntoWords(line: CompareTextItem): WordToken[] {
const charMap = getCharMap(line); const charMap = getCharMap(line);
let offset = 0; let offset = 0;
return words.map((word) => { const baseTokens = words.map((word) => {
const startIndex = line.normalizedText.indexOf(word, offset); const startIndex = line.normalizedText.indexOf(word, offset);
const endIndex = startIndex + word.length - 1; const endIndex = startIndex + word.length - 1;
offset = startIndex + word.length; offset = startIndex + word.length;
@@ -75,6 +85,31 @@ function splitLineIntoWords(line: CompareTextItem): WordToken[] {
rect: { x, y: line.rect.y, width: w, height: line.rect.height }, rect: { x, y: line.rect.y, width: w, height: line.rect.height },
}; };
}); });
if (!containsCJK(line.normalizedText)) return baseTokens;
return baseTokens.flatMap(splitCJKToken);
}
/**
 * Splits a token containing CJK text into one token per word segment.
 *
 * Tokens without CJK characters, or that yield at most one segment, are
 * returned unchanged. Each produced token gets a proportional slice of the
 * original rectangle (the width is divided evenly per UTF-16 code unit) and
 * inherits the original token's font name and size so that style-change
 * detection keeps working on split tokens.
 *
 * @param token - The word token to split.
 * @returns The original token in a one-element array, or its segments.
 */
function splitCJKToken(token: WordToken): WordToken[] {
  if (!containsCJK(token.word)) return [token];

  const segments = segmentCJKText(token.word);
  if (segments.length <= 1) return [token];

  const charWidth = token.rect.width / Math.max(token.word.length, 1);
  let searchFrom = 0;

  return segments.map((seg) => {
    // segmentCJKText drops non-word segments (punctuation, spaces), so the
    // segment's position must be looked up in the source text rather than
    // accumulated from segment lengths; otherwise rects drift to the left.
    const found = token.word.indexOf(seg, searchFrom);
    const start = found >= 0 ? found : searchFrom;
    searchFrom = start + seg.length;

    return {
      word: seg,
      compareWord: seg.toLowerCase(),
      rect: {
        x: token.rect.x + start * charWidth,
        y: token.rect.y,
        width: seg.length * charWidth,
        height: token.rect.height,
      },
      fontName: token.fontName,
      fontSize: token.fontSize,
    };
  });
}
function groupAdjacentRects(rects: CompareRectangle[]): CompareRectangle[] { function groupAdjacentRects(rects: CompareRectangle[]): CompareRectangle[] {
@@ -138,6 +173,7 @@ function createWordChange(
changes.push({ changes.push({
id, id,
type, type,
category: 'text',
description: `Replaced "${beforeText}" with "${afterText}"`, description: `Replaced "${beforeText}" with "${afterText}"`,
beforeText, beforeText,
afterText, afterText,
@@ -148,6 +184,7 @@ function createWordChange(
changes.push({ changes.push({
id, id,
type, type,
category: 'text',
description: `Removed "${beforeText}"`, description: `Removed "${beforeText}"`,
beforeText, beforeText,
afterText: '', afterText: '',
@@ -158,6 +195,7 @@ function createWordChange(
changes.push({ changes.push({
id, id,
type, type,
category: 'text',
description: `Added "${afterText}"`, description: `Added "${afterText}"`,
beforeText: '', beforeText: '',
afterText, afterText,
@@ -173,9 +211,11 @@ function toSummary(changes: CompareTextChange[]): CompareChangeSummary {
if (change.type === 'added') summary.added += 1; if (change.type === 'added') summary.added += 1;
if (change.type === 'removed') summary.removed += 1; if (change.type === 'removed') summary.removed += 1;
if (change.type === 'modified') summary.modified += 1; if (change.type === 'modified') summary.modified += 1;
if (change.type === 'moved') summary.moved += 1;
if (change.type === 'style-changed') summary.styleChanged += 1;
return summary; return summary;
}, },
{ added: 0, removed: 0, modified: 0 } { added: 0, removed: 0, modified: 0, moved: 0, styleChanged: 0 }
); );
} }
@@ -233,5 +273,202 @@ export function diffTextRuns(
afterIndex += count; afterIndex += count;
} }
detectStyleChanges(changes, beforeWords, afterWords, rawChanges);
detectMovedText(changes);
return { changes, summary: toSummary(changes) }; return { changes, summary: toSummary(changes) };
} }
/**
 * Replaces a leading `g_d<digits>_` prefix with `g_d_`, so that names which
 * differ only in that number compare equal.
 */
function normalizeFontName(name: string): string {
  const numberedPrefix = /^g_d\d+_/;
  const normalized = name.replace(numberedPrefix, 'g_d_');
  return normalized;
}
/**
 * Tells whether two matched word tokens differ in style.
 *
 * A font difference counts only when both tokens carry a font name and the
 * normalized names differ. A size difference counts only when both tokens
 * carry a size and the sizes are more than 0.5 apart.
 */
function hasStyleDifference(before: WordToken, after: WordToken): boolean {
  const fontChanged =
    !!before.fontName &&
    !!after.fontName &&
    normalizeFontName(before.fontName) !== normalizeFontName(after.fontName);
  if (fontChanged) {
    return true;
  }

  const sizeChanged =
    !!before.fontSize &&
    !!after.fontSize &&
    Math.abs(before.fontSize - after.fontSize) > 0.5;
  return sizeChanged;
}
/**
 * Finds words whose text is unchanged but whose font or size differs, and
 * appends one `style-changed` entry per distinct style transition to
 * `changes`.
 *
 * Only the unchanged parts of `rawChanges` are inspected. Consecutive words
 * with a style difference form a fragment; fragments sharing the same
 * (font before, font after, size before, size after) are reported together.
 * A fragment's fonts and sizes are taken from its first word.
 *
 * @param changes - Change list to append to (mutated).
 * @param beforeWords - Word tokens of the first document, in diff order.
 * @param afterWords - Word tokens of the second document, in diff order.
 * @param rawChanges - Output of the word-level array diff.
 */
function detectStyleChanges(
  changes: CompareTextChange[],
  beforeWords: WordToken[],
  afterWords: WordToken[],
  rawChanges: ReturnType<typeof diffArrays<string>>
) {
  interface StyleFragment {
    bFont: string;
    aFont: string;
    bSize: number | undefined;
    aSize: number | undefined;
    text: string;
    beforeRects: CompareRectangle[];
    afterRects: CompareRectangle[];
  }

  const fragments: StyleFragment[] = [];
  let beforeIdx = 0;
  let afterIdx = 0;

  for (const change of rawChanges) {
    const count = change.value.length;
    if (change.removed) {
      beforeIdx += count;
      continue;
    }
    if (change.added) {
      afterIdx += count;
      continue;
    }

    // Unchanged text: walk both sides in lockstep and collect runs of
    // consecutive words whose style differs.
    let styleRunStart = -1;
    for (let k = 0; k < count; k++) {
      const isDiff = hasStyleDifference(
        beforeWords[beforeIdx + k],
        afterWords[afterIdx + k]
      );
      if (isDiff && styleRunStart < 0) {
        styleRunStart = k;
      }

      const runEndsHere = !isDiff || k === count - 1;
      if (styleRunStart < 0 || !runEndsHere) continue;

      // When the last word still differs, the run includes it.
      const end = isDiff ? k + 1 : k;
      const bTokens = beforeWords.slice(
        beforeIdx + styleRunStart,
        beforeIdx + end
      );
      const aTokens = afterWords.slice(
        afterIdx + styleRunStart,
        afterIdx + end
      );
      fragments.push({
        bFont: bTokens[0].fontName ?? 'unknown',
        aFont: aTokens[0].fontName ?? 'unknown',
        bSize: bTokens[0].fontSize,
        aSize: aTokens[0].fontSize,
        text: bTokens.map((w) => w.word).join(' '),
        beforeRects: groupAdjacentRects(bTokens.map((w) => w.rect)),
        afterRects: groupAdjacentRects(aTokens.map((w) => w.rect)),
      });
      styleRunStart = -1;
    }

    beforeIdx += count;
    afterIdx += count;
  }

  const groups = new Map<string, StyleFragment[]>();
  for (const frag of fragments) {
    // Serialise the four parts as a tuple so that different combinations can
    // never collapse into the same key (plain concatenation could).
    const key = JSON.stringify([
      frag.bFont,
      frag.aFont,
      frag.bSize ?? null,
      frag.aSize ?? null,
    ]);
    const bucket = groups.get(key);
    if (bucket) bucket.push(frag);
    else groups.set(key, [frag]);
  }

  for (const groupFrags of groups.values()) {
    const { bFont, aFont, bSize, aSize } = groupFrags[0];
    const allText = groupFrags.map((f) => f.text).join(' … ');
    const allBeforeRects = groupFrags.flatMap((f) => f.beforeRects);
    const allAfterRects = groupFrags.flatMap((f) => f.afterRects);

    let desc = `Style changed (${groupFrags.length} regions)`;
    const details: string[] = [];
    // Compare normalized names, as hasStyleDifference does, so that a pure
    // size change is not also reported as a font change.
    if (normalizeFontName(bFont) !== normalizeFontName(aFont))
      details.push(`Font: ${bFont} → ${aFont}`);
    if (bSize && aSize && Math.abs(bSize - aSize) > 0.5)
      details.push(`Font size: ${bSize} → ${aSize}`);
    if (details.length) desc += '\n' + details.join('\n');

    changes.push({
      id: `style-changed-${changes.length}`,
      type: 'style-changed',
      category: 'formatting',
      description: desc,
      beforeText: allText,
      afterText: allText,
      beforeRects: allBeforeRects,
      afterRects: allAfterRects,
    });
  }
}
// Removed passages with fewer words than this are never considered as moves.
const MOVE_MIN_WORDS = 3;
// A candidate's similarity score must be strictly greater than this value
// for a removed/added pair to be reported as a move.
const MOVE_SIMILARITY_THRESHOLD = 0.8;
/**
 * Canonical form used when comparing passages for move detection:
 * lower-cased, whitespace runs collapsed to one space, ends trimmed.
 */
function normalizeForMove(text: string): string {
  const lowered = text.toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
/**
 * Scores how similar two normalized passages are, from 0 to 1.
 *
 * The score is the number of words the passages share, counted with
 * multiplicity, divided by the word count of the longer passage. Each word
 * occurrence in `b` can be matched at most once, so repeating a word in `a`
 * cannot inflate the score.
 *
 * @param a - First passage, already normalized (single-space separated).
 * @param b - Second passage, already normalized.
 * @returns 1 for identical strings, 0 if either is empty, else the ratio.
 */
function moveSimilarity(a: string, b: string): number {
  if (a === b) return 1;
  if (!a || !b) return 0;

  const aWords = a.split(' ');
  const bWords = b.split(' ');

  // Remaining unmatched occurrences of each word in b.
  const available = new Map<string, number>();
  for (const w of bWords) {
    available.set(w, (available.get(w) ?? 0) + 1);
  }

  let matches = 0;
  for (const w of aWords) {
    const left = available.get(w) ?? 0;
    if (left > 0) {
      matches++;
      available.set(w, left - 1);
    }
  }

  return matches / Math.max(aWords.length, bWords.length);
}
/**
 * Rewrites matching removed/added pairs in `changes` as single `moved`
 * entries.
 *
 * For every removed passage of at least `MOVE_MIN_WORDS` words, the
 * not-yet-used added passage with the highest similarity above
 * `MOVE_SIMILARITY_THRESHOLD` is taken as its destination. The pair is then
 * dropped from `changes` and a `moved` entry is appended in its place.
 *
 * @param changes - Change list to rewrite (mutated in place).
 */
function detectMovedText(changes: CompareTextChange[]) {
  const removed = changes.filter((c) => c.type === 'removed');
  const added = changes.filter((c) => c.type === 'added');
  if (removed.length === 0 || added.length === 0) return;

  // Normalize each candidate once instead of once per removed passage.
  const candidates = added.map((change) => ({
    change,
    norm: normalizeForMove(change.afterText),
  }));

  const matchedRemoved = new Set<string>();
  const matchedAdded = new Set<string>();

  for (const rem of removed) {
    const remNorm = normalizeForMove(rem.beforeText);
    const remWordCount = remNorm.split(' ').length;
    if (remWordCount < MOVE_MIN_WORDS) continue;

    let bestMatch: CompareTextChange | null = null;
    let bestScore = MOVE_SIMILARITY_THRESHOLD;

    for (const candidate of candidates) {
      if (matchedAdded.has(candidate.change.id)) continue;
      const score = moveSimilarity(remNorm, candidate.norm);
      if (score > bestScore) {
        bestScore = score;
        bestMatch = candidate.change;
      }
    }

    if (bestMatch) {
      matchedRemoved.add(rem.id);
      matchedAdded.add(bestMatch.id);
      changes.push({
        id: `moved-${changes.length}`,
        type: 'moved',
        category: 'text',
        description: `Moved "${rem.beforeText.slice(0, 80)}"`,
        beforeText: rem.beforeText,
        afterText: bestMatch.afterText,
        beforeRects: rem.beforeRects,
        afterRects: bestMatch.afterRects,
      });
    }
  }

  // Walk backwards so that splicing does not shift the entries still to visit.
  for (let i = changes.length - 1; i >= 0; i--) {
    if (matchedRemoved.has(changes[i].id) || matchedAdded.has(changes[i].id)) {
      changes.splice(i, 1);
    }
  }
}

View File

@@ -1,6 +1,8 @@
import * as pdfjsLib from 'pdfjs-dist'; import * as pdfjsLib from 'pdfjs-dist';
import type { import type {
CompareAnnotation,
CompareImageRef,
ComparePageModel, ComparePageModel,
CompareTextItem, CompareTextItem,
CharPosition, CharPosition,
@@ -9,6 +11,8 @@ import type {
import { import {
joinCompareTextItems, joinCompareTextItems,
normalizeCompareText, normalizeCompareText,
containsCJK,
segmentCJKText,
} from './text-normalization.ts'; } from './text-normalization.ts';
type PageTextItem = { type PageTextItem = {
@@ -69,11 +73,14 @@ function measureTextWidth(fontSpec: string, text: string): number {
return width; return width;
} }
type FontNameMap = Map<string, string>;
function buildItemWordTokens( function buildItemWordTokens(
viewport: pdfjsLib.PageViewport, viewport: pdfjsLib.PageViewport,
item: PageTextItem, item: PageTextItem,
fallbackRect: CompareTextItem['rect'], fallbackRect: CompareTextItem['rect'],
styles: TextStyles styles: TextStyles,
fontNameMap: FontNameMap
): CompareWordToken[] { ): CompareWordToken[] {
const rawText = item.str || ''; const rawText = item.str || '';
if (!rawText.trim()) { if (!rawText.trim()) {
@@ -216,19 +223,47 @@ function buildItemWordTokens(
(previousToken (previousToken
? shouldJoinTokenWithPrevious(previousToken.word, normalizedWord) ? shouldJoinTokenWithPrevious(previousToken.word, normalizedWord)
: false), : false),
fontName: fontNameMap.get(item.fontName) ?? item.fontName ?? undefined,
fontSize: fontScale > 0 ? Math.round(fontScale * 100) / 100 : undefined,
}); });
previousEnd = endIndex; previousEnd = endIndex;
} }
return tokens; if (!containsCJK(rawText)) return tokens;
return tokens.flatMap(splitCJKWordToken);
}
/**
 * Splits a word token containing CJK text into one token per word segment.
 *
 * Tokens without CJK characters, or that yield at most one segment, are
 * returned unchanged. Each produced token gets a proportional slice of the
 * original rectangle (the width is divided evenly per UTF-16 code unit) and
 * inherits the font name and size. The first segment keeps the original
 * `joinsWithPrevious` flag; every later segment is marked as joining.
 *
 * @param token - The word token to split.
 * @returns The original token in a one-element array, or its segments.
 */
function splitCJKWordToken(token: CompareWordToken): CompareWordToken[] {
  if (!containsCJK(token.word)) return [token];

  const segments = segmentCJKText(token.word);
  if (segments.length <= 1) return [token];

  const charWidth = token.rect.width / Math.max(token.word.length, 1);
  let searchFrom = 0;

  return segments.map((seg, i) => {
    // segmentCJKText drops non-word segments (punctuation, spaces), so the
    // segment's position must be looked up in the source text rather than
    // accumulated from segment lengths; otherwise rects drift to the left.
    const found = token.word.indexOf(seg, searchFrom);
    const start = found >= 0 ? found : searchFrom;
    searchFrom = start + seg.length;

    return {
      word: seg,
      compareWord: seg.toLowerCase(),
      rect: {
        x: token.rect.x + start * charWidth,
        y: token.rect.y,
        width: seg.length * charWidth,
        height: token.rect.height,
      },
      joinsWithPrevious: i > 0 ? true : token.joinsWithPrevious,
      fontName: token.fontName,
      fontSize: token.fontSize,
    };
  });
}
function toRect( function toRect(
viewport: pdfjsLib.PageViewport, viewport: pdfjsLib.PageViewport,
item: PageTextItem, item: PageTextItem,
index: number, index: number,
styles: TextStyles styles: TextStyles,
fontNameMap: FontNameMap
) { ) {
const normalizedText = normalizeCompareText(item.str); const normalizedText = normalizeCompareText(item.str);
@@ -256,7 +291,7 @@ function toRect(
text: item.str, text: item.str,
normalizedText, normalizedText,
rect, rect,
wordTokens: buildItemWordTokens(viewport, item, rect, styles), wordTokens: buildItemWordTokens(viewport, item, rect, styles, fontNameMap),
} satisfies CompareTextItem; } satisfies CompareTextItem;
} }
@@ -387,6 +422,8 @@ function mergeWordTokenRects(
width: maxX - minX, width: maxX - minX,
height: maxY - minY, height: maxY - minY,
}, },
fontName: left.fontName,
fontSize: left.fontSize,
}; };
} }
@@ -431,6 +468,8 @@ function buildMergedWordTokens(lineItems: CompareTextItem[]) {
word: token.word, word: token.word,
compareWord: token.compareWord, compareWord: token.compareWord,
rect: token.rect, rect: token.rect,
fontName: token.fontName,
fontSize: token.fontSize,
}); });
} }
}); });
@@ -496,16 +535,131 @@ export function mergeIntoLines(
}); });
} }
/**
 * Converts raw annotation records into `CompareAnnotation` objects.
 *
 * Records without a subtype, and `Link` / `Widget` records, are skipped.
 * Rectangles are converted to viewport coordinates and forced to be at least
 * 1 unit wide and tall; a missing or malformed rectangle becomes all zeros.
 * Ids are numbered in the order the kept annotations appear.
 *
 * @param rawAnnotations - Annotation records for the page.
 * @param viewport - Viewport used to convert the rectangle corners.
 * @returns The kept annotations in their original order.
 */
function extractAnnotations(
  rawAnnotations: Array<Record<string, unknown>>,
  viewport: pdfjsLib.PageViewport
): CompareAnnotation[] {
  const extracted: CompareAnnotation[] = [];

  for (const ann of rawAnnotations) {
    const subtype = ann.subtype as string | undefined;
    if (!subtype || subtype === 'Link' || subtype === 'Widget') continue;

    let rect = { x: 0, y: 0, width: 0, height: 0 };
    const rawRect = ann.rect as number[] | undefined;
    if (rawRect && rawRect.length === 4) {
      const cornerA = viewport.convertToViewportPoint(rawRect[0], rawRect[1]);
      const cornerB = viewport.convertToViewportPoint(rawRect[2], rawRect[3]);
      rect = {
        x: Math.min(cornerA[0], cornerB[0]),
        y: Math.min(cornerA[1], cornerB[1]),
        width: Math.max(Math.abs(cornerB[0] - cornerA[0]), 1),
        height: Math.max(Math.abs(cornerB[1] - cornerA[1]), 1),
      };
    }

    const color = ann.color as number[] | undefined;
    const contents = ((ann.contents as string) || '').trim();
    const title = ((ann.title as string) || '').trim();

    extracted.push({
      id: `ann-${extracted.length}`,
      subtype: (ann.subtype as string) || 'Unknown',
      rect,
      contents,
      title,
      color: color ? `rgb(${color.join(',')})` : '',
    });
  }

  return extracted;
}
/**
 * Collects the images painted by a page's operator list.
 *
 * Handles regular image XObjects (pixel size in `args[1]` / `args[2]`) and
 * inline images (pixel size on the image data in `args[0]`). Images smaller
 * than 2 px in either dimension are ignored.
 *
 * NOTE(review): the rectangle is derived only from the image's pixel size
 * anchored at the page origin; the transformation matrix in effect when the
 * image is painted is not tracked here, so `rect` does not describe where
 * the image appears on the page. Confirm whether callers need real positions.
 *
 * @param opList - Operator list of the page (`fnArray` / `argsArray`).
 * @param viewport - Viewport used to convert to viewport coordinates.
 * @returns One entry per detected image, in paint order.
 */
function extractImages(
  opList: { fnArray: number[]; argsArray: unknown[][] },
  viewport: pdfjsLib.PageViewport
): CompareImageRef[] {
  // Take the opcodes from pdf.js itself: the previously hard-coded 84 is
  // paintImageMaskXObjectGroup, not the inline-image opcode, so inline images
  // were never picked up.
  const { paintImageXObject, paintInlineImageXObject } = pdfjsLib.OPS;

  const images: CompareImageRef[] = [];
  // The origin maps to the same viewport point for every image.
  const [vpX, vpY] = viewport.convertToViewportPoint(0, 0);

  for (let i = 0; i < opList.fnArray.length; i++) {
    const op = opList.fnArray[i];
    if (op !== paintImageXObject && op !== paintInlineImageXObject) continue;

    const args = opList.argsArray[i];
    if (!args) continue;

    let imgWidth = 0;
    let imgHeight = 0;

    if (op === paintInlineImageXObject && args[0]) {
      const imgData = args[0] as Record<string, unknown>;
      imgWidth = (imgData.width as number) || 0;
      imgHeight = (imgData.height as number) || 0;
    } else if (op === paintImageXObject) {
      imgWidth = (args[1] as number) || 0;
      imgHeight = (args[2] as number) || 0;
    }

    if (imgWidth < 2 || imgHeight < 2) continue;

    const [vpX2, vpY2] = viewport.convertToViewportPoint(imgWidth, imgHeight);

    images.push({
      id: `img-${images.length}`,
      rect: {
        x: Math.min(vpX, vpX2),
        y: Math.min(vpY, vpY2),
        // Fall back to the pixel size if the converted extent collapses to 0.
        width: Math.abs(vpX2 - vpX) || imgWidth,
        height: Math.abs(vpY2 - vpY) || imgHeight,
      },
      width: imgWidth,
      height: imgHeight,
    });
  }

  return images;
}
export async function extractPageModel( export async function extractPageModel(
page: pdfjsLib.PDFPageProxy, page: pdfjsLib.PDFPageProxy,
viewport: pdfjsLib.PageViewport viewport: pdfjsLib.PageViewport
): Promise<ComparePageModel> { ): Promise<ComparePageModel> {
const textContent = await page.getTextContent(); const [textContent, rawAnnotations, opList] = await Promise.all([
page.getTextContent(),
page
.getAnnotations({ intent: 'any' })
.catch(() => [] as Array<Record<string, unknown>>),
page
.getOperatorList()
.catch(() => ({ fnArray: [] as number[], argsArray: [] as unknown[][] })),
]);
const styles = textContent.styles ?? {}; const styles = textContent.styles ?? {};
const fontNameMap: FontNameMap = new Map();
const seenFonts = new Set<string>();
for (const item of textContent.items) {
if ('fontName' in item && typeof item.fontName === 'string') {
seenFonts.add(item.fontName);
}
}
for (const internalName of seenFonts) {
try {
if (page.commonObjs.has(internalName)) {
const fontObj = page.commonObjs.get(internalName);
if (fontObj?.name && typeof fontObj.name === 'string') {
fontNameMap.set(internalName, fontObj.name);
}
}
} catch {}
}
const rawItems = sortCompareTextItems( const rawItems = sortCompareTextItems(
textContent.items textContent.items
.filter((item): item is PageTextItem => 'str' in item) .filter((item): item is PageTextItem => 'str' in item)
.map((item, index) => toRect(viewport, item, index, styles)) .map((item, index) => toRect(viewport, item, index, styles, fontNameMap))
.filter((item) => item.normalizedText.length > 0) .filter((item) => item.normalizedText.length > 0)
); );
const textItems = mergeIntoLines(rawItems); const textItems = mergeIntoLines(rawItems);
@@ -518,5 +672,13 @@ export async function extractPageModel(
plainText: joinCompareTextItems(textItems), plainText: joinCompareTextItems(textItems),
hasText: textItems.length > 0, hasText: textItems.length > 0,
source: 'pdfjs', source: 'pdfjs',
annotations: extractAnnotations(
rawAnnotations as Array<Record<string, unknown>>,
viewport
),
images: extractImages(
opList as { fnArray: number[]; argsArray: unknown[][] },
viewport
),
}; };
} }

View File

@@ -71,6 +71,32 @@ export function tokenizeTextAsSet(text: string): Set<string> {
return new Set(tokenizeText(text)); return new Set(tokenizeText(text));
} }
// Matches one character from U+2E80–U+9FFF, U+F900–U+FAFF, U+FE30–U+FE4F or
// U+20000–U+2FA1F.
const CJK_REGEX =
  /[\u2E80-\u9FFF\uF900-\uFAFF\uFE30-\uFE4F\u{20000}-\u{2FA1F}]/u;

/**
 * Reports whether `text` contains at least one character matched by
 * `CJK_REGEX`.
 */
export function containsCJK(text: string): boolean {
  const firstHit = CJK_REGEX.exec(text);
  return firstHit !== null;
}
// Lazily created word segmenter, shared by all calls.
let cachedSegmenter: Intl.Segmenter | null = null;

/**
 * Returns a shared word-granularity `Intl.Segmenter`, creating it on first
 * use, or `null` when the runtime does not provide `Intl.Segmenter`.
 */
function getWordSegmenter(): Intl.Segmenter | null {
  if (!cachedSegmenter) {
    const supported = typeof Intl !== 'undefined' && Boolean(Intl.Segmenter);
    if (!supported) {
      return null;
    }
    cachedSegmenter = new Intl.Segmenter(undefined, { granularity: 'word' });
  }
  return cachedSegmenter;
}
/**
 * Splits `text` into its word-like segments.
 *
 * Segments that are not word-like are dropped. When no segmenter is
 * available the whole text is returned as a single element.
 *
 * @param text - Text to segment.
 * @returns The word-like segments in their original order.
 */
export function segmentCJKText(text: string): string[] {
  const segmenter = getWordSegmenter();
  if (!segmenter) {
    return [text];
  }

  const words: string[] = [];
  for (const part of segmenter.segment(text)) {
    if (part.isWordLike) {
      words.push(part.segment);
    }
  }
  return words;
}
export function calculateBoundingRect( export function calculateBoundingRect(
rects: CompareRectangle[] rects: CompareRectangle[]
): CompareRectangle { ): CompareRectangle {

View File

@@ -6,7 +6,7 @@ import type {
ComparePdfExportMode, ComparePdfExportMode,
} from '../types.ts'; } from '../types.ts';
import { extractPageModel } from '../engine/extract-page-model.ts'; import { extractPageModel } from '../engine/extract-page-model.ts';
import { comparePageModels } from '../engine/compare-page-models.ts'; import { comparePageModelsAsync } from '../engine/compare-page-models.ts';
import { import {
COMPARE_COLORS, COMPARE_COLORS,
HIGHLIGHT_OPACITY, HIGHLIGHT_OPACITY,
@@ -42,6 +42,18 @@ const HIGHLIGHT_COLORS: Record<
b: COMPARE_COLORS.modified.b / 255, b: COMPARE_COLORS.modified.b / 255,
opacity: HIGHLIGHT_OPACITY, opacity: HIGHLIGHT_OPACITY,
}, },
moved: {
r: COMPARE_COLORS.moved.r / 255,
g: COMPARE_COLORS.moved.g / 255,
b: COMPARE_COLORS.moved.b / 255,
opacity: HIGHLIGHT_OPACITY,
},
'style-changed': {
r: COMPARE_COLORS['style-changed'].r / 255,
g: COMPARE_COLORS['style-changed'].g / 255,
b: COMPARE_COLORS['style-changed'].b / 255,
opacity: HIGHLIGHT_OPACITY,
},
}; };
const EXTRACT_SCALE = COMPARE_RENDER.EXPORT_EXTRACT_SCALE; const EXTRACT_SCALE = COMPARE_RENDER.EXPORT_EXTRACT_SCALE;
@@ -124,7 +136,7 @@ export async function exportComparePdf(
) )
: null; : null;
const comparison = comparePageModels(leftModel, rightModel); const comparison = await comparePageModelsAsync(leftModel, rightModel);
const changes = comparison.changes; const changes = comparison.changes;
if (mode === 'split') { if (mode === 'split') {

View File

@@ -22,16 +22,24 @@ export interface DiffFocusRegion {
height: number; height: number;
} }
/**
 * An OCR-derived page model stored together with the page dimensions that
 * were in effect when it was cached. The dimensions let a cached model be
 * rescaled to a different render size instead of running OCR again.
 */
export interface OcrCacheEntry {
// Page model produced from OCR output.
model: ComparePageModel;
// Width of the viewport the model's coordinates were captured at.
width: number;
// Height of the viewport the model's coordinates were captured at.
height: number;
}
export interface CompareCaches { export interface CompareCaches {
pageModelCache: LRUCache<string, ComparePageModel>; pageModelCache: LRUCache<string, ComparePageModel>;
comparisonCache: LRUCache<string, ComparePageResult>; comparisonCache: LRUCache<string, ComparePageResult>;
comparisonResultsCache: LRUCache<number, ComparePageResult>; comparisonResultsCache: LRUCache<number, ComparePageResult>;
ocrModelCache: LRUCache<string, OcrCacheEntry>;
} }
export interface CompareRenderContext { export interface CompareRenderContext {
useOcr: boolean; useOcr: boolean;
ocrLanguage: string; ocrLanguage: string;
viewMode: CompareViewMode; viewMode: CompareViewMode;
zoomLevel: number;
showLoader: (message: string, percent?: number) => void; showLoader: (message: string, percent?: number) => void;
} }
@@ -52,6 +60,8 @@ export interface CompareWordToken {
compareWord: string; compareWord: string;
rect: CompareRectangle; rect: CompareRectangle;
joinsWithPrevious?: boolean; joinsWithPrevious?: boolean;
fontName?: string;
fontSize?: number;
} }
export interface CompareTextItem { export interface CompareTextItem {
@@ -72,6 +82,24 @@ export interface ComparePageModel {
plainText: string; plainText: string;
hasText: boolean; hasText: boolean;
source: 'pdfjs' | 'ocr'; source: 'pdfjs' | 'ocr';
annotations?: CompareAnnotation[];
images?: CompareImageRef[];
}
/**
 * A single annotation carried on a page model (`ComparePageModel.annotations`).
 * NOTE(review): field semantics presumably mirror the raw pdf.js annotation
 * data they are extracted from — confirm against `extractAnnotations`.
 */
export interface CompareAnnotation {
// Identifier for this annotation.
id: string;
// Annotation subtype name.
subtype: string;
// Bounding rectangle of the annotation.
rect: CompareRectangle;
// Text contents of the annotation.
contents: string;
// Annotation title.
title: string;
// Annotation colour as a string; exact format is not visible here — TODO confirm.
color: string;
}
export interface CompareImageRef {
id: string;
rect: CompareRectangle;
width: number;
height: number;
} }
export interface ComparePageSignature { export interface ComparePageSignature {
@@ -98,12 +126,23 @@ export type CompareChangeType =
| 'added' | 'added'
| 'removed' | 'removed'
| 'modified' | 'modified'
| 'moved'
| 'style-changed'
| 'page-added' | 'page-added'
| 'page-removed'; | 'page-removed';
/**
 * Content category attached to every change (`CompareTextChange.category`).
 * The same set of names keys the per-category summary counts and the
 * category filter toggles.
 */
export type CompareContentCategory =
| 'text'
| 'image'
| 'header-footer'
| 'annotation'
| 'formatting'
| 'background';
export interface CompareTextChange { export interface CompareTextChange {
id: string; id: string;
type: CompareChangeType; type: CompareChangeType;
category: CompareContentCategory;
description: string; description: string;
beforeText: string; beforeText: string;
afterText: string; afterText: string;
@@ -115,6 +154,17 @@ export interface CompareChangeSummary {
added: number; added: number;
removed: number; removed: number;
modified: number; modified: number;
moved: number;
styleChanged: number;
}
export interface CompareCategorySummary {
text: number;
image: number;
'header-footer': number;
annotation: number;
formatting: number;
background: number;
} }
export interface ComparePageResult { export interface ComparePageResult {
@@ -123,12 +173,28 @@ export interface ComparePageResult {
rightPageNumber: number | null; rightPageNumber: number | null;
changes: CompareTextChange[]; changes: CompareTextChange[];
summary: CompareChangeSummary; summary: CompareChangeSummary;
categorySummary: CompareCategorySummary;
visualDiff: CompareVisualDiff | null; visualDiff: CompareVisualDiff | null;
confidence?: number; confidence?: number;
usedOcr?: boolean; usedOcr?: boolean;
} }
export type CompareFilterType = 'added' | 'removed' | 'modified' | 'all'; export type CompareFilterType =
| 'added'
| 'removed'
| 'modified'
| 'moved'
| 'style-changed'
| 'all';
/**
 * Per-category visibility toggles for the change list. A change is kept by
 * the category filter only while the flag for its category is `true`.
 */
export interface CompareCategoryFilterState {
// Show changes categorised as 'text'.
text: boolean;
// Show changes categorised as 'image'.
image: boolean;
// Show changes categorised as 'header-footer'.
'header-footer': boolean;
// Show changes categorised as 'annotation'.
annotation: boolean;
// Show changes categorised as 'formatting'.
formatting: boolean;
// Show changes categorised as 'background'.
background: boolean;
}
export interface CompareState { export interface CompareState {
pdfDoc1: pdfjsLib.PDFDocumentProxy | null; pdfDoc1: pdfjsLib.PDFDocumentProxy | null;
@@ -140,7 +206,9 @@ export interface CompareState {
activeChangeIndex: number; activeChangeIndex: number;
pagePairs: ComparePagePair[]; pagePairs: ComparePagePair[];
activeFilter: CompareFilterType; activeFilter: CompareFilterType;
categoryFilter: CompareCategoryFilterState;
changeSearchQuery: string; changeSearchQuery: string;
useOcr: boolean; useOcr: boolean;
ocrLanguage: string; ocrLanguage: string;
zoomLevel: number;
} }

View File

@@ -0,0 +1,90 @@
import type {
CompareTextItem,
ComparePageSignature,
ComparePagePair,
CompareChangeSummary,
CompareTextChange,
} from './types.ts';
import { diffTextRuns } from './engine/diff-text-runs.ts';
import { pairPages } from './engine/pair-pages.ts';
// Shared worker instance, created lazily by getWorker() and reset to null
// when the worker reports an error.
let worker: Worker | null = null;
// Incrementing counter used to tag each request so that a reply can be
// matched to the caller that is waiting for it.
let messageId = 0;
// In-flight requests keyed by message id; each entry holds the promise
// callbacks to settle when the matching reply (or a worker failure) arrives.
const pending = new Map<
number,
{ resolve: (value: unknown) => void; reject: (reason: unknown) => void }
>();
/**
 * Returns the shared compare worker, constructing it on first use.
 *
 * Replies are routed to the matching entry in `pending` by message id: a
 * reply of type 'error' rejects the request with its message, any other
 * reply resolves it with the remaining payload fields. If the worker itself
 * errors, it is terminated, forgotten, and every in-flight request is
 * rejected with 'Worker crashed'.
 *
 * @returns The worker, or `null` when it could not be constructed.
 */
function getWorker(): Worker | null {
  if (worker !== null) {
    return worker;
  }

  try {
    const created = new Worker(
      new URL('./engine/compare.worker.ts', import.meta.url),
      { type: 'module' }
    );
    worker = created;

    created.onmessage = (event) => {
      const { id, type, ...payload } = event.data;
      const request = pending.get(id);
      if (request === undefined) {
        // Reply for an unknown or already settled request: ignore it.
        return;
      }
      pending.delete(id);
      if (type !== 'error') {
        request.resolve(payload);
        return;
      }
      request.reject(new Error((payload as { message: string }).message));
    };

    created.onerror = () => {
      worker?.terminate();
      // Dropping the reference makes the next call build a fresh worker.
      worker = null;
      pending.forEach((request) => {
        request.reject(new Error('Worker crashed'));
      });
      pending.clear();
    };

    return created;
  } catch {
    // Construction failed; callers treat null as "no worker available".
    return null;
  }
}
/**
 * Sends a request to the compare worker and returns a promise for its reply.
 *
 * The request is tagged with a fresh message id and registered in `pending`
 * so the worker's message handler can settle it later.
 *
 * @param msg - Request payload; an `id` field is added before posting.
 * @returns A promise settled by the worker's reply. It rejects with
 *   'No worker' when no worker is available, and with the thrown error when
 *   the message cannot be posted.
 */
function postToWorker(msg: Record<string, unknown>): Promise<unknown> {
  const w = getWorker();
  if (!w) return Promise.reject(new Error('No worker'));
  const id = ++messageId;
  return new Promise((resolve, reject) => {
    pending.set(id, { resolve, reject });
    try {
      w.postMessage({ ...msg, id });
    } catch (err) {
      // postMessage can throw synchronously (for example when the payload
      // cannot be cloned). No reply will ever arrive for this id, so remove
      // the entry instead of leaving it in `pending` indefinitely.
      pending.delete(id);
      reject(err);
    }
  });
}
/**
 * Diffs two lists of text items, preferring the background worker.
 *
 * If the worker request fails for any reason, the diff is computed
 * synchronously with `diffTextRuns` instead.
 *
 * @param beforeItems - Text items from the original page.
 * @param afterItems - Text items from the modified page.
 * @returns The detected changes together with their summary counts.
 */
export async function diffTextRunsAsync(
  beforeItems: CompareTextItem[],
  afterItems: CompareTextItem[]
): Promise<{ changes: CompareTextChange[]; summary: CompareChangeSummary }> {
  const request = { type: 'diff', beforeItems, afterItems };
  let workerReply: unknown;
  try {
    workerReply = await postToWorker(request);
  } catch {
    // Worker unavailable or failed: fall back to the in-thread diff.
    return diffTextRuns(beforeItems, afterItems);
  }
  return workerReply as {
    changes: CompareTextChange[];
    summary: CompareChangeSummary;
  };
}
/**
 * Pairs pages of two documents, preferring the background worker.
 *
 * If the worker request fails for any reason, pairing is computed
 * synchronously with `pairPages` instead.
 *
 * @param leftPages - Page signatures of the first document.
 * @param rightPages - Page signatures of the second document.
 * @returns The page pairs.
 */
export async function pairPagesAsync(
  leftPages: ComparePageSignature[],
  rightPages: ComparePageSignature[]
): Promise<ComparePagePair[]> {
  const request = { type: 'pair', leftPages, rightPages };
  let workerReply: { pairs: ComparePagePair[] };
  try {
    workerReply = (await postToWorker(request)) as { pairs: ComparePagePair[] };
  } catch {
    // Worker unavailable or failed: fall back to in-thread pairing.
    return pairPages(leftPages, rightPages);
  }
  return workerReply.pairs;
}

View File

@@ -7,9 +7,10 @@ import type {
CompareFilterType, CompareFilterType,
ComparePageResult, ComparePageResult,
CompareTextChange, CompareTextChange,
CompareCategoryFilterState,
} from '../compare/types.ts'; } from '../compare/types.ts';
import { extractDocumentSignatures } from '../compare/engine/page-signatures.ts'; import { extractDocumentSignatures } from '../compare/engine/page-signatures.ts';
import { pairPages } from '../compare/engine/pair-pages.ts'; import { pairPagesAsync } from '../compare/worker-api.ts';
import type { import type {
ComparePdfExportMode, ComparePdfExportMode,
CompareCaches, CompareCaches,
@@ -39,15 +40,25 @@ const pageState: CompareState = {
activeChangeIndex: 0, activeChangeIndex: 0,
pagePairs: [], pagePairs: [],
activeFilter: 'all', activeFilter: 'all',
categoryFilter: {
text: true,
image: true,
'header-footer': true,
annotation: true,
formatting: true,
background: true,
},
changeSearchQuery: '', changeSearchQuery: '',
useOcr: true, useOcr: true,
ocrLanguage: 'eng', ocrLanguage: 'eng',
zoomLevel: 1.0,
}; };
const caches: CompareCaches = { const caches: CompareCaches = {
pageModelCache: new LRUCache(COMPARE_CACHE_MAX_SIZE), pageModelCache: new LRUCache(COMPARE_CACHE_MAX_SIZE),
comparisonCache: new LRUCache(COMPARE_CACHE_MAX_SIZE), comparisonCache: new LRUCache(COMPARE_CACHE_MAX_SIZE),
comparisonResultsCache: new LRUCache(COMPARE_CACHE_MAX_SIZE), comparisonResultsCache: new LRUCache(COMPARE_CACHE_MAX_SIZE),
ocrModelCache: new LRUCache(COMPARE_CACHE_MAX_SIZE),
}; };
const documentNames = { const documentNames = {
left: 'first.pdf', left: 'first.pdf',
@@ -65,6 +76,7 @@ function getRenderContext(): CompareRenderContext {
useOcr: pageState.useOcr, useOcr: pageState.useOcr,
ocrLanguage: pageState.ocrLanguage, ocrLanguage: pageState.ocrLanguage,
viewMode: pageState.viewMode, viewMode: pageState.viewMode,
zoomLevel: pageState.zoomLevel,
showLoader, showLoader,
}; };
} }
@@ -79,15 +91,22 @@ function getVisibleChanges(result: ComparePageResult | null) {
if (pageState.activeFilter === 'removed') { if (pageState.activeFilter === 'removed') {
return change.type === 'removed' || change.type === 'page-removed'; return change.type === 'removed' || change.type === 'page-removed';
} }
if (pageState.activeFilter === 'added') {
return change.type === 'added' || change.type === 'page-added';
}
return change.type === pageState.activeFilter; return change.type === pageState.activeFilter;
}); });
const filteredByCategory = filteredByType.filter(
(change) => pageState.categoryFilter[change.category]
);
const searchQuery = pageState.changeSearchQuery.trim().toLowerCase(); const searchQuery = pageState.changeSearchQuery.trim().toLowerCase();
if (!searchQuery) { if (!searchQuery) {
return filteredByType; return filteredByCategory;
} }
return filteredByType.filter((change) => { return filteredByCategory.filter((change) => {
const searchableText = [ const searchableText = [
change.description, change.description,
change.beforeText, change.beforeText,
@@ -104,6 +123,8 @@ function updateFilterButtons() {
{ id: 'filter-modified', filter: 'modified' }, { id: 'filter-modified', filter: 'modified' },
{ id: 'filter-added', filter: 'added' }, { id: 'filter-added', filter: 'added' },
{ id: 'filter-removed', filter: 'removed' }, { id: 'filter-removed', filter: 'removed' },
{ id: 'filter-moved', filter: 'moved' },
{ id: 'filter-style-changed', filter: 'style-changed' },
]; ];
pills.forEach(({ id, filter }) => { pills.forEach(({ id, filter }) => {
@@ -118,6 +139,10 @@ function updateSummary() {
const addedCount = getElement<HTMLElement>('summary-added-count'); const addedCount = getElement<HTMLElement>('summary-added-count');
const removedCount = getElement<HTMLElement>('summary-removed-count'); const removedCount = getElement<HTMLElement>('summary-removed-count');
const modifiedCount = getElement<HTMLElement>('summary-modified-count'); const modifiedCount = getElement<HTMLElement>('summary-modified-count');
const movedCount = getElement<HTMLElement>('summary-moved-count');
const styleChangedCount = getElement<HTMLElement>(
'summary-style-changed-count'
);
const panelLabel1 = getElement<HTMLElement>('compare-panel-label-1'); const panelLabel1 = getElement<HTMLElement>('compare-panel-label-1');
const panelLabel2 = getElement<HTMLElement>('compare-panel-label-2'); const panelLabel2 = getElement<HTMLElement>('compare-panel-label-2');
@@ -128,6 +153,9 @@ function updateSummary() {
if (addedCount) addedCount.textContent = '0'; if (addedCount) addedCount.textContent = '0';
if (removedCount) removedCount.textContent = '0'; if (removedCount) removedCount.textContent = '0';
if (modifiedCount) modifiedCount.textContent = '0'; if (modifiedCount) modifiedCount.textContent = '0';
if (movedCount) movedCount.textContent = '0';
if (styleChangedCount) styleChangedCount.textContent = '0';
updateCategoryPills(null);
return; return;
} }
@@ -136,6 +164,34 @@ function updateSummary() {
removedCount.textContent = comparison.summary.removed.toString(); removedCount.textContent = comparison.summary.removed.toString();
if (modifiedCount) if (modifiedCount)
modifiedCount.textContent = comparison.summary.modified.toString(); modifiedCount.textContent = comparison.summary.modified.toString();
if (movedCount) movedCount.textContent = comparison.summary.moved.toString();
if (styleChangedCount)
styleChangedCount.textContent = comparison.summary.styleChanged.toString();
updateCategoryPills(comparison);
}
function updateCategoryPills(comparison: ComparePageResult | null) {
const categoryKeys: Array<keyof CompareCategoryFilterState> = [
'text',
'image',
'header-footer',
'annotation',
'formatting',
'background',
];
const summary = comparison?.categorySummary;
for (const key of categoryKeys) {
const countEl = getElement<HTMLElement>(`category-count-${key}`);
const pill = getElement<HTMLButtonElement>(`category-${key}`);
if (countEl) countEl.textContent = summary ? summary[key].toString() : '0';
if (pill) {
pill.classList.toggle('active', pageState.categoryFilter[key]);
pill.classList.toggle('disabled', !pageState.categoryFilter[key]);
}
}
} }
function renderHighlights() { function renderHighlights() {
@@ -233,16 +289,49 @@ function renderChangeList() {
emptyState.classList.add('hidden'); emptyState.classList.add('hidden');
list.classList.remove('hidden'); list.classList.remove('hidden');
const typeLabels: Record<string, string> = {
added: 'Added',
removed: 'Deleted',
modified: 'Modified',
moved: 'Moved',
'style-changed': 'Style Changed',
'page-added': 'Page Added',
'page-removed': 'Page Removed',
};
const grouped = new Map<
string,
Array<{ change: CompareTextChange; index: number }>
>();
visibleChanges.forEach((change, index) => { visibleChanges.forEach((change, index) => {
const key = change.type;
if (!grouped.has(key)) grouped.set(key, []);
grouped.get(key)!.push({ change, index });
});
for (const [type, entries] of grouped) {
const header = document.createElement('div');
header.className = 'compare-section-header';
header.innerHTML = `
<span class="compare-section-label ${type}">${typeLabels[type] || type}</span>
<span class="compare-section-count">${entries.length}</span>
<span class="compare-section-line"></span>
`;
list.appendChild(header);
const arrowSvg =
'<svg xmlns="http://www.w3.org/2000/svg" width="14" height="14" viewBox="0 0 256 256" fill="currentColor" style="display:inline-block;vertical-align:-2px;margin:0 2px;opacity:0.5"><path d="M221.66,133.66l-72,72a8,8,0,0,1-11.32-11.32L196.69,136H40a8,8,0,0,1,0-16H196.69L138.34,61.66a8,8,0,0,1,11.32-11.32l72,72A8,8,0,0,1,221.66,133.66Z"></path></svg>';
for (const { change, index } of entries) {
const item = document.createElement('div'); const item = document.createElement('div');
item.className = `compare-change-item${index === pageState.activeChangeIndex ? ' active' : ''}`; item.className = `compare-change-item${index === pageState.activeChangeIndex ? ' active' : ''}`;
item.innerHTML = ` const safeDesc = change.description
<span class="compare-change-dot ${change.type}"></span> .replace(/&/g, '&amp;')
<div class="compare-change-desc"> .replace(/</g, '&lt;')
<div class="compare-change-desc-text">${change.description}</div> .replace(/>/g, '&gt;')
</div> .replace(/\n/g, '<br>')
<span class="compare-change-type ${change.type}">${change.type.replace('-', ' ')}</span> .replace(/→/g, arrowSvg);
`; item.innerHTML = `<div class="compare-change-desc">${safeDesc}</div>`;
item.addEventListener('click', function () { item.addEventListener('click', function () {
pageState.activeChangeIndex = index; pageState.activeChangeIndex = index;
@@ -251,7 +340,8 @@ function renderChangeList() {
}); });
list.appendChild(item); list.appendChild(item);
}); }
}
prevChangeBtn.disabled = false; prevChangeBtn.disabled = false;
nextChangeBtn.disabled = false; nextChangeBtn.disabled = false;
@@ -289,7 +379,7 @@ async function buildPagePairs() {
} }
); );
pageState.pagePairs = pairPages(leftSignatures, rightSignatures); pageState.pagePairs = await pairPagesAsync(leftSignatures, rightSignatures);
pageState.currentPage = 1; pageState.currentPage = 1;
} }
@@ -668,6 +758,8 @@ document.addEventListener('DOMContentLoaded', function () {
{ id: 'filter-modified', filter: 'modified' }, { id: 'filter-modified', filter: 'modified' },
{ id: 'filter-added', filter: 'added' }, { id: 'filter-added', filter: 'added' },
{ id: 'filter-removed', filter: 'removed' }, { id: 'filter-removed', filter: 'removed' },
{ id: 'filter-moved', filter: 'moved' },
{ id: 'filter-style-changed', filter: 'style-changed' },
]; ];
if (syncToggle) { if (syncToggle) {
@@ -725,6 +817,59 @@ document.addEventListener('DOMContentLoaded', function () {
}); });
} }
// Amount added to or subtracted from the zoom level per zoom-in/zoom-out click.
const ZOOM_STEP = 0.25;
// Lower and upper bounds the zoom level is clamped to.
const ZOOM_MIN = 0.25;
const ZOOM_MAX = 5.0;
// Zoom controls; each lookup result is null-checked before it is used.
const zoomInBtn = getElement<HTMLButtonElement>('zoom-in-btn');
const zoomOutBtn = getElement<HTMLButtonElement>('zoom-out-btn');
const zoomResetBtn = getElement<HTMLButtonElement>('zoom-reset-btn');
// Element showing the current zoom level as a percentage.
const zoomDisplay = getElement<HTMLElement>('zoom-level-display');
/**
 * Syncs the zoom controls with the current zoom level: writes the rounded
 * percentage into the display and disables the zoom-out / zoom-in buttons
 * once the level reaches ZOOM_MIN / ZOOM_MAX. Missing elements are skipped.
 */
function updateZoomDisplay() {
  const { zoomLevel } = pageState;

  if (zoomDisplay) {
    const percent = Math.round(zoomLevel * 100);
    zoomDisplay.textContent = `${percent}%`;
  }
  if (zoomOutBtn) {
    zoomOutBtn.disabled = zoomLevel <= ZOOM_MIN;
  }
  if (zoomInBtn) {
    zoomInBtn.disabled = zoomLevel >= ZOOM_MAX;
  }
}
/**
 * Applies the current zoom level: refreshes the zoom controls, clears the
 * page-model and comparison caches, and re-renders both pages when two
 * documents are loaded. Render failures are reported via console.error.
 */
function applyZoom() {
  updateZoomDisplay();

  // The OCR model cache is deliberately not part of this list.
  const cachesToReset = [
    caches.pageModelCache,
    caches.comparisonCache,
    caches.comparisonResultsCache,
  ];
  for (const cache of cachesToReset) {
    cache.clear();
  }

  const bothDocumentsLoaded = Boolean(pageState.pdfDoc1 && pageState.pdfDoc2);
  if (!bothDocumentsLoaded) {
    return;
  }
  renderBothPages().catch(console.error);
}
// Zoom in: step up, round to two decimals, and cap at ZOOM_MAX.
zoomInBtn?.addEventListener('click', () => {
  const raised = Math.round((pageState.zoomLevel + ZOOM_STEP) * 100) / 100;
  pageState.zoomLevel = Math.min(raised, ZOOM_MAX);
  applyZoom();
});

// Zoom out: step down, round to two decimals, and floor at ZOOM_MIN.
zoomOutBtn?.addEventListener('click', () => {
  const lowered = Math.round((pageState.zoomLevel - ZOOM_STEP) * 100) / 100;
  pageState.zoomLevel = Math.max(lowered, ZOOM_MIN);
  applyZoom();
});

// Reset: return to the default zoom level of 1.0.
zoomResetBtn?.addEventListener('click', () => {
  pageState.zoomLevel = 1.0;
  applyZoom();
});
filterButtons.forEach(({ id, filter }) => { filterButtons.forEach(({ id, filter }) => {
const button = getElement<HTMLButtonElement>(id); const button = getElement<HTMLButtonElement>(id);
if (!button) return; if (!button) return;
@@ -739,6 +884,26 @@ document.addEventListener('DOMContentLoaded', function () {
}); });
}); });
// Category pills: clicking one flips that category's visibility flag, moves
// the active change back to the first entry, and re-renders the comparison UI.
const categoryKeys: Array<keyof CompareCategoryFilterState> = [
  'text',
  'image',
  'header-footer',
  'annotation',
  'formatting',
  'background',
];

categoryKeys.forEach((key) => {
  const pill = getElement<HTMLButtonElement>(`category-${key}`);
  pill?.addEventListener('click', () => {
    pageState.categoryFilter[key] = !pageState.categoryFilter[key];
    pageState.activeChangeIndex = 0;
    renderComparisonUI();
  });
});
if (ocrToggle) { if (ocrToggle) {
ocrToggle.checked = pageState.useOcr; ocrToggle.checked = pageState.useOcr;
ocrToggle.addEventListener('change', async function () { ocrToggle.addEventListener('change', async function () {

View File

@@ -3,6 +3,9 @@ import type {
ComparePageModel, ComparePageModel,
ComparePagePair, ComparePagePair,
ComparePageResult, ComparePageResult,
CompareRectangle,
CompareWordToken,
CompareTextItem,
RenderedPage, RenderedPage,
ComparisonPageLoad, ComparisonPageLoad,
DiffFocusRegion, DiffFocusRegion,
@@ -10,7 +13,7 @@ import type {
CompareRenderContext, CompareRenderContext,
} from '../compare/types.ts'; } from '../compare/types.ts';
import { extractPageModel } from '../compare/engine/extract-page-model.ts'; import { extractPageModel } from '../compare/engine/extract-page-model.ts';
import { comparePageModels } from '../compare/engine/compare-page-models.ts'; import { comparePageModelsAsync } from '../compare/engine/compare-page-models.ts';
import { renderVisualDiff } from '../compare/engine/visual-diff.ts'; import { renderVisualDiff } from '../compare/engine/visual-diff.ts';
import { recognizePageCanvas } from '../compare/engine/ocr-page.ts'; import { recognizePageCanvas } from '../compare/engine/ocr-page.ts';
import { isLowQualityExtractedText } from '../compare/engine/text-normalization.ts'; import { isLowQualityExtractedText } from '../compare/engine/text-normalization.ts';
@@ -48,7 +51,8 @@ export function hidePlaceholder(placeholderId: string) {
export function getRenderScale( export function getRenderScale(
page: pdfjsLib.PDFPageProxy, page: pdfjsLib.PDFPageProxy,
container: HTMLElement, container: HTMLElement,
viewMode: 'overlay' | 'side-by-side' viewMode: 'overlay' | 'side-by-side',
zoomLevel = 1.0
) { ) {
const baseViewport = page.getViewport({ scale: 1.0 }); const baseViewport = page.getViewport({ scale: 1.0 });
const availableWidth = Math.max( const availableWidth = Math.max(
@@ -61,7 +65,8 @@ export function getRenderScale(
? COMPARE_RENDER.MAX_SCALE_OVERLAY ? COMPARE_RENDER.MAX_SCALE_OVERLAY
: COMPARE_RENDER.MAX_SCALE_SIDE; : COMPARE_RENDER.MAX_SCALE_SIDE;
return Math.min(Math.max(fitScale, 1.0), maxScale); const baseScale = Math.min(Math.max(fitScale, 1.0), maxScale);
return baseScale * zoomLevel;
} }
export function getPageModelCacheKey( export function getPageModelCacheKey(
@@ -76,6 +81,72 @@ function shouldUseOcrForModel(model: ComparePageModel) {
return !model.hasText || isLowQualityExtractedText(model.plainText); return !model.hasText || isLowQualityExtractedText(model.plainText);
} }
/**
 * Returns a copy of `rect` with its horizontal fields multiplied by `scaleX`
 * and its vertical fields multiplied by `scaleY`.
 *
 * @param rect - Rectangle to scale; it is not modified.
 * @param scaleX - Factor applied to `x` and `width`.
 * @param scaleY - Factor applied to `y` and `height`.
 * @returns A new rectangle containing only the four scaled fields.
 */
function rescaleRect(
  rect: CompareRectangle,
  scaleX: number,
  scaleY: number
): CompareRectangle {
  const { x, y, width, height } = rect;
  return {
    x: x * scaleX,
    y: y * scaleY,
    width: width * scaleX,
    height: height * scaleY,
  };
}
/**
 * Returns a copy of `token` whose rectangle has been scaled; every other
 * field is carried over unchanged.
 *
 * @param token - Word token to scale; it is not modified.
 * @param scaleX - Horizontal scale factor.
 * @param scaleY - Vertical scale factor.
 * @returns A new token with the scaled `rect`.
 */
function rescaleWordToken(
  token: CompareWordToken,
  scaleX: number,
  scaleY: number
): CompareWordToken {
  const scaledRect = rescaleRect(token.rect, scaleX, scaleY);
  return { ...token, rect: scaledRect };
}
/**
 * Returns a scaled copy of a text item.
 *
 * The item's rectangle, its word tokens and its nested fragments (processed
 * recursively) are scaled on both axes; character-map entries are rebuilt
 * with `x` and `width` scaled horizontally. Optional collections that are
 * absent on the input come out as `undefined`.
 *
 * @param item - Text item to scale; it is not modified.
 * @param scaleX - Horizontal scale factor.
 * @param scaleY - Vertical scale factor.
 * @returns A new text item with scaled geometry.
 */
function rescaleTextItem(
  item: CompareTextItem,
  scaleX: number,
  scaleY: number
): CompareTextItem {
  const scaledCharMap = item.charMap?.map(({ x, width }) => ({
    x: x * scaleX,
    width: width * scaleX,
  }));
  const scaledTokens = item.wordTokens?.map((token) =>
    rescaleWordToken(token, scaleX, scaleY)
  );
  const scaledFragments = item.fragments?.map((fragment) =>
    rescaleTextItem(fragment, scaleX, scaleY)
  );

  return {
    ...item,
    rect: rescaleRect(item.rect, scaleX, scaleY),
    charMap: scaledCharMap,
    wordTokens: scaledTokens,
    fragments: scaledFragments,
  };
}
/**
 * Returns a copy of a cached page model rescaled to new page dimensions.
 *
 * Scale factors are derived from the cached and target sizes; the cached
 * size is floored at 1 to avoid dividing by zero. Text items are scaled via
 * `rescaleTextItem`. When the model carries annotations or image references,
 * their rectangles are scaled as well so that all geometry on the returned
 * model agrees with its new `width`/`height`.
 *
 * @param model - Cached page model; it is not modified.
 * @param cachedWidth - Page width the cached coordinates were captured at.
 * @param cachedHeight - Page height the cached coordinates were captured at.
 * @param targetWidth - Page width to scale to.
 * @param targetHeight - Page height to scale to.
 * @returns A new page model sized `targetWidth` x `targetHeight`.
 */
function rescalePageModel(
  model: ComparePageModel,
  cachedWidth: number,
  cachedHeight: number,
  targetWidth: number,
  targetHeight: number
): ComparePageModel {
  const scaleX = targetWidth / Math.max(cachedWidth, 1);
  const scaleY = targetHeight / Math.max(cachedHeight, 1);

  const rescaled: ComparePageModel = {
    ...model,
    width: targetWidth,
    height: targetHeight,
    textItems: model.textItems.map((item) =>
      rescaleTextItem(item, scaleX, scaleY)
    ),
  };

  // Only touch the optional collections when present, so models without them
  // keep exactly the same shape as before.
  if (model.annotations) {
    rescaled.annotations = model.annotations.map((annotation) => ({
      ...annotation,
      rect: rescaleRect(annotation.rect, scaleX, scaleY),
    }));
  }
  if (model.images) {
    // NOTE(review): only `rect` is scaled; the image's own `width`/`height`
    // are assumed to be intrinsic dimensions rather than page coordinates —
    // confirm against extractImages.
    rescaled.images = model.images.map((image) => ({
      ...image,
      rect: rescaleRect(image.rect, scaleX, scaleY),
    }));
  }

  return rescaled;
}
/**
 * Builds the OCR cache key for one page of one document side.
 *
 * @param side - Identifier of the document side.
 * @param pageNum - Page number within that document.
 * @returns The key in the form `<side>-<pageNum>`.
 */
function getOcrCacheKey(side: string, pageNum: number) {
  const separator = '-';
  return `${side}${separator}${pageNum}`;
}
export function buildDiffFocusRegion( export function buildDiffFocusRegion(
comparison: ComparePageResult, comparison: ComparePageResult,
leftCanvas: HTMLCanvasElement, leftCanvas: HTMLCanvasElement,
@@ -164,7 +235,12 @@ export async function renderPage(
const page = await pdfDoc.getPage(pageNum); const page = await pdfDoc.getPage(pageNum);
const targetScale = getRenderScale(page, container, ctx.viewMode); const targetScale = getRenderScale(
page,
container,
ctx.viewMode,
ctx.zoomLevel
);
const scaledViewport = page.getViewport({ scale: targetScale }); const scaledViewport = page.getViewport({ scale: targetScale });
const dpr = window.devicePixelRatio || 1; const dpr = window.devicePixelRatio || 1;
const hiResViewport = page.getViewport({ scale: targetScale * dpr }); const hiResViewport = page.getViewport({ scale: targetScale * dpr });
@@ -192,6 +268,18 @@ export async function renderPage(
let finalModel = model; let finalModel = model;
if (!cachedModel && ctx.useOcr && shouldUseOcrForModel(model)) { if (!cachedModel && ctx.useOcr && shouldUseOcrForModel(model)) {
const ocrKey = getOcrCacheKey(cacheKeyPrefix, pageNum);
const cachedOcr = caches.ocrModelCache.get(ocrKey);
if (cachedOcr) {
finalModel = rescalePageModel(
cachedOcr.model,
cachedOcr.width,
cachedOcr.height,
scaledViewport.width,
scaledViewport.height
);
finalModel.pageNumber = pageNum;
} else {
ctx.showLoader(`Running OCR on page ${pageNum}...`); ctx.showLoader(`Running OCR on page ${pageNum}...`);
const ocrModel = await recognizePageCanvas( const ocrModel = await recognizePageCanvas(
canvas, canvas,
@@ -204,6 +292,12 @@ export async function renderPage(
...ocrModel, ...ocrModel,
pageNumber: pageNum, pageNumber: pageNum,
}; };
caches.ocrModelCache.set(ocrKey, {
model: finalModel,
width: scaledViewport.width,
height: scaledViewport.height,
});
}
} }
caches.pageModelCache.set(cacheKey, finalModel); caches.pageModelCache.set(cacheKey, finalModel);
@@ -276,11 +370,29 @@ export async function loadComparisonPage(
let finalModel = extractedModel; let finalModel = extractedModel;
if (ctx.useOcr && shouldUseOcrForModel(extractedModel)) { if (ctx.useOcr && shouldUseOcrForModel(extractedModel)) {
const ocrKey = getOcrCacheKey(side, pageNum);
const cachedOcr = caches.ocrModelCache.get(ocrKey);
if (cachedOcr) {
finalModel = rescalePageModel(
cachedOcr.model,
cachedOcr.width,
cachedOcr.height,
viewport.width,
viewport.height
);
finalModel.pageNumber = pageNum;
} else {
const ocrModel = await recognizePageCanvas(canvas, ctx.ocrLanguage); const ocrModel = await recognizePageCanvas(canvas, ctx.ocrLanguage);
finalModel = { finalModel = {
...ocrModel, ...ocrModel,
pageNumber: pageNum, pageNumber: pageNum,
}; };
caches.ocrModelCache.set(ocrKey, {
model: finalModel,
width: viewport.width,
height: viewport.height,
});
}
} }
canvas.width = 0; canvas.width = 0;
@@ -330,7 +442,10 @@ export async function computeComparisonForPair(
ctx ctx
); );
const comparison = comparePageModels(leftPage.model, rightPage.model); const comparison = await comparePageModelsAsync(
leftPage.model,
rightPage.model
);
comparison.confidence = pair.confidence; comparison.confidence = pair.confidence;
if ( if (

View File

@@ -209,6 +209,14 @@
background: rgba(245, 158, 11, 0.28); background: rgba(245, 158, 11, 0.28);
} }
.compare-highlight.moved {
background: rgba(168, 85, 247, 0.28);
}
.compare-highlight.style-changed {
background: rgba(59, 130, 246, 0.28);
}
.compare-highlight.active { .compare-highlight.active {
outline: 2px solid rgba(99, 102, 241, 0.7); outline: 2px solid rgba(99, 102, 241, 0.7);
outline-offset: 1px; outline-offset: 1px;
@@ -232,11 +240,6 @@
display: none; display: none;
} }
.compare-change-item.active {
border-color: #818cf8;
background: rgba(79, 70, 229, 0.12);
}
.compare-sidebar { .compare-sidebar {
display: flex; display: flex;
flex-direction: column; flex-direction: column;
@@ -245,30 +248,50 @@
border: 1px solid rgba(51, 65, 85, 0.5); border: 1px solid rgba(51, 65, 85, 0.5);
border-radius: 0.75rem; border-radius: 0.75rem;
overflow: hidden; overflow: hidden;
height: clamp(36rem, 82vh, 72rem); min-height: 0;
} }
.compare-sidebar-header { .compare-sidebar-header {
padding: 0.75rem 1rem; padding: 0.5rem 0.75rem;
border-bottom: 1px solid rgba(51, 65, 85, 0.5); border-bottom: 1px solid rgba(51, 65, 85, 0.5);
} }
.compare-sidebar-filters { .compare-sidebar-filters {
display: flex; display: flex;
align-items: center; flex-direction: column;
gap: 0.375rem; gap: 0;
padding: 0.625rem 1rem; padding: 0;
border-bottom: 1px solid rgba(51, 65, 85, 0.4); border-bottom: 1px solid rgba(51, 65, 85, 0.4);
}
.compare-filter-group {
display: flex;
flex-wrap: wrap; flex-wrap: wrap;
align-items: center;
gap: 0.25rem;
padding: 0.375rem 0.75rem;
}
.compare-filter-group + .compare-filter-group {
border-top: 1px solid rgba(51, 65, 85, 0.25);
}
.compare-filter-label {
width: 100%;
font-size: 0.5625rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.08em;
color: #64748b;
} }
.compare-pill { .compare-pill {
display: inline-flex; display: inline-flex;
align-items: center; align-items: center;
gap: 0.25rem; gap: 0.1875rem;
border-radius: 9999px; border-radius: 9999px;
padding: 0.25rem 0.625rem; padding: 0.125rem 0.5rem;
font-size: 0.6875rem; font-size: 0.625rem;
font-weight: 600; font-weight: 600;
border: 1px solid transparent; border: 1px solid transparent;
cursor: pointer; cursor: pointer;
@@ -309,51 +332,130 @@
border-color: rgba(34, 197, 94, 0.5); border-color: rgba(34, 197, 94, 0.5);
} }
.compare-pill.moved {
color: #c4b5fd;
background: rgba(168, 85, 247, 0.1);
border-color: rgba(168, 85, 247, 0.15);
}
.compare-pill.moved.active {
background: rgba(168, 85, 247, 0.25);
border-color: rgba(168, 85, 247, 0.5);
}
.compare-pill.style-changed {
color: #93c5fd;
background: rgba(59, 130, 246, 0.1);
border-color: rgba(59, 130, 246, 0.15);
}
.compare-pill.style-changed.active {
background: rgba(59, 130, 246, 0.25);
border-color: rgba(59, 130, 246, 0.5);
}
.compare-pill.category {
color: #a5b4fc;
background: rgba(99, 102, 241, 0.1);
border-color: rgba(99, 102, 241, 0.15);
}
.compare-pill.category.active {
background: rgba(99, 102, 241, 0.25);
border-color: rgba(99, 102, 241, 0.5);
}
.compare-pill.category.disabled {
opacity: 0.35;
text-decoration: line-through;
}
.compare-change-list { .compare-change-list {
flex: 1; flex: 1;
min-height: 0; min-height: 0;
overflow-y: auto; overflow-y: auto;
padding: 0.75rem;
}
#compare-change-list {
display: flex; display: flex;
flex-direction: column; flex-direction: column;
gap: 1rem; gap: 0.5rem;
padding: 1rem;
} }
.compare-change-item { .compare-change-item {
display: flex; padding: 0.5rem 0.75rem;
align-items: flex-start;
gap: 0.625rem;
padding: 0.75rem 1rem;
cursor: pointer; cursor: pointer;
transition: background 0.1s; transition: background 0.15s;
border: 1px solid rgba(51, 65, 85, 0.3); border: 1px solid rgba(51, 65, 85, 0.3);
border-left: 2px solid transparent;
border-radius: 0.5rem; border-radius: 0.5rem;
font-size: 0.8125rem; font-size: 0.8125rem;
color: #cbd5e1; color: #cbd5e1;
line-height: 1.4; line-height: 1.5;
} }
.compare-change-item:hover { .compare-change-item:hover {
background: rgba(99, 102, 241, 0.08); background: rgba(99, 102, 241, 0.06);
border-color: rgba(99, 102, 241, 0.2);
} }
.compare-change-item.active { .compare-change-item.active {
background: rgba(99, 102, 241, 0.15); background: rgba(99, 102, 241, 0.1);
border-left: 2px solid #818cf8; border-color: rgba(99, 102, 241, 0.3);
}
.compare-section-header {
display: flex;
align-items: center;
gap: 0.375rem;
padding: 0.25rem 0;
margin-top: 0.25rem;
}
.compare-section-label {
font-size: 0.6875rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.compare-section-label.added,
.compare-section-label.page-added {
color: #86efac;
}
.compare-section-label.removed,
.compare-section-label.page-removed {
color: #fca5a5;
}
.compare-section-label.modified {
color: #fcd34d;
}
.compare-section-label.moved {
color: #c4b5fd;
}
.compare-section-label.style-changed {
color: #93c5fd;
}
.compare-section-count {
font-size: 0.625rem;
font-weight: 600;
color: #64748b;
}
.compare-section-line {
flex: 1;
height: 1px;
background: rgba(51, 65, 85, 0.4);
} }
.compare-change-dot { .compare-change-dot {
width: 0.5rem; width: 0.375rem;
height: 0.5rem; height: 0.375rem;
border-radius: 50%; border-radius: 50%;
flex-shrink: 0; flex-shrink: 0;
} }
.compare-change-item .compare-change-dot {
margin-top: 0.35rem;
}
.compare-change-dot.added { .compare-change-dot.added {
background: #22c55e; background: #22c55e;
} }
@@ -369,36 +471,18 @@
.compare-change-dot.page-removed { .compare-change-dot.page-removed {
background: #ef4444; background: #ef4444;
} }
/* Dot colours for the two additional change types: purple for moved,
   blue for style-changed. */
.compare-change-dot.moved {
background: #a855f7;
}
.compare-change-dot.style-changed {
background: #3b82f6;
}
.compare-change-desc { .compare-change-desc {
flex: 1; font-size: 0.8125rem;
min-width: 0; color: #e2e8f0;
}
.compare-change-desc-text {
white-space: normal;
overflow-wrap: anywhere; overflow-wrap: anywhere;
} white-space: pre-line;
.compare-change-type {
font-size: 0.625rem;
font-weight: 600;
text-transform: uppercase;
letter-spacing: 0.04em;
flex-shrink: 0;
margin-top: 0.2rem;
}
.compare-change-type.added,
.compare-change-type.page-added {
color: #86efac;
}
.compare-change-type.removed,
.compare-change-type.page-removed {
color: #fca5a5;
}
.compare-change-type.modified {
color: #fcd34d;
} }
.compare-change-empty { .compare-change-empty {
@@ -415,7 +499,7 @@
.compare-sidebar { .compare-sidebar {
height: auto; height: auto;
max-height: 20rem; max-height: 24rem;
} }
.compare-viewer-wrapper.side-by-side-mode { .compare-viewer-wrapper.side-by-side-mode {
@@ -592,6 +676,35 @@
<i data-lucide="chevron-down" class="w-3.5 h-3.5"></i> <i data-lucide="chevron-down" class="w-3.5 h-3.5"></i>
</button> </button>
<div class="border-l border-gray-700 h-5 mx-1"></div>
<button
id="zoom-out-btn"
class="btn p-1.5 rounded bg-gray-700 hover:bg-gray-600 disabled:opacity-50"
title="Zoom out"
>
<i data-lucide="minus" class="w-3.5 h-3.5"></i>
</button>
<span
id="zoom-level-display"
class="text-xs text-gray-300 font-medium min-w-[3rem] text-center select-none"
>100%</span
>
<button
id="zoom-in-btn"
class="btn p-1.5 rounded bg-gray-700 hover:bg-gray-600 disabled:opacity-50"
title="Zoom in"
>
<i data-lucide="plus" class="w-3.5 h-3.5"></i>
</button>
<button
id="zoom-reset-btn"
class="btn px-2.5 py-1 rounded bg-gray-700 hover:bg-gray-600 text-xs font-semibold"
title="Reset zoom to fit"
>
Fit
</button>
<div class="flex-1"></div> <div class="flex-1"></div>
<div id="overlay-controls" class="hidden flex items-center gap-2"> <div id="overlay-controls" class="hidden flex items-center gap-2">
@@ -624,6 +737,17 @@
/> />
Sync scroll Sync scroll
</label> </label>
<label
class="flex items-center gap-1.5 text-xs text-gray-300 cursor-pointer"
>
<input
type="checkbox"
id="ocr-toggle"
checked
class="w-3.5 h-3.5 rounded text-indigo-600 bg-gray-700 border-gray-600"
/>
OCR
</label>
</div> </div>
<div class="relative" id="export-dropdown-wrapper"> <div class="relative" id="export-dropdown-wrapper">
@@ -732,6 +856,8 @@
</div> </div>
<div class="compare-sidebar-filters"> <div class="compare-sidebar-filters">
<div class="compare-filter-group">
<div class="compare-filter-label">Change Type</div>
<button id="filter-removed" class="compare-pill removed"> <button id="filter-removed" class="compare-pill removed">
<span class="compare-change-dot removed"></span> <span class="compare-change-dot removed"></span>
<span id="summary-removed-count">0</span> Deleted <span id="summary-removed-count">0</span> Deleted
@@ -744,23 +870,57 @@
<span class="compare-change-dot modified"></span> <span class="compare-change-dot modified"></span>
<span id="summary-modified-count">0</span> Modified <span id="summary-modified-count">0</span> Modified
</button> </button>
<label <button id="filter-moved" class="compare-pill moved">
class="compare-pill" <span class="compare-change-dot moved"></span>
style=" <span id="summary-moved-count">0</span> Moved
color: #94a3b8; </button>
background: rgba(51, 65, 85, 0.3); <button
border-color: rgba(51, 65, 85, 0.4); id="filter-style-changed"
cursor: pointer; class="compare-pill style-changed"
"
> >
<input <span class="compare-change-dot style-changed"></span>
id="ocr-toggle" <span id="summary-style-changed-count">0</span> Style
type="checkbox" </button>
checked </div>
class="w-3 h-3 rounded text-indigo-600 bg-gray-700 border-gray-600" <div class="compare-filter-group">
/> <div class="compare-filter-label">Content</div>
OCR <button
</label> id="category-text"
class="compare-pill category active"
>
<span id="category-count-text">0</span> Text
</button>
<button
id="category-image"
class="compare-pill category active"
>
<span id="category-count-image">0</span> Images
</button>
<button
id="category-header-footer"
class="compare-pill category active"
>
<span id="category-count-header-footer">0</span> Headers
</button>
<button
id="category-annotation"
class="compare-pill category active"
>
<span id="category-count-annotation">0</span> Annotations
</button>
<button
id="category-formatting"
class="compare-pill category active"
>
<span id="category-count-formatting">0</span> Formatting
</button>
<button
id="category-background"
class="compare-pill category active"
>
<span id="category-count-background">0</span> Backgrounds
</button>
</div>
</div> </div>
<div class="compare-change-list"> <div class="compare-change-list">

View File

@@ -6,7 +6,11 @@ import {
mergeIntoLines, mergeIntoLines,
sortCompareTextItems, sortCompareTextItems,
} from '@/js/compare/engine/extract-page-model.ts'; } from '@/js/compare/engine/extract-page-model.ts';
import type { ComparePageModel, CompareTextItem } from '@/js/compare/types.ts'; import type {
ComparePageModel,
CompareTextItem,
CompareWordToken,
} from '@/js/compare/types.ts';
function makeItem(id: string, text: string): CompareTextItem { function makeItem(id: string, text: string): CompareTextItem {
return { return {
@@ -39,7 +43,13 @@ describe('diffTextRuns', () => {
[makeItem('a', 'Hello'), makeItem('c', 'there')] [makeItem('a', 'Hello'), makeItem('c', 'there')]
); );
expect(result.summary).toEqual({ added: 0, removed: 0, modified: 1 }); expect(result.summary).toEqual({
added: 0,
removed: 0,
modified: 1,
moved: 0,
styleChanged: 0,
});
expect(result.changes).toHaveLength(1); expect(result.changes).toHaveLength(1);
expect(result.changes[0].type).toBe('modified'); expect(result.changes[0].type).toBe('modified');
expect(result.changes[0].beforeText).toBe('world'); expect(result.changes[0].beforeText).toBe('world');
@@ -52,7 +62,13 @@ describe('diffTextRuns', () => {
[makeItem('a', 'Hello'), makeItem('b', 'again')] [makeItem('a', 'Hello'), makeItem('b', 'again')]
); );
expect(result.summary).toEqual({ added: 1, removed: 0, modified: 0 }); expect(result.summary).toEqual({
added: 1,
removed: 0,
modified: 0,
moved: 0,
styleChanged: 0,
});
expect(result.changes[0].type).toBe('added'); expect(result.changes[0].type).toBe('added');
}); });
@@ -86,7 +102,13 @@ describe('diffTextRuns', () => {
); );
expect(result.changes).toHaveLength(2); expect(result.changes).toHaveLength(2);
expect(result.summary).toEqual({ added: 1, removed: 0, modified: 1 }); expect(result.summary).toEqual({
added: 1,
removed: 0,
modified: 1,
moved: 0,
styleChanged: 0,
});
expect( expect(
result.changes.some( result.changes.some(
(change) => (change) =>
@@ -308,6 +330,221 @@ describe('mergeIntoLines', () => {
); );
expect(result.changes).toHaveLength(0); expect(result.changes).toHaveLength(0);
expect(result.summary).toEqual({ added: 0, removed: 0, modified: 0 }); expect(result.summary).toEqual({
added: 0,
removed: 0,
modified: 0,
moved: 0,
styleChanged: 0,
});
});
});
/**
 * Test fixture: builds a text item whose word tokens carry font metadata.
 *
 * The text is split on whitespace and every non-empty word becomes a token.
 * The item is treated as 10 units wide, so each character gets an equal share
 * of that width and a token's `x`/`width` follow from its character offset and
 * length. The same `fontName` / `fontSize` are copied onto every token.
 *
 * @param id - Identifier stored on the returned item.
 * @param text - Raw text; also used unchanged as `normalizedText`.
 * @param fontName - Optional font name applied to each word token.
 * @param fontSize - Optional font size applied to each word token.
 * @returns An item with a fixed 10x10 rect at the origin and one token per word.
 */
function makeItemWithTokens(
  id: string,
  text: string,
  fontName?: string,
  fontSize?: number
): CompareTextItem {
  // Guard against division by zero for empty text.
  const unitWidth = 10 / Math.max(text.length, 1);
  const wordTokens: CompareWordToken[] = [];

  // Search position advances past each word so repeated words map to
  // successive occurrences rather than always the first one.
  let cursor = 0;
  for (const word of text.split(/\s+/)) {
    if (!word) {
      continue;
    }
    const start = text.indexOf(word, cursor);
    cursor = start + word.length;
    wordTokens.push({
      word,
      compareWord: word.toLowerCase(),
      rect: {
        x: start * unitWidth,
        y: 0,
        width: word.length * unitWidth,
        height: 10,
      },
      fontName,
      fontSize,
    });
  }

  return {
    id,
    text,
    normalizedText: text,
    rect: { x: 0, y: 0, width: 10, height: 10 },
    wordTokens,
  };
}
// Style-change detection: both sides carry the same text and differ (or not)
// only in the font metadata attached to their word tokens.
describe('detectStyleChanges', () => {
  /** Diffs one sentence against itself, rendered with the given font settings. */
  const diffSameText = (
    text: string,
    before: [string, number],
    after: [string, number]
  ) =>
    diffTextRuns(
      [makeItemWithTokens('a', text, ...before)],
      [makeItemWithTokens('b', text, ...after)]
    );

  it('detects font name change on identical text', () => {
    const { summary, changes } = diffSameText(
      'Hello world test',
      ['Arial', 12],
      ['Times', 12]
    );

    expect(summary.styleChanged).toBe(1);
    expect(changes.some((change) => change.type === 'style-changed')).toBe(
      true
    );
  });

  it('detects font size change on identical text', () => {
    const { summary, changes } = diffSameText(
      'Hello world test',
      ['Arial', 12],
      ['Arial', 16]
    );

    expect(summary.styleChanged).toBe(1);
    const styleChange = changes.find(
      (change) => change.type === 'style-changed'
    )!;
    expect(styleChange.beforeText).toBe('Hello world test');
  });

  it('ignores negligible font size difference', () => {
    // 12 vs 12.3 must not be reported as a style change.
    const { summary } = diffSameText(
      'Same text here',
      ['Arial', 12],
      ['Arial', 12.3]
    );

    expect(summary.styleChanged).toBe(0);
  });

  it('reports no style change when fonts match', () => {
    const { summary, changes } = diffSameText(
      'Identical font',
      ['Arial', 12],
      ['Arial', 12]
    );

    expect(changes).toHaveLength(0);
    expect(summary.styleChanged).toBe(0);
  });

  it('ignores pdfjs document-scoped font name prefixes', () => {
    // Font names that differ only in the `g_dN_` part must compare as equal.
    const { summary, changes } = diffSameText(
      'Same font here',
      ['g_d0_f3', 12],
      ['g_d1_f3', 12]
    );

    expect(changes).toHaveLength(0);
    expect(summary.styleChanged).toBe(0);
  });
});
// Move detection: the same text appearing at a different position should be
// reported as 'moved' rather than as a removed/added pair.
describe('detectMovedText', () => {
  it('detects moved text block with identical words', () => {
    // The two paragraphs swap places between the before and after sides.
    const before = [
      makeItem('a', 'Introduction to the topic'),
      makeItem('b', 'Another paragraph here'),
    ];
    const after = [
      makeItem('c', 'Another paragraph here'),
      makeItem('d', 'Introduction to the topic'),
    ];

    const { summary, changes } = diffTextRuns(before, after);
    const hasType = (type: string) =>
      changes.some((change) => change.type === type);

    expect(summary.moved).toBeGreaterThanOrEqual(1);
    expect(hasType('moved')).toBe(true);
    expect(hasType('removed')).toBe(false);
    expect(hasType('added')).toBe(false);
  });

  it('does not detect move for short text', () => {
    // Single short words swapping places must not count as moves.
    const before = [makeItem('a', 'Hi'), makeItem('b', 'World')];
    const after = [makeItem('c', 'World'), makeItem('d', 'Hi')];

    const { summary } = diffTextRuns(before, after);

    expect(summary.moved).toBe(0);
  });

  it('does not detect move when text is dissimilar', () => {
    const before = [makeItem('a', 'This is the first paragraph with details')];
    const after = [
      makeItem('b', 'Completely different content and wording here'),
    ];

    const { summary } = diffTextRuns(before, after);

    expect(summary.moved).toBe(0);
  });
});
// Text without whitespace word boundaries (CJK scripts) must still be diffed
// below whole-item granularity. The fixtures are Japanese (kanji mixed with
// katakana and hiragana), so the first title says "Japanese" rather than
// "Chinese".
describe('CJK segmentation in diffTextRuns', () => {
  it('segments Japanese text into words', () => {
    // Only the sentence ending differs between the two sides.
    const result = diffTextRuns(
      [makeItem('a', '日本語テストです')],
      [makeItem('b', '日本語テストでした')]
    );

    expect(result.changes.length).toBeGreaterThan(0);
    expect(result.summary.modified).toBeGreaterThanOrEqual(1);
  });

  it('reports no changes for identical CJK text', () => {
    const result = diffTextRuns(
      [makeItem('a', '日本語テストです')],
      [makeItem('b', '日本語テストです')]
    );

    expect(result.changes).toHaveLength(0);
  });
});
// Verifies the `category` assigned to each change and the per-category
// totals (`categorySummary`) returned by comparePageModels.
describe('content categories', () => {
it('assigns text category to added/removed/modified changes', () => {
// One word differs, so exactly one change is expected.
// NOTE(review): presumably that change is 'modified'; 'added' and 'removed'
// are not exercised by this case despite the title - confirm.
const result = diffTextRuns(
[makeItem('a', 'Hello world')],
[makeItem('b', 'Hello there')]
);
expect(result.changes).toHaveLength(1);
expect(result.changes[0].category).toBe('text');
});
it('assigns formatting category to style-changed changes', () => {
// Same text on both sides; only the font name differs.
const result = diffTextRuns(
[makeItemWithTokens('a', 'Hello world test', 'Arial', 12)],
[makeItemWithTokens('b', 'Hello world test', 'Times', 12)]
);
const styleChange = result.changes.find((c) => c.type === 'style-changed');
expect(styleChange).toBeDefined();
expect(styleChange!.category).toBe('formatting');
});
it('assigns text category to moved changes', () => {
// The two paragraphs swap places between the before and after sides.
const result = diffTextRuns(
[
makeItem('a', 'Introduction to the topic'),
makeItem('b', 'Another paragraph here'),
],
[
makeItem('c', 'Another paragraph here'),
makeItem('d', 'Introduction to the topic'),
]
);
const movedChange = result.changes.find((c) => c.type === 'moved');
expect(movedChange).toBeDefined();
expect(movedChange!.category).toBe('text');
});
it('includes categorySummary on page comparison result', () => {
const result = comparePageModels(
makePage(1, [makeItem('a', 'Hello')]),
makePage(1, [makeItem('b', 'World')])
);
expect(result.categorySummary).toBeDefined();
// Sum the counts across all categories; at least one change must be counted.
const total = Object.values(result.categorySummary).reduce(
(a, b) => a + b,
0
);
expect(total).toBeGreaterThanOrEqual(1);
});
it('assigns text category to page-removed changes', () => {
// The second page model is null, i.e. the page has no counterpart.
const result = comparePageModels(
makePage(1, [makeItem('a', 'Only')]),
null
);
expect(result.changes[0].category).toBe('text');
expect(result.categorySummary.text).toBe(1);
}); });
}); });