diff --git a/package-lock.json b/package-lock.json index 8c37d7a..35943af 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,6 +10,7 @@ "dependencies": { "@ffmpeg/ffmpeg": "^0.12.15", "@ffmpeg/util": "^0.12.2", + "docx": "^9.6.0", "fast-xml-parser": "^5.4.2", "framer-motion": "^12.35.2", "html2canvas-pro": "^2.0.2", @@ -20,9 +21,10 @@ "marked": "^17.0.4", "next": "16.1.6", "papaparse": "^5.5.3", - "pdf-lib": "^1.17.1", + "pdfjs-dist": "^5.5.207", "react": "19.2.3", - "react-dom": "19.2.3" + "react-dom": "19.2.3", + "smol-toml": "^1.6.0" }, "devDependencies": { "@tailwindcss/postcss": "^4", @@ -1074,6 +1076,256 @@ "@jridgewell/sourcemap-codec": "^1.4.14" } }, + "node_modules/@napi-rs/canvas": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.96.tgz", + "integrity": "sha512-6NNmNxvoJKeucVjxaaRUt3La2i5jShgiAbaY3G/72s1Vp3U06XPrAIxkAjBxpDcamEn/t+WJ4OOlGmvILo4/Ew==", + "license": "MIT", + "optional": true, + "workspaces": [ + "e2e/*" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + }, + "optionalDependencies": { + "@napi-rs/canvas-android-arm64": "0.1.96", + "@napi-rs/canvas-darwin-arm64": "0.1.96", + "@napi-rs/canvas-darwin-x64": "0.1.96", + "@napi-rs/canvas-linux-arm-gnueabihf": "0.1.96", + "@napi-rs/canvas-linux-arm64-gnu": "0.1.96", + "@napi-rs/canvas-linux-arm64-musl": "0.1.96", + "@napi-rs/canvas-linux-riscv64-gnu": "0.1.96", + "@napi-rs/canvas-linux-x64-gnu": "0.1.96", + "@napi-rs/canvas-linux-x64-musl": "0.1.96", + "@napi-rs/canvas-win32-arm64-msvc": "0.1.96", + "@napi-rs/canvas-win32-x64-msvc": "0.1.96" + } + }, + "node_modules/@napi-rs/canvas-android-arm64": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-android-arm64/-/canvas-android-arm64-0.1.96.tgz", + "integrity": "sha512-ew1sPrN3dGdZ3L4FoohPfnjq0f9/Jk7o+wP7HkQZokcXgIUD6FIyICEWGhMYzv53j63wUcPvZeAwgewX58/egg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-darwin-arm64": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-arm64/-/canvas-darwin-arm64-0.1.96.tgz", + "integrity": "sha512-Q/wOXZ5PzTqpdmA5eUOcegCf4Go/zz3aZ5DlzSeDpOjFmfwMKh8EzLAoweQ+mJVagcHQyzoJhaTEnrO68TNyNg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-darwin-x64": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-darwin-x64/-/canvas-darwin-x64-0.1.96.tgz", + "integrity": "sha512-UrXiQz28tQEvGM1qvyptewOAfmUrrd5+wvi6Rzjj2VprZI8iZ2KIvBD2lTTG1bVF95AbeDeG7PJA0D9sLKaOFA==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-arm-gnueabihf": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm-gnueabihf/-/canvas-linux-arm-gnueabihf-0.1.96.tgz", + "integrity": "sha512-I90ODxweD8aEP6XKU/NU+biso95MwCtQ2F46dUvhec1HesFi0tq/tAJkYic/1aBSiO/1kGKmSeD1B0duOHhEHQ==", + "cpu": [ + "arm" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-gnu": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-gnu/-/canvas-linux-arm64-gnu-0.1.96.tgz", + "integrity": "sha512-Dx/0+RFV++w3PcRy+4xNXkghhXjA5d0Mw1bs95emn5Llinp1vihMaA6WJt3oYv2LAHc36+gnrhIBsPhUyI2SGw==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-arm64-musl": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-arm64-musl/-/canvas-linux-arm64-musl-0.1.96.tgz", + "integrity": "sha512-UvOi7fii3IE2KDfEfhh8m+LpzSRvhGK7o1eho99M2M0HTik11k3GX+2qgVx9EtujN3/bhFFS1kSO3+vPMaJ0Mg==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-riscv64-gnu": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-riscv64-gnu/-/canvas-linux-riscv64-gnu-0.1.96.tgz", + "integrity": "sha512-MBSukhGCQ5nRtf9NbFYWOU080yqkZU1PbuH4o1ROvB4CbPl12fchDR35tU83Wz8gWIM9JTn99lBn9DenPIv7Ig==", + "cpu": [ + "riscv64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-gnu": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-gnu/-/canvas-linux-x64-gnu-0.1.96.tgz", + "integrity": "sha512-I/ccu2SstyKiV3HIeVzyBIWfrJo8cN7+MSQZPnabewWV6hfJ2nY7Df2WqOHmobBRUw84uGR6zfQHsUEio/m5Vg==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-linux-x64-musl": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-linux-x64-musl/-/canvas-linux-x64-musl-0.1.96.tgz", + "integrity": "sha512-H3uov7qnTl73GDT4h52lAqpJPsl1tIUyNPWJyhQ6gHakohNqqRq3uf80+NEpzcytKGEOENP1wX3yGwZxhjiWEQ==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-win32-arm64-msvc": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-arm64-msvc/-/canvas-win32-arm64-msvc-0.1.96.tgz", + "integrity": "sha512-ATp6Y+djOjYtkfV/VRH7CZ8I1MEtkUQBmKUbuWw5zWEHHqfL0cEcInE4Cxgx7zkNAhEdBbnH8HMVrqNp+/gwxA==", + "cpu": [ + "arm64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, + "node_modules/@napi-rs/canvas-win32-x64-msvc": { + "version": "0.1.96", + "resolved": "https://registry.npmjs.org/@napi-rs/canvas-win32-x64-msvc/-/canvas-win32-x64-msvc-0.1.96.tgz", + "integrity": "sha512-UYGdTltVd+Z8mcIuoqGmAXXUvwH5CLf2M6mIB5B0/JmX5J041jETjqtSYl7gN+aj3k1by/SG6sS0hAwCqyK7zw==", + "cpu": [ + "x64" + ], + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Brooooooklyn" + } + }, "node_modules/@napi-rs/wasm-runtime": { "version": "0.2.12", "resolved": "https://registry.npmjs.org/@napi-rs/wasm-runtime/-/wasm-runtime-0.2.12.tgz", @@ -1279,36 +1531,6 @@ "node": ">=12.4.0" } }, - "node_modules/@pdf-lib/standard-fonts": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/@pdf-lib/standard-fonts/-/standard-fonts-1.0.0.tgz", - "integrity": "sha512-hU30BK9IUN/su0Mn9VdlVKsWBS6GyhVfqjwl1FjZN4TxP6cCw0jP2w7V3Hf5uX7M0AZJ16vey9yE0ny7Sa59ZA==", - "license": "MIT", - "dependencies": { - "pako": "^1.0.6" - } - }, - "node_modules/@pdf-lib/standard-fonts/node_modules/pako": { - "version": "1.0.11", - "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", - "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", - "license": "(MIT AND Zlib)" - }, - "node_modules/@pdf-lib/upng": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@pdf-lib/upng/-/upng-1.0.1.tgz", - "integrity": "sha512-dQK2FUMQtowVP00mtIksrlZhdFXQZPC+taih1q4CvPZ5vqdxR/LKBaFg0oAfzd1GlHZXXSPdQfzQnt+ViGvEIQ==", - "license": "MIT", - "dependencies": { - "pako": "^1.0.10" - } - }, - "node_modules/@pdf-lib/upng/node_modules/pako": { - "version": "1.0.11", - "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", - "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", - "license": "(MIT AND Zlib)" - }, "node_modules/@rtsao/scc": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@rtsao/scc/-/scc-1.1.0.tgz", @@ -3022,6 +3244,56 @@ "node": ">=0.10.0" } }, + "node_modules/docx": { + "version": "9.6.0", + "resolved": "https://registry.npmjs.org/docx/-/docx-9.6.0.tgz", + "integrity": "sha512-y6EaJJMDvt4P7wgGQB9KsZf4wsRkQMJfkc9LlNufRshggI5BT35hGNkXBCAeEoI3MLMwApKguxzjdqqVcBCqNA==", + "license": "MIT", + "dependencies": { + "@types/node": "^25.2.3", + "hash.js": "^1.1.7", + "jszip": "^3.10.1", + "nanoid": "^5.1.3", + "xml": "^1.0.1", + "xml-js": "^1.6.8" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/docx/node_modules/@types/node": { + "version": "25.3.5", + "resolved": "https://registry.npmjs.org/@types/node/-/node-25.3.5.tgz", + "integrity": "sha512-oX8xrhvpiyRCQkG1MFchB09f+cXftgIXb3a7UUa4Y3wpmZPw5tyZGTLWhlESOLq1Rq6oDlc8npVU2/9xiCuXMA==", + "license": "MIT", + "dependencies": { + "undici-types": "~7.18.0" + } + }, + "node_modules/docx/node_modules/nanoid": { + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-5.1.6.tgz", + "integrity": "sha512-c7+7RQ+dMB5dPwwCp4ee1/iV/q2P6aK1mTZcfr1BTuVlyW9hJYiMPybJCcnBlQtuSmTIWNeazm/zqNoZSSElBg==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/ai" + } + ], + "license": "MIT", + "bin": { + "nanoid": "bin/nanoid.js" + }, + "engines": { + "node": "^18 || >=20" + } + }, + "node_modules/docx/node_modules/undici-types": { + "version": "7.18.2", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-7.18.2.tgz", + "integrity": "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==", + "license": "MIT" + }, "node_modules/dompurify": { "version": "3.3.2", "resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.3.2.tgz", @@ -4208,6 +4480,16 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/hash.js": { + "version": "1.1.7", + "resolved": "https://registry.npmjs.org/hash.js/-/hash.js-1.1.7.tgz", + "integrity": "sha512-taOaskGt4z4SOANNseOviYDvjEJinIkRgmp7LbKP2YTTmVxWBl87s/uzK9r+44BclBSp2X7K1hqeNfz9JbBeXA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "minimalistic-assert": "^1.0.1" + } + }, "node_modules/hasown": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", @@ -5369,6 +5651,12 @@ "node": ">=8.6" } }, + "node_modules/minimalistic-assert": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/minimalistic-assert/-/minimalistic-assert-1.0.1.tgz", + "integrity": "sha512-UtJcAD4yEaGtjPezWuO9wC4nwUnVH/8/Im3yEHQP4b67cXlD/Qr9hdITCU1xDbSEXg2XKNaP8jsReV7vQd00/A==", + "license": "ISC" + }, "node_modules/minimatch": { "version": "3.1.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", @@ -5555,6 +5843,13 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/node-readable-to-web-readable-stream": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/node-readable-to-web-readable-stream/-/node-readable-to-web-readable-stream-0.4.2.tgz", + "integrity": "sha512-/cMZNI34v//jUTrI+UIo4ieHAB5EZRY/+7OmXZgBxaWBMcW2tGdceIw06RFxWxrKZ5Jp3sI2i5TsRo+CBhtVLQ==", + "license": "MIT", + "optional": true + }, "node_modules/node-releases": { "version": "2.0.36", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz", @@ -5820,30 +6115,19 @@ "dev": true, "license": "MIT" }, - "node_modules/pdf-lib": { - "version": "1.17.1", - "resolved": "https://registry.npmjs.org/pdf-lib/-/pdf-lib-1.17.1.tgz", - "integrity": "sha512-V/mpyJAoTsN4cnP31vc0wfNA1+p20evqqnap0KLoRUN0Yk/p3wN52DOEsL4oBFcLdb76hlpKPtzJIgo67j/XLw==", - "license": "MIT", - "dependencies": { - "@pdf-lib/standard-fonts": "^1.0.0", - "@pdf-lib/upng": "^1.0.1", - "pako": "^1.0.11", - "tslib": "^1.11.1" + "node_modules/pdfjs-dist": { + "version": "5.5.207", + "resolved": "https://registry.npmjs.org/pdfjs-dist/-/pdfjs-dist-5.5.207.tgz", + "integrity": "sha512-WMqqw06w1vUt9ZfT0gOFhMf3wHsWhaCrxGrckGs5Cci6ybDW87IvPaOd2pnBwT6BJuP/CzXDZxjFgmSULLdsdw==", + "license": "Apache-2.0", + "engines": { + "node": ">=20.19.0 || >=22.13.0 || >=24" + }, + "optionalDependencies": { + "@napi-rs/canvas": "^0.1.95", + "node-readable-to-web-readable-stream": "^0.4.2" } }, - "node_modules/pdf-lib/node_modules/pako": { - "version": "1.0.11", - "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", - "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", - "license": "(MIT AND Zlib)" - }, - "node_modules/pdf-lib/node_modules/tslib": { - "version": "1.14.1", - "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", - "integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==", - "license": "0BSD" - }, "node_modules/performance-now": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", @@ -6225,6 +6509,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/sax": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/sax/-/sax-1.5.0.tgz", + "integrity": "sha512-21IYA3Q5cQf089Z6tgaUTr7lDAyzoTPx5HRtbhsME8Udispad8dC/+sziTNugOEx54ilvatQ9YCzl4KQLPcRHA==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=11.0.0" + } + }, "node_modules/scheduler": { "version": "0.27.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.27.0.tgz", @@ -6453,6 +6746,18 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/smol-toml": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/smol-toml/-/smol-toml-1.6.0.tgz", + "integrity": "sha512-4zemZi0HvTnYwLfrpk/CF9LOd9Lt87kAt50GnqhMpyF9U3poDAP2+iukq2bZsO/ufegbYehBkqINbsWxj4l4cw==", + "license": "BSD-3-Clause", + "engines": { + "node": ">= 18" + }, + "funding": { + "url": "https://github.com/sponsors/cyyynthia" + } + }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -7218,6 +7523,24 @@ "node": ">=0.10.0" } }, + "node_modules/xml": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/xml/-/xml-1.0.1.tgz", + "integrity": "sha512-huCv9IH9Tcf95zuYCsQraZtWnJvBtLVE0QHMOs8bWyZAFZNDcYjsPq1nEx8jKA9y+Beo9v+7OBPRisQTjinQMw==", + "license": "MIT" + }, + "node_modules/xml-js": { + "version": "1.6.11", + "resolved": "https://registry.npmjs.org/xml-js/-/xml-js-1.6.11.tgz", + "integrity": "sha512-7rVi2KMfwfWFl+GpPg6m80IVMWXLRjO+PxTq7V2CDhoGak0wzYzFgUY2m4XJ47OGdXd8eLE8EmwfAmdjw7lC1g==", + "license": "MIT", + "dependencies": { + "sax": "^1.2.4" + }, + "bin": { + "xml-js": "bin/cli.js" + } + }, "node_modules/xmlbuilder": { "version": "10.1.1", "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.1.1.tgz", diff --git a/package.json b/package.json index b5ece41..be7cbb0 100644 --- a/package.json +++ b/package.json @@ -11,6 +11,7 @@ "dependencies": { "@ffmpeg/ffmpeg": "^0.12.15", "@ffmpeg/util": "^0.12.2", + "docx": "^9.6.0", "fast-xml-parser": "^5.4.2", "framer-motion": "^12.35.2", "html2canvas-pro": "^2.0.2", @@ -21,9 +22,10 @@ "marked": "^17.0.4", "next": "16.1.6", "papaparse": "^5.5.3", - "pdf-lib": "^1.17.1", + "pdfjs-dist": "^5.5.207", "react": "19.2.3", - "react-dom": "19.2.3" + "react-dom": "19.2.3", + "smol-toml": "^1.6.0" }, "devDependencies": { "@tailwindcss/postcss": "^4", diff --git a/src/lib/conversionMap.ts b/src/lib/conversionMap.ts index 4c2cf85..81d3836 100644 --- a/src/lib/conversionMap.ts +++ b/src/lib/conversionMap.ts @@ -1,57 +1,59 @@ import { FileCategory } from '@/types'; const IMAGE_CONVERSIONS: Record = { - png: ['jpg', 'webp', 'gif', 'bmp', 'avif'], - jpg: ['png', 'webp', 'gif', 'bmp', 'avif'], - jpeg: ['png', 'webp', 'gif', 'bmp', 'avif'], - webp: ['png', 'jpg', 'gif', 'bmp', 'avif'], - gif: ['png', 'jpg', 'webp', 'bmp'], - bmp: ['png', 'jpg', 'webp', 'gif'], - tiff: ['png', 'jpg', 'webp'], - tif: ['png', 'jpg', 'webp'], - avif: ['png', 'jpg', 'webp'], - svg: ['png', 'jpg', 'webp'], - ico: ['png', 'jpg', 'webp'], + png: ['jpg', 'webp', 'gif', 'bmp', 'avif', 'tiff', 'ico'], + jpg: ['png', 'webp', 'gif', 'bmp', 'avif', 'tiff', 'ico'], + jpeg: ['png', 'webp', 'gif', 'bmp', 'avif', 'tiff', 'ico'], + webp: ['png', 'jpg', 'gif', 'bmp', 'avif', 'tiff', 'ico'], + gif: ['png', 'jpg', 'webp', 'bmp', 'avif', 'tiff'], + bmp: ['png', 'jpg', 'webp', 'gif', 'avif', 'tiff'], + tiff: ['png', 'jpg', 'webp', 'gif', 'bmp', 'avif'], + tif: ['png', 'jpg', 'webp', 'gif', 'bmp', 'avif'], + avif: ['png', 'jpg', 'webp', 'gif', 'bmp', 'tiff'], + svg: ['png', 'jpg', 'webp', 'gif', 'bmp', 'avif', 'tiff'], + ico: ['png', 'jpg', 'webp', 'gif', 'bmp'], }; const DOCUMENT_CONVERSIONS: Record = { - docx: ['html', 'txt', 'pdf'], - md: ['html', 'pdf', 'txt'], - html: ['pdf', 'txt', 'md'], - htm: ['pdf', 'txt', 'md'], - txt: ['pdf', 'html', 'md'], - pdf: ['txt'], + pdf: ['txt', 'html', 'md', 'docx'], + docx: ['pdf', 'html', 'txt', 'md'], + md: ['html', 'pdf', 'txt', 'docx'], + html: ['pdf', 'txt', 'md', 'docx'], + htm: ['pdf', 'txt', 'md', 'docx'], + txt: ['pdf', 'html', 'md', 'docx'], + rtf: ['txt', 'html', 'md', 'pdf', 'docx'], }; const AUDIO_CONVERSIONS: Record = { - mp3: ['wav', 'ogg', 'aac', 'flac', 'm4a'], - wav: ['mp3', 'ogg', 'aac', 'flac', 'm4a'], - flac: ['mp3', 'wav', 'ogg', 'aac', 'm4a'], - ogg: ['mp3', 'wav', 'aac', 'flac', 'm4a'], - aac: ['mp3', 'wav', 'ogg', 'flac', 'm4a'], - m4a: ['mp3', 'wav', 'ogg', 'flac', 'aac'], - wma: ['mp3', 'wav', 'ogg', 'flac'], - opus: ['mp3', 'wav', 'ogg', 'flac'], + mp3: ['wav', 'ogg', 'aac', 'flac', 'm4a', 'opus'], + wav: ['mp3', 'ogg', 'aac', 'flac', 'm4a', 'opus'], + flac: ['mp3', 'wav', 'ogg', 'aac', 'm4a', 'opus'], + ogg: ['mp3', 'wav', 'aac', 'flac', 'm4a', 'opus'], + aac: ['mp3', 'wav', 'ogg', 'flac', 'm4a', 'opus'], + m4a: ['mp3', 'wav', 'ogg', 'flac', 'aac', 'opus'], + wma: ['mp3', 'wav', 'ogg', 'flac', 'aac', 'm4a'], + opus: ['mp3', 'wav', 'ogg', 'flac', 'aac', 'm4a'], }; const VIDEO_CONVERSIONS: Record = { - mp4: ['webm', 'avi', 'mov', 'gif', 'mp3'], - webm: ['mp4', 'avi', 'mov', 'gif', 'mp3'], - avi: ['mp4', 'webm', 'mov', 'gif', 'mp3'], - mov: ['mp4', 'webm', 'avi', 'gif', 'mp3'], - mkv: ['mp4', 'webm', 'avi', 'gif', 'mp3'], - flv: ['mp4', 'webm', 'avi', 'mp3'], - wmv: ['mp4', 'webm', 'avi', 'mp3'], - m4v: ['mp4', 'webm', 'avi', 'mp3'], + mp4: ['webm', 'avi', 'mov', 'mkv', 'gif', 'mp3', 'wav', 'ogg', 'aac', 'flac'], + webm: ['mp4', 'avi', 'mov', 'mkv', 'gif', 'mp3', 'wav', 'ogg', 'aac', 'flac'], + avi: ['mp4', 'webm', 'mov', 'mkv', 'gif', 'mp3', 'wav', 'ogg', 'aac', 'flac'], + mov: ['mp4', 'webm', 'avi', 'mkv', 'gif', 'mp3', 'wav', 'ogg', 'aac', 'flac'], + mkv: ['mp4', 'webm', 'avi', 'mov', 'gif', 'mp3', 'wav', 'ogg', 'aac', 'flac'], + flv: ['mp4', 'webm', 'avi', 'mov', 'mkv', 'gif', 'mp3', 'wav', 'ogg', 'aac', 'flac'], + wmv: ['mp4', 'webm', 'avi', 'mov', 'mkv', 'gif', 'mp3', 'wav', 'ogg', 'aac', 'flac'], + m4v: ['mp4', 'webm', 'avi', 'mov', 'mkv', 'gif', 'mp3', 'wav', 'ogg', 'aac', 'flac'], }; const DATA_CONVERSIONS: Record = { - csv: ['json', 'xml', 'yaml', 'tsv'], - json: ['csv', 'xml', 'yaml'], - xml: ['json', 'csv', 'yaml'], - yaml: ['json', 'csv', 'xml'], - yml: ['json', 'csv', 'xml'], - tsv: ['csv', 'json', 'xml', 'yaml'], + csv: ['json', 'xml', 'yaml', 'tsv', 'toml'], + json: ['csv', 'xml', 'yaml', 'tsv', 'toml'], + xml: ['json', 'csv', 'yaml', 'tsv', 'toml'], + yaml: ['json', 'csv', 'xml', 'tsv', 'toml'], + yml: ['json', 'csv', 'xml', 'tsv', 'toml'], + tsv: ['csv', 'json', 'xml', 'yaml', 'toml'], + toml: ['json', 'csv', 'xml', 'yaml', 'tsv'], }; const ALL_CONVERSIONS: Record> = { @@ -75,14 +77,15 @@ export function getDefaultTarget(category: FileCategory, extension: string): str // Images → WebP (modern, smaller) png: 'webp', jpg: 'webp', jpeg: 'webp', gif: 'webp', bmp: 'png', tiff: 'png', tif: 'png', avif: 'png', svg: 'png', ico: 'png', - // Documents → PDF - docx: 'pdf', md: 'html', html: 'pdf', txt: 'pdf', pdf: 'txt', + // Documents → PDF (except PDF → DOCX) + docx: 'pdf', md: 'html', html: 'pdf', htm: 'pdf', txt: 'pdf', + pdf: 'docx', rtf: 'docx', // Audio → MP3 wav: 'mp3', flac: 'mp3', ogg: 'mp3', aac: 'mp3', m4a: 'mp3', wma: 'mp3', opus: 'mp3', mp3: 'wav', // Video → MP4 avi: 'mp4', mov: 'mp4', mkv: 'mp4', flv: 'mp4', wmv: 'mp4', m4v: 'mp4', mp4: 'webm', webm: 'mp4', // Data → JSON - csv: 'json', xml: 'json', yaml: 'json', yml: 'json', tsv: 'csv', json: 'csv', + csv: 'json', xml: 'json', yaml: 'json', yml: 'json', tsv: 'csv', json: 'csv', toml: 'json', }; return defaults[extension] || formats[0]; diff --git a/src/lib/converters/dataConverter.ts b/src/lib/converters/dataConverter.ts index 1b197a8..7d6aa55 100644 --- a/src/lib/converters/dataConverter.ts +++ b/src/lib/converters/dataConverter.ts @@ -52,6 +52,17 @@ function yamlToJson(text: string): unknown { return yaml.load(text); } +async function tomlToJson(text: string): Promise { + const TOML = await import('smol-toml'); + return TOML.parse(text); +} + +async function jsonToToml(data: unknown): Promise { + const TOML = await import('smol-toml'); + const obj = typeof data === 'string' ? JSON.parse(data) : data; + return TOML.stringify(obj as Record); +} + async function toIntermediate(file: File, ext: string): Promise { const text = await readFileAsText(file); @@ -67,12 +78,14 @@ async function toIntermediate(file: File, ext: string): Promise { case 'yaml': case 'yml': return yamlToJson(text); + case 'toml': + return tomlToJson(text); default: throw new Error(`Unsupported source format: ${ext}`); } } -function fromIntermediate(data: unknown, targetFormat: string): string { +async function fromIntermediate(data: unknown, targetFormat: string): Promise { switch (targetFormat) { case 'json': return JSON.stringify(data, null, 2); @@ -85,6 +98,8 @@ function fromIntermediate(data: unknown, targetFormat: string): string { case 'yaml': case 'yml': return jsonToYaml(data); + case 'toml': + return jsonToToml(data); default: throw new Error(`Unsupported target format: ${targetFormat}`); } @@ -101,7 +116,7 @@ export async function convertData( const intermediate = await toIntermediate(file, ext); onProgress?.(60); - const output = fromIntermediate(intermediate, targetFormat); + const output = await fromIntermediate(intermediate, targetFormat); onProgress?.(90); const blob = new Blob([output], { type: getMimeType(targetFormat) }); diff --git a/src/lib/converters/documentConverter.ts b/src/lib/converters/documentConverter.ts index e0f43bb..afe4267 100644 --- a/src/lib/converters/documentConverter.ts +++ b/src/lib/converters/documentConverter.ts @@ -26,12 +26,6 @@ async function readFileAsArrayBuffer(file: File): Promise { /* ============================================ Styled HTML document wrapper - - This is used for ALL HTML output and as the - intermediate step for PDF rendering. Embeds - full CSS so the document looks correct both - as a standalone .html file and when rendered - to PDF via jsPDF.html(). ============================================ */ function wrapInStyledHtml(bodyHtml: string, title: string): string { @@ -42,9 +36,7 @@ function wrapInStyledHtml(bodyHtml: string, title: string): string { ${escapeHtml(title)} @@ -198,6 +98,313 @@ function escapeHtml(text: string): string { .replace(/"/g, '"'); } +/* ============================================ + PDF text extraction via pdfjs-dist + ============================================ */ + +async function pdfToText(file: File): Promise { + const pdfjsLib = await import('pdfjs-dist'); + + // Use the bundled worker + pdfjsLib.GlobalWorkerOptions.workerSrc = `https://unpkg.com/pdfjs-dist@${pdfjsLib.version}/build/pdf.worker.min.mjs`; + + const arrayBuffer = await readFileAsArrayBuffer(file); + const pdf = await pdfjsLib.getDocument({ data: arrayBuffer }).promise; + + const textParts: string[] = []; + + for (let i = 1; i <= pdf.numPages; i++) { + const page = await pdf.getPage(i); + const content = await page.getTextContent(); + const pageText = content.items + .filter((item) => 'str' in item) + .map((item) => (item as { str: string }).str) + .join(' '); + if (pageText.trim()) { + textParts.push(pageText); + } + } + + if (textParts.length === 0) { + return `[This PDF contains no extractable text — it may be image-based/scanned.]`; + } + + return textParts.join('\n\n'); +} + +/* ============================================ + PDF → HTML + Extracts text per page, wraps in styled HTML + ============================================ */ + +async function pdfToHtml(file: File): Promise { + const text = await pdfToText(file); + const paragraphs = text.split(/\n\n+/).filter(Boolean); + const bodyHtml = paragraphs.map((p) => `

${escapeHtml(p)}

`).join('\n'); + return wrapInStyledHtml(bodyHtml, file.name.replace(/\.pdf$/i, '')); +} + +/* ============================================ + PDF → Markdown + ============================================ */ + +async function pdfToMarkdown(file: File): Promise { + const text = await pdfToText(file); + // Attempt to detect headings (ALL CAPS lines, short lines) + const lines = text.split('\n'); + const mdLines: string[] = []; + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) { + mdLines.push(''); + continue; + } + // Heuristic: short all-caps lines are likely headings + if (trimmed.length < 80 && trimmed === trimmed.toUpperCase() && /[A-Z]/.test(trimmed)) { + mdLines.push(`## ${trimmed}`); + } else { + mdLines.push(trimmed); + } + } + + return mdLines.join('\n'); +} + +/* ============================================ + PDF → DOCX + Extracts text, builds DOCX using docx package + ============================================ */ + +async function pdfToDocx(file: File): Promise { + const text = await pdfToText(file); + return textToDocx(text); +} + +/* ============================================ + Text/HTML/MD → DOCX generation using docx pkg + ============================================ */ + +async function textToDocx(text: string): Promise { + const docx = await import('docx'); + const paragraphs = text.split(/\n\n+/).filter(Boolean); + + const children = paragraphs.map( + (p) => + new docx.Paragraph({ + children: [new docx.TextRun({ text: p, size: 24 })], + spacing: { after: 200 }, + }) + ); + + const doc = new docx.Document({ + sections: [{ children }], + }); + + return await docx.Packer.toBlob(doc); +} + +async function htmlToDocx(html: string): Promise { + // Convert HTML to plain text, then build DOCX + const plainText = htmlToText(html); + return textToDocx(plainText); +} + +async function markdownToDocx(mdText: string): Promise { + const docx = await import('docx'); + const lines = mdText.split('\n'); + const children: InstanceType[] = []; + + let i = 0; + while (i < lines.length) { + const line = lines[i]; + + // Headings + const h1Match = line.match(/^#\s+(.+)/); + const h2Match = line.match(/^##\s+(.+)/); + const h3Match = line.match(/^###\s+(.+)/); + const h4Match = line.match(/^####\s+(.+)/); + + if (h1Match) { + children.push( + new docx.Paragraph({ + children: [new docx.TextRun({ text: h1Match[1], bold: true, size: 48 })], + heading: docx.HeadingLevel.HEADING_1, + spacing: { after: 200 }, + }) + ); + } else if (h2Match) { + children.push( + new docx.Paragraph({ + children: [new docx.TextRun({ text: h2Match[1], bold: true, size: 36 })], + heading: docx.HeadingLevel.HEADING_2, + spacing: { after: 160 }, + }) + ); + } else if (h3Match) { + children.push( + new docx.Paragraph({ + children: [new docx.TextRun({ text: h3Match[1], bold: true, size: 28 })], + heading: docx.HeadingLevel.HEADING_3, + spacing: { after: 120 }, + }) + ); + } else if (h4Match) { + children.push( + new docx.Paragraph({ + children: [new docx.TextRun({ text: h4Match[1], bold: true, size: 24 })], + heading: docx.HeadingLevel.HEADING_4, + spacing: { after: 100 }, + }) + ); + } + // Unordered list + else if (line.match(/^[-*+]\s+/)) { + children.push( + new docx.Paragraph({ + children: parseInlineMarkdown(docx, line.replace(/^[-*+]\s+/, '')), + bullet: { level: 0 }, + }) + ); + } + // Ordered list + else if (line.match(/^\d+\.\s+/)) { + children.push( + new docx.Paragraph({ + children: parseInlineMarkdown(docx, line.replace(/^\d+\.\s+/, '')), + numbering: { reference: 'default-numbering', level: 0 }, + }) + ); + } + // Blockquote + else if (line.startsWith('>')) { + children.push( + new docx.Paragraph({ + children: [ + new docx.TextRun({ + text: line.replace(/^>\s*/, ''), + italics: true, + color: '555555', + size: 24, + }), + ], + indent: { left: 720 }, + border: { + left: { style: docx.BorderStyle.SINGLE, size: 6, color: '0066cc', space: 10 }, + }, + spacing: { after: 120 }, + }) + ); + } + // Horizontal rule + else if (line.match(/^(-{3,}|\*{3,}|_{3,})$/)) { + children.push( + new docx.Paragraph({ + children: [], + border: { + bottom: { style: docx.BorderStyle.SINGLE, size: 1, color: 'CCCCCC', space: 10 }, + }, + spacing: { before: 200, after: 200 }, + }) + ); + } + // Code block + else if (line.startsWith('```')) { + i++; + const codeLines: string[] = []; + while (i < lines.length && !lines[i].startsWith('```')) { + codeLines.push(lines[i]); + i++; + } + children.push( + new docx.Paragraph({ + children: [ + new docx.TextRun({ + text: codeLines.join('\n'), + font: 'Courier New', + size: 20, + }), + ], + shading: { type: docx.ShadingType.SOLID, color: 'F6F8FA' }, + spacing: { before: 120, after: 120 }, + }) + ); + } + // Empty line + else if (line.trim() === '') { + children.push(new docx.Paragraph({ children: [], spacing: { after: 120 } })); + } + // Regular paragraph + else { + children.push( + new docx.Paragraph({ + children: parseInlineMarkdown(docx, line), + spacing: { after: 160 }, + }) + ); + } + + i++; + } + + const doc = new docx.Document({ + numbering: { + config: [ + { + reference: 'default-numbering', + levels: [ + { + level: 0, + format: docx.LevelFormat.DECIMAL, + text: '%1.', + alignment: docx.AlignmentType.START, + }, + ], + }, + ], + }, + sections: [{ children }], + }); + + return await docx.Packer.toBlob(doc); +} + +/* eslint-disable @typescript-eslint/no-explicit-any */ +function parseInlineMarkdown(docx: any, text: string): any[] { + const runs: any[] = []; + // Regex to detect **bold**, *italic*, `code`, ~~strikethrough~~ + const regex = /(\*\*(.+?)\*\*|\*(.+?)\*|`(.+?)`|~~(.+?)~~|([^*`~]+))/g; + let match; + + while ((match = regex.exec(text)) !== null) { + if (match[2]) { + // Bold + runs.push(new docx.TextRun({ text: match[2], bold: true, size: 24 })); + } else if (match[3]) { + // Italic + runs.push(new docx.TextRun({ text: match[3], italics: true, size: 24 })); + } else if (match[4]) { + // Code + runs.push( + new docx.TextRun({ text: match[4], font: 'Courier New', size: 22, color: 'C7254E' }) + ); + } else if (match[5]) { + // Strikethrough + runs.push(new docx.TextRun({ text: match[5], strike: true, size: 24 })); + } else if (match[6]) { + // Plain text + runs.push(new docx.TextRun({ text: match[6], size: 24 })); + } + } + + if (runs.length === 0) { + runs.push(new docx.TextRun({ text, size: 24 })); + } + + return runs; +} +/* eslint-enable @typescript-eslint/no-explicit-any */ + /* ============================================ Source → HTML conversions ============================================ */ @@ -205,9 +412,7 @@ function escapeHtml(text: string): string { async function docxToHtml(file: File): Promise { const mammoth = await import('mammoth'); const arrayBuffer = await readFileAsArrayBuffer(file); - const result = await mammoth.convertToHtml({ - arrayBuffer, - }); + const result = await mammoth.convertToHtml({ arrayBuffer }); return result.value; } @@ -218,6 +423,11 @@ async function docxToText(file: File): Promise { return result.value; } +async function docxToMarkdown(file: File): Promise { + const bodyHtml = await docxToHtml(file); + return htmlToMarkdown(bodyHtml); +} + async function markdownToHtml(text: string): Promise { const { marked } = await import('marked'); return await marked(text); @@ -230,7 +440,6 @@ function htmlToText(html: string): string { } function htmlToMarkdown(html: string): string { - // Parse properly using DOMParser for reliable conversion const parser = new DOMParser(); const doc = parser.parseFromString(html, 'text/html'); @@ -238,7 +447,6 @@ function htmlToMarkdown(html: string): string { if (node.nodeType === Node.TEXT_NODE) { return node.textContent || ''; } - if (node.nodeType !== Node.ELEMENT_NODE) return ''; const el = node as Element; @@ -293,32 +501,20 @@ function htmlToMarkdown(html: string): string { case 'table': { const rows = Array.from(el.querySelectorAll('tr')); if (rows.length === 0) return children; - const tableData: string[][] = rows.map(row => Array.from(row.querySelectorAll('th, td')).map(cell => walk(cell).trim()) ); - if (tableData.length === 0) return ''; - const colCount = Math.max(...tableData.map(r => r.length)); const colWidths = Array.from({ length: colCount }, (_, i) => Math.max(3, ...tableData.map(r => (r[i] || '').length)) ); - const formatRow = (row: string[]) => '| ' + colWidths.map((w, i) => (row[i] || '').padEnd(w)).join(' | ') + ' |'; - const separator = '| ' + colWidths.map(w => '-'.repeat(w)).join(' | ') + ' |'; - const lines = [formatRow(tableData[0]), separator, ...tableData.slice(1).map(formatRow)]; return lines.join('\n') + '\n\n'; } - case 'div': - case 'section': - case 'article': - case 'main': - case 'span': - return children; default: return children; } @@ -327,35 +523,62 @@ function htmlToMarkdown(html: string): string { return walk(doc.body).replace(/\n{3,}/g, '\n\n').trim(); } +/* ============================================ + RTF → text (basic extraction) + ============================================ */ + +function rtfToText(rtf: string): string { + // Strip RTF control words and groups, extract plain text + let text = rtf; + // Remove header up to first \pard + const pardIndex = text.indexOf('\\pard'); + if (pardIndex > 0) { + // Keep content from first \pard onwards but strip the \pard itself + text = text.substring(pardIndex); + } + // Handle common RTF escapes + text = text.replace(/\\par\b/g, '\n'); + text = text.replace(/\\tab\b/g, '\t'); + text = text.replace(/\\line\b/g, '\n'); + text = text.replace(/\\\n/g, '\n'); + text = text.replace(/\\pard[^\\]*/g, ''); + // Remove {\*\...} groups (destinations we don't care about) + text = text.replace(/\{\\\*\\[^}]*\}/g, ''); + // Remove remaining RTF commands (\word or \wordN) + text = text.replace(/\\[a-z]+\d*\s?/gi, ''); + // Remove braces + text = text.replace(/[{}]/g, ''); + // Handle unicode escapes \\uN + text = text.replace(/\\u(\d+)\??/g, (_, code) => String.fromCharCode(parseInt(code))); + // Handle hex escapes \\'XX + text = text.replace(/\\'([0-9a-fA-F]{2})/g, (_, hex) => + String.fromCharCode(parseInt(hex, 16)) + ); + // Clean up + text = text.replace(/\r\n/g, '\n'); + text = text.replace(/\n{3,}/g, '\n\n'); + return text.trim(); +} + /* ============================================ HTML → PDF via jsPDF.html() - - Renders a styled HTML document into a real - PDF by injecting it into a hidden DOM container - and using jsPDF's html() method (backed by - html2canvas) to capture the visual rendering. ============================================ */ async function renderHtmlToPdf(htmlContent: string): Promise { const { jsPDF } = await import('jspdf'); - // html2canvas-pro is imported for its side-effect: - // jsPDF.html() looks for it on the window/global scope const html2canvas = (await import('html2canvas-pro')).default; - // Create a hidden container for rendering const container = document.createElement('div'); container.style.position = 'fixed'; container.style.left = '-10000px'; container.style.top = '0'; - container.style.width = '794px'; // A4 width in px at 96dpi + container.style.width = '794px'; container.style.background = '#ffffff'; container.style.zIndex = '-9999'; - // Parse the HTML and inject just the body + styles const parser = new DOMParser(); const parsed = parser.parseFromString(htmlContent, 'text/html'); - // Apply styles inline const styleEl = parsed.querySelector('style'); const bodyContent = parsed.body.innerHTML; @@ -375,19 +598,15 @@ async function renderHtmlToPdf(htmlContent: string): Promise { container.appendChild(content); document.body.appendChild(container); - - // Wait for fonts/images to load await new Promise((resolve) => setTimeout(resolve, 100)); try { - // A4 dimensions in mm: 210 x 297 const pdfWidth = 210; const pdfHeight = 297; - const margin = 15; // mm + const margin = 15; - // Capture the rendered content as a canvas const canvas = await html2canvas(content, { - scale: 2, // Higher resolution + scale: 2, useCORS: true, allowTaint: true, backgroundColor: '#ffffff', @@ -395,7 +614,6 @@ async function renderHtmlToPdf(htmlContent: string): Promise { windowWidth: 794, }); - // Calculate how the content maps to PDF pages const imgWidth = pdfWidth - margin * 2; const imgHeight = (canvas.height * imgWidth) / canvas.width; @@ -403,57 +621,27 @@ async function renderHtmlToPdf(htmlContent: string): Promise { const pageContentHeight = pdfHeight - margin * 2; if (imgHeight <= pageContentHeight) { - // Single page — fits entirely - doc.addImage( - canvas.toDataURL('image/jpeg', 0.95), - 'JPEG', - margin, - margin, - imgWidth, - imgHeight - ); + doc.addImage(canvas.toDataURL('image/jpeg', 0.95), 'JPEG', margin, margin, imgWidth, imgHeight); } else { - // Multi-page — slice the canvas into page-sized chunks const totalPages = Math.ceil(imgHeight / pageContentHeight); - for (let page = 0; page < totalPages; page++) { if (page > 0) doc.addPage(); - - // Calculate the portion of the source canvas for this page const sourceY = (page * pageContentHeight * canvas.width) / imgWidth; const sourceHeight = Math.min( (pageContentHeight * canvas.width) / imgWidth, canvas.height - sourceY ); - - // Create a canvas slice for this page const pageCanvas = document.createElement('canvas'); pageCanvas.width = canvas.width; pageCanvas.height = sourceHeight; - const ctx = pageCanvas.getContext('2d'); if (ctx) { ctx.fillStyle = '#ffffff'; ctx.fillRect(0, 0, pageCanvas.width, pageCanvas.height); - ctx.drawImage( - canvas, - 0, sourceY, - canvas.width, sourceHeight, - 0, 0, - canvas.width, sourceHeight - ); + ctx.drawImage(canvas, 0, sourceY, canvas.width, sourceHeight, 0, 0, canvas.width, sourceHeight); } - const sliceHeight = (sourceHeight * imgWidth) / canvas.width; - - doc.addImage( - pageCanvas.toDataURL('image/jpeg', 0.95), - 'JPEG', - margin, - margin, - imgWidth, - sliceHeight - ); + doc.addImage(pageCanvas.toDataURL('image/jpeg', 0.95), 'JPEG', margin, margin, imgWidth, sliceHeight); } } @@ -464,9 +652,7 @@ async function renderHtmlToPdf(htmlContent: string): Promise { } /* ============================================ - Plain text → PDF (for .txt files) - Still uses jsPDF.text() since plain text - has no formatting to preserve. + Plain text → PDF ============================================ */ async function plainTextToPdf(text: string): Promise { @@ -493,37 +679,11 @@ async function plainTextToPdf(text: string): Promise { } /* ============================================ - PDF → Text extraction - ============================================ */ - -async function pdfToText(file: File): Promise { - const { PDFDocument } = await import('pdf-lib'); - const arrayBuffer = await readFileAsArrayBuffer(file); - const pdfDoc = await PDFDocument.load(arrayBuffer); - const pages = pdfDoc.getPages(); - - let text = `PDF Document: ${file.name}\n`; - text += `Pages: ${pages.length}\n\n`; - - const form = pdfDoc.getForm(); - try { - const fields = form.getFields(); - if (fields.length > 0) { - text += `Form Fields:\n`; - fields.forEach((field) => { - text += `- ${field.getName()}\n`; - }); - } - } catch { - // No form fields - } - - text += `\nNote: Full text extraction from PDF requires OCR. This extracts metadata and structure.\n`; - return text; -} - -/* ============================================ - Main export + Main export — full conversion matrix + + Source formats: pdf, docx, md, html, htm, txt, rtf + Each can convert to: pdf, docx, html, md, txt + (minus converting to its own format) ============================================ */ export async function convertDocument( @@ -538,7 +698,29 @@ export async function convertDocument( onProgress?.(30); + // Strategy: convert source → intermediate (text or HTML), then intermediate → target switch (sourceExt) { + /* ---- PDF source ---- */ + case 'pdf': { + if (targetFormat === 'txt') { + const text = await pdfToText(file); + resultBlob = new Blob([text], { type: 'text/plain' }); + } else if (targetFormat === 'html') { + const html = await pdfToHtml(file); + resultBlob = new Blob([html], { type: 'text/html' }); + } else if (targetFormat === 'md') { + const md = await pdfToMarkdown(file); + resultBlob = new Blob([md], { type: 'text/markdown' }); + } else if (targetFormat === 'docx') { + onProgress?.(50); + resultBlob = await pdfToDocx(file); + } else { + throw new Error(`Unsupported: pdf → ${targetFormat}`); + } + break; + } + + /* ---- DOCX source ---- */ case 'docx': { if (targetFormat === 'html') { const bodyHtml = await docxToHtml(file); @@ -547,6 +729,9 @@ export async function convertDocument( } else if (targetFormat === 'txt') { const text = await docxToText(file); resultBlob = new Blob([text], { type: 'text/plain' }); + } else if (targetFormat === 'md') { + const md = await docxToMarkdown(file); + resultBlob = new Blob([md], { type: 'text/markdown' }); } else if (targetFormat === 'pdf') { onProgress?.(40); const bodyHtml = await docxToHtml(file); @@ -554,11 +739,12 @@ export async function convertDocument( onProgress?.(60); resultBlob = await renderHtmlToPdf(styledHtml); } else { - throw new Error(`Unsupported: docx to ${targetFormat}`); + throw new Error(`Unsupported: docx → ${targetFormat}`); } break; } + /* ---- Markdown source ---- */ case 'md': { const mdText = await readFileAsText(file); if (targetFormat === 'html') { @@ -572,23 +758,24 @@ export async function convertDocument( onProgress?.(60); resultBlob = await renderHtmlToPdf(styledHtml); } else if (targetFormat === 'txt') { - // Strip markdown syntax for plain text const bodyHtml = await markdownToHtml(mdText); const text = htmlToText(bodyHtml); resultBlob = new Blob([text], { type: 'text/plain' }); + } else if (targetFormat === 'docx') { + onProgress?.(50); + resultBlob = await markdownToDocx(mdText); } else { - throw new Error(`Unsupported: md to ${targetFormat}`); + throw new Error(`Unsupported: md → ${targetFormat}`); } break; } + /* ---- HTML source ---- */ case 'html': case 'htm': { const rawHtml = await readFileAsText(file); if (targetFormat === 'pdf') { onProgress?.(40); - // If the HTML already has a