From 5b1182a818c2db1c6b36e5d2dc284093890a94cc Mon Sep 17 00:00:00 2001 From: noah Date: Mon, 9 Mar 2026 20:44:38 +0100 Subject: [PATCH] fix: landing page Unicode rendering, orbit overlap, and add PDF OCR fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix Unicode escapes (\u26A1, \u2192, \u{1F6E1}) rendered as literal text in JSX by wrapping them in JSX expressions - Increase orbit radii (520/320px -> 720/480px) so items don't overlap hero text - Add orbit animation keyframes (orbit-slow, orbit-med) to globals.css - Add Tesseract.js OCR fallback for scanned/image-based PDFs that have no extractable text layer — renders pages to canvas then runs browser-based OCR - Thread onProgress through all PDF conversion functions for OCR progress --- package-lock.json | 112 ++++++- package.json | 1 + src/app/globals.css | 27 ++ src/app/page.tsx | 387 ++++++++++++++++++++---- src/lib/converters/documentConverter.ts | 80 ++++- 5 files changed, 531 insertions(+), 76 deletions(-) diff --git a/package-lock.json b/package-lock.json index e725b2c..33609ae 100644 --- a/package-lock.json +++ b/package-lock.json @@ -29,6 +29,7 @@ "react": "19.2.3", "react-dom": "19.2.3", "smol-toml": "^1.6.0", + "tesseract.js": "^7.0.0", "woff2-encoder": "^2.0.0", "xlsx": "^0.18.5" }, @@ -2862,6 +2863,12 @@ "integrity": "sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==", "license": "MIT" }, + "node_modules/bmp-js": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/bmp-js/-/bmp-js-0.1.0.tgz", + "integrity": "sha512-vHdS19CnY3hwiNdkaqk93DvjVLfbEcI8mys4UjuWrlX1haDmroo8o4xCzh4wD6DGV6HxRCyauwhHRqMTfERtjw==", + "license": "MIT" + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -4635,6 +4642,12 @@ "integrity": "sha512-4EC57ddXrkaF0x83Oj8sM6SLQHAWXw90Skqu2M4AEWENZ3F02dFJE/GARA8igO79tcgYqGrD7ae4f5L3um2lgg==", "license": "ISC" }, + "node_modules/idb-keyval": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/idb-keyval/-/idb-keyval-6.2.2.tgz", + "integrity": "sha512-yjD9nARJ/jb1g+CvD0tlhUHOrJ9Sy0P8T9MF3YaLlHnSRpwPfpTX0XIvpmw3gAJUmEu3FiICLBDPXVwyEvrleg==", + "license": "Apache-2.0" + }, "node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -5089,6 +5102,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/is-url": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/is-url/-/is-url-1.2.4.tgz", + "integrity": "sha512-ITvGim8FhRiYe4IQ5uHSkj7pVaPDrCTkNd3yq3cV7iZAcJdHTUMPMEHcqSOy9xZ9qFenQCvi+2wjH9a1nXqHww==", + "license": "MIT" + }, "node_modules/is-weakmap": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/is-weakmap/-/is-weakmap-2.0.2.tgz", @@ -5946,6 +5965,26 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", + "license": "MIT", + "dependencies": { + "whatwg-url": "^5.0.0" + }, + "engines": { + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } + } + }, "node_modules/node-readable-to-web-readable-stream": { "version": "0.4.2", "resolved": "https://registry.npmjs.org/node-readable-to-web-readable-stream/-/node-readable-to-web-readable-stream-0.4.2.tgz", @@ -6083,6 +6122,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/opencollective-postinstall": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/opencollective-postinstall/-/opencollective-postinstall-2.0.3.tgz", + "integrity": "sha512-8AV/sCtuzUeTo8gQK5qDZzARrulB3egtLzFgteqB2tcT4Mw7B8Kt7JcDHmltjz6FOAHsvTevk70gZEbhM4ZS9Q==", + "license": "MIT", + "bin": { + "opencollective-postinstall": "index.js" + } + }, "node_modules/opentype.js": { "version": "1.3.4", "resolved": "https://registry.npmjs.org/opentype.js/-/opentype.js-1.3.4.tgz", @@ -6487,8 +6535,7 @@ "version": "0.13.11", "resolved": "https://registry.npmjs.org/regenerator-runtime/-/regenerator-runtime-0.13.11.tgz", "integrity": "sha512-kY1AZVr2Ra+t+piVaJ4gxaFaReZVH40AKNo7UCX6W+dEwBo/2oZJzqfuN1qLq1oL45o56cPaTXELwrTh8Fpggg==", - "license": "MIT", - "optional": true + "license": "MIT" }, "node_modules/regexp.prototype.flags": { "version": "1.5.4", @@ -7208,6 +7255,30 @@ "url": "https://opencollective.com/webpack" } }, + "node_modules/tesseract.js": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/tesseract.js/-/tesseract.js-7.0.0.tgz", + "integrity": "sha512-exPBkd+z+wM1BuMkx/Bjv43OeLBxhL5kKWsz/9JY+DXcXdiBjiAch0V49QR3oAJqCaL5qURE0vx9Eo+G5YE7mA==", + "hasInstallScript": true, + "license": "Apache-2.0", + "dependencies": { + "bmp-js": "^0.1.0", + "idb-keyval": "^6.2.0", + "is-url": "^1.2.4", + "node-fetch": "^2.6.9", + "opencollective-postinstall": "^2.0.3", + "regenerator-runtime": "^0.13.3", + "tesseract.js-core": "^7.0.0", + "wasm-feature-detect": "^1.8.0", + "zlibjs": "^0.3.1" + } + }, + "node_modules/tesseract.js-core": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/tesseract.js-core/-/tesseract.js-core-7.0.0.tgz", + "integrity": "sha512-WnNH518NzmbSq9zgTPeoF8c+xmilS8rFIl1YKbk/ptuuc7p6cLNELNuPAzcmsYw450ca6bLa8j3t0VAtq435Vw==", + "license": "Apache-2.0" + }, "node_modules/text-segmentation": { "version": "1.0.3", "resolved": "https://registry.npmjs.org/text-segmentation/-/text-segmentation-1.0.3.tgz", @@ -7284,6 +7355,12 @@ "node": ">=8.0" } }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", + "license": "MIT" + }, "node_modules/ts-api-utils": { "version": "2.4.0", "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-2.4.0.tgz", @@ -7580,6 +7657,28 @@ "base64-arraybuffer": "^1.0.2" } }, + "node_modules/wasm-feature-detect": { + "version": "1.8.0", + "resolved": "https://registry.npmjs.org/wasm-feature-detect/-/wasm-feature-detect-1.8.0.tgz", + "integrity": "sha512-zksaLKM2fVlnB5jQQDqKXXwYHLQUVH9es+5TOOHwGOVJOCeRBCiPjwSg+3tN2AdTCzjgli4jijCH290kXb/zWQ==", + "license": "Apache-2.0" + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", + "license": "BSD-2-Clause" + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "license": "MIT", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -7790,6 +7889,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/zlibjs": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/zlibjs/-/zlibjs-0.3.1.tgz", + "integrity": "sha512-+J9RrgTKOmlxFSDHo0pI1xM6BLVUv+o0ZT9ANtCxGkjIVCCUdx9alUF8Gm+dGLKbkkkidWIHFDZHDMpfITt4+w==", + "license": "MIT", + "engines": { + "node": "*" + } + }, "node_modules/zod": { "version": "4.3.6", "resolved": "https://registry.npmjs.org/zod/-/zod-4.3.6.tgz", diff --git a/package.json b/package.json index b084899..2476737 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "react": "19.2.3", "react-dom": "19.2.3", "smol-toml": "^1.6.0", + "tesseract.js": "^7.0.0", "woff2-encoder": "^2.0.0", "xlsx": "^0.18.5" }, diff --git a/src/app/globals.css b/src/app/globals.css index 3bfab9e..a3d030d 100644 --- a/src/app/globals.css +++ b/src/app/globals.css @@ -71,6 +71,33 @@ body { animation: pulse-soft 2s ease-in-out infinite; } +/* ---- Orbit animations for landing page constellation ---- */ +@keyframes orbit-slow { + from { transform: rotate(0deg); } + to { transform: rotate(360deg); } +} + +@keyframes orbit-med { + from { transform: rotate(0deg); } + to { transform: rotate(-360deg); } +} + +.animate-orbit-slow { + animation: orbit-slow 60s linear infinite; +} + +.animate-counter-orbit-slow { + animation: orbit-slow 60s linear infinite reverse; +} + +.animate-orbit-med { + animation: orbit-med 40s linear infinite reverse; +} + +.animate-counter-orbit-med { + animation: orbit-med 40s linear infinite; +} + /* ---- Dot pattern background ---- */ .bg-dots { background-image: radial-gradient(circle, rgba(180, 140, 100, 0.12) 1px, transparent 1px); diff --git a/src/app/page.tsx b/src/app/page.tsx index d6ff374..0caca59 100644 --- a/src/app/page.tsx +++ b/src/app/page.tsx @@ -1,65 +1,103 @@ 'use client'; -import { motion } from 'framer-motion'; +import { useState, useEffect } from 'react'; +import { motion, AnimatePresence } from 'framer-motion'; import Link from 'next/link'; -const floatingBadges = [ - { label: 'PNG', color: 'bg-pink-200', dot: 'bg-pink-400', top: '18%', left: '8%', delay: 0 }, - { label: 'MP4', color: 'bg-orange-100', dot: 'bg-orange-400', top: '22%', right: '10%', delay: 0.5 }, - { label: 'CSV', color: 'bg-emerald-100', dot: 'bg-emerald-400', bottom: '32%', left: '6%', delay: 1.0 }, - { label: 'PDF', color: 'bg-blue-100', dot: 'bg-blue-400', bottom: '28%', right: '8%', delay: 0.3 }, - { label: 'WAV', color: 'bg-purple-100', dot: 'bg-purple-400', top: '42%', left: '3%', delay: 0.7 }, - { label: 'WEBP', color: 'bg-pink-100', dot: 'bg-pink-400', top: '35%', right: '4%', delay: 1.2 }, +/* ─── Orbiting Constellation Data ─── */ + +const orbitItems = [ + { icon: '\u{1F5BC}', label: 'PNG', angle: 0 }, + { icon: '\u{1F3B5}', label: 'MP3', angle: 45 }, + { icon: '\u{1F4C4}', label: 'PDF', angle: 90 }, + { icon: '\u{1F3AC}', label: 'MP4', angle: 135 }, + { icon: '\u{1F4CA}', label: 'CSV', angle: 180 }, + { icon: '\u{1F310}', label: 'SVG', angle: 225 }, + { icon: '\u{1F4D6}', label: 'EPUB', angle: 270 }, + { icon: '\u{1F3A8}', label: 'PSD', angle: 315 }, ]; +const innerOrbitItems = [ + { icon: '\u{2728}', label: 'WebP', angle: 30 }, + { icon: '\u{1F4DD}', label: 'DOCX', angle: 120 }, + { icon: '\u{1F4BE}', label: 'JSON', angle: 210 }, + { icon: '\u{1F399}', label: 'WAV', angle: 300 }, +]; + +/* ─── Format Ticker Data ─── */ + +const conversionPairs = [ + { from: 'PNG', to: 'WebP', icon: '\u{1F5BC}', color: '#f472b6' }, + { from: 'DOCX', to: 'PDF', icon: '\u{1F4C4}', color: '#60a5fa' }, + { from: 'MP4', to: 'GIF', icon: '\u{1F3AC}', color: '#fb923c' }, + { from: 'CSV', to: 'JSON', icon: '\u{1F4CA}', color: '#34d399' }, + { from: 'WAV', to: 'MP3', icon: '\u{1F3B5}', color: '#a78bfa' }, + { from: 'HEIC', to: 'JPG', icon: '\u{1F4F7}', color: '#f472b6' }, + { from: 'XLSX', to: 'CSV', icon: '\u{1F4CA}', color: '#34d399' }, + { from: 'TTF', to: 'WOFF2', icon: '\u{1F524}', color: '#2dd4bf' }, + { from: 'EPUB', to: 'PDF', icon: '\u{1F4D6}', color: '#60a5fa' }, + { from: 'YAML', to: 'JSON', icon: '\u{2699}', color: '#34d399' }, + { from: 'PSD', to: 'PNG', icon: '\u{1F3A8}', color: '#f472b6' }, + { from: 'MKV', to: 'MP4', icon: '\u{1F39E}', color: '#fb923c' }, +]; + +/* ─── Conversion Flow Data ─── */ + +const flowSteps = [ + { inputIcon: '\u{1F5BC}', inputLabel: '.PNG', outputIcon: '\u{2728}', outputLabel: '.WebP' }, + { inputIcon: '\u{1F4C4}', inputLabel: '.DOCX', outputIcon: '\u{1F4D1}', outputLabel: '.PDF' }, + { inputIcon: '\u{1F3AC}', inputLabel: '.MKV', outputIcon: '\u{1F4F1}', outputLabel: '.MP4' }, + { inputIcon: '\u{1F4CA}', inputLabel: '.CSV', outputIcon: '\u{1F4CB}', outputLabel: '.JSON' }, + { inputIcon: '\u{1F3B5}', inputLabel: '.FLAC', outputIcon: '\u{1F3A7}', outputLabel: '.MP3' }, +]; + +/* ─── Features ─── */ + const features = [ { icon: '\u{1F5BC}', title: 'Images', - desc: 'PNG, JPG, WebP, GIF, BMP, AVIF, SVG \u2014 convert between any format using Canvas API.', - bg: 'bg-pink-50', + desc: 'PNG, JPG, WebP, GIF, BMP, AVIF, SVG, PSD, HEIC \u2014 convert between any format.', iconBg: 'bg-pink-100', - formats: ['PNG', 'JPG', 'WebP', 'GIF', 'AVIF', 'SVG'], + formats: ['PNG', 'JPG', 'WebP', 'GIF', 'AVIF', 'SVG', 'PSD', 'HEIC'], wide: true, }, { icon: '\u{1F4C4}', title: 'Documents', - desc: 'DOCX, PDF, Markdown, HTML, TXT \u2014 preserves formatting with styled rendering.', - bg: 'bg-blue-50', + desc: 'DOCX, PDF, Markdown, HTML, TXT, PPTX, EPUB \u2014 preserves formatting.', iconBg: 'bg-blue-100', - formats: ['DOCX', 'PDF', 'MD', 'HTML', 'TXT'], + formats: ['DOCX', 'PDF', 'MD', 'HTML', 'TXT', 'PPTX', 'EPUB'], wide: false, }, { icon: '\u{1F3B5}', title: 'Audio', desc: 'MP3, WAV, OGG, AAC, FLAC, M4A \u2014 powered by FFmpeg WebAssembly.', - bg: 'bg-purple-50', iconBg: 'bg-purple-100', - formats: ['MP3', 'WAV', 'OGG', 'FLAC'], + formats: ['MP3', 'WAV', 'OGG', 'FLAC', 'AAC'], wide: false, }, { icon: '\u{1F3AC}', title: 'Video', desc: 'MP4, WebM, AVI, MOV, MKV \u2014 full video transcoding in your browser.', - bg: 'bg-orange-50', iconBg: 'bg-orange-100', - formats: ['MP4', 'WebM', 'AVI', 'MOV'], + formats: ['MP4', 'WebM', 'AVI', 'MOV', 'MKV'], wide: false, }, { icon: '\u{1F4CA}', - title: 'Data', - desc: 'CSV, JSON, XML, YAML, TSV \u2014 smart parsing with structure preservation.', - bg: 'bg-emerald-50', + title: 'Data & Fonts', + desc: 'CSV, JSON, XML, YAML, XLSX, TTF, OTF, WOFF2 \u2014 smart structure preservation.', iconBg: 'bg-emerald-100', - formats: ['CSV', 'JSON', 'XML', 'YAML', 'TSV'], + formats: ['CSV', 'JSON', 'XML', 'YAML', 'XLSX', 'TTF', 'WOFF2'], wide: true, }, ]; +/* ─── Animation Variants ─── */ + const stagger = { hidden: {}, visible: { transition: { staggerChildren: 0.08 } }, @@ -74,6 +112,236 @@ const fadeUp = { }, }; +/* ─── Orbiting Constellation Component ─── */ + +function OrbitingConstellation() { + return ( +
+ {/* Center glow */} +
+
+ + {/* Outer orbit ring (visual) */} +
+ {/* Inner orbit ring */} +
+ + {/* Outer orbit items */} +
+ {orbitItems.map((item) => { + const rad = (item.angle * Math.PI) / 180; + const x = Math.cos(rad) * 360; + const y = Math.sin(rad) * 360; + return ( +
+
+
+ {item.icon} +
+ {item.label} +
+
+ ); + })} +
+ + {/* Inner orbit items */} +
+ {innerOrbitItems.map((item) => { + const rad = (item.angle * Math.PI) / 180; + const x = Math.cos(rad) * 240; + const y = Math.sin(rad) * 240; + return ( +
+
+
+ {item.icon} +
+ {item.label} +
+
+ ); + })} +
+
+ ); +} + +/* ─── Format Ticker Component ─── */ + +function FormatTicker() { + const [index, setIndex] = useState(0); + + useEffect(() => { + const interval = setInterval(() => { + setIndex((prev) => (prev + 1) % conversionPairs.length); + }, 2200); + return () => clearInterval(interval); + }, []); + + const pair = conversionPairs[index]; + + return ( +
+ {pair.icon} +
+ + + .{pair.from} + + + + {'\u2192'} + + + + .{pair.to} + + +
+
+ ); +} + +/* ─── Conversion Flow Animation ─── */ + +function ConversionFlow() { + const [step, setStep] = useState(0); + + useEffect(() => { + const interval = setInterval(() => { + setStep((prev) => (prev + 1) % flowSteps.length); + }, 3000); + return () => clearInterval(interval); + }, []); + + const current = flowSteps[step]; + + return ( + +
+ {/* Input file */} + + +
+ {current.inputIcon} +
+ {current.inputLabel} +
+
+ + {/* Arrow + bolt animation */} +
+ {/* Traveling dots */} +
+ + +
+ + {/* Center bolt */} + + {'\u26A1'} + + + {/* More traveling dots */} +
+ + +
+
+ + {/* Output file */} + + +
+ {current.outputIcon} +
+ {current.outputLabel} +
+
+
+
+ ); +} + +/* ─── Main Page ─── */ + export default function LandingPage() { return (
@@ -84,6 +352,7 @@ export default function LandingPage() { {/* ──── NAV ──── */}
+ {/* eslint-disable-next-line @next/next/no-img-element */} Transmute Transmute @@ -97,27 +366,8 @@ export default function LandingPage() { {/* ──── HERO ──── */}
- {/* Floating format badges */} - {floatingBadges.map((badge) => ( - -
- - .{badge.label} -
-
- ))} + {/* Orbiting constellation behind hero text */} + + {/* Format Ticker */} + + + + {/* CTAs */} - +
+ {/* ──── CONVERSION FLOW ──── */} +
+ + + Live Preview + +

Watch files transform in real time

+
+ +
+ {/* ──── FEATURES ──── */}

- 40+ file formats across 5 categories, all converted instantly with zero quality loss. + 70+ file formats across 5 categories, all converted instantly with zero quality loss.

@@ -257,15 +529,16 @@ export default function LandingPage() { viewport={{ once: true, margin: '-60px' }} > {[ - { num: '1', title: 'Drop your files', desc: 'Drag and drop any file \u2014 or click to browse. We accept everything.' }, - { num: '2', title: 'Pick a format', desc: 'Choose your target format from smart suggestions based on file type.' }, - { num: '3', title: 'Download', desc: 'Hit convert and download instantly. Files never leave your browser.' }, + { num: '1', icon: '\u{1F4E5}', title: 'Drop your files', desc: 'Drag and drop any file \u2014 or click to browse. We accept everything.' }, + { num: '2', icon: '\u{2699}', title: 'Pick a format', desc: 'Choose your target format from smart suggestions based on file type.' }, + { num: '3', icon: '\u{2B07}', title: 'Download', desc: 'Hit convert and download instantly. Files never leave your browser.' }, ].map((step, i) => ( +
{step.icon}
{step.num}
@@ -292,11 +565,7 @@ export default function LandingPage() { viewport={{ once: true, margin: '-80px' }} transition={{ duration: 0.6, ease: [0.16, 1, 0.3, 1] as const }} > -
- - - -
+
{'\u{1F6E1}'}

Your files stay yours

Every conversion happens entirely in your browser using WebAssembly and Canvas APIs. @@ -304,16 +573,14 @@ export default function LandingPage() {

{[ - { label: 'No uploads', color: 'bg-emerald-50', stroke: '#34d399' }, - { label: 'No servers', color: 'bg-blue-50', stroke: '#60a5fa' }, - { label: 'No tracking', color: 'bg-purple-50', stroke: '#a78bfa' }, - { label: 'No limits', color: 'bg-orange-50', stroke: '#fb923c' }, + { icon: '\u{1F6AB}', label: 'No uploads', color: 'bg-emerald-50' }, + { icon: '\u{1F4BB}', label: 'No servers', color: 'bg-blue-50' }, + { icon: '\u{1F440}', label: 'No tracking', color: 'bg-purple-50' }, + { icon: '\u{267E}', label: 'No limits', color: 'bg-orange-50' }, ].map((b) => (
- - - + {b.icon}
{b.label}
diff --git a/src/lib/converters/documentConverter.ts b/src/lib/converters/documentConverter.ts index 43cdf3a..47e9340 100644 --- a/src/lib/converters/documentConverter.ts +++ b/src/lib/converters/documentConverter.ts @@ -100,9 +100,11 @@ function escapeHtml(text: string): string { /* ============================================ PDF text extraction via pdfjs-dist + With OCR fallback via Tesseract.js for + scanned/image-based PDFs ============================================ */ -async function pdfToText(file: File): Promise { +async function pdfToText(file: File, onProgress?: (progress: number) => void): Promise { const pdfjsLib = await import('pdfjs-dist'); // Try loading the worker from CDN; if it fails, run without worker (main thread) @@ -133,11 +135,61 @@ async function pdfToText(file: File): Promise { } } - if (textParts.length === 0) { - return `[This PDF contains no extractable text — it may be image-based/scanned.]`; + // If we got text via normal extraction, return it + if (textParts.length > 0) { + return textParts.join('\n\n'); } - return textParts.join('\n\n'); + // ── OCR fallback for scanned/image-based PDFs ── + // Render each page to canvas, then run Tesseract.js OCR + onProgress?.(35); + + try { + const Tesseract = await import('tesseract.js'); + const ocrTextParts: string[] = []; + + // Create a single worker for all pages + const worker = await Tesseract.createWorker('eng'); + + for (let i = 1; i <= pdf.numPages; i++) { + const page = await pdf.getPage(i); + const viewport = page.getViewport({ scale: 2.0 }); // 2x for better OCR quality + + const canvas = document.createElement('canvas'); + canvas.width = viewport.width; + canvas.height = viewport.height; + const ctx = canvas.getContext('2d'); + + if (!ctx) continue; + + await page.render({ + canvas, + canvasContext: ctx, + viewport, + } as Parameters[0]).promise; + + // Run OCR on the rendered page + const { data } = await worker.recognize(canvas); + if (data.text.trim()) { + ocrTextParts.push(data.text.trim()); + } + + // Update progress (spread OCR progress from 35% to 85%) + const pageProgress = 35 + ((i / pdf.numPages) * 50); + onProgress?.(Math.round(pageProgress)); + } + + await worker.terminate(); + + if (ocrTextParts.length > 0) { + return ocrTextParts.join('\n\n'); + } + } catch (ocrError) { + console.warn('OCR fallback failed:', ocrError); + // Fall through to the error message below + } + + return `[This PDF contains no extractable text and OCR could not recover text. It may contain only vector graphics or be empty.]`; } /* ============================================ @@ -145,8 +197,8 @@ async function pdfToText(file: File): Promise { Extracts text per page, wraps in styled HTML ============================================ */ -async function pdfToHtml(file: File): Promise { - const text = await pdfToText(file); +async function pdfToHtml(file: File, onProgress?: (progress: number) => void): Promise { + const text = await pdfToText(file, onProgress); const paragraphs = text.split(/\n\n+/).filter(Boolean); const bodyHtml = paragraphs.map((p) => `

${escapeHtml(p)}

`).join('\n'); return wrapInStyledHtml(bodyHtml, file.name.replace(/\.pdf$/i, '')); @@ -156,8 +208,8 @@ async function pdfToHtml(file: File): Promise { PDF → Markdown ============================================ */ -async function pdfToMarkdown(file: File): Promise { - const text = await pdfToText(file); +async function pdfToMarkdown(file: File, onProgress?: (progress: number) => void): Promise { + const text = await pdfToText(file, onProgress); // Attempt to detect headings (ALL CAPS lines, short lines) const lines = text.split('\n'); const mdLines: string[] = []; @@ -184,8 +236,8 @@ async function pdfToMarkdown(file: File): Promise { Extracts text, builds DOCX using docx package ============================================ */ -async function pdfToDocx(file: File): Promise { - const text = await pdfToText(file); +async function pdfToDocx(file: File, onProgress?: (progress: number) => void): Promise { + const text = await pdfToText(file, onProgress); return textToDocx(text); } @@ -711,17 +763,17 @@ export async function convertDocument( /* ---- PDF source ---- */ case 'pdf': { if (targetFormat === 'txt') { - const text = await pdfToText(file); + const text = await pdfToText(file, onProgress); resultBlob = new Blob([text], { type: 'text/plain' }); } else if (targetFormat === 'html') { - const html = await pdfToHtml(file); + const html = await pdfToHtml(file, onProgress); resultBlob = new Blob([html], { type: 'text/html' }); } else if (targetFormat === 'md') { - const md = await pdfToMarkdown(file); + const md = await pdfToMarkdown(file, onProgress); resultBlob = new Blob([md], { type: 'text/markdown' }); } else if (targetFormat === 'docx') { onProgress?.(50); - resultBlob = await pdfToDocx(file); + resultBlob = await pdfToDocx(file, onProgress); } else { throw new Error(`Unsupported: pdf → ${targetFormat}`); }