From 221a72b2bffe503347dd78cc6c6a8d3a4f5167cd Mon Sep 17 00:00:00 2001
From: noah <auf230184@spengergasse.at>
Date: Mon, 9 Mar 2026 18:12:42 +0100
Subject: [PATCH] fix: rewrite document converter to preserve formatting in PDF
 output

For DOCX, Markdown and HTML sources, replace the jsPDF.text() plain-text dump
with an html2canvas-pro capture of the styled DOM, which is sliced into A4
pages and embedded in the PDF via jsPDF.addImage(). DOCX headings, bold,
italic, tables, lists and links now render correctly. Plain .txt input still
goes through jsPDF.text(). All HTML output is also wrapped in a full styled
document with embedded CSS. The HTML-to-Markdown converter is rewritten as a
DOM-walking parser instead of fragile regexes.
---
 package-lock.json                       |  22 +-
 package.json                            |   1 +
 src/lib/converters/documentConverter.ts | 520 +++++++++++++++++++++---
 3 files changed, 491 insertions(+), 52 deletions(-)

diff --git a/package-lock.json b/package-lock.json
index 59fbac5..8c37d7a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,17 +1,18 @@
 {
-  "name": "2026-03-03_dbi_swift_macos_liquidglass",
+  "name": "transmute",
   "version": "0.1.0",
   "lockfileVersion": 3,
   "requires": true,
   "packages": {
     "": {
-      "name": "2026-03-03_dbi_swift_macos_liquidglass",
+      "name": "transmute",
       "version": "0.1.0",
       "dependencies": {
         "@ffmpeg/ffmpeg": "^0.12.15",
         "@ffmpeg/util": "^0.12.2",
         "fast-xml-parser": "^5.4.2",
         "framer-motion": "^12.35.2",
+        "html2canvas-pro": "^2.0.2",
         "js-yaml": "^4.1.1",
         "jspdf": "^4.2.0",
         "jszip": "^3.10.1",
@@ -2564,7 +2565,6 @@
       "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz",
       "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==",
       "license": "MIT",
-      "optional": true,
       "engines": {
         "node": ">= 0.6.0"
       }
@@ -2860,7 +2860,6 @@
       "resolved": "https://registry.npmjs.org/css-line-break/-/css-line-break-2.1.0.tgz",
       "integrity": "sha512-FHcKFCZcAha3LwfVBhCQbW2nCNbkZXn7KVUJcsT5/P8YmfsVja0FMPJr0B903j/E69HUphKiV9iQArX8SDYA4w==",
       "license": "MIT",
-      "optional": true,
       "dependencies": {
         "utrie": "^1.0.2"
       }
@@ -4253,6 +4252,19 @@
         "node": ">=8.0.0"
       }
     },
+    "node_modules/html2canvas-pro": {
+      "version": "2.0.2",
+      "resolved": "https://registry.npmjs.org/html2canvas-pro/-/html2canvas-pro-2.0.2.tgz",
+      "integrity": "sha512-9G/t0XgCZWonLwL0JwI7su6NdbOPUY7Ur4Ihpp8+XMaW9ibA2nDXF181Jr6tm94k8lX6sthpaXB3XqEnsMd5Cw==",
+      "license": "MIT",
+      "dependencies": {
+        "css-line-break": "^2.1.0",
+        "text-segmentation": "^1.0.3"
+      },
+      "engines": {
+        "node": ">=16.0.0"
+      }
+    },
     "node_modules/ignore": {
       "version": "5.3.2",
       "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
@@ -6729,7 +6741,6 @@
       "resolved": "https://registry.npmjs.org/text-segmentation/-/text-segmentation-1.0.3.tgz",
       "integrity": "sha512-iOiPUo/BGnZ6+54OsWxZidGCsdU8YbE4PSpdPinp7DeMtUJNJBoJ/ouUSTJjHkh1KntHaltHl/gDs2FC4i5+Nw==",
       "license": "MIT",
-      "optional": true,
       "dependencies": {
         "utrie": "^1.0.2"
       }
@@ -7088,7 +7099,6 @@
       "resolved": "https://registry.npmjs.org/utrie/-/utrie-1.0.2.tgz",
       "integrity": "sha512-1MLa5ouZiOmQzUbjbu9VmjLzn1QLXBhwpUa7kdLUQK+KQ5KA9I1vk5U4YHe/X2Ch7PYnJfWuWT+VbuxbGwljhw==",
       "license": "MIT",
-      "optional": true,
       "dependencies": {
         "base64-arraybuffer": "^1.0.2"
       }
diff --git a/package.json b/package.json
index f07fe67..b5ece41 100644
--- a/package.json
+++ b/package.json
@@ -13,6 +13,7 @@
     "@ffmpeg/util": "^0.12.2",
     "fast-xml-parser": "^5.4.2",
     "framer-motion": "^12.35.2",
+    "html2canvas-pro": "^2.0.2",
     "js-yaml": "^4.1.1",
     "jspdf": "^4.2.0",
     "jszip": "^3.10.1",
diff --git a/src/lib/converters/documentConverter.ts b/src/lib/converters/documentConverter.ts
index 06da972..e0f43bb 100644
--- a/src/lib/converters/documentConverter.ts
+++ b/src/lib/converters/documentConverter.ts
@@ -2,6 +2,10 @@ import { ConversionResult } from '@/types';
 import { buildOutputFilename } from '@/lib/utils';
 import { getExtension } from '@/lib/fileDetector';
 
+/* ============================================
+   File reading helpers
+   ============================================ */
+
 async function readFileAsText(file: File): Promise<string> {
   return new Promise((resolve, reject) => {
     const reader = new FileReader();
@@ -20,10 +24,190 @@ async function readFileAsArrayBuffer(file: File): Promise<ArrayBuffer> {
   });
 }
 
+/* ============================================
+   Styled HTML document wrapper
+
+   This is used for all HTML output and as the
+   intermediate step for PDF rendering. Embeds
+   full CSS so the document looks correct both
+   as a standalone .html file and when it is
+   rasterized for PDF with html2canvas-pro.
+   ============================================ */
+
+function wrapInStyledHtml(bodyHtml: string, title: string): string {
+  return `<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="utf-8">
+<meta name="viewport" content="width=device-width, initial-scale=1">
+<title>${escapeHtml(title)}</title>
+<style>
+  /* Reset */
+  *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
+
+  body {
+    font-family: 'Segoe UI', -apple-system, BlinkMacSystemFont, 'Helvetica Neue', Arial, sans-serif;
+    font-size: 14px;
+    line-height: 1.7;
+    color: #1a1a1a;
+    background: #ffffff;
+    padding: 40px;
+    max-width: 800px;
+    margin: 0 auto;
+  }
+
+  /* Headings */
+  h1, h2, h3, h4, h5, h6 {
+    margin-top: 1.4em;
+    margin-bottom: 0.6em;
+    font-weight: 700;
+    line-height: 1.3;
+    color: #111111;
+  }
+  h1 { font-size: 2em; border-bottom: 2px solid #e5e5e5; padding-bottom: 0.3em; }
+  h2 { font-size: 1.5em; border-bottom: 1px solid #eeeeee; padding-bottom: 0.25em; }
+  h3 { font-size: 1.25em; }
+  h4 { font-size: 1.1em; }
+  h5, h6 { font-size: 1em; color: #555555; }
+
+  /* Paragraphs & inline */
+  p { margin-bottom: 1em; }
+  strong, b { font-weight: 700; }
+  em, i { font-style: italic; }
+  u { text-decoration: underline; }
+  s, strike, del { text-decoration: line-through; color: #888; }
+  small { font-size: 0.85em; }
+  sup { vertical-align: super; font-size: 0.75em; }
+  sub { vertical-align: sub; font-size: 0.75em; }
+  mark { background: #fff3b0; padding: 0.1em 0.2em; border-radius: 2px; }
+  abbr { text-decoration: underline dotted; cursor: help; }
+
+  /* Links */
+  a { color: #0066cc; text-decoration: underline; }
+  a:hover { color: #004499; }
+
+  /* Lists */
+  ul, ol { margin-bottom: 1em; padding-left: 2em; }
+  ul ul, ol ol, ul ol, ol ul { margin-bottom: 0; }
+  li { margin-bottom: 0.3em; }
+  li > p { margin-bottom: 0.3em; }
+
+  /* Blockquote */
+  blockquote {
+    margin: 1em 0;
+    padding: 0.8em 1.2em;
+    border-left: 4px solid #0066cc;
+    background: #f6f8fa;
+    color: #333;
+    font-style: italic;
+  }
+  blockquote p:last-child { margin-bottom: 0; }
+
+  /* Code */
+  code {
+    font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
+    font-size: 0.9em;
+    background: #f0f0f0;
+    padding: 0.15em 0.4em;
+    border-radius: 3px;
+    color: #c7254e;
+  }
+  pre {
+    margin: 1em 0;
+    padding: 1em;
+    background: #f6f8fa;
+    border: 1px solid #e1e4e8;
+    border-radius: 6px;
+    overflow-x: auto;
+    font-size: 0.9em;
+    line-height: 1.5;
+  }
+  pre code {
+    background: none;
+    padding: 0;
+    border-radius: 0;
+    color: inherit;
+  }
+
+  /* Tables */
+  table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 1em 0;
+    font-size: 0.95em;
+  }
+  th, td {
+    padding: 8px 12px;
+    border: 1px solid #d0d7de;
+    text-align: left;
+    vertical-align: top;
+  }
+  th {
+    background: #f6f8fa;
+    font-weight: 700;
+    color: #111;
+  }
+  tr:nth-child(even) { background: #fafbfc; }
+  caption {
+    caption-side: bottom;
+    padding: 8px;
+    font-size: 0.9em;
+    color: #666;
+    font-style: italic;
+  }
+
+  /* Horizontal rule */
+  hr {
+    border: none;
+    border-top: 1px solid #e5e5e5;
+    margin: 2em 0;
+  }
+
+  /* Images embedded in documents */
+  img {
+    max-width: 100%;
+    height: auto;
+    border-radius: 4px;
+    margin: 1em 0;
+  }
+
+  /* Definition lists */
+  dl { margin-bottom: 1em; }
+  dt { font-weight: 700; margin-top: 0.5em; }
+  dd { margin-left: 2em; margin-bottom: 0.5em; }
+
+  /* Figure */
+  figure { margin: 1.5em 0; text-align: center; }
+  figcaption { font-size: 0.9em; color: #666; margin-top: 0.5em; font-style: italic; }
+
+  /* First element shouldn't have top margin */
+  body > *:first-child { margin-top: 0; }
+</style>
+</head>
+<body>
+${bodyHtml}
+</body>
+</html>`;
+}
+
+function escapeHtml(text: string): string {
+  return text
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;');
+}
+
+/* ============================================
+   Source → HTML conversions
+   ============================================ */
+
 async function docxToHtml(file: File): Promise<string> {
   const mammoth = await import('mammoth');
   const arrayBuffer = await readFileAsArrayBuffer(file);
-  const result = await mammoth.convertToHtml({ arrayBuffer });
+  const result = await mammoth.convertToHtml({
+    arrayBuffer,
+  });
   return result.value;
 }
 
@@ -46,50 +230,271 @@ function htmlToText(html: string): string {
 }
 
 function htmlToMarkdown(html: string): string {
-  let md = html;
-  md = md.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n\n');
-  md = md.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n\n');
-  md = md.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n\n');
-  md = md.replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n\n');
-  md = md.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**');
-  md = md.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**');
-  md = md.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*');
-  md = md.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*');
-  md = md.replace(/<a[^>]*href="([^"]*)"[^>]*>(.*?)<\/a>/gi, '[$2]($1)');
-  md = md.replace(/<br\s*\/?>/gi, '\n');
-  md = md.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n');
-  md = md.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n');
-  md = md.replace(/<[^>]+>/g, '');
-  md = md.replace(/&nbsp;/g, ' ');
-  md = md.replace(/&amp;/g, '&');
-  md = md.replace(/&lt;/g, '<');
-  md = md.replace(/&gt;/g, '>');
-  return md.trim();
+  // Parse with DOMParser, then convert by walking the DOM; each element maps its converted children to Markdown
+  const parser = new DOMParser();
+  const doc = parser.parseFromString(html, 'text/html');
+
+  function walk(node: Node): string {
+    if (node.nodeType === Node.TEXT_NODE) {
+      return node.textContent || '';
+    }
+
+    if (node.nodeType !== Node.ELEMENT_NODE) return '';
+
+    const el = node as Element;
+    const tag = el.tagName.toLowerCase();
+    const children = Array.from(el.childNodes).map(walk).join('');
+
+    switch (tag) {
+      case 'h1': return `# ${children.trim()}\n\n`;
+      case 'h2': return `## ${children.trim()}\n\n`;
+      case 'h3': return `### ${children.trim()}\n\n`;
+      case 'h4': return `#### ${children.trim()}\n\n`;
+      case 'h5': return `##### ${children.trim()}\n\n`;
+      case 'h6': return `###### ${children.trim()}\n\n`;
+      case 'p': return `${children.trim()}\n\n`;
+      case 'br': return '\n';
+      case 'hr': return '\n---\n\n';
+      case 'strong':
+      case 'b': return `**${children}**`;
+      case 'em':
+      case 'i': return `*${children}*`;
+      case 'u': return `<u>${children}</u>`;
+      case 's':
+      case 'strike':
+      case 'del': return `~~${children}~~`;
+      case 'code': return `\`${children}\``;
+      case 'pre': return `\n\`\`\`\n${(el.textContent || '').replace(/^\n+|\s+$/g, '')}\n\`\`\`\n\n`; // raw text: a nested <code> must not add inline backticks inside the fence
+      case 'blockquote': return children.trim().split('\n').map(l => `> ${l}`.trimEnd()).join('\n') + '\n\n'; // trim first so the children's trailing newlines don't become empty quote lines
+      case 'a': {
+        const href = el.getAttribute('href') || '';
+        return `[${children}](${href})`;
+      }
+      case 'img': {
+        const src = el.getAttribute('src') || '';
+        const alt = el.getAttribute('alt') || '';
+        return `![${alt}](${src})`;
+      }
+      case 'ul': {
+        const items = Array.from(el.children)
+          .filter(c => c.tagName.toLowerCase() === 'li')
+          .map(c => `- ${walk(c).trim()}`)
+          .join('\n');
+        return `${items}\n\n`;
+      }
+      case 'ol': {
+        const items = Array.from(el.children)
+          .filter(c => c.tagName.toLowerCase() === 'li')
+          .map((c, i) => `${i + 1}. ${walk(c).trim()}`)
+          .join('\n');
+        return `${items}\n\n`;
+      }
+      case 'li': return children;
+      case 'table': {
+        const rows = Array.from(el.querySelectorAll('tr'));
+        if (rows.length === 0) return children;
+
+        const tableData: string[][] = rows.map(row =>
+          Array.from(row.querySelectorAll('th, td')).map(cell => walk(cell).trim().replace(/\s*\n\s*/g, ' ').replace(/\|/g, '\\|')) // keep each cell on one line and escape '|' so the row stays intact
+        );
+
+        if (tableData.length === 0) return '';
+
+        const colCount = Math.max(...tableData.map(r => r.length));
+        const colWidths = Array.from({ length: colCount }, (_, i) =>
+          Math.max(3, ...tableData.map(r => (r[i] || '').length))
+        );
+
+        const formatRow = (row: string[]) =>
+          '| ' + colWidths.map((w, i) => (row[i] || '').padEnd(w)).join(' | ') + ' |';
+
+        const separator = '| ' + colWidths.map(w => '-'.repeat(w)).join(' | ') + ' |';
+
+        const lines = [formatRow(tableData[0]), separator, ...tableData.slice(1).map(formatRow)];
+        return lines.join('\n') + '\n\n';
+      }
+      case 'div':
+      case 'section':
+      case 'article':
+      case 'main':
+      case 'span':
+        return children;
+      default:
+        return children;
+    }
+  }
+
+  return walk(doc.body).replace(/\n{3,}/g, '\n\n').trim();
 }
 
-async function textToPdf(text: string): Promise<Blob> {
+/* ============================================
+   HTML → PDF via html2canvas-pro + jsPDF
+
+   Renders a styled HTML document into a PDF by
+   injecting it into an off-screen DOM container,
+   capturing it with html2canvas-pro, and adding
+   the capture to A4 pages as JPEG images.
+   ============================================ */
+
+async function renderHtmlToPdf(htmlContent: string): Promise<Blob> {
+  const { jsPDF } = await import('jspdf');
+  // html2canvas-pro is called directly below to rasterize the DOM;
+  // jsPDF only assembles the captured image(s) into A4 pages
+  const html2canvas = (await import('html2canvas-pro')).default;
+
+  // Create a hidden container for rendering
+  const container = document.createElement('div');
+  container.style.position = 'fixed';
+  container.style.left = '-10000px';
+  container.style.top = '0';
+  container.style.width = '794px'; // A4 width in px at 96dpi
+  container.style.background = '#ffffff';
+  container.style.zIndex = '-9999';
+
+  // Parse the HTML and inject just the body + styles
+  const parser = new DOMParser();
+  const parsed = parser.parseFromString(htmlContent, 'text/html');
+
+  // NOTE(review): only the first <style> is copied, and it joins the live document, so global rules (*, body) also affect the host page until cleanup
+  const styleEl = parsed.querySelector('style');
+  const bodyContent = parsed.body.innerHTML;
+
+  if (styleEl) {
+    const style = document.createElement('style');
+    style.textContent = styleEl.textContent;
+    container.appendChild(style);
+  }
+
+  const content = document.createElement('div');
+  content.innerHTML = bodyContent; // NOTE(review): unsanitized; inline on* handlers in untrusted HTML would run here
+  content.style.padding = '40px';
+  content.style.fontFamily = "'Segoe UI', -apple-system, BlinkMacSystemFont, 'Helvetica Neue', Arial, sans-serif";
+  content.style.fontSize = '14px';
+  content.style.lineHeight = '1.7';
+  content.style.color = '#1a1a1a';
+  container.appendChild(content);
+
+  document.body.appendChild(container);
+
+  // Fixed 100 ms pause before capture; this does not actually await font or image loading
+  await new Promise((resolve) => setTimeout(resolve, 100));
+
+  try {
+    // A4 dimensions in mm: 210 x 297
+    const pdfWidth = 210;
+    const pdfHeight = 297;
+    const margin = 15; // mm
+
+    // Capture the rendered content as a canvas
+    const canvas = await html2canvas(content, {
+      scale: 2, // Higher resolution
+      useCORS: true,
+      allowTaint: false, // the canvas must stay untainted, otherwise toDataURL() below throws
+      backgroundColor: '#ffffff',
+      width: 794,
+      windowWidth: 794,
+    });
+
+    // Calculate how the content maps to PDF pages
+    const imgWidth = pdfWidth - margin * 2;
+    const imgHeight = (canvas.height * imgWidth) / canvas.width;
+
+    const doc = new jsPDF('p', 'mm', 'a4');
+    const pageContentHeight = pdfHeight - margin * 2;
+
+    if (imgHeight <= pageContentHeight) {
+      // Single page — fits entirely
+      doc.addImage(
+        canvas.toDataURL('image/jpeg', 0.95),
+        'JPEG',
+        margin,
+        margin,
+        imgWidth,
+        imgHeight
+      );
+    } else {
+      // Multi-page — slice the canvas into page-sized chunks
+      const totalPages = Math.ceil(imgHeight / pageContentHeight);
+
+      for (let page = 0; page < totalPages; page++) {
+        if (page > 0) doc.addPage();
+
+        // Calculate the portion of the source canvas for this page
+        const sourceY = (page * pageContentHeight * canvas.width) / imgWidth;
+        const sourceHeight = Math.min(
+          (pageContentHeight * canvas.width) / imgWidth,
+          canvas.height - sourceY
+        );
+
+        // Create a canvas slice for this page
+        const pageCanvas = document.createElement('canvas');
+        pageCanvas.width = canvas.width;
+        pageCanvas.height = sourceHeight;
+
+        const ctx = pageCanvas.getContext('2d');
+        if (ctx) {
+          ctx.fillStyle = '#ffffff';
+          ctx.fillRect(0, 0, pageCanvas.width, pageCanvas.height);
+          ctx.drawImage(
+            canvas,
+            0, sourceY,
+            canvas.width, sourceHeight,
+            0, 0,
+            canvas.width, sourceHeight
+          );
+        }
+
+        const sliceHeight = (sourceHeight * imgWidth) / canvas.width;
+
+        doc.addImage(
+          pageCanvas.toDataURL('image/jpeg', 0.95),
+          'JPEG',
+          margin,
+          margin,
+          imgWidth,
+          sliceHeight
+        );
+      }
+    }
+
+    return doc.output('blob');
+  } finally {
+    document.body.removeChild(container);
+  }
+}
+
+/* ============================================
+   Plain text → PDF (for .txt files)
+   Still uses jsPDF.text() since plain text
+   has no formatting to preserve.
+   ============================================ */
+
+async function plainTextToPdf(text: string): Promise<Blob> {
   const { jsPDF } = await import('jspdf');
   const doc = new jsPDF();
-  const lines = doc.splitTextToSize(text, 180);
-  let y = 15;
+
+  doc.setFont('courier', 'normal');
+  doc.setFontSize(11);
+
+  const lines = doc.splitTextToSize(text, 170);
+  let y = 20;
   const pageHeight = doc.internal.pageSize.getHeight();
 
   for (const line of lines) {
-    if (y > pageHeight - 15) {
+    if (y > pageHeight - 20) {
       doc.addPage();
-      y = 15;
+      y = 20;
     }
-    doc.text(line, 15, y);
-    y += 7;
+    doc.text(line, 20, y);
+    y += 6;
   }
 
   return doc.output('blob');
 }
 
-async function htmlToPdf(html: string): Promise<Blob> {
-  const text = htmlToText(html);
-  return textToPdf(text);
-}
+/* ============================================
+   PDF → Text extraction
+   ============================================ */
 
 async function pdfToText(file: File): Promise<string> {
   const { PDFDocument } = await import('pdf-lib');
@@ -117,6 +522,10 @@ async function pdfToText(file: File): Promise<string> {
   return text;
 }
 
+/* ============================================
+   Main export
+   ============================================ */
+
 export async function convertDocument(
   file: File,
   targetFormat: string,
@@ -132,14 +541,18 @@ export async function convertDocument(
   switch (sourceExt) {
     case 'docx': {
       if (targetFormat === 'html') {
-        const html = await docxToHtml(file);
-        resultBlob = new Blob([html], { type: 'text/html' });
+        const bodyHtml = await docxToHtml(file);
+        const styledHtml = wrapInStyledHtml(bodyHtml, file.name);
+        resultBlob = new Blob([styledHtml], { type: 'text/html' });
       } else if (targetFormat === 'txt') {
         const text = await docxToText(file);
         resultBlob = new Blob([text], { type: 'text/plain' });
       } else if (targetFormat === 'pdf') {
-        const html = await docxToHtml(file);
-        resultBlob = await htmlToPdf(html);
+        onProgress?.(40);
+        const bodyHtml = await docxToHtml(file);
+        const styledHtml = wrapInStyledHtml(bodyHtml, file.name);
+        onProgress?.(60);
+        resultBlob = await renderHtmlToPdf(styledHtml);
       } else {
         throw new Error(`Unsupported: docx to ${targetFormat}`);
       }
@@ -149,12 +562,20 @@ export async function convertDocument(
     case 'md': {
       const mdText = await readFileAsText(file);
       if (targetFormat === 'html') {
-        const html = await markdownToHtml(mdText);
-        resultBlob = new Blob([html], { type: 'text/html' });
+        const bodyHtml = await markdownToHtml(mdText);
+        const styledHtml = wrapInStyledHtml(bodyHtml, file.name);
+        resultBlob = new Blob([styledHtml], { type: 'text/html' });
       } else if (targetFormat === 'pdf') {
-        resultBlob = await textToPdf(mdText);
+        onProgress?.(40);
+        const bodyHtml = await markdownToHtml(mdText);
+        const styledHtml = wrapInStyledHtml(bodyHtml, file.name);
+        onProgress?.(60);
+        resultBlob = await renderHtmlToPdf(styledHtml);
       } else if (targetFormat === 'txt') {
-        resultBlob = new Blob([mdText], { type: 'text/plain' });
+        // Strip markdown syntax for plain text
+        const bodyHtml = await markdownToHtml(mdText);
+        const text = htmlToText(bodyHtml);
+        resultBlob = new Blob([text], { type: 'text/plain' });
       } else {
         throw new Error(`Unsupported: md to ${targetFormat}`);
       }
@@ -163,14 +584,20 @@ export async function convertDocument(
 
     case 'html':
     case 'htm': {
-      const html = await readFileAsText(file);
+      const rawHtml = await readFileAsText(file);
       if (targetFormat === 'pdf') {
-        resultBlob = await htmlToPdf(html);
+        onProgress?.(40);
+        // If the HTML is already a full document (contains <!doctype or <html), use it as-is;
+        // otherwise wrap the fragment in our styled wrapper
+        const hasFullDoc = rawHtml.toLowerCase().includes('<!doctype') || rawHtml.toLowerCase().includes('<html');
+        const htmlForPdf = hasFullDoc ? rawHtml : wrapInStyledHtml(rawHtml, file.name);
+        onProgress?.(60);
+        resultBlob = await renderHtmlToPdf(htmlForPdf);
       } else if (targetFormat === 'txt') {
-        const text = htmlToText(html);
+        const text = htmlToText(rawHtml);
         resultBlob = new Blob([text], { type: 'text/plain' });
       } else if (targetFormat === 'md') {
-        const md = htmlToMarkdown(html);
+        const md = htmlToMarkdown(rawHtml);
         resultBlob = new Blob([md], { type: 'text/markdown' });
       } else {
         throw new Error(`Unsupported: html to ${targetFormat}`);
@@ -181,10 +608,11 @@ export async function convertDocument(
     case 'txt': {
       const text = await readFileAsText(file);
       if (targetFormat === 'pdf') {
-        resultBlob = await textToPdf(text);
+        resultBlob = await plainTextToPdf(text);
       } else if (targetFormat === 'html') {
-        const html = `<!DOCTYPE html><html><head><meta charset="utf-8"></head><body><pre>${text.replace(/</g, '&lt;').replace(/>/g, '&gt;')}</pre></body></html>`;
-        resultBlob = new Blob([html], { type: 'text/html' });
+        const bodyHtml = `<pre><code>${escapeHtml(text)}</code></pre>`;
+        const styledHtml = wrapInStyledHtml(bodyHtml, file.name);
+        resultBlob = new Blob([styledHtml], { type: 'text/html' });
       } else if (targetFormat === 'md') {
         resultBlob = new Blob([text], { type: 'text/markdown' });
       } else {