package converter import ( "archive/zip" "fmt" "io" "os" "os/exec" "strings" "github.com/russross/blackfriday/v2" ) func convertDocument(inputPath, outputPath, sourceExt, targetFormat string) error { raw, err := os.ReadFile(inputPath) if err != nil { return fmt.Errorf("reading document: %w", err) } content := string(raw) switch sourceExt { case "txt": return convertFromTxt(content, outputPath, targetFormat) case "md": return convertFromMarkdown(content, outputPath, targetFormat) case "html", "htm": return convertFromHTML(content, outputPath, targetFormat) case "rtf": return convertFromRTF(content, outputPath, targetFormat) case "docx": return convertDocx(inputPath, outputPath, targetFormat) case "pdf": return convertPdf(inputPath, outputPath, targetFormat) default: return fmt.Errorf("unsupported document source: %s", sourceExt) } } // ─── TXT conversions ───────────────────────────────────────── func convertFromTxt(content, outputPath, target string) error { switch target { case "html": html := "
" + escapeHTML(content) + "" return os.WriteFile(outputPath, []byte(html), 0o644) case "md": return os.WriteFile(outputPath, []byte(content), 0o644) case "pdf": return textToPDF(content, outputPath) default: return fmt.Errorf("unsupported target for txt: %s", target) } } // ─── Markdown conversions ──────────────────────────────────── func convertFromMarkdown(content, outputPath, target string) error { switch target { case "html": html := blackfriday.Run([]byte(content)) wrapped := "" + string(html) + "" return os.WriteFile(outputPath, []byte(wrapped), 0o644) case "txt": text := stripMarkdown(content) return os.WriteFile(outputPath, []byte(text), 0o644) case "pdf": html := string(blackfriday.Run([]byte(content))) return htmlToPDF(html, outputPath) default: return fmt.Errorf("unsupported target for md: %s", target) } } // ─── HTML conversions ──────────────────────────────────────── func convertFromHTML(content, outputPath, target string) error { switch target { case "txt": text := stripHTMLTags(content) return os.WriteFile(outputPath, []byte(text), 0o644) case "md": md := htmlToMarkdown(content) return os.WriteFile(outputPath, []byte(md), 0o644) case "pdf": return htmlToPDF(content, outputPath) default: return fmt.Errorf("unsupported target for html: %s", target) } } // ─── RTF conversions ───────────────────────────────────────── func convertFromRTF(content, outputPath, target string) error { text := stripRTF(content) switch target { case "txt": return os.WriteFile(outputPath, []byte(text), 0o644) case "html": html := "
" + escapeHTML(text) + "" return os.WriteFile(outputPath, []byte(html), 0o644) case "md": return os.WriteFile(outputPath, []byte(text), 0o644) default: return fmt.Errorf("unsupported target for rtf: %s", target) } } // ─── DOCX conversions ──────────────────────────────────────── func convertDocx(inputPath, outputPath, target string) error { text, err := extractDocxText(inputPath) if err != nil { return fmt.Errorf("extracting DOCX text: %w", err) } switch target { case "txt": return os.WriteFile(outputPath, []byte(text), 0o644) case "html": html := "
" + escapeHTML(text) + "" return os.WriteFile(outputPath, []byte(html), 0o644) case "md": return os.WriteFile(outputPath, []byte(text), 0o644) case "pdf": return textToPDF(text, outputPath) default: return fmt.Errorf("unsupported target for docx: %s", target) } } // ─── PDF conversions ───────────────────────────────────────── func convertPdf(inputPath, outputPath, target string) error { text, err := extractPDFText(inputPath) if err != nil { return fmt.Errorf("extracting PDF text: %w", err) } switch target { case "txt": return os.WriteFile(outputPath, []byte(text), 0o644) case "html": html := "
" + escapeHTML(text) + "" return os.WriteFile(outputPath, []byte(html), 0o644) case "md": return os.WriteFile(outputPath, []byte(text), 0o644) default: return fmt.Errorf("unsupported target for pdf: %s", target) } } // ─── Helpers ───────────────────────────────────────────────── func escapeHTML(s string) string { s = strings.ReplaceAll(s, "&", "&") s = strings.ReplaceAll(s, "<", "<") s = strings.ReplaceAll(s, ">", ">") return s } func stripHTMLTags(html string) string { var result strings.Builder inTag := false for _, r := range html { switch { case r == '<': inTag = true case r == '>': inTag = false case !inTag: result.WriteRune(r) } } return strings.TrimSpace(result.String()) } func stripMarkdown(md string) string { lines := strings.Split(md, "\n") var result []string for _, line := range lines { line = strings.TrimLeft(line, "# ") line = strings.ReplaceAll(line, "**", "") line = strings.ReplaceAll(line, "*", "") line = strings.ReplaceAll(line, "__", "") line = strings.ReplaceAll(line, "_", "") line = strings.ReplaceAll(line, "`", "") result = append(result, line) } return strings.Join(result, "\n") } func htmlToMarkdown(html string) string { md := html md = strings.ReplaceAll(md, "
", "\n") md = strings.ReplaceAll(md, "
", "\n") md = strings.ReplaceAll(md, "", "**") md = strings.ReplaceAll(md, "", "**") md = strings.ReplaceAll(md, "", "*") md = strings.ReplaceAll(md, "", "*") md = strings.ReplaceAll(md, "