04a1f33cb1
- Full-screen Bubble Tea TUI with cream background fill using PadLine/FillBlankLines - Self-update command (--update) pulling from GitHub releases - install.sh for curl one-liner installation - Terminal Lovers section on web landing page with install command and CLI features - All 7 format categories, glob/directory batch support, auto-download ffmpeg
327 lines
9.6 KiB
Go
327 lines
9.6 KiB
Go
package converter
|
|
|
|
import (
|
|
"archive/zip"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"os/exec"
|
|
"strings"
|
|
|
|
"github.com/russross/blackfriday/v2"
|
|
)
|
|
|
|
func convertDocument(inputPath, outputPath, sourceExt, targetFormat string) error {
|
|
raw, err := os.ReadFile(inputPath)
|
|
if err != nil {
|
|
return fmt.Errorf("reading document: %w", err)
|
|
}
|
|
content := string(raw)
|
|
|
|
switch sourceExt {
|
|
case "txt":
|
|
return convertFromTxt(content, outputPath, targetFormat)
|
|
case "md":
|
|
return convertFromMarkdown(content, outputPath, targetFormat)
|
|
case "html", "htm":
|
|
return convertFromHTML(content, outputPath, targetFormat)
|
|
case "rtf":
|
|
return convertFromRTF(content, outputPath, targetFormat)
|
|
case "docx":
|
|
return convertDocx(inputPath, outputPath, targetFormat)
|
|
case "pdf":
|
|
return convertPdf(inputPath, outputPath, targetFormat)
|
|
default:
|
|
return fmt.Errorf("unsupported document source: %s", sourceExt)
|
|
}
|
|
}
|
|
|
|
// ─── TXT conversions ─────────────────────────────────────────
|
|
|
|
func convertFromTxt(content, outputPath, target string) error {
|
|
switch target {
|
|
case "html":
|
|
html := "<html><body><pre>" + escapeHTML(content) + "</pre></body></html>"
|
|
return os.WriteFile(outputPath, []byte(html), 0o644)
|
|
case "md":
|
|
return os.WriteFile(outputPath, []byte(content), 0o644)
|
|
case "pdf":
|
|
return textToPDF(content, outputPath)
|
|
default:
|
|
return fmt.Errorf("unsupported target for txt: %s", target)
|
|
}
|
|
}
|
|
|
|
// ─── Markdown conversions ────────────────────────────────────
|
|
|
|
func convertFromMarkdown(content, outputPath, target string) error {
|
|
switch target {
|
|
case "html":
|
|
html := blackfriday.Run([]byte(content))
|
|
wrapped := "<html><body>" + string(html) + "</body></html>"
|
|
return os.WriteFile(outputPath, []byte(wrapped), 0o644)
|
|
case "txt":
|
|
text := stripMarkdown(content)
|
|
return os.WriteFile(outputPath, []byte(text), 0o644)
|
|
case "pdf":
|
|
html := string(blackfriday.Run([]byte(content)))
|
|
return htmlToPDF(html, outputPath)
|
|
default:
|
|
return fmt.Errorf("unsupported target for md: %s", target)
|
|
}
|
|
}
|
|
|
|
// ─── HTML conversions ────────────────────────────────────────
|
|
|
|
func convertFromHTML(content, outputPath, target string) error {
|
|
switch target {
|
|
case "txt":
|
|
text := stripHTMLTags(content)
|
|
return os.WriteFile(outputPath, []byte(text), 0o644)
|
|
case "md":
|
|
md := htmlToMarkdown(content)
|
|
return os.WriteFile(outputPath, []byte(md), 0o644)
|
|
case "pdf":
|
|
return htmlToPDF(content, outputPath)
|
|
default:
|
|
return fmt.Errorf("unsupported target for html: %s", target)
|
|
}
|
|
}
|
|
|
|
// ─── RTF conversions ─────────────────────────────────────────
|
|
|
|
func convertFromRTF(content, outputPath, target string) error {
|
|
text := stripRTF(content)
|
|
switch target {
|
|
case "txt":
|
|
return os.WriteFile(outputPath, []byte(text), 0o644)
|
|
case "html":
|
|
html := "<html><body><pre>" + escapeHTML(text) + "</pre></body></html>"
|
|
return os.WriteFile(outputPath, []byte(html), 0o644)
|
|
case "md":
|
|
return os.WriteFile(outputPath, []byte(text), 0o644)
|
|
default:
|
|
return fmt.Errorf("unsupported target for rtf: %s", target)
|
|
}
|
|
}
|
|
|
|
// ─── DOCX conversions ────────────────────────────────────────
|
|
|
|
func convertDocx(inputPath, outputPath, target string) error {
|
|
text, err := extractDocxText(inputPath)
|
|
if err != nil {
|
|
return fmt.Errorf("extracting DOCX text: %w", err)
|
|
}
|
|
|
|
switch target {
|
|
case "txt":
|
|
return os.WriteFile(outputPath, []byte(text), 0o644)
|
|
case "html":
|
|
html := "<html><body><pre>" + escapeHTML(text) + "</pre></body></html>"
|
|
return os.WriteFile(outputPath, []byte(html), 0o644)
|
|
case "md":
|
|
return os.WriteFile(outputPath, []byte(text), 0o644)
|
|
case "pdf":
|
|
return textToPDF(text, outputPath)
|
|
default:
|
|
return fmt.Errorf("unsupported target for docx: %s", target)
|
|
}
|
|
}
|
|
|
|
// ─── PDF conversions ─────────────────────────────────────────
|
|
|
|
func convertPdf(inputPath, outputPath, target string) error {
|
|
text, err := extractPDFText(inputPath)
|
|
if err != nil {
|
|
return fmt.Errorf("extracting PDF text: %w", err)
|
|
}
|
|
|
|
switch target {
|
|
case "txt":
|
|
return os.WriteFile(outputPath, []byte(text), 0o644)
|
|
case "html":
|
|
html := "<html><body><pre>" + escapeHTML(text) + "</pre></body></html>"
|
|
return os.WriteFile(outputPath, []byte(html), 0o644)
|
|
case "md":
|
|
return os.WriteFile(outputPath, []byte(text), 0o644)
|
|
default:
|
|
return fmt.Errorf("unsupported target for pdf: %s", target)
|
|
}
|
|
}
|
|
|
|
// ─── Helpers ─────────────────────────────────────────────────
|
|
|
|
// escapeHTML returns s with the characters that are significant in HTML text
// content replaced by character references: '&' becomes "&amp;", '<' becomes
// "&lt;" and '>' becomes "&gt;".
//
// Quotes are left untouched, so the result is suitable for element content
// (such as the body of a <pre>) but not for attribute values.
func escapeHTML(s string) string {
	// A single-pass replacer never re-scans its own output, so the '&' that
	// starts "&lt;" or "&gt;" cannot be escaped a second time.
	return strings.NewReplacer(
		"&", "&amp;",
		"<", "&lt;",
		">", "&gt;",
	).Replace(s)
}
|
|
|
|
// stripHTMLTags removes markup from html and returns the remaining text with
// leading and trailing whitespace trimmed.
//
// Everything from a '<' up to and including the next '>' is treated as a tag
// and dropped; an unterminated '<' drops the rest of the input. A '>' that
// appears outside a tag is ordinary text and is kept.
//
// Character references such as "&amp;" are not decoded, and the text content
// of every element (including <script> and <style>) is kept.
func stripHTMLTags(html string) string {
	var text strings.Builder
	text.Grow(len(html))

	inTag := false
	for _, r := range html {
		switch {
		case r == '<':
			inTag = true
		case r == '>' && inTag:
			// Only a '>' that closes an open tag is markup.
			inTag = false
		case !inTag:
			text.WriteRune(r)
		}
	}
	return strings.TrimSpace(text.String())
}
|
|
|
|
// stripMarkdown returns md with basic Markdown syntax removed, line by line:
// ATX heading markers ("# " through "###### ") are dropped from the start of
// heading lines, and every '*', '_' and '`' character is deleted.
//
// The emphasis removal is purely character-based, so those characters are
// also removed where they are not markup (for example in snake_case words or
// "*" list bullets). Links, images, block quotes and other constructs are
// left as written.
func stripMarkdown(md string) string {
	emphasis := strings.NewReplacer("*", "", "_", "", "`", "")

	lines := strings.Split(md, "\n")
	for i, line := range lines {
		lines[i] = emphasis.Replace(stripHeadingMarker(line))
	}
	return strings.Join(lines, "\n")
}

// stripHeadingMarker returns the heading text of an ATX heading line, or line
// unchanged when it is not a heading. A heading is up to three spaces of
// indentation, one to six '#' characters, and then either end of line or
// whitespace; anything else (such as "#hashtag" or indented text) is kept
// byte-for-byte.
func stripHeadingMarker(line string) string {
	body := strings.TrimLeft(line, " ")
	if len(line)-len(body) > 3 {
		return line
	}

	level := len(body) - len(strings.TrimLeft(body, "#"))
	if level == 0 || level > 6 {
		return line
	}

	rest := body[level:]
	if rest != "" && rest[0] != ' ' && rest[0] != '\t' {
		return line
	}
	return strings.TrimLeft(rest, " \t")
}
|
|
|
|
func htmlToMarkdown(html string) string {
|
|
md := html
|
|
md = strings.ReplaceAll(md, "<br>", "\n")
|
|
md = strings.ReplaceAll(md, "<br/>", "\n")
|
|
md = strings.ReplaceAll(md, "<br />", "\n")
|
|
md = strings.ReplaceAll(md, "<p>", "\n")
|
|
md = strings.ReplaceAll(md, "</p>", "\n")
|
|
md = strings.ReplaceAll(md, "<strong>", "**")
|
|
md = strings.ReplaceAll(md, "</strong>", "**")
|
|
md = strings.ReplaceAll(md, "<em>", "*")
|
|
md = strings.ReplaceAll(md, "</em>", "*")
|
|
md = strings.ReplaceAll(md, "<h1>", "# ")
|
|
md = strings.ReplaceAll(md, "</h1>", "\n")
|
|
md = strings.ReplaceAll(md, "<h2>", "## ")
|
|
md = strings.ReplaceAll(md, "</h2>", "\n")
|
|
md = strings.ReplaceAll(md, "<h3>", "### ")
|
|
md = strings.ReplaceAll(md, "</h3>", "\n")
|
|
md = stripHTMLTags(md)
|
|
return strings.TrimSpace(md)
|
|
}
|
|
|
|
// stripRTF extracts plain text from an RTF document and returns it with
// leading and trailing whitespace trimmed.
//
// Handling, in one pass over the bytes:
//   - '{' and '}' open and close groups. Only text at group depth 0 or 1 is
//     kept; text inside nested groups is discarded (presumably to skip header
//     tables such as \fonttbl, but it also drops text in nested formatting
//     groups).
//   - "\\", "\{" and "\}" are escaped literals and produce the character.
//   - "\par" and "\line" produce a newline, "\tab" a tab, and a backslash
//     directly before a CR or LF is treated like "\par".
//   - Every other control word is skipped together with its optional numeric
//     parameter and one delimiting space.
//   - "\'hh" hex escapes are skipped without being decoded, and "\uN" Unicode
//     escapes are not decoded either.
//   - Bare CR and LF bytes are ignored; line breaks come only from the
//     control words above.
func stripRTF(rtf string) string {
	var result strings.Builder
	depth := 0

	// emit keeps b only while inside the outermost group.
	emit := func(b byte) {
		if depth <= 1 {
			result.WriteByte(b)
		}
	}

	for i := 0; i < len(rtf); {
		ch := rtf[i]
		i++

		switch ch {
		case '{':
			depth++
		case '}':
			depth--
		case '\r', '\n':
			// Source line endings carry no meaning in RTF.
		case '\\':
			if i >= len(rtf) {
				break // trailing backslash: nothing follows
			}
			next := rtf[i]
			switch {
			case next == '\'':
				// \'hh: skip the quote and the two hex digits.
				i += 3
			case next == '\\' || next == '{' || next == '}':
				emit(next)
				i++
			case next == '\r' || next == '\n':
				emit('\n')
				i++
			default:
				start := i
				for i < len(rtf) && ((rtf[i] >= 'a' && rtf[i] <= 'z') || (rtf[i] >= 'A' && rtf[i] <= 'Z')) {
					i++
				}
				word := rtf[start:i]
				for i < len(rtf) && ((rtf[i] >= '0' && rtf[i] <= '9') || rtf[i] == '-') {
					i++
				}
				if i < len(rtf) && rtf[i] == ' ' {
					i++ // the delimiter space belongs to the control word
				}
				switch word {
				case "par", "line":
					emit('\n')
				case "tab":
					emit('\t')
				}
			}
		default:
			emit(ch)
		}
	}
	return strings.TrimSpace(result.String())
}
|
|
|
|
// extractDocxText extracts plain text from a .docx file (ZIP of XML files).
//
// It opens the archive at path, reads the "word/document.xml" entry in full
// and converts it with docxXMLToText. An error is returned when the archive
// cannot be opened, the entry is missing, or the entry cannot be read.
func extractDocxText(path string) (string, error) {
	r, err := zip.OpenReader(path)
	if err != nil {
		return "", fmt.Errorf("opening docx: %w", err)
	}
	defer r.Close()

	for _, f := range r.File {
		if f.Name != "word/document.xml" {
			continue
		}

		rc, err := f.Open()
		if err != nil {
			return "", fmt.Errorf("opening word/document.xml: %w", err)
		}
		// Runs once: the loop returns right after the first match.
		defer rc.Close()

		data, err := io.ReadAll(rc)
		if err != nil {
			return "", fmt.Errorf("reading word/document.xml: %w", err)
		}
		return docxXMLToText(string(data)), nil
	}
	return "", fmt.Errorf("word/document.xml not found in docx")
}

// docxXMLToText reduces WordprocessingML markup to plain text. All tags are
// removed; the end of each paragraph (</w:p>, or a self-closing <w:p/>) and
// each <w:br> become a newline so paragraphs do not run together. The five
// predefined XML entities are decoded and the result is trimmed.
//
// Element names are matched with the conventional "w:" prefix — documents
// that bind the namespace to a different prefix get no line breaks. Numeric
// character references are left undecoded.
func docxXMLToText(doc string) string {
	var text strings.Builder
	text.Grow(len(doc) / 4)

	for {
		lt := strings.IndexByte(doc, '<')
		if lt < 0 {
			text.WriteString(doc)
			break
		}
		text.WriteString(doc[:lt])

		gt := strings.IndexByte(doc[lt:], '>')
		if gt < 0 {
			break // unterminated tag: the remainder is markup
		}
		tag := doc[lt+1 : lt+gt]
		doc = doc[lt+gt+1:]

		name := tag
		if sp := strings.IndexAny(name, " \t\r\n"); sp >= 0 {
			name = name[:sp]
		}
		selfClosing := strings.HasSuffix(tag, "/")
		name = strings.TrimSuffix(name, "/")

		if name == "/w:p" || name == "w:br" || (name == "w:p" && selfClosing) {
			text.WriteByte('\n')
		}
	}

	// Single pass, so "&amp;lt;" decodes to "&lt;" and not to "<".
	entities := strings.NewReplacer(
		"&lt;", "<",
		"&gt;", ">",
		"&quot;", `"`,
		"&apos;", "'",
		"&amp;", "&",
	)
	return strings.TrimSpace(entities.Replace(text.String()))
}
|
|
|
|
// extractPDFText returns the text of the PDF at path by running the external
// pdftotext tool (poppler-utils) with "-" as the output argument and
// capturing its standard output.
//
// An error is returned when pdftotext is not found on PATH or when the
// command fails. Standard error is captured separately: it is included in the
// failure message but never mixed into the returned text.
func extractPDFText(path string) (string, error) {
	pdftotextPath, err := exec.LookPath("pdftotext")
	if err != nil {
		return "", fmt.Errorf("PDF text extraction requires 'pdftotext' — install poppler-utils")
	}

	var stderr strings.Builder
	cmd := exec.Command(pdftotextPath, path, "-")
	cmd.Stderr = &stderr

	out, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("pdftotext failed: %w\n%s", err, stderr.String())
	}
	return string(out), nil
}
|
|
|
|
// textToPDF creates a basic PDF from plain text and writes it to outputPath.
//
// The text is split into lines (LF or CRLF) and drawn in 10 pt Helvetica on
// US-Letter pages (612 x 792 pt), starting at y=780 and moving down 14 pt per
// line with a 40 pt left margin. When a page is full the text continues on a
// new page, so no input lines are dropped. Empty text produces a single page.
//
// The file has a cross-reference table with real byte offsets and a trailer
// whose startxref points at it.
//
// Lines are not wrapped, and bytes are written as-is into PDF string
// literals (only '\', '(' and ')' are escaped), so non-ASCII text is not
// expected to render correctly with the built-in font.
func textToPDF(text, outputPath string) error {
	const (
		leftMargin = 40
		topY       = 780
		bottomY    = 40
		lineHeight = 14
		// Lines are drawn at topY, topY-lineHeight, ... while y >= bottomY.
		linesPerPage = (topY-bottomY)/lineHeight + 1

		fontObj      = 3
		firstPageObj = 4 // page p uses objects firstPageObj+2p (page) and +1 (contents)
	)

	escaper := strings.NewReplacer(`\`, `\\`, "(", `\(`, ")", `\)`)
	lines := strings.Split(strings.ReplaceAll(text, "\r\n", "\n"), "\n")
	pageCount := (len(lines) + linesPerPage - 1) / linesPerPage

	var doc strings.Builder
	offsets := make([]int, 0, firstPageObj-1+2*pageCount)
	// addObject appends the next numbered object and records where it starts.
	addObject := func(body string) {
		offsets = append(offsets, doc.Len())
		fmt.Fprintf(&doc, "%d 0 obj\n%s\nendobj\n", len(offsets), body)
	}

	doc.WriteString("%PDF-1.4\n")
	addObject("<< /Type /Catalog /Pages 2 0 R >>")

	var kids strings.Builder
	for p := 0; p < pageCount; p++ {
		if p > 0 {
			kids.WriteByte(' ')
		}
		fmt.Fprintf(&kids, "%d 0 R", firstPageObj+2*p)
	}
	addObject(fmt.Sprintf("<< /Type /Pages /Kids [%s] /Count %d >>", kids.String(), pageCount))
	addObject("<< /Type /Font /Subtype /Type1 /BaseFont /Helvetica >>")

	for p := 0; p < pageCount; p++ {
		contentsObj := firstPageObj + 2*p + 1
		addObject(fmt.Sprintf(
			"<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] /Contents %d 0 R /Resources << /Font << /F1 %d 0 R >> >> >>",
			contentsObj, fontObj))

		first := p * linesPerPage
		last := first + linesPerPage
		if last > len(lines) {
			last = len(lines)
		}

		var stream strings.Builder
		stream.WriteString("BT\n/F1 10 Tf\n")
		y := topY
		for _, line := range lines[first:last] {
			fmt.Fprintf(&stream, "1 0 0 1 %d %d Tm\n(%s) Tj\n", leftMargin, y, escaper.Replace(line))
			y -= lineHeight
		}
		stream.WriteString("ET\n")
		addObject(fmt.Sprintf("<< /Length %d >>\nstream\n%sendstream", stream.Len(), stream.String()))
	}

	// Each cross-reference entry is exactly 20 bytes, including its
	// two-character line ending (space + LF).
	xrefOffset := doc.Len()
	fmt.Fprintf(&doc, "xref\n0 %d\n", len(offsets)+1)
	doc.WriteString("0000000000 65535 f \n")
	for _, off := range offsets {
		fmt.Fprintf(&doc, "%010d 00000 n \n", off)
	}
	fmt.Fprintf(&doc, "trailer\n<< /Size %d /Root 1 0 R >>\nstartxref\n%d\n%%%%EOF\n", len(offsets)+1, xrefOffset)

	return os.WriteFile(outputPath, []byte(doc.String()), 0o644)
}
|
|
|
|
func htmlToPDF(html, outputPath string) error {
|
|
text := stripHTMLTags(html)
|
|
return textToPDF(text, outputPath)
|
|
}
|