package converter
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"time"
"github.com/russross/blackfriday/v2"
)
// convertEbook is the entry point for e-book conversions. An "epub" source is
// converted away from EPUB into targetFormat; every other source extension is
// converted into an EPUB, in which case targetFormat is not consulted.
func convertEbook(inputPath, outputPath, sourceExt, targetFormat string) error {
	switch sourceExt {
	case "epub":
		return convertFromEpub(inputPath, outputPath, targetFormat)
	default:
		// txt/html/md → epub
		return convertToEpub(inputPath, outputPath, sourceExt)
	}
}
// ─── EPUB → other formats ────────────────────────────────────
// convertFromEpub converts the EPUB at inputPath into targetFormat and writes
// the result to outputPath.
//
// It obtains the title and per-chapter HTML from extractEpubContent, joins the
// chapters into one string, and then dispatches on targetFormat:
//   - "txt": the joined HTML with tags stripped, written with mode 0o644
//   - "html": the joined HTML passed through a format string together with the
//     escaped title (twice), written with mode 0o644
//   - "md": a "# <title>" heading, a blank line, then the output of
//     ebookHTMLToMarkdown, written with mode 0o644
//   - "pdf": the tag-stripped text handed to textToPDF
//
// A failure to read the EPUB is wrapped as "reading epub: ..."; any other
// targetFormat returns an "unsupported target for epub" error.
//
// NOTE(review): the chapter separator passed to strings.Join is an interpreted
// string literal that spans two lines, and the "html" format string contains
// nothing but verbs. Both look as if markup has been lost from them — confirm
// against the intended output before relying on this function.
func convertFromEpub(inputPath, outputPath, targetFormat string) error {
title, htmlChapters, err := extractEpubContent(inputPath)
if err != nil {
return fmt.Errorf("reading epub: %w", err)
}
// Merge every chapter into one document body.
fullHTML := strings.Join(htmlChapters, "\n
\n")
switch targetFormat {
case "txt":
text := stripHTMLTags(fullHTML)
return os.WriteFile(outputPath, []byte(text), 0o644)
case "html":
// The title is escaped before interpolation; the chapter HTML is inserted as-is.
styled := fmt.Sprintf(`
%s
%s
%s`, escapeHTML(title), escapeHTML(title), fullHTML)
return os.WriteFile(outputPath, []byte(styled), 0o644)
case "md":
// The title is used verbatim (not escaped) as the top-level heading.
md := "# " + title + "\n\n" + ebookHTMLToMarkdown(fullHTML)
return os.WriteFile(outputPath, []byte(md), 0o644)
case "pdf":
// PDF output is produced from plain text, so the markup is stripped first.
text := stripHTMLTags(fullHTML)
return textToPDF(text, outputPath)
default:
return fmt.Errorf("unsupported target for epub: %s", targetFormat)
}
}
// ─── Other formats → EPUB ────────────────────────────────────
// convertToEpub reads the file at inputPath, converts its content to HTML
// according to sourceExt, and hands the result to writeEpubFile to produce an
// EPUB at outputPath.
//
// The EPUB title is the input file's base name with its extension removed.
// Supported values of sourceExt:
//   - "txt": the text is split on "\n\n"; each trimmed, non-empty paragraph is
//     HTML-escaped and appended to the output
//   - "html", "htm": the first capture group of the body pattern is used when
//     the pattern matches, otherwise the whole file content
//   - "md": the content is rendered with blackfriday.Run
//
// A read failure is wrapped as "reading input: ..."; any other sourceExt
// returns an "unsupported source for epub creation" error.
//
// NOTE(review): the paragraph wrapper in the "txt" case is written as an empty
// string plus an interpreted literal that spans two lines, and the body
// pattern in the "html" case begins with "]*>". Both look as if markup has
// been lost from them — confirm against the intended output.
func convertToEpub(inputPath, outputPath, sourceExt string) error {
raw, err := os.ReadFile(inputPath)
if err != nil {
return fmt.Errorf("reading input: %w", err)
}
content := string(raw)
// Derive the title from the file name, e.g. "book.txt" -> "book".
title := strings.TrimSuffix(filepath.Base(inputPath), filepath.Ext(inputPath))
var htmlContent string
switch sourceExt {
case "txt":
// Split into paragraphs on double newlines ("\n\n" only; other line-ending
// sequences are not treated as paragraph breaks here).
paragraphs := strings.Split(content, "\n\n")
var sb strings.Builder
for _, p := range paragraphs {
p = strings.TrimSpace(p)
// Skip paragraphs that are empty after trimming.
if p != "" {
sb.WriteString("" + escapeHTML(p) + "
\n")
}
}
htmlContent = sb.String()
case "html", "htm":
// Extract body if full document; fall back to the raw content when the
// pattern does not match.
bodyRe := regexp.MustCompile(`(?is)]*>(.*)`)
if m := bodyRe.FindStringSubmatch(content); m != nil {
htmlContent = m[1]
} else {
htmlContent = content
}
case "md":
// Render Markdown to HTML with blackfriday's default settings.
htmlBytes := blackfriday.Run([]byte(content))
htmlContent = string(htmlBytes)
default:
return fmt.Errorf("unsupported source for epub creation: %s", sourceExt)
}
return writeEpubFile(outputPath, title, htmlContent)
}
// ─── EPUB reader ─────────────────────────────────────────────
// extractEpubContent opens the EPUB (zip) archive at path and returns its
// title together with the HTML of each chapter, in reading order.
//
// Lookup proceeds in two stages:
//
//  1. The package document is located through the full-path attribute in
//     "META-INF/container.xml". From it the title, the manifest (id -> href)
//     and the spine (ordered idrefs) are read by pattern matching, and each
//     spine entry is resolved relative to the package document's directory.
//  2. If that yields no chapters, every archive entry whose name ends in
//     .htm/.html/.xhtm/.xhtml (case-insensitive) is used instead, sorted by
//     name.
//
// For each chapter, the first capture group of the body pattern is kept when
// it matches; otherwise the whole entry is kept. The title defaults to
// "Untitled". Only a failure to open the archive is returned as an error;
// unreadable or missing entries are skipped so that the fallback can still
// produce content.
//
// NOTE(review): the title, manifest, spine and body patterns below start with
// "]*" or "- ]*", which suggests their leading tag names have been lost. They
// are kept exactly as found — confirm them against the intended OPF/XHTML
// markup.
func extractEpubContent(path string) (string, []string, error) {
	r, err := zip.OpenReader(path)
	if err != nil {
		return "", nil, err
	}
	defer r.Close()

	// Compiled once per call and shared by both chapter loops, instead of
	// being recompiled for every chapter.
	bodyRe := regexp.MustCompile(`(?is)]*>(.*)`)
	// chapterHTML returns the body-pattern capture when present, else the
	// whole entry.
	chapterHTML := func(data []byte) string {
		doc := string(data)
		if m := bodyRe.FindStringSubmatch(doc); m != nil {
			return m[1]
		}
		return doc
	}

	title := "Untitled"
	var htmlChapters []string

	// Find OPF file via container.xml. Only the first matching entry is
	// consulted; a read failure leaves opfPath empty.
	var opfPath string
	for _, f := range r.File {
		if f.Name != "META-INF/container.xml" {
			continue
		}
		if data, err := readZipFile(f); err == nil {
			re := regexp.MustCompile(`full-path="([^"]+)"`)
			if m := re.FindStringSubmatch(string(data)); m != nil {
				opfPath = m[1]
			}
		}
		break
	}

	// Read OPF. Only the first entry with that exact name is consulted.
	var opfContent string
	if opfPath != "" {
		for _, f := range r.File {
			if f.Name != opfPath {
				continue
			}
			if data, err := readZipFile(f); err == nil {
				opfContent = string(data)
			}
			break
		}
	}

	if opfContent != "" {
		// Extract title
		titleRe := regexp.MustCompile(`]*>([^<]+)`)
		if m := titleRe.FindStringSubmatch(opfContent); m != nil {
			title = m[1]
		}

		// Build manifest map: id -> href
		manifest := make(map[string]string)
		itemRe := regexp.MustCompile(`- ]*id="([^"]*)"[^>]*href="([^"]*)"[^>]*`)
		for _, m := range itemRe.FindAllStringSubmatch(opfContent, -1) {
			manifest[m[1]] = m[2]
		}
		// Also handle reversed attr order (href before id).
		itemRe2 := regexp.MustCompile(`
- ]*href="([^"]*)"[^>]*id="([^"]*)"[^>]*`)
		for _, m := range itemRe2.FindAllStringSubmatch(opfContent, -1) {
			manifest[m[2]] = m[1]
		}

		// Get spine order
		var spineIDs []string
		spineRe := regexp.MustCompile(`]*idref="([^"]*)"[^>]*`)
		for _, m := range spineRe.FindAllStringSubmatch(opfContent, -1) {
			spineIDs = append(spineIDs, m[1])
		}

		// Manifest hrefs are resolved relative to the OPF's directory
		// (including the trailing slash, or empty when the OPF is at the
		// archive root).
		opfDir := ""
		if idx := strings.LastIndex(opfPath, "/"); idx >= 0 {
			opfDir = opfPath[:idx+1]
		}

		// Build a map of zip files for quick lookup
		zipFiles := make(map[string]*zip.File, len(r.File))
		for _, f := range r.File {
			zipFiles[f.Name] = f
		}

		for _, id := range spineIDs {
			href, ok := manifest[id]
			if !ok {
				continue
			}
			zf, ok := zipFiles[opfDir+href]
			if !ok {
				continue
			}
			data, err := readZipFile(zf)
			if err != nil {
				continue
			}
			htmlChapters = append(htmlChapters, chapterHTML(data))
		}
	}

	// Fallback: scan for any xhtml/html files
	if len(htmlChapters) == 0 {
		var htmlFiles []*zip.File
		htmlRe := regexp.MustCompile(`(?i)\.(x?html?)$`)
		for _, f := range r.File {
			if htmlRe.MatchString(f.Name) {
				htmlFiles = append(htmlFiles, f)
			}
		}
		// Without a spine, entry-name order stands in for reading order.
		sort.Slice(htmlFiles, func(i, j int) bool {
			return htmlFiles[i].Name < htmlFiles[j].Name
		})
		for _, f := range htmlFiles {
			data, err := readZipFile(f)
			if err != nil {
				continue
			}
			htmlChapters = append(htmlChapters, chapterHTML(data))
		}
	}

	return title, htmlChapters, nil
}
// readZipFile returns the full decompressed content of a single zip entry.
// The entry's reader is closed before returning; an error from opening the
// entry or from reading it is returned unchanged.
func readZipFile(f *zip.File) ([]byte, error) {
	entry, openErr := f.Open()
	if openErr != nil {
		return nil, openErr
	}
	defer entry.Close()

	return io.ReadAll(entry)
}
// ─── EPUB writer ─────────────────────────────────────────────
// writeEpubFile creates a single-chapter EPUB archive at outputPath.
//
// The archive receives, in order, the entries "mimetype" (stored without
// compression), "META-INF/container.xml", "OEBPS/content.opf",
// "OEBPS/nav.xhtml" and "OEBPS/chapter1.xhtml". title is passed through
// escapeHTML before being interpolated; htmlContent is inserted as given. The
// package identifier is "transmute-" followed by the current Unix time in
// nanoseconds, and the modification stamp is the current UTC time.
//
// It returns the first error encountered while creating the output file,
// adding or writing an entry, or closing the zip writer and the file. Close
// errors matter here: the zip central directory is only written by Close, so
// ignoring them could report success for an unreadable archive. On failure a
// partially written file may remain at outputPath.
//
// NOTE(review): the entry templates below contain format verbs but no visible
// markup, and the nav template has no verb for its argument. They are kept
// exactly as found — confirm them against the intended EPUB documents.
func writeEpubFile(outputPath, title, htmlContent string) (err error) {
	f, err := os.Create(outputPath)
	if err != nil {
		return err
	}
	// Deferred calls run in reverse order: the zip writer is finalized first,
	// then the underlying file is closed. The first error wins.
	defer func() {
		if cerr := f.Close(); err == nil {
			err = cerr
		}
	}()

	w := zip.NewWriter(f)
	defer func() {
		if cerr := w.Close(); err == nil {
			err = cerr
		}
	}()

	uid := fmt.Sprintf("transmute-%d", time.Now().UnixNano())
	modified := time.Now().UTC().Format("2006-01-02T15:04:05Z")

	// addEntry appends one entry to the archive and surfaces both creation and
	// write failures. stored selects zip.Store instead of the default method.
	addEntry := func(name string, stored bool, content string) error {
		var (
			ew      io.Writer
			openErr error
		)
		if stored {
			ew, openErr = w.CreateHeader(&zip.FileHeader{
				Name:   name,
				Method: zip.Store,
			})
		} else {
			ew, openErr = w.Create(name)
		}
		if openErr != nil {
			return fmt.Errorf("creating epub entry %s: %w", name, openErr)
		}
		if _, writeErr := io.WriteString(ew, content); writeErr != nil {
			return fmt.Errorf("writing epub entry %s: %w", name, writeErr)
		}
		return nil
	}

	// mimetype (must be stored, not compressed)
	if err = addEntry("mimetype", true, "application/epub+zip"); err != nil {
		return err
	}

	// META-INF/container.xml
	if err = addEntry("META-INF/container.xml", false, `
`); err != nil {
		return err
	}

	// OEBPS/content.opf
	if err = addEntry("OEBPS/content.opf", false, fmt.Sprintf(`
%s
%s
en
%s
`, uid, escapeHTML(title), modified)); err != nil {
		return err
	}

	// OEBPS/nav.xhtml
	if err = addEntry("OEBPS/nav.xhtml", false, fmt.Sprintf(`
Navigation
`, escapeHTML(title))); err != nil {
		return err
	}

	// OEBPS/chapter1.xhtml
	return addEntry("OEBPS/chapter1.xhtml", false, fmt.Sprintf(`
%s
%s
`, escapeHTML(title), htmlContent))
}
// ─── Helpers ─────────────────────────────────────────────────
// ebookHTMLToMarkdown converts an HTML fragment to Markdown by a sequence of
// textual substitutions, then removes whatever tags remain with stripHTMLTags
// and trims surrounding whitespace from the result.
//
// The substitutions, in order, are:
//   - headings, level 6 down to 1: an opening form is replaced by that many
//     '#' characters plus a space, and a closing form by a blank line; this is
//     done first with exact strings and then with case-insensitive patterns
//   - four replacements producing "**" and four producing "*"
//   - five replacements producing a newline
//   - three replacements producing "\n---\n"
//
// NOTE(review): every search string in this function is either empty or an
// interpreted literal that spans two lines, and the heading format strings
// and patterns contain no verb for their argument. They look as if the HTML
// tag names have been lost — confirm against the intended tags before relying
// on this function.
func ebookHTMLToMarkdown(html string) string {
md := html
// Headers: handled from the deepest level (6) to the shallowest (1).
for i := 6; i >= 1; i-- {
// Markdown heading marker for this level, e.g. "### " for i == 3.
prefix := strings.Repeat("#", i) + " "
openTag := fmt.Sprintf("", i)
closeTag := fmt.Sprintf("", i)
md = strings.ReplaceAll(md, openTag, prefix)
md = strings.ReplaceAll(md, closeTag, "\n\n")
// Also case-insensitive with attributes
re := regexp.MustCompile(fmt.Sprintf(`(?i)]*>`, i))
md = re.ReplaceAllString(md, prefix)
re2 := regexp.MustCompile(fmt.Sprintf(`(?i)`, i))
md = re2.ReplaceAllString(md, "\n\n")
}
// Strong emphasis markers.
md = strings.ReplaceAll(md, "", "**")
md = strings.ReplaceAll(md, "", "**")
md = strings.ReplaceAll(md, "", "**")
md = strings.ReplaceAll(md, "", "**")
// Emphasis markers.
md = strings.ReplaceAll(md, "", "*")
md = strings.ReplaceAll(md, "", "*")
md = strings.ReplaceAll(md, "", "*")
md = strings.ReplaceAll(md, "", "*")
// Line and paragraph breaks.
md = strings.ReplaceAll(md, "
", "\n")
md = strings.ReplaceAll(md, "
", "\n")
md = strings.ReplaceAll(md, "
", "\n")
md = strings.ReplaceAll(md, "", "\n")
md = strings.ReplaceAll(md, "
", "\n")
// Horizontal rules.
md = strings.ReplaceAll(md, "
", "\n---\n")
md = strings.ReplaceAll(md, "
", "\n---\n")
md = strings.ReplaceAll(md, "
", "\n---\n")
// Strip remaining tags
md = stripHTMLTags(md)
return strings.TrimSpace(md)
}