package converter

import (
	"archive/zip"
	"fmt"
	"html"
	"io"
	"net/url"
	"os"
	"path"
	"path/filepath"
	"regexp"
	"sort"
	"strings"
	"time"

	"github.com/russross/blackfriday/v2"
)

// Patterns used while reading EPUB archives and converting their markup.
// They are compiled once at package scope instead of on every call or loop
// iteration.
var (
	// epubBodyRe captures everything between <body ...> and the last </body>.
	epubBodyRe = regexp.MustCompile(`(?is)<body[^>]*>(.*)</body>`)
	// epubRootfileRe captures the OPF path declared in META-INF/container.xml.
	epubRootfileRe = regexp.MustCompile(`full-path="([^"]+)"`)
	// epubTitleRe captures the text of the first <dc:title> element.
	epubTitleRe = regexp.MustCompile(`<dc:title[^>]*>([^<]+)</dc:title>`)
	// epubItemRe matches manifest items written as id=... href=...
	epubItemRe = regexp.MustCompile(`<item[^>]*id="([^"]*)"[^>]*href="([^"]*)"[^>]*`)
	// epubItemRevRe matches manifest items written as href=... id=...
	epubItemRevRe = regexp.MustCompile(`<item[^>]*href="([^"]*)"[^>]*id="([^"]*)"[^>]*`)
	// epubItemrefRe captures spine entries in reading order.
	epubItemrefRe = regexp.MustCompile(`<itemref[^>]*idref="([^"]*)"[^>]*`)
	// epubHTMLFileRe matches .html, .htm, .xhtml and .xhtm file names.
	epubHTMLFileRe = regexp.MustCompile(`(?i)\.(x?html?)$`)
	// epubHeadingOpenRe matches <h1>..<h6> opening tags, with or without attributes.
	epubHeadingOpenRe = regexp.MustCompile(`(?i)<h([1-6])[^>]*>`)
	// epubHeadingCloseRe matches </h1>..</h6>.
	epubHeadingCloseRe = regexp.MustCompile(`(?i)</h[1-6]>`)
)

// epubInlineReplacer rewrites simple inline and block tags to their Markdown
// equivalents in a single pass. None of the replacement strings contain '<',
// so one pass produces the same result as replacing each tag in turn.
var epubInlineReplacer = strings.NewReplacer(
	"<strong>", "**",
	"</strong>", "**",
	"<b>", "**",
	"</b>", "**",
	"<em>", "*",
	"</em>", "*",
	"<i>", "*",
	"</i>", "*",
	"<br>", "\n",
	"<br/>", "\n",
	"<br />", "\n",
	"</p>", "\n",
	"<hr>", "\n---\n",
	"<hr/>", "\n---\n",
)

// Templates for the files of the generated single-chapter EPUB 3 container.
const (
	epubContainerXML = `<?xml version="1.0" encoding="UTF-8"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml"/>
  </rootfiles>
</container>`

	// Verbs: unique identifier, escaped title, modification timestamp.
	epubPackageTemplate = `<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
    <dc:identifier id="uid">%s</dc:identifier>
    <dc:title>%s</dc:title>
    <dc:language>en</dc:language>
    <meta property="dcterms:modified">%s</meta>
  </metadata>
  <manifest>
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
    <item id="chapter1" href="chapter1.xhtml" media-type="application/xhtml+xml"/>
  </manifest>
  <spine>
    <itemref idref="chapter1"/>
  </spine>
</package>`

	// Verb: escaped title, used as the label of the only TOC entry.
	epubNavTemplate = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
<head><title>Navigation</title></head>
<body>
  <nav epub:type="toc">
    <ol>
      <li><a href="chapter1.xhtml">%s</a></li>
    </ol>
  </nav>
</body>
</html>`

	// Verbs: escaped title, chapter body markup (inserted verbatim).
	epubChapterTemplate = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head><title>%s</title></head>
<body>
%s
</body>
</html>`

	// Verbs: escaped title (document title), escaped title (heading), body markup.
	epubHTMLExportTemplate = `<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>%s</title>
</head>
<body>
<h1>%s</h1>
%s
</body>
</html>`
)

// convertEbook handles epub ↔ txt/html/md/pdf conversions.
//
// When sourceExt is "epub" the input is converted to targetFormat; for any
// other sourceExt the input is converted to EPUB and targetFormat is not
// consulted.
func convertEbook(inputPath, outputPath, sourceExt, targetFormat string) error {
	if sourceExt == "epub" {
		return convertFromEpub(inputPath, outputPath, targetFormat)
	}
	// txt/html/md → epub
	return convertToEpub(inputPath, outputPath, sourceExt)
}

// ─── EPUB → other formats ────────────────────────────────────

// convertFromEpub reads the EPUB at inputPath and writes it to outputPath as
// "txt", "html", "md" or "pdf". Any other targetFormat is an error.
func convertFromEpub(inputPath, outputPath, targetFormat string) error {
	title, htmlChapters, err := extractEpubContent(inputPath)
	if err != nil {
		return fmt.Errorf("reading epub: %w", err)
	}

	// Chapters are separated by a horizontal rule, which the Markdown
	// conversion turns into "---".
	fullHTML := strings.Join(htmlChapters, "\n<hr/>\n")

	switch targetFormat {
	case "txt":
		text := stripHTMLTags(fullHTML)
		return os.WriteFile(outputPath, []byte(text), 0o644)
	case "html":
		doc := fmt.Sprintf(epubHTMLExportTemplate, escapeHTML(title), escapeHTML(title), fullHTML)
		return os.WriteFile(outputPath, []byte(doc), 0o644)
	case "md":
		md := "# " + title + "\n\n" + ebookHTMLToMarkdown(fullHTML)
		return os.WriteFile(outputPath, []byte(md), 0o644)
	case "pdf":
		text := stripHTMLTags(fullHTML)
		return textToPDF(text, outputPath)
	default:
		return fmt.Errorf("unsupported target for epub: %s", targetFormat)
	}
}

// ─── Other formats → EPUB ────────────────────────────────────

// convertToEpub wraps a txt, html/htm or md file in a single-chapter EPUB.
// The book title is the input file name without its extension.
func convertToEpub(inputPath, outputPath, sourceExt string) error {
	raw, err := os.ReadFile(inputPath)
	if err != nil {
		return fmt.Errorf("reading input: %w", err)
	}
	content := string(raw)
	title := strings.TrimSuffix(filepath.Base(inputPath), filepath.Ext(inputPath))

	var htmlContent string
	switch sourceExt {
	case "txt":
		htmlContent = epubParagraphsFromText(content)
	case "html", "htm":
		// Extract body if full document
		htmlContent = epubBodyOf(content)
	case "md":
		htmlContent = string(blackfriday.Run([]byte(content)))
	default:
		return fmt.Errorf("unsupported source for epub creation: %s", sourceExt)
	}

	return writeEpubFile(outputPath, title, htmlContent)
}

// epubParagraphsFromText turns plain text into escaped <p> elements, one per
// blank-line-separated paragraph. Empty paragraphs are dropped.
func epubParagraphsFromText(content string) string {
	// Normalize CRLF so Windows-style blank lines also separate paragraphs.
	content = strings.ReplaceAll(content, "\r\n", "\n")

	var sb strings.Builder
	for _, p := range strings.Split(content, "\n\n") {
		p = strings.TrimSpace(p)
		if p == "" {
			continue
		}
		sb.WriteString("<p>" + escapeHTML(p) + "</p>\n")
	}
	return sb.String()
}

// epubBodyOf returns the markup inside the <body> element of doc, or doc
// unchanged when it has no <body>...</body> pair.
func epubBodyOf(doc string) string {
	if m := epubBodyRe.FindStringSubmatch(doc); m != nil {
		return m[1]
	}
	return doc
}

// ─── EPUB reader ─────────────────────────────────────────────

// extractEpubContent opens the EPUB at path and returns its title together
// with the body markup of each chapter in reading order.
//
// The spine of the OPF package document decides the order. If the OPF cannot
// be located or yields no readable chapter, every (x)html file in the archive
// is used instead, sorted by name. The title defaults to "Untitled". Only a
// failure to open the archive is reported as an error; unreadable individual
// entries are skipped.
func extractEpubContent(path string) (string, []string, error) {
	r, err := zip.OpenReader(path)
	if err != nil {
		return "", nil, err
	}
	defer r.Close()

	// Build a map of zip files for quick lookup
	entries := make(map[string]*zip.File, len(r.File))
	for _, f := range r.File {
		entries[f.Name] = f
	}

	title := "Untitled"
	var htmlChapters []string

	if opfPath := epubOPFPath(entries); opfPath != "" {
		if opf := epubReadEntry(entries, opfPath); opf != "" {
			if m := epubTitleRe.FindStringSubmatch(opf); m != nil {
				// The OPF is XML, so decode entities here; callers escape the
				// title again when they embed it in markup.
				if t := strings.TrimSpace(html.UnescapeString(m[1])); t != "" {
					title = t
				}
			}
			htmlChapters = epubSpineChapters(entries, opfPath, opf)
		}
	}

	// Fallback: scan for any xhtml/html files
	if len(htmlChapters) == 0 {
		htmlChapters = epubAllHTMLBodies(r.File)
	}

	return title, htmlChapters, nil
}

// epubOPFPath returns the package document path declared in
// META-INF/container.xml, or "" when it cannot be determined.
func epubOPFPath(entries map[string]*zip.File) string {
	container := epubReadEntry(entries, "META-INF/container.xml")
	if m := epubRootfileRe.FindStringSubmatch(container); m != nil {
		return m[1]
	}
	return ""
}

// epubReadEntry returns the contents of the named archive entry, or "" when
// the entry is missing or unreadable.
func epubReadEntry(entries map[string]*zip.File, name string) string {
	zf, ok := entries[name]
	if !ok {
		return ""
	}
	data, err := readZipFile(zf)
	if err != nil {
		return ""
	}
	return string(data)
}

// epubSpineChapters returns the body markup of every spine item of the OPF
// document opf (stored at opfPath) that can be resolved and read.
func epubSpineChapters(entries map[string]*zip.File, opfPath, opf string) []string {
	// Build manifest map: id -> href
	manifest := make(map[string]string)
	for _, m := range epubItemRe.FindAllStringSubmatch(opf, -1) {
		manifest[m[1]] = m[2]
	}
	// Also handle reversed attr order
	for _, m := range epubItemRevRe.FindAllStringSubmatch(opf, -1) {
		manifest[m[2]] = m[1]
	}

	// Manifest hrefs are relative to the directory holding the OPF.
	opfDir := ""
	if idx := strings.LastIndex(opfPath, "/"); idx >= 0 {
		opfDir = opfPath[:idx+1]
	}

	var chapters []string
	for _, m := range epubItemrefRe.FindAllStringSubmatch(opf, -1) {
		href, ok := manifest[m[1]]
		if !ok {
			continue
		}
		zf, ok := epubLookupEntry(entries, opfDir, href)
		if !ok {
			continue
		}
		data, err := readZipFile(zf)
		if err != nil {
			continue
		}
		chapters = append(chapters, epubBodyOf(string(data)))
	}
	return chapters
}

// epubLookupEntry finds the archive entry a manifest href refers to.
//
// The literal concatenation opfDir+href is tried first. If that misses, the
// href is normalized (fragment removed, XML entities and percent-escapes
// decoded, "." and ".." segments resolved) and looked up again.
func epubLookupEntry(entries map[string]*zip.File, opfDir, href string) (*zip.File, bool) {
	if zf, ok := entries[opfDir+href]; ok {
		return zf, true
	}

	if i := strings.IndexByte(href, '#'); i >= 0 {
		href = href[:i]
	}
	href = html.UnescapeString(href)
	if decoded, err := url.PathUnescape(href); err == nil {
		href = decoded
	}
	zf, ok := entries[path.Join(opfDir, href)]
	return zf, ok
}

// epubAllHTMLBodies returns the body markup of every (x)html file in files,
// ordered by file name. Unreadable files are skipped.
func epubAllHTMLBodies(files []*zip.File) []string {
	var htmlFiles []*zip.File
	for _, f := range files {
		if epubHTMLFileRe.MatchString(f.Name) {
			htmlFiles = append(htmlFiles, f)
		}
	}
	sort.Slice(htmlFiles, func(i, j int) bool {
		return htmlFiles[i].Name < htmlFiles[j].Name
	})

	var bodies []string
	for _, f := range htmlFiles {
		data, err := readZipFile(f)
		if err != nil {
			continue
		}
		bodies = append(bodies, epubBodyOf(string(data)))
	}
	return bodies
}

// readZipFile returns the full uncompressed contents of a zip entry.
func readZipFile(f *zip.File) ([]byte, error) {
	rc, err := f.Open()
	if err != nil {
		return nil, err
	}
	defer rc.Close()
	return io.ReadAll(rc)
}

// ─── EPUB writer ─────────────────────────────────────────────

// writeEpubFile writes a minimal single-chapter EPUB 3 archive to outputPath.
//
// title is escaped before it is embedded; htmlContent is inserted verbatim as
// the chapter body. Every failure while creating an entry, writing it,
// finalizing the archive or closing the file is returned, so a nil result
// means the archive was written completely.
func writeEpubFile(outputPath, title, htmlContent string) (err error) {
	f, err := os.Create(outputPath)
	if err != nil {
		return err
	}
	defer func() {
		// Report a failed close unless an earlier error is already being returned.
		if cerr := f.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("closing epub: %w", cerr)
		}
	}()

	w := zip.NewWriter(f)

	uid := fmt.Sprintf("transmute-%d", time.Now().UnixNano())
	modified := time.Now().UTC().Format("2006-01-02T15:04:05Z")
	safeTitle := escapeHTML(title)

	entries := []struct {
		name    string
		method  uint16
		content string
	}{
		// mimetype (must be stored, not compressed) and must come first.
		{"mimetype", zip.Store, "application/epub+zip"},
		{"META-INF/container.xml", zip.Deflate, epubContainerXML},
		{"OEBPS/content.opf", zip.Deflate, fmt.Sprintf(epubPackageTemplate, uid, safeTitle, modified)},
		{"OEBPS/nav.xhtml", zip.Deflate, fmt.Sprintf(epubNavTemplate, safeTitle)},
		{"OEBPS/chapter1.xhtml", zip.Deflate, fmt.Sprintf(epubChapterTemplate, safeTitle, htmlContent)},
	}
	for _, e := range entries {
		ew, err := w.CreateHeader(&zip.FileHeader{Name: e.name, Method: e.method})
		if err != nil {
			return fmt.Errorf("creating %s: %w", e.name, err)
		}
		if _, err := io.WriteString(ew, e.content); err != nil {
			return fmt.Errorf("writing %s: %w", e.name, err)
		}
	}

	// Close writes the central directory; without it the archive is unreadable.
	if err := w.Close(); err != nil {
		return fmt.Errorf("finalizing epub: %w", err)
	}
	return nil
}

// ─── Helpers ─────────────────────────────────────────────────

// ebookHTMLToMarkdown converts a small subset of HTML to Markdown: headings
// become "#" prefixes, strong/b and em/i become emphasis markers, br and </p>
// become newlines and hr becomes "---". All remaining tags are removed with
// stripHTMLTags and the result is trimmed.
func ebookHTMLToMarkdown(html string) string {
	// Headers: "<h3 ...>" becomes "### ", and the closing tag ends the block.
	md := epubHeadingOpenRe.ReplaceAllStringFunc(html, func(tag string) string {
		// tag always starts with "<h" followed by one digit in 1..6.
		level := int(tag[2] - '0')
		return strings.Repeat("#", level) + " "
	})
	md = epubHeadingCloseRe.ReplaceAllString(md, "\n\n")

	md = epubInlineReplacer.Replace(md)

	// Strip remaining tags
	md = stripHTMLTags(md)
	return strings.TrimSpace(md)
}