aboutsummaryrefslogtreecommitdiff
path: root/emacs/.emacs.d/lisp/my/my-epub.el
blob: 4a3dfca12a4c4b435bca7788a3fa345be6b03f22 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
;;; my-epub.el --- epub utils -*- lexical-binding: t -*-

;; Copyright (C) 2025  Free Software Foundation, Inc.

;; Author: Yuchen Pei <id@ypei.org>
;; Package-Requires: ((emacs "30.1"))

;; This file is part of dotted.

;; dotted is free software: you can redistribute it and/or modify it under
;; the terms of the GNU Affero General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; dotted is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General
;; Public License for more details.

;; You should have received a copy of the GNU Affero General Public
;; License along with dotted.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; epub utils.

;;; Code:

(require 'dom)

(defun my-epub-content-file-name (file-name)
  "Return the path of the package document inside the epub FILE-NAME.
The path is the `full-path' attribute of the first `rootfile'
element found in the archive's META-INF/container.xml, which is
extracted with the external \"unzip\" program.
Return nil, after reporting unzip's output with `message', if
unzip exits with a non-zero status."
  ;; `call-process' does not go through a shell, so a name such as
  ;; ~/book.epub has to be expanded here or unzip receives it verbatim.
  (let ((archive (expand-file-name file-name)))
    (with-temp-buffer
      (if (eq 0 (call-process "unzip" nil t nil
                              "-p" archive "META-INF/container.xml"))
          (let ((dom (libxml-parse-xml-region (point-min) (point-max))))
            ;; Each `dom-by-tag' call returns a list of nodes; the dom.el
            ;; accessors operate on the first node of such a list.
            (dom-attr
             (dom-by-tag
              (dom-by-tag (dom-by-tag dom 'container) 'rootfiles)
              'rootfile)
             'full-path))
        (message "Failed to extract container.xml: %s" (buffer-string))
        nil))))

(defun my-epub-metadata (file-name)
  "Return an alist of metadata for the epub FILE-NAME.
The package document located by `my-epub-content-file-name' is
extracted with the external \"unzip\" program and parsed.  The
result has the keys `title', `authors' (all creators joined with
\", \"), `year' (up to four leading digits of the first date) and
`identifier' (the digits of every identifier that has an ISBN
scheme or is purely numeric, joined with \",\").  All values are
strings, empty when the corresponding element is absent.
Return nil if the package document cannot be located or extracted."
  ;; `call-process' does not go through a shell, so expand names such
  ;; as ~/book.epub before handing them to unzip.
  (when-let* ((archive (expand-file-name file-name))
              (content-file-name (my-epub-content-file-name archive)))
    (with-temp-buffer
      (if (not (eq 0 (call-process "unzip" nil t nil
                                   "-p" archive content-file-name)))
          (progn
            (message "Failed to extract %s: %s"
                     content-file-name (buffer-string))
            nil)
        (let* ((dom (libxml-parse-xml-region (point-min) (point-max)))
               (metadata (dom-by-tag dom 'metadata))
               (title (dom-text (dom-by-tag metadata 'title)))
               ;; `dom-texts' given a list of nodes only reads the first
               ;; one, so map over the nodes to keep every creator.
               (authors
                (mapconcat (lambda (node) (dom-texts node ", "))
                           (dom-by-tag metadata 'creator)
                           ", "))
               (identifier
                (mapconcat
                 #'identity
                 ;; Drop identifiers that contain no digits at all so
                 ;; they do not leave stray commas in the result.
                 (delete
                  ""
                  (mapcar
                   (lambda (node)
                     (replace-regexp-in-string
                      "[^0-9,]" "" (dom-texts node ",")))
                   (seq-filter
                    (lambda (node)
                      (or (equal "ISBN" (dom-attr node 'scheme))
                          (string-match-p "^[0-9]+$" (dom-text node))))
                    (dom-by-tag metadata 'identifier))))
                 ","))
               (date (replace-regexp-in-string
                      "[^0-9]" ""
                      (dom-text (dom-by-tag metadata 'date))))
               (year (substring date 0 (min 4 (length date)))))
          `((title . ,title)
            (authors . ,authors)
            (year . ,year)
            (identifier . ,identifier)))))))

(provide 'my-epub)
;;; my-epub.el ends here