blob: 4a3dfca12a4c4b435bca7788a3fa345be6b03f22 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
;;; my-epub.el --- epub utils -*- lexical-binding: t -*-
;; Copyright (C) 2025 Free Software Foundation, Inc.
;; Author: Yuchen Pei <id@ypei.org>
;; Package-Requires: ((emacs "30.1"))
;; This file is part of dotted.
;; dotted is free software: you can redistribute it and/or modify it under
;; the terms of the GNU Affero General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; dotted is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
;; Public License for more details.
;; You should have received a copy of the GNU Affero General Public
;; License along with dotted. If not, see <https://www.gnu.org/licenses/>.
;;; Commentary:
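;; Utilities for reading metadata (title, authors, year, identifier)
;; from EPUB files by extracting their XML with the external "unzip"
;; program.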
;;; Code:
(require 'dom)
(defun my-epub-content-file-name (file-name)
  "Return the path of the package document inside the EPUB FILE-NAME.

The path is the `full-path' attribute of the first `rootfile' element
found in META-INF/container.xml, which is read by running the
\"unzip\" program on FILE-NAME.

Return nil, after logging a message, if unzip exits with a non-zero
status."
  ;; `call-process' passes its arguments verbatim, so expand "~" and
  ;; relative names here.  An absolute name also cannot be mistaken
  ;; for an unzip option when FILE-NAME starts with "-".
  (let ((archive (expand-file-name file-name)))
    (with-temp-buffer
      (if (eq 0 (call-process "unzip" nil t nil
                              "-p" archive "META-INF/container.xml"))
          (let ((dom (libxml-parse-xml-region (point-min) (point-max))))
            ;; `dom-by-tag' returns a list of nodes; the dom.el
            ;; accessors operate on the first node of such a list, so
            ;; this reads the first <rootfile>.
            (dom-attr
             (dom-by-tag
              (dom-by-tag (dom-by-tag dom 'container) 'rootfiles)
              'rootfile)
             'full-path))
        (message "Failed to extract container.xml: %s" (buffer-string))
        nil))))
(defun my-epub-metadata (file-name)
  "Return an alist of metadata for the EPUB file FILE-NAME.

The alist has the keys `title', `authors', `year' and `identifier',
each mapped to a string, which is empty when the field is absent:

- `title' is the text of the first title element.
- `authors' is the text of every creator element, joined with \", \".
- `year' is the first four digits found in the first date element.
- `identifier' is the digits of every identifier element that either
  has a scheme attribute equal to \"ISBN\" or whose text is all
  digits, joined with \",\".

Return nil if the package document cannot be located, or, after
logging a message, if unzip fails to extract it."
  (when-let* ((content-file-name (my-epub-content-file-name file-name))
              ;; `call-process' does not expand "~" or relative names.
              (archive (expand-file-name file-name)))
    (with-temp-buffer
      (if (not (eq 0 (call-process "unzip" nil t nil
                                   "-p" archive content-file-name)))
          (progn
            (message "Failed to extract %s: %s"
                     content-file-name (buffer-string))
            nil)
        (let* ((dom (libxml-parse-xml-region (point-min) (point-max)))
               (metadata (dom-by-tag dom 'metadata))
               (title (dom-text (dom-by-tag metadata 'title)))
               ;; `dom-text' and `dom-texts' only look at the first
               ;; node when handed a list of nodes, so map over the
               ;; list to keep every creator, not just the first.
               (authors (mapconcat #'dom-text
                                   (dom-by-tag metadata 'creator)
                                   ", "))
               (isbn-nodes
                (seq-filter
                 (lambda (node)
                   (or (equal "ISBN" (dom-attr node 'scheme))
                       (string-match-p "^[0-9]+$" (dom-text node))))
                 (dom-by-tag metadata 'identifier)))
               ;; Reduce each identifier to its digits, drop the ones
               ;; left empty, then join, so that every matching
               ;; identifier is reported.
               (identifier
                (mapconcat
                 #'identity
                 (delete ""
                         (mapcar
                          (lambda (node)
                            (replace-regexp-in-string
                             "[^0-9]" "" (dom-text node)))
                          isbn-nodes))
                 ","))
               (date (replace-regexp-in-string
                      "[^0-9]" ""
                      (dom-text (dom-by-tag metadata 'date))))
               ;; The date may hold fewer than four digits (or none).
               (year (substring date 0 (min 4 (length date)))))
          `((title . ,title)
            (authors . ,authors)
            (year . ,year)
            (identifier . ,identifier)))))))
(provide 'my-epub)
;;; my-epub.el ends here
|