aboutsummaryrefslogtreecommitdiff
path: root/nov.el
diff options
context:
space:
mode:
authorVasilij Schneidermann <mail@vasilij.de>2017-08-30 22:01:38 +0200
committerVasilij Schneidermann <mail@vasilij.de>2017-08-30 22:02:45 +0200
commit4406b0a998623f7077db699aa51663e60394e222 (patch)
treeac40ca794fdded7cd9cd799c6cf2cbd2f2c8e2bd /nov.el
Check in prototype
Diffstat (limited to 'nov.el')
-rw-r--r--nov.el559
1 files changed, 559 insertions, 0 deletions
diff --git a/nov.el b/nov.el
new file mode 100644
index 0000000..af20042
--- /dev/null
+++ b/nov.el
@@ -0,0 +1,559 @@
+;;; nov.el --- Featureful EPUB reader mode
+
+;; Copyright (C) 2017 Vasilij Schneidermann <mail@vasilij.de>
+
+;; Author: Vasilij Schneidermann <mail@vasilij.de>
+;; Keywords: hypermedia, multimedia, epub
+
+;; This file is NOT part of GNU Emacs.
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; nov.el provides a major mode for reading EPUB documents.
+;;
+;; Features:
+;;
+;; - Basic navigation (jump to TOC, previous/next chapter)
+;; - Jump to next chapter when scrolling beyond end
+;; - Renders EPUB2 (.ncx) and EPUB3 (<nav>) TOCs
+;; - Hyperlinks to internal and external targets
+;; - Supports textual and image documents
+;; - View source of document files
+;; - Metadata display
+;; - Image rescaling
+
+;;; Code:
+
+(require 'dash)
+(require 'esxml)
+(require 'esxml-query)
+(require 'shr)
+(require 'url-parse)
+
+
+;;; EPUB preparation
+
+(defgroup nov nil
+ "EPUB reader mode"
+ :group 'multimedia)
+
+(defcustom nov-unzip-program (executable-find "unzip")
+ "Path to `unzip` executable"
+ :type '(file :must-match t)
+ :group 'nov)
+
+(defun nov-make-path (directory file)
+ "Create a path from DIRECTORY and FILE."
+ (concat (file-name-as-directory directory) file))
+
+(defun nov-contains-nested-directory-p (directory)
+ "Non-nil if DIRECTORY contains exactly one directory."
+ (let* ((files (directory-files directory t "^[^.]"))
+ (file (car files)))
+ (and (= (length files) 1)
+ (file-directory-p file)
+ file)))
+
+(defun nov-unnest-directory (directory child)
+ "Move contents of CHILD into DIRECTORY, then delete CHILD."
+ ;; FIXME: this will most certainly fail for con/con
+ (dolist (item (directory-files child t "^[^.]"))
+ (rename-file item directory))
+ (delete-directory child))
+
+(defun nov-unzip-epub (directory filename)
+ "Extract FILENAME into DIRECTORY.
+Unnecessary nesting is removed with `nov-unnest-directory'."
+ (let ((status (call-process nov-unzip-program nil nil t
+ "-d" directory filename))
+ child)
+ (while (setq child (nov-contains-nested-directory-p directory))
+ (nov-unnest-directory directory child))
+ status))
+
+(defmacro nov-ignore-file-errors (&rest body)
+ "Like `ignore-errors', but for file errors."
+ `(condition-case nil (progn ,@body) (file-error nil)))
+
+(defun nov-slurp (filename)
+ "Return the contents of FILENAME."
+ (with-temp-buffer
+ (insert-file-contents filename)
+ (buffer-string)))
+
+(defun nov-mimetype-valid-p (directory)
+ "Return t if DIRECTORY contains a valid EPUB mimetype file."
+ (nov-ignore-file-errors
+ (let ((filename (nov-make-path directory "mimetype")))
+ (equal (nov-slurp filename) "application/epub+zip"))))
+
+(defun nov-container-filename (directory)
+ "Return the container filename for DIRECTORY."
+ (let ((filename (nov-make-path directory "META-INF")))
+ (nov-make-path filename "container.xml")))
+
+(defun nov-container-content-filename (content)
+ "Return the content filename for CONTENT."
+ (let* ((query "container>rootfiles>rootfile[media-type='application/oebps-package+xml']")
+ (node (esxml-query query content)))
+ (esxml-node-attribute 'full-path node)))
+
+(defun nov-container-valid-p (directory)
+ "Return t if DIRECTORY holds a valid EPUB container."
+ (let ((filename (nov-container-filename directory)))
+ (when (and filename (file-exists-p filename))
+ (let* ((content (xml-to-esxml (nov-slurp filename)))
+ (content-file (nov-container-content-filename content)))
+ (when (and content content-file)
+ (file-exists-p (nov-make-path directory content-file)))))))
+
+(defun nov-epub-valid-p (directory)
+ "Return t if DIRECTORY makes up a valid EPUB document."
+ (and (nov-mimetype-valid-p directory)
+ (nov-container-valid-p directory)))
+
+(defun nov-content-version (content)
+ "Return the EPUB version for CONTENT."
+ (let* ((node (esxml-query "package" content))
+ (version (esxml-node-attribute 'version node)))
+ (when (not version)
+ (error "Version not specified"))
+ version))
+
+(defun nov-content-unique-identifier-name (content)
+ "Return the UUID name for CONTENT.
+This is used in `nov-content-unique-identifier' to retrieve the
+UUID."
+ (let* ((node (esxml-query "package[unique-identifier]" content))
+ (name (esxml-node-attribute 'unique-identifier node)))
+ (when (not name)
+ (error "Unique identifier name not specified"))
+ name))
+
+(defun nov-content-unique-identifier (content)
+ "Return the UUID for CONTENT."
+ (let* ((name (nov-content-unique-identifier-name content))
+ (selector (format "package>metadata>identifier[id='%s']"
+ (regexp-quote name)))
+ (id (car (esxml-node-children (esxml-query selector content)))))
+ (when (not id)
+ (error "Unique identifier not found by its name: %s" name))
+ (replace-regexp-in-string "^urn:uuid:" "" id)))
+
+;; NOTE: unique identifier is queried separately as identifiers can
+;; appear more than once and only one of them can be the unique one
+(defvar nov-required-metadata-tags '(title language)
+ "Required metadata tags used for `nov-content-metadata'.")
+
+(defvar nov-optional-metadata-tags
+ '(contributor coverage creator date description format
+ publisher relation rights source subject type)
+ "Optional metadata tags used for 'nov-content-metadata'.")
+
+(defun nov-content-metadata (content)
+ "Return a metadata alist for CONTENT.
+Required keys are 'identifier and everything in
+`nov-required-metadata-tags', optional keys are in
+`nov-optional-metadata-tags'."
+ (let* ((identifier (nov-content-unique-identifier content))
+ (candidates (mapcar (lambda (node)
+ (cons (esxml-node-tag node)
+ (car (esxml-node-children node))))
+ (esxml-query-all "package>metadata>*" content)))
+ (required (mapcar (lambda (tag)
+ (let ((candidate (assq tag candidates)))
+ (when (or (not candidate) (not (cdr candidate)))
+ (error "Required metadatum %s not found" tag))
+ candidate))
+ nov-required-metadata-tags))
+ (optional (->> nov-optional-metadata-tags
+ (mapcar (lambda (tag) (assq tag candidates)))
+ (--remove (or (not it) (not (cdr it)))))))
+ (append `((identifier . ,identifier)) required optional)))
+
+(defun nov-content-manifest (directory content)
+ "Extract an alist of manifest files for CONTENT in DIRECTORY.
+Each alist item consists of the identifier and full path."
+ (mapcar (lambda (node)
+ (-let [(&alist 'id id 'href href) (esxml-node-attributes node)]
+ (cons (intern id)
+ (nov-make-path directory href))))
+ (esxml-query-all "package>manifest>item" content)))
+
+(defun nov-content-spine (content)
+ "Extract a list of spine identifiers for CONTENT."
+ (mapcar (lambda (node) (intern (esxml-node-attribute 'idref node)))
+ (esxml-query-all "package>spine>itemref" content)))
+
+(defun nov--content-epub2-files (content manifest files)
+ (let* ((node (esxml-query "package>spine[toc]" content))
+ (id (esxml-node-attribute 'toc node)))
+ (when (not id)
+ (error "EPUB 2 NCX ID not found"))
+ (setq nov-toc-id (intern id))
+ (let ((toc-file (assq nov-toc-id manifest)))
+ (when (not toc-file)
+ (error "EPUB 2 NCX file not found"))
+ (cons toc-file files))))
+
+(defun nov--content-epub3-files (content manifest files)
+ (let* ((node (esxml-query "package>manifest>item[properties=nav]" content))
+ (id (esxml-node-attribute 'id node)))
+ (when (not id)
+ (error "EPUB 3 <nav> ID not found"))
+ (setq nov-toc-id (intern id))
+ (let ((toc-file (assq nov-toc-id manifest)))
+ (when (not toc-file)
+ (error "EPUB 3 <nav> file not found"))
+ (setq files (--remove (eq (car it) nov-toc-id) files))
+ (cons toc-file files))))
+
+(defun nov-content-files (directory content)
+ "Create correctly ordered file alist for CONTENT in DIRECTORY.
+Each alist item consists of the identifier and full path."
+ (let* ((manifest (nov-content-manifest directory content))
+ (spine (nov-content-spine content))
+ (files (mapcar (lambda (item) (assq item manifest)) spine)))
+ (if (version< nov-epub-version "3.0")
+ (nov--content-epub2-files content manifest files)
+ (nov--content-epub3-files content manifest files))))
+
+(defun nov--walk-ncx-node (node depth)
+ (let ((tag (esxml-node-tag node))
+ (children (--filter (eq (esxml-node-tag it) 'navPoint)
+ (esxml-node-children node))))
+ (cond
+ ((eq tag 'navMap)
+ (insert "<ol>\n")
+ (mapc (lambda (node) (nov--walk-ncx-node node (1+ depth))) children)
+ (insert "</ol>\n"))
+ ((eq tag 'navPoint)
+ (let* ((label-node (esxml-query "navLabel>text" node))
+ (content-node (esxml-query "content" node))
+ (href (esxml-node-attribute 'src content-node))
+ (label (car (esxml-node-children label-node))))
+ (when (not href)
+ (error "Navigation point is missing href attribute"))
+ (let ((link (format "<a href=\"%s\">%s</a>" href (or label href))))
+ (if children
+ (progn
+ (insert (format "<li>\n%s\n<ol>\n" link))
+ (mapc (lambda (node) (nov--walk-ncx-node node (1+ depth)))
+ children)
+ (insert (format "</ol>\n</li>\n")))
+ (insert (format "<li>\n%s\n</li>\n" link)))))))))
+
+(defun nov-ncx-to-html (path)
+ "Convert NCX document at PATH to HTML."
+ (let ((root (esxml-query "navMap" (xml-to-esxml (nov-slurp path)))))
+ (with-temp-buffer
+ (nov--walk-ncx-node root 0)
+ (buffer-string))))
+
+
+;;; UI
+
+(defvar nov-temp-dir nil
+ "Temporary directory containing the buffer's EPUB files.")
+(make-variable-buffer-local 'nov-temp-dir)
+
+(defvar nov-content-file nil
+ "Path of the EPUB buffer's .opf file.")
+(make-variable-buffer-local 'nov-content-file)
+
+(defvar nov-epub-version nil
+ "Version string of the EPUB buffer.")
+(make-variable-buffer-local 'nov-epub-version)
+
+(defvar nov-metadata nil
+ "Metadata of the EPUB buffer.")
+(make-variable-buffer-local 'nov-metadata)
+
+(defvar nov-documents nil
+ "Alist for the EPUB buffer's documents.
+Each alist item consists of the identifier and full path.")
+(make-variable-buffer-local 'nov-documents)
+
+(defvar nov-documents-index 0
+ "Index of the currently rendered document in the EPUB buffer.")
+(make-variable-buffer-local 'nov-documents-index)
+
+(defvar nov-toc-id nil
+ "TOC identifier of the EPUB buffer.")
+(make-variable-buffer-local 'nov-toc-id)
+
+(defvar nov-mode-map
+ (let ((map (make-sparse-keymap)))
+ (define-key map (kbd "g") 'nov-render-document)
+ (define-key map (kbd "v") 'nov-view-source)
+ (define-key map (kbd "V") 'nov-view-content-source)
+ (define-key map (kbd "m") 'nov-display-metadata)
+ (define-key map (kbd "n") 'nov-next-chapter)
+ (define-key map (kbd "p") 'nov-previous-chapter)
+ (define-key map (kbd "t") 'nov-goto-toc)
+ (define-key map (kbd "RET") 'nov-browse-url)
+ (define-key map (kbd "<follow-link>") 'mouse-face)
+ (define-key map (kbd "<mouse-2>") 'nov-browse-url)
+ (define-key map (kbd "TAB") 'shr-next-link)
+ (define-key map (kbd "M-TAB") 'shr-previous-link)
+ (define-key map (kbd "<backtab>") 'shr-previous-link)
+ (define-key map (kbd "SPC") 'nov-scroll-up)
+ (define-key map (kbd "<home>") 'beginning-of-buffer)
+ (define-key map (kbd "<end>") 'end-of-buffer)
+ map))
+
+(defun nov-clean-up ()
+ "Delete temporary files of all current EPUB buffer."
+ (when nov-temp-dir
+ (nov-ignore-file-errors
+ (delete-directory nov-temp-dir t))))
+
+(defun nov-clean-up-all ()
+ "Delete temporary files of all opened EPUB buffers."
+ (dolist (buffer (buffer-list))
+ (with-current-buffer buffer
+ (when (eq major-mode 'nov-mode)
+ (nov-clean-up)))))
+
+(defun nov-external-url-p (url)
+ "Return t if URL refers to an external document."
+ (and (url-type (url-generic-parse-url url)) t))
+
+(defun nov-url-filename-and-target (url)
+ "Return a list of URL's filename and target."
+ (setq url (url-generic-parse-url url))
+ (list (url-filename url) (url-target url)))
+
+(defun nov-insert-image (path)
+ "Insert an image for PATH at point.
+This function honors `shr-max-image-proportion' if possible."
+ ;; adapted from `shr-rescale-image'
+ (if (fboundp 'imagemagick-types)
+ (let ((edges (window-inside-pixel-edges
+ (get-buffer-window (current-buffer)))))
+ (insert-image
+ (create-image path 'imagemagick nil
+ :ascent 100
+ :max-width (truncate (* shr-max-image-proportion
+ (- (nth 2 edges)
+ (nth 0 edges))))
+ :max-height (truncate (* shr-max-image-proportion
+ (- (nth 3 edges)
+ (nth 1 edges)))))))
+ (insert-image (create-image path nil nil :ascent 100))))
+
+(defun nov-render-img (dom)
+ "Custom <img> rendering function for DOM.
+Uses `shr-tag-img' for external paths and `nov-insert-image' for
+internal ones."
+ (let ((url (cdr (assq 'src (cadr dom)))))
+ (if (nov-external-url-p url)
+ (funcall 'shr-tag-img dom)
+ (setq url (expand-file-name url))
+ (nov-insert-image url))))
+
+(defun nov-render-title (dom)
+ "Custom <title> rendering function for DOM.
+Sets `header-line-format' to a combination of the EPUB title and
+chapter title."
+ (let ((title (cdr (assq 'title nov-metadata)))
+ (chapter-title (car (esxml-node-children dom))))
+ (when (not chapter-title)
+ (setq chapter-title '(:propertize "No title" face italic)))
+ (setq header-line-format (list title ": " chapter-title))))
+
+(defvar nov-rendering-functions
+ '(;; default function uses url-retrieve and fails on local images
+ (img . nov-render-img)
+ ;; titles are rendered *inside* the document by default
+ (title . nov-render-title))
+ "Alist of rendering functions used with `shr-render-region'.")
+
+(defun nov-render-document ()
+ "Render the document referenced by `nov-documents-index'.
+If the document path refers to an image (as determined by
+`image-type-file-name-regexps'), an image is inserted, otherwise
+the HTML is rendered with `shr-render-region'."
+ (interactive)
+ (let* ((document (aref nov-documents nov-documents-index))
+ (id (car document))
+ (path (cdr document))
+ ;; HACK: this should be looked up in the manifest
+ (imagep (--find (string-match-p (car it) path)
+ image-type-file-name-regexps))
+ ;; NOTE: allows resolving image references correctly
+ (default-directory (file-name-directory path))
+ buffer-read-only)
+ (erase-buffer)
+
+ (cond
+ (imagep
+ (nov-insert-image path))
+ ((and (version< nov-epub-version "3.0")
+ (eq id nov-toc-id))
+ (insert (nov-ncx-to-html path)))
+ (t
+ (insert (nov-slurp path))))
+
+ (when (not imagep)
+ (let (;; HACK: make buttons use our own commands
+ (shr-map nov-mode-map)
+ (shr-external-rendering-functions nov-rendering-functions))
+ (shr-render-region (point-min) (point-max))))
+ (goto-char (point-min))))
+
+(defun nov-find-document (predicate)
+ "Return first item in `nov-documents' PREDICATE is true for."
+ (let ((i 0)
+ done)
+ (while (and (not done)
+ (< i (length nov-documents)))
+ (when (funcall predicate (aref nov-documents i))
+ (setq done t))
+ (setq i (1+ i)))
+ (when done
+ (1- i))))
+
+(defun nov-goto-toc ()
+ "Go to the TOC index and render the TOC document."
+ (interactive)
+ (let ((index (nov-find-document (lambda (doc) (eq (car doc) nov-toc-id)))))
+ (when (not index)
+ (error "Couldn't locate TOC"))
+ (setq nov-documents-index index)
+ (nov-render-document)))
+
+(defun nov-view-source ()
+ "View the source of the current document in a new buffer."
+ (interactive)
+ (find-file (cdr (aref nov-documents nov-documents-index))))
+
+(defun nov-view-content-source ()
+ "View the source of the content file in a new buffer."
+ (interactive)
+ (find-file nov-content-file))
+
+(defun nov-display-metadata ()
+ "View the metadata of the EPUB document in a new buffer."
+ (interactive)
+ (let ((buffer "*EPUB metadata*")
+ (metadata nov-metadata)
+ (version nov-epub-version))
+ (with-current-buffer (get-buffer-create buffer)
+ (special-mode)
+ (let (buffer-read-only)
+ (erase-buffer)
+ (insert (format "EPUB Version: %s\n" version))
+ (dolist (item metadata)
+ (-let [(key . value) item]
+ (insert (format "%s: " (capitalize (symbol-name key))))
+ (if (eq key 'description)
+ (let ((beg (point)))
+ (insert value)
+ (shr-render-region beg (point)))
+ (insert value))
+ (insert "\n")))
+ (goto-char (point-min))))
+ (display-buffer buffer)))
+
+(defun nov-next-chapter ()
+ "Go to the next chapter and render its document."
+ (interactive)
+ (when (< nov-documents-index (1- (length nov-documents)))
+ (setq nov-documents-index (1+ nov-documents-index))
+ (nov-render-document)))
+
+(defun nov-previous-chapter ()
+ "Go to the previous chapter and render its document."
+ (interactive)
+ (when (> nov-documents-index 0)
+ (setq nov-documents-index (1- nov-documents-index))
+ (nov-render-document)))
+
+(defun nov-scroll-up (arg)
+ "Scroll with `scroll-up' or visit next chapter if at bottom."
+ (interactive "P")
+ (if (>= (window-end) (point-max))
+ (nov-next-chapter)
+ (scroll-up arg)))
+
+(defun nov-visit-relative-file (filename target)
+ "Visit the document as specified by FILENAME and TARGET."
+ (let ((index (nov-find-document (lambda (doc)
+ (string-suffix-p filename (cdr doc))))))
+ (when (not index)
+ (error "Couldn't locate document"))
+ (setq nov-documents-index index)
+ (let ((shr-target-id target))
+ (nov-render-document))
+ (when target
+ (let ((pos (next-single-property-change (point-min) 'shr-target-id)))
+ (when (not pos)
+ (error "Couldn't locate target"))
+ (goto-char pos)
+ (recenter (1- (max 1 scroll-margin)))))))
+
+;; adapted from `shr-browse-url'
+(defun nov-browse-url (&optional mouse-event)
+ "Follow an external url with `browse-url'.
+Internal URLs are visited with `nov-visit-relative-file'."
+ (interactive (list last-nonmenu-event))
+ (mouse-set-point mouse-event)
+ (let ((url (get-text-property (point) 'shr-url)))
+ (when (not url)
+ (user-error "No link under point"))
+ (if (nov-external-url-p url)
+ (browse-url url)
+ (apply 'nov-visit-relative-file (nov-url-filename-and-target url)))))
+
+(define-derived-mode nov-mode special-mode "EPUB"
+ "Major mode for reading EPUB documents"
+ (add-hook 'kill-buffer-hook 'nov-clean-up nil t)
+ (add-hook 'kill-emacs-hook 'nov-clean-up-all)
+ (add-hook 'change-major-mode-hook 'nov-clean-up nil t)
+ (when (not buffer-file-name)
+ (error "EPUB must be associated with file"))
+ (setq nov-temp-dir (make-temp-file "nov-" t ".epub"))
+ (let ((exit-code (nov-unzip-epub nov-temp-dir buffer-file-name)))
+ (when (not (integerp exit-code))
+ (nov-clean-up)
+ (error "EPUB extraction aborted by signal %s" exit-code))
+ (when (not (zerop exit-code))
+ (nov-clean-up)
+ (error "EPUB extraction failed with exit code %d" exit-code)))
+ (when (not (nov-epub-valid-p nov-temp-dir))
+ (nov-clean-up)
+ (error "Invalid EPUB file"))
+ (let* ((content (-> (nov-container-filename nov-temp-dir)
+ (nov-slurp)
+ (xml-to-esxml)))
+ (content-file (->> (nov-container-content-filename content)
+ (nov-make-path nov-temp-dir)))
+ (work-dir (file-name-directory content-file))
+ (content (-> (nov-slurp content-file)
+ (xml-to-esxml))))
+ (setq nov-content-file content-file)
+ (setq nov-epub-version (nov-content-version content))
+ (setq nov-metadata (nov-content-metadata content))
+ (setq nov-documents (->> (nov-content-files work-dir content)
+ (apply 'vector)))
+ (setq nov-documents-index 0))
+ (setq buffer-undo-list t)
+ (set-visited-file-name nil t) ; disable autosaves and save questions
+ (nov-render-document))
+
+(provide 'nov)
+;;; nov.el ends here