From f2d3c6ca51bfbf7620ddde9faf83ec5fd973abf5 Mon Sep 17 00:00:00 2001
From: Yuchen Pei
Date: Sun, 23 Jul 2023 23:05:13 +1000
Subject: Add a mediawiki API fetcher. Applicable to wikihow.

Also clean up the code to reduce duplication between fetching and
finding files.

And fix the case where the wiki entry is under a directory named after
itself, which is common in mediawiki: both foo and foo/bar can be valid
wiki titles. So local files need to have an extension (by default
.wiki). Also add the extension to auto-mode-alist.
---
 wiki-utils.el | 114 ++++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 72 insertions(+), 42 deletions(-)

(limited to 'wiki-utils.el')

diff --git a/wiki-utils.el b/wiki-utils.el
index 0e204b1..61357d2 100644
--- a/wiki-utils.el
+++ b/wiki-utils.el
@@ -25,6 +25,7 @@
 ;;; Code:
 
 (require 'url-parse)
+(require 'json)
 
 (defvar wiki-local-dir (locate-user-emacs-file "wiki")
   "Path to local directory of wiki files.")
@@ -32,40 +33,17 @@
 (defvar wiki-fetch-prefer-local t
   "If non-nil, visit the local file if exists when fetching.")
 
-(defun wiki-fetch-url (url dir &optional callback title)
-  "Fetch URL asynchronously to a file in DIR.
-
-Then call CALLBACK which is a closure taking no argument.
-
-A non-nil TITLE overrides title inferred from the url."
-  (interactive "sURL: ")
-  (let ((file-name (expand-file-name
-                    (or title (wiki-make-file-name-from-url url))
-                    dir))
-        (cb (lambda (status file-name)
-              (wiki-fetch-url-save-and-switch status file-name)
-              (when callback (funcall callback)))))
-    (url-retrieve url cb (list file-name)))
-  )
-
-(defun wiki-fetch-url-save-and-switch (status file-name)
-  "Fetch url to FILE-NAME if STATUS is ok.
-
-And switch to the corresponding buffer."
-  (when (plist-get status :error)
-    (error "Wiki fetch failed: %s" (plist-get status :error)))
-  (wiki-delete-http-header)
-  (let ((to-insert (buffer-string))
-        (buffer (find-file-noselect file-name))
-        (coding-system-for-write 'utf-8))
-    (kill-buffer)
-    (with-current-buffer buffer
-      (insert to-insert)
-      (goto-char (point-min))
-      (save-buffer)
-      (revert-buffer t t))
-    (switch-to-buffer buffer))
-  )
+(defvar wiki-extension ".wiki"
+  "The extension of local wiki files.")
+
+(add-to-list 'auto-mode-alist
+             `(,(format "\\%s\\'" wiki-extension) . wiki-mode))
+
+(defun wiki-kill-http-header ()
+  "Kill the http header in current buffer.
+
+Assuming the current buffer to be a `url-retrieve' response buffer."
+  (kill-region (point-min) (progn (wiki-skip-http-header) (point))))
 
 (defun wiki-delete-http-header ()
   "Delete the http header in current buffer.
@@ -80,14 +58,7 @@ Assuming the current buffer to be a `url-retrieve' response buffer."
   (goto-char (point-min))
   (re-search-forward "\r?\n\r?\n"))
 
-(defun wiki-make-file-name-from-url (url)
-  "Make a file name from URL."
-  (file-name-nondirectory
-   (directory-file-name
-    (car (url-path-and-query (url-generic-parse-url
-                              (url-unhex-string url)))))))
-
-;; TODO: add wikihow; generalise fandom
+;; TODO: generalise fandom
 ;; TODO: default engine to mediawiki
 (defvar wiki-sites
   '((local)
@@ -134,6 +105,10 @@
:base-url "https://help.ubuntu.com/community/" :engine moinmoin :display-name "Ubuntu Community Help Wiki") + (wikihow :base-url "https://www.wikihow.com/" + :engine mediawiki + :display-name "wikiHow" + :fetcher wiki-engine-mediawiki-api-fetch) (wikiindex :base-url "https://wikiindex.org/" :engine mediawiki :display-name "WikiIndex") @@ -162,5 +137,60 @@ One of the sites is (local), meaning a local filesystem.") (intern (format "wiki-%s-fetch" site-id)) 'wiki-find-file)) +(defvar wiki-client-buffer-name "*wiki api*" + "Name of the buffer recording wiki API calls.") + +(defun wiki-parse-http-header (text) + "Parse the http header TEXT." + (let ((status) (fields)) + (with-temp-buffer + (insert text) + (goto-char (point-min)) + (re-search-forward "^HTTP.*\\([0-9]\\{3\\}\\).*$") + (setq status (match-string 1)) + (while (re-search-forward "^\\(.*?\\): \\(.*\\)$" nil t) + (push (cons (intern (match-string 1)) (match-string 2)) fields))) + (list (cons 'status status) (cons 'fields fields)))) + +(defun wiki-url-fetch-internal (url processor &optional + decompression with-header) + "Fetch from URL and process the response payload using PROCESSOR. + +PROCESSOR is a function that takes no argument and processes the +current buffer. +With non-nil DECOMPRESSION, decompress the response. +With non-nil WITH-HEADER, include the header in the result." + (with-current-buffer (get-buffer-create wiki-client-buffer-name) + (goto-char (point-max)) + (insert "[" (current-time-string) "] Request: " url "\n")) + (with-current-buffer (url-retrieve-synchronously url t) + (let ((header) (status) (fields)) + (wiki-kill-http-header) + (goto-char (point-min)) + (setq header (wiki-parse-http-header (car kill-ring)) + status (alist-get 'status header) + fields (alist-get 'fields header)) + (with-current-buffer wiki-client-buffer-name + (insert "[" (current-time-string) "] Response: " status "\n")) + (when decompression + (call-process-region (point) (point-max) "gunzip" t t t) + (goto-char (point-min))) + (call-interactively 'delete-trailing-whitespace) + (if (string= status "200") + (unless (= (point) (point-max)) + (if with-header + (list + (cons 'header fields) + (cons 'json (funcall processor))) + (funcall processor))) + (error "HTTP error: %s" (buffer-substring (point) (point-max))))))) + +(defun wiki-url-fetch-json (url &optional decompression with-header) + "Fetch and parse a json object from URL. + +With non-nil DECOMPRESSION, decompress the response. +With non-nil WITH-HEADER, include the header in the result." + (wiki-url-fetch-internal url 'json-read decompression with-header)) + (provide 'wiki-utils) ;;; wiki-utils.el ends here -- cgit v1.2.3