aboutsummaryrefslogtreecommitdiff
path: root/emacs/.emacs.d/lisp/my/my-wikipedia.el
diff options
context:
space:
mode:
authorYuchen Pei <id@ypei.org>2023-06-17 17:20:29 +1000
committerYuchen Pei <id@ypei.org>2023-06-17 17:20:29 +1000
commit093ffa5fbf7143f4668bb0a3dc9659a5cc836e12 (patch)
tree1ed4e14b2a43b8e338f4ad6a04d969b99b9239be /emacs/.emacs.d/lisp/my/my-wikipedia.el
parentabc686827ae38ee715d9eed1c5c29161c74127e6 (diff)
Moving things one level deeper
To ease gnu stow usage. Now we can do stow -t ~ emacs
Diffstat (limited to 'emacs/.emacs.d/lisp/my/my-wikipedia.el')
-rw-r--r--emacs/.emacs.d/lisp/my/my-wikipedia.el182
1 files changed, 182 insertions, 0 deletions
diff --git a/emacs/.emacs.d/lisp/my/my-wikipedia.el b/emacs/.emacs.d/lisp/my/my-wikipedia.el
new file mode 100644
index 0000000..557c553
--- /dev/null
+++ b/emacs/.emacs.d/lisp/my/my-wikipedia.el
@@ -0,0 +1,182 @@
+;;; my-wikipedia.el -- wikipedia client -*- lexical-binding: t -*-
+
+;; Copyright (C) 2023 Free Software Foundation.
+
+;; Author: Yuchen Pei <id@ypei.org>
+;; Package-Requires: ((emacs "28.2"))
+
+;; This file is part of dotfiles.
+
+;; dotfiles is free software: you can redistribute it and/or modify it under
+;; the terms of the GNU Affero General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; dotfiles is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+;; Public License for more details.
+
+;; You should have received a copy of the GNU Affero General Public
+;; License along with dotfiles. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; wikipedia client.
+
+;;; Code:
+
+
+(require 'my-utils)
+(require 'my-markup)
+(require 'my-net)
+
+;; TODO: much of these can be generalised to any mediawiki site
+(defvar my-wikipedia-lang "en")
+(defvar my-wikipedia-host
+ (format "https://%s.wikipedia.org" my-wikipedia-lang))
+(defun my-grok-wikipedia (url)
+ "groks wikipedia by url and returns the info of the wikipedia entry."
+ (with-current-buffer (url-retrieve-synchronously url)
+ (my-delete-http-header)
+ (goto-char (point-min))
+ (let ((results (my-grok-wikipedia-get-imdb-rating))
+ (html
+ (libxml-parse-html-region (point-min) (point-max))))
+ (append (my-grok-wikipedia-html html url) results))))
+
+(defun my-grok-wikipedia-get-imdb-rating ()
+ (when (re-search-forward
+ "\\(https://\\(www\\.\\)?imdb.com/title/tt[0-9]+/\\)" nil t)
+ (let ((url (match-string 1)))
+ (with-current-buffer (url-retrieve-synchronously
+ (concat url "ratings"))
+ (goto-char (point-min))
+ (when
+ (re-search-forward
+ "\\([0-9,]+\\)\\s-*IMDb.*?\\([0-9\\.]+\\) / 10" nil t)
+ (list (cons "IMDB-link" url)
+ (cons "IMDB-rating" (match-string 2))
+ (cons "IMDB-rated-by" (match-string 1))))))))
+
+(defun my-wikipedia-api-summary (title)
+ (my-url-fetch-json
+ (format "%s/api/rest_v1/page/summary/%s" my-wikipedia-host title)))
+
+(defun my-grok-wikipedia-summary (url)
+ "get wikipedia summary using the rest api"
+ (let ((resp (my-wikipedia-api-summary
+ (replace-regexp-in-string ".*/wiki/" "" url))))
+ (list (cons "Wikipedia-link"
+ (alist-get 'page
+ (alist-get 'desktop
+ (alist-get 'content_urls resp))))
+ (cons "Description" (my-clean-property-value
+ (alist-get 'extract resp)))
+ (cons "Title" (alist-get 'title resp))
+ (cons "Cover" (alist-get 'source
+ (alist-get 'thumbnail resp)))
+ (cons "Latitude" (when-let (coord (alist-get 'coordinates resp))
+ (number-to-string (alist-get 'lat coord))))
+ (cons "Longitude" (when-let (coord (alist-get 'coordinates resp))
+ (number-to-string
+ (alist-get 'lon coord)))))))
+(defun my-grok-wikipedia-html (html url)
+ (let* ((result (my-grok-wikipedia-summary url))
+ (info (car (dom-by-class html "infobox")))
+ (ths (dom-by-tag info 'th))
+ (tds (mapcar (lambda (th)
+ (my-dom-remove-style
+ (car (dom-by-tag (dom-parent info th) 'td))))
+ ths))
+ (len (length ths)))
+ (dotimes (unused len)
+ (let* ((key (my-clean-property-key
+ (dom-texts (pop ths) "")))
+ (value (my-clean-property-value
+ (dom-texts (pop tds) "")))
+ (to-push
+ (cond ((string-empty-p key) nil)
+ ((string-empty-p value) nil)
+ ((string= key "Coordinates")
+ (my-grok-wikipedia-clean-coordinates value))
+ ((or (member key '("Website" "Source" "URL")))
+ (list (cons key (my-grok-wikipedia-fix-url value))))
+ (t (list (cons key value))))))
+ (mapc (lambda (pair) (push pair result)) to-push)))
+ (reverse result)))
+(defun my-grok-wikipedia-clean-coordinates (raw)
+ (let ((float-re "\\([-+]?[0-9]+\\(?:\\.[0-9]*\\)?\\)"))
+ (string-match (format "%s; %s$" float-re float-re) raw)
+ (list (cons "Latitude" (match-string 1 raw))
+ (cons "Longitude" (match-string 2 raw)))))
+
+(defun my-grok-wikipedia-fix-url (url)
+ (let* ((urlobj (url-generic-parse-url url))
+ (filename (url-filename urlobj)))
+ (unless (url-type urlobj)
+ (setf (url-type urlobj) "https")
+ (string-match "^\\([^/]+\\)\\(/.*\\)?$" filename)
+ (setf (url-host urlobj) (match-string 1 filename))
+ (setf (url-filename urlobj) (or (match-string 2 filename) ""))
+ (setf (url-fullness urlobj) t))
+ (url-recreate-url urlobj)))
+
+(defun my-wikipedia-api-search (query)
+ (my-url-fetch-json
+ (format
+ "%s/w/api.php?action=query&format=json&list=search&srsearch=%s"
+ my-wikipedia-host query)))
+
+(defun my-wikipedia-search (query)
+ (interactive "sQuery: ")
+ (generic-search-open
+ (alist-get 'search
+ (alist-get 'query
+ (my-wikipedia-api-search query)))
+ (format "wikipedia-query:%s" query)
+ `((formatter . my-wikipedia-format-result)
+ (default-action . my-wikipedia-grok-action)
+ (keymap . ,my-wikipedia-button-keymap))))
+
+(defun my-wikipedia-format-result (result)
+ (concat
+ (format "%s (%d words)"
+ (alist-get 'title result)
+ (alist-get 'wordcount result))
+ (propertize
+ (format "\n\n%s"
+ (my-wikipedia-highlight-snippet-matches
+ (alist-get 'snippet result)))
+ 'face 'default)))
+
+(defun my-wikipedia-highlight-snippet-matches (snippet)
+ (with-temp-buffer
+ (insert snippet)
+ (goto-char (point-min))
+ (while (re-search-forward "<span class=\"searchmatch\">\\(.*?\\)</span>" nil t)
+ (replace-match
+ (propertize (match-string 1) 'face 'match)))
+ (buffer-string)))
+
+(defun my-wikipedia-grok-action (info)
+ (interactive)
+ (my-org-grok (format "%s/wiki/%s"
+ my-wikipedia-host
+ (alist-get 'title info))))
+
+(defun my-wikipedia-fetch-wiki ()
+ (interactive)
+ (my-fetch-url (format "/wiki/%s?action=raw"
+ my-wikipedia-host
+ (alist-get 'title
+ (get-text-property (point) 'button-data)))))
+
+(defvar my-wikipedia-button-keymap
+ (let ((kmap (make-sparse-keymap)))
+ (set-keymap-parent kmap button-map)
+ (define-key kmap "f" 'my-wikipedia-fetch-wiki)
+ kmap))
+
+(provide 'my-wikipedia)
+;;; my-wikipedia.el ends here