From 093ffa5fbf7143f4668bb0a3dc9659a5cc836e12 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Sat, 17 Jun 2023 17:20:29 +1000 Subject: Moving things one level deeper To ease gnu stow usage. Now we can do stow -t ~ emacs --- emacs/.emacs.d/lisp/my/my-wikipedia.el | 182 +++++++++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 emacs/.emacs.d/lisp/my/my-wikipedia.el (limited to 'emacs/.emacs.d/lisp/my/my-wikipedia.el') diff --git a/emacs/.emacs.d/lisp/my/my-wikipedia.el b/emacs/.emacs.d/lisp/my/my-wikipedia.el new file mode 100644 index 0000000..557c553 --- /dev/null +++ b/emacs/.emacs.d/lisp/my/my-wikipedia.el @@ -0,0 +1,182 @@ +;;; my-wikipedia.el -- wikipedia client -*- lexical-binding: t -*- + +;; Copyright (C) 2023 Free Software Foundation. + +;; Author: Yuchen Pei +;; Package-Requires: ((emacs "28.2")) + +;; This file is part of dotfiles. + +;; dotfiles is free software: you can redistribute it and/or modify it under +;; the terms of the GNU Affero General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; dotfiles is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +;; Public License for more details. + +;; You should have received a copy of the GNU Affero General Public +;; License along with dotfiles. If not, see . + +;;; Commentary: + +;; wikipedia client. + +;;; Code: + + +(require 'my-utils) +(require 'my-markup) +(require 'my-net) + +;; TODO: much of these can be generalised to any mediawiki site +(defvar my-wikipedia-lang "en") +(defvar my-wikipedia-host + (format "https://%s.wikipedia.org" my-wikipedia-lang)) +(defun my-grok-wikipedia (url) + "groks wikipedia by url and returns the info of the wikipedia entry." + (with-current-buffer (url-retrieve-synchronously url) + (my-delete-http-header) + (goto-char (point-min)) + (let ((results (my-grok-wikipedia-get-imdb-rating)) + (html + (libxml-parse-html-region (point-min) (point-max)))) + (append (my-grok-wikipedia-html html url) results)))) + +(defun my-grok-wikipedia-get-imdb-rating () + (when (re-search-forward + "\\(https://\\(www\\.\\)?imdb.com/title/tt[0-9]+/\\)" nil t) + (let ((url (match-string 1))) + (with-current-buffer (url-retrieve-synchronously + (concat url "ratings")) + (goto-char (point-min)) + (when + (re-search-forward + "\\([0-9,]+\\)\\s-*IMDb.*?\\([0-9\\.]+\\) / 10" nil t) + (list (cons "IMDB-link" url) + (cons "IMDB-rating" (match-string 2)) + (cons "IMDB-rated-by" (match-string 1)))))))) + +(defun my-wikipedia-api-summary (title) + (my-url-fetch-json + (format "%s/api/rest_v1/page/summary/%s" my-wikipedia-host title))) + +(defun my-grok-wikipedia-summary (url) + "get wikipedia summary using the rest api" + (let ((resp (my-wikipedia-api-summary + (replace-regexp-in-string ".*/wiki/" "" url)))) + (list (cons "Wikipedia-link" + (alist-get 'page + (alist-get 'desktop + (alist-get 'content_urls resp)))) + (cons "Description" (my-clean-property-value + (alist-get 'extract resp))) + (cons "Title" (alist-get 'title resp)) + (cons "Cover" (alist-get 'source + (alist-get 'thumbnail resp))) + (cons "Latitude" (when-let (coord (alist-get 'coordinates resp)) + (number-to-string (alist-get 'lat coord)))) + (cons "Longitude" (when-let (coord (alist-get 'coordinates resp)) + (number-to-string + (alist-get 'lon coord))))))) +(defun my-grok-wikipedia-html (html url) + (let* ((result (my-grok-wikipedia-summary url)) + (info (car (dom-by-class html "infobox"))) + (ths (dom-by-tag info 'th)) + (tds (mapcar (lambda (th) + (my-dom-remove-style + (car (dom-by-tag (dom-parent info th) 'td)))) + ths)) + (len (length ths))) + (dotimes (unused len) + (let* ((key (my-clean-property-key + (dom-texts (pop ths) ""))) + (value (my-clean-property-value + (dom-texts (pop tds) ""))) + (to-push + (cond ((string-empty-p key) nil) + ((string-empty-p value) nil) + ((string= key "Coordinates") + (my-grok-wikipedia-clean-coordinates value)) + ((or (member key '("Website" "Source" "URL"))) + (list (cons key (my-grok-wikipedia-fix-url value)))) + (t (list (cons key value)))))) + (mapc (lambda (pair) (push pair result)) to-push))) + (reverse result))) +(defun my-grok-wikipedia-clean-coordinates (raw) + (let ((float-re "\\([-+]?[0-9]+\\(?:\\.[0-9]*\\)?\\)")) + (string-match (format "%s; %s$" float-re float-re) raw) + (list (cons "Latitude" (match-string 1 raw)) + (cons "Longitude" (match-string 2 raw))))) + +(defun my-grok-wikipedia-fix-url (url) + (let* ((urlobj (url-generic-parse-url url)) + (filename (url-filename urlobj))) + (unless (url-type urlobj) + (setf (url-type urlobj) "https") + (string-match "^\\([^/]+\\)\\(/.*\\)?$" filename) + (setf (url-host urlobj) (match-string 1 filename)) + (setf (url-filename urlobj) (or (match-string 2 filename) "")) + (setf (url-fullness urlobj) t)) + (url-recreate-url urlobj))) + +(defun my-wikipedia-api-search (query) + (my-url-fetch-json + (format + "%s/w/api.php?action=query&format=json&list=search&srsearch=%s" + my-wikipedia-host query))) + +(defun my-wikipedia-search (query) + (interactive "sQuery: ") + (generic-search-open + (alist-get 'search + (alist-get 'query + (my-wikipedia-api-search query))) + (format "wikipedia-query:%s" query) + `((formatter . my-wikipedia-format-result) + (default-action . my-wikipedia-grok-action) + (keymap . ,my-wikipedia-button-keymap)))) + +(defun my-wikipedia-format-result (result) + (concat + (format "%s (%d words)" + (alist-get 'title result) + (alist-get 'wordcount result)) + (propertize + (format "\n\n%s" + (my-wikipedia-highlight-snippet-matches + (alist-get 'snippet result))) + 'face 'default))) + +(defun my-wikipedia-highlight-snippet-matches (snippet) + (with-temp-buffer + (insert snippet) + (goto-char (point-min)) + (while (re-search-forward "\\(.*?\\)" nil t) + (replace-match + (propertize (match-string 1) 'face 'match))) + (buffer-string))) + +(defun my-wikipedia-grok-action (info) + (interactive) + (my-org-grok (format "%s/wiki/%s" + my-wikipedia-host + (alist-get 'title info)))) + +(defun my-wikipedia-fetch-wiki () + (interactive) + (my-fetch-url (format "/wiki/%s?action=raw" + my-wikipedia-host + (alist-get 'title + (get-text-property (point) 'button-data))))) + +(defvar my-wikipedia-button-keymap + (let ((kmap (make-sparse-keymap))) + (set-keymap-parent kmap button-map) + (define-key kmap "f" 'my-wikipedia-fetch-wiki) + kmap)) + +(provide 'my-wikipedia) +;;; my-wikipedia.el ends here -- cgit v1.2.3