;;; my-wikipedia.el -- wikipedia client -*- lexical-binding: t -*- ;; Copyright (C) 2023 Free Software Foundation. ;; Author: Yuchen Pei ;; Package-Requires: ((emacs "28.2")) ;; This file is part of dotfiles. ;; dotfiles is free software: you can redistribute it and/or modify it under ;; the terms of the GNU Affero General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; dotfiles is distributed in the hope that it will be useful, but WITHOUT ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General ;; Public License for more details. ;; You should have received a copy of the GNU Affero General Public ;; License along with dotfiles. If not, see . ;;; Commentary: ;; wikipedia client. ;;; Code: (require 'my-utils) (require 'my-markup) (require 'my-net) (require 'wiki) ;; TODO: much of these can be generalised to any mediawiki site (defvar my-wikipedia-lang "en") (defvar my-wikipedia-host (format "https://%s.wikipedia.org" my-wikipedia-lang)) (defun my-grok-wikipedia (url) "groks wikipedia by url and returns the info of the wikipedia entry." (with-current-buffer (url-retrieve-synchronously url) (my-delete-http-header) (goto-char (point-min)) (let ((results (my-grok-wikipedia-get-imdb-rating)) (html (libxml-parse-html-region (point-min) (point-max)))) (append (my-grok-wikipedia-html html url) results)))) (defun my-wiki-grok-wikipedia () "groks current wiki-mode buffer if the site is wikipedia." (interactive) (unless (and (derived-mode-p 'wiki-mode) (string-prefix-p "wikipedia" (format "%s" wiki-site))) (error "Not in wiki mode or wikipedia.")) (my-org-grok (wiki-current-html-url))) (defun my-grok-wikipedia-get-imdb-rating () (when (re-search-forward "\\(https://\\(www\\.\\)?imdb.com/title/tt[0-9]+/\\)" nil t) (let ((url (match-string 1))) (with-current-buffer (url-retrieve-synchronously (concat url "ratings")) (goto-char (point-min)) (when (re-search-forward "\\([0-9,]+\\)\\s-*IMDb.*?\\([0-9\\.]+\\) / 10" nil t) (list (cons "IMDB-link" url) (cons "IMDB-rating" (match-string 2)) (cons "IMDB-rated-by" (match-string 1)))))))) (defun my-wikipedia-api-summary (title) (my-url-fetch-json (format "%s/api/rest_v1/page/summary/%s" my-wikipedia-host title))) (defun my-grok-wikipedia-summary (url) "get wikipedia summary using the rest api" (let ((resp (my-wikipedia-api-summary (replace-regexp-in-string ".*/wiki/" "" url)))) (list (cons "Wikipedia-link" (alist-get 'page (alist-get 'desktop (alist-get 'content_urls resp)))) (cons "Description" (my-clean-property-value (alist-get 'extract resp))) (cons "Title" (alist-get 'title resp)) (cons "Cover" (alist-get 'source (alist-get 'thumbnail resp))) (cons "Latitude" (when-let (coord (alist-get 'coordinates resp)) (number-to-string (alist-get 'lat coord)))) (cons "Longitude" (when-let (coord (alist-get 'coordinates resp)) (number-to-string (alist-get 'lon coord))))))) (defun my-grok-wikipedia-html (html url) (let* ((result (my-grok-wikipedia-summary url)) (info (car (dom-by-class html "infobox"))) (ths (dom-by-tag info 'th)) (tds (mapcar (lambda (th) (my-dom-remove-style (car (dom-by-tag (dom-parent info th) 'td)))) ths)) (len (length ths))) (dotimes (unused len) (let* ((key (my-clean-property-key (dom-texts (pop ths) ""))) (value (my-clean-property-value (dom-texts (pop tds) ""))) (to-push (cond ((string-empty-p key) nil) ((string-empty-p value) nil) ((string= key "Coordinates") (my-grok-wikipedia-clean-coordinates value)) ((or (member key '("Website" "Source" "URL"))) (list (cons key (my-grok-wikipedia-fix-url value)))) (t (list (cons key value)))))) (mapc (lambda (pair) (push pair result)) to-push))) (reverse result))) (defun my-grok-wikipedia-clean-coordinates (raw) (let ((float-re "\\([-+]?[0-9]+\\(?:\\.[0-9]*\\)?\\)")) (string-match (format "%s; %s$" float-re float-re) raw) (list (cons "Latitude" (match-string 1 raw)) (cons "Longitude" (match-string 2 raw))))) (defun my-grok-wikipedia-fix-url (url) (let* ((urlobj (url-generic-parse-url url)) (filename (url-filename urlobj))) (unless (url-type urlobj) (setf (url-type urlobj) "https") (string-match "^\\([^/]+\\)\\(/.*\\)?$" filename) (setf (url-host urlobj) (match-string 1 filename)) (setf (url-filename urlobj) (or (match-string 2 filename) "")) (setf (url-fullness urlobj) t)) (url-recreate-url urlobj))) (defun my-wikipedia-api-search (query) (my-url-fetch-json (format "%s/w/api.php?action=query&format=json&list=search&srsearch=%s" my-wikipedia-host query))) (defun my-wikipedia-search (query) (interactive "sQuery: ") (generic-search-open (alist-get 'search (alist-get 'query (my-wikipedia-api-search query))) (format "wikipedia-query:%s" query) `((formatter . my-wikipedia-format-result) (default-action . my-wikipedia-grok-action) (keymap . ,my-wikipedia-button-keymap)))) (defun my-wikipedia-format-result (result) (concat (format "%s (%d words)" (alist-get 'title result) (alist-get 'wordcount result)) (propertize (format "\n\n%s" (my-wikipedia-highlight-snippet-matches (alist-get 'snippet result))) 'face 'default))) (defun my-wikipedia-highlight-snippet-matches (snippet) (with-temp-buffer (insert snippet) (goto-char (point-min)) (while (re-search-forward "\\(.*?\\)" nil t) (replace-match (propertize (match-string 1) 'face 'match))) (buffer-string))) (defun my-wikipedia-grok-action (info) (interactive) (my-org-grok (format "%s/wiki/%s" my-wikipedia-host (alist-get 'title info)))) (defun my-wikipedia-fetch-wiki-at-point () (interactive) (my-fetch-url (format "%s/wiki/%s?action=raw" my-wikipedia-host (alist-get 'title (get-text-property (point) 'button-data))))) (defvar my-wikipedia-button-keymap (let ((kmap (make-sparse-keymap))) (set-keymap-parent kmap button-map) (define-key kmap "f" 'my-wikipedia-fetch-wiki-at-point) kmap)) (provide 'my-wikipedia) ;;; my-wikipedia.el ends here