;;; url-rewrite.el -- A library for rewriting URLs -*- lexical-binding: t -*- ;; Copyright (C) 2023 Free Software Foundation, Inc. ;; Author: Yuchen Pei ;; Package-Requires: ((emacs "28.2")) ;; This file is part of url-rewrite. ;; url-rewrite is free software: you can redistribute it and/or modify it under ;; the terms of the GNU Affero General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; url-rewrite is distributed in the hope that it will be useful, but WITHOUT ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General ;; Public License for more details. ;; You should have received a copy of the GNU Affero General Public ;; License along with url-rewrite. If not, see . ;;; Commentary: ;; A library for rewriting URLs. Usecases include redirecting to an ;; alternative client, removing tracking queries etc. ;; Includes a collection of building block functions for url ;; rewriting. ;; Another usage would be: ;; 1. Declare rewriting rules in `url-rw-rules' (see ;; `url-rw-example-rules' for example) ;; 2. Then declare some ad-hoc rewriting functions and add them to ;; `url-rw-extra-rewriters'. An example is ;; `url-rw-example-zoom-to-dial'. ;; 3. Eval `(url-rw-refresh)' to populate the rewrite function list ;; `url-rw-rewriters'. ;; 4. The function `url-rw' can now be used on any url you wish to ;; rewrite. ;;; Code: (require 'url-parse) (require 'subr-x) ;;; Utility functions (defun url-rw-format-filename (path queries) "Format a filename from PATH and QUERIES alist." (let ((formatted-queries (string-join (mapcar (lambda (query) (url-rw-join-string-pair (car query) (cdr query) "=")) queries) "&"))) (concat path (when queries (concat "?" formatted-queries))))) (defun url-rw-join-string-pair (s1 s2 sep) "Join S1 and S2 with SEP if both S1 and S2 are nonnil. Otherwise just concat the two." (if (and s1 s2) (format "%s%s%s" s1 sep s2) (concat s1 s2))) (defun url-rw-parse-queries (queries) "From QUERIES to an alist." (when queries (mapcar (lambda (query) (string-match "^\\(.*?\\)\\(=.*\\)?$" query) (let ((key (match-string 1 query)) (value (match-string 2 query))) (cons key (when value (substring value 1))))) (split-string queries "&")))) (defun url-rw-random-item (xs) "Returns a random item from list XS." (elt xs (random (length xs)))) (defun url-rw-copy-url (from to) "Copy from FROM to TO, both url structs." (setf (url-type to) (url-type from) (url-user to) (url-user from) (url-password to) (url-password from) (url-host to) (url-host from) (url-portspec to) (url-portspec from) (url-filename to) (url-filename from) (url-target to) (url-target from) (url-attributes to) (url-attributes from) (url-fullness to) (url-fullness from) (url-silent to) (url-silent from) (url-use-cookies to) (url-use-cookies from) (url-asynchronous to) (url-asynchronous from))) ;;; Building blocks for url rewriting, used in :actions list in rules. (defun url-rw-put-query (urlobj key &optional value) "Put query KEY=VALUE to URLOBJ. If VALUE is nil, just put KEY." (pcase-let* ((`(,path . ,queries) (url-path-and-query urlobj)) (parsed-queries (url-rw-parse-queries queries))) (setf (alist-get key parsed-queries nil nil 'equal) value) (setf (url-filename urlobj) (url-rw-format-filename path parsed-queries)))) (defun url-rw-remove-query (urlobj queries-to-remove) "Remove queries in QUERIES-TO-REMOVE from URLOBJ in place. QUERIES-TO-REMOVE is a regexp." (pcase-let* ((`(,path . ,queries) (url-path-and-query urlobj)) (parsed-queries (url-rw-parse-queries queries))) (setf (url-filename urlobj) (url-rw-format-filename path (seq-filter (lambda (pair) (not (string-match-p queries-to-remove (car pair)))) parsed-queries))))) (defun url-rw-replace-host (urlobj new-host) "Replace host in URLOBJ with NEW-HOST in place." (setf (url-host urlobj) new-host)) (defun url-rw-replace-path-regexp (urlobj regexp rep) "Replace REGEXP in the path of URLOBJ with REP in place." (pcase-let ((`(,path . ,query) (url-path-and-query urlobj))) (setf (url-filename urlobj) (concat (replace-regexp-in-string regexp rep path) (when query (concat "?" query)))))) (defun url-rw-replace-path (urlobj new-path) "Replace the path of URLOBJ with NEW-PATH in place." (pcase-let ((`(_ . ,query) (url-path-and-query urlobj))) (setf (url-filename urlobj) (concat new-path (when query (concat "?" query)))))) (defun url-rw-replace-random-host (urlobj hosts) "Replace the host in URLOBJ with a random one from HOSTS." (url-rw-replace-host urlobj (url-rw-random-item hosts))) (defun url-rw-replace-by-redirect (urlobj redirect-query) "Replace URLOBJ with an unhexed value from REDIRECT-QUERY." (let ((queries (url-rw-parse-queries (cdr (url-path-and-query urlobj))))) (when-let* ((value (alist-get redirect-query queries nil nil 'equal)) (new-urlobj (url-generic-parse-url (url-unhex-string value)))) (url-rw-copy-url new-urlobj urlobj)))) (defun url-rw-replace-type (urlobj new-type) "Replace the type of URLOBJ with NEW-TYPE." (setf (url-type urlobj) new-type)) ;;; Rewrite rules and functions ;; TODO: This should be a defcustom (defvar url-rw-rules nil "Rewrite rules for url rewrite.") (defcustom url-rw-max-rewrites 100 "Maximum rewrites allowed on a url." :group 'url-rewrite :type 'natnum) (defcustom url-rw-extra-rewriters nil "Extra rewrite functions." :group 'url-rewrite :type '(repeat function)) (defvar url-rw-rewriters nil "List of url rewrite functions. A concatenation of rewriters generated from `url-rw-rules' and `url-rw-rewriters'.") (defmacro url-rw-define-rewrite (rule) "Defines a rewrite function using RULE. Also add the function to `url-rw-rewriters'." (let ((func-name (intern (format "url-rewrite-%s" (plist-get rule :name)))) (match (plist-get rule :match)) (actions (plist-get rule :actions)) (description (plist-get rule :description))) `(progn (defun ,func-name (url) ,description (if (string-match-p ,match url) (let ((urlobj (url-generic-parse-url url))) ,@(mapcar (lambda (action) `(,(intern (format "url-rw-%s" (car action))) urlobj ,@(cdr action))) actions) (url-recreate-url urlobj)) url)) (add-to-list 'url-rw-rewriters ',func-name)))) ;;;###autoload (defun url-rw-refresh () "Refresh the list of rewriters." (setq url-rw-rewriters nil) (dolist (rule url-rw-rules) (eval `(url-rw-define-rewrite ,rule))) (dolist (rewriter url-rw-extra-rewriters) (add-to-list 'url-rw-rewriters rewriter)) url-rw-rewriters) ;;;###autoload (defun url-rw (url) "Rewrite a URL. Try applying each function in `url-rw-rewriters'. If after one iteration the url does not change, or if the number of iterations has exceeded `url-rw-max-rewrites', stop." (let ((old-url) (count 0)) (while (and (< count url-rw-max-rewrites) (not (equal url old-url))) (setq old-url url) (dolist (rewriter url-rw-rewriters) (setq url (funcall rewriter url))) (setq count (1+ count)))) url) ;;; Examples (defvar url-rw-www-re "^https?://\\(www\\.\\)?") (defvar url-rw-example-rules '((:name ddg-result :description "duckduckgo result transform." :match (concat url-rw-www-re (rx "duckduckgo.com/l/?uddg=")) :actions ((replace-by-redirect "uddg"))) (:name google-to-ddg :description "Google search to duckduckgo html." :match (concat url-rw-www-re (rx "google.com/search?q=")) :actions ((replace-host "html.duckduckgo.com") (replace-path "/html"))) (:name google-docs-odt :description "Download Google docs document as odt." :match (concat url-rw-www-re (rx "docs.google.com/document/d/")) :actions ((replace-path-regexp "\\(/document/d/.*?\\)/.*" "\\1/export") (put-query "format" "odt"))) (:name reddit-to-old :description "Reddit to old Reddit" :match (concat url-rw-www-re (rx "reddit.com")) :actions ((replace-host "old.reddit.com"))) (:name strip-utm :description "Strip utm_* queries." :match "\\.") ;; Uncomment the following to use the example rules and the zoom to ;; dial functions ;; (setq url-rw-rules url-rw-example-rules ;; url-rw-extra-rewriters '(url-rw-example-zoom-to-dial)) ;; (url-rw-refresh) (provide 'url-rewrite) ;;; url-rewrite.el ends here