aboutsummaryrefslogtreecommitdiff
path: root/url-rewrite.el
blob: 8fd20282af8eaac6cdb1857d0e3391c8d820e69b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
;;; url-rewrite.el -- A library for rewriting URLs -*- lexical-binding: t -*-

;; Copyright (C) 2023  Free Software Foundation, Inc.

;; Author: Yuchen Pei <id@ypei.org>
;; Package-Requires: ((emacs "28.2"))

;; This file is part of url-rewrite.

;; url-rewrite is free software: you can redistribute it and/or modify it under
;; the terms of the GNU Affero General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; url-rewrite is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Affero General
;; Public License for more details.

;; You should have received a copy of the GNU Affero General Public
;; License along with url-rewrite.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; A library for rewriting URLs. Usecases include redirecting to an
;; alternative client, removing tracking queries etc.

;; Includes a collection of building block functions for url
;; rewriting.

;; Another usage would be:

;; 1. Declare rewriting rules in `url-rw-rules' (see
;; `url-rw-example-rules' for example)

;; 2. Then declare some ad-hoc rewriting functions and add them to
;; `url-rw-extra-rewriters'. An example is
;; `url-rw-example-zoom-to-dial'.

;; 3. Eval `(url-rw-refresh)' to populate the rewrite function list
;; `url-rw-rewriters'.

;; 4. The function `url-rw' can now be used on any url you wish to
;; rewrite.

;;; Code:
(require 'url-parse)
(require 'subr-x)

;;; Utility functions
(defun url-rw-format-filename (path queries)
  "Format a filename from PATH and QUERIES alist."
  (let ((formatted-queries
         (string-join
          (mapcar
           (lambda (query)
             (url-rw-join-string-pair (car query) (cdr query) "="))
           queries)
          "&")))
    (concat
     path
     (when queries (concat "?" formatted-queries)))))

(defun url-rw-join-string-pair (s1 s2 sep)
  "Join S1 and S2 with SEP if both S1 and S2 are nonnil.

Otherwise just concat the two."
  (if (and s1 s2)
      (format "%s%s%s" s1 sep s2)
    (concat s1 s2)))

(defun url-rw-parse-queries (queries)
  "From QUERIES to an alist."
  (when queries
    (mapcar
     (lambda (query)
       (string-match "^\\(.*?\\)\\(=.*\\)?$" query)
       (let ((key (match-string 1 query))
             (value (match-string 2 query)))
         (cons key (when value (substring value 1)))))
     (split-string queries "&"))))

(defun url-rw-random-item (xs)
  "Returns a random item from list XS."
  (elt xs (random (length xs))))

(defun url-rw-copy-url (from to)
  "Copy from FROM to TO, both url structs."
  (setf
   (url-type to) (url-type from)
   (url-user to) (url-user from)
   (url-password to) (url-password from)
   (url-host to) (url-host from)
   (url-portspec to) (url-portspec from)
   (url-filename to) (url-filename from)
   (url-target to) (url-target from)
   (url-attributes to) (url-attributes from)
   (url-fullness to) (url-fullness from)
   (url-silent to) (url-silent from)
   (url-use-cookies to) (url-use-cookies from)
   (url-asynchronous to) (url-asynchronous from)))

;;; Building blocks for url rewriting, used in :actions list in rules.
(defun url-rw-put-query (urlobj key &optional value)
  "Put query KEY=VALUE to URLOBJ.

If VALUE is nil, just put KEY."
  (pcase-let* ((`(,path . ,queries) (url-path-and-query urlobj))
               (parsed-queries (url-rw-parse-queries queries)))
    (setf (alist-get key parsed-queries nil nil 'equal) value)
    (setf (url-filename urlobj)
          (url-rw-format-filename path parsed-queries))))

(defun url-rw-remove-query (urlobj queries-to-remove)
  "Remove queries in QUERIES-TO-REMOVE from URLOBJ in place.

QUERIES-TO-REMOVE is a regexp."
  (pcase-let* ((`(,path . ,queries) (url-path-and-query urlobj))
               (parsed-queries (url-rw-parse-queries queries)))
    (setf (url-filename urlobj)
          (url-rw-format-filename
           path
           (seq-filter
            (lambda (pair)
              (not (string-match-p queries-to-remove (car pair))))
            parsed-queries)))))

(defun url-rw-replace-host (urlobj new-host)
  "Replace host in URLOBJ with NEW-HOST in place."
  (setf (url-host urlobj) new-host))

(defun url-rw-replace-path (urlobj new-path)
  "Replace the path of URLOBJ with NEW-PATH in place."
  (pcase-let ((`(_ . ,query) (url-path-and-query urlobj)))
    (setf (url-filename urlobj) (concat new-path (when query (concat "?" query))))))

(defun url-rw-replace-random-host (urlobj hosts)
  "Replace the host in URLOBJ with a random one from HOSTS."
  (url-rw-replace-host urlobj (url-rw-random-item hosts)))

(defun url-rw-replace-by-redirect (urlobj redirect-query)
  "Replace URLOBJ with an unhexed value from REDIRECT-QUERY."
  (let ((queries (url-rw-parse-queries (cdr (url-path-and-query urlobj)))))
    (when-let* ((value (alist-get redirect-query queries nil nil 'equal))
                (new-urlobj (url-generic-parse-url (url-unhex-string value))))
      (url-rw-copy-url new-urlobj urlobj))))

(defun url-rw-replace-type (urlobj new-type)
  "Replace the type of URLOBJ with NEW-TYPE."
  (setf (url-type urlobj) new-type))

;;; Rewrite rules and functions
;; TODO: This should be a defcustom
(defvar url-rw-rules nil "Rewrite rules for url rewrite.")

(defcustom url-rw-max-rewrites 100 "Maximum rewrites allowed on a url."
  :group 'url-rewrite
  :type 'natnum)

(defcustom url-rw-extra-rewriters nil "Extra rewrite functions."
  :group 'url-rewrite
  :type '(repeat function))

(defvar url-rw-rewriters nil
  "List of url rewrite functions.

A concatenation of rewriters generated from `url-rw-rules' and
`url-rw-rewriters'.")

(defmacro url-rw-define-rewrite (rule)
  "Defines a rewrite function using RULE.

Also add the function to `url-rw-rewriters'."
  (let ((func-name (intern (format "url-rewrite-%s" (plist-get rule :name))))
        (match (plist-get rule :match))
        (actions (plist-get rule :actions))
        (description (plist-get rule :description)))
    `(progn
       (defun ,func-name (url)
         ,description
         (if (string-match-p ,match url)
             (let ((urlobj (url-generic-parse-url url)))
               ,@(mapcar
                  (lambda (action)
                    `(,(intern (format "url-rw-%s" (car action)))
                      urlobj ,@(cdr action)))
                  actions)
               (url-recreate-url urlobj))
           url))
       (add-to-list 'url-rw-rewriters ',func-name))))

;;;###autoload
(defun url-rw-refresh ()
  "Refresh the list of rewriters."
  (setq url-rw-rewriters nil)
  (dolist (rule url-rw-rules)
    (eval `(url-rw-define-rewrite ,rule)))
  (dolist (rewriter url-rw-extra-rewriters)
    (add-to-list 'url-rw-rewriters rewriter))
  url-rw-rewriters)

;;;###autoload
(defun url-rw (url)
  "Rewrite a URL.

Try applying each function in `url-rw-rewriters'. If after one
iteration the url does not change, or if the number of iterations
has exceeded `url-rw-max-rewrites', stop."
  (let ((old-url)
        (count 0))
    (while (and (< count url-rw-max-rewrites)
                (not (equal url old-url)))
      (setq old-url url)
      (dolist (rewriter url-rw-rewriters)
        (setq url (funcall rewriter url)))
      (setq count (1+ count))))
  url)

;;; Examples
(defvar url-rw-www-re "^https?://\\(www\\.\\)?")

(defvar url-rw-example-rules
  '((:name ddg-result
           :description "duckduckgo result transform."
           :match (concat url-rw-www-re (rx "duckduckgo.com/l/?uddg="))
           :actions ((replace-by-redirect "uddg")))
    (:name google-to-ddg
           :description "Google search to duckduckgo html."
           :match (concat url-rw-www-re (rx "google.com/search?q="))
           :actions ((replace-host "html.duckduckgo.com")
                     (replace-path "/html")))
    (:name reddit-to-old
           :description "Reddit to old Reddit"
           :match (concat url-rw-www-re (rx "reddit.com"))
           :actions ((replace-host "old.reddit.com")))
    (:name strip-utm
           :description "Strip utm_* queries."
           :match "\\<utm_[a-z_]+="
           :actions ((remove-query "utm_.*")))
    (:name youtube-to-invidious
           :description "YouTube to a random invidious instance."
           :match (concat url-rw-www-re (rx (or "youtube.com"
                                                "youtu.be"
                                                "m.youtube.com")))
           :actions ((replace-random-host url-rw-example-invidious-hosts))))
  "Example rewrite rules.")

(defvar url-rw-example-zoom-numbers
  '("+61731853730"
    "+61871501149")
  "List of zoom dial-in numbers.")

(defun url-rewrite-example-zoom-to-dial (url)
  "Rewrite a zoom URL to a dial in number."
  (pcase-let* ((urlobj (url-generic-parse-url url))
               (`(,path . _) (url-path-and-query urlobj))
               (host (url-host urlobj)))
    (if (and host
             (string-match "\\(^.*\\.\\)?zoom.us" host)
             (string-match "^/j/\\([0-9]+\\)$" path))
        (format "tel:%s,,%s#"
                (url-rw-random-item url-rw-example-zoom-numbers)
                (match-string 1 path))
      url)))

(defvar url-rw-example-invidious-hosts
  '("yewtu.be"
    "onion.tube")
  "Invidious hosts.

For a longer list, see <https://redirect.invidious.io/>.")

;; Uncomment the following to use the example rules and the zoom to
;; dial functions

;; (setq url-rw-rules url-rw-example-rules
;;       url-rw-extra-rewriters '(url-rw-example-zoom-to-dial))
;; (url-rw-refresh)

(provide 'url-rewrite)
;;; url-rewrite.el ends here