aboutsummaryrefslogtreecommitdiff
path: root/traclicker.el
diff options
context:
space:
mode:
Diffstat (limited to 'traclicker.el')
-rw-r--r--traclicker.el194
1 files changed, 194 insertions, 0 deletions
diff --git a/traclicker.el b/traclicker.el
new file mode 100644
index 0000000..9174cc1
--- /dev/null
+++ b/traclicker.el
@@ -0,0 +1,194 @@
+;;; traclicker.el -- fight email click trackers -*- lexical-binding: t -*-
+
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+
+;; Author: Yuchen Pei <id@ypei.org>
+;; Package-Requires: ((emacs "28.2"))
+
+;; This file is part of traclicker.
+
+;; traclicker is free software: you can redistribute it and/or modify it under
+;; the terms of the GNU Affero General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; traclicker is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General
+;; Public License for more details.
+
+;; You should have received a copy of the GNU Affero General Public
+;; License along with traclicker. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; traclicker is a tool that protects user privacy against email click
+;; trackers by sending a HEAD request to all urls in these emails and
+;; recording the real urls.
+
+;;; Code:
+(require 'gnus-util)
+(require 'ol)
+(require 'mm-decode)
+
+(defvar tracli-senders
+ nil
+ "List of email addresses.")
+
+(defvar tracli-db-file (locate-user-emacs-file "traclicker.eld")
+ "Database file to read.")
+
+(defvar tracli-maildirs '("~/mail")
+ "List of maildirs to scan and process urls.
+
+Will scan from both cur and new subdirs.")
+
+(defun tracli-read-data ()
+ "Read data from `tracli-db-file'."
+ (when (file-exists-p tracli-db-file)
+ (with-temp-buffer
+ (insert-file-contents tracli-db-file)
+ (goto-char (point-min))
+ (read (current-buffer)))))
+
+(defun tracli-scan (&optional after)
+ "Scan emails and get redirect urls.
+
+Only scan emails that are sent by `tracli-senders', delivered
+after AFTER (in epoch), in `tracli-maildirs'.
+
+If AFTER is nil, use the timestamp in `tracli-db-file', or an
+empty string (i.e. scan all mails)."
+ (let* ((sender-re (format "<%s>" (regexp-opt tracli-senders)))
+ (counter 0)
+ (data (tracli-read-data))
+ (now (format-time-string "%s" (current-time)))
+ (urls (or (alist-get 'urls data)
+ (make-hash-table :test 'equal))))
+ (setq after (or after
+ (alist-get 'timestamp data)
+ ""))
+ (dolist (maildir tracli-maildirs)
+ (dolist (subdir '("new" "cur"))
+ (dolist (file (directory-files (file-name-concat maildir subdir)))
+ (when (and (string-match "^\\([0-9]+\\)\\." file)
+ (string> (match-string 1 file) after))
+ (with-temp-buffer
+ (insert-file-contents (file-name-concat maildir subdir file))
+ (goto-char (point-min))
+ (when (and
+ ;; Only scan if the email is not a reply.
+ (not (gnus-fetch-field "In-Reply-To"))
+ (string-match sender-re (gnus-fetch-field "From")))
+ (message "Processing mail from %s with subject %s..."
+ (gnus-fetch-field "From")
+ (gnus-fetch-field "Subject"))
+ (setq counter (1+ counter))
+ (pcase-dolist (`(,source . ,target)
+ (tracli-get-buffer-redirect-urls))
+ (puthash source target urls))))))))
+ (setf (alist-get 'timestamp data) now)
+ (setf (alist-get 'urls data) urls)
+ (with-temp-buffer
+ (insert (prin1-to-string data))
+ (write-region nil nil tracli-db-file))
+ (message "Processed %d mails" counter)))
+
+(defun tracli-get-redirect-url (url)
+ "Get redirect link of URL.
+
+Sends a HEAD request."
+ (let* ((url-request-method "HEAD")
+ (url-max-redirections 0)
+ (buffer (url-retrieve-synchronously url))
+ (inhibit-message t))
+ (with-current-buffer buffer
+ (goto-char (point-min))
+ (when (re-search-forward "^Location: \\(.*\\)$" nil t)
+ (match-string 1)))))
+
+(defun tracli-collect-urls ()
+ "Collect all http/https urls in the current buffer."
+ (save-excursion
+ (let ((results))
+ (while (re-search-forward org-link-plain-re nil t)
+ (let ((scheme (match-string 1))
+ (url (match-string 0)))
+ (when (and (member scheme '("http" "https"))
+ (not (member url results)))
+ (push url results))))
+ (reverse results))))
+
+(defun tracli-get-buffer-redirect-urls ()
+ "Get a list of redirect urls in the current buffer."
+ (let ((parsed (tracli-parse-buffer)))
+ (with-temp-buffer
+ (insert (format "%s" parsed))
+ (goto-char (point-min))
+ (mapcar (lambda (url)
+ (cons url (tracli-get-redirect-url url)))
+ (tracli-collect-urls)))))
+
+(defun tracli-parse-buffer ()
+ "Parse a buffer."
+ (let ((handles (mm-dissect-buffer t t)))
+ (with-temp-buffer
+ (tracli-parse-first handles))))
+
+(defun tracli-parse-first (handle)
+ "Parse the first handle in the tree of HANDLE's."
+ (if (stringp (car handle))
+ (tracli-parse-first (cadr handle))
+ (tracli-parse handle)))
+
+(defun tracli-parse (handle)
+ "Parse a mime HANDLE.
+
+Adapted from `mm-shr'."
+ (let ((shr-width (if shr-use-fonts
+ nil
+ fill-column))
+ (shr-content-function (lambda (id)
+ (let ((handle (mm-get-content-id id)))
+ (when handle
+ (mm-with-part handle
+ (buffer-string))))))
+ (shr-inhibit-images t)
+ (shr-blocked-images ".*")
+ charset coding char)
+ (mm-with-part (or handle (setq handle (mm-dissect-buffer t)))
+ (setq case-fold-search t)
+ (or (setq charset
+ (mail-content-type-get (mm-handle-type handle) 'charset))
+ (progn
+ (goto-char (point-min))
+ (and (re-search-forward "\
+<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']?\
+text/html;\\s-*charset=\\([^\t\n\r \"'>]+\\)[^>]*>" nil t)
+ (setq coding (mm-charset-to-coding-system (match-string 1)
+ nil t))))
+ (setq charset mail-parse-charset))
+ (when (and (or coding
+ (setq coding (mm-charset-to-coding-system charset nil t)))
+ (not (eq coding 'ascii)))
+ (insert (prog1
+ (decode-coding-string (buffer-string) coding)
+ (erase-buffer)
+ (set-buffer-multibyte t))))
+ (goto-char (point-min))
+ (while (re-search-forward
+ "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t)
+ (when (setq char
+ (cdr (assq (if (match-beginning 1)
+ (string-to-number (match-string 1) 16)
+ (string-to-number (match-string 2)))
+ mm-extra-numeric-entities)))
+ (replace-match (char-to-string char))))
+ ;; Remove "soft hyphens".
+ (goto-char (point-min))
+ (while (search-forward "­" nil t)
+ (replace-match "" t t))
+ (buffer-substring-no-properties (point-min) (point-max)))))
+
+(provide 'traclicker)
+;;; traclicker.el ends here