;;; traclicker.el -- fight email click trackers -*- lexical-binding: t -*- ;; Copyright (C) 2023 Free Software Foundation, Inc. ;; Author: Yuchen Pei ;; Package-Requires: ((emacs "28.2")) ;; This file is part of traclicker. ;; traclicker is free software: you can redistribute it and/or modify it under ;; the terms of the GNU Affero General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; traclicker is distributed in the hope that it will be useful, but WITHOUT ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General ;; Public License for more details. ;; You should have received a copy of the GNU Affero General Public ;; License along with traclicker. If not, see . ;;; Commentary: ;; traclicker is a tool that protects user privacy against email click ;; trackers by sending a HEAD request to all urls in these emails and ;; recording the real urls. The real urls are collected into a ;; database, which is consulted when the user reads emails in gnus. To ;; substitute all click trackers in an email with the real ones, run ;; `tracli-wash-gnus-article', bound to "v t" in gnus-summary-mode / ;; gnus-article-mode. ;;; Code: (require 'gnus) (require 'gnus-art) (require 'gnus-util) (require 'ol) (require 'mm-decode) (require 'shr) (defcustom tracli-senders nil "List of sender email addresses to scan for links." :type '(repeat string) :group 'traclicker) (defcustom tracli-db-file (locate-user-emacs-file "traclicker.eld") "Database file to read and write." :type 'file :group 'traclicker) (defcustom tracli-maildirs '("~/mail") "List of maildirs to scan and process urls. Will scan from both cur and new subdirs." :type '(repeat directory) :group 'traclicker) (defvar tracli-data nil "Data read from and written to `tracli-db-file'.") (defcustom tracli-exclude-url-pattern "\\<\\([uU]nsubscribe\\|opt.?out\\)\\>" "URL pattern to exclude from collection. For example to avoid accidental unsubscription." :type 'regexp :group 'traclicker) (defcustom tracli-url-rewriter 'identity "Function to rewrite redirect url. A rewrite function takes a string and returns a string. It may, for example, remove utm tracking queries." :type 'function :group 'traclicker) (defun tracli-read-data () "Read data from `tracli-db-file'." (when (file-exists-p tracli-db-file) (with-temp-buffer (insert-file-contents tracli-db-file) (goto-char (point-min)) (read (current-buffer))))) (defun tracli-refresh-data () "Read into `tracli-data'." (setq tracli-data (tracli-read-data))) (unless tracli-data (tracli-refresh-data)) (defun tracli-scan (&optional after) "Scan emails and get redirect urls. Only scan emails that are sent by `tracli-senders', delivered after AFTER (in epoch), in `tracli-maildirs'. If AFTER is nil, use the timestamp in `tracli-db-file', or an empty string (i.e. scan mails from any time)." (interactive) (let* ((sender-re (format "<%s>" (regexp-opt tracli-senders))) (counter 0) (data (tracli-read-data)) (now (format-time-string "%s" (current-time))) (urls (or (alist-get 'urls data) (make-hash-table :test 'equal)))) (setq after (or after (alist-get 'timestamp data) "")) (dolist (maildir tracli-maildirs) (dolist (subdir '("new" "cur")) (dolist (file (directory-files (file-name-concat maildir subdir))) (when (and (string-match "^\\([0-9]+\\)\\." file) (string> (match-string 1 file) after)) (with-temp-buffer (insert-file-contents (file-name-concat maildir subdir file)) (goto-char (point-min)) (when (and ;; Only scan if the email is not a reply. (not (gnus-fetch-field "In-Reply-To")) (string-match sender-re (gnus-fetch-field "From"))) (message "Processing mail from %s with subject %s..." (gnus-fetch-field "From") (gnus-fetch-field "Subject")) (setq counter (1+ counter)) (pcase-dolist (`(,source . ,target) (tracli-get-buffer-redirect-urls)) (puthash source target urls)))))))) (setf (alist-get 'timestamp data) now) (setf (alist-get 'urls data) urls) (with-temp-buffer (insert (prin1-to-string data)) (write-region nil nil tracli-db-file)) (setq tracli-data data) (message "Processed %d mails" counter))) (defun tracli-get-redirect-url (url) "Get redirect link of URL. Sends a HEAD request." (let* ((url-request-method "HEAD") (url-max-redirections 0) (buffer (url-retrieve-synchronously url)) (inhibit-message t)) (with-current-buffer buffer (goto-char (point-min)) (when (re-search-forward "^Location: \\(.*\\)$" nil t) (funcall tracli-url-rewriter (match-string 1)))))) (defun tracli-collect-urls () "Collect all http/https urls in the current buffer." (save-excursion (let ((results) (urls (alist-get 'urls tracli-data))) (while (re-search-forward org-link-plain-re nil t) (let ((scheme (match-string-no-properties 1)) (url (match-string-no-properties 0))) (when (and (member scheme '("http" "https")) (not (member url results)) (not (string-match-p tracli-exclude-url-pattern url)) (not (and urls (gethash url urls)))) (push url results)))) (reverse results)))) (defun tracli-get-buffer-redirect-urls () "Get a list of redirect urls in the current buffer." (let ((parsed (tracli-parse-buffer))) (with-temp-buffer (insert (format "%s" parsed)) (goto-char (point-min)) (mapcar (lambda (url) (cons url (tracli-get-redirect-url url))) (tracli-collect-urls))))) (defun tracli-parse-buffer () "Parse a buffer." (let ((handles (mm-dissect-buffer t t))) (with-temp-buffer (tracli-parse-first handles)))) (defun tracli-parse-first (handle) "Parse the first handle in the tree of HANDLE's." (if (stringp (car handle)) (tracli-parse-first (cadr handle)) (tracli-parse handle))) (defun tracli-parse (handle) "Parse a mime HANDLE. Adapted from `mm-shr'." (let ((shr-width (if shr-use-fonts nil fill-column)) (shr-content-function (lambda (id) (let ((handle (mm-get-content-id id))) (when handle (mm-with-part handle (buffer-string)))))) (shr-inhibit-images t) (shr-blocked-images ".*") charset coding char) (mm-with-part (or handle (setq handle (mm-dissect-buffer t))) (setq case-fold-search t) (or (setq charset (mail-content-type-get (mm-handle-type handle) 'charset)) (progn (goto-char (point-min)) (and (re-search-forward "\ ]+\\)[^>]*>" nil t) (setq coding (mm-charset-to-coding-system (match-string 1) nil t)))) (setq charset mail-parse-charset)) (when (and (or coding (setq coding (mm-charset-to-coding-system charset nil t))) (not (eq coding 'ascii))) (insert (prog1 (decode-coding-string (buffer-string) coding) (erase-buffer) (set-buffer-multibyte t)))) (goto-char (point-min)) (while (re-search-forward "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t) (when (setq char (cdr (assq (if (match-beginning 1) (string-to-number (match-string 1) 16) (string-to-number (match-string 2))) mm-extra-numeric-entities))) (replace-match (char-to-string char)))) ;; Remove "soft hyphens". (goto-char (point-min)) (while (search-forward "­" nil t) (replace-match "" t t)) (buffer-substring-no-properties (point-min) (point-max))))) (defun tracli-wash-gnus-article () "Wash gnus article by replacing links with those in the `tracli-data'." (interactive) (with-current-buffer gnus-article-buffer (save-excursion (when-let ((inhibit-read-only t) (urls (alist-get 'urls tracli-data))) (article-goto-body) (while (re-search-forward org-link-plain-re nil t) (let ((matched (match-string-no-properties 0))) (when-let ((new-url (gethash matched urls))) (replace-match new-url)))) (article-goto-body) (let ((match)) (while (setq match (text-property-search-forward 'shr-url)) (when-let ((start (prop-match-beginning match)) (end (prop-match-end match)) (new-url (gethash (prop-match-value match) urls))) (put-text-property start end 'shr-url new-url) (put-text-property start end 'help-echo new-url)))))) (gnus-article-add-buttons))) (define-key gnus-summary-mode-map (kbd "v t") #'tracli-wash-gnus-article) (provide 'traclicker) ;;; traclicker.el ends here