;;; traclicker.el -- fight email click trackers -*- lexical-binding: t -*- ;; Copyright (C) 2023 Free Software Foundation, Inc. ;; Author: Yuchen Pei ;; Package-Requires: ((emacs "28.2")) ;; This file is part of traclicker. ;; traclicker is free software: you can redistribute it and/or modify it under ;; the terms of the GNU Affero General Public License as published by ;; the Free Software Foundation, either version 3 of the License, or ;; (at your option) any later version. ;; traclicker is distributed in the hope that it will be useful, but WITHOUT ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General ;; Public License for more details. ;; You should have received a copy of the GNU Affero General Public ;; License along with traclicker. If not, see . ;;; Commentary: ;; traclicker is a tool that protects user privacy against email click ;; trackers by sending a HEAD request to all urls in these emails and ;; recording the real urls. ;;; Code: (require 'gnus-util) (require 'ol) (require 'mm-decode) (defvar tracli-senders nil "List of email addresses.") (defvar tracli-db-file (locate-user-emacs-file "traclicker.eld") "Database file to read.") (defvar tracli-maildirs '("~/mail") "List of maildirs to scan and process urls. Will scan from both cur and new subdirs.") (defun tracli-read-data () "Read data from `tracli-db-file'." (when (file-exists-p tracli-db-file) (with-temp-buffer (insert-file-contents tracli-db-file) (goto-char (point-min)) (read (current-buffer))))) (defun tracli-scan (&optional after) "Scan emails and get redirect urls. Only scan emails that are sent by `tracli-senders', delivered after AFTER (in epoch), in `tracli-maildirs'. If AFTER is nil, use the timestamp in `tracli-db-file', or an empty string (i.e. scan all mails)." (let* ((sender-re (format "<%s>" (regexp-opt tracli-senders))) (counter 0) (data (tracli-read-data)) (now (format-time-string "%s" (current-time))) (urls (or (alist-get 'urls data) (make-hash-table :test 'equal)))) (setq after (or after (alist-get 'timestamp data) "")) (dolist (maildir tracli-maildirs) (dolist (subdir '("new" "cur")) (dolist (file (directory-files (file-name-concat maildir subdir))) (when (and (string-match "^\\([0-9]+\\)\\." file) (string> (match-string 1 file) after)) (with-temp-buffer (insert-file-contents (file-name-concat maildir subdir file)) (goto-char (point-min)) (when (and ;; Only scan if the email is not a reply. (not (gnus-fetch-field "In-Reply-To")) (string-match sender-re (gnus-fetch-field "From"))) (message "Processing mail from %s with subject %s..." (gnus-fetch-field "From") (gnus-fetch-field "Subject")) (setq counter (1+ counter)) (pcase-dolist (`(,source . ,target) (tracli-get-buffer-redirect-urls)) (puthash source target urls)))))))) (setf (alist-get 'timestamp data) now) (setf (alist-get 'urls data) urls) (with-temp-buffer (insert (prin1-to-string data)) (write-region nil nil tracli-db-file)) (message "Processed %d mails" counter))) (defun tracli-get-redirect-url (url) "Get redirect link of URL. Sends a HEAD request." (let* ((url-request-method "HEAD") (url-max-redirections 0) (buffer (url-retrieve-synchronously url)) (inhibit-message t)) (with-current-buffer buffer (goto-char (point-min)) (when (re-search-forward "^Location: \\(.*\\)$" nil t) (match-string 1))))) (defun tracli-collect-urls () "Collect all http/https urls in the current buffer." (save-excursion (let ((results)) (while (re-search-forward org-link-plain-re nil t) (let ((scheme (match-string 1)) (url (match-string 0))) (when (and (member scheme '("http" "https")) (not (member url results))) (push url results)))) (reverse results)))) (defun tracli-get-buffer-redirect-urls () "Get a list of redirect urls in the current buffer." (let ((parsed (tracli-parse-buffer))) (with-temp-buffer (insert (format "%s" parsed)) (goto-char (point-min)) (mapcar (lambda (url) (cons url (tracli-get-redirect-url url))) (tracli-collect-urls))))) (defun tracli-parse-buffer () "Parse a buffer." (let ((handles (mm-dissect-buffer t t))) (with-temp-buffer (tracli-parse-first handles)))) (defun tracli-parse-first (handle) "Parse the first handle in the tree of HANDLE's." (if (stringp (car handle)) (tracli-parse-first (cadr handle)) (tracli-parse handle))) (defun tracli-parse (handle) "Parse a mime HANDLE. Adapted from `mm-shr'." (let ((shr-width (if shr-use-fonts nil fill-column)) (shr-content-function (lambda (id) (let ((handle (mm-get-content-id id))) (when handle (mm-with-part handle (buffer-string)))))) (shr-inhibit-images t) (shr-blocked-images ".*") charset coding char) (mm-with-part (or handle (setq handle (mm-dissect-buffer t))) (setq case-fold-search t) (or (setq charset (mail-content-type-get (mm-handle-type handle) 'charset)) (progn (goto-char (point-min)) (and (re-search-forward "\ ]+\\)[^>]*>" nil t) (setq coding (mm-charset-to-coding-system (match-string 1) nil t)))) (setq charset mail-parse-charset)) (when (and (or coding (setq coding (mm-charset-to-coding-system charset nil t))) (not (eq coding 'ascii))) (insert (prog1 (decode-coding-string (buffer-string) coding) (erase-buffer) (set-buffer-multibyte t)))) (goto-char (point-min)) (while (re-search-forward "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t) (when (setq char (cdr (assq (if (match-beginning 1) (string-to-number (match-string 1) 16) (string-to-number (match-string 2))) mm-extra-numeric-entities))) (replace-match (char-to-string char)))) ;; Remove "soft hyphens". (goto-char (point-min)) (while (search-forward "­" nil t) (replace-match "" t t)) (buffer-substring-no-properties (point-min) (point-max))))) (provide 'traclicker) ;;; traclicker.el ends here