diff options
Diffstat (limited to 'traclicker.el')
-rw-r--r-- | traclicker.el | 194 |
1 files changed, 194 insertions, 0 deletions
diff --git a/traclicker.el b/traclicker.el new file mode 100644 index 0000000..9174cc1 --- /dev/null +++ b/traclicker.el @@ -0,0 +1,194 @@ +;;; traclicker.el -- fight email click trackers -*- lexical-binding: t -*- + +;; Copyright (C) 2023 Free Software Foundation, Inc. + +;; Author: Yuchen Pei <id@ypei.org> +;; Package-Requires: ((emacs "28.2")) + +;; This file is part of traclicker. + +;; traclicker is free software: you can redistribute it and/or modify it under +;; the terms of the GNU Affero General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; traclicker is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General +;; Public License for more details. + +;; You should have received a copy of the GNU Affero General Public +;; License along with traclicker. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; traclicker is a tool that protects user privacy against email click +;; trackers by sending a HEAD request to all urls in these emails and +;; recording the real urls. + +;;; Code: +(require 'gnus-util) +(require 'ol) +(require 'mm-decode) + +(defvar tracli-senders + nil + "List of email addresses.") + +(defvar tracli-db-file (locate-user-emacs-file "traclicker.eld") + "Database file to read.") + +(defvar tracli-maildirs '("~/mail") + "List of maildirs to scan and process urls. + +Will scan from both cur and new subdirs.") + +(defun tracli-read-data () + "Read data from `tracli-db-file'." + (when (file-exists-p tracli-db-file) + (with-temp-buffer + (insert-file-contents tracli-db-file) + (goto-char (point-min)) + (read (current-buffer))))) + +(defun tracli-scan (&optional after) + "Scan emails and get redirect urls. + +Only scan emails that are sent by `tracli-senders', delivered +after AFTER (in epoch), in `tracli-maildirs'. + +If AFTER is nil, use the timestamp in `tracli-db-file', or an +empty string (i.e. scan all mails)." + (let* ((sender-re (format "<%s>" (regexp-opt tracli-senders))) + (counter 0) + (data (tracli-read-data)) + (now (format-time-string "%s" (current-time))) + (urls (or (alist-get 'urls data) + (make-hash-table :test 'equal)))) + (setq after (or after + (alist-get 'timestamp data) + "")) + (dolist (maildir tracli-maildirs) + (dolist (subdir '("new" "cur")) + (dolist (file (directory-files (file-name-concat maildir subdir))) + (when (and (string-match "^\\([0-9]+\\)\\." file) + (string> (match-string 1 file) after)) + (with-temp-buffer + (insert-file-contents (file-name-concat maildir subdir file)) + (goto-char (point-min)) + (when (and + ;; Only scan if the email is not a reply. + (not (gnus-fetch-field "In-Reply-To")) + (string-match sender-re (gnus-fetch-field "From"))) + (message "Processing mail from %s with subject %s..." + (gnus-fetch-field "From") + (gnus-fetch-field "Subject")) + (setq counter (1+ counter)) + (pcase-dolist (`(,source . ,target) + (tracli-get-buffer-redirect-urls)) + (puthash source target urls)))))))) + (setf (alist-get 'timestamp data) now) + (setf (alist-get 'urls data) urls) + (with-temp-buffer + (insert (prin1-to-string data)) + (write-region nil nil tracli-db-file)) + (message "Processed %d mails" counter))) + +(defun tracli-get-redirect-url (url) + "Get redirect link of URL. + +Sends a HEAD request." + (let* ((url-request-method "HEAD") + (url-max-redirections 0) + (buffer (url-retrieve-synchronously url)) + (inhibit-message t)) + (with-current-buffer buffer + (goto-char (point-min)) + (when (re-search-forward "^Location: \\(.*\\)$" nil t) + (match-string 1))))) + +(defun tracli-collect-urls () + "Collect all http/https urls in the current buffer." + (save-excursion + (let ((results)) + (while (re-search-forward org-link-plain-re nil t) + (let ((scheme (match-string 1)) + (url (match-string 0))) + (when (and (member scheme '("http" "https")) + (not (member url results))) + (push url results)))) + (reverse results)))) + +(defun tracli-get-buffer-redirect-urls () + "Get a list of redirect urls in the current buffer." + (let ((parsed (tracli-parse-buffer))) + (with-temp-buffer + (insert (format "%s" parsed)) + (goto-char (point-min)) + (mapcar (lambda (url) + (cons url (tracli-get-redirect-url url))) + (tracli-collect-urls))))) + +(defun tracli-parse-buffer () + "Parse a buffer." + (let ((handles (mm-dissect-buffer t t))) + (with-temp-buffer + (tracli-parse-first handles)))) + +(defun tracli-parse-first (handle) + "Parse the first handle in the tree of HANDLE's." + (if (stringp (car handle)) + (tracli-parse-first (cadr handle)) + (tracli-parse handle))) + +(defun tracli-parse (handle) + "Parse a mime HANDLE. + +Adapted from `mm-shr'." + (let ((shr-width (if shr-use-fonts + nil + fill-column)) + (shr-content-function (lambda (id) + (let ((handle (mm-get-content-id id))) + (when handle + (mm-with-part handle + (buffer-string)))))) + (shr-inhibit-images t) + (shr-blocked-images ".*") + charset coding char) + (mm-with-part (or handle (setq handle (mm-dissect-buffer t))) + (setq case-fold-search t) + (or (setq charset + (mail-content-type-get (mm-handle-type handle) 'charset)) + (progn + (goto-char (point-min)) + (and (re-search-forward "\ +<meta\\s-+http-equiv=[\"']?content-type[\"']?\\s-+content=[\"']?\ +text/html;\\s-*charset=\\([^\t\n\r \"'>]+\\)[^>]*>" nil t) + (setq coding (mm-charset-to-coding-system (match-string 1) + nil t)))) + (setq charset mail-parse-charset)) + (when (and (or coding + (setq coding (mm-charset-to-coding-system charset nil t))) + (not (eq coding 'ascii))) + (insert (prog1 + (decode-coding-string (buffer-string) coding) + (erase-buffer) + (set-buffer-multibyte t)))) + (goto-char (point-min)) + (while (re-search-forward + "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t) + (when (setq char + (cdr (assq (if (match-beginning 1) + (string-to-number (match-string 1) 16) + (string-to-number (match-string 2))) + mm-extra-numeric-entities))) + (replace-match (char-to-string char)))) + ;; Remove "soft hyphens". + (goto-char (point-min)) + (while (search-forward "" nil t) + (replace-match "" t t)) + (buffer-substring-no-properties (point-min) (point-max))))) + +(provide 'traclicker) +;;; traclicker.el ends here |