From 44e64e91e598c588e3170500a6c9eb55bddf5d10 Mon Sep 17 00:00:00 2001 From: Yuchen Pei Date: Mon, 30 Oct 2023 00:20:42 +1100 Subject: Finish initial implementation --- README.org | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- traclicker.el | 62 ++++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 120 insertions(+), 11 deletions(-) diff --git a/README.org b/README.org index d6efc6d..a26be98 100644 --- a/README.org +++ b/README.org @@ -3,7 +3,7 @@ * Introduction :PROPERTIES: - :UPDATED: [2023-10-29 Sun 19:13] + :UPDATED: [2023-10-30 Mon 00:02] :END: Click trackers are recipient-specific links in email newsletters which @@ -20,8 +20,71 @@ privacy risk for email recipients, by automatically sending a HEAD request to all links and collecting the real urls, and at the time when the recipient actually reads the mails, showing the real urls. -This is a work in progress, and at this moment only the collection -part has been implemented, for messages in maildirs. +* Usage + :PROPERTIES: + :UPDATED: [2023-10-30 Mon 00:20] + :END: + +Add to load path and require. + +#+begin_src emacs-lisp +(add-to-list 'load-path "~/.emacs.d/lisp/traclicker") +(require 'traclicker) +#+end_src + +Configure the maildir boxes (directories with =cur=, =new= and =tmp= +sub-directories) and senders of emails you want to scan and replace +click trackers with real links: + +#+begin_src emacs-lisp +(setq tracli-senders '("info@some-org.com" + "newsletter@another-org.com")) +(setq tracli-maildirs '("~/mail/Inbox" "~/mail/Archive")) +#+end_src + +You are ready for an initial scan now. To do an initial scan of all +mails in these maildirs by these senders, do ~M-x tracli-scan~. + +To do an initial scan of emails from say the past 30 days, eval + +#+begin_src emacs-lisp +(tracli-scan + (format "%d" + (- (string-to-number (format-time-string "%s" (current-time))) + (* 60 60 24 30)))) +#+end_src + +Or to do a symbolic scan of emails, i.e. just initialise an empty +database with the current time stamp for future scans, do + +#+begin_src emacs-lisp +(tracli-scan (format-time-string "%s" (current-time))) +#+end_src + +You now have a database at ~tracli-db-file~. You can then run +~tracli-scan~ periodically + +#+begin_src emacs-lisp +(setq my-tracli-timer + (run-at-time + "07:00am" + 86400 #'tracli-scan)) +#+end_src + +or as a hook after running say ~gnus-group-get-new-news~ + +#+begin_src emacs-lisp +(add-hook 'gnus-after-getting-new-news-hook #'tracli-scan) +#+end_src + +Whenever you view a mail containing click trackers in the traclicker +database, run ~M-x tracli-wash-gnus-article~ (by default bound to ~v +t~ in gnus-summary-mode or gnus-article-mode), and all the click +tracker urls are replaced by the real urls! + +Note that the real urls may still contain tracking query parts like +utm parameters, but the cleaning of these urls by hand is feasible, +and a package that does so belongs to a separate project :D * Copyright and contact :PROPERTIES: diff --git a/traclicker.el b/traclicker.el index f06580f..c4b53a4 100644 --- a/traclicker.el +++ b/traclicker.el @@ -24,24 +24,40 @@ ;; traclicker is a tool that protects user privacy against email click ;; trackers by sending a HEAD request to all urls in these emails and -;; recording the real urls. +;; recording the real urls. The real urls are collected into a +;; database, which is consulted when the user reads emails in gnus. To +;; substitute all click trackers in an email with the real ones, run +;; `tracli-wash-gnus-article', bound to "v t" in gnus-summary-mode / +;; gnus-article-mode. ;;; Code: +(require 'gnus) +(require 'gnus-art) (require 'gnus-util) (require 'ol) (require 'mm-decode) +(require 'shr) -(defvar tracli-senders +(defcustom tracli-senders nil - "List of email addresses.") + "List of sender email addresses to scan for links." + :type '(repeat string) + :group 'traclicker) -(defvar tracli-db-file (locate-user-emacs-file "traclicker.eld") - "Database file to read.") +(defcustom tracli-db-file (locate-user-emacs-file "traclicker.eld") + "Database file to read and write." + :type 'file + :group 'traclicker) -(defvar tracli-maildirs '("~/mail") +(defcustom tracli-maildirs '("~/mail") "List of maildirs to scan and process urls. -Will scan from both cur and new subdirs.") +Will scan from both cur and new subdirs." + :type '(repeat directory) + :group 'traclicker) + +(defvar tracli-data nil + "Data read from and written to `tracli-db-file'.") (defun tracli-read-data () "Read data from `tracli-db-file'." @@ -51,6 +67,10 @@ Will scan from both cur and new subdirs.") (goto-char (point-min)) (read (current-buffer))))) +(defun tracli-refresh-data () + "Read into `tracli-data'." + (setq tracli-data (tracli-read-data))) + (defun tracli-scan (&optional after) "Scan emails and get redirect urls. @@ -58,7 +78,8 @@ Only scan emails that are sent by `tracli-senders', delivered after AFTER (in epoch), in `tracli-maildirs'. If AFTER is nil, use the timestamp in `tracli-db-file', or an -empty string (i.e. scan all mails)." +empty string (i.e. scan mails from any time)." + (interactive) (let* ((sender-re (format "<%s>" (regexp-opt tracli-senders))) (counter 0) (data (tracli-read-data)) @@ -92,6 +113,7 @@ empty string (i.e. scan all mails)." (with-temp-buffer (insert (prin1-to-string data)) (write-region nil nil tracli-db-file)) + (setq tracli-data data) (message "Processed %d mails" counter))) (defun tracli-get-redirect-url (url) @@ -190,5 +212,29 @@ text/html;\\s-*charset=\\([^\t\n\r \"'>]+\\)[^>]*>" nil t) (replace-match "" t t)) (buffer-substring-no-properties (point-min) (point-max))))) +(defun tracli-wash-gnus-article () + "Wash gnus article by replacing links with those in the `tracli-data'." + (interactive) + (with-current-buffer gnus-article-buffer + (save-excursion + (when-let ((inhibit-read-only t) + (urls (alist-get 'urls tracli-data))) + (article-goto-body) + (while (re-search-forward org-link-plain-re nil t) + (let ((matched (match-string-no-properties 0))) + (when-let ((new-url (gethash matched urls))) + (replace-match new-url)))) + (article-goto-body) + (let ((match)) + (while (setq match (text-property-search-forward 'shr-url)) + (when-let ((start (prop-match-beginning match)) + (end (prop-match-end match)) + (new-url (gethash (prop-match-value match) urls))) + (put-text-property start end 'shr-url new-url) + (put-text-property start end 'help-echo new-url)))))) + (gnus-article-add-buttons))) + +(define-key gnus-summary-mode-map (kbd "v t") #'tracli-wash-gnus-article) + (provide 'traclicker) ;;; traclicker.el ends here -- cgit v1.2.3