aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuchen Pei <id@ypei.org>2023-10-30 00:20:42 +1100
committerYuchen Pei <id@ypei.org>2023-10-30 00:20:42 +1100
commit44e64e91e598c588e3170500a6c9eb55bddf5d10 (patch)
tree48211bb5ac45993f508a559b16b5897a9a7dcfa2
parent53a3ed523e7e567f853e0238681da5302180c017 (diff)
Finish initial implementation
-rw-r--r--README.org69
-rw-r--r--traclicker.el62
2 files changed, 120 insertions, 11 deletions
diff --git a/README.org b/README.org
index d6efc6d..a26be98 100644
--- a/README.org
+++ b/README.org
@@ -3,7 +3,7 @@
* Introduction
:PROPERTIES:
- :UPDATED: [2023-10-29 Sun 19:13]
+ :UPDATED: [2023-10-30 Mon 00:02]
:END:
Click trackers are recipient-specific links in email newsletters which
@@ -20,8 +20,71 @@ privacy risk for email recipients, by automatically sending a HEAD
request to all links and collecting the real urls, and at the time
when the recipient actually reads the mails, showing the real urls.
-This is a work in progress, and at this moment only the collection
-part has been implemented, for messages in maildirs.
+* Usage
+ :PROPERTIES:
+ :UPDATED: [2023-10-30 Mon 00:20]
+ :END:
+
+Add to load path and require.
+
+#+begin_src emacs-lisp
+(add-to-list 'load-path "~/.emacs.d/lisp/traclicker")
+(require 'traclicker)
+#+end_src
+
+Configure the maildir boxes (directories with =cur=, =new= and =tmp=
+sub-directories) and senders of emails you want to scan and replace
+click trackers with real links:
+
+#+begin_src emacs-lisp
+(setq tracli-senders '("info@some-org.com"
+ "newsletter@another-org.com"))
+(setq tracli-maildirs '("~/mail/Inbox" "~/mail/Archive"))
+#+end_src
+
+You are ready for an initial scan now. To do an initial scan of all
+mails in these maildirs by these senders, do ~M-x tracli-scan~.
+
+To do an initial scan of emails from say the past 30 days, eval
+
+#+begin_src emacs-lisp
+(tracli-scan
+ (format "%d"
+ (- (string-to-number (format-time-string "%s" (current-time)))
+ (* 60 60 24 30))))
+#+end_src
+
+Or to do a symbolic scan of emails, i.e. just initialise an empty
+database with the current time stamp for future scans, do
+
+#+begin_src emacs-lisp
+(tracli-scan (format-time-string "%s" (current-time)))
+#+end_src
+
+You now have a database at ~tracli-db-file~. You can then run
+~tracli-scan~ periodically
+
+#+begin_src emacs-lisp
+(setq my-tracli-timer
+ (run-at-time
+ "07:00am"
+ 86400 #'tracli-scan))
+#+end_src
+
+or as a hook after running say ~gnus-group-get-new-news~
+
+#+begin_src emacs-lisp
+(add-hook 'gnus-after-getting-new-news-hook #'tracli-scan)
+#+end_src
+
+Whenever you view a mail containing click trackers in the traclicker
+database, run ~M-x tracli-wash-gnus-article~ (by default bound to ~v
+t~ in gnus-summary-mode or gnus-article-mode), and all the click
+tracker urls are replaced by the real urls!
+
+Note that the real urls may still contain tracking query parts like
+utm parameters, but the cleaning of these urls by hand is feasible,
+and a package that does so belongs to a separate project :D
* Copyright and contact
:PROPERTIES:
diff --git a/traclicker.el b/traclicker.el
index f06580f..c4b53a4 100644
--- a/traclicker.el
+++ b/traclicker.el
@@ -24,24 +24,40 @@
;; traclicker is a tool that protects user privacy against email click
;; trackers by sending a HEAD request to all urls in these emails and
-;; recording the real urls.
+;; recording the real urls. The real urls are collected into a
+;; database, which is consulted when the user reads emails in gnus. To
+;; substitute all click trackers in an email with the real ones, run
+;; `tracli-wash-gnus-article', bound to "v t" in gnus-summary-mode /
+;; gnus-article-mode.
;;; Code:
+(require 'gnus)
+(require 'gnus-art)
(require 'gnus-util)
(require 'ol)
(require 'mm-decode)
+(require 'shr)
-(defvar tracli-senders
+(defcustom tracli-senders
nil
- "List of email addresses.")
+ "List of sender email addresses to scan for links."
+ :type '(repeat string)
+ :group 'traclicker)
-(defvar tracli-db-file (locate-user-emacs-file "traclicker.eld")
- "Database file to read.")
+(defcustom tracli-db-file (locate-user-emacs-file "traclicker.eld")
+ "Database file to read and write."
+ :type 'file
+ :group 'traclicker)
-(defvar tracli-maildirs '("~/mail")
+(defcustom tracli-maildirs '("~/mail")
"List of maildirs to scan and process urls.
-Will scan from both cur and new subdirs.")
+Will scan from both cur and new subdirs."
+ :type '(repeat directory)
+ :group 'traclicker)
+
+(defvar tracli-data nil
+ "Data read from and written to `tracli-db-file'.")
(defun tracli-read-data ()
"Read data from `tracli-db-file'."
@@ -51,6 +67,10 @@ Will scan from both cur and new subdirs.")
(goto-char (point-min))
(read (current-buffer)))))
+(defun tracli-refresh-data ()
+ "Read into `tracli-data'."
+ (setq tracli-data (tracli-read-data)))
+
(defun tracli-scan (&optional after)
"Scan emails and get redirect urls.
@@ -58,7 +78,8 @@ Only scan emails that are sent by `tracli-senders', delivered
after AFTER (in epoch), in `tracli-maildirs'.
If AFTER is nil, use the timestamp in `tracli-db-file', or an
-empty string (i.e. scan all mails)."
+empty string (i.e. scan mails from any time)."
+ (interactive)
(let* ((sender-re (format "<%s>" (regexp-opt tracli-senders)))
(counter 0)
(data (tracli-read-data))
@@ -92,6 +113,7 @@ empty string (i.e. scan all mails)."
(with-temp-buffer
(insert (prin1-to-string data))
(write-region nil nil tracli-db-file))
+ (setq tracli-data data)
(message "Processed %d mails" counter)))
(defun tracli-get-redirect-url (url)
@@ -190,5 +212,29 @@ text/html;\\s-*charset=\\([^\t\n\r \"'>]+\\)[^>]*>" nil t)
(replace-match "" t t))
(buffer-substring-no-properties (point-min) (point-max)))))
+(defun tracli-wash-gnus-article ()
+ "Wash gnus article by replacing links with those in the `tracli-data'."
+ (interactive)
+ (with-current-buffer gnus-article-buffer
+ (save-excursion
+ (when-let ((inhibit-read-only t)
+ (urls (alist-get 'urls tracli-data)))
+ (article-goto-body)
+ (while (re-search-forward org-link-plain-re nil t)
+ (let ((matched (match-string-no-properties 0)))
+ (when-let ((new-url (gethash matched urls)))
+ (replace-match new-url))))
+ (article-goto-body)
+ (let ((match))
+ (while (setq match (text-property-search-forward 'shr-url))
+ (when-let ((start (prop-match-beginning match))
+ (end (prop-match-end match))
+ (new-url (gethash (prop-match-value match) urls)))
+ (put-text-property start end 'shr-url new-url)
+ (put-text-property start end 'help-echo new-url))))))
+ (gnus-article-add-buttons)))
+
+(define-key gnus-summary-mode-map (kbd "v t") #'tracli-wash-gnus-article)
+
(provide 'traclicker)
;;; traclicker.el ends here