;;; link-gopher.el --- Find and filter urls -*- lexical-binding: t -*-

;; Copyright (C) 2023 Free Software Foundation.

;; Author: Yuchen Pei
;; Package-Requires: ((emacs "28.2"))

;; This file is part of dotfiles.

;; dotfiles is free software: you can redistribute it and/or modify it
;; under the terms of the GNU Affero General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.

;; dotfiles is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Affero General Public License for more details.

;; You should have received a copy of the GNU Affero General Public
;; License along with dotfiles.  If not, see
;; <https://www.gnu.org/licenses/>.

;;; Commentary:

;; Find and filter urls.

;;; Code:

(require 'my-utils)
(require 'subr-x)
(require 'thingatpt)
(require 'url)
(require 'url-util)

;; TODO: some of these functions could be unnecessary.

(defun link-gopher-kill-all-links (url filter-regexp)
  "Add all links in the page at URL matching FILTER-REGEXP to the kill ring."
  (interactive (list (read-string "URL: " (thing-at-point-url-at-point))
                     (read-string "Regexp: ")))
  (let ((results (link-gopher-get-all-links url filter-regexp)))
    (kill-new (string-join results " "))
    (message "Added %d links to the kill ring!" (length results))))

(defun link-gopher-kill-all-links-in-buffer (filter-regexp)
  "Add all links in the buffer matching FILTER-REGEXP to the kill ring.
May not report accurate links, e.g. when a link contains special
characters like spaces."
  (interactive "sRegexp: ")
  (let ((links (link-gopher-get-all-links-in-buffer filter-regexp)))
    (kill-new (string-join links " "))
    (message "Added %d links to the kill ring!" (length links))))

(defun link-gopher-get-all-links (url filter-regexp)
  "Get all links in the page at URL matching FILTER-REGEXP.
No duplicates.  If FILTER-REGEXP is nil, return all links."
  (with-current-buffer (url-retrieve-synchronously url)
    (my-skip-http-header)
    (let ((results)
          (clean-url)
          (hash (make-hash-table :test 'equal)))
      ;; Scrape href and src attributes from the raw html.
      (while (re-search-forward
              "\\(href\\|HREF\\|src\\|SRC\\) *= *['\"]\\([^\"']+\\)['\"]"
              nil t)
        (setq clean-url (link-gopher-clean-url (match-string 2) url))
        (when (or (not filter-regexp)
                  (string-match filter-regexp clean-url))
          (unless (gethash clean-url hash)
            (puthash clean-url t hash)
            (push clean-url results))))
      (nreverse results))))

(defun link-gopher-clean-url (url current-url)
  "Resolve URL against CURRENT-URL into an absolute url, dropping fragments.
The cases are:
- \"hello\" is taken as relative to the directory of CURRENT-URL;
- \"/hello\" is taken as relative to the host of CURRENT-URL;
- \"//hello\" is scheme-relative and gets an \"https:\" prefix;
- anything containing \"://\" is already absolute and kept as is."
  (let* ((current-domain
          (progn (string-match "^\\(.*://[^/]+\\)" current-url)
                 (match-string 1 current-url)))
         (current-domain-dir-path
          (progn (string-match "^\\(.*/\\)" current-url)
                 (match-string 1 current-url)))
         ;; Drop the fragment: everything from "#" onwards.
         (url-no-frags (replace-regexp-in-string "#.*" "" url)))
    (url-encode-url
     (cond ((string-match "://" url-no-frags) url-no-frags)
           ((string-match "^//" url-no-frags)
            (concat "https:" url-no-frags))
           ((string-match "^/" url-no-frags)
            (concat current-domain url-no-frags))
           (t (concat current-domain-dir-path url-no-frags))))))

(defun link-gopher-get-all-links-in-buffer (filter-regexp)
  "Get all shr links in the buffer matching FILTER-REGEXP.
No duplicates.  If FILTER-REGEXP is nil, return all links."
  (let ((results)
        (hash (make-hash-table :test 'equal)))
    (save-excursion
      (goto-char (point-min))
      ;; Walk the stretches of text carrying the shr-url property.
      (while (progn
               (when-let ((url (get-text-property (point) 'shr-url)))
                 (when (or (not filter-regexp)
                           (string-match filter-regexp url))
                   (unless (gethash url hash)
                     (puthash url t hash)
                     (push url results))))
               (when-let ((next-change-point
                           (next-single-property-change (point) 'shr-url)))
                 (goto-char next-change-point)))))
    (nreverse results)))
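;; A sketch of what `link-gopher-clean-url' returns, assuming a page at
;; the hypothetical url https://example.org/dir/page.html:
;;
;;   (link-gopher-clean-url "a.html" "https://example.org/dir/page.html")
;;   ;; => "https://example.org/dir/a.html"
;;   (link-gopher-clean-url "/a.html" "https://example.org/dir/page.html")
;;   ;; => "https://example.org/a.html"
;;   (link-gopher-clean-url "//cdn.example.org/a.js" "https://example.org/dir/page.html")
;;   ;; => "https://cdn.example.org/a.js"
;;   (link-gopher-clean-url "b.html#frag" "https://example.org/dir/page.html")
;;   ;; => "https://example.org/dir/b.html"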
(defun http-s-links-in-buffer (&optional filter-regexp)
  "Get all http(s) links in the buffer matching FILTER-REGEXP.
No duplicates.  If FILTER-REGEXP is nil, return all links."
  (save-excursion
    (unless filter-regexp (setq filter-regexp ".*"))
    (goto-char (point-min))
    (let ((results) (url))
      (while (re-search-forward "\\(https?://[^\" \n]+\\)" nil t)
        (setq url (match-string 1))
        (when (and (string-match filter-regexp url)
                   (not (member url results)))
          (push url results)))
      (nreverse results))))

(defun http-s-media-links-in-buffer ()
  "Get all http(s) links to media files in the buffer."
  (http-s-links-in-buffer
   (concat "\\.\\(jpeg\\|jpg\\|png\\|gif\\|webp\\|mp4\\|flv\\|mkv\\|mov"
           "\\|webm\\|ogv\\|avi\\|rmvb\\|mp3\\|ogg\\|opus\\|pdf\\|docx\\|epub\\)$")))

(provide 'link-gopher)
;;; link-gopher.el ends here