aboutsummaryrefslogblamecommitdiff
path: root/emacs/.emacs.d/lisp/my/link-gopher.el
blob: 5e3fe7737019ba42659c60f33ffa7c12177f5792 (plain) (tree)


















































                                                                                      




                                                                           







                                                          





                                                                      
                                                           

                                                                                        




























                                                                        
                                                                                                                                       


                            
;;; link-gopher.el -- Find and filter urls -*- lexical-binding: t -*-

;; Copyright (C) 2023 Free Software Foundation.

;; Author: Yuchen Pei <id@ypei.org>
;; Package-Requires: ((emacs "28.2"))

;; This file is part of dotfiles.

;; dotfiles is free software: you can redistribute it and/or modify it
;; under the terms of the GNU Affero General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.

;; dotfiles is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; Affero General Public License for more details.

;; You should have received a copy of the GNU Affero General Public
;; License along with dotfiles.  If not, see
;; <https://www.gnu.org/licenses/>.

;;; Commentary:

;; Find and filter urls.

;;; Code:
(require 'my-utils)

;;; todo: some of these functions could be unnecessary
(defun link-gopher-kill-all-links (url filter-regexp)
  (interactive (list (read-string "URL: "
				  (thing-at-point-url-at-point))
		     (read-string "Regexp: ")))
  (let ((results (link-gopher-get-all-links url filter-regexp)))
    (kill-new (string-join results " "))
    (message "Added %d links to the kill ring!" (length results))))
(defun link-gopher-kill-all-links-in-buffer (filter-regexp)
  "may not report accurate links e.g. when the link contains special chars like space"
  (interactive "sRegexp: ")
  (let ((links (link-gopher-get-all-links-in-buffer filter-regexp)))
    (kill-new (string-join links " "))
    (message "Added %d links to the kill ring!" (length links))))
(defun link-gopher-get-all-links (url filter-regexp)
  "get all links satisfying a regexp on url.
no duplicates."
  (with-current-buffer (url-retrieve-synchronously url)
    (my-skip-http-header)
    (let ((results) (clean-url) (hash (make-hash-table :test 'equal)))
      (while (re-search-forward
              "\\(href\\|HREF\\|src\\|SRC\\)\\ *=\\ *['\"]\\([^\"']+\\)['\"]" nil t)
	      (setq clean-url (link-gopher-clean-url (match-string 2) url))
	      (when (or (not filter-regexp)
		              (string-match filter-regexp clean-url))
	        (when (not (gethash clean-url hash))
	          (puthash clean-url t hash)
	          (push clean-url results))))
      (reverse results))))
(defun link-gopher-clean-url (url current-url)
  "clean url
  hello - filename: hello
  /hello - type: nil; host: nil; filename: /hello
  //hello - type: nil; host: hello; filename: empty string
removing frags
"
  (let* ((current-domain
	        (progn (string-match "^\\(.*://[^/]+/\\)" current-url)
		             (match-string 1 current-url)))
	       (current-domain-dir-path
	        (progn (string-match "^\\(.*/\\)" current-url)
		             (match-string 1 current-url)))
	       (url-no-frags (replace-regexp-in-string "#.*" "" url)))
    (url-encode-url
     (cond ((string-match "://" url-no-frags) url-no-frags)
	         ((string-match "^//" url-no-frags) (concat "https:" url-no-frags))
	         ((string-match "^/" url-no-frags) (concat current-domain url-no-frags))
	         (t (concat current-domain-dir-path url-no-frags))))))
(defun link-gopher-get-all-links-in-buffer (filter-regexp)
  (let ((results) (hash (make-hash-table :test 'equal)))
    (save-excursion
      (goto-char (point-min))
      (while
	  (progn
	    (when-let ((url (get-text-property (point) 'shr-url)))
	      (when (or (not filter-regexp)
			(string-match filter-regexp url))
		(when (not (gethash url hash))
		  (puthash url t hash)
		  (push url results))))
	    (when-let ((next-change-point
			(next-single-property-change (point) 'shr-url)))
	      (goto-char next-change-point)))))
    results))

(defun http-s-links-in-buffer (&optional filter-regexp)
  (save-excursion
    (unless filter-regexp (setq filter-regexp ".*"))
    (let ((results) (url))
      (while (re-search-forward "\\(http\\(s\\)://[^\" \n]+\\)" nil t)
        (setq url (match-string 1))
        (when (and (string-match filter-regexp url)
                   (not (member url results)))
          (push url results)))
      (reverse results))))

(defun http-s-media-links-in-buffer ()
  (http-s-links-in-buffer
   "\\.\\(jpeg\\|jpg\\|png\\|gif\\|webp\\|mp4\\|flv\\|mkv\\|mov\\|webm\\|ogv\\|avi\\|rmvb\\|mp3\\|ogg\\|opus\\|pdf\\|docx\\|epub\\)$"))

(provide 'link-gopher)
;;; link-gopher.el ends here