blob: 11bb75daacecf58a0e28105a2a166c03bc715e52 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
;;; link-gopher.el -- Find and filter urls -*- lexical-binding: t -*-
;; Copyright (C) 2023 Free Software Foundation.
;; Author: Yuchen Pei <id@ypei.org>
;; Package-Requires: ((emacs "28.2"))
;; This file is part of dotfiles.
;; dotfiles is free software: you can redistribute it and/or modify it
;; under the terms of the GNU Affero General Public License as
;; published by the Free Software Foundation, either version 3 of the
;; License, or (at your option) any later version.
;; dotfiles is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; Affero General Public License for more details.
;; You should have received a copy of the GNU Affero General Public
;; License along with dotfiles. If not, see
;; <https://www.gnu.org/licenses/>.
;;; Commentary:
;; Find and filter urls.
;;; Code:
(require 'my-utils)
;;; todo: some of these functions could be unnecessary
(defun link-gopher-kill-all-links (url filter-regexp)
(interactive (list (read-string "URL: "
(thing-at-point-url-at-point))
(read-string "Regexp: ")))
(let ((results (link-gopher-get-all-links url filter-regexp)))
(kill-new (string-join results " "))
(message "Added %d links to the kill ring!" (length results))))
(defun link-gopher-kill-all-links-in-buffer (filter-regexp)
"may not report accurate links e.g. when the link contains special chars like space"
(interactive "sRegexp: ")
(let ((links (link-gopher-get-all-links-in-buffer filter-regexp)))
(kill-new (string-join links " "))
(message "Added %d links to the kill ring!" (length links))))
(defun link-gopher-get-all-links (url filter-regexp)
"get all links satisfying a regexp on url.
no duplicates."
(with-current-buffer (url-retrieve-synchronously url)
(my-skip-http-header)
(let ((results) (clean-url) (hash (make-hash-table :test 'equal)))
(while (re-search-forward
"\\(href\\|HREF\\|src\\|SRC\\)\\ *=\\ *['\"]\\([^\"']+\\)['\"]" nil t)
(setq clean-url (link-gopher-clean-url (match-string 2) url))
(when (or (not filter-regexp)
(string-match filter-regexp clean-url))
(when (not (gethash clean-url hash))
(puthash clean-url t hash)
(push clean-url results))))
(reverse results))))
(defun link-gopher-clean-url (url current-url)
"clean url
hello - filename: hello
/hello - type: nil; host: nil; filename: /hello
//hello - type: nil; host: hello; filename: empty string
removing frags
"
(let* ((current-domain
(progn (string-match "^\\(.*://[^/]+/\\)" current-url)
(match-string 1 current-url)))
(current-domain-dir-path
(progn (string-match "^\\(.*/\\)" current-url)
(match-string 1 current-url)))
(url-no-frags (replace-regexp-in-string "#.*" "" url)))
(url-encode-url
(cond ((string-match "://" url-no-frags) url-no-frags)
((string-match "^//" url-no-frags) (concat "https:" url-no-frags))
((string-match "^/" url-no-frags) (concat current-domain url-no-frags))
(t (concat current-domain-dir-path url-no-frags))))))
(defun link-gopher-get-all-links-in-buffer (filter-regexp)
(let ((results) (hash (make-hash-table :test 'equal)))
(save-excursion
(goto-char (point-min))
(while
(progn
(when-let ((url (get-text-property (point) 'shr-url)))
(when (or (not filter-regexp)
(string-match filter-regexp url))
(when (not (gethash url hash))
(puthash url t hash)
(push url results))))
(when-let ((next-change-point
(next-single-property-change (point) 'shr-url)))
(goto-char next-change-point)))))
results))
(defun http-s-links-in-buffer (&optional filter-regexp)
(save-excursion
(unless filter-regexp (setq filter-regexp ".*"))
(let ((results) (url))
(while (re-search-forward "\\(http\\(s\\)://[^\" \n]+\\)" nil t)
(setq url (match-string 1))
(when (and (string-match filter-regexp url)
(not (member url results)))
(push url results)))
(reverse results))))
(defun http-s-media-links-in-buffer ()
(http-s-links-in-buffer
"\\.\\(jpeg\\|jpg\\|png\\|gif\\|webp\\|mp4\\|flv\\|mkv\\|mov\\|webm\\|ogv\\|avi\\|rmvb\\|mp3\\|ogg\\|opus\\|pdf\\|docx\\|epub\\)$"))
(provide 'link-gopher)
;;; link-gopher.el ends here
|