aboutsummaryrefslogtreecommitdiff
path: root/org-recoll.el
blob: 414ee95cacedb4f121db78b8066cc4d808d969af (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
;; -*- lexical-binding: t; -*-
;;; org-recoll.el --- a simple emacs interface to recoll full-text search that outputs with org-mode links
;;; Commentary:

;;; Code:

(require 'org)
(require 'dired)
(require 'doc-view)

;; Also recommended for best results:
;;(require 'pdf-tools)
;;(require 'org-pdfview)
;;(require 'ereader)
;;(require 'shr)

;;
;; Setup/Init
;;

(defgroup org-recoll nil
  "Recoll text-search integration for Org Mode"
  :group 'org
  :prefix "org-recoll-")

;; Public Variables

(defcustom org-recoll-results-num 10
  "This is the number of results to be displayed per page.
High numbers will reduce responsiveness.  It's set to 10 by
default for convenient paging"
  :group 'org-recoll
  :type 'integer)

(defcustom org-recoll-command-invocation "recoll -t -A"
  "This is the stem of the recoll shell command called by ORG-RECOLL-SEARCH.
Change this if your recoll executable is not in your path.
CAUTION: At present the parsing below expects a specific output
format, so changing the flags will break things."
  :group 'org-recoll
  :type 'string)

(defcustom org-recoll-index-invocation "recollindex"
  "This is the shell command called by ORG-RECOLL-UPDATE-INDEX.
Modify this if your recoll configuration file is not in the
standard location.  Note: do not add an & as the function already
backgrounds the process by default"
  :group 'org-recoll
  :type 'string)

(defcustom org-recoll-file-search-automatically t
  "Toggle whether file-search starts automatically after following a link.
Set to nil to disable.  This is a good idea if you aren't opening
the files in Emacs."
  :group 'org-recoll
  :type 'boolean)

(defcustom org-recoll-file-search-prompt t
  "Toggle prompt for an alternative file-search term after following a link.
If ORG-RECOLL-FILE-SEARCH-AUTOMATICALLY is t and this is nil, then a
file-search for the default term will auto-start on link opening.  If
ORG-RECOLL-FILE-SEARCH-AUTOMATICALLY is nil, then no search is
initated and this variable is not evaluated."
  :group 'org-recoll
  :type 'boolean)

(defcustom org-recoll-search-history nil
  "List to store your recoll search history."
  :group 'org-recoll
  :type 'list
)

(defcustom org-recoll-result-file-read-only t
  "Toggle whether opened search results are read-only.
This setting is intended as a precaution against accidentally
deleting/editing parts of your research library."
  :group 'org-recoll
  :type 'boolean)

(defcustom org-recoll-render-html t
  "Toggle whether opened html search results are automatically rendered."
  :group 'org-recoll
  :type 'boolean)

;; Internal Variables

(defvar org-recoll-end-of-current-page org-recoll-results-num)

(defvar org-recoll-start-of-current-page 0)

(defvar org-recoll-search-query nil)

(defvar org-recoll-filename nil)

(defvar org-recoll-html-file-types '(html xml opf htm))

;; Mode setup

(defvar org-recoll-mode-map
  (let ((kmap (make-sparse-keymap)))
    (define-key kmap (kbd "C-c n") 'org-recoll-next-page)
    (define-key kmap (kbd "C-c p") 'org-recoll-previous-page)
    (define-key kmap (kbd "C-c q") 'delete-window)
    kmap)
  "The keymap used for `org-recoll-mode'.")

(define-minor-mode org-recoll-mode
  "A minor mode to simplify navigation of recoll search results.
\\<org-recoll-mode-map> Some useful keys are:
\n
\\[org-recoll-next-page] - Advance to Next page of search results.
\\[org-recoll-previous-page] - Go back to previous page of search results.
\\[delete-window] - Exit.
\n
\\{org-recoll-mode-map}"
  nil ; default value
  :lighter " org-recoll" ; modeline notice
  :keymap org-recoll-mode-map ; key bindings
  :group 'org-recoll)

;;
;; Internal Functions
;;

(defun org-recoll-compare-string-to-list (string list)
  "Compares STRING to each element of LIST."
  (let ((matched nil))
    (while list
      (if (string= (prin1-to-string (car list)) string)
	  (setq matched t))
      (setq list (cdr list)))
    matched))

(defun org-recoll-fill-region-paragraphs ()
  "Fill region like `org-fill-paragraph' for each para in buffer."
  (interactive "r\nP")
  (goto-char (point-min))
  (save-excursion
    (while (< (point) (point-max))
     (org-fill-paragraph t)
      (forward-paragraph))))


(defun org-recoll-sanitize-single-quote (source-string)
  "Replace all instances of ' in SOURCE-STRING to be shell safe."
  (replace-regexp-in-string (regexp-quote "'") "'\\''" source-string nil 'literal))


(defun org-recoll-reformat-for-file-search (source-string)
  "Strip certain special search language characters in SOURCE-STRING with nil.
This is necessary because isearch has a different search idiom
than recoll, so, for example, a successful \"phrasal search\" in
recoll will yield no results in isearch.  I also strip out result
narrowing features like author: or title: style searches."
  (setq source-string (replace-regexp-in-string (regexp-quote "\"") "" source-string nil 'literal))
  (setq source-string (replace-regexp-in-string "\\(.*\\):.*?\s" "" source-string nil 'literal)))



(defun org-recoll-shr-render-current-buffer()
  (or (fboundp 'libxml-parse-html-region)
      (error "This function requires Emacs to be compiled with libxml2"))
  (let* (
	 (render-buffer (get-buffer-create "*html*"))
	 (intermediate-render (libxml-parse-html-region (point-min) (point-max))))
    (kill-buffer-if-not-modified (current-buffer))
    (with-current-buffer render-buffer
      (erase-buffer)
      (shr-insert-document intermediate-render)
      (goto-char (point-min)))
    (switch-to-buffer render-buffer)))
 
(defun org-recoll-doc-view-search (squery)
  "Jump to the next match of SQUERY in 'doc-view-mode'.
If the current document hasn't been transformed to plain text
till now do that first."
    ;; New search, so forget the old results.
    (setq doc-view--current-search-matches nil)
    (let ((txt (expand-file-name "doc.txt"
				 (doc-view--current-cache-dir))))
      (if (file-readable-p txt)
	  (progn
	    (setq doc-view--current-search-matches
		  (doc-view-search-internal squery txt))
	    (message "DocView: search yielded %d matches."
		     (doc-view-search-no-of-matches
		      doc-view--current-search-matches)))
	;; We must convert to TXT first!
	(if doc-view--current-converter-processes
	    (message "DocView: please wait till conversion finished.")
	  (doc-view-doc->txt txt (lambda () (org-recoll-doc-view-search nil)))))))

(defun org-recoll-post-open-actions (squery)
  "Perform rendering or search actions on opened file.
Prompt to start a file-search for SQUERY in the opened file or to
call pdf-occur for a pdf.  isearch can be a bit slow with pdfs
due to rendering speed.  PDF-OCCUR provides a speedy alternative.
Falls back gracefully to a modified doc-view-search if in
doc-view (where isearch doesn't work."
  (interactive)
  ;;For some reason at the stage in the org hook where this is called,
  ;;the opened file document is the "selected window" but is not the
  ;;"current buffer."  This lead to weird results attempting to start
  ;;searches, and this line fixes it.
  (switch-to-buffer (window-buffer (selected-window)))
  ;;Retrieve the filename from the buffer title.
  (setq org-recoll-filename (prin1-to-string (window-buffer (selected-window))))
  (setq org-recoll-filename (replace-regexp-in-string ">" "" org-recoll-filename))
  (setq org-recoll-filename (replace-regexp-in-string "#<buffer " "" org-recoll-filename))
  ;;If it's html, render it with shr
  (if (and org-recoll-render-html (featurep 'shr) (org-recoll-compare-string-to-list (file-name-extension org-recoll-filename) org-recoll-html-file-types))
      (org-recoll-shr-render-current-buffer))
  ;; Set pdfs and docs to page view by default
  (if (string= major-mode "doc-view-mode")
      (doc-view-fit-page-to-window)
    (if (string= (file-name-extension org-recoll-filename) "pdf") (pdf-view-fit-page-to-window)))
  ;;Search logic
  (if org-recoll-file-search-automatically
      (progn
	(if org-recoll-file-search-prompt (setq squery (read-string (concat "Enter file-search query: (default: " squery ")") nil nil squery)))
	;;If its a pdf, call pdf-occur (if available); otherwise start
	;;an isearch
	(if (string= (file-name-extension org-recoll-filename) "pdf")
	    (if (featurep 'pdf-tools) (pdf-occur squery) (message "Install pdf-tools and org-pdfview for pdf searching"))
	  (if(string= major-mode "doc-view-mode")
	      (org-recoll-doc-view-search squery)
	    (progn
	      (isearch-forward nil 1)
	      (isearch-yank-string squery))))))
    (if org-recoll-result-file-read-only (setq buffer-read-only t)))


(defun org-recoll-split-and-focus ()
  "Split window and focus the recoll results window after an original search."
  (when (= (length (window-list)) 1)
    (split-window-right))
  (other-window 1)
  (switch-to-buffer "*org-recoll-results*"))


(defun org-recoll-reset-result-count ()
  "Reset results count."
  (setq org-recoll-end-of-current-page org-recoll-results-num)
  (setq org-recoll-start-of-current-page 0))


(defun org-recoll-regexp-replace-in-buffer (from to)
  "Non-interactively replace all occurrences of FROM with TO."
  (goto-char (point-min))
  (while (re-search-forward from nil t)
    (replace-match to))
  )


(defun org-recoll-format-results ()
  "Format recoll results in buffer."
  ;; Format results in org format and tidy up
  (org-recoll-regexp-replace-in-buffer "\\[\\(.*\\)" "[[\\1")
  (org-recoll-regexp-replace-in-buffer "\\]\\(.*\\)" "-link")
  (org-recoll-regexp-replace-in-buffer "text\\/" "* ")
  (org-recoll-regexp-replace-in-buffer "inode\\/" "* ")
  (org-recoll-regexp-replace-in-buffer "message\\/rfc822" "* e-mail")
  (org-recoll-regexp-replace-in-buffer "image\\/" "* ")
  (org-recoll-regexp-replace-in-buffer "application\\/" "* ")
  (org-recoll-regexp-replace-in-buffer "\\/ABSTRACT" "")
  (org-recoll-regexp-replace-in-buffer "ABSTRACT" "")
  (org-recoll-regexp-replace-in-buffer "\\/\\([^\\/]*\\)-link" "/\\1][\\1]]")
  ;; Justify results
  (goto-char (point-min))
  (org-recoll-fill-region-paragraphs)
  ;; Add emphasis
  (highlight-phrase (org-recoll-reformat-for-file-search org-recoll-search-query) 'bold-italic))


(defun org-recoll-recollq-to-org (squery &optional paging)
  "Conduct recoll full-text search and format the results in org links.
SQUERY is the query passed in from the wrapper.
If PAGING is t this indicates that the function is being called to page through results."
  ;;If paging through results or starting a new search from the
  ;;buffer, stay in the buffer
  (unless (or paging (string= "#<buffer *org-recoll-results*>" (prin1-to-string (window-buffer (selected-window))))) (org-recoll-split-and-focus))
  ;;If you're not paging through the results, reset the paging
  ;;counters
  (unless paging (org-recoll-reset-result-count))
  ;;Unset read-only-mode to make the buffer temporarily writeable
  (read-only-mode -1)
  ;;Clear the buffer and do some setup
  (erase-buffer)
  (org-mode)
  (make-local-variable 'org-return-follows-link)
  (setq org-return-follows-link t)
  (org-recoll-mode t)
  ;;Should probably remove this or make it conditional as it's just
  ;;for my benefit
  (if (featurep 'flyspell) (flyspell-mode -1))
  ;;Print the query header and result count
  (insert (shell-command-to-string (concat org-recoll-command-invocation " -Q '" (org-recoll-sanitize-single-quote squery) "'")))
     (insert (concat "\n" "Results: " (number-to-string org-recoll-start-of-current-page) " - " (number-to-string org-recoll-end-of-current-page) "\n\n"))
     ;;Print results
     (insert (shell-command-to-string (concat org-recoll-command-invocation " -n '" (number-to-string org-recoll-start-of-current-page) "-" (number-to-string org-recoll-results-num) "' -q " "'" (org-recoll-sanitize-single-quote squery) "'" " | tail +3")))
  ;;Format
  (org-recoll-format-results)
  ;;Prevent editing
  (read-only-mode)
  ;;Add post-processing/file-search hook
  (add-hook 'org-follow-link-hook (lambda () (org-recoll-post-open-actions (org-recoll-reformat-for-file-search squery))) nil t))

;;
;; User-Facing Functions
;;


;;;###autoload
(defun org-recoll-update-index ()
  "Invoke the recoll index update command string specified in ORG-RECOLL-INDEX-INVOCATION."
  (interactive)
  (shell-command (concat org-recoll-index-invocation " &") "*org-recoll-index*" "*org-recoll-index*"))

(defun org-recoll-next-page ()
  "Delivers the next page of results."
  (interactive)
  (setq org-recoll-start-of-current-page org-recoll-end-of-current-page)
  (setq org-recoll-end-of-current-page (+ org-recoll-end-of-current-page org-recoll-results-num))
  (org-recoll-recollq-to-org org-recoll-search-query t))

(defun org-recoll-previous-page ()
  "Delivers the previous page of results."
  (interactive)
  (if (eq org-recoll-start-of-current-page 0) (error "You are already at the beginning of the results list"))
  (setq org-recoll-end-of-current-page org-recoll-start-of-current-page)
  (setq org-recoll-start-of-current-page (- org-recoll-start-of-current-page org-recoll-results-num))
  (org-recoll-recollq-to-org org-recoll-search-query t))

(defun org-recoll-completing-read (&optional squery)
  "Read from minibuffer with completion, but allowing spaces.
If SQUERY is passed offer it as a default."
  (define-key minibuffer-local-completion-map
    " " nil)
  (setq squery (completing-read "Enter your query: " org-recoll-search-history nil nil nil 'org-recoll-search-history squery))
  (define-key minibuffer-local-completion-map
    " " 'minibuffer-complete-word)
  squery)

;;;###autoload
(defun org-recoll-search (&optional query)
  "Prompt for a QUERY and search."
  (interactive)
  (if query
      (setq org-recoll-search-query query)
    (setq org-recoll-search-query (org-recoll-completing-read)))
  (org-recoll-recollq-to-org org-recoll-search-query))

(provide 'org-recoll)

;;; org-recoll.el ends here