forked from jkitchin/org-ref
-
Notifications
You must be signed in to change notification settings - Fork 0
/
org-ref-pdf.el
290 lines (247 loc) · 9.83 KB
/
org-ref-pdf.el
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
;;; org-ref-pdf.el --- Drag-n-drop PDF onto bibtex files -*- lexical-binding: t; -*-
;; Copyright (C) 2015 John Kitchin
;; Author: John Kitchin <[email protected]>
;; Keywords:
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;; This library provides functions to enable drag-n-drop of pdfs onto a bibtex
;; buffer to add bibtex entries to it.
;; TODO: If no DOI is found, figure out a way to do a crossref/google query to
;; get a doi. This needs a reliable title/citation.
;;; Code:
(require 'f)
;; [2019-10-13 Sun] I am commenting this out for now. I added it for some
;; reason, but I cannot figure out why. It is pretty slow to load, so since I
;; don't know why it is here, I am commenting it out until it is obvious again.
;; (require 'pdf-tools)
(eval-when-compile
(require 'cl-lib))
(declare-function org-ref-bibtex-key-from-doi "org-ref-bibtex.el")
;; See https://github.com/jkitchin/org-ref/issues/812
;; Apparently `dnd-unescape-uri' is being renamed to `dnd--unescape-uri'.
;; (if (and (not (fboundp 'dnd-unescape-uri))
;; (fboundp 'dnd--escape-uri))
;; (defalias 'dnd-unescape-uri 'dnd--unescape-uri)
;; (warn "dnd-unescape-uri is undefined. Some things may not work."))
;; A group must not list itself as its parent, otherwise it is not
;; reachable from any other customization group.
;; NOTE(review): assumes the main org-ref library defines the `org-ref'
;; group -- confirm.
(defgroup org-ref-pdf nil
  "Customization group for org-ref-pdf."
  :tag "Org Ref PDF"
  :group 'org-ref)
(defcustom pdftotext-executable "pdftotext"
  "Name or path of the pdftotext program.
Change this when the executable is not on your path, or when you
want to use another version."
  :group 'org-ref-pdf
  :type 'file)
(defcustom org-ref-pdf-doi-regex "10\\.[0-9]\\{4,9\\}/[-+._;()/:A-Z0-9]+"
  "Regexp used to find DOIs in the text extracted from a pdf.
The suffix character class only lists upper-case letters, so
lower-case suffixes are matched only when `case-fold-search' is
non-nil during the search."
  :group 'org-ref-pdf
  :type 'regexp)
(defcustom org-ref-pdf-to-bibtex-function
  'copy-file
  "Function for getting a pdf to the `org-ref-pdf-directory'.
It is called with two arguments: the file name of the pdf and the
destination file name.  Defaults to `copy-file', but could also be
`rename-file'."
  ;; The value is a function symbol, so the customization type is
  ;; `function' (`File' is not a valid customization type).
  :type 'function
  :group 'org-ref-pdf)
(defun org-ref-extract-doi-from-pdf (pdf)
  "Return a list of the DOIs found in the text of the file PDF, or nil.
PDF is a file name.  Percent-escapes in it (as found in a dropped
file: URI) are decoded before it is handed to
`pdftotext-executable'.

The file is converted to text and every match of
`org-ref-pdf-doi-regex' is collected.  A single trailing \".\" on a
match is removed.  Duplicates are dropped, and the list is in
reverse order of first appearance in the text."
  ;; `url-unhex-string' is used instead of `dnd-unescape-uri', which may
  ;; not be defined in newer Emacs releases (see
  ;; https://github.com/jkitchin/org-ref/issues/812).  The second
  ;; argument keeps escaped newlines instead of turning them into spaces.
  (require 'url-util)
  (with-temp-buffer
    (insert (shell-command-to-string
             (format "%s %s -"
                     pdftotext-executable
                     (shell-quote-argument (url-unhex-string pdf t)))))
    (goto-char (point-min))
    (let ((matches '()))
      (while (re-search-forward org-ref-pdf-doi-regex nil t)
        (let ((doi (match-string 0)))
          ;; The regexp accepts \".\" inside a DOI, so it can also swallow
          ;; a sentence-ending period; drop one if present.
          (when (string-suffix-p "." doi)
            (setq doi (substring doi 0 -1)))
          (cl-pushnew doi matches :test #'equal)))
      matches)))
(defun org-ref-pdf-doi-candidates (dois)
  "Build the helm candidate list for DOIS.
Each element of the result is a cons (TITLE . DOI), where TITLE is
the :title entry of what `doi-utils-get-json-metadata' returns for
that DOI.  If looking up the metadata signals an error, the display
string is \"DOI read error\" instead.

Used when multiple dois are found in a pdf file."
  (mapcar (lambda (doi)
            (condition-case nil
                (let ((metadata (doi-utils-get-json-metadata doi)))
                  (cons (plist-get metadata :title) doi))
              (error
               (cons (format "%s read error" doi) doi))))
          dois))
(defun org-ref-pdf-add-dois (_)
  "Helm action that adds a bibtex entry for every marked DOI.
The argument is ignored; the DOIs are taken from
`helm-marked-candidates'.  Each one is passed to
`doi-utils-add-bibtex-entry-from-doi' together with the file name of
the current buffer."
  (dolist (marked-doi (helm-marked-candidates))
    (doi-utils-add-bibtex-entry-from-doi marked-doi (buffer-file-name))))
;;;###autoload
(defun org-ref-pdf-to-bibtex ()
  "Add a bibtex entry for the pdf visited by the current buffer.
The DOI is extracted from the pdf with
`org-ref-extract-doi-from-pdf'; when several DOIs are found you are
asked to select one.  The entry is added with
`doi-utils-add-bibtex-entry-from-doi', and the pdf is then passed to
`org-ref-pdf-to-bibtex-function' with a destination in
`org-ref-pdf-directory' named after the key of the new entry.

Signals an error if the current buffer is not visiting a pdf file,
or if no DOI is found in it."
  (interactive)
  ;; This file uses lexical binding and does not load doi-utils, so the
  ;; variable has to be declared special here for the `let*' binding
  ;; below to be a dynamic one that doi-utils can see.
  (defvar doi-utils-download-pdf)
  ;; Remember the pdf up front, so the same file is used for extraction
  ;; and for the final copy.
  (let ((pdf (buffer-file-name)))
    ;; `pdf' is nil in buffers that do not visit a file.
    (unless (and pdf (f-ext? (downcase pdf) "pdf"))
      (error "Buffer is not a pdf file"))
    (let* ((dois (or (org-ref-extract-doi-from-pdf pdf)
                     (error "No doi found in %s" pdf)))
           ;; We already have the pdf, there is no need to download it.
           (doi-utils-download-pdf nil)
           (doi (if (= 1 (length dois))
                    (car dois)
                  (completing-read "Select DOI: " dois))))
      ;; Add bib entry from doi:
      (doi-utils-add-bibtex-entry-from-doi doi)
      ;; Get the pdf to `org-ref-pdf-directory':
      (let ((key (org-ref-bibtex-key-from-doi doi)))
        (funcall org-ref-pdf-to-bibtex-function
                 pdf
                 (expand-file-name (format "%s.pdf" key)
                                   org-ref-pdf-directory))))))
;;;###autoload
;; (defun org-ref-pdf-dnd-func (event)
;; "Drag-n-drop support to add a bibtex entry from a pdf file."
;; (interactive "e")
;; (goto-char (nth 1 (event-start event)))
;; (x-focus-frame nil)
;; (let* ((payload (car (last event)))
;; (pdf (cadr payload))
;; (dois (org-ref-extract-doi-from-pdf pdf)))
;; (cond
;; ((null dois)
;; (message "No doi found in %s" pdf))
;; ((= 1 (length dois))
;; (doi-utils-add-bibtex-entry-from-doi
;; (car dois)
;; (buffer-file-name)))
;; ;; Multiple DOIs found
;; (t
;; (helm :sources `((name . "Select a DOI")
;; (candidates . ,(org-ref-pdf-doi-candidates dois))
;; (action . org-ref-pdf-add-dois)))))))
;; This isn't very flexible, as it hijacks all drag-n-drop events. I switched to
;; using `dnd-protocol-alist'.
;; (define-key bibtex-mode-map (kbd "<drag-n-drop>") 'org-ref-pdf-dnd-func)
;; This is what the original dnd function was.
;; (define-key bibtex-mode-map (kbd "<drag-n-drop>") 'ns-drag-n-drop)
;; I replaced the functionality above with this new approach that leverages
;; ns-drag-n-drop. An alternative approach would be to adapt the function above
;; so that if the item dragged on wasn't a pdf, it would use another function.
;; That is essentially what ns-drag-n-drop enables: multiple handlers for
;; different uris that get dropped on the window.
(defun org-ref-pdf-dnd-protocol (uri action)
  "Drag-n-drop protocol for files dropped on a bibtex buffer.
URI is a string like \"file:path\"; the leading \"file:\" is removed
to get the path.  ACTION is what to do.  It is required for
`dnd-protocol-alist', and it is returned when the drop was handled.

This function only acts when the current buffer visits a file with
a \"bib\" extension:

- A dropped \"pdf\" file is searched for DOIs.  With exactly one DOI
  a bibtex entry is added to the file of the buffer and the pdf is
  copied to `org-ref-pdf-directory', named after the key of the entry
  at point.  With several DOIs a helm session lets you choose the
  ones to add.  With no DOI a message is shown and nil is returned.
- A dropped \"bib\" file has its contents added to the end of the
  buffer.
- Files with any other extension are ignored and nil is returned.

In all other buffers URI is passed back to `dnd-handle-one-url' with
this function removed from `dnd-protocol-alist'."
  ;; This file uses lexical binding.  Declare the variables that are
  ;; let-bound below as special, so that the bindings are dynamic even
  ;; when the libraries defining them are not loaded at compile time.
  (defvar doi-utils-download-pdf)
  (defvar dnd-protocol-alist)
  ;; `url-unhex-string' is used instead of `dnd-unescape-uri', which may
  ;; not be defined in newer Emacs releases (see
  ;; https://github.com/jkitchin/org-ref/issues/812).
  (require 'url-util)
  (if (and (buffer-file-name)
           (f-ext? (buffer-file-name) "bib"))
      (let* ((path (substring uri 5))          ; drop the "file:" prefix
             ;; PATH may still contain percent-escapes, FILE does not.
             (file (url-unhex-string path t)))
        (cond
         ((f-ext? path "pdf")
          ;; `org-ref-extract-doi-from-pdf' unescapes its argument
          ;; itself, so it gets the escaped PATH.
          (let ((dois (org-ref-extract-doi-from-pdf path)))
            (cond
             ((null dois)
              (message "No doi found in %s" path)
              nil)
             ((= 1 (length dois))
              ;; we do not need to get the pdf, since we have one.
              (let ((doi-utils-download-pdf nil))
                (doi-utils-add-bibtex-entry-from-doi
                 (car dois)
                 (buffer-file-name))
                ;; we should copy the pdf to the pdf directory though
                (let ((key (cdr (assoc "=key=" (bibtex-parse-entry)))))
                  (copy-file file
                             (expand-file-name (format "%s.pdf" key)
                                               org-ref-pdf-directory))))
              action)
             ;; Multiple DOIs found
             (t
              (helm :sources `((name . "Select a DOI")
                               (candidates . ,(org-ref-pdf-doi-candidates dois))
                               (action . org-ref-pdf-add-dois)))
              action))))
         ;; drag a bib file on and add contents to the end of the file.
         ((f-ext? path "bib")
          (goto-char (point-max))
          (insert "\n")
          (insert-file-contents file)
          ;; Report the drop as handled by returning the action, like
          ;; the pdf branches do.
          action)))
    ;; ignoring. pass back to dnd. Copied from `org-download-dnd'. Apparently
    ;; returning nil does not do this.
    (let ((dnd-protocol-alist
           (rassq-delete-all
            'org-ref-pdf-dnd-protocol
            (copy-alist dnd-protocol-alist))))
      (dnd-handle-one-url nil action uri))))
;; Make sure `dnd-protocol-alist' is defined before adding to it;
;; presumably dnd is not preloaded in every Emacs build.
(require 'dnd)
(add-to-list 'dnd-protocol-alist '("^file:" . org-ref-pdf-dnd-protocol))
;;;###autoload
(defun org-ref-pdf-dir-to-bibtex (bibfile directory)
  "Fill BIBFILE with entries for the pdf files in DIRECTORY.
BIBFILE is visited and, for each pdf, text is added at the end of
the buffer depending on how many DOIs are found in the pdf:

- none: a comment line saying that no doi was found;
- one: a bibtex entry for it, preceded by a comment line with a
  file link to the pdf;
- several: a comment line saying so, then a helm session to select
  the DOIs to add."
  (interactive (list
                (read-file-name "Bibtex file: ")
                (read-directory-name "Directory: ")))
  (find-file bibfile)
  (goto-char (point-max))
  (dolist (pdf (f-entries directory (lambda (entry) (f-ext? entry "pdf"))))
    ;; Always work at the end of the buffer.
    (goto-char (point-max))
    (let* ((dois (org-ref-extract-doi-from-pdf pdf))
           (count (length dois)))
      (cond
       ((zerop count)
        (insert (format "%% No doi found to create entry in %s.\n" pdf)))
       ((= count 1)
        (doi-utils-add-bibtex-entry-from-doi (car dois) (buffer-file-name))
        (bibtex-beginning-of-entry)
        (insert (format "%% [[file:%s]]\n" pdf)))
       ;; More than one DOI: let the user pick.
       (t
        (insert (format "%% Multiple dois found in %s\n" pdf))
        (helm :sources `((name . "Select a DOI")
                         (candidates . ,(org-ref-pdf-doi-candidates dois))
                         (action . org-ref-pdf-add-dois))))))))
;;;###autoload
(defun org-ref-pdf-debug-pdf (pdf-file)
  "Help debugging the extraction of a doi from PDF-FILE.
The text that `pdftotext-executable' produces for PDF-FILE is put in
the buffer \"*org-ref-pdf debug*\", matches of
`org-ref-pdf-doi-regex' are highlighted in it, and `occur' is run
for the same regexp.  The \"*Occur*\" buffer is then selected in
another window."
  (interactive "fPDF: ")
  (let ((debug-buffer (get-buffer-create "*org-ref-pdf debug*")))
    (switch-to-buffer debug-buffer))
  (erase-buffer)
  (let* ((quoted-file (shell-quote-argument pdf-file))
         (command (format "%s %s -" pdftotext-executable quoted-file)))
    (insert (shell-command-to-string command)))
  (goto-char (point-min))
  (highlight-regexp org-ref-pdf-doi-regex)
  (occur org-ref-pdf-doi-regex)
  (switch-to-buffer-other-window "*Occur*"))
;;;###autoload
(defun org-ref-pdf-crossref-lookup ()
  "Look up the text selected in a PDFView buffer in CrossRef.
Signals an error when no region is active.  Otherwise the pieces of
the selected text are joined with \" \\n\", the region is
deactivated, and the result is passed to `crossref-lookup'."
  (interactive)
  (require 'pdf-view)
  (if (not (pdf-view-active-region-p))
      (error "The region is not active")
    (let ((selection (pdf-view-active-region-text)))
      (pdf-view-deactivate-region)
      (crossref-lookup (mapconcat #'identity selection " \n")))))
(provide 'org-ref-pdf)
;;; org-ref-pdf.el ends here