;;; doi-utils.el --- DOI utilities for making bibtex entries

;; Copyright (C) 2015 John Kitchin

;; Author: John Kitchin
;; Keywords: convenience
;; Version: 0.1
;; Package-Requires: ((org-ref))

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program.  If not, see <https://www.gnu.org/licenses/>.

;;; Commentary:

;; This package provides functionality to download PDFs and bibtex entries from
;; a DOI, as well as to update a bibtex entry from a DOI.  It depends slightly
;; on org-ref, to determine where to save pdf files to, and where to insert
;; bibtex entries in the default bibliography.

;; The principal commands you will use from here are:

;; - doi-utils-get-bibtex-entry-pdf with the cursor in a bibtex entry.
;; - doi-utils-insert-bibtex-entry-from-doi to insert a bibtex entry at your
;;   cursor, clean it and try to get a pdf.
;; - doi-utils-add-bibtex-entry-from-doi to add an entry to your default
;;   bibliography (cleaned with pdf if possible).
;; - doi-utils-update-bibtex-entry-from-doi with cursor in an entry to update
;;   its fields.

;;; Code:

(defvar org-ref-pdf-directory)
(defvar org-ref-bibliography-notes)
(defvar org-ref-default-bibliography)
(defvar reftex-default-bibliography)
(defvar url-http-end-of-headers)

(declare-function org-ref-bib-citation "org-ref-core")
(declare-function org-ref-find-bibliography "org-ref-core")
(declare-function org-ref-clean-bibtex-entry "org-ref-core")
(declare-function reftex-get-bib-field "reftex-cite")
(declare-function bibtex-completion-edit-notes "bibtex-completion")
(declare-function helm "helm")
(declare-function org-bibtex-yank "org-bibtex")
(declare-function org-ref-possible-bibfiles "org-ref-core")

(eval-when-compile
  (require 'cl-lib))
(require 'bibtex)
(require 'dash)
(require 'json)
(require 'org)                          ; org-add-link-type
(or (require 'ol-bibtex nil t)
    (require 'org-bibtex))              ; org-bibtex-yank
(require 'url-http)
(require 'org-ref-utils)


;;* Customization

;; The parent group is `bibtex' (defined by the bibtex library required
;; above).  A group that names itself as its own parent is not attached to
;; any existing customization group.
(defgroup doi-utils nil
  "Customization group for doi-utils."
  :tag "DOI utils"
  :group 'bibtex)

(defcustom doi-utils-download-pdf
  t
  "Try to download PDFs when adding bibtex entries when non-nil."
  :type 'boolean
  :group 'doi-utils)

(defcustom doi-utils-open-pdf-after-download
  nil
  "Open PDF after adding bibtex entries."
  :type 'boolean
  :group 'doi-utils)

(defcustom doi-utils-make-notes
  t
  "Whether to create notes when adding bibtex entries."
  :type 'boolean
  :group 'doi-utils)

(defcustom doi-utils-timestamp-field
  "DATE_ADDED"
  "The bibtex field to store the date when an entry has been added."
  :type 'string
  :group 'doi-utils)

(defcustom doi-utils-timestamp-format-function
  'current-time-string
  "The function to format the timestamp for a bibtex entry.
Set to a function that returns nil to avoid setting timestamps in the entries.
e.g. (lambda () nil)"
  :type 'function
  :group 'doi-utils)

(defcustom doi-utils-make-notes-function
  (lambda ()
    (bibtex-beginning-of-entry)
    (bibtex-completion-edit-notes
     (list (cdr (assoc "=key=" (bibtex-parse-entry))))))
  "Function to create notes for a bibtex entry.

Set `doi-utils-make-notes' to nil if you want no notes."
  :type 'function
  :group 'doi-utils)

(defcustom doi-utils-dx-doi-org-url
  "https://doi.org/"
  "Base url to retrieve doi metadata from. A trailing / is required."
  :type 'string
  :group 'doi-utils)


;;* Getting pdf files from a DOI

;; The idea here is simple.  When you visit http://dx.doi.org/doi or
;; https://doi.org/doi, you get redirected to the journal site.  Once you have
;; the url for the article, you can usually compute the url to the pdf, or find
;; it in the page.  Then you simply download it.

;; There are some subtleties in doing this that are described here.  To get the
;; redirect, we have to use url-retrieve, and a callback function.  The callback
;; does not return anything, so we communicate through global variables.
;; url-retrieve is asynchronous, so we have to make sure to wait for it to
;; finish.

(defvar *doi-utils-waiting* t
  "Stores waiting state for url retrieval.")

(defvar *doi-utils-redirect* nil
  "Stores redirect url from a callback function.")

(defun doi-utils-redirect-callback (&optional status)
  "Callback for `url-retrieve' to set the redirect.
Optional argument STATUS is the status plist handed to the callback.
If it has an :error entry, that error is re-signaled.  Otherwise,
if it has a :redirect entry, that url is stored in
`*doi-utils-redirect*'.  In every case `*doi-utils-waiting*' is
set to nil so that `doi-utils-get-redirect' stops waiting."
  (let ((err (plist-get status :error))
        (redirect (plist-get status :redirect))) ; is nil if there is none
    ;; The waiting flag is cleared in the unwind form so that it is reset even
    ;; when the error is re-signaled.  Otherwise the polling loop in
    ;; `doi-utils-get-redirect' would never terminate after a failed retrieval.
    (unwind-protect
        (cond
         (err (signal (car err) (cdr err)))
         (redirect (setq *doi-utils-redirect* redirect)))
      ;; we have done our job, so we are not waiting any more.
      (setq *doi-utils-waiting* nil))))

;; To actually get the redirect we use url-retrieve like this.

(defun doi-utils-get-redirect (doi)
  "Get redirect url from `doi-utils-dx-doi-org-url'/doi.
DOI is appended to `doi-utils-dx-doi-org-url' to build the url that
is retrieved.  The redirect, if any, is stored in
`*doi-utils-redirect*' by `doi-utils-redirect-callback'.  This
function blocks until the callback sets `*doi-utils-waiting*' to nil."
  ;; we are going to wait until the url-retrieve is done
  (setq *doi-utils-waiting* t)
  ;; start with no redirect. it will be set in the callback.
  (setq *doi-utils-redirect* nil)
  (url-retrieve
   (format "%s%s" doi-utils-dx-doi-org-url doi)
   'doi-utils-redirect-callback)
  ;; I suspect we need to wait here for the asynchronous process to
  ;; finish. we loop and sleep until the callback says it is done via
  ;; `*doi-utils-waiting*'. this works as far as i can tell. Before I
  ;; had to run this a few times to get it to work, which i suspect
  ;; just gave the first one enough time to finish.
  (while *doi-utils-waiting* (sleep-for 0.1)))

;; Once we have a redirect for a particular doi, we need to compute the url to
;; the pdf.  We do this with a series of functions.  Each function takes a single
;; argument, the redirect url.  If it knows how to compute the pdf url it does,
;; and returns it.  We store the functions in a variable:

(defvar doi-utils-pdf-url-functions nil
  "Functions that return a url to a pdf from a redirect url.
Each function takes one argument, the redirect url.  The function
must return a pdf-url, or nil.")

;; NOTE: the functions below name their parameter `*doi-utils-redirect*', the
;; same symbol as the special variable defined above, so a call rebinds that
;; variable for the duration of the call.  The name is kept unchanged.

;;** APS journals

(defun aps-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT*.
Return nil unless the url starts with http(s)://journals.aps.org."
  (when (string-match "^https?://journals\\.aps\\.org" *doi-utils-redirect*)
    (replace-regexp-in-string "/abstract/" "/pdf/" *doi-utils-redirect*)))

;;** Science

(defun science-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT*.
Return nil unless the url starts with http(s)://www.sciencemag.org."
  (when (string-match "^https?://www\\.sciencemag\\.org" *doi-utils-redirect*)
    (concat *doi-utils-redirect* ".full.pdf")))

;;** Nature

(defun nature-pdf-url (*doi-utils-redirect*)
  "Get url to the pdf from *DOI-UTILS-REDIRECT*.
Return nil unless the url starts with http(s)://www.nature.com."
  (when (string-match "^https?://www\\.nature\\.com" *doi-utils-redirect*)
    (let ((result *doi-utils-redirect*))
      (setq result (replace-regexp-in-string "/full/" "/pdf/" result))
      ;; The dot must be written \\. in the Lisp string: a single backslash is
      ;; dropped by the reader and the regexp would match any character
      ;; followed by "html".
      (replace-regexp-in-string "\\.html$" ".pdf" result))))

;;** Elsevier/ScienceDirect
;; You cannot compute these pdf links; they are embedded in the redirected pages.

(defvar *doi-utils-pdf-url* nil
  "Stores url to pdf download from a callback function.")

;;** Wiley
;; http://onlinelibrary.wiley.com/doi/10.1002/anie.201402680/abstract
;; http://onlinelibrary.wiley.com/doi/10.1002/anie.201402680/pdf

;; It appears that it is not enough to use the pdf url above.
That takes you to ;; an html page. The actual link to teh pdf is embedded in that page. This is ;; how ScienceDirect does things too. ;; This is where the link is hidden: ;; (defun doi-utils-get-wiley-pdf-url (redirect-url) "Wileyscience direct hides the pdf url in html. We get it out here by parsing the html. Argument REDIRECT-URL URL you are redirected to." (setq *doi-utils-waiting* t) (url-retrieve redirect-url (lambda (status) (goto-char (point-min)) (re-search-forward "