Klimi's new dotfiles with stow.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

676 lines
32 KiB

4 years ago
  1. ;;; parsebib.el --- A library for parsing bib files -*- lexical-binding: t -*-
  2. ;; Copyright (c) 2014-2017 Joost Kremers
  3. ;; All rights reserved.
  4. ;; Author: Joost Kremers <joostkremers@fastmail.fm>
  5. ;; Maintainer: Joost Kremers <joostkremers@fastmail.fm>
  6. ;; Created: 2014
  7. ;; Version: 2.3
  8. ;; Package-Version: 20181219.928
  9. ;; Keywords: text bibtex
  10. ;; Package-Requires: ((emacs "24.3"))
  11. ;; Redistribution and use in source and binary forms, with or without
  12. ;; modification, are permitted provided that the following conditions
  13. ;; are met:
  14. ;;
  15. ;; 1. Redistributions of source code must retain the above copyright
  16. ;; notice, this list of conditions and the following disclaimer.
  17. ;; 2. Redistributions in binary form must reproduce the above copyright
  18. ;; notice, this list of conditions and the following disclaimer in the
  19. ;; documentation and/or other materials provided with the distribution.
  20. ;; 3. The name of the author may not be used to endorse or promote products
  21. ;; derived from this software without specific prior written permission.
  22. ;;
  23. ;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  24. ;; IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  25. ;; OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  26. ;; IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  27. ;; INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  28. ;; NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES ; LOSS OF USE,
  29. ;; DATA, OR PROFITS ; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  30. ;; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  31. ;; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  32. ;; THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  33. ;;; Commentary:
  34. ;;
  35. ;;; Code:
  36. (require 'bibtex)
  37. (require 'cl-lib)
  38. (eval-when-compile (require 'subr-x)) ; for `string-join'.
  (defvar parsebib--biblatex-inheritances
    (let* ((suppress
            ;; Turn a list of field names into rules that block inheritance.
            (lambda (fields)
              (mapcar (lambda (field) (cons field 'none)) fields)))
           (title-rules
            ;; Rules mapping the title fields of a parent entry onto the
            ;; PREFIX-ed fields of its children, e.g. "title" -> "maintitle".
            ;; The titleaddon rule is included only if WITH-ADDON is non-nil.
            ;; The derived title fields are never inherited.
            (lambda (prefix with-addon)
              (append
               (list (cons "title" (concat prefix "title"))
                     (cons "subtitle" (concat prefix "subtitle")))
               (when with-addon
                 (list (cons "titleaddon" (concat prefix "titleaddon"))))
               (funcall suppress '("shorttitle" "sorttitle"
                                   "indextitle" "indexsorttitle"))))))
      (list
       (list "all"
             "all"
             (funcall suppress '("ids" "crossref" "xref" "entryset"
                                 "entrysubtype" "execute" "label" "options"
                                 "presort" "related" "relatedoptions"
                                 "relatedstring" "relatedtype" "shorthand"
                                 "shorthandintro" "sortkey")))
       (list "mvbook, book"
             "inbook, bookinbook, suppbook"
             (list (cons "author" "author")
                   (cons "author" "bookauthor")))
       (list "mvbook"
             "book, inbook, bookinbook, suppbook"
             (funcall title-rules "main" t))
       (list "mvcollection, mvreference"
             "collection, reference, incollection, inreference, suppcollection"
             (funcall title-rules "main" t))
       (list "mvproceedings"
             "proceedings, inproceedings"
             (funcall title-rules "main" t))
       (list "book"
             "inbook, bookinbook, suppbook"
             (funcall title-rules "book" t))
       (list "collection, reference"
             "incollection, inreference, suppcollection"
             (funcall title-rules "book" t))
       (list "proceedings"
             "inproceedings"
             (funcall title-rules "book" t))
       (list "periodical"
             "article, suppperiodical"
             (funcall title-rules "journal" nil))))
    "Inheritance scheme for BibLaTeX cross-referencing.
  Each element has the form (SOURCE-TYPES TARGET-TYPES FIELDS).  The
  source is the cross-referenced entry, the target is the entry that
  does the cross-referencing.  SOURCE-TYPES and TARGET-TYPES are
  strings holding a single entry type or a comma-separated list of
  entry types; the string \"all\" marks rules that hold for every
  entry type.

  FIELDS is an alist of (SOURCE-FIELD . TARGET-FIELD) pairs, saying
  which field of the target a field of the source is inherited
  into.  A TARGET-FIELD that is the symbol `none' means the source
  field is not inherited at all.

  A field for which no rule exists is inherited into the field of
  the same name in the cross-referencing entry.")
  ;; Regexps for BibTeX identifiers and entry keys.  The characters $ ^ & are
  ;; valid in BibTeX keys, but they are special for TeX and may therefore
  ;; still cause trouble.  The only difference between
  ;; `parsebib--bibtex-identifier' and `parsebib--key-regexp' is that the
  ;; latter also accepts parentheses (), which are valid in keys.  It may not
  ;; be necessary (or even desirable) to keep the two apart, but they stay
  ;; separate until someone complains.
  (defconst parsebib--bibtex-identifier
    "[^\"@\\#%',={}() \t\n\f]+"
    "Regexp describing a licit BibTeX identifier.")

  (defconst parsebib--key-regexp
    "[^\"@\\#%',={} \t\n\f]+"
    "Regexp describing a licit key.")

  (defconst parsebib--entry-start
    "^[ \t]*@"
    "Regexp describing the start of an entry.")
  ;; Emacs 24.3 compatibility: define `define-error' when it is missing.
  (unless (fboundp 'define-error)
    ;; Behaves like the definition in the Emacs 24.4 sources.
    (defun define-error (name message &optional parent)
      "Define NAME as a new error signal.
  MESSAGE is a string that will be output to the echo area if such
  an error is signaled without being caught by a `condition-case'.
  PARENT is either a signal or a list of signals from which NAME
  inherits.  It defaults to `error'."
      (let* ((ancestors (or parent 'error))
             (inherited
              (if (consp ancestors)
                  ;; Several parents: collect each parent followed by its
                  ;; own conditions; an unknown parent is an error.
                  (apply #'nconc
                         (mapcar (lambda (sig)
                                   (cons sig
                                         (or (get sig 'error-conditions)
                                             (error "Unknown signal `%s'" sig))))
                                 ancestors))
                (cons ancestors (get ancestors 'error-conditions)))))
        (put name 'error-conditions
             (delete-dups (copy-sequence (cons name inherited))))
        (if message
            (put name 'error-message message)))))

  ;; Signaled by `parsebib-find-next-item' when no valid item name follows @.
  (define-error 'parsebib-entry-type-error "Illegal entry type" 'error)
  168. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  169. ;; matching and parsing stuff ;;
  170. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  (defun parsebib--looking-at-goto-end (str &optional match)
    "Move point past the text matching regexp STR, if point is looking at it.
  The test is the same as `looking-at', except that case is always
  ignored.  On success, point is moved to the end of submatch MATCH
  and the new position is returned.  MATCH has the same meaning as
  the argument of `match-end' and defaults to 0, the whole match.
  If the text at point does not match STR, point is not moved and
  the return value is nil."
    (let ((case-fold-search t))
      (when (looking-at str)
        (goto-char (match-end (or match 0))))))
  (defun parsebib--match-paren-forward ()
    "Move past the group opened by the brace or parenthesis at point.
  If point is on an opening brace {, move to just after the
  matching closing brace with `parsebib--match-brace-forward' and
  return its value.  If point is on an opening parenthesis (, the
  work is delegated to `bibtex-end-of-entry' and its value is
  returned.  If point is on any other character, do nothing and
  return nil."
    (cl-case (char-after)
      (?\{ (parsebib--match-brace-forward))
      (?\( (bibtex-end-of-entry))))
  (defun parsebib--match-delim-forward ()
    "Move past the string opened by the delimiter at point.
  The delimiter is either an opening brace {, handled by
  `parsebib--match-brace-forward', or a double quote \", handled by
  `parsebib--match-quote-forward'.  Return the value of the function
  that was called, or nil if point is not on either delimiter."
    (let ((delim (char-after)))
      (cond
       ((eq delim ?\{) (parsebib--match-brace-forward))
       ((eq delim ?\") (parsebib--match-quote-forward)))))
  (defun parsebib--match-brace-forward ()
    "Move point to just after the brace matching the opening brace at point.
  The movement is done with `forward-sexp', using
  `bibtex-braced-string-syntax-table'.  Return t; if `forward-sexp'
  signals an error, that error is not caught here."
    (with-syntax-table bibtex-braced-string-syntax-table
      (forward-sexp))
    ;; Getting here means `forward-sexp' did not signal an error.
    t)
  (defun parsebib--match-quote-forward ()
    "Move point to just after the double quote closing the one at point.
  The movement is done with `forward-sexp', using
  `bibtex-quoted-string-syntax-table'.  Return t; if `forward-sexp'
  signals an error, that error is not caught here."
    (with-syntax-table bibtex-quoted-string-syntax-table
      (forward-sexp))
    ;; Getting here means `forward-sexp' did not signal an error.
    t)
  (defun parsebib--parse-value (limit &optional strings)
    "Parse the value at point and return it as a string.
  The value is either a field value or the expansion of a @String
  definition.  Parsing stops at the first comma outside a braced or
  quoted part, or at buffer position LIMIT, whichever comes first.

  The value is split into its parts: braced or quoted strings and
  bare identifiers, separated by the concatenation operator #.  If
  STRINGS is nil, the parts are returned unchanged, joined with
  \" # \".  If STRINGS is non-nil, it should be a hash table of
  @String definitions; the parts are then passed through
  `parsebib--expand-strings', which expands abbreviations, removes
  the outer delimiters and normalizes white space, and the results
  are concatenated."
    (let (parts)
      (while (and (< (point) limit)
                  (not (looking-at-p ",")))
        (cond
         ;; A braced or quoted part: keep it, delimiters included.
         ((looking-at-p "[{\"]")
          (push (buffer-substring-no-properties
                 (point)
                 (progn (parsebib--match-delim-forward) (point)))
                parts))
         ;; A bare identifier, e.g. a @String abbreviation.
         ((looking-at parsebib--bibtex-identifier)
          (push (match-string-no-properties 0) parts)
          (goto-char (match-end 0)))
         ;; The concatenation operator and the white space around it.
         ((looking-at "[[:space:]]*#[[:space:]]*")
          (goto-char (match-end 0)))
         ;; Anything else: step over it, so that the loop always advances.
         (t (forward-char 1))))
      (setq parts (nreverse parts))
      (if strings
          (string-join (parsebib--expand-strings parts strings))
        (string-join parts " # "))))
  238. ;;;;;;;;;;;;;;;;;;;;;
  239. ;; expanding stuff ;;
  240. ;;;;;;;;;;;;;;;;;;;;;
  (defun parsebib--expand-strings (strings abbrevs)
    "Return the strings in STRINGS with @String abbreviations expanded.
  STRINGS is a list of strings, ABBREVS a hash table mapping
  abbreviations to their expansions.  In each string, every run of
  spaces, tabs, newlines and form feeds is first replaced with a
  single space.  If the result is a key in ABBREVS, it is replaced
  with its expansion.  Otherwise, if it is enclosed in braces {} or
  double quotes \"\", those delimiters are removed.  Any other
  string is returned with only its white space normalized."
    (cl-loop for raw in strings
             for str = (replace-regexp-in-string "[ \t\n\f]+" " " raw)
             collect (or (gethash str abbrevs)
                         (if (string-match "\\`[\"{]\\(.*?\\)[\"}]\\'" str)
                             (match-string 1 str)
                           str))))
  (defun parsebib-expand-xrefs (entries inheritance)
    "Expand cross-referencing items in ENTRIES.
  ENTRIES is a hash table mapping entry keys to entry alists.  Each
  entry that has a `crossref' field is supplemented with the fields
  it inherits from the cross-referenced entry, and the hash table
  is updated in place.  The field name is matched regardless of
  case, so `Crossref' and `CROSSREF' are recognized as well.
  Entries whose `crossref' field points to a key that is not in
  ENTRIES are left untouched.  The return value is always nil.

  INHERITANCE indicates the inheritance schema.  It can be a symbol
  `BibTeX' or `biblatex', or it can be an explicit inheritance
  schema.  See the variable `parsebib--biblatex-inheritances' for
  details on the structure of such an inheritance schema."
    (maphash
     (lambda (key fields)
       ;; Field names are stored the way they appear in the file, so the
       ;; lookup must ignore case.
       (let ((xref (cdr (assoc-string "crossref" fields 'case-fold))))
         (when xref
           ;; The value still has its delimiters if @Strings were not
           ;; expanded; strip them to obtain the bare key.
           (when (string-match-p
                  (concat "\\b[\"{]" parsebib--key-regexp "[\"}]\\b") xref)
             (setq xref (substring xref 1 -1)))
           (let ((updated-entry (parsebib--get-xref-fields
                                 fields (gethash xref entries) inheritance)))
             ;; Only the entry currently being visited is replaced, which
             ;; `maphash' allows.
             (when updated-entry
               (puthash key updated-entry entries))))))
     entries))
  (defun parsebib--get-xref-fields (target-entry source-entry inheritance)
    "Return TARGET-ENTRY supplemented with fields inherited from SOURCE-ENTRY.
  TARGET-ENTRY and SOURCE-ENTRY are entry alists.  Fields in
  SOURCE-ENTRY for which TARGET-ENTRY has no value are added to
  TARGET-ENTRY.  The return value is a new alist consisting of
  TARGET-ENTRY followed by the inherited fields, or nil if either
  entry is nil.

  INHERITANCE is an inheritance schema.  It can either be one of
  the symbols `BibTeX' or `biblatex', or it can be an explicit
  inheritance schema.  See the variable
  `parsebib--biblatex-inheritances' for details on the structure of
  such an inheritance schema.

  All rules in the schema whose source and target types match the
  two entries are taken into account, followed by the rules for
  \"all\".  If several rules mention the same source field, the one
  that comes first in the schema wins.  Entry types are compared
  regardless of case."
    (when (and target-entry source-entry)
      (when (eq inheritance 'biblatex)
        (setq inheritance parsebib--biblatex-inheritances))
      (let* ((case-fold-search t)       ; Entry types are case-insensitive.
             (source-type (cdr (assoc-string "=type=" source-entry)))
             (target-type (cdr (assoc-string "=type=" target-entry)))
             (listed-p
              ;; Non-nil if entry type TYPE occurs as a word in TYPES, a
              ;; string holding a comma-separated list of entry types.  TYPE
              ;; is quoted because it is arbitrary text from the parsed file.
              (lambda (type types)
                (and (stringp type)
                     (stringp types)
                     (string-match-p
                      (concat "\\b" (regexp-quote type) "\\b") types))))
             (inheritable-fields
              (unless (eq inheritance 'BibTeX)
                (append
                 ;; Collect the rules of every matching schema element, not
                 ;; just the first one: in the default schema the pair
                 ;; mvbook -> inbook matches both the author rules and the
                 ;; title rules.
                 (cl-loop for rule in inheritance
                          when (and (funcall listed-p source-type (cl-first rule))
                                    (funcall listed-p target-type (cl-second rule)))
                          append (cl-third rule))
                 (cl-third (assoc-string "all" inheritance)))))
             (new-fields
              (delq nil
                    (mapcar
                     (lambda (field)
                       (let ((target-field (parsebib--get-target-field
                                            (car field) inheritable-fields)))
                         ;; Never overwrite a field the target already has.
                         (if (and target-field
                                  (not (assoc-string target-field target-entry
                                                     'case-fold)))
                             (cons target-field (cdr field)))))
                     source-entry))))
        (append target-entry new-fields))))
  (defun parsebib--get-target-field (source-field inheritances)
    "Return the field that inherits the value of SOURCE-FIELD.
  INHERITANCES is an alist of (SOURCE-FIELD . TARGET-FIELD) pairs
  and may be nil.  SOURCE-FIELD is looked up in it regardless of
  case.  If the target found is the symbol `none', no inheritance
  should take place and nil is returned.  If INHERITANCES has no
  target for SOURCE-FIELD, SOURCE-FIELD itself is returned."
    ;; Note: INHERITANCES is not a full inheritance schema like the
    ;; INHERITANCE argument of the cross-reference functions, but only the
    ;; alist of field pairs taken from such a schema.
    (let ((target (cdr (assoc-string source-field inheritances 'case-fold))))
      (if target
          (unless (eq target 'none)
            target)
        source-field)))
  320. ;;;;;;;;;;;;;;;;;;;
  321. ;; low-level API ;;
  322. ;;;;;;;;;;;;;;;;;;;
  (defun parsebib-find-next-item (&optional pos)
    "Find the first (potential) BibTeX item following POS.
  An item starts with an @ at the beginning of a line, possibly
  preceded by spaces or tabs, and followed by a string of
  characters as defined by `parsebib--bibtex-identifier'.  If an
  item is found, point is left right after the item's name, i.e.,
  generally on the opening brace or parenthesis following the entry
  type, \"@Comment\", \"@Preamble\" or \"@String\".

  Return the name of the item as a string: \"Comment\",
  \"Preamble\", \"String\" or the entry type, always without the @.
  If the @ is not followed by a valid name, signal an error of type
  `parsebib-entry-type-error', with the position after the @ as
  data.  If no item is found, return nil and leave point at the end
  of the buffer.

  POS can be a number or a marker and defaults to point."
    (if pos (goto-char pos))
    ;; A NOERROR argument of 0 makes a failed search move point to the end
    ;; of the buffer.
    (when (re-search-forward parsebib--entry-start nil 0)
      (let ((name-regexp (concat "\\(" parsebib--bibtex-identifier
                                 "\\)[[:space:]]*[\(\{]?")))
        (unless (parsebib--looking-at-goto-end name-regexp 1)
          (signal 'parsebib-entry-type-error (list (point))))
        (match-string-no-properties 1))))
  (defun parsebib-read-comment (&optional pos)
    "Read the @Comment beginning at the line POS is on.
  Return the text of the @Comment, starting right after the word
  `@comment'.  If the comment is delimited by braces or
  parentheses, the text runs up to the position where
  `parsebib--match-paren-forward' leaves point, so the braces are
  included.  Otherwise the comment lasts until the end of the line
  and the text includes the white space between `@comment' and the
  actual comment.  If the line does not start with a @Comment,
  return nil.

  POS can be a number or a marker.  It does not have to be at the
  beginning of a line, but the @Comment must start at the beginning
  of the line POS is on.  If POS is nil, it defaults to point."
    (if pos (goto-char pos))
    (beginning-of-line)
    (when (parsebib--looking-at-goto-end
           (concat parsebib--entry-start "\\(comment\\)[[:space:]]*[\(\{]?") 1)
      (let ((start (point)))
        (cond
         ;; Delimited comment: skip to the delimiter, then over the group.
         ((looking-at-p "[[:space:]]*[\(\{]")
          (skip-chars-forward "[:space:]")
          (parsebib--match-paren-forward))
         ;; Undelimited comment: it runs to the end of the line.
         (t (goto-char (line-end-position))))
        (buffer-substring-no-properties start (point)))))
  (defun parsebib-read-string (&optional pos strings)
    "Read the @String definition beginning at the line POS is on.
  If a proper abbreviation and expansion are found, they are
  returned as a cons cell (<abbrev> . <expansion>) and point is
  moved to the end of the definition.  Otherwise, nil is returned.
  In particular, nil is returned if the opening delimiter is not
  immediately followed by an identifier and an equals sign.

  POS can be a number or a marker.  It does not have to be at the
  beginning of a line, but the @String entry must start at the
  beginning of the line POS is on.  If POS is nil, it defaults to
  point.

  If STRINGS is provided it should be a hash table with string
  abbreviations, which are used to expand abbrevs in the string's
  expansion."
    (interactive)
    (when pos (goto-char pos))
    (beginning-of-line)
    (when (parsebib--looking-at-goto-end
           (concat parsebib--entry-start "\\(string[[:space:]]*\\)[\(\{]") 1)
      ;; Point is now on the opening delimiter; LIMIT is the end of the
      ;; definition.
      (let ((limit (save-excursion
                     (parsebib--match-paren-forward)
                     (point))))
        ;; Match group 1 is only meaningful if this regexp matched.  Without
        ;; the check, stale match data from an earlier match would be
        ;; returned as the abbreviation.
        (when (parsebib--looking-at-goto-end
               (concat "[({]\\(" parsebib--bibtex-identifier
                       "\\)[[:space:]]*=[[:space:]]*"))
          ;; Save the abbreviation before parsing the value, which changes
          ;; the match data.
          (let* ((abbr (match-string-no-properties 1))
                 (expansion (parsebib--parse-value limit strings)))
            (goto-char limit)
            (cons abbr expansion))))))
  (defun parsebib-read-preamble (&optional pos)
    "Read the @Preamble definition at the line POS is on.
  Return the preamble as a string, including the delimiters
  surrounding the preamble text.  Return nil if the line does not
  start with a @Preamble followed by an opening brace or
  parenthesis, or if `parsebib--match-paren-forward' returns nil.

  POS can be a number or a marker.  It does not have to be at the
  beginning of a line, but the @Preamble must start at the
  beginning of the line POS is on.  If POS is nil, it defaults to
  point."
    (if pos (goto-char pos))
    (beginning-of-line)
    (and (parsebib--looking-at-goto-end
          (concat parsebib--entry-start "\\(preamble[[:space:]]*\\)[\(\{]") 1)
         ;; Point is on the opening delimiter now.
         (let ((start (point)))
           (and (parsebib--match-paren-forward)
                (buffer-substring-no-properties start (point))))))
  (defun parsebib-read-entry (type &optional pos strings)
    "Read a BibTeX entry of type TYPE at the line POS is on.
  TYPE should be a string and should not contain the @ sign.  It is
  matched literally, ignoring case.  The return value is the entry
  as an alist of (<field> . <contents>) cons pairs, or nil if no
  entry was found.  In this alist, the entry key is provided in the
  field \"=key=\" and the entry type in the field \"=type=\".  If no
  key can be read, the key is the empty string.

  POS can be a number or a marker.  It does not have to be at the
  beginning of a line, but the entry must start at the beginning of
  the line POS is on.  If POS is nil, it defaults to point.

  TYPE should not be \"Comment\", \"Preamble\" or \"String\" (in
  which case nil is returned), but is otherwise not limited to any
  set of possible entry types.  If so required, the calling
  function has to ensure that the entry type is valid.

  If STRINGS is provided, it should be a hash table with string
  abbreviations, which are used to expand abbrevs in the entry's
  fields."
    (unless (member-ignore-case type '("comment" "preamble" "string"))
      (when pos (goto-char pos))
      (beginning-of-line)
      ;; TYPE is quoted because identifiers may contain characters that are
      ;; special in regexps, such as + . * and [.
      (when (parsebib--looking-at-goto-end
             (concat parsebib--entry-start (regexp-quote type) "[[:space:]]*[\(\{]"))
        ;; Find the end of the entry and the beginning of the entry key.
        (let* ((limit (save-excursion
                        ;; Point is just after the opening delimiter; step
                        ;; back onto it to find its match.
                        (backward-char)
                        (parsebib--match-paren-forward)
                        (point)))
               (beg (progn
                      (skip-chars-forward " \n\t\f") ; note the space!
                      (point)))
               ;; If no key is found, pretend it is empty and try to read
               ;; the entry anyway.
               (key (or (when (parsebib--looking-at-goto-end
                               (concat "\\(" parsebib--key-regexp "\\)[ \t\n\f]*,") 1)
                          (buffer-substring-no-properties beg (point)))
                        "")))
          (skip-chars-forward "^," limit) ; Move to the comma after the key.
          (let ((fields (cl-loop for field = (parsebib--find-bibtex-field limit strings)
                                 while field collect field)))
            ;; The order of the result is kept as it always was: the fields
            ;; in reverse order of appearance, then the type and the key.
            (push (cons "=type=" type) fields)
            (push (cons "=key=" key) fields)
            (nreverse fields))))))
  (defun parsebib--find-bibtex-field (limit &optional strings)
    "Find the field after point.
  Characters that cannot start a field name are skipped first.  Do
  not search beyond LIMIT (a buffer position).  Return a cons
  (FIELD . VALUE), or nil if LIMIT is reached or if point does not
  end up on a field name followed by an equals sign.

  If STRINGS is provided it should be a hash table with string
  abbreviations, which are used to expand abbrevs in the field's
  value."
    ;; Move to the first character of the field name.
    (skip-chars-forward "\"#%'(),={} \n\t\f" limit)
    (when (and (< (point) limit)        ; Not yet at the end of the entry.
               (parsebib--looking-at-goto-end
                (concat "\\(" parsebib--bibtex-identifier "\\)[[:space:]]*=[[:space:]]*")
                1))
      ;; The name must be extracted before the value is parsed, because
      ;; parsing changes the match data.
      (let ((field-name (match-string-no-properties 1)))
        (cons field-name (parsebib--parse-value limit strings)))))
  454. ;;;;;;;;;;;;;;;;;;;;
  455. ;; high-level API ;;
  456. ;;;;;;;;;;;;;;;;;;;;
  (defun parsebib-collect-preambles ()
    "Collect all @Preamble definitions in the current buffer.
  Return a list of strings, each string a separate @Preamble, in
  the order in which they appear in the buffer.  A @Preamble that
  cannot be read is skipped.  Point is not moved."
    (save-excursion
      (goto-char (point-min))
      (let (res)
        (cl-loop for item = (parsebib-find-next-item)
                 while item do
                 (when (cl-equalp item "preamble")
                   (let ((preamble (parsebib-read-preamble)))
                     ;; `parsebib-read-preamble' returns nil for a malformed
                     ;; @Preamble; keep such nils out of the result.
                     (when preamble
                       (push preamble res)))))
        (nreverse res))))
  (defun parsebib-collect-comments ()
    "Collect all @Comment definitions in the current buffer.
  Return a list of strings, each string a separate @Comment, in
  the order in which they appear in the buffer.  A @Comment for
  which `parsebib-read-comment' returns nil is left out.  Point is
  not moved."
    (save-excursion
      (goto-char (point-min))
      (let (item comment comments)
        (while (setq item (parsebib-find-next-item))
          (when (cl-equalp item "comment")
            (setq comment (parsebib-read-comment))
            (if comment
                (push comment comments))))
        (nreverse comments))))
  (defun parsebib-collect-strings (&optional hash expand-strings)
    "Collect all @String definitions in the current buffer.
  Return value is a hash table with the abbreviations as keys and
  the expansions as values.  If HASH is a hash table with test
  function `equal', it is used to store the @String definitions;
  otherwise a new hash table is created.  If EXPAND-STRINGS is
  non-nil, @String expansions are expanded themselves using the
  @String definitions already stored in HASH.  A @String definition
  that cannot be read is skipped.  Point is not moved."
    (or (and (hash-table-p hash)
             (eq 'equal (hash-table-test hash)))
        (setq hash (make-hash-table :test #'equal)))
    (save-excursion
      (goto-char (point-min))
      (cl-loop with string = nil
               for item = (parsebib-find-next-item)
               while item do
               (when (cl-equalp item "string")
                 (setq string (parsebib-read-string nil (if expand-strings hash)))
                 ;; `parsebib-read-string' returns nil for a malformed
                 ;; definition; do not store a nil key in that case.
                 (when string
                   (puthash (car string) (cdr string) hash))))
      hash))
  (defun parsebib-collect-entries (&optional hash strings inheritance)
    "Collect all entries in the current buffer.
  Return a hash table containing the entries, with the entry keys
  as hash keys.  If HASH is a hash table with test function
  `equal', the entries are stored in it; otherwise a new hash table
  is created.  If STRINGS is non-nil, it should be a hash table of
  string definitions, which are used to expand abbreviations used
  in the entries.  Point is not moved.

  If INHERITANCE is non-nil, cross-references are resolved once
  all entries have been read: if the crossref field of an entry
  points to an entry in the hash table, the fields of the latter
  that do not occur in the entry are added to it.  INHERITANCE
  indicates the inheritance schema used for determining which
  fields inherit from which fields.  It can be a symbol `BibTeX' or
  `biblatex', or it can be an explicit inheritance schema.  (See
  the variable `parsebib--biblatex-inheritances' for details on the
  structure of such an inheritance schema.)  It can also be the
  symbol t, in which case the dialect is taken from the local
  variable block of the buffer, or, if none is found there, from
  the variable `bibtex-dialect', or, if that is nil as well, is
  assumed to be `BibTeX'."
    (unless (and (hash-table-p hash)
                 (eq (hash-table-test hash) 'equal))
      (setq hash (make-hash-table :test #'equal)))
    (when (eq inheritance t)
      (setq inheritance (or (parsebib-find-bibtex-dialect)
                            bibtex-dialect
                            'BibTeX)))
    (save-excursion
      (goto-char (point-min))
      (let (entry-type entry)
        (while (setq entry-type (parsebib-find-next-item))
          (unless (member-ignore-case entry-type '("preamble" "string" "comment"))
            (setq entry (parsebib-read-entry entry-type nil strings))
            (when entry
              (puthash (cdr (assoc-string "=key=" entry)) entry hash)))))
      (if inheritance
          (parsebib-expand-xrefs hash inheritance))
      hash))
  (defun parsebib-find-bibtex-dialect ()
    "Find the BibTeX dialect of a file if one is set.
  Search backward from the end of the buffer, through at most the
  last 3000 characters, for a line starting with @Comment.  If that
  comment is a local variable block, i.e., if it starts with an
  opening brace followed by `Local Variables:' and ends with `End:'
  followed by a closing brace, and if it sets `bibtex-dialect' to
  one of the dialects in `bibtex-dialect-list', return that dialect
  as a symbol.  Return nil otherwise.  Case is ignored in all of
  these tests.  Point is not moved."
    (save-excursion
      (goto-char (point-max))
      (let ((case-fold-search t))
        (when (re-search-backward (concat parsebib--entry-start "comment")
                                  (- (point-max) 3000) t)
          (let ((block (parsebib-read-comment)))
            (and block
                 (string-match-p "\\`{[ \n\t\r]*Local Variables:" block)
                 (string-match-p "End:[ \n\t\r]*}\\'" block)
                 ;; Group 1 captures the name of the dialect.
                 (string-match (concat "bibtex-dialect: "
                                       (regexp-opt (mapcar #'symbol-name bibtex-dialect-list) t))
                               block)
                 (intern (match-string 1 block))))))))
  (defun parsebib-parse-buffer (&optional entries strings expand-strings inheritance)
    "Parse the current buffer and return all BibTeX data.
  Return list of five elements: a hash table with the entries, a
  hash table with the @String definitions, a list of @Preamble
  definitions, a list of @Comments and the BibTeX dialect.  The
  dialect is taken from the local variable block of the buffer, or,
  if none is found there, from the variable `bibtex-dialect', or,
  if that is nil as well, is `BibTeX'.  The lists of @Preambles and
  @Comments contain strings only, in buffer order; items that
  cannot be read are skipped.  Point is not moved.

  If ENTRIES is a hash table with test function `equal', it is used
  to store the entries.  Any existing entries with identical keys
  are overwritten.  Similarly, if STRINGS is a hash table with test
  function `equal', the @String definitions are stored in it.

  If EXPAND-STRINGS is non-nil, abbreviations in the entries and
  @String definitions are expanded using the @String definitions
  already in STRINGS.

  If INHERITANCE is non-nil, cross-references in the entries are
  resolved: if the crossref field of an entry points to an entry
  in ENTRIES, the fields of the latter that do not occur in the
  entry are added to it.  INHERITANCE indicates the inheritance
  schema used for determining which fields inherit from which
  fields.  It can be a symbol `BibTeX' or `biblatex', which means
  to use the default inheritance schema for either dialect, or it
  can be an explicit inheritance schema.  (See the variable
  `parsebib--biblatex-inheritances' for details on the structure of
  such an inheritance schema.)  It can also be the symbol t, in
  which case the dialect determined as described above is used."
    (save-excursion
      (goto-char (point-min))
      (or (and (hash-table-p entries)
               (eq (hash-table-test entries) 'equal))
          (setq entries (make-hash-table :test #'equal)))
      (or (and (hash-table-p strings)
               (eq (hash-table-test strings) 'equal))
          (setq strings (make-hash-table :test #'equal)))
      (let ((dialect (or (parsebib-find-bibtex-dialect)
                         bibtex-dialect
                         'BibTeX))
            preambles comments)
        (cl-loop for item = (parsebib-find-next-item)
                 while item do
                 (cond
                  ((cl-equalp item "string") ; `cl-equalp' compares strings case-insensitively.
                   (let ((string (parsebib-read-string nil (if expand-strings strings))))
                     (if string
                         (puthash (car string) (cdr string) strings))))
                  ;; The read functions return nil for malformed items; such
                  ;; nils are kept out of the result lists.
                  ((cl-equalp item "preamble")
                   (let ((preamble (parsebib-read-preamble)))
                     (when preamble
                       (push preamble preambles))))
                  ((cl-equalp item "comment")
                   (let ((comment (parsebib-read-comment)))
                     (when comment
                       (push comment comments))))
                  ((stringp item)
                   (let ((entry (parsebib-read-entry item nil (if expand-strings strings))))
                     (when entry
                       (puthash (cdr (assoc-string "=key=" entry)) entry entries))))))
        (when inheritance
          (parsebib-expand-xrefs entries (if (eq inheritance t) dialect inheritance)))
        (list entries strings (nreverse preambles) (nreverse comments) dialect))))
  608. (provide 'parsebib)
  609. ;;; parsebib.el ends here