Klimi's new dotfiles with stow.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

278 lines
12 KiB

5 years ago
  1. ;;; parseclj-parser.el --- Clojure/EDN parser -*- lexical-binding: t; -*-
  2. ;; Copyright (C) 2017-2018 Arne Brasseur
  3. ;; Author: Arne Brasseur <arne@arnebrasseur.net>
  4. ;; Keywords: lisp
  5. ;; Package-Requires: ((emacs "25") (a "0.1.0alpha4"))
  6. ;; Version: 0.1.0
  7. ;; This file is not part of GNU Emacs.
  8. ;; This file is free software; you can redistribute it and/or modify
  9. ;; it under the terms of the GNU General Public License as published by
  10. ;; the Free Software Foundation; either version 3, or (at your option)
  11. ;; any later version.
  12. ;; This file is distributed in the hope that it will be useful,
  13. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ;; GNU General Public License for more details.
  16. ;; You should have received a copy of the GNU General Public License
  17. ;; along with GNU Emacs; see the file COPYING. If not, write to
  18. ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  19. ;; Boston, MA 02110-1301, USA.
  20. ;;; Commentary:
  21. ;; A shift/reduce parser for Clojure source.
  22. ;;; Code:
  23. (require 'cl-lib)
  24. (require 'subr-x)
  25. (require 'a)
  26. (require 'parseclj-lex)
  27. (define-error 'parseclj-parser-error "parseclj: Syntax error")
  28. (defun parseclj--error (format &rest args)
  29. "Signal a parse error.
  30. Takes a FORMAT string and optional ARGS to be passed to
  31. `format-message'. Signals a 'parseclj-parser-error signal, which
  32. can be handled with `condition-case'."
  33. (signal 'parseclj-parser-error (list (apply #'format-message format args))))
  34. (defun parseclj--find-opening-token (stack closing-token)
  35. "Scan STACK for an opening-token matching CLOSING-TOKEN."
  36. (cl-case (parseclj-lex-token-type closing-token)
  37. (:rparen (parseclj-lex-token-type
  38. (seq-find (lambda (token)
  39. (member (parseclj-lex-token-type token)
  40. '(:lparen :lambda)))
  41. stack)))
  42. (:rbracket :lbracket)
  43. (:rbrace (parseclj-lex-token-type
  44. (seq-find (lambda (token)
  45. (member (parseclj-lex-token-type token)
  46. '(:lbrace :set)))
  47. stack)))))
  48. (defun parseclj--reduce-coll (stack closing-token reduce-branch options)
  49. "Reduce collection based on the top of the STACK and a CLOSING-TOKEN.
  50. REDUCE-BRANCH is a function to be applied to the collection of tokens found
  51. from the top of the stack until an opening token that matches
  52. CLOSING-TOKEN. This function should return an AST token representing such
  53. collection.
  54. OPTIONS is an association list. This list is also passed down to the
  55. REDUCE-BRANCH function. See `parseclj-parser' for more information on
  56. available options."
  57. (let ((opening-token-type (parseclj--find-opening-token stack closing-token))
  58. (fail-fast (a-get options :fail-fast t))
  59. (collection nil))
  60. ;; unwind the stack until opening-token-type is found, adding to collection
  61. (while (and stack (not (eq (parseclj-lex-token-type (car stack)) opening-token-type)))
  62. (push (pop stack) collection))
  63. ;; did we find the right token?
  64. (if (eq (parseclj-lex-token-type (car stack)) opening-token-type)
  65. (progn
  66. (when fail-fast
  67. ;; any unreduced tokens left: bail early
  68. (when-let ((token (seq-find #'parseclj-lex-token-p collection)))
  69. (parseclj--error "At position %s, unmatched %S"
  70. (a-get token :pos)
  71. (parseclj-lex-token-type token))))
  72. ;; all good, call the reducer so it can return an updated stack with a
  73. ;; new node at the top.
  74. (let ((opening-token (pop stack)))
  75. (funcall reduce-branch stack opening-token collection options)))
  76. ;; Unwound the stack without finding a matching paren: either bail early
  77. ;; or return the original stack and continue parsing
  78. (if fail-fast
  79. (parseclj--error "At position %s, unmatched %S"
  80. (a-get closing-token :pos)
  81. (parseclj-lex-token-type closing-token))
  82. (reverse collection)))))
  83. (defun parseclj--take-value (stack value-p)
  84. "Scan STACK until a value is found.
  85. Return everything up to the value in reversed order (meaning the value
  86. comes first in the result).
  87. STACK is the current parse stack to scan.
  88. VALUE-P a predicate to distinguish reduced values from non-values (tokens
  89. and whitespace)."
  90. (let ((result nil))
  91. (cl-block nil
  92. (while stack
  93. (cond
  94. ((parseclj-lex-token-p (car stack))
  95. (cl-return nil))
  96. ((funcall value-p (car stack))
  97. (cl-return (cons (car stack) result)))
  98. (t
  99. (push (pop stack) result)))))))
  100. (defun parseclj--take-token (stack value-p token-types)
  101. "Scan STACK until a token of a certain type is found.
  102. Returns nil if a value is encountered before a matching token is found.
  103. Return everything up to the token in reversed order (meaning the token
  104. comes first in the result).
  105. STACK is the current parse stack to scan.
  106. VALUE-P a predicate to distinguish reduced values from non-values (tokens
  107. and whitespace).
  108. TOKEN-TYPES are the token types to look for."
  109. (let ((result nil))
  110. (cl-block nil
  111. (while stack
  112. (cond
  113. ((member (parseclj-lex-token-type (car stack)) token-types)
  114. (cl-return (cons (car stack) result)))
  115. ((funcall value-p (car stack))
  116. (cl-return nil))
  117. ((parseclj-lex-token-p (car stack))
  118. (cl-return nil))
  119. (t
  120. (push (pop stack) result)))))))
  121. (defun parseclj-single-value-p (stack value-p)
  122. "Return t if STACK only has a single node for which VALUE-P is true.
  123. This checks if the stack contains a single, fully reduced value, and no
  124. dangling unmatched tokens. When parsing with `:read-one' this indicates a
  125. form can be returned."
  126. (and (not (cl-reduce (lambda (bool node)
  127. (or bool (parseclj-lex-token-p node)))
  128. stack
  129. :initial-value nil))
  130. (parseclj--take-value stack value-p)))
  131. (defun parseclj-parser (reduce-leaf reduce-branch &optional options)
  132. "Clojure/EDN stack-based shift-reduce parser.
  133. REDUCE-LEAF does reductions for leaf nodes. It is a function that takes
  134. the current value of the stack and a token, and either returns an updated
  135. stack, with a new leaf node at the top (front), or returns the stack
  136. unmodified.
  137. REDUCE-BRANCH does reductions for branch nodes. It is a function that
  138. takes the current value of the stack, the type of branch node to create,
  139. and a list of child nodes, and returns an updated stack, with the new node
  140. at the top (front).
  141. What \"node\" means in this case is up to the reducing functions, it could
  142. be AST nodes (as in the case of `parseclj-parser-clojure'), or plain
  143. values/sexps (as in the case of `parseedn-read'), or something else. The
  144. only requirement is that they should not put raw tokens back on the stack,
  145. as the parser relies on the presence or absence of these to detect parse
  146. errors.
  147. OPTIONS is an association list which is passed on to the reducing
  148. functions. Additionally the following options are recognized
  149. - `:fail-fast'
  150. Raise an error when a parse error is encountered, rather than continuing
  151. with a partial result.
  152. - `:value-p'
  153. A predicate function to differentiate values from tokens and
  154. whitespace. This is needed when scanning the stack to see if any
  155. reductions can be performed. By default anything that isn't a token is
  156. considered a value. This can be problematic when parsing with
  157. `:lexical-preservation', and which case you should provide an
  158. implementation that also returns falsy for :whitespace, :comment, and
  159. :discard AST nodes.
  160. - `:tag-readers'
  161. An association list that describes tag handler functions for any possible
  162. tag. This options in only available in `parseedn-read', for more
  163. information, please refer to its documentation.
  164. - `:read-one'
  165. Return as soon as a single complete value has been read."
  166. (let ((fail-fast (a-get options :fail-fast t))
  167. (read-one (a-get options :read-one))
  168. (value-p (a-get options :value-p (lambda (e) (not (parseclj-lex-token-p e)))))
  169. (stack nil)
  170. (token (parseclj-lex-next)))
  171. (while (not (or (and read-one (parseclj-single-value-p stack value-p))
  172. (eq (parseclj-lex-token-type token) :eof)))
  173. ;; (message "STACK: %S" stack)
  174. ;; (message "TOKEN: %S\n" token)
  175. (when (and fail-fast (parseclj-lex-error-p token))
  176. (parseclj--error "Invalid token at %s: %S"
  177. (a-get token :pos)
  178. (parseclj-lex-token-form token)))
  179. ;; Reduce based on the top item on the stack (collections)
  180. (cond
  181. ((parseclj-lex-leaf-token-p token)
  182. (setf stack (funcall reduce-leaf stack token options)))
  183. ((parseclj-lex-closing-token-p token)
  184. (setf stack (parseclj--reduce-coll stack token reduce-branch options)))
  185. (t (push token stack)))
  186. ;; Reduce based on top two items on the stack (special prefixed elements)
  187. (let* ((top-value (parseclj--take-value stack value-p))
  188. (opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))
  189. new-stack)
  190. (while (and top-value opening-token)
  191. ;; (message "Reducing...")
  192. ;; (message " - STACK %S" stack)
  193. ;; (message " - OPENING-TOKEN %S" opening-token)
  194. ;; (message " - TOP-VALUE %S" top-value)
  195. (setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack))
  196. (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options))
  197. ;; recur
  198. (setq top-value (parseclj--take-value stack value-p))
  199. (setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))))
  200. ;; Reduce based on top three items on the stack (metadata, namespaced maps)
  201. (let* ((top-value-1 (parseclj--take-value stack value-p))
  202. (top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
  203. (opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
  204. (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
  205. new-stack)
  206. (while (and top-value-1 top-value-2 opening-token)
  207. (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack))
  208. (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options))
  209. ;; recur
  210. (setq top-value-1 (parseclj--take-value stack value-p))
  211. (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
  212. (setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
  213. (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))
  214. (setq token (parseclj-lex-next)))
  215. ;; reduce root
  216. (when fail-fast
  217. (when-let ((token (seq-find #'parseclj-lex-token-p stack)))
  218. (parseclj--error "At position %s, unmatched %S"
  219. (a-get token :pos)
  220. (parseclj-lex-token-type token))))
  221. (if read-one
  222. (car (parseclj--take-value stack value-p))
  223. (car (funcall reduce-branch nil (parseclj-lex-token :root "" 1)
  224. (reverse stack)
  225. options)))))
  226. (provide 'parseclj-parser)
  227. ;;; parseclj-parser.el ends here