|
|
- ;;; parseclj-parser.el --- Clojure/EDN parser -*- lexical-binding: t; -*-
-
- ;; Copyright (C) 2017-2018 Arne Brasseur
-
- ;; Author: Arne Brasseur <arne@arnebrasseur.net>
- ;; Keywords: lisp
- ;; Package-Requires: ((emacs "25") (a "0.1.0alpha4"))
- ;; Version: 0.1.0
-
- ;; This file is not part of GNU Emacs.
-
- ;; This file is free software; you can redistribute it and/or modify
- ;; it under the terms of the GNU General Public License as published by
- ;; the Free Software Foundation; either version 3, or (at your option)
- ;; any later version.
-
- ;; This file is distributed in the hope that it will be useful,
- ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;; GNU General Public License for more details.
-
- ;; You should have received a copy of the GNU General Public License
- ;; along with GNU Emacs; see the file COPYING. If not, write to
- ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
- ;; Boston, MA 02110-1301, USA.
-
- ;;; Commentary:
-
- ;; A shift/reduce parser for Clojure source.
-
- ;;; Code:
-
- (require 'cl-lib)
- (require 'subr-x)
- (require 'a)
- (require 'parseclj-lex)
-
- (define-error 'parseclj-parser-error "parseclj: Syntax error")
-
- (defun parseclj--error (format &rest args)
- "Signal a parse error.
- Takes a FORMAT string and optional ARGS to be passed to
- `format-message'. Signals a 'parseclj-parser-error signal, which
- can be handled with `condition-case'."
- (signal 'parseclj-parser-error (list (apply #'format-message format args))))
-
- (defun parseclj--find-opening-token (stack closing-token)
- "Scan STACK for an opening-token matching CLOSING-TOKEN."
- (cl-case (parseclj-lex-token-type closing-token)
- (:rparen (parseclj-lex-token-type
- (seq-find (lambda (token)
- (member (parseclj-lex-token-type token)
- '(:lparen :lambda)))
- stack)))
- (:rbracket :lbracket)
- (:rbrace (parseclj-lex-token-type
- (seq-find (lambda (token)
- (member (parseclj-lex-token-type token)
- '(:lbrace :set)))
- stack)))))
-
- (defun parseclj--reduce-coll (stack closing-token reduce-branch options)
- "Reduce collection based on the top of the STACK and a CLOSING-TOKEN.
-
- REDUCE-BRANCH is a function to be applied to the collection of tokens found
- from the top of the stack until an opening token that matches
- CLOSING-TOKEN. This function should return an AST token representing such
- collection.
-
- OPTIONS is an association list. This list is also passed down to the
- REDUCE-BRANCH function. See `parseclj-parser' for more information on
- available options."
- (let ((opening-token-type (parseclj--find-opening-token stack closing-token))
- (fail-fast (a-get options :fail-fast t))
- (collection nil))
-
- ;; unwind the stack until opening-token-type is found, adding to collection
- (while (and stack (not (eq (parseclj-lex-token-type (car stack)) opening-token-type)))
- (push (pop stack) collection))
-
- ;; did we find the right token?
- (if (eq (parseclj-lex-token-type (car stack)) opening-token-type)
- (progn
- (when fail-fast
- ;; any unreduced tokens left: bail early
- (when-let ((token (seq-find #'parseclj-lex-token-p collection)))
- (parseclj--error "At position %s, unmatched %S"
- (a-get token :pos)
- (parseclj-lex-token-type token))))
-
- ;; all good, call the reducer so it can return an updated stack with a
- ;; new node at the top.
- (let ((opening-token (pop stack)))
- (funcall reduce-branch stack opening-token collection options)))
-
- ;; Unwound the stack without finding a matching paren: either bail early
- ;; or return the original stack and continue parsing
- (if fail-fast
- (parseclj--error "At position %s, unmatched %S"
- (a-get closing-token :pos)
- (parseclj-lex-token-type closing-token))
-
- (reverse collection)))))
-
- (defun parseclj--take-value (stack value-p)
- "Scan STACK until a value is found.
- Return everything up to the value in reversed order (meaning the value
- comes first in the result).
-
- STACK is the current parse stack to scan.
-
- VALUE-P a predicate to distinguish reduced values from non-values (tokens
- and whitespace)."
- (let ((result nil))
- (cl-block nil
- (while stack
- (cond
- ((parseclj-lex-token-p (car stack))
- (cl-return nil))
-
- ((funcall value-p (car stack))
- (cl-return (cons (car stack) result)))
-
- (t
- (push (pop stack) result)))))))
-
- (defun parseclj--take-token (stack value-p token-types)
- "Scan STACK until a token of a certain type is found.
- Returns nil if a value is encountered before a matching token is found.
- Return everything up to the token in reversed order (meaning the token
- comes first in the result).
-
- STACK is the current parse stack to scan.
-
- VALUE-P a predicate to distinguish reduced values from non-values (tokens
- and whitespace).
-
- TOKEN-TYPES are the token types to look for."
- (let ((result nil))
- (cl-block nil
- (while stack
- (cond
- ((member (parseclj-lex-token-type (car stack)) token-types)
- (cl-return (cons (car stack) result)))
- ((funcall value-p (car stack))
- (cl-return nil))
- ((parseclj-lex-token-p (car stack))
- (cl-return nil))
- (t
- (push (pop stack) result)))))))
-
- (defun parseclj-single-value-p (stack value-p)
- "Return t if STACK only has a single node for which VALUE-P is true.
-
- This checks if the stack contains a single, fully reduced value, and no
- dangling unmatched tokens. When parsing with `:read-one' this indicates a
- form can be returned."
- (and (not (cl-reduce (lambda (bool node)
- (or bool (parseclj-lex-token-p node)))
- stack
- :initial-value nil))
- (parseclj--take-value stack value-p)))
-
- (defun parseclj-parser (reduce-leaf reduce-branch &optional options)
- "Clojure/EDN stack-based shift-reduce parser.
-
- REDUCE-LEAF does reductions for leaf nodes. It is a function that takes
- the current value of the stack and a token, and either returns an updated
- stack, with a new leaf node at the top (front), or returns the stack
- unmodified.
-
- REDUCE-BRANCH does reductions for branch nodes. It is a function that
- takes the current value of the stack, the type of branch node to create,
- and a list of child nodes, and returns an updated stack, with the new node
- at the top (front).
-
- What \"node\" means in this case is up to the reducing functions, it could
- be AST nodes (as in the case of `parseclj-parser-clojure'), or plain
- values/sexps (as in the case of `parseedn-read'), or something else. The
- only requirement is that they should not put raw tokens back on the stack,
- as the parser relies on the presence or absence of these to detect parse
- errors.
-
- OPTIONS is an association list which is passed on to the reducing
- functions. Additionally the following options are recognized
-
- - `:fail-fast'
- Raise an error when a parse error is encountered, rather than continuing
- with a partial result.
- - `:value-p'
- A predicate function to differentiate values from tokens and
- whitespace. This is needed when scanning the stack to see if any
- reductions can be performed. By default anything that isn't a token is
- considered a value. This can be problematic when parsing with
- `:lexical-preservation', and which case you should provide an
- implementation that also returns falsy for :whitespace, :comment, and
- :discard AST nodes.
- - `:tag-readers'
- An association list that describes tag handler functions for any possible
- tag. This options in only available in `parseedn-read', for more
- information, please refer to its documentation.
- - `:read-one'
- Return as soon as a single complete value has been read."
- (let ((fail-fast (a-get options :fail-fast t))
- (read-one (a-get options :read-one))
- (value-p (a-get options :value-p (lambda (e) (not (parseclj-lex-token-p e)))))
- (stack nil)
- (token (parseclj-lex-next)))
-
- (while (not (or (and read-one (parseclj-single-value-p stack value-p))
- (eq (parseclj-lex-token-type token) :eof)))
- ;; (message "STACK: %S" stack)
- ;; (message "TOKEN: %S\n" token)
-
- (when (and fail-fast (parseclj-lex-error-p token))
- (parseclj--error "Invalid token at %s: %S"
- (a-get token :pos)
- (parseclj-lex-token-form token)))
-
- ;; Reduce based on the top item on the stack (collections)
- (cond
- ((parseclj-lex-leaf-token-p token)
- (setf stack (funcall reduce-leaf stack token options)))
-
- ((parseclj-lex-closing-token-p token)
- (setf stack (parseclj--reduce-coll stack token reduce-branch options)))
-
- (t (push token stack)))
-
- ;; Reduce based on top two items on the stack (special prefixed elements)
- (let* ((top-value (parseclj--take-value stack value-p))
- (opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))
- new-stack)
- (while (and top-value opening-token)
- ;; (message "Reducing...")
- ;; (message " - STACK %S" stack)
- ;; (message " - OPENING-TOKEN %S" opening-token)
- ;; (message " - TOP-VALUE %S" top-value)
- (setq new-stack (nthcdr (+ (length top-value) (length opening-token)) stack))
- (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value) options))
-
- ;; recur
- (setq top-value (parseclj--take-value stack value-p))
- (setq opening-token (parseclj--take-token (nthcdr (length top-value) stack) value-p parseclj-lex--prefix-tokens))))
-
- ;; Reduce based on top three items on the stack (metadata, namespaced maps)
- (let* ((top-value-1 (parseclj--take-value stack value-p))
- (top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
- (opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
- (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
- new-stack)
- (while (and top-value-1 top-value-2 opening-token)
- (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list top-value-1 top-value-2 opening-token))) stack))
- (setq stack (funcall reduce-branch new-stack (car opening-token) (append (cdr opening-token) top-value-2 top-value-1) options))
-
- ;; recur
- (setq top-value-1 (parseclj--take-value stack value-p))
- (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1) stack) value-p))
- (setq opening-token (parseclj--take-token (nthcdr (+ (length top-value-1)
- (length top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))
-
- (setq token (parseclj-lex-next)))
-
- ;; reduce root
- (when fail-fast
- (when-let ((token (seq-find #'parseclj-lex-token-p stack)))
- (parseclj--error "At position %s, unmatched %S"
- (a-get token :pos)
- (parseclj-lex-token-type token))))
-
- (if read-one
- (car (parseclj--take-value stack value-p))
- (car (funcall reduce-branch nil (parseclj-lex-token :root "" 1)
- (reverse stack)
- options)))))
-
- (provide 'parseclj-parser)
- ;;; parseclj-parser.el ends here
|