You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 

685 lines
26 KiB

#include <algorithm>
#include <array>
#include <charconv>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <optional>
#include <span>
#include <sstream>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>
#include "UserScript/parser.h"
#include "UserScript.h"
//////////////////////
/// PARSER HELPERS ///
//////////////////////
// Shorthand for the token type that every parsing routine in this file consumes.
using token = scripting::ast::token;
// Shorthand for the symbol enumeration carried by tokens (operators, parentheses, new_line, ...).
using symbol_t = scripting::ast::symbol_t;
/// Outcome of one parsing attempt.
///
/// `result` holds the parsed node on success and is empty on failure.
/// `rest` is the token span the caller should continue from; the parsers in this
/// file hand back their original input span when they fail.
template<typename T>
struct parse_result {
    std::optional<T> result;
    std::span<token> rest;

    /// True when the attempt produced a node. Const so it is usable on const results too.
    operator bool() const noexcept { return result.has_value(); }
};
/// Tells whether the first token of `current` is the new_line symbol.
/// An empty span yields false.
bool next_is_newline(std::span<token> current) {
    if(current.empty()) {
        return false;
    }
    auto& head = current.front().value;
    if(not holds_alternative<symbol_t>(head)) {
        return false;
    }
    return get<symbol_t>(head) == symbol_t::new_line;
}
/// Drops every leading new_line token and returns what is left (possibly an empty span).
std::span<token> trim_newline(std::span<token> current) {
    for(; next_is_newline(current); current = current.subspan(1)) {
        // nothing to do: the loop header performs the skipping
    }
    return current;
}
///////////////////////////
/// PARSER DECLARATIONS ///
///////////////////////////
// Forward declarations: the routines below call each other recursively.
// Each try_* routine attempts to parse one construct from the front of `code`.
// On success the returned parse_result carries the node and the remaining tokens;
// on failure `result` is empty and `rest` is the untouched input span.
// Diagnostics, when any are produced, are appended to `errors`.

// Any expression (binary, literal, unary, parenthesised or variable).
parse_result<scripting::ast::expression> try_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
// `<operand> <binary operator> <expression>`.
parse_result<scripting::ast::binary_algebraic_expression> try_binary_algebraic_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
// A single identifier token.
parse_result<scripting::ast::variable_expression> try_variable_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
// A single int32_t literal token.
parse_result<scripting::ast::literal_int_expression> try_literal_int_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
// A single std::string literal token.
parse_result<scripting::ast::literal_string_expression> try_literal_string_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
// A conditional, a command or a while loop, with surrounding new lines trimmed.
parse_result<scripting::ast::statement> try_statement(std::span<token> code, std::vector<scripting::script_error>& errors);
// The `divide` symbol followed by a command name and its argument expressions.
parse_result<scripting::ast::command_expression> try_command_expr(std::span<token> code, std::vector<scripting::script_error>& errors);
// A conditional opened by the identifier `initiator` ("if" by default, "elseif" for chained branches).
parse_result<scripting::ast::conditional> try_conditional(const std::span<token> code, std::vector<scripting::script_error>& errors, const std::string_view initiator = "if");
// A `while` ... `endwhile` loop.
parse_result<scripting::ast::while_loop> try_while_loop(const std::span<token> code, std::vector<scripting::script_error>& errors);
// A parenthesised command or expression.
parse_result<scripting::ast::paren_expression> try_paren_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
// `<unary operator> <expression>`.
parse_result<scripting::ast::unary_algebraic_expression> try_unary_algebraic_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
//////////////////////////
/// PARSER DEFINITIONS ///
//////////////////////////
/// Parses a command invocation: the `divide` symbol, an identifier naming the command,
/// then argument expressions up to (but not including) a new_line, an r_paren or the end of input.
///
/// Returns an empty result without recording an error when `code` does not start with `divide`.
/// Once the prefix is seen, a missing name or an unparsable argument records an error and fails.
parse_result<scripting::ast::command_expression> try_command_expr(std::span<token> code, std::vector<scripting::script_error>& errors) {
    // Precondition: `tokens` is not empty.
    const auto front_is = [](std::span<token> tokens, symbol_t wanted) -> bool {
        return holds_alternative<symbol_t>(tokens.front().value)
            and get<symbol_t>(tokens.front().value) == wanted;
    };

    if(code.empty() or not front_is(code, symbol_t::divide)) {
        return {std::nullopt, code};
    }

    scripting::ast::command_expression cmd;
    cmd.location = code.front().location;
    auto remaining = code.subspan(1);

    // The command name must directly follow the prefix symbol.
    if(remaining.empty()) {
        errors.push_back(
            scripting::script_error{.location = code.front().location, .message = "Expected command name"});
        return {std::nullopt, code};
    }
    if(not holds_alternative<scripting::ast::identifier>(remaining.front().value)) {
        errors.push_back(
            scripting::script_error{.location = remaining.front().location, .message = "Expected command name"});
        return {std::nullopt, code};
    }
    cmd.name = get<scripting::ast::identifier>(remaining.front().value);
    remaining = remaining.subspan(1);

    // Collect arguments until a terminator (new line, closing parenthesis, end of input).
    for(;;) {
        if(remaining.empty()) {
            break;
        }
        if(front_is(remaining, symbol_t::new_line) or front_is(remaining, symbol_t::r_paren)) {
            break;
        }
        auto argument = try_expression(remaining, errors);
        if(not argument.result) {
            errors.push_back(
                scripting::script_error{.location = remaining.front().location, .message = "Expected expression"});
            return {std::nullopt, code};
        }
        cmd.arguments.push_back(std::make_unique<scripting::ast::expression>(std::move(argument.result.value())));
        remaining = argument.rest;
    }
    return {std::move(cmd), remaining};
}
/// Parses any expression and wraps it in a generic `expression` node.
///
/// Alternatives are tried in a fixed order and the first success wins:
/// binary expression, string literal, integer literal, unary expression,
/// parenthesised expression, variable. The binary form goes first so that
/// `a + b` is not cut short after `a`.
///
/// The wrapper's location is that of the first token of `code`.
/// Returns an empty result (with `rest == code`) when no alternative matches;
/// the alternatives may have appended diagnostics to `errors` along the way.
parse_result<scripting::ast::expression> try_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    scripting::ast::expression node;
    auto current = code;

    // Wraps a successful attempt into `node` and advances `current`; reports whether it succeeded.
    // A templated lambda replaces the former function-like macro so the dependencies are explicit.
    const auto accept = [&]<typename parsed_t>(parse_result<parsed_t> attempt) -> bool {
        if(not attempt.result.has_value()) {
            return false;
        }
        node = scripting::ast::expression{
            .location = current.front().location,
            .contents = std::make_unique<parsed_t>(std::move(attempt.result.value()))
        };
        current = attempt.rest;
        return true;
    };

    // Short-circuit evaluation keeps the alternatives lazy and ordered.
    const bool parsed =
        accept(try_binary_algebraic_expression(current, errors))
        or accept(try_literal_string_expression(current, errors))
        or accept(try_literal_int_expression(current, errors))
        or accept(try_unary_algebraic_expression(current, errors))
        or accept(try_paren_expression(current, errors))
        or accept(try_variable_expression(current, errors));

    if(not parsed) {
        return {std::nullopt, code};
    }
    return {.result = std::move(node), .rest = current};
}
/// Parses a conditional block.
///
/// Layout, as accepted by the code below:
///   <initiator> <expression> new_line
///     statements...
///   [ "elseif" ... ]   (parsed recursively and stored as the sole statement of the else block)
///   [ "else" new_line statements... ]
///   "endif" [new_line]
///
/// @param code       tokens to parse; returned untouched as `rest` on failure.
/// @param errors     receives diagnostics.
/// @param initiator  identifier that opens the construct ("if" at statement level,
///                   "elseif" for chained branches).
/// @return the conditional and the tokens following it, or an empty result.
///         No error is recorded when `code` simply does not start with `initiator`.
parse_result<scripting::ast::conditional> try_conditional(const std::span<token> code, std::vector<scripting::script_error>& errors, const std::string_view initiator) {
    const auto failure = [&]() -> parse_result<scripting::ast::conditional> {
        return {.result = std::nullopt, .rest = code};
    };
    if(code.empty()) return failure();

    scripting::ast::conditional result{};
    auto current = code;
    result.location = current.front().location;

    // True when the next token is an identifier spelled exactly `expected`.
    const auto front_is_identifier = [&](const std::string_view expected) -> bool {
        if(current.empty()) return false;
        if(not holds_alternative<scripting::ast::identifier>(current.front().value)) return false;
        return get<scripting::ast::identifier>(current.front().value).value == expected;
    };
    // Consumes one new_line token; returns true (after recording an error) when there is none.
    const auto missing_new_line = [&]() -> bool {
        if(current.empty()) {
            errors.push_back(
                scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"});
            return true;
        }
        if(not holds_alternative<symbol_t>(current.front().value)
            or get<symbol_t>(current.front().value) != symbol_t::new_line) {
            errors.push_back(
                scripting::script_error{.location = current.front().location, .message = "Expected new line"});
            return true;
        }
        current = current.subspan(1);
        return false;
    };

    if(not front_is_identifier(initiator)) return failure();
    current = current.subspan(1);

    // Read the condition
    auto condition = try_expression(current, errors);
    if(not condition.result) {
        errors.push_back(
            scripting::script_error{.location = code.front().location, .message = "Expected expression in conditional"});
        return failure();
    }
    result.condition = std::make_unique<scripting::ast::expression>(std::move(condition.result.value()));
    current = condition.rest;
    if(missing_new_line()) return failure();
    if(current.empty()) {
        errors.push_back(
            scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"});
        return failure();
    }

    result.on_condition = std::make_unique<scripting::ast::block>();
    result.on_condition->location = current.front().location;
    bool else_mode = false;

    while(true) {
        // Blank lines are accepted between statements everywhere else; accept them here as well
        // so that an empty body followed by blank lines still reaches `else` / `endif`.
        current = trim_newline(current);
        if(front_is_identifier("endif")) break;

        // Handles elseif sequences as nested ifs in the else block.
        // The nested parse consumes the shared `endif`, so its result ends this conditional.
        // NOTE(review): an `elseif` appearing after `else` replaces the else block already
        // collected without any diagnostic; confirm whether that should be an error.
        const auto errors_before = errors.size();
        auto nested = try_conditional(current, errors, "elseif");
        if(nested.result) {
            result.otherwise = std::make_unique<scripting::ast::block>();
            result.otherwise->location = current.front().location;
            result.otherwise->contents.push_back(scripting::ast::statement{
                .location = current.front().location,
                .contents = std::make_unique<scripting::ast::conditional>(std::move(nested.result.value())),
            });
            return {.result = std::move(result), .rest = nested.rest};
        }
        if(errors.size() != errors_before) {
            // It was an elseif, but a malformed one.
            return failure();
        }

        // Handles code
        if(auto body = try_statement(current, errors); body.result) {
            auto& target = else_mode ? result.otherwise : result.on_condition;
            target->contents.push_back(std::move(body.result.value()));
            current = body.rest;
            continue;
        }

        if(current.empty()) {
            errors.push_back(
                scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"});
            return failure();
        }
        if(not front_is_identifier("else")) {
            errors.push_back(
                scripting::script_error{.location = current.front().location, .message = "Unexpected expression content"});
            return failure();
        }

        // Handles switching to else mode
        const auto else_location = current.front().location;
        current = current.subspan(1);
        if(missing_new_line()) return failure();
        if(else_mode) {
            // Report at the offending `else`: `current` may be empty here, so its front must not be read.
            errors.push_back(
                scripting::script_error{.location = else_location, .message = "Repeated else in conditional"});
        } else {
            else_mode = true;
            result.otherwise = std::make_unique<scripting::ast::block>();
            result.otherwise->location = else_location;
        }
    }

    // The loop only exits on `endif`: consume it and the new line that follows, if any.
    current = current.subspan(1);
    if(not current.empty() and missing_new_line()) return failure();
    return {.result = std::move(result), .rest = current};
}
/// Parses a while loop.
///
/// Layout, as accepted by the code below:
///   "while" <expression> new_line
///     statements...
///   "endwhile" [new_line]
///
/// @param code    tokens to parse; returned untouched as `rest` on failure.
/// @param errors  receives diagnostics.
/// @return the loop and the tokens following it, or an empty result.
///         No error is recorded when `code` simply does not start with `while`.
parse_result<scripting::ast::while_loop> try_while_loop(const std::span<token> code, std::vector<scripting::script_error>& errors) {
    const auto failure = [&]() -> parse_result<scripting::ast::while_loop> {
        return {.result = std::nullopt, .rest = code};
    };
    if(code.empty()) return failure();

    scripting::ast::while_loop result{};
    auto current = code;
    result.location = current.front().location;

    // True when the next token is an identifier spelled exactly `expected`.
    const auto front_is_identifier = [&](const std::string_view expected) -> bool {
        if(current.empty()) return false;
        if(not holds_alternative<scripting::ast::identifier>(current.front().value)) return false;
        return get<scripting::ast::identifier>(current.front().value).value == expected;
    };
    // Consumes one new_line token; returns true (after recording an error) when there is none.
    const auto missing_new_line = [&]() -> bool {
        if(current.empty()) {
            errors.push_back(
                scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"});
            return true;
        }
        if(not holds_alternative<symbol_t>(current.front().value)
            or get<symbol_t>(current.front().value) != symbol_t::new_line) {
            errors.push_back(
                scripting::script_error{.location = current.front().location, .message = "Expected new line"});
            return true;
        }
        current = current.subspan(1);
        return false;
    };

    if(not front_is_identifier("while")) return failure();
    current = current.subspan(1);

    // Read the condition
    auto condition = try_expression(current, errors);
    if(not condition.result) {
        errors.push_back(
            scripting::script_error{.location = code.front().location, .message = "Expected expression in while loop"});
        return failure();
    }
    result.condition = std::make_unique<scripting::ast::expression>(std::move(condition.result.value()));
    current = condition.rest;
    if(missing_new_line()) return failure();
    if(current.empty()) {
        errors.push_back(
            scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"});
        return failure();
    }

    result.on_condition = std::make_unique<scripting::ast::block>();
    result.on_condition->location = current.front().location;

    while(true) {
        // Blank lines are accepted between statements everywhere else; accept them here as well
        // so that an empty body followed by blank lines still reaches `endwhile`.
        current = trim_newline(current);
        if(front_is_identifier("endwhile")) break;

        // Handles code
        if(auto body = try_statement(current, errors); body.result) {
            result.on_condition->contents.push_back(std::move(body.result.value()));
            current = body.rest;
            continue;
        }

        if(current.empty()) {
            errors.push_back(
                scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"});
        } else {
            errors.push_back(
                scripting::script_error{.location = current.front().location, .message = "Unexpected statement in block"});
        }
        return failure();
    }

    // The loop only exits on `endwhile`: consume it and the new line that follows, if any.
    current = current.subspan(1);
    if(not current.empty() and missing_new_line()) return failure();
    return {.result = std::move(result), .rest = current};
}
/// Parses `l_paren (command | expression) r_paren`.
///
/// Returns an empty result without an error when `code` does not start with l_paren.
/// After the opening parenthesis, unparsable content or a missing closing parenthesis
/// records an error and fails.
parse_result<scripting::ast::paren_expression> try_paren_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    const auto rejected = [&]() -> parse_result<scripting::ast::paren_expression> {
        return {std::nullopt, code};
    };
    // Precondition: `tokens` is not empty.
    const auto front_is = [](std::span<token> tokens, symbol_t wanted) -> bool {
        return holds_alternative<symbol_t>(tokens.front().value)
            and get<symbol_t>(tokens.front().value) == wanted;
    };

    if(code.empty() or not front_is(code, symbol_t::l_paren)) {
        return rejected();
    }

    scripting::ast::paren_expression paren;
    paren.location = code.front().location;
    auto inner = code.subspan(1);

    // A command takes priority over a plain expression inside the parentheses.
    if(auto command = try_command_expr(inner, errors); command.result) {
        paren.content = std::make_unique<scripting::ast::command_expression>(std::move(command.result.value()));
        inner = command.rest;
    } else if(auto nested = try_expression(inner, errors); nested.result) {
        paren.content = std::make_unique<scripting::ast::expression>(std::move(nested.result.value()));
        inner = nested.rest;
    } else {
        errors.push_back(
            scripting::script_error{.location = (inner.empty() ? paren.location : inner.front().location), .message = "Expected either a command or some type of expression"});
        return rejected();
    }

    // The content must be followed by the closing parenthesis.
    if(inner.empty()) {
        errors.push_back(
            scripting::script_error{.location = paren.location, .message = "No matching parenthesis"});
        return rejected();
    }
    if(not front_is(inner, symbol_t::r_paren)) {
        errors.push_back(
            scripting::script_error{.location = inner.front().location, .message = "No matching parenthesis, expected a closing parenthesis"});
        return rejected();
    }
    return {.result = std::move(paren), .rest = inner.subspan(1)};
}
/// Parses one statement: a conditional, a command, or a while loop (tried in that order).
///
/// Leading new lines are skipped before the attempt and trailing ones after a success.
/// On failure the returned `rest` is the original `code`, leading new lines included.
parse_result<scripting::ast::statement> try_statement(std::span<token> code, std::vector<scripting::script_error>& errors) {
    const auto start = trim_newline(code);
    scripting::ast::statement node;
    std::span<token> after;

    // Stores a successful attempt in `node`; the statement is located at its first token.
    const auto adopt = [&]<typename parsed_t>(parse_result<parsed_t> attempt) -> bool {
        if(not attempt.result.has_value()) {
            return false;
        }
        node.contents = std::make_unique<parsed_t>(std::move(attempt.result.value()));
        node.location = start.front().location;
        after = attempt.rest;
        return true;
    };

    const bool recognised =
        adopt(try_conditional(start, errors))
        or adopt(try_command_expr(start, errors))
        or adopt(try_while_loop(start, errors));
    if(not recognised) {
        return {std::nullopt, code};
    }
    return {.result = std::move(node), .rest = trim_newline(after)};
}
/// Parses `<unary operator> <expression>`, where the operator is binary_not or logical_not.
///
/// Returns an empty result without an error when `code` does not start with such an operator;
/// a missing operand records an error and fails.
parse_result<scripting::ast::unary_algebraic_expression> try_unary_algebraic_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    // Parallel tables: lexer symbol at index i maps to the AST operator at index i.
    constexpr std::array lexer_operators = {symbol_t::binary_not, symbol_t::logical_not};
    constexpr std::array ast_operators = {scripting::ast::operator_t::binary_not, scripting::ast::operator_t::logical_not};
    static_assert(lexer_operators.size() == ast_operators.size());

    if(code.empty()) {
        return {std::nullopt, code};
    }
    auto& head = code.front();
    if(not holds_alternative<symbol_t>(head.value)) {
        return {std::nullopt, code};
    }
    const auto match = std::ranges::find(lexer_operators, get<symbol_t>(head.value));
    if(match == lexer_operators.end()) {
        return {std::nullopt, code};
    }

    scripting::ast::unary_algebraic_expression node;
    node.location = head.location;
    node.op = ast_operators[static_cast<std::size_t>(match - lexer_operators.begin())];

    /// TODO: Gives the lowest priority to unaries (aka, they are applied last)
    auto operand = try_expression(code.subspan(1), errors);
    if(not operand.result) {
        errors.push_back(
            scripting::script_error{.location = code.front().location, .message = "Expected expression after unary operator"});
        return {std::nullopt, code};
    }
    node.content = std::make_unique<scripting::ast::expression>(std::move(operand.result.value()));
    return {.result = std::move(node), .rest = operand.rest};
}
/// Parses `<operand> <binary operator> <expression>`.
///
/// The left operand is a string literal, integer literal, unary expression, parenthesised
/// expression or variable (tried in that order). The right-hand side is parsed recursively
/// with try_expression, which yields a right-leaning tree; the tree is then rotated so that
/// tighter-binding operators end up deeper and operators of equal precedence associate to
/// the left.
///
/// Returns an empty result without an error when the input is not of that shape (no operand,
/// or no binary operator after it). A missing right-hand side records an error and fails.
parse_result<scripting::ast::binary_algebraic_expression> try_binary_algebraic_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    using binary_node = scripting::ast::binary_algebraic_expression;
    using binary_ptr = std::unique_ptr<binary_node>;

    // One row per operator: lexer symbol, AST operator, precedence (lower binds tighter).
    struct operator_entry {
        symbol_t symbol;
        scripting::ast::operator_t op;
        int precedence;
    };
    static constexpr operator_entry operator_table[] = {
        {symbol_t::divide,                scripting::ast::operator_t::divide,                1},
        {symbol_t::modulo,                scripting::ast::operator_t::modulo,                1},
        {symbol_t::multiply,              scripting::ast::operator_t::multiply,              1},
        {symbol_t::subtract,              scripting::ast::operator_t::subtract,              2},
        {symbol_t::add,                   scripting::ast::operator_t::add,                   2},
        {symbol_t::bitshift_left,         scripting::ast::operator_t::bitshift_left,         3},
        {symbol_t::bitshift_right,        scripting::ast::operator_t::bitshift_right,        3},
        {symbol_t::rotate_left,           scripting::ast::operator_t::rotate_left,           3},
        {symbol_t::rotate_right,          scripting::ast::operator_t::rotate_right,          3},
        {symbol_t::less_than,             scripting::ast::operator_t::less_than,             4},
        {symbol_t::greater_than,          scripting::ast::operator_t::greater_than,          4},
        {symbol_t::less_or_equal_than,    scripting::ast::operator_t::less_or_equal_than,    4},
        {symbol_t::greater_or_equal_than, scripting::ast::operator_t::greater_or_equal_than, 4},
        {symbol_t::equals,                scripting::ast::operator_t::equals,                4},
        {symbol_t::different,             scripting::ast::operator_t::different,             4},
        {symbol_t::binary_and,            scripting::ast::operator_t::binary_and,            5},
        {symbol_t::binary_or,             scripting::ast::operator_t::binary_or,             5},
        {symbol_t::binary_xor,            scripting::ast::operator_t::binary_xor,            5},
        {symbol_t::logical_and,           scripting::ast::operator_t::logical_and,           6},
        {symbol_t::logical_or,            scripting::ast::operator_t::logical_or,            7},
    };

    binary_node node;
    auto current = code;

    // Wraps a successful operand attempt as `node.lhs` and advances `current`.
    const auto take_lhs = [&]<typename parsed_t>(parse_result<parsed_t> attempt) -> bool {
        if(not attempt.result.has_value()) {
            return false;
        }
        node.lhs = std::make_unique<scripting::ast::expression>(scripting::ast::expression{
            .location = current.front().location,
            .contents = std::make_unique<parsed_t>(std::move(attempt.result.value()))
        });
        current = attempt.rest;
        return true;
    };
    const bool has_lhs =
        take_lhs(try_literal_string_expression(current, errors))
        or take_lhs(try_literal_int_expression(current, errors))
        or take_lhs(try_unary_algebraic_expression(current, errors))
        or take_lhs(try_paren_expression(current, errors))
        or take_lhs(try_variable_expression(current, errors));
    if(not has_lhs) {
        return {std::nullopt, code};
    }

    // A binary operator must follow, otherwise this is not a binary expression at all.
    if(current.empty() or not holds_alternative<symbol_t>(current.front().value)) {
        return {std::nullopt, code};
    }
    const auto* matched = std::ranges::find(operator_table, get<symbol_t>(current.front().value), &operator_entry::symbol);
    if(matched == std::ranges::end(operator_table)) {
        return {std::nullopt, code};
    }
    const int own_precedence = matched->precedence;
    node.op = matched->op;
    node.location = current.front().location;
    current = current.subspan(1);

    auto operand = try_expression(current, errors);
    if(not operand.result) {
        errors.push_back(
            scripting::script_error{.location = node.location, .message = "Expected expression after binary operator"});
        return {std::nullopt, code};
    }

    // Returns the binary node held directly by `candidate` when this operator binds at least
    // as tightly as that node's operator (`<=` gives left associativity on ties), else nullptr.
    // Parenthesised operands are paren_expression nodes and are therefore never taken apart.
    const auto rotation_target = [&](scripting::ast::expression& candidate) -> binary_node* {
        auto* held = std::get_if<binary_ptr>(&candidate.contents);
        if(held == nullptr or not *held) {
            return nullptr;
        }
        const auto* entry = std::ranges::find(operator_table, (*held)->op, &operator_entry::op);
        if(entry == std::ranges::end(operator_table)) {
            return nullptr;
        }
        return own_precedence <= entry->precedence ? held->get() : nullptr;
    };

    // Precedence reordering: this operator must claim only the leftmost operand of the
    // right-hand tree. Walk down the left spine as long as the rule holds; stopping after one
    // level would turn `a - b - c - d` into `(a - (b - c)) - d`.
    // https://link.excalidraw.com/l/hxPegpAmTX/6d1BYX0rfKU
    if(binary_node* const top = rotation_target(operand.result.value()); top != nullptr) {
        binary_node* spine = top;
        while(spine->lhs != nullptr) {
            binary_node* const deeper = rotation_target(*spine->lhs);
            if(deeper == nullptr) {
                break;
            }
            spine = deeper;
        }
        // Take over the leftmost operand, then put the completed node in its place.
        node.rhs = std::move(spine->lhs);
        auto wrapped = scripting::ast::expression{
            .location = node.location,
            .contents = std::make_unique<binary_node>(std::move(node))
        };
        spine->lhs = std::make_unique<scripting::ast::expression>(std::move(wrapped));
        // The resulting root keeps the location of the operand expression it came from.
        top->location = operand.result.value().location;
        return {.result = std::move(*top), .rest = operand.rest};
    }

    // No reordering required
    node.rhs = std::make_unique<scripting::ast::expression>(std::move(operand.result.value()));
    return {.result = std::move(node), .rest = operand.rest};
}
/// Parses a single identifier token as a variable reference.
/// Never records an error; `errors` is accepted only for signature uniformity.
parse_result<scripting::ast::variable_expression> try_variable_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    if(code.empty() or not holds_alternative<scripting::ast::identifier>(code.front().value)) {
        return {std::nullopt, code};
    }
    auto& head = code.front();
    scripting::ast::variable_expression node;
    node.location = head.location;
    node.name = get<scripting::ast::identifier>(head.value);
    return {.result = std::move(node), .rest = code.subspan(1)};
}
/// Parses a single std::string token as a string literal.
/// Never records an error; `errors` is accepted only for signature uniformity.
parse_result<scripting::ast::literal_string_expression> try_literal_string_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    if(code.empty() or not holds_alternative<std::string>(code.front().value)) {
        return {std::nullopt, code};
    }
    auto& head = code.front();
    scripting::ast::literal_string_expression node;
    node.location = head.location;
    node.value = get<std::string>(head.value);
    return {.result = std::move(node), .rest = code.subspan(1)};
}
/// Parses a single int32_t token as an integer literal.
/// Never records an error; `errors` is accepted only for signature uniformity.
parse_result<scripting::ast::literal_int_expression> try_literal_int_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    if(code.empty() or not holds_alternative<int32_t>(code.front().value)) {
        return {std::nullopt, code};
    }
    auto& head = code.front();
    scripting::ast::literal_int_expression node;
    node.location = head.location;
    node.value = get<int32_t>(head.value);
    return {.result = std::move(node), .rest = code.subspan(1)};
}
/// Parses a whole token stream into a block of statements.
///
/// Statements are parsed one after another. When a statement cannot be parsed, the rest of
/// its line is skipped and parsing resumes on the next line so later diagnostics can still
/// be collected.
///
/// @param code    the complete token stream.
/// @param errors  receives every diagnostic produced while parsing.
/// @return the parsed block, or an empty block when `errors` is not empty at the end.
///
/// NOTE(review): a line that no statement parser recognises and that produces no diagnostic
/// is skipped silently; confirm whether such lines should be reported.
scripting::ast::block scripting::ast::parse(std::span<token> code, std::vector<scripting::script_error>& errors) {
    scripting::ast::block node;
    auto current = trim_newline(code);
    while(not current.empty()) {
        const auto pre_size = current.size();
        auto [expr, rest] = try_statement(current, errors);
        if(expr) {
            node.contents.push_back(std::move(expr.value()));
            current = rest;
        } else {
            // Recovery: drop tokens up to the next new line (or the end of input).
            bool progress = false;
            while(not (next_is_newline(current) or current.empty())) {
                current = current.subspan(1);
                progress = true;
            }
            if(not progress && not errors.empty()) {
                return scripting::ast::block{};
            }
            // Defensive guard against a recovery step that consumed nothing.
            if(not current.empty() && current.size() == pre_size) {
                errors.push_back(script_error{
                    .location = current.front().location,
                    .message = "Parsing stuck in infinite loop with no progress"
                });
            }
        }
        current = trim_newline(current);
    }
    if(not errors.empty()) {
        return scripting::ast::block{};
    }
    // Returning the local by name allows copy elision; std::move here would prevent it.
    return node;
}