#include #include #include "UserScript/parser.h" #include "UserScript.h" ////////////////////// /// PARSER HELPERS /// ////////////////////// using token = scripting::ast::token; using symbol_t = scripting::ast::symbol_t; template struct parse_result { std::optional result; std::span rest; operator bool() { return result.has_value(); } }; bool next_is_newline(std::span current) { return (not current.empty()) && (holds_alternative(current.front().value) && (get(current.front().value) == symbol_t::new_line)); } std::span trim_newline(std::span current) { while(next_is_newline(current)) { current = current.subspan(1); } return current; } /////////////////////////// /// PARSER DECLARATIONS /// /////////////////////////// parse_result try_expression(std::span code, std::vector& errors); parse_result try_binary_algebraic_expression(std::span code, std::vector& errors); parse_result try_variable_expression(std::span code, std::vector& errors); parse_result try_literal_int_expression(std::span code, std::vector& errors); parse_result try_literal_string_expression(std::span code, std::vector& errors); parse_result try_statement(std::span code, std::vector& errors); parse_result try_command_expr(std::span code, std::vector& errors); parse_result try_conditional(const std::span code, std::vector& errors, const std::string_view initiator = "if"); parse_result try_while_loop(const std::span code, std::vector& errors); parse_result try_paren_expression(std::span code, std::vector& errors); parse_result try_unary_algebraic_expression(std::span code, std::vector& errors); ////////////////////////// /// PARSER DEFINITIONS /// ////////////////////////// parse_result try_command_expr(std::span code, std::vector& errors) { scripting::ast::command_expression cmd; auto current = code; if(current.empty()) return {std::nullopt, code}; if(not holds_alternative(current.front().value)) return {std::nullopt, code}; if(get(current.front().value) != symbol_t::divide) return {std::nullopt, code}; cmd.location = current.front().location; current = current.subspan(1); if(current.empty()) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Expected command name"}); return {std::nullopt, code}; } if(not holds_alternative(current.front().value)) { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Expected command name"}); return {std::nullopt, code}; } cmd.name = get(current.front().value); current = current.subspan(1); while( not current.empty() and not ( holds_alternative(current.front().value) and get(current.front().value) == symbol_t::new_line ) and not ( holds_alternative(current.front().value) and get(current.front().value) == symbol_t::r_paren ) ) { auto [expr, rest] = try_expression(current, errors); if(not expr) { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Expected expression"}); return {std::nullopt, code}; } cmd.arguments.push_back(std::make_unique(std::move(expr.value()))); current = rest; } return {std::move(cmd), current}; } parse_result try_expression(std::span code, std::vector& errors) { scripting::ast::expression node; auto current = code; #ifdef HANDLE_EXPRESSION static_assert(false, "Found a macro name HANDLE_EXPRESSION, halting"); #endif #define HANDLE_EXPRESSION(type) \ node = scripting::ast::expression{ \ .location = current.front().location, \ .contents = std::make_unique(std::move(expr.value())) \ }; \ current = rest; if(auto [expr, rest] = try_binary_algebraic_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::binary_algebraic_expression) } else if(auto [expr, rest] = try_literal_string_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::literal_string_expression) } else if(auto [expr, rest] = try_literal_int_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::literal_int_expression) } else if(auto [expr, rest] = try_unary_algebraic_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::unary_algebraic_expression) } else if(auto [expr, rest] = try_paren_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::paren_expression) } else if(auto [expr, rest] = try_variable_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::variable_expression) } else { return {std::nullopt, code}; } #undef HANDLE_EXPRESSION return {.result = std::move(node), .rest = current}; } parse_result try_conditional(const std::span code, std::vector& errors, const std::string_view initiator) { #ifdef FAILURE static_assert(false, "Found a macro name FAILURE, halting"); #endif #define FAILURE {.result = std::nullopt, .rest = code} scripting::ast::conditional result{}; if(code.empty()) return FAILURE; auto current = code; result.location = current.front().location; const auto endif_found = [&]() -> bool { if (current.empty()) return false; if (not holds_alternative(current.front().value)) return false; if (get(current.front().value).value != "endif") return false; return true; }; // chomps a new line if available, returns true if it failed const auto MISSING_NEW_LINE = [&]() -> bool { if (current.empty()) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"}); return true; } auto new_line = current.front(); if (not holds_alternative(current.front().value)) { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Expected new line"}); return true; } if (get(current.front().value) != symbol_t::new_line) { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Expected new line"}); return true; } current = current.subspan(1); return false; }; if (current.empty()) return FAILURE; if (not holds_alternative(current.front().value)) return FAILURE; if (get(current.front().value).value != initiator) return FAILURE; current = current.subspan(1); // Read the condition auto conditional_node = try_expression(current, errors); if (not conditional_node.result) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Expected expression in conditional"}); return FAILURE; } result.condition = std::make_unique(std::move(conditional_node.result.value())); current = conditional_node.rest; if(MISSING_NEW_LINE()) return FAILURE; if (current.empty()) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"}); return FAILURE; } result.on_condition = std::make_unique(); result.on_condition->location = current.front().location; bool else_mode = false; while (not endif_found()) { const auto error_count = errors.size(); // Handles elseif sequences as nested ifs in the else block auto nester = try_conditional(current, errors, "elseif"); if(nester.result) { result.otherwise = std::make_unique(); result.otherwise->location = current.front().location; result.otherwise->contents.push_back(scripting::ast::statement{ .location = current.front().location, .contents = std::make_unique(std::move(nester.result.value())), }); current = nester.rest; return {.result = std::move(result), .rest = current}; } else if(error_count != errors.size()) { return FAILURE; } // Handles code if(auto block_contents = try_statement(current, errors); block_contents.result) { if(not else_mode) { result.on_condition->contents.push_back(std::move(block_contents.result.value())); } else { result.otherwise->contents.push_back(std::move(block_contents.result.value())); } current = block_contents.rest; } else { if(current.empty()) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"}); return FAILURE; } // Handles switching to else mode if( holds_alternative(current.front().value) && get(current.front().value).value == "else" ) { auto loc = current.front().location; current = current.subspan(1); if(MISSING_NEW_LINE()) return FAILURE; if(else_mode) { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Repeated else in conditional"}); } else { else_mode = true; result.otherwise = std::make_unique(); result.otherwise->location = std::move(loc); } }else { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Unexpected expression content"}); return FAILURE; } } } // checks for endif if(endif_found()) { current = current.subspan(1); if(not current.empty() && MISSING_NEW_LINE()) return FAILURE; } return {.result = std::move(result), .rest = current}; #undef FAILURE } parse_result try_while_loop(const std::span code, std::vector& errors) { #ifdef FAILURE static_assert(false, "Found a macro name FAILURE, halting"); #endif #define FAILURE {.result = std::nullopt, .rest = code} scripting::ast::while_loop result{}; if(code.empty()) return FAILURE; auto current = code; result.location = current.front().location; const auto endwhile_found = [&]() -> bool { if (current.empty()) return false; if (not holds_alternative(current.front().value)) return false; if (get(current.front().value).value != "endwhile") return false; return true; }; // chomps a new line if available, returns true if it failed const auto MISSING_NEW_LINE = [&]() -> bool { if (current.empty()) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"}); return true; } auto new_line = current.front(); if (not holds_alternative(current.front().value)) { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Expected new line"}); return true; } if (get(current.front().value) != symbol_t::new_line) { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Expected new line"}); return true; } current = current.subspan(1); return false; }; if (current.empty()) return FAILURE; if (not holds_alternative(current.front().value)) return FAILURE; if (get(current.front().value).value != "while") return FAILURE; current = current.subspan(1); // Read the condition auto conditional_node = try_expression(current, errors); if (not conditional_node.result) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Expected expression in while loop"}); return FAILURE; } result.condition = std::make_unique(std::move(conditional_node.result.value())); current = conditional_node.rest; if(MISSING_NEW_LINE()) return FAILURE; if (current.empty()) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"}); return FAILURE; } result.on_condition = std::make_unique(); result.on_condition->location = current.front().location; while (not endwhile_found()) { const auto error_count = errors.size(); if(error_count != errors.size()) { return FAILURE; } // Handles code if(auto block_contents = try_statement(current, errors); block_contents.result) { result.on_condition->contents.push_back(std::move(block_contents.result.value())); current = block_contents.rest; } else { if(current.empty()) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"}); return FAILURE; } else { errors.push_back( scripting::script_error{.location = current.front().location, .message = "Unexpected statement in block"}); return FAILURE; } } } // checks for endif if(endwhile_found()) { current = current.subspan(1); if(not current.empty() && MISSING_NEW_LINE()) return FAILURE; } return {.result = std::move(result), .rest = current}; #undef FAILURE } parse_result try_paren_expression(std::span code, std::vector& errors) { scripting::ast::paren_expression paren; auto current = code; if(current.empty()) return {std::nullopt, code}; if(not holds_alternative(current.front().value)) return {std::nullopt, code}; if(get(current.front().value) != symbol_t::l_paren) return {std::nullopt, code}; paren.location = current.front().location; current = current.subspan(1); if(auto [cmd, cmd_rest] = try_command_expr(current, errors); cmd) { paren.content = std::make_unique(std::move(cmd.value())); current = cmd_rest; } else if(auto [expr, expr_rest] = try_expression(current, errors); expr) { paren.content = std::make_unique(std::move(expr.value())); current = expr_rest; } else { errors.push_back( scripting::script_error{.location = (current.empty() ? paren.location : current.front().location), .message = "Expected either a command or some type of expression"}); return {std::nullopt, code}; } if(current.empty()) { errors.push_back( scripting::script_error{.location = paren.location, .message = "No matching parenthesis"}); return {std::nullopt, code}; } if(not holds_alternative(current.front().value) or get(current.front().value) != symbol_t::r_paren) { errors.push_back( scripting::script_error{.location = current.front().location, .message = "No matching parenthesis, expected a closing parenthesis"}); return {std::nullopt, code}; } current = current.subspan(1); return {.result = std::move(paren), .rest = current}; } parse_result try_statement(std::span code, std::vector& errors) { scripting::ast::statement node; auto current = code; current = trim_newline(current); if(auto [expr, rest] = try_conditional(current, errors); expr) { node.contents = std::make_unique(std::move(expr.value())); node.location = current.front().location; current = rest; } else if(auto [expr, rest] = try_command_expr(current, errors); expr) { node.contents = std::make_unique(std::move(expr.value())); node.location = current.front().location; current = rest; } else if(auto [expr, rest] = try_while_loop(current, errors); expr) { node.contents = std::make_unique(std::move(expr.value())); node.location = current.front().location; current = rest; } else return {std::nullopt, code}; current = trim_newline(current); return {.result = std::move(node), .rest = current}; } parse_result try_unary_algebraic_expression(std::span code, std::vector& errors) { constexpr std::array lexer_operators = {symbol_t::binary_not, symbol_t::logical_not}; constexpr std::array ast_operators = {scripting::ast::operator_t::binary_not, scripting::ast::operator_t::logical_not}; static_assert(lexer_operators.size() == ast_operators.size()); scripting::ast::unary_algebraic_expression node; auto current = code; if(current.empty()) return {std::nullopt, code}; if(not holds_alternative(current.front().value)) return {std::nullopt, code}; auto res = std::ranges::find(lexer_operators, get(current.front().value)); if(res == lexer_operators.end()) return {std::nullopt, code}; node.location = current.front().location; node.op = *(ast_operators.begin() + (res - lexer_operators.begin())); current = current.subspan(1); /// TODO: Gives the lowest priority to unaries (aka, they are applied last) auto operand = try_expression(current, errors); if (not operand.result) { errors.push_back( scripting::script_error{.location = code.front().location, .message = "Expected expression after unary operator"}); return {std::nullopt, code}; } node.content = std::make_unique(std::move(operand.result.value())); current = operand.rest; return {.result = std::move(node), .rest = current}; } parse_result try_binary_algebraic_expression(std::span code, std::vector& errors) { // The two following arrays are the operator mappings constexpr std::array lexer_operators = { symbol_t::divide, symbol_t::modulo, symbol_t::multiply, symbol_t::subtract, symbol_t::add, symbol_t::bitshift_left, symbol_t::bitshift_right, symbol_t::rotate_left, symbol_t::rotate_right, symbol_t::less_than, symbol_t::greater_than, symbol_t::less_or_equal_than, symbol_t::greater_or_equal_than, symbol_t::equals, symbol_t::different, symbol_t::binary_and, symbol_t::binary_or, symbol_t::binary_xor, symbol_t::logical_and, symbol_t::logical_or, }; constexpr std::array ast_operators = { scripting::ast::operator_t::divide, scripting::ast::operator_t::modulo, scripting::ast::operator_t::multiply, scripting::ast::operator_t::subtract, scripting::ast::operator_t::add, scripting::ast::operator_t::bitshift_left, scripting::ast::operator_t::bitshift_right, scripting::ast::operator_t::rotate_left, scripting::ast::operator_t::rotate_right, scripting::ast::operator_t::less_than, scripting::ast::operator_t::greater_than, scripting::ast::operator_t::less_or_equal_than, scripting::ast::operator_t::greater_or_equal_than, scripting::ast::operator_t::equals, scripting::ast::operator_t::different, scripting::ast::operator_t::binary_and, scripting::ast::operator_t::binary_or, scripting::ast::operator_t::binary_xor, scripting::ast::operator_t::logical_and, scripting::ast::operator_t::logical_or, }; constexpr std::array ast_precedences = { 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 6, 7, }; static_assert(lexer_operators.size() == ast_operators.size()); static_assert(ast_precedences.size() == ast_operators.size()); scripting::ast::binary_algebraic_expression node; auto current = code; #ifdef HANDLE_EXPRESSION static_assert(false, "Found a macro name HANDLE_EXPRESSION, halting"); #endif #define HANDLE_EXPRESSION(type) \ node.lhs = std::make_unique(scripting::ast::expression{ \ .location = current.front().location, \ .contents = std::make_unique(std::move(expr.value())) \ }); \ current = rest; if(auto [expr, rest] = try_literal_string_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::literal_string_expression) } else if(auto [expr, rest] = try_literal_int_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::literal_int_expression) } else if(auto [expr, rest] = try_unary_algebraic_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::unary_algebraic_expression) } else if(auto [expr, rest] = try_paren_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::paren_expression) } else if(auto [expr, rest] = try_variable_expression(current, errors); expr) { HANDLE_EXPRESSION(scripting::ast::variable_expression) } else { return {std::nullopt, code}; } #undef HANDLE_EXPRESSION if(current.empty()) { return {std::nullopt, code}; } if(not holds_alternative(current.front().value)) { return {std::nullopt, code}; } auto res = std::ranges::find(lexer_operators, get(current.front().value)); if(res == lexer_operators.end()) { return {std::nullopt, code}; } auto rhs_idx = (res - lexer_operators.begin()); node.op = *(ast_operators.begin() + rhs_idx); node.location = current.front().location; current = current.subspan(1); auto operand = try_expression(current, errors); if (not operand.result) { errors.push_back( scripting::script_error{.location = node.location, .message = "Expected expression after binary operator"}); return {std::nullopt, code}; } /// Check if our "large (rhs) bite" has an operation precedence that is bigger and in that case, swap the operations around //... We may need to do that iteratively until we risk a priority reversal. This is basically shifting the ast to the left until //... the precedence are "heapified" and left associative if(std::holds_alternative>(operand.result.value().contents)) { // Must check for precedence reordering auto& lhs = std::get>(operand.result.value().contents); auto lhs_it = std::ranges::find(ast_operators, lhs->op); auto lhs_idx = lhs_it - ast_operators.begin(); // >= ensures left associativity if(ast_precedences[rhs_idx] <= ast_precedences[lhs_idx]) { // Precedence reordering required // https://link.excalidraw.com/l/hxPegpAmTX/6d1BYX0rfKU node.rhs = std::move(lhs->lhs); scripting::ast::binary_algebraic_expression reordered{ .location = operand.result.value().location, .lhs = std::make_unique(scripting::ast::expression{ .location = node.location, .contents = std::make_unique(std::move(node)) }), .op = lhs->op, .rhs = std::move(lhs->rhs), }; current = operand.rest; return {.result = std::move(reordered), .rest = current}; } } // No reordering required node.rhs = std::make_unique(std::move(operand.result.value())); current = operand.rest; return {.result = std::move(node), .rest = current}; } parse_result try_variable_expression(std::span code, std::vector& errors) { scripting::ast::variable_expression node; auto current = code; if(current.empty()) return {std::nullopt, code}; if(not holds_alternative(current.front().value)) return {std::nullopt, code}; node.location = current.front().location; node.name = get(current.front().value); current = current.subspan(1); return {.result = std::move(node), .rest = current}; } parse_result try_literal_string_expression(std::span code, std::vector& errors) { scripting::ast::literal_string_expression node; auto current = code; if(current.empty()) return {std::nullopt, code}; if(not holds_alternative(current.front().value)) return {std::nullopt, code}; node.location = current.front().location; node.value = get(current.front().value); current = current.subspan(1); return {.result = std::move(node), .rest = current}; } parse_result try_literal_int_expression(std::span code, std::vector& errors) { scripting::ast::literal_int_expression node; auto current = code; if(current.empty()) return {std::nullopt, code}; if(not holds_alternative(current.front().value)) return {std::nullopt, code}; node.location = current.front().location; node.value = get(current.front().value); current = current.subspan(1); return {.result = std::move(node), .rest = current}; } scripting::ast::block scripting::ast::parse(std::span code, std::vector& errors) { scripting::ast::block node; auto current = trim_newline(code); while(not current.empty()) { auto pre_size = current.size(); auto [expr, rest] = try_statement(current, errors); if(expr) { node.contents.push_back(std::move(expr.value())); current = rest; } else { bool progress = false; while(not (next_is_newline(current) or current.empty())) { current = current.subspan(1); progress = true; } if(not progress && not errors.empty()) { return scripting::ast::block{}; } if(not current.empty() && current.size() == pre_size) { errors.push_back(script_error{ .location = current.front().location, .message = "Parsing stuck in infinite loop with no progress" }); } } current = trim_newline(current); } if(not errors.empty()) { return scripting::ast::block{}; } return std::move(node); }