|
#include <algorithm>
#include <array>
#include <bit>
#include <cctype>
#include <charconv>
#include <climits>
#include <cstdint>
#include <iostream>
#include <limits>
#include <map>
#include <memory>
#include <optional>
#include <span>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
#include <variant>
#include <vector>

#include "UserScript/parser.h"
#include "UserScript.h"
|
|
|
|
/////////////////
|
|
/// CONSTANTS ///
|
|
/////////////////
|
|
|
|
using symbol_t = scripting::ast::symbol_t;
|
|
|
|
constexpr std::array<std::pair<std::string_view, symbol_t>, 25> operators {
|
|
std::pair<std::string_view, symbol_t>{"(", symbol_t::l_paren},
|
|
std::pair<std::string_view, symbol_t>{")", symbol_t::r_paren},
|
|
std::pair<std::string_view, symbol_t>{"!=", symbol_t::different},
|
|
std::pair<std::string_view, symbol_t>{"!", symbol_t::logical_not},
|
|
std::pair<std::string_view, symbol_t>{"~", symbol_t::binary_not},
|
|
std::pair<std::string_view, symbol_t>{"/", symbol_t::divide},
|
|
std::pair<std::string_view, symbol_t>{"%", symbol_t::modulo},
|
|
std::pair<std::string_view, symbol_t>{"*", symbol_t::multiply},
|
|
std::pair<std::string_view, symbol_t>{"-", symbol_t::subtract},
|
|
std::pair<std::string_view, symbol_t>{"+", symbol_t::add},
|
|
std::pair<std::string_view, symbol_t>{"<<<", symbol_t::rotate_left},
|
|
std::pair<std::string_view, symbol_t>{">>>", symbol_t::rotate_right},
|
|
std::pair<std::string_view, symbol_t>{"<<", symbol_t::bitshift_left},
|
|
std::pair<std::string_view, symbol_t>{">>", symbol_t::bitshift_right},
|
|
std::pair<std::string_view, symbol_t>{"<=", symbol_t::less_or_equal_than},
|
|
std::pair<std::string_view, symbol_t>{">=", symbol_t::greater_or_equal_than},
|
|
std::pair<std::string_view, symbol_t>{"<", symbol_t::less_than},
|
|
std::pair<std::string_view, symbol_t>{">", symbol_t::greater_than},
|
|
std::pair<std::string_view, symbol_t>{"==", symbol_t::equals},
|
|
std::pair<std::string_view, symbol_t>{"&&", symbol_t::logical_and},
|
|
std::pair<std::string_view, symbol_t>{"&", symbol_t::binary_and},
|
|
std::pair<std::string_view, symbol_t>{"||", symbol_t::logical_or},
|
|
std::pair<std::string_view, symbol_t>{"|", symbol_t::binary_or},
|
|
std::pair<std::string_view, symbol_t>{"^", symbol_t::binary_xor},
|
|
std::pair<std::string_view, symbol_t>{"\n", symbol_t::new_line}
|
|
};
|
|
|
|
// Character sequences that can never be part of an identifier: every operator
// spelling plus "=". try_identifier stops as soon as the remaining input
// starts with one of these.
//
// Stored as a constexpr std::array (rather than a std::vector) so the table is
// a true compile-time constant: no heap allocation and no dynamic
// initialisation at program start-up.
constexpr std::array<std::string_view, 26> reserved_character_sequences {
    "(",
    ")",
    "!=",
    "!",
    "~",
    "/",
    "%",
    "*",
    "-",
    "+",
    "<<<",
    ">>>",
    "<<",
    ">>",
    "<=",
    ">=",
    "<",
    ">",
    "==",
    "&&",
    "&",
    "||",
    "|",
    "^",
    "=",
    "\n"
};
|
|
|
|
/////////////////////
|
|
/// LEXER HELPERS ///
|
|
/////////////////////
|
|
|
|
using token = scripting::ast::token;
|
|
|
|
struct lex_token_result {
|
|
token tok;
|
|
std::string_view rest;
|
|
};
|
|
|
|
// Non-owning view over the bytes of one UTF-8 encoded code point.
struct rune_ref {
    // The encoded bytes (one byte for ASCII, several for anything else).
    std::string_view str;

    // Decodes the viewed bytes into the code point they encode.
    // Returns 0 for an empty view. No validation is performed here; the bytes
    // are taken at face value.
    explicit operator uint32_t() const {
        if(str.empty()) return 0;

        // Go through an unsigned byte so values >= 0x80 never sign-extend.
        const auto lead = static_cast<uint8_t>(str.front());
        if(str.size() == 1) return lead;

        // The lead byte of an N-byte sequence is N one-bits, a zero bit, and
        // then 7 - N payload bits. Keep ALL of those payload bits (the mask
        // used to be a single bit, which corrupted most multi-byte runes).
        // Over-long views get an empty lead payload instead of an invalid shift.
        const auto lead_payload_bits = str.size() < 7 ? 7 - str.size() : 0;
        uint32_t rune = lead & ((uint32_t{1} << lead_payload_bits) - 1);

        // Every following byte contributes its low six bits.
        for(const char continuation : str.substr(1)) {
            rune = (rune << 6) | (static_cast<uint8_t>(continuation) & 0b00111111u);
        }
        return rune;
    }

    // True when the decoded code point is one of the horizontal space
    // characters the lexer skips. Tab, carriage return and line feed are
    // deliberately not in this list (line feed is a token of its own).
    [[nodiscard]] bool is_space() const {
        // NOTE(review): the previous table listed 0x2002 twice; the second
        // occurrence may have been meant to be another code point - confirm.
        constexpr std::array<uint32_t, 18> spaces{
            0x0020, 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
            0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000
        };

        return std::find(spaces.begin(), spaces.end(), static_cast<uint32_t>(*this)) != spaces.end();
    }
};
|
|
|
|
struct try_rune_result {
|
|
rune_ref rune;
|
|
std::string_view rest;
|
|
};
|
|
|
|
std::shared_ptr<scripting::code_location> get_loc(std::string_view original, std::string_view rest, std::shared_ptr<const std::string> last_line) {
|
|
// TODO: Check everything again for weird ass cases
|
|
if(original.empty()) {
|
|
return std::make_shared<scripting::code_location>(scripting::code_location{
|
|
.line_contents = std::make_shared<std::string>(),
|
|
.line_number = (int32_t)std::clamp<size_t>(1, 1, std::numeric_limits<int32_t>::max()),
|
|
.column_number = (int32_t)std::clamp<size_t>(1 + 1, 1, std::numeric_limits<int32_t>::max())
|
|
});
|
|
}
|
|
const auto before = original.substr(0, original.size() - rest.size());
|
|
const auto line_no = std::ranges::count(before, '\n') + 1;
|
|
const auto line_start = std::find(before.crbegin(), before.crend(), '\n');
|
|
const auto column_no = line_start != before.crend() ? (line_start - before.crbegin()) : before.size();
|
|
const auto back_tracked = before.size() - column_no;
|
|
const auto front_tracked = rest.empty() ? original.size() : before.size() + (std::ranges::find(rest, '\n') - rest.begin());
|
|
const std::string_view current{original.begin() + back_tracked, original.begin() + front_tracked};
|
|
|
|
if(not last_line || *last_line != current) {
|
|
last_line = std::make_shared<std::string>(current);
|
|
}
|
|
|
|
return std::make_shared<scripting::code_location>(scripting::code_location{
|
|
.line_contents = last_line,
|
|
.line_number = (int32_t)std::clamp<size_t>(line_no, 1, std::numeric_limits<int32_t>::max()),
|
|
.column_number = (int32_t)std::clamp<size_t>(column_no + 1, 1, std::numeric_limits<int32_t>::max())
|
|
});
|
|
}
|
|
|
|
////////////////////
|
|
/// LEXER PROPER ///
|
|
////////////////////
|
|
|
|
// Splits one UTF-8 encoded rune off the front of `text`.
//
// Returns the rune and the remaining text, or std::nullopt when `text` is
// empty or does not start with a usable sequence. An error is appended to
// `errors` only when a plausible lead byte (2 to 6 leading one-bits) is
// followed by too few bytes or by bytes that are not continuation bytes; an
// empty input, a stray continuation byte or an impossible lead byte fail
// without recording anything.
auto try_rune(std::string_view text, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<try_rune_result> {
    static_assert(CHAR_BIT == 8, "Get your weird ass cpu outta here");

    if(text.empty()) return std::nullopt;

    // Number of consecutive one-bits at the top of a byte.
    const auto leading_ones = [](char byte) -> int {
        return std::countl_one(static_cast<uint8_t>(byte));
    };

    const int marker = leading_ones(text.front());

    // 0xxxxxxx: plain ASCII, a rune of exactly one byte.
    if(marker == 0) return try_rune_result{text.substr(0, 1), text.substr(1)};

    // 10xxxxxx is the middle of a sequence; 7 or 8 one-bits never start one.
    if(marker == 1 or marker >= 7) return std::nullopt;

    // The count of leading one-bits is the length of the whole sequence.
    const auto length = static_cast<size_t>(marker);
    const auto rune = text.substr(0, length);

    const bool complete = text.size() >= length;
    const bool well_formed = complete and std::ranges::all_of(rune.substr(1), [&](char byte) {
        return leading_ones(byte) == 1;
    });

    if(not well_formed) {
        errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"});
        return std::nullopt;
    }

    return try_rune_result{rune, text.substr(length)};
}
|
|
// Lexes a double-quoted string literal from the front of `view`.
//
// Leading whitespace (as classified by std::isspace) is skipped first.
// Supported escapes: \\ \a \b \f \n \r \t \v \' \" , exactly three octal
// digits (\ooo, first digit 0-7) and exactly two hexadecimal digits (\xhh).
// Any other escaped character stands for itself.
//
// Returns the string token and the input after the closing quote, or
// std::nullopt when the input does not start with a string literal or the
// literal is malformed. Every malformed literal appends an error to `errors`.
//
// NOTE(review): std::isspace also accepts '\n', so a line break directly in
// front of a string literal is skipped here instead of becoming a new_line
// token - confirm whether that is intended.
constexpr auto try_string = [](std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // Value of a hexadecimal digit, or -1 for any other character. Written as
    // comparisons so a negative (signed) char can never index out of bounds.
    constexpr auto hex_value = [](char c) -> int {
        if(c >= '0' && c <= '9') return c - '0';
        if(c >= 'a' && c <= 'f') return c - 'a' + 10;
        if(c >= 'A' && c <= 'F') return c - 'A' + 10;
        return -1;
    };

    // Records `message` at the current location and signals failure.
    const auto fail = [&](const char* message) -> std::optional<lex_token_result> {
        errors.push_back(scripting::script_error{.location = location, .message = message});
        return std::nullopt;
    };

    auto it = view.begin();
    const auto end = view.end();

    // std::isspace requires a value representable as unsigned char.
    while(it != end and std::isspace(static_cast<unsigned char>(*it))) ++it;
    if(it == end or *it != '"') return std::nullopt;

    std::string generated;
    while(true) {
        ++it;
        if(it == end) return fail("Unterminated string");

        switch(*it) {
            case '"':
                // Closing quote: the token ends here.
                ++it;
                return lex_token_result {
                    token{.location = location, .value = std::move(generated)},
                    std::string_view(it, end)
                };

            case '\\':
                ++it;
                // A backslash at the very end of the input must stop here;
                // reading the escaped character would run past the end.
                if(it == end) return fail("Unterminated string");

                switch(*it) {
                    case 'a': generated += '\a'; break;
                    case 'b': generated += '\b'; break;
                    case 'f': generated += '\f'; break;
                    case 'n': generated += '\n'; break;
                    case 'r': generated += '\r'; break;
                    case 't': generated += '\t'; break;
                    case 'v': generated += '\v'; break;

                    case '0': [[fallthrough]];
                    case '1': [[fallthrough]];
                    case '2': [[fallthrough]];
                    case '3': [[fallthrough]];
                    case '4': [[fallthrough]];
                    case '5': [[fallthrough]];
                    case '6': [[fallthrough]];
                    case '7':
                    {
                        // Exactly three octal digits. Values above 0377 wrap
                        // around to a single byte.
                        unsigned value = 0;
                        for(int position = 0; position < 3; ++position) {
                            if(position != 0) {
                                ++it;
                                if(it == end) return fail("Unterminated string");
                            }
                            const auto digit = static_cast<uint8_t>(*it - '0');
                            if(digit > 7) {
                                errors.push_back(scripting::script_error{.location = location, .message = "Bad octal value in string"});
                            }
                            value = value * 8 + digit;
                        }
                        generated += static_cast<char>(static_cast<unsigned char>(value));
                        break;
                    }

                    case 'x':
                    {
                        // Exactly two hexadecimal digits.
                        int value = 0;
                        for(int position = 0; position < 2; ++position) {
                            ++it;
                            if(it == end) return fail("Unterminated string");
                            const int nibble = hex_value(*it);
                            if(nibble < 0) return fail("Bad hexadecimal value in string");
                            value = value * 16 + nibble;
                        }
                        generated += static_cast<char>(static_cast<unsigned char>(value));
                        break;
                    }

                    default:
                        // Covers \\ \' \" and any unknown escape: the escaped
                        // character stands for itself.
                        generated += *it;
                        break;
                }
                break;

            default:
                generated += *it;
                break;
        }
    }
};
|
|
// Lexes a decimal 32-bit integer literal from the front of `view`.
//
// Returns the integer token and the input after the digits, or std::nullopt
// when the input does not start with an integer. A literal that does not fit
// in an int32_t appends an error to `errors` and also yields std::nullopt
// (previously the value was read uninitialised in that case).
constexpr auto try_int32 = [](std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // std::from_chars takes raw pointers; string_view iterators are not
    // guaranteed to be pointers on every standard library.
    const char* const first = view.data();
    const char* const last = first + view.size();

    int32_t parsed = 0;
    const auto [stop, status] = std::from_chars(first, last, parsed);

    // Nothing was consumed: this is not a number at all.
    if(stop == first) return std::nullopt;

    // Digits were consumed but the value is not representable.
    if(status != std::errc{}) {
        errors.push_back(scripting::script_error{.location = location, .message = "Integer literal out of range"});
        return std::nullopt;
    }

    return lex_token_result{
        // Copy (not move) the location so the caller's reference stays
        // usable, like in the other lexing helpers.
        token{.location = location, .value = parsed},
        std::string_view(stop, static_cast<size_t>(last - stop))
    };
};
|
|
// Lexes one operator from the front of `code`.
//
// The `operators` table is searched in order and the first spelling that
// prefixes `code` wins. Returns the symbol token and the input after the
// operator, or std::nullopt when no operator matches. `errors` is not
// touched.
std::optional<lex_token_result> try_operator(std::string_view code, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) {
    const auto match = std::ranges::find_if(operators, [code](const auto& entry) {
        return code.starts_with(entry.first);
    });
    if(match == operators.end()) return std::nullopt;

    const auto& [representation, type] = *match;
    return lex_token_result{
        token{.location = location, .value = type},
        code.substr(representation.size())
    };
}
|
|
// Lexes an identifier from the front of `view`.
//
// Runes are collected until the input ends, a reserved character sequence
// starts, or a space rune is met (that space is consumed as well). Returns
// the identifier token and the remaining input, or std::nullopt when nothing
// was collected or the input is not valid UTF-8 (an error is appended in the
// latter case).
auto try_identifier(std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // True when `text` begins with a sequence that ends an identifier.
    const auto at_reserved_sequence = [](std::string_view text) -> bool {
        for(const auto sequence : reserved_character_sequences) {
            if(text.starts_with(sequence)) return true;
        }
        return false;
    };

    std::string collected;

    while(not view.empty() and not at_reserved_sequence(view)) {
        const auto maybe_rune = try_rune(view, location, errors);
        if(not maybe_rune) {
            errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"});
            return std::nullopt;
        }

        // The rune is consumed whether it is a space or part of the name.
        view = maybe_rune->rest;
        if(maybe_rune->rune.is_space()) break;

        collected += maybe_rune->rune.str;
    }

    if(collected.empty()) return std::nullopt;

    return lex_token_result{
        .tok = token{
            .location = location,
            .value = scripting::ast::identifier{.location = location, .value = std::move(collected)}
        },
        .rest = view
    };
}
|
|
|
|
std::vector<token> scripting::ast::lex(const std::string& code, std::vector<scripting::script_error>& errors) {
|
|
std::vector<token> return_value;
|
|
std::string_view current = code;
|
|
std::shared_ptr<const std::string> last_line;
|
|
|
|
while(not current.empty()) {
|
|
for(;;) {
|
|
if(current.empty()) break;
|
|
auto location = get_loc(code, current, last_line);
|
|
auto c = try_rune(current, location, errors);
|
|
if(not c.has_value()) {
|
|
errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 encoding detected while trimming space"});
|
|
return return_value;
|
|
} else {
|
|
if(c.value().rune.is_space()) {
|
|
current = c.value().rest;
|
|
} else break;
|
|
}
|
|
}
|
|
|
|
auto location = get_loc(code, current, last_line);
|
|
last_line = location->line_contents;
|
|
auto res = try_string(current, location, errors);
|
|
if (!res) res = try_operator(current, location, errors);
|
|
if (!res) res = try_int32(current, location, errors);
|
|
if (!res) res = try_identifier(current, location, errors);
|
|
if(res.has_value()) {
|
|
current = res.value().rest;
|
|
return_value.emplace_back(std::move(res.value().tok));
|
|
} else {
|
|
errors.push_back(scripting::script_error{.location = location, .message = "Unknown token"});
|
|
return return_value;
|
|
}
|
|
}
|
|
|
|
return return_value;
|
|
}
|
|
|
|
//////////////////////
|
|
/// PARSER HELPERS ///
|
|
//////////////////////
|
|
|
|
template<typename T>
|
|
struct parse_result {
|
|
std::optional<T> result;
|
|
std::span<token> rest;
|
|
operator bool() { return result.has_value(); }
|
|
};
|
|
|
|
// True when `current` is not empty and its first token is the new_line symbol.
bool next_is_newline(std::span<token> current) {
    if(current.empty()) return false;

    const auto& value = current.front().value;
    if(not holds_alternative<symbol_t>(value)) return false;

    return get<symbol_t>(value) == symbol_t::new_line;
}
|
|
|
|
// Returns `current` without its leading run of new_line tokens.
std::span<token> trim_newline(std::span<token> current) {
    size_t skipped = 0;
    while(next_is_newline(current.subspan(skipped))) {
        ++skipped;
    }
    return current.subspan(skipped);
}
|
|
|
|
///////////////////////////
|
|
/// PARSER DECLARATIONS ///
|
|
///////////////////////////
|
|
|
|
// Forward declarations of the mutually recursive parsers. Each one takes the
// remaining tokens plus the error sink and returns a parse_result.

// Statements and control flow.
parse_result<scripting::ast::statement> try_statement(std::span<token> code, std::vector<scripting::script_error>& errors);
parse_result<scripting::ast::command_expression> try_command_expr(std::span<token> code, std::vector<scripting::script_error>& errors);
// `initiator` is the keyword that opens the conditional ("if" by default).
parse_result<scripting::ast::conditional> try_conditional(const std::span<token> code, std::vector<scripting::script_error>& errors, const std::string_view initiator = "if");
parse_result<scripting::ast::while_loop> try_while_loop(const std::span<token> code, std::vector<scripting::script_error>& errors);

// Expressions.
parse_result<scripting::ast::expression> try_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
parse_result<scripting::ast::binary_algebraic_expression> try_binary_algebraic_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
parse_result<scripting::ast::unary_algebraic_expression> try_unary_algebraic_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
parse_result<scripting::ast::paren_expression> try_paren_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
parse_result<scripting::ast::variable_expression> try_variable_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
parse_result<scripting::ast::literal_int_expression> try_literal_int_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
parse_result<scripting::ast::literal_string_expression> try_literal_string_expression(std::span<token> code, std::vector<scripting::script_error>& errors);
|
|
|
|
|
|
//////////////////////////
|
|
/// PARSER DEFINITIONS ///
|
|
//////////////////////////
|
|
|
|
// Parses a command: the divide symbol ("/"), an identifier naming the
// command, then zero or more argument expressions up to (not including) the
// next new_line or closing parenthesis, or the end of the tokens.
//
// Returns the command and the tokens after it. On failure the result is
// empty and `rest` is the original `code`; an error is appended unless the
// input simply does not start with "/".
parse_result<scripting::ast::command_expression> try_command_expr(std::span<token> code, std::vector<scripting::script_error>& errors) {
    // True when `tok` holds exactly the symbol `wanted`.
    const auto is_symbol = [](const token& tok, symbol_t wanted) -> bool {
        return holds_alternative<symbol_t>(tok.value) and get<symbol_t>(tok.value) == wanted;
    };

    if(code.empty() or not is_symbol(code.front(), symbol_t::divide)) return {std::nullopt, code};

    scripting::ast::command_expression cmd;
    cmd.location = code.front().location;
    auto current = code.subspan(1);

    // The command name must follow the "/" directly.
    if(current.empty() or not holds_alternative<scripting::ast::identifier>(current.front().value)) {
        // Blame the "/" when nothing follows it, otherwise the offending token.
        const auto& where = current.empty() ? code.front().location : current.front().location;
        errors.push_back(
            scripting::script_error{.location = where, .message = "Expected command name"});
        return {std::nullopt, code};
    }
    cmd.name = get<scripting::ast::identifier>(current.front().value);
    current = current.subspan(1);

    // Arguments run until the end of the line, a ")" or the end of the input.
    const auto arguments_done = [&]() -> bool {
        return current.empty()
            or is_symbol(current.front(), symbol_t::new_line)
            or is_symbol(current.front(), symbol_t::r_paren);
    };

    while(not arguments_done()) {
        auto argument = try_expression(current, errors);
        if(not argument.result) {
            errors.push_back(
                scripting::script_error{.location = current.front().location, .message = "Expected expression"});
            return {std::nullopt, code};
        }

        cmd.arguments.push_back(std::make_unique<scripting::ast::expression>(std::move(argument.result.value())));
        current = argument.rest;
    }

    return {std::move(cmd), current};
}
|
|
|
|
// Parses one expression by trying every expression kind in a fixed order:
// binary algebraic, string literal, integer literal, unary algebraic,
// parenthesised, variable. The first kind that succeeds wins and later kinds
// are not attempted.
//
// Returns the expression and the tokens after it, or an empty result with
// `rest` equal to the original `code` when no kind matches.
parse_result<scripting::ast::expression> try_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    scripting::ast::expression node;
    auto current = code;

    // Stores a successful sub-parse into `node` and advances `current`.
    // Returns whether the sub-parse had succeeded.
    const auto adopt = [&](auto parsed) -> bool {
        if(not parsed.result) return false;

        using parsed_t = typename decltype(parsed.result)::value_type;
        node = scripting::ast::expression{
            .location = current.front().location,
            .contents = std::make_unique<parsed_t>(std::move(parsed.result.value()))
        };
        current = parsed.rest;
        return true;
    };

    // Short-circuit evaluation keeps the attempts lazy and in order.
    const bool matched =
           adopt(try_binary_algebraic_expression(current, errors))
        or adopt(try_literal_string_expression(current, errors))
        or adopt(try_literal_int_expression(current, errors))
        or adopt(try_unary_algebraic_expression(current, errors))
        or adopt(try_paren_expression(current, errors))
        or adopt(try_variable_expression(current, errors));

    if(not matched) {
        return {std::nullopt, code};
    }

    return {.result = std::move(node), .rest = current};
}
|
|
|
|
// Parses a conditional:
//
//     <initiator> <expression> NEWLINE
//         statements...
//     [ elseif ... | else NEWLINE statements... ]
//     endif [NEWLINE]
//
// `initiator` is the keyword expected first ("if" for a plain conditional,
// "elseif" when parsing a chained branch). An "elseif" branch is parsed
// recursively and stored as the only statement of the `otherwise` block; the
// nested conditional consumes the shared "endif".
//
// Returns the conditional and the tokens after it. On failure the result is
// empty and `rest` is the original `code`; an error is appended unless the
// input simply does not start with `initiator`.
parse_result<scripting::ast::conditional> try_conditional(const std::span<token> code, std::vector<scripting::script_error>& errors, const std::string_view initiator) {
    const auto failure = [&]() -> parse_result<scripting::ast::conditional> {
        return {.result = std::nullopt, .rest = code};
    };
    const auto report = [&](const auto& where, const char* message) {
        errors.push_back(scripting::script_error{.location = where, .message = message});
    };

    if(code.empty()) return failure();
    auto current = code;

    // True when the next token is the identifier `keyword`.
    const auto next_is_keyword = [&](std::string_view keyword) -> bool {
        return not current.empty()
            and holds_alternative<scripting::ast::identifier>(current.front().value)
            and get<scripting::ast::identifier>(current.front().value).value == keyword;
    };

    // Chomps a new line if available, returns true (and reports) if it failed.
    const auto missing_new_line = [&]() -> bool {
        if(current.empty()) {
            report(code.front().location, "Interrupted conditional");
            return true;
        }
        const auto& value = current.front().value;
        if(not holds_alternative<symbol_t>(value) or get<symbol_t>(value) != symbol_t::new_line) {
            report(current.front().location, "Expected new line");
            return true;
        }
        current = current.subspan(1);
        return false;
    };

    if(not next_is_keyword(initiator)) return failure();

    scripting::ast::conditional result{};
    result.location = current.front().location;
    current = current.subspan(1);

    // Read the condition
    auto conditional_node = try_expression(current, errors);
    if(not conditional_node.result) {
        report(code.front().location, "Expected expression in conditional");
        return failure();
    }
    result.condition = std::make_unique<scripting::ast::expression>(std::move(conditional_node.result.value()));
    current = conditional_node.rest;

    if(missing_new_line()) return failure();

    if(current.empty()) {
        report(code.front().location, "Interrupted conditional");
        return failure();
    }
    result.on_condition = std::make_unique<scripting::ast::block>();
    result.on_condition->location = current.front().location;

    bool else_mode = false;

    for(;;) {
        // Blank lines directly in front of else/elseif/endif are harmless;
        // without this they were rejected although blank lines in front of a
        // statement were already accepted.
        current = trim_newline(current);
        if(next_is_keyword("endif")) break;

        const auto error_count = errors.size();

        // Handles elseif sequences as nested ifs in the else block
        // NOTE(review): an "elseif" that follows an "else" replaces the block
        // collected for that "else" - confirm whether this should be an error.
        auto nester = try_conditional(current, errors, "elseif");
        if(nester.result) {
            result.otherwise = std::make_unique<scripting::ast::block>();
            result.otherwise->location = current.front().location;
            result.otherwise->contents.push_back(scripting::ast::statement{
                .location = current.front().location,
                .contents = std::make_unique<scripting::ast::conditional>(std::move(nester.result.value())),
            });
            // The nested conditional already consumed the closing "endif".
            return {.result = std::move(result), .rest = nester.rest};
        }
        // The branch started like an elseif but was malformed.
        if(error_count != errors.size()) return failure();

        // Handles code
        if(auto block_contents = try_statement(current, errors); block_contents.result) {
            auto& target = else_mode ? result.otherwise : result.on_condition;
            target->contents.push_back(std::move(block_contents.result.value()));
            current = block_contents.rest;
            continue;
        }

        if(current.empty()) {
            report(code.front().location, "Interrupted conditional");
            return failure();
        }

        // Anything that is neither a statement nor "else" is an error.
        if(not next_is_keyword("else")) {
            report(current.front().location, "Unexpected expression content");
            return failure();
        }

        // Handles switching to else mode
        auto loc = current.front().location;
        current = current.subspan(1);
        if(missing_new_line()) return failure();

        if(else_mode) {
            // Blame the repeated "else" itself: after the new line has been
            // consumed there may be no token left to take a location from.
            report(loc, "Repeated else in conditional");
        } else {
            else_mode = true;
            result.otherwise = std::make_unique<scripting::ast::block>();
            result.otherwise->location = std::move(loc);
        }
    }

    // Consume the "endif" and, when more tokens follow, its new line.
    current = current.subspan(1);
    if(not current.empty() && missing_new_line()) return failure();

    return {.result = std::move(result), .rest = current};
}
|
|
|
|
// Parses a while loop:
//
//     while <expression> NEWLINE
//         statements...
//     endwhile [NEWLINE]
//
// Returns the loop and the tokens after it. On failure the result is empty
// and `rest` is the original `code`; an error is appended unless the input
// simply does not start with "while".
parse_result<scripting::ast::while_loop> try_while_loop(const std::span<token> code, std::vector<scripting::script_error>& errors) {
    const auto failure = [&]() -> parse_result<scripting::ast::while_loop> {
        return {.result = std::nullopt, .rest = code};
    };
    const auto report = [&](const auto& where, const char* message) {
        errors.push_back(scripting::script_error{.location = where, .message = message});
    };

    if(code.empty()) return failure();
    auto current = code;

    // True when the next token is the identifier `keyword`.
    const auto next_is_keyword = [&](std::string_view keyword) -> bool {
        return not current.empty()
            and holds_alternative<scripting::ast::identifier>(current.front().value)
            and get<scripting::ast::identifier>(current.front().value).value == keyword;
    };

    // Chomps a new line if available, returns true (and reports) if it failed.
    const auto missing_new_line = [&]() -> bool {
        if(current.empty()) {
            report(code.front().location, "Interrupted while loop");
            return true;
        }
        const auto& value = current.front().value;
        if(not holds_alternative<symbol_t>(value) or get<symbol_t>(value) != symbol_t::new_line) {
            report(current.front().location, "Expected new line");
            return true;
        }
        current = current.subspan(1);
        return false;
    };

    if(not next_is_keyword("while")) return failure();

    scripting::ast::while_loop result{};
    result.location = current.front().location;
    current = current.subspan(1);

    // Read the condition
    auto conditional_node = try_expression(current, errors);
    if(not conditional_node.result) {
        report(code.front().location, "Expected expression in while loop");
        return failure();
    }
    result.condition = std::make_unique<scripting::ast::expression>(std::move(conditional_node.result.value()));
    current = conditional_node.rest;

    if(missing_new_line()) return failure();

    if(current.empty()) {
        report(code.front().location, "Interrupted while loop");
        return failure();
    }
    result.on_condition = std::make_unique<scripting::ast::block>();
    result.on_condition->location = current.front().location;

    for(;;) {
        // Blank lines directly in front of "endwhile" are harmless; without
        // this they were rejected although blank lines in front of a
        // statement were already accepted.
        current = trim_newline(current);
        if(next_is_keyword("endwhile")) break;

        // Handles code
        auto block_contents = try_statement(current, errors);
        if(not block_contents.result) {
            if(current.empty()) {
                report(code.front().location, "Interrupted while loop");
            } else {
                report(current.front().location, "Unexpected statement in block");
            }
            return failure();
        }

        result.on_condition->contents.push_back(std::move(block_contents.result.value()));
        current = block_contents.rest;
    }

    // Consume the "endwhile" and, when more tokens follow, its new line.
    current = current.subspan(1);
    if(not current.empty() && missing_new_line()) return failure();

    return {.result = std::move(result), .rest = current};
}
|
|
|
|
// Parses "(" followed by either a command or an expression, followed by ")".
// The command form is attempted first.
//
// Returns the parenthesised node and the tokens after the ")". On failure
// the result is empty and `rest` is the original `code`; an error is
// appended unless the input simply does not start with "(".
parse_result<scripting::ast::paren_expression> try_paren_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    // True when `tok` holds exactly the symbol `wanted`.
    const auto is_symbol = [](const token& tok, symbol_t wanted) -> bool {
        return holds_alternative<symbol_t>(tok.value) and get<symbol_t>(tok.value) == wanted;
    };

    if(code.empty() or not is_symbol(code.front(), symbol_t::l_paren)) return {std::nullopt, code};

    scripting::ast::paren_expression paren;
    paren.location = code.front().location;
    auto current = code.subspan(1);

    if(auto command = try_command_expr(current, errors); command.result) {
        paren.content = std::make_unique<scripting::ast::command_expression>(std::move(command.result.value()));
        current = command.rest;
    } else if(auto inner = try_expression(current, errors); inner.result) {
        paren.content = std::make_unique<scripting::ast::expression>(std::move(inner.result.value()));
        current = inner.rest;
    } else {
        // Blame the "(" when nothing follows it, otherwise the offending token.
        errors.push_back(
            scripting::script_error{.location = (current.empty() ? paren.location : current.front().location), .message = "Expected either a command or some type of expression"});
        return {std::nullopt, code};
    }

    // The content must be closed by a ")".
    if(current.empty()) {
        errors.push_back(
            scripting::script_error{.location = paren.location, .message = "No matching parenthesis"});
        return {std::nullopt, code};
    }
    if(not is_symbol(current.front(), symbol_t::r_paren)) {
        errors.push_back(
            scripting::script_error{.location = current.front().location, .message = "No matching parenthesis, expected a closing parenthesis"});
        return {std::nullopt, code};
    }

    return {.result = std::move(paren), .rest = current.subspan(1)};
}
|
|
|
|
// Parses one statement: a conditional, a command or a while loop, tried in
// that order. New lines in front of and behind the statement are skipped.
//
// Returns the statement and the tokens after it, or an empty result with
// `rest` equal to the original (untrimmed) `code` when nothing matches.
parse_result<scripting::ast::statement> try_statement(std::span<token> code, std::vector<scripting::script_error>& errors) {
    scripting::ast::statement node;
    auto current = trim_newline(code);

    // Stores a successful sub-parse into `node` and advances `current`.
    // Returns whether the sub-parse had succeeded.
    const auto adopt = [&](auto parsed) -> bool {
        if(not parsed.result) return false;

        using parsed_t = typename decltype(parsed.result)::value_type;
        node.contents = std::make_unique<parsed_t>(std::move(parsed.result.value()));
        node.location = current.front().location;
        current = parsed.rest;
        return true;
    };

    // Short-circuit evaluation keeps the attempts lazy and in order.
    const bool matched =
           adopt(try_conditional(current, errors))
        or adopt(try_command_expr(current, errors))
        or adopt(try_while_loop(current, errors));

    if(not matched) return {std::nullopt, code};

    return {.result = std::move(node), .rest = trim_newline(current)};
}
|
|
|
|
// Parses a unary operator ("~" or "!") followed by an expression.
//
// Returns the unary node and the tokens after its operand. On failure the
// result is empty and `rest` is the original `code`; an error is appended
// only when the operator is present but no expression follows it.
parse_result<scripting::ast::unary_algebraic_expression> try_unary_algebraic_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    // Lexer symbol -> AST operator, for every unary operator.
    using unary_mapping = std::pair<symbol_t, scripting::ast::operator_t>;
    constexpr std::array<unary_mapping, 2> unary_operators{{
        {symbol_t::binary_not,  scripting::ast::operator_t::binary_not},
        {symbol_t::logical_not, scripting::ast::operator_t::logical_not},
    }};

    if(code.empty() or not holds_alternative<symbol_t>(code.front().value)) return {std::nullopt, code};

    const auto symbol = get<symbol_t>(code.front().value);
    const auto mapping = std::ranges::find(unary_operators, symbol, &unary_mapping::first);
    if(mapping == unary_operators.end()) return {std::nullopt, code};

    scripting::ast::unary_algebraic_expression node;
    node.location = code.front().location;
    node.op = mapping->second;

    /// TODO: Gives the lowest priority to unaries (aka, they are applied last)
    auto operand = try_expression(code.subspan(1), errors);
    if(not operand.result) {
        errors.push_back(
            scripting::script_error{.location = code.front().location, .message = "Expected expression after unary operator"});
        return {std::nullopt, code};
    }
    node.content = std::make_unique<scripting::ast::expression>(std::move(operand.result.value()));

    return {.result = std::move(node), .rest = operand.rest};
}
|
|
|
|
/// Parses `<operand> <binary operator> <expression>` and restructures the result
/// so that operator precedence and left associativity are respected.
///
/// The left operand is a single operand, tried in this order: string literal,
/// integer literal, unary expression, parenthesised expression, variable. The
/// right operand is whatever try_expression accepts, which may itself be a
/// binary expression; in that case the tree is fixed up as described below.
///
/// @param code   tokens to parse from
/// @param errors receives "Expected expression after binary operator" when an
///               operator is found but nothing parseable follows it (sub-parsers
///               may append their own errors as well)
/// @return the binary node and the tokens following it, or `std::nullopt`
///         together with the untouched `code` when there is no left operand, no
///         known binary operator after it, or no right operand
parse_result<scripting::ast::binary_algebraic_expression> try_binary_algebraic_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    // The three following arrays are parallel: entry i of each describes the same operator.
    constexpr std::array lexer_operators = {
        symbol_t::divide,
        symbol_t::modulo,
        symbol_t::multiply,
        symbol_t::subtract,
        symbol_t::add,
        symbol_t::bitshift_left,
        symbol_t::bitshift_right,
        symbol_t::rotate_left,
        symbol_t::rotate_right,
        symbol_t::less_than,
        symbol_t::greater_than,
        symbol_t::less_or_equal_than,
        symbol_t::greater_or_equal_than,
        symbol_t::equals,
        symbol_t::different,
        symbol_t::binary_and,
        symbol_t::binary_or,
        symbol_t::binary_xor,
        symbol_t::logical_and,
        symbol_t::logical_or,
    };
    constexpr std::array ast_operators = {
        scripting::ast::operator_t::divide,
        scripting::ast::operator_t::modulo,
        scripting::ast::operator_t::multiply,
        scripting::ast::operator_t::subtract,
        scripting::ast::operator_t::add,
        scripting::ast::operator_t::bitshift_left,
        scripting::ast::operator_t::bitshift_right,
        scripting::ast::operator_t::rotate_left,
        scripting::ast::operator_t::rotate_right,
        scripting::ast::operator_t::less_than,
        scripting::ast::operator_t::greater_than,
        scripting::ast::operator_t::less_or_equal_than,
        scripting::ast::operator_t::greater_or_equal_than,
        scripting::ast::operator_t::equals,
        scripting::ast::operator_t::different,
        scripting::ast::operator_t::binary_and,
        scripting::ast::operator_t::binary_or,
        scripting::ast::operator_t::binary_xor,
        scripting::ast::operator_t::logical_and,
        scripting::ast::operator_t::logical_or,
    };
    // A lower value binds tighter (is applied first).
    constexpr std::array ast_precedences = {
        1, // divide
        1, // modulo
        1, // multiply
        2, // subtract
        2, // add
        3, // bitshift_left
        3, // bitshift_right
        3, // rotate_left
        3, // rotate_right
        4, // less_than
        4, // greater_than
        4, // less_or_equal_than
        4, // greater_or_equal_than
        4, // equals
        4, // different
        5, // binary_and
        5, // binary_or
        5, // binary_xor
        6, // logical_and
        7, // logical_or
    };
    static_assert(lexer_operators.size() == ast_operators.size());
    static_assert(ast_precedences.size() == ast_operators.size());

    using binary_ptr = std::unique_ptr<scripting::ast::binary_algebraic_expression>;

    // Precedence of an operator already stored in the AST. An operator missing from
    // the table is reported as 0 (tighter than everything) so it never triggers a
    // reordering instead of indexing the table out of bounds.
    const auto precedence_of = [&](scripting::ast::operator_t op) {
        const auto found = std::ranges::find(ast_operators, op);
        if(found == ast_operators.end()) return 0;
        return ast_precedences[static_cast<std::size_t>(found - ast_operators.begin())];
    };

    scripting::ast::binary_algebraic_expression node;
    auto current = code;

#ifdef HANDLE_EXPRESSION
    static_assert(false, "Found a macro name HANDLE_EXPRESSION, halting");
#endif
// Wraps the freshly parsed operand `expr` into `node.lhs` and advances `current` to `rest`.
#define HANDLE_EXPRESSION(type) \
    node.lhs = std::make_unique<scripting::ast::expression>(scripting::ast::expression{ \
        .location = current.front().location, \
        .contents = std::make_unique<type>(std::move(expr.value())) \
    }); \
    current = rest;

    if(auto [expr, rest] = try_literal_string_expression(current, errors); expr) {
        HANDLE_EXPRESSION(scripting::ast::literal_string_expression)
    } else if(auto [expr, rest] = try_literal_int_expression(current, errors); expr) {
        HANDLE_EXPRESSION(scripting::ast::literal_int_expression)
    } else if(auto [expr, rest] = try_unary_algebraic_expression(current, errors); expr) {
        HANDLE_EXPRESSION(scripting::ast::unary_algebraic_expression)
    } else if(auto [expr, rest] = try_paren_expression(current, errors); expr) {
        HANDLE_EXPRESSION(scripting::ast::paren_expression)
    } else if(auto [expr, rest] = try_variable_expression(current, errors); expr) {
        HANDLE_EXPRESSION(scripting::ast::variable_expression)
    } else {
        return {std::nullopt, code};
    }
#undef HANDLE_EXPRESSION

    // The operand must be followed by one of the known binary operators.
    if(current.empty()) {
        return {std::nullopt, code};
    }
    if(not holds_alternative<symbol_t>(current.front().value)) {
        return {std::nullopt, code};
    }
    const auto res = std::ranges::find(lexer_operators, get<symbol_t>(current.front().value));
    if(res == lexer_operators.end()) {
        return {std::nullopt, code};
    }
    const auto op_idx = static_cast<std::size_t>(res - lexer_operators.begin());
    const auto own_precedence = ast_precedences[op_idx];
    node.op = ast_operators[op_idx];
    node.location = current.front().location;
    current = current.subspan(1);

    auto operand = try_expression(current, errors);
    if(not operand.result) {
        errors.push_back(
            scripting::script_error{.location = node.location, .message = "Expected expression after binary operator"});
        return {std::nullopt, code};
    }
    auto& rhs_expression = operand.result.value();

    /// The right operand was parsed as one "large bite", so `a - b - c` arrives here as
    //... `a - (b - c)`. Whenever our operator binds at least as tightly as the operator at
    //... the root of that bite, our operator has to be applied first: it is pushed down the
    //... left spine of the bite for as long as that remains true.
    //... `<=` (rather than `<`) is what makes equal precedences left associative.
    if(std::holds_alternative<binary_ptr>(rhs_expression.contents)) {
        auto& root = std::get<binary_ptr>(rhs_expression.contents);
        if(root and own_precedence <= precedence_of(root->op)) {
            // Precedence reordering required
            // https://link.excalidraw.com/l/hxPegpAmTX/6d1BYX0rfKU
            // Find the deepest node on the left spine that still has to be applied after
            // our operator. Stopping at the root (a single rotation) is only correct when
            // the root's left operand binds tighter than our operator: `a - b - c - d`
            // would otherwise become `(a - (b - c)) - d`.
            auto* target = root.get();
            while(target->lhs and std::holds_alternative<binary_ptr>(target->lhs->contents)) {
                auto& below = std::get<binary_ptr>(target->lhs->contents);
                if(not below or own_precedence > precedence_of(below->op)) break;
                target = below.get();
            }

            // Our node steals the left operand of `target` and takes its place.
            node.rhs = std::move(target->lhs);
            const auto node_location = node.location;
            target->lhs = std::make_unique<scripting::ast::expression>(scripting::ast::expression{
                .location = node_location,
                .contents = std::make_unique<scripting::ast::binary_algebraic_expression>(std::move(node))
            });

            // The root of the bite becomes the result; it is located where the bite started.
            scripting::ast::binary_algebraic_expression reordered = std::move(*root);
            reordered.location = rhs_expression.location;
            current = operand.rest;
            return {.result = std::move(reordered), .rest = current};
        }
    }

    // No reordering required
    node.rhs = std::make_unique<scripting::ast::expression>(std::move(rhs_expression));
    current = operand.rest;

    return {.result = std::move(node), .rest = current};
}
|
|
|
|
/// Parses a variable reference, i.e. a single identifier token.
///
/// @param code   tokens to parse from
/// @param errors unused; present so that every parser shares the same signature
/// @return the variable node and the tokens after the identifier, or
///         `std::nullopt` together with the untouched `code` when the input is
///         empty or does not start with an identifier
parse_result<scripting::ast::variable_expression> try_variable_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    if(code.empty() or not holds_alternative<scripting::ast::identifier>(code.front().value)) {
        return {std::nullopt, code};
    }

    auto& head = code.front();
    scripting::ast::variable_expression variable;
    variable.location = head.location;
    variable.name = get<scripting::ast::identifier>(head.value);

    return {.result = std::move(variable), .rest = code.subspan(1)};
}
|
|
|
|
/// Parses a string literal, i.e. a single token holding a `std::string`.
///
/// @param code   tokens to parse from
/// @param errors unused; present so that every parser shares the same signature
/// @return the literal node and the tokens after it, or `std::nullopt` together
///         with the untouched `code` when the input is empty or does not start
///         with a string token
parse_result<scripting::ast::literal_string_expression> try_literal_string_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    if(code.empty() or not holds_alternative<std::string>(code.front().value)) {
        return {std::nullopt, code};
    }

    auto& head = code.front();
    scripting::ast::literal_string_expression literal;
    literal.location = head.location;
    literal.value = get<std::string>(head.value);

    return {.result = std::move(literal), .rest = code.subspan(1)};
}
|
|
|
|
/// Parses an integer literal, i.e. a single token holding an `int32_t`.
///
/// @param code   tokens to parse from
/// @param errors unused; present so that every parser shares the same signature
/// @return the literal node and the tokens after it, or `std::nullopt` together
///         with the untouched `code` when the input is empty or does not start
///         with an integer token
parse_result<scripting::ast::literal_int_expression> try_literal_int_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
    if(code.empty() or not holds_alternative<int32_t>(code.front().value)) {
        return {std::nullopt, code};
    }

    auto& head = code.front();
    scripting::ast::literal_int_expression literal;
    literal.location = head.location;
    literal.value = get<int32_t>(head.value);

    return {.result = std::move(literal), .rest = code.subspan(1)};
}
|
|
|
|
/// Parses a whole token stream into a block of statements.
///
/// Statements are parsed one after another with try_statement. When a statement
/// fails to parse, the tokens up to the next newline (or the end of the input)
/// are skipped so that parsing can resume on the following line.
///
/// @param code   tokens to parse
/// @param errors receives the parse errors; it is also inspected to decide
///               whether the result must be discarded
/// @return the parsed block, or an empty block when `errors` is non-empty once
///         parsing stops (this includes errors already present on entry)
scripting::ast::block scripting::ast::parse(std::span<token> code, std::vector<scripting::script_error>& errors) {
    scripting::ast::block node;
    auto current = trim_newline(code);

    while(not current.empty()) {
        const auto pre_size = current.size();
        auto [expr, rest] = try_statement(current, errors);
        if(expr) {
            node.contents.push_back(std::move(expr.value()));
            current = rest;
        } else {
            // Error recovery: drop the remainder of the offending line.
            // NOTE(review): if try_statement failed without reporting anything, the line
            //               is skipped silently here - confirm this is intended.
            bool progress = false;
            // Emptiness is tested first so next_is_newline never sees an empty span.
            while(not current.empty() and not next_is_newline(current)) {
                current = current.subspan(1);
                progress = true;
            }
            // Nothing could be skipped and something already went wrong: give up.
            if(not progress and not errors.empty()) {
                return scripting::ast::block{};
            }
            // Nothing was consumed at all; record it so the next stalled iteration bails out.
            if(not current.empty() and current.size() == pre_size) {
                errors.push_back(script_error{
                    .location = current.front().location,
                    .message = "Parsing stuck in infinite loop with no progress"
                });
            }
        }
        current = trim_newline(current);
    }

    if(not errors.empty()) {
        return scripting::ast::block{};
    }

    // Returned by name: wrapping a local in std::move would prevent copy elision.
    return node;
}
|