|
|
@ -10,379 +10,13 @@ |
|
|
|
#include "UserScript/parser.h"
|
|
|
|
#include "UserScript.h"
|
|
|
|
|
|
|
|
/////////////////
|
|
|
|
/// CONSTANTS ///
|
|
|
|
/////////////////
|
|
|
|
|
|
|
|
using symbol_t = scripting::ast::symbol_t; |
|
|
|
|
|
|
|
constexpr std::array<std::pair<std::string_view, symbol_t>, 25> operators { |
|
|
|
std::pair<std::string_view, symbol_t>{"(", symbol_t::l_paren}, |
|
|
|
std::pair<std::string_view, symbol_t>{")", symbol_t::r_paren}, |
|
|
|
std::pair<std::string_view, symbol_t>{"!=", symbol_t::different}, |
|
|
|
std::pair<std::string_view, symbol_t>{"!", symbol_t::logical_not}, |
|
|
|
std::pair<std::string_view, symbol_t>{"~", symbol_t::binary_not}, |
|
|
|
std::pair<std::string_view, symbol_t>{"/", symbol_t::divide}, |
|
|
|
std::pair<std::string_view, symbol_t>{"%", symbol_t::modulo}, |
|
|
|
std::pair<std::string_view, symbol_t>{"*", symbol_t::multiply}, |
|
|
|
std::pair<std::string_view, symbol_t>{"-", symbol_t::subtract}, |
|
|
|
std::pair<std::string_view, symbol_t>{"+", symbol_t::add}, |
|
|
|
std::pair<std::string_view, symbol_t>{"<<<", symbol_t::rotate_left}, |
|
|
|
std::pair<std::string_view, symbol_t>{">>>", symbol_t::rotate_right}, |
|
|
|
std::pair<std::string_view, symbol_t>{"<<", symbol_t::bitshift_left}, |
|
|
|
std::pair<std::string_view, symbol_t>{">>", symbol_t::bitshift_right}, |
|
|
|
std::pair<std::string_view, symbol_t>{"<=", symbol_t::less_or_equal_than}, |
|
|
|
std::pair<std::string_view, symbol_t>{">=", symbol_t::greater_or_equal_than}, |
|
|
|
std::pair<std::string_view, symbol_t>{"<", symbol_t::less_than}, |
|
|
|
std::pair<std::string_view, symbol_t>{">", symbol_t::greater_than}, |
|
|
|
std::pair<std::string_view, symbol_t>{"==", symbol_t::equals}, |
|
|
|
std::pair<std::string_view, symbol_t>{"&&", symbol_t::logical_and}, |
|
|
|
std::pair<std::string_view, symbol_t>{"&", symbol_t::binary_and}, |
|
|
|
std::pair<std::string_view, symbol_t>{"||", symbol_t::logical_or}, |
|
|
|
std::pair<std::string_view, symbol_t>{"|", symbol_t::binary_or}, |
|
|
|
std::pair<std::string_view, symbol_t>{"^", symbol_t::binary_xor}, |
|
|
|
std::pair<std::string_view, symbol_t>{"\n", symbol_t::new_line} |
|
|
|
}; |
|
|
|
|
|
|
|
/// Character sequences that end an identifier.
///
/// try_identifier() stops consuming input as soon as the remaining text starts with
/// any of these. The list holds every operator spelling plus "=", which has no entry
/// in the operator table.
const std::vector<std::string_view> reserved_character_sequences {
    // Grouping
    "(", ")",
    // Negation
    "!=", "!", "~",
    // Arithmetic
    "/", "%", "*", "-", "+",
    // Rotations and shifts
    "<<<", ">>>", "<<", ">>",
    // Comparisons
    "<=", ">=", "<", ">", "==",
    // Logical and bitwise combinators
    "&&", "&", "||", "|", "^",
    // Reserved without an operator entry
    "=",
    // Statement separator
    "\n"
};
|
|
|
|
|
|
|
/////////////////////
|
|
|
|
/// LEXER HELPERS ///
|
|
|
|
/////////////////////
|
|
|
|
|
|
|
|
using token = scripting::ast::token; |
|
|
|
|
|
|
|
struct lex_token_result { |
|
|
|
token tok; |
|
|
|
std::string_view rest; |
|
|
|
}; |
|
|
|
|
|
|
|
/// Non-owning view of the bytes of a single UTF-8 encoded code point ("rune").
struct rune_ref {
    /// Bytes of the encoded rune: one lead byte followed by its continuation bytes.
    /// The view does not own the bytes; the underlying buffer must outlive it.
    std::string_view str;

    /// Decodes the referenced bytes into their code point value.
    ///
    /// An empty view decodes to 0. No validation happens here: the bytes are taken
    /// to be one complete sequence and only the payload bits are extracted.
    explicit operator uint32_t() const {
        if(str.empty()) return 0;

        const auto lead = static_cast<uint8_t>(str.front());
        if(str.size() == 1) return lead;

        // The lead byte of an n-byte sequence (n >= 2) spends n bits on the run of
        // ones and one bit on the terminating zero, leaving 7 - n payload bits.
        // Over-long views get no lead payload rather than a wrapped shift count.
        const size_t lead_payload_bits = str.size() < 7 ? 7 - str.size() : 0;

        // Keep ALL payload bits of the lead byte, not just the topmost one.
        uint32_t rune = lead & ((uint32_t{1} << lead_payload_bits) - 1);

        // Every continuation byte contributes its low six bits.
        for(const char c : str.substr(1)) {
            rune = (rune << 6) | (static_cast<uint8_t>(c) & 0x3Fu);
        }
        return rune;
    }

    /// Tells whether the rune is one of the horizontal space characters listed below.
    ///
    /// NOTE(review): tab, carriage return and line feed are not in the list; "\n"
    /// has its own entry in the operator table. Confirm tab/CR handling is intended.
    [[nodiscard]] bool is_space() const {
        constexpr std::array<uint32_t, 18> spaces{
            0x0020, 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
            0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000
        };

        return std::find(spaces.begin(), spaces.end(), static_cast<uint32_t>(*this)) != spaces.end();
    }
};
|
|
|
|
|
|
|
struct try_rune_result { |
|
|
|
rune_ref rune; |
|
|
|
std::string_view rest; |
|
|
|
}; |
|
|
|
|
|
|
|
std::shared_ptr<scripting::code_location> get_loc(std::string_view original, std::string_view rest, std::shared_ptr<const std::string> last_line) { |
|
|
|
// TODO: Check everything again for weird ass cases
|
|
|
|
if(original.empty()) { |
|
|
|
return std::make_shared<scripting::code_location>(scripting::code_location{ |
|
|
|
.line_contents = std::make_shared<std::string>(), |
|
|
|
.line_number = (int32_t)std::clamp<size_t>(1, 1, std::numeric_limits<int32_t>::max()), |
|
|
|
.column_number = (int32_t)std::clamp<size_t>(1 + 1, 1, std::numeric_limits<int32_t>::max()) |
|
|
|
}); |
|
|
|
} |
|
|
|
const auto before = original.substr(0, original.size() - rest.size()); |
|
|
|
const auto line_no = std::ranges::count(before, '\n') + 1; |
|
|
|
const auto line_start = std::find(before.crbegin(), before.crend(), '\n'); |
|
|
|
const auto column_no = line_start != before.crend() ? (line_start - before.crbegin()) : before.size(); |
|
|
|
const auto back_tracked = before.size() - column_no; |
|
|
|
const auto front_tracked = rest.empty() ? original.size() : before.size() + (std::ranges::find(rest, '\n') - rest.begin()); |
|
|
|
const std::string_view current{original.begin() + back_tracked, original.begin() + front_tracked}; |
|
|
|
|
|
|
|
if(not last_line || *last_line != current) { |
|
|
|
last_line = std::make_shared<std::string>(current); |
|
|
|
} |
|
|
|
|
|
|
|
return std::make_shared<scripting::code_location>(scripting::code_location{ |
|
|
|
.line_contents = last_line, |
|
|
|
.line_number = (int32_t)std::clamp<size_t>(line_no, 1, std::numeric_limits<int32_t>::max()), |
|
|
|
.column_number = (int32_t)std::clamp<size_t>(column_no + 1, 1, std::numeric_limits<int32_t>::max()) |
|
|
|
}); |
|
|
|
} |
|
|
|
|
|
|
|
////////////////////
|
|
|
|
/// LEXER PROPER ///
|
|
|
|
////////////////////
|
|
|
|
|
|
|
|
/// Splits one UTF-8 encoded rune off the front of `text`.
///
/// @param text      Input to read from.
/// @param location  Location attached to any error recorded here.
/// @param errors    Receives an error when a multi-byte sequence is truncated or has a
///                  byte that is not a continuation byte. A bad lead byte and empty
///                  input yield std::nullopt without recording anything.
/// @return The rune and the remaining input, or std::nullopt on failure.
auto try_rune(std::string_view text, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<try_rune_result> {
    static_assert(CHAR_BIT == 8, "Get your weird ass cpu outta here");

    if(text.empty()) return std::nullopt;

    const auto lead = static_cast<uint8_t>(text.front());

    // High bit clear: a plain single-byte (ASCII) rune.
    if((lead & 0b10000000) == 0) {
        return try_rune_result{text.substr(0, 1), text.substr(1)};
    }

    // The number of leading one bits announces how long the sequence is.
    const int length = std::countl_one(lead);

    // One leading bit marks a continuation byte, i.e. the middle of a sequence.
    // Seven or eight leading bits never start a sequence.
    if(length == 1 || length >= 7) return std::nullopt;

    const auto bad_utf8 = [&]() -> std::optional<try_rune_result> {
        errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"});
        return std::nullopt;
    };

    // Not enough bytes left to hold the announced sequence.
    if(text.size() < static_cast<size_t>(length)) return bad_utf8();

    const auto sequence = text.substr(0, length);

    // Every byte after the lead has to look like 10xxxxxx.
    const bool tail_is_valid = std::ranges::all_of(sequence.substr(1), [](const char byte) {
        return std::countl_one(static_cast<uint8_t>(byte)) == 1;
    });
    if(!tail_is_valid) return bad_utf8();

    return try_rune_result{sequence, text.substr(length)};
}
|
|
|
/// Lexes a double-quoted string literal from the front of `view`.
///
/// Characters for which std::isspace is true are skipped before the opening quote.
/// Inside the literal a backslash introduces an escape:
///   - `\\ \a \b \f \n \r \t \v \' \"` produce the usual single character,
///   - `\ooo` (exactly three octal digits) and `\xhh` (exactly two hexadecimal
///     digits) produce the byte with that value,
///   - any other escaped character stands for itself.
///
/// @param view      Input to read from.
/// @param location  Location stored in the token and attached to recorded errors.
/// @param errors    Receives an error for an unterminated literal, a truncated
///                  escape, or a bad octal/hexadecimal digit.
/// @return The string token and the input after the closing quote, or std::nullopt
///         when the input does not start with a string literal or the literal is
///         malformed.
///
/// NOTE(review): std::isspace also accepts '\n', so a line break directly in front
/// of a literal is skipped here instead of becoming its own token. Confirm intent.
constexpr auto try_string = [](std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // Value of an octal digit, or -1 when `ch` is not one.
    constexpr auto octal_value = [](const char ch) -> int {
        return (ch >= '0' && ch <= '7') ? ch - '0' : -1;
    };

    // Value of a hexadecimal digit, or -1 when `ch` is not one. Comparing
    // characters keeps this safe for bytes outside the ASCII range.
    constexpr auto hex_value = [](const char ch) -> int {
        if(ch >= '0' && ch <= '9') return ch - '0';
        if(ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
        if(ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        return -1;
    };

    // Records `message` and gives up on the literal.
    const auto fail = [&](const char* message) -> std::optional<lex_token_result> {
        errors.push_back(scripting::script_error{.location = location, .message = message});
        return std::nullopt;
    };

    // std::isspace must not be called with a negative value, hence the cast.
    size_t pos = 0;
    while(pos < view.size() && std::isspace(static_cast<unsigned char>(view[pos]))) ++pos;

    if(pos == view.size() || view[pos] != '"') return std::nullopt;

    std::string generated;

    while(true) {
        ++pos;
        if(pos == view.size()) return fail("Unterminated string");

        const char ch = view[pos];

        if(ch == '"') {
            return lex_token_result{
                token{.location = location, .value = std::move(generated)},
                view.substr(pos + 1)
            };
        }

        if(ch != '\\') {
            generated.push_back(ch);
            continue;
        }

        // Escape sequence: the backslash must be followed by at least one character.
        ++pos;
        if(pos == view.size()) return fail("Unterminated string");

        switch(view[pos]) {
            case '\\': generated.push_back('\\'); break;
            case 'a':  generated.push_back('\a'); break;
            case 'b':  generated.push_back('\b'); break;
            case 'f':  generated.push_back('\f'); break;
            case 'n':  generated.push_back('\n'); break;
            case 'r':  generated.push_back('\r'); break;
            case 't':  generated.push_back('\t'); break;
            case 'v':  generated.push_back('\v'); break;
            case '\'': generated.push_back('\''); break;
            case '"':  generated.push_back('"');  break;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            {
                // Exactly three octal digits; view[pos] is the first of them.
                unsigned value = 0;
                for(int index = 0; index < 3; ++index) {
                    if(index != 0) {
                        ++pos;
                        if(pos == view.size()) return fail("Unterminated string");
                    }
                    const int digit = octal_value(view[pos]);
                    if(digit < 0) {
                        // Report it, but keep lexing the rest of the literal.
                        errors.push_back(scripting::script_error{.location = location, .message = "Bad octal value in string"});
                    }
                    value = value * 8 + static_cast<unsigned>(digit < 0 ? 0 : digit);
                }
                // Values above 0377 wrap to a single byte.
                generated.push_back(static_cast<char>(static_cast<unsigned char>(value)));
                break;
            }
            case 'x':
            {
                // Exactly two hexadecimal digits.
                unsigned value = 0;
                for(int index = 0; index < 2; ++index) {
                    ++pos;
                    if(pos == view.size()) return fail("Unterminated string");
                    const int digit = hex_value(view[pos]);
                    if(digit < 0) return fail("Bad hexadecimal value in string");
                    value = value * 16 + static_cast<unsigned>(digit);
                }
                generated.push_back(static_cast<char>(static_cast<unsigned char>(value)));
                break;
            }
            default:
                // Unknown escapes stand for the escaped character itself.
                generated.push_back(view[pos]);
                break;
        }
    }
};
|
|
|
/// Lexes a base-10 32-bit integer literal from the front of `view`.
///
/// @param view      Input to read from.
/// @param location  Location copied into the token and attached to recorded errors.
///                  It is left untouched so the caller can keep using it.
/// @param errors    Receives an error when the digits do not fit into an int32_t.
/// @return The integer token and the input after it, or std::nullopt when the input
///         does not start with an integer or the value is out of range.
constexpr auto try_int32 = [](std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // std::from_chars works on raw character pointers, not on string_view iterators.
    const char* const first = view.data();
    const char* const last = first + view.size();

    int32_t parsed = 0;
    const auto [end, status] = std::from_chars(first, last, parsed);

    // Nothing was consumed: the input does not start with a number.
    if(end == first) return std::nullopt;

    // Digits were consumed but the value was not stored (out of range).
    if(status != std::errc{}) {
        errors.push_back(scripting::script_error{.location = location, .message = "Integer literal does not fit in a 32-bit integer"});
        return std::nullopt;
    }

    return lex_token_result{
        token{.location = location, .value = parsed},
        view.substr(static_cast<size_t>(end - first))
    };
};
|
|
|
/// Lexes an operator from the front of `code`.
///
/// The operator table is searched front to back and the first spelling that prefixes
/// the input wins.
///
/// @param code      Input to read from.
/// @param location  Location stored in the token.
/// @param errors    Not written to by this function.
/// @return The symbol token and the input after the operator, or std::nullopt when
///         the input does not start with any known operator.
std::optional<lex_token_result> try_operator(std::string_view code, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) {
    const auto match = std::ranges::find_if(operators, [code](const auto& entry) {
        return code.starts_with(entry.first);
    });

    if(match == operators.end()) return std::nullopt;

    return lex_token_result{
        token{.location = location, .value = match->second},
        code.substr(match->first.size())
    };
}
|
|
|
/// Lexes an identifier from the front of `view`.
///
/// Runes are collected until the input ends, a reserved character sequence starts, or
/// a space rune is met. A terminating space rune is consumed along with the identifier.
///
/// @param view      Input to read from.
/// @param location  Location stored in the identifier, the token and recorded errors.
/// @param errors    Receives an error when the input is not valid UTF-8.
/// @return The identifier token and the remaining input, or std::nullopt when no
///         identifier character could be read or the input is not valid UTF-8.
auto try_identifier(std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // True when `text` begins with one of the reserved character sequences.
    const auto at_reserved_sequence = [](const std::string_view text) -> bool {
        for(const std::string_view sequence : reserved_character_sequences) {
            if(text.starts_with(sequence)) return true;
        }
        return false;
    };

    if(view.empty()) return std::nullopt;

    std::string name;

    while(!view.empty() && !at_reserved_sequence(view)) {
        const auto decoded = try_rune(view, location, errors);

        if(!decoded) {
            errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"});
            return std::nullopt;
        }

        // The rune is consumed whether it joins the identifier or terminates it.
        view = decoded->rest;

        if(decoded->rune.is_space()) break;

        name.append(decoded->rune.str);
    }

    if(name.empty()) return std::nullopt;

    scripting::ast::identifier result {.location = location, .value = std::move(name)};

    return lex_token_result{.tok = token{.location = location, .value = std::move(result)}, .rest = view};
}
|
|
|
|
|
|
|
std::vector<token> scripting::ast::lex(const std::string& code, std::vector<scripting::script_error>& errors) { |
|
|
|
std::vector<token> return_value; |
|
|
|
std::string_view current = code; |
|
|
|
std::shared_ptr<const std::string> last_line; |
|
|
|
|
|
|
|
while(not current.empty()) { |
|
|
|
for(;;) { |
|
|
|
if(current.empty()) break; |
|
|
|
auto location = get_loc(code, current, last_line); |
|
|
|
auto c = try_rune(current, location, errors); |
|
|
|
if(not c.has_value()) { |
|
|
|
errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 encoding detected while trimming space"}); |
|
|
|
return return_value; |
|
|
|
} else { |
|
|
|
if(c.value().rune.is_space()) { |
|
|
|
current = c.value().rest; |
|
|
|
} else break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
auto location = get_loc(code, current, last_line); |
|
|
|
last_line = location->line_contents; |
|
|
|
auto res = try_string(current, location, errors); |
|
|
|
if (!res) res = try_operator(current, location, errors); |
|
|
|
if (!res) res = try_int32(current, location, errors); |
|
|
|
if (!res) res = try_identifier(current, location, errors); |
|
|
|
if(res.has_value()) { |
|
|
|
current = res.value().rest; |
|
|
|
return_value.emplace_back(std::move(res.value().tok)); |
|
|
|
} else { |
|
|
|
errors.push_back(scripting::script_error{.location = location, .message = "Unknown token"}); |
|
|
|
return return_value; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return return_value; |
|
|
|
} |
|
|
|
|
|
|
|
//////////////////////
|
|
|
|
/// PARSER HELPERS ///
|
|
|
|
//////////////////////
|
|
|
|
|
|
|
|
using token = scripting::ast::token; |
|
|
|
using symbol_t = scripting::ast::symbol_t; |
|
|
|
|
|
|
|
template<typename T> |
|
|
|
struct parse_result { |
|
|
|
std::optional<T> result; |
|
|
@ -454,7 +88,7 @@ parse_result try_command_expr(std::span |
|
|
|
holds_alternative<symbol_t>(current.front().value) |
|
|
|
and get<symbol_t>(current.front().value) == symbol_t::r_paren |
|
|
|
) |
|
|
|
) { |
|
|
|
) { |
|
|
|
auto [expr, rest] = try_expression(current, errors); |
|
|
|
|
|
|
|
if(not expr) { |
|
|
@ -473,7 +107,7 @@ parse_result try_command_expr(std::span |
|
|
|
parse_result<scripting::ast::expression> try_expression(std::span<token> code, std::vector<scripting::script_error>& errors) { |
|
|
|
scripting::ast::expression node; |
|
|
|
auto current = code; |
|
|
|
|
|
|
|
|
|
|
|
#ifdef HANDLE_EXPRESSION
|
|
|
|
static_assert(false, "Found a macro name HANDLE_EXPRESSION, halting"); |
|
|
|
#endif
|
|
|
@ -896,7 +530,7 @@ parse_result try_binary_algebraic_e |
|
|
|
|
|
|
|
scripting::ast::binary_algebraic_expression node; |
|
|
|
auto current = code; |
|
|
|
|
|
|
|
|
|
|
|
#ifdef HANDLE_EXPRESSION
|
|
|
|
static_assert(false, "Found a macro name HANDLE_EXPRESSION, halting"); |
|
|
|
#endif
|