From ac7cd9cc0d110decbdc4400ee914e433f91efdac Mon Sep 17 00:00:00 2001 From: Ludovic 'Archivist' Lagouardette Date: Wed, 12 Jul 2023 17:32:33 +0200 Subject: [PATCH] Initial commit after splitting --- .gitignore | 4 + .idea/.gitignore | 8 + .idea/UserScript.iml | 2 + .idea/misc.xml | 4 + .idea/modules.xml | 8 + .idea/vcs.xml | 6 + CMakeLists.txt | 48 ++ include/UserScript.h | 59 ++ include/UserScript/parser.h | 169 +++++ script_exe/main.cpp | 347 ++++++++++ src/interpreter.cpp | 1186 +++++++++++++++++++++++++++++++++++ src/lex_parse.cpp | 1051 +++++++++++++++++++++++++++++++ tests/lexer_test.cpp | 118 ++++ tests/parser_test.cpp | 168 +++++ tests/scripts/001.results | 13 + tests/scripts/001.script | 18 + tests/scripts/002.results | 1 + tests/scripts/002.script | 17 + tests/scripts/003.results | 10 + tests/scripts/003.script | 5 + tests/scripts/004.results | 4 + tests/scripts/004.script | 5 + tests/scripts/005.results | 4 + tests/scripts/005.script | 5 + tests/scripts/testfile.test | 16 + 25 files changed, 3276 insertions(+) create mode 100644 .idea/.gitignore create mode 100644 .idea/UserScript.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml create mode 100644 CMakeLists.txt create mode 100644 include/UserScript.h create mode 100644 include/UserScript/parser.h create mode 100644 script_exe/main.cpp create mode 100644 src/interpreter.cpp create mode 100644 src/lex_parse.cpp create mode 100644 tests/lexer_test.cpp create mode 100644 tests/parser_test.cpp create mode 100644 tests/scripts/001.results create mode 100644 tests/scripts/001.script create mode 100644 tests/scripts/002.results create mode 100644 tests/scripts/002.script create mode 100644 tests/scripts/003.results create mode 100644 tests/scripts/003.script create mode 100644 tests/scripts/004.results create mode 100644 tests/scripts/004.script create mode 100644 tests/scripts/005.results create mode 100644 tests/scripts/005.script create mode 100644 
tests/scripts/testfile.test diff --git a/.gitignore b/.gitignore index 12404dd..a66640b 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,7 @@ compile_commands.json CTestTestfile.cmake _deps +cmake-build-debug/ +cmake-build-release/ +cmake-build-minsizerel/ +cmake-build-relwithdebinfo/ \ No newline at end of file diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..13566b8 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/.idea/UserScript.iml b/.idea/UserScript.iml new file mode 100644 index 0000000..6d70257 --- /dev/null +++ b/.idea/UserScript.iml @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..f1c67df --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..33e7d3d --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..c8397c9 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..d410da5 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,48 @@ +cmake_minimum_required(VERSION 3.24) +project(UserScript) + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_VERBOSE_MAKEFILE ON) +set(FETCHCONTENT_QUIET OFF) +set(CATCH_CONFIG_DISABLE_EXCEPTIONS ON) +Include(FetchContent) + +FetchContent_Declare( + Catch2 + GIT_REPOSITORY https://github.com/catchorg/Catch2.git + GIT_TAG v3.3.2 +) + +FetchContent_MakeAvailable(Catch2) + +enable_testing() +include(CTest) +include(Catch) + +add_library(UserScript STATIC + src/interpreter.cpp + 
src/lex_parse.cpp) + +add_executable(ushell script_exe/main.cpp) +target_link_libraries(ushell PUBLIC UserScript) +include_directories(include) + +add_executable(tests tests/lexer_test.cpp tests/parser_test.cpp) +target_link_libraries(tests PUBLIC UserScript Catch2::Catch2WithMain) + +catch_discover_tests(tests) + +function(add_script_test [testname filename resultname]) + message("Added test: ${ARGV0}") + add_test( + NAME "${ARGV0}" + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMAND $ "compare" "${ARGV1}" "${ARGV2}" + ) +endfunction() + +add_script_test("Scripting 001: Operators" tests/scripts/001.script tests/scripts/001.results) +add_script_test("Scripting 002: Statements and Conditionals" tests/scripts/002.script tests/scripts/002.results) +add_script_test("Scripting 003: While loops" tests/scripts/003.script tests/scripts/003.results) +add_script_test("Scripting 004: While loops with bad terminator" tests/scripts/004.script tests/scripts/004.results) +add_script_test("Scripting 005: If statements with bad terminator" tests/scripts/005.script tests/scripts/005.results) diff --git a/include/UserScript.h b/include/UserScript.h new file mode 100644 index 0000000..b6bdb7d --- /dev/null +++ b/include/UserScript.h @@ -0,0 +1,59 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace scripting { + struct null {}; + struct array; + + using script_value = std::variant; + struct script_variable { + std::string name; + }; + + struct code_location { + std::shared_ptr line_contents; + int32_t line_number; + int32_t column_number; + }; + + struct script_error { + std::shared_ptr location; + std::string message; + }; + + struct array { + std::vector value; + operator std::vector&() { + return value; + } + }; + + using argument = std::variant; + + class UserScript; + + struct function_impl { + virtual std::optional apply(UserScript* self, std::vector, std::optional&) = 0; + virtual ~function_impl() = default; + }; + + using function = 
std::unique_ptr; + + class UserScript { + public: + virtual std::optional> getValue(const std::string& name) = 0; + virtual bool setValue(const std::string& name, script_value value) = 0; + virtual void registerFunction(std::string name, function fn) = 0; + virtual script_value resolve(const std::string& name) = 0; + virtual std::variant> executeAtOnce(std::string code) = 0; + virtual std::vector prepare(std::string code) = 0; + virtual std::optional stepOnce() = 0; + virtual ~UserScript() = default; + }; + + std::unique_ptr prepare_interpreter(const std::string& code); +} \ No newline at end of file diff --git a/include/UserScript/parser.h b/include/UserScript/parser.h new file mode 100644 index 0000000..127f6fe --- /dev/null +++ b/include/UserScript/parser.h @@ -0,0 +1,169 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace scripting { + namespace ast { + enum class operator_t : uint8_t { + logical_not = 0b00000, + binary_not = 0b00001, + divide = 0b00010, + modulo = 0b00100, + multiply = 0b00101, + subtract = 0b00110, + add = 0b01000, + bitshift_left = 0b01001, + bitshift_right = 0b01010, + rotate_left = 0b01100, + rotate_right = 0b01101, + less_than = 0b01110, + greater_than = 0b10000, + less_or_equal_than = 0b10001, + greater_or_equal_than = 0b10010, + equals = 0b10100, + different = 0b10101, + binary_and = 0b10110, + binary_or = 0b11000, + binary_xor = 0b11001, + logical_and = 0b11010, + logical_or = 0b11100, + }; + + enum class symbol_t { + l_paren, r_paren, + logical_not, + binary_not, + divide, + modulo, + multiply, + subtract, + add, + bitshift_left, + bitshift_right, + rotate_left, + rotate_right, + less_than, + greater_than, + less_or_equal_than, + greater_or_equal_than, + equals, + different, + binary_and, + binary_or, + binary_xor, + logical_and, + logical_or, + new_line + }; + + struct identifier { + std::shared_ptr location; + std::string value; + }; + + inline auto operator<=>(const identifier& lhs, const identifier& 
rhs) { + // TODO: check if the stdlib evolves to support ALL THE HELLA <=> THAT SHOULD BE THERE + return -1 * (lhs.value < rhs.value) + (lhs.value > rhs.value); + } + + inline auto operator==(const identifier& lhs, const identifier& rhs) { + return lhs.value == rhs.value; + } + + struct expression; + + struct unary_algebraic_expression { + std::shared_ptr location; + operator_t op; + std::unique_ptr content; + }; + + struct binary_algebraic_expression { + std::shared_ptr location; + std::unique_ptr lhs; + operator_t op; + std::unique_ptr rhs; + }; + + struct command_expression { + std::shared_ptr location; + identifier name; + std::vector> arguments; + }; + + struct variable_expression { + std::shared_ptr location; + identifier name; + }; + + struct paren_expression { + std::shared_ptr location; + std::variant< + std::unique_ptr, + std::unique_ptr + > content; + }; + + struct literal_int_expression { + std::shared_ptr location; + int32_t value; + }; + + struct literal_string_expression { + std::shared_ptr location; + std::string value; + }; + + struct expression { + std::shared_ptr location; + std::variant< + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr, + std::unique_ptr + > contents; + }; + + struct statement; + + struct block { + std::shared_ptr location; + std::vector contents; + }; + + struct conditional { + std::shared_ptr location; + std::unique_ptr condition; + std::unique_ptr on_condition; + std::unique_ptr otherwise; + }; + + struct while_loop { + std::shared_ptr location; + std::unique_ptr condition; + std::unique_ptr on_condition; + }; + + struct statement { + std::shared_ptr location; + std::variant< + std::unique_ptr, + std::unique_ptr, + std::unique_ptr + > contents; + }; + + struct token { + std::shared_ptr location; + std::variant value; + }; + + std::vector lex(const std::string& code, std::vector& errors); + scripting::ast::block parse(std::span code, std::vector& errors); + } +} \ No newline at end 
of file diff --git a/script_exe/main.cpp b/script_exe/main.cpp new file mode 100644 index 0000000..06d750d --- /dev/null +++ b/script_exe/main.cpp @@ -0,0 +1,347 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "UserScript.h" + +void print_value(std::ostream& stream, const scripting::script_value& res) { + if(std::holds_alternative(res)) { + stream << "["; + auto max = std::get(res).value.size(); + auto no_comma = max - 1; + for(size_t idx = 0; idx < max; ++idx) { + print_value(stream, std::get(res).value[idx]); + stream << (idx != no_comma ? ", " : ""); + } + stream << "]"; + } else if(std::holds_alternative(res)) { + stream << std::get(res); + } else if(std::holds_alternative(res)) { + stream << "null"; + } else { + stream << std::get(res); + } +} + +struct identity : public scripting::function_impl { + std::optional apply(scripting::UserScript* self,std::vector args, std::optional& errors) final { + if(args.size() != 1) { + errors = scripting::script_error{.message = "identity expects a single argument"}; + } else { + if(std::holds_alternative(args.front())) { + return std::get(args.front()); + } else { + return self->resolve(std::get(args.front()).name); + } + } + return scripting::script_value({}); + } +}; + +struct print : public scripting::function_impl { + std::ostream& stream; + + print(std::ostream& _stream) : stream(_stream) {} + + std::optional apply(scripting::UserScript* self,std::vector args, std::optional& errors) final { + + while(not args.empty()) { + auto& arg = args.back(); + if(std::holds_alternative(arg)) { + print_value(stream, std::get(arg)); + } else { + print_value(stream, self->resolve(std::get(arg).name)); + } + args.pop_back(); + } + return scripting::script_value({}); + } +}; + +struct set : public scripting::function_impl { + std::optional apply(scripting::UserScript* self,std::vector args, std::optional& errors) final { + + if(args.size() != 2) { + errors = 
scripting::script_error{ + .message = "set expects 2 arguments" + }; + return scripting::script_value{}; + } + + auto& var = args.back(); + if(not holds_alternative(var)) { + errors = scripting::script_error{ + .message = "set expects the first argument to be a target variable" + }; + return scripting::script_value{}; + } + + auto& arg = args.front(); + + if(std::holds_alternative(arg)) { + self->setValue(get(var).name, std::get(arg)); + } else { + self->setValue(get(var).name, self->resolve(std::get(arg).name)); + } + if(auto v = self->getValue(get(var).name); v) { + return v.value(); + } else { + return scripting::script_value{}; + } + } +}; + +struct terminate : public scripting::function_impl { + std::optional apply(scripting::UserScript*,std::vector, std::optional&) final { + std::exit(1); + // PLEASE DO NOT ACTUALLY EXIT YOU FUCKING IDIOT + return scripting::script_value({}); + } +}; + +void process_bench(std::string target = "./tests/scripts/testfile.test") { + auto engine = scripting::prepare_interpreter(std::string{}); + + engine->registerFunction("identity", std::make_unique()); + engine->registerFunction("exit", std::make_unique()); + engine->registerFunction("set", std::make_unique()); + + /*** + * This is a half assed benchmark, + * Document results here to keep the thingy in check performance wise (release mode only) + * + * 2023-07-04 Archivist -> 2618ns - 308ns - 49ns (clang+libstdc++) + * 2023-07-07 Archivist -> 2481ns - 291ns - 46ns (clang+libc++) + * 2023-07-07 Archivist -> 106ns - 12ns - 2ns (clang+march=native+libc++) + */ + engine->registerFunction("print", std::make_unique(std::cout)); + std::ifstream src_str(target); + std::stringstream code; + code << src_str.rdbuf(); + int steps = 0; + + decltype(std::chrono::high_resolution_clock::now()-std::chrono::high_resolution_clock::now()) per_exec{}, per_step{}, per_op{}; + + for(int runs = 0; runs < 20; runs++) { + + auto res = engine->prepare(code.str()); + + auto begin = 
std::chrono::high_resolution_clock::now(); + while (not engine->getValue("exit_ctr").has_value()) { + engine->stepOnce(); + steps++; + } + auto end = std::chrono::high_resolution_clock::now(); + per_exec += (end - begin) / 5000; + per_step += (end - begin) / steps; + per_op += (end - begin) / (5000 * 53); + } + + per_exec /= 20; + per_step /= 20; + per_op /= 20; + + std::cout << "time per exec = " << std::chrono::duration_cast(per_exec).count() << "ns\n"; + std::cout << "time per step = " << std::chrono::duration_cast(per_step).count() << "ns\n"; + std::cout << "time per avg op = " << std::chrono::duration_cast(per_op).count() << "ns\n"; +} + +void compile_bench(std::string target = "./tests/scripts/testfile.test") { + auto engine = scripting::prepare_interpreter(std::string{}); + + engine->registerFunction("identity", std::make_unique()); + engine->registerFunction("exit", std::make_unique()); + engine->registerFunction("set", std::make_unique()); + + /*** + * Same as above but for compilation times + * + * 2023-07-04 Archivist -> 386µs + */ + engine->registerFunction("print", std::make_unique(std::cout)); + std::ifstream src_str("./tests/scripts/testfile.test"); + std::stringstream code; + code << src_str.rdbuf(); + + auto begin = std::chrono::high_resolution_clock::now(); + + + + [&]() __attribute__((optimize("O0"))) { + auto res = engine->prepare(code.str()); + res = engine->prepare(code.str()); + res = engine->prepare(code.str()); + res = engine->prepare(code.str()); + res = engine->prepare(code.str()); + }(); + + auto end = std::chrono::high_resolution_clock::now(); + auto per_exec = (end - begin)/5; + std::cout << "time per exec = " << std::chrono::duration_cast(per_exec).count() << "µs\n"; +} + +void compare(std::string target, std::string expect) { + auto engine = scripting::prepare_interpreter(std::string{}); + + engine->registerFunction("identity", std::make_unique()); + engine->registerFunction("exit", std::make_unique()); + 
engine->registerFunction("set", std::make_unique()); + + + std::stringstream str; + std::string_view filename_source = target; + std::string_view filename_output = expect; + engine->registerFunction("print", std::make_unique(str)); + + std::ifstream src_str(std::string{filename_source}); + std::stringstream code; + code << src_str.rdbuf(); + + std::ifstream out_str(std::string{filename_output}); + std::stringstream output; + output << out_str.rdbuf(); + + auto res = engine->executeAtOnce(code.str()); + if (std::holds_alternative(res)) { + } else { + auto &errors = std::get>(res); + for (auto &line: errors) { + str << line.message << "\n at line " << line.location->line_number << ":" + << line.location->column_number << "\n"; + str << " " << *line.location->line_contents << "\n"; + str << " " << std::string(line.location->column_number - 1, ' ') << "^\n"; + } + } + + int status = 0; + + while(not output.eof()) { + std::string expected, found; + std::getline(output, expected); + std::getline(str, found); + bool ok = (expected != found); + status+= ok ; + (ok ? std::cerr : std::cout) + << (not ok ? 
"\033[21;32m" : "\033[1;31m") << expected + << std::string(std::max(0, 40 - expected.size()), ' ')<< "| " << found << std::endl; + } + if(status) std::exit(status); +} + +void immediate_interactive() { + auto engine = scripting::prepare_interpreter(std::string{}); + + engine->registerFunction("identity", std::make_unique()); + engine->registerFunction("exit", std::make_unique()); + engine->registerFunction("set", std::make_unique()); + + engine->registerFunction("print", std::make_unique(std::cout)); + bool exit = false; + while (not exit) { + std::string code; + std::getline(std::cin, code); + auto res = engine->executeAtOnce(code); + if (std::holds_alternative(res)) { + } else { + auto &errors = std::get>(res); + for (auto &line: errors) { + std::cout << line.message << "\n at line "; + if(line.location) { + std::cout << line.location->line_number << ":" + << line.location->column_number << "\n"; + std::cout << " " << *line.location->line_contents << "\n"; + std::cout << " " << std::string(line.location->column_number - 1, ' ') << "^\n"; + } else std::cout << "UNKNOWN\n"; + } + } + } +} + +void exec(std::span args) { + std::vector batch; + + auto engine = scripting::prepare_interpreter(std::string{}); + + engine->registerFunction("identity", std::make_unique()); + engine->registerFunction("terminate", std::make_unique()); + engine->registerFunction("set", std::make_unique()); + + engine->registerFunction("print", std::make_unique(std::cout)); + bool exit = false; + while (not exit) { + std::string code; + std::getline(std::cin, code); + auto res = engine->executeAtOnce(code); + if (std::holds_alternative(res)) { + } else { + auto &errors = std::get>(res); + for (auto &line: errors) { + std::cout << line.message << "\n at line "; + if(line.location) { + std::cout << line.location->line_number << ":" + << line.location->column_number << "\n"; + std::cout << " " << *line.location->line_contents << "\n"; + std::cout << " " << std::string(line.location->column_number 
- 1, ' ') << "^\n"; + } else std::cout << "UNKNOWN\n"; + } + } + } +} + +#if defined(__linux__) or defined(WIN32) +constexpr bool trim_first_argument = true; +#else +constexpr bool trim_first_argument = false; +static_assert(false, "Undefined status of the first argument"); +#endif + +int cpp_main(std::span args) { + if constexpr (trim_first_argument) { + args = args.subspan(1); + } + + if(args.empty() || args.front() == "immediate") { + immediate_interactive(); + std::exit(0); + } else if(args.front() == "compare") { + args = args.subspan(1); + if(args.size() != 2) { + std::cerr << "compare expects 2 files as arguments" << std::endl; + std::terminate(); + } + } else if(args.front() == "bench_exec") { + args = args.subspan(1); + if(args.size() > 1) { + std::cerr << "bench_exec expects 0 or 1 file as arguments" << std::endl; + std::terminate(); + } + if(args.empty()) process_bench(); + else process_bench(std::string{args.front()}); + } else if(args.front() == "bench_compile") { + args = args.subspan(1); + if(args.size() > 1) { + std::cerr << "bench_exec expects 0 or 1 file as arguments" << std::endl; + std::terminate(); + } + if(args.empty()) compile_bench(); + else compile_bench(std::string{args.front()}); + } else if(args.front() == "exec") { + // exec(args.subspan(1)); + } else { + std::cerr << "Unknown option" << std::endl; + } + return 0; +} + +int main(int argc, char** argv) { + std::vector args; + for(auto& arg : std::span(argv, argv+argc)) { + args.emplace_back(arg, arg+strlen(arg)); + } + return cpp_main(args); +} \ No newline at end of file diff --git a/src/interpreter.cpp b/src/interpreter.cpp new file mode 100644 index 0000000..8e9834c --- /dev/null +++ b/src/interpreter.cpp @@ -0,0 +1,1186 @@ +#include +#include +#include "UserScript.h" +#include "UserScript/parser.h" + +namespace scripting { + void to_null(script_value& value, auto on_value, auto on_error) { + if(std::holds_alternative(value)) { + on_value(std::get(value)); + } else { + 
on_error(value); + } + } + void to_int(script_value& value, auto on_value, auto on_error) { + if(std::holds_alternative(value)) { + on_value(std::get(value)); + } else { + on_error(value); + } + } + void to_string(script_value& value, auto on_value, auto on_error) { + if(std::holds_alternative(value)) { + on_value(std::get(value)); + } else { + on_error(value); + } + } + void to_array(script_value& value, auto on_value, auto on_error) { + if(std::holds_alternative(value)) { + on_value(std::get(value)); + } else { + on_error(value); + } + } + + class ByteCodeInterpreter final : public UserScript { + std::map variables; + std::map functions; + std::vector execution_stack; + + public: + struct function_tag { + std::string name; + size_t arity; + std::shared_ptr location; + }; + + struct variable_tag { + std::string name; + std::shared_ptr location; + }; + + enum class operator_t : uint8_t { + logical_not, + binary_not, + unary_plus, + unary_minus, + divide, + modulo, + multiply, + subtract, + add, + bitshift_left, + bitshift_right, + rotate_left, + rotate_right, + less_than, + greater_than, + less_or_equal_than, + greater_or_equal_than, + equals, + different, + binary_and, + binary_or, + binary_xor, + logical_and, + logical_or, + INTERNAL_jump, + INTERNAL_jump_if, + INTERNAL_stack_cls, + }; + + struct operand { + std::variant element; + std::shared_ptr location; + }; + + std::optional> getValue(const std::string& name) { + if(auto var = variables.find(name); var != variables.end()) { + return var->second; + } else { + return std::nullopt; + } + } + bool setValue(const std::string& name, script_value value) { + if(auto var = variables.find(name); var != variables.end()) { + var->second = value; + return true; + } else { + variables.emplace(std::make_pair(name, value)); + return false; + } + } + + std::vector bytecode; + size_t instruction_ptr; + + script_value resolve(const std::string& name) final { + auto it = variables.find(name); + if(it == variables.end()) { + 
return script_value{}; + } + return (*it).second; + } + + script_value resolve_and_pop() { + if(execution_stack.empty()) return script_value{}; + auto value = std::move(execution_stack.back()); + auto resolved = std::visit([&](auto v) -> script_value { + if constexpr (std::is_same_v) { + auto it = variables.find(v.name); + if(it == variables.end()) { + return script_value{}; + } + return (*it).second; + } else { + return v; + } + }, value); + execution_stack.pop_back(); + return resolved; + } + + void big_f_ing_switch(operand& op, std::optional& error); + + std::vector generate(std::vector& errors, ast::block &tree, bool loop = true); + + void registerFunction(std::string name, function fn) final { + functions.insert_or_assign(name, std::move(fn)); + } + + std::variant> executeAtOnce(std::string code) final { + std::vector errors; + auto lexed = ast::lex(code, errors); + auto parsed = ast::parse(lexed, errors); + if(not errors.empty()) return errors; + bytecode = generate(errors, parsed, false); + if(not errors.empty()) return errors; + std::optional maybe_error; + instruction_ptr = 0; + while(instruction_ptr < bytecode.size()) { + step(maybe_error); + if(maybe_error) return std::vector({maybe_error.value()}); + } + auto v = resolve_and_pop(); + execution_stack.clear(); + return v; + } + + std::vector prepare(std::string code) final { + std::vector errors; + auto lexed = ast::lex(code, errors); + auto parsed = ast::parse(lexed, errors); + if(errors.empty()) { + bytecode = generate(errors, parsed, true); + } + + return errors; + } + + std::optional stepOnce() final { + std::optional error; + while(not step(error)); + return error; + } + + bool step(std::optional& error); + ~ByteCodeInterpreter() final {} + }; + + namespace wizardry { + // taken from cppreference: https://en.cppreference.com/w/cpp/utility/variant/visit + template + struct overloaded : Ts ... 
{ + using Ts::operator()...; + }; + // explicit deduction guide (not needed as of C++20) + template + overloaded(Ts...) -> overloaded; + } + + bool ByteCodeInterpreter::step(std::optional& error) { + if(instruction_ptr >= bytecode.size()) return true; + + auto& curr_op = bytecode[instruction_ptr]; + auto& instr = curr_op.element; + + bool ret = std::visit(wizardry::overloaded{ + [&](script_value& v){ + execution_stack.push_back(v); + return false; + }, + [&](variable_tag& v){ + execution_stack.push_back(script_variable{.name = v.name}); + return false; + }, + [&](ByteCodeInterpreter::operator_t& v){ + big_f_ing_switch(curr_op, error); + return v == ByteCodeInterpreter::operator_t::INTERNAL_jump or v == ByteCodeInterpreter::operator_t::INTERNAL_jump_if; + }, + [&](ByteCodeInterpreter::function_tag& v){ + if(v.arity > execution_stack.size()) { + error = script_error{.location = bytecode[instruction_ptr].location, .message = "INTERNAL ERROR: invalid amount of argument found in stack, please warn the devs, this is bad and should never happen"}; + return true; + } + auto it = functions.find(v.name); + if(it == functions.end()) { + for(auto arity = v.arity; arity != 0; arity--) { + execution_stack.pop_back(); + } + execution_stack.push_back(argument{script_value{}}); + // Invalid function is not an error + return true; + } + auto args_in_situ = std::span{execution_stack}.subspan(execution_stack.size() - v.arity); + std::vector arguments{args_in_situ.begin(), args_in_situ.end()}; + auto item = (*it).second->apply(this, arguments, error); + if(item) { + execution_stack.emplace_back(item.value()); + } + return true; + }, + + }, instr); + instruction_ptr = instruction_ptr + 1; + return instruction_ptr >= bytecode.size() || error || ret; + } + + // Replace with constexpr vector & find ? 
+ static const std::map mappings = { + {ast::operator_t::logical_not, ByteCodeInterpreter::operator_t::logical_not}, + {ast::operator_t::binary_not, ByteCodeInterpreter::operator_t::binary_not}, + {ast::operator_t::divide, ByteCodeInterpreter::operator_t::divide}, + {ast::operator_t::modulo, ByteCodeInterpreter::operator_t::modulo}, + {ast::operator_t::multiply, ByteCodeInterpreter::operator_t::multiply}, + {ast::operator_t::subtract, ByteCodeInterpreter::operator_t::subtract}, + {ast::operator_t::add, ByteCodeInterpreter::operator_t::add}, + {ast::operator_t::bitshift_left, ByteCodeInterpreter::operator_t::bitshift_left}, + {ast::operator_t::bitshift_right, ByteCodeInterpreter::operator_t::bitshift_right}, + {ast::operator_t::rotate_left, ByteCodeInterpreter::operator_t::rotate_left}, + {ast::operator_t::rotate_right, ByteCodeInterpreter::operator_t::rotate_right}, + {ast::operator_t::less_than, ByteCodeInterpreter::operator_t::less_than}, + {ast::operator_t::greater_than, ByteCodeInterpreter::operator_t::greater_than}, + {ast::operator_t::less_or_equal_than, ByteCodeInterpreter::operator_t::less_or_equal_than}, + {ast::operator_t::greater_or_equal_than, ByteCodeInterpreter::operator_t::greater_or_equal_than}, + {ast::operator_t::equals, ByteCodeInterpreter::operator_t::equals}, + {ast::operator_t::different, ByteCodeInterpreter::operator_t::different}, + {ast::operator_t::binary_and, ByteCodeInterpreter::operator_t::binary_and}, + {ast::operator_t::binary_or, ByteCodeInterpreter::operator_t::binary_or}, + {ast::operator_t::binary_xor, ByteCodeInterpreter::operator_t::binary_xor}, + {ast::operator_t::logical_and, ByteCodeInterpreter::operator_t::logical_and}, + {ast::operator_t::logical_or, ByteCodeInterpreter::operator_t::logical_or}, + }; + + /// GENERATION HANDLERS DECLARATIONS + + template + void handle(std::vector&, std::vector&, T&); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::block& block); + template<> + void handle(std::vector& 
ctx, std::vector& errors, ast::command_expression& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::binary_algebraic_expression& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::unary_algebraic_expression& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::paren_expression& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::conditional& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::while_loop& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::expression& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::variable_expression& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::literal_int_expression& cmd); + template<> + void handle(std::vector& ctx, std::vector& errors, ast::literal_string_expression& cmd); + + /// GENERATION HANDLERS DEFINITIONS + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::block& block) { + for(auto& elem : block.contents) { + std::visit([&](auto& v) {handle(ctx, errors, *v);}, elem.contents); + ctx.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_stack_cls}); + } + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::command_expression& cmd) { + for(auto it = cmd.arguments.rbegin(); it != cmd.arguments.rend(); ++it) { + std::visit([&](auto& v) {handle(ctx, errors, *v);}, (*it)->contents); + } + ctx.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::function_tag{.name = cmd.name.value, .arity = cmd.arguments.size()}, .location = cmd.location}); + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::paren_expression& expr) { + std::visit([&](auto& v) {handle(ctx, errors, *v);}, expr.content); + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::expression& expr) { + 
std::visit([&](auto& v) {handle(ctx, errors, *v);}, expr.contents); + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::binary_algebraic_expression& expr) { + handle(ctx, errors, *expr.lhs); + handle(ctx, errors, *expr.rhs); + ctx.push_back(ByteCodeInterpreter::operand{.element = mappings.at(expr.op), .location = expr.location}); + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::unary_algebraic_expression& expr) { + handle(ctx, errors, *expr.content); + ctx.push_back(ByteCodeInterpreter::operand{.element = mappings.at(expr.op), .location = expr.location}); + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::variable_expression& expr) { + ctx.push_back(ByteCodeInterpreter::operand{ByteCodeInterpreter::variable_tag{.name = expr.name.value, .location = expr.location}}); + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::literal_int_expression& expr) { + ctx.push_back(ByteCodeInterpreter::operand{script_value{expr.value}}); + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::literal_string_expression& expr) { + ctx.push_back(ByteCodeInterpreter::operand{script_value{expr.value}}); + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::conditional& cond) { + /// some basic documentation (from before the reference stability bug but things are the same): + /// https://app.excalidraw.com/s/hxPegpAmTX/2c8KKzinqeg + std::visit([&](auto& v) {handle(ctx, errors, *v);}, cond.condition->contents); + ctx.push_back(ByteCodeInterpreter::operand{.element = script_value{}, .location = cond.location}); + /// As you can see, being smart is dumb, be a fucking monkey that comes from the 70s and use 70s technology:tm: to your advantage + /// More seriously, WTF (?) 
we do this because we used to have a bug with unreliable references to these locations, which makes sense since we + /// don't have reference stability + auto else_side_idx = ctx.size()-1; + ctx.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_jump_if, .location = cond.location}); + handle(ctx, errors, *cond.on_condition); + if(cond.otherwise) { + ctx.push_back(ByteCodeInterpreter::operand{.element = script_value{}, .location = cond.location}); + auto end_side_idx = ctx.size()-1; + ctx.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_jump, .location = cond.location}); + ctx[else_side_idx].element = static_cast(ctx.size())-1; + ctx[else_side_idx].location = cond.location; + handle(ctx, errors, *cond.otherwise); + ctx[end_side_idx].element = static_cast(ctx.size())-1; + ctx[end_side_idx].location = cond.location; + } else { + ctx[else_side_idx].element = static_cast(ctx.size())-1; + ctx[else_side_idx].location = cond.location; + } + } + + template<> + void handle(std::vector& ctx, std::vector& errors, ast::while_loop& cond) { + auto beforewhile_side_idx = static_cast(ctx.size())-1; + std::visit([&](auto& v) {handle(ctx, errors, *v);}, cond.condition->contents); + ctx.push_back(ByteCodeInterpreter::operand{.element = script_value{}, .location = cond.location}); + auto endwhile_side_idx = ctx.size()-1; + ctx.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_jump_if, .location = cond.location}); + handle(ctx, errors, *cond.on_condition); + ctx.push_back(ByteCodeInterpreter::operand{.element = script_value{beforewhile_side_idx}, .location = cond.location}); + ctx.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_jump, .location = cond.location}); + ctx[endwhile_side_idx].element = static_cast(ctx.size())-1; + ctx[endwhile_side_idx].location = cond.location; + } + + std::vector 
ByteCodeInterpreter::generate(std::vector& errors, ast::block &tree, bool loop) { + std::vector code; + + handle(code, errors, tree); + if(loop) { + // Here we have to deal with the quirks of jumping before the increments happens again + code.push_back(ByteCodeInterpreter::operand{.element = script_value{-1}, .location = tree.location}); + code.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_jump, .location = tree.location}); + } + return code; + } + + std::unique_ptr prepare_interpreter(const std::string& code) { + auto script = std::make_unique(); + script->prepare(code); + return script; + } + + /// BIG FUCKING SWITCH + + void ByteCodeInterpreter::big_f_ing_switch(operand& op, std::optional& error) { + switch (get(op.element)) { + case operator_t::logical_not: { + auto v = resolve_and_pop(); + // TODO: strings and arrays to booleans? + to_int( + v, + [&](int32_t &value) { + execution_stack.push_back(script_value{int32_t(!value)}); + }, + [&](auto &other) { + to_null( + other, + [&](null &) { + execution_stack.push_back(script_value{int32_t(1)}); + }, + [&](auto &) { + error = script_error{ + op.location, + "! 
operator requires an integer or null" + }; + execution_stack.push_back(script_value{}); + } + ); + } + ); + break; + } + case operator_t::binary_not: { + auto v = resolve_and_pop(); + to_int( + v, + [&](int32_t &value) { + execution_stack.push_back(script_value{int32_t(~value)}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "~ operator requires an integer" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::unary_plus: { + auto v = resolve_and_pop(); + error = script_error{ + op.location, + "unary + operator is unimplemented" + }; + execution_stack.push_back(script_value{}); + break; + } + case operator_t::unary_minus: { + auto rhs = resolve_and_pop(); + error = script_error{ + op.location, + "unary - operator is unimplemented" + }; + execution_stack.push_back(script_value{}); + break; + } + case operator_t::divide: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + if (value_rhs == 0) { + error = script_error{ + op.location, + "Division by zero: / operator requires an non-zero integer as right hand side" + }; + execution_stack.push_back(script_value{}); + return; + }// TODO: this should be `value_` versions of the variables + execution_stack.push_back(script_value{value_lhs / value_rhs}); + }, + [&](auto &other) { + error = script_error { + op.location, + "/ operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "/ operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::modulo: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + if (value_rhs == 0) { + error = script_error{ + op.location, + 
"Division by zero: % operator requires an non-zero integer as right hand side" + }; + execution_stack.push_back(script_value{}); + return; + } + execution_stack.push_back(script_value{value_lhs % value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "% operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "% operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::multiply: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + execution_stack.push_back(script_value{value_lhs * value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "* operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "* operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::subtract: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + execution_stack.push_back(script_value{value_lhs - value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "- operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "- operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::add: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: strings and arrays concats? 
+ to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + execution_stack.push_back(script_value{value_lhs + value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "+ operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "+ operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::bitshift_left: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: strings and arrays shifts and rotates? + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + if (value_rhs < 0 or value_rhs > 32) { + error = script_error{ + op.location, + "bad shift: shift must be between 0 and 32 bits" + }; + execution_stack.push_back(script_value{}); + return; + } + uint32_t true_lhs = *reinterpret_cast(&value_lhs), true_rhs = *reinterpret_cast(&value_rhs); + execution_stack.push_back(script_value{static_cast(true_lhs << true_rhs)}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "<< operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "<< operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::bitshift_right: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: strings and arrays shifts and rotates? 
+ to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + if (value_rhs < 0 or value_rhs > 32) { + error = script_error{ + op.location, + "bad shift: shift must be between 0 and 32 bits" + }; + execution_stack.push_back(script_value{}); + return; + } + uint32_t true_lhs = *reinterpret_cast(&lhs), true_rhs = *reinterpret_cast(&rhs); + execution_stack.push_back(script_value{static_cast(true_lhs >> true_rhs)}); + }, + [&](auto &other) { + error = script_error{ + op.location, + ">> operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + ">> operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::rotate_left: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: strings and arrays shifts and rotates? + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + if (value_rhs < 0 or value_rhs > 32) { + error = script_error{ + op.location, + "bad rotate: rotate must be between 0 and 32 bits" + }; + execution_stack.push_back(script_value{}); + return; + } + uint32_t true_lhs = *reinterpret_cast(&value_lhs), true_rhs = *reinterpret_cast(&value_rhs); + execution_stack.push_back(script_value{static_cast(std::rotl(true_lhs, true_rhs))}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "<<< operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "<<< operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::rotate_right: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: strings and arrays shifts and rotates? 
+ to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + if (value_rhs < 0 or value_rhs > 32) { + error = script_error{ + op.location, + "bad rotate: rotate must be between 0 and 32 bits" + }; + execution_stack.push_back(script_value{}); + return; + } + uint32_t true_lhs = *reinterpret_cast(&value_lhs), true_rhs = *reinterpret_cast(&value_rhs); + execution_stack.push_back(script_value{static_cast(std::rotr(true_lhs, true_rhs))}); + }, + [&](auto &other) { + error = script_error{ + op.location, + ">>> operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + ">>> operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::less_than: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: lexicographical strings and arrays shifts comparisons + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + execution_stack.push_back(script_value{value_lhs < value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "< operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "< operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::greater_than: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: lexicographical strings and arrays shifts comparisons + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + execution_stack.push_back(script_value{value_lhs > value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "> operator requires an integer as left hand side" + }; + 
execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "> operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::less_or_equal_than: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: lexicographical strings and arrays shifts comparisons + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + execution_stack.push_back(script_value{value_lhs <= value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "<= operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "<= operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::greater_or_equal_than: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: lexicographical strings and arrays shifts comparisons + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + execution_stack.push_back(script_value{value_lhs >= value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + ">= operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + ">= operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::equals: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: error reporting (got degraded) + // TODO: compare arrays + if(rhs.index() != lhs.index()) { + execution_stack.push_back(script_value{0}); + } else if(holds_alternative(rhs)) { + execution_stack.push_back(script_value{1}); + } else 
if(holds_alternative(rhs)) { + execution_stack.push_back(script_value{get(rhs) == get(lhs)}); + } else if(holds_alternative(rhs)) { + execution_stack.push_back(script_value{get(rhs) == get(lhs)}); + } else if(holds_alternative(rhs)) { + execution_stack.push_back(script_value{0}); + //execution_stack.push_back(script_value{get(rhs).value == get(lhs).value}); + } + break; + } + case operator_t::different: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: strings and arrays different + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + execution_stack.push_back(script_value{value_lhs != value_rhs}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "!= operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "!= operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::binary_and: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + uint32_t true_lhs = *reinterpret_cast(&value_lhs), true_rhs = *reinterpret_cast(&value_rhs); + execution_stack.push_back(script_value{static_cast(true_lhs & true_rhs)}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "& operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "& operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::binary_or: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + uint32_t true_lhs = 
*reinterpret_cast(&value_lhs), true_rhs = *reinterpret_cast(&value_rhs); + execution_stack.push_back(script_value{static_cast(true_lhs | true_rhs)}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "| operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "| operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::binary_xor: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: XORing strings maybe? + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + uint32_t true_lhs = *reinterpret_cast(&value_lhs), true_rhs = *reinterpret_cast(&value_rhs); + execution_stack.push_back(script_value{static_cast(true_lhs ^ true_rhs)}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "^ operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "^ operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::logical_and: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: strings and arrays to booleans? 
+ to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + uint32_t true_lhs = *reinterpret_cast(&value_lhs), true_rhs = *reinterpret_cast(&value_rhs); + execution_stack.push_back(script_value{static_cast(true_lhs && true_rhs)}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "&& operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "&& operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::logical_or: { + auto rhs = resolve_and_pop(); + auto lhs = resolve_and_pop(); + // TODO: strings and arrays to booleans? + to_int( + rhs, + [&](int32_t &value_rhs) { + to_int( + lhs, + [&](int32_t &value_lhs) { + uint32_t true_lhs = *reinterpret_cast(&value_lhs), true_rhs = *reinterpret_cast(&value_rhs); + execution_stack.push_back(script_value{static_cast(true_lhs || true_rhs)}); + }, + [&](auto &other) { + error = script_error{ + op.location, + "|| operator requires an integer as left hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + }, + [&](auto &other) { + error = script_error{ + op.location, + "|| operator requires an integer as right hand side" + }; + execution_stack.push_back(script_value{}); + } + ); + break; + } + case operator_t::INTERNAL_jump: { + auto location = resolve_and_pop(); + to_int( + location, + [&](int32_t &instruction_target) { + instruction_ptr = instruction_target; + }, + [&](auto &instruction_target) { + error = script_error{ + op.location, + "Jump to invalid location" + }; + } + ); + break; + } + case operator_t::INTERNAL_jump_if: { + auto location = resolve_and_pop(); + auto condition = resolve_and_pop(); + // TODO: handle null as the condition + to_int( + condition, + [&](int32_t &condition_value) { + // CAUTION: the condition is inverted, this should really be called 
jump_if_not + // TODO: rename to jump_if_not appropriately + if (not condition_value) { + to_int( + location, + [&](int32_t &instruction_target) { + instruction_ptr = instruction_target; + }, + [&](auto &instruction_target) { + error = script_error{ + op.location, + "JumpIf to invalid location "// + std::to_string(holds_alternative(instruction_target)) + }; + } + ); + } + }, + [&](auto &instruction_target) { + error = script_error{ + op.location, + "Condition is not an integer" + }; + } + ); + break; + } + case operator_t::INTERNAL_stack_cls: { + execution_stack.clear(); + break; + } + } + } +} \ No newline at end of file diff --git a/src/lex_parse.cpp b/src/lex_parse.cpp new file mode 100644 index 0000000..efb6546 --- /dev/null +++ b/src/lex_parse.cpp @@ -0,0 +1,1051 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "UserScript/parser.h" +#include "UserScript.h" + +///////////////// +/// CONSTANTS /// +///////////////// + +using symbol_t = scripting::ast::symbol_t; + +constexpr std::array, 25> operators { + std::pair{"(", symbol_t::l_paren}, + std::pair{")", symbol_t::r_paren}, + std::pair{"!=", symbol_t::different}, + std::pair{"!", symbol_t::logical_not}, + std::pair{"~", symbol_t::binary_not}, + std::pair{"/", symbol_t::divide}, + std::pair{"%", symbol_t::modulo}, + std::pair{"*", symbol_t::multiply}, + std::pair{"-", symbol_t::subtract}, + std::pair{"+", symbol_t::add}, + std::pair{"<<<", symbol_t::rotate_left}, + std::pair{">>>", symbol_t::rotate_right}, + std::pair{"<<", symbol_t::bitshift_left}, + std::pair{">>", symbol_t::bitshift_right}, + std::pair{"<=", symbol_t::less_or_equal_than}, + std::pair{">=", symbol_t::greater_or_equal_than}, + std::pair{"<", symbol_t::less_than}, + std::pair{">", symbol_t::greater_than}, + std::pair{"==", symbol_t::equals}, + std::pair{"&&", symbol_t::logical_and}, + std::pair{"&", symbol_t::binary_and}, + std::pair{"||", symbol_t::logical_or}, + std::pair{"|", 
symbol_t::binary_or}, + std::pair{"^", symbol_t::binary_xor}, + std::pair{"\n", symbol_t::new_line} +}; + +const std::vector reserved_character_sequences { + "(", + ")", + "!=", + "!", + "~", + "/", + "%", + "*", + "-", + "+", + "<<<", + ">>>", + "<<", + ">>", + "<=", + ">=", + "<", + ">", + "==", + "&&", + "&", + "||", + "|", + "^", + "=", + "\n" +}; + +///////////////////// +/// LEXER HELPERS /// +///////////////////// + +using token = scripting::ast::token; + +struct lex_token_result { + token tok; + std::string_view rest; +}; + +/// View over the bytes of a single UTF-8 encoded code point +struct rune_ref { + std::string_view str; + /// Decodes the referenced UTF-8 sequence into its code point (0 when the view is empty) + explicit operator uint32_t() const { + if(str.empty()) return 0; + if(str.size() == 1) return str[0]; + // A lead byte of an n byte sequence is n one bits, a zero bit, then (7 - n) payload bits + auto bytes = 8 - (str.size() + 1); + // Keep every payload bit of the lead byte: the mask is the low `bytes` bits, not only the highest of them + uint32_t rune = static_cast(str[0]) & ((1u << bytes) - 1); + for(auto c : str.substr(1)) { + // Each continuation byte contributes its low 6 bits + rune <<= 6; + rune ^= static_cast(c) & 0b00111111; + } + return rune; + } + + /// True when the decoded code point is one of the listed Unicode space characters + [[nodiscard]] bool is_space() const { + constexpr std::array spaces{ + 0x0020, 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, + 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x2002, 0x205F, 0x3000 + }; + + return std::find(spaces.begin(), spaces.end(), static_cast(*this)) != spaces.end(); + } +}; + +struct try_rune_result { + rune_ref rune; + std::string_view rest; +}; + +std::shared_ptr get_loc(std::string_view original, std::string_view rest, std::shared_ptr last_line) { + // TODO: Check everything again for weird ass cases + if(original.empty()) { + return std::make_shared(scripting::code_location{ + .line_contents = std::make_shared(), + .line_number = (int32_t)std::clamp(1, 1, std::numeric_limits::max()), + .column_number = (int32_t)std::clamp(1 + 1, 1, std::numeric_limits::max()) + }); + } + const auto before = original.substr(0, original.size() - rest.size()); + const auto line_no = std::ranges::count(before, '\n') + 1; + const auto line_start = std::find(before.crbegin(), before.crend(), '\n'); + const auto column_no = line_start != before.crend() ? 
(line_start - before.crbegin()) : before.size(); + const auto back_tracked = before.size() - column_no; + const auto front_tracked = rest.empty() ? original.size() : before.size() + (std::ranges::find(rest, '\n') - rest.begin()); + const std::string_view current{original.begin() + back_tracked, original.begin() + front_tracked}; + + if(not last_line || *last_line != current) { + last_line = std::make_shared(current); + } + + return std::make_shared(scripting::code_location{ + .line_contents = last_line, + .line_number = (int32_t)std::clamp(line_no, 1, std::numeric_limits::max()), + .column_number = (int32_t)std::clamp(column_no + 1, 1, std::numeric_limits::max()) + }); +} + +//////////////////// +/// LEXER PROPER /// +//////////////////// + +auto try_rune(std::string_view text, std::shared_ptr& location, std::vector& errors) -> std::optional { + static_assert(CHAR_BIT == 8, "Get your weird ass cpu outta here"); + + if(text.empty()) return std::nullopt; + + if(0 == (*reinterpret_cast(&text.front()) & 0b10000000)) { + return try_rune_result{text.substr(0, 1), text.substr(1)}; + } + + switch(auto bytes = std::countl_one(*reinterpret_cast(&text.front())); bytes) { + case 0: // ASCII + { + return try_rune_result{text.substr(0, 1), text.substr(1)}; + } + case 1: // Middle of sequence + { + return std::nullopt; + } + case 7: [[fallthrough]]; + case 8: // Invalid sequence start + { + return std::nullopt; + } + default: // Maybe it is valid + { + if(text.size() < bytes) { // Nope, too short to get a full rune + errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"}); + return std::nullopt; + } + auto rune = text.substr(0,bytes); + + // Check if the rest of the rune is valid + if(std::ranges::any_of(rune.substr(1), [](const char& byte){ return std::countl_one(*reinterpret_cast(&byte)) != 1;})) { + errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"}); 
+ return std::nullopt; + } + return try_rune_result{rune, text.substr(bytes)}; + } + } +} +/// Lexes a double quoted string literal (with backslash escapes) at the start of `view`, after optional leading whitespace. +/// Yields the token and the remaining text, or nothing when `view` does not start with a string or the string is malformed +constexpr auto try_string = [](std::string_view view, std::shared_ptr& location, std::vector& errors) -> std::optional { + // Value of each byte as a hexadecimal digit, -1 when the byte is not one + constexpr std::array hexdigits = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 + }; + auto it = view.begin(); + while (it != view.end() and std::isspace(*it)) ++it; + if(it == view.end()) return std::nullopt; + std::stringstream generated; + if(*it != '"') return std::nullopt; + std::string str; + while(true) { + ++it; + if(it == view.end()) { + errors.push_back(scripting::script_error{.location = location, .message = "Unterminated string"}); + return std::nullopt; + } + switch(*it) { + case '\\': + ++it; + if(it == view.end()) { + errors.push_back(scripting::script_error{.location = location, .message = "Unterminated string"}); + // Stop here: falling through would dereference the end iterator in the switch below + return std::nullopt; + } + switch(*it) { + case '\\': generated << '\\'; break; + case 'a': generated << '\a'; break; + case 'b': generated << '\b'; break; + case 'f': 
generated << '\f'; break; + case 'n': generated << '\n'; break; + case 'r': generated << '\r'; break; + case 't': generated << '\t'; break; + case 'v': generated << '\v'; break; + case '\'': generated << '\''; break; + case '"': generated << '"'; break; + case '0': [[fallthrough]]; + case '1': [[fallthrough]]; + case '2': [[fallthrough]]; + case '3': [[fallthrough]]; + case '4': [[fallthrough]]; + case '5': [[fallthrough]]; + case '6': [[fallthrough]]; + case '7': + { + // Three digit octal escape: every digit must be in 0..7, so anything above 7 is reported + char c = uint8_t(*it - '0') * 8 * 8; + if(uint8_t(*it - '0') > 7) { + errors.push_back(scripting::script_error{.location = location, .message = "Bad octal value in string"}); + } + ++it; + if(it == view.end()) return std::nullopt; + c += uint8_t(*it - '0') * 8; + if(uint8_t(*it - '0') > 7) { + errors.push_back(scripting::script_error{.location = location, .message = "Bad octal value in string"}); + } + ++it; + if(it == view.end()) return std::nullopt; + c += uint8_t(*it - '0'); + if(uint8_t(*it - '0') > 7) { + errors.push_back(scripting::script_error{.location = location, .message = "Bad octal value in string"}); + } + generated << c; + break; } + case 'x': + { + // Two digit hexadecimal escape; the table is indexed by the unsigned byte value because + // a plain char may be negative for bytes >= 0x80 and would index before the table + ++it; + if(it == view.end()) return std::nullopt; + if(hexdigits[uint8_t(*it)] < 0) return std::nullopt; + char c = hexdigits[uint8_t(*it)] << 4; + ++it; + if(it == view.end()) return std::nullopt; + if(hexdigits[uint8_t(*it)] < 0) return std::nullopt; + c += hexdigits[uint8_t(*it)]; + generated << c; + break; } + default: + generated << *it; + } + break; + case '"': + str = generated.str(); + return lex_token_result { + token{.location = location, .value = std::string(str)}, + std::string_view(++it, view.end()) + }; + default: + generated << *it; + break; + } + } +}; +constexpr auto try_int32 = [](std::string_view view, std::shared_ptr& location, std::vector& errors) -> std::optional { + int32_t i; + auto v = std::from_chars(view.begin(), view.end(), i); + if(v.ptr == view.begin()) return std::nullopt; + auto rest = std::string_view(v.ptr, view.end()); + return 
lex_token_result{ + token{.location = std::move(location), .value = i}, + rest + }; +}; +std::optional try_operator(std::string_view code, std::shared_ptr& location, std::vector& errors) { + for(auto& [representation, type] : operators) { + if(code.starts_with(representation)) { + return lex_token_result{ + token{.location = location, .value = type}, + code.substr(representation.size()) + }; + } + } + return std::nullopt; +} +auto try_identifier(std::string_view view, std::shared_ptr& location, std::vector& errors) -> std::optional { + constexpr auto starts_with_reserved = [](std::string_view v) -> bool { + return std::ranges::any_of(reserved_character_sequences, [&](auto seq){ + return v.starts_with(seq); + }); + }; + + std::stringstream identifier_value; + + if(view.empty()) return std::nullopt; + while(!view.empty() && !starts_with_reserved(view)) { + if(auto maybe_rune = try_rune(view, location, errors); maybe_rune) { + auto [rune, rest] = maybe_rune.value(); + if(rune.is_space()) { + view = rest; + break; + } + identifier_value << rune.str; + view = rest; + } else { + errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"}); + return std::nullopt; + } + } + + scripting::ast::identifier result {.location = location, .value = identifier_value.str()}; + + if(result.value.empty()) return std::nullopt; + + return lex_token_result{.tok = token{.location = location, .value = result}, .rest = view}; +} + +std::vector scripting::ast::lex(const std::string& code, std::vector& errors) { + std::vector return_value; + std::string_view current = code; + std::shared_ptr last_line; + + while(not current.empty()) { + for(;;) { + if(current.empty()) break; + auto location = get_loc(code, current, last_line); + auto c = try_rune(current, location, errors); + if(not c.has_value()) { + errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 encoding detected while trimming space"}); + return 
return_value; + } else { + if(c.value().rune.is_space()) { + current = c.value().rest; + } else break; + } + } + + auto location = get_loc(code, current, last_line); + last_line = location->line_contents; + auto res = try_string(current, location, errors); + if (!res) res = try_operator(current, location, errors); + if (!res) res = try_int32(current, location, errors); + if (!res) res = try_identifier(current, location, errors); + if(res.has_value()) { + current = res.value().rest; + return_value.emplace_back(std::move(res.value().tok)); + } else { + errors.push_back(scripting::script_error{.location = location, .message = "Unknown token"}); + return return_value; + } + } + + return return_value; +} + +////////////////////// +/// PARSER HELPERS /// +////////////////////// + +template +struct parse_result { + std::optional result; + std::span rest; + operator bool() { return result.has_value(); } +}; + +bool next_is_newline(std::span current) { + return (not current.empty()) && (holds_alternative(current.front().value) && (get(current.front().value) == symbol_t::new_line)); +} + +std::span trim_newline(std::span current) { + while(next_is_newline(current)) { + current = current.subspan(1); + } + return current; +} + +/////////////////////////// +/// PARSER DECLARATIONS /// +/////////////////////////// + +parse_result try_expression(std::span code, std::vector& errors); +parse_result try_binary_algebraic_expression(std::span code, std::vector& errors); +parse_result try_variable_expression(std::span code, std::vector& errors); +parse_result try_literal_int_expression(std::span code, std::vector& errors); +parse_result try_literal_string_expression(std::span code, std::vector& errors); +parse_result try_statement(std::span code, std::vector& errors); +parse_result try_command_expr(std::span code, std::vector& errors); +parse_result try_conditional(const std::span code, std::vector& errors, const std::string_view initiator = "if"); +parse_result try_while_loop(const 
std::span code, std::vector& errors); +parse_result try_paren_expression(std::span code, std::vector& errors); +parse_result try_unary_algebraic_expression(std::span code, std::vector& errors); + + +////////////////////////// +/// PARSER DEFINITIONS /// +////////////////////////// + +parse_result try_command_expr(std::span code, std::vector& errors) { + scripting::ast::command_expression cmd; + auto current = code; + if(current.empty()) return {std::nullopt, code}; + if(not holds_alternative(current.front().value)) return {std::nullopt, code}; + if(get(current.front().value) != symbol_t::divide) return {std::nullopt, code}; + cmd.location = current.front().location; + current = current.subspan(1); + + if(current.empty()) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Expected command name"}); + return {std::nullopt, code}; + } + if(not holds_alternative(current.front().value)) { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Expected command name"}); + return {std::nullopt, code}; + } + cmd.name = get(current.front().value); + current = current.subspan(1); + + while( + not current.empty() + and not ( + holds_alternative(current.front().value) + and get(current.front().value) == symbol_t::new_line + ) + and not ( + holds_alternative(current.front().value) + and get(current.front().value) == symbol_t::r_paren + ) + ) { + auto [expr, rest] = try_expression(current, errors); + + if(not expr) { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Expected expression"}); + return {std::nullopt, code}; + } + + cmd.arguments.push_back(std::make_unique(std::move(expr.value()))); + current = rest; + } + + return {std::move(cmd), current}; +} + +parse_result try_expression(std::span code, std::vector& errors) { + scripting::ast::expression node; + auto current = code; + +#ifdef HANDLE_EXPRESSION + static_assert(false, "Found a macro name 
HANDLE_EXPRESSION, halting"); +#endif +#define HANDLE_EXPRESSION(type) \ + node = scripting::ast::expression{ \ + .location = current.front().location, \ + .contents = std::make_unique(std::move(expr.value())) \ + }; \ + current = rest; + + if(auto [expr, rest] = try_binary_algebraic_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::binary_algebraic_expression) + } else if(auto [expr, rest] = try_literal_string_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::literal_string_expression) + } else if(auto [expr, rest] = try_literal_int_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::literal_int_expression) + } else if(auto [expr, rest] = try_unary_algebraic_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::unary_algebraic_expression) + } else if(auto [expr, rest] = try_paren_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::paren_expression) + } else if(auto [expr, rest] = try_variable_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::variable_expression) + } else { + return {std::nullopt, code}; + } +#undef HANDLE_EXPRESSION + return {.result = std::move(node), .rest = current}; +} + +parse_result try_conditional(const std::span code, std::vector& errors, const std::string_view initiator) { +#ifdef FAILURE + static_assert(false, "Found a macro name FAILURE, halting"); +#endif +#define FAILURE {.result = std::nullopt, .rest = code} + scripting::ast::conditional result{}; + if(code.empty()) return FAILURE; + auto current = code; + result.location = current.front().location; + + const auto endif_found = [&]() -> bool { + if (current.empty()) return false; + if (not holds_alternative(current.front().value)) return false; + if (get(current.front().value).value != "endif") return false; + return true; + }; + + // chomps a new line if available, returns true if it failed + const auto MISSING_NEW_LINE = [&]() -> bool { + if 
(current.empty()) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"}); + return true; + } + auto new_line = current.front(); + if (not holds_alternative(current.front().value)) { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Expected new line"}); + return true; + } + if (get(current.front().value) != symbol_t::new_line) { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Expected new line"}); + return true; + } + current = current.subspan(1); + return false; + }; + + if (current.empty()) return FAILURE; + if (not holds_alternative(current.front().value)) return FAILURE; + if (get(current.front().value).value != initiator) return FAILURE; + current = current.subspan(1); + + // Read the condition + auto conditional_node = try_expression(current, errors); + if (not conditional_node.result) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Expected expression in conditional"}); + return FAILURE; + } + result.condition = std::make_unique(std::move(conditional_node.result.value())); + current = conditional_node.rest; + + if(MISSING_NEW_LINE()) return FAILURE; + + if (current.empty()) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"}); + return FAILURE; + } + result.on_condition = std::make_unique(); + result.on_condition->location = current.front().location; + + bool else_mode = false; + + while (not endif_found()) { + const auto error_count = errors.size(); + + // Handles elseif sequences as nested ifs in the else block + auto nester = try_conditional(current, errors, "elseif"); + if(nester.result) { + result.otherwise = std::make_unique(); + result.otherwise->location = current.front().location; + result.otherwise->contents.push_back(scripting::ast::statement{ + .location = 
current.front().location, + .contents = std::make_unique(std::move(nester.result.value())), + }); + current = nester.rest; + return {.result = std::move(result), .rest = current}; + } else if(error_count != errors.size()) { + return FAILURE; + } + + // Handles code + if(auto block_contents = try_statement(current, errors); block_contents.result) { + if(not else_mode) { + result.on_condition->contents.push_back(std::move(block_contents.result.value())); + } else { + result.otherwise->contents.push_back(std::move(block_contents.result.value())); + } + current = block_contents.rest; + } else { + if(current.empty()) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Interrupted conditional"}); + return FAILURE; + } + // Handles switching to else mode + if( + holds_alternative(current.front().value) + && get(current.front().value).value == "else" + ) { + auto loc = current.front().location; + current = current.subspan(1); + if(MISSING_NEW_LINE()) return FAILURE; + if(else_mode) { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Repeated else in conditional"}); + } else { + else_mode = true; + result.otherwise = std::make_unique(); + result.otherwise->location = std::move(loc); + } + }else { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Unexpected expression content"}); + return FAILURE; + } + } + } + + // checks for endif + if(endif_found()) { + current = current.subspan(1); + if(not current.empty() && MISSING_NEW_LINE()) return FAILURE; + } + + return {.result = std::move(result), .rest = current}; +#undef FAILURE +} + +parse_result try_while_loop(const std::span code, std::vector& errors) { +#ifdef FAILURE + static_assert(false, "Found a macro name FAILURE, halting"); +#endif +#define FAILURE {.result = std::nullopt, .rest = code} + scripting::ast::while_loop result{}; + if(code.empty()) return FAILURE; + auto current = code; + 
result.location = current.front().location; + + const auto endwhile_found = [&]() -> bool { + if (current.empty()) return false; + if (not holds_alternative(current.front().value)) return false; + if (get(current.front().value).value != "endwhile") return false; + return true; + }; + + // chomps a new line if available, returns true if it failed + const auto MISSING_NEW_LINE = [&]() -> bool { + if (current.empty()) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"}); + return true; + } + auto new_line = current.front(); + if (not holds_alternative(current.front().value)) { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Expected new line"}); + return true; + } + if (get(current.front().value) != symbol_t::new_line) { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Expected new line"}); + return true; + } + current = current.subspan(1); + return false; + }; + + if (current.empty()) return FAILURE; + if (not holds_alternative(current.front().value)) return FAILURE; + if (get(current.front().value).value != "while") return FAILURE; + current = current.subspan(1); + + // Read the condition + auto conditional_node = try_expression(current, errors); + if (not conditional_node.result) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Expected expression in while loop"}); + return FAILURE; + } + result.condition = std::make_unique(std::move(conditional_node.result.value())); + current = conditional_node.rest; + + if(MISSING_NEW_LINE()) return FAILURE; + + if (current.empty()) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"}); + return FAILURE; + } + result.on_condition = std::make_unique(); + result.on_condition->location = current.front().location; + + while (not endwhile_found()) { + const auto 
error_count = errors.size(); + + if(error_count != errors.size()) { + return FAILURE; + } + + // Handles code + if(auto block_contents = try_statement(current, errors); block_contents.result) { + result.on_condition->contents.push_back(std::move(block_contents.result.value())); + current = block_contents.rest; + } else { + if(current.empty()) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Interrupted while loop"}); + return FAILURE; + } else { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "Unexpected statement in block"}); + return FAILURE; + } + } + } + + // checks for endif + if(endwhile_found()) { + current = current.subspan(1); + if(not current.empty() && MISSING_NEW_LINE()) return FAILURE; + } + + return {.result = std::move(result), .rest = current}; +#undef FAILURE +} + +parse_result try_paren_expression(std::span code, std::vector& errors) { + scripting::ast::paren_expression paren; + auto current = code; + if(current.empty()) return {std::nullopt, code}; + if(not holds_alternative(current.front().value)) return {std::nullopt, code}; + if(get(current.front().value) != symbol_t::l_paren) return {std::nullopt, code}; + paren.location = current.front().location; + current = current.subspan(1); + + if(auto [cmd, cmd_rest] = try_command_expr(current, errors); cmd) { + paren.content = std::make_unique(std::move(cmd.value())); + current = cmd_rest; + } else if(auto [expr, expr_rest] = try_expression(current, errors); expr) { + paren.content = std::make_unique(std::move(expr.value())); + current = expr_rest; + } else { + errors.push_back( + scripting::script_error{.location = (current.empty() ? 
paren.location : current.front().location), .message = "Expected either a command or some type of expression"}); + return {std::nullopt, code}; + } + + if(current.empty()) { + errors.push_back( + scripting::script_error{.location = paren.location, .message = "No matching parenthesis"}); + return {std::nullopt, code}; + } + if(not holds_alternative(current.front().value) or get(current.front().value) != symbol_t::r_paren) { + errors.push_back( + scripting::script_error{.location = current.front().location, .message = "No matching parenthesis, expected a closing parenthesis"}); + return {std::nullopt, code}; + } + current = current.subspan(1); + + return {.result = std::move(paren), .rest = current}; +} + +parse_result try_statement(std::span code, std::vector& errors) { + scripting::ast::statement node; + auto current = code; + current = trim_newline(current); + if(auto [expr, rest] = try_conditional(current, errors); expr) { + node.contents = std::make_unique(std::move(expr.value())); + node.location = current.front().location; + current = rest; + } else if(auto [expr, rest] = try_command_expr(current, errors); expr) { + node.contents = std::make_unique(std::move(expr.value())); + node.location = current.front().location; + current = rest; + } else if(auto [expr, rest] = try_while_loop(current, errors); expr) { + node.contents = std::make_unique(std::move(expr.value())); + node.location = current.front().location; + current = rest; + } else return {std::nullopt, code}; + + current = trim_newline(current); + + return {.result = std::move(node), .rest = current}; +} + +parse_result try_unary_algebraic_expression(std::span code, std::vector& errors) { + constexpr std::array lexer_operators = {symbol_t::binary_not, symbol_t::logical_not}; + constexpr std::array ast_operators = {scripting::ast::operator_t::binary_not, scripting::ast::operator_t::logical_not}; + static_assert(lexer_operators.size() == ast_operators.size()); + + scripting::ast::unary_algebraic_expression 
node; + auto current = code; + + if(current.empty()) return {std::nullopt, code}; + if(not holds_alternative(current.front().value)) return {std::nullopt, code}; + auto res = std::ranges::find(lexer_operators, get(current.front().value)); + if(res == lexer_operators.end()) return {std::nullopt, code}; + node.location = current.front().location; + node.op = *(ast_operators.begin() + (res - lexer_operators.begin())); + current = current.subspan(1); + + + /// TODO: Gives the lowest priority to unaries (aka, they are applied last) + auto operand = try_expression(current, errors); + if (not operand.result) { + errors.push_back( + scripting::script_error{.location = code.front().location, .message = "Expected expression after unary operator"}); + return {std::nullopt, code}; + } + node.content = std::make_unique(std::move(operand.result.value())); + current = operand.rest; + + return {.result = std::move(node), .rest = current}; +} + +parse_result try_binary_algebraic_expression(std::span code, std::vector& errors) { + // The two following arrays are the operator mappings + + constexpr std::array lexer_operators = { + symbol_t::divide, + symbol_t::modulo, + symbol_t::multiply, + symbol_t::subtract, + symbol_t::add, + symbol_t::bitshift_left, + symbol_t::bitshift_right, + symbol_t::rotate_left, + symbol_t::rotate_right, + symbol_t::less_than, + symbol_t::greater_than, + symbol_t::less_or_equal_than, + symbol_t::greater_or_equal_than, + symbol_t::equals, + symbol_t::different, + symbol_t::binary_and, + symbol_t::binary_or, + symbol_t::binary_xor, + symbol_t::logical_and, + symbol_t::logical_or, + }; + constexpr std::array ast_operators = { + scripting::ast::operator_t::divide, + scripting::ast::operator_t::modulo, + scripting::ast::operator_t::multiply, + scripting::ast::operator_t::subtract, + scripting::ast::operator_t::add, + scripting::ast::operator_t::bitshift_left, + scripting::ast::operator_t::bitshift_right, + scripting::ast::operator_t::rotate_left, + 
scripting::ast::operator_t::rotate_right, + scripting::ast::operator_t::less_than, + scripting::ast::operator_t::greater_than, + scripting::ast::operator_t::less_or_equal_than, + scripting::ast::operator_t::greater_or_equal_than, + scripting::ast::operator_t::equals, + scripting::ast::operator_t::different, + scripting::ast::operator_t::binary_and, + scripting::ast::operator_t::binary_or, + scripting::ast::operator_t::binary_xor, + scripting::ast::operator_t::logical_and, + scripting::ast::operator_t::logical_or, + }; + constexpr std::array ast_precedences = { + 1, + 1, + 1, + 2, + 2, + 3, + 3, + 3, + 3, + 4, + 4, + 4, + 4, + 4, + 4, + 5, + 5, + 5, + 6, + 7, + }; + static_assert(lexer_operators.size() == ast_operators.size()); + static_assert(ast_precedences.size() == ast_operators.size()); + + scripting::ast::binary_algebraic_expression node; + auto current = code; + +#ifdef HANDLE_EXPRESSION + static_assert(false, "Found a macro name HANDLE_EXPRESSION, halting"); +#endif +#define HANDLE_EXPRESSION(type) \ + node.lhs = std::make_unique(scripting::ast::expression{ \ + .location = current.front().location, \ + .contents = std::make_unique(std::move(expr.value())) \ + }); \ + current = rest; + + if(auto [expr, rest] = try_literal_string_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::literal_string_expression) + } else if(auto [expr, rest] = try_literal_int_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::literal_int_expression) + } else if(auto [expr, rest] = try_unary_algebraic_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::unary_algebraic_expression) + } else if(auto [expr, rest] = try_paren_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::paren_expression) + } else if(auto [expr, rest] = try_variable_expression(current, errors); expr) { + HANDLE_EXPRESSION(scripting::ast::variable_expression) + } else { + return {std::nullopt, code}; + } +#undef HANDLE_EXPRESSION + 
+ if(current.empty()) { + return {std::nullopt, code}; + } + if(not holds_alternative(current.front().value)) { + return {std::nullopt, code}; + } + auto res = std::ranges::find(lexer_operators, get(current.front().value)); + if(res == lexer_operators.end()) { + return {std::nullopt, code}; + } + auto rhs_idx = (res - lexer_operators.begin()); + node.op = *(ast_operators.begin() + rhs_idx); + node.location = current.front().location; + current = current.subspan(1); + + auto operand = try_expression(current, errors); + if (not operand.result) { + errors.push_back( + scripting::script_error{.location = node.location, .message = "Expected expression after binary operator"}); + return {std::nullopt, code}; + } + /// Check if our "large (rhs) bite" has an operation precedence that is bigger and in that case, swap the operations around + //... We may need to do that iteratively until we risk a priority reversal. This is basically shifting the ast to the left until + //... the precedence are "heapified" and left associative + if(std::holds_alternative>(operand.result.value().contents)) { + // Must check for precedence reordering + auto& lhs = std::get>(operand.result.value().contents); + auto lhs_it = std::ranges::find(ast_operators, lhs->op); + auto lhs_idx = lhs_it - ast_operators.begin(); + // >= ensures left associativity + if(ast_precedences[rhs_idx] <= ast_precedences[lhs_idx]) { + // Precedence reordering required + // https://link.excalidraw.com/l/hxPegpAmTX/6d1BYX0rfKU + node.rhs = std::move(lhs->lhs); + scripting::ast::binary_algebraic_expression reordered{ + .location = operand.result.value().location, + .lhs = std::make_unique(scripting::ast::expression{ + .location = node.location, + .contents = std::make_unique(std::move(node)) + }), + .op = lhs->op, + .rhs = std::move(lhs->rhs), + }; + current = operand.rest; + return {.result = std::move(reordered), .rest = current}; + } + } + // No reordering required + node.rhs = 
std::make_unique(std::move(operand.result.value())); + current = operand.rest; + + return {.result = std::move(node), .rest = current}; +} + +parse_result try_variable_expression(std::span code, std::vector& errors) { + scripting::ast::variable_expression node; + auto current = code; + + if(current.empty()) return {std::nullopt, code}; + if(not holds_alternative(current.front().value)) return {std::nullopt, code}; + node.location = current.front().location; + node.name = get(current.front().value); + current = current.subspan(1); + + return {.result = std::move(node), .rest = current}; +} + +parse_result try_literal_string_expression(std::span code, std::vector& errors) { + scripting::ast::literal_string_expression node; + auto current = code; + + if(current.empty()) return {std::nullopt, code}; + if(not holds_alternative(current.front().value)) return {std::nullopt, code}; + node.location = current.front().location; + node.value = get(current.front().value); + current = current.subspan(1); + + return {.result = std::move(node), .rest = current}; +} + +parse_result try_literal_int_expression(std::span code, std::vector& errors) { + scripting::ast::literal_int_expression node; + auto current = code; + + if(current.empty()) return {std::nullopt, code}; + if(not holds_alternative(current.front().value)) return {std::nullopt, code}; + node.location = current.front().location; + node.value = get(current.front().value); + current = current.subspan(1); + + return {.result = std::move(node), .rest = current}; +} + +scripting::ast::block scripting::ast::parse(std::span code, std::vector& errors) { + scripting::ast::block node; + auto current = trim_newline(code); + + while(not current.empty()) { + auto pre_size = current.size(); + auto [expr, rest] = try_statement(current, errors); + if(expr) { + node.contents.push_back(std::move(expr.value())); + current = rest; + } else { + bool progress = false; + while(not (next_is_newline(current) or current.empty())) { + current = 
current.subspan(1); + progress = true; + } + if(not progress && not errors.empty()) { + return scripting::ast::block{}; + } + if(not current.empty() && current.size() == pre_size) { + errors.push_back(script_error{ + .location = current.front().location, + .message = "Parsing stuck in infinite loop with no progress" + }); + } + } + current = trim_newline(current); + } + if(not errors.empty()) { + return scripting::ast::block{}; + } + + return std::move(node); +} \ No newline at end of file diff --git a/tests/lexer_test.cpp b/tests/lexer_test.cpp new file mode 100644 index 0000000..6876759 --- /dev/null +++ b/tests/lexer_test.cpp @@ -0,0 +1,118 @@ +#include +#include "UserScript/parser.h" + +using token = scripting::ast::token; +using symbol_t = scripting::ast::symbol_t; +using identifier = scripting::ast::identifier; + +TEST_CASE("Lexer Test 01") { + std::string code = "/salad 12 13 \"hello\" ident\n"; + std::vector expected = { + token{.value = symbol_t::divide}, + token{.value = identifier{.value = "salad"}}, + token{.value = 12}, + token{.value = 13}, + token{.value = "hello"}, + token{.value = identifier{.value = "ident"}}, + token{.value = symbol_t::new_line} + }; + + std::vector errors; + auto lexed = scripting::ast::lex(code, errors); + + REQUIRE(errors.empty()); + + REQUIRE(lexed.size() == expected.size()); + for(size_t idx = 0; idx < lexed.size(); ++idx) { + REQUIRE(lexed[idx].value.index() == expected[idx].value.index()); + REQUIRE(lexed[idx].value == expected[idx].value); + } +} + +TEST_CASE("Lexer Test 01 (Doubled)") { + std::string code = "/salad 12 13 \"hello\" ident\n/salad 12 13 \"hello\" ident\n"; + std::vector expected = { + token{.value = symbol_t::divide}, + token{.value = identifier{.value = "salad"}}, + token{.value = 12}, + token{.value = 13}, + token{.value = "hello"}, + token{.value = identifier{.value = "ident"}}, + token{.value = symbol_t::new_line}, + token{.value = symbol_t::divide}, + token{.value = identifier{.value = "salad"}}, + 
token{.value = 12}, + token{.value = 13}, + token{.value = "hello"}, + token{.value = identifier{.value = "ident"}}, + token{.value = symbol_t::new_line} + }; + + std::vector errors; + auto lexed = scripting::ast::lex(code, errors); + + REQUIRE(errors.empty()); + + REQUIRE(lexed.size() == expected.size()); + for(size_t idx = 0; idx < lexed.size(); ++idx) { + REQUIRE(lexed[idx].value.index() == expected[idx].value.index()); + REQUIRE(lexed[idx].value == expected[idx].value); + } +} + +TEST_CASE("Lexer Test 02") { + std::string code = "/salad 12 13 \"hello\" ident\n" + "/salad 12 13 \"hello\" ident\n" + "if(/test)\n" + " /nice\n" + "endif"; + auto line1 = std::make_shared("/salad 12 13 \"hello\" ident"); + auto line2 = line1; + auto line3 = std::make_shared("if(/test)"); + auto line4 = std::make_shared(" /nice"); + auto line5 = std::make_shared("endif"); + using cl = scripting::code_location; + std::vector expected = { + token{.location = std::make_shared(cl{.line_contents = line1, .line_number = 1, .column_number = 1}), .value = symbol_t::divide}, + token{.location = std::make_shared(cl{.line_contents = line1, .line_number = 1, .column_number = 2}), .value = identifier{.value = "salad"}}, + token{.location = std::make_shared(cl{.line_contents = line1, .line_number = 1, .column_number = 8}), .value = 12}, + token{.location = std::make_shared(cl{.line_contents = line1, .line_number = 1, .column_number = 11}), .value = 13}, + token{.location = std::make_shared(cl{.line_contents = line1, .line_number = 1, .column_number = 14}), .value = "hello"}, + token{.location = std::make_shared(cl{.line_contents = line1, .line_number = 1, .column_number = 22}), .value = identifier{.value = "ident"}}, + token{.location = std::make_shared(cl{.line_contents = line1, .line_number = 1, .column_number = 27}), .value = symbol_t::new_line}, + token{.location = std::make_shared(cl{.line_contents = line2, .line_number = 2, .column_number = 1}), .value = symbol_t::divide}, + token{.location = 
std::make_shared(cl{.line_contents = line2, .line_number = 2, .column_number = 2}), .value = identifier{.value = "salad"}}, + token{.location = std::make_shared(cl{.line_contents = line2, .line_number = 2, .column_number = 8}), .value = 12}, + token{.location = std::make_shared(cl{.line_contents = line2, .line_number = 2, .column_number = 11}), .value = 13}, + token{.location = std::make_shared(cl{.line_contents = line2, .line_number = 2, .column_number = 14}), .value = "hello"}, + token{.location = std::make_shared(cl{.line_contents = line2, .line_number = 2, .column_number = 22}), .value = identifier{.value = "ident"}}, + token{.location = std::make_shared(cl{.line_contents = line2, .line_number = 2, .column_number = 27}), .value = symbol_t::new_line}, + token{.location = std::make_shared(cl{.line_contents = line3, .line_number = 3, .column_number = 1}), .value = identifier{.value = "if"}}, + token{.location = std::make_shared(cl{.line_contents = line3, .line_number = 3, .column_number = 3}), .value = symbol_t::l_paren}, + token{.location = std::make_shared(cl{.line_contents = line3, .line_number = 3, .column_number = 4}), .value = symbol_t::divide}, + token{.location = std::make_shared(cl{.line_contents = line3, .line_number = 3, .column_number = 5}), .value = identifier{.value = "test"}}, + token{.location = std::make_shared(cl{.line_contents = line3, .line_number = 3, .column_number = 9}), .value = symbol_t::r_paren}, + token{.location = std::make_shared(cl{.line_contents = line3, .line_number = 3, .column_number = 10}), .value = symbol_t::new_line}, + token{.location = std::make_shared(cl{.line_contents = line4, .line_number = 4, .column_number = 5}), .value = symbol_t::divide}, + token{.location = std::make_shared(cl{.line_contents = line4, .line_number = 4, .column_number = 6}), .value = identifier{.value = "nice"}}, + token{.location = std::make_shared(cl{.line_contents = line4, .line_number = 4, .column_number = 10}), .value = symbol_t::new_line}, + 
token{.location = std::make_shared(cl{.line_contents = line5, .line_number = 5, .column_number = 1}), .value = identifier{.value = "endif"}}, + }; + std::vector errors; + auto lexed = scripting::ast::lex(code, errors); + + REQUIRE(errors.empty()); + + REQUIRE(lexed.size() == expected.size()); + for(size_t idx = 0; idx < lexed.size(); ++idx) { + REQUIRE(lexed[idx].value.index() == expected[idx].value.index()); + REQUIRE(lexed[idx].value == expected[idx].value); + REQUIRE(lexed[idx].location); + if(expected[idx].location) { + REQUIRE(expected[idx].location->column_number == lexed[idx].location->column_number); + REQUIRE(expected[idx].location->line_number == lexed[idx].location->line_number); + REQUIRE((bool)lexed[idx].location->line_contents); + REQUIRE(*(expected[idx].location->line_contents) == *(lexed[idx].location->line_contents)); + } + } +} \ No newline at end of file diff --git a/tests/parser_test.cpp b/tests/parser_test.cpp new file mode 100644 index 0000000..96db09f --- /dev/null +++ b/tests/parser_test.cpp @@ -0,0 +1,168 @@ +#include "UserScript/parser.h" +#include +#include +#include +#include +#include +#include +#include + +TEST_CASE("Can parse") { + + std::string code = "/salad (/potato) 12 13 \"hello\" ident\n" + "/salad 12 13 \"hello\" ident\n" + "if(/test)\n" + " /nice\n" + "endif"; + std::vector errors; + auto lexed = scripting::ast::lex(code, errors); + auto parsed = scripting::ast::parse(lexed, errors); + + if(not errors.empty()) { + for(auto& line : errors) { + std::cout << line.message << "\n at line " << line.location->line_number << ":" << line.location->column_number << "\n"; + std::cout << " " << *line.location->line_contents << "\n"; + std::cout << " " << std::string(line.location->column_number - 1, ' ') << "^\n"; + } + } + + auto& block = parsed; + REQUIRE(block.contents.size() == 3); + REQUIRE(std::holds_alternative>(block.contents.front().contents)); + auto& cmd1 = std::get>(block.contents.front().contents); + REQUIRE(cmd1->name.value 
== "salad"); + REQUIRE(std::holds_alternative>(std::span(block.contents).subspan(1).front().contents)); + auto& cmd2 = std::get>(std::span(block.contents).subspan(1).front().contents); + REQUIRE(cmd2->name.value == "salad"); + REQUIRE(std::holds_alternative>(std::span(block.contents).subspan(2).front().contents)); + auto& conditional = std::get>(std::span(block.contents).subspan(2).front().contents); + REQUIRE(std::holds_alternative>(conditional->condition->contents)); + auto& paren = std::get>(conditional->condition->contents)->content; + REQUIRE(std::holds_alternative>(paren)); + auto& condition = std::get>(paren); + REQUIRE(condition->name.value == "test"); +} + +TEST_CASE("Can parse 2") { + + std::string code = "/salad (/potato) 12+13*16/(/potato)+myvar \"hello\" ident\n" + "/salad 12 13 \"hello\" ident\n" + "if !(/test)\n" + " /nice\n" + "endif"; + std::vector errors; + auto lexed = scripting::ast::lex(code, errors); + auto parsed = scripting::ast::parse(lexed, errors); + + if(not errors.empty()) { + for(auto& line : errors) { + std::cout << line.message << "\n at line " << line.location->line_number << ":" << line.location->column_number << "\n"; + std::cout << " " << *line.location->line_contents << "\n"; + std::cout << " " << std::string(line.location->column_number - 1, ' ') << "^\n"; + } + } + + auto& block = parsed; + REQUIRE(block.contents.size() == 3); + REQUIRE(std::holds_alternative>(block.contents.front().contents)); + auto& cmd1 = std::get>(block.contents.front().contents); + REQUIRE(cmd1->name.value == "salad"); + REQUIRE(cmd1->arguments.size() == 4); + REQUIRE(std::holds_alternative>(std::span(block.contents).subspan(1).front().contents)); + auto& cmd2 = std::get>(std::span(block.contents).subspan(1).front().contents); + REQUIRE(cmd2->name.value == "salad"); + REQUIRE(std::holds_alternative>(std::span(block.contents).subspan(2).front().contents)); + auto& conditional = std::get>(std::span(block.contents).subspan(2).front().contents); + 
REQUIRE(std::holds_alternative>(conditional->condition->contents)); +} + +template +constexpr auto runner = [](){ + std::vector sources = { + "../tests/scripts/001.script", + "../tests/scripts/002.script", + "../tests/scripts/003.script", + "../tests/scripts/004.script", + "../tests/scripts/005.script", + }; + + auto seed = seed_template == -1 ? std::random_device{}() : seed_template; + + std::cout << "TEST \"Try to crash the parser 1\" with seed " << seed << std::endl; + + std::mt19937_64 rand(seed); + + auto mod = [&](std::string tmp) -> std::string { + if(tmp.empty()) return tmp; + auto alter_idx = rand()%tmp.size(); + switch(rand()%3) { + case 0:{ + tmp.erase(alter_idx); + }break; + case 1:{ + tmp[alter_idx] = rand() % 256; + }break; + case 2:{ + tmp.insert(alter_idx, 1, char(rand() % 256)); + }break; + } + return tmp; + }; + + auto codes = sources | std::ranges::views::transform([](std::string file){ + std::ifstream file_str{file}; + std::stringstream read; + read << file_str.rdbuf(); + return read.str(); + }); + + std::vector vec; + std::copy(codes.begin(), codes.end(), std::back_inserter(vec)); + + size_t count = 0; + size_t error_cnt = 0; + size_t success_cnt = 0; + constexpr size_t max_count = 5000000; + + auto begin = std::chrono::high_resolution_clock::now(); + while(count < max_count) { + std::cout << 100.0*double(count)/max_count <<"%"<< std::endl; + for(auto& code : vec) { + std::vector errors; + auto lexed = scripting::ast::lex(code, errors); + auto parsed = scripting::ast::parse(lexed, errors); + if(errors.empty()) success_cnt++; + else error_cnt++; + count++; + } + + auto limit = std::min(vec.size(), 5000) ; + for(size_t idx = 0; idx < limit; ++idx) { + vec.push_back(mod(vec[idx])); + } + + std::transform(vec.begin(), vec.end(), vec.begin(), mod); + + std::shuffle(vec.begin(), vec.end(), rand); + if(vec.size()>30000) vec.resize(30000); + } + auto end = std::chrono::high_resolution_clock::now(); + + std::cout + << "Successes: " << success_cnt << 
"\n" + << "Failures: " << error_cnt << "\n" + << "Ratio: " << double(success_cnt)/double(success_cnt+error_cnt) << "\n" + << "Total time: " << std::chrono::duration_cast(end-begin).count() << "µs\n" + << "Time per iteration: " << (std::chrono::duration_cast(end-begin)/(error_cnt+success_cnt)).count() << "ns\n"; +}; + +TEST_CASE("Try to crash the parser (known seeds)") { + runner<1547293717>(); + runner<1759257947>(); + runner<2909912711>(); + runner<1236548620>(); +} + +TEST_CASE("Try to crash the parser (new seeds)") { + runner<>(); +} \ No newline at end of file diff --git a/tests/scripts/001.results b/tests/scripts/001.results new file mode 100644 index 0000000..4df45b3 --- /dev/null +++ b/tests/scripts/001.results @@ -0,0 +1,13 @@ +8 +12 +12 +14 +14 +10 +2 +2 +0 +7 +0 +3 +true diff --git a/tests/scripts/001.script b/tests/scripts/001.script new file mode 100644 index 0000000..5854c68 --- /dev/null +++ b/tests/scripts/001.script @@ -0,0 +1,18 @@ +/print 2+6 "\n" +/print 2*6 "\n" +/print 6*2 "\n" +/print 2+6*2 "\n" +/print 6*2+2 "\n" +/print 6*2-2 "\n" +/print 6-2-2 "\n" +/print 6-2*2 "\n" +/print 6&1 "\n" +/print 6|1 "\n" +/print 21%7 "\n" +/print 21/7 "\n" +if(2*3 == 6) + /print "true" +else + /print "false" +endif +/print "\n" \ No newline at end of file diff --git a/tests/scripts/002.results b/tests/scripts/002.results new file mode 100644 index 0000000..d00491f --- /dev/null +++ b/tests/scripts/002.results @@ -0,0 +1 @@ +1 diff --git a/tests/scripts/002.script b/tests/scripts/002.script new file mode 100644 index 0000000..24fa80d --- /dev/null +++ b/tests/scripts/002.script @@ -0,0 +1,17 @@ +if(counter == (/null)) + /set counter 0 +endif +/bigDoNothing 17 12 36*78 +if(counter % 2 == 1) + /bigDoNothing 17 12 36*78 + /set counter counter+1 +else + /bigDoNothing 17 12 36*78 + /set counter counter+1 +endif + +if(counter == 5000) + /set exit_ctr 1 +endif + +/print counter "\n" \ No newline at end of file diff --git a/tests/scripts/003.results 
b/tests/scripts/003.results new file mode 100644 index 0000000..8b1acc1 --- /dev/null +++ b/tests/scripts/003.results @@ -0,0 +1,10 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/scripts/003.script b/tests/scripts/003.script new file mode 100644 index 0000000..e8f0f39 --- /dev/null +++ b/tests/scripts/003.script @@ -0,0 +1,5 @@ +/set counter 0 +while(counter < 10) + /print counter "\n" + /set counter (counter+1) +endwhile diff --git a/tests/scripts/004.results b/tests/scripts/004.results new file mode 100644 index 0000000..739278b --- /dev/null +++ b/tests/scripts/004.results @@ -0,0 +1,4 @@ +Unexpected statement in block + at line 5:1 + endif + ^ \ No newline at end of file diff --git a/tests/scripts/004.script b/tests/scripts/004.script new file mode 100644 index 0000000..985d147 --- /dev/null +++ b/tests/scripts/004.script @@ -0,0 +1,5 @@ +/set counter 0 +while(counter < 10) + /print counter "\n" + /set counter (counter+1) +endif diff --git a/tests/scripts/005.results b/tests/scripts/005.results new file mode 100644 index 0000000..9b6b140 --- /dev/null +++ b/tests/scripts/005.results @@ -0,0 +1,4 @@ +Unexpected expression content + at line 5:1 + endwhile + ^ \ No newline at end of file diff --git a/tests/scripts/005.script b/tests/scripts/005.script new file mode 100644 index 0000000..2a27649 --- /dev/null +++ b/tests/scripts/005.script @@ -0,0 +1,5 @@ +/set counter 0 +if(counter < 10) + /print counter "\n" + /set counter (counter+1) +endwhile diff --git a/tests/scripts/testfile.test b/tests/scripts/testfile.test new file mode 100644 index 0000000..dd13b02 --- /dev/null +++ b/tests/scripts/testfile.test @@ -0,0 +1,16 @@ +if(counter == (/null)) + /set counter 0 + /print "Init...\n" +endif +/bigDoNothing 17 12 36*78 +if(counter % 2 == 1) + /bigDoNothing 17 12 36*78 + /set counter counter+1 +else + /bigDoNothing 17 12 36*78 + /set counter counter+1 +endif + +if(counter == 5000) + /set exit_ctr 1 +endif \ No newline at end of file