Browse Source

Clean up the repo and split large multipurpose files into more single purpose files

crazy_things
Ludovic 'Archivist' Lagouardette 1 year ago
parent
commit
106abebc27
8 changed files with 773 additions and 695 deletions
  1. +2
    -2
      CMakeLists.txt
  2. +155
    -0
      priv_include/UserScript/interpreter.h
  3. +0
    -0
      priv_include/UserScript/parser.h
  4. +8
    -9
      script_exe/main.cpp
  5. +219
    -0
      src/generator.cpp
  6. +2
    -312
      src/interpreter.cpp
  7. +381
    -0
      src/lex.cpp
  8. +6
    -372
      src/parse.cpp

+ 2
- 2
CMakeLists.txt View File

@ -21,12 +21,12 @@ include(Catch)
add_library(UserScript STATIC
src/interpreter.cpp
src/lex_parse.cpp)
src/lex.cpp src/parse.cpp priv_include/UserScript/interpreter.h src/generator.cpp)
target_include_directories(UserScript PUBLIC include)
include_directories(priv_include)
add_executable(ushell script_exe/main.cpp)
target_link_libraries(ushell PUBLIC UserScript)
include_directories(include)
add_executable(userscript_tests tests/lexer_test.cpp tests/parser_test.cpp)
target_link_libraries(userscript_tests PUBLIC UserScript Catch2::Catch2WithMain)

+ 155
- 0
priv_include/UserScript/interpreter.h View File

@ -0,0 +1,155 @@
#pragma once
#include <stack>
#include <map>
#include "UserScript.h"
#include "UserScript/parser.h"
namespace scripting {
class ByteCodeInterpreter final : public UserScript {
std::map<std::string, script_value> variables;
std::map<std::string, function> functions;
std::vector<argument> execution_stack;
public:
struct function_tag {
std::string name;
size_t arity;
std::shared_ptr<const code_location> location;
};
struct variable_tag {
std::string name;
std::shared_ptr<const code_location> location;
};
enum class operator_t : uint8_t {
logical_not,
binary_not,
unary_plus,
unary_minus,
divide,
modulo,
multiply,
subtract,
add,
bitshift_left,
bitshift_right,
rotate_left,
rotate_right,
less_than,
greater_than,
less_or_equal_than,
greater_or_equal_than,
equals,
different,
binary_and,
binary_or,
binary_xor,
logical_and,
logical_or,
INTERNAL_jump,
INTERNAL_jump_if,
INTERNAL_stack_cls,
};
struct operand {
std::variant<script_value, function_tag, variable_tag, operator_t> element;
std::shared_ptr<const code_location> location;
};
std::optional<std::reference_wrapper<script_value>> getValue(const std::string &name) {
if (auto var = variables.find(name); var != variables.end()) {
return var->second;
} else {
return std::nullopt;
}
}
bool setValue(const std::string &name, script_value value) {
if (auto var = variables.find(name); var != variables.end()) {
var->second = value;
return true;
} else {
variables.emplace(std::make_pair(name, value));
return false;
}
}
std::vector<operand> bytecode;
size_t instruction_ptr;
script_value resolve(const std::string &name) final {
auto it = variables.find(name);
if (it == variables.end()) {
return script_value{};
}
return (*it).second;
}
script_value resolve_and_pop() {
if (execution_stack.empty()) return script_value{};
auto value = std::move(execution_stack.back());
auto resolved = std::visit([&](auto v) -> script_value {
if constexpr (std::is_same_v<script_variable, decltype(v)>) {
auto it = variables.find(v.name);
if (it == variables.end()) {
return script_value{};
}
return (*it).second;
} else {
return v;
}
}, value);
execution_stack.pop_back();
return resolved;
}
void big_f_ing_switch(operand &op, std::optional<script_error> &error);
std::vector<ByteCodeInterpreter::operand>
generate(std::vector<script_error> &errors, ast::block &tree, bool loop = true);
void registerFunction(std::string name, function fn) final {
functions.insert_or_assign(name, std::move(fn));
}
std::variant<script_value, std::vector<script_error>> executeAtOnce(std::string code) final {
std::vector<script_error> errors;
auto lexed = ast::lex(code, errors);
auto parsed = ast::parse(lexed, errors);
if (not errors.empty()) return errors;
bytecode = generate(errors, parsed, false);
if (not errors.empty()) return errors;
std::optional<script_error> maybe_error;
instruction_ptr = 0;
while (instruction_ptr < bytecode.size()) {
step(maybe_error);
if (maybe_error) return std::vector<script_error>({maybe_error.value()});
}
auto v = resolve_and_pop();
execution_stack.clear();
return v;
}
std::vector<script_error> prepare(std::string code) final {
std::vector<script_error> errors;
auto lexed = ast::lex(code, errors);
auto parsed = ast::parse(lexed, errors);
if (errors.empty()) {
bytecode = generate(errors, parsed, true);
}
return errors;
}
std::optional<script_error> stepOnce() final {
std::optional<script_error> error;
while (not step(error));
return error;
}
bool step(std::optional<script_error> &error);
~ByteCodeInterpreter() final {}
};
}

include/UserScript/parser.h → priv_include/UserScript/parser.h View File


+ 8
- 9
script_exe/main.cpp View File

@ -127,7 +127,7 @@ void process_bench(std::string target = "./tests/scripts/testfile.test") {
decltype(std::chrono::high_resolution_clock::now()-std::chrono::high_resolution_clock::now()) per_exec{}, per_step{}, per_op{};
for(int runs = 0; runs < 20; runs++) {
for(int runs = 0; runs < 5000; runs++) {
auto res = engine->prepare(code.str());
@ -137,14 +137,13 @@ void process_bench(std::string target = "./tests/scripts/testfile.test") {
steps++;
}
auto end = std::chrono::high_resolution_clock::now();
per_exec += (end - begin) / 5000;
per_step += (end - begin) / steps;
per_op += (end - begin) / (5000 * 53);
per_exec += (end - begin);
per_step += (end - begin);
per_op += (end - begin);
}
per_exec /= 20;
per_step /= 20;
per_op /= 20;
per_exec /= 5000;
per_step /= steps;
per_op = per_op / 5000 / 53;
std::cout << "time per exec = " << std::chrono::duration_cast<std::chrono::nanoseconds>(per_exec).count() << "ns\n";
std::cout << "time per step = " << std::chrono::duration_cast<std::chrono::nanoseconds>(per_step).count() << "ns\n";
@ -325,7 +324,7 @@ int cpp_main(std::span args) {
} else if(args.front() == "bench_compile") {
args = args.subspan(1);
if(args.size() > 1) {
std::cerr << "bench_exec expects 0 or 1 file as arguments" << std::endl;
std::cerr << "bench_compile expects 0 or 1 file as arguments" << std::endl;
std::terminate();
}
if(args.empty()) compile_bench();

+ 219
- 0
src/generator.cpp View File

@ -0,0 +1,219 @@
#include "UserScript/interpreter.h"
namespace scripting {
// Replace with constexpr vector & find ?
static const std::map<ast::operator_t, ByteCodeInterpreter::operator_t> mappings = {
{ast::operator_t::logical_not, ByteCodeInterpreter::operator_t::logical_not},
{ast::operator_t::binary_not, ByteCodeInterpreter::operator_t::binary_not},
{ast::operator_t::divide, ByteCodeInterpreter::operator_t::divide},
{ast::operator_t::modulo, ByteCodeInterpreter::operator_t::modulo},
{ast::operator_t::multiply, ByteCodeInterpreter::operator_t::multiply},
{ast::operator_t::subtract, ByteCodeInterpreter::operator_t::subtract},
{ast::operator_t::add, ByteCodeInterpreter::operator_t::add},
{ast::operator_t::bitshift_left, ByteCodeInterpreter::operator_t::bitshift_left},
{ast::operator_t::bitshift_right, ByteCodeInterpreter::operator_t::bitshift_right},
{ast::operator_t::rotate_left, ByteCodeInterpreter::operator_t::rotate_left},
{ast::operator_t::rotate_right, ByteCodeInterpreter::operator_t::rotate_right},
{ast::operator_t::less_than, ByteCodeInterpreter::operator_t::less_than},
{ast::operator_t::greater_than, ByteCodeInterpreter::operator_t::greater_than},
{ast::operator_t::less_or_equal_than, ByteCodeInterpreter::operator_t::less_or_equal_than},
{ast::operator_t::greater_or_equal_than, ByteCodeInterpreter::operator_t::greater_or_equal_than},
{ast::operator_t::equals, ByteCodeInterpreter::operator_t::equals},
{ast::operator_t::different, ByteCodeInterpreter::operator_t::different},
{ast::operator_t::binary_and, ByteCodeInterpreter::operator_t::binary_and},
{ast::operator_t::binary_or, ByteCodeInterpreter::operator_t::binary_or},
{ast::operator_t::binary_xor, ByteCodeInterpreter::operator_t::binary_xor},
{ast::operator_t::logical_and, ByteCodeInterpreter::operator_t::logical_and},
{ast::operator_t::logical_or, ByteCodeInterpreter::operator_t::logical_or},
};
/// GENERATION HANDLERS DECLARATIONS
/// Lowers one AST node into operands appended to the first argument; problems
/// are meant to be reported through the second. No generic definition is given
/// in this file, only the explicit specializations declared here. They are all
/// declared up front because their definitions call each other.
template<typename Node>
void handle(std::vector<ByteCodeInterpreter::operand> &, std::vector<script_error> &, Node &);

template<>
void handle<ast::block>(std::vector<ByteCodeInterpreter::operand> &, std::vector<script_error> &, ast::block &);

template<>
void handle<ast::command_expression>(std::vector<ByteCodeInterpreter::operand> &, std::vector<script_error> &,
                                     ast::command_expression &);

template<>
void handle<ast::binary_algebraic_expression>(std::vector<ByteCodeInterpreter::operand> &,
                                              std::vector<script_error> &, ast::binary_algebraic_expression &);

template<>
void handle<ast::unary_algebraic_expression>(std::vector<ByteCodeInterpreter::operand> &,
                                             std::vector<script_error> &, ast::unary_algebraic_expression &);

template<>
void handle<ast::paren_expression>(std::vector<ByteCodeInterpreter::operand> &, std::vector<script_error> &,
                                   ast::paren_expression &);

template<>
void handle<ast::conditional>(std::vector<ByteCodeInterpreter::operand> &, std::vector<script_error> &,
                              ast::conditional &);

template<>
void handle<ast::while_loop>(std::vector<ByteCodeInterpreter::operand> &, std::vector<script_error> &,
                             ast::while_loop &);

template<>
void handle<ast::expression>(std::vector<ByteCodeInterpreter::operand> &, std::vector<script_error> &,
                             ast::expression &);

template<>
void handle<ast::variable_expression>(std::vector<ByteCodeInterpreter::operand> &, std::vector<script_error> &,
                                      ast::variable_expression &);

template<>
void handle<ast::literal_int_expression>(std::vector<ByteCodeInterpreter::operand> &,
                                         std::vector<script_error> &, ast::literal_int_expression &);

template<>
void handle<ast::literal_string_expression>(std::vector<ByteCodeInterpreter::operand> &,
                                            std::vector<script_error> &, ast::literal_string_expression &);
/// GENERATION HANDLERS DEFINITIONS
/// Lowers each statement of `block` in order and emits an INTERNAL_stack_cls
/// operand (without a location) after every one of them.
template<>
void handle<ast::block>(std::vector<ByteCodeInterpreter::operand> &ctx, std::vector<script_error> &errors,
                        ast::block &block) {
    const auto lower = [&](auto &node) { handle(ctx, errors, *node); };
    for (auto &statement : block.contents) {
        std::visit(lower, statement.contents);
        ctx.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_stack_cls});
    }
}
/// Lowers a command call: the arguments are emitted from last to first,
/// followed by a function_tag carrying the command name and argument count.
template<>
void
handle<ast::command_expression>(std::vector<ByteCodeInterpreter::operand> &ctx, std::vector<script_error> &errors,
                                ast::command_expression &cmd) {
    const auto lower = [&](auto &node) { handle(ctx, errors, *node); };
    for (auto arg = cmd.arguments.rbegin(), last = cmd.arguments.rend(); arg != last; ++arg) {
        std::visit(lower, (*arg)->contents);
    }

    ByteCodeInterpreter::operand call{
        .element = ByteCodeInterpreter::function_tag{.name = cmd.name.value, .arity = cmd.arguments.size()},
        .location = cmd.location};
    ctx.push_back(std::move(call));
}
/// Lowers a parenthesised expression by lowering whatever it wraps; the
/// parentheses themselves produce no operand.
template<>
void
handle<ast::paren_expression>(std::vector<ByteCodeInterpreter::operand> &ctx, std::vector<script_error> &errors,
                              ast::paren_expression &expr) {
    const auto lower = [&](auto &inner) { handle(ctx, errors, *inner); };
    std::visit(lower, expr.content);
}
/// Lowers a generic expression by dispatching on the alternative it holds.
template<>
void handle<ast::expression>(std::vector<ByteCodeInterpreter::operand> &ctx, std::vector<script_error> &errors,
                             ast::expression &expr) {
    const auto lower = [&](auto &alternative) { handle(ctx, errors, *alternative); };
    std::visit(lower, expr.contents);
}
/// Lowers a binary expression in postfix order: left operand, right operand,
/// then the translated operator.
template<>
void handle<ast::binary_algebraic_expression>(std::vector<ByteCodeInterpreter::operand> &ctx,
                                              std::vector<script_error> &errors,
                                              ast::binary_algebraic_expression &expr) {
    handle(ctx, errors, *expr.lhs);
    handle(ctx, errors, *expr.rhs);

    // .at() throws std::out_of_range when expr.op has no entry in `mappings`.
    auto translated = mappings.at(expr.op);
    ctx.push_back(ByteCodeInterpreter::operand{.element = translated, .location = expr.location});
}
/// Lowers a unary expression: the operand first, then the translated operator.
template<>
void handle<ast::unary_algebraic_expression>(std::vector<ByteCodeInterpreter::operand> &ctx,
                                             std::vector<script_error> &errors,
                                             ast::unary_algebraic_expression &expr) {
    handle(ctx, errors, *expr.content);

    // .at() throws std::out_of_range when expr.op has no entry in `mappings`.
    // NOTE(review): `mappings` has no unary_plus / unary_minus target; confirm
    // how the parser encodes unary +/-.
    auto translated = mappings.at(expr.op);
    ctx.push_back(ByteCodeInterpreter::operand{.element = translated, .location = expr.location});
}
/// Lowers a variable reference into a variable_tag operand.
/// The source location is stored inside the tag; the operand's own `location`
/// member is left empty.
template<>
void
handle<ast::variable_expression>(std::vector<ByteCodeInterpreter::operand> &ctx, std::vector<script_error> &errors,
                                 ast::variable_expression &expr) {
    ByteCodeInterpreter::variable_tag reference{.name = expr.name.value, .location = expr.location};
    ctx.push_back(ByteCodeInterpreter::operand{.element = std::move(reference)});
}
/// Lowers an integer literal into a script_value operand (no location attached).
template<>
void handle<ast::literal_int_expression>(std::vector<ByteCodeInterpreter::operand> &ctx,
                                         std::vector<script_error> &errors, ast::literal_int_expression &expr) {
    ctx.push_back(ByteCodeInterpreter::operand{.element = script_value{expr.value}});
}
/// Lowers a string literal into a script_value operand (no location attached).
template<>
void handle<ast::literal_string_expression>(std::vector<ByteCodeInterpreter::operand> &ctx,
                                            std::vector<script_error> &errors,
                                            ast::literal_string_expression &expr) {
    ctx.push_back(ByteCodeInterpreter::operand{.element = script_value{expr.value}});
}
/// Lowers an if / else node.
///
/// Emitted layout:
///   <condition> <else-target> INTERNAL_jump_if <then-block>
///   [<end-target> INTERNAL_jump <else-block>]
/// Every target is stored as (index of the operand to continue at) - 1.
/// Diagram from the original author: https://app.excalidraw.com/s/hxPegpAmTX/2c8KKzinqeg
template<>
void handle<ast::conditional>(std::vector<ByteCodeInterpreter::operand> &ctx, std::vector<script_error> &errors,
                              ast::conditional &cond) {
    using operand = ByteCodeInterpreter::operand;
    using opcode = ByteCodeInterpreter::operator_t;

    // Pushes a placeholder jump target and returns its index. Indices are kept
    // rather than references: ctx may reallocate while nested nodes are being
    // lowered, so references into it are not stable.
    const auto reserve_target = [&]() -> size_t {
        ctx.push_back(operand{.element = script_value{}, .location = cond.location});
        return ctx.size() - 1;
    };
    // Points a previously reserved target at the current end of ctx.
    const auto patch_target = [&](size_t slot) {
        ctx[slot].element = static_cast<int32_t>(ctx.size()) - 1;
        ctx[slot].location = cond.location;
    };
    const auto emit = [&](opcode op) {
        ctx.push_back(operand{.element = op, .location = cond.location});
    };

    std::visit([&](auto &node) { handle(ctx, errors, *node); }, cond.condition->contents);
    const auto else_target = reserve_target();
    emit(opcode::INTERNAL_jump_if);
    handle(ctx, errors, *cond.on_condition);

    if (!cond.otherwise) {
        patch_target(else_target);
        return;
    }

    const auto end_target = reserve_target();
    emit(opcode::INTERNAL_jump);
    patch_target(else_target);
    handle(ctx, errors, *cond.otherwise);
    patch_target(end_target);
}
/// Lowers a while loop.
///
/// Emitted layout:
///   <condition> <exit-target> INTERNAL_jump_if <body> <loop-start> INTERNAL_jump
/// Both targets are stored as (index of the operand to continue at) - 1.
template<>
void handle<ast::while_loop>(std::vector<ByteCodeInterpreter::operand> &ctx, std::vector<script_error> &errors,
                             ast::while_loop &cond) {
    using operand = ByteCodeInterpreter::operand;
    using opcode = ByteCodeInterpreter::operator_t;

    // Back-edge target: one before the first operand of the condition.
    auto loop_start = static_cast<int32_t>(ctx.size()) - 1;

    std::visit([&](auto &node) { handle(ctx, errors, *node); }, cond.condition->contents);

    // Placeholder for the exit target; it is patched once the body is emitted.
    ctx.push_back(operand{.element = script_value{}, .location = cond.location});
    const auto exit_slot = ctx.size() - 1;
    ctx.push_back(operand{.element = opcode::INTERNAL_jump_if, .location = cond.location});

    handle(ctx, errors, *cond.on_condition);

    ctx.push_back(operand{.element = script_value{loop_start}, .location = cond.location});
    ctx.push_back(operand{.element = opcode::INTERNAL_jump, .location = cond.location});

    // Safe to take a reference now: nothing is pushed after this point.
    auto &exit_target = ctx[exit_slot];
    exit_target.element = static_cast<int32_t>(ctx.size()) - 1;
    exit_target.location = cond.location;
}
std::vector<ByteCodeInterpreter::operand>
ByteCodeInterpreter::generate(std::vector<script_error> &errors, ast::block &tree, bool loop) {
std::vector<operand> code;
handle(code, errors, tree);
if (loop) {
// Here we have to deal with the quirks of jumping before the increments happens again
code.push_back(ByteCodeInterpreter::operand{.element = script_value{-1}, .location = tree.location});
code.push_back(
ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_jump, .location = tree.location});
}
return code;
}
}

+ 2
- 312
src/interpreter.cpp View File

@ -1,7 +1,4 @@
#include <stack>
#include <map>
#include "UserScript.h"
#include "UserScript/parser.h"
#include "UserScript/interpreter.h"
namespace scripting {
void to_null(script_value& value, auto on_value, auto on_error) {
@ -33,151 +30,6 @@ namespace scripting {
}
}
/// Bytecode interpreter behind the UserScript interface.
///
/// Scripts are lexed and parsed into an AST, lowered by generate() into a flat
/// list of operands (`bytecode`) and executed by step(), which uses
/// `execution_stack` as working storage.
class ByteCodeInterpreter final : public UserScript {
    /// Script variables, keyed by name.
    std::map<std::string, script_value> variables;
    /// Callables registered through registerFunction(), keyed by name.
    std::map<std::string, function> functions;
    /// Operand stack consumed by resolve_and_pop().
    std::vector<argument> execution_stack;

public:
    /// Bytecode element naming a function together with its argument count.
    struct function_tag {
        std::string name;
        size_t arity;
        std::shared_ptr<const code_location> location;
    };

    /// Bytecode element naming a variable.
    struct variable_tag {
        std::string name;
        std::shared_ptr<const code_location> location;
    };

    /// Operators stored in the bytecode. The INTERNAL_* entries are control
    /// operations emitted by the generator, not operators written in scripts.
    enum class operator_t : uint8_t {
        logical_not,
        binary_not,
        unary_plus,
        unary_minus,
        divide,
        modulo,
        multiply,
        subtract,
        add,
        bitshift_left,
        bitshift_right,
        rotate_left,
        rotate_right,
        less_than,
        greater_than,
        less_or_equal_than,
        greater_or_equal_than,
        equals,
        different,
        binary_and,
        binary_or,
        binary_xor,
        logical_and,
        logical_or,
        INTERNAL_jump,
        INTERNAL_jump_if,
        INTERNAL_stack_cls,
    };

    /// One bytecode slot: a value, a function/variable reference or an operator,
    /// plus the source location it was generated from (may be null).
    struct operand {
        std::variant<script_value, function_tag, variable_tag, operator_t> element;
        std::shared_ptr<const code_location> location;
    };

    /// Returns a reference to the variable called `name`, or std::nullopt when
    /// no such variable exists.
    std::optional<std::reference_wrapper<script_value>> getValue(const std::string& name) {
        if (auto var = variables.find(name); var != variables.end()) {
            return var->second;
        }
        return std::nullopt;
    }

    /// Stores `value` under `name`.
    /// @return true when an existing variable was overwritten, false when the
    ///         variable was created by this call.
    bool setValue(const std::string& name, script_value value) {
        if (auto var = variables.find(name); var != variables.end()) {
            var->second = std::move(value);
            return true;
        }
        variables.emplace(name, std::move(value));
        return false;
    }

    /// Program produced by the last successful generate() call.
    std::vector<operand> bytecode;
    /// Index of the next operand to execute. Initialized so that a
    /// default-initialized interpreter never reads an indeterminate value.
    size_t instruction_ptr = 0;

    /// Returns a copy of the variable called `name`, or a default-constructed
    /// script_value when it does not exist.
    script_value resolve(const std::string& name) final {
        auto it = variables.find(name);
        if (it == variables.end()) {
            return script_value{};
        }
        return it->second;
    }

    /// Pops the top of the execution stack and returns its value. A
    /// script_variable is replaced by the variable's current value; an empty
    /// stack yields a default-constructed script_value.
    script_value resolve_and_pop() {
        if (execution_stack.empty()) return script_value{};
        auto value = std::move(execution_stack.back());
        auto resolved = std::visit([&](auto v) -> script_value {
            if constexpr (std::is_same_v<script_variable, decltype(v)>) {
                return resolve(v.name);
            } else {
                return v;
            }
        }, value);
        execution_stack.pop_back();
        return resolved;
    }

    void big_f_ing_switch(operand& op, std::optional<script_error>& error);

    /// Lowers `tree` into bytecode. With `loop` set, a trailing jump to -1 is
    /// appended after the program.
    std::vector<ByteCodeInterpreter::operand> generate(std::vector<script_error>& errors, ast::block &tree, bool loop = true);

    /// Registers `fn` under `name`, replacing any previous registration.
    void registerFunction(std::string name, function fn) final {
        functions.insert_or_assign(name, std::move(fn));
    }

    /// Compiles `code` without the trailing loop jump and runs it to the end.
    /// @return the value popped from the stack after the last operand, or the
    ///         lex/parse/generation errors, or the first runtime error.
    std::variant<script_value, std::vector<script_error>> executeAtOnce(std::string code) final {
        std::vector<script_error> errors;
        auto lexed = ast::lex(code, errors);
        auto parsed = ast::parse(lexed, errors);
        if (not errors.empty()) return errors;
        bytecode = generate(errors, parsed, false);
        if (not errors.empty()) return errors;
        std::optional<script_error> maybe_error;
        instruction_ptr = 0;
        while (instruction_ptr < bytecode.size()) {
            step(maybe_error);
            if (maybe_error) return std::vector<script_error>({maybe_error.value()});
        }
        auto v = resolve_and_pop();
        execution_stack.clear();
        return v;
    }

    /// Compiles `code` (with the trailing loop jump) for later stepOnce() calls.
    /// `bytecode` is only replaced when lexing and parsing reported no error.
    /// NOTE(review): instruction_ptr and execution_stack are left untouched here;
    /// confirm that is intended when prepare() is called more than once.
    std::vector<script_error> prepare(std::string code) final {
        std::vector<script_error> errors;
        auto lexed = ast::lex(code, errors);
        auto parsed = ast::parse(lexed, errors);
        if (errors.empty()) {
            bytecode = generate(errors, parsed, true);
        }
        return errors;
    }

    /// Calls step() until it returns true and reports the error it set, if any.
    std::optional<script_error> stepOnce() final {
        std::optional<script_error> error;
        while (not step(error));
        return error;
    }

    bool step(std::optional<script_error>& error);

    ~ByteCodeInterpreter() final {}
};
namespace wizardry {
// taken from cppreference: https://en.cppreference.com/w/cpp/utility/variant/visit
template<class... Ts>
@ -236,168 +88,6 @@ namespace scripting {
return instruction_ptr >= bytecode.size() || error || ret;
}
// Replace with constexpr vector & find ?
static const std::map<ast::operator_t, ByteCodeInterpreter::operator_t> mappings = {
{ast::operator_t::logical_not, ByteCodeInterpreter::operator_t::logical_not},
{ast::operator_t::binary_not, ByteCodeInterpreter::operator_t::binary_not},
{ast::operator_t::divide, ByteCodeInterpreter::operator_t::divide},
{ast::operator_t::modulo, ByteCodeInterpreter::operator_t::modulo},
{ast::operator_t::multiply, ByteCodeInterpreter::operator_t::multiply},
{ast::operator_t::subtract, ByteCodeInterpreter::operator_t::subtract},
{ast::operator_t::add, ByteCodeInterpreter::operator_t::add},
{ast::operator_t::bitshift_left, ByteCodeInterpreter::operator_t::bitshift_left},
{ast::operator_t::bitshift_right, ByteCodeInterpreter::operator_t::bitshift_right},
{ast::operator_t::rotate_left, ByteCodeInterpreter::operator_t::rotate_left},
{ast::operator_t::rotate_right, ByteCodeInterpreter::operator_t::rotate_right},
{ast::operator_t::less_than, ByteCodeInterpreter::operator_t::less_than},
{ast::operator_t::greater_than, ByteCodeInterpreter::operator_t::greater_than},
{ast::operator_t::less_or_equal_than, ByteCodeInterpreter::operator_t::less_or_equal_than},
{ast::operator_t::greater_or_equal_than, ByteCodeInterpreter::operator_t::greater_or_equal_than},
{ast::operator_t::equals, ByteCodeInterpreter::operator_t::equals},
{ast::operator_t::different, ByteCodeInterpreter::operator_t::different},
{ast::operator_t::binary_and, ByteCodeInterpreter::operator_t::binary_and},
{ast::operator_t::binary_or, ByteCodeInterpreter::operator_t::binary_or},
{ast::operator_t::binary_xor, ByteCodeInterpreter::operator_t::binary_xor},
{ast::operator_t::logical_and, ByteCodeInterpreter::operator_t::logical_and},
{ast::operator_t::logical_or, ByteCodeInterpreter::operator_t::logical_or},
};
/// GENERATION HANDLERS DECLARATIONS
/// Lowers one AST node into operands appended to the first argument; problems
/// are meant to be reported through the second. The explicit specializations
/// are all declared up front because their definitions call each other.
template<typename Node>
void handle(std::vector<ByteCodeInterpreter::operand>&, std::vector<script_error>&, Node&);

template<>
void handle<ast::block>(std::vector<ByteCodeInterpreter::operand>&, std::vector<script_error>&, ast::block&);

template<>
void handle<ast::command_expression>(std::vector<ByteCodeInterpreter::operand>&, std::vector<script_error>&,
                                     ast::command_expression&);

template<>
void handle<ast::binary_algebraic_expression>(std::vector<ByteCodeInterpreter::operand>&,
                                              std::vector<script_error>&, ast::binary_algebraic_expression&);

template<>
void handle<ast::unary_algebraic_expression>(std::vector<ByteCodeInterpreter::operand>&,
                                             std::vector<script_error>&, ast::unary_algebraic_expression&);

template<>
void handle<ast::paren_expression>(std::vector<ByteCodeInterpreter::operand>&, std::vector<script_error>&,
                                   ast::paren_expression&);

template<>
void handle<ast::conditional>(std::vector<ByteCodeInterpreter::operand>&, std::vector<script_error>&,
                              ast::conditional&);

template<>
void handle<ast::while_loop>(std::vector<ByteCodeInterpreter::operand>&, std::vector<script_error>&,
                             ast::while_loop&);

template<>
void handle<ast::expression>(std::vector<ByteCodeInterpreter::operand>&, std::vector<script_error>&,
                             ast::expression&);

template<>
void handle<ast::variable_expression>(std::vector<ByteCodeInterpreter::operand>&, std::vector<script_error>&,
                                      ast::variable_expression&);

template<>
void handle<ast::literal_int_expression>(std::vector<ByteCodeInterpreter::operand>&,
                                         std::vector<script_error>&, ast::literal_int_expression&);

template<>
void handle<ast::literal_string_expression>(std::vector<ByteCodeInterpreter::operand>&,
                                            std::vector<script_error>&, ast::literal_string_expression&);
/// GENERATION HANDLERS DEFINITIONS
/// Lowers each statement of `block` in order and emits an INTERNAL_stack_cls
/// operand (without a location) after every one of them.
template<>
void handle<ast::block>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::block& block) {
    const auto lower = [&](auto& node) { handle(ctx, errors, *node); };
    for (auto& statement : block.contents) {
        std::visit(lower, statement.contents);
        ctx.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_stack_cls});
    }
}
/// Lowers a command call: the arguments are emitted from last to first,
/// followed by a function_tag carrying the command name and argument count.
template<>
void handle<ast::command_expression>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::command_expression& cmd) {
    const auto lower = [&](auto& node) { handle(ctx, errors, *node); };
    for (auto arg = cmd.arguments.rbegin(), last = cmd.arguments.rend(); arg != last; ++arg) {
        std::visit(lower, (*arg)->contents);
    }

    ByteCodeInterpreter::operand call{
        .element = ByteCodeInterpreter::function_tag{.name = cmd.name.value, .arity = cmd.arguments.size()},
        .location = cmd.location};
    ctx.push_back(std::move(call));
}
/// Lowers a parenthesised expression by lowering whatever it wraps; the
/// parentheses themselves produce no operand.
template<>
void handle<ast::paren_expression>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::paren_expression& expr) {
    const auto lower = [&](auto& inner) { handle(ctx, errors, *inner); };
    std::visit(lower, expr.content);
}
/// Lowers a generic expression by dispatching on the alternative it holds.
template<>
void handle<ast::expression>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::expression& expr) {
    const auto lower = [&](auto& alternative) { handle(ctx, errors, *alternative); };
    std::visit(lower, expr.contents);
}
/// Lowers a binary expression in postfix order: left operand, right operand,
/// then the translated operator.
template<>
void handle<ast::binary_algebraic_expression>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::binary_algebraic_expression& expr) {
    handle(ctx, errors, *expr.lhs);
    handle(ctx, errors, *expr.rhs);

    // .at() throws std::out_of_range when expr.op has no entry in `mappings`.
    auto translated = mappings.at(expr.op);
    ctx.push_back(ByteCodeInterpreter::operand{.element = translated, .location = expr.location});
}
/// Lowers a unary expression: the operand first, then the translated operator.
template<>
void handle<ast::unary_algebraic_expression>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::unary_algebraic_expression& expr) {
    handle(ctx, errors, *expr.content);

    // .at() throws std::out_of_range when expr.op has no entry in `mappings`.
    // NOTE(review): `mappings` has no unary_plus / unary_minus target; confirm
    // how the parser encodes unary +/-.
    auto translated = mappings.at(expr.op);
    ctx.push_back(ByteCodeInterpreter::operand{.element = translated, .location = expr.location});
}
/// Lowers a variable reference into a variable_tag operand.
/// The source location is stored inside the tag; the operand's own `location`
/// member is left empty.
template<>
void handle<ast::variable_expression>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::variable_expression& expr) {
    ByteCodeInterpreter::variable_tag reference{.name = expr.name.value, .location = expr.location};
    ctx.push_back(ByteCodeInterpreter::operand{.element = std::move(reference)});
}
/// Lowers an integer literal into a script_value operand (no location attached).
template<>
void handle<ast::literal_int_expression>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::literal_int_expression& expr) {
    ctx.push_back(ByteCodeInterpreter::operand{.element = script_value{expr.value}});
}
/// Lowers a string literal into a script_value operand (no location attached).
template<>
void handle<ast::literal_string_expression>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::literal_string_expression& expr) {
    ctx.push_back(ByteCodeInterpreter::operand{.element = script_value{expr.value}});
}
/// Lowers an if / else node.
///
/// Emitted layout:
///   <condition> <else-target> INTERNAL_jump_if <then-block>
///   [<end-target> INTERNAL_jump <else-block>]
/// Every target is stored as (index of the operand to continue at) - 1.
/// Diagram from the original author: https://app.excalidraw.com/s/hxPegpAmTX/2c8KKzinqeg
template<>
void handle<ast::conditional>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::conditional& cond) {
    using operand = ByteCodeInterpreter::operand;
    using opcode = ByteCodeInterpreter::operator_t;

    // Pushes a placeholder jump target and returns its index. Indices are kept
    // rather than references: ctx may reallocate while nested nodes are being
    // lowered, so references into it are not stable.
    const auto reserve_target = [&]() -> size_t {
        ctx.push_back(operand{.element = script_value{}, .location = cond.location});
        return ctx.size() - 1;
    };
    // Points a previously reserved target at the current end of ctx.
    const auto patch_target = [&](size_t slot) {
        ctx[slot].element = static_cast<int32_t>(ctx.size()) - 1;
        ctx[slot].location = cond.location;
    };
    const auto emit = [&](opcode op) {
        ctx.push_back(operand{.element = op, .location = cond.location});
    };

    std::visit([&](auto& node) { handle(ctx, errors, *node); }, cond.condition->contents);
    const auto else_target = reserve_target();
    emit(opcode::INTERNAL_jump_if);
    handle(ctx, errors, *cond.on_condition);

    if (!cond.otherwise) {
        patch_target(else_target);
        return;
    }

    const auto end_target = reserve_target();
    emit(opcode::INTERNAL_jump);
    patch_target(else_target);
    handle(ctx, errors, *cond.otherwise);
    patch_target(end_target);
}
/// Lowers a while loop.
///
/// Emitted layout:
///   <condition> <exit-target> INTERNAL_jump_if <body> <loop-start> INTERNAL_jump
/// Both targets are stored as (index of the operand to continue at) - 1.
template<>
void handle<ast::while_loop>(std::vector<ByteCodeInterpreter::operand>& ctx, std::vector<script_error>& errors, ast::while_loop& cond) {
    using operand = ByteCodeInterpreter::operand;
    using opcode = ByteCodeInterpreter::operator_t;

    // Back-edge target: one before the first operand of the condition.
    auto loop_start = static_cast<int32_t>(ctx.size()) - 1;

    std::visit([&](auto& node) { handle(ctx, errors, *node); }, cond.condition->contents);

    // Placeholder for the exit target; it is patched once the body is emitted.
    ctx.push_back(operand{.element = script_value{}, .location = cond.location});
    const auto exit_slot = ctx.size() - 1;
    ctx.push_back(operand{.element = opcode::INTERNAL_jump_if, .location = cond.location});

    handle(ctx, errors, *cond.on_condition);

    ctx.push_back(operand{.element = script_value{loop_start}, .location = cond.location});
    ctx.push_back(operand{.element = opcode::INTERNAL_jump, .location = cond.location});

    // Safe to take a reference now: nothing is pushed after this point.
    auto& exit_target = ctx[exit_slot];
    exit_target.element = static_cast<int32_t>(ctx.size()) - 1;
    exit_target.location = cond.location;
}
std::vector<ByteCodeInterpreter::operand> ByteCodeInterpreter::generate(std::vector<script_error>& errors, ast::block &tree, bool loop) {
std::vector<operand> code;
handle(code, errors, tree);
if(loop) {
// Here we have to deal with the quirks of jumping before the increments happens again
code.push_back(ByteCodeInterpreter::operand{.element = script_value{-1}, .location = tree.location});
code.push_back(ByteCodeInterpreter::operand{.element = ByteCodeInterpreter::operator_t::INTERNAL_jump, .location = tree.location});
}
return code;
}
std::unique_ptr<UserScript> prepare_interpreter(const std::string& code) {
auto script = std::make_unique<ByteCodeInterpreter>();
script->prepare(code);
@ -1162,7 +852,7 @@ namespace scripting {
[&](auto &instruction_target) {
error = script_error{
op.location,
"JumpIf to invalid location "// + std::to_string(holds_alternative<scripting::null>(instruction_target))
"JumpIf to invalid location "
};
}
);

+ 381
- 0
src/lex.cpp View File

@ -0,0 +1,381 @@
#include <map>
#include <optional>
#include <sstream>
#include <iostream>
#include <array>
#include <charconv>
#include <utility>
#include <algorithm>
#include <limits>
#include "UserScript/parser.h"
#include "UserScript.h"
/////////////////
/// CONSTANTS ///
/////////////////
using symbol_t = scripting::ast::symbol_t;
constexpr std::array<std::pair<std::string_view, symbol_t>, 25> operators {
std::pair<std::string_view, symbol_t>{"(", symbol_t::l_paren},
std::pair<std::string_view, symbol_t>{")", symbol_t::r_paren},
std::pair<std::string_view, symbol_t>{"!=", symbol_t::different},
std::pair<std::string_view, symbol_t>{"!", symbol_t::logical_not},
std::pair<std::string_view, symbol_t>{"~", symbol_t::binary_not},
std::pair<std::string_view, symbol_t>{"/", symbol_t::divide},
std::pair<std::string_view, symbol_t>{"%", symbol_t::modulo},
std::pair<std::string_view, symbol_t>{"*", symbol_t::multiply},
std::pair<std::string_view, symbol_t>{"-", symbol_t::subtract},
std::pair<std::string_view, symbol_t>{"+", symbol_t::add},
std::pair<std::string_view, symbol_t>{"<<<", symbol_t::rotate_left},
std::pair<std::string_view, symbol_t>{">>>", symbol_t::rotate_right},
std::pair<std::string_view, symbol_t>{"<<", symbol_t::bitshift_left},
std::pair<std::string_view, symbol_t>{">>", symbol_t::bitshift_right},
std::pair<std::string_view, symbol_t>{"<=", symbol_t::less_or_equal_than},
std::pair<std::string_view, symbol_t>{">=", symbol_t::greater_or_equal_than},
std::pair<std::string_view, symbol_t>{"<", symbol_t::less_than},
std::pair<std::string_view, symbol_t>{">", symbol_t::greater_than},
std::pair<std::string_view, symbol_t>{"==", symbol_t::equals},
std::pair<std::string_view, symbol_t>{"&&", symbol_t::logical_and},
std::pair<std::string_view, symbol_t>{"&", symbol_t::binary_and},
std::pair<std::string_view, symbol_t>{"||", symbol_t::logical_or},
std::pair<std::string_view, symbol_t>{"|", symbol_t::binary_or},
std::pair<std::string_view, symbol_t>{"^", symbol_t::binary_xor},
std::pair<std::string_view, symbol_t>{"\n", symbol_t::new_line}
};
// Character sequences that may never appear inside an identifier: try_identifier
// stops as soon as the remaining input starts with one of them.
// The list holds every operator spelling plus "=" (which has no operator entry).
const std::vector<std::string_view> reserved_character_sequences {
    // grouping
    "(", ")",
    // negation / inequality
    "!=", "!", "~",
    // arithmetic
    "/", "%", "*", "-", "+",
    // rotations and shifts
    "<<<", ">>>", "<<", ">>",
    // comparisons
    "<=", ">=", "<", ">", "==",
    // logical and bitwise combinators
    "&&", "&", "||", "|", "^",
    // assignment-like character and line break
    "=", "\n"
};
// Shorthand for the token type the lexer produces.
using token = scripting::ast::token;
/////////////////////
/// LEXER HELPERS ///
/////////////////////
// Outcome of successfully lexing a single token.
// Member names and order are relied upon by both positional and designated
// initialisers in the try_* lexers below.
struct lex_token_result {
// The token that was recognised.
token tok;
// The part of the input that follows the recognised token.
std::string_view rest;
};
/// Non-owning view over the bytes of one UTF-8 encoded code point.
struct rune_ref {
    /// Raw bytes of the code point (lead byte followed by its continuation bytes).
    std::string_view str;

    /// Decodes the referenced bytes into the code point value.
    ///
    /// Returns 0 for an empty view and for sequences longer than the 6 bytes the
    /// lead-byte scheme can describe. The bytes are assumed to be structurally
    /// valid (continuation bytes are not re-checked here).
    explicit operator uint32_t() const {
        if(str.empty()) return 0;
        const auto lead = static_cast<uint8_t>(str.front());
        if(str.size() == 1) return lead;
        if(str.size() > 6) return 0;
        // An N byte sequence starts with N one-bits and a zero-bit, which leaves
        // 7 - N payload bits in the lead byte; keep ALL of them, not just one.
        const auto payload_bits = 7 - str.size();
        uint32_t rune = lead & ((uint32_t{1} << payload_bits) - 1);
        // Every continuation byte contributes its low 6 bits.
        for(const char c : str.substr(1)) {
            rune = (rune << 6) | (static_cast<uint8_t>(c) & 0b00111111u);
        }
        return rune;
    }

    /// Tells whether the code point is one of the horizontal space characters
    /// the lexer skips between tokens. Line breaks are deliberately not listed.
    [[nodiscard]] bool is_space() const {
        constexpr std::array<uint32_t, 18> spaces{
            0x0020, 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
            0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000
        };
        return std::find(spaces.begin(), spaces.end(), static_cast<uint32_t>(*this)) != spaces.end();
    }
};
// Outcome of successfully splitting one UTF-8 code point off the front of a text.
struct try_rune_result {
// The bytes of the code point that was split off.
rune_ref rune;
// The text that follows that code point.
std::string_view rest;
};
/// Builds the code_location of the position where `rest` starts inside `original`.
///
/// @param original   the complete source text
/// @param rest       the not yet consumed tail of `original`
/// @param last_line  line text of a previous location; it is shared with the new
///                   location when the current line has the same contents, so
///                   consecutive tokens on one line do not each copy the line
/// @return a freshly allocated location with 1-based line and column numbers,
///         both clamped to the int32_t range
std::shared_ptr<scripting::code_location> get_loc(std::string_view original, std::string_view rest, std::shared_ptr<const std::string> last_line) {
    // TODO: re-check the edge cases of this function
    const auto to_coordinate = [](size_t value) {
        return static_cast<int32_t>(std::clamp<size_t>(value, 1, std::numeric_limits<int32_t>::max()));
    };

    if(original.empty()) {
        // NOTE(review): this reports column 2 whereas the general path below
        // would report column 1 for the same input; confirm which is intended.
        return std::make_shared<scripting::code_location>(scripting::code_location{
            .line_contents = std::make_shared<std::string>(),
            .line_number = to_coordinate(1),
            .column_number = to_coordinate(1 + 1)
        });
    }

    // Everything that has already been consumed.
    const auto consumed = original.substr(0, original.size() - rest.size());
    const auto line_no = std::count(consumed.begin(), consumed.end(), '\n') + 1;

    // Number of characters between the last line break and the current position.
    const auto last_break = consumed.rfind('\n');
    const size_t column_no = last_break == std::string_view::npos
        ? consumed.size()
        : consumed.size() - last_break - 1;

    // Boundaries of the line that contains the current position.
    const size_t line_begin = consumed.size() - column_no;
    const size_t line_end = consumed.size() + std::min(rest.find('\n'), rest.size());
    const std::string_view current = original.substr(line_begin, line_end - line_begin);

    const bool reusable = last_line && *last_line == current;
    if(!reusable) {
        last_line = std::make_shared<std::string>(current);
    }

    return std::make_shared<scripting::code_location>(scripting::code_location{
        .line_contents = last_line,
        .line_number = to_coordinate(line_no),
        .column_number = to_coordinate(column_no + 1)
    });
}
////////////////////
/// LEXER PROPER ///
////////////////////
/// Splits one UTF-8 encoded code point off the front of `text`.
///
/// @param text      input to read from
/// @param location  location attached to any error that gets reported
/// @param errors    receives an error when a multi-byte sequence is truncated
///                  or contains a byte that is not a continuation byte
/// @return the code point and the remaining text, or std::nullopt when `text`
///         is empty or does not start with a well-formed sequence (a stray
///         continuation byte or an impossible lead byte fails without an error)
auto try_rune(std::string_view text, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<try_rune_result> {
    static_assert(CHAR_BIT == 8, "Get your weird ass cpu outta here");
    if(text.empty()) return std::nullopt;

    // Number of leading one-bits of a byte: 0 for ASCII, 1 for a continuation
    // byte, N for the lead byte of an N byte sequence.
    const auto leading_ones = [](const char& c) {
        return std::countl_one(*reinterpret_cast<const uint8_t*>(&c));
    };

    const auto length = leading_ones(text.front());
    if(length == 0) {
        // Plain ASCII, a single byte.
        return try_rune_result{text.substr(0, 1), text.substr(1)};
    }
    // 1 is the middle of a sequence, 7 and 8 can never start one.
    const bool plausible_lead = length >= 2 && length <= 6;
    if(!plausible_lead) return std::nullopt;

    const auto report_bad_utf8 = [&] {
        errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"});
    };

    // Too short to hold the whole sequence.
    if(text.size() < static_cast<size_t>(length)) {
        report_bad_utf8();
        return std::nullopt;
    }

    const auto rune = text.substr(0, length);
    // Every byte after the lead byte has to be a continuation byte.
    for(const char& byte : rune.substr(1)) {
        if(leading_ones(byte) != 1) {
            report_bad_utf8();
            return std::nullopt;
        }
    }
    return try_rune_result{rune, text.substr(length)};
}
/// Lexes a double-quoted string literal from the front of `view`.
///
/// Supported escapes: \\ \a \b \f \n \r \t \v \' \" , three-digit octal
/// (\ooo) and two-digit hexadecimal (\xhh). Any other escaped character stands
/// for itself.
///
/// @param view      input to read from
/// @param location  location stored in the token and in reported errors
/// @param errors    receives an error for unterminated strings and bad escapes
/// @return the string token and the remaining input, or std::nullopt when the
///         input does not start with a well-formed string literal
constexpr auto try_string = [](std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // Value of each byte when read as a hexadecimal digit, -1 if it is not one.
    constexpr std::array<int8_t, 256> hexdigits = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
        -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    // Records an error and yields the "no token" result in one step.
    const auto fail = [&](const char* message) -> std::optional<lex_token_result> {
        errors.push_back(scripting::script_error{.location = location, .message = message});
        return std::nullopt;
    };

    auto it = view.begin();
    // NOTE(review): std::isspace also matches '\n', so a line break directly in
    // front of a string literal is skipped here instead of becoming a new_line
    // token - confirm that this is intended.
    // The cast keeps bytes >= 0x80 from reaching std::isspace as negative values.
    while(it != view.end() and std::isspace(static_cast<unsigned char>(*it))) ++it;
    if(it == view.end()) return std::nullopt;
    if(*it != '"') return std::nullopt;

    std::string generated;
    while(true) {
        ++it;
        if(it == view.end()) return fail("Unterminated string");
        switch(*it) {
            case '\\':
                ++it;
                // A backslash as the very last character: stop before reading past the end.
                if(it == view.end()) return fail("Unterminated string");
                switch(*it) {
                    case '\\': generated += '\\'; break;
                    case 'a': generated += '\a'; break;
                    case 'b': generated += '\b'; break;
                    case 'f': generated += '\f'; break;
                    case 'n': generated += '\n'; break;
                    case 'r': generated += '\r'; break;
                    case 't': generated += '\t'; break;
                    case 'v': generated += '\v'; break;
                    case '\'': generated += '\''; break;
                    case '"': generated += '"'; break;
                    case '0': [[fallthrough]];
                    case '1': [[fallthrough]];
                    case '2': [[fallthrough]];
                    case '3': [[fallthrough]];
                    case '4': [[fallthrough]];
                    case '5': [[fallthrough]];
                    case '6': [[fallthrough]];
                    case '7':
                    {
                        // Exactly three octal digits, the first one is under `it`.
                        unsigned value = 0;
                        for(int remaining = 3; remaining > 0; --remaining) {
                            const auto digit = static_cast<uint8_t>(*it - '0');
                            if(digit > 7) {
                                // Reported but not fatal: lexing of the string goes on.
                                errors.push_back(scripting::script_error{.location = location, .message = "Bad octal value in string"});
                            }
                            value = value * 8 + digit;
                            if(remaining > 1) {
                                ++it;
                                if(it == view.end()) return fail("Unterminated string");
                            }
                        }
                        generated += static_cast<char>(value);
                        break;
                    }
                    case 'x':
                    {
                        // Exactly two hexadecimal digits follow the 'x'.
                        int value = 0;
                        for(int remaining = 2; remaining > 0; --remaining) {
                            ++it;
                            if(it == view.end()) return fail("Unterminated string");
                            // Index with the unsigned byte value, never a negative char.
                            const auto digit = hexdigits[static_cast<uint8_t>(*it)];
                            if(digit < 0) return fail("Bad hexadecimal value in string");
                            value = value * 16 + digit;
                        }
                        generated += static_cast<char>(value);
                        break;
                    }
                    default:
                        // Unknown escape: the character stands for itself.
                        generated += *it;
                }
                break;
            case '"':
                // Closing quote: the token ends here, the rest starts after it.
                return lex_token_result {
                    token{.location = location, .value = std::move(generated)},
                    std::string_view(++it, view.end())
                };
            default:
                generated += *it;
                break;
        }
    }
};
/// Lexes a decimal 32 bit integer literal from the front of `view`.
///
/// @param view      input to read from
/// @param location  location stored in the token and in reported errors
/// @param errors    receives an error when the literal does not fit in int32_t
/// @return the integer token and the remaining input, or std::nullopt when the
///         input does not start with a representable integer
constexpr auto try_int32 = [](std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // std::from_chars works on character pointers, not on string_view iterators.
    const char* const first = view.data();
    const char* const last = first + view.size();

    int32_t parsed = 0;
    const auto [ptr, ec] = std::from_chars(first, last, parsed);
    // Nothing consumed: the input does not start with a number.
    if(ptr == first) return std::nullopt;
    // Digits were consumed but the value is out of range; `parsed` is not
    // written in that case, so it must not end up in a token.
    if(ec != std::errc{}) {
        errors.push_back(scripting::script_error{.location = location, .message = "Integer literal does not fit in a 32 bit integer"});
        return std::nullopt;
    }

    // The location is copied, not moved: it is a reference owned by the caller.
    return lex_token_result{
        token{.location = location, .value = parsed},
        std::string_view(ptr, static_cast<size_t>(last - ptr))
    };
};
/// Lexes an operator from the front of `code`.
///
/// The `operators` table is searched in order and the first spelling that
/// `code` starts with wins.
///
/// @param code      input to read from
/// @param location  location stored in the token
/// @param errors    not written to by this lexer
/// @return the operator token and the remaining input, or std::nullopt when
///         `code` does not start with any known operator
std::optional<lex_token_result> try_operator(std::string_view code, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) {
    const auto match = std::find_if(operators.begin(), operators.end(), [&](const auto& entry) {
        return code.starts_with(entry.first);
    });
    if(match == operators.end()) return std::nullopt;

    return lex_token_result{
        token{.location = location, .value = match->second},
        code.substr(match->first.size())
    };
}
/// Lexes an identifier from the front of `view`.
///
/// Code points are collected until the input ends, a reserved character
/// sequence starts, or a space is met. A terminating space is consumed, a
/// reserved sequence is left in the remaining input.
///
/// @param view      input to read from
/// @param location  location stored in the token and in reported errors
/// @param errors    receives an error when the input is not valid UTF-8
/// @return the identifier token and the remaining input, or std::nullopt when
///         no identifier character could be read
auto try_identifier(std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    constexpr auto at_reserved_sequence = [](std::string_view remaining) -> bool {
        return std::ranges::any_of(reserved_character_sequences, [&](auto reserved){
            return remaining.starts_with(reserved);
        });
    };

    std::string name;
    while(!view.empty() && !at_reserved_sequence(view)) {
        const auto maybe_rune = try_rune(view, location, errors);
        if(!maybe_rune) {
            errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"});
            return std::nullopt;
        }
        const auto& [rune, rest] = *maybe_rune;
        // The code point is consumed whether it ends the identifier or extends it.
        view = rest;
        if(rune.is_space()) break;
        name += rune.str;
    }

    if(name.empty()) return std::nullopt;
    return lex_token_result{
        .tok = token{
            .location = location,
            .value = scripting::ast::identifier{.location = location, .value = std::move(name)}
        },
        .rest = view
    };
}
/// Splits `code` into tokens.
///
/// Spaces between tokens are skipped (line breaks are tokens of their own).
/// At each position the string, operator, integer and identifier lexers are
/// tried in that order. Lexing stops at the first position where none of them
/// succeeds or where the input is not valid UTF-8.
///
/// @param code    the source text to lex
/// @param errors  receives every problem found while lexing
/// @return the tokens recognised before the end of input or the first failure
std::vector<token> scripting::ast::lex(const std::string& code, std::vector<scripting::script_error>& errors) {
    std::vector<token> return_value;
    std::string_view current = code;
    // Line text of the previous token, shared between locations on the same line.
    std::shared_ptr<const std::string> last_line;

    while(not current.empty()) {
        // Skip the spaces in front of the next token.
        while(not current.empty()) {
            auto location = get_loc(code, current, last_line);
            const auto c = try_rune(current, location, errors);
            if(not c.has_value()) {
                errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 encoding detected while trimming space"});
                return return_value;
            }
            if(not c->rune.is_space()) break;
            current = c->rest;
        }
        // Only trailing spaces were left: this is the end of the input, not an
        // unknown token.
        if(current.empty()) break;

        auto location = get_loc(code, current, last_line);
        last_line = location->line_contents;

        auto res = try_string(current, location, errors);
        if (!res) res = try_operator(current, location, errors);
        if (!res) res = try_int32(current, location, errors);
        if (!res) res = try_identifier(current, location, errors);

        if(not res.has_value()) {
            errors.push_back(scripting::script_error{.location = location, .message = "Unknown token"});
            return return_value;
        }
        current = res->rest;
        return_value.emplace_back(std::move(res->tok));
    }
    return return_value;
}

src/lex_parse.cpp → src/parse.cpp View File

@ -10,379 +10,13 @@
#include "UserScript/parser.h"
#include "UserScript.h"
/////////////////
/// CONSTANTS ///
/////////////////
// Shorthand for the symbol enumeration the lexer emits for operators.
using symbol_t = scripting::ast::symbol_t;
// Table mapping the textual spelling of every operator to its symbol_t value.
// try_operator walks this table front to back and returns the first entry the
// input starts with, so a spelling must appear BEFORE any other spelling that
// is a prefix of it ("!=" before "!", "<<<" before "<<" before "<", ...).
// Keep that ordering when adding entries.
constexpr std::array<std::pair<std::string_view, symbol_t>, 25> operators {
std::pair<std::string_view, symbol_t>{"(", symbol_t::l_paren},
std::pair<std::string_view, symbol_t>{")", symbol_t::r_paren},
std::pair<std::string_view, symbol_t>{"!=", symbol_t::different},
std::pair<std::string_view, symbol_t>{"!", symbol_t::logical_not},
std::pair<std::string_view, symbol_t>{"~", symbol_t::binary_not},
std::pair<std::string_view, symbol_t>{"/", symbol_t::divide},
std::pair<std::string_view, symbol_t>{"%", symbol_t::modulo},
std::pair<std::string_view, symbol_t>{"*", symbol_t::multiply},
std::pair<std::string_view, symbol_t>{"-", symbol_t::subtract},
std::pair<std::string_view, symbol_t>{"+", symbol_t::add},
// Three-character operators come before their two-character prefixes.
std::pair<std::string_view, symbol_t>{"<<<", symbol_t::rotate_left},
std::pair<std::string_view, symbol_t>{">>>", symbol_t::rotate_right},
std::pair<std::string_view, symbol_t>{"<<", symbol_t::bitshift_left},
std::pair<std::string_view, symbol_t>{">>", symbol_t::bitshift_right},
std::pair<std::string_view, symbol_t>{"<=", symbol_t::less_or_equal_than},
std::pair<std::string_view, symbol_t>{">=", symbol_t::greater_or_equal_than},
std::pair<std::string_view, symbol_t>{"<", symbol_t::less_than},
std::pair<std::string_view, symbol_t>{">", symbol_t::greater_than},
std::pair<std::string_view, symbol_t>{"==", symbol_t::equals},
std::pair<std::string_view, symbol_t>{"&&", symbol_t::logical_and},
std::pair<std::string_view, symbol_t>{"&", symbol_t::binary_and},
std::pair<std::string_view, symbol_t>{"||", symbol_t::logical_or},
std::pair<std::string_view, symbol_t>{"|", symbol_t::binary_or},
std::pair<std::string_view, symbol_t>{"^", symbol_t::binary_xor},
// A line break is lexed as a token of its own.
std::pair<std::string_view, symbol_t>{"\n", symbol_t::new_line}
};
// Character sequences that may never appear inside an identifier: try_identifier
// stops as soon as the remaining input starts with one of them.
// The list holds every operator spelling plus "=" (which has no operator entry).
const std::vector<std::string_view> reserved_character_sequences {
    // grouping
    "(", ")",
    // negation / inequality
    "!=", "!", "~",
    // arithmetic
    "/", "%", "*", "-", "+",
    // rotations and shifts
    "<<<", ">>>", "<<", ">>",
    // comparisons
    "<=", ">=", "<", ">", "==",
    // logical and bitwise combinators
    "&&", "&", "||", "|", "^",
    // assignment-like character and line break
    "=", "\n"
};
/////////////////////
/// LEXER HELPERS ///
/////////////////////
// Shorthand for the token type the lexer produces.
using token = scripting::ast::token;
// Outcome of successfully lexing a single token.
// Member names and order are relied upon by both positional and designated
// initialisers in the try_* lexers below.
struct lex_token_result {
// The token that was recognised.
token tok;
// The part of the input that follows the recognised token.
std::string_view rest;
};
/// Non-owning view over the bytes of one UTF-8 encoded code point.
struct rune_ref {
    /// Raw bytes of the code point (lead byte followed by its continuation bytes).
    std::string_view str;

    /// Decodes the referenced bytes into the code point value.
    ///
    /// Returns 0 for an empty view and for sequences longer than the 6 bytes the
    /// lead-byte scheme can describe. The bytes are assumed to be structurally
    /// valid (continuation bytes are not re-checked here).
    explicit operator uint32_t() const {
        if(str.empty()) return 0;
        const auto lead = static_cast<uint8_t>(str.front());
        if(str.size() == 1) return lead;
        if(str.size() > 6) return 0;
        // An N byte sequence starts with N one-bits and a zero-bit, which leaves
        // 7 - N payload bits in the lead byte; keep ALL of them, not just one.
        const auto payload_bits = 7 - str.size();
        uint32_t rune = lead & ((uint32_t{1} << payload_bits) - 1);
        // Every continuation byte contributes its low 6 bits.
        for(const char c : str.substr(1)) {
            rune = (rune << 6) | (static_cast<uint8_t>(c) & 0b00111111u);
        }
        return rune;
    }

    /// Tells whether the code point is one of the horizontal space characters
    /// the lexer skips between tokens. Line breaks are deliberately not listed.
    [[nodiscard]] bool is_space() const {
        constexpr std::array<uint32_t, 18> spaces{
            0x0020, 0x00A0, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004,
            0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202F, 0x205F, 0x3000
        };
        return std::find(spaces.begin(), spaces.end(), static_cast<uint32_t>(*this)) != spaces.end();
    }
};
// Outcome of successfully splitting one UTF-8 code point off the front of a text.
struct try_rune_result {
// The bytes of the code point that was split off.
rune_ref rune;
// The text that follows that code point.
std::string_view rest;
};
/// Builds the code_location of the position where `rest` starts inside `original`.
///
/// @param original   the complete source text
/// @param rest       the not yet consumed tail of `original`
/// @param last_line  line text of a previous location; it is shared with the new
///                   location when the current line has the same contents, so
///                   consecutive tokens on one line do not each copy the line
/// @return a freshly allocated location with 1-based line and column numbers,
///         both clamped to the int32_t range
std::shared_ptr<scripting::code_location> get_loc(std::string_view original, std::string_view rest, std::shared_ptr<const std::string> last_line) {
    // TODO: re-check the edge cases of this function
    const auto to_coordinate = [](size_t value) {
        return static_cast<int32_t>(std::clamp<size_t>(value, 1, std::numeric_limits<int32_t>::max()));
    };

    if(original.empty()) {
        // NOTE(review): this reports column 2 whereas the general path below
        // would report column 1 for the same input; confirm which is intended.
        return std::make_shared<scripting::code_location>(scripting::code_location{
            .line_contents = std::make_shared<std::string>(),
            .line_number = to_coordinate(1),
            .column_number = to_coordinate(1 + 1)
        });
    }

    // Everything that has already been consumed.
    const auto consumed = original.substr(0, original.size() - rest.size());
    const auto line_no = std::count(consumed.begin(), consumed.end(), '\n') + 1;

    // Number of characters between the last line break and the current position.
    const auto last_break = consumed.rfind('\n');
    const size_t column_no = last_break == std::string_view::npos
        ? consumed.size()
        : consumed.size() - last_break - 1;

    // Boundaries of the line that contains the current position.
    const size_t line_begin = consumed.size() - column_no;
    const size_t line_end = consumed.size() + std::min(rest.find('\n'), rest.size());
    const std::string_view current = original.substr(line_begin, line_end - line_begin);

    const bool reusable = last_line && *last_line == current;
    if(!reusable) {
        last_line = std::make_shared<std::string>(current);
    }

    return std::make_shared<scripting::code_location>(scripting::code_location{
        .line_contents = last_line,
        .line_number = to_coordinate(line_no),
        .column_number = to_coordinate(column_no + 1)
    });
}
////////////////////
/// LEXER PROPER ///
////////////////////
/// Splits one UTF-8 encoded code point off the front of `text`.
///
/// @param text      input to read from
/// @param location  location attached to any error that gets reported
/// @param errors    receives an error when a multi-byte sequence is truncated
///                  or contains a byte that is not a continuation byte
/// @return the code point and the remaining text, or std::nullopt when `text`
///         is empty or does not start with a well-formed sequence (a stray
///         continuation byte or an impossible lead byte fails without an error)
auto try_rune(std::string_view text, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<try_rune_result> {
    static_assert(CHAR_BIT == 8, "Get your weird ass cpu outta here");
    if(text.empty()) return std::nullopt;

    // Number of leading one-bits of a byte: 0 for ASCII, 1 for a continuation
    // byte, N for the lead byte of an N byte sequence.
    const auto leading_ones = [](const char& c) {
        return std::countl_one(*reinterpret_cast<const uint8_t*>(&c));
    };

    const auto length = leading_ones(text.front());
    if(length == 0) {
        // Plain ASCII, a single byte.
        return try_rune_result{text.substr(0, 1), text.substr(1)};
    }
    // 1 is the middle of a sequence, 7 and 8 can never start one.
    const bool plausible_lead = length >= 2 && length <= 6;
    if(!plausible_lead) return std::nullopt;

    const auto report_bad_utf8 = [&] {
        errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"});
    };

    // Too short to hold the whole sequence.
    if(text.size() < static_cast<size_t>(length)) {
        report_bad_utf8();
        return std::nullopt;
    }

    const auto rune = text.substr(0, length);
    // Every byte after the lead byte has to be a continuation byte.
    for(const char& byte : rune.substr(1)) {
        if(leading_ones(byte) != 1) {
            report_bad_utf8();
            return std::nullopt;
        }
    }
    return try_rune_result{rune, text.substr(length)};
}
/// Lexes a double-quoted string literal from the front of `view`.
///
/// Supported escapes: \\ \a \b \f \n \r \t \v \' \" , three-digit octal
/// (\ooo) and two-digit hexadecimal (\xhh). Any other escaped character stands
/// for itself.
///
/// @param view      input to read from
/// @param location  location stored in the token and in reported errors
/// @param errors    receives an error for unterminated strings and bad escapes
/// @return the string token and the remaining input, or std::nullopt when the
///         input does not start with a well-formed string literal
constexpr auto try_string = [](std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // Value of each byte when read as a hexadecimal digit, -1 if it is not one.
    constexpr std::array<int8_t, 256> hexdigits = {
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        +0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
        -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    // Records an error and yields the "no token" result in one step.
    const auto fail = [&](const char* message) -> std::optional<lex_token_result> {
        errors.push_back(scripting::script_error{.location = location, .message = message});
        return std::nullopt;
    };

    auto it = view.begin();
    // NOTE(review): std::isspace also matches '\n', so a line break directly in
    // front of a string literal is skipped here instead of becoming a new_line
    // token - confirm that this is intended.
    // The cast keeps bytes >= 0x80 from reaching std::isspace as negative values.
    while(it != view.end() and std::isspace(static_cast<unsigned char>(*it))) ++it;
    if(it == view.end()) return std::nullopt;
    if(*it != '"') return std::nullopt;

    std::string generated;
    while(true) {
        ++it;
        if(it == view.end()) return fail("Unterminated string");
        switch(*it) {
            case '\\':
                ++it;
                // A backslash as the very last character: stop before reading past the end.
                if(it == view.end()) return fail("Unterminated string");
                switch(*it) {
                    case '\\': generated += '\\'; break;
                    case 'a': generated += '\a'; break;
                    case 'b': generated += '\b'; break;
                    case 'f': generated += '\f'; break;
                    case 'n': generated += '\n'; break;
                    case 'r': generated += '\r'; break;
                    case 't': generated += '\t'; break;
                    case 'v': generated += '\v'; break;
                    case '\'': generated += '\''; break;
                    case '"': generated += '"'; break;
                    case '0': [[fallthrough]];
                    case '1': [[fallthrough]];
                    case '2': [[fallthrough]];
                    case '3': [[fallthrough]];
                    case '4': [[fallthrough]];
                    case '5': [[fallthrough]];
                    case '6': [[fallthrough]];
                    case '7':
                    {
                        // Exactly three octal digits, the first one is under `it`.
                        unsigned value = 0;
                        for(int remaining = 3; remaining > 0; --remaining) {
                            const auto digit = static_cast<uint8_t>(*it - '0');
                            if(digit > 7) {
                                // Reported but not fatal: lexing of the string goes on.
                                errors.push_back(scripting::script_error{.location = location, .message = "Bad octal value in string"});
                            }
                            value = value * 8 + digit;
                            if(remaining > 1) {
                                ++it;
                                if(it == view.end()) return fail("Unterminated string");
                            }
                        }
                        generated += static_cast<char>(value);
                        break;
                    }
                    case 'x':
                    {
                        // Exactly two hexadecimal digits follow the 'x'.
                        int value = 0;
                        for(int remaining = 2; remaining > 0; --remaining) {
                            ++it;
                            if(it == view.end()) return fail("Unterminated string");
                            // Index with the unsigned byte value, never a negative char.
                            const auto digit = hexdigits[static_cast<uint8_t>(*it)];
                            if(digit < 0) return fail("Bad hexadecimal value in string");
                            value = value * 16 + digit;
                        }
                        generated += static_cast<char>(value);
                        break;
                    }
                    default:
                        // Unknown escape: the character stands for itself.
                        generated += *it;
                }
                break;
            case '"':
                // Closing quote: the token ends here, the rest starts after it.
                return lex_token_result {
                    token{.location = location, .value = std::move(generated)},
                    std::string_view(++it, view.end())
                };
            default:
                generated += *it;
                break;
        }
    }
};
/// Lexes a decimal 32 bit integer literal from the front of `view`.
///
/// @param view      input to read from
/// @param location  location stored in the token and in reported errors
/// @param errors    receives an error when the literal does not fit in int32_t
/// @return the integer token and the remaining input, or std::nullopt when the
///         input does not start with a representable integer
constexpr auto try_int32 = [](std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    // std::from_chars works on character pointers, not on string_view iterators.
    const char* const first = view.data();
    const char* const last = first + view.size();

    int32_t parsed = 0;
    const auto [ptr, ec] = std::from_chars(first, last, parsed);
    // Nothing consumed: the input does not start with a number.
    if(ptr == first) return std::nullopt;
    // Digits were consumed but the value is out of range; `parsed` is not
    // written in that case, so it must not end up in a token.
    if(ec != std::errc{}) {
        errors.push_back(scripting::script_error{.location = location, .message = "Integer literal does not fit in a 32 bit integer"});
        return std::nullopt;
    }

    // The location is copied, not moved: it is a reference owned by the caller.
    return lex_token_result{
        token{.location = location, .value = parsed},
        std::string_view(ptr, static_cast<size_t>(last - ptr))
    };
};
/// Lexes an operator from the front of `code`.
///
/// The `operators` table is searched in order and the first spelling that
/// `code` starts with wins.
///
/// @param code      input to read from
/// @param location  location stored in the token
/// @param errors    not written to by this lexer
/// @return the operator token and the remaining input, or std::nullopt when
///         `code` does not start with any known operator
std::optional<lex_token_result> try_operator(std::string_view code, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) {
    const auto match = std::find_if(operators.begin(), operators.end(), [&](const auto& entry) {
        return code.starts_with(entry.first);
    });
    if(match == operators.end()) return std::nullopt;

    return lex_token_result{
        token{.location = location, .value = match->second},
        code.substr(match->first.size())
    };
}
/// Lexes an identifier from the front of `view`.
///
/// Code points are collected until the input ends, a reserved character
/// sequence starts, or a space is met. A terminating space is consumed, a
/// reserved sequence is left in the remaining input.
///
/// @param view      input to read from
/// @param location  location stored in the token and in reported errors
/// @param errors    receives an error when the input is not valid UTF-8
/// @return the identifier token and the remaining input, or std::nullopt when
///         no identifier character could be read
auto try_identifier(std::string_view view, std::shared_ptr<scripting::code_location>& location, std::vector<scripting::script_error>& errors) -> std::optional<lex_token_result> {
    constexpr auto at_reserved_sequence = [](std::string_view remaining) -> bool {
        return std::ranges::any_of(reserved_character_sequences, [&](auto reserved){
            return remaining.starts_with(reserved);
        });
    };

    std::string name;
    while(!view.empty() && !at_reserved_sequence(view)) {
        const auto maybe_rune = try_rune(view, location, errors);
        if(!maybe_rune) {
            errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 found when parsing identifier"});
            return std::nullopt;
        }
        const auto& [rune, rest] = *maybe_rune;
        // The code point is consumed whether it ends the identifier or extends it.
        view = rest;
        if(rune.is_space()) break;
        name += rune.str;
    }

    if(name.empty()) return std::nullopt;
    return lex_token_result{
        .tok = token{
            .location = location,
            .value = scripting::ast::identifier{.location = location, .value = std::move(name)}
        },
        .rest = view
    };
}
/// Splits `code` into tokens.
///
/// Spaces between tokens are skipped (line breaks are tokens of their own).
/// At each position the string, operator, integer and identifier lexers are
/// tried in that order. Lexing stops at the first position where none of them
/// succeeds or where the input is not valid UTF-8.
///
/// @param code    the source text to lex
/// @param errors  receives every problem found while lexing
/// @return the tokens recognised before the end of input or the first failure
std::vector<token> scripting::ast::lex(const std::string& code, std::vector<scripting::script_error>& errors) {
    std::vector<token> return_value;
    std::string_view current = code;
    // Line text of the previous token, shared between locations on the same line.
    std::shared_ptr<const std::string> last_line;

    while(not current.empty()) {
        // Skip the spaces in front of the next token.
        while(not current.empty()) {
            auto location = get_loc(code, current, last_line);
            const auto c = try_rune(current, location, errors);
            if(not c.has_value()) {
                errors.push_back(scripting::script_error{.location = location, .message = "Bad UTF-8 encoding detected while trimming space"});
                return return_value;
            }
            if(not c->rune.is_space()) break;
            current = c->rest;
        }
        // Only trailing spaces were left: this is the end of the input, not an
        // unknown token.
        if(current.empty()) break;

        auto location = get_loc(code, current, last_line);
        last_line = location->line_contents;

        auto res = try_string(current, location, errors);
        if (!res) res = try_operator(current, location, errors);
        if (!res) res = try_int32(current, location, errors);
        if (!res) res = try_identifier(current, location, errors);

        if(not res.has_value()) {
            errors.push_back(scripting::script_error{.location = location, .message = "Unknown token"});
            return return_value;
        }
        current = res->rest;
        return_value.emplace_back(std::move(res->tok));
    }
    return return_value;
}
//////////////////////
/// PARSER HELPERS ///
//////////////////////
using token = scripting::ast::token;
using symbol_t = scripting::ast::symbol_t;
template<typename T>
struct parse_result {
std::optional<T> result;
@ -454,7 +88,7 @@ parse_result try_command_expr(std::span
holds_alternative<symbol_t>(current.front().value)
and get<symbol_t>(current.front().value) == symbol_t::r_paren
)
) {
) {
auto [expr, rest] = try_expression(current, errors);
if(not expr) {
@ -473,7 +107,7 @@ parse_result try_command_expr(std::span
parse_result<scripting::ast::expression> try_expression(std::span<token> code, std::vector<scripting::script_error>& errors) {
scripting::ast::expression node;
auto current = code;
#ifdef HANDLE_EXPRESSION
static_assert(false, "Found a macro name HANDLE_EXPRESSION, halting");
#endif
@ -896,7 +530,7 @@ parse_result try_binary_algebraic_e
scripting::ast::binary_algebraic_expression node;
auto current = code;
#ifdef HANDLE_EXPRESSION
static_assert(false, "Found a macro name HANDLE_EXPRESSION, halting");
#endif

Loading…
Cancel
Save