From 26724a3812463c3a755b0641a29314ac66e9b101 Mon Sep 17 00:00:00 2001 From: Ludovic 'Archivist' Lagouardette Date: Fri, 11 Aug 2023 07:19:09 +0200 Subject: [PATCH] Added type checked support for GOTO and LABEL in prevision of adding conditionals jumps and loops --- CMakeLists.txt | 3 +- include/generalized_parsing.h | 29 ++++++ include/molasses/errors.h | 9 ++ include/molasses/generator_primitives.h | 5 +- include/molasses/parser_primitives.h | 9 +- include/molasses/parser_types.h | 4 +- .../generator_primitives_x86_64_linux.cpp | 15 ++- src/molasses/parser_primitives.cpp | 96 +++++++++++++++---- tests/007.exp | 82 ++++++++++++++++ tests/007/exit-with-3.mol | 7 ++ tests/007/library.mol | 39 ++++++++ 11 files changed, 271 insertions(+), 27 deletions(-) create mode 100644 include/generalized_parsing.h create mode 100644 tests/007.exp create mode 100644 tests/007/exit-with-3.mol create mode 100644 tests/007/library.mol diff --git a/CMakeLists.txt b/CMakeLists.txt index 70568db..f1638ac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -13,7 +13,7 @@ add_executable(sugar include/molasses/lexer.h src/molasses/parser_primitives.cpp include/molasses/parser_primitives.h - include/molasses/generator_primitives.h include/molasses/errors.h include/molasses/parser_types.h src/molasses/generator_primitives.cpp) + include/molasses/generator_primitives.h include/molasses/errors.h include/molasses/parser_types.h src/molasses/generator_primitives.cpp include/generalized_parsing.h) function(add_expect_test [testname filename]) add_test( @@ -31,6 +31,7 @@ add_expect_test(id003 ./tests/003.exp) add_expect_test(id004 ./tests/004.exp) add_expect_test(id005 ./tests/005.exp) add_expect_test(id006 ./tests/006.exp) +add_expect_test(id007 ./tests/007.exp) diff --git a/include/generalized_parsing.h b/include/generalized_parsing.h new file mode 100644 index 0000000..f036e63 --- /dev/null +++ b/include/generalized_parsing.h @@ -0,0 +1,29 @@ +#pragma once +#include "molasses/lexer.h" +#include "molasses/parser_primitives.h" +#include +#include +#include + +struct parser { + /** + * + * @param ctx + * @param lexer_data + * @param current + * @return a span of the tokens left before reaching the target, assuming the parser is as greedy as possible + */ + virtual std::span identify(molasses::parser_context& ctx, const molasses::lexed_output& lexer_data, std::span current) = 0; +}; + +struct ast_node { + virtual std::vector apply(molasses::parser_context& ctx, const molasses::lexed_output& lexer_data, std::span current) = 0; +}; + +struct parser_branch : public parser { + std::vector> choice; +}; + +struct parser_sequence : public parser { + std::vector> sequence; +}; \ No newline at end of file diff --git a/include/molasses/errors.h b/include/molasses/errors.h index 4c2d440..79ce2dd 100644 --- a/include/molasses/errors.h +++ b/include/molasses/errors.h @@ -69,6 +69,15 @@ namespace molasses { )) {} // TODO: Better error message }; + struct orphan_goto_error : public parser_error { + orphan_goto_error(const symbol& sym, const std::string& label_name) + : parser_error(details::concatenate_builder( + "A __GOTO__ has been found to be without a label\n", "\tAt ", sym.file_name, ":", sym.line, ":", sym.column, + "\n", "\tExpected a label ", label_name, + "\n" + )) {} + // TODO: Better error message + }; struct unknown_token_error : public parser_error { explicit unknown_token_error(const symbol& sym) : parser_error(details::concatenate_builder( diff --git a/include/molasses/generator_primitives.h b/include/molasses/generator_primitives.h index e03ee0c..24556b6 100644 --- a/include/molasses/generator_primitives.h +++ b/include/molasses/generator_primitives.h @@ -9,7 +9,10 @@ namespace molasses { std::vector generate_return(); template - std::vector generate_label(const std::string& target); + std::vector generate_label(const std::string& target); + + template + std::vector generate_goto(const std::string& target); template std::vector generate_push_int32(int32_t target); diff --git a/include/molasses/parser_primitives.h b/include/molasses/parser_primitives.h index 32e7f4a..72bd07e 100644 --- a/include/molasses/parser_primitives.h +++ b/include/molasses/parser_primitives.h @@ -57,6 +57,13 @@ namespace molasses { template parser_context register_i32_operations(parser_context); - bool type_check(const parser_context&, const lexed_output&, const std::vector&, std::vector execution_input, const std::vector& execution_output); + bool type_check( + const parser_context& parser_state, + const lexed_output& lexer_state, + const std::vector& consumed_stream, + std::vector execution_input, + const std::vector& execution_output, + const std::vector>& sub_bodies + ); } diff --git a/include/molasses/parser_types.h b/include/molasses/parser_types.h index 7fce53f..80fa4e3 100644 --- a/include/molasses/parser_types.h +++ b/include/molasses/parser_types.h @@ -98,12 +98,14 @@ namespace molasses { std::vector _args; std::vector _rets; std::vector _body; + std::vector> _simple_sub_bodies; - procedure_operation(std::string name, std::vector args, std::vector rets, std::vector body) + procedure_operation(std::string name, std::vector args, std::vector rets, std::vector body, std::vector> sub_bodies) : _name(std::forward(name)) , _args(std::forward>(args)) , _rets(std::forward>(rets)) , _body(std::forward>(body)) + , _simple_sub_bodies(std::forward>>(sub_bodies)) {} [[nodiscard]] std::string name() const final { diff --git a/src/molasses/generator_primitives_x86_64_linux.cpp b/src/molasses/generator_primitives_x86_64_linux.cpp index 6b2cca8..237af1c 100644 --- a/src/molasses/generator_primitives_x86_64_linux.cpp +++ b/src/molasses/generator_primitives_x86_64_linux.cpp @@ -536,11 +536,18 @@ namespace molasses { } template<> - std::vector generate_label(const std::string& target) { - return { + std::vector generate_label(const std::string& target) { + return { marshal(target)+":\n" - }; - } + }; + } + + template<> + std::vector generate_goto(const std::string& target) { + return { + " jmp "+marshal(target)+"\n" + }; + } template<> std::vector generate_return() { diff --git a/src/molasses/parser_primitives.cpp b/src/molasses/parser_primitives.cpp index 421ef51..40ce4dc 100644 --- a/src/molasses/parser_primitives.cpp +++ b/src/molasses/parser_primitives.cpp @@ -1,8 +1,9 @@ -#include -#include #include "molasses/parser_primitives.h" -#include "molasses/generator_primitives.h" #include "molasses/errors.h" +#include "molasses/generator_primitives.h" +#include +#include +#include namespace molasses { std::vector operator>>(std::vector current_stack, const operation& next_op) { @@ -90,24 +91,40 @@ namespace molasses { const lexed_output& lexer_state, const std::vector& consumed_stream, std::vector execution_input, - const std::vector& execution_output + const std::vector& execution_output, + const std::vector>& sub_bodies ) { auto& type_stack = execution_input; - - for(const auto& symbol : consumed_stream) { + + std::map> effective_snapshots; + size_t idx = 0; + + for(auto it = consumed_stream.begin(); it != consumed_stream.end(); ++it, ++idx) { + const auto& symbol = *it; const auto& symbol_text = lexer_state.dictionary.at(symbol); - if(symbol.is_string) { + // Skip GOTOs and LABELs + if(auto ahead = it; ++ahead != consumed_stream.end() and (lexer_state.dictionary.at(*ahead) == "__GOTO__" or lexer_state.dictionary.at(*ahead) == "__LABEL__")) { + effective_snapshots[idx] = type_stack; + it = ahead; + ++idx; + } else if(symbol.is_string) { type_stack.emplace_back("u8 ptr"); } else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) { type_stack.emplace_back("i32"); } else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) { type_stack.emplace_back("i64"); } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) { - type_stack = type_stack >> *is_op; - } + type_stack = type_stack >> *is_op; + } } + if(type_stack != execution_output) return false; + for(auto [start, end] : sub_bodies) { + if(effective_snapshots[start] != effective_snapshots[end]) { + return false; + } + } - return type_stack == execution_output; + return true; } generate_context parse(parser_context ctx, const lexed_output& lexer_data) { @@ -115,14 +132,19 @@ namespace molasses { DO_KW = 1, SEPARATOR_KW, PROC_KW, - END_KW + END_KW, + LABEL_KW, + GOTO_KW }; lexed_output fake; - fake.dictionary[PROC_KW] = "__PROC__"; + fake.dictionary[PROC_KW] = "__PROC__"; fake.dictionary[SEPARATOR_KW] = "__--__"; fake.dictionary[DO_KW] = "__DO__"; fake.dictionary[END_KW] = "__END__"; + fake.dictionary[LABEL_KW] = "__LABEL__"; + fake.dictionary[GOTO_KW] = "__GOTO__"; + auto tokens = concatenate(fake, lexer_data); @@ -197,16 +219,40 @@ namespace molasses { // Process body std::vector body; + std::vector> sub_bodies; + std::map found_labels; + std::map found_gotos; while(*it != END_KW) { - body.emplace_back(*it); - last_valid = it; - ++it; + if(auto ahead = it; ++ahead != tokens.symbols.end() and (*ahead == GOTO_KW or *ahead == LABEL_KW)) { + if(*ahead == GOTO_KW) { + found_gotos[tokens.dictionary[*it]] = body.size(); + } else if(*ahead == LABEL_KW) { + found_labels[tokens.dictionary[*it]] = body.size(); + // TODO: Handle duplicate labels + } + body.emplace_back(*it); + body.emplace_back(*ahead); + last_valid = ahead; + it = ++ahead; + } else { + body.emplace_back(*it); + last_valid = it; + ++it; + } check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column)); } last_valid = it; ++it; + + for(auto& [dest, index] : found_gotos) { + if(not found_labels.contains(dest)) { + throw orphan_goto_error(body[index], dest); + } + + sub_bodies.emplace_back(std::min(index, found_labels[dest]), std::max(index, found_labels[dest])); + } - return std::make_pair(it, std::make_shared(name, argument_types, return_types, body)); + return std::make_pair(it, std::make_shared(name, argument_types, return_types, body, sub_bodies)); #undef check_for_unexpected_stream_end }; @@ -220,7 +266,7 @@ namespace molasses { } while (progress != tokens.symbols.end()); for(auto& proc : parsed_procedures) { - if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) { + if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets, proc->_simple_sub_bodies)) { throw procedure_stack_error(); } } @@ -239,9 +285,21 @@ namespace molasses { ops.push_back(instruction); } - for(auto elem : _body) { + for(auto it = _body.begin(); it != _body.end(); ++it) { + auto elem = *it; auto token = lexer_data.dictionary.at(elem); - if(elem.is_string) { + if(auto ahead = it; ++ahead != _body.end() and (lexer_data.dictionary.at(*ahead) == "__GOTO__" or lexer_data.dictionary.at(*ahead) == "__LABEL__")) { + if(lexer_data.dictionary.at(*ahead) == "__GOTO__") { + for(auto&& instruction : generate_goto(name() + " in " + token)) { + ops.push_back(instruction); + } + } else if(lexer_data.dictionary.at(*ahead) == "__LABEL__") { + for(auto&& instruction : generate_label(name() + " in " + token)) { + ops.push_back(instruction); + } + } + it = ahead; + } else if(elem.is_string) { for(auto&& instruction : generate_push_string_ptr(elem)) { ops.push_back(instruction); } diff --git a/tests/007.exp b/tests/007.exp new file mode 100644 index 0000000..ee0bd2e --- /dev/null +++ b/tests/007.exp @@ -0,0 +1,82 @@ +#!/usr/bin/expect + +set SUGAR_EXECUTABLE $::env(SUGAR_EXECUTABLE) +set BUILD_NAME 007. + +proc abort {reason} { + puts "test failed $reason" + exit 1 +} + +spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all +expect { + error { abort "failed to parse" } + eof { abort "cannot find the symbol main in lexed output" } + main +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 0} { + abort "compiler crashed" +} + +# ------------------------------------------------------------------- + +spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all parse +expect { + error { abort "failed to parse" } + eof { abort "cannot find the main procedure" } + main +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 0} { + abort "compiler crashed" +} + +# ------------------------------------------------------------------- + +spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all parse +expect { + error { abort "failed to parse" } + eof { abort "cannot find the exit-syscall-number procedure" } + exit-syscall-number +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 0} { + abort "compiler crashed" +} + +# ------------------------------------------------------------------- + +spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol lex tests/007/library.mol lex merge parse /tmp/sugar.generated.$BUILD_NAME generate assemble +expect { + error { abort "failed to compile" } + eof { abort "didn't run clang" } + clang +} +expect { + error { abort "failed to link" } + eof { abort "didn't run ld" } + ld +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 0} { + abort "compiler crashed" +} + +# ------------------------------------------------------------------- + +spawn -noecho /tmp/sugar.generated.$BUILD_NAME +expect { + error { abort "failed to compile" } + eof { abort "didn't output" } + Hello +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 3} { + abort "executable didn't return exit code 3 but $value instead" +} \ No newline at end of file diff --git a/tests/007/exit-with-3.mol b/tests/007/exit-with-3.mol new file mode 100644 index 0000000..1e3409f --- /dev/null +++ b/tests/007/exit-with-3.mol @@ -0,0 +1,7 @@ +__PROC__ main +__--__ +__DO__ + +11_i64 "\tHello world\n" write-out +1 2_i32 + i32-to-i64 exit +__END__ diff --git a/tests/007/library.mol b/tests/007/library.mol new file mode 100644 index 0000000..112f9d5 --- /dev/null +++ b/tests/007/library.mol @@ -0,0 +1,39 @@ +__PROC__ write-syscall-number +__--__ +i64 +__DO__ +1_i64 +__END__ + +__PROC__ stdout-fd +__--__ +i64 +__DO__ +1_i64 +__END__ + +__PROC__ exit-syscall-number +__--__ +i64 +__DO__ +60_i64 +__END__ + +__PROC__ exit +i64 +__--__ +__DO__ +"POTAT" __LABEL__ +exit-syscall-number syscall1 drop_i64 +60_i64 +"POTAT" __GOTO__ +drop_i64 +__END__ + +__PROC__ write-out +i64 +u8 ptr +__--__ +__DO__ +u8-ptr_to_i64 stdout-fd write-syscall-number syscall3 drop_i64 +__END__ \ No newline at end of file