diff --git a/.clang-tidy b/.clang-tidy index 120a956..0f715b0 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -169,4 +169,4 @@ CheckOptions: - {key: readability-identifier-naming.LocalVariableCase, value: lower_case } - {key: readability-identifier-naming.GlobalConstantCase, value: lower_case } - {key: readability-identifier-naming.ConstexprVariableCase, value: lower_case } - - {key: readability-identifier-naming.EnumConstantCase, value: lower_case } \ No newline at end of file + - {key: readability-identifier-naming.EnumConstantCase, value: AnyCase } \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index d3769b1..62407e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,9 +17,16 @@ add_executable(sugar include/molasses/parser_primitives.h include/molasses/generator_primitives.h) -add_test( - NAME id001 - WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" - COMMAND expect ./tests/001.exp -) -set_property(TEST id001 PROPERTY ENVIRONMENT "SUGAR_EXECUTABLE=$") \ No newline at end of file +function(add_expect_test [testname filename]) + add_test( + NAME "${ARGV0}" + WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}" + COMMAND expect "${ARGV1}" + ) + set_property(TEST "${ARGV0}" PROPERTY ENVIRONMENT "SUGAR_EXECUTABLE=$") +endfunction() + + +add_expect_test(id001 ./tests/001.exp) +add_expect_test(id002 ./tests/002.exp) +add_expect_test(id003 ./tests/003.exp) diff --git a/include/molasses/generator_primitives.h b/include/molasses/generator_primitives.h index c339749..e6ea0f7 100644 --- a/include/molasses/generator_primitives.h +++ b/include/molasses/generator_primitives.h @@ -8,9 +8,15 @@ namespace molasses { std::vector generate_label(const std::string& target); - std::vector generate_push_int32(int32_t target); + std::vector generate_push_int32(int32_t target); + + std::vector generate_push_int64(int64_t target); std::vector generate_call(const std::string& target); std::vector generate_enter(); + + std::vector generate_push_string_ptr(const symbol&); + + std::vector generate_string(const symbol&, const std::string&); } \ No newline at end of file diff --git a/include/molasses/lexer.h b/include/molasses/lexer.h index b08e052..b489764 100644 --- a/include/molasses/lexer.h +++ b/include/molasses/lexer.h @@ -5,13 +5,23 @@ namespace molasses { // We will always want symbols to be convertible to int for dictionary lookups - using symbol = int; + struct symbol { + int id; + std::string file_name; + int line; + int column; + [[no_unique_address]] bool is_string; + + operator int() const { + return id; + } + }; struct lexed_output { std::map dictionary; std::vector symbols; }; - lexed_output lex(const std::string &); + lexed_output lex(const std::string & file_name,const std::string & source); lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs); } \ No newline at end of file diff --git a/include/molasses/parser_primitives.h b/include/molasses/parser_primitives.h index e4fb0bf..864ccf0 100644 --- a/include/molasses/parser_primitives.h +++ b/include/molasses/parser_primitives.h @@ -6,6 +6,7 @@ #include #include #include +#include #include "molasses/lexer.h" @@ -24,6 +25,25 @@ constexpr size_t architecture_ptr_size = 4; #endif namespace molasses { + namespace details { + template + std::string concatenate_builder_impl(std::stringstream& stream) + requires (sizeof...(RestT) == 0) + { + return stream.str(); + } + template + std::string concatenate_builder_impl(std::stringstream& stream, FirstT first, RestT... rest) { + stream << first; + return concatenate_builder_impl(stream, rest...); + } + template + std::string concatenate_builder(RestT... rest) { + std::stringstream stream{}; + return concatenate_builder_impl(stream, rest...); + } + } + struct type { [[nodiscard]] virtual std::string name() const = 0; [[nodiscard]] virtual size_t byte_size() const = 0; @@ -119,33 +139,52 @@ namespace molasses { inline auto operator<=>(const operation& lhs, const operation& rhs) { return lhs.name() <=> rhs.name(); } + + struct parser_error : public std::runtime_error { + explicit parser_error(const std::string& str) : std::runtime_error(str) {} + }; - struct type_input_error : std::runtime_error { - type_input_error() : std::runtime_error("Bad type provided") {} + struct type_input_error : public parser_error { + type_input_error() : parser_error("Bad type provided") {} // TODO: Better error message }; - struct value_missing_error : std::runtime_error { - value_missing_error() : std::runtime_error("Expected value, none provided") {} + struct value_missing_error : public parser_error { + value_missing_error() : parser_error("Expected value, none provided") {} // TODO: Better error message }; - struct procedure_stack_error : std::runtime_error { - procedure_stack_error() : std::runtime_error("Expected the stack to look like the return stack upon completion") {} + struct procedure_stack_error : public parser_error { + procedure_stack_error() : parser_error("Expected the stack to look like the return stack upon completion") {} // TODO: Better error message }; - struct unexpected_token_error : std::runtime_error { - unexpected_token_error() : std::runtime_error("An unexpected token has been encountered") {} - // TODO: Better error message + struct unexpected_token_error : public parser_error { + unexpected_token_error(const symbol& sym, const std::string& found, const std::string& expected) + : parser_error ( + details::concatenate_builder( + "Unexpected token encountered\n", + "\tAt ", sym.file_name,":",sym.line,":",sym.column,"\n", + "\tExpected ", expected, "\n", + "\tFound ", found, "\n" + ) + ) {} }; - struct expecting_token_error : std::runtime_error { - expecting_token_error() : std::runtime_error("An expected token has not been encountered before the end of the input") {} + struct expecting_token_error : public parser_error { + expecting_token_error(const std::string& expected, const std::string& context) + : parser_error( + details::concatenate_builder( + "An expected token has not been encountered before the end of the input\n", + "\tExpected ", expected,"\n", + "\t", context,"\n" + ) + ) + {} // TODO: Better error message }; - struct unknown_token_error : std::runtime_error { - unknown_token_error() : std::runtime_error("An unknown token has not been encountered") {} + struct unknown_token_error : public parser_error { + explicit unknown_token_error(const symbol& sym) : parser_error(details::concatenate_builder("An unknown token has been encountered\n", "\tAt ", sym.file_name,":",sym.line,":",sym.column,"\n")) {} // TODO: Better error message }; - struct type_expected_with_modifier_error : std::runtime_error { - type_expected_with_modifier_error() : std::runtime_error("A type is expected before a modifier") {} + struct type_expected_with_modifier_error : public parser_error { + type_expected_with_modifier_error() : parser_error("A type is expected before a modifier") {} // TODO: Better error message }; @@ -169,7 +208,7 @@ namespace molasses { }; generate_context parse(parser_context, const lexed_output&); - std::vector generate(generate_context); + std::vector generate(const generate_context&); parser_context register_integers(parser_context); parser_context register_i32_operations(parser_context); diff --git a/src/main.cpp b/src/main.cpp index 0120cf9..70a629f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -19,120 +18,169 @@ int main(int argc, char** argv) { argc--; } - std::stack compile_stack; + try { - for(auto elem : arguments) { - if(elem == "generate") { - if(std::holds_alternative(compile_stack.top())) { - auto filename = std::get(compile_stack.top()); - compile_stack.pop(); - if(std::holds_alternative(compile_stack.top())) { - auto generator = std::get(compile_stack.top()); + std::stack compile_stack; + + for(auto elem : arguments) { + if(elem == "generate") { + if(std::holds_alternative(compile_stack.top())) { + auto filename = std::get(compile_stack.top()); compile_stack.pop(); - auto assembler = molasses::generate(generator); - std::ofstream output(filename+".s"); - for(const auto& line : assembler) { - output << line; - } - compile_stack.emplace(filename); - } else throw std::runtime_error("generate expects a parsed output"); - } else throw std::runtime_error("generate expects a filename"); - } else if(elem == "parse") { - molasses::parser_context ctx; - ctx = molasses::register_integers(ctx); - ctx = molasses::register_i32_operations(ctx); + if(std::holds_alternative(compile_stack.top())) { + auto generator = std::get(compile_stack.top()); + compile_stack.pop(); + auto assembler = molasses::generate(generator); + std::ofstream output(filename + ".s"); + for(const auto& line : assembler) { + output << line; + } + compile_stack.emplace(filename); + } else + throw std::runtime_error("generate expects a parsed output"); + } else + throw std::runtime_error("generate expects a filename"); + } else if(elem == "parse") { + molasses::parser_context ctx; + ctx = molasses::register_integers(ctx); + ctx = molasses::register_i32_operations(ctx); - if(std::holds_alternative(compile_stack.top())) { - auto lexer = std::get(compile_stack.top()); - compile_stack.pop(); - auto generator = molasses::parse(ctx, lexer); - compile_stack.emplace(generator); - } else throw std::runtime_error("parse expects a lexed output"); - } else if(elem == "lex") { - if(std::holds_alternative(compile_stack.top())) { - auto filename = std::get(compile_stack.top()); - compile_stack.pop(); - if(not std::filesystem::exists(filename)) throw std::runtime_error("file "+filename+" does not exist"); - std::ifstream t(filename); - std::stringstream buffer; - buffer << t.rdbuf(); - auto lexed = molasses::lex(buffer.str()); - compile_stack.emplace(lexed); - } else throw std::runtime_error("lex expects a filename"); - } else if(elem == "merge") { - if(std::holds_alternative(compile_stack.top())) { - auto lexer_1 = std::get(compile_stack.top()); - compile_stack.pop(); if(std::holds_alternative(compile_stack.top())) { - auto lexer_2 = std::get(compile_stack.top()); + auto lexer = std::get(compile_stack.top()); compile_stack.pop(); - compile_stack.emplace(molasses::concatenate(lexer_1, lexer_2)); - } else throw std::runtime_error("merge expects 2 lexed outputs"); - } else throw std::runtime_error("merge expects 2 lexed outputs"); - } else if(elem == "assemble") { - if(std::holds_alternative(compile_stack.top())) { - auto filename = std::get(compile_stack.top()); - compile_stack.pop(); - std::stringstream compile; compile << "clang -c " << filename << ".s -o " << filename << ".o"; - std::stringstream link; link << "ld -e _start " << filename << ".o -o " << filename; - std::cout << compile.str() << std::endl; - system(compile.str().c_str()); - std::cout << link.str() << std::endl; - system(link.str().c_str()); - } else throw std::runtime_error("assemble expects an assembly file"); - } else if(elem == "help" or elem == "--help") { - std::cout << "# Sugar\n\n"; - std::cout << "## Commands\n\n"; - std::cout << "lex : string ➔ lexed_output\n"; - std::cout << "> takes a filename to a file that must be compiled\n\n"; - std::cout << "merge : lexed_output lexed_output ➔ lexed_output\n"; - std::cout << "> merges two lexed modules together\n\n"; - std::cout << "parse : lexed_output ➔ parsed_output\n"; - std::cout << "> prepares code for generation\n\n"; - std::cout << "generate : parsed_output string ➔ string\n"; - std::cout << "> takes a root filename, it will be appended with \".s\" and that will be the generated assembly file,\n"; - std::cout << "> the filename will not be consumed\n\n"; - std::cout << "assemble : string ➔ _ \n"; - std::cout << "> takes a root filename, it will be appended with \".s\" and that file will be compiled,\n"; - std::cout << "> the compiled output will be the given filename\n\n"; - std::cout << "help : _ ➔ _ \n"; - std::cout << "> prints this help\n\n"; - std::cout << "## Examples\n\n"; - std::cout << "- compile the file \"example.mol\" into the \"potato.s\" assembly file\n"; - std::cout << "> `$ sugar example.mol lex parse potato generate`\n"; - std::cout << "\n"; - std::cout << "- compile the file \"example.mol\" into the \"potato\" executable\n"; - std::cout << "> `$ sugar example.mol lex parse potato generate assemble`\n"; - std::cout << "\n"; - std::cout << "- compile the file \"example.mol\" and \"2.mol\" into the \"potato\" executable\n"; - std::cout << "> `$ sugar example.mol lex 2.mol lex merge parse potato generate assemble`\n"; - } else compile_stack.emplace(elem); - } - - if(compile_stack.size() > 1) throw std::runtime_error("build left unfinished operations"); - if(not compile_stack.empty()) { - if(std::holds_alternative(compile_stack.top())) { - auto lexer = std::get(compile_stack.top()); - for(auto elem : lexer.symbols) { - std::cout << elem << " "; - } - std::cout << "\n\n"; - for(auto [id, name] : lexer.dictionary) { - std::cout << id << ": " << name << "\n"; - } - } else if(std::holds_alternative(compile_stack.top())) { - auto generator = std::get(compile_stack.top()); - for(const auto& elem : generator.procedures) { - std::cout << elem->_name << " : "; - for(const auto& args : elem->_args) { - std::cout << args << " "; + auto generator = molasses::parse(ctx, lexer); + compile_stack.emplace(generator); + } else + throw std::runtime_error("parse expects a lexed output"); + } else if(elem == "lex") { + if(std::holds_alternative(compile_stack.top())) { + auto filename = std::get(compile_stack.top()); + compile_stack.pop(); + if(not std::filesystem::exists(filename)) + throw std::runtime_error("file " + filename + " does not exist"); + std::ifstream t(filename); + std::stringstream buffer; + buffer << t.rdbuf(); + auto lexed = molasses::lex(filename, buffer.str()); + compile_stack.emplace(lexed); + } else + throw std::runtime_error("lex expects a filename"); + } else if(elem == "lex-all") { + std::vector lexed_list; + while(not compile_stack.empty() and std::holds_alternative(compile_stack.top())) { + auto filename = std::get(compile_stack.top()); + compile_stack.pop(); + if(not std::filesystem::exists(filename)) + throw std::runtime_error("file " + filename + " does not exist"); + std::ifstream t(filename); + std::stringstream buffer; + buffer << t.rdbuf(); + auto lexed = molasses::lex(filename, buffer.str()); + lexed_list.emplace_back(lexed); } - std::cout << "->"; - for(const auto& rets : elem->_rets) { - std::cout << " " << rets; + for(auto& lexed : lexed_list) { + compile_stack.emplace(std::move(lexed)); } + } else if(elem == "merge") { + if(std::holds_alternative(compile_stack.top())) { + auto lexer_1 = std::get(compile_stack.top()); + compile_stack.pop(); + if(std::holds_alternative(compile_stack.top())) { + auto lexer_2 = std::get(compile_stack.top()); + compile_stack.pop(); + compile_stack.emplace(molasses::concatenate(lexer_1, lexer_2)); + } else + throw std::runtime_error("merge expects 2 lexed outputs"); + } else + throw std::runtime_error("merge expects 2 lexed outputs"); + } else if(elem == "merge-all") { + if(std::holds_alternative(compile_stack.top())) { + auto lexer_1 = std::get(compile_stack.top()); + compile_stack.pop(); + while(not compile_stack.empty() and std::holds_alternative(compile_stack.top())) { + auto lexer_2 = std::get(compile_stack.top()); + compile_stack.pop(); + lexer_1 = molasses::concatenate(lexer_1, lexer_2); + } + compile_stack.emplace(lexer_1); + } else + throw std::runtime_error("merge-all expects at least 1 lexed outputs"); + } else if(elem == "assemble") { + if(std::holds_alternative(compile_stack.top())) { + auto filename = std::get(compile_stack.top()); + compile_stack.pop(); + std::stringstream compile; + compile << "clang -c " << filename << ".s -o " << filename << ".o"; + std::stringstream link; + link << "ld -e _start " << filename << ".o -o " << filename; + std::cout << compile.str() << std::endl; + system(compile.str().c_str()); + std::cout << link.str() << std::endl; + system(link.str().c_str()); + } else + throw std::runtime_error("assemble expects an assembly file"); + } else if(elem == "help" or elem == "--help") { + std::cout << "# Sugar\n\n"; + std::cout << "## Commands\n\n"; + std::cout << "lex : string ➔ lexed_output\n"; + std::cout << "> takes a filename to a file that must be compiled\n\n"; + std::cout << "lex-all : string* ➔ lexed_output*\n"; + std::cout << "> takes as many filenames to files that must be compiled as can be read and passes them through the lexed\n\n"; + std::cout << "merge : lexed_output lexed_output ➔ lexed_output\n"; + std::cout << "> merges two lexed modules together\n\n"; + std::cout << "merge-all : lexed_output* ➔ lexed_output\n"; + std::cout << "> merges as many lexed modules together as present on the top of the stack\n\n"; + std::cout << "parse : lexed_output ➔ parsed_output\n"; + std::cout << "> prepares code for generation\n\n"; + std::cout << "generate : parsed_output string ➔ string\n"; + std::cout << "> takes a root filename, it will be appended with \".s\" and that will be the generated assembly file,\n"; + std::cout << "> the filename will not be consumed\n\n"; + std::cout << "assemble : string ➔ _ \n"; + std::cout + << "> takes a root filename, it will be appended with \".s\" and that file will be compiled,\n"; + std::cout << "> the compiled output will be the given filename\n\n"; + std::cout << "help : _ ➔ _ \n"; + std::cout << "> prints this help\n\n"; + std::cout << "## Examples\n\n"; + std::cout << "- compile the file \"example.mol\" into the \"potato.s\" assembly file\n"; + std::cout << "> `$ sugar example.mol lex parse potato generate`\n"; + std::cout << "\n"; + std::cout << "- compile the file \"example.mol\" into the \"potato\" executable\n"; + std::cout << "> `$ sugar example.mol lex parse potato generate assemble`\n"; std::cout << "\n"; + std::cout << "- compile the file \"example.mol\" and \"2.mol\" into the \"potato\" executable\n"; + std::cout << "> `$ sugar example.mol lex 2.mol lex merge parse potato generate assemble`\n"; + } else + compile_stack.emplace(elem); + } + + if(compile_stack.size() > 1) throw std::runtime_error("build left unfinished operations"); + if(not compile_stack.empty()) { + if(std::holds_alternative(compile_stack.top())) { + auto lexer = std::get(compile_stack.top()); + for(auto elem : lexer.symbols) { + std::cout << elem << " "; + } + std::cout << "\n\n"; + for(auto [id, name] : lexer.dictionary) { + std::cout << id << ": " << name << "\n"; + } + } else if(std::holds_alternative(compile_stack.top())) { + auto generator = std::get(compile_stack.top()); + for(const auto& elem : generator.procedures) { + std::cout << elem->_name << " : "; + for(const auto& args : elem->_args) { + std::cout << args << " "; + } + std::cout << "->"; + for(const auto& rets : elem->_rets) { + std::cout << " " << rets; + } + std::cout << "\n"; + } } } + } catch (molasses::parser_error& error) { + std::cerr << error.what(); } } diff --git a/src/molasses/generator_primitives_x86_64_linux.cpp b/src/molasses/generator_primitives_x86_64_linux.cpp index 9750643..f330615 100644 --- a/src/molasses/generator_primitives_x86_64_linux.cpp +++ b/src/molasses/generator_primitives_x86_64_linux.cpp @@ -2,36 +2,117 @@ #include "molasses/generator_primitives.h" namespace molasses { - parser_context register_integers(parser_context ctx) - requires (architecture == architecture_t::x86_64_linux) { - ctx.types.push_back(std::make_shared("i8", 1)); - ctx.types.push_back(std::make_shared("i16", 2)); - ctx.types.push_back(std::make_shared("i32", 4)); - ctx.types.push_back(std::make_shared("i64", 8)); - ctx.types.push_back(std::make_shared("u8", 1)); - ctx.types.push_back(std::make_shared("u16", 2)); - ctx.types.push_back(std::make_shared("u32", 4)); - ctx.types.push_back(std::make_shared("u64", 8)); - - return ctx; - } - - parser_context register_i32_operations(parser_context ctx) + + + std::string marshal(const std::string& target) { + std::stringstream builder; + bool is_first = true; + for(char character : target) { + if(isalpha(character)) { + builder << character; + } else if(isdigit(character) and is_first) { + builder << "___" << (int)character << "___"; + } else { + builder << "__" << (int)character << "__"; + } + is_first = false; + } + return builder.str(); + } + + std::string escape(const std::string& target) { + std::stringstream builder; + for(char character : target) { + switch(character) { + case 0: + builder << "\\0"; + break; + case '\n': + builder << "\\n"; + break; + case '"': + builder << "\\\""; + break; + case '\t': + builder << "\\t"; + break; + case '\'': + builder << "\\'"; + break; + default: + builder << character; + } + } + return builder.str(); + } + + parser_context register_integers(parser_context ctx) requires(architecture == architecture_t::x86_64_linux) + { + ctx.types.push_back(std::make_shared("i8", 1)); + ctx.types.push_back(std::make_shared("i16", 2)); + ctx.types.push_back(std::make_shared("i32", 4)); + ctx.types.push_back(std::make_shared("i64", 8)); + ctx.types.push_back(std::make_shared("u8", 1)); + ctx.types.push_back(std::make_shared("u16", 2)); + ctx.types.push_back(std::make_shared("u32", 4)); + ctx.types.push_back(std::make_shared("u64", 8)); + + return ctx; + } + + parser_context register_i32_operations(parser_context ctx) requires (architecture == architecture_t::x86_64_linux) { - ctx.operations.emplace_back( - std::make_shared( - std::string{"+"}, - std::vector({"i32", "i32"}), - std::vector({"i32"}), - std::vector({ - " popq %rax\n", - " popq %rbx\n", - " addl %ebx, %eax\n", - " andl $0xFFFFFFFF, %eax\n", - " pushq %rax\n" - }) - ) - ); + ctx.operations.emplace_back( + std::make_shared( + std::string{"+"}, + std::vector({"i32", "i32"}), + std::vector({"i32"}), + std::vector({ + " popq %rax\n", + " popq %rbx\n", + " addl %ebx, %eax\n", + " andl $0xFFFFFFFF, %eax\n", + " pushq %rax\n" + }) + ) + ); + ctx.operations.emplace_back( + std::make_shared( + std::string{"+_i64"}, + std::vector({"i64", "i64"}), + std::vector({"i64"}), + std::vector({ + " popq %rax\n", + " popq %rbx\n", + " addq %rbx, %rax\n", + " pushq %rax\n" + }) + ) + ); + ctx.operations.emplace_back( + std::make_shared( + std::string{"/%_i64"}, + std::vector({"i64", "i64"}), + std::vector({"i64", "i64"}), + std::vector({ + " popq %rax\n", + " popq %rbx\n", + " divq %rbx, %rax\n", + " pushq %rax\n", + " pushq %rdx\n" + // TODO: this is actually unsigned division, so it needs improvements on negative numbers + }) + ) + ); + ctx.operations.emplace_back( + std::make_shared( + std::string{"u8-ptr_to_i64"}, + std::vector({"u8 ptr"}), + std::vector({"i64"}), + std::vector({ + }) + ) + ); ctx.operations.emplace_back( std::make_shared( std::string{"*"}, @@ -181,24 +262,45 @@ namespace molasses { return ctx; } - std::vector generate_call(const std::string& target) - requires (architecture == architecture_t::x86_64_linux) { - return { - " call "+target+"\n", - }; - } + std::vector generate_call(const std::string& target) + requires (architecture == architecture_t::x86_64_linux) { + return { + " call "+marshal(target)+"\n", + }; + } + + std::vector generate_string(const symbol& representation, const std::string& string_value) + requires (architecture == architecture_t::x86_64_linux) { + return { + "__EMITED_STRING_____"+std::to_string(representation.id)+"___:\n", + " .asciz \""+escape(string_value)+"\"\n", + }; + } + + std::vector generate_push_string_ptr(const symbol& representation) { + return { + " pushq $__EMITED_STRING_____"+std::to_string(representation.id)+"___\n" + }; + } - std::vector generate_push_int32(int32_t target) - requires (architecture == architecture_t::x86_64_linux) { - return { - " pushq $" +std::to_string(target)+ "\n" - }; - } + std::vector generate_push_int32(int32_t target) + requires (architecture == architecture_t::x86_64_linux) { + return { + " pushq $" +std::to_string(target)+ "\n" + }; + } + + std::vector generate_push_int64(int64_t target) + requires (architecture == architecture_t::x86_64_linux) { + return { + " pushq $" +std::to_string(target)+ "\n" + }; + } std::vector generate_label(const std::string& target) requires (architecture == architecture_t::x86_64_linux) { return { - target+":\n" + marshal(target)+":\n" }; } diff --git a/src/molasses/lexer.cpp b/src/molasses/lexer.cpp index 5e9b790..e1d7bd5 100644 --- a/src/molasses/lexer.cpp +++ b/src/molasses/lexer.cpp @@ -5,12 +5,23 @@ #include namespace molasses { - lexed_output lex(const std::string & source) { + lexed_output lex(const std::string& file_name, const std::string & source) { lexed_output output; std::map reverse_dictionary; std::stringstream builder; int token_counter = 1; - + int line = 1; + int column = 0; + + enum class state_machine_t { + normal, + string, + string_escape, + string_end, + }; + + state_machine_t state = state_machine_t::normal; + // Processes the current token into the output if it is not empty // This should be called upon reaching the end of a token const auto process_token = [&](const std::string& token) { @@ -22,10 +33,10 @@ namespace molasses { ) { reverse_dictionary[token] = token_counter; output.dictionary[token_counter] = token; - current_symbol = token_counter; + current_symbol = {token_counter, file_name, line, column, state == state_machine_t::string_end}; token_counter++; } else { - current_symbol = it->second; + current_symbol = {it->second, file_name, line, column, state == state_machine_t::string_end}; } output.symbols.push_back(current_symbol); builder = std::stringstream(); @@ -33,9 +44,47 @@ namespace molasses { }; for(auto& character : source) { - if(std::isspace(character)) { - process_token(builder.str()); - } else { + if(character == '\n') { + line++; + column = 0; + } + column++; + if(state == state_machine_t::string_escape) { + switch(character) { + case 'n': builder << '\n'; break; + case 't': builder << '\t'; break; + case '\\': [[fallthrough]]; + default: + builder << character; + } + continue; + } + if(character == '\"') { + if(builder.view().empty() && state == state_machine_t::normal) { + state = state_machine_t::string; + continue; + } else if (state == state_machine_t::string) { + state = state_machine_t::string_end; + continue; + } + } else if(character == '\\' && state == state_machine_t::string) { + state = state_machine_t::string_escape; + continue; + } + if(std::isspace(character)) { + if(state == state_machine_t::normal or state == state_machine_t::string_end) { + process_token(builder.str()); + state = state_machine_t::normal; + } else { + builder << character; + } + } else { + if(state == state_machine_t::string_end) { + std::stringstream quoted; + quoted << "\"" << builder.str() << "\""; + builder.swap(quoted); + state = state_machine_t::normal; + } builder << character; } } @@ -99,7 +148,7 @@ namespace molasses { //This diagnostic is pretty lousy, but that is what happens when keys are taken by reference #pragma clang diagnostic push #pragma ide diagnostic ignored "LocalValueEscapesScope" - old_symbol = conversions[old_symbol]; + old_symbol.id = conversions[old_symbol]; #pragma clang diagnostic pop } diff --git a/src/molasses/parser_primitives.cpp b/src/molasses/parser_primitives.cpp index d14a01a..f0e1a33 100644 --- a/src/molasses/parser_primitives.cpp +++ b/src/molasses/parser_primitives.cpp @@ -1,5 +1,4 @@ #include -#include #include #include "molasses/parser_primitives.h" #include "molasses/generator_primitives.h" @@ -26,27 +25,55 @@ namespace molasses { } return current_stack; } + + auto conditional_begin_int_parse(const auto& str, auto& value, auto& begin, auto& end) { + if(str.size() > 2 and std::string_view{begin, begin + 2} == "0x") { + return std::from_chars(begin+2, end, value, 16); + } else if (str.size() > 1 and std::string_view{begin, begin + 1} == "0") { + return std::from_chars(begin+1, end, value, 8); + } else { + return std::from_chars(begin, end, value, 10); + } + } - std::optional try_parse_int32(const std::string& str) { - int32_t value; - auto begin = str.data(); - auto end = str.data()+str.size(); - auto result = std::from_chars(begin, end, value, 10); - // TODO: Add other bases - if(result.ptr == end) { - return value; - } - return std::nullopt; - } + std::optional try_parse_int32(const std::string& str) { + int32_t value; + auto begin = str.data(); + auto end = str.data()+str.size(); + auto result = conditional_begin_int_parse(str, value, begin, end); + if(result.ptr == end) { + return value; + } else { + if(std::string_view{result.ptr, end} == "_i32") { + return value; + } + } + return std::nullopt; + } + + std::optional try_parse_int64(const std::string& str) { + int64_t value; + auto begin = str.data(); + auto end = str.data()+str.size(); + auto result = conditional_begin_int_parse(str, value, begin, end); + if(result.ptr == end) { + return std::nullopt; + } else { + if(std::string_view{result.ptr, end} == "_i64") { + return value; + } + } + return std::nullopt; + } - auto find_ptr_by_name_in_container(auto container, const auto& name) -> typeof(*std::begin(container)) { + auto find_ptr_by_name_in_container(auto container, const auto& name) -> std::remove_cvref_t { auto it = std::find_if(std::begin(container), std::end(container), [&](auto elem){ return elem->name() == name; }); if(it != std::end(container)) { return *it; } - return {}; + return nullptr; } std::shared_ptr parser_context::lookup_type(const std::string & name) const { @@ -68,9 +95,13 @@ namespace molasses { for(const auto& symbol : consumed_stream) { const auto& symbol_text = lexer_state.dictionary.at(symbol); - if(auto is_int = try_parse_int32(symbol_text); is_int) { - type_stack.emplace_back("i32"); - } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) { + if(symbol.is_string) { + type_stack.emplace_back("u8 ptr"); + } else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) { + type_stack.emplace_back("i32"); + } else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) { + type_stack.emplace_back("i64"); + } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) { type_stack = type_stack >> *is_op; } } @@ -104,7 +135,7 @@ namespace molasses { auto compact_type_modifiers = [&](const std::vector& type_info) -> std::vector { std::vector ret_val; - for(auto elem : type_info) { + for(const auto& elem : type_info) { if(elem == "ptr") { if(ret_val.empty()) throw type_expected_with_modifier_error(); ret_val.back() += " ptr"; @@ -116,31 +147,32 @@ namespace molasses { return ret_val; }; - auto parse_proc = [&](auto it) -> std::pair> { -#define CHECK_FOR_UNEXPECTED_STREAM_END \ - if(it == tokens.symbols.end()) { \ - throw expecting_token_error(); \ - } + auto parse_proc = [&](auto it) -> std::pair> { +#define check_for_unexpected_stream_end(expected, context) \ + do{if(it == tokens.symbols.end()) { \ + throw expecting_token_error(expected, context); \ + }}while(false) if(*it != PROC_KW) { - throw unexpected_token_error(); + throw unexpected_token_error(*it, tokens.dictionary[*it],tokens.dictionary[PROC_KW]); } ++it; - CHECK_FOR_UNEXPECTED_STREAM_END; + check_for_unexpected_stream_end(tokens.dictionary[PROC_KW], details::concatenate_builder("In top level, file ",it->file_name, ":", it->line, ":", it->column)); std::string name = tokens.dictionary.at(*it); + auto& name_symbol = *it; ++it; - CHECK_FOR_UNEXPECTED_STREAM_END; + check_for_unexpected_stream_end("Procedure-Name", details::concatenate_builder("In top level, file ",it->file_name, ":", it->line, ":", it->column)); // Process arguments list std::vector argument_types; while(*it != SEPARATOR_KW) { argument_types.emplace_back(tokens.dictionary.at(*it)); ++it; - CHECK_FOR_UNEXPECTED_STREAM_END; + check_for_unexpected_stream_end(tokens.dictionary[SEPARATOR_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column)); } ++it; - CHECK_FOR_UNEXPECTED_STREAM_END; + check_for_unexpected_stream_end("Procedure-Argument-List", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column)); argument_types = compact_type_modifiers(argument_types); // Process return types list @@ -148,10 +180,10 @@ namespace molasses { while(*it != DO_KW) { return_types.emplace_back(tokens.dictionary.at(*it)); ++it; - CHECK_FOR_UNEXPECTED_STREAM_END; + check_for_unexpected_stream_end(tokens.dictionary[DO_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column)); } ++it; - CHECK_FOR_UNEXPECTED_STREAM_END; + check_for_unexpected_stream_end("Procedure-Return-List", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column)); return_types = compact_type_modifiers(return_types); // Process return types list @@ -159,13 +191,13 @@ namespace molasses { while(*it != END_KW) { body.emplace_back(*it); ++it; - CHECK_FOR_UNEXPECTED_STREAM_END; + check_for_unexpected_stream_end(tokens.dictionary[END_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column)); } ++it; return std::make_pair(it, std::make_shared(name, argument_types, return_types, body)); -#undef CHECK_FOR_UNEXPECTED_STREAM_END - }; +#undef check_for_unexpected_stream_end + }; auto progress = tokens.symbols.begin(); @@ -198,16 +230,24 @@ namespace molasses { for(auto elem : _body) { auto token = lexer_data.dictionary.at(elem); - if(auto result = try_parse_int32(token); result) { + if(elem.is_string) { + for(auto&& instruction : generate_push_string_ptr(elem)) { + ops.push_back(instruction); + } + } else if(auto result = try_parse_int32(token); result) { for(auto&& instruction : generate_push_int32(result.value())) { ops.push_back(instruction); } + } else if(auto result = try_parse_int64(token); result) { + for(auto&& instruction : generate_push_int64(result.value())) { + ops.push_back(instruction); + } } else if(auto op = ctx.lookup_operation(token); op) { for(auto&& instruction : op->emit(ctx)) { ops.push_back(instruction); } } else { - throw unknown_token_error(); + throw unknown_token_error(elem); } } @@ -222,18 +262,29 @@ namespace molasses { return generate_call(name()); } - std::vector generate(generate_context ctx) { + std::vector generate(const generate_context& ctx) { std::vector generated; - for(auto instr : initialize_stack()) { + for(const auto& instr : initialize_stack()) { generated.push_back(instr); } - for(auto proc : ctx.procedures) { - for(auto instr : proc->generate(ctx.parser, ctx.lexer)) { + for(const auto& proc : ctx.procedures) { + for(const auto& instr : proc->generate(ctx.parser, ctx.lexer)) { generated.push_back(instr); } } + + std::set done; + for(const auto& value : ctx.lexer.symbols) { + if(value.is_string && not done.contains(value.id)) { + for(const auto& instr : generate_string(value, ctx.lexer.dictionary.at(value.id))) { + generated.push_back(instr); + } + done.insert(value.id); + } + } + return generated; } } diff --git a/tests/002.exp b/tests/002.exp new file mode 100644 index 0000000..fbc7caf --- /dev/null +++ b/tests/002.exp @@ -0,0 +1,45 @@ +#!/usr/bin/expect + +set SUGAR_EXECUTABLE $::env(SUGAR_EXECUTABLE) + +proc abort {reason} { + puts "test failed $reason" + exit 1 +} + +spawn -noecho $SUGAR_EXECUTABLE tests/002/exit-with-1.mol lex parse +expect { + error { abort "failed to parse" } + eof { abort "cannot find the main" } + main +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 0} { + abort "compiler crashed" +} + + +spawn -noecho $SUGAR_EXECUTABLE tests/002/exit-with-1.mol lex parse /tmp/sugar.generated generate assemble +expect { + error { abort "failed to compile" } + eof { abort "didn't run clang" } + clang +} +expect { + error { abort "failed to link" } + eof { abort "didn't run ld" } + ld +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 0} { + abort "compiler crashed" +} + +spawn -noecho /tmp/sugar.generated +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 1} { + abort "executable didn't return exit code 1 but $value instead" +} \ No newline at end of file diff --git a/tests/002/exit-with-1.mol b/tests/002/exit-with-1.mol new file mode 100644 index 0000000..c85c229 --- /dev/null +++ b/tests/002/exit-with-1.mol @@ -0,0 +1,5 @@ +__PROC__ main +__--__ +__DO__ +1_i64 30_i64 30_i64 +_i64 syscall1 drop_i64 +__END__ diff --git a/tests/003.exp b/tests/003.exp new file mode 100644 index 0000000..2b61900 --- /dev/null +++ b/tests/003.exp @@ -0,0 +1,55 @@ +#!/usr/bin/expect + +set SUGAR_EXECUTABLE $::env(SUGAR_EXECUTABLE) + +proc abort {reason} { + puts "test failed $reason" + exit 1 +} + +spawn -noecho $SUGAR_EXECUTABLE tests/003/exit-with-1.mol lex tests/003/library.mol lex merge parse +expect { + error { abort "failed to parse" } + eof { abort "cannot find the exit-syscall-number procedure" } + exit-syscall-number +} +expect { + error { abort "failed to parse" } + eof { abort "cannot find the main" } + main +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 0} { + abort "compiler crashed" +} + + +spawn -noecho $SUGAR_EXECUTABLE tests/003/exit-with-1.mol lex tests/003/library.mol lex merge parse /tmp/sugar.generated generate assemble +expect { + error { abort "failed to compile" } + eof { abort "didn't run clang" } + clang +} +expect { + error { abort "failed to link" } + eof { abort "didn't run ld" } + ld +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 0} { + abort "compiler crashed" +} + +spawn -noecho /tmp/sugar.generated +expect { + error { abort "failed to compile" } + eof { abort "didn't output" } + Hello +} +expect eof +lassign [wait] pid spawnid os_error_flag value +if {$value != 1} { + abort "executable didn't return exit code 1 but $value instead" +} \ No newline at end of file diff --git a/tests/003/exit-with-1.mol b/tests/003/exit-with-1.mol new file mode 100644 index 0000000..ef0238e --- /dev/null +++ b/tests/003/exit-with-1.mol @@ -0,0 +1,6 @@ +__PROC__ main +__--__ +__DO__ +11_i64 "Hello world" write-out +1_i64 exit +__END__ diff --git a/tests/003/library.mol b/tests/003/library.mol new file mode 100644 index 0000000..0305152 --- /dev/null +++ b/tests/003/library.mol @@ -0,0 +1,35 @@ +__PROC__ write-syscall-number +__--__ +i64 +__DO__ +1_i64 +__END__ + +__PROC__ stdout-fd +__--__ +i64 +__DO__ +1_i64 +__END__ + +__PROC__ exit-syscall-number +__--__ +i64 +__DO__ +60_i64 +__END__ + +__PROC__ exit +i64 +__--__ +__DO__ +exit-syscall-number syscall1 drop_i64 +__END__ + +__PROC__ write-out +i64 +u8 ptr +__--__ +__DO__ +u8-ptr_to_i64 stdout-fd write-syscall-number syscall3 drop_i64 +__END__ \ No newline at end of file