| @ -0,0 +1,4 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <project version="4"> | |||
| <component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" /> | |||
| </project> | |||
| @ -0,0 +1,8 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <project version="4"> | |||
| <component name="ProjectModuleManager"> | |||
| <modules> | |||
| <module fileurl="file://$PROJECT_DIR$/.idea/sugar.iml" filepath="$PROJECT_DIR$/.idea/sugar.iml" /> | |||
| </modules> | |||
| </component> | |||
| </project> | |||
| @ -0,0 +1,2 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <module classpath="CMake" type="CPP_MODULE" version="4" /> | |||
| @ -0,0 +1,6 @@ | |||
| <?xml version="1.0" encoding="UTF-8"?> | |||
| <project version="4"> | |||
| <component name="VcsDirectoryMappings"> | |||
| <mapping directory="$PROJECT_DIR$" vcs="Git" /> | |||
| </component> | |||
| </project> | |||
| @ -0,0 +1,10 @@ | |||
cmake_minimum_required(VERSION 3.24)
project(sugar)

find_package(cppfront REQUIRED)

# The sources use C++20 features (operator<=>, designated initializers,
# `auto` function parameters). Without CMAKE_CXX_STANDARD_REQUIRED, CMake is
# allowed to fall back to an older standard when the compiler lacks C++20.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(sugar
    src/main.cpp
    src/molasses/lexer.cpp
    include/molasses/lexer.h
    src/molasses/parser_primitives.cpp
    include/molasses/parser_primitives.h
)

# Headers are included as "molasses/...", i.e. relative to include/.
target_include_directories(sugar PRIVATE include)
| @ -0,0 +1,17 @@ | |||
| #pragma once | |||
| #include <vector> | |||
| #include <map> | |||
| #include <string> | |||
| namespace molasses { | |||
// A symbol identifies one distinct token text. It stays a plain int so that
// it can be used directly as a dictionary key.
using symbol = int;

// What the lexer produces: the token stream and the text behind each symbol.
struct lexed_output {
    // Text of every symbol, keyed by the symbol's integer value.
    std::map<symbol, std::string> dictionary;
    // The token stream, as a sequence of symbols.
    std::vector<symbol> symbols;
};
| lexed_output lex(const std::string &); | |||
| lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs); | |||
| } | |||
| @ -0,0 +1,144 @@ | |||
| #pragma once | |||
#include <charconv>
#include <compare>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "molasses/lexer.h"
| namespace molasses { | |||
// Abstract description of a value type known to the parser.
struct type {
    // Implementations are held through pointers to this base class, so a
    // virtual destructor keeps deletion through a base pointer well-defined.
    virtual ~type() = default;
    // Name identifying the type; lookups and comparisons go through it.
    [[nodiscard]] virtual std::string name() const = 0;
    // Size of the type in bytes.
    [[nodiscard]] virtual size_t byte_size() const = 0;
};
| inline auto operator<=>(const type& lhs, const type& rhs) { | |||
| return lhs.name() <=> rhs.name(); | |||
| } | |||
| struct primitive_type : public type { | |||
| std::string _name; | |||
| size_t _byte_size; | |||
| primitive_type(std::string name, size_t byte_size) | |||
| : _name(std::forward<std::string>(name)) | |||
| , _byte_size(byte_size) | |||
| {} | |||
| [[nodiscard]] std::string name() const final { | |||
| return _name; | |||
| } | |||
| [[nodiscard]] size_t byte_size() const final { | |||
| return _byte_size; | |||
| }; | |||
| }; | |||
struct parser_context;

// Abstract description of something that consumes values from the type stack
// and produces new ones.
struct operation {
    // Implementations are held through pointers to this base class, so a
    // virtual destructor keeps deletion through a base pointer well-defined.
    virtual ~operation() = default;
    // Name identifying the operation; lookups and comparisons go through it.
    [[nodiscard]] virtual std::string name() const = 0;
    // Names of the types the operation expects, the last one being on top of the stack.
    [[nodiscard]] virtual std::vector<std::string> argument_types() const = 0;
    // Names of the types the operation pushes, in push order.
    [[nodiscard]] virtual std::vector<std::string> return_types() const = 0;
    // Text lines generated for the operation.
    // TODO: return a dedicated instruction type instead of strings.
    [[nodiscard]] virtual std::vector<std::string> generate(const parser_context&) const = 0;
};
| struct primitive_operation : public operation { | |||
| std::string _name; | |||
| std::vector<std::string> _args; | |||
| std::vector<std::string> _rets; | |||
| std::vector<std::string> _instructions; | |||
| primitive_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets) | |||
| : _name(std::forward<std::string>(name)) | |||
| , _args(std::forward<std::vector<std::string>>(args)) | |||
| , _rets(std::forward<std::vector<std::string>>(rets)) | |||
| {} | |||
| [[nodiscard]] std::string name() const final { | |||
| return _name; | |||
| } | |||
| [[nodiscard]] std::vector<std::string> argument_types() const final { | |||
| return _args; | |||
| } | |||
| [[nodiscard]] std::vector<std::string> return_types() const final { | |||
| return _rets; | |||
| } | |||
| [[nodiscard]] std::vector<std::string> generate(const parser_context&) const final { | |||
| return _instructions; | |||
| } | |||
| }; | |||
| struct procedure_operation : public operation { | |||
| std::string _name; | |||
| std::vector<std::string> _args; | |||
| std::vector<std::string> _rets; | |||
| std::vector<symbol> _body; | |||
| procedure_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets, std::vector<symbol> body) | |||
| : _name(std::forward<std::string>(name)) | |||
| , _args(std::forward<std::vector<std::string>>(args)) | |||
| , _rets(std::forward<std::vector<std::string>>(rets)) | |||
| , _body(std::forward<std::vector<symbol>>(body)) | |||
| {} | |||
| [[nodiscard]] std::string name() const final { | |||
| return _name; | |||
| } | |||
| [[nodiscard]] std::vector<std::string> argument_types() const final { | |||
| return _args; | |||
| } | |||
| [[nodiscard]] std::vector<std::string> return_types() const final { | |||
| return _rets; | |||
| } | |||
| [[nodiscard]] std::vector<std::string> generate(const parser_context&) const final; | |||
| }; | |||
| inline auto operator<=>(const operation& lhs, const operation& rhs) { | |||
| return lhs.name() <=> rhs.name(); | |||
| } | |||
// Raised when the type on top of the stack is not the one an operation expects.
// TODO: Better error message
struct TypeInputError : std::runtime_error {
    TypeInputError()
        : std::runtime_error{"Bad type provided"}
    {}
};
// Raised when an operation still expects arguments but the stack is empty.
// TODO: Better error message
struct ValueMissingError : std::runtime_error {
    ValueMissingError()
        : std::runtime_error{"Expected value, none provided"}
    {}
};
// Raised when a procedure body does not leave the stack matching its declared return types.
// TODO: Better error message
struct ProcedureStackError : std::runtime_error {
    ProcedureStackError()
        : std::runtime_error{"Expected the stack to look like the return stack upon completion"}
    {}
};
// Raised when the parser meets a token it cannot accept at that position.
// TODO: Better error message
struct UnexpectedTokenError : std::runtime_error {
    UnexpectedTokenError()
        : std::runtime_error{"An unexpected token has been encountered"}
    {}
};
// Raised when the token stream ends while the parser still needs more tokens.
// TODO: Better error message
struct ExpectingTokenError : std::runtime_error {
    ExpectingTokenError()
        : std::runtime_error{"An expected token has not been encountered before the end of the input"}
    {}
};
| std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op); | |||
| std::optional<int32_t> try_parse_int32(const std::string& str); | |||
| struct parser_context { | |||
| std::vector<std::shared_ptr<type>> types; | |||
| std::vector<std::shared_ptr<operation>> operations; | |||
| [[nodiscard]] std::shared_ptr<type> lookup_type(const std::string&) const; | |||
| [[nodiscard]] std::shared_ptr<operation> lookup_operation(const std::string&) const; | |||
| }; | |||
| parser_context parse(parser_context, const lexed_output&); | |||
| parser_context register_integers(parser_context); | |||
| bool type_check(const parser_context&, const lexed_output&, const std::vector<symbol>&, std::vector<std::string> execution_input, const std::vector<std::string>& execution_output); | |||
| } | |||
| @ -0,0 +1,7 @@ | |||
| __PROC__ procedure_name | |||
| i16 i8 | |||
| __--__ | |||
| i32 | |||
| __DO__ | |||
| i32 __CAST__ __SWAP__ i32 __CAST__ * | |||
| __END__ | |||
| @ -0,0 +1,17 @@ | |||
| __PROC__ write | |||
| i64 i8 ptr i64 | |||
| __--__ | |||
| i32 | |||
| __DO__ | |||
| __LET__ size, ptr, fd | |||
| size ptr __CAST_I64__ fd 1_i64 __SYSCALL4__ | |||
| __END_LET__ | |||
| __END__ | |||
| proc __DEREF_I64_PTR__ | |||
| i64 ptr | |||
| -- | |||
| i64 | |||
| DO | |||
| END | |||
| @ -0,0 +1,50 @@ | |||
| #include "molasses/lexer.h" | |||
| #include "molasses/parser_primitives.h" | |||
| #include <iostream> | |||
| int main() { | |||
| /* | |||
| molasses::lexed_output initial; | |||
| initial.dictionary[1] = "+"; | |||
| { | |||
| auto v = molasses::lex("hello hello potato 128 hello 128 +"); | |||
| auto v2 = molasses::lex("salad hello potato 129 hello 128"); | |||
| for (auto symbol: v.symbols) { | |||
| std::cout << "v: " << symbol << " - " << v.dictionary.at(symbol) << "\n"; | |||
| } | |||
| std::cout << "\n"; | |||
| for (auto symbol: v2.symbols) { | |||
| std::cout << "v2: " << symbol << " - " << v2.dictionary.at(symbol) << "\n"; | |||
| } | |||
| auto v_merged = molasses::concatenate(initial, molasses::concatenate(v, v2)); | |||
| std::cout << "\n"; | |||
| for (auto symbol: v_merged.symbols) { | |||
| std::cout << "v_merged: " << symbol << " - " << v_merged.dictionary.at(symbol) << "\n"; | |||
| } | |||
| } | |||
| auto v = molasses::lex("1 2 +"); | |||
| molasses::parser_context ctx; | |||
| ctx = molasses::register_integers(ctx); | |||
| ctx.operations.emplace_back(std::make_shared<molasses::primitive_operation>(std::string{"+"}, std::vector<std::string>({"i32", "i32"}), std::vector<std::string>({"i32"}))); | |||
| if(molasses::type_check(ctx, v, v.symbols, {}, {"i32"})) { | |||
| std::cout << "Checks out\n"; | |||
| }*/ | |||
| auto lexed = molasses::lex("__PROC__ sum\n" | |||
| "i32 i32\n" | |||
| "__--__\n" | |||
| "i32\n" | |||
| "__DO__\n" | |||
| "+\n" | |||
| "__END__"); | |||
| molasses::parser_context ctx; | |||
| ctx = molasses::register_integers(ctx); | |||
| ctx.operations.emplace_back(std::make_shared<molasses::primitive_operation>(std::string{"+"}, std::vector<std::string>({"i32", "i32"}), std::vector<std::string>({"i32"}))); | |||
| molasses::parse(ctx, lexed); | |||
| } | |||
| @ -0,0 +1,110 @@ | |||
| #include "molasses/lexer.h" | |||
| #include <algorithm> | |||
| #include <sstream> | |||
| #include <iostream> | |||
| namespace molasses { | |||
| lexed_output lex(const std::string & source) { | |||
| lexed_output output; | |||
| std::map<std::string, int> reverse_dictionary; | |||
| std::stringstream builder; | |||
| int token_counter = 1; | |||
| // Processes the current token into the output if it is not empty | |||
| // This should be called upon reaching the end of a token | |||
| const auto process_token = [&](const std::string& token) { | |||
| if(not token.empty()) { | |||
| symbol current_symbol; | |||
| if( | |||
| auto it = reverse_dictionary.find(token); | |||
| it == reverse_dictionary.end() | |||
| ) { | |||
| reverse_dictionary[token] = token_counter; | |||
| output.dictionary[token_counter] = token; | |||
| current_symbol = token_counter; | |||
| token_counter++; | |||
| } else { | |||
| current_symbol = it->second; | |||
| } | |||
| output.symbols.push_back(current_symbol); | |||
| builder = std::stringstream(); | |||
| } | |||
| }; | |||
| for(auto& character : source) { | |||
| if(std::isspace(character)) { | |||
| process_token(builder.str()); | |||
| } else { | |||
| builder << character; | |||
| } | |||
| } | |||
| process_token(builder.str()); // process the last token if needed | |||
| return output; | |||
| } | |||
| using conversion_table = std::map<int, int>; | |||
| lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs) { | |||
| // primitive that flips keys and values of dictionaries | |||
| constexpr auto dictionary_reversal = [](auto& destination,const auto& source) { | |||
| for(auto& it : source) { | |||
| destination.insert_or_assign(it.second, it.first); | |||
| } | |||
| }; | |||
| // primitive that merges a dictionary into a reversed one and returns a conversion table of symbols | |||
| // from the dictionary to the newly generated reverse dictionary | |||
| auto build_reverse_dictionary = [dictionary_reversal] (auto& reverse_dictionary, auto dictionary) -> conversion_table { | |||
| // Make the right dictionary into a reverse dictionary | |||
| std::map<std::string, int> right_reverse_dictionary; | |||
| dictionary_reversal(right_reverse_dictionary, dictionary); | |||
| // find the maximum token id in the left dictionary | |||
| int max_token = 0; | |||
| if(not reverse_dictionary.empty()) { | |||
| max_token = std::max_element( | |||
| reverse_dictionary.begin(), | |||
| reverse_dictionary.end(), | |||
| [](const auto &lhs, const auto &rhs) -> bool { | |||
| return lhs.second < rhs.second; | |||
| } | |||
| )->second; | |||
| } | |||
| // make the conversions and update the reverse dictionary | |||
| conversion_table conversions; | |||
| for(auto& [key, value] : right_reverse_dictionary) { | |||
| if(auto match = reverse_dictionary.find(key); match != reverse_dictionary.end()) { | |||
| conversions[value] = match->second; | |||
| } else { | |||
| max_token+=1; | |||
| conversions[value] = max_token; | |||
| reverse_dictionary[key] = max_token; | |||
| } | |||
| } | |||
| return conversions; | |||
| }; | |||
| std::map<std::string, int> reverse_dictionary; | |||
| dictionary_reversal(reverse_dictionary, lhs.dictionary); | |||
| auto conversions = build_reverse_dictionary(reverse_dictionary, rhs.dictionary); | |||
| auto symbol_stream = rhs.symbols; | |||
| lexed_output output{.symbols = lhs.symbols}; | |||
| for(auto& old_symbol : symbol_stream) { | |||
| //This diagnostic is pretty lousy, but that is what happens when keys are taken by reference | |||
| #pragma clang diagnostic push | |||
| #pragma ide diagnostic ignored "LocalValueEscapesScope" | |||
| old_symbol = conversions[old_symbol]; | |||
| #pragma clang diagnostic pop | |||
| } | |||
| dictionary_reversal(output.dictionary, reverse_dictionary); | |||
| std::copy(symbol_stream.begin(), symbol_stream.end(), std::back_inserter(output.symbols)); | |||
| return output; | |||
| } | |||
| } | |||
| @ -0,0 +1,228 @@ | |||
| #include <algorithm> | |||
| #include <cassert> | |||
| #include "molasses/parser_primitives.h" | |||
| namespace molasses { | |||
| parser_context register_integers(parser_context ctx) { | |||
| ctx.types.push_back(std::make_shared<primitive_type>("i8",1)); | |||
| ctx.types.push_back(std::make_shared<primitive_type>("i16",2)); | |||
| ctx.types.push_back(std::make_shared<primitive_type>("i32",4)); | |||
| ctx.types.push_back(std::make_shared<primitive_type>("i64",8)); | |||
| ctx.types.push_back(std::make_shared<primitive_type>("u8",1)); | |||
| ctx.types.push_back(std::make_shared<primitive_type>("u16",2)); | |||
| ctx.types.push_back(std::make_shared<primitive_type>("u32",4)); | |||
| ctx.types.push_back(std::make_shared<primitive_type>("u64",8)); | |||
| return ctx; | |||
| } | |||
| std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) { | |||
| { | |||
| auto args = next_op.argument_types(); | |||
| while(not (args.empty() or current_stack.empty())) { | |||
| if(current_stack.back() != args.back()) { | |||
| throw TypeInputError(); | |||
| } else { | |||
| args.pop_back(); | |||
| current_stack.pop_back(); | |||
| } | |||
| } | |||
| if(not args.empty()) { | |||
| throw ValueMissingError(); | |||
| } | |||
| } | |||
| { | |||
| auto return_types = next_op.return_types(); | |||
| std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack)); | |||
| } | |||
| return current_stack; | |||
| } | |||
// Parses `str` as a base-10 signed 32-bit integer.
//
// Returns the value only when the whole string is a valid number that fits in
// int32_t. Returns std::nullopt for an empty string, for trailing or leading
// garbage, and for values out of range.
std::optional<int32_t> try_parse_int32(const std::string& str) {
    int32_t value = 0;
    const char* const begin = str.data();
    const char* const end = begin + str.size();
    // TODO: Add other bases
    const auto result = std::from_chars(begin, end, value, 10);
    // The error code must be checked too: on an empty or out-of-range input
    // from_chars can stop exactly at `end` without having written `value`.
    if(result.ec != std::errc{} or result.ptr != end) {
        return std::nullopt;
    }
    return value;
}
// Returns a copy of the first element of `container` whose `->name()` equals
// `name`, or a value-initialized element (an empty pointer for smart pointers)
// when there is none.
//
// The container is taken by reference and elements are inspected in place, so
// a lookup copies nothing but the returned element.
template<typename Container, typename Name>
typename Container::value_type find_ptr_by_name_in_container(const Container& container, const Name& name) {
    const auto match = std::find_if(std::begin(container), std::end(container), [&](const auto& candidate) {
        return candidate->name() == name;
    });
    if(match == std::end(container)) {
        return {};
    }
    return *match;
}
| std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const { | |||
| return find_ptr_by_name_in_container(types, name); | |||
| } | |||
| std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const { | |||
| return find_ptr_by_name_in_container(operations, name); | |||
| } | |||
| bool type_check( | |||
| const parser_context& parser_state, | |||
| const lexed_output& lexer_state, | |||
| const std::vector<symbol>& consumed_stream, | |||
| std::vector<std::string> execution_input, | |||
| const std::vector<std::string>& execution_output | |||
| ) { | |||
| auto& type_stack = execution_input; | |||
| for(const auto& symbol : consumed_stream) { | |||
| const auto& symbol_text = lexer_state.dictionary.at(symbol); | |||
| if(auto is_int = try_parse_int32(symbol_text); is_int) { | |||
| type_stack.emplace_back("i32"); | |||
| } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) { | |||
| type_stack = type_stack >> *is_op; | |||
| } | |||
| } | |||
| return type_stack == execution_output; | |||
| } | |||
| parser_context parse(parser_context ctx, const lexed_output& lexer_data) { | |||
| enum op : int { | |||
| DO_KW = 1, | |||
| SEPARATOR_KW, | |||
| PROC_KW, | |||
| END_KW | |||
| }; | |||
| lexed_output fake; | |||
| fake.dictionary[PROC_KW] = "__PROC__"; | |||
| fake.dictionary[SEPARATOR_KW] = "__--__"; | |||
| fake.dictionary[DO_KW] = "__DO__"; | |||
| fake.dictionary[END_KW] = "__END__"; | |||
| auto tokens = concatenate(fake, lexer_data); | |||
| std::vector<std::shared_ptr<procedure_operation>> parsed_procedures; | |||
| auto parse_proc = [&](auto it) -> std::pair<typeof(it), std::shared_ptr<procedure_operation>> { | |||
| #define CHECK_FOR_UNEXPECTED_STREAM_END \ | |||
| if(it == tokens.symbols.end()) { \ | |||
| throw ExpectingTokenError(); \ | |||
| } | |||
| if(*it != PROC_KW) { | |||
| throw UnexpectedTokenError(); | |||
| } | |||
| ++it; | |||
| CHECK_FOR_UNEXPECTED_STREAM_END; | |||
| std::string name = tokens.dictionary.at(*it); | |||
| ++it; | |||
| CHECK_FOR_UNEXPECTED_STREAM_END; | |||
| if(it == tokens.symbols.end()) { | |||
| throw ExpectingTokenError(); | |||
| } | |||
| // Process arguments list | |||
| std::vector<std::string> argument_types; | |||
| while(*it != SEPARATOR_KW) { | |||
| argument_types.emplace_back(tokens.dictionary.at(*it)); | |||
| ++it; | |||
| CHECK_FOR_UNEXPECTED_STREAM_END; | |||
| } | |||
| ++it; | |||
| CHECK_FOR_UNEXPECTED_STREAM_END; | |||
| // Process return types list | |||
| std::vector<std::string> return_types; | |||
| while(*it != DO_KW) { | |||
| return_types.emplace_back(tokens.dictionary.at(*it)); | |||
| ++it; | |||
| CHECK_FOR_UNEXPECTED_STREAM_END; | |||
| } | |||
| ++it; | |||
| CHECK_FOR_UNEXPECTED_STREAM_END; | |||
| // Process return types list | |||
| std::vector<symbol> body; | |||
| while(*it != END_KW) { | |||
| body.emplace_back(*it); | |||
| ++it; | |||
| CHECK_FOR_UNEXPECTED_STREAM_END; | |||
| } | |||
| ++it; | |||
| return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body)); | |||
| #undef CHECK_FOR_UNEXPECTED_STREAM_END | |||
| }; | |||
| auto [iterator, procedure] = parse_proc(tokens.symbols.begin()); | |||
| ctx.operations.push_back(procedure); | |||
| parsed_procedures.emplace_back(std::move(procedure)); | |||
| for(auto& proc : parsed_procedures) { | |||
| if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) { | |||
| throw ProcedureStackError(); | |||
| } | |||
| } | |||
| return ctx; | |||
| } | |||
// Returns the assembly lines that declare `stack_instruction` (one zeroed
// quad in .bss) and define the `initialize_callstack` routine, which performs
// syscall number 9 with a length of 8192 and stores the result in
// `stack_instruction`.
// NOTE(review): presumably the Linux x86-64 mmap call that allocates the
// return stack - confirm.
//
// Every returned line ends with '\n', so the lines can be concatenated as is.
std::vector<std::string> initialize_stack() {
    return {
        ".bss\n",// TODO: make threadlocal
        "stack_instruction:\n",
        " .quad 0\n",
        ".text\n",
        "initialize_callstack:\n",
        " movq $9, %rax\n",
        " movq $0, %rdi\n",
        " movq $8192, %rsi\n",
        " movq $3, %rdx\n",
        " movq $34, %r10\n",
        " movq $-1, %r8\n",
        " movq $0, %r9\n",
        " syscall\n",
        " movq %rax, (stack_instruction)\n",
        " retq\n",
    };
}
// Returns the assembly lines for a call to `target`: store a return label,
// bump `stack_instruction` by 8, jump to `target`, then define the label.
//
// Each call takes a fresh label number from a function-local counter
// starting at 0. Every returned line ends with '\n'.
// NOTE(review): the first line names the label as a plain operand, without
// an immediate prefix - confirm this is the intended addressing mode.
std::vector<std::string> generate_call(std::string target) {
    static uint64_t label_count= 0;
    const std::string return_label = "return_label_n"+std::to_string(label_count++);
    return {
        "movq "+return_label+", (stack_instruction)\n",
        "addq $8, stack_instruction\n",
        "jmp "+target+"\n",
        return_label+":\n"
    };
}
| std::vector<std::string> procedure_operation::generate(const parser_context& ctx) const { | |||
| size_t initial_stack = 0; | |||
| size_t final_stack = 0; | |||
| for(const auto& elem : argument_types()) { | |||
| initial_stack += ctx.lookup_type(elem)->byte_size(); | |||
| } | |||
| for(const auto& elem : return_types()) { | |||
| final_stack += ctx.lookup_type(elem)->byte_size(); | |||
| } | |||
| std::vector<std::string> ops; | |||
| ops.emplace_back(name()+":\n"); | |||
| // Return to caller | |||
| ops.emplace_back(" addq $-8, stack_instruction\n"); | |||
| ops.emplace_back(" movq (stack_instruction), %rax\n"); | |||
| ops.emplace_back(" pushq %rax\n"); | |||
| ops.emplace_back(" retq\n"); | |||
| return ops; | |||
| } | |||
| } | |||