#include #include #include #include "molasses/parser_primitives.h" namespace molasses { parser_context register_integers(parser_context ctx) { ctx.types.push_back(std::make_shared("i8",1)); ctx.types.push_back(std::make_shared("i16",2)); ctx.types.push_back(std::make_shared("i32",4)); ctx.types.push_back(std::make_shared("i64",8)); ctx.types.push_back(std::make_shared("u8",1)); ctx.types.push_back(std::make_shared("u16",2)); ctx.types.push_back(std::make_shared("u32",4)); ctx.types.push_back(std::make_shared("u64",8)); return ctx; } std::vector operator>>(std::vector current_stack, const operation& next_op) { { auto args = next_op.argument_types(); while(not (args.empty() or current_stack.empty())) { if(current_stack.back() != args.back()) { throw TypeInputError(); } else { args.pop_back(); current_stack.pop_back(); } } if(not args.empty()) { throw ValueMissingError(); } } { auto return_types = next_op.return_types(); std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack)); } return current_stack; } std::optional try_parse_int32(const std::string& str) { int32_t value; auto begin = str.data(); auto end = str.data()+str.size(); auto result = std::from_chars(begin, end, value, 10); // TODO: Add other bases if(result.ptr == end) { return value; } return std::nullopt; } auto find_ptr_by_name_in_container(auto container, const auto& name) -> typeof(*std::begin(container)) { auto it = std::find_if(std::begin(container), std::end(container), [&](auto elem){ return elem->name() == name; }); if(it != std::end(container)) { return *it; } return {}; } std::shared_ptr parser_context::lookup_type(const std::string & name) const { return find_ptr_by_name_in_container(types, name); } std::shared_ptr parser_context::lookup_operation(const std::string & name) const { return find_ptr_by_name_in_container(operations, name); } bool type_check( const parser_context& parser_state, const lexed_output& lexer_state, const std::vector& consumed_stream, std::vector execution_input, const std::vector& execution_output ) { auto& type_stack = execution_input; for(const auto& symbol : consumed_stream) { const auto& symbol_text = lexer_state.dictionary.at(symbol); if(auto is_int = try_parse_int32(symbol_text); is_int) { type_stack.emplace_back("i32"); } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) { type_stack = type_stack >> *is_op; } } return type_stack == execution_output; } std::vector initialize_stack() { return { ".bss\n",// TODO: make threadlocal "stack_instruction:\n", " .quad 0\n", ".text\n", "initialize_callstack:\n", " movq $9, %rax\n", " movq $0, %rdi\n", " movq $8192, %rsi\n", " movq $3, %rdx\n", " movq $34, %r10\n", " movq $-1, %r8\n", " movq $0, %r9\n", " syscall\n", " movq %rax, (stack_instruction)\n", " retq\n", }; } parser_context parse(parser_context ctx, const lexed_output& lexer_data) { enum op : int { DO_KW = 1, SEPARATOR_KW, PROC_KW, END_KW }; lexed_output fake; fake.dictionary[PROC_KW] = "__PROC__"; fake.dictionary[SEPARATOR_KW] = "__--__"; fake.dictionary[DO_KW] = "__DO__"; fake.dictionary[END_KW] = "__END__"; auto tokens = concatenate(fake, lexer_data); std::vector> parsed_procedures; auto parse_proc = [&](auto it) -> std::pair> { #define CHECK_FOR_UNEXPECTED_STREAM_END \ if(it == tokens.symbols.end()) { \ throw ExpectingTokenError(); \ } if(*it != PROC_KW) { throw UnexpectedTokenError(); } ++it; CHECK_FOR_UNEXPECTED_STREAM_END; std::string name = tokens.dictionary.at(*it); ++it; CHECK_FOR_UNEXPECTED_STREAM_END; if(it == tokens.symbols.end()) { throw ExpectingTokenError(); } // Process arguments list std::vector argument_types; while(*it != SEPARATOR_KW) { argument_types.emplace_back(tokens.dictionary.at(*it)); ++it; CHECK_FOR_UNEXPECTED_STREAM_END; } ++it; CHECK_FOR_UNEXPECTED_STREAM_END; // Process return types list std::vector return_types; while(*it != DO_KW) { return_types.emplace_back(tokens.dictionary.at(*it)); ++it; CHECK_FOR_UNEXPECTED_STREAM_END; } ++it; CHECK_FOR_UNEXPECTED_STREAM_END; // Process return types list std::vector body; while(*it != END_KW) { body.emplace_back(*it); ++it; CHECK_FOR_UNEXPECTED_STREAM_END; } ++it; return std::make_pair(it, std::make_shared(name, argument_types, return_types, body)); #undef CHECK_FOR_UNEXPECTED_STREAM_END }; auto progress = tokens.symbols.begin(); do { auto [iterator, procedure] = parse_proc(progress); ctx.operations.push_back(procedure); parsed_procedures.emplace_back(std::move(procedure)); progress = iterator; } while (progress != tokens.symbols.end()); for(auto& proc : parsed_procedures) { if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) { throw ProcedureStackError(); } } std::vector generated; for(auto instr : initialize_stack()) { generated.push_back(instr); } for(auto proc : parsed_procedures) { for(auto instr : proc->generate(ctx, tokens)) { generated.push_back(instr); } } for(auto line : generated) { std::cout << line; } return ctx; } std::vector generate_call(std::string target) { static uint64_t label_count= 0; return { " movq return_label_n"+std::to_string(label_count)+", (stack_instruction)\n", " addq $8, stack_instruction\n", " jmp "+target+"\n", " return_label_n"+std::to_string(label_count++)+":" }; } std::vector generate_push_int32(int32_t target) { return { " pushq $" +std::to_string(target)+ "\n" }; } std::vector procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const { std::vector ops; ops.emplace_back(name()+":\n"); for(auto elem : _body) { auto token = lexer_data.dictionary.at(elem); if(auto result = try_parse_int32(token); result) { for(auto&& elem : generate_push_int32(result.value())) { ops.push_back(elem); } } else if(auto op = ctx.lookup_operation(token); op) { for(auto&& elem : op->emit(ctx)) { ops.push_back(elem); } } else { throw UnknownTokenError(); } } ops.emplace_back(" // Return to caller\n"); ops.emplace_back(" addq $-8, stack_instruction\n"); ops.emplace_back(" movq (stack_instruction), %rax\n"); ops.emplace_back(" pushq %rax\n"); ops.emplace_back(" retq\n"); return ops; } std::vector procedure_operation::emit(const parser_context& ctx) const { return generate_call(name()); } }