#include #include #include "molasses/parser_primitives.h" #include "molasses/generator_primitives.h" namespace molasses { std::vector operator>>(std::vector current_stack, const operation& next_op) { { auto args = next_op.argument_types(); while(not (args.empty() or current_stack.empty())) { if(current_stack.back() != args.back()) { throw type_input_error(); } else { args.pop_back(); current_stack.pop_back(); } } if(not args.empty()) { throw value_missing_error(); } } { auto return_types = next_op.return_types(); std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack)); } return current_stack; } auto conditional_begin_int_parse(const auto& str, auto& value, auto& begin, auto& end) { if(str.size() > 2 and std::string_view{begin, begin + 2} == "0x") { return std::from_chars(begin+2, end, value, 16); } else if (str.size() > 1 and std::string_view{begin, begin + 1} == "0") { return std::from_chars(begin+1, end, value, 8); } else { return std::from_chars(begin, end, value, 10); } } std::optional try_parse_int32(const std::string& str) { int32_t value; auto begin = str.data(); auto end = str.data()+str.size(); auto result = conditional_begin_int_parse(str, value, begin, end); if(result.ptr == end) { return value; } else { if(std::string_view{result.ptr, end} == "_i32") { return value; } } return std::nullopt; } std::optional try_parse_int64(const std::string& str) { int64_t value; auto begin = str.data(); auto end = str.data()+str.size(); auto result = conditional_begin_int_parse(str, value, begin, end); if(result.ptr == end) { return std::nullopt; } else { if(std::string_view{result.ptr, end} == "_i64") { return value; } } return std::nullopt; } auto find_ptr_by_name_in_container(auto container, const auto& name) -> std::remove_cvref_t { auto it = std::find_if(std::begin(container), std::end(container), [&](auto elem){ return elem->name() == name; }); if(it != std::end(container)) { return *it; } return nullptr; } std::shared_ptr parser_context::lookup_type(const std::string & name) const { return find_ptr_by_name_in_container(types, name); } std::shared_ptr parser_context::lookup_operation(const std::string & name) const { return find_ptr_by_name_in_container(operations, name); } bool type_check( const parser_context& parser_state, const lexed_output& lexer_state, const std::vector& consumed_stream, std::vector execution_input, const std::vector& execution_output ) { auto& type_stack = execution_input; for(const auto& symbol : consumed_stream) { const auto& symbol_text = lexer_state.dictionary.at(symbol); if(symbol.is_string) { type_stack.emplace_back("u8 ptr"); } else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) { type_stack.emplace_back("i32"); } else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) { type_stack.emplace_back("i64"); } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) { type_stack = type_stack >> *is_op; } } return type_stack == execution_output; } generate_context parse(parser_context ctx, const lexed_output& lexer_data) { enum op : int { DO_KW = 1, SEPARATOR_KW, PROC_KW, END_KW }; lexed_output fake; fake.dictionary[PROC_KW] = "__PROC__"; fake.dictionary[SEPARATOR_KW] = "__--__"; fake.dictionary[DO_KW] = "__DO__"; fake.dictionary[END_KW] = "__END__"; auto tokens = concatenate(fake, lexer_data); std::vector> parsed_procedures; auto register_pointer_type = [&](std::string full_ptr_type_name) -> void { if(auto type = ctx.lookup_type(full_ptr_type_name); !type) { ctx.types.push_back(std::make_shared(std::move(full_ptr_type_name), architecture_ptr_size)); } }; auto compact_type_modifiers = [&](const std::vector& type_info) -> std::vector { std::vector ret_val; for(const auto& elem : type_info) { if(elem == "ptr") { if(ret_val.empty()) throw type_expected_with_modifier_error(); ret_val.back() += " ptr"; register_pointer_type(ret_val.back()); } else { ret_val.push_back(elem); } } return ret_val; }; auto parse_proc = [&](auto it) -> std::pair> { #define check_for_unexpected_stream_end(expected, context) \ do{if(it == tokens.symbols.end()) { \ throw expecting_token_error(expected, context); \ }}while(false) decltype(it) last_valid; if(*it != PROC_KW) { throw unexpected_token_error(*it, tokens.dictionary[*it],tokens.dictionary[PROC_KW]); } last_valid = it; ++it; check_for_unexpected_stream_end("Procedure-Name", details::concatenate_builder("In top level, file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column)); std::string name = tokens.dictionary.at(*it); auto& name_symbol = *it; last_valid = it; ++it; check_for_unexpected_stream_end(tokens.dictionary[SEPARATOR_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column)); // Process arguments list std::vector argument_types; while(*it != SEPARATOR_KW) { argument_types.emplace_back(tokens.dictionary.at(*it)); last_valid = it; ++it; check_for_unexpected_stream_end("Procedure-Argument-List to end", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column)); } last_valid = it; ++it; check_for_unexpected_stream_end("Procedure-Argument-List to be followed by a return list or a __DO__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column)); argument_types = compact_type_modifiers(argument_types); // Process return types list std::vector return_types; while(*it != DO_KW) { return_types.emplace_back(tokens.dictionary.at(*it)); last_valid = it; ++it; check_for_unexpected_stream_end("Procedure-Return-List to end", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column)); } last_valid = it; ++it; check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column)); return_types = compact_type_modifiers(return_types); // Process body std::vector body; while(*it != END_KW) { body.emplace_back(*it); last_valid = it; ++it; check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column)); } last_valid = it; ++it; return std::make_pair(it, std::make_shared(name, argument_types, return_types, body)); #undef check_for_unexpected_stream_end }; auto progress = tokens.symbols.begin(); do { auto [iterator, procedure] = parse_proc(progress); ctx.operations.push_back(procedure); parsed_procedures.emplace_back(std::move(procedure)); progress = iterator; } while (progress != tokens.symbols.end()); for(auto& proc : parsed_procedures) { if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) { throw procedure_stack_error(); } } return { tokens, ctx, parsed_procedures }; } std::vector procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const { std::vector ops = generate_label(name()); for(auto&& instruction : generate_enter()) { ops.push_back(instruction); } for(auto elem : _body) { auto token = lexer_data.dictionary.at(elem); if(elem.is_string) { for(auto&& instruction : generate_push_string_ptr(elem)) { ops.push_back(instruction); } } else if(auto result = try_parse_int32(token); result) { for(auto&& instruction : generate_push_int32(result.value())) { ops.push_back(instruction); } } else if(auto result = try_parse_int64(token); result) { for(auto&& instruction : generate_push_int64(result.value())) { ops.push_back(instruction); } } else if(auto op = ctx.lookup_operation(token); op) { for(auto&& instruction : op->emit(ctx)) { ops.push_back(instruction); } } else { throw unknown_token_error(elem); } } for(auto&& instruction : generate_return()) { ops.push_back(instruction); } return ops; } std::vector procedure_operation::emit(const parser_context& ctx) const { return generate_call(name()); } std::vector generate(const generate_context& ctx) { std::vector generated; for(const auto& instr : initialize_stack()) { generated.push_back(instr); } for(const auto& proc : ctx.procedures) { for(const auto& instr : proc->generate(ctx.parser, ctx.lexer)) { generated.push_back(instr); } } std::set done; for(const auto& value : ctx.lexer.symbols) { if(value.is_string && not done.contains(value.id)) { for(const auto& instr : generate_string(value, ctx.lexer.dictionary.at(value.id))) { generated.push_back(instr); } done.insert(value.id); } } return generated; } void procedure_operation::execute(const generate_context& ctx, interpreter_stack& stack) const { } }