#include <algorithm>
#include <iostream>
#include <system_error>
#include "molasses/parser_primitives.h"
#include "molasses/generator_primitives.h"
-
- namespace molasses {
- std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
- {
- auto args = next_op.argument_types();
- while(not (args.empty() or current_stack.empty())) {
- if(current_stack.back() != args.back()) {
- throw type_input_error();
- } else {
- args.pop_back();
- current_stack.pop_back();
- }
- }
- if(not args.empty()) {
- throw value_missing_error();
- }
- }
- {
- auto return_types = next_op.return_types();
- std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack));
- }
- return current_stack;
- }
-
/// Parses the integer literal starting at `begin` into `value`, picking the
/// radix from the literal's prefix: `0x` selects base 16, a leading `0`
/// selects base 8, anything else is read as base 10.
///
/// `begin` and `end` are expected to delimit the characters of `str`; `str`
/// itself is only consulted for its length.
///
/// @return the `std::from_chars` result. `ptr` points one past the last
///         character consumed and `ec` reports conversion failures. `value`
///         is left untouched when the conversion fails, so callers must
///         inspect `ec` before using it.
template<typename Text, typename Integer, typename Begin, typename End>
auto conditional_begin_int_parse(const Text& str, Integer& value, Begin& begin, End& end) {
    if(str.size() > 2 and begin[0] == '0' and begin[1] == 'x') {
        return std::from_chars(begin + 2, end, value, 16);
    }
    if(str.size() > 1 and begin[0] == '0') {
        // The leading zero is itself a valid octal digit, so parsing starts on
        // it rather than after it. Skipping it made a suffixed zero such as
        // "0_i32" convert nothing at all.
        return std::from_chars(begin, end, value, 8);
    }
    return std::from_chars(begin, end, value, 10);
}
-
- std::optional<int32_t> try_parse_int32(const std::string& str) {
- int32_t value;
- auto begin = str.data();
- auto end = str.data()+str.size();
- auto result = conditional_begin_int_parse(str, value, begin, end);
- if(result.ptr == end) {
- return value;
- } else {
- if(std::string_view{result.ptr, end} == "_i32") {
- return value;
- }
- }
- return std::nullopt;
- }
-
- std::optional<int64_t> try_parse_int64(const std::string& str) {
- int64_t value;
- auto begin = str.data();
- auto end = str.data()+str.size();
- auto result = conditional_begin_int_parse(str, value, begin, end);
- if(result.ptr == end) {
- return std::nullopt;
- } else {
- if(std::string_view{result.ptr, end} == "_i64") {
- return value;
- }
- }
- return std::nullopt;
- }
-
/// Returns the first element of `container` whose `name()` equals `name`.
///
/// Elements are expected to be pointer-like: dereferenceable with `->` and
/// constructible from `nullptr`.
///
/// The container and its elements are taken by const reference, so a lookup
/// copies only the element it returns.
///
/// @return a copy of the matching element, or a null pointer when nothing
///         matches.
template<typename Container, typename Name>
auto find_ptr_by_name_in_container(const Container& container, const Name& name) -> std::decay_t<decltype(*std::begin(container))> {
    const auto match = std::find_if(std::begin(container), std::end(container), [&](const auto& candidate) {
        return candidate->name() == name;
    });
    if(match == std::end(container)) {
        return nullptr;
    }
    return *match;
}
-
- std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
- return find_ptr_by_name_in_container(types, name);
- }
-
- std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const {
- return find_ptr_by_name_in_container(operations, name);
- }
-
- bool type_check(
- const parser_context& parser_state,
- const lexed_output& lexer_state,
- const std::vector<symbol>& consumed_stream,
- std::vector<std::string> execution_input,
- const std::vector<std::string>& execution_output
- ) {
- auto& type_stack = execution_input;
-
- for(const auto& symbol : consumed_stream) {
- const auto& symbol_text = lexer_state.dictionary.at(symbol);
- if(symbol.is_string) {
- type_stack.emplace_back("u8 ptr");
- } else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) {
- type_stack.emplace_back("i32");
- } else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) {
- type_stack.emplace_back("i64");
- } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
- type_stack = type_stack >> *is_op;
- }
- }
-
- return type_stack == execution_output;
- }
-
- generate_context parse(parser_context ctx, const lexed_output& lexer_data) {
- enum op : int {
- DO_KW = 1,
- SEPARATOR_KW,
- PROC_KW,
- END_KW
- };
-
- lexed_output fake;
- fake.dictionary[PROC_KW] = "__PROC__";
- fake.dictionary[SEPARATOR_KW] = "__--__";
- fake.dictionary[DO_KW] = "__DO__";
- fake.dictionary[END_KW] = "__END__";
-
- auto tokens = concatenate(fake, lexer_data);
-
- std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;
-
- auto register_pointer_type = [&](std::string full_ptr_type_name) -> void {
- if(auto type = ctx.lookup_type(full_ptr_type_name); !type) {
- ctx.types.push_back(std::make_shared<primitive_type>(std::move(full_ptr_type_name), architecture_ptr_size));
- }
- };
-
- auto compact_type_modifiers = [&](const std::vector<std::string>& type_info) -> std::vector<std::string> {
- std::vector<std::string> ret_val;
- for(const auto& elem : type_info) {
- if(elem == "ptr") {
- if(ret_val.empty()) throw type_expected_with_modifier_error();
- ret_val.back() += " ptr";
- register_pointer_type(ret_val.back());
- } else {
- ret_val.push_back(elem);
- }
- }
- return ret_val;
- };
-
- auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
- #define check_for_unexpected_stream_end(expected, context) \
- do{if(it == tokens.symbols.end()) { \
- throw expecting_token_error(expected, context); \
- }}while(false)
-
- if(*it != PROC_KW) {
- throw unexpected_token_error(*it, tokens.dictionary[*it],tokens.dictionary[PROC_KW]);
- }
- ++it;
- check_for_unexpected_stream_end(tokens.dictionary[PROC_KW], details::concatenate_builder("In top level, file ",it->file_name, ":", it->line, ":", it->column));
-
- std::string name = tokens.dictionary.at(*it);
- auto& name_symbol = *it;
- ++it;
- check_for_unexpected_stream_end("Procedure-Name", details::concatenate_builder("In top level, file ",it->file_name, ":", it->line, ":", it->column));
-
- // Process arguments list
- std::vector<std::string> argument_types;
- while(*it != SEPARATOR_KW) {
- argument_types.emplace_back(tokens.dictionary.at(*it));
- ++it;
- check_for_unexpected_stream_end(tokens.dictionary[SEPARATOR_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
- }
- ++it;
- check_for_unexpected_stream_end("Procedure-Argument-List", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
- argument_types = compact_type_modifiers(argument_types);
-
- // Process return types list
- std::vector<std::string> return_types;
- while(*it != DO_KW) {
- return_types.emplace_back(tokens.dictionary.at(*it));
- ++it;
- check_for_unexpected_stream_end(tokens.dictionary[DO_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
- }
- ++it;
- check_for_unexpected_stream_end("Procedure-Return-List", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
- return_types = compact_type_modifiers(return_types);
-
- // Process return types list
- std::vector<symbol> body;
- while(*it != END_KW) {
- body.emplace_back(*it);
- ++it;
- check_for_unexpected_stream_end(tokens.dictionary[END_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
- }
- ++it;
-
- return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body));
- #undef check_for_unexpected_stream_end
- };
-
- auto progress = tokens.symbols.begin();
-
- do {
- auto [iterator, procedure] = parse_proc(progress);
- ctx.operations.push_back(procedure);
- parsed_procedures.emplace_back(std::move(procedure));
- progress = iterator;
- } while (progress != tokens.symbols.end());
-
- for(auto& proc : parsed_procedures) {
- if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
- throw procedure_stack_error();
- }
- }
-
- return {
- tokens,
- ctx,
- parsed_procedures
- };
- }
-
- std::vector<std::string> procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const {
- std::vector<std::string> ops = generate_label(name());
-
- for(auto&& instruction : generate_enter()) {
- ops.push_back(instruction);
- }
-
- for(auto elem : _body) {
- auto token = lexer_data.dictionary.at(elem);
- if(elem.is_string) {
- for(auto&& instruction : generate_push_string_ptr(elem)) {
- ops.push_back(instruction);
- }
- } else if(auto result = try_parse_int32(token); result) {
- for(auto&& instruction : generate_push_int32(result.value())) {
- ops.push_back(instruction);
- }
- } else if(auto result = try_parse_int64(token); result) {
- for(auto&& instruction : generate_push_int64(result.value())) {
- ops.push_back(instruction);
- }
- } else if(auto op = ctx.lookup_operation(token); op) {
- for(auto&& instruction : op->emit(ctx)) {
- ops.push_back(instruction);
- }
- } else {
- throw unknown_token_error(elem);
- }
- }
-
- for(auto&& instruction : generate_return()) {
- ops.push_back(instruction);
- }
-
- return ops;
- }
-
- std::vector<std::string> procedure_operation::emit(const parser_context& ctx) const {
- return generate_call(name());
- }
-
- std::vector<std::string> generate(const generate_context& ctx) {
- std::vector<std::string> generated;
-
- for(const auto& instr : initialize_stack()) {
- generated.push_back(instr);
- }
-
- for(const auto& proc : ctx.procedures) {
- for(const auto& instr : proc->generate(ctx.parser, ctx.lexer)) {
- generated.push_back(instr);
- }
- }
-
- std::set<int> done;
- for(const auto& value : ctx.lexer.symbols) {
- if(value.is_string && not done.contains(value.id)) {
- for(const auto& instr : generate_string(value, ctx.lexer.dictionary.at(value.id))) {
- generated.push_back(instr);
- }
- done.insert(value.id);
- }
- }
-
- return generated;
- }
- }
|