You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

407 lines
16 KiB

#include "molasses/parser_primitives.h"
#include "molasses/errors.h"
#include "molasses/generator_primitives.h"
#include <algorithm>
#include <iostream>
#include <span>
namespace molasses {
/// Applies the type signature of an operation to a stack of type names.
///
/// The operation's argument types are compared with the top of
/// `current_stack`, last argument first, and each matching entry is popped.
/// Afterwards the operation's return types are pushed in their listed order.
///
/// @param current_stack type names on the stack; taken by value, so the
///        caller's vector is never modified
/// @param next_op operation whose argument_types()/return_types() are applied
/// @return the stack as it looks after the operation
/// @throws type_input_error if a stack entry differs from the expected argument
/// @throws value_missing_error if the stack runs out before every argument matched
std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
    auto expected = next_op.argument_types();
    auto wanted = expected.rbegin();
    // Walk the arguments back to front while there is still something to consume.
    for(; wanted != expected.rend() and not current_stack.empty(); ++wanted) {
        if(current_stack.back() != *wanted) {
            throw type_input_error();
        }
        current_stack.pop_back();
    }
    // Arguments left over means the stack was too short for this operation.
    if(wanted != expected.rend()) {
        throw value_missing_error();
    }

    auto produced = next_op.return_types();
    for(auto& type_name : produced) {
        current_stack.push_back(std::move(type_name));
    }
    return current_stack;
}
/// Parses an integer literal, choosing the base from the literal's prefix.
///
/// - more than two characters and starting with `0x`: hexadecimal, digits are
///   read after the prefix;
/// - more than one character and starting with `0`: octal;
/// - anything else: decimal.
///
/// @param str   the whole token; only its size is consulted
/// @param value receives the parsed number; left untouched when parsing fails
/// @param begin pointer to the first character of the token (must be usable
///              as the `const char*` range start of std::from_chars)
/// @param end   pointer one past the last character of the token
/// @return the std::from_chars result: `ptr` is the first character that was
///         not consumed and `ec` reports failure, so callers can inspect a
///         trailing suffix such as `_i32`
template<typename Text, typename Integer, typename First, typename Last>
auto conditional_begin_int_parse(const Text& str, Integer& value, First& begin, Last& end) {
    if(str.size() > 2 and begin[0] == '0' and begin[1] == 'x') {
        return std::from_chars(begin + 2, end, value, 16);
    }
    if(str.size() > 1 and begin[0] == '0') {
        // Parse from `begin`, not `begin + 1`: the leading zero is itself a
        // valid octal digit, so "017" still yields 15, while a lone zero
        // followed by a suffix ("0_i64") yields 0 instead of failing with
        // `value` left unset.
        return std::from_chars(begin, end, value, 8);
    }
    return std::from_chars(begin, end, value, 10);
}
/// Tries to read `str` as a 32-bit integer literal.
///
/// A token is accepted when the number spans the whole token or is followed
/// by exactly the suffix `_i32`. The base is selected by
/// conditional_begin_int_parse.
///
/// @param str token text
/// @return the parsed value, or std::nullopt when the token is not a valid
///         32-bit literal (no digits, value out of range, or unknown suffix)
std::optional<int32_t> try_parse_int32(const std::string& str) {
    int32_t value{};
    auto begin = str.data();
    auto end = str.data() + str.size();
    const auto result = conditional_begin_int_parse(str, value, begin, end);
    // std::from_chars leaves `value` untouched on failure (no digits or out of
    // range), so the error code must be checked before `value` is trusted.
    if(result.ec != std::errc{}) {
        return std::nullopt;
    }
    const std::string_view suffix{result.ptr, static_cast<size_t>(end - result.ptr)};
    if(suffix.empty() or suffix == "_i32") {
        return value;
    }
    return std::nullopt;
}
/// Tries to read `str` as a 64-bit integer literal.
///
/// Unlike try_parse_int32, the `_i64` suffix is mandatory: a number without a
/// suffix is never reported as a 64-bit literal. The base is selected by
/// conditional_begin_int_parse.
///
/// @param str token text
/// @return the parsed value, or std::nullopt when the token is not a valid
///         `_i64`-suffixed literal (no digits, value out of range, missing or
///         different suffix)
std::optional<int64_t> try_parse_int64(const std::string& str) {
    int64_t value{};
    auto begin = str.data();
    auto end = str.data() + str.size();
    const auto result = conditional_begin_int_parse(str, value, begin, end);
    // std::from_chars leaves `value` untouched on failure (no digits or out of
    // range), so the error code must be checked before `value` is trusted.
    if(result.ec != std::errc{}) {
        return std::nullopt;
    }
    const std::string_view suffix{result.ptr, static_cast<size_t>(end - result.ptr)};
    if(suffix == "_i64") {
        return value;
    }
    return std::nullopt;
}
/// Returns the first pointer-like element whose `name()` equals `name`.
///
/// The container is only read: it is taken by const reference and the
/// predicate inspects each element by const reference, so a lookup does not
/// copy the container or any of its elements. Only the returned match is
/// copied.
///
/// @param container range of pointer-like elements (dereferenceable with `->`
///        and constructible from nullptr)
/// @param name value compared against each element's `name()`
/// @return a copy of the first matching element, or nullptr if none matches
template<typename Container, typename Name>
auto find_ptr_by_name_in_container(const Container& container, const Name& name)
    -> std::decay_t<decltype(*std::begin(container))> {
    const auto match = std::find_if(std::begin(container), std::end(container), [&](const auto& elem) {
        return elem->name() == name;
    });
    if(match != std::end(container)) {
        return *match;
    }
    return nullptr;
}
/// Finds the registered type called `name`.
///
/// @param name type name to search for in `types`
/// @return the matching type, or nullptr when no registered type has that name
std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
    auto matching_type = find_ptr_by_name_in_container(types, name);
    return matching_type;
}
/// Finds the registered operation called `name`.
///
/// @param name operation name to search for in `operations`
/// @return the matching operation, or nullptr when no registered operation has that name
std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const {
    auto matching_operation = find_ptr_by_name_in_container(operations, name);
    return matching_operation;
}
/// Simulates a procedure body on a stack of type names and checks that it
/// ends up with the declared result.
///
/// The body is walked linearly. A symbol followed by `__GOTO__` or
/// `__LABEL__` only records a snapshot of the stack at that position; a symbol
/// followed by `__JUMP_IF__` additionally requires an `i64` on top of the
/// stack (which is left in place). String symbols push `u8 ptr`, integer
/// literals push `i32`/`i64`, known operations are applied with `operator>>`,
/// and any other symbol is ignored.
///
/// @param parser_state     context used to resolve operation names
/// @param lexer_state      supplies the text of each symbol
/// @param consumed_stream  symbols of the body
/// @param execution_input  type stack at entry (copied, then mutated locally)
/// @param execution_output type stack expected at exit
/// @param sub_bodies       pairs of body positions whose recorded snapshots
///                         must be equal
/// @return true when the final stack equals `execution_output` and every pair
///         in `sub_bodies` has matching snapshots; false otherwise
/// @throws whatever `operator>>` throws when an operation does not fit the stack
bool type_check(
    const parser_context& parser_state,
    const lexed_output& lexer_state,
    const std::vector<symbol>& consumed_stream,
    std::vector<std::string> execution_input,
    const std::vector<std::string>& execution_output,
    const std::vector<std::pair<size_t, size_t>>& sub_bodies
) {
    std::vector<std::string>& stack = execution_input;
    std::map<size_t, std::vector<std::string>> snapshots;
    const auto stream_end = consumed_stream.end();
    size_t position = 0;
    for(auto cursor = consumed_stream.begin(); cursor != stream_end; ++cursor, ++position) {
        const auto& current = *cursor;
        const auto& text = lexer_state.dictionary.at(current);

        // Classify the symbol by the keyword that follows it, if any.
        auto following = cursor;
        ++following;
        bool marks_label_or_goto = false;
        bool marks_jump_if = false;
        if(following != stream_end) {
            const auto& keyword = lexer_state.dictionary.at(*following);
            marks_label_or_goto = keyword == "__GOTO__" or keyword == "__LABEL__";
            marks_jump_if = keyword == "__JUMP_IF__";
        }

        if(marks_label_or_goto or marks_jump_if) {
            // JUMP_IF needs an i64 condition but does not consume it.
            if(marks_jump_if and (stack.empty() or stack.back() != "i64")) {
                return false;
            }
            snapshots[position] = stack;
            // Skip the keyword as well; the loop increment moves past it.
            cursor = following;
            ++position;
            continue;
        }

        if(current.is_string) {
            stack.emplace_back("u8 ptr");
        } else if(try_parse_int32(text).has_value()) {
            stack.emplace_back("i32");
        } else if(try_parse_int64(text).has_value()) {
            stack.emplace_back("i64");
        } else if(const auto known_op = parser_state.lookup_operation(text); known_op) {
            stack = stack >> *known_op;
        }
    }

    if(stack != execution_output) {
        return false;
    }
    for(const auto& [start, end] : sub_bodies) {
        if(snapshots[start] != snapshots[end]) {
            return false;
        }
    }
    return true;
}
/// Parses a lexed token stream into procedures and type-checks each of them.
///
/// Every procedure has the shape
///   `__PROC__ name arg-types... __--__ return-types... __DO__ body... __END__`
/// where a `ptr` entry in a type list is folded into the preceding type name
/// (e.g. `u8 ptr`) and that pointer type is registered in `ctx` if unknown.
/// Inside a body, a symbol followed by `__LABEL__`, `__GOTO__` or
/// `__JUMP_IF__` declares or targets a label; every jump is paired with its
/// label so that type_check can compare the stack at both positions.
/// An empty token stream yields a context without procedures.
///
/// @param ctx        parser context; parsed procedures and pointer types are
///                   added to this by-value copy, which is returned
/// @param lexer_data output of the lexer
/// @return the merged token data, the updated context and the parsed procedures
/// @throws unexpected_token_error a procedure does not start with `__PROC__`
/// @throws expecting_token_error the stream ends in the middle of a procedure
/// @throws type_expected_with_modifier_error `ptr` appears without a preceding type
/// @throws duplicate_label_error a label is declared twice in one procedure
/// @throws orphan_goto_error a jump targets a label that is not declared
/// @throws procedure_stack_error a procedure fails type_check
generate_context parse(parser_context ctx, const lexed_output& lexer_data) {
    // NOTE(review): the comparisons below assume concatenate() keeps these ids
    // for the keywords of `lexer_data` - confirm against concatenate().
    enum op : int {
        DO_KW = 1,
        SEPARATOR_KW,
        PROC_KW,
        END_KW,
        LABEL_KW,
        GOTO_KW,
        JUMP_IF_KW
    };
    lexed_output fake;
    fake.dictionary[PROC_KW] = "__PROC__";
    fake.dictionary[SEPARATOR_KW] = "__--__";
    fake.dictionary[DO_KW] = "__DO__";
    fake.dictionary[END_KW] = "__END__";
    fake.dictionary[LABEL_KW] = "__LABEL__";
    fake.dictionary[GOTO_KW] = "__GOTO__";
    fake.dictionary[JUMP_IF_KW] = "__JUMP_IF__";
    auto tokens = concatenate(fake, lexer_data);
    std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;

    // Registers `full_ptr_type_name` as a pointer-sized primitive type unless it already exists.
    auto register_pointer_type = [&](std::string full_ptr_type_name) -> void {
        if(auto type = ctx.lookup_type(full_ptr_type_name); !type) {
            ctx.types.push_back(std::make_shared<primitive_type>(std::move(full_ptr_type_name), architecture_ptr_size));
        }
    };

    // Folds every `ptr` modifier into the type name that precedes it.
    auto compact_type_modifiers = [&](const std::vector<std::string>& type_info) -> std::vector<std::string> {
        std::vector<std::string> ret_val;
        for(const auto& elem : type_info) {
            if(elem == "ptr") {
                if(ret_val.empty()) throw type_expected_with_modifier_error();
                ret_val.back() += " ptr";
                register_pointer_type(ret_val.back());
            } else {
                ret_val.push_back(elem);
            }
        }
        return ret_val;
    };

    // Parses one procedure starting at `it`; returns the position after its
    // `__END__` together with the parsed procedure.
    auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
// The context string is only built when the error is actually thrown.
#define check_for_unexpected_stream_end(expected, context) \
        do{if(it == tokens.symbols.end()) { \
            throw expecting_token_error(expected, context); \
        }}while(false)
        decltype(it) last_valid = it;
        if(*it != PROC_KW) {
            throw unexpected_token_error(*it, tokens.dictionary[*it],tokens.dictionary[PROC_KW]);
        }
        ++it;
        check_for_unexpected_stream_end("Procedure-Name", details::concatenate_builder("In top level, file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
        std::string name = tokens.dictionary.at(*it);
        auto& name_symbol = *it;
        last_valid = it;
        ++it;
        check_for_unexpected_stream_end(tokens.dictionary[SEPARATOR_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));

        // Process arguments list
        std::vector<std::string> argument_types;
        while(*it != SEPARATOR_KW) {
            argument_types.emplace_back(tokens.dictionary.at(*it));
            last_valid = it;
            ++it;
            check_for_unexpected_stream_end("Procedure-Argument-List to end", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
        }
        last_valid = it;
        ++it;
        check_for_unexpected_stream_end("Procedure-Argument-List to be followed by a return list or a __DO__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
        argument_types = compact_type_modifiers(argument_types);

        // Process return types list
        std::vector<std::string> return_types;
        while(*it != DO_KW) {
            return_types.emplace_back(tokens.dictionary.at(*it));
            last_valid = it;
            ++it;
            check_for_unexpected_stream_end("Procedure-Return-List to end", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
        }
        last_valid = it;
        ++it;
        check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
        return_types = compact_type_modifiers(return_types);

        // Process body
        std::vector<symbol> body;
        std::vector<std::pair<size_t, size_t>> sub_bodies;
        std::map<std::string, size_t> found_labels;
        // Several jumps may target the same label, and each one has to be
        // checked, so jumps are kept in multimaps rather than maps.
        std::multimap<std::string, size_t> found_gotos;
        std::multimap<std::string, size_t> found_jump_ifs;
        while(*it != END_KW) {
            if(auto ahead = it; ++ahead != tokens.symbols.end() and (*ahead == GOTO_KW or *ahead == JUMP_IF_KW or *ahead == LABEL_KW)) {
                // `*it` is a label name and `*ahead` the keyword acting on it;
                // remember where in the body the pair starts.
                if(*ahead == GOTO_KW) {
                    found_gotos.emplace(tokens.dictionary[*it], body.size());
                } else if(*ahead == JUMP_IF_KW) {
                    found_jump_ifs.emplace(tokens.dictionary[*it], body.size());
                } else if(*ahead == LABEL_KW) {
                    auto label_value = tokens.dictionary[*it];
                    if(found_labels.contains(label_value)) {
                        throw duplicate_label_error(*it, label_value);
                    }
                    found_labels[label_value] = body.size();
                }
                body.emplace_back(*it);
                body.emplace_back(*ahead);
                last_valid = ahead;
                it = ++ahead;
            } else {
                body.emplace_back(*it);
                last_valid = it;
                ++it;
            }
            check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
        }
        last_valid = it;
        ++it;

        // Pairs every jump with its label as (lower position, higher position).
        const auto link_jumps = [&](const std::multimap<std::string, size_t>& jumps) {
            for(const auto& [dest, index] : jumps) {
                const auto label = found_labels.find(dest);
                if(label == found_labels.end()) {
                    throw orphan_goto_error(body[index], dest);
                }
                sub_bodies.emplace_back(std::min(index, label->second), std::max(index, label->second));
            }
        };
        link_jumps(found_gotos);
        link_jumps(found_jump_ifs);
        return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body, sub_bodies));
#undef check_for_unexpected_stream_end
    };

    // A `while` loop (not `do ... while`) so that an empty token stream never
    // reaches parse_proc, which dereferences its iterator immediately.
    auto progress = tokens.symbols.begin();
    while(progress != tokens.symbols.end()) {
        auto [iterator, procedure] = parse_proc(progress);
        // Registered right away so that later lookups can resolve it.
        ctx.operations.push_back(procedure);
        parsed_procedures.emplace_back(std::move(procedure));
        progress = iterator;
    }

    // Type checking happens once all procedures are known to `ctx`.
    for(auto& proc : parsed_procedures) {
        if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets, proc->_simple_sub_bodies)) {
            throw procedure_stack_error();
        }
    }
    return {
        tokens,
        ctx,
        parsed_procedures
    };
}
/// Produces the instruction list for this procedure.
///
/// The output starts with the procedure's label and the enter sequence,
/// continues with the instructions for each body symbol and ends with the
/// return sequence. A symbol followed by `__GOTO__`, `__LABEL__` or
/// `__JUMP_IF__` is treated as a label name and emitted as
/// `<procedure name> in <label>`; otherwise strings, integer literals and
/// known operations are emitted in that order of precedence.
///
/// @param ctx        context used to resolve operation names
/// @param lexer_data supplies the text of each body symbol
/// @return the generated instructions
/// @throws unknown_token_error for a symbol that is none of the above
std::vector<std::string> procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const {
    std::vector<std::string> ops = generate_label(name());
    // Copies every instruction of a generated sequence onto the output.
    const auto append = [&ops](auto&& instructions) {
        for(auto&& instruction : instructions) {
            ops.push_back(instruction);
        }
    };

    append(generate_enter());
    for(auto it = _body.begin(); it != _body.end(); ++it) {
        auto elem = *it;
        auto token = lexer_data.dictionary.at(elem);

        // Label handling: look at the keyword following this symbol, if any.
        auto ahead = it;
        ++ahead;
        if(ahead != _body.end()) {
            const auto& keyword = lexer_data.dictionary.at(*ahead);
            bool consumed_pair = true;
            if(keyword == "__GOTO__") {
                append(generate_goto(name() + " in " + token));
            } else if(keyword == "__LABEL__") {
                append(generate_label(name() + " in " + token));
            } else if(keyword == "__JUMP_IF__") {
                append(generate_jump_if(name() + " in " + token));
            } else {
                consumed_pair = false;
            }
            if(consumed_pair) {
                // Skip the keyword; the loop increment moves past it.
                it = ahead;
                continue;
            }
        }

        if(elem.is_string) {
            append(generate_push_string_ptr(elem));
        } else if(const auto as_int32 = try_parse_int32(token); as_int32) {
            append(generate_push_int32(as_int32.value()));
        } else if(const auto as_int64 = try_parse_int64(token); as_int64) {
            append(generate_push_int64(as_int64.value()));
        } else if(const auto known_op = ctx.lookup_operation(token); known_op) {
            append(known_op->emit(ctx));
        } else {
            throw unknown_token_error(elem);
        }
    }
    append(generate_return());
    return ops;
}
/// Produces the instructions that invoke this procedure from another body.
///
/// @param ctx unused here
/// @return the result of generate_call for this procedure's name
std::vector<std::string> procedure_operation::emit(const parser_context& ctx) const {
    auto call_sequence = generate_call(name());
    return call_sequence;
}
/// Generates the complete instruction list for a parsed program.
///
/// The output consists of the stack initialisation, then the code of every
/// procedure in `ctx.procedures`, then one string definition for each
/// distinct string symbol id found in `ctx.lexer.symbols`.
///
/// @param ctx parsed program (lexer data, parser context and procedures)
/// @return the generated instructions
std::vector<std::string> generate(const generate_context& ctx) {
    std::vector<std::string> generated;
    // Copies every instruction of a generated sequence onto the output.
    const auto append = [&generated](const auto& instructions) {
        for(const auto& instr : instructions) {
            generated.push_back(instr);
        }
    };

    append(initialize_stack());
    for(const auto& proc : ctx.procedures) {
        append(proc->generate(ctx.parser, ctx.lexer));
    }

    // A string used several times shares one symbol id; define it only once.
    std::set<int> emitted_strings;
    for(const auto& value : ctx.lexer.symbols) {
        if(not value.is_string or emitted_strings.contains(value.id)) {
            continue;
        }
        append(generate_string(value, ctx.lexer.dictionary.at(value.id)));
        emitted_strings.insert(value.id);
    }
    return generated;
}
/// Interprets the body of this procedure symbol by symbol.
///
/// A symbol naming a known operation is executed; otherwise a `_i64`-suffixed
/// literal or a 32-bit literal is pushed onto `stack`. Any other symbol is
/// reported on std::cerr and skipped.
///
/// @param ctx   parsed program used to resolve symbols and operations
/// @param stack interpreter stack that operations and literals act upon
void procedure_operation::execute(const generate_context& ctx, interpreter_stack& stack) const {
    for(const auto& elem : _body) {
        const auto& text = ctx.lexer.dictionary.at(elem);
        if(auto n = ctx.parser.lookup_operation(text); n) {
            n->execute(ctx, stack);
            continue;
        }
        if(auto maybe_i64 = try_parse_int64(text); maybe_i64) {
            stack.emplace(static_cast<int64_t>(maybe_i64.value()));
            continue;
        }
        if(auto maybe_i32 = try_parse_int32(text); maybe_i32) {
            stack.emplace(static_cast<int32_t>(maybe_i32.value()));
            continue;
        }
        std::cerr << "OPERATION NOT FOUND: " << elem <<"\n";
    }
}
}