#include <algorithm>
#include <cassert>
#include <charconv>
#include <iostream>
#include <iterator>
#include <system_error>
#include <type_traits>

#include "molasses/parser_primitives.h"
|
|
|
|
namespace molasses {
|
|
/// Appends the built-in signed and unsigned integer types to `ctx.types`
/// (i8, i16, i32, i64, u8, u16, u32, u64, in that order) and returns the
/// updated context.
parser_context register_integers(parser_context ctx) {
    // Second field is forwarded to the primitive_type constructor as-is;
    // presumably the size of the type in bytes -- confirm against primitive_type.
    struct integer_spec {
        const char* type_name;
        int width;
    };
    static constexpr integer_spec builtin_integers[] = {
        {"i8", 1},
        {"i16", 2},
        {"i32", 4},
        {"i64", 8},
        {"u8", 1},
        {"u16", 2},
        {"u32", 4},
        {"u64", 8},
    };

    for (const auto& spec : builtin_integers) {
        ctx.types.push_back(std::make_shared<primitive_type>(spec.type_name, spec.width));
    }

    return ctx;
}
|
|
|
|
std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
|
|
{
|
|
auto args = next_op.argument_types();
|
|
while(not (args.empty() or current_stack.empty())) {
|
|
if(current_stack.back() != args.back()) {
|
|
throw TypeInputError();
|
|
} else {
|
|
args.pop_back();
|
|
current_stack.pop_back();
|
|
}
|
|
}
|
|
if(not args.empty()) {
|
|
throw ValueMissingError();
|
|
}
|
|
}
|
|
{
|
|
auto return_types = next_op.return_types();
|
|
std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack));
|
|
}
|
|
return current_stack;
|
|
}
|
|
|
|
/// Parses `str` as a base-10 signed 32-bit integer.
///
/// @param str text to parse; the whole string must be consumed.
/// @return the parsed value, or std::nullopt when `str` is empty, contains
///         anything other than one decimal integer, or does not fit in int32_t.
std::optional<int32_t> try_parse_int32(const std::string& str) {
    int32_t value = 0;
    const char* const begin = str.data();
    const char* const end = begin + str.size();

    // TODO: Add other bases
    const auto result = std::from_chars(begin, end, value, 10);

    // from_chars leaves `value` untouched on failure, and on overflow (or for an
    // empty string) `ptr` can still equal `end`, so the error code must be
    // checked as well as the end pointer.
    if (result.ec != std::errc{} || result.ptr != end) {
        return std::nullopt;
    }
    return value;
}
|
|
|
|
/// Returns the first element of `container` whose `->name()` equals `name`,
/// or a value-initialized element (an empty pointer for smart-pointer
/// containers) when there is no match.
///
/// The container is taken by const reference so a lookup no longer copies
/// every element, and the return type is spelled with standard type traits
/// instead of the GNU `typeof` extension.
template <typename Container, typename Name>
auto find_ptr_by_name_in_container(const Container& container, const Name& name)
    -> std::remove_cv_t<std::remove_reference_t<decltype(*std::begin(container))>> {
    const auto it = std::find_if(std::begin(container), std::end(container), [&name](const auto& elem) {
        return elem->name() == name;
    });
    if (it != std::end(container)) {
        return *it;
    }
    return {};
}
|
|
|
|
std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
|
|
return find_ptr_by_name_in_container(types, name);
|
|
}
|
|
|
|
std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const {
|
|
return find_ptr_by_name_in_container(operations, name);
|
|
}
|
|
|
|
bool type_check(
|
|
const parser_context& parser_state,
|
|
const lexed_output& lexer_state,
|
|
const std::vector<symbol>& consumed_stream,
|
|
std::vector<std::string> execution_input,
|
|
const std::vector<std::string>& execution_output
|
|
) {
|
|
auto& type_stack = execution_input;
|
|
|
|
for(const auto& symbol : consumed_stream) {
|
|
const auto& symbol_text = lexer_state.dictionary.at(symbol);
|
|
if(auto is_int = try_parse_int32(symbol_text); is_int) {
|
|
type_stack.emplace_back("i32");
|
|
} else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
|
|
type_stack = type_stack >> *is_op;
|
|
}
|
|
}
|
|
|
|
return type_stack == execution_output;
|
|
}
|
|
|
|
/// Returns the assembly lines emitted ahead of all procedures: a
/// `stack_instruction` quad in .bss and an `initialize_callstack` routine that
/// issues a syscall and stores its result in `stack_instruction`.
std::vector<std::string> initialize_stack() {
    static constexpr const char* prologue[] = {
        ".bss\n", // TODO: make threadlocal
        "stack_instruction:\n",
        " .quad 0\n",
        ".text\n",
        "initialize_callstack:\n",
        " movq $9, %rax\n",
        " movq $0, %rdi\n",
        " movq $8192, %rsi\n",
        " movq $3, %rdx\n",
        " movq $34, %r10\n",
        " movq $-1, %r8\n",
        " movq $0, %r9\n",
        " syscall\n",
        " movq %rax, (stack_instruction)\n",
        " retq\n",
    };
    return std::vector<std::string>(std::begin(prologue), std::end(prologue));
}
|
|
|
|
/// Parses every procedure in `lexer_data`, registers it in `ctx.operations`,
/// type-checks each body against its declared signature, and writes the
/// generated assembly to std::cout.
///
/// Expected token layout of one procedure:
///   __PROC__ <name> <argument types...> __--__ <return types...> __DO__ <body...> __END__
///
/// An empty token stream yields no procedures (only the stack-initialisation
/// prologue is printed); previously it dereferenced the end iterator.
///
/// @param ctx        context to extend; returned with the parsed procedures added.
/// @param lexer_data lexer output to parse.
/// @return the updated context.
/// @throws UnexpectedTokenError when a procedure does not start with __PROC__.
/// @throws ExpectingTokenError  when the stream ends in the middle of a procedure.
/// @throws ProcedureStackError  when a body does not turn its argument types
///         into its return types.
/// @throws whatever type_check / procedure_operation::generate propagate.
parser_context parse(parser_context ctx, const lexed_output& lexer_data) {
    // Symbol ids reserved for the language keywords.
    enum op : int {
        DO_KW = 1,
        SEPARATOR_KW,
        PROC_KW,
        END_KW
    };

    // Keyword dictionary merged with the real lexer output below.
    // NOTE(review): presumably concatenate() keeps these keyword ids stable -- confirm.
    lexed_output fake;
    fake.dictionary[PROC_KW] = "__PROC__";
    fake.dictionary[SEPARATOR_KW] = "__--__";
    fake.dictionary[DO_KW] = "__DO__";
    fake.dictionary[END_KW] = "__END__";

    auto tokens = concatenate(fake, lexer_data);

    std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;

    // Parses one procedure starting at `it` (which must be dereferenceable) and
    // returns the iterator just past its __END__ together with the procedure.
    auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
        // Steps to the next token, which must exist.
        const auto advance = [&] {
            ++it;
            if (it == tokens.symbols.end()) {
                throw ExpectingTokenError();
            }
        };

        if (*it != PROC_KW) {
            throw UnexpectedTokenError();
        }
        advance();

        std::string name = tokens.dictionary.at(*it);
        advance();

        // Process arguments list
        std::vector<std::string> argument_types;
        while (*it != SEPARATOR_KW) {
            argument_types.emplace_back(tokens.dictionary.at(*it));
            advance();
        }
        advance();

        // Process return types list
        std::vector<std::string> return_types;
        while (*it != DO_KW) {
            return_types.emplace_back(tokens.dictionary.at(*it));
            advance();
        }
        advance();

        // Process body
        std::vector<symbol> body;
        while (*it != END_KW) {
            body.emplace_back(*it);
            advance();
        }
        // __END__ may legitimately be the last token, so no end check here.
        ++it;

        return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body));
    };

    // A pre-checked loop so that an empty stream is never dereferenced.
    for (auto progress = tokens.symbols.begin(); progress != tokens.symbols.end();) {
        auto [next, procedure] = parse_proc(progress);
        ctx.operations.push_back(procedure);
        parsed_procedures.emplace_back(std::move(procedure));
        progress = next;
    }

    // Type-check only after every procedure is registered, so bodies can refer
    // to procedures defined later in the stream.
    for (const auto& proc : parsed_procedures) {
        if (not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
            throw ProcedureStackError();
        }
    }

    // Collect all output first so nothing is printed if generation throws.
    std::vector<std::string> generated = initialize_stack();
    for (const auto& proc : parsed_procedures) {
        for (auto& instr : proc->generate(ctx, tokens)) {
            generated.push_back(std::move(instr));
        }
    }

    for (const auto& line : generated) {
        std::cout << line;
    }

    return ctx;
}
|
|
|
|
/// Emits the assembly lines for a call to `target`: store a fresh return
/// label through `stack_instruction`, advance `stack_instruction` by 8, jump
/// to `target`, then define the return label.
///
/// Each call consumes one value of a function-local counter, so return labels
/// are unique across calls (the counter is not synchronised).
///
/// @param target label to jump to.
/// @return four newline-terminated assembly lines. The label line now ends
///         with "\n" like every other emitted line, so the following
///         instruction no longer shares the label's line.
std::vector<std::string> generate_call(std::string target) {
    static uint64_t label_count = 0;

    // Build the label once so the store and the definition cannot drift apart.
    const std::string return_label = "return_label_n" + std::to_string(label_count++);

    return {
        " movq " + return_label + ", (stack_instruction)\n",
        " addq $8, stack_instruction\n",
        " jmp " + target + "\n",
        " " + return_label + ":\n"
    };
}
|
|
|
|
/// Emits the single assembly line that pushes the immediate `target`.
std::vector<std::string> generate_push_int32(int32_t target) {
    std::string instruction = " pushq $";
    instruction += std::to_string(target);
    instruction += "\n";
    return std::vector<std::string>(1, std::move(instruction));
}
|
|
|
|
std::vector<std::string> procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const {
|
|
std::vector<std::string> ops;
|
|
ops.emplace_back(name()+":\n");
|
|
|
|
for(auto elem : _body) {
|
|
auto token = lexer_data.dictionary.at(elem);
|
|
if(auto result = try_parse_int32(token); result) {
|
|
for(auto&& elem : generate_push_int32(result.value())) {
|
|
ops.push_back(elem);
|
|
}
|
|
} else if(auto op = ctx.lookup_operation(token); op) {
|
|
for(auto&& elem : op->emit(ctx)) {
|
|
ops.push_back(elem);
|
|
}
|
|
} else {
|
|
throw UnknownTokenError();
|
|
}
|
|
}
|
|
|
|
ops.emplace_back(" // Return to caller\n");
|
|
ops.emplace_back(" addq $-8, stack_instruction\n");
|
|
ops.emplace_back(" movq (stack_instruction), %rax\n");
|
|
ops.emplace_back(" pushq %rax\n");
|
|
ops.emplace_back(" retq\n");
|
|
|
|
return ops;
|
|
}
|
|
|
|
std::vector<std::string> procedure_operation::emit(const parser_context& ctx) const {
|
|
return generate_call(name());
|
|
}
|
|
}
|