Browse Source

Added type-checked support for GOTO and LABEL in anticipation of adding conditional jumps and loops

master
Ludovic 'Archivist' Lagouardette 1 year ago
parent
commit
26724a3812
11 changed files with 271 additions and 27 deletions
  1. +2
    -1
      CMakeLists.txt
  2. +29
    -0
      include/generalized_parsing.h
  3. +9
    -0
      include/molasses/errors.h
  4. +4
    -1
      include/molasses/generator_primitives.h
  5. +8
    -1
      include/molasses/parser_primitives.h
  6. +3
    -1
      include/molasses/parser_types.h
  7. +11
    -4
      src/molasses/generator_primitives_x86_64_linux.cpp
  8. +77
    -19
      src/molasses/parser_primitives.cpp
  9. +82
    -0
      tests/007.exp
  10. +7
    -0
      tests/007/exit-with-3.mol
  11. +39
    -0
      tests/007/library.mol

+ 2
- 1
CMakeLists.txt View File

@ -13,7 +13,7 @@ add_executable(sugar
include/molasses/lexer.h
src/molasses/parser_primitives.cpp
include/molasses/parser_primitives.h
include/molasses/generator_primitives.h include/molasses/errors.h include/molasses/parser_types.h src/molasses/generator_primitives.cpp)
include/molasses/generator_primitives.h include/molasses/errors.h include/molasses/parser_types.h src/molasses/generator_primitives.cpp include/generalized_parsing.h)
function(add_expect_test [testname filename])
add_test(
@ -31,6 +31,7 @@ add_expect_test(id003 ./tests/003.exp)
add_expect_test(id004 ./tests/004.exp)
add_expect_test(id005 ./tests/005.exp)
add_expect_test(id006 ./tests/006.exp)
add_expect_test(id007 ./tests/007.exp)

+ 29
- 0
include/generalized_parsing.h View File

@ -0,0 +1,29 @@
#pragma once
#include "molasses/lexer.h"
#include "molasses/parser_primitives.h"
#include <memory>
#include <span>
#include <vector>
/**
 * Abstract interface for a single parsing rule working on a stream of lexed symbols.
 *
 * Instances are stored through `std::shared_ptr<parser>` by the composite parsers
 * declared in this header, which makes this a polymorphic base class; it therefore
 * declares a virtual destructor so implementations can be destroyed through a
 * `parser` pointer without undefined behaviour.
 */
struct parser {
    /// Virtual so that derived parsers are destroyed correctly through a base pointer.
    virtual ~parser() = default;

    /**
     * Attempts to recognise this rule in `current`.
     *
     * @param ctx parser context handed to the rule (how it is used is up to the implementation)
     * @param lexer_data lexer output the symbols originate from — presumably used to resolve symbol text; TODO confirm against implementations
     * @param current the symbols that remain to be examined
     * @return a span of the tokens left before reaching the target, assuming the parser is as greedy as possible
     */
    virtual std::span<molasses::symbol> identify(molasses::parser_context& ctx, const molasses::lexed_output& lexer_data, std::span<molasses::symbol> current) = 0;
};
/**
 * Abstract interface for a node that turns a span of symbols into a new symbol list.
 *
 * The type is a polymorphic base (its only operation is pure virtual), so it
 * declares a virtual destructor to make deletion through an `ast_node` pointer safe.
 */
struct ast_node {
    /// Virtual so that derived nodes are destroyed correctly through a base pointer.
    virtual ~ast_node() = default;

    /**
     * Applies this node to `current`.
     *
     * @param ctx parser context handed to the node (how it is used is up to the implementation)
     * @param lexer_data lexer output the symbols originate from
     * @param current the symbols this node operates on
     * @return the symbols produced by the node — NOTE(review): exact meaning (rewritten stream vs. emitted output) is not defined in this header; confirm with implementations
     */
    virtual std::vector<molasses::symbol> apply(molasses::parser_context& ctx, const molasses::lexed_output& lexer_data, std::span<molasses::symbol> current) = 0;
};
/**
 * Composite parser that owns a list of candidate sub-parsers.
 *
 * `identify` is not overridden here, so the type remains abstract.
 * NOTE(review): the name suggests "first/best alternative wins", but the
 * selection rule is not defined in this header — confirm once implemented.
 */
struct parser_branch : parser {
    /// Candidate sub-parsers, shared with whoever else references them.
    std::vector<std::shared_ptr<parser>> choice;
};
/**
 * Composite parser that owns an ordered list of sub-parsers.
 *
 * `identify` is not overridden here, so the type remains abstract.
 * NOTE(review): the name suggests the sub-parsers are applied one after the
 * other, but that behaviour is not defined in this header — confirm once implemented.
 */
struct parser_sequence : parser {
    /// Sub-parsers in the order they were registered.
    std::vector<std::shared_ptr<parser>> sequence;
};

+ 9
- 0
include/molasses/errors.h View File

@ -69,6 +69,15 @@ namespace molasses {
)) {}
// TODO: Better error message
};
/**
 * Parser error reported for a `__GOTO__` whose target label cannot be found.
 *
 * The message contains the source position taken from `sym`
 * (file name, line, column) and the name of the label that was expected.
 */
struct orphan_goto_error : parser_error {
    /**
     * @param sym symbol whose file/line/column are reported as the error location
     * @param label_name name of the label the goto refers to
     */
    orphan_goto_error(const symbol& sym, const std::string& label_name)
        : parser_error(details::concatenate_builder(
              "A __GOTO__ has been found to be without a label\n",
              "\tAt ", sym.file_name, ":", sym.line, ":", sym.column, "\n",
              "\tExpected a label ", label_name, "\n"
          ))
    {}
    // TODO: Better error message
};
struct unknown_token_error : public parser_error {
explicit unknown_token_error(const symbol& sym)
: parser_error(details::concatenate_builder(

+ 4
- 1
include/molasses/generator_primitives.h View File

@ -9,7 +9,10 @@ namespace molasses {
std::vector<std::string> generate_return();
template<architecture_t Arch = architecture>
std::vector<std::string> generate_label(const std::string& target);
std::vector<std::string> generate_label(const std::string& target);
/**
 * Generates the assembly lines that jump to `target` for architecture `Arch`.
 *
 * Only the declaration lives here; each supported architecture provides an
 * explicit specialization (the x86_64 Linux one emits a single `jmp` to the
 * marshalled target name).
 *
 * @param target name of the label to jump to
 * @return the generated assembly, one string per emitted line
 */
template<architecture_t Arch = architecture>
std::vector<std::string> generate_goto(const std::string& target);
template<architecture_t Arch = architecture>
std::vector<std::string> generate_push_int32(int32_t target);

+ 8
- 1
include/molasses/parser_primitives.h View File

@ -57,6 +57,13 @@ namespace molasses {
template<architecture_t Arch = architecture>
parser_context register_i32_operations(parser_context);
bool type_check(const parser_context&, const lexed_output&, const std::vector<symbol>&, std::vector<std::string> execution_input, const std::vector<std::string>& execution_output);
/**
 * Checks that a procedure body transforms the given input type stack into the expected output type stack.
 *
 * The body is walked symbol by symbol while a type stack is updated; a token
 * followed by `__GOTO__` or `__LABEL__` does not touch the stack, instead the
 * current stack is recorded as a snapshot at that token's index.
 *
 * @param parser_state context used to look up the operations named in the body
 * @param lexer_state lexer output whose dictionary maps each symbol to its text
 * @param consumed_stream symbols making up the body being checked
 * @param execution_input type stack on entry; taken by value because it is used as the working stack
 * @param execution_output type stack the body must leave behind
 * @param sub_bodies pairs of indices into `consumed_stream` (one goto/label pair each) whose recorded snapshots must be equal
 * @return false if the final stack differs from `execution_output` or any pair in `sub_bodies` has differing snapshots, true otherwise
 */
bool type_check(
const parser_context& parser_state,
const lexed_output& lexer_state,
const std::vector<symbol>& consumed_stream,
std::vector<std::string> execution_input,
const std::vector<std::string>& execution_output,
const std::vector<std::pair<size_t, size_t>>& sub_bodies
);
}

+ 3
- 1
include/molasses/parser_types.h View File

@ -98,12 +98,14 @@ namespace molasses {
std::vector<std::string> _args;
std::vector<std::string> _rets;
std::vector<symbol> _body;
std::vector<std::pair<size_t, size_t>> _simple_sub_bodies;
procedure_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets, std::vector<symbol> body)
/**
 * Builds a procedure operation.
 *
 * Every argument is taken by value and moved into the matching member.
 *
 * @param name procedure name
 * @param args argument type names
 * @param rets return type names
 * @param body symbols making up the procedure body
 * @param sub_bodies index pairs into `body`, stored in `_simple_sub_bodies`
 */
procedure_operation(
    std::string name,
    std::vector<std::string> args,
    std::vector<std::string> rets,
    std::vector<symbol> body,
    std::vector<std::pair<size_t, size_t>> sub_bodies
)
    : _name(std::move(name))
    , _args(std::move(args))
    , _rets(std::move(rets))
    , _body(std::move(body))
    , _simple_sub_bodies(std::move(sub_bodies))
{}
[[nodiscard]] std::string name() const final {

+ 11
- 4
src/molasses/generator_primitives_x86_64_linux.cpp View File

@ -536,11 +536,18 @@ namespace molasses {
}
template<>
std::vector<std::string> generate_label<architecture_t::x86_64_linux>(const std::string& target) {
return {
std::vector<std::string> generate_label<architecture_t::x86_64_linux>(const std::string& target) {
return {
marshal(target)+":\n"
};
}
};
}
/**
 * x86_64 Linux specialization: emits one `jmp` instruction to the marshalled form of `target`.
 *
 * @param target label name; passed through `marshal` before being written out
 * @return a single-element list holding the jump instruction line
 */
template<>
std::vector<std::string> generate_goto<architecture_t::x86_64_linux>(const std::string& target) {
    std::vector<std::string> instructions;
    instructions.emplace_back(" jmp " + marshal(target) + "\n");
    return instructions;
}
template<>
std::vector<std::string> generate_return<architecture_t::x86_64_linux>() {

+ 77
- 19
src/molasses/parser_primitives.cpp View File

@ -1,8 +1,9 @@
#include <algorithm>
#include <iostream>
#include "molasses/parser_primitives.h"
#include "molasses/generator_primitives.h"
#include "molasses/errors.h"
#include "molasses/generator_primitives.h"
#include <algorithm>
#include <iostream>
#include <span>
namespace molasses {
std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
@ -90,24 +91,40 @@ namespace molasses {
const lexed_output& lexer_state,
const std::vector<symbol>& consumed_stream,
std::vector<std::string> execution_input,
const std::vector<std::string>& execution_output
const std::vector<std::string>& execution_output,
const std::vector<std::pair<size_t, size_t>>& sub_bodies
) {
auto& type_stack = execution_input;
for(const auto& symbol : consumed_stream) {
std::map<size_t, std::vector<std::string>> effective_snapshots;
size_t idx = 0;
for(auto it = consumed_stream.begin(); it != consumed_stream.end(); ++it, ++idx) {
const auto& symbol = *it;
const auto& symbol_text = lexer_state.dictionary.at(symbol);
if(symbol.is_string) {
// Skip GOTOs and LABELs
if(auto ahead = it; ++ahead != consumed_stream.end() and (lexer_state.dictionary.at(*ahead) == "__GOTO__" or lexer_state.dictionary.at(*ahead) == "__LABEL__")) {
effective_snapshots[idx] = type_stack;
it = ahead;
++idx;
} else if(symbol.is_string) {
type_stack.emplace_back("u8 ptr");
} else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) {
type_stack.emplace_back("i32");
} else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) {
type_stack.emplace_back("i64");
} else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
type_stack = type_stack >> *is_op;
}
type_stack = type_stack >> *is_op;
}
}
if(type_stack != execution_output) return false;
for(auto [start, end] : sub_bodies) {
if(effective_snapshots[start] != effective_snapshots[end]) {
return false;
}
}
return type_stack == execution_output;
return true;
}
generate_context parse(parser_context ctx, const lexed_output& lexer_data) {
@ -115,14 +132,19 @@ namespace molasses {
DO_KW = 1,
SEPARATOR_KW,
PROC_KW,
END_KW
END_KW,
LABEL_KW,
GOTO_KW
};
lexed_output fake;
fake.dictionary[PROC_KW] = "__PROC__";
fake.dictionary[PROC_KW] = "__PROC__";
fake.dictionary[SEPARATOR_KW] = "__--__";
fake.dictionary[DO_KW] = "__DO__";
fake.dictionary[END_KW] = "__END__";
fake.dictionary[LABEL_KW] = "__LABEL__";
fake.dictionary[GOTO_KW] = "__GOTO__";
auto tokens = concatenate(fake, lexer_data);
@ -197,16 +219,40 @@ namespace molasses {
// Process body
std::vector<symbol> body;
std::vector<std::pair<size_t, size_t>> sub_bodies;
std::map<std::string, size_t> found_labels;
std::map<std::string, size_t> found_gotos;
while(*it != END_KW) {
body.emplace_back(*it);
last_valid = it;
++it;
if(auto ahead = it; ++ahead != tokens.symbols.end() and (*ahead == GOTO_KW or *ahead == LABEL_KW)) {
if(*ahead == GOTO_KW) {
found_gotos[tokens.dictionary[*it]] = body.size();
} else if(*ahead == LABEL_KW) {
found_labels[tokens.dictionary[*it]] = body.size();
// TODO: Handle duplicate labels
}
body.emplace_back(*it);
body.emplace_back(*ahead);
last_valid = ahead;
it = ++ahead;
} else {
body.emplace_back(*it);
last_valid = it;
++it;
}
check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
}
last_valid = it;
++it;
for(auto& [dest, index] : found_gotos) {
if(not found_labels.contains(dest)) {
throw orphan_goto_error(body[index], dest);
}
sub_bodies.emplace_back(std::min(index, found_labels[dest]), std::max(index, found_labels[dest]));
}
return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body));
return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body, sub_bodies));
#undef check_for_unexpected_stream_end
};
@ -220,7 +266,7 @@ namespace molasses {
} while (progress != tokens.symbols.end());
for(auto& proc : parsed_procedures) {
if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets, proc->_simple_sub_bodies)) {
throw procedure_stack_error();
}
}
@ -239,9 +285,21 @@ namespace molasses {
ops.push_back(instruction);
}
for(auto elem : _body) {
for(auto it = _body.begin(); it != _body.end(); ++it) {
auto elem = *it;
auto token = lexer_data.dictionary.at(elem);
if(elem.is_string) {
if(auto ahead = it; ++ahead != _body.end() and (lexer_data.dictionary.at(*ahead) == "__GOTO__" or lexer_data.dictionary.at(*ahead) == "__LABEL__")) {
if(lexer_data.dictionary.at(*ahead) == "__GOTO__") {
for(auto&& instruction : generate_goto(name() + " in " + token)) {
ops.push_back(instruction);
}
} else if(lexer_data.dictionary.at(*ahead) == "__LABEL__") {
for(auto&& instruction : generate_label(name() + " in " + token)) {
ops.push_back(instruction);
}
}
it = ahead;
} else if(elem.is_string) {
for(auto&& instruction : generate_push_string_ptr(elem)) {
ops.push_back(instruction);
}

+ 82
- 0
tests/007.exp View File

@ -0,0 +1,82 @@
#!/usr/bin/expect
# Path of the compiler under test, taken from the SUGAR_EXECUTABLE environment variable.
set SUGAR_EXECUTABLE $::env(SUGAR_EXECUTABLE)
# Suffix appended to /tmp/sugar.generated. to form the output path used by the build and run stages.
# NOTE(review): the value ends with a dot — presumably intentional; confirm.
set BUILD_NAME 007.
# Prints "test failed <reason>" and terminates the script with exit status 1.
proc abort {reason} {
    set message "test failed $reason"
    puts $message
    exit 1
}
# Stage 1: lex and merge both sources; the word "main" must show up in the output.
# NOTE(review): the error branch reports "failed to parse" although no parse step is requested here — confirm wording.
spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all
expect {
error { abort "failed to parse" }
eof { abort "cannot find the symbol main in lexed output" }
main
}
# Drain the remaining output, then require a zero exit status from the compiler.
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# -------------------------------------------------------------------
# Stage 2: additionally run the parse step; "main" must show up in the output.
spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all parse
expect {
error { abort "failed to parse" }
eof { abort "cannot find the main procedure" }
main
}
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# -------------------------------------------------------------------
# Stage 3: same command as stage 2, this time looking for "exit-syscall-number" from library.mol.
spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all parse
expect {
error { abort "failed to parse" }
eof { abort "cannot find the exit-syscall-number procedure" }
exit-syscall-number
}
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# -------------------------------------------------------------------
# Stage 4: full pipeline (lex, merge, parse, generate, assemble) writing to /tmp/sugar.generated.$BUILD_NAME;
# the output must mention "clang" and then "ld", in that order.
spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol lex tests/007/library.mol lex merge parse /tmp/sugar.generated.$BUILD_NAME generate assemble
expect {
error { abort "failed to compile" }
eof { abort "didn't run clang" }
clang
}
expect {
error { abort "failed to link" }
eof { abort "didn't run ld" }
ld
}
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# -------------------------------------------------------------------
# Stage 5: run the generated executable; it must print "Hello" and exit with status 3.
# NOTE(review): the error branch reports "failed to compile" although this stage runs the produced binary — confirm wording.
spawn -noecho /tmp/sugar.generated.$BUILD_NAME
expect {
error { abort "failed to compile" }
eof { abort "didn't output" }
Hello
}
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 3} {
abort "executable didn't return exit code 3 but $value instead"
}

+ 7
- 0
tests/007/exit-with-3.mol View File

@ -0,0 +1,7 @@
__PROC__ main
__--__
__DO__
11_i64 "\tHello world\n" write-out
1 2_i32 + i32-to-i64 exit
__END__

+ 39
- 0
tests/007/library.mol View File

@ -0,0 +1,39 @@
__PROC__ write-syscall-number
__--__
i64
__DO__
1_i64
__END__
__PROC__ stdout-fd
__--__
i64
__DO__
1_i64
__END__
__PROC__ exit-syscall-number
__--__
i64
__DO__
60_i64
__END__
__PROC__ exit
i64
__--__
__DO__
"POTAT" __LABEL__
exit-syscall-number syscall1 drop_i64
60_i64
"POTAT" __GOTO__
drop_i64
__END__
__PROC__ write-out
i64
u8 ptr
__--__
__DO__
u8-ptr_to_i64 stdout-fd write-syscall-number syscall3 drop_i64
__END__

Loading…
Cancel
Save