浏览代码

Added type-checked support for GOTO and LABEL in anticipation of adding conditional jumps and loops

master
Ludovic 'Archivist' Lagouardette 1年前
父节点
当前提交
26724a3812
共有 11 个文件被更改,包括 271 次插入27 次删除
  1. +2
    -1
      CMakeLists.txt
  2. +29
    -0
      include/generalized_parsing.h
  3. +9
    -0
      include/molasses/errors.h
  4. +4
    -1
      include/molasses/generator_primitives.h
  5. +8
    -1
      include/molasses/parser_primitives.h
  6. +3
    -1
      include/molasses/parser_types.h
  7. +11
    -4
      src/molasses/generator_primitives_x86_64_linux.cpp
  8. +77
    -19
      src/molasses/parser_primitives.cpp
  9. +82
    -0
      tests/007.exp
  10. +7
    -0
      tests/007/exit-with-3.mol
  11. +39
    -0
      tests/007/library.mol

+ 2
- 1
CMakeLists.txt 查看文件

@ -13,7 +13,7 @@ add_executable(sugar
include/molasses/lexer.h
src/molasses/parser_primitives.cpp
include/molasses/parser_primitives.h
include/molasses/generator_primitives.h include/molasses/errors.h include/molasses/parser_types.h src/molasses/generator_primitives.cpp)
include/molasses/generator_primitives.h include/molasses/errors.h include/molasses/parser_types.h src/molasses/generator_primitives.cpp include/generalized_parsing.h)
function(add_expect_test [testname filename])
add_test(
@ -31,6 +31,7 @@ add_expect_test(id003 ./tests/003.exp)
add_expect_test(id004 ./tests/004.exp)
add_expect_test(id005 ./tests/005.exp)
add_expect_test(id006 ./tests/006.exp)
add_expect_test(id007 ./tests/007.exp)

+ 29
- 0
include/generalized_parsing.h 查看文件

@ -0,0 +1,29 @@
#pragma once
#include "molasses/lexer.h"
#include "molasses/parser_primitives.h"
#include <memory>
#include <span>
#include <vector>
/**
 * Abstract interface for a parsing step that works on a stream of lexed symbols.
 *
 * Implementations are held through std::shared_ptr<parser> (see parser_branch and
 * parser_sequence), so the type is used polymorphically.
 */
struct parser {
    /// Virtual so that implementations are always destroyed correctly through a `parser` pointer.
    virtual ~parser() = default;

    /**
     * @param ctx parser state made available to the implementation
     * @param lexer_data lexer output the symbols come from (presumably used to resolve symbol text; TODO confirm)
     * @param current the tokens that remain to be examined
     * @return a span of the tokens left before reaching the target, assuming the parser is as greedy as possible
     */
    virtual std::span<molasses::symbol> identify(molasses::parser_context& ctx, const molasses::lexed_output& lexer_data, std::span<molasses::symbol> current) = 0;
};
/**
 * Abstract interface for a node that can be applied to a stream of symbols.
 *
 * NOTE(review): the exact contract of apply() is not documented in this header;
 * presumably the returned vector holds the symbols produced by applying the node
 * to `current` — TODO confirm.
 */
struct ast_node {
    /// Virtual so that implementations are always destroyed correctly through an `ast_node` pointer.
    virtual ~ast_node() = default;

    /**
     * @param ctx parser state made available to the implementation
     * @param lexer_data lexer output the symbols come from
     * @param current the symbols the node is applied to
     * @return a vector of symbols (meaning to be confirmed, see the note on the struct)
     */
    virtual std::vector<molasses::symbol> apply(molasses::parser_context& ctx, const molasses::lexed_output& lexer_data, std::span<molasses::symbol> current) = 0;
};
/**
 * A parser that owns a list of sub-parsers named `choice`.
 *
 * identify() is not overridden here, so this type is still abstract.
 */
struct parser_branch : parser {
    /// Shared sub-parsers — presumably tried as alternatives; TODO confirm.
    std::vector<std::shared_ptr<parser>> choice;
};
/**
 * A parser that owns a list of sub-parsers named `sequence`.
 *
 * identify() is not overridden here, so this type is still abstract.
 */
struct parser_sequence : parser {
    /// Shared sub-parsers — presumably applied one after another; TODO confirm.
    std::vector<std::shared_ptr<parser>> sequence;
};

+ 9
- 0
include/molasses/errors.h 查看文件

@ -69,6 +69,15 @@ namespace molasses {
)) {}
// TODO: Better error message
};
/**
 * Parser error describing a __GOTO__ for which no label was found.
 */
struct orphan_goto_error : parser_error {
    /**
     * @param sym symbol whose file name, line and column are reported in the message
     * @param label_name name of the label that was expected
     */
    orphan_goto_error(const symbol& sym, const std::string& label_name)
        : parser_error(details::concatenate_builder(
              "A __GOTO__ has been found to be without a label\n",
              "\tAt ", sym.file_name, ":", sym.line, ":", sym.column, "\n",
              "\tExpected a label ", label_name, "\n"
          ))
    {}
    // TODO: Better error message
};
struct unknown_token_error : public parser_error {
explicit unknown_token_error(const symbol& sym)
: parser_error(details::concatenate_builder(

+ 4
- 1
include/molasses/generator_primitives.h 查看文件

@ -9,7 +9,10 @@ namespace molasses {
std::vector<std::string> generate_return();
template<architecture_t Arch = architecture>
std::vector<std::string> generate_label(const std::string& target);
std::vector<std::string> generate_label(const std::string& target);
/**
 * Generates the output lines for a jump to `target` on architecture `Arch`.
 *
 * @param target name of the jump destination
 * @return the generated lines, one string per line
 */
template<architecture_t Arch = architecture>
std::vector<std::string> generate_goto(const std::string& target);
template<architecture_t Arch = architecture>
std::vector<std::string> generate_push_int32(int32_t target);

+ 8
- 1
include/molasses/parser_primitives.h 查看文件

@ -57,6 +57,13 @@ namespace molasses {
template<architecture_t Arch = architecture>
parser_context register_i32_operations(parser_context);
bool type_check(const parser_context&, const lexed_output&, const std::vector<symbol>&, std::vector<std::string> execution_input, const std::vector<std::string>& execution_output);
/**
 * Type checks a stream of symbols by tracking its effect on a stack of type names.
 *
 * @param parser_state context used to look up operations by name
 * @param lexer_state lexer output whose dictionary resolves symbols to their text
 * @param consumed_stream the symbols to check
 * @param execution_input initial type stack; taken by value and used as the working stack
 * @param execution_output type stack expected once the whole stream has been processed
 * @param sub_bodies pairs of indices into consumed_stream marking GOTO/LABEL regions;
 *        the type stacks recorded at both indices of a pair must be equal
 * @return true when the final stack equals execution_output and every sub-body has
 *         identical stacks at both of its ends, false otherwise
 */
bool type_check(
const parser_context& parser_state,
const lexed_output& lexer_state,
const std::vector<symbol>& consumed_stream,
std::vector<std::string> execution_input,
const std::vector<std::string>& execution_output,
const std::vector<std::pair<size_t, size_t>>& sub_bodies
);
}

+ 3
- 1
include/molasses/parser_types.h 查看文件

@ -98,12 +98,14 @@ namespace molasses {
std::vector<std::string> _args;
std::vector<std::string> _rets;
std::vector<symbol> _body;
std::vector<std::pair<size_t, size_t>> _simple_sub_bodies;
procedure_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets, std::vector<symbol> body)
procedure_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets, std::vector<symbol> body, std::vector<std::pair<size_t, size_t>> sub_bodies)
: _name(std::forward<std::string>(name))
, _args(std::forward<std::vector<std::string>>(args))
, _rets(std::forward<std::vector<std::string>>(rets))
, _body(std::forward<std::vector<symbol>>(body))
, _simple_sub_bodies(std::forward<std::vector<std::pair<size_t, size_t>>>(sub_bodies))
{}
[[nodiscard]] std::string name() const final {

+ 11
- 4
src/molasses/generator_primitives_x86_64_linux.cpp 查看文件

@ -536,11 +536,18 @@ namespace molasses {
}
template<>
std::vector<std::string> generate_label<architecture_t::x86_64_linux>(const std::string& target) {
return {
std::vector<std::string> generate_label<architecture_t::x86_64_linux>(const std::string& target) {
return {
marshal(target)+":\n"
};
}
};
}
/// x86_64 Linux specialization: emits a single `jmp` line targeting the marshalled form of `target`.
template<>
std::vector<std::string> generate_goto<architecture_t::x86_64_linux>(const std::string& target) {
    std::vector<std::string> instructions;
    instructions.push_back(" jmp " + marshal(target) + "\n");
    return instructions;
}
template<>
std::vector<std::string> generate_return<architecture_t::x86_64_linux>() {

+ 77
- 19
src/molasses/parser_primitives.cpp 查看文件

@ -1,8 +1,9 @@
#include <algorithm>
#include <iostream>
#include "molasses/parser_primitives.h"
#include "molasses/generator_primitives.h"
#include "molasses/errors.h"
#include "molasses/generator_primitives.h"
#include <algorithm>
#include <iostream>
#include <span>
namespace molasses {
std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
@ -90,24 +91,40 @@ namespace molasses {
const lexed_output& lexer_state,
const std::vector<symbol>& consumed_stream,
std::vector<std::string> execution_input,
const std::vector<std::string>& execution_output
const std::vector<std::string>& execution_output,
const std::vector<std::pair<size_t, size_t>>& sub_bodies
) {
auto& type_stack = execution_input;
for(const auto& symbol : consumed_stream) {
std::map<size_t, std::vector<std::string>> effective_snapshots;
size_t idx = 0;
for(auto it = consumed_stream.begin(); it != consumed_stream.end(); ++it, ++idx) {
const auto& symbol = *it;
const auto& symbol_text = lexer_state.dictionary.at(symbol);
if(symbol.is_string) {
// Skip GOTOs and LABELs
if(auto ahead = it; ++ahead != consumed_stream.end() and (lexer_state.dictionary.at(*ahead) == "__GOTO__" or lexer_state.dictionary.at(*ahead) == "__LABEL__")) {
effective_snapshots[idx] = type_stack;
it = ahead;
++idx;
} else if(symbol.is_string) {
type_stack.emplace_back("u8 ptr");
} else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) {
type_stack.emplace_back("i32");
} else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) {
type_stack.emplace_back("i64");
} else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
type_stack = type_stack >> *is_op;
}
type_stack = type_stack >> *is_op;
}
}
if(type_stack != execution_output) return false;
for(auto [start, end] : sub_bodies) {
if(effective_snapshots[start] != effective_snapshots[end]) {
return false;
}
}
return type_stack == execution_output;
return true;
}
generate_context parse(parser_context ctx, const lexed_output& lexer_data) {
@ -115,14 +132,19 @@ namespace molasses {
DO_KW = 1,
SEPARATOR_KW,
PROC_KW,
END_KW
END_KW,
LABEL_KW,
GOTO_KW
};
lexed_output fake;
fake.dictionary[PROC_KW] = "__PROC__";
fake.dictionary[PROC_KW] = "__PROC__";
fake.dictionary[SEPARATOR_KW] = "__--__";
fake.dictionary[DO_KW] = "__DO__";
fake.dictionary[END_KW] = "__END__";
fake.dictionary[LABEL_KW] = "__LABEL__";
fake.dictionary[GOTO_KW] = "__GOTO__";
auto tokens = concatenate(fake, lexer_data);
@ -197,16 +219,40 @@ namespace molasses {
// Process body
std::vector<symbol> body;
std::vector<std::pair<size_t, size_t>> sub_bodies;
std::map<std::string, size_t> found_labels;
std::map<std::string, size_t> found_gotos;
while(*it != END_KW) {
body.emplace_back(*it);
last_valid = it;
++it;
if(auto ahead = it; ++ahead != tokens.symbols.end() and (*ahead == GOTO_KW or *ahead == LABEL_KW)) {
if(*ahead == GOTO_KW) {
found_gotos[tokens.dictionary[*it]] = body.size();
} else if(*ahead == LABEL_KW) {
found_labels[tokens.dictionary[*it]] = body.size();
// TODO: Handle duplicate labels
}
body.emplace_back(*it);
body.emplace_back(*ahead);
last_valid = ahead;
it = ++ahead;
} else {
body.emplace_back(*it);
last_valid = it;
++it;
}
check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
}
last_valid = it;
++it;
for(auto& [dest, index] : found_gotos) {
if(not found_labels.contains(dest)) {
throw orphan_goto_error(body[index], dest);
}
sub_bodies.emplace_back(std::min(index, found_labels[dest]), std::max(index, found_labels[dest]));
}
return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body));
return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body, sub_bodies));
#undef check_for_unexpected_stream_end
};
@ -220,7 +266,7 @@ namespace molasses {
} while (progress != tokens.symbols.end());
for(auto& proc : parsed_procedures) {
if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets, proc->_simple_sub_bodies)) {
throw procedure_stack_error();
}
}
@ -239,9 +285,21 @@ namespace molasses {
ops.push_back(instruction);
}
for(auto elem : _body) {
for(auto it = _body.begin(); it != _body.end(); ++it) {
auto elem = *it;
auto token = lexer_data.dictionary.at(elem);
if(elem.is_string) {
if(auto ahead = it; ++ahead != _body.end() and (lexer_data.dictionary.at(*ahead) == "__GOTO__" or lexer_data.dictionary.at(*ahead) == "__LABEL__")) {
if(lexer_data.dictionary.at(*ahead) == "__GOTO__") {
for(auto&& instruction : generate_goto(name() + " in " + token)) {
ops.push_back(instruction);
}
} else if(lexer_data.dictionary.at(*ahead) == "__LABEL__") {
for(auto&& instruction : generate_label(name() + " in " + token)) {
ops.push_back(instruction);
}
}
it = ahead;
} else if(elem.is_string) {
for(auto&& instruction : generate_push_string_ptr(elem)) {
ops.push_back(instruction);
}

+ 82
- 0
tests/007.exp 查看文件

@ -0,0 +1,82 @@
#!/usr/bin/expect
# Path of the compiler under test, taken from the SUGAR_EXECUTABLE environment variable.
set SUGAR_EXECUTABLE $::env(SUGAR_EXECUTABLE)
# Suffix of the generated executable: the test builds and runs /tmp/sugar.generated.$BUILD_NAME.
set BUILD_NAME 007.
# Reports a test failure on standard output and ends the script with exit status 1.
#   reason - text printed after "test failed "
proc abort {reason} {
    puts stdout "test failed $reason"
    exit 1
}
# Stage 1: run the compiler on both test sources with the lex-all and merge-all commands.
# The output must contain "main"; seeing "error" or reaching end of output first fails the test.
spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all
expect {
error { abort "failed to parse" }
eof { abort "cannot find the symbol main in lexed output" }
main
}
# Consume the remaining output, then check the status reported by wait.
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# -------------------------------------------------------------------
# Stage 2: same inputs with the additional parse command.
# The output must contain "main"; seeing "error" or reaching end of output first fails the test.
spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all parse
expect {
error { abort "failed to parse" }
eof { abort "cannot find the main procedure" }
main
}
# Consume the remaining output, then check the status reported by wait.
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# -------------------------------------------------------------------
# Stage 3: same command line as the previous stage, this time looking for
# "exit-syscall-number" (a procedure declared in tests/007/library.mol) in the output.
spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol tests/007/library.mol lex-all merge-all parse
expect {
error { abort "failed to parse" }
eof { abort "cannot find the exit-syscall-number procedure" }
exit-syscall-number
}
# Consume the remaining output, then check the status reported by wait.
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# -------------------------------------------------------------------
# Stage 4: full pipeline (lex, merge, parse, generate, assemble) targeting
# /tmp/sugar.generated.$BUILD_NAME. The output must mention "clang" and then "ld",
# in that order, without "error" or end of output appearing first.
spawn -noecho $SUGAR_EXECUTABLE tests/007/exit-with-3.mol lex tests/007/library.mol lex merge parse /tmp/sugar.generated.$BUILD_NAME generate assemble
expect {
error { abort "failed to compile" }
eof { abort "didn't run clang" }
clang
}
expect {
error { abort "failed to link" }
eof { abort "didn't run ld" }
ld
}
# Consume the remaining output, then check the status reported by wait.
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# -------------------------------------------------------------------
# Stage 5: run the generated executable. Its output must contain "Hello" and the
# status reported by wait must be 3.
# NOTE(review): the "failed to compile" message below is reported while running the
# produced binary, not the compiler — it looks copied from the previous stage; confirm the wording.
spawn -noecho /tmp/sugar.generated.$BUILD_NAME
expect {
error { abort "failed to compile" }
eof { abort "didn't output" }
Hello
}
# Consume the remaining output, then compare the reported status with the expected value 3.
expect eof
lassign [wait] pid spawnid os_error_flag value
if {$value != 3} {
abort "executable didn't return exit code 3 but $value instead"
}

+ 7
- 0
tests/007/exit-with-3.mol 查看文件

@ -0,0 +1,7 @@
__PROC__ main
__--__
__DO__
11_i64 "\tHello world\n" write-out
1 2_i32 + i32-to-i64 exit
__END__

+ 39
- 0
tests/007/library.mol 查看文件

@ -0,0 +1,39 @@
__PROC__ write-syscall-number
__--__
i64
__DO__
1_i64
__END__
__PROC__ stdout-fd
__--__
i64
__DO__
1_i64
__END__
__PROC__ exit-syscall-number
__--__
i64
__DO__
60_i64
__END__
__PROC__ exit
i64
__--__
__DO__
"POTAT" __LABEL__
exit-syscall-number syscall1 drop_i64
60_i64
"POTAT" __GOTO__
drop_i64
__END__
__PROC__ write-out
i64
u8 ptr
__--__
__DO__
u8-ptr_to_i64 stdout-fd write-syscall-number syscall3 drop_i64
__END__

正在加载...
取消
保存