浏览代码

Added several features

- c-strings
- some compiler commands to make building things easier
- enough for a true hello world
- tests for that hello world
- better integration of expect and CTests
- more primitives
- More detailed error messages
master
Ludovic 'Archivist' Lagouardette 1年前
父节点
当前提交
94389f6ae1
共有 14 个文件被更改,包括 679 次插入221 次删除
  1. +1
    -1
      .clang-tidy
  2. +13
    -6
      CMakeLists.txt
  3. +7
    -1
      include/molasses/generator_primitives.h
  4. +12
    -2
      include/molasses/lexer.h
  5. +55
    -16
      include/molasses/parser_primitives.h
  6. +154
    -106
      src/main.cpp
  7. +144
    -42
      src/molasses/generator_primitives_x86_64_linux.cpp
  8. +57
    -8
      src/molasses/lexer.cpp
  9. +90
    -39
      src/molasses/parser_primitives.cpp
  10. +45
    -0
      tests/002.exp
  11. +5
    -0
      tests/002/exit-with-1.mol
  12. +55
    -0
      tests/003.exp
  13. +6
    -0
      tests/003/exit-with-1.mol
  14. +35
    -0
      tests/003/library.mol

+ 1
- 1
.clang-tidy 查看文件

@ -169,4 +169,4 @@ CheckOptions:
- {key: readability-identifier-naming.LocalVariableCase, value: lower_case }
- {key: readability-identifier-naming.GlobalConstantCase, value: lower_case }
- {key: readability-identifier-naming.ConstexprVariableCase, value: lower_case }
- {key: readability-identifier-naming.EnumConstantCase, value: lower_case }
- {key: readability-identifier-naming.EnumConstantCase, value: AnyCase }

+ 13
- 6
CMakeLists.txt 查看文件

@ -17,9 +17,16 @@ add_executable(sugar
include/molasses/parser_primitives.h
include/molasses/generator_primitives.h)
add_test(
NAME id001
WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
COMMAND expect ./tests/001.exp
)
set_property(TEST id001 PROPERTY ENVIRONMENT "SUGAR_EXECUTABLE=$<TARGET_FILE:sugar>")
# Registers a CTest case that runs an `expect` script against the built compiler.
#   testname - name of the CTest test to create
#   filename - path of the expect script, resolved from PROJECT_SOURCE_DIR
# The test runs with PROJECT_SOURCE_DIR as working directory and receives the
# location of the `sugar` executable in the SUGAR_EXECUTABLE environment variable.
function(add_expect_test testname filename)
    add_test(
        NAME "${testname}"
        WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
        COMMAND expect "${filename}"
    )
    set_property(TEST "${testname}" PROPERTY ENVIRONMENT "SUGAR_EXECUTABLE=$<TARGET_FILE:sugar>")
endfunction()
add_expect_test(id001 ./tests/001.exp)
add_expect_test(id002 ./tests/002.exp)
add_expect_test(id003 ./tests/003.exp)

+ 7
- 1
include/molasses/generator_primitives.h 查看文件

@ -8,9 +8,15 @@ namespace molasses {
// Each generator returns the assembly text it produces as a list of lines.

// Emits the label definition line for `target`.
std::vector<std::string> generate_label(const std::string& target);
// Emits the instruction pushing a 32-bit integer literal on the stack.
std::vector<std::string> generate_push_int32(int32_t target);
std::vector<std::string> generate_push_int32(int32_t target);
// Emits the instruction pushing a 64-bit integer literal on the stack.
std::vector<std::string> generate_push_int64(int64_t target);
// Emits a call to the procedure named `target`.
std::vector<std::string> generate_call(const std::string& target);
// NOTE(review): presumably emits a procedure prologue; the definition is not visible here - confirm.
std::vector<std::string> generate_enter();
// Emits the instruction pushing the address of the string emitted for the given symbol.
std::vector<std::string> generate_push_string_ptr(const symbol&);
// Emits the labelled, NUL-terminated string data for the given symbol and text.
std::vector<std::string> generate_string(const symbol&, const std::string&);
}

+ 12
- 2
include/molasses/lexer.h 查看文件

@ -5,13 +5,23 @@
namespace molasses {
// We will always want symbols to be convertible to int for dictionary lookups
using symbol = int;
/// A lexed token: its dictionary id together with the place it was read from.
///
/// The type stays an aggregate, so `symbol{id, file, line, column, is_string}`
/// keeps working; the default member initializers only guarantee that a
/// default-constructed symbol never carries indeterminate values.
struct symbol {
    int id = 0;              ///< key of the token text in lexed_output::dictionary
    std::string file_name;   ///< name of the file the token was lexed from
    int line = 0;            ///< line of the token in that file
    int column = 0;          ///< column of the token in that file
    bool is_string = false;  ///< true when the token was lexed as a string literal

    /// Symbols convert implicitly to their id so they can be used directly
    /// as keys for dictionary lookups.
    operator int() const {
        return id;
    }
};
// Result of lexing one source text.
struct lexed_output {
// Maps a symbol id to the text of the token it stands for.
std::map<int, std::string> dictionary;
// The tokens, in the order they were produced by the lexer.
std::vector<symbol> symbols;
};
lexed_output lex(const std::string &);
lexed_output lex(const std::string & file_name,const std::string & source);
lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs);
}

+ 55
- 16
include/molasses/parser_primitives.h 查看文件

@ -6,6 +6,7 @@
#include <optional>
#include <charconv>
#include <concepts>
#include <sstream>
#include "molasses/lexer.h"
@ -24,6 +25,25 @@ constexpr size_t architecture_ptr_size = 4;
#endif
namespace molasses {
namespace details {
    /// Streams every element of `parts` into `stream`, in order, and returns
    /// the whole content of the stream afterwards.
    ///
    /// Arguments are taken by const reference so that std::string arguments
    /// are not copied on their way to the stream.
    template<typename... PartsT>
    std::string concatenate_builder_impl(std::stringstream& stream, const PartsT&... parts) {
        // Left fold: ((stream << p0) << p1) << ... ; a no-op for an empty pack.
        (stream << ... << parts);
        return stream.str();
    }

    /// Builds a std::string by streaming each argument, in order, through
    /// operator<<. With no argument the result is the empty string.
    template<typename... PartsT>
    std::string concatenate_builder(const PartsT&... parts) {
        std::stringstream stream{};
        return concatenate_builder_impl(stream, parts...);
    }
}
struct type {
[[nodiscard]] virtual std::string name() const = 0;
[[nodiscard]] virtual size_t byte_size() const = 0;
@ -119,33 +139,52 @@ namespace molasses {
// Orders two operations by comparing the strings returned by their name().
inline auto operator<=>(const operation& lhs, const operation& rhs) {
return lhs.name() <=> rhs.name();
}
// Common base of the parser's error types, so that callers can catch all of
// them with a single handler. `str` becomes the what() message.
struct parser_error : public std::runtime_error {
explicit parser_error(const std::string& str) : std::runtime_error(str) {}
};
struct type_input_error : std::runtime_error {
type_input_error() : std::runtime_error("Bad type provided") {}
// Parser error carrying the fixed message "Bad type provided".
struct type_input_error : public parser_error {
type_input_error() : parser_error("Bad type provided") {}
// TODO: Better error message
};
struct value_missing_error : std::runtime_error {
value_missing_error() : std::runtime_error("Expected value, none provided") {}
// Parser error carrying the fixed message "Expected value, none provided".
struct value_missing_error : public parser_error {
value_missing_error() : parser_error("Expected value, none provided") {}
// TODO: Better error message
};
struct procedure_stack_error : std::runtime_error {
procedure_stack_error() : std::runtime_error("Expected the stack to look like the return stack upon completion") {}
// Parser error reported when the stack does not look like the declared
// return stack once a procedure is complete (see the message below).
struct procedure_stack_error : public parser_error {
procedure_stack_error() : parser_error("Expected the stack to look like the return stack upon completion") {}
// TODO: Better error message
};
struct unexpected_token_error : std::runtime_error {
unexpected_token_error() : std::runtime_error("An unexpected token has been encountered") {}
// TODO: Better error message
// Parser error for a token that is not the one required at this position.
//   sym      - the offending symbol; its file name, line and column are reported
//   found    - text of the token that was actually read
//   expected - text of the token that was required
struct unexpected_token_error : public parser_error {
unexpected_token_error(const symbol& sym, const std::string& found, const std::string& expected)
: parser_error (
details::concatenate_builder(
"Unexpected token encountered\n",
"\tAt ", sym.file_name,":",sym.line,":",sym.column,"\n",
"\tExpected ", expected, "\n",
"\tFound ", found, "\n"
)
) {}
};
struct expecting_token_error : std::runtime_error {
expecting_token_error() : std::runtime_error("An expected token has not been encountered before the end of the input") {}
// Parser error for an input that ends while a token is still required.
//   expected - description of the token that was required
//   context  - free-form description of where the parser was, appended on its own line
struct expecting_token_error : public parser_error {
expecting_token_error(const std::string& expected, const std::string& context)
: parser_error(
details::concatenate_builder(
"An expected token has not been encountered before the end of the input\n",
"\tExpected ", expected,"\n",
"\t", context,"\n"
)
)
{}
// TODO: Better error message
};
struct unknown_token_error : std::runtime_error {
unknown_token_error() : std::runtime_error("An unknown token has not been encountered") {}
struct unknown_token_error : public parser_error {
explicit unknown_token_error(k">const symbol& sym) : parser_error(details::concatenate_builder("An unknown token has been encounterede">\n", "\tAt ", sym.file_name,":",sym.line,":",sym.column,"\n")) {}
// TODO: Better error message
};
struct type_expected_with_modifier_error : std::runtime_error {
type_expected_with_modifier_error() : std::runtime_error("A type is expected before a modifier") {}
// Parser error reported when a type modifier appears with no type before it
// to apply to.
struct type_expected_with_modifier_error : public parser_error {
type_expected_with_modifier_error() : parser_error("A type is expected before a modifier") {}
// TODO: Better error message
};
@ -169,7 +208,7 @@ namespace molasses {
};
generate_context parse(parser_context, const lexed_output&);
std::vector<std::string> generate(generate_context);
std::vector<std::string> generate(const generate_context&);
parser_context register_integers(parser_context);
parser_context register_i32_operations(parser_context);

+ 154
- 106
src/main.cpp 查看文件

@ -4,7 +4,6 @@
#include <filesystem>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stack>
#include <string>
#include <variant>
@ -19,120 +18,169 @@ int main(int argc, char** argv) {
argc--;
}
std::stack<compile_element> compile_stack;
try {
for(auto elem : arguments) {
if(elem == "generate") {
if(std::holds_alternative<std::string>(compile_stack.top())) {
auto filename = std::get<std::string>(compile_stack.top());
compile_stack.pop();
if(std::holds_alternative<molasses::generate_context>(compile_stack.top())) {
auto generator = std::get<molasses::generate_context>(compile_stack.top());
std::stack<compile_element> compile_stack;
for(auto elem : arguments) {
if(elem == "generate") {
if(std::holds_alternative<std::string>(compile_stack.top())) {
auto filename = std::get<std::string>(compile_stack.top());
compile_stack.pop();
auto assembler = molasses::generate(generator);
std::ofstream output(filename+".s");
for(const auto& line : assembler) {
output << line;
}
compile_stack.emplace(filename);
} else throw std::runtime_error("generate expects a parsed output");
} else throw std::runtime_error("generate expects a filename");
} else if(elem == "parse") {
molasses::parser_context ctx;
ctx = molasses::register_integers(ctx);
ctx = molasses::register_i32_operations(ctx);
if(std::holds_alternative<molasses::generate_context>(compile_stack.top())) {
auto generator = std::get<molasses::generate_context>(compile_stack.top());
compile_stack.pop();
auto assembler = molasses::generate(generator);
std::ofstream output(filename + ".s");
for(const auto& line : assembler) {
output << line;
}
compile_stack.emplace(filename);
} else
throw std::runtime_error("generate expects a parsed output");
} else
throw std::runtime_error("generate expects a filename");
} else if(elem == "parse") {
molasses::parser_context ctx;
ctx = molasses::register_integers(ctx);
ctx = molasses::register_i32_operations(ctx);
if(std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer = std::get<molasses::lexed_output>(compile_stack.top());
compile_stack.pop();
auto generator = molasses::parse(ctx, lexer);
compile_stack.emplace(generator);
} else throw std::runtime_error("parse expects a lexed output");
} else if(elem == "lex") {
if(std::holds_alternative<std::string>(compile_stack.top())) {
auto filename = std::get<std::string>(compile_stack.top());
compile_stack.pop();
if(not std::filesystem::exists(filename)) throw std::runtime_error("file "+filename+" does not exist");
std::ifstream t(filename);
std::stringstream buffer;
buffer << t.rdbuf();
auto lexed = molasses::lex(buffer.str());
compile_stack.emplace(lexed);
} else throw std::runtime_error("lex expects a filename");
} else if(elem == "merge") {
if(std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer_1 = std::get<molasses::lexed_output>(compile_stack.top());
compile_stack.pop();
if(std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer_2 = std::get<molasses::lexed_output>(compile_stack.top());
auto lexer = std::get<molasses::lexed_output>(compile_stack.top());
compile_stack.pop();
compile_stack.emplace(molasses::concatenate(lexer_1, lexer_2));
} else throw std::runtime_error("merge expects 2 lexed outputs");
} else throw std::runtime_error("merge expects 2 lexed outputs");
} else if(elem == "assemble") {
if(std::holds_alternative<std::string>(compile_stack.top())) {
auto filename = std::get<std::string>(compile_stack.top());
compile_stack.pop();
std::stringstream compile; compile << "clang -c " << filename << ".s -o " << filename << ".o";
std::stringstream link; link << "ld -e _start " << filename << ".o -o " << filename;
std::cout << compile.str() << std::endl;
system(compile.str().c_str());
std::cout << link.str() << std::endl;
system(link.str().c_str());
} else throw std::runtime_error("assemble expects an assembly file");
} else if(elem == "help" or elem == "--help") {
std::cout << "# Sugar\n\n";
std::cout << "## Commands\n\n";
std::cout << "lex : string ➔ lexed_output\n";
std::cout << "> takes a filename to a file that must be compiled\n\n";
std::cout << "merge : lexed_output lexed_output ➔ lexed_output\n";
std::cout << "> merges two lexed modules together\n\n";
std::cout << "parse : lexed_output ➔ parsed_output\n";
std::cout << "> prepares code for generation\n\n";
std::cout << "generate : parsed_output string ➔ string\n";
std::cout << "> takes a root filename, it will be appended with \".s\" and that will be the generated assembly file,\n";
std::cout << "> the filename will not be consumed\n\n";
std::cout << "assemble : string ➔ _ \n";
std::cout << "> takes a root filename, it will be appended with \".s\" and that file will be compiled,\n";
std::cout << "> the compiled output will be the given filename\n\n";
std::cout << "help : _ ➔ _ \n";
std::cout << "> prints this help\n\n";
std::cout << "## Examples\n\n";
std::cout << "- compile the file \"example.mol\" into the \"potato.s\" assembly file\n";
std::cout << "> `$ sugar example.mol lex parse potato generate`\n";
std::cout << "\n";
std::cout << "- compile the file \"example.mol\" into the \"potato\" executable\n";
std::cout << "> `$ sugar example.mol lex parse potato generate assemble`\n";
std::cout << "\n";
std::cout << "- compile the file \"example.mol\" and \"2.mol\" into the \"potato\" executable\n";
std::cout << "> `$ sugar example.mol lex 2.mol lex merge parse potato generate assemble`\n";
} else compile_stack.emplace(elem);
}
if(compile_stack.size() > 1) throw std::runtime_error("build left unfinished operations");
if(not compile_stack.empty()) {
if(std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer = std::get<molasses::lexed_output>(compile_stack.top());
for(auto elem : lexer.symbols) {
std::cout << elem << " ";
}
std::cout << "\n\n";
for(auto [id, name] : lexer.dictionary) {
std::cout << id << ": " << name << "\n";
}
} else if(std::holds_alternative<molasses::generate_context>(compile_stack.top())) {
auto generator = std::get<molasses::generate_context>(compile_stack.top());
for(const auto& elem : generator.procedures) {
std::cout << elem->_name << " : ";
for(const auto& args : elem->_args) {
std::cout << args << " ";
auto generator = molasses::parse(ctx, lexer);
compile_stack.emplace(generator);
} else
throw std::runtime_error("parse expects a lexed output");
} else if(elem == "lex") {
if(std::holds_alternative<std::string>(compile_stack.top())) {
auto filename = std::get<std::string>(compile_stack.top());
compile_stack.pop();
if(not std::filesystem::exists(filename))
throw std::runtime_error("file " + filename + " does not exist");
std::ifstream t(filename);
std::stringstream buffer;
buffer << t.rdbuf();
auto lexed = molasses::lex(filename, buffer.str());
compile_stack.emplace(lexed);
} else
throw std::runtime_error("lex expects a filename");
} else if(elem == "lex-all") {
std::vector<molasses::lexed_output> lexed_list;
while(not compile_stack.empty() and std::holds_alternative<std::string>(compile_stack.top())) {
auto filename = std::get<std::string>(compile_stack.top());
compile_stack.pop();
if(not std::filesystem::exists(filename))
throw std::runtime_error("file " + filename + " does not exist");
std::ifstream t(filename);
std::stringstream buffer;
buffer << t.rdbuf();
auto lexed = molasses::lex(filename, buffer.str());
lexed_list.emplace_back(lexed);
}
std::cout << "->";
for(const auto& rets : elem->_rets) {
std::cout << " " << rets;
for(auto& lexed : lexed_list) {
compile_stack.emplace(std::move(lexed));
}
} else if(elem == "merge") {
if(std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer_1 = std::get<molasses::lexed_output>(compile_stack.top());
compile_stack.pop();
if(std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer_2 = std::get<molasses::lexed_output>(compile_stack.top());
compile_stack.pop();
compile_stack.emplace(molasses::concatenate(lexer_1, lexer_2));
} else
throw std::runtime_error("merge expects 2 lexed outputs");
} else
throw std::runtime_error("merge expects 2 lexed outputs");
} else if(elem == "merge-all") {
if(std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer_1 = std::get<molasses::lexed_output>(compile_stack.top());
compile_stack.pop();
while(not compile_stack.empty() and std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer_2 = std::get<molasses::lexed_output>(compile_stack.top());
compile_stack.pop();
lexer_1 = molasses::concatenate(lexer_1, lexer_2);
}
compile_stack.emplace(lexer_1);
} else
throw std::runtime_error("merge-all expects at least 1 lexed outputs");
} else if(elem == "assemble") {
if(std::holds_alternative<std::string>(compile_stack.top())) {
auto filename = std::get<std::string>(compile_stack.top());
compile_stack.pop();
std::stringstream compile;
compile << "clang -c " << filename << ".s -o " << filename << ".o";
std::stringstream link;
link << "ld -e _start " << filename << ".o -o " << filename;
std::cout << compile.str() << std::endl;
system(compile.str().c_str());
std::cout << link.str() << std::endl;
system(link.str().c_str());
} else
throw std::runtime_error("assemble expects an assembly file");
} else if(elem == "help" or elem == "--help") {
std::cout << "# Sugar\n\n";
std::cout << "## Commands\n\n";
std::cout << "lex : string ➔ lexed_output\n";
std::cout << "> takes a filename to a file that must be compiled\n\n";
std::cout << "lex-all : string* ➔ lexed_output*\n";
std::cout << "> takes as many filenames to files that must be compiled as can be read and passes them through the lexed\n\n";
std::cout << "merge : lexed_output lexed_output ➔ lexed_output\n";
std::cout << "> merges two lexed modules together\n\n";
std::cout << "merge-all : lexed_output* ➔ lexed_output\n";
std::cout << "> merges as many lexed modules together as present on the top of the stack\n\n";
std::cout << "parse : lexed_output ➔ parsed_output\n";
std::cout << "> prepares code for generation\n\n";
std::cout << "generate : parsed_output string ➔ string\n";
std::cout << "> takes a root filename, it will be appended with \".s\" and that will be the generated assembly file,\n";
std::cout << "> the filename will not be consumed\n\n";
std::cout << "assemble : string ➔ _ \n";
std::cout
<< "> takes a root filename, it will be appended with \".s\" and that file will be compiled,\n";
std::cout << "> the compiled output will be the given filename\n\n";
std::cout << "help : _ ➔ _ \n";
std::cout << "> prints this help\n\n";
std::cout << "## Examples\n\n";
std::cout << "- compile the file \"example.mol\" into the \"potato.s\" assembly file\n";
std::cout << "> `$ sugar example.mol lex parse potato generate`\n";
std::cout << "\n";
std::cout << "- compile the file \"example.mol\" into the \"potato\" executable\n";
std::cout << "> `$ sugar example.mol lex parse potato generate assemble`\n";
std::cout << "\n";
std::cout << "- compile the file \"example.mol\" and \"2.mol\" into the \"potato\" executable\n";
std::cout << "> `$ sugar example.mol lex 2.mol lex merge parse potato generate assemble`\n";
} else
compile_stack.emplace(elem);
}
if(compile_stack.size() > 1) throw std::runtime_error("build left unfinished operations");
if(not compile_stack.empty()) {
if(std::holds_alternative<molasses::lexed_output>(compile_stack.top())) {
auto lexer = std::get<molasses::lexed_output>(compile_stack.top());
for(auto elem : lexer.symbols) {
std::cout << elem << " ";
}
std::cout << "\n\n";
for(auto [id, name] : lexer.dictionary) {
std::cout << id << ": " << name << "\n";
}
} else if(std::holds_alternative<molasses::generate_context>(compile_stack.top())) {
auto generator = std::get<molasses::generate_context>(compile_stack.top());
for(const auto& elem : generator.procedures) {
std::cout << elem->_name << " : ";
for(const auto& args : elem->_args) {
std::cout << args << " ";
}
std::cout << "->";
for(const auto& rets : elem->_rets) {
std::cout << " " << rets;
}
std::cout << "\n";
}
}
}
} catch (molasses::parser_error& error) {
std::cerr << error.what();
}
}

+ 144
- 42
src/molasses/generator_primitives_x86_64_linux.cpp 查看文件

@ -2,36 +2,117 @@
#include "molasses/generator_primitives.h"
namespace molasses {
parser_context register_integers(parser_context ctx)
requires (architecture == architecture_t::x86_64_linux) {
ctx.types.push_back(std::make_shared<primitive_type>("i8", 1));
ctx.types.push_back(std::make_shared<primitive_type>("i16", 2));
ctx.types.push_back(std::make_shared<primitive_type>("i32", 4));
ctx.types.push_back(std::make_shared<primitive_type>("i64", 8));
ctx.types.push_back(std::make_shared<primitive_type>("u8", 1));
ctx.types.push_back(std::make_shared<primitive_type>("u16", 2));
ctx.types.push_back(std::make_shared<primitive_type>("u32", 4));
ctx.types.push_back(std::make_shared<primitive_type>("u64", 8));
return ctx;
}
parser_context register_i32_operations(parser_context ctx)
/// Turns an arbitrary identifier into text made only of letters, digits and
/// underscores, so it can be used as an assembler label.
///
/// Letters are kept as they are. Every other byte is replaced by its numeric
/// value wrapped in "__" ("___" when a digit is the very first character).
/// The numeric value is the byte read as unsigned, so bytes above 0x7F never
/// put a '-' sign into the label.
std::string marshal(const std::string& target) {
    std::string mangled;
    bool is_first = true;
    for(const char character : target) {
        // The <cctype> classifiers have undefined behaviour for negative
        // values, which a plain char holds for any byte above 0x7F.
        const auto byte = static_cast<unsigned char>(character);
        if(isalpha(byte)) {
            mangled += character;
        } else if(isdigit(byte) and is_first) {
            mangled += "___" + std::to_string(static_cast<int>(byte)) + "___";
        } else {
            mangled += "__" + std::to_string(static_cast<int>(byte)) + "__";
        }
        is_first = false;
    }
    return mangled;
}
/// Escapes `target` so it can be placed between double quotes in an
/// assembler string directive.
///
/// - newline, tab, double quote and backslash use their two-character escape;
/// - other control characters (including NUL) are written as a three-digit
///   octal escape, which cannot merge with a digit that follows;
/// - everything else, including the single quote, is copied unchanged.
std::string escape(const std::string& target) {
    std::string escaped;
    for(const char character : target) {
        switch(character) {
            case '\n':
                escaped += "\\n";
                break;
            case '\t':
                escaped += "\\t";
                break;
            case '"':
                escaped += "\\\"";
                break;
            case '\\':
                // A raw backslash would start an escape sequence of its own.
                escaped += "\\\\";
                break;
            default: {
                const auto byte = static_cast<unsigned char>(character);
                if(byte < 0x20 or byte == 0x7F) {
                    escaped += '\\';
                    escaped += static_cast<char>('0' + ((byte >> 6) & 7));
                    escaped += static_cast<char>('0' + ((byte >> 3) & 7));
                    escaped += static_cast<char>('0' + (byte & 7));
                } else {
                    escaped += character;
                }
            }
        }
    }
    return escaped;
}
// Appends the signed and unsigned integer primitive types (8 to 64 bits) to
// the types of the given context and returns the updated context.
parser_context register_integers(parser_context ctx) requires(architecture == architecture_t::x86_64_linux)
{
    // Name and size in bytes of each type, in registration order.
    struct integer_spec {
        const char* name;
        int bytes;
    };
    const integer_spec specs[] = {
        {"i8", 1}, {"i16", 2}, {"i32", 4}, {"i64", 8},
        {"u8", 1}, {"u16", 2}, {"u32", 4}, {"u64", 8},
    };
    for(const auto& spec : specs) {
        ctx.types.push_back(std::make_shared<primitive_type>(spec.name, spec.bytes));
    }
    return ctx;
}
parser_context register_i32_operations(parser_context ctx)
requires (architecture == architecture_t::x86_64_linux) {
ctx.operations.emplace_back(
std::make_shared<molasses::primitive_operation>(
std::string{"+"},
std::vector<std::string>({"i32", "i32"}),
std::vector<std::string>({"i32"}),
std::vector<std::string>({
" popq %rax\n",
" popq %rbx\n",
" addl %ebx, %eax\n",
" andl $0xFFFFFFFF, %eax\n",
" pushq %rax\n"
})
)
);
ctx.operations.emplace_back(
std::make_shared<molasses::primitive_operation>(
std::string{"+"},
std::vector<std::string>({"i32", "i32"}),
std::vector<std::string>({"i32"}),
std::vector<std::string>({
" popq %rax\n",
" popq %rbx\n",
" addl %ebx, %eax\n",
" andl $0xFFFFFFFF, %eax\n",
" pushq %rax\n"
})
)
);
ctx.operations.emplace_back(
std::make_shared<molasses::primitive_operation>(
std::string{"+_i64"},
std::vector<std::string>({"i64", "i64"}),
std::vector<std::string>({"i64"}),
std::vector<std::string>({
" popq %rax\n",
" popq %rbx\n",
" addq %rbx, %rax\n",
" pushq %rax\n"
})
)
);
ctx.operations.emplace_back(
std::make_shared<molasses::primitive_operation>(
std::string{"/%_i64"},
std::vector<std::string>({"i64", "i64"}),
std::vector<std::string>({"i64", "i64"}),
std::vector<std::string>({
" popq %rax\n",
" popq %rbx\n",
" divq %rbx, %rax\n",
" pushq %rax\n",
" pushq %rdx\n"
// TODO: this is actually unsigned division, so it needs improvements on negative numbers
})
)
);
ctx.operations.emplace_back(
std::make_shared<molasses::primitive_operation>(
std::string{"u8-ptr_to_i64"},
std::vector<std::string>({"u8 ptr"}),
std::vector<std::string>({"i64"}),
std::vector<std::string>({
})
)
);
ctx.operations.emplace_back(
std::make_shared<molasses::primitive_operation>(
std::string{"*"},
@ -181,24 +262,45 @@ namespace molasses {
return ctx;
}
std::vector<std::string> generate_call(const std::string& target)
requires (architecture == architecture_t::x86_64_linux) {
return {
" call "+target+"\n",
};
}
// Produces the call instruction targeting the marshalled form of `target`.
std::vector<std::string> generate_call(const std::string& target)
requires (architecture == architecture_t::x86_64_linux) {
    std::vector<std::string> lines;
    lines.emplace_back(" call " + marshal(target) + "\n");
    return lines;
}
// Produces the data for a string literal: a label derived from the id of
// `representation`, followed by an .asciz directive holding the escaped text.
std::vector<std::string> generate_string(const symbol& representation, const std::string& string_value)
requires (architecture == architecture_t::x86_64_linux) {
    const std::string label_line = "__EMITED_STRING_____" + std::to_string(representation.id) + "___:\n";
    const std::string data_line = " .asciz \"" + escape(string_value) + "\"\n";

    std::vector<std::string> lines;
    lines.push_back(label_line);
    lines.push_back(data_line);
    return lines;
}
// Produces the instruction pushing the address of the string label derived
// from the id of `representation`.
std::vector<std::string> generate_push_string_ptr(const symbol& representation) {
    std::vector<std::string> lines;
    lines.emplace_back(" pushq $__EMITED_STRING_____" + std::to_string(representation.id) + "___\n");
    return lines;
}
std::vector<std::string> generate_push_int32(int32_t target)
requires (architecture == architecture_t::x86_64_linux) {
return {
" pushq $" +std::to_string(target)+ "\n"
};
}
// Produces the instruction pushing the 32-bit literal `target` as an immediate.
std::vector<std::string> generate_push_int32(int32_t target)
requires (architecture == architecture_t::x86_64_linux) {
    std::vector<std::string> lines;
    lines.emplace_back(" pushq $" + std::to_string(target) + "\n");
    return lines;
}
// Produces the instruction pushing the 64-bit literal `target` as an immediate.
std::vector<std::string> generate_push_int64(int64_t target)
requires (architecture == architecture_t::x86_64_linux) {
    std::vector<std::string> lines;
    lines.emplace_back(" pushq $" + std::to_string(target) + "\n");
    return lines;
}
std::vector<std::string> generate_label(const std::string& target)
requires (architecture == architecture_t::x86_64_linux) {
return {
target+":\n"
marshal(target)+":\n"
};
}

+ 57
- 8
src/molasses/lexer.cpp 查看文件

@ -5,12 +5,23 @@
#include <iostream>
namespace molasses {
lexed_output lex(const std::string & source) {
lexed_output lex(const std::string& file_name, const std::string & source) {
lexed_output output;
std::map<std::string, int> reverse_dictionary;
std::stringstream builder;
int token_counter = 1;
int line = 1;
int column = 0;
enum class state_machine_t {
normal,
string,
string_escape,
string_end,
};
state_machine_t state = state_machine_t::normal;
// Processes the current token into the output if it is not empty
// This should be called upon reaching the end of a token
const auto process_token = [&](const std::string& token) {
@ -22,10 +33,10 @@ namespace molasses {
) {
reverse_dictionary[token] = token_counter;
output.dictionary[token_counter] = token;
current_symbol = token_counter;
current_symbol = {token_counter, file_name, line, column, state == state_machine_t::string_end};
token_counter++;
} else {
current_symbol = it->second;
current_symbol = {it->second, file_name, line, column, state == state_machine_t::string_end};
}
output.symbols.push_back(current_symbol);
builder = std::stringstream();
@ -33,9 +44,47 @@ namespace molasses {
};
for(auto& character : source) {
if(std::isspace(character)) {
process_token(builder.str());
} else {
if(character == '\n') {
line++;
column = 0;
}
column++;
if(state == state_machine_t::string_escape) {
switch(character) {
case 'n': builder << '\n'; break;
case 't': builder << '\t'; break;
case '\\': [[fallthrough]];
default:
builder << character;
}
continue;
}
if(character == '\"') {
if(builder.view().empty() && state == state_machine_t::normal) {
state = state_machine_t::string;
continue;
} else if (state == state_machine_t::string) {
state = state_machine_t::string_end;
continue;
}
} else if(character == '\\' && state == state_machine_t::string) {
state = state_machine_t::string_escape;
continue;
}
if(std::isspace(character)) {
if(state == state_machine_t::normal or state == state_machine_t::string_end) {
process_token(builder.str());
state = state_machine_t::normal;
} else {
builder << character;
}
} else {
if(state == state_machine_t::string_end) {
std::stringstream quoted;
quoted << "\"" << builder.str() << "\"";
builder.swap(quoted);
state = state_machine_t::normal;
}
builder << character;
}
}
@ -99,7 +148,7 @@ namespace molasses {
//This diagnostic is pretty lousy, but that is what happens when keys are taken by reference
#pragma clang diagnostic push
#pragma ide diagnostic ignored "LocalValueEscapesScope"
old_symbol = conversions[old_symbol];
old_symbol.id = conversions[old_symbol];
#pragma clang diagnostic pop
}

+ 90
- 39
src/molasses/parser_primitives.cpp 查看文件

@ -1,5 +1,4 @@
#include <algorithm>
#include <cassert>
#include <iostream>
#include "molasses/parser_primitives.h"
#include "molasses/generator_primitives.h"
@ -26,27 +25,55 @@ namespace molasses {
}
return current_stack;
}
/// Parses the integer at the start of [begin, end), choosing the base from
/// the prefix: "0x" selects hexadecimal, a leading "0" selects octal, and
/// anything else is decimal.
///
/// @param str   the text being parsed; only its size is consulted
/// @param value receives the parsed number when parsing succeeds
/// @param begin first character of the text
/// @param end   one past the last character of the text
/// @return the std::from_chars result: `ptr` is the first character that was
///         not consumed, and `ec` is non-zero when no number could be read
///         or when it does not fit in `value`.
///
/// The leading "0" of an octal literal is itself a valid octal digit, so it
/// is parsed rather than skipped. This makes a lone zero followed by a suffix
/// (such as "0_i64") parse as 0 instead of failing with no digits.
template<typename StrT, typename ValueT, typename BeginT, typename EndT>
std::from_chars_result conditional_begin_int_parse(const StrT& str, ValueT& value, BeginT& begin, EndT& end) {
    if(str.size() > 2 and begin[0] == '0' and begin[1] == 'x') {
        return std::from_chars(begin + 2, end, value, 16);
    }
    if(str.size() > 1 and begin[0] == '0') {
        return std::from_chars(begin, end, value, 8);
    }
    return std::from_chars(begin, end, value, 10);
}
std::optional<int32_t> try_parse_int32(const std::string& str) {
int32_t value;
auto begin = str.data();
auto end = str.data()+str.size();
auto result = std::from_chars(begin, end, value, 10);
// TODO: Add other bases
if(result.ptr == end) {
return value;
}
return std::nullopt;
}
/// Tries to read `str` as a 32-bit integer literal.
///
/// Accepts a number on its own, or a number immediately followed by the
/// "_i32" suffix.
///
/// @return the parsed value, or std::nullopt when `str` is not such a
///         literal, holds no digits, or does not fit in 32 bits.
std::optional<int32_t> try_parse_int32(const std::string& str) {
    int32_t value = 0;
    auto begin = str.data();
    auto end = str.data()+str.size();
    const auto result = conditional_begin_int_parse(str, value, begin, end);
    // On failure `value` was never written: do not hand it back to the caller.
    if(result.ec != std::errc{}) {
        return std::nullopt;
    }
    if(result.ptr == end) {
        return value;
    }
    if(std::string_view{result.ptr, static_cast<size_t>(end - result.ptr)} == "_i32") {
        return value;
    }
    return std::nullopt;
}
/// Tries to read `str` as a 64-bit integer literal.
///
/// Only a number immediately followed by the "_i64" suffix is accepted; a
/// number without suffix is not a 64-bit literal and yields std::nullopt.
///
/// @return the parsed value, or std::nullopt when `str` is not such a
///         literal, holds no digits, or does not fit in 64 bits.
std::optional<int64_t> try_parse_int64(const std::string& str) {
    int64_t value = 0;
    auto begin = str.data();
    auto end = str.data()+str.size();
    const auto result = conditional_begin_int_parse(str, value, begin, end);
    // On failure `value` was never written: do not hand it back to the caller.
    if(result.ec != std::errc{}) {
        return std::nullopt;
    }
    if(result.ptr == end) {
        return std::nullopt;
    }
    if(std::string_view{result.ptr, static_cast<size_t>(end - result.ptr)} == "_i64") {
        return value;
    }
    return std::nullopt;
}
auto find_ptr_by_name_in_container(auto container, const auto& name) -> typeof(*std::begin(container)) {
/// Searches `container` for the first element whose `name()` equals `name`.
///
/// @param container a range of pointer-like elements exposing `->name()`
/// @param name      the name to look for
/// @return a copy of the matching element, or a null pointer when no element
///         matches.
///
/// The container is taken by const reference and the elements are inspected
/// in place, so a lookup copies neither the container nor its pointers.
template<typename ContainerT, typename NameT>
auto find_ptr_by_name_in_container(const ContainerT& container, const NameT& name) -> std::decay_t<decltype(*std::begin(container))> {
    const auto it = std::find_if(std::begin(container), std::end(container), [&](const auto& elem){
        return elem->name() == name;
    });
    if(it != std::end(container)) {
        return *it;
    }
    return nullptr;
}
std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
@ -68,9 +95,13 @@ namespace molasses {
for(const auto& symbol : consumed_stream) {
const auto& symbol_text = lexer_state.dictionary.at(symbol);
if(auto is_int = try_parse_int32(symbol_text); is_int) {
type_stack.emplace_back("i32");
} else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
if(symbol.is_string) {
type_stack.emplace_back("u8 ptr");
} else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) {
type_stack.emplace_back("i32");
} else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) {
type_stack.emplace_back("i64");
} else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
type_stack = type_stack >> *is_op;
}
}
@ -104,7 +135,7 @@ namespace molasses {
auto compact_type_modifiers = [&](const std::vector<std::string>& type_info) -> std::vector<std::string> {
std::vector<std::string> ret_val;
for(auto elem : type_info) {
for(const auto& elem : type_info) {
if(elem == "ptr") {
if(ret_val.empty()) throw type_expected_with_modifier_error();
ret_val.back() += " ptr";
@ -116,31 +147,32 @@ namespace molasses {
return ret_val;
};
auto parse_proc = [&](auto it) -> std::pair<typeof(it), std::shared_ptr<procedure_operation>> {
#define CHECK_FOR_UNEXPECTED_STREAM_END \
if(it == tokens.symbols.end()) { \
throw expecting_token_error(); \
}
auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
#define check_for_unexpected_stream_end(expected, context) \
do{if(it == tokens.symbols.end()) { \
throw expecting_token_error(expected, context); \
}}while(false)
if(*it != PROC_KW) {
throw unexpected_token_error();
throw unexpected_token_error(*it, tokens.dictionary[*it],tokens.dictionary[PROC_KW]);
}
++it;
CHECK_FOR_UNEXPECTED_STREAM_END;
check_for_unexpected_stream_end(tokens.dictionary[PROC_KW], details::concatenate_builder("In top level, file ",it->file_name, ":", it->line, ":", it->column));
std::string name = tokens.dictionary.at(*it);
auto& name_symbol = *it;
++it;
CHECK_FOR_UNEXPECTED_STREAM_END;
check_for_unexpected_stream_end("Procedure-Name", details::concatenate_builder("In top level, file ",it->file_name, ":", it->line, ":", it->column));
// Process arguments list
std::vector<std::string> argument_types;
while(*it != SEPARATOR_KW) {
argument_types.emplace_back(tokens.dictionary.at(*it));
++it;
CHECK_FOR_UNEXPECTED_STREAM_END;
check_for_unexpected_stream_end(tokens.dictionary[SEPARATOR_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
}
++it;
CHECK_FOR_UNEXPECTED_STREAM_END;
check_for_unexpected_stream_end("Procedure-Argument-List", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
argument_types = compact_type_modifiers(argument_types);
// Process return types list
@ -148,10 +180,10 @@ namespace molasses {
while(*it != DO_KW) {
return_types.emplace_back(tokens.dictionary.at(*it));
++it;
CHECK_FOR_UNEXPECTED_STREAM_END;
check_for_unexpected_stream_end(tokens.dictionary[DO_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
}
++it;
CHECK_FOR_UNEXPECTED_STREAM_END;
check_for_unexpected_stream_end("Procedure-Return-List", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
return_types = compact_type_modifiers(return_types);
// Process the procedure body
@ -159,13 +191,13 @@ namespace molasses {
while(*it != END_KW) {
body.emplace_back(*it);
++it;
CHECK_FOR_UNEXPECTED_STREAM_END;
check_for_unexpected_stream_end(tokens.dictionary[END_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
}
++it;
return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body));
#undef CHECK_FOR_UNEXPECTED_STREAM_END
};
#undef check_for_unexpected_stream_end
};
auto progress = tokens.symbols.begin();
@ -198,16 +230,24 @@ namespace molasses {
for(auto elem : _body) {
auto token = lexer_data.dictionary.at(elem);
if(auto result = try_parse_int32(token); result) {
if(elem.is_string) {
for(auto&& instruction : generate_push_string_ptr(elem)) {
ops.push_back(instruction);
}
} else if(auto result = try_parse_int32(token); result) {
for(auto&& instruction : generate_push_int32(result.value())) {
ops.push_back(instruction);
}
} else if(auto result = try_parse_int64(token); result) {
for(auto&& instruction : generate_push_int64(result.value())) {
ops.push_back(instruction);
}
} else if(auto op = ctx.lookup_operation(token); op) {
for(auto&& instruction : op->emit(ctx)) {
ops.push_back(instruction);
}
} else {
throw unknown_token_error();
throw unknown_token_error(elem);
}
}
@ -222,18 +262,29 @@ namespace molasses {
return generate_call(name());
}
std::vector<std::string> generate(generate_context ctx) {
std::vector<std::string> generate(const generate_context& ctx) {
std::vector<std::string> generated;
for(auto instr : initialize_stack()) {
for(const auto& instr : initialize_stack()) {
generated.push_back(instr);
}
for(auto proc : ctx.procedures) {
for(auto instr : proc->generate(ctx.parser, ctx.lexer)) {
for(const auto& proc : ctx.procedures) {
for(const auto& instr : proc->generate(ctx.parser, ctx.lexer)) {
generated.push_back(instr);
}
}
std::set<int> done;
for(const auto& value : ctx.lexer.symbols) {
if(value.is_string && not done.contains(value.id)) {
for(const auto& instr : generate_string(value, ctx.lexer.dictionary.at(value.id))) {
generated.push_back(instr);
}
done.insert(value.id);
}
}
return generated;
}
}

+ 45
- 0
tests/002.exp 查看文件

@ -0,0 +1,45 @@
#!/usr/bin/expect
# Path of the compiler under test, read from the SUGAR_EXECUTABLE environment
# variable. Reading an unset variable is a Tcl error, so the script stops here
# when the variable is missing.
set SUGAR_EXECUTABLE $::env(SUGAR_EXECUTABLE)
# abort reason
#   Prints "test failed <reason>" and ends the script with exit status 1.
proc abort {reason} {
puts "test failed $reason"
exit 1
}
# Parse stage: run the compiler on the test program with the lex and parse
# commands only.
spawn -noecho $SUGAR_EXECUTABLE tests/002/exit-with-1.mol lex parse
# Patterns are tried in the order listed: abort if "error" appears in the
# output, or if the process ends before "main" has been seen. Matching "main"
# has no action and simply lets the script continue.
# NOTE(review): there is no timeout clause, so when nothing matches before
# Expect's timeout expires the script just carries on -- confirm this is intended.
expect {
error { abort "failed to parse" }
eof { abort "cannot find the main" }
main
}
# Consume the rest of the output until the spawned process closes it.
expect eof
# wait returns four values: pid, spawn id, OS error flag and a status value.
# The last one is treated as the exit status of the compiler.
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# Compile stage: same input as the parse stage, followed by the output path
# /tmp/sugar.generated and the generate and assemble commands.
# NOTE(review): the output path is fixed and identical in tests/003.exp, so the
# two tests presumably cannot run concurrently -- verify before using ctest -j.
spawn -noecho $SUGAR_EXECUTABLE tests/002/exit-with-1.mol lex parse /tmp/sugar.generated generate assemble
# The output must mention "clang" before the process ends and without "error"
# appearing first. Presumably the compiler echoes the commands it runs -- TODO confirm.
expect {
error { abort "failed to compile" }
eof { abort "didn't run clang" }
clang
}
# After "clang", the output must also mention "ld" under the same conditions.
expect {
error { abort "failed to link" }
eof { abort "didn't run ld" }
ld
}
# Consume the rest of the output until the spawned process closes it.
expect eof
# The fourth value returned by wait is treated as the compiler's exit status.
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# Run stage: execute the binary produced by the compile stage and wait for it
# to finish.
spawn -noecho /tmp/sugar.generated
expect eof
# The test program is expected to terminate with exit status 1; any other
# status fails the test.
lassign [wait] pid spawnid os_error_flag value
if {$value != 1} {
abort "executable didn't return exit code 1 but $value instead"
}

+ 5
- 0
tests/002/exit-with-1.mol 查看文件

@ -0,0 +1,5 @@
__PROC__ main
__--__
__DO__
1_i64 30_i64 30_i64 +_i64 syscall1 drop_i64
__END__

+ 55
- 0
tests/003.exp 查看文件

@ -0,0 +1,55 @@
#!/usr/bin/expect
# Path of the compiler under test, read from the SUGAR_EXECUTABLE environment
# variable. Reading an unset variable is a Tcl error, so the script stops here
# when the variable is missing.
set SUGAR_EXECUTABLE $::env(SUGAR_EXECUTABLE)
# abort reason
#   Prints "test failed <reason>" and ends the script with exit status 1.
proc abort {reason} {
puts "test failed $reason"
exit 1
}
# Parse stage: lex the test program and the library file, then run the merge
# and parse commands on the result.
spawn -noecho $SUGAR_EXECUTABLE tests/003/exit-with-1.mol lex tests/003/library.mol lex merge parse
# The output must mention "exit-syscall-number" (a procedure defined in
# library.mol) before the process ends and without "error" appearing first.
# NOTE(review): there is no timeout clause, so when nothing matches before
# Expect's timeout expires the script just carries on -- confirm this is intended.
expect {
error { abort "failed to parse" }
eof { abort "cannot find the exit-syscall-number procedure" }
exit-syscall-number
}
# After that, the output must also mention "main" under the same conditions.
expect {
error { abort "failed to parse" }
eof { abort "cannot find the main" }
main
}
# Consume the rest of the output until the spawned process closes it.
expect eof
# The fourth value returned by wait is treated as the compiler's exit status.
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# Compile stage: same inputs as the parse stage, followed by the output path
# /tmp/sugar.generated and the generate and assemble commands.
# NOTE(review): the output path is fixed and identical in tests/002.exp, so the
# two tests presumably cannot run concurrently -- verify before using ctest -j.
spawn -noecho $SUGAR_EXECUTABLE tests/003/exit-with-1.mol lex tests/003/library.mol lex merge parse /tmp/sugar.generated generate assemble
# The output must mention "clang" before the process ends and without "error"
# appearing first. Presumably the compiler echoes the commands it runs -- TODO confirm.
expect {
error { abort "failed to compile" }
eof { abort "didn't run clang" }
clang
}
# After "clang", the output must also mention "ld" under the same conditions.
expect {
error { abort "failed to link" }
eof { abort "didn't run ld" }
ld
}
# Consume the rest of the output until the spawned process closes it.
expect eof
# The fourth value returned by wait is treated as the compiler's exit status.
lassign [wait] pid spawnid os_error_flag value
if {$value != 0} {
abort "compiler crashed"
}
# Run stage: execute the binary produced by the compile stage and check what
# it prints and how it exits.
spawn -noecho /tmp/sugar.generated
# The program must print "Hello" before it ends. The error branch fires when
# the program's own output contains "error"; its message names the generated
# executable rather than the compiler, since no compilation happens here.
expect {
error { abort "generated executable reported an error" }
eof { abort "didn't output" }
Hello
}
# Consume the rest of the output until the program closes it.
expect eof
# The test program is expected to terminate with exit status 1; any other
# status fails the test.
lassign [wait] pid spawnid os_error_flag value
if {$value != 1} {
abort "executable didn't return exit code 1 but $value instead"
}

+ 6
- 0
tests/003/exit-with-1.mol 查看文件

@ -0,0 +1,6 @@
__PROC__ main
__--__
__DO__
11_i64 "Hello world" write-out
1_i64 exit
__END__

+ 35
- 0
tests/003/library.mol 查看文件

@ -0,0 +1,35 @@
__PROC__ write-syscall-number
__--__
i64
__DO__
1_i64
__END__
__PROC__ stdout-fd
__--__
i64
__DO__
1_i64
__END__
__PROC__ exit-syscall-number
__--__
i64
__DO__
60_i64
__END__
__PROC__ exit
i64
__--__
__DO__
exit-syscall-number syscall1 drop_i64
__END__
__PROC__ write-out
i64
u8 ptr
__--__
__DO__
u8-ptr_to_i64 stdout-fd write-syscall-number syscall3 drop_i64
__END__

||||||
x
 
000:0
正在加载...
取消
保存