@ -0,0 +1,4 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" /> | |||
</project> |
@ -0,0 +1,8 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="ProjectModuleManager"> | |||
<modules> | |||
<module fileurl="file://$PROJECT_DIR$/.idea/sugar.iml" filepath="$PROJECT_DIR$/.idea/sugar.iml" /> | |||
</modules> | |||
</component> | |||
</project> |
@ -0,0 +1,2 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<module classpath="CMake" type="CPP_MODULE" version="4" /> |
@ -0,0 +1,6 @@ | |||
<?xml version="1.0" encoding="UTF-8"?> | |||
<project version="4"> | |||
<component name="VcsDirectoryMappings"> | |||
<mapping directory="$PROJECT_DIR$" vcs="Git" /> | |||
</component> | |||
</project> |
@ -0,0 +1,10 @@ | |||
cmake_minimum_required(VERSION 3.24)
project(sugar)

# REQUIRED aborts configuration when the cppfront package cannot be found.
find_package(cppfront REQUIRED)

# The sources use C++20-only features (operator<=>, designated initializers),
# so refuse to configure with a compiler that would silently fall back to an
# older standard.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

add_executable(sugar
    src/main.cpp
    src/molasses/lexer.cpp
    include/molasses/lexer.h
    src/molasses/parser_primitives.cpp
    include/molasses/parser_primitives.h)

# Scope the public header directory to the target instead of the whole directory tree.
target_include_directories(sugar PRIVATE include)
@ -0,0 +1,17 @@ | |||
#pragma once | |||
#include <vector> | |||
#include <map> | |||
#include <string> | |||
namespace molasses {
    // Identifier standing for one distinct token text.
    // We will always want symbols to be convertible to int for dictionary lookups.
    using symbol = int;

    // Output of the lexer: the token stream plus the table needed to read it back.
    struct lexed_output {
        // Maps every symbol to the token text it stands for.
        std::map<symbol, std::string> dictionary;
        // The tokens of the source, in order of appearance, as symbols.
        std::vector<symbol> symbols;
    };

    // Splits `source` into whitespace-separated tokens and returns them as symbols.
    lexed_output lex(const std::string& source);

    // Returns the symbols of `lhs` followed by those of `rhs`, with the symbols of
    // `rhs` renumbered so that both halves share a single dictionary.
    lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs);
}
@ -0,0 +1,144 @@ | |||
#pragma once | |||
#include <charconv>
#include <compare>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "molasses/lexer.h"
namespace molasses { | |||
// Interface describing a type known to the parser.
struct type {
    // Polymorphic base: the virtual destructor makes destruction through a
    // pointer or reference to `type` well defined.
    virtual ~type() = default;

    // Name under which the type is looked up.
    [[nodiscard]] virtual std::string name() const = 0;
    // Size of a value of this type, in bytes.
    [[nodiscard]] virtual size_t byte_size() const = 0;
};
// Orders two types by comparing their names.
inline auto operator<=>(const type& lhs, const type& rhs) {
    const std::string left_name = lhs.name();
    const std::string right_name = rhs.name();
    return left_name <=> right_name;
}
struct primitive_type : public type { | |||
std::string _name; | |||
size_t _byte_size; | |||
primitive_type(std::string name, size_t byte_size) | |||
: _name(std::forward<std::string>(name)) | |||
, _byte_size(byte_size) | |||
{} | |||
[[nodiscard]] std::string name() const final { | |||
return _name; | |||
} | |||
[[nodiscard]] size_t byte_size() const final { | |||
return _byte_size; | |||
}; | |||
}; | |||
struct parser_context;

// Interface for anything that can be applied to the type stack: it consumes
// `argument_types()` from the top of the stack and leaves `return_types()`.
struct operation {
    // Polymorphic base: the virtual destructor makes destruction through a
    // pointer or reference to `operation` well defined.
    virtual ~operation() = default;

    // Name under which the operation is looked up.
    [[nodiscard]] virtual std::string name() const = 0;
    // Type names expected on the stack; the last element is the top of the stack.
    [[nodiscard]] virtual std::vector<std::string> argument_types() const = 0;
    // Type names pushed on the stack once the operation has run.
    [[nodiscard]] virtual std::vector<std::string> return_types() const = 0;
    // Produces the textual instructions implementing the operation.
    // TODO (original note: "Add generate() -> instruction[]"): presumably a
    // structured instruction type is meant to replace the strings; confirm.
    [[nodiscard]] virtual std::vector<std::string> generate(const parser_context&) const = 0;
};
struct primitive_operation : public operation { | |||
std::string _name; | |||
std::vector<std::string> _args; | |||
std::vector<std::string> _rets; | |||
std::vector<std::string> _instructions; | |||
primitive_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets) | |||
: _name(std::forward<std::string>(name)) | |||
, _args(std::forward<std::vector<std::string>>(args)) | |||
, _rets(std::forward<std::vector<std::string>>(rets)) | |||
{} | |||
[[nodiscard]] std::string name() const final { | |||
return _name; | |||
} | |||
[[nodiscard]] std::vector<std::string> argument_types() const final { | |||
return _args; | |||
} | |||
[[nodiscard]] std::vector<std::string> return_types() const final { | |||
return _rets; | |||
} | |||
[[nodiscard]] std::vector<std::string> generate(const parser_context&) const final { | |||
return _instructions; | |||
} | |||
}; | |||
struct procedure_operation : public operation { | |||
std::string _name; | |||
std::vector<std::string> _args; | |||
std::vector<std::string> _rets; | |||
std::vector<symbol> _body; | |||
procedure_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets, std::vector<symbol> body) | |||
: _name(std::forward<std::string>(name)) | |||
, _args(std::forward<std::vector<std::string>>(args)) | |||
, _rets(std::forward<std::vector<std::string>>(rets)) | |||
, _body(std::forward<std::vector<symbol>>(body)) | |||
{} | |||
[[nodiscard]] std::string name() const final { | |||
return _name; | |||
} | |||
[[nodiscard]] std::vector<std::string> argument_types() const final { | |||
return _args; | |||
} | |||
[[nodiscard]] std::vector<std::string> return_types() const final { | |||
return _rets; | |||
} | |||
[[nodiscard]] std::vector<std::string> generate(const parser_context&) const final; | |||
}; | |||
// Orders two operations by comparing their names.
inline auto operator<=>(const operation& lhs, const operation& rhs) {
    const std::string left_name = lhs.name();
    const std::string right_name = rhs.name();
    return left_name <=> right_name;
}
// Thrown when the type on top of the stack is not the one an operation expects.
struct TypeInputError : public std::runtime_error {
    // TODO: Better error message
    TypeInputError()
        : std::runtime_error{"Bad type provided"}
    {}
};
// Thrown when an operation needs more values than the stack holds.
struct ValueMissingError : public std::runtime_error {
    // TODO: Better error message
    ValueMissingError()
        : std::runtime_error{"Expected value, none provided"}
    {}
};
// Thrown when a procedure body does not leave the declared return types on the stack.
struct ProcedureStackError : public std::runtime_error {
    // TODO: Better error message
    ProcedureStackError()
        : std::runtime_error{"Expected the stack to look like the return stack upon completion"}
    {}
};
// Thrown when the parser meets a token other than the one required at that position.
struct UnexpectedTokenError : public std::runtime_error {
    // TODO: Better error message
    UnexpectedTokenError()
        : std::runtime_error{"An unexpected token has been encountered"}
    {}
};
// Thrown when the token stream ends while the parser still requires more tokens.
struct ExpectingTokenError : public std::runtime_error {
    // TODO: Better error message
    ExpectingTokenError()
        : std::runtime_error{"An expected token has not been encountered before the end of the input"}
    {}
};
std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op); | |||
std::optional<int32_t> try_parse_int32(const std::string& str); | |||
struct parser_context { | |||
std::vector<std::shared_ptr<type>> types; | |||
std::vector<std::shared_ptr<operation>> operations; | |||
[[nodiscard]] std::shared_ptr<type> lookup_type(const std::string&) const; | |||
[[nodiscard]] std::shared_ptr<operation> lookup_operation(const std::string&) const; | |||
}; | |||
parser_context parse(parser_context, const lexed_output&); | |||
parser_context register_integers(parser_context); | |||
bool type_check(const parser_context&, const lexed_output&, const std::vector<symbol>&, std::vector<std::string> execution_input, const std::vector<std::string>& execution_output); | |||
} | |||
@ -0,0 +1,7 @@ | |||
__PROC__ procedure_name | |||
i16 i8 | |||
__--__ | |||
i32 | |||
__DO__ | |||
i32 __CAST__ __SWAP__ i32 __CAST__ * | |||
__END__ |
@ -0,0 +1,17 @@ | |||
__PROC__ write | |||
i64 i8 ptr i64 | |||
__--__ | |||
i32 | |||
__DO__ | |||
__LET__ size, ptr, fd | |||
size ptr __CAST_I64__ fd 1_i64 __SYSCALL4__ | |||
__END_LET__ | |||
__END__ | |||
proc __DEREF_I64_PTR__ | |||
i64 ptr | |||
-- | |||
i64 | |||
DO | |||
END |
@ -0,0 +1,50 @@ | |||
#include "molasses/lexer.h" | |||
#include "molasses/parser_primitives.h" | |||
#include <iostream> | |||
// Lexes and parses a small hard-coded procedure as a smoke test of the front end.
// Returns 0 when the procedure parses and type-checks, 1 (after printing the
// error to stderr) when the parser rejects it.
int main() {
    const auto lexed = molasses::lex("__PROC__ sum\n"
                                     "i32 i32\n"
                                     "__--__\n"
                                     "i32\n"
                                     "__DO__\n"
                                     "+\n"
                                     "__END__");

    molasses::parser_context ctx;
    ctx = molasses::register_integers(ctx);
    // The sample procedure relies on "+" taking two i32 and leaving one i32.
    ctx.operations.emplace_back(std::make_shared<molasses::primitive_operation>(
        std::string{"+"},
        std::vector<std::string>({"i32", "i32"}),
        std::vector<std::string>({"i32"})));

    try {
        // parse() reports malformed input by throwing; keep the extended context.
        ctx = molasses::parse(ctx, lexed);
    } catch(const std::exception& error) {
        std::cerr << "error: " << error.what() << '\n';
        return 1;
    }
    return 0;
}
@ -0,0 +1,110 @@ | |||
#include "molasses/lexer.h" | |||
#include <algorithm> | |||
#include <sstream> | |||
#include <iostream> | |||
namespace molasses { | |||
// Splits `source` into whitespace-separated tokens.
//
// Every distinct token text receives a symbol id, numbered from 1 in order of
// first appearance. The returned `symbols` lists the tokens in source order and
// `dictionary` maps each id back to its text. An empty or all-whitespace source
// yields an empty result.
lexed_output lex(const std::string & source) {
    lexed_output output;
    std::map<std::string, symbol> reverse_dictionary;
    std::string token;
    symbol next_symbol = 1;

    // Emits the pending token, if any, and resets the buffer.
    // Called at every token boundary and once at the end of the input.
    const auto flush_token = [&] {
        if(token.empty()) {
            return;
        }
        const auto [entry, inserted] = reverse_dictionary.try_emplace(token, next_symbol);
        if(inserted) {
            output.dictionary.emplace(next_symbol, token);
            ++next_symbol;
        }
        output.symbols.push_back(entry->second);
        token.clear();
    };

    for(const char character : source) {
        // std::isspace is only defined for values representable as unsigned char
        // (or EOF); a negative plain char must be converted first.
        if(std::isspace(static_cast<unsigned char>(character))) {
            flush_token();
        } else {
            token.push_back(character);
        }
    }
    flush_token(); // process the last token if needed

    return output;
}
using conversion_table = std::map<int, int>; | |||
lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs) { | |||
// primitive that flips keys and values of dictionaries | |||
constexpr auto dictionary_reversal = [](auto& destination,const auto& source) { | |||
for(auto& it : source) { | |||
destination.insert_or_assign(it.second, it.first); | |||
} | |||
}; | |||
// primitive that merges a dictionary into a reversed one and returns a conversion table of symbols | |||
// from the dictionary to the newly generated reverse dictionary | |||
auto build_reverse_dictionary = [dictionary_reversal] (auto& reverse_dictionary, auto dictionary) -> conversion_table { | |||
// Make the right dictionary into a reverse dictionary | |||
std::map<std::string, int> right_reverse_dictionary; | |||
dictionary_reversal(right_reverse_dictionary, dictionary); | |||
// find the maximum token id in the left dictionary | |||
int max_token = 0; | |||
if(not reverse_dictionary.empty()) { | |||
max_token = std::max_element( | |||
reverse_dictionary.begin(), | |||
reverse_dictionary.end(), | |||
[](const auto &lhs, const auto &rhs) -> bool { | |||
return lhs.second < rhs.second; | |||
} | |||
)->second; | |||
} | |||
// make the conversions and update the reverse dictionary | |||
conversion_table conversions; | |||
for(auto& [key, value] : right_reverse_dictionary) { | |||
if(auto match = reverse_dictionary.find(key); match != reverse_dictionary.end()) { | |||
conversions[value] = match->second; | |||
} else { | |||
max_token+=1; | |||
conversions[value] = max_token; | |||
reverse_dictionary[key] = max_token; | |||
} | |||
} | |||
return conversions; | |||
}; | |||
std::map<std::string, int> reverse_dictionary; | |||
dictionary_reversal(reverse_dictionary, lhs.dictionary); | |||
auto conversions = build_reverse_dictionary(reverse_dictionary, rhs.dictionary); | |||
auto symbol_stream = rhs.symbols; | |||
lexed_output output{.symbols = lhs.symbols}; | |||
for(auto& old_symbol : symbol_stream) { | |||
//This diagnostic is pretty lousy, but that is what happens when keys are taken by reference | |||
#pragma clang diagnostic push | |||
#pragma ide diagnostic ignored "LocalValueEscapesScope" | |||
old_symbol = conversions[old_symbol]; | |||
#pragma clang diagnostic pop | |||
} | |||
dictionary_reversal(output.dictionary, reverse_dictionary); | |||
std::copy(symbol_stream.begin(), symbol_stream.end(), std::back_inserter(output.symbols)); | |||
return output; | |||
} | |||
} |
@ -0,0 +1,228 @@ | |||
#include <algorithm> | |||
#include <cassert> | |||
#include "molasses/parser_primitives.h" | |||
namespace molasses { | |||
// Returns `ctx` with the eight fixed-width integer types appended to its type
// list: i8, i16, i32, i64, then u8, u16, u32, u64.
parser_context register_integers(parser_context ctx) {
    struct integer_spec {
        const char* name;
        size_t byte_size;
    };
    static constexpr integer_spec integer_specs[] = {
        {"i8", 1}, {"i16", 2}, {"i32", 4}, {"i64", 8},
        {"u8", 1}, {"u16", 2}, {"u32", 4}, {"u64", 8},
    };

    for(const auto& [type_name, byte_size] : integer_specs) {
        ctx.types.push_back(std::make_shared<primitive_type>(type_name, byte_size));
    }
    return ctx;
}
std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) { | |||
{ | |||
auto args = next_op.argument_types(); | |||
while(not (args.empty() or current_stack.empty())) { | |||
if(current_stack.back() != args.back()) { | |||
throw TypeInputError(); | |||
} else { | |||
args.pop_back(); | |||
current_stack.pop_back(); | |||
} | |||
} | |||
if(not args.empty()) { | |||
throw ValueMissingError(); | |||
} | |||
} | |||
{ | |||
auto return_types = next_op.return_types(); | |||
std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack)); | |||
} | |||
return current_stack; | |||
} | |||
// Parses `str` as a base-10 signed 32-bit integer.
//
// Returns the value only when the entire string is a valid literal that fits
// in int32_t. Returns std::nullopt for an empty string, for any trailing or
// leading non-digit characters (a leading '-' is accepted, '+' is not), and
// for values out of range.
std::optional<int32_t> try_parse_int32(const std::string& str) {
    int32_t value = 0;
    const char* const begin = str.data();
    const char* const end = begin + str.size();
    const auto result = std::from_chars(begin, end, value, 10);
    // TODO: Add other bases
    // On failure from_chars leaves `value` untouched, and for an out-of-range
    // literal it still advances `ptr` past the digits, so the error code has to
    // be checked in addition to full consumption.
    if(result.ec == std::errc{} and result.ptr == end) {
        return value;
    }
    return std::nullopt;
}
// Returns the first element of `container` whose `->name()` equals `name`, or a
// value-initialized element (an empty pointer for smart pointers) when no
// element matches.
// The container and its elements are only read, never copied, apart from the
// single element that is returned.
template<typename Container, typename Name>
auto find_ptr_by_name_in_container(const Container& container, const Name& name) -> typename Container::value_type {
    const auto it = std::find_if(std::begin(container), std::end(container), [&name](const auto& elem) {
        return elem->name() == name;
    });
    if(it != std::end(container)) {
        return *it;
    }
    return {};
}
std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const { | |||
return find_ptr_by_name_in_container(types, name); | |||
} | |||
std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const { | |||
return find_ptr_by_name_in_container(operations, name); | |||
} | |||
bool type_check( | |||
const parser_context& parser_state, | |||
const lexed_output& lexer_state, | |||
const std::vector<symbol>& consumed_stream, | |||
std::vector<std::string> execution_input, | |||
const std::vector<std::string>& execution_output | |||
) { | |||
auto& type_stack = execution_input; | |||
for(const auto& symbol : consumed_stream) { | |||
const auto& symbol_text = lexer_state.dictionary.at(symbol); | |||
if(auto is_int = try_parse_int32(symbol_text); is_int) { | |||
type_stack.emplace_back("i32"); | |||
} else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) { | |||
type_stack = type_stack >> *is_op; | |||
} | |||
} | |||
return type_stack == execution_output; | |||
} | |||
// Parses every procedure definition in `lexer_data` and returns `ctx` with the
// procedures appended to its operations.
//
// A definition has the shape
//     __PROC__ name  arg-types...  __--__  return-types...  __DO__  body...  __END__
// Definitions must follow one another with nothing in between. All procedures
// are registered before any body is type-checked. An empty token stream
// defines nothing and returns `ctx` unchanged.
//
// Throws UnexpectedTokenError when a definition does not start with __PROC__,
// ExpectingTokenError when the input ends inside a definition, and
// ProcedureStackError when a body does not turn the argument types into the
// return types (errors from type_check propagate as well).
parser_context parse(parser_context ctx, const lexed_output& lexer_data) {
    enum op : int {
        DO_KW = 1,
        SEPARATOR_KW,
        PROC_KW,
        END_KW
    };

    // Putting the keywords on the left of the concatenation pins them to the
    // ids above: concatenate() keeps the ids of its left operand.
    lexed_output keywords;
    keywords.dictionary[PROC_KW] = "__PROC__";
    keywords.dictionary[SEPARATOR_KW] = "__--__";
    keywords.dictionary[DO_KW] = "__DO__";
    keywords.dictionary[END_KW] = "__END__";

    const auto tokens = concatenate(keywords, lexer_data);
    const auto stream_end = tokens.symbols.end();

    // Steps to the next token, which must exist.
    const auto advance = [&](auto& it) {
        ++it;
        if(it == stream_end) {
            throw ExpectingTokenError();
        }
    };

    // Parses one definition starting at `it`; returns the position just past
    // its __END__ together with the procedure.
    const auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
        if(it == stream_end) {
            throw ExpectingTokenError();
        }
        if(*it != PROC_KW) {
            throw UnexpectedTokenError();
        }
        advance(it);

        std::string name = tokens.dictionary.at(*it);
        advance(it);

        // Process arguments list
        std::vector<std::string> argument_types;
        for(; *it != SEPARATOR_KW; advance(it)) {
            argument_types.emplace_back(tokens.dictionary.at(*it));
        }
        advance(it);

        // Process return types list
        std::vector<std::string> return_types;
        for(; *it != DO_KW; advance(it)) {
            return_types.emplace_back(tokens.dictionary.at(*it));
        }
        advance(it);

        // Process body
        std::vector<symbol> body;
        for(; *it != END_KW; advance(it)) {
            body.emplace_back(*it);
        }
        ++it; // __END__ may legitimately be the last token of the input

        return std::make_pair(it, std::make_shared<procedure_operation>(
            std::move(name), std::move(argument_types), std::move(return_types), std::move(body)));
    };

    std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;
    for(auto it = tokens.symbols.begin(); it != stream_end;) {
        auto [next, procedure] = parse_proc(it);
        it = next;
        ctx.operations.push_back(procedure);
        parsed_procedures.emplace_back(std::move(procedure));
    }

    for(const auto& proc : parsed_procedures) {
        if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
            throw ProcedureStackError();
        }
    }
    return ctx;
}
// Returns the assembly text that reserves the `stack_instruction` cell and
// defines `initialize_callstack`.
//
// `initialize_callstack` issues syscall number 9 with length 8192 and stores
// the result in `stack_instruction`.
// NOTE(review): the register values look like Linux x86-64 mmap(NULL, 8192,
// PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) - confirm.
//
// Every entry is one complete line ending in '\n', so the entries can be
// concatenated directly.
std::vector<std::string> initialize_stack() {
    return {
        ".bss\n",// TODO: make threadlocal
        "stack_instruction:\n",
        " .quad 0\n",
        ".text\n",
        "initialize_callstack:\n",
        " movq $9, %rax\n",
        " movq $0, %rdi\n",
        " movq $8192, %rsi\n",
        " movq $3, %rdx\n",
        " movq $34, %r10\n",
        " movq $-1, %r8\n",
        " movq $0, %r9\n",
        " syscall\n",
        " movq %rax, (stack_instruction)\n",
        " retq\n",
    };
}
// Returns the assembly lines that call `target`: store a fresh return label in
// the `stack_instruction` area, advance it by 8, jump to `target`, then define
// the return label.
//
// Each call uses the next number from a function-local counter, so labels are
// unique within the process. The counter is a plain static and is not
// synchronized. Every entry is one complete line ending in '\n'.
//
// NOTE(review): the first instruction has two memory operands, which x86-64
// `mov` cannot encode; an immediate `$label` source and an indirect store are
// probably intended - confirm before relying on this output.
std::vector<std::string> generate_call(std::string target) {
    static uint64_t label_count = 0;
    const std::string return_label = "return_label_n" + std::to_string(label_count++);
    return {
        "movq " + return_label + ", (stack_instruction)\n",
        "addq $8, stack_instruction\n",
        "jmp " + target + "\n",
        return_label + ":\n",
    };
}
std::vector<std::string> procedure_operation::generate(const parser_context& ctx) const { | |||
size_t initial_stack = 0; | |||
size_t final_stack = 0; | |||
for(const auto& elem : argument_types()) { | |||
initial_stack += ctx.lookup_type(elem)->byte_size(); | |||
} | |||
for(const auto& elem : return_types()) { | |||
final_stack += ctx.lookup_type(elem)->byte_size(); | |||
} | |||
std::vector<std::string> ops; | |||
ops.emplace_back(name()+":\n"); | |||
// Return to caller | |||
ops.emplace_back(" addq $-8, stack_instruction\n"); | |||
ops.emplace_back(" movq (stack_instruction), %rax\n"); | |||
ops.emplace_back(" pushq %rax\n"); | |||
ops.emplace_back(" retq\n"); | |||
return ops; | |||
} | |||
} |