Browse Source

Initial commit

master
Ludovic 'Archivist' Lagouardette 1 year ago
parent
commit
64c3850df4
12 changed files with 603 additions and 0 deletions
  1. +4
    -0
      .idea/misc.xml
  2. +8
    -0
      .idea/modules.xml
  3. +2
    -0
      .idea/sugar.iml
  4. +6
    -0
      .idea/vcs.xml
  5. +10
    -0
      CMakeLists.txt
  6. +17
    -0
      include/molasses/lexer.h
  7. +144
    -0
      include/molasses/parser_primitives.h
  8. +7
    -0
      prototypes/molasses/basic_file.mol
  9. +17
    -0
      prototypes/molasses/first_rountrip.mol
  10. +50
    -0
      src/main.cpp
  11. +110
    -0
      src/molasses/lexer.cpp
  12. +228
    -0
      src/molasses/parser_primitives.cpp

+ 4
- 0
.idea/misc.xml View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CMakeWorkspace" PROJECT_DIR="$PROJECT_DIR$" />
</project>

+ 8
- 0
.idea/modules.xml View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/sugar.iml" filepath="$PROJECT_DIR$/.idea/sugar.iml" />
</modules>
</component>
</project>

+ 2
- 0
.idea/sugar.iml View File

@ -0,0 +1,2 @@
<?xml version="1.0" encoding="UTF-8"?>
<module classpath="CMake" type="CPP_MODULE" version="4" />

+ 6
- 0
.idea/vcs.xml View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

+ 10
- 0
CMakeLists.txt View File

@ -0,0 +1,10 @@
# Build script for the `sugar` executable (Molasses lexer/parser prototype).
cmake_minimum_required(VERSION 3.24)
project(sugar)

# Configuration aborts when cppfront cannot be found (REQUIRED).
# NOTE(review): no target below links against or invokes cppfront yet - confirm
# the dependency is still needed.
find_package(cppfront REQUIRED)

# The sources rely on C++20 features (operator<=>, designated initializers,
# `auto` function parameters). Make the standard mandatory so an older compiler
# is rejected at configure time instead of silently decaying to an earlier one.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include_directories(include)

add_executable(sugar
    src/main.cpp
    src/molasses/lexer.cpp
    include/molasses/lexer.h
    src/molasses/parser_primitives.cpp
    include/molasses/parser_primitives.h
)

+ 17
- 0
include/molasses/lexer.h View File

@ -0,0 +1,17 @@
#pragma once
#include <vector>
#include <map>
#include <string>
namespace molasses {
// We will always want symbols to be convertible to int for dictionary lookups
using symbol = int;
// Result of lexing: the token stream plus the table needed to read it back.
struct lexed_output {
// Maps a symbol id to the text of the token it stands for.
std::map<int, std::string> dictionary;
// The lexed tokens in source order, each stored as its symbol id.
std::vector<symbol> symbols;
};
// Splits the source text on whitespace and returns the tokens as symbols.
// Ids are handed out starting at 1 in order of first appearance; a token that
// appears several times reuses the same id.
lexed_output lex(const std::string &);
// Merges two lexed outputs: the resulting symbol stream is lhs.symbols followed
// by rhs.symbols renumbered into a shared dictionary. Entries of lhs keep their
// ids; rhs tokens whose text already exists in lhs reuse that id, the others get
// fresh ids above the largest id found in lhs.
lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs);
}

+ 144
- 0
include/molasses/parser_primitives.h View File

@ -0,0 +1,144 @@
#pragma once
#include <charconv>
#include <compare>
#include <concepts>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

#include "molasses/lexer.h"
namespace molasses {
/// Abstract description of a value type known to the parser.
struct type {
    /// Virtual so implementations are destroyed correctly through a `type`
    /// pointer or reference, whatever owns them.
    virtual ~type() = default;

    /// Name identifying the type; lookups and ordering compare this value.
    [[nodiscard]] virtual std::string name() const = 0;

    /// Size of one value of this type, in bytes.
    [[nodiscard]] virtual size_t byte_size() const = 0;
};
/// Orders two types by comparing the strings returned by their name().
inline auto operator<=>(const type& lhs, const type& rhs) {
    const std::string left_name = lhs.name();
    const std::string right_name = rhs.name();
    return left_name <=> right_name;
}
/// Concrete `type` whose name and byte size are fixed at construction.
struct primitive_type : public type {
    std::string _name;  // returned verbatim by name()
    size_t _byte_size;  // returned verbatim by byte_size()

    /// @param name      stored and later returned by name()
    /// @param byte_size stored and later returned by byte_size()
    primitive_type(std::string name, size_t byte_size)
        // `name` is a by-value sink parameter, so it is moved; std::forward is
        // reserved for deduced forwarding references.
        : _name(std::move(name))
        , _byte_size(byte_size)
    {}

    /// @return the name given at construction.
    [[nodiscard]] std::string name() const final {
        return _name;
    }

    /// @return the byte size given at construction.
    [[nodiscard]] size_t byte_size() const final {
        return _byte_size;
    }
};
struct parser_context;

/// Abstract stack operation: it has a name, expects some types on the stack and
/// leaves some types behind.
struct operation {
    /// Virtual so implementations are destroyed correctly through an
    /// `operation` pointer or reference, whatever owns them.
    virtual ~operation() = default;

    /// Name under which the operation is looked up.
    [[nodiscard]] virtual std::string name() const = 0;

    /// Type names the operation consumes; the last entry is matched against the
    /// top of the stack.
    [[nodiscard]] virtual std::vector<std::string> argument_types() const = 0;

    /// Type names the operation pushes, in push order.
    [[nodiscard]] virtual std::vector<std::string> return_types() const = 0;

    /// Text lines emitted for this operation, given the parser state.
    [[nodiscard]] virtual std::vector<std::string> generate(const parser_context&) const = 0;
    // Add generate() -> instruction[]
};
/// Operation described entirely by data: a name, a signature and a list of
/// pre-built instruction lines.
struct primitive_operation : public operation {
    std::string _name;                       // returned by name()
    std::vector<std::string> _args;          // returned by argument_types()
    std::vector<std::string> _rets;          // returned by return_types()
    // Returned by generate(); the constructor leaves it empty.
    std::vector<std::string> _instructions;

    /// @param name operation name
    /// @param args type names consumed from the stack
    /// @param rets type names pushed onto the stack
    primitive_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets)
        // By-value sink parameters are moved into place; std::forward is
        // reserved for deduced forwarding references.
        : _name(std::move(name))
        , _args(std::move(args))
        , _rets(std::move(rets))
    {}

    [[nodiscard]] std::string name() const final {
        return _name;
    }

    [[nodiscard]] std::vector<std::string> argument_types() const final {
        return _args;
    }

    [[nodiscard]] std::vector<std::string> return_types() const final {
        return _rets;
    }

    /// @return a copy of `_instructions`; the parser context is not consulted.
    [[nodiscard]] std::vector<std::string> generate(const parser_context&) const final {
        return _instructions;
    }
};
/// Operation defined by source code: a name, a signature and the symbols that
/// make up its body.
struct procedure_operation : public operation {
    std::string _name;               // returned by name()
    std::vector<std::string> _args;  // returned by argument_types()
    std::vector<std::string> _rets;  // returned by return_types()
    std::vector<symbol> _body;       // body tokens, as lexer symbols

    /// @param name procedure name
    /// @param args type names consumed from the stack
    /// @param rets type names pushed onto the stack
    /// @param body symbols forming the procedure body
    procedure_operation(std::string name, std::vector<std::string> args, std::vector<std::string> rets, std::vector<symbol> body)
        // By-value sink parameters are moved into place; std::forward is
        // reserved for deduced forwarding references.
        : _name(std::move(name))
        , _args(std::move(args))
        , _rets(std::move(rets))
        , _body(std::move(body))
    {}

    [[nodiscard]] std::string name() const final {
        return _name;
    }

    [[nodiscard]] std::vector<std::string> argument_types() const final {
        return _args;
    }

    [[nodiscard]] std::vector<std::string> return_types() const final {
        return _rets;
    }

    /// Defined out of line, alongside the other parser primitives.
    [[nodiscard]] std::vector<std::string> generate(const parser_context&) const final;
};
/// Orders two operations by comparing the strings returned by their name().
inline auto operator<=>(const operation& lhs, const operation& rhs) {
    const std::string left_name = lhs.name();
    const std::string right_name = rhs.name();
    return left_name <=> right_name;
}
// Raised when a type found on the stack is not the one an operation expects
// (see operator>>).
struct TypeInputError : std::runtime_error {
TypeInputError() : std::runtime_error("Bad type provided") {}
// TODO: Better error message
};
// Raised when the stack runs out before all of an operation's arguments have
// been matched (see operator>>).
struct ValueMissingError : std::runtime_error {
ValueMissingError() : std::runtime_error("Expected value, none provided") {}
// TODO: Better error message
};
// Raised by parse() when a procedure body does not leave the stack matching the
// procedure's declared return types.
struct ProcedureStackError : std::runtime_error {
ProcedureStackError() : std::runtime_error("Expected the stack to look like the return stack upon completion") {}
// TODO: Better error message
};
// Raised by parse() when the token at hand is not the one the grammar requires
// (for instance a procedure that does not start with __PROC__).
struct UnexpectedTokenError : std::runtime_error {
UnexpectedTokenError() : std::runtime_error("An unexpected token has been encountered") {}
// TODO: Better error message
};
// Raised by parse() when the token stream ends while more tokens are still
// required to complete a procedure.
struct ExpectingTokenError : std::runtime_error {
ExpectingTokenError() : std::runtime_error("An expected token has not been encountered before the end of the input") {}
// TODO: Better error message
};
// Applies next_op to a stack of type names and returns the resulting stack.
// The operation's argument types are matched against the stack from the top
// down and popped; its return types are then pushed.
// Throws TypeInputError on a mismatch and ValueMissingError when the stack is
// exhausted before every argument has been matched.
std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op);
// Parses str as a base-10 32-bit integer; the result is empty when the text is
// not entirely a number.
std::optional<int32_t> try_parse_int32(const std::string& str);
// Everything the parser knows so far: the registered types and operations.
struct parser_context {
// Known types, searched by name by lookup_type().
std::vector<std::shared_ptr<type>> types;
// Known operations, searched by name by lookup_operation().
std::vector<std::shared_ptr<operation>> operations;
// Returns the first registered type with the given name, or an empty pointer.
[[nodiscard]] std::shared_ptr<type> lookup_type(const std::string&) const;
// Returns the first registered operation with the given name, or an empty pointer.
[[nodiscard]] std::shared_ptr<operation> lookup_operation(const std::string&) const;
};
// Parses one procedure of the form
//   __PROC__ name <argument types> __--__ <return types> __DO__ <body> __END__
// from the lexed input, registers it as an operation, type-checks its body and
// returns the updated context. Throws UnexpectedTokenError, ExpectingTokenError
// or ProcedureStackError when the input is malformed or fails the check.
parser_context parse(parser_context, const lexed_output&);
// Returns the context extended with the integer types i8, i16, i32, i64, u8,
// u16, u32 and u64 (1, 2, 4 and 8 bytes respectively).
parser_context register_integers(parser_context);
// Simulates the given symbols on a stack of type names that starts as
// execution_input, and reports whether the final stack equals execution_output.
// Integer literals push "i32"; known operations are applied with operator>>.
bool type_check(const parser_context&, const lexed_output&, const std::vector<symbol>&, std::vector<std::string> execution_input, const std::vector<std::string>& execution_output);
}

+ 7
- 0
prototypes/molasses/basic_file.mol View File

@ -0,0 +1,7 @@
__PROC__ procedure_name
i16 i8
__--__
i32
__DO__
i32 __CAST__ __SWAP__ i32 __CAST__ *
__END__

+ 17
- 0
prototypes/molasses/first_rountrip.mol View File

@ -0,0 +1,17 @@
__PROC__ write
i64 i8 ptr i64
__--__
i32
__DO__
__LET__ size, ptr, fd
size ptr __CAST_I64__ fd 1_i64 __SYSCALL4__
__END_LET__
__END__
proc __DEREF_I64_PTR__
i64 ptr
--
i64
DO
END

+ 50
- 0
src/main.cpp View File

@ -0,0 +1,50 @@
#include "molasses/lexer.h"
#include "molasses/parser_primitives.h"
#include <iostream>
/// Entry point: lexes a hard-coded `sum` procedure and runs it through the
/// parser with the integer types and a primitive `+` registered.
///
/// @return 0 when parsing succeeds, 1 when lexing or parsing throws (the
///         message is printed on stderr instead of terminating the process).
int main() {
    /*
    molasses::lexed_output initial;
    initial.dictionary[1] = "+";
    {
    auto v = molasses::lex("hello hello potato 128 hello 128 +");
    auto v2 = molasses::lex("salad hello potato 129 hello 128");
    for (auto symbol: v.symbols) {
    std::cout << "v: " << symbol << " - " << v.dictionary.at(symbol) << "\n";
    }
    std::cout << "\n";
    for (auto symbol: v2.symbols) {
    std::cout << "v2: " << symbol << " - " << v2.dictionary.at(symbol) << "\n";
    }
    auto v_merged = molasses::concatenate(initial, molasses::concatenate(v, v2));
    std::cout << "\n";
    for (auto symbol: v_merged.symbols) {
    std::cout << "v_merged: " << symbol << " - " << v_merged.dictionary.at(symbol) << "\n";
    }
    }
    auto v = molasses::lex("1 2 +");
    molasses::parser_context ctx;
    ctx = molasses::register_integers(ctx);
    ctx.operations.emplace_back(std::make_shared<molasses::primitive_operation>(std::string{"+"}, std::vector<std::string>({"i32", "i32"}), std::vector<std::string>({"i32"})));
    if(molasses::type_check(ctx, v, v.symbols, {}, {"i32"})) {
    std::cout << "Checks out\n";
    }*/
    try {
        auto lexed = molasses::lex("__PROC__ sum\n"
                                   "i32 i32\n"
                                   "__--__\n"
                                   "i32\n"
                                   "__DO__\n"
                                   "+\n"
                                   "__END__");
        molasses::parser_context ctx;
        ctx = molasses::register_integers(ctx);
        ctx.operations.emplace_back(std::make_shared<molasses::primitive_operation>(std::string{"+"}, std::vector<std::string>({"i32", "i32"}), std::vector<std::string>({"i32"})));
        molasses::parse(ctx, lexed);
    } catch (const std::exception& error) {
        // Every parser error derives from std::runtime_error; report it and
        // signal failure through the exit status.
        std::cerr << "molasses: " << error.what() << "\n";
        return 1;
    }
    return 0;
}

+ 110
- 0
src/molasses/lexer.cpp View File

@ -0,0 +1,110 @@
#include "molasses/lexer.h"
#include <algorithm>
#include <sstream>
#include <iostream>
namespace molasses {
/// Splits `source` on whitespace and turns every token into a symbol.
///
/// Ids are assigned from 1 upwards in order of first appearance; a token that
/// occurs again reuses the id it was given the first time.
///
/// @param source text to tokenize
/// @return the symbol stream together with the id -> text dictionary
lexed_output lex(const std::string & source) {
    lexed_output output;
    std::map<std::string, int> reverse_dictionary; // token text -> symbol id
    std::string pending;                           // token currently being read
    int token_counter = 1;                         // next id to hand out

    // Emits the pending token (if any) into the output and starts a new one.
    // This should be called upon reaching the end of a token.
    const auto flush_pending = [&]() {
        if (pending.empty()) {
            return;
        }
        const auto [entry, inserted] = reverse_dictionary.try_emplace(pending, token_counter);
        if (inserted) {
            output.dictionary[token_counter] = pending;
            ++token_counter;
        }
        output.symbols.push_back(entry->second);
        pending.clear();
    };

    for (const char character : source) {
        // std::isspace is only defined for values representable as unsigned
        // char (or EOF): convert first so bytes >= 0x80 are not passed as
        // negative ints.
        if (std::isspace(static_cast<unsigned char>(character))) {
            flush_pending();
        } else {
            pending.push_back(character);
        }
    }
    flush_pending(); // process the last token if needed
    return output;
}
using conversion_table = std::map<int, int>;

/// Appends `rhs` to `lhs`, renumbering the symbols of `rhs` so that both halves
/// share one dictionary.
///
/// Ids of `lhs` are kept. A token of `rhs` whose text already exists in `lhs`
/// takes the `lhs` id; any other token receives a fresh id above the largest id
/// present in `lhs`.
///
/// @return the merged dictionary and `lhs.symbols` followed by the renumbered
///         `rhs.symbols`
lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs) {
    // Text -> id view of the merged dictionary, seeded with the left operand.
    std::map<std::string, int> ids_by_text;
    for (const auto& entry : lhs.dictionary) {
        ids_by_text.insert_or_assign(entry.second, entry.first);
    }

    // Text -> id view of the right operand.
    std::map<std::string, int> rhs_ids_by_text;
    for (const auto& entry : rhs.dictionary) {
        rhs_ids_by_text.insert_or_assign(entry.second, entry.first);
    }

    // Largest id in use on the left; fresh ids are allocated after it.
    int highest_id = 0;
    if (!ids_by_text.empty()) {
        highest_id = ids_by_text.begin()->second;
        for (const auto& entry : ids_by_text) {
            highest_id = std::max(highest_id, entry.second);
        }
    }

    // Old rhs id -> id in the merged dictionary.
    conversion_table remap;
    for (const auto& [text, rhs_id] : rhs_ids_by_text) {
        const auto known = ids_by_text.find(text);
        if (known != ids_by_text.end()) {
            remap[rhs_id] = known->second;
        } else {
            ++highest_id;
            remap[rhs_id] = highest_id;
            ids_by_text[text] = highest_id;
        }
    }

    lexed_output merged;
    merged.symbols = lhs.symbols;
    for (const auto& [text, id] : ids_by_text) {
        merged.dictionary.insert_or_assign(id, text);
    }
    for (const symbol rhs_symbol : rhs.symbols) {
        // operator[] on purpose: a symbol missing from rhs.dictionary maps to 0.
        merged.symbols.push_back(remap[rhs_symbol]);
    }
    return merged;
}
}

+ 228
- 0
src/molasses/parser_primitives.cpp View File

@ -0,0 +1,228 @@
#include <algorithm>
#include <cassert>
#include "molasses/parser_primitives.h"
namespace molasses {
/// Returns `ctx` extended with the built-in signed and unsigned integer types.
/// The types are appended in the order i8, i16, i32, i64, u8, u16, u32, u64.
parser_context register_integers(parser_context ctx) {
    struct integer_spec {
        const char* label;
        size_t bytes;
    };
    static constexpr integer_spec builtin_integers[] = {
        {"i8", 1}, {"i16", 2}, {"i32", 4}, {"i64", 8},
        {"u8", 1}, {"u16", 2}, {"u32", 4}, {"u64", 8},
    };
    for (const auto& spec : builtin_integers) {
        ctx.types.push_back(std::make_shared<primitive_type>(spec.label, spec.bytes));
    }
    return ctx;
}
/// Applies `next_op` to a stack of type names.
///
/// The operation's argument types are compared with the stack from the top
/// down, popping one entry per matched argument; its return types are then
/// pushed in order.
///
/// @throws TypeInputError    when a stack entry differs from the expected type
/// @throws ValueMissingError when the stack empties before all arguments match
/// @return the stack after the operation
std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
    const auto expected = next_op.argument_types();
    auto wanted = expected.rbegin(); // last argument sits on top of the stack
    for (; wanted != expected.rend() and not current_stack.empty(); ++wanted) {
        if (current_stack.back() != *wanted) {
            throw TypeInputError();
        }
        current_stack.pop_back();
    }
    if (wanted != expected.rend()) {
        throw ValueMissingError();
    }

    auto produced = next_op.return_types();
    for (auto& type_name : produced) {
        current_stack.push_back(std::move(type_name));
    }
    return current_stack;
}
/// Parses `str` as a base-10 signed 32-bit integer.
///
/// @param str text to convert; it must consist of the number and nothing else
/// @return the parsed value, or std::nullopt when the text is empty, is not a
///         number, has trailing characters, or does not fit in an int32_t
std::optional<int32_t> try_parse_int32(const std::string& str) {
    int32_t value = 0;
    const char* const begin = str.data();
    const char* const end = begin + str.size();
    const auto result = std::from_chars(begin, end, value, 10);
    // TODO: Add other bases
    // from_chars leaves `value` untouched on failure and, for out-of-range
    // input, still advances `ptr` past the digits: the error code has to be
    // checked in addition to the end position.
    if (result.ec != std::errc{} || result.ptr != end) {
        return std::nullopt;
    }
    return value;
}
/// Returns the first element of `container` whose `name()` equals `name`.
///
/// Elements are expected to be pointer-like (`elem->name()` must be valid).
/// The container is taken by reference and elements are inspected in place, so
/// a lookup no longer copies the container or its elements.
///
/// @return a copy of the matching element, or a value-initialized element (an
///         empty/null pointer) when nothing matches
template <typename Container, typename Name>
auto find_ptr_by_name_in_container(const Container& container, const Name& name)
    -> typename Container::value_type {
    const auto it = std::find_if(std::begin(container), std::end(container), [&](const auto& elem) {
        return elem->name() == name;
    });
    if (it != std::end(container)) {
        return *it;
    }
    return {};
}
/// Finds the registered type called `name`.
/// @return the first match in `types`, or an empty pointer when there is none
std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
    std::shared_ptr<type> match = find_ptr_by_name_in_container(types, name);
    return match;
}
/// Finds the registered operation called `name`.
/// @return the first match in `operations`, or an empty pointer when there is none
std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const {
    std::shared_ptr<operation> match = find_ptr_by_name_in_container(operations, name);
    return match;
}
bool type_check(
const parser_context& parser_state,
const lexed_output& lexer_state,
const std::vector<symbol>& consumed_stream,
std::vector<std::string> execution_input,
const std::vector<std::string>& execution_output
) {
auto& type_stack = execution_input;
for(const auto& symbol : consumed_stream) {
const auto& symbol_text = lexer_state.dictionary.at(symbol);
if(auto is_int = try_parse_int32(symbol_text); is_int) {
type_stack.emplace_back("i32");
} else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
type_stack = type_stack >> *is_op;
}
}
return type_stack == execution_output;
}
/// Parses one procedure from the lexed input and registers it in the context.
///
/// Expected shape:
///   __PROC__ name <argument types> __--__ <return types> __DO__ <body> __END__
///
/// The procedure is added to `ctx.operations` before its body is type-checked,
/// so the body may refer to the procedure itself.
///
/// @param ctx        parser state to extend (taken and returned by value)
/// @param lexer_data tokens to parse
/// @throws ExpectingTokenError  when the input is empty or ends mid-procedure
/// @throws UnexpectedTokenError when the input does not start with __PROC__
/// @throws ProcedureStackError  when the body does not produce the declared
///                              return types
/// @return the context including the parsed procedure
parser_context parse(parser_context ctx, const lexed_output& lexer_data) {
    // Keyword ids. They double as symbol values thanks to the seeding below.
    enum op : int {
        DO_KW = 1,
        SEPARATOR_KW,
        PROC_KW,
        END_KW
    };

    // concatenate() keeps the ids of its left operand, so merging the input
    // onto this seed dictionary guarantees that each keyword of the input
    // carries its `op` value as symbol.
    lexed_output fake;
    fake.dictionary[PROC_KW] = "__PROC__";
    fake.dictionary[SEPARATOR_KW] = "__--__";
    fake.dictionary[DO_KW] = "__DO__";
    fake.dictionary[END_KW] = "__END__";
    auto tokens = concatenate(fake, lexer_data);

    std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;

    // Parses a single procedure starting at `it`; returns the position just
    // past its __END__ together with the procedure.
    auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
        // Guards every dereference of `it`.
        const auto require_token = [&] {
            if (it == tokens.symbols.end()) {
                throw ExpectingTokenError();
            }
        };
        // Collects the text of every token up to `terminator`, then steps over
        // the terminator itself.
        const auto read_names_until = [&](int terminator) {
            std::vector<std::string> names;
            while (*it != terminator) {
                names.emplace_back(tokens.dictionary.at(*it));
                ++it;
                require_token();
            }
            ++it;
            return names;
        };

        require_token(); // an empty input has no token to inspect
        if (*it != PROC_KW) {
            throw UnexpectedTokenError();
        }
        ++it;
        require_token();
        std::string name = tokens.dictionary.at(*it);
        ++it;
        require_token();

        // Process arguments list
        auto argument_types = read_names_until(SEPARATOR_KW);
        require_token();

        // Process return types list
        auto return_types = read_names_until(DO_KW);
        require_token();

        // Process body
        std::vector<symbol> body;
        while (*it != END_KW) {
            body.emplace_back(*it);
            ++it;
            require_token();
        }
        ++it;

        return std::make_pair(it, std::make_shared<procedure_operation>(
            std::move(name), std::move(argument_types), std::move(return_types), std::move(body)));
    };

    // NOTE(review): only the first procedure is parsed; tokens after its
    // __END__ (from `iterator` onwards) are currently ignored.
    auto [iterator, procedure] = parse_proc(tokens.symbols.begin());
    ctx.operations.push_back(procedure);
    parsed_procedures.emplace_back(std::move(procedure));

    for (auto& proc : parsed_procedures) {
        if (not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
            throw ProcedureStackError();
        }
    }
    return ctx;
}
/// Returns the assembly lines that declare `stack_instruction` and define the
/// `initialize_callstack` routine.
///
/// Every returned line ends with a newline, so the lines can be concatenated
/// directly (previously `" .quad 0"` and `".text\n"` fused into one statement).
///
/// NOTE(review): the register values look like a Linux x86-64 `mmap` call
/// (syscall 9, 8192 bytes, read/write, private anonymous mapping) - confirm
/// against the target ABI.
std::vector<std::string> initialize_stack() {
    return {
        ".bss\n",// TODO: make threadlocal
        "stack_instruction:\n",
        " .quad 0\n",
        ".text\n",
        "initialize_callstack:\n",
        " movq $9, %rax\n",
        " movq $0, %rdi\n",
        " movq $8192, %rsi\n",
        " movq $3, %rdx\n",
        " movq $34, %r10\n",
        " movq $-1, %r8\n",
        " movq $0, %r9\n",
        " syscall\n",
        // The syscall result is stored in the stack_instruction variable.
        " movq %rax, (stack_instruction)\n",
        " retq\n",
    };
}
/// Builds the assembly lines for a call to `target`.
///
/// Each invocation uses a new return label, `return_label_n<k>`, where `k` is a
/// counter that starts at 0 and grows by one per call of this function.
///
/// @param target label to jump to
/// @return four lines: store the return label, advance `stack_instruction` by
///         8, jump to `target`, and define the return label
std::vector<std::string> generate_call(std::string target) {
    static uint64_t label_count= 0;
    const std::string return_label = "return_label_n" + std::to_string(label_count);
    ++label_count;

    std::vector<std::string> lines;
    lines.push_back("movq " + return_label + ", (stack_instruction)\n");
    lines.push_back("addq $8, stack_instruction\n");
    lines.push_back("jmp " + target + "\n");
    lines.push_back(return_label + ":");
    return lines;
}
std::vector<std::string> procedure_operation::generate(const parser_context& ctx) const {
size_t initial_stack = 0;
size_t final_stack = 0;
for(const auto& elem : argument_types()) {
initial_stack += ctx.lookup_type(elem)->byte_size();
}
for(const auto& elem : return_types()) {
final_stack += ctx.lookup_type(elem)->byte_size();
}
std::vector<std::string> ops;
ops.emplace_back(name()+":\n");
// Return to caller
ops.emplace_back(" addq $-8, stack_instruction\n");
ops.emplace_back(" movq (stack_instruction), %rax\n");
ops.emplace_back(" pushq %rax\n");
ops.emplace_back(" retq\n");
return ops;
}
}

Loading…
Cancel
Save