Ви не можете вибрати більше 25 тем. Теми мають розпочинатися з літери або цифри, можуть містити дефіси (-) і не повинні перевищувати 35 символів.

160 рядків
5.5 KiB

#include "molasses/lexer.h"
#include <algorithm>
#include <sstream>
#include <iostream>
namespace molasses {
// Splits `source` into tokens and interns every distinct token text into a
// dictionary of integer ids.
//
// Rules implemented by the state machine below:
//  - Tokens are separated by whitespace (as classified by std::isspace).
//  - A double quote at the very start of a token opens a string literal.
//    Inside it whitespace is kept, `\n` and `\t` become newline / tab, and any
//    other escaped character (including `\\` and `\"`) is inserted verbatim.
//  - A closing quote followed by whitespace or by the end of the input yields
//    a symbol whose last field (the "is a string" flag) is true. The stored
//    text does not include the surrounding quotes. This also holds for the
//    empty literal `""`, which produces a symbol with empty text.
//  - If non-space characters directly follow the closing quote, the text is
//    wrapped in quotes again and lexing carries on as an ordinary token.
//  - A double quote in the middle of an ordinary token is a plain character.
//  - A string still open at the end of the input is emitted as an ordinary
//    (non-string) token.
//
// file_name: stored in every produced symbol.
// source:    the text to lex.
// Returns the symbols in source order together with the id -> text dictionary.
// Ids start at 1 and are handed out in order of first appearance; a string
// literal and an ordinary token with the same text share one id.
//
// The line / column stored in a symbol are those of the position at which the
// token was completed (the separator that ended it, or the last character of
// the input), not of the token's first character.
lexed_output lex(const std::string& file_name, const std::string & source) {
    enum class state_machine_t {
        normal,         // outside of any string literal
        string,         // inside a string literal
        string_escape,  // right after a backslash inside a string literal
        string_end,     // right after the closing quote of a string literal
    };

    lexed_output output;
    std::map<std::string, int> reverse_dictionary; // token text -> id
    std::string builder;                           // text of the token being assembled
    int token_counter = 1;                         // next unused id
    int line = 1;
    int column = 0;
    state_machine_t state = state_machine_t::normal;

    // Emits the token accumulated in `builder` (if any) and clears the buffer.
    // Must be called whenever the end of a token is reached.
    const auto process_token = [&]() {
        const bool is_string = state == state_machine_t::string_end;
        // Runs of whitespace produce empty tokens that must be ignored, but an
        // empty *string literal* is a real token and has to be kept.
        if(builder.empty() and not is_string) {
            return;
        }
        int id;
        if(auto known = reverse_dictionary.find(builder); known != reverse_dictionary.end()) {
            id = known->second;
        } else {
            id = token_counter++;
            reverse_dictionary[builder] = id;
            output.dictionary[id] = builder;
        }
        const symbol current_symbol = {id, file_name, line, column, is_string};
        output.symbols.push_back(current_symbol);
        builder.clear();
    };

    // Keeps the position counters in sync after a newline has been consumed.
    const auto advance_line_if_newline = [&](char consumed) {
        if(consumed == '\n') {
            line++;
            column = 0;
        }
    };

    for(const char character : source) {
        column++;

        // Character following a backslash inside a string literal.
        if(state == state_machine_t::string_escape) {
            switch(character) {
                case 'n': builder += '\n'; break;
                case 't': builder += '\t'; break;
                default:  builder += character; break; // `\\`, `\"` and everything else: verbatim
            }
            state = state_machine_t::string;
            // A backslash followed by a literal newline still ends the source line.
            advance_line_if_newline(character);
            continue;
        }

        if(character == '"') {
            if(state == state_machine_t::normal and builder.empty()) {
                state = state_machine_t::string;      // opening quote
                continue;
            }
            if(state == state_machine_t::string) {
                state = state_machine_t::string_end;  // closing quote
                continue;
            }
            // Otherwise the quote is treated like any other character below.
        } else if(character == '\\' and state == state_machine_t::string) {
            state = state_machine_t::string_escape;
            continue;
        }

        // std::isspace has undefined behaviour for negative arguments, so the
        // character must be converted to unsigned char first.
        if(std::isspace(static_cast<unsigned char>(character))) {
            if(state == state_machine_t::normal or state == state_machine_t::string_end) {
                process_token();
                state = state_machine_t::normal;
            } else {
                builder += character; // whitespace inside a string literal is content
            }
            // Advance the line only after the token was emitted, so the token
            // is reported on the line it ended on.
            advance_line_if_newline(character);
        } else {
            if(state == state_machine_t::string_end) {
                // Text glued to a closing quote: put the quotes back and
                // continue as an ordinary token.
                builder.insert(builder.begin(), '"');
                builder += '"';
                state = state_machine_t::normal;
            }
            builder += character;
        }
    }

    process_token(); // emit the last token if the input did not end in whitespace
    return output;
}
using conversion_table = std::map<int, int>;

// Merges two lexer outputs into one whose symbol stream is the symbols of
// `lhs` followed by the symbols of `rhs`.
//
// Ids coming from `lhs` are kept. Ids coming from `rhs` are renumbered:
//  - a text that `lhs` already knows takes the id `lhs` uses for it;
//  - a text new to `lhs` gets a fresh id above the largest `lhs` id; fresh ids
//    are handed out in the iteration order of a std::map keyed by the text.
//
// lhs: output whose ids are preserved.
// rhs: output whose symbols are appended after renumbering.
// Returns the combined symbols and the merged id -> text dictionary.
//
// NOTE(review): a `rhs` symbol whose id has no entry in the conversion table
// ends up with id 0, because std::map::operator[] default-inserts. Presumably
// this cannot happen for well-formed inputs - confirm against callers.
lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs) {
    // text -> id view of the left dictionary; grows into the merged dictionary.
    std::map<std::string, int> reverse_dictionary;
    for(const auto& [id, text] : lhs.dictionary) {
        reverse_dictionary.insert_or_assign(text, id);
    }

    // text -> id view of the right dictionary. It is iterated in key order
    // below, which fixes the order in which fresh ids are assigned.
    std::map<std::string, int> right_reverse_dictionary;
    for(const auto& [id, text] : rhs.dictionary) {
        right_reverse_dictionary.insert_or_assign(text, id);
    }

    // Largest id in use on the left side; fresh ids continue after it.
    int max_token = 0;
    if(not reverse_dictionary.empty()) {
        max_token = std::max_element(
            reverse_dictionary.begin(),
            reverse_dictionary.end(),
            [](const auto& a, const auto& b) -> bool {
                return a.second < b.second;
            }
        )->second;
    }

    // Build the right-id -> merged-id table, extending the merged dictionary
    // with every text the left side does not know yet.
    conversion_table conversions;
    for(const auto& [text, right_id] : right_reverse_dictionary) {
        const auto [slot, inserted] = reverse_dictionary.try_emplace(text, max_token + 1);
        if(inserted) {
            max_token += 1;
        }
        conversions[right_id] = slot->second;
    }

    lexed_output output{.symbols = lhs.symbols};

    // Append the right-hand symbols, renumbered through the conversion table.
    for(symbol converted : rhs.symbols) {
        // The symbol is converted to its integer key exactly as a direct
        // `conversions[symbol]` lookup would do; naming the key avoids binding
        // a map key reference to a temporary.
        const int right_id = converted;
        converted.id = conversions[right_id];
        output.symbols.push_back(converted);
    }

    // Turn the merged text -> id map back into the id -> text dictionary.
    for(const auto& [text, id] : reverse_dictionary) {
        output.dictionary.insert_or_assign(id, text);
    }
    return output;
}
}