You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 

110 lines
3.5 KiB

#include "molasses/lexer.h"
#include <algorithm>
#include <cctype>
#include <iostream>
#include <iterator>
#include <map>
#include <sstream>
#include <string>
namespace molasses {
/// Splits `source` on whitespace and interns every token it finds.
///
/// Each distinct token is given an integer id, starting at 1 and increasing in
/// order of first appearance. `output.dictionary` maps each id to the token
/// text, and `output.symbols` lists the id of every token in source order.
///
/// @param source text to tokenize; whitespace only separates tokens and is
///               never part of one, so runs of whitespace yield no empty tokens
/// @return the symbol stream together with its id-to-token dictionary
lexed_output lex(const std::string & source) {
    lexed_output output;
    // token text -> id, i.e. the inverse of output.dictionary
    std::map<std::string, int> reverse_dictionary;
    // Characters of the token currently being read. The buffer is cleared and
    // reused between tokens instead of rebuilding a stream for each one.
    std::string pending;
    int token_counter = 1;

    // Emits the pending token, if there is one, and resets the buffer.
    // Called on every whitespace character and once more at end of input.
    const auto flush_pending = [&]() {
        if(pending.empty()) {
            return;
        }
        // try_emplace inserts only when the token is new, so a single lookup
        // covers both the "seen before" and the "first occurrence" case.
        const auto [entry, is_new] = reverse_dictionary.try_emplace(pending, token_counter);
        if(is_new) {
            output.dictionary[token_counter] = pending;
            token_counter++;
        }
        const symbol current_symbol = entry->second;
        output.symbols.push_back(current_symbol);
        pending.clear();
    };

    for(const char character : source) {
        // std::isspace is undefined for negative arguments other than EOF, which
        // is what a byte >= 0x80 becomes where char is signed; go through
        // unsigned char first.
        if(std::isspace(static_cast<unsigned char>(character))) {
            flush_pending();
        } else {
            pending.push_back(character);
        }
    }
    flush_pending(); // the last token is not followed by whitespace
    return output;
}
// Translation table between symbol ids: maps an id used by one dictionary
// to the id that the same token has in another dictionary.
using conversion_table = std::map<int, int>;
/// Appends the symbol stream of `rhs` to that of `lhs` under one shared dictionary.
///
/// Ids from `lhs` are kept as they are. A token of `rhs` that `lhs` already
/// knows reuses the `lhs` id; every other token of `rhs` gets a fresh id above
/// the largest id in use, handed out in the sort order of the token text.
/// The symbols of `rhs` are rewritten to those ids before being appended.
/// A symbol of `rhs` that has no entry in `rhs.dictionary` is rewritten to 0.
///
/// @param lhs output whose symbols come first and whose ids are preserved
/// @param rhs output whose symbols are re-numbered and appended
/// @return a new output holding the merged dictionary and both symbol streams
lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs) {
    // Inverts a map: each (key, value) of `source` is stored as (value, key) in
    // `destination`. An entry seen later replaces an earlier one with the same value.
    constexpr auto dictionary_reversal = [](auto& destination, const auto& source) {
        for(const auto& [key, value] : source) {
            destination.insert_or_assign(value, key);
        }
    };

    // Merges `dictionary` (id -> token) into `reverse_dictionary` (token -> id) and
    // returns, for every id of `dictionary`, the id of its token after the merge.
    // `dictionary` is only read, so it is taken by reference rather than copied.
    auto build_reverse_dictionary = [dictionary_reversal] (auto& reverse_dictionary, const auto& dictionary) -> conversion_table {
        std::map<std::string, int> right_reverse_dictionary;
        dictionary_reversal(right_reverse_dictionary, dictionary);

        // Largest id already in use; fresh ids continue right after it.
        int max_token = 0;
        if(not reverse_dictionary.empty()) {
            max_token = std::max_element(
                reverse_dictionary.begin(),
                reverse_dictionary.end(),
                [](const auto& left, const auto& right) -> bool {
                    return left.second < right.second;
                }
            )->second;
        }

        conversion_table conversions;
        for(const auto& [token, old_id] : right_reverse_dictionary) {
            // Inserts max_token + 1 only when the token is unknown; otherwise
            // `entry` points at the id the token already has.
            const auto [entry, is_new] = reverse_dictionary.try_emplace(token, max_token + 1);
            if(is_new) {
                max_token += 1;
            }
            conversions[old_id] = entry->second;
        }
        return conversions;
    };

    std::map<std::string, int> reverse_dictionary;
    dictionary_reversal(reverse_dictionary, lhs.dictionary);
    const auto conversions = build_reverse_dictionary(reverse_dictionary, rhs.dictionary);

    lexed_output output{.symbols = lhs.symbols};
    dictionary_reversal(output.dictionary, reverse_dictionary);

    // Re-number the rhs symbols while appending them, without an intermediate copy.
    std::transform(
        rhs.symbols.begin(),
        rhs.symbols.end(),
        std::back_inserter(output.symbols),
        [&conversions](const auto& old_symbol) {
            const auto converted = conversions.find(old_symbol);
            // An id missing from the table becomes 0, the value a default-inserted
            // table entry would have had, but the table itself is left untouched.
            return converted != conversions.end() ? converted->second : int{};
        }
    );
    return output;
}
}