#include "molasses/lexer.h"

// NOTE(review): the standard headers below are the ones this file uses;
// confirm them against the project's original include list.
#include <algorithm>
#include <cctype>
#include <map>
#include <sstream>
#include <string>
#include <utility>

namespace molasses {

    /// @brief Splits @p source into whitespace-separated symbols.
    ///
    /// Every distinct token text receives a numeric id, allocated from 1 in
    /// order of first appearance and recorded in `output.dictionary`. Each
    /// occurrence of a token is appended to `output.symbols` together with
    /// @p file_name, the line and column the lexer had reached when the token
    /// was completed, and a flag telling whether the token was a string literal.
    ///
    /// A double quote met while no token is being built opens a string literal.
    /// Inside a literal whitespace is kept verbatim, `\n` and `\t` produce a
    /// line feed and a tab, and a backslash followed by any other character
    /// produces that character. A closing quote immediately followed by a
    /// non-whitespace character turns the literal back into an ordinary token
    /// whose text includes the quotes.
    ///
    /// @param file_name Name stored in every produced symbol.
    /// @param source    Text to tokenize.
    /// @return The symbol stream and the id -> text dictionary.
    lexed_output lex(const std::string& file_name, const std::string & source) {
        lexed_output output;
        std::map<std::string, int> reverse_dictionary; // token text -> id
        std::string builder;                           // text of the token being assembled
        int token_counter = 1;                         // next unused id
        int line = 1;
        int column = 0;

        enum class state_machine_t {
            normal,        // outside of any string literal
            string,        // between the quotes of a string literal
            string_escape, // right after a backslash inside a string literal
            string_end,    // right after the closing quote of a string literal
        };
        state_machine_t state = state_machine_t::normal;

        // Emits the token accumulated in `builder`, if any, and resets the buffer.
        // This should be called upon reaching the end of a token.
        // An empty buffer is skipped unless a string literal was just closed,
        // so that the empty literal "" still yields a symbol.
        const auto flush_token = [&]() {
            const bool is_string = state == state_machine_t::string_end;
            if(builder.empty() and not is_string) {
                return;
            }

            // Single lookup: reuse the id of a known text or register a new one.
            const auto [entry, inserted] = reverse_dictionary.try_emplace(builder, token_counter);
            if(inserted) {
                output.dictionary[token_counter] = builder;
                token_counter++;
            }

            output.symbols.push_back(symbol{entry->second, file_name, line, column, is_string});
            builder.clear();
        };

        for(const char character : source) {
            column++;

            if(state == state_machine_t::string_escape) {
                switch(character) {
                    case 'n':
                        builder += '\n';
                        break;
                    case 't':
                        builder += '\t';
                        break;
                    case '\\':
                        [[fallthrough]];
                    default:
                        builder += character;
                }
                // An escaped line feed still starts a new source line.
                if(character == '\n') {
                    line++;
                    column = 0;
                }
                state = state_machine_t::string;
                continue;
            }

            if(character == '\"') {
                if(builder.empty() && state == state_machine_t::normal) {
                    state = state_machine_t::string;
                    continue;
                } else if(state == state_machine_t::string) {
                    state = state_machine_t::string_end;
                    continue;
                }
                // Any other quote is treated as an ordinary character below.
            } else if(character == '\\' && state == state_machine_t::string) {
                state = state_machine_t::string_escape;
                continue;
            }

            // The cast is required: std::isspace has undefined behaviour for
            // negative arguments, which plain char yields for non-ASCII bytes
            // on platforms where char is signed.
            if(std::isspace(static_cast<unsigned char>(character))) {
                if(state == state_machine_t::normal or state == state_machine_t::string_end) {
                    flush_token();
                    state = state_machine_t::normal;
                } else {
                    // Whitespace inside a string literal belongs to the literal.
                    builder += character;
                }
                // Counted after flushing so the token keeps the line it ended on.
                if(character == '\n') {
                    line++;
                    column = 0;
                }
            } else {
                if(state == state_machine_t::string_end) {
                    // Text directly follows a closing quote: keep the quotes
                    // and carry on as an ordinary token.
                    builder = '"' + builder + '"';
                    state = state_machine_t::normal;
                }
                builder += character;
            }
        }

        flush_token(); // process the last token if needed
        return output;
    }

    /// Maps a symbol id of the right-hand lexed output to its id in the merged output.
    using conversion_table = std::map<int, int>;

    /// @brief Merges two lexed outputs into one.
    ///
    /// The ids of @p lhs are kept as they are. Every text of @p rhs that already
    /// exists in @p lhs takes the id used by @p lhs; every other text receives a
    /// fresh id above the largest id of @p lhs. The resulting symbol stream is
    /// the symbols of @p lhs followed by the symbols of @p rhs with their ids
    /// rewritten accordingly.
    ///
    /// @param lhs Lexed output whose ids are preserved.
    /// @param rhs Lexed output appended after @p lhs.
    /// @return The merged symbols and dictionary.
    lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs) {
        // primitive that flips keys and values of dictionaries
        constexpr auto dictionary_reversal = [](auto& destination, const auto& source) {
            for(const auto& [key, value] : source) {
                destination.insert_or_assign(value, key);
            }
        };

        // primitive that merges a dictionary into a reversed one and returns a conversion table of symbols
        // from the dictionary to the newly generated reverse dictionary
        auto build_reverse_dictionary = [dictionary_reversal](auto& reverse_dictionary, const auto& dictionary) -> conversion_table {
            // Make the right dictionary into a reverse dictionary
            std::map<std::string, int> right_reverse_dictionary;
            dictionary_reversal(right_reverse_dictionary, dictionary);

            // find the maximum token id in the left dictionary
            int max_token = 0;
            if(not reverse_dictionary.empty()) {
                max_token = std::max_element(
                    reverse_dictionary.begin(),
                    reverse_dictionary.end(),
                    [](const auto& a, const auto& b) -> bool { return a.second < b.second; }
                )->second;
            }

            // make the conversions and update the reverse dictionary
            conversion_table conversions;
            for(const auto& [text, old_id] : right_reverse_dictionary) {
                if(auto match = reverse_dictionary.find(text); match != reverse_dictionary.end()) {
                    conversions[old_id] = match->second;
                } else {
                    max_token += 1;
                    conversions[old_id] = max_token;
                    reverse_dictionary[text] = max_token;
                }
            }
            return conversions;
        };

        std::map<std::string, int> reverse_dictionary;
        dictionary_reversal(reverse_dictionary, lhs.dictionary);
        auto conversions = build_reverse_dictionary(reverse_dictionary, rhs.dictionary);

        lexed_output output{.symbols = lhs.symbols};
        for(symbol remapped : rhs.symbols) {
            // Indexing with the id itself avoids going through an implicit
            // symbol -> key conversion. operator[] is kept on purpose: an id
            // absent from rhs.dictionary maps to 0.
            remapped.id = conversions[remapped.id];
            output.symbols.push_back(std::move(remapped));
        }

        dictionary_reversal(output.dictionary, reverse_dictionary);
        return output;
    }

}