You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

109 lines
3.5 KiB

1 year ago
  1. #include "molasses/lexer.h"
  2. #include <algorithm>
  3. #include <sstream>
  4. #include <iostream>
  5. namespace molasses {
  6. lexed_output lex(const std::string & source) {
  7. lexed_output output;
  8. std::map<std::string, int> reverse_dictionary;
  9. std::stringstream builder;
  10. int token_counter = 1;
  11. // Processes the current token into the output if it is not empty
  12. // This should be called upon reaching the end of a token
  13. const auto process_token = [&](const std::string& token) {
  14. if(not token.empty()) {
  15. symbol current_symbol;
  16. if(
  17. auto it = reverse_dictionary.find(token);
  18. it == reverse_dictionary.end()
  19. ) {
  20. reverse_dictionary[token] = token_counter;
  21. output.dictionary[token_counter] = token;
  22. current_symbol = token_counter;
  23. token_counter++;
  24. } else {
  25. current_symbol = it->second;
  26. }
  27. output.symbols.push_back(current_symbol);
  28. builder = std::stringstream();
  29. }
  30. };
  31. for(auto& character : source) {
  32. if(std::isspace(character)) {
  33. process_token(builder.str());
  34. } else {
  35. builder << character;
  36. }
  37. }
  38. process_token(builder.str()); // process the last token if needed
  39. return output;
  40. }
  41. using conversion_table = std::map<int, int>;
  42. lexed_output concatenate(const lexed_output& lhs, const lexed_output& rhs) {
  43. // primitive that flips keys and values of dictionaries
  44. constexpr auto dictionary_reversal = [](auto& destination,const auto& source) {
  45. for(auto& it : source) {
  46. destination.insert_or_assign(it.second, it.first);
  47. }
  48. };
  49. // primitive that merges a dictionary into a reversed one and returns a conversion table of symbols
  50. // from the dictionary to the newly generated reverse dictionary
  51. auto build_reverse_dictionary = [dictionary_reversal] (auto& reverse_dictionary, auto dictionary) -> conversion_table {
  52. // Make the right dictionary into a reverse dictionary
  53. std::map<std::string, int> right_reverse_dictionary;
  54. dictionary_reversal(right_reverse_dictionary, dictionary);
  55. // find the maximum token id in the left dictionary
  56. int max_token = 0;
  57. if(not reverse_dictionary.empty()) {
  58. max_token = std::max_element(
  59. reverse_dictionary.begin(),
  60. reverse_dictionary.end(),
  61. [](const auto &lhs, const auto &rhs) -> bool {
  62. return lhs.second < rhs.second;
  63. }
  64. )->second;
  65. }
  66. // make the conversions and update the reverse dictionary
  67. conversion_table conversions;
  68. for(auto& [key, value] : right_reverse_dictionary) {
  69. if(auto match = reverse_dictionary.find(key); match != reverse_dictionary.end()) {
  70. conversions[value] = match->second;
  71. } else {
  72. max_token+=1;
  73. conversions[value] = max_token;
  74. reverse_dictionary[key] = max_token;
  75. }
  76. }
  77. return conversions;
  78. };
  79. std::map<std::string, int> reverse_dictionary;
  80. dictionary_reversal(reverse_dictionary, lhs.dictionary);
  81. auto conversions = build_reverse_dictionary(reverse_dictionary, rhs.dictionary);
  82. auto symbol_stream = rhs.symbols;
  83. lexed_output output{.symbols = lhs.symbols};
  84. for(auto& old_symbol : symbol_stream) {
  85. //This diagnostic is pretty lousy, but that is what happens when keys are taken by reference
  86. #pragma clang diagnostic push
  87. #pragma ide diagnostic ignored "LocalValueEscapesScope"
  88. old_symbol = conversions[old_symbol];
  89. #pragma clang diagnostic pop
  90. }
  91. dictionary_reversal(output.dictionary, reverse_dictionary);
  92. std::copy(symbol_stream.begin(), symbol_stream.end(), std::back_inserter(output.symbols));
  93. return output;
  94. }
  95. }