You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

290 lines
10 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
  #include <algorithm>
  #include <charconv>
  #include <iostream>
  #include <iterator>
  #include <system_error>
  #include <type_traits>
  #include "molasses/parser_primitives.h"
  #include "molasses/generator_primitives.h"
  5. namespace molasses {
  6. std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
  7. {
  8. auto args = next_op.argument_types();
  9. while(not (args.empty() or current_stack.empty())) {
  10. if(current_stack.back() != args.back()) {
  11. throw type_input_error();
  12. } else {
  13. args.pop_back();
  14. current_stack.pop_back();
  15. }
  16. }
  17. if(not args.empty()) {
  18. throw value_missing_error();
  19. }
  20. }
  21. {
  22. auto return_types = next_op.return_types();
  23. std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack));
  24. }
  25. return current_stack;
  26. }
  /// Runs std::from_chars over [begin, end) with a base chosen from the literal's prefix.
  ///
  /// - more than two characters starting with "0x": hexadecimal, parsed after the prefix;
  /// - more than one character starting with "0": octal;
  /// - anything else: decimal.
  ///
  /// @param str    the whole token; only its size is consulted.
  /// @param value  receives the parsed number when parsing succeeds.
  /// @param begin  pointer to the first character of the token.
  /// @param end    pointer one past the last character of the token.
  /// @return the std::from_chars_result of the parse that was attempted.
  template<typename Text, typename Integer, typename First, typename Last>
  auto conditional_begin_int_parse(const Text& str, Integer& value, First& begin, Last& end) {
      const auto length = str.size();
      if(length > 2 and begin[0] == '0' and begin[1] == 'x') {
          return std::from_chars(begin + 2, end, value, 16);
      }
      if(length > 1 and begin[0] == '0') {
          // The leading zero is itself a valid octal digit, so it stays inside the
          // parsed range; skipping it made tokens such as "0_i32" fail to parse.
          return std::from_chars(begin, end, value, 8);
      }
      return std::from_chars(begin, end, value, 10);
  }

  /// Parses a token as a 32-bit integer literal.
  ///
  /// Accepts a number (decimal, "0x" hexadecimal or "0" octal) that is either
  /// unsuffixed or followed by exactly "_i32".
  ///
  /// @param str  token text.
  /// @return the value, or std::nullopt when no digits could be parsed, the value
  ///         does not fit in int32_t, or trailing text other than "_i32" remains.
  std::optional<int32_t> try_parse_int32(const std::string& str) {
      int32_t value = 0;
      auto begin = str.data();
      auto end = begin + str.size();
      const auto result = conditional_begin_int_parse(str, value, begin, end);
      // from_chars leaves `value` untouched on failure, so a failed parse must
      // never reach the suffix check below.
      if(result.ec != std::errc{}) {
          return std::nullopt;
      }
      const std::string_view suffix(result.ptr, static_cast<std::size_t>(end - result.ptr));
      if(suffix.empty() or suffix == "_i32") {
          return value;
      }
      return std::nullopt;
  }

  /// Parses a token as a 64-bit integer literal.
  ///
  /// Only numbers explicitly suffixed with "_i64" are accepted; an unsuffixed
  /// number is rejected by this function.
  ///
  /// @param str  token text.
  /// @return the value, or std::nullopt when no digits could be parsed, the value
  ///         does not fit in int64_t, or the text after the digits is not "_i64".
  std::optional<int64_t> try_parse_int64(const std::string& str) {
      int64_t value = 0;
      auto begin = str.data();
      auto end = begin + str.size();
      const auto result = conditional_begin_int_parse(str, value, begin, end);
      if(result.ec != std::errc{}) {
          return std::nullopt;
      }
      const std::string_view suffix(result.ptr, static_cast<std::size_t>(end - result.ptr));
      if(suffix == "_i64") {
          return value;
      }
      return std::nullopt;
  }
  /// Finds the first element of `container` whose `->name()` equals `name`.
  ///
  /// The container and its elements are only read through const references, so a
  /// lookup no longer copies the whole container (and every pointer in it).
  ///
  /// @param container  range of pointer-like elements exposing `->name()`.
  /// @param name       value compared against each element's `name()`.
  /// @return a copy of the matching element, or a null pointer when nothing matches.
  template<typename Container, typename Name>
  auto find_ptr_by_name_in_container(const Container& container, const Name& name)
      -> std::remove_cv_t<std::remove_reference_t<decltype(*std::begin(container))>> {
      const auto match = std::find_if(std::begin(container), std::end(container), [&name](const auto& candidate) {
          return candidate->name() == name;
      });
      if(match == std::end(container)) {
          return nullptr;
      }
      return *match;
  }
  73. std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
  74. return find_ptr_by_name_in_container(types, name);
  75. }
  76. std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const {
  77. return find_ptr_by_name_in_container(operations, name);
  78. }
  79. bool type_check(
  80. const parser_context& parser_state,
  81. const lexed_output& lexer_state,
  82. const std::vector<symbol>& consumed_stream,
  83. std::vector<std::string> execution_input,
  84. const std::vector<std::string>& execution_output
  85. ) {
  86. auto& type_stack = execution_input;
  87. for(const auto& symbol : consumed_stream) {
  88. const auto& symbol_text = lexer_state.dictionary.at(symbol);
  89. if(symbol.is_string) {
  90. type_stack.emplace_back("u8 ptr");
  91. } else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) {
  92. type_stack.emplace_back("i32");
  93. } else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) {
  94. type_stack.emplace_back("i64");
  95. } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
  96. type_stack = type_stack >> *is_op;
  97. }
  98. }
  99. return type_stack == execution_output;
  100. }
  101. generate_context parse(parser_context ctx, const lexed_output& lexer_data) {
  102. enum op : int {
  103. DO_KW = 1,
  104. SEPARATOR_KW,
  105. PROC_KW,
  106. END_KW
  107. };
  108. lexed_output fake;
  109. fake.dictionary[PROC_KW] = "__PROC__";
  110. fake.dictionary[SEPARATOR_KW] = "__--__";
  111. fake.dictionary[DO_KW] = "__DO__";
  112. fake.dictionary[END_KW] = "__END__";
  113. auto tokens = concatenate(fake, lexer_data);
  114. std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;
  115. auto register_pointer_type = [&](std::string full_ptr_type_name) -> void {
  116. if(auto type = ctx.lookup_type(full_ptr_type_name); !type) {
  117. ctx.types.push_back(std::make_shared<primitive_type>(std::move(full_ptr_type_name), architecture_ptr_size));
  118. }
  119. };
  120. auto compact_type_modifiers = [&](const std::vector<std::string>& type_info) -> std::vector<std::string> {
  121. std::vector<std::string> ret_val;
  122. for(const auto& elem : type_info) {
  123. if(elem == "ptr") {
  124. if(ret_val.empty()) throw type_expected_with_modifier_error();
  125. ret_val.back() += " ptr";
  126. register_pointer_type(ret_val.back());
  127. } else {
  128. ret_val.push_back(elem);
  129. }
  130. }
  131. return ret_val;
  132. };
  133. auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
  134. #define check_for_unexpected_stream_end(expected, context) \
  135. do{if(it == tokens.symbols.end()) { \
  136. throw expecting_token_error(expected, context); \
  137. }}while(false)
  138. if(*it != PROC_KW) {
  139. throw unexpected_token_error(*it, tokens.dictionary[*it],tokens.dictionary[PROC_KW]);
  140. }
  141. ++it;
  142. check_for_unexpected_stream_end(tokens.dictionary[PROC_KW], details::concatenate_builder("In top level, file ",it->file_name, ":", it->line, ":", it->column));
  143. std::string name = tokens.dictionary.at(*it);
  144. auto& name_symbol = *it;
  145. ++it;
  146. check_for_unexpected_stream_end("Procedure-Name", details::concatenate_builder("In top level, file ",it->file_name, ":", it->line, ":", it->column));
  147. // Process arguments list
  148. std::vector<std::string> argument_types;
  149. while(*it != SEPARATOR_KW) {
  150. argument_types.emplace_back(tokens.dictionary.at(*it));
  151. ++it;
  152. check_for_unexpected_stream_end(tokens.dictionary[SEPARATOR_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
  153. }
  154. ++it;
  155. check_for_unexpected_stream_end("Procedure-Argument-List", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
  156. argument_types = compact_type_modifiers(argument_types);
  157. // Process return types list
  158. std::vector<std::string> return_types;
  159. while(*it != DO_KW) {
  160. return_types.emplace_back(tokens.dictionary.at(*it));
  161. ++it;
  162. check_for_unexpected_stream_end(tokens.dictionary[DO_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
  163. }
  164. ++it;
  165. check_for_unexpected_stream_end("Procedure-Return-List", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
  166. return_types = compact_type_modifiers(return_types);
  167. // Process return types list
  168. std::vector<symbol> body;
  169. while(*it != END_KW) {
  170. body.emplace_back(*it);
  171. ++it;
  172. check_for_unexpected_stream_end(tokens.dictionary[END_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",it->file_name, ":", it->line, ":", it->column));
  173. }
  174. ++it;
  175. return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body));
  176. #undef check_for_unexpected_stream_end
  177. };
  178. auto progress = tokens.symbols.begin();
  179. do {
  180. auto [iterator, procedure] = parse_proc(progress);
  181. ctx.operations.push_back(procedure);
  182. parsed_procedures.emplace_back(std::move(procedure));
  183. progress = iterator;
  184. } while (progress != tokens.symbols.end());
  185. for(auto& proc : parsed_procedures) {
  186. if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
  187. throw procedure_stack_error();
  188. }
  189. }
  190. return {
  191. tokens,
  192. ctx,
  193. parsed_procedures
  194. };
  195. }
  196. std::vector<std::string> procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const {
  197. std::vector<std::string> ops = generate_label(name());
  198. for(auto&& instruction : generate_enter()) {
  199. ops.push_back(instruction);
  200. }
  201. for(auto elem : _body) {
  202. auto token = lexer_data.dictionary.at(elem);
  203. if(elem.is_string) {
  204. for(auto&& instruction : generate_push_string_ptr(elem)) {
  205. ops.push_back(instruction);
  206. }
  207. } else if(auto result = try_parse_int32(token); result) {
  208. for(auto&& instruction : generate_push_int32(result.value())) {
  209. ops.push_back(instruction);
  210. }
  211. } else if(auto result = try_parse_int64(token); result) {
  212. for(auto&& instruction : generate_push_int64(result.value())) {
  213. ops.push_back(instruction);
  214. }
  215. } else if(auto op = ctx.lookup_operation(token); op) {
  216. for(auto&& instruction : op->emit(ctx)) {
  217. ops.push_back(instruction);
  218. }
  219. } else {
  220. throw unknown_token_error(elem);
  221. }
  222. }
  223. for(auto&& instruction : generate_return()) {
  224. ops.push_back(instruction);
  225. }
  226. return ops;
  227. }
  228. std::vector<std::string> procedure_operation::emit(const parser_context& ctx) const {
  229. return generate_call(name());
  230. }
  231. std::vector<std::string> generate(const generate_context& ctx) {
  232. std::vector<std::string> generated;
  233. for(const auto& instr : initialize_stack()) {
  234. generated.push_back(instr);
  235. }
  236. for(const auto& proc : ctx.procedures) {
  237. for(const auto& instr : proc->generate(ctx.parser, ctx.lexer)) {
  238. generated.push_back(instr);
  239. }
  240. }
  241. std::set<int> done;
  242. for(const auto& value : ctx.lexer.symbols) {
  243. if(value.is_string && not done.contains(value.id)) {
  244. for(const auto& instr : generate_string(value, ctx.lexer.dictionary.at(value.id))) {
  245. generated.push_back(instr);
  246. }
  247. done.insert(value.id);
  248. }
  249. }
  250. return generated;
  251. }
  252. }