You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

364 lines
14 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
#include "molasses/parser_primitives.h"
#include "molasses/errors.h"
#include "molasses/generator_primitives.h"
#include <algorithm>
#include <charconv>
#include <iostream>
#include <span>
#include <system_error>
  7. namespace molasses {
  8. std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
  9. {
  10. auto args = next_op.argument_types();
  11. while(not (args.empty() or current_stack.empty())) {
  12. if(current_stack.back() != args.back()) {
  13. throw type_input_error();
  14. } else {
  15. args.pop_back();
  16. current_stack.pop_back();
  17. }
  18. }
  19. if(not args.empty()) {
  20. throw value_missing_error();
  21. }
  22. }
  23. {
  24. auto return_types = next_op.return_types();
  25. std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack));
  26. }
  27. return current_stack;
  28. }
  29. auto conditional_begin_int_parse(const auto& str, auto& value, auto& begin, auto& end) {
  30. if(str.size() > 2 and std::string_view{begin, begin + 2} == "0x") {
  31. return std::from_chars(begin+2, end, value, 16);
  32. } else if (str.size() > 1 and std::string_view{begin, begin + 1} == "0") {
  33. return std::from_chars(begin+1, end, value, 8);
  34. } else {
  35. return std::from_chars(begin, end, value, 10);
  36. }
  37. }
  38. std::optional<int32_t> try_parse_int32(const std::string& str) {
  39. int32_t value;
  40. auto begin = str.data();
  41. auto end = str.data()+str.size();
  42. auto result = conditional_begin_int_parse(str, value, begin, end);
  43. if(result.ptr == end) {
  44. return value;
  45. } else {
  46. if(std::string_view{result.ptr, end} == "_i32") {
  47. return value;
  48. }
  49. }
  50. return std::nullopt;
  51. }
  52. std::optional<int64_t> try_parse_int64(const std::string& str) {
  53. int64_t value;
  54. auto begin = str.data();
  55. auto end = str.data()+str.size();
  56. auto result = conditional_begin_int_parse(str, value, begin, end);
  57. if(result.ptr == end) {
  58. return std::nullopt;
  59. } else {
  60. if(std::string_view{result.ptr, end} == "_i64") {
  61. return value;
  62. }
  63. }
  64. return std::nullopt;
  65. }
  66. auto find_ptr_by_name_in_container(auto container, const auto& name) -> std::remove_cvref_t<decltype(*std::begin(container))> {
  67. auto it = std::find_if(std::begin(container), std::end(container), [&](auto elem){
  68. return elem->name() == name;
  69. });
  70. if(it != std::end(container)) {
  71. return *it;
  72. }
  73. return nullptr;
  74. }
  75. std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
  76. return find_ptr_by_name_in_container(types, name);
  77. }
  78. std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const {
  79. return find_ptr_by_name_in_container(operations, name);
  80. }
  81. bool type_check(
  82. const parser_context& parser_state,
  83. const lexed_output& lexer_state,
  84. const std::vector<symbol>& consumed_stream,
  85. std::vector<std::string> execution_input,
  86. const std::vector<std::string>& execution_output,
  87. const std::vector<std::pair<size_t, size_t>>& sub_bodies
  88. ) {
  89. auto& type_stack = execution_input;
  90. std::map<size_t, std::vector<std::string>> effective_snapshots;
  91. size_t idx = 0;
  92. for(auto it = consumed_stream.begin(); it != consumed_stream.end(); ++it, ++idx) {
  93. const auto& symbol = *it;
  94. const auto& symbol_text = lexer_state.dictionary.at(symbol);
  95. // Skip GOTOs and LABELs
  96. if(auto ahead = it; ++ahead != consumed_stream.end() and (lexer_state.dictionary.at(*ahead) == "__GOTO__" or lexer_state.dictionary.at(*ahead) == "__LABEL__")) {
  97. effective_snapshots[idx] = type_stack;
  98. it = ahead;
  99. ++idx;
  100. } else if(symbol.is_string) {
  101. type_stack.emplace_back("u8 ptr");
  102. } else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) {
  103. type_stack.emplace_back("i32");
  104. } else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) {
  105. type_stack.emplace_back("i64");
  106. } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
  107. type_stack = type_stack >> *is_op;
  108. }
  109. }
  110. if(type_stack != execution_output) return false;
  111. for(auto [start, end] : sub_bodies) {
  112. if(effective_snapshots[start] != effective_snapshots[end]) {
  113. return false;
  114. }
  115. }
  116. return true;
  117. }
  118. generate_context parse(parser_context ctx, const lexed_output& lexer_data) {
  119. enum op : int {
  120. DO_KW = 1,
  121. SEPARATOR_KW,
  122. PROC_KW,
  123. END_KW,
  124. LABEL_KW,
  125. GOTO_KW
  126. };
  127. lexed_output fake;
  128. fake.dictionary[PROC_KW] = "__PROC__";
  129. fake.dictionary[SEPARATOR_KW] = "__--__";
  130. fake.dictionary[DO_KW] = "__DO__";
  131. fake.dictionary[END_KW] = "__END__";
  132. fake.dictionary[LABEL_KW] = "__LABEL__";
  133. fake.dictionary[GOTO_KW] = "__GOTO__";
  134. auto tokens = concatenate(fake, lexer_data);
  135. std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;
  136. auto register_pointer_type = [&](std::string full_ptr_type_name) -> void {
  137. if(auto type = ctx.lookup_type(full_ptr_type_name); !type) {
  138. ctx.types.push_back(std::make_shared<primitive_type>(std::move(full_ptr_type_name), architecture_ptr_size));
  139. }
  140. };
  141. auto compact_type_modifiers = [&](const std::vector<std::string>& type_info) -> std::vector<std::string> {
  142. std::vector<std::string> ret_val;
  143. for(const auto& elem : type_info) {
  144. if(elem == "ptr") {
  145. if(ret_val.empty()) throw type_expected_with_modifier_error();
  146. ret_val.back() += " ptr";
  147. register_pointer_type(ret_val.back());
  148. } else {
  149. ret_val.push_back(elem);
  150. }
  151. }
  152. return ret_val;
  153. };
  154. auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
  155. #define check_for_unexpected_stream_end(expected, context) \
  156. do{if(it == tokens.symbols.end()) { \
  157. throw expecting_token_error(expected, context); \
  158. }}while(false)
  159. decltype(it) last_valid;
  160. if(*it != PROC_KW) {
  161. throw unexpected_token_error(*it, tokens.dictionary[*it],tokens.dictionary[PROC_KW]);
  162. }
  163. last_valid = it;
  164. ++it;
  165. check_for_unexpected_stream_end("Procedure-Name", details::concatenate_builder("In top level, file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  166. std::string name = tokens.dictionary.at(*it);
  167. auto& name_symbol = *it;
  168. last_valid = it;
  169. ++it;
  170. check_for_unexpected_stream_end(tokens.dictionary[SEPARATOR_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  171. // Process arguments list
  172. std::vector<std::string> argument_types;
  173. while(*it != SEPARATOR_KW) {
  174. argument_types.emplace_back(tokens.dictionary.at(*it));
  175. last_valid = it;
  176. ++it;
  177. check_for_unexpected_stream_end("Procedure-Argument-List to end", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  178. }
  179. last_valid = it;
  180. ++it;
  181. check_for_unexpected_stream_end("Procedure-Argument-List to be followed by a return list or a __DO__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  182. argument_types = compact_type_modifiers(argument_types);
  183. // Process return types list
  184. std::vector<std::string> return_types;
  185. while(*it != DO_KW) {
  186. return_types.emplace_back(tokens.dictionary.at(*it));
  187. last_valid = it;
  188. ++it;
  189. check_for_unexpected_stream_end("Procedure-Return-List to end", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  190. }
  191. last_valid = it;
  192. ++it;
  193. check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  194. return_types = compact_type_modifiers(return_types);
  195. // Process body
  196. std::vector<symbol> body;
  197. std::vector<std::pair<size_t, size_t>> sub_bodies;
  198. std::map<std::string, size_t> found_labels;
  199. std::map<std::string, size_t> found_gotos;
  200. while(*it != END_KW) {
  201. if(auto ahead = it; ++ahead != tokens.symbols.end() and (*ahead == GOTO_KW or *ahead == LABEL_KW)) {
  202. if(*ahead == GOTO_KW) {
  203. found_gotos[tokens.dictionary[*it]] = body.size();
  204. } else if(*ahead == LABEL_KW) {
  205. found_labels[tokens.dictionary[*it]] = body.size();
  206. // TODO: Handle duplicate labels
  207. }
  208. body.emplace_back(*it);
  209. body.emplace_back(*ahead);
  210. last_valid = ahead;
  211. it = ++ahead;
  212. } else {
  213. body.emplace_back(*it);
  214. last_valid = it;
  215. ++it;
  216. }
  217. check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  218. }
  219. last_valid = it;
  220. ++it;
  221. for(auto& [dest, index] : found_gotos) {
  222. if(not found_labels.contains(dest)) {
  223. throw orphan_goto_error(body[index], dest);
  224. }
  225. sub_bodies.emplace_back(std::min(index, found_labels[dest]), std::max(index, found_labels[dest]));
  226. }
  227. return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body, sub_bodies));
  228. #undef check_for_unexpected_stream_end
  229. };
  230. auto progress = tokens.symbols.begin();
  231. do {
  232. auto [iterator, procedure] = parse_proc(progress);
  233. ctx.operations.push_back(procedure);
  234. parsed_procedures.emplace_back(std::move(procedure));
  235. progress = iterator;
  236. } while (progress != tokens.symbols.end());
  237. for(auto& proc : parsed_procedures) {
  238. if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets, proc->_simple_sub_bodies)) {
  239. throw procedure_stack_error();
  240. }
  241. }
  242. return {
  243. tokens,
  244. ctx,
  245. parsed_procedures
  246. };
  247. }
  248. std::vector<std::string> procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const {
  249. std::vector<std::string> ops = generate_label(name());
  250. for(auto&& instruction : generate_enter()) {
  251. ops.push_back(instruction);
  252. }
  253. for(auto it = _body.begin(); it != _body.end(); ++it) {
  254. auto elem = *it;
  255. auto token = lexer_data.dictionary.at(elem);
  256. if(auto ahead = it; ++ahead != _body.end() and (lexer_data.dictionary.at(*ahead) == "__GOTO__" or lexer_data.dictionary.at(*ahead) == "__LABEL__")) {
  257. if(lexer_data.dictionary.at(*ahead) == "__GOTO__") {
  258. for(auto&& instruction : generate_goto(name() + " in " + token)) {
  259. ops.push_back(instruction);
  260. }
  261. } else if(lexer_data.dictionary.at(*ahead) == "__LABEL__") {
  262. for(auto&& instruction : generate_label(name() + " in " + token)) {
  263. ops.push_back(instruction);
  264. }
  265. }
  266. it = ahead;
  267. } else if(elem.is_string) {
  268. for(auto&& instruction : generate_push_string_ptr(elem)) {
  269. ops.push_back(instruction);
  270. }
  271. } else if(auto result = try_parse_int32(token); result) {
  272. for(auto&& instruction : generate_push_int32(result.value())) {
  273. ops.push_back(instruction);
  274. }
  275. } else if(auto result = try_parse_int64(token); result) {
  276. for(auto&& instruction : generate_push_int64(result.value())) {
  277. ops.push_back(instruction);
  278. }
  279. } else if(auto op = ctx.lookup_operation(token); op) {
  280. for(auto&& instruction : op->emit(ctx)) {
  281. ops.push_back(instruction);
  282. }
  283. } else {
  284. throw unknown_token_error(elem);
  285. }
  286. }
  287. for(auto&& instruction : generate_return()) {
  288. ops.push_back(instruction);
  289. }
  290. return ops;
  291. }
  292. std::vector<std::string> procedure_operation::emit(const parser_context& ctx) const {
  293. return generate_call(name());
  294. }
  295. std::vector<std::string> generate(const generate_context& ctx) {
  296. std::vector<std::string> generated;
  297. for(const auto& instr : initialize_stack()) {
  298. generated.push_back(instr);
  299. }
  300. for(const auto& proc : ctx.procedures) {
  301. for(const auto& instr : proc->generate(ctx.parser, ctx.lexer)) {
  302. generated.push_back(instr);
  303. }
  304. }
  305. std::set<int> done;
  306. for(const auto& value : ctx.lexer.symbols) {
  307. if(value.is_string && not done.contains(value.id)) {
  308. for(const auto& instr : generate_string(value, ctx.lexer.dictionary.at(value.id))) {
  309. generated.push_back(instr);
  310. }
  311. done.insert(value.id);
  312. }
  313. }
  314. return generated;
  315. }
  316. void procedure_operation::execute(const generate_context& ctx, interpreter_stack& stack) const {
  317. }
  318. }