You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

306 lines
11 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
#include <algorithm>
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <string_view>
#include <system_error>
#include <type_traits>
#include <utility>
#include <vector>
#include "molasses/parser_primitives.h"
#include "molasses/generator_primitives.h"
#include "molasses/errors.h"
  6. namespace molasses {
  7. std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
  8. {
  9. auto args = next_op.argument_types();
  10. while(not (args.empty() or current_stack.empty())) {
  11. if(current_stack.back() != args.back()) {
  12. throw type_input_error();
  13. } else {
  14. args.pop_back();
  15. current_stack.pop_back();
  16. }
  17. }
  18. if(not args.empty()) {
  19. throw value_missing_error();
  20. }
  21. }
  22. {
  23. auto return_types = next_op.return_types();
  24. std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack));
  25. }
  26. return current_stack;
  27. }
  /// Parses an integer literal from [begin, end) into `value`, choosing the base
  /// from the literal's prefix: `0x` selects hexadecimal, a leading `0` followed
  /// by more characters selects octal, anything else is decimal.
  ///
  /// @param str   the whole token; only its size is consulted
  /// @param value receives the parsed number when parsing succeeds
  /// @param begin pointer to the first character of the token
  /// @param end   pointer one past the last character of the token
  /// @return the `std::from_chars_result` of the underlying conversion; callers
  ///         must check `ec` before trusting `value`
  template <typename Str, typename Int, typename Begin, typename End>
  auto conditional_begin_int_parse(const Str& str, Int& value, Begin& begin, End& end) {
      if(str.size() > 2 and std::string_view(begin, 2) == "0x") {
          // from_chars does not understand the "0x" prefix, so skip it.
          return std::from_chars(begin + 2, end, value, 16);
      } else if(str.size() > 1 and *begin == '0') {
          // The leading zero is itself a valid octal digit: parsing from it
          // (rather than after it) lets "0_i32"/"0_i64" convert to zero
          // instead of failing on the suffix.
          return std::from_chars(begin, end, value, 8);
      } else {
          return std::from_chars(begin, end, value, 10);
      }
  }

  /// Tries to read `str` as a 32-bit integer literal.
  ///
  /// Accepts a number (decimal, `0x` hexadecimal or `0`-prefixed octal) either
  /// alone or followed by exactly the suffix `_i32`.
  ///
  /// @return the parsed value, or `std::nullopt` when the token is not a
  ///         well-formed in-range 32-bit literal
  std::optional<int32_t> try_parse_int32(const std::string& str) {
      int32_t value = 0;
      auto begin = str.data();
      auto end = str.data() + str.size();
      auto result = conditional_begin_int_parse(str, value, begin, end);
      // On failure from_chars leaves `value` untouched; never hand it out.
      if(result.ec != std::errc{}) {
          return std::nullopt;
      }
      const std::string_view suffix(result.ptr, static_cast<std::size_t>(end - result.ptr));
      if(suffix.empty() or suffix == "_i32") {
          return value;
      }
      return std::nullopt;
  }

  /// Tries to read `str` as a 64-bit integer literal.
  ///
  /// Only literals carrying the explicit `_i64` suffix qualify; an unsuffixed
  /// number is rejected here.
  ///
  /// @return the parsed value, or `std::nullopt` when the token is not a
  ///         well-formed in-range literal ending in `_i64`
  std::optional<int64_t> try_parse_int64(const std::string& str) {
      int64_t value = 0;
      auto begin = str.data();
      auto end = str.data() + str.size();
      auto result = conditional_begin_int_parse(str, value, begin, end);
      // On failure from_chars leaves `value` untouched; never hand it out.
      if(result.ec != std::errc{}) {
          return std::nullopt;
      }
      const std::string_view suffix(result.ptr, static_cast<std::size_t>(end - result.ptr));
      if(suffix == "_i64") {
          return value;
      }
      return std::nullopt;
  }
  /// Returns the first element of `container` whose `->name()` equals `name`.
  ///
  /// The container is only read, so it is bound by const reference; elements
  /// are pointer-like objects (they are dereferenced with `->`).
  ///
  /// @param container range of pointer-like elements exposing `name()`
  /// @param name      value compared against each element's `name()`
  /// @return a copy of the matching element, or an element constructed from
  ///         `nullptr` when nothing matches
  template <typename Container, typename Name>
  auto find_ptr_by_name_in_container(const Container& container, const Name& name)
      -> std::remove_cv_t<std::remove_reference_t<decltype(*std::begin(container))>> {
      const auto it = std::find_if(std::begin(container), std::end(container), [&](const auto& elem) {
          return elem->name() == name;
      });
      if(it != std::end(container)) {
          return *it;
      }
      return nullptr;
  }
  74. std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
  75. return find_ptr_by_name_in_container(types, name);
  76. }
  77. std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const {
  78. return find_ptr_by_name_in_container(operations, name);
  79. }
  80. bool type_check(
  81. const parser_context& parser_state,
  82. const lexed_output& lexer_state,
  83. const std::vector<symbol>& consumed_stream,
  84. std::vector<std::string> execution_input,
  85. const std::vector<std::string>& execution_output
  86. ) {
  87. auto& type_stack = execution_input;
  88. for(const auto& symbol : consumed_stream) {
  89. const auto& symbol_text = lexer_state.dictionary.at(symbol);
  90. if(symbol.is_string) {
  91. type_stack.emplace_back("u8 ptr");
  92. } else if(auto is_int32 = try_parse_int32(symbol_text); is_int32) {
  93. type_stack.emplace_back("i32");
  94. } else if(auto is_int64 = try_parse_int64(symbol_text); is_int64) {
  95. type_stack.emplace_back("i64");
  96. } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
  97. type_stack = type_stack >> *is_op;
  98. }
  99. }
  100. return type_stack == execution_output;
  101. }
  102. generate_context parse(parser_context ctx, const lexed_output& lexer_data) {
  103. enum op : int {
  104. DO_KW = 1,
  105. SEPARATOR_KW,
  106. PROC_KW,
  107. END_KW
  108. };
  109. lexed_output fake;
  110. fake.dictionary[PROC_KW] = "__PROC__";
  111. fake.dictionary[SEPARATOR_KW] = "__--__";
  112. fake.dictionary[DO_KW] = "__DO__";
  113. fake.dictionary[END_KW] = "__END__";
  114. auto tokens = concatenate(fake, lexer_data);
  115. std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;
  116. auto register_pointer_type = [&](std::string full_ptr_type_name) -> void {
  117. if(auto type = ctx.lookup_type(full_ptr_type_name); !type) {
  118. ctx.types.push_back(std::make_shared<primitive_type>(std::move(full_ptr_type_name), architecture_ptr_size));
  119. }
  120. };
  121. auto compact_type_modifiers = [&](const std::vector<std::string>& type_info) -> std::vector<std::string> {
  122. std::vector<std::string> ret_val;
  123. for(const auto& elem : type_info) {
  124. if(elem == "ptr") {
  125. if(ret_val.empty()) throw type_expected_with_modifier_error();
  126. ret_val.back() += " ptr";
  127. register_pointer_type(ret_val.back());
  128. } else {
  129. ret_val.push_back(elem);
  130. }
  131. }
  132. return ret_val;
  133. };
  134. auto parse_proc = [&](auto it) -> std::pair<decltype(it), std::shared_ptr<procedure_operation>> {
  135. #define check_for_unexpected_stream_end(expected, context) \
  136. do{if(it == tokens.symbols.end()) { \
  137. throw expecting_token_error(expected, context); \
  138. }}while(false)
  139. decltype(it) last_valid;
  140. if(*it != PROC_KW) {
  141. throw unexpected_token_error(*it, tokens.dictionary[*it],tokens.dictionary[PROC_KW]);
  142. }
  143. last_valid = it;
  144. ++it;
  145. check_for_unexpected_stream_end("Procedure-Name", details::concatenate_builder("In top level, file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  146. std::string name = tokens.dictionary.at(*it);
  147. auto& name_symbol = *it;
  148. last_valid = it;
  149. ++it;
  150. check_for_unexpected_stream_end(tokens.dictionary[SEPARATOR_KW], details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  151. // Process arguments list
  152. std::vector<std::string> argument_types;
  153. while(*it != SEPARATOR_KW) {
  154. argument_types.emplace_back(tokens.dictionary.at(*it));
  155. last_valid = it;
  156. ++it;
  157. check_for_unexpected_stream_end("Procedure-Argument-List to end", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  158. }
  159. last_valid = it;
  160. ++it;
  161. check_for_unexpected_stream_end("Procedure-Argument-List to be followed by a return list or a __DO__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  162. argument_types = compact_type_modifiers(argument_types);
  163. // Process return types list
  164. std::vector<std::string> return_types;
  165. while(*it != DO_KW) {
  166. return_types.emplace_back(tokens.dictionary.at(*it));
  167. last_valid = it;
  168. ++it;
  169. check_for_unexpected_stream_end("Procedure-Return-List to end", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  170. }
  171. last_valid = it;
  172. ++it;
  173. check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  174. return_types = compact_type_modifiers(return_types);
  175. // Process body
  176. std::vector<symbol> body;
  177. while(*it != END_KW) {
  178. body.emplace_back(*it);
  179. last_valid = it;
  180. ++it;
  181. check_for_unexpected_stream_end("__DO__ block needs a matching __END__", details::concatenate_builder("Parsing procedure ",tokens.dictionary.at(name_symbol),", file ",last_valid->file_name, ":", last_valid->line, ":", last_valid->column));
  182. }
  183. last_valid = it;
  184. ++it;
  185. return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body));
  186. #undef check_for_unexpected_stream_end
  187. };
  188. auto progress = tokens.symbols.begin();
  189. do {
  190. auto [iterator, procedure] = parse_proc(progress);
  191. ctx.operations.push_back(procedure);
  192. parsed_procedures.emplace_back(std::move(procedure));
  193. progress = iterator;
  194. } while (progress != tokens.symbols.end());
  195. for(auto& proc : parsed_procedures) {
  196. if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
  197. throw procedure_stack_error();
  198. }
  199. }
  200. return {
  201. tokens,
  202. ctx,
  203. parsed_procedures
  204. };
  205. }
  206. std::vector<std::string> procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const {
  207. std::vector<std::string> ops = generate_label(name());
  208. for(auto&& instruction : generate_enter()) {
  209. ops.push_back(instruction);
  210. }
  211. for(auto elem : _body) {
  212. auto token = lexer_data.dictionary.at(elem);
  213. if(elem.is_string) {
  214. for(auto&& instruction : generate_push_string_ptr(elem)) {
  215. ops.push_back(instruction);
  216. }
  217. } else if(auto result = try_parse_int32(token); result) {
  218. for(auto&& instruction : generate_push_int32(result.value())) {
  219. ops.push_back(instruction);
  220. }
  221. } else if(auto result = try_parse_int64(token); result) {
  222. for(auto&& instruction : generate_push_int64(result.value())) {
  223. ops.push_back(instruction);
  224. }
  225. } else if(auto op = ctx.lookup_operation(token); op) {
  226. for(auto&& instruction : op->emit(ctx)) {
  227. ops.push_back(instruction);
  228. }
  229. } else {
  230. throw unknown_token_error(elem);
  231. }
  232. }
  233. for(auto&& instruction : generate_return()) {
  234. ops.push_back(instruction);
  235. }
  236. return ops;
  237. }
  238. std::vector<std::string> procedure_operation::emit(const parser_context& ctx) const {
  239. return generate_call(name());
  240. }
  241. std::vector<std::string> generate(const generate_context& ctx) {
  242. std::vector<std::string> generated;
  243. for(const auto& instr : initialize_stack()) {
  244. generated.push_back(instr);
  245. }
  246. for(const auto& proc : ctx.procedures) {
  247. for(const auto& instr : proc->generate(ctx.parser, ctx.lexer)) {
  248. generated.push_back(instr);
  249. }
  250. }
  251. std::set<int> done;
  252. for(const auto& value : ctx.lexer.symbols) {
  253. if(value.is_string && not done.contains(value.id)) {
  254. for(const auto& instr : generate_string(value, ctx.lexer.dictionary.at(value.id))) {
  255. generated.push_back(instr);
  256. }
  257. done.insert(value.id);
  258. }
  259. }
  260. return generated;
  261. }
  262. void procedure_operation::execute(const generate_context& ctx, interpreter_stack& stack) const {
  263. }
  264. }