You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

265 lines
7.9 KiB

1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
1 year ago
#include <algorithm>
#include <cassert>
#include <charconv>
#include <iostream>
#include <system_error>
#include <type_traits>
#include "molasses/parser_primitives.h"
  5. namespace molasses {
  6. parser_context register_integers(parser_context ctx) {
  7. ctx.types.push_back(std::make_shared<primitive_type>("i8",1));
  8. ctx.types.push_back(std::make_shared<primitive_type>("i16",2));
  9. ctx.types.push_back(std::make_shared<primitive_type>("i32",4));
  10. ctx.types.push_back(std::make_shared<primitive_type>("i64",8));
  11. ctx.types.push_back(std::make_shared<primitive_type>("u8",1));
  12. ctx.types.push_back(std::make_shared<primitive_type>("u16",2));
  13. ctx.types.push_back(std::make_shared<primitive_type>("u32",4));
  14. ctx.types.push_back(std::make_shared<primitive_type>("u64",8));
  15. return ctx;
  16. }
  17. std::vector<std::string> operator>>(std::vector<std::string> current_stack, const operation& next_op) {
  18. {
  19. auto args = next_op.argument_types();
  20. while(not (args.empty() or current_stack.empty())) {
  21. if(current_stack.back() != args.back()) {
  22. throw TypeInputError();
  23. } else {
  24. args.pop_back();
  25. current_stack.pop_back();
  26. }
  27. }
  28. if(not args.empty()) {
  29. throw ValueMissingError();
  30. }
  31. }
  32. {
  33. auto return_types = next_op.return_types();
  34. std::move(return_types.begin(), return_types.end(), std::back_inserter(current_stack));
  35. }
  36. return current_stack;
  37. }
  38. std::optional<int32_t> try_parse_int32(const std::string& str) {
  39. int32_t value;
  40. auto begin = str.data();
  41. auto end = str.data()+str.size();
  42. auto result = std::from_chars(begin, end, value, 10);
  43. // TODO: Add other bases
  44. if(result.ptr == end) {
  45. return value;
  46. }
  47. return std::nullopt;
  48. }
  49. auto find_ptr_by_name_in_container(auto container, const auto& name) -> typeof(*std::begin(container)) {
  50. auto it = std::find_if(std::begin(container), std::end(container), [&](auto elem){
  51. return elem->name() == name;
  52. });
  53. if(it != std::end(container)) {
  54. return *it;
  55. }
  56. return {};
  57. }
  58. std::shared_ptr<type> parser_context::lookup_type(const std::string & name) const {
  59. return find_ptr_by_name_in_container(types, name);
  60. }
  61. std::shared_ptr<operation> parser_context::lookup_operation(const std::string & name) const {
  62. return find_ptr_by_name_in_container(operations, name);
  63. }
  64. bool type_check(
  65. const parser_context& parser_state,
  66. const lexed_output& lexer_state,
  67. const std::vector<symbol>& consumed_stream,
  68. std::vector<std::string> execution_input,
  69. const std::vector<std::string>& execution_output
  70. ) {
  71. auto& type_stack = execution_input;
  72. for(const auto& symbol : consumed_stream) {
  73. const auto& symbol_text = lexer_state.dictionary.at(symbol);
  74. if(auto is_int = try_parse_int32(symbol_text); is_int) {
  75. type_stack.emplace_back("i32");
  76. } else if(auto is_op = parser_state.lookup_operation(symbol_text); is_op) {
  77. type_stack = type_stack >> *is_op;
  78. }
  79. }
  80. return type_stack == execution_output;
  81. }
  82. std::vector<std::string> initialize_stack() {
  83. return {
  84. ".bss\n",// TODO: make threadlocal
  85. "stack_instruction:\n",
  86. " .quad 0\n",
  87. ".text\n",
  88. "initialize_callstack:\n",
  89. " movq $9, %rax\n",
  90. " movq $0, %rdi\n",
  91. " movq $8192, %rsi\n",
  92. " movq $3, %rdx\n",
  93. " movq $34, %r10\n",
  94. " movq $-1, %r8\n",
  95. " movq $0, %r9\n",
  96. " syscall\n",
  97. " movq %rax, (stack_instruction)\n",
  98. " retq\n",
  99. };
  100. }
  101. parser_context parse(parser_context ctx, const lexed_output& lexer_data) {
  102. enum op : int {
  103. DO_KW = 1,
  104. SEPARATOR_KW,
  105. PROC_KW,
  106. END_KW
  107. };
  108. lexed_output fake;
  109. fake.dictionary[PROC_KW] = "__PROC__";
  110. fake.dictionary[SEPARATOR_KW] = "__--__";
  111. fake.dictionary[DO_KW] = "__DO__";
  112. fake.dictionary[END_KW] = "__END__";
  113. auto tokens = concatenate(fake, lexer_data);
  114. std::vector<std::shared_ptr<procedure_operation>> parsed_procedures;
  115. auto parse_proc = [&](auto it) -> std::pair<typeof(it), std::shared_ptr<procedure_operation>> {
  116. #define CHECK_FOR_UNEXPECTED_STREAM_END \
  117. if(it == tokens.symbols.end()) { \
  118. throw ExpectingTokenError(); \
  119. }
  120. if(*it != PROC_KW) {
  121. throw UnexpectedTokenError();
  122. }
  123. ++it;
  124. CHECK_FOR_UNEXPECTED_STREAM_END;
  125. std::string name = tokens.dictionary.at(*it);
  126. ++it;
  127. CHECK_FOR_UNEXPECTED_STREAM_END;
  128. if(it == tokens.symbols.end()) {
  129. throw ExpectingTokenError();
  130. }
  131. // Process arguments list
  132. std::vector<std::string> argument_types;
  133. while(*it != SEPARATOR_KW) {
  134. argument_types.emplace_back(tokens.dictionary.at(*it));
  135. ++it;
  136. CHECK_FOR_UNEXPECTED_STREAM_END;
  137. }
  138. ++it;
  139. CHECK_FOR_UNEXPECTED_STREAM_END;
  140. // Process return types list
  141. std::vector<std::string> return_types;
  142. while(*it != DO_KW) {
  143. return_types.emplace_back(tokens.dictionary.at(*it));
  144. ++it;
  145. CHECK_FOR_UNEXPECTED_STREAM_END;
  146. }
  147. ++it;
  148. CHECK_FOR_UNEXPECTED_STREAM_END;
  149. // Process return types list
  150. std::vector<symbol> body;
  151. while(*it != END_KW) {
  152. body.emplace_back(*it);
  153. ++it;
  154. CHECK_FOR_UNEXPECTED_STREAM_END;
  155. }
  156. ++it;
  157. return std::make_pair(it, std::make_shared<procedure_operation>(name, argument_types, return_types, body));
  158. #undef CHECK_FOR_UNEXPECTED_STREAM_END
  159. };
  160. auto progress = tokens.symbols.begin();
  161. do {
  162. auto [iterator, procedure] = parse_proc(progress);
  163. ctx.operations.push_back(procedure);
  164. parsed_procedures.emplace_back(std::move(procedure));
  165. progress = iterator;
  166. } while (progress != tokens.symbols.end());
  167. for(auto& proc : parsed_procedures) {
  168. if(not type_check(ctx, tokens, proc->_body, proc->_args, proc->_rets)) {
  169. throw ProcedureStackError();
  170. }
  171. }
  172. std::vector<std::string> generated;
  173. for(auto instr : initialize_stack()) {
  174. generated.push_back(instr);
  175. }
  176. for(auto proc : parsed_procedures) {
  177. for(auto instr : proc->generate(ctx, tokens)) {
  178. generated.push_back(instr);
  179. }
  180. }
  181. for(auto line : generated) {
  182. std::cout << line;
  183. }
  184. return ctx;
  185. }
  186. std::vector<std::string> generate_call(std::string target) {
  187. static uint64_t label_count= 0;
  188. return {
  189. " movq return_label_n"+std::to_string(label_count)+", (stack_instruction)\n",
  190. " addq $8, stack_instruction\n",
  191. " jmp "+target+"\n",
  192. " return_label_n"+std::to_string(label_count++)+":"
  193. };
  194. }
  195. std::vector<std::string> generate_push_int32(int32_t target) {
  196. return {
  197. " pushq $" +std::to_string(target)+ "\n"
  198. };
  199. }
  200. std::vector<std::string> procedure_operation::generate(const parser_context& ctx, const lexed_output& lexer_data) const {
  201. std::vector<std::string> ops;
  202. ops.emplace_back(name()+":\n");
  203. for(auto elem : _body) {
  204. auto token = lexer_data.dictionary.at(elem);
  205. if(auto result = try_parse_int32(token); result) {
  206. for(auto&& elem : generate_push_int32(result.value())) {
  207. ops.push_back(elem);
  208. }
  209. } else if(auto op = ctx.lookup_operation(token); op) {
  210. for(auto&& elem : op->emit(ctx)) {
  211. ops.push_back(elem);
  212. }
  213. } else {
  214. throw UnknownTokenError();
  215. }
  216. }
  217. ops.emplace_back(" // Return to caller\n");
  218. ops.emplace_back(" addq $-8, stack_instruction\n");
  219. ops.emplace_back(" movq (stack_instruction), %rax\n");
  220. ops.emplace_back(" pushq %rax\n");
  221. ops.emplace_back(" retq\n");
  222. return ops;
  223. }
  224. std::vector<std::string> procedure_operation::emit(const parser_context& ctx) const {
  225. return generate_call(name());
  226. }
  227. }