#pragma once #include "gp/containers/array.hpp" #include "gp/containers/vector.hpp" #include "gp/text/ascii.hpp" using string = gp::vector; #define FOREACH_LEXEME_TYPE \ LEX(leftBracket) \ LEX(rightBracket) \ LEX(leftParenthesis) \ LEX(rightParenthesis) \ LEX(leftCurlyBrace) \ LEX(rightCurlyBrace) \ LEX(period) \ LEX(semicolon) \ LEX(colon) \ LEX(doubleColon) \ LEX(comma) \ LEX(at) \ LEX(pointer) \ LEX(as) \ LEX(try_) \ LEX(catch_) \ LEX(raise_) \ LEX(defer) \ LEX(assign) \ LEX(addAssign) \ LEX(substractAssign) \ LEX(multiplyAssign) \ LEX(divideAssign) \ LEX(concatenateAssign) \ LEX(remainderAssign) \ LEX(powerAssign) \ LEX(plus) \ LEX(minus) \ LEX(add) \ LEX(substract) \ LEX(multiply) \ LEX(divide) \ LEX(concatenate) \ LEX(remainder) \ LEX(power) \ LEX(equal) \ LEX(doubleEqual) \ LEX(threeWayComparison) \ LEX(notEqual) \ LEX(greaterOrEqual) \ LEX(greater) \ LEX(lesserOrEqual) \ LEX(lesser) \ LEX(leftShift) \ LEX(rightShift) \ LEX(and_) \ LEX(or_) \ LEX(xor_) \ LEX(not_) \ LEX(increment) \ LEX(decrement) \ LEX(identifier) \ LEX(integer) \ LEX(float_) \ LEX(boolean) \ LEX(string_) \ LEX(null_) \ LEX(public_) \ LEX(main_) \ LEX(type_) \ LEX(event_) \ LEX(class_) \ LEX(enum_) \ LEX(template_) \ LEX(new_) \ LEX(copy) \ LEX(send) \ LEX(receive) \ LEX(intType) \ LEX(floatType) \ LEX(boolType) \ LEX(stringType) \ LEX(arrayType) \ LEX(functionType) \ LEX(taskType) \ LEX(chanType) \ LEX(autoType) \ LEX(if_) \ LEX(unless) \ LEX(else_) \ LEX(switch_) \ LEX(select) \ LEX(case_) \ LEX(while_) \ LEX(do_) \ LEX(until) \ LEX(for_) \ LEX(loop) \ LEX(return_) \ LEX(self) \ LEX(kill) \ LEX(killAll) \ LEX(yield) \ LEX(break_) \ LEX(continue_) #define LEX(x) x, enum class gr_lexeme_type { FOREACH_LEXEME_TYPE }; #undef LEX #define LEX(x) case gr_lexeme_type::x: return #x; constexpr inline const char* to_string(const gr_lexeme_type& value) { switch(value) { FOREACH_LEXEME_TYPE } } #undef LEX #define LEX(x) +1 constexpr inline const char* to_pretty_string(const gr_lexeme_type& value) { constexpr gp::array names = { "[", "]", "(", ")", "{", "}", ".", ";", ":", "::", ",", "@", "&", "as", "try", "catch", "raise", "defer", "=", "+=", "-=", "*=", "/=", "~=", "%=", "^=", "+", "-", "+", "-", "*", "/", "~", "%", "^", "==", "===", "<=>", "!=", ">=", ">", "<=", "<", "<<", ">>", "and", "or", "xor", "not", "++", "--", "identifier", "const_int", "const_float", "const_bool", "const_str", "null", "pub", "main", "type", "event", "class", "enum", "template", "new", "copy", "send", "receive", "int", "float", "bool", "string", "array", "func", "task", "chan", "let", "if", "unless", "else", "switch", "select", "case", "while", "do", "until", "for", "loop", "return", "self", "kill", "killall", "yield", "break", "continue" }; return names[(size_t)value]; } #undef LEX struct gr_lexeme; class gr_lexer { public: using file_loader_t = gp::function(const gp::vector&)>; gp::allocator& _allocator; file_loader_t file_loader; private: gp::vector _files_to_import, _files_imported, _lines; string _file, _text; uint64_t _line, _current, _position_of_line, _file_id; gp::vector _lexemes; char get(ssize_t offset = 0); bool advance(bool start_from_current = false); void scan_script(); void scan_number(); void scan_string(); void scan_operator(); void scan_word(); void scan_file_path(); void scan_use(); string convert_path_to_import(string&); public: gr_lexer(gp::allocator& alloc, file_loader_t& loader) : _allocator(alloc) , file_loader(loader) , _files_to_import(_allocator) , _files_imported(_allocator) , _lines(_allocator) , _file(_allocator) , _text(_allocator) , _lexemes(_allocator) {} const gp::vector& lexemes() { return _lexemes; } void scan_file(gp::vector& file_name); const string& get_line(const gr_lexeme&) const; const string& get_file(const gr_lexeme&) const; const string& get_file(const size_t&) const; }; struct gr_lexeme { const gr_lexer& lexer; size_t _file_id; size_t _line, _column, _text_length = 1; gr_lexeme_type type; bool is_literal, is_operator, is_keyword, is_type; int ivalue; float fvalue; bool bvalue; string svalue; gr_lexeme(const gr_lexer& v) : lexer(v) , svalue(v._allocator) {} const string& get_line() const { return lexer.get_line(*this); } const string& get_file() { return lexer.get_file(*this); } }; inline char gr_lexer::get(ssize_t offset) { const uint position = ssize_t(_current) + offset; gp_config::assertion(!(position < 0 || position >= _text.size()), "Unexpected end of script"); return _text[position]; } inline const string& gr_lexer::get_line(const gr_lexeme& lex) const { gp_config::assertion(!(lex._file_id >= _files_imported.size()), "Lexeme file id out of bounds"); // TODO: Implement this return _files_imported[lex._file_id]; } inline const string& gr_lexer::get_file(const gr_lexeme& lex) const { gp_config::assertion(!(lex._file_id >= _files_imported.size()), "Lexeme file id out of bounds"); return _files_imported[lex._file_id]; } inline const string& gr_lexer::get_file(const size_t& file_id) const { gp_config::assertion(!(file_id >= _files_imported.size()), "File id out of bounds"); return _files_imported[file_id]; } inline bool gr_lexer::advance(bool start_from_current) { if(!start_from_current) { _current++; } if(_current >= _text.size()) { return false; } char symbol = _text[_current]; whileLoop: while(symbol <= 0x20 || symbol == '/' || symbol == '#') { if(_current >= _text.size()) { return false; } symbol = _text[_current]; if(symbol == '\n') { _position_of_line = _current; _line++; } else if(symbol == '#') { do { if(_current >= _text.size()) return false; _current++; } while (_text[_current] != '\n'); _position_of_line = _current; _line++; } else if(symbol == '/') { if((_current + 1) >= _text.size()) { return false; } switch(_text[_current + 1]) { case '/': { do { if(_current >= _text.size()) { return false; } } while(_current < _text.size() && _text[_current] != '\n'); _position_of_line = _current; _line++; }break; case '*': { for(;;) { if((_current + 1) >= _text.size()) { _current++; return false; } if(_text[_current] == '\n') { _position_of_line = _current; _line++; } if(_text[_current] == '*' && _text[_current + 1] == '/') { _current++; break; } _current++; } }break; default: // Goto honorable goto whileLoop; } } _current++; if(_current >= _text.size()) { return false; } symbol = _text[_current]; } return true; } inline void gr_lexer::scan_script() { advance(true); constexpr static auto is_operator = [](char v) { if(v == '!') return true; if(v >= '#' && v <='&') return true; if(v >= '(' && v <='-') return true; if(v == '/') return true; if(v >= ':' && v <='@') return true; if(v >= '[' && v <='^') return true; if(v >= '{' && v <='~') return true; return false; }; do { if (_current >= _text.size()) break; auto c = get(); if(is_digit(c)) scan_number(); else if(c == '.') { if (get(1) >= '0' && get(1) <= '9') scan_number(); else scan_operator(); } else if(is_operator(c)) scan_operator(); else if(c == '\"') scan_string(); else scan_word(); }while (advance()); } inline void gr_lexer::scan_number(){ gr_lexeme lex = gr_lexeme(*this); lex.is_literal = true; bool isFloat; string buffer(_allocator); for (;;) { char symbol = get(); if (symbol >= '0' && symbol <= '9') buffer.push_back(symbol); else if (symbol == '_') { // Do nothing, only cosmetic (e.g. 1_000_000). } else if (symbol == '.') { if (isFloat) break; isFloat = true; buffer.push_back(symbol); } else if (symbol == 'f') { isFloat = true; break; } else { if (_current) _current--; break; } _current++; if (_current >= _text.size()) break; } } inline void gr_lexer::scan_string(){ gr_lexeme lex = gr_lexeme(*this); lex.type = gr_lexeme_type::string_; lex.is_literal = true; gp_config::assertion(get() != '\"',"Expected \'\"\' at the beginning of the string."); _current++; string buffer(_allocator); bool escape = false; bool wasEscape = false; for (;;) { gp_config::assertion(_current >= _text.size(),"Missing \'\"\' character."); char symbol = get(); if (symbol == '\n') { _position_of_line = _current; _line++; } else if (symbol == '\"' && (!wasEscape)) break; else if (symbol == '\\' && (!wasEscape)) { escape = true; } if (!escape) { if (!wasEscape) { buffer.push_back(symbol); } else { if (symbol == 'n') buffer.push_back('\n'); else buffer.push_back(symbol); } } wasEscape = escape; escape = false; _current++; } lex._text_length = size_t(buffer.size() + 2u); lex.svalue = buffer; _lexemes.push_back(lex); } inline void gr_lexer::scan_operator(){ gr_lexeme lex = gr_lexeme(*this); lex.is_operator = true; switch (get()) { case '{': lex.type = gr_lexeme_type::leftCurlyBrace; break; case '}': lex.type = gr_lexeme_type::rightCurlyBrace; break; case '(': lex.type = gr_lexeme_type::leftParenthesis; break; case ')': lex.type = gr_lexeme_type::rightParenthesis; break; case '[': lex.type = gr_lexeme_type::leftBracket; break; case ']': lex.type = gr_lexeme_type::rightBracket; break; case '.': lex.type = gr_lexeme_type::period; break; case ';': lex.type = gr_lexeme_type::semicolon; break; case ':': lex.type = gr_lexeme_type::colon; if (_current + 1 >= _text.size()) break; if (get(1) == ':') { lex.type = gr_lexeme_type::doubleColon; lex._text_length = 2; _current++; } break; case ',': lex.type = gr_lexeme_type::comma; break; case '^': lex.type = gr_lexeme_type::power; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::powerAssign; lex._text_length = 2; _current++; } break; case '@': lex.type = gr_lexeme_type::at; break; case '&': lex.type = gr_lexeme_type::pointer; break; case '~': lex.type = gr_lexeme_type::concatenate; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::concatenateAssign; lex._text_length = 2; _current++; } break; case '+': lex.type = gr_lexeme_type::add; if (_current + 1 >= _text.size()) break; switch (get(1)) { case '=': lex.type = gr_lexeme_type::addAssign; lex._text_length = 2; _current++; break; case '+': lex.type = gr_lexeme_type::increment; lex._text_length = 2; _current++; break; default: break; } break; case '-': lex.type = gr_lexeme_type::substract; if (_current + 1 >= _text.size()) break; switch (get(1)) { case '=': lex.type = gr_lexeme_type::substractAssign; lex._text_length = 2; _current++; break; case '-': lex.type = gr_lexeme_type::decrement; lex._text_length = 2; _current++; break; default: break; } break; case '*': lex.type = gr_lexeme_type::multiply; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::multiplyAssign; lex._text_length = 2; _current++; } break; case '/': lex.type = gr_lexeme_type::divide; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::divideAssign; lex._text_length = 2; _current++; } break; case '%': lex.type = gr_lexeme_type::remainder; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::remainderAssign; lex._text_length = 2; _current++; } break; case '=': lex.type = gr_lexeme_type::assign; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::equal; lex._text_length = 2; _current++; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::doubleEqual; lex._text_length = 3; _current++; } } break; case '<': lex.type = gr_lexeme_type::lesser; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::lesserOrEqual; lex._text_length = 2; _current++; if (_current + 1 >= _text.size()) break; if (get(1) == '>') { lex.type = gr_lexeme_type::threeWayComparison; lex._text_length = 3; _current++; } } else if (get(1) == '-') { lex.type = gr_lexeme_type::send; lex._text_length = 2; _current++; } else if (get(1) == '<') { lex.type = gr_lexeme_type::leftShift; lex._text_length = 2; _current++; } break; case '>': lex.type = gr_lexeme_type::greater; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::greaterOrEqual; lex._text_length = 2; _current++; } else if (get(1) == '>') { lex.type = gr_lexeme_type::rightShift; lex._text_length = 2; _current++; } break; case '!': lex.type = gr_lexeme_type::not_; if (_current + 1 >= _text.size()) break; if (get(1) == '=') { lex.type = gr_lexeme_type::notEqual; lex._text_length = 2; _current++; } break; default: gp_config::assertion(false, "GrLexer: invalid operator"); } _lexemes.push_back(lex); } namespace _hidden { bool operator==(const gp::vector& lhs, const char* rhs) { for(size_t index = 0; index < lhs.size() && rhs[index] != 0; index++){ if(lhs[index] != rhs[index]) return false; } return true; } } inline void gr_lexer::scan_word(){ gr_lexeme lex = gr_lexeme(*this); lex.is_keyword = true; string symbol_buffer(_allocator); for (;;) { if (_current >= _text.size()) break; char symbol = get(); if (symbol == '!' || symbol == '?') { symbol_buffer.push_back(symbol); _current++; break; } if (symbol <= '&' || (symbol >= '(' && symbol <= '/') || (symbol >= ':' && symbol <= '@') || (symbol >= '[' && symbol <= '^') || (symbol >= '{' && symbol <= 0x7F)) break; symbol_buffer.push_back(symbol); _current++; } _current--; lex._text_length = symbol_buffer.size(); using namespace _hidden; if(symbol_buffer == "use"){ scan_use(); return; } else if(symbol_buffer == "pub"){ lex.type = gr_lexeme_type::public_; } else if(symbol_buffer == "main"){ lex.type = gr_lexeme_type::main_; } else if(symbol_buffer == "type"){ lex.type = gr_lexeme_type::type_; } else if(symbol_buffer == "event"){ lex.type = gr_lexeme_type::event_; } else if(symbol_buffer == "class"){ lex.type = gr_lexeme_type::class_; } else if(symbol_buffer == "enum"){ lex.type = gr_lexeme_type::enum_; } else if(symbol_buffer == "template"){ lex.type = gr_lexeme_type::template_; } else if(symbol_buffer == "if"){ lex.type = gr_lexeme_type::if_; } else if(symbol_buffer == "unless"){ lex.type = gr_lexeme_type::unless; } else if(symbol_buffer == "else"){ lex.type = gr_lexeme_type::else_; } else if(symbol_buffer == "switch"){ lex.type = gr_lexeme_type::switch_; } else if(symbol_buffer == "select"){ lex.type = gr_lexeme_type::select; } else if(symbol_buffer == "case"){ lex.type = gr_lexeme_type::case_; } else if(symbol_buffer == "while"){ lex.type = gr_lexeme_type::while_; } else if(symbol_buffer == "do"){ lex.type = gr_lexeme_type::do_; } else if(symbol_buffer == "until"){ lex.type = gr_lexeme_type::until; } else if(symbol_buffer == "for"){ lex.type = gr_lexeme_type::for_; } else if(symbol_buffer == "loop"){ lex.type = gr_lexeme_type::loop; } else if(symbol_buffer == "return"){ lex.type = gr_lexeme_type::return_; } else if(symbol_buffer == "self"){ lex.type = gr_lexeme_type::self; } else if(symbol_buffer == "kill"){ lex.type = gr_lexeme_type::kill; } else if(symbol_buffer == "killall"){ lex.type = gr_lexeme_type::killAll; } else if(symbol_buffer == "yield"){ lex.type = gr_lexeme_type::yield; } else if(symbol_buffer == "break"){ lex.type = gr_lexeme_type::break_; } else if(symbol_buffer == "continue"){ lex.type = gr_lexeme_type::continue_; } else if(symbol_buffer == "as"){ lex.type = gr_lexeme_type::as; } else if(symbol_buffer == "try"){ lex.type = gr_lexeme_type::try_; } else if(symbol_buffer == "catch"){ lex.type = gr_lexeme_type::catch_; } else if(symbol_buffer == "raise"){ lex.type = gr_lexeme_type::raise_; } else if(symbol_buffer == "defer"){ lex.type = gr_lexeme_type::defer; } else if(symbol_buffer == "task"){ lex.type = gr_lexeme_type::taskType; lex.is_type = true; } else if(symbol_buffer == "func"){ lex.type = gr_lexeme_type::functionType; lex.is_type = true; } else if(symbol_buffer == "int"){ lex.type = gr_lexeme_type::intType; lex.is_type = true; } else if(symbol_buffer == "float"){ lex.type = gr_lexeme_type::floatType; lex.is_type = true; } else if(symbol_buffer == "bool"){ lex.type = gr_lexeme_type::boolType; lex.is_type = true; } else if(symbol_buffer == "string"){ lex.type = gr_lexeme_type::stringType; lex.is_type = true; } else if(symbol_buffer == "array"){ lex.type = gr_lexeme_type::arrayType; lex.is_type = true; } else if(symbol_buffer == "chan"){ lex.type = gr_lexeme_type::chanType; lex.is_type = true; } else if(symbol_buffer == "new"){ lex.type = gr_lexeme_type::new_; lex.is_type = false; } else if(symbol_buffer == "let"){ lex.type = gr_lexeme_type::autoType; lex.is_type = false; } else if(symbol_buffer == "true"){ lex.type = gr_lexeme_type::boolean; lex.is_keyword = false; lex.is_literal = true; lex.bvalue = true; } else if(symbol_buffer == "false"){ lex.type = gr_lexeme_type::boolean; lex.is_keyword = false; lex.is_literal = true; lex.bvalue = false; } else if(symbol_buffer == "null"){ lex.type = gr_lexeme_type::null_; lex.is_keyword = false; lex.is_literal = true; } else if(symbol_buffer == "not"){ lex.type = gr_lexeme_type::not_; lex.is_keyword = false; lex.is_operator = true; } else if(symbol_buffer == "and"){ lex.type = gr_lexeme_type::and_; lex.is_keyword = false; lex.is_operator = true; } else if(symbol_buffer == "or"){ lex.type = gr_lexeme_type::or_; lex.is_keyword = false; lex.is_operator = true; } else if(symbol_buffer == "xor"){ lex.type = gr_lexeme_type::xor_; lex.is_keyword = false; lex.is_operator = true; } else { lex.is_keyword = false; lex.type = gr_lexeme_type::identifier; lex.svalue = symbol_buffer; } _lexemes.push_back(lex); } inline string gr_lexer::convert_path_to_import(string& path) { return path; } inline void gr_lexer::scan_file(gp::vector& file_name){ _files_to_import.push_back(file_name); while (_files_to_import.size()) { _file = _files_to_import[_files_to_import.size()-1]; _files_imported.push_back(_file); _text = file_loader(file_name); _files_to_import.pop_back(); _line = 0u; _current = 0u; _lines = gp::vector(_allocator); gp::vector tmp(_allocator); for(char c : _text) { if(c == '\n') { _lines.push_back(tmp); tmp = gp::vector(_allocator); } else { tmp.push_back(c); } } scan_script(); _file_id++; } } inline void gr_lexer::scan_file_path(){ gp_config::assertion(get() != '\"', "Expected \'\"\' at the beginning of the import."); _current++; string buffer(_allocator); for (;;) { gp_config::assertion(_current >= _text.size(), "Missing \'\"\' character."); char symbol = get(); if (symbol == '\n') { _position_of_line = _current; _line++; } else if (symbol == '\"') break; buffer.push_back(symbol); _current++; } buffer = convert_path_to_import(buffer); for(auto& file : _files_imported) { if(file == buffer) return; } for(auto& file : _files_to_import) { if(file == buffer) return; } _files_to_import.push_back(buffer); } inline void gr_lexer::scan_use(){ advance(); // Multiple files import. if (get() == '{') { advance(); bool isFirst = true; for (;;) { if (isFirst) isFirst = false; else if (get() == '\"') advance(); else gp_config::assertion(false, "Missing \'}\' after import list."); // EOF gp_config::assertion(_current >= _text.size(), "Missing \'}\' after import list."); // End of the import list. if (get() == '}') break; // Scan scan_file_path(); } } else { scan_file_path(); } }