From c14c3b61567838d96a4cef3455c964b7fe3c3775 Mon Sep 17 00:00:00 2001
From: Ludovic 'Archivist' Lagouardette
Date: Sat, 10 Aug 2019 12:36:33 +0200
Subject: [PATCH] Work on csv-parse and lispy

---
 .gitignore                             |   2 +
 .vscode/launch.json                    |  16 +
 include/rigid_paradise/csv/csv-base.h  |  67 ++++
 include/rigid_paradise/lispy/lispy.hpp | 456 +++++++++++++++++++++++++
 include/rigid_paradise/string.h        |   2 +-
 src/csv-sheet/csv-parse.cpp            |  11 +
 6 files changed, 553 insertions(+), 1 deletion(-)
 create mode 100644 .vscode/launch.json
 create mode 100644 include/rigid_paradise/csv/csv-base.h
 create mode 100644 include/rigid_paradise/lispy/lispy.hpp
 create mode 100644 src/csv-sheet/csv-parse.cpp

diff --git a/.gitignore b/.gitignore
index a007fea..d60e5a6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,3 @@
 build/*
+vgcore.*
+a.out
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..19334d5
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Debug",
+            "type": "gdb",
+            "request": "launch",
+            "target": "./a.out",
+            "cwd": "${workspaceRoot}",
+            "valuesFormatting": "parseText"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/include/rigid_paradise/csv/csv-base.h b/include/rigid_paradise/csv/csv-base.h
new file mode 100644
index 0000000..8a4dd06
--- /dev/null
+++ b/include/rigid_paradise/csv/csv-base.h
@@ -0,0 +1,67 @@
+#pragma once
+#include <cstddef>
+#include <map>
+#include <string>
+#include <string_view>
+#include <tuple>
+
+namespace csv {
+    /// Position of a cell: x is the zero-based column, y the row number as written in the id.
+    struct cell_id {
+        size_t x,y;
+
+        /// Strict weak ordering (row first, then column) so cell_id can key a std::map.
+        bool operator<(const cell_id& oth) const {
+            return std::tie(y, x) < std::tie(oth.y, oth.x);
+        }
+    };
+
+    /// Converts a spreadsheet-style id such as "B12" into coordinates.
+    /// Upper-case letters give the column in bijective base 26 (A -> 0, Z -> 25, AA -> 26);
+    /// the decimal digits that follow give the row verbatim ("12" -> 12).
+    /// A missing part yields 0 for that coordinate; parsing stops at the first unexpected character.
+    inline cell_id get_coords_from_id(const std::string_view id) {
+        size_t column = 0;
+        size_t row = 0;
+        auto c = id.begin();
+        while(c != id.end() && *c >= 'A' && *c <= 'Z')
+        {
+            column = column * 26 + static_cast<size_t>(*c - 'A') + 1;
+            ++c;
+        }
+        while(c != id.end() && *c >= '0' && *c <= '9')
+        {
+            row = row * 10 + static_cast<size_t>(*c - '0');
+            ++c;
+        }
+        cell_id ret;
+        ret.x = column == 0 ? 0 : column - 1;
+        ret.y = row;
+        return ret;
+    }
+
+    class context;
+
+    /// One sheet cell: its raw text plus mutable computed and display strings.
+    class cell {
+    public:
+        std::string data = "";
+        mutable std::string computed_value = "";
+        mutable std::string display_value = "";
+
+        void eval(const context& ctx);
+    };
+
+    /// The sheet: cells keyed by their coordinates.
+    class context {
+    public:
+        std::map<cell_id, cell> data;
+
+        /// Looks up a cell by its textual id; throws std::out_of_range when the cell does not exist.
+        const cell& operator[] (const std::string_view id) const {
+            auto coords = get_coords_from_id(id);
+            return data.at(coords);
+        }
+    };
+
+}
\ No newline at end of file
diff --git a/include/rigid_paradise/lispy/lispy.hpp b/include/rigid_paradise/lispy/lispy.hpp
new file mode 100644
index 0000000..7b91a71
--- /dev/null
+++ b/include/rigid_paradise/lispy/lispy.hpp
@@ -0,0 +1,456 @@
+#pragma once
+#include <cassert>
+#include <cctype>
+#include <cerrno>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iostream>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <string_view>
+#include <type_traits>
+#include <typeinfo>
+#include <unordered_map>
+#include <utility>
+#include <variant>
+#include <vector>
+
+namespace lispy {
+    class cons;
+    class function;
+
+    struct empty {};
+    struct cons_start{};
+    struct cons_end{};
+
+    struct atom {
+        int value;
+    };
+
+    struct sp_coords {
+        bool is_relative = false;
+        size_t x, y;
+    };
+
+    struct sp_range {
+        bool is_relative = false;
+        size_t x, y;
+        size_t width, height;
+    };
+
+    /// Any value the interpreter can hold.
+    using lvalue = std::variant<int64_t, double, empty, std::string, atom, std::shared_ptr<function>, sp_coords, sp_range, std::shared_ptr<cons>>;
+
+    /// A lexer token: either a value or one of the two parenthesis markers.
+    using token = std::variant<lvalue, cons_start, cons_end>;
+
+    /// Interning table that maps atom names to integer ids.
+    struct context {
+        std::unordered_map<std::string, int> atoms;
+        int last_atom = 0;
+        bool error_crash = false;
+
+        /// Returns the id of key, allocating the next free id on first use.
+        int get_atom(const std::string& key)
+        {
+            const auto [slot, inserted] = atoms.try_emplace(key, last_atom + 1);
+            if(inserted) {
+                ++last_atom;
+            }
+            return slot->second;
+        }
+    };
+
+    /// Singly linked list node: a value plus an owning pointer to the rest of the list.
+    class cons {
+    public:
+        lvalue self = empty{};
+        std::unique_ptr<cons> other{};
+
+        cons() = default;
+
+        cons(lvalue first)
+        : self(std::move(first))
+        { }
+
+        /// Builds a list holding the elements of data in order; an empty vector gives a nil node.
+        cons(std::vector<lvalue> data)
+        {
+            if(data.empty()) {
+                return;
+            }
+            self = std::move(data.front());
+            cons* tail = this;
+            for(auto it = data.begin()+1; it != data.end(); ++it) {
+                tail->other = std::make_unique<cons>(std::move(*it));
+                tail = tail->other.get();
+            }
+        }
+
+        /// Deep copy of the whole chain.
+        cons(const cons& oth)
+        : self(oth.self)
+        , other(oth.other ? std::make_unique<cons>(*(oth.other)) : nullptr)
+        {}
+
+        cons& operator=(const cons& oth)
+        {
+            if(this != &oth) {
+                // Copy before touching our own chain: oth may be a node that *this owns.
+                cons copy(oth);
+                self = std::move(copy.self);
+                other = std::move(copy.other);
+            }
+            return *this;
+        }
+
+        cons(cons&& oth) noexcept
+        : self(std::move(oth.self))
+        , other(std::move(oth.other))
+        {}
+
+        cons& operator=(cons&& oth) noexcept
+        {
+            if(this != &oth) {
+                self = std::move(oth.self);
+                // Detach oth's tail first: oth may be the node held by our own `other`.
+                std::unique_ptr<cons> rest = std::move(oth.other);
+                other = std::move(rest);
+            }
+            return *this;
+        }
+
+        /// Appends value at the end of the list (walks the chain iteratively).
+        void append(lvalue value)
+        {
+            cons* tail = this;
+            while(tail->other) {
+                tail = tail->other.get();
+            }
+            tail->other = std::make_unique<cons>(std::move(value));
+        }
+    };
+
+    /// Interface of a callable value.
+    class function {
+    public:
+        virtual ~function() = default;
+        virtual lvalue operator() (cons arguments) = 0;
+    };
+
+    /// Numeric value of a hexadecimal digit, or -1 when v is not one.
+    inline char hexdigit(char v)
+    {
+        if(v >= '0' && v<='9') {
+            return v - '0';
+        } else if(v >= 'a' && v <= 'f') {
+            return 10 + v - 'a';
+        } else if(v >= 'A' && v <= 'F') {
+            return 10 + v - 'A';
+        }
+        return -1;
+    }
+
+    namespace detail {
+        /// <cctype> predicates are undefined for negative char values, so go through unsigned char.
+        inline bool is_space(char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; }
+        inline bool is_digit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
+    }
+
+    /// Wraps v in double quotes, escaping quotes, backslashes and a few control characters.
+    inline std::string escape(std::string_view v)
+    {
+        std::stringstream stream;
+        stream<<'"';
+        for(const char c : v)
+        {
+            switch(c)
+            {
+                case '"':  stream << "\\\""; break;
+                case '\\': stream << "\\\\"; break;
+                case '\n': stream << "\\n"; break;
+                case '\t': stream << "\\t"; break;
+                case '\v': stream << "\\v"; break;
+                case '\a': stream << "\\a"; break;
+                default:   stream << c;
+            }
+        }
+        stream<<'"';
+        return stream.str();
+    }
+
+    /// Writes value to stream in s-expression syntax; atom names are looked up in ctx.
+    inline void print_visitor(std::ostream& stream, const lvalue& value, const context& ctx) {
+        std::visit([&](const auto& arg) {
+            using T = std::decay_t<decltype(arg)>;
+            if constexpr (std::is_same_v<T, int64_t>) {
+                stream << arg;
+            } else if constexpr (std::is_same_v<T, double>) {
+                stream << arg;
+            } else if constexpr (std::is_same_v<T, empty>) {
+                stream << "()";
+            } else if constexpr (std::is_same_v<T, std::shared_ptr<function>>) {
+                stream << "#func" << arg.get();
+            } else if constexpr (std::is_same_v<T, std::string>) {
+                stream << escape(arg);
+            } else if constexpr (std::is_same_v<T, std::shared_ptr<cons>>) {
+                stream << "(";
+                // Walk the chain by pointer; copying nodes would deep-copy the tail at every step.
+                for(const cons* node = arg.get(); node != nullptr; node = node->other.get()) {
+                    print_visitor(stream, node->self, ctx);
+                    if(node->other) {
+                        stream << " ";
+                    }
+                }
+                stream << ")";
+            } else if constexpr (std::is_same_v<T, sp_coords>) {
+                assert(false);
+            } else if constexpr (std::is_same_v<T, sp_range>) {
+                assert(false);
+            } else if constexpr (std::is_same_v<T, atom>) {
+                for(const auto& [name, id] : ctx.atoms)
+                {
+                    if(id == arg.value)
+                    {
+                        stream << name;
+                        return;
+                    }
+                }
+                assert(false);
+            } else {
+                std::cerr << typeid(T).name() << " detected in print_visitor ?" << std::endl;
+                if(ctx.error_crash)
+                {
+                    std::exit(-1);
+                }
+            }
+        }, value);
+    }
+
+    /// Writes the type name of value to stream; lists are printed element-wise in parentheses.
+    inline void print_types_visitor(std::ostream& stream, const lvalue& value, const context& ctx) {
+        std::visit([&](const auto& arg) {
+            using T = std::decay_t<decltype(arg)>;
+            if constexpr (std::is_same_v<T, int64_t>) {
+                stream << "integer";
+            } else if constexpr (std::is_same_v<T, double>) {
+                stream << "double";
+            } else if constexpr (std::is_same_v<T, empty>) {
+                stream << "nil";
+            } else if constexpr (std::is_same_v<T, std::shared_ptr<function>>) {
+                stream << "function";
+            } else if constexpr (std::is_same_v<T, std::string>) {
+                stream << "string";
+            } else if constexpr (std::is_same_v<T, std::shared_ptr<cons>>) {
+                stream << "(";
+                for(const cons* node = arg.get(); node != nullptr; node = node->other.get()) {
+                    print_types_visitor(stream, node->self, ctx);
+                    if(node->other) {
+                        stream << " ";
+                    }
+                }
+                stream << ")";
+            } else if constexpr (std::is_same_v<T, sp_coords>) {
+                stream << "coords";
+            } else if constexpr (std::is_same_v<T, sp_range>) {
+                stream << "range";
+            } else if constexpr (std::is_same_v<T, atom>) {
+                stream << "atom";
+            } else {
+                std::cerr << typeid(T).name() << " detected in print_types_visitor ?" << std::endl;
+                if(ctx.error_crash)
+                {
+                    std::exit(-1);
+                }
+            }
+        }, value);
+    }
+
+    /// Reads a double-quoted string starting at data[0]; a backslash makes the next character literal.
+    /// Returns the decoded text and the unconsumed remainder of data.
+    inline std::pair<lvalue, std::string_view> parse_string(std::string_view data)
+    {
+        assert(!data.empty() && data.front() == '"');
+        std::string ret;
+        size_t idx = data.empty() ? 0 : 1;
+        while(idx < data.size() && data[idx] != '"')
+        {
+            if(data[idx] == '\\' && idx + 1 < data.size()) {
+                ++idx;
+            }
+            ret += data[idx];
+            ++idx;
+        }
+        if(idx < data.size()) {
+            ++idx; // step over the closing quote
+        }
+        return std::make_pair(lvalue{std::move(ret)}, data.substr(idx));
+    }
+
+    /// Reads an atom: every character up to the next whitespace, parenthesis or end of input.
+    /// Returns the interned atom and the text that was consumed.
+    inline std::pair<lvalue, std::string_view> parse_atom(std::string_view data, context& ctx)
+    {
+        assert(!data.empty() && !detail::is_space(data[0]));
+        size_t idx = data.empty() ? 0 : 1;
+        while(idx < data.size() && !detail::is_space(data[idx]) && data[idx] != '(' && data[idx] != ')')
+        {
+            idx++;
+        }
+        atom v;
+        v.value = ctx.get_atom(std::string(data.substr(0, idx)));
+        return std::make_pair(lvalue{v}, data.substr(0, idx));
+    }
+
+    /// Reads a decimal number at the start of data.
+    /// The result is an int64_t when the text is a plain integer that fits, a double otherwise.
+    /// Returns the value and the text that was consumed.
+    inline std::pair<lvalue, std::string_view> parse_number(std::string_view data)
+    {
+        // strtod/strtoll need a NUL-terminated buffer, which a string_view does not guarantee;
+        // copy only the characters that can belong to a decimal literal.
+        size_t len = 0;
+        while(len < data.size() && (detail::is_digit(data[len]) || data[len] == '.' || data[len] == '+'
+            || data[len] == '-' || data[len] == 'e' || data[len] == 'E'))
+        {
+            ++len;
+        }
+        const std::string buffer(data.substr(0, len));
+
+        char* end_f = nullptr;
+        char* end_d = nullptr;
+        const double try_f = std::strtod(buffer.c_str(), &end_f);
+        errno = 0; // only an overflow reported by strtoll itself must count below
+        const long long try_d = std::strtoll(buffer.c_str(), &end_d, 10);
+        const bool fits_integer = errno != ERANGE;
+
+        const std::string_view consumed = data.substr(0, static_cast<size_t>(end_f - buffer.c_str()));
+        // A fraction or exponent makes strtod read further than strtoll.
+        if(fits_integer && end_d == end_f)
+        {
+            return std::make_pair(lvalue{int64_t(try_d)}, consumed);
+        }
+        return std::make_pair(lvalue{try_f}, consumed);
+    }
+
+    /// Placeholder for `$` selectors: consumes nothing and yields a default-constructed value.
+    inline std::pair<lvalue, std::string_view> parse_selector(std::string_view data)
+    {
+        return std::make_pair(lvalue{}, data.substr(0, 0));
+    }
+
+    /// Returns the index of the cons_end that closes the cons_start at idx,
+    /// or data.size() when the parenthesis is never closed.
+    inline size_t find_matching(const std::basic_string_view<token>& data, const size_t idx)
+    {
+        int mass = 1;
+        for(size_t try_idx = idx + 1; try_idx < data.size(); ++try_idx)
+        {
+            std::visit([&](const auto& arg) {
+                using T = std::decay_t<decltype(arg)>;
+                if constexpr (std::is_same_v<T, cons_start>) {
+                    ++mass;
+                } else if constexpr (std::is_same_v<T, cons_end>) {
+                    --mass;
+                }
+            }, data[try_idx]);
+            if(mass == 0)
+            {
+                return try_idx;
+            }
+        }
+        return data.size();
+    }
+
+    /// Builds the list described by a token sequence, recursing into parenthesised sub-lists.
+    /// Returns the number of top-level elements and the list (nil when there is none).
+    inline std::pair<size_t, lvalue> parse(const std::basic_string_view<token>& data, context& ctx)
+    {
+        cons head; // dummy node; the real list hangs off head.other
+        size_t sz = 0;
+
+        for(size_t idx = 0; idx < data.size(); ++idx)
+        {
+            std::visit([&](const auto& arg) {
+                using T = std::decay_t<decltype(arg)>;
+                if constexpr (std::is_same_v<T, cons_start>) {
+                    const size_t matching = find_matching(data, idx);
+                    // The sub-list is everything strictly between the two parentheses.
+                    auto res = parse(std::basic_string_view<token>{data.data() + idx + 1, matching - idx - 1}, ctx);
+                    head.append(std::move(res.second));
+                    idx = matching; // resume after the closing parenthesis
+                    ++sz;
+                } else if constexpr (std::is_same_v<T, cons_end>) {
+                    std::cerr << typeid(T).name() << " mismatched parenthesis" << std::endl;
+                } else if constexpr (std::is_same_v<T, lvalue>) {
+                    head.append(arg);
+                    ++sz;
+                } else {
+                    std::cerr << typeid(T).name() << " cat in the parser ?" << std::endl;
+                    if(ctx.error_crash)
+                    {
+                        std::exit(-1);
+                    }
+                }
+            }, data[idx]);
+        }
+
+        if(!head.other)
+        {
+            return std::make_pair(sz, lvalue{empty{}});
+        }
+        return std::make_pair(sz, lvalue{std::make_shared<cons>(std::move(*head.other))});
+    }
+
+    /// Splits source text into tokens: numbers, quoted strings, parentheses, selectors and atoms.
+    inline std::vector<token> lex(const std::string_view data, context& ctx)
+    {
+        std::vector<token> ret;
+
+        size_t pos = 0;
+        while(pos < data.size())
+        {
+            const char c = data[pos];
+            const std::string_view rest = data.substr(pos);
+            const bool negative_number = c == '-' && rest.size() > 1 && detail::is_digit(rest[1]);
+
+            if(detail::is_digit(c) || negative_number) {
+                auto value = parse_number(rest);
+                ret.push_back(value.first);
+                pos += value.second.size();
+            } else if(c == '\"') {
+                auto value = parse_string(rest);
+                ret.push_back(value.first);
+                pos += rest.size() - value.second.size();
+            } else if (c == '(') {
+                ret.push_back(cons_start{});
+                ++pos;
+            } else if (c == ')') {
+                ret.push_back(cons_end{});
+                ++pos;
+            } else if (detail::is_space(c)) {
+                ++pos;
+            } else if (c == '$') {
+                auto value = parse_selector(rest);
+                ret.push_back(value.first);
+                pos += value.second.size()+1;
+            } else {
+                auto value = parse_atom(rest, ctx);
+                ret.push_back(value.first);
+                pos += value.second.size();
+            }
+        }
+
+        return ret;
+    }
+
+    /// Lexes and parses data into a list value; nothing is evaluated.
+    inline lvalue eval(const std::string_view& data, context& ctx)
+    {
+        auto n = lex(data, ctx);
+        auto p = parse(std::basic_string_view<token>(n.data(), n.size()), ctx);
+        return p.second;
+    }
+}
\ No newline at end of file
diff --git a/include/rigid_paradise/string.h b/include/rigid_paradise/string.h
index 2073cc6..faef68f 100644
--- a/include/rigid_paradise/string.h
+++ b/include/rigid_paradise/string.h
@@ -5,6 +5,6 @@
 namespace cstring{
     constexpr size_t strlen( const char* s ) noexcept
     {
-		return *s ? 1 + strlen(s + 1) : 0;
+        return *s ? 1 + strlen(s + 1) : 0;
     }
 }
\ No newline at end of file
diff --git a/src/csv-sheet/csv-parse.cpp b/src/csv-sheet/csv-parse.cpp
new file mode 100644
index 0000000..4e11a82
--- /dev/null
+++ b/src/csv-sheet/csv-parse.cpp
@@ -0,0 +1,11 @@
+#include "rigid_paradise/lispy/lispy.hpp"
+#include <iostream>
+
+int main() {
+    lispy::context ctx;
+    lispy::lvalue v = lispy::eval("cat 13 15.69 \"data \\ ひらがな \n\ttabulated\" \"\" \"test 2 \\x65\"", ctx);
+
+    lispy::print_types_visitor(std::cout, v, ctx);
+    std::cout << std::endl;
+    lispy::print_visitor(std::cout, v, ctx);
+}
\ No newline at end of file