// Build with: clang++ --std=c++20 -g -O1 schedtest.cpp
#include <array>
#include <span>
#include <memory>
#include <list>
#include <functional>
#include <iostream>
#include <map>
#include <chrono>
#include <exception>
#include <atomic>
#include <set>
#include <vector>
#include <optional>
#include <algorithm> // std::find
#include <cstdint>   // int64_t, uint64_t
#include <stdexcept> // std::runtime_error

#if defined(__x86_64__)
#if defined(__GNUG__)
constexpr bool is_x86_64_linux = true;
#else
#if defined(__clang__)
constexpr bool is_x86_64_linux = true;
#else
constexpr bool is_x86_64_linux = false;
#endif
#endif
#else
constexpr bool is_x86_64_linux = false;
#endif

static_assert(is_x86_64_linux, "This code probably only works on x86_64 GNU Extensions C++");
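
// platform_data holds the callee-saved register state of the System V x86-64 ABI
// (rbx, r12-r15) plus the stack and base pointers for one process. pull() captures
// the current CPU context into the struct; push() loads that context back so
// execution continues on the saved stack, and returns `location` to whatever
// resumes there.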
struct platform_data {
    platform_data() = default;

    platform_data(std::span<char> stack_str)
    : stack_ptr(&*(stack_str.end()-16))
    , base_ptr(stack_ptr)
    {}

    uint64_t rbx, r12, r13, r14, r15;

    void* stack_ptr;
    void* base_ptr;

    __attribute__((always_inline)) void pull()
    {
        __asm__ __volatile__(
            "movq %%rsp, %0\n"
            "movq %%rbp, %1\n"
            "movq %%rbx, %2\n"
            "movq %%r12, %3\n"
            "movq %%r13, %4\n"
            "movq %%r14, %5\n"
            "movq %%r15, %6\n"
            : "=m"(stack_ptr)
            , "=m"(base_ptr)
            , "=m"(rbx)
            , "=m"(r12)
            , "=m"(r13)
            , "=m"(r14)
            , "=m"(r15)
        );
    }

    __attribute__((always_inline)) void* push(void* location)
    {
        volatile void* volatile tmp = static_cast<char*>(stack_ptr) - sizeof(void*);
        *static_cast<volatile void* volatile * volatile>(tmp) = location;
        __asm__ __volatile__(
            "movq %1, %%rsp\n"
            "movq %2, %%rbp\n"
            "movq %3, %%rbx\n"
            "movq %4, %%r12\n"
            "movq %5, %%r13\n"
            "movq %6, %%r14\n"
            "movq %7, %%r15\n"
            "popq %0\n"
            : "+r"(location)
            : "m"(tmp)
            , "m"(base_ptr)
            , "m"(rbx)
            , "m"(r12)
            , "m"(r13)
            , "m"(r14)
            , "m"(r15)
            : "memory"
        );
        return location;
    }
};

enum class process_status {
    inactive = 0,
    running = 1,
    waiting = 2,
    finished = 3,
    zombie = 4
};
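
// A process is a cooperatively scheduled green thread: it owns a heap-allocated
// stack, the function it should run, a saved register context and a numeric id.
// The second constructor wraps an already running stack (used for main()) and
// therefore owns nothing.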
struct process {
    static int64_t counter;

    char* stack;
    size_t sz;
    platform_data scheduling_swapper;
    process_status state = process_status::inactive;
    std::function<void()> fn;
    int64_t t_id;

    process(std::function<void()> _fn, size_t _sz = 16384)
    : stack(new char[_sz])
    , sz(_sz)
    , scheduling_swapper(std::span<char>(stack, sz))
    , fn(_fn)
    , t_id(counter++)
    {}

    process(char* stack_base)
    : stack(stack_base)
    , sz(0)
    , t_id(counter++)
    {}

    process(const process&) = delete;

    ~process() {
        if(sz) delete[] stack;
    }
};

int64_t process::counter = 0;
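
// spawner() is the trampoline that runs a process's function the first time it is
// scheduled; system is the cooperative scheduler itself. It keeps FIFO lists of
// runnable, waiting and misbehaving ("naughty") processes and swaps register
// contexts in yield(), yield_to() and steal_and_yield().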
__attribute__((noinline)) struct system* spawner (struct system* sys);

struct system {
    static system sys;

    std::list<std::unique_ptr<process>>
        running,
        waiting,
        naughty;

    std::unique_ptr<process> previous;
    std::unique_ptr<process> current;

    // Pop the next runnable process (the run queue is FIFO: push_front, take back).
    std::unique_ptr<process> one() {
        auto v = std::move(running.back());
        running.pop_back();
        return v;
    }

    // File a process back into the list that matches its state.
    void rid(std::unique_ptr<process> current) {
        switch(current->state) {
        case process_status::inactive:
        case process_status::running:
            running.push_front(std::move(current));
            break;
        case process_status::finished:
            clean(std::move(current));
            break;
        case process_status::zombie:
            naughty.push_front(std::move(current));
            break;
        case process_status::waiting:
            waiting.push_front(std::move(current));
            break;
        }
    }

    void clean(std::unique_ptr<process>) {}

    void yield_to(std::unique_ptr<process> target) noexcept {
        current->scheduling_swapper.pull();
        sys.rid(std::move(current));
        current = std::move(target);
        current->scheduling_swapper.push(this);
        spawner(&sys);
    }

    void yield() noexcept {
        current->scheduling_swapper.pull();
        sys.rid(std::move(current));
        current = one();
        current->scheduling_swapper.push(this);
        spawner(&sys);
    }

    // Like yield(), but hands the current process to `func` instead of requeueing it.
    template<typename fn>
    void steal_and_yield(fn func) noexcept {
        current->scheduling_swapper.pull();
        func(std::move(current));
        current = one();
        current->scheduling_swapper.push(this);
        spawner(&sys);
    }
};
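
// A rough usage sketch (see main() below for the real thing): wrap the running
// thread in a process so the scheduler has something to switch back to, then
// hand it lambdas to run as green threads:
//
//     system::sys.current = std::make_unique<process>(&some_stack_byte);
//     system::sys.yield_to(std::make_unique<process>([]{ /* runs on its own stack */ }));
//
// `some_stack_byte` is just a stand-in name here; main() uses a local char.
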
// Needs to return the system one way or another
__attribute__((noinline)) struct system* spawner (struct system* sys) {
    auto& proc = *system::sys.current;
    if(proc.state == process_status::inactive) {
        proc.state = process_status::running;
        proc.fn();
        proc.state = process_status::finished;
        sys->current->scheduling_swapper.pull();
        sys->yield();
    }
    return sys;
}

struct system system::sys;

/********************* ***********************/
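
// dirty_bottleneck is a minimal test-and-set spinlock built on std::atomic_bool.
// lock() busy-waits; unlock() throws if the flag was not actually held, which
// catches double unlocks. It protects the deadlock detector's own bookkeeping.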
class dirty_bottleneck {
    std::atomic_bool flag;

    [[nodiscard]] bool try_lock() {
        bool f = false;
        bool t = true;
        return flag.compare_exchange_strong(f,t,std::memory_order::acquire);
    }

    [[nodiscard]] bool try_unlock() {
        bool f = false;
        bool t = true;
        return flag.compare_exchange_strong(t,f,std::memory_order::release);
    }
public:
    dirty_bottleneck() = default;
    dirty_bottleneck(dirty_bottleneck&) = delete;
    dirty_bottleneck(dirty_bottleneck&&) = delete;

    void lock() {
        while(not try_lock());
    }

    void unlock() {
        if(!try_unlock()) throw std::runtime_error("Unlocking failed in dirty_bottleneck: potential double unlocking issue");
    }
};

/********************* ***********************/
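
// A tiny RAII guard in the spirit of std::lock_guard: locks on construction,
// unlocks on destruction.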
template<typename T>
class lock_guard {
    T& ref;
public:
    lock_guard(T& _ref)
    : ref(_ref)
    {
        ref.lock();
    }

    ~lock_guard() {
        ref.unlock();
    }
};

/********************* ***********************/
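
// Bookkeeping vocabulary for the deadlock detector: every fast_bottleneck gets a
// numeric mutex_handle, threads report their locking/waiting/unlocking transitions
// via signal_locking(), and mutexes report their creation and removal via
// mutex_state_update().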
using mutex_handle = size_t;

enum class thread_state {
    locking,
    waiting,
    unlocking
};

enum class mutex_state {
    remove = 0,
    create = 1
};

using namespace std::chrono_literals;

void mutex_state_update(mutex_handle, mutex_state);
void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd);
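
// fast_bottleneck is the scheduler-aware mutex. On contention the current process
// reports itself as waiting, parks itself on the mutex's waiting list and yields;
// unlock() moves one parked process back onto the scheduler's run queue. Every
// transition is fed to the deadlock detector, which throws if it finds a cycle.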
class fast_bottleneck {

    /** This is a secret tool that will help us later **/
    static std::atomic<size_t> counter;
    const mutex_handle handle;
    dirty_bottleneck trigger_lock;
    std::list<std::unique_ptr<process>> waiting;

    std::atomic_bool flag;

    [[nodiscard]] bool try_lock() {
        bool f = false;
        bool t = true;
        return flag.compare_exchange_strong(f,t,std::memory_order::acquire);
    }

    [[nodiscard]] bool try_unlock() {
        bool f = false;
        bool t = true;
        return flag.compare_exchange_strong(t,f,std::memory_order::release);
    }
public:
    fast_bottleneck()
    : handle(counter.fetch_add(1)) //< We have to initialize that
    , flag()
    {
        mutex_state_update(handle, mutex_state::create);
    }
    fast_bottleneck(fast_bottleneck&) = delete;
    fast_bottleneck(fast_bottleneck&&) = delete;
    fast_bottleneck& operator=(fast_bottleneck&) = delete;
    fast_bottleneck& operator=(fast_bottleneck&&) = delete;

    ~fast_bottleneck() {
        mutex_state_update(handle, mutex_state::remove);
    }

    void lock() {
        /// The exponential backoff variables
        constexpr std::chrono::milliseconds max{1};
        std::chrono::nanoseconds wait{256};
        while(not try_lock()) {
            /// The implementation of our little trick when waiting
            signal_locking(thread_state::waiting, handle, system::sys.current->t_id);

            system::sys.steal_and_yield([&](std::unique_ptr<process> p){
                lock_guard triggers(trigger_lock);
                p->state = process_status::waiting;
                waiting.push_front(std::move(p));
            });

            /// The exponential backoff
            //std::this_thread::sleep_for(wait);
            wait += wait < max ? std::chrono::nanoseconds(wait.count()/2) : 0ns;
        }
        /// The implementation of our little trick when locking
        signal_locking(thread_state::locking, handle, system::sys.current->t_id);
    }

    void unlock() {
        if(!try_unlock()) throw std::runtime_error("Unlocking failed in fast_bottleneck: potential double unlocking issue");
        /// The implementation of our little trick when unlocking
        signal_locking(thread_state::unlocking, handle, system::sys.current->t_id);

        {
            lock_guard triggers(trigger_lock);
            if(waiting.size()) {
                system::sys.running.push_front(std::move(waiting.back()));
                waiting.pop_back();
            }
        }
    }
};
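
// Global state for the deadlock detector: which mutexes each process currently
// owns, which mutex (if any) it is waiting on, and the set of mutexes that exist.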
dirty_bottleneck checker_lock;
dirty_bottleneck lister_lock;
std::map<int64_t, std::vector<mutex_handle>> owned_locks;
std::map<int64_t, std::optional<mutex_handle>> waiting_locks;
std::set<mutex_handle> locks_that_exist;

void mutex_state_update(mutex_handle mtx, mutex_state state) {
    lock_guard lister(lister_lock);
    switch(state) {
    case mutex_state::create: {
        locks_that_exist.insert(mtx);
    } break;
    case mutex_state::remove: {
        locks_that_exist.erase(mtx);
    } break;
    }
}
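
// Builds a wait-for graph between mutexes: for every waiting thread, each mutex it
// owns gets an edge towards the mutex it is waiting on. Nodes with no outgoing
// edges are then stripped repeatedly; any nodes that survive form a cycle, and a
// deadlock is reported by returning true.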
bool build_dependency_graph (
    const mutex_handle mtx,
    const int64_t thrd,
    std::map<int64_t, std::vector<mutex_handle>>& owned_locks,
    std::map<int64_t, std::optional<mutex_handle>>& waiting_locks
) {
    std::map<mutex_handle, std::set<mutex_handle>> graph;
    for(auto& elem : waiting_locks) {
        if(elem.second.has_value()) {
            for(auto& n : owned_locks[elem.first]) {
                graph[n].insert(elem.second.value());
            }
        }
    }

    std::set<mutex_handle> nodes;
    {
        lock_guard lister(lister_lock);
        nodes = locks_that_exist;
    }

    bool happened = true;

    while(happened) {
        happened = false;
        for(auto& n : nodes) {
            if(graph[n].size() == 0)
            {
                happened = true;
                // Take the entries by reference so erasing the edges actually sticks.
                for(auto& v : graph) {
                    v.second.erase(n);
                }
                nodes.erase(n);
                break;
            }
        }
    }

    // Any node left over is part of a cycle: report a deadlock.
    return not nodes.empty();
}

void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd) {
    bool bad = false;
    {
        lock_guard checker(checker_lock);
        switch(state) {
        case thread_state::locking: {
            waiting_locks[thrd].reset();
            owned_locks[thrd].push_back(mtx);
        } break;
        case thread_state::unlocking: {
            auto it = std::find(owned_locks[thrd].begin(), owned_locks[thrd].end(), mtx);
            if(it != owned_locks[thrd].end()) {
                owned_locks[thrd].erase(it);
            }
        } break;
        case thread_state::waiting: {
            waiting_locks[thrd] = mtx;
            bad = build_dependency_graph(mtx, thrd, owned_locks, waiting_locks);
        } break;
        }
    }
    if(bad) throw std::runtime_error("Deadlock detected");
}

std::atomic<size_t> fast_bottleneck::counter;

/********************* ***********************/

fast_bottleneck A;
fast_bottleneck B;
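
// A small demo: main()'s own stack is wrapped in a process so the scheduler can
// switch back to it, then two short-lived processes contend with it for lock A,
// exercising the park/yield path and the deadlock checker along the way.
// B is not used in this demo.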
int main() {
    char c;
    system::sys.current = std::make_unique<process>(&c);

    std::cout << "1" << std::endl;
    A.lock();
    system::sys.current->state = process_status::running;
    system::sys.yield_to(std::make_unique<process>([](){
        A.lock();
        std::cout << "A" << std::endl;
        A.unlock();
    }));
    A.unlock();
    system::sys.yield();
    system::sys.yield_to(std::make_unique<process>([](){
        A.lock();
        std::cout << "B" << std::endl;
        A.unlock();
    }));
    std::cout << "3" << std::endl;
    return 0;
}