| @ -0,0 +1,469 @@ | |||
| // Build with: clang++ --std=c++20 -g -O1 schedtest.cpp | |||
| #include <array> | |||
| #include <span> | |||
| #include <memory> | |||
| #include <list> | |||
| #include <functional> | |||
| #include <iostream> | |||
| #include <map> | |||
| #include <chrono> | |||
| #include <exception> | |||
| #include <atomic> | |||
| #include <set> | |||
| #include <vector> | |||
| #include <optional> | |||
| #if defined(__x86_64__) | |||
| #if defined(__GNUG__) | |||
| constexpr bool is_x86_64_linux = true; | |||
| #else | |||
| #if defined(__clang__) | |||
| constexpr bool is_x86_64_linux = true; | |||
| #else | |||
| constexpr bool is_x86_64_linux = false; | |||
| #endif | |||
| #endif | |||
| #else | |||
| constexpr bool is_x86_64_linux = false; | |||
| #endif | |||
| static_assert(is_x86_64_linux, "This code probably only works on x86_64 GNU Extensions C++"); | |||
| struct platform_data { | |||
| platform_data() = default; | |||
| platform_data(std::span<char> stack_str) | |||
| : stack_ptr(&*(stack_str.end()-16)) | |||
| , base_ptr(stack_ptr) | |||
| {} | |||
| uint64_t rbx, r12, r13, r14, r15; | |||
| void* stack_ptr; | |||
| void* base_ptr; | |||
| void pull() __attribute__((always_inline)) | |||
| { | |||
| __asm__ __volatile__( | |||
| "movq %%rsp, %0\n" | |||
| "movq %%rbp, %1\n" | |||
| "movq %%rbx, %2\n" | |||
| "movq %%r12, %3\n" | |||
| "movq %%r13, %4\n" | |||
| "movq %%r14, %5\n" | |||
| "movq %%r15, %6\n" | |||
| : "=m"(stack_ptr) | |||
| , "=m"(base_ptr) | |||
| , "=m"(rbx) | |||
| , "=m"(r12) | |||
| , "=m"(r13) | |||
| , "=m"(r14) | |||
| , "=m"(r15) | |||
| ); | |||
| } | |||
| void* push(void* location) __attribute__((always_inline)) | |||
| { | |||
| volatile void* volatile tmp = static_cast<char*>(stack_ptr) - sizeof(void*); | |||
| *static_cast<volatile void* volatile * volatile>(tmp) = location; | |||
| __asm__ __volatile__( | |||
| "movq %1, %%rsp\n" | |||
| "movq %2, %%rbp\n" | |||
| "movq %3, %%rbx\n" | |||
| "movq %4, %%r12\n" | |||
| "movq %5, %%r13\n" | |||
| "movq %6, %%r14\n" | |||
| "movq %7, %%r15\n" | |||
| "popq %0\n" | |||
| : "+r"(location) | |||
| : "m"(tmp) | |||
| , "m"(base_ptr) | |||
| , "m"(rbx) | |||
| , "m"(r12) | |||
| , "m"(r13) | |||
| , "m"(r14) | |||
| , "m"(r15) | |||
| : "memory" | |||
| ); | |||
| return location; | |||
| } | |||
| }; | |||
| enum class process_status { | |||
| inactive = 0, | |||
| running = 1, | |||
| waiting = 2, | |||
| finished = 3, | |||
| zombie = 4 | |||
| }; | |||
| struct process { | |||
| static int64_t counter; | |||
| char* stack; | |||
| size_t sz; | |||
| platform_data scheduling_swapper; | |||
| process_status state = process_status::inactive; | |||
| std::function<void()> fn; | |||
| int64_t t_id; | |||
| process(std::function<void()> _fn, size_t _sz = 16384) | |||
| : stack(new char[_sz]) | |||
| , sz(_sz) | |||
| , scheduling_swapper(std::span<char>(stack, sz)) | |||
| , fn(_fn) | |||
| , t_id(counter++) | |||
| {} | |||
| process(char* stack_base) | |||
| : stack(stack_base) | |||
| , sz(0) | |||
| , t_id(counter++) | |||
| {} | |||
| process(const process&) = delete; | |||
| ~process() { | |||
| if(sz) delete[] stack; | |||
| } | |||
| }; | |||
| int64_t process::counter = 0; | |||
| __attribute__((noinline)) struct system* spawner (struct system* sys); | |||
| struct system { | |||
| static system sys; | |||
| std::list<std::unique_ptr<process>> | |||
| running, | |||
| waiting, | |||
| naughty; | |||
| std::unique_ptr<process> previous; | |||
| std::unique_ptr<process> current; | |||
| std::unique_ptr<process> one() { | |||
| auto v = std::move(running.back()); | |||
| running.pop_back(); | |||
| return v; | |||
| } | |||
| void rid(std::unique_ptr<process> current) { | |||
| switch(current->state) { | |||
| case process_status::inactive: | |||
| case process_status::running: | |||
| running.push_front(std::move(current)); | |||
| break; | |||
| case process_status::finished: | |||
| clean(std::move(current)); | |||
| break; | |||
| case process_status::zombie: | |||
| naughty.push_front(std::move(current)); | |||
| break; | |||
| case process_status::waiting: | |||
| waiting.push_front(std::move(current)); | |||
| break; | |||
| } | |||
| } | |||
| void clean(std::unique_ptr<process>) {} | |||
| void yield_to(std::unique_ptr<process> target) noexcept { | |||
| current->scheduling_swapper.pull(); | |||
| sys.rid(std::move(current)); | |||
| current = std::move(target); | |||
| current->scheduling_swapper.push(this); | |||
| spawner(&sys); | |||
| } | |||
| void yield() noexcept { | |||
| current->scheduling_swapper.pull(); | |||
| sys.rid(std::move(current)); | |||
| current = one(); | |||
| current->scheduling_swapper.push(this); | |||
| spawner(&sys); | |||
| } | |||
| template<typename fn> | |||
| void steal_and_yield(fn func) noexcept { | |||
| current->scheduling_swapper.pull(); | |||
| func(std::move(current)); | |||
| current = one(); | |||
| current->scheduling_swapper.push(this); | |||
| spawner(&sys); | |||
| } | |||
| }; | |||
| // Needs to return the system one way or another | |||
| __attribute__((noinline)) struct system* spawner (struct system* sys) { | |||
| auto& proc = *system::sys.current; | |||
| if(proc.state == process_status::inactive) { | |||
| proc.state = process_status::running; | |||
| proc.fn(); | |||
| proc.state = process_status::finished; | |||
| sys->current->scheduling_swapper.pull(); | |||
| sys->yield(); | |||
| } | |||
| return sys; | |||
| } | |||
| struct system system::sys; | |||
| /********************* ***********************/ | |||
| class dirty_bottleneck { | |||
| std::atomic_bool flag; | |||
| [[nodiscard]] bool try_lock() { | |||
| bool f = false; | |||
| bool t = true; | |||
| return flag.compare_exchange_strong(f,t,std::memory_order::acquire); | |||
| } | |||
| [[nodiscard]] bool try_unlock() { | |||
| bool f = false; | |||
| bool t = true; | |||
| return flag.compare_exchange_strong(t,f,std::memory_order::release); | |||
| } | |||
| public: | |||
| dirty_bottleneck() = default; | |||
| dirty_bottleneck(dirty_bottleneck&) = delete; | |||
| dirty_bottleneck(dirty_bottleneck&&) = delete; | |||
| void lock() { | |||
| while(not try_lock()); | |||
| } | |||
| void unlock() { | |||
| if(!try_unlock()) throw std::runtime_error("Unlocking failed in dirty_bottleneck: potential double unlocking issue"); | |||
| } | |||
| }; | |||
| /********************* ***********************/ | |||
| template<typename T> | |||
| class lock_guard { | |||
| T& ref; | |||
| public: | |||
| lock_guard(T& _ref) | |||
| : ref(_ref) | |||
| { | |||
| ref.lock(); | |||
| } | |||
| ~lock_guard() { | |||
| ref.unlock(); | |||
| } | |||
| }; | |||
| /********************* ***********************/ | |||
| using mutex_handle = size_t; | |||
| enum class thread_state { | |||
| locking, | |||
| waiting, | |||
| unlocking | |||
| }; | |||
| enum class mutex_state { | |||
| remove = 0, | |||
| create = 1 | |||
| }; | |||
| using namespace std::chrono_literals; | |||
| void mutex_state_update(mutex_handle, mutex_state); | |||
| void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd); | |||
| class fast_bottleneck { | |||
| /** This is a secret tool that will help us later **/ | |||
| static std::atomic<size_t> counter; | |||
| const mutex_handle handle; | |||
| dirty_bottleneck trigger_lock; | |||
| std::list<std::unique_ptr<process>> waiting; | |||
| std::atomic_bool flag; | |||
| [[nodiscard]] bool try_lock() { | |||
| bool f = false; | |||
| bool t = true; | |||
| return flag.compare_exchange_strong(f,t,std::memory_order::acquire); | |||
| } | |||
| [[nodiscard]] bool try_unlock() { | |||
| bool f = false; | |||
| bool t = true; | |||
| return flag.compare_exchange_strong(t,f,std::memory_order::release); | |||
| } | |||
| public: | |||
| fast_bottleneck() | |||
| : flag() | |||
| , handle(counter.fetch_add(1)) //< We have to initialize that | |||
| { | |||
| mutex_state_update(handle, mutex_state::create); | |||
| } | |||
| fast_bottleneck(fast_bottleneck&) = delete; | |||
| fast_bottleneck(fast_bottleneck&&) = delete; | |||
| fast_bottleneck& operator=(fast_bottleneck&) = delete; | |||
| fast_bottleneck& operator=(fast_bottleneck&&) = delete; | |||
| ~fast_bottleneck() { | |||
| mutex_state_update(handle, mutex_state::remove); | |||
| } | |||
| void lock() { | |||
| /// The exponential backing variables | |||
| constexpr std::chrono::milliseconds max{1}; | |||
| std::chrono::nanoseconds wait{256}; | |||
| while(not try_lock()) { | |||
| /// The implementation of our little trick when waiting | |||
| signal_locking(thread_state::waiting, handle, system::sys.current->t_id); | |||
| system::sys.steal_and_yield([&](std::unique_ptr<process> p){ | |||
| lock_guard triggers(trigger_lock); | |||
| p->state = process_status::waiting; | |||
| waiting.push_front(std::move(p)); | |||
| }); | |||
| /// The exponential backing | |||
| //std::this_thread::sleep_for(wait); | |||
| wait += wait < max ? std::chrono::nanoseconds(wait.count()/2) : 0ns; | |||
| } | |||
| /// The implementation of our little trick when locking | |||
| signal_locking(thread_state::locking, handle, system::sys.current->t_id); | |||
| } | |||
| void unlock() { | |||
| if(!try_unlock()) throw std::runtime_error("Unlocking failed in fast_bottleneck: potential double unlocking issue"); | |||
| /// The implementation of our little trick when unlocking | |||
| signal_locking(thread_state::unlocking, handle, system::sys.current->t_id); | |||
| { | |||
| lock_guard triggers(trigger_lock); | |||
| if(waiting.size()) { | |||
| system::sys.running.push_front(std::move(waiting.back())); | |||
| waiting.pop_back(); | |||
| } | |||
| } | |||
| } | |||
| }; | |||
| dirty_bottleneck checker_lock; | |||
| dirty_bottleneck lister_lock; | |||
| std::map<int64_t, std::vector<mutex_handle>> owned_locks; | |||
| std::map<int64_t, std::optional<mutex_handle>> waiting_locks; | |||
| std::set<mutex_handle> locks_that_exist; | |||
| void mutex_state_update(mutex_handle mtx, mutex_state state) { | |||
| lock_guard lister(lister_lock); | |||
| switch(state) { | |||
| case mutex_state::create: { | |||
| locks_that_exist.insert(mtx); | |||
| }break; | |||
| case mutex_state::remove: { | |||
| locks_that_exist.erase(mtx); | |||
| }break; | |||
| } | |||
| } | |||
| bool build_dependency_graph ( | |||
| const mutex_handle mtx, | |||
| const int64_t thrd, | |||
| std::map<int64_t, std::vector<mutex_handle>>& owned_locks, | |||
| std::map<int64_t, std::optional<mutex_handle>>& waiting_locks | |||
| ) { | |||
| std::map<mutex_handle, std::set<mutex_handle>> graph; | |||
| for(auto& elem : waiting_locks) { | |||
| if(elem.second.has_value()) { | |||
| for(auto& n : owned_locks[elem.first]) { | |||
| graph[n].insert(elem.second.value()); | |||
| } | |||
| } | |||
| } | |||
| std::set<mutex_handle> nodes; | |||
| { | |||
| lock_guard lister(lister_lock); | |||
| nodes = locks_that_exist; | |||
| } | |||
| bool happened = true; | |||
| while(happened) { | |||
| happened = false; | |||
| for(auto& n : nodes) { | |||
| if(graph[n].size() == 0) | |||
| { | |||
| happened = true; | |||
| for(auto v : graph) { | |||
| v.second.erase(n); | |||
| } | |||
| nodes.erase(n); | |||
| break; | |||
| } | |||
| } | |||
| } | |||
| return nodes.size(); | |||
| } | |||
| void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd) { | |||
| bool bad = false; | |||
| { | |||
| lock_guard checker(checker_lock); | |||
| switch(state) { | |||
| case thread_state::locking: { | |||
| waiting_locks[thrd].reset(); | |||
| owned_locks[thrd].push_back(mtx); | |||
| } break; | |||
| case thread_state::unlocking: { | |||
| auto it = std::find(owned_locks[thrd].begin(), owned_locks[thrd].end(), mtx); | |||
| if(it != owned_locks[thrd].end()) { | |||
| owned_locks[thrd].erase(it); | |||
| } | |||
| } break; | |||
| case thread_state::waiting: { | |||
| waiting_locks[thrd] = mtx; | |||
| bad = build_dependency_graph(mtx, thrd, owned_locks, waiting_locks); | |||
| } break; | |||
| } | |||
| } | |||
| if(bad) throw std::runtime_error("Deadlock detected"); | |||
| } | |||
| std::atomic<size_t> fast_bottleneck::counter; | |||
| /********************* ***********************/ | |||
| fast_bottleneck A; | |||
| fast_bottleneck B; | |||
| int main() { | |||
| char c; | |||
| system::sys.current = std::make_unique<process>(&c); | |||
| std::cout << "1" << std::endl; | |||
| A.lock(); | |||
| system::sys.current->state = process_status::running; | |||
| system::sys.yield_to(std::make_unique<process>([](){ | |||
| A.lock(); | |||
| std::cout << "A" << std::endl; | |||
| A.unlock(); | |||
| })); | |||
| A.unlock(); | |||
| system::sys.yield(); | |||
| system::sys.yield_to(std::make_unique<process>([](){ | |||
| A.lock(); | |||
| std::cout << "B" << std::endl; | |||
| A.unlock(); | |||
| })); | |||
| std::cout << "3" << std::endl; | |||
| return 0; | |||
| } | |||