// Build with: clang++ --std=c++20 -g -O1 schedtest.cpp #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__x86_64__) #if defined(__GNUG__) constexpr bool is_x86_64_linux = true; #else #if defined(__clang__) constexpr bool is_x86_64_linux = true; #else constexpr bool is_x86_64_linux = false; #endif #endif #else constexpr bool is_x86_64_linux = false; #endif static_assert(is_x86_64_linux, "This code probably only works on x86_64 GNU Extensions C++"); struct platform_data { platform_data() = default; platform_data(std::span stack_str) : stack_ptr(&*(stack_str.end()-16)) , base_ptr(stack_ptr) {} uint64_t rbx, r12, r13, r14, r15; void* stack_ptr; void* base_ptr; void pull() __attribute__((always_inline)) { __asm__ __volatile__( "movq %%rsp, %0\n" "movq %%rbp, %1\n" "movq %%rbx, %2\n" "movq %%r12, %3\n" "movq %%r13, %4\n" "movq %%r14, %5\n" "movq %%r15, %6\n" : "=m"(stack_ptr) , "=m"(base_ptr) , "=m"(rbx) , "=m"(r12) , "=m"(r13) , "=m"(r14) , "=m"(r15) ); } void* push(void* location) __attribute__((always_inline)) { volatile void* volatile tmp = static_cast(stack_ptr) - sizeof(void*); *static_cast(tmp) = location; __asm__ __volatile__( "movq %1, %%rsp\n" "movq %2, %%rbp\n" "movq %3, %%rbx\n" "movq %4, %%r12\n" "movq %5, %%r13\n" "movq %6, %%r14\n" "movq %7, %%r15\n" "popq %0\n" : "+r"(location) : "m"(tmp) , "m"(base_ptr) , "m"(rbx) , "m"(r12) , "m"(r13) , "m"(r14) , "m"(r15) : "memory" ); return location; } }; enum class process_status { inactive = 0, running = 1, waiting = 2, finished = 3, zombie = 4 }; struct process { static int64_t counter; char* stack; size_t sz; platform_data scheduling_swapper; process_status state = process_status::inactive; std::function fn; int64_t t_id; process(std::function _fn, size_t _sz = 16384) : stack(new char[_sz]) , sz(_sz) , scheduling_swapper(std::span(stack, sz)) , fn(_fn) , t_id(counter++) {} process(char* stack_base) : stack(stack_base) , sz(0) , t_id(counter++) {} process(const process&) = delete; ~process() { if(sz) delete[] stack; } }; int64_t process::counter = 0; __attribute__((noinline)) struct system* spawner (struct system* sys); struct system { static system sys; std::list> running, waiting, naughty; std::unique_ptr previous; std::unique_ptr current; std::unique_ptr one() { auto v = std::move(running.back()); running.pop_back(); return v; } void rid(std::unique_ptr current) { switch(current->state) { case process_status::inactive: case process_status::running: running.push_front(std::move(current)); break; case process_status::finished: clean(std::move(current)); break; case process_status::zombie: naughty.push_front(std::move(current)); break; case process_status::waiting: waiting.push_front(std::move(current)); break; } } void clean(std::unique_ptr) {} void yield_to(std::unique_ptr target) noexcept { current->scheduling_swapper.pull(); sys.rid(std::move(current)); current = std::move(target); current->scheduling_swapper.push(this); spawner(&sys); } void yield() noexcept { current->scheduling_swapper.pull(); sys.rid(std::move(current)); current = one(); current->scheduling_swapper.push(this); spawner(&sys); } template void steal_and_yield(fn func) noexcept { current->scheduling_swapper.pull(); func(std::move(current)); current = one(); current->scheduling_swapper.push(this); spawner(&sys); } }; // Needs to return the system one way or another __attribute__((noinline)) struct system* spawner (struct system* sys) { auto& proc = *system::sys.current; if(proc.state == process_status::inactive) { proc.state = process_status::running; proc.fn(); proc.state = process_status::finished; sys->current->scheduling_swapper.pull(); sys->yield(); } return sys; } struct system system::sys; /********************* ***********************/ class dirty_bottleneck { std::atomic_bool flag; [[nodiscard]] bool try_lock() { bool f = false; bool t = true; return flag.compare_exchange_strong(f,t,std::memory_order::acquire); } [[nodiscard]] bool try_unlock() { bool f = false; bool t = true; return flag.compare_exchange_strong(t,f,std::memory_order::release); } public: dirty_bottleneck() = default; dirty_bottleneck(dirty_bottleneck&) = delete; dirty_bottleneck(dirty_bottleneck&&) = delete; void lock() { while(not try_lock()); } void unlock() { if(!try_unlock()) throw std::runtime_error("Unlocking failed in dirty_bottleneck: potential double unlocking issue"); } }; /********************* ***********************/ template class lock_guard { T& ref; public: lock_guard(T& _ref) : ref(_ref) { ref.lock(); } ~lock_guard() { ref.unlock(); } }; /********************* ***********************/ using mutex_handle = size_t; enum class thread_state { locking, waiting, unlocking }; enum class mutex_state { remove = 0, create = 1 }; using namespace std::chrono_literals; void mutex_state_update(mutex_handle, mutex_state); void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd); class fast_bottleneck { /** This is a secret tool that will help us later **/ static std::atomic counter; const mutex_handle handle; dirty_bottleneck trigger_lock; std::list> waiting; std::atomic_bool flag; [[nodiscard]] bool try_lock() { bool f = false; bool t = true; return flag.compare_exchange_strong(f,t,std::memory_order::acquire); } [[nodiscard]] bool try_unlock() { bool f = false; bool t = true; return flag.compare_exchange_strong(t,f,std::memory_order::release); } public: fast_bottleneck() : flag() , handle(counter.fetch_add(1)) //< We have to initialize that { mutex_state_update(handle, mutex_state::create); } fast_bottleneck(fast_bottleneck&) = delete; fast_bottleneck(fast_bottleneck&&) = delete; fast_bottleneck& operator=(fast_bottleneck&) = delete; fast_bottleneck& operator=(fast_bottleneck&&) = delete; ~fast_bottleneck() { mutex_state_update(handle, mutex_state::remove); } void lock() { /// The exponential backing variables constexpr std::chrono::milliseconds max{1}; std::chrono::nanoseconds wait{256}; while(not try_lock()) { /// The implementation of our little trick when waiting signal_locking(thread_state::waiting, handle, system::sys.current->t_id); system::sys.steal_and_yield([&](std::unique_ptr p){ lock_guard triggers(trigger_lock); p->state = process_status::waiting; waiting.push_front(std::move(p)); }); /// The exponential backing //std::this_thread::sleep_for(wait); wait += wait < max ? std::chrono::nanoseconds(wait.count()/2) : 0ns; } /// The implementation of our little trick when locking signal_locking(thread_state::locking, handle, system::sys.current->t_id); } void unlock() { if(!try_unlock()) throw std::runtime_error("Unlocking failed in fast_bottleneck: potential double unlocking issue"); /// The implementation of our little trick when unlocking signal_locking(thread_state::unlocking, handle, system::sys.current->t_id); { lock_guard triggers(trigger_lock); if(waiting.size()) { system::sys.running.push_front(std::move(waiting.back())); waiting.pop_back(); } } } }; dirty_bottleneck checker_lock; dirty_bottleneck lister_lock; std::map> owned_locks; std::map> waiting_locks; std::set locks_that_exist; void mutex_state_update(mutex_handle mtx, mutex_state state) { lock_guard lister(lister_lock); switch(state) { case mutex_state::create: { locks_that_exist.insert(mtx); }break; case mutex_state::remove: { locks_that_exist.erase(mtx); }break; } } bool build_dependency_graph ( const mutex_handle mtx, const int64_t thrd, std::map>& owned_locks, std::map>& waiting_locks ) { std::map> graph; for(auto& elem : waiting_locks) { if(elem.second.has_value()) { for(auto& n : owned_locks[elem.first]) { graph[n].insert(elem.second.value()); } } } std::set nodes; { lock_guard lister(lister_lock); nodes = locks_that_exist; } bool happened = true; while(happened) { happened = false; for(auto& n : nodes) { if(graph[n].size() == 0) { happened = true; for(auto v : graph) { v.second.erase(n); } nodes.erase(n); break; } } } return nodes.size(); } void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd) { bool bad = false; { lock_guard checker(checker_lock); switch(state) { case thread_state::locking: { waiting_locks[thrd].reset(); owned_locks[thrd].push_back(mtx); } break; case thread_state::unlocking: { auto it = std::find(owned_locks[thrd].begin(), owned_locks[thrd].end(), mtx); if(it != owned_locks[thrd].end()) { owned_locks[thrd].erase(it); } } break; case thread_state::waiting: { waiting_locks[thrd] = mtx; bad = build_dependency_graph(mtx, thrd, owned_locks, waiting_locks); } break; } } if(bad) throw std::runtime_error("Deadlock detected"); } std::atomic fast_bottleneck::counter; /********************* ***********************/ fast_bottleneck A; int main() { char c; system::sys.current = std::make_unique(&c); std::cout << "1" << std::endl; A.lock(); system::sys.current->state = process_status::running; system::sys.yield_to(std::make_unique([](){ A.lock(); std::cout << "A" << std::endl; A.unlock(); })); A.unlock(); system::sys.yield(); system::sys.yield_to(std::make_unique([](){ A.lock(); std::cout << "B" << std::endl; A.unlock(); })); std::cout << "3" << std::endl; return 0; }