// Build with: clang++ --std=c++20 -g -O1 schedtest.cpp
|
|
|
|
#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <functional>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <span>
#include <stdexcept>
#include <utility>
#include <vector>
|
|
|
|
// Compile-time platform probe.
//
// The flag is true exactly when the target defines __x86_64__ and the compiler
// defines __GNUG__ or __clang__; in every other configuration it is false.
// NOTE(review): despite the "linux" in the name, no operating-system macro is
// tested here -- only the architecture and the compiler family.
#if defined(__x86_64__) && (defined(__GNUG__) || defined(__clang__))
constexpr bool is_x86_64_linux = true;
#else
constexpr bool is_x86_64_linux = false;
#endif

// Refuse to build anywhere the probe fails: the rest of the file relies on
// x86_64 register names and GNU-style inline assembly.
static_assert(is_x86_64_linux, "This code probably only works on x86_64 GNU Extensions C++");
|
|
|
|
// Saved register context of one process on x86_64.
//
// Holds copies of rsp (stack_ptr), rbp (base_ptr) and rbx, r12-r15. pull()
// stores the live register values into the members; push() loads the members
// back into the registers, which switches execution onto the saved stack.
// (These are the registers the System V AMD64 ABI lists as callee-saved.)
struct platform_data {
|
|
// Leaves every member uninitialized; the values only become meaningful after
// pull() has been called on this object.
platform_data() = default;
|
|
|
|
// Prepares a context for a brand-new stack occupying `stack_str`.
// stack_ptr and base_ptr both point 16 bytes before the end of the span;
// rbx and r12-r15 are left uninitialized.
// NOTE(review): the span must be at least 16 bytes long, otherwise
// `stack_str.end()-16` is out of range -- there is no check here.
platform_data(std::span<char> stack_str)
|
|
: stack_ptr(&*(stack_str.end()-16))
|
|
, base_ptr(stack_ptr)
|
|
{}
|
|
|
|
// Saved general-purpose registers.
uint64_t rbx, r12, r13, r14, r15;
|
|
|
|
// Saved rsp.
void* stack_ptr;
|
|
// Saved rbp.
void* base_ptr;
|
|
|
|
// Stores the current rsp, rbp, rbx, r12, r13, r14 and r15 into the members.
// Marked always_inline -- presumably so that the captured rsp/rbp are those of
// the calling function's frame rather than of a separate pull() frame
// (TODO confirm).
void pull() __attribute__((always_inline))
|
|
{
|
|
__asm__ __volatile__(
|
|
"movq %%rsp, %0\n"
|
|
"movq %%rbp, %1\n"
|
|
"movq %%rbx, %2\n"
|
|
"movq %%r12, %3\n"
|
|
"movq %%r13, %4\n"
|
|
"movq %%r14, %5\n"
|
|
"movq %%r15, %6\n"
|
|
: "=m"(stack_ptr)
|
|
, "=m"(base_ptr)
|
|
, "=m"(rbx)
|
|
, "=m"(r12)
|
|
, "=m"(r13)
|
|
, "=m"(r14)
|
|
, "=m"(r15)
|
|
);
|
|
}
|
|
|
|
// Switches the CPU onto the saved context and returns `location`.
//
// Steps, in order:
//   1. `location` is written into the pointer-sized slot just below the saved
//      stack_ptr.
//   2. rsp is loaded with the address of that slot; rbp, rbx and r12-r15 are
//      loaded from the members.
//   3. `popq` reads the slot back into `location`, which also advances rsp by
//      one pointer so it ends up equal to stack_ptr.
// NOTE(review): the asm rewrites rsp, rbp, rbx and r12-r15 without listing
// them as clobbers, and the later "m" operands are read after earlier
// registers have already been replaced. This looks like it depends on how the
// compiler happens to address those operands -- verify before changing
// optimisation level or compiler.
void* push(void* location) __attribute__((always_inline))
|
|
{
|
|
// Address of the slot one pointer below the saved stack pointer.
volatile void* volatile tmp = static_cast<char*>(stack_ptr) - sizeof(void*);
|
|
// Park `location` in that slot so it can be popped after the switch.
*static_cast<volatile void* volatile * volatile>(tmp) = location;
|
|
__asm__ __volatile__(
|
|
"movq %1, %%rsp\n"
|
|
"movq %2, %%rbp\n"
|
|
"movq %3, %%rbx\n"
|
|
"movq %4, %%r12\n"
|
|
"movq %5, %%r13\n"
|
|
"movq %6, %%r14\n"
|
|
"movq %7, %%r15\n"
|
|
"popq %0\n"
|
|
: "+r"(location)
|
|
: "m"(tmp)
|
|
, "m"(base_ptr)
|
|
, "m"(rbx)
|
|
, "m"(r12)
|
|
, "m"(r13)
|
|
, "m"(r14)
|
|
, "m"(r15)
|
|
: "memory"
|
|
);
|
|
return location;
|
|
}
|
|
};
|
|
|
|
/// Scheduling state of a process. The numeric values run 0..4 in declaration
/// order.
enum class process_status {
    inactive,  ///< 0: initial state of a process; spawner() starts such a process
    running,   ///< 1: re-queued on the scheduler's `running` list by system::rid
    waiting,   ///< 2: filed on a `waiting` list by system::rid
    finished,  ///< 3: handed to system::clean by system::rid
    zombie     ///< 4: filed on the scheduler's `naughty` list by system::rid
};
|
|
|
|
struct process {
|
|
static int64_t counter;
|
|
|
|
char* stack;
|
|
size_t sz;
|
|
platform_data scheduling_swapper;
|
|
process_status state = process_status::inactive;
|
|
std::function<void()> fn;
|
|
int64_t t_id;
|
|
|
|
process(std::function<void()> _fn, size_t _sz = 16384)
|
|
: stack(new char[_sz])
|
|
, sz(_sz)
|
|
, scheduling_swapper(std::span<char>(stack, sz))
|
|
, fn(_fn)
|
|
, t_id(counter++)
|
|
{}
|
|
|
|
process(char* stack_base)
|
|
: stack(stack_base)
|
|
, sz(0)
|
|
, t_id(counter++)
|
|
{}
|
|
|
|
process(const process&) = delete;
|
|
|
|
~process() {
|
|
if(sz) delete[] stack;
|
|
}
|
|
};
|
|
int64_t process::counter = 0;
|
|
|
|
|
|
// Forward declaration of the trampoline that struct system's yield functions
// call right after switching stacks; the definition follows struct system.
// Declared noinline. `struct system` is written with the elaborated specifier
// because the type is only defined further down.
__attribute__((noinline)) struct system* spawner (struct system* sys);
|
|
|
|
// The cooperative scheduler: owns the process queues and the currently
// executing process, and implements the yield operations that switch from one
// process to another.
struct system {
|
|
// The one scheduler instance; the yield functions below and spawner() go
// through it.
static system sys;
|
|
|
|
// Process queues. `running` holds processes that can be scheduled, `waiting`
// receives processes whose state is `waiting`, `naughty` receives processes
// whose state is `zombie` (see rid()).
std::list<std::unique_ptr<process>>
|
|
running,
|
|
waiting,
|
|
naughty;
|
|
|
|
// Not referenced anywhere in the visible code.
std::unique_ptr<process> previous;
|
|
// The process that is executing right now.
std::unique_ptr<process> current;
|
|
|
|
// Removes and returns the process at the back of `running`.
// NOTE(review): there is no check for an empty list; calling this with
// nothing runnable is undefined behaviour.
std::unique_ptr<process> one() {
|
|
auto v = std::move(running.back());
|
|
running.pop_back();
|
|
return v;
|
|
}
|
|
|
|
// Files a process according to its state: inactive/running go to the front of
// `running`, finished goes to clean(), zombie to `naughty`, waiting to
// `waiting`. The parameter shadows the `current` member.
// NOTE(review): the pointer is dereferenced unconditionally -- must not be null.
void rid(std::unique_ptr<process> current) {
|
|
switch(current->state) {
|
|
case process_status::inactive:
|
|
case process_status::running:
|
|
running.push_front(std::move(current));
|
|
break;
|
|
case process_status::finished:
|
|
clean(std::move(current));
|
|
break;
|
|
case process_status::zombie:
|
|
naughty.push_front(std::move(current));
|
|
break;
|
|
case process_status::waiting:
|
|
waiting.push_front(std::move(current));
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Takes ownership of a process and does nothing with it, so the process is
// destroyed once the call completes.
void clean(std::unique_ptr<process>) {}
|
|
|
|
// Suspends the current process and switches to `target`:
// save the current registers, file the current process via rid(), make
// `target` current, load its registers, then call spawner().
// The statement order matters: everything after push() executes on the
// target's stack.
void yield_to(std::unique_ptr<process> target) noexcept {
|
|
current->scheduling_swapper.pull();
|
|
sys.rid(std::move(current));
|
|
current = std::move(target);
|
|
current->scheduling_swapper.push(this);
|
|
spawner(&sys);
|
|
}
|
|
|
|
|
|
|
|
// Same as yield_to(), except that the next process is taken from the back of
// `running` via one().
void yield() noexcept {
|
|
current->scheduling_swapper.pull();
|
|
sys.rid(std::move(current));
|
|
current = one();
|
|
current->scheduling_swapper.push(this);
|
|
spawner(&sys);
|
|
}
|
|
|
|
// Same as yield(), except that the suspended process is not filed through
// rid(): ownership of it is handed to `func`, which decides where it is kept.
template<typename fn>
|
|
void steal_and_yield(fn func) noexcept {
|
|
current->scheduling_swapper.pull();
|
|
func(std::move(current));
|
|
current = one();
|
|
current->scheduling_swapper.push(this);
|
|
spawner(&sys);
|
|
}
|
|
};
|
|
|
|
|
|
// Needs to return the system one way or another
|
|
__attribute__((noinline)) struct system* spawner (struct system* sys) {
|
|
auto& proc = *system::sys.current;
|
|
if(proc.state == process_status::inactive) {
|
|
proc.state = process_status::running;
|
|
proc.fn();
|
|
proc.state = process_status::finished;
|
|
sys->current->scheduling_swapper.pull();
|
|
sys->yield();
|
|
}
|
|
return sys;
|
|
}
|
|
|
|
// Definition of the scheduler instance declared as a static member of
// struct system.
struct system system::sys;
|
|
|
|
/********************* ***********************/
|
|
|
|
/// Minimal spin lock built on a single atomic flag.
///
/// lock() busy-waits until the flag could be switched from false to true;
/// unlock() switches it back and throws if the lock was not held. The type is
/// neither copyable nor movable.
class dirty_bottleneck {
    // Explicitly starts unlocked, so instances with automatic or dynamic
    // storage are well defined under every language standard.
    std::atomic_bool flag{false};

    /// Attempts false -> true once. Returns true when the lock was acquired.
    [[nodiscard]] bool try_lock() {
        bool expected = false;
        return flag.compare_exchange_strong(expected, true, std::memory_order_acquire);
    }

    /// Attempts true -> false once. Returns false when the lock was not held.
    [[nodiscard]] bool try_unlock() {
        bool expected = true;
        return flag.compare_exchange_strong(expected, false, std::memory_order_release);
    }

public:
    dirty_bottleneck() = default;
    dirty_bottleneck(const dirty_bottleneck&) = delete;
    dirty_bottleneck(dirty_bottleneck&&) = delete;
    dirty_bottleneck& operator=(const dirty_bottleneck&) = delete;
    dirty_bottleneck& operator=(dirty_bottleneck&&) = delete;

    /// Spins until the lock has been acquired.
    void lock() {
        while(!try_lock()) {
            // busy-wait
        }
    }

    /// Releases the lock.
    /// @throws std::runtime_error if the lock is not currently held.
    void unlock() {
        if(!try_unlock()) throw std::runtime_error("Unlocking failed in dirty_bottleneck: potential double unlocking issue");
    }
};
|
|
|
|
/********************* ***********************/
|
|
|
|
/// Scope guard for any type providing lock() and unlock(): locks on
/// construction and unlocks on destruction.
///
/// The guard is not copyable -- a copy would call unlock() a second time on
/// the same object.
///
/// NOTE: unlock() runs inside the destructor, so if T::unlock() throws the
/// program is terminated.
template<typename T>
class lock_guard {
    T& ref;   // the guarded lock; must outlive the guard

public:
    /// Acquires `_ref` by calling `_ref.lock()`.
    lock_guard(T& _ref)
        : ref(_ref)
    {
        ref.lock();
    }

    lock_guard(const lock_guard&) = delete;
    lock_guard& operator=(const lock_guard&) = delete;

    /// Releases the lock acquired by the constructor.
    ~lock_guard() {
        ref.unlock();
    }
};
|
|
|
|
/********************* ***********************/
|
|
|
|
// Numeric identifier of a fast_bottleneck, used as the key in the
// deadlock-checker bookkeeping.
using mutex_handle = size_t;
|
|
|
|
/// Event reported to signal_locking() about a process and a mutex.
enum class thread_state {
    locking   = 0,  ///< the process has just acquired the mutex
    waiting   = 1,  ///< the process failed to acquire the mutex and will wait
    unlocking = 2   ///< the process has just released the mutex
};
|
|
|
|
/// Lifetime event reported to mutex_state_update().
enum class mutex_state {
    remove,  ///< 0: the mutex is being destroyed
    create   ///< 1: the mutex has just been constructed
};
|
|
|
|
// Brings the duration literal suffixes (e.g. `0ns`) into scope for the rest
// of the file.
using namespace std::chrono_literals;
|
|
|
|
// Forward declarations of the deadlock-checker hooks called by
// fast_bottleneck; both are defined after that class.
void mutex_state_update(mutex_handle, mutex_state);
|
|
void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd);
|
|
|
|
class fast_bottleneck {
|
|
|
|
/** This is a secret tool that will help us later **/
|
|
static std::atomic<size_t> counter;
|
|
const mutex_handle handle;
|
|
dirty_bottleneck trigger_lock;
|
|
std::list<std::unique_ptr<process>> waiting;
|
|
|
|
std::atomic_bool flag;
|
|
|
|
[[nodiscard]] bool try_lock() {
|
|
bool f = false;
|
|
bool t = true;
|
|
return flag.compare_exchange_strong(f,t,std::memory_order::acquire);
|
|
}
|
|
|
|
[[nodiscard]] bool try_unlock() {
|
|
bool f = false;
|
|
bool t = true;
|
|
return flag.compare_exchange_strong(t,f,std::memory_order::release);
|
|
}
|
|
public:
|
|
fast_bottleneck()
|
|
: flag()
|
|
, handle(counter.fetch_add(1)) //< We have to initialize that
|
|
{
|
|
mutex_state_update(handle, mutex_state::create);
|
|
}
|
|
fast_bottleneck(fast_bottleneck&) = delete;
|
|
fast_bottleneck(fast_bottleneck&&) = delete;
|
|
fast_bottleneck& operator=(fast_bottleneck&) = delete;
|
|
fast_bottleneck& operator=(fast_bottleneck&&) = delete;
|
|
|
|
~fast_bottleneck() {
|
|
mutex_state_update(handle, mutex_state::remove);
|
|
}
|
|
|
|
|
|
void lock() {
|
|
/// The exponential backing variables
|
|
constexpr std::chrono::milliseconds max{1};
|
|
std::chrono::nanoseconds wait{256};
|
|
while(not try_lock()) {
|
|
/// The implementation of our little trick when waiting
|
|
signal_locking(thread_state::waiting, handle, system::sys.current->t_id);
|
|
|
|
system::sys.steal_and_yield([&](std::unique_ptr<process> p){
|
|
lock_guard triggers(trigger_lock);
|
|
p->state = process_status::waiting;
|
|
waiting.push_front(std::move(p));
|
|
});
|
|
|
|
/// The exponential backing
|
|
//std::this_thread::sleep_for(wait);
|
|
wait += wait < max ? std::chrono::nanoseconds(wait.count()/2) : 0ns;
|
|
}
|
|
/// The implementation of our little trick when locking
|
|
signal_locking(thread_state::locking, handle, system::sys.current->t_id);
|
|
}
|
|
|
|
void unlock() {
|
|
if(!try_unlock()) throw std::runtime_error("Unlocking failed in fast_bottleneck: potential double unlocking issue");
|
|
/// The implementation of our little trick when unlocking
|
|
signal_locking(thread_state::unlocking, handle, system::sys.current->t_id);
|
|
|
|
{
|
|
lock_guard triggers(trigger_lock);
|
|
if(waiting.size()) {
|
|
system::sys.running.push_front(std::move(waiting.back()));
|
|
waiting.pop_back();
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
// Shared state of the deadlock checker.
//
// Taken by signal_locking() for the whole duration of each update.
dirty_bottleneck checker_lock;
|
|
// Guards `locks_that_exist`.
dirty_bottleneck lister_lock;
|
|
// Process id -> handles of the mutexes that process currently holds.
std::map<int64_t, std::vector<mutex_handle>> owned_locks;
|
|
// Process id -> handle of the mutex it is waiting for; empty when it is not
// waiting.
std::map<int64_t, std::optional<mutex_handle>> waiting_locks;
|
|
// Handles of all mutexes that have been created and not yet destroyed.
std::set<mutex_handle> locks_that_exist;
|
|
|
|
/// Records the creation or destruction of a mutex in `locks_that_exist`.
///
/// @param mtx    handle of the mutex concerned.
/// @param state  `create` adds the handle to the set, `remove` takes it out;
///               any other value leaves the set untouched.
void mutex_state_update(mutex_handle mtx, mutex_state state) {
    // Held for the whole update so the set is never seen half-modified.
    lock_guard lister(lister_lock);

    if(state == mutex_state::create) {
        locks_that_exist.insert(mtx);
    } else if(state == mutex_state::remove) {
        locks_that_exist.erase(mtx);
    }
}
|
|
|
|
bool build_dependency_graph (
|
|
const mutex_handle mtx,
|
|
const int64_t thrd,
|
|
std::map<int64_t, std::vector<mutex_handle>>& owned_locks,
|
|
std::map<int64_t, std::optional<mutex_handle>>& waiting_locks
|
|
) {
|
|
std::map<mutex_handle, std::set<mutex_handle>> graph;
|
|
for(auto& elem : waiting_locks) {
|
|
if(elem.second.has_value()) {
|
|
for(auto& n : owned_locks[elem.first]) {
|
|
graph[n].insert(elem.second.value());
|
|
}
|
|
}
|
|
}
|
|
|
|
std::set<mutex_handle> nodes;
|
|
{
|
|
lock_guard lister(lister_lock);
|
|
nodes = locks_that_exist;
|
|
}
|
|
|
|
bool happened = true;
|
|
|
|
while(happened) {
|
|
happened = false;
|
|
for(auto& n : nodes) {
|
|
if(graph[n].size() == 0)
|
|
{
|
|
happened = true;
|
|
for(auto v : graph) {
|
|
v.second.erase(n);
|
|
}
|
|
nodes.erase(n);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
return nodes.size();
|
|
}
|
|
|
|
/// Updates the deadlock-checker bookkeeping for one lock event.
///
/// @param state  what happened: the process acquired, released or started
///               waiting for the mutex.
/// @param mtx    handle of the mutex concerned.
/// @param thrd   id of the process concerned.
/// @throws std::runtime_error ("Deadlock detected") when a `waiting` event
///         makes build_dependency_graph() report a deadlock. The exception is
///         raised only after `checker_lock` has been released.
void signal_locking(thread_state state, mutex_handle mtx, int64_t thrd) {
    bool deadlocked = false;
    {
        lock_guard checker(checker_lock);

        if(state == thread_state::locking) {
            // Acquired: no longer waiting, and now an owner.
            waiting_locks[thrd].reset();
            owned_locks[thrd].push_back(mtx);
        } else if(state == thread_state::unlocking) {
            // Released: forget the first matching entry, if there is one.
            auto& held = owned_locks[thrd];
            const auto pos = std::find(held.begin(), held.end(), mtx);
            if(pos != held.end()) {
                held.erase(pos);
            }
        } else if(state == thread_state::waiting) {
            // About to block: record it and look for a cycle.
            waiting_locks[thrd] = mtx;
            deadlocked = build_dependency_graph(mtx, thrd, owned_locks, waiting_locks);
        }
    }
    if(deadlocked) throw std::runtime_error("Deadlock detected");
}
|
|
|
|
// Definition of the handle counter declared in fast_bottleneck; each
// constructed fast_bottleneck takes its handle from it with fetch_add(1).
std::atomic<size_t> fast_bottleneck::counter;
|
|
|
|
/********************* ***********************/
|
|
|
|
// Two global mutexes for the demo in main(). Only A is used in the visible
// code.
fast_bottleneck A;
|
|
fast_bottleneck B;
|
|
|
|
// Demo: the main context and two spawned processes contend for mutex A.
int main() {
|
|
// Register the already-running main context as a process. The address of `c`
// is passed as the stack base; this constructor sets sz to 0, so the process
// never frees that memory.
char c;
|
|
system::sys.current = std::make_unique<process>(&c);
|
|
|
|
std::cout << "1" << std::endl;
|
|
// Main takes A before the first process exists.
A.lock();
|
|
// Mark main as running so that spawner() does not treat it as a fresh
// process (it only starts processes whose state is inactive).
system::sys.current->state = process_status::running;
|
|
// Start a first process that also wants A; main still holds it at this point.
system::sys.yield_to(std::make_unique<process>([](){
|
|
A.lock();
|
|
std::cout << "A" << std::endl;
|
|
A.unlock();
|
|
}));
|
|
// Release A, then give the other runnable processes a turn.
A.unlock();
|
|
system::sys.yield();
|
|
// Start a second process that takes and releases A.
system::sys.yield_to(std::make_unique<process>([](){
|
|
A.lock();
|
|
std::cout << "B" << std::endl;
|
|
A.unlock();
|
|
}));
|
|
std::cout << "3" << std::endl;
|
|
return 0;
|
|
}
|